gtfs_df 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 70a7d05d5600baac0b36b21d0ebdd8fd0719cfe572844e44ae710c25fbf353f8
4
- data.tar.gz: df2b5f47cd19bfe213644dcda42b22778acf98d8f104bf71c4e231eaa4860bba
3
+ metadata.gz: 8c1060a2f120fc6f620adc523d8038a05ec6896826fbb950973c14b8a406fc37
4
+ data.tar.gz: 6f75be7bbec96e5f75994fa130f9d599061d796ef702cad6c833d8edb873ef6b
5
5
  SHA512:
6
- metadata.gz: ee94819792b5c1389783a7844ce23af40de13264c5c069e608ae9c2383254c3fdcdc09915f5c33348d204675e5e373f8c26b039ff97144be37721aa7f4bc1b49
7
- data.tar.gz: 28f28bb69a62738c817dafbc36c756a96f56f9f96d4d587aab2b9dffa8df5b6948b25d6b024e70856639cc2a97c75c49ff510bb7695c825f54405bf23701582d
6
+ metadata.gz: c8000394c34a67284d16df01e9e236b39d074bd300d1416be8557a7db2387357e860b7c741ff3eb483293895ca16bb3936efd857489124d309ced30a06d248aa
7
+ data.tar.gz: 15a5d19bb1d06ab1c9f21ae0e403ce6187dd370dcf868d20b9ee1de415a2a1ecabb1ae158b5dc10bc6236f329863e661581a2f8b39d0fb9eed50bc9dbad350db
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## [0.12.0] - 2026-06-12
2
+
3
+ ### 🚀 Features
4
+
5
+ - Allow loading a subset of tables
6
+
7
+ ### 🐛 Bug Fixes
8
+
9
+ - Truncate ragged lines
10
+ - Consider BOM when looking for empty files
1
11
  ## [0.11.1] - 2026-04-28
2
12
 
3
13
  ### 🐛 Bug Fixes
@@ -8,6 +18,10 @@
8
18
 
9
19
  - Validate filtering from feed with empty calendar
10
20
  - Validate unreferenced calendar dates
21
+
22
+ ### ⚙️ Miscellaneous Tasks
23
+
24
+ - Bump version to 0.11.1
11
25
  ## [0.11.0] - 2026-03-30
12
26
 
13
27
  ### ⚙️ Miscellaneous Tasks
@@ -12,7 +12,13 @@ module GtfsDf
12
12
  elsif input.is_a?(String)
13
13
  # TODO: use `infer_schema: false` instead of `infer_schema_length` after polars release:
14
14
  # https://github.com/ankane/ruby-polars/blob/master/CHANGELOG.md#100-unreleased
15
- df = Polars.read_csv(input, infer_schema_length: 0, encoding: "utf8-lossy")
15
+ df = Polars
16
+ .read_csv(
17
+ input,
18
+ infer_schema_length: 0,
19
+ encoding: "utf8-lossy",
20
+ truncate_ragged_lines: true
21
+ )
16
22
  .rename(->(col) { col.strip })
17
23
 
18
24
  # Strip out empty lines. Unfortunately read_csv does not support the drop_empty_rows
@@ -6,13 +6,13 @@ module GtfsDf
6
6
  #
7
7
  # @param zip_path [String] Path to the GTFS zip file
8
8
  # @param parse_times [Boolean] Whether to parse time fields to seconds since midnight (default: false)
9
+ # @param relevant_files [Array<String>] A list of file names, useful to avoid loading tables you don't care about.
9
10
  # @return [Feed] The loaded GTFS feed
10
- def self.load_from_zip(zip_path, parse_times: false)
11
+ def self.load_from_zip(zip_path, parse_times: false, relevant_files: nil)
11
12
  data = nil
12
13
 
13
- relevant_files = GtfsDf::Feed::GTFS_FILES
14
- .map { |name| "#{name}.txt" }
15
- .to_set
14
+ relevant_files ||= GtfsDf::Feed::GTFS_FILES.map { |name| "#{name}.txt" }
15
+ relevant_files = relevant_files.to_set
16
16
 
17
17
  seen = {}
18
18
 
@@ -37,7 +37,7 @@ module GtfsDf
37
37
  end
38
38
  end
39
39
 
40
- data = load_from_dir(tmpdir, parse_times: parse_times)
40
+ data = load_from_dir(tmpdir, parse_times:, relevant_files:)
41
41
  end
42
42
 
43
43
  data
@@ -47,12 +47,17 @@ module GtfsDf
47
47
  #
48
48
  # @param dir_path [String] Path to the GTFS directory
49
49
  # @param parse_times [Boolean] Whether to parse time fields to seconds since midnight (default: false)
50
+ # @param relevant_files [Array<String>] A list of file names, useful to avoid loading tables you don't care about.
50
51
  # @return [Feed] The loaded GTFS feed
51
- def self.load_from_dir(dir_path, parse_times: false)
52
+ def self.load_from_dir(dir_path, parse_times: false, relevant_files: nil)
53
+ relevant_files ||= GtfsDf::Feed::GTFS_FILES.map { |name| "#{name}.txt" }
54
+ relevant_files = relevant_files.to_set
55
+
52
56
  data = {}
53
57
  GtfsDf::Feed::GTFS_FILES.each do |gtfs_file|
54
- path = File.join(dir_path, "#{gtfs_file}.txt")
55
- next unless File.exist?(path)
58
+ basename = "#{gtfs_file}.txt"
59
+ path = File.join(dir_path, basename)
60
+ next unless relevant_files.include?(basename) && File.exist?(path)
56
61
 
57
62
  data[gtfs_file] = data_frame(gtfs_file, path)
58
63
  end
@@ -66,7 +71,11 @@ module GtfsDf
66
71
  end
67
72
 
68
73
  private_class_method def self.has_header?(zip_entry)
69
- zip_entry.get_input_stream.readline.strip != ""
74
+ zip_entry
75
+ .get_input_stream
76
+ .readline
77
+ .delete_prefix("\xEF\xBB\xBF".b) # BOM
78
+ .strip != ""
70
79
  rescue
71
80
  false
72
81
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GtfsDf
4
- VERSION = "0.11.1"
4
+ VERSION = "0.12.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gtfs_df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Mejorado