gtfs_df 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3258de8699df9b1e0eb934b3edab17a0381536dbdf7bde0275f375854392478d
4
- data.tar.gz: c67a356c1fca9818b89a936dac816826f4a00b50b990d352d59d96d1231a4104
3
+ metadata.gz: 8c1060a2f120fc6f620adc523d8038a05ec6896826fbb950973c14b8a406fc37
4
+ data.tar.gz: 6f75be7bbec96e5f75994fa130f9d599061d796ef702cad6c833d8edb873ef6b
5
5
  SHA512:
6
- metadata.gz: 881cf6027f5e68eda3916153889ac96f27f992ac411564a5232ae6b409b2488d41b596bcc23a6a4492c0a9fc2fce471986dd5ce07ede354a87a4b21033e0d4c8
7
- data.tar.gz: 6f68989b3220ba20c6fc185560f0fedda797d4f0e06735a3c366f1722ecf6ce9fa35698716c0f339852202ceda7b9f4c8ef13fcb95b3e185a83202755d84f00e
6
+ metadata.gz: c8000394c34a67284d16df01e9e236b39d074bd300d1416be8557a7db2387357e860b7c741ff3eb483293895ca16bb3936efd857489124d309ced30a06d248aa
7
+ data.tar.gz: 15a5d19bb1d06ab1c9f21ae0e403ce6187dd370dcf868d20b9ee1de415a2a1ecabb1ae158b5dc10bc6236f329863e661581a2f8b39d0fb9eed50bc9dbad350db
data/CHANGELOG.md CHANGED
@@ -1,8 +1,33 @@
1
+ ## [0.12.0] - 2026-06-12
2
+
3
+ ### 🚀 Features
4
+
5
+ - Allow loading a subset of tables
6
+
7
+ ### 🐛 Bug Fixes
8
+
9
+ - Truncate ragged lines
10
+ - Consider BOM when looking for empty files
11
+ ## [0.11.1] - 2026-04-28
12
+
13
+ ### 🐛 Bug Fixes
14
+
15
+ - Account for unreferenced calendar entries
16
+
17
+ ### 🧪 Testing
18
+
19
+ - Validate filtering from feed with empty calendar
20
+ - Validate unreferenced calendar dates
21
+
22
+ ### ⚙️ Miscellaneous Tasks
23
+
24
+ - Bump version to 0.11.1
1
25
  ## [0.11.0] - 2026-03-30
2
26
 
3
27
  ### ⚙️ Miscellaneous Tasks
4
28
 
5
29
  - [**breaking**] Update ruby-polars dependency
30
+ - Bump version to 0.11.0
6
31
  ## [0.10.2] - 2026-03-20
7
32
 
8
33
  ### 🐛 Bug Fixes
@@ -12,7 +12,13 @@ module GtfsDf
12
12
  elsif input.is_a?(String)
13
13
  # TODO: use `infer_schema: false` instead of `infer_schema_length` after polars release:
14
14
  # https://github.com/ankane/ruby-polars/blob/master/CHANGELOG.md#100-unreleased
15
- df = Polars.read_csv(input, infer_schema_length: 0, encoding: "utf8-lossy")
15
+ df = Polars
16
+ .read_csv(
17
+ input,
18
+ infer_schema_length: 0,
19
+ encoding: "utf8-lossy",
20
+ truncate_ragged_lines: true
21
+ )
16
22
  .rename(->(col) { col.strip })
17
23
 
18
24
  # Strip out empty lines. Unfortunately read_csv does not support the drop_empty_rows
data/lib/gtfs_df/feed.rb CHANGED
@@ -432,21 +432,22 @@ module GtfsDf
432
432
  # union semantics across those two parents (a trip is valid if it appears in
433
433
  # either).
434
434
  #
435
- # Here we accumulate valid service_ids across calendar/calendar_dates, but only
436
- # within the pool of trips that are already reachable from structural parents.
435
+ # Accumulate service_ids from each calendar source, then apply the filter.
436
+ # If the filter results in 0 trips, we continue accumulating to allow the next
437
+ # calendar edge to add valid service_ids. This handles feeds where
438
+ # calendar.txt has unreferenced service_ids but all trips use
439
+ # calendar_dates.txt service_ids.
437
440
  accumulated_service_ids = Polars.concat([accumulated_service_ids, valid_values]).unique
438
-
439
- # Determine the base pool of trips:
440
- # - If we've already restricted trips via structural parents (routes,
441
- # stop_times, shapes, etc), use that as the base.
442
- # - Otherwise, like when filtering directly on trips, use the current
443
- # trips dataframe.
444
441
  trips_base_df ||= filtered[child_node.fetch(:file)]
445
442
  next unless trips_base_df && trips_base_df.height > 0
446
443
 
447
- filtered[child_node.fetch(:file)] = trips_base_df.filter(
444
+ filtered_trips = trips_base_df.filter(
448
445
  Polars.col("service_id").is_in(accumulated_service_ids.implode)
449
446
  )
447
+
448
+ if filtered_trips.height > 0
449
+ filtered[child_node.fetch(:file)] = filtered_trips
450
+ end
450
451
  else
451
452
  # Original single-edge logic for all other nodes
452
453
  before = child_df.height
@@ -6,13 +6,13 @@ module GtfsDf
6
6
  #
7
7
  # @param zip_path [String] Path to the GTFS zip file
8
8
  # @param parse_times [Boolean] Whether to parse time fields to seconds since midnight (default: false)
9
+ # @param relevant_files [Array<String>] A list of file names, useful to avoid loading tables you don't care about.
9
10
  # @return [Feed] The loaded GTFS feed
10
- def self.load_from_zip(zip_path, parse_times: false)
11
+ def self.load_from_zip(zip_path, parse_times: false, relevant_files: nil)
11
12
  data = nil
12
13
 
13
- relevant_files = GtfsDf::Feed::GTFS_FILES
14
- .map { |name| "#{name}.txt" }
15
- .to_set
14
+ relevant_files ||= GtfsDf::Feed::GTFS_FILES.map { |name| "#{name}.txt" }
15
+ relevant_files = relevant_files.to_set
16
16
 
17
17
  seen = {}
18
18
 
@@ -37,7 +37,7 @@ module GtfsDf
37
37
  end
38
38
  end
39
39
 
40
- data = load_from_dir(tmpdir, parse_times: parse_times)
40
+ data = load_from_dir(tmpdir, parse_times:, relevant_files:)
41
41
  end
42
42
 
43
43
  data
@@ -47,12 +47,17 @@ module GtfsDf
47
47
  #
48
48
  # @param dir_path [String] Path to the GTFS directory
49
49
  # @param parse_times [Boolean] Whether to parse time fields to seconds since midnight (default: false)
50
+ # @param relevant_files [Array<String>] A list of file names, useful to avoid loading tables you don't care about.
50
51
  # @return [Feed] The loaded GTFS feed
51
- def self.load_from_dir(dir_path, parse_times: false)
52
+ def self.load_from_dir(dir_path, parse_times: false, relevant_files: nil)
53
+ relevant_files ||= GtfsDf::Feed::GTFS_FILES.map { |name| "#{name}.txt" }
54
+ relevant_files = relevant_files.to_set
55
+
52
56
  data = {}
53
57
  GtfsDf::Feed::GTFS_FILES.each do |gtfs_file|
54
- path = File.join(dir_path, "#{gtfs_file}.txt")
55
- next unless File.exist?(path)
58
+ basename = "#{gtfs_file}.txt"
59
+ path = File.join(dir_path, basename)
60
+ next unless relevant_files.include?(basename) && File.exist?(path)
56
61
 
57
62
  data[gtfs_file] = data_frame(gtfs_file, path)
58
63
  end
@@ -66,7 +71,11 @@ module GtfsDf
66
71
  end
67
72
 
68
73
  private_class_method def self.has_header?(zip_entry)
69
- zip_entry.get_input_stream.readline.strip != ""
74
+ zip_entry
75
+ .get_input_stream
76
+ .readline
77
+ .delete_prefix("\xEF\xBB\xBF".b) # BOM
78
+ .strip != ""
70
79
  rescue
71
80
  false
72
81
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GtfsDf
4
- VERSION = "0.11.0"
4
+ VERSION = "0.12.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gtfs_df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Mejorado