gtfs_df 0.10.0 → 0.10.2

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0be3962480f99e4a2194d6e7da248d5ef82dc5479491ac807211b3dc4d69415a
4
- data.tar.gz: ceeaf189058006c0db3b26e6438b8c7fc7a0a077a48f192b588b53e74de695e8
3
+ metadata.gz: f128bb120303c7d0a4508f0d036812375893ea8c210a351535199b26f3aa3263
4
+ data.tar.gz: 554926a20907e8dedf02f786daa422a6bce1e3f876074a6b8106682b8a671eeb
5
5
  SHA512:
6
- metadata.gz: 1f27c1b9493aaf2220dfe9369a06a9c5cc26bcae2b9777b0d33bb061a47e1c176eea309580f893fad53dd80b3e7148e84a65ed6f3e0752ac6203c961c991d4ec
7
- data.tar.gz: ad94ff3fa5b2fb69f119fdcac7b8c14df0c2ad7863e42b692d10f5eb0bda151f204618ae8fd7209e9de5ebe0684187530e0e4f789aefaaabf340c8890e9d5df3
6
+ metadata.gz: 140df9b1b30d5210e6212ee8c7ee158217d26e2605ce9da9d0502eec90777ec4a4ee06a81555d858d69b53614af41d153836532aadd1a0053ec539b8caa9337e
7
+ data.tar.gz: 9f4fa2528b9897cac3aefb2019b80068adb2fc7022fd66601a91de284b7888cefafc4f5b2ad3f773ab01bb69849f43231fb6ef08155d0bc40fe1bcae9be5a8b1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,23 @@
1
+ ## [0.10.2] - 2026-03-20
2
+
3
+ ### 🐛 Bug Fixes
4
+
5
+ - Handle nested feeds
6
+
7
+ ### ⚙️ Miscellaneous Tasks
8
+
9
+ - Raise clear message for multi-feed inputs
10
+ ## [0.10.1] - 2026-03-18
11
+
12
+ ### 🐛 Bug Fixes
13
+
14
+ - Sanitize feed input
15
+
16
+ ### ⚙️ Miscellaneous Tasks
17
+
18
+ - Normalize platforms
19
+ - Ignore the examples folder on publishing
20
+ - Bump version to 0.10.1
1
21
  ## [0.10.0] - 2026-03-06
2
22
 
3
23
  ### 🚀 Features
@@ -9,6 +29,10 @@
9
29
  ### 🐛 Bug Fixes
10
30
 
11
31
  - Service dates and busiest week fixes
32
+
33
+ ### ⚙️ Miscellaneous Tasks
34
+
35
+ - Bump version to 0.10.0
12
36
  ## [0.9.3] - 2026-02-27
13
37
 
14
38
  ### 🐛 Bug Fixes
@@ -10,11 +10,30 @@ module GtfsDf
10
10
  def self.load_from_zip(zip_path, parse_times: false)
11
11
  data = nil
12
12
 
13
+ relevant_files = GtfsDf::Feed::GTFS_FILES
14
+ .map { |name| "#{name}.txt" }
15
+ .to_set
16
+
17
+ seen = {}
18
+
13
19
  Dir.mktmpdir do |tmpdir|
14
20
  Zip::File.open(zip_path) do |zip_file|
15
21
  zip_file.each do |entry|
16
- next unless entry.file?
17
- entry.extract(destination_directory: tmpdir)
22
+ # Extract files in nested directories into the root of the tmpdir
23
+ file_name = File.basename(entry.name)
24
+
25
+ if seen[file_name]
26
+ raise GtfsDf::Error, "Found multiple instances of the same file: #{seen[file_name]} and #{entry.name}"
27
+ end
28
+
29
+ # We're skipping:
30
+ # - unrelated files
31
+ # - empty feed files
32
+ next unless relevant_files.include?(file_name) && has_header?(entry)
33
+
34
+ seen[file_name] = entry.name
35
+
36
+ entry.extract(file_name, destination_directory: tmpdir)
18
37
  end
19
38
  end
20
39
 
@@ -45,5 +64,11 @@ module GtfsDf
45
64
  schema_class_name = gtfs_file.split("_").map(&:capitalize).join
46
65
  GtfsDf::Schema.const_get(schema_class_name).new(path).df
47
66
  end
67
+
68
+ private_class_method def self.has_header?(zip_entry)
69
+ zip_entry.get_input_stream.readline.strip != ""
70
+ rescue
71
+ false
72
+ end
48
73
  end
49
74
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GtfsDf
4
- VERSION = "0.10.0"
4
+ VERSION = "0.10.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gtfs_df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.10.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Mejorado
@@ -90,11 +90,6 @@ files:
90
90
  - devenv.lock
91
91
  - devenv.nix
92
92
  - devenv.yaml
93
- - examples/split-by-agency/.gitignore
94
- - examples/split-by-agency/Gemfile
95
- - examples/split-by-agency/Gemfile.lock
96
- - examples/split-by-agency/README.md
97
- - examples/split-by-agency/split_by_agency.rb
98
93
  - lib/gtfs_df.rb
99
94
  - lib/gtfs_df/base_gtfs_table.rb
100
95
  - lib/gtfs_df/feed.rb
@@ -1 +0,0 @@
1
- output/
@@ -1,5 +0,0 @@
1
- source "https://gem.coop"
2
-
3
- gem "gtfs_df", path: "../.."
4
- gem "optparse"
5
- gem "whirly"
@@ -1,54 +0,0 @@
1
- PATH
2
- remote: ../..
3
- specs:
4
- gtfs_df (0.10.0)
5
- networkx (~> 0.4)
6
- polars-df (~> 0.22, < 0.24)
7
- rubyzip (>= 3.0, < 4.0)
8
-
9
- GEM
10
- remote: https://gem.coop/
11
- specs:
12
- bigdecimal (4.0.1)
13
- json (2.18.0)
14
- matrix (0.4.3)
15
- networkx (0.4.0)
16
- matrix (~> 0.4)
17
- rb_heap (~> 1.0)
18
- optparse (0.8.1)
19
- polars-df (0.23.0-aarch64-linux)
20
- bigdecimal
21
- polars-df (0.23.0-aarch64-linux-musl)
22
- bigdecimal
23
- polars-df (0.23.0-arm64-darwin)
24
- bigdecimal
25
- polars-df (0.23.0-x86_64-darwin)
26
- bigdecimal
27
- polars-df (0.23.0-x86_64-linux)
28
- bigdecimal
29
- polars-df (0.23.0-x86_64-linux-musl)
30
- bigdecimal
31
- rb_heap (1.1.0)
32
- rubyzip (3.2.2)
33
- unicode-display_width (3.2.0)
34
- unicode-emoji (~> 4.1)
35
- unicode-emoji (4.2.0)
36
- whirly (0.4.0)
37
- json
38
- unicode-display_width (>= 1.1)
39
-
40
- PLATFORMS
41
- aarch64-linux
42
- aarch64-linux-musl
43
- arm64-darwin
44
- x86_64-darwin
45
- x86_64-linux
46
- x86_64-linux-musl
47
-
48
- DEPENDENCIES
49
- gtfs_df!
50
- optparse
51
- whirly
52
-
53
- BUNDLED WITH
54
- 2.6.9
@@ -1,26 +0,0 @@
1
- # Split GTFS by Agency Example
2
-
3
- This example demonstrates how to split a GTFS zip file into multiple files, one for each specified `agency_id`, using the `gtfs_df` Ruby gem.
4
-
5
- ## Usage
6
-
7
- ```
8
- bundle install
9
- ruby split_by_agency.rb -i <input-gtfs.zip> --ids agency1,agency2
10
- ```
11
-
12
- - The output files will be written to the `output/` directory, named `<agency_id>.zip`.
13
-
14
- ## Options
15
- - `-i`, `--input PATH` — Path to the input GTFS zip file
16
- - `--ids IDS` — Comma-separated list of agency IDs to extract
17
-
18
- ## Example
19
-
20
- ```
21
- ruby split_by_agency.rb -i ../../spec/fixtures/sample_gtfs.zip --ids DTA,OTA
22
- ```
23
-
24
- ---
25
-
26
- This is a port of the [original Python script](https://gist.github.com/davidmh/f51e5d93a9213e0e606a43167ff87403) using Partridge.
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require "optparse"
5
- require "fileutils"
6
- require "gtfs_df"
7
- require "whirly"
8
-
9
- options = {}
10
- OptionParser.new do |opts|
11
- opts.banner = "Usage: split_by_agency.rb -i <input-gtfs.zip> --ids NUMBERS"
12
-
13
- opts.on("-i", "--input PATH", "Path to the input GTFS file") do |v|
14
- options[:input] = v
15
- end
16
- opts.on("--ids IDS", "Comma-separated list of agency_ids") do |v|
17
- options[:ids] = v
18
- end
19
- end.parse!
20
-
21
- unless options[:input] && options[:ids]
22
- warn "Both --input and --ids are required."
23
- exit 1
24
- end
25
-
26
- input_path = File.expand_path(options[:input])
27
- agency_ids = options[:ids].split(",")
28
- output_dir = File.expand_path("./output", __dir__)
29
- FileUtils.mkdir_p(output_dir)
30
-
31
- feed = nil
32
-
33
- Whirly.configure spinner: "dots", stop: "✓"
34
-
35
- Whirly.start do
36
- Whirly.status = "Loading"
37
-
38
- start_time = Time.now
39
- feed = GtfsDf::Reader.load_from_zip(input_path)
40
- elapsed = Time.now - start_time
41
-
42
- Whirly.status = "Loaded (#{elapsed.round(2)}s)"
43
- end
44
-
45
- agency_ids.each do |agency_id|
46
- Whirly.start do
47
- output_path = File.join(output_dir, "#{agency_id}.zip")
48
-
49
- start_time = Time.now
50
-
51
- Whirly.status = "-> #{agency_id} filtering..."
52
- filtered_feed = feed.filter({"agency" => {"agency_id" => agency_id}})
53
-
54
- Whirly.status = "-> #{agency_id} writing..."
55
- GtfsDf::Writer.write_to_zip(filtered_feed, output_path)
56
-
57
- elapsed = Time.now - start_time
58
-
59
- Whirly.status = "-> #{agency_id}.zip (#{elapsed.round(2)}s)"
60
- end
61
- end
62
-
63
- puts "✓ Done, all files are stored in the output/ directory"