gtfs_df 0.11.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/lib/gtfs_df/base_gtfs_table.rb +7 -1
- data/lib/gtfs_df/reader.rb +18 -9
- data/lib/gtfs_df/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8c1060a2f120fc6f620adc523d8038a05ec6896826fbb950973c14b8a406fc37
|
|
4
|
+
data.tar.gz: 6f75be7bbec96e5f75994fa130f9d599061d796ef702cad6c833d8edb873ef6b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c8000394c34a67284d16df01e9e236b39d074bd300d1416be8557a7db2387357e860b7c741ff3eb483293895ca16bb3936efd857489124d309ced30a06d248aa
|
|
7
|
+
data.tar.gz: 15a5d19bb1d06ab1c9f21ae0e403ce6187dd370dcf868d20b9ee1de415a2a1ecabb1ae158b5dc10bc6236f329863e661581a2f8b39d0fb9eed50bc9dbad350db
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
## [0.12.0] - 2026-06-12
|
|
2
|
+
|
|
3
|
+
### 🚀 Features
|
|
4
|
+
|
|
5
|
+
- Allow loading a subset of tables
|
|
6
|
+
|
|
7
|
+
### 🐛 Bug Fixes
|
|
8
|
+
|
|
9
|
+
- Truncate ragged lines
|
|
10
|
+
- Consider BOM when looking for empty files
|
|
1
11
|
## [0.11.1] - 2026-04-28
|
|
2
12
|
|
|
3
13
|
### 🐛 Bug Fixes
|
|
@@ -8,6 +18,10 @@
|
|
|
8
18
|
|
|
9
19
|
- Validate filtering from feed with empty calendar
|
|
10
20
|
- Validate unreferenced calendar dates
|
|
21
|
+
|
|
22
|
+
### ⚙️ Miscellaneous Tasks
|
|
23
|
+
|
|
24
|
+
- Bump version to 0.11.1
|
|
11
25
|
## [0.11.0] - 2026-03-30
|
|
12
26
|
|
|
13
27
|
### ⚙️ Miscellaneous Tasks
|
data/lib/gtfs_df/base_gtfs_table.rb
CHANGED
|
@@ -12,7 +12,13 @@ module GtfsDf
|
|
|
12
12
|
elsif input.is_a?(String)
|
|
13
13
|
# TODO: use `infer_schema: false` instead of `infer_schema_length` after polars release:
|
|
14
14
|
# https://github.com/ankane/ruby-polars/blob/master/CHANGELOG.md#100-unreleased
|
|
15
|
-
df = Polars
|
|
15
|
+
df = Polars
|
|
16
|
+
.read_csv(
|
|
17
|
+
input,
|
|
18
|
+
infer_schema_length: 0,
|
|
19
|
+
encoding: "utf8-lossy",
|
|
20
|
+
truncate_ragged_lines: true
|
|
21
|
+
)
|
|
16
22
|
.rename(->(col) { col.strip })
|
|
17
23
|
|
|
18
24
|
# Strip out empty lines. Unfortunately read_csv does not support the drop_empty_rows
|
data/lib/gtfs_df/reader.rb
CHANGED
|
@@ -6,13 +6,13 @@ module GtfsDf
|
|
|
6
6
|
#
|
|
7
7
|
# @param zip_path [String] Path to the GTFS zip file
|
|
8
8
|
# @param parse_times [Boolean] Whether to parse time fields to seconds since midnight (default: false)
|
|
9
|
+
# @param relevant_files [Array<String>] A list of file names, useful to avoid loading tables you don't care about.
|
|
9
10
|
# @return [Feed] The loaded GTFS feed
|
|
10
|
-
def self.load_from_zip(zip_path, parse_times: false)
|
|
11
|
+
def self.load_from_zip(zip_path, parse_times: false, relevant_files: nil)
|
|
11
12
|
data = nil
|
|
12
13
|
|
|
13
|
-
relevant_files
|
|
14
|
-
|
|
15
|
-
.to_set
|
|
14
|
+
relevant_files ||= GtfsDf::Feed::GTFS_FILES.map { |name| "#{name}.txt" }
|
|
15
|
+
relevant_files = relevant_files.to_set
|
|
16
16
|
|
|
17
17
|
seen = {}
|
|
18
18
|
|
|
@@ -37,7 +37,7 @@ module GtfsDf
|
|
|
37
37
|
end
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
data = load_from_dir(tmpdir, parse_times:
|
|
40
|
+
data = load_from_dir(tmpdir, parse_times:, relevant_files:)
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
data
|
|
@@ -47,12 +47,17 @@ module GtfsDf
|
|
|
47
47
|
#
|
|
48
48
|
# @param dir_path [String] Path to the GTFS directory
|
|
49
49
|
# @param parse_times [Boolean] Whether to parse time fields to seconds since midnight (default: false)
|
|
50
|
+
# @param relevant_files [Array<String>] A list of file names, useful to avoid loading tables you don't care about.
|
|
50
51
|
# @return [Feed] The loaded GTFS feed
|
|
51
|
-
def self.load_from_dir(dir_path, parse_times: false)
|
|
52
|
+
def self.load_from_dir(dir_path, parse_times: false, relevant_files: nil)
|
|
53
|
+
relevant_files ||= GtfsDf::Feed::GTFS_FILES.map { |name| "#{name}.txt" }
|
|
54
|
+
relevant_files = relevant_files.to_set
|
|
55
|
+
|
|
52
56
|
data = {}
|
|
53
57
|
GtfsDf::Feed::GTFS_FILES.each do |gtfs_file|
|
|
54
|
-
|
|
55
|
-
|
|
58
|
+
basename = "#{gtfs_file}.txt"
|
|
59
|
+
path = File.join(dir_path, basename)
|
|
60
|
+
next unless relevant_files.include?(basename) && File.exist?(path)
|
|
56
61
|
|
|
57
62
|
data[gtfs_file] = data_frame(gtfs_file, path)
|
|
58
63
|
end
|
|
@@ -66,7 +71,11 @@ module GtfsDf
|
|
|
66
71
|
end
|
|
67
72
|
|
|
68
73
|
private_class_method def self.has_header?(zip_entry)
|
|
69
|
-
zip_entry
|
|
74
|
+
zip_entry
|
|
75
|
+
.get_input_stream
|
|
76
|
+
.readline
|
|
77
|
+
.delete_prefix("\xEF\xBB\xBF".b) # BOM
|
|
78
|
+
.strip != ""
|
|
70
79
|
rescue
|
|
71
80
|
false
|
|
72
81
|
end
|
data/lib/gtfs_df/version.rb
CHANGED