gtfs_df 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/lib/gtfs_df/base_gtfs_table.rb +15 -14
- data/lib/gtfs_df/feed.rb +23 -18
- data/lib/gtfs_df/graph.rb +17 -16
- data/lib/gtfs_df/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 140458a6ce1013bef475e0a6cdcab6364cff04b8a18eedb5e5d0244e3bccf38a
|
|
4
|
+
data.tar.gz: c420a34f7004eca9267f32f53038632f822224924eac5b77aa98957bb3149e20
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 032d24ed1df3ed43e5e6953abebbeda70ba5450cab731e931e8548bd058e37d9472edd688a78b0c941e8ff995111b9f616c8aae23e7daba9f3a610813aade528
|
|
7
|
+
data.tar.gz: b808c05aeedea83faf728feded28a38a78ac7a1d2ff139ea36cb0f474f310228792f926a97160df637f9d85dca83272921ed9dff72a632c4aab575163643610d
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,23 @@
|
|
|
1
|
+
## [0.5.0] - 2025-12-08
|
|
2
|
+
|
|
3
|
+
### Added
|
|
4
|
+
|
|
5
|
+
- add Feed#filter filter_only_children param
|
|
6
|
+
|
|
7
|
+
### Maintenance
|
|
8
|
+
|
|
9
|
+
- arrange edges so parent is always first
|
|
10
|
+
- build directed graph
|
|
11
|
+
- allow ! in commit messages
|
|
12
|
+
## [0.4.1] - 2025-12-05
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
|
|
16
|
+
- handle extra whitespace in csvs
|
|
17
|
+
|
|
18
|
+
### Maintenance
|
|
19
|
+
|
|
20
|
+
- remove unused initializer format
|
|
1
21
|
## [0.4.0] - 2025-12-04
|
|
2
22
|
|
|
3
23
|
### Added
|
|
@@ -10,20 +10,21 @@ module GtfsDf
|
|
|
10
10
|
if input.is_a?(Polars::DataFrame)
|
|
11
11
|
input
|
|
12
12
|
elsif input.is_a?(String)
|
|
13
|
-
#
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
dtypes =
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
|
|
13
|
+
# TODO: use `infer_schema: false` instead of `infer_schema_length` after polars release:
|
|
14
|
+
# https://github.com/ankane/ruby-polars/blob/master/CHANGELOG.md#100-unreleased
|
|
15
|
+
df = Polars.read_csv(input, infer_schema_length: 0)
|
|
16
|
+
dtypes = self.class::SCHEMA.slice(*df.columns)
|
|
17
|
+
|
|
18
|
+
df
|
|
19
|
+
.with_columns(dtypes.keys.map do |col|
|
|
20
|
+
stripped = Polars.col(col).str.strip
|
|
21
|
+
Polars.when(stripped.str.len_chars.gt(0))
|
|
22
|
+
.then(stripped)
|
|
23
|
+
.otherwise(Polars.lit(nil))
|
|
24
|
+
end)
|
|
25
|
+
.with_columns(dtypes.map do |name, type|
|
|
26
|
+
Polars.col(name).cast(type)
|
|
27
|
+
end)
|
|
27
28
|
else
|
|
28
29
|
throw GtfsDf::Error, "Unrecognized input"
|
|
29
30
|
end
|
data/lib/gtfs_df/feed.rb
CHANGED
|
@@ -75,13 +75,17 @@ module GtfsDf
|
|
|
75
75
|
#
|
|
76
76
|
# @param view [Hash] The view used to filter the feed, with format { file => filters }.
|
|
77
77
|
# Example view: { 'routes' => { 'route_id' => '123' }, 'trips' => { 'service_id' => 'A' } }
|
|
78
|
-
# @param
|
|
79
|
-
#
|
|
80
|
-
#
|
|
81
|
-
#
|
|
82
|
-
#
|
|
83
|
-
#
|
|
84
|
-
|
|
78
|
+
# @param filter_only_children [Boolean] Whether only child dependencies should be pruned.
|
|
79
|
+
# When false, we:
|
|
80
|
+
# - Treat trips as the atomic unit of GTFS. Therefore, if we filter to one stop
|
|
81
|
+
# referenced by TripA, we will preserve _all stops_ referenced by TripA.
|
|
82
|
+
# - Prune unreferenced parent objects (e.g. route is a parent of trip. Unreferenced routes
|
|
83
|
+
# will be pruned.)
|
|
84
|
+
# When true we:
|
|
85
|
+
# - Do not treat trips as atomic. I can filter stopA without maintaining other stops for
|
|
86
|
+
# trips that reference it.
|
|
87
|
+
# - Only filter child objects
|
|
88
|
+
def filter(view, filter_only_children: false)
|
|
85
89
|
filtered = {}
|
|
86
90
|
|
|
87
91
|
GTFS_FILES.each do |file|
|
|
@@ -91,7 +95,11 @@ module GtfsDf
|
|
|
91
95
|
filtered[file] = df
|
|
92
96
|
end
|
|
93
97
|
|
|
94
|
-
if
|
|
98
|
+
if filter_only_children
|
|
99
|
+
view.each do |file, filters|
|
|
100
|
+
filtered = filter!(file, filters, filtered.dup, filter_only_children: true)
|
|
101
|
+
end
|
|
102
|
+
else
|
|
95
103
|
# Trips are the atomic unit of GTFS, we will generate a new view
|
|
96
104
|
# based on the set of trips that would be included for each individual filter
|
|
97
105
|
# and cascade changes from this view in order to retain referential integrity
|
|
@@ -107,11 +115,7 @@ module GtfsDf
|
|
|
107
115
|
end
|
|
108
116
|
|
|
109
117
|
if trip_ids
|
|
110
|
-
filtered = filter!("trips", {"trip_id" => trip_ids.to_a}, filtered)
|
|
111
|
-
end
|
|
112
|
-
else
|
|
113
|
-
view.each do |file, filters|
|
|
114
|
-
filtered = filter!(file, filters, filtered.dup)
|
|
118
|
+
filtered = filter!("trips", {"trip_id" => trip_ids.to_a}, filtered.dup)
|
|
115
119
|
end
|
|
116
120
|
end
|
|
117
121
|
|
|
@@ -128,7 +132,7 @@ module GtfsDf
|
|
|
128
132
|
|
|
129
133
|
private
|
|
130
134
|
|
|
131
|
-
def filter!(file, filters, filtered)
|
|
135
|
+
def filter!(file, filters, filtered, filter_only_children: false)
|
|
132
136
|
unless filters.empty?
|
|
133
137
|
df = filtered[file]
|
|
134
138
|
|
|
@@ -144,7 +148,7 @@ module GtfsDf
|
|
|
144
148
|
|
|
145
149
|
filtered[file] = df
|
|
146
150
|
|
|
147
|
-
prune!(file, filtered)
|
|
151
|
+
prune!(file, filtered, filter_only_children:)
|
|
148
152
|
end
|
|
149
153
|
|
|
150
154
|
filtered
|
|
@@ -152,8 +156,9 @@ module GtfsDf
|
|
|
152
156
|
|
|
153
157
|
# Traverses the graph to prune unreferenced entities from child dataframes
|
|
154
158
|
# based on parent relationships. See GtfsDf::Graph::STOP_NODES
|
|
155
|
-
def prune!(root, filtered)
|
|
156
|
-
graph
|
|
159
|
+
def prune!(root, filtered, filter_only_children: false)
|
|
160
|
+
maybe_digraph = filter_only_children ? graph : graph.to_undirected
|
|
161
|
+
maybe_digraph.each_bfs_edge(root) do |parent_node_id, child_node_id|
|
|
157
162
|
parent_node = Graph::NODES[parent_node_id]
|
|
158
163
|
child_node = Graph::NODES[child_node_id]
|
|
159
164
|
parent_df = filtered[parent_node.fetch(:file)]
|
|
@@ -173,7 +178,7 @@ module GtfsDf
|
|
|
173
178
|
end
|
|
174
179
|
next unless child_df && child_df.height > 0
|
|
175
180
|
|
|
176
|
-
attrs =
|
|
181
|
+
attrs = maybe_digraph.get_edge_data(parent_node_id, child_node_id)
|
|
177
182
|
|
|
178
183
|
attrs[:dependencies].each do |dep|
|
|
179
184
|
parent_col = dep[parent_node_id]
|
data/lib/gtfs_df/graph.rb
CHANGED
|
@@ -42,15 +42,16 @@ module GtfsDf
|
|
|
42
42
|
|
|
43
43
|
# Returns a directed graph of GTFS file dependencies
|
|
44
44
|
def self.build
|
|
45
|
-
g = NetworkX::
|
|
45
|
+
g = NetworkX::DiGraph.new
|
|
46
46
|
NODES.keys.each { |node| g.add_node(node) }
|
|
47
47
|
|
|
48
|
+
# Edges should be parent, child
|
|
48
49
|
# TODO: Add fare_rules -> stops + test
|
|
49
50
|
edges = [
|
|
50
51
|
["agency", "routes", {dependencies: [
|
|
51
52
|
{"agency" => "agency_id", "routes" => "agency_id"}
|
|
52
53
|
]}],
|
|
53
|
-
["
|
|
54
|
+
["agency", "fare_attributes", {dependencies: [
|
|
54
55
|
{"fare_attributes" => "agency_id",
|
|
55
56
|
"agency" => "agency_id"}
|
|
56
57
|
]}],
|
|
@@ -58,7 +59,7 @@ module GtfsDf
|
|
|
58
59
|
{"fare_attributes" => "fare_id",
|
|
59
60
|
"fare_rules" => "fare_id"}
|
|
60
61
|
]}],
|
|
61
|
-
["
|
|
62
|
+
["routes", "fare_rules", {dependencies: [
|
|
62
63
|
{"fare_rules" => "route_id", "routes" => "route_id", :allow_null => true}
|
|
63
64
|
]}],
|
|
64
65
|
["routes", "trips", {dependencies: [
|
|
@@ -67,24 +68,24 @@ module GtfsDf
|
|
|
67
68
|
["trips", "stop_times", {dependencies: [
|
|
68
69
|
{"trips" => "trip_id", "stop_times" => "trip_id"}
|
|
69
70
|
]}],
|
|
70
|
-
["
|
|
71
|
+
["stops", "stop_times", {dependencies: [
|
|
71
72
|
{"stop_times" => "stop_id", "stops" => "stop_id"}
|
|
72
73
|
]}],
|
|
73
74
|
# Self-referential edge: stops can reference parent stations (location_type=1)
|
|
74
|
-
["
|
|
75
|
+
["parent_stations", "stops", {dependencies: [
|
|
75
76
|
{"stops" => "parent_station", "parent_stations" => "stop_id"}
|
|
76
77
|
]}],
|
|
77
78
|
["stops", "transfers", {dependencies: [
|
|
78
79
|
{"stops" => "stop_id", "transfers" => "from_stop_id"},
|
|
79
80
|
{"stops" => "stop_id", "transfers" => "to_stop_id"}
|
|
80
81
|
]}],
|
|
81
|
-
["
|
|
82
|
+
["calendar", "trips", {dependencies: [
|
|
82
83
|
{"trips" => "service_id", "calendar" => "service_id"}
|
|
83
84
|
]}],
|
|
84
|
-
["
|
|
85
|
+
["calendar_dates", "trips", {dependencies: [
|
|
85
86
|
{"trips" => "service_id", "calendar_dates" => "service_id"}
|
|
86
87
|
]}],
|
|
87
|
-
["
|
|
88
|
+
["shapes", "trips", {dependencies: [
|
|
88
89
|
{"trips" => "shape_id", "shapes" => "shape_id"}
|
|
89
90
|
]}],
|
|
90
91
|
["trips", "frequencies", {dependencies: [
|
|
@@ -97,11 +98,11 @@ module GtfsDf
|
|
|
97
98
|
{"stops" => "stop_id", "fare_leg_join_rules" => "from_stop_id"},
|
|
98
99
|
{"stops" => "stop_id", "fare_leg_join_rules" => "to_stop_id"}
|
|
99
100
|
]}],
|
|
100
|
-
["
|
|
101
|
+
["networks", "fare_leg_join_rules", {dependencies: [
|
|
101
102
|
{"fare_leg_join_rules" => "from_network_id", "networks" => "network_id"},
|
|
102
103
|
{"fare_leg_join_rules" => "to_network_id", "networks" => "network_id"}
|
|
103
104
|
]}],
|
|
104
|
-
["
|
|
105
|
+
["fare_leg_rules", "fare_leg_join_rules",
|
|
105
106
|
{dependencies: [
|
|
106
107
|
{"fare_leg_join_rules" => "fare_leg_rule_id", "fare_leg_rules" => "fare_leg_rule_id"}
|
|
107
108
|
]}],
|
|
@@ -110,14 +111,14 @@ module GtfsDf
|
|
|
110
111
|
{"fare_transfer_rules" => "from_leg_group_id", "fare_leg_rules" => "leg_group_id"},
|
|
111
112
|
{"fare_transfer_rules" => "to_leg_group_id", "fare_leg_rules" => "leg_group_id"}
|
|
112
113
|
]}],
|
|
113
|
-
["
|
|
114
|
+
["fare_products", "fare_transfer_rules",
|
|
114
115
|
{dependencies: [
|
|
115
116
|
{"fare_transfer_rules" => "fare_product_id", "fare_products" => "fare_product_id"}
|
|
116
117
|
]}],
|
|
117
118
|
["areas", "stop_areas", {dependencies: [
|
|
118
119
|
{"areas" => "area_id", "stop_areas" => "area_id"}
|
|
119
120
|
]}],
|
|
120
|
-
["
|
|
121
|
+
["areas", "stops", {dependencies: [
|
|
121
122
|
{"stops" => "area_id", "areas" => "area_id"}
|
|
122
123
|
]}],
|
|
123
124
|
["areas", "fare_leg_rules", {dependencies: [
|
|
@@ -133,10 +134,10 @@ module GtfsDf
|
|
|
133
134
|
["networks", "fare_leg_rules", {dependencies: [
|
|
134
135
|
{"networks" => "network_id", "fare_leg_rules" => "network_id"}
|
|
135
136
|
]}],
|
|
136
|
-
["
|
|
137
|
+
["routes", "route_networks", {dependencies: [
|
|
137
138
|
{"route_networks" => "route_id", "routes" => "route_id"}
|
|
138
139
|
]}],
|
|
139
|
-
["
|
|
140
|
+
["networks", "route_networks", {dependencies: [
|
|
140
141
|
{"route_networks" => "network_id", "networks" => "network_id"}
|
|
141
142
|
]}],
|
|
142
143
|
["location_groups", "location_group_stops", {dependencies: [
|
|
@@ -145,13 +146,13 @@ module GtfsDf
|
|
|
145
146
|
["location_groups", "stops", {dependencies: [
|
|
146
147
|
{"location_groups" => "location_group_id", "stops" => "location_group_id"}
|
|
147
148
|
]}],
|
|
148
|
-
["
|
|
149
|
+
["stops", "location_group_stops", {dependencies: [
|
|
149
150
|
{"location_group_stops" => "stop_id", "stops" => "stop_id"}
|
|
150
151
|
]}],
|
|
151
152
|
["stops", "location_group_stops", {dependencies: [
|
|
152
153
|
{"stops" => "stop_id", "location_group_stops" => "stop_id"}
|
|
153
154
|
]}],
|
|
154
|
-
["
|
|
155
|
+
["location_groups", "location_group_stops", {dependencies: [
|
|
155
156
|
{"location_group_stops" => "location_group_id", "location_groups" => "location_group_id"}
|
|
156
157
|
]}],
|
|
157
158
|
["booking_rules", "stop_times", {dependencies: [
|
data/lib/gtfs_df/version.rb
CHANGED