gtfs_df 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.conform.yaml +1 -0
- data/CHANGELOG.md +16 -1
- data/examples/split-by-agency/Gemfile.lock +1 -1
- data/lib/gtfs_df/feed.rb +47 -31
- data/lib/gtfs_df/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9c0746b1937afcfb7000425a59976b3eba8662778437ddf3adc02ec7729c3a49
|
|
4
|
+
data.tar.gz: e3a553aef868b4c29e06731f0a5e3984efc067a096001e90150743a80f3d45bc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0fccae16bb46da6db651da04ea2591fa17e991219d4d8eb9be3a9444ed0b1b190b315f9223b738f399f3b6fade41c2c2e1081d1aad921a39483426d5b86b1aca
|
|
7
|
+
data.tar.gz: b98eccb0861c46d8510eaaea6a97c48d6b1fa3687d1c35d445edbf19c349a6c03d45c3a2654da20ab577aa083003eff5335a8fcad6ee0d9f6f7f86a78c2ed33b
|
data/.conform.yaml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,8 +1,23 @@
|
|
|
1
|
-
## [0.9.
|
|
1
|
+
## [0.9.3] - 2026-02-27
|
|
2
|
+
|
|
3
|
+
### 🐛 Bug Fixes
|
|
4
|
+
|
|
5
|
+
- Allow multiple filters
|
|
6
|
+
- Refactor prune to keep calendar_dates-only dependencies
|
|
2
7
|
|
|
3
8
|
### 📚 Documentation
|
|
4
9
|
|
|
5
10
|
- Add Brooke to the list of authors
|
|
11
|
+
|
|
12
|
+
### 🧪 Testing
|
|
13
|
+
|
|
14
|
+
- Ensure we don't drop trips and routes
|
|
15
|
+
|
|
16
|
+
### ⚙️ Miscellaneous Tasks
|
|
17
|
+
|
|
18
|
+
- Bump version to 0.9.2
|
|
19
|
+
- Avoid converting series into arrays
|
|
20
|
+
- Simplify trip pool reduction
|
|
6
21
|
## [0.9.1] - 2026-02-17
|
|
7
22
|
|
|
8
23
|
### 🐛 Bug Fixes
|
data/lib/gtfs_df/feed.rb
CHANGED
|
@@ -121,19 +121,19 @@ module GtfsDf
|
|
|
121
121
|
# Trips are the atomic unit of GTFS, we will generate a new view
|
|
122
122
|
# based on the set of trips that would be included for each individual filter
|
|
123
123
|
# and cascade changes from this view in order to retain referential integrity
|
|
124
|
-
trip_ids =
|
|
124
|
+
trip_ids = Polars::Series.new.alias("trip_id")
|
|
125
125
|
|
|
126
126
|
view.each do |file, filters|
|
|
127
127
|
new_filtered = filter!(file, filters, filtered.dup)
|
|
128
|
-
trip_ids = if trip_ids.
|
|
128
|
+
trip_ids = if trip_ids.empty?
|
|
129
129
|
new_filtered["trips"]["trip_id"]
|
|
130
130
|
else
|
|
131
|
-
trip_ids
|
|
131
|
+
trip_ids.filter(trip_ids.is_in(new_filtered["trips"]["trip_id"]))
|
|
132
132
|
end
|
|
133
133
|
end
|
|
134
134
|
|
|
135
135
|
if trip_ids
|
|
136
|
-
filtered = filter!("trips", {"trip_id" => trip_ids.
|
|
136
|
+
filtered = filter!("trips", {"trip_id" => trip_ids.implode}, filtered.dup)
|
|
137
137
|
end
|
|
138
138
|
end
|
|
139
139
|
|
|
@@ -181,7 +181,7 @@ module GtfsDf
|
|
|
181
181
|
df = filtered[file]
|
|
182
182
|
|
|
183
183
|
filters.each do |col, val|
|
|
184
|
-
df = if val.is_a?(Array)
|
|
184
|
+
df = if val.is_a?(Polars::Series) || val.is_a?(Array)
|
|
185
185
|
df.filter(Polars.col(col).is_in(val))
|
|
186
186
|
elsif val.respond_to?(:call)
|
|
187
187
|
df.filter(val.call(Polars.col(col)))
|
|
@@ -200,9 +200,16 @@ module GtfsDf
|
|
|
200
200
|
|
|
201
201
|
# Traverses the graph to prune unreferenced entities from child dataframes
|
|
202
202
|
# based on parent relationships. See GtfsDf::Graph::STOP_NODES
|
|
203
|
+
#
|
|
204
|
+
# The trips table has multiple parents (calendar, calendar_dates, routes,
|
|
205
|
+
# stop_times). We accumulate valid values from all of them and keep rows
|
|
206
|
+
# that match any parent, so trips referenced only via calendar_dates are
|
|
207
|
+
# not dropped when another edge is processed first.
|
|
203
208
|
def prune!(root, filtered, filter_only_children: false)
|
|
204
209
|
seen_edges = Set.new
|
|
205
210
|
rerooted_graph = Graph.build(bidirectional: !filter_only_children)
|
|
211
|
+
accumulated_service_ids = Polars::Series.new("service_id", dtype: Polars::String)
|
|
212
|
+
trips_base_df = nil
|
|
206
213
|
|
|
207
214
|
queue = [root]
|
|
208
215
|
|
|
@@ -245,37 +252,46 @@ module GtfsDf
|
|
|
245
252
|
attrs[:dependencies].each do |dep|
|
|
246
253
|
parent_col = dep[parent_node_id]
|
|
247
254
|
child_col = dep[child_node_id]
|
|
248
|
-
|
|
255
|
+
allow_null_flag = !!dep[:allow_null]
|
|
249
256
|
|
|
250
257
|
next unless parent_col && child_col &&
|
|
251
258
|
parent_df.columns.include?(parent_col) && child_df.columns.include?(child_col)
|
|
252
259
|
|
|
253
260
|
# Get valid values from parent
|
|
254
|
-
valid_values = parent_df[parent_col].
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
261
|
+
valid_values = parent_df[parent_col].drop_nulls.unique
|
|
262
|
+
|
|
263
|
+
if child_node_id == "trips" && (parent_node_id == "calendar" || parent_node_id == "calendar_dates")
|
|
264
|
+
# Calendar + calendar_dates both define service for the same trips, so we want
|
|
265
|
+
# union semantics across those two parents (a trip is valid if it appears in
|
|
266
|
+
# either).
|
|
267
|
+
#
|
|
268
|
+
# Here we accumulate valid service_ids across calendar/calendar_dates, but only
|
|
269
|
+
# within the pool of trips that are already reachable from structural parents.
|
|
270
|
+
accumulated_service_ids = Polars.concat([accumulated_service_ids, valid_values]).unique
|
|
271
|
+
|
|
272
|
+
# Determine the base pool of trips:
|
|
273
|
+
# - If we've already restricted trips via structural parents (routes,
|
|
274
|
+
# stop_times, shapes, etc), use that as the base.
|
|
275
|
+
# - Otherwise, like when filtering directly on trips, use the current
|
|
276
|
+
# trips dataframe.
|
|
277
|
+
trips_base_df ||= filtered[child_node.fetch(:file)]
|
|
278
|
+
next unless trips_base_df && trips_base_df.height > 0
|
|
279
|
+
|
|
280
|
+
filtered[child_node.fetch(:file)] = trips_base_df.filter(
|
|
281
|
+
Polars.col("service_id").is_in(accumulated_service_ids.implode)
|
|
282
|
+
)
|
|
283
|
+
else
|
|
284
|
+
# Original single-edge logic for all other nodes
|
|
285
|
+
before = child_df.height
|
|
286
|
+
|
|
287
|
+
cond = Polars.col(child_col).is_in(valid_values.implode)
|
|
288
|
+
cond = (cond | Polars.col(child_col).is_null) if allow_null_flag
|
|
289
|
+
child_df = child_df.filter(cond)
|
|
290
|
+
|
|
291
|
+
if child_df.height < before
|
|
292
|
+
child_df = Polars.concat([child_df, saved_vals], how: "vertical") if saved_vals
|
|
293
|
+
filtered[child_node.fetch(:file)] = child_df
|
|
294
|
+
end
|
|
279
295
|
end
|
|
280
296
|
end
|
|
281
297
|
end
|
data/lib/gtfs_df/version.rb
CHANGED