gtfs_df 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01e5ed5d1ce0ac7df81fe984b7bc8ca7716f0010672b7c3738d336ed1fbeaee0
4
- data.tar.gz: 6f3a21037faaa76aadf9747bba7df94d7fdac9bb57e0b458f5f17696a7158d67
3
+ metadata.gz: 9c0746b1937afcfb7000425a59976b3eba8662778437ddf3adc02ec7729c3a49
4
+ data.tar.gz: e3a553aef868b4c29e06731f0a5e3984efc067a096001e90150743a80f3d45bc
5
5
  SHA512:
6
- metadata.gz: 1a50d3af43551dda4a03a829ca1d4fd21e323c771181ee2238f9bcea41f4cb0e883d1d543bc26024d2c31d39456212e85bcd6beb6c2935ae98993f34cfd8816c
7
- data.tar.gz: 7e1ef2009d75b6dc3f0b82d90183cb048a6a15db74ac7c4076e9acd264fed013a6920e65c95021b59bdfb263fbd4c014b9764b22f396f4957fcec0c7820cea87
6
+ metadata.gz: 0fccae16bb46da6db651da04ea2591fa17e991219d4d8eb9be3a9444ed0b1b190b315f9223b738f399f3b6fade41c2c2e1081d1aad921a39483426d5b86b1aca
7
+ data.tar.gz: b98eccb0861c46d8510eaaea6a97c48d6b1fa3687d1c35d445edbf19c349a6c03d45c3a2654da20ab577aa083003eff5335a8fcad6ee0d9f6f7f86a78c2ed33b
data/.conform.yaml CHANGED
@@ -24,3 +24,4 @@ policies:
24
24
  - docs
25
25
  - ci
26
26
  - qol
27
+ - test
data/CHANGELOG.md CHANGED
@@ -1,8 +1,23 @@
1
- ## [0.9.2] - 2026-02-21
1
+ ## [0.9.3] - 2026-02-27
2
+
3
+ ### 🐛 Bug Fixes
4
+
5
+ - Allow multiple filters
6
+ - Refactor prune to keep calendar_dates-only dependencies
2
7
 
3
8
  ### 📚 Documentation
4
9
 
5
10
  - Add Brooke to the list of authors
11
+
12
+ ### 🧪 Testing
13
+
14
+ - Ensure we don't drop trips and routes
15
+
16
+ ### ⚙️ Miscellaneous Tasks
17
+
18
+ - Bump version to 0.9.2
19
+ - Avoid converting series into arrays
20
+ - Simplify trip pool reduction
6
21
  ## [0.9.1] - 2026-02-17
7
22
 
8
23
  ### 🐛 Bug Fixes
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ../..
3
3
  specs:
4
- gtfs_df (0.9.2)
4
+ gtfs_df (0.9.3)
5
5
  networkx (~> 0.4)
6
6
  polars-df (~> 0.22, < 0.24)
7
7
  rubyzip (>= 3.0, < 4.0)
data/lib/gtfs_df/feed.rb CHANGED
@@ -121,19 +121,19 @@ module GtfsDf
121
121
  # Trips are the atomic unit of GTFS, we will generate a new view
122
122
  # based on the set of trips that would be included for each individual filter
123
123
  # and cascade changes from this view in order to retain referential integrity
124
- trip_ids = nil
124
+ trip_ids = Polars::Series.new.alias("trip_id")
125
125
 
126
126
  view.each do |file, filters|
127
127
  new_filtered = filter!(file, filters, filtered.dup)
128
- trip_ids = if trip_ids.nil?
128
+ trip_ids = if trip_ids.empty?
129
129
  new_filtered["trips"]["trip_id"]
130
130
  else
131
- trip_ids & new_filtered["trips"]["trip_id"]
131
+ trip_ids.filter(trip_ids.is_in(new_filtered["trips"]["trip_id"]))
132
132
  end
133
133
  end
134
134
 
135
135
  if trip_ids
136
- filtered = filter!("trips", {"trip_id" => trip_ids.to_a}, filtered.dup)
136
+ filtered = filter!("trips", {"trip_id" => trip_ids.implode}, filtered.dup)
137
137
  end
138
138
  end
139
139
 
@@ -181,7 +181,7 @@ module GtfsDf
181
181
  df = filtered[file]
182
182
 
183
183
  filters.each do |col, val|
184
- df = if val.is_a?(Array)
184
+ df = if val.is_a?(Polars::Series) || val.is_a?(Array)
185
185
  df.filter(Polars.col(col).is_in(val))
186
186
  elsif val.respond_to?(:call)
187
187
  df.filter(val.call(Polars.col(col)))
@@ -200,9 +200,16 @@ module GtfsDf
200
200
 
201
201
  # Traverses the graph to prune unreferenced entities from child dataframes
202
202
  # based on parent relationships. See GtfsDf::Graph::STOP_NODES
203
+ #
204
+ # The trips table has multiple parents (calendar, calendar_dates, routes,
205
+ # stop_times). We accumulate valid values from all of them and keep rows
206
+ # that match any parent, so trips referenced only via calendar_dates are
207
+ # not dropped when another edge is processed first.
203
208
  def prune!(root, filtered, filter_only_children: false)
204
209
  seen_edges = Set.new
205
210
  rerooted_graph = Graph.build(bidirectional: !filter_only_children)
211
+ accumulated_service_ids = Polars::Series.new("service_id", dtype: Polars::String)
212
+ trips_base_df = nil
206
213
 
207
214
  queue = [root]
208
215
 
@@ -245,37 +252,46 @@ module GtfsDf
245
252
  attrs[:dependencies].each do |dep|
246
253
  parent_col = dep[parent_node_id]
247
254
  child_col = dep[child_node_id]
248
- allow_null = !!dep[:allow_null]
255
+ allow_null_flag = !!dep[:allow_null]
249
256
 
250
257
  next unless parent_col && child_col &&
251
258
  parent_df.columns.include?(parent_col) && child_df.columns.include?(child_col)
252
259
 
253
260
  # Get valid values from parent
254
- valid_values = parent_df[parent_col].to_a.uniq.compact
255
-
256
- # Annoying special case to make sure that if we have a calendar with exceptions,
257
- # the calendar_dates file doesn't end up pruning other files
258
- if parent_node_id == "calendar_dates" && parent_col == "service_id" &&
259
- filtered["calendar"]
260
- valid_values = (valid_values + calendar["service_id"].to_a).uniq
261
- end
262
-
263
- # Filter child to only include rows that reference valid parent values
264
- before = child_df.height
265
- filter = Polars.col(child_col).is_in(valid_values)
266
- if allow_null
267
- filter = (filter | Polars.col(child_col).is_null)
268
- end
269
- child_df = child_df.filter(filter)
270
- changed = child_df.height < before
271
-
272
- # If we removed a part of the child_df earlier, concat it back on
273
- if saved_vals
274
- child_df = Polars.concat([child_df, saved_vals], how: "vertical")
275
- end
276
-
277
- if changed
278
- filtered[child_node.fetch(:file)] = child_df
261
+ valid_values = parent_df[parent_col].drop_nulls.unique
262
+
263
+ if child_node_id == "trips" && (parent_node_id == "calendar" || parent_node_id == "calendar_dates")
264
+ # Calendar + calendar_dates both define service for the same trips, so we want
265
+ # union semantics across those two parents (a trip is valid if it appears in
266
+ # either).
267
+ #
268
+ # Here we accumulate valid service_ids across calendar/calendar_dates, but only
269
+ # within the pool of trips that are already reachable from structural parents.
270
+ accumulated_service_ids = Polars.concat([accumulated_service_ids, valid_values]).unique
271
+
272
+ # Determine the base pool of trips:
273
+ # - If we've already restricted trips via structural parents (routes,
274
+ # stop_times, shapes, etc), use that as the base.
275
+ # - Otherwise, like when filtering directly on trips, use the current
276
+ # trips dataframe.
277
+ trips_base_df ||= filtered[child_node.fetch(:file)]
278
+ next unless trips_base_df && trips_base_df.height > 0
279
+
280
+ filtered[child_node.fetch(:file)] = trips_base_df.filter(
281
+ Polars.col("service_id").is_in(accumulated_service_ids.implode)
282
+ )
283
+ else
284
+ # Original single-edge logic for all other nodes
285
+ before = child_df.height
286
+
287
+ cond = Polars.col(child_col).is_in(valid_values.implode)
288
+ cond = (cond | Polars.col(child_col).is_null) if allow_null_flag
289
+ child_df = child_df.filter(cond)
290
+
291
+ if child_df.height < before
292
+ child_df = Polars.concat([child_df, saved_vals], how: "vertical") if saved_vals
293
+ filtered[child_node.fetch(:file)] = child_df
294
+ end
279
295
  end
280
296
  end
281
297
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GtfsDf
4
- VERSION = "0.9.2"
4
+ VERSION = "0.9.3"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gtfs_df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ version: 0.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Mejorado