gtfs_df 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 140458a6ce1013bef475e0a6cdcab6364cff04b8a18eedb5e5d0244e3bccf38a
4
- data.tar.gz: c420a34f7004eca9267f32f53038632f822224924eac5b77aa98957bb3149e20
3
+ metadata.gz: c7c1d87e57bbb44ceb4ce8112da7172c5a8c76f8e88d3b1e8fbb610aad850cf3
4
+ data.tar.gz: 6a1b68dfc723d3c70b779687a70100b735c9a57e91603b302685318660473c66
5
5
  SHA512:
6
- metadata.gz: 032d24ed1df3ed43e5e6953abebbeda70ba5450cab731e931e8548bd058e37d9472edd688a78b0c941e8ff995111b9f616c8aae23e7daba9f3a610813aade528
7
- data.tar.gz: b808c05aeedea83faf728feded28a38a78ac7a1d2ff139ea36cb0f474f310228792f926a97160df637f9d85dca83272921ed9dff72a632c4aab575163643610d
6
+ metadata.gz: 85684d79eac1479bac56995cd00a4d22106e5247979e2b54b07dee0d2b1948c4e86bf005be674b57656c0a9c14acf731a11d17163edde72d0302561a2d489159
7
+ data.tar.gz: 7033b279730614870ff8710e51b29ff20f2b399aff9cd1b13a8a2a3202940a88221d9f0edc5dc82a5c3593c9f2314fdc6082fd8c89673f8ec956ba9eada11e2a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## [0.6.0] - 2025-12-09
2
+ ### Fixed
3
+
4
+ - visit nodes multiple times
1
5
  ## [0.5.0] - 2025-12-08
2
6
 
3
7
  ### Added
data/lib/gtfs_df/feed.rb CHANGED
@@ -157,59 +157,76 @@ module GtfsDf
157
157
  # Traverses the graph to prune unreferenced entities from child dataframes
158
158
  # based on parent relationships. See GtfsDf::Graph::STOP_NODES
159
159
  def prune!(root, filtered, filter_only_children: false)
160
+ seen_edges = Set.new
160
161
  maybe_digraph = filter_only_children ? graph : graph.to_undirected
161
- maybe_digraph.each_bfs_edge(root) do |parent_node_id, child_node_id|
162
- parent_node = Graph::NODES[parent_node_id]
163
- child_node = Graph::NODES[child_node_id]
164
- parent_df = filtered[parent_node.fetch(:file)]
165
- next unless parent_df
166
-
167
- child_df = filtered[child_node.fetch(:file)]
168
- # Certain nodes are pre-filtered because they reference only
169
- # a piece of the dataframe
170
- filter_attrs = child_node[:filter_attrs]
171
- if filter_attrs && child_df.columns.include?(filter_attrs.fetch(:filter_col))
172
- filter = filter_attrs.fetch(:filter)
173
- # Temporarily remove rows that do not match node filter criteria to process them
174
- # separately (e.g., when filtering stops, parent stations that should be preserved
175
- # regardless of direct references)
176
- saved_vals = child_df.filter(filter.is_not)
177
- child_df = child_df.filter(filter)
178
- end
179
- next unless child_df && child_df.height > 0
180
-
181
- attrs = maybe_digraph.get_edge_data(parent_node_id, child_node_id)
182
-
183
- attrs[:dependencies].each do |dep|
184
- parent_col = dep[parent_node_id]
185
- child_col = dep[child_node_id]
186
- allow_null = !!dep[:allow_null]
187
-
188
- next unless parent_col && child_col &&
189
- parent_df.columns.include?(parent_col) && child_df.columns.include?(child_col)
190
-
191
- # Get valid values from parent
192
- valid_values = parent_df[parent_col].to_a.uniq.compact
193
162
 
194
- # Filter child to only include rows that reference valid parent values
195
- before = child_df.height
196
- filter = Polars.col(child_col).is_in(valid_values)
197
- if allow_null
198
- filter = (filter | Polars.col(child_col).is_null)
163
+ queue = [root]
164
+
165
+ while queue.length > 0
166
+ parent_node_id = queue.shift
167
+ maybe_digraph.adj[parent_node_id].each do |child_node_id, attrs|
168
+ edge = edge_id(parent_node_id, child_node_id)
169
+
170
+ next if seen_edges.include?(edge)
171
+ seen_edges.add(edge)
172
+
173
+ parent_node = Graph::NODES[parent_node_id]
174
+ child_node = Graph::NODES[child_node_id]
175
+ parent_df = filtered[parent_node.fetch(:file)]
176
+ next unless parent_df
177
+
178
+ child_df = filtered[child_node.fetch(:file)]
179
+ # Certain nodes are pre-filtered because they reference only
180
+ # a piece of the dataframe
181
+ filter_attrs = child_node[:filter_attrs]
182
+ if filter_attrs && child_df.columns.include?(filter_attrs.fetch(:filter_col))
183
+ filter = filter_attrs.fetch(:filter)
184
+ # Temporarily remove rows that do not match node filter criteria to process them
185
+ # separately (e.g., when filtering stops, parent stations that should be preserved
186
+ # regardless of direct references)
187
+ saved_vals = child_df.filter(filter.is_not)
188
+ child_df = child_df.filter(filter)
199
189
  end
200
- child_df = child_df.filter(filter)
201
- changed = child_df.height < before
202
-
203
- # If we removed a part of the child_df earlier, concat it back on
204
- if saved_vals
205
- child_df = Polars.concat([child_df, saved_vals], how: "vertical")
206
- end
207
-
208
- if changed
209
- filtered[child_node.fetch(:file)] = child_df
190
+ next unless child_df && child_df.height > 0
191
+
192
+ queue << child_node_id
193
+
194
+ attrs[:dependencies].each do |dep|
195
+ parent_col = dep[parent_node_id]
196
+ child_col = dep[child_node_id]
197
+ allow_null = !!dep[:allow_null]
198
+
199
+ next unless parent_col && child_col &&
200
+ parent_df.columns.include?(parent_col) && child_df.columns.include?(child_col)
201
+
202
+ # Get valid values from parent
203
+ valid_values = parent_df[parent_col].to_a.uniq.compact
204
+
205
+ # Filter child to only include rows that reference valid parent values
206
+ before = child_df.height
207
+ filter = Polars.col(child_col).is_in(valid_values)
208
+ if allow_null
209
+ filter = (filter | Polars.col(child_col).is_null)
210
+ end
211
+ child_df = child_df.filter(filter)
212
+ changed = child_df.height < before
213
+
214
+ # If we removed a part of the child_df earlier, concat it back on
215
+ if saved_vals
216
+ child_df = Polars.concat([child_df, saved_vals], how: "vertical")
217
+ end
218
+
219
+ if changed
220
+ filtered[child_node.fetch(:file)] = child_df
221
+ end
210
222
  end
211
223
  end
212
224
  end
213
225
  end
226
+
227
+ def edge_id(parent, child)
228
+ # Alphabetize to make sure this works with an undirected graph
229
+ [parent, child].sort.join("-")
230
+ end
214
231
  end
215
232
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GtfsDf
4
- VERSION = "0.5.0"
4
+ VERSION = "0.6.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gtfs_df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Mejorado