gtfs_df 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/gtfs_df/feed.rb +64 -47
- data/lib/gtfs_df/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c7c1d87e57bbb44ceb4ce8112da7172c5a8c76f8e88d3b1e8fbb610aad850cf3
|
|
4
|
+
data.tar.gz: 6a1b68dfc723d3c70b779687a70100b735c9a57e91603b302685318660473c66
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 85684d79eac1479bac56995cd00a4d22106e5247979e2b54b07dee0d2b1948c4e86bf005be674b57656c0a9c14acf731a11d17163edde72d0302561a2d489159
|
|
7
|
+
data.tar.gz: 7033b279730614870ff8710e51b29ff20f2b399aff9cd1b13a8a2a3202940a88221d9f0edc5dc82a5c3593c9f2314fdc6082fd8c89673f8ec956ba9eada11e2a
|
data/CHANGELOG.md
CHANGED
data/lib/gtfs_df/feed.rb
CHANGED
|
@@ -157,59 +157,76 @@ module GtfsDf
|
|
|
157
157
|
# Traverses the graph to prune unreferenced entities from child dataframes
|
|
158
158
|
# based on parent relationships. See GtfsDf::Graph::STOP_NODES
|
|
159
159
|
def prune!(root, filtered, filter_only_children: false)
|
|
160
|
+
seen_edges = Set.new
|
|
160
161
|
maybe_digraph = filter_only_children ? graph : graph.to_undirected
|
|
161
|
-
maybe_digraph.each_bfs_edge(root) do |parent_node_id, child_node_id|
|
|
162
|
-
parent_node = Graph::NODES[parent_node_id]
|
|
163
|
-
child_node = Graph::NODES[child_node_id]
|
|
164
|
-
parent_df = filtered[parent_node.fetch(:file)]
|
|
165
|
-
next unless parent_df
|
|
166
|
-
|
|
167
|
-
child_df = filtered[child_node.fetch(:file)]
|
|
168
|
-
# Certain nodes are pre-filtered because they reference only
|
|
169
|
-
# a piece of the dataframe
|
|
170
|
-
filter_attrs = child_node[:filter_attrs]
|
|
171
|
-
if filter_attrs && child_df.columns.include?(filter_attrs.fetch(:filter_col))
|
|
172
|
-
filter = filter_attrs.fetch(:filter)
|
|
173
|
-
# Temporarily remove rows that do not match node filter criteria to process them
|
|
174
|
-
# separately (e.g., when filtering stops, parent stations that should be preserved
|
|
175
|
-
# regardless of direct references)
|
|
176
|
-
saved_vals = child_df.filter(filter.is_not)
|
|
177
|
-
child_df = child_df.filter(filter)
|
|
178
|
-
end
|
|
179
|
-
next unless child_df && child_df.height > 0
|
|
180
|
-
|
|
181
|
-
attrs = maybe_digraph.get_edge_data(parent_node_id, child_node_id)
|
|
182
|
-
|
|
183
|
-
attrs[:dependencies].each do |dep|
|
|
184
|
-
parent_col = dep[parent_node_id]
|
|
185
|
-
child_col = dep[child_node_id]
|
|
186
|
-
allow_null = !!dep[:allow_null]
|
|
187
|
-
|
|
188
|
-
next unless parent_col && child_col &&
|
|
189
|
-
parent_df.columns.include?(parent_col) && child_df.columns.include?(child_col)
|
|
190
|
-
|
|
191
|
-
# Get valid values from parent
|
|
192
|
-
valid_values = parent_df[parent_col].to_a.uniq.compact
|
|
193
162
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
163
|
+
queue = [root]
|
|
164
|
+
|
|
165
|
+
while queue.length > 0
|
|
166
|
+
parent_node_id = queue.shift
|
|
167
|
+
maybe_digraph.adj[parent_node_id].each do |child_node_id, attrs|
|
|
168
|
+
edge = edge_id(parent_node_id, child_node_id)
|
|
169
|
+
|
|
170
|
+
next if seen_edges.include?(edge)
|
|
171
|
+
seen_edges.add(edge)
|
|
172
|
+
|
|
173
|
+
parent_node = Graph::NODES[parent_node_id]
|
|
174
|
+
child_node = Graph::NODES[child_node_id]
|
|
175
|
+
parent_df = filtered[parent_node.fetch(:file)]
|
|
176
|
+
next unless parent_df
|
|
177
|
+
|
|
178
|
+
child_df = filtered[child_node.fetch(:file)]
|
|
179
|
+
# Certain nodes are pre-filtered because they reference only
|
|
180
|
+
# a piece of the dataframe
|
|
181
|
+
filter_attrs = child_node[:filter_attrs]
|
|
182
|
+
if filter_attrs && child_df.columns.include?(filter_attrs.fetch(:filter_col))
|
|
183
|
+
filter = filter_attrs.fetch(:filter)
|
|
184
|
+
# Temporarily remove rows that do not match node filter criteria to process them
|
|
185
|
+
# separately (e.g., when filtering stops, parent stations that should be preserved
|
|
186
|
+
# regardless of direct references)
|
|
187
|
+
saved_vals = child_df.filter(filter.is_not)
|
|
188
|
+
child_df = child_df.filter(filter)
|
|
199
189
|
end
|
|
200
|
-
child_df
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
190
|
+
next unless child_df && child_df.height > 0
|
|
191
|
+
|
|
192
|
+
queue << child_node_id
|
|
193
|
+
|
|
194
|
+
attrs[:dependencies].each do |dep|
|
|
195
|
+
parent_col = dep[parent_node_id]
|
|
196
|
+
child_col = dep[child_node_id]
|
|
197
|
+
allow_null = !!dep[:allow_null]
|
|
198
|
+
|
|
199
|
+
next unless parent_col && child_col &&
|
|
200
|
+
parent_df.columns.include?(parent_col) && child_df.columns.include?(child_col)
|
|
201
|
+
|
|
202
|
+
# Get valid values from parent
|
|
203
|
+
valid_values = parent_df[parent_col].to_a.uniq.compact
|
|
204
|
+
|
|
205
|
+
# Filter child to only include rows that reference valid parent values
|
|
206
|
+
before = child_df.height
|
|
207
|
+
filter = Polars.col(child_col).is_in(valid_values)
|
|
208
|
+
if allow_null
|
|
209
|
+
filter = (filter | Polars.col(child_col).is_null)
|
|
210
|
+
end
|
|
211
|
+
child_df = child_df.filter(filter)
|
|
212
|
+
changed = child_df.height < before
|
|
213
|
+
|
|
214
|
+
# If we removed a part of the child_df earlier, concat it back on
|
|
215
|
+
if saved_vals
|
|
216
|
+
child_df = Polars.concat([child_df, saved_vals], how: "vertical")
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
if changed
|
|
220
|
+
filtered[child_node.fetch(:file)] = child_df
|
|
221
|
+
end
|
|
210
222
|
end
|
|
211
223
|
end
|
|
212
224
|
end
|
|
213
225
|
end
|
|
226
|
+
|
|
227
|
+
def edge_id(parent, child)
|
|
228
|
+
# Sort the endpoints so both directions of an undirected edge map to the same id
|
|
229
|
+
[parent, child].sort.join("-")
|
|
230
|
+
end
|
|
214
231
|
end
|
|
215
232
|
end
|
data/lib/gtfs_df/version.rb
CHANGED