gtfs_df 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 205ec058b41c5bd1d2b01ff3950d4cd2ebb20f304d02d5bcd3dd0c447b4e0a6e
4
- data.tar.gz: 54cca637de421c26d2144df100f8430f5dddb639b3a37d1160dfbd9704630e33
3
+ metadata.gz: 140458a6ce1013bef475e0a6cdcab6364cff04b8a18eedb5e5d0244e3bccf38a
4
+ data.tar.gz: c420a34f7004eca9267f32f53038632f822224924eac5b77aa98957bb3149e20
5
5
  SHA512:
6
- metadata.gz: ef853b504ee701e77911259352d0057f6e327db8022370a1fa0dbaa597bfadcac7ac1c8fbe746c04ec4669d86384279a259ac35d4126f1483286937812cb7cce
7
- data.tar.gz: 728d684dd02b653cc779aa5e8152766f5dccafcb097e8fca5810b90f994c442645119f32efb62c84d8fbf1645109f579a3c74f6eb88c9e21dd03faa72193cdd0
6
+ metadata.gz: 032d24ed1df3ed43e5e6953abebbeda70ba5450cab731e931e8548bd058e37d9472edd688a78b0c941e8ff995111b9f616c8aae23e7daba9f3a610813aade528
7
+ data.tar.gz: b808c05aeedea83faf728feded28a38a78ac7a1d2ff139ea36cb0f474f310228792f926a97160df637f9d85dca83272921ed9dff72a632c4aab575163643610d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## [0.5.0] - 2025-12-08
2
+
3
+ ### Added
4
+
5
+ - add Feed#filter filter_only_children param
6
+
7
+ ### Maintenance
8
+
9
+ - arrange edges so parent is always first
10
+ - build directed graph
11
+ - allow ! in commit messages
1
12
  ## [0.4.1] - 2025-12-05
2
13
 
3
14
  ### Added
data/lib/gtfs_df/feed.rb CHANGED
@@ -75,13 +75,17 @@ module GtfsDf
75
75
  #
76
76
  # @param view [Hash] The view used to filter the feed, with format { file => filters }.
77
77
  # Example view: { 'routes' => { 'route_id' => '123' }, 'trips' => { 'service_id' => 'A' } }
78
- # @param maintain_trip_dependencies [Boolean] Whether trip dependencies should be preserved.
79
- # By default, we treat trips as the atomic unit of GTFS. Therefore, if we filter to one stop
80
- # referenced by TripA, we will preserve _all stops_ referenced by TripA. However, it is
81
- # occasionally useful to prune bad data and _not_ maintain all trip dependencies.
82
- # For example, if StopA contains invalid coordinates, we may wish to filter it out but keep
83
- # the other stops for TripA. In this case, `maintain_trip_dependencies` should be set to false.
84
- def filter(view, maintain_trip_dependencies = true)
78
+ # @param filter_only_children [Boolean] Whether only child dependencies should be pruned.
79
+ # When false, we:
80
+ # - Treat trips as the atomic unit of GTFS. Therefore, if we filter to one stop
81
+ # referenced by TripA, we will preserve _all stops_ referenced by TripA.
82
+ # - Prune unreferenced parent objects (e.g. route is a parent of trip. Unreferenced routes
83
+ # will be pruned.)
84
+ # When true, we:
85
+ # - Do not treat trips as atomic. Filtering on stopA does not maintain the other stops for
86
+ # trips that reference it.
87
+ # - Only filter child objects
88
+ def filter(view, filter_only_children: false)
85
89
  filtered = {}
86
90
 
87
91
  GTFS_FILES.each do |file|
@@ -91,7 +95,11 @@ module GtfsDf
91
95
  filtered[file] = df
92
96
  end
93
97
 
94
- if maintain_trip_dependencies
98
+ if filter_only_children
99
+ view.each do |file, filters|
100
+ filtered = filter!(file, filters, filtered.dup, filter_only_children: true)
101
+ end
102
+ else
95
103
  # Trips are the atomic unit of GTFS, we will generate a new view
96
104
  # based on the set of trips that would be included for each individual filter
97
105
  # and cascade changes from this view in order to retain referential integrity
@@ -107,11 +115,7 @@ module GtfsDf
107
115
  end
108
116
 
109
117
  if trip_ids
110
- filtered = filter!("trips", {"trip_id" => trip_ids.to_a}, filtered)
111
- end
112
- else
113
- view.each do |file, filters|
114
- filtered = filter!(file, filters, filtered.dup)
118
+ filtered = filter!("trips", {"trip_id" => trip_ids.to_a}, filtered.dup)
115
119
  end
116
120
  end
117
121
 
@@ -128,7 +132,7 @@ module GtfsDf
128
132
 
129
133
  private
130
134
 
131
- def filter!(file, filters, filtered)
135
+ def filter!(file, filters, filtered, filter_only_children: false)
132
136
  unless filters.empty?
133
137
  df = filtered[file]
134
138
 
@@ -144,7 +148,7 @@ module GtfsDf
144
148
 
145
149
  filtered[file] = df
146
150
 
147
- prune!(file, filtered)
151
+ prune!(file, filtered, filter_only_children:)
148
152
  end
149
153
 
150
154
  filtered
@@ -152,8 +156,9 @@ module GtfsDf
152
156
 
153
157
  # Traverses the graph to prune unreferenced entities from child dataframes
154
158
  # based on parent relationships. See GtfsDf::Graph::STOP_NODES
155
- def prune!(root, filtered)
156
- graph.each_bfs_edge(root) do |parent_node_id, child_node_id|
159
+ def prune!(root, filtered, filter_only_children: false)
160
+ maybe_digraph = filter_only_children ? graph : graph.to_undirected
161
+ maybe_digraph.each_bfs_edge(root) do |parent_node_id, child_node_id|
157
162
  parent_node = Graph::NODES[parent_node_id]
158
163
  child_node = Graph::NODES[child_node_id]
159
164
  parent_df = filtered[parent_node.fetch(:file)]
@@ -173,7 +178,7 @@ module GtfsDf
173
178
  end
174
179
  next unless child_df && child_df.height > 0
175
180
 
176
- attrs = graph.get_edge_data(parent_node_id, child_node_id)
181
+ attrs = maybe_digraph.get_edge_data(parent_node_id, child_node_id)
177
182
 
178
183
  attrs[:dependencies].each do |dep|
179
184
  parent_col = dep[parent_node_id]
data/lib/gtfs_df/graph.rb CHANGED
@@ -42,15 +42,16 @@ module GtfsDf
42
42
 
43
43
  # Returns a directed graph of GTFS file dependencies
44
44
  def self.build
45
- g = NetworkX::Graph.new
45
+ g = NetworkX::DiGraph.new
46
46
  NODES.keys.each { |node| g.add_node(node) }
47
47
 
48
+ # Edges should be parent, child
48
49
  # TODO: Add fare_rules -> stops + test
49
50
  edges = [
50
51
  ["agency", "routes", {dependencies: [
51
52
  {"agency" => "agency_id", "routes" => "agency_id"}
52
53
  ]}],
53
- ["fare_attributes", "agency", {dependencies: [
54
+ ["agency", "fare_attributes", {dependencies: [
54
55
  {"fare_attributes" => "agency_id",
55
56
  "agency" => "agency_id"}
56
57
  ]}],
@@ -58,7 +59,7 @@ module GtfsDf
58
59
  {"fare_attributes" => "fare_id",
59
60
  "fare_rules" => "fare_id"}
60
61
  ]}],
61
- ["fare_rules", "routes", {dependencies: [
62
+ ["routes", "fare_rules", {dependencies: [
62
63
  {"fare_rules" => "route_id", "routes" => "route_id", :allow_null => true}
63
64
  ]}],
64
65
  ["routes", "trips", {dependencies: [
@@ -67,24 +68,24 @@ module GtfsDf
67
68
  ["trips", "stop_times", {dependencies: [
68
69
  {"trips" => "trip_id", "stop_times" => "trip_id"}
69
70
  ]}],
70
- ["stop_times", "stops", {dependencies: [
71
+ ["stops", "stop_times", {dependencies: [
71
72
  {"stop_times" => "stop_id", "stops" => "stop_id"}
72
73
  ]}],
73
74
  # Self-referential edge: stops can reference parent stations (location_type=1)
74
- ["stops", "parent_stations", {dependencies: [
75
+ ["parent_stations", "stops", {dependencies: [
75
76
  {"stops" => "parent_station", "parent_stations" => "stop_id"}
76
77
  ]}],
77
78
  ["stops", "transfers", {dependencies: [
78
79
  {"stops" => "stop_id", "transfers" => "from_stop_id"},
79
80
  {"stops" => "stop_id", "transfers" => "to_stop_id"}
80
81
  ]}],
81
- ["trips", "calendar", {dependencies: [
82
+ ["calendar", "trips", {dependencies: [
82
83
  {"trips" => "service_id", "calendar" => "service_id"}
83
84
  ]}],
84
- ["trips", "calendar_dates", {dependencies: [
85
+ ["calendar_dates", "trips", {dependencies: [
85
86
  {"trips" => "service_id", "calendar_dates" => "service_id"}
86
87
  ]}],
87
- ["trips", "shapes", {dependencies: [
88
+ ["shapes", "trips", {dependencies: [
88
89
  {"trips" => "shape_id", "shapes" => "shape_id"}
89
90
  ]}],
90
91
  ["trips", "frequencies", {dependencies: [
@@ -97,11 +98,11 @@ module GtfsDf
97
98
  {"stops" => "stop_id", "fare_leg_join_rules" => "from_stop_id"},
98
99
  {"stops" => "stop_id", "fare_leg_join_rules" => "to_stop_id"}
99
100
  ]}],
100
- ["fare_leg_join_rules", "networks", {dependencies: [
101
+ ["networks", "fare_leg_join_rules", {dependencies: [
101
102
  {"fare_leg_join_rules" => "from_network_id", "networks" => "network_id"},
102
103
  {"fare_leg_join_rules" => "to_network_id", "networks" => "network_id"}
103
104
  ]}],
104
- ["fare_leg_join_rules", "fare_leg_rules",
105
+ ["fare_leg_rules", "fare_leg_join_rules",
105
106
  {dependencies: [
106
107
  {"fare_leg_join_rules" => "fare_leg_rule_id", "fare_leg_rules" => "fare_leg_rule_id"}
107
108
  ]}],
@@ -110,14 +111,14 @@ module GtfsDf
110
111
  {"fare_transfer_rules" => "from_leg_group_id", "fare_leg_rules" => "leg_group_id"},
111
112
  {"fare_transfer_rules" => "to_leg_group_id", "fare_leg_rules" => "leg_group_id"}
112
113
  ]}],
113
- ["fare_transfer_rules", "fare_products",
114
+ ["fare_products", "fare_transfer_rules",
114
115
  {dependencies: [
115
116
  {"fare_transfer_rules" => "fare_product_id", "fare_products" => "fare_product_id"}
116
117
  ]}],
117
118
  ["areas", "stop_areas", {dependencies: [
118
119
  {"areas" => "area_id", "stop_areas" => "area_id"}
119
120
  ]}],
120
- ["stops", "areas", {dependencies: [
121
+ ["areas", "stops", {dependencies: [
121
122
  {"stops" => "area_id", "areas" => "area_id"}
122
123
  ]}],
123
124
  ["areas", "fare_leg_rules", {dependencies: [
@@ -133,10 +134,10 @@ module GtfsDf
133
134
  ["networks", "fare_leg_rules", {dependencies: [
134
135
  {"networks" => "network_id", "fare_leg_rules" => "network_id"}
135
136
  ]}],
136
- ["route_networks", "routes", {dependencies: [
137
+ ["routes", "route_networks", {dependencies: [
137
138
  {"route_networks" => "route_id", "routes" => "route_id"}
138
139
  ]}],
139
- ["route_networks", "networks", {dependencies: [
140
+ ["networks", "route_networks", {dependencies: [
140
141
  {"route_networks" => "network_id", "networks" => "network_id"}
141
142
  ]}],
142
143
  ["location_groups", "location_group_stops", {dependencies: [
@@ -145,13 +146,13 @@ module GtfsDf
145
146
  ["location_groups", "stops", {dependencies: [
146
147
  {"location_groups" => "location_group_id", "stops" => "location_group_id"}
147
148
  ]}],
148
- ["location_group_stops", "stops", {dependencies: [
149
+ ["stops", "location_group_stops", {dependencies: [
149
150
  {"location_group_stops" => "stop_id", "stops" => "stop_id"}
150
151
  ]}],
151
152
  ["stops", "location_group_stops", {dependencies: [
152
153
  {"stops" => "stop_id", "location_group_stops" => "stop_id"}
153
154
  ]}],
154
- ["location_group_stops", "location_groups", {dependencies: [
155
+ ["location_groups", "location_group_stops", {dependencies: [
155
156
  {"location_group_stops" => "location_group_id", "location_groups" => "location_group_id"}
156
157
  ]}],
157
158
  ["booking_rules", "stop_times", {dependencies: [
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GtfsDf
4
- VERSION = "0.4.1"
4
+ VERSION = "0.5.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gtfs_df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Mejorado