RubyGems - gtfs_df - Versions diffs - 0.9.2 → 0.9.3 - Mend

gtfs_df 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/.conform.yaml +1 -0
data/CHANGELOG.md +16 -1
data/examples/split-by-agency/Gemfile.lock +1 -1
data/lib/gtfs_df/feed.rb +47 -31
data/lib/gtfs_df/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 01e5ed5d1ce0ac7df81fe984b7bc8ca7716f0010672b7c3738d336ed1fbeaee0
-  data.tar.gz: 6f3a21037faaa76aadf9747bba7df94d7fdac9bb57e0b458f5f17696a7158d67
+  metadata.gz: 9c0746b1937afcfb7000425a59976b3eba8662778437ddf3adc02ec7729c3a49
+  data.tar.gz: e3a553aef868b4c29e06731f0a5e3984efc067a096001e90150743a80f3d45bc
 SHA512:
-  metadata.gz: 1a50d3af43551dda4a03a829ca1d4fd21e323c771181ee2238f9bcea41f4cb0e883d1d543bc26024d2c31d39456212e85bcd6beb6c2935ae98993f34cfd8816c
-  data.tar.gz: 7e1ef2009d75b6dc3f0b82d90183cb048a6a15db74ac7c4076e9acd264fed013a6920e65c95021b59bdfb263fbd4c014b9764b22f396f4957fcec0c7820cea87
+  metadata.gz: 0fccae16bb46da6db651da04ea2591fa17e991219d4d8eb9be3a9444ed0b1b190b315f9223b738f399f3b6fade41c2c2e1081d1aad921a39483426d5b86b1aca
+  data.tar.gz: b98eccb0861c46d8510eaaea6a97c48d6b1fa3687d1c35d445edbf19c349a6c03d45c3a2654da20ab577aa083003eff5335a8fcad6ee0d9f6f7f86a78c2ed33b

data/.conform.yaml CHANGED Viewed

@@ -24,3 +24,4 @@ policies:
                   - docs
                   - ci
                   - qol
+                  - test

data/CHANGELOG.md CHANGED Viewed

@@ -1,8 +1,23 @@
-## [0.9.2] - 2026-02-21
+## [0.9.3] - 2026-02-27
+### 🐛 Bug Fixes
+- Allow multiple filters
+- Refactor prune to keep calendar_dates-only dependencies
 ### 📚 Documentation
 - Add Brooke to the list of authors
+### 🧪 Testing
+- Ensure we don't drop trips and routes
+### ⚙️ Miscellaneous Tasks
+- Bump version to 0.9.2
+- Avoid converting series into arrays
+- Simplify trip pool reduction
 ## [0.9.1] - 2026-02-17
 ### 🐛 Bug Fixes

data/examples/split-by-agency/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: ../..
   specs:
-    gtfs_df (0.9.2)
+    gtfs_df (0.9.3)
       networkx (~> 0.4)
       polars-df (~> 0.22, < 0.24)
       rubyzip (>= 3.0, < 4.0)

data/lib/gtfs_df/feed.rb CHANGED Viewed

@@ -121,19 +121,19 @@ module GtfsDf
         # Trips are the atomic unit of GTFS, we will generate a new view
         # based on the set of trips that would be included for each individual filter
         # and cascade changes from this view in order to retain referential integrity
-        trip_ids = nil
+        trip_ids = Polars::Series.new.alias("trip_id")
         view.each do |file, filters|
           new_filtered = filter!(file, filters, filtered.dup)
-          trip_ids = if trip_ids.nil?
+          trip_ids = if trip_ids.empty?
             new_filtered["trips"]["trip_id"]
           else
-            trip_ids & new_filtered["trips"]["trip_id"]
+            trip_ids.filter(trip_ids.is_in(new_filtered["trips"]["trip_id"]))
           end
         end
         if trip_ids
-          filtered = filter!("trips", {"trip_id" => trip_ids.to_a}, filtered.dup)
+          filtered = filter!("trips", {"trip_id" => trip_ids.implode}, filtered.dup)
         end
       end
@@ -181,7 +181,7 @@ module GtfsDf
         df = filtered[file]
         filters.each do |col, val|
-          df = if val.is_a?(Array)
+          df = if val.is_a?(Polars::Series) || val.is_a?(Array)
             df.filter(Polars.col(col).is_in(val))
           elsif val.respond_to?(:call)
             df.filter(val.call(Polars.col(col)))
@@ -200,9 +200,16 @@ module GtfsDf
     # Traverses the graph to prune unreferenced entities from child dataframes
     # based on parent relationships. See GtfsDf::Graph::STOP_NODES
+    #
+    # The trips table has multiple parents (calendar, calendar_dates, routes,
+    # stop_times). We accumulate valid values from all of them and keep rows
+    # that match any parent, so trips referenced only via calendar_dates are
+    # not dropped when another edge is processed first.
     def prune!(root, filtered, filter_only_children: false)
       seen_edges = Set.new
       rerooted_graph = Graph.build(bidirectional: !filter_only_children)
+      accumulated_service_ids = Polars::Series.new("service_id", dtype: Polars::String)
+      trips_base_df = nil
       queue = [root]
@@ -245,37 +252,46 @@ module GtfsDf
           attrs[:dependencies].each do |dep|
             parent_col = dep[parent_node_id]
             child_col = dep[child_node_id]
-            allow_null = !!dep[:allow_null]
+            allow_null_flag = !!dep[:allow_null]
             next unless parent_col && child_col &&
               parent_df.columns.include?(parent_col) && child_df.columns.include?(child_col)
             # Get valid values from parent
-            valid_values = parent_df[parent_col].to_a.uniq.compact
-            # Annoying special case to make sure that if we have a calendar with exceptions,
-            # the calendar_dates file doesn't end up pruning other files
-            if parent_node_id == "calendar_dates" && parent_col == "service_id" &&
-                filtered["calendar"]
-              valid_values = (valid_values + calendar["service_id"].to_a).uniq
-            end
-            # Filter child to only include rows that reference valid parent values
-            before = child_df.height
-            filter = Polars.col(child_col).is_in(valid_values)
-            if allow_null
-              filter = (filter | Polars.col(child_col).is_null)
-            end
-            child_df = child_df.filter(filter)
-            changed = child_df.height < before
-            # If we removed a part of the child_df earlier, concat it back on
-            if saved_vals
-              child_df = Polars.concat([child_df, saved_vals], how: "vertical")
-            end
-            if changed
-              filtered[child_node.fetch(:file)] = child_df
+            valid_values = parent_df[parent_col].drop_nulls.unique
+            if child_node_id == "trips" && (parent_node_id == "calendar" || parent_node_id == "calendar_dates")
+              # Calendar + calendar_dates both define service for the same trips, so we want
+              # union semantics across those two parents (a trip is valid if it appears in
+              # either).
+              #
+              # Here we accumulate valid service_ids across calendar/calendar_dates, but only
+              # within the pool of trips that are already reachable from structural parents.
+              accumulated_service_ids = Polars.concat([accumulated_service_ids, valid_values]).unique
+              # Determine the base pool of trips:
+              # - If we've already restricted trips via structural parents (routes,
+              #   stop_times, shapes, etc), use that as the base.
+              # - Otherwise, like when filtering directly on trips, use the current
+              #   trips dataframe.
+              trips_base_df ||= filtered[child_node.fetch(:file)]
+              next unless trips_base_df && trips_base_df.height > 0
+              filtered[child_node.fetch(:file)] = trips_base_df.filter(
+                Polars.col("service_id").is_in(accumulated_service_ids.implode)
+              )
+            else
+              # Original single-edge logic for all other nodes
+              before = child_df.height
+              cond = Polars.col(child_col).is_in(valid_values.implode)
+              cond = (cond | Polars.col(child_col).is_null) if allow_null_flag
+              child_df = child_df.filter(cond)
+              if child_df.height < before
+                child_df = Polars.concat([child_df, saved_vals], how: "vertical") if saved_vals
+                filtered[child_node.fetch(:file)] = child_df
+              end
             end
           end
         end

data/lib/gtfs_df/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module GtfsDf
-  VERSION = "0.9.2"
+  VERSION = "0.9.3"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: gtfs_df
 version: !ruby/object:Gem::Version
-  version: 0.9.2
+  version: 0.9.3
 platform: ruby
 authors:
 - David Mejorado