gtfs_df 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +7 -0
  2. data/.conform.yaml +25 -0
  3. data/.envrc +12 -0
  4. data/.rspec +3 -0
  5. data/.rubocop.yml +2 -0
  6. data/.solargraph.yml +26 -0
  7. data/.standard.yml +3 -0
  8. data/CHANGELOG.md +3 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +105 -0
  11. data/Rakefile +10 -0
  12. data/devenv.lock +171 -0
  13. data/devenv.nix +13 -0
  14. data/devenv.yaml +8 -0
  15. data/examples/split-by-agency/.gitignore +1 -0
  16. data/examples/split-by-agency/Gemfile +5 -0
  17. data/examples/split-by-agency/Gemfile.lock +52 -0
  18. data/examples/split-by-agency/README.md +26 -0
  19. data/examples/split-by-agency/split_by_agency.rb +52 -0
  20. data/lib/gtfs_df/base_gtfs_table.rb +45 -0
  21. data/lib/gtfs_df/feed.rb +152 -0
  22. data/lib/gtfs_df/graph.rb +131 -0
  23. data/lib/gtfs_df/reader.rb +28 -0
  24. data/lib/gtfs_df/schema/agency.rb +26 -0
  25. data/lib/gtfs_df/schema/areas.rb +18 -0
  26. data/lib/gtfs_df/schema/attributions.rb +28 -0
  27. data/lib/gtfs_df/schema/booking_rules.rb +19 -0
  28. data/lib/gtfs_df/schema/calendar.rb +32 -0
  29. data/lib/gtfs_df/schema/calendar_dates.rb +19 -0
  30. data/lib/gtfs_df/schema/enum_values.rb +147 -0
  31. data/lib/gtfs_df/schema/fare_attributes.rb +25 -0
  32. data/lib/gtfs_df/schema/fare_leg_join_rules.rb +20 -0
  33. data/lib/gtfs_df/schema/fare_leg_rules.rb +21 -0
  34. data/lib/gtfs_df/schema/fare_media.rb +18 -0
  35. data/lib/gtfs_df/schema/fare_products.rb +24 -0
  36. data/lib/gtfs_df/schema/fare_rules.rb +19 -0
  37. data/lib/gtfs_df/schema/fare_transfer_rules.rb +23 -0
  38. data/lib/gtfs_df/schema/feed_info.rb +21 -0
  39. data/lib/gtfs_df/schema/frequencies.rb +22 -0
  40. data/lib/gtfs_df/schema/levels.rb +15 -0
  41. data/lib/gtfs_df/schema/location_group_stops.rb +17 -0
  42. data/lib/gtfs_df/schema/location_groups.rb +17 -0
  43. data/lib/gtfs_df/schema/networks.rb +17 -0
  44. data/lib/gtfs_df/schema/pathways.rb +29 -0
  45. data/lib/gtfs_df/schema/rider_categories.rb +18 -0
  46. data/lib/gtfs_df/schema/route_networks.rb +17 -0
  47. data/lib/gtfs_df/schema/routes.rb +33 -0
  48. data/lib/gtfs_df/schema/shapes.rb +24 -0
  49. data/lib/gtfs_df/schema/stop_areas.rb +19 -0
  50. data/lib/gtfs_df/schema/stop_attributes.rb +17 -0
  51. data/lib/gtfs_df/schema/stop_times.rb +38 -0
  52. data/lib/gtfs_df/schema/stops.rb +34 -0
  53. data/lib/gtfs_df/schema/transfers.rb +20 -0
  54. data/lib/gtfs_df/schema/translations.rb +24 -0
  55. data/lib/gtfs_df/schema/trips.rb +30 -0
  56. data/lib/gtfs_df/schema_validator.rb +89 -0
  57. data/lib/gtfs_df/utils.rb +52 -0
  58. data/lib/gtfs_df/version.rb +5 -0
  59. data/lib/gtfs_df/writer.rb +26 -0
  60. data/lib/gtfs_df.rb +49 -0
  61. data/sig/gtfs-df.rbs +4 -0
  62. metadata +148 -0
@@ -0,0 +1,18 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `fare_media.txt` table.
    class FareMedia < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        "fare_media_id" => Polars::String,
        "fare_media_name" => Polars::String,
        "fare_media_type" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      # The GTFS reference marks fare_media_id and fare_media_type as Required
      # and fare_media_name as Optional.
      REQUIRED_FIELDS = %w[
        fare_media_id
        fare_media_type
      ].freeze
    end
  end
end
@@ -0,0 +1,24 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `fare_products.txt` table.
    class FareProducts < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        "fare_product_id" => Polars::String,
        "fare_product_name" => Polars::String,
        "rider_category_id" => Polars::String,
        "amount" => Polars::Float64,
        "currency" => Polars::String,
        # NOTE(review): duration / duration_type are not part of the current
        # GTFS reference; kept so existing feeds that carry them still get typed.
        "duration" => Polars::Int64,
        "duration_type" => Polars::String,
        "fare_media_id" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      # fare_product_name is Optional in the GTFS reference, so it is not
      # listed here.
      REQUIRED_FIELDS = %w[
        fare_product_id
        amount
        currency
      ].freeze
    end
  end
end
@@ -0,0 +1,19 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `fare_rules.txt` table.
    class FareRules < BaseGtfsTable
      # Every column in this table is read as a string.
      SCHEMA = %w[fare_id route_id origin_id destination_id contains_id]
        .to_h { |column| [column, Polars::String] }
        .freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[fare_id].freeze
    end
  end
end
@@ -0,0 +1,23 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `fare_transfer_rules.txt` table.
    class FareTransferRules < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        # NOTE(review): fare_transfer_rule_id is not a column in the GTFS
        # reference; it stays in the schema only so feeds that carry it are
        # still typed.
        "fare_transfer_rule_id" => Polars::String,
        "from_leg_group_id" => Polars::String,
        "to_leg_group_id" => Polars::String,
        "transfer_count" => Polars::Int64,
        "duration_limit" => Polars::Int64,
        "duration_limit_type" => Polars::String,
        "fare_transfer_type" => Polars::String,
        "fare_product_id" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      # fare_transfer_type is the only unconditionally Required column in the
      # GTFS reference; both leg group ids are Optional.
      REQUIRED_FIELDS = %w[
        fare_transfer_type
      ].freeze
    end
  end
end
@@ -0,0 +1,21 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `feed_info.txt` table.
    class FeedInfo < BaseGtfsTable
      # Every column in this table is read as a string (dates included).
      SCHEMA = %w[
        feed_publisher_name
        feed_publisher_url
        feed_lang
        default_lang
        feed_start_date
        feed_end_date
        feed_version
        feed_contact_email
        feed_contact_url
      ].to_h { |column| [column, Polars::String] }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[
        feed_publisher_name
        feed_publisher_url
        feed_lang
      ].freeze
    end
  end
end
@@ -0,0 +1,22 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `frequencies.txt` table.
    class Frequencies < BaseGtfsTable
      # String-typed columns first, then the integer-typed ones; the resulting
      # key order matches the column order of the table.
      SCHEMA = %w[trip_id start_time end_time]
        .to_h { |column| [column, Polars::String] }
        .merge(%w[headway_secs exact_times].to_h { |column| [column, Polars::Int64] })
        .freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[trip_id start_time end_time headway_secs].freeze
    end
  end
end
@@ -0,0 +1,15 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `levels.txt` table.
    class Levels < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        "level_id" => Polars::String,
        "level_index" => Polars::Float64,
        "level_name" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[
        level_id
        level_index
      ].freeze
    end
  end
end
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `location_group_stops.txt` table.
    class LocationGroupStops < BaseGtfsTable
      # Both columns are string identifiers.
      SCHEMA = %w[location_group_id stop_id]
        .to_h { |column| [column, Polars::String] }
        .freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[location_group_id stop_id].freeze
    end
  end
end
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `location_groups.txt` table.
    class LocationGroups < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        "location_group_id" => Polars::String,
        "location_group_name" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      # location_group_name is Optional in the GTFS reference, so only the id
      # is required.
      REQUIRED_FIELDS = %w[
        location_group_id
      ].freeze
    end
  end
end
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `networks.txt` table.
    class Networks < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        "network_id" => Polars::String,
        "network_name" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      # network_name is Optional in the GTFS reference, so only the id is
      # required.
      REQUIRED_FIELDS = %w[
        network_id
      ].freeze
    end
  end
end
@@ -0,0 +1,29 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `pathways.txt` table.
    class Pathways < BaseGtfsTable
      # Column name => Polars dtype. Enum columns take their categories from
      # the first element of each EnumValues entry.
      SCHEMA = {
        "pathway_id" => Polars::String,
        "from_stop_id" => Polars::String,
        "to_stop_id" => Polars::String,
        "pathway_mode" => Polars::Enum.new(EnumValues::PATHWAY_MODE.map(&:first)),
        "is_bidirectional" => Polars::Enum.new(EnumValues::IS_BIDIRECTIONAL.map(&:first)),
        "length" => Polars::Float64,
        "traversal_time" => Polars::Int64,
        "stair_count" => Polars::Int64,
        "max_slope" => Polars::Float64,
        "min_width" => Polars::Float64,
        "signposted_as" => Polars::String,
        "reversed_signposted_as" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[pathway_id from_stop_id to_stop_id pathway_mode is_bidirectional].freeze

      # Column name => EnumValues constant name; SchemaValidator uses it to
      # print value descriptions in enum error messages.
      ENUM_VALUE_MAP = {
        "pathway_mode" => :PATHWAY_MODE,
        "is_bidirectional" => :IS_BIDIRECTIONAL
      }.freeze
    end
  end
end
@@ -0,0 +1,18 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `rider_categories.txt` table.
    class RiderCategories < BaseGtfsTable
      # Every column in this table is read as a string.
      SCHEMA = %w[rider_category_id rider_category_name rider_category_description]
        .to_h { |column| [column, Polars::String] }
        .freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[rider_category_id rider_category_name].freeze
    end
  end
end
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `route_networks.txt` table.
    class RouteNetworks < BaseGtfsTable
      # Both columns are string identifiers.
      SCHEMA = %w[route_id network_id]
        .to_h { |column| [column, Polars::String] }
        .freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[route_id network_id].freeze
    end
  end
end
@@ -0,0 +1,33 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `routes.txt` table.
    class Routes < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        "route_id" => Polars::String,
        "agency_id" => Polars::String,
        "route_short_name" => Polars::String,
        "route_long_name" => Polars::String,
        "route_desc" => Polars::String,
        "route_type" => Polars::String,
        "route_url" => Polars::String,
        "route_color" => Polars::String,
        "route_text_color" => Polars::String,
        # GTFS defines route_sort_order as a non-negative integer; a string
        # dtype would order "10" before "2".
        "route_sort_order" => Polars::Int64,
        "continuous_pickup" => Polars::Int64,
        "continuous_drop_off" => Polars::Int64,
        "network_id" => Polars::String,
        "cemv_support" => Polars::Enum.new(EnumValues::CEMV_SUPPORT.map(&:first))
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[route_id route_type].freeze

      # Column name => EnumValues constant name, used for enum error messages.
      # NOTE(review): SchemaValidator only runs the enum check on columns whose
      # SCHEMA dtype is a Polars::Enum, so the route_type, continuous_pickup and
      # continuous_drop_off entries are currently never consulted.
      ENUM_VALUE_MAP = {
        "route_type" => :ROUTE_TYPE,
        "continuous_pickup" => :CONTINUOUS_PICKUP,
        "continuous_drop_off" => :CONTINUOUS_DROP_OFF,
        "cemv_support" => :CEMV_SUPPORT
      }.freeze
    end
  end
end
@@ -0,0 +1,24 @@
# frozen_string_literal: true

require_relative "../base_gtfs_table"

module GtfsDf
  module Schema
    # Column definitions for the GTFS `shapes.txt` table.
    class Shapes < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        "shape_id" => Polars::String,
        "shape_pt_lat" => Polars::Float64,
        "shape_pt_lon" => Polars::Float64,
        "shape_pt_sequence" => Polars::Int64,
        "shape_dist_traveled" => Polars::Float64
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[shape_id shape_pt_lat shape_pt_lon shape_pt_sequence].freeze
    end
  end
end
@@ -0,0 +1,19 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `stop_areas.txt` table.
    class StopAreas < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        # Columns defined by the GTFS reference for stop_areas.txt.
        "area_id" => Polars::String,
        "stop_id" => Polars::String,
        # NOTE(review): the columns below are not in the GTFS reference; they
        # stay in the schema only so feeds that carry them are still typed.
        "stop_area_id" => Polars::String,
        "stop_area_name" => Polars::String,
        "stop_area_type" => Polars::String,
        "parent_stop_area_id" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      # The GTFS reference marks both area_id and stop_id as Required.
      REQUIRED_FIELDS = %w[
        area_id
        stop_id
      ].freeze
    end
  end
end
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the `stop_attributes` table.
    class StopAttributes < BaseGtfsTable
      # Column name => Polars dtype.
      SCHEMA = {
        "stop_id" => Polars::String,
        "wheelchair_boarding" => Polars::Int64,
        "bikes_allowed" => Polars::Int64
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[stop_id].freeze
    end
  end
end
@@ -0,0 +1,38 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `stop_times.txt` table.
    class StopTimes < BaseGtfsTable
      # Column name => Polars dtype. Enum columns take their categories from
      # the first element of each EnumValues entry.
      SCHEMA = {
        "trip_id" => Polars::String,
        "arrival_time" => Polars::String,
        "departure_time" => Polars::String,
        "stop_id" => Polars::String,
        "location_group_id" => Polars::String,
        "location_id" => Polars::String,
        "stop_sequence" => Polars::Int64,
        "stop_headsign" => Polars::String,
        "start_pickup_drop_off_window" => Polars::String,
        "end_pickup_drop_off_window" => Polars::String,
        "pickup_type" => Polars::Enum.new(EnumValues::PICKUP_TYPE.map(&:first)),
        "drop_off_type" => Polars::Enum.new(EnumValues::DROP_OFF_TYPE.map(&:first)),
        "continuous_pickup" => Polars::Enum.new(EnumValues::CONTINUOUS_PICKUP.map(&:first)),
        "continuous_drop_off" => Polars::Enum.new(EnumValues::CONTINUOUS_DROP_OFF.map(&:first)),
        "shape_dist_traveled" => Polars::Float64,
        "timepoint" => Polars::Enum.new(EnumValues::TIMEPOINT.map(&:first)),
        "pickup_booking_rule_id" => Polars::String,
        "drop_off_booking_rule_id" => Polars::String
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      # NOTE(review): GTFS makes stop_id only conditionally required (rows may
      # use location_group_id or location_id instead), so the null check can
      # flag such rows — confirm whether that is intended.
      REQUIRED_FIELDS = %w[trip_id stop_sequence stop_id].freeze

      # Column name => EnumValues constant name; SchemaValidator uses it to
      # print value descriptions in enum error messages.
      ENUM_VALUE_MAP = {
        "pickup_type" => :PICKUP_TYPE,
        "drop_off_type" => :DROP_OFF_TYPE,
        "continuous_pickup" => :CONTINUOUS_PICKUP,
        "continuous_drop_off" => :CONTINUOUS_DROP_OFF,
        "timepoint" => :TIMEPOINT
      }.freeze
    end
  end
end
@@ -0,0 +1,34 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `stops.txt` table.
    class Stops < BaseGtfsTable
      # Column name => Polars dtype. Enum columns take their categories from
      # the first element of each EnumValues entry.
      SCHEMA = {
        "stop_id" => Polars::String,
        "stop_code" => Polars::String,
        "stop_name" => Polars::String,
        "tts_stop_name" => Polars::String,
        "stop_desc" => Polars::String,
        # Latitude is numeric, same as stop_lon (it was previously typed as a
        # string).
        "stop_lat" => Polars::Float64,
        "stop_lon" => Polars::Float64,
        "zone_id" => Polars::String,
        "stop_url" => Polars::String,
        "location_type" => Polars::Enum.new(EnumValues::LOCATION_TYPE.map(&:first)),
        "parent_station" => Polars::String,
        "stop_timezone" => Polars::String,
        "wheelchair_boarding" => Polars::Enum.new(EnumValues::WHEELCHAIR_BOARDING.map(&:first)),
        "level_id" => Polars::String,
        "platform_code" => Polars::String,
        "stop_access" => Polars::Enum.new(EnumValues::STOP_ACCESS.map(&:first))
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      # NOTE(review): GTFS makes stop_name / stop_lat / stop_lon only
      # conditionally required (optional for generic nodes and boarding
      # areas), so the null check can flag such rows — confirm intent.
      REQUIRED_FIELDS = %w[stop_id stop_name stop_lat stop_lon].freeze

      # Column name => EnumValues constant name; SchemaValidator uses it to
      # print value descriptions in enum error messages.
      ENUM_VALUE_MAP = {
        "location_type" => :LOCATION_TYPE,
        "wheelchair_boarding" => :WHEELCHAIR_BOARDING,
        "stop_access" => :STOP_ACCESS
      }.freeze
    end
  end
end
@@ -0,0 +1,20 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `transfers.txt` table.
    class Transfers < BaseGtfsTable
      # String-typed stop ids first, then the integer-typed columns; the
      # resulting key order matches the column order of the table.
      SCHEMA = %w[from_stop_id to_stop_id]
        .to_h { |column| [column, Polars::String] }
        .merge(%w[transfer_type min_transfer_time].to_h { |column| [column, Polars::Int64] })
        .freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[from_stop_id to_stop_id transfer_type].freeze
    end
  end
end
@@ -0,0 +1,24 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `translations.txt` table.
    class Translations < BaseGtfsTable
      # Every column in this table is read as a string.
      SCHEMA = %w[
        table_name
        field_name
        language
        translation
        record_id
        record_sub_id
        field_value
      ].to_h { |column| [column, Polars::String] }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[table_name field_name language translation].freeze
    end
  end
end
@@ -0,0 +1,30 @@
# frozen_string_literal: true

module GtfsDf
  module Schema
    # Column definitions for the GTFS `trips.txt` table.
    class Trips < BaseGtfsTable
      # Column name => Polars dtype. Enum columns take their categories from
      # the first element of each EnumValues entry.
      SCHEMA = {
        "route_id" => Polars::String,
        "service_id" => Polars::String,
        "trip_id" => Polars::String,
        "trip_headsign" => Polars::String,
        "trip_short_name" => Polars::String,
        "direction_id" => Polars::Enum.new(EnumValues::DIRECTION_ID.map(&:first)),
        "block_id" => Polars::String,
        "shape_id" => Polars::String,
        "wheelchair_accessible" => Polars::Enum.new(EnumValues::WHEELCHAIR_ACCESSIBLE.map(&:first)),
        "bikes_allowed" => Polars::Enum.new(EnumValues::BIKES_ALLOWED.map(&:first)),
        "cars_allowed" => Polars::Enum.new(EnumValues::CARS_ALLOWED.map(&:first))
      }.freeze

      # Columns SchemaValidator reports when missing (or null on eager frames).
      REQUIRED_FIELDS = %w[route_id service_id trip_id].freeze

      # Column name => EnumValues constant name; SchemaValidator uses it to
      # print value descriptions in enum error messages.
      ENUM_VALUE_MAP = {
        "direction_id" => :DIRECTION_ID,
        "wheelchair_accessible" => :WHEELCHAIR_ACCESSIBLE,
        "bikes_allowed" => :BIKES_ALLOWED,
        "cars_allowed" => :CARS_ALLOWED
      }.freeze
    end
  end
end
@@ -0,0 +1,89 @@
# frozen_string_literal: true

module GtfsDf
  # Checks a frame against the constants declared on a GTFS table class and
  # collects human-readable error messages.
  #
  # The table class must define REQUIRED_FIELDS; SCHEMA and ENUM_VALUE_MAP are
  # optional. Validation runs on the first call to #valid? or #errors and the
  # result is reused afterwards.
  class SchemaValidator
    # @param df [Polars::LazyFrame, Object] frame to validate; anything that
    #   is not a Polars::LazyFrame is treated as an eager frame responding to
    #   `include?(name)` and `[name]` (presumably a Polars::DataFrame)
    # @param klass [Class] table class providing REQUIRED_FIELDS and,
    #   optionally, SCHEMA and ENUM_VALUE_MAP
    def initialize(df, klass)
      @df = df
      @required_fields = klass::REQUIRED_FIELDS
      @schema = klass.const_defined?(:SCHEMA) ? klass::SCHEMA : {}
      @source_class = klass
      @errors = []
      @validated = false
    end

    # @return [Boolean] true when validation produced no errors
    def valid?
      validate unless @validated
      @errors.empty?
    end

    # @return [Array<String>] messages of the form "<field>: <problem>"
    def errors
      validate unless @validated
      @errors
    end

    private

    # Runs the checks that fit the frame kind and marks the result as cached.
    def validate
      @errors.clear
      if @df.is_a?(Polars::LazyFrame)
        validate_lazy
      else
        validate_eager
      end
      @validated = true
    end

    # Schema-only checks (required columns, dtypes). Enum and null checks are
    # skipped for LazyFrame: both need actual column data, which would
    # materialize the frame and defeat lazy evaluation.
    def validate_lazy
      # Resolve the column list once instead of once per required field.
      columns = @df.columns
      @required_fields.each do |field|
        @errors << "#{field}: missing" unless columns.include?(field)
      end

      actual_schema = @df.schema
      @schema.each do |field, expected_type|
        next unless actual_schema.key?(field)

        actual_type = actual_schema[field]
        next if actual_type == expected_type

        @errors << "#{field}: Type mismatch. Expected #{expected_type}, got #{actual_type}"
      end
    end

    # Data-level checks for eager frames: required columns must exist and be
    # free of nulls, and enum-typed columns may only hold allowed values.
    def validate_eager
      check_required_columns
      check_enum_columns
    end

    # Records "missing" for absent required columns and "null" for present
    # ones that contain at least one null.
    def check_required_columns
      @required_fields.each do |field|
        if !@df.include?(field)
          @errors << "#{field}: missing"
        elsif @df[field].null_count > 0
          @errors << "#{field}: null"
        end
      end
    end

    # Records one error per enum-typed column holding values outside its
    # categories. Columns absent from the frame are ignored.
    def check_enum_columns
      @schema.each do |field, expected_type|
        next unless @df.include?(field)
        next unless expected_type.is_a?(Polars::Enum)

        allowed = expected_type.categories.to_a
        # De-duplicate before the membership scan so each distinct value is
        # checked once; the reported order is unchanged.
        invalid = @df[field].drop_nulls.to_a.uniq.reject { |value| allowed.include?(value.to_s) }
        next if invalid.empty?

        @errors << "#{field}: Invalid value(s) #{invalid.join(", ")}. Allowed: #{allowed_values_text(field, allowed)}"
      end
    end

    # Renders the allowed values, with descriptions when the table class maps
    # the field to an EnumValues constant.
    def allowed_values_text(field, allowed)
      value_descs = enum_descriptions(field)
      return allowed.join(", ") unless value_descs

      value_descs.map { |val, desc| "#{val} (#{desc})" }.join(", ")
    end

    # Looks up the EnumValues constant named by ENUM_VALUE_MAP for the field.
    # Returns nil when the class has no map, no entry, or the constant is
    # not defined.
    def enum_descriptions(field)
      return nil unless @source_class&.const_defined?(:ENUM_VALUE_MAP)

      enum_map = @source_class::ENUM_VALUE_MAP
      enum_key = enum_map[field] if enum_map
      return nil unless enum_key && GtfsDf::Schema::EnumValues.const_defined?(enum_key)

      GtfsDf::Schema::EnumValues.const_get(enum_key)
    end
  end
end
@@ -0,0 +1,52 @@
module GtfsDf
  module Utils
    # Whole-string patterns. \A and \z are used instead of ^ and $, which
    # match per line and would accept input such as "12\nabc:00:00".
    TIME_PATTERN = /\A(\d+):(\d+):(\d+)\z/
    DATE_PATTERN = /\A\d{8}\z/
    private_constant :TIME_PATTERN, :DATE_PATTERN

    # Parses a GTFS time string into seconds since the start of the service
    # day.
    #
    # The input is expected to be in the HH:MM:SS format (H:MM:SS is also
    # accepted). The time is measured from "noon minus 12h" of the service day
    # (effectively midnight except for days on which daylight savings time
    # changes occur). Times after midnight on the service day are written as
    # values greater than 24:00:00.
    #
    # @example
    #   parse_time("14:30:00") # => 52200 (2:30PM)
    #   parse_time("25:35:00") # => 92100 (1:35AM on the next day)
    #
    # @param str [String, Integer, nil] time string, or seconds already parsed
    # @return [Integer, nil] seconds; an Integer input is returned as-is; nil
    #   when the input is nil, blank, or not three colon-separated digit
    #   groups. Surrounding whitespace is ignored.
    def self.parse_time(str)
      return str if str.is_a?(Integer)

      text = str.to_s
      # Regexp matching raises on invalid byte sequences; treat as unparsable.
      return nil unless text.valid_encoding?

      match = TIME_PATTERN.match(text.strip)
      return nil unless match

      hours, mins, secs = match.captures.map(&:to_i)
      hours * 3600 + mins * 60 + secs
    end

    # Parses a GTFS date string.
    #
    # The input is expected to be a service day in the YYYYMMDD format.
    # Since time within a service day may be above 24:00:00, a service day may
    # contain information for the subsequent day(s).
    #
    # @example
    #   parse_date("20180913") # => Date for September 13th, 2018
    #
    # @param str [String, Integer, nil] date string (an Integer such as
    #   20180913 is converted with to_s)
    # @return [Date, nil] the date, or nil when the input is nil, blank, not
    #   exactly eight digits, or not a real calendar date. Surrounding
    #   whitespace is ignored.
    def self.parse_date(str)
      text = str.to_s
      return nil unless text.valid_encoding?

      text = text.strip
      return nil unless text.match?(DATE_PATTERN)

      Date.strptime(text, "%Y%m%d")
    rescue ArgumentError
      # Eight digits that do not form a valid date, e.g. "20181340".
      nil
    end
  end
end
@@ -0,0 +1,5 @@
# frozen_string_literal: true

module GtfsDf
  # Release version of the gtfs_df gem.
  VERSION = "0.1.1"
end