gtfs-reader 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +284 -0
- data/README.md +57 -0
- data/Rakefile +53 -0
- data/lib/gtfs_reader.rb +7 -0
- data/lib/gtfs_reader/bulk_feed_handler.rb +94 -0
- data/lib/gtfs_reader/config/column.rb +72 -0
- data/lib/gtfs_reader/config/defaults/gtfs_feed_definition.rb +208 -0
- data/lib/gtfs_reader/config/feed_definition.rb +55 -0
- data/lib/gtfs_reader/config/file_definition.rb +111 -0
- data/lib/gtfs_reader/config/prefixed_column_setter.rb +26 -0
- data/lib/gtfs_reader/config/source.rb +61 -0
- data/lib/gtfs_reader/config/sources.rb +25 -0
- data/lib/gtfs_reader/configuration.rb +27 -0
- data/lib/gtfs_reader/core.rb +76 -0
- data/lib/gtfs_reader/exceptions.rb +29 -0
- data/lib/gtfs_reader/feed_handler.rb +30 -0
- data/lib/gtfs_reader/file_reader.rb +120 -0
- data/lib/gtfs_reader/file_row.rb +66 -0
- data/lib/gtfs_reader/log.rb +66 -0
- data/lib/gtfs_reader/source_updater.rb +130 -0
- data/lib/gtfs_reader/version.rb +71 -0
- metadata +195 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
module GtfsReader
|
2
|
+
module Config
|
3
|
+
# Forwards column declarations to a wrapped definition, prepending a fixed
# prefix to every column name and recording the short name as its alias.
class PrefixedColumnSetter
  #@param definition the object that receives the +col+ and +output_map+
  #  calls made through this setter
  #@param prefix [#to_sym] joined to each column name with an underscore
  def initialize(definition, prefix)
    @definition = definition
    @prefix = prefix.to_sym
  end

  # Declares the column named "<prefix>_<name_alias>" on the wrapped
  # definition.
  #@param name_alias [#to_s] the un-prefixed column name; it is passed along
  #  as the +:alias+ option
  #@param args [Array] when the first element is a Hash it is used as the
  #  column options, otherwise the first element is replaced by a fresh
  #  options hash; any remaining elements are forwarded untouched
  #@return whatever the wrapped definition's +col+ returns
  def col(name_alias, *args, &blk)
    given = args.first
    # Merge into a copy so the caller's options hash is never modified.
    opts = (given.is_a?(::Hash) ? given : {}).merge(alias: name_alias)

    @definition.col(:"#{@prefix}_#{name_alias}", opts, *args.drop(1), &blk)
  end

  # Passes straight through to the wrapped definition's +output_map+.
  def output_map(*args, &block)
    @definition.output_map(*args, &block)
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'active_support/core_ext/hash/reverse_merge'
|
2
|
+
|
3
|
+
require_relative 'feed_definition'
|
4
|
+
require_relative 'defaults/gtfs_feed_definition'
|
5
|
+
require_relative '../feed_handler'
|
6
|
+
require_relative '../bulk_feed_handler'
|
7
|
+
|
8
|
+
module GtfsReader
|
9
|
+
module Config
|
10
|
+
# A single source of GTFS data
class Source
  attr_reader :name

  #@param name the identifier of this source
  def initialize(name)
    @name = name
    @feed_definition = Config::Defaults::FEED_DEFINITION
    # Start with a handler that has no callbacks registered.
    @feed_handler = FeedHandler.new {}
  end

  #@param u [String] if given (and not blank), will be used as the URL for
  #  this source
  #@return [String] the URL of this source's ZIP file
  def url(u=nil)
    @url = u unless blank_value?(u)
    @url
  end

  # Define a block to call before the source is read. If this block
  # returns +false+, cancel processing the source
  #@return [Proc,nil] the most recently stored block, if any
  def before(&block)
    @before = block if block
    @before
  end

  # When a block is given, builds a new FeedDefinition, evaluates the block
  # in its context (also passing it as the block argument) and stores it.
  #@return the feed definition currently in effect for this source
  def feed_definition(&block)
    if block
      definition = FeedDefinition.new
      definition.instance_exec(definition, &block)
      @feed_definition = definition
    end

    @feed_definition
  end

  # When a block is given, replaces the feed handler. A trailing options
  # hash with a truthy +:bulk+ value selects a BulkFeedHandler; otherwise a
  # plain FeedHandler is built. The remaining arguments go to the handler.
  #@return the feed handler currently in effect for this source
  def handlers(*args, &block)
    if block
      opts = args.last.is_a?(::Hash) ? args.pop : {}
      bulk = { bulk: nil }.merge(opts)[:bulk]
      @feed_handler =
        if bulk
          BulkFeedHandler.new bulk, args, &block
        else
          FeedHandler.new args, &block
        end
    end
    @feed_handler
  end

  private

  # Plain-Ruby blank check, so this class does not depend on the
  # ActiveSupport Object#present? extension having been loaded elsewhere.
  def blank_value?(value)
    return true if value.nil? || value == false
    return value.strip.empty? if value.respond_to?(:strip)
    value.respond_to?(:empty?) && value.empty?
  end
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative 'source'
|
2
|
+
|
3
|
+
module GtfsReader
|
4
|
+
module Config
|
5
|
+
# Registry of Source objects keyed by name. It inherits from BasicObject so
# that almost any method name reaches +method_missing+ and can therefore be
# used as a source name.
class Sources < ::BasicObject
  def initialize
    @sources = {}
  end

  # Iterates over the registered (name, source) pairs.
  def each(&block)
    @sources.each(&block)
  end

  #@return the source registered under +key+, or +nil+ when there is none
  def [](key)
    @sources[key]
  end

  # Looks up the source called +name+, creating it on first use. When a
  # block is given it is evaluated in the context of that source, which is
  # also passed as the block argument.
  #@return the source registered under +name+
  def method_missing(name, *args, &block)
    source = (@sources[name] ||= Source.new(name))
    source.instance_exec(source, &block) if block
    source
  end
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module GtfsReader
|
2
|
+
class Configuration

  # Creates simple configuration parameters which may be set by the user.
  # Each parameter becomes a singleton method: called with a non-nil value it
  # stores that value, called with no argument (or +nil+) it returns the
  # stored value.
  #@param names [Array<Symbol>] the names of the parameters to create
  def parameter(*names)
    names.each do |name|
      ivar = "@#{name}"
      define_singleton_method name do |*values|
        value = values.first
        # Only nil means "no value supplied"; false must be storable so a
        # flag that was switched on can be switched off again.
        if value.nil?
          instance_variable_get ivar
        else
          instance_variable_set ivar, value
        end
      end
    end
  end

  # Creates a singleton method +name+ that lazily builds one +obj_class+
  # instance (from +init_args+) and returns it on every call. When that
  # method is called with a block, the block is evaluated in the context of
  # the instance, receiving the instance followed by the call's arguments.
  #@param name [Symbol] the name of the method to create
  #@param obj_class [Class] the class instantiated on first use
  #@param init_args [Array] arguments passed to +obj_class.new+
  def block_parameter(name, obj_class, *init_args)
    obj = nil
    define_singleton_method name do |*args, &block|
      obj ||= obj_class.new(*init_args)
      obj.instance_exec(obj, *args, &block) if block
      obj
    end
  end
end
|
27
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative 'configuration'
|
2
|
+
require_relative 'config/feed_definition'
|
3
|
+
require_relative 'config/sources'
|
4
|
+
require_relative 'source_updater'
|
5
|
+
|
6
|
+
module GtfsReader
  extend self

  #@override config(*args, &blk)
  #  @param args [Array] extra arguments to pass to the given block, after
  #    the configuration object itself
  #  @param blk [Proc] a block to call in the context of the configuration
  #    object. Subsequent calls will use the same configuration for additional
  #    modification.
  #  @return [Configuration] the configuration object
  #
  #@override config
  #  @return [Configuration] the configuration object
  def config(*args, &blk)
    @cfg ||= create_config
    if blk
      @cfg.instance_exec(@cfg, *args, &blk)
    elsif args.any?
      raise ArgumentError, 'arguments given without a block'
    end
    @cfg
  end

  # Runs #update for every configured source.
  def update_all!
    config.sources.each { |source_name, _source| update(source_name) }
  end

  # Updates the source called +name+. Unless the +verbose+ parameter is
  # set, the update runs inside +Log.quiet+.
  def update(name)
    return update_verbosely(name) if config.verbose

    Log.quiet { update_verbosely name }
  end

  private

  # Runs each update step on a SourceUpdater for the named source. A
  # SkipSourceError is logged as a warning rather than propagated, and the
  # updater's +finish+ is always called.
  def update_verbosely(name)
    source = config.sources[name]
    raise UnknownSourceError, "No source named '#{name}'" if source.nil?

    updater = SourceUpdater.new(name, source)
    begin
      # Evaluated with the updater as self, so the bare calls below are the
      # updater's own methods; +name+ is still this method's local variable.
      updater.instance_exec do
        Log.info { "Updating #{name.to_s.green}".underline }
        before_callbacks
        read
        check_files
        check_columns
        process_files
      end
    rescue SkipSourceError => e
      Log.warn do
        suffix = e.message ? ": #{e.message}" : ''
        "#{'Skipping'.red} #{source.name.to_s.yellow}#{suffix}"
      end
    ensure
      updater.finish
    end
  end

  # Builds the configuration object and declares its parameters.
  def create_config
    cfg = Configuration.new
    cfg.instance_exec do
      parameter :verbose, :skip_parsing, :return_hashes
      block_parameter :sources, Config::Sources
      block_parameter :feed_definition, Config::FeedDefinition
    end
    cfg
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module GtfsReader

  # Common parent of the errors below that describe a problem with the
  # contents of a feed.
  class FileReaderError < StandardError
  end

  # Carries the names of the required columns that were not found.
  class RequiredColumnsMissing < FileReaderError
    attr_reader :columns

    #@param columns [Array] the missing column names
    def initialize(columns)
      @columns = columns
      listing = columns.join(', ')
      super("Required columns missing: #{listing}")
    end
  end

  # Carries the names of the required files that were not found.
  class RequiredFilenamesMissing < FileReaderError
    attr_reader :filenames

    #@param filenames [Array] the missing file names
    def initialize(filenames)
      @filenames = filenames
      listing = filenames.join(', ')
      super("Required files missing from zip file: #{listing}")
    end
  end

  class UnknownSourceError < StandardError
  end

  class SkipSourceError < StandardError
  end

  class HandlerMissingError < StandardError
  end

  module Config
    class SourceDefinitionError < StandardError
    end

    class FileDefinitionError < StandardError
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module GtfsReader
|
2
|
+
# Holds one read callback per feed file. The callbacks are registered by
# evaluating the block given to the constructor against a FeedHandlerDsl.
class FeedHandler
  #@param args [Array] arguments passed to the block
  #@param block [Proc] evaluated in the context of a FeedHandlerDsl that
  #  wraps this handler; may be omitted to create a handler with no callbacks
  def initialize(args=[], &block)
    @read_callbacks = {}
    # instance_exec raises LocalJumpError when no block is supplied, so the
    # DSL is only evaluated when there is a block to run.
    FeedHandlerDsl.new(self).instance_exec(*args, &block) if block
  end

  #@return [Boolean] whether a callback has been registered for +filename+
  def handler?(filename)
    @read_callbacks.key?(filename)
  end

  # Passes every item of +enumerator+ to the callback stored for +filename+.
  def handle_file(filename, enumerator)
    enumerator.each(&@read_callbacks[filename])
  end

  # Stores +block+ as the callback for +filename+, replacing any previous
  # one. The extra +args+ are accepted but not used.
  def create_read_handler(filename, *args, &block)
    @read_callbacks[filename] = block
  end
end
|
20
|
+
|
21
|
+
# Evaluation context for the block given to a feed handler: any method
# called on it is taken to be a file name, and the block attached to that
# call is registered as the file's read callback.
class FeedHandlerDsl
  #@param feed_handler the handler that receives +create_read_handler+ calls
  def initialize(feed_handler)
    @feed_handler = feed_handler
  end

  # Forwards the call, with its arguments and block, to the feed handler's
  # +create_read_handler+ using the method name as the file name.
  def method_missing(filename, *args, &block)
    @feed_handler.create_read_handler(filename, *args, &block)
  end
end
|
30
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'active_support/core_ext/object/blank'
|
2
|
+
require 'csv'
|
3
|
+
require_relative 'file_row'
|
4
|
+
|
5
|
+
module GtfsReader
|
6
|
+
# Options used for every CSV parser created by the file reader: the first
# row is read as the header row, it is returned like any other row, and its
# names are converted to symbols. Frozen so this shared constant cannot be
# modified by accident.
CSV_OPTIONS = { headers: :first_row,
                return_headers: true,
                header_converters: :symbol }.freeze
|
9
|
+
|
10
|
+
# Iterates over the rows in a single file using a provided definition.
#@see #each
class FileReader
  include Enumerable

  attr_reader :definition, :columns, :col_names

  #@param data [IO,String] CSV data
  #@param definition [FileDefinition] describes the expected columns in this
  #  file
  #@param opts [Hash] +:parse+ (default +true+) is handed to every FileRow,
  #  +:validate+ (default +false+) logs the column check and raises when
  #  required columns are missing, +:hash+ (default +true+) makes #each yield
  #  hashes instead of arrays
  #@raise [RequiredColumnsMissing] when validating and a required column is
  #  absent from the header row
  def initialize(data, definition, opts={})
    opts = { parse: true, validate: false, hash: true }.merge opts

    # The options must be passed as keywords: CSV.new no longer accepts a
    # positional options hash on current Ruby/csv versions.
    @csv = CSV.new(data, **CSV_OPTIONS)
    @definition = definition
    @do_parse = opts[:parse]
    @return_hash = opts[:hash]
    @index = 0
    # Empty input has no header row at all; treat it as having no columns.
    header_row = @csv.shift
    @csv_headers = header_row ? header_row.headers : []
    @columns = find_columns opts[:validate]
  end

  def filename
    @definition.filename
  end

  #@overload each(&blk)
  #  @yieldparam hash [Hash] a hash of columns to their values in this row
  #    (an array of values when the reader was created with +hash: false+)
  #@overload each
  #  @return [Enumerator] an {::Enumerator} that iterates over the rows in
  #    the file
  #@see FileRow#to_hash
  def each
    return to_enum :each unless block_given?

    while row = shift
      yield(@return_hash ? row.to_hash : row.to_a)
    end
  end

  #@return [FileRow,nil] the next row from the file, or +nil+ if the end of
  #  the file has been reached.
  def shift
    if row = @csv.shift
      file_row(row).tap { @index += 1 }
    end
  end

  private

  def file_row(row)
    FileRow.new @index, @col_names, row, @definition, @do_parse
  end

  # Check the list of headers in the file against the expected columns in
  # the definition
  #@return [Hash] the file's headers that the definition knows about, each
  #  mapped to the definition's entry for it
  def find_columns(validate)
    @found_columns = []
    prefix = "#{filename.yellow}:"

    required = @definition.required_columns
    unless required.empty?
      Log.info { "#{prefix} #{'required columns'.magenta}" } if validate

      missing = check_columns validate, prefix, required, :green, :red
      raise RequiredColumnsMissing, missing if validate && !missing.empty?
    end

    optional = @definition.optional_columns
    unless optional.empty?
      Log.info { "#{prefix} #{'optional columns'.cyan}" } if validate
      check_columns validate, prefix, optional, :cyan, :light_yellow
    end

    known = @definition.columns.map(&:name)
    headers = @csv_headers.select { |header| known.include? header }

    @col_names ||= @found_columns.map(&:name)
    headers.each_with_object({}) { |header, map| map[header] = @definition[header] }
  end

  # Records which of the +expected+ columns are present in the file and,
  # when validating, logs a check or cross mark for each one.
  #@return [Array] the names of the expected columns that are missing
  def check_columns(validate, prefix, expected, found_color, missing_color)
    check = '✔'.colorize found_color
    cross = '✘'.colorize missing_color

    expected.map do |col|
      name = col.name
      missing =
        if @csv_headers.include? name
          @found_columns << col
          nil
        else
          name
        end

      if validate
        Log.info do
          mark = missing ? cross : check
          "#{prefix} #{name.to_s.rjust column_width} [#{mark}]"
        end
      end
      missing
    end.compact
  end

  # Length of the longest column name in the definition, used to align the
  # validation log output.
  def column_width
    @column_width ||= @definition.columns.map { |col| col.name.length }.max
  end
end
|
120
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module GtfsReader
|
4
|
+
# Contains the contents of a single row read in from the file
class FileRow
  #@return [Array<Symbol>]
  attr_reader :headers

  attr_reader :line_number

  #@param line_number [Integer] the line number from the source file
  #@param headers [Array<Symbol>] the columns reported by #to_hash and #to_a
  #@param data [CSV::Row] the data for this row
  #@param definition [FileDefinition] the definition of the columns that the
  #  data in this row represent
  #@param do_parse [Boolean] when falsy, #[] returns the raw values
  def initialize(line_number, headers, data, definition, do_parse)
    @line_number = line_number
    @headers = headers
    @data = data
    @definition = definition
    @do_parse = do_parse
    @parsed = {}
  end

  #@param column [Symbol] the name of the column to fetch
  #@return the parsed data for the column at this row
  #@see #raw
  def [](column)
    return raw(column) unless @do_parse

    # fetch (rather than ||=) so that a column whose parsed value is nil or
    # false is still parsed only once.
    @parsed.fetch(column) do
      context = ParserContext.new(column, self)
      @parsed[column] = context.instance_exec(raw(column), &@definition[column].parser)
    end
  end

  #@param (see #[])
  #@return the unparsed data from the column at this row
  def raw(column)
    @data[column]
  end

  #@return [Hash] a hash representing this row of data, where each key is the
  #  column name and each value is the parsed data for this row
  def to_hash
    headers.each_with_object({}) { |header, hash| hash[header] = self[header] }
  end

  #@return [Array] an array representing this row of data
  def to_a
    headers.map { |header| self[header] }
  end
end
|
53
|
+
|
54
|
+
# The context in which a column's parser block is evaluated. Inside the
# parser, a bare column name resolves to that column's value in the same row.
class ParserContext
  #@param column [Symbol] the column whose parser runs in this context
  #@param file_row the row that supplies the other columns' values
  def initialize(column, file_row)
    @column, @file_row = column, file_row
  end

  # Resolves a bare column name to the row's value for that column.
  #@raise [RuntimeError] if the parser refers to its own column
  #@raise [NoMethodError] if the name is called with arguments, or yields no
  #  value and is not one of the row's headers
  def method_missing(column, *args, &block)
    # Column lookups never take arguments; anything else is a real miss.
    return super unless args.empty?

    if column == @column
      raise "Parser for '#{column}' cannot refer to itself"
    end

    value = @file_row[column]
    # A column that exists in the row may legitimately hold nil or false;
    # only names the row does not know about fall through to NoMethodError.
    return value if value || row_column?(column)

    super
  end

  def respond_to_missing?(name, include_private = false)
    row_column?(name) || super
  end

  private

  #@return [Boolean] whether +name+ is listed in the row's headers
  def row_column?(name)
    return false unless @file_row.respond_to?(:headers)

    headers = @file_row.headers
    headers.respond_to?(:include?) && headers.include?(name)
  end
end
|
66
|
+
end
|