gtfs-reader 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +284 -0
- data/README.md +57 -0
- data/Rakefile +53 -0
- data/lib/gtfs_reader.rb +7 -0
- data/lib/gtfs_reader/bulk_feed_handler.rb +94 -0
- data/lib/gtfs_reader/config/column.rb +72 -0
- data/lib/gtfs_reader/config/defaults/gtfs_feed_definition.rb +208 -0
- data/lib/gtfs_reader/config/feed_definition.rb +55 -0
- data/lib/gtfs_reader/config/file_definition.rb +111 -0
- data/lib/gtfs_reader/config/prefixed_column_setter.rb +26 -0
- data/lib/gtfs_reader/config/source.rb +61 -0
- data/lib/gtfs_reader/config/sources.rb +25 -0
- data/lib/gtfs_reader/configuration.rb +27 -0
- data/lib/gtfs_reader/core.rb +76 -0
- data/lib/gtfs_reader/exceptions.rb +29 -0
- data/lib/gtfs_reader/feed_handler.rb +30 -0
- data/lib/gtfs_reader/file_reader.rb +120 -0
- data/lib/gtfs_reader/file_row.rb +66 -0
- data/lib/gtfs_reader/log.rb +66 -0
- data/lib/gtfs_reader/source_updater.rb +130 -0
- data/lib/gtfs_reader/version.rb +71 -0
- metadata +195 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
module GtfsReader
  module Config
    # Forwards column definitions to a wrapped definition object, prepending a
    # fixed prefix to every column name and passing the unprefixed name along
    # under the +:alias+ option.
    class PrefixedColumnSetter
      #@param definition [#col, #output_map] the object that receives the
      #  forwarded calls
      #@param prefix [#to_sym] the prefix prepended to each column name
      def initialize(definition, prefix)
        @definition = definition
        @prefix = prefix.to_sym
      end

      # Defines the column +"<prefix>_<name_alias>"+ on the wrapped definition.
      #@param name_alias [#to_s] the unprefixed column name; it is sent to the
      #  definition as the +:alias+ option
      #@param args [Array] forwarded to the definition's +col+. The first
      #  element is treated as the options hash; if it is not a Hash it is
      #  replaced by a hash holding only +:alias+.
      #@return whatever the wrapped definition's +col+ returns
      def col(name_alias, *args, &blk)
        name = "#{@prefix}_#{name_alias}"
        given = args.first.is_a?(::Hash) ? args.first : {}

        # Merge into a copy: the caller's hash must not be modified (it may be
        # frozen or shared between several column definitions).
        args[0] = given.merge(alias: name_alias)

        @definition.col(name.to_sym, *args, &blk)
      end

      # Passes the call straight through to the wrapped definition.
      #@return whatever the wrapped definition's +output_map+ returns
      def output_map(*args, &block)
        @definition.output_map(*args, &block)
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'active_support/core_ext/hash/reverse_merge'
|
2
|
+
|
3
|
+
require_relative 'feed_definition'
|
4
|
+
require_relative 'defaults/gtfs_feed_definition'
|
5
|
+
require_relative '../feed_handler'
|
6
|
+
require_relative '../bulk_feed_handler'
|
7
|
+
|
8
|
+
module GtfsReader
  module Config
    # A single source of GTFS data
    class Source
      attr_reader :name

      # Starts with the default feed definition and a feed handler built from
      # an empty block.
      #@param name the identifier of this source
      def initialize(name)
        @name = name
        @feed_definition = Config::Defaults::FEED_DEFINITION
        @feed_handler = FeedHandler.new {}
      end

      #@param u [String] if given (and not blank), will be used as the URL for
      #  this source
      #@return [String] the URL of this source's ZIP file
      def url(u=nil)
        @url = u unless blank_url?(u)
        @url
      end

      # Define a block to call before the source is read. If this block
      # returns +false+, cancel processing the source
      #@return [Proc,nil] the most recently stored block
      def before(&block)
        @before = block if block
        @before
      end

      # When a block is given, builds a new FeedDefinition, evaluates the block
      # against it (the definition is both +self+ and the block argument) and
      # stores it as this source's definition.
      #@return the current feed definition
      def feed_definition(&block)
        if block
          definition = FeedDefinition.new
          definition.instance_exec(definition, &block)
          @feed_definition = definition
        end

        @feed_definition
      end

      # When a block is given, replaces the feed handler. A trailing options
      # hash may carry +:bulk+; if that value is truthy a BulkFeedHandler is
      # built with it, otherwise a plain FeedHandler is used.
      #@param args [Array] handed to the handler's constructor
      #@return the current feed handler
      def handlers(*args, &block)
        if block
          opts = args.last.is_a?(::Hash) ? args.pop : {}
          bulk = opts[:bulk] # nil when the option is absent
          @feed_handler =
            if bulk
              BulkFeedHandler.new(bulk, args, &block)
            else
              FeedHandler.new(args, &block)
            end
        end
        @feed_handler
      end

      private

      # Local blank check so +url+ does not rely on ActiveSupport's
      # +Object#present?+, which this file never requires.
      #@return [Boolean] true for nil, false, whitespace-only strings and
      #  anything that reports itself as empty
      def blank_url?(value)
        return true if value.nil? || value == false
        return value !~ /[^[:space:]]/ if value.is_a?(::String)
        value.respond_to?(:empty?) && value.empty?
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative 'source'
|
2
|
+
|
3
|
+
module GtfsReader
  module Config
    # A registry of Source objects keyed by name. It inherits from BasicObject,
    # so any message other than the methods defined here reaches
    # +method_missing+ and is treated as a source name.
    class Sources < ::BasicObject
      def initialize
        @sources = {}
      end

      # Iterates over the registered sources by delegating to the underlying
      # hash.
      def each(&block)
        @sources.each(&block)
      end

      #@param key the name a source was registered under
      #@return [Source,nil] the registered source, or +nil+ if there is none
      def [](key)
        @sources[key]
      end

      # Looks up the source called +name+, creating it on first use. When a
      # block is given it is evaluated against the source, which is both
      # +self+ and the block argument. Extra arguments are ignored.
      #@return [Source] the source registered under +name+
      def method_missing(name, *args, &block)
        source = @sources[name] ||= Source.new(name)
        source.instance_exec(source, &block) unless block.nil?
        source
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module GtfsReader
  class Configuration

    # Creates simple configuration parameters which may be set by the user.
    # Each parameter becomes a singleton method on this object: called with no
    # arguments it returns the stored value (+nil+ if never set); called with
    # an argument it stores that argument, including +false+ and +nil+, and
    # returns it.
    #@param names [Array<Symbol>] the names of the parameters to create
    def parameter(*names)
      names.each do |name|
        ivar = "@#{name}"
        define_singleton_method(name) do |*values|
          # Decide on argument count rather than truthiness so that a
          # parameter can be switched back to false.
          if values.empty?
            instance_variable_get(ivar)
          else
            instance_variable_set(ivar, values.first)
          end
        end
      end
    end

    # Creates a singleton method +name+ that lazily builds a single
    # +obj_class+ instance and returns it. When the method is called with a
    # block, the block is evaluated against that instance, which is both
    # +self+ and the first block argument, followed by any call arguments.
    #@param name [Symbol] the name of the method to create
    #@param obj_class [Class] instantiated on first use
    #@param init_args [Array] arguments for +obj_class.new+
    def block_parameter(name, obj_class, *init_args)
      obj = nil
      define_singleton_method(name) do |*args, &block|
        obj ||= obj_class.new(*init_args)
        obj.instance_exec(obj, *args, &block) if block
        obj
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative 'configuration'
|
2
|
+
require_relative 'config/feed_definition'
|
3
|
+
require_relative 'config/sources'
|
4
|
+
require_relative 'source_updater'
|
5
|
+
|
6
|
+
module GtfsReader
  extend self

  #@overload config(*args, &blk)
  #  @param args [Array] an array of arguments to pass to the given block
  #  @param blk [Proc] a block to call in the context of the configuration
  #    object. Subsequent calls will use the same configuration for additional
  #    modification.
  #  @return [Configuration] the configuration object
  #
  #@overload config
  #  @return [Configuration] the configuration object
  #@raise [ArgumentError] if arguments are given without a block
  def config(*args, &blk)
    @cfg ||= create_config
    if blk
      # The configuration is both self and the first block argument
      @cfg.instance_exec(@cfg, *args, &blk)
    elsif !args.empty?
      # Checked by count, not truthiness, so config(nil) is rejected too
      raise ArgumentError, 'arguments given without a block'
    end
    @cfg
  end

  # Updates every configured source in turn.
  def update_all!
    config.sources.each {|name, _| update name }
  end

  # Updates the named source, wrapping the run in +Log.quiet+ unless the
  # +verbose+ configuration parameter is set.
  #@param name the name the source was configured under
  def update(name)
    if config.verbose
      update_verbosely name
    else
      Log.quiet { update_verbosely name }
    end
  end

  private

  # Runs the update steps for one source on a SourceUpdater. A SkipSourceError
  # raised by any step is logged as a warning rather than propagated, and
  # +finish+ is always called on the updater.
  #@raise [UnknownSourceError] if no source is configured under +name+
  def update_verbosely(name)
    source = config.sources[name]
    raise UnknownSourceError, "No source named '#{name}'" if source.nil?
    updater = SourceUpdater.new name, source
    begin
      updater.instance_exec do
        Log.info { "Updating #{name.to_s.green}".underline }
        before_callbacks
        read
        check_files
        check_columns
        process_files
      end
    rescue SkipSourceError => e
      Log.warn do
        # Exception#message is never nil, so test for an empty string to
        # avoid a dangling ": " suffix
        reason = e.message.to_s
        msg = reason.empty? ? '' : ": #{reason}"
        "#{'Skipping'.red} #{source.name.to_s.yellow}" + msg
      end
    ensure
      updater.finish
    end
  end

  # Builds the configuration object and declares its parameters.
  #@return [Configuration]
  def create_config
    Configuration.new.tap do |cfg|
      cfg.instance_exec do
        parameter :verbose
        parameter :skip_parsing
        parameter :return_hashes
        block_parameter :sources, Config::Sources
        block_parameter :feed_definition, Config::FeedDefinition
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module GtfsReader

  # Common parent of the two "required ... missing" errors below
  class FileReaderError < StandardError
  end

  # Carries the list of columns that were expected but not found
  class RequiredColumnsMissing < FileReaderError
    attr_reader :columns

    #@param columns [Array] the missing column names; joined with ", " to
    #  build the message
    def initialize(columns)
      super("Required columns missing: #{columns.join(', ')}")
      @columns = columns
    end
  end

  # Carries the list of files that were expected but not found
  class RequiredFilenamesMissing < FileReaderError
    attr_reader :filenames

    #@param filenames [Array] the missing file names; joined with ", " to
    #  build the message
    def initialize(filenames)
      super("Required files missing from zip file: #{filenames.join(', ')}")
      @filenames = filenames
    end
  end

  class UnknownSourceError < StandardError
  end

  class SkipSourceError < StandardError
  end

  class HandlerMissingError < StandardError
  end

  module Config
    class SourceDefinitionError < StandardError
    end

    class FileDefinitionError < StandardError
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module GtfsReader
  # Holds one callback per file name and feeds the rows of a file to the
  # matching callback.
  class FeedHandler
    # Evaluates +block+ against a FeedHandlerDsl so that each method call made
    # inside it registers a callback on this handler. Without a block the
    # handler simply starts out empty.
    #@param args [Array] passed to the block as its arguments
    def initialize(args=[], &block)
      @read_callbacks = {}
      FeedHandlerDsl.new(self).instance_exec(*args, &block) if block
    end

    #@return [Boolean] whether a callback was registered for +filename+
    def handler?(filename)
      @read_callbacks.key? filename
    end

    # Calls +each+ on +enumerator+ with the callback registered for
    # +filename+.
    def handle_file(filename, enumerator)
      enumerator.each(&@read_callbacks[filename])
    end

    # Stores +block+ as the callback for +filename+, replacing any previous
    # one. Extra arguments are accepted but unused.
    def create_read_handler(filename, *args, &block)
      @read_callbacks[filename] = block
    end
  end

  # The evaluation context for the block given to FeedHandler.new: every
  # unknown method called with a block is taken to be a file name.
  class FeedHandlerDsl
    def initialize(feed_handler)
      @feed_handler = feed_handler
    end

    # Registers +block+ as the read handler for the file called +filename+.
    # A call without a block has nothing to register, so it falls through to
    # the normal "undefined method" error instead of storing a nil callback.
    def method_missing(filename, *args, &block)
      return super unless block
      @feed_handler.create_read_handler(filename, *args, &block)
    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'active_support/core_ext/object/blank'
|
2
|
+
require 'csv'
|
3
|
+
require_relative 'file_row'
|
4
|
+
|
5
|
+
module GtfsReader
  CSV_OPTIONS = { headers: :first_row,
                  return_headers: true,
                  header_converters: :symbol }.freeze

  # Iterates over the rows in a single file using a provided definition.
  #@see #each
  class FileReader
    include Enumerable

    attr_reader :definition, :columns, :col_names

    #@param data [IO,String] CSV data
    #@param definition [FileDefinition] describes the expected columns in this
    #  file
    #@param opts [Hash] +:parse+ (default true) is handed to every FileRow,
    #  +:hash+ (default true) makes {#each} yield hashes instead of arrays,
    #  +:validate+ (default false) logs the column check and raises when
    #  required columns are missing
    #@raise [RequiredColumnsMissing] if validating and a required column is
    #  absent from the header row
    def initialize(data, definition, opts={})
      opts = { parse: true, validate: false, hash: true }.merge opts

      # Splat the options as keywords: Ruby 3 no longer converts a positional
      # hash into keyword arguments.
      @csv = CSV.new data, **CSV_OPTIONS
      @definition = definition
      @do_parse = opts[:parse]
      @return_hash = opts[:hash]
      @index = 0

      # The first shift returns the header row; it is nil for empty input
      header_row = @csv.shift
      @csv_headers = header_row ? header_row.headers : []
      @columns = find_columns opts[:validate]
    end

    def filename
      @definition.filename
    end

    #@overload each(&blk)
    #  @yieldparam hash [Hash] a hash of columns to their values in this row
    #@overload each
    #  @return [Enumerator] an {::Enumerator} that iterates over the rows in
    #    the file
    #@see FileRow#to_hash
    def each
      return to_enum :each unless block_given?

      while (row = shift)
        yield(@return_hash ? row.to_hash : row.to_a)
      end
    end

    #@return [FileRow,nil] the next row from the file, or +nil+ if the end of
    #  the file has been reached.
    def shift
      row = @csv.shift
      return unless row

      # The row gets the current index; the counter advances afterwards
      file_row(row).tap { @index += 1 }
    end

    private

    def file_row(row)
      FileRow.new @index, @col_names, row, @definition, @do_parse
    end

    # Check the list of headers in the file against the expected columns in
    # the definition. As a side effect this records the found columns and, on
    # the first call, sets +@col_names+ to their names.
    #@return [Hash] each file header that the definition knows, mapped to
    #  +@definition[header]+
    def find_columns(validate)
      @found_columns = []
      prefix = "#{filename.yellow}:"

      required = @definition.required_columns
      unless required.empty?
        Log.info { "#{prefix} #{'required columns'.magenta}" } if validate

        missing = check_columns validate, prefix, required, :green, :red
        raise RequiredColumnsMissing, missing if validate && !missing.empty?
      end

      optional = @definition.optional_columns
      unless optional.empty?
        Log.info { "#{prefix} #{'optional columns'.cyan}" } if validate
        check_columns validate, prefix, optional, :cyan, :light_yellow
      end

      defined_names = @definition.columns.map(&:name)
      @col_names ||= @found_columns.map(&:name)

      @csv_headers.each_with_object({}) do |header, known|
        known[header] = @definition[header] if defined_names.include? header
      end
    end

    # Records every expected column whose name appears in the file headers
    # and, when validating, logs one line per column with a check or cross.
    #@return [Array] the names of the expected columns that are missing
    def check_columns(validate, prefix, expected, found_color, missing_color)
      check = '✔'.colorize found_color
      cross = '✘'.colorize missing_color

      expected.map do |col|
        name = col.name
        found = @csv_headers.include? name
        @found_columns << col if found

        if validate
          Log.info do
            mark = found ? check : cross
            "#{prefix} #{name.to_s.rjust column_width} [#{mark}]"
          end
        end

        found ? nil : name
      end.compact
    end

    #@return [Integer] the length of the longest column name in the
    #  definition (0 if it defines no columns)
    def column_width
      @column_width ||=
        @definition.columns.map { |col| col.name.length }.max || 0
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module GtfsReader
  # Contains the contents of a single row read in from the file
  class FileRow
    #@!attribute [r] line_number
    #  @return [Integer] the line number from the source file
    #@!attribute [r] headers
    #  @return [Array<Symbol>] the column names reported by this row
    attr_reader :line_number, :headers

    #@param line_number [Integer] the line number from the source file
    #@param headers [Array<Symbol>] the column names reported by this row
    #@param data [CSV::Row] the data for this row
    #@param definition [FileDefinition] the definition of the columns that the
    #  data in this row represent
    #@param do_parse [Boolean] when false, {#[]} returns the raw data
    def initialize(line_number, headers, data, definition, do_parse)
      @line_number = line_number
      @headers = headers
      @data = data
      @definition = definition
      @do_parse = do_parse
      @parsed = {}
    end

    #@param column [Symbol] the name of the column to fetch
    #@return the parsed data for the column at this row
    #@see #raw
    def [](column)
      return raw(column) unless @do_parse

      # fetch rather than ||= so that nil and false results are cached too and
      # each column's parser runs at most once per row
      @parsed.fetch(column) do
        context = ParserContext.new(column, self)
        @parsed[column] =
          context.instance_exec(raw(column), &@definition[column].parser)
      end
    end

    #@param (see #[])
    #@return the unparsed data from the column at this row
    def raw(column)
      @data[column]
    end

    #@return [Hash] a hash representing this row of data, where each key is the
    #  column name and each value is the parsed data for this row
    def to_hash
      headers.each_with_object({}) { |header, row| row[header] = self[header] }
    end

    #@return [Array] an array representing this row of data
    def to_a
      headers.map { |header| self[header] }
    end
  end

  # The object a column's parser block is evaluated against. Calling an
  # unknown method inside the parser reads the column of that name from the
  # same row.
  class ParserContext
    #@param column [Symbol] the column whose parser is being run
    #@param file_row [FileRow] the row the value comes from
    def initialize(column, file_row)
      @column, @file_row = column, file_row
    end

    # Returns the row's value for +column+. A falsy value is only returned
    # when +column+ is one of the row's headers (the cell is simply blank);
    # for any other name the call falls through to the usual "undefined
    # method" error, as does any call that passes arguments.
    #@raise [RuntimeError] if a parser asks for its own column
    def method_missing(column, *args, &block)
      return super unless args.empty?

      if column == @column
        raise "Parser for '#{column}' cannot refer to itself"
      end

      value = @file_row[column]
      return value if value
      @file_row.headers.include?(column) ? value : super
    end

    def respond_to_missing?(name, include_private = false)
      @file_row.headers.include?(name) || super
    end
  end
end
|