gtfs_reader 1.2.0 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -15
- data/Rakefile +6 -9
- data/lib/gtfs_reader/bulk_feed_handler.rb +28 -24
- data/lib/gtfs_reader/config/column.rb +16 -16
- data/lib/gtfs_reader/config/defaults/gtfs_feed_definition.rb +50 -46
- data/lib/gtfs_reader/config/feed_definition.rb +19 -19
- data/lib/gtfs_reader/config/file_definition.rb +34 -33
- data/lib/gtfs_reader/config/source.rb +17 -15
- data/lib/gtfs_reader/config/sources.rb +7 -3
- data/lib/gtfs_reader/configuration.rb +8 -9
- data/lib/gtfs_reader/core.rb +14 -15
- data/lib/gtfs_reader/exceptions.rb +0 -1
- data/lib/gtfs_reader/feed_handler.rb +12 -8
- data/lib/gtfs_reader/file_reader.rb +34 -36
- data/lib/gtfs_reader/file_row.rb +40 -32
- data/lib/gtfs_reader/log.rb +37 -23
- data/lib/gtfs_reader/source_updater.rb +40 -43
- data/lib/gtfs_reader/version.rb +29 -28
- metadata +39 -39
@@ -9,46 +9,46 @@ module GtfsReader
|
|
9
9
|
@file_definition = {}
|
10
10
|
end
|
11
11
|
|
12
|
-
|
12
|
+
# @return [Array<FileDefinition>] All of the defined files.
|
13
13
|
def files
|
14
14
|
@file_definition.values
|
15
15
|
end
|
16
16
|
|
17
17
|
def required_files
|
18
|
-
files.select
|
18
|
+
files.select(&:required?)
|
19
19
|
end
|
20
20
|
|
21
21
|
def optional_files
|
22
|
-
files.reject
|
22
|
+
files.reject(&:required?)
|
23
23
|
end
|
24
24
|
|
25
|
-
|
26
|
-
#
|
25
|
+
# @overload file(name, *args, &block)
|
26
|
+
# Defines a new file in the feed.
|
27
27
|
#
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
28
|
+
# @param name [String] the name of this file within the feed. This name
|
29
|
+
# should not include a file extension (like +.txt+)
|
30
|
+
# @param args [Array] the first argument is used as a +Hash+ of options
|
31
|
+
# to create the new file definition
|
32
|
+
# @param block [Proc] this block is +instance_eval+ed on the new
|
33
|
+
# {FileDefinition file}
|
34
|
+
# @return [FileDefinition] the newly created file
|
35
35
|
#
|
36
|
-
|
37
|
-
#
|
38
|
-
#
|
39
|
-
|
36
|
+
# @overload file(name)
|
37
|
+
# @param name [String] the name of the file to return
|
38
|
+
# @return [FileDefinition] the previously created file with +name+
|
39
|
+
# @see FileDefinition
|
40
40
|
def file(name, *args, &block)
|
41
41
|
return @file_definition[name] unless block_given?
|
42
42
|
|
43
|
-
definition_for!(
|
44
|
-
|
43
|
+
definition_for!(name, args.first).tap do |definition|
|
44
|
+
definition.instance_exec(&block) if block
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
48
|
private
|
49
49
|
|
50
50
|
def definition_for!(name, opts)
|
51
|
-
@file_definition[name] ||= FileDefinition.new(
|
51
|
+
@file_definition[name] ||= FileDefinition.new(name, opts)
|
52
52
|
end
|
53
53
|
end
|
54
54
|
end
|
@@ -6,25 +6,26 @@ module GtfsReader
|
|
6
6
|
class FileDefinition
|
7
7
|
attr_reader :name
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
#
|
12
|
-
def initialize(name, opts={})
|
13
|
-
@name
|
14
|
-
@
|
9
|
+
# @param name [String] The name of the file within the feed.
|
10
|
+
# @option opts [Boolean] :required (false)
|
11
|
+
# If this file is required to be in the feed.
|
12
|
+
def initialize(name, opts = {})
|
13
|
+
@name = name
|
14
|
+
@columns = {}
|
15
|
+
@opts = { required: false }.merge(opts || {})
|
15
16
|
end
|
16
17
|
|
17
|
-
|
18
|
+
# @return [Boolean] If this file is required to be in the feed.
|
18
19
|
def required?
|
19
20
|
@opts[:required]
|
20
21
|
end
|
21
22
|
|
22
|
-
|
23
|
+
# @return [String] The filename of this file within the GTFS feed.
|
23
24
|
def filename
|
24
25
|
"#{name}.txt"
|
25
26
|
end
|
26
27
|
|
27
|
-
|
28
|
+
# @return [Column] The column with the given name
|
28
29
|
def [](name)
|
29
30
|
@columns[name]
|
30
31
|
end
|
@@ -33,39 +34,39 @@ module GtfsReader
|
|
33
34
|
@columns.values
|
34
35
|
end
|
35
36
|
|
36
|
-
|
37
|
+
# @return [Array<Column>] The columns required to appear in this file.
|
37
38
|
def required_columns
|
38
|
-
columns.select
|
39
|
+
columns.select(&:required?)
|
39
40
|
end
|
40
41
|
|
41
|
-
|
42
|
+
# @return [Array<Column>] The columns not required to appear in this file.
|
42
43
|
def optional_columns
|
43
|
-
columns.reject
|
44
|
+
columns.reject(&:required?)
|
44
45
|
end
|
45
46
|
|
46
|
-
|
47
|
-
#
|
47
|
+
# @return [Array<Column>] The columns which cannot have two rows with the
|
48
|
+
# same value.
|
48
49
|
def unique_columns
|
49
|
-
columns.select
|
50
|
+
columns.select(&:unique?)
|
50
51
|
end
|
51
52
|
|
52
53
|
# Creates a column with the given name.
|
53
54
|
#
|
54
|
-
|
55
|
-
|
56
|
-
#
|
57
|
-
|
58
|
-
#
|
59
|
-
|
60
|
-
|
61
|
-
|
55
|
+
# @param name [String] The name of the column to define.
|
56
|
+
# @param args [Array] The first element of this args list is used as a
|
57
|
+
# +Hash+ of options to create the new column with.
|
58
|
+
# @param block [Proc] An optional block used to parse the values of this
|
59
|
+
# column on each row.
|
60
|
+
# @yieldparam input [String] The value of this column for a particular row.
|
61
|
+
# @yieldreturn Any kind of object.
|
62
|
+
# @return [Column] The newly created column.
|
62
63
|
def col(name, *args, &block)
|
63
64
|
if @columns.key? name
|
64
|
-
@columns[name].parser
|
65
|
+
@columns[name].parser(&block) if block_given?
|
65
66
|
return @columns[name]
|
66
67
|
end
|
67
68
|
|
68
|
-
@columns[name] = Column.new
|
69
|
+
@columns[name] = Column.new(name, args.first, &block)
|
69
70
|
end
|
70
71
|
|
71
72
|
# Creates an input-output proc to convert column values from one form to
|
@@ -75,19 +76,19 @@ module GtfsReader
|
|
75
76
|
# known values. This helper creates such a proc from a given hash and
|
76
77
|
# optional default.
|
77
78
|
#
|
78
|
-
|
79
|
-
# input.
|
80
|
-
#@param reverse_map [Hash] A map of returns values to their input values.
|
79
|
+
# @param reverse_map [Hash] A map of return values to their input values.
|
81
80
|
# This is in reverse because it looks better, like a list of labels:
|
82
81
|
# +{bus: 3, ferry: 4}+
|
83
|
-
|
82
|
+
# @param default [] The value to return if there is no mapping for a given
|
83
|
+
# input.
|
84
|
+
def output_map(reverse_map, default = nil)
|
84
85
|
if reverse_map.values.uniq.length != reverse_map.values.length
|
85
86
|
raise FileDefinitionError, "Duplicate values given: #{reverse_map}"
|
86
87
|
end
|
87
88
|
|
88
|
-
map = default.nil? ? {} : Hash.new(
|
89
|
-
reverse_map.each { |k,v| map[v] = k }
|
90
|
-
map.method(
|
89
|
+
map = default.nil? ? {} : Hash.new(default)
|
90
|
+
reverse_map.each { |k, v| map[v] = k }
|
91
|
+
map.method(:[]).to_proc
|
91
92
|
end
|
92
93
|
end
|
93
94
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'active_support/core_ext/hash/reverse_merge'
|
2
|
+
require 'active_support/core_ext/object/try'
|
2
3
|
|
3
4
|
require_relative 'feed_definition'
|
4
5
|
require_relative 'defaults/gtfs_feed_definition'
|
@@ -15,35 +16,35 @@ module GtfsReader
|
|
15
16
|
@name = name
|
16
17
|
@feed_definition = Config::Defaults::FEED_DEFINITION
|
17
18
|
@feed_handler = FeedHandler.new {}
|
19
|
+
@url = nil
|
20
|
+
@before = nil
|
18
21
|
end
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
def title(
|
23
|
-
@title =
|
23
|
+
# @param title [String] if given, will be used as the title of this source
|
24
|
+
# @return [String] the title of this source
|
25
|
+
def title(title = nil)
|
26
|
+
@title = title if title.present?
|
24
27
|
@title
|
25
28
|
end
|
26
29
|
|
27
|
-
|
28
|
-
|
29
|
-
def url(
|
30
|
-
@url =
|
30
|
+
# @param url [String] if given, will be used as the URL for this source
|
31
|
+
# @return [String] the URL of this source's ZIP file
|
32
|
+
def url(url = nil)
|
33
|
+
@url = url if url.present?
|
31
34
|
@url
|
32
35
|
end
|
33
36
|
|
34
37
|
# Define a block to call before the source is read. If this block
|
35
38
|
# returns +false+, cancel processing the source
|
36
39
|
def before(&block)
|
37
|
-
if block_given?
|
38
|
-
@before = block
|
39
|
-
end
|
40
|
+
@before = block if block_given?
|
40
41
|
@before
|
41
42
|
end
|
42
43
|
|
43
44
|
def feed_definition(&block)
|
44
45
|
if block_given?
|
45
46
|
@feed_definition = FeedDefinition.new.tap do |feed|
|
46
|
-
feed.instance_exec
|
47
|
+
feed.instance_exec(feed, &block)
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
@@ -52,15 +53,16 @@ module GtfsReader
|
|
52
53
|
|
53
54
|
def handlers(*args, &block)
|
54
55
|
if block_given?
|
55
|
-
opts =
|
56
|
+
opts = args.last.try(:is_a?, Hash) ? args.pop : {}
|
56
57
|
opts = opts.reverse_merge bulk: nil
|
57
58
|
@feed_handler =
|
58
59
|
if opts[:bulk]
|
59
|
-
BulkFeedHandler.new
|
60
|
+
BulkFeedHandler.new(opts[:bulk], args, &block)
|
60
61
|
else
|
61
|
-
FeedHandler.new
|
62
|
+
FeedHandler.new(args, &block)
|
62
63
|
end
|
63
64
|
end
|
65
|
+
|
64
66
|
@feed_handler
|
65
67
|
end
|
66
68
|
end
|
@@ -8,18 +8,22 @@ module GtfsReader
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def each(&block)
|
11
|
-
@sources.each
|
11
|
+
@sources.each(&block)
|
12
12
|
end
|
13
13
|
|
14
14
|
def [](key)
|
15
15
|
@sources[key]
|
16
16
|
end
|
17
17
|
|
18
|
-
def method_missing(name, *
|
18
|
+
def method_missing(name, *_args, &block)
|
19
19
|
(@sources[name] ||= Source.new name).tap do |src|
|
20
|
-
src.instance_exec
|
20
|
+
src.instance_exec(src, &block) if ::Kernel.block_given?
|
21
21
|
end
|
22
22
|
end
|
23
|
+
|
24
|
+
def respond_to_missing?(_name, _include_private = false)
|
25
|
+
true
|
26
|
+
end
|
23
27
|
end
|
24
28
|
end
|
25
29
|
end
|
@@ -1,15 +1,14 @@
|
|
1
1
|
module GtfsReader
|
2
2
|
class Configuration
|
3
|
-
|
4
3
|
# Creates simple configuration parameters which may be set by the user
|
5
|
-
|
4
|
+
# @param names [Array<Symbol>] the names of the parameters to create
|
6
5
|
def parameter(*names)
|
7
6
|
names.each do |name|
|
8
|
-
define_singleton_method
|
9
|
-
if value = values.first
|
10
|
-
instance_variable_set
|
7
|
+
define_singleton_method(name) do |*values|
|
8
|
+
if (value = values.first)
|
9
|
+
instance_variable_set("@#{name}", value)
|
11
10
|
else
|
12
|
-
instance_variable_get
|
11
|
+
instance_variable_get("@#{name}")
|
13
12
|
end
|
14
13
|
end
|
15
14
|
end
|
@@ -17,9 +16,9 @@ module GtfsReader
|
|
17
16
|
|
18
17
|
def block_parameter(name, obj_class, *init_args)
|
19
18
|
obj = nil
|
20
|
-
define_singleton_method
|
21
|
-
obj ||= obj_class.new
|
22
|
-
obj.instance_exec(
|
19
|
+
define_singleton_method(name) do |*args, &block|
|
20
|
+
obj ||= obj_class.new(*init_args)
|
21
|
+
obj.instance_exec(obj, *args, &block) if block
|
23
22
|
obj
|
24
23
|
end
|
25
24
|
end
|
data/lib/gtfs_reader/core.rb
CHANGED
@@ -4,21 +4,22 @@ require_relative 'config/sources'
|
|
4
4
|
require_relative 'source_updater'
|
5
5
|
|
6
6
|
module GtfsReader
|
7
|
-
|
7
|
+
module_function
|
8
8
|
|
9
|
-
|
9
|
+
# @overload config(*args, &blk)
|
10
10
|
# @param args [Array] an array of arguments to pass to the given block
|
11
11
|
# @param blk [Proc] a block to call in the context of the configuration
|
12
12
|
# object. Subsequent calls will use the same configuration for additional
|
13
13
|
# modification.
|
14
14
|
# @return [Configuration] the configuration object
|
15
15
|
#
|
16
|
-
|
16
|
+
# @overload config
|
17
17
|
# @return [Configuration] the configuration object
|
18
18
|
def config(*args, &blk)
|
19
19
|
@cfg ||= create_config
|
20
|
+
|
20
21
|
if block_given?
|
21
|
-
@cfg.instance_exec
|
22
|
+
@cfg.instance_exec(*args.unshift(@cfg), &blk)
|
22
23
|
elsif args.any?
|
23
24
|
raise ArgumentError, 'arguments given without a block'
|
24
25
|
end
|
@@ -26,24 +27,22 @@ module GtfsReader
|
|
26
27
|
end
|
27
28
|
|
28
29
|
def update_all!
|
29
|
-
config.sources.each {|name, _| update
|
30
|
+
config.sources.each { |name, _| update(name) }
|
30
31
|
end
|
31
32
|
|
32
33
|
def update(name)
|
33
34
|
if config.verbose
|
34
|
-
update_verbosely
|
35
|
+
update_verbosely(name)
|
35
36
|
else
|
36
|
-
Log.quiet { update_verbosely
|
37
|
+
Log.quiet { update_verbosely(name) }
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
40
|
-
private
|
41
|
-
|
42
41
|
def update_verbosely(name)
|
43
42
|
source = config.sources[name]
|
44
43
|
raise UnknownSourceError, "No source named '#{name}'" if source.nil?
|
45
44
|
|
46
|
-
updater = SourceUpdater.new
|
45
|
+
updater = SourceUpdater.new(name, source)
|
47
46
|
begin
|
48
47
|
updater.instance_exec do
|
49
48
|
Log.info { "Updating #{name.to_s.green}".underline }
|
@@ -67,11 +66,11 @@ module GtfsReader
|
|
67
66
|
def create_config
|
68
67
|
Configuration.new.tap do |cfg|
|
69
68
|
cfg.instance_exec do
|
70
|
-
parameter
|
71
|
-
parameter
|
72
|
-
parameter
|
73
|
-
block_parameter
|
74
|
-
block_parameter
|
69
|
+
parameter(:verbose)
|
70
|
+
parameter(:skip_parsing)
|
71
|
+
parameter(:return_hashes)
|
72
|
+
block_parameter(:sources, Config::Sources)
|
73
|
+
block_parameter(:feed_definition, Config::FeedDefinition)
|
75
74
|
end
|
76
75
|
end
|
77
76
|
end
|
@@ -1,22 +1,22 @@
|
|
1
1
|
module GtfsReader
|
2
2
|
# This handler returns each row individually as it is read in from the source.
|
3
3
|
class FeedHandler
|
4
|
-
def initialize(args=[], &block)
|
4
|
+
def initialize(args = [], &block)
|
5
5
|
@read_callbacks = {}
|
6
|
-
FeedHandlerDsl.new(self).instance_exec
|
6
|
+
FeedHandlerDsl.new(self).instance_exec(*args, &block)
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
# @param filename [String] the name of the file to handle
|
10
|
+
# @return [Boolean] if this handler can handle the given filename
|
11
11
|
def handler?(filename)
|
12
|
-
@read_callbacks.key?
|
12
|
+
@read_callbacks.key?(filename)
|
13
13
|
end
|
14
14
|
|
15
15
|
def handle_file(filename, enumerator)
|
16
|
-
enumerator.each
|
16
|
+
enumerator.each(&@read_callbacks[filename])
|
17
17
|
end
|
18
18
|
|
19
|
-
def create_read_handler(filename, *
|
19
|
+
def create_read_handler(filename, *_args, &block)
|
20
20
|
@read_callbacks[filename] = block
|
21
21
|
end
|
22
22
|
end
|
@@ -27,7 +27,11 @@ module GtfsReader
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def method_missing(filename, *args, &block)
|
30
|
-
@feed_handler.create_read_handler
|
30
|
+
@feed_handler.create_read_handler(filename, *args, &block)
|
31
|
+
end
|
32
|
+
|
33
|
+
def respond_to_missing?(_name, _include_private = false)
|
34
|
+
true
|
31
35
|
end
|
32
36
|
end
|
33
37
|
end
|
@@ -5,59 +5,59 @@ require_relative 'file_row'
|
|
5
5
|
module GtfsReader
|
6
6
|
CSV_OPTIONS = { headers: :first_row,
|
7
7
|
return_headers: true,
|
8
|
-
header_converters: :symbol }
|
8
|
+
header_converters: :symbol }.freeze
|
9
9
|
|
10
10
|
# Iterates over the rows in a single file using a provided definition.
|
11
|
-
|
11
|
+
# @see #each
|
12
12
|
class FileReader
|
13
13
|
include Enumerable
|
14
14
|
|
15
15
|
attr_reader :definition, :columns, :col_names
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
#
|
20
|
-
def initialize(data, definition, opts={})
|
21
|
-
opts = { parse: true, validate: false, hash: true }.merge
|
17
|
+
# @param data [IO,String] CSV data
|
18
|
+
# @param definition [FileDefinition] describes the expected columns in this
|
19
|
+
# file
|
20
|
+
def initialize(data, definition, opts = {})
|
21
|
+
opts = { parse: true, validate: false, hash: true }.merge(opts)
|
22
22
|
|
23
|
-
@csv = CSV.new
|
24
|
-
@definition
|
25
|
-
|
23
|
+
@csv = CSV.new(data, CSV_OPTIONS)
|
24
|
+
@definition = definition
|
25
|
+
@do_parse = opts[:parse]
|
26
|
+
@return_hash = opts[:hash]
|
26
27
|
@index = 0
|
27
28
|
@csv_headers = @csv.shift.headers
|
28
|
-
@columns = find_columns
|
29
|
+
@columns = find_columns(opts[:validate])
|
29
30
|
end
|
30
31
|
|
31
32
|
def filename
|
32
33
|
@definition.filename
|
33
34
|
end
|
34
35
|
|
35
|
-
|
36
|
-
#
|
37
|
-
|
38
|
-
#
|
39
|
-
#
|
40
|
-
|
36
|
+
# @overload each(&blk)
|
37
|
+
# @yieldparam hash [Hash] a hash of columns to their values in this row
|
38
|
+
# @overload each
|
39
|
+
# @return [Enumerator] an {::Enumerator} that iterates over the rows in the
|
40
|
+
# file
|
41
|
+
# @see FileRow#to_hash
|
41
42
|
def each
|
42
|
-
return to_enum
|
43
|
+
return to_enum(:each) unless block_given?
|
43
44
|
|
44
|
-
while row = shift
|
45
|
+
while (row = shift)
|
45
46
|
yield(@return_hash ? row.to_hash : row.to_a)
|
46
47
|
end
|
47
48
|
end
|
48
49
|
|
49
|
-
|
50
|
+
# @return [FileRow,nil] the next row from the file, or +nil+ if the end of
|
50
51
|
# the file has been reached.
|
51
52
|
def shift
|
52
|
-
|
53
|
-
|
54
|
-
end
|
53
|
+
row = @csv.shift
|
54
|
+
file_row(row).tap { @index += 1 } if row
|
55
55
|
end
|
56
56
|
|
57
57
|
private
|
58
58
|
|
59
59
|
def file_row(row)
|
60
|
-
FileRow.new
|
60
|
+
FileRow.new(@index, @col_names, row, @definition, @do_parse)
|
61
61
|
end
|
62
62
|
|
63
63
|
# Check the list of headers in the file against the expected columns in
|
@@ -70,31 +70,31 @@ module GtfsReader
|
|
70
70
|
unless required.empty?
|
71
71
|
Log.info { "#{prefix} #{'required columns'.magenta}" } if validate
|
72
72
|
|
73
|
-
missing = check_columns
|
73
|
+
missing = check_columns(validate, prefix, required, :green, :red)
|
74
74
|
raise RequiredColumnsMissing, missing if validate && missing.present?
|
75
75
|
end
|
76
76
|
|
77
77
|
optional = @definition.optional_columns
|
78
78
|
unless optional.empty?
|
79
79
|
Log.info { "#{prefix} #{'optional columns'.cyan}" } if validate
|
80
|
-
check_columns
|
80
|
+
check_columns(validate, prefix, optional, :cyan, :light_yellow)
|
81
81
|
end
|
82
82
|
|
83
|
-
cols = @definition.columns.collect(
|
84
|
-
headers = @csv_headers.select {|h| cols.include?
|
83
|
+
cols = @definition.columns.collect(&:name)
|
84
|
+
headers = @csv_headers.select { |h| cols.include?(h) }
|
85
85
|
|
86
|
-
@col_names ||= @found_columns.map
|
87
|
-
::Hash[
|
86
|
+
@col_names ||= @found_columns.map(&:name)
|
87
|
+
::Hash[*headers.inject([]) { |list, c| list << c << @definition[c] }]
|
88
88
|
end
|
89
89
|
|
90
90
|
def check_columns(validate, prefix, expected, found_color, missing_color)
|
91
|
-
check = '✔'.colorize
|
92
|
-
cross = '✘'.colorize
|
91
|
+
check = '✔'.colorize(found_color)
|
92
|
+
cross = '✘'.colorize(missing_color)
|
93
93
|
|
94
94
|
expected.map do |col|
|
95
95
|
name = col.name
|
96
96
|
missing =
|
97
|
-
if @csv_headers.include?
|
97
|
+
if @csv_headers.include?(name)
|
98
98
|
@found_columns << col
|
99
99
|
nil
|
100
100
|
else
|
@@ -112,9 +112,7 @@ module GtfsReader
|
|
112
112
|
end
|
113
113
|
|
114
114
|
def column_width
|
115
|
-
@column_width ||= @definition.columns.collect(
|
116
|
-
a.length <=> b.length
|
117
|
-
end.length
|
115
|
+
@column_width ||= @definition.columns.collect(&:name).max_by(&:length).length
|
118
116
|
end
|
119
117
|
end
|
120
118
|
end
|