gtfs_reader 1.2.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -15
- data/Rakefile +6 -9
- data/lib/gtfs_reader/bulk_feed_handler.rb +28 -24
- data/lib/gtfs_reader/config/column.rb +16 -16
- data/lib/gtfs_reader/config/defaults/gtfs_feed_definition.rb +50 -46
- data/lib/gtfs_reader/config/feed_definition.rb +19 -19
- data/lib/gtfs_reader/config/file_definition.rb +34 -33
- data/lib/gtfs_reader/config/source.rb +17 -15
- data/lib/gtfs_reader/config/sources.rb +7 -3
- data/lib/gtfs_reader/configuration.rb +8 -9
- data/lib/gtfs_reader/core.rb +14 -15
- data/lib/gtfs_reader/exceptions.rb +0 -1
- data/lib/gtfs_reader/feed_handler.rb +12 -8
- data/lib/gtfs_reader/file_reader.rb +34 -36
- data/lib/gtfs_reader/file_row.rb +40 -32
- data/lib/gtfs_reader/log.rb +37 -23
- data/lib/gtfs_reader/source_updater.rb +40 -43
- data/lib/gtfs_reader/version.rb +29 -28
- metadata +39 -39
data/lib/gtfs_reader/file_row.rb
CHANGED
@@ -3,64 +3,72 @@ require 'csv'
|
|
3
3
|
module GtfsReader
|
4
4
|
# Contains the contents of a single row read in from the file
|
5
5
|
class FileRow
|
6
|
-
attr_reader :line_number
|
6
|
+
attr_reader :line_number, :headers
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
#
|
8
|
+
# @param line_number [Integer] the line number from the source file
|
9
|
+
# @param headers [Array<Symbol>] the column headers for this row
|
10
|
+
# @param data [CSV::Row] the data for this row
|
11
|
+
# @param definition [FileDefinition] the definition of the columns that the
|
12
|
+
# data in this row represent
|
13
13
|
def initialize(line_number, headers, data, definition, do_parse)
|
14
|
-
@line_number
|
15
|
-
|
14
|
+
@line_number = line_number
|
15
|
+
@headers = headers
|
16
|
+
@data = data
|
17
|
+
@definition = definition
|
18
|
+
@do_parse = do_parse
|
16
19
|
@parsed = {}
|
17
20
|
end
|
18
21
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
end
|
23
|
-
|
24
|
-
#@param column [Symbol] the name of the column to fetch
|
25
|
-
#@return the parsed data for the column at this row
|
26
|
-
#@see #raw
|
22
|
+
# @param column [Symbol] the name of the column to fetch
|
23
|
+
# @return the parsed data for the column at this row
|
24
|
+
# @see #raw
|
27
25
|
def [](column)
|
28
26
|
return raw(column) unless @do_parse
|
29
27
|
|
30
|
-
@parsed[column] ||=
|
31
|
-
ParserContext.new(column, self)
|
32
|
-
|
33
|
-
|
28
|
+
@parsed[column] ||=
|
29
|
+
ParserContext.new(column, self)
|
30
|
+
.instance_exec(raw(column), &@definition[column].parser)
|
31
|
+
end
|
32
|
+
|
33
|
+
# @return [Boolean] if this row has the given column
|
34
|
+
def col?(col)
|
35
|
+
@headers.include?(col)
|
34
36
|
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
+
# @param (see #[])
|
39
|
+
# @return the unparsed data from the column at this row
|
38
40
|
def raw(column)
|
39
41
|
@data[column]
|
40
42
|
end
|
41
43
|
|
42
|
-
|
43
|
-
#
|
44
|
+
# @return [Hash] a hash representing this row of data, where each key is the
|
45
|
+
# column name and each value is the parsed data for this row
|
44
46
|
def to_hash
|
45
|
-
::Hash[
|
47
|
+
::Hash[
|
48
|
+
*headers.inject([]) { |list, h| list << h << self[h] }
|
49
|
+
]
|
46
50
|
end
|
47
51
|
|
48
|
-
|
52
|
+
# @return [Array] an array representing this row of data
|
49
53
|
def to_a
|
50
|
-
headers.map {|h| self[h] }
|
54
|
+
headers.map { |h| self[h] }
|
51
55
|
end
|
52
56
|
end
|
53
57
|
|
54
58
|
class ParserContext
|
55
59
|
def initialize(column, file_row)
|
56
|
-
@column
|
60
|
+
@column = column
|
61
|
+
@file_row = file_row
|
57
62
|
end
|
58
63
|
|
59
64
|
def method_missing(column)
|
60
|
-
if column == @column
|
61
|
-
|
62
|
-
|
63
|
-
|
65
|
+
raise "Parser for '#{column}' cannot refer to itself" if column == @column
|
66
|
+
|
67
|
+
@file_row.col?(column) ? @file_row[column] : super
|
68
|
+
end
|
69
|
+
|
70
|
+
def respond_to_missing?(_name, _include_private = false)
|
71
|
+
true
|
64
72
|
end
|
65
73
|
end
|
66
74
|
end
|
data/lib/gtfs_reader/log.rb
CHANGED
@@ -1,18 +1,31 @@
|
|
1
|
-
require '
|
2
|
-
require 'log4r/formatter/patternformatter'
|
1
|
+
require 'logger'
|
3
2
|
require 'colorize'
|
4
3
|
|
5
4
|
module GtfsReader
|
6
5
|
module Log
|
7
6
|
class << self
|
8
|
-
def debug(*args, &block)
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def
|
7
|
+
def debug(*args, &block)
|
8
|
+
log(:debug, *args, &block)
|
9
|
+
end
|
10
|
+
|
11
|
+
def info(*args, &block)
|
12
|
+
log(:info, *args, &block)
|
13
|
+
end
|
14
|
+
|
15
|
+
def warn(*args, &block)
|
16
|
+
log(:warn, *args, &block)
|
17
|
+
end
|
18
|
+
|
19
|
+
def error(*args, &block)
|
20
|
+
log(:error, *args, &block)
|
21
|
+
end
|
22
|
+
|
23
|
+
def fatal(*args, &block)
|
24
|
+
log(:fatal, *args, &block)
|
25
|
+
end
|
13
26
|
|
14
27
|
def log(level, *args, &block)
|
15
|
-
logger.send
|
28
|
+
logger.send(level, *args, &block)
|
16
29
|
nil
|
17
30
|
end
|
18
31
|
|
@@ -23,14 +36,14 @@ module GtfsReader
|
|
23
36
|
|
24
37
|
def level=(lev)
|
25
38
|
logger.level =
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
39
|
+
case lev
|
40
|
+
when :debug then Logger::DEBUG
|
41
|
+
when :info then Logger::INFO
|
42
|
+
when :warn then Logger::WARN
|
43
|
+
when :error then Logger::ERROR
|
44
|
+
when :fatal then Logger::FATAL
|
45
|
+
else raise "unknown log level '#{lev}'"
|
46
|
+
end
|
34
47
|
end
|
35
48
|
|
36
49
|
def level
|
@@ -51,20 +64,21 @@ module GtfsReader
|
|
51
64
|
private
|
52
65
|
|
53
66
|
def create_logger
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
log.outputters << out
|
58
|
-
log.level = Log4r::INFO
|
59
|
-
log.debug { 'Starting GtfsReader...'.underline.colorize :yellow }
|
67
|
+
Logger.new($stderr).tap do |log|
|
68
|
+
log.level = Logger::INFO
|
69
|
+
log.debug { 'Starting GtfsReader...'.underline.colorize(:yellow) }
|
60
70
|
end
|
61
71
|
end
|
62
72
|
end
|
63
73
|
|
64
74
|
class NoOpLogger
|
65
|
-
def method_missing(*
|
75
|
+
def method_missing(*_args)
|
66
76
|
nil
|
67
77
|
end
|
78
|
+
|
79
|
+
def respond_to_missing?(_name, _include_private = false)
|
80
|
+
true
|
81
|
+
end
|
68
82
|
end
|
69
83
|
end
|
70
84
|
end
|
@@ -11,45 +11,44 @@ module GtfsReader
|
|
11
11
|
# Downloads remote Feed files, checks that they are valid, and passes each
|
12
12
|
# file in the feed to the handlers in the given [Source].
|
13
13
|
class SourceUpdater
|
14
|
-
|
15
|
-
|
14
|
+
# @param name [String] an arbitrary string describing this source
|
15
|
+
# @param source [Source]
|
16
16
|
def initialize(name, source)
|
17
|
-
@name
|
17
|
+
@name = name
|
18
|
+
@source = source
|
18
19
|
@temp_files = {}
|
19
20
|
end
|
20
21
|
|
21
22
|
# Call the "before" callback set on this source
|
22
23
|
def before_callbacks
|
23
|
-
if @source.before
|
24
|
-
@source.before.call fetch_data_set_identifier
|
25
|
-
end
|
24
|
+
@source.before.call(fetch_data_set_identifier) if @source.before
|
26
25
|
end
|
27
26
|
|
28
27
|
# Download the data from the remote server
|
29
28
|
def download_source
|
30
29
|
Log.debug { " Reading #{@source.url.green}" }
|
31
|
-
zip = Tempfile.new
|
30
|
+
zip = Tempfile.new('gtfs')
|
32
31
|
zip.binmode
|
33
32
|
zip << open(@source.url).read
|
34
33
|
zip.rewind
|
35
34
|
|
36
|
-
extract_to_tempfiles
|
35
|
+
extract_to_tempfiles(zip)
|
37
36
|
|
38
37
|
Log.debug { "Finished reading #{@source.url.green}" }
|
39
|
-
rescue
|
40
|
-
Log.error
|
38
|
+
rescue StandardException => e
|
39
|
+
Log.error(e.message)
|
41
40
|
raise e
|
42
41
|
ensure
|
43
|
-
zip.try
|
42
|
+
zip.try(:close)
|
44
43
|
end
|
45
44
|
|
46
45
|
def close
|
47
|
-
@temp_files.values.each
|
46
|
+
@temp_files.values.each(&:close)
|
48
47
|
end
|
49
48
|
|
50
49
|
# Parse the filenames in the feed and check which required and optional
|
51
50
|
# files are present.
|
52
|
-
|
51
|
+
# @raise [RequiredFilenamesMissing] if the feed is missing a file which is
|
53
52
|
# marked as "required" in the [FeedDefinition]
|
54
53
|
def check_files
|
55
54
|
@found_files = []
|
@@ -57,9 +56,7 @@ module GtfsReader
|
|
57
56
|
check_optional_files
|
58
57
|
# Add feed files of zip to the list of files to be processed
|
59
58
|
@source.feed_definition.files.each do |req|
|
60
|
-
if filenames.include?
|
61
|
-
@found_files << req
|
62
|
-
end
|
59
|
+
@found_files << req if filenames.include?(req.filename)
|
63
60
|
end
|
64
61
|
end
|
65
62
|
|
@@ -67,15 +64,15 @@ module GtfsReader
|
|
67
64
|
def check_columns
|
68
65
|
@found_files.each do |file|
|
69
66
|
@temp_files[file.filename].open do |data|
|
70
|
-
FileReader.new
|
67
|
+
FileReader.new(data, file, validate: true)
|
71
68
|
end
|
72
69
|
end
|
73
70
|
end
|
74
71
|
|
75
72
|
def process_files
|
76
73
|
@found_files.each do |file|
|
77
|
-
if @source.handlers.handler?
|
78
|
-
process_from_temp_file
|
74
|
+
if @source.handlers.handler?(file.name)
|
75
|
+
process_from_temp_file(file)
|
79
76
|
else
|
80
77
|
Log.warn { "Skipping #{file.filename.yellow} (no handler)" }
|
81
78
|
end
|
@@ -86,7 +83,7 @@ module GtfsReader
|
|
86
83
|
|
87
84
|
def extract_to_tempfiles(zip)
|
88
85
|
Zip::File.open(zip).each do |entry|
|
89
|
-
temp = Tempfile.new
|
86
|
+
temp = Tempfile.new("gtfs_file_#{entry.name}")
|
90
87
|
temp.binmode
|
91
88
|
temp << entry.get_input_stream.read
|
92
89
|
temp.close
|
@@ -96,22 +93,22 @@ module GtfsReader
|
|
96
93
|
|
97
94
|
# Check for the given list of expected filenames in the zip file
|
98
95
|
def check_missing_files(expected, found_color, missing_color)
|
99
|
-
check = '✔'.colorize
|
100
|
-
cross = '✘'.colorize
|
96
|
+
check = '✔'.colorize(found_color)
|
97
|
+
cross = '✘'.colorize(missing_color)
|
101
98
|
|
102
99
|
expected.map do |req|
|
103
100
|
filename = req.filename
|
104
|
-
if filenames.include?
|
105
|
-
Log.info { "#{filename.rjust
|
101
|
+
if filenames.include?(filename)
|
102
|
+
Log.info { "#{filename.rjust(filename_width)} [#{check}]" }
|
106
103
|
nil
|
107
104
|
else
|
108
|
-
Log.info { "#{filename.rjust
|
105
|
+
Log.info { "#{filename.rjust(filename_width)} [#{cross}]" }
|
109
106
|
filename
|
110
107
|
end
|
111
108
|
end.compact
|
112
109
|
end
|
113
110
|
|
114
|
-
|
111
|
+
# @return [Integer] the maximum string-width of the filenames, so they can be
|
115
112
|
# aligned when printed on the console.
|
116
113
|
def filename_width
|
117
114
|
@filename_width ||= @source.feed_definition.files.max do |a, b|
|
@@ -132,22 +129,22 @@ module GtfsReader
|
|
132
129
|
# the same if they happen to have the same size)
|
133
130
|
# - The current date/time (this will always result in a fresh download)
|
134
131
|
def fetch_data_set_identifier
|
135
|
-
if @source.url =~ /\A#{URI::
|
136
|
-
uri = URI
|
132
|
+
if @source.url =~ /\A#{URI::DEFAULT_PARSER.make_regexp}\z/
|
133
|
+
uri = URI(@source.url)
|
137
134
|
Net::HTTP.start(uri.host) do |http|
|
138
|
-
head_request = http.request_head
|
139
|
-
if head_request.key?
|
135
|
+
head_request = http.request_head(uri.path)
|
136
|
+
if head_request.key?('etag')
|
140
137
|
head_request['etag']
|
141
138
|
else
|
142
|
-
Log.warn
|
143
|
-
fetch_http_fallback_identifier
|
139
|
+
Log.warn("No ETag supplied with: #{uri.path}")
|
140
|
+
fetch_http_fallback_identifier(head_request)
|
144
141
|
end
|
145
142
|
end
|
146
143
|
else # it's not a url, it may be a file => last modified
|
147
144
|
begin
|
148
|
-
File.mtime
|
145
|
+
File.mtime(@source.url)
|
149
146
|
rescue StandardError => e
|
150
|
-
Log.error
|
147
|
+
Log.error(e)
|
151
148
|
raise e
|
152
149
|
end
|
153
150
|
end
|
@@ -156,9 +153,9 @@ module GtfsReader
|
|
156
153
|
# Find a "next best" ID when the HEAD request does not return an "ETag"
|
157
154
|
# header.
|
158
155
|
def fetch_http_fallback_identifier(head_request)
|
159
|
-
if head_request.key?
|
156
|
+
if head_request.key?('last-modified')
|
160
157
|
head_request['last-modified']
|
161
|
-
elsif head_request.key?
|
158
|
+
elsif head_request.key?('content-length')
|
162
159
|
head_request['content-length']
|
163
160
|
else
|
164
161
|
Time.now.to_s
|
@@ -169,27 +166,27 @@ module GtfsReader
|
|
169
166
|
do_parse = !GtfsReader.config.skip_parsing
|
170
167
|
hash = !!GtfsReader.config.return_hashes
|
171
168
|
|
172
|
-
Log.info
|
169
|
+
Log.info("Reading file #{file.filename.cyan}...")
|
173
170
|
begin
|
174
|
-
reader = FileReader.new
|
175
|
-
parse: do_parse, hash: hash
|
176
|
-
@source.handlers.handle_file
|
171
|
+
reader = FileReader.new(@temp_files[file.filename], file,
|
172
|
+
parse: do_parse, hash: hash)
|
173
|
+
@source.handlers.handle_file(file.name, reader)
|
177
174
|
end
|
178
175
|
end
|
179
176
|
|
180
|
-
|
177
|
+
# @raise [RequiredFilenamesMissing] if the feed is missing a file which is
|
181
178
|
# marked as "required" in the [FeedDefinition]
|
182
179
|
def check_required_files
|
183
180
|
Log.info { 'required files'.magenta }
|
184
181
|
files = @source.feed_definition.required_files
|
185
|
-
missing = check_missing_files
|
182
|
+
missing = check_missing_files(files, :green, :red)
|
186
183
|
raise RequiredFilenamesMissing, missing unless missing.empty?
|
187
184
|
end
|
188
185
|
|
189
186
|
def check_optional_files
|
190
187
|
Log.info { 'optional files'.cyan }
|
191
188
|
files = @source.feed_definition.optional_files
|
192
|
-
check_missing_files
|
189
|
+
check_missing_files(files, :cyan, :light_yellow)
|
193
190
|
end
|
194
191
|
end
|
195
192
|
end
|
data/lib/gtfs_reader/version.rb
CHANGED
@@ -3,27 +3,28 @@ module GtfsReader
|
|
3
3
|
# {Bumper} class which will modify this file to increase the version
|
4
4
|
module Version
|
5
5
|
# The following four lines are generated, so don't mess with them.
|
6
|
-
MAJOR =
|
7
|
-
MINOR =
|
6
|
+
MAJOR = 3
|
7
|
+
MINOR = 1
|
8
8
|
PATCH = 0
|
9
9
|
BUILD = nil
|
10
10
|
|
11
|
-
|
11
|
+
# @return [String] the current version in the form of +1.2.3.build+
|
12
12
|
def self.to_s
|
13
|
-
[MAJOR, MINOR, PATCH, BUILD].compact.join
|
13
|
+
[MAJOR, MINOR, PATCH, BUILD].compact.join('.')
|
14
14
|
end
|
15
15
|
|
16
16
|
# A helper class which bumps the version number stored in this file
|
17
17
|
class Bumper
|
18
|
-
PARTS = %i[major minor patch]
|
19
|
-
PATTERN =
|
18
|
+
PARTS = %i[major minor patch].freeze
|
19
|
+
PATTERN = /(\s+)MAJOR = \d+\s+MINOR = \d+\s+PATCH = \d+\s+BUILD = .+/
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
#
|
24
|
-
def initialize(filename=__FILE__
|
21
|
+
# @param part [Symbol] the part of the version to bump. One of major,
|
22
|
+
# minor, or patch
|
23
|
+
# @param filename [String] the file to edit
|
24
|
+
def initialize(part, filename = __FILE__)
|
25
25
|
raise "#{part} not one of #{PARTS}" unless PARTS.include? part
|
26
|
-
@filename
|
26
|
+
@filename = filename
|
27
|
+
@part = part
|
27
28
|
end
|
28
29
|
|
29
30
|
# Increase the version number and write it to this file
|
@@ -33,42 +34,42 @@ module GtfsReader
|
|
33
34
|
text = '\1' + ["MAJOR = #{parts[:major]}",
|
34
35
|
"MINOR = #{parts[:minor]}",
|
35
36
|
"PATCH = #{parts[:patch]}",
|
36
|
-
"BUILD = #{parts[:build] || 'nil'}"].join(
|
37
|
+
"BUILD = #{parts[:build] || 'nil'}"].join('\1')
|
37
38
|
|
38
|
-
out_data = File.read(
|
39
|
-
#puts out_data
|
40
|
-
File.open(
|
41
|
-
puts "Bumped version to #{
|
39
|
+
out_data = File.read(@filename).gsub(PATTERN, text)
|
40
|
+
# puts out_data
|
41
|
+
File.open(@filename, 'w') { |out| out << out_data }
|
42
|
+
puts "Bumped version to #{self}"
|
42
43
|
end
|
43
44
|
|
44
|
-
|
45
|
+
# @return [String] What the new version string will be.
|
45
46
|
def to_s
|
46
47
|
p = new_version
|
47
|
-
[p[:major], p[:minor], p[:patch], p[:build]].compact.join
|
48
|
+
[p[:major], p[:minor], p[:patch], p[:build]].compact.join('.')
|
48
49
|
end
|
49
50
|
|
50
51
|
private
|
51
52
|
|
52
53
|
def new_version
|
53
|
-
@
|
54
|
-
|
55
|
-
|
56
|
-
|
54
|
+
@new_version ||= { major: MAJOR,
|
55
|
+
minor: MINOR,
|
56
|
+
patch: PATCH,
|
57
|
+
build: BUILD }.merge(new_parts)
|
57
58
|
end
|
58
59
|
|
59
60
|
def new_parts
|
60
61
|
case @part
|
61
62
|
when :major then {
|
62
|
-
|
63
|
-
|
64
|
-
|
63
|
+
major: MAJOR + 1,
|
64
|
+
minor: 0,
|
65
|
+
patch: 0
|
65
66
|
}
|
66
67
|
when :minor then {
|
67
|
-
|
68
|
-
|
68
|
+
minor: MINOR + 1,
|
69
|
+
patch: 0
|
69
70
|
}
|
70
71
|
else {
|
71
|
-
|
72
|
+
patch: PATCH + 1
|
72
73
|
}
|
73
74
|
end
|
74
75
|
end
|