gtfs_reader 1.2.0 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -15
- data/Rakefile +6 -9
- data/lib/gtfs_reader/bulk_feed_handler.rb +28 -24
- data/lib/gtfs_reader/config/column.rb +16 -16
- data/lib/gtfs_reader/config/defaults/gtfs_feed_definition.rb +50 -46
- data/lib/gtfs_reader/config/feed_definition.rb +19 -19
- data/lib/gtfs_reader/config/file_definition.rb +34 -33
- data/lib/gtfs_reader/config/source.rb +17 -15
- data/lib/gtfs_reader/config/sources.rb +7 -3
- data/lib/gtfs_reader/configuration.rb +8 -9
- data/lib/gtfs_reader/core.rb +14 -15
- data/lib/gtfs_reader/exceptions.rb +0 -1
- data/lib/gtfs_reader/feed_handler.rb +12 -8
- data/lib/gtfs_reader/file_reader.rb +34 -36
- data/lib/gtfs_reader/file_row.rb +40 -32
- data/lib/gtfs_reader/log.rb +37 -23
- data/lib/gtfs_reader/source_updater.rb +40 -43
- data/lib/gtfs_reader/version.rb +29 -28
- metadata +39 -39
data/lib/gtfs_reader/file_row.rb
CHANGED
@@ -3,64 +3,72 @@ require 'csv'
|
|
3
3
|
module GtfsReader
|
4
4
|
# Contains the contents of a single row read in from the file
|
5
5
|
class FileRow
|
6
|
-
attr_reader :line_number
|
6
|
+
attr_reader :line_number, :headers
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
#
|
8
|
+
# @param line_number [Integer] the line number from the source file
|
9
|
+
# @param headers [Array<Symbol>] the names of the columns in this row
|
10
|
+
# @param data [CSV::Row] the data for this row
|
11
|
+
# @param definition [FileDefinition] the definition of the columns that the
|
12
|
+
# data in this row represent
|
13
13
|
def initialize(line_number, headers, data, definition, do_parse)
|
14
|
-
@line_number
|
15
|
-
|
14
|
+
@line_number = line_number
|
15
|
+
@headers = headers
|
16
|
+
@data = data
|
17
|
+
@definition = definition
|
18
|
+
@do_parse = do_parse
|
16
19
|
@parsed = {}
|
17
20
|
end
|
18
21
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
end
|
23
|
-
|
24
|
-
#@param column [Symbol] the name of the column to fetch
|
25
|
-
#@return the parsed data for the column at this row
|
26
|
-
#@see #raw
|
22
|
+
# @param column [Symbol] the name of the column to fetch
|
23
|
+
# @return the parsed data for the column at this row
|
24
|
+
# @see #raw
|
27
25
|
def [](column)
|
28
26
|
return raw(column) unless @do_parse
|
29
27
|
|
30
|
-
@parsed[column] ||=
|
31
|
-
ParserContext.new(column, self)
|
32
|
-
|
33
|
-
|
28
|
+
@parsed[column] ||=
|
29
|
+
ParserContext.new(column, self)
|
30
|
+
.instance_exec(raw(column), &@definition[column].parser)
|
31
|
+
end
|
32
|
+
|
33
|
+
# @return [Boolean] if this row has the given column
|
34
|
+
def col?(col)
|
35
|
+
@headers.include?(col)
|
34
36
|
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
+
# @param (see #[])
|
39
|
+
# @return the unparsed data from the column at this row
|
38
40
|
def raw(column)
|
39
41
|
@data[column]
|
40
42
|
end
|
41
43
|
|
42
|
-
|
43
|
-
#
|
44
|
+
# @return [Hash] a hash representing this row of data, where each key is the
|
45
|
+
# column name and each value is the parsed data for this row
|
44
46
|
def to_hash
|
45
|
-
::Hash[
|
47
|
+
::Hash[
|
48
|
+
*headers.inject([]) { |list, h| list << h << self[h] }
|
49
|
+
]
|
46
50
|
end
|
47
51
|
|
48
|
-
|
52
|
+
# @return [Array] an array representing this row of data
|
49
53
|
def to_a
|
50
|
-
headers.map {|h| self[h] }
|
54
|
+
headers.map { |h| self[h] }
|
51
55
|
end
|
52
56
|
end
|
53
57
|
|
54
58
|
class ParserContext
|
55
59
|
def initialize(column, file_row)
|
56
|
-
@column
|
60
|
+
@column = column
|
61
|
+
@file_row = file_row
|
57
62
|
end
|
58
63
|
|
59
64
|
def method_missing(column)
|
60
|
-
if column == @column
|
61
|
-
|
62
|
-
|
63
|
-
|
65
|
+
raise "Parser for '#{column}' cannot refer to itself" if column == @column
|
66
|
+
|
67
|
+
@file_row.col?(column) ? @file_row[column] : super
|
68
|
+
end
|
69
|
+
|
70
|
+
def respond_to_missing?(_name, _include_private = false)
|
71
|
+
true
|
64
72
|
end
|
65
73
|
end
|
66
74
|
end
|
data/lib/gtfs_reader/log.rb
CHANGED
@@ -1,18 +1,31 @@
|
|
1
|
-
require '
|
2
|
-
require 'log4r/formatter/patternformatter'
|
1
|
+
require 'logger'
|
3
2
|
require 'colorize'
|
4
3
|
|
5
4
|
module GtfsReader
|
6
5
|
module Log
|
7
6
|
class << self
|
8
|
-
def debug(*args, &block)
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def
|
7
|
+
def debug(*args, &block)
|
8
|
+
log(:debug, *args, &block)
|
9
|
+
end
|
10
|
+
|
11
|
+
def info(*args, &block)
|
12
|
+
log(:info, *args, &block)
|
13
|
+
end
|
14
|
+
|
15
|
+
def warn(*args, &block)
|
16
|
+
log(:warn, *args, &block)
|
17
|
+
end
|
18
|
+
|
19
|
+
def error(*args, &block)
|
20
|
+
log(:error, *args, &block)
|
21
|
+
end
|
22
|
+
|
23
|
+
def fatal(*args, &block)
|
24
|
+
log(:fatal, *args, &block)
|
25
|
+
end
|
13
26
|
|
14
27
|
def log(level, *args, &block)
|
15
|
-
logger.send
|
28
|
+
logger.send(level, *args, &block)
|
16
29
|
nil
|
17
30
|
end
|
18
31
|
|
@@ -23,14 +36,14 @@ module GtfsReader
|
|
23
36
|
|
24
37
|
def level=(lev)
|
25
38
|
logger.level =
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
39
|
+
case lev
|
40
|
+
when :debug then Logger::DEBUG
|
41
|
+
when :info then Logger::INFO
|
42
|
+
when :warn then Logger::WARN
|
43
|
+
when :error then Logger::ERROR
|
44
|
+
when :fatal then Logger::FATAL
|
45
|
+
else raise "unknown log level '#{lev}'"
|
46
|
+
end
|
34
47
|
end
|
35
48
|
|
36
49
|
def level
|
@@ -51,20 +64,21 @@ module GtfsReader
|
|
51
64
|
private
|
52
65
|
|
53
66
|
def create_logger
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
log.outputters << out
|
58
|
-
log.level = Log4r::INFO
|
59
|
-
log.debug { 'Starting GtfsReader...'.underline.colorize :yellow }
|
67
|
+
Logger.new($stderr).tap do |log|
|
68
|
+
log.level = Logger::INFO
|
69
|
+
log.debug { 'Starting GtfsReader...'.underline.colorize(:yellow) }
|
60
70
|
end
|
61
71
|
end
|
62
72
|
end
|
63
73
|
|
64
74
|
class NoOpLogger
|
65
|
-
def method_missing(*
|
75
|
+
def method_missing(*_args)
|
66
76
|
nil
|
67
77
|
end
|
78
|
+
|
79
|
+
def respond_to_missing?(_name, _include_private = false)
|
80
|
+
true
|
81
|
+
end
|
68
82
|
end
|
69
83
|
end
|
70
84
|
end
|
@@ -11,45 +11,44 @@ module GtfsReader
|
|
11
11
|
# Downloads remote Feed files, checks that they are valid, and passes each
|
12
12
|
# file in the feed to the handlers in the given [Source].
|
13
13
|
class SourceUpdater
|
14
|
-
|
15
|
-
|
14
|
+
# @param name [String] an arbitrary string describing this source
|
15
|
+
# @param source [Source]
|
16
16
|
def initialize(name, source)
|
17
|
-
@name
|
17
|
+
@name = name
|
18
|
+
@source = source
|
18
19
|
@temp_files = {}
|
19
20
|
end
|
20
21
|
|
21
22
|
# Call the "before" callback set on this source
|
22
23
|
def before_callbacks
|
23
|
-
if @source.before
|
24
|
-
@source.before.call fetch_data_set_identifier
|
25
|
-
end
|
24
|
+
@source.before.call(fetch_data_set_identifier) if @source.before
|
26
25
|
end
|
27
26
|
|
28
27
|
# Download the data from the remote server
|
29
28
|
def download_source
|
30
29
|
Log.debug { " Reading #{@source.url.green}" }
|
31
|
-
zip = Tempfile.new
|
30
|
+
zip = Tempfile.new('gtfs')
|
32
31
|
zip.binmode
|
33
32
|
zip << open(@source.url).read
|
34
33
|
zip.rewind
|
35
34
|
|
36
|
-
extract_to_tempfiles
|
35
|
+
extract_to_tempfiles(zip)
|
37
36
|
|
38
37
|
Log.debug { "Finished reading #{@source.url.green}" }
|
39
|
-
rescue
|
40
|
-
Log.error
|
38
|
+
rescue StandardException => e
|
39
|
+
Log.error(e.message)
|
41
40
|
raise e
|
42
41
|
ensure
|
43
|
-
zip.try
|
42
|
+
zip.try(:close)
|
44
43
|
end
|
45
44
|
|
46
45
|
def close
|
47
|
-
@temp_files.values.each
|
46
|
+
@temp_files.values.each(&:close)
|
48
47
|
end
|
49
48
|
|
50
49
|
# Parse the filenames in the feed and check which required and optional
|
51
50
|
# files are present.
|
52
|
-
|
51
|
+
# @raise [RequiredFilenamesMissing] if the feed is missing a file which is
|
53
52
|
# marked as "required" in the [FeedDefinition]
|
54
53
|
def check_files
|
55
54
|
@found_files = []
|
@@ -57,9 +56,7 @@ module GtfsReader
|
|
57
56
|
check_optional_files
|
58
57
|
# Add feed files of zip to the list of files to be processed
|
59
58
|
@source.feed_definition.files.each do |req|
|
60
|
-
if filenames.include?
|
61
|
-
@found_files << req
|
62
|
-
end
|
59
|
+
@found_files << req if filenames.include?(req.filename)
|
63
60
|
end
|
64
61
|
end
|
65
62
|
|
@@ -67,15 +64,15 @@ module GtfsReader
|
|
67
64
|
def check_columns
|
68
65
|
@found_files.each do |file|
|
69
66
|
@temp_files[file.filename].open do |data|
|
70
|
-
FileReader.new
|
67
|
+
FileReader.new(data, file, validate: true)
|
71
68
|
end
|
72
69
|
end
|
73
70
|
end
|
74
71
|
|
75
72
|
def process_files
|
76
73
|
@found_files.each do |file|
|
77
|
-
if @source.handlers.handler?
|
78
|
-
process_from_temp_file
|
74
|
+
if @source.handlers.handler?(file.name)
|
75
|
+
process_from_temp_file(file)
|
79
76
|
else
|
80
77
|
Log.warn { "Skipping #{file.filename.yellow} (no handler)" }
|
81
78
|
end
|
@@ -86,7 +83,7 @@ module GtfsReader
|
|
86
83
|
|
87
84
|
def extract_to_tempfiles(zip)
|
88
85
|
Zip::File.open(zip).each do |entry|
|
89
|
-
temp = Tempfile.new
|
86
|
+
temp = Tempfile.new("gtfs_file_#{entry.name}")
|
90
87
|
temp.binmode
|
91
88
|
temp << entry.get_input_stream.read
|
92
89
|
temp.close
|
@@ -96,22 +93,22 @@ module GtfsReader
|
|
96
93
|
|
97
94
|
# Check for the given list of expected filenames in the zip file
|
98
95
|
def check_missing_files(expected, found_color, missing_color)
|
99
|
-
check = '✔'.colorize
|
100
|
-
cross = '✘'.colorize
|
96
|
+
check = '✔'.colorize(found_color)
|
97
|
+
cross = '✘'.colorize(missing_color)
|
101
98
|
|
102
99
|
expected.map do |req|
|
103
100
|
filename = req.filename
|
104
|
-
if filenames.include?
|
105
|
-
Log.info { "#{filename.rjust
|
101
|
+
if filenames.include?(filename)
|
102
|
+
Log.info { "#{filename.rjust(filename_width)} [#{check}]" }
|
106
103
|
nil
|
107
104
|
else
|
108
|
-
Log.info { "#{filename.rjust
|
105
|
+
Log.info { "#{filename.rjust(filename_width)} [#{cross}]" }
|
109
106
|
filename
|
110
107
|
end
|
111
108
|
end.compact
|
112
109
|
end
|
113
110
|
|
114
|
-
|
111
|
+
# @return [Integer] the maximum string-width of the filenames, so they can be
|
115
112
|
# aligned when printed on the console.
|
116
113
|
def filename_width
|
117
114
|
@filename_width ||= @source.feed_definition.files.max do |a, b|
|
@@ -132,22 +129,22 @@ module GtfsReader
|
|
132
129
|
# the same if they happen to have the same size)
|
133
130
|
# - The current date/time (this will always result in a fresh download)
|
134
131
|
def fetch_data_set_identifier
|
135
|
-
if @source.url =~ /\A#{URI::
|
136
|
-
uri = URI
|
132
|
+
if @source.url =~ /\A#{URI::DEFAULT_PARSER.make_regexp}\z/
|
133
|
+
uri = URI(@source.url)
|
137
134
|
Net::HTTP.start(uri.host) do |http|
|
138
|
-
head_request = http.request_head
|
139
|
-
if head_request.key?
|
135
|
+
head_request = http.request_head(uri.path)
|
136
|
+
if head_request.key?('etag')
|
140
137
|
head_request['etag']
|
141
138
|
else
|
142
|
-
Log.warn
|
143
|
-
fetch_http_fallback_identifier
|
139
|
+
Log.warn("No ETag supplied with: #{uri.path}")
|
140
|
+
fetch_http_fallback_identifier(head_request)
|
144
141
|
end
|
145
142
|
end
|
146
143
|
else # it's not a url, it may be a file => last modified
|
147
144
|
begin
|
148
|
-
File.mtime
|
145
|
+
File.mtime(@source.url)
|
149
146
|
rescue StandardError => e
|
150
|
-
Log.error
|
147
|
+
Log.error(e)
|
151
148
|
raise e
|
152
149
|
end
|
153
150
|
end
|
@@ -156,9 +153,9 @@ module GtfsReader
|
|
156
153
|
# Find a "next best" ID when the HEAD request does not return an "ETag"
|
157
154
|
# header.
|
158
155
|
def fetch_http_fallback_identifier(head_request)
|
159
|
-
if head_request.key?
|
156
|
+
if head_request.key?('last-modified')
|
160
157
|
head_request['last-modified']
|
161
|
-
elsif head_request.key?
|
158
|
+
elsif head_request.key?('content-length')
|
162
159
|
head_request['content-length']
|
163
160
|
else
|
164
161
|
Time.now.to_s
|
@@ -169,27 +166,27 @@ module GtfsReader
|
|
169
166
|
do_parse = !GtfsReader.config.skip_parsing
|
170
167
|
hash = !!GtfsReader.config.return_hashes
|
171
168
|
|
172
|
-
Log.info
|
169
|
+
Log.info("Reading file #{file.filename.cyan}...")
|
173
170
|
begin
|
174
|
-
reader = FileReader.new
|
175
|
-
parse: do_parse, hash: hash
|
176
|
-
@source.handlers.handle_file
|
171
|
+
reader = FileReader.new(@temp_files[file.filename], file,
|
172
|
+
parse: do_parse, hash: hash)
|
173
|
+
@source.handlers.handle_file(file.name, reader)
|
177
174
|
end
|
178
175
|
end
|
179
176
|
|
180
|
-
|
177
|
+
# @raise [RequiredFilenamesMissing] if the feed is missing a file which is
|
181
178
|
# marked as "required" in the [FeedDefinition]
|
182
179
|
def check_required_files
|
183
180
|
Log.info { 'required files'.magenta }
|
184
181
|
files = @source.feed_definition.required_files
|
185
|
-
missing = check_missing_files
|
182
|
+
missing = check_missing_files(files, :green, :red)
|
186
183
|
raise RequiredFilenamesMissing, missing unless missing.empty?
|
187
184
|
end
|
188
185
|
|
189
186
|
def check_optional_files
|
190
187
|
Log.info { 'optional files'.cyan }
|
191
188
|
files = @source.feed_definition.optional_files
|
192
|
-
check_missing_files
|
189
|
+
check_missing_files(files, :cyan, :light_yellow)
|
193
190
|
end
|
194
191
|
end
|
195
192
|
end
|
data/lib/gtfs_reader/version.rb
CHANGED
@@ -3,27 +3,28 @@ module GtfsReader
|
|
3
3
|
# {Bumper} class which will modify this file to increase the version
|
4
4
|
module Version
|
5
5
|
# The following four lines are generated, so don't mess with them.
|
6
|
-
MAJOR =
|
7
|
-
MINOR =
|
6
|
+
MAJOR = 3
|
7
|
+
MINOR = 1
|
8
8
|
PATCH = 0
|
9
9
|
BUILD = nil
|
10
10
|
|
11
|
-
|
11
|
+
# @return [String] the current version in the form of +1.2.3.build+
|
12
12
|
def self.to_s
|
13
|
-
[MAJOR, MINOR, PATCH, BUILD].compact.join
|
13
|
+
[MAJOR, MINOR, PATCH, BUILD].compact.join('.')
|
14
14
|
end
|
15
15
|
|
16
16
|
# A helper class which bumps the version number stored in this file
|
17
17
|
class Bumper
|
18
|
-
PARTS = %i[major minor patch]
|
19
|
-
PATTERN =
|
18
|
+
PARTS = %i[major minor patch].freeze
|
19
|
+
PATTERN = /(\s+)MAJOR = \d+\s+MINOR = \d+\s+PATCH = \d+\s+BUILD = .+/
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
#
|
24
|
-
def initialize(filename=__FILE__
|
21
|
+
# @param part [Symbol] the part of the version to bump. one of major,
|
22
|
+
# minor, or patch
|
23
|
+
# @param filename [String] the file to edit
|
24
|
+
def initialize(part, filename = __FILE__)
|
25
25
|
raise "#{part} not one of #{PARTS}" unless PARTS.include? part
|
26
|
-
@filename
|
26
|
+
@filename = filename
|
27
|
+
@part = part
|
27
28
|
end
|
28
29
|
|
29
30
|
# Increase the version number and write it to this file
|
@@ -33,42 +34,42 @@ module GtfsReader
|
|
33
34
|
text = '\1' + ["MAJOR = #{parts[:major]}",
|
34
35
|
"MINOR = #{parts[:minor]}",
|
35
36
|
"PATCH = #{parts[:patch]}",
|
36
|
-
"BUILD = #{parts[:build] || 'nil'}"].join(
|
37
|
+
"BUILD = #{parts[:build] || 'nil'}"].join('\1')
|
37
38
|
|
38
|
-
out_data = File.read(
|
39
|
-
#puts out_data
|
40
|
-
File.open(
|
41
|
-
puts "Bumped version to #{
|
39
|
+
out_data = File.read(@filename).gsub(PATTERN, text)
|
40
|
+
# puts out_data
|
41
|
+
File.open(@filename, 'w') { |out| out << out_data }
|
42
|
+
puts "Bumped version to #{self}"
|
42
43
|
end
|
43
44
|
|
44
|
-
|
45
|
+
# @return [String] What the new version string will be.
|
45
46
|
def to_s
|
46
47
|
p = new_version
|
47
|
-
[p[:major], p[:minor], p[:patch], p[:build]].compact.join
|
48
|
+
[p[:major], p[:minor], p[:patch], p[:build]].compact.join('.')
|
48
49
|
end
|
49
50
|
|
50
51
|
private
|
51
52
|
|
52
53
|
def new_version
|
53
|
-
@
|
54
|
-
|
55
|
-
|
56
|
-
|
54
|
+
@new_version ||= { major: MAJOR,
|
55
|
+
minor: MINOR,
|
56
|
+
patch: PATCH,
|
57
|
+
build: BUILD }.merge(new_parts)
|
57
58
|
end
|
58
59
|
|
59
60
|
def new_parts
|
60
61
|
case @part
|
61
62
|
when :major then {
|
62
|
-
|
63
|
-
|
64
|
-
|
63
|
+
major: MAJOR + 1,
|
64
|
+
minor: 0,
|
65
|
+
patch: 0
|
65
66
|
}
|
66
67
|
when :minor then {
|
67
|
-
|
68
|
-
|
68
|
+
minor: MINOR + 1,
|
69
|
+
patch: 0
|
69
70
|
}
|
70
71
|
else {
|
71
|
-
|
72
|
+
patch: PATCH + 1
|
72
73
|
}
|
73
74
|
end
|
74
75
|
end
|