gtfs_reader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ require 'log4r'
2
+ require 'log4r/formatter/patternformatter'
3
+ require 'colorize'
4
+
5
module GtfsReader
  # Logging facade for the gem. Every call is forwarded to a lazily created
  # logger (a Log4r logger by default), which may be swapped out by passing a
  # block to {Log.logger} or silenced temporarily with {Log.quiet}.
  module Log
    # Symbolic level names accepted by {Log.level=}, mapped to the names that
    # are looked up in the underlying logger's +levels+ list.
    LEVEL_NAMES = {
      debug: 'DEBUG',
      info:  'INFO',
      warn:  'WARN',
      error: 'ERROR',
      fatal: 'FATAL'
    }.freeze

    class << self
      # Each of the following forwards its arguments and block to {#log} with
      # the matching level, and returns +nil+.
      def debug(*args, &block)
        log :debug, *args, &block
      end

      def info(*args, &block)
        log :info, *args, &block
      end

      def warn(*args, &block)
        log :warn, *args, &block
      end

      def error(*args, &block)
        log :error, *args, &block
      end

      def fatal(*args, &block)
        log :fatal, *args, &block
      end

      # Send a message to the current logger.
      #@param level [Symbol] name of the logger method to call
      #@return [nil] always, whatever the logger itself returns
      def log(level, *args, &block)
        # public_send (not send): every object inherits a private Kernel#warn,
        # and plain send would reach it on loggers that do not define their
        # own +warn+, printing to stderr instead of logging.
        logger.public_send level, *args, &block
        nil
      end

      # The logger in use. When a block is given, its result replaces the
      # current logger; otherwise the default logger is created on first use.
      #@return [Object] the current logger
      def logger
        @logger = yield if block_given?
        @logger ||= create_logger
      end

      # Set the logger's threshold.
      #@param lev [Symbol] one of :debug, :info, :warn, :error or :fatal
      #@raise [RuntimeError] if +lev+ is not one of the known levels
      def level=(lev)
        name = LEVEL_NAMES.fetch(lev) { raise "unknown log level '#{lev}'" }
        logger.level = logger.levels.index name
      end

      #@return the current threshold of the underlying logger
      def level
        logger.level
      end

      # Silence the logger for the duration of the given block
      #@return the value of the block
      def quiet
        previous = @logger
        @logger = NoOpLogger.new
        yield
      ensure
        # Restore even when the block raises. +previous+ may be nil, in which
        # case the default logger is created lazily on the next use.
        @logger = previous
      end

      private

      # Build the default logger: a Log4r logger named 'GtfsReader' that
      # writes pattern-formatted lines to stdout at INFO level.
      def create_logger
        Log4r::Logger.new('GtfsReader').tap do |log|
          out = Log4r::StdoutOutputter.new('log_stdout')
          out.formatter = Log4r::PatternFormatter.new pattern: '%d [%l]: %m'
          log.outputters << out
          log.level = Log4r::INFO
          log.debug { 'Starting GtfsReader...'.underline.colorize :yellow }
        end
      end
    end

    # A logger stand-in which accepts any message and does nothing.
    class NoOpLogger
      # Explicit no-ops for the level methods, so that the private Kernel#warn
      # can never be reached by accident, however the method is dispatched.
      %i[debug info warn error fatal].each do |name|
        define_method(name) { |*| nil }
      end

      # Swallow every other call.
      def method_missing(*args, &block)
        nil
      end

      # Kept consistent with {#method_missing}: every message is accepted.
      def respond_to_missing?(_name, _include_private = false)
        true
      end
    end
  end
end
@@ -0,0 +1,194 @@
1
+ require 'active_support/core_ext/object/try'
2
+ require 'csv'
3
+ require 'net/http'
4
+ require 'open-uri'
5
+ require 'uri'
6
+ require 'zip/filesystem'
7
+
8
+ require_relative 'file_reader'
9
+
10
module GtfsReader
  # Downloads remote Feed files, checks that they are valid, and passes each
  # file in the feed to the handlers in the given [Source].
  class SourceUpdater
    #@param name [String] an arbitrary string describing this source
    #@param source [Source]
    def initialize(name, source)
      @name = name
      @source = source
      # entry name within the zip => closed Tempfile holding its contents
      @temp_files = {}
    end

    # Call the "before" callback set on this source, passing it the
    # identifier of the remote data set.
    #@return the callback's result, or +nil+ when no callback is set
    def before_callbacks
      callback = @source.before
      callback.call fetch_data_set_identifier if callback
    end

    # Download the data from the remote server and extract every entry of
    # the archive into its own temporary file.
    #@raise whatever error the download or extraction raised, after logging
    #  its message
    def download_source
      Log.debug { " Reading #{@source.url.green}" }
      zip = Tempfile.new 'gtfs'
      zip.binmode
      zip << read_source_data
      zip.rewind

      extract_to_tempfiles zip

      Log.debug { "Finished reading #{@source.url.green}" }
    rescue StandardError => e
      # StandardError only: signals and interpreter-level errors should pass
      # through untouched.
      Log.error e.message
      raise
    ensure
      # The archive is fully extracted by now, so remove it from disk too.
      zip.close! if zip
    end

    # Close the temporary files holding the extracted feed files.
    def close
      @temp_files.values.each(&:close)
    end

    # Parse the filenames in the feed and check which required and optional
    # files are present.
    #@raise [RequiredFilenamesMissing] if the feed is missing a file which is
    #  marked as "required" in the [FeedDefinition]
    def check_files
      @found_files = []
      check_required_files
      check_optional_files
      # Add feed files of zip to the list of files to be processed
      @source.feed_definition.files.each do |file|
        @found_files << file if @temp_files.key? file.filename
      end
    end

    # Check that every file has its required columns
    def check_columns
      @found_files.each do |file|
        temp = @temp_files[file.filename]
        # Tempfile#open only reopens the file (it never yields to a block),
        # so the validating reader has to be built explicitly.
        temp.open
        # NOTE(review): FileReader lives in file_reader.rb (not visible here);
        # presumably validate: true makes it raise on missing required
        # columns -- confirm.
        FileReader.new temp, file, validate: true
        # Leave a freshly reopened file, positioned at its start, for
        # #process_files.
        temp.open
      end
    end

    # Pass every found file that has a handler to that handler; files
    # without one are skipped with a warning.
    def process_files
      @found_files.each do |file|
        if @source.handlers.handler? file.name
          process_from_temp_file file
        else
          Log.warn { "Skipping #{file.filename.yellow} (no handler)" }
        end
      end
    end

    private

    # Read the raw bytes of the feed from the source's URL (or local path).
    #@return [String] the archive's contents
    def read_source_data
      # Kernel#open stopped handling URLs in Ruby 3.0; URI.open (added by
      # open-uri in Ruby 2.5) handles both URLs and local paths. Fall back to
      # Kernel.open where URI.open does not exist.
      opener = URI.respond_to?(:open) ? URI : Kernel
      # Block form closes the handle; 'rb' keeps the zip data byte-exact.
      opener.open(@source.url, 'rb') { |io| io.read }
    end

    # Copy every file entry of the archive into its own temporary file,
    # remembered in @temp_files under the entry's name.
    #@param zip [Tempfile] the downloaded archive
    def extract_to_tempfiles(zip)
      # Block form closes the archive once all entries are copied.
      Zip::File.open(zip) do |archive|
        archive.each do |entry|
          # Directory entries have no content to extract.
          next if entry.name.end_with? '/'

          # Only the base name goes into the temp-file prefix: an entry such
          # as "dir/stops.txt" would otherwise put a path separator into it.
          temp = Tempfile.new "gtfs_file_#{File.basename entry.name}"
          temp.binmode
          entry.get_input_stream { |input| temp << input.read }
          temp.close
          @temp_files[entry.name] = temp
        end
      end
    end

    # Check for the given list of expected filenames in the zip file
    #@param expected [Array] file definitions, each responding to +filename+
    #@param found_color [Symbol] color of the mark printed for present files
    #@param missing_color [Symbol] color of the mark printed for absent files
    #@return [Array<String>] the expected filenames which are not in the zip
    def check_missing_files(expected, found_color, missing_color)
      check = '✔'.colorize found_color
      cross = '✘'.colorize missing_color

      expected.map(&:filename).reject do |filename|
        present = @temp_files.key? filename
        mark = present ? check : cross
        Log.info { "#{filename.rjust filename_width} [#{mark}]" }
        present
      end
    end

    #@return <FixNum> the maximum string-width of the filenames, so they can be
    #  aligned when printed on the console (0 when no files are defined).
    def filename_width
      @filename_width ||=
        @source.feed_definition.files.map { |f| f.filename.length }.max || 0
    end

    #@return [Array<String>] the names of the entries extracted from the zip
    def filenames
      @temp_files.keys
    end

    # Perform a HEAD request against the source's URL, looking for a unique
    # identifier for the remote data set. It will choose a header from the
    # result in the given order of preference:
    # - ETag
    # - Last-Modified
    # - Content-Length (may result in different data sets being considered
    #   the same if they happen to have the same size)
    # - The current date/time (this will always result in a fresh download)
    #
    # When the source is not a URL it is treated as a local file, and the
    # file's modification time is returned instead.
    def fetch_data_set_identifier
      if @source.url =~ /\A#{URI::regexp}\z/
        fetch_http_identifier URI(@source.url)
      else # it's not a url, it may be a file => last modified
        begin
          File.mtime @source.url
        rescue StandardError => e
          Log.error e
          raise
        end
      end
    end

    # Issue the HEAD request for {#fetch_data_set_identifier}.
    #@param uri [URI] the location of the remote data set
    def fetch_http_identifier(uri)
      # request_uri is the path plus the query string ("/" for an empty
      # path); only HTTP(S) URIs have it.
      target = uri.respond_to?(:request_uri) ? uri.request_uri : uri.path
      use_ssl = uri.scheme == 'https'

      Net::HTTP.start(uri.host, uri.port, use_ssl: use_ssl) do |http|
        head_request = http.request_head target
        if head_request.key? 'etag'
          head_request['etag']
        else
          Log.warn "No ETag supplied with: #{uri.path}"
          fetch_http_fallback_identifier head_request
        end
      end
    end

    # Find a "next best" ID when the HEAD request does not return an "ETag"
    # header.
    #@param head_request the HEAD response; anything answering +key?+ and +[]+
    def fetch_http_fallback_identifier(head_request)
      if head_request.key? 'last-modified'
        head_request['last-modified']
      elsif head_request.key? 'content-length'
        head_request['content-length']
      else
        Time.now.to_s
      end
    end

    # Wrap the extracted file in a FileReader, configured from the global
    # GtfsReader.config, and pass it to the source's handler for that file.
    def process_from_temp_file(file)
      do_parse = !GtfsReader.config.skip_parsing
      hash = !!GtfsReader.config.return_hashes

      Log.info "Reading file #{file.filename.cyan}..."
      reader = FileReader.new @temp_files[file.filename], file,
                              parse: do_parse, hash: hash
      @source.handlers.handle_file file.name, reader
    end

    # Log the presence of every required file.
    #@raise [RequiredFilenamesMissing] if the feed is missing a file which is
    #  marked as "required" in the [FeedDefinition]
    def check_required_files
      Log.info { 'required files'.magenta }
      files = @source.feed_definition.required_files
      missing = check_missing_files files, :green, :red
      raise RequiredFilenamesMissing, missing unless missing.empty?
    end

    # Log the presence of every optional file; a missing one is not an error.
    def check_optional_files
      Log.info { 'optional files'.cyan }
      files = @source.feed_definition.optional_files
      check_missing_files files, :cyan, :light_yellow
    end
  end
end
@@ -0,0 +1,77 @@
1
module GtfsReader
  # This module both contains the current version of GtfsReader, but also has a
  # {Bumper} class which will modify this file to increase the version
  module Version
    # The following four lines are generated, so don't mess with them.
    MAJOR = 1
    MINOR = 0
    PATCH = 0
    BUILD = nil

    #@return [String] the current version in the form of +1.2.3.build+
    def self.to_s
      [MAJOR, MINOR, PATCH, BUILD].compact.join '.'
    end

    # A helper class which bumps the version number stored in this file
    class Bumper
      # The parts of the version which can be bumped.
      PARTS = %i[major minor patch].freeze
      # Matches the four generated constant lines; group 1 captures the
      # newline and indentation in front of the first of them.
      PATTERN = %r[(\s+)MAJOR = \d+\s+MINOR = \d+\s+PATCH = \d+\s+BUILD = .+]

      #@param filename [String] the file to edit
      #@param part [String, Symbol] the part of the version to bump. one of
      #  major, minor, or patch
      #@raise [RuntimeError] if +part+ is not one of {PARTS}
      def initialize(filename=__FILE__, part)
        # Accept 'minor' as well as :minor.
        part = part.to_sym if part.respond_to? :to_sym
        raise "#{part} not one of #{PARTS}" unless PARTS.include? part
        @filename = filename
        @part = part
      end

      # Increase the version number and write it to this file
      def bump
        parts = new_version
        # inspect renders nil as +nil+ and quotes a String build, so the
        # rewritten line is valid Ruby either way.
        lines = ["MAJOR = #{parts[:major]}",
                 "MINOR = #{parts[:minor]}",
                 "PATCH = #{parts[:patch]}",
                 "BUILD = #{parts[:build].inspect}"]

        # Block form of gsub: the replacement text is used verbatim, with no
        # backslash sequences being interpreted.
        out_data = File.read(@filename).gsub(PATTERN) do
          # group 1 holds a newline and the indentation from the source
          indent = Regexp.last_match 1
          lines.map { |line| indent + line }.join
        end

        File.write @filename, out_data
        puts "Bumped version to #{to_s}"
      end

      #@return [String] What the new version string will be.
      def to_s
        parts = new_version
        [parts[:major], parts[:minor], parts[:patch], parts[:build]].compact.join '.'
      end

      private

      # The version after bumping: the current constants, overridden by the
      # parts changed by the bump. Computed once per Bumper.
      def new_version
        @vers ||= { major: MAJOR,
                    minor: MINOR,
                    patch: PATCH,
                    build: BUILD }.merge new_parts
      end

      # The parts which change: the bumped part is incremented and every
      # less significant part is reset to 0.
      def new_parts
        case @part
        when :major then { major: MAJOR + 1, minor: 0, patch: 0 }
        when :minor then { minor: MINOR + 1, patch: 0 }
        else { patch: PATCH + 1 }
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,193 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gtfs_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Jon Sangster
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: log4r
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubyzip
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: colorize
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.7'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: activesupport
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '4.0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '4.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.8'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.8'
97
+ - !ruby/object:Gem::Dependency
98
+ name: bundler
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: jeweler
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: guard-rspec
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '4.2'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '4.2'
139
+ description: 'Reads and parses zip files conforming to Google''s GTFS spec. Such files
140
+ can take up quite a bit of memory when inflated, so this gem prefers to read them
141
+ as a stream of rows. GTFS Spec: https://developers.google.com/transit/gtfs'
142
+ email: jon@ertt.ca
143
+ executables: []
144
+ extensions: []
145
+ extra_rdoc_files:
146
+ - LICENSE
147
+ - README.md
148
+ files:
149
+ - LICENSE
150
+ - README.md
151
+ - Rakefile
152
+ - lib/gtfs_reader.rb
153
+ - lib/gtfs_reader/bulk_feed_handler.rb
154
+ - lib/gtfs_reader/config/column.rb
155
+ - lib/gtfs_reader/config/defaults/gtfs_feed_definition.rb
156
+ - lib/gtfs_reader/config/feed_definition.rb
157
+ - lib/gtfs_reader/config/file_definition.rb
158
+ - lib/gtfs_reader/config/source.rb
159
+ - lib/gtfs_reader/config/sources.rb
160
+ - lib/gtfs_reader/configuration.rb
161
+ - lib/gtfs_reader/core.rb
162
+ - lib/gtfs_reader/exceptions.rb
163
+ - lib/gtfs_reader/feed_handler.rb
164
+ - lib/gtfs_reader/file_reader.rb
165
+ - lib/gtfs_reader/file_row.rb
166
+ - lib/gtfs_reader/log.rb
167
+ - lib/gtfs_reader/source_updater.rb
168
+ - lib/gtfs_reader/version.rb
169
+ homepage: http://github.com/sangster/gtfs_reader
170
+ licenses:
171
+ - GPL 3
172
+ metadata: {}
173
+ post_install_message:
174
+ rdoc_options: []
175
+ require_paths:
176
+ - lib
177
+ required_ruby_version: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ required_rubygems_version: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ requirements: []
188
+ rubyforge_project:
189
+ rubygems_version: 2.2.2
190
+ signing_key:
191
+ specification_version: 4
192
+ summary: Read General Transit Feed Specification zip files
193
+ test_files: []