gtfs_reader 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,70 @@
1
+ require 'log4r'
2
+ require 'log4r/formatter/patternformatter'
3
+ require 'colorize'
4
+
5
module GtfsReader
  # Module-level logging facade for GtfsReader. Every call is forwarded to a
  # single memoized logger object, which is a Log4r logger writing to stdout
  # unless a replacement is installed through {Log.logger}.
  module Log
    # Maps the level symbols accepted by {Log.level=} to the level names that
    # are looked up in the logger's +levels+ list.
    LEVEL_NAMES = {
      debug: 'DEBUG',
      info:  'INFO',
      warn:  'WARN',
      error: 'ERROR',
      fatal: 'FATAL'
    }.freeze

    class << self
      def debug(*args, &block); log :debug, *args, &block end
      def info(*args, &block);  log :info,  *args, &block end
      def warn(*args, &block);  log :warn,  *args, &block end
      def error(*args, &block); log :error, *args, &block end
      def fatal(*args, &block); log :fatal, *args, &block end

      # Forward a message to the current logger.
      #@param level [Symbol] name of the logger method to invoke
      #@return [nil] always, regardless of what the logger returns
      def log(level, *args, &block)
        logger.send level, *args, &block
        nil
      end

      # The logger that receives every message. When a block is given, its
      # result replaces the current logger. Otherwise the default logger is
      # created on first use.
      #@return [Object] the current logger
      def logger
        @logger = yield if block_given?
        @logger ||= create_logger
      end

      # Set the logger's threshold.
      #@param lev [Symbol] one of :debug, :info, :warn, :error or :fatal
      #@raise [RuntimeError] if +lev+ is not one of the known levels
      def level=(lev)
        name = LEVEL_NAMES.fetch(lev) { raise "unknown log level '#{lev}'" }
        logger.level = logger.levels.index name
      end

      #@return the current logger's level
      def level
        logger.level
      end

      # Silence the logger for the duration of the given block
      def quiet
        previous = @logger
        @logger = NoOpLogger.new
        yield
      ensure
        # Always restore the previous logger, even if the block raised.
        @logger = previous
      end

      private

      # Build the default logger: a Log4r logger named 'GtfsReader' with one
      # stdout outputter and its level set to INFO.
      def create_logger
        Log4r::Logger.new('GtfsReader').tap do |log|
          out = Log4r::StdoutOutputter.new('log_stdout')
          out.formatter = Log4r::PatternFormatter.new pattern: '%d [%l]: %m'
          log.outputters << out
          log.level = Log4r::INFO
          log.debug { 'Starting GtfsReader...'.underline.colorize :yellow }
        end
      end
    end

    # A stand-in logger which accepts any message and does nothing with it.
    class NoOpLogger
      # Swallow every call and return +nil+.
      def method_missing(*args, &block)
        nil
      end

      # Keep +respond_to?+ consistent with {#method_missing}: every method
      # name is accepted.
      def respond_to_missing?(name, include_private = false)
        true
      end
    end
  end
end
@@ -0,0 +1,194 @@
1
+ require 'active_support/core_ext/object/try'
2
+ require 'csv'
3
+ require 'net/http'
4
+ require 'open-uri'
5
+ require 'uri'
6
+ require 'zip/filesystem'
7
+
8
+ require_relative 'file_reader'
9
+
10
module GtfsReader
  # Downloads remote Feed files, checks that they are valid, and passes each
  # file in the feed to the handlers in the given [Source].
  class SourceUpdater
    #@param name [String] an arbitrary string describing this source
    #@param source [Source]
    def initialize(name, source)
      @name, @source = name, source
      # Maps each filename found in the zip to the Tempfile holding its data
      @temp_files = {}
    end

    # Call the "before" callback set on this source, passing it the
    # identifier of the remote data set. Does nothing if no callback is set.
    def before_callbacks
      callback = @source.before
      callback.call fetch_data_set_identifier if callback
    end

    # Download the data from the remote server and extract every entry of the
    # zip into its own temp file.
    #@raise [StandardError] any error is logged and then re-raised. Only
    #  StandardError is rescued, so interrupts and exit requests pass through
    #  without being logged.
    def download_source
      Log.debug { " Reading #{@source.url.green}" }
      zip = Tempfile.new 'gtfs'
      zip.binmode
      zip << read_source_data
      zip.rewind

      extract_to_tempfiles zip

      Log.debug { "Finished reading #{@source.url.green}" }
    rescue StandardError => e
      Log.error e.message
      raise
    ensure
      # zip is nil if Tempfile.new itself failed
      zip.close if zip
    end

    # Close every temp file extracted from the feed
    def close
      @temp_files.each_value(&:close)
    end

    # Parse the filenames in the feed and check which required and optional
    # files are present.
    #@raise [RequiredFilenamesMissing] if the feed is missing a file which is
    #  marked as "required" in the [FeedDefinition]
    def check_files
      @found_files = []
      check_required_files
      check_optional_files
      # Add feed files of zip to the list of files to be processed
      present = filenames
      @source.feed_definition.files.each do |definition|
        @found_files << definition if present.include? definition.filename
      end
    end

    # Check that every file has its required columns
    def check_columns
      @found_files.each do |file|
        temp = @temp_files[file.filename]
        # Tempfile#open only reopens the file and never yields to a block, so
        # the reader has to be built after the call, not inside a block.
        temp.open
        FileReader.new temp, file, validate: true
        # Leave the file open and positioned at the start for later readers.
        temp.rewind
      end
    end

    # Hand every file found in the feed to its handler. Files without a
    # handler are skipped with a warning.
    def process_files
      @found_files.each do |file|
        if @source.handlers.handler? file.name
          process_from_temp_file file
        else
          Log.warn { "Skipping #{file.filename.yellow} (no handler)" }
        end
      end
    end

    private

    # Read the raw bytes of the source. A URL that open-uri can open is
    # fetched through it, and anything else is read as a local file. Bare
    # Kernel#open is avoided on purpose, because it would run a url starting
    # with "|" as a shell command.
    #@return [String] the content of the feed's zip file
    def read_source_data
      uri = begin
        URI.parse @source.url
      rescue URI::InvalidURIError
        nil
      end

      if uri.respond_to? :open
        uri.open(&:read)
      else
        File.open(@source.url, 'rb', &:read)
      end
    end

    # Copy every entry of the zip into its own (closed) temp file, indexed by
    # the entry's name. The block form of Zip::File.open is used so the
    # archive is closed once all entries are copied.
    def extract_to_tempfiles(zip)
      Zip::File.open(zip) do |archive|
        archive.each do |entry|
          temp = Tempfile.new "gtfs_file_#{entry.name}"
          temp << entry.get_input_stream.read
          temp.close
          @temp_files[entry.name] = temp
        end
      end
    end

    # Check for the given list of expected filenames in the zip file, logging
    # one line for each.
    #@return [Array<String>] the expected filenames which were not found
    def check_missing_files(expected, found_color, missing_color)
      check = '✔'.colorize found_color
      cross = '✘'.colorize missing_color

      expected.map(&:filename).reject do |filename|
        found = filenames.include? filename
        mark = found ? check : cross
        Log.info { "#{filename.rjust filename_width} [#{mark}]" }
        found
      end
    end

    #@return [Integer] the maximum string-width of the filenames, so they can
    #  be aligned when printed on the console. 0 if the feed definition has no
    #  files.
    def filename_width
      @filename_width ||=
        @source.feed_definition.files.map { |f| f.filename.length }.max || 0
    end

    #@return [Array<String>] the names of the files extracted from the zip
    def filenames
      @temp_files.keys
    end

    # Perform a HEAD request against the source's URL, looking for a unique
    # identifier for the remote data set. It will choose a header from the
    # result in the given order of preference:
    # - ETag
    # - Last-Modified
    # - Content-Length (may result in different data sets being considered
    #   the same if they happen to have the same size)
    # - The current date/time (this will always result in a fresh download)
    #
    # If the source is not a URL, it is treated as a local file and its
    # modification time is returned.
    def fetch_data_set_identifier
      if @source.url =~ /\A#{URI::regexp}\z/
        uri = URI @source.url
        # request_uri keeps the query string and is never empty
        target = uri.respond_to?(:request_uri) ? uri.request_uri : uri.path
        # Honour the URL's own port, and use TLS for https URLs
        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
          head_request = http.request_head target
          if head_request.key? 'etag'
            head_request['etag']
          else
            Log.warn "No ETag supplied with: #{uri.path}"
            fetch_http_fallback_identifier head_request
          end
        end
      else # it's not a url, it may be a file => last modified
        begin
          File.mtime @source.url
        rescue StandardError => e
          Log.error e
          raise
        end
      end
    end

    # Find a "next best" ID when the HEAD request does not return an "ETag"
    # header.
    def fetch_http_fallback_identifier(head_request)
      if head_request.key? 'last-modified'
        head_request['last-modified']
      elsif head_request.key? 'content-length'
        head_request['content-length']
      else
        Time.now.to_s
      end
    end

    # Build a reader over the file's temp file and pass it to the handler
    # registered for that file.
    def process_from_temp_file(file)
      do_parse = !GtfsReader.config.skip_parsing
      hash = !!GtfsReader.config.return_hashes

      Log.info "Reading file #{file.filename.cyan}..."
      reader = FileReader.new @temp_files[file.filename], file,
                              parse: do_parse, hash: hash
      @source.handlers.handle_file file.name, reader
    end

    #@raise [RequiredFilenamesMissing] if the feed is missing a file which is
    #  marked as "required" in the [FeedDefinition]
    def check_required_files
      Log.info { 'required files'.magenta }
      files = @source.feed_definition.required_files
      missing = check_missing_files files, :green, :red
      raise RequiredFilenamesMissing, missing unless missing.empty?
    end

    # Log which of the optional files are present. Never raises for a
    # missing file.
    def check_optional_files
      Log.info { 'optional files'.cyan }
      files = @source.feed_definition.optional_files
      check_missing_files files, :cyan, :light_yellow
    end
  end
end
@@ -0,0 +1,77 @@
1
module GtfsReader
  # This module contains the current version of GtfsReader, and also has a
  # {Bumper} class which will modify this file to increase the version
  module Version
    # The following four lines are generated, so don't mess with them.
    MAJOR = 1
    MINOR = 0
    PATCH = 0
    BUILD = nil

    #@return [String] the current version in the form of +1.2.3.build+. The
    #  build part is left out when it is nil.
    def self.to_s
      [MAJOR, MINOR, PATCH, BUILD].compact.join '.'
    end

    # A helper class which bumps the version number stored in this file
    class Bumper
      PARTS = %i[major minor patch].freeze
      # Matches the four generated constant lines. Group 1 captures the
      # whitespace (newline and indentation) preceding MAJOR.
      PATTERN = %r[(\s+)MAJOR = \d+\s+MINOR = \d+\s+PATCH = \d+\s+BUILD = .+]

      #@param filename [String] the file to edit
      #@param part [String, Symbol] the part of the version to bump. one of
      #  major, minor, or patch
      #@raise [RuntimeError] if +part+ is not one of {PARTS}
      def initialize(filename=__FILE__, part)
        # PARTS holds symbols, so a String such as "minor" is converted first
        part = part.to_sym if part.respond_to? :to_sym
        raise "#{part} not one of #{PARTS}" unless PARTS.include? part
        @filename, @part = filename, part
      end

      # Increase the version number and write it to this file
      def bump
        parts = new_version
        # inspect renders nil as "nil" and quotes a String build, so the
        # generated line is always valid Ruby
        lines = ["MAJOR = #{parts[:major]}",
                 "MINOR = #{parts[:minor]}",
                 "PATCH = #{parts[:patch]}",
                 "BUILD = #{parts[:build].inspect}"]

        out_data = File.read(@filename).gsub(PATTERN) do
          # group 1 holds a newline and the indentation from the source
          indent = Regexp.last_match 1
          lines.map { |line| indent + line }.join
        end
        File.open(@filename, 'w') { |out| out << out_data }
        puts "Bumped version to #{to_s}"
      end

      #@return [String] What the new version string will be.
      def to_s
        p = new_version
        [p[:major], p[:minor], p[:patch], p[:build]].compact.join '.'
      end

      private

      # The current version with the bumped parts applied. Memoized.
      #@return [Hash] with the keys :major, :minor, :patch and :build
      def new_version
        @vers ||= { major: MAJOR,
                    minor: MINOR,
                    patch: PATCH,
                    build: BUILD }.merge new_parts
      end

      # The parts which change for the requested bump. Bumping a part resets
      # every less-significant part to 0.
      def new_parts
        case @part
        when :major then { major: MAJOR + 1, minor: 0, patch: 0 }
        when :minor then { minor: MINOR + 1, patch: 0 }
        else { patch: PATCH + 1 }
        end
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,193 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gtfs_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Jon Sangster
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: log4r
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubyzip
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: colorize
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.7'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: activesupport
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '4.0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '4.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.8'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.8'
97
+ - !ruby/object:Gem::Dependency
98
+ name: bundler
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: jeweler
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: guard-rspec
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '4.2'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '4.2'
139
+ description: 'Reads and parses zip files conforming to Google''s GTFS spec. Such files
140
+ can take up quite a bit of memory when inflated, so this gem prefers to read them
141
+ as a stream of rows. GTFS Spec: https://developers.google.com/transit/gtfs'
142
+ email: jon@ertt.ca
143
+ executables: []
144
+ extensions: []
145
+ extra_rdoc_files:
146
+ - LICENSE
147
+ - README.md
148
+ files:
149
+ - LICENSE
150
+ - README.md
151
+ - Rakefile
152
+ - lib/gtfs_reader.rb
153
+ - lib/gtfs_reader/bulk_feed_handler.rb
154
+ - lib/gtfs_reader/config/column.rb
155
+ - lib/gtfs_reader/config/defaults/gtfs_feed_definition.rb
156
+ - lib/gtfs_reader/config/feed_definition.rb
157
+ - lib/gtfs_reader/config/file_definition.rb
158
+ - lib/gtfs_reader/config/source.rb
159
+ - lib/gtfs_reader/config/sources.rb
160
+ - lib/gtfs_reader/configuration.rb
161
+ - lib/gtfs_reader/core.rb
162
+ - lib/gtfs_reader/exceptions.rb
163
+ - lib/gtfs_reader/feed_handler.rb
164
+ - lib/gtfs_reader/file_reader.rb
165
+ - lib/gtfs_reader/file_row.rb
166
+ - lib/gtfs_reader/log.rb
167
+ - lib/gtfs_reader/source_updater.rb
168
+ - lib/gtfs_reader/version.rb
169
+ homepage: http://github.com/sangster/gtfs_reader
170
+ licenses:
171
+ - GPL 3
172
+ metadata: {}
173
+ post_install_message:
174
+ rdoc_options: []
175
+ require_paths:
176
+ - lib
177
+ required_ruby_version: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ required_rubygems_version: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ requirements: []
188
+ rubyforge_project:
189
+ rubygems_version: 2.2.2
190
+ signing_key:
191
+ specification_version: 4
192
+ summary: Read General Transit Feed Specification zip files
193
+ test_files: []