gtfs-reader 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,26 @@
1
module GtfsReader
  module Config
    # Forwards column declarations to a wrapped definition, registering each
    # column under "<prefix>_<alias>" while recording the short alias in the
    # column's options.
    class PrefixedColumnSetter
      #@param definition [#col, #output_map] receives the forwarded calls
      #@param prefix [#to_sym] prepended (followed by an underscore) to every
      #  column alias
      def initialize(definition, prefix)
        @definition = definition
        @prefix = prefix.to_sym
      end

      # Declares a column on the wrapped definition using the prefixed name.
      #@param name_alias [Symbol,String] the un-prefixed column name; stored in
      #  the forwarded options under +:alias+
      #@param args [Array] forwarded to the definition; when the first element
      #  is a Hash it is used as the column options, otherwise the first
      #  element is replaced by a new options hash
      #@return whatever the wrapped definition's +col+ returns
      def col(name_alias, *args, &blk)
        name = "#{@prefix}_#{name_alias}"

        # Work on a copy so the hash handed in by the caller is never modified
        given = args.first
        opts = given.is_a?(::Hash) ? given.dup : {}
        opts[:alias] = name_alias

        forwarded = [opts, *args.drop(1)]
        @definition.col(name.to_sym, *forwarded, &blk)
      end

      # Passes the call straight through to the wrapped definition.
      #@return whatever the wrapped definition's +output_map+ returns
      def output_map(*args, &block)
        @definition.output_map(*args, &block)
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require 'active_support/core_ext/hash/reverse_merge'
2
+
3
+ require_relative 'feed_definition'
4
+ require_relative 'defaults/gtfs_feed_definition'
5
+ require_relative '../feed_handler'
6
+ require_relative '../bulk_feed_handler'
7
+
8
module GtfsReader
  module Config
    # A single source of GTFS data
    class Source
      attr_reader :name

      #@param name the name this source is known by
      def initialize(name)
        @name = name
        @feed_definition = Config::Defaults::FEED_DEFINITION
        @feed_handler = FeedHandler.new {}
      end

      #@param u [String] if given (and not blank), will be used as the URL for
      #  this source
      #@return [String] the URL of this source's ZIP file
      def url(u = nil)
        @url = u unless blank_value?(u)
        @url
      end

      # Define a block to call before the source is read. If this block
      # returns +false+, cancel processing the source
      #@return [Proc,nil] the most recently stored block, if any
      def before(&block)
        @before = block if block
        @before
      end

      # When a block is given, builds a new FeedDefinition and evaluates the
      # block in its context (the definition is also passed as the block's
      # argument); the result replaces the current definition.
      #@return the current feed definition
      def feed_definition(&block)
        if block
          @feed_definition = FeedDefinition.new.tap do |feed|
            feed.instance_exec(feed, &block)
          end
        end

        @feed_definition
      end

      # When a block is given, replaces the feed handler. A trailing Hash in
      # +args+ is treated as options; a truthy +:bulk+ option selects a
      # BulkFeedHandler (receiving that value), otherwise a FeedHandler is
      # built. The remaining +args+ and the block are passed to the handler.
      #@return the current feed handler
      def handlers(*args, &block)
        if block
          opts = args.last.is_a?(::Hash) ? args.pop : {}
          bulk = opts[:bulk] # nil when the option is absent
          @feed_handler =
            if bulk
              BulkFeedHandler.new(bulk, args, &block)
            else
              FeedHandler.new(args, &block)
            end
        end
        @feed_handler
      end

      private

      # Standard-library equivalent of the "blank" test previously done with
      # ActiveSupport's +present?+: nil, false, empty collections and
      # whitespace-only strings count as blank.
      def blank_value?(value)
        return true if value.nil? || value == false
        return value.match?(/\A[[:space:]]*\z/) if value.is_a?(::String)

        value.respond_to?(:empty?) ? !!value.empty? : false
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require_relative 'source'
2
+
3
module GtfsReader
  module Config
    # A registry of Source objects keyed by name. Because it derives from
    # BasicObject, nearly every method name called on it is routed through
    # #method_missing and treated as a source name.
    class Sources < ::BasicObject
      def initialize
        @sources = {}
      end

      # Iterates over the registered name/source pairs.
      def each(&block)
        @sources.each(&block)
      end

      #@param key the name a source was registered under
      #@return [Source,nil] the source with that name, if any
      def [](key)
        @sources[key]
      end

      # Looks up the source named after the called method, creating it on
      # first use. A given block is evaluated in the context of the source,
      # which is also passed as the block's argument.
      #@return [Source] the source for +name+
      def method_missing(name, *args, &block)
        source = (@sources[name] ||= Source.new(name))
        source.instance_exec(source, &block) if block
        source
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module GtfsReader
  class Configuration

    # Creates simple configuration parameters which may be set by the user.
    # Each parameter becomes a singleton method on this object: called with an
    # argument it stores (and returns) that value, called without arguments it
    # returns the stored value.
    #@param names [Array<Symbol>] the names of the parameters to create
    def parameter(*names)
      names.each do |name|
        ivar = "@#{name}"
        define_singleton_method(name) do |*values|
          # Decide on whether an argument was passed, not on its truthiness,
          # so that +false+ and +nil+ can be stored as well
          if values.empty?
            instance_variable_get(ivar)
          else
            instance_variable_set(ivar, values.first)
          end
        end
      end
    end

    # Creates a singleton method that lazily builds one +obj_class+ instance
    # (on first call) and returns it on every call. When the method is called
    # with a block, the block is evaluated in the context of that instance,
    # receiving the instance followed by the call's arguments.
    #@param name [Symbol] the name of the method to create
    #@param obj_class [Class] the class to instantiate
    #@param init_args [Array] arguments passed to +obj_class.new+
    def block_parameter(name, obj_class, *init_args)
      obj = nil
      define_singleton_method(name) do |*args, &block|
        obj ||= obj_class.new(*init_args)
        obj.instance_exec(obj, *args, &block) if block
        obj
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ require_relative 'configuration'
2
+ require_relative 'config/feed_definition'
3
+ require_relative 'config/sources'
4
+ require_relative 'source_updater'
5
+
6
module GtfsReader
  extend self

  #@overload config(*args, &blk)
  #  @param args [Array] an array of arguments to pass to the given block
  #  @param blk [Proc] a block to call in the context of the configuration
  #    object. Subsequent calls will use the same configuration for additional
  #    modification.
  #  @return [Configuration] the configuration object
  #
  #@overload config
  #  @return [Configuration] the configuration object
  #@raise [ArgumentError] if arguments are given without a block
  def config(*args, &blk)
    @cfg ||= create_config
    if blk
      @cfg.instance_exec(@cfg, *args, &blk)
    elsif !args.empty? # +any?+ would overlook nil/false arguments
      raise ArgumentError, 'arguments given without a block'
    end
    @cfg
  end

  # Updates every configured source, one after another.
  def update_all!
    config.sources.each { |name, _| update name }
  end

  # Updates a single source; logging is silenced via +Log.quiet+ unless the
  # +verbose+ configuration parameter is set.
  #@param name [Symbol,String] the name of the source to update
  def update(name)
    if config.verbose
      update_verbosely name
    else
      Log.quiet { update_verbosely name }
    end
  end

  private

  # Runs the update steps for one source on a SourceUpdater. A
  # SkipSourceError raised by any step is logged as a warning instead of
  # propagating; the updater's +finish+ is always called.
  #@raise [UnknownSourceError] if no source is registered under +name+
  def update_verbosely(name)
    # Sources are registered under the Symbol of the method that created them,
    # so normalise String names before the lookup
    key = name.respond_to?(:to_sym) ? name.to_sym : name
    source = config.sources[key]
    raise UnknownSourceError, "No source named '#{name}'" if source.nil?

    updater = SourceUpdater.new name, source
    begin
      updater.instance_exec do
        Log.info { "Updating #{name.to_s.green}".underline }
        before_callbacks
        read
        check_files
        check_columns
        process_files
      end
    rescue SkipSourceError => e
      Log.warn do
        # An exception raised without a message reports its class name; only
        # append text that was actually supplied
        detail = e.message.to_s
        msg = detail.empty? || detail == e.class.name ? '' : ": #{detail}"
        "#{'Skipping'.red} #{source.name.to_s.yellow}" + msg
      end
    ensure
      updater.finish
    end
  end

  # Builds the configuration object and declares its parameters.
  #@return [Configuration]
  def create_config
    Configuration.new.tap do |cfg|
      cfg.instance_exec do
        parameter :verbose
        parameter :skip_parsing
        parameter :return_hashes
        block_parameter :sources, Config::Sources
        block_parameter :feed_definition, Config::FeedDefinition
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module GtfsReader

  # Base class for errors raised while reading a single file
  class FileReaderError < StandardError; end

  # Raised when a file lacks columns that are required
  class RequiredColumnsMissing < FileReaderError
    #@return [Array] the names of the missing columns
    attr_reader :columns

    #@param columns [Array,Object,nil] the missing column name(s); a single
    #  value or +nil+ is coerced to an array
    def initialize(columns)
      @columns = Array(columns)
      super("Required columns missing: #{@columns.join(', ')}")
    end
  end

  # Raised when a zip file lacks files that are required
  class RequiredFilenamesMissing < FileReaderError
    #@return [Array] the names of the missing files
    attr_reader :filenames

    #@param filenames [Array,Object,nil] the missing file name(s); a single
    #  value or +nil+ is coerced to an array
    def initialize(filenames)
      @filenames = Array(filenames)
      super("Required files missing from zip file: #{@filenames.join(', ')}")
    end
  end

  class UnknownSourceError < StandardError; end
  class SkipSourceError < StandardError; end
  class HandlerMissingError < StandardError; end

  module Config
    class SourceDefinitionError < StandardError; end
    class FileDefinitionError < StandardError; end
  end
end
@@ -0,0 +1,30 @@
1
module GtfsReader
  # Holds one read callback per file name. Callbacks are registered by
  # evaluating a block against a FeedHandlerDsl.
  class FeedHandler
    #@param args [Array] passed to the block as its arguments
    #@param block [Proc] evaluated in the context of a FeedHandlerDsl; every
    #  method called inside it registers the block given to that method as the
    #  callback for the file named after the method. When no block is given,
    #  no callbacks are registered.
    def initialize(args = [], &block)
      @read_callbacks = {}
      # instance_exec raises LocalJumpError when called without a block
      FeedHandlerDsl.new(self).instance_exec(*args, &block) if block
    end

    #@return [Boolean] whether a callback was registered for +filename+
    def handler?(filename)
      @read_callbacks.key?(filename)
    end

    # Passes each element of +enumerator+ to the callback registered for
    # +filename+.
    def handle_file(filename, enumerator)
      enumerator.each(&@read_callbacks[filename])
    end

    # Stores +block+ as the callback for +filename+; +args+ are accepted but
    # not used.
    def create_read_handler(filename, *args, &block)
      @read_callbacks[filename] = block
    end
  end

  # Evaluation context for handler blocks: any unknown method call becomes a
  # callback registration on the wrapped FeedHandler.
  class FeedHandlerDsl
    def initialize(feed_handler)
      @feed_handler = feed_handler
    end

    def method_missing(filename, *args, &block)
      @feed_handler.create_read_handler(filename, *args, &block)
    end
  end
end
@@ -0,0 +1,120 @@
1
+ require 'active_support/core_ext/object/blank'
2
+ require 'csv'
3
+ require_relative 'file_row'
4
+
5
module GtfsReader
  # Options every CSV parser is created with: the first row holds the headers,
  # that header row is itself returned by the first read, and header names are
  # converted to symbols.
  CSV_OPTIONS = { headers: :first_row,
                  return_headers: true,
                  header_converters: :symbol }.freeze

  # Iterates over the rows in a single file using a provided definition.
  #@see #each
  class FileReader
    include Enumerable

    attr_reader :definition, :columns, :col_names

    #@param data [IO,String] CSV data
    #@param definition [FileDefinition] describes the expected columns in this
    #  file
    #@param opts [Hash] +:parse+ (default true) is handed to every FileRow,
    #  +:validate+ (default false) enables logging of the column check and
    #  raising on missing required columns, +:hash+ (default true) makes
    #  {#each} yield hashes instead of arrays
    #@raise [RequiredColumnsMissing] if validating and required columns are
    #  absent from the file's header row
    def initialize(data, definition, opts = {})
      opts = { parse: true, validate: false, hash: true }.merge(opts)

      # CSV.new takes its options as keywords
      @csv = CSV.new(data, **CSV_OPTIONS)
      @definition = definition
      @do_parse = opts[:parse]
      @return_hash = opts[:hash]
      @index = 0

      # Empty input yields no header row at all
      header_row = @csv.shift
      @csv_headers = header_row ? header_row.headers : []
      @columns = find_columns(opts[:validate])
    end

    def filename
      @definition.filename
    end

    #@overload each(&blk)
    #  @yieldparam hash [Hash] a hash of columns to their values in this row
    #@overload each
    #  @return [Enumerator] an {::Enumerator} that iterates over the rows in
    #    the file
    #@see FileRow#to_hash
    def each
      return to_enum(:each) unless block_given?

      while (row = shift)
        yield(@return_hash ? row.to_hash : row.to_a)
      end
    end

    #@return [FileRow,nil] the next row from the file, or +nil+ if the end of
    #  the file has been reached.
    def shift
      row = @csv.shift
      return unless row

      file_row(row).tap { @index += 1 }
    end

    private

    def file_row(row)
      FileRow.new(@index, @col_names, row, @definition, @do_parse)
    end

    # Check the list of headers in the file against the expected columns in
    # the definition
    #@return [Hash] maps each file header that names a defined column to the
    #  definition's entry for it
    def find_columns(validate)
      @found_columns = []
      # The coloured prefix only appears in log output, so build it only when
      # that output is requested
      prefix = validate ? "#{filename.yellow}:" : nil

      required = @definition.required_columns
      unless required.empty?
        Log.info { "#{prefix} #{'required columns'.magenta}" } if validate

        missing = check_columns(validate, prefix, required, :green, :red)
        raise RequiredColumnsMissing, missing if validate && !missing.empty?
      end

      optional = @definition.optional_columns
      unless optional.empty?
        Log.info { "#{prefix} #{'optional columns'.cyan}" } if validate
        check_columns(validate, prefix, optional, :cyan, :light_yellow)
      end

      known = @definition.columns.map(&:name)
      headers = @csv_headers.select { |h| known.include?(h) }

      @col_names ||= @found_columns.map(&:name)
      headers.each_with_object({}) { |h, found| found[h] = @definition[h] }
    end

    # Records every expected column that is present in the file's headers and,
    # when validating, logs one check/cross line per column.
    #@return [Array] the names of the expected columns absent from the file
    def check_columns(validate, prefix, expected, found_color, missing_color)
      if validate
        check = '✔'.colorize(found_color)
        cross = '✘'.colorize(missing_color)
      end

      expected.map do |col|
        name = col.name
        found = @csv_headers.include?(name)
        @found_columns << col if found

        if validate
          Log.info do
            mark = found ? check : cross
            "#{prefix} #{name.to_s.rjust column_width} [#{mark}]"
          end
        end
        found ? nil : name
      end.compact
    end

    # Length of the longest defined column name (0 when none are defined);
    # used to right-align names in the log output.
    def column_width
      @column_width ||= @definition.columns.map { |c| c.name.length }.max || 0
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require 'csv'
2
+
3
module GtfsReader
  # Contains the contents of a single row read in from the file
  class FileRow
    attr_reader :line_number

    #@param line_number [Integer] the line number from the source file
    #@param headers [Array<Symbol>] the columns included in {#to_hash} and
    #  {#to_a}
    #@param data [CSV::Row] the data for this row
    #@param definition [FileDefinition] the definition of the columns that the
    #  data in this row represent
    #@param do_parse [Boolean] when false, {#[]} returns the raw data
    def initialize(line_number, headers, data, definition, do_parse)
      @line_number = line_number
      @headers = headers
      @data = data
      @definition = definition
      @do_parse = do_parse
      @parsed = {}
    end

    #@return [Array<Symbol>]
    def headers
      @headers
    end

    #@param column [Symbol] the name of the column to fetch
    #@return the parsed data for the column at this row
    #@see #raw
    def [](column)
      return raw(column) unless @do_parse

      # Keyed on presence rather than truthiness, so a parser that returns nil
      # or false still runs only once per column
      @parsed.fetch(column) do
        @parsed[column] =
          ParserContext.new(column, self)
                       .instance_exec(raw(column), &@definition[column].parser)
      end
    end

    #@param (see #[])
    #@return the unparsed data from the column at this row
    def raw(column)
      @data[column]
    end

    #@return [Hash] a hash representing this row of data, where each key is the
    #  column name and each value is the parsed data for this row
    def to_hash
      headers.each_with_object({}) { |h, row| row[h] = self[h] }
    end

    #@return [Array] an array representing this row of data
    def to_a
      headers.map { |h| self[h] }
    end
  end

  # The object a column's parser block is evaluated against. Calling a method
  # named after another column returns that column's value from the same row.
  class ParserContext
    #@param column [Symbol] the column whose parser is being evaluated
    #@param file_row [FileRow] the row the column belongs to
    def initialize(column, file_row)
      @column, @file_row = column, file_row
    end

    #@raise [RuntimeError] if a parser refers to its own column
    #@raise [NoMethodError] if the name is not one of the row's headers and
    #  the row has no value for it
    def method_missing(column, *args, &block)
      # Column lookups never take arguments
      return super unless args.empty?

      if column == @column
        raise "Parser for '#{column}' cannot refer to itself"
      end

      # A column present in the row may legitimately hold nil or false
      return @file_row[column] if Array(@file_row.headers).include?(column)

      @file_row[column] or super
    end

    def respond_to_missing?(name, include_private = false)
      Array(@file_row.headers).include?(name) || super
    end
  end
end
+ end