gtfs-reader 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ module GtfsReader
2
+ module Config
3
# Wraps a definition object so that columns can be declared by a short alias
# while being registered on the definition under a prefixed name.
class PrefixedColumnSetter
  # @param definition [#col, #output_map] the object that receives the
  #   forwarded calls
  # @param prefix [#to_sym] joined to each alias with an underscore to build
  #   the full column name
  def initialize(definition, prefix)
    @definition = definition
    @prefix = prefix.to_sym
  end

  # Declares the column "<prefix>_<name_alias>" on the wrapped definition and
  # passes +name_alias+ along under the +:alias+ option.
  #
  # @param name_alias [Symbol, String] the unprefixed column name
  # @param args [Array] forwarded to the definition. When the first element
  #   is a Hash it is used as the options; any other first element is
  #   replaced by the options hash.
  # @return whatever the wrapped definition's +col+ returns
  def col(name_alias, *args, &blk)
    first, *rest = args
    # Work on a copy so the hash supplied by the caller is never modified.
    opts = first.is_a?(::Hash) ? first.dup : {}
    opts[:alias] = name_alias

    @definition.col(:"#{@prefix}_#{name_alias}", opts, *rest, &blk)
  end

  # Forwards directly to the wrapped definition's +output_map+.
  def output_map(*args, &block)
    @definition.output_map(*args, &block)
  end
end
25
+ end
26
+ end
@@ -0,0 +1,61 @@
1
+ require 'active_support/core_ext/hash/reverse_merge'
2
+
3
+ require_relative 'feed_definition'
4
+ require_relative 'defaults/gtfs_feed_definition'
5
+ require_relative '../feed_handler'
6
+ require_relative '../bulk_feed_handler'
7
+
8
+ module GtfsReader
9
+ module Config
10
# A single source of GTFS data
class Source
  attr_reader :name

  # @param name the identifier of this source
  def initialize(name)
    @name = name
    @feed_definition = Config::Defaults::FEED_DEFINITION
    @feed_handler = FeedHandler.new {}
  end

  # @param u [String] if given and not blank, will be used as the URL for
  #   this source
  # @return [String] the URL of this source's ZIP file
  def url(u = nil)
    @url = u unless blank_value?(u)
    @url
  end

  # Define a block to call before the source is read. If this block
  # returns +false+, cancel processing the source
  # @return [Proc, nil] the most recently stored block
  def before(&block)
    @before = block if block
    @before
  end

  # With a block: builds a new FeedDefinition, runs the block in its context
  # (also passing it as the block argument) and stores it.
  # @return the current feed definition
  def feed_definition(&block)
    if block
      definition = FeedDefinition.new
      definition.instance_exec(definition, &block)
      @feed_definition = definition
    end

    @feed_definition
  end

  # With a block: replaces the feed handler. A trailing Hash in +args+ is
  # treated as options; a truthy +:bulk+ option selects BulkFeedHandler.
  # @return the current feed handler
  def handlers(*args, &block)
    if block
      # A missing :bulk key already reads as nil, so no default is merged in.
      opts = Hash === args.last ? args.pop : {}
      @feed_handler =
        if opts[:bulk]
          BulkFeedHandler.new opts[:bulk], args, &block
        else
          FeedHandler.new args, &block
        end
    end
    @feed_handler
  end

  private

  # Plain-Ruby blank check so that #url does not rely on Object#present?
  # having been loaded by some other file.
  def blank_value?(value)
    return value.match?(/\A[[:space:]]*\z/) if value.is_a?(::String)

    value.respond_to?(:empty?) ? value.empty? : !value
  end
end
60
+ end
61
+ end
@@ -0,0 +1,25 @@
1
+ require_relative 'source'
2
+
3
+ module GtfsReader
4
+ module Config
5
# A collection of sources keyed by name. It inherits from BasicObject, so a
# message that BasicObject does not define ends up in #method_missing, where
# the message name is used as a source name.
class Sources < ::BasicObject
  def initialize
    @sources = {}
  end

  # Iterates over the stored name/source pairs.
  def each(&block)
    @sources.each(&block)
  end

  # @return the source stored under +key+, or +nil+
  def [](key)
    @sources[key]
  end

  # Returns the source called +name+, creating it on first use. When a block
  # is given it is run in the context of that source, which is also passed
  # as the block argument. Positional arguments are accepted and ignored.
  def method_missing(name, *args, &block)
    source = (@sources[name] ||= Source.new(name))
    source.instance_exec(source, &block) if block
    source
  end
end
24
+ end
25
+ end
@@ -0,0 +1,27 @@
1
+ module GtfsReader
2
# Holds user-settable configuration values. Accessors are created per
# instance through #parameter and #block_parameter.
class Configuration
  # Creates simple configuration parameters which may be set by the user.
  # Each generated method stores its first argument when called with
  # arguments and returns the stored value when called with none.
  # @param names [Array<Symbol>] the names of the parameters to create
  def parameter(*names)
    names.each do |name|
      ivar = "@#{name}"
      define_singleton_method name do |*values|
        # Branch on whether an argument was passed, not on its truthiness,
        # so that false and nil can be stored as well.
        if values.empty?
          instance_variable_get ivar
        else
          instance_variable_set ivar, values.first
        end
      end
    end
  end

  # Creates a method +name+ that lazily builds a single +obj_class+ instance
  # and returns it on every call. When called with a block, the block runs in
  # the context of that instance and receives the instance followed by the
  # call's arguments.
  # @param name [Symbol] the name of the method to create
  # @param obj_class [Class] the class to instantiate on first use
  # @param init_args [Array] arguments passed to +obj_class.new+
  def block_parameter(name, obj_class, *init_args)
    obj = nil
    define_singleton_method name do |*args, &block|
      obj ||= obj_class.new(*init_args)
      obj.instance_exec(obj, *args, &block) if block
      obj
    end
  end
end
27
+ end
@@ -0,0 +1,76 @@
1
+ require_relative 'configuration'
2
+ require_relative 'config/feed_definition'
3
+ require_relative 'config/sources'
4
+ require_relative 'source_updater'
5
+
6
module GtfsReader
  extend self

  # @overload config(*args, &blk)
  #   @param args [Array] an array of arguments to pass to the given block
  #   @param blk [Proc] a block to call in the context of the configuration
  #     object. Subsequent calls will use the same configuration for
  #     additional modification.
  #   @return [Configuration] the configuration object
  #
  # @overload config
  #   @return [Configuration] the configuration object
  # @raise [ArgumentError] if arguments are given without a block
  def config(*args, &blk)
    @cfg ||= create_config
    if blk
      # The configuration object itself is always the first block argument.
      @cfg.instance_exec(@cfg, *args, &blk)
    elsif args.any?
      raise ArgumentError, 'arguments given without a block'
    end
    @cfg
  end

  # Runs #update for every configured source.
  def update_all!
    config.sources.each { |source_name, _source| update(source_name) }
  end

  # Updates the named source, wrapping the work in +Log.quiet+ unless the
  # +verbose+ parameter is set.
  def update(name)
    return update_verbosely(name) if config.verbose

    Log.quiet { update_verbosely(name) }
  end

  private

  # Runs the updater steps for one source. A SkipSourceError is logged as a
  # warning instead of propagating, and the updater is always finished.
  # @raise [UnknownSourceError] if no source is configured under +name+
  def update_verbosely(name)
    source = config.sources[name]
    raise UnknownSourceError, "No source named '#{name}'" if source.nil?

    updater = SourceUpdater.new(name, source)
    begin
      # The steps run with the updater as +self+.
      updater.instance_exec do
        Log.info { "Updating #{name.to_s.green}".underline }
        before_callbacks
        read
        check_files
        check_columns
        process_files
      end
    rescue SkipSourceError => e
      Log.warn do
        suffix = if e.message
                   ": #{e.message}"
                 else
                   ''
                 end
        "#{'Skipping'.red} #{source.name.to_s.yellow}" + suffix
      end
    ensure
      updater.finish
    end
  end

  # Builds the configuration object and declares its parameters.
  def create_config
    cfg = Configuration.new
    cfg.instance_exec do
      %i[verbose skip_parsing return_hashes].each { |param| parameter param }
      block_parameter :sources, Config::Sources
      block_parameter :feed_definition, Config::FeedDefinition
    end
    cfg
  end
end
@@ -0,0 +1,29 @@
1
module GtfsReader
  # Base class for the errors below that describe a problem with a file's
  # contents.
  class FileReaderError < StandardError; end

  # Carries the names of the required columns that were not found.
  class RequiredColumnsMissing < FileReaderError
    # @return [Array] the missing column names
    attr_reader :columns

    # @param columns [Array, Object, nil] one or more missing column names.
    #   A single name or +nil+ is accepted so that a bare +raise+ of this
    #   class still produces this error rather than an ArgumentError.
    def initialize(columns = nil)
      @columns = Array(columns)
      super "Required columns missing: #{@columns.join ', '}"
    end
  end

  # Carries the names of the required files that were not found.
  class RequiredFilenamesMissing < FileReaderError
    # @return [Array] the missing filenames
    attr_reader :filenames

    # @param filenames [Array, Object, nil] one or more missing filenames;
    #   coerced the same way as RequiredColumnsMissing#initialize.
    def initialize(filenames = nil)
      @filenames = Array(filenames)
      super "Required files missing from zip file: #{@filenames.join ', '}"
    end
  end

  class UnknownSourceError < StandardError; end
  class SkipSourceError < StandardError; end
  class HandlerMissingError < StandardError; end

  module Config
    class SourceDefinitionError < StandardError; end
    class FileDefinitionError < StandardError; end
  end
end
@@ -0,0 +1,30 @@
1
+ module GtfsReader
2
# Stores one callback per filename and feeds the rows of a file to the
# callback registered for it.
class FeedHandler
  # @param args [Array] passed to the block as its arguments
  # @param block [Proc] evaluated in the context of a FeedHandlerDsl, where
  #   each method call registers a callback under the method's name. May be
  #   omitted, in which case no callbacks are registered.
  def initialize(args = [], &block)
    @read_callbacks = {}
    # instance_exec raises LocalJumpError when it is not given a block.
    FeedHandlerDsl.new(self).instance_exec(*args, &block) if block
  end

  # @return [Boolean] whether a callback is registered for +filename+
  def handler?(filename)
    @read_callbacks.key? filename
  end

  # Calls +each+ on +enumerator+ with the callback registered for
  # +filename+. If none is registered, +each+ is called without a block.
  def handle_file(filename, enumerator)
    enumerator.each(&@read_callbacks[filename])
  end

  # Registers +block+ as the callback for +filename+, replacing any previous
  # one. Extra positional arguments are accepted and ignored.
  def create_read_handler(filename, *args, &block)
    @read_callbacks[filename] = block
  end
end
20
+
21
# Evaluation context for the block given to a feed handler: any method
# called on it is turned into a read-handler registration.
class FeedHandlerDsl
  # @param feed_handler [#create_read_handler] receives the registrations
  def initialize(feed_handler)
    @feed_handler = feed_handler
  end

  # Uses the called method's name as the filename and forwards it, with the
  # arguments and block, to the feed handler's +create_read_handler+.
  def method_missing(filename, *args, &handler)
    @feed_handler.create_read_handler(filename, *args, &handler)
  end
end
30
+ end
@@ -0,0 +1,120 @@
1
+ require 'active_support/core_ext/object/blank'
2
+ require 'csv'
3
+ require_relative 'file_row'
4
+
5
+ module GtfsReader
6
# Options handed to CSV.new: the first row supplies the headers, that header
# row is itself returned as a row, and header names are converted to
# symbols. Frozen because the constant is shared.
CSV_OPTIONS = { headers: :first_row,
                return_headers: true,
                header_converters: :symbol }.freeze
9
+
10
# Iterates over the rows in a single file using a provided definition.
# @see #each
class FileReader
  include Enumerable

  attr_reader :definition, :columns, :col_names

  # @param data [IO,String] CSV data
  # @param definition [FileDefinition] describes the expected columns in this
  #   file
  # @param opts [Hash] +:parse+ (default true) is passed on to each FileRow,
  #   +:validate+ (default false) enables column logging and the
  #   required-column check, +:hash+ (default true) makes #each yield hashes
  #   instead of arrays
  # @raise [RequiredColumnsMissing] when validating and a required column is
  #   absent from the header row
  def initialize(data, definition, opts = {})
    opts = { parse: true, validate: false, hash: true }.merge(opts)

    # CSV.new takes its options as keyword arguments, so the shared hash is
    # splatted rather than passed as a second positional argument.
    @csv = CSV.new(data, **CSV_OPTIONS)
    @definition = definition
    @do_parse = opts[:parse]
    @return_hash = opts[:hash]
    @index = 0
    # Empty input yields no header row at all.
    header_row = @csv.shift
    @csv_headers = header_row ? header_row.headers : []
    @columns = find_columns(opts[:validate])
  end

  def filename
    @definition.filename
  end

  # @overload each(&blk)
  #   @yieldparam hash [Hash] a hash of columns to their values in this row
  # @overload each
  #   @return [Enumerator] an {::Enumerator} that iterates over the rows in
  #     the file
  # @see FileRow#to_hash
  def each
    return to_enum :each unless block_given?

    while (row = shift)
      yield(@return_hash ? row.to_hash : row.to_a)
    end
  end

  # @return [FileRow,nil] the next row from the file, or +nil+ if the end of
  #   the file has been reached.
  def shift
    csv_row = @csv.shift
    return nil unless csv_row

    file_row(csv_row).tap { @index += 1 }
  end

  private

  def file_row(row)
    FileRow.new @index, @col_names, row, @definition, @do_parse
  end

  # Check the list of headers in the file against the expected columns in
  # the definition
  # @return [Hash] each file header that the definition knows, mapped to
  #   what the definition returns for it
  def find_columns(validate)
    @found_columns = []
    prefix = "#{filename.yellow}:"

    required = @definition.required_columns
    unless required.empty?
      Log.info { "#{prefix} #{'required columns'.magenta}" } if validate

      missing = check_columns validate, prefix, required, :green, :red
      raise RequiredColumnsMissing, missing if validate && !missing.empty?
    end

    optional = @definition.optional_columns
    unless optional.empty?
      Log.info { "#{prefix} #{'optional columns'.cyan}" } if validate
      check_columns validate, prefix, optional, :cyan, :light_yellow
    end

    known = @definition.columns.map(&:name)
    @col_names ||= @found_columns.map(&:name)

    @csv_headers.select { |header| known.include? header }
                .each_with_object({}) { |header, cols| cols[header] = @definition[header] }
  end

  # Records every expected column that is present in the file's headers and
  # logs one line per column when validating.
  # @return [Array] the names of the expected columns that are absent
  def check_columns(validate, prefix, expected, found_color, missing_color)
    check = '✔'.colorize found_color
    cross = '✘'.colorize missing_color

    expected.map do |col|
      name = col.name
      missing =
        if @csv_headers.include? name
          @found_columns << col
          nil
        else
          name
        end

      if validate
        Log.info do
          mark = missing ? cross : check
          "#{prefix} #{name.to_s.rjust column_width} [#{mark}]"
        end
      end
      missing
    end.compact
  end

  # @return [Integer] the length of the longest column name in the
  #   definition, or 0 when it has no columns
  def column_width
    @column_width ||= @definition.columns.map { |col| col.name.length }.max || 0
  end
end
120
+ end
@@ -0,0 +1,66 @@
1
+ require 'csv'
2
+
3
+ module GtfsReader
4
# Contains the contents of a single row read in from the file
class FileRow
  # @return [Integer] the line number from the source file
  # @return [Array<Symbol>] the column names used by #to_hash and #to_a
  attr_reader :line_number, :headers

  # @param line_number [Integer] the line number from the source file
  # @param headers [Array<Symbol>] the column names to expose for this row
  # @param data [CSV::Row] the data for this row
  # @param definition [FileDefinition] the definition of the columns that the
  #   data in this row represent
  # @param do_parse [Boolean] when false, #[] returns the raw data
  def initialize(line_number, headers, data, definition, do_parse)
    @line_number = line_number
    @headers = headers
    @data = data
    @definition = definition
    @do_parse = do_parse
    @parsed = {}
  end

  # @param column [Symbol] the name of the column to fetch
  # @return the parsed data for the column at this row
  # @see #raw
  def [](column)
    return raw(column) unless @do_parse

    # Cache by key presence so that a parser returning nil or false is still
    # run only once per column.
    @parsed.fetch(column) do
      parser = @definition[column].parser
      @parsed[column] = ParserContext.new(column, self).instance_exec(raw(column), &parser)
    end
  end

  # @param (see #[])
  # @return the unparsed data from the column at this row
  def raw(column)
    @data[column]
  end

  # @return [Hash] a hash representing this row of data, where each key is the
  #   column name and each value is the parsed data for this row
  def to_hash
    headers.each_with_object({}) { |header, row| row[header] = self[header] }
  end

  # @return [Array] an array representing this row of data
  def to_a
    headers.map { |header| self[header] }
  end
end
53
+
54
# The object a column's parser block runs against. Calling a method named
# after another column returns that column's value from the same row.
class ParserContext
  # @param column [Symbol] the column whose parser is being evaluated
  # @param file_row [#[]] the row used to look up other columns
  def initialize(column, file_row)
    @column = column
    @file_row = file_row
  end

  # Looks +column+ up on the row. Raises when a parser asks for its own
  # column, and falls back to the default +method_missing+ when the looked-up
  # value is +nil+ or +false+.
  def method_missing(column)
    raise "Parser for '#{column}' cannot refer to itself" if column == @column

    @file_row[column] || super
  end
end
66
+ end