csv_party 0.0.1.pre9 → 1.0.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
module CSVParty
  # Class-level configuration macros. Every macro records its argument on the
  # memoized Configuration object returned by #config.
  module DSL
    # Registers a column definition.
    #
    # @param column  name of the column being declared
    # @param options [Hash] forwarded unchanged to Configuration#add_column
    # @param parser  optional block forwarded to Configuration#add_column
    def column(column, options = {}, &parser)
      config.add_column(column, options, &parser)
    end

    # Stores the block that is run for each row.
    def rows(&row_block)
      config.row_importer = row_block
    end

    # Stores the block that wraps the import of the whole file.
    def import(&file_block)
      config.file_importer = file_block
    end

    # Stores the error handler: the explicit setting when one is given,
    # otherwise the block.
    def errors(setting = nil, &handler)
      config.error_handler = setting || handler
    end

    # Stores the skipped-row handler: the explicit setting when one is given,
    # otherwise the block.
    def skipped_rows(setting = nil, &handler)
      config.skipped_row_handler = setting || handler
    end

    # Stores the aborted-row handler: the explicit setting when one is given,
    # otherwise the block.
    def aborted_rows(setting = nil, &handler)
      config.aborted_row_handler = setting || handler
    end

    # Records the named dependencies on the configuration and defines a
    # reader/writer pair for each of them on the extending class.
    #
    # @param args [Array<Symbol>] dependency names
    def depends_on(*args)
      config.add_dependency(*args)
      args.each { |dependency| attr_accessor dependency }
    end

    # @return [Configuration] the configuration, created on first access
    def config
      @config ||= Configuration.new
    end
  end
end
@@ -0,0 +1,157 @@
1
module CSVParty
  # Base class for every error defined in this module.
  class Error < StandardError
  end

  # Raised when a column refers to a named parser that is not available.
  class UnknownParserError < Error
    # @param column [#to_s] name of the column that requested the parser
    # @param parser [#to_s] requested parser method name
    # @param named_parsers [Array<#to_s>] parser method names that are available
    def initialize(column, parser, named_parsers)
      # The "parse_" prefix is removed so the message shows the bare names.
      parser = parser.to_s.gsub('parse_', '')
      parsers = named_parsers.map { |p| p.to_s.gsub('parse_', '') }

      super <<-MESSAGE
You're trying to use the :#{parser} parser for the :#{column} column, but it
doesn't exist. Available parsers are: :#{parsers.join(', :')}.
      MESSAGE
    end
  end

  # Raised when no CSV data has been given to the importer.
  class MissingCSVError < Error
    # @param importer [Object] its class name is used in the usage example
    def initialize(importer)
      super <<-MESSAGE
You must specify a file path, IO object, or string to import:

# File path, IO object, or string
csv = 'path/to/csv'
csv = File.open('path/to/csv')
csv = 'Header1,Header2\\nvalue1,value2\\n'

Then, you assign that to your importer one of two ways:

importer = #{importer.class.name}.new(csv)
# or
importer = #{importer.class.name}.new
importer.csv = csv
      MESSAGE
    end
  end

  # Raised when the given CSV file path does not exist.
  class NonexistentCSVFileError < Error
    # @param file_path [#to_s] the path that could not be found
    def initialize(file_path)
      super <<-MESSAGE
The CSV file you are trying to import was not found:

#{file_path}

NOTE: If you are trying to import a single line CSV string, please ensure that
your CSV string has a header row and at least one row of values. Single line
strings are assumed to be a path to a CSV file.
      MESSAGE
    end
  end

  # Raised when unrecognized options are passed.
  class UnrecognizedOptionsError < Error
    # @param unrecognized_options [Array] the option names that were rejected
    # @param valid_data_options [Array] stored, but not interpolated into the
    #   message (the data options are spelled out literally in the text)
    # @param valid_csv_options [Array] option names listed in the message
    # @param dependencies [Array] dependency names; listed only when non-empty
    def initialize(unrecognized_options, valid_data_options, valid_csv_options, dependencies)
      @unrecognized_options = unrecognized_options
      @valid_data_options = valid_data_options
      @valid_csv_options = valid_csv_options
      @dependencies = dependencies

      super csv_and_data_options_message + dependency_options_message
    end

    # @return [String] first part of the message: rejected and accepted options
    def csv_and_data_options_message
      <<-MESSAGE
The following options are not recognized: :#{@unrecognized_options.join(', :')}.
You can specify your CSV data via the :path, :file, or :content options, as well
as any options that the CSV library understands:

:#{@valid_csv_options.join("\n :")}
      MESSAGE
    end

    # @return [String] dependency section, or '' when there are no dependencies
    def dependency_options_message
      return '' unless @dependencies.any?

      <<-MESSAGE

Or assignments for dependencies:

:#{@dependencies.join("\n :")}
      MESSAGE
    end
  end

  # Raised when a column name is defined more than once.
  class DuplicateColumnError < Error
    # @param name [#to_s] the duplicated column name
    def initialize(name)
      super <<-MESSAGE
A column named :#{name} has already been defined, please choose a different name.
      MESSAGE
    end
  end

  # Raised when a column uses a reserved name.
  class ReservedColumnNameError < Error
    # @param reserved_column_names [Array] the reserved names, listed in the message
    def initialize(reserved_column_names)
      super <<-MESSAGE
The following column names are reserved for internal use, please use a different
column name: :#{reserved_column_names.join(', :')}.
      MESSAGE
    end
  end

  # Raised when the CSV lacks one or more expected headers.
  class MissingColumnError < Error
    # @param headers [Array] the headers found in the file
    # @param missing_columns [Array] the headers that were expected but absent
    def initialize(headers, missing_columns)
      columns = missing_columns.join("', '")
      super <<-MESSAGE
The CSV is missing column(s) with header(s) '#{columns}'. File has these
headers: #{headers.join(', ')}.
      MESSAGE
    end
  end

  # Raised when no row processor has been defined.
  class UndefinedRowProcessorError < Error
    def initialize
      super <<-MESSAGE
Your importer has to define a row processor which specifies what should be done
with each row. It should look something like this:

rows do |row|
row.column # access parsed column values
row.unparsed.column # access unparsed column values
end
      MESSAGE
    end
  end

  # Raised when a declared dependency has not been assigned.
  class MissingDependencyError < Error
    # @param importer [Object] its class name is used in the usage example
    # @param dependency [#to_s] name of the unassigned dependency
    def initialize(importer, dependency)
      super <<-MESSAGE
This importer depends on #{dependency}, but you didn't assign it.
You can do that when instantiating your importer:

#{importer.class.name}.new('path/to/csv', #{dependency}: #{dependency})

Or any time before you import:

importer = #{importer.class.name}.new('path/to/csv')
importer.#{dependency} = #{dependency}
importer.import!
      MESSAGE
    end
  end

  # Raised when an import block finishes without the rows being imported.
  class UnimportedRowsError < Error
    def initialize
      super <<-MESSAGE
The rows in your CSV file have not been imported.

You should include a call to import_rows! at the point in your import block
where you want them to be imported. It should look something like this:

import do
# do stuff before importing rows
import_rows!
# do stuff after importing rows
end
      MESSAGE
    end
  end
end
@@ -0,0 +1,71 @@
1
module CSVParty
  # Value parsers that turn raw CSV cell values into Ruby values.
  module Parsers
    # Returns the value untouched.
    def parse_raw(value)
      value
    end

    # @return [String] the value converted to a string with surrounding
    #   whitespace removed
    def parse_string(value)
      value.to_s.strip
    end

    # Case-insensitive boolean parser.
    #
    # @return [true, false, nil] true for "1"/"t"/"true", false for
    #   "0"/"f"/"false", nil for anything else
    def parse_boolean(value)
      value = value.to_s.strip.downcase

      if %w[1 t true].include?(value)
        true
      elsif %w[0 f false].include?(value)
        false
      end
    end

    # @return [Integer] the cleaned-up numeric text converted with String#to_i
    def parse_integer(value)
      prepare_numeric_value(value).to_i
    end

    # Uses Kernel#BigDecimal rather than BigDecimal.new, which has been removed
    # from the bigdecimal library and raises NoMethodError on current Rubies.
    #
    # NOTE(review): current bigdecimal versions raise ArgumentError when no
    # numeric text is left after clean-up (e.g. a blank cell) — confirm that
    # blank cells are intercepted before this parser is reached.
    #
    # @return [BigDecimal] the cleaned-up numeric text as a decimal
    def parse_decimal(value)
      BigDecimal(prepare_numeric_value(value))
    end

    # @param value [String] text to parse
    # @param format [String, nil] strptime format; when nil, Date.parse is used
    # @return [Date, nil] nil when parsing raises ArgumentError
    def parse_date(value, format = nil)
      if format.nil?
        Date.parse(value)
      else
        Date.strptime(value, format)
      end
    rescue ArgumentError
      nil
    end

    # @param value [String] text to parse
    # @param format [String, nil] strptime format; when nil, DateTime.parse is used
    # @return [Time, nil] nil when parsing raises ArgumentError
    def parse_time(value, format = nil)
      if format.nil?
        DateTime.parse(value).to_time
      else
        DateTime.strptime(value, format).to_time
      end
    rescue ArgumentError
      nil
    end

    private

    # Strips whitespace, converts accounting notation to a leading minus sign
    # and removes every character that is not a digit, "-" or ".".
    def prepare_numeric_value(value)
      value = value.to_s.strip
      value = convert_from_accounting_notation(value)
      strip_non_numeric_characters(value)
    end

    # Turns "(123)" into "-123"; any other value is returned unchanged.
    def convert_from_accounting_notation(value)
      if value =~ /\A\(.*\)\z/
        value.delete('()').insert(0, '-')
      else
        value
      end
    end

    # Removes everything except digits, "-" and ".".
    def strip_non_numeric_characters(value)
      value.gsub(/[^\-0-9.]/, '')
    end
  end
end
@@ -0,0 +1,83 @@
1
module CSVParty
  # One CSV row. Parsed column values are exposed as methods (via
  # method_missing), the raw values through #unparsed, and any other unknown
  # method is delegated to the runner.
  class Row
    attr_accessor :row_number, :csv_string, :unparsed

    # @param csv_row an object that answers #[] with a header and #to_csv
    # @param config  an object whose #columns yields name => options pairs;
    #   the options read here are :header, :parser, :format and :intercept_blanks
    # @param runner  receiver for named parser methods and other delegated calls
    def initialize(csv_row, config, runner)
      @csv_row = csv_row
      @config = config
      @runner = runner
      @attributes = OpenStruct.new
      parse_row!(csv_row)
    end

    private

    # Fills in csv_string, unparsed and the parsed attribute for each column.
    def parse_row!(csv_row)
      self.csv_string = csv_row.to_csv
      self.unparsed = extract_unparsed_values(csv_row)

      @config.columns.each do |column, options|
        @attributes[column] = parse_value(csv_row[options[:header]], options)
      end
    end

    # Builds an OpenStruct holding the raw cell value of every column.
    def extract_unparsed_values(csv_row)
      # Reads @config directly; a bare `config` call would only resolve through
      # method_missing delegation to the runner.
      @config.columns.each_with_object(OpenStruct.new) do |(column, options), raw|
        raw[column] = csv_row[options[:header]]
      end
    end

    # Parses one cell: blank values become nil when :intercept_blanks is set,
    # Symbol parsers are called as methods and anything else is run as a block.
    def parse_value(value, options)
      return nil if options[:intercept_blanks] && is_blank?(value)

      if options[:parser].is_a?(Symbol)
        parse_with_method(value, options)
      else
        parse_with_block(value, options)
      end
    end

    # Calls the named parser, passing :format along only when one is set.
    def parse_with_method(value, options)
      format = options[:format]
      parser = options[:parser]

      if format.nil?
        send(parser, value)
      else
        send(parser, value, format)
      end
    end

    # Runs the parser block with this row as self.
    def parse_with_block(value, options)
      instance_exec(value, &options[:parser])
    end

    # to_s keeps non-String values (which have no #strip) from raising here.
    def is_blank?(value)
      value.nil? || value.to_s.strip.empty?
    end

    def respond_to_missing?(method, include_private = false)
      @attributes.respond_to?(method) || @runner.respond_to?(method, true) || super
    end

    # Parsed attributes take precedence; everything else goes to the runner,
    # including its private methods. Blocks are forwarded too.
    def method_missing(method, *args, &block)
      if @attributes.respond_to?(method)
        @attributes.send(method, *args, &block)
      elsif @runner.respond_to?(method, true)
        @runner.send(method, *args, &block)
      else
        super
      end
    end
  end
end
@@ -0,0 +1,219 @@
1
+ require 'csv_party/parsers'
2
+
3
module CSVParty
  # Drives an import: validates the configuration, reads rows from the CSV and
  # runs the configured blocks. Unknown methods are delegated to the importer.
  class Runner
    include Parsers

    # Built once and reused for every ignored error.
    ErrorRow = Struct.new(:error, :line_number, :csv_string)

    attr_accessor :csv, :config, :importer

    # @param csv      CSV reader; #shift, #headers and #rewind are called on it
    # @param config   configuration read throughout the import
    # @param importer object that receives results and delegated method calls
    def initialize(csv, config, importer)
      self.csv = csv
      self.config = config
      self.importer = importer
      @_rows_have_been_imported = false
      # Starts at 1 and is incremented before each row is parsed, so the first
      # imported row is numbered 2.
      @_current_row_number = 1
    end

    # Runs the validations and then the import.
    #
    # @return [Boolean] true when the import ran to the end; false when it was
    #   aborted with abort_import!, in which case importer.aborted and
    #   importer.abort_message are set
    def import!
      raise_unless_row_processor_is_defined!
      raise_unless_all_named_parsers_exist!
      raise_unless_all_dependencies_are_present!
      initialize_csv!
      initialize_regex_headers!
      raise_unless_csv_has_all_columns!

      if config.file_importer
        instance_exec(&config.file_importer)
        raise_unless_rows_have_been_imported!
      else
        import_rows!
      end

      true
    rescue AbortedImportError => error
      importer.aborted = true
      importer.abort_message = error.message
      false
    end

    # @return the headers read from the CSV by initialize_csv!
    def present_columns
      @_headers
    end

    # @return config.required_columns minus the headers present in the CSV
    def missing_columns
      config.required_columns - present_columns
    end

    private

    # Reads one row so the headers become available, then rewinds.
    def initialize_csv!
      csv.shift
      @_headers = csv.headers
      csv.rewind
    end

    # Imports every remaining row, routing each control-flow error to its
    # handler. Aborted imports and malformed CSV errors are re-raised as is.
    def import_rows!
      loop do
        begin
          row = csv.shift
          break unless row
          import_row!(row)
        rescue NextRowError
          next
        rescue SkippedRowError => error
          handle_skipped_row(error)
        rescue AbortedRowError => error
          handle_aborted_row(error)
        rescue AbortedImportError, CSV::MalformedCSVError
          raise
        rescue StandardError => error
          # row is nil when csv.shift itself raised; calling to_csv on it
          # would replace the real error with a NoMethodError.
          handle_error(error, @_current_row_number, row && row.to_csv)
        end
      end

      @_rows_have_been_imported = true
    end

    # Parses one CSV row and runs the row importer block against it.
    def import_row!(csv_row)
      @_current_row_number += 1
      @_current_parsed_row = Row.new(csv_row, config, self)
      @_current_parsed_row.row_number = @_current_row_number
      instance_exec(@_current_parsed_row, &config.row_importer)
    end

    # Stops processing the current row without recording it.
    def next_row!
      raise NextRowError
    end

    # Stops processing the current row and hands it to the skipped-row handling.
    def skip_row!(message = nil)
      raise SkippedRowError, message
    end

    # Stops processing the current row and hands it to the aborted-row handling.
    def abort_row!(message = nil)
      raise AbortedRowError, message
    end

    # Stops the whole import; import! then returns false.
    def abort_import!(message)
      raise AbortedImportError, message
    end

    # Re-raises when no handler is configured, collects the error in
    # error_rows when the handler is :ignore, otherwise runs the handler block.
    def handle_error(error, line_number, csv_string)
      raise error unless config.error_handler

      if config.error_handler == :ignore
        error_rows << error_struct(error, line_number, csv_string)
      else
        instance_exec(error, line_number, csv_string, &config.error_handler)
      end
    end

    # Does nothing for :ignore; otherwise stores the message on the row and
    # either collects it on the importer or runs the handler block.
    def handle_skipped_row(error)
      return if config.skipped_row_handler == :ignore

      @_current_parsed_row[:skip_message] = error.message

      if config.skipped_row_handler.nil?
        importer.skipped_rows << @_current_parsed_row
      else
        instance_exec(@_current_parsed_row, &config.skipped_row_handler)
      end
    end

    # Does nothing for :ignore; otherwise stores the message on the row and
    # either collects it on the importer or runs the handler block.
    def handle_aborted_row(error)
      return if config.aborted_row_handler == :ignore

      @_current_parsed_row[:abort_message] = error.message

      if config.aborted_row_handler.nil?
        importer.aborted_rows << @_current_parsed_row
      else
        instance_exec(@_current_parsed_row, &config.aborted_row_handler)
      end
    end

    def error_struct(error, line_number, csv_string)
      ErrorRow.new(error, line_number, csv_string)
    end

    def raise_unless_row_processor_is_defined!
      return if config.row_importer

      raise UndefinedRowProcessorError.new
    end

    def raise_unless_rows_have_been_imported!
      return if @_rows_have_been_imported

      raise UnimportedRowsError.new
    end

    def raise_unless_all_dependencies_are_present!
      config.dependencies.each do |dependency|
        next unless importer.send(dependency).nil?

        # Pass the importer, not the runner: the error message interpolates
        # the class name of the object it is given.
        raise MissingDependencyError.new(importer, dependency)
      end
    end

    # This error has to be raised at runtime because, when the class body
    # is being executed, the parser methods won't be available unless
    # they are defined above the column definitions in the class body
    def raise_unless_all_named_parsers_exist!
      config.columns_with_named_parsers.each do |name, options|
        parser = options[:parser]
        next if named_parsers.include? parser

        raise UnknownParserError.new(name, parser, named_parsers)
      end
    end

    # Every public or private method on the runner or the importer whose name
    # starts with "parse_".
    def named_parsers
      (methods +
        private_methods +
        importer.methods +
        importer.private_methods).grep(/^parse_/)
    end

    def raise_unless_csv_has_all_columns!
      return if missing_columns.empty?

      raise MissingColumnError.new(present_columns, missing_columns)
    end

    # Replaces each regex header with the first CSV header it matches, falling
    # back to the column name when nothing matches.
    def initialize_regex_headers!
      config.columns_with_regex_headers.each do |name, options|
        found_header = @_headers.find do |header|
          options[:header].match(header)
        end
        options[:header] = found_header || name.to_s
      end
    end

    def respond_to_missing?(method, include_private = false)
      importer.respond_to?(method, true) || super
    end

    # Delegates unknown methods (including private ones) to the importer,
    # forwarding any block.
    def method_missing(method, *args, &block)
      if importer.respond_to?(method, true)
        importer.send(method, *args, &block)
      else
        super
      end
    end

    # Raised by next_row!
    class NextRowError < Error
    end

    # Raised by skip_row!
    class SkippedRowError < Error
    end

    # Raised by abort_row!
    class AbortedRowError < Error
    end

    # Raised by abort_import!
    class AbortedImportError < Error
    end
  end
end