csv_party 0.0.1.pre9 → 1.0.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,38 @@
1
module CSVParty
  # Macros used to declare an importer. Each macro records what it is given
  # on the memoized Configuration returned by #config.
  module DSL
    # Declares a column by delegating to Configuration#add_column.
    #
    # @param column name of the column
    # @param options [Hash] column options, passed through untouched
    # @param block optional block, passed through untouched
    def column(column, options = {}, &block)
      config.add_column(column, options, &block)
    end

    # Stores the given block as the configuration's row importer.
    def rows(&block)
      config.row_importer = block
    end

    # Stores the given block as the configuration's file importer.
    def import(&block)
      config.file_importer = block
    end

    # Sets the error handler. An explicit setting wins over a block.
    def errors(setting = nil, &block)
      handler = setting || block
      config.error_handler = handler
    end

    # Sets the skipped row handler. An explicit setting wins over a block.
    def skipped_rows(setting = nil, &block)
      handler = setting || block
      config.skipped_row_handler = handler
    end

    # Sets the aborted row handler. An explicit setting wins over a block.
    def aborted_rows(setting = nil, &block)
      handler = setting || block
      config.aborted_row_handler = handler
    end

    # Registers the named dependencies with the configuration and defines a
    # reader/writer pair for each of them.
    #
    # @return [Array] the names that were passed in
    def depends_on(*args)
      config.add_dependency(*args)
      args.each { |dependency| attr_accessor dependency }
    end

    # Lazily built configuration that all of the macros above write to.
    def config
      @config ||= Configuration.new
    end
  end
end
@@ -0,0 +1,157 @@
1
module CSVParty
  # Base class of all CSVParty errors defined here; rescuing it catches any
  # of them.
  class Error < StandardError
  end

  # Message for a column that asks for a parser which is not available.
  class UnknownParserError < Error
    # @param column name of the column that requested the parser
    # @param parser requested parser method name, e.g. :parse_money
    # @param named_parsers [Array] available parser method names
    def initialize(column, parser, named_parsers)
      # Only the leading "parse_" is a prefix; stripping every occurrence
      # would mangle names such as :parse_sparse_matrix.
      prefix = /\Aparse_/
      parser = parser.to_s.sub(prefix, '')
      parsers = named_parsers.map { |name| name.to_s.sub(prefix, '') }

      super <<-MESSAGE
You're trying to use the :#{parser} parser for the :#{column} column, but it
doesn't exist. Available parsers are: :#{parsers.join(', :')}.
      MESSAGE
    end
  end

  # Message explaining how to hand CSV data to an importer.
  class MissingCSVError < Error
    # @param importer the importer instance; only its class name is used
    def initialize(importer)
      super <<-MESSAGE
You must specify a file path, IO object, or string to import:

# File path, IO object, or string
csv = 'path/to/csv'
csv = File.open('path/to/csv')
csv = 'Header1,Header2\\nvalue1,value2\\n'

Then, you assign that to your importer one of two ways:

importer = #{importer.class.name}.new(csv)
# or
importer = #{importer.class.name}.new
importer.csv = csv
      MESSAGE
    end
  end

  # Message for a CSV file path that could not be found.
  class NonexistentCSVFileError < Error
    # @param file_path the path that was looked up
    def initialize(file_path)
      super <<-MESSAGE
The CSV file you are trying to import was not found:

#{file_path}

NOTE: If you are trying to import a single line CSV string, please ensure that
your CSV string has a header row and at least one row of values. Single line
strings are assumed to be a path to a CSV file.
      MESSAGE
    end
  end

  # Message listing options that were not recognized, followed by the
  # options that are valid.
  class UnrecognizedOptionsError < Error
    # @param unrecognized_options [Array] the offending option names
    # @param valid_data_options [Array] stored, but not used in the message
    # @param valid_csv_options [Array] option names understood by the CSV library
    # @param dependencies [Array] dependency names that may also be assigned
    def initialize(unrecognized_options, valid_data_options, valid_csv_options, dependencies)
      @unrecognized_options = unrecognized_options
      @valid_data_options = valid_data_options
      @valid_csv_options = valid_csv_options
      @dependencies = dependencies

      super csv_and_data_options_message + dependency_options_message
    end

    # First part of the message: the unrecognized options and the valid
    # CSV options.
    def csv_and_data_options_message
      <<-MESSAGE
The following options are not recognized: :#{@unrecognized_options.join(', :')}.
You can specify your CSV data via the :path, :file, or :content options, as well
as any options that the CSV library understands:

:#{@valid_csv_options.join("\n :")}
      MESSAGE
    end

    # Second part of the message: the dependency names, or an empty string
    # when there are no dependencies.
    def dependency_options_message
      return '' unless @dependencies.any?

      <<-MESSAGE

Or assignments for dependencies:

:#{@dependencies.join("\n :")}
      MESSAGE
    end
  end

  # Message for a column name that has already been defined.
  class DuplicateColumnError < Error
    # @param name the duplicated column name
    def initialize(name)
      super <<-MESSAGE
A column named :#{name} has already been defined, please choose a different name.
      MESSAGE
    end
  end

  # Message listing the column names that are reserved.
  class ReservedColumnNameError < Error
    # @param reserved_column_names [Array] the reserved names
    def initialize(reserved_column_names)
      super <<-MESSAGE
The following column names are reserved for internal use, please use a different
column name: :#{reserved_column_names.join(', :')}.
      MESSAGE
    end
  end

  # Message for a CSV that lacks one or more expected headers.
  class MissingColumnError < Error
    # @param headers [Array] headers present in the file
    # @param missing_columns [Array] headers that were expected but not found
    def initialize(headers, missing_columns)
      columns = missing_columns.join("', '")
      super <<-MESSAGE
The CSV is missing column(s) with header(s) '#{columns}'. File has these
headers: #{headers.join(', ')}.
      MESSAGE
    end
  end

  # Message explaining that an importer needs a `rows` block.
  class UndefinedRowProcessorError < Error
    def initialize
      super <<-MESSAGE
Your importer has to define a row processor which specifies what should be done
with each row. It should look something like this:

rows do |row|
row.column # access parsed column values
row.unparsed.column # access unparsed column values
end
      MESSAGE
    end
  end

  # Message for a declared dependency that was never assigned.
  class MissingDependencyError < Error
    # @param importer the importer instance; only its class name is used
    # @param dependency name of the unassigned dependency
    def initialize(importer, dependency)
      super <<-MESSAGE
This importer depends on #{dependency}, but you didn't assign it.
You can do that when instantiating your importer:

#{importer.class.name}.new('path/to/csv', #{dependency}: #{dependency})

Or any time before you import:

importer = #{importer.class.name}.new('path/to/csv')
importer.#{dependency} = #{dependency}
importer.import!
      MESSAGE
    end
  end

  # Message for an import block that never called import_rows!.
  class UnimportedRowsError < Error
    def initialize
      super <<-MESSAGE
The rows in your CSV file have not been imported.

You should include a call to import_rows! at the point in your import block
where you want them to be imported. It should look something like this:

import do
# do stuff before importing rows
import_rows!
# do stuff after importing rows
end
      MESSAGE
    end
  end
end
@@ -0,0 +1,71 @@
1
module CSVParty
  # Value parsers, one `parse_*` method per supported type.
  module Parsers
    # Returns the value untouched.
    def parse_raw(value)
      value
    end

    # Returns the value as a String with surrounding whitespace removed.
    def parse_string(value)
      value.to_s.strip
    end

    # Case-insensitively maps "1", "t", "true" to true and "0", "f", "false"
    # to false.
    #
    # @return [true, false, nil] nil for anything else
    def parse_boolean(value)
      value = value.to_s.strip.downcase

      if %w[1 t true].include?(value)
        true
      elsif %w[0 f false].include?(value)
        false
      end
    end

    # Parses an Integer after cleaning the value with #prepare_numeric_value.
    # String#to_i yields 0 when no digits are left.
    def parse_integer(value)
      prepare_numeric_value(value).to_i
    end

    # Parses a BigDecimal after cleaning the value with
    # #prepare_numeric_value.
    #
    # Uses Kernel#BigDecimal because BigDecimal.new has been removed from
    # current versions of the bigdecimal library.
    #
    # @raise [ArgumentError] when the cleaned value is not a valid number
    def parse_decimal(value)
      BigDecimal(prepare_numeric_value(value))
    end

    # Parses a Date, using Date.strptime when a format is given and
    # Date.parse otherwise.
    #
    # @return [Date, nil] nil when the value cannot be parsed or is not a
    #   String (e.g. nil)
    def parse_date(value, format = nil)
      if format.nil?
        Date.parse(value)
      else
        Date.strptime(value, format)
      end
    rescue ArgumentError, TypeError
      # TypeError covers nil and other non-String values, so they yield nil
      # like any other unparseable input.
      nil
    end

    # Parses a Time, using DateTime.strptime when a format is given and
    # DateTime.parse otherwise.
    #
    # @return [Time, nil] nil when the value cannot be parsed or is not a
    #   String (e.g. nil)
    def parse_time(value, format = nil)
      if format.nil?
        DateTime.parse(value).to_time
      else
        DateTime.strptime(value, format).to_time
      end
    rescue ArgumentError, TypeError
      # TypeError covers nil and other non-String values, so they yield nil
      # like any other unparseable input.
      nil
    end

    private

    # Strips whitespace, converts accounting notation and removes every
    # character that is not a digit, a minus sign or a dot.
    def prepare_numeric_value(value)
      value = value.to_s.strip
      value = convert_from_accounting_notation(value)
      strip_non_numeric_characters(value)
    end

    # Turns a value wrapped in parentheses, e.g. "(1.50)", into its negative
    # form "-1.50". Other values are returned unchanged.
    def convert_from_accounting_notation(value)
      return value unless value =~ /\A\(.*\)\z/

      "-#{value.delete('()')}"
    end

    # Removes everything except digits, minus signs and dots.
    def strip_non_numeric_characters(value)
      value.gsub(/[^\-0-9.]/, '')
    end
  end
end
@@ -0,0 +1,83 @@
1
module CSVParty
  # One CSV row after parsing. Parsed values are kept in an OpenStruct and
  # are reachable as methods on the row through #method_missing; the
  # unparsed values are available via #unparsed.
  class Row
    attr_accessor :row_number, :csv_string, :unparsed

    # @param csv_row object that responds to #[] (keyed by header) and #to_csv
    # @param config object whose #columns yields [column, options] pairs
    # @param runner object that receives parser calls and any other method
    #   this row does not define itself
    def initialize(csv_row, config, runner)
      @csv_row = csv_row
      @config = config
      @runner = runner
      @attributes = OpenStruct.new
      parse_row!(csv_row)
    end

    private

    # Records the row's CSV string and unparsed values, then parses every
    # configured column into @attributes.
    def parse_row!(csv_row)
      self.csv_string = csv_row.to_csv
      self.unparsed = extract_unparsed_values(csv_row)

      @config.columns.each do |column, options|
        @attributes[column] = parse_value(csv_row[options[:header]], options)
      end
    end

    # Builds an OpenStruct holding the untouched value of each column.
    # Reads @config directly instead of reaching the runner's config through
    # method_missing.
    def extract_unparsed_values(csv_row)
      @config.columns.each_with_object(OpenStruct.new) do |(column, options), unparsed_row|
        unparsed_row[column] = csv_row[options[:header]]
      end
    end

    # Parses one value with the column's parser: a Symbol names a method,
    # anything else is treated as a block. Blank values become nil when the
    # column has :intercept_blanks set.
    def parse_value(value, options)
      return nil if options[:intercept_blanks] && is_blank?(value)

      if options[:parser].is_a?(Symbol)
        parse_with_method(value, options)
      else
        parse_with_block(value, options)
      end
    end

    # Calls the named parser, passing the column's :format only when one is
    # set. The call normally resolves through #method_missing to the runner.
    def parse_with_method(value, options)
      format = options[:format]
      parser = options[:parser]

      if format.nil?
        send(parser, value)
      else
        send(parser, value, format)
      end
    end

    # Evaluates the parser block in the context of this row.
    def parse_with_block(value, options)
      instance_exec(value, &options[:parser])
    end

    # True for nil and for strings that are empty after stripping.
    def is_blank?(value)
      value.nil? || value.strip.empty?
    end

    # Mirrors #method_missing: parsed attributes first, then the runner
    # (including its private methods).
    def respond_to_missing?(method, _include_private = false)
      @attributes.respond_to?(method) || @runner.respond_to?(method, true) || super
    end

    # Forwards unknown calls, including any block, to the parsed attributes
    # and then to the runner.
    def method_missing(method, *args, &block)
      if @attributes.respond_to?(method)
        @attributes.send(method, *args, &block)
      elsif @runner.respond_to?(method, true)
        @runner.send(method, *args, &block)
      else
        super
      end
    end
  end
end
@@ -0,0 +1,219 @@
1
+ require 'csv_party/parsers'
2
+
3
module CSVParty
  # Runs one import: validates the configuration, reads rows from the CSV
  # and hands each parsed Row to the configured row importer. Methods this
  # class does not define are forwarded to the importer.
  class Runner
    include Parsers

    # Record appended to the importer's error_rows when errors are ignored.
    ErrorRow = Struct.new(:error, :line_number, :csv_string)

    attr_accessor :csv, :config, :importer

    # @param csv object that responds to #shift, #headers and #rewind
    # @param config the importer's configuration
    # @param importer the importer instance this run belongs to
    def initialize(csv, config, importer)
      self.csv = csv
      self.config = config
      self.importer = importer
      @_rows_have_been_imported = false
      # Incremented before each row is parsed, so the first row handed to
      # the row importer is number 2 (presumably the header counts as 1).
      @_current_row_number = 1
    end

    # Validates the setup, then imports the rows, either directly or through
    # the configured file importer block.
    #
    # @return [Boolean] true on completion, false when the import was aborted
    #   (the importer's aborted flag and abort_message are set in that case)
    def import!
      raise_unless_row_processor_is_defined!
      raise_unless_all_named_parsers_exist!
      raise_unless_all_dependencies_are_present!
      initialize_csv!
      initialize_regex_headers!
      raise_unless_csv_has_all_columns!

      if config.file_importer
        instance_exec(&config.file_importer)
        raise_unless_rows_have_been_imported!
      else
        import_rows!
      end

      true
    rescue AbortedImportError => error
      importer.aborted = true
      importer.abort_message = error.message
      false
    end

    # Headers read from the CSV by #initialize_csv!.
    def present_columns
      @_headers
    end

    # Required columns that are not among the CSV's headers.
    def missing_columns
      config.required_columns - present_columns
    end

    private

    # Reads one row so that csv.headers is populated, then rewinds.
    # NOTE(review): assumes the CSV was opened with headers enabled — confirm.
    def initialize_csv!
      csv.shift
      @_headers = csv.headers
      csv.rewind
    end

    # Imports every remaining row. Skips, aborts and errors raised while a
    # row is processed are routed to their handlers; an aborted import and
    # malformed CSV propagate to the caller.
    def import_rows!
      loop do
        begin
          row = csv.shift
          break unless row
          import_row!(row)
        rescue NextRowError
          next
        rescue SkippedRowError => error
          handle_skipped_row(error)
        rescue AbortedRowError => error
          handle_aborted_row(error)
        rescue AbortedImportError, CSV::MalformedCSVError
          raise
        rescue StandardError => error
          # row is nil when csv.shift itself failed; there is no row to
          # report, so surface the original error instead of masking it
          # with a NoMethodError from nil.to_csv.
          raise if row.nil?

          handle_error(error, @_current_row_number, row.to_csv)
        end
      end

      @_rows_have_been_imported = true
    end

    # Parses one CSV row and runs the row importer block with it.
    def import_row!(csv_row)
      @_current_row_number += 1
      @_current_parsed_row = Row.new(csv_row, config, self)
      @_current_parsed_row.row_number = @_current_row_number
      instance_exec(@_current_parsed_row, &config.row_importer)
    end

    # Stops processing the current row without recording anything.
    def next_row!
      raise NextRowError
    end

    # Stops processing the current row and reports it as skipped.
    def skip_row!(message = nil)
      raise SkippedRowError, message
    end

    # Stops processing the current row and reports it as aborted.
    def abort_row!(message = nil)
      raise AbortedRowError, message
    end

    # Stops the whole import; #import! then returns false.
    def abort_import!(message)
      raise AbortedImportError, message
    end

    # Re-raises the error when no handler is configured, records it in
    # error_rows when the handler is :ignore, and otherwise calls the
    # handler block.
    def handle_error(error, line_number, csv_string)
      raise error unless config.error_handler

      if config.error_handler == :ignore
        error_rows << error_struct(error, line_number, csv_string)
      else
        instance_exec(error, line_number, csv_string, &config.error_handler)
      end
    end

    # Does nothing when the handler is :ignore. Otherwise stores the message
    # on the row, then either collects the row on the importer (no handler)
    # or calls the handler block.
    def handle_skipped_row(error)
      return if config.skipped_row_handler == :ignore

      @_current_parsed_row[:skip_message] = error.message

      if config.skipped_row_handler.nil?
        importer.skipped_rows << @_current_parsed_row
      else
        instance_exec(@_current_parsed_row, &config.skipped_row_handler)
      end
    end

    # Same as #handle_skipped_row, for aborted rows.
    def handle_aborted_row(error)
      return if config.aborted_row_handler == :ignore

      @_current_parsed_row[:abort_message] = error.message

      if config.aborted_row_handler.nil?
        importer.aborted_rows << @_current_parsed_row
      else
        instance_exec(@_current_parsed_row, &config.aborted_row_handler)
      end
    end

    # Builds the record stored for an ignored error.
    def error_struct(error, line_number, csv_string)
      ErrorRow.new(error, line_number, csv_string)
    end

    def raise_unless_row_processor_is_defined!
      return if config.row_importer

      raise UndefinedRowProcessorError
    end

    def raise_unless_rows_have_been_imported!
      return if @_rows_have_been_imported

      raise UnimportedRowsError
    end

    # Every declared dependency must have been assigned on the importer.
    def raise_unless_all_dependencies_are_present!
      config.dependencies.each do |dependency|
        next unless importer.send(dependency).nil?

        # Pass the importer, not this runner: the message shows the class
        # name the user has to instantiate.
        raise MissingDependencyError.new(importer, dependency)
      end
    end

    # This error has to be raised at runtime because, when the class body
    # is being executed, the parser methods won't be available unless
    # they are defined above the column definitions in the class body
    def raise_unless_all_named_parsers_exist!
      config.columns_with_named_parsers.each do |name, options|
        parser = options[:parser]
        next if named_parsers.include?(parser)

        raise UnknownParserError.new(name, parser, named_parsers)
      end
    end

    # All `parse_*` methods, public and private, of this runner and of the
    # importer.
    def named_parsers
      (methods +
        private_methods +
        importer.methods +
        importer.private_methods).grep(/^parse_/)
    end

    def raise_unless_csv_has_all_columns!
      return if missing_columns.empty?

      raise MissingColumnError.new(present_columns, missing_columns)
    end

    # Replaces each regex header with the first CSV header it matches,
    # falling back to the column name when nothing matches. This overwrites
    # options[:header] on the config.
    def initialize_regex_headers!
      config.columns_with_regex_headers.each do |name, options|
        found_header = @_headers.find do |header|
          options[:header].match(header)
        end
        options[:header] = found_header || name.to_s
      end
    end

    # Mirrors #method_missing: anything the importer responds to, including
    # its private methods.
    def respond_to_missing?(method, _include_private = false)
      importer.respond_to?(method, true) || super
    end

    # Forwards unknown calls, including any block, to the importer.
    def method_missing(method, *args, &block)
      if importer.respond_to?(method, true)
        importer.send(method, *args, &block)
      else
        super
      end
    end

    # Raised by #next_row!.
    class NextRowError < Error
    end

    # Raised by #skip_row!.
    class SkippedRowError < Error
    end

    # Raised by #abort_row!.
    class AbortedRowError < Error
    end

    # Raised by #abort_import!.
    class AbortedImportError < Error
    end
  end
end