csv_party 0.0.1.pre9 → 1.0.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/csv_party.rb CHANGED
@@ -1,309 +1,79 @@
1
- require 'csv'
2
1
  require 'bigdecimal'
2
+ require 'csv'
3
3
  require 'ostruct'
4
+ require 'csv_party/configuration'
5
+ require 'csv_party/dsl'
6
+ require 'csv_party/data_preparer'
7
+ require 'csv_party/errors'
8
+ require 'csv_party/row'
9
+ require 'csv_party/runner'
4
10
 
5
- class CSVParty
6
- attr_accessor :columns, :row_importer, :importer, :error_processor,
7
- :dependencies
8
-
9
- attr_reader :imported_rows, :skipped_rows, :aborted_rows,
10
- :abort_message
11
-
12
- def initialize(csv_path, options = {})
13
- initialize_import_settings
14
- initialize_counters_and_statuses
15
- initialize_dependencies(options)
16
-
17
- @headers = CSV.new(File.open(csv_path), options).shift
18
- options[:headers] = true
19
- @csv = CSV.new(File.open(csv_path), options)
20
-
21
- raise_unless_named_parsers_are_valid
22
- raise_unless_csv_has_all_headers
23
- end
24
-
25
- def import!
26
- if importer
27
- instance_exec(&importer)
28
- else
29
- import_rows!
30
- end
31
- rescue AbortedImportError => error
32
- @aborted = true
33
- @abort_message = error.message
34
- end
35
-
36
- def import_rows!
37
- loop do
38
- begin
39
- row = @csv.shift
40
- break unless row
41
- import_row!(row)
42
- imported_rows << @csv.lineno
43
- rescue SkippedRowError
44
- skipped_rows << @csv.lineno
45
- next
46
- rescue AbortedImportError => error
47
- raise AbortedImportError, error.message
48
- rescue StandardError => error
49
- process_error(error, @csv.lineno + 1)
50
- aborted_rows << @csv.lineno
51
- next
52
- end
53
- end
54
- end
55
-
56
- def self.column(column, options, &block)
57
- raise_if_duplicate_column(column)
58
- raise_if_missing_header(column, options)
59
-
60
- options = {
61
- blanks_as_nil: (options[:as] == :raw ? false : true),
62
- as: :string
63
- }.merge(options)
64
-
65
- parser = if block_given?
66
- block
67
- else
68
- "#{options[:as]}_parser".to_sym
69
- end
70
-
71
- columns[column] = {
72
- header: options[:header],
73
- parser: parser,
74
- blanks_as_nil: options[:blanks_as_nil]
75
- }
76
- end
77
-
78
- def self.rows(&block)
79
- @row_importer = block
80
- end
81
-
82
- def self.import(&block)
83
- @importer = block
84
- end
85
-
86
- def self.errors(&block)
87
- @error_processor = block
88
- end
89
-
90
- def self.depends_on(*args)
91
- args.each do |arg|
92
- dependencies << arg
93
- attr_accessor arg
94
- end
95
- end
11
+ module CSVParty
12
+ DATA_OPTIONS = [:path, :file, :content].freeze
13
+ CSV_OPTIONS = CSV::DEFAULT_OPTIONS.keys.push(:encoding).freeze
96
14
 
97
- def self.dependencies
98
- @dependencies ||= []
15
+ def self.included(base)
16
+ base.extend DSL
99
17
  end
100
18
 
101
- def self.columns
102
- @columns ||= {}
103
- end
104
-
105
- def self.row_importer
106
- @row_importer ||= nil
107
- end
19
+ attr_reader :skipped_rows, :aborted_rows, :error_rows
20
+ attr_accessor :aborted, :abort_message
108
21
 
109
- def self.importer
110
- @importer ||= nil
22
+ def initialize(options = {})
23
+ self.config = self.class.config
24
+ raise_unless_all_options_are_recognized!(options)
25
+ self.csv = DataPreparer.new(options).prepare
26
+ assign_dependencies_if_present(options)
111
27
  end
112
28
 
113
- def self.error_processor
114
- @error_processor ||= nil
29
+ def import!
30
+ @skipped_rows = []
31
+ @aborted_rows = []
32
+ @error_rows = []
33
+ @aborted = false
34
+ self.runner = Runner.new(csv, config, self)
35
+ runner.import!
115
36
  end
116
37
 
117
38
  def aborted?
118
39
  @aborted
119
40
  end
120
41
 
121
- def self.raise_if_duplicate_column(name)
122
- return unless columns.has_key?(name)
123
-
124
- raise DuplicateColumnError, "A column named :#{name} has already been \
125
- defined, choose a different name"
42
+ def present_columns
43
+ runner.present_columns
126
44
  end
127
- private_class_method :raise_if_duplicate_column
128
-
129
- def self.raise_if_missing_header(name, options)
130
- return if options.has_key?(:header)
131
45
 
132
- raise MissingHeaderError, "A header must be specified for #{name}"
46
+ def missing_columns
47
+ runner.missing_columns
133
48
  end
134
- private_class_method :raise_if_missing_header
135
49
 
136
50
  private
137
51
 
138
- def import_row!(row)
139
- parsed_row = parse_row(row)
140
- instance_exec(parsed_row, &row_importer)
141
- end
142
-
143
- def parse_row(row)
144
- unparsed_row = OpenStruct.new
145
- columns.each do |column, options|
146
- header = options[:header]
147
- unparsed_row[column] = row[header]
148
- end
149
-
150
- parsed_row = OpenStruct.new
151
- columns.each do |column, options|
152
- value = row[options[:header]]
153
- parsed_row[column] = parse_column(
154
- value,
155
- options[:parser],
156
- options[:blanks_as_nil]
157
- )
158
- end
159
-
160
- parsed_row[:unparsed] = unparsed_row
161
- parsed_row[:csv_string] = row.to_csv
162
-
163
- return parsed_row
164
- end
165
-
166
- def parse_column(value, parser, blanks_as_nil)
167
- if blanks_as_nil && is_blank?(value)
168
- nil
169
- elsif parser.is_a? Symbol
170
- send(parser, value)
171
- else
172
- instance_exec(value, &parser)
173
- end
174
- end
175
-
176
- def process_error(error, line_number)
177
- instance_exec(error, line_number, &error_processor)
178
- end
179
-
180
- def skip_row
181
- raise SkippedRowError
182
- end
183
-
184
- def abort_row(message)
185
- raise AbortedRowError, message
186
- end
187
-
188
- def abort_import(message)
189
- raise AbortedImportError, message
190
- end
191
-
192
- def is_blank?(value)
193
- value.nil? || value.strip.empty?
194
- end
195
-
196
- def raw_parser(value)
197
- value
198
- end
199
-
200
- def string_parser(value)
201
- value.to_s.strip
202
- end
52
+ attr_accessor :runner, :config, :csv
203
53
 
204
- def boolean_parser(value)
205
- %w[1 t true].include? value.to_s.strip.downcase
206
- end
207
-
208
- def integer_parser(value)
209
- value.to_i
210
- end
211
-
212
- def decimal_parser(value)
213
- cleaned_value = value.to_s.strip.gsub(/[^0-9.]/, '')
214
- BigDecimal.new(cleaned_value)
215
- end
216
-
217
- def named_parsers
218
- (private_methods + methods).grep(/_parser$/)
219
- end
220
-
221
- def columns_with_named_parsers
222
- columns.select { |_name, options| options[:parser].is_a? Symbol }
223
- end
224
-
225
- # This error has to be raised at runtime because, when the class body
226
- # is being executed, the parser methods won't be available unless
227
- # they are defined above the column definitions in the class body
228
- def raise_unless_named_parsers_are_valid
229
- columns_with_named_parsers.each do |name, options|
230
- parser = options[:parser]
231
- next if named_parsers.include? parser
232
-
233
- parser = parser.to_s.gsub('_parser', '')
234
- parsers = named_parsers
235
- .map { |p| p.to_s.gsub('_parser', '') }
236
- .join(', :')
237
- raise UnknownParserError,
238
- "You're trying to use the :#{parser} parser for the :#{name} \
239
- column, but it doesn't exist. Available parsers are: :#{parsers}."
54
+ def raise_unless_all_options_are_recognized!(options)
55
+ unrecognized_options = options.keys.reject do |option|
56
+ valid_options.include? option
240
57
  end
241
- end
58
+ return if unrecognized_options.empty?
242
59
 
243
- def defined_headers
244
- columns.map { |_name, options| options[:header] }
60
+ raise UnrecognizedOptionsError.new(unrecognized_options,
61
+ DATA_OPTIONS,
62
+ CSV_OPTIONS,
63
+ config.dependencies)
245
64
  end
246
65
 
247
- def raise_unless_csv_has_all_headers
248
- missing_columns = defined_headers - @headers
249
- return if missing_columns.empty?
66
+ def assign_dependencies_if_present(options)
67
+ return unless config.dependencies.any?
250
68
 
251
- columns = missing_columns.join("', '")
252
- raise MissingColumnError,
253
- "CSV file is missing column(s) with header(s) '#{columns}'. \
254
- File has these headers: #{@headers.join(', ')}."
255
- end
256
-
257
- def initialize_import_settings
258
- @columns = self.class.columns
259
- @row_importer = self.class.row_importer
260
- @importer = self.class.importer
261
- @error_processor = self.class.error_processor
262
- @dependencies = self.class.dependencies
263
- end
264
-
265
- def initialize_counters_and_statuses
266
- @imported_rows = []
267
- @skipped_rows = []
268
- @aborted_rows = []
269
- @aborted = false
270
- end
271
-
272
- def initialize_dependencies(options)
273
- dependencies.each do |dependency|
69
+ config.dependencies.each do |dependency|
274
70
  if options.has_key? dependency
275
71
  send("#{dependency}=", options.delete(dependency))
276
- else
277
- raise MissingDependencyError,
278
- <<-MESSAGE
279
- This importer depends on #{dependency}, but you didn't include it.
280
- Here's how you do that: #{self.class.name}.new('path/to/csv', #{dependency}: #{dependency})
281
- MESSAGE
282
72
  end
283
73
  end
284
74
  end
285
- end
286
75
 
287
- class UnknownParserError < ArgumentError
288
- end
289
-
290
- class MissingHeaderError < ArgumentError
291
- end
292
-
293
- class DuplicateColumnError < ArgumentError
294
- end
295
-
296
- class MissingColumnError < ArgumentError
297
- end
298
-
299
- class MissingDependencyError < ArgumentError
300
- end
301
-
302
- class SkippedRowError < RuntimeError
303
- end
304
-
305
- class AbortedRowError < RuntimeError
306
- end
307
-
308
- class AbortedImportError < RuntimeError
76
+ def valid_options
77
+ DATA_OPTIONS + CSV_OPTIONS + config.dependencies
78
+ end
309
79
  end
@@ -0,0 +1,82 @@
1
+ module CSVParty
2
+ class Configuration
3
+ attr_accessor :row_importer, :file_importer, :error_handler,
4
+ :skipped_row_handler, :aborted_row_handler
5
+
6
+ attr_reader :columns, :dependencies
7
+
8
+ def initialize
9
+ @columns = {}
10
+ @dependencies = []
11
+ end
12
+
13
+ def add_column(column, options = {}, &block)
14
+ raise_if_duplicate_column(column)
15
+ raise_if_reserved_column_name(column)
16
+
17
+ options = {
18
+ header: column_regex(column),
19
+ as: :string,
20
+ format: nil,
21
+ intercept_blanks: (options[:as] != :raw)
22
+ }.merge(options)
23
+
24
+ parser = if block_given?
25
+ block
26
+ else
27
+ "parse_#{options[:as]}".to_sym
28
+ end
29
+
30
+ columns[column] = {
31
+ header: options[:header],
32
+ parser: parser,
33
+ format: options[:format],
34
+ intercept_blanks: options[:intercept_blanks]
35
+ }
36
+ end
37
+
38
+ def add_dependency(*args)
39
+ args.each do |arg|
40
+ dependencies << arg
41
+ end
42
+ end
43
+
44
+ def columns_with_named_parsers
45
+ columns.select { |_name, options| options[:parser].is_a? Symbol }
46
+ end
47
+
48
+ def columns_with_regex_headers
49
+ columns.select { |_name, options| options[:header].is_a? Regexp }
50
+ end
51
+
52
+ def required_columns
53
+ columns.map { |_name, options| options[:header] }
54
+ end
55
+
56
+ private
57
+
58
+ def column_regex(column)
59
+ column = Regexp.escape(column.to_s)
60
+ underscored_or_whitespaced = "#{column}|#{column.tr('_', ' ')}"
61
+ /\A\s*#{underscored_or_whitespaced}\s*\z/i
62
+ end
63
+
64
+ def raise_if_duplicate_column(name)
65
+ return unless columns.has_key?(name)
66
+
67
+ raise DuplicateColumnError.new(name)
68
+ end
69
+
70
+ RESERVED_COLUMN_NAMES = [:unparsed,
71
+ :csv_string,
72
+ :row_number,
73
+ :skip_message,
74
+ :abort_message].freeze
75
+
76
+ def raise_if_reserved_column_name(column)
77
+ return unless RESERVED_COLUMN_NAMES.include? column
78
+
79
+ raise ReservedColumnNameError.new(RESERVED_COLUMN_NAMES)
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,45 @@
1
+ module CSVParty
2
+ class DataPreparer
3
+ def initialize(options)
4
+ @options = options
5
+ end
6
+
7
+ def prepare
8
+ raise_unless_csv_is_present!
9
+
10
+ data = if @options.has_key?(:path)
11
+ open_csv_path(@options[:path])
12
+ elsif @options.has_key?(:file)
13
+ @options[:file]
14
+ elsif @options.has_key?(:content)
15
+ @options[:content]
16
+ end
17
+ options = extract_csv_options.merge(headers: true)
18
+ CSV.new(data, options)
19
+ end
20
+
21
+ private
22
+
23
+ def open_csv_path(path)
24
+ raise NonexistentCSVFileError.new(path) unless File.file?(path)
25
+
26
+ if @options.has_key?(:encoding)
27
+ File.open(path, "r:#{@options[:encoding]}")
28
+ else
29
+ File.open(path)
30
+ end
31
+ end
32
+
33
+ def extract_csv_options
34
+ @options.select do |option, _value|
35
+ CSV_OPTIONS.include?(option)
36
+ end
37
+ end
38
+
39
+ def raise_unless_csv_is_present!
40
+ return if DATA_OPTIONS.any? { |option| @options.has_key?(option) }
41
+
42
+ raise MissingCSVError.new(self)
43
+ end
44
+ end
45
+ end