csv_party 0.0.1.pre9 → 1.0.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.md +21 -0
- data/README.md +218 -0
- data/ROADMAP.md +271 -0
- data/lib/csv_party.rb +45 -275
- data/lib/csv_party/configuration.rb +82 -0
- data/lib/csv_party/data_preparer.rb +45 -0
- data/lib/csv_party/dsl.rb +38 -0
- data/lib/csv_party/errors.rb +157 -0
- data/lib/csv_party/parsers.rb +71 -0
- data/lib/csv_party/row.rb +83 -0
- data/lib/csv_party/runner.rb +219 -0
- data/lib/csv_party/testing.rb +6 -0
- metadata +14 -3
data/lib/csv_party.rb
CHANGED
@@ -1,309 +1,79 @@
|
|
1
|
-
require 'csv'
|
2
1
|
require 'bigdecimal'
|
2
|
+
require 'csv'
|
3
3
|
require 'ostruct'
|
4
|
+
require 'csv_party/configuration'
|
5
|
+
require 'csv_party/dsl'
|
6
|
+
require 'csv_party/data_preparer'
|
7
|
+
require 'csv_party/errors'
|
8
|
+
require 'csv_party/row'
|
9
|
+
require 'csv_party/runner'
|
4
10
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
attr_reader :imported_rows, :skipped_rows, :aborted_rows,
|
10
|
-
:abort_message
|
11
|
-
|
12
|
-
def initialize(csv_path, options = {})
|
13
|
-
initialize_import_settings
|
14
|
-
initialize_counters_and_statuses
|
15
|
-
initialize_dependencies(options)
|
16
|
-
|
17
|
-
@headers = CSV.new(File.open(csv_path), options).shift
|
18
|
-
options[:headers] = true
|
19
|
-
@csv = CSV.new(File.open(csv_path), options)
|
20
|
-
|
21
|
-
raise_unless_named_parsers_are_valid
|
22
|
-
raise_unless_csv_has_all_headers
|
23
|
-
end
|
24
|
-
|
25
|
-
def import!
|
26
|
-
if importer
|
27
|
-
instance_exec(&importer)
|
28
|
-
else
|
29
|
-
import_rows!
|
30
|
-
end
|
31
|
-
rescue AbortedImportError => error
|
32
|
-
@aborted = true
|
33
|
-
@abort_message = error.message
|
34
|
-
end
|
35
|
-
|
36
|
-
def import_rows!
|
37
|
-
loop do
|
38
|
-
begin
|
39
|
-
row = @csv.shift
|
40
|
-
break unless row
|
41
|
-
import_row!(row)
|
42
|
-
imported_rows << @csv.lineno
|
43
|
-
rescue SkippedRowError
|
44
|
-
skipped_rows << @csv.lineno
|
45
|
-
next
|
46
|
-
rescue AbortedImportError => error
|
47
|
-
raise AbortedImportError, error.message
|
48
|
-
rescue StandardError => error
|
49
|
-
process_error(error, @csv.lineno + 1)
|
50
|
-
aborted_rows << @csv.lineno
|
51
|
-
next
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.column(column, options, &block)
|
57
|
-
raise_if_duplicate_column(column)
|
58
|
-
raise_if_missing_header(column, options)
|
59
|
-
|
60
|
-
options = {
|
61
|
-
blanks_as_nil: (options[:as] == :raw ? false : true),
|
62
|
-
as: :string
|
63
|
-
}.merge(options)
|
64
|
-
|
65
|
-
parser = if block_given?
|
66
|
-
block
|
67
|
-
else
|
68
|
-
"#{options[:as]}_parser".to_sym
|
69
|
-
end
|
70
|
-
|
71
|
-
columns[column] = {
|
72
|
-
header: options[:header],
|
73
|
-
parser: parser,
|
74
|
-
blanks_as_nil: options[:blanks_as_nil]
|
75
|
-
}
|
76
|
-
end
|
77
|
-
|
78
|
-
def self.rows(&block)
|
79
|
-
@row_importer = block
|
80
|
-
end
|
81
|
-
|
82
|
-
def self.import(&block)
|
83
|
-
@importer = block
|
84
|
-
end
|
85
|
-
|
86
|
-
def self.errors(&block)
|
87
|
-
@error_processor = block
|
88
|
-
end
|
89
|
-
|
90
|
-
def self.depends_on(*args)
|
91
|
-
args.each do |arg|
|
92
|
-
dependencies << arg
|
93
|
-
attr_accessor arg
|
94
|
-
end
|
95
|
-
end
|
11
|
+
module CSVParty
|
12
|
+
DATA_OPTIONS = [:path, :file, :content].freeze
|
13
|
+
CSV_OPTIONS = CSV::DEFAULT_OPTIONS.keys.push(:encoding).freeze
|
96
14
|
|
97
|
-
def self.
|
98
|
-
|
15
|
+
def self.included(base)
|
16
|
+
base.extend DSL
|
99
17
|
end
|
100
18
|
|
101
|
-
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
def self.row_importer
|
106
|
-
@row_importer ||= nil
|
107
|
-
end
|
19
|
+
attr_reader :skipped_rows, :aborted_rows, :error_rows
|
20
|
+
attr_accessor :aborted, :abort_message
|
108
21
|
|
109
|
-
def
|
110
|
-
|
22
|
+
def initialize(options = {})
|
23
|
+
self.config = self.class.config
|
24
|
+
raise_unless_all_options_are_recognized!(options)
|
25
|
+
self.csv = DataPreparer.new(options).prepare
|
26
|
+
assign_dependencies_if_present(options)
|
111
27
|
end
|
112
28
|
|
113
|
-
def
|
114
|
-
@
|
29
|
+
def import!
|
30
|
+
@skipped_rows = []
|
31
|
+
@aborted_rows = []
|
32
|
+
@error_rows = []
|
33
|
+
@aborted = false
|
34
|
+
self.runner = Runner.new(csv, config, self)
|
35
|
+
runner.import!
|
115
36
|
end
|
116
37
|
|
117
38
|
def aborted?
|
118
39
|
@aborted
|
119
40
|
end
|
120
41
|
|
121
|
-
def
|
122
|
-
|
123
|
-
|
124
|
-
raise DuplicateColumnError, "A column named :#{name} has already been \
|
125
|
-
defined, choose a different name"
|
42
|
+
def present_columns
|
43
|
+
runner.present_columns
|
126
44
|
end
|
127
|
-
private_class_method :raise_if_duplicate_column
|
128
|
-
|
129
|
-
def self.raise_if_missing_header(name, options)
|
130
|
-
return if options.has_key?(:header)
|
131
45
|
|
132
|
-
|
46
|
+
def missing_columns
|
47
|
+
runner.missing_columns
|
133
48
|
end
|
134
|
-
private_class_method :raise_if_missing_header
|
135
49
|
|
136
50
|
private
|
137
51
|
|
138
|
-
|
139
|
-
parsed_row = parse_row(row)
|
140
|
-
instance_exec(parsed_row, &row_importer)
|
141
|
-
end
|
142
|
-
|
143
|
-
def parse_row(row)
|
144
|
-
unparsed_row = OpenStruct.new
|
145
|
-
columns.each do |column, options|
|
146
|
-
header = options[:header]
|
147
|
-
unparsed_row[column] = row[header]
|
148
|
-
end
|
149
|
-
|
150
|
-
parsed_row = OpenStruct.new
|
151
|
-
columns.each do |column, options|
|
152
|
-
value = row[options[:header]]
|
153
|
-
parsed_row[column] = parse_column(
|
154
|
-
value,
|
155
|
-
options[:parser],
|
156
|
-
options[:blanks_as_nil]
|
157
|
-
)
|
158
|
-
end
|
159
|
-
|
160
|
-
parsed_row[:unparsed] = unparsed_row
|
161
|
-
parsed_row[:csv_string] = row.to_csv
|
162
|
-
|
163
|
-
return parsed_row
|
164
|
-
end
|
165
|
-
|
166
|
-
def parse_column(value, parser, blanks_as_nil)
|
167
|
-
if blanks_as_nil && is_blank?(value)
|
168
|
-
nil
|
169
|
-
elsif parser.is_a? Symbol
|
170
|
-
send(parser, value)
|
171
|
-
else
|
172
|
-
instance_exec(value, &parser)
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
|
-
def process_error(error, line_number)
|
177
|
-
instance_exec(error, line_number, &error_processor)
|
178
|
-
end
|
179
|
-
|
180
|
-
def skip_row
|
181
|
-
raise SkippedRowError
|
182
|
-
end
|
183
|
-
|
184
|
-
def abort_row(message)
|
185
|
-
raise AbortedRowError, message
|
186
|
-
end
|
187
|
-
|
188
|
-
def abort_import(message)
|
189
|
-
raise AbortedImportError, message
|
190
|
-
end
|
191
|
-
|
192
|
-
def is_blank?(value)
|
193
|
-
value.nil? || value.strip.empty?
|
194
|
-
end
|
195
|
-
|
196
|
-
def raw_parser(value)
|
197
|
-
value
|
198
|
-
end
|
199
|
-
|
200
|
-
def string_parser(value)
|
201
|
-
value.to_s.strip
|
202
|
-
end
|
52
|
+
attr_accessor :runner, :config, :csv
|
203
53
|
|
204
|
-
def
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
def integer_parser(value)
|
209
|
-
value.to_i
|
210
|
-
end
|
211
|
-
|
212
|
-
def decimal_parser(value)
|
213
|
-
cleaned_value = value.to_s.strip.gsub(/[^0-9.]/, '')
|
214
|
-
BigDecimal.new(cleaned_value)
|
215
|
-
end
|
216
|
-
|
217
|
-
def named_parsers
|
218
|
-
(private_methods + methods).grep(/_parser$/)
|
219
|
-
end
|
220
|
-
|
221
|
-
def columns_with_named_parsers
|
222
|
-
columns.select { |_name, options| options[:parser].is_a? Symbol }
|
223
|
-
end
|
224
|
-
|
225
|
-
# This error has to be raised at runtime because, when the class body
|
226
|
-
# is being executed, the parser methods won't be available unless
|
227
|
-
# they are defined above the column definitions in the class body
|
228
|
-
def raise_unless_named_parsers_are_valid
|
229
|
-
columns_with_named_parsers.each do |name, options|
|
230
|
-
parser = options[:parser]
|
231
|
-
next if named_parsers.include? parser
|
232
|
-
|
233
|
-
parser = parser.to_s.gsub('_parser', '')
|
234
|
-
parsers = named_parsers
|
235
|
-
.map { |p| p.to_s.gsub('_parser', '') }
|
236
|
-
.join(', :')
|
237
|
-
raise UnknownParserError,
|
238
|
-
"You're trying to use the :#{parser} parser for the :#{name} \
|
239
|
-
column, but it doesn't exist. Available parsers are: :#{parsers}."
|
54
|
+
def raise_unless_all_options_are_recognized!(options)
|
55
|
+
unrecognized_options = options.keys.reject do |option|
|
56
|
+
valid_options.include? option
|
240
57
|
end
|
241
|
-
|
58
|
+
return if unrecognized_options.empty?
|
242
59
|
|
243
|
-
|
244
|
-
|
60
|
+
raise UnrecognizedOptionsError.new(unrecognized_options,
|
61
|
+
DATA_OPTIONS,
|
62
|
+
CSV_OPTIONS,
|
63
|
+
config.dependencies)
|
245
64
|
end
|
246
65
|
|
247
|
-
def
|
248
|
-
|
249
|
-
return if missing_columns.empty?
|
66
|
+
def assign_dependencies_if_present(options)
|
67
|
+
return unless config.dependencies.any?
|
250
68
|
|
251
|
-
|
252
|
-
raise MissingColumnError,
|
253
|
-
"CSV file is missing column(s) with header(s) '#{columns}'. \
|
254
|
-
File has these headers: #{@headers.join(', ')}."
|
255
|
-
end
|
256
|
-
|
257
|
-
def initialize_import_settings
|
258
|
-
@columns = self.class.columns
|
259
|
-
@row_importer = self.class.row_importer
|
260
|
-
@importer = self.class.importer
|
261
|
-
@error_processor = self.class.error_processor
|
262
|
-
@dependencies = self.class.dependencies
|
263
|
-
end
|
264
|
-
|
265
|
-
def initialize_counters_and_statuses
|
266
|
-
@imported_rows = []
|
267
|
-
@skipped_rows = []
|
268
|
-
@aborted_rows = []
|
269
|
-
@aborted = false
|
270
|
-
end
|
271
|
-
|
272
|
-
def initialize_dependencies(options)
|
273
|
-
dependencies.each do |dependency|
|
69
|
+
config.dependencies.each do |dependency|
|
274
70
|
if options.has_key? dependency
|
275
71
|
send("#{dependency}=", options.delete(dependency))
|
276
|
-
else
|
277
|
-
raise MissingDependencyError,
|
278
|
-
<<-MESSAGE
|
279
|
-
This importer depends on #{dependency}, but you didn't include it.
|
280
|
-
Here's how you do that: #{self.class.name}.new('path/to/csv', #{dependency}: #{dependency})
|
281
|
-
MESSAGE
|
282
72
|
end
|
283
73
|
end
|
284
74
|
end
|
285
|
-
end
|
286
75
|
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
class MissingHeaderError < ArgumentError
|
291
|
-
end
|
292
|
-
|
293
|
-
class DuplicateColumnError < ArgumentError
|
294
|
-
end
|
295
|
-
|
296
|
-
class MissingColumnError < ArgumentError
|
297
|
-
end
|
298
|
-
|
299
|
-
class MissingDependencyError < ArgumentError
|
300
|
-
end
|
301
|
-
|
302
|
-
class SkippedRowError < RuntimeError
|
303
|
-
end
|
304
|
-
|
305
|
-
class AbortedRowError < RuntimeError
|
306
|
-
end
|
307
|
-
|
308
|
-
class AbortedImportError < RuntimeError
|
76
|
+
def valid_options
|
77
|
+
DATA_OPTIONS + CSV_OPTIONS + config.dependencies
|
78
|
+
end
|
309
79
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module CSVParty
  # Collects the definition of one importer: its column definitions, the
  # names of its declared dependencies, and the handler objects stored
  # through the accessors below (this class only stores them; it never
  # calls them).
  class Configuration
    # Column names that #add_column rejects with ReservedColumnNameError.
    # NOTE(review): presumably these collide with attributes exposed on
    # parsed rows -- confirm against the row implementation.
    RESERVED_COLUMN_NAMES = [:unparsed,
                             :csv_string,
                             :row_number,
                             :skip_message,
                             :abort_message].freeze

    attr_accessor :row_importer, :file_importer, :error_handler,
                  :skipped_row_handler, :aborted_row_handler

    attr_reader :columns, :dependencies

    # Starts with no columns and no dependencies.
    def initialize
      @columns = {}
      @dependencies = []
    end

    # Registers a column definition under +column+.
    #
    # column  - name of the column (used as the key in #columns).
    # options - optional overrides:
    #           :header           - what identifies the CSV header; defaults
    #                               to a case-insensitive Regexp built from
    #                               the column name (see #column_regex).
    #           :as               - parser name; stored as the Symbol
    #                               :"parse_<as>" (default :string).
    #           :format           - stored as given (default nil).
    #           :intercept_blanks - stored as given; defaults to true unless
    #                               as: :raw was requested.
    # block   - when given, stored as the parser instead of the
    #           :"parse_<as>" Symbol.
    #
    # Raises DuplicateColumnError when +column+ is already defined and
    # ReservedColumnNameError when it is in RESERVED_COLUMN_NAMES.
    def add_column(column, options = {}, &block)
      raise_if_duplicate_column(column)
      raise_if_reserved_column_name(column)

      options = {
        header: column_regex(column),
        as: :string,
        format: nil,
        intercept_blanks: (options[:as] != :raw)
      }.merge(options)

      parser = if block_given?
                 block
               else
                 "parse_#{options[:as]}".to_sym
               end

      columns[column] = {
        header: options[:header],
        parser: parser,
        format: options[:format],
        intercept_blanks: options[:intercept_blanks]
      }
    end

    # Appends every given name to #dependencies.
    def add_dependency(*args)
      args.each do |arg|
        dependencies << arg
      end
    end

    # Columns whose parser is a Symbol (i.e. no custom block was given).
    def columns_with_named_parsers
      columns.select { |_name, options| options[:parser].is_a? Symbol }
    end

    # Columns whose header is a Regexp rather than a literal value.
    def columns_with_regex_headers
      columns.select { |_name, options| options[:header].is_a? Regexp }
    end

    # The :header value of every defined column, in definition order.
    def required_columns
      columns.map { |_name, options| options[:header] }
    end

    private

    # Builds the default header matcher for +column+: the whole header,
    # ignoring case and surrounding whitespace, must equal the column name
    # written either with underscores or with spaces in their place.
    #
    # The two spellings are wrapped in a non-capturing group so that the
    # \A and \z anchors apply to both of them. Without the group the
    # alternation splits the pattern into "starts with <underscored>" OR
    # "ends with <spaced>", which lets headers such as "name_extra" or
    # "first name" match a column called :name.
    def column_regex(column)
      underscored = Regexp.escape(column.to_s)
      whitespaced = underscored.tr('_', ' ')
      /\A\s*(?:#{underscored}|#{whitespaced})\s*\z/i
    end

    def raise_if_duplicate_column(name)
      return unless columns.has_key?(name)

      raise DuplicateColumnError.new(name)
    end

    def raise_if_reserved_column_name(column)
      return unless RESERVED_COLUMN_NAMES.include? column

      raise ReservedColumnNameError.new(RESERVED_COLUMN_NAMES)
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module CSVParty
  # Turns the options hash handed to an importer into a CSV object.
  #
  # The data comes from exactly one of the :path, :file or :content
  # options (checked in that order). Options listed in CSV_OPTIONS are
  # forwarded to CSV.new; everything else is ignored here.
  class DataPreparer
    # options - the Hash of options; it is read but not modified.
    def initialize(options)
      @options = options
    end

    # Returns a CSV instance reading from the configured data source, with
    # header parsing always enabled.
    #
    # Raises MissingCSVError when none of the DATA_OPTIONS keys is present
    # and NonexistentCSVFileError when :path does not point at a file.
    def prepare
      raise_unless_csv_is_present!

      csv_options = extract_csv_options.merge(headers: true)
      # CSV.new declares its options as keyword arguments. Ruby 3.0+ no
      # longer converts a trailing positional Hash into keywords, so the
      # options must be splatted or the call raises ArgumentError.
      CSV.new(data_source, **csv_options)
    end

    private

    # Picks the object CSV should read from: an opened File for :path, or
    # the :file / :content value exactly as supplied.
    def data_source
      if @options.has_key?(:path)
        open_csv_path(@options[:path])
      elsif @options.has_key?(:file)
        @options[:file]
      elsif @options.has_key?(:content)
        @options[:content]
      end
    end

    # Opens +path+ for reading, honouring the :encoding option if present.
    # The File is returned open because the CSV object reads from it
    # after this method returns.
    def open_csv_path(path)
      raise NonexistentCSVFileError.new(path) unless File.file?(path)

      if @options.has_key?(:encoding)
        File.open(path, "r:#{@options[:encoding]}")
      else
        File.open(path)
      end
    end

    # The subset of the options whose keys appear in CSV_OPTIONS.
    def extract_csv_options
      @options.select do |option, _value|
        CSV_OPTIONS.include?(option)
      end
    end

    def raise_unless_csv_is_present!
      return if DATA_OPTIONS.any? { |option| @options.has_key?(option) }

      raise MissingCSVError.new(self)
    end
  end
end
|