csv_party 0.0.1.pre9 → 1.0.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.md +21 -0
- data/README.md +218 -0
- data/ROADMAP.md +271 -0
- data/lib/csv_party.rb +45 -275
- data/lib/csv_party/configuration.rb +82 -0
- data/lib/csv_party/data_preparer.rb +45 -0
- data/lib/csv_party/dsl.rb +38 -0
- data/lib/csv_party/errors.rb +157 -0
- data/lib/csv_party/parsers.rb +71 -0
- data/lib/csv_party/row.rb +83 -0
- data/lib/csv_party/runner.rb +219 -0
- data/lib/csv_party/testing.rb +6 -0
- metadata +14 -3
@@ -0,0 +1,38 @@
|
|
1
|
+
module CSVParty
  # Class-level macros for describing an importer. Each macro records what it
  # is given on the object returned by #config.
  module DSL
    # Returns the configuration for the extending object, creating it on first
    # use and reusing the same instance afterwards.
    def config
      @config ||= Configuration.new
    end

    # Registers a column by forwarding the name, options and optional block
    # to the configuration.
    def column(column, options = {}, &block)
      config.add_column(column, options, &block)
    end

    # Stores the given block as the configuration's row importer.
    def rows(&block)
      config.row_importer = block
    end

    # Stores the given block as the configuration's file importer.
    def import(&block)
      config.file_importer = block
    end

    # Stores either an explicit setting or, when none is given, the block as
    # the error handler.
    def errors(setting = nil, &block)
      handler = setting || block
      config.error_handler = handler
    end

    # Stores either an explicit setting or, when none is given, the block as
    # the skipped row handler.
    def skipped_rows(setting = nil, &block)
      handler = setting || block
      config.skipped_row_handler = handler
    end

    # Stores either an explicit setting or, when none is given, the block as
    # the aborted row handler.
    def aborted_rows(setting = nil, &block)
      handler = setting || block
      config.aborted_row_handler = handler
    end

    # Registers the named dependencies with the configuration and defines a
    # reader/writer pair for each of them. Returns the list of names.
    def depends_on(*args)
      config.add_dependency(*args)
      args.each { |dependency| attr_accessor dependency }
    end
  end
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
module CSVParty
  # Base class shared by every error defined in this module.
  class Error < StandardError
  end

  # Reports a column that names a parser which is not among the available
  # named parsers.
  class UnknownParserError < Error
    # column        - name of the column that references the parser
    # parser        - parser method name, e.g. :parse_money
    # named_parsers - list of available parser method names
    def initialize(column, parser, named_parsers)
      # Only the leading "parse_" is a prefix; occurrences further inside the
      # name are part of the parser's name and must be kept.
      parser = parser.to_s.sub(/\Aparse_/, '')
      parsers = named_parsers.map { |p| p.to_s.sub(/\Aparse_/, '') }

      super <<-MESSAGE
You're trying to use the :#{parser} parser for the :#{column} column, but it
doesn't exist. Available parsers are: :#{parsers.join(', :')}.
      MESSAGE
    end
  end

  # Reports that no CSV source was given; the message shows how to assign one
  # using the class name of the given importer.
  class MissingCSVError < Error
    # importer - object whose class name is used in the usage example
    def initialize(importer)
      super <<-MESSAGE
You must specify a file path, IO object, or string to import:

# File path, IO object, or string
csv = 'path/to/csv'
csv = File.open('path/to/csv')
csv = 'Header1,Header2\\nvalue1,value2\\n'

Then, you assign that to your importer one of two ways:

importer = #{importer.class.name}.new(csv)
# or
importer = #{importer.class.name}.new
importer.csv = csv
      MESSAGE
    end
  end

  # Reports a CSV file path that could not be found.
  class NonexistentCSVFileError < Error
    # file_path - the path that was looked up
    def initialize(file_path)
      super <<-MESSAGE
The CSV file you are trying to import was not found:

#{file_path}

NOTE: If you are trying to import a single line CSV string, please ensure that
your CSV string has a header row and at least one row of values. Single line
strings are assumed to be a path to a CSV file.
      MESSAGE
    end
  end

  # Reports options that are not recognized and lists the CSV options and,
  # when there are any, the dependencies that could have been given instead.
  class UnrecognizedOptionsError < Error
    # unrecognized_options - option names that were rejected
    # valid_data_options   - stored, but not used by the message builders below
    # valid_csv_options    - option names listed as accepted CSV options
    # dependencies         - dependency names listed as assignable
    def initialize(unrecognized_options, valid_data_options, valid_csv_options, dependencies)
      @unrecognized_options = unrecognized_options
      @valid_data_options = valid_data_options
      @valid_csv_options = valid_csv_options
      @dependencies = dependencies

      super csv_and_data_options_message + dependency_options_message
    end

    # First part of the message: the rejected options and the accepted ones.
    def csv_and_data_options_message
      <<-MESSAGE
The following options are not recognized: :#{@unrecognized_options.join(', :')}.
You can specify your CSV data via the :path, :file, or :content options, as well
as any options that the CSV library understands:

:#{@valid_csv_options.join("\n :")}
      MESSAGE
    end

    # Second part of the message; empty when there are no dependencies.
    def dependency_options_message
      return '' unless @dependencies.any?

      <<-MESSAGE

Or assignments for dependencies:

:#{@dependencies.join("\n :")}
      MESSAGE
    end
  end

  # Reports a column name that has already been defined.
  class DuplicateColumnError < Error
    # name - the column name that was defined twice
    def initialize(name)
      super <<-MESSAGE
A column named :#{name} has already been defined, please choose a different name.
      MESSAGE
    end
  end

  # Reports the use of a reserved column name and lists all reserved names.
  class ReservedColumnNameError < Error
    # reserved_column_names - list of names that may not be used for columns
    def initialize(reserved_column_names)
      super <<-MESSAGE
The following column names are reserved for internal use, please use a different
column name: :#{reserved_column_names.join(', :')}.
      MESSAGE
    end
  end

  # Reports headers that are missing from the CSV, next to the headers that
  # are present.
  class MissingColumnError < Error
    # headers         - headers found in the CSV
    # missing_columns - headers that were expected but not found
    def initialize(headers, missing_columns)
      columns = missing_columns.join("', '")
      super <<-MESSAGE
The CSV is missing column(s) with header(s) '#{columns}'. File has these
headers: #{headers.join(', ')}.
      MESSAGE
    end
  end

  # Reports an importer without a row processor and shows an example of one.
  class UndefinedRowProcessorError < Error
    def initialize
      super <<-MESSAGE
Your importer has to define a row processor which specifies what should be done
with each row. It should look something like this:

rows do |row|
row.column # access parsed column values
row.unparsed.column # access unparsed column values
end
      MESSAGE
    end
  end

  # Reports a dependency that was not assigned; the message shows how to
  # assign it using the class name of the given importer.
  class MissingDependencyError < Error
    # importer   - object whose class name is used in the usage example
    # dependency - name of the unassigned dependency
    def initialize(importer, dependency)
      super <<-MESSAGE
This importer depends on #{dependency}, but you didn't assign it.
You can do that when instantiating your importer:

#{importer.class.name}.new('path/to/csv', #{dependency}: #{dependency})

Or any time before you import:

importer = #{importer.class.name}.new('path/to/csv')
importer.#{dependency} = #{dependency}
importer.import!
      MESSAGE
    end
  end

  # Reports an import block that finished without importing the rows and
  # shows where import_rows! belongs.
  class UnimportedRowsError < Error
    def initialize
      super <<-MESSAGE
The rows in your CSV file have not been imported.

You should include a call to import_rows! at the point in your import block
where you want them to be imported. It should look something like this:

import do
# do stuff before importing rows
import_rows!
# do stuff after importing rows
end
      MESSAGE
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module CSVParty
  # Named parsers that turn a raw cell value into a Ruby value.
  module Parsers
    # Returns the value untouched.
    def parse_raw(value)
      value
    end

    # Returns the value as a String without surrounding whitespace.
    def parse_string(value)
      value.to_s.strip
    end

    # Returns true for "1", "t" or "true", false for "0", "f" or "false"
    # (case-insensitive, surrounding whitespace ignored) and nil otherwise.
    def parse_boolean(value)
      value = value.to_s.strip.downcase

      if %w[1 t true].include? value
        true
      elsif %w[0 f false].include? value
        false
      end
    end

    # Returns the value as an Integer after removing every character other
    # than digits, "-" and "."; parenthesized values are treated as negative.
    def parse_integer(value)
      prepare_numeric_value(value).to_i
    end

    # Returns the value as a BigDecimal after the same clean-up that
    # parse_integer applies.
    #
    # A value without any numeric characters yields zero, matching what
    # parse_integer returns for the same input. Raises ArgumentError when the
    # cleaned value is still not a valid number (e.g. "1.2.3").
    def parse_decimal(value)
      numeric = prepare_numeric_value(value)
      # Kernel#BigDecimal replaces BigDecimal.new, which current bigdecimal
      # releases no longer provide.
      BigDecimal(numeric.empty? ? '0' : numeric)
    end

    # Returns a Date parsed from the value, using the strptime format when one
    # is given. Returns nil when the value cannot be parsed, including nil and
    # blank values.
    def parse_date(value, format = nil)
      # to_s lets nil take the same "unparseable" path as any other bad
      # input instead of raising a TypeError that the rescue would not catch.
      text = value.to_s

      if format.nil?
        Date.parse(text)
      else
        Date.strptime(text, format)
      end
    rescue ArgumentError
      nil
    end

    # Returns a Time parsed from the value, using the strptime format when one
    # is given. Returns nil when the value cannot be parsed, including nil and
    # blank values.
    def parse_time(value, format = nil)
      text = value.to_s

      if format.nil?
        DateTime.parse(text).to_time
      else
        DateTime.strptime(text, format).to_time
      end
    rescue ArgumentError
      nil
    end

    private

    # Trims the value, converts accounting notation and removes everything
    # that is not part of a plain number.
    def prepare_numeric_value(value)
      value = value.to_s.strip
      value = convert_from_accounting_notation(value)
      strip_non_numeric_characters(value)
    end

    # Turns a value wrapped in parentheses, e.g. "(1.50)", into "-1.50".
    # Other values are returned unchanged.
    def convert_from_accounting_notation(value)
      if value =~ /\A\(.*\)\z/
        value.delete('()').insert(0, '-')
      else
        value
      end
    end

    # Removes every character except digits, "-" and ".".
    def strip_non_numeric_characters(value)
      value.gsub(/[^\-0-9.]/, '')
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module CSVParty
  # One CSV row with its configured columns parsed.
  #
  # Parsed values are read as methods named after the columns; the raw values
  # are available under the same names on #unparsed. Any other message is
  # passed on to the runner the row was built with.
  class Row
    attr_accessor :row_number, :csv_string, :unparsed

    # csv_row - object answering #[] with a header and #to_csv
    # config  - object whose #columns yields column name / options pairs
    # runner  - object that receives parser calls and undefined messages
    def initialize(csv_row, config, runner)
      @csv_row = csv_row
      @config = config
      @runner = runner
      @attributes = OpenStruct.new
      parse_row!(csv_row)
    end

    private

    # Records the row's CSV text and raw values, then parses every
    # configured column into @attributes.
    def parse_row!(csv_row)
      self.csv_string = csv_row.to_csv
      self.unparsed = extract_unparsed_values(csv_row)

      @config.columns.each do |column, options|
        value = csv_row[options[:header]]
        @attributes[column] = parse_value(value, options)
      end
    end

    # Builds an OpenStruct holding the raw value of every configured column.
    def extract_unparsed_values(csv_row)
      # Reads @config directly, like parse_row!, instead of reaching the
      # runner's config through method_missing.
      @config.columns.each_with_object(OpenStruct.new) do |(column, options), unparsed_row|
        unparsed_row[column] = csv_row[options[:header]]
      end
    end

    # Parses one value with the column's parser, which is either the name of
    # a parser method (Symbol) or a block. Blank values short-circuit to nil
    # when the column asks for that via :intercept_blanks.
    def parse_value(value, options)
      return nil if options[:intercept_blanks] && is_blank?(value)

      if options[:parser].is_a?(Symbol)
        parse_with_method(value, options)
      else
        parse_with_block(value, options)
      end
    end

    # Calls the named parser, passing the column's :format only when one is
    # configured.
    def parse_with_method(value, options)
      format = options[:format]
      parser = options[:parser]

      if format.nil?
        send(parser, value)
      else
        send(parser, value, format)
      end
    end

    # Runs the parser block with the row as self.
    def parse_with_block(value, options)
      instance_exec(value, &options[:parser])
    end

    # True for nil and for values whose string form is empty or whitespace.
    # to_s keeps non-String values from raising on #strip.
    def is_blank?(value)
      value.nil? || value.to_s.strip.empty?
    end

    def respond_to_missing?(method, include_private = false)
      @attributes.respond_to?(method) || @runner.respond_to?(method, true) || super
    end

    # Parsed attributes take precedence over the runner. Blocks are forwarded
    # so delegated methods that yield keep working.
    def method_missing(method, *args, &block)
      if @attributes.respond_to?(method)
        @attributes.send(method, *args, &block)
      elsif @runner.respond_to?(method, true)
        @runner.send(method, *args, &block)
      else
        super
      end
    end
  end
end
|
@@ -0,0 +1,219 @@
|
|
1
|
+
require 'csv_party/parsers'
|
2
|
+
|
3
|
+
module CSVParty
  # Drives one import: validates the setup, reads the CSV row by row and runs
  # the configured row importer for each row, routing skipped rows, aborted
  # rows and errors to the configured handlers. Messages the runner does not
  # understand are passed on to the importer.
  class Runner
    include Parsers

    # Record stored for an ignored error: the exception, the number of the
    # row being processed and that row's CSV text.
    ErrorRow = Struct.new(:error, :line_number, :csv_string)

    attr_accessor :csv, :config, :importer

    # csv      - CSV reader answering #shift, #headers and #rewind
    # config   - configuration holding columns, handlers and dependencies
    # importer - object that owns the import and receives its results
    def initialize(csv, config, importer)
      self.csv = csv
      self.config = config
      self.importer = importer
      @_rows_have_been_imported = false
      # Incremented before each row is processed, so the first row read by
      # import_rows! is numbered 2.
      @_current_row_number = 1
    end

    # Runs the import. Returns true when it completes and false when it was
    # stopped through abort_import!, in which case the importer is flagged as
    # aborted and given the abort message. Setup problems raise the
    # corresponding CSVParty errors.
    def import!
      raise_unless_row_processor_is_defined!
      raise_unless_all_named_parsers_exist!
      raise_unless_all_dependencies_are_present!
      initialize_csv!
      initialize_regex_headers!
      raise_unless_csv_has_all_columns!

      if config.file_importer
        instance_exec(&config.file_importer)
        raise_unless_rows_have_been_imported!
      else
        import_rows!
      end

      true
    rescue AbortedImportError => error
      importer.aborted = true
      importer.abort_message = error.message
      false
    end

    # Headers read from the CSV by initialize_csv!.
    def present_columns
      @_headers
    end

    # Required columns that are not among the CSV's headers.
    def missing_columns
      config.required_columns - present_columns
    end

    private

    # Reads one row to capture the headers, then rewinds the reader.
    # NOTE(review): presumably the reader only exposes headers after the
    # first read - confirm against how the CSV object is constructed.
    def initialize_csv!
      csv.shift
      @_headers = csv.headers
      csv.rewind
    end

    # Imports every remaining row, translating the control-flow errors raised
    # by next_row!, skip_row! and abort_row! into their handlers.
    def import_rows!
      loop do
        begin
          row = csv.shift
          break unless row
          import_row!(row)
        rescue NextRowError
          next
        rescue SkippedRowError => error
          handle_skipped_row(error)
        rescue AbortedRowError => error
          handle_aborted_row(error)
        rescue AbortedImportError, CSV::MalformedCSVError
          # Neither is a per-row problem; let both end the import.
          raise
        rescue StandardError => error
          # Without a row the failure came from reading the CSV itself, so
          # there is nothing to report per row; surface the original error.
          raise if row.nil?

          handle_error(error, @_current_row_number, row.to_csv)
        end
      end

      @_rows_have_been_imported = true
    end

    # Parses one CSV row and runs the row importer with it.
    def import_row!(csv_row)
      @_current_row_number += 1
      @_current_parsed_row = Row.new(csv_row, config, self)
      @_current_parsed_row.row_number = @_current_row_number
      instance_exec(@_current_parsed_row, &config.row_importer)
    end

    # Stops processing the current row without recording it.
    def next_row!
      raise NextRowError
    end

    # Stops processing the current row and hands it to the skipped row
    # handling.
    def skip_row!(message = nil)
      raise SkippedRowError, message
    end

    # Stops processing the current row and hands it to the aborted row
    # handling.
    def abort_row!(message = nil)
      raise AbortedRowError, message
    end

    # Stops the whole import; import! then returns false.
    def abort_import!(message)
      raise AbortedImportError, message
    end

    # Re-raises the error when no handler is configured, records it when the
    # handler is :ignore and otherwise runs the handler block.
    def handle_error(error, line_number, csv_string)
      raise error unless config.error_handler

      if config.error_handler == :ignore
        # error_rows is not defined on the runner; it resolves through
        # method_missing on the importer.
        error_rows << error_struct(error, line_number, csv_string)
      else
        instance_exec(error, line_number, csv_string, &config.error_handler)
      end
    end

    # Does nothing for :ignore; otherwise stores the message on the row and
    # either collects the row on the importer or runs the handler block.
    def handle_skipped_row(error)
      return if config.skipped_row_handler == :ignore

      @_current_parsed_row[:skip_message] = error.message

      if config.skipped_row_handler.nil?
        importer.skipped_rows << @_current_parsed_row
      else
        instance_exec(@_current_parsed_row, &config.skipped_row_handler)
      end
    end

    # Does nothing for :ignore; otherwise stores the message on the row and
    # either collects the row on the importer or runs the handler block.
    def handle_aborted_row(error)
      return if config.aborted_row_handler == :ignore

      @_current_parsed_row[:abort_message] = error.message

      if config.aborted_row_handler.nil?
        importer.aborted_rows << @_current_parsed_row
      else
        instance_exec(@_current_parsed_row, &config.aborted_row_handler)
      end
    end

    # Wraps an error in an ErrorRow. Reusing one Struct class avoids defining
    # a new anonymous class for every recorded error.
    def error_struct(error, line_number, csv_string)
      ErrorRow.new(error, line_number, csv_string)
    end

    def raise_unless_row_processor_is_defined!
      return if config.row_importer

      raise UndefinedRowProcessorError
    end

    def raise_unless_rows_have_been_imported!
      return if @_rows_have_been_imported

      raise UnimportedRowsError
    end

    # Raises for the first dependency the importer returns nil for.
    def raise_unless_all_dependencies_are_present!
      config.dependencies.each do |dependency|
        next unless importer.send(dependency).nil?

        # The message shows how to construct the importer, so it needs the
        # importer rather than the runner.
        raise MissingDependencyError.new(importer, dependency)
      end
    end

    # This error has to be raised at runtime because, when the class body
    # is being executed, the parser methods won't be available unless
    # they are defined above the column definitions in the class body
    def raise_unless_all_named_parsers_exist!
      config.columns_with_named_parsers.each do |name, options|
        parser = options[:parser]
        next if named_parsers.include? parser

        raise UnknownParserError.new(name, parser, named_parsers)
      end
    end

    # Every parse_* method, public or private, on the runner or the importer.
    def named_parsers
      (methods +
        private_methods +
        importer.methods +
        importer.private_methods).grep(/^parse_/)
    end

    def raise_unless_csv_has_all_columns!
      return if missing_columns.empty?

      raise MissingColumnError.new(present_columns, missing_columns)
    end

    # Replaces each header pattern with the first CSV header it matches,
    # falling back to the column name when none matches.
    # NOTE(review): this writes into the options yielded by the config; if
    # that config is shared between imports the pattern is lost after the
    # first run - confirm how the config is built per importer.
    def initialize_regex_headers!
      config.columns_with_regex_headers.each do |name, options|
        found_header = @_headers.find do |header|
          options[:header].match(header)
        end
        options[:header] = found_header || name.to_s
      end
    end

    def respond_to_missing?(method, include_private = false)
      importer.respond_to?(method, true) || super
    end

    # Passes unknown messages on to the importer, including any block.
    def method_missing(method, *args, &block)
      if importer.respond_to?(method, true)
        importer.send(method, *args, &block)
      else
        super
      end
    end

    # Raised by next_row!.
    class NextRowError < Error
    end

    # Raised by skip_row!.
    class SkippedRowError < Error
    end

    # Raised by abort_row!.
    class AbortedRowError < Error
    end

    # Raised by abort_import!.
    class AbortedImportError < Error
    end
  end
end
|