iron-import 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +24 -3
- data/README.rdoc +78 -14
- data/Version.txt +1 -1
- data/lib/iron/import/column.rb +137 -49
- data/lib/iron/import/csv_reader.rb +2 -2
- data/lib/iron/import/data_reader.rb +8 -3
- data/lib/iron/import/error.rb +57 -6
- data/lib/iron/import/excel_reader.rb +1 -1
- data/lib/iron/import/html_reader.rb +1 -6
- data/lib/iron/import/importer.rb +166 -47
- data/lib/iron/import/row.rb +16 -1
- data/spec/importer/column_spec.rb +25 -0
- data/spec/importer/error_spec.rb +34 -0
- data/spec/importer/importer_spec.rb +119 -1
- data/spec/importer/row_spec.rb +5 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87cd90d663132748c61dfaa450449136f0cc00c4
|
4
|
+
data.tar.gz: e350419e3bdc6afb98b6a84b4fef8c9a4f1cd4be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1646f83be5af42715b1f71f9090aa0bf323bf9495d97d4abbb5543f85cb98f006fe9bd407823810c343e3dbc81a477da1445f399bcad9eca79108fd2869719f5
|
7
|
+
data.tar.gz: ba01474f1f4eebf7eaf2a23bc12824d0d57c00de61226ede66e1f1c9bd554d57161b8d811aeb2793cedc42f5a4b0bf8e9d34961c98f755aee5f09cfef1b81847
|
data/History.txt
CHANGED
@@ -1,10 +1,31 @@
|
|
1
|
+
== 0.8.0 / 2017-06-29
|
2
|
+
* Breaking Change: change signature of Importer#add_error to support new features
|
3
|
+
* Breaking Change: Importer.missing_headers will be [] instead of nil on all headers found
|
4
|
+
* Breaking Change: remove deprecated method Column#required!
|
5
|
+
* Add Importer#rows to directly access rows post-import
|
6
|
+
* Add Column#optional! to enable non-mandatory columns during header detection
|
7
|
+
* Add Column#present? and Column#missing? to test for presence after import attempt
|
8
|
+
* Add Importer#validate_columns to allow detecting invalid column combinations
|
9
|
+
* Add Importer#validate_rows to allow whole-row validation
|
10
|
+
* Add Importer#virtual_column and Column#calculate to enable virtual columns
|
11
|
+
* Track actual header text found for columns
|
12
|
+
* Update Column#to_s to use actual header text when present
|
13
|
+
* Revamp error tracking significantly to provide better reporting ability
|
14
|
+
* Improve error tracking to include row/column when knowable
|
15
|
+
* Improve error tracking to include problem values when knowable
|
16
|
+
* Add Column#error_values to return unique raw error values by column
|
17
|
+
* Add Column#error_values? which will be true when there is at least one error value for the column
|
18
|
+
* Add Row#error_map to return map of column key => raw value for each row
|
19
|
+
* Change Column#parse block processing to allow explicit #add_error call
|
20
|
+
* Change Column#validate block processing to allow explicit #add_error call and to add an implicit error on returned false
|
21
|
+
|
1
22
|
== 0.7.0 / 2017-02-16
|
2
23
|
|
3
24
|
* Breaking Change: Removed multi-sheet support - use multiple importers instead
|
4
|
-
* Breaking Change: Removed warnings as they were not being used
|
5
|
-
*
|
6
|
-
* Add Importer#scope to allow narrowing the search to one or more sheets/tables when importing
|
25
|
+
* Breaking Change: Removed warnings as they were confusing and not being used
|
26
|
+
* Deprecate Column#required! due to bugginess and overlap with Column#validate
|
7
27
|
* Add new HtmlReader support to handle parsing HTML <table> rows
|
28
|
+
* Add Importer#scope to allow narrowing the search to one or more sheets/tables when importing
|
8
29
|
* Modify Importer#import to support block mode combining #import and #process
|
9
30
|
* Add Importer#import_string for handling explicit CSV/HTML/Custom text
|
10
31
|
* Add Importer#on_error(&block) to allow inline conditional error handling
|
data/README.rdoc
CHANGED
@@ -4,11 +4,17 @@ Written by Rob Morris @ Irongaze Consulting LLC (http://irongaze.com)
|
|
4
4
|
|
5
5
|
== DESCRIPTION
|
6
6
|
|
7
|
-
Simple, reliable tabular data import.
|
7
|
+
Simple, versatile, reliable tabular data import.
|
8
8
|
|
9
9
|
This gem provides a set of classes to support automating import of tabular data from
|
10
|
-
CSV, HTML, XLS and XLSX files
|
11
|
-
|
10
|
+
CSV, HTML, XLS and XLSX files. Key features include defining columns, auto-detecting column order,
|
11
|
+
pre-parsing data, validating data, filtering rows, and robust error tracking.
|
12
|
+
|
13
|
+
IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
|
14
|
+
for the task. Breaking changes will be noted by increases in the minor version,
|
15
|
+
ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
|
16
|
+
|
17
|
+
== WHO IS THIS FOR?
|
12
18
|
|
13
19
|
The Roo/Spreadsheet gems do a great job of providing general purpose spreadsheet reading.
|
14
20
|
However, using them with unreliable user submitted data requires a lot of error checking,
|
@@ -17,21 +23,36 @@ businesses, where Excel files are the lingua franca for all kinds of uses. This
|
|
17
23
|
attempts to extract years of experience building one-off importers into a simple library
|
18
24
|
for rapid import coding.
|
19
25
|
|
26
|
+
In addition, it's quite common for the same data to be transmitted in varying formats -
|
27
|
+
Excel files, HTML files, CSV files, custom text streams... Use iron-import to have a single
|
28
|
+
tool-set for processing any of these types of data, often without changing a line of code.
|
29
|
+
|
20
30
|
This is NOT a general-purpose tool for reading spreadsheets. If you want access to
|
21
31
|
cell styling, reading underlying formulas, etc., you will be better served building
|
22
|
-
a custom importer based on Roo. But if you're looking to take
|
32
|
+
a custom importer based on Roo. But if you're looking to take a customer-uploaded CSV file,
|
23
33
|
validate and coerce values, then write each row to a database, all the while tracking
|
24
34
|
any errors encountered... well, this is the library for you!
|
25
35
|
|
26
|
-
|
27
|
-
|
28
|
-
|
36
|
+
== KEY FEATURES
|
37
|
+
|
38
|
+
- Simple yet robust data import and error handling using elegant builder syntax
|
39
|
+
- Import data from file, stream or string data sources
|
40
|
+
- Import XLS, XLSX, CSV and HTML tabular data
|
41
|
+
- Import custom tabular data via passed block
|
42
|
+
- Automatic column order and start row detection
|
43
|
+
- Support for optional columns and dynamic column sets
|
44
|
+
- Basic data coercion supporting string, int, float, date and cents types
|
45
|
+
- Custom data coercion via passed block
|
46
|
+
- Custom data validation via passed block
|
47
|
+
- Row filtering using custom block
|
48
|
+
- Automatically track and report errors with fine-grained context
|
49
|
+
- Prefer capturing errors over raising exceptions for more robust imports
|
29
50
|
|
30
51
|
== SAMPLE USAGE
|
31
52
|
|
32
53
|
# Define our importer, with three columns. The importer will look for a row containing
|
33
54
|
# "name"/"product", "description" and "price" (case insensitively) and automatically determine column
|
34
|
-
# order and starting row of the data.
|
55
|
+
# order and the starting row of the data.
|
35
56
|
importer = Importer.build do
|
36
57
|
column :name do
|
37
58
|
# Column order and start row are auto-detected
|
@@ -42,9 +63,9 @@ ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semant
|
|
42
63
|
parse do |raw_val|
|
43
64
|
raw_val.to_s.strip
|
44
65
|
end
|
45
|
-
# And
|
66
|
+
# And custom validation
|
46
67
|
validate do |parsed_val|
|
47
|
-
|
68
|
+
add_error('Invalid description') unless parsed_val.length > 5
|
48
69
|
end
|
49
70
|
end
|
50
71
|
column :price do
|
@@ -52,13 +73,13 @@ ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semant
|
|
52
73
|
type :cents
|
53
74
|
end
|
54
75
|
|
55
|
-
# Need to skip rows? Use a filter!
|
56
|
-
|
76
|
+
# Need to skip rows? Use a filter! Return true to include a row when processing
|
77
|
+
filter_rows do |row|
|
57
78
|
row[:price] != 0 && row[:name] != 'Sample'
|
58
79
|
end
|
59
80
|
end
|
60
81
|
|
61
|
-
# Import the provided file row-by-row (if importing succeeds), automatically
|
82
|
+
# Import the provided file or stream row-by-row (if importing succeeds), automatically
|
62
83
|
# using the proper library to read CSV data. This same code would work
|
63
84
|
# with XLS or XLSX files with no changes to the code.
|
64
85
|
importer.import('/tmp/source.csv') do |row|
|
@@ -67,9 +88,52 @@ ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semant
|
|
67
88
|
|
68
89
|
# Check for errors and do the right thing:
|
69
90
|
importer.on_error do
|
70
|
-
|
91
|
+
if missing_headers.any?
|
92
|
+
# Can't find required column header(s)
|
93
|
+
puts "Unable to locate columns: #{missing_headers}"
|
94
|
+
|
95
|
+
elsif columns.any?(&:error_values?)
|
96
|
+
# Invalid or unexpected values in one or more columns
|
97
|
+
columns.select(&:error_values?).each do |col|
|
98
|
+
puts "Invalid values for #{col}: #{col.error_values}"
|
99
|
+
end
|
100
|
+
|
101
|
+
else
|
102
|
+
# General errors, dump report
|
103
|
+
puts "Error(s) on import: " + error_summary
|
104
|
+
end
|
71
105
|
end
|
72
106
|
|
107
|
+
# You can chain the build/import/on-error blocks for a cleaner flow:
|
108
|
+
Importer.build do
|
109
|
+
column :one
|
110
|
+
column :two
|
111
|
+
end.import(params[:uploaded_file]) do |row|
|
112
|
+
SomeModel.create(row)
|
113
|
+
end.on_error do
|
114
|
+
raise "Errors found: " + error_summary
|
115
|
+
end
|
116
|
+
|
117
|
+
== IMPORT EXECUTION ORDER
|
118
|
+
|
119
|
+
It can be tricky to keep track of what happens in Importer#import, so here's a quick cheat-sheet:
|
120
|
+
|
121
|
+
- Determine the **format** of stream/file to import
|
122
|
+
- Determine **import scope** (sheet/table/whatever) using Importer#scope settings, if any
|
123
|
+
- **Find column headers + start row**
|
124
|
+
- Validate presence of **required columns**
|
125
|
+
- **Validate column set** using Importer#validate_columns
|
126
|
+
- Run each row:
|
127
|
+
- **Parse** each column's value using Column#parse or Column#type
|
128
|
+
- **Filter the row** using Importer#filter_rows on parsed values to reject unwanted rows
|
129
|
+
- **Calculate virtual columns** using Column#calculate
|
130
|
+
- **Validate each parsed value** using Column#validate
|
131
|
+
- **Validate entire row** using Importer#validate_rows
|
132
|
+
|
133
|
+
Generally, the import will stop when an error occurs, save on row processing, where each row will
|
134
|
+
be run until an error for that row is found. The goal is to accumulate actionable info for
|
135
|
+
presentation to the end user who is uploading the file.
|
136
|
+
|
73
137
|
== REQUIREMENTS
|
74
138
|
|
75
139
|
Depends on the iron-extensions and iron-dsl gems for CSV and custom import formats.
|
data/Version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.8.0
|
data/lib/iron/import/column.rb
CHANGED
@@ -8,13 +8,17 @@ class Importer
|
|
8
8
|
#
|
9
9
|
# Importer.build do
|
10
10
|
# column :key do
|
11
|
+
# # Mark this column as optional, i.e. if the header isn't found, the import will
|
12
|
+
# # work without error and the imported row will simply not contain this column's data.
|
13
|
+
# optional!
|
14
|
+
#
|
11
15
|
# # Set a fixed position - may be a column number or a letter-based
|
12
16
|
# # column description, ie 'A' == 1. In most cases, you can leave
|
13
17
|
# # this defaulted to nil, which will mean "look for the proper header"
|
14
18
|
# position 'C'
|
15
19
|
#
|
16
20
|
# # Specify a regex to locate the header for this column, defaults to
|
17
|
-
# # finding a string containing the key.
|
21
|
+
# # finding a string containing the key, ignored if position is set.
|
18
22
|
# header /(price|cost)/i
|
19
23
|
#
|
20
24
|
# # Tells the data parser what type of data this column contains, one
|
@@ -24,7 +28,8 @@ class Importer
|
|
24
28
|
# # Instead of a type, you can set an explicit parse block. Be aware
|
25
29
|
# # that different source types may give you different raw values for what
|
26
30
|
# # seems like the "same" source value, for example an Excel source file
|
27
|
-
# # will give you a float value for all numeric types, even "integers"
|
31
|
+
# # will give you a float value for all numeric types, even "integers", while
|
32
|
+
# # CSV and HTML values are always strings.
|
28
33
|
# parse do |raw_value|
|
29
34
|
# val = raw_value.to_i + 1000
|
30
35
|
# # NOTE: we're in a block, so don't do this:
|
@@ -35,9 +40,20 @@ class Importer
|
|
35
40
|
#
|
36
41
|
# # You can also add a custom validator to check the value and add
|
37
42
|
# # an error if it's not within a given range, or whatever. To fail validation,
|
38
|
-
# #
|
39
|
-
# validate do |parsed_value|
|
40
|
-
#
|
43
|
+
# # return false, raise an exception, or use #add_error
|
44
|
+
# validate do |parsed_value, row|
|
45
|
+
# add_error "Out of range" unless (parsed_value > 0 && parsed_value < 5000)
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
# # Mark a column as _virtual_, meaning it won't be looked for in the source
|
49
|
+
# # file/stream, and instead will be calculated using #calculate. When set,
|
50
|
+
# # causes importer to ignore position/header/type/parse settings.
|
51
|
+
# virtual!
|
52
|
+
#
|
53
|
+
# # When #virtual! is set, gets called to calculate each row's value for this
|
54
|
+
# # column using the row's parsed values.
|
55
|
+
# calculate do |row|
|
56
|
+
# row[:some_col] + 5
|
41
57
|
# end
|
42
58
|
# end
|
43
59
|
# end
|
@@ -46,10 +62,14 @@ class Importer
|
|
46
62
|
|
47
63
|
# Holds load-time data
|
48
64
|
class Data
|
49
|
-
attr_accessor :index
|
65
|
+
attr_accessor :index, :header_text, :errors
|
66
|
+
|
67
|
+
def initialize
|
68
|
+
@errors = []
|
69
|
+
end
|
50
70
|
|
51
71
|
def pos
|
52
|
-
@index ? Column::index_to_pos(@index) : '
|
72
|
+
@index ? Column::index_to_pos(@index) : 'Not Found'
|
53
73
|
end
|
54
74
|
end
|
55
75
|
|
@@ -59,7 +79,8 @@ class Importer
|
|
59
79
|
|
60
80
|
# Configuration
|
61
81
|
dsl_accessor :header, :position, :type
|
62
|
-
dsl_accessor :parse, :validate
|
82
|
+
dsl_accessor :parse, :validate, :calculate
|
83
|
+
dsl_flag :optional, :virtual
|
63
84
|
|
64
85
|
def self.pos_to_index(pos)
|
65
86
|
raise 'Invalid column position: ' + pos.inspect unless pos.is_a?(String) && pos.match(/\A[a-z]{1,3}\z/i)
|
@@ -73,6 +94,7 @@ class Importer
|
|
73
94
|
total - 1
|
74
95
|
end
|
75
96
|
|
97
|
+
# Convert a numeric index to an Excel-like column position, e.g. 3 => 'C'
|
76
98
|
def self.index_to_pos(index)
|
77
99
|
val = index.to_i
|
78
100
|
raise 'Invalid column index: ' + index.inspect if (!index.is_a?(Fixnum) || index.to_i < 0)
|
@@ -95,6 +117,12 @@ class Importer
|
|
95
117
|
# Save off our info
|
96
118
|
@key = key
|
97
119
|
@importer = importer
|
120
|
+
|
121
|
+
# Are we optional?
|
122
|
+
@optional = options_hash.delete(:optional) { false }
|
123
|
+
|
124
|
+
# Are we virtual?
|
125
|
+
@virtual = options_hash.delete(:virtual) { false }
|
98
126
|
|
99
127
|
# Return it as a string, by default
|
100
128
|
@type = options_hash.delete(:type) { :string }
|
@@ -105,8 +133,14 @@ class Importer
|
|
105
133
|
# By default, don't parse incoming data, just pass it through
|
106
134
|
@parse = options_hash.delete(:parse)
|
107
135
|
|
136
|
+
# Custom validation, anyone?
|
137
|
+
@validate = options_hash.delete(:validate)
|
138
|
+
|
139
|
+
# Custom calculation (used by virtual columns), anyone?
|
140
|
+
@calculate = options_hash.delete(:calculate)
|
141
|
+
|
108
142
|
# Default matcher, looks for the presence of the column key as text anywhere
|
109
|
-
# in the header string, ignoring case and
|
143
|
+
# in the header string, ignoring case and treating underscores as spaces, ie
|
110
144
|
# :order_id => /\A\s*order id\s*\z/i
|
111
145
|
@header = options_hash.delete(:header) {
|
112
146
|
Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
|
@@ -126,22 +160,10 @@ class Importer
|
|
126
160
|
@data = Data.new
|
127
161
|
end
|
128
162
|
|
129
|
-
# DEPRECATED - duplicates functionality better provided by #validate, e.g.
|
130
|
-
#
|
131
|
-
# validate do |val|
|
132
|
-
# raise 'Missing required value for column foo' if val.nil?
|
133
|
-
# end
|
134
|
-
def required!
|
135
|
-
Kernel.warn "[DEPRECATION] Importer::Column#required! is deprecated. Please use #validate instead."
|
136
|
-
col = self.key
|
137
|
-
validate do |val|
|
138
|
-
raise "Missing required value for column :#{col}"
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
163
|
# When true, our header definition or index match the passed text or column index.
|
143
|
-
def match_header?(text,
|
144
|
-
return
|
164
|
+
def match_header?(text, test_index)
|
165
|
+
return false if virtual?
|
166
|
+
return true if test_index == self.fixed_index
|
145
167
|
if @header.is_a?(Regexp)
|
146
168
|
return !@header.match(text).nil?
|
147
169
|
else
|
@@ -149,34 +171,11 @@ class Importer
|
|
149
171
|
end
|
150
172
|
end
|
151
173
|
|
152
|
-
# Applies any custom parser defined to process the given value, capturing
|
153
|
-
# errors as needed
|
154
|
-
def parse_value(row, val)
|
155
|
-
return val if @parse.nil?
|
156
|
-
begin
|
157
|
-
@parse.call(val)
|
158
|
-
rescue Exception => e
|
159
|
-
@importer.add_error(row, "Error parsing #{self}: #{e}")
|
160
|
-
nil
|
161
|
-
end
|
162
|
-
end
|
163
|
-
|
164
|
-
# Applies any validation to a parsed value
|
165
|
-
def validate_value(row, val)
|
166
|
-
return true unless @validate
|
167
|
-
begin
|
168
|
-
@validate.call(val)
|
169
|
-
true
|
170
|
-
rescue Exception => e
|
171
|
-
@importer.add_error(row, "Validation error in #{self}: #{e}")
|
172
|
-
false
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
174
|
# Returns the fixed index of this column based on the set position.
|
177
175
|
# In other words, a position of 2 would return an index of 1 (as
|
178
176
|
# indices are 0-based), where a position of 'C' would return 2.
|
179
177
|
def fixed_index
|
178
|
+
return nil if virtual?
|
180
179
|
return nil unless @position
|
181
180
|
if @position.is_a?(Fixnum)
|
182
181
|
@position - 1
|
@@ -185,9 +184,98 @@ class Importer
|
|
185
184
|
end
|
186
185
|
end
|
187
186
|
|
187
|
+
# Applies any custom parser defined to process the given value, capturing
|
188
|
+
# errors as needed
|
189
|
+
def parse_value(row, raw_val)
|
190
|
+
return raw_val if @parse.nil?
|
191
|
+
|
192
|
+
res = nil
|
193
|
+
had_error = Error.with_context(@importer, row, self, raw_val) do
|
194
|
+
res = DslProxy.exec(@importer, raw_val, &@parse)
|
195
|
+
end
|
196
|
+
had_error ? nil : res
|
197
|
+
end
|
198
|
+
|
199
|
+
def calculate_value(row)
|
200
|
+
return nil if @calculate.nil?
|
201
|
+
res = nil
|
202
|
+
had_error = Error.with_context(@importer, row, self, nil) do
|
203
|
+
res = DslProxy.exec(@importer, row, &@calculate)
|
204
|
+
end
|
205
|
+
had_error ? nil : res
|
206
|
+
end
|
207
|
+
|
208
|
+
# Applies any validation to a parsed value
|
209
|
+
def validate_value(row, parsed_val)
|
210
|
+
return true unless @validate
|
211
|
+
|
212
|
+
valid = false
|
213
|
+
had_error = Error.with_context(@importer, row, self, parsed_val) do
|
214
|
+
valid = DslProxy.exec(@importer, parsed_val, row, &@validate)
|
215
|
+
end
|
216
|
+
if had_error
|
217
|
+
return false
|
218
|
+
elsif valid.is_a?(FalseClass)
|
219
|
+
@importer.add_error("Invalid value: #{parsed_val.inspect}", :row => row, :column => self, :value => parsed_val)
|
220
|
+
return false
|
221
|
+
else
|
222
|
+
return true
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
# Index of the column in the most recent import, if found, or
|
227
|
+
# nil if not present.
|
228
|
+
def index
|
229
|
+
@data.index
|
230
|
+
end
|
231
|
+
|
232
|
+
# When true, column was found in the last import, eg:
|
233
|
+
#
|
234
|
+
# importer.process do |row|
|
235
|
+
# puts "Size: #{row[:size]}" if column(:size).present?
|
236
|
+
# end
|
237
|
+
def present?
|
238
|
+
!@data.index.nil?
|
239
|
+
end
|
240
|
+
|
241
|
+
# Sugar, simply the opposite of #present?
|
242
|
+
def missing?
|
243
|
+
!present?
|
244
|
+
end
|
245
|
+
|
246
|
+
def parses?
|
247
|
+
!@parse.nil?
|
248
|
+
end
|
249
|
+
|
250
|
+
def validates?
|
251
|
+
!@validate.nil?
|
252
|
+
end
|
253
|
+
|
254
|
+
def calculates?
|
255
|
+
!@calculate.nil?
|
256
|
+
end
|
257
|
+
|
258
|
+
def errors
|
259
|
+
@data.errors
|
260
|
+
end
|
261
|
+
|
262
|
+
def error_values
|
263
|
+
errors.collect(&:value).uniq
|
264
|
+
end
|
265
|
+
|
266
|
+
def error_values?
|
267
|
+
error_values.any?
|
268
|
+
end
|
269
|
+
|
188
270
|
# Pretty name for ourselves
|
189
271
|
def to_s
|
190
|
-
|
272
|
+
if !virtual? && @data.header_text.blank?
|
273
|
+
"Column #{@data.pos}"
|
274
|
+
else
|
275
|
+
name = virtual? ? key.to_s : @data.header_text
|
276
|
+
name = name.gsub(/(^[a-z]|\s[a-z])/) {|m| m.capitalize }
|
277
|
+
"#{name} Column"
|
278
|
+
end
|
191
279
|
end
|
192
280
|
|
193
281
|
# Extracts the imported values for this column and returns them in an array.
|
@@ -30,9 +30,9 @@ class Importer
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
-
# Normally, we'd check the scopes and return the proper data, but for CSV files,
|
34
|
-
# there's only one scope...
|
35
33
|
def load_raw(scopes, &block)
|
34
|
+
# Normally, we'd check the scopes and return the proper data, but for CSV files,
|
35
|
+
# there's only one scope...
|
36
36
|
block.call(@raw_rows)
|
37
37
|
end
|
38
38
|
|
@@ -182,8 +182,14 @@ class Importer
|
|
182
182
|
!@importer.has_errors?
|
183
183
|
end
|
184
184
|
|
185
|
-
# Load up the
|
185
|
+
# Load up the sheet in the correct mode
|
186
186
|
def load_each(mode, source, scopes, &block)
|
187
|
+
# Handle some common error cases centrally
|
188
|
+
if mode == :file && !File.exist?(source)
|
189
|
+
add_error("File not found: #{source}")
|
190
|
+
return
|
191
|
+
end
|
192
|
+
|
187
193
|
# Let our derived classes open the file, etc. as they need
|
188
194
|
if init_source(mode, source)
|
189
195
|
# Once the source is set, run through each defined sheet, pass it to
|
@@ -209,12 +215,11 @@ class Importer
|
|
209
215
|
# handle edge cases like converting '5.00' to 5 when in integer mode, etc. If you find your inputs aren't
|
210
216
|
# being parsed correctly, add a custom #parse block on your Column definition.
|
211
217
|
def parse_value(val, type)
|
212
|
-
return nil if val.nil? || val.to_s == ''
|
218
|
+
return nil if val.nil? || val.to_s.strip == ''
|
213
219
|
|
214
220
|
case type
|
215
221
|
when :string then
|
216
222
|
val = val.to_s.strip
|
217
|
-
val.blank? ? nil : val
|
218
223
|
|
219
224
|
when :integer, :int then
|
220
225
|
if val.class < Numeric
|
data/lib/iron/import/error.rb
CHANGED
@@ -1,14 +1,65 @@
|
|
1
1
|
class Importer
|
2
2
|
|
3
3
|
class Error
|
4
|
-
|
5
|
-
attr_reader :row, :text
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
attr_reader :row, :column, :value, :text
|
6
|
+
|
7
|
+
# Block wrapper to set error context for any errors generated within the block
|
8
|
+
def self.with_context(importer, row, column, val)
|
9
|
+
# Set new context
|
10
|
+
old_row = @context_row
|
11
|
+
@context_row = row
|
12
|
+
old_col = @context_column
|
13
|
+
@context_column = column
|
14
|
+
old_val = @context_value
|
15
|
+
@context_value = val
|
16
|
+
old_err = @error_occurred
|
17
|
+
@error_occurred = false
|
18
|
+
|
19
|
+
# Run the block, catch raised exceptions as errors
|
20
|
+
begin
|
21
|
+
yield
|
22
|
+
rescue RuntimeError => e
|
23
|
+
# Old-style way of registering errors was to just raise 'foo'
|
24
|
+
importer.add_error(e.to_s)
|
10
25
|
end
|
26
|
+
had_error = @error_occurred
|
27
|
+
|
28
|
+
# Reset to old context
|
29
|
+
@context_row = old_row
|
30
|
+
@context_column = old_col
|
31
|
+
@context_value = old_val
|
32
|
+
@error_occurred = old_err
|
33
|
+
|
34
|
+
return had_error
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.context_row
|
38
|
+
@context_row
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.context_column
|
42
|
+
@context_column
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.context_value
|
46
|
+
@context_value
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.error_occurred!
|
50
|
+
@error_occurred = true
|
51
|
+
end
|
52
|
+
|
53
|
+
def initialize(text, context = {})
|
11
54
|
@text = text.to_s
|
55
|
+
@row = context[:row] || Error.context_row
|
56
|
+
@column = context[:column] || Error.context_column
|
57
|
+
@value = context[:value] || Error.context_value
|
58
|
+
|
59
|
+
@row.errors << self if @row
|
60
|
+
@column.errors << self if @column
|
61
|
+
|
62
|
+
Error.error_occurred!
|
12
63
|
end
|
13
64
|
|
14
65
|
def summary
|
@@ -39,7 +90,7 @@ class Importer
|
|
39
90
|
end
|
40
91
|
|
41
92
|
# Returns true if this error is for the given context, where
|
42
|
-
# context can be a Row
|
93
|
+
# context can be a Row or Importer instance.
|
43
94
|
def for_context?(context)
|
44
95
|
case context
|
45
96
|
when Row
|
@@ -13,12 +13,7 @@ class Importer
|
|
13
13
|
if mode == :stream
|
14
14
|
@html = Nokogiri::HTML(source)
|
15
15
|
elsif mode == :file
|
16
|
-
|
17
|
-
@html = File.open(source) {|f| Nokogiri::HTML(f) }
|
18
|
-
else
|
19
|
-
add_error("File not found: #{source}")
|
20
|
-
return false
|
21
|
-
end
|
16
|
+
@html = File.open(source) {|f| Nokogiri::HTML(f) }
|
22
17
|
else
|
23
18
|
add_error("Unsupported HTML mode: #{mode}")
|
24
19
|
return false
|
data/lib/iron/import/importer.rb
CHANGED
@@ -21,21 +21,51 @@
|
|
21
21
|
# A more realistic and complex example follows:
|
22
22
|
#
|
23
23
|
# Importer.build do
|
24
|
-
# # Define our columns and settings
|
24
|
+
# # Define our columns and their settings
|
25
25
|
# column :order_number do
|
26
|
-
#
|
26
|
+
# optional!
|
27
|
+
# header /order (\#|num.*|id)/i
|
27
28
|
# type :int
|
28
29
|
# end
|
30
|
+
# column :po_number do
|
31
|
+
# optional!
|
32
|
+
# type :string
|
33
|
+
# validate do |num|
|
34
|
+
# num.match(/[a-z0-9]{12}/i)
|
35
|
+
# end
|
36
|
+
# end
|
29
37
|
# column :date do
|
30
38
|
# type :date
|
31
39
|
# end
|
32
40
|
# column :amount do
|
33
41
|
# type :cents
|
34
42
|
# end
|
43
|
+
# virtual_column :tax do
|
44
|
+
# calculate do |row|
|
45
|
+
# row[:amount] * 0.05
|
46
|
+
# end
|
47
|
+
# end
|
35
48
|
#
|
49
|
+
# # When you have optional columns, you can validate that you have enough of them
|
50
|
+
# # using a custom block returning true if the found columns are good enough to
|
51
|
+
# # continue.
|
52
|
+
# validate_columns do |cols|
|
53
|
+
# # Require either an order # or a PO # column
|
54
|
+
# keys = cols.collect(&:key)
|
55
|
+
# keys.include?(:order_number) || keys.include?(:po_number)
|
56
|
+
# end
|
57
|
+
#
|
36
58
|
#   # Filter out any rows missing both an order number and a PO number
|
37
59
|
# filter do |row|
|
38
|
-
# !row[:order_number].nil?
|
60
|
+
# !row[:order_number].nil? || !row[:po_number].nil?
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# # Use row-level validation to validate using
|
64
|
+
# # any or all column values for that row, to allow complex validation
|
65
|
+
# # scenarios that depend on the full context.
|
66
|
+
# validate_rows do |row|
|
67
|
+
# # Ensure PO Numbers are only valid starting in 2017
|
68
|
+
# add_error 'Invalid order - PO Num from before 2017' unless (row[:date] > Date.parse('2017-01-01') || row[:po_number].nil?)
|
39
69
|
# end
|
40
70
|
#
|
41
71
|
# end.import('/path/to/file.csv', format: :csv) do |row|
|
@@ -52,17 +82,16 @@ class Importer
|
|
52
82
|
|
53
83
|
# Inner class for holding load-time data that gets reset on each load call
|
54
84
|
class Data
|
55
|
-
attr_accessor :start_row, :rows
|
85
|
+
attr_accessor :start_row, :rows, :errors
|
56
86
|
def initialize
|
57
87
|
@start_row = nil
|
58
88
|
@rows = []
|
89
|
+
@errors = []
|
59
90
|
end
|
60
91
|
end
|
61
92
|
|
62
93
|
# Array of defined columns
|
63
94
|
attr_reader :columns
|
64
|
-
# Array of error messages collected during an import/process run
|
65
|
-
attr_accessor :errors
|
66
95
|
# Custom reader, if one has been defined using #on_file or #on_stream
|
67
96
|
attr_reader :custom_reader
|
68
97
|
# Set to the format selected during past import
|
@@ -81,6 +110,8 @@ class Importer
|
|
81
110
|
# Set to a block/lambda taking a parsed but unvalidated row as a hash,
|
82
111
|
# return true to keep, false to skip.
|
83
112
|
dsl_accessor :filter
|
113
|
+
# Alias for #filter
|
114
|
+
def filter_rows(*args, &block); filter(*args, &block); end
|
84
115
|
# Source file/stream encoding, assumes UTF-8 if none specified
|
85
116
|
dsl_accessor :encoding
|
86
117
|
|
@@ -113,14 +144,15 @@ class Importer
|
|
113
144
|
# headerless!
|
114
145
|
#
|
115
146
|
# # Manually set the start row for data, defaults to nil
|
116
|
-
# # indicating that the data rows start immediatly following the header
|
147
|
+
#   # indicating that the data rows start immediately following the header, or
|
148
|
+
# # at the first row if #headerless!.
|
117
149
|
# start_row 4
|
118
150
|
#
|
119
151
|
# # Define a filter that will skip unneeded rows. The filter command takes
|
120
152
|
# # a block that receives the parsed (but not validated!) row data as an
|
121
153
|
# # associative hash of :col_key => <parsed value>, and returns
|
122
154
|
# # true to keep the row or false to exclude it.
|
123
|
-
#
|
155
|
+
# filter_rows do |row|
|
124
156
|
# row[:id].to_i > 5000
|
125
157
|
# end
|
126
158
|
#
|
@@ -171,6 +203,7 @@ class Importer
|
|
171
203
|
# Use whichever you prefer!
|
172
204
|
def column(key, options_hash = {}, &block)
|
173
205
|
# Find existing column with key to allow re-opening an existing definition
|
206
|
+
key = key.to_sym
|
174
207
|
col = @columns.detect {|c| c.key == key }
|
175
208
|
unless col
|
176
209
|
# if none found, add a new one
|
@@ -184,6 +217,11 @@ class Importer
|
|
184
217
|
col
|
185
218
|
end
|
186
219
|
|
220
|
+
def virtual_column(key, options_hash = {}, &block)
|
221
|
+
options_hash[:virtual] = true
|
222
|
+
column(key, options_hash, &block)
|
223
|
+
end
|
224
|
+
|
187
225
|
# Limit the search scope for a single format (:xls, :xlsx, :html, :custom)
|
188
226
|
# to the given value or values - the meaning and format of scopes is determined
|
189
227
|
# by that format's data reader.
|
@@ -213,7 +251,7 @@ class Importer
|
|
213
251
|
# a block accepting a file path, and returning an array of arrays (rows of
|
214
252
|
# raw column values). Use #add_error(msg) to add a reading error.
|
215
253
|
#
|
216
|
-
# Adding a custom stream parser will change the importer's default
|
254
|
+
# Adding a custom file/stream parser will change the importer's default
|
217
255
|
# format to :custom, though you can override it when calling #import as
|
218
256
|
# usual.
|
219
257
|
#
|
@@ -268,7 +306,7 @@ class Importer
|
|
268
306
|
# encoding: source encoding override, defaults to guessing based on input
|
269
307
|
#
|
270
308
|
# Generally, you should be able to throw a path or stream at it and it should work. The
|
271
|
-
# options exist to allow overriding in cases where the
|
309
|
+
# options exist to allow overriding in cases where the automated heuristics
|
272
310
|
# have failed and the input type is known by the caller.
|
273
311
|
#
|
274
312
|
# If you're trying to import from a raw string, use Importer#import_string instead.
|
@@ -362,7 +400,7 @@ class Importer
|
|
362
400
|
# Use this form of import for the common case of having a raw CSV or HTML string.
|
363
401
|
def import_string(string, options = {}, &block)
|
364
402
|
# Get a format here if needed
|
365
|
-
if options[:format].nil?
|
403
|
+
if options[:format].nil? || options[:format] == :auto
|
366
404
|
if @custom_reader
|
367
405
|
format = :custom
|
368
406
|
else
|
@@ -378,8 +416,8 @@ class Importer
|
|
378
416
|
# Call with a block accepting a single Importer::Row with contents that
|
379
417
|
# look like :column_key => <parsed value>. Any filtered rows
|
380
418
|
# will not be present. If you want to register an error, simply
|
381
|
-
# raise "some text" and it will be added to the importer's
|
382
|
-
# list for display to the user, logging, or whatever.
|
419
|
+
# raise "some text" or call #add_error and it will be added to the importer's
|
420
|
+
# error list for display to the user, logging, or whatever.
|
383
421
|
def process
|
384
422
|
@data.rows.each do |row|
|
385
423
|
begin
|
@@ -390,20 +428,46 @@ class Importer
|
|
390
428
|
end
|
391
429
|
end
|
392
430
|
|
431
|
+
# Call with a block to process error handling tasks. Block will only execute
|
432
|
+
# if an error (read, validate, exception, etc.) has occurred during the
|
433
|
+
# just-completed #import.
|
434
|
+
#
|
435
|
+
# Your block can access the #error_summary or the #errors array to do whatever
|
436
|
+
# logging, reporting etc. is desired.
|
393
437
|
def on_error(&block)
|
394
438
|
raise 'Invalid block passed to Importer#on_error: block may accept 0, 1 or 2 arguments' if block.arity > 2
|
395
439
|
|
396
440
|
if has_errors?
|
397
441
|
case block.arity
|
398
442
|
when 0 then DslProxy.exec(self, &block)
|
399
|
-
when 1 then DslProxy.exec(self,
|
400
|
-
when 2 then DslProxy.exec(self,
|
443
|
+
when 1 then DslProxy.exec(self, errors, &block)
|
444
|
+
when 2 then DslProxy.exec(self, errors, error_summary, &block)
|
401
445
|
end
|
402
446
|
end
|
403
447
|
|
404
448
|
self
|
405
449
|
end
|
406
450
|
|
451
|
+
# Call with a block accepting an array of Column objects and returning
|
452
|
+
# true if the columns in the array should constitute a valid header row. Intended
|
453
|
+
# for use with optional columns to define multiple supported column sets, or
|
454
|
+
# conditionally required secondary columns. Columns are passed to the block in the
|
455
|
+
# order detected, so you can use ordering to help determine which columns are
|
456
|
+
# required if that helps.
|
457
|
+
def validate_columns(&block)
|
458
|
+
raise 'Invalid block passed to Importer#validate_columns: block should accept a single argument' if block.arity != 1
|
459
|
+
@column_validator = block
|
460
|
+
end
|
461
|
+
|
462
|
+
# Call with a block accepting a single Row instance. Just like Column#validate, you
|
463
|
+
# can fail by returning false, calling #add_error(msg) or by raising an exception.
|
464
|
+
# The intent of this method of validation is to allow using the full row context to
|
465
|
+
# validate values that depend on other columns in the same row.
|
466
|
+
def validate_rows(&block)
|
467
|
+
raise 'Invalid block passed to Importer#validate_columns: block should accept a single Row argument' if block.arity != 1
|
468
|
+
@row_validator = block
|
469
|
+
end
|
470
|
+
|
407
471
|
# Process the raw values for the first rows in a sheet,
|
408
472
|
# and attempt to build a map of the column layout, and
|
409
473
|
# detect the first row of real data
|
@@ -419,7 +483,7 @@ class Importer
|
|
419
483
|
next_index += 1
|
420
484
|
end
|
421
485
|
@data.start_row = @start_row || 1
|
422
|
-
@missing_headers =
|
486
|
+
@missing_headers = []
|
423
487
|
return true
|
424
488
|
|
425
489
|
else
|
@@ -430,21 +494,42 @@ class Importer
|
|
430
494
|
next unless row
|
431
495
|
|
432
496
|
# Set up for this iteration
|
433
|
-
remaining = @columns.
|
497
|
+
remaining = @columns.select {|c| !c.virtual? }
|
434
498
|
|
435
499
|
# Step through this row's raw values, and look for a matching column for all columns
|
436
500
|
row.each_with_index do |val, i|
|
437
|
-
|
501
|
+
val = val.to_s
|
502
|
+
col = remaining.detect {|c| c.match_header?(val, i) }
|
438
503
|
if col
|
439
504
|
remaining -= [col]
|
440
505
|
col.data.index = i
|
506
|
+
col.data.header_text = val
|
441
507
|
end
|
442
508
|
end
|
509
|
+
# Reset remaining cols
|
510
|
+
remaining.each do |col|
|
511
|
+
col.data.index = nil
|
512
|
+
col.data.header_text = nil
|
513
|
+
end
|
443
514
|
|
444
|
-
|
515
|
+
# Have we found them all, or at least a valid sub-set?
|
516
|
+
header_found = remaining.empty?
|
517
|
+
unless header_found
|
518
|
+
if remaining.all?(&:optional?)
|
519
|
+
if @column_validator
|
520
|
+
# Run custom column validator
|
521
|
+
cols = found_columns
|
522
|
+
header_found = @column_validator.call(cols)
|
523
|
+
else
|
524
|
+
# No validator... do we have any found columns at all???
|
525
|
+
header_found = @columns.any?(&:present?)
|
526
|
+
end
|
527
|
+
end
|
528
|
+
end
|
529
|
+
if header_found
|
445
530
|
# Found all columns, have a map, update our start row to be the next line and return!
|
446
531
|
@data.start_row = @start_row || i+2
|
447
|
-
@missing_headers =
|
532
|
+
@missing_headers = []
|
448
533
|
return true
|
449
534
|
else
|
450
535
|
missing = remaining if (missing.nil? || missing.count > remaining.count)
|
@@ -452,7 +537,7 @@ class Importer
|
|
452
537
|
end
|
453
538
|
|
454
539
|
# If we get here, we're hosed
|
455
|
-
@missing_headers = missing.collect(&:key) if @missing_headers.
|
540
|
+
@missing_headers = missing.collect(&:key) if @missing_headers.empty? || @missing_headers.count > missing.count
|
456
541
|
false
|
457
542
|
end
|
458
543
|
end
|
@@ -467,46 +552,81 @@ class Importer
|
|
467
552
|
# Parse out the values
|
468
553
|
values = {}
|
469
554
|
@columns.each do |col|
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
555
|
+
if col.present? && !col.virtual?
|
556
|
+
index = col.data.index
|
557
|
+
raw_val = raw_data[index]
|
558
|
+
if col.parses?
|
559
|
+
# Use custom parser if this column has one
|
560
|
+
val = col.parse_value(row, raw_val)
|
561
|
+
else
|
562
|
+
# Otherwise use our standard parser
|
563
|
+
val = @reader.parse_value(raw_val, col.type)
|
564
|
+
end
|
565
|
+
values[col.key] = val
|
478
566
|
end
|
479
|
-
values[col.key] = val
|
480
567
|
end
|
481
568
|
|
482
|
-
# Set the values
|
569
|
+
# Set the values
|
483
570
|
row.set_values(values)
|
484
|
-
|
571
|
+
|
572
|
+
if !row.has_errors?
|
573
|
+
# Filter if needed
|
574
|
+
return nil if @filter && !@filter.call(row)
|
485
575
|
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
576
|
+
# Calculate virtual columns' values
|
577
|
+
@columns.each do |col|
|
578
|
+
if col.virtual?
|
579
|
+
row.values[col.key] = col.calculate_value(row)
|
580
|
+
end
|
581
|
+
end
|
582
|
+
|
583
|
+
# Validate values if any column has a custom validator
|
584
|
+
@columns.each do |col|
|
585
|
+
if col.present? && col.validates?
|
586
|
+
val = values[col.key]
|
587
|
+
col.validate_value(row, val)
|
588
|
+
end
|
589
|
+
end
|
491
590
|
|
591
|
+
# If we have a row validator, call it on the full row
|
592
|
+
if @row_validator && !row.has_errors?
|
593
|
+
valid = false
|
594
|
+
had_error = Error.with_context(@importer, row, nil, nil) do
|
595
|
+
valid = DslProxy.exec(self, row, &@row_validator)
|
596
|
+
end
|
597
|
+
if !had_error && valid.is_a?(FalseClass)
|
598
|
+
add_error("Invalid row: #{row.to_hash.inspect}", :row => row)
|
599
|
+
end
|
600
|
+
end
|
601
|
+
end
|
602
|
+
|
492
603
|
# We is good
|
493
604
|
@data.rows << row
|
494
605
|
row
|
495
606
|
end
|
607
|
+
|
608
|
+
def rows
|
609
|
+
@data.rows
|
610
|
+
end
|
611
|
+
|
612
|
+
def found_columns
|
613
|
+
@columns.select(&:present?).sort_by(&:index)
|
614
|
+
end
|
615
|
+
|
616
|
+
# Array of Error objects collected during an import/process run
|
617
|
+
def errors
|
618
|
+
@data.errors
|
619
|
+
end
|
496
620
|
|
497
621
|
# When true, one or more errors have been recorded during this import/process
|
498
622
|
# cycle.
|
499
623
|
def has_errors?
|
500
|
-
@errors.any?
|
624
|
+
@data.errors.any?
|
501
625
|
end
|
502
626
|
|
503
627
|
# Add an error to our error list. Will result in a failed import.
|
504
|
-
def add_error(
|
505
|
-
|
506
|
-
msg = context
|
507
|
-
context = nil
|
508
|
-
end
|
509
|
-
@errors << Error.new(context, msg)
|
628
|
+
def add_error(msg, context = {})
|
629
|
+
@data.errors << Error.new(msg, context)
|
510
630
|
end
|
511
631
|
|
512
632
|
# Returns a human-readable summary of the errors present on the importer, or
|
@@ -517,7 +637,7 @@ class Importer
|
|
517
637
|
|
518
638
|
# Group by error text - we often get the same error dozens of times
|
519
639
|
list = {}
|
520
|
-
@errors.each do |err|
|
640
|
+
@data.errors.each do |err|
|
521
641
|
errs = list[err.text] || []
|
522
642
|
errs << err
|
523
643
|
list[err.text] = errs
|
@@ -544,8 +664,7 @@ class Importer
|
|
544
664
|
protected
|
545
665
|
|
546
666
|
def reset
|
547
|
-
@
|
548
|
-
@missing_headers = nil
|
667
|
+
@missing_headers = []
|
549
668
|
@format = nil
|
550
669
|
@reader = nil
|
551
670
|
@data = Data.new
|
data/lib/iron/import/row.rb
CHANGED
@@ -2,12 +2,14 @@ class Importer
|
|
2
2
|
|
3
3
|
class Row
|
4
4
|
|
5
|
-
attr_reader :line, :values
|
5
|
+
attr_reader :line, :values, :errors
|
6
6
|
|
7
7
|
def initialize(importer, line, value_hash = nil)
|
8
8
|
@importer = importer
|
9
9
|
@line = line
|
10
10
|
set_values(value_hash)
|
11
|
+
|
12
|
+
@errors = []
|
11
13
|
end
|
12
14
|
|
13
15
|
def set_values(value_hash)
|
@@ -58,6 +60,19 @@ class Importer
|
|
58
60
|
@importer.add_error(self, msg)
|
59
61
|
end
|
60
62
|
|
63
|
+
def has_errors?
|
64
|
+
@errors && @errors.count > 0
|
65
|
+
end
|
66
|
+
|
67
|
+
# Return a map of column key to Error, intended for use in error reporting.
|
68
|
+
def error_map
|
69
|
+
map = {}
|
70
|
+
@errors.each do |err|
|
71
|
+
map[err.column.key] = err
|
72
|
+
end
|
73
|
+
map
|
74
|
+
end
|
75
|
+
|
61
76
|
end
|
62
77
|
|
63
78
|
end
|
@@ -112,4 +112,29 @@ describe Importer::Column do
|
|
112
112
|
@importer.has_errors?.should be_true
|
113
113
|
end
|
114
114
|
|
115
|
+
it 'should record optionalness' do
|
116
|
+
@col.optional?.should be_false
|
117
|
+
@col.optional!
|
118
|
+
@col.optional?.should be_true
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'should know if it is present in the headers' do
|
122
|
+
@col.present?.should be_false
|
123
|
+
@col.data.index = 2
|
124
|
+
@col.present?.should be_true
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'should use the header text as its name if present' do
|
128
|
+
@col.data.index = 2
|
129
|
+
@col.to_s.should == 'Column C'
|
130
|
+
@col.data.header_text = 'Invoice #'
|
131
|
+
@col.to_s.should == 'Invoice # Column'
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'should support virtual operation' do
|
135
|
+
@col.virtual!
|
136
|
+
@col.virtual?.should be_true
|
137
|
+
@col.to_s.should == 'Test Column'
|
138
|
+
end
|
139
|
+
|
115
140
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
describe Importer::Error do
|
2
|
+
|
3
|
+
before do
|
4
|
+
@importer = Importer.new
|
5
|
+
@row = Importer::Row.new(@importer, 5)
|
6
|
+
@col = Importer::Column.new(@importer, :test)
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'should capture context' do
|
10
|
+
val = 'foo'
|
11
|
+
err = nil
|
12
|
+
Importer::Error.with_context(@importer, @row, @col, val) do
|
13
|
+
err = Importer::Error.new('hi')
|
14
|
+
end
|
15
|
+
err.row.should == @row
|
16
|
+
err.column.should == @col
|
17
|
+
err.value.should == val
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should return error status for #with_context' do
|
21
|
+
# Block runs fine, no error
|
22
|
+
had_err = Importer::Error.with_context(@importer, @row, @col, 'bob') do
|
23
|
+
false
|
24
|
+
end
|
25
|
+
had_err.should be_false
|
26
|
+
|
27
|
+
# Create a new error, we should get a true
|
28
|
+
had_err = Importer::Error.with_context(@importer, @row, @col, 'bob') do
|
29
|
+
Importer::Error.new('hi')
|
30
|
+
end
|
31
|
+
had_err.should be_true
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -23,6 +23,21 @@ describe Importer do
|
|
23
23
|
importer.scopes.should == { :xls => [1, 'Sheet 2'], :html => ['table.funny'] }
|
24
24
|
end
|
25
25
|
|
26
|
+
it 'should calculate virtual columns' do
|
27
|
+
importer = Importer.build do
|
28
|
+
column :num, :type => :int
|
29
|
+
virtual_column :summary do
|
30
|
+
calculate do |row|
|
31
|
+
"Value = #{row[:num]}"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
importer.import_string("num\n1\n2")
|
37
|
+
importer.error_summary.should be_nil
|
38
|
+
importer.column(:summary).to_a.should == ['Value = 1', 'Value = 2']
|
39
|
+
end
|
40
|
+
|
26
41
|
it 'should find headers automatically' do
|
27
42
|
# Define a few sample columns
|
28
43
|
importer = Importer.new
|
@@ -57,6 +72,59 @@ describe Importer do
|
|
57
72
|
importer.missing_headers.should == [:alpha]
|
58
73
|
end
|
59
74
|
|
75
|
+
it 'should succeed when missing optional columns' do
|
76
|
+
# Define a few sample columns
|
77
|
+
importer = Importer.new
|
78
|
+
importer.column(:alpha).optional!
|
79
|
+
importer.column(:beta)
|
80
|
+
importer.column(:gamma)
|
81
|
+
# Some dummy data
|
82
|
+
rows = [
|
83
|
+
['Bob', 'Beta', 'Gamma', 'Epsilon']
|
84
|
+
]
|
85
|
+
|
86
|
+
# Parse it!
|
87
|
+
importer.find_header(rows).should be_true
|
88
|
+
importer.missing_headers.should be_empty
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'should support row-based validation' do
|
92
|
+
importer = Importer.build do
|
93
|
+
column :a, :type => :int
|
94
|
+
column :b, :type => :int
|
95
|
+
|
96
|
+
validate_rows do |row|
|
97
|
+
row[:a] + row[:b] == 5
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
importer.import_string("a,b\n1,4\n6,-1\n7,0\n1,1")
|
102
|
+
importer.errors.count.should == 2
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'should support column order/presence validation' do
|
106
|
+
# Build an importer with optional columns
|
107
|
+
importer = Importer.new
|
108
|
+
importer.column(:alpha).optional!
|
109
|
+
importer.column(:beta).optional!
|
110
|
+
importer.column(:gamma)
|
111
|
+
# Set up a column validator
|
112
|
+
importer.validate_columns do |cols|
|
113
|
+
cols = cols.collect(&:key)
|
114
|
+
cols.sort == [:alpha, :gamma] || cols.sort == [:beta, :gamma]
|
115
|
+
end
|
116
|
+
|
117
|
+
# Missing required column
|
118
|
+
importer.find_header([['Alpha', 'Beta', 'Epsilon']]).should be_false
|
119
|
+
# Missing both optional
|
120
|
+
importer.find_header([['Bob', 'Gamma', 'Epsilon']]).should be_false
|
121
|
+
# Required + single optional
|
122
|
+
importer.find_header([['Bob', 'Gamma', 'Alpha']]).should be_true
|
123
|
+
importer.find_header([['Bob', 'Gamma', 'Beta']]).should be_true
|
124
|
+
# Required + both optional
|
125
|
+
importer.find_header([['Alpha', 'Gamma', 'Beta']]).should be_true
|
126
|
+
end
|
127
|
+
|
60
128
|
it 'should capture errors' do
|
61
129
|
importer = Importer.build do
|
62
130
|
column :foo
|
@@ -108,7 +176,7 @@ describe Importer do
|
|
108
176
|
it 'should import a string' do
|
109
177
|
sum = 0
|
110
178
|
csv = "one,two\n1,2"
|
111
|
-
Importer.build do
|
179
|
+
importer = Importer.build do
|
112
180
|
column :one
|
113
181
|
column :two
|
114
182
|
end.import_string(csv, :format => :csv) do |rows|
|
@@ -117,6 +185,7 @@ describe Importer do
|
|
117
185
|
sum = rows[:one].to_i + rows[:two].to_i
|
118
186
|
end
|
119
187
|
# Just make sure we ran correctly
|
188
|
+
importer.column(:one).to_s.should == 'One Column'
|
120
189
|
sum.should == 3
|
121
190
|
end
|
122
191
|
|
@@ -131,5 +200,54 @@ describe Importer do
|
|
131
200
|
importer.import_string("<div><table><tr><td>one</td></tr></table></div>")
|
132
201
|
importer.format.should == :html
|
133
202
|
end
|
203
|
+
|
204
|
+
it 'should capture errors with context' do
|
205
|
+
sum = 0
|
206
|
+
csv = "one,two,three\n1,2,X\n1,,3"
|
207
|
+
importer = Importer.build do
|
208
|
+
column :one
|
209
|
+
column :two do
|
210
|
+
validate do |val|
|
211
|
+
val.to_i == 2
|
212
|
+
end
|
213
|
+
end
|
214
|
+
column :three do
|
215
|
+
validate do |val|
|
216
|
+
add_error('Invalid value') unless val.to_i > 0
|
217
|
+
end
|
218
|
+
end
|
219
|
+
end
|
220
|
+
importer.import_string(csv)
|
221
|
+
|
222
|
+
# Just make sure we ran correctly
|
223
|
+
importer.errors.count.should == 2
|
224
|
+
importer.column(:two).errors.count.should == 1
|
225
|
+
importer.column(:three).errors.count.should == 1
|
226
|
+
importer.column(:three).error_values.should == ['X']
|
227
|
+
map = importer.rows.first.error_map
|
228
|
+
map[:two].should be_nil
|
229
|
+
map[:three].should be_a(Importer::Error)
|
230
|
+
end
|
231
|
+
|
232
|
+
it 'should import properly when optional columns are missing' do
|
233
|
+
csv = "one,two\n1,2\n1,"
|
234
|
+
importer = Importer.build do
|
235
|
+
column :one
|
236
|
+
column :two do
|
237
|
+
validate do |val|
|
238
|
+
val.to_i == 2
|
239
|
+
end
|
240
|
+
end
|
241
|
+
column :three do
|
242
|
+
optional!
|
243
|
+
validate do |val|
|
244
|
+
add_error('Invalid value') unless val.to_i > 0
|
245
|
+
end
|
246
|
+
end
|
247
|
+
end
|
248
|
+
importer.import_string(csv)
|
249
|
+
|
250
|
+
importer.found_columns.count.should == 2
|
251
|
+
end
|
134
252
|
|
135
253
|
end
|
data/spec/importer/row_spec.rb
CHANGED
@@ -33,6 +33,11 @@ describe Importer::Row do
|
|
33
33
|
@row.should be_empty
|
34
34
|
end
|
35
35
|
|
36
|
+
it 'should return nil on missing data' do
|
37
|
+
@row.set_values(:a => 1, :b => 2)
|
38
|
+
@row[:c].should be_nil
|
39
|
+
end
|
40
|
+
|
36
41
|
it 'should not change when to_hash values are changed' do
|
37
42
|
@row.set_values(:a => 1, :b => 2)
|
38
43
|
hash = @row.to_hash
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iron-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Morris
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: iron-extensions
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- spec/importer/csv_reader_spec.rb
|
118
118
|
- spec/importer/custom_reader_spec.rb
|
119
119
|
- spec/importer/data_reader_spec.rb
|
120
|
+
- spec/importer/error_spec.rb
|
120
121
|
- spec/importer/html_reader_spec.rb
|
121
122
|
- spec/importer/importer_spec.rb
|
122
123
|
- spec/importer/row_spec.rb
|
@@ -156,5 +157,5 @@ rubyforge_project:
|
|
156
157
|
rubygems_version: 2.4.3
|
157
158
|
signing_key:
|
158
159
|
specification_version: 4
|
159
|
-
summary: CSV, HTML, XLS, and XLSX import
|
160
|
+
summary: CSV, HTML, XLS, and XLSX import processing support
|
160
161
|
test_files: []
|