iron-import 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +24 -3
- data/README.rdoc +78 -14
- data/Version.txt +1 -1
- data/lib/iron/import/column.rb +137 -49
- data/lib/iron/import/csv_reader.rb +2 -2
- data/lib/iron/import/data_reader.rb +8 -3
- data/lib/iron/import/error.rb +57 -6
- data/lib/iron/import/excel_reader.rb +1 -1
- data/lib/iron/import/html_reader.rb +1 -6
- data/lib/iron/import/importer.rb +166 -47
- data/lib/iron/import/row.rb +16 -1
- data/spec/importer/column_spec.rb +25 -0
- data/spec/importer/error_spec.rb +34 -0
- data/spec/importer/importer_spec.rb +119 -1
- data/spec/importer/row_spec.rb +5 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87cd90d663132748c61dfaa450449136f0cc00c4
|
4
|
+
data.tar.gz: e350419e3bdc6afb98b6a84b4fef8c9a4f1cd4be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1646f83be5af42715b1f71f9090aa0bf323bf9495d97d4abbb5543f85cb98f006fe9bd407823810c343e3dbc81a477da1445f399bcad9eca79108fd2869719f5
|
7
|
+
data.tar.gz: ba01474f1f4eebf7eaf2a23bc12824d0d57c00de61226ede66e1f1c9bd554d57161b8d811aeb2793cedc42f5a4b0bf8e9d34961c98f755aee5f09cfef1b81847
|
data/History.txt
CHANGED
@@ -1,10 +1,31 @@
|
|
1
|
+
== 0.8.0 / 2017-06-29
|
2
|
+
* Breaking Change: change signature of Importer#add_error to support new features
|
3
|
+
* Breaking Change: Importer.missing_headers will be [] instead of nil on all headers found
|
4
|
+
* Breaking Change: remove deprecated method Column#required!
|
5
|
+
* Add Importer#rows to directly access rows post-import
|
6
|
+
* Add Column#optional! to enable non-mandatory columns during header detection
|
7
|
+
* Add Column#present? and Column#missing? to test for presence after import attempt
|
8
|
+
* Add Importer#validate_columns to allow detecting invalid column combinations
|
9
|
+
* Add Importer#validate_rows to allow whole-row validation
|
10
|
+
* Add Importer#virtual_column and Column#calculate to enable virtual columns
|
11
|
+
* Track actual header text found for columns
|
12
|
+
* Update Column#to_s to use actual header text when present
|
13
|
+
* Revamp error tracking significantly to provide better reporting ability
|
14
|
+
* Improve error tracking to include row/column when knowable
|
15
|
+
* Improve error tracking to include problem values when knowable
|
16
|
+
* Add Column#error_values to return unique raw error values by column
|
17
|
+
* Add Column#error_values? which will be true when there is at least one error value for the column
|
18
|
+
* Add Row#error_map to return map of column key => raw value for each row
|
19
|
+
* Change Column#parse block processing to allow explicit #add_error call
|
20
|
+
* Change Column#validate block processing to allow explicit #add_error call and to add an implicit error on returned false
|
21
|
+
|
1
22
|
== 0.7.0 / 2017-02-16
|
2
23
|
|
3
24
|
* Breaking Change: Removed multi-sheet support - use multiple importers instead
|
4
|
-
* Breaking Change: Removed warnings as they were not being used
|
5
|
-
*
|
6
|
-
* Add Importer#scope to allow narrowing the search to one or more sheets/tables when importing
|
25
|
+
* Breaking Change: Removed warnings as they were confusing and not being used
|
26
|
+
* Deprecate Column#required! due to bugginess and overlap with Column#validate
|
7
27
|
* Add new HtmlReader support to handle parsing HTML <table> rows
|
28
|
+
* Add Importer#scope to allow narrowing the search to one or more sheets/tables when importing
|
8
29
|
* Modify Importer#import to support block mode combining #import and #process
|
9
30
|
* Add Importer#import_string for handling explicit CSV/HTML/Custom text
|
10
31
|
* Add Importer#on_error(&block) to allow inline conditional error handling
|
data/README.rdoc
CHANGED
@@ -4,11 +4,17 @@ Written by Rob Morris @ Irongaze Consulting LLC (http://irongaze.com)
|
|
4
4
|
|
5
5
|
== DESCRIPTION
|
6
6
|
|
7
|
-
Simple, reliable tabular data import.
|
7
|
+
Simple, versatile, reliable tabular data import.
|
8
8
|
|
9
9
|
This gem provides a set of classes to support automating import of tabular data from
|
10
|
-
CSV, HTML, XLS and XLSX files
|
11
|
-
|
10
|
+
CSV, HTML, XLS and XLSX files. Key features include defining columns, auto-detecting column order,
|
11
|
+
pre-parsing data, validating data, filtering rows, and robust error tracking.
|
12
|
+
|
13
|
+
IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
|
14
|
+
for the task. Breaking changes will be noted by increases in the minor version,
|
15
|
+
ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
|
16
|
+
|
17
|
+
== WHO IS THIS FOR?
|
12
18
|
|
13
19
|
The Roo/Spreadsheet gems do a great job of providing general purpose spreadsheet reading.
|
14
20
|
However, using them with unreliable user submitted data requires a lot of error checking,
|
@@ -17,21 +23,36 @@ businesses, where Excel files are the lingua franca for all kinds of uses. This
|
|
17
23
|
attempts to extract years of experience building one-off importers into a simple library
|
18
24
|
for rapid import coding.
|
19
25
|
|
26
|
+
In addition, it's quite common for the same data to be transmitted in varying formats -
|
27
|
+
Excel files, HTML files, CSV files, custom text streams... Use iron-import to have a single
|
28
|
+
tool-set for processing any of these types of data, often without changing a line of code.
|
29
|
+
|
20
30
|
This is NOT a general-purpose tool for reading spreadsheets. If you want access to
|
21
31
|
cell styling, reading underlying formulas, etc., you will be better served building
|
22
|
-
a custom importer based on Roo. But if you're looking to take
|
32
|
+
a custom importer based on Roo. But if you're looking to take a customer-uploaded CSV file,
|
23
33
|
validate and coerce values, then write each row to a database, all the while tracking
|
24
34
|
any errors encountered... well, this is the library for you!
|
25
35
|
|
26
|
-
|
27
|
-
|
28
|
-
|
36
|
+
== KEY FEATURES
|
37
|
+
|
38
|
+
- Simple yet robust data import and error handling using elegant builder syntax
|
39
|
+
- Import data from file, stream or string data sources
|
40
|
+
- Import XLS, XLSX, CSV and HTML tabular data
|
41
|
+
- Import custom tabular data via passed block
|
42
|
+
- Automatic column order and start row detection
|
43
|
+
- Support for optional columns and dynamic column sets
|
44
|
+
- Basic data coercion supporting string, int, float, date and cents types
|
45
|
+
- Custom data coercion via passed block
|
46
|
+
- Custom data validation via passed block
|
47
|
+
- Row filtering using custom block
|
48
|
+
- Automatically track and report errors with fine-grained context
|
49
|
+
- Prefer capturing errors over raising exceptions for more robust imports
|
29
50
|
|
30
51
|
== SAMPLE USAGE
|
31
52
|
|
32
53
|
# Define our importer, with three columns. The importer will look for a row containing
|
33
54
|
# "name"/"product", "description" and "price" (case insensitively) and automatically determine column
|
34
|
-
# order and starting row of the data.
|
55
|
+
# order and the starting row of the data.
|
35
56
|
importer = Importer.build do
|
36
57
|
column :name do
|
37
58
|
# Column order and start row are auto-detected
|
@@ -42,9 +63,9 @@ ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semant
|
|
42
63
|
parse do |raw_val|
|
43
64
|
raw_val.to_s.strip
|
44
65
|
end
|
45
|
-
# And
|
66
|
+
# And custom validation
|
46
67
|
validate do |parsed_val|
|
47
|
-
|
68
|
+
add_error('Invalid description') unless parsed_val.length > 5
|
48
69
|
end
|
49
70
|
end
|
50
71
|
column :price do
|
@@ -52,13 +73,13 @@ ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semant
|
|
52
73
|
type :cents
|
53
74
|
end
|
54
75
|
|
55
|
-
# Need to skip rows? Use a filter!
|
56
|
-
|
76
|
+
# Need to skip rows? Use a filter! Return true to include a row when processing
|
77
|
+
filter_rows do |row|
|
57
78
|
row[:price] != 0 && row[:name] != 'Sample'
|
58
79
|
end
|
59
80
|
end
|
60
81
|
|
61
|
-
# Import the provided file row-by-row (if importing succeeds), automatically
|
82
|
+
# Import the provided file or stream row-by-row (if importing succeeds), automatically
|
62
83
|
# using the proper library to read CSV data. This same code would work
|
63
84
|
# with XLS or XLSX files with no changes to the code.
|
64
85
|
importer.import('/tmp/source.csv') do |row|
|
@@ -67,9 +88,52 @@ ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semant
|
|
67
88
|
|
68
89
|
# Check for errors and do the right thing:
|
69
90
|
importer.on_error do
|
70
|
-
|
91
|
+
if missing_headers.any?
|
92
|
+
# Can't find required column header(s)
|
93
|
+
puts "Unable to locate columns: #{missing_headers}"
|
94
|
+
|
95
|
+
elsif columns.any?(&:error_values?)
|
96
|
+
# Invalid or unexpected values in one or more columns
|
97
|
+
columns.select(&:error_values?).each do |col|
|
98
|
+
puts "Invalid values for #{col}: #{col.error_values}"
|
99
|
+
end
|
100
|
+
|
101
|
+
else
|
102
|
+
# General errors, dump report
|
103
|
+
puts "Error(s) on import: " + error_summary
|
104
|
+
end
|
71
105
|
end
|
72
106
|
|
107
|
+
# You can chain the build/import/on-error blocks for a cleaner flow:
|
108
|
+
Importer.build do
|
109
|
+
column :one
|
110
|
+
column :two
|
111
|
+
end.import(params[:uploaded_file]) do |row|
|
112
|
+
SomeModel.create(row)
|
113
|
+
end.on_error do
|
114
|
+
raise "Errors found: " + error_summary
|
115
|
+
end
|
116
|
+
|
117
|
+
== IMPORT EXECUTION ORDER
|
118
|
+
|
119
|
+
It can be tricky to keep track of what happens in Importer#import, so here's a quick cheat-sheet:
|
120
|
+
|
121
|
+
- Determine the **format** of stream/file to import
|
122
|
+
- Determine **import scope** (sheet/table/whatever) using Importer#scope settings, if any
|
123
|
+
- **Find column headers + start row**
|
124
|
+
- Validate presence of **required columns**
|
125
|
+
- **Validate column set** using Importer#validate_columns
|
126
|
+
- Run each row:
|
127
|
+
- **Parse** each column's value using Column#parse or Column#type
|
128
|
+
- **Filter the row** using Importer#filter_rows on parsed values to reject unwanted rows
|
129
|
+
- **Calculate virtual columns** using Column#calculate
|
130
|
+
- **Validate each parsed value** using Column#validate
|
131
|
+
- **Validate entire row** using Importer#validate_rows
|
132
|
+
|
133
|
+
Generally, the import will stop when an error occurs, save on row processing, where each row will
|
134
|
+
be run until an error for that row is found. The goal is to accumulate actionable info for
|
135
|
+
presentation to the end user who is uploading the file.
|
136
|
+
|
73
137
|
== REQUIREMENTS
|
74
138
|
|
75
139
|
Depends on the iron-extensions and iron-dsl gems for CSV and custom import formats.
|
data/Version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.8.0
|
data/lib/iron/import/column.rb
CHANGED
@@ -8,13 +8,17 @@ class Importer
|
|
8
8
|
#
|
9
9
|
# Importer.build do
|
10
10
|
# column :key do
|
11
|
+
# # Mark this column as optional, i.e. if the header isn't found, the import will
|
12
|
+
# # work without error and the imported row will simply not contain this column's data.
|
13
|
+
# optional!
|
14
|
+
#
|
11
15
|
# # Set a fixed position - may be a column number or a letter-based
|
12
16
|
# # column description, ie 'A' == 1. In most cases, you can leave
|
13
17
|
# # this defaulted to nil, which will mean "look for the proper header"
|
14
18
|
# position 'C'
|
15
19
|
#
|
16
20
|
# # Specify a regex to locate the header for this column, defaults to
|
17
|
-
# # finding a string containing the key.
|
21
|
+
# # finding a string containing the key, ignored if position is set.
|
18
22
|
# header /(price|cost)/i
|
19
23
|
#
|
20
24
|
# # Tells the data parser what type of data this column contains, one
|
@@ -24,7 +28,8 @@ class Importer
|
|
24
28
|
# # Instead of a type, you can set an explicit parse block. Be aware
|
25
29
|
# # that different source types may give you different raw values for what
|
26
30
|
# # seems like the "same" source value, for example an Excel source file
|
27
|
-
# # will give you a float value for all numeric types, even "integers"
|
31
|
+
# # will give you a float value for all numeric types, even "integers", while
|
32
|
+
# # CSV and HTML values are always strings.
|
28
33
|
# parse do |raw_value|
|
29
34
|
# val = raw_value.to_i + 1000
|
30
35
|
# # NOTE: we're in a block, so don't do this:
|
@@ -35,9 +40,20 @@ class Importer
|
|
35
40
|
#
|
36
41
|
# # You can also add a custom validator to check the value and add
|
37
42
|
# # an error if it's not within a given range, or whatever. To fail validation,
|
38
|
-
# #
|
39
|
-
# validate do |parsed_value|
|
40
|
-
#
|
43
|
+
# # return false, raise an exception, or use #add_error
|
44
|
+
# validate do |parsed_value, row|
|
45
|
+
# add_error "Out of range" unless (parsed_value > 0 && parsed_value < 5000)
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
# # Mark a column as _virtual_, meaning it won't be looked for in the source
|
49
|
+
# # file/stream, and instead will be calculated using #calculate. When set,
|
50
|
+
# # causes importer to ignore position/header/type/parse settings.
|
51
|
+
# virtual!
|
52
|
+
#
|
53
|
+
# # When #virtual! is set, gets called to calculate each row's value for this
|
54
|
+
# # column using the row's parsed values.
|
55
|
+
# calculate do |row|
|
56
|
+
# row[:some_col] + 5
|
41
57
|
# end
|
42
58
|
# end
|
43
59
|
# end
|
@@ -46,10 +62,14 @@ class Importer
|
|
46
62
|
|
47
63
|
# Holds load-time data
|
48
64
|
class Data
|
49
|
-
attr_accessor :index
|
65
|
+
attr_accessor :index, :header_text, :errors
|
66
|
+
|
67
|
+
def initialize
|
68
|
+
@errors = []
|
69
|
+
end
|
50
70
|
|
51
71
|
def pos
|
52
|
-
@index ? Column::index_to_pos(@index) : '
|
72
|
+
@index ? Column::index_to_pos(@index) : 'Not Found'
|
53
73
|
end
|
54
74
|
end
|
55
75
|
|
@@ -59,7 +79,8 @@ class Importer
|
|
59
79
|
|
60
80
|
# Configuration
|
61
81
|
dsl_accessor :header, :position, :type
|
62
|
-
dsl_accessor :parse, :validate
|
82
|
+
dsl_accessor :parse, :validate, :calculate
|
83
|
+
dsl_flag :optional, :virtual
|
63
84
|
|
64
85
|
def self.pos_to_index(pos)
|
65
86
|
raise 'Invalid column position: ' + pos.inspect unless pos.is_a?(String) && pos.match(/\A[a-z]{1,3}\z/i)
|
@@ -73,6 +94,7 @@ class Importer
|
|
73
94
|
total - 1
|
74
95
|
end
|
75
96
|
|
97
|
+
# Convert a numeric index to an Excel-like column position, e.g. 3 => 'C'
|
76
98
|
def self.index_to_pos(index)
|
77
99
|
val = index.to_i
|
78
100
|
raise 'Invalid column index: ' + index.inspect if (!index.is_a?(Fixnum) || index.to_i < 0)
|
@@ -95,6 +117,12 @@ class Importer
|
|
95
117
|
# Save off our info
|
96
118
|
@key = key
|
97
119
|
@importer = importer
|
120
|
+
|
121
|
+
# Are we optional?
|
122
|
+
@optional = options_hash.delete(:optional) { false }
|
123
|
+
|
124
|
+
# Are we virtual?
|
125
|
+
@virtual = options_hash.delete(:virtual) { false }
|
98
126
|
|
99
127
|
# Return it as a string, by default
|
100
128
|
@type = options_hash.delete(:type) { :string }
|
@@ -105,8 +133,14 @@ class Importer
|
|
105
133
|
# By default, don't parse incoming data, just pass it through
|
106
134
|
@parse = options_hash.delete(:parse)
|
107
135
|
|
136
|
+
# Custom validation, anyone?
|
137
|
+
@validate = options_hash.delete(:validate)
|
138
|
+
|
139
|
+
# Custom calculation for virtual columns, anyone?
|
140
|
+
@calculate = options_hash.delete(:calculate)
|
141
|
+
|
108
142
|
# Default matcher, looks for the presence of the column key as text anywhere
|
109
|
-
# in the header string, ignoring case and
|
143
|
+
# in the header string, ignoring case and treating underscores as spaces, ie
|
110
144
|
# :order_id => /\A\s*order id\s*\z/i
|
111
145
|
@header = options_hash.delete(:header) {
|
112
146
|
Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
|
@@ -126,22 +160,10 @@ class Importer
|
|
126
160
|
@data = Data.new
|
127
161
|
end
|
128
162
|
|
129
|
-
# DEPRECATED - duplicates functionality better provided by #validate, e.g.
|
130
|
-
#
|
131
|
-
# validate do |val|
|
132
|
-
# raise 'Missing required value for column foo' if val.nil?
|
133
|
-
# end
|
134
|
-
def required!
|
135
|
-
Kernel.warn "[DEPRECATION] Importer::Column#required! is deprecated. Please use #validate instead."
|
136
|
-
col = self.key
|
137
|
-
validate do |val|
|
138
|
-
raise "Missing required value for column :#{col}"
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
163
|
# When true, our header definition or index match the passed text or column index.
|
143
|
-
def match_header?(text,
|
144
|
-
return
|
164
|
+
def match_header?(text, test_index)
|
165
|
+
return false if virtual?
|
166
|
+
return true if test_index == self.fixed_index
|
145
167
|
if @header.is_a?(Regexp)
|
146
168
|
return !@header.match(text).nil?
|
147
169
|
else
|
@@ -149,34 +171,11 @@ class Importer
|
|
149
171
|
end
|
150
172
|
end
|
151
173
|
|
152
|
-
# Applies any custom parser defined to process the given value, capturing
|
153
|
-
# errors as needed
|
154
|
-
def parse_value(row, val)
|
155
|
-
return val if @parse.nil?
|
156
|
-
begin
|
157
|
-
@parse.call(val)
|
158
|
-
rescue Exception => e
|
159
|
-
@importer.add_error(row, "Error parsing #{self}: #{e}")
|
160
|
-
nil
|
161
|
-
end
|
162
|
-
end
|
163
|
-
|
164
|
-
# Applies any validation to a parsed value
|
165
|
-
def validate_value(row, val)
|
166
|
-
return true unless @validate
|
167
|
-
begin
|
168
|
-
@validate.call(val)
|
169
|
-
true
|
170
|
-
rescue Exception => e
|
171
|
-
@importer.add_error(row, "Validation error in #{self}: #{e}")
|
172
|
-
false
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
174
|
# Returns the fixed index of this column based on the set position.
|
177
175
|
# In other words, a position of 2 would return an index of 1 (as
|
178
176
|
# indices are 0-based), where a position of 'C' would return 2.
|
179
177
|
def fixed_index
|
178
|
+
return nil if virtual?
|
180
179
|
return nil unless @position
|
181
180
|
if @position.is_a?(Fixnum)
|
182
181
|
@position - 1
|
@@ -185,9 +184,98 @@ class Importer
|
|
185
184
|
end
|
186
185
|
end
|
187
186
|
|
187
|
+
# Applies any custom parser defined to process the given value, capturing
|
188
|
+
# errors as needed
|
189
|
+
def parse_value(row, raw_val)
|
190
|
+
return raw_val if @parse.nil?
|
191
|
+
|
192
|
+
res = nil
|
193
|
+
had_error = Error.with_context(@importer, row, self, raw_val) do
|
194
|
+
res = DslProxy.exec(@importer, raw_val, &@parse)
|
195
|
+
end
|
196
|
+
had_error ? nil : res
|
197
|
+
end
|
198
|
+
|
199
|
+
def calculate_value(row)
|
200
|
+
return nil if @calculate.nil?
|
201
|
+
res = nil
|
202
|
+
had_error = Error.with_context(@importer, row, self, nil) do
|
203
|
+
res = DslProxy.exec(@importer, row, &@calculate)
|
204
|
+
end
|
205
|
+
had_error ? nil : res
|
206
|
+
end
|
207
|
+
|
208
|
+
# Applies any validation to a parsed value
|
209
|
+
def validate_value(row, parsed_val)
|
210
|
+
return true unless @validate
|
211
|
+
|
212
|
+
valid = false
|
213
|
+
had_error = Error.with_context(@importer, row, self, parsed_val) do
|
214
|
+
valid = DslProxy.exec(@importer, parsed_val, row, &@validate)
|
215
|
+
end
|
216
|
+
if had_error
|
217
|
+
return false
|
218
|
+
elsif valid.is_a?(FalseClass)
|
219
|
+
@importer.add_error("Invalid value: #{parsed_val.inspect}", :row => row, :column => self, :value => parsed_val)
|
220
|
+
return false
|
221
|
+
else
|
222
|
+
return true
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
# Index of the column in the most recent import, if found, or
|
227
|
+
# nil if not present.
|
228
|
+
def index
|
229
|
+
@data.index
|
230
|
+
end
|
231
|
+
|
232
|
+
# When true, column was found in the last import, eg:
|
233
|
+
#
|
234
|
+
# importer.process do |row|
|
235
|
+
# puts "Size: #{row[:size]}" if column(:size).present?
|
236
|
+
# end
|
237
|
+
def present?
|
238
|
+
!@data.index.nil?
|
239
|
+
end
|
240
|
+
|
241
|
+
# Sugar, simply the opposite of #present?
|
242
|
+
def missing?
|
243
|
+
!present?
|
244
|
+
end
|
245
|
+
|
246
|
+
def parses?
|
247
|
+
!@parse.nil?
|
248
|
+
end
|
249
|
+
|
250
|
+
def validates?
|
251
|
+
!@validate.nil?
|
252
|
+
end
|
253
|
+
|
254
|
+
def calculates?
|
255
|
+
!@calculate.nil?
|
256
|
+
end
|
257
|
+
|
258
|
+
def errors
|
259
|
+
@data.errors
|
260
|
+
end
|
261
|
+
|
262
|
+
def error_values
|
263
|
+
errors.collect(&:value).uniq
|
264
|
+
end
|
265
|
+
|
266
|
+
def error_values?
|
267
|
+
error_values.any?
|
268
|
+
end
|
269
|
+
|
188
270
|
# Pretty name for ourselves
|
189
271
|
def to_s
|
190
|
-
|
272
|
+
if !virtual? && @data.header_text.blank?
|
273
|
+
"Column #{@data.pos}"
|
274
|
+
else
|
275
|
+
name = virtual? ? key.to_s : @data.header_text
|
276
|
+
name = name.gsub(/(^[a-z]|\s[a-z])/) {|m| m.capitalize }
|
277
|
+
"#{name} Column"
|
278
|
+
end
|
191
279
|
end
|
192
280
|
|
193
281
|
# Extracts the imported values for this column and returns them in an array.
|
@@ -30,9 +30,9 @@ class Importer
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
-
# Normally, we'd check the scopes and return the proper data, but for CSV files,
|
34
|
-
# there's only one scope...
|
35
33
|
def load_raw(scopes, &block)
|
34
|
+
# Normally, we'd check the scopes and return the proper data, but for CSV files,
|
35
|
+
# there's only one scope...
|
36
36
|
block.call(@raw_rows)
|
37
37
|
end
|
38
38
|
|
@@ -182,8 +182,14 @@ class Importer
|
|
182
182
|
!@importer.has_errors?
|
183
183
|
end
|
184
184
|
|
185
|
-
# Load up the
|
185
|
+
# Load up the sheet in the correct mode
|
186
186
|
def load_each(mode, source, scopes, &block)
|
187
|
+
# Handle some common error cases centrally
|
188
|
+
if mode == :file && !File.exist?(source)
|
189
|
+
add_error("File not found: #{source}")
|
190
|
+
return
|
191
|
+
end
|
192
|
+
|
187
193
|
# Let our derived classes open the file, etc. as they need
|
188
194
|
if init_source(mode, source)
|
189
195
|
# Once the source is set, run through each defined sheet, pass it to
|
@@ -209,12 +215,11 @@ class Importer
|
|
209
215
|
# handle edge cases like converting '5.00' to 5 when in integer mode, etc. If you find your inputs aren't
|
210
216
|
# being parsed correctly, add a custom #parse block on your Column definition.
|
211
217
|
def parse_value(val, type)
|
212
|
-
return nil if val.nil? || val.to_s == ''
|
218
|
+
return nil if val.nil? || val.to_s.strip == ''
|
213
219
|
|
214
220
|
case type
|
215
221
|
when :string then
|
216
222
|
val = val.to_s.strip
|
217
|
-
val.blank? ? nil : val
|
218
223
|
|
219
224
|
when :integer, :int then
|
220
225
|
if val.class < Numeric
|
data/lib/iron/import/error.rb
CHANGED
@@ -1,14 +1,65 @@
|
|
1
1
|
class Importer
|
2
2
|
|
3
3
|
class Error
|
4
|
-
|
5
|
-
attr_reader :row, :text
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
attr_reader :row, :column, :value, :text
|
6
|
+
|
7
|
+
# Block wrapper to set error context for any errors generated within the block
|
8
|
+
def self.with_context(importer, row, column, val)
|
9
|
+
# Set new context
|
10
|
+
old_row = @context_row
|
11
|
+
@context_row = row
|
12
|
+
old_col = @context_column
|
13
|
+
@context_column = column
|
14
|
+
old_val = @context_value
|
15
|
+
@context_value = val
|
16
|
+
old_err = @error_occurred
|
17
|
+
@error_occurred = false
|
18
|
+
|
19
|
+
# Run the block, catch raised exceptions as errors
|
20
|
+
begin
|
21
|
+
yield
|
22
|
+
rescue RuntimeError => e
|
23
|
+
# Old-style way of registering errors was to just raise 'foo'
|
24
|
+
importer.add_error(e.to_s)
|
10
25
|
end
|
26
|
+
had_error = @error_occurred
|
27
|
+
|
28
|
+
# Reset to old context
|
29
|
+
@context_row = old_row
|
30
|
+
@context_column = old_col
|
31
|
+
@context_value = old_val
|
32
|
+
@error_occurred = old_err
|
33
|
+
|
34
|
+
return had_error
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.context_row
|
38
|
+
@context_row
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.context_column
|
42
|
+
@context_column
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.context_value
|
46
|
+
@context_value
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.error_occurred!
|
50
|
+
@error_occurred = true
|
51
|
+
end
|
52
|
+
|
53
|
+
def initialize(text, context = {})
|
11
54
|
@text = text.to_s
|
55
|
+
@row = context[:row] || Error.context_row
|
56
|
+
@column = context[:column] || Error.context_column
|
57
|
+
@value = context[:value] || Error.context_value
|
58
|
+
|
59
|
+
@row.errors << self if @row
|
60
|
+
@column.errors << self if @column
|
61
|
+
|
62
|
+
Error.error_occurred!
|
12
63
|
end
|
13
64
|
|
14
65
|
def summary
|
@@ -39,7 +90,7 @@ class Importer
|
|
39
90
|
end
|
40
91
|
|
41
92
|
# Returns true if this error is for the given context, where
|
42
|
-
# context can be a Row
|
93
|
+
# context can be a Row or Importer instance.
|
43
94
|
def for_context?(context)
|
44
95
|
case context
|
45
96
|
when Row
|
@@ -13,12 +13,7 @@ class Importer
|
|
13
13
|
if mode == :stream
|
14
14
|
@html = Nokogiri::HTML(source)
|
15
15
|
elsif mode == :file
|
16
|
-
|
17
|
-
@html = File.open(source) {|f| Nokogiri::HTML(f) }
|
18
|
-
else
|
19
|
-
add_error("File not found: #{source}")
|
20
|
-
return false
|
21
|
-
end
|
16
|
+
@html = File.open(source) {|f| Nokogiri::HTML(f) }
|
22
17
|
else
|
23
18
|
add_error("Unsupported HTML mode: #{mode}")
|
24
19
|
return false
|
data/lib/iron/import/importer.rb
CHANGED
@@ -21,21 +21,51 @@
|
|
21
21
|
# A more realistic and complex example follows:
|
22
22
|
#
|
23
23
|
# Importer.build do
|
24
|
-
# # Define our columns and settings
|
24
|
+
# # Define our columns and their settings
|
25
25
|
# column :order_number do
|
26
|
-
#
|
26
|
+
# optional!
|
27
|
+
# header /order (\#|num.*|id)/i
|
27
28
|
# type :int
|
28
29
|
# end
|
30
|
+
# column :po_number do
|
31
|
+
# optional!
|
32
|
+
# type :string
|
33
|
+
# validate do |num|
|
34
|
+
# num.match(/[a-z0-9]{12}/i)
|
35
|
+
# end
|
36
|
+
# end
|
29
37
|
# column :date do
|
30
38
|
# type :date
|
31
39
|
# end
|
32
40
|
# column :amount do
|
33
41
|
# type :cents
|
34
42
|
# end
|
43
|
+
# virtual_column :tax do
|
44
|
+
# calculate do |row|
|
45
|
+
# row[:amount] * 0.05
|
46
|
+
# end
|
47
|
+
# end
|
35
48
|
#
|
49
|
+
# # When you have optional columns, you can validate that you have enough of them
|
50
|
+
# # using a custom block returning true if the found columns are good enough to
|
51
|
+
# # continue.
|
52
|
+
# validate_columns do |cols|
|
53
|
+
# # Require either an order # or a PO # column
|
54
|
+
# keys = cols.collect(&:key)
|
55
|
+
# keys.include?(:order_number) || keys.include?(:po_number)
|
56
|
+
# end
|
57
|
+
#
|
36
58
|
# # Filter out any rows missing both an order number and a PO number
|
37
59
|
# filter do |row|
|
38
|
-
# !row[:order_number].nil?
|
60
|
+
# !row[:order_number].nil? || !row[:po_number].nil?
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# # Use row-level validation to validate using
|
64
|
+
# # any or all column values for that row, to allow complex validation
|
65
|
+
# # scenarios that depend on the full context.
|
66
|
+
# validate_rows do |row|
|
67
|
+
# # Ensure PO Numbers are only valid starting in 2017
|
68
|
+
# add_error 'Invalid order - PO Num from before 2017' unless (row[:date] > Date.parse('2017-01-01') || row[:po_number].nil?)
|
39
69
|
# end
|
40
70
|
#
|
41
71
|
# end.import('/path/to/file.csv', format: :csv) do |row|
|
@@ -52,17 +82,16 @@ class Importer
|
|
52
82
|
|
53
83
|
# Inner class for holding load-time data that gets reset on each load call
|
54
84
|
class Data
|
55
|
-
attr_accessor :start_row, :rows
|
85
|
+
attr_accessor :start_row, :rows, :errors
|
56
86
|
def initialize
|
57
87
|
@start_row = nil
|
58
88
|
@rows = []
|
89
|
+
@errors = []
|
59
90
|
end
|
60
91
|
end
|
61
92
|
|
62
93
|
# Array of defined columns
|
63
94
|
attr_reader :columns
|
64
|
-
# Array of error messages collected during an import/process run
|
65
|
-
attr_accessor :errors
|
66
95
|
# Custom reader, if one has been defined using #on_file or #on_stream
|
67
96
|
attr_reader :custom_reader
|
68
97
|
# Set to the format selected during past import
|
@@ -81,6 +110,8 @@ class Importer
|
|
81
110
|
# Set to a block/lambda taking a parsed but unvalidated row as a hash,
|
82
111
|
# return true to keep, false to skip.
|
83
112
|
dsl_accessor :filter
|
113
|
+
# Alias for #filter
|
114
|
+
def filter_rows(*args, &block); filter(*args, &block); end
|
84
115
|
# Source file/stream encoding, assumes UTF-8 if none specified
|
85
116
|
dsl_accessor :encoding
|
86
117
|
|
@@ -113,14 +144,15 @@ class Importer
|
|
113
144
|
# headerless!
|
114
145
|
#
|
115
146
|
# # Manually set the start row for data, defaults to nil
|
116
|
-
# # indicating that the data rows start immediatly following the header
|
147
|
+
# # indicating that the data rows start immediately following the header, or
|
148
|
+
# # at the first row if #headerless!.
|
117
149
|
# start_row 4
|
118
150
|
#
|
119
151
|
# # Define a filter that will skip unneeded rows. The filter command takes
|
120
152
|
# # a block that receives the parsed (but not validated!) row data as an
|
121
153
|
# # associative hash of :col_key => <parsed value>, and returns
|
122
154
|
# # true to keep the row or false to exclude it.
|
123
|
-
#
|
155
|
+
# filter_rows do |row|
|
124
156
|
# row[:id].to_i > 5000
|
125
157
|
# end
|
126
158
|
#
|
@@ -171,6 +203,7 @@ class Importer
|
|
171
203
|
# Use whichever you prefer!
|
172
204
|
def column(key, options_hash = {}, &block)
|
173
205
|
# Find existing column with key to allow re-opening an existing definition
|
206
|
+
key = key.to_sym
|
174
207
|
col = @columns.detect {|c| c.key == key }
|
175
208
|
unless col
|
176
209
|
# if none found, add a new one
|
@@ -184,6 +217,11 @@ class Importer
|
|
184
217
|
col
|
185
218
|
end
|
186
219
|
|
220
|
+
def virtual_column(key, options_hash = {}, &block)
|
221
|
+
options_hash[:virtual] = true
|
222
|
+
column(key, options_hash, &block)
|
223
|
+
end
|
224
|
+
|
187
225
|
# Limit the search scope for a single format (:xls, :xlsx, :html, :custom)
|
188
226
|
# to the given value or values - the meaning and format of scopes is determined
|
189
227
|
# by that format's data reader.
|
@@ -213,7 +251,7 @@ class Importer
|
|
213
251
|
# a block accepting a file path, and returning an array of arrays (rows of
|
214
252
|
# raw column values). Use #add_error(msg) to add a reading error.
|
215
253
|
#
|
216
|
-
# Adding a custom stream parser will change the importer's default
|
254
|
+
# Adding a custom file/stream parser will change the importer's default
|
217
255
|
# format to :custom, though you can override it when calling #import as
|
218
256
|
# usual.
|
219
257
|
#
|
@@ -268,7 +306,7 @@ class Importer
|
|
268
306
|
# encoding: source encoding override, defaults to guessing based on input
|
269
307
|
#
|
270
308
|
# Generally, you should be able to throw a path or stream at it and it should work. The
|
271
|
-
# options exist to allow overriding in cases where the
|
309
|
+
# options exist to allow overriding in cases where the automated heuristics
|
272
310
|
# have failed and the input type is known by the caller.
|
273
311
|
#
|
274
312
|
# If you're trying to import from a raw string, use Importer#import_string instead.
|
@@ -362,7 +400,7 @@ class Importer
|
|
362
400
|
# Use this form of import for the common case of having a raw CSV or HTML string.
|
363
401
|
def import_string(string, options = {}, &block)
|
364
402
|
# Get a format here if needed
|
365
|
-
if options[:format].nil?
|
403
|
+
if options[:format].nil? || options[:format] == :auto
|
366
404
|
if @custom_reader
|
367
405
|
format = :custom
|
368
406
|
else
|
@@ -378,8 +416,8 @@ class Importer
|
|
378
416
|
# Call with a block accepting a single Importer::Row with contents that
|
379
417
|
# look like :column_key => <parsed value>. Any filtered rows
|
380
418
|
# will not be present. If you want to register an error, simply
|
381
|
-
# raise "some text" and it will be added to the importer's
|
382
|
-
# list for display to the user, logging, or whatever.
|
419
|
+
# raise "some text" or call #add_error and it will be added to the importer's
|
420
|
+
# error list for display to the user, logging, or whatever.
|
383
421
|
def process
|
384
422
|
@data.rows.each do |row|
|
385
423
|
begin
|
@@ -390,20 +428,46 @@ class Importer
|
|
390
428
|
end
|
391
429
|
end
|
392
430
|
|
431
|
+
# Call with a block to process error handling tasks. Block will only execute
|
432
|
+
# if an error (read, validate, exception, etc.) has occurred during the
|
433
|
+
# just-completed #import.
|
434
|
+
#
|
435
|
+
# Your block can access the #error_summary or the #errors array to do whatever
|
436
|
+
# logging, reporting etc. is desired.
|
393
437
|
def on_error(&block)
|
394
438
|
raise 'Invalid block passed to Importer#on_error: block may accept 0, 1 or 2 arguments' if block.arity > 2
|
395
439
|
|
396
440
|
if has_errors?
|
397
441
|
case block.arity
|
398
442
|
when 0 then DslProxy.exec(self, &block)
|
399
|
-
when 1 then DslProxy.exec(self,
|
400
|
-
when 2 then DslProxy.exec(self,
|
443
|
+
when 1 then DslProxy.exec(self, errors, &block)
|
444
|
+
when 2 then DslProxy.exec(self, errors, error_summary, &block)
|
401
445
|
end
|
402
446
|
end
|
403
447
|
|
404
448
|
self
|
405
449
|
end
|
406
450
|
|
451
|
+
# Call with a block accepting an array of Column objects and returning
|
452
|
+
# true if the columns in the array should constitute a valid header row. Intended
|
453
|
+
# for use with optional columns to define multiple supported column sets, or
|
454
|
+
# conditionally required secondary columns. Columns will be passed in in the
|
455
|
+
# order detected, so you can use ordering to help determine which columns are
|
456
|
+
# required if that helps.
|
457
|
+
def validate_columns(&block)
|
458
|
+
raise 'Invalid block passed to Importer#validate_columns: block should accept a single argument' if block.arity != 1
|
459
|
+
@column_validator = block
|
460
|
+
end
|
461
|
+
|
462
|
+
# Call with a block accepting a single Row instance. Just like Column#validate, you
|
463
|
+
# can fail by returning false, calling #add_error(msg) or by raising an exception.
|
464
|
+
# The intent of this method of validation is to allow using the full row context to
|
465
|
+
# validate
|
466
|
+
def validate_rows(&block)
|
467
|
+
raise 'Invalid block passed to Importer#validate_columns: block should accept a single Row argument' if block.arity != 1
|
468
|
+
@row_validator = block
|
469
|
+
end
|
470
|
+
|
407
471
|
# Process the raw values for the first rows in a sheet,
|
408
472
|
# and attempt to build a map of the column layout, and
|
409
473
|
# detect the first row of real data
|
@@ -419,7 +483,7 @@ class Importer
|
|
419
483
|
next_index += 1
|
420
484
|
end
|
421
485
|
@data.start_row = @start_row || 1
|
422
|
-
@missing_headers = nil
|
486
|
+
@missing_headers = []
|
423
487
|
return true
|
424
488
|
|
425
489
|
else
|
@@ -430,21 +494,42 @@ class Importer
|
|
430
494
|
next unless row
|
431
495
|
|
432
496
|
# Set up for this iteration
|
433
|
-
remaining = @columns.
|
497
|
+
remaining = @columns.select {|c| !c.virtual? }
|
434
498
|
|
435
499
|
# Step through this row's raw values, and look for a matching column for all columns
|
436
500
|
row.each_with_index do |val, i|
|
437
|
-
|
501
|
+
val = val.to_s
|
502
|
+
col = remaining.detect {|c| c.match_header?(val, i) }
|
438
503
|
if col
|
439
504
|
remaining -= [col]
|
440
505
|
col.data.index = i
|
506
|
+
col.data.header_text = val
|
441
507
|
end
|
442
508
|
end
|
509
|
+
# Reset remaining cols
|
510
|
+
remaining.each do |col|
|
511
|
+
col.data.index = nil
|
512
|
+
col.data.header_text = nil
|
513
|
+
end
|
443
514
|
|
444
|
-
|
515
|
+
# Have we found them all, or at least a valid sub-set?
|
516
|
+
header_found = remaining.empty?
|
517
|
+
unless header_found
|
518
|
+
if remaining.all?(&:optional?)
|
519
|
+
if @column_validator
|
520
|
+
# Run custom column validator
|
521
|
+
cols = found_columns
|
522
|
+
header_found = @column_validator.call(cols)
|
523
|
+
else
|
524
|
+
# No validator... do we have any found columns at all???
|
525
|
+
header_found = @columns.any?(&:present?)
|
526
|
+
end
|
527
|
+
end
|
528
|
+
end
|
529
|
+
if header_found
|
445
530
|
# Found all columns, have a map, update our start row to be the next line and return!
|
446
531
|
@data.start_row = @start_row || i+2
|
447
|
-
@missing_headers = nil
|
532
|
+
@missing_headers = []
|
448
533
|
return true
|
449
534
|
else
|
450
535
|
missing = remaining if (missing.nil? || missing.count > remaining.count)
|
@@ -452,7 +537,7 @@ class Importer
|
|
452
537
|
end
|
453
538
|
|
454
539
|
# If we get here, we're hosed
|
455
|
-
@missing_headers = missing.collect(&:key) if @missing_headers.
|
540
|
+
@missing_headers = missing.collect(&:key) if @missing_headers.empty? || @missing_headers.count > missing.count
|
456
541
|
false
|
457
542
|
end
|
458
543
|
end
|
@@ -467,46 +552,81 @@ class Importer
|
|
467
552
|
# Parse out the values
|
468
553
|
values = {}
|
469
554
|
@columns.each do |col|
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
555
|
+
if col.present? && !col.virtual?
|
556
|
+
index = col.data.index
|
557
|
+
raw_val = raw_data[index]
|
558
|
+
if col.parses?
|
559
|
+
# Use custom parser if this row has one
|
560
|
+
val = col.parse_value(row, raw_val)
|
561
|
+
else
|
562
|
+
# Otherwise use our standard parser
|
563
|
+
val = @reader.parse_value(raw_val, col.type)
|
564
|
+
end
|
565
|
+
values[col.key] = val
|
478
566
|
end
|
479
|
-
values[col.key] = val
|
480
567
|
end
|
481
568
|
|
482
|
-
# Set the values
|
569
|
+
# Set the values
|
483
570
|
row.set_values(values)
|
484
|
-
|
571
|
+
|
572
|
+
if !row.has_errors?
|
573
|
+
# Filter if needed
|
574
|
+
return nil if @filter && !@filter.call(row)
|
485
575
|
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
576
|
+
# Calculate virtual columns' values
|
577
|
+
@columns.each do |col|
|
578
|
+
if col.virtual?
|
579
|
+
row.values[col.key] = col.calculate_value(row)
|
580
|
+
end
|
581
|
+
end
|
582
|
+
|
583
|
+
# Validate values if any column has a custom validator
|
584
|
+
@columns.each do |col|
|
585
|
+
if col.present? && col.validates?
|
586
|
+
val = values[col.key]
|
587
|
+
col.validate_value(row, val)
|
588
|
+
end
|
589
|
+
end
|
491
590
|
|
591
|
+
# If we have a row validator, call it on the full row
|
592
|
+
if @row_validator && !row.has_errors?
|
593
|
+
valid = false
|
594
|
+
had_error = Error.with_context(@importer, row, nil, nil) do
|
595
|
+
valid = DslProxy.exec(self, row, &@row_validator)
|
596
|
+
end
|
597
|
+
if !had_error && valid.is_a?(FalseClass)
|
598
|
+
add_error("Invalid row: #{row.to_hash.inspect}", :row => row)
|
599
|
+
end
|
600
|
+
end
|
601
|
+
end
|
602
|
+
|
492
603
|
# We is good
|
493
604
|
@data.rows << row
|
494
605
|
row
|
495
606
|
end
|
607
|
+
|
608
|
+
def rows
|
609
|
+
@data.rows
|
610
|
+
end
|
611
|
+
|
612
|
+
def found_columns
|
613
|
+
@columns.select(&:present?).sort_by(&:index)
|
614
|
+
end
|
615
|
+
|
616
|
+
# Array of error messages collected during an import/process run
|
617
|
+
def errors
|
618
|
+
@data.errors
|
619
|
+
end
|
496
620
|
|
497
621
|
# When true, one or more errors have been recorded during this import/process
|
498
622
|
# cycle.
|
499
623
|
def has_errors?
|
500
|
-
@errors.any?
|
624
|
+
@data.errors.any?
|
501
625
|
end
|
502
626
|
|
503
627
|
# Add an error to our error list. Will result in a failed import.
|
504
|
-
def add_error(
|
505
|
-
|
506
|
-
msg = context
|
507
|
-
context = nil
|
508
|
-
end
|
509
|
-
@errors << Error.new(context, msg)
|
628
|
+
def add_error(msg, context = {})
|
629
|
+
@data.errors << Error.new(msg, context)
|
510
630
|
end
|
511
631
|
|
512
632
|
# Returns a human-readable summary of the errors present on the importer, or
|
@@ -517,7 +637,7 @@ class Importer
|
|
517
637
|
|
518
638
|
# Group by error text - we often get the same error dozens of times
|
519
639
|
list = {}
|
520
|
-
@errors.each do |err|
|
640
|
+
@data.errors.each do |err|
|
521
641
|
errs = list[err.text] || []
|
522
642
|
errs << err
|
523
643
|
list[err.text] = errs
|
@@ -544,8 +664,7 @@ class Importer
|
|
544
664
|
protected
|
545
665
|
|
546
666
|
def reset
|
547
|
-
@errors = []
|
548
|
-
@missing_headers = nil
|
667
|
+
@missing_headers = []
|
549
668
|
@format = nil
|
550
669
|
@reader = nil
|
551
670
|
@data = Data.new
|
data/lib/iron/import/row.rb
CHANGED
@@ -2,12 +2,14 @@ class Importer
|
|
2
2
|
|
3
3
|
class Row
|
4
4
|
|
5
|
-
attr_reader :line, :values
|
5
|
+
attr_reader :line, :values, :errors
|
6
6
|
|
7
7
|
def initialize(importer, line, value_hash = nil)
|
8
8
|
@importer = importer
|
9
9
|
@line = line
|
10
10
|
set_values(value_hash)
|
11
|
+
|
12
|
+
@errors = []
|
11
13
|
end
|
12
14
|
|
13
15
|
def set_values(value_hash)
|
@@ -58,6 +60,19 @@ class Importer
|
|
58
60
|
@importer.add_error(self, msg)
|
59
61
|
end
|
60
62
|
|
63
|
+
def has_errors?
|
64
|
+
@errors && @errors.count > 0
|
65
|
+
end
|
66
|
+
|
67
|
+
# Return a map of column key to Error, intended for use in error reporting.
|
68
|
+
def error_map
|
69
|
+
map = {}
|
70
|
+
@errors.each do |err|
|
71
|
+
map[err.column.key] = err
|
72
|
+
end
|
73
|
+
map
|
74
|
+
end
|
75
|
+
|
61
76
|
end
|
62
77
|
|
63
78
|
end
|
@@ -112,4 +112,29 @@ describe Importer::Column do
|
|
112
112
|
@importer.has_errors?.should be_true
|
113
113
|
end
|
114
114
|
|
115
|
+
it 'should record optionalness' do
|
116
|
+
@col.optional?.should be_false
|
117
|
+
@col.optional!
|
118
|
+
@col.optional?.should be_true
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'should know if it is present in the headers' do
|
122
|
+
@col.present?.should be_false
|
123
|
+
@col.data.index = 2
|
124
|
+
@col.present?.should be_true
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'should use the header text as its name if present' do
|
128
|
+
@col.data.index = 2
|
129
|
+
@col.to_s.should == 'Column C'
|
130
|
+
@col.data.header_text = 'Invoice #'
|
131
|
+
@col.to_s.should == 'Invoice # Column'
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'should support virtual operation' do
|
135
|
+
@col.virtual!
|
136
|
+
@col.virtual?.should be_true
|
137
|
+
@col.to_s.should == 'Test Column'
|
138
|
+
end
|
139
|
+
|
115
140
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
describe Importer::Error do
|
2
|
+
|
3
|
+
before do
|
4
|
+
@importer = Importer.new
|
5
|
+
@row = Importer::Row.new(@importer, 5)
|
6
|
+
@col = Importer::Column.new(@importer, :test)
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'should capture context' do
|
10
|
+
val = 'foo'
|
11
|
+
err = nil
|
12
|
+
Importer::Error.with_context(@importer, @row, @col, val) do
|
13
|
+
err = Importer::Error.new('hi')
|
14
|
+
end
|
15
|
+
err.row.should == @row
|
16
|
+
err.column.should == @col
|
17
|
+
err.value.should == val
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should return error status for #with_context' do
|
21
|
+
# Block runs fine, no error
|
22
|
+
had_err = Importer::Error.with_context(@importer, @row, @col, 'bob') do
|
23
|
+
false
|
24
|
+
end
|
25
|
+
had_err.should be_false
|
26
|
+
|
27
|
+
# Create a new error, we should get a true
|
28
|
+
had_err = Importer::Error.with_context(@importer, @row, @col, 'bob') do
|
29
|
+
Importer::Error.new('hi')
|
30
|
+
end
|
31
|
+
had_err.should be_true
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -23,6 +23,21 @@ describe Importer do
|
|
23
23
|
importer.scopes.should == { :xls => [1, 'Sheet 2'], :html => ['table.funny'] }
|
24
24
|
end
|
25
25
|
|
26
|
+
it 'should calculate virtual columns' do
|
27
|
+
importer = Importer.build do
|
28
|
+
column :num, :type => :int
|
29
|
+
virtual_column :summary do
|
30
|
+
calculate do |row|
|
31
|
+
"Value = #{row[:num]}"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
importer.import_string("num\n1\n2")
|
37
|
+
importer.error_summary.should be_nil
|
38
|
+
importer.column(:summary).to_a.should == ['Value = 1', 'Value = 2']
|
39
|
+
end
|
40
|
+
|
26
41
|
it 'should find headers automatically' do
|
27
42
|
# Define a few sample columns
|
28
43
|
importer = Importer.new
|
@@ -57,6 +72,59 @@ describe Importer do
|
|
57
72
|
importer.missing_headers.should == [:alpha]
|
58
73
|
end
|
59
74
|
|
75
|
+
it 'should succeed when missing optional columns' do
|
76
|
+
# Define a few sample columns
|
77
|
+
importer = Importer.new
|
78
|
+
importer.column(:alpha).optional!
|
79
|
+
importer.column(:beta)
|
80
|
+
importer.column(:gamma)
|
81
|
+
# Some dummy data
|
82
|
+
rows = [
|
83
|
+
['Bob', 'Beta', 'Gamma', 'Epsilon']
|
84
|
+
]
|
85
|
+
|
86
|
+
# Parse it!
|
87
|
+
importer.find_header(rows).should be_true
|
88
|
+
importer.missing_headers.should be_empty
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'should support row-based validation' do
|
92
|
+
importer = Importer.build do
|
93
|
+
column :a, :type => :int
|
94
|
+
column :b, :type => :int
|
95
|
+
|
96
|
+
validate_rows do |row|
|
97
|
+
row[:a] + row[:b] == 5
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
importer.import_string("a,b\n1,4\n6,-1\n7,0\n1,1")
|
102
|
+
importer.errors.count.should == 2
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'should support column order/presence validation' do
|
106
|
+
# Build an importer with optional columns
|
107
|
+
importer = Importer.new
|
108
|
+
importer.column(:alpha).optional!
|
109
|
+
importer.column(:beta).optional!
|
110
|
+
importer.column(:gamma)
|
111
|
+
# Set up a column validator
|
112
|
+
importer.validate_columns do |cols|
|
113
|
+
cols = cols.collect(&:key)
|
114
|
+
cols.sort == [:alpha, :gamma] || cols.sort == [:beta, :gamma]
|
115
|
+
end
|
116
|
+
|
117
|
+
# Missing required column
|
118
|
+
importer.find_header([['Alpha', 'Beta', 'Epsilon']]).should be_false
|
119
|
+
# Missing both optional
|
120
|
+
importer.find_header([['Bob', 'Gamma', 'Epsilon']]).should be_false
|
121
|
+
# Required + single optional
|
122
|
+
importer.find_header([['Bob', 'Gamma', 'Alpha']]).should be_true
|
123
|
+
importer.find_header([['Bob', 'Gamma', 'Beta']]).should be_true
|
124
|
+
# Required + both optional
|
125
|
+
importer.find_header([['Alpha', 'Gamma', 'Beta']]).should be_true
|
126
|
+
end
|
127
|
+
|
60
128
|
it 'should capture errors' do
|
61
129
|
importer = Importer.build do
|
62
130
|
column :foo
|
@@ -108,7 +176,7 @@ describe Importer do
|
|
108
176
|
it 'should import a string' do
|
109
177
|
sum = 0
|
110
178
|
csv = "one,two\n1,2"
|
111
|
-
Importer.build do
|
179
|
+
importer = Importer.build do
|
112
180
|
column :one
|
113
181
|
column :two
|
114
182
|
end.import_string(csv, :format => :csv) do |rows|
|
@@ -117,6 +185,7 @@ describe Importer do
|
|
117
185
|
sum = rows[:one].to_i + rows[:two].to_i
|
118
186
|
end
|
119
187
|
# Just make sure we ran correctly
|
188
|
+
importer.column(:one).to_s.should == 'One Column'
|
120
189
|
sum.should == 3
|
121
190
|
end
|
122
191
|
|
@@ -131,5 +200,54 @@ describe Importer do
|
|
131
200
|
importer.import_string("<div><table><tr><td>one</td></tr></table></div>")
|
132
201
|
importer.format.should == :html
|
133
202
|
end
|
203
|
+
|
204
|
+
it 'should capture errors with context' do
|
205
|
+
sum = 0
|
206
|
+
csv = "one,two,three\n1,2,X\n1,,3"
|
207
|
+
importer = Importer.build do
|
208
|
+
column :one
|
209
|
+
column :two do
|
210
|
+
validate do |val|
|
211
|
+
val.to_i == 2
|
212
|
+
end
|
213
|
+
end
|
214
|
+
column :three do
|
215
|
+
validate do |val|
|
216
|
+
add_error('Invalid value') unless val.to_i > 0
|
217
|
+
end
|
218
|
+
end
|
219
|
+
end
|
220
|
+
importer.import_string(csv)
|
221
|
+
|
222
|
+
# Just make sure we ran correctly
|
223
|
+
importer.errors.count.should == 2
|
224
|
+
importer.column(:two).errors.count.should == 1
|
225
|
+
importer.column(:three).errors.count.should == 1
|
226
|
+
importer.column(:three).error_values.should == ['X']
|
227
|
+
map = importer.rows.first.error_map
|
228
|
+
map[:two].should be_nil
|
229
|
+
map[:three].should be_a(Importer::Error)
|
230
|
+
end
|
231
|
+
|
232
|
+
it 'should import properly when optional columns are missing' do
|
233
|
+
csv = "one,two\n1,2\n1,"
|
234
|
+
importer = Importer.build do
|
235
|
+
column :one
|
236
|
+
column :two do
|
237
|
+
validate do |val|
|
238
|
+
val.to_i == 2
|
239
|
+
end
|
240
|
+
end
|
241
|
+
column :three do
|
242
|
+
optional!
|
243
|
+
validate do |val|
|
244
|
+
add_error('Invalid value') unless val.to_i > 0
|
245
|
+
end
|
246
|
+
end
|
247
|
+
end
|
248
|
+
importer.import_string(csv)
|
249
|
+
|
250
|
+
importer.found_columns.count.should == 2
|
251
|
+
end
|
134
252
|
|
135
253
|
end
|
data/spec/importer/row_spec.rb
CHANGED
@@ -33,6 +33,11 @@ describe Importer::Row do
|
|
33
33
|
@row.should be_empty
|
34
34
|
end
|
35
35
|
|
36
|
+
it 'should return nil on missing data' do
|
37
|
+
@row.set_values(:a => 1, :b => 2)
|
38
|
+
@row[:c].should be_nil
|
39
|
+
end
|
40
|
+
|
36
41
|
it 'should not change when to_hash values are changed' do
|
37
42
|
@row.set_values(:a => 1, :b => 2)
|
38
43
|
hash = @row.to_hash
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iron-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Morris
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: iron-extensions
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- spec/importer/csv_reader_spec.rb
|
118
118
|
- spec/importer/custom_reader_spec.rb
|
119
119
|
- spec/importer/data_reader_spec.rb
|
120
|
+
- spec/importer/error_spec.rb
|
120
121
|
- spec/importer/html_reader_spec.rb
|
121
122
|
- spec/importer/importer_spec.rb
|
122
123
|
- spec/importer/row_spec.rb
|
@@ -156,5 +157,5 @@ rubyforge_project:
|
|
156
157
|
rubygems_version: 2.4.3
|
157
158
|
signing_key:
|
158
159
|
specification_version: 4
|
159
|
-
summary: CSV, HTML, XLS, and XLSX import
|
160
|
+
summary: CSV, HTML, XLS, and XLSX import processing support
|
160
161
|
test_files: []
|