iron-import 0.6.1 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +16 -1
- data/README.rdoc +43 -16
- data/Version.txt +1 -1
- data/lib/iron/import/column.rb +27 -14
- data/lib/iron/import/csv_reader.rb +4 -4
- data/lib/iron/import/custom_reader.rb +14 -8
- data/lib/iron/import/data_reader.rb +42 -30
- data/lib/iron/import/error.rb +4 -16
- data/lib/iron/import/excel_reader.rb +69 -0
- data/lib/iron/import/html_reader.rb +78 -0
- data/lib/iron/import/importer.rb +432 -103
- data/lib/iron/import/row.rb +15 -11
- data/lib/iron/import/xls_reader.rb +3 -37
- data/lib/iron/import/xlsx_reader.rb +2 -37
- data/lib/iron/import.rb +2 -1
- data/spec/importer/column_spec.rb +4 -5
- data/spec/importer/csv_reader_spec.rb +1 -1
- data/spec/importer/custom_reader_spec.rb +6 -10
- data/spec/importer/data_reader_spec.rb +6 -5
- data/spec/importer/html_reader_spec.rb +105 -0
- data/spec/importer/importer_spec.rb +107 -0
- data/spec/importer/row_spec.rb +9 -2
- data/spec/importer/xls_reader_spec.rb +77 -0
- data/spec/importer/xlsx_reader_spec.rb +2 -3
- data/spec/samples/3-sheets.xls +0 -0
- data/spec/samples/col-span.html +29 -0
- data/spec/samples/html-th-td.html +11 -0
- data/spec/samples/multi-table.html +29 -0
- data/spec/samples/nanodrop.xlsx +0 -0
- data/spec/samples/scores.html +30 -0
- data/spec/samples/simple.html +14 -0
- data/spec/spec_helper.rb +1 -0
- metadata +30 -8
- data/lib/iron/import/sheet.rb +0 -263
- data/spec/importer/sheet_spec.rb +0 -65
data/lib/iron/import/importer.rb
CHANGED
@@ -18,119 +18,242 @@
|
|
18
18
|
# end
|
19
19
|
# end
|
20
20
|
#
|
21
|
-
# The row.all? call will verify that each row passed contains a value for all defined columns.
|
22
|
-
#
|
23
21
|
# A more realistic and complex example follows:
|
24
22
|
#
|
25
|
-
#
|
23
|
+
# Importer.build do
|
24
|
+
# # Define our columns and settings
|
26
25
|
# column :order_number do
|
27
|
-
#
|
26
|
+
# header /order (num.*|id)/i
|
27
|
+
# type :int
|
28
28
|
# end
|
29
|
-
# column :date
|
30
|
-
#
|
29
|
+
# column :date do
|
30
|
+
# type :date
|
31
|
+
# end
|
32
|
+
# column :amount do
|
33
|
+
# type :cents
|
34
|
+
# end
|
35
|
+
#
|
36
|
+
# # Filter out any rows missing an order number
|
37
|
+
# filter do |row|
|
38
|
+
# !row[:order_number].nil?
|
39
|
+
# end
|
40
|
+
#
|
41
|
+
# end.import('/path/to/file.csv', format: :csv) do |row|
|
42
|
+
# # Process each row as basically a hash of :column_key => value,
|
43
|
+
# # only called on import success
|
44
|
+
# Order.create(row.to_hash)
|
45
|
+
#
|
46
|
+
# end.on_error do
|
47
|
+
# # If we have any errors, do something
|
48
|
+
# raise error_summary
|
31
49
|
# end
|
32
50
|
#
|
33
51
|
class Importer
|
34
52
|
|
35
|
-
#
|
36
|
-
|
37
|
-
|
38
|
-
|
53
|
+
# Inner class for holding load-time data that gets reset on each load call
|
54
|
+
class Data
|
55
|
+
attr_accessor :start_row, :rows
|
56
|
+
def initialize
|
57
|
+
@start_row = nil
|
58
|
+
@rows = []
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Array of defined columns
|
63
|
+
attr_reader :columns
|
64
|
+
# Array of error messages collected during an import/process run
|
65
|
+
attr_accessor :errors
|
66
|
+
# Custom reader, if one has been defined using #on_file or #on_stream
|
67
|
+
attr_reader :custom_reader
|
68
|
+
# Set to the format selected during past import
|
69
|
+
attr_reader :format
|
70
|
+
# Import data
|
71
|
+
attr_reader :data
|
72
|
+
# Missing headers post-import
|
73
|
+
attr_reader :missing_headers
|
74
|
+
|
75
|
+
# When true, skips header detection
|
76
|
+
dsl_flag :headerless
|
77
|
+
# Explicitly sets the row number (1-indexed) where data rows begin,
|
78
|
+
# usually left defaulted to nil to automatically start after the header
|
79
|
+
# row.
|
80
|
+
dsl_accessor :start_row
|
81
|
+
# Set to a block/lambda taking a parsed but unvalidated row as a hash,
|
82
|
+
# return true to keep, false to skip.
|
83
|
+
dsl_accessor :filter
|
39
84
|
# Source file/stream encoding, assumes UTF-8 if none specified
|
40
85
|
dsl_accessor :encoding
|
41
86
|
|
87
|
+
# Create a new importer! See #build for details on what to do
|
88
|
+
# in the block.
|
42
89
|
def self.build(options = {}, &block)
|
43
90
|
importer = Importer.new(options)
|
44
91
|
importer.build(&block)
|
45
92
|
importer
|
46
93
|
end
|
47
94
|
|
95
|
+
# Ye standard constructor!
|
48
96
|
def initialize(options = {})
|
97
|
+
@scopes = {}
|
49
98
|
@encoding = 'UTF-8'
|
50
|
-
@
|
99
|
+
@headerless = false
|
100
|
+
|
101
|
+
@filter = nil
|
102
|
+
@columns = []
|
51
103
|
|
52
104
|
reset
|
53
105
|
end
|
54
106
|
|
55
|
-
#
|
56
|
-
#
|
107
|
+
# Call to define the importer's column configuration and other setup options.
|
108
|
+
#
|
109
|
+
# The following builder options are available:
|
110
|
+
#
|
111
|
+
# importer = Importer.build do
|
112
|
+
# # Don't try to look for a header using column definitions, there is no header
|
113
|
+
# headerless!
|
114
|
+
#
|
115
|
+
# # Manually set the start row for data, defaults to nil
|
116
|
+
# # indicating that the data rows start immediately following the header.
|
117
|
+
# start_row 4
|
118
|
+
#
|
119
|
+
# # Define a filter that will skip unneeded rows. The filter command takes
|
120
|
+
# # a block that receives the parsed (but not validated!) row data as an
|
121
|
+
# # associative hash of :col_key => <parsed value>, and returns
|
122
|
+
# # true to keep the row or false to exclude it.
|
123
|
+
# filter do |row|
|
124
|
+
# row[:id].to_i > 5000
|
125
|
+
# end
|
126
|
+
#
|
127
|
+
# # If you need to process a type of input that isn't built in, define
|
128
|
+
# # a custom reader with #on_file or #on_stream
|
129
|
+
# on_file do |path|
|
130
|
+
# ... read file at path, return array of each row's raw column values ...
|
131
|
+
# end
|
132
|
+
#
|
133
|
+
# # Got a multi-block format like Excel or HTML? You can optionally limit
|
134
|
+
# # searching by setting a scope or scopes to search:
|
135
|
+
# scope :xls, 'Sheet 2'
|
136
|
+
# # Or set a bunch of scopes in one go:
|
137
|
+
# scopes :html => ['div > table.data', 'table.aux-data'],
|
138
|
+
# :xls => [2, 'Orders']
|
139
|
+
#
|
140
|
+
# # Of course, the main thing you're going to do is to define columns. See the
|
141
|
+
# # Column class' notes for options when defining a column. Note that
|
142
|
+
# # you can define columns using either hash-style:
|
143
|
+
# column :id, :type => :integer
|
144
|
+
# # or builder-style:
|
145
|
+
# column :name do
|
146
|
+
# header /company\s*name/i
|
147
|
+
# type :string
|
148
|
+
# end
|
149
|
+
# end
|
57
150
|
def build(&block)
|
58
151
|
DslProxy.exec(self, &block) if block
|
59
152
|
self
|
60
153
|
end
|
61
154
|
|
62
|
-
#
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# Access a Sheet definition by id (either number (1-N) or sheet name).
|
68
|
-
# Used during #build calls to define a sheet with a passed block, like so:
|
155
|
+
# Add a new column definition to our list, allows customizing the new
|
156
|
+
# column with a builder block. See Importer::Column docs for
|
157
|
+
# options. In lieu of a builder mode, you can pass the same values
|
158
|
+
# as key => value pairs in the options hash to this method, so:
|
69
159
|
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
# end
|
75
|
-
# sheet('Orders') do
|
76
|
-
# column :id
|
77
|
-
# column :price
|
78
|
-
# filter do |row|
|
79
|
-
# row[:price].prensent?
|
80
|
-
# end
|
160
|
+
# column(:foo) do
|
161
|
+
# type :string
|
162
|
+
# parse do |val|
|
163
|
+
# val.to_s.upcase
|
81
164
|
# end
|
82
165
|
# end
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
166
|
+
#
|
167
|
+
# Is equivalent to:
|
168
|
+
#
|
169
|
+
# column(:foo, :type => :string, :parse => lambda {|val| val.to_s.upcase})
|
170
|
+
#
|
171
|
+
# Use whichever you prefer!
|
172
|
+
def column(key, options_hash = {}, &block)
|
173
|
+
# Find existing column with key to allow re-opening an existing definition
|
174
|
+
col = @columns.detect {|c| c.key == key }
|
175
|
+
unless col
|
176
|
+
# if none found, add a new one
|
177
|
+
col = Column.new(self, key, options_hash)
|
178
|
+
@columns << col
|
91
179
|
end
|
92
|
-
sheet = @sheets[id]
|
93
180
|
|
94
|
-
#
|
95
|
-
|
181
|
+
# Customize if needed
|
182
|
+
DslProxy::exec(col, &block) if block
|
96
183
|
|
97
|
-
|
98
|
-
|
184
|
+
col
|
185
|
+
end
|
186
|
+
|
187
|
+
# Limit the search scope for a single format (:xls, :xlsx, :html, :custom)
|
188
|
+
# to the given value or values - the meaning and format of scopes is determined
|
189
|
+
# by that format's data reader.
|
190
|
+
def scope(format, *scopes)
|
191
|
+
@scopes[format] = scopes.flatten
|
192
|
+
end
|
193
|
+
|
194
|
+
# Limit the search scope for more than one format at a time. For example, if
|
195
|
+
# you support both XLS and XLSX formats (and why wouldn't you?) then you
|
196
|
+
# could tell the importer to look only at the sheets named "Orders" and
|
197
|
+
# "Legacy Orders" like so:
|
198
|
+
#
|
199
|
+
# scopes :xls => ['Orders', 'Legacy Orders'],
|
200
|
+
# :xlsx => ['Orders', 'Legacy Orders']
|
201
|
+
#
|
202
|
+
def scopes(map = :__read__)
|
203
|
+
if map == :__read__
|
204
|
+
return @scopes
|
205
|
+
else
|
206
|
+
map.each_pair do |format, scope|
|
207
|
+
scope(format, scope)
|
208
|
+
end
|
209
|
+
end
|
99
210
|
end
|
100
211
|
|
101
|
-
# Define a custom file reader to implement your own
|
212
|
+
# Define a custom file reader to implement your own parsing. Pass
|
213
|
+
# a block accepting a file path, and returning an array of arrays (rows of
|
214
|
+
# raw column values). Use #add_error(msg) to add a reading error.
|
215
|
+
#
|
216
|
+
# Adding a custom stream parser will change the importer's default
|
217
|
+
# format to :custom, though you can override it when calling #import as
|
218
|
+
# usual.
|
219
|
+
#
|
220
|
+
# Only one of #on_file or #on_stream needs to be implemented - the importer
|
221
|
+
# will cross convert as needed!
|
222
|
+
#
|
223
|
+
# Example:
|
224
|
+
#
|
225
|
+
# on_file do |path|
|
226
|
+
# # Read a file line by line
|
227
|
+
# File.readlines(path).collect do |line|
|
228
|
+
# # Each line has colon-separated values, so split 'em up
|
229
|
+
# line.split(/\s*:\s*/)
|
230
|
+
# end
|
231
|
+
# end
|
232
|
+
#
|
102
233
|
def on_file(&block)
|
103
234
|
@custom_reader = CustomReader.new(self) unless @custom_reader
|
104
235
|
@custom_reader.set_reader(:file, block)
|
105
236
|
end
|
106
237
|
|
238
|
+
# Just like #on_file, but for streams. Pass
|
239
|
+
# a block accepting a stream, and returning an array of arrays (rows of
|
240
|
+
# raw column values). Use #add_error(msg) to add a reading error.
|
241
|
+
#
|
242
|
+
# Example:
|
243
|
+
#
|
244
|
+
# on_stream do |stream|
|
245
|
+
# # Stream contains rows separated by a | char
|
246
|
+
# stream.readlines('|').collect do |line|
|
247
|
+
# # Each line has 3 fields of 10 characters each
|
248
|
+
# [line[0...10], line[10...20], line[20...30]]
|
249
|
+
# end
|
250
|
+
# end
|
251
|
+
#
|
107
252
|
def on_stream(&block)
|
108
253
|
@custom_reader = CustomReader.new(self) unless @custom_reader
|
109
254
|
@custom_reader.set_reader(:stream, block)
|
110
255
|
end
|
111
256
|
|
112
|
-
# Very, very commonly we only want to deal with the default sheet. In this case,
|
113
|
-
# let folks skip the sheet(n) do ... end block wrapper and just define columns
|
114
|
-
# against the main importer. Internally, proxy those calls to the first sheet.
|
115
|
-
def column(*args, &block)
|
116
|
-
default_sheet.column(*args, &block)
|
117
|
-
end
|
118
|
-
|
119
|
-
# Ditto for filters
|
120
|
-
def filter(*args, &block)
|
121
|
-
default_sheet.filter(*args, &block)
|
122
|
-
end
|
123
|
-
|
124
|
-
# Ditto for start row too
|
125
|
-
def start_row(row_num)
|
126
|
-
default_sheet.start_row(row_num)
|
127
|
-
end
|
128
|
-
|
129
|
-
# More facading
|
130
|
-
def headerless!
|
131
|
-
default_sheet.headerless!
|
132
|
-
end
|
133
|
-
|
134
257
|
# First call to a freshly #build'd importer, this will read the file/stream/path supplied,
|
135
258
|
# validate the required values, run custom validations... basically pre-parse and
|
136
259
|
# massage the supplied data. It will return true on success, or false if one
|
@@ -139,87 +262,293 @@ class Importer
|
|
139
262
|
# You may supply various options for the import using the options hash. Supported
|
140
263
|
# options include:
|
141
264
|
#
|
142
|
-
# format: one of :auto, :csv, :xls, :xlsx, defaults to :auto, forces treating the supplied
|
143
|
-
# source as the specified format, or auto-
|
265
|
+
# format: one of :auto, :csv, :html, :xls, :xlsx, defaults to :auto, forces treating the supplied
|
266
|
+
# source as the specified format, or attempts to auto-detect if set to :auto
|
267
|
+
# scope: specify the search scope for the data/format, overriding any scope set with #scope
|
144
268
|
# encoding: source encoding override, defaults to guessing based on input
|
145
269
|
#
|
146
|
-
# Generally, you should be able to throw a
|
270
|
+
# Generally, you should be able to throw a path or stream at it and it should work. The
|
147
271
|
# options exist to allow overriding in cases where the automation heuristics
|
148
272
|
# have failed and the input type is known by the caller.
|
149
273
|
#
|
274
|
+
# If you're trying to import from a raw string, use Importer#import_string instead.
|
275
|
+
#
|
150
276
|
# After #import has completed successfully, you can process the resulting data
|
151
|
-
# using #process or extract the raw data by calling #
|
152
|
-
|
277
|
+
# using #process or extract the raw data by calling #to_a to get an array of row hashes
|
278
|
+
#
|
279
|
+
# Note that as of version 0.7.0, there is a more compact operation mode enabled by passing
|
280
|
+
# a block to this call:
|
281
|
+
#
|
282
|
+
# importer.import(...) do |row|
|
283
|
+
# # Process each row here
|
284
|
+
# end
|
285
|
+
#
|
286
|
+
# In this mode, the block is called with each row as in #process, conditionally on no
|
287
|
+
# errors. In addition, when a block is passed, true/false is not returned (as the
|
288
|
+
# block is already conditionally called). Instead, it will return the importer to allow
|
289
|
+
# chaining to #on_error or other calls.
|
290
|
+
def import(path_or_stream, options = {}, &block)
|
153
291
|
# Clear all our load-time state, including all rows, header locations... you name it
|
154
292
|
reset
|
155
293
|
|
156
294
|
# Get the reader for this format
|
157
295
|
default = @custom_reader ? :custom : :auto
|
158
|
-
format = options.delete(:format) { default }
|
159
|
-
if format == :custom
|
296
|
+
@format = options.delete(:format) { default }
|
297
|
+
if @format == :custom
|
160
298
|
# Custom format selected, use our internal custom reader
|
161
|
-
@
|
299
|
+
@reader = @custom_reader
|
162
300
|
|
163
|
-
elsif format && format != :auto
|
301
|
+
elsif @format && @format != :auto
|
164
302
|
# Explicit format requested
|
165
|
-
@
|
166
|
-
unless @data
|
167
|
-
add_error("Unable to find format handler for format #{format} - aborting")
|
168
|
-
return
|
169
|
-
end
|
303
|
+
@reader = DataReader::for_format(self, @format)
|
170
304
|
|
171
305
|
else
|
172
306
|
# Auto select
|
173
|
-
@
|
307
|
+
@reader = DataReader::for_source(self, path_or_stream)
|
308
|
+
@format = @reader.format
|
309
|
+
end
|
310
|
+
|
311
|
+
# Verify we got one
|
312
|
+
unless @reader
|
313
|
+
add_error("Unable to find format handler for format :#{format} on import of #{path_or_stream.class.name} source - aborting")
|
314
|
+
return
|
315
|
+
end
|
316
|
+
|
317
|
+
# What scopes (if any) should we limit our searching to?
|
318
|
+
scopes = options.delete(:scope) { @scopes[@format] }
|
319
|
+
if scopes && !scopes.is_a?(Array)
|
320
|
+
scopes = [scopes]
|
174
321
|
end
|
175
322
|
|
176
323
|
# Read in the data!
|
177
|
-
@
|
324
|
+
@reader.load(path_or_stream, scopes) do |raw_rows|
|
325
|
+
# Find our column layout, start of data, etc
|
326
|
+
if find_header(raw_rows)
|
327
|
+
# Now, run all the data and add it as a Row instance
|
328
|
+
raw_rows.each_with_index do |raw, index|
|
329
|
+
row_num = index + 1
|
330
|
+
if row_num >= @data.start_row
|
331
|
+
add_row(row_num, raw)
|
332
|
+
end
|
333
|
+
end
|
334
|
+
# We've found a workable sheet/table/whatever, stop looking
|
335
|
+
true
|
336
|
+
|
337
|
+
else
|
338
|
+
# This sheet/table/whatever didn't have the needed header, try
|
339
|
+
# the next one (if any)
|
340
|
+
false
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
# If we have any missing headers, note that fact
|
345
|
+
if @missing_headers && @missing_headers.count > 0
|
346
|
+
add_error("Unable to locate required column header for column(s): " + @missing_headers.collect{|c| ":#{c}"}.list_join(', '))
|
347
|
+
end
|
348
|
+
|
349
|
+
# If we're here with no errors, we rule!
|
350
|
+
success = !has_errors?
|
351
|
+
|
352
|
+
if block
|
353
|
+
# New way, if block is passed, process it on success
|
354
|
+
process(&block) if success
|
355
|
+
self
|
356
|
+
else
|
357
|
+
# Old way, return result
|
358
|
+
success
|
359
|
+
end
|
178
360
|
end
|
179
361
|
|
180
|
-
#
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
362
|
+
# Use this form of import for the common case of having a raw CSV or HTML string.
|
363
|
+
def import_string(string, options = {}, &block)
|
364
|
+
# Get a format here if needed
|
365
|
+
if options[:format].nil?
|
366
|
+
if @custom_reader
|
367
|
+
format = :custom
|
368
|
+
else
|
369
|
+
format = string.include?('<table') && string.include?('</tr>') ? :html : :csv
|
370
|
+
end
|
371
|
+
options[:format] = format
|
372
|
+
end
|
373
|
+
|
374
|
+
# Do the import, converting the string to a stream
|
375
|
+
import(StringIO.new(string), options, &block)
|
376
|
+
end
|
377
|
+
|
378
|
+
# Call with a block accepting a single Importer::Row with contents that
|
379
|
+
# look like :column_key => <parsed value>. Any filtered rows
|
380
|
+
# will not be present. If you want to register an error, simply
|
381
|
+
# raise "some text" and it will be added to the importer's error
|
382
|
+
# list for display to the user, logging, or whatever.
|
383
|
+
def process
|
384
|
+
@data.rows.each do |row|
|
385
|
+
begin
|
386
|
+
yield row
|
387
|
+
rescue Exception => e
|
388
|
+
add_error(row, e.to_s)
|
389
|
+
end
|
390
|
+
end
|
185
391
|
end
|
186
392
|
|
187
|
-
def
|
188
|
-
if
|
189
|
-
|
190
|
-
|
393
|
+
def on_error(&block)
|
394
|
+
raise 'Invalid block passed to Importer#on_error: block may accept 0, 1 or 2 arguments' if block.arity > 2
|
395
|
+
|
396
|
+
if has_errors?
|
397
|
+
case block.arity
|
398
|
+
when 0 then DslProxy.exec(self, &block)
|
399
|
+
when 1 then DslProxy.exec(self, @errors, &block)
|
400
|
+
when 2 then DslProxy.exec(self, @errors, error_summary, &block)
|
401
|
+
end
|
191
402
|
end
|
192
|
-
|
403
|
+
|
404
|
+
self
|
193
405
|
end
|
194
406
|
|
407
|
+
# Process the raw values for the first rows in a sheet,
|
408
|
+
# and attempt to build a map of the column layout, and
|
409
|
+
# detect the first row of real data
|
410
|
+
def find_header(raw_rows)
|
411
|
+
if headerless?
|
412
|
+
# Use implicit or explicit column position when told to not look for a header
|
413
|
+
next_index = 0
|
414
|
+
@columns.each do |col|
|
415
|
+
unless col.position.nil?
|
416
|
+
next_index = col.fixed_index
|
417
|
+
end
|
418
|
+
col.data.index = next_index
|
419
|
+
next_index += 1
|
420
|
+
end
|
421
|
+
@data.start_row = @start_row || 1
|
422
|
+
@missing_headers = nil
|
423
|
+
return true
|
424
|
+
|
425
|
+
else
|
426
|
+
# Match by testing
|
427
|
+
missing = nil
|
428
|
+
raw_rows.each_with_index do |row, i|
|
429
|
+
# Um, have data?
|
430
|
+
next unless row
|
431
|
+
|
432
|
+
# Set up for this iteration
|
433
|
+
remaining = @columns.dup
|
434
|
+
|
435
|
+
# Step through this row's raw values, and look for a matching column for all columns
|
436
|
+
row.each_with_index do |val, i|
|
437
|
+
col = remaining.detect {|c| c.match_header?(val.to_s, i) }
|
438
|
+
if col
|
439
|
+
remaining -= [col]
|
440
|
+
col.data.index = i
|
441
|
+
end
|
442
|
+
end
|
443
|
+
|
444
|
+
if remaining.empty?
|
445
|
+
# Found all columns, have a map, update our start row to be the next line and return!
|
446
|
+
@data.start_row = @start_row || i+2
|
447
|
+
@missing_headers = nil
|
448
|
+
return true
|
449
|
+
else
|
450
|
+
missing = remaining if (missing.nil? || missing.count > remaining.count)
|
451
|
+
end
|
452
|
+
end
|
453
|
+
|
454
|
+
# If we get here, we're hosed
|
455
|
+
@missing_headers = missing.collect(&:key) if @missing_headers.nil? || @missing_headers.count > missing.count
|
456
|
+
false
|
457
|
+
end
|
458
|
+
end
|
459
|
+
|
460
|
+
# Add a new row to our stash, parsing/filtering/validating as we go!
|
461
|
+
def add_row(line, raw_data)
|
462
|
+
# Gracefully handle custom parsers that return nil for a row's data
|
463
|
+
raw_data ||= []
|
464
|
+
# Add the row
|
465
|
+
row = Row.new(self, line)
|
466
|
+
|
467
|
+
# Parse out the values
|
468
|
+
values = {}
|
469
|
+
@columns.each do |col|
|
470
|
+
index = col.data.index
|
471
|
+
raw_val = raw_data[index]
|
472
|
+
if col.parse
|
473
|
+
# Use custom parser if this column has one
|
474
|
+
val = col.parse_value(row, raw_val)
|
475
|
+
else
|
476
|
+
# Otherwise use our standard parser
|
477
|
+
val = @reader.parse_value(raw_val, col.type)
|
478
|
+
end
|
479
|
+
values[col.key] = val
|
480
|
+
end
|
481
|
+
|
482
|
+
# Set the values and filter if needed
|
483
|
+
row.set_values(values)
|
484
|
+
return nil if @filter && !@filter.call(row)
|
485
|
+
|
486
|
+
# Row is desired, now validate values
|
487
|
+
@columns.each do |col|
|
488
|
+
val = values[col.key]
|
489
|
+
col.validate_value(row, val)
|
490
|
+
end
|
491
|
+
|
492
|
+
# We is good
|
493
|
+
@data.rows << row
|
494
|
+
row
|
495
|
+
end
|
496
|
+
|
497
|
+
# When true, one or more errors have been recorded during this import/process
|
498
|
+
# cycle.
|
195
499
|
def has_errors?
|
196
500
|
@errors.any?
|
197
501
|
end
|
198
502
|
|
199
|
-
|
503
|
+
# Add an error to our error list. Will result in a failed import.
|
504
|
+
def add_error(context, msg = nil)
|
200
505
|
if context.is_a?(String) && msg.nil?
|
201
506
|
msg = context
|
202
507
|
context = nil
|
203
508
|
end
|
204
|
-
@
|
205
|
-
end
|
206
|
-
|
207
|
-
def has_warnings?
|
208
|
-
@warnings.any?
|
509
|
+
@errors << Error.new(context, msg)
|
209
510
|
end
|
210
511
|
|
211
|
-
# Returns a human-readable summary of the errors present on the importer
|
512
|
+
# Returns a human-readable summary of the errors present on the importer, or
|
513
|
+
# nil if no errors are present
|
212
514
|
def error_summary
|
515
|
+
# Simple case
|
213
516
|
return nil unless has_errors?
|
214
|
-
|
517
|
+
|
518
|
+
# Group by error text - we often get the same error dozens of times
|
519
|
+
list = {}
|
520
|
+
@errors.each do |err|
|
521
|
+
errs = list[err.text] || []
|
522
|
+
errs << err
|
523
|
+
list[err.text] = errs
|
524
|
+
end
|
525
|
+
|
526
|
+
# Build summary & return
|
527
|
+
list.values.collect do |errs|
|
528
|
+
summary = errs.first.summary
|
529
|
+
if errs.count == 1
|
530
|
+
summary
|
531
|
+
else
|
532
|
+
errs.count.to_s + ' x ' + summary
|
533
|
+
end
|
534
|
+
end.list_join(', ')
|
535
|
+
end
|
536
|
+
|
537
|
+
# After calling #import, you can dump the final values for each row
|
538
|
+
# as an array of hashes. Useful in debugging! For general processing,
|
539
|
+
# use #process or the block form of #import instead.
|
540
|
+
def to_a
|
541
|
+
@data.rows.collect(&:values)
|
215
542
|
end
|
216
543
|
|
217
544
|
protected
|
218
545
|
|
219
546
|
def reset
|
220
547
|
@errors = []
|
221
|
-
@
|
222
|
-
@
|
548
|
+
@missing_headers = nil
|
549
|
+
@format = nil
|
550
|
+
@reader = nil
|
551
|
+
@data = Data.new
|
223
552
|
end
|
224
553
|
|
225
554
|
end
|
data/lib/iron/import/row.rb
CHANGED
@@ -2,12 +2,12 @@ class Importer
|
|
2
2
|
|
3
3
|
class Row
|
4
4
|
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :line, :values
|
6
6
|
|
7
|
-
def initialize(
|
8
|
-
@
|
7
|
+
def initialize(importer, line, value_hash = nil)
|
8
|
+
@importer = importer
|
9
9
|
@line = line
|
10
|
-
|
10
|
+
set_values(value_hash)
|
11
11
|
end
|
12
12
|
|
13
13
|
def set_values(value_hash)
|
@@ -15,8 +15,9 @@ class Importer
|
|
15
15
|
end
|
16
16
|
|
17
17
|
# True when all columns have a non-nil value, useful in filtering out junk
|
18
|
-
# rows
|
18
|
+
# rows. Pass in one or more keys to check only those keys for presence.
|
19
19
|
def all?(*keys)
|
20
|
+
keys.flatten!
|
20
21
|
if keys.any?
|
21
22
|
# Check only the specified keys
|
22
23
|
valid = true
|
@@ -33,25 +34,28 @@ class Importer
|
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
37
|
+
# True when all row columns have nil values.
|
36
38
|
def empty?
|
37
39
|
@values.values.all?(&:nil?)
|
38
40
|
end
|
39
41
|
|
40
|
-
# Returns the value of a column
|
42
|
+
# Returns the value of a column.
|
41
43
|
def [](column_key)
|
42
44
|
@values[column_key]
|
43
45
|
end
|
44
|
-
|
46
|
+
|
47
|
+
# The row's name, e.g. 'Row 4'
|
45
48
|
def to_s
|
46
49
|
"Row #{@line}"
|
47
50
|
end
|
48
51
|
|
49
|
-
|
50
|
-
|
52
|
+
# This row's values as a hash of :column_key => <parsed + validated value>
|
53
|
+
def to_hash
|
54
|
+
@values.dup
|
51
55
|
end
|
52
56
|
|
53
|
-
def
|
54
|
-
@
|
57
|
+
def add_error(msg)
|
58
|
+
@importer.add_error(self, msg)
|
55
59
|
end
|
56
60
|
|
57
61
|
end
|