iron-import 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +16 -1
- data/README.rdoc +43 -16
- data/Version.txt +1 -1
- data/lib/iron/import/column.rb +27 -14
- data/lib/iron/import/csv_reader.rb +4 -4
- data/lib/iron/import/custom_reader.rb +14 -8
- data/lib/iron/import/data_reader.rb +42 -30
- data/lib/iron/import/error.rb +4 -16
- data/lib/iron/import/excel_reader.rb +69 -0
- data/lib/iron/import/html_reader.rb +78 -0
- data/lib/iron/import/importer.rb +432 -103
- data/lib/iron/import/row.rb +15 -11
- data/lib/iron/import/xls_reader.rb +3 -37
- data/lib/iron/import/xlsx_reader.rb +2 -37
- data/lib/iron/import.rb +2 -1
- data/spec/importer/column_spec.rb +4 -5
- data/spec/importer/csv_reader_spec.rb +1 -1
- data/spec/importer/custom_reader_spec.rb +6 -10
- data/spec/importer/data_reader_spec.rb +6 -5
- data/spec/importer/html_reader_spec.rb +105 -0
- data/spec/importer/importer_spec.rb +107 -0
- data/spec/importer/row_spec.rb +9 -2
- data/spec/importer/xls_reader_spec.rb +77 -0
- data/spec/importer/xlsx_reader_spec.rb +2 -3
- data/spec/samples/3-sheets.xls +0 -0
- data/spec/samples/col-span.html +29 -0
- data/spec/samples/html-th-td.html +11 -0
- data/spec/samples/multi-table.html +29 -0
- data/spec/samples/nanodrop.xlsx +0 -0
- data/spec/samples/scores.html +30 -0
- data/spec/samples/simple.html +14 -0
- data/spec/spec_helper.rb +1 -0
- metadata +30 -8
- data/lib/iron/import/sheet.rb +0 -263
- data/spec/importer/sheet_spec.rb +0 -65
data/lib/iron/import/importer.rb
CHANGED
@@ -18,119 +18,242 @@
|
|
18
18
|
# end
|
19
19
|
# end
|
20
20
|
#
|
21
|
-
# The row.all? call will verify that each row passed contains a value for all defined columns.
|
22
|
-
#
|
23
21
|
# A more realistic and complex example follows:
|
24
22
|
#
|
25
|
-
#
|
23
|
+
# Importer.build do
|
24
|
+
# # Define our columns and settings
|
26
25
|
# column :order_number do
|
27
|
-
#
|
26
|
+
# header /order (num.*|id)/i
|
27
|
+
# type :int
|
28
28
|
# end
|
29
|
-
# column :date
|
30
|
-
#
|
29
|
+
# column :date do
|
30
|
+
# type :date
|
31
|
+
# end
|
32
|
+
# column :amount do
|
33
|
+
# type :cents
|
34
|
+
# end
|
35
|
+
#
|
36
|
+
# # Filter out any rows missing an order number
|
37
|
+
# filter do |row|
|
38
|
+
# !row[:order_number].nil?
|
39
|
+
# end
|
40
|
+
#
|
41
|
+
# end.import('/path/to/file.csv', format: :csv) do |row|
|
42
|
+
# # Process each row as basically a hash of :column_key => value,
|
43
|
+
# # only called on import success
|
44
|
+
# Order.create(row.to_hash)
|
45
|
+
#
|
46
|
+
# end.on_error do
|
47
|
+
# # If we have any errors, do something
|
48
|
+
# raise error_summary
|
31
49
|
# end
|
32
50
|
#
|
33
51
|
class Importer
|
34
52
|
|
35
|
-
#
|
36
|
-
|
37
|
-
|
38
|
-
|
53
|
+
# Inner class for holding load-time data that gets reset on each load call
|
54
|
+
class Data
|
55
|
+
attr_accessor :start_row, :rows
|
56
|
+
def initialize
|
57
|
+
@start_row = nil
|
58
|
+
@rows = []
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Array of defined columns
|
63
|
+
attr_reader :columns
|
64
|
+
# Array of error messages collected during an import/process run
|
65
|
+
attr_accessor :errors
|
66
|
+
# Custom reader, if one has been defined using #on_file or #on_stream
|
67
|
+
attr_reader :custom_reader
|
68
|
+
# Set to the format selected during the most recent import
|
69
|
+
attr_reader :format
|
70
|
+
# Import data
|
71
|
+
attr_reader :data
|
72
|
+
# Missing headers post-import
|
73
|
+
attr_reader :missing_headers
|
74
|
+
|
75
|
+
# When true, skips header detection
|
76
|
+
dsl_flag :headerless
|
77
|
+
# Explicitly sets the row number (1-indexed) where data rows begin,
|
78
|
+
# usually left defaulted to nil to automatically start after the header
|
79
|
+
# row.
|
80
|
+
dsl_accessor :start_row
|
81
|
+
# Set to a block/lambda taking a parsed but unvalidated row as a hash,
|
82
|
+
# return true to keep, false to skip.
|
83
|
+
dsl_accessor :filter
|
39
84
|
# Source file/stream encoding, assumes UTF-8 if none specified
|
40
85
|
dsl_accessor :encoding
|
41
86
|
|
87
|
+
# Create a new importer! See #build for details on what to do
|
88
|
+
# in the block.
|
42
89
|
def self.build(options = {}, &block)
|
43
90
|
importer = Importer.new(options)
|
44
91
|
importer.build(&block)
|
45
92
|
importer
|
46
93
|
end
|
47
94
|
|
95
|
+
# Ye standard constructor!
|
48
96
|
def initialize(options = {})
|
97
|
+
@scopes = {}
|
49
98
|
@encoding = 'UTF-8'
|
50
|
-
@
|
99
|
+
@headerless = false
|
100
|
+
|
101
|
+
@filter = nil
|
102
|
+
@columns = []
|
51
103
|
|
52
104
|
reset
|
53
105
|
end
|
54
106
|
|
55
|
-
#
|
56
|
-
#
|
107
|
+
# Call to define the importer's column configuration and other setup options.
|
108
|
+
#
|
109
|
+
# The following builder options are available:
|
110
|
+
#
|
111
|
+
# importer = Importer.build do
|
112
|
+
# # Don't try to look for a header using column definitions, there is no header
|
113
|
+
# headerless!
|
114
|
+
#
|
115
|
+
# # Manually set the start row for data, defaults to nil
|
116
|
+
# # indicating that the data rows start immediately following the header.
|
117
|
+
# start_row 4
|
118
|
+
#
|
119
|
+
# # Define a filter that will skip unneeded rows. The filter command takes
|
120
|
+
# # a block that receives the parsed (but not validated!) row data as an
|
121
|
+
# # associative hash of :col_key => <parsed value>, and returns
|
122
|
+
# # true to keep the row or false to exclude it.
|
123
|
+
# filter do |row|
|
124
|
+
# row[:id].to_i > 5000
|
125
|
+
# end
|
126
|
+
#
|
127
|
+
# # If you need to process a type of input that isn't built in, define
|
128
|
+
# # a custom reader with #on_file or #on_stream
|
129
|
+
# on_file do |path|
|
130
|
+
# ... read file at path, return array of each row's raw column values ...
|
131
|
+
# end
|
132
|
+
#
|
133
|
+
# # Got a multi-block format like Excel or HTML? You can optionally limit
|
134
|
+
# # searching by setting a scope or scopes to search:
|
135
|
+
# scope :xls, 'Sheet 2'
|
136
|
+
# # Or set a bunch of scopes in one go:
|
137
|
+
# scopes :html => ['div > table.data', 'table.aux-data'],
|
138
|
+
# :xls => [2, 'Orders']
|
139
|
+
#
|
140
|
+
# # Of course, the main thing you're going to do is to define columns. See the
|
141
|
+
# # Column class' notes for options when defining a column. Note that
|
142
|
+
# # you can define columns using either hash-style:
|
143
|
+
# column :id, :type => :integer
|
144
|
+
# # or builder-style:
|
145
|
+
# column :name do
|
146
|
+
# header /company\s*name/i
|
147
|
+
# type :string
|
148
|
+
# end
|
149
|
+
# end
|
57
150
|
def build(&block)
|
58
151
|
DslProxy.exec(self, &block) if block
|
59
152
|
self
|
60
153
|
end
|
61
154
|
|
62
|
-
#
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# Access a Sheet definition by id (either number (1-N) or sheet name).
|
68
|
-
# Used during #build calls to define a sheet with a passed block, like so:
|
155
|
+
# Add a new column definition to our list, allows customizing the new
|
156
|
+
# column with a builder block. See Importer::Column docs for
|
157
|
+
# options. In lieu of a builder mode, you can pass the same values
|
158
|
+
# as key => value pairs in the options hash to this method, so:
|
69
159
|
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
# end
|
75
|
-
# sheet('Orders') do
|
76
|
-
# column :id
|
77
|
-
# column :price
|
78
|
-
# filter do |row|
|
79
|
-
# row[:price].prensent?
|
80
|
-
# end
|
160
|
+
# column(:foo) do
|
161
|
+
# type :string
|
162
|
+
# parse do |val|
|
163
|
+
# val.to_s.upcase
|
81
164
|
# end
|
82
165
|
# end
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
166
|
+
#
|
167
|
+
# Is equivalent to:
|
168
|
+
#
|
169
|
+
# column(:foo, :type => :string, :parse => lambda {|val| val.to_s.upcase})
|
170
|
+
#
|
171
|
+
# Use whichever you prefer!
|
172
|
+
def column(key, options_hash = {}, &block)
|
173
|
+
# Find existing column with key to allow re-opening an existing definition
|
174
|
+
col = @columns.detect {|c| c.key == key }
|
175
|
+
unless col
|
176
|
+
# if none found, add a new one
|
177
|
+
col = Column.new(self, key, options_hash)
|
178
|
+
@columns << col
|
91
179
|
end
|
92
|
-
sheet = @sheets[id]
|
93
180
|
|
94
|
-
#
|
95
|
-
|
181
|
+
# Customize if needed
|
182
|
+
DslProxy::exec(col, &block) if block
|
96
183
|
|
97
|
-
|
98
|
-
|
184
|
+
col
|
185
|
+
end
|
186
|
+
|
187
|
+
# Limit the search scope for a single format (:xls, :xlsx, :html, :custom)
|
188
|
+
# to the given value or values - the meaning and format of scopes is determined
|
189
|
+
# by that format's data reader.
|
190
|
+
def scope(format, *scopes)
|
191
|
+
@scopes[format] = scopes.flatten
|
192
|
+
end
|
193
|
+
|
194
|
+
# Limit the search scope for more than one format at a time. For example, if
|
195
|
+
# you support both XLS and XLSX formats (and why wouldn't you?) then you
|
196
|
+
# could tell the importer to look only at the sheets named "Orders" and
|
197
|
+
# "Legacy Orders" like so:
|
198
|
+
#
|
199
|
+
# scopes :xls => ['Orders', 'Legacy Orders'],
|
200
|
+
# :xlsx => ['Orders', 'Legacy Orders']
|
201
|
+
#
|
202
|
+
def scopes(map = :__read__)
|
203
|
+
if map == :__read__
|
204
|
+
return @scopes
|
205
|
+
else
|
206
|
+
map.each_pair do |format, scope|
|
207
|
+
scope(format, scope)
|
208
|
+
end
|
209
|
+
end
|
99
210
|
end
|
100
211
|
|
101
|
-
# Define a custom file reader to implement your own
|
212
|
+
# Define a custom file reader to implement your own parsing. Pass
|
213
|
+
# a block accepting a file path, and returning an array of arrays (rows of
|
214
|
+
# raw column values). Use #add_error(msg) to add a reading error.
|
215
|
+
#
|
216
|
+
# Adding a custom file parser will change the importer's default
|
217
|
+
# format to :custom, though you can override it when calling #import as
|
218
|
+
# usual.
|
219
|
+
#
|
220
|
+
# Only one of #on_file or #on_stream needs to be implemented - the importer
|
221
|
+
# will cross convert as needed!
|
222
|
+
#
|
223
|
+
# Example:
|
224
|
+
#
|
225
|
+
# on_file do |path|
|
226
|
+
# # Read a file line by line
|
227
|
+
# File.readlines(path).collect do |line|
|
228
|
+
# # Each line has colon-separated values, so split 'em up
|
229
|
+
# line.split(/\s*:\s*/)
|
230
|
+
# end
|
231
|
+
# end
|
232
|
+
#
|
102
233
|
def on_file(&block)
|
103
234
|
@custom_reader = CustomReader.new(self) unless @custom_reader
|
104
235
|
@custom_reader.set_reader(:file, block)
|
105
236
|
end
|
106
237
|
|
238
|
+
# Just like #on_file, but for streams. Pass
|
239
|
+
# a block accepting a stream, and returning an array of arrays (rows of
|
240
|
+
# raw column values). Use #add_error(msg) to add a reading error.
|
241
|
+
#
|
242
|
+
# Example:
|
243
|
+
#
|
244
|
+
# on_stream do |stream|
|
245
|
+
# # Stream contains rows separated by a | char
|
246
|
+
# stream.readlines('|').collect do |line|
|
247
|
+
# # Each line has 3 fields of 10 characters each
|
248
|
+
# [line[0...10], line[10...20], line[20...30]]
|
249
|
+
# end
|
250
|
+
# end
|
251
|
+
#
|
107
252
|
def on_stream(&block)
|
108
253
|
@custom_reader = CustomReader.new(self) unless @custom_reader
|
109
254
|
@custom_reader.set_reader(:stream, block)
|
110
255
|
end
|
111
256
|
|
112
|
-
# Very, very commonly we only want to deal with the default sheet. In this case,
|
113
|
-
# let folks skip the sheet(n) do ... end block wrapper and just define columns
|
114
|
-
# against the main importer. Internally, proxy those calls to the first sheet.
|
115
|
-
def column(*args, &block)
|
116
|
-
default_sheet.column(*args, &block)
|
117
|
-
end
|
118
|
-
|
119
|
-
# Ditto for filters
|
120
|
-
def filter(*args, &block)
|
121
|
-
default_sheet.filter(*args, &block)
|
122
|
-
end
|
123
|
-
|
124
|
-
# Ditto for start row too
|
125
|
-
def start_row(row_num)
|
126
|
-
default_sheet.start_row(row_num)
|
127
|
-
end
|
128
|
-
|
129
|
-
# More facading
|
130
|
-
def headerless!
|
131
|
-
default_sheet.headerless!
|
132
|
-
end
|
133
|
-
|
134
257
|
# First call to a freshly #build'd importer, this will read the file/stream/path supplied,
|
135
258
|
# validate the required values, run custom validations... basically pre-parse and
|
136
259
|
# massage the supplied data. It will return true on success, or false if one
|
@@ -139,87 +262,293 @@ class Importer
|
|
139
262
|
# You may supply various options for the import using the options hash. Supported
|
140
263
|
# options include:
|
141
264
|
#
|
142
|
-
# format: one of :auto, :csv, :xls, :xlsx, defaults to :auto, forces treating the supplied
|
143
|
-
# source as the specified format, or auto-
|
265
|
+
# format: one of :auto, :csv, :html, :xls, :xlsx, defaults to :auto, forces treating the supplied
|
266
|
+
# source as the specified format, or attempts to auto-detect if set to :auto
|
267
|
+
# scope: specify the search scope for the data/format, overriding any scope set with #scope
|
144
268
|
# encoding: source encoding override, defaults to guessing based on input
|
145
269
|
#
|
146
|
-
# Generally, you should be able to throw a
|
270
|
+
# Generally, you should be able to throw a path or stream at it and it should work. The
|
147
271
|
# options exist to allow overriding in cases where the automation heuristics
|
148
272
|
# have failed and the input type is known by the caller.
|
149
273
|
#
|
274
|
+
# If you're trying to import from a raw string, use Importer#import_string instead.
|
275
|
+
#
|
150
276
|
# After #import has completed successfully, you can process the resulting data
|
151
|
-
# using #process or extract the raw data by calling #
|
152
|
-
|
277
|
+
# using #process or extract the raw data by calling #to_a to get an array of row hashes
|
278
|
+
#
|
279
|
+
# Note that as of version 0.7.0, there is a more compact operation mode enabled by passing
|
280
|
+
# a block to this call:
|
281
|
+
#
|
282
|
+
# importer.import(...) do |row|
|
283
|
+
# # Process each row here
|
284
|
+
# end
|
285
|
+
#
|
286
|
+
# In this mode, the block is called with each row as in #process, conditionally on no
|
287
|
+
# errors. In addition, when a block is passed, true/false is not returned (as the
|
288
|
+
# block is already conditionally called). Instead, it will return the importer to allow
|
289
|
+
# chaining to #on_error or other calls.
|
290
|
+
def import(path_or_stream, options = {}, &block)
|
153
291
|
# Clear all our load-time state, including all rows, header locations... you name it
|
154
292
|
reset
|
155
293
|
|
156
294
|
# Get the reader for this format
|
157
295
|
default = @custom_reader ? :custom : :auto
|
158
|
-
format = options.delete(:format) { default }
|
159
|
-
if format == :custom
|
296
|
+
@format = options.delete(:format) { default }
|
297
|
+
if @format == :custom
|
160
298
|
# Custom format selected, use our internal custom reader
|
161
|
-
@
|
299
|
+
@reader = @custom_reader
|
162
300
|
|
163
|
-
elsif format && format != :auto
|
301
|
+
elsif @format && @format != :auto
|
164
302
|
# Explicit format requested
|
165
|
-
@
|
166
|
-
unless @data
|
167
|
-
add_error("Unable to find format handler for format #{format} - aborting")
|
168
|
-
return
|
169
|
-
end
|
303
|
+
@reader = DataReader::for_format(self, @format)
|
170
304
|
|
171
305
|
else
|
172
306
|
# Auto select
|
173
|
-
@
|
307
|
+
@reader = DataReader::for_source(self, path_or_stream)
|
308
|
+
@format = @reader.format
|
309
|
+
end
|
310
|
+
|
311
|
+
# Verify we got one
|
312
|
+
unless @reader
|
313
|
+
add_error("Unable to find format handler for format :#{format} on import of #{path_or_stream.class.name} source - aborting")
|
314
|
+
return
|
315
|
+
end
|
316
|
+
|
317
|
+
# What scopes (if any) should we limit our searching to?
|
318
|
+
scopes = options.delete(:scope) { @scopes[@format] }
|
319
|
+
if scopes && !scopes.is_a?(Array)
|
320
|
+
scopes = [scopes]
|
174
321
|
end
|
175
322
|
|
176
323
|
# Read in the data!
|
177
|
-
@
|
324
|
+
@reader.load(path_or_stream, scopes) do |raw_rows|
|
325
|
+
# Find our column layout, start of data, etc
|
326
|
+
if find_header(raw_rows)
|
327
|
+
# Now, run all the data and add it as a Row instance
|
328
|
+
raw_rows.each_with_index do |raw, index|
|
329
|
+
row_num = index + 1
|
330
|
+
if row_num >= @data.start_row
|
331
|
+
add_row(row_num, raw)
|
332
|
+
end
|
333
|
+
end
|
334
|
+
# We've found a workable sheet/table/whatever, stop looking
|
335
|
+
true
|
336
|
+
|
337
|
+
else
|
338
|
+
# This sheet/table/whatever didn't have the needed header, try
|
339
|
+
# the next one (if any)
|
340
|
+
false
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
# If we have any missing headers, note that fact
|
345
|
+
if @missing_headers && @missing_headers.count > 0
|
346
|
+
add_error("Unable to locate required column header for column(s): " + @missing_headers.collect{|c| ":#{c}"}.list_join(', '))
|
347
|
+
end
|
348
|
+
|
349
|
+
# If we're here with no errors, we rule!
|
350
|
+
success = !has_errors?
|
351
|
+
|
352
|
+
if block
|
353
|
+
# New way, if block is passed, process it on success
|
354
|
+
process(&block) if success
|
355
|
+
self
|
356
|
+
else
|
357
|
+
# Old way, return result
|
358
|
+
success
|
359
|
+
end
|
178
360
|
end
|
179
361
|
|
180
|
-
#
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
362
|
+
# Use this form of import for the common case of having a raw CSV or HTML string.
|
363
|
+
def import_string(string, options = {}, &block)
|
364
|
+
# Get a format here if needed
|
365
|
+
if options[:format].nil?
|
366
|
+
if @custom_reader
|
367
|
+
format = :custom
|
368
|
+
else
|
369
|
+
format = string.include?('<table') && string.include?('</tr>') ? :html : :csv
|
370
|
+
end
|
371
|
+
options[:format] = format
|
372
|
+
end
|
373
|
+
|
374
|
+
# Do the import, converting the string to a stream
|
375
|
+
import(StringIO.new(string), options, &block)
|
376
|
+
end
|
377
|
+
|
378
|
+
# Call with a block accepting a single Importer::Row with contents that
|
379
|
+
# look like :column_key => <parsed value>. Any filtered rows
|
380
|
+
# will not be present. If you want to register an error, simply
|
381
|
+
# raise "some text" and it will be added to the importer's error
|
382
|
+
# list for display to the user, logging, or whatever.
|
383
|
+
def process
|
384
|
+
@data.rows.each do |row|
|
385
|
+
begin
|
386
|
+
yield row
|
387
|
+
rescue Exception => e
|
388
|
+
add_error(row, e.to_s)
|
389
|
+
end
|
390
|
+
end
|
185
391
|
end
|
186
392
|
|
187
|
-
def
|
188
|
-
if
|
189
|
-
|
190
|
-
|
393
|
+
def on_error(&block)
|
394
|
+
raise 'Invalid block passed to Importer#on_error: block may accept 0, 1 or 2 arguments' if block.arity > 2
|
395
|
+
|
396
|
+
if has_errors?
|
397
|
+
case block.arity
|
398
|
+
when 0 then DslProxy.exec(self, &block)
|
399
|
+
when 1 then DslProxy.exec(self, @errors, &block)
|
400
|
+
when 2 then DslProxy.exec(self, @errors, error_summary, &block)
|
401
|
+
end
|
191
402
|
end
|
192
|
-
|
403
|
+
|
404
|
+
self
|
193
405
|
end
|
194
406
|
|
407
|
+
# Process the raw values for the first rows in a sheet,
|
408
|
+
# and attempt to build a map of the column layout, and
|
409
|
+
# detect the first row of real data
|
410
|
+
def find_header(raw_rows)
|
411
|
+
if headerless?
|
412
|
+
# Use implicit or explicit column position when told to not look for a header
|
413
|
+
next_index = 0
|
414
|
+
@columns.each do |col|
|
415
|
+
unless col.position.nil?
|
416
|
+
next_index = col.fixed_index
|
417
|
+
end
|
418
|
+
col.data.index = next_index
|
419
|
+
next_index += 1
|
420
|
+
end
|
421
|
+
@data.start_row = @start_row || 1
|
422
|
+
@missing_headers = nil
|
423
|
+
return true
|
424
|
+
|
425
|
+
else
|
426
|
+
# Match by testing each row's values against the column header definitions
|
427
|
+
missing = nil
|
428
|
+
raw_rows.each_with_index do |row, i|
|
429
|
+
# Um, have data?
|
430
|
+
next unless row
|
431
|
+
|
432
|
+
# Set up for this iteration
|
433
|
+
remaining = @columns.dup
|
434
|
+
|
435
|
+
# Step through this row's raw values, and look for a matching column for all columns
|
436
|
+
row.each_with_index do |val, i|
|
437
|
+
col = remaining.detect {|c| c.match_header?(val.to_s, i) }
|
438
|
+
if col
|
439
|
+
remaining -= [col]
|
440
|
+
col.data.index = i
|
441
|
+
end
|
442
|
+
end
|
443
|
+
|
444
|
+
if remaining.empty?
|
445
|
+
# Found all columns, have a map, update our start row to be the next line and return!
|
446
|
+
@data.start_row = @start_row || i+2
|
447
|
+
@missing_headers = nil
|
448
|
+
return true
|
449
|
+
else
|
450
|
+
missing = remaining if (missing.nil? || missing.count > remaining.count)
|
451
|
+
end
|
452
|
+
end
|
453
|
+
|
454
|
+
# If we get here, we're hosed
|
455
|
+
@missing_headers = missing.collect(&:key) if @missing_headers.nil? || @missing_headers.count > missing.count
|
456
|
+
false
|
457
|
+
end
|
458
|
+
end
|
459
|
+
|
460
|
+
# Add a new row to our stash, parsing/filtering/validating as we go!
|
461
|
+
def add_row(line, raw_data)
|
462
|
+
# Gracefully handle custom parsers that return nil for a row's data
|
463
|
+
raw_data ||= []
|
464
|
+
# Add the row
|
465
|
+
row = Row.new(self, line)
|
466
|
+
|
467
|
+
# Parse out the values
|
468
|
+
values = {}
|
469
|
+
@columns.each do |col|
|
470
|
+
index = col.data.index
|
471
|
+
raw_val = raw_data[index]
|
472
|
+
if col.parse
|
473
|
+
# Use custom parser if this column has one
|
474
|
+
val = col.parse_value(row, raw_val)
|
475
|
+
else
|
476
|
+
# Otherwise use our standard parser
|
477
|
+
val = @reader.parse_value(raw_val, col.type)
|
478
|
+
end
|
479
|
+
values[col.key] = val
|
480
|
+
end
|
481
|
+
|
482
|
+
# Set the values and filter if needed
|
483
|
+
row.set_values(values)
|
484
|
+
return nil if @filter && !@filter.call(row)
|
485
|
+
|
486
|
+
# Row is desired, now validate values
|
487
|
+
@columns.each do |col|
|
488
|
+
val = values[col.key]
|
489
|
+
col.validate_value(row, val)
|
490
|
+
end
|
491
|
+
|
492
|
+
# We is good
|
493
|
+
@data.rows << row
|
494
|
+
row
|
495
|
+
end
|
496
|
+
|
497
|
+
# When true, one or more errors have been recorded during this import/process
|
498
|
+
# cycle.
|
195
499
|
def has_errors?
|
196
500
|
@errors.any?
|
197
501
|
end
|
198
502
|
|
199
|
-
|
503
|
+
# Add an error to our error list. Will result in a failed import.
|
504
|
+
def add_error(context, msg = nil)
|
200
505
|
if context.is_a?(String) && msg.nil?
|
201
506
|
msg = context
|
202
507
|
context = nil
|
203
508
|
end
|
204
|
-
@
|
205
|
-
end
|
206
|
-
|
207
|
-
def has_warnings?
|
208
|
-
@warnings.any?
|
509
|
+
@errors << Error.new(context, msg)
|
209
510
|
end
|
210
511
|
|
211
|
-
# Returns a human-readable summary of the errors present on the importer
|
512
|
+
# Returns a human-readable summary of the errors present on the importer, or
|
513
|
+
# nil if no errors are present
|
212
514
|
def error_summary
|
515
|
+
# Simple case
|
213
516
|
return nil unless has_errors?
|
214
|
-
|
517
|
+
|
518
|
+
# Group by error text - we often get the same error dozens of times
|
519
|
+
list = {}
|
520
|
+
@errors.each do |err|
|
521
|
+
errs = list[err.text] || []
|
522
|
+
errs << err
|
523
|
+
list[err.text] = errs
|
524
|
+
end
|
525
|
+
|
526
|
+
# Build summary & return
|
527
|
+
list.values.collect do |errs|
|
528
|
+
summary = errs.first.summary
|
529
|
+
if errs.count == 1
|
530
|
+
summary
|
531
|
+
else
|
532
|
+
errs.count.to_s + ' x ' + summary
|
533
|
+
end
|
534
|
+
end.list_join(', ')
|
535
|
+
end
|
536
|
+
|
537
|
+
# After calling #import, you can dump the final values for each row
|
538
|
+
# as an array of hashes. Useful in debugging! For general processing,
|
539
|
+
# use #process or the block form of #import instead.
|
540
|
+
def to_a
|
541
|
+
@data.rows.collect(&:values)
|
215
542
|
end
|
216
543
|
|
217
544
|
protected
|
218
545
|
|
219
546
|
def reset
|
220
547
|
@errors = []
|
221
|
-
@
|
222
|
-
@
|
548
|
+
@missing_headers = nil
|
549
|
+
@format = nil
|
550
|
+
@reader = nil
|
551
|
+
@data = Data.new
|
223
552
|
end
|
224
553
|
|
225
554
|
end
|
data/lib/iron/import/row.rb
CHANGED
@@ -2,12 +2,12 @@ class Importer
|
|
2
2
|
|
3
3
|
class Row
|
4
4
|
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :line, :values
|
6
6
|
|
7
|
-
def initialize(
|
8
|
-
@
|
7
|
+
def initialize(importer, line, value_hash = nil)
|
8
|
+
@importer = importer
|
9
9
|
@line = line
|
10
|
-
|
10
|
+
set_values(value_hash)
|
11
11
|
end
|
12
12
|
|
13
13
|
def set_values(value_hash)
|
@@ -15,8 +15,9 @@ class Importer
|
|
15
15
|
end
|
16
16
|
|
17
17
|
# True when all columns have a non-nil value, useful in filtering out junk
|
18
|
-
# rows
|
18
|
+
# rows. Pass in one or more keys to check only those keys for presence.
|
19
19
|
def all?(*keys)
|
20
|
+
keys.flatten!
|
20
21
|
if keys.any?
|
21
22
|
# Check only the specified keys
|
22
23
|
valid = true
|
@@ -33,25 +34,28 @@ class Importer
|
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
37
|
+
# True when all row columns have nil values.
|
36
38
|
def empty?
|
37
39
|
@values.values.all?(&:nil?)
|
38
40
|
end
|
39
41
|
|
40
|
-
# Returns the value of a column
|
42
|
+
# Returns the value of a column.
|
41
43
|
def [](column_key)
|
42
44
|
@values[column_key]
|
43
45
|
end
|
44
|
-
|
46
|
+
|
47
|
+
# The row's name, e.g. 'Row 4'
|
45
48
|
def to_s
|
46
49
|
"Row #{@line}"
|
47
50
|
end
|
48
51
|
|
49
|
-
|
50
|
-
|
52
|
+
# This row's values as a hash of :column_key => <parsed + validated value>
|
53
|
+
def to_hash
|
54
|
+
@values.dup
|
51
55
|
end
|
52
56
|
|
53
|
-
def
|
54
|
-
@
|
57
|
+
def add_error(msg)
|
58
|
+
@importer.add_error(self, msg)
|
55
59
|
end
|
56
60
|
|
57
61
|
end
|