iron-import 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +6 -0
- data/README.rdoc +2 -2
- data/Version.txt +1 -1
- data/lib/iron/import/column.rb +35 -14
- data/lib/iron/import/csv_reader.rb +26 -11
- data/lib/iron/import/custom_reader.rb +39 -0
- data/lib/iron/import/data_reader.rb +98 -12
- data/lib/iron/import/importer.rb +58 -21
- data/lib/iron/import/sheet.rb +74 -9
- data/lib/iron/import/xls_reader.rb +25 -39
- data/lib/iron/import/xlsx_reader.rb +25 -38
- data/lib/iron/import.rb +1 -0
- data/lib/iron-import.rb +1 -0
- data/spec/importer/custom_reader_spec.rb +46 -0
- data/spec/importer/data_reader_spec.rb +1 -1
- data/spec/samples/icd10-custom.txt +4 -0
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04d666ea1e0170b0186d75fc8b0ec367a16e528a
|
4
|
+
data.tar.gz: 9dad576e17b7d8fc4b523ffe6b4e53c85feae9c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5a31e81381d78c29da480b296a8e9569ed415a32f50610881014de52a7b92c925687d3ee4ba683bf64a50562b26bf5785acf09070017710938d39c52f0087ad
|
7
|
+
data.tar.gz: d29901644886a98c617dd215b52edec0e8c011875ae2c0d6724c8f0f03c26bfdb9ea8028a3322c9cf1d16f91d9010950203c73ff0fc0485ce0bfc09ffc53e6f2
|
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -25,7 +25,7 @@ any warnings and errors encountered... well, this is the library for you!
|
|
25
25
|
|
26
26
|
IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
|
27
27
|
for the task. Breaking changes will be noted by increases in the second-level version,
|
28
|
-
ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not.
|
28
|
+
ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
|
29
29
|
|
30
30
|
== SAMPLE USAGE
|
31
31
|
|
@@ -65,6 +65,6 @@ RVM users can skip the sudo:
|
|
65
65
|
|
66
66
|
Then use
|
67
67
|
|
68
|
-
require 'iron
|
68
|
+
require 'iron-import'
|
69
69
|
|
70
70
|
to require the library code.
|
data/Version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.0
|
1
|
+
0.6.0
|
data/lib/iron/import/column.rb
CHANGED
@@ -24,13 +24,14 @@ class Importer
|
|
24
24
|
# # Instead of a type, you can set an explicit parse block. Be aware
|
25
25
|
# # that different source types may give you different raw values for what
|
26
26
|
# # seems like the "same" source value, for example an Excel source file
|
27
|
-
# # will give you a float value for all numeric types, even "integers"
|
27
|
+
# # will give you a float value for all numeric types, even "integers".
|
28
28
|
# parse do |raw_value|
|
29
29
|
# raw_value.to_i + 1000
|
30
30
|
# end
|
31
31
|
#
|
32
32
|
# # You can also add a custom validator to check the value and add
|
33
|
-
# # an error if it's not within a given range, or whatever
|
33
|
+
# # an error if it's not within a given range, or whatever. To fail validation,
|
34
|
+
# # simply raise the error you wish recorded.
|
34
35
|
# validate do |parsed_value|
|
35
36
|
# raise "Out of range" unless (parsed_value > 0 && parsed_value < 5000)
|
36
37
|
# end
|
@@ -83,50 +84,60 @@ class Importer
|
|
83
84
|
str = chars[index] + str
|
84
85
|
str
|
85
86
|
end
|
86
|
-
|
87
|
-
|
87
|
+
|
88
|
+
# Create a new column definition, with the owning sheet, the key for the column,
|
89
|
+
# and an optional set of options. The options supported are the same as those supported
|
90
|
+
# in block/builder mode.
|
91
|
+
def initialize(sheet, key, options_hash = {})
|
88
92
|
# Save off our info
|
89
93
|
@key = key
|
90
94
|
@sheet = sheet
|
91
95
|
@importer = @sheet.importer
|
92
96
|
|
93
97
|
# Return it as a string, by default
|
94
|
-
@type = :string
|
98
|
+
@type = options_hash.delete(:type) { :string }
|
95
99
|
|
96
100
|
# By default, we allow empty values
|
97
|
-
@required = false
|
101
|
+
@required = options_hash.delete(:required) { false }
|
98
102
|
|
99
103
|
# Position can be explicitly set
|
100
|
-
@position =
|
104
|
+
@position = options_hash.delete(:position)
|
101
105
|
|
102
106
|
# By default, don't parse incoming data, just pass it through
|
103
|
-
@parse =
|
107
|
+
@parse = options_hash.delete(:parse)
|
104
108
|
|
105
109
|
# Default matcher, looks for the presence of the column key as text anywhere
|
106
110
|
# in the header string, ignoring case and using underscores as spaces, ie
|
107
111
|
# :order_id => /\A\s*order id\s*\z/i
|
108
|
-
@header =
|
112
|
+
@header = options_hash.delete(:header) {
|
113
|
+
Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
|
114
|
+
}
|
109
115
|
|
110
116
|
# Reset our state to pre-load status
|
111
117
|
reset
|
112
118
|
end
|
113
119
|
|
120
|
+
# Customize ourselves using block syntax
|
114
121
|
def build(&block)
|
115
122
|
DslProxy.exec(self, &block)
|
116
123
|
end
|
117
124
|
|
125
|
+
# Deletes all stored data in prep for an import run
|
118
126
|
def reset
|
119
127
|
@data = Data.new
|
120
128
|
end
|
121
129
|
|
122
|
-
# When true,
|
130
|
+
# When true, our header definition or index matches the passed text or column index.
|
123
131
|
def match_header?(text, index)
|
124
|
-
|
125
|
-
|
126
|
-
|
132
|
+
return true if index == self.fixed_index
|
133
|
+
if @header.is_a?(Regexp)
|
134
|
+
return !@header.match(text).nil?
|
135
|
+
else
|
136
|
+
return @header.to_s.downcase == text
|
137
|
+
end
|
127
138
|
end
|
128
139
|
|
129
|
-
#
|
140
|
+
# Applies any custom parser defined to process the given value, capturing
|
130
141
|
# errors as needed
|
131
142
|
def parse_value(row, val)
|
132
143
|
return val if @parse.nil?
|
@@ -138,6 +149,7 @@ class Importer
|
|
138
149
|
end
|
139
150
|
end
|
140
151
|
|
152
|
+
# Applies any validation to a parsed value
|
141
153
|
def validate_value(row, val)
|
142
154
|
return unless @validate
|
143
155
|
begin
|
@@ -149,6 +161,9 @@ class Importer
|
|
149
161
|
end
|
150
162
|
end
|
151
163
|
|
164
|
+
# Returns the fixed index of this column based on the set position.
|
165
|
+
# In other words, a position of 2 would return an index of 1 (as
|
166
|
+
# indicies are 0-based), where a position of 'C' would return 2.
|
152
167
|
def fixed_index
|
153
168
|
return nil unless @position
|
154
169
|
if @position.is_a?(Fixnum)
|
@@ -158,14 +173,20 @@ class Importer
|
|
158
173
|
end
|
159
174
|
end
|
160
175
|
|
176
|
+
# Pretty name for ourselves
|
161
177
|
def to_s
|
162
178
|
'Column ' + @data.pos
|
163
179
|
end
|
164
180
|
|
181
|
+
# Extracts the sheet's values for this column and returns them in an array.
|
182
|
+
# Note that the array indices ARE NOT row indices, as the rows may have been
|
183
|
+
# filtered and any header rows have been skipped.
|
165
184
|
def to_a
|
166
185
|
@sheet.data.rows.collect {|r| r[@key] }
|
167
186
|
end
|
168
187
|
|
188
|
+
# Extracts the sheet's values for this column and returns them in a hash of
|
189
|
+
# row num => value for all non-filtered, non-header rows.
|
169
190
|
def to_h
|
170
191
|
res = {}
|
171
192
|
@sheet.data.rows.collect {|r| res[r.num] = r[@key] }
|
@@ -6,19 +6,34 @@ class Importer
|
|
6
6
|
|
7
7
|
def initialize(importer)
|
8
8
|
super(importer, :csv)
|
9
|
-
|
10
|
-
|
11
|
-
def load_stream(stream)
|
12
|
-
text = stream.read
|
13
|
-
encoding = @importer.encoding || 'UTF-8'
|
14
|
-
raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
|
15
|
-
@importer.default_sheet.parse_raw_data(raw_rows)
|
9
|
+
supports_file!
|
10
|
+
supports_stream!
|
16
11
|
end
|
17
12
|
|
18
|
-
def
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
def init_source(mode, source)
|
14
|
+
if mode == :stream
|
15
|
+
# For streams, we just read 'em in and parse 'em
|
16
|
+
text = source.read
|
17
|
+
encoding = @importer.encoding || 'UTF-8'
|
18
|
+
@raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
|
19
|
+
true
|
20
|
+
|
21
|
+
elsif mode == :file
|
22
|
+
# Files have a different path
|
23
|
+
encoding = @importer.encoding || 'UTF-8'
|
24
|
+
@raw_rows = CSV.read(source, :encoding => "#{encoding}:UTF-8")
|
25
|
+
true
|
26
|
+
|
27
|
+
else
|
28
|
+
@importer.add_error("Unsupported CSV mode: #{mode}")
|
29
|
+
false
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Normally, we'd check the key and return the proper data, but for CSV files,
|
34
|
+
# there's only one "sheet"
|
35
|
+
def load_raw_sheet(key)
|
36
|
+
@raw_rows
|
22
37
|
end
|
23
38
|
|
24
39
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
class Importer
|
2
|
+
|
3
|
+
# Special data reader that allows you to define a block to do the import yourself for cases
|
4
|
+
# where you have an odd text-based format or something else you want to be able to process
|
5
|
+
# using this gem. Check out Importer#on_file and Importer#on_stream to see how to use
|
6
|
+
# this reader type.
|
7
|
+
class CustomReader < DataReader
|
8
|
+
|
9
|
+
attr_accessor :readers
|
10
|
+
|
11
|
+
def initialize(importer)
|
12
|
+
super(importer, :custom)
|
13
|
+
@readers = {}
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by the importer to add a handler for the given mode
|
17
|
+
def set_reader(mode, block)
|
18
|
+
@readers[mode] = block
|
19
|
+
@supports << mode
|
20
|
+
end
|
21
|
+
|
22
|
+
def init_source(mode, source)
|
23
|
+
@mode = mode
|
24
|
+
@source = source
|
25
|
+
end
|
26
|
+
|
27
|
+
def load_raw_sheet(sheet)
|
28
|
+
reader = @readers[@mode]
|
29
|
+
reader.call(@source, sheet)
|
30
|
+
|
31
|
+
rescue Exception => e
|
32
|
+
# Catch any exceptions thrown and note them with helpful stacktrace info for debugging custom readers
|
33
|
+
@importer.add_error("Error in custom reader when loading sheet #{sheet}: #{e} @ #{e.backtrace.first}")
|
34
|
+
false
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
@@ -14,6 +14,24 @@ class Importer
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
# Implement our automatic reader selection, based on the import source
|
18
|
+
def self.for_source(importer, source)
|
19
|
+
data = nil
|
20
|
+
if is_stream?(source)
|
21
|
+
data = DataReader::for_stream(importer, source)
|
22
|
+
unless data
|
23
|
+
importer.add_error("Unable to find format handler for stream")
|
24
|
+
end
|
25
|
+
else
|
26
|
+
data = DataReader::for_path(importer, source)
|
27
|
+
unless data
|
28
|
+
importer.add_error("Unable to find format handler for file #{source}")
|
29
|
+
end
|
30
|
+
end
|
31
|
+
data
|
32
|
+
end
|
33
|
+
|
34
|
+
# Factory method to build a reader from an explicit format selector
|
17
35
|
def self.for_format(importer, format)
|
18
36
|
case format
|
19
37
|
when :csv
|
@@ -29,6 +47,7 @@ class Importer
|
|
29
47
|
end
|
30
48
|
end
|
31
49
|
|
50
|
+
# Figure out which format to use for a given path based on file name
|
32
51
|
def self.for_path(importer, path)
|
33
52
|
format = path.to_s.extract(/\.(csv|xlsx?)\z/i)
|
34
53
|
if format
|
@@ -39,11 +58,19 @@ class Importer
|
|
39
58
|
end
|
40
59
|
end
|
41
60
|
|
61
|
+
# Figure out which format to use based on a stream's source file info
|
42
62
|
def self.for_stream(importer, stream)
|
43
63
|
path = path_from_stream(stream)
|
44
64
|
for_path(importer, path)
|
45
65
|
end
|
46
66
|
|
67
|
+
# Attempt to determine if the given source is a stream
|
68
|
+
def self.is_stream?(source)
|
69
|
+
# For now, just assume anything that has a #read method is a stream, in
|
70
|
+
# duck-type fashion
|
71
|
+
source.respond_to?(:read)
|
72
|
+
end
|
73
|
+
|
47
74
|
# Try to find the original file name for the given stream,
|
48
75
|
# as in the case where a file is uploaded to Rails and we're dealing with an
|
49
76
|
# ActionDispatch::Http::UploadedFile.
|
@@ -60,16 +87,40 @@ class Importer
|
|
60
87
|
def initialize(importer, format)
|
61
88
|
@importer = importer
|
62
89
|
@format = format
|
63
|
-
@
|
90
|
+
@supports = []
|
64
91
|
end
|
65
92
|
|
93
|
+
def supports_stream!
|
94
|
+
@supports << :stream
|
95
|
+
end
|
96
|
+
|
97
|
+
def supports_file!
|
98
|
+
@supports << :file
|
99
|
+
end
|
100
|
+
|
101
|
+
def supports?(mode)
|
102
|
+
@supports.include?(mode)
|
103
|
+
end
|
104
|
+
|
105
|
+
def supports_file?
|
106
|
+
supports?(:file)
|
107
|
+
end
|
108
|
+
|
109
|
+
def supports_stream?
|
110
|
+
supports?(:stream)
|
111
|
+
end
|
112
|
+
|
113
|
+
# Core data reader method. Takes a given input source (either a stream or
|
114
|
+
# a file path) and attempts to load it. Returns true if successful, false
|
115
|
+
# if not. If false, there will be one or more errors explaining what went
|
116
|
+
# wrong.
|
66
117
|
def load(path_or_stream)
|
67
118
|
# Figure out what we've been passed, and handle it
|
68
|
-
if
|
119
|
+
if self.class.is_stream?(path_or_stream)
|
69
120
|
# We have a stream (open file, upload, whatever)
|
70
|
-
if
|
121
|
+
if supports_stream?
|
71
122
|
# Stream loader defined, run it
|
72
|
-
|
123
|
+
load_sheets(:stream, path_or_stream)
|
73
124
|
else
|
74
125
|
# Write to temp file, as some of our readers only read physical files, annoyingly
|
75
126
|
file = Tempfile.new(['importer', ".#{format}"])
|
@@ -77,7 +128,7 @@ class Importer
|
|
77
128
|
begin
|
78
129
|
file.write path_or_stream.read
|
79
130
|
file.close
|
80
|
-
|
131
|
+
load_sheets(:file, file.path)
|
81
132
|
ensure
|
82
133
|
file.close
|
83
134
|
file.unlink
|
@@ -86,23 +137,58 @@ class Importer
|
|
86
137
|
|
87
138
|
elsif path_or_stream.is_a?(String)
|
88
139
|
# Assume it's a path
|
89
|
-
if
|
90
|
-
|
91
|
-
|
140
|
+
if File.exist?(path_or_stream)
|
141
|
+
if supports_file?
|
142
|
+
# We're all set, load up the given path
|
143
|
+
load_sheets(:file, path_or_stream)
|
144
|
+
else
|
145
|
+
# No file handler, so open the file and run the stream processor
|
146
|
+
file = File.open(path_or_stream, 'rb')
|
147
|
+
load_sheets(:stream, file)
|
148
|
+
end
|
92
149
|
else
|
93
|
-
|
94
|
-
file = File.open(path_or_stream, 'rb')
|
95
|
-
load_stream(file)
|
150
|
+
@importer.add_error("Unable to locate source file #{path_or_stream}")
|
96
151
|
end
|
97
152
|
|
98
153
|
else
|
99
|
-
|
154
|
+
@importer.add_error("Unable to load data source - not a file path or stream: #{path_or_stream.inspect}")
|
100
155
|
end
|
101
156
|
|
102
157
|
# Return our status
|
103
158
|
!@importer.has_errors?
|
104
159
|
end
|
105
160
|
|
161
|
+
# Load up the sheets in the correct mode
|
162
|
+
def load_sheets(mode, source)
|
163
|
+
# Let our derived classes open the file, etc. as they need
|
164
|
+
if init_source(mode, source)
|
165
|
+
# Once the source is set, run through each defined sheet, pass it to
|
166
|
+
# our sheet loader, and have the sheet parse it out.
|
167
|
+
@importer.sheets.values.each do |sheet|
|
168
|
+
res = load_raw_sheet(sheet)
|
169
|
+
if res === false
|
170
|
+
# D'oh.
|
171
|
+
else
|
172
|
+
# Tell the sheet to parse the data
|
173
|
+
sheet.parse_raw_data(res)
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
# Override this method in derived classes to set up
|
180
|
+
# the given source in the given mode
|
181
|
+
def init_source(mode, source)
|
182
|
+
raise "Unimplemented method #init_source in data reader #{self.class.name}"
|
183
|
+
end
|
184
|
+
|
185
|
+
# Override this method in derived classes to take the given sheet definition,
|
186
|
+
# find that sheet in the input source, and read out the raw (unparsed) rows
|
187
|
+
# as an array of arrays. Return false if the sheet cannot be loaded.
|
188
|
+
def load_raw_sheet(sheet)
|
189
|
+
raise "Unimplemented method #load_raw_sheet in data reader #{self.class.name}"
|
190
|
+
end
|
191
|
+
|
106
192
|
# Provides default value parsing/coercion for all derived data readers. Attempts to be clever and
|
107
193
|
# handle edge cases like converting '5.00' to 5 when in integer mode, etc. If you find your inputs aren't
|
108
194
|
# being parsed correctly, add a custom #parse block on your Column definition.
|
data/lib/iron/import/importer.rb
CHANGED
@@ -33,8 +33,9 @@
|
|
33
33
|
class Importer
|
34
34
|
|
35
35
|
# Array of error message or nil for each non-header row
|
36
|
-
attr_accessor :errors, :warnings
|
36
|
+
attr_accessor :errors, :warnings
|
37
37
|
attr_accessor :sheets
|
38
|
+
attr_reader :data, :custom_reader
|
38
39
|
# Source file/stream encoding, assumes UTF-8 if none specified
|
39
40
|
dsl_accessor :encoding
|
40
41
|
|
@@ -51,16 +52,34 @@ class Importer
|
|
51
52
|
reset
|
52
53
|
end
|
53
54
|
|
55
|
+
# Takes a block, and sets self to be importer instance, so you can
|
56
|
+
# just call #column, #sheet, etc. directly.
|
54
57
|
def build(&block)
|
55
58
|
DslProxy.exec(self, &block) if block
|
56
59
|
self
|
57
60
|
end
|
58
61
|
|
59
|
-
|
60
|
-
|
62
|
+
# For the common case where there is only one "sheet", e.g. CSV files.
|
63
|
+
def default_sheet(&block)
|
64
|
+
sheet(1, true, &block)
|
61
65
|
end
|
62
66
|
|
63
|
-
# Access a Sheet definition by id (either number (1-N) or sheet name)
|
67
|
+
# Access a Sheet definition by id (either number (1-N) or sheet name).
|
68
|
+
# Used during #build calls to define a sheet with a passed block, like so:
|
69
|
+
#
|
70
|
+
# Importer.build do
|
71
|
+
# sheet(1) do
|
72
|
+
# column :store_name
|
73
|
+
# column :store_address
|
74
|
+
# end
|
75
|
+
# sheet('Orders') do
|
76
|
+
# column :id
|
77
|
+
# column :price
|
78
|
+
# filter do |row|
|
79
|
+
# row[:price].present?
|
80
|
+
# end
|
81
|
+
# end
|
82
|
+
# end
|
64
83
|
def sheet(id, create=true, &block)
|
65
84
|
# Find the sheet, creating it if needed (and requested!)
|
66
85
|
if @sheets[id].nil?
|
@@ -78,18 +97,40 @@ class Importer
|
|
78
97
|
# Return the sheet
|
79
98
|
sheet
|
80
99
|
end
|
100
|
+
|
101
|
+
# Define a custom file reader to implement your own sheet parsing.
|
102
|
+
def on_file(&block)
|
103
|
+
@custom_reader = CustomReader.new(self) unless @custom_reader
|
104
|
+
@custom_reader.set_reader(:file, block)
|
105
|
+
end
|
106
|
+
|
107
|
+
def on_stream(&block)
|
108
|
+
@custom_reader = CustomReader.new(self) unless @custom_reader
|
109
|
+
@custom_reader.set_reader(:stream, block)
|
110
|
+
end
|
81
111
|
|
82
112
|
# Very, very commonly we only want to deal with the default sheet. In this case,
|
83
113
|
# let folks skip the sheet(n) do ... end block wrapper and just define columns
|
84
|
-
# against the main importer. Internally, proxy those calls to the first sheet
|
114
|
+
# against the main importer. Internally, proxy those calls to the first sheet.
|
85
115
|
def column(*args, &block)
|
86
116
|
default_sheet.column(*args, &block)
|
87
117
|
end
|
88
118
|
|
119
|
+
# Ditto for filters
|
89
120
|
def filter(*args, &block)
|
90
121
|
default_sheet.filter(*args, &block)
|
91
122
|
end
|
92
123
|
|
124
|
+
# Ditto for start row too
|
125
|
+
def start_row(row_num)
|
126
|
+
default_sheet.start_row(row_num)
|
127
|
+
end
|
128
|
+
|
129
|
+
# More facading
|
130
|
+
def headerless!
|
131
|
+
default_sheet.headerless!
|
132
|
+
end
|
133
|
+
|
93
134
|
# First call to a freshly #build'd importer, this will read the file/stream/path supplied,
|
94
135
|
# validate the required values, run custom validations... basically pre-parse and
|
95
136
|
# massage the supplied data. It will return true on success, or false if one
|
@@ -113,27 +154,23 @@ class Importer
|
|
113
154
|
reset
|
114
155
|
|
115
156
|
# Get the reader for this format
|
116
|
-
|
117
|
-
|
157
|
+
default = @custom_reader ? :custom : :auto
|
158
|
+
format = options.delete(:format) { default }
|
159
|
+
if format == :custom
|
160
|
+
# Custom format selected, use our internal custom reader
|
161
|
+
@data = @custom_reader
|
162
|
+
|
163
|
+
elsif format && format != :auto
|
164
|
+
# Explicit format requested
|
118
165
|
@data = DataReader::for_format(self, format)
|
119
|
-
unless
|
166
|
+
unless @data
|
120
167
|
add_error("Unable to find format handler for format #{format} - aborting")
|
121
168
|
return
|
122
169
|
end
|
170
|
+
|
123
171
|
else
|
124
|
-
|
125
|
-
|
126
|
-
unless @data
|
127
|
-
add_error("Unable to find format handler for stream - aborting")
|
128
|
-
return
|
129
|
-
end
|
130
|
-
else
|
131
|
-
@data = DataReader::for_path(self, path_or_stream)
|
132
|
-
unless @data
|
133
|
-
add_error("Unable to find format handler for file #{path_or_stream} - aborting")
|
134
|
-
return
|
135
|
-
end
|
136
|
-
end
|
172
|
+
# Auto select
|
173
|
+
@data = DataReader::for_source(self, path_or_stream)
|
137
174
|
end
|
138
175
|
|
139
176
|
# Read in the data!
|
data/lib/iron/import/sheet.rb
CHANGED
@@ -1,7 +1,39 @@
|
|
1
1
|
class Importer
|
2
2
|
|
3
3
|
# The Sheet class handles building the sheet's column configuration and other
|
4
|
-
# setup, then holds all load-time row data.
|
4
|
+
# setup, then holds all load-time row data. In some file types (Excel mostly)
|
5
|
+
# there may be more than one sheet definition in a given importer. In others,
|
6
|
+
# the default sheet is the only one (possibly implicitly) defined.
|
7
|
+
#
|
8
|
+
# The following builder options are available:
|
9
|
+
#
|
10
|
+
# Importer.build do
|
11
|
+
# sheet('Some Sheet Name') do
|
12
|
+
# # Don't try to look for a header using column definitions, there is no header
|
13
|
+
# headerless!
|
14
|
+
#
|
15
|
+
# # Manually set the start row for data in this sheet, defaults to nil
|
16
|
+
# # indicating that the data rows start immediately following the header.
|
17
|
+
# start_row 4
|
18
|
+
#
|
19
|
+
# # Define a filter that will skip unneeded rows. The filter command takes
|
20
|
+
# # a block that receives the parsed (but not validated!) row data as an
|
21
|
+
# # associative hash of :col_key => <parsed value>, and returns
|
22
|
+
# # true to keep the row or false to exclude it.
|
23
|
+
# filter do |row|
|
24
|
+
# row[:id].to_i > 5000
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# # Of course, the main thing to do in a sheet is define columns. See the
|
28
|
+
# # Column class' notes for options when defining a column. Note that
|
29
|
+
# # you can define columns using either hash-style:
|
30
|
+
# column :id, :type => :integer
|
31
|
+
# # or builder-style:
|
32
|
+
# column :name do
|
33
|
+
# header /company\s*name/
|
34
|
+
# type :string
|
35
|
+
# end
|
36
|
+
# end
|
5
37
|
class Sheet
|
6
38
|
|
7
39
|
# Inner class for holding load-time data that gets reset on each load call
|
@@ -37,10 +69,16 @@ class Importer
|
|
37
69
|
reset
|
38
70
|
end
|
39
71
|
|
72
|
+
# Define our columns etc. via builder-style method calling
|
40
73
|
def build(&block)
|
41
74
|
DslProxy.exec(self, &block)
|
42
75
|
end
|
43
76
|
|
77
|
+
# Call with a block accepting a single Importer::Row with contents that
|
78
|
+
# look like :column_key => <parsed value>. Any filtered rows
|
79
|
+
# will not be present. If you want to register an error, simply
|
80
|
+
# raise "some text" and it will be added to the importer's error
|
81
|
+
# list for display to the user, logging, or whatever.
|
44
82
|
def process
|
45
83
|
@data.rows.each do |row|
|
46
84
|
begin
|
@@ -51,13 +89,33 @@ class Importer
|
|
51
89
|
end
|
52
90
|
end
|
53
91
|
|
54
|
-
|
92
|
+
# Add a new column definition to our list, allows customizing the new
|
93
|
+
# column with a builder block. See Importer::Column docs for
|
94
|
+
# options. In lieu of a builder mode, you can pass the same values
|
95
|
+
# as key => value pairs in the options hash to this method, so:
|
96
|
+
#
|
97
|
+
# column(:foo) do
|
98
|
+
# type :string
|
99
|
+
# parse do |val|
|
100
|
+
# val.to_s.upcase
|
101
|
+
# end
|
102
|
+
# end
|
103
|
+
#
|
104
|
+
# Is equivalent to:
|
105
|
+
#
|
106
|
+
# column(:foo, :type => :string, :parse => lambda {|val| val.to_s.upcase})
|
107
|
+
#
|
108
|
+
# Use whichever you prefer!
|
109
|
+
def column(key, options_hash = {}, &block)
|
110
|
+
# Find existing column with key to allow re-opening an existing definition
|
55
111
|
col = @columns.detect {|c| c.key == key }
|
56
112
|
unless col
|
57
|
-
|
113
|
+
# if none found, add a new one
|
114
|
+
col = Column.new(self, key, options_hash)
|
58
115
|
@columns << col
|
59
116
|
end
|
60
117
|
|
118
|
+
# Customize if needed
|
61
119
|
DslProxy::exec(col, &block) if block
|
62
120
|
|
63
121
|
col
|
@@ -73,9 +131,9 @@ class Importer
|
|
73
131
|
if parse_header(raw_rows)
|
74
132
|
# Now, run all the data and add it as a Row instance
|
75
133
|
raw_rows.each_with_index do |raw, index|
|
76
|
-
|
77
|
-
if
|
78
|
-
add_row(
|
134
|
+
row_num = index + 1
|
135
|
+
if row_num >= @data.start_row
|
136
|
+
add_row(row_num, raw)
|
79
137
|
end
|
80
138
|
end
|
81
139
|
end
|
@@ -128,8 +186,8 @@ class Importer
|
|
128
186
|
# Use implicit or explicit column position when told to not look for a header
|
129
187
|
next_index = 0
|
130
188
|
@columns.each do |col|
|
131
|
-
|
132
|
-
next_index = col.
|
189
|
+
unless col.position.nil?
|
190
|
+
next_index = col.fixed_index
|
133
191
|
end
|
134
192
|
col.data.index = next_index
|
135
193
|
next_index += 1
|
@@ -140,6 +198,9 @@ class Importer
|
|
140
198
|
else
|
141
199
|
# Match by testing
|
142
200
|
raw_rows.each_with_index do |row, i|
|
201
|
+
# Um, have data?
|
202
|
+
next unless row
|
203
|
+
|
143
204
|
# Set up for this iteration
|
144
205
|
remaining = @columns.dup
|
145
206
|
|
@@ -165,11 +226,13 @@ class Importer
|
|
165
226
|
end
|
166
227
|
end
|
167
228
|
|
229
|
+
# When true, the given sheet name or zero-based index
|
230
|
+
# is a match with our id.
|
168
231
|
def match_sheet?(name, index)
|
169
232
|
if @id.is_a?(Fixnum)
|
170
233
|
@id.to_i == index+1
|
171
234
|
else
|
172
|
-
@id.to_s == name
|
235
|
+
@id.to_s.downcase == name.downcase
|
173
236
|
end
|
174
237
|
end
|
175
238
|
|
@@ -177,6 +240,8 @@ class Importer
|
|
177
240
|
"Sheet #{@id}"
|
178
241
|
end
|
179
242
|
|
243
|
+
# Return all parsed, filtered data in the sheet as an
|
244
|
+
# array of arrays.
|
180
245
|
def dump
|
181
246
|
@data.rows.collect(&:values)
|
182
247
|
end
|
@@ -6,55 +6,41 @@ class Importer
|
|
6
6
|
super(importer, :xlsx)
|
7
7
|
end
|
8
8
|
|
9
|
-
def
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
remaining_sheets = @importer.sheets.values
|
14
|
-
spreadsheet.sheets.each_with_index do |name, index|
|
15
|
-
# Look for a sheet definition that matches this sheet's name/index
|
16
|
-
sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
|
17
|
-
if sheet
|
18
|
-
# Remove from our list of remaining sheets
|
19
|
-
remaining_sheets.delete(sheet)
|
20
|
-
# Extract our raw data
|
21
|
-
raw_rows = []
|
22
|
-
spreadsheet.sheet(name).each_with_index do |row, line|
|
23
|
-
raw_rows << row
|
24
|
-
end
|
25
|
-
# Let the sheet sort it out
|
26
|
-
sheet.parse_raw_data(raw_rows)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
return true
|
9
|
+
def init_source(mode, source)
|
10
|
+
if mode == :file
|
11
|
+
@spreadsheet = Roo::Excel.new(source, :file_warning => :ignore)
|
12
|
+
true
|
30
13
|
else
|
31
|
-
@importer.add_error("
|
32
|
-
|
14
|
+
@importer.add_error("Unsupported XLS mode: #{mode}")
|
15
|
+
false
|
33
16
|
end
|
34
|
-
|
35
17
|
rescue Exception => e
|
36
|
-
@importer.add_error("Error reading file #{
|
18
|
+
@importer.add_error("Error reading file #{source}: #{e}")
|
37
19
|
false
|
38
20
|
end
|
39
21
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if !@importer.has_errors?
|
49
|
-
raw_rows.each_with_index do |raw, index|
|
50
|
-
line = index + 1
|
51
|
-
if line >= start_row
|
52
|
-
row = sheet.add_row(line, raw)
|
22
|
+
def load_raw_sheet(sheet)
|
23
|
+
@spreadsheet.sheets.each_with_index do |name, index|
|
24
|
+
# See if this sheet's name or index matches the requested sheet definition
|
25
|
+
if sheet.match_sheet?(name, index)
|
26
|
+
# Extract our raw data
|
27
|
+
raw_rows = []
|
28
|
+
@spreadsheet.sheet(name).each_with_index do |row, line|
|
29
|
+
raw_rows << row
|
53
30
|
end
|
31
|
+
return raw_rows
|
54
32
|
end
|
55
33
|
end
|
34
|
+
# This is not good.
|
35
|
+
@importer.add_error("Unable to find sheet #{sheet}")
|
36
|
+
return false
|
37
|
+
|
38
|
+
rescue Exception => e
|
39
|
+
# Not sure why we'd get here, but we strive for error-freedom here, yessir.
|
40
|
+
@importer.add_error("Error loading sheet #{sheet}: #{e}")
|
41
|
+
false
|
56
42
|
end
|
57
|
-
|
43
|
+
|
58
44
|
end
|
59
45
|
|
60
46
|
end
|
@@ -1,58 +1,45 @@
|
|
1
1
|
class Importer
|
2
2
|
|
3
|
+
# Uses the Roo gem to read in .xlsx files
|
3
4
|
class XlsxReader < DataReader
|
4
5
|
|
5
6
|
def initialize(importer)
|
6
7
|
super(importer, :xlsx)
|
8
|
+
supports_file!
|
7
9
|
end
|
8
10
|
|
9
|
-
def
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
remaining_sheets = @importer.sheets.values
|
14
|
-
spreadsheet.sheets.each_with_index do |name, index|
|
15
|
-
# Look for a sheet definition that matches this sheet's name/index
|
16
|
-
sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
|
17
|
-
if sheet
|
18
|
-
# Remove from our list of remaining sheets
|
19
|
-
remaining_sheets.delete(sheet)
|
20
|
-
# Extract our raw data
|
21
|
-
raw_rows = []
|
22
|
-
spreadsheet.sheet(name).each_with_index do |row, line|
|
23
|
-
raw_rows << row
|
24
|
-
end
|
25
|
-
# Let the sheet sort it out
|
26
|
-
sheet.parse_raw_data(raw_rows)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
return true
|
11
|
+
def init_source(mode, source)
|
12
|
+
if mode == :file
|
13
|
+
@spreadsheet = Roo::Excelx.new(source, :file_warning => :ignore)
|
14
|
+
true
|
30
15
|
else
|
31
|
-
@importer.add_error("
|
32
|
-
|
16
|
+
@importer.add_error("Unsupported XLSX mode: #{mode}")
|
17
|
+
false
|
33
18
|
end
|
34
|
-
|
35
19
|
rescue Exception => e
|
36
|
-
@importer.add_error("Error reading file #{
|
20
|
+
@importer.add_error("Error reading file #{source}: #{e}")
|
37
21
|
false
|
38
22
|
end
|
39
23
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if !@importer.has_errors?
|
49
|
-
raw_rows.each_with_index do |raw, index|
|
50
|
-
line = index + 1
|
51
|
-
if line >= start_row
|
52
|
-
row = sheet.add_row(line, raw)
|
24
|
+
def load_raw_sheet(sheet)
|
25
|
+
@spreadsheet.sheets.each_with_index do |name, index|
|
26
|
+
# See if this sheet's name or index matches the requested sheet definition
|
27
|
+
if sheet.match_sheet?(name, index)
|
28
|
+
# Extract our raw data
|
29
|
+
raw_rows = []
|
30
|
+
@spreadsheet.sheet(name).each_with_index do |row, line|
|
31
|
+
raw_rows << row
|
53
32
|
end
|
33
|
+
return raw_rows
|
54
34
|
end
|
55
35
|
end
|
36
|
+
@importer.add_error("Unable to find sheet #{sheet}")
|
37
|
+
return false
|
38
|
+
|
39
|
+
rescue Exception => e
|
40
|
+
# Not sure why we'd get here, but we strive for error-freedom here, yessir.
|
41
|
+
@importer.add_error("Error loading sheet #{sheet}: #{e}")
|
42
|
+
false
|
56
43
|
end
|
57
44
|
|
58
45
|
end
|
data/lib/iron/import.rb
CHANGED
data/lib/iron-import.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'iron/import'
|
@@ -0,0 +1,46 @@
|
|
1
|
+
describe Importer::CustomReader do
|
2
|
+
|
3
|
+
before do
|
4
|
+
@importer = Importer.new
|
5
|
+
end
|
6
|
+
|
7
|
+
it 'should set up correctly for on_file handling' do
|
8
|
+
@importer.custom_reader.should be_nil
|
9
|
+
@importer.build do
|
10
|
+
headerless!
|
11
|
+
on_file do |source, sheet|
|
12
|
+
[]
|
13
|
+
end
|
14
|
+
end
|
15
|
+
@importer.custom_reader.should be_an(Importer::CustomReader)
|
16
|
+
@importer.custom_reader.should be_supports_file
|
17
|
+
@importer.custom_reader.should_not be_supports_stream
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should load the ICD10 test document' do
|
21
|
+
importer = Importer.build do
|
22
|
+
headerless!
|
23
|
+
column :code do
|
24
|
+
required!
|
25
|
+
end
|
26
|
+
column :desc do
|
27
|
+
required!
|
28
|
+
end
|
29
|
+
|
30
|
+
on_file do |source, sheet|
|
31
|
+
File.readlines(source).collect do |line|
|
32
|
+
line.extract(/([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
importer.import(SpecHelper.sample_path('icd10-custom.txt'))
|
37
|
+
importer.error_summary.should be_nil
|
38
|
+
importer.default_sheet.dump.should == [
|
39
|
+
{:code => 'A000', :desc => 'Cholera due to Vibrio cholerae 01, biovar cholerae'},
|
40
|
+
{:code => 'A001', :desc => 'Cholera due to Vibrio cholerae 01, biovar eltor'},
|
41
|
+
{:code => 'A009', :desc => 'Cholera, unspecified'},
|
42
|
+
{:code => 'A0100', :desc => 'Typhoid fever, unspecified'}
|
43
|
+
]
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -87,7 +87,7 @@ describe Importer::DataReader do
|
|
87
87
|
end
|
88
88
|
|
89
89
|
it 'should build an instance based on stream' do
|
90
|
-
Importer::DataReader.for_stream(@importer,
|
90
|
+
Importer::DataReader.for_stream(@importer, double(original_filename: "nanodrop.xlsx", content_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")).should be_a(Importer::XlsxReader)
|
91
91
|
end
|
92
92
|
|
93
93
|
end
|
metadata
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iron-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Morris
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: iron-extensions
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.2'
|
20
|
+
- - ">="
|
18
21
|
- !ruby/object:Gem::Version
|
19
22
|
version: 1.2.1
|
20
23
|
type: :runtime
|
@@ -22,6 +25,9 @@ dependencies:
|
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.2'
|
30
|
+
- - ">="
|
25
31
|
- !ruby/object:Gem::Version
|
26
32
|
version: 1.2.1
|
27
33
|
- !ruby/object:Gem::Dependency
|
@@ -80,9 +86,11 @@ files:
|
|
80
86
|
- LICENSE
|
81
87
|
- README.rdoc
|
82
88
|
- Version.txt
|
89
|
+
- lib/iron-import.rb
|
83
90
|
- lib/iron/import.rb
|
84
91
|
- lib/iron/import/column.rb
|
85
92
|
- lib/iron/import/csv_reader.rb
|
93
|
+
- lib/iron/import/custom_reader.rb
|
86
94
|
- lib/iron/import/data_reader.rb
|
87
95
|
- lib/iron/import/error.rb
|
88
96
|
- lib/iron/import/importer.rb
|
@@ -92,11 +100,13 @@ files:
|
|
92
100
|
- lib/iron/import/xlsx_reader.rb
|
93
101
|
- spec/importer/column_spec.rb
|
94
102
|
- spec/importer/csv_reader_spec.rb
|
103
|
+
- spec/importer/custom_reader_spec.rb
|
95
104
|
- spec/importer/data_reader_spec.rb
|
96
105
|
- spec/importer/importer_spec.rb
|
97
106
|
- spec/importer/row_spec.rb
|
98
107
|
- spec/importer/sheet_spec.rb
|
99
108
|
- spec/importer/xlsx_reader_spec.rb
|
109
|
+
- spec/samples/icd10-custom.txt
|
100
110
|
- spec/samples/nanodrop.xlsx
|
101
111
|
- spec/samples/simple.csv
|
102
112
|
- spec/samples/test-products.xls
|