iron-import 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +6 -0
- data/README.rdoc +2 -2
- data/Version.txt +1 -1
- data/lib/iron/import/column.rb +35 -14
- data/lib/iron/import/csv_reader.rb +26 -11
- data/lib/iron/import/custom_reader.rb +39 -0
- data/lib/iron/import/data_reader.rb +98 -12
- data/lib/iron/import/importer.rb +58 -21
- data/lib/iron/import/sheet.rb +74 -9
- data/lib/iron/import/xls_reader.rb +25 -39
- data/lib/iron/import/xlsx_reader.rb +25 -38
- data/lib/iron/import.rb +1 -0
- data/lib/iron-import.rb +1 -0
- data/spec/importer/custom_reader_spec.rb +46 -0
- data/spec/importer/data_reader_spec.rb +1 -1
- data/spec/samples/icd10-custom.txt +4 -0
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04d666ea1e0170b0186d75fc8b0ec367a16e528a
|
4
|
+
data.tar.gz: 9dad576e17b7d8fc4b523ffe6b4e53c85feae9c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5a31e81381d78c29da480b296a8e9569ed415a32f50610881014de52a7b92c925687d3ee4ba683bf64a50562b26bf5785acf09070017710938d39c52f0087ad
|
7
|
+
data.tar.gz: d29901644886a98c617dd215b52edec0e8c011875ae2c0d6724c8f0f03c26bfdb9ea8028a3322c9cf1d16f91d9010950203c73ff0fc0485ce0bfc09ffc53e6f2
|
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -25,7 +25,7 @@ any warnings and errors encountered... well, this is the library for you!
|
|
25
25
|
|
26
26
|
IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
|
27
27
|
for the task. Breaking changes will be noted by increases in the second-level version,
|
28
|
-
ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not.
|
28
|
+
ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
|
29
29
|
|
30
30
|
== SAMPLE USAGE
|
31
31
|
|
@@ -65,6 +65,6 @@ RVM users can skip the sudo:
|
|
65
65
|
|
66
66
|
Then use
|
67
67
|
|
68
|
-
require 'iron
|
68
|
+
require 'iron-import'
|
69
69
|
|
70
70
|
to require the library code.
|
data/Version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.0
|
1
|
+
0.6.0
|
data/lib/iron/import/column.rb
CHANGED
@@ -24,13 +24,14 @@ class Importer
|
|
24
24
|
# # Instead of a type, you can set an explicit parse block. Be aware
|
25
25
|
# # that different source types may give you different raw values for what
|
26
26
|
# # seems like the "same" source value, for example an Excel source file
|
27
|
-
# # will give you a float value for all numeric types, even "integers"
|
27
|
+
# # will give you a float value for all numeric types, even "integers".
|
28
28
|
# parse do |raw_value|
|
29
29
|
# raw_value.to_i + 1000
|
30
30
|
# end
|
31
31
|
#
|
32
32
|
# # You can also add a custom validator to check the value and add
|
33
|
-
# # an error if it's not within a given range, or whatever
|
33
|
+
# # an error if it's not within a given range, or whatever. To fail validation,
|
34
|
+
# # simply raise the error you wish recorded.
|
34
35
|
# validate do |parsed_value|
|
35
36
|
# raise "Out of range" unless (parsed_value > 0 && parsed_value < 5000)
|
36
37
|
# end
|
@@ -83,50 +84,60 @@ class Importer
|
|
83
84
|
str = chars[index] + str
|
84
85
|
str
|
85
86
|
end
|
86
|
-
|
87
|
-
|
87
|
+
|
88
|
+
# Create a new column definition, with the owning sheet, the key for the column,
|
89
|
+
# and an optional set of options. The options supported are the same as those supported
|
90
|
+
# in block/builder mode.
|
91
|
+
def initialize(sheet, key, options_hash = {})
|
88
92
|
# Save off our info
|
89
93
|
@key = key
|
90
94
|
@sheet = sheet
|
91
95
|
@importer = @sheet.importer
|
92
96
|
|
93
97
|
# Return it as a string, by default
|
94
|
-
@type = :string
|
98
|
+
@type = options_hash.delete(:type) { :string }
|
95
99
|
|
96
100
|
# By default, we allow empty values
|
97
|
-
@required = false
|
101
|
+
@required = options_hash.delete(:required) { false }
|
98
102
|
|
99
103
|
# Position can be explicitly set
|
100
|
-
@position =
|
104
|
+
@position = options_hash.delete(:position)
|
101
105
|
|
102
106
|
# By default, don't parse incoming data, just pass it through
|
103
|
-
@parse =
|
107
|
+
@parse = options_hash.delete(:parse)
|
104
108
|
|
105
109
|
# Default matcher, looks for the presence of the column key as text anywhere
|
106
110
|
# in the header string, ignoring case and using underscores as spaces, ie
|
107
111
|
# :order_id => /\A\s*order id\s*\z/i
|
108
|
-
@header =
|
112
|
+
@header = options_hash.delete(:header) {
|
113
|
+
Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
|
114
|
+
}
|
109
115
|
|
110
116
|
# Reset our state to pre-load status
|
111
117
|
reset
|
112
118
|
end
|
113
119
|
|
120
|
+
# Customize ourselves using block syntax
|
114
121
|
def build(&block)
|
115
122
|
DslProxy.exec(self, &block)
|
116
123
|
end
|
117
124
|
|
125
|
+
# Deletes all stored data in prep for an import run
|
118
126
|
def reset
|
119
127
|
@data = Data.new
|
120
128
|
end
|
121
129
|
|
122
|
-
# When true,
|
130
|
+
# Returns true when our fixed index equals the passed column index, or when our header definition matches the passed text.
|
123
131
|
def match_header?(text, index)
|
124
|
-
|
125
|
-
|
126
|
-
|
132
|
+
return true if index == self.fixed_index
|
133
|
+
if @header.is_a?(Regexp)
|
134
|
+
return !@header.match(text).nil?
|
135
|
+
else
|
136
|
+
return @header.to_s.downcase == text
|
137
|
+
end
|
127
138
|
end
|
128
139
|
|
129
|
-
#
|
140
|
+
# Applies any custom parser defined to process the given value, capturing
|
130
141
|
# errors as needed
|
131
142
|
def parse_value(row, val)
|
132
143
|
return val if @parse.nil?
|
@@ -138,6 +149,7 @@ class Importer
|
|
138
149
|
end
|
139
150
|
end
|
140
151
|
|
152
|
+
# Applies any validation to a parsed value
|
141
153
|
def validate_value(row, val)
|
142
154
|
return unless @validate
|
143
155
|
begin
|
@@ -149,6 +161,9 @@ class Importer
|
|
149
161
|
end
|
150
162
|
end
|
151
163
|
|
164
|
+
# Returns the fixed index of this column based on the set position.
|
165
|
+
# In other words, a position of 2 would return an index of 1 (as
|
166
|
+
# indices are 0-based), where a position of 'C' would return 2.
|
152
167
|
def fixed_index
|
153
168
|
return nil unless @position
|
154
169
|
if @position.is_a?(Fixnum)
|
@@ -158,14 +173,20 @@ class Importer
|
|
158
173
|
end
|
159
174
|
end
|
160
175
|
|
176
|
+
# Pretty name for ourselves
|
161
177
|
def to_s
|
162
178
|
'Column ' + @data.pos
|
163
179
|
end
|
164
180
|
|
181
|
+
# Extracts the sheet's values for this column and returns them in an array.
|
182
|
+
# Note that the array indices ARE NOT row indices, as the rows may have been
|
183
|
+
# filtered and any header rows have been skipped.
|
165
184
|
def to_a
|
166
185
|
@sheet.data.rows.collect {|r| r[@key] }
|
167
186
|
end
|
168
187
|
|
188
|
+
# Extracts the sheet's values for this column and returns them in a hash of
|
189
|
+
# row num => value for all non-filtered, non-header rows.
|
169
190
|
def to_h
|
170
191
|
res = {}
|
171
192
|
@sheet.data.rows.collect {|r| res[r.num] = r[@key] }
|
@@ -6,19 +6,34 @@ class Importer
|
|
6
6
|
|
7
7
|
def initialize(importer)
|
8
8
|
super(importer, :csv)
|
9
|
-
|
10
|
-
|
11
|
-
def load_stream(stream)
|
12
|
-
text = stream.read
|
13
|
-
encoding = @importer.encoding || 'UTF-8'
|
14
|
-
raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
|
15
|
-
@importer.default_sheet.parse_raw_data(raw_rows)
|
9
|
+
supports_file!
|
10
|
+
supports_stream!
|
16
11
|
end
|
17
12
|
|
18
|
-
def
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
def init_source(mode, source)
|
14
|
+
if mode == :stream
|
15
|
+
# For streams, we just read 'em in and parse 'em
|
16
|
+
text = source.read
|
17
|
+
encoding = @importer.encoding || 'UTF-8'
|
18
|
+
@raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
|
19
|
+
true
|
20
|
+
|
21
|
+
elsif mode == :file
|
22
|
+
# Files have a different path
|
23
|
+
encoding = @importer.encoding || 'UTF-8'
|
24
|
+
@raw_rows = CSV.read(source, :encoding => "#{encoding}:UTF-8")
|
25
|
+
true
|
26
|
+
|
27
|
+
else
|
28
|
+
@importer.add_error("Unsupported CSV mode: #{mode}")
|
29
|
+
false
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Normally, we'd check the key and return the proper data, but for CSV files,
|
34
|
+
# there's only one "sheet"
|
35
|
+
def load_raw_sheet(key)
|
36
|
+
@raw_rows
|
22
37
|
end
|
23
38
|
|
24
39
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
class Importer
|
2
|
+
|
3
|
+
# Special data reader that allows you to define a block to do the import yourself for cases
|
4
|
+
# where you have an odd text-based format or something else you want to be able to process
|
5
|
+
# using this gem. Check out Importer#on_file and Importer#on_stream to see how to use
|
6
|
+
# this reader type.
|
7
|
+
class CustomReader < DataReader
|
8
|
+
|
9
|
+
attr_accessor :readers
|
10
|
+
|
11
|
+
def initialize(importer)
|
12
|
+
super(importer, :custom)
|
13
|
+
@readers = {}
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by the importer to add a handler for the given mode
|
17
|
+
def set_reader(mode, block)
|
18
|
+
@readers[mode] = block
|
19
|
+
@supports << mode
|
20
|
+
end
|
21
|
+
|
22
|
+
def init_source(mode, source)
|
23
|
+
@mode = mode
|
24
|
+
@source = source
|
25
|
+
end
|
26
|
+
|
27
|
+
def load_raw_sheet(sheet)
|
28
|
+
reader = @readers[@mode]
|
29
|
+
reader.call(@source, sheet)
|
30
|
+
|
31
|
+
rescue Exception => e
|
32
|
+
# Catch any exceptions thrown and note them with helpful stacktrace info for debugging custom readers
|
33
|
+
@importer.add_error("Error in custom reader when loading sheet #{sheet}: #{e} @ #{e.backtrace.first}")
|
34
|
+
false
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
@@ -14,6 +14,24 @@ class Importer
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
# Implement our automatic reader selection, based on the import source
|
18
|
+
def self.for_source(importer, source)
|
19
|
+
data = nil
|
20
|
+
if is_stream?(source)
|
21
|
+
data = DataReader::for_stream(importer, source)
|
22
|
+
unless data
|
23
|
+
importer.add_error("Unable to find format handler for stream")
|
24
|
+
end
|
25
|
+
else
|
26
|
+
data = DataReader::for_path(importer, source)
|
27
|
+
unless data
|
28
|
+
importer.add_error("Unable to find format handler for file #{source}")
|
29
|
+
end
|
30
|
+
end
|
31
|
+
data
|
32
|
+
end
|
33
|
+
|
34
|
+
# Factory method to build a reader from an explicit format selector
|
17
35
|
def self.for_format(importer, format)
|
18
36
|
case format
|
19
37
|
when :csv
|
@@ -29,6 +47,7 @@ class Importer
|
|
29
47
|
end
|
30
48
|
end
|
31
49
|
|
50
|
+
# Figure out which format to use for a given path based on file name
|
32
51
|
def self.for_path(importer, path)
|
33
52
|
format = path.to_s.extract(/\.(csv|xlsx?)\z/i)
|
34
53
|
if format
|
@@ -39,11 +58,19 @@ class Importer
|
|
39
58
|
end
|
40
59
|
end
|
41
60
|
|
61
|
+
# Figure out which format to use based on a stream's source file info
|
42
62
|
def self.for_stream(importer, stream)
|
43
63
|
path = path_from_stream(stream)
|
44
64
|
for_path(importer, path)
|
45
65
|
end
|
46
66
|
|
67
|
+
# Attempt to determine if the given source is a stream
|
68
|
+
def self.is_stream?(source)
|
69
|
+
# For now, just assume anything that has a #read method is a stream, in
|
70
|
+
# duck-type fashion
|
71
|
+
source.respond_to?(:read)
|
72
|
+
end
|
73
|
+
|
47
74
|
# Try to find the original file name for the given stream,
|
48
75
|
# as in the case where a file is uploaded to Rails and we're dealing with an
|
49
76
|
# ActionDispatch::Http::UploadedFile.
|
@@ -60,16 +87,40 @@ class Importer
|
|
60
87
|
def initialize(importer, format)
|
61
88
|
@importer = importer
|
62
89
|
@format = format
|
63
|
-
@
|
90
|
+
@supports = []
|
64
91
|
end
|
65
92
|
|
93
|
+
def supports_stream!
|
94
|
+
@supports << :stream
|
95
|
+
end
|
96
|
+
|
97
|
+
def supports_file!
|
98
|
+
@supports << :file
|
99
|
+
end
|
100
|
+
|
101
|
+
def supports?(mode)
|
102
|
+
@supports.include?(mode)
|
103
|
+
end
|
104
|
+
|
105
|
+
def supports_file?
|
106
|
+
supports?(:file)
|
107
|
+
end
|
108
|
+
|
109
|
+
def supports_stream?
|
110
|
+
supports?(:stream)
|
111
|
+
end
|
112
|
+
|
113
|
+
# Core data reader method. Takes a given input source (either a stream or
|
114
|
+
# a file path) and attempts to load it. Returns true if successful, false
|
115
|
+
# if not. If false, there will be one or more errors explaining what went
|
116
|
+
# wrong.
|
66
117
|
def load(path_or_stream)
|
67
118
|
# Figure out what we've been passed, and handle it
|
68
|
-
if
|
119
|
+
if self.class.is_stream?(path_or_stream)
|
69
120
|
# We have a stream (open file, upload, whatever)
|
70
|
-
if
|
121
|
+
if supports_stream?
|
71
122
|
# Stream loader defined, run it
|
72
|
-
|
123
|
+
load_sheets(:stream, path_or_stream)
|
73
124
|
else
|
74
125
|
# Write to temp file, as some of our readers only read physical files, annoyingly
|
75
126
|
file = Tempfile.new(['importer', ".#{format}"])
|
@@ -77,7 +128,7 @@ class Importer
|
|
77
128
|
begin
|
78
129
|
file.write path_or_stream.read
|
79
130
|
file.close
|
80
|
-
|
131
|
+
load_sheets(:file, file.path)
|
81
132
|
ensure
|
82
133
|
file.close
|
83
134
|
file.unlink
|
@@ -86,23 +137,58 @@ class Importer
|
|
86
137
|
|
87
138
|
elsif path_or_stream.is_a?(String)
|
88
139
|
# Assume it's a path
|
89
|
-
if
|
90
|
-
|
91
|
-
|
140
|
+
if File.exist?(path_or_stream)
|
141
|
+
if supports_file?
|
142
|
+
# We're all set, load up the given path
|
143
|
+
load_sheets(:file, path_or_stream)
|
144
|
+
else
|
145
|
+
# No file handler, so open the file and run the stream processor
|
146
|
+
file = File.open(path_or_stream, 'rb')
|
147
|
+
load_sheets(:stream, file)
|
148
|
+
end
|
92
149
|
else
|
93
|
-
|
94
|
-
file = File.open(path_or_stream, 'rb')
|
95
|
-
load_stream(file)
|
150
|
+
@importer.add_error("Unable to locate source file #{path_or_stream}")
|
96
151
|
end
|
97
152
|
|
98
153
|
else
|
99
|
-
|
154
|
+
@importer.add_error("Unable to load data source - not a file path or stream: #{path_or_stream.inspect}")
|
100
155
|
end
|
101
156
|
|
102
157
|
# Return our status
|
103
158
|
!@importer.has_errors?
|
104
159
|
end
|
105
160
|
|
161
|
+
# Load up the sheets in the correct mode
|
162
|
+
def load_sheets(mode, source)
|
163
|
+
# Let our derived classes open the file, etc. as they need
|
164
|
+
if init_source(mode, source)
|
165
|
+
# Once the source is set, run through each defined sheet, pass it to
|
166
|
+
# our sheet loader, and have the sheet parse it out.
|
167
|
+
@importer.sheets.values.each do |sheet|
|
168
|
+
res = load_raw_sheet(sheet)
|
169
|
+
if res === false
|
170
|
+
# D'oh.
|
171
|
+
else
|
172
|
+
# Tell the sheet to parse the data
|
173
|
+
sheet.parse_raw_data(res)
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
# Override this method in derived classes to set up
|
180
|
+
# the given source in the given mode
|
181
|
+
def init_source(mode, source)
|
182
|
+
raise "Unimplemented method #init_source in data reader #{self.class.name}"
|
183
|
+
end
|
184
|
+
|
185
|
+
# Override this method in derived classes to take the given sheet definition,
|
186
|
+
# find that sheet in the input source, and read out the raw (unparsed) rows
|
187
|
+
# as an array of arrays. Return false if the sheet cannot be loaded.
|
188
|
+
def load_raw_sheet(sheet)
|
189
|
+
raise "Unimplemented method #load_raw_sheet in data reader #{self.class.name}"
|
190
|
+
end
|
191
|
+
|
106
192
|
# Provides default value parsing/coercion for all derived data readers. Attempts to be clever and
|
107
193
|
# handle edge cases like converting '5.00' to 5 when in integer mode, etc. If you find your inputs aren't
|
108
194
|
# being parsed correctly, add a custom #parse block on your Column definition.
|
data/lib/iron/import/importer.rb
CHANGED
@@ -33,8 +33,9 @@
|
|
33
33
|
class Importer
|
34
34
|
|
35
35
|
# Array of error message or nil for each non-header row
|
36
|
-
attr_accessor :errors, :warnings
|
36
|
+
attr_accessor :errors, :warnings
|
37
37
|
attr_accessor :sheets
|
38
|
+
attr_reader :data, :custom_reader
|
38
39
|
# Source file/stream encoding, assumes UTF-8 if none specified
|
39
40
|
dsl_accessor :encoding
|
40
41
|
|
@@ -51,16 +52,34 @@ class Importer
|
|
51
52
|
reset
|
52
53
|
end
|
53
54
|
|
55
|
+
# Takes a block, and sets self to be importer instance, so you can
|
56
|
+
# just call #column, #sheet, etc. directly.
|
54
57
|
def build(&block)
|
55
58
|
DslProxy.exec(self, &block) if block
|
56
59
|
self
|
57
60
|
end
|
58
61
|
|
59
|
-
|
60
|
-
|
62
|
+
# For the common case where there is only one "sheet", e.g. CSV files.
|
63
|
+
def default_sheet(&block)
|
64
|
+
sheet(1, true, &block)
|
61
65
|
end
|
62
66
|
|
63
|
-
# Access a Sheet definition by id (either number (1-N) or sheet name)
|
67
|
+
# Access a Sheet definition by id (either number (1-N) or sheet name).
|
68
|
+
# Used during #build calls to define a sheet with a passed block, like so:
|
69
|
+
#
|
70
|
+
# Importer.build do
|
71
|
+
# sheet(1) do
|
72
|
+
# column :store_name
|
73
|
+
# column :store_address
|
74
|
+
# end
|
75
|
+
# sheet('Orders') do
|
76
|
+
# column :id
|
77
|
+
# column :price
|
78
|
+
# filter do |row|
|
79
|
+
# row[:price].present?
|
80
|
+
# end
|
81
|
+
# end
|
82
|
+
# end
|
64
83
|
def sheet(id, create=true, &block)
|
65
84
|
# Find the sheet, creating it if needed (and requested!)
|
66
85
|
if @sheets[id].nil?
|
@@ -78,18 +97,40 @@ class Importer
|
|
78
97
|
# Return the sheet
|
79
98
|
sheet
|
80
99
|
end
|
100
|
+
|
101
|
+
# Define a custom file reader to implement your own sheet parsing.
|
102
|
+
def on_file(&block)
|
103
|
+
@custom_reader = CustomReader.new(self) unless @custom_reader
|
104
|
+
@custom_reader.set_reader(:file, block)
|
105
|
+
end
|
106
|
+
|
107
|
+
def on_stream(&block)
|
108
|
+
@custom_reader = CustomReader.new(self) unless @custom_reader
|
109
|
+
@custom_reader.set_reader(:stream, block)
|
110
|
+
end
|
81
111
|
|
82
112
|
# Very, very commonly we only want to deal with the default sheet. In this case,
|
83
113
|
# let folks skip the sheet(n) do ... end block wrapper and just define columns
|
84
|
-
# against the main importer. Internally, proxy those calls to the first sheet
|
114
|
+
# against the main importer. Internally, proxy those calls to the first sheet.
|
85
115
|
def column(*args, &block)
|
86
116
|
default_sheet.column(*args, &block)
|
87
117
|
end
|
88
118
|
|
119
|
+
# Ditto for filters
|
89
120
|
def filter(*args, &block)
|
90
121
|
default_sheet.filter(*args, &block)
|
91
122
|
end
|
92
123
|
|
124
|
+
# Ditto for start row too
|
125
|
+
def start_row(row_num)
|
126
|
+
default_sheet.start_row(row_num)
|
127
|
+
end
|
128
|
+
|
129
|
+
# More facading
|
130
|
+
def headerless!
|
131
|
+
default_sheet.headerless!
|
132
|
+
end
|
133
|
+
|
93
134
|
# First call to a freshly #build'd importer, this will read the file/stream/path supplied,
|
94
135
|
# validate the required values, run custom validations... basically pre-parse and
|
95
136
|
# massage the supplied data. It will return true on success, or false if one
|
@@ -113,27 +154,23 @@ class Importer
|
|
113
154
|
reset
|
114
155
|
|
115
156
|
# Get the reader for this format
|
116
|
-
|
117
|
-
|
157
|
+
default = @custom_reader ? :custom : :auto
|
158
|
+
format = options.delete(:format) { default }
|
159
|
+
if format == :custom
|
160
|
+
# Custom format selected, use our internal custom reader
|
161
|
+
@data = @custom_reader
|
162
|
+
|
163
|
+
elsif format && format != :auto
|
164
|
+
# Explicit format requested
|
118
165
|
@data = DataReader::for_format(self, format)
|
119
|
-
unless
|
166
|
+
unless @data
|
120
167
|
add_error("Unable to find format handler for format #{format} - aborting")
|
121
168
|
return
|
122
169
|
end
|
170
|
+
|
123
171
|
else
|
124
|
-
|
125
|
-
|
126
|
-
unless @data
|
127
|
-
add_error("Unable to find format handler for stream - aborting")
|
128
|
-
return
|
129
|
-
end
|
130
|
-
else
|
131
|
-
@data = DataReader::for_path(self, path_or_stream)
|
132
|
-
unless @data
|
133
|
-
add_error("Unable to find format handler for file #{path_or_stream} - aborting")
|
134
|
-
return
|
135
|
-
end
|
136
|
-
end
|
172
|
+
# Auto select
|
173
|
+
@data = DataReader::for_source(self, path_or_stream)
|
137
174
|
end
|
138
175
|
|
139
176
|
# Read in the data!
|
data/lib/iron/import/sheet.rb
CHANGED
@@ -1,7 +1,39 @@
|
|
1
1
|
class Importer
|
2
2
|
|
3
3
|
# The Sheet class handles building the sheet's column configuration and other
|
4
|
-
# setup, then holds all load-time row data.
|
4
|
+
# setup, then holds all load-time row data. In some file types (Excel mostly)
|
5
|
+
# there may be more than one sheet definition in a given importer. In others,
|
6
|
+
# the default sheet is the only one (possibly implicitly) defined.
|
7
|
+
#
|
8
|
+
# The following builder options are available:
|
9
|
+
#
|
10
|
+
# Importer.build do
|
11
|
+
# sheet('Some Sheet Name') do
|
12
|
+
# # Don't try to look for a header using column definitions, there is no header
|
13
|
+
# headerless!
|
14
|
+
#
|
15
|
+
# # Manually set the start row for data in this sheet, defaults to nil
|
16
|
+
# # indicating that the data rows start immediately following the header.
|
17
|
+
# start_row 4
|
18
|
+
#
|
19
|
+
# # Define a filter that will skip unneeded rows. The filter command takes
|
20
|
+
# # a block that receives the parsed (but not validated!) row data as an
|
21
|
+
# # associative hash of :col_key => <parsed value>, and returns
|
22
|
+
# # true to keep the row or false to exclude it.
|
23
|
+
# filter do |row|
|
24
|
+
# row[:id].to_i > 5000
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# # Of course, the main thing to do in a sheet is define columns. See the
|
28
|
+
# # Column class' notes for options when defining a column. Note that
|
29
|
+
# # you can define columns using either hash-style:
|
30
|
+
# column :id, :type => :integer
|
31
|
+
# # or builder-style:
|
32
|
+
# column :name do
|
33
|
+
# header /company\s*name/
|
34
|
+
# type :string
|
35
|
+
# end
|
36
|
+
# end
|
5
37
|
class Sheet
|
6
38
|
|
7
39
|
# Inner class for holding load-time data that gets reset on each load call
|
@@ -37,10 +69,16 @@ class Importer
|
|
37
69
|
reset
|
38
70
|
end
|
39
71
|
|
72
|
+
# Define our columns etc. via builder-style method calling
|
40
73
|
def build(&block)
|
41
74
|
DslProxy.exec(self, &block)
|
42
75
|
end
|
43
76
|
|
77
|
+
# Call with a block accepting a single Importer::Row with contents that
|
78
|
+
# look like :column_key => <parsed value>. Any filtered rows
|
79
|
+
# will not be present. If you want to register an error, simply
|
80
|
+
# raise "some text" and it will be added to the importer's error
|
81
|
+
# list for display to the user, logging, or whatever.
|
44
82
|
def process
|
45
83
|
@data.rows.each do |row|
|
46
84
|
begin
|
@@ -51,13 +89,33 @@ class Importer
|
|
51
89
|
end
|
52
90
|
end
|
53
91
|
|
54
|
-
|
92
|
+
# Add a new column definition to our list, allows customizing the new
|
93
|
+
# column with a builder block. See Importer::Column docs for
|
94
|
+
# options. In lieu of a builder mode, you can pass the same values
|
95
|
+
# as key => value pairs in the options hash to this method, so:
|
96
|
+
#
|
97
|
+
# column(:foo) do
|
98
|
+
# type :string
|
99
|
+
# parse do |val|
|
100
|
+
# val.to_s.upcase
|
101
|
+
# end
|
102
|
+
# end
|
103
|
+
#
|
104
|
+
# Is equivalent to:
|
105
|
+
#
|
106
|
+
# column(:foo, :type => :string, :parse => lambda {|val| val.to_s.upcase})
|
107
|
+
#
|
108
|
+
# Use whichever you prefer!
|
109
|
+
def column(key, options_hash = {}, &block)
|
110
|
+
# Find existing column with key to allow re-opening an existing definition
|
55
111
|
col = @columns.detect {|c| c.key == key }
|
56
112
|
unless col
|
57
|
-
|
113
|
+
# if none found, add a new one
|
114
|
+
col = Column.new(self, key, options_hash)
|
58
115
|
@columns << col
|
59
116
|
end
|
60
117
|
|
118
|
+
# Customize if needed
|
61
119
|
DslProxy::exec(col, &block) if block
|
62
120
|
|
63
121
|
col
|
@@ -73,9 +131,9 @@ class Importer
|
|
73
131
|
if parse_header(raw_rows)
|
74
132
|
# Now, run all the data and add it as a Row instance
|
75
133
|
raw_rows.each_with_index do |raw, index|
|
76
|
-
|
77
|
-
if
|
78
|
-
add_row(
|
134
|
+
row_num = index + 1
|
135
|
+
if row_num >= @data.start_row
|
136
|
+
add_row(row_num, raw)
|
79
137
|
end
|
80
138
|
end
|
81
139
|
end
|
@@ -128,8 +186,8 @@ class Importer
|
|
128
186
|
# Use implicit or explicit column position when told to not look for a header
|
129
187
|
next_index = 0
|
130
188
|
@columns.each do |col|
|
131
|
-
|
132
|
-
next_index = col.
|
189
|
+
unless col.position.nil?
|
190
|
+
next_index = col.fixed_index
|
133
191
|
end
|
134
192
|
col.data.index = next_index
|
135
193
|
next_index += 1
|
@@ -140,6 +198,9 @@ class Importer
|
|
140
198
|
else
|
141
199
|
# Match by testing
|
142
200
|
raw_rows.each_with_index do |row, i|
|
201
|
+
# Um, have data?
|
202
|
+
next unless row
|
203
|
+
|
143
204
|
# Set up for this iteration
|
144
205
|
remaining = @columns.dup
|
145
206
|
|
@@ -165,11 +226,13 @@ class Importer
|
|
165
226
|
end
|
166
227
|
end
|
167
228
|
|
229
|
+
# When true, the given sheet name or zero-based index
|
230
|
+
# is a match with our id.
|
168
231
|
def match_sheet?(name, index)
|
169
232
|
if @id.is_a?(Fixnum)
|
170
233
|
@id.to_i == index+1
|
171
234
|
else
|
172
|
-
@id.to_s == name
|
235
|
+
@id.to_s.downcase == name.downcase
|
173
236
|
end
|
174
237
|
end
|
175
238
|
|
@@ -177,6 +240,8 @@ class Importer
|
|
177
240
|
"Sheet #{@id}"
|
178
241
|
end
|
179
242
|
|
243
|
+
# Return all parsed, filtered data in the sheet as an
|
244
|
+
# array of arrays.
|
180
245
|
def dump
|
181
246
|
@data.rows.collect(&:values)
|
182
247
|
end
|
@@ -6,55 +6,41 @@ class Importer
|
|
6
6
|
super(importer, :xlsx)
|
7
7
|
end
|
8
8
|
|
9
|
-
def
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
remaining_sheets = @importer.sheets.values
|
14
|
-
spreadsheet.sheets.each_with_index do |name, index|
|
15
|
-
# Look for a sheet definition that matches this sheet's name/index
|
16
|
-
sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
|
17
|
-
if sheet
|
18
|
-
# Remove from our list of remaining sheets
|
19
|
-
remaining_sheets.delete(sheet)
|
20
|
-
# Extract our raw data
|
21
|
-
raw_rows = []
|
22
|
-
spreadsheet.sheet(name).each_with_index do |row, line|
|
23
|
-
raw_rows << row
|
24
|
-
end
|
25
|
-
# Let the sheet sort it out
|
26
|
-
sheet.parse_raw_data(raw_rows)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
return true
|
9
|
+
def init_source(mode, source)
|
10
|
+
if mode == :file
|
11
|
+
@spreadsheet = Roo::Excel.new(source, :file_warning => :ignore)
|
12
|
+
true
|
30
13
|
else
|
31
|
-
@importer.add_error("
|
32
|
-
|
14
|
+
@importer.add_error("Unsupported XLS mode: #{mode}")
|
15
|
+
false
|
33
16
|
end
|
34
|
-
|
35
17
|
rescue Exception => e
|
36
|
-
@importer.add_error("Error reading file #{
|
18
|
+
@importer.add_error("Error reading file #{source}: #{e}")
|
37
19
|
false
|
38
20
|
end
|
39
21
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if !@importer.has_errors?
|
49
|
-
raw_rows.each_with_index do |raw, index|
|
50
|
-
line = index + 1
|
51
|
-
if line >= start_row
|
52
|
-
row = sheet.add_row(line, raw)
|
22
|
+
def load_raw_sheet(sheet)
|
23
|
+
@spreadsheet.sheets.each_with_index do |name, index|
|
24
|
+
# See if this sheet's name or index matches the requested sheet definition
|
25
|
+
if sheet.match_sheet?(name, index)
|
26
|
+
# Extract our raw data
|
27
|
+
raw_rows = []
|
28
|
+
@spreadsheet.sheet(name).each_with_index do |row, line|
|
29
|
+
raw_rows << row
|
53
30
|
end
|
31
|
+
return raw_rows
|
54
32
|
end
|
55
33
|
end
|
34
|
+
# This is not good.
|
35
|
+
@importer.add_error("Unable to find sheet #{sheet}")
|
36
|
+
return false
|
37
|
+
|
38
|
+
rescue Exception => e
|
39
|
+
# Not sure why we'd get here, but we strive for error-freedom here, yessir.
|
40
|
+
@importer.add_error("Error loading sheet #{sheet}: #{e}")
|
41
|
+
false
|
56
42
|
end
|
57
|
-
|
43
|
+
|
58
44
|
end
|
59
45
|
|
60
46
|
end
|
@@ -1,58 +1,45 @@
|
|
1
1
|
class Importer
|
2
2
|
|
3
|
+
# Uses the Roo gem to read in .xlsx files
|
3
4
|
class XlsxReader < DataReader
|
4
5
|
|
5
6
|
def initialize(importer)
|
6
7
|
super(importer, :xlsx)
|
8
|
+
supports_file!
|
7
9
|
end
|
8
10
|
|
9
|
-
def
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
remaining_sheets = @importer.sheets.values
|
14
|
-
spreadsheet.sheets.each_with_index do |name, index|
|
15
|
-
# Look for a sheet definition that matches this sheet's name/index
|
16
|
-
sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
|
17
|
-
if sheet
|
18
|
-
# Remove from our list of remaining sheets
|
19
|
-
remaining_sheets.delete(sheet)
|
20
|
-
# Extract our raw data
|
21
|
-
raw_rows = []
|
22
|
-
spreadsheet.sheet(name).each_with_index do |row, line|
|
23
|
-
raw_rows << row
|
24
|
-
end
|
25
|
-
# Let the sheet sort it out
|
26
|
-
sheet.parse_raw_data(raw_rows)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
return true
|
11
|
+
def init_source(mode, source)
|
12
|
+
if mode == :file
|
13
|
+
@spreadsheet = Roo::Excelx.new(source, :file_warning => :ignore)
|
14
|
+
true
|
30
15
|
else
|
31
|
-
@importer.add_error("
|
32
|
-
|
16
|
+
@importer.add_error("Unsupported XLSX mode: #{mode}")
|
17
|
+
false
|
33
18
|
end
|
34
|
-
|
35
19
|
rescue Exception => e
|
36
|
-
@importer.add_error("Error reading file #{
|
20
|
+
@importer.add_error("Error reading file #{source}: #{e}")
|
37
21
|
false
|
38
22
|
end
|
39
23
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if !@importer.has_errors?
|
49
|
-
raw_rows.each_with_index do |raw, index|
|
50
|
-
line = index + 1
|
51
|
-
if line >= start_row
|
52
|
-
row = sheet.add_row(line, raw)
|
24
|
+
def load_raw_sheet(sheet)
|
25
|
+
@spreadsheet.sheets.each_with_index do |name, index|
|
26
|
+
# See if this sheet's name or index matches the requested sheet definition
|
27
|
+
if sheet.match_sheet?(name, index)
|
28
|
+
# Extract our raw data
|
29
|
+
raw_rows = []
|
30
|
+
@spreadsheet.sheet(name).each_with_index do |row, line|
|
31
|
+
raw_rows << row
|
53
32
|
end
|
33
|
+
return raw_rows
|
54
34
|
end
|
55
35
|
end
|
36
|
+
@importer.add_error("Unable to find sheet #{sheet}")
|
37
|
+
return false
|
38
|
+
|
39
|
+
rescue Exception => e
|
40
|
+
# Not sure why we'd get here, but we strive for error-freedom here, yessir.
|
41
|
+
@importer.add_error("Error loading sheet #{sheet}: #{e}")
|
42
|
+
false
|
56
43
|
end
|
57
44
|
|
58
45
|
end
|
data/lib/iron/import.rb
CHANGED
data/lib/iron-import.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'iron/import'
|
@@ -0,0 +1,46 @@
|
|
1
|
+
describe Importer::CustomReader do
|
2
|
+
|
3
|
+
before do
|
4
|
+
@importer = Importer.new
|
5
|
+
end
|
6
|
+
|
7
|
+
it 'should set up correctly for on_file handling' do
|
8
|
+
@importer.custom_reader.should be_nil
|
9
|
+
@importer.build do
|
10
|
+
headerless!
|
11
|
+
on_file do |source, sheet|
|
12
|
+
[]
|
13
|
+
end
|
14
|
+
end
|
15
|
+
@importer.custom_reader.should be_an(Importer::CustomReader)
|
16
|
+
@importer.custom_reader.should be_supports_file
|
17
|
+
@importer.custom_reader.should_not be_supports_stream
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should load the ICD10 test document' do
|
21
|
+
importer = Importer.build do
|
22
|
+
headerless!
|
23
|
+
column :code do
|
24
|
+
required!
|
25
|
+
end
|
26
|
+
column :desc do
|
27
|
+
required!
|
28
|
+
end
|
29
|
+
|
30
|
+
on_file do |source, sheet|
|
31
|
+
File.readlines(source).collect do |line|
|
32
|
+
line.extract(/([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
importer.import(SpecHelper.sample_path('icd10-custom.txt'))
|
37
|
+
importer.error_summary.should be_nil
|
38
|
+
importer.default_sheet.dump.should == [
|
39
|
+
{:code => 'A000', :desc => 'Cholera due to Vibrio cholerae 01, biovar cholerae'},
|
40
|
+
{:code => 'A001', :desc => 'Cholera due to Vibrio cholerae 01, biovar eltor'},
|
41
|
+
{:code => 'A009', :desc => 'Cholera, unspecified'},
|
42
|
+
{:code => 'A0100', :desc => 'Typhoid fever, unspecified'}
|
43
|
+
]
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -87,7 +87,7 @@ describe Importer::DataReader do
|
|
87
87
|
end
|
88
88
|
|
89
89
|
it 'should build an instance based on stream' do
|
90
|
-
Importer::DataReader.for_stream(@importer,
|
90
|
+
Importer::DataReader.for_stream(@importer, double(original_filename: "nanodrop.xlsx", content_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")).should be_a(Importer::XlsxReader)
|
91
91
|
end
|
92
92
|
|
93
93
|
end
|
metadata
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iron-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Morris
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: iron-extensions
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.2'
|
20
|
+
- - ">="
|
18
21
|
- !ruby/object:Gem::Version
|
19
22
|
version: 1.2.1
|
20
23
|
type: :runtime
|
@@ -22,6 +25,9 @@ dependencies:
|
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.2'
|
30
|
+
- - ">="
|
25
31
|
- !ruby/object:Gem::Version
|
26
32
|
version: 1.2.1
|
27
33
|
- !ruby/object:Gem::Dependency
|
@@ -80,9 +86,11 @@ files:
|
|
80
86
|
- LICENSE
|
81
87
|
- README.rdoc
|
82
88
|
- Version.txt
|
89
|
+
- lib/iron-import.rb
|
83
90
|
- lib/iron/import.rb
|
84
91
|
- lib/iron/import/column.rb
|
85
92
|
- lib/iron/import/csv_reader.rb
|
93
|
+
- lib/iron/import/custom_reader.rb
|
86
94
|
- lib/iron/import/data_reader.rb
|
87
95
|
- lib/iron/import/error.rb
|
88
96
|
- lib/iron/import/importer.rb
|
@@ -92,11 +100,13 @@ files:
|
|
92
100
|
- lib/iron/import/xlsx_reader.rb
|
93
101
|
- spec/importer/column_spec.rb
|
94
102
|
- spec/importer/csv_reader_spec.rb
|
103
|
+
- spec/importer/custom_reader_spec.rb
|
95
104
|
- spec/importer/data_reader_spec.rb
|
96
105
|
- spec/importer/importer_spec.rb
|
97
106
|
- spec/importer/row_spec.rb
|
98
107
|
- spec/importer/sheet_spec.rb
|
99
108
|
- spec/importer/xlsx_reader_spec.rb
|
109
|
+
- spec/samples/icd10-custom.txt
|
100
110
|
- spec/samples/nanodrop.xlsx
|
101
111
|
- spec/samples/simple.csv
|
102
112
|
- spec/samples/test-products.xls
|