iron-import 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 70c4748d780e9854cbd60622563b74d3b7ce2b5c
4
- data.tar.gz: d6503f0f7a08b4c88da5813b3114446baf1fff1a
3
+ metadata.gz: 04d666ea1e0170b0186d75fc8b0ec367a16e528a
4
+ data.tar.gz: 9dad576e17b7d8fc4b523ffe6b4e53c85feae9c8
5
5
  SHA512:
6
- metadata.gz: 488a0e4b2d8ed83914bb2a6c907358ee584c0849f26bf9e64d6cc4bd8c2296997e4bc580f59b3bff4db6fa699a6abf94f5a85cd31c1585f03f728523025529a3
7
- data.tar.gz: 00c6e27cf433423c9c1cc14828c11cd895459b0c12e86aa57ec65b35b049b0b7939dda98edd3359aa4dab8af945b0a17b9ef5b7fc486300edeb8b987b21d65dd
6
+ metadata.gz: e5a31e81381d78c29da480b296a8e9569ed415a32f50610881014de52a7b92c925687d3ee4ba683bf64a50562b26bf5785acf09070017710938d39c52f0087ad
7
+ data.tar.gz: d29901644886a98c617dd215b52edec0e8c011875ae2c0d6724c8f0f03c26bfdb9ea8028a3322c9cf1d16f91d9010950203c73ff0fc0485ce0bfc09ffc53e6f2
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ == 0.6.0 / 2015-08-17
2
+
3
+ * Refactored readers to allow for custom format reading
4
+ * Vastly improved internal and user-facing comments
5
+ * Improved error logging, replaced some exceptions with errors
6
+
1
7
  == 0.5.0 / 2015-02-XX
2
8
 
3
9
  * Initial revision
data/README.rdoc CHANGED
@@ -25,7 +25,7 @@ any warnings and errors encountered... well, this is the library for you!
25
25
 
26
26
  IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
27
27
  for the task. Breaking changes will be noted by increases in the second-level version,
28
- ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not.
28
+ ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
29
29
 
30
30
  == SAMPLE USAGE
31
31
 
@@ -65,6 +65,6 @@ RVM users can skip the sudo:
65
65
 
66
66
  Then use
67
67
 
68
- require 'iron/import'
68
+ require 'iron-import'
69
69
 
70
70
  to require the library code.
data/Version.txt CHANGED
@@ -1 +1 @@
1
- 0.5.0
1
+ 0.6.0
@@ -24,13 +24,14 @@ class Importer
24
24
  # # Instead of a type, you can set an explicit parse block. Be aware
25
25
  # # that different source types may give you different raw values for what
26
26
  # # seems like the "same" source value, for example an Excel source file
27
- # # will give you a float value for all numeric types, even "integers"
27
+ # # will give you a float value for all numeric types, even "integers".
28
28
  # parse do |raw_value|
29
29
  # raw_value.to_i + 1000
30
30
  # end
31
31
  #
32
32
  # # You can also add a custom validator to check the value and add
33
- # # an error if it's not within a given range, or whatever:
33
+ # # an error if it's not within a given range, or whatever. To fail validation,
34
+ # # simply raise the error you wish recorded.
34
35
  # validate do |parsed_value|
35
36
  # raise "Out of range" unless (parsed_value > 0 && parsed_value < 5000)
36
37
  # end
@@ -83,50 +84,60 @@ class Importer
83
84
  str = chars[index] + str
84
85
  str
85
86
  end
86
-
87
- def initialize(sheet, key)
87
+
88
+ # Create a new column definition, with the owning sheet, the key for the column,
89
+ # and an optional set of options. The options supported are the same as those supported
90
+ # in block/builder mode.
91
+ def initialize(sheet, key, options_hash = {})
88
92
  # Save off our info
89
93
  @key = key
90
94
  @sheet = sheet
91
95
  @importer = @sheet.importer
92
96
 
93
97
  # Return it as a string, by default
94
- @type = :string
98
+ @type = options_hash.delete(:type) { :string }
95
99
 
96
100
  # By default, we allow empty values
97
- @required = false
101
+ @required = options_hash.delete(:required) { false }
98
102
 
99
103
  # Position can be explicitly set
100
- @position = nil
104
+ @position = options_hash.delete(:position)
101
105
 
102
106
  # By default, don't parse incoming data, just pass it through
103
- @parse = nil
107
+ @parse = options_hash.delete(:parse)
104
108
 
105
109
  # Default matcher, looks for the presence of the column key as text anywhere
106
110
  # in the header string, ignoring case and using underscores as spaces, ie
107
111
  # :order_id => /\A\s*order id\s*\z/i
108
- @header = Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
112
+ @header = options_hash.delete(:header) {
113
+ Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
114
+ }
109
115
 
110
116
  # Reset our state to pre-load status
111
117
  reset
112
118
  end
113
119
 
120
+ # Customize ourselves using block syntax
114
121
  def build(&block)
115
122
  DslProxy.exec(self, &block)
116
123
  end
117
124
 
125
+ # Deletes all stored data in prep for an import run
118
126
  def reset
119
127
  @data = Data.new
120
128
  end
121
129
 
122
- # When true, matches either the passed value or the index (if position has been explicitly set)
130
+ # When true, our header definition or index match the passed text or column index.
123
131
  def match_header?(text, index)
124
- res = index == self.fixed_index || (@header && !@header.match(text).nil?)
125
- # puts "#{@header.inspect} ~ #{text.inspect} => #{res.inspect}"
126
- res
132
+ return true if index == self.fixed_index
133
+ if @header.is_a?(Regexp)
134
+ return !@header.match(text).nil?
135
+ else
136
+ return @header.to_s.downcase == text
137
+ end
127
138
  end
128
139
 
129
- # Use any custom parser defined to process the given value, capturing
140
+ # Applies any custom parser defined to process the given value, capturing
130
141
  # errors as needed
131
142
  def parse_value(row, val)
132
143
  return val if @parse.nil?
@@ -138,6 +149,7 @@ class Importer
138
149
  end
139
150
  end
140
151
 
152
+ # Applies any validation to a parsed value
141
153
  def validate_value(row, val)
142
154
  return unless @validate
143
155
  begin
@@ -149,6 +161,9 @@ class Importer
149
161
  end
150
162
  end
151
163
 
164
+ # Returns the fixed index of this column based on the set position.
165
+ # In other words, a position of 2 would return an index of 1 (as
166
+ # indices are 0-based), where a position of 'C' would return 2.
152
167
  def fixed_index
153
168
  return nil unless @position
154
169
  if @position.is_a?(Fixnum)
@@ -158,14 +173,20 @@ class Importer
158
173
  end
159
174
  end
160
175
 
176
+ # Pretty name for ourselves
161
177
  def to_s
162
178
  'Column ' + @data.pos
163
179
  end
164
180
 
181
+ # Extracts the sheet's values for this column and returns them in an array.
182
+ # Note that the array indices ARE NOT row indices, as the rows may have been
183
+ # filtered and any header rows have been skipped.
165
184
  def to_a
166
185
  @sheet.data.rows.collect {|r| r[@key] }
167
186
  end
168
187
 
188
+ # Extracts the sheet's values for this column and returns them in a hash of
189
+ # row num => value for all non-filtered, non-header rows.
169
190
  def to_h
170
191
  res = {}
171
192
  @sheet.data.rows.collect {|r| res[r.num] = r[@key] }
@@ -6,19 +6,34 @@ class Importer
6
6
 
7
7
  def initialize(importer)
8
8
  super(importer, :csv)
9
- end
10
-
11
- def load_stream(stream)
12
- text = stream.read
13
- encoding = @importer.encoding || 'UTF-8'
14
- raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
15
- @importer.default_sheet.parse_raw_data(raw_rows)
9
+ supports_file!
10
+ supports_stream!
16
11
  end
17
12
 
18
- def load_file(path)
19
- encoding = @importer.encoding || 'UTF-8'
20
- raw_rows = CSV.read(path, :encoding => "#{encoding}:UTF-8")
21
- @importer.default_sheet.parse_raw_data(raw_rows)
13
+ def init_source(mode, source)
14
+ if mode == :stream
15
+ # For streams, we just read 'em in and parse 'em
16
+ text = source.read
17
+ encoding = @importer.encoding || 'UTF-8'
18
+ @raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
19
+ true
20
+
21
+ elsif mode == :file
22
+ # Files have a different path
23
+ encoding = @importer.encoding || 'UTF-8'
24
+ @raw_rows = CSV.read(source, :encoding => "#{encoding}:UTF-8")
25
+ true
26
+
27
+ else
28
+ @importer.add_error("Unsupported CSV mode: #{mode}")
29
+ false
30
+ end
31
+ end
32
+
33
+ # Normally, we'd check the key and return the proper data, but for CSV files,
34
+ # there's only one "sheet"
35
+ def load_raw_sheet(key)
36
+ @raw_rows
22
37
  end
23
38
 
24
39
  end
@@ -0,0 +1,39 @@
1
+ class Importer
2
+
3
+ # Special data reader that allows you to define a block to do the import yourself for cases
4
+ # where you have an odd text-based format or something else you want to be able to process
5
+ # using this gem. Check out Importer#on_file and Importer#on_stream to see how to use
6
+ # this reader type.
7
+ class CustomReader < DataReader
8
+
9
+ attr_accessor :readers
10
+
11
+ def initialize(importer)
12
+ super(importer, :custom)
13
+ @readers = {}
14
+ end
15
+
16
+ # Called by the importer to add a handler for the given mode
17
+ def set_reader(mode, block)
18
+ @readers[mode] = block
19
+ @supports << mode
20
+ end
21
+
22
+ def init_source(mode, source)
23
+ @mode = mode
24
+ @source = source
25
+ end
26
+
27
+ def load_raw_sheet(sheet)
28
+ reader = @readers[@mode]
29
+ reader.call(@source, sheet)
30
+
31
+ rescue Exception => e
32
+ # Catch any exceptions thrown and note them with helpful stacktrace info for debugging custom readers
33
+ @importer.add_error("Error in custom reader when loading sheet #{sheet}: #{e} @ #{e.backtrace.first}")
34
+ false
35
+ end
36
+
37
+ end
38
+
39
+ end
@@ -14,6 +14,24 @@ class Importer
14
14
  end
15
15
  end
16
16
 
17
+ # Implement our automatic reader selection, based on the import source
18
+ def self.for_source(importer, source)
19
+ data = nil
20
+ if is_stream?(source)
21
+ data = DataReader::for_stream(importer, source)
22
+ unless data
23
+ importer.add_error("Unable to find format handler for stream")
24
+ end
25
+ else
26
+ data = DataReader::for_path(importer, source)
27
+ unless data
28
+ importer.add_error("Unable to find format handler for file #{source}")
29
+ end
30
+ end
31
+ data
32
+ end
33
+
34
+ # Factory method to build a reader from an explicit format selector
17
35
  def self.for_format(importer, format)
18
36
  case format
19
37
  when :csv
@@ -29,6 +47,7 @@ class Importer
29
47
  end
30
48
  end
31
49
 
50
+ # Figure out which format to use for a given path based on file name
32
51
  def self.for_path(importer, path)
33
52
  format = path.to_s.extract(/\.(csv|xlsx?)\z/i)
34
53
  if format
@@ -39,11 +58,19 @@ class Importer
39
58
  end
40
59
  end
41
60
 
61
+ # Figure out which format to use based on a stream's source file info
42
62
  def self.for_stream(importer, stream)
43
63
  path = path_from_stream(stream)
44
64
  for_path(importer, path)
45
65
  end
46
66
 
67
+ # Attempt to determine if the given source is a stream
68
+ def self.is_stream?(source)
69
+ # For now, just assume anything that has a #read method is a stream, in
70
+ # duck-type fashion
71
+ source.respond_to?(:read)
72
+ end
73
+
47
74
  # Try to find the original file name for the given stream,
48
75
  # as in the case where a file is uploaded to Rails and we're dealing with an
49
76
  # ActionDispatch::Http::UploadedFile.
@@ -60,16 +87,40 @@ class Importer
60
87
  def initialize(importer, format)
61
88
  @importer = importer
62
89
  @format = format
63
- @multisheet = true
90
+ @supports = []
64
91
  end
65
92
 
93
+ def supports_stream!
94
+ @supports << :stream
95
+ end
96
+
97
+ def supports_file!
98
+ @supports << :file
99
+ end
100
+
101
+ def supports?(mode)
102
+ @supports.include?(mode)
103
+ end
104
+
105
+ def supports_file?
106
+ supports?(:file)
107
+ end
108
+
109
+ def supports_stream?
110
+ supports?(:stream)
111
+ end
112
+
113
+ # Core data reader method. Takes a given input source (either a stream or
114
+ # a file path) and attempts to load it. Returns true if successful, false
115
+ # if not. If false, there will be one or more errors explaining what went
116
+ # wrong.
66
117
  def load(path_or_stream)
67
118
  # Figure out what we've been passed, and handle it
68
- if path_or_stream.respond_to?(:read)
119
+ if self.class.is_stream?(path_or_stream)
69
120
  # We have a stream (open file, upload, whatever)
70
- if respond_to?(:load_stream)
121
+ if supports_stream?
71
122
  # Stream loader defined, run it
72
- load_stream(path_or_stream)
123
+ load_sheets(:stream, path_or_stream)
73
124
  else
74
125
  # Write to temp file, as some of our readers only read physical files, annoyingly
75
126
  file = Tempfile.new(['importer', ".#{format}"])
@@ -77,7 +128,7 @@ class Importer
77
128
  begin
78
129
  file.write path_or_stream.read
79
130
  file.close
80
- load_file(file.path)
131
+ load_sheets(:file, file.path)
81
132
  ensure
82
133
  file.close
83
134
  file.unlink
@@ -86,23 +137,58 @@ class Importer
86
137
 
87
138
  elsif path_or_stream.is_a?(String)
88
139
  # Assume it's a path
89
- if respond_to?(:load_file)
90
- # We're all set, load up the given path
91
- load_file(path_or_stream)
140
+ if File.exist?(path_or_stream)
141
+ if supports_file?
142
+ # We're all set, load up the given path
143
+ load_sheets(:file, path_or_stream)
144
+ else
145
+ # No file handler, so open the file and run the stream processor
146
+ file = File.open(path_or_stream, 'rb')
147
+ load_sheets(:stream, file)
148
+ end
92
149
  else
93
- # No file handler, so open the file and run the stream processor
94
- file = File.open(path_or_stream, 'rb')
95
- load_stream(file)
150
+ @importer.add_error("Unable to locate source file #{path_or_stream}")
96
151
  end
97
152
 
98
153
  else
99
- raise "Unable to load data: #{path_or_stream.inspect}"
154
+ @importer.add_error("Unable to load data source - not a file path or stream: #{path_or_stream.inspect}")
100
155
  end
101
156
 
102
157
  # Return our status
103
158
  !@importer.has_errors?
104
159
  end
105
160
 
161
+ # Load up the sheets in the correct mode
162
+ def load_sheets(mode, source)
163
+ # Let our derived classes open the file, etc. as they need
164
+ if init_source(mode, source)
165
+ # Once the source is set, run through each defined sheet, pass it to
166
+ # our sheet loader, and have the sheet parse it out.
167
+ @importer.sheets.values.each do |sheet|
168
+ res = load_raw_sheet(sheet)
169
+ if res === false
170
+ # D'oh.
171
+ else
172
+ # Tell the sheet to parse the data
173
+ sheet.parse_raw_data(res)
174
+ end
175
+ end
176
+ end
177
+ end
178
+
179
+ # Override this method in derived classes to set up
180
+ # the given source in the given mode
181
+ def init_source(mode, source)
182
+ raise "Unimplemented method #init_source in data reader #{self.class.name}"
183
+ end
184
+
185
+ # Override this method in derived classes to take the given sheet definition,
186
+ # find that sheet in the input source, and read out the raw (unparsed) rows
187
+ # as an array of arrays. Return false if the sheet cannot be loaded.
188
+ def load_raw_sheet(sheet)
189
+ raise "Unimplemented method #load_raw_sheet in data reader #{self.class.name}"
190
+ end
191
+
106
192
  # Provides default value parsing/coercion for all derived data readers. Attempts to be clever and
107
193
  # handle edge cases like converting '5.00' to 5 when in integer mode, etc. If you find your inputs aren't
108
194
  # being parsed correctly, add a custom #parse block on your Column definition.
@@ -33,8 +33,9 @@
33
33
  class Importer
34
34
 
35
35
  # Array of error message or nil for each non-header row
36
- attr_accessor :errors, :warnings, :data
36
+ attr_accessor :errors, :warnings
37
37
  attr_accessor :sheets
38
+ attr_reader :data, :custom_reader
38
39
  # Source file/stream encoding, assumes UTF-8 if none specified
39
40
  dsl_accessor :encoding
40
41
 
@@ -51,16 +52,34 @@ class Importer
51
52
  reset
52
53
  end
53
54
 
55
+ # Takes a block, and sets self to be importer instance, so you can
56
+ # just call #column, #sheet, etc. directly.
54
57
  def build(&block)
55
58
  DslProxy.exec(self, &block) if block
56
59
  self
57
60
  end
58
61
 
59
- def default_sheet
60
- sheet(1)
62
+ # For the common case where there is only one "sheet", e.g. CSV files.
63
+ def default_sheet(&block)
64
+ sheet(1, true, &block)
61
65
  end
62
66
 
63
- # Access a Sheet definition by id (either number (1-N) or sheet name)
67
+ # Access a Sheet definition by id (either number (1-N) or sheet name).
68
+ # Used during #build calls to define a sheet with a passed block, like so:
69
+ #
70
+ # Importer.build do
71
+ # sheet(1) do
72
+ # column :store_name
73
+ # column :store_address
74
+ # end
75
+ # sheet('Orders') do
76
+ # column :id
77
+ # column :price
78
+ # filter do |row|
79
+ # row[:price].present?
80
+ # end
81
+ # end
82
+ # end
64
83
  def sheet(id, create=true, &block)
65
84
  # Find the sheet, creating it if needed (and requested!)
66
85
  if @sheets[id].nil?
@@ -78,18 +97,40 @@ class Importer
78
97
  # Return the sheet
79
98
  sheet
80
99
  end
100
+
101
+ # Define a custom file reader to implement your own sheet parsing.
102
+ def on_file(&block)
103
+ @custom_reader = CustomReader.new(self) unless @custom_reader
104
+ @custom_reader.set_reader(:file, block)
105
+ end
106
+
107
+ def on_stream(&block)
108
+ @custom_reader = CustomReader.new(self) unless @custom_reader
109
+ @custom_reader.set_reader(:stream, block)
110
+ end
81
111
 
82
112
  # Very, very commonly we only want to deal with the default sheet. In this case,
83
113
  # let folks skip the sheet(n) do ... end block wrapper and just define columns
84
- # against the main importer. Internally, proxy those calls to the first sheet
114
+ # against the main importer. Internally, proxy those calls to the first sheet.
85
115
  def column(*args, &block)
86
116
  default_sheet.column(*args, &block)
87
117
  end
88
118
 
119
+ # Ditto for filters
89
120
  def filter(*args, &block)
90
121
  default_sheet.filter(*args, &block)
91
122
  end
92
123
 
124
+ # Ditto for start row too
125
+ def start_row(row_num)
126
+ default_sheet.start_row(row_num)
127
+ end
128
+
129
+ # More facading
130
+ def headerless!
131
+ default_sheet.headerless!
132
+ end
133
+
93
134
  # First call to a freshly #build'd importer, this will read the file/stream/path supplied,
94
135
  # validate the required values, run custom validations... basically pre-parse and
95
136
  # massage the supplied data. It will return true on success, or false if one
@@ -113,27 +154,23 @@ class Importer
113
154
  reset
114
155
 
115
156
  # Get the reader for this format
116
- format = options.delete(:format)
117
- if format && format != :auto
157
+ default = @custom_reader ? :custom : :auto
158
+ format = options.delete(:format) { default }
159
+ if format == :custom
160
+ # Custom format selected, use our internal custom reader
161
+ @data = @custom_reader
162
+
163
+ elsif format && format != :auto
164
+ # Explicit format requested
118
165
  @data = DataReader::for_format(self, format)
119
- unless reader
166
+ unless @data
120
167
  add_error("Unable to find format handler for format #{format} - aborting")
121
168
  return
122
169
  end
170
+
123
171
  else
124
- if path_or_stream.respond_to?(:read)
125
- @data = DataReader::for_stream(self, path_or_stream)
126
- unless @data
127
- add_error("Unable to find format handler for stream - aborting")
128
- return
129
- end
130
- else
131
- @data = DataReader::for_path(self, path_or_stream)
132
- unless @data
133
- add_error("Unable to find format handler for file #{path_or_stream} - aborting")
134
- return
135
- end
136
- end
172
+ # Auto select
173
+ @data = DataReader::for_source(self, path_or_stream)
137
174
  end
138
175
 
139
176
  # Read in the data!
@@ -1,7 +1,39 @@
1
1
  class Importer
2
2
 
3
3
  # The Sheet class handles building the sheet's column configuration and other
4
- # setup, then holds all load-time row data.
4
+ # setup, then holds all load-time row data. In some file types (Excel mostly)
5
+ # there may be more than one sheet definition in a given importer. In others,
6
+ # the default sheet is the only one (possibly implicitly) defined.
7
+ #
8
+ # The following builder options are available:
9
+ #
10
+ # Importer.build do
11
+ # sheet('Some Sheet Name') do
12
+ # # Don't try to look for a header using column definitions, there is no header
13
+ # headerless!
14
+ #
15
+ # # Manually set the start row for data in this sheet, defaults to nil
16
+ # # indicating that the data rows start immediately following the header.
17
+ # start_row 4
18
+ #
19
+ # # Define a filter that will skip unneeded rows. The filter command takes
20
+ # # a block that receives the parsed (but not validated!) row data as an
21
+ # # associative hash of :col_key => <parsed value>, and returns
22
+ # # true to keep the row or false to exclude it.
23
+ # filter do |row|
24
+ # row[:id].to_i > 5000
25
+ # end
26
+ #
27
+ # # Of course, the main thing to do in a sheet is define columns. See the
28
+ # # Column class' notes for options when defining a column. Note that
29
+ # # you can define columns using either hash-style:
30
+ # column :id, :type => :integer
31
+ # # or builder-style:
32
+ # column :name do
33
+ # header /company\s*name/
34
+ # type :string
35
+ # end
36
+ # end
5
37
  class Sheet
6
38
 
7
39
  # Inner class for holding load-time data that gets reset on each load call
@@ -37,10 +69,16 @@ class Importer
37
69
  reset
38
70
  end
39
71
 
72
+ # Define our columns etc. via builder-style method calling
40
73
  def build(&block)
41
74
  DslProxy.exec(self, &block)
42
75
  end
43
76
 
77
+ # Call with a block accepting a single Importer::Row with contents that
78
+ # look like :column_key => <parsed value>. Any filtered rows
79
+ # will not be present. If you want to register an error, simply
80
+ # raise "some text" and it will be added to the importer's error
81
+ # list for display to the user, logging, or whatever.
44
82
  def process
45
83
  @data.rows.each do |row|
46
84
  begin
@@ -51,13 +89,33 @@ class Importer
51
89
  end
52
90
  end
53
91
 
54
- def column(key, &block)
92
+ # Add a new column definition to our list, allows customizing the new
93
+ # column with a builder block. See Importer::Column docs for
94
+ # options. In lieu of a builder mode, you can pass the same values
95
+ # as key => value pairs in the options hash to this method, so:
96
+ #
97
+ # column(:foo) do
98
+ # type :string
99
+ # parse do |val|
100
+ # val.to_s.upcase
101
+ # end
102
+ # end
103
+ #
104
+ # Is equivalent to:
105
+ #
106
+ # column(:foo, :type => :string, :parse => lambda {|val| val.to_s.upcase})
107
+ #
108
+ # Use whichever you prefer!
109
+ def column(key, options_hash = {}, &block)
110
+ # Find existing column with key to allow re-opening an existing definition
55
111
  col = @columns.detect {|c| c.key == key }
56
112
  unless col
57
- col = Column.new(self, key)
113
+ # if none found, add a new one
114
+ col = Column.new(self, key, options_hash)
58
115
  @columns << col
59
116
  end
60
117
 
118
+ # Customize if needed
61
119
  DslProxy::exec(col, &block) if block
62
120
 
63
121
  col
@@ -73,9 +131,9 @@ class Importer
73
131
  if parse_header(raw_rows)
74
132
  # Now, run all the data and add it as a Row instance
75
133
  raw_rows.each_with_index do |raw, index|
76
- line = index + 1
77
- if line >= @data.start_row
78
- add_row(line, raw)
134
+ row_num = index + 1
135
+ if row_num >= @data.start_row
136
+ add_row(row_num, raw)
79
137
  end
80
138
  end
81
139
  end
@@ -128,8 +186,8 @@ class Importer
128
186
  # Use implicit or explicit column position when told to not look for a header
129
187
  next_index = 0
130
188
  @columns.each do |col|
131
- if col.index.present?
132
- next_index = col.index
189
+ unless col.position.nil?
190
+ next_index = col.fixed_index
133
191
  end
134
192
  col.data.index = next_index
135
193
  next_index += 1
@@ -140,6 +198,9 @@ class Importer
140
198
  else
141
199
  # Match by testing
142
200
  raw_rows.each_with_index do |row, i|
201
+ # Um, have data?
202
+ next unless row
203
+
143
204
  # Set up for this iteration
144
205
  remaining = @columns.dup
145
206
 
@@ -165,11 +226,13 @@ class Importer
165
226
  end
166
227
  end
167
228
 
229
+ # When true, the given sheet name or zero-based index
230
+ # is a match with our id.
168
231
  def match_sheet?(name, index)
169
232
  if @id.is_a?(Fixnum)
170
233
  @id.to_i == index+1
171
234
  else
172
- @id.to_s == name
235
+ @id.to_s.downcase == name.downcase
173
236
  end
174
237
  end
175
238
 
@@ -177,6 +240,8 @@ class Importer
177
240
  "Sheet #{@id}"
178
241
  end
179
242
 
243
+ # Return all parsed, filtered data in the sheet as an
244
+ # array of arrays.
180
245
  def dump
181
246
  @data.rows.collect(&:values)
182
247
  end
@@ -6,55 +6,41 @@ class Importer
6
6
  super(importer, :xlsx)
7
7
  end
8
8
 
9
- def load_file(path)
10
- spreadsheet = Roo::Excel.new(path, :file_warning => :ignore)
11
- if spreadsheet
12
- # Get our list of sheet definitions, and run all the sheets in the spreadsheet
13
- remaining_sheets = @importer.sheets.values
14
- spreadsheet.sheets.each_with_index do |name, index|
15
- # Look for a sheet definition that matches this sheet's name/index
16
- sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
17
- if sheet
18
- # Remove from our list of remaining sheets
19
- remaining_sheets.delete(sheet)
20
- # Extract our raw data
21
- raw_rows = []
22
- spreadsheet.sheet(name).each_with_index do |row, line|
23
- raw_rows << row
24
- end
25
- # Let the sheet sort it out
26
- sheet.parse_raw_data(raw_rows)
27
- end
28
- end
29
- return true
9
+ def init_source(mode, source)
10
+ if mode == :file
11
+ @spreadsheet = Roo::Excel.new(source, :file_warning => :ignore)
12
+ true
30
13
  else
31
- @importer.add_error("Unable to read Excel file at path #{path}")
32
- return false
14
+ @importer.add_error("Unsupported XLS mode: #{mode}")
15
+ false
33
16
  end
34
-
35
17
  rescue Exception => e
36
- @importer.add_error("Error reading file #{path}: #{e}")
18
+ @importer.add_error("Error reading file #{source}: #{e}")
37
19
  false
38
20
  end
39
21
 
40
- private
41
-
42
- def load_raw_rows(sheet, raw_rows)
43
- # Figure out where our columns are and where our data starts
44
- column_map = sheet.find_header(raw_rows[0...5])
45
- start_row = sheet.data.start_row
46
-
47
- # Run all the raw rows and convert them to Row instances, making notes of errors along the way...
48
- if !@importer.has_errors?
49
- raw_rows.each_with_index do |raw, index|
50
- line = index + 1
51
- if line >= start_row
52
- row = sheet.add_row(line, raw)
22
+ def load_raw_sheet(sheet)
23
+ @spreadsheet.sheets.each_with_index do |name, index|
24
+ # See if this sheet's name or index matches the requested sheet definition
25
+ if sheet.match_sheet?(name, index)
26
+ # Extract our raw data
27
+ raw_rows = []
28
+ @spreadsheet.sheet(name).each_with_index do |row, line|
29
+ raw_rows << row
53
30
  end
31
+ return raw_rows
54
32
  end
55
33
  end
34
+ # This is not good.
35
+ @importer.add_error("Unable to find sheet #{sheet}")
36
+ return false
37
+
38
+ rescue Exception => e
39
+ # Not sure why we'd get here, but we strive for error-freedom here, yessir.
40
+ @importer.add_error("Error loading sheet #{sheet}: #{e}")
41
+ false
56
42
  end
57
-
43
+
58
44
  end
59
45
 
60
46
  end
@@ -1,58 +1,45 @@
1
1
  class Importer
2
2
 
3
+ # Uses the Roo gem to read in .xlsx files
3
4
  class XlsxReader < DataReader
4
5
 
5
6
  def initialize(importer)
6
7
  super(importer, :xlsx)
8
+ supports_file!
7
9
  end
8
10
 
9
- def load_file(path)
10
- spreadsheet = Roo::Excelx.new(path, :file_warning => :ignore)
11
- if spreadsheet
12
- # Get our list of sheet definitions, and run all the sheets in the spreadsheet
13
- remaining_sheets = @importer.sheets.values
14
- spreadsheet.sheets.each_with_index do |name, index|
15
- # Look for a sheet definition that matches this sheet's name/index
16
- sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
17
- if sheet
18
- # Remove from our list of remaining sheets
19
- remaining_sheets.delete(sheet)
20
- # Extract our raw data
21
- raw_rows = []
22
- spreadsheet.sheet(name).each_with_index do |row, line|
23
- raw_rows << row
24
- end
25
- # Let the sheet sort it out
26
- sheet.parse_raw_data(raw_rows)
27
- end
28
- end
29
- return true
11
+ def init_source(mode, source)
12
+ if mode == :file
13
+ @spreadsheet = Roo::Excelx.new(source, :file_warning => :ignore)
14
+ true
30
15
  else
31
- @importer.add_error("Unable to read ExcelX file at path #{path}")
32
- return false
16
+ @importer.add_error("Unsupported XLSX mode: #{mode}")
17
+ false
33
18
  end
34
-
35
19
  rescue Exception => e
36
- @importer.add_error("Error reading file #{path}: #{e} @ #{e.backtrace.first}")
20
+ @importer.add_error("Error reading file #{source}: #{e}")
37
21
  false
38
22
  end
39
23
 
40
- private
41
-
42
- def load_raw_rows(sheet, raw_rows)
43
- # Figure out where our columns are and where our data starts
44
- column_map = sheet.find_header(raw_rows[0...5])
45
- start_row = sheet.data.start_row
46
-
47
- # Run all the raw rows and convert them to Row instances, making notes of errors along the way...
48
- if !@importer.has_errors?
49
- raw_rows.each_with_index do |raw, index|
50
- line = index + 1
51
- if line >= start_row
52
- row = sheet.add_row(line, raw)
24
+ def load_raw_sheet(sheet)
25
+ @spreadsheet.sheets.each_with_index do |name, index|
26
+ # See if this sheet's name or index matches the requested sheet definition
27
+ if sheet.match_sheet?(name, index)
28
+ # Extract our raw data
29
+ raw_rows = []
30
+ @spreadsheet.sheet(name).each_with_index do |row, line|
31
+ raw_rows << row
53
32
  end
33
+ return raw_rows
54
34
  end
55
35
  end
36
+ @importer.add_error("Unable to find sheet #{sheet}")
37
+ return false
38
+
39
+ rescue Exception => e
40
+ # Not sure why we'd get here, but we strive for error-freedom here, yessir.
41
+ @importer.add_error("Error loading sheet #{sheet}: #{e}")
42
+ false
56
43
  end
57
44
 
58
45
  end
data/lib/iron/import.rb CHANGED
@@ -11,4 +11,5 @@ require_relative 'import/data_reader'
11
11
  require_relative 'import/csv_reader'
12
12
  require_relative 'import/xls_reader'
13
13
  require_relative 'import/xlsx_reader'
14
+ require_relative 'import/custom_reader'
14
15
  require_relative 'import/importer'
@@ -0,0 +1 @@
1
+ require 'iron/import'
@@ -0,0 +1,46 @@
1
+ describe Importer::CustomReader do
2
+
3
+ before do
4
+ @importer = Importer.new
5
+ end
6
+
7
+ it 'should set up correctly for on_file handling' do
8
+ @importer.custom_reader.should be_nil
9
+ @importer.build do
10
+ headerless!
11
+ on_file do |source, sheet|
12
+ []
13
+ end
14
+ end
15
+ @importer.custom_reader.should be_an(Importer::CustomReader)
16
+ @importer.custom_reader.should be_supports_file
17
+ @importer.custom_reader.should_not be_supports_stream
18
+ end
19
+
20
+ it 'should load the ICD10 test document' do
21
+ importer = Importer.build do
22
+ headerless!
23
+ column :code do
24
+ required!
25
+ end
26
+ column :desc do
27
+ required!
28
+ end
29
+
30
+ on_file do |source, sheet|
31
+ File.readlines(source).collect do |line|
32
+ line.extract(/([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/)
33
+ end
34
+ end
35
+ end
36
+ importer.import(SpecHelper.sample_path('icd10-custom.txt'))
37
+ importer.error_summary.should be_nil
38
+ importer.default_sheet.dump.should == [
39
+ {:code => 'A000', :desc => 'Cholera due to Vibrio cholerae 01, biovar cholerae'},
40
+ {:code => 'A001', :desc => 'Cholera due to Vibrio cholerae 01, biovar eltor'},
41
+ {:code => 'A009', :desc => 'Cholera, unspecified'},
42
+ {:code => 'A0100', :desc => 'Typhoid fever, unspecified'}
43
+ ]
44
+ end
45
+
46
+ end
@@ -87,7 +87,7 @@ describe Importer::DataReader do
87
87
  end
88
88
 
89
89
  it 'should build an instance based on stream' do
90
- Importer::DataReader.for_stream(@importer, mock(original_filename: "nanodrop.xlsx", content_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")).should be_a(Importer::XlsxReader)
90
+ Importer::DataReader.for_stream(@importer, double(original_filename: "nanodrop.xlsx", content_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")).should be_a(Importer::XlsxReader)
91
91
  end
92
92
 
93
93
  end
@@ -0,0 +1,4 @@
1
+ A000 Cholera due to Vibrio cholerae 01, biovar cholerae
2
+ A001 Cholera due to Vibrio cholerae 01, biovar eltor
3
+ A009 Cholera, unspecified
4
+ A0100 Typhoid fever, unspecified
metadata CHANGED
@@ -1,20 +1,23 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iron-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Morris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-19 00:00:00.000000000 Z
11
+ date: 2015-08-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: iron-extensions
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.2'
20
+ - - ">="
18
21
  - !ruby/object:Gem::Version
19
22
  version: 1.2.1
20
23
  type: :runtime
@@ -22,6 +25,9 @@ dependencies:
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
27
  - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.2'
30
+ - - ">="
25
31
  - !ruby/object:Gem::Version
26
32
  version: 1.2.1
27
33
  - !ruby/object:Gem::Dependency
@@ -80,9 +86,11 @@ files:
80
86
  - LICENSE
81
87
  - README.rdoc
82
88
  - Version.txt
89
+ - lib/iron-import.rb
83
90
  - lib/iron/import.rb
84
91
  - lib/iron/import/column.rb
85
92
  - lib/iron/import/csv_reader.rb
93
+ - lib/iron/import/custom_reader.rb
86
94
  - lib/iron/import/data_reader.rb
87
95
  - lib/iron/import/error.rb
88
96
  - lib/iron/import/importer.rb
@@ -92,11 +100,13 @@ files:
92
100
  - lib/iron/import/xlsx_reader.rb
93
101
  - spec/importer/column_spec.rb
94
102
  - spec/importer/csv_reader_spec.rb
103
+ - spec/importer/custom_reader_spec.rb
95
104
  - spec/importer/data_reader_spec.rb
96
105
  - spec/importer/importer_spec.rb
97
106
  - spec/importer/row_spec.rb
98
107
  - spec/importer/sheet_spec.rb
99
108
  - spec/importer/xlsx_reader_spec.rb
109
+ - spec/samples/icd10-custom.txt
100
110
  - spec/samples/nanodrop.xlsx
101
111
  - spec/samples/simple.csv
102
112
  - spec/samples/test-products.xls