iron-import 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 70c4748d780e9854cbd60622563b74d3b7ce2b5c
4
- data.tar.gz: d6503f0f7a08b4c88da5813b3114446baf1fff1a
3
+ metadata.gz: 04d666ea1e0170b0186d75fc8b0ec367a16e528a
4
+ data.tar.gz: 9dad576e17b7d8fc4b523ffe6b4e53c85feae9c8
5
5
  SHA512:
6
- metadata.gz: 488a0e4b2d8ed83914bb2a6c907358ee584c0849f26bf9e64d6cc4bd8c2296997e4bc580f59b3bff4db6fa699a6abf94f5a85cd31c1585f03f728523025529a3
7
- data.tar.gz: 00c6e27cf433423c9c1cc14828c11cd895459b0c12e86aa57ec65b35b049b0b7939dda98edd3359aa4dab8af945b0a17b9ef5b7fc486300edeb8b987b21d65dd
6
+ metadata.gz: e5a31e81381d78c29da480b296a8e9569ed415a32f50610881014de52a7b92c925687d3ee4ba683bf64a50562b26bf5785acf09070017710938d39c52f0087ad
7
+ data.tar.gz: d29901644886a98c617dd215b52edec0e8c011875ae2c0d6724c8f0f03c26bfdb9ea8028a3322c9cf1d16f91d9010950203c73ff0fc0485ce0bfc09ffc53e6f2
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ == 0.6.0 / 2015-08-17
2
+
3
+ * Refactored readers to allow for custom format reading
4
+ * Vastly improved internal and user-facing comments
5
+ * Improved error logging, replaced some exceptions with errors
6
+
1
7
  == 0.5.0 / 2015-02-XX
2
8
 
3
9
  * Initial revision
data/README.rdoc CHANGED
@@ -25,7 +25,7 @@ any warnings and errors encountered... well, this is the library for you!
25
25
 
26
26
  IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
27
27
  for the task. Breaking changes will be noted by increases in the second-level version,
28
- ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not.
28
+ ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
29
29
 
30
30
  == SAMPLE USAGE
31
31
 
@@ -65,6 +65,6 @@ RVM users can skip the sudo:
65
65
 
66
66
  Then use
67
67
 
68
- require 'iron/import'
68
+ require 'iron-import'
69
69
 
70
70
  to require the library code.
data/Version.txt CHANGED
@@ -1 +1 @@
1
- 0.5.0
1
+ 0.6.0
@@ -24,13 +24,14 @@ class Importer
24
24
  # # Instead of a type, you can set an explicit parse block. Be aware
25
25
  # # that different source types may give you different raw values for what
26
26
  # # seems like the "same" source value, for example an Excel source file
27
- # # will give you a float value for all numeric types, even "integers"
27
+ # # will give you a float value for all numeric types, even "integers".
28
28
  # parse do |raw_value|
29
29
  # raw_value.to_i + 1000
30
30
  # end
31
31
  #
32
32
  # # You can also add a custom validator to check the value and add
33
- # # an error if it's not within a given range, or whatever:
33
+ # # an error if it's not within a given range, or whatever. To fail validation,
34
+ # # simply raise the error you wish recorded.
34
35
  # validate do |parsed_value|
35
36
  # raise "Out of range" unless (parsed_value > 0 && parsed_value < 5000)
36
37
  # end
@@ -83,50 +84,60 @@ class Importer
83
84
  str = chars[index] + str
84
85
  str
85
86
  end
86
-
87
- def initialize(sheet, key)
87
+
88
+ # Create a new column definition, with the owning sheet, the key for the column,
89
+ # and an optional set of options. The options supported are the same as those supported
90
+ # in block/builder mode.
91
+ def initialize(sheet, key, options_hash = {})
88
92
  # Save off our info
89
93
  @key = key
90
94
  @sheet = sheet
91
95
  @importer = @sheet.importer
92
96
 
93
97
  # Return it as a string, by default
94
- @type = :string
98
+ @type = options_hash.delete(:type) { :string }
95
99
 
96
100
  # By default, we allow empty values
97
- @required = false
101
+ @required = options_hash.delete(:required) { false }
98
102
 
99
103
  # Position can be explicitly set
100
- @position = nil
104
+ @position = options_hash.delete(:position)
101
105
 
102
106
  # By default, don't parse incoming data, just pass it through
103
- @parse = nil
107
+ @parse = options_hash.delete(:parse)
104
108
 
105
109
  # Default matcher, looks for the presence of the column key as text anywhere
106
110
  # in the header string, ignoring case and using underscores as spaces, ie
107
111
  # :order_id => /\A\s*order id\s*\z/i
108
- @header = Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
112
+ @header = options_hash.delete(:header) {
113
+ Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
114
+ }
109
115
 
110
116
  # Reset our state to pre-load status
111
117
  reset
112
118
  end
113
119
 
120
+ # Customize ourselves using block syntax
114
121
  def build(&block)
115
122
  DslProxy.exec(self, &block)
116
123
  end
117
124
 
125
+ # Deletes all stored data in prep for an import run
118
126
  def reset
119
127
  @data = Data.new
120
128
  end
121
129
 
122
- # When true, matches either the passed value or the index (if position has been explicitly set)
130
+ # When true, our header definition or index match the passed text or column index.
123
131
  def match_header?(text, index)
124
- res = index == self.fixed_index || (@header && !@header.match(text).nil?)
125
- # puts "#{@header.inspect} ~ #{text.inspect} => #{res.inspect}"
126
- res
132
+ return true if index == self.fixed_index
133
+ if @header.is_a?(Regexp)
134
+ return !@header.match(text).nil?
135
+ else
136
+ return @header.to_s.downcase == text
137
+ end
127
138
  end
128
139
 
129
- # Use any custom parser defined to process the given value, capturing
140
+ # Applies any custom parser defined to process the given value, capturing
130
141
  # errors as needed
131
142
  def parse_value(row, val)
132
143
  return val if @parse.nil?
@@ -138,6 +149,7 @@ class Importer
138
149
  end
139
150
  end
140
151
 
152
+ # Applies any validation to a parsed value
141
153
  def validate_value(row, val)
142
154
  return unless @validate
143
155
  begin
@@ -149,6 +161,9 @@ class Importer
149
161
  end
150
162
  end
151
163
 
164
+ # Returns the fixed index of this column based on the set position.
165
+ # In other words, a position of 2 would return an index of 1 (as
166
+ # indices are 0-based), where a position of 'C' would return 2.
152
167
  def fixed_index
153
168
  return nil unless @position
154
169
  if @position.is_a?(Fixnum)
@@ -158,14 +173,20 @@ class Importer
158
173
  end
159
174
  end
160
175
 
176
+ # Pretty name for ourselves
161
177
  def to_s
162
178
  'Column ' + @data.pos
163
179
  end
164
180
 
181
+ # Extracts the sheet's values for this column and returns them in an array.
182
+ # Note that the array indices ARE NOT row indices, as the rows may have been
183
+ # filtered and any header rows have been skipped.
165
184
  def to_a
166
185
  @sheet.data.rows.collect {|r| r[@key] }
167
186
  end
168
187
 
188
+ # Extracts the sheet's values for this column and returns them in a hash of
189
+ # row num => value for all non-filtered, non-header rows.
169
190
  def to_h
170
191
  res = {}
171
192
  @sheet.data.rows.collect {|r| res[r.num] = r[@key] }
@@ -6,19 +6,34 @@ class Importer
6
6
 
7
7
  def initialize(importer)
8
8
  super(importer, :csv)
9
- end
10
-
11
- def load_stream(stream)
12
- text = stream.read
13
- encoding = @importer.encoding || 'UTF-8'
14
- raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
15
- @importer.default_sheet.parse_raw_data(raw_rows)
9
+ supports_file!
10
+ supports_stream!
16
11
  end
17
12
 
18
- def load_file(path)
19
- encoding = @importer.encoding || 'UTF-8'
20
- raw_rows = CSV.read(path, :encoding => "#{encoding}:UTF-8")
21
- @importer.default_sheet.parse_raw_data(raw_rows)
13
+ def init_source(mode, source)
14
+ if mode == :stream
15
+ # For streams, we just read 'em in and parse 'em
16
+ text = source.read
17
+ encoding = @importer.encoding || 'UTF-8'
18
+ @raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
19
+ true
20
+
21
+ elsif mode == :file
22
+ # Files have a different path
23
+ encoding = @importer.encoding || 'UTF-8'
24
+ @raw_rows = CSV.read(source, :encoding => "#{encoding}:UTF-8")
25
+ true
26
+
27
+ else
28
+ @importer.add_error("Unsupported CSV mode: #{mode}")
29
+ false
30
+ end
31
+ end
32
+
33
+ # Normally, we'd check the key and return the proper data, but for CSV files,
34
+ # there's only one "sheet"
35
+ def load_raw_sheet(key)
36
+ @raw_rows
22
37
  end
23
38
 
24
39
  end
@@ -0,0 +1,39 @@
1
+ class Importer
2
+
3
+ # Special data reader that allows you to define a block to do the import yourself for cases
4
+ # where you have an odd text-based format or something else you want to be able to process
5
+ # using this gem. Check out Importer#on_file and Importer#on_stream to see how to use
6
+ # this reader type.
7
+ class CustomReader < DataReader
8
+
9
+ attr_accessor :readers
10
+
11
+ def initialize(importer)
12
+ super(importer, :custom)
13
+ @readers = {}
14
+ end
15
+
16
+ # Called by the importer to add a handler for the given mode
17
+ def set_reader(mode, block)
18
+ @readers[mode] = block
19
+ @supports << mode
20
+ end
21
+
22
+ def init_source(mode, source)
23
+ @mode = mode
24
+ @source = source
25
+ end
26
+
27
+ def load_raw_sheet(sheet)
28
+ reader = @readers[@mode]
29
+ reader.call(@source, sheet)
30
+
31
+ rescue Exception => e
32
+ # Catch any exceptions thrown and note them with helpful stacktrace info for debugging custom readers
33
+ @importer.add_error("Error in custom reader when loading sheet #{sheet}: #{e} @ #{e.backtrace.first}")
34
+ false
35
+ end
36
+
37
+ end
38
+
39
+ end
@@ -14,6 +14,24 @@ class Importer
14
14
  end
15
15
  end
16
16
 
17
+ # Implement our automatic reader selection, based on the import source
18
+ def self.for_source(importer, source)
19
+ data = nil
20
+ if is_stream?(source)
21
+ data = DataReader::for_stream(importer, source)
22
+ unless data
23
+ importer.add_error("Unable to find format handler for stream")
24
+ end
25
+ else
26
+ data = DataReader::for_path(importer, source)
27
+ unless data
28
+ importer.add_error("Unable to find format handler for file #{source}")
29
+ end
30
+ end
31
+ data
32
+ end
33
+
34
+ # Factory method to build a reader from an explicit format selector
17
35
  def self.for_format(importer, format)
18
36
  case format
19
37
  when :csv
@@ -29,6 +47,7 @@ class Importer
29
47
  end
30
48
  end
31
49
 
50
+ # Figure out which format to use for a given path based on file name
32
51
  def self.for_path(importer, path)
33
52
  format = path.to_s.extract(/\.(csv|xlsx?)\z/i)
34
53
  if format
@@ -39,11 +58,19 @@ class Importer
39
58
  end
40
59
  end
41
60
 
61
+ # Figure out which format to use based on a stream's source file info
42
62
  def self.for_stream(importer, stream)
43
63
  path = path_from_stream(stream)
44
64
  for_path(importer, path)
45
65
  end
46
66
 
67
+ # Attempt to determine if the given source is a stream
68
+ def self.is_stream?(source)
69
+ # For now, just assume anything that has a #read method is a stream, in
70
+ # duck-type fashion
71
+ source.respond_to?(:read)
72
+ end
73
+
47
74
  # Try to find the original file name for the given stream,
48
75
  # as in the case where a file is uploaded to Rails and we're dealing with an
49
76
  # ActionDispatch::Http::UploadedFile.
@@ -60,16 +87,40 @@ class Importer
60
87
  def initialize(importer, format)
61
88
  @importer = importer
62
89
  @format = format
63
- @multisheet = true
90
+ @supports = []
64
91
  end
65
92
 
93
+ def supports_stream!
94
+ @supports << :stream
95
+ end
96
+
97
+ def supports_file!
98
+ @supports << :file
99
+ end
100
+
101
+ def supports?(mode)
102
+ @supports.include?(mode)
103
+ end
104
+
105
+ def supports_file?
106
+ supports?(:file)
107
+ end
108
+
109
+ def supports_stream?
110
+ supports?(:stream)
111
+ end
112
+
113
+ # Core data reader method. Takes a given input source (either a stream or
114
+ # a file path) and attempts to load it. Returns true if successful, false
115
+ # if not. If false, there will be one or more errors explaining what went
116
+ # wrong.
66
117
  def load(path_or_stream)
67
118
  # Figure out what we've been passed, and handle it
68
- if path_or_stream.respond_to?(:read)
119
+ if self.class.is_stream?(path_or_stream)
69
120
  # We have a stream (open file, upload, whatever)
70
- if respond_to?(:load_stream)
121
+ if supports_stream?
71
122
  # Stream loader defined, run it
72
- load_stream(path_or_stream)
123
+ load_sheets(:stream, path_or_stream)
73
124
  else
74
125
  # Write to temp file, as some of our readers only read physical files, annoyingly
75
126
  file = Tempfile.new(['importer', ".#{format}"])
@@ -77,7 +128,7 @@ class Importer
77
128
  begin
78
129
  file.write path_or_stream.read
79
130
  file.close
80
- load_file(file.path)
131
+ load_sheets(:file, file.path)
81
132
  ensure
82
133
  file.close
83
134
  file.unlink
@@ -86,23 +137,58 @@ class Importer
86
137
 
87
138
  elsif path_or_stream.is_a?(String)
88
139
  # Assume it's a path
89
- if respond_to?(:load_file)
90
- # We're all set, load up the given path
91
- load_file(path_or_stream)
140
+ if File.exist?(path_or_stream)
141
+ if supports_file?
142
+ # We're all set, load up the given path
143
+ load_sheets(:file, path_or_stream)
144
+ else
145
+ # No file handler, so open the file and run the stream processor
146
+ file = File.open(path_or_stream, 'rb')
147
+ load_sheets(:stream, file)
148
+ end
92
149
  else
93
- # No file handler, so open the file and run the stream processor
94
- file = File.open(path_or_stream, 'rb')
95
- load_stream(file)
150
+ @importer.add_error("Unable to locate source file #{path_or_stream}")
96
151
  end
97
152
 
98
153
  else
99
- raise "Unable to load data: #{path_or_stream.inspect}"
154
+ @importer.add_error("Unable to load data source - not a file path or stream: #{path_or_stream.inspect}")
100
155
  end
101
156
 
102
157
  # Return our status
103
158
  !@importer.has_errors?
104
159
  end
105
160
 
161
+ # Load up the sheets in the correct mode
162
+ def load_sheets(mode, source)
163
+ # Let our derived classes open the file, etc. as they need
164
+ if init_source(mode, source)
165
+ # Once the source is set, run through each defined sheet, pass it to
166
+ # our sheet loader, and have the sheet parse it out.
167
+ @importer.sheets.values.each do |sheet|
168
+ res = load_raw_sheet(sheet)
169
+ if res === false
170
+ # D'oh.
171
+ else
172
+ # Tell the sheet to parse the data
173
+ sheet.parse_raw_data(res)
174
+ end
175
+ end
176
+ end
177
+ end
178
+
179
+ # Override this method in derived classes to set up
180
+ # the given source in the given mode
181
+ def init_source(mode, source)
182
+ raise "Unimplemented method #init_source in data reader #{self.class.name}"
183
+ end
184
+
185
+ # Override this method in derived classes to take the given sheet definition,
186
+ # find that sheet in the input source, and read out the raw (unparsed) rows
187
+ # as an array of arrays. Return false if the sheet cannot be loaded.
188
+ def load_raw_sheet(sheet)
189
+ raise "Unimplemented method #load_raw_sheet in data reader #{self.class.name}"
190
+ end
191
+
106
192
  # Provides default value parsing/coercion for all derived data readers. Attempts to be clever and
107
193
  # handle edge cases like converting '5.00' to 5 when in integer mode, etc. If you find your inputs aren't
108
194
  # being parsed correctly, add a custom #parse block on your Column definition.
@@ -33,8 +33,9 @@
33
33
  class Importer
34
34
 
35
35
  # Array of error message or nil for each non-header row
36
- attr_accessor :errors, :warnings, :data
36
+ attr_accessor :errors, :warnings
37
37
  attr_accessor :sheets
38
+ attr_reader :data, :custom_reader
38
39
  # Source file/stream encoding, assumes UTF-8 if none specified
39
40
  dsl_accessor :encoding
40
41
 
@@ -51,16 +52,34 @@ class Importer
51
52
  reset
52
53
  end
53
54
 
55
+ # Takes a block, and sets self to be importer instance, so you can
56
+ # just call #column, #sheet, etc. directly.
54
57
  def build(&block)
55
58
  DslProxy.exec(self, &block) if block
56
59
  self
57
60
  end
58
61
 
59
- def default_sheet
60
- sheet(1)
62
+ # For the common case where there is only one "sheet", e.g. CSV files.
63
+ def default_sheet(&block)
64
+ sheet(1, true, &block)
61
65
  end
62
66
 
63
- # Access a Sheet definition by id (either number (1-N) or sheet name)
67
+ # Access a Sheet definition by id (either number (1-N) or sheet name).
68
+ # Used during #build calls to define a sheet with a passed block, like so:
69
+ #
70
+ # Importer.build do
71
+ # sheet(1) do
72
+ # column :store_name
73
+ # column :store_address
74
+ # end
75
+ # sheet('Orders') do
76
+ # column :id
77
+ # column :price
78
+ # filter do |row|
79
+ # row[:price].present?
80
+ # end
81
+ # end
82
+ # end
64
83
  def sheet(id, create=true, &block)
65
84
  # Find the sheet, creating it if needed (and requested!)
66
85
  if @sheets[id].nil?
@@ -78,18 +97,40 @@ class Importer
78
97
  # Return the sheet
79
98
  sheet
80
99
  end
100
+
101
+ # Define a custom file reader to implement your own sheet parsing.
102
+ def on_file(&block)
103
+ @custom_reader = CustomReader.new(self) unless @custom_reader
104
+ @custom_reader.set_reader(:file, block)
105
+ end
106
+
107
+ def on_stream(&block)
108
+ @custom_reader = CustomReader.new(self) unless @custom_reader
109
+ @custom_reader.set_reader(:stream, block)
110
+ end
81
111
 
82
112
  # Very, very commonly we only want to deal with the default sheet. In this case,
83
113
  # let folks skip the sheet(n) do ... end block wrapper and just define columns
84
- # against the main importer. Internally, proxy those calls to the first sheet
114
+ # against the main importer. Internally, proxy those calls to the first sheet.
85
115
  def column(*args, &block)
86
116
  default_sheet.column(*args, &block)
87
117
  end
88
118
 
119
+ # Ditto for filters
89
120
  def filter(*args, &block)
90
121
  default_sheet.filter(*args, &block)
91
122
  end
92
123
 
124
+ # Ditto for start row too
125
+ def start_row(row_num)
126
+ default_sheet.start_row(row_num)
127
+ end
128
+
129
+ # More facading
130
+ def headerless!
131
+ default_sheet.headerless!
132
+ end
133
+
93
134
  # First call to a freshly #build'd importer, this will read the file/stream/path supplied,
94
135
  # validate the required values, run custom validations... basically pre-parse and
95
136
  # massage the supplied data. It will return true on success, or false if one
@@ -113,27 +154,23 @@ class Importer
113
154
  reset
114
155
 
115
156
  # Get the reader for this format
116
- format = options.delete(:format)
117
- if format && format != :auto
157
+ default = @custom_reader ? :custom : :auto
158
+ format = options.delete(:format) { default }
159
+ if format == :custom
160
+ # Custom format selected, use our internal custom reader
161
+ @data = @custom_reader
162
+
163
+ elsif format && format != :auto
164
+ # Explicit format requested
118
165
  @data = DataReader::for_format(self, format)
119
- unless reader
166
+ unless @data
120
167
  add_error("Unable to find format handler for format #{format} - aborting")
121
168
  return
122
169
  end
170
+
123
171
  else
124
- if path_or_stream.respond_to?(:read)
125
- @data = DataReader::for_stream(self, path_or_stream)
126
- unless @data
127
- add_error("Unable to find format handler for stream - aborting")
128
- return
129
- end
130
- else
131
- @data = DataReader::for_path(self, path_or_stream)
132
- unless @data
133
- add_error("Unable to find format handler for file #{path_or_stream} - aborting")
134
- return
135
- end
136
- end
172
+ # Auto select
173
+ @data = DataReader::for_source(self, path_or_stream)
137
174
  end
138
175
 
139
176
  # Read in the data!
@@ -1,7 +1,39 @@
1
1
  class Importer
2
2
 
3
3
  # The Sheet class handles building the sheet's column configuration and other
4
- # setup, then holds all load-time row data.
4
+ # setup, then holds all load-time row data. In some file types (Excel mostly)
5
+ # there may be more than one sheet definition in a given importer. In others,
6
+ # the default sheet is the only one (possibly implicitly) defined.
7
+ #
8
+ # The following builder options are available:
9
+ #
10
+ # Importer.build do
11
+ # sheet('Some Sheet Name') do
12
+ # # Don't try to look for a header using column definitions, there is no header
13
+ # headerless!
14
+ #
15
+ # # Manually set the start row for data in this sheet, defaults to nil
16
+ # # indicating that the data rows start immediately following the header.
17
+ # start_row 4
18
+ #
19
+ # # Define a filter that will skip unneeded rows. The filter command takes
20
+ # # a block that receives the parsed (but not validated!) row data as an
21
+ # # associative hash of :col_key => <parsed value>, and returns
22
+ # # true to keep the row or false to exclude it.
23
+ # filter do |row|
24
+ # row[:id].to_i > 5000
25
+ # end
26
+ #
27
+ # # Of course, the main thing to do in a sheet is define columns. See the
28
+ # # Column class' notes for options when defining a column. Note that
29
+ # # you can define columns using either hash-style:
30
+ # column :id, :type => :integer
31
+ # # or builder-style:
32
+ # column :name do
33
+ # header /company\s*name/
34
+ # type :string
35
+ # end
36
+ # end
5
37
  class Sheet
6
38
 
7
39
  # Inner class for holding load-time data that gets reset on each load call
@@ -37,10 +69,16 @@ class Importer
37
69
  reset
38
70
  end
39
71
 
72
+ # Define our columns etc. via builder-style method calling
40
73
  def build(&block)
41
74
  DslProxy.exec(self, &block)
42
75
  end
43
76
 
77
+ # Call with a block accepting a single Importer::Row with contents that
78
+ # look like :column_key => <parsed value>. Any filtered rows
79
+ # will not be present. If you want to register an error, simply
80
+ # raise "some text" and it will be added to the importer's error
81
+ # list for display to the user, logging, or whatever.
44
82
  def process
45
83
  @data.rows.each do |row|
46
84
  begin
@@ -51,13 +89,33 @@ class Importer
51
89
  end
52
90
  end
53
91
 
54
- def column(key, &block)
92
+ # Add a new column definition to our list, allows customizing the new
93
+ # column with a builder block. See Importer::Column docs for
94
+ # options. In lieu of a builder mode, you can pass the same values
95
+ # as key => value pairs in the options hash to this method, so:
96
+ #
97
+ # column(:foo) do
98
+ # type :string
99
+ # parse do |val|
100
+ # val.to_s.upcase
101
+ # end
102
+ # end
103
+ #
104
+ # Is equivalent to:
105
+ #
106
+ # column(:foo, :type => :string, :parse => lambda {|val| val.to_s.upcase})
107
+ #
108
+ # Use whichever you prefer!
109
+ def column(key, options_hash = {}, &block)
110
+ # Find existing column with key to allow re-opening an existing definition
55
111
  col = @columns.detect {|c| c.key == key }
56
112
  unless col
57
- col = Column.new(self, key)
113
+ # if none found, add a new one
114
+ col = Column.new(self, key, options_hash)
58
115
  @columns << col
59
116
  end
60
117
 
118
+ # Customize if needed
61
119
  DslProxy::exec(col, &block) if block
62
120
 
63
121
  col
@@ -73,9 +131,9 @@ class Importer
73
131
  if parse_header(raw_rows)
74
132
  # Now, run all the data and add it as a Row instance
75
133
  raw_rows.each_with_index do |raw, index|
76
- line = index + 1
77
- if line >= @data.start_row
78
- add_row(line, raw)
134
+ row_num = index + 1
135
+ if row_num >= @data.start_row
136
+ add_row(row_num, raw)
79
137
  end
80
138
  end
81
139
  end
@@ -128,8 +186,8 @@ class Importer
128
186
  # Use implicit or explicit column position when told to not look for a header
129
187
  next_index = 0
130
188
  @columns.each do |col|
131
- if col.index.present?
132
- next_index = col.index
189
+ unless col.position.nil?
190
+ next_index = col.fixed_index
133
191
  end
134
192
  col.data.index = next_index
135
193
  next_index += 1
@@ -140,6 +198,9 @@ class Importer
140
198
  else
141
199
  # Match by testing
142
200
  raw_rows.each_with_index do |row, i|
201
+ # Um, have data?
202
+ next unless row
203
+
143
204
  # Set up for this iteration
144
205
  remaining = @columns.dup
145
206
 
@@ -165,11 +226,13 @@ class Importer
165
226
  end
166
227
  end
167
228
 
229
+ # When true, the given sheet name or zero-based index
230
+ # is a match with our id.
168
231
  def match_sheet?(name, index)
169
232
  if @id.is_a?(Fixnum)
170
233
  @id.to_i == index+1
171
234
  else
172
- @id.to_s == name
235
+ @id.to_s.downcase == name.downcase
173
236
  end
174
237
  end
175
238
 
@@ -177,6 +240,8 @@ class Importer
177
240
  "Sheet #{@id}"
178
241
  end
179
242
 
243
+ # Return all parsed, filtered data in the sheet as an
244
+ # array of arrays.
180
245
  def dump
181
246
  @data.rows.collect(&:values)
182
247
  end
@@ -6,55 +6,41 @@ class Importer
6
6
  super(importer, :xlsx)
7
7
  end
8
8
 
9
- def load_file(path)
10
- spreadsheet = Roo::Excel.new(path, :file_warning => :ignore)
11
- if spreadsheet
12
- # Get our list of sheet definitions, and run all the sheets in the spreadsheet
13
- remaining_sheets = @importer.sheets.values
14
- spreadsheet.sheets.each_with_index do |name, index|
15
- # Look for a sheet definition that matches this sheet's name/index
16
- sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
17
- if sheet
18
- # Remove from our list of remaining sheets
19
- remaining_sheets.delete(sheet)
20
- # Extract our raw data
21
- raw_rows = []
22
- spreadsheet.sheet(name).each_with_index do |row, line|
23
- raw_rows << row
24
- end
25
- # Let the sheet sort it out
26
- sheet.parse_raw_data(raw_rows)
27
- end
28
- end
29
- return true
9
+ def init_source(mode, source)
10
+ if mode == :file
11
+ @spreadsheet = Roo::Excel.new(source, :file_warning => :ignore)
12
+ true
30
13
  else
31
- @importer.add_error("Unable to read Excel file at path #{path}")
32
- return false
14
+ @importer.add_error("Unsupported XLS mode: #{mode}")
15
+ false
33
16
  end
34
-
35
17
  rescue Exception => e
36
- @importer.add_error("Error reading file #{path}: #{e}")
18
+ @importer.add_error("Error reading file #{source}: #{e}")
37
19
  false
38
20
  end
39
21
 
40
- private
41
-
42
- def load_raw_rows(sheet, raw_rows)
43
- # Figure out where our columns are and where our data starts
44
- column_map = sheet.find_header(raw_rows[0...5])
45
- start_row = sheet.data.start_row
46
-
47
- # Run all the raw rows and convert them to Row instances, making notes of errors along the way...
48
- if !@importer.has_errors?
49
- raw_rows.each_with_index do |raw, index|
50
- line = index + 1
51
- if line >= start_row
52
- row = sheet.add_row(line, raw)
22
+ def load_raw_sheet(sheet)
23
+ @spreadsheet.sheets.each_with_index do |name, index|
24
+ # See if this sheet's name or index matches the requested sheet definition
25
+ if sheet.match_sheet?(name, index)
26
+ # Extract our raw data
27
+ raw_rows = []
28
+ @spreadsheet.sheet(name).each_with_index do |row, line|
29
+ raw_rows << row
53
30
  end
31
+ return raw_rows
54
32
  end
55
33
  end
34
+ # This is not good.
35
+ @importer.add_error("Unable to find sheet #{sheet}")
36
+ return false
37
+
38
+ rescue Exception => e
39
+ # Not sure why we'd get here, but we strive for error-freedom here, yessir.
40
+ @importer.add_error("Error loading sheet #{sheet}: #{e}")
41
+ false
56
42
  end
57
-
43
+
58
44
  end
59
45
 
60
46
  end
@@ -1,58 +1,45 @@
1
1
  class Importer
2
2
 
3
+ # Uses the Roo gem to read in .xlsx files
3
4
  class XlsxReader < DataReader
4
5
 
5
6
  def initialize(importer)
6
7
  super(importer, :xlsx)
8
+ supports_file!
7
9
  end
8
10
 
9
- def load_file(path)
10
- spreadsheet = Roo::Excelx.new(path, :file_warning => :ignore)
11
- if spreadsheet
12
- # Get our list of sheet definitions, and run all the sheets in the spreadsheet
13
- remaining_sheets = @importer.sheets.values
14
- spreadsheet.sheets.each_with_index do |name, index|
15
- # Look for a sheet definition that matches this sheet's name/index
16
- sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
17
- if sheet
18
- # Remove from our list of remaining sheets
19
- remaining_sheets.delete(sheet)
20
- # Extract our raw data
21
- raw_rows = []
22
- spreadsheet.sheet(name).each_with_index do |row, line|
23
- raw_rows << row
24
- end
25
- # Let the sheet sort it out
26
- sheet.parse_raw_data(raw_rows)
27
- end
28
- end
29
- return true
11
+ def init_source(mode, source)
12
+ if mode == :file
13
+ @spreadsheet = Roo::Excelx.new(source, :file_warning => :ignore)
14
+ true
30
15
  else
31
- @importer.add_error("Unable to read ExcelX file at path #{path}")
32
- return false
16
+ @importer.add_error("Unsupported XLSX mode: #{mode}")
17
+ false
33
18
  end
34
-
35
19
  rescue Exception => e
36
- @importer.add_error("Error reading file #{path}: #{e} @ #{e.backtrace.first}")
20
+ @importer.add_error("Error reading file #{source}: #{e}")
37
21
  false
38
22
  end
39
23
 
40
- private
41
-
42
- def load_raw_rows(sheet, raw_rows)
43
- # Figure out where our columns are and where our data starts
44
- column_map = sheet.find_header(raw_rows[0...5])
45
- start_row = sheet.data.start_row
46
-
47
- # Run all the raw rows and convert them to Row instances, making notes of errors along the way...
48
- if !@importer.has_errors?
49
- raw_rows.each_with_index do |raw, index|
50
- line = index + 1
51
- if line >= start_row
52
- row = sheet.add_row(line, raw)
24
+ def load_raw_sheet(sheet)
25
+ @spreadsheet.sheets.each_with_index do |name, index|
26
+ # See if this sheet's name or index matches the requested sheet definition
27
+ if sheet.match_sheet?(name, index)
28
+ # Extract our raw data
29
+ raw_rows = []
30
+ @spreadsheet.sheet(name).each_with_index do |row, line|
31
+ raw_rows << row
53
32
  end
33
+ return raw_rows
54
34
  end
55
35
  end
36
+ @importer.add_error("Unable to find sheet #{sheet}")
37
+ return false
38
+
39
+ rescue Exception => e
40
+ # Not sure why we'd get here, but we strive for error-freedom here, yessir.
41
+ @importer.add_error("Error loading sheet #{sheet}: #{e}")
42
+ false
56
43
  end
57
44
 
58
45
  end
data/lib/iron/import.rb CHANGED
@@ -11,4 +11,5 @@ require_relative 'import/data_reader'
11
11
  require_relative 'import/csv_reader'
12
12
  require_relative 'import/xls_reader'
13
13
  require_relative 'import/xlsx_reader'
14
+ require_relative 'import/custom_reader'
14
15
  require_relative 'import/importer'
@@ -0,0 +1 @@
1
+ require 'iron/import'
@@ -0,0 +1,46 @@
1
+ describe Importer::CustomReader do
2
+
3
+ before do
4
+ @importer = Importer.new
5
+ end
6
+
7
+ it 'should set up correctly for on_file handling' do
8
+ @importer.custom_reader.should be_nil
9
+ @importer.build do
10
+ headerless!
11
+ on_file do |source, sheet|
12
+ []
13
+ end
14
+ end
15
+ @importer.custom_reader.should be_an(Importer::CustomReader)
16
+ @importer.custom_reader.should be_supports_file
17
+ @importer.custom_reader.should_not be_supports_stream
18
+ end
19
+
20
+ it 'should load the ICD10 test document' do
21
+ importer = Importer.build do
22
+ headerless!
23
+ column :code do
24
+ required!
25
+ end
26
+ column :desc do
27
+ required!
28
+ end
29
+
30
+ on_file do |source, sheet|
31
+ File.readlines(source).collect do |line|
32
+ line.extract(/([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/)
33
+ end
34
+ end
35
+ end
36
+ importer.import(SpecHelper.sample_path('icd10-custom.txt'))
37
+ importer.error_summary.should be_nil
38
+ importer.default_sheet.dump.should == [
39
+ {:code => 'A000', :desc => 'Cholera due to Vibrio cholerae 01, biovar cholerae'},
40
+ {:code => 'A001', :desc => 'Cholera due to Vibrio cholerae 01, biovar eltor'},
41
+ {:code => 'A009', :desc => 'Cholera, unspecified'},
42
+ {:code => 'A0100', :desc => 'Typhoid fever, unspecified'}
43
+ ]
44
+ end
45
+
46
+ end
@@ -87,7 +87,7 @@ describe Importer::DataReader do
87
87
  end
88
88
 
89
89
  it 'should build an instance based on stream' do
90
- Importer::DataReader.for_stream(@importer, mock(original_filename: "nanodrop.xlsx", content_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")).should be_a(Importer::XlsxReader)
90
+ Importer::DataReader.for_stream(@importer, double(original_filename: "nanodrop.xlsx", content_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")).should be_a(Importer::XlsxReader)
91
91
  end
92
92
 
93
93
  end
@@ -0,0 +1,4 @@
1
+ A000 Cholera due to Vibrio cholerae 01, biovar cholerae
2
+ A001 Cholera due to Vibrio cholerae 01, biovar eltor
3
+ A009 Cholera, unspecified
4
+ A0100 Typhoid fever, unspecified
metadata CHANGED
@@ -1,20 +1,23 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iron-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Morris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-19 00:00:00.000000000 Z
11
+ date: 2015-08-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: iron-extensions
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.2'
20
+ - - ">="
18
21
  - !ruby/object:Gem::Version
19
22
  version: 1.2.1
20
23
  type: :runtime
@@ -22,6 +25,9 @@ dependencies:
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
27
  - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.2'
30
+ - - ">="
25
31
  - !ruby/object:Gem::Version
26
32
  version: 1.2.1
27
33
  - !ruby/object:Gem::Dependency
@@ -80,9 +86,11 @@ files:
80
86
  - LICENSE
81
87
  - README.rdoc
82
88
  - Version.txt
89
+ - lib/iron-import.rb
83
90
  - lib/iron/import.rb
84
91
  - lib/iron/import/column.rb
85
92
  - lib/iron/import/csv_reader.rb
93
+ - lib/iron/import/custom_reader.rb
86
94
  - lib/iron/import/data_reader.rb
87
95
  - lib/iron/import/error.rb
88
96
  - lib/iron/import/importer.rb
@@ -92,11 +100,13 @@ files:
92
100
  - lib/iron/import/xlsx_reader.rb
93
101
  - spec/importer/column_spec.rb
94
102
  - spec/importer/csv_reader_spec.rb
103
+ - spec/importer/custom_reader_spec.rb
95
104
  - spec/importer/data_reader_spec.rb
96
105
  - spec/importer/importer_spec.rb
97
106
  - spec/importer/row_spec.rb
98
107
  - spec/importer/sheet_spec.rb
99
108
  - spec/importer/xlsx_reader_spec.rb
109
+ - spec/samples/icd10-custom.txt
100
110
  - spec/samples/nanodrop.xlsx
101
111
  - spec/samples/simple.csv
102
112
  - spec/samples/test-products.xls