RubyGems - iron-import - Versions diffs - 0.5.0 → 0.6.0 - Mend

iron-import 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +4 -4
data/History.txt +6 -0
data/README.rdoc +2 -2
data/Version.txt +1 -1
data/lib/iron/import/column.rb +35 -14
data/lib/iron/import/csv_reader.rb +26 -11
data/lib/iron/import/custom_reader.rb +39 -0
data/lib/iron/import/data_reader.rb +98 -12
data/lib/iron/import/importer.rb +58 -21
data/lib/iron/import/sheet.rb +74 -9
data/lib/iron/import/xls_reader.rb +25 -39
data/lib/iron/import/xlsx_reader.rb +25 -38
data/lib/iron/import.rb +1 -0
data/lib/iron-import.rb +1 -0
data/spec/importer/custom_reader_spec.rb +46 -0
data/spec/importer/data_reader_spec.rb +1 -1
data/spec/samples/icd10-custom.txt +4 -0
metadata +12 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 70c4748d780e9854cbd60622563b74d3b7ce2b5c
-  data.tar.gz: d6503f0f7a08b4c88da5813b3114446baf1fff1a
+  metadata.gz: 04d666ea1e0170b0186d75fc8b0ec367a16e528a
+  data.tar.gz: 9dad576e17b7d8fc4b523ffe6b4e53c85feae9c8
 SHA512:
-  metadata.gz: 488a0e4b2d8ed83914bb2a6c907358ee584c0849f26bf9e64d6cc4bd8c2296997e4bc580f59b3bff4db6fa699a6abf94f5a85cd31c1585f03f728523025529a3
-  data.tar.gz: 00c6e27cf433423c9c1cc14828c11cd895459b0c12e86aa57ec65b35b049b0b7939dda98edd3359aa4dab8af945b0a17b9ef5b7fc486300edeb8b987b21d65dd
+  metadata.gz: e5a31e81381d78c29da480b296a8e9569ed415a32f50610881014de52a7b92c925687d3ee4ba683bf64a50562b26bf5785acf09070017710938d39c52f0087ad
+  data.tar.gz: d29901644886a98c617dd215b52edec0e8c011875ae2c0d6724c8f0f03c26bfdb9ea8028a3322c9cf1d16f91d9010950203c73ff0fc0485ce0bfc09ffc53e6f2

data/History.txt CHANGED Viewed

@@ -1,3 +1,9 @@
+== 0.6.0 / 2015-08-17
+* Refactored readers to allow for custom format reading
+* Vastly improved internal and user-facing comments
+* Improved error logging, replaced some exceptions with errors
 == 0.5.0 / 2015-02-XX
 * Initial revision

data/README.rdoc CHANGED Viewed

@@ -25,7 +25,7 @@ any warnings and errors encountered... well, this is the library for you!
 IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
 for the task.  Breaking changes will be noted by increases in the second-level version,
-ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not.
+ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
 == SAMPLE USAGE
@@ -65,6 +65,6 @@ RVM users can skip the sudo:
 Then use
-    require 'iron/import'
+    require 'iron-import'
 to require the library code.

data/Version.txt CHANGED Viewed

	@@ -1 +1 @@
1	- 0.5.0
1	+ 0.6.0

data/lib/iron/import/column.rb CHANGED Viewed

@@ -24,13 +24,14 @@ class Importer
   #       # Instead of a type, you can set an explicit parse block.  Be aware
   #       # that different source types may give you different raw values for what
   #       # seems like the "same" source value, for example an Excel source file
-  #       # will give you a float value for all numeric types, even "integers"
+  #       # will give you a float value for all numeric types, even "integers".
   #       parse do |raw_value|
   #         raw_value.to_i + 1000
   #       end
   #
   #       # You can also add a custom validator to check the value and add
-  #       # an error if it's not within a given range, or whatever:
+  #       # an error if it's not within a given range, or whatever.  To fail validation,
+  #       # simply raise the error you wish recorded.
   #       validate do |parsed_value|
   #         raise "Out of range" unless (parsed_value > 0 && parsed_value < 5000)
   #       end
@@ -83,50 +84,60 @@ class Importer
       str = chars[index] + str
       str
     end
-    def initialize(sheet, key)
+    # Create a new column definition, with the owning sheet, the key for the column,
+    # and an optional set of options.  The options supported are the same as those supported
+    # in block/builder mode.
+    def initialize(sheet, key, options_hash = {})
       # Save off our info
       @key = key
       @sheet = sheet
       @importer = @sheet.importer
       # Return it as a string, by default
-      @type = :string
+      @type = options_hash.delete(:type) { :string }
       # By default, we allow empty values
-      @required = false
+      @required = options_hash.delete(:required) { false }
       # Position can be explicitly set
-      @position = nil
+      @position = options_hash.delete(:position)
       # By default, don't parse incoming data, just pass it through
-      @parse = nil
+      @parse = options_hash.delete(:parse)
       # Default matcher, looks for the presence of the column key as text anywhere
       # in the header string, ignoring case and using underscores as spaces, ie
       # :order_id => /\A\s*order id\s*\z/i
-      @header = Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
+      @header = options_hash.delete(:header) {
+        Regexp.new('\A\s*' + key.to_s.gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
+      }
       # Reset our state to pre-load status
       reset
     end
+    # Customize ourselves using block syntax
     def build(&block)
       DslProxy.exec(self, &block)
     end
+    # Deletes all stored data in prep for an import run
     def reset
       @data = Data.new
     end
-    # When true, matches either the passed value or the index (if position has been explicitly set)
+    # Returns true when the passed column index equals our fixed index, or when our header definition matches the passed text.
     def match_header?(text, index)
-      res = index == self.fixed_index || (@header && !@header.match(text).nil?)
-      # puts "#{@header.inspect} ~ #{text.inspect} => #{res.inspect}"
-      res
+      return true if index == self.fixed_index
+      if @header.is_a?(Regexp)
+        return !@header.match(text).nil?
+      else
+        return @header.to_s.downcase == text
+      end
     end
-    # Use any custom parser defined to process the given value, capturing
+    # Applies any custom parser defined to process the given value, capturing
     # errors as needed
     def parse_value(row, val)
       return val if @parse.nil?
@@ -138,6 +149,7 @@ class Importer
       end
     end
+    # Applies any validation to a parsed value
     def validate_value(row, val)
       return unless @validate
       begin
@@ -149,6 +161,9 @@ class Importer
       end
     end
+    # Returns the fixed index of this column based on the set position.
+    # In other words, a position of 2 would return an index of 1 (as
+    # indices are 0-based), where a position of 'C' would return 2.
     def fixed_index
       return nil unless @position
       if @position.is_a?(Fixnum)
@@ -158,14 +173,20 @@ class Importer
       end
     end
+    # Pretty name for ourselves
     def to_s
       'Column ' + @data.pos
     end
+    # Extracts the sheet's values for this column and returns them in an array.
+    # Note that the array indices ARE NOT row indices, as the rows may have been
+    # filtered and any header rows have been skipped.
     def to_a
       @sheet.data.rows.collect {|r| r[@key] }
     end
+    # Extracts the sheet's values for this column and returns them in a hash of
+    # row num => value for all non-filtered, non-header rows.
     def to_h
       res = {}
       @sheet.data.rows.collect {|r| res[r.num] = r[@key] }

data/lib/iron/import/csv_reader.rb CHANGED Viewed

@@ -6,19 +6,34 @@ class Importer
     def initialize(importer)
       super(importer, :csv)
-    end
-    def load_stream(stream)
-      text = stream.read
-      encoding = @importer.encoding || 'UTF-8'
-      raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
-      @importer.default_sheet.parse_raw_data(raw_rows)
+      supports_file!
+      supports_stream!
     end
-    def load_file(path)
-      encoding = @importer.encoding || 'UTF-8'
-      raw_rows = CSV.read(path, :encoding => "#{encoding}:UTF-8")
-      @importer.default_sheet.parse_raw_data(raw_rows)
+    def init_source(mode, source)
+      if mode == :stream
+        # For streams, we just read 'em in and parse 'em
+        text = source.read
+        encoding = @importer.encoding || 'UTF-8'
+        @raw_rows = CSV.parse(text, :encoding => "#{encoding}:UTF-8")
+        true
+      elsif mode == :file
+        # Files have a different path
+        encoding = @importer.encoding || 'UTF-8'
+        @raw_rows = CSV.read(source, :encoding => "#{encoding}:UTF-8")
+        true
+      else
+        @importer.add_error("Unsupported CSV mode: #{mode}")
+        false
+      end
+    end
+    # Normally, we'd check the key and return the proper data, but for CSV files,
+    # there's only one "sheet"
+    def load_raw_sheet(key)
+      @raw_rows
     end
   end

data/lib/iron/import/custom_reader.rb ADDED Viewed

@@ -0,0 +1,39 @@
+class Importer
+  # Special data reader that allows you to define a block to do the import yourself for cases
+  # where you have an odd text-based format or something else you want to be able to process
+  # using this gem.  Check out Importer#on_file and Importer#on_stream to see how to use
+  # this reader type.
+  class CustomReader < DataReader
+    attr_accessor :readers
+    def initialize(importer)
+      super(importer, :custom)
+      @readers = {}
+    end
+    # Called by the importer to add a handler for the given mode
+    def set_reader(mode, block)
+      @readers[mode] = block
+      @supports << mode
+    end
+    def init_source(mode, source)
+      @mode = mode
+      @source = source
+    end
+    def load_raw_sheet(sheet)
+      reader = @readers[@mode]
+      reader.call(@source, sheet)
+    rescue Exception => e
+      # Catch any exceptions thrown and note them with helpful stacktrace info for debugging custom readers
+      @importer.add_error("Error in custom reader when loading sheet #{sheet}: #{e} @ #{e.backtrace.first}")
+      false
+    end
+  end
+end

data/lib/iron/import/data_reader.rb CHANGED Viewed

@@ -14,6 +14,24 @@ class Importer
       end
     end
+    # Implement our automatic reader selection, based on the import source
+    def self.for_source(importer, source)
+      data = nil
+      if is_stream?(source)
+        data = DataReader::for_stream(importer, source)
+        unless data
+          importer.add_error("Unable to find format handler for stream")
+        end
+      else
+        data = DataReader::for_path(importer, source)
+        unless data
+          importer.add_error("Unable to find format handler for file #{source}")
+        end
+      end
+      data
+    end
+    # Factory method to build a reader from an explicit format selector
     def self.for_format(importer, format)
       case format
       when :csv
@@ -29,6 +47,7 @@ class Importer
       end
     end
+    # Figure out which format to use for a given path based on file name
     def self.for_path(importer, path)
       format = path.to_s.extract(/\.(csv|xlsx?)\z/i)
       if format
@@ -39,11 +58,19 @@ class Importer
       end
     end
+    # Figure out which format to use based on a stream's source file info
     def self.for_stream(importer, stream)
       path = path_from_stream(stream)
       for_path(importer, path)
     end
+    # Attempt to determine if the given source is a stream
+    def self.is_stream?(source)
+      # For now, just assume anything that has a #read method is a stream, in
+      # duck-type fashion
+      source.respond_to?(:read)
+    end
     # Try to find the original file name for the given stream,
     # as in the case where a file is uploaded to Rails and we're dealing with an
     # ActionDispatch::Http::UploadedFile.
@@ -60,16 +87,40 @@ class Importer
     def initialize(importer, format)
       @importer = importer
       @format = format
-      @multisheet = true
+      @supports = []
     end
+    def supports_stream!
+      @supports << :stream
+    end
+    def supports_file!
+      @supports << :file
+    end
+    def supports?(mode)
+      @supports.include?(mode)
+    end
+    def supports_file?
+      supports?(:file)
+    end
+    def supports_stream?
+      supports?(:stream)
+    end
+    # Core data reader method.  Takes a given input source (either a stream or
+    # a file path) and attempts to load it.  Returns true if successful, false
+    # if not.  If false, there will be one or more errors explaining what went
+    # wrong.
     def load(path_or_stream)
       # Figure out what we've been passed, and handle it
-      if path_or_stream.respond_to?(:read)
+      if self.class.is_stream?(path_or_stream)
         # We have a stream (open file, upload, whatever)
-        if respond_to?(:load_stream)
+        if supports_stream?
           # Stream loader defined, run it
-          load_stream(path_or_stream)
+          load_sheets(:stream, path_or_stream)
         else
           # Write to temp file, as some of our readers only read physical files, annoyingly
           file = Tempfile.new(['importer', ".#{format}"])
@@ -77,7 +128,7 @@ class Importer
           begin
             file.write path_or_stream.read
             file.close
-            load_file(file.path)
+            load_sheets(:file, file.path)
           ensure
             file.close
             file.unlink
@@ -86,23 +137,58 @@ class Importer
       elsif path_or_stream.is_a?(String)
         # Assume it's a path
-        if respond_to?(:load_file)
-          # We're all set, load up the given path
-          load_file(path_or_stream)
+        if File.exist?(path_or_stream)
+          if supports_file?
+            # We're all set, load up the given path
+            load_sheets(:file, path_or_stream)
+          else
+            # No file handler, so open the file and run the stream processor
+            file = File.open(path_or_stream, 'rb')
+            load_sheets(:stream, file)
+          end
         else
-          # No file handler, so open the file and run the stream processor
-          file = File.open(path_or_stream, 'rb')
-          load_stream(file)
+          @importer.add_error("Unable to locate source file #{path_or_stream}")
         end
       else
-        raise "Unable to load data: #{path_or_stream.inspect}"
+        @importer.add_error("Unable to load data source - not a file path or stream: #{path_or_stream.inspect}")
       end
       # Return our status
       !@importer.has_errors?
     end
+    # Load up the sheets in the correct mode
+    def load_sheets(mode, source)
+      # Let our derived classes open the file, etc. as they need
+      if init_source(mode, source)
+        # Once the source is set, run through each defined sheet, pass it to
+        # our sheet loader, and have the sheet parse it out.
+        @importer.sheets.values.each do |sheet|
+          res = load_raw_sheet(sheet)
+          if res === false
+            # D'oh.
+          else
+            # Tell the sheet to parse the data
+            sheet.parse_raw_data(res)
+          end
+        end
+      end
+    end
+    # Override this method in derived classes to set up
+    # the given source in the given mode
+    def init_source(mode, source)
+      raise "Unimplemented method #init_source in data reader #{self.class.name}"
+    end
+    # Override this method in derived classes to take the given sheet definition,
+    # find that sheet in the input source, and read out the raw (unparsed) rows
+    # as an array of arrays.  Return false if the sheet cannot be loaded.
+    def load_raw_sheet(sheet)
+      raise "Unimplemented method #load_raw_sheet in data reader #{self.class.name}"
+    end
     # Provides default value parsing/coercion for all derived data readers.  Attempts to be clever and
     # handle edge cases like converting '5.00' to 5 when in integer mode, etc.  If you find your inputs aren't
     # being parsed correctly, add a custom #parse block on your Column definition.

data/lib/iron/import/importer.rb CHANGED Viewed

@@ -33,8 +33,9 @@
 class Importer
   # Array of error message or nil for each non-header row
-  attr_accessor :errors, :warnings, :data
+  attr_accessor :errors, :warnings
   attr_accessor :sheets
+  attr_reader :data, :custom_reader
   # Source file/stream encoding, assumes UTF-8 if none specified
   dsl_accessor :encoding
@@ -51,16 +52,34 @@ class Importer
     reset
   end
+  # Takes a block, and sets self to be importer instance, so you can
+  # just call #column, #sheet, etc. directly.
   def build(&block)
     DslProxy.exec(self, &block) if block
     self
   end
-  def default_sheet
-    sheet(1)
+  # For the common case where there is only one "sheet", e.g. CSV files.
+  def default_sheet(&block)
+    sheet(1, true, &block)
   end
-  # Access a Sheet definition by id (either number (1-N) or sheet name)
+  # Access a Sheet definition by id (either number (1-N) or sheet name).
+  # Used during #build calls to define a sheet with a passed block, like so:
+  #
+  #   Importer.build do
+  #     sheet(1) do
+  #       column :store_name
+  #       column :store_address
+  #     end
+  #     sheet('Orders') do
+  #       column :id
+  #       column :price
+  #       filter do |row|
+  #         row[:price].present?
+  #       end
+  #     end
+  #   end
   def sheet(id, create=true, &block)
     # Find the sheet, creating it if needed (and requested!)
     if @sheets[id].nil?
@@ -78,18 +97,40 @@ class Importer
     # Return the sheet
     sheet
   end
+  # Define a custom file reader to implement your own sheet parsing.
+  def on_file(&block)
+    @custom_reader = CustomReader.new(self) unless @custom_reader
+    @custom_reader.set_reader(:file, block)
+  end
+  def on_stream(&block)
+    @custom_reader = CustomReader.new(self) unless @custom_reader
+    @custom_reader.set_reader(:stream, block)
+  end
   # Very, very commonly we only want to deal with the default sheet.  In this case,
   # let folks skip the sheet(n) do ... end block wrapper and just define columns
-  # against the main importer.  Internally, proxy those calls to the first sheet
+  # against the main importer.  Internally, proxy those calls to the first sheet.
   def column(*args, &block)
     default_sheet.column(*args, &block)
   end
+  # Ditto for filters
   def filter(*args, &block)
     default_sheet.filter(*args, &block)
   end
+  # Ditto for start row too
+  def start_row(row_num)
+    default_sheet.start_row(row_num)
+  end
+  # More facading
+  def headerless!
+    default_sheet.headerless!
+  end
   # First call to a freshly #build'd importer, this will read the file/stream/path supplied,
   # validate the required values, run custom validations... basically pre-parse and
   # massage the supplied data.  It will return true on success, or false if one
@@ -113,27 +154,23 @@ class Importer
     reset
     # Get the reader for this format
-    format = options.delete(:format)
-    if format && format != :auto
+    default = @custom_reader ? :custom : :auto
+    format = options.delete(:format) { default }
+    if format == :custom
+      # Custom format selected, use our internal custom reader
+      @data = @custom_reader
+    elsif format && format != :auto
+      # Explicit format requested
       @data = DataReader::for_format(self, format)
-      unless reader
+      unless @data
         add_error("Unable to find format handler for format #{format} - aborting")
         return
       end
     else
-      if path_or_stream.respond_to?(:read)
-        @data = DataReader::for_stream(self, path_or_stream)
-        unless @data
-          add_error("Unable to find format handler for stream - aborting")
-          return
-        end
-      else
-        @data = DataReader::for_path(self, path_or_stream)
-        unless @data
-          add_error("Unable to find format handler for file #{path_or_stream} - aborting")
-          return
-        end
-      end
+      # Auto select
+      @data = DataReader::for_source(self, path_or_stream)
     end
     # Read in the data!

data/lib/iron/import/sheet.rb CHANGED Viewed

@@ -1,7 +1,39 @@
 class Importer
   # The Sheet class handles building the sheet's column configuration and other
-  # setup, then holds all load-time row data.
+  # setup, then holds all load-time row data.  In some file types (Excel mostly)
+  # there may be more than one sheet definition in a given importer.  In others,
+  # the default sheet is the only one (possibly implicitly) defined.
+  #
+  # The following builder options are available:
+  #
+  #   Importer.build do
+  #     sheet('Some Sheet Name') do
+  #       # Don't try to look for a header using column definitions, there is no header
+  #       headerless!
+  #
+  #       # Manually set the start row for data in this sheet, defaults to nil
+  #       # indicating that the data rows start immediately following the header.
+  #       start_row 4
+  #
+  #       # Define a filter that will skip unneeded rows.  The filter command takes
+  #       # a block that receives the parsed (but not validated!) row data as an
+  #       # associative hash of :col_key => <parsed value>, and returns
+  #       # true to keep the row or false to exclude it.
+  #       filter do |row|
+  #         row[:id].to_i > 5000
+  #       end
+  #
+  #       # Of course, the main thing to do in a sheet is define columns.  See the
+  #       # Column class' notes for options when defining a column.  Note that
+  #       # you can define columns using either hash-style:
+  #       column :id, :type => :integer
+  #       # or builder-style:
+  #       column :name do
+  #         header /company\s*name/
+  #         type :string
+  #       end
+  #    end
   class Sheet
     # Inner class for holding load-time data that gets reset on each load call
@@ -37,10 +69,16 @@ class Importer
       reset
     end
+    # Define our columns etc. via builder-style method calling
     def build(&block)
       DslProxy.exec(self, &block)
     end
+    # Call with a block accepting a single Importer::Row with contents that
+    # look like :column_key => <parsed value>.  Any filtered rows
+    # will not be present.  If you want to register an error, simply
+    # raise "some text" and it will be added to the importer's error
+    # list for display to the user, logging, or whatever.
     def process
       @data.rows.each do |row|
         begin
@@ -51,13 +89,33 @@ class Importer
       end
     end
-    def column(key, &block)
+    # Add a new column definition to our list, allows customizing the new
+    # column with a builder block.  See Importer::Column docs for
+    # options.  In lieu of a builder mode, you can pass the same values
+    # as key => value pairs in the options hash to this method, so:
+    #
+    #   column(:foo) do
+    #     type :string
+    #     parse do |val|
+    #       val.to_s.upcase
+    #     end
+    #   end
+    #
+    # Is equivalent to:
+    #
+    #   column(:foo, :type => :string, :parse => lambda {|val| val.to_s.upcase})
+    #
+    # Use whichever you prefer!
+    def column(key, options_hash = {}, &block)
+      # Find existing column with key to allow re-opening an existing definition
       col = @columns.detect {|c| c.key == key }
       unless col
-        col = Column.new(self, key)
+        # if none found, add a new one
+        col = Column.new(self, key, options_hash)
         @columns << col
       end
+      # Customize if needed
       DslProxy::exec(col, &block) if block
       col
@@ -73,9 +131,9 @@ class Importer
       if parse_header(raw_rows)
         # Now, run all the data and add it as a Row instance
         raw_rows.each_with_index do |raw, index|
-          line = index + 1
-          if line >= @data.start_row
-            add_row(line, raw)
+          row_num = index + 1
+          if row_num >= @data.start_row
+            add_row(row_num, raw)
           end
         end
       end
@@ -128,8 +186,8 @@ class Importer
         # Use implicit or explicit column position when told to not look for a header
         next_index = 0
         @columns.each do |col|
-          if col.index.present?
-            next_index = col.index
+          unless col.position.nil?
+            next_index = col.fixed_index
           end
           col.data.index = next_index
           next_index += 1
@@ -140,6 +198,9 @@ class Importer
       else
         # Match by testing
         raw_rows.each_with_index do |row, i|
+          # Um, have data?
+          next unless row
           # Set up for this iteration
           remaining = @columns.dup
@@ -165,11 +226,13 @@ class Importer
       end
     end
+    # When true, the given sheet name or zero-based index
+    # is a match with our id.
     def match_sheet?(name, index)
       if @id.is_a?(Fixnum)
         @id.to_i == index+1
       else
-        @id.to_s == name
+        @id.to_s.downcase == name.downcase
       end
     end
@@ -177,6 +240,8 @@ class Importer
       "Sheet #{@id}"
     end
+    # Return all parsed, filtered data in the sheet as an
+    # array of arrays.
     def dump
       @data.rows.collect(&:values)
     end

data/lib/iron/import/xls_reader.rb CHANGED Viewed

@@ -6,55 +6,41 @@ class Importer
       super(importer, :xlsx)
     end
-    def load_file(path)
-      spreadsheet = Roo::Excel.new(path, :file_warning => :ignore)
-      if spreadsheet
-        # Get our list of sheet definitions, and run all the sheets in the spreadsheet
-        remaining_sheets = @importer.sheets.values
-        spreadsheet.sheets.each_with_index do |name, index|
-          # Look for a sheet definition that matches this sheet's name/index
-          sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
-          if sheet
-            # Remove from our list of remaining sheets
-            remaining_sheets.delete(sheet)
-            # Extract our raw data
-            raw_rows = []
-            spreadsheet.sheet(name).each_with_index do |row, line|
-              raw_rows << row
-            end
-            # Let the sheet sort it out
-            sheet.parse_raw_data(raw_rows)
-          end
-        end
-        return true
+    def init_source(mode, source)
+      if mode == :file
+        @spreadsheet = Roo::Excel.new(source, :file_warning => :ignore)
+        true
       else
-        @importer.add_error("Unable to read Excel file at path #{path}")
-        return false
+        @importer.add_error("Unsupported XLS mode: #{mode}")
+        false
       end
     rescue Exception => e
-      @importer.add_error("Error reading file #{path}: #{e}")
+      @importer.add_error("Error reading file #{source}: #{e}")
       false
     end
-    private
-    def load_raw_rows(sheet, raw_rows)
-      # Figure out where our columns are and where our data starts
-      column_map = sheet.find_header(raw_rows[0...5])
-      start_row = sheet.data.start_row
-      # Run all the raw rows and convert them to Row instances, making notes of errors along the way...
-      if !@importer.has_errors?
-        raw_rows.each_with_index do |raw, index|
-          line = index + 1
-          if line >= start_row
-            row = sheet.add_row(line, raw)
+    def load_raw_sheet(sheet)
+      @spreadsheet.sheets.each_with_index do |name, index|
+        # See if this sheet's name or index matches the requested sheet definition
+        if sheet.match_sheet?(name, index)
+          # Extract our raw data
+          raw_rows = []
+          @spreadsheet.sheet(name).each_with_index do |row, line|
+            raw_rows << row
           end
+          return raw_rows
         end
       end
+      # This is not good.
+      @importer.add_error("Unable to find sheet #{sheet}")
+      return false
+    rescue Exception => e
+      # Not sure why we'd get here, but we strive for error-freedom here, yessir.
+      @importer.add_error("Error loading sheet #{sheet}: #{e}")
+      false
     end
   end
 end

data/lib/iron/import/xlsx_reader.rb CHANGED Viewed

@@ -1,58 +1,45 @@
 class Importer
+  # Uses the Roo gem to read in .xlsx files
   class XlsxReader < DataReader
     def initialize(importer)
       super(importer, :xlsx)
+      supports_file!
     end
-    def load_file(path)
-      spreadsheet = Roo::Excelx.new(path, :file_warning => :ignore)
-      if spreadsheet
-        # Get our list of sheet definitions, and run all the sheets in the spreadsheet
-        remaining_sheets = @importer.sheets.values
-        spreadsheet.sheets.each_with_index do |name, index|
-          # Look for a sheet definition that matches this sheet's name/index
-          sheet = remaining_sheets.detect {|s| s.match_sheet?(name, index) }
-          if sheet
-            # Remove from our list of remaining sheets
-            remaining_sheets.delete(sheet)
-            # Extract our raw data
-            raw_rows = []
-            spreadsheet.sheet(name).each_with_index do |row, line|
-              raw_rows << row
-            end
-            # Let the sheet sort it out
-            sheet.parse_raw_data(raw_rows)
-          end
-        end
-        return true
+    def init_source(mode, source)
+      if mode == :file
+        @spreadsheet = Roo::Excelx.new(source, :file_warning => :ignore)
+        true
       else
-        @importer.add_error("Unable to read ExcelX file at path #{path}")
-        return false
+        @importer.add_error("Unsupported XLSX mode: #{mode}")
+        false
       end
     rescue Exception => e
-      @importer.add_error("Error reading file #{path}: #{e} @ #{e.backtrace.first}")
+      @importer.add_error("Error reading file #{source}: #{e}")
       false
     end
-    private
-    def load_raw_rows(sheet, raw_rows)
-      # Figure out where our columns are and where our data starts
-      column_map = sheet.find_header(raw_rows[0...5])
-      start_row = sheet.data.start_row
-      # Run all the raw rows and convert them to Row instances, making notes of errors along the way...
-      if !@importer.has_errors?
-        raw_rows.each_with_index do |raw, index|
-          line = index + 1
-          if line >= start_row
-            row = sheet.add_row(line, raw)
+    def load_raw_sheet(sheet)
+      @spreadsheet.sheets.each_with_index do |name, index|
+        # See if this sheet's name or index matches the requested sheet definition
+        if sheet.match_sheet?(name, index)
+          # Extract our raw data
+          raw_rows = []
+          @spreadsheet.sheet(name).each_with_index do |row, line|
+            raw_rows << row
           end
+          return raw_rows
         end
       end
+      @importer.add_error("Unable to find sheet #{sheet}")
+      return false
+    rescue Exception => e
+      # Not sure why we'd get here, but we strive for error-freedom here, yessir.
+      @importer.add_error("Error loading sheet #{sheet}: #{e}")
+      false
     end
   end

data/lib/iron/import.rb CHANGED Viewed

@@ -11,4 +11,5 @@ require_relative 'import/data_reader'
 require_relative 'import/csv_reader'
 require_relative 'import/xls_reader'
 require_relative 'import/xlsx_reader'
+require_relative 'import/custom_reader'
 require_relative 'import/importer'

data/lib/iron-import.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ require 'iron/import'

data/spec/importer/custom_reader_spec.rb ADDED Viewed

@@ -0,0 +1,46 @@
+describe Importer::CustomReader do
+  before do
+    @importer = Importer.new
+  end
+  it 'should set up correctly for on_file handling' do
+    @importer.custom_reader.should be_nil
+    @importer.build do
+      headerless!
+      on_file do |source, sheet|
+        []
+      end
+    end
+    @importer.custom_reader.should be_an(Importer::CustomReader)
+    @importer.custom_reader.should be_supports_file
+    @importer.custom_reader.should_not be_supports_stream
+  end
+  it 'should load the ICD10 test document' do
+    importer = Importer.build do
+      headerless!
+      column :code do
+        required!
+      end
+      column :desc do
+        required!
+      end
+      on_file do |source, sheet|
+        File.readlines(source).collect do |line|
+          line.extract(/([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/)
+        end
+      end
+    end
+    importer.import(SpecHelper.sample_path('icd10-custom.txt'))
+    importer.error_summary.should be_nil
+    importer.default_sheet.dump.should == [
+      {:code => 'A000', :desc => 'Cholera due to Vibrio cholerae 01, biovar cholerae'},
+      {:code => 'A001', :desc => 'Cholera due to Vibrio cholerae 01, biovar eltor'},
+      {:code => 'A009', :desc => 'Cholera, unspecified'},
+      {:code => 'A0100', :desc => 'Typhoid fever, unspecified'}
+    ]
+  end
+end

data/spec/importer/data_reader_spec.rb CHANGED Viewed

@@ -87,7 +87,7 @@ describe Importer::DataReader do
   end
   it 'should build an instance based on stream' do
-    Importer::DataReader.for_stream(@importer, mock(original_filename: "nanodrop.xlsx", content_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")).should be_a(Importer::XlsxReader)
+    Importer::DataReader.for_stream(@importer, double(original_filename: "nanodrop.xlsx", content_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")).should be_a(Importer::XlsxReader)
   end
 end

data/spec/samples/icd10-custom.txt ADDED Viewed

@@ -0,0 +1,4 @@
+A000    Cholera due to Vibrio cholerae 01, biovar cholerae
+A001    Cholera due to Vibrio cholerae 01, biovar eltor
+A009    Cholera, unspecified
+A0100   Typhoid fever, unspecified

metadata CHANGED Viewed

@@ -1,20 +1,23 @@
 --- !ruby/object:Gem::Specification
 name: iron-import
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.6.0
 platform: ruby
 authors:
 - Rob Morris
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-03-19 00:00:00.000000000 Z
+date: 2015-08-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: iron-extensions
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.2'
+    - - ">="
       - !ruby/object:Gem::Version
         version: 1.2.1
   type: :runtime
@@ -22,6 +25,9 @@ dependencies:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.2'
+    - - ">="
       - !ruby/object:Gem::Version
         version: 1.2.1
 - !ruby/object:Gem::Dependency
@@ -80,9 +86,11 @@ files:
 - LICENSE
 - README.rdoc
 - Version.txt
+- lib/iron-import.rb
 - lib/iron/import.rb
 - lib/iron/import/column.rb
 - lib/iron/import/csv_reader.rb
+- lib/iron/import/custom_reader.rb
 - lib/iron/import/data_reader.rb
 - lib/iron/import/error.rb
 - lib/iron/import/importer.rb
@@ -92,11 +100,13 @@ files:
 - lib/iron/import/xlsx_reader.rb
 - spec/importer/column_spec.rb
 - spec/importer/csv_reader_spec.rb
+- spec/importer/custom_reader_spec.rb
 - spec/importer/data_reader_spec.rb
 - spec/importer/importer_spec.rb
 - spec/importer/row_spec.rb
 - spec/importer/sheet_spec.rb
 - spec/importer/xlsx_reader_spec.rb
+- spec/samples/icd10-custom.txt
 - spec/samples/nanodrop.xlsx
 - spec/samples/simple.csv
 - spec/samples/test-products.xls