RubyGems - iron-import - Versions diffs - 0.8.4 → 0.8.5 - Mend

iron-import 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/History.txt +6 -0
data/Version.txt +1 -1
data/lib/iron/import/csv_reader.rb +17 -1
data/lib/iron/import/data_reader.rb +2 -1
data/lib/iron/import/importer.rb +1 -1
data/spec/importer/csv_reader_spec.rb +7 -0
data/spec/importer/data_reader_spec.rb +1 -0
data/spec/samples/sprouts.tsv +43 -0
metadata +3 -3
data/README.rdoc +0 -162

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f53e828e0c419e7ad91abd7d4bc4879154941828
-  data.tar.gz: 239576c3acf292c759363da0e007acf82e4cbfc3
+  metadata.gz: 072a67b4a7cf2c5bb215aaba838d4e5fe87395cf
+  data.tar.gz: 701d2f2a706225a8dc76c356fd130fd9335b5e2b
 SHA512:
-  metadata.gz: 858285de3786fb5e37e313fb48fc52e05fe39f81ef4ed972060bf87f2c906210e62a9c6e6a7200afb348250403162fcd89da66b7526def7b52aa5b0247a87c04
-  data.tar.gz: a9b29b5cdb47b3fc4885484b75b31e070667abf30955f77fda115d2878bbb2fa269f6402dd669aabbebec925edac4bc543a38ebf260f26dc5749e6669dce05f3
+  metadata.gz: 05ef313d18b1d2a9c8fe129120eab8e687ad7d7d6050e64184f2ed08e7301f89de32d89d6d65e61e320c892dc36703f969ae4d45bdf585eb9df9873cc3a2739b
+  data.tar.gz: 3cf69f77fda20624fbf0eb63d87813a2361a9eba295a89330085737ef6198ed26134dbbc30295ee1af2cddf6e6c3f77565544ac6af2f971521c77bd8dbe7fead

data/History.txt CHANGED Viewed

@@ -1,3 +1,9 @@
+== 0.8.5 / 2018-02-14
+* Add simple separator char detection for CSV files, currently supporting tabs & commas
+* Fix return value of Importer#import when unable to find handler for file/stream
+* Fix CSV importer to raise an error if no rows are found on import
 == 0.8.4 / 2018-01-24
 * Improve CSV reader to canonicalize newlines, converting \r + \r\n to \n before import, fixes Windows lameness

data/Version.txt CHANGED Viewed

	@@ -1 +1 @@
1	- 0.8.4
1	+ 0.8.5

data/lib/iron/import/csv_reader.rb CHANGED Viewed

@@ -29,17 +29,33 @@ class Importer
       text.gsub!(/\r\n/, "\n")
       text.gsub!(/\r/, "\n")
+      # Look at first line, count sep chars, pick the most common
+      sep_char = ','
+      line = text.split(/\n/, 2).first
+      if line.count("\t") > line.count(',')
+        sep_char = "\t"
+      end
       # Parse it out
       encoding = @importer.encoding || 'UTF-8'
       options = {
         :encoding => "#{encoding}:UTF-8",
-        :skip_blanks => true
+        :skip_blanks => true,
+        :col_sep => sep_char
       }
       begin
         @raw_rows = CSV.parse(text, options)
       rescue Exception => e
         @importer.add_error('Error encountered while parsing CSV')
         @importer.add_exception(e)
+        return false
+      end
+      if @raw_rows.nil? || @raw_rows.count == 0
+        @importer.add_error('No rows found - unable to process CSV file')
+        return false
+      else
+        return true
       end
     end

data/lib/iron/import/data_reader.rb CHANGED Viewed

@@ -62,10 +62,11 @@ class Importer
     # Figure out which format to use for a given path based on file name
     def self.for_path(importer, path)
-      format = path.to_s.extract(/\.(csv|html?|xlsx?)\z/i)
+      format = path.to_s.extract(/\.(csv|tsv|html?|xlsx?)\z/i)
       if format
         format = format.downcase
         format = 'html' if format == 'htm'
+        format = 'csv' if format == 'tsv'
         format = format.to_sym
         for_format(importer, format)
       else

data/lib/iron/import/importer.rb CHANGED Viewed

@@ -353,7 +353,7 @@ class Importer
     # Verify we got one
     unless @reader
       add_error("Unable to find format handler for format :#{format} on import of #{path_or_stream.class.name} source - aborting")
-      return
+      return block ? self : false
     end
     # What scopes (if any) should we limit our searching to?

data/spec/importer/csv_reader_spec.rb CHANGED Viewed

@@ -46,6 +46,13 @@ describe Importer::CsvReader do
     ]
   end
+  it 'should auto-detect tab-separated data' do
+    @reader.load(SpecHelper.sample_path('sprouts.tsv')) do |rows|
+      rows.count.should == 43
+      rows.first.count.should == 5
+    end
+  end
   it 'should fail on WSM sample data' do
     importer = Importer.build do
       column :company_name do

data/spec/importer/data_reader_spec.rb CHANGED Viewed

@@ -115,6 +115,7 @@ describe Importer::DataReader do
   it 'should build an instance based on a path' do
     Importer::DataReader.for_path(@importer, '/tmp/foo.csv').should be_a(Importer::CsvReader)
+    Importer::DataReader.for_path(@importer, '/tmp/foo.TSV').should be_a(Importer::CsvReader)
     Importer::DataReader.for_path(@importer, 'BAR.XLS').should be_a(Importer::XlsReader)
     Importer::DataReader.for_path(@importer, '/tmp/nog_bog.xlsx').should be_a(Importer::XlsxReader)
     Importer::DataReader.for_path(@importer, '/tmp/nog_bog.htm').should be_a(Importer::HtmlReader)

data/spec/samples/sprouts.tsv ADDED Viewed

@@ -0,0 +1,43 @@
+UPC	STORE_NUMBER	DATE	DESCRIPTION	UNITS_SOLD
+00810453023927      	8	20170701	OPAL W7                             	1.000
+00810453023927      	208	20170701	OPAL W7                             	1.000
+00810453023149      	216	20170701	NIGHT M13                           	1.000
+00810453022722      	217	20170701	GLACIER W8                          	1.000
+00810453023934      	221	20170701	OPAL W8                             	1.000
+00810453022722      	222	20170701	GLACIER W8                          	1.000
+00810453023934      	231	20170701	OPAL W8                             	1.000
+00810453023941      	231	20170701	OPAL W9                             	2.000
+00810453022715      	233	20170701	GLACIER W7                          	1.000
+00810453022715      	236	20170701	GLACIER W7                          	1.000
+00810453022609      	242	20170701	COVE W6                             	1.000
+00810453023101      	244	20170701	NIGHT M9                            	1.000
+00810453022722      	245	20170701	GLACIER W8                          	1.000
+00810453023934      	245	20170701	OPAL W8                             	1.000
+00810453022722      	246	20170701	GLACIER W8                          	2.000
+00810453023934      	246	20170701	OPAL W8                             	3.000
+00810453023934      	247	20170701	OPAL W8                             	2.000
+00810453023927      	249	20170701	OPAL W7                             	1.000
+00810453023941      	251	20170701	OPAL W9                             	1.000
+00810453022616      	255	20170701	COVE W7                             	1.000
+00810453022708      	255	20170701	GLACIER W6                          	1.000
+00810453023132      	255	20170701	NIGHT M12                           	1.000
+00810453023941      	257	20170701	OPAL W9                             	1.000
+00810453023958      	257	20170701	OPAL W10                            	1.000
+00810453023118      	260	20170701	NIGHT M10                           	1.000
+00810453023958      	271	20170701	OPAL W10                            	1.000
+00810453022616      	274	20170701	COVE W7                             	2.000
+00810453023927      	288	20170701	OPAL W7                             	1.000
+00810453023958      	303	20170701	OPAL W10                            	1.000
+00810453022715      	306	20170701	GLACIER W7                          	1.000
+00810453022616      	412	20170701	COVE W7                             	1.000
+00810453023910      	415	20170701	OPAL W6                             	1.000
+00810453023934      	415	20170701	OPAL W8                             	1.000
+00810453023125      	505	20170701	NIGHT M11                           	1.000
+00810453022647      	517	20170701	COVE W10                            	1.000
+00810453022623      	520	20170701	COVE W8                             	1.000
+00810453023934      	521	20170701	OPAL W8                             	1.000
+00810453023941      	521	20170701	OPAL W9                             	1.000
+00810453023149      	526	20170701	NIGHT M13                           	1.000
+00810453023927      	578	20170701	OPAL W7                             	1.000
+00810453022722      	579	20170701	GLACIER W8                          	1.000
+00810453023934      	701	20170701	OPAL W8                             	1.000

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: iron-import
 version: !ruby/object:Gem::Version
-  version: 0.8.4
+  version: 0.8.5
 platform: ruby
 authors:
 - Rob Morris
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-01-24 00:00:00.000000000 Z
+date: 2018-02-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: iron-extensions
@@ -98,7 +98,6 @@ files:
 - ".rspec"
 - History.txt
 - LICENSE
-- README.rdoc
 - Version.txt
 - lib/iron-import.rb
 - lib/iron/import.rb
@@ -134,6 +133,7 @@ files:
 - spec/samples/scores.html
 - spec/samples/simple.csv
 - spec/samples/simple.html
+- spec/samples/sprouts.tsv
 - spec/samples/test-products.xls
 - spec/samples/wsm-data.csv
 - spec/spec_helper.rb

data/README.rdoc DELETED Viewed

@@ -1,162 +0,0 @@
-= GEM: iron-import
-Written by Rob Morris @ Irongaze Consulting LLC (http://irongaze.com)
-== DESCRIPTION
-Simple, versatile, reliable tabular data import.
-This gem provides a set of classes to support automating import of tabular data from
-CSV, HTML, XLS and XLSX files.  Key features include defining columns, auto-detecting column order,
-pre-parsing data, validating data, filtering rows, and robust error tracking.
-IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
-for the task.  Breaking changes will be noted by increases in the minor version,
-e.g. 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
-== WHO IS THIS FOR?
-The Roo/Spreadsheet gems do a great job of providing general purpose spreadsheet reading.
-However, using them with unreliable user submitted data requires a lot of error checking,
-monkeying with data coercion, etc.  At Irongaze, we do a lot of work with growing
-businesses, where Excel files are the lingua franca for all kinds of uses.  This gem
-attempts to extract years of experience building one-off importers into a simple library
-for rapid import coding.
-In addition, it's quite common for the same data to be transmitted in varying formats -
-Excel files, HTML files, CSV files, custom text streams...  Use iron-import to have a single
-tool-set for processing any of these types of data, often without changing a line of code.
-This is NOT a general-purpose tool for reading spreadsheets.  If you want access to
-cell styling, reading underlying formulas, etc., you will be better served building
-a custom importer based on Roo.  But if you're looking to take a customer-uploaded CSV file,
-validate and coerce values, then write each row to a database, all the while tracking
-any errors encountered... well, this is the library for you!
-== KEY FEATURES
-- Simple yet robust data import and error handling using elegant builder syntax
-- Import data from file, stream or string data sources
-- Import XLS, XLSX, CSV and HTML tabular data
-- Import custom tabular data via passed block
-- Automatic column order and start row detection
-- Support for optional columns and dynamic column sets
-- Basic data coercion supporting string, int, float, date, bool and cents types
-- Custom data coercion via passed block
-- Custom data validation via passed block
-- Row filtering using custom block
-- Automatically track and report errors with fine-grained context
-- Prefer capturing errors over raising exceptions for more robust imports
-== SAMPLE USAGE
-    # Define our importer, with three columns.  The importer will look for a row containing
-    # "name"/"product", "description" and "price" (case insensitively) and automatically determine column
-    # order and the starting row of the data.
-    importer = Importer.build do
-      column :name do
-        # Provide a regex to find the header for this column
-        header /(name|product)/i
-      end
-      column :description do
-        # Columns can do custom parsing
-        parse do |raw_val|
-          raw_val.to_s.strip
-        end
-        # And custom validation
-        validate do |parsed_val|
-          add_error('Description too short') unless parsed_val.length > 5
-        end
-      end
-      column :price do
-        # Built in type conversion handles common cases - in this case
-        # will correctly turn 2.5, "$2.50" or "2.5" into 250
-        type :cents
-      end
-      # Need to skip rows?  Use a filter!  Return true to include a row when processing
-      filter_rows do |row|
-        row[:price] != 0 && row[:name] != 'Sample'
-      end
-    end
-    # Import the provided file or stream row-by-row (if importing succeeds), automatically
-    # using the proper library to read CSV data.  This same code would work
-    # with XLS or XLSX files with no changes to the code.
-    importer.import('/tmp/source.csv') do |row|
-      puts row[:name] + ' = ' + row[:description]
-    end
-    # Check for errors and do the right thing:
-    importer.on_error do
-      if missing_headers.any?
-        # Can't find required column header(s)
-        puts "Unable to locate columns: #{missing_headers}"
-      elsif columns.any?(&:error_values?)
-        # Invalid or unexpected values in one or more columns
-        columns.select(&:error_values?).each do |col|
-          puts "Invalid values for #{col}: #{col.error_values}"
-        end
-      else
-        # General errors, dump summary report
-        puts "Error(s) on import: " + error_summary
-      end
-    end
-    # You can chain the build/import/on-error blocks for a cleaner flow:
-    Importer.build do
-      column :one
-      column :two
-    end.import(params[:uploaded_file]) do |row|
-      SomeModel.create(row)
-    end.on_error do
-      raise "Errors found: " + error_summary
-    end
-== IMPORT EXECUTION ORDER
-It can be tricky to keep track of what happens in Importer#import, so here's a quick cheat-sheet:
-- Determine the *format* of stream/file to import
-- Determine *import scope* (sheet/table/whatever) using Importer#scope settings, if any
-- *Find column headers + start row*
-- Validate presence of *required columns*
-- *Validate column set* using Importer#validate_columns
-- Run each row:
-  - *Parse* each column's value using Column#parse or Column#type
-  - *Filter the row* using Importer#filter_rows on parsed values to reject unwanted rows
-  - *Calculate virtual columns* using Column#calculate
-  - *Validate each parsed value* using Column#validate
-  - *Validate entire row* using Importer#validate_rows
-Generally, the import will stop when an error occurs, save on row processing, where each row will
-be run until an error for that row is found.  The goal is to accumulate actionable info for
-presentation to the end user who is uploading the file.
-== REQUIREMENTS
-Depends on the iron-extensions and iron-dsl gems for CSV and custom import formats.
-Optionally requires the roo gem to support XLS and XLSX import and parsing.
-Optionally requires the nokogiri gem to support HTML import and parsing.
-Requires RSpec, nokogiri and roo to build/test.
-== INSTALLATION
-To install, simply run:
-    sudo gem install iron-import
-RVM users can skip the sudo:
-    gem install iron-import
-Then use
-    require 'iron-import'
-to require the library code.