iron-import 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f53e828e0c419e7ad91abd7d4bc4879154941828
4
- data.tar.gz: 239576c3acf292c759363da0e007acf82e4cbfc3
3
+ metadata.gz: 072a67b4a7cf2c5bb215aaba838d4e5fe87395cf
4
+ data.tar.gz: 701d2f2a706225a8dc76c356fd130fd9335b5e2b
5
5
  SHA512:
6
- metadata.gz: 858285de3786fb5e37e313fb48fc52e05fe39f81ef4ed972060bf87f2c906210e62a9c6e6a7200afb348250403162fcd89da66b7526def7b52aa5b0247a87c04
7
- data.tar.gz: a9b29b5cdb47b3fc4885484b75b31e070667abf30955f77fda115d2878bbb2fa269f6402dd669aabbebec925edac4bc543a38ebf260f26dc5749e6669dce05f3
6
+ metadata.gz: 05ef313d18b1d2a9c8fe129120eab8e687ad7d7d6050e64184f2ed08e7301f89de32d89d6d65e61e320c892dc36703f969ae4d45bdf585eb9df9873cc3a2739b
7
+ data.tar.gz: 3cf69f77fda20624fbf0eb63d87813a2361a9eba295a89330085737ef6198ed26134dbbc30295ee1af2cddf6e6c3f77565544ac6af2f971521c77bd8dbe7fead
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ == 0.8.5 / 2018-02-14
2
+
3
+ * Add simple separator char detection for CSV files, currently supporting tabs & commas
4
+ * Fix return value of Importer#import when unable to find handler for file/stream
5
+ * Fix CSV importer to raise an error if no rows are found on import
6
+
1
7
  == 0.8.4 / 2018-01-24
2
8
 
3
9
  * Improve CSV reader to canonicalize newlines, converting \r + \r\n to \n before import, fixes Windows lameness
data/Version.txt CHANGED
@@ -1 +1 @@
1
- 0.8.4
1
+ 0.8.5
@@ -29,17 +29,33 @@ class Importer
29
29
  text.gsub!(/\r\n/, "\n")
30
30
  text.gsub!(/\r/, "\n")
31
31
 
32
+ # Look at first line, count sep chars, pick the most common
33
+ sep_char = ','
34
+ line = text.split(/\n/, 2).first
35
+ if line.count("\t") > line.count(',')
36
+ sep_char = "\t"
37
+ end
38
+
32
39
  # Parse it out
33
40
  encoding = @importer.encoding || 'UTF-8'
34
41
  options = {
35
42
  :encoding => "#{encoding}:UTF-8",
36
- :skip_blanks => true
43
+ :skip_blanks => true,
44
+ :col_sep => sep_char
37
45
  }
38
46
  begin
39
47
  @raw_rows = CSV.parse(text, options)
40
48
  rescue Exception => e
41
49
  @importer.add_error('Error encountered while parsing CSV')
42
50
  @importer.add_exception(e)
51
+ return false
52
+ end
53
+
54
+ if @raw_rows.nil? || @raw_rows.count == 0
55
+ @importer.add_error('No rows found - unable to process CSV file')
56
+ return false
57
+ else
58
+ return true
43
59
  end
44
60
  end
45
61
 
@@ -62,10 +62,11 @@ class Importer
62
62
 
63
63
  # Figure out which format to use for a given path based on file name
64
64
  def self.for_path(importer, path)
65
- format = path.to_s.extract(/\.(csv|html?|xlsx?)\z/i)
65
+ format = path.to_s.extract(/\.(csv|tsv|html?|xlsx?)\z/i)
66
66
  if format
67
67
  format = format.downcase
68
68
  format = 'html' if format == 'htm'
69
+ format = 'csv' if format == 'tsv'
69
70
  format = format.to_sym
70
71
  for_format(importer, format)
71
72
  else
@@ -353,7 +353,7 @@ class Importer
353
353
  # Verify we got one
354
354
  unless @reader
355
355
  add_error("Unable to find format handler for format :#{format} on import of #{path_or_stream.class.name} source - aborting")
356
- return
356
+ return block ? self : false
357
357
  end
358
358
 
359
359
  # What scopes (if any) should we limit our searching to?
@@ -46,6 +46,13 @@ describe Importer::CsvReader do
46
46
  ]
47
47
  end
48
48
 
49
+ it 'should auto-detect tab-separated data' do
50
+ @reader.load(SpecHelper.sample_path('sprouts.tsv')) do |rows|
51
+ rows.count.should == 43
52
+ rows.first.count.should == 5
53
+ end
54
+ end
55
+
49
56
  it 'should fail on WSM sample data' do
50
57
  importer = Importer.build do
51
58
  column :company_name do
@@ -115,6 +115,7 @@ describe Importer::DataReader do
115
115
 
116
116
  it 'should build an instance based on a path' do
117
117
  Importer::DataReader.for_path(@importer, '/tmp/foo.csv').should be_a(Importer::CsvReader)
118
+ Importer::DataReader.for_path(@importer, '/tmp/foo.TSV').should be_a(Importer::CsvReader)
118
119
  Importer::DataReader.for_path(@importer, 'BAR.XLS').should be_a(Importer::XlsReader)
119
120
  Importer::DataReader.for_path(@importer, '/tmp/nog_bog.xlsx').should be_a(Importer::XlsxReader)
120
121
  Importer::DataReader.for_path(@importer, '/tmp/nog_bog.htm').should be_a(Importer::HtmlReader)
@@ -0,0 +1,43 @@
1
+ UPC STORE_NUMBER DATE DESCRIPTION UNITS_SOLD
2
+ 00810453023927 8 20170701 OPAL W7 1.000
3
+ 00810453023927 208 20170701 OPAL W7 1.000
4
+ 00810453023149 216 20170701 NIGHT M13 1.000
5
+ 00810453022722 217 20170701 GLACIER W8 1.000
6
+ 00810453023934 221 20170701 OPAL W8 1.000
7
+ 00810453022722 222 20170701 GLACIER W8 1.000
8
+ 00810453023934 231 20170701 OPAL W8 1.000
9
+ 00810453023941 231 20170701 OPAL W9 2.000
10
+ 00810453022715 233 20170701 GLACIER W7 1.000
11
+ 00810453022715 236 20170701 GLACIER W7 1.000
12
+ 00810453022609 242 20170701 COVE W6 1.000
13
+ 00810453023101 244 20170701 NIGHT M9 1.000
14
+ 00810453022722 245 20170701 GLACIER W8 1.000
15
+ 00810453023934 245 20170701 OPAL W8 1.000
16
+ 00810453022722 246 20170701 GLACIER W8 2.000
17
+ 00810453023934 246 20170701 OPAL W8 3.000
18
+ 00810453023934 247 20170701 OPAL W8 2.000
19
+ 00810453023927 249 20170701 OPAL W7 1.000
20
+ 00810453023941 251 20170701 OPAL W9 1.000
21
+ 00810453022616 255 20170701 COVE W7 1.000
22
+ 00810453022708 255 20170701 GLACIER W6 1.000
23
+ 00810453023132 255 20170701 NIGHT M12 1.000
24
+ 00810453023941 257 20170701 OPAL W9 1.000
25
+ 00810453023958 257 20170701 OPAL W10 1.000
26
+ 00810453023118 260 20170701 NIGHT M10 1.000
27
+ 00810453023958 271 20170701 OPAL W10 1.000
28
+ 00810453022616 274 20170701 COVE W7 2.000
29
+ 00810453023927 288 20170701 OPAL W7 1.000
30
+ 00810453023958 303 20170701 OPAL W10 1.000
31
+ 00810453022715 306 20170701 GLACIER W7 1.000
32
+ 00810453022616 412 20170701 COVE W7 1.000
33
+ 00810453023910 415 20170701 OPAL W6 1.000
34
+ 00810453023934 415 20170701 OPAL W8 1.000
35
+ 00810453023125 505 20170701 NIGHT M11 1.000
36
+ 00810453022647 517 20170701 COVE W10 1.000
37
+ 00810453022623 520 20170701 COVE W8 1.000
38
+ 00810453023934 521 20170701 OPAL W8 1.000
39
+ 00810453023941 521 20170701 OPAL W9 1.000
40
+ 00810453023149 526 20170701 NIGHT M13 1.000
41
+ 00810453023927 578 20170701 OPAL W7 1.000
42
+ 00810453022722 579 20170701 GLACIER W8 1.000
43
+ 00810453023934 701 20170701 OPAL W8 1.000
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iron-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.4
4
+ version: 0.8.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Morris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-24 00:00:00.000000000 Z
11
+ date: 2018-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: iron-extensions
@@ -98,7 +98,6 @@ files:
98
98
  - ".rspec"
99
99
  - History.txt
100
100
  - LICENSE
101
- - README.rdoc
102
101
  - Version.txt
103
102
  - lib/iron-import.rb
104
103
  - lib/iron/import.rb
@@ -134,6 +133,7 @@ files:
134
133
  - spec/samples/scores.html
135
134
  - spec/samples/simple.csv
136
135
  - spec/samples/simple.html
136
+ - spec/samples/sprouts.tsv
137
137
  - spec/samples/test-products.xls
138
138
  - spec/samples/wsm-data.csv
139
139
  - spec/spec_helper.rb
data/README.rdoc DELETED
@@ -1,162 +0,0 @@
1
- = GEM: iron-import
2
-
3
- Written by Rob Morris @ Irongaze Consulting LLC (http://irongaze.com)
4
-
5
- == DESCRIPTION
6
-
7
- Simple, versatile, reliable tabular data import.
8
-
9
- This gem provides a set of classes to support automating import of tabular data from
10
- CSV, HTML, XLS and XLSX files. Key features include defining columns, auto-detecting column order,
11
- pre-parsing data, validating data, filtering rows, and robust error tracking.
12
-
13
- IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
14
- for the task. Breaking changes will be noted by increases in the minor version,
15
- i.e., 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
16
-
17
- == WHO IS THIS FOR?
18
-
19
- The Roo/Spreadsheet gems do a great job of providing general purpose spreadsheet reading.
20
- However, using them with unreliable user submitted data requires a lot of error checking,
21
- monkeying with data coercion, etc. At Irongaze, we do a lot of work with growing
22
- businesses, where Excel files are the lingua franca for all kinds of uses. This gem
23
- attempts to extract years of experience building one-off importers into a simple library
24
- for rapid import coding.
25
-
26
- In addition, it's quite common for the same data to be transmitted in varying formats -
27
- Excel files, HTML files, CSV files, custom text streams... Use iron-import to have a single
28
- tool-set for processing any of these types of data, often without changing a line of code.
29
-
30
- This is NOT a general-purpose tool for reading spreadsheets. If you want access to
31
- cell styling, reading underlying formulas, etc., you will be better served building
32
- a custom importer based on Roo. But if you're looking to take a customer-uploaded CSV file,
33
- validate and coerce values, then write each row to a database, all the while tracking
34
- any errors encountered... well, this is the library for you!
35
-
36
- == KEY FEATURES
37
-
38
- - Simple yet robust data import and error handling using elegant builder syntax
39
- - Import data from file, stream or string data sources
40
- - Import XLS, XLSX, CSV and HTML tabular data
41
- - Import custom tabular data via passed block
42
- - Automatic column order and start row detection
43
- - Support for optional columns and dynamic column sets
44
- - Basic data coercion supporting string, int, float, date, bool and cents types
45
- - Custom data coercion via passed block
46
- - Custom data validation via passed block
47
- - Row filtering using custom block
48
- - Automatically track and report errors with fine-grained context
49
- - Prefer capturing errors over raising exceptions for more robust imports
50
-
51
- == SAMPLE USAGE
52
-
53
- # Define our importer, with three columns. The importer will look for a row containing
54
- # "name"/"product", "description" and "price" (case insensitively) and automatically determine column
55
- # order and the starting row of the data.
56
- importer = Importer.build do
57
- column :name do
58
- # Provide a regex to find the header for this column
59
- header /(name|product)/i
60
- end
61
- column :description do
62
- # Columns can do custom parsing
63
- parse do |raw_val|
64
- raw_val.to_s.strip
65
- end
66
- # And custom validation
67
- validate do |parsed_val|
68
- add_error('Description too short') unless parsed_val.length > 5
69
- end
70
- end
71
- column :price do
72
- # Built in type conversion handles common cases - in this case
73
- # will correctly turn 2.5, "$2.50" or "2.5" into 250
74
- type :cents
75
- end
76
-
77
- # Need to skip rows? Use a filter! Return true to include a row when processing
78
- filter_rows do |row|
79
- row[:price] != 0 && row[:name] != 'Sample'
80
- end
81
- end
82
-
83
- # Import the provided file or stream row-by-row (if importing succeeds), automatically
84
- # using the proper library to read CSV data. This same code would work
85
- # with XLS or XLSX files with no changes to the code.
86
- importer.import('/tmp/source.csv') do |row|
87
- puts row[:name] + ' = ' + row[:description]
88
- end
89
-
90
- # Check for errors and do the right thing:
91
- importer.on_error do
92
- if missing_headers.any?
93
- # Can't find required column header(s)
94
- puts "Unable to locate columns: #{missing_headers}"
95
-
96
- elsif columns.any?(&:error_values?)
97
- # Invalid or unexpected values in one or more columns
98
- columns.select(&:error_values?).each do |col|
99
- puts "Invalid values for #{col}: #{col.error_values}"
100
- end
101
-
102
- else
103
- # General errors, dump summary report
104
- puts "Error(s) on import: " + error_summary
105
- end
106
- end
107
-
108
- # You can chain the build/import/on-error blocks for a cleaner flow:
109
- Importer.build do
110
- column :one
111
- column :two
112
- end.import(params[:uploaded_file]) do |row|
113
- SomeModel.create(row)
114
- end.on_error do
115
- raise "Errors found: " + error_summary
116
- end
117
-
118
- == IMPORT EXECUTION ORDER
119
-
120
- It can be tricky to keep track of what happens in Importer#import, so here's a quick cheat-sheet:
121
-
122
- - Determine the *format* of stream/file to import
123
- - Determine *import scope* (sheet/table/whatever) using Importer#scope settings, if any
124
- - *Find column headers + start row*
125
- - Validate presence of *required columns*
126
- - *Validate column set* using Importer#validate_columns
127
- - Run each row:
128
- - *Parse* each column's value using Column#parse or Column#type
129
- - *Filter the row* using Importer#filter_rows on parsed values to reject unwanted rows
130
- - *Calculate virtual columns* using Column#calculate
131
- - *Validate each parsed value* using Column#validate
132
- - *Validate entire row* using Importer#validate_rows
133
-
134
- Generally, the import will stop when an error occurs, except during row processing, where each row will
135
- be run until an error for that row is found. The goal is to accumulate actionable info for
136
- presentation to the end user who is uploading the file.
137
-
138
- == REQUIREMENTS
139
-
140
- Depends on the iron-extensions and iron-dsl gems for CSV and custom import formats.
141
-
142
- Optionally requires the roo gem to support XLS and XLSX import and parsing.
143
-
144
- Optionally requires the nokogiri gem to support HTML import and parsing.
145
-
146
- Requires RSpec, nokogiri and roo to build/test.
147
-
148
- == INSTALLATION
149
-
150
- To install, simply run:
151
-
152
- sudo gem install iron-import
153
-
154
- RVM users can skip the sudo:
155
-
156
- gem install iron-import
157
-
158
- Then use
159
-
160
- require 'iron-import'
161
-
162
- to require the library code.