iron-import 0.8.4 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +6 -0
- data/Version.txt +1 -1
- data/lib/iron/import/csv_reader.rb +17 -1
- data/lib/iron/import/data_reader.rb +2 -1
- data/lib/iron/import/importer.rb +1 -1
- data/spec/importer/csv_reader_spec.rb +7 -0
- data/spec/importer/data_reader_spec.rb +1 -0
- data/spec/samples/sprouts.tsv +43 -0
- metadata +3 -3
- data/README.rdoc +0 -162
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 072a67b4a7cf2c5bb215aaba838d4e5fe87395cf
|
4
|
+
data.tar.gz: 701d2f2a706225a8dc76c356fd130fd9335b5e2b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05ef313d18b1d2a9c8fe129120eab8e687ad7d7d6050e64184f2ed08e7301f89de32d89d6d65e61e320c892dc36703f969ae4d45bdf585eb9df9873cc3a2739b
|
7
|
+
data.tar.gz: 3cf69f77fda20624fbf0eb63d87813a2361a9eba295a89330085737ef6198ed26134dbbc30295ee1af2cddf6e6c3f77565544ac6af2f971521c77bd8dbe7fead
|
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
== 0.8.5 / 2018-02-14
|
2
|
+
|
3
|
+
* Add simple separator char detection for CSV files, currently supporting tabs & commas
|
4
|
+
* Fix return value of Importer#import when unable to find handler for file/stream
|
5
|
+
* Fix CSV importer to raise an error if no rows are found on import
|
6
|
+
|
1
7
|
== 0.8.4 / 2018-01-24
|
2
8
|
|
3
9
|
* Improve CSV reader to canonicalize newlines, converting \r + \r\n to \n before import, fixes Windows lameness
|
data/Version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.4
|
1
|
+
0.8.5
|
data/lib/iron/import/csv_reader.rb
CHANGED
@@ -29,17 +29,33 @@ class Importer
|
|
29
29
|
text.gsub!(/\r\n/, "\n")
|
30
30
|
text.gsub!(/\r/, "\n")
|
31
31
|
|
32
|
+
# Look at first line, count sep chars, pick the most common
|
33
|
+
sep_char = ','
|
34
|
+
line = text.split(/\n/, 2).first
|
35
|
+
if line.count("\t") > line.count(',')
|
36
|
+
sep_char = "\t"
|
37
|
+
end
|
38
|
+
|
32
39
|
# Parse it out
|
33
40
|
encoding = @importer.encoding || 'UTF-8'
|
34
41
|
options = {
|
35
42
|
:encoding => "#{encoding}:UTF-8",
|
36
|
-
:skip_blanks => true
|
43
|
+
:skip_blanks => true,
|
44
|
+
:col_sep => sep_char
|
37
45
|
}
|
38
46
|
begin
|
39
47
|
@raw_rows = CSV.parse(text, options)
|
40
48
|
rescue Exception => e
|
41
49
|
@importer.add_error('Error encountered while parsing CSV')
|
42
50
|
@importer.add_exception(e)
|
51
|
+
return false
|
52
|
+
end
|
53
|
+
|
54
|
+
if @raw_rows.nil? || @raw_rows.count == 0
|
55
|
+
@importer.add_error('No rows found - unable to process CSV file')
|
56
|
+
return false
|
57
|
+
else
|
58
|
+
return true
|
43
59
|
end
|
44
60
|
end
|
45
61
|
|
data/lib/iron/import/data_reader.rb
CHANGED
@@ -62,10 +62,11 @@ class Importer
|
|
62
62
|
|
63
63
|
# Figure out which format to use for a given path based on file name
|
64
64
|
def self.for_path(importer, path)
|
65
|
-
format = path.to_s.extract(/\.(csv|html?|xlsx?)\z/i)
|
65
|
+
format = path.to_s.extract(/\.(csv|tsv|html?|xlsx?)\z/i)
|
66
66
|
if format
|
67
67
|
format = format.downcase
|
68
68
|
format = 'html' if format == 'htm'
|
69
|
+
format = 'csv' if format == 'tsv'
|
69
70
|
format = format.to_sym
|
70
71
|
for_format(importer, format)
|
71
72
|
else
|
data/lib/iron/import/importer.rb
CHANGED
@@ -353,7 +353,7 @@ class Importer
|
|
353
353
|
# Verify we got one
|
354
354
|
unless @reader
|
355
355
|
add_error("Unable to find format handler for format :#{format} on import of #{path_or_stream.class.name} source - aborting")
|
356
|
-
return
|
356
|
+
return block ? self : false
|
357
357
|
end
|
358
358
|
|
359
359
|
# What scopes (if any) should we limit our searching to?
|
data/spec/importer/csv_reader_spec.rb
CHANGED
@@ -46,6 +46,13 @@ describe Importer::CsvReader do
|
|
46
46
|
]
|
47
47
|
end
|
48
48
|
|
49
|
+
it 'should auto-detect tab-separated data' do
|
50
|
+
@reader.load(SpecHelper.sample_path('sprouts.tsv')) do |rows|
|
51
|
+
rows.count.should == 43
|
52
|
+
rows.first.count.should == 5
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
49
56
|
it 'should fail on WSM sample data' do
|
50
57
|
importer = Importer.build do
|
51
58
|
column :company_name do
|
data/spec/importer/data_reader_spec.rb
CHANGED
@@ -115,6 +115,7 @@ describe Importer::DataReader do
|
|
115
115
|
|
116
116
|
it 'should build an instance based on a path' do
|
117
117
|
Importer::DataReader.for_path(@importer, '/tmp/foo.csv').should be_a(Importer::CsvReader)
|
118
|
+
Importer::DataReader.for_path(@importer, '/tmp/foo.TSV').should be_a(Importer::CsvReader)
|
118
119
|
Importer::DataReader.for_path(@importer, 'BAR.XLS').should be_a(Importer::XlsReader)
|
119
120
|
Importer::DataReader.for_path(@importer, '/tmp/nog_bog.xlsx').should be_a(Importer::XlsxReader)
|
120
121
|
Importer::DataReader.for_path(@importer, '/tmp/nog_bog.htm').should be_a(Importer::HtmlReader)
|
data/spec/samples/sprouts.tsv
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
UPC STORE_NUMBER DATE DESCRIPTION UNITS_SOLD
|
2
|
+
00810453023927 8 20170701 OPAL W7 1.000
|
3
|
+
00810453023927 208 20170701 OPAL W7 1.000
|
4
|
+
00810453023149 216 20170701 NIGHT M13 1.000
|
5
|
+
00810453022722 217 20170701 GLACIER W8 1.000
|
6
|
+
00810453023934 221 20170701 OPAL W8 1.000
|
7
|
+
00810453022722 222 20170701 GLACIER W8 1.000
|
8
|
+
00810453023934 231 20170701 OPAL W8 1.000
|
9
|
+
00810453023941 231 20170701 OPAL W9 2.000
|
10
|
+
00810453022715 233 20170701 GLACIER W7 1.000
|
11
|
+
00810453022715 236 20170701 GLACIER W7 1.000
|
12
|
+
00810453022609 242 20170701 COVE W6 1.000
|
13
|
+
00810453023101 244 20170701 NIGHT M9 1.000
|
14
|
+
00810453022722 245 20170701 GLACIER W8 1.000
|
15
|
+
00810453023934 245 20170701 OPAL W8 1.000
|
16
|
+
00810453022722 246 20170701 GLACIER W8 2.000
|
17
|
+
00810453023934 246 20170701 OPAL W8 3.000
|
18
|
+
00810453023934 247 20170701 OPAL W8 2.000
|
19
|
+
00810453023927 249 20170701 OPAL W7 1.000
|
20
|
+
00810453023941 251 20170701 OPAL W9 1.000
|
21
|
+
00810453022616 255 20170701 COVE W7 1.000
|
22
|
+
00810453022708 255 20170701 GLACIER W6 1.000
|
23
|
+
00810453023132 255 20170701 NIGHT M12 1.000
|
24
|
+
00810453023941 257 20170701 OPAL W9 1.000
|
25
|
+
00810453023958 257 20170701 OPAL W10 1.000
|
26
|
+
00810453023118 260 20170701 NIGHT M10 1.000
|
27
|
+
00810453023958 271 20170701 OPAL W10 1.000
|
28
|
+
00810453022616 274 20170701 COVE W7 2.000
|
29
|
+
00810453023927 288 20170701 OPAL W7 1.000
|
30
|
+
00810453023958 303 20170701 OPAL W10 1.000
|
31
|
+
00810453022715 306 20170701 GLACIER W7 1.000
|
32
|
+
00810453022616 412 20170701 COVE W7 1.000
|
33
|
+
00810453023910 415 20170701 OPAL W6 1.000
|
34
|
+
00810453023934 415 20170701 OPAL W8 1.000
|
35
|
+
00810453023125 505 20170701 NIGHT M11 1.000
|
36
|
+
00810453022647 517 20170701 COVE W10 1.000
|
37
|
+
00810453022623 520 20170701 COVE W8 1.000
|
38
|
+
00810453023934 521 20170701 OPAL W8 1.000
|
39
|
+
00810453023941 521 20170701 OPAL W9 1.000
|
40
|
+
00810453023149 526 20170701 NIGHT M13 1.000
|
41
|
+
00810453023927 578 20170701 OPAL W7 1.000
|
42
|
+
00810453022722 579 20170701 GLACIER W8 1.000
|
43
|
+
00810453023934 701 20170701 OPAL W8 1.000
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iron-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.4
|
4
|
+
version: 0.8.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Morris
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01-24 00:00:00.000000000 Z
|
11
|
+
date: 2018-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: iron-extensions
|
@@ -98,7 +98,6 @@ files:
|
|
98
98
|
- ".rspec"
|
99
99
|
- History.txt
|
100
100
|
- LICENSE
|
101
|
-
- README.rdoc
|
102
101
|
- Version.txt
|
103
102
|
- lib/iron-import.rb
|
104
103
|
- lib/iron/import.rb
|
@@ -134,6 +133,7 @@ files:
|
|
134
133
|
- spec/samples/scores.html
|
135
134
|
- spec/samples/simple.csv
|
136
135
|
- spec/samples/simple.html
|
136
|
+
- spec/samples/sprouts.tsv
|
137
137
|
- spec/samples/test-products.xls
|
138
138
|
- spec/samples/wsm-data.csv
|
139
139
|
- spec/spec_helper.rb
|
data/README.rdoc
DELETED
@@ -1,162 +0,0 @@
|
|
1
|
-
= GEM: iron-import
|
2
|
-
|
3
|
-
Written by Rob Morris @ Irongaze Consulting LLC (http://irongaze.com)
|
4
|
-
|
5
|
-
== DESCRIPTION
|
6
|
-
|
7
|
-
Simple, versatile, reliable tabular data import.
|
8
|
-
|
9
|
-
This gem provides a set of classes to support automating import of tabular data from
|
10
|
-
CSV, HTML, XLS and XLSX files. Key features include defining columns, auto-detecting column order,
|
11
|
-
pre-parsing data, validating data, filtering rows, and robust error tracking.
|
12
|
-
|
13
|
-
IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
|
14
|
-
for the task. Breaking changes will be noted by increases in the minor version,
|
15
|
-
ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
|
16
|
-
|
17
|
-
== WHO IS THIS FOR?
|
18
|
-
|
19
|
-
The Roo/Spreadsheet gems do a great job of providing general purpose spreadsheet reading.
|
20
|
-
However, using them with unreliable user submitted data requires a lot of error checking,
|
21
|
-
monkeying with data coercion, etc. At Irongaze, we do a lot of work with growing
|
22
|
-
businesses, where Excel files are the lingua franca for all kinds of uses. This gem
|
23
|
-
attempts to extract years of experience building one-off importers into a simple library
|
24
|
-
for rapid import coding.
|
25
|
-
|
26
|
-
In addition, it's quite common for the same data to be transmitted in varying formats -
|
27
|
-
Excel files, HTML files, CSV files, custom text streams... Use iron-import to have a single
|
28
|
-
tool-set for processing any of these types of data, often without changing a line of code.
|
29
|
-
|
30
|
-
This is NOT a general-purpose tool for reading spreadsheets. If you want access to
|
31
|
-
cell styling, reading underlying formulas, etc., you will be better served building
|
32
|
-
a custom importer based on Roo. But if you're looking to take a customer-uploaded CSV file,
|
33
|
-
validate and coerce values, then write each row to a database, all the while tracking
|
34
|
-
any errors encountered... well, this is the library for you!
|
35
|
-
|
36
|
-
== KEY FEATURES
|
37
|
-
|
38
|
-
- Simple yet robust data import and error handling using elegant builder syntax
|
39
|
-
- Import data from file, stream or string data sources
|
40
|
-
- Import XLS, XLSX, CSV and HTML tabular data
|
41
|
-
- Import custom tabular data via passed block
|
42
|
-
- Automatic column order and start row detection
|
43
|
-
- Support for optional columns and dynamic column sets
|
44
|
-
- Basic data coercion supporting string, int, float, date, bool and cents types
|
45
|
-
- Custom data coercion via passed block
|
46
|
-
- Custom data validation via passed block
|
47
|
-
- Row filtering using custom block
|
48
|
-
- Automatically track and report errors with fine-grained context
|
49
|
-
- Prefer capturing errors over raising exceptions for more robust imports
|
50
|
-
|
51
|
-
== SAMPLE USAGE
|
52
|
-
|
53
|
-
# Define our importer, with three columns. The importer will look for a row containing
|
54
|
-
# "name"/"product", "description" and "price" (case insensitively) and automatically determine column
|
55
|
-
# order and the starting row of the data.
|
56
|
-
importer = Importer.build do
|
57
|
-
column :name do
|
58
|
-
# Provide a regex to find the header for this column
|
59
|
-
header /(name|product)/i
|
60
|
-
end
|
61
|
-
column :description do
|
62
|
-
# Columns can do custom parsing
|
63
|
-
parse do |raw_val|
|
64
|
-
raw_val.to_s.strip
|
65
|
-
end
|
66
|
-
# And custom validation
|
67
|
-
validate do |parsed_val|
|
68
|
-
add_error('Description too short') unless parsed_val.length > 5
|
69
|
-
end
|
70
|
-
end
|
71
|
-
column :price do
|
72
|
-
# Built in type conversion handles common cases - in this case
|
73
|
-
# will correctly turn 2.5, "$2.50" or "2.5" into 250
|
74
|
-
type :cents
|
75
|
-
end
|
76
|
-
|
77
|
-
# Need to skip rows? Use a filter! Return true to include a row when processing
|
78
|
-
filter_rows do |row|
|
79
|
-
row[:price] != 0 && row[:name] != 'Sample'
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
# Import the provided file or stream row-by-row (if importing succeeds), automatically
|
84
|
-
# using the proper library to read CSV data. This same code would work
|
85
|
-
# with XLS or XLSX files with no changes to the code.
|
86
|
-
importer.import('/tmp/source.csv') do |row|
|
87
|
-
puts row[:name] + ' = ' + row[:description]
|
88
|
-
end
|
89
|
-
|
90
|
-
# Check for errors and do the right thing:
|
91
|
-
importer.on_error do
|
92
|
-
if missing_headers.any?
|
93
|
-
# Can't find required column header(s)
|
94
|
-
puts "Unable to locate columns: #{missing_headers}"
|
95
|
-
|
96
|
-
elsif columns.any?(&:error_values?)
|
97
|
-
# Invalid or unexpected values in one or more columns
|
98
|
-
columns.select(&:error_values?).each do |col|
|
99
|
-
puts "Invalid values for #{col}: #{col.error_values}"
|
100
|
-
end
|
101
|
-
|
102
|
-
else
|
103
|
-
# General errors, dump summary report
|
104
|
-
puts "Error(s) on import: " + error_summary
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
# You can chain the build/import/on-error blocks for a cleaner flow:
|
109
|
-
Importer.build do
|
110
|
-
column :one
|
111
|
-
column :two
|
112
|
-
end.import(params[:uploaded_file]) do |row|
|
113
|
-
SomeModel.create(row)
|
114
|
-
end.on_error do
|
115
|
-
raise "Errors found: " + error_summary
|
116
|
-
end
|
117
|
-
|
118
|
-
== IMPORT EXECUTION ORDER
|
119
|
-
|
120
|
-
It can be tricky to keep track of what happens in Importer#import, so here's a quick cheat-sheet:
|
121
|
-
|
122
|
-
- Determine the *format* of stream/file to import
|
123
|
-
- Determine *import scope* (sheet/table/whatever) using Importer#scope settings, if any
|
124
|
-
- *Find column headers + start row*
|
125
|
-
- Validate presence of *required columns*
|
126
|
-
- *Validate column set* using Importer#validate_columns
|
127
|
-
- Run each row:
|
128
|
-
- *Parse* each column's value using Column#parse or Column#type
|
129
|
-
- *Filter the row* using Importer#filter_rows on parsed values to reject unwanted rows
|
130
|
-
- *Calculate virtual columns* using Column#calculate
|
131
|
-
- *Validate each parsed value* using Column#validate
|
132
|
-
- *Validate entire row* using Importer#validate_rows
|
133
|
-
|
134
|
-
Generally, the import will stop when an error occurs, save on row processing, where each row will
|
135
|
-
be run until an error for that row is found. The goal is to accumulate actionable info for
|
136
|
-
presentation to the end user who is uploading the file.
|
137
|
-
|
138
|
-
== REQUIREMENTS
|
139
|
-
|
140
|
-
Depends on the iron-extensions and iron-dsl gems for CSV and custom import formats.
|
141
|
-
|
142
|
-
Optionally requires the roo gem to support XLS and XLSX import and parsing.
|
143
|
-
|
144
|
-
Optionally requires the nokogiri gem to support HTML import and parsing.
|
145
|
-
|
146
|
-
Requires RSpec, nokogiri and roo to build/test.
|
147
|
-
|
148
|
-
== INSTALLATION
|
149
|
-
|
150
|
-
To install, simply run:
|
151
|
-
|
152
|
-
sudo gem install iron-import
|
153
|
-
|
154
|
-
RVM users can skip the sudo:
|
155
|
-
|
156
|
-
gem install iron-import
|
157
|
-
|
158
|
-
Then use
|
159
|
-
|
160
|
-
require 'iron-import'
|
161
|
-
|
162
|
-
to require the library code.
|