iron-import 0.8.4 → 0.8.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +6 -0
- data/Version.txt +1 -1
- data/lib/iron/import/csv_reader.rb +17 -1
- data/lib/iron/import/data_reader.rb +2 -1
- data/lib/iron/import/importer.rb +1 -1
- data/spec/importer/csv_reader_spec.rb +7 -0
- data/spec/importer/data_reader_spec.rb +1 -0
- data/spec/samples/sprouts.tsv +43 -0
- metadata +3 -3
- data/README.rdoc +0 -162
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 072a67b4a7cf2c5bb215aaba838d4e5fe87395cf
|
4
|
+
data.tar.gz: 701d2f2a706225a8dc76c356fd130fd9335b5e2b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05ef313d18b1d2a9c8fe129120eab8e687ad7d7d6050e64184f2ed08e7301f89de32d89d6d65e61e320c892dc36703f969ae4d45bdf585eb9df9873cc3a2739b
|
7
|
+
data.tar.gz: 3cf69f77fda20624fbf0eb63d87813a2361a9eba295a89330085737ef6198ed26134dbbc30295ee1af2cddf6e6c3f77565544ac6af2f971521c77bd8dbe7fead
|
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
== 0.8.5 / 2018-02-14
|
2
|
+
|
3
|
+
* Add simple separator char detection for CSV files, currently supporting tabs & commas
|
4
|
+
* Fix return value of Importer#import when unable to find handler for file/stream
|
5
|
+
* Fix CSV importer to raise an error if no rows are found on import
|
6
|
+
|
1
7
|
== 0.8.4 / 2018-01-24
|
2
8
|
|
3
9
|
* Improve CSV reader to canonicalize newlines, converting \r + \r\n to \n before import, fixes Windows lameness
|
data/Version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.4
|
1
|
+
0.8.5
|
data/lib/iron/import/csv_reader.rb
CHANGED
@@ -29,17 +29,33 @@ class Importer
|
|
29
29
|
text.gsub!(/\r\n/, "\n")
|
30
30
|
text.gsub!(/\r/, "\n")
|
31
31
|
|
32
|
+
# Look at first line, count sep chars, pick the most common
|
33
|
+
sep_char = ','
|
34
|
+
line = text.split(/\n/, 2).first
|
35
|
+
if line.count("\t") > line.count(',')
|
36
|
+
sep_char = "\t"
|
37
|
+
end
|
38
|
+
|
32
39
|
# Parse it out
|
33
40
|
encoding = @importer.encoding || 'UTF-8'
|
34
41
|
options = {
|
35
42
|
:encoding => "#{encoding}:UTF-8",
|
36
|
-
:skip_blanks => true
|
43
|
+
:skip_blanks => true,
|
44
|
+
:col_sep => sep_char
|
37
45
|
}
|
38
46
|
begin
|
39
47
|
@raw_rows = CSV.parse(text, options)
|
40
48
|
rescue Exception => e
|
41
49
|
@importer.add_error('Error encountered while parsing CSV')
|
42
50
|
@importer.add_exception(e)
|
51
|
+
return false
|
52
|
+
end
|
53
|
+
|
54
|
+
if @raw_rows.nil? || @raw_rows.count == 0
|
55
|
+
@importer.add_error('No rows found - unable to process CSV file')
|
56
|
+
return false
|
57
|
+
else
|
58
|
+
return true
|
43
59
|
end
|
44
60
|
end
|
45
61
|
|
data/lib/iron/import/data_reader.rb
CHANGED
@@ -62,10 +62,11 @@ class Importer
|
|
62
62
|
|
63
63
|
# Figure out which format to use for a given path based on file name
|
64
64
|
def self.for_path(importer, path)
|
65
|
-
format = path.to_s.extract(/\.(csv|html?|xlsx?)\z/i)
|
65
|
+
format = path.to_s.extract(/\.(csv|tsv|html?|xlsx?)\z/i)
|
66
66
|
if format
|
67
67
|
format = format.downcase
|
68
68
|
format = 'html' if format == 'htm'
|
69
|
+
format = 'csv' if format == 'tsv'
|
69
70
|
format = format.to_sym
|
70
71
|
for_format(importer, format)
|
71
72
|
else
|
data/lib/iron/import/importer.rb
CHANGED
@@ -353,7 +353,7 @@ class Importer
|
|
353
353
|
# Verify we got one
|
354
354
|
unless @reader
|
355
355
|
add_error("Unable to find format handler for format :#{format} on import of #{path_or_stream.class.name} source - aborting")
|
356
|
-
return
|
356
|
+
return block ? self : false
|
357
357
|
end
|
358
358
|
|
359
359
|
# What scopes (if any) should we limit our searching to?
|
data/spec/importer/csv_reader_spec.rb
CHANGED
@@ -46,6 +46,13 @@ describe Importer::CsvReader do
|
|
46
46
|
]
|
47
47
|
end
|
48
48
|
|
49
|
+
it 'should auto-detect tab-separated data' do
|
50
|
+
@reader.load(SpecHelper.sample_path('sprouts.tsv')) do |rows|
|
51
|
+
rows.count.should == 43
|
52
|
+
rows.first.count.should == 5
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
49
56
|
it 'should fail on WSM sample data' do
|
50
57
|
importer = Importer.build do
|
51
58
|
column :company_name do
|
data/spec/importer/data_reader_spec.rb
CHANGED
@@ -115,6 +115,7 @@ describe Importer::DataReader do
|
|
115
115
|
|
116
116
|
it 'should build an instance based on a path' do
|
117
117
|
Importer::DataReader.for_path(@importer, '/tmp/foo.csv').should be_a(Importer::CsvReader)
|
118
|
+
Importer::DataReader.for_path(@importer, '/tmp/foo.TSV').should be_a(Importer::CsvReader)
|
118
119
|
Importer::DataReader.for_path(@importer, 'BAR.XLS').should be_a(Importer::XlsReader)
|
119
120
|
Importer::DataReader.for_path(@importer, '/tmp/nog_bog.xlsx').should be_a(Importer::XlsxReader)
|
120
121
|
Importer::DataReader.for_path(@importer, '/tmp/nog_bog.htm').should be_a(Importer::HtmlReader)
|
data/spec/samples/sprouts.tsv
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
UPC STORE_NUMBER DATE DESCRIPTION UNITS_SOLD
|
2
|
+
00810453023927 8 20170701 OPAL W7 1.000
|
3
|
+
00810453023927 208 20170701 OPAL W7 1.000
|
4
|
+
00810453023149 216 20170701 NIGHT M13 1.000
|
5
|
+
00810453022722 217 20170701 GLACIER W8 1.000
|
6
|
+
00810453023934 221 20170701 OPAL W8 1.000
|
7
|
+
00810453022722 222 20170701 GLACIER W8 1.000
|
8
|
+
00810453023934 231 20170701 OPAL W8 1.000
|
9
|
+
00810453023941 231 20170701 OPAL W9 2.000
|
10
|
+
00810453022715 233 20170701 GLACIER W7 1.000
|
11
|
+
00810453022715 236 20170701 GLACIER W7 1.000
|
12
|
+
00810453022609 242 20170701 COVE W6 1.000
|
13
|
+
00810453023101 244 20170701 NIGHT M9 1.000
|
14
|
+
00810453022722 245 20170701 GLACIER W8 1.000
|
15
|
+
00810453023934 245 20170701 OPAL W8 1.000
|
16
|
+
00810453022722 246 20170701 GLACIER W8 2.000
|
17
|
+
00810453023934 246 20170701 OPAL W8 3.000
|
18
|
+
00810453023934 247 20170701 OPAL W8 2.000
|
19
|
+
00810453023927 249 20170701 OPAL W7 1.000
|
20
|
+
00810453023941 251 20170701 OPAL W9 1.000
|
21
|
+
00810453022616 255 20170701 COVE W7 1.000
|
22
|
+
00810453022708 255 20170701 GLACIER W6 1.000
|
23
|
+
00810453023132 255 20170701 NIGHT M12 1.000
|
24
|
+
00810453023941 257 20170701 OPAL W9 1.000
|
25
|
+
00810453023958 257 20170701 OPAL W10 1.000
|
26
|
+
00810453023118 260 20170701 NIGHT M10 1.000
|
27
|
+
00810453023958 271 20170701 OPAL W10 1.000
|
28
|
+
00810453022616 274 20170701 COVE W7 2.000
|
29
|
+
00810453023927 288 20170701 OPAL W7 1.000
|
30
|
+
00810453023958 303 20170701 OPAL W10 1.000
|
31
|
+
00810453022715 306 20170701 GLACIER W7 1.000
|
32
|
+
00810453022616 412 20170701 COVE W7 1.000
|
33
|
+
00810453023910 415 20170701 OPAL W6 1.000
|
34
|
+
00810453023934 415 20170701 OPAL W8 1.000
|
35
|
+
00810453023125 505 20170701 NIGHT M11 1.000
|
36
|
+
00810453022647 517 20170701 COVE W10 1.000
|
37
|
+
00810453022623 520 20170701 COVE W8 1.000
|
38
|
+
00810453023934 521 20170701 OPAL W8 1.000
|
39
|
+
00810453023941 521 20170701 OPAL W9 1.000
|
40
|
+
00810453023149 526 20170701 NIGHT M13 1.000
|
41
|
+
00810453023927 578 20170701 OPAL W7 1.000
|
42
|
+
00810453022722 579 20170701 GLACIER W8 1.000
|
43
|
+
00810453023934 701 20170701 OPAL W8 1.000
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iron-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.4
|
4
|
+
version: 0.8.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Morris
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01-24 00:00:00.000000000 Z
|
11
|
+
date: 2018-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: iron-extensions
|
@@ -98,7 +98,6 @@ files:
|
|
98
98
|
- ".rspec"
|
99
99
|
- History.txt
|
100
100
|
- LICENSE
|
101
|
-
- README.rdoc
|
102
101
|
- Version.txt
|
103
102
|
- lib/iron-import.rb
|
104
103
|
- lib/iron/import.rb
|
@@ -134,6 +133,7 @@ files:
|
|
134
133
|
- spec/samples/scores.html
|
135
134
|
- spec/samples/simple.csv
|
136
135
|
- spec/samples/simple.html
|
136
|
+
- spec/samples/sprouts.tsv
|
137
137
|
- spec/samples/test-products.xls
|
138
138
|
- spec/samples/wsm-data.csv
|
139
139
|
- spec/spec_helper.rb
|
data/README.rdoc
DELETED
@@ -1,162 +0,0 @@
|
|
1
|
-
= GEM: iron-import
|
2
|
-
|
3
|
-
Written by Rob Morris @ Irongaze Consulting LLC (http://irongaze.com)
|
4
|
-
|
5
|
-
== DESCRIPTION
|
6
|
-
|
7
|
-
Simple, versatile, reliable tabular data import.
|
8
|
-
|
9
|
-
This gem provides a set of classes to support automating import of tabular data from
|
10
|
-
CSV, HTML, XLS and XLSX files. Key features include defining columns, auto-detecting column order,
|
11
|
-
pre-parsing data, validating data, filtering rows, and robust error tracking.
|
12
|
-
|
13
|
-
IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
|
14
|
-
for the task. Breaking changes will be noted by increases in the minor version,
|
15
|
-
ie 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
|
16
|
-
|
17
|
-
== WHO IS THIS FOR?
|
18
|
-
|
19
|
-
The Roo/Spreadsheet gems do a great job of providing general purpose spreadsheet reading.
|
20
|
-
However, using them with unreliable user submitted data requires a lot of error checking,
|
21
|
-
monkeying with data coercion, etc. At Irongaze, we do a lot of work with growing
|
22
|
-
businesses, where Excel files are the lingua franca for all kinds of uses. This gem
|
23
|
-
attempts to extract years of experience building one-off importers into a simple library
|
24
|
-
for rapid import coding.
|
25
|
-
|
26
|
-
In addition, it's quite common for the same data to be transmitted in varying formats -
|
27
|
-
Excel files, HTML files, CSV files, custom text streams... Use iron-import to have a single
|
28
|
-
tool-set for processing any of these types of data, often without changing a line of code.
|
29
|
-
|
30
|
-
This is NOT a general-purpose tool for reading spreadsheets. If you want access to
|
31
|
-
cell styling, reading underlying formulas, etc., you will be better served building
|
32
|
-
a custom importer based on Roo. But if you're looking to take a customer-uploaded CSV file,
|
33
|
-
validate and coerce values, then write each row to a database, all the while tracking
|
34
|
-
any errors encountered... well, this is the library for you!
|
35
|
-
|
36
|
-
== KEY FEATURES
|
37
|
-
|
38
|
-
- Simple yet robust data import and error handling using elegant builder syntax
|
39
|
-
- Import data from file, stream or string data sources
|
40
|
-
- Import XLS, XLSX, CSV and HTML tabular data
|
41
|
-
- Import custom tabular data via passed block
|
42
|
-
- Automatic column order and start row detection
|
43
|
-
- Support for optional columns and dynamic column sets
|
44
|
-
- Basic data coercion supporting string, int, float, date, bool and cents types
|
45
|
-
- Custom data coercion via passed block
|
46
|
-
- Custom data validation via passed block
|
47
|
-
- Row filtering using custom block
|
48
|
-
- Automatically track and report errors with fine-grained context
|
49
|
-
- Prefer capturing errors over raising exceptions for more robust imports
|
50
|
-
|
51
|
-
== SAMPLE USAGE
|
52
|
-
|
53
|
-
# Define our importer, with three columns. The importer will look for a row containing
|
54
|
-
# "name"/"product", "description" and "price" (case insensitively) and automatically determine column
|
55
|
-
# order and the starting row of the data.
|
56
|
-
importer = Importer.build do
|
57
|
-
column :name do
|
58
|
-
# Provide a regex to find the header for this column
|
59
|
-
header /(name|product)/i
|
60
|
-
end
|
61
|
-
column :description do
|
62
|
-
# Columns can do custom parsing
|
63
|
-
parse do |raw_val|
|
64
|
-
raw_val.to_s.strip
|
65
|
-
end
|
66
|
-
# And custom validation
|
67
|
-
validate do |parsed_val|
|
68
|
-
add_error('Description too short') unless parsed_val.length > 5
|
69
|
-
end
|
70
|
-
end
|
71
|
-
column :price do
|
72
|
-
# Built in type conversion handles common cases - in this case
|
73
|
-
# will correctly turn 2.5, "$2.50" or "2.5" into 250
|
74
|
-
type :cents
|
75
|
-
end
|
76
|
-
|
77
|
-
# Need to skip rows? Use a filter! Return true to include a row when processing
|
78
|
-
filter_rows do |row|
|
79
|
-
row[:price] != 0 && row[:name] != 'Sample'
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
# Import the provided file or stream row-by-row (if importing succeeds), automatically
|
84
|
-
# using the proper library to read CSV data. This same code would work
|
85
|
-
# with XLS or XLSX files with no changes to the code.
|
86
|
-
importer.import('/tmp/source.csv') do |row|
|
87
|
-
puts row[:name] + ' = ' + row[:description]
|
88
|
-
end
|
89
|
-
|
90
|
-
# Check for errors and do the right thing:
|
91
|
-
importer.on_error do
|
92
|
-
if missing_headers.any?
|
93
|
-
# Can't find required column header(s)
|
94
|
-
puts "Unable to locate columns: #{missing_headers}"
|
95
|
-
|
96
|
-
elsif columns.any?(&:error_values?)
|
97
|
-
# Invalid or unexpected values in one or more columns
|
98
|
-
columns.select(&:error_values?).each do |col|
|
99
|
-
puts "Invalid values for #{col}: #{col.error_values}"
|
100
|
-
end
|
101
|
-
|
102
|
-
else
|
103
|
-
# General errors, dump summary report
|
104
|
-
puts "Error(s) on import: " + error_summary
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
# You can chain the build/import/on-error blocks for a cleaner flow:
|
109
|
-
Importer.build do
|
110
|
-
column :one
|
111
|
-
column :two
|
112
|
-
end.import(params[:uploaded_file]) do |row|
|
113
|
-
SomeModel.create(row)
|
114
|
-
end.on_error do
|
115
|
-
raise "Errors found: " + error_summary
|
116
|
-
end
|
117
|
-
|
118
|
-
== IMPORT EXECUTION ORDER
|
119
|
-
|
120
|
-
It can be tricky to keep track of what happens in Importer#import, so here's a quick cheat-sheet:
|
121
|
-
|
122
|
-
- Determine the *format* of stream/file to import
|
123
|
-
- Determine *import scope* (sheet/table/whatever) using Importer#scope settings, if any
|
124
|
-
- *Find column headers + start row*
|
125
|
-
- Validate presence of *required columns*
|
126
|
-
- *Validate column set* using Importer#validate_columns
|
127
|
-
- Run each row:
|
128
|
-
- *Parse* each column's value using Column#parse or Column#type
|
129
|
-
- *Filter the row* using Importer#filter_rows on parsed values to reject unwanted rows
|
130
|
-
- *Calculate virtual columns* using Column#calculate
|
131
|
-
- *Validate each parsed value* using Column#validate
|
132
|
-
- *Validate entire row* using Importer#validate_rows
|
133
|
-
|
134
|
-
Generally, the import will stop when an error occurs, save on row processing, where each row will
|
135
|
-
be run until an error for that row is found. The goal is to accumulate actionable info for
|
136
|
-
presentation to the end user who is uploading the file.
|
137
|
-
|
138
|
-
== REQUIREMENTS
|
139
|
-
|
140
|
-
Depends on the iron-extensions and iron-dsl gems for CSV and custom import formats.
|
141
|
-
|
142
|
-
Optionally requires the roo gem to support XLS and XLSX import and parsing.
|
143
|
-
|
144
|
-
Optionally requires the nokogiri gem to support HTML import and parsing.
|
145
|
-
|
146
|
-
Requires RSpec, nokogiri and roo to build/test.
|
147
|
-
|
148
|
-
== INSTALLATION
|
149
|
-
|
150
|
-
To install, simply run:
|
151
|
-
|
152
|
-
sudo gem install iron-import
|
153
|
-
|
154
|
-
RVM users can skip the sudo:
|
155
|
-
|
156
|
-
gem install iron-import
|
157
|
-
|
158
|
-
Then use
|
159
|
-
|
160
|
-
require 'iron-import'
|
161
|
-
|
162
|
-
to require the library code.
|