iron-import 0.8.4 → 0.8.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f53e828e0c419e7ad91abd7d4bc4879154941828
4
- data.tar.gz: 239576c3acf292c759363da0e007acf82e4cbfc3
3
+ metadata.gz: 072a67b4a7cf2c5bb215aaba838d4e5fe87395cf
4
+ data.tar.gz: 701d2f2a706225a8dc76c356fd130fd9335b5e2b
5
5
  SHA512:
6
- metadata.gz: 858285de3786fb5e37e313fb48fc52e05fe39f81ef4ed972060bf87f2c906210e62a9c6e6a7200afb348250403162fcd89da66b7526def7b52aa5b0247a87c04
7
- data.tar.gz: a9b29b5cdb47b3fc4885484b75b31e070667abf30955f77fda115d2878bbb2fa269f6402dd669aabbebec925edac4bc543a38ebf260f26dc5749e6669dce05f3
6
+ metadata.gz: 05ef313d18b1d2a9c8fe129120eab8e687ad7d7d6050e64184f2ed08e7301f89de32d89d6d65e61e320c892dc36703f969ae4d45bdf585eb9df9873cc3a2739b
7
+ data.tar.gz: 3cf69f77fda20624fbf0eb63d87813a2361a9eba295a89330085737ef6198ed26134dbbc30295ee1af2cddf6e6c3f77565544ac6af2f971521c77bd8dbe7fead
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ == 0.8.5 / 2018-02-14
2
+
3
+ * Add simple separator char detection for CSV files, currently supporting tabs & commas
4
+ * Fix return value of Importer#import when unable to find handler for file/stream
5
+ * Fix CSV importer to raise an error if no rows are found on import
6
+
1
7
  == 0.8.4 / 2018-01-24
2
8
 
3
9
  * Improve CSV reader to canonicalize newlines, converting \r + \r\n to \n before import, fixes Windows lameness
data/Version.txt CHANGED
@@ -1 +1 @@
1
- 0.8.4
1
+ 0.8.5
@@ -29,17 +29,33 @@ class Importer
29
29
  text.gsub!(/\r\n/, "\n")
30
30
  text.gsub!(/\r/, "\n")
31
31
 
32
+ # Look at first line, count sep chars, pick the most common
33
+ sep_char = ','
34
+ line = text.split(/\n/, 2).first
35
+ if line.count("\t") > line.count(',')
36
+ sep_char = "\t"
37
+ end
38
+
32
39
  # Parse it out
33
40
  encoding = @importer.encoding || 'UTF-8'
34
41
  options = {
35
42
  :encoding => "#{encoding}:UTF-8",
36
- :skip_blanks => true
43
+ :skip_blanks => true,
44
+ :col_sep => sep_char
37
45
  }
38
46
  begin
39
47
  @raw_rows = CSV.parse(text, options)
40
48
  rescue Exception => e
41
49
  @importer.add_error('Error encountered while parsing CSV')
42
50
  @importer.add_exception(e)
51
+ return false
52
+ end
53
+
54
+ if @raw_rows.nil? || @raw_rows.count == 0
55
+ @importer.add_error('No rows found - unable to process CSV file')
56
+ return false
57
+ else
58
+ return true
43
59
  end
44
60
  end
45
61
 
@@ -62,10 +62,11 @@ class Importer
62
62
 
63
63
  # Figure out which format to use for a given path based on file name
64
64
  def self.for_path(importer, path)
65
- format = path.to_s.extract(/\.(csv|html?|xlsx?)\z/i)
65
+ format = path.to_s.extract(/\.(csv|tsv|html?|xlsx?)\z/i)
66
66
  if format
67
67
  format = format.downcase
68
68
  format = 'html' if format == 'htm'
69
+ format = 'csv' if format == 'tsv'
69
70
  format = format.to_sym
70
71
  for_format(importer, format)
71
72
  else
@@ -353,7 +353,7 @@ class Importer
353
353
  # Verify we got one
354
354
  unless @reader
355
355
  add_error("Unable to find format handler for format :#{format} on import of #{path_or_stream.class.name} source - aborting")
356
- return
356
+ return block ? self : false
357
357
  end
358
358
 
359
359
  # What scopes (if any) should we limit our searching to?
@@ -46,6 +46,13 @@ describe Importer::CsvReader do
46
46
  ]
47
47
  end
48
48
 
49
+ it 'should auto-detect tab-separated data' do
50
+ @reader.load(SpecHelper.sample_path('sprouts.tsv')) do |rows|
51
+ rows.count.should == 43
52
+ rows.first.count.should == 5
53
+ end
54
+ end
55
+
49
56
  it 'should fail on WSM sample data' do
50
57
  importer = Importer.build do
51
58
  column :company_name do
@@ -115,6 +115,7 @@ describe Importer::DataReader do
115
115
 
116
116
  it 'should build an instance based on a path' do
117
117
  Importer::DataReader.for_path(@importer, '/tmp/foo.csv').should be_a(Importer::CsvReader)
118
+ Importer::DataReader.for_path(@importer, '/tmp/foo.TSV').should be_a(Importer::CsvReader)
118
119
  Importer::DataReader.for_path(@importer, 'BAR.XLS').should be_a(Importer::XlsReader)
119
120
  Importer::DataReader.for_path(@importer, '/tmp/nog_bog.xlsx').should be_a(Importer::XlsxReader)
120
121
  Importer::DataReader.for_path(@importer, '/tmp/nog_bog.htm').should be_a(Importer::HtmlReader)
@@ -0,0 +1,43 @@
1
+ UPC STORE_NUMBER DATE DESCRIPTION UNITS_SOLD
2
+ 00810453023927 8 20170701 OPAL W7 1.000
3
+ 00810453023927 208 20170701 OPAL W7 1.000
4
+ 00810453023149 216 20170701 NIGHT M13 1.000
5
+ 00810453022722 217 20170701 GLACIER W8 1.000
6
+ 00810453023934 221 20170701 OPAL W8 1.000
7
+ 00810453022722 222 20170701 GLACIER W8 1.000
8
+ 00810453023934 231 20170701 OPAL W8 1.000
9
+ 00810453023941 231 20170701 OPAL W9 2.000
10
+ 00810453022715 233 20170701 GLACIER W7 1.000
11
+ 00810453022715 236 20170701 GLACIER W7 1.000
12
+ 00810453022609 242 20170701 COVE W6 1.000
13
+ 00810453023101 244 20170701 NIGHT M9 1.000
14
+ 00810453022722 245 20170701 GLACIER W8 1.000
15
+ 00810453023934 245 20170701 OPAL W8 1.000
16
+ 00810453022722 246 20170701 GLACIER W8 2.000
17
+ 00810453023934 246 20170701 OPAL W8 3.000
18
+ 00810453023934 247 20170701 OPAL W8 2.000
19
+ 00810453023927 249 20170701 OPAL W7 1.000
20
+ 00810453023941 251 20170701 OPAL W9 1.000
21
+ 00810453022616 255 20170701 COVE W7 1.000
22
+ 00810453022708 255 20170701 GLACIER W6 1.000
23
+ 00810453023132 255 20170701 NIGHT M12 1.000
24
+ 00810453023941 257 20170701 OPAL W9 1.000
25
+ 00810453023958 257 20170701 OPAL W10 1.000
26
+ 00810453023118 260 20170701 NIGHT M10 1.000
27
+ 00810453023958 271 20170701 OPAL W10 1.000
28
+ 00810453022616 274 20170701 COVE W7 2.000
29
+ 00810453023927 288 20170701 OPAL W7 1.000
30
+ 00810453023958 303 20170701 OPAL W10 1.000
31
+ 00810453022715 306 20170701 GLACIER W7 1.000
32
+ 00810453022616 412 20170701 COVE W7 1.000
33
+ 00810453023910 415 20170701 OPAL W6 1.000
34
+ 00810453023934 415 20170701 OPAL W8 1.000
35
+ 00810453023125 505 20170701 NIGHT M11 1.000
36
+ 00810453022647 517 20170701 COVE W10 1.000
37
+ 00810453022623 520 20170701 COVE W8 1.000
38
+ 00810453023934 521 20170701 OPAL W8 1.000
39
+ 00810453023941 521 20170701 OPAL W9 1.000
40
+ 00810453023149 526 20170701 NIGHT M13 1.000
41
+ 00810453023927 578 20170701 OPAL W7 1.000
42
+ 00810453022722 579 20170701 GLACIER W8 1.000
43
+ 00810453023934 701 20170701 OPAL W8 1.000
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iron-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.4
4
+ version: 0.8.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Morris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-24 00:00:00.000000000 Z
11
+ date: 2018-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: iron-extensions
@@ -98,7 +98,6 @@ files:
98
98
  - ".rspec"
99
99
  - History.txt
100
100
  - LICENSE
101
- - README.rdoc
102
101
  - Version.txt
103
102
  - lib/iron-import.rb
104
103
  - lib/iron/import.rb
@@ -134,6 +133,7 @@ files:
134
133
  - spec/samples/scores.html
135
134
  - spec/samples/simple.csv
136
135
  - spec/samples/simple.html
136
+ - spec/samples/sprouts.tsv
137
137
  - spec/samples/test-products.xls
138
138
  - spec/samples/wsm-data.csv
139
139
  - spec/spec_helper.rb
data/README.rdoc DELETED
@@ -1,162 +0,0 @@
1
- = GEM: iron-import
2
-
3
- Written by Rob Morris @ Irongaze Consulting LLC (http://irongaze.com)
4
-
5
- == DESCRIPTION
6
-
7
- Simple, versatile, reliable tabular data import.
8
-
9
- This gem provides a set of classes to support automating import of tabular data from
10
- CSV, HTML, XLS and XLSX files. Key features include defining columns, auto-detecting column order,
11
- pre-parsing data, validating data, filtering rows, and robust error tracking.
12
-
13
- IMPORTANT NOTE: this gem is in flux as we work to define the best possible abstraction
14
- for the task. Breaking changes will be noted by increases in the minor version,
15
- e.g. 0.5.0 and 0.5.1 will be compatible, but 0.6.0 will not (i.e. we follow semantic versioning).
16
-
17
- == WHO IS THIS FOR?
18
-
19
- The Roo/Spreadsheet gems do a great job of providing general purpose spreadsheet reading.
20
- However, using them with unreliable user submitted data requires a lot of error checking,
21
- monkeying with data coercion, etc. At Irongaze, we do a lot of work with growing
22
- businesses, where Excel files are the lingua franca for all kinds of uses. This gem
23
- attempts to extract years of experience building one-off importers into a simple library
24
- for rapid import coding.
25
-
26
- In addition, it's quite common for the same data to be transmitted in varying formats -
27
- Excel files, HTML files, CSV files, custom text streams... Use iron-import to have a single
28
- tool-set for processing any of these types of data, often without changing a line of code.
29
-
30
- This is NOT a general-purpose tool for reading spreadsheets. If you want access to
31
- cell styling, reading underlying formulas, etc., you will be better served building
32
- a custom importer based on Roo. But if you're looking to take a customer-uploaded CSV file,
33
- validate and coerce values, then write each row to a database, all the while tracking
34
- any errors encountered... well, this is the library for you!
35
-
36
- == KEY FEATURES
37
-
38
- - Simple yet robust data import and error handling using elegant builder syntax
39
- - Import data from file, stream or string data sources
40
- - Import XLS, XLSX, CSV and HTML tabular data
41
- - Import custom tabular data via passed block
42
- - Automatic column order and start row detection
43
- - Support for optional columns and dynamic column sets
44
- - Basic data coercion supporting string, int, float, date, bool and cents types
45
- - Custom data coercion via passed block
46
- - Custom data validation via passed block
47
- - Row filtering using custom block
48
- - Automatically track and report errors with fine-grained context
49
- - Prefer capturing errors over raising exceptions for more robust imports
50
-
51
- == SAMPLE USAGE
52
-
53
- # Define our importer, with three columns. The importer will look for a row containing
54
- # "name"/"product", "description" and "price" (case insensitively) and automatically determine column
55
- # order and the starting row of the data.
56
- importer = Importer.build do
57
- column :name do
58
- # Provide a regex to find the header for this column
59
- header /(name|product)/i
60
- end
61
- column :description do
62
- # Columns can do custom parsing
63
- parse do |raw_val|
64
- raw_val.to_s.strip
65
- end
66
- # And custom validation
67
- validate do |parsed_val|
68
- add_error('Description too short') unless parsed_val.length > 5
69
- end
70
- end
71
- column :price do
72
- # Built in type conversion handles common cases - in this case
73
- # will correctly turn 2.5, "$2.50" or "2.5" into 250
74
- type :cents
75
- end
76
-
77
- # Need to skip rows? Use a filter! Return true to include a row when processing
78
- filter_rows do |row|
79
- row[:price] != 0 && row[:name] != 'Sample'
80
- end
81
- end
82
-
83
- # Import the provided file or stream row-by-row (if importing succeeds), automatically
84
- # using the proper library to read CSV data. This same code would work
85
- # with XLS or XLSX files with no changes to the code.
86
- importer.import('/tmp/source.csv') do |row|
87
- puts row[:name] + ' = ' + row[:description]
88
- end
89
-
90
- # Check for errors and do the right thing:
91
- importer.on_error do
92
- if missing_headers.any?
93
- # Can't find required column header(s)
94
- puts "Unable to locate columns: #{missing_headers}"
95
-
96
- elsif columns.any?(&:error_values?)
97
- # Invalid or unexpected values in one or more columns
98
- columns.select(&:error_values?).each do |col|
99
- puts "Invalid values for #{col}: #{col.error_values}"
100
- end
101
-
102
- else
103
- # General errors, dump summary report
104
- puts "Error(s) on import: " + error_summary
105
- end
106
- end
107
-
108
- # You can chain the build/import/on-error blocks for a cleaner flow:
109
- Importer.build do
110
- column :one
111
- column :two
112
- end.import(params[:uploaded_file]) do |row|
113
- SomeModel.create(row)
114
- end.on_error do
115
- raise "Errors found: " + error_summary
116
- end
117
-
118
- == IMPORT EXECUTION ORDER
119
-
120
- It can be tricky to keep track of what happens in Importer#import, so here's a quick cheat-sheet:
121
-
122
- - Determine the *format* of stream/file to import
123
- - Determine *import scope* (sheet/table/whatever) using Importer#scope settings, if any
124
- - *Find column headers + start row*
125
- - Validate presence of *required columns*
126
- - *Validate column set* using Importer#validate_columns
127
- - Run each row:
128
- - *Parse* each column's value using Column#parse or Column#type
129
- - *Filter the row* using Importer#filter_rows on parsed values to reject unwanted rows
130
- - *Calculate virtual columns* using Column#calculate
131
- - *Validate each parsed value* using Column#validate
132
- - *Validate entire row* using Importer#validate_rows
133
-
134
- Generally, the import will stop when an error occurs, save on row processing, where each row will
135
- be run until an error for that row is found. The goal is to accumulate actionable info for
136
- presentation to the end user who is uploading the file.
137
-
138
- == REQUIREMENTS
139
-
140
- Depends on the iron-extensions and iron-dsl gems for CSV and custom import formats.
141
-
142
- Optionally requires the roo gem to support XLS and XLSX import and parsing.
143
-
144
- Optionally requires the nokogiri gem to support HTML import and parsing.
145
-
146
- Requires RSpec, nokogiri and roo to build/test.
147
-
148
- == INSTALLATION
149
-
150
- To install, simply run:
151
-
152
- sudo gem install iron-import
153
-
154
- RVM users can skip the sudo:
155
-
156
- gem install iron-import
157
-
158
- Then use
159
-
160
- require 'iron-import'
161
-
162
- to require the library code.