iron-import 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,46 +1,12 @@
1
1
  class Importer
2
2
 
3
- class XlsReader < DataReader
3
+ # Uses the Roo gem to read in .xls files
4
+ class XlsReader < ExcelReader
4
5
 
5
6
  def initialize(importer)
6
- super(importer, :xlsx)
7
+ super(importer, :xls)
7
8
  end
8
9
 
9
- def init_source(mode, source)
10
- if mode == :file
11
- @spreadsheet = Roo::Excel.new(source, :file_warning => :ignore)
12
- true
13
- else
14
- @importer.add_error("Unsupported XLS mode: #{mode}")
15
- false
16
- end
17
- rescue Exception => e
18
- @importer.add_error("Error reading file #{source}: #{e}")
19
- false
20
- end
21
-
22
- def load_raw_sheet(sheet)
23
- @spreadsheet.sheets.each_with_index do |name, index|
24
- # See if this sheet's name or index matches the requested sheet definition
25
- if sheet.match_sheet?(name, index)
26
- # Extract our raw data
27
- raw_rows = []
28
- @spreadsheet.sheet(name).each_with_index do |row, line|
29
- raw_rows << row
30
- end
31
- return raw_rows
32
- end
33
- end
34
- # This is not good.
35
- @importer.add_error("Unable to find sheet #{sheet}")
36
- return false
37
-
38
- rescue Exception => e
39
- # Not sure why we'd get here, but we strive for error-freedom here, yessir.
40
- @importer.add_error("Error loading sheet #{sheet}: #{e}")
41
- false
42
- end
43
-
44
10
  end
45
11
 
46
12
  end
@@ -1,47 +1,12 @@
1
1
  class Importer
2
2
 
3
3
  # Uses the Roo gem to read in .xlsx files
4
- class XlsxReader < DataReader
4
+ class XlsxReader < ExcelReader
5
5
 
6
6
  def initialize(importer)
7
7
  super(importer, :xlsx)
8
- supports_file!
9
8
  end
10
-
11
- def init_source(mode, source)
12
- if mode == :file
13
- @spreadsheet = Roo::Excelx.new(source, :file_warning => :ignore)
14
- true
15
- else
16
- @importer.add_error("Unsupported XLSX mode: #{mode}")
17
- false
18
- end
19
- rescue Exception => e
20
- @importer.add_error("Error reading file #{source}: #{e}")
21
- false
22
- end
23
-
24
- def load_raw_sheet(sheet)
25
- @spreadsheet.sheets.each_with_index do |name, index|
26
- # See if this sheet's name or index matches the requested sheet definition
27
- if sheet.match_sheet?(name, index)
28
- # Extract our raw data
29
- raw_rows = []
30
- @spreadsheet.sheet(name).each_with_index do |row, line|
31
- raw_rows << row
32
- end
33
- return raw_rows
34
- end
35
- end
36
- @importer.add_error("Unable to find sheet #{sheet}")
37
- return false
38
-
39
- rescue Exception => e
40
- # Not sure why we'd get here, but we strive for error-freedom here, yessir.
41
- @importer.add_error("Error loading sheet #{sheet}: #{e}")
42
- false
43
- end
44
-
9
+
45
10
  end
46
11
 
47
12
  end
data/lib/iron/import.rb CHANGED
@@ -4,11 +4,12 @@ require 'iron/dsl'
4
4
 
5
5
  # Include required classes
6
6
  require_relative 'import/column'
7
- require_relative 'import/sheet'
8
7
  require_relative 'import/row'
9
8
  require_relative 'import/error'
10
9
  require_relative 'import/data_reader'
11
10
  require_relative 'import/csv_reader'
11
+ require_relative 'import/html_reader'
12
+ require_relative 'import/excel_reader'
12
13
  require_relative 'import/xls_reader'
13
14
  require_relative 'import/xlsx_reader'
14
15
  require_relative 'import/custom_reader'
@@ -2,17 +2,16 @@ describe Importer::Column do
2
2
 
3
3
  before do
4
4
  @importer = Importer.new
5
- @sheet = @importer.default_sheet
6
- @col = Importer::Column.new(@sheet, :test)
7
- @row = Importer::Row.new(@sheet, 1)
5
+ @col = Importer::Column.new(@importer, :test)
6
+ @row = Importer::Row.new(@importer, 1)
8
7
  end
9
8
 
10
9
  it 'should respond to build' do
11
10
  @col.should respond_to(:build)
12
11
  @col.build do
13
- required!
12
+ type :cents
14
13
  end
15
- @col.required?.should be_true
14
+ @col.type.should == :cents
16
15
  end
17
16
 
18
17
  it 'should convert position strings to indexes' do
@@ -21,7 +21,7 @@ describe Importer::CsvReader do
21
21
  end
22
22
  end
23
23
  importer.import(SpecHelper.sample_path('simple.csv')).should be_true
24
- importer.default_sheet.dump.should == [
24
+ importer.to_a.should == [
25
25
  {:number => 123, :string => 'Abc', :date => Date.new(1977,5,13), :cost => 899},
26
26
  {:number => nil, :string => nil, :date => nil, :cost => nil},
27
27
  {:number => 5, :string => 'String with end spaces', :date => Date.new(2004,2,1), :cost => 1000}
@@ -8,7 +8,7 @@ describe Importer::CustomReader do
8
8
  @importer.custom_reader.should be_nil
9
9
  @importer.build do
10
10
  headerless!
11
- on_file do |source, sheet|
11
+ on_file do |source|
12
12
  []
13
13
  end
14
14
  end
@@ -20,14 +20,10 @@ describe Importer::CustomReader do
20
20
  it 'should load the ICD10 test document' do
21
21
  importer = Importer.build do
22
22
  headerless!
23
- column :code do
24
- required!
25
- end
26
- column :desc do
27
- required!
28
- end
23
+ column :code
24
+ column :desc
29
25
 
30
- on_file do |source, sheet|
26
+ on_file do |source|
31
27
  File.readlines(source).collect do |line|
32
28
  line.extract(/([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/)
33
29
  end
@@ -35,7 +31,7 @@ describe Importer::CustomReader do
35
31
  end
36
32
  importer.import(SpecHelper.sample_path('icd10-custom.txt'))
37
33
  importer.error_summary.should be_nil
38
- importer.default_sheet.dump.should == [
34
+ importer.to_a.should == [
39
35
  {:code => 'A000', :desc => 'Cholera due to Vibrio cholerae 01, biovar cholerae'},
40
36
  {:code => 'A001', :desc => 'Cholera due to Vibrio cholerae 01, biovar eltor'},
41
37
  {:code => 'A009', :desc => 'Cholera, unspecified'},
@@ -49,7 +45,7 @@ describe Importer::CustomReader do
49
45
  column :code
50
46
  column :desc
51
47
 
52
- on_file do |source, sheet|
48
+ on_file do |source|
53
49
  add_error('Unable to read cause no reader')
54
50
  end
55
51
  end
@@ -15,7 +15,7 @@ describe Importer::DataReader do
15
15
  55 => 55,
16
16
  3.0 => 3
17
17
  }.each_pair do |val, res|
18
- @reader.parse_value(val, :integer).should == res
18
+ @reader.parse_value(val, :integer).should === res
19
19
  end
20
20
  end
21
21
 
@@ -29,7 +29,7 @@ describe Importer::DataReader do
29
29
  55 => 55.0,
30
30
  '3' => 3.0
31
31
  }.each_pair do |val, res|
32
- @reader.parse_value(val, :float).should == res
32
+ @reader.parse_value(val, :float).should === res
33
33
  end
34
34
  end
35
35
 
@@ -41,13 +41,14 @@ describe Importer::DataReader do
41
41
  255 => '255',
42
42
  -1.5 => '-1.5'
43
43
  }.each_pair do |val, res|
44
- @reader.parse_value(val, :string).should == res
44
+ @reader.parse_value(val, :string).should === res
45
45
  end
46
46
  end
47
47
 
48
48
  it 'should parse cents' do
49
49
  {
50
50
  '$123.00' => 12300,
51
+ '9.95' => 995,
51
52
  '5' => 500,
52
53
  '0.5' => 50,
53
54
  '-95' => -9500,
@@ -55,7 +56,7 @@ describe Importer::DataReader do
55
56
  1.0 => 100,
56
57
  1.25 => 125
57
58
  }.each_pair do |val, res|
58
- @reader.parse_value(val, :cents).should == res
59
+ @reader.parse_value(val, :cents).should === res
59
60
  end
60
61
  end
61
62
 
@@ -68,7 +69,7 @@ describe Importer::DataReader do
68
69
  '5/10/2014, 10:28:07 PM' => Date.new(2014,5,10),
69
70
  Date.new(2000,4,1) => Date.new(2000,4,1)
70
71
  }.each_pair do |val, res|
71
- @reader.parse_value(val, :date).should == res
72
+ @reader.parse_value(val, :date).should === res
72
73
  end
73
74
  end
74
75
 
@@ -0,0 +1,105 @@
1
+ describe Importer::HtmlReader do
2
+
3
+ it 'should load a simple table' do
4
+ importer = Importer.build do
5
+ column :name
6
+ column :id do
7
+ type :int
8
+ end
9
+ end
10
+ res = importer.import(SpecHelper.sample_path('simple.html'))
11
+ importer.error_summary.should be_nil
12
+ res.should be_true
13
+ importer.to_a.should == [
14
+ {:name => 'John', :id => 888},
15
+ {:name => 'Anne', :id => 1234}
16
+ ]
17
+ end
18
+
19
+ it 'should honor start row' do
20
+ txt = '<table><tr><th>Notes From Clark:</th><td class="notes">Can you please send us 5 more white hooks for your display. Please rush ship this order. Thank you!</td></tr></table>'
21
+ importer = Importer.build do
22
+ start_row 1
23
+
24
+ column :note_header do
25
+ header /Notes From/i
26
+ end
27
+ column :note do
28
+ position 2
29
+ end
30
+ end
31
+ importer.import_string(txt).should be_true
32
+ importer.data.start_row.should == 1
33
+ importer.to_a.should == [{:note_header => 'Notes From Clark:', :note => 'Can you please send us 5 more white hooks for your display. Please rush ship this order. Thank you!'}]
34
+ end
35
+
36
+ it 'should properly expand colspan cells' do
37
+ importer = Importer.build do
38
+ column :one
39
+ column :two
40
+ column :three
41
+ end
42
+ res = importer.import(SpecHelper.sample_path('col-span.html'))
43
+ importer.error_summary.should be_nil
44
+ res.should be_true
45
+ importer.to_a.each do |row|
46
+ row[:one].should == 'First' unless row[:one].nil?
47
+ row[:two].should == 'Second' unless row[:two].nil?
48
+ row[:three].should == 'Third' unless row[:three].nil?
49
+ end
50
+ end
51
+
52
+ it 'should limit search by scope' do
53
+ importer = Importer.build do
54
+ column :alpha
55
+ column :beta
56
+ column :gamma
57
+ end
58
+ res = importer.import(SpecHelper.sample_path('multi-table.html'))
59
+ importer.error_summary.should be_nil
60
+ res.should be_true
61
+ importer.to_a.should == [
62
+ {:alpha => '1', :beta => '2', :gamma => '3'},
63
+ {:alpha => '4', :beta => '5', :gamma => '6'}
64
+ ]
65
+
66
+ res = importer.import(SpecHelper.sample_path('multi-table.html'), :scope => '.second table')
67
+ importer.error_summary.should be_nil
68
+ res.should be_true
69
+ importer.to_a.should == [
70
+ {:alpha => '7', :beta => '8', :gamma => '9'}
71
+ ]
72
+ end
73
+
74
+ it 'should strip tags from cells' do
75
+ importer = Importer.build do
76
+ column :q1 do
77
+ header /^Q1$/
78
+ end
79
+ end
80
+ res = importer.import(SpecHelper.sample_path('scores.html'))
81
+ importer.error_summary.should be_nil
82
+ res.should be_true
83
+ importer.to_a.should == [
84
+ {:q1 => '16'},
85
+ {:q1 => '13'}
86
+ ]
87
+ end
88
+
89
+ it 'should treat th and td cells impartially and return in order' do
90
+ importer = Importer.build do
91
+ column :a
92
+ column :b
93
+ column :c
94
+ column :d
95
+ end
96
+ res = importer.import(SpecHelper.sample_path('html-th-td.html'))
97
+ importer.error_summary.should be_nil
98
+ res.should be_true
99
+ importer.to_a.should == [
100
+ {:a => '1', :b => '2', :c => '3', :d => '4'},
101
+ {:a => '1', :b => '2', :c => '3', :d => '4'}
102
+ ]
103
+ end
104
+
105
+ end
@@ -2,6 +2,86 @@ describe Importer do
2
2
 
3
3
  it 'should respond to build' do
4
4
  Importer.should respond_to(:build)
5
+ importer = Importer.build do
6
+ column :foo
7
+ end
8
+ importer.columns.count.should == 1
9
+ end
10
+
11
+ it 'should set single search scopes' do
12
+ importer = Importer.build do
13
+ scope :xls, 'Sheet 2'
14
+ end
15
+ importer.scopes.should == { :xls => ['Sheet 2'] }
16
+ end
17
+
18
+ it 'should set multiple search scopes' do
19
+ importer = Importer.build do
20
+ scopes :xls => [1, 'Sheet 2'],
21
+ :html => 'table.funny'
22
+ end
23
+ importer.scopes.should == { :xls => [1, 'Sheet 2'], :html => ['table.funny'] }
24
+ end
25
+
26
+ it 'should find headers automatically' do
27
+ # Define a few sample columns
28
+ importer = Importer.new
29
+ importer.column(:alpha)
30
+ importer.column(:gamma)
31
+ # Some dummy data
32
+ rows = [
33
+ ['', '', '', ''],
34
+ ['Alpha', 'Beta', 'Gamma', 'Epsilon']
35
+ ]
36
+
37
+ # Parse it!
38
+ importer.find_header(rows).should be_true
39
+
40
+ importer.column(:alpha).data.index.should == 0
41
+ importer.column(:gamma).data.index.should == 2
42
+ importer.data.start_row.should == 3
43
+ end
44
+
45
+ it 'should report missing columns' do
46
+ # Define a few sample columns
47
+ importer = Importer.new
48
+ importer.column(:alpha)
49
+ importer.column(:gamma)
50
+ # Some dummy data
51
+ rows = [
52
+ ['Bob', 'Beta', 'Gamma', 'Epsilon']
53
+ ]
54
+
55
+ # Parse it!
56
+ importer.find_header(rows).should be_false
57
+ importer.missing_headers.should == [:alpha]
58
+ end
59
+
60
+ it 'should capture errors' do
61
+ importer = Importer.build do
62
+ column :foo
63
+ end
64
+ importer.add_error('An error')
65
+ importer.has_errors?.should be_true
66
+ importer.errors.count.should == 1
67
+ end
68
+
69
+ it 'should run conditional code when errors are present' do
70
+ importer = Importer.build do
71
+ column :foo
72
+ end
73
+ was_run = false
74
+
75
+ importer.on_error do
76
+ was_run = true
77
+ end
78
+ was_run.should be_false
79
+
80
+ importer.add_error('An error')
81
+ importer.on_error do
82
+ was_run = true
83
+ end
84
+ was_run.should be_true
5
85
  end
6
86
 
7
87
  it 'should import a test csv file' do
@@ -25,4 +105,31 @@ describe Importer do
25
105
  count.should == 3
26
106
  end
27
107
 
108
+ it 'should import a string' do
109
+ sum = 0
110
+ csv = "one,two\n1,2"
111
+ Importer.build do
112
+ column :one
113
+ column :two
114
+ end.import_string(csv, :format => :csv) do |rows|
115
+ rows[:one].should == '1'
116
+ rows[:two].should == '2'
117
+ sum = rows[:one].to_i + rows[:two].to_i
118
+ end
119
+ # Just make sure we ran correctly
120
+ sum.should == 3
121
+ end
122
+
123
+ it 'should pick the proper format based on content' do
124
+ importer = Importer.build do
125
+ column :one
126
+ column :two
127
+ end
128
+ importer.format.should be_nil
129
+ importer.import_string("one,two\n1,2")
130
+ importer.format.should == :csv
131
+ importer.import_string("<div><table><tr><td>one</td></tr></table></div>")
132
+ importer.format.should == :html
133
+ end
134
+
28
135
  end
@@ -2,8 +2,7 @@ describe Importer::Row do
2
2
 
3
3
  before do
4
4
  @importer = Importer.new
5
- @sheet = @importer.default_sheet
6
- @row = Importer::Row.new(@sheet, 5)
5
+ @row = Importer::Row.new(@importer, 5)
7
6
  end
8
7
 
9
8
  it 'should store and retrieve values' do
@@ -33,5 +32,13 @@ describe Importer::Row do
33
32
  @row.set_values(:a => nil, :b => nil)
34
33
  @row.should be_empty
35
34
  end
35
+
36
+ it 'should not change when to_hash values are changed' do
37
+ @row.set_values(:a => 1, :b => 2)
38
+ hash = @row.to_hash
39
+ hash.should == {:a => 1, :b => 2}
40
+ hash.delete(:a)
41
+ @row[:a].should == 1
42
+ end
36
43
 
37
44
  end
@@ -0,0 +1,77 @@
1
+ describe Importer::XlsReader do
2
+
3
+ it 'should read our products sample' do
4
+ importer = Importer.build do
5
+ column :part_num do
6
+ header /part/i
7
+ end
8
+ column :quantity do
9
+ type :int
10
+ end
11
+ column :desc do
12
+ header /description/i
13
+ end
14
+ column :unit_cost do
15
+ type :cents
16
+ end
17
+ column :total_cost do
18
+ type :cents
19
+ end
20
+ end
21
+ importer.import(SpecHelper.sample_path('test-products.xls'))
22
+ importer.error_summary.should be_nil
23
+ importer.to_a.should == [
24
+ {:part_num=>"00245",
25
+ :quantity=>2,
26
+ :desc=>"Washer",
27
+ :unit_cost=>899,
28
+ :total_cost=>1798},
29
+ {:part_num=>"10855",
30
+ :quantity=>4,
31
+ :desc=>"Misc Bits",
32
+ :unit_cost=>1000,
33
+ :total_cost=>4000},
34
+ {:part_num=>"19880-2",
35
+ :quantity=>3,
36
+ :desc=>"A duck!",
37
+ :unit_cost=>10731,
38
+ :total_cost=>32193},
39
+ {:part_num=>"18098-8",
40
+ :quantity=>1,
41
+ :desc=>"Tuesday",
42
+ :unit_cost=>5500,
43
+ :total_cost=>5500}
44
+ ]
45
+ end
46
+
47
+ it 'should search by scope' do
48
+ importer = Importer.build do
49
+ column :sheet do
50
+ type :int
51
+ end
52
+ column :val
53
+
54
+ filter do |row|
55
+ row.all?
56
+ end
57
+ end
58
+
59
+ # Default case
60
+ res = importer.import(SpecHelper.sample_path('3-sheets.xls'))
61
+ importer.format.should == :xls
62
+ importer.error_summary.should be_nil
63
+ importer.to_a.should == [{:sheet => 1, :val => 'Monkey'}]
64
+
65
+ # Pass scope to import
66
+ res = importer.import(SpecHelper.sample_path('3-sheets.xls'), :scope => 2)
67
+ importer.error_summary.should be_nil
68
+ importer.to_a.should == [{:sheet => 2, :val => 'Rhino'}]
69
+
70
+ # Define scope on importer
71
+ importer.scope :xls, 'Sheet 3'
72
+ res = importer.import(SpecHelper.sample_path('3-sheets.xls'))
73
+ importer.error_summary.should be_nil
74
+ importer.to_a.should == [{:sheet => 3, :val => 'Ant'}]
75
+ end
76
+
77
+ end
@@ -1,9 +1,8 @@
1
1
  describe Importer::XlsxReader do
2
2
 
3
- it 'should load our nanodrop data' do
3
+ it 'should load our nanodrop sample' do
4
4
  importer = Importer.build do
5
5
  column :sample_id do
6
- required!
7
6
  validate do |val|
8
7
  raise 'Invalid ID' unless val.match(/[0-9]{3,}\.[0-9]\z/)
9
8
  end
@@ -26,7 +25,7 @@ describe Importer::XlsxReader do
26
25
  res = importer.import(SpecHelper.sample_path('nanodrop.xlsx'))
27
26
  importer.error_summary.should be_nil
28
27
  res.should be_true
29
- importer.default_sheet.dump.should == [
28
+ importer.to_a.should == [
30
29
  {:sample_id => 'Windsor_buccal_500.1', :a260 => 2.574, :a280 => 1.277, :factor => 50},
31
30
  {:sample_id => 'Weston_fecal_206.2', :a260 => 0.746, :a280 => 0.351, :factor => 50}
32
31
  ]
Binary file
@@ -0,0 +1,29 @@
1
+ <table>
2
+ <tr>
3
+ <td>
4
+ One
5
+ </td>
6
+ <td>
7
+ Two
8
+ </td>
9
+ <td>
10
+ Three
11
+ </td>
12
+ </tr>
13
+ <tr>
14
+ <td colspan="2">First</td>
15
+ <td>Third</td>
16
+ </tr>
17
+ <tr>
18
+ <td>First</td>
19
+ <td colspan="2">Second</td>
20
+ </tr>
21
+ <tr>
22
+ <td colspan="3">First</td>
23
+ </tr>
24
+ <tr>
25
+ <td>First</td>
26
+ <td>Second</td>
27
+ <td>Third</td>
28
+ </tr>
29
+ </table>
@@ -0,0 +1,11 @@
1
+ <table>
2
+ <tr>
3
+ <th>A</th><th>B</th><th>C</th><td>D</td>
4
+ </tr>
5
+ <tr>
6
+ <th>1</th><td>2</td><td>3</td><th>4</th>
7
+ </tr>
8
+ <tr>
9
+ <td>1</td><td>2</td><th>3</th><td>4</td>
10
+ </tr>
11
+ </table>
@@ -0,0 +1,29 @@
1
+ <!doctype html>
2
+ <html>
3
+ <body>
4
+ <p>
5
+ Some text! I like table.
6
+ </p>
7
+ <table>
8
+ <tr>
9
+ <td>Alpha</td><td>Beta</td><td>Gamma</td>
10
+ </tr>
11
+ <tr>
12
+ <td>1</td><td>2</td><td>3</td>
13
+ </tr>
14
+ <tr>
15
+ <td>4</td><td>5</td><td>6</td>
16
+ </tr>
17
+ </table>
18
+ <div class="second">
19
+ <table>
20
+ <tr>
21
+ <td>Alpha</td><td>Beta</td><td>Gamma</td>
22
+ </tr>
23
+ <tr>
24
+ <td>7</td><td>8</td><td>9</td>
25
+ </tr>
26
+ </table>
27
+ </div>
28
+ </body>
29
+ </html>
File without changes