iron-import 0.6.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,46 +1,12 @@
1
1
  class Importer
2
2
 
3
- class XlsReader < DataReader
3
+ # Uses the Roo gem to read in .xls files
4
+ class XlsReader < ExcelReader
4
5
 
5
6
  def initialize(importer)
6
- super(importer, :xlsx)
7
+ super(importer, :xls)
7
8
  end
8
9
 
9
- def init_source(mode, source)
10
- if mode == :file
11
- @spreadsheet = Roo::Excel.new(source, :file_warning => :ignore)
12
- true
13
- else
14
- @importer.add_error("Unsupported XLS mode: #{mode}")
15
- false
16
- end
17
- rescue Exception => e
18
- @importer.add_error("Error reading file #{source}: #{e}")
19
- false
20
- end
21
-
22
- def load_raw_sheet(sheet)
23
- @spreadsheet.sheets.each_with_index do |name, index|
24
- # See if this sheet's name or index matches the requested sheet definition
25
- if sheet.match_sheet?(name, index)
26
- # Extract our raw data
27
- raw_rows = []
28
- @spreadsheet.sheet(name).each_with_index do |row, line|
29
- raw_rows << row
30
- end
31
- return raw_rows
32
- end
33
- end
34
- # This is not good.
35
- @importer.add_error("Unable to find sheet #{sheet}")
36
- return false
37
-
38
- rescue Exception => e
39
- # Not sure why we'd get here, but we strive for error-freedom here, yessir.
40
- @importer.add_error("Error loading sheet #{sheet}: #{e}")
41
- false
42
- end
43
-
44
10
  end
45
11
 
46
12
  end
@@ -1,47 +1,12 @@
1
1
  class Importer
2
2
 
3
3
  # Uses the Roo gem to read in .xlsx files
4
- class XlsxReader < DataReader
4
+ class XlsxReader < ExcelReader
5
5
 
6
6
  def initialize(importer)
7
7
  super(importer, :xlsx)
8
- supports_file!
9
8
  end
10
-
11
- def init_source(mode, source)
12
- if mode == :file
13
- @spreadsheet = Roo::Excelx.new(source, :file_warning => :ignore)
14
- true
15
- else
16
- @importer.add_error("Unsupported XLSX mode: #{mode}")
17
- false
18
- end
19
- rescue Exception => e
20
- @importer.add_error("Error reading file #{source}: #{e}")
21
- false
22
- end
23
-
24
- def load_raw_sheet(sheet)
25
- @spreadsheet.sheets.each_with_index do |name, index|
26
- # See if this sheet's name or index matches the requested sheet definition
27
- if sheet.match_sheet?(name, index)
28
- # Extract our raw data
29
- raw_rows = []
30
- @spreadsheet.sheet(name).each_with_index do |row, line|
31
- raw_rows << row
32
- end
33
- return raw_rows
34
- end
35
- end
36
- @importer.add_error("Unable to find sheet #{sheet}")
37
- return false
38
-
39
- rescue Exception => e
40
- # Not sure why we'd get here, but we strive for error-freedom here, yessir.
41
- @importer.add_error("Error loading sheet #{sheet}: #{e}")
42
- false
43
- end
44
-
9
+
45
10
  end
46
11
 
47
12
  end
data/lib/iron/import.rb CHANGED
@@ -4,11 +4,12 @@ require 'iron/dsl'
4
4
 
5
5
  # Include required classes
6
6
  require_relative 'import/column'
7
- require_relative 'import/sheet'
8
7
  require_relative 'import/row'
9
8
  require_relative 'import/error'
10
9
  require_relative 'import/data_reader'
11
10
  require_relative 'import/csv_reader'
11
+ require_relative 'import/html_reader'
12
+ require_relative 'import/excel_reader'
12
13
  require_relative 'import/xls_reader'
13
14
  require_relative 'import/xlsx_reader'
14
15
  require_relative 'import/custom_reader'
@@ -2,17 +2,16 @@ describe Importer::Column do
2
2
 
3
3
  before do
4
4
  @importer = Importer.new
5
- @sheet = @importer.default_sheet
6
- @col = Importer::Column.new(@sheet, :test)
7
- @row = Importer::Row.new(@sheet, 1)
5
+ @col = Importer::Column.new(@importer, :test)
6
+ @row = Importer::Row.new(@importer, 1)
8
7
  end
9
8
 
10
9
  it 'should respond to build' do
11
10
  @col.should respond_to(:build)
12
11
  @col.build do
13
- required!
12
+ type :cents
14
13
  end
15
- @col.required?.should be_true
14
+ @col.type.should == :cents
16
15
  end
17
16
 
18
17
  it 'should convert position strings to indexes' do
@@ -21,7 +21,7 @@ describe Importer::CsvReader do
21
21
  end
22
22
  end
23
23
  importer.import(SpecHelper.sample_path('simple.csv')).should be_true
24
- importer.default_sheet.dump.should == [
24
+ importer.to_a.should == [
25
25
  {:number => 123, :string => 'Abc', :date => Date.new(1977,5,13), :cost => 899},
26
26
  {:number => nil, :string => nil, :date => nil, :cost => nil},
27
27
  {:number => 5, :string => 'String with end spaces', :date => Date.new(2004,2,1), :cost => 1000}
@@ -8,7 +8,7 @@ describe Importer::CustomReader do
8
8
  @importer.custom_reader.should be_nil
9
9
  @importer.build do
10
10
  headerless!
11
- on_file do |source, sheet|
11
+ on_file do |source|
12
12
  []
13
13
  end
14
14
  end
@@ -20,14 +20,10 @@ describe Importer::CustomReader do
20
20
  it 'should load the ICD10 test document' do
21
21
  importer = Importer.build do
22
22
  headerless!
23
- column :code do
24
- required!
25
- end
26
- column :desc do
27
- required!
28
- end
23
+ column :code
24
+ column :desc
29
25
 
30
- on_file do |source, sheet|
26
+ on_file do |source|
31
27
  File.readlines(source).collect do |line|
32
28
  line.extract(/([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/)
33
29
  end
@@ -35,7 +31,7 @@ describe Importer::CustomReader do
35
31
  end
36
32
  importer.import(SpecHelper.sample_path('icd10-custom.txt'))
37
33
  importer.error_summary.should be_nil
38
- importer.default_sheet.dump.should == [
34
+ importer.to_a.should == [
39
35
  {:code => 'A000', :desc => 'Cholera due to Vibrio cholerae 01, biovar cholerae'},
40
36
  {:code => 'A001', :desc => 'Cholera due to Vibrio cholerae 01, biovar eltor'},
41
37
  {:code => 'A009', :desc => 'Cholera, unspecified'},
@@ -49,7 +45,7 @@ describe Importer::CustomReader do
49
45
  column :code
50
46
  column :desc
51
47
 
52
- on_file do |source, sheet|
48
+ on_file do |source|
53
49
  add_error('Unable to read cause no reader')
54
50
  end
55
51
  end
@@ -15,7 +15,7 @@ describe Importer::DataReader do
15
15
  55 => 55,
16
16
  3.0 => 3
17
17
  }.each_pair do |val, res|
18
- @reader.parse_value(val, :integer).should == res
18
+ @reader.parse_value(val, :integer).should === res
19
19
  end
20
20
  end
21
21
 
@@ -29,7 +29,7 @@ describe Importer::DataReader do
29
29
  55 => 55.0,
30
30
  '3' => 3.0
31
31
  }.each_pair do |val, res|
32
- @reader.parse_value(val, :float).should == res
32
+ @reader.parse_value(val, :float).should === res
33
33
  end
34
34
  end
35
35
 
@@ -41,13 +41,14 @@ describe Importer::DataReader do
41
41
  255 => '255',
42
42
  -1.5 => '-1.5'
43
43
  }.each_pair do |val, res|
44
- @reader.parse_value(val, :string).should == res
44
+ @reader.parse_value(val, :string).should === res
45
45
  end
46
46
  end
47
47
 
48
48
  it 'should parse cents' do
49
49
  {
50
50
  '$123.00' => 12300,
51
+ '9.95' => 995,
51
52
  '5' => 500,
52
53
  '0.5' => 50,
53
54
  '-95' => -9500,
@@ -55,7 +56,7 @@ describe Importer::DataReader do
55
56
  1.0 => 100,
56
57
  1.25 => 125
57
58
  }.each_pair do |val, res|
58
- @reader.parse_value(val, :cents).should == res
59
+ @reader.parse_value(val, :cents).should === res
59
60
  end
60
61
  end
61
62
 
@@ -68,7 +69,7 @@ describe Importer::DataReader do
68
69
  '5/10/2014, 10:28:07 PM' => Date.new(2014,5,10),
69
70
  Date.new(2000,4,1) => Date.new(2000,4,1)
70
71
  }.each_pair do |val, res|
71
- @reader.parse_value(val, :date).should == res
72
+ @reader.parse_value(val, :date).should === res
72
73
  end
73
74
  end
74
75
 
@@ -0,0 +1,105 @@
1
+ describe Importer::HtmlReader do
2
+
3
+ it 'should load a simple table' do
4
+ importer = Importer.build do
5
+ column :name
6
+ column :id do
7
+ type :int
8
+ end
9
+ end
10
+ res = importer.import(SpecHelper.sample_path('simple.html'))
11
+ importer.error_summary.should be_nil
12
+ res.should be_true
13
+ importer.to_a.should == [
14
+ {:name => 'John', :id => 888},
15
+ {:name => 'Anne', :id => 1234}
16
+ ]
17
+ end
18
+
19
+ it 'should honor start row' do
20
+ txt = '<table><tr><th>Notes From Clark:</th><td class="notes">Can you please send us 5 more white hooks for your display. Please rush ship this order. Thank you!</td></tr></table>'
21
+ importer = Importer.build do
22
+ start_row 1
23
+
24
+ column :note_header do
25
+ header /Notes From/i
26
+ end
27
+ column :note do
28
+ position 2
29
+ end
30
+ end
31
+ importer.import_string(txt).should be_true
32
+ importer.data.start_row.should == 1
33
+ importer.to_a.should == [{:note_header => 'Notes From Clark:', :note => 'Can you please send us 5 more white hooks for your display. Please rush ship this order. Thank you!'}]
34
+ end
35
+
36
+ it 'should properly expand colspan cells' do
37
+ importer = Importer.build do
38
+ column :one
39
+ column :two
40
+ column :three
41
+ end
42
+ res = importer.import(SpecHelper.sample_path('col-span.html'))
43
+ importer.error_summary.should be_nil
44
+ res.should be_true
45
+ importer.to_a.each do |row|
46
+ row[:one].should == 'First' unless row[:one].nil?
47
+ row[:two].should == 'Second' unless row[:two].nil?
48
+ row[:three].should == 'Third' unless row[:three].nil?
49
+ end
50
+ end
51
+
52
+ it 'should limit search by scope' do
53
+ importer = Importer.build do
54
+ column :alpha
55
+ column :beta
56
+ column :gamma
57
+ end
58
+ res = importer.import(SpecHelper.sample_path('multi-table.html'))
59
+ importer.error_summary.should be_nil
60
+ res.should be_true
61
+ importer.to_a.should == [
62
+ {:alpha => '1', :beta => '2', :gamma => '3'},
63
+ {:alpha => '4', :beta => '5', :gamma => '6'}
64
+ ]
65
+
66
+ res = importer.import(SpecHelper.sample_path('multi-table.html'), :scope => '.second table')
67
+ importer.error_summary.should be_nil
68
+ res.should be_true
69
+ importer.to_a.should == [
70
+ {:alpha => '7', :beta => '8', :gamma => '9'}
71
+ ]
72
+ end
73
+
74
+ it 'should strip tags from cells' do
75
+ importer = Importer.build do
76
+ column :q1 do
77
+ header /^Q1$/
78
+ end
79
+ end
80
+ res = importer.import(SpecHelper.sample_path('scores.html'))
81
+ importer.error_summary.should be_nil
82
+ res.should be_true
83
+ importer.to_a.should == [
84
+ {:q1 => '16'},
85
+ {:q1 => '13'}
86
+ ]
87
+ end
88
+
89
+ it 'should treat th and td cells impartially and return in order' do
90
+ importer = Importer.build do
91
+ column :a
92
+ column :b
93
+ column :c
94
+ column :d
95
+ end
96
+ res = importer.import(SpecHelper.sample_path('html-th-td.html'))
97
+ importer.error_summary.should be_nil
98
+ res.should be_true
99
+ importer.to_a.should == [
100
+ {:a => '1', :b => '2', :c => '3', :d => '4'},
101
+ {:a => '1', :b => '2', :c => '3', :d => '4'}
102
+ ]
103
+ end
104
+
105
+ end
@@ -2,6 +2,86 @@ describe Importer do
2
2
 
3
3
  it 'should respond to build' do
4
4
  Importer.should respond_to(:build)
5
+ importer = Importer.build do
6
+ column :foo
7
+ end
8
+ importer.columns.count.should == 1
9
+ end
10
+
11
+ it 'should set single search scopes' do
12
+ importer = Importer.build do
13
+ scope :xls, 'Sheet 2'
14
+ end
15
+ importer.scopes.should == { :xls => ['Sheet 2'] }
16
+ end
17
+
18
+ it 'should set multiple search scopes' do
19
+ importer = Importer.build do
20
+ scopes :xls => [1, 'Sheet 2'],
21
+ :html => 'table.funny'
22
+ end
23
+ importer.scopes.should == { :xls => [1, 'Sheet 2'], :html => ['table.funny'] }
24
+ end
25
+
26
+ it 'should find headers automatically' do
27
+ # Define a few sample columns
28
+ importer = Importer.new
29
+ importer.column(:alpha)
30
+ importer.column(:gamma)
31
+ # Some dummy data
32
+ rows = [
33
+ ['', '', '', ''],
34
+ ['Alpha', 'Beta', 'Gamma', 'Epsilon']
35
+ ]
36
+
37
+ # Parse it!
38
+ importer.find_header(rows).should be_true
39
+
40
+ importer.column(:alpha).data.index.should == 0
41
+ importer.column(:gamma).data.index.should == 2
42
+ importer.data.start_row.should == 3
43
+ end
44
+
45
+ it 'should report missing columns' do
46
+ # Define a few sample columns
47
+ importer = Importer.new
48
+ importer.column(:alpha)
49
+ importer.column(:gamma)
50
+ # Some dummy data
51
+ rows = [
52
+ ['Bob', 'Beta', 'Gamma', 'Epsilon']
53
+ ]
54
+
55
+ # Parse it!
56
+ importer.find_header(rows).should be_false
57
+ importer.missing_headers.should == [:alpha]
58
+ end
59
+
60
+ it 'should capture errors' do
61
+ importer = Importer.build do
62
+ column :foo
63
+ end
64
+ importer.add_error('An error')
65
+ importer.has_errors?.should be_true
66
+ importer.errors.count.should == 1
67
+ end
68
+
69
+ it 'should run conditional code when errors are present' do
70
+ importer = Importer.build do
71
+ column :foo
72
+ end
73
+ was_run = false
74
+
75
+ importer.on_error do
76
+ was_run = true
77
+ end
78
+ was_run.should be_false
79
+
80
+ importer.add_error('An error')
81
+ importer.on_error do
82
+ was_run = true
83
+ end
84
+ was_run.should be_true
5
85
  end
6
86
 
7
87
  it 'should import a test csv file' do
@@ -25,4 +105,31 @@ describe Importer do
25
105
  count.should == 3
26
106
  end
27
107
 
108
+ it 'should import a string' do
109
+ sum = 0
110
+ csv = "one,two\n1,2"
111
+ Importer.build do
112
+ column :one
113
+ column :two
114
+ end.import_string(csv, :format => :csv) do |rows|
115
+ rows[:one].should == '1'
116
+ rows[:two].should == '2'
117
+ sum = rows[:one].to_i + rows[:two].to_i
118
+ end
119
+ # Just make sure we ran correctly
120
+ sum.should == 3
121
+ end
122
+
123
+ it 'should pick the proper format based on content' do
124
+ importer = Importer.build do
125
+ column :one
126
+ column :two
127
+ end
128
+ importer.format.should be_nil
129
+ importer.import_string("one,two\n1,2")
130
+ importer.format.should == :csv
131
+ importer.import_string("<div><table><tr><td>one</td></tr></table></div>")
132
+ importer.format.should == :html
133
+ end
134
+
28
135
  end
@@ -2,8 +2,7 @@ describe Importer::Row do
2
2
 
3
3
  before do
4
4
  @importer = Importer.new
5
- @sheet = @importer.default_sheet
6
- @row = Importer::Row.new(@sheet, 5)
5
+ @row = Importer::Row.new(@importer, 5)
7
6
  end
8
7
 
9
8
  it 'should store and retrieve values' do
@@ -33,5 +32,13 @@ describe Importer::Row do
33
32
  @row.set_values(:a => nil, :b => nil)
34
33
  @row.should be_empty
35
34
  end
35
+
36
+ it 'should not change when to_hash values are changed' do
37
+ @row.set_values(:a => 1, :b => 2)
38
+ hash = @row.to_hash
39
+ hash.should == {:a => 1, :b => 2}
40
+ hash.delete(:a)
41
+ @row[:a].should == 1
42
+ end
36
43
 
37
44
  end
@@ -0,0 +1,77 @@
1
+ describe Importer::XlsReader do
2
+
3
+ it 'should read our products sample' do
4
+ importer = Importer.build do
5
+ column :part_num do
6
+ header /part/i
7
+ end
8
+ column :quantity do
9
+ type :int
10
+ end
11
+ column :desc do
12
+ header /description/i
13
+ end
14
+ column :unit_cost do
15
+ type :cents
16
+ end
17
+ column :total_cost do
18
+ type :cents
19
+ end
20
+ end
21
+ importer.import(SpecHelper.sample_path('test-products.xls'))
22
+ importer.error_summary.should be_nil
23
+ importer.to_a.should == [
24
+ {:part_num=>"00245",
25
+ :quantity=>2,
26
+ :desc=>"Washer",
27
+ :unit_cost=>899,
28
+ :total_cost=>1798},
29
+ {:part_num=>"10855",
30
+ :quantity=>4,
31
+ :desc=>"Misc Bits",
32
+ :unit_cost=>1000,
33
+ :total_cost=>4000},
34
+ {:part_num=>"19880-2",
35
+ :quantity=>3,
36
+ :desc=>"A duck!",
37
+ :unit_cost=>10731,
38
+ :total_cost=>32193},
39
+ {:part_num=>"18098-8",
40
+ :quantity=>1,
41
+ :desc=>"Tuesday",
42
+ :unit_cost=>5500,
43
+ :total_cost=>5500}
44
+ ]
45
+ end
46
+
47
+ it 'should search by scope' do
48
+ importer = Importer.build do
49
+ column :sheet do
50
+ type :int
51
+ end
52
+ column :val
53
+
54
+ filter do |row|
55
+ row.all?
56
+ end
57
+ end
58
+
59
+ # Default case
60
+ res = importer.import(SpecHelper.sample_path('3-sheets.xls'))
61
+ importer.format.should == :xls
62
+ importer.error_summary.should be_nil
63
+ importer.to_a.should == [{:sheet => 1, :val => 'Monkey'}]
64
+
65
+ # Pass scope to import
66
+ res = importer.import(SpecHelper.sample_path('3-sheets.xls'), :scope => 2)
67
+ importer.error_summary.should be_nil
68
+ importer.to_a.should == [{:sheet => 2, :val => 'Rhino'}]
69
+
70
+ # Define scope on importer
71
+ importer.scope :xls, 'Sheet 3'
72
+ res = importer.import(SpecHelper.sample_path('3-sheets.xls'))
73
+ importer.error_summary.should be_nil
74
+ importer.to_a.should == [{:sheet => 3, :val => 'Ant'}]
75
+ end
76
+
77
+ end
@@ -1,9 +1,8 @@
1
1
  describe Importer::XlsxReader do
2
2
 
3
- it 'should load our nanodrop data' do
3
+ it 'should load our nanodrop sample' do
4
4
  importer = Importer.build do
5
5
  column :sample_id do
6
- required!
7
6
  validate do |val|
8
7
  raise 'Invalid ID' unless val.match(/[0-9]{3,}\.[0-9]\z/)
9
8
  end
@@ -26,7 +25,7 @@ describe Importer::XlsxReader do
26
25
  res = importer.import(SpecHelper.sample_path('nanodrop.xlsx'))
27
26
  importer.error_summary.should be_nil
28
27
  res.should be_true
29
- importer.default_sheet.dump.should == [
28
+ importer.to_a.should == [
30
29
  {:sample_id => 'Windsor_buccal_500.1', :a260 => 2.574, :a280 => 1.277, :factor => 50},
31
30
  {:sample_id => 'Weston_fecal_206.2', :a260 => 0.746, :a280 => 0.351, :factor => 50}
32
31
  ]
Binary file
@@ -0,0 +1,29 @@
1
+ <table>
2
+ <tr>
3
+ <td>
4
+ One
5
+ </td>
6
+ <td>
7
+ Two
8
+ </td>
9
+ <td>
10
+ Three
11
+ </td>
12
+ </tr>
13
+ <tr>
14
+ <td colspan="2">First</td>
15
+ <td>Third</td>
16
+ </tr>
17
+ <tr>
18
+ <td>First</td>
19
+ <td colspan="2">Second</td>
20
+ </tr>
21
+ <tr>
22
+ <td colspan="3">First</td>
23
+ </tr>
24
+ <tr>
25
+ <td>First</td>
26
+ <td>Second</td>
27
+ <td>Third</td>
28
+ </tr>
29
+ </table>
@@ -0,0 +1,11 @@
1
+ <table>
2
+ <tr>
3
+ <th>A</th><th>B</th><th>C</th><td>D</td>
4
+ </tr>
5
+ <tr>
6
+ <th>1</th><td>2</td><td>3</td><th>4</th>
7
+ </tr>
8
+ <tr>
9
+ <td>1</td><td>2</td><th>3</th><td>4</td>
10
+ </tr>
11
+ </table>
@@ -0,0 +1,29 @@
1
+ <!doctype html>
2
+ <html>
3
+ <body>
4
+ <p>
5
+ Some text! I like table.
6
+ </p>
7
+ <table>
8
+ <tr>
9
+ <td>Alpha</td><td>Beta</td><td>Gamma</td>
10
+ </tr>
11
+ <tr>
12
+ <td>1</td><td>2</td><td>3</td>
13
+ </tr>
14
+ <tr>
15
+ <td>4</td><td>5</td><td>6</td>
16
+ </tr>
17
+ </table>
18
+ <div class="second">
19
+ <table>
20
+ <tr>
21
+ <td>Alpha</td><td>Beta</td><td>Gamma</td>
22
+ </tr>
23
+ <tr>
24
+ <td>7</td><td>8</td><td>9</td>
25
+ </tr>
26
+ </table>
27
+ </div>
28
+ </body>
29
+ </html>
File without changes