simple_xlsx_reader 0.9.8 → 1.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 592fa09e1f66127581441dcee23a2ec10481a08f
4
- data.tar.gz: 2165196985a70b2f594828e43523c237600f7c08
3
+ metadata.gz: 4d24749c8713f2f57e47d33a363801333130c5df
4
+ data.tar.gz: 0b26df686d579163845fd6e06936bff930c52c33
5
5
  SHA512:
6
- metadata.gz: 4ef8fe7df99b9bea2742c49663aa899421e5d4b6a145c62ee15afef9a8938ba1bfe0c5bf4b9d0999e2cffc6189436fd4f3b1a59af8a796a05c1dc4d6d9f7de77
7
- data.tar.gz: d1a2cb30e2dce280b529f6b877745cfbfa1f7cc8bbcacd506b268ca624da74d637e9e47bff8602f53d0c33ebd38055750c6eda88bbeaedd34d8eeb5e891e8b2e
6
+ metadata.gz: 523248f26fdf45c2978716836a2d81c5d3a4877bb74f29d7bfa51be1965e1ef61d1e67c83ec98bcac83f7b8e39ac08f666f6d31341428a5d0afa993e149ed7cd
7
+ data.tar.gz: 710789e5c5dd70d9d360e3a776662dc64138965b54984772cac9785018e2c9605a2b9ea3a2b889ec171183c64610901da527ae91989e9ea149d9016dc8497c71
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ### 1.0.0.pre
2
+
3
+ * Handle files with blank rows [Brian Hoffman]
4
+ * Preserve seconds when casting datetimes [Rob Newbould]
5
+ * Preserve empty rows (previously would be omitted)
6
+ * Speed up parsing by ~55%
7
+
1
8
  ### 0.9.8
2
9
 
3
10
  * Rubyzip 1.0 compatibility
data/README.md CHANGED
@@ -97,7 +97,18 @@ This project follows [semantic versioning 1.0](http://semver.org/spec/v1.0.0.htm
97
97
 
98
98
  ## Contributing
99
99
 
100
- 1. Fork it
100
+ Remember to write tests, think about edge cases, and run the existing
101
+ suite.
102
+
103
+ Note that as of commit 665cbafdde, the most extreme end of the
104
+ linear-time performance test, which is 10,000 rows (12 columns), runs in
105
+ ~4 seconds on Ruby 2.1 on a 2012 MBP. If the linear time assertion fails
106
+ or you're way off that, there is probably a performance regression in
107
+ your code.
108
+
109
+ Then, the standard stuff:
110
+
111
+ 1. Fork this project
101
112
  2. Create your feature branch (`git checkout -b my-new-feature`)
102
113
  3. Commit your changes (`git commit -am 'Add some feature'`)
103
114
  4. Push to the branch (`git push origin my-new-feature`)
@@ -74,13 +74,13 @@ module SimpleXlsxReader
74
74
  def self.load(file_path)
75
75
  self.new.tap do |xml|
76
76
  SimpleXlsxReader::Zip.open(file_path) do |zip|
77
- xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml'))
78
- xml.styles = Nokogiri::XML(zip.read('xl/styles.xml'))
77
+ xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
78
+ xml.styles = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
79
79
 
80
80
  # optional feature used by excel, but not often used by xlsx
81
81
  # generation libraries
82
82
  if zip.file.file?('xl/sharedStrings.xml')
83
- xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml'))
83
+ xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
84
84
  end
85
85
 
86
86
  xml.sheets = []
@@ -90,7 +90,7 @@ module SimpleXlsxReader
90
90
  break if !zip.file.file?("xl/worksheets/sheet#{i}.xml")
91
91
 
92
92
  xml.sheets <<
93
- Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml"))
93
+ Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml")).remove_namespaces!
94
94
  end
95
95
  end
96
96
  end
@@ -101,14 +101,14 @@ module SimpleXlsxReader
101
101
  # For internal use; translates source xml to Sheet objects.
102
102
  class Mapper < Struct.new(:xml)
103
103
  def load_sheets
104
- sheet_toc.each_with_index.map do |(sheet_name, sheet_number), i|
104
+ sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
105
105
  parse_sheet(sheet_name, xml.sheets[i]) # sheet_number is *not* the index into xml.sheets
106
106
  end
107
107
  end
108
108
 
109
109
  # Table of contents for the sheets, ex. {'Authors' => 0, ...}
110
110
  def sheet_toc
111
- xml.workbook.xpath('/xmlns:workbook/xmlns:sheets/xmlns:sheet').
111
+ xml.workbook.xpath('/workbook/sheets/sheet').
112
112
  inject({}) do |acc, sheet|
113
113
 
114
114
  acc[sheet.attributes['name'].value] =
@@ -120,52 +120,55 @@ module SimpleXlsxReader
120
120
 
121
121
  def parse_sheet(sheet_name, xsheet)
122
122
  sheet = Sheet.new(sheet_name)
123
-
124
- last_column = last_column(xsheet)
125
- rownum = -1
126
- sheet.rows =
127
- xsheet.xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row").map do |xrow|
128
- rownum += 1
129
-
130
- colname = nil
131
- colnum = -1
132
- cells = []
133
- while(colname != last_column) do
134
- colname ? colname.next! : colname = 'A'
135
- colnum += 1
136
-
137
- xcell = xrow.at_xpath(
138
- %(xmlns:c[@r="#{colname + (rownum + 1).to_s}"]))
139
-
140
- # empty 'General' columns might not be in the xml
141
- next cells << nil if xcell.nil?
142
-
143
- type = xcell.attributes['t'] &&
144
- xcell.attributes['t'].value
145
- style = xcell.attributes['s'] &&
146
- style_types[xcell.attributes['s'].value.to_i]
147
-
148
- xvalue = type == 'inlineStr' ?
149
- xcell.at_xpath('xmlns:is/xmlns:t') : xcell.at_xpath('xmlns:v')
150
-
151
- cells << begin
152
- self.class.cast(xvalue && xvalue.text.strip, type, style,
153
- :shared_strings => shared_strings)
154
- rescue => e
155
- if !SimpleXlsxReader.configuration.catch_cell_load_errors
156
- error = CellLoadError.new(
157
- "Row #{rownum}, Col #{colnum}: #{e.message}")
158
- error.set_backtrace(e.backtrace)
159
- raise error
160
- else
161
- sheet.load_errors[[rownum, colnum]] = e.message
162
-
163
- xcell.text.strip
164
- end
123
+ sheet_width, sheet_height = *sheet_dimensions(xsheet)
124
+
125
+ sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
126
+ xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
127
+ column, row = *xcell.attr('r').match(/([A-Z]+)([0-9]+)/).captures
128
+ col_idx = column_letter_to_number(column) - 1
129
+ row_idx = row.to_i - 1
130
+
131
+ type = xcell.attributes['t'] &&
132
+ xcell.attributes['t'].value
133
+ style = xcell.attributes['s'] &&
134
+ style_types[xcell.attributes['s'].value.to_i]
135
+
136
+ # This is the main performance bottleneck. Using just 'xcell.text'
137
+ # would be ideal, and makes parsing super-fast. However, there's
138
+ # other junk in the cell, formula references in particular,
139
+ # so we really do have to look for specific value nodes.
140
+ # Maybe there is a really clever way to use xcell.text and parse out
141
+ # the correct value, but I can't think of one, or an alternative
142
+ # strategy.
143
+ #
144
+ # And yes, this really is faster than using xcell.at_xpath(...),
145
+ # by about 60%. Odd.
146
+ xvalue = type == 'inlineStr' ?
147
+ (xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
148
+ xcell.children.find {|c| c.name == 'v'}
149
+
150
+ cell = begin
151
+ self.class.cast(xvalue && xvalue.text.strip, type, style,
152
+ :shared_strings => shared_strings)
153
+ rescue => e
154
+ if !SimpleXlsxReader.configuration.catch_cell_load_errors
155
+ error = CellLoadError.new(
156
+ "Row #{row_idx}, Col #{col_idx}: #{e.message}")
157
+ error.set_backtrace(e.backtrace)
158
+ raise error
159
+ else
160
+ sheet.load_errors[[row_idx, col_idx]] = e.message
161
+
162
+ xcell.text.strip
165
163
  end
166
164
  end
167
165
 
168
- cells
166
+ # This shouldn't be necessary, but just in case, we'll create
167
+ # the row so we don't blow up. This means any null rows in between
168
+ # will be null instead of [null, null, ...]
169
+ sheet.rows[row_idx] ||= Array.new(sheet_width)
170
+
171
+ sheet.rows[row_idx][col_idx] = cell
169
172
  end
170
173
 
171
174
  sheet
@@ -180,17 +183,43 @@ module SimpleXlsxReader
180
183
  # and check the column name of the last header row. Obviously this isn't
181
184
  # the most robust strategy, but it likely fits 99% of use cases
182
185
  # considering it's not a problem with actual excel docs.
183
- def last_column(xsheet)
184
- dimension = xsheet.at_xpath('/xmlns:worksheet/xmlns:dimension')
186
+ def last_cell_label(xsheet)
187
+ dimension = xsheet.at_xpath('/worksheet/dimension')
185
188
  if dimension
186
- col = dimension.attributes['ref'].value.match(/:([A-Z]*)[1-9]*/)
187
- col ? col.captures.first : 'A'
189
+ col = dimension.attributes['ref'].value.match(/:([A-Z]+[0-9]+)/)
190
+ col ? col.captures.first : 'A1'
188
191
  else
189
- last = xsheet.at_xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c[last()]")
190
- last ? last.attributes['r'].value.match(/([A-Z]*)[1-9]*/).captures.first : 'A'
192
+ last = xsheet.at_xpath("/worksheet/sheetData/row[last()]/c[last()]")
193
+ last ? last.attributes['r'].value.match(/([A-Z]+[0-9]+)/).captures.first : 'A1'
191
194
  end
192
195
  end
193
196
 
197
+ # Returns dimensions (1-indexed)
198
+ def sheet_dimensions(xsheet)
199
+ column, row = *last_cell_label(xsheet).match(/([A-Z]+)([0-9]+)/).captures
200
+ [column_letter_to_number(column), row.to_i]
201
+ end
202
+
203
+ # column letters form a bijective base-26 numeral (A=1 .. Z=26, no zero digit):
204
+ # 'A' = 1
205
+ # 'B' = 2
206
+ # 'Z' = 26
207
+ # 'AA' = 26 * 1 + 1
208
+ # 'AZ' = 26 * 1 + 26
209
+ # 'BA' = 26 * 2 + 1
210
+ # 'ZA' = 26 * 26 + 1
211
+ # 'ZZ' = 26 * 26 + 26
212
+ # 'AAA' = 26 * 26 * 1 + 26 * 1 + 1
213
+ # 'AAZ' = 26 * 26 * 1 + 26 * 1 + 26
214
+ # 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
215
+ # 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
216
+ def column_letter_to_number(column_letter)
217
+ pow = -1
218
+ column_letter.codepoints.reverse.inject(0) do |acc, charcode|
219
+ pow += 1
220
+ acc + 26**pow * (charcode - 64)
221
+ end
222
+ end
194
223
 
195
224
  # Excel doesn't record types for some cells, only its display style, so
196
225
  # we have to back out the type from that style.
@@ -208,7 +237,7 @@ module SimpleXlsxReader
208
237
  # type.
209
238
  def style_types
210
239
  @style_types ||=
211
- xml.styles.xpath('/xmlns:styleSheet/xmlns:cellXfs/xmlns:xf').map {|xstyle|
240
+ xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
212
241
  style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
213
242
  end
214
243
 
@@ -229,7 +258,7 @@ module SimpleXlsxReader
229
258
  # ex. {164 => :date_time}
230
259
  def custom_style_types
231
260
  @custom_style_types ||=
232
- xml.styles.xpath('/xmlns:styleSheet/xmlns:numFmts/xmlns:numFmt').
261
+ xml.styles.xpath('/styleSheet/numFmts/numFmt').
233
262
  inject({}) do |acc, xstyle|
234
263
 
235
264
  acc[xstyle.attributes['numFmtId'].value.to_i] =
@@ -321,11 +350,9 @@ module SimpleXlsxReader
321
350
 
322
351
  if fraction_of_24 # there is a time associated
323
352
  fraction_of_24 = "0.#{fraction_of_24}".to_f
324
- military = fraction_of_24 * 24
325
- hour = military.truncate
326
- minute = ((military % 1) * 60).truncate
353
+ seconds = (fraction_of_24 * 86400).round
327
354
 
328
- return Time.utc(date.year, date.month, date.day, hour, minute)
355
+ return Time.utc(date.year, date.month, date.day) + seconds
329
356
  else
330
357
  return date
331
358
  end
@@ -389,12 +416,12 @@ module SimpleXlsxReader
389
416
  def shared_strings
390
417
  @shared_strings ||= begin
391
418
  if xml.shared_strings
392
- xml.shared_strings.xpath('/xmlns:sst/xmlns:si').map do |xsst|
419
+ xml.shared_strings.xpath('/sst/si').map do |xsst|
393
420
  # a shared string can be a single value...
394
- sst = xsst.at_xpath('xmlns:t/text()')
421
+ sst = xsst.at_xpath('t/text()')
395
422
  sst = sst.text if sst
396
423
  # ... or a composite of separately styled words/characters
397
- sst ||= xsst.xpath('xmlns:r/xmlns:t/text()').map(&:text).join
424
+ sst ||= xsst.xpath('r/t/text()').map(&:text).join
398
425
  end
399
426
  else
400
427
  []
@@ -1,3 +1,3 @@
1
1
  module SimpleXlsxReader
2
- VERSION = "0.9.8"
2
+ VERSION = "1.0.0.pre"
3
3
  end
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.add_dependency 'nokogiri'
16
16
  gem.add_dependency 'rubyzip'
17
17
 
18
- gem.add_development_dependency 'minitest'
18
+ gem.add_development_dependency 'minitest', '>= 5.0'
19
19
  gem.add_development_dependency 'pry'
20
20
 
21
21
  gem.files = `git ls-files`.split($/)
@@ -0,0 +1,18 @@
1
+ require 'test_helper'
2
+
3
+ describe SimpleXlsxReader do
4
+ let(:datetimes_file) { File.join(File.dirname(__FILE__),
5
+ 'datetimes.xlsx') }
6
+
7
+ let(:subject) { SimpleXlsxReader::Document.new(datetimes_file) }
8
+
9
+ it 'converts date_times with the correct precision' do
10
+ subject.to_hash.must_equal({
11
+ "Datetimes" =>
12
+ [[Time.parse("2013-08-19 18:29:59 UTC")],
13
+ [Time.parse("2013-08-19 18:30:00 UTC")],
14
+ [Time.parse("2013-08-19 18:30:01 UTC")]]
15
+ })
16
+ end
17
+
18
+ end
Binary file
@@ -1,7 +1,7 @@
1
1
  require 'test_helper'
2
2
  require 'minitest/benchmark'
3
3
 
4
- describe SimpleXlsxReader do
4
+ describe 'SimpleXlsxReader Benchmark' do
5
5
 
6
6
  # n is 0-indexed for us, then converted to 1-indexed for excel
7
7
  def build_row(n)
@@ -58,8 +58,8 @@ describe SimpleXlsxReader do
58
58
  </sheetData>
59
59
  </worksheet>
60
60
  XML
61
- )
62
- base.at_xpath("/xmlns:worksheet/xmlns:sheetData").add_child(build_row(0))
61
+ ).remove_namespaces!
62
+ base.at_xpath("/worksheet/sheetData").add_child(build_row(0))
63
63
 
64
64
  @xml = SimpleXlsxReader::Document::Xml.new.tap do |xml|
65
65
  xml.sheets = [base]
@@ -76,7 +76,7 @@ describe SimpleXlsxReader do
76
76
  </cellXfs>
77
77
  </styleSheet>
78
78
  XML
79
- )
79
+ ).remove_namespaces!
80
80
  end
81
81
 
82
82
  # Every new sheet has one more row
@@ -84,8 +84,8 @@ describe SimpleXlsxReader do
84
84
  sheet = base.clone
85
85
 
86
86
  range.times do |n|
87
- sheet.xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row").last.
88
- add_next_sibling(build_row(n))
87
+ sheet.xpath("/worksheet/sheetData/row").last.
88
+ add_next_sibling(build_row(n+1))
89
89
  end
90
90
 
91
91
  @xml.sheets[range] = sheet
@@ -93,19 +93,19 @@ describe SimpleXlsxReader do
93
93
  end
94
94
 
95
95
  def self.bench_range
96
- bench_exp(1,1000)
96
+ bench_exp(1,10000)
97
97
  end
98
98
 
99
99
  bench_performance_linear 'parses sheets in linear time', 0.9999 do |n|
100
100
 
101
- raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.count}"\
101
+ raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.size}"\
102
102
  if @xml.sheets[n].nil?
103
103
 
104
104
  sheet = SimpleXlsxReader::Document::Mapper.new(@xml).
105
105
  parse_sheet('test', @xml.sheets[n])
106
106
 
107
- raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.count}"\
108
- if sheet.rows.count != n + 1
107
+ raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.size}"\
108
+ if sheet.rows.size != n + 1
109
109
  end
110
110
 
111
111
  end
@@ -44,12 +44,12 @@ describe SimpleXlsxReader do
44
44
  end
45
45
 
46
46
  it 'reads time styles' do
47
- described_class.cast('41505.77084', nil, :time).
47
+ described_class.cast('41505.77083', nil, :time).
48
48
  must_equal Time.parse('2013-08-19 18:30 UTC')
49
49
  end
50
50
 
51
51
  it 'reads date_time styles' do
52
- described_class.cast('41505.77084', nil, :date_time).
52
+ described_class.cast('41505.77083', nil, :date_time).
53
53
  must_equal Time.parse('2013-08-19 18:30 UTC')
54
54
  end
55
55
 
@@ -59,12 +59,12 @@ describe SimpleXlsxReader do
59
59
  end
60
60
 
61
61
  it 'reads number types styled as times' do
62
- described_class.cast('41505.77084', 'n', :time).
62
+ described_class.cast('41505.77083', 'n', :time).
63
63
  must_equal Time.parse('2013-08-19 18:30 UTC')
64
64
  end
65
65
 
66
66
  it 'reads number types styled as date_times' do
67
- described_class.cast('41505.77084', 'n', :date_time).
67
+ described_class.cast('41505.77083', 'n', :date_time).
68
68
  must_equal Time.parse('2013-08-19 18:30 UTC')
69
69
  end
70
70
  end
@@ -73,7 +73,7 @@ describe SimpleXlsxReader do
73
73
  let(:xml) do
74
74
  SimpleXlsxReader::Document::Xml.new.tap do |xml|
75
75
  xml.shared_strings = Nokogiri::XML(File.read(
76
- File.join(File.dirname(__FILE__), 'shared_strings.xml') ))
76
+ File.join(File.dirname(__FILE__), 'shared_strings.xml') )).remove_namespaces!
77
77
  end
78
78
  end
79
79
 
@@ -92,7 +92,7 @@ describe SimpleXlsxReader do
92
92
  let(:xml) do
93
93
  SimpleXlsxReader::Document::Xml.new.tap do |xml|
94
94
  xml.styles = Nokogiri::XML(File.read(
95
- File.join(File.dirname(__FILE__), 'styles.xml') ))
95
+ File.join(File.dirname(__FILE__), 'styles.xml') )).remove_namespaces!
96
96
  end
97
97
  end
98
98
 
@@ -105,7 +105,7 @@ describe SimpleXlsxReader do
105
105
  end
106
106
  end
107
107
 
108
- describe '#last_column' do
108
+ describe '#last_cell_label' do
109
109
 
110
110
  let(:generic_style) do
111
111
  Nokogiri::XML(
@@ -116,7 +116,7 @@ describe SimpleXlsxReader do
116
116
  </cellXfs>
117
117
  </styleSheet>
118
118
  XML
119
- )
119
+ ).remove_namespaces!
120
120
  end
121
121
 
122
122
  # Note, this is not a valid sheet, since the last cell is actually D1 but
@@ -141,7 +141,7 @@ describe SimpleXlsxReader do
141
141
  </sheetData>
142
142
  </worksheet>
143
143
  XML
144
- )
144
+ ).remove_namespaces!
145
145
  end
146
146
 
147
147
  let(:empty_sheet) do
@@ -153,7 +153,7 @@ describe SimpleXlsxReader do
153
153
  </sheetData>
154
154
  </worksheet>
155
155
  XML
156
- )
156
+ ).remove_namespaces!
157
157
  end
158
158
 
159
159
  let(:xml) do
@@ -166,21 +166,45 @@ describe SimpleXlsxReader do
166
166
  subject { described_class.new(xml) }
167
167
 
168
168
  it 'uses /worksheet/dimension if available' do
169
- subject.last_column(sheet).must_equal 'C'
169
+ subject.last_cell_label(sheet).must_equal 'C1'
170
170
  end
171
171
 
172
172
  it 'uses the last header cell if /worksheet/dimension is missing' do
173
- sheet.xpath('/xmlns:worksheet/xmlns:dimension').remove
174
- subject.last_column(sheet).must_equal 'D'
173
+ sheet.xpath('/worksheet/dimension').remove
174
+ subject.last_cell_label(sheet).must_equal 'D1'
175
175
  end
176
176
 
177
- it 'returns "A" if the dimension is just one cell' do
178
- subject.last_column(empty_sheet).must_equal 'A'
177
+ it 'returns "A1" if the dimension is just one cell' do
178
+ subject.last_cell_label(empty_sheet).must_equal 'A1'
179
179
  end
180
180
 
181
- it 'returns "A" if the sheet is just one cell, but /worksheet/dimension is missing' do
182
- sheet.at_xpath('/xmlns:worksheet/xmlns:dimension').remove
183
- subject.last_column(empty_sheet).must_equal 'A'
181
+ it 'returns "A1" if the sheet is just one cell, but /worksheet/dimension is missing' do
182
+ sheet.at_xpath('/worksheet/dimension').remove
183
+ subject.last_cell_label(empty_sheet).must_equal 'A1'
184
+ end
185
+ end
186
+
187
+ describe '#column_letter_to_number' do
188
+ let(:subject) { described_class.new }
189
+
190
+ [ ['A', 1 ],
191
+ ['B', 2 ],
192
+ ['Z', 26 ],
193
+ ['AA', 27 ],
194
+ ['AB', 28 ],
195
+ ['AZ', 52 ],
196
+ ['BA', 53 ],
197
+ ['BZ', 78 ],
198
+ ['ZZ', 702 ],
199
+ ['AAA', 703 ],
200
+ ['AAZ', 728 ],
201
+ ['ABA', 729 ],
202
+ ['ABZ', 754 ],
203
+ ['AZZ', 1378 ],
204
+ ['ZZZ', 18278] ].each do |(letter, number)|
205
+ it "converts #{letter} to #{number}" do
206
+ subject.column_letter_to_number(letter).must_equal number
207
+ end
184
208
  end
185
209
  end
186
210
 
@@ -204,7 +228,7 @@ describe SimpleXlsxReader do
204
228
  </sheetData>
205
229
  </worksheet>
206
230
  XML
207
- )]
231
+ ).remove_namespaces!]
208
232
 
209
233
  # s='0' above refers to the value of numFmtId at cellXfs index 0
210
234
  xml.styles = Nokogiri::XML(
@@ -215,7 +239,7 @@ describe SimpleXlsxReader do
215
239
  </cellXfs>
216
240
  </styleSheet>
217
241
  XML
218
- )
242
+ ).remove_namespaces!
219
243
  end
220
244
  end
221
245
 
@@ -264,7 +288,7 @@ describe SimpleXlsxReader do
264
288
  </sheetData>
265
289
  </worksheet>
266
290
  XML
267
- )]
291
+ ).remove_namespaces!]
268
292
 
269
293
  # s='0' above refers to the value of numFmtId at cellXfs index 0,
270
294
  # which is in this case 'General' type
@@ -278,7 +302,7 @@ describe SimpleXlsxReader do
278
302
  </cellXfs>
279
303
  </styleSheet>
280
304
  XML
281
- )
305
+ ).remove_namespaces!
282
306
  end
283
307
  end
284
308
 
@@ -318,5 +342,68 @@ describe SimpleXlsxReader do
318
342
  @row[6].must_equal 'Cell G1'
319
343
  end
320
344
  end
345
+
346
+ describe 'parsing documents with blank rows' do
347
+ let(:xml) do
348
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
349
+ xml.sheets = [Nokogiri::XML(
350
+ <<-XML
351
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
352
+ <dimension ref="A1:D7" />
353
+ <sheetData>
354
+ <row r="2" spans="1:1">
355
+ <c r="A2" s="0">
356
+ <v>0</v>
357
+ </c>
358
+ </row>
359
+ <row r="4" spans="1:1">
360
+ <c r="B4" s="0">
361
+ <v>1</v>
362
+ </c>
363
+ </row>
364
+ <row r="5" spans="1:1">
365
+ <c r="C5" s="0">
366
+ <v>2</v>
367
+ </c>
368
+ </row>
369
+ <row r="7" spans="1:1">
370
+ <c r="D7" s="0">
371
+ <v>3</v>
372
+ </c>
373
+ </row>
374
+ </sheetData>
375
+ </worksheet>
376
+ XML
377
+ ).remove_namespaces!]
378
+
379
+ xml.styles = Nokogiri::XML(
380
+ <<-XML
381
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
382
+ <cellXfs count="1">
383
+ <xf numFmtId="0" />
384
+ </cellXfs>
385
+ </styleSheet>
386
+ XML
387
+ ).remove_namespaces!
388
+ end
389
+ end
390
+
391
+ before do
392
+ @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
393
+ end
394
+
395
+ it "reads row data despite gaps in row numbering" do
396
+ @rows.must_equal [
397
+ [nil,nil,nil,nil],
398
+ ["0",nil,nil,nil],
399
+ [nil,nil,nil,nil],
400
+ [nil,"1",nil,nil],
401
+ [nil,nil,"2",nil],
402
+ [nil,nil,nil,nil],
403
+ [nil,nil,nil,"3"]
404
+ ]
405
+ end
406
+ end
407
+
321
408
  end
322
409
  end
data/test/test_helper.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  gem 'minitest'
2
- require 'minitest/spec'
3
2
  require 'minitest/autorun'
3
+ require 'minitest/spec'
4
4
  require 'pry'
5
5
 
6
6
  $:.unshift File.expand_path("lib")
metadata CHANGED
@@ -1,69 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.8
4
+ version: 1.0.0.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-13 00:00:00.000000000 Z
11
+ date: 2014-06-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rubyzip
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '5.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '5.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: pry
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  description: Read xlsx data the Ruby way
@@ -73,7 +73,7 @@ executables: []
73
73
  extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
- - .gitignore
76
+ - ".gitignore"
77
77
  - CHANGELOG.md
78
78
  - Gemfile
79
79
  - LICENSE.txt
@@ -82,6 +82,8 @@ files:
82
82
  - lib/simple_xlsx_reader.rb
83
83
  - lib/simple_xlsx_reader/version.rb
84
84
  - simple_xlsx_reader.gemspec
85
+ - test/datetime_test.rb
86
+ - test/datetimes.xlsx
85
87
  - test/performance_test.rb
86
88
  - test/sesame_street_blog.xlsx
87
89
  - test/shared_strings.xml
@@ -97,21 +99,23 @@ require_paths:
97
99
  - lib
98
100
  required_ruby_version: !ruby/object:Gem::Requirement
99
101
  requirements:
100
- - - '>='
102
+ - - ">="
101
103
  - !ruby/object:Gem::Version
102
104
  version: '0'
103
105
  required_rubygems_version: !ruby/object:Gem::Requirement
104
106
  requirements:
105
- - - '>='
107
+ - - ">"
106
108
  - !ruby/object:Gem::Version
107
- version: '0'
109
+ version: 1.3.1
108
110
  requirements: []
109
111
  rubyforge_project:
110
- rubygems_version: 2.0.3
112
+ rubygems_version: 2.2.0
111
113
  signing_key:
112
114
  specification_version: 4
113
115
  summary: Read xlsx data the Ruby way
114
116
  test_files:
117
+ - test/datetime_test.rb
118
+ - test/datetimes.xlsx
115
119
  - test/performance_test.rb
116
120
  - test/sesame_street_blog.xlsx
117
121
  - test/shared_strings.xml