simple_xlsx_reader 0.9.8 → 1.0.0.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 592fa09e1f66127581441dcee23a2ec10481a08f
4
- data.tar.gz: 2165196985a70b2f594828e43523c237600f7c08
3
+ metadata.gz: 4d24749c8713f2f57e47d33a363801333130c5df
4
+ data.tar.gz: 0b26df686d579163845fd6e06936bff930c52c33
5
5
  SHA512:
6
- metadata.gz: 4ef8fe7df99b9bea2742c49663aa899421e5d4b6a145c62ee15afef9a8938ba1bfe0c5bf4b9d0999e2cffc6189436fd4f3b1a59af8a796a05c1dc4d6d9f7de77
7
- data.tar.gz: d1a2cb30e2dce280b529f6b877745cfbfa1f7cc8bbcacd506b268ca624da74d637e9e47bff8602f53d0c33ebd38055750c6eda88bbeaedd34d8eeb5e891e8b2e
6
+ metadata.gz: 523248f26fdf45c2978716836a2d81c5d3a4877bb74f29d7bfa51be1965e1ef61d1e67c83ec98bcac83f7b8e39ac08f666f6d31341428a5d0afa993e149ed7cd
7
+ data.tar.gz: 710789e5c5dd70d9d360e3a776662dc64138965b54984772cac9785018e2c9605a2b9ea3a2b889ec171183c64610901da527ae91989e9ea149d9016dc8497c71
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ### 1.0.0.pre
2
+
3
+ * Handle files with blank rows [Brian Hoffman]
4
+ * Preserve seconds when casting datetimes [Rob Newbould]
5
+ * Preserve empty rows (previously would be omitted)
6
+ * Speed up parsing by ~55%
7
+
1
8
  ### 0.9.8
2
9
 
3
10
  * Rubyzip 1.0 compatibility
data/README.md CHANGED
@@ -97,7 +97,18 @@ This project follows [semantic versioning 1.0](http://semver.org/spec/v1.0.0.htm
97
97
 
98
98
  ## Contributing
99
99
 
100
- 1. Fork it
100
+ Remember to write tests, think about edge cases, and run the existing
101
+ suite.
102
+
103
+ Note that as of commit 665cbafdde, the most extreme end of the
104
+ linear-time performance test, which is 10,000 rows (12 columns), runs in
105
+ ~4 seconds on Ruby 2.1 on a 2012 MBP. If the linear time assertion fails
106
+ or you're way off that, there is probably a performance regression in
107
+ your code.
108
+
109
+ Then, the standard stuff:
110
+
111
+ 1. Fork this project
101
112
  2. Create your feature branch (`git checkout -b my-new-feature`)
102
113
  3. Commit your changes (`git commit -am 'Add some feature'`)
103
114
  4. Push to the branch (`git push origin my-new-feature`)
@@ -74,13 +74,13 @@ module SimpleXlsxReader
74
74
  def self.load(file_path)
75
75
  self.new.tap do |xml|
76
76
  SimpleXlsxReader::Zip.open(file_path) do |zip|
77
- xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml'))
78
- xml.styles = Nokogiri::XML(zip.read('xl/styles.xml'))
77
+ xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
78
+ xml.styles = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
79
79
 
80
80
  # optional feature used by excel, but not often used by xlsx
81
81
  # generation libraries
82
82
  if zip.file.file?('xl/sharedStrings.xml')
83
- xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml'))
83
+ xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
84
84
  end
85
85
 
86
86
  xml.sheets = []
@@ -90,7 +90,7 @@ module SimpleXlsxReader
90
90
  break if !zip.file.file?("xl/worksheets/sheet#{i}.xml")
91
91
 
92
92
  xml.sheets <<
93
- Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml"))
93
+ Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml")).remove_namespaces!
94
94
  end
95
95
  end
96
96
  end
@@ -101,14 +101,14 @@ module SimpleXlsxReader
101
101
  # For internal use; translates source xml to Sheet objects.
102
102
  class Mapper < Struct.new(:xml)
103
103
  def load_sheets
104
- sheet_toc.each_with_index.map do |(sheet_name, sheet_number), i|
104
+ sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
105
105
  parse_sheet(sheet_name, xml.sheets[i]) # sheet_number is *not* the index into xml.sheets
106
106
  end
107
107
  end
108
108
 
109
109
  # Table of contents for the sheets, ex. {'Authors' => 0, ...}
110
110
  def sheet_toc
111
- xml.workbook.xpath('/xmlns:workbook/xmlns:sheets/xmlns:sheet').
111
+ xml.workbook.xpath('/workbook/sheets/sheet').
112
112
  inject({}) do |acc, sheet|
113
113
 
114
114
  acc[sheet.attributes['name'].value] =
@@ -120,52 +120,55 @@ module SimpleXlsxReader
120
120
 
121
121
  def parse_sheet(sheet_name, xsheet)
122
122
  sheet = Sheet.new(sheet_name)
123
-
124
- last_column = last_column(xsheet)
125
- rownum = -1
126
- sheet.rows =
127
- xsheet.xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row").map do |xrow|
128
- rownum += 1
129
-
130
- colname = nil
131
- colnum = -1
132
- cells = []
133
- while(colname != last_column) do
134
- colname ? colname.next! : colname = 'A'
135
- colnum += 1
136
-
137
- xcell = xrow.at_xpath(
138
- %(xmlns:c[@r="#{colname + (rownum + 1).to_s}"]))
139
-
140
- # empty 'General' columns might not be in the xml
141
- next cells << nil if xcell.nil?
142
-
143
- type = xcell.attributes['t'] &&
144
- xcell.attributes['t'].value
145
- style = xcell.attributes['s'] &&
146
- style_types[xcell.attributes['s'].value.to_i]
147
-
148
- xvalue = type == 'inlineStr' ?
149
- xcell.at_xpath('xmlns:is/xmlns:t') : xcell.at_xpath('xmlns:v')
150
-
151
- cells << begin
152
- self.class.cast(xvalue && xvalue.text.strip, type, style,
153
- :shared_strings => shared_strings)
154
- rescue => e
155
- if !SimpleXlsxReader.configuration.catch_cell_load_errors
156
- error = CellLoadError.new(
157
- "Row #{rownum}, Col #{colnum}: #{e.message}")
158
- error.set_backtrace(e.backtrace)
159
- raise error
160
- else
161
- sheet.load_errors[[rownum, colnum]] = e.message
162
-
163
- xcell.text.strip
164
- end
123
+ sheet_width, sheet_height = *sheet_dimensions(xsheet)
124
+
125
+ sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
126
+ xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
127
+ column, row = *xcell.attr('r').match(/([A-Z]+)([0-9]+)/).captures
128
+ col_idx = column_letter_to_number(column) - 1
129
+ row_idx = row.to_i - 1
130
+
131
+ type = xcell.attributes['t'] &&
132
+ xcell.attributes['t'].value
133
+ style = xcell.attributes['s'] &&
134
+ style_types[xcell.attributes['s'].value.to_i]
135
+
136
+ # This is the main performance bottleneck. Using just 'xcell.text'
137
+ # would be ideal, and makes parsing super-fast. However, there's
138
+ # other junk in the cell, formula references in particular,
139
+ # so we really do have to look for specific value nodes.
140
+ # Maybe there is a really clever way to use xcell.text and parse out
141
+ # the correct value, but I can't think of one, or an alternative
142
+ # strategy.
143
+ #
144
+ # And yes, this really is faster than using xcell.at_xpath(...),
145
+ # by about 60%. Odd.
146
+ xvalue = type == 'inlineStr' ?
147
+ (xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
148
+ xcell.children.find {|c| c.name == 'v'}
149
+
150
+ cell = begin
151
+ self.class.cast(xvalue && xvalue.text.strip, type, style,
152
+ :shared_strings => shared_strings)
153
+ rescue => e
154
+ if !SimpleXlsxReader.configuration.catch_cell_load_errors
155
+ error = CellLoadError.new(
156
+ "Row #{row_idx}, Col #{col_idx}: #{e.message}")
157
+ error.set_backtrace(e.backtrace)
158
+ raise error
159
+ else
160
+ sheet.load_errors[[row_idx, col_idx]] = e.message
161
+
162
+ xcell.text.strip
165
163
  end
166
164
  end
167
165
 
168
- cells
166
+ # This shouldn't be necessary, but just in case, we'll create
167
+ # the row so we don't blow up. This means any null rows in between
168
+ # will be null instead of [null, null, ...]
169
+ sheet.rows[row_idx] ||= Array.new(sheet_width)
170
+
171
+ sheet.rows[row_idx][col_idx] = cell
169
172
  end
170
173
 
171
174
  sheet
@@ -180,17 +183,43 @@ module SimpleXlsxReader
180
183
  # and check the column name of the last header row. Obviously this isn't
181
184
  # the most robust strategy, but it likely fits 99% of use cases
182
185
  # considering it's not a problem with actual excel docs.
183
- def last_column(xsheet)
184
- dimension = xsheet.at_xpath('/xmlns:worksheet/xmlns:dimension')
186
+ def last_cell_label(xsheet)
187
+ dimension = xsheet.at_xpath('/worksheet/dimension')
185
188
  if dimension
186
- col = dimension.attributes['ref'].value.match(/:([A-Z]*)[1-9]*/)
187
- col ? col.captures.first : 'A'
189
+ col = dimension.attributes['ref'].value.match(/:([A-Z]+[0-9]+)/)
190
+ col ? col.captures.first : 'A1'
188
191
  else
189
- last = xsheet.at_xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c[last()]")
190
- last ? last.attributes['r'].value.match(/([A-Z]*)[1-9]*/).captures.first : 'A'
192
+ last = xsheet.at_xpath("/worksheet/sheetData/row[last()]/c[last()]")
193
+ last ? last.attributes['r'].value.match(/([A-Z]+[0-9]+)/).captures.first : 'A1'
191
194
  end
192
195
  end
193
196
 
197
+ # Returns dimensions (1-indexed)
198
+ def sheet_dimensions(xsheet)
199
+ column, row = *last_cell_label(xsheet).match(/([A-Z]+)([0-9]+)/).captures
200
+ [column_letter_to_number(column), row.to_i]
201
+ end
202
+
203
+ # column letters form a bijective base-26 number (A=1 .. Z=26, no zero digit):
204
+ # 'A' = 1
205
+ # 'B' = 2
206
+ # 'Z' = 26
207
+ # 'AA' = 26 * 1 + 1
208
+ # 'AZ' = 26 * 1 + 26
209
+ # 'BA' = 26 * 2 + 1
210
+ # 'ZA' = 26 * 26 + 1
211
+ # 'ZZ' = 26 * 26 + 26
212
+ # 'AAA' = 26 * 26 * 1 + 26 * 1 + 1
213
+ # 'AAZ' = 26 * 26 * 1 + 26 * 1 + 26
214
+ # 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
215
+ # 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
216
+ def column_letter_to_number(column_letter)
217
+ pow = -1
218
+ column_letter.codepoints.reverse.inject(0) do |acc, charcode|
219
+ pow += 1
220
+ acc + 26**pow * (charcode - 64)
221
+ end
222
+ end
194
223
 
195
224
  # Excel doesn't record types for some cells, only its display style, so
196
225
  # we have to back out the type from that style.
@@ -208,7 +237,7 @@ module SimpleXlsxReader
208
237
  # type.
209
238
  def style_types
210
239
  @style_types ||=
211
- xml.styles.xpath('/xmlns:styleSheet/xmlns:cellXfs/xmlns:xf').map {|xstyle|
240
+ xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
212
241
  style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
213
242
  end
214
243
 
@@ -229,7 +258,7 @@ module SimpleXlsxReader
229
258
  # ex. {164 => :date_time}
230
259
  def custom_style_types
231
260
  @custom_style_types ||=
232
- xml.styles.xpath('/xmlns:styleSheet/xmlns:numFmts/xmlns:numFmt').
261
+ xml.styles.xpath('/styleSheet/numFmts/numFmt').
233
262
  inject({}) do |acc, xstyle|
234
263
 
235
264
  acc[xstyle.attributes['numFmtId'].value.to_i] =
@@ -321,11 +350,9 @@ module SimpleXlsxReader
321
350
 
322
351
  if fraction_of_24 # there is a time associated
323
352
  fraction_of_24 = "0.#{fraction_of_24}".to_f
324
- military = fraction_of_24 * 24
325
- hour = military.truncate
326
- minute = ((military % 1) * 60).truncate
353
+ seconds = (fraction_of_24 * 86400).round
327
354
 
328
- return Time.utc(date.year, date.month, date.day, hour, minute)
355
+ return Time.utc(date.year, date.month, date.day) + seconds
329
356
  else
330
357
  return date
331
358
  end
@@ -389,12 +416,12 @@ module SimpleXlsxReader
389
416
  def shared_strings
390
417
  @shared_strings ||= begin
391
418
  if xml.shared_strings
392
- xml.shared_strings.xpath('/xmlns:sst/xmlns:si').map do |xsst|
419
+ xml.shared_strings.xpath('/sst/si').map do |xsst|
393
420
  # a shared string can be a single value...
394
- sst = xsst.at_xpath('xmlns:t/text()')
421
+ sst = xsst.at_xpath('t/text()')
395
422
  sst = sst.text if sst
396
423
  # ... or a composite of separately styled words/characters
397
- sst ||= xsst.xpath('xmlns:r/xmlns:t/text()').map(&:text).join
424
+ sst ||= xsst.xpath('r/t/text()').map(&:text).join
398
425
  end
399
426
  else
400
427
  []
@@ -1,3 +1,3 @@
1
1
  module SimpleXlsxReader
2
- VERSION = "0.9.8"
2
+ VERSION = "1.0.0.pre"
3
3
  end
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.add_dependency 'nokogiri'
16
16
  gem.add_dependency 'rubyzip'
17
17
 
18
- gem.add_development_dependency 'minitest'
18
+ gem.add_development_dependency 'minitest', '>= 5.0'
19
19
  gem.add_development_dependency 'pry'
20
20
 
21
21
  gem.files = `git ls-files`.split($/)
@@ -0,0 +1,18 @@
1
+ require 'test_helper'
2
+
3
+ describe SimpleXlsxReader do
4
+ let(:datetimes_file) { File.join(File.dirname(__FILE__),
5
+ 'datetimes.xlsx') }
6
+
7
+ let(:subject) { SimpleXlsxReader::Document.new(datetimes_file) }
8
+
9
+ it 'converts date_times with the correct precision' do
10
+ subject.to_hash.must_equal({
11
+ "Datetimes" =>
12
+ [[Time.parse("2013-08-19 18:29:59 UTC")],
13
+ [Time.parse("2013-08-19 18:30:00 UTC")],
14
+ [Time.parse("2013-08-19 18:30:01 UTC")]]
15
+ })
16
+ end
17
+
18
+ end
Binary file
@@ -1,7 +1,7 @@
1
1
  require 'test_helper'
2
2
  require 'minitest/benchmark'
3
3
 
4
- describe SimpleXlsxReader do
4
+ describe 'SimpleXlsxReader Benchmark' do
5
5
 
6
6
  # n is 0-indexed for us, then converted to 1-indexed for excel
7
7
  def build_row(n)
@@ -58,8 +58,8 @@ describe SimpleXlsxReader do
58
58
  </sheetData>
59
59
  </worksheet>
60
60
  XML
61
- )
62
- base.at_xpath("/xmlns:worksheet/xmlns:sheetData").add_child(build_row(0))
61
+ ).remove_namespaces!
62
+ base.at_xpath("/worksheet/sheetData").add_child(build_row(0))
63
63
 
64
64
  @xml = SimpleXlsxReader::Document::Xml.new.tap do |xml|
65
65
  xml.sheets = [base]
@@ -76,7 +76,7 @@ describe SimpleXlsxReader do
76
76
  </cellXfs>
77
77
  </styleSheet>
78
78
  XML
79
- )
79
+ ).remove_namespaces!
80
80
  end
81
81
 
82
82
  # Every new sheet has one more row
@@ -84,8 +84,8 @@ describe SimpleXlsxReader do
84
84
  sheet = base.clone
85
85
 
86
86
  range.times do |n|
87
- sheet.xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row").last.
88
- add_next_sibling(build_row(n))
87
+ sheet.xpath("/worksheet/sheetData/row").last.
88
+ add_next_sibling(build_row(n+1))
89
89
  end
90
90
 
91
91
  @xml.sheets[range] = sheet
@@ -93,19 +93,19 @@ describe SimpleXlsxReader do
93
93
  end
94
94
 
95
95
  def self.bench_range
96
- bench_exp(1,1000)
96
+ bench_exp(1,10000)
97
97
  end
98
98
 
99
99
  bench_performance_linear 'parses sheets in linear time', 0.9999 do |n|
100
100
 
101
- raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.count}"\
101
+ raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.size}"\
102
102
  if @xml.sheets[n].nil?
103
103
 
104
104
  sheet = SimpleXlsxReader::Document::Mapper.new(@xml).
105
105
  parse_sheet('test', @xml.sheets[n])
106
106
 
107
- raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.count}"\
108
- if sheet.rows.count != n + 1
107
+ raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.size}"\
108
+ if sheet.rows.size != n + 1
109
109
  end
110
110
 
111
111
  end
@@ -44,12 +44,12 @@ describe SimpleXlsxReader do
44
44
  end
45
45
 
46
46
  it 'reads time styles' do
47
- described_class.cast('41505.77084', nil, :time).
47
+ described_class.cast('41505.77083', nil, :time).
48
48
  must_equal Time.parse('2013-08-19 18:30 UTC')
49
49
  end
50
50
 
51
51
  it 'reads date_time styles' do
52
- described_class.cast('41505.77084', nil, :date_time).
52
+ described_class.cast('41505.77083', nil, :date_time).
53
53
  must_equal Time.parse('2013-08-19 18:30 UTC')
54
54
  end
55
55
 
@@ -59,12 +59,12 @@ describe SimpleXlsxReader do
59
59
  end
60
60
 
61
61
  it 'reads number types styled as times' do
62
- described_class.cast('41505.77084', 'n', :time).
62
+ described_class.cast('41505.77083', 'n', :time).
63
63
  must_equal Time.parse('2013-08-19 18:30 UTC')
64
64
  end
65
65
 
66
66
  it 'reads number types styled as date_times' do
67
- described_class.cast('41505.77084', 'n', :date_time).
67
+ described_class.cast('41505.77083', 'n', :date_time).
68
68
  must_equal Time.parse('2013-08-19 18:30 UTC')
69
69
  end
70
70
  end
@@ -73,7 +73,7 @@ describe SimpleXlsxReader do
73
73
  let(:xml) do
74
74
  SimpleXlsxReader::Document::Xml.new.tap do |xml|
75
75
  xml.shared_strings = Nokogiri::XML(File.read(
76
- File.join(File.dirname(__FILE__), 'shared_strings.xml') ))
76
+ File.join(File.dirname(__FILE__), 'shared_strings.xml') )).remove_namespaces!
77
77
  end
78
78
  end
79
79
 
@@ -92,7 +92,7 @@ describe SimpleXlsxReader do
92
92
  let(:xml) do
93
93
  SimpleXlsxReader::Document::Xml.new.tap do |xml|
94
94
  xml.styles = Nokogiri::XML(File.read(
95
- File.join(File.dirname(__FILE__), 'styles.xml') ))
95
+ File.join(File.dirname(__FILE__), 'styles.xml') )).remove_namespaces!
96
96
  end
97
97
  end
98
98
 
@@ -105,7 +105,7 @@ describe SimpleXlsxReader do
105
105
  end
106
106
  end
107
107
 
108
- describe '#last_column' do
108
+ describe '#last_cell_label' do
109
109
 
110
110
  let(:generic_style) do
111
111
  Nokogiri::XML(
@@ -116,7 +116,7 @@ describe SimpleXlsxReader do
116
116
  </cellXfs>
117
117
  </styleSheet>
118
118
  XML
119
- )
119
+ ).remove_namespaces!
120
120
  end
121
121
 
122
122
  # Note, this is not a valid sheet, since the last cell is actually D1 but
@@ -141,7 +141,7 @@ describe SimpleXlsxReader do
141
141
  </sheetData>
142
142
  </worksheet>
143
143
  XML
144
- )
144
+ ).remove_namespaces!
145
145
  end
146
146
 
147
147
  let(:empty_sheet) do
@@ -153,7 +153,7 @@ describe SimpleXlsxReader do
153
153
  </sheetData>
154
154
  </worksheet>
155
155
  XML
156
- )
156
+ ).remove_namespaces!
157
157
  end
158
158
 
159
159
  let(:xml) do
@@ -166,21 +166,45 @@ describe SimpleXlsxReader do
166
166
  subject { described_class.new(xml) }
167
167
 
168
168
  it 'uses /worksheet/dimension if available' do
169
- subject.last_column(sheet).must_equal 'C'
169
+ subject.last_cell_label(sheet).must_equal 'C1'
170
170
  end
171
171
 
172
172
  it 'uses the last header cell if /worksheet/dimension is missing' do
173
- sheet.xpath('/xmlns:worksheet/xmlns:dimension').remove
174
- subject.last_column(sheet).must_equal 'D'
173
+ sheet.xpath('/worksheet/dimension').remove
174
+ subject.last_cell_label(sheet).must_equal 'D1'
175
175
  end
176
176
 
177
- it 'returns "A" if the dimension is just one cell' do
178
- subject.last_column(empty_sheet).must_equal 'A'
177
+ it 'returns "A1" if the dimension is just one cell' do
178
+ subject.last_cell_label(empty_sheet).must_equal 'A1'
179
179
  end
180
180
 
181
- it 'returns "A" if the sheet is just one cell, but /worksheet/dimension is missing' do
182
- sheet.at_xpath('/xmlns:worksheet/xmlns:dimension').remove
183
- subject.last_column(empty_sheet).must_equal 'A'
181
+ it 'returns "A1" if the sheet is just one cell, but /worksheet/dimension is missing' do
182
+ sheet.at_xpath('/worksheet/dimension').remove
183
+ subject.last_cell_label(empty_sheet).must_equal 'A1'
184
+ end
185
+ end
186
+
187
+ describe '#column_letter_to_number' do
188
+ let(:subject) { described_class.new }
189
+
190
+ [ ['A', 1 ],
191
+ ['B', 2 ],
192
+ ['Z', 26 ],
193
+ ['AA', 27 ],
194
+ ['AB', 28 ],
195
+ ['AZ', 52 ],
196
+ ['BA', 53 ],
197
+ ['BZ', 78 ],
198
+ ['ZZ', 702 ],
199
+ ['AAA', 703 ],
200
+ ['AAZ', 728 ],
201
+ ['ABA', 729 ],
202
+ ['ABZ', 754 ],
203
+ ['AZZ', 1378 ],
204
+ ['ZZZ', 18278] ].each do |(letter, number)|
205
+ it "converts #{letter} to #{number}" do
206
+ subject.column_letter_to_number(letter).must_equal number
207
+ end
184
208
  end
185
209
  end
186
210
 
@@ -204,7 +228,7 @@ describe SimpleXlsxReader do
204
228
  </sheetData>
205
229
  </worksheet>
206
230
  XML
207
- )]
231
+ ).remove_namespaces!]
208
232
 
209
233
  # s='0' above refers to the value of numFmtId at cellXfs index 0
210
234
  xml.styles = Nokogiri::XML(
@@ -215,7 +239,7 @@ describe SimpleXlsxReader do
215
239
  </cellXfs>
216
240
  </styleSheet>
217
241
  XML
218
- )
242
+ ).remove_namespaces!
219
243
  end
220
244
  end
221
245
 
@@ -264,7 +288,7 @@ describe SimpleXlsxReader do
264
288
  </sheetData>
265
289
  </worksheet>
266
290
  XML
267
- )]
291
+ ).remove_namespaces!]
268
292
 
269
293
  # s='0' above refers to the value of numFmtId at cellXfs index 0,
270
294
  # which is in this case 'General' type
@@ -278,7 +302,7 @@ describe SimpleXlsxReader do
278
302
  </cellXfs>
279
303
  </styleSheet>
280
304
  XML
281
- )
305
+ ).remove_namespaces!
282
306
  end
283
307
  end
284
308
 
@@ -318,5 +342,68 @@ describe SimpleXlsxReader do
318
342
  @row[6].must_equal 'Cell G1'
319
343
  end
320
344
  end
345
+
346
+ describe 'parsing documents with blank rows' do
347
+ let(:xml) do
348
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
349
+ xml.sheets = [Nokogiri::XML(
350
+ <<-XML
351
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
352
+ <dimension ref="A1:D7" />
353
+ <sheetData>
354
+ <row r="2" spans="1:1">
355
+ <c r="A2" s="0">
356
+ <v>0</v>
357
+ </c>
358
+ </row>
359
+ <row r="4" spans="1:1">
360
+ <c r="B4" s="0">
361
+ <v>1</v>
362
+ </c>
363
+ </row>
364
+ <row r="5" spans="1:1">
365
+ <c r="C5" s="0">
366
+ <v>2</v>
367
+ </c>
368
+ </row>
369
+ <row r="7" spans="1:1">
370
+ <c r="D7" s="0">
371
+ <v>3</v>
372
+ </c>
373
+ </row>
374
+ </sheetData>
375
+ </worksheet>
376
+ XML
377
+ ).remove_namespaces!]
378
+
379
+ xml.styles = Nokogiri::XML(
380
+ <<-XML
381
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
382
+ <cellXfs count="1">
383
+ <xf numFmtId="0" />
384
+ </cellXfs>
385
+ </styleSheet>
386
+ XML
387
+ ).remove_namespaces!
388
+ end
389
+ end
390
+
391
+ before do
392
+ @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
393
+ end
394
+
395
+ it "reads row data despite gaps in row numbering" do
396
+ @rows.must_equal [
397
+ [nil,nil,nil,nil],
398
+ ["0",nil,nil,nil],
399
+ [nil,nil,nil,nil],
400
+ [nil,"1",nil,nil],
401
+ [nil,nil,"2",nil],
402
+ [nil,nil,nil,nil],
403
+ [nil,nil,nil,"3"]
404
+ ]
405
+ end
406
+ end
407
+
321
408
  end
322
409
  end
data/test/test_helper.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  gem 'minitest'
2
- require 'minitest/spec'
3
2
  require 'minitest/autorun'
3
+ require 'minitest/spec'
4
4
  require 'pry'
5
5
 
6
6
  $:.unshift File.expand_path("lib")
metadata CHANGED
@@ -1,69 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.8
4
+ version: 1.0.0.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-13 00:00:00.000000000 Z
11
+ date: 2014-06-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rubyzip
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '5.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '5.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: pry
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  description: Read xlsx data the Ruby way
@@ -73,7 +73,7 @@ executables: []
73
73
  extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
- - .gitignore
76
+ - ".gitignore"
77
77
  - CHANGELOG.md
78
78
  - Gemfile
79
79
  - LICENSE.txt
@@ -82,6 +82,8 @@ files:
82
82
  - lib/simple_xlsx_reader.rb
83
83
  - lib/simple_xlsx_reader/version.rb
84
84
  - simple_xlsx_reader.gemspec
85
+ - test/datetime_test.rb
86
+ - test/datetimes.xlsx
85
87
  - test/performance_test.rb
86
88
  - test/sesame_street_blog.xlsx
87
89
  - test/shared_strings.xml
@@ -97,21 +99,23 @@ require_paths:
97
99
  - lib
98
100
  required_ruby_version: !ruby/object:Gem::Requirement
99
101
  requirements:
100
- - - '>='
102
+ - - ">="
101
103
  - !ruby/object:Gem::Version
102
104
  version: '0'
103
105
  required_rubygems_version: !ruby/object:Gem::Requirement
104
106
  requirements:
105
- - - '>='
107
+ - - ">"
106
108
  - !ruby/object:Gem::Version
107
- version: '0'
109
+ version: 1.3.1
108
110
  requirements: []
109
111
  rubyforge_project:
110
- rubygems_version: 2.0.3
112
+ rubygems_version: 2.2.0
111
113
  signing_key:
112
114
  specification_version: 4
113
115
  summary: Read xlsx data the Ruby way
114
116
  test_files:
117
+ - test/datetime_test.rb
118
+ - test/datetimes.xlsx
115
119
  - test/performance_test.rb
116
120
  - test/sesame_street_blog.xlsx
117
121
  - test/shared_strings.xml