simple_xlsx_reader 1.0.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,39 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'test_helper'
2
4
  require 'time'
3
5
 
4
6
  SXR = SimpleXlsxReader
5
7
 
6
8
  describe SimpleXlsxReader do
9
+ let(:sesame_street_blog_file) do
10
+ File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx')
11
+ end
12
+
13
+ let(:document) { SimpleXlsxReader.open(sesame_street_blog_file) }
14
+
15
+ ##
16
+ # A high-level acceptance test testing misc features such as date/time parsing,
17
+ # hyperlinks (both function and ref kinds), formula dates, empty rows, etc.
18
+
7
19
  let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
8
20
  let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
21
+
9
22
  let(:expected_result) do
10
23
  {
11
- "Authors"=>
12
- [["Name", "Occupation"],
13
- ["Big Bird", "Teacher"]],
14
- "Posts"=>
15
- [["Author Name", "Title", "Body", "Created At", "Comment Count", "URL"],
16
- ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1, SXR::Hyperlink.new("http://www.example.com/hyperlink-function", "This uses the HYPERLINK() function")],
17
- ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2, SXR::Hyperlink.new("http://www.example.com/hyperlink-gui", "This uses the hyperlink GUI option")],
18
- ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0, nil],
19
- ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil, nil]]
24
+ 'Authors' =>
25
+ [
26
+ ['Name', 'Occupation'],
27
+ ['Big Bird', 'Teacher']
28
+ ],
29
+ 'Posts' =>
30
+ [
31
+ ['Author Name', 'Title', 'Body', 'Created At', 'Comment Count', 'URL'],
32
+ ['Big Bird', 'The Number 1', 'The Greatest', Time.parse('2002-01-01 11:00:00 UTC'), 1, SXR::Hyperlink.new('http://www.example.com/hyperlink-function', 'This uses the HYPERLINK() function')],
33
+ ['Big Bird', 'The Number 2', 'Second Best', Time.parse('2002-01-02 14:00:00 UTC'), 2, SXR::Hyperlink.new('http://www.example.com/hyperlink-gui', 'This uses the hyperlink GUI option')],
34
+ ['Big Bird', 'Formula Dates', 'Tricky tricky', Time.parse('2002-01-03 14:00:00 UTC'), 0, nil],
35
+ ['Empty Eagress', nil, 'The title, date, and comment have types, but no values', nil, nil, nil]
36
+ ]
20
37
  }
21
38
  end
22
39
 
@@ -25,7 +42,7 @@ describe SimpleXlsxReader do
25
42
  let(:subject) { SimpleXlsxReader.open(sesame_street_blog_file_path) }
26
43
 
27
44
  it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
28
- subject.to_hash.must_equal(expected_result)
45
+ _(subject.to_hash).must_equal(expected_result)
29
46
  end
30
47
  end
31
48
 
@@ -33,162 +50,557 @@ describe SimpleXlsxReader do
33
50
  let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
34
51
 
35
52
  it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
36
- subject.to_hash.must_equal(expected_result)
53
+ _(subject.to_hash).must_equal(expected_result)
37
54
  end
38
55
  end
56
+
57
+ it 'outputs strings in UTF-8 encoding' do
58
+ document = SimpleXlsxReader.parse(sesame_street_blog_io)
59
+ _(document.sheets[0].rows.to_a.flatten.map(&:encoding).uniq)
60
+ .must_equal [Encoding::UTF_8]
61
+ end
62
+
63
+ it 'can use all our enumerable nicities without slurping' do
64
+ document = SimpleXlsxReader.parse(sesame_street_blog_io)
65
+
66
+ headers = {
67
+ name: 'Author Name',
68
+ title: 'Title',
69
+ body: 'Body',
70
+ created_at: 'Created At',
71
+ count: /Count/
72
+ }
73
+
74
+ rows = document.sheets[1].rows
75
+ result =
76
+ rows.each(headers: headers).with_index.with_object({}) do |(row, i), acc|
77
+ acc[i] = row
78
+ end
79
+
80
+ _(result[0]).must_equal(
81
+ name: 'Big Bird',
82
+ title: 'The Number 1',
83
+ body: 'The Greatest',
84
+ created_at: Time.parse('2002-01-01 11:00:00 UTC'),
85
+ count: 1,
86
+ "URL" => 'http://www.example.com/hyperlink-function'
87
+ )
88
+
89
+ _(rows.slurped?).must_equal false
90
+ end
39
91
  end
40
92
 
41
- describe SimpleXlsxReader::Document do
42
- describe 'load from file path' do
43
- let(:subject) { SimpleXlsxReader::Document.new(file_path: sesame_street_blog_file_path) }
93
+ ##
94
+ # For more fine-grained unit tests, we sometimes build our own workbook via
95
+ # Nokogiri. TestXlsxBuilder has some defaults, and this let-style lets us
96
+ # concisely override them in nested describe blocks.
97
+
98
+ let(:shared_strings) { nil }
99
+ let(:styles) { nil }
100
+ let(:sheet) { nil }
101
+ let(:workbook) { nil }
102
+ let(:rels) { nil }
103
+
104
+ let(:xlsx) do
105
+ TestXlsxBuilder.new(
106
+ shared_strings: shared_strings,
107
+ styles: styles,
108
+ sheets: sheet && [sheet],
109
+ workbook: workbook,
110
+ rels: rels
111
+ )
112
+ end
44
113
 
45
- it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
46
- subject.to_hash.must_equal(expected_result)
47
- end
114
+ let(:reader) { SimpleXlsxReader.open(xlsx.archive.path) }
115
+
116
+ describe 'Sheet#rows#each(headers: true)' do
117
+ let(:sheet) do
118
+ <<~XML
119
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
120
+ <dimension ref="A1:B3" />
121
+ <sheetData>
122
+ <row r="1">
123
+ <c r="A1" s="0">
124
+ <v>Header 1</v>
125
+ </c>
126
+ <c r="B1" s="0">
127
+ <v>Header 2</v>
128
+ </c>
129
+ </row>
130
+ <row r="2">
131
+ <c r="A2" s="0">
132
+ <v>Data 1-A</v>
133
+ </c>
134
+ <c r="B2" s="0">
135
+ <v>Data 1-B</v>
136
+ </c>
137
+ </row>
138
+ <row r="4">
139
+ <c r="A4" s="0">
140
+ <v>Data 2-A</v>
141
+ </c>
142
+ <c r="B4" s="0">
143
+ <v>Data 2-B</v>
144
+ </c>
145
+ </row>
146
+ </sheetData>
147
+ </worksheet>
148
+ XML
48
149
  end
49
150
 
50
- describe 'load from buffer' do
51
- let(:subject) { SimpleXlsxReader::Document.new(string_or_io: sesame_street_blog_io) }
151
+ it 'yields rows as hashes' do
152
+ acc = []
52
153
 
53
- it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
54
- subject.to_hash.must_equal(expected_result)
154
+ reader.sheets[0].rows.each(headers: true) do |row|
155
+ acc << row
55
156
  end
157
+
158
+ _(acc).must_equal(
159
+ [
160
+ { 'Header 1' => 'Data 1-A', 'Header 2' => 'Data 1-B' },
161
+ { 'Header 1' => nil, 'Header 2' => nil },
162
+ { 'Header 1' => 'Data 2-A', 'Header 2' => 'Data 2-B' }
163
+ ]
164
+ )
165
+ end
166
+ end
167
+
168
+ describe 'Sheet#rows#each(headers: ->(row) {...})' do
169
+ let(:sheet) do
170
+ <<~XML
171
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
172
+ <dimension ref="A1:B7" />
173
+ <sheetData>
174
+ <row r="1">
175
+ <c r="A1" s="0">
176
+ <v>a chart or something</v>
177
+ </c>
178
+ <c r="B1" s="0">
179
+ <v>Rabble rabble</v>
180
+ </c>
181
+ </row>
182
+ <row r="2">
183
+ <c r="A2" s="0">
184
+ <v>Chatty junk</v>
185
+ </c>
186
+ <c r="B2" s="0">
187
+ <v></v>
188
+ </c>
189
+ </row>
190
+ <row r="4">
191
+ <c r="A4" s="0">
192
+ <v>Header 1</v>
193
+ </c>
194
+ <c r="B4" s="0">
195
+ <v>Header 2</v>
196
+ </c>
197
+ </row>
198
+ <row r="5">
199
+ <c r="A5" s="0">
200
+ <v>Data 1-A</v>
201
+ </c>
202
+ <c r="B5" s="0">
203
+ <v>Data 1-B</v>
204
+ </c>
205
+ </row>
206
+ <row r="7">
207
+ <c r="A7" s="0">
208
+ <v>Data 2-A</v>
209
+ </c>
210
+ <c r="B7" s="0">
211
+ <v>Data 2-B</v>
212
+ </c>
213
+ </row>
214
+ </sheetData>
215
+ </worksheet>
216
+ XML
56
217
  end
57
218
 
58
- describe 'load from file path (legacy API)' do
59
- let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file_path) }
219
+ it 'yields rows as hashes' do
220
+ acc = []
60
221
 
61
- it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
62
- subject.to_hash.must_equal(expected_result)
222
+ finder = ->(row) { row.find {|c| c&.match(/Header/)} }
223
+ reader.sheets[0].rows.each(headers: finder) do |row|
224
+ acc << row
63
225
  end
226
+
227
+ _(acc).must_equal(
228
+ [
229
+ { 'Header 1' => 'Data 1-A', 'Header 2' => 'Data 1-B' },
230
+ { 'Header 1' => nil, 'Header 2' => nil },
231
+ { 'Header 1' => 'Data 2-A', 'Header 2' => 'Data 2-B' }
232
+ ]
233
+ )
64
234
  end
65
235
  end
66
236
 
67
- describe SimpleXlsxReader::Document::Mapper do
68
- let(:described_class) { SimpleXlsxReader::Document::Mapper }
237
+ describe "Sheet#rows#each(headers: a_hash)" do
238
+ let(:sheet) do
239
+ Nokogiri::XML(
240
+ <<~XML
241
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
242
+ <dimension ref="A1:C7" />
243
+ <sheetData>
244
+ <row r="1">
245
+ <c r="A1" s="0">
246
+ <v>a chart or something</v>
247
+ </c>
248
+ <c r="B1" s="0">
249
+ <v>Rabble rabble</v>
250
+ </c>
251
+ <c r="C1" s="0">
252
+ <v>Rabble rabble</v>
253
+ </c>
254
+ </row>
255
+ <row r="2">
256
+ <c r="A2" s="0">
257
+ <v>Chatty junk</v>
258
+ </c>
259
+ <c r="B2" s="0">
260
+ <v></v>
261
+ </c>
262
+ <c r="C2" s="0">
263
+ <v></v>
264
+ </c>
265
+ </row>
266
+ <row r="4">
267
+ <c r="A4" s="0">
268
+ <v>ID Number</v>
269
+ </c>
270
+ <c r="B4" s="0">
271
+ <v>ExacT</v>
272
+ </c>
273
+ <c r="C4" s="0">
274
+ <v>FOO Name</v>
275
+ </c>
276
+
277
+ </row>
278
+ <row r="5">
279
+ <c r="A5" s="0">
280
+ <v>ID 1-A</v>
281
+ </c>
282
+ <c r="B5" s="0">
283
+ <v>Exact 1-B</v>
284
+ </c>
285
+ <c r="C5" s="0">
286
+ <v>Name 1-C</v>
287
+ </c>
288
+ </row>
289
+ <row r="7">
290
+ <c r="A7" s="0">
291
+ <v>ID 2-A</v>
292
+ </c>
293
+ <c r="B7" s="0">
294
+ <v>Exact 2-B</v>
295
+ </c>
296
+ <c r="C7" s="0">
297
+ <v>Name 2-C</v>
298
+ </c>
299
+ </row>
300
+ </sheetData>
301
+ </worksheet>
302
+ XML
303
+ )
304
+ end
305
+
306
+ it 'transforms headers into symbols based on the header map' do
307
+ header_map = {id: /ID/, name: /foo/i, exact: 'ExacT'}
308
+ result = reader.sheets[0].rows.each(headers: header_map).to_a
309
+
310
+ _(result).must_equal(
311
+ [
312
+ { id: 'ID 1-A', exact: 'Exact 1-B', name: 'Name 1-C' },
313
+ { id: nil, exact: nil, name: nil },
314
+ { id: 'ID 2-A', exact: 'Exact 2-B', name: 'Name 2-C' },
315
+ ]
316
+ )
317
+ end
318
+
319
+ it 'if a match isnt found, uses un-matched header name' do
320
+ sheet.xpath("//*[text() = 'ExacT']")
321
+ .first.children.first.content = 'not ExacT'
322
+
323
+ header_map = {id: /ID/, name: /foo/i, exact: 'ExacT'}
324
+ result = reader.sheets[0].rows.each(headers: header_map).to_a
325
+
326
+ _(result).must_equal(
327
+ [
328
+ { id: 'ID 1-A', 'not ExacT' => 'Exact 1-B', name: 'Name 1-C' },
329
+ { id: nil, 'not ExacT' => nil, name: nil },
330
+ { id: 'ID 2-A', 'not ExacT' => 'Exact 2-B', name: 'Name 2-C' },
331
+ ]
332
+ )
333
+ end
334
+ end
335
+
336
+ describe 'Sheet#rows[]' do
337
+ it 'raises a RuntimeError if rows not slurped yet' do
338
+ _(-> { reader.sheets[0].rows[1] }).must_raise(RuntimeError)
339
+ end
340
+
341
+ it 'works if the rows have been slurped' do
342
+ _(reader.sheets[0].rows.tap(&:slurp)[0]).must_equal(
343
+ ['Cell A', 'Cell B', 'Cell C']
344
+ )
345
+ end
346
+
347
+ it 'works if the config allows auto slurping' do
348
+ SimpleXlsxReader.configuration.auto_slurp = true
349
+
350
+ _(reader.sheets[0].rows[0]).must_equal(
351
+ ['Cell A', 'Cell B', 'Cell C']
352
+ )
353
+
354
+ SimpleXlsxReader.configuration.auto_slurp = false
355
+ end
356
+ end
357
+
358
+ describe 'Sheet#rows#slurp' do
359
+ let(:rows) { reader.sheets[0].rows.tap(&:slurp) }
360
+
361
+ it 'loads the sheet parser results into memory' do
362
+ _(rows.slurped).must_equal(
363
+ [['Cell A', 'Cell B', 'Cell C']]
364
+ )
365
+ end
366
+
367
+ it '#each and #map use slurped results' do
368
+ _(rows.map(&:reverse)).must_equal(
369
+ [['Cell C', 'Cell B', 'Cell A']]
370
+ )
371
+ end
372
+ end
373
+
374
+ describe 'Sheet#rows#each' do
375
+ let(:sheet) do
376
+ <<~XML
377
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
378
+ <dimension ref="A1:B3" />
379
+ <sheetData>
380
+ <row r="1">
381
+ <c r="A1" s="0">
382
+ <v>Header 1</v>
383
+ </c>
384
+ <c r="B1" s="0">
385
+ <v>Header 2</v>
386
+ </c>
387
+ </row>
388
+ <row r="2">
389
+ <c r="A2" s="0">
390
+ <v>Data 1-A</v>
391
+ </c>
392
+ <c r="B2" s="0">
393
+ <v>Data 1-B</v>
394
+ </c>
395
+ </row>
396
+ <row r="4">
397
+ <c r="A4" s="0">
398
+ <v>Data 2-A</v>
399
+ </c>
400
+ <c r="B4" s="0">
401
+ <v>Data 2-B</v>
402
+ </c>
403
+ </row>
404
+ </sheetData>
405
+ </worksheet>
406
+ XML
407
+ end
408
+
409
+ let(:rows) { reader.sheets[0].rows }
410
+
411
+ it 'with no block, returns an enumerator when not slurped' do
412
+ _(rows.each.class).must_equal Enumerator
413
+ end
414
+
415
+ it 'with no block, passes on header argument in enumerator' do
416
+ _(rows.each(headers: true).inspect).must_match 'headers: true'
417
+ end
418
+
419
+ it 'returns an enumerator when slurped' do
420
+ rows.slurp
421
+ _(rows.each.class).must_equal Enumerator
422
+ end
423
+ end
424
+
425
+ describe 'Sheet#rows#map' do
426
+ let(:sheet) do
427
+ <<~XML
428
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
429
+ <dimension ref="A1:B3" />
430
+ <sheetData>
431
+ <row r="1">
432
+ <c r="A1" s="0">
433
+ <v>Header 1</v>
434
+ </c>
435
+ <c r="B1" s="0">
436
+ <v>Header 2</v>
437
+ </c>
438
+ </row>
439
+ <row r="2">
440
+ <c r="A2" s="0">
441
+ <v>Data 1-A</v>
442
+ </c>
443
+ <c r="B2" s="0">
444
+ <v>Data 1-B</v>
445
+ </c>
446
+ </row>
447
+ <row r="4">
448
+ <c r="A4" s="0">
449
+ <v>Data 2-A</v>
450
+ </c>
451
+ <c r="B4" s="0">
452
+ <v>Data 2-B</v>
453
+ </c>
454
+ </row>
455
+ </sheetData>
456
+ </worksheet>
457
+ XML
458
+ end
459
+
460
+ let(:rows) { reader.sheets[0].rows }
461
+
462
+ it 'does not slurp' do
463
+ _(rows.map(&:first)).must_equal(
464
+ ["Header 1", "Data 1-A", nil, "Data 2-A"]
465
+ )
466
+ _(rows.slurped?).must_equal false
467
+ end
468
+ end
469
+
470
+ describe 'Sheet#headers' do
471
+ let(:doc_sheet) { reader.sheets[0] }
472
+
473
+ it 'raises a RuntimeError if rows not slurped yet' do
474
+ _(-> { doc_sheet.headers }).must_raise(RuntimeError)
475
+ end
476
+
477
+ it 'returns first row if slurped' do
478
+ _(doc_sheet.tap(&:slurp).headers).must_equal(
479
+ ['Cell A', 'Cell B', 'Cell C']
480
+ )
481
+ end
482
+
483
+ it 'returns first row if auto_slurp' do
484
+ SimpleXlsxReader.configuration.auto_slurp = true
485
+
486
+ _(doc_sheet.headers).must_equal(
487
+ ['Cell A', 'Cell B', 'Cell C']
488
+ )
489
+
490
+ SimpleXlsxReader.configuration.auto_slurp = false
491
+ end
492
+ end
493
+
494
+ describe SimpleXlsxReader::Loader do
495
+ let(:described_class) { SimpleXlsxReader::Loader }
69
496
 
70
497
  describe '::cast' do
71
498
  it 'reads type s as a shared string' do
72
- described_class.cast('1', 's', nil, :shared_strings => ['a', 'b', 'c']).
73
- must_equal 'b'
499
+ _(described_class.cast('1', 's', nil, shared_strings: %w[a b c]))
500
+ .must_equal 'b'
74
501
  end
75
502
 
76
503
  it 'reads type inlineStr as a string' do
77
- described_class.cast('the value', nil, 'inlineStr').
78
- must_equal 'the value'
504
+ _(described_class.cast('the value', nil, 'inlineStr'))
505
+ .must_equal 'the value'
79
506
  end
80
507
 
81
508
  it 'reads date styles' do
82
- described_class.cast('41505', nil, :date).
83
- must_equal Date.parse('2013-08-19')
509
+ _(described_class.cast('41505', nil, :date))
510
+ .must_equal Date.parse('2013-08-19')
84
511
  end
85
512
 
86
513
  it 'reads time styles' do
87
- described_class.cast('41505.77083', nil, :time).
88
- must_equal Time.parse('2013-08-19 18:30 UTC')
514
+ _(described_class.cast('41505.77083', nil, :time))
515
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
89
516
  end
90
517
 
91
518
  it 'reads date_time styles' do
92
- described_class.cast('41505.77083', nil, :date_time).
93
- must_equal Time.parse('2013-08-19 18:30 UTC')
519
+ _(described_class.cast('41505.77083', nil, :date_time))
520
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
94
521
  end
95
522
 
96
523
  it 'reads number types styled as dates' do
97
- described_class.cast('41505', 'n', :date).
98
- must_equal Date.parse('2013-08-19')
524
+ _(described_class.cast('41505', 'n', :date))
525
+ .must_equal Date.parse('2013-08-19')
99
526
  end
100
527
 
101
528
  it 'reads number types styled as times' do
102
- described_class.cast('41505.77083', 'n', :time).
103
- must_equal Time.parse('2013-08-19 18:30 UTC')
529
+ _(described_class.cast('41505.77083', 'n', :time))
530
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
104
531
  end
105
532
 
106
533
  it 'reads less-than-zero complex number types styled as times' do
107
- described_class.cast('6.25E-2', 'n', :time).
108
- must_equal Time.parse('1899-12-30 01:30:00 UTC')
534
+ _(described_class.cast('6.25E-2', 'n', :time))
535
+ .must_equal Time.parse('1899-12-30 01:30:00 UTC')
109
536
  end
110
537
 
111
538
  it 'reads number types styled as date_times' do
112
- described_class.cast('41505.77083', 'n', :date_time).
113
- must_equal Time.parse('2013-08-19 18:30 UTC')
539
+ _(described_class.cast('41505.77083', 'n', :date_time))
540
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
114
541
  end
115
542
 
116
543
  it 'raises when date-styled values are not numerical' do
117
- lambda { described_class.cast('14 is not a valid date', nil, :date) }.
118
- must_raise(ArgumentError)
544
+ _(-> { described_class.cast('14 is not a valid date', nil, :date) })
545
+ .must_raise(ArgumentError)
119
546
  end
120
547
 
121
- describe "with the url option" do
122
- let(:url) { "http://www.example.com/hyperlink" }
548
+ describe 'with the url option' do
549
+ let(:url) { 'http://www.example.com/hyperlink' }
123
550
  it 'creates a hyperlink with a string type' do
124
- described_class.cast("A link", 'str', :string, url: url).
125
- must_equal SXR::Hyperlink.new(url, "A link")
551
+ _(described_class.cast('A link', 'str', :string, url: url))
552
+ .must_equal SXR::Hyperlink.new(url, 'A link')
126
553
  end
127
554
 
128
555
  it 'creates a hyperlink with a shared string type' do
129
- described_class.cast("2", 's', nil, shared_strings: ['a','b','c'], url: url).
130
- must_equal SXR::Hyperlink.new(url, 'c')
556
+ _(described_class.cast('2', 's', nil, shared_strings: %w[a b c], url: url))
557
+ .must_equal SXR::Hyperlink.new(url, 'c')
131
558
  end
132
559
  end
133
560
  end
134
561
 
135
- describe '#shared_strings' do
562
+ describe 'shared_strings' do
136
563
  let(:xml) do
137
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
138
- xml.shared_strings = Nokogiri::XML(File.read(
139
- File.join(File.dirname(__FILE__), 'shared_strings.xml') )).remove_namespaces!
140
- end
564
+ File.open(File.join(File.dirname(__FILE__), 'shared_strings.xml'))
141
565
  end
142
566
 
143
- subject { described_class.new(xml) }
567
+ let(:ss) { SimpleXlsxReader::Loader::SharedStringsParser.parse(xml) }
144
568
 
145
569
  it 'parses strings formatted at the cell level' do
146
- subject.shared_strings[0..2].must_equal ['Cell A1', 'Cell B1', 'My Cell']
570
+ _(ss[0..2]).must_equal ['Cell A1', 'Cell B1', 'My Cell']
147
571
  end
148
572
 
149
573
  it 'parses strings formatted at the character level' do
150
- subject.shared_strings[3..5].must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
574
+ _(ss[3..5]).must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
575
+ end
576
+
577
+ it 'parses looong strings containing unicode' do
578
+ _(ss[6]).must_include 'It only happens with both unicode *and* really long text.'
151
579
  end
152
580
  end
153
581
 
154
- describe '#style_types' do
155
- let(:xml) do
156
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
157
- xml.styles = Nokogiri::XML(File.read(
158
- File.join(File.dirname(__FILE__), 'styles.xml') )).remove_namespaces!
159
- end
582
+ describe 'style_types' do
583
+ let(:xml_file) do
584
+ File.open(File.join(File.dirname(__FILE__), 'styles.xml'))
160
585
  end
161
586
 
162
- let(:mapper) do
163
- SimpleXlsxReader::Document::Mapper.new(xml)
587
+ let(:parser) do
588
+ SimpleXlsxReader::Loader::StyleTypesParser.new(xml_file).tap(&:parse)
164
589
  end
165
590
 
166
591
  it 'reads custom formatted styles (numFmtId >= 164)' do
167
- mapper.style_types[1].must_equal :date_time
168
- mapper.custom_style_types[164].must_equal :date_time
592
+ _(parser.style_types[1]).must_equal :date_time
593
+ _(parser.custom_style_types[164]).must_equal :date_time
169
594
  end
170
595
 
171
596
  # something I've seen in the wild; don't think it's correct, but let's be flexible.
172
597
  it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
173
- mapper.style_types[2].must_equal :date_time
174
- mapper.custom_style_types[59].must_equal :date_time
598
+ _(parser.style_types[2]).must_equal :date_time
599
+ _(parser.custom_style_types[59]).must_equal :date_time
175
600
  end
176
601
  end
177
602
 
178
603
  describe '#last_cell_label' do
179
-
180
- let(:generic_style) do
181
- Nokogiri::XML(
182
- <<-XML
183
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
184
- <cellXfs count="1">
185
- <xf numFmtId="0" />
186
- </cellXfs>
187
- </styleSheet>
188
- XML
189
- ).remove_namespaces!
190
- end
191
-
192
604
  # Note, this is not a valid sheet, since the last cell is actually D1 but
193
605
  # the dimension specifies C1. This is just for testing.
194
606
  let(:sheet) do
@@ -214,347 +626,345 @@ describe SimpleXlsxReader do
214
626
  ).remove_namespaces!
215
627
  end
216
628
 
217
- let(:empty_sheet) do
218
- Nokogiri::XML(
219
- <<-XML
220
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
221
- <dimension ref="A1" />
222
- <sheetData>
223
- </sheetData>
224
- </worksheet>
225
- XML
226
- ).remove_namespaces!
227
- end
228
-
229
- let(:xml) do
230
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
231
- xml.sheets = [sheet]
232
- xml.styles = generic_style
629
+ let(:loader) do
630
+ SimpleXlsxReader::Loader.new(nil).tap do |l|
631
+ l.shared_strings = []
632
+ l.sheet_toc = { 'Sheet1': 0 }
633
+ l.style_types = []
634
+ l.base_date = SimpleXlsxReader::DATE_SYSTEM_1900
233
635
  end
234
636
  end
235
637
 
236
- subject { described_class.new(xml) }
638
+ let(:sheet_parser) do
639
+ tempfile = Tempfile.new(['sheet', '.xml'])
640
+ tempfile.write(sheet)
641
+ tempfile.rewind
642
+
643
+ SimpleXlsxReader::Loader::SheetParser.new(
644
+ file_io: tempfile,
645
+ loader: loader
646
+ ).tap { |parser| parser.parse {} }
647
+ end
237
648
 
238
649
  it 'uses /worksheet/dimension if available' do
239
- subject.last_cell_label(sheet).must_equal 'C1'
650
+ _(sheet_parser.last_cell_letter).must_equal 'C'
240
651
  end
241
652
 
242
653
  it 'uses the last header cell if /worksheet/dimension is missing' do
243
- sheet.xpath('/worksheet/dimension').remove
244
- subject.last_cell_label(sheet).must_equal 'D1'
654
+ sheet.at_xpath('/worksheet/dimension').remove
655
+ _(sheet_parser.last_cell_letter).must_equal 'D'
245
656
  end
246
657
 
247
658
  it 'returns "A1" if the dimension is just one cell' do
248
- subject.last_cell_label(empty_sheet).must_equal 'A1'
659
+ sheet.xpath('/worksheet/sheetData/row').remove
660
+ sheet.xpath('/worksheet/dimension').attr('ref', 'A1')
661
+ _(sheet_parser.last_cell_letter).must_equal 'A'
249
662
  end
250
663
 
251
- it 'returns "A1" if the sheet is just one cell, but /worksheet/dimension is missing' do
252
- sheet.at_xpath('/worksheet/dimension').remove
253
- subject.last_cell_label(empty_sheet).must_equal 'A1'
664
+ it 'returns nil if the sheet is just one cell, but /worksheet/dimension is missing' do
665
+ sheet.xpath('/worksheet/sheetData/row').remove
666
+ sheet.xpath('/worksheet/dimension').remove
667
+ _(sheet_parser.last_cell_letter).must_be_nil
254
668
  end
255
669
  end
256
670
 
257
671
  describe '#column_letter_to_number' do
258
- let(:subject) { described_class.new }
259
-
260
- [ ['A', 1 ],
261
- ['B', 2 ],
262
- ['Z', 26 ],
263
- ['AA', 27 ],
264
- ['AB', 28 ],
265
- ['AZ', 52 ],
266
- ['BA', 53 ],
267
- ['BZ', 78 ],
268
- ['ZZ', 702 ],
269
- ['AAA', 703 ],
270
- ['AAZ', 728 ],
271
- ['ABA', 729 ],
272
- ['ABZ', 754 ],
273
- ['AZZ', 1378 ],
274
- ['ZZZ', 18278] ].each do |(letter, number)|
672
+ let(:subject) { SXR::Loader::SheetParser.new(file_io: nil, loader: nil) }
673
+
674
+ [
675
+ ['A', 1],
676
+ ['B', 2],
677
+ ['Z', 26],
678
+ ['AA', 27],
679
+ ['AB', 28],
680
+ ['AZ', 52],
681
+ ['BA', 53],
682
+ ['BZ', 78],
683
+ ['ZZ', 702],
684
+ ['AAA', 703],
685
+ ['AAZ', 728],
686
+ ['ABA', 729],
687
+ ['ABZ', 754],
688
+ ['AZZ', 1378],
689
+ ['ZZZ', 18_278]
690
+ ].each do |(letter, number)|
275
691
  it "converts #{letter} to #{number}" do
276
- subject.column_letter_to_number(letter).must_equal number
692
+ _(subject.column_letter_to_number(letter)).must_equal number
277
693
  end
278
694
  end
279
695
  end
696
+ end
280
697
 
281
- describe "parse errors" do
282
- after do
283
- SimpleXlsxReader.configuration.catch_cell_load_errors = false
284
- end
698
+ describe 'parse errors' do
699
+ after do
700
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
701
+ end
285
702
 
286
- let(:xml) do
287
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
288
- xml.sheets = [Nokogiri::XML(
289
- <<-XML
290
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
291
- <dimension ref="A1:A1" />
292
- <sheetData>
293
- <row>
294
- <c r='A1' s='0'>
295
- <v>14 is a date style; this is not a date</v>
296
- </c>
297
- </row>
298
- </sheetData>
299
- </worksheet>
300
- XML
301
- ).remove_namespaces!]
302
-
303
- # s='0' above refers to the value of numFmtId at cellXfs index 0
304
- xml.styles = Nokogiri::XML(
305
- <<-XML
306
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
307
- <cellXfs count="1">
308
- <xf numFmtId="14" />
309
- </cellXfs>
310
- </styleSheet>
311
- XML
312
- ).remove_namespaces!
313
- end
314
- end
703
+ let(:sheet) do
704
+ Nokogiri::XML(
705
+ <<-XML
706
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
707
+ <dimension ref="A1:A1" />
708
+ <sheetData>
709
+ <row>
710
+ <c r='A1' s='0'>
711
+ <v>14 is a date style; this is not a date</v>
712
+ </c>
713
+ </row>
714
+ </sheetData>
715
+ </worksheet>
716
+ XML
717
+ ).remove_namespaces!
718
+ end
315
719
 
316
- it 'raises if configuration.catch_cell_load_errors' do
317
- SimpleXlsxReader.configuration.catch_cell_load_errors = false
720
+ let(:styles) do
721
+ # s='0' above refers to the value of numFmtId at cellXfs index 0
722
+ Nokogiri::XML(
723
+ <<-XML
724
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
725
+ <cellXfs count="1">
726
+ <xf numFmtId="14" />
727
+ </cellXfs>
728
+ </styleSheet>
729
+ XML
730
+ ).remove_namespaces!
731
+ end
318
732
 
319
- lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first, nil) }.
320
- must_raise(SimpleXlsxReader::CellLoadError)
321
- end
733
+ it 'raises if configuration.catch_cell_load_errors' do
734
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
322
735
 
323
- it 'records a load error if not configuration.catch_cell_load_errors' do
324
- SimpleXlsxReader.configuration.catch_cell_load_errors = true
736
+ _(-> { SimpleXlsxReader.open(xlsx.archive.path).to_hash })
737
+ .must_raise(SimpleXlsxReader::CellLoadError)
738
+ end
325
739
 
326
- sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil)
327
- sheet.load_errors[[0,0]].must_include 'invalid value for Float'
328
- end
740
+ it 'records a load error if not configuration.catch_cell_load_errors' do
741
+ SimpleXlsxReader.configuration.catch_cell_load_errors = true
742
+
743
+ sheet = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].tap(&:slurp)
744
+ _(sheet.load_errors).must_equal(
745
+ [0, 0] => 'invalid value for Float(): "14 is a date style; this is not a date"'
746
+ )
329
747
  end
748
+ end
330
749
 
331
- describe "missing numFmtId attributes" do
750
+ describe 'missing numFmtId attributes' do
751
+ let(:sheet) do
752
+ Nokogiri::XML(
753
+ <<-XML
754
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
755
+ <dimension ref="A1:A1" />
756
+ <sheetData>
757
+ <row>
758
+ <c r='A1' s='s'>
759
+ <v>some content</v>
760
+ </c>
761
+ </row>
762
+ </sheetData>
763
+ </worksheet>
764
+ XML
765
+ ).remove_namespaces!
766
+ end
332
767
 
333
- let(:xml) do
334
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
335
- xml.sheets = [Nokogiri::XML(
336
- <<-XML
337
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
338
- <dimension ref="A1:A1" />
339
- <sheetData>
340
- <row>
341
- <c r='A1' s='s'>
342
- <v>some content</v>
343
- </c>
344
- </row>
345
- </sheetData>
346
- </worksheet>
347
- XML
348
- ).remove_namespaces!]
349
-
350
- xml.styles = Nokogiri::XML(
351
- <<-XML
352
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
353
-
354
- </styleSheet>
355
- XML
356
- ).remove_namespaces!
357
- end
358
- end
768
+ let(:styles) do
769
+ Nokogiri::XML(
770
+ <<-XML
771
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
359
772
 
360
- before do
361
- @row = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows[0]
362
- end
773
+ </styleSheet>
774
+ XML
775
+ ).remove_namespaces!
776
+ end
363
777
 
364
- it 'continues even when cells are missing numFmtId attributes ' do
365
- @row[0].must_equal 'some content'
366
- end
778
+ before do
779
+ @row = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a[0]
780
+ end
367
781
 
782
+ it 'continues even when cells are missing numFmtId attributes ' do
783
+ _(@row[0]).must_equal 'some content'
368
784
  end
785
+ end
369
786
 
370
- describe 'parsing types' do
371
- let(:xml) do
372
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
373
- xml.sheets = [Nokogiri::XML(
374
- <<-XML
375
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
376
- <dimension ref="A1:G1" />
377
- <sheetData>
378
- <row>
379
- <c r='A1' s='0'>
380
- <v>Cell A1</v>
381
- </c>
382
-
383
- <c r='C1' s='1'>
384
- <v>2.4</v>
385
- </c>
386
- <c r='D1' s='1' />
387
-
388
- <c r='E1' s='2'>
389
- <v>30687</v>
390
- </c>
391
- <c r='F1' s='2' />
392
-
393
- <c r='G1' t='inlineStr' s='0'>
394
- <is><t>Cell G1</t></is>
395
- </c>
396
-
397
- <c r='H1' s='0'>
398
- <f>HYPERLINK("http://www.example.com/hyperlink-function", "HYPERLINK function")</f>
399
- <v>HYPERLINK function</v>
400
- </c>
401
-
402
- <c r='I1' s='0'>
403
- <v>GUI-made hyperlink</v>
404
- </c>
405
- </row>
406
- </sheetData>
407
-
408
- <hyperlinks>
409
- <hyperlink ref="I1" id="rId1"/>
410
- </hyperlinks>
411
- </worksheet>
412
- XML
413
- ).remove_namespaces!]
414
-
415
- # s='0' above refers to the value of numFmtId at cellXfs index 0,
416
- # which is in this case 'General' type
417
- xml.styles = Nokogiri::XML(
418
- <<-XML
419
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
420
- <cellXfs count="1">
421
- <xf numFmtId="0" />
422
- <xf numFmtId="2" />
423
- <xf numFmtId="14" />
424
- </cellXfs>
425
- </styleSheet>
426
- XML
427
- ).remove_namespaces!
428
-
429
- # Although not a "type" or "style" according to xlsx spec,
430
- # it sure could/should be, so let's test it with the rest of our
431
- # typecasting code.
432
- xml.sheet_rels = [Nokogiri::XML(
433
- <<-XML
434
- <Relationships>
435
- <Relationship
436
- Id="rId1"
437
- Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
438
- Target="http://www.example.com/hyperlink-gui"
439
- TargetMode="External"
440
- />
441
- </Relationships>
442
- XML
443
- ).remove_namespaces!]
787
+ describe 'parsing types' do
788
+ let(:sheet) do
789
+ Nokogiri::XML(
790
+ <<-XML
791
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
792
+ <dimension ref="A1:G1" />
793
+ <sheetData>
794
+ <row>
795
+ <c r='A1' s='0'>
796
+ <v>Cell A1</v>
797
+ </c>
444
798
 
445
- end
446
- end
799
+ <c r='C1' s='1'>
800
+ <v>2.4</v>
801
+ </c>
802
+ <c r='D1' s='1' />
447
803
 
448
- before do
449
- @row = described_class.new(xml).parse_sheet('test', xml.sheets.first, xml.sheet_rels.first).rows[0]
450
- end
804
+ <c r='E1' s='2'>
805
+ <v>30687</v>
806
+ </c>
807
+ <c r='F1' s='2' />
451
808
 
452
- it "reads 'Generic' cells as strings" do
453
- @row[0].must_equal "Cell A1"
454
- end
809
+ <c r='G1' t='inlineStr' s='0'>
810
+ <is><t>Cell G1</t></is>
811
+ </c>
455
812
 
456
- it "reads empty 'Generic' cells as nil" do
457
- @row[1].must_equal nil
458
- end
813
+ <c r='H1' s='0'>
814
+ <f>HYPERLINK("http://www.example.com/hyperlink-function", "HYPERLINK function")</f>
815
+ <v>HYPERLINK function</v>
816
+ </c>
459
817
 
460
- # We could expand on these type tests, but really just a couple
461
- # demonstrate that it's wired together. Type-specific tests should go
462
- # on #cast
818
+ <c r='I1' s='0'>
819
+ <v>GUI-made hyperlink</v>
820
+ </c>
821
+ </row>
822
+ </sheetData>
463
823
 
464
- it "reads floats" do
465
- @row[2].must_equal 2.4
466
- end
824
+ <hyperlinks>
825
+ <hyperlink ref="I1" id="rId1"/>
826
+ </hyperlinks>
827
+ </worksheet>
828
+ XML
829
+ ).remove_namespaces!
830
+ end
467
831
 
468
- it "reads empty floats as nil" do
469
- @row[3].must_equal nil
470
- end
832
+ let(:styles) do
833
+ # s='0' above refers to the value of numFmtId at cellXfs index 0,
834
+ # which in this case is the 'General' type
835
+ Nokogiri::XML(
836
+ <<-XML
837
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
838
+ <cellXfs count="1">
839
+ <xf numFmtId="0" />
840
+ <xf numFmtId="2" />
841
+ <xf numFmtId="14" />
842
+ </cellXfs>
843
+ </styleSheet>
844
+ XML
845
+ ).remove_namespaces!
846
+ end
471
847
 
472
- it "reads dates" do
473
- @row[4].must_equal Date.parse('Jan 6, 1984')
474
- end
848
+ # Although a hyperlink is not a "type" or "style" according to the xlsx spec,
849
+ # it sure could/should be, so let's test it with the rest of our
850
+ # typecasting code.
851
+ let(:rels) do
852
+ [
853
+ Nokogiri::XML(
854
+ <<-XML
855
+ <Relationships>
856
+ <Relationship
857
+ Id="rId1"
858
+ Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
859
+ Target="http://www.example.com/hyperlink-gui"
860
+ TargetMode="External"
861
+ />
862
+ </Relationships>
863
+ XML
864
+ ).remove_namespaces!
865
+ ]
866
+ end
475
867
 
476
- it "reads empty date cells as nil" do
477
- @row[5].must_equal nil
478
- end
868
+ before do
869
+ @row = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a[0]
870
+ end
479
871
 
480
- it "reads strings formatted as inlineStr" do
481
- @row[6].must_equal 'Cell G1'
482
- end
872
+ it "reads 'Generic' cells as strings" do
873
+ _(@row[0]).must_equal 'Cell A1'
874
+ end
483
875
 
484
- it "reads hyperlinks created via HYPERLINK()" do
485
- @row[7].must_equal(
486
- SXR::Hyperlink.new(
487
- "http://www.example.com/hyperlink-function", "HYPERLINK function"))
488
- end
876
+ it "reads empty 'Generic' cells as nil" do
877
+ _(@row[1]).must_be_nil
878
+ end
489
879
 
490
- it "reads hyperlinks created via the GUI" do
491
- @row[8].must_equal(
492
- SXR::Hyperlink.new(
493
- "http://www.example.com/hyperlink-gui", "GUI-made hyperlink"))
494
- end
880
+ # We could expand on these type tests, but a couple are enough to
881
+ # demonstrate that everything is wired together. Type-specific tests
882
+ # should go with #cast.
883
+
884
+ it 'reads floats' do
885
+ _(@row[2]).must_equal 2.4
495
886
  end
496
887
 
497
- describe 'parsing documents with blank rows' do
498
- let(:xml) do
499
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
500
- xml.sheets = [Nokogiri::XML(
501
- <<-XML
502
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
503
- <dimension ref="A1:D7" />
504
- <sheetData>
505
- <row r="2" spans="1:1">
506
- <c r="A2" s="0">
507
- <v>0</v>
508
- </c>
509
- </row>
510
- <row r="4" spans="1:1">
511
- <c r="B4" s="0">
512
- <v>1</v>
513
- </c>
514
- </row>
515
- <row r="5" spans="1:1">
516
- <c r="C5" s="0">
517
- <v>2</v>
518
- </c>
519
- </row>
520
- <row r="7" spans="1:1">
521
- <c r="D7" s="0">
522
- <v>3</v>
523
- </c>
524
- </row>
525
- </sheetData>
526
- </worksheet>
527
- XML
528
- ).remove_namespaces!]
529
-
530
- xml.styles = Nokogiri::XML(
531
- <<-XML
532
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
533
- <cellXfs count="1">
534
- <xf numFmtId="0" />
535
- </cellXfs>
536
- </styleSheet>
537
- XML
538
- ).remove_namespaces!
539
- end
540
- end
888
+ it 'reads empty floats as nil' do
889
+ _(@row[3]).must_be_nil
890
+ end
541
891
 
542
- before do
543
- @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows
544
- end
892
+ it 'reads dates' do
893
+ _(@row[4]).must_equal Date.parse('Jan 6, 1984')
894
+ end
545
895
 
546
- it "reads row data despite gaps in row numbering" do
547
- @rows.must_equal [
548
- [nil,nil,nil,nil],
549
- ["0",nil,nil,nil],
550
- [nil,nil,nil,nil],
551
- [nil,"1",nil,nil],
552
- [nil,nil,"2",nil],
553
- [nil,nil,nil,nil],
554
- [nil,nil,nil,"3"]
555
- ]
556
- end
896
+ it 'reads empty date cells as nil' do
897
+ _(@row[5]).must_be_nil
898
+ end
899
+
900
+ it 'reads strings formatted as inlineStr' do
901
+ _(@row[6]).must_equal 'Cell G1'
557
902
  end
558
903
 
904
+ it 'reads hyperlinks created via HYPERLINK()' do
905
+ _(@row[7]).must_equal(
906
+ SXR::Hyperlink.new(
907
+ 'http://www.example.com/hyperlink-function', 'HYPERLINK function'
908
+ )
909
+ )
910
+ end
911
+
912
+ it 'reads hyperlinks created via the GUI' do
913
+ _(@row[8]).must_equal(
914
+ SXR::Hyperlink.new(
915
+ 'http://www.example.com/hyperlink-gui', 'GUI-made hyperlink'
916
+ )
917
+ )
918
+ end
919
+ end
920
+
921
+ describe 'parsing documents with blank rows' do
922
+ let(:sheet) do
923
+ Nokogiri::XML(
924
+ <<-XML
925
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
926
+ <dimension ref="A1:D7" />
927
+ <sheetData>
928
+ <row r="2" spans="1:1">
929
+ <c r="A2" s="0">
930
+ <v>0</v>
931
+ </c>
932
+ </row>
933
+ <row r="4" spans="1:1">
934
+ <c r="B4" s="0">
935
+ <v>1</v>
936
+ </c>
937
+ </row>
938
+ <row r="5" spans="1:1">
939
+ <c r="C5" s="0">
940
+ <v>2</v>
941
+ </c>
942
+ </row>
943
+ <row r="7" spans="1:1">
944
+ <c r="D7" s="0">
945
+ <v>3</v>
946
+ </c>
947
+ </row>
948
+ </sheetData>
949
+ </worksheet>
950
+ XML
951
+ ).remove_namespaces!
952
+ end
953
+
954
+ before do
955
+ @rows = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a
956
+ end
957
+
958
+ it 'reads row data despite gaps in row numbering' do
959
+ _(@rows).must_equal [
960
+ [nil, nil, nil, nil],
961
+ ['0', nil, nil, nil],
962
+ [nil, nil, nil, nil],
963
+ [nil, '1', nil, nil],
964
+ [nil, nil, '2', nil],
965
+ [nil, nil, nil, nil],
966
+ [nil, nil, nil, '3']
967
+ ]
968
+ end
559
969
  end
560
970
  end