simple_xlsx_reader 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,131 +1,606 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'test_helper'
2
4
  require 'time'
3
5
 
6
+ SXR = SimpleXlsxReader
7
+
4
8
  describe SimpleXlsxReader do
5
- let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
6
- 'sesame_street_blog.xlsx') }
7
-
8
- let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
9
-
10
- describe '#to_hash' do
11
- it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
12
- subject.to_hash.must_equal({
13
- "Authors"=>
14
- [["Name", "Occupation"],
15
- ["Big Bird", "Teacher"]],
16
-
17
- "Posts"=>
18
- [["Author Name", "Title", "Body", "Created At", "Comment Count"],
19
- ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
20
- ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2],
21
- ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0],
22
- ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil]]
23
- })
9
+ let(:sesame_street_blog_file) do
10
+ File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx')
11
+ end
12
+
13
+ let(:document) { SimpleXlsxReader.open(sesame_street_blog_file) }
14
+
15
+ ##
16
+ # A high-level acceptance test testing misc features such as date/time parsing,
17
+ # hyperlinks (both function and ref kinds), formula dates, empty rows, etc.
18
+
19
+ let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
20
+ let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
21
+
22
+ let(:expected_result) do
23
+ {
24
+ 'Authors' =>
25
+ [
26
+ ['Name', 'Occupation'],
27
+ ['Big Bird', 'Teacher']
28
+ ],
29
+ 'Posts' =>
30
+ [
31
+ ['Author Name', 'Title', 'Body', 'Created At', 'Comment Count', 'URL'],
32
+ ['Big Bird', 'The Number 1', 'The Greatest', Time.parse('2002-01-01 11:00:00 UTC'), 1, SXR::Hyperlink.new('http://www.example.com/hyperlink-function', 'This uses the HYPERLINK() function')],
33
+ ['Big Bird', 'The Number 2', 'Second Best', Time.parse('2002-01-02 14:00:00 UTC'), 2, SXR::Hyperlink.new('http://www.example.com/hyperlink-gui', 'This uses the hyperlink GUI option')],
34
+ ['Big Bird', 'Formula Dates', 'Tricky tricky', Time.parse('2002-01-03 14:00:00 UTC'), 0, nil],
35
+ ['Empty Eagress', nil, 'The title, date, and comment have types, but no values', nil, nil, nil]
36
+ ]
37
+ }
38
+ end
39
+
40
+ describe SimpleXlsxReader do
41
+ describe 'load from file path' do
42
+ let(:subject) { SimpleXlsxReader.open(sesame_street_blog_file_path) }
43
+
44
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
45
+ _(subject.to_hash).must_equal(expected_result)
46
+ end
47
+ end
48
+
49
+ describe 'load from buffer' do
50
+ let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
51
+
52
+ it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
53
+ _(subject.to_hash).must_equal(expected_result)
54
+ end
55
+ end
56
+
57
+ it 'outputs strings in UTF-8 encoding' do
58
+ document = SimpleXlsxReader.parse(sesame_street_blog_io)
59
+ _(document.sheets[0].rows.to_a.flatten.map(&:encoding).uniq)
60
+ .must_equal [Encoding::UTF_8]
61
+ end
62
+
63
+ it 'can use all our enumerable nicities without slurping' do
64
+ document = SimpleXlsxReader.parse(sesame_street_blog_io)
65
+
66
+ headers = {
67
+ name: 'Author Name',
68
+ title: 'Title',
69
+ body: 'Body',
70
+ created_at: 'Created At',
71
+ count: /Count/
72
+ }
73
+
74
+ rows = document.sheets[1].rows
75
+ result =
76
+ rows.each(headers: headers).with_index.with_object({}) do |(row, i), acc|
77
+ acc[i] = row
78
+ end
79
+
80
+ _(result[0]).must_equal(
81
+ name: 'Big Bird',
82
+ title: 'The Number 1',
83
+ body: 'The Greatest',
84
+ created_at: Time.parse('2002-01-01 11:00:00 UTC'),
85
+ count: 1,
86
+ "URL" => 'http://www.example.com/hyperlink-function'
87
+ )
88
+
89
+ _(rows.slurped?).must_equal false
90
+ end
91
+ end
92
+
93
+ ##
94
+ # For more fine-grained unit tests, we sometimes build our own workbook via
95
+ # Nokogiri. TestXlsxBuilder has some defaults, and this let-style lets us
96
+ # concisely override them in nested describe blocks.
97
+
98
+ let(:shared_strings) { nil }
99
+ let(:styles) { nil }
100
+ let(:sheet) { nil }
101
+ let(:workbook) { nil }
102
+ let(:rels) { nil }
103
+
104
+ let(:xlsx) do
105
+ TestXlsxBuilder.new(
106
+ shared_strings: shared_strings,
107
+ styles: styles,
108
+ sheets: sheet && [sheet],
109
+ workbook: workbook,
110
+ rels: rels
111
+ )
112
+ end
113
+
114
+ let(:reader) { SimpleXlsxReader.open(xlsx.archive.path) }
115
+
116
+ describe 'Sheet#rows#each(headers: true)' do
117
+ let(:sheet) do
118
+ <<~XML
119
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
120
+ <dimension ref="A1:B3" />
121
+ <sheetData>
122
+ <row r="1">
123
+ <c r="A1" s="0">
124
+ <v>Header 1</v>
125
+ </c>
126
+ <c r="B1" s="0">
127
+ <v>Header 2</v>
128
+ </c>
129
+ </row>
130
+ <row r="2">
131
+ <c r="A2" s="0">
132
+ <v>Data 1-A</v>
133
+ </c>
134
+ <c r="B2" s="0">
135
+ <v>Data 1-B</v>
136
+ </c>
137
+ </row>
138
+ <row r="4">
139
+ <c r="A4" s="0">
140
+ <v>Data 2-A</v>
141
+ </c>
142
+ <c r="B4" s="0">
143
+ <v>Data 2-B</v>
144
+ </c>
145
+ </row>
146
+ </sheetData>
147
+ </worksheet>
148
+ XML
149
+ end
150
+
151
+ it 'yields rows as hashes' do
152
+ acc = []
153
+
154
+ reader.sheets[0].rows.each(headers: true) do |row|
155
+ acc << row
156
+ end
157
+
158
+ _(acc).must_equal(
159
+ [
160
+ { 'Header 1' => 'Data 1-A', 'Header 2' => 'Data 1-B' },
161
+ { 'Header 1' => nil, 'Header 2' => nil },
162
+ { 'Header 1' => 'Data 2-A', 'Header 2' => 'Data 2-B' }
163
+ ]
164
+ )
165
+ end
166
+ end
167
+
168
+ describe 'Sheet#rows#each(headers: ->(row) {...})' do
169
+ let(:sheet) do
170
+ <<~XML
171
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
172
+ <dimension ref="A1:B7" />
173
+ <sheetData>
174
+ <row r="1">
175
+ <c r="A1" s="0">
176
+ <v>a chart or something</v>
177
+ </c>
178
+ <c r="B1" s="0">
179
+ <v>Rabble rabble</v>
180
+ </c>
181
+ </row>
182
+ <row r="2">
183
+ <c r="A2" s="0">
184
+ <v>Chatty junk</v>
185
+ </c>
186
+ <c r="B2" s="0">
187
+ <v></v>
188
+ </c>
189
+ </row>
190
+ <row r="4">
191
+ <c r="A4" s="0">
192
+ <v>Header 1</v>
193
+ </c>
194
+ <c r="B4" s="0">
195
+ <v>Header 2</v>
196
+ </c>
197
+ </row>
198
+ <row r="5">
199
+ <c r="A5" s="0">
200
+ <v>Data 1-A</v>
201
+ </c>
202
+ <c r="B5" s="0">
203
+ <v>Data 1-B</v>
204
+ </c>
205
+ </row>
206
+ <row r="7">
207
+ <c r="A7" s="0">
208
+ <v>Data 2-A</v>
209
+ </c>
210
+ <c r="B7" s="0">
211
+ <v>Data 2-B</v>
212
+ </c>
213
+ </row>
214
+ </sheetData>
215
+ </worksheet>
216
+ XML
217
+ end
218
+
219
+ it 'yields rows as hashes' do
220
+ acc = []
221
+
222
+ finder = ->(row) { row.find {|c| c&.match(/Header/)} }
223
+ reader.sheets[0].rows.each(headers: finder) do |row|
224
+ acc << row
225
+ end
226
+
227
+ _(acc).must_equal(
228
+ [
229
+ { 'Header 1' => 'Data 1-A', 'Header 2' => 'Data 1-B' },
230
+ { 'Header 1' => nil, 'Header 2' => nil },
231
+ { 'Header 1' => 'Data 2-A', 'Header 2' => 'Data 2-B' }
232
+ ]
233
+ )
234
+ end
235
+ end
236
+
237
+ describe "Sheet#rows#each(headers: a_hash)" do
238
+ let(:sheet) do
239
+ Nokogiri::XML(
240
+ <<~XML
241
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
242
+ <dimension ref="A1:C7" />
243
+ <sheetData>
244
+ <row r="1">
245
+ <c r="A1" s="0">
246
+ <v>a chart or something</v>
247
+ </c>
248
+ <c r="B1" s="0">
249
+ <v>Rabble rabble</v>
250
+ </c>
251
+ <c r="C1" s="0">
252
+ <v>Rabble rabble</v>
253
+ </c>
254
+ </row>
255
+ <row r="2">
256
+ <c r="A2" s="0">
257
+ <v>Chatty junk</v>
258
+ </c>
259
+ <c r="B2" s="0">
260
+ <v></v>
261
+ </c>
262
+ <c r="C2" s="0">
263
+ <v></v>
264
+ </c>
265
+ </row>
266
+ <row r="4">
267
+ <c r="A4" s="0">
268
+ <v>ID Number</v>
269
+ </c>
270
+ <c r="B4" s="0">
271
+ <v>ExacT</v>
272
+ </c>
273
+ <c r="C4" s="0">
274
+ <v>FOO Name</v>
275
+ </c>
276
+
277
+ </row>
278
+ <row r="5">
279
+ <c r="A5" s="0">
280
+ <v>ID 1-A</v>
281
+ </c>
282
+ <c r="B5" s="0">
283
+ <v>Exact 1-B</v>
284
+ </c>
285
+ <c r="C5" s="0">
286
+ <v>Name 1-C</v>
287
+ </c>
288
+ </row>
289
+ <row r="7">
290
+ <c r="A7" s="0">
291
+ <v>ID 2-A</v>
292
+ </c>
293
+ <c r="B7" s="0">
294
+ <v>Exact 2-B</v>
295
+ </c>
296
+ <c r="C7" s="0">
297
+ <v>Name 2-C</v>
298
+ </c>
299
+ </row>
300
+ </sheetData>
301
+ </worksheet>
302
+ XML
303
+ )
304
+ end
305
+
306
+ it 'transforms headers into symbols based on the header map' do
307
+ header_map = {id: /ID/, name: /foo/i, exact: 'ExacT'}
308
+ result = reader.sheets[0].rows.each(headers: header_map).to_a
309
+
310
+ _(result).must_equal(
311
+ [
312
+ { id: 'ID 1-A', exact: 'Exact 1-B', name: 'Name 1-C' },
313
+ { id: nil, exact: nil, name: nil },
314
+ { id: 'ID 2-A', exact: 'Exact 2-B', name: 'Name 2-C' },
315
+ ]
316
+ )
317
+ end
318
+
319
+ it 'if a match isnt found, uses un-matched header name' do
320
+ sheet.xpath("//*[text() = 'ExacT']")
321
+ .first.children.first.content = 'not ExacT'
322
+
323
+ header_map = {id: /ID/, name: /foo/i, exact: 'ExacT'}
324
+ result = reader.sheets[0].rows.each(headers: header_map).to_a
325
+
326
+ _(result).must_equal(
327
+ [
328
+ { id: 'ID 1-A', 'not ExacT' => 'Exact 1-B', name: 'Name 1-C' },
329
+ { id: nil, 'not ExacT' => nil, name: nil },
330
+ { id: 'ID 2-A', 'not ExacT' => 'Exact 2-B', name: 'Name 2-C' },
331
+ ]
332
+ )
333
+ end
334
+ end
335
+
336
+ describe 'Sheet#rows[]' do
337
+ it 'raises a RuntimeError if rows not slurped yet' do
338
+ _(-> { reader.sheets[0].rows[1] }).must_raise(RuntimeError)
339
+ end
340
+
341
+ it 'works if the rows have been slurped' do
342
+ _(reader.sheets[0].rows.tap(&:slurp)[0]).must_equal(
343
+ ['Cell A', 'Cell B', 'Cell C']
344
+ )
345
+ end
346
+
347
+ it 'works if the config allows auto slurping' do
348
+ SimpleXlsxReader.configuration.auto_slurp = true
349
+
350
+ _(reader.sheets[0].rows[0]).must_equal(
351
+ ['Cell A', 'Cell B', 'Cell C']
352
+ )
353
+
354
+ SimpleXlsxReader.configuration.auto_slurp = false
355
+ end
356
+ end
357
+
358
+ describe 'Sheet#rows#slurp' do
359
+ let(:rows) { reader.sheets[0].rows.tap(&:slurp) }
360
+
361
+ it 'loads the sheet parser results into memory' do
362
+ _(rows.slurped).must_equal(
363
+ [['Cell A', 'Cell B', 'Cell C']]
364
+ )
365
+ end
366
+
367
+ it '#each and #map use slurped results' do
368
+ _(rows.map(&:reverse)).must_equal(
369
+ [['Cell C', 'Cell B', 'Cell A']]
370
+ )
24
371
  end
25
372
  end
26
373
 
27
- describe SimpleXlsxReader::Document::Mapper do
28
- let(:described_class) { SimpleXlsxReader::Document::Mapper }
374
+ describe 'Sheet#rows#each' do
375
+ let(:sheet) do
376
+ <<~XML
377
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
378
+ <dimension ref="A1:B3" />
379
+ <sheetData>
380
+ <row r="1">
381
+ <c r="A1" s="0">
382
+ <v>Header 1</v>
383
+ </c>
384
+ <c r="B1" s="0">
385
+ <v>Header 2</v>
386
+ </c>
387
+ </row>
388
+ <row r="2">
389
+ <c r="A2" s="0">
390
+ <v>Data 1-A</v>
391
+ </c>
392
+ <c r="B2" s="0">
393
+ <v>Data 1-B</v>
394
+ </c>
395
+ </row>
396
+ <row r="4">
397
+ <c r="A4" s="0">
398
+ <v>Data 2-A</v>
399
+ </c>
400
+ <c r="B4" s="0">
401
+ <v>Data 2-B</v>
402
+ </c>
403
+ </row>
404
+ </sheetData>
405
+ </worksheet>
406
+ XML
407
+ end
408
+
409
+ let(:rows) { reader.sheets[0].rows }
410
+
411
+ it 'with no block, returns an enumerator when not slurped' do
412
+ _(rows.each.class).must_equal Enumerator
413
+ end
414
+
415
+ it 'with no block, passes on header argument in enumerator' do
416
+ _(rows.each(headers: true).inspect).must_match 'headers: true'
417
+ end
418
+
419
+ it 'returns an enumerator when slurped' do
420
+ rows.slurp
421
+ _(rows.each.class).must_equal Enumerator
422
+ end
423
+ end
424
+
425
+ describe 'Sheet#rows#map' do
426
+ let(:sheet) do
427
+ <<~XML
428
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
429
+ <dimension ref="A1:B3" />
430
+ <sheetData>
431
+ <row r="1">
432
+ <c r="A1" s="0">
433
+ <v>Header 1</v>
434
+ </c>
435
+ <c r="B1" s="0">
436
+ <v>Header 2</v>
437
+ </c>
438
+ </row>
439
+ <row r="2">
440
+ <c r="A2" s="0">
441
+ <v>Data 1-A</v>
442
+ </c>
443
+ <c r="B2" s="0">
444
+ <v>Data 1-B</v>
445
+ </c>
446
+ </row>
447
+ <row r="4">
448
+ <c r="A4" s="0">
449
+ <v>Data 2-A</v>
450
+ </c>
451
+ <c r="B4" s="0">
452
+ <v>Data 2-B</v>
453
+ </c>
454
+ </row>
455
+ </sheetData>
456
+ </worksheet>
457
+ XML
458
+ end
459
+
460
+ let(:rows) { reader.sheets[0].rows }
461
+
462
+ it 'does not slurp' do
463
+ _(rows.map(&:first)).must_equal(
464
+ ["Header 1", "Data 1-A", nil, "Data 2-A"]
465
+ )
466
+ _(rows.slurped?).must_equal false
467
+ end
468
+ end
469
+
470
+ describe 'Sheet#headers' do
471
+ let(:doc_sheet) { reader.sheets[0] }
472
+
473
+ it 'raises a RuntimeError if rows not slurped yet' do
474
+ _(-> { doc_sheet.headers }).must_raise(RuntimeError)
475
+ end
476
+
477
+ it 'returns first row if slurped' do
478
+ _(doc_sheet.tap(&:slurp).headers).must_equal(
479
+ ['Cell A', 'Cell B', 'Cell C']
480
+ )
481
+ end
482
+
483
+ it 'returns first row if auto_slurp' do
484
+ SimpleXlsxReader.configuration.auto_slurp = true
485
+
486
+ _(doc_sheet.headers).must_equal(
487
+ ['Cell A', 'Cell B', 'Cell C']
488
+ )
489
+
490
+ SimpleXlsxReader.configuration.auto_slurp = false
491
+ end
492
+ end
493
+
494
+ describe SimpleXlsxReader::Loader do
495
+ let(:described_class) { SimpleXlsxReader::Loader }
29
496
 
30
497
  describe '::cast' do
31
498
  it 'reads type s as a shared string' do
32
- described_class.cast('1', 's', nil, :shared_strings => ['a', 'b', 'c']).
33
- must_equal 'b'
499
+ _(described_class.cast('1', 's', nil, shared_strings: %w[a b c]))
500
+ .must_equal 'b'
34
501
  end
35
502
 
36
503
  it 'reads type inlineStr as a string' do
37
- described_class.cast('the value', nil, 'inlineStr').
38
- must_equal 'the value'
504
+ _(described_class.cast('the value', nil, 'inlineStr'))
505
+ .must_equal 'the value'
39
506
  end
40
507
 
41
508
  it 'reads date styles' do
42
- described_class.cast('41505', nil, :date).
43
- must_equal Date.parse('2013-08-19')
509
+ _(described_class.cast('41505', nil, :date))
510
+ .must_equal Date.parse('2013-08-19')
44
511
  end
45
512
 
46
513
  it 'reads time styles' do
47
- described_class.cast('41505.77083', nil, :time).
48
- must_equal Time.parse('2013-08-19 18:30 UTC')
514
+ _(described_class.cast('41505.77083', nil, :time))
515
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
49
516
  end
50
517
 
51
518
  it 'reads date_time styles' do
52
- described_class.cast('41505.77083', nil, :date_time).
53
- must_equal Time.parse('2013-08-19 18:30 UTC')
519
+ _(described_class.cast('41505.77083', nil, :date_time))
520
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
54
521
  end
55
522
 
56
523
  it 'reads number types styled as dates' do
57
- described_class.cast('41505', 'n', :date).
58
- must_equal Date.parse('2013-08-19')
524
+ _(described_class.cast('41505', 'n', :date))
525
+ .must_equal Date.parse('2013-08-19')
59
526
  end
60
527
 
61
528
  it 'reads number types styled as times' do
62
- described_class.cast('41505.77083', 'n', :time).
63
- must_equal Time.parse('2013-08-19 18:30 UTC')
529
+ _(described_class.cast('41505.77083', 'n', :time))
530
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
531
+ end
532
+
533
+ it 'reads less-than-zero complex number types styled as times' do
534
+ _(described_class.cast('6.25E-2', 'n', :time))
535
+ .must_equal Time.parse('1899-12-30 01:30:00 UTC')
64
536
  end
65
537
 
66
538
  it 'reads number types styled as date_times' do
67
- described_class.cast('41505.77083', 'n', :date_time).
68
- must_equal Time.parse('2013-08-19 18:30 UTC')
539
+ _(described_class.cast('41505.77083', 'n', :date_time))
540
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
541
+ end
542
+
543
+ it 'raises when date-styled values are not numerical' do
544
+ _(-> { described_class.cast('14 is not a valid date', nil, :date) })
545
+ .must_raise(ArgumentError)
546
+ end
547
+
548
+ describe 'with the url option' do
549
+ let(:url) { 'http://www.example.com/hyperlink' }
550
+ it 'creates a hyperlink with a string type' do
551
+ _(described_class.cast('A link', 'str', :string, url: url))
552
+ .must_equal SXR::Hyperlink.new(url, 'A link')
553
+ end
554
+
555
+ it 'creates a hyperlink with a shared string type' do
556
+ _(described_class.cast('2', 's', nil, shared_strings: %w[a b c], url: url))
557
+ .must_equal SXR::Hyperlink.new(url, 'c')
558
+ end
69
559
  end
70
560
  end
71
561
 
72
- describe '#shared_strings' do
562
+ describe 'shared_strings' do
73
563
  let(:xml) do
74
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
75
- xml.shared_strings = Nokogiri::XML(File.read(
76
- File.join(File.dirname(__FILE__), 'shared_strings.xml') )).remove_namespaces!
77
- end
564
+ File.open(File.join(File.dirname(__FILE__), 'shared_strings.xml'))
78
565
  end
79
566
 
80
- subject { described_class.new(xml) }
567
+ let(:ss) { SimpleXlsxReader::Loader::SharedStringsParser.parse(xml) }
81
568
 
82
569
  it 'parses strings formatted at the cell level' do
83
- subject.shared_strings[0..2].must_equal ['Cell A1', 'Cell B1', 'My Cell']
570
+ _(ss[0..2]).must_equal ['Cell A1', 'Cell B1', 'My Cell']
84
571
  end
85
572
 
86
573
  it 'parses strings formatted at the character level' do
87
- subject.shared_strings[3..5].must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
574
+ _(ss[3..5]).must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
575
+ end
576
+
577
+ it 'parses looong strings containing unicode' do
578
+ _(ss[6]).must_include 'It only happens with both unicode *and* really long text.'
88
579
  end
89
580
  end
90
581
 
91
- describe '#style_types' do
92
- let(:xml) do
93
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
94
- xml.styles = Nokogiri::XML(File.read(
95
- File.join(File.dirname(__FILE__), 'styles.xml') )).remove_namespaces!
96
- end
582
+ describe 'style_types' do
583
+ let(:xml_file) do
584
+ File.open(File.join(File.dirname(__FILE__), 'styles.xml'))
97
585
  end
98
586
 
99
- let(:mapper) do
100
- SimpleXlsxReader::Document::Mapper.new(xml)
587
+ let(:parser) do
588
+ SimpleXlsxReader::Loader::StyleTypesParser.new(xml_file).tap(&:parse)
101
589
  end
102
590
 
103
591
  it 'reads custom formatted styles (numFmtId >= 164)' do
104
- mapper.style_types[1].must_equal :date_time
105
- mapper.custom_style_types[164].must_equal :date_time
592
+ _(parser.style_types[1]).must_equal :date_time
593
+ _(parser.custom_style_types[164]).must_equal :date_time
106
594
  end
107
595
 
108
596
  # something I've seen in the wild; don't think it's correct, but let's be flexible.
109
597
  it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
110
- mapper.style_types[2].must_equal :date_time
111
- mapper.custom_style_types[59].must_equal :date_time
598
+ _(parser.style_types[2]).must_equal :date_time
599
+ _(parser.custom_style_types[59]).must_equal :date_time
112
600
  end
113
601
  end
114
602
 
115
603
  describe '#last_cell_label' do
116
-
117
- let(:generic_style) do
118
- Nokogiri::XML(
119
- <<-XML
120
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
121
- <cellXfs count="1">
122
- <xf numFmtId="0" />
123
- </cellXfs>
124
- </styleSheet>
125
- XML
126
- ).remove_namespaces!
127
- end
128
-
129
604
  # Note, this is not a valid sheet, since the last cell is actually D1 but
130
605
  # the dimension specifies C1. This is just for testing.
131
606
  let(:sheet) do
@@ -151,305 +626,345 @@ describe SimpleXlsxReader do
151
626
  ).remove_namespaces!
152
627
  end
153
628
 
154
- let(:empty_sheet) do
155
- Nokogiri::XML(
156
- <<-XML
157
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
158
- <dimension ref="A1" />
159
- <sheetData>
160
- </sheetData>
161
- </worksheet>
162
- XML
163
- ).remove_namespaces!
164
- end
165
-
166
- let(:xml) do
167
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
168
- xml.sheets = [sheet]
169
- xml.styles = generic_style
629
+ let(:loader) do
630
+ SimpleXlsxReader::Loader.new(nil).tap do |l|
631
+ l.shared_strings = []
632
+ l.sheet_toc = { 'Sheet1': 0 }
633
+ l.style_types = []
634
+ l.base_date = SimpleXlsxReader::DATE_SYSTEM_1900
170
635
  end
171
636
  end
172
637
 
173
- subject { described_class.new(xml) }
638
+ let(:sheet_parser) do
639
+ tempfile = Tempfile.new(['sheet', '.xml'])
640
+ tempfile.write(sheet)
641
+ tempfile.rewind
642
+
643
+ SimpleXlsxReader::Loader::SheetParser.new(
644
+ file_io: tempfile,
645
+ loader: loader
646
+ ).tap { |parser| parser.parse {} }
647
+ end
174
648
 
175
649
  it 'uses /worksheet/dimension if available' do
176
- subject.last_cell_label(sheet).must_equal 'C1'
650
+ _(sheet_parser.last_cell_letter).must_equal 'C'
177
651
  end
178
652
 
179
653
  it 'uses the last header cell if /worksheet/dimension is missing' do
180
- sheet.xpath('/worksheet/dimension').remove
181
- subject.last_cell_label(sheet).must_equal 'D1'
654
+ sheet.at_xpath('/worksheet/dimension').remove
655
+ _(sheet_parser.last_cell_letter).must_equal 'D'
182
656
  end
183
657
 
184
658
  it 'returns "A1" if the dimension is just one cell' do
185
- subject.last_cell_label(empty_sheet).must_equal 'A1'
659
+ sheet.xpath('/worksheet/sheetData/row').remove
660
+ sheet.xpath('/worksheet/dimension').attr('ref', 'A1')
661
+ _(sheet_parser.last_cell_letter).must_equal 'A'
186
662
  end
187
663
 
188
- it 'returns "A1" if the sheet is just one cell, but /worksheet/dimension is missing' do
189
- sheet.at_xpath('/worksheet/dimension').remove
190
- subject.last_cell_label(empty_sheet).must_equal 'A1'
664
+ it 'returns nil if the sheet is just one cell, but /worksheet/dimension is missing' do
665
+ sheet.xpath('/worksheet/sheetData/row').remove
666
+ sheet.xpath('/worksheet/dimension').remove
667
+ _(sheet_parser.last_cell_letter).must_be_nil
191
668
  end
192
669
  end
193
670
 
194
671
  describe '#column_letter_to_number' do
195
- let(:subject) { described_class.new }
196
-
197
- [ ['A', 1 ],
198
- ['B', 2 ],
199
- ['Z', 26 ],
200
- ['AA', 27 ],
201
- ['AB', 28 ],
202
- ['AZ', 52 ],
203
- ['BA', 53 ],
204
- ['BZ', 78 ],
205
- ['ZZ', 702 ],
206
- ['AAA', 703 ],
207
- ['AAZ', 728 ],
208
- ['ABA', 729 ],
209
- ['ABZ', 754 ],
210
- ['AZZ', 1378 ],
211
- ['ZZZ', 18278] ].each do |(letter, number)|
672
+ let(:subject) { SXR::Loader::SheetParser.new(file_io: nil, loader: nil) }
673
+
674
+ [
675
+ ['A', 1],
676
+ ['B', 2],
677
+ ['Z', 26],
678
+ ['AA', 27],
679
+ ['AB', 28],
680
+ ['AZ', 52],
681
+ ['BA', 53],
682
+ ['BZ', 78],
683
+ ['ZZ', 702],
684
+ ['AAA', 703],
685
+ ['AAZ', 728],
686
+ ['ABA', 729],
687
+ ['ABZ', 754],
688
+ ['AZZ', 1378],
689
+ ['ZZZ', 18_278]
690
+ ].each do |(letter, number)|
212
691
  it "converts #{letter} to #{number}" do
213
- subject.column_letter_to_number(letter).must_equal number
692
+ _(subject.column_letter_to_number(letter)).must_equal number
214
693
  end
215
694
  end
216
695
  end
696
+ end
217
697
 
218
- describe "parse errors" do
219
- after do
220
- SimpleXlsxReader.configuration.catch_cell_load_errors = false
221
- end
698
+ describe 'parse errors' do
699
+ after do
700
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
701
+ end
222
702
 
223
- let(:xml) do
224
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
225
- xml.sheets = [Nokogiri::XML(
226
- <<-XML
227
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
228
- <dimension ref="A1:A1" />
229
- <sheetData>
230
- <row>
231
- <c r='A1' s='0'>
232
- <v>14 is a date style; this is not a date</v>
233
- </c>
234
- </row>
235
- </sheetData>
236
- </worksheet>
237
- XML
238
- ).remove_namespaces!]
239
-
240
- # s='0' above refers to the value of numFmtId at cellXfs index 0
241
- xml.styles = Nokogiri::XML(
242
- <<-XML
243
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
244
- <cellXfs count="1">
245
- <xf numFmtId="14" />
246
- </cellXfs>
247
- </styleSheet>
248
- XML
249
- ).remove_namespaces!
250
- end
251
- end
703
+ let(:sheet) do
704
+ Nokogiri::XML(
705
+ <<-XML
706
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
707
+ <dimension ref="A1:A1" />
708
+ <sheetData>
709
+ <row>
710
+ <c r='A1' s='0'>
711
+ <v>14 is a date style; this is not a date</v>
712
+ </c>
713
+ </row>
714
+ </sheetData>
715
+ </worksheet>
716
+ XML
717
+ ).remove_namespaces!
718
+ end
252
719
 
253
- it 'raises if configuration.catch_cell_load_errors' do
254
- SimpleXlsxReader.configuration.catch_cell_load_errors = false
720
+ let(:styles) do
721
+ # s='0' above refers to the value of numFmtId at cellXfs index 0
722
+ Nokogiri::XML(
723
+ <<-XML
724
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
725
+ <cellXfs count="1">
726
+ <xf numFmtId="14" />
727
+ </cellXfs>
728
+ </styleSheet>
729
+ XML
730
+ ).remove_namespaces!
731
+ end
255
732
 
256
- lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
257
- must_raise(SimpleXlsxReader::CellLoadError)
258
- end
733
+ it 'raises if configuration.catch_cell_load_errors' do
734
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
259
735
 
260
- it 'records a load error if not configuration.catch_cell_load_errors' do
261
- SimpleXlsxReader.configuration.catch_cell_load_errors = true
736
+ _(-> { SimpleXlsxReader.open(xlsx.archive.path).to_hash })
737
+ .must_raise(SimpleXlsxReader::CellLoadError)
738
+ end
262
739
 
263
- sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
264
- sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
265
- end
740
+ it 'records a load error if not configuration.catch_cell_load_errors' do
741
+ SimpleXlsxReader.configuration.catch_cell_load_errors = true
742
+
743
+ sheet = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].tap(&:slurp)
744
+ _(sheet.load_errors).must_equal(
745
+ [0, 0] => 'invalid value for Float(): "14 is a date style; this is not a date"'
746
+ )
266
747
  end
748
+ end
267
749
 
268
- describe "missing numFmtId attributes" do
750
+ describe 'missing numFmtId attributes' do
751
+ let(:sheet) do
752
+ Nokogiri::XML(
753
+ <<-XML
754
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
755
+ <dimension ref="A1:A1" />
756
+ <sheetData>
757
+ <row>
758
+ <c r='A1' s='s'>
759
+ <v>some content</v>
760
+ </c>
761
+ </row>
762
+ </sheetData>
763
+ </worksheet>
764
+ XML
765
+ ).remove_namespaces!
766
+ end
269
767
 
270
- let(:xml) do
271
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
272
- xml.sheets = [Nokogiri::XML(
273
- <<-XML
274
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
275
- <dimension ref="A1:A1" />
276
- <sheetData>
277
- <row>
278
- <c r='A1' s='s'>
279
- <v>some content</v>
280
- </c>
281
- </row>
282
- </sheetData>
283
- </worksheet>
284
- XML
285
- ).remove_namespaces!]
286
-
287
- xml.styles = Nokogiri::XML(
288
- <<-XML
289
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
290
-
291
- </styleSheet>
292
- XML
293
- ).remove_namespaces!
294
- end
295
- end
768
+ let(:styles) do
769
+ Nokogiri::XML(
770
+ <<-XML
771
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
296
772
 
297
- before do
298
- @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
299
- end
773
+ </styleSheet>
774
+ XML
775
+ ).remove_namespaces!
776
+ end
300
777
 
301
- it 'continues even when cells are missing numFmtId attributes ' do
302
- @row[0].must_equal 'some content'
303
- end
778
+ before do
779
+ @row = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a[0]
780
+ end
304
781
 
782
+ it 'continues even when cells are missing numFmtId attributes ' do
783
+ _(@row[0]).must_equal 'some content'
305
784
  end
785
+ end
306
786
 
307
- describe 'parsing types' do
308
- let(:xml) do
309
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
310
- xml.sheets = [Nokogiri::XML(
311
- <<-XML
312
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
313
- <dimension ref="A1:G1" />
314
- <sheetData>
315
- <row>
316
- <c r='A1' s='0'>
317
- <v>Cell A1</v>
318
- </c>
319
-
320
- <c r='C1' s='1'>
321
- <v>2.4</v>
322
- </c>
323
- <c r='D1' s='1' />
324
-
325
- <c r='E1' s='2'>
326
- <v>30687</v>
327
- </c>
328
- <c r='F1' s='2' />
329
-
330
- <c r='G1' t='inlineStr' s='0'>
331
- <is><t>Cell G1</t></is>
332
- </c>
333
- </row>
334
- </sheetData>
335
- </worksheet>
336
- XML
337
- ).remove_namespaces!]
338
-
339
- # s='0' above refers to the value of numFmtId at cellXfs index 0,
340
- # which is in this case 'General' type
341
- xml.styles = Nokogiri::XML(
342
- <<-XML
343
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
344
- <cellXfs count="1">
345
- <xf numFmtId="0" />
346
- <xf numFmtId="2" />
347
- <xf numFmtId="14" />
348
- </cellXfs>
349
- </styleSheet>
350
- XML
351
- ).remove_namespaces!
352
- end
353
- end
787
+ describe 'parsing types' do
788
+ let(:sheet) do
789
+ Nokogiri::XML(
790
+ <<-XML
791
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
792
+ <dimension ref="A1:G1" />
793
+ <sheetData>
794
+ <row>
795
+ <c r='A1' s='0'>
796
+ <v>Cell A1</v>
797
+ </c>
354
798
 
355
- before do
356
- @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
357
- end
799
+ <c r='C1' s='1'>
800
+ <v>2.4</v>
801
+ </c>
802
+ <c r='D1' s='1' />
358
803
 
359
- it "reads 'Generic' cells as strings" do
360
- @row[0].must_equal "Cell A1"
361
- end
804
+ <c r='E1' s='2'>
805
+ <v>30687</v>
806
+ </c>
807
+ <c r='F1' s='2' />
362
808
 
363
- it "reads empty 'Generic' cells as nil" do
364
- @row[1].must_equal nil
365
- end
809
+ <c r='G1' t='inlineStr' s='0'>
810
+ <is><t>Cell G1</t></is>
811
+ </c>
366
812
 
367
- # We could expand on these type tests, but really just a couple
368
- # demonstrate that it's wired together. Type-specific tests should go
369
- # on #cast
813
+ <c r='H1' s='0'>
814
+ <f>HYPERLINK("http://www.example.com/hyperlink-function", "HYPERLINK function")</f>
815
+ <v>HYPERLINK function</v>
816
+ </c>
370
817
 
371
- it "reads floats" do
372
- @row[2].must_equal 2.4
373
- end
818
+ <c r='I1' s='0'>
819
+ <v>GUI-made hyperlink</v>
820
+ </c>
821
+ </row>
822
+ </sheetData>
374
823
 
375
- it "reads empty floats as nil" do
376
- @row[3].must_equal nil
377
- end
824
+ <hyperlinks>
825
+ <hyperlink ref="I1" id="rId1"/>
826
+ </hyperlinks>
827
+ </worksheet>
828
+ XML
829
+ ).remove_namespaces!
830
+ end
378
831
 
379
- it "reads dates" do
380
- @row[4].must_equal Date.parse('Jan 6, 1984')
381
- end
832
+ let(:styles) do
833
+ # s='0' above refers to the value of numFmtId at cellXfs index 0,
834
+ # which is in this case 'General' type
835
+ Nokogiri::XML(
836
+ <<-XML
837
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
838
+ <cellXfs count="1">
839
+ <xf numFmtId="0" />
840
+ <xf numFmtId="2" />
841
+ <xf numFmtId="14" />
842
+ </cellXfs>
843
+ </styleSheet>
844
+ XML
845
+ ).remove_namespaces!
846
+ end
382
847
 
383
- it "reads empty date cells as nil" do
384
- @row[5].must_equal nil
385
- end
848
+ # Although not a "type" or "style" according to xlsx spec,
849
+ # it sure could/should be, so let's test it with the rest of our
850
+ # typecasting code.
851
+ let(:rels) do
852
+ [
853
+ Nokogiri::XML(
854
+ <<-XML
855
+ <Relationships>
856
+ <Relationship
857
+ Id="rId1"
858
+ Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
859
+ Target="http://www.example.com/hyperlink-gui"
860
+ TargetMode="External"
861
+ />
862
+ </Relationships>
863
+ XML
864
+ ).remove_namespaces!
865
+ ]
866
+ end
386
867
 
387
- it "reads strings formatted as inlineStr" do
388
- @row[6].must_equal 'Cell G1'
389
- end
868
+ before do
869
+ @row = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a[0]
390
870
  end
391
871
 
392
- describe 'parsing documents with blank rows' do
393
- let(:xml) do
394
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
395
- xml.sheets = [Nokogiri::XML(
396
- <<-XML
397
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
398
- <dimension ref="A1:D7" />
399
- <sheetData>
400
- <row r="2" spans="1:1">
401
- <c r="A2" s="0">
402
- <v>0</v>
403
- </c>
404
- </row>
405
- <row r="4" spans="1:1">
406
- <c r="B4" s="0">
407
- <v>1</v>
408
- </c>
409
- </row>
410
- <row r="5" spans="1:1">
411
- <c r="C5" s="0">
412
- <v>2</v>
413
- </c>
414
- </row>
415
- <row r="7" spans="1:1">
416
- <c r="D7" s="0">
417
- <v>3</v>
418
- </c>
419
- </row>
420
- </sheetData>
421
- </worksheet>
422
- XML
423
- ).remove_namespaces!]
424
-
425
- xml.styles = Nokogiri::XML(
426
- <<-XML
427
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
428
- <cellXfs count="1">
429
- <xf numFmtId="0" />
430
- </cellXfs>
431
- </styleSheet>
432
- XML
433
- ).remove_namespaces!
434
- end
435
- end
872
+ it "reads 'Generic' cells as strings" do
873
+ _(@row[0]).must_equal 'Cell A1'
874
+ end
436
875
 
437
- before do
438
- @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
439
- end
876
+ it "reads empty 'Generic' cells as nil" do
877
+ _(@row[1]).must_be_nil
878
+ end
440
879
 
441
- it "reads row data despite gaps in row numbering" do
442
- @rows.must_equal [
443
- [nil,nil,nil,nil],
444
- ["0",nil,nil,nil],
445
- [nil,nil,nil,nil],
446
- [nil,"1",nil,nil],
447
- [nil,nil,"2",nil],
448
- [nil,nil,nil,nil],
449
- [nil,nil,nil,"3"]
450
- ]
451
- end
880
+ # We could expand on these type tests, but really just a couple
881
+ # demonstrate that it's wired together. Type-specific tests should go
882
+ # on #cast
883
+
884
+ it 'reads floats' do
885
+ _(@row[2]).must_equal 2.4
886
+ end
887
+
888
+ it 'reads empty floats as nil' do
889
+ _(@row[3]).must_be_nil
890
+ end
891
+
892
+ it 'reads dates' do
893
+ _(@row[4]).must_equal Date.parse('Jan 6, 1984')
894
+ end
895
+
896
+ it 'reads empty date cells as nil' do
897
+ _(@row[5]).must_be_nil
452
898
  end
453
899
 
900
+ it 'reads strings formatted as inlineStr' do
901
+ _(@row[6]).must_equal 'Cell G1'
902
+ end
903
+
904
+ it 'reads hyperlinks created via HYPERLINK()' do
905
+ _(@row[7]).must_equal(
906
+ SXR::Hyperlink.new(
907
+ 'http://www.example.com/hyperlink-function', 'HYPERLINK function'
908
+ )
909
+ )
910
+ end
911
+
912
+ it 'reads hyperlinks created via the GUI' do
913
+ _(@row[8]).must_equal(
914
+ SXR::Hyperlink.new(
915
+ 'http://www.example.com/hyperlink-gui', 'GUI-made hyperlink'
916
+ )
917
+ )
918
+ end
919
+ end
920
+
921
+ describe 'parsing documents with blank rows' do
922
+ let(:sheet) do
923
+ Nokogiri::XML(
924
+ <<-XML
925
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
926
+ <dimension ref="A1:D7" />
927
+ <sheetData>
928
+ <row r="2" spans="1:1">
929
+ <c r="A2" s="0">
930
+ <v>0</v>
931
+ </c>
932
+ </row>
933
+ <row r="4" spans="1:1">
934
+ <c r="B4" s="0">
935
+ <v>1</v>
936
+ </c>
937
+ </row>
938
+ <row r="5" spans="1:1">
939
+ <c r="C5" s="0">
940
+ <v>2</v>
941
+ </c>
942
+ </row>
943
+ <row r="7" spans="1:1">
944
+ <c r="D7" s="0">
945
+ <v>3</v>
946
+ </c>
947
+ </row>
948
+ </sheetData>
949
+ </worksheet>
950
+ XML
951
+ ).remove_namespaces!
952
+ end
953
+
954
+ before do
955
+ @rows = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a
956
+ end
957
+
958
+ it 'reads row data despite gaps in row numbering' do
959
+ _(@rows).must_equal [
960
+ [nil, nil, nil, nil],
961
+ ['0', nil, nil, nil],
962
+ [nil, nil, nil, nil],
963
+ [nil, '1', nil, nil],
964
+ [nil, nil, '2', nil],
965
+ [nil, nil, nil, nil],
966
+ [nil, nil, nil, '3']
967
+ ]
968
+ end
454
969
  end
455
970
  end