simple_xlsx_reader 1.0.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,131 +1,606 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'test_helper'
2
4
  require 'time'
3
5
 
6
+ SXR = SimpleXlsxReader
7
+
4
8
  describe SimpleXlsxReader do
5
- let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
6
- 'sesame_street_blog.xlsx') }
7
-
8
- let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
9
-
10
- describe '#to_hash' do
11
- it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
12
- subject.to_hash.must_equal({
13
- "Authors"=>
14
- [["Name", "Occupation"],
15
- ["Big Bird", "Teacher"]],
16
-
17
- "Posts"=>
18
- [["Author Name", "Title", "Body", "Created At", "Comment Count"],
19
- ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
20
- ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2],
21
- ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0],
22
- ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil]]
23
- })
9
+ let(:sesame_street_blog_file) do
10
+ File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx')
11
+ end
12
+
13
+ let(:document) { SimpleXlsxReader.open(sesame_street_blog_file) }
14
+
15
+ ##
16
+ # A high-level acceptance test testing misc features such as date/time parsing,
17
+ # hyperlinks (both function and ref kinds), formula dates, empty rows, etc.
18
+
19
+ let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
20
+ let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
21
+
22
+ let(:expected_result) do
23
+ {
24
+ 'Authors' =>
25
+ [
26
+ ['Name', 'Occupation'],
27
+ ['Big Bird', 'Teacher']
28
+ ],
29
+ 'Posts' =>
30
+ [
31
+ ['Author Name', 'Title', 'Body', 'Created At', 'Comment Count', 'URL'],
32
+ ['Big Bird', 'The Number 1', 'The Greatest', Time.parse('2002-01-01 11:00:00 UTC'), 1, SXR::Hyperlink.new('http://www.example.com/hyperlink-function', 'This uses the HYPERLINK() function')],
33
+ ['Big Bird', 'The Number 2', 'Second Best', Time.parse('2002-01-02 14:00:00 UTC'), 2, SXR::Hyperlink.new('http://www.example.com/hyperlink-gui', 'This uses the hyperlink GUI option')],
34
+ ['Big Bird', 'Formula Dates', 'Tricky tricky', Time.parse('2002-01-03 14:00:00 UTC'), 0, nil],
35
+ ['Empty Eagress', nil, 'The title, date, and comment have types, but no values', nil, nil, nil]
36
+ ]
37
+ }
38
+ end
39
+
40
+ describe SimpleXlsxReader do
41
+ describe 'load from file path' do
42
+ let(:subject) { SimpleXlsxReader.open(sesame_street_blog_file_path) }
43
+
44
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
45
+ _(subject.to_hash).must_equal(expected_result)
46
+ end
47
+ end
48
+
49
+ describe 'load from buffer' do
50
+ let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
51
+
52
+ it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
53
+ _(subject.to_hash).must_equal(expected_result)
54
+ end
55
+ end
56
+
57
+ it 'outputs strings in UTF-8 encoding' do
58
+ document = SimpleXlsxReader.parse(sesame_street_blog_io)
59
+ _(document.sheets[0].rows.to_a.flatten.map(&:encoding).uniq)
60
+ .must_equal [Encoding::UTF_8]
61
+ end
62
+
63
+ it 'can use all our enumerable nicities without slurping' do
64
+ document = SimpleXlsxReader.parse(sesame_street_blog_io)
65
+
66
+ headers = {
67
+ name: 'Author Name',
68
+ title: 'Title',
69
+ body: 'Body',
70
+ created_at: 'Created At',
71
+ count: /Count/
72
+ }
73
+
74
+ rows = document.sheets[1].rows
75
+ result =
76
+ rows.each(headers: headers).with_index.with_object({}) do |(row, i), acc|
77
+ acc[i] = row
78
+ end
79
+
80
+ _(result[0]).must_equal(
81
+ name: 'Big Bird',
82
+ title: 'The Number 1',
83
+ body: 'The Greatest',
84
+ created_at: Time.parse('2002-01-01 11:00:00 UTC'),
85
+ count: 1,
86
+ "URL" => 'http://www.example.com/hyperlink-function'
87
+ )
88
+
89
+ _(rows.slurped?).must_equal false
90
+ end
91
+ end
92
+
93
+ ##
94
+ # For more fine-grained unit tests, we sometimes build our own workbook via
95
+ # Nokogiri. TestXlsxBuilder has some defaults, and this let-style lets us
96
+ # concisely override them in nested describe blocks.
97
+
98
+ let(:shared_strings) { nil }
99
+ let(:styles) { nil }
100
+ let(:sheet) { nil }
101
+ let(:workbook) { nil }
102
+ let(:rels) { nil }
103
+
104
+ let(:xlsx) do
105
+ TestXlsxBuilder.new(
106
+ shared_strings: shared_strings,
107
+ styles: styles,
108
+ sheets: sheet && [sheet],
109
+ workbook: workbook,
110
+ rels: rels
111
+ )
112
+ end
113
+
114
+ let(:reader) { SimpleXlsxReader.open(xlsx.archive.path) }
115
+
116
+ describe 'Sheet#rows#each(headers: true)' do
117
+ let(:sheet) do
118
+ <<~XML
119
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
120
+ <dimension ref="A1:B3" />
121
+ <sheetData>
122
+ <row r="1">
123
+ <c r="A1" s="0">
124
+ <v>Header 1</v>
125
+ </c>
126
+ <c r="B1" s="0">
127
+ <v>Header 2</v>
128
+ </c>
129
+ </row>
130
+ <row r="2">
131
+ <c r="A2" s="0">
132
+ <v>Data 1-A</v>
133
+ </c>
134
+ <c r="B2" s="0">
135
+ <v>Data 1-B</v>
136
+ </c>
137
+ </row>
138
+ <row r="4">
139
+ <c r="A4" s="0">
140
+ <v>Data 2-A</v>
141
+ </c>
142
+ <c r="B4" s="0">
143
+ <v>Data 2-B</v>
144
+ </c>
145
+ </row>
146
+ </sheetData>
147
+ </worksheet>
148
+ XML
149
+ end
150
+
151
+ it 'yields rows as hashes' do
152
+ acc = []
153
+
154
+ reader.sheets[0].rows.each(headers: true) do |row|
155
+ acc << row
156
+ end
157
+
158
+ _(acc).must_equal(
159
+ [
160
+ { 'Header 1' => 'Data 1-A', 'Header 2' => 'Data 1-B' },
161
+ { 'Header 1' => nil, 'Header 2' => nil },
162
+ { 'Header 1' => 'Data 2-A', 'Header 2' => 'Data 2-B' }
163
+ ]
164
+ )
165
+ end
166
+ end
167
+
168
+ describe 'Sheet#rows#each(headers: ->(row) {...})' do
169
+ let(:sheet) do
170
+ <<~XML
171
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
172
+ <dimension ref="A1:B7" />
173
+ <sheetData>
174
+ <row r="1">
175
+ <c r="A1" s="0">
176
+ <v>a chart or something</v>
177
+ </c>
178
+ <c r="B1" s="0">
179
+ <v>Rabble rabble</v>
180
+ </c>
181
+ </row>
182
+ <row r="2">
183
+ <c r="A2" s="0">
184
+ <v>Chatty junk</v>
185
+ </c>
186
+ <c r="B2" s="0">
187
+ <v></v>
188
+ </c>
189
+ </row>
190
+ <row r="4">
191
+ <c r="A4" s="0">
192
+ <v>Header 1</v>
193
+ </c>
194
+ <c r="B4" s="0">
195
+ <v>Header 2</v>
196
+ </c>
197
+ </row>
198
+ <row r="5">
199
+ <c r="A5" s="0">
200
+ <v>Data 1-A</v>
201
+ </c>
202
+ <c r="B5" s="0">
203
+ <v>Data 1-B</v>
204
+ </c>
205
+ </row>
206
+ <row r="7">
207
+ <c r="A7" s="0">
208
+ <v>Data 2-A</v>
209
+ </c>
210
+ <c r="B7" s="0">
211
+ <v>Data 2-B</v>
212
+ </c>
213
+ </row>
214
+ </sheetData>
215
+ </worksheet>
216
+ XML
217
+ end
218
+
219
+ it 'yields rows as hashes' do
220
+ acc = []
221
+
222
+ finder = ->(row) { row.find {|c| c&.match(/Header/)} }
223
+ reader.sheets[0].rows.each(headers: finder) do |row|
224
+ acc << row
225
+ end
226
+
227
+ _(acc).must_equal(
228
+ [
229
+ { 'Header 1' => 'Data 1-A', 'Header 2' => 'Data 1-B' },
230
+ { 'Header 1' => nil, 'Header 2' => nil },
231
+ { 'Header 1' => 'Data 2-A', 'Header 2' => 'Data 2-B' }
232
+ ]
233
+ )
234
+ end
235
+ end
236
+
237
+ describe "Sheet#rows#each(headers: a_hash)" do
238
+ let(:sheet) do
239
+ Nokogiri::XML(
240
+ <<~XML
241
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
242
+ <dimension ref="A1:C7" />
243
+ <sheetData>
244
+ <row r="1">
245
+ <c r="A1" s="0">
246
+ <v>a chart or something</v>
247
+ </c>
248
+ <c r="B1" s="0">
249
+ <v>Rabble rabble</v>
250
+ </c>
251
+ <c r="C1" s="0">
252
+ <v>Rabble rabble</v>
253
+ </c>
254
+ </row>
255
+ <row r="2">
256
+ <c r="A2" s="0">
257
+ <v>Chatty junk</v>
258
+ </c>
259
+ <c r="B2" s="0">
260
+ <v></v>
261
+ </c>
262
+ <c r="C2" s="0">
263
+ <v></v>
264
+ </c>
265
+ </row>
266
+ <row r="4">
267
+ <c r="A4" s="0">
268
+ <v>ID Number</v>
269
+ </c>
270
+ <c r="B4" s="0">
271
+ <v>ExacT</v>
272
+ </c>
273
+ <c r="C4" s="0">
274
+ <v>FOO Name</v>
275
+ </c>
276
+
277
+ </row>
278
+ <row r="5">
279
+ <c r="A5" s="0">
280
+ <v>ID 1-A</v>
281
+ </c>
282
+ <c r="B5" s="0">
283
+ <v>Exact 1-B</v>
284
+ </c>
285
+ <c r="C5" s="0">
286
+ <v>Name 1-C</v>
287
+ </c>
288
+ </row>
289
+ <row r="7">
290
+ <c r="A7" s="0">
291
+ <v>ID 2-A</v>
292
+ </c>
293
+ <c r="B7" s="0">
294
+ <v>Exact 2-B</v>
295
+ </c>
296
+ <c r="C7" s="0">
297
+ <v>Name 2-C</v>
298
+ </c>
299
+ </row>
300
+ </sheetData>
301
+ </worksheet>
302
+ XML
303
+ )
304
+ end
305
+
306
+ it 'transforms headers into symbols based on the header map' do
307
+ header_map = {id: /ID/, name: /foo/i, exact: 'ExacT'}
308
+ result = reader.sheets[0].rows.each(headers: header_map).to_a
309
+
310
+ _(result).must_equal(
311
+ [
312
+ { id: 'ID 1-A', exact: 'Exact 1-B', name: 'Name 1-C' },
313
+ { id: nil, exact: nil, name: nil },
314
+ { id: 'ID 2-A', exact: 'Exact 2-B', name: 'Name 2-C' },
315
+ ]
316
+ )
317
+ end
318
+
319
+ it 'if a match isnt found, uses un-matched header name' do
320
+ sheet.xpath("//*[text() = 'ExacT']")
321
+ .first.children.first.content = 'not ExacT'
322
+
323
+ header_map = {id: /ID/, name: /foo/i, exact: 'ExacT'}
324
+ result = reader.sheets[0].rows.each(headers: header_map).to_a
325
+
326
+ _(result).must_equal(
327
+ [
328
+ { id: 'ID 1-A', 'not ExacT' => 'Exact 1-B', name: 'Name 1-C' },
329
+ { id: nil, 'not ExacT' => nil, name: nil },
330
+ { id: 'ID 2-A', 'not ExacT' => 'Exact 2-B', name: 'Name 2-C' },
331
+ ]
332
+ )
333
+ end
334
+ end
335
+
336
+ describe 'Sheet#rows[]' do
337
+ it 'raises a RuntimeError if rows not slurped yet' do
338
+ _(-> { reader.sheets[0].rows[1] }).must_raise(RuntimeError)
339
+ end
340
+
341
+ it 'works if the rows have been slurped' do
342
+ _(reader.sheets[0].rows.tap(&:slurp)[0]).must_equal(
343
+ ['Cell A', 'Cell B', 'Cell C']
344
+ )
345
+ end
346
+
347
+ it 'works if the config allows auto slurping' do
348
+ SimpleXlsxReader.configuration.auto_slurp = true
349
+
350
+ _(reader.sheets[0].rows[0]).must_equal(
351
+ ['Cell A', 'Cell B', 'Cell C']
352
+ )
353
+
354
+ SimpleXlsxReader.configuration.auto_slurp = false
355
+ end
356
+ end
357
+
358
+ describe 'Sheet#rows#slurp' do
359
+ let(:rows) { reader.sheets[0].rows.tap(&:slurp) }
360
+
361
+ it 'loads the sheet parser results into memory' do
362
+ _(rows.slurped).must_equal(
363
+ [['Cell A', 'Cell B', 'Cell C']]
364
+ )
365
+ end
366
+
367
+ it '#each and #map use slurped results' do
368
+ _(rows.map(&:reverse)).must_equal(
369
+ [['Cell C', 'Cell B', 'Cell A']]
370
+ )
24
371
  end
25
372
  end
26
373
 
27
- describe SimpleXlsxReader::Document::Mapper do
28
- let(:described_class) { SimpleXlsxReader::Document::Mapper }
374
+ describe 'Sheet#rows#each' do
375
+ let(:sheet) do
376
+ <<~XML
377
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
378
+ <dimension ref="A1:B3" />
379
+ <sheetData>
380
+ <row r="1">
381
+ <c r="A1" s="0">
382
+ <v>Header 1</v>
383
+ </c>
384
+ <c r="B1" s="0">
385
+ <v>Header 2</v>
386
+ </c>
387
+ </row>
388
+ <row r="2">
389
+ <c r="A2" s="0">
390
+ <v>Data 1-A</v>
391
+ </c>
392
+ <c r="B2" s="0">
393
+ <v>Data 1-B</v>
394
+ </c>
395
+ </row>
396
+ <row r="4">
397
+ <c r="A4" s="0">
398
+ <v>Data 2-A</v>
399
+ </c>
400
+ <c r="B4" s="0">
401
+ <v>Data 2-B</v>
402
+ </c>
403
+ </row>
404
+ </sheetData>
405
+ </worksheet>
406
+ XML
407
+ end
408
+
409
+ let(:rows) { reader.sheets[0].rows }
410
+
411
+ it 'with no block, returns an enumerator when not slurped' do
412
+ _(rows.each.class).must_equal Enumerator
413
+ end
414
+
415
+ it 'with no block, passes on header argument in enumerator' do
416
+ _(rows.each(headers: true).inspect).must_match 'headers: true'
417
+ end
418
+
419
+ it 'returns an enumerator when slurped' do
420
+ rows.slurp
421
+ _(rows.each.class).must_equal Enumerator
422
+ end
423
+ end
424
+
425
+ describe 'Sheet#rows#map' do
426
+ let(:sheet) do
427
+ <<~XML
428
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
429
+ <dimension ref="A1:B3" />
430
+ <sheetData>
431
+ <row r="1">
432
+ <c r="A1" s="0">
433
+ <v>Header 1</v>
434
+ </c>
435
+ <c r="B1" s="0">
436
+ <v>Header 2</v>
437
+ </c>
438
+ </row>
439
+ <row r="2">
440
+ <c r="A2" s="0">
441
+ <v>Data 1-A</v>
442
+ </c>
443
+ <c r="B2" s="0">
444
+ <v>Data 1-B</v>
445
+ </c>
446
+ </row>
447
+ <row r="4">
448
+ <c r="A4" s="0">
449
+ <v>Data 2-A</v>
450
+ </c>
451
+ <c r="B4" s="0">
452
+ <v>Data 2-B</v>
453
+ </c>
454
+ </row>
455
+ </sheetData>
456
+ </worksheet>
457
+ XML
458
+ end
459
+
460
+ let(:rows) { reader.sheets[0].rows }
461
+
462
+ it 'does not slurp' do
463
+ _(rows.map(&:first)).must_equal(
464
+ ["Header 1", "Data 1-A", nil, "Data 2-A"]
465
+ )
466
+ _(rows.slurped?).must_equal false
467
+ end
468
+ end
469
+
470
+ describe 'Sheet#headers' do
471
+ let(:doc_sheet) { reader.sheets[0] }
472
+
473
+ it 'raises a RuntimeError if rows not slurped yet' do
474
+ _(-> { doc_sheet.headers }).must_raise(RuntimeError)
475
+ end
476
+
477
+ it 'returns first row if slurped' do
478
+ _(doc_sheet.tap(&:slurp).headers).must_equal(
479
+ ['Cell A', 'Cell B', 'Cell C']
480
+ )
481
+ end
482
+
483
+ it 'returns first row if auto_slurp' do
484
+ SimpleXlsxReader.configuration.auto_slurp = true
485
+
486
+ _(doc_sheet.headers).must_equal(
487
+ ['Cell A', 'Cell B', 'Cell C']
488
+ )
489
+
490
+ SimpleXlsxReader.configuration.auto_slurp = false
491
+ end
492
+ end
493
+
494
+ describe SimpleXlsxReader::Loader do
495
+ let(:described_class) { SimpleXlsxReader::Loader }
29
496
 
30
497
  describe '::cast' do
31
498
  it 'reads type s as a shared string' do
32
- described_class.cast('1', 's', nil, :shared_strings => ['a', 'b', 'c']).
33
- must_equal 'b'
499
+ _(described_class.cast('1', 's', nil, shared_strings: %w[a b c]))
500
+ .must_equal 'b'
34
501
  end
35
502
 
36
503
  it 'reads type inlineStr as a string' do
37
- described_class.cast('the value', nil, 'inlineStr').
38
- must_equal 'the value'
504
+ _(described_class.cast('the value', nil, 'inlineStr'))
505
+ .must_equal 'the value'
39
506
  end
40
507
 
41
508
  it 'reads date styles' do
42
- described_class.cast('41505', nil, :date).
43
- must_equal Date.parse('2013-08-19')
509
+ _(described_class.cast('41505', nil, :date))
510
+ .must_equal Date.parse('2013-08-19')
44
511
  end
45
512
 
46
513
  it 'reads time styles' do
47
- described_class.cast('41505.77083', nil, :time).
48
- must_equal Time.parse('2013-08-19 18:30 UTC')
514
+ _(described_class.cast('41505.77083', nil, :time))
515
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
49
516
  end
50
517
 
51
518
  it 'reads date_time styles' do
52
- described_class.cast('41505.77083', nil, :date_time).
53
- must_equal Time.parse('2013-08-19 18:30 UTC')
519
+ _(described_class.cast('41505.77083', nil, :date_time))
520
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
54
521
  end
55
522
 
56
523
  it 'reads number types styled as dates' do
57
- described_class.cast('41505', 'n', :date).
58
- must_equal Date.parse('2013-08-19')
524
+ _(described_class.cast('41505', 'n', :date))
525
+ .must_equal Date.parse('2013-08-19')
59
526
  end
60
527
 
61
528
  it 'reads number types styled as times' do
62
- described_class.cast('41505.77083', 'n', :time).
63
- must_equal Time.parse('2013-08-19 18:30 UTC')
529
+ _(described_class.cast('41505.77083', 'n', :time))
530
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
531
+ end
532
+
533
+ it 'reads less-than-zero complex number types styled as times' do
534
+ _(described_class.cast('6.25E-2', 'n', :time))
535
+ .must_equal Time.parse('1899-12-30 01:30:00 UTC')
64
536
  end
65
537
 
66
538
  it 'reads number types styled as date_times' do
67
- described_class.cast('41505.77083', 'n', :date_time).
68
- must_equal Time.parse('2013-08-19 18:30 UTC')
539
+ _(described_class.cast('41505.77083', 'n', :date_time))
540
+ .must_equal Time.parse('2013-08-19 18:30 UTC')
541
+ end
542
+
543
+ it 'raises when date-styled values are not numerical' do
544
+ _(-> { described_class.cast('14 is not a valid date', nil, :date) })
545
+ .must_raise(ArgumentError)
546
+ end
547
+
548
+ describe 'with the url option' do
549
+ let(:url) { 'http://www.example.com/hyperlink' }
550
+ it 'creates a hyperlink with a string type' do
551
+ _(described_class.cast('A link', 'str', :string, url: url))
552
+ .must_equal SXR::Hyperlink.new(url, 'A link')
553
+ end
554
+
555
+ it 'creates a hyperlink with a shared string type' do
556
+ _(described_class.cast('2', 's', nil, shared_strings: %w[a b c], url: url))
557
+ .must_equal SXR::Hyperlink.new(url, 'c')
558
+ end
69
559
  end
70
560
  end
71
561
 
72
- describe '#shared_strings' do
562
+ describe 'shared_strings' do
73
563
  let(:xml) do
74
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
75
- xml.shared_strings = Nokogiri::XML(File.read(
76
- File.join(File.dirname(__FILE__), 'shared_strings.xml') )).remove_namespaces!
77
- end
564
+ File.open(File.join(File.dirname(__FILE__), 'shared_strings.xml'))
78
565
  end
79
566
 
80
- subject { described_class.new(xml) }
567
+ let(:ss) { SimpleXlsxReader::Loader::SharedStringsParser.parse(xml) }
81
568
 
82
569
  it 'parses strings formatted at the cell level' do
83
- subject.shared_strings[0..2].must_equal ['Cell A1', 'Cell B1', 'My Cell']
570
+ _(ss[0..2]).must_equal ['Cell A1', 'Cell B1', 'My Cell']
84
571
  end
85
572
 
86
573
  it 'parses strings formatted at the character level' do
87
- subject.shared_strings[3..5].must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
574
+ _(ss[3..5]).must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
575
+ end
576
+
577
+ it 'parses looong strings containing unicode' do
578
+ _(ss[6]).must_include 'It only happens with both unicode *and* really long text.'
88
579
  end
89
580
  end
90
581
 
91
- describe '#style_types' do
92
- let(:xml) do
93
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
94
- xml.styles = Nokogiri::XML(File.read(
95
- File.join(File.dirname(__FILE__), 'styles.xml') )).remove_namespaces!
96
- end
582
+ describe 'style_types' do
583
+ let(:xml_file) do
584
+ File.open(File.join(File.dirname(__FILE__), 'styles.xml'))
97
585
  end
98
586
 
99
- let(:mapper) do
100
- SimpleXlsxReader::Document::Mapper.new(xml)
587
+ let(:parser) do
588
+ SimpleXlsxReader::Loader::StyleTypesParser.new(xml_file).tap(&:parse)
101
589
  end
102
590
 
103
591
  it 'reads custom formatted styles (numFmtId >= 164)' do
104
- mapper.style_types[1].must_equal :date_time
105
- mapper.custom_style_types[164].must_equal :date_time
592
+ _(parser.style_types[1]).must_equal :date_time
593
+ _(parser.custom_style_types[164]).must_equal :date_time
106
594
  end
107
595
 
108
596
  # something I've seen in the wild; don't think it's correct, but let's be flexible.
109
597
  it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
110
- mapper.style_types[2].must_equal :date_time
111
- mapper.custom_style_types[59].must_equal :date_time
598
+ _(parser.style_types[2]).must_equal :date_time
599
+ _(parser.custom_style_types[59]).must_equal :date_time
112
600
  end
113
601
  end
114
602
 
115
603
  describe '#last_cell_label' do
116
-
117
- let(:generic_style) do
118
- Nokogiri::XML(
119
- <<-XML
120
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
121
- <cellXfs count="1">
122
- <xf numFmtId="0" />
123
- </cellXfs>
124
- </styleSheet>
125
- XML
126
- ).remove_namespaces!
127
- end
128
-
129
604
  # Note, this is not a valid sheet, since the last cell is actually D1 but
130
605
  # the dimension specifies C1. This is just for testing.
131
606
  let(:sheet) do
@@ -151,305 +626,345 @@ describe SimpleXlsxReader do
151
626
  ).remove_namespaces!
152
627
  end
153
628
 
154
- let(:empty_sheet) do
155
- Nokogiri::XML(
156
- <<-XML
157
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
158
- <dimension ref="A1" />
159
- <sheetData>
160
- </sheetData>
161
- </worksheet>
162
- XML
163
- ).remove_namespaces!
164
- end
165
-
166
- let(:xml) do
167
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
168
- xml.sheets = [sheet]
169
- xml.styles = generic_style
629
+ let(:loader) do
630
+ SimpleXlsxReader::Loader.new(nil).tap do |l|
631
+ l.shared_strings = []
632
+ l.sheet_toc = { 'Sheet1': 0 }
633
+ l.style_types = []
634
+ l.base_date = SimpleXlsxReader::DATE_SYSTEM_1900
170
635
  end
171
636
  end
172
637
 
173
- subject { described_class.new(xml) }
638
+ let(:sheet_parser) do
639
+ tempfile = Tempfile.new(['sheet', '.xml'])
640
+ tempfile.write(sheet)
641
+ tempfile.rewind
642
+
643
+ SimpleXlsxReader::Loader::SheetParser.new(
644
+ file_io: tempfile,
645
+ loader: loader
646
+ ).tap { |parser| parser.parse {} }
647
+ end
174
648
 
175
649
  it 'uses /worksheet/dimension if available' do
176
- subject.last_cell_label(sheet).must_equal 'C1'
650
+ _(sheet_parser.last_cell_letter).must_equal 'C'
177
651
  end
178
652
 
179
653
  it 'uses the last header cell if /worksheet/dimension is missing' do
180
- sheet.xpath('/worksheet/dimension').remove
181
- subject.last_cell_label(sheet).must_equal 'D1'
654
+ sheet.at_xpath('/worksheet/dimension').remove
655
+ _(sheet_parser.last_cell_letter).must_equal 'D'
182
656
  end
183
657
 
184
658
  it 'returns "A1" if the dimension is just one cell' do
185
- subject.last_cell_label(empty_sheet).must_equal 'A1'
659
+ sheet.xpath('/worksheet/sheetData/row').remove
660
+ sheet.xpath('/worksheet/dimension').attr('ref', 'A1')
661
+ _(sheet_parser.last_cell_letter).must_equal 'A'
186
662
  end
187
663
 
188
- it 'returns "A1" if the sheet is just one cell, but /worksheet/dimension is missing' do
189
- sheet.at_xpath('/worksheet/dimension').remove
190
- subject.last_cell_label(empty_sheet).must_equal 'A1'
664
+ it 'returns nil if the sheet is just one cell, but /worksheet/dimension is missing' do
665
+ sheet.xpath('/worksheet/sheetData/row').remove
666
+ sheet.xpath('/worksheet/dimension').remove
667
+ _(sheet_parser.last_cell_letter).must_be_nil
191
668
  end
192
669
  end
193
670
 
194
671
  describe '#column_letter_to_number' do
195
- let(:subject) { described_class.new }
196
-
197
- [ ['A', 1 ],
198
- ['B', 2 ],
199
- ['Z', 26 ],
200
- ['AA', 27 ],
201
- ['AB', 28 ],
202
- ['AZ', 52 ],
203
- ['BA', 53 ],
204
- ['BZ', 78 ],
205
- ['ZZ', 702 ],
206
- ['AAA', 703 ],
207
- ['AAZ', 728 ],
208
- ['ABA', 729 ],
209
- ['ABZ', 754 ],
210
- ['AZZ', 1378 ],
211
- ['ZZZ', 18278] ].each do |(letter, number)|
672
+ let(:subject) { SXR::Loader::SheetParser.new(file_io: nil, loader: nil) }
673
+
674
+ [
675
+ ['A', 1],
676
+ ['B', 2],
677
+ ['Z', 26],
678
+ ['AA', 27],
679
+ ['AB', 28],
680
+ ['AZ', 52],
681
+ ['BA', 53],
682
+ ['BZ', 78],
683
+ ['ZZ', 702],
684
+ ['AAA', 703],
685
+ ['AAZ', 728],
686
+ ['ABA', 729],
687
+ ['ABZ', 754],
688
+ ['AZZ', 1378],
689
+ ['ZZZ', 18_278]
690
+ ].each do |(letter, number)|
212
691
  it "converts #{letter} to #{number}" do
213
- subject.column_letter_to_number(letter).must_equal number
692
+ _(subject.column_letter_to_number(letter)).must_equal number
214
693
  end
215
694
  end
216
695
  end
696
+ end
217
697
 
218
- describe "parse errors" do
219
- after do
220
- SimpleXlsxReader.configuration.catch_cell_load_errors = false
221
- end
698
+ describe 'parse errors' do
699
+ after do
700
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
701
+ end
222
702
 
223
- let(:xml) do
224
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
225
- xml.sheets = [Nokogiri::XML(
226
- <<-XML
227
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
228
- <dimension ref="A1:A1" />
229
- <sheetData>
230
- <row>
231
- <c r='A1' s='0'>
232
- <v>14 is a date style; this is not a date</v>
233
- </c>
234
- </row>
235
- </sheetData>
236
- </worksheet>
237
- XML
238
- ).remove_namespaces!]
239
-
240
- # s='0' above refers to the value of numFmtId at cellXfs index 0
241
- xml.styles = Nokogiri::XML(
242
- <<-XML
243
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
244
- <cellXfs count="1">
245
- <xf numFmtId="14" />
246
- </cellXfs>
247
- </styleSheet>
248
- XML
249
- ).remove_namespaces!
250
- end
251
- end
703
+ let(:sheet) do
704
+ Nokogiri::XML(
705
+ <<-XML
706
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
707
+ <dimension ref="A1:A1" />
708
+ <sheetData>
709
+ <row>
710
+ <c r='A1' s='0'>
711
+ <v>14 is a date style; this is not a date</v>
712
+ </c>
713
+ </row>
714
+ </sheetData>
715
+ </worksheet>
716
+ XML
717
+ ).remove_namespaces!
718
+ end
252
719
 
253
- it 'raises if configuration.catch_cell_load_errors' do
254
- SimpleXlsxReader.configuration.catch_cell_load_errors = false
720
+ let(:styles) do
721
+ # s='0' above refers to the value of numFmtId at cellXfs index 0
722
+ Nokogiri::XML(
723
+ <<-XML
724
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
725
+ <cellXfs count="1">
726
+ <xf numFmtId="14" />
727
+ </cellXfs>
728
+ </styleSheet>
729
+ XML
730
+ ).remove_namespaces!
731
+ end
255
732
 
256
- lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
257
- must_raise(SimpleXlsxReader::CellLoadError)
258
- end
733
+ it 'raises if configuration.catch_cell_load_errors' do
734
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
259
735
 
260
- it 'records a load error if not configuration.catch_cell_load_errors' do
261
- SimpleXlsxReader.configuration.catch_cell_load_errors = true
736
+ _(-> { SimpleXlsxReader.open(xlsx.archive.path).to_hash })
737
+ .must_raise(SimpleXlsxReader::CellLoadError)
738
+ end
262
739
 
263
- sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
264
- sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
265
- end
740
+ it 'records a load error if not configuration.catch_cell_load_errors' do
741
+ SimpleXlsxReader.configuration.catch_cell_load_errors = true
742
+
743
+ sheet = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].tap(&:slurp)
744
+ _(sheet.load_errors).must_equal(
745
+ [0, 0] => 'invalid value for Float(): "14 is a date style; this is not a date"'
746
+ )
266
747
  end
748
+ end
267
749
 
268
- describe "missing numFmtId attributes" do
750
+ describe 'missing numFmtId attributes' do
751
+ let(:sheet) do
752
+ Nokogiri::XML(
753
+ <<-XML
754
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
755
+ <dimension ref="A1:A1" />
756
+ <sheetData>
757
+ <row>
758
+ <c r='A1' s='s'>
759
+ <v>some content</v>
760
+ </c>
761
+ </row>
762
+ </sheetData>
763
+ </worksheet>
764
+ XML
765
+ ).remove_namespaces!
766
+ end
269
767
 
270
- let(:xml) do
271
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
272
- xml.sheets = [Nokogiri::XML(
273
- <<-XML
274
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
275
- <dimension ref="A1:A1" />
276
- <sheetData>
277
- <row>
278
- <c r='A1' s='s'>
279
- <v>some content</v>
280
- </c>
281
- </row>
282
- </sheetData>
283
- </worksheet>
284
- XML
285
- ).remove_namespaces!]
286
-
287
- xml.styles = Nokogiri::XML(
288
- <<-XML
289
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
290
-
291
- </styleSheet>
292
- XML
293
- ).remove_namespaces!
294
- end
295
- end
768
+ let(:styles) do
769
+ Nokogiri::XML(
770
+ <<-XML
771
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
296
772
 
297
- before do
298
- @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
299
- end
773
+ </styleSheet>
774
+ XML
775
+ ).remove_namespaces!
776
+ end
300
777
 
301
- it 'continues even when cells are missing numFmtId attributes ' do
302
- @row[0].must_equal 'some content'
303
- end
778
+ before do
779
+ @row = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a[0]
780
+ end
304
781
 
782
+ it 'continues even when cells are missing numFmtId attributes ' do
783
+ _(@row[0]).must_equal 'some content'
305
784
  end
785
+ end
306
786
 
307
- describe 'parsing types' do
308
- let(:xml) do
309
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
310
- xml.sheets = [Nokogiri::XML(
311
- <<-XML
312
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
313
- <dimension ref="A1:G1" />
314
- <sheetData>
315
- <row>
316
- <c r='A1' s='0'>
317
- <v>Cell A1</v>
318
- </c>
319
-
320
- <c r='C1' s='1'>
321
- <v>2.4</v>
322
- </c>
323
- <c r='D1' s='1' />
324
-
325
- <c r='E1' s='2'>
326
- <v>30687</v>
327
- </c>
328
- <c r='F1' s='2' />
329
-
330
- <c r='G1' t='inlineStr' s='0'>
331
- <is><t>Cell G1</t></is>
332
- </c>
333
- </row>
334
- </sheetData>
335
- </worksheet>
336
- XML
337
- ).remove_namespaces!]
338
-
339
- # s='0' above refers to the value of numFmtId at cellXfs index 0,
340
- # which is in this case 'General' type
341
- xml.styles = Nokogiri::XML(
342
- <<-XML
343
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
344
- <cellXfs count="1">
345
- <xf numFmtId="0" />
346
- <xf numFmtId="2" />
347
- <xf numFmtId="14" />
348
- </cellXfs>
349
- </styleSheet>
350
- XML
351
- ).remove_namespaces!
352
- end
353
- end
787
+ describe 'parsing types' do
788
+ let(:sheet) do
789
+ Nokogiri::XML(
790
+ <<-XML
791
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
792
+ <dimension ref="A1:G1" />
793
+ <sheetData>
794
+ <row>
795
+ <c r='A1' s='0'>
796
+ <v>Cell A1</v>
797
+ </c>
354
798
 
355
- before do
356
- @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
357
- end
799
+ <c r='C1' s='1'>
800
+ <v>2.4</v>
801
+ </c>
802
+ <c r='D1' s='1' />
358
803
 
359
- it "reads 'Generic' cells as strings" do
360
- @row[0].must_equal "Cell A1"
361
- end
804
+ <c r='E1' s='2'>
805
+ <v>30687</v>
806
+ </c>
807
+ <c r='F1' s='2' />
362
808
 
363
- it "reads empty 'Generic' cells as nil" do
364
- @row[1].must_equal nil
365
- end
809
+ <c r='G1' t='inlineStr' s='0'>
810
+ <is><t>Cell G1</t></is>
811
+ </c>
366
812
 
367
- # We could expand on these type tests, but really just a couple
368
- # demonstrate that it's wired together. Type-specific tests should go
369
- # on #cast
813
+ <c r='H1' s='0'>
814
+ <f>HYPERLINK("http://www.example.com/hyperlink-function", "HYPERLINK function")</f>
815
+ <v>HYPERLINK function</v>
816
+ </c>
370
817
 
371
- it "reads floats" do
372
- @row[2].must_equal 2.4
373
- end
818
+ <c r='I1' s='0'>
819
+ <v>GUI-made hyperlink</v>
820
+ </c>
821
+ </row>
822
+ </sheetData>
374
823
 
375
- it "reads empty floats as nil" do
376
- @row[3].must_equal nil
377
- end
824
+ <hyperlinks>
825
+ <hyperlink ref="I1" id="rId1"/>
826
+ </hyperlinks>
827
+ </worksheet>
828
+ XML
829
+ ).remove_namespaces!
830
+ end
378
831
 
379
- it "reads dates" do
380
- @row[4].must_equal Date.parse('Jan 6, 1984')
381
- end
832
+ let(:styles) do
833
+ # s='0' above refers to the value of numFmtId at cellXfs index 0,
834
+ # which is in this case 'General' type
835
+ Nokogiri::XML(
836
+ <<-XML
837
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
838
+ <cellXfs count="1">
839
+ <xf numFmtId="0" />
840
+ <xf numFmtId="2" />
841
+ <xf numFmtId="14" />
842
+ </cellXfs>
843
+ </styleSheet>
844
+ XML
845
+ ).remove_namespaces!
846
+ end
382
847
 
383
- it "reads empty date cells as nil" do
384
- @row[5].must_equal nil
385
- end
848
+ # Although not a "type" or "style" according to xlsx spec,
849
+ # it sure could/should be, so let's test it with the rest of our
850
+ # typecasting code.
851
+ let(:rels) do
852
+ [
853
+ Nokogiri::XML(
854
+ <<-XML
855
+ <Relationships>
856
+ <Relationship
857
+ Id="rId1"
858
+ Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
859
+ Target="http://www.example.com/hyperlink-gui"
860
+ TargetMode="External"
861
+ />
862
+ </Relationships>
863
+ XML
864
+ ).remove_namespaces!
865
+ ]
866
+ end
386
867
 
387
- it "reads strings formatted as inlineStr" do
388
- @row[6].must_equal 'Cell G1'
389
- end
868
+ before do
869
+ @row = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a[0]
390
870
  end
391
871
 
392
- describe 'parsing documents with blank rows' do
393
- let(:xml) do
394
- SimpleXlsxReader::Document::Xml.new.tap do |xml|
395
- xml.sheets = [Nokogiri::XML(
396
- <<-XML
397
- <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
398
- <dimension ref="A1:D7" />
399
- <sheetData>
400
- <row r="2" spans="1:1">
401
- <c r="A2" s="0">
402
- <v>0</v>
403
- </c>
404
- </row>
405
- <row r="4" spans="1:1">
406
- <c r="B4" s="0">
407
- <v>1</v>
408
- </c>
409
- </row>
410
- <row r="5" spans="1:1">
411
- <c r="C5" s="0">
412
- <v>2</v>
413
- </c>
414
- </row>
415
- <row r="7" spans="1:1">
416
- <c r="D7" s="0">
417
- <v>3</v>
418
- </c>
419
- </row>
420
- </sheetData>
421
- </worksheet>
422
- XML
423
- ).remove_namespaces!]
424
-
425
- xml.styles = Nokogiri::XML(
426
- <<-XML
427
- <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
428
- <cellXfs count="1">
429
- <xf numFmtId="0" />
430
- </cellXfs>
431
- </styleSheet>
432
- XML
433
- ).remove_namespaces!
434
- end
435
- end
872
+ it "reads 'Generic' cells as strings" do
873
+ _(@row[0]).must_equal 'Cell A1'
874
+ end
436
875
 
437
- before do
438
- @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
439
- end
876
+ it "reads empty 'Generic' cells as nil" do
877
+ _(@row[1]).must_be_nil
878
+ end
440
879
 
441
- it "reads row data despite gaps in row numbering" do
442
- @rows.must_equal [
443
- [nil,nil,nil,nil],
444
- ["0",nil,nil,nil],
445
- [nil,nil,nil,nil],
446
- [nil,"1",nil,nil],
447
- [nil,nil,"2",nil],
448
- [nil,nil,nil,nil],
449
- [nil,nil,nil,"3"]
450
- ]
451
- end
880
+ # We could expand on these type tests, but really just a couple
881
+ # demonstrate that it's wired together. Type-specific tests should go
882
+ # on #cast
883
+
884
+ it 'reads floats' do
885
+ _(@row[2]).must_equal 2.4
886
+ end
887
+
888
+ it 'reads empty floats as nil' do
889
+ _(@row[3]).must_be_nil
890
+ end
891
+
892
+ it 'reads dates' do
893
+ _(@row[4]).must_equal Date.parse('Jan 6, 1984')
894
+ end
895
+
896
+ it 'reads empty date cells as nil' do
897
+ _(@row[5]).must_be_nil
452
898
  end
453
899
 
900
+ it 'reads strings formatted as inlineStr' do
901
+ _(@row[6]).must_equal 'Cell G1'
902
+ end
903
+
904
+ it 'reads hyperlinks created via HYPERLINK()' do
905
+ _(@row[7]).must_equal(
906
+ SXR::Hyperlink.new(
907
+ 'http://www.example.com/hyperlink-function', 'HYPERLINK function'
908
+ )
909
+ )
910
+ end
911
+
912
+ it 'reads hyperlinks created via the GUI' do
913
+ _(@row[8]).must_equal(
914
+ SXR::Hyperlink.new(
915
+ 'http://www.example.com/hyperlink-gui', 'GUI-made hyperlink'
916
+ )
917
+ )
918
+ end
919
+ end
920
+
921
+ describe 'parsing documents with blank rows' do
922
+ let(:sheet) do
923
+ Nokogiri::XML(
924
+ <<-XML
925
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
926
+ <dimension ref="A1:D7" />
927
+ <sheetData>
928
+ <row r="2" spans="1:1">
929
+ <c r="A2" s="0">
930
+ <v>0</v>
931
+ </c>
932
+ </row>
933
+ <row r="4" spans="1:1">
934
+ <c r="B4" s="0">
935
+ <v>1</v>
936
+ </c>
937
+ </row>
938
+ <row r="5" spans="1:1">
939
+ <c r="C5" s="0">
940
+ <v>2</v>
941
+ </c>
942
+ </row>
943
+ <row r="7" spans="1:1">
944
+ <c r="D7" s="0">
945
+ <v>3</v>
946
+ </c>
947
+ </row>
948
+ </sheetData>
949
+ </worksheet>
950
+ XML
951
+ ).remove_namespaces!
952
+ end
953
+
954
+ before do
955
+ @rows = SimpleXlsxReader.open(xlsx.archive.path).sheets[0].rows.to_a
956
+ end
957
+
958
+ it 'reads row data despite gaps in row numbering' do
959
+ _(@rows).must_equal [
960
+ [nil, nil, nil, nil],
961
+ ['0', nil, nil, nil],
962
+ [nil, nil, nil, nil],
963
+ [nil, '1', nil, nil],
964
+ [nil, nil, '2', nil],
965
+ [nil, nil, nil, nil],
966
+ [nil, nil, nil, '3']
967
+ ]
968
+ end
454
969
  end
455
970
  end