simple_xlsx_reader 1.0.1 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +8 -0
- data/CHANGELOG.md +23 -0
- data/README.md +13 -6
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/lib/simple_xlsx_reader.rb +137 -46
- data/simple_xlsx_reader.gemspec +4 -2
- data/test/date1904_test.rb +1 -1
- data/test/datetime_test.rb +3 -2
- data/test/datetimes.xlsx +0 -0
- data/test/gdocs_sheet.xlsx +0 -0
- data/test/gdocs_sheet_test.rb +15 -0
- data/test/lower_case_sharedstrings.xlsx +0 -0
- data/test/lower_case_sharedstrings_test.rb +15 -0
- data/test/performance_test.rb +3 -3
- data/test/sesame_street_blog.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +176 -25
- data/test/styles.xml +4 -2
- data/test/test_helper.rb +1 -0
- metadata +29 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e2b04473235c5ed2c2764f62a627fa6f16816c36e0fcff3497be229f8666a0f7
|
4
|
+
data.tar.gz: 9367b0082f31e9cb208d9f97ed6cb67d5276a459562809460694602339dfdaad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cd42f7a0b8830a2f01703dca10ae779b973566ad25e3b74d31dc3693977fa5b2b3442e47bc1a3b50723bae3bb9f31facd923f1eaba06b51cc8b927e7fb207cf3
|
7
|
+
data.tar.gz: 38ecb026b0ad5a1985d88349a839a9d2972f85596504e6f300686f9751169a3c8d62582e79119106085a9cadc066517206da117993c3a30f48a5a0c58f256b4c
|
data/.travis.yml
ADDED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,26 @@
|
|
1
|
+
### 1.0.5
|
2
|
+
|
3
|
+
* Support string or io input via `SimpleXlsxReader#parse` (@kalsan, @til)
|
4
|
+
|
5
|
+
### 1.0.4
|
6
|
+
|
7
|
+
* Fix Windows + RubyZip 1.2.1 bug preventing files from being read
|
8
|
+
* Add ability to parse hyperlinks
|
9
|
+
* Support files exported from Google Docs (@Strnadj)
|
10
|
+
|
11
|
+
### 1.0.3
|
12
|
+
|
13
|
+
Broken on Ruby 1.9; yanked.
|
14
|
+
|
15
|
+
### 1.0.2
|
16
|
+
|
17
|
+
* Fix Ruby 1.9.3-specific bug preventing parsing most sheets [middagj, eritiro]
|
18
|
+
* Better support for non-excel-generated xlsx files [bwlang]
|
19
|
+
* You don't always have a numFmtId column, and that's OK
|
20
|
+
* Sometimes 'sharedStrings.xml' can be 'sharedstrings.xml'
|
21
|
+
* Fixed parsing times very close to 12/30/1899 [Valeriy Utyaganov]
|
22
|
+
* Be more flexible with custom formats using a numFmtId < 164
|
23
|
+
|
1
24
|
### 1.0.1
|
2
25
|
|
3
26
|
* Add support for the 1904 date system [zilverline]
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# SimpleXlsxReader
|
1
|
+
# SimpleXlsxReader [![Build Status](https://travis-ci.org/woahdae/simple_xlsx_reader.svg?branch=master)](https://travis-ci.org/woahdae/simple_xlsx_reader)
|
2
2
|
|
3
3
|
An xlsx reader for Ruby that parses xlsx cell values into plain ruby
|
4
4
|
primitives and dates/times.
|
@@ -35,14 +35,21 @@ Here's the totality of the public api, in code:
|
|
35
35
|
|
36
36
|
module SimpleXlsxReader
|
37
37
|
def self.open(file_path)
|
38
|
-
Document.new(file_path).tap(&:sheets)
|
38
|
+
Document.new(file_path: file_path).tap(&:sheets)
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.parse(string_or_io)
|
42
|
+
Document.new(string_or_io: string_or_io).tap(&:sheets)
|
39
43
|
end
|
40
44
|
|
41
45
|
class Document
|
42
|
-
attr_reader :
|
46
|
+
attr_reader :string_or_io
|
47
|
+
|
48
|
+
def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
|
49
|
+
((file_path || legacy_file_path).nil? ^ string_or_io.nil?) ||
|
50
|
+
fail(ArgumentError, 'either file_path or string_or_io must be provided')
|
43
51
|
|
44
|
-
|
45
|
-
@file_path = file_path
|
52
|
+
@string_or_io = string_or_io || File.new(file_path || legacy_file_path)
|
46
53
|
end
|
47
54
|
|
48
55
|
def sheets
|
@@ -54,7 +61,7 @@ Here's the totality of the public api, in code:
|
|
54
61
|
end
|
55
62
|
|
56
63
|
def xml
|
57
|
-
Xml.load(
|
64
|
+
Xml.load(string_or_io)
|
58
65
|
end
|
59
66
|
|
60
67
|
class Sheet < Struct.new(:name, :rows)
|
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -19,6 +19,33 @@ end
|
|
19
19
|
module SimpleXlsxReader
|
20
20
|
class CellLoadError < StandardError; end
|
21
21
|
|
22
|
+
# We support hyperlinks as a "type" even though they're technically
|
23
|
+
# represented either as a function or an external reference in the xlsx spec.
|
24
|
+
#
|
25
|
+
# Since having hyperlink data in our sheet usually means we might want to do
|
26
|
+
# something primarily with the URL (store it in the database, download it, etc),
|
27
|
+
# we go through extra effort to parse the function or follow the reference
|
28
|
+
# to represent the hyperlink primarily as a URL. However, maybe we do want
|
29
|
+
# the hyperlink "friendly name" part (as MS calls it), so here we've subclassed
|
30
|
+
# string to tack on the friendly name. This means 80% of us that just want
|
31
|
+
# the URL value will have to do nothing extra, but the 20% that might want the
|
32
|
+
# friendly name can access it.
|
33
|
+
#
|
34
|
+
# Note, by default, the value we would get by just asking the cell would
|
35
|
+
# be the "friendly name" and *not* the URL, which is tucked away in the
|
36
|
+
# function definition or a separate "relationships" meta-document.
|
37
|
+
#
|
38
|
+
# See MS documentation on the HYPERLINK function for some background:
|
39
|
+
# https://support.office.com/en-us/article/HYPERLINK-function-333c7ce6-c5ae-4164-9c47-7de9b76f577f
|
40
|
+
class Hyperlink < String
|
41
|
+
attr_reader :friendly_name
|
42
|
+
|
43
|
+
def initialize(url, friendly_name = nil)
|
44
|
+
@friendly_name = friendly_name
|
45
|
+
super(url)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
22
49
|
def self.configuration
|
23
50
|
@configuration ||= Struct.new(:catch_cell_load_errors).new.tap do |c|
|
24
51
|
c.catch_cell_load_errors = false
|
@@ -26,14 +53,21 @@ module SimpleXlsxReader
|
|
26
53
|
end
|
27
54
|
|
28
55
|
def self.open(file_path)
|
29
|
-
Document.new(file_path).tap(&:sheets)
|
56
|
+
Document.new(file_path: file_path).tap(&:sheets)
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.parse(string_or_io)
|
60
|
+
Document.new(string_or_io: string_or_io).tap(&:sheets)
|
30
61
|
end
|
31
62
|
|
32
63
|
class Document
|
33
|
-
attr_reader :
|
64
|
+
attr_reader :string_or_io
|
65
|
+
|
66
|
+
def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
|
67
|
+
((file_path || legacy_file_path).nil? ^ string_or_io.nil?) ||
|
68
|
+
fail(ArgumentError, 'either file_path or string_or_io must be provided')
|
34
69
|
|
35
|
-
|
36
|
-
@file_path = file_path
|
70
|
+
@string_or_io = string_or_io || File.new(file_path || legacy_file_path)
|
37
71
|
end
|
38
72
|
|
39
73
|
def sheets
|
@@ -45,7 +79,7 @@ module SimpleXlsxReader
|
|
45
79
|
end
|
46
80
|
|
47
81
|
def xml
|
48
|
-
Xml.load(
|
82
|
+
Xml.load(string_or_io)
|
49
83
|
end
|
50
84
|
|
51
85
|
class Sheet < Struct.new(:name, :rows)
|
@@ -69,28 +103,54 @@ module SimpleXlsxReader
|
|
69
103
|
##
|
70
104
|
# For internal use; stores source xml in nokogiri documents
|
71
105
|
class Xml
|
72
|
-
attr_accessor :workbook, :shared_strings, :sheets, :styles
|
106
|
+
attr_accessor :workbook, :shared_strings, :sheets, :sheet_rels, :styles
|
73
107
|
|
74
|
-
def self.load(
|
108
|
+
def self.load(string_or_io)
|
75
109
|
self.new.tap do |xml|
|
76
|
-
SimpleXlsxReader::Zip.
|
77
|
-
xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
|
78
|
-
xml.styles = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
|
79
|
-
|
80
|
-
# optional feature used by excel, but not often used by xlsx
|
81
|
-
# generation libraries
|
82
|
-
if zip.file.file?('xl/sharedStrings.xml')
|
83
|
-
xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
|
84
|
-
end
|
85
|
-
|
110
|
+
SimpleXlsxReader::Zip.open_buffer(string_or_io) do |zip|
|
86
111
|
xml.sheets = []
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
112
|
+
xml.sheet_rels = []
|
113
|
+
|
114
|
+
# This weird style of enumerating over the entries lets us
|
115
|
+
# concisely assign entries in a case insensitive and
|
116
|
+
# slash insensitive ('/' vs '\') manner.
|
117
|
+
#
|
118
|
+
# RubyZip used to normalize the slashes, but doesn't now:
|
119
|
+
# https://github.com/rubyzip/rubyzip/issues/324
|
120
|
+
zip.entries.each do |entry|
|
121
|
+
if entry.name.match(/^xl.workbook\.xml$/) # xl/workbook.xml
|
122
|
+
xml.workbook = Nokogiri::XML(zip.read(entry)).remove_namespaces!
|
123
|
+
elsif entry.name.match(/^xl.styles\.xml$/) # xl/styles.xml
|
124
|
+
xml.styles = Nokogiri::XML(zip.read(entry)).remove_namespaces!
|
125
|
+
elsif entry.name.match(/^xl.sharedStrings\.xml$/i) # xl/sharedStrings.xml
|
126
|
+
# optional feature used by excel, but not often used by xlsx
|
127
|
+
# generation libraries. Path name is sometimes lowercase, too.
|
128
|
+
xml.shared_strings = Nokogiri::XML(zip.read(entry)).remove_namespaces!
|
129
|
+
elsif match = entry.name.match(/^xl.worksheets.sheet([0-9]*)\.xml$/)
|
130
|
+
sheet_number = match.captures.first.to_i
|
131
|
+
xml.sheets[sheet_number] =
|
132
|
+
Nokogiri::XML(zip.read(entry)).remove_namespaces!
|
133
|
+
elsif match = entry.name.match(/^xl.worksheets._rels.sheet([0-9]*)\.xml\.rels$/)
|
134
|
+
sheet_number = match.captures.first.to_i
|
135
|
+
xml.sheet_rels[sheet_number] =
|
136
|
+
Nokogiri::XML(zip.read(entry)).remove_namespaces!
|
137
|
+
end
|
138
|
+
end
|
91
139
|
|
92
|
-
|
93
|
-
|
140
|
+
# Sometimes there's a zero-index sheet.xml, ex.
|
141
|
+
# Google Docs creates:
|
142
|
+
#
|
143
|
+
# xl/worksheets/sheet.xml
|
144
|
+
# xl/worksheets/sheet1.xml
|
145
|
+
# xl/worksheets/sheet2.xml
|
146
|
+
# While Excel creates:
|
147
|
+
# xl/worksheets/sheet1.xml
|
148
|
+
# xl/worksheets/sheet2.xml
|
149
|
+
#
|
150
|
+
# So, for the latter case, let's shift [null, <Sheet 1>, <Sheet 2>]
|
151
|
+
if !xml.sheets[0]
|
152
|
+
xml.sheets.shift
|
153
|
+
xml.sheet_rels.shift
|
94
154
|
end
|
95
155
|
end
|
96
156
|
end
|
@@ -105,7 +165,7 @@ module SimpleXlsxReader
|
|
105
165
|
|
106
166
|
def load_sheets
|
107
167
|
sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
|
108
|
-
parse_sheet(sheet_name, xml.sheets[i]) # sheet_number is *not* the index into xml.sheets
|
168
|
+
parse_sheet(sheet_name, xml.sheets[i], xml.sheet_rels[i]) # sheet_number is *not* the index into xml.sheets
|
109
169
|
end
|
110
170
|
end
|
111
171
|
|
@@ -121,9 +181,10 @@ module SimpleXlsxReader
|
|
121
181
|
end
|
122
182
|
end
|
123
183
|
|
124
|
-
def parse_sheet(sheet_name, xsheet)
|
184
|
+
def parse_sheet(sheet_name, xsheet, xrels)
|
125
185
|
sheet = Sheet.new(sheet_name)
|
126
186
|
sheet_width, sheet_height = *sheet_dimensions(xsheet)
|
187
|
+
cells_w_links = xsheet.xpath('//hyperlinks/hyperlink').inject({}) {|acc, e| acc[e.attr(:ref)] = e.attr(:id); acc}
|
127
188
|
|
128
189
|
sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
|
129
190
|
xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
|
@@ -148,10 +209,21 @@ module SimpleXlsxReader
|
|
148
209
|
# by about 60%. Odd.
|
149
210
|
xvalue = type == 'inlineStr' ?
|
150
211
|
(xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
|
151
|
-
xcell.children.find {|c| c.name == 'v'}
|
212
|
+
xcell.children.find {|c| c.name == 'f' && c.text.start_with?('HYPERLINK(') || c.name == 'v'}
|
213
|
+
|
214
|
+
if xvalue
|
215
|
+
value = xvalue.text.strip
|
216
|
+
|
217
|
+
if rel_id = cells_w_links[xcell.attr('r')] # a hyperlink made via GUI
|
218
|
+
url = xrels.at_xpath(%(//*[@Id="#{rel_id}"])).attr('Target')
|
219
|
+
elsif xvalue.name == 'f' # only time we have a function is if it's a hyperlink
|
220
|
+
url = value.slice(/HYPERLINK\("(.*?)"/, 1)
|
221
|
+
end
|
222
|
+
end
|
152
223
|
|
153
224
|
cell = begin
|
154
|
-
self.class.cast(
|
225
|
+
self.class.cast(value, type, style,
|
226
|
+
:url => url,
|
155
227
|
:shared_strings => shared_strings,
|
156
228
|
:base_date => base_date)
|
157
229
|
rescue => e
|
@@ -218,11 +290,13 @@ module SimpleXlsxReader
|
|
218
290
|
# 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
|
219
291
|
# 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
|
220
292
|
def column_letter_to_number(column_letter)
|
221
|
-
pow = -1
|
222
|
-
|
223
|
-
|
224
|
-
|
293
|
+
pow = column_letter.length - 1
|
294
|
+
result = 0
|
295
|
+
column_letter.each_byte do |b|
|
296
|
+
result += 26**pow * (b - 64)
|
297
|
+
pow -= 1
|
225
298
|
end
|
299
|
+
result
|
226
300
|
end
|
227
301
|
|
228
302
|
# Excel doesn't record types for some cells, only its display style, so
|
@@ -241,21 +315,32 @@ module SimpleXlsxReader
|
|
241
315
|
# type.
|
242
316
|
def style_types
|
243
317
|
@style_types ||=
|
244
|
-
|
245
|
-
|
318
|
+
xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
|
319
|
+
style_type_by_num_fmt_id(num_fmt_id(xstyle))}
|
320
|
+
end
|
321
|
+
|
322
|
+
#returns the numFmtId value if it's available
|
323
|
+
def num_fmt_id(xstyle)
|
324
|
+
if xstyle.attributes['numFmtId']
|
325
|
+
xstyle.attributes['numFmtId'].value
|
326
|
+
else
|
327
|
+
nil
|
328
|
+
end
|
246
329
|
end
|
247
330
|
|
248
331
|
# Finds the type we think a style is; For example, fmtId 14 is a date
|
249
|
-
# style, so this would return :date
|
332
|
+
# style, so this would return :date.
|
333
|
+
#
|
334
|
+
# Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
|
335
|
+
# but in practice can sometimes be simply out of the usual "Any Language"
|
336
|
+
# id range that goes up to 49. For example, I have seen a numFmtId of
|
337
|
+
# 59 specified as a date. In Thai, 59 is a number format, so this seems
|
338
|
+
# like a bad idea, but we try to be flexible and just go with it.
|
250
339
|
def style_type_by_num_fmt_id(id)
|
251
340
|
return nil if id.nil?
|
252
341
|
|
253
342
|
id = id.to_i
|
254
|
-
|
255
|
-
custom_style_types[id]
|
256
|
-
else # we should know this one
|
257
|
-
NumFmtMap[id]
|
258
|
-
end
|
343
|
+
NumFmtMap[id] || custom_style_types[id]
|
259
344
|
end
|
260
345
|
|
261
346
|
# Map of (numFmtId >= 164) (custom styles) to our best guess at the type
|
@@ -314,7 +399,7 @@ module SimpleXlsxReader
|
|
314
399
|
type = style
|
315
400
|
end
|
316
401
|
|
317
|
-
case type
|
402
|
+
casted = case type
|
318
403
|
|
319
404
|
##
|
320
405
|
# There are few built-in types
|
@@ -347,15 +432,15 @@ module SimpleXlsxReader
|
|
347
432
|
# the trickiest. note that all these formats can vary on
|
348
433
|
# whether they actually contain a date, time, or datetime.
|
349
434
|
when :date, :time, :date_time
|
350
|
-
|
435
|
+
value = Float(value)
|
436
|
+
days_since_date_system_start = value.to_i
|
437
|
+
fraction_of_24 = value - days_since_date_system_start
|
351
438
|
|
352
439
|
# http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
|
353
|
-
date = options.fetch(:base_date, DATE_SYSTEM_1900) +
|
354
|
-
|
355
|
-
if fraction_of_24 # there is a time associated
|
356
|
-
fraction_of_24 = "0.#{fraction_of_24}".to_f
|
357
|
-
seconds = (fraction_of_24 * 86400).round
|
440
|
+
date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
|
358
441
|
|
442
|
+
if fraction_of_24 > 0 # there is a time associated
|
443
|
+
seconds = (fraction_of_24 * 86400).round
|
359
444
|
return Time.utc(date.year, date.month, date.day) + seconds
|
360
445
|
else
|
361
446
|
return date
|
@@ -374,6 +459,12 @@ module SimpleXlsxReader
|
|
374
459
|
else
|
375
460
|
value
|
376
461
|
end
|
462
|
+
|
463
|
+
if options[:url]
|
464
|
+
Hyperlink.new(options[:url], casted)
|
465
|
+
else
|
466
|
+
casted
|
467
|
+
end
|
377
468
|
end
|
378
469
|
|
379
470
|
## Returns the base_date from which to calculate dates.
|
data/simple_xlsx_reader.gemspec
CHANGED
@@ -7,19 +7,21 @@ Gem::Specification.new do |gem|
|
|
7
7
|
gem.name = "simple_xlsx_reader"
|
8
8
|
gem.version = SimpleXlsxReader::VERSION
|
9
9
|
gem.authors = ["Woody Peterson"]
|
10
|
-
gem.email = ["woody@
|
10
|
+
gem.email = ["woody.peterson@gmail.com"]
|
11
11
|
gem.description = %q{Read xlsx data the Ruby way}
|
12
12
|
gem.summary = %q{Read xlsx data the Ruby way}
|
13
13
|
gem.homepage = ""
|
14
|
+
gem.license = "MIT"
|
14
15
|
|
15
16
|
gem.add_dependency 'nokogiri'
|
16
17
|
gem.add_dependency 'rubyzip'
|
17
18
|
|
18
19
|
gem.add_development_dependency 'minitest', '>= 5.0'
|
20
|
+
gem.add_development_dependency 'rake'
|
19
21
|
gem.add_development_dependency 'pry'
|
20
22
|
|
21
23
|
gem.files = `git ls-files`.split($/)
|
22
24
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
23
|
-
gem.test_files = gem.files.grep(%r{^
|
25
|
+
gem.test_files = gem.files.grep(%r{^test/})
|
24
26
|
gem.require_paths = ["lib"]
|
25
27
|
end
|
data/test/date1904_test.rb
CHANGED
data/test/datetime_test.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'test_helper'
|
2
2
|
|
3
3
|
describe SimpleXlsxReader do
|
4
4
|
let(:datetimes_file) { File.join(File.dirname(__FILE__),
|
@@ -11,7 +11,8 @@ describe SimpleXlsxReader do
|
|
11
11
|
"Datetimes" =>
|
12
12
|
[[Time.parse("2013-08-19 18:29:59 UTC")],
|
13
13
|
[Time.parse("2013-08-19 18:30:00 UTC")],
|
14
|
-
[Time.parse("2013-08-19 18:30:01 UTC")]
|
14
|
+
[Time.parse("2013-08-19 18:30:01 UTC")],
|
15
|
+
[Time.parse("1899-12-30 00:30:00 UTC")]]
|
15
16
|
})
|
16
17
|
end
|
17
18
|
|
data/test/datetimes.xlsx
CHANGED
Binary file
|
Binary file
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
describe SimpleXlsxReader do
|
5
|
+
let(:one_sheet_file) { File.join(File.dirname(__FILE__), 'gdocs_sheet.xlsx') }
|
6
|
+
let(:subject) { SimpleXlsxReader::Document.new(one_sheet_file) }
|
7
|
+
|
8
|
+
it 'able to load file from google docs' do
|
9
|
+
subject.to_hash.must_equal({
|
10
|
+
"List 1" => [["Empty gdocs list 1"]],
|
11
|
+
"List 2" => [["Empty gdocs list 2"]]
|
12
|
+
})
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
Binary file
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
|
3
|
+
describe SimpleXlsxReader do
|
4
|
+
let(:lower_case_shared_strings) { File.join(File.dirname(__FILE__),
|
5
|
+
'lower_case_sharedstrings.xlsx') }
|
6
|
+
|
7
|
+
let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
|
8
|
+
|
9
|
+
|
10
|
+
describe '#to_hash' do
|
11
|
+
it 'should have the word Well in the first row' do
|
12
|
+
subject.sheets.first.rows[0].must_include('Well')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/test/performance_test.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'test_helper'
|
2
2
|
require 'minitest/benchmark'
|
3
3
|
|
4
4
|
describe 'SimpleXlsxReader Benchmark' do
|
@@ -96,13 +96,13 @@ describe 'SimpleXlsxReader Benchmark' do
|
|
96
96
|
bench_exp(1,10000)
|
97
97
|
end
|
98
98
|
|
99
|
-
bench_performance_linear 'parses sheets in linear time', 0.
|
99
|
+
bench_performance_linear 'parses sheets in linear time', 0.999 do |n|
|
100
100
|
|
101
101
|
raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.size}"\
|
102
102
|
if @xml.sheets[n].nil?
|
103
103
|
|
104
104
|
sheet = SimpleXlsxReader::Document::Mapper.new(@xml).
|
105
|
-
parse_sheet('test', @xml.sheets[n])
|
105
|
+
parse_sheet('test', @xml.sheets[n], nil)
|
106
106
|
|
107
107
|
raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.size}"\
|
108
108
|
if sheet.rows.size != n + 1
|
Binary file
|
@@ -1,26 +1,66 @@
|
|
1
|
-
|
1
|
+
require_relative 'test_helper'
|
2
2
|
require 'time'
|
3
3
|
|
4
|
+
SXR = SimpleXlsxReader
|
5
|
+
|
4
6
|
describe SimpleXlsxReader do
|
5
|
-
let(:
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
"
|
14
|
-
[
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
}
|
7
|
+
let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
|
8
|
+
let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
|
9
|
+
let(:expected_result) do
|
10
|
+
{
|
11
|
+
"Authors"=>
|
12
|
+
[["Name", "Occupation"],
|
13
|
+
["Big Bird", "Teacher"]],
|
14
|
+
"Posts"=>
|
15
|
+
[["Author Name", "Title", "Body", "Created At", "Comment Count", "URL"],
|
16
|
+
["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1, SXR::Hyperlink.new("http://www.example.com/hyperlink-function", "This uses the HYPERLINK() function")],
|
17
|
+
["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2, SXR::Hyperlink.new("http://www.example.com/hyperlink-gui", "This uses the hyperlink GUI option")],
|
18
|
+
["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0, nil],
|
19
|
+
["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil, nil]]
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
describe SimpleXlsxReader do
|
24
|
+
describe 'load from file path' do
|
25
|
+
let(:subject) { SimpleXlsxReader.open(sesame_street_blog_file_path) }
|
26
|
+
|
27
|
+
it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
|
28
|
+
subject.to_hash.must_equal(expected_result)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe 'load from buffer' do
|
33
|
+
let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
|
34
|
+
|
35
|
+
it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
|
36
|
+
subject.to_hash.must_equal(expected_result)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe SimpleXlsxReader::Document do
|
42
|
+
describe 'load from file path' do
|
43
|
+
let(:subject) { SimpleXlsxReader::Document.new(file_path: sesame_street_blog_file_path) }
|
44
|
+
|
45
|
+
it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
|
46
|
+
subject.to_hash.must_equal(expected_result)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe 'load from buffer' do
|
51
|
+
let(:subject) { SimpleXlsxReader::Document.new(string_or_io: sesame_street_blog_io) }
|
52
|
+
|
53
|
+
it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
|
54
|
+
subject.to_hash.must_equal(expected_result)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe 'load from file path (legacy API)' do
|
59
|
+
let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file_path) }
|
60
|
+
|
61
|
+
it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
|
62
|
+
subject.to_hash.must_equal(expected_result)
|
63
|
+
end
|
24
64
|
end
|
25
65
|
end
|
26
66
|
|
@@ -63,10 +103,33 @@ describe SimpleXlsxReader do
|
|
63
103
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
64
104
|
end
|
65
105
|
|
106
|
+
it 'reads less-than-zero complex number types styled as times' do
|
107
|
+
described_class.cast('6.25E-2', 'n', :time).
|
108
|
+
must_equal Time.parse('1899-12-30 01:30:00 UTC')
|
109
|
+
end
|
110
|
+
|
66
111
|
it 'reads number types styled as date_times' do
|
67
112
|
described_class.cast('41505.77083', 'n', :date_time).
|
68
113
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
69
114
|
end
|
115
|
+
|
116
|
+
it 'raises when date-styled values are not numerical' do
|
117
|
+
lambda { described_class.cast('14 is not a valid date', nil, :date) }.
|
118
|
+
must_raise(ArgumentError)
|
119
|
+
end
|
120
|
+
|
121
|
+
describe "with the url option" do
|
122
|
+
let(:url) { "http://www.example.com/hyperlink" }
|
123
|
+
it 'creates a hyperlink with a string type' do
|
124
|
+
described_class.cast("A link", 'str', :string, url: url).
|
125
|
+
must_equal SXR::Hyperlink.new(url, "A link")
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'creates a hyperlink with a shared string type' do
|
129
|
+
described_class.cast("2", 's', nil, shared_strings: ['a','b','c'], url: url).
|
130
|
+
must_equal SXR::Hyperlink.new(url, 'c')
|
131
|
+
end
|
132
|
+
end
|
70
133
|
end
|
71
134
|
|
72
135
|
describe '#shared_strings' do
|
@@ -102,6 +165,13 @@ describe SimpleXlsxReader do
|
|
102
165
|
|
103
166
|
it 'reads custom formatted styles (numFmtId >= 164)' do
|
104
167
|
mapper.style_types[1].must_equal :date_time
|
168
|
+
mapper.custom_style_types[164].must_equal :date_time
|
169
|
+
end
|
170
|
+
|
171
|
+
# something I've seen in the wild; don't think it's correct, but let's be flexible.
|
172
|
+
it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
|
173
|
+
mapper.style_types[2].must_equal :date_time
|
174
|
+
mapper.custom_style_types[59].must_equal :date_time
|
105
175
|
end
|
106
176
|
end
|
107
177
|
|
@@ -246,16 +316,55 @@ describe SimpleXlsxReader do
|
|
246
316
|
it 'raises if configuration.catch_cell_load_errors' do
|
247
317
|
SimpleXlsxReader.configuration.catch_cell_load_errors = false
|
248
318
|
|
249
|
-
lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
|
319
|
+
lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first, nil) }.
|
250
320
|
must_raise(SimpleXlsxReader::CellLoadError)
|
251
321
|
end
|
252
322
|
|
253
323
|
it 'records a load error if not configuration.catch_cell_load_errors' do
|
254
324
|
SimpleXlsxReader.configuration.catch_cell_load_errors = true
|
255
325
|
|
256
|
-
sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
|
257
|
-
sheet.load_errors[[0,0]].must_include 'invalid value for
|
326
|
+
sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil)
|
327
|
+
sheet.load_errors[[0,0]].must_include 'invalid value for Float'
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
describe "missing numFmtId attributes" do
|
332
|
+
|
333
|
+
let(:xml) do
|
334
|
+
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
335
|
+
xml.sheets = [Nokogiri::XML(
|
336
|
+
<<-XML
|
337
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
338
|
+
<dimension ref="A1:A1" />
|
339
|
+
<sheetData>
|
340
|
+
<row>
|
341
|
+
<c r='A1' s='s'>
|
342
|
+
<v>some content</v>
|
343
|
+
</c>
|
344
|
+
</row>
|
345
|
+
</sheetData>
|
346
|
+
</worksheet>
|
347
|
+
XML
|
348
|
+
).remove_namespaces!]
|
349
|
+
|
350
|
+
xml.styles = Nokogiri::XML(
|
351
|
+
<<-XML
|
352
|
+
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
353
|
+
|
354
|
+
</styleSheet>
|
355
|
+
XML
|
356
|
+
).remove_namespaces!
|
357
|
+
end
|
358
|
+
end
|
359
|
+
|
360
|
+
before do
|
361
|
+
@row = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows[0]
|
258
362
|
end
|
363
|
+
|
364
|
+
it 'continues even when cells are missing numFmtId attributes ' do
|
365
|
+
@row[0].must_equal 'some content'
|
366
|
+
end
|
367
|
+
|
259
368
|
end
|
260
369
|
|
261
370
|
describe 'parsing types' do
|
@@ -284,8 +393,21 @@ describe SimpleXlsxReader do
|
|
284
393
|
<c r='G1' t='inlineStr' s='0'>
|
285
394
|
<is><t>Cell G1</t></is>
|
286
395
|
</c>
|
396
|
+
|
397
|
+
<c r='H1' s='0'>
|
398
|
+
<f>HYPERLINK("http://www.example.com/hyperlink-function", "HYPERLINK function")</f>
|
399
|
+
<v>HYPERLINK function</v>
|
400
|
+
</c>
|
401
|
+
|
402
|
+
<c r='I1' s='0'>
|
403
|
+
<v>GUI-made hyperlink</v>
|
404
|
+
</c>
|
287
405
|
</row>
|
288
406
|
</sheetData>
|
407
|
+
|
408
|
+
<hyperlinks>
|
409
|
+
<hyperlink ref="I1" id="rId1"/>
|
410
|
+
</hyperlinks>
|
289
411
|
</worksheet>
|
290
412
|
XML
|
291
413
|
).remove_namespaces!]
|
@@ -303,11 +425,28 @@ describe SimpleXlsxReader do
|
|
303
425
|
</styleSheet>
|
304
426
|
XML
|
305
427
|
).remove_namespaces!
|
428
|
+
|
429
|
+
# Although not a "type" or "style" according to xlsx spec,
|
430
|
+
# it sure could/should be, so let's test it with the rest of our
|
431
|
+
# typecasting code.
|
432
|
+
xml.sheet_rels = [Nokogiri::XML(
|
433
|
+
<<-XML
|
434
|
+
<Relationships>
|
435
|
+
<Relationship
|
436
|
+
Id="rId1"
|
437
|
+
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
|
438
|
+
Target="http://www.example.com/hyperlink-gui"
|
439
|
+
TargetMode="External"
|
440
|
+
/>
|
441
|
+
</Relationships>
|
442
|
+
XML
|
443
|
+
).remove_namespaces!]
|
444
|
+
|
306
445
|
end
|
307
446
|
end
|
308
447
|
|
309
448
|
before do
|
310
|
-
@row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
|
449
|
+
@row = described_class.new(xml).parse_sheet('test', xml.sheets.first, xml.sheet_rels.first).rows[0]
|
311
450
|
end
|
312
451
|
|
313
452
|
it "reads 'Generic' cells as strings" do
|
@@ -341,6 +480,18 @@ describe SimpleXlsxReader do
|
|
341
480
|
it "reads strings formatted as inlineStr" do
|
342
481
|
@row[6].must_equal 'Cell G1'
|
343
482
|
end
|
483
|
+
|
484
|
+
it "reads hyperlinks created via HYPERLINK()" do
|
485
|
+
@row[7].must_equal(
|
486
|
+
SXR::Hyperlink.new(
|
487
|
+
"http://www.example.com/hyperlink-function", "HYPERLINK function"))
|
488
|
+
end
|
489
|
+
|
490
|
+
it "reads hyperlinks created via the GUI" do
|
491
|
+
@row[8].must_equal(
|
492
|
+
SXR::Hyperlink.new(
|
493
|
+
"http://www.example.com/hyperlink-gui", "GUI-made hyperlink"))
|
494
|
+
end
|
344
495
|
end
|
345
496
|
|
346
497
|
describe 'parsing documents with blank rows' do
|
@@ -389,7 +540,7 @@ describe SimpleXlsxReader do
|
|
389
540
|
end
|
390
541
|
|
391
542
|
before do
|
392
|
-
@rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
|
543
|
+
@rows = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows
|
393
544
|
end
|
394
545
|
|
395
546
|
it "reads row data despite gaps in row numbering" do
|
data/test/styles.xml
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
2
2
|
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
|
3
|
-
<numFmts count="
|
3
|
+
<numFmts count="2">
|
4
|
+
<numFmt numFmtId="59" formatCode="dd/mm/yyyy"/>
|
4
5
|
<numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
|
5
6
|
</numFmts>
|
6
7
|
<fonts count="3" x14ac:knownFonts="1">
|
@@ -50,9 +51,10 @@
|
|
50
51
|
<xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
|
51
52
|
<xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
|
52
53
|
</cellStyleXfs>
|
53
|
-
<cellXfs count="
|
54
|
+
<cellXfs count="4">
|
54
55
|
<xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
|
55
56
|
<xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
57
|
+
<xf numFmtId="59" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
56
58
|
<xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
57
59
|
</cellXfs>
|
58
60
|
<cellStyles count="3">
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '5.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: pry
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -68,12 +82,13 @@ dependencies:
|
|
68
82
|
version: '0'
|
69
83
|
description: Read xlsx data the Ruby way
|
70
84
|
email:
|
71
|
-
- woody@
|
85
|
+
- woody.peterson@gmail.com
|
72
86
|
executables: []
|
73
87
|
extensions: []
|
74
88
|
extra_rdoc_files: []
|
75
89
|
files:
|
76
90
|
- ".gitignore"
|
91
|
+
- ".travis.yml"
|
77
92
|
- CHANGELOG.md
|
78
93
|
- Gemfile
|
79
94
|
- LICENSE.txt
|
@@ -86,6 +101,10 @@ files:
|
|
86
101
|
- test/date1904_test.rb
|
87
102
|
- test/datetime_test.rb
|
88
103
|
- test/datetimes.xlsx
|
104
|
+
- test/gdocs_sheet.xlsx
|
105
|
+
- test/gdocs_sheet_test.rb
|
106
|
+
- test/lower_case_sharedstrings.xlsx
|
107
|
+
- test/lower_case_sharedstrings_test.rb
|
89
108
|
- test/performance_test.rb
|
90
109
|
- test/sesame_street_blog.xlsx
|
91
110
|
- test/shared_strings.xml
|
@@ -93,7 +112,8 @@ files:
|
|
93
112
|
- test/styles.xml
|
94
113
|
- test/test_helper.rb
|
95
114
|
homepage: ''
|
96
|
-
licenses:
|
115
|
+
licenses:
|
116
|
+
- MIT
|
97
117
|
metadata: {}
|
98
118
|
post_install_message:
|
99
119
|
rdoc_options: []
|
@@ -110,8 +130,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
130
|
- !ruby/object:Gem::Version
|
111
131
|
version: '0'
|
112
132
|
requirements: []
|
113
|
-
|
114
|
-
rubygems_version: 2.2.0
|
133
|
+
rubygems_version: 3.1.6
|
115
134
|
signing_key:
|
116
135
|
specification_version: 4
|
117
136
|
summary: Read xlsx data the Ruby way
|
@@ -120,6 +139,10 @@ test_files:
|
|
120
139
|
- test/date1904_test.rb
|
121
140
|
- test/datetime_test.rb
|
122
141
|
- test/datetimes.xlsx
|
142
|
+
- test/gdocs_sheet.xlsx
|
143
|
+
- test/gdocs_sheet_test.rb
|
144
|
+
- test/lower_case_sharedstrings.xlsx
|
145
|
+
- test/lower_case_sharedstrings_test.rb
|
123
146
|
- test/performance_test.rb
|
124
147
|
- test/sesame_street_blog.xlsx
|
125
148
|
- test/shared_strings.xml
|