simple_xlsx_reader 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/simple_xlsx_reader.rb +34 -20
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/date1904_test.rb +1 -1
- data/test/datetime_test.rb +3 -2
- data/test/datetimes.xlsx +0 -0
- data/test/lower_case_sharedstrings.xlsx +0 -0
- data/test/lower_case_sharedstrings_test.rb +15 -0
- data/test/performance_test.rb +1 -1
- data/test/simple_xlsx_reader_test.rb +47 -1
- data/test/styles.xml +4 -2
- data/test/test_helper.rb +1 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3b2fbcc0148d773ed19a266e64877600863730c
|
4
|
+
data.tar.gz: 61be9bbca611e49dc7ca725dd33d1560eec98018
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 041cd7eb5b2ebeeb310da52c0e173ff7c04ac95b55ae1c35cc510280c7d10e9ea6af703d10f2bed1f5f5678c77465fdd5807de2d8b3e4ccf7f1a01475178f487
|
7
|
+
data.tar.gz: 8587c21a003e2bf4af53b2c67348102d85713e997bc15e248f2b806d4d7388b078bb7a5315c503931a0f287e1a7771d990913878d916d010c36040b6777b98ab
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
### 1.0.2
|
2
|
+
|
3
|
+
* Fix Ruby 1.9.3-specific bug preventing parsing most sheets [middagj, eritiro]
|
4
|
+
* Better support for non-excel-generated xlsx files [bwlang]
|
5
|
+
* You don't always have a numFmtId column, and that's OK
|
6
|
+
* Sometimes 'sharedStrings.xml' can be 'sharedstrings.xml'
|
7
|
+
* Fixed parsing times very close to 12/30/1899 [Valeriy Utyaganov]
|
8
|
+
|
1
9
|
### 1.0.1
|
2
10
|
|
3
11
|
* Add support for the 1904 date system [zilverline]
|
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -79,8 +79,9 @@ module SimpleXlsxReader
|
|
79
79
|
|
80
80
|
# optional feature used by excel, but not often used by xlsx
|
81
81
|
# generation libraries
|
82
|
-
|
83
|
-
|
82
|
+
ss_file = (zip.to_a.map(&:name) & ['xl/sharedStrings.xml','xl/sharedstrings.xml'])[0]
|
83
|
+
if ss_file
|
84
|
+
xml.shared_strings = Nokogiri::XML(zip.read(ss_file)).remove_namespaces!
|
84
85
|
end
|
85
86
|
|
86
87
|
xml.sheets = []
|
@@ -218,11 +219,13 @@ module SimpleXlsxReader
|
|
218
219
|
# 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
|
219
220
|
# 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
|
220
221
|
def column_letter_to_number(column_letter)
|
221
|
-
pow = -1
|
222
|
-
|
223
|
-
|
224
|
-
|
222
|
+
pow = column_letter.length - 1
|
223
|
+
result = 0
|
224
|
+
column_letter.each_byte do |b|
|
225
|
+
result += 26**pow * (b - 64)
|
226
|
+
pow -= 1
|
225
227
|
end
|
228
|
+
result
|
226
229
|
end
|
227
230
|
|
228
231
|
# Excel doesn't record types for some cells, only its display style, so
|
@@ -241,21 +244,32 @@ module SimpleXlsxReader
|
|
241
244
|
# type.
|
242
245
|
def style_types
|
243
246
|
@style_types ||=
|
244
|
-
|
245
|
-
|
247
|
+
xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
|
248
|
+
style_type_by_num_fmt_id(num_fmt_id(xstyle))}
|
249
|
+
end
|
250
|
+
|
251
|
+
#returns the numFmtId value if it's available
|
252
|
+
def num_fmt_id(xstyle)
|
253
|
+
if xstyle.attributes['numFmtId']
|
254
|
+
xstyle.attributes['numFmtId'].value
|
255
|
+
else
|
256
|
+
nil
|
257
|
+
end
|
246
258
|
end
|
247
259
|
|
248
260
|
# Finds the type we think a style is; For example, fmtId 14 is a date
|
249
|
-
# style, so this would return :date
|
261
|
+
# style, so this would return :date.
|
262
|
+
#
|
263
|
+
# Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
|
264
|
+
# but in practice can sometimes be simply out of the usual "Any Language"
|
265
|
+
# id range that goes up to 49. For example, I have seen a numFmtId of
|
266
|
+
# 59 specified as a date. In Thai, 59 is a number format, so this seems
|
267
|
+
# like a bad idea, but we try to be flexible and just go with it.
|
250
268
|
def style_type_by_num_fmt_id(id)
|
251
269
|
return nil if id.nil?
|
252
270
|
|
253
271
|
id = id.to_i
|
254
|
-
|
255
|
-
custom_style_types[id]
|
256
|
-
else # we should know this one
|
257
|
-
NumFmtMap[id]
|
258
|
-
end
|
272
|
+
NumFmtMap[id] || custom_style_types[id]
|
259
273
|
end
|
260
274
|
|
261
275
|
# Map of (numFmtId >= 164) (custom styles) to our best guess at the type
|
@@ -347,15 +361,15 @@ module SimpleXlsxReader
|
|
347
361
|
# the trickiest. note that all these formats can vary on
|
348
362
|
# whether they actually contain a date, time, or datetime.
|
349
363
|
when :date, :time, :date_time
|
350
|
-
|
364
|
+
value = value.to_f
|
365
|
+
days_since_date_system_start = value.to_i
|
366
|
+
fraction_of_24 = value - days_since_date_system_start
|
351
367
|
|
352
368
|
# http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
|
353
|
-
date = options.fetch(:base_date, DATE_SYSTEM_1900) +
|
354
|
-
|
355
|
-
if fraction_of_24 # there is a time associated
|
356
|
-
fraction_of_24 = "0.#{fraction_of_24}".to_f
|
357
|
-
seconds = (fraction_of_24 * 86400).round
|
369
|
+
date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
|
358
370
|
|
371
|
+
if fraction_of_24 > 0 # there is a time associated
|
372
|
+
seconds = (fraction_of_24 * 86400).round
|
359
373
|
return Time.utc(date.year, date.month, date.day) + seconds
|
360
374
|
else
|
361
375
|
return date
|
data/test/date1904_test.rb
CHANGED
data/test/datetime_test.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'test_helper'
|
2
2
|
|
3
3
|
describe SimpleXlsxReader do
|
4
4
|
let(:datetimes_file) { File.join(File.dirname(__FILE__),
|
@@ -11,7 +11,8 @@ describe SimpleXlsxReader do
|
|
11
11
|
"Datetimes" =>
|
12
12
|
[[Time.parse("2013-08-19 18:29:59 UTC")],
|
13
13
|
[Time.parse("2013-08-19 18:30:00 UTC")],
|
14
|
-
[Time.parse("2013-08-19 18:30:01 UTC")]
|
14
|
+
[Time.parse("2013-08-19 18:30:01 UTC")],
|
15
|
+
[Time.parse("1899-12-30 00:30:00 UTC")]]
|
15
16
|
})
|
16
17
|
end
|
17
18
|
|
data/test/datetimes.xlsx
CHANGED
Binary file
|
Binary file
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
|
3
|
+
describe SimpleXlsxReader do
|
4
|
+
let(:lower_case_shared_strings) { File.join(File.dirname(__FILE__),
|
5
|
+
'lower_case_sharedstrings.xlsx') }
|
6
|
+
|
7
|
+
let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
|
8
|
+
|
9
|
+
|
10
|
+
describe '#to_hash' do
|
11
|
+
it 'should have the word Well in the first row' do
|
12
|
+
subject.sheets.first.rows[0].must_include('Well')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/test/performance_test.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'test_helper'
|
2
2
|
require 'time'
|
3
3
|
|
4
4
|
describe SimpleXlsxReader do
|
@@ -102,6 +102,13 @@ describe SimpleXlsxReader do
|
|
102
102
|
|
103
103
|
it 'reads custom formatted styles (numFmtId >= 164)' do
|
104
104
|
mapper.style_types[1].must_equal :date_time
|
105
|
+
mapper.custom_style_types[164].must_equal :date_time
|
106
|
+
end
|
107
|
+
|
108
|
+
# something I've seen in the wild; don't think it's correct, but let's be flexible.
|
109
|
+
it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
|
110
|
+
mapper.style_types[2].must_equal :date_time
|
111
|
+
mapper.custom_style_types[59].must_equal :date_time
|
105
112
|
end
|
106
113
|
end
|
107
114
|
|
@@ -258,6 +265,45 @@ describe SimpleXlsxReader do
|
|
258
265
|
end
|
259
266
|
end
|
260
267
|
|
268
|
+
describe "missing numFmtId attributes" do
|
269
|
+
|
270
|
+
let(:xml) do
|
271
|
+
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
272
|
+
xml.sheets = [Nokogiri::XML(
|
273
|
+
<<-XML
|
274
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
275
|
+
<dimension ref="A1:A1" />
|
276
|
+
<sheetData>
|
277
|
+
<row>
|
278
|
+
<c r='A1' s='s'>
|
279
|
+
<v>some content</v>
|
280
|
+
</c>
|
281
|
+
</row>
|
282
|
+
</sheetData>
|
283
|
+
</worksheet>
|
284
|
+
XML
|
285
|
+
).remove_namespaces!]
|
286
|
+
|
287
|
+
xml.styles = Nokogiri::XML(
|
288
|
+
<<-XML
|
289
|
+
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
290
|
+
|
291
|
+
</styleSheet>
|
292
|
+
XML
|
293
|
+
).remove_namespaces!
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
before do
|
298
|
+
@row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
|
299
|
+
end
|
300
|
+
|
301
|
+
it 'continues even when cells are missing numFmtId attributes ' do
|
302
|
+
@row[0].must_equal 'some content'
|
303
|
+
end
|
304
|
+
|
305
|
+
end
|
306
|
+
|
261
307
|
describe 'parsing types' do
|
262
308
|
let(:xml) do
|
263
309
|
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
data/test/styles.xml
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
2
2
|
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
|
3
|
-
<numFmts count="
|
3
|
+
<numFmts count="2">
|
4
|
+
<numFmt numFmtId="59" formatCode="dd/mm/yyyy"/>
|
4
5
|
<numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
|
5
6
|
</numFmts>
|
6
7
|
<fonts count="3" x14ac:knownFonts="1">
|
@@ -50,9 +51,10 @@
|
|
50
51
|
<xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
|
51
52
|
<xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
|
52
53
|
</cellStyleXfs>
|
53
|
-
<cellXfs count="
|
54
|
+
<cellXfs count="4">
|
54
55
|
<xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
|
55
56
|
<xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
57
|
+
<xf numFmtId="59" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
56
58
|
<xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
57
59
|
</cellXfs>
|
58
60
|
<cellStyles count="3">
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-02-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -86,6 +86,8 @@ files:
|
|
86
86
|
- test/date1904_test.rb
|
87
87
|
- test/datetime_test.rb
|
88
88
|
- test/datetimes.xlsx
|
89
|
+
- test/lower_case_sharedstrings.xlsx
|
90
|
+
- test/lower_case_sharedstrings_test.rb
|
89
91
|
- test/performance_test.rb
|
90
92
|
- test/sesame_street_blog.xlsx
|
91
93
|
- test/shared_strings.xml
|
@@ -111,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
113
|
version: '0'
|
112
114
|
requirements: []
|
113
115
|
rubyforge_project:
|
114
|
-
rubygems_version: 2.2.
|
116
|
+
rubygems_version: 2.2.2
|
115
117
|
signing_key:
|
116
118
|
specification_version: 4
|
117
119
|
summary: Read xlsx data the Ruby way
|
@@ -120,6 +122,8 @@ test_files:
|
|
120
122
|
- test/date1904_test.rb
|
121
123
|
- test/datetime_test.rb
|
122
124
|
- test/datetimes.xlsx
|
125
|
+
- test/lower_case_sharedstrings.xlsx
|
126
|
+
- test/lower_case_sharedstrings_test.rb
|
123
127
|
- test/performance_test.rb
|
124
128
|
- test/sesame_street_blog.xlsx
|
125
129
|
- test/shared_strings.xml
|