simple_xlsx_reader 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/simple_xlsx_reader.rb +34 -20
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/date1904_test.rb +1 -1
- data/test/datetime_test.rb +3 -2
- data/test/datetimes.xlsx +0 -0
- data/test/lower_case_sharedstrings.xlsx +0 -0
- data/test/lower_case_sharedstrings_test.rb +15 -0
- data/test/performance_test.rb +1 -1
- data/test/simple_xlsx_reader_test.rb +47 -1
- data/test/styles.xml +4 -2
- data/test/test_helper.rb +1 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3b2fbcc0148d773ed19a266e64877600863730c
|
4
|
+
data.tar.gz: 61be9bbca611e49dc7ca725dd33d1560eec98018
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 041cd7eb5b2ebeeb310da52c0e173ff7c04ac95b55ae1c35cc510280c7d10e9ea6af703d10f2bed1f5f5678c77465fdd5807de2d8b3e4ccf7f1a01475178f487
|
7
|
+
data.tar.gz: 8587c21a003e2bf4af53b2c67348102d85713e997bc15e248f2b806d4d7388b078bb7a5315c503931a0f287e1a7771d990913878d916d010c36040b6777b98ab
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
### 1.0.2
|
2
|
+
|
3
|
+
* Fix Ruby 1.9.3-specific bug preventing parsing most sheets [middagj, eritiro]
|
4
|
+
* Better support for non-excel-generated xlsx files [bwlang]
|
5
|
+
* You don't always have a numFmtId column, and that's OK
|
6
|
+
* Sometimes 'sharedStrings.xml' can be 'sharedstrings.xml'
|
7
|
+
* Fixed parsing times very close to 12/30/1899 [Valeriy Utyaganov]
|
8
|
+
|
1
9
|
### 1.0.1
|
2
10
|
|
3
11
|
* Add support for the 1904 date system [zilverline]
|
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -79,8 +79,9 @@ module SimpleXlsxReader
|
|
79
79
|
|
80
80
|
# optional feature used by excel, but not often used by xlsx
|
81
81
|
# generation libraries
|
82
|
-
|
83
|
-
|
82
|
+
ss_file = (zip.to_a.map(&:name) & ['xl/sharedStrings.xml','xl/sharedstrings.xml'])[0]
|
83
|
+
if ss_file
|
84
|
+
xml.shared_strings = Nokogiri::XML(zip.read(ss_file)).remove_namespaces!
|
84
85
|
end
|
85
86
|
|
86
87
|
xml.sheets = []
|
@@ -218,11 +219,13 @@ module SimpleXlsxReader
|
|
218
219
|
# 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
|
219
220
|
# 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
|
220
221
|
def column_letter_to_number(column_letter)
|
221
|
-
pow = -1
|
222
|
-
|
223
|
-
|
224
|
-
|
222
|
+
pow = column_letter.length - 1
|
223
|
+
result = 0
|
224
|
+
column_letter.each_byte do |b|
|
225
|
+
result += 26**pow * (b - 64)
|
226
|
+
pow -= 1
|
225
227
|
end
|
228
|
+
result
|
226
229
|
end
|
227
230
|
|
228
231
|
# Excel doesn't record types for some cells, only its display style, so
|
@@ -241,21 +244,32 @@ module SimpleXlsxReader
|
|
241
244
|
# type.
|
242
245
|
def style_types
|
243
246
|
@style_types ||=
|
244
|
-
|
245
|
-
|
247
|
+
xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
|
248
|
+
style_type_by_num_fmt_id(num_fmt_id(xstyle))}
|
249
|
+
end
|
250
|
+
|
251
|
+
#returns the numFmtId value if it's available
|
252
|
+
def num_fmt_id(xstyle)
|
253
|
+
if xstyle.attributes['numFmtId']
|
254
|
+
xstyle.attributes['numFmtId'].value
|
255
|
+
else
|
256
|
+
nil
|
257
|
+
end
|
246
258
|
end
|
247
259
|
|
248
260
|
# Finds the type we think a style is; For example, fmtId 14 is a date
|
249
|
-
# style, so this would return :date
|
261
|
+
# style, so this would return :date.
|
262
|
+
#
|
263
|
+
# Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
|
264
|
+
# but in practice can sometimes be simply out of the usual "Any Language"
|
265
|
+
# id range that goes up to 49. For example, I have seen a numFmtId of
|
266
|
+
# 59 specified as a date. In Thai, 59 is a number format, so this seems
|
267
|
+
# like a bad idea, but we try to be flexible and just go with it.
|
250
268
|
def style_type_by_num_fmt_id(id)
|
251
269
|
return nil if id.nil?
|
252
270
|
|
253
271
|
id = id.to_i
|
254
|
-
|
255
|
-
custom_style_types[id]
|
256
|
-
else # we should know this one
|
257
|
-
NumFmtMap[id]
|
258
|
-
end
|
272
|
+
NumFmtMap[id] || custom_style_types[id]
|
259
273
|
end
|
260
274
|
|
261
275
|
# Map of (numFmtId >= 164) (custom styles) to our best guess at the type
|
@@ -347,15 +361,15 @@ module SimpleXlsxReader
|
|
347
361
|
# the trickiest. note that all these formats can vary on
|
348
362
|
# whether they actually contain a date, time, or datetime.
|
349
363
|
when :date, :time, :date_time
|
350
|
-
|
364
|
+
value = value.to_f
|
365
|
+
days_since_date_system_start = value.to_i
|
366
|
+
fraction_of_24 = value - days_since_date_system_start
|
351
367
|
|
352
368
|
# http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
|
353
|
-
date = options.fetch(:base_date, DATE_SYSTEM_1900) +
|
354
|
-
|
355
|
-
if fraction_of_24 # there is a time associated
|
356
|
-
fraction_of_24 = "0.#{fraction_of_24}".to_f
|
357
|
-
seconds = (fraction_of_24 * 86400).round
|
369
|
+
date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
|
358
370
|
|
371
|
+
if fraction_of_24 > 0 # there is a time associated
|
372
|
+
seconds = (fraction_of_24 * 86400).round
|
359
373
|
return Time.utc(date.year, date.month, date.day) + seconds
|
360
374
|
else
|
361
375
|
return date
|
data/test/date1904_test.rb
CHANGED
data/test/datetime_test.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'test_helper'
|
2
2
|
|
3
3
|
describe SimpleXlsxReader do
|
4
4
|
let(:datetimes_file) { File.join(File.dirname(__FILE__),
|
@@ -11,7 +11,8 @@ describe SimpleXlsxReader do
|
|
11
11
|
"Datetimes" =>
|
12
12
|
[[Time.parse("2013-08-19 18:29:59 UTC")],
|
13
13
|
[Time.parse("2013-08-19 18:30:00 UTC")],
|
14
|
-
[Time.parse("2013-08-19 18:30:01 UTC")]
|
14
|
+
[Time.parse("2013-08-19 18:30:01 UTC")],
|
15
|
+
[Time.parse("1899-12-30 00:30:00 UTC")]]
|
15
16
|
})
|
16
17
|
end
|
17
18
|
|
data/test/datetimes.xlsx
CHANGED
Binary file
|
Binary file
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
|
3
|
+
describe SimpleXlsxReader do
|
4
|
+
let(:lower_case_shared_strings) { File.join(File.dirname(__FILE__),
|
5
|
+
'lower_case_sharedstrings.xlsx') }
|
6
|
+
|
7
|
+
let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
|
8
|
+
|
9
|
+
|
10
|
+
describe '#to_hash' do
|
11
|
+
it 'should have the word Well in the first row' do
|
12
|
+
subject.sheets.first.rows[0].must_include('Well')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/test/performance_test.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'test_helper'
|
2
2
|
require 'time'
|
3
3
|
|
4
4
|
describe SimpleXlsxReader do
|
@@ -102,6 +102,13 @@ describe SimpleXlsxReader do
|
|
102
102
|
|
103
103
|
it 'reads custom formatted styles (numFmtId >= 164)' do
|
104
104
|
mapper.style_types[1].must_equal :date_time
|
105
|
+
mapper.custom_style_types[164].must_equal :date_time
|
106
|
+
end
|
107
|
+
|
108
|
+
# something I've seen in the wild; don't think it's correct, but let's be flexible.
|
109
|
+
it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
|
110
|
+
mapper.style_types[2].must_equal :date_time
|
111
|
+
mapper.custom_style_types[59].must_equal :date_time
|
105
112
|
end
|
106
113
|
end
|
107
114
|
|
@@ -258,6 +265,45 @@ describe SimpleXlsxReader do
|
|
258
265
|
end
|
259
266
|
end
|
260
267
|
|
268
|
+
describe "missing numFmtId attributes" do
|
269
|
+
|
270
|
+
let(:xml) do
|
271
|
+
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
272
|
+
xml.sheets = [Nokogiri::XML(
|
273
|
+
<<-XML
|
274
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
275
|
+
<dimension ref="A1:A1" />
|
276
|
+
<sheetData>
|
277
|
+
<row>
|
278
|
+
<c r='A1' s='s'>
|
279
|
+
<v>some content</v>
|
280
|
+
</c>
|
281
|
+
</row>
|
282
|
+
</sheetData>
|
283
|
+
</worksheet>
|
284
|
+
XML
|
285
|
+
).remove_namespaces!]
|
286
|
+
|
287
|
+
xml.styles = Nokogiri::XML(
|
288
|
+
<<-XML
|
289
|
+
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
290
|
+
|
291
|
+
</styleSheet>
|
292
|
+
XML
|
293
|
+
).remove_namespaces!
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
before do
|
298
|
+
@row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
|
299
|
+
end
|
300
|
+
|
301
|
+
it 'continues even when cells are missing numFmtId attributes ' do
|
302
|
+
@row[0].must_equal 'some content'
|
303
|
+
end
|
304
|
+
|
305
|
+
end
|
306
|
+
|
261
307
|
describe 'parsing types' do
|
262
308
|
let(:xml) do
|
263
309
|
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
data/test/styles.xml
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
2
2
|
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
|
3
|
-
<numFmts count="1">
|
3
|
+
<numFmts count="2">
|
4
|
+
<numFmt numFmtId="59" formatCode="dd/mm/yyyy"/>
|
4
5
|
<numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
|
5
6
|
</numFmts>
|
6
7
|
<fonts count="3" x14ac:knownFonts="1">
|
@@ -50,9 +51,10 @@
|
|
50
51
|
<xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
|
51
52
|
<xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
|
52
53
|
</cellStyleXfs>
|
53
|
-
<cellXfs count="3">
|
54
|
+
<cellXfs count="4">
|
54
55
|
<xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
|
55
56
|
<xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
57
|
+
<xf numFmtId="59" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
56
58
|
<xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
57
59
|
</cellXfs>
|
58
60
|
<cellStyles count="3">
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-02-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -86,6 +86,8 @@ files:
|
|
86
86
|
- test/date1904_test.rb
|
87
87
|
- test/datetime_test.rb
|
88
88
|
- test/datetimes.xlsx
|
89
|
+
- test/lower_case_sharedstrings.xlsx
|
90
|
+
- test/lower_case_sharedstrings_test.rb
|
89
91
|
- test/performance_test.rb
|
90
92
|
- test/sesame_street_blog.xlsx
|
91
93
|
- test/shared_strings.xml
|
@@ -111,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
113
|
version: '0'
|
112
114
|
requirements: []
|
113
115
|
rubyforge_project:
|
114
|
-
rubygems_version: 2.2.
|
116
|
+
rubygems_version: 2.2.2
|
115
117
|
signing_key:
|
116
118
|
specification_version: 4
|
117
119
|
summary: Read xlsx data the Ruby way
|
@@ -120,6 +122,8 @@ test_files:
|
|
120
122
|
- test/date1904_test.rb
|
121
123
|
- test/datetime_test.rb
|
122
124
|
- test/datetimes.xlsx
|
125
|
+
- test/lower_case_sharedstrings.xlsx
|
126
|
+
- test/lower_case_sharedstrings_test.rb
|
123
127
|
- test/performance_test.rb
|
124
128
|
- test/sesame_street_blog.xlsx
|
125
129
|
- test/shared_strings.xml
|