simple_xlsx_reader 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/simple_xlsx_reader.rb +23 -19
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/sesame_street_blog.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +71 -3
- data/test/styles.xml +65 -0
- metadata +9 -6
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -122,20 +122,19 @@ module SimpleXlsxReader
|
|
122
122
|
colname ? colname.next! : colname = 'A'
|
123
123
|
colnum += 1
|
124
124
|
|
125
|
-
xcell = xrow.
|
126
|
-
%(xmlns:c[@r="#{colname + (rownum + 1).to_s}"]))
|
125
|
+
xcell = xrow.at_xpath(
|
126
|
+
%(xmlns:c[@r="#{colname + (rownum + 1).to_s}"]))
|
127
127
|
|
128
128
|
# empty 'General' columns might not be in the xml
|
129
129
|
next cells << nil if xcell.nil?
|
130
130
|
|
131
|
-
type
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
style_types[xcell.attributes['s'].value.to_i]
|
131
|
+
type = xcell.attributes['t'] &&
|
132
|
+
xcell.attributes['t'].value
|
133
|
+
style = xcell.attributes['s'] &&
|
134
|
+
style_types[xcell.attributes['s'].value.to_i]
|
136
135
|
|
137
136
|
cells << begin
|
138
|
-
self.class.cast(xcell.text.strip, type, :shared_strings => shared_strings)
|
137
|
+
self.class.cast(xcell.at_xpath('xmlns:v').text.strip, type, style, :shared_strings => shared_strings)
|
139
138
|
rescue => e
|
140
139
|
if !SimpleXlsxReader.configuration.catch_cell_load_errors
|
141
140
|
error = CellLoadError.new(
|
@@ -166,14 +165,13 @@ module SimpleXlsxReader
|
|
166
165
|
# the most robust strategy, but it likely fits 99% of use cases
|
167
166
|
# considering it's not a problem with actual excel docs.
|
168
167
|
def last_column(xsheet)
|
169
|
-
dimension = xsheet.
|
168
|
+
dimension = xsheet.at_xpath('/xmlns:worksheet/xmlns:dimension')
|
170
169
|
if dimension
|
171
|
-
dimension.attributes['ref'].value.
|
172
|
-
|
170
|
+
col = dimension.attributes['ref'].value.match(/:([A-Z]*)[1-9]*/)
|
171
|
+
col ? col.captures.first : 'A'
|
173
172
|
else
|
174
|
-
xsheet.at_xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c[last()]")
|
175
|
-
|
176
|
-
match(/([A-Z]*)[1-9]*/).captures.first
|
173
|
+
last = xsheet.at_xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c[last()]")
|
174
|
+
last ? last.attributes['r'].value.match(/([A-Z]*)[1-9]*/).captures.first : 'A'
|
177
175
|
end
|
178
176
|
end
|
179
177
|
|
@@ -204,15 +202,15 @@ module SimpleXlsxReader
|
|
204
202
|
return nil if id.nil?
|
205
203
|
|
206
204
|
id = id.to_i
|
207
|
-
if id
|
205
|
+
if id >= 164 # custom style, arg!
|
208
206
|
custom_style_types[id]
|
209
207
|
else # we should know this one
|
210
208
|
NumFmtMap[id]
|
211
209
|
end
|
212
210
|
end
|
213
211
|
|
214
|
-
# Map of (numFmtId
|
215
|
-
# ex. {
|
212
|
+
# Map of (numFmtId >= 164) (custom styles) to our best guess at the type
|
213
|
+
# ex. {164 => :date_time}
|
216
214
|
def custom_style_types
|
217
215
|
@custom_style_types ||=
|
218
216
|
xml.styles.xpath('/xmlns:styleSheet/xmlns:numFmts/xmlns:numFmt').
|
@@ -258,9 +256,15 @@ module SimpleXlsxReader
|
|
258
256
|
#
|
259
257
|
# options:
|
260
258
|
# - shared_strings: needed for 's' (shared string) type
|
261
|
-
def self.cast(value, type, options = {})
|
259
|
+
def self.cast(value, type, style, options = {})
|
262
260
|
return nil if value.nil? || value.empty?
|
263
261
|
|
262
|
+
# Sometimes the type is dictated by the style alone
|
263
|
+
if type.nil? ||
|
264
|
+
(type == 'n' && [:date, :time, :date_time].include?(style))
|
265
|
+
type = style
|
266
|
+
end
|
267
|
+
|
264
268
|
case type
|
265
269
|
|
266
270
|
##
|
@@ -371,7 +375,7 @@ module SimpleXlsxReader
|
|
371
375
|
if xml.shared_strings
|
372
376
|
xml.shared_strings.xpath('/xmlns:sst/xmlns:si').map do |xsst|
|
373
377
|
# a shared string can be a single value...
|
374
|
-
sst = xsst.
|
378
|
+
sst = xsst.at_xpath('xmlns:t/text()')
|
375
379
|
sst = sst.text if sst
|
376
380
|
# ... or a composite of separately styled words/characters
|
377
381
|
sst ||= xsst.xpath('xmlns:r/xmlns:t/text()').map(&:text).join
|
Binary file
|
@@ -17,7 +17,8 @@ describe SimpleXlsxReader do
|
|
17
17
|
"Posts"=>
|
18
18
|
[["Author Name", "Title", "Body", "Created At", "Comment Count"],
|
19
19
|
["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
|
20
|
-
["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2]
|
20
|
+
["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2],
|
21
|
+
["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0]]
|
21
22
|
})
|
22
23
|
end
|
23
24
|
end
|
@@ -27,13 +28,43 @@ describe SimpleXlsxReader do
|
|
27
28
|
|
28
29
|
describe '::cast' do
|
29
30
|
it 'reads type s as a shared string' do
|
30
|
-
described_class.cast('1', 's', :shared_strings => ['a', 'b', 'c']).
|
31
|
+
described_class.cast('1', 's', nil, :shared_strings => ['a', 'b', 'c']).
|
31
32
|
must_equal 'b'
|
32
33
|
end
|
33
34
|
|
34
35
|
it 'reads type inlineStr as a string' do
|
35
36
|
xml = Nokogiri::XML(%( <c t="inlineStr"><is><t>the value</t></is></c> ))
|
36
|
-
described_class.cast(xml.text, 'inlineStr').must_equal 'the value'
|
37
|
+
described_class.cast(xml.text, nil, 'inlineStr').must_equal 'the value'
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'reads date styles' do
|
41
|
+
described_class.cast('41505', nil, :date).
|
42
|
+
must_equal Date.parse('2013-08-19')
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'reads time styles' do
|
46
|
+
described_class.cast('41505.77084', nil, :time).
|
47
|
+
must_equal Time.parse('2013-08-19 18:30 UTC')
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'reads date_time styles' do
|
51
|
+
described_class.cast('41505.77084', nil, :date_time).
|
52
|
+
must_equal Time.parse('2013-08-19 18:30 UTC')
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'reads number types styled as dates' do
|
56
|
+
described_class.cast('41505', 'n', :date).
|
57
|
+
must_equal Date.parse('2013-08-19')
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'reads number types styled as times' do
|
61
|
+
described_class.cast('41505.77084', 'n', :time).
|
62
|
+
must_equal Time.parse('2013-08-19 18:30 UTC')
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'reads number types styled as date_times' do
|
66
|
+
described_class.cast('41505.77084', 'n', :date_time).
|
67
|
+
must_equal Time.parse('2013-08-19 18:30 UTC')
|
37
68
|
end
|
38
69
|
end
|
39
70
|
|
@@ -56,6 +87,23 @@ describe SimpleXlsxReader do
|
|
56
87
|
end
|
57
88
|
end
|
58
89
|
|
90
|
+
describe '#style_types' do
|
91
|
+
let(:xml) do
|
92
|
+
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
93
|
+
xml.styles = Nokogiri::XML(File.read(
|
94
|
+
File.join(File.dirname(__FILE__), 'styles.xml') ))
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
let(:mapper) do
|
99
|
+
SimpleXlsxReader::Document::Mapper.new(xml)
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'reads custom formatted styles (numFmtId >= 164)' do
|
103
|
+
mapper.style_types[1].must_equal :date_time
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
59
107
|
describe '#last_column' do
|
60
108
|
|
61
109
|
let(:generic_style) do
|
@@ -95,6 +143,17 @@ describe SimpleXlsxReader do
|
|
95
143
|
)
|
96
144
|
end
|
97
145
|
|
146
|
+
let(:empty_sheet) do
|
147
|
+
Nokogiri::XML(
|
148
|
+
<<-XML
|
149
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
150
|
+
<dimension ref="A1" />
|
151
|
+
<sheetData>
|
152
|
+
</sheetData>
|
153
|
+
</worksheet>
|
154
|
+
XML
|
155
|
+
)
|
156
|
+
end
|
98
157
|
let(:xml) do
|
99
158
|
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
100
159
|
xml.sheets = [sheet]
|
@@ -112,6 +171,15 @@ describe SimpleXlsxReader do
|
|
112
171
|
sheet.xpath('/xmlns:worksheet/xmlns:dimension').remove
|
113
172
|
subject.last_column(sheet).must_equal 'D'
|
114
173
|
end
|
174
|
+
|
175
|
+
it 'returns "A" if the dimension is just one cell' do
|
176
|
+
subject.last_column(empty_sheet).must_equal 'A'
|
177
|
+
end
|
178
|
+
|
179
|
+
it 'returns "A" if the sheet is just one cell, but /worksheet/dimension is missing' do
|
180
|
+
sheet.at_xpath('/xmlns:worksheet/xmlns:dimension').remove
|
181
|
+
subject.last_column(empty_sheet).must_equal 'A'
|
182
|
+
end
|
115
183
|
end
|
116
184
|
|
117
185
|
describe "parse errors" do
|
data/test/styles.xml
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
2
|
+
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
|
3
|
+
<numFmts count="1">
|
4
|
+
<numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
|
5
|
+
</numFmts>
|
6
|
+
<fonts count="3" x14ac:knownFonts="1">
|
7
|
+
<font>
|
8
|
+
<sz val="12"/>
|
9
|
+
<color theme="1"/>
|
10
|
+
<name val="Calibri"/>
|
11
|
+
<family val="2"/>
|
12
|
+
<scheme val="minor"/>
|
13
|
+
</font>
|
14
|
+
<font>
|
15
|
+
<u/>
|
16
|
+
<sz val="12"/>
|
17
|
+
<color theme="10"/>
|
18
|
+
<name val="Calibri"/>
|
19
|
+
<family val="2"/>
|
20
|
+
<scheme val="minor"/>
|
21
|
+
</font>
|
22
|
+
<font>
|
23
|
+
<u/>
|
24
|
+
<sz val="12"/>
|
25
|
+
<color theme="11"/>
|
26
|
+
<name val="Calibri"/>
|
27
|
+
<family val="2"/>
|
28
|
+
<scheme val="minor"/>
|
29
|
+
</font>
|
30
|
+
</fonts>
|
31
|
+
<fills count="2">
|
32
|
+
<fill>
|
33
|
+
<patternFill patternType="none"/>
|
34
|
+
</fill>
|
35
|
+
<fill>
|
36
|
+
<patternFill patternType="gray125"/>
|
37
|
+
</fill>
|
38
|
+
</fills>
|
39
|
+
<borders count="1">
|
40
|
+
<border>
|
41
|
+
<left/>
|
42
|
+
<right/>
|
43
|
+
<top/>
|
44
|
+
<bottom/>
|
45
|
+
<diagonal/>
|
46
|
+
</border>
|
47
|
+
</borders>
|
48
|
+
<cellStyleXfs count="3">
|
49
|
+
<xf numFmtId="0" fontId="0" fillId="0" borderId="0"/>
|
50
|
+
<xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
|
51
|
+
<xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
|
52
|
+
</cellStyleXfs>
|
53
|
+
<cellXfs count="3">
|
54
|
+
<xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
|
55
|
+
<xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
56
|
+
<xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
|
57
|
+
</cellXfs>
|
58
|
+
<cellStyles count="3">
|
59
|
+
<cellStyle name="Followed Hyperlink" xfId="2" builtinId="9" hidden="1"/>
|
60
|
+
<cellStyle name="Hyperlink" xfId="1" builtinId="8" hidden="1"/>
|
61
|
+
<cellStyle name="Normal" xfId="0" builtinId="0"/>
|
62
|
+
</cellStyles>
|
63
|
+
<dxfs count="0"/>
|
64
|
+
<tableStyles count="0" defaultTableStyle="TableStyleMedium9" defaultPivotStyle="PivotStyleMedium4"/>
|
65
|
+
</styleSheet>
|
metadata
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
5
4
|
prerelease:
|
5
|
+
version: 0.9.4
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Woody Peterson
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-05-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
+
type: :runtime
|
16
17
|
requirement: !ruby/object:Gem::Requirement
|
17
18
|
none: false
|
18
19
|
requirements:
|
19
20
|
- - ! '>='
|
20
21
|
- !ruby/object:Gem::Version
|
21
22
|
version: '0'
|
22
|
-
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
@@ -29,13 +29,13 @@ dependencies:
|
|
29
29
|
version: '0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: rubyzip
|
32
|
+
type: :runtime
|
32
33
|
requirement: !ruby/object:Gem::Requirement
|
33
34
|
none: false
|
34
35
|
requirements:
|
35
36
|
- - ! '>='
|
36
37
|
- !ruby/object:Gem::Version
|
37
38
|
version: '0'
|
38
|
-
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
@@ -45,13 +45,13 @@ dependencies:
|
|
45
45
|
version: '0'
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: minitest
|
48
|
+
type: :development
|
48
49
|
requirement: !ruby/object:Gem::Requirement
|
49
50
|
none: false
|
50
51
|
requirements:
|
51
52
|
- - ! '>='
|
52
53
|
- !ruby/object:Gem::Version
|
53
54
|
version: '0'
|
54
|
-
type: :development
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
none: false
|
@@ -61,13 +61,13 @@ dependencies:
|
|
61
61
|
version: '0'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: pry
|
64
|
+
type: :development
|
64
65
|
requirement: !ruby/object:Gem::Requirement
|
65
66
|
none: false
|
66
67
|
requirements:
|
67
68
|
- - ! '>='
|
68
69
|
- !ruby/object:Gem::Version
|
69
70
|
version: '0'
|
70
|
-
type: :development
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
@@ -94,6 +94,7 @@ files:
|
|
94
94
|
- test/sesame_street_blog.xlsx
|
95
95
|
- test/shared_strings.xml
|
96
96
|
- test/simple_xlsx_reader_test.rb
|
97
|
+
- test/styles.xml
|
97
98
|
- test/test_helper.rb
|
98
99
|
homepage: ''
|
99
100
|
licenses: []
|
@@ -123,4 +124,6 @@ test_files:
|
|
123
124
|
- test/sesame_street_blog.xlsx
|
124
125
|
- test/shared_strings.xml
|
125
126
|
- test/simple_xlsx_reader_test.rb
|
127
|
+
- test/styles.xml
|
126
128
|
- test/test_helper.rb
|
129
|
+
has_rdoc:
|