simple_xlsx_reader 3.0.1 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/lib/simple_xlsx_reader/loader/sheet_parser.rb +3 -4
- data/lib/simple_xlsx_reader/loader/style_types_parser.rb +33 -32
- data/lib/simple_xlsx_reader/loader.rb +1 -1
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/percentages_n_currencies.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +64 -1
- data/test/test_xlsx_builder.rb +1 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5e0de15ab326f027127322eac9d88d752ffd2d55797df23ed525a7eea1d9833
|
4
|
+
data.tar.gz: 415972aaf4f77e4bdb5e60b0095cf01f6d7e24a0cd28d493c8ad8d01fa50d66b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cfea3adb62767bedfe6470377b54078066b1bf07e13c065160a31c260261f65a658c34c37dc850c01dd27ba7038a4261f1a6ea9c6ff42771e8113dbabc51897b
|
7
|
+
data.tar.gz: a4b996c3d15b2f54a61d8fc90366ba502d6bc108b1bfe5ed34b9d46f63fe7c52381f181421cc31eea46b7c73e6925d3aa3cfeb8e1b737021be93e4863ea9e703
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
### 4.0.1
|
2
|
+
|
3
|
+
* Fix nil error when handling some inline strings
|
4
|
+
|
5
|
+
Inline strings are almost exclusively used by non-Excel XLSX
|
6
|
+
implementations, but are valid, and sometimes have nil chunks.
|
7
|
+
|
8
|
+
Also, inline strings weren't preserving whitespace when Nokogiri was
|
9
|
+
parsing the string in chunks, as it does when encountering escaped
|
10
|
+
characters. Fixed.
|
11
|
+
|
12
|
+
### 4.0.0
|
13
|
+
|
14
|
+
* Fix percentage rounding errors. Previously we were dividing by 100, when we
|
15
|
+
actually don't need to, so percentage types were 100x too small. Fixes #21.
|
16
|
+
Major bump because workarounds might have been implemented for previous
|
17
|
+
incorrect behavior.
|
18
|
+
* Fix small oddity in one currency format where round numbers would be cast
|
19
|
+
to an integer instead of a float.
|
20
|
+
|
1
21
|
### 3.0.1
|
2
22
|
|
3
23
|
* Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
|
@@ -80,7 +80,7 @@ module SimpleXlsxReader
|
|
80
80
|
captured =
|
81
81
|
begin
|
82
82
|
SimpleXlsxReader::Loader.cast(
|
83
|
-
string
|
83
|
+
string, @type, @style,
|
84
84
|
url: @url || hyperlinks_by_cell&.[](@cell_name),
|
85
85
|
shared_strings: shared_strings,
|
86
86
|
base_date: base_date
|
@@ -99,11 +99,10 @@ module SimpleXlsxReader
|
|
99
99
|
else
|
100
100
|
@load_errors[[row_idx, col_idx]] = e.message
|
101
101
|
|
102
|
-
string
|
102
|
+
string
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
-
|
107
106
|
# For some reason I can't figure out in a reasonable timeframe,
|
108
107
|
# SAX parsing some workbooks captures separate strings in the same cell
|
109
108
|
# when we encounter UTF-8, although I can't get workbooks made in my
|
@@ -112,7 +111,7 @@ module SimpleXlsxReader
|
|
112
111
|
# to make it not do this (looked, couldn't find it).
|
113
112
|
#
|
114
113
|
# Loading the workbook test/chunky_utf8.xlsx repros the issue.
|
115
|
-
@captured = @captured ? @captured + captured : captured
|
114
|
+
@captured = @captured ? @captured + (captured || '') : captured
|
116
115
|
end
|
117
116
|
|
118
117
|
def end_element(name)
|
@@ -9,38 +9,39 @@ module SimpleXlsxReader
|
|
9
9
|
|
10
10
|
# Map of non-custom numFmtId to casting symbol
|
11
11
|
NumFmtMap = {
|
12
|
-
0 => :string,
|
13
|
-
1 => :fixnum,
|
14
|
-
2 => :float,
|
15
|
-
3 => :fixnum,
|
16
|
-
4 => :float,
|
17
|
-
5 => :unsupported,
|
18
|
-
6 => :unsupported,
|
19
|
-
7 => :unsupported,
|
20
|
-
8 => :unsupported,
|
21
|
-
9 => :percentage,
|
22
|
-
10 => :percentage,
|
23
|
-
11 => :bignum,
|
24
|
-
12 => :unsupported,
|
25
|
-
13 => :unsupported,
|
26
|
-
14 => :date,
|
27
|
-
15 => :date,
|
28
|
-
16 => :date,
|
29
|
-
17 => :date,
|
30
|
-
18 => :time,
|
31
|
-
19 => :time,
|
32
|
-
20 => :time,
|
33
|
-
21 => :time,
|
34
|
-
22 => :date_time,
|
35
|
-
37 => :unsupported,
|
36
|
-
38 => :unsupported,
|
37
|
-
39 => :unsupported,
|
38
|
-
40 => :unsupported,
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
12
|
+
0 => :string, # General
|
13
|
+
1 => :fixnum, # 0
|
14
|
+
2 => :float, # 0.00
|
15
|
+
3 => :fixnum, # #,##0
|
16
|
+
4 => :float, # #,##0.00
|
17
|
+
5 => :unsupported, # $#,##0_);($#,##0)
|
18
|
+
6 => :unsupported, # $#,##0_);[Red]($#,##0)
|
19
|
+
7 => :unsupported, # $#,##0.00_);($#,##0.00)
|
20
|
+
8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
|
21
|
+
9 => :percentage, # 0%
|
22
|
+
10 => :percentage, # 0.00%
|
23
|
+
11 => :bignum, # 0.00E+00
|
24
|
+
12 => :unsupported, # # ?/?
|
25
|
+
13 => :unsupported, # # ??/??
|
26
|
+
14 => :date, # mm-dd-yy
|
27
|
+
15 => :date, # d-mmm-yy
|
28
|
+
16 => :date, # d-mmm
|
29
|
+
17 => :date, # mmm-yy
|
30
|
+
18 => :time, # h:mm AM/PM
|
31
|
+
19 => :time, # h:mm:ss AM/PM
|
32
|
+
20 => :time, # h:mm
|
33
|
+
21 => :time, # h:mm:ss
|
34
|
+
22 => :date_time, # m/d/yy h:mm
|
35
|
+
37 => :unsupported, # #,##0 ;(#,##0)
|
36
|
+
38 => :unsupported, # #,##0 ;[Red](#,##0)
|
37
|
+
39 => :unsupported, # #,##0.00;(#,##0.00)
|
38
|
+
40 => :unsupported, # #,##0.00;[Red](#,##0.00)
|
39
|
+
44 => :float, # some odd currency format ?from Office 2007?
|
40
|
+
45 => :time, # mm:ss
|
41
|
+
46 => :time, # [h]:mm:ss
|
42
|
+
47 => :time, # mmss.0
|
43
|
+
48 => :bignum, # ##0.0E+0
|
44
|
+
49 => :unsupported # @
|
44
45
|
}.freeze
|
45
46
|
|
46
47
|
def parse
|
@@ -162,7 +162,7 @@ module SimpleXlsxReader
|
|
162
162
|
when :float
|
163
163
|
value.to_f
|
164
164
|
when :percentage
|
165
|
-
value.to_f / 100
|
165
|
+
value.to_f
|
166
166
|
# the trickiest. note that all these formats can vary on
|
167
167
|
# whether they actually contain a date, time, or datetime.
|
168
168
|
when :date, :time, :date_time
|
Binary file
|
@@ -122,6 +122,52 @@ describe SimpleXlsxReader do
|
|
122
122
|
|
123
123
|
let(:reader) { SimpleXlsxReader.open(xlsx.archive.path) }
|
124
124
|
|
125
|
+
describe 'when parsing escaped characters' do
|
126
|
+
let(:escaped_content) do
|
127
|
+
'<a href="https://www.example.com">Link A</a> &bull; <a href="https://www.example.com">Link B</a>'
|
128
|
+
end
|
129
|
+
|
130
|
+
let(:unescaped_content) do
|
131
|
+
'<a href="https://www.example.com">Link A</a> • <a href="https://www.example.com">Link B</a>'
|
132
|
+
end
|
133
|
+
|
134
|
+
let(:sheet) do
|
135
|
+
<<~XML
|
136
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
137
|
+
<dimension ref="A1:B1" />
|
138
|
+
<sheetData>
|
139
|
+
<row r="1">
|
140
|
+
<c r="A1" s="1" t="s">
|
141
|
+
<v>0</v>
|
142
|
+
</c>
|
143
|
+
<c r='B1' s='0'>
|
144
|
+
<v>#{escaped_content}</v>
|
145
|
+
</c>
|
146
|
+
</row>
|
147
|
+
</sheetData>
|
148
|
+
</worksheet>
|
149
|
+
XML
|
150
|
+
end
|
151
|
+
|
152
|
+
let(:shared_strings) do
|
153
|
+
<<~XML
|
154
|
+
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="1" uniqueCount="1">
|
155
|
+
<si>
|
156
|
+
<t>#{escaped_content}</t>
|
157
|
+
</si>
|
158
|
+
</sst>
|
159
|
+
XML
|
160
|
+
end
|
161
|
+
|
162
|
+
it 'loads correctly using inline strings' do
|
163
|
+
_(reader.sheets[0].rows.slurp[0][0]).must_equal(unescaped_content)
|
164
|
+
end
|
165
|
+
|
166
|
+
it 'loads correctly using shared strings' do
|
167
|
+
_(reader.sheets[0].rows.slurp[0][1]).must_equal(unescaped_content)
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
125
171
|
describe 'Sheet#rows#each(headers: true)' do
|
126
172
|
let(:sheet) do
|
127
173
|
<<~XML
|
@@ -929,7 +975,7 @@ describe SimpleXlsxReader do
|
|
929
975
|
)
|
930
976
|
)
|
931
977
|
end
|
932
|
-
|
978
|
+
|
933
979
|
it "reads 'Generic' cells with numbers as numbers" do
|
934
980
|
_(@row[9]).must_equal 1
|
935
981
|
end
|
@@ -1015,4 +1061,21 @@ describe SimpleXlsxReader do
|
|
1015
1061
|
)
|
1016
1062
|
end
|
1017
1063
|
end
|
1064
|
+
|
1065
|
+
describe 'when using percentages & currencies' do
|
1066
|
+
let(:pnc_path) do
|
1067
|
+
# This file provided by a GitHub user having parse errors in these fields
|
1068
|
+
File.join(File.dirname(__FILE__), 'percentages_n_currencies.xlsx')
|
1069
|
+
end
|
1070
|
+
|
1071
|
+
let(:sheet) { SimpleXlsxReader.open(pnc_path).sheets[0] }
|
1072
|
+
|
1073
|
+
it 'reads percentages as floats of the form 0.XX' do
|
1074
|
+
_(sheet.rows.slurp[1][2]).must_equal(0.87)
|
1075
|
+
end
|
1076
|
+
|
1077
|
+
it 'reads currencies as floats' do
|
1078
|
+
_(sheet.rows.slurp[1][4]).must_equal(300.0)
|
1079
|
+
end
|
1080
|
+
end
|
1018
1081
|
end
|
data/test/test_xlsx_builder.rb
CHANGED
@@ -57,7 +57,6 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
|
|
57
57
|
self.styles ||= DEFAULTS[:styles]
|
58
58
|
self.sheets ||= [DEFAULTS[:sheet]]
|
59
59
|
self.rels ||= []
|
60
|
-
self.shared_strings ||= []
|
61
60
|
end
|
62
61
|
|
63
62
|
def archive
|
@@ -76,7 +75,7 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
|
|
76
75
|
styles_file.write(styles)
|
77
76
|
end
|
78
77
|
|
79
|
-
if shared_strings
|
78
|
+
if shared_strings
|
80
79
|
zip.get_output_stream('xl/sharedStrings.xml') do |ss_file|
|
81
80
|
ss_file.write(shared_strings)
|
82
81
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.1
|
4
|
+
version: 4.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-03-
|
11
|
+
date: 2023-03-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- test/lower_case_sharedstrings.xlsx
|
116
116
|
- test/lower_case_sharedstrings_test.rb
|
117
117
|
- test/misc_numbers.xlsx
|
118
|
+
- test/percentages_n_currencies.xlsx
|
118
119
|
- test/performance_test.rb
|
119
120
|
- test/sesame_street_blog.xlsx
|
120
121
|
- test/shared_strings.xml
|
@@ -156,6 +157,7 @@ test_files:
|
|
156
157
|
- test/lower_case_sharedstrings.xlsx
|
157
158
|
- test/lower_case_sharedstrings_test.rb
|
158
159
|
- test/misc_numbers.xlsx
|
160
|
+
- test/percentages_n_currencies.xlsx
|
159
161
|
- test/performance_test.rb
|
160
162
|
- test/sesame_street_blog.xlsx
|
161
163
|
- test/shared_strings.xml
|