simple_xlsx_reader 3.0.1 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/lib/simple_xlsx_reader/loader/sheet_parser.rb +3 -4
- data/lib/simple_xlsx_reader/loader/style_types_parser.rb +33 -32
- data/lib/simple_xlsx_reader/loader.rb +1 -1
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/percentages_n_currencies.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +64 -1
- data/test/test_xlsx_builder.rb +1 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5e0de15ab326f027127322eac9d88d752ffd2d55797df23ed525a7eea1d9833
|
4
|
+
data.tar.gz: 415972aaf4f77e4bdb5e60b0095cf01f6d7e24a0cd28d493c8ad8d01fa50d66b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cfea3adb62767bedfe6470377b54078066b1bf07e13c065160a31c260261f65a658c34c37dc850c01dd27ba7038a4261f1a6ea9c6ff42771e8113dbabc51897b
|
7
|
+
data.tar.gz: a4b996c3d15b2f54a61d8fc90366ba502d6bc108b1bfe5ed34b9d46f63fe7c52381f181421cc31eea46b7c73e6925d3aa3cfeb8e1b737021be93e4863ea9e703
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
### 4.0.1
|
2
|
+
|
3
|
+
* Fix nil error when handling some inline strings
|
4
|
+
|
5
|
+
Inline strings are almost exclusively used by non-Excel XLSX
|
6
|
+
implementations, but are valid, and sometimes have nil chunks.
|
7
|
+
|
8
|
+
Also, inline strings weren't preserving whitespace if Nokogiri is
|
9
|
+
parsing the string in chunks, as it does when encountering escaped
|
10
|
+
characters. Fixed.
|
11
|
+
|
12
|
+
### 4.0.0
|
13
|
+
|
14
|
+
* Fix percentage rounding errors. Previously we were dividing by 100, when we
|
15
|
+
actually don't need to, so percentage types were 100x too small. Fixes #21.
|
16
|
+
Major bump because workarounds might have been implemented for previous
|
17
|
+
incorrect behavior.
|
18
|
+
* Fix small oddity in one currency format where round numbers would be cast
|
19
|
+
to an integer instead of a float.
|
20
|
+
|
1
21
|
### 3.0.1
|
2
22
|
|
3
23
|
* Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
|
@@ -80,7 +80,7 @@ module SimpleXlsxReader
|
|
80
80
|
captured =
|
81
81
|
begin
|
82
82
|
SimpleXlsxReader::Loader.cast(
|
83
|
-
string
|
83
|
+
string, @type, @style,
|
84
84
|
url: @url || hyperlinks_by_cell&.[](@cell_name),
|
85
85
|
shared_strings: shared_strings,
|
86
86
|
base_date: base_date
|
@@ -99,11 +99,10 @@ module SimpleXlsxReader
|
|
99
99
|
else
|
100
100
|
@load_errors[[row_idx, col_idx]] = e.message
|
101
101
|
|
102
|
-
string
|
102
|
+
string
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
-
|
107
106
|
# For some reason I can't figure out in a reasonable timeframe,
|
108
107
|
# SAX parsing some workbooks captures separate strings in the same cell
|
109
108
|
# when we encounter UTF-8, although I can't get workbooks made in my
|
@@ -112,7 +111,7 @@ module SimpleXlsxReader
|
|
112
111
|
# to make it not do this (looked, couldn't find it).
|
113
112
|
#
|
114
113
|
# Loading the workbook test/chunky_utf8.xlsx repros the issue.
|
115
|
-
@captured = @captured ? @captured + captured : captured
|
114
|
+
@captured = @captured ? @captured + (captured || '') : captured
|
116
115
|
end
|
117
116
|
|
118
117
|
def end_element(name)
|
@@ -9,38 +9,39 @@ module SimpleXlsxReader
|
|
9
9
|
|
10
10
|
# Map of non-custom numFmtId to casting symbol
|
11
11
|
NumFmtMap = {
|
12
|
-
0 => :string,
|
13
|
-
1 => :fixnum,
|
14
|
-
2 => :float,
|
15
|
-
3 => :fixnum,
|
16
|
-
4 => :float,
|
17
|
-
5 => :unsupported,
|
18
|
-
6 => :unsupported,
|
19
|
-
7 => :unsupported,
|
20
|
-
8 => :unsupported,
|
21
|
-
9 => :percentage,
|
22
|
-
10 => :percentage,
|
23
|
-
11 => :bignum,
|
24
|
-
12 => :unsupported,
|
25
|
-
13 => :unsupported,
|
26
|
-
14 => :date,
|
27
|
-
15 => :date,
|
28
|
-
16 => :date,
|
29
|
-
17 => :date,
|
30
|
-
18 => :time,
|
31
|
-
19 => :time,
|
32
|
-
20 => :time,
|
33
|
-
21 => :time,
|
34
|
-
22 => :date_time,
|
35
|
-
37 => :unsupported,
|
36
|
-
38 => :unsupported,
|
37
|
-
39 => :unsupported,
|
38
|
-
40 => :unsupported,
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
12
|
+
0 => :string, # General
|
13
|
+
1 => :fixnum, # 0
|
14
|
+
2 => :float, # 0.00
|
15
|
+
3 => :fixnum, # #,##0
|
16
|
+
4 => :float, # #,##0.00
|
17
|
+
5 => :unsupported, # $#,##0_);($#,##0)
|
18
|
+
6 => :unsupported, # $#,##0_);[Red]($#,##0)
|
19
|
+
7 => :unsupported, # $#,##0.00_);($#,##0.00)
|
20
|
+
8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
|
21
|
+
9 => :percentage, # 0%
|
22
|
+
10 => :percentage, # 0.00%
|
23
|
+
11 => :bignum, # 0.00E+00
|
24
|
+
12 => :unsupported, # # ?/?
|
25
|
+
13 => :unsupported, # # ??/??
|
26
|
+
14 => :date, # mm-dd-yy
|
27
|
+
15 => :date, # d-mmm-yy
|
28
|
+
16 => :date, # d-mmm
|
29
|
+
17 => :date, # mmm-yy
|
30
|
+
18 => :time, # h:mm AM/PM
|
31
|
+
19 => :time, # h:mm:ss AM/PM
|
32
|
+
20 => :time, # h:mm
|
33
|
+
21 => :time, # h:mm:ss
|
34
|
+
22 => :date_time, # m/d/yy h:mm
|
35
|
+
37 => :unsupported, # #,##0 ;(#,##0)
|
36
|
+
38 => :unsupported, # #,##0 ;[Red](#,##0)
|
37
|
+
39 => :unsupported, # #,##0.00;(#,##0.00)
|
38
|
+
40 => :unsupported, # #,##0.00;[Red](#,##0.00)
|
39
|
+
44 => :float, # some odd currency format ?from Office 2007?
|
40
|
+
45 => :time, # mm:ss
|
41
|
+
46 => :time, # [h]:mm:ss
|
42
|
+
47 => :time, # mmss.0
|
43
|
+
48 => :bignum, # ##0.0E+0
|
44
|
+
49 => :unsupported # @
|
44
45
|
}.freeze
|
45
46
|
|
46
47
|
def parse
|
@@ -162,7 +162,7 @@ module SimpleXlsxReader
|
|
162
162
|
when :float
|
163
163
|
value.to_f
|
164
164
|
when :percentage
|
165
|
-
value.to_f
|
165
|
+
value.to_f
|
166
166
|
# the trickiest. note that all these formats can vary on
|
167
167
|
# whether they actually contain a date, time, or datetime.
|
168
168
|
when :date, :time, :date_time
|
Binary file
|
@@ -122,6 +122,52 @@ describe SimpleXlsxReader do
|
|
122
122
|
|
123
123
|
let(:reader) { SimpleXlsxReader.open(xlsx.archive.path) }
|
124
124
|
|
125
|
+
describe 'when parsing escaped characters' do
|
126
|
+
let(:escaped_content) do
|
127
|
+
'<a href="https://www.example.com">Link A</a> &bull; <a href="https://www.example.com">Link B</a>'
|
128
|
+
end
|
129
|
+
|
130
|
+
let(:unescaped_content) do
|
131
|
+
'<a href="https://www.example.com">Link A</a> • <a href="https://www.example.com">Link B</a>'
|
132
|
+
end
|
133
|
+
|
134
|
+
let(:sheet) do
|
135
|
+
<<~XML
|
136
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
137
|
+
<dimension ref="A1:B1" />
|
138
|
+
<sheetData>
|
139
|
+
<row r="1">
|
140
|
+
<c r="A1" s="1" t="s">
|
141
|
+
<v>0</v>
|
142
|
+
</c>
|
143
|
+
<c r='B1' s='0'>
|
144
|
+
<v>#{escaped_content}</v>
|
145
|
+
</c>
|
146
|
+
</row>
|
147
|
+
</sheetData>
|
148
|
+
</worksheet>
|
149
|
+
XML
|
150
|
+
end
|
151
|
+
|
152
|
+
let(:shared_strings) do
|
153
|
+
<<~XML
|
154
|
+
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="1" uniqueCount="1">
|
155
|
+
<si>
|
156
|
+
<t>#{escaped_content}</t>
|
157
|
+
</si>
|
158
|
+
</sst>
|
159
|
+
XML
|
160
|
+
end
|
161
|
+
|
162
|
+
it 'loads correctly using inline strings' do
|
163
|
+
_(reader.sheets[0].rows.slurp[0][0]).must_equal(unescaped_content)
|
164
|
+
end
|
165
|
+
|
166
|
+
it 'loads correctly using shared strings' do
|
167
|
+
_(reader.sheets[0].rows.slurp[0][1]).must_equal(unescaped_content)
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
125
171
|
describe 'Sheet#rows#each(headers: true)' do
|
126
172
|
let(:sheet) do
|
127
173
|
<<~XML
|
@@ -929,7 +975,7 @@ describe SimpleXlsxReader do
|
|
929
975
|
)
|
930
976
|
)
|
931
977
|
end
|
932
|
-
|
978
|
+
|
933
979
|
it "reads 'Generic' cells with numbers as numbers" do
|
934
980
|
_(@row[9]).must_equal 1
|
935
981
|
end
|
@@ -1015,4 +1061,21 @@ describe SimpleXlsxReader do
|
|
1015
1061
|
)
|
1016
1062
|
end
|
1017
1063
|
end
|
1064
|
+
|
1065
|
+
describe 'when using percentages & currencies' do
|
1066
|
+
let(:pnc_path) do
|
1067
|
+
# This file provided by a GitHub user having parse errors in these fields
|
1068
|
+
File.join(File.dirname(__FILE__), 'percentages_n_currencies.xlsx')
|
1069
|
+
end
|
1070
|
+
|
1071
|
+
let(:sheet) { SimpleXlsxReader.open(pnc_path).sheets[0] }
|
1072
|
+
|
1073
|
+
it 'reads percentages as floats of the form 0.XX' do
|
1074
|
+
_(sheet.rows.slurp[1][2]).must_equal(0.87)
|
1075
|
+
end
|
1076
|
+
|
1077
|
+
it 'reads currencies as floats' do
|
1078
|
+
_(sheet.rows.slurp[1][4]).must_equal(300.0)
|
1079
|
+
end
|
1080
|
+
end
|
1018
1081
|
end
|
data/test/test_xlsx_builder.rb
CHANGED
@@ -57,7 +57,6 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
|
|
57
57
|
self.styles ||= DEFAULTS[:styles]
|
58
58
|
self.sheets ||= [DEFAULTS[:sheet]]
|
59
59
|
self.rels ||= []
|
60
|
-
self.shared_strings ||= []
|
61
60
|
end
|
62
61
|
|
63
62
|
def archive
|
@@ -76,7 +75,7 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
|
|
76
75
|
styles_file.write(styles)
|
77
76
|
end
|
78
77
|
|
79
|
-
if shared_strings
|
78
|
+
if shared_strings
|
80
79
|
zip.get_output_stream('xl/sharedStrings.xml') do |ss_file|
|
81
80
|
ss_file.write(shared_strings)
|
82
81
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-03-
|
11
|
+
date: 2023-03-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- test/lower_case_sharedstrings.xlsx
|
116
116
|
- test/lower_case_sharedstrings_test.rb
|
117
117
|
- test/misc_numbers.xlsx
|
118
|
+
- test/percentages_n_currencies.xlsx
|
118
119
|
- test/performance_test.rb
|
119
120
|
- test/sesame_street_blog.xlsx
|
120
121
|
- test/shared_strings.xml
|
@@ -156,6 +157,7 @@ test_files:
|
|
156
157
|
- test/lower_case_sharedstrings.xlsx
|
157
158
|
- test/lower_case_sharedstrings_test.rb
|
158
159
|
- test/misc_numbers.xlsx
|
160
|
+
- test/percentages_n_currencies.xlsx
|
159
161
|
- test/performance_test.rb
|
160
162
|
- test/sesame_street_blog.xlsx
|
161
163
|
- test/shared_strings.xml
|