simple_xlsx_reader 3.0.1 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f51a4ca0ca865cc2a9ddebeb72fa3db9cc3f309ce2d1a3d34a492f09e22789c
4
- data.tar.gz: 90a2b1ac9071fcef0797f5839652d919169cfcf6862de8926b7b605dcc53cd7e
3
+ metadata.gz: f5e0de15ab326f027127322eac9d88d752ffd2d55797df23ed525a7eea1d9833
4
+ data.tar.gz: 415972aaf4f77e4bdb5e60b0095cf01f6d7e24a0cd28d493c8ad8d01fa50d66b
5
5
  SHA512:
6
- metadata.gz: 38f0844bfa6e30cd9af9414057a767cc3bd7cf6ed11023a7306b18686ad3cb250a70191d9b77f8cbc2a590aaa24f822cbec6c546f667fe6e820bb356ddd369f9
7
- data.tar.gz: 69af022e15fa95404ab0208be1b4b6661ae14033c73477b98b78f01795712313d63952dce0674ba34ee3aecc19105ef28adfa708de738682583f3b672668a251
6
+ metadata.gz: cfea3adb62767bedfe6470377b54078066b1bf07e13c065160a31c260261f65a658c34c37dc850c01dd27ba7038a4261f1a6ea9c6ff42771e8113dbabc51897b
7
+ data.tar.gz: a4b996c3d15b2f54a61d8fc90366ba502d6bc108b1bfe5ed34b9d46f63fe7c52381f181421cc31eea46b7c73e6925d3aa3cfeb8e1b737021be93e4863ea9e703
data/CHANGELOG.md CHANGED
@@ -1,3 +1,23 @@
1
+ ### 4.0.1
2
+
3
+ * Fix nil error when handling some inline strings
4
+
5
+ Inline strings are almost exclusively used by non-Excel XLSX
6
+ implementations, but are valid, and sometimes have nil chunks.
7
+
8
+ Also, inline strings weren't preserving whitespace if Nokogiri is
9
+ parsing the string in chunks, as it does when encountering escaped
10
+ characters. Fixed.
11
+
12
+ ### 4.0.0
13
+
14
+ * Fix percentage rounding errors. Previously we were dividing by 100, when we
15
+ actually don't need to, so percentage types were 100x too small. Fixes #21.
16
+ Major bump because workarounds might have been implemented for previous
17
+ incorrect behavior.
18
+ * Fix small oddity in one currency format where round numbers would be cast
19
+ to an integer instead of a float.
20
+
1
21
  ### 3.0.1
2
22
 
3
23
  * Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
@@ -80,7 +80,7 @@ module SimpleXlsxReader
80
80
  captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
- string.strip, @type, @style,
83
+ string, @type, @style,
84
84
  url: @url || hyperlinks_by_cell&.[](@cell_name),
85
85
  shared_strings: shared_strings,
86
86
  base_date: base_date
@@ -99,11 +99,10 @@ module SimpleXlsxReader
99
99
  else
100
100
  @load_errors[[row_idx, col_idx]] = e.message
101
101
 
102
- string.strip
102
+ string
103
103
  end
104
104
  end
105
105
 
106
-
107
106
  # For some reason I can't figure out in a reasonable timeframe,
108
107
  # SAX parsing some workbooks captures separate strings in the same cell
109
108
  # when we encounter UTF-8, although I can't get workbooks made in my
@@ -112,7 +111,7 @@ module SimpleXlsxReader
112
111
  # to make it not do this (looked, couldn't find it).
113
112
  #
114
113
  # Loading the workbook test/chunky_utf8.xlsx repros the issue.
115
- @captured = @captured ? @captured + captured : captured
114
+ @captured = @captured ? @captured + (captured || '') : captured
116
115
  end
117
116
 
118
117
  def end_element(name)
@@ -9,38 +9,39 @@ module SimpleXlsxReader
9
9
 
10
10
  # Map of non-custom numFmtId to casting symbol
11
11
  NumFmtMap = {
12
- 0 => :string, # General
13
- 1 => :fixnum, # 0
14
- 2 => :float, # 0.00
15
- 3 => :fixnum, # #,##0
16
- 4 => :float, # #,##0.00
17
- 5 => :unsupported, # $#,##0_);($#,##0)
18
- 6 => :unsupported, # $#,##0_);[Red]($#,##0)
19
- 7 => :unsupported, # $#,##0.00_);($#,##0.00)
20
- 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
21
- 9 => :percentage, # 0%
22
- 10 => :percentage, # 0.00%
23
- 11 => :bignum, # 0.00E+00
24
- 12 => :unsupported, # # ?/?
25
- 13 => :unsupported, # # ??/??
26
- 14 => :date, # mm-dd-yy
27
- 15 => :date, # d-mmm-yy
28
- 16 => :date, # d-mmm
29
- 17 => :date, # mmm-yy
30
- 18 => :time, # h:mm AM/PM
31
- 19 => :time, # h:mm:ss AM/PM
32
- 20 => :time, # h:mm
33
- 21 => :time, # h:mm:ss
34
- 22 => :date_time, # m/d/yy h:mm
35
- 37 => :unsupported, # #,##0 ;(#,##0)
36
- 38 => :unsupported, # #,##0 ;[Red](#,##0)
37
- 39 => :unsupported, # #,##0.00;(#,##0.00)
38
- 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
39
- 45 => :time, # mm:ss
40
- 46 => :time, # [h]:mm:ss
41
- 47 => :time, # mmss.0
42
- 48 => :bignum, # ##0.0E+0
43
- 49 => :unsupported # @
12
+ 0 => :string, # General
13
+ 1 => :fixnum, # 0
14
+ 2 => :float, # 0.00
15
+ 3 => :fixnum, # #,##0
16
+ 4 => :float, # #,##0.00
17
+ 5 => :unsupported, # $#,##0_);($#,##0)
18
+ 6 => :unsupported, # $#,##0_);[Red]($#,##0)
19
+ 7 => :unsupported, # $#,##0.00_);($#,##0.00)
20
+ 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
21
+ 9 => :percentage, # 0%
22
+ 10 => :percentage, # 0.00%
23
+ 11 => :bignum, # 0.00E+00
24
+ 12 => :unsupported, # # ?/?
25
+ 13 => :unsupported, # # ??/??
26
+ 14 => :date, # mm-dd-yy
27
+ 15 => :date, # d-mmm-yy
28
+ 16 => :date, # d-mmm
29
+ 17 => :date, # mmm-yy
30
+ 18 => :time, # h:mm AM/PM
31
+ 19 => :time, # h:mm:ss AM/PM
32
+ 20 => :time, # h:mm
33
+ 21 => :time, # h:mm:ss
34
+ 22 => :date_time, # m/d/yy h:mm
35
+ 37 => :unsupported, # #,##0 ;(#,##0)
36
+ 38 => :unsupported, # #,##0 ;[Red](#,##0)
37
+ 39 => :unsupported, # #,##0.00;(#,##0.00)
38
+ 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
39
+ 44 => :float, # some odd currency format ?from Office 2007?
40
+ 45 => :time, # mm:ss
41
+ 46 => :time, # [h]:mm:ss
42
+ 47 => :time, # mmss.0
43
+ 48 => :bignum, # ##0.0E+0
44
+ 49 => :unsupported # @
44
45
  }.freeze
45
46
 
46
47
  def parse
@@ -162,7 +162,7 @@ module SimpleXlsxReader
162
162
  when :float
163
163
  value.to_f
164
164
  when :percentage
165
- value.to_f / 100
165
+ value.to_f
166
166
  # the trickiest. note that all these formats can vary on
167
167
  # whether they actually contain a date, time, or datetime.
168
168
  when :date, :time, :date_time
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '3.0.1'
4
+ VERSION = '4.0.1'
5
5
  end
Binary file
@@ -122,6 +122,52 @@ describe SimpleXlsxReader do
122
122
 
123
123
  let(:reader) { SimpleXlsxReader.open(xlsx.archive.path) }
124
124
 
125
+ describe 'when parsing escaped characters' do
126
+ let(:escaped_content) do
127
+ '&lt;a href="https://www.example.com"&gt;Link A&lt;/a&gt; &amp;bull; &lt;a href="https://www.example.com"&gt;Link B&lt;/a&gt;'
128
+ end
129
+
130
+ let(:unescaped_content) do
131
+ '<a href="https://www.example.com">Link A</a> &bull; <a href="https://www.example.com">Link B</a>'
132
+ end
133
+
134
+ let(:sheet) do
135
+ <<~XML
136
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
137
+ <dimension ref="A1:B1" />
138
+ <sheetData>
139
+ <row r="1">
140
+ <c r="A1" s="1" t="s">
141
+ <v>0</v>
142
+ </c>
143
+ <c r='B1' s='0'>
144
+ <v>#{escaped_content}</v>
145
+ </c>
146
+ </row>
147
+ </sheetData>
148
+ </worksheet>
149
+ XML
150
+ end
151
+
152
+ let(:shared_strings) do
153
+ <<~XML
154
+ <sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="1" uniqueCount="1">
155
+ <si>
156
+ <t>#{escaped_content}</t>
157
+ </si>
158
+ </sst>
159
+ XML
160
+ end
161
+
162
+ it 'loads correctly using inline strings' do
163
+ _(reader.sheets[0].rows.slurp[0][0]).must_equal(unescaped_content)
164
+ end
165
+
166
+ it 'loads correctly using shared strings' do
167
+ _(reader.sheets[0].rows.slurp[0][1]).must_equal(unescaped_content)
168
+ end
169
+ end
170
+
125
171
  describe 'Sheet#rows#each(headers: true)' do
126
172
  let(:sheet) do
127
173
  <<~XML
@@ -929,7 +975,7 @@ describe SimpleXlsxReader do
929
975
  )
930
976
  )
931
977
  end
932
-
978
+
933
979
  it "reads 'Generic' cells with numbers as numbers" do
934
980
  _(@row[9]).must_equal 1
935
981
  end
@@ -1015,4 +1061,21 @@ describe SimpleXlsxReader do
1015
1061
  )
1016
1062
  end
1017
1063
  end
1064
+
1065
+ describe 'when using percentages & currencies' do
1066
+ let(:pnc_path) do
1067
+ # This file provided by a GitHub user having parse errors in these fields
1068
+ File.join(File.dirname(__FILE__), 'percentages_n_currencies.xlsx')
1069
+ end
1070
+
1071
+ let(:sheet) { SimpleXlsxReader.open(pnc_path).sheets[0] }
1072
+
1073
+ it 'reads percentages as floats of the form 0.XX' do
1074
+ _(sheet.rows.slurp[1][2]).must_equal(0.87)
1075
+ end
1076
+
1077
+ it 'reads currencies as floats' do
1078
+ _(sheet.rows.slurp[1][4]).must_equal(300.0)
1079
+ end
1080
+ end
1018
1081
  end
@@ -57,7 +57,6 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
57
57
  self.styles ||= DEFAULTS[:styles]
58
58
  self.sheets ||= [DEFAULTS[:sheet]]
59
59
  self.rels ||= []
60
- self.shared_strings ||= []
61
60
  end
62
61
 
63
62
  def archive
@@ -76,7 +75,7 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
76
75
  styles_file.write(styles)
77
76
  end
78
77
 
79
- if shared_strings.any?
78
+ if shared_strings
80
79
  zip.get_output_stream('xl/sharedStrings.xml') do |ss_file|
81
80
  ss_file.write(shared_strings)
82
81
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-05 00:00:00.000000000 Z
11
+ date: 2023-03-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -115,6 +115,7 @@ files:
115
115
  - test/lower_case_sharedstrings.xlsx
116
116
  - test/lower_case_sharedstrings_test.rb
117
117
  - test/misc_numbers.xlsx
118
+ - test/percentages_n_currencies.xlsx
118
119
  - test/performance_test.rb
119
120
  - test/sesame_street_blog.xlsx
120
121
  - test/shared_strings.xml
@@ -156,6 +157,7 @@ test_files:
156
157
  - test/lower_case_sharedstrings.xlsx
157
158
  - test/lower_case_sharedstrings_test.rb
158
159
  - test/misc_numbers.xlsx
160
+ - test/percentages_n_currencies.xlsx
159
161
  - test/performance_test.rb
160
162
  - test/sesame_street_blog.xlsx
161
163
  - test/shared_strings.xml