simple_xlsx_reader 3.0.1 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f51a4ca0ca865cc2a9ddebeb72fa3db9cc3f309ce2d1a3d34a492f09e22789c
4
- data.tar.gz: 90a2b1ac9071fcef0797f5839652d919169cfcf6862de8926b7b605dcc53cd7e
3
+ metadata.gz: f5e0de15ab326f027127322eac9d88d752ffd2d55797df23ed525a7eea1d9833
4
+ data.tar.gz: 415972aaf4f77e4bdb5e60b0095cf01f6d7e24a0cd28d493c8ad8d01fa50d66b
5
5
  SHA512:
6
- metadata.gz: 38f0844bfa6e30cd9af9414057a767cc3bd7cf6ed11023a7306b18686ad3cb250a70191d9b77f8cbc2a590aaa24f822cbec6c546f667fe6e820bb356ddd369f9
7
- data.tar.gz: 69af022e15fa95404ab0208be1b4b6661ae14033c73477b98b78f01795712313d63952dce0674ba34ee3aecc19105ef28adfa708de738682583f3b672668a251
6
+ metadata.gz: cfea3adb62767bedfe6470377b54078066b1bf07e13c065160a31c260261f65a658c34c37dc850c01dd27ba7038a4261f1a6ea9c6ff42771e8113dbabc51897b
7
+ data.tar.gz: a4b996c3d15b2f54a61d8fc90366ba502d6bc108b1bfe5ed34b9d46f63fe7c52381f181421cc31eea46b7c73e6925d3aa3cfeb8e1b737021be93e4863ea9e703
data/CHANGELOG.md CHANGED
@@ -1,3 +1,23 @@
1
+ ### 4.0.1
2
+
3
+ * Fix nil error when handling some inline strings
4
+
5
+ Inline strings are almost exclusively used by non-Excel XLSX
6
+ implementations, but are valid, and sometimes have nil chunks.
7
+
8
+ Also, inline strings weren't preserving whitespace when Nokogiri parsed
9
+ the string in chunks, as it does when encountering escaped
10
+ characters. Fixed.
11
+
12
+ ### 4.0.0
13
+
14
+ * Fix percentage rounding errors. Previously we were dividing by 100, when we
15
+ actually don't need to, so percentage types were 100x too small. Fixes #21.
16
+ Major bump because workarounds might have been implemented for previous
17
+ incorrect behavior.
18
+ * Fix small oddity in one currency format where round numbers would be cast
19
+ to an integer instead of a float.
20
+
1
21
  ### 3.0.1
2
22
 
3
23
  * Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
@@ -80,7 +80,7 @@ module SimpleXlsxReader
80
80
  captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
- string.strip, @type, @style,
83
+ string, @type, @style,
84
84
  url: @url || hyperlinks_by_cell&.[](@cell_name),
85
85
  shared_strings: shared_strings,
86
86
  base_date: base_date
@@ -99,11 +99,10 @@ module SimpleXlsxReader
99
99
  else
100
100
  @load_errors[[row_idx, col_idx]] = e.message
101
101
 
102
- string.strip
102
+ string
103
103
  end
104
104
  end
105
105
 
106
-
107
106
  # For some reason I can't figure out in a reasonable timeframe,
108
107
  # SAX parsing some workbooks captures separate strings in the same cell
109
108
  # when we encounter UTF-8, although I can't get workbooks made in my
@@ -112,7 +111,7 @@ module SimpleXlsxReader
112
111
  # to make it not do this (looked, couldn't find it).
113
112
  #
114
113
  # Loading the workbook test/chunky_utf8.xlsx repros the issue.
115
- @captured = @captured ? @captured + captured : captured
114
+ @captured = @captured ? @captured + (captured || '') : captured
116
115
  end
117
116
 
118
117
  def end_element(name)
@@ -9,38 +9,39 @@ module SimpleXlsxReader
9
9
 
10
10
  # Map of non-custom numFmtId to casting symbol
11
11
  NumFmtMap = {
12
- 0 => :string, # General
13
- 1 => :fixnum, # 0
14
- 2 => :float, # 0.00
15
- 3 => :fixnum, # #,##0
16
- 4 => :float, # #,##0.00
17
- 5 => :unsupported, # $#,##0_);($#,##0)
18
- 6 => :unsupported, # $#,##0_);[Red]($#,##0)
19
- 7 => :unsupported, # $#,##0.00_);($#,##0.00)
20
- 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
21
- 9 => :percentage, # 0%
22
- 10 => :percentage, # 0.00%
23
- 11 => :bignum, # 0.00E+00
24
- 12 => :unsupported, # # ?/?
25
- 13 => :unsupported, # # ??/??
26
- 14 => :date, # mm-dd-yy
27
- 15 => :date, # d-mmm-yy
28
- 16 => :date, # d-mmm
29
- 17 => :date, # mmm-yy
30
- 18 => :time, # h:mm AM/PM
31
- 19 => :time, # h:mm:ss AM/PM
32
- 20 => :time, # h:mm
33
- 21 => :time, # h:mm:ss
34
- 22 => :date_time, # m/d/yy h:mm
35
- 37 => :unsupported, # #,##0 ;(#,##0)
36
- 38 => :unsupported, # #,##0 ;[Red](#,##0)
37
- 39 => :unsupported, # #,##0.00;(#,##0.00)
38
- 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
39
- 45 => :time, # mm:ss
40
- 46 => :time, # [h]:mm:ss
41
- 47 => :time, # mmss.0
42
- 48 => :bignum, # ##0.0E+0
43
- 49 => :unsupported # @
12
+ 0 => :string, # General
13
+ 1 => :fixnum, # 0
14
+ 2 => :float, # 0.00
15
+ 3 => :fixnum, # #,##0
16
+ 4 => :float, # #,##0.00
17
+ 5 => :unsupported, # $#,##0_);($#,##0)
18
+ 6 => :unsupported, # $#,##0_);[Red]($#,##0)
19
+ 7 => :unsupported, # $#,##0.00_);($#,##0.00)
20
+ 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
21
+ 9 => :percentage, # 0%
22
+ 10 => :percentage, # 0.00%
23
+ 11 => :bignum, # 0.00E+00
24
+ 12 => :unsupported, # # ?/?
25
+ 13 => :unsupported, # # ??/??
26
+ 14 => :date, # mm-dd-yy
27
+ 15 => :date, # d-mmm-yy
28
+ 16 => :date, # d-mmm
29
+ 17 => :date, # mmm-yy
30
+ 18 => :time, # h:mm AM/PM
31
+ 19 => :time, # h:mm:ss AM/PM
32
+ 20 => :time, # h:mm
33
+ 21 => :time, # h:mm:ss
34
+ 22 => :date_time, # m/d/yy h:mm
35
+ 37 => :unsupported, # #,##0 ;(#,##0)
36
+ 38 => :unsupported, # #,##0 ;[Red](#,##0)
37
+ 39 => :unsupported, # #,##0.00;(#,##0.00)
38
+ 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
39
+ 44 => :float, # some odd currency format ?from Office 2007?
40
+ 45 => :time, # mm:ss
41
+ 46 => :time, # [h]:mm:ss
42
+ 47 => :time, # mmss.0
43
+ 48 => :bignum, # ##0.0E+0
44
+ 49 => :unsupported # @
44
45
  }.freeze
45
46
 
46
47
  def parse
@@ -162,7 +162,7 @@ module SimpleXlsxReader
162
162
  when :float
163
163
  value.to_f
164
164
  when :percentage
165
- value.to_f / 100
165
+ value.to_f
166
166
  # the trickiest. note that all these formats can vary on
167
167
  # whether they actually contain a date, time, or datetime.
168
168
  when :date, :time, :date_time
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '3.0.1'
4
+ VERSION = '4.0.1'
5
5
  end
Binary file
@@ -122,6 +122,52 @@ describe SimpleXlsxReader do
122
122
 
123
123
  let(:reader) { SimpleXlsxReader.open(xlsx.archive.path) }
124
124
 
125
+ describe 'when parsing escaped characters' do
126
+ let(:escaped_content) do
127
+ '&lt;a href="https://www.example.com"&gt;Link A&lt;/a&gt; &amp;bull; &lt;a href="https://www.example.com"&gt;Link B&lt;/a&gt;'
128
+ end
129
+
130
+ let(:unescaped_content) do
131
+ '<a href="https://www.example.com">Link A</a> &bull; <a href="https://www.example.com">Link B</a>'
132
+ end
133
+
134
+ let(:sheet) do
135
+ <<~XML
136
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
137
+ <dimension ref="A1:B1" />
138
+ <sheetData>
139
+ <row r="1">
140
+ <c r="A1" s="1" t="s">
141
+ <v>0</v>
142
+ </c>
143
+ <c r='B1' s='0'>
144
+ <v>#{escaped_content}</v>
145
+ </c>
146
+ </row>
147
+ </sheetData>
148
+ </worksheet>
149
+ XML
150
+ end
151
+
152
+ let(:shared_strings) do
153
+ <<~XML
154
+ <sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="1" uniqueCount="1">
155
+ <si>
156
+ <t>#{escaped_content}</t>
157
+ </si>
158
+ </sst>
159
+ XML
160
+ end
161
+
162
+ it 'loads correctly using shared strings' do
163
+ _(reader.sheets[0].rows.slurp[0][0]).must_equal(unescaped_content)
164
+ end
165
+
166
+ it 'loads correctly using inline strings' do
167
+ _(reader.sheets[0].rows.slurp[0][1]).must_equal(unescaped_content)
168
+ end
169
+ end
170
+
125
171
  describe 'Sheet#rows#each(headers: true)' do
126
172
  let(:sheet) do
127
173
  <<~XML
@@ -929,7 +975,7 @@ describe SimpleXlsxReader do
929
975
  )
930
976
  )
931
977
  end
932
-
978
+
933
979
  it "reads 'Generic' cells with numbers as numbers" do
934
980
  _(@row[9]).must_equal 1
935
981
  end
@@ -1015,4 +1061,21 @@ describe SimpleXlsxReader do
1015
1061
  )
1016
1062
  end
1017
1063
  end
1064
+
1065
+ describe 'when using percentages & currencies' do
1066
+ let(:pnc_path) do
1067
+ # This file provided by a GitHub user having parse errors in these fields
1068
+ File.join(File.dirname(__FILE__), 'percentages_n_currencies.xlsx')
1069
+ end
1070
+
1071
+ let(:sheet) { SimpleXlsxReader.open(pnc_path).sheets[0] }
1072
+
1073
+ it 'reads percentages as floats of the form 0.XX' do
1074
+ _(sheet.rows.slurp[1][2]).must_equal(0.87)
1075
+ end
1076
+
1077
+ it 'reads currencies as floats' do
1078
+ _(sheet.rows.slurp[1][4]).must_equal(300.0)
1079
+ end
1080
+ end
1018
1081
  end
@@ -57,7 +57,6 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
57
57
  self.styles ||= DEFAULTS[:styles]
58
58
  self.sheets ||= [DEFAULTS[:sheet]]
59
59
  self.rels ||= []
60
- self.shared_strings ||= []
61
60
  end
62
61
 
63
62
  def archive
@@ -76,7 +75,7 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
76
75
  styles_file.write(styles)
77
76
  end
78
77
 
79
- if shared_strings.any?
78
+ if shared_strings
80
79
  zip.get_output_stream('xl/sharedStrings.xml') do |ss_file|
81
80
  ss_file.write(shared_strings)
82
81
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-05 00:00:00.000000000 Z
11
+ date: 2023-03-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -115,6 +115,7 @@ files:
115
115
  - test/lower_case_sharedstrings.xlsx
116
116
  - test/lower_case_sharedstrings_test.rb
117
117
  - test/misc_numbers.xlsx
118
+ - test/percentages_n_currencies.xlsx
118
119
  - test/performance_test.rb
119
120
  - test/sesame_street_blog.xlsx
120
121
  - test/shared_strings.xml
@@ -156,6 +157,7 @@ test_files:
156
157
  - test/lower_case_sharedstrings.xlsx
157
158
  - test/lower_case_sharedstrings_test.rb
158
159
  - test/misc_numbers.xlsx
160
+ - test/percentages_n_currencies.xlsx
159
161
  - test/performance_test.rb
160
162
  - test/sesame_street_blog.xlsx
161
163
  - test/shared_strings.xml