simple_xlsx_reader 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d60e969d7d2db69578d543b6ebab36d29b3e3e88b4d62d67d00218b09b644cc5
4
- data.tar.gz: 36e12c7d95b6319f8f3bb565a1bc1b8eea6d1ca44928b3188a6892bf1f0c6513
3
+ metadata.gz: f5e0de15ab326f027127322eac9d88d752ffd2d55797df23ed525a7eea1d9833
4
+ data.tar.gz: 415972aaf4f77e4bdb5e60b0095cf01f6d7e24a0cd28d493c8ad8d01fa50d66b
5
5
  SHA512:
6
- metadata.gz: 6610958e6cb393e6013d303dd541f80a19d91415f6ebbe1d03162b52580ac361ad3f7e8e9fef5904a1daae72fe0774a5a83f47617c75ac185748c78e2c828e5a
7
- data.tar.gz: f556a9d31d48aa7cfeb0a1a9194736f2740ae3a2c868ed6a65fc197411351b28751d9caa72fd2bbbeb7eb22acd9ba0e2d53606f414bd36843f811ccb93d80ed2
6
+ metadata.gz: cfea3adb62767bedfe6470377b54078066b1bf07e13c065160a31c260261f65a658c34c37dc850c01dd27ba7038a4261f1a6ea9c6ff42771e8113dbabc51897b
7
+ data.tar.gz: a4b996c3d15b2f54a61d8fc90366ba502d6bc108b1bfe5ed34b9d46f63fe7c52381f181421cc31eea46b7c73e6925d3aa3cfeb8e1b737021be93e4863ea9e703
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ### 4.0.1
2
+
3
+ * Fix nil error when handling some inline strings
4
+
5
+ Inline strings are almost exclusively used by non-Excel XLSX
6
+ implementations, but are valid, and sometimes have nil chunks.
7
+
8
+ Also, inline strings weren't preserving whitespace when Nokogiri was
9
+ parsing the string in chunks, as it does when encountering escaped
10
+ characters. Fixed.
11
+
1
12
  ### 4.0.0
2
13
 
3
14
  * Fix percentage rounding errors. Previously we were dividing by 100, when we
@@ -80,7 +80,7 @@ module SimpleXlsxReader
80
80
  captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
- string.strip, @type, @style,
83
+ string, @type, @style,
84
84
  url: @url || hyperlinks_by_cell&.[](@cell_name),
85
85
  shared_strings: shared_strings,
86
86
  base_date: base_date
@@ -99,7 +99,7 @@ module SimpleXlsxReader
99
99
  else
100
100
  @load_errors[[row_idx, col_idx]] = e.message
101
101
 
102
- string.strip
102
+ string
103
103
  end
104
104
  end
105
105
 
@@ -111,7 +111,7 @@ module SimpleXlsxReader
111
111
  # to make it not do this (looked, couldn't find it).
112
112
  #
113
113
  # Loading the workbook test/chunky_utf8.xlsx repros the issue.
114
- @captured = @captured ? @captured + captured : captured
114
+ @captured = @captured ? @captured + (captured || '') : captured
115
115
  end
116
116
 
117
117
  def end_element(name)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '4.0.0'
4
+ VERSION = '4.0.1'
5
5
  end
@@ -122,6 +122,52 @@ describe SimpleXlsxReader do
122
122
 
123
123
  let(:reader) { SimpleXlsxReader.open(xlsx.archive.path) }
124
124
 
125
+ describe 'when parsing escaped characters' do
126
+ let(:escaped_content) do
127
+ '&lt;a href="https://www.example.com"&gt;Link A&lt;/a&gt; &amp;bull; &lt;a href="https://www.example.com"&gt;Link B&lt;/a&gt;'
128
+ end
129
+
130
+ let(:unescaped_content) do
131
+ '<a href="https://www.example.com">Link A</a> &bull; <a href="https://www.example.com">Link B</a>'
132
+ end
133
+
134
+ let(:sheet) do
135
+ <<~XML
136
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
137
+ <dimension ref="A1:B1" />
138
+ <sheetData>
139
+ <row r="1">
140
+ <c r="A1" s="1" t="s">
141
+ <v>0</v>
142
+ </c>
143
+ <c r='B1' s='0'>
144
+ <v>#{escaped_content}</v>
145
+ </c>
146
+ </row>
147
+ </sheetData>
148
+ </worksheet>
149
+ XML
150
+ end
151
+
152
+ let(:shared_strings) do
153
+ <<~XML
154
+ <sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="1" uniqueCount="1">
155
+ <si>
156
+ <t>#{escaped_content}</t>
157
+ </si>
158
+ </sst>
159
+ XML
160
+ end
161
+
162
+ it 'loads correctly using inline strings' do
163
+ _(reader.sheets[0].rows.slurp[0][0]).must_equal(unescaped_content)
164
+ end
165
+
166
+ it 'loads correctly using shared strings' do
167
+ _(reader.sheets[0].rows.slurp[0][1]).must_equal(unescaped_content)
168
+ end
169
+ end
170
+
125
171
  describe 'Sheet#rows#each(headers: true)' do
126
172
  let(:sheet) do
127
173
  <<~XML
@@ -929,7 +975,7 @@ describe SimpleXlsxReader do
929
975
  )
930
976
  )
931
977
  end
932
-
978
+
933
979
  it "reads 'Generic' cells with numbers as numbers" do
934
980
  _(@row[9]).must_equal 1
935
981
  end
@@ -57,7 +57,6 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
57
57
  self.styles ||= DEFAULTS[:styles]
58
58
  self.sheets ||= [DEFAULTS[:sheet]]
59
59
  self.rels ||= []
60
- self.shared_strings ||= []
61
60
  end
62
61
 
63
62
  def archive
@@ -76,7 +75,7 @@ TestXlsxBuilder = Struct.new(:shared_strings, :styles, :sheets, :workbook, :rels
76
75
  styles_file.write(styles)
77
76
  end
78
77
 
79
- if shared_strings.any?
78
+ if shared_strings
80
79
  zip.get_output_stream('xl/sharedStrings.xml') do |ss_file|
81
80
  ss_file.write(shared_strings)
82
81
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-05 00:00:00.000000000 Z
11
+ date: 2023-03-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri