simple_xlsx_reader 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5449525d4e46a013f92e8406a2ec2d07b06bb795efc7c8d76b9ffbcace22a38f
4
- data.tar.gz: 5c664baa8d88692767f5bb6d2879e24c27098206695b88da42f3bc0d30bb9bce
3
+ metadata.gz: 1f51a4ca0ca865cc2a9ddebeb72fa3db9cc3f309ce2d1a3d34a492f09e22789c
4
+ data.tar.gz: 90a2b1ac9071fcef0797f5839652d919169cfcf6862de8926b7b605dcc53cd7e
5
5
  SHA512:
6
- metadata.gz: 90d5fedde0aa4cc2bcb8b4d9134859890bbf4357efbf6ba9aa0aaf3bd21ad1cd9f54a279d1126938a300dee36fbb8a10d63b66f945c9f7eeb8edc880bb23327a
7
- data.tar.gz: ab684cc09075a0b9a1054c045bf4202718159b258c87f62b10e18aafa4faaa7a527ba95f8e765f7609ab7ba24c43a31176906ecefd612c85badbdefee9164184
6
+ metadata.gz: 38f0844bfa6e30cd9af9414057a767cc3bd7cf6ed11023a7306b18686ad3cb250a70191d9b77f8cbc2a590aaa24f822cbec6c546f667fe6e820bb356ddd369f9
7
+ data.tar.gz: 69af022e15fa95404ab0208be1b4b6661ae14033c73477b98b78f01795712313d63952dce0674ba34ee3aecc19105ef28adfa708de738682583f3b672668a251
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ### 3.0.1
2
+
3
+ * Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
4
+
1
5
  ### 3.0.0
2
6
 
3
7
  * Change the way we typecast cells in the General format. This probably won't
@@ -77,7 +77,7 @@ module SimpleXlsxReader
77
77
 
78
78
  return unless @capture
79
79
 
80
- @current_row[cell_idx] =
80
+ captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
83
  string.strip, @type, @style,
@@ -102,6 +102,17 @@ module SimpleXlsxReader
102
102
  string.strip
103
103
  end
104
104
  end
105
+
106
+
107
+ # For some reason I can't figure out in a reasonable timeframe,
108
+ # SAX parsing some workbooks captures separate strings in the same cell
109
+ # when we encounter UTF-8, although I can't get workbooks made in my
110
+ # own version of excel to repro it. Our fix is just to keep building
111
+ # the string in this case, although maybe there's a setting in Nokogiri
112
+ # to make it not do this (looked, couldn't find it).
113
+ #
114
+ # Loading the workbook test/chunky_utf8.xlsx repros the issue.
115
+ @captured = @captured ? @captured + captured : captured
105
116
  end
106
117
 
107
118
  def end_element(name)
@@ -134,7 +145,10 @@ module SimpleXlsxReader
134
145
  # isn't the most robust strategy, but it likely fits 99% of use cases
135
146
  # considering it's not a problem with actual excel docs.
136
147
  @dimension = "A1:#{@cell_name}" if @dimension.nil?
137
- when 'v', 't' then @capture = false
148
+ when 'v', 't'
149
+ @current_row[cell_idx] = @captured
150
+ @capture = false
151
+ @captured = nil
138
152
  when 'f' then @function = false
139
153
  when 'c' then @url = nil
140
154
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '3.0.0'
4
+ VERSION = '3.0.1'
5
5
  end
Binary file
@@ -1001,4 +1001,18 @@ describe SimpleXlsxReader do
1001
1001
  _(sheet.rows.slurp[1][1]).must_equal 1234567890123
1002
1002
  end
1003
1003
  end
1004
+
1005
+ describe 'with mysteriously chunky UTF-8 text' do
1006
+ let(:chunky_utf8_path) do
1007
+ File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
1008
+ end
1009
+
1010
+ let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
1011
+
1012
+ it 'reads the whole cell text' do
1013
+ _(sheet.rows.slurp[1]).must_equal(
1014
+ ["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
1015
+ )
1016
+ end
1017
+ end
1004
1018
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-01 00:00:00.000000000 Z
11
+ date: 2023-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -105,6 +105,7 @@ files:
105
105
  - lib/simple_xlsx_reader/loader/workbook_parser.rb
106
106
  - lib/simple_xlsx_reader/version.rb
107
107
  - simple_xlsx_reader.gemspec
108
+ - test/chunky_utf8.xlsx
108
109
  - test/date1904.xlsx
109
110
  - test/date1904_test.rb
110
111
  - test/datetime_test.rb
@@ -145,6 +146,7 @@ signing_key:
145
146
  specification_version: 4
146
147
  summary: Read xlsx data the Ruby way
147
148
  test_files:
149
+ - test/chunky_utf8.xlsx
148
150
  - test/date1904.xlsx
149
151
  - test/date1904_test.rb
150
152
  - test/datetime_test.rb