simple_xlsx_reader 3.0.0 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5449525d4e46a013f92e8406a2ec2d07b06bb795efc7c8d76b9ffbcace22a38f
4
- data.tar.gz: 5c664baa8d88692767f5bb6d2879e24c27098206695b88da42f3bc0d30bb9bce
3
+ metadata.gz: 1f51a4ca0ca865cc2a9ddebeb72fa3db9cc3f309ce2d1a3d34a492f09e22789c
4
+ data.tar.gz: 90a2b1ac9071fcef0797f5839652d919169cfcf6862de8926b7b605dcc53cd7e
5
5
  SHA512:
6
- metadata.gz: 90d5fedde0aa4cc2bcb8b4d9134859890bbf4357efbf6ba9aa0aaf3bd21ad1cd9f54a279d1126938a300dee36fbb8a10d63b66f945c9f7eeb8edc880bb23327a
7
- data.tar.gz: ab684cc09075a0b9a1054c045bf4202718159b258c87f62b10e18aafa4faaa7a527ba95f8e765f7609ab7ba24c43a31176906ecefd612c85badbdefee9164184
6
+ metadata.gz: 38f0844bfa6e30cd9af9414057a767cc3bd7cf6ed11023a7306b18686ad3cb250a70191d9b77f8cbc2a590aaa24f822cbec6c546f667fe6e820bb356ddd369f9
7
+ data.tar.gz: 69af022e15fa95404ab0208be1b4b6661ae14033c73477b98b78f01795712313d63952dce0674ba34ee3aecc19105ef28adfa708de738682583f3b672668a251
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ### 3.0.1
2
+
3
+ * Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
4
+
1
5
  ### 3.0.0
2
6
 
3
7
  * Change the way we typecast cells in the General format. This probably won't
@@ -77,7 +77,7 @@ module SimpleXlsxReader
77
77
 
78
78
  return unless @capture
79
79
 
80
- @current_row[cell_idx] =
80
+ captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
83
  string.strip, @type, @style,
@@ -102,6 +102,17 @@ module SimpleXlsxReader
102
102
  string.strip
103
103
  end
104
104
  end
105
+
106
+
107
+ # For some reason I can't figure out in a reasonable timeframe,
108
+ # SAX parsing some workbooks captures separate strings in the same cell
109
+ # when we encounter UTF-8, although I can't get workbooks made in my
110
+ # own version of excel to repro it. Our fix is just to keep building
111
+ # the string in this case, although maybe there's a setting in Nokogiri
112
+ # to make it not do this (looked, couldn't find it).
113
+ #
114
+ # Loading the workbook test/chunky_utf8.xlsx repros the issue.
115
+ @captured = @captured ? @captured + captured : captured
105
116
  end
106
117
 
107
118
  def end_element(name)
@@ -134,7 +145,10 @@ module SimpleXlsxReader
134
145
  # isn't the most robust strategy, but it likely fits 99% of use cases
135
146
  # considering it's not a problem with actual excel docs.
136
147
  @dimension = "A1:#{@cell_name}" if @dimension.nil?
137
- when 'v', 't' then @capture = false
148
+ when 'v', 't'
149
+ @current_row[cell_idx] = @captured
150
+ @capture = false
151
+ @captured = nil
138
152
  when 'f' then @function = false
139
153
  when 'c' then @url = nil
140
154
  end
data/lib/simple_xlsx_reader/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '3.0.0'
4
+ VERSION = '3.0.1'
5
5
  end
data/test/chunky_utf8.xlsx ADDED
Binary file
@@ -1001,4 +1001,18 @@ describe SimpleXlsxReader do
1001
1001
  _(sheet.rows.slurp[1][1]).must_equal 1234567890123
1002
1002
  end
1003
1003
  end
1004
+
1005
+ describe 'with mysteriously chunky UTF-8 text' do
1006
+ let(:chunky_utf8_path) do
1007
+ File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
1008
+ end
1009
+
1010
+ let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
1011
+
1012
+ it 'reads the whole cell text' do
1013
+ _(sheet.rows.slurp[1]).must_equal(
1014
+ ["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
1015
+ )
1016
+ end
1017
+ end
1004
1018
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-01 00:00:00.000000000 Z
11
+ date: 2023-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -105,6 +105,7 @@ files:
105
105
  - lib/simple_xlsx_reader/loader/workbook_parser.rb
106
106
  - lib/simple_xlsx_reader/version.rb
107
107
  - simple_xlsx_reader.gemspec
108
+ - test/chunky_utf8.xlsx
108
109
  - test/date1904.xlsx
109
110
  - test/date1904_test.rb
110
111
  - test/datetime_test.rb
@@ -145,6 +146,7 @@ signing_key:
145
146
  specification_version: 4
146
147
  summary: Read xlsx data the Ruby way
147
148
  test_files:
149
+ - test/chunky_utf8.xlsx
148
150
  - test/date1904.xlsx
149
151
  - test/date1904_test.rb
150
152
  - test/datetime_test.rb