simple_xlsx_reader 2.0.1 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 20c74bef372629ffb807d50df274c90682e17d53eed296a1e05fbb99533d4a8e
4
- data.tar.gz: 9c4311913e79ad139414a4fe064d89fb246a1d46f30b7736c87f049985801660
3
+ metadata.gz: 1f51a4ca0ca865cc2a9ddebeb72fa3db9cc3f309ce2d1a3d34a492f09e22789c
4
+ data.tar.gz: 90a2b1ac9071fcef0797f5839652d919169cfcf6862de8926b7b605dcc53cd7e
5
5
  SHA512:
6
- metadata.gz: 18db8595a36d4d9bb0f1dfee5da58753b799a6358530dfb9436f9c7b72e8e06bb9101d86d8dc617669c075fced44db8c24fdca6c5a3b2cb6b908cc0cd645eeb1
7
- data.tar.gz: 23d4b057060c5f66ad3d57d0d65fa6e127ed4c7faa622ec0d236b58dd439179b41dff041ec2dd955c2ec893d577d9660465d2dcea0fe9a9f3af5e911ac5aa8e7
6
+ metadata.gz: 38f0844bfa6e30cd9af9414057a767cc3bd7cf6ed11023a7306b18686ad3cb250a70191d9b77f8cbc2a590aaa24f822cbec6c546f667fe6e820bb356ddd369f9
7
+ data.tar.gz: 69af022e15fa95404ab0208be1b4b6661ae14033c73477b98b78f01795712313d63952dce0674ba34ee3aecc19105ef28adfa708de738682583f3b672668a251
data/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
1
+ ### 3.0.1
2
+
3
+ * Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
4
+
5
+ ### 3.0.0
6
+
7
+ * Change the way we typecast cells in the General format. This probably won't
8
+ break anything in your app, but it's a change in behavior that theoretically
9
+ could.
10
+
11
+ Previously, we were treating cells using the General format as strings, when
12
+ according to the Office XML standard, they should be treated as numbers. We
13
+ now attempt to cast such cells as numbers, and fall back to strings if number
14
+ casting fails.
15
+
16
+ Thanks @jrodrigosm
17
+
1
18
  ### 2.0.1
2
19
 
3
20
  * Restore ability to parse IO strings (@robbevp)
@@ -77,7 +77,7 @@ module SimpleXlsxReader
77
77
 
78
78
  return unless @capture
79
79
 
80
- @current_row[cell_idx] =
80
+ captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
83
  string.strip, @type, @style,
@@ -102,6 +102,17 @@ module SimpleXlsxReader
102
102
  string.strip
103
103
  end
104
104
  end
105
+
106
+
107
+ # For some reason I can't figure out in a reasonable timeframe,
108
+ # SAX parsing some workbooks captures separate strings in the same cell
109
+ # when we encounter UTF-8, although I can't get workbooks made in my
110
+ # own version of Excel to repro it. Our fix is just to keep building
111
+ # the string in this case, although maybe there's a setting in Nokogiri
112
+ # to make it not do this (looked, couldn't find it).
113
+ #
114
+ # Loading the workbook test/chunky_utf8.xlsx repros the issue.
115
+ @captured = @captured ? @captured + captured : captured
105
116
  end
106
117
 
107
118
  def end_element(name)
@@ -134,7 +145,10 @@ module SimpleXlsxReader
134
145
  # isn't the most robust strategy, but it likely fits 99% of use cases
135
146
  # considering it's not a problem with actual excel docs.
136
147
  @dimension = "A1:#{@cell_name}" if @dimension.nil?
137
- when 'v', 't' then @capture = false
148
+ when 'v', 't'
149
+ @current_row[cell_idx] = @captured
150
+ @capture = false
151
+ @captured = nil
138
152
  when 'f' then @function = false
139
153
  when 'c' then @url = nil
140
154
  end
@@ -149,7 +149,13 @@ module SimpleXlsxReader
149
149
  # detected earlier and cast here by its standardized symbol
150
150
  ##
151
151
 
152
- when :string, :unsupported
152
+ # no type encoded with the General format defaults to a number type
153
+ when nil, :string
154
+ retval = Integer(value, exception: false)
155
+ retval ||= Float(value, exception: false)
156
+ retval ||= value
157
+ retval
158
+ when :unsupported
153
159
  value
154
160
  when :fixnum
155
161
  value.to_i
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '2.0.1'
4
+ VERSION = '3.0.1'
5
5
  end
Binary file
Binary file
@@ -827,6 +827,10 @@ describe SimpleXlsxReader do
827
827
  <c r='I1' s='0'>
828
828
  <v>GUI-made hyperlink</v>
829
829
  </c>
830
+
831
+ <c r='J1' s='0'>
832
+ <v>1</v>
833
+ </c>
830
834
  </row>
831
835
  </sheetData>
832
836
 
@@ -925,6 +929,10 @@ describe SimpleXlsxReader do
925
929
  )
926
930
  )
927
931
  end
932
+
933
+ it "reads 'Generic' cells with numbers as numbers" do
934
+ _(@row[9]).must_equal 1
935
+ end
928
936
  end
929
937
 
930
938
  describe 'parsing documents with blank rows' do
@@ -936,7 +944,7 @@ describe SimpleXlsxReader do
936
944
  <sheetData>
937
945
  <row r="2" spans="1:1">
938
946
  <c r="A2" s="0">
939
- <v>0</v>
947
+ <v>a</v>
940
948
  </c>
941
949
  </row>
942
950
  <row r="4" spans="1:1">
@@ -967,13 +975,44 @@ describe SimpleXlsxReader do
967
975
  it 'reads row data despite gaps in row numbering' do
968
976
  _(@rows).must_equal [
969
977
  [nil, nil, nil, nil],
970
- ['0', nil, nil, nil],
978
+ ['a', nil, nil, nil],
971
979
  [nil, nil, nil, nil],
972
- [nil, '1', nil, nil],
973
- [nil, nil, '2', nil],
980
+ [nil, 1, nil, nil],
981
+ [nil, nil, 2, nil],
974
982
  [nil, nil, nil, nil],
975
- [nil, nil, nil, '3']
983
+ [nil, nil, nil, 3]
976
984
  ]
977
985
  end
978
986
  end
987
+
988
+ # https://support.microsoft.com/en-us/office/available-number-formats-in-excel-0afe8f52-97db-41f1-b972-4b46e9f1e8d2
989
+ describe 'numeric fields styled as "General"' do
990
+ let(:misc_numbers_path) do
991
+ File.join(File.dirname(__FILE__), 'misc_numbers.xlsx')
992
+ end
993
+
994
+ let(:sheet) { SimpleXlsxReader.open(misc_numbers_path).sheets[0] }
995
+
996
+ it 'reads medium sized integers as integers' do
997
+ _(sheet.rows.slurp[1][0]).must_equal 98070
998
+ end
999
+
1000
+ it 'reads large (>12 char) integers as integers' do
1001
+ _(sheet.rows.slurp[1][1]).must_equal 1234567890123
1002
+ end
1003
+ end
1004
+
1005
+ describe 'with mysteriously chunky UTF-8 text' do
1006
+ let(:chunky_utf8_path) do
1007
+ File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
1008
+ end
1009
+
1010
+ let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
1011
+
1012
+ it 'reads the whole cell text' do
1013
+ _(sheet.rows.slurp[1]).must_equal(
1014
+ ["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
1015
+ )
1016
+ end
1017
+ end
979
1018
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-01 00:00:00.000000000 Z
11
+ date: 2023-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -105,6 +105,7 @@ files:
105
105
  - lib/simple_xlsx_reader/loader/workbook_parser.rb
106
106
  - lib/simple_xlsx_reader/version.rb
107
107
  - simple_xlsx_reader.gemspec
108
+ - test/chunky_utf8.xlsx
108
109
  - test/date1904.xlsx
109
110
  - test/date1904_test.rb
110
111
  - test/datetime_test.rb
@@ -113,6 +114,7 @@ files:
113
114
  - test/gdocs_sheet_test.rb
114
115
  - test/lower_case_sharedstrings.xlsx
115
116
  - test/lower_case_sharedstrings_test.rb
117
+ - test/misc_numbers.xlsx
116
118
  - test/performance_test.rb
117
119
  - test/sesame_street_blog.xlsx
118
120
  - test/shared_strings.xml
@@ -144,6 +146,7 @@ signing_key:
144
146
  specification_version: 4
145
147
  summary: Read xlsx data the Ruby way
146
148
  test_files:
149
+ - test/chunky_utf8.xlsx
147
150
  - test/date1904.xlsx
148
151
  - test/date1904_test.rb
149
152
  - test/datetime_test.rb
@@ -152,6 +155,7 @@ test_files:
152
155
  - test/gdocs_sheet_test.rb
153
156
  - test/lower_case_sharedstrings.xlsx
154
157
  - test/lower_case_sharedstrings_test.rb
158
+ - test/misc_numbers.xlsx
155
159
  - test/performance_test.rb
156
160
  - test/sesame_street_blog.xlsx
157
161
  - test/shared_strings.xml