simple_xlsx_reader 2.0.1 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 20c74bef372629ffb807d50df274c90682e17d53eed296a1e05fbb99533d4a8e
4
- data.tar.gz: 9c4311913e79ad139414a4fe064d89fb246a1d46f30b7736c87f049985801660
3
+ metadata.gz: 1f51a4ca0ca865cc2a9ddebeb72fa3db9cc3f309ce2d1a3d34a492f09e22789c
4
+ data.tar.gz: 90a2b1ac9071fcef0797f5839652d919169cfcf6862de8926b7b605dcc53cd7e
5
5
  SHA512:
6
- metadata.gz: 18db8595a36d4d9bb0f1dfee5da58753b799a6358530dfb9436f9c7b72e8e06bb9101d86d8dc617669c075fced44db8c24fdca6c5a3b2cb6b908cc0cd645eeb1
7
- data.tar.gz: 23d4b057060c5f66ad3d57d0d65fa6e127ed4c7faa622ec0d236b58dd439179b41dff041ec2dd955c2ec893d577d9660465d2dcea0fe9a9f3af5e911ac5aa8e7
6
+ metadata.gz: 38f0844bfa6e30cd9af9414057a767cc3bd7cf6ed11023a7306b18686ad3cb250a70191d9b77f8cbc2a590aaa24f822cbec6c546f667fe6e820bb356ddd369f9
7
+ data.tar.gz: 69af022e15fa95404ab0208be1b4b6661ae14033c73477b98b78f01795712313d63952dce0674ba34ee3aecc19105ef28adfa708de738682583f3b672668a251
data/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
1
+ ### 3.0.1
2
+
3
+ * Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
4
+
5
+ ### 3.0.0
6
+
7
+ * Change the way we typecast cells in the General format. This probably won't
8
+ break anything in your app, but it's a change in behavior that theoretically
9
+ could.
10
+
11
+ Previously, we were treating cells using the General format as strings, when
12
+ according to the Office XML standard, they should be treated as numbers. We
13
+ now attempt to cast such cells as numbers, and fall back to strings if number
14
+ casting fails.
15
+
16
+ Thanks @jrodrigosm
17
+
1
18
  ### 2.0.1
2
19
 
3
20
  * Restore ability to parse IO strings (@robbevp)
@@ -77,7 +77,7 @@ module SimpleXlsxReader
77
77
 
78
78
  return unless @capture
79
79
 
80
- @current_row[cell_idx] =
80
+ captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
83
  string.strip, @type, @style,
@@ -102,6 +102,17 @@ module SimpleXlsxReader
102
102
  string.strip
103
103
  end
104
104
  end
105
+
106
+
107
+ # For some reason I can't figure out in a reasonable timeframe,
108
+ # SAX parsing some workbooks captures separate strings in the same cell
109
+ # when we encounter UTF-8, although I can't get workbooks made in my
110
+ # own version of excel to repro it. Our fix is just to keep building
111
+ # the string in this case, although maybe there's a setting in Nokogiri
112
+ # to make it not do this (looked, couldn't find it).
113
+ #
114
+ # Loading the workbook test/chunky_utf8.xlsx repros the issue.
115
+ @captured = @captured ? @captured + captured : captured
105
116
  end
106
117
 
107
118
  def end_element(name)
@@ -134,7 +145,10 @@ module SimpleXlsxReader
134
145
  # isn't the most robust strategy, but it likely fits 99% of use cases
135
146
  # considering it's not a problem with actual excel docs.
136
147
  @dimension = "A1:#{@cell_name}" if @dimension.nil?
137
- when 'v', 't' then @capture = false
148
+ when 'v', 't'
149
+ @current_row[cell_idx] = @captured
150
+ @capture = false
151
+ @captured = nil
138
152
  when 'f' then @function = false
139
153
  when 'c' then @url = nil
140
154
  end
@@ -149,7 +149,13 @@ module SimpleXlsxReader
149
149
  # detected earlier and cast here by its standardized symbol
150
150
  ##
151
151
 
152
- when :string, :unsupported
152
+ # no type encoded with the General format defaults to a number type
153
+ when nil, :string
154
+ retval = Integer(value, exception: false)
155
+ retval ||= Float(value, exception: false)
156
+ retval ||= value
157
+ retval
158
+ when :unsupported
153
159
  value
154
160
  when :fixnum
155
161
  value.to_i
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '2.0.1'
4
+ VERSION = '3.0.1'
5
5
  end
Binary file
Binary file
@@ -827,6 +827,10 @@ describe SimpleXlsxReader do
827
827
  <c r='I1' s='0'>
828
828
  <v>GUI-made hyperlink</v>
829
829
  </c>
830
+
831
+ <c r='J1' s='0'>
832
+ <v>1</v>
833
+ </c>
830
834
  </row>
831
835
  </sheetData>
832
836
 
@@ -925,6 +929,10 @@ describe SimpleXlsxReader do
925
929
  )
926
930
  )
927
931
  end
932
+
933
+ it "reads 'Generic' cells with numbers as numbers" do
934
+ _(@row[9]).must_equal 1
935
+ end
928
936
  end
929
937
 
930
938
  describe 'parsing documents with blank rows' do
@@ -936,7 +944,7 @@ describe SimpleXlsxReader do
936
944
  <sheetData>
937
945
  <row r="2" spans="1:1">
938
946
  <c r="A2" s="0">
939
- <v>0</v>
947
+ <v>a</v>
940
948
  </c>
941
949
  </row>
942
950
  <row r="4" spans="1:1">
@@ -967,13 +975,44 @@ describe SimpleXlsxReader do
967
975
  it 'reads row data despite gaps in row numbering' do
968
976
  _(@rows).must_equal [
969
977
  [nil, nil, nil, nil],
970
- ['0', nil, nil, nil],
978
+ ['a', nil, nil, nil],
971
979
  [nil, nil, nil, nil],
972
- [nil, '1', nil, nil],
973
- [nil, nil, '2', nil],
980
+ [nil, 1, nil, nil],
981
+ [nil, nil, 2, nil],
974
982
  [nil, nil, nil, nil],
975
- [nil, nil, nil, '3']
983
+ [nil, nil, nil, 3]
976
984
  ]
977
985
  end
978
986
  end
987
+
988
+ # https://support.microsoft.com/en-us/office/available-number-formats-in-excel-0afe8f52-97db-41f1-b972-4b46e9f1e8d2
989
+ describe 'numeric fields styled as "General"' do
990
+ let(:misc_numbers_path) do
991
+ File.join(File.dirname(__FILE__), 'misc_numbers.xlsx')
992
+ end
993
+
994
+ let(:sheet) { SimpleXlsxReader.open(misc_numbers_path).sheets[0] }
995
+
996
+ it 'reads medium sized integers as integers' do
997
+ _(sheet.rows.slurp[1][0]).must_equal 98070
998
+ end
999
+
1000
+ it 'reads large (>12 char) integers as integers' do
1001
+ _(sheet.rows.slurp[1][1]).must_equal 1234567890123
1002
+ end
1003
+ end
1004
+
1005
+ describe 'with mysteriously chunky UTF-8 text' do
1006
+ let(:chunky_utf8_path) do
1007
+ File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
1008
+ end
1009
+
1010
+ let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
1011
+
1012
+ it 'reads the whole cell text' do
1013
+ _(sheet.rows.slurp[1]).must_equal(
1014
+ ["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
1015
+ )
1016
+ end
1017
+ end
979
1018
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-01 00:00:00.000000000 Z
11
+ date: 2023-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -105,6 +105,7 @@ files:
105
105
  - lib/simple_xlsx_reader/loader/workbook_parser.rb
106
106
  - lib/simple_xlsx_reader/version.rb
107
107
  - simple_xlsx_reader.gemspec
108
+ - test/chunky_utf8.xlsx
108
109
  - test/date1904.xlsx
109
110
  - test/date1904_test.rb
110
111
  - test/datetime_test.rb
@@ -113,6 +114,7 @@ files:
113
114
  - test/gdocs_sheet_test.rb
114
115
  - test/lower_case_sharedstrings.xlsx
115
116
  - test/lower_case_sharedstrings_test.rb
117
+ - test/misc_numbers.xlsx
116
118
  - test/performance_test.rb
117
119
  - test/sesame_street_blog.xlsx
118
120
  - test/shared_strings.xml
@@ -144,6 +146,7 @@ signing_key:
144
146
  specification_version: 4
145
147
  summary: Read xlsx data the Ruby way
146
148
  test_files:
149
+ - test/chunky_utf8.xlsx
147
150
  - test/date1904.xlsx
148
151
  - test/date1904_test.rb
149
152
  - test/datetime_test.rb
@@ -152,6 +155,7 @@ test_files:
152
155
  - test/gdocs_sheet_test.rb
153
156
  - test/lower_case_sharedstrings.xlsx
154
157
  - test/lower_case_sharedstrings_test.rb
158
+ - test/misc_numbers.xlsx
155
159
  - test/performance_test.rb
156
160
  - test/sesame_street_blog.xlsx
157
161
  - test/shared_strings.xml