simple_xlsx_reader 3.0.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5449525d4e46a013f92e8406a2ec2d07b06bb795efc7c8d76b9ffbcace22a38f
4
- data.tar.gz: 5c664baa8d88692767f5bb6d2879e24c27098206695b88da42f3bc0d30bb9bce
3
+ metadata.gz: d60e969d7d2db69578d543b6ebab36d29b3e3e88b4d62d67d00218b09b644cc5
4
+ data.tar.gz: 36e12c7d95b6319f8f3bb565a1bc1b8eea6d1ca44928b3188a6892bf1f0c6513
5
5
  SHA512:
6
- metadata.gz: 90d5fedde0aa4cc2bcb8b4d9134859890bbf4357efbf6ba9aa0aaf3bd21ad1cd9f54a279d1126938a300dee36fbb8a10d63b66f945c9f7eeb8edc880bb23327a
7
- data.tar.gz: ab684cc09075a0b9a1054c045bf4202718159b258c87f62b10e18aafa4faaa7a527ba95f8e765f7609ab7ba24c43a31176906ecefd612c85badbdefee9164184
6
+ metadata.gz: 6610958e6cb393e6013d303dd541f80a19d91415f6ebbe1d03162b52580ac361ad3f7e8e9fef5904a1daae72fe0774a5a83f47617c75ac185748c78e2c828e5a
7
+ data.tar.gz: f556a9d31d48aa7cfeb0a1a9194736f2740ae3a2c868ed6a65fc197411351b28751d9caa72fd2bbbeb7eb22acd9ba0e2d53606f414bd36843f811ccb93d80ed2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ### 4.0.0
2
+
3
+ * Fix percentage rounding errors. Previously we were dividing by 100, when we
4
+ actually didn't need to, so percentage types were 100x too small. Fixes #21.
5
+ Major bump because workarounds might have been implemented for previous
6
+ incorrect behavior.
7
+ * Fix small oddity in one currency format where round numbers would be cast
8
+ to an integer instead of a float.
9
+
10
+ ### 3.0.1
11
+
12
+ * Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
13
+
1
14
  ### 3.0.0
2
15
 
3
16
  * Change the way we typecast cells in the General format. This probably won't
@@ -77,7 +77,7 @@ module SimpleXlsxReader
77
77
 
78
78
  return unless @capture
79
79
 
80
- @current_row[cell_idx] =
80
+ captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
83
  string.strip, @type, @style,
@@ -102,6 +102,16 @@ module SimpleXlsxReader
102
102
  string.strip
103
103
  end
104
104
  end
105
+
106
+ # For some reason I can't figure out in a reasonable timeframe,
107
+ # SAX parsing some workbooks captures separate strings in the same cell
108
+ # when we encounter UTF-8, although I can't get workbooks made in my
109
+ # own version of Excel to repro it. Our fix is just to keep building
110
+ # the string in this case, although maybe there's a setting in Nokogiri
111
+ # to make it not do this (looked, couldn't find it).
112
+ #
113
+ # Loading the workbook test/chunky_utf8.xlsx repros the issue.
114
+ @captured = @captured ? @captured + captured : captured
105
115
  end
106
116
 
107
117
  def end_element(name)
@@ -134,7 +144,10 @@ module SimpleXlsxReader
134
144
  # isn't the most robust strategy, but it likely fits 99% of use cases
135
145
  # considering it's not a problem with actual excel docs.
136
146
  @dimension = "A1:#{@cell_name}" if @dimension.nil?
137
- when 'v', 't' then @capture = false
147
+ when 'v', 't'
148
+ @current_row[cell_idx] = @captured
149
+ @capture = false
150
+ @captured = nil
138
151
  when 'f' then @function = false
139
152
  when 'c' then @url = nil
140
153
  end
@@ -9,38 +9,39 @@ module SimpleXlsxReader
9
9
 
10
10
  # Map of non-custom numFmtId to casting symbol
11
11
  NumFmtMap = {
12
- 0 => :string, # General
13
- 1 => :fixnum, # 0
14
- 2 => :float, # 0.00
15
- 3 => :fixnum, # #,##0
16
- 4 => :float, # #,##0.00
17
- 5 => :unsupported, # $#,##0_);($#,##0)
18
- 6 => :unsupported, # $#,##0_);[Red]($#,##0)
19
- 7 => :unsupported, # $#,##0.00_);($#,##0.00)
20
- 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
21
- 9 => :percentage, # 0%
22
- 10 => :percentage, # 0.00%
23
- 11 => :bignum, # 0.00E+00
24
- 12 => :unsupported, # # ?/?
25
- 13 => :unsupported, # # ??/??
26
- 14 => :date, # mm-dd-yy
27
- 15 => :date, # d-mmm-yy
28
- 16 => :date, # d-mmm
29
- 17 => :date, # mmm-yy
30
- 18 => :time, # h:mm AM/PM
31
- 19 => :time, # h:mm:ss AM/PM
32
- 20 => :time, # h:mm
33
- 21 => :time, # h:mm:ss
34
- 22 => :date_time, # m/d/yy h:mm
35
- 37 => :unsupported, # #,##0 ;(#,##0)
36
- 38 => :unsupported, # #,##0 ;[Red](#,##0)
37
- 39 => :unsupported, # #,##0.00;(#,##0.00)
38
- 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
39
- 45 => :time, # mm:ss
40
- 46 => :time, # [h]:mm:ss
41
- 47 => :time, # mmss.0
42
- 48 => :bignum, # ##0.0E+0
43
- 49 => :unsupported # @
12
+ 0 => :string, # General
13
+ 1 => :fixnum, # 0
14
+ 2 => :float, # 0.00
15
+ 3 => :fixnum, # #,##0
16
+ 4 => :float, # #,##0.00
17
+ 5 => :unsupported, # $#,##0_);($#,##0)
18
+ 6 => :unsupported, # $#,##0_);[Red]($#,##0)
19
+ 7 => :unsupported, # $#,##0.00_);($#,##0.00)
20
+ 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
21
+ 9 => :percentage, # 0%
22
+ 10 => :percentage, # 0.00%
23
+ 11 => :bignum, # 0.00E+00
24
+ 12 => :unsupported, # # ?/?
25
+ 13 => :unsupported, # # ??/??
26
+ 14 => :date, # mm-dd-yy
27
+ 15 => :date, # d-mmm-yy
28
+ 16 => :date, # d-mmm
29
+ 17 => :date, # mmm-yy
30
+ 18 => :time, # h:mm AM/PM
31
+ 19 => :time, # h:mm:ss AM/PM
32
+ 20 => :time, # h:mm
33
+ 21 => :time, # h:mm:ss
34
+ 22 => :date_time, # m/d/yy h:mm
35
+ 37 => :unsupported, # #,##0 ;(#,##0)
36
+ 38 => :unsupported, # #,##0 ;[Red](#,##0)
37
+ 39 => :unsupported, # #,##0.00;(#,##0.00)
38
+ 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
39
+ 44 => :float, # some odd currency format (possibly from Office 2007)
40
+ 45 => :time, # mm:ss
41
+ 46 => :time, # [h]:mm:ss
42
+ 47 => :time, # mmss.0
43
+ 48 => :bignum, # ##0.0E+0
44
+ 49 => :unsupported # @
44
45
  }.freeze
45
46
 
46
47
  def parse
@@ -162,7 +162,7 @@ module SimpleXlsxReader
162
162
  when :float
163
163
  value.to_f
164
164
  when :percentage
165
- value.to_f / 100
165
+ value.to_f
166
166
  # the trickiest. note that all these formats can vary on
167
167
  # whether they actually contain a date, time, or datetime.
168
168
  when :date, :time, :date_time
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '3.0.0'
4
+ VERSION = '4.0.0'
5
5
  end
Binary file
Binary file
@@ -1001,4 +1001,35 @@ describe SimpleXlsxReader do
1001
1001
  _(sheet.rows.slurp[1][1]).must_equal 1234567890123
1002
1002
  end
1003
1003
  end
1004
+
1005
+ describe 'with mysteriously chunky UTF-8 text' do
1006
+ let(:chunky_utf8_path) do
1007
+ File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
1008
+ end
1009
+
1010
+ let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
1011
+
1012
+ it 'reads the whole cell text' do
1013
+ _(sheet.rows.slurp[1]).must_equal(
1014
+ ["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
1015
+ )
1016
+ end
1017
+ end
1018
+
1019
+ describe 'when using percentages & currencies' do
1020
+ let(:pnc_path) do
1021
+ # This file provided by a GitHub user having parse errors in these fields
1022
+ File.join(File.dirname(__FILE__), 'percentages_n_currencies.xlsx')
1023
+ end
1024
+
1025
+ let(:sheet) { SimpleXlsxReader.open(pnc_path).sheets[0] }
1026
+
1027
+ it 'reads percentages as floats of the form 0.XX' do
1028
+ _(sheet.rows.slurp[1][2]).must_equal(0.87)
1029
+ end
1030
+
1031
+ it 'reads currencies as floats' do
1032
+ _(sheet.rows.slurp[1][4]).must_equal(300.0)
1033
+ end
1034
+ end
1004
1035
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-01 00:00:00.000000000 Z
11
+ date: 2023-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -105,6 +105,7 @@ files:
105
105
  - lib/simple_xlsx_reader/loader/workbook_parser.rb
106
106
  - lib/simple_xlsx_reader/version.rb
107
107
  - simple_xlsx_reader.gemspec
108
+ - test/chunky_utf8.xlsx
108
109
  - test/date1904.xlsx
109
110
  - test/date1904_test.rb
110
111
  - test/datetime_test.rb
@@ -114,6 +115,7 @@ files:
114
115
  - test/lower_case_sharedstrings.xlsx
115
116
  - test/lower_case_sharedstrings_test.rb
116
117
  - test/misc_numbers.xlsx
118
+ - test/percentages_n_currencies.xlsx
117
119
  - test/performance_test.rb
118
120
  - test/sesame_street_blog.xlsx
119
121
  - test/shared_strings.xml
@@ -145,6 +147,7 @@ signing_key:
145
147
  specification_version: 4
146
148
  summary: Read xlsx data the Ruby way
147
149
  test_files:
150
+ - test/chunky_utf8.xlsx
148
151
  - test/date1904.xlsx
149
152
  - test/date1904_test.rb
150
153
  - test/datetime_test.rb
@@ -154,6 +157,7 @@ test_files:
154
157
  - test/lower_case_sharedstrings.xlsx
155
158
  - test/lower_case_sharedstrings_test.rb
156
159
  - test/misc_numbers.xlsx
160
+ - test/percentages_n_currencies.xlsx
157
161
  - test/performance_test.rb
158
162
  - test/sesame_street_blog.xlsx
159
163
  - test/shared_strings.xml