simple_xlsx_reader 3.0.0 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5449525d4e46a013f92e8406a2ec2d07b06bb795efc7c8d76b9ffbcace22a38f
4
- data.tar.gz: 5c664baa8d88692767f5bb6d2879e24c27098206695b88da42f3bc0d30bb9bce
3
+ metadata.gz: d60e969d7d2db69578d543b6ebab36d29b3e3e88b4d62d67d00218b09b644cc5
4
+ data.tar.gz: 36e12c7d95b6319f8f3bb565a1bc1b8eea6d1ca44928b3188a6892bf1f0c6513
5
5
  SHA512:
6
- metadata.gz: 90d5fedde0aa4cc2bcb8b4d9134859890bbf4357efbf6ba9aa0aaf3bd21ad1cd9f54a279d1126938a300dee36fbb8a10d63b66f945c9f7eeb8edc880bb23327a
7
- data.tar.gz: ab684cc09075a0b9a1054c045bf4202718159b258c87f62b10e18aafa4faaa7a527ba95f8e765f7609ab7ba24c43a31176906ecefd612c85badbdefee9164184
6
+ metadata.gz: 6610958e6cb393e6013d303dd541f80a19d91415f6ebbe1d03162b52580ac361ad3f7e8e9fef5904a1daae72fe0774a5a83f47617c75ac185748c78e2c828e5a
7
+ data.tar.gz: f556a9d31d48aa7cfeb0a1a9194736f2740ae3a2c868ed6a65fc197411351b28751d9caa72fd2bbbeb7eb22acd9ba0e2d53606f414bd36843f811ccb93d80ed2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ### 4.0.0
2
+
3
+ * Fix percentage rounding errors. Previously we were dividing by 100, when we
4
+ actually don't need to, so percentage types were 100x too small. Fixes #21.
5
+ Major bump because workarounds might have been implemented for previous
6
+ incorrect behavior.
7
+ * Fix small oddity in one currency format where round numbers would be cast
8
+ to an integer instead of a float.
9
+
10
+ ### 3.0.1
11
+
12
+ * Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
13
+
1
14
  ### 3.0.0
2
15
 
3
16
  * Change the way we typecast cells in the General format. This probably won't
@@ -77,7 +77,7 @@ module SimpleXlsxReader
77
77
 
78
78
  return unless @capture
79
79
 
80
- @current_row[cell_idx] =
80
+ captured =
81
81
  begin
82
82
  SimpleXlsxReader::Loader.cast(
83
83
  string.strip, @type, @style,
@@ -102,6 +102,16 @@ module SimpleXlsxReader
102
102
  string.strip
103
103
  end
104
104
  end
105
+
106
+ # For some reason I can't figure out in a reasonable timeframe,
107
+ # SAX parsing some workbooks captures separate strings in the same cell
108
+ # when we encounter UTF-8, although I can't get workbooks made in my
109
+ # own version of Excel to repro it. Our fix is just to keep building
110
+ # the string in this case, although maybe there's a setting in Nokogiri
111
+ # to make it not do this (looked, couldn't find it).
112
+ #
113
+ # Loading the workbook test/chunky_utf8.xlsx repros the issue.
114
+ @captured = @captured ? @captured + captured : captured
105
115
  end
106
116
 
107
117
  def end_element(name)
@@ -134,7 +144,10 @@ module SimpleXlsxReader
134
144
  # isn't the most robust strategy, but it likely fits 99% of use cases
135
145
  # considering it's not a problem with actual Excel docs.
136
146
  @dimension = "A1:#{@cell_name}" if @dimension.nil?
137
- when 'v', 't' then @capture = false
147
+ when 'v', 't'
148
+ @current_row[cell_idx] = @captured
149
+ @capture = false
150
+ @captured = nil
138
151
  when 'f' then @function = false
139
152
  when 'c' then @url = nil
140
153
  end
@@ -9,38 +9,39 @@ module SimpleXlsxReader
9
9
 
10
10
  # Map of non-custom numFmtId to casting symbol
11
11
  NumFmtMap = {
12
- 0 => :string, # General
13
- 1 => :fixnum, # 0
14
- 2 => :float, # 0.00
15
- 3 => :fixnum, # #,##0
16
- 4 => :float, # #,##0.00
17
- 5 => :unsupported, # $#,##0_);($#,##0)
18
- 6 => :unsupported, # $#,##0_);[Red]($#,##0)
19
- 7 => :unsupported, # $#,##0.00_);($#,##0.00)
20
- 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
21
- 9 => :percentage, # 0%
22
- 10 => :percentage, # 0.00%
23
- 11 => :bignum, # 0.00E+00
24
- 12 => :unsupported, # # ?/?
25
- 13 => :unsupported, # # ??/??
26
- 14 => :date, # mm-dd-yy
27
- 15 => :date, # d-mmm-yy
28
- 16 => :date, # d-mmm
29
- 17 => :date, # mmm-yy
30
- 18 => :time, # h:mm AM/PM
31
- 19 => :time, # h:mm:ss AM/PM
32
- 20 => :time, # h:mm
33
- 21 => :time, # h:mm:ss
34
- 22 => :date_time, # m/d/yy h:mm
35
- 37 => :unsupported, # #,##0 ;(#,##0)
36
- 38 => :unsupported, # #,##0 ;[Red](#,##0)
37
- 39 => :unsupported, # #,##0.00;(#,##0.00)
38
- 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
39
- 45 => :time, # mm:ss
40
- 46 => :time, # [h]:mm:ss
41
- 47 => :time, # mmss.0
42
- 48 => :bignum, # ##0.0E+0
43
- 49 => :unsupported # @
12
+ 0 => :string, # General
13
+ 1 => :fixnum, # 0
14
+ 2 => :float, # 0.00
15
+ 3 => :fixnum, # #,##0
16
+ 4 => :float, # #,##0.00
17
+ 5 => :unsupported, # $#,##0_);($#,##0)
18
+ 6 => :unsupported, # $#,##0_);[Red]($#,##0)
19
+ 7 => :unsupported, # $#,##0.00_);($#,##0.00)
20
+ 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
21
+ 9 => :percentage, # 0%
22
+ 10 => :percentage, # 0.00%
23
+ 11 => :bignum, # 0.00E+00
24
+ 12 => :unsupported, # # ?/?
25
+ 13 => :unsupported, # # ??/??
26
+ 14 => :date, # mm-dd-yy
27
+ 15 => :date, # d-mmm-yy
28
+ 16 => :date, # d-mmm
29
+ 17 => :date, # mmm-yy
30
+ 18 => :time, # h:mm AM/PM
31
+ 19 => :time, # h:mm:ss AM/PM
32
+ 20 => :time, # h:mm
33
+ 21 => :time, # h:mm:ss
34
+ 22 => :date_time, # m/d/yy h:mm
35
+ 37 => :unsupported, # #,##0 ;(#,##0)
36
+ 38 => :unsupported, # #,##0 ;[Red](#,##0)
37
+ 39 => :unsupported, # #,##0.00;(#,##0.00)
38
+ 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
39
+ 44 => :float, # some odd currency format (possibly from Office 2007)
40
+ 45 => :time, # mm:ss
41
+ 46 => :time, # [h]:mm:ss
42
+ 47 => :time, # mmss.0
43
+ 48 => :bignum, # ##0.0E+0
44
+ 49 => :unsupported # @
44
45
  }.freeze
45
46
 
46
47
  def parse
@@ -162,7 +162,7 @@ module SimpleXlsxReader
162
162
  when :float
163
163
  value.to_f
164
164
  when :percentage
165
- value.to_f / 100
165
+ value.to_f
166
166
  # the trickiest. note that all these formats can vary on
167
167
  # whether they actually contain a date, time, or datetime.
168
168
  when :date, :time, :date_time
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '3.0.0'
4
+ VERSION = '4.0.0'
5
5
  end
Binary file
Binary file
@@ -1001,4 +1001,35 @@ describe SimpleXlsxReader do
1001
1001
  _(sheet.rows.slurp[1][1]).must_equal 1234567890123
1002
1002
  end
1003
1003
  end
1004
+
1005
+ describe 'with mysteriously chunky UTF-8 text' do
1006
+ let(:chunky_utf8_path) do
1007
+ File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
1008
+ end
1009
+
1010
+ let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
1011
+
1012
+ it 'reads the whole cell text' do
1013
+ _(sheet.rows.slurp[1]).must_equal(
1014
+ ["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
1015
+ )
1016
+ end
1017
+ end
1018
+
1019
+ describe 'when using percentages & currencies' do
1020
+ let(:pnc_path) do
1021
+ # This file provided by a GitHub user having parse errors in these fields
1022
+ File.join(File.dirname(__FILE__), 'percentages_n_currencies.xlsx')
1023
+ end
1024
+
1025
+ let(:sheet) { SimpleXlsxReader.open(pnc_path).sheets[0] }
1026
+
1027
+ it 'reads percentages as floats of the form 0.XX' do
1028
+ _(sheet.rows.slurp[1][2]).must_equal(0.87)
1029
+ end
1030
+
1031
+ it 'reads currencies as floats' do
1032
+ _(sheet.rows.slurp[1][4]).must_equal(300.0)
1033
+ end
1034
+ end
1004
1035
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-01 00:00:00.000000000 Z
11
+ date: 2023-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -105,6 +105,7 @@ files:
105
105
  - lib/simple_xlsx_reader/loader/workbook_parser.rb
106
106
  - lib/simple_xlsx_reader/version.rb
107
107
  - simple_xlsx_reader.gemspec
108
+ - test/chunky_utf8.xlsx
108
109
  - test/date1904.xlsx
109
110
  - test/date1904_test.rb
110
111
  - test/datetime_test.rb
@@ -114,6 +115,7 @@ files:
114
115
  - test/lower_case_sharedstrings.xlsx
115
116
  - test/lower_case_sharedstrings_test.rb
116
117
  - test/misc_numbers.xlsx
118
+ - test/percentages_n_currencies.xlsx
117
119
  - test/performance_test.rb
118
120
  - test/sesame_street_blog.xlsx
119
121
  - test/shared_strings.xml
@@ -145,6 +147,7 @@ signing_key:
145
147
  specification_version: 4
146
148
  summary: Read xlsx data the Ruby way
147
149
  test_files:
150
+ - test/chunky_utf8.xlsx
148
151
  - test/date1904.xlsx
149
152
  - test/date1904_test.rb
150
153
  - test/datetime_test.rb
@@ -154,6 +157,7 @@ test_files:
154
157
  - test/lower_case_sharedstrings.xlsx
155
158
  - test/lower_case_sharedstrings_test.rb
156
159
  - test/misc_numbers.xlsx
160
+ - test/percentages_n_currencies.xlsx
157
161
  - test/performance_test.rb
158
162
  - test/sesame_street_blog.xlsx
159
163
  - test/shared_strings.xml