simple_xlsx_reader 3.0.0 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/lib/simple_xlsx_reader/loader/sheet_parser.rb +15 -2
- data/lib/simple_xlsx_reader/loader/style_types_parser.rb +33 -32
- data/lib/simple_xlsx_reader/loader.rb +1 -1
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/chunky_utf8.xlsx +0 -0
- data/test/percentages_n_currencies.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +31 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d60e969d7d2db69578d543b6ebab36d29b3e3e88b4d62d67d00218b09b644cc5
|
4
|
+
data.tar.gz: 36e12c7d95b6319f8f3bb565a1bc1b8eea6d1ca44928b3188a6892bf1f0c6513
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6610958e6cb393e6013d303dd541f80a19d91415f6ebbe1d03162b52580ac361ad3f7e8e9fef5904a1daae72fe0774a5a83f47617c75ac185748c78e2c828e5a
|
7
|
+
data.tar.gz: f556a9d31d48aa7cfeb0a1a9194736f2740ae3a2c868ed6a65fc197411351b28751d9caa72fd2bbbeb7eb22acd9ba0e2d53606f414bd36843f811ccb93d80ed2
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
### 4.0.0
|
2
|
+
|
3
|
+
* Fix percentage rounding errors. Previously we were dividing by 100, when we
|
4
|
+
actually don't need to, so percentage types were 100x too small. Fixes #21.
|
5
|
+
Major bump because workarounds might have been implemented for previous
|
6
|
+
incorrect behavior.
|
7
|
+
* Fix small oddity in one currency format where round numbers would be cast
|
8
|
+
to an integer instead of a float.
|
9
|
+
|
10
|
+
### 3.0.1
|
11
|
+
|
12
|
+
* Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
|
13
|
+
|
1
14
|
### 3.0.0
|
2
15
|
|
3
16
|
* Change the way we typecast cells in the General format. This probably won't
|
@@ -77,7 +77,7 @@ module SimpleXlsxReader
|
|
77
77
|
|
78
78
|
return unless @capture
|
79
79
|
|
80
|
-
|
80
|
+
captured =
|
81
81
|
begin
|
82
82
|
SimpleXlsxReader::Loader.cast(
|
83
83
|
string.strip, @type, @style,
|
@@ -102,6 +102,16 @@ module SimpleXlsxReader
|
|
102
102
|
string.strip
|
103
103
|
end
|
104
104
|
end
|
105
|
+
|
106
|
+
# For some reason I can't figure out in a reasonable timeframe,
|
107
|
+
# SAX parsing some workbooks captures separate strings in the same cell
|
108
|
+
# when we encounter UTF-8, although I can't get workbooks made in my
|
109
|
+
# own version of Excel to reproduce it. Our fix is just to keep building
|
110
|
+
# the string in this case, although maybe there's a setting in Nokogiri
|
111
|
+
# to make it not do this (looked, couldn't find it).
|
112
|
+
#
|
113
|
+
# Loading the workbook test/chunky_utf8.xlsx repros the issue.
|
114
|
+
@captured = @captured ? @captured + captured : captured
|
105
115
|
end
|
106
116
|
|
107
117
|
def end_element(name)
|
@@ -134,7 +144,10 @@ module SimpleXlsxReader
|
|
134
144
|
# isn't the most robust strategy, but it likely fits 99% of use cases
|
135
145
|
# considering it's not a problem with actual Excel docs.
|
136
146
|
@dimension = "A1:#{@cell_name}" if @dimension.nil?
|
137
|
-
when 'v', 't' then @capture = false
|
147
|
+
when 'v', 't'
|
148
|
+
@current_row[cell_idx] = @captured
|
149
|
+
@capture = false
|
150
|
+
@captured = nil
|
138
151
|
when 'f' then @function = false
|
139
152
|
when 'c' then @url = nil
|
140
153
|
end
|
@@ -9,38 +9,39 @@ module SimpleXlsxReader
|
|
9
9
|
|
10
10
|
# Map of non-custom numFmtId to casting symbol
|
11
11
|
NumFmtMap = {
|
12
|
-
0 => :string,
|
13
|
-
1 => :fixnum,
|
14
|
-
2 => :float,
|
15
|
-
3 => :fixnum,
|
16
|
-
4 => :float,
|
17
|
-
5 => :unsupported,
|
18
|
-
6 => :unsupported,
|
19
|
-
7 => :unsupported,
|
20
|
-
8 => :unsupported,
|
21
|
-
9 => :percentage,
|
22
|
-
10 => :percentage,
|
23
|
-
11 => :bignum,
|
24
|
-
12 => :unsupported,
|
25
|
-
13 => :unsupported,
|
26
|
-
14 => :date,
|
27
|
-
15 => :date,
|
28
|
-
16 => :date,
|
29
|
-
17 => :date,
|
30
|
-
18 => :time,
|
31
|
-
19 => :time,
|
32
|
-
20 => :time,
|
33
|
-
21 => :time,
|
34
|
-
22 => :date_time,
|
35
|
-
37 => :unsupported,
|
36
|
-
38 => :unsupported,
|
37
|
-
39 => :unsupported,
|
38
|
-
40 => :unsupported,
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
12
|
+
0 => :string, # General
|
13
|
+
1 => :fixnum, # 0
|
14
|
+
2 => :float, # 0.00
|
15
|
+
3 => :fixnum, # #,##0
|
16
|
+
4 => :float, # #,##0.00
|
17
|
+
5 => :unsupported, # $#,##0_);($#,##0)
|
18
|
+
6 => :unsupported, # $#,##0_);[Red]($#,##0)
|
19
|
+
7 => :unsupported, # $#,##0.00_);($#,##0.00)
|
20
|
+
8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
|
21
|
+
9 => :percentage, # 0%
|
22
|
+
10 => :percentage, # 0.00%
|
23
|
+
11 => :bignum, # 0.00E+00
|
24
|
+
12 => :unsupported, # # ?/?
|
25
|
+
13 => :unsupported, # # ??/??
|
26
|
+
14 => :date, # mm-dd-yy
|
27
|
+
15 => :date, # d-mmm-yy
|
28
|
+
16 => :date, # d-mmm
|
29
|
+
17 => :date, # mmm-yy
|
30
|
+
18 => :time, # h:mm AM/PM
|
31
|
+
19 => :time, # h:mm:ss AM/PM
|
32
|
+
20 => :time, # h:mm
|
33
|
+
21 => :time, # h:mm:ss
|
34
|
+
22 => :date_time, # m/d/yy h:mm
|
35
|
+
37 => :unsupported, # #,##0 ;(#,##0)
|
36
|
+
38 => :unsupported, # #,##0 ;[Red](#,##0)
|
37
|
+
39 => :unsupported, # #,##0.00;(#,##0.00)
|
38
|
+
40 => :unsupported, # #,##0.00;[Red](#,##0.00)
|
39
|
+
44 => :float, # some odd currency format ?from Office 2007?
|
40
|
+
45 => :time, # mm:ss
|
41
|
+
46 => :time, # [h]:mm:ss
|
42
|
+
47 => :time, # mmss.0
|
43
|
+
48 => :bignum, # ##0.0E+0
|
44
|
+
49 => :unsupported # @
|
44
45
|
}.freeze
|
45
46
|
|
46
47
|
def parse
|
@@ -162,7 +162,7 @@ module SimpleXlsxReader
|
|
162
162
|
when :float
|
163
163
|
value.to_f
|
164
164
|
when :percentage
|
165
|
-
value.to_f / 100
|
165
|
+
value.to_f
|
166
166
|
# the trickiest. note that all these formats can vary on
|
167
167
|
# whether they actually contain a date, time, or datetime.
|
168
168
|
when :date, :time, :date_time
|
Binary file
|
Binary file
|
@@ -1001,4 +1001,35 @@ describe SimpleXlsxReader do
|
|
1001
1001
|
_(sheet.rows.slurp[1][1]).must_equal 1234567890123
|
1002
1002
|
end
|
1003
1003
|
end
|
1004
|
+
|
1005
|
+
describe 'with mysteriously chunky UTF-8 text' do
|
1006
|
+
let(:chunky_utf8_path) do
|
1007
|
+
File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
|
1008
|
+
end
|
1009
|
+
|
1010
|
+
let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
|
1011
|
+
|
1012
|
+
it 'reads the whole cell text' do
|
1013
|
+
_(sheet.rows.slurp[1]).must_equal(
|
1014
|
+
["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
|
1015
|
+
)
|
1016
|
+
end
|
1017
|
+
end
|
1018
|
+
|
1019
|
+
describe 'when using percentages & currencies' do
|
1020
|
+
let(:pnc_path) do
|
1021
|
+
# This file provided by a GitHub user having parse errors in these fields
|
1022
|
+
File.join(File.dirname(__FILE__), 'percentages_n_currencies.xlsx')
|
1023
|
+
end
|
1024
|
+
|
1025
|
+
let(:sheet) { SimpleXlsxReader.open(pnc_path).sheets[0] }
|
1026
|
+
|
1027
|
+
it 'reads percentages as floats of the form 0.XX' do
|
1028
|
+
_(sheet.rows.slurp[1][2]).must_equal(0.87)
|
1029
|
+
end
|
1030
|
+
|
1031
|
+
it 'reads currencies as floats' do
|
1032
|
+
_(sheet.rows.slurp[1][4]).must_equal(300.0)
|
1033
|
+
end
|
1034
|
+
end
|
1004
1035
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-03-
|
11
|
+
date: 2023-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -105,6 +105,7 @@ files:
|
|
105
105
|
- lib/simple_xlsx_reader/loader/workbook_parser.rb
|
106
106
|
- lib/simple_xlsx_reader/version.rb
|
107
107
|
- simple_xlsx_reader.gemspec
|
108
|
+
- test/chunky_utf8.xlsx
|
108
109
|
- test/date1904.xlsx
|
109
110
|
- test/date1904_test.rb
|
110
111
|
- test/datetime_test.rb
|
@@ -114,6 +115,7 @@ files:
|
|
114
115
|
- test/lower_case_sharedstrings.xlsx
|
115
116
|
- test/lower_case_sharedstrings_test.rb
|
116
117
|
- test/misc_numbers.xlsx
|
118
|
+
- test/percentages_n_currencies.xlsx
|
117
119
|
- test/performance_test.rb
|
118
120
|
- test/sesame_street_blog.xlsx
|
119
121
|
- test/shared_strings.xml
|
@@ -145,6 +147,7 @@ signing_key:
|
|
145
147
|
specification_version: 4
|
146
148
|
summary: Read xlsx data the Ruby way
|
147
149
|
test_files:
|
150
|
+
- test/chunky_utf8.xlsx
|
148
151
|
- test/date1904.xlsx
|
149
152
|
- test/date1904_test.rb
|
150
153
|
- test/datetime_test.rb
|
@@ -154,6 +157,7 @@ test_files:
|
|
154
157
|
- test/lower_case_sharedstrings.xlsx
|
155
158
|
- test/lower_case_sharedstrings_test.rb
|
156
159
|
- test/misc_numbers.xlsx
|
160
|
+
- test/percentages_n_currencies.xlsx
|
157
161
|
- test/performance_test.rb
|
158
162
|
- test/sesame_street_blog.xlsx
|
159
163
|
- test/shared_strings.xml
|