simple_xlsx_reader 3.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/lib/simple_xlsx_reader/loader/sheet_parser.rb +15 -2
- data/lib/simple_xlsx_reader/loader/style_types_parser.rb +33 -32
- data/lib/simple_xlsx_reader/loader.rb +1 -1
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/chunky_utf8.xlsx +0 -0
- data/test/percentages_n_currencies.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +31 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d60e969d7d2db69578d543b6ebab36d29b3e3e88b4d62d67d00218b09b644cc5
|
4
|
+
data.tar.gz: 36e12c7d95b6319f8f3bb565a1bc1b8eea6d1ca44928b3188a6892bf1f0c6513
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6610958e6cb393e6013d303dd541f80a19d91415f6ebbe1d03162b52580ac361ad3f7e8e9fef5904a1daae72fe0774a5a83f47617c75ac185748c78e2c828e5a
|
7
|
+
data.tar.gz: f556a9d31d48aa7cfeb0a1a9194736f2740ae3a2c868ed6a65fc197411351b28751d9caa72fd2bbbeb7eb22acd9ba0e2d53606f414bd36843f811ccb93d80ed2
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
### 4.0.0
|
2
|
+
|
3
|
+
* Fix percentage rounding errors. Previously we were dividing by 100, when we
|
4
|
+
actually don't need to, so percentage types were 100x too small. Fixes #21.
|
5
|
+
Major bump because workarounds might have been implemented for previous
|
6
|
+
incorrect behavior.
|
7
|
+
* Fix small oddity in one currency format where round numbers would be cast
|
8
|
+
to an integer instead of a float.
|
9
|
+
|
10
|
+
### 3.0.1
|
11
|
+
|
12
|
+
* Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
|
13
|
+
|
1
14
|
### 3.0.0
|
2
15
|
|
3
16
|
* Change the way we typecast cells in the General format. This probably won't
|
@@ -77,7 +77,7 @@ module SimpleXlsxReader
|
|
77
77
|
|
78
78
|
return unless @capture
|
79
79
|
|
80
|
-
|
80
|
+
captured =
|
81
81
|
begin
|
82
82
|
SimpleXlsxReader::Loader.cast(
|
83
83
|
string.strip, @type, @style,
|
@@ -102,6 +102,16 @@ module SimpleXlsxReader
|
|
102
102
|
string.strip
|
103
103
|
end
|
104
104
|
end
|
105
|
+
|
106
|
+
# For some reason I can't figure out in a reasonable timeframe,
|
107
|
+
# SAX parsing some workbooks captures separate strings in the same cell
|
108
|
+
# when we encounter UTF-8, although I can't get workbooks made in my
|
109
|
+
# own version of excel to repro it. Our fix is just to keep building
|
110
|
+
# the string in this case, although maybe there's a setting in Nokogiri
|
111
|
+
# to make it not do this (looked, couldn't find it).
|
112
|
+
#
|
113
|
+
# Loading the workbook test/chunky_utf8.xlsx repros the issue.
|
114
|
+
@captured = @captured ? @captured + captured : captured
|
105
115
|
end
|
106
116
|
|
107
117
|
def end_element(name)
|
@@ -134,7 +144,10 @@ module SimpleXlsxReader
|
|
134
144
|
# isn't the most robust strategy, but it likely fits 99% of use cases
|
135
145
|
# considering it's not a problem with actual excel docs.
|
136
146
|
@dimension = "A1:#{@cell_name}" if @dimension.nil?
|
137
|
-
when 'v', 't'
|
147
|
+
when 'v', 't'
|
148
|
+
@current_row[cell_idx] = @captured
|
149
|
+
@capture = false
|
150
|
+
@captured = nil
|
138
151
|
when 'f' then @function = false
|
139
152
|
when 'c' then @url = nil
|
140
153
|
end
|
@@ -9,38 +9,39 @@ module SimpleXlsxReader
|
|
9
9
|
|
10
10
|
# Map of non-custom numFmtId to casting symbol
|
11
11
|
NumFmtMap = {
|
12
|
-
0 => :string,
|
13
|
-
1 => :fixnum,
|
14
|
-
2 => :float,
|
15
|
-
3 => :fixnum,
|
16
|
-
4 => :float,
|
17
|
-
5 => :unsupported,
|
18
|
-
6 => :unsupported,
|
19
|
-
7 => :unsupported,
|
20
|
-
8 => :unsupported,
|
21
|
-
9 => :percentage,
|
22
|
-
10 => :percentage,
|
23
|
-
11 => :bignum,
|
24
|
-
12 => :unsupported,
|
25
|
-
13 => :unsupported,
|
26
|
-
14 => :date,
|
27
|
-
15 => :date,
|
28
|
-
16 => :date,
|
29
|
-
17 => :date,
|
30
|
-
18 => :time,
|
31
|
-
19 => :time,
|
32
|
-
20 => :time,
|
33
|
-
21 => :time,
|
34
|
-
22 => :date_time,
|
35
|
-
37 => :unsupported,
|
36
|
-
38 => :unsupported,
|
37
|
-
39 => :unsupported,
|
38
|
-
40 => :unsupported,
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
12
|
+
0 => :string, # General
|
13
|
+
1 => :fixnum, # 0
|
14
|
+
2 => :float, # 0.00
|
15
|
+
3 => :fixnum, # #,##0
|
16
|
+
4 => :float, # #,##0.00
|
17
|
+
5 => :unsupported, # $#,##0_);($#,##0)
|
18
|
+
6 => :unsupported, # $#,##0_);[Red]($#,##0)
|
19
|
+
7 => :unsupported, # $#,##0.00_);($#,##0.00)
|
20
|
+
8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
|
21
|
+
9 => :percentage, # 0%
|
22
|
+
10 => :percentage, # 0.00%
|
23
|
+
11 => :bignum, # 0.00E+00
|
24
|
+
12 => :unsupported, # # ?/?
|
25
|
+
13 => :unsupported, # # ??/??
|
26
|
+
14 => :date, # mm-dd-yy
|
27
|
+
15 => :date, # d-mmm-yy
|
28
|
+
16 => :date, # d-mmm
|
29
|
+
17 => :date, # mmm-yy
|
30
|
+
18 => :time, # h:mm AM/PM
|
31
|
+
19 => :time, # h:mm:ss AM/PM
|
32
|
+
20 => :time, # h:mm
|
33
|
+
21 => :time, # h:mm:ss
|
34
|
+
22 => :date_time, # m/d/yy h:mm
|
35
|
+
37 => :unsupported, # #,##0 ;(#,##0)
|
36
|
+
38 => :unsupported, # #,##0 ;[Red](#,##0)
|
37
|
+
39 => :unsupported, # #,##0.00;(#,##0.00)
|
38
|
+
40 => :unsupported, # #,##0.00;[Red](#,##0.00)
|
39
|
+
44 => :float, # some odd currency format ?from Office 2007?
|
40
|
+
45 => :time, # mm:ss
|
41
|
+
46 => :time, # [h]:mm:ss
|
42
|
+
47 => :time, # mmss.0
|
43
|
+
48 => :bignum, # ##0.0E+0
|
44
|
+
49 => :unsupported # @
|
44
45
|
}.freeze
|
45
46
|
|
46
47
|
def parse
|
@@ -162,7 +162,7 @@ module SimpleXlsxReader
|
|
162
162
|
when :float
|
163
163
|
value.to_f
|
164
164
|
when :percentage
|
165
|
-
value.to_f / 100
|
165
|
+
value.to_f
|
166
166
|
# the trickiest. note that all these formats can vary on
|
167
167
|
# whether they actually contain a date, time, or datetime.
|
168
168
|
when :date, :time, :date_time
|
Binary file
|
Binary file
|
@@ -1001,4 +1001,35 @@ describe SimpleXlsxReader do
|
|
1001
1001
|
_(sheet.rows.slurp[1][1]).must_equal 1234567890123
|
1002
1002
|
end
|
1003
1003
|
end
|
1004
|
+
|
1005
|
+
describe 'with mysteriously chunky UTF-8 text' do
|
1006
|
+
let(:chunky_utf8_path) do
|
1007
|
+
File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
|
1008
|
+
end
|
1009
|
+
|
1010
|
+
let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
|
1011
|
+
|
1012
|
+
it 'reads the whole cell text' do
|
1013
|
+
_(sheet.rows.slurp[1]).must_equal(
|
1014
|
+
["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
|
1015
|
+
)
|
1016
|
+
end
|
1017
|
+
end
|
1018
|
+
|
1019
|
+
describe 'when using percentages & currencies' do
|
1020
|
+
let(:pnc_path) do
|
1021
|
+
# This file provided by a GitHub user having parse errors in these fields
|
1022
|
+
File.join(File.dirname(__FILE__), 'percentages_n_currencies.xlsx')
|
1023
|
+
end
|
1024
|
+
|
1025
|
+
let(:sheet) { SimpleXlsxReader.open(pnc_path).sheets[0] }
|
1026
|
+
|
1027
|
+
it 'reads percentages as floats of the form 0.XX' do
|
1028
|
+
_(sheet.rows.slurp[1][2]).must_equal(0.87)
|
1029
|
+
end
|
1030
|
+
|
1031
|
+
it 'reads currencies as floats' do
|
1032
|
+
_(sheet.rows.slurp[1][4]).must_equal(300.0)
|
1033
|
+
end
|
1034
|
+
end
|
1004
1035
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-03-
|
11
|
+
date: 2023-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -105,6 +105,7 @@ files:
|
|
105
105
|
- lib/simple_xlsx_reader/loader/workbook_parser.rb
|
106
106
|
- lib/simple_xlsx_reader/version.rb
|
107
107
|
- simple_xlsx_reader.gemspec
|
108
|
+
- test/chunky_utf8.xlsx
|
108
109
|
- test/date1904.xlsx
|
109
110
|
- test/date1904_test.rb
|
110
111
|
- test/datetime_test.rb
|
@@ -114,6 +115,7 @@ files:
|
|
114
115
|
- test/lower_case_sharedstrings.xlsx
|
115
116
|
- test/lower_case_sharedstrings_test.rb
|
116
117
|
- test/misc_numbers.xlsx
|
118
|
+
- test/percentages_n_currencies.xlsx
|
117
119
|
- test/performance_test.rb
|
118
120
|
- test/sesame_street_blog.xlsx
|
119
121
|
- test/shared_strings.xml
|
@@ -145,6 +147,7 @@ signing_key:
|
|
145
147
|
specification_version: 4
|
146
148
|
summary: Read xlsx data the Ruby way
|
147
149
|
test_files:
|
150
|
+
- test/chunky_utf8.xlsx
|
148
151
|
- test/date1904.xlsx
|
149
152
|
- test/date1904_test.rb
|
150
153
|
- test/datetime_test.rb
|
@@ -154,6 +157,7 @@ test_files:
|
|
154
157
|
- test/lower_case_sharedstrings.xlsx
|
155
158
|
- test/lower_case_sharedstrings_test.rb
|
156
159
|
- test/misc_numbers.xlsx
|
160
|
+
- test/percentages_n_currencies.xlsx
|
157
161
|
- test/performance_test.rb
|
158
162
|
- test/sesame_street_blog.xlsx
|
159
163
|
- test/shared_strings.xml
|