simple_xlsx_reader 2.0.1 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/simple_xlsx_reader/loader/sheet_parser.rb +16 -2
- data/lib/simple_xlsx_reader/loader.rb +7 -1
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/chunky_utf8.xlsx +0 -0
- data/test/misc_numbers.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +44 -5
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f51a4ca0ca865cc2a9ddebeb72fa3db9cc3f309ce2d1a3d34a492f09e22789c
|
4
|
+
data.tar.gz: 90a2b1ac9071fcef0797f5839652d919169cfcf6862de8926b7b605dcc53cd7e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38f0844bfa6e30cd9af9414057a767cc3bd7cf6ed11023a7306b18686ad3cb250a70191d9b77f8cbc2a590aaa24f822cbec6c546f667fe6e820bb356ddd369f9
|
7
|
+
data.tar.gz: 69af022e15fa95404ab0208be1b4b6661ae14033c73477b98b78f01795712313d63952dce0674ba34ee3aecc19105ef28adfa708de738682583f3b672668a251
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
### 3.0.1
|
2
|
+
|
3
|
+
* Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
|
4
|
+
|
5
|
+
### 3.0.0
|
6
|
+
|
7
|
+
* Change the way we typecast cells in the General format. This probably won't
|
8
|
+
break anything in your app, but it's a change in behavior that theoretically
|
9
|
+
could.
|
10
|
+
|
11
|
+
Previously, we were treating cells using the General format as strings, when
|
12
|
+
according to the Office XML standard, they should be treated as numbers. We
|
13
|
+
now attempt to cast such cells as numbers, and fall back to strings if number
|
14
|
+
casting fails.
|
15
|
+
|
16
|
+
Thanks @jrodrigosm
|
17
|
+
|
1
18
|
### 2.0.1
|
2
19
|
|
3
20
|
* Restore ability to parse IO strings (@robbevp)
|
@@ -77,7 +77,7 @@ module SimpleXlsxReader
|
|
77
77
|
|
78
78
|
return unless @capture
|
79
79
|
|
80
|
-
|
80
|
+
captured =
|
81
81
|
begin
|
82
82
|
SimpleXlsxReader::Loader.cast(
|
83
83
|
string.strip, @type, @style,
|
@@ -102,6 +102,17 @@ module SimpleXlsxReader
|
|
102
102
|
string.strip
|
103
103
|
end
|
104
104
|
end
|
105
|
+
|
106
|
+
|
107
|
+
# For some reason I can't figure out in a reasonable timeframe,
|
108
|
+
# SAX parsing some workbooks captures separate strings in the same cell
|
109
|
+
# when we encounter UTF-8, although I can't get workbooks made in my
|
110
|
+
# own version of excel to repro it. Our fix is just to keep building
|
111
|
+
# the string in this case, although maybe there's a setting in Nokogiri
|
112
|
+
# to make it not do this (looked, couldn't find it).
|
113
|
+
#
|
114
|
+
# Loading the workbook test/chunky_utf8.xlsx repros the issue.
|
115
|
+
@captured = @captured ? @captured + captured : captured
|
105
116
|
end
|
106
117
|
|
107
118
|
def end_element(name)
|
@@ -134,7 +145,10 @@ module SimpleXlsxReader
|
|
134
145
|
# isn't the most robust strategy, but it likely fits 99% of use cases
|
135
146
|
# considering it's not a problem with actual excel docs.
|
136
147
|
@dimension = "A1:#{@cell_name}" if @dimension.nil?
|
137
|
-
when 'v', 't'
|
148
|
+
when 'v', 't'
|
149
|
+
@current_row[cell_idx] = @captured
|
150
|
+
@capture = false
|
151
|
+
@captured = nil
|
138
152
|
when 'f' then @function = false
|
139
153
|
when 'c' then @url = nil
|
140
154
|
end
|
@@ -149,7 +149,13 @@ module SimpleXlsxReader
|
|
149
149
|
# detected earlier and cast here by its standardized symbol
|
150
150
|
##
|
151
151
|
|
152
|
-
|
152
|
+
# no type encoded with the General format defaults to a number type
|
153
|
+
when nil, :string
|
154
|
+
retval = Integer(value, exception: false)
|
155
|
+
retval ||= Float(value, exception: false)
|
156
|
+
retval ||= value
|
157
|
+
retval
|
158
|
+
when :unsupported
|
153
159
|
value
|
154
160
|
when :fixnum
|
155
161
|
value.to_i
|
Binary file
|
Binary file
|
@@ -827,6 +827,10 @@ describe SimpleXlsxReader do
|
|
827
827
|
<c r='I1' s='0'>
|
828
828
|
<v>GUI-made hyperlink</v>
|
829
829
|
</c>
|
830
|
+
|
831
|
+
<c r='J1' s='0'>
|
832
|
+
<v>1</v>
|
833
|
+
</c>
|
830
834
|
</row>
|
831
835
|
</sheetData>
|
832
836
|
|
@@ -925,6 +929,10 @@ describe SimpleXlsxReader do
|
|
925
929
|
)
|
926
930
|
)
|
927
931
|
end
|
932
|
+
|
933
|
+
it "reads 'Generic' cells with numbers as numbers" do
|
934
|
+
_(@row[9]).must_equal 1
|
935
|
+
end
|
928
936
|
end
|
929
937
|
|
930
938
|
describe 'parsing documents with blank rows' do
|
@@ -936,7 +944,7 @@ describe SimpleXlsxReader do
|
|
936
944
|
<sheetData>
|
937
945
|
<row r="2" spans="1:1">
|
938
946
|
<c r="A2" s="0">
|
939
|
-
<v>
|
947
|
+
<v>a</v>
|
940
948
|
</c>
|
941
949
|
</row>
|
942
950
|
<row r="4" spans="1:1">
|
@@ -967,13 +975,44 @@ describe SimpleXlsxReader do
|
|
967
975
|
it 'reads row data despite gaps in row numbering' do
|
968
976
|
_(@rows).must_equal [
|
969
977
|
[nil, nil, nil, nil],
|
970
|
-
['
|
978
|
+
['a', nil, nil, nil],
|
971
979
|
[nil, nil, nil, nil],
|
972
|
-
[nil,
|
973
|
-
[nil, nil,
|
980
|
+
[nil, 1, nil, nil],
|
981
|
+
[nil, nil, 2, nil],
|
974
982
|
[nil, nil, nil, nil],
|
975
|
-
[nil, nil, nil,
|
983
|
+
[nil, nil, nil, 3]
|
976
984
|
]
|
977
985
|
end
|
978
986
|
end
|
987
|
+
|
988
|
+
# https://support.microsoft.com/en-us/office/available-number-formats-in-excel-0afe8f52-97db-41f1-b972-4b46e9f1e8d2
|
989
|
+
describe 'numeric fields styled as "General"' do
|
990
|
+
let(:misc_numbers_path) do
|
991
|
+
File.join(File.dirname(__FILE__), 'misc_numbers.xlsx')
|
992
|
+
end
|
993
|
+
|
994
|
+
let(:sheet) { SimpleXlsxReader.open(misc_numbers_path).sheets[0] }
|
995
|
+
|
996
|
+
it 'reads medium sized integers as integers' do
|
997
|
+
_(sheet.rows.slurp[1][0]).must_equal 98070
|
998
|
+
end
|
999
|
+
|
1000
|
+
it 'reads large (>12 char) integers as integers' do
|
1001
|
+
_(sheet.rows.slurp[1][1]).must_equal 1234567890123
|
1002
|
+
end
|
1003
|
+
end
|
1004
|
+
|
1005
|
+
describe 'with mysteriously chunky UTF-8 text' do
|
1006
|
+
let(:chunky_utf8_path) do
|
1007
|
+
File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
|
1008
|
+
end
|
1009
|
+
|
1010
|
+
let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
|
1011
|
+
|
1012
|
+
it 'reads the whole cell text' do
|
1013
|
+
_(sheet.rows.slurp[1]).must_equal(
|
1014
|
+
["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
|
1015
|
+
)
|
1016
|
+
end
|
1017
|
+
end
|
979
1018
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-03-
|
11
|
+
date: 2023-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -105,6 +105,7 @@ files:
|
|
105
105
|
- lib/simple_xlsx_reader/loader/workbook_parser.rb
|
106
106
|
- lib/simple_xlsx_reader/version.rb
|
107
107
|
- simple_xlsx_reader.gemspec
|
108
|
+
- test/chunky_utf8.xlsx
|
108
109
|
- test/date1904.xlsx
|
109
110
|
- test/date1904_test.rb
|
110
111
|
- test/datetime_test.rb
|
@@ -113,6 +114,7 @@ files:
|
|
113
114
|
- test/gdocs_sheet_test.rb
|
114
115
|
- test/lower_case_sharedstrings.xlsx
|
115
116
|
- test/lower_case_sharedstrings_test.rb
|
117
|
+
- test/misc_numbers.xlsx
|
116
118
|
- test/performance_test.rb
|
117
119
|
- test/sesame_street_blog.xlsx
|
118
120
|
- test/shared_strings.xml
|
@@ -144,6 +146,7 @@ signing_key:
|
|
144
146
|
specification_version: 4
|
145
147
|
summary: Read xlsx data the Ruby way
|
146
148
|
test_files:
|
149
|
+
- test/chunky_utf8.xlsx
|
147
150
|
- test/date1904.xlsx
|
148
151
|
- test/date1904_test.rb
|
149
152
|
- test/datetime_test.rb
|
@@ -152,6 +155,7 @@ test_files:
|
|
152
155
|
- test/gdocs_sheet_test.rb
|
153
156
|
- test/lower_case_sharedstrings.xlsx
|
154
157
|
- test/lower_case_sharedstrings_test.rb
|
158
|
+
- test/misc_numbers.xlsx
|
155
159
|
- test/performance_test.rb
|
156
160
|
- test/sesame_street_blog.xlsx
|
157
161
|
- test/shared_strings.xml
|