simple_xlsx_reader 2.0.1 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/simple_xlsx_reader/loader/sheet_parser.rb +16 -2
- data/lib/simple_xlsx_reader/loader.rb +7 -1
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/chunky_utf8.xlsx +0 -0
- data/test/misc_numbers.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +44 -5
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f51a4ca0ca865cc2a9ddebeb72fa3db9cc3f309ce2d1a3d34a492f09e22789c
|
4
|
+
data.tar.gz: 90a2b1ac9071fcef0797f5839652d919169cfcf6862de8926b7b605dcc53cd7e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38f0844bfa6e30cd9af9414057a767cc3bd7cf6ed11023a7306b18686ad3cb250a70191d9b77f8cbc2a590aaa24f822cbec6c546f667fe6e820bb356ddd369f9
|
7
|
+
data.tar.gz: 69af022e15fa95404ab0208be1b4b6661ae14033c73477b98b78f01795712313d63952dce0674ba34ee3aecc19105ef28adfa708de738682583f3b672668a251
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
### 3.0.1
|
2
|
+
|
3
|
+
* Fix parsing "chunky" UTF-8 workbooks. Closes issues #39 and #45. See ce67f0d4.
|
4
|
+
|
5
|
+
### 3.0.0
|
6
|
+
|
7
|
+
* Change the way we typecast cells in the General format. This probably won't
|
8
|
+
break anything in your app, but it's a change in behavior that theoretically
|
9
|
+
could.
|
10
|
+
|
11
|
+
Previously, we were treating cells using the General format as strings, when
|
12
|
+
according to the Office XML standard, they should be treated as numbers. We
|
13
|
+
now attempt to cast such cells as numbers, and fall back to strings if number
|
14
|
+
casting fails.
|
15
|
+
|
16
|
+
Thanks @jrodrigosm
|
17
|
+
|
1
18
|
### 2.0.1
|
2
19
|
|
3
20
|
* Restore ability to parse IO strings (@robbevp)
|
@@ -77,7 +77,7 @@ module SimpleXlsxReader
|
|
77
77
|
|
78
78
|
return unless @capture
|
79
79
|
|
80
|
-
|
80
|
+
captured =
|
81
81
|
begin
|
82
82
|
SimpleXlsxReader::Loader.cast(
|
83
83
|
string.strip, @type, @style,
|
@@ -102,6 +102,17 @@ module SimpleXlsxReader
|
|
102
102
|
string.strip
|
103
103
|
end
|
104
104
|
end
|
105
|
+
|
106
|
+
|
107
|
+
# For some reason I can't figure out in a reasonable timeframe,
|
108
|
+
# SAX parsing some workbooks captures separate strings in the same cell
|
109
|
+
# when we encounter UTF-8, although I can't get workbooks made in my
|
110
|
+
# own version of excel to repro it. Our fix is just to keep building
|
111
|
+
# the string in this case, although maybe there's a setting in Nokogiri
|
112
|
+
# to make it not do this (looked, couldn't find it).
|
113
|
+
#
|
114
|
+
# Loading the workbook test/chunky_utf8.xlsx repros the issue.
|
115
|
+
@captured = @captured ? @captured + captured : captured
|
105
116
|
end
|
106
117
|
|
107
118
|
def end_element(name)
|
@@ -134,7 +145,10 @@ module SimpleXlsxReader
|
|
134
145
|
# isn't the most robust strategy, but it likely fits 99% of use cases
|
135
146
|
# considering it's not a problem with actual excel docs.
|
136
147
|
@dimension = "A1:#{@cell_name}" if @dimension.nil?
|
137
|
-
when 'v', 't'
|
148
|
+
when 'v', 't'
|
149
|
+
@current_row[cell_idx] = @captured
|
150
|
+
@capture = false
|
151
|
+
@captured = nil
|
138
152
|
when 'f' then @function = false
|
139
153
|
when 'c' then @url = nil
|
140
154
|
end
|
@@ -149,7 +149,13 @@ module SimpleXlsxReader
|
|
149
149
|
# detected earlier and cast here by its standardized symbol
|
150
150
|
##
|
151
151
|
|
152
|
-
|
152
|
+
# no type encoded with the General format defaults to a number type
|
153
|
+
when nil, :string
|
154
|
+
retval = Integer(value, exception: false)
|
155
|
+
retval ||= Float(value, exception: false)
|
156
|
+
retval ||= value
|
157
|
+
retval
|
158
|
+
when :unsupported
|
153
159
|
value
|
154
160
|
when :fixnum
|
155
161
|
value.to_i
|
Binary file
|
Binary file
|
@@ -827,6 +827,10 @@ describe SimpleXlsxReader do
|
|
827
827
|
<c r='I1' s='0'>
|
828
828
|
<v>GUI-made hyperlink</v>
|
829
829
|
</c>
|
830
|
+
|
831
|
+
<c r='J1' s='0'>
|
832
|
+
<v>1</v>
|
833
|
+
</c>
|
830
834
|
</row>
|
831
835
|
</sheetData>
|
832
836
|
|
@@ -925,6 +929,10 @@ describe SimpleXlsxReader do
|
|
925
929
|
)
|
926
930
|
)
|
927
931
|
end
|
932
|
+
|
933
|
+
it "reads 'Generic' cells with numbers as numbers" do
|
934
|
+
_(@row[9]).must_equal 1
|
935
|
+
end
|
928
936
|
end
|
929
937
|
|
930
938
|
describe 'parsing documents with blank rows' do
|
@@ -936,7 +944,7 @@ describe SimpleXlsxReader do
|
|
936
944
|
<sheetData>
|
937
945
|
<row r="2" spans="1:1">
|
938
946
|
<c r="A2" s="0">
|
939
|
-
<v>
|
947
|
+
<v>a</v>
|
940
948
|
</c>
|
941
949
|
</row>
|
942
950
|
<row r="4" spans="1:1">
|
@@ -967,13 +975,44 @@ describe SimpleXlsxReader do
|
|
967
975
|
it 'reads row data despite gaps in row numbering' do
|
968
976
|
_(@rows).must_equal [
|
969
977
|
[nil, nil, nil, nil],
|
970
|
-
['
|
978
|
+
['a', nil, nil, nil],
|
971
979
|
[nil, nil, nil, nil],
|
972
|
-
[nil,
|
973
|
-
[nil, nil,
|
980
|
+
[nil, 1, nil, nil],
|
981
|
+
[nil, nil, 2, nil],
|
974
982
|
[nil, nil, nil, nil],
|
975
|
-
[nil, nil, nil,
|
983
|
+
[nil, nil, nil, 3]
|
976
984
|
]
|
977
985
|
end
|
978
986
|
end
|
987
|
+
|
988
|
+
# https://support.microsoft.com/en-us/office/available-number-formats-in-excel-0afe8f52-97db-41f1-b972-4b46e9f1e8d2
|
989
|
+
describe 'numeric fields styled as "General"' do
|
990
|
+
let(:misc_numbers_path) do
|
991
|
+
File.join(File.dirname(__FILE__), 'misc_numbers.xlsx')
|
992
|
+
end
|
993
|
+
|
994
|
+
let(:sheet) { SimpleXlsxReader.open(misc_numbers_path).sheets[0] }
|
995
|
+
|
996
|
+
it 'reads medium sized integers as integers' do
|
997
|
+
_(sheet.rows.slurp[1][0]).must_equal 98070
|
998
|
+
end
|
999
|
+
|
1000
|
+
it 'reads large (>12 char) integers as integers' do
|
1001
|
+
_(sheet.rows.slurp[1][1]).must_equal 1234567890123
|
1002
|
+
end
|
1003
|
+
end
|
1004
|
+
|
1005
|
+
describe 'with mysteriously chunky UTF-8 text' do
|
1006
|
+
let(:chunky_utf8_path) do
|
1007
|
+
File.join(File.dirname(__FILE__), 'chunky_utf8.xlsx')
|
1008
|
+
end
|
1009
|
+
|
1010
|
+
let(:sheet) { SimpleXlsxReader.open(chunky_utf8_path).sheets[0] }
|
1011
|
+
|
1012
|
+
it 'reads the whole cell text' do
|
1013
|
+
_(sheet.rows.slurp[1]).must_equal(
|
1014
|
+
["sample-company-1", "Korntal-Münchingen", "Bronholmer straße"]
|
1015
|
+
)
|
1016
|
+
end
|
1017
|
+
end
|
979
1018
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-03-
|
11
|
+
date: 2023-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -105,6 +105,7 @@ files:
|
|
105
105
|
- lib/simple_xlsx_reader/loader/workbook_parser.rb
|
106
106
|
- lib/simple_xlsx_reader/version.rb
|
107
107
|
- simple_xlsx_reader.gemspec
|
108
|
+
- test/chunky_utf8.xlsx
|
108
109
|
- test/date1904.xlsx
|
109
110
|
- test/date1904_test.rb
|
110
111
|
- test/datetime_test.rb
|
@@ -113,6 +114,7 @@ files:
|
|
113
114
|
- test/gdocs_sheet_test.rb
|
114
115
|
- test/lower_case_sharedstrings.xlsx
|
115
116
|
- test/lower_case_sharedstrings_test.rb
|
117
|
+
- test/misc_numbers.xlsx
|
116
118
|
- test/performance_test.rb
|
117
119
|
- test/sesame_street_blog.xlsx
|
118
120
|
- test/shared_strings.xml
|
@@ -144,6 +146,7 @@ signing_key:
|
|
144
146
|
specification_version: 4
|
145
147
|
summary: Read xlsx data the Ruby way
|
146
148
|
test_files:
|
149
|
+
- test/chunky_utf8.xlsx
|
147
150
|
- test/date1904.xlsx
|
148
151
|
- test/date1904_test.rb
|
149
152
|
- test/datetime_test.rb
|
@@ -152,6 +155,7 @@ test_files:
|
|
152
155
|
- test/gdocs_sheet_test.rb
|
153
156
|
- test/lower_case_sharedstrings.xlsx
|
154
157
|
- test/lower_case_sharedstrings_test.rb
|
158
|
+
- test/misc_numbers.xlsx
|
155
159
|
- test/performance_test.rb
|
156
160
|
- test/sesame_street_blog.xlsx
|
157
161
|
- test/shared_strings.xml
|