simple_xlsx_reader 2.0.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -5
- data/CHANGELOG.md +18 -0
- data/lib/simple_xlsx_reader/document.rb +6 -4
- data/lib/simple_xlsx_reader/loader.rb +11 -5
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/lib/simple_xlsx_reader.rb +5 -2
- data/test/misc_numbers.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +39 -5
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5449525d4e46a013f92e8406a2ec2d07b06bb795efc7c8d76b9ffbcace22a38f
|
4
|
+
data.tar.gz: 5c664baa8d88692767f5bb6d2879e24c27098206695b88da42f3bc0d30bb9bce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90d5fedde0aa4cc2bcb8b4d9134859890bbf4357efbf6ba9aa0aaf3bd21ad1cd9f54a279d1126938a300dee36fbb8a10d63b66f945c9f7eeb8edc880bb23327a
|
7
|
+
data.tar.gz: ab684cc09075a0b9a1054c045bf4202718159b258c87f62b10e18aafa4faaa7a527ba95f8e765f7609ab7ba24c43a31176906ecefd612c85badbdefee9164184
|
data/.github/workflows/ruby.yml
CHANGED
@@ -22,15 +22,12 @@ jobs:
|
|
22
22
|
runs-on: ubuntu-latest
|
23
23
|
strategy:
|
24
24
|
matrix:
|
25
|
-
ruby-version: ['2.6', '2.7', '3.0']
|
25
|
+
ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2']
|
26
26
|
|
27
27
|
steps:
|
28
28
|
- uses: actions/checkout@v3
|
29
29
|
- name: Set up Ruby
|
30
|
-
|
31
|
-
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
32
|
-
# uses: ruby/setup-ruby@v1
|
33
|
-
uses: ruby/setup-ruby@2b019609e2b0f1ea1a2bc8ca11cb82ab46ada124
|
30
|
+
uses: ruby/setup-ruby@v1
|
34
31
|
with:
|
35
32
|
ruby-version: ${{ matrix.ruby-version }}
|
36
33
|
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,21 @@
|
|
1
|
+
### 3.0.0
|
2
|
+
|
3
|
+
* Change the way we typecast cells in the General format. This probably won't
|
4
|
+
break anything in your app, but it's a change in behavior that theoretically
|
5
|
+
could.
|
6
|
+
|
7
|
+
Previously, we were treating cells using the General format as strings, when
|
8
|
+
according to the Office XML standard, they should be treated as numbers. We
|
9
|
+
now attempt to cast such cells as numbers, and fall back to strings if number
|
10
|
+
casting fails.
|
11
|
+
|
12
|
+
Thanks @jrodrigosm
|
13
|
+
|
14
|
+
### 2.0.1
|
15
|
+
|
16
|
+
* Restore ability to parse IO strings (@robbevp)
|
17
|
+
* Add Ruby 3.1 and 3.2 to CI (@taichi-ishitani)
|
18
|
+
|
1
19
|
### 2.0.0
|
2
20
|
|
3
21
|
* SPEED
|
@@ -8,14 +8,16 @@ module SimpleXlsxReader
|
|
8
8
|
# Main class for the public API. See the README for usage examples,
|
9
9
|
# or read the code, it's pretty friendly.
|
10
10
|
class Document
|
11
|
-
attr_reader :
|
11
|
+
attr_reader :string_or_io
|
12
12
|
|
13
|
-
def initialize(file_path)
|
14
|
-
|
13
|
+
def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
|
14
|
+
fail(ArgumentError, 'either file_path or string_or_io must be provided') if legacy_file_path.nil? && file_path.nil? && string_or_io.nil?
|
15
|
+
|
16
|
+
@string_or_io = string_or_io || File.new(legacy_file_path || file_path)
|
15
17
|
end
|
16
18
|
|
17
19
|
def sheets
|
18
|
-
@sheets ||= Loader.new(
|
20
|
+
@sheets ||= Loader.new(string_or_io).init_sheets
|
19
21
|
end
|
20
22
|
|
21
23
|
# Expensive because it slurps all the sheets into memory,
|
@@ -1,12 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module SimpleXlsxReader
|
4
|
-
class Loader < Struct.new(:
|
4
|
+
class Loader < Struct.new(:string_or_io)
|
5
5
|
attr_accessor :shared_strings, :sheet_parsers, :sheet_toc, :style_types, :base_date
|
6
6
|
|
7
7
|
def init_sheets
|
8
8
|
ZipReader.new(
|
9
|
-
|
9
|
+
string_or_io: string_or_io,
|
10
10
|
loader: self
|
11
11
|
).read
|
12
12
|
|
@@ -19,12 +19,12 @@ module SimpleXlsxReader
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
-
ZipReader = Struct.new(:
|
22
|
+
ZipReader = Struct.new(:string_or_io, :loader, keyword_init: true) do
|
23
23
|
attr_reader :zip
|
24
24
|
|
25
25
|
def initialize(*args)
|
26
26
|
super
|
27
|
-
@zip = SimpleXlsxReader::Zip.
|
27
|
+
@zip = SimpleXlsxReader::Zip.open_buffer(string_or_io)
|
28
28
|
end
|
29
29
|
|
30
30
|
def read
|
@@ -149,7 +149,13 @@ module SimpleXlsxReader
|
|
149
149
|
# detected earlier and cast here by its standardized symbol
|
150
150
|
##
|
151
151
|
|
152
|
-
|
152
|
+
# no type encoded with the General format defaults to a number type
|
153
|
+
when nil, :string
|
154
|
+
retval = Integer(value, exception: false)
|
155
|
+
retval ||= Float(value, exception: false)
|
156
|
+
retval ||= value
|
157
|
+
retval
|
158
|
+
when :unsupported
|
153
159
|
value
|
154
160
|
when :fixnum
|
155
161
|
value.to_i
|
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -42,8 +42,11 @@ module SimpleXlsxReader
|
|
42
42
|
end
|
43
43
|
|
44
44
|
def open(file_path)
|
45
|
-
Document.new(file_path).tap(&:sheets)
|
45
|
+
Document.new(file_path: file_path).tap(&:sheets)
|
46
|
+
end
|
47
|
+
|
48
|
+
def parse(string_or_io)
|
49
|
+
Document.new(string_or_io: string_or_io).tap(&:sheets)
|
46
50
|
end
|
47
|
-
alias parse open
|
48
51
|
end
|
49
52
|
end
|
Binary file
|
@@ -18,6 +18,7 @@ describe SimpleXlsxReader do
|
|
18
18
|
|
19
19
|
let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
|
20
20
|
let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
|
21
|
+
let(:sesame_street_blog_string) { IO.read(sesame_street_blog_file_path) }
|
21
22
|
|
22
23
|
let(:expected_result) do
|
23
24
|
{
|
@@ -54,6 +55,14 @@ describe SimpleXlsxReader do
|
|
54
55
|
end
|
55
56
|
end
|
56
57
|
|
58
|
+
describe 'load from string' do
|
59
|
+
let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
|
60
|
+
|
61
|
+
it 'reads an xlsx string into a hash of {[sheet name] => [data]}' do
|
62
|
+
_(subject.to_hash).must_equal(expected_result)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
57
66
|
it 'outputs strings in UTF-8 encoding' do
|
58
67
|
document = SimpleXlsxReader.parse(sesame_street_blog_io)
|
59
68
|
_(document.sheets[0].rows.to_a.flatten.map(&:encoding).uniq)
|
@@ -818,6 +827,10 @@ describe SimpleXlsxReader do
|
|
818
827
|
<c r='I1' s='0'>
|
819
828
|
<v>GUI-made hyperlink</v>
|
820
829
|
</c>
|
830
|
+
|
831
|
+
<c r='J1' s='0'>
|
832
|
+
<v>1</v>
|
833
|
+
</c>
|
821
834
|
</row>
|
822
835
|
</sheetData>
|
823
836
|
|
@@ -916,6 +929,10 @@ describe SimpleXlsxReader do
|
|
916
929
|
)
|
917
930
|
)
|
918
931
|
end
|
932
|
+
|
933
|
+
it "reads 'Generic' cells with numbers as numbers" do
|
934
|
+
_(@row[9]).must_equal 1
|
935
|
+
end
|
919
936
|
end
|
920
937
|
|
921
938
|
describe 'parsing documents with blank rows' do
|
@@ -927,7 +944,7 @@ describe SimpleXlsxReader do
|
|
927
944
|
<sheetData>
|
928
945
|
<row r="2" spans="1:1">
|
929
946
|
<c r="A2" s="0">
|
930
|
-
<v>
|
947
|
+
<v>a</v>
|
931
948
|
</c>
|
932
949
|
</row>
|
933
950
|
<row r="4" spans="1:1">
|
@@ -958,13 +975,30 @@ describe SimpleXlsxReader do
|
|
958
975
|
it 'reads row data despite gaps in row numbering' do
|
959
976
|
_(@rows).must_equal [
|
960
977
|
[nil, nil, nil, nil],
|
961
|
-
['
|
978
|
+
['a', nil, nil, nil],
|
962
979
|
[nil, nil, nil, nil],
|
963
|
-
[nil,
|
964
|
-
[nil, nil,
|
980
|
+
[nil, 1, nil, nil],
|
981
|
+
[nil, nil, 2, nil],
|
965
982
|
[nil, nil, nil, nil],
|
966
|
-
[nil, nil, nil,
|
983
|
+
[nil, nil, nil, 3]
|
967
984
|
]
|
968
985
|
end
|
969
986
|
end
|
987
|
+
|
988
|
+
# https://support.microsoft.com/en-us/office/available-number-formats-in-excel-0afe8f52-97db-41f1-b972-4b46e9f1e8d2
|
989
|
+
describe 'numeric fields styled as "General"' do
|
990
|
+
let(:misc_numbers_path) do
|
991
|
+
File.join(File.dirname(__FILE__), 'misc_numbers.xlsx')
|
992
|
+
end
|
993
|
+
|
994
|
+
let(:sheet) { SimpleXlsxReader.open(misc_numbers_path).sheets[0] }
|
995
|
+
|
996
|
+
it 'reads medium sized integers as integers' do
|
997
|
+
_(sheet.rows.slurp[1][0]).must_equal 98070
|
998
|
+
end
|
999
|
+
|
1000
|
+
it 'reads large (>12 char) integers as integers' do
|
1001
|
+
_(sheet.rows.slurp[1][1]).must_equal 1234567890123
|
1002
|
+
end
|
1003
|
+
end
|
970
1004
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -113,6 +113,7 @@ files:
|
|
113
113
|
- test/gdocs_sheet_test.rb
|
114
114
|
- test/lower_case_sharedstrings.xlsx
|
115
115
|
- test/lower_case_sharedstrings_test.rb
|
116
|
+
- test/misc_numbers.xlsx
|
116
117
|
- test/performance_test.rb
|
117
118
|
- test/sesame_street_blog.xlsx
|
118
119
|
- test/shared_strings.xml
|
@@ -152,6 +153,7 @@ test_files:
|
|
152
153
|
- test/gdocs_sheet_test.rb
|
153
154
|
- test/lower_case_sharedstrings.xlsx
|
154
155
|
- test/lower_case_sharedstrings_test.rb
|
156
|
+
- test/misc_numbers.xlsx
|
155
157
|
- test/performance_test.rb
|
156
158
|
- test/sesame_street_blog.xlsx
|
157
159
|
- test/shared_strings.xml
|