simple_xlsx_reader 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -5
- data/CHANGELOG.md +18 -0
- data/lib/simple_xlsx_reader/document.rb +6 -4
- data/lib/simple_xlsx_reader/loader.rb +11 -5
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/lib/simple_xlsx_reader.rb +5 -2
- data/test/misc_numbers.xlsx +0 -0
- data/test/simple_xlsx_reader_test.rb +39 -5
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5449525d4e46a013f92e8406a2ec2d07b06bb795efc7c8d76b9ffbcace22a38f
|
4
|
+
data.tar.gz: 5c664baa8d88692767f5bb6d2879e24c27098206695b88da42f3bc0d30bb9bce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90d5fedde0aa4cc2bcb8b4d9134859890bbf4357efbf6ba9aa0aaf3bd21ad1cd9f54a279d1126938a300dee36fbb8a10d63b66f945c9f7eeb8edc880bb23327a
|
7
|
+
data.tar.gz: ab684cc09075a0b9a1054c045bf4202718159b258c87f62b10e18aafa4faaa7a527ba95f8e765f7609ab7ba24c43a31176906ecefd612c85badbdefee9164184
|
data/.github/workflows/ruby.yml
CHANGED
@@ -22,15 +22,12 @@ jobs:
|
|
22
22
|
runs-on: ubuntu-latest
|
23
23
|
strategy:
|
24
24
|
matrix:
|
25
|
-
ruby-version: ['2.6', '2.7', '3.0']
|
25
|
+
ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2']
|
26
26
|
|
27
27
|
steps:
|
28
28
|
- uses: actions/checkout@v3
|
29
29
|
- name: Set up Ruby
|
30
|
-
|
31
|
-
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
32
|
-
# uses: ruby/setup-ruby@v1
|
33
|
-
uses: ruby/setup-ruby@2b019609e2b0f1ea1a2bc8ca11cb82ab46ada124
|
30
|
+
uses: ruby/setup-ruby@v1
|
34
31
|
with:
|
35
32
|
ruby-version: ${{ matrix.ruby-version }}
|
36
33
|
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,21 @@
|
|
1
|
+
### 3.0.0
|
2
|
+
|
3
|
+
* Change the way we typecast cells in the General format. This probably won't
|
4
|
+
break anything in your app, but it's a change in behavior that theoretically
|
5
|
+
could.
|
6
|
+
|
7
|
+
Previously, we were treating cells using the General format as strings, when
|
8
|
+
according to the Office XML standard, they should be treated as numbers. We
|
9
|
+
now attempt to cast such cells as numbers, and fall back to strings if number
|
10
|
+
casting fails.
|
11
|
+
|
12
|
+
Thanks @jrodrigosm
|
13
|
+
|
14
|
+
### 2.0.1
|
15
|
+
|
16
|
+
* Restore ability to parse IO strings (@robbevp)
|
17
|
+
* Add Ruby 3.1 and 3.2 to CI (@taichi-ishitani)
|
18
|
+
|
1
19
|
### 2.0.0
|
2
20
|
|
3
21
|
* SPEED
|
@@ -8,14 +8,16 @@ module SimpleXlsxReader
|
|
8
8
|
# Main class for the public API. See the README for usage examples,
|
9
9
|
# or read the code, it's pretty friendly.
|
10
10
|
class Document
|
11
|
-
attr_reader :
|
11
|
+
attr_reader :string_or_io
|
12
12
|
|
13
|
-
def initialize(file_path)
|
14
|
-
|
13
|
+
def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
|
14
|
+
fail(ArgumentError, 'either file_path or string_or_io must be provided') if legacy_file_path.nil? && file_path.nil? && string_or_io.nil?
|
15
|
+
|
16
|
+
@string_or_io = string_or_io || File.new(legacy_file_path || file_path)
|
15
17
|
end
|
16
18
|
|
17
19
|
def sheets
|
18
|
-
@sheets ||= Loader.new(
|
20
|
+
@sheets ||= Loader.new(string_or_io).init_sheets
|
19
21
|
end
|
20
22
|
|
21
23
|
# Expensive because it slurps all the sheets into memory,
|
@@ -1,12 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module SimpleXlsxReader
|
4
|
-
class Loader < Struct.new(:
|
4
|
+
class Loader < Struct.new(:string_or_io)
|
5
5
|
attr_accessor :shared_strings, :sheet_parsers, :sheet_toc, :style_types, :base_date
|
6
6
|
|
7
7
|
def init_sheets
|
8
8
|
ZipReader.new(
|
9
|
-
|
9
|
+
string_or_io: string_or_io,
|
10
10
|
loader: self
|
11
11
|
).read
|
12
12
|
|
@@ -19,12 +19,12 @@ module SimpleXlsxReader
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
-
ZipReader = Struct.new(:
|
22
|
+
ZipReader = Struct.new(:string_or_io, :loader, keyword_init: true) do
|
23
23
|
attr_reader :zip
|
24
24
|
|
25
25
|
def initialize(*args)
|
26
26
|
super
|
27
|
-
@zip = SimpleXlsxReader::Zip.
|
27
|
+
@zip = SimpleXlsxReader::Zip.open_buffer(string_or_io)
|
28
28
|
end
|
29
29
|
|
30
30
|
def read
|
@@ -149,7 +149,13 @@ module SimpleXlsxReader
|
|
149
149
|
# detected earlier and cast here by its standardized symbol
|
150
150
|
##
|
151
151
|
|
152
|
-
|
152
|
+
# no type encoded with the General format defaults to a number type
|
153
|
+
when nil, :string
|
154
|
+
retval = Integer(value, exception: false)
|
155
|
+
retval ||= Float(value, exception: false)
|
156
|
+
retval ||= value
|
157
|
+
retval
|
158
|
+
when :unsupported
|
153
159
|
value
|
154
160
|
when :fixnum
|
155
161
|
value.to_i
|
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -42,8 +42,11 @@ module SimpleXlsxReader
|
|
42
42
|
end
|
43
43
|
|
44
44
|
def open(file_path)
|
45
|
-
Document.new(file_path).tap(&:sheets)
|
45
|
+
Document.new(file_path: file_path).tap(&:sheets)
|
46
|
+
end
|
47
|
+
|
48
|
+
def parse(string_or_io)
|
49
|
+
Document.new(string_or_io: string_or_io).tap(&:sheets)
|
46
50
|
end
|
47
|
-
alias parse open
|
48
51
|
end
|
49
52
|
end
|
Binary file
|
@@ -18,6 +18,7 @@ describe SimpleXlsxReader do
|
|
18
18
|
|
19
19
|
let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
|
20
20
|
let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
|
21
|
+
let(:sesame_street_blog_string) { IO.read(sesame_street_blog_file_path) }
|
21
22
|
|
22
23
|
let(:expected_result) do
|
23
24
|
{
|
@@ -54,6 +55,14 @@ describe SimpleXlsxReader do
|
|
54
55
|
end
|
55
56
|
end
|
56
57
|
|
58
|
+
describe 'load from string' do
|
59
|
+
let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
|
60
|
+
|
61
|
+
it 'reads an xlsx string into a hash of {[sheet name] => [data]}' do
|
62
|
+
_(subject.to_hash).must_equal(expected_result)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
57
66
|
it 'outputs strings in UTF-8 encoding' do
|
58
67
|
document = SimpleXlsxReader.parse(sesame_street_blog_io)
|
59
68
|
_(document.sheets[0].rows.to_a.flatten.map(&:encoding).uniq)
|
@@ -818,6 +827,10 @@ describe SimpleXlsxReader do
|
|
818
827
|
<c r='I1' s='0'>
|
819
828
|
<v>GUI-made hyperlink</v>
|
820
829
|
</c>
|
830
|
+
|
831
|
+
<c r='J1' s='0'>
|
832
|
+
<v>1</v>
|
833
|
+
</c>
|
821
834
|
</row>
|
822
835
|
</sheetData>
|
823
836
|
|
@@ -916,6 +929,10 @@ describe SimpleXlsxReader do
|
|
916
929
|
)
|
917
930
|
)
|
918
931
|
end
|
932
|
+
|
933
|
+
it "reads 'Generic' cells with numbers as numbers" do
|
934
|
+
_(@row[9]).must_equal 1
|
935
|
+
end
|
919
936
|
end
|
920
937
|
|
921
938
|
describe 'parsing documents with blank rows' do
|
@@ -927,7 +944,7 @@ describe SimpleXlsxReader do
|
|
927
944
|
<sheetData>
|
928
945
|
<row r="2" spans="1:1">
|
929
946
|
<c r="A2" s="0">
|
930
|
-
<v>
|
947
|
+
<v>a</v>
|
931
948
|
</c>
|
932
949
|
</row>
|
933
950
|
<row r="4" spans="1:1">
|
@@ -958,13 +975,30 @@ describe SimpleXlsxReader do
|
|
958
975
|
it 'reads row data despite gaps in row numbering' do
|
959
976
|
_(@rows).must_equal [
|
960
977
|
[nil, nil, nil, nil],
|
961
|
-
['
|
978
|
+
['a', nil, nil, nil],
|
962
979
|
[nil, nil, nil, nil],
|
963
|
-
[nil,
|
964
|
-
[nil, nil,
|
980
|
+
[nil, 1, nil, nil],
|
981
|
+
[nil, nil, 2, nil],
|
965
982
|
[nil, nil, nil, nil],
|
966
|
-
[nil, nil, nil,
|
983
|
+
[nil, nil, nil, 3]
|
967
984
|
]
|
968
985
|
end
|
969
986
|
end
|
987
|
+
|
988
|
+
# https://support.microsoft.com/en-us/office/available-number-formats-in-excel-0afe8f52-97db-41f1-b972-4b46e9f1e8d2
|
989
|
+
describe 'numeric fields styled as "General"' do
|
990
|
+
let(:misc_numbers_path) do
|
991
|
+
File.join(File.dirname(__FILE__), 'misc_numbers.xlsx')
|
992
|
+
end
|
993
|
+
|
994
|
+
let(:sheet) { SimpleXlsxReader.open(misc_numbers_path).sheets[0] }
|
995
|
+
|
996
|
+
it 'reads medium sized integers as integers' do
|
997
|
+
_(sheet.rows.slurp[1][0]).must_equal 98070
|
998
|
+
end
|
999
|
+
|
1000
|
+
it 'reads large (>12 char) integers as integers' do
|
1001
|
+
_(sheet.rows.slurp[1][1]).must_equal 1234567890123
|
1002
|
+
end
|
1003
|
+
end
|
970
1004
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -113,6 +113,7 @@ files:
|
|
113
113
|
- test/gdocs_sheet_test.rb
|
114
114
|
- test/lower_case_sharedstrings.xlsx
|
115
115
|
- test/lower_case_sharedstrings_test.rb
|
116
|
+
- test/misc_numbers.xlsx
|
116
117
|
- test/performance_test.rb
|
117
118
|
- test/sesame_street_blog.xlsx
|
118
119
|
- test/shared_strings.xml
|
@@ -152,6 +153,7 @@ test_files:
|
|
152
153
|
- test/gdocs_sheet_test.rb
|
153
154
|
- test/lower_case_sharedstrings.xlsx
|
154
155
|
- test/lower_case_sharedstrings_test.rb
|
156
|
+
- test/misc_numbers.xlsx
|
155
157
|
- test/performance_test.rb
|
156
158
|
- test/sesame_street_blog.xlsx
|
157
159
|
- test/shared_strings.xml
|