simple_xlsx_reader 5.0.0 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ruby.yml +2 -2
- data/CHANGELOG.md +9 -0
- data/README.md +2 -2
- data/lib/simple_xlsx_reader/hyperlink.rb +2 -2
- data/lib/simple_xlsx_reader/loader/sheet_parser.rb +30 -13
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/test/namespaces_and_missing_atts_test.rb +63 -0
- data/test/simple_xlsx_reader_test.rb +6 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f779f6e272fd4b79b2da180f31eb71b40e51b25d96f50fa72180908472c56750
|
4
|
+
data.tar.gz: 6bd6d85c61aaede6aebbe24f88b8fcc8a4018345581a3793e6e3a89a7bc285ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63b52b5835fe344b7a8116c6217c6df2f1ec9e68b2876212dec1b08a22dc33549f81905079d7ccea8f2612455e70ab1719aad82229d4fbc00d6da92e4c1ca7a1
|
7
|
+
data.tar.gz: e9e615a600b5bd19c7510fc1b0cbd42e2a2a5494ca9da63be9de813b1bedff73045faa3d0b2f2c8c7b6a7eefecc1219fbbcf6a9f79971b30ba2f8e8c7417cdd8
|
data/.github/workflows/ruby.yml
CHANGED
@@ -22,10 +22,10 @@ jobs:
|
|
22
22
|
runs-on: ubuntu-latest
|
23
23
|
strategy:
|
24
24
|
matrix:
|
25
|
-
ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2']
|
25
|
+
ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2', '3.3']
|
26
26
|
|
27
27
|
steps:
|
28
|
-
- uses: actions/checkout@
|
28
|
+
- uses: actions/checkout@v4
|
29
29
|
- name: Set up Ruby
|
30
30
|
uses: ruby/setup-ruby@v1
|
31
31
|
with:
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
### 5.1.0
|
2
|
+
|
3
|
+
* Parse sheets containing namespaces and no 'r' att (@skipchris)
|
4
|
+
* Fix Zlib error when loading from string (@myabc)
|
5
|
+
* Prevent a SimpleXlsxReader::CellLoadError (no implicit conversion of Integer
|
6
|
+
into String) when the casted value (friendly name) is not a string (@tsdbrown)
|
7
|
+
* Accidental 25% performance improvement while experimenting with namespace
|
8
|
+
support (see #53f5a9).
|
9
|
+
|
1
10
|
### 5.0.0
|
2
11
|
|
3
12
|
* Change SimpleXlsxReader::Hyperlink to default to the visible cell value
|
data/README.md
CHANGED
@@ -13,7 +13,7 @@ then forgotten. We just want to get the data, and get out!
|
|
13
13
|
doc = SimpleXlsxReader.open('/path/to/workbook.xlsx')
|
14
14
|
doc.sheets # => [<#SXR::Sheet>, ...]
|
15
15
|
doc.sheets.first.name # 'Sheet1'
|
16
|
-
rows = doc.
|
16
|
+
rows = doc.sheets.first.rows # <SXR::Document::RowsProxy>
|
17
17
|
rows.each # an <Enumerator> ready to chain or stream
|
18
18
|
rows.each {} # Streams the rows to your block
|
19
19
|
rows.each(headers: true) {} # Streams row-hashes
|
@@ -44,7 +44,7 @@ SimpleXlsxReader strives to be fairly idiomatic Ruby:
|
|
44
44
|
|
45
45
|
```ruby
|
46
46
|
# quick example having fun w/ ruby
|
47
|
-
doc = SimpleXlsxReader.open(
|
47
|
+
doc = SimpleXlsxReader.open(file_path) # or SimpleXlsxReader.parse(string_or_io)
|
48
48
|
doc.sheets.first.rows.each(headers: {id: /ID/})
|
49
49
|
.with_index.with_object({}) do |(row, index), acc|
|
50
50
|
acc[row[:id]] = index
|
@@ -21,9 +21,9 @@ module SimpleXlsxReader
|
|
21
21
|
attr_reader :url
|
22
22
|
|
23
23
|
def initialize(url, friendly_name = nil)
|
24
|
-
@friendly_name = friendly_name
|
25
24
|
@url = url
|
26
|
-
|
25
|
+
@friendly_name = friendly_name&.to_s
|
26
|
+
super(@friendly_name || @url)
|
27
27
|
end
|
28
28
|
end
|
29
29
|
end
|
@@ -33,6 +33,9 @@ module SimpleXlsxReader
|
|
33
33
|
@capture = nil # silence warnings
|
34
34
|
@captured = nil # silence warnings
|
35
35
|
@dimension = nil # silence warnings
|
36
|
+
@column_index = 0
|
37
|
+
|
38
|
+
@file_io.rewind # if it's IO from IO.read, we need to rewind it
|
36
39
|
|
37
40
|
# In this project this is only used for GUI-made hyperlinks (as opposed
|
38
41
|
# to FUNCTION-based hyperlinks). Unfortunately they're needed to parse
|
@@ -40,30 +43,34 @@ module SimpleXlsxReader
|
|
40
43
|
# to just stream-parse the file twice, first for the hyperlinks at the
|
41
44
|
# bottom of the file, then for the file itself. In the future it would
|
42
45
|
# be clever to use grep to extract the xml into its own smaller file.
|
43
|
-
if xrels_file
|
44
|
-
xrels_file.
|
45
|
-
|
46
|
+
if xrels_file
|
47
|
+
if xrels_file.grep(/hyperlink/).any?
|
48
|
+
xrels_file.rewind
|
49
|
+
load_gui_hyperlinks # represented as hyperlinks_by_cell
|
50
|
+
end
|
51
|
+
@file_io.rewind # we've already parsed this once
|
46
52
|
end
|
47
53
|
|
48
|
-
@file_io.rewind # in case we've already parsed this once
|
49
|
-
|
50
54
|
Nokogiri::XML::SAX::Parser.new(self).parse(@file_io)
|
51
55
|
end
|
52
56
|
|
53
57
|
###
|
54
58
|
# SAX document hooks
|
55
59
|
|
56
|
-
def
|
60
|
+
def start_element_namespace(name, attrs = [], _prefix, _uri, _ns)
|
57
61
|
case name
|
58
|
-
when 'dimension'
|
62
|
+
when 'dimension'
|
63
|
+
@dimension = attrs.last.value
|
59
64
|
when 'row'
|
60
|
-
@current_row_num = attrs.find {|
|
65
|
+
@current_row_num = attrs.find {|attr| attr.localname == 'r'}&.value&.to_i
|
61
66
|
@current_row = Array.new(column_length)
|
67
|
+
@column_index = 0
|
62
68
|
when 'c'
|
63
|
-
attrs = attrs.inject({}) {|acc,
|
64
|
-
@cell_name = attrs['r']
|
69
|
+
attrs = attrs.inject({}) {|acc, attr| acc[attr.localname] = attr.value; acc}
|
70
|
+
@cell_name = attrs['r'] || column_number_to_letter(@column_index)
|
65
71
|
@type = attrs['t']
|
66
72
|
@style = attrs['s'] && style_types[attrs['s'].to_i]
|
73
|
+
@column_index += 1
|
67
74
|
when 'f' then @function = true
|
68
75
|
when 'v', 't' then @capture = true
|
69
76
|
end
|
@@ -114,7 +121,7 @@ module SimpleXlsxReader
|
|
114
121
|
@captured = @captured ? @captured + (captured || '') : captured
|
115
122
|
end
|
116
123
|
|
117
|
-
def
|
124
|
+
def end_element_namespace(name, _prefix, _uri)
|
118
125
|
case name
|
119
126
|
when 'row'
|
120
127
|
if @headers == true # ya a little funky
|
@@ -208,10 +215,10 @@ module SimpleXlsxReader
|
|
208
215
|
@hyperlinks_by_cell
|
209
216
|
end
|
210
217
|
|
211
|
-
def
|
218
|
+
def start_element_namespace(name, attrs, _prefix, _uri, _ns)
|
212
219
|
case name
|
213
220
|
when 'hyperlink'
|
214
|
-
attrs = attrs.inject({}) {|acc,
|
221
|
+
attrs = attrs.inject({}) {|acc, attr| acc[attr.localname] = attr.value; acc}
|
215
222
|
id = attrs['id'] || attrs['r:id']
|
216
223
|
|
217
224
|
@hyperlinks_by_cell[attrs['ref']] =
|
@@ -264,6 +271,16 @@ module SimpleXlsxReader
|
|
264
271
|
end
|
265
272
|
result
|
266
273
|
end
|
274
|
+
|
275
|
+
def column_number_to_letter(n)
|
276
|
+
result = []
|
277
|
+
loop do
|
278
|
+
result.unshift((n % 26 + 65).chr)
|
279
|
+
n = (n / 26) - 1
|
280
|
+
break if n < 0
|
281
|
+
end
|
282
|
+
result.join
|
283
|
+
end
|
267
284
|
end
|
268
285
|
end
|
269
286
|
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
describe SimpleXlsxReader do
|
6
|
+
# Based on a real-world sheet possibly generated by PowerBI, where the xml
|
7
|
+
# has namespacing and rows are missing the 'r' attribute.
|
8
|
+
let(:sheet) do
|
9
|
+
<<~XML
|
10
|
+
<?xml version="1.0" encoding="utf-8"?>
|
11
|
+
<x:worksheet xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
12
|
+
<x:sheetData>
|
13
|
+
<x:row>
|
14
|
+
<x:c s="2" t="inlineStr">
|
15
|
+
<x:is>
|
16
|
+
<x:t>Salmon</x:t>
|
17
|
+
</x:is>
|
18
|
+
</x:c>
|
19
|
+
<x:c s="2" t="inlineStr">
|
20
|
+
<x:is>
|
21
|
+
<x:t>Trout</x:t>
|
22
|
+
</x:is>
|
23
|
+
</x:c>
|
24
|
+
</x:row>
|
25
|
+
<x:row>
|
26
|
+
<x:c s="2" t="inlineStr">
|
27
|
+
<x:is>
|
28
|
+
<x:t>Cat</x:t>
|
29
|
+
</x:is>
|
30
|
+
</x:c>
|
31
|
+
<x:c s="2" t="inlineStr">
|
32
|
+
<x:is>
|
33
|
+
<x:t>Dog</x:t>
|
34
|
+
</x:is>
|
35
|
+
</x:c>
|
36
|
+
</x:row>
|
37
|
+
</x:sheetData>
|
38
|
+
</x:worksheet>
|
39
|
+
XML
|
40
|
+
end
|
41
|
+
|
42
|
+
let(:styles) do
|
43
|
+
<<~XML
|
44
|
+
<?xml version="1.0" encoding="utf-8"?><x:styleSheet xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><x:numFmts><x:numFmt numFmtId="181" formatCode="0" /><x:numFmt numFmtId="182" formatCode="m/d/yyyy h:mm:ss AM/PM" /><x:numFmt numFmtId="183" formatCode="dd MMMM yyyy" /></x:numFmts><x:fonts><x:font /><x:font><x:b /></x:font></x:fonts><x:fills><x:fill><x:patternFill patternType="none" /></x:fill><x:fill><x:patternFill patternType="gray125" /></x:fill></x:fills><x:borders><x:border /><x:border><x:bottom style="thin" /></x:border><x:border><x:right style="thin" /></x:border></x:borders><x:cellXfs><x:xf /><x:xf fontId="1" /><x:xf borderId="1" /><x:xf fontId="1" borderId="1" /><x:xf borderId="2" /><x:xf fontId="1" borderId="2" /><x:xf><x:alignment vertical="top" /></x:xf><x:xf fontId="1"><x:alignment vertical="top" /></x:xf><x:xf numFmtId="181" /><x:xf numFmtId="182" /><x:xf numFmtId="183" /><x:xf numFmtId="182" fontId="1" /><x:xf numFmtId="181" fontId="1" /><x:xf numFmtId="183" fontId="1" /></x:cellXfs></x:styleSheet>
|
45
|
+
XML
|
46
|
+
end
|
47
|
+
|
48
|
+
let(:wonky_file) do
|
49
|
+
TestXlsxBuilder.new(
|
50
|
+
sheets: [sheet],
|
51
|
+
styles: styles
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
let(:subject) { SimpleXlsxReader::Document.new(wonky_file.archive.path) }
|
56
|
+
|
57
|
+
describe '#to_hash' do
|
58
|
+
it 'should extract values from namespaced cells missing "r" attributes' do
|
59
|
+
_(subject.sheets.first.rows.to_a[0]).must_include('Salmon')
|
60
|
+
_(subject.sheets.first.rows.to_a[1]).must_include('Dog')
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -56,7 +56,7 @@ describe SimpleXlsxReader do
|
|
56
56
|
end
|
57
57
|
|
58
58
|
describe 'load from string' do
|
59
|
-
let(:subject) { SimpleXlsxReader.parse(
|
59
|
+
let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_string) }
|
60
60
|
|
61
61
|
it 'reads an xlsx string into a hash of {[sheet name] => [data]}' do
|
62
62
|
_(subject.to_hash).must_equal(expected_result)
|
@@ -611,6 +611,11 @@ describe SimpleXlsxReader do
|
|
611
611
|
_(described_class.cast('2', 's', nil, shared_strings: %w[a b c], url: url))
|
612
612
|
.must_equal SXR::Hyperlink.new(url, 'c')
|
613
613
|
end
|
614
|
+
|
615
|
+
it 'creates a hyperlink with a fixnum friendly_name' do
|
616
|
+
_(described_class.cast('123', nil, :fixnum, url: url))
|
617
|
+
.must_equal SXR::Hyperlink.new(url, '123')
|
618
|
+
end
|
614
619
|
end
|
615
620
|
end
|
616
621
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-04-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -87,6 +87,7 @@ executables: []
|
|
87
87
|
extensions: []
|
88
88
|
extra_rdoc_files: []
|
89
89
|
files:
|
90
|
+
- ".github/dependabot.yml"
|
90
91
|
- ".github/workflows/ruby.yml"
|
91
92
|
- ".gitignore"
|
92
93
|
- ".travis.yml"
|
@@ -115,6 +116,7 @@ files:
|
|
115
116
|
- test/lower_case_sharedstrings.xlsx
|
116
117
|
- test/lower_case_sharedstrings_test.rb
|
117
118
|
- test/misc_numbers.xlsx
|
119
|
+
- test/namespaces_and_missing_atts_test.rb
|
118
120
|
- test/percentages_n_currencies.xlsx
|
119
121
|
- test/performance_test.rb
|
120
122
|
- test/sesame_street_blog.xlsx
|
@@ -142,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
144
|
- !ruby/object:Gem::Version
|
143
145
|
version: '0'
|
144
146
|
requirements: []
|
145
|
-
rubygems_version: 3.
|
147
|
+
rubygems_version: 3.5.22
|
146
148
|
signing_key:
|
147
149
|
specification_version: 4
|
148
150
|
summary: Read xlsx data the Ruby way
|
@@ -157,6 +159,7 @@ test_files:
|
|
157
159
|
- test/lower_case_sharedstrings.xlsx
|
158
160
|
- test/lower_case_sharedstrings_test.rb
|
159
161
|
- test/misc_numbers.xlsx
|
162
|
+
- test/namespaces_and_missing_atts_test.rb
|
160
163
|
- test/percentages_n_currencies.xlsx
|
161
164
|
- test/performance_test.rb
|
162
165
|
- test/sesame_street_blog.xlsx
|