simple_xlsx_reader 5.0.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8552d34f153cbdc6561c40725488d193e9aa48debcded0af24d32daf01b2f951
4
- data.tar.gz: 2a0fecdec3698bb16717244fc7bf9b45b4fe0f6b216038e9823f9a5fea2ea8fa
3
+ metadata.gz: f779f6e272fd4b79b2da180f31eb71b40e51b25d96f50fa72180908472c56750
4
+ data.tar.gz: 6bd6d85c61aaede6aebbe24f88b8fcc8a4018345581a3793e6e3a89a7bc285ab
5
5
  SHA512:
6
- metadata.gz: 77f99e8ad1020f0313171dcd0b14f7200fdf116e16de312146eb66a4d9347e94a0bf1cb4483f606975cd8bc776e80995473485271e05ee0a11136ef72cdeeae5
7
- data.tar.gz: 7ee3ed8c37df6632981bd6eeb301de5f852df0f66534ce91593923cf1b51aa1dc0b07aed224d5d88cbd4b1f8a6901fdb17164e6e9f22fb10d4e5d90a3c24f437
6
+ metadata.gz: 63b52b5835fe344b7a8116c6217c6df2f1ec9e68b2876212dec1b08a22dc33549f81905079d7ccea8f2612455e70ab1719aad82229d4fbc00d6da92e4c1ca7a1
7
+ data.tar.gz: e9e615a600b5bd19c7510fc1b0cbd42e2a2a5494ca9da63be9de813b1bedff73045faa3d0b2f2c8c7b6a7eefecc1219fbbcf6a9f79971b30ba2f8e8c7417cdd8
@@ -0,0 +1,6 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
@@ -22,10 +22,10 @@ jobs:
22
22
  runs-on: ubuntu-latest
23
23
  strategy:
24
24
  matrix:
25
- ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2']
25
+ ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2', '3.3']
26
26
 
27
27
  steps:
28
- - uses: actions/checkout@v3
28
+ - uses: actions/checkout@v4
29
29
  - name: Set up Ruby
30
30
  uses: ruby/setup-ruby@v1
31
31
  with:
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ### 5.1.0
2
+
3
+ * Parse sheets containing namespaces and no 'r' att (@skipchris)
4
+ * Fix Zlib error when loading from string (@myabc)
5
+ * Prevent a SimpleXlsxReader::CellLoadError (no implicit conversion of Integer
6
+ into String) when the casted value (friendly name) is not a string (@tsdbrown)
7
+ * Accidental 25% performance improvement while experimenting with namespace
8
+ support (see #53f5a9).
9
+
1
10
  ### 5.0.0
2
11
 
3
12
  * Change SimpleXlsxReader::Hyperlink to default to the visible cell value
data/README.md CHANGED
@@ -13,7 +13,7 @@ then forgotten. We just want to get the data, and get out!
13
13
  doc = SimpleXlsxReader.open('/path/to/workbook.xlsx')
14
14
  doc.sheets # => [<#SXR::Sheet>, ...]
15
15
  doc.sheets.first.name # 'Sheet1'
16
- rows = doc.sheet.first.rows # <SXR::Document::RowsProxy>
16
+ rows = doc.sheets.first.rows # <SXR::Document::RowsProxy>
17
17
  rows.each # an <Enumerator> ready to chain or stream
18
18
  rows.each {} # Streams the rows to your block
19
19
  rows.each(headers: true) {} # Streams row-hashes
@@ -44,7 +44,7 @@ SimpleXlsxReader strives to be fairly idiomatic Ruby:
44
44
 
45
45
  ```ruby
46
46
  # quick example having fun w/ ruby
47
- doc = SimpleXlsxReader.open(path_or_io)
47
+ doc = SimpleXlsxReader.open(file_path) # or SimpleXlsxReader.parse(string_or_io)
48
48
  doc.sheets.first.rows.each(headers: {id: /ID/})
49
49
  .with_index.with_object({}) do |(row, index), acc|
50
50
  acc[row[:id]] = index
@@ -21,9 +21,9 @@ module SimpleXlsxReader
21
21
  attr_reader :url
22
22
 
23
23
  def initialize(url, friendly_name = nil)
24
- @friendly_name = friendly_name
25
24
  @url = url
26
- super(friendly_name || url)
25
+ @friendly_name = friendly_name&.to_s
26
+ super(@friendly_name || @url)
27
27
  end
28
28
  end
29
29
  end
@@ -33,6 +33,9 @@ module SimpleXlsxReader
33
33
  @capture = nil # silence warnings
34
34
  @captured = nil # silence warnings
35
35
  @dimension = nil # silence warnings
36
+ @column_index = 0
37
+
38
+ @file_io.rewind # if it's IO from IO.read, we need to rewind it
36
39
 
37
40
  # In this project this is only used for GUI-made hyperlinks (as opposed
38
41
  # to FUNCTION-based hyperlinks). Unfortunately they're needed to parse
@@ -40,30 +43,34 @@ module SimpleXlsxReader
40
43
  # to just stream-parse the file twice, first for the hyperlinks at the
41
44
  # bottom of the file, then for the file itself. In the future it would
42
45
  # be clever to use grep to extract the xml into its own smaller file.
43
- if xrels_file&.grep(/hyperlink/)&.any?
44
- xrels_file.rewind
45
- load_gui_hyperlinks # represented as hyperlinks_by_cell
46
+ if xrels_file
47
+ if xrels_file.grep(/hyperlink/).any?
48
+ xrels_file.rewind
49
+ load_gui_hyperlinks # represented as hyperlinks_by_cell
50
+ end
51
+ @file_io.rewind # we've already parsed this once
46
52
  end
47
53
 
48
- @file_io.rewind # in case we've already parsed this once
49
-
50
54
  Nokogiri::XML::SAX::Parser.new(self).parse(@file_io)
51
55
  end
52
56
 
53
57
  ###
54
58
  # SAX document hooks
55
59
 
56
- def start_element(name, attrs = [])
60
+ def start_element_namespace(name, attrs = [], _prefix, _uri, _ns)
57
61
  case name
58
- when 'dimension' then @dimension = attrs.last.last
62
+ when 'dimension'
63
+ @dimension = attrs.last.value
59
64
  when 'row'
60
- @current_row_num = attrs.find {|(k, v)| k == 'r'}&.last&.to_i
65
+ @current_row_num = attrs.find {|attr| attr.localname == 'r'}&.value&.to_i
61
66
  @current_row = Array.new(column_length)
67
+ @column_index = 0
62
68
  when 'c'
63
- attrs = attrs.inject({}) {|acc, (k, v)| acc[k] = v; acc}
64
- @cell_name = attrs['r']
69
+ attrs = attrs.inject({}) {|acc, attr| acc[attr.localname] = attr.value; acc}
70
+ @cell_name = attrs['r'] || column_number_to_letter(@column_index)
65
71
  @type = attrs['t']
66
72
  @style = attrs['s'] && style_types[attrs['s'].to_i]
73
+ @column_index += 1
67
74
  when 'f' then @function = true
68
75
  when 'v', 't' then @capture = true
69
76
  end
@@ -114,7 +121,7 @@ module SimpleXlsxReader
114
121
  @captured = @captured ? @captured + (captured || '') : captured
115
122
  end
116
123
 
117
- def end_element(name)
124
+ def end_element_namespace(name, _prefix, _uri)
118
125
  case name
119
126
  when 'row'
120
127
  if @headers == true # ya a little funky
@@ -208,10 +215,10 @@ module SimpleXlsxReader
208
215
  @hyperlinks_by_cell
209
216
  end
210
217
 
211
- def start_element(name, attrs)
218
+ def start_element_namespace(name, attrs, _prefix, _uri, _ns)
212
219
  case name
213
220
  when 'hyperlink'
214
- attrs = attrs.inject({}) {|acc, (k, v)| acc[k] = v; acc}
221
+ attrs = attrs.inject({}) {|acc, attr| acc[attr.localname] = attr.value; acc}
215
222
  id = attrs['id'] || attrs['r:id']
216
223
 
217
224
  @hyperlinks_by_cell[attrs['ref']] =
@@ -264,6 +271,16 @@ module SimpleXlsxReader
264
271
  end
265
272
  result
266
273
  end
274
+
275
+ def column_number_to_letter(n)
276
+ result = []
277
+ loop do
278
+ result.unshift((n % 26 + 65).chr)
279
+ n = (n / 26) - 1
280
+ break if n < 0
281
+ end
282
+ result.join
283
+ end
267
284
  end
268
285
  end
269
286
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '5.0.0'
4
+ VERSION = '5.1.0'
5
5
  end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ describe SimpleXlsxReader do
6
+ # Based on a real-world sheet possibly generated by PowerBI, where the xml
7
+ # has namespacing and rows are missing the 'r' attribute.
8
+ let(:sheet) do
9
+ <<~XML
10
+ <?xml version="1.0" encoding="utf-8"?>
11
+ <x:worksheet xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
12
+ <x:sheetData>
13
+ <x:row>
14
+ <x:c s="2" t="inlineStr">
15
+ <x:is>
16
+ <x:t>Salmon</x:t>
17
+ </x:is>
18
+ </x:c>
19
+ <x:c s="2" t="inlineStr">
20
+ <x:is>
21
+ <x:t>Trout</x:t>
22
+ </x:is>
23
+ </x:c>
24
+ </x:row>
25
+ <x:row>
26
+ <x:c s="2" t="inlineStr">
27
+ <x:is>
28
+ <x:t>Cat</x:t>
29
+ </x:is>
30
+ </x:c>
31
+ <x:c s="2" t="inlineStr">
32
+ <x:is>
33
+ <x:t>Dog</x:t>
34
+ </x:is>
35
+ </x:c>
36
+ </x:row>
37
+ </x:sheetData>
38
+ </x:worksheet>
39
+ XML
40
+ end
41
+
42
+ let(:styles) do
43
+ <<~XML
44
+ <?xml version="1.0" encoding="utf-8"?><x:styleSheet xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><x:numFmts><x:numFmt numFmtId="181" formatCode="0" /><x:numFmt numFmtId="182" formatCode="m/d/yyyy h:mm:ss AM/PM" /><x:numFmt numFmtId="183" formatCode="dd MMMM yyyy" /></x:numFmts><x:fonts><x:font /><x:font><x:b /></x:font></x:fonts><x:fills><x:fill><x:patternFill patternType="none" /></x:fill><x:fill><x:patternFill patternType="gray125" /></x:fill></x:fills><x:borders><x:border /><x:border><x:bottom style="thin" /></x:border><x:border><x:right style="thin" /></x:border></x:borders><x:cellXfs><x:xf /><x:xf fontId="1" /><x:xf borderId="1" /><x:xf fontId="1" borderId="1" /><x:xf borderId="2" /><x:xf fontId="1" borderId="2" /><x:xf><x:alignment vertical="top" /></x:xf><x:xf fontId="1"><x:alignment vertical="top" /></x:xf><x:xf numFmtId="181" /><x:xf numFmtId="182" /><x:xf numFmtId="183" /><x:xf numFmtId="182" fontId="1" /><x:xf numFmtId="181" fontId="1" /><x:xf numFmtId="183" fontId="1" /></x:cellXfs></x:styleSheet>
45
+ XML
46
+ end
47
+
48
+ let(:wonky_file) do
49
+ TestXlsxBuilder.new(
50
+ sheets: [sheet],
51
+ styles: styles
52
+ )
53
+ end
54
+
55
+ let(:subject) { SimpleXlsxReader::Document.new(wonky_file.archive.path) }
56
+
57
+ describe '#to_hash' do
58
+ it 'should extract values from namespaced cells missing "r" attributes' do
59
+ _(subject.sheets.first.rows.to_a[0]).must_include('Salmon')
60
+ _(subject.sheets.first.rows.to_a[1]).must_include('Dog')
61
+ end
62
+ end
63
+ end
@@ -56,7 +56,7 @@ describe SimpleXlsxReader do
56
56
  end
57
57
 
58
58
  describe 'load from string' do
59
- let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
59
+ let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_string) }
60
60
 
61
61
  it 'reads an xlsx string into a hash of {[sheet name] => [data]}' do
62
62
  _(subject.to_hash).must_equal(expected_result)
@@ -611,6 +611,11 @@ describe SimpleXlsxReader do
611
611
  _(described_class.cast('2', 's', nil, shared_strings: %w[a b c], url: url))
612
612
  .must_equal SXR::Hyperlink.new(url, 'c')
613
613
  end
614
+
615
+ it 'creates a hyperlink with a fixnum friendly_name' do
616
+ _(described_class.cast('123', nil, :fixnum, url: url))
617
+ .must_equal SXR::Hyperlink.new(url, '123')
618
+ end
614
619
  end
615
620
  end
616
621
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.0
4
+ version: 5.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-17 00:00:00.000000000 Z
11
+ date: 2025-04-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -87,6 +87,7 @@ executables: []
87
87
  extensions: []
88
88
  extra_rdoc_files: []
89
89
  files:
90
+ - ".github/dependabot.yml"
90
91
  - ".github/workflows/ruby.yml"
91
92
  - ".gitignore"
92
93
  - ".travis.yml"
@@ -115,6 +116,7 @@ files:
115
116
  - test/lower_case_sharedstrings.xlsx
116
117
  - test/lower_case_sharedstrings_test.rb
117
118
  - test/misc_numbers.xlsx
119
+ - test/namespaces_and_missing_atts_test.rb
118
120
  - test/percentages_n_currencies.xlsx
119
121
  - test/performance_test.rb
120
122
  - test/sesame_street_blog.xlsx
@@ -142,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
144
  - !ruby/object:Gem::Version
143
145
  version: '0'
144
146
  requirements: []
145
- rubygems_version: 3.3.7
147
+ rubygems_version: 3.5.22
146
148
  signing_key:
147
149
  specification_version: 4
148
150
  summary: Read xlsx data the Ruby way
@@ -157,6 +159,7 @@ test_files:
157
159
  - test/lower_case_sharedstrings.xlsx
158
160
  - test/lower_case_sharedstrings_test.rb
159
161
  - test/misc_numbers.xlsx
162
+ - test/namespaces_and_missing_atts_test.rb
160
163
  - test/percentages_n_currencies.xlsx
161
164
  - test/performance_test.rb
162
165
  - test/sesame_street_blog.xlsx