xsv 0.3.18 → 1.0.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -6
- data/CHANGELOG.md +7 -0
- data/README.md +14 -9
- data/lib/xsv.rb +13 -12
- data/lib/xsv/helpers.rb +51 -49
- data/lib/xsv/relationships_handler.rb +7 -24
- data/lib/xsv/sax_parser.rb +72 -0
- data/lib/xsv/shared_strings_parser.rb +12 -12
- data/lib/xsv/sheet.rb +5 -12
- data/lib/xsv/sheet_bounds_handler.rb +17 -27
- data/lib/xsv/sheet_rows_handler.rb +56 -72
- data/lib/xsv/sheets_ids_handler.rb +7 -40
- data/lib/xsv/styles_handler.rb +17 -33
- data/lib/xsv/version.rb +2 -1
- data/lib/xsv/workbook.rb +36 -37
- data/xsv.gemspec +2 -3
- metadata +8 -21
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 74b464996d0d55d48bce1400987079600f4e882eafd46006dc96e29879b6b67a
|
|
4
|
+
data.tar.gz: cc6f2164a38f4e9de9e6bf7c82663d8658adaf8570731c6a0e80f7f2b3fb744f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 78d88c74b25547bfca47ce2974ae0f082628af93db97820fcbf830661b49bb5adf02f40a46bb2b07a0335ff4cd3f7803152d53e180a9f6e3355d9b229cda984f
|
|
7
|
+
data.tar.gz: 2ec1290130c81899491661bf826df5637084648a73a2f9d8e4e9562739f3b7277c07efc27f8cb28019f60ec3d9d5c4940ad3bc5cfa17f71eb8dffe4bf1963d03
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Xsv Changelog
|
|
2
2
|
|
|
3
|
+
## 1.0.0.pre 2021-01-18
|
|
4
|
+
|
|
5
|
+
- Switch to a minimalistic XML parser in native Ruby (#21)
|
|
6
|
+
- Ruby 3.0 compatibility
|
|
7
|
+
- Various internal cleanup and optimization
|
|
8
|
+
- API is backwards compatible with 0.3.x
|
|
9
|
+
|
|
3
10
|
## 0.3.18 2020-09-30
|
|
4
11
|
|
|
5
12
|
- Improve inline string support (#18)
|
data/README.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[](https://travis-ci.org/martijn/xsv)
|
|
4
4
|
[](https://rubydoc.info/github/martijn/xsv)
|
|
5
5
|
|
|
6
|
-
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
6
|
+
Xsv is a fast, lightweight, pure Ruby parser for Office Open XML spreadsheet files
|
|
7
7
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
8
8
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
9
9
|
deals with minimal formatting and cannot create or modify documents.
|
|
@@ -33,7 +33,10 @@ Or install it yourself as:
|
|
|
33
33
|
|
|
34
34
|
$ gem install xsv
|
|
35
35
|
|
|
36
|
-
Xsv targets ruby
|
|
36
|
+
Xsv targets ruby >= 2.5 and has just a single dependency, `rubyzip`. It has been
|
|
37
|
+
tested successfully with MRI, JRuby, and TruffleRuby. Due to the lack of
|
|
38
|
+
native extensions, it should work well in multi-threaded environments or in Ractor
|
|
39
|
+
when that becomes stable.
|
|
37
40
|
|
|
38
41
|
## Usage
|
|
39
42
|
|
|
@@ -76,15 +79,15 @@ end
|
|
|
76
79
|
sheet[1] # => {"header1" => "value1", "header2" => "value2"}
|
|
77
80
|
```
|
|
78
81
|
|
|
79
|
-
Be aware that hash mode will lead to unpredictable results if
|
|
80
|
-
columns with the same
|
|
82
|
+
Be aware that hash mode will lead to unpredictable results if the worksheet
|
|
83
|
+
has multiple columns with the same header.
|
|
81
84
|
|
|
82
|
-
`Xsv::Workbook.open` accepts a filename, or
|
|
85
|
+
`Xsv::Workbook.open` accepts a filename, or an IO or String containing a workbook.
|
|
83
86
|
|
|
84
87
|
`Xsv::Sheet` implements `Enumerable` so you can call methods like `#first`,
|
|
85
|
-
`#filter`/`#select
|
|
88
|
+
`#filter`/`#select`, and `#map` on it.
|
|
86
89
|
|
|
87
|
-
The sheets
|
|
90
|
+
The sheets can be accessed by index or by name:
|
|
88
91
|
|
|
89
92
|
```ruby
|
|
90
93
|
x = Xsv::Workbook.open("sheet.xlsx")
|
|
@@ -94,7 +97,7 @@ sheet = x.sheets[0] # gets sheet by index
|
|
|
94
97
|
sheet = x.sheets_by_name('Name').first # gets sheet by name
|
|
95
98
|
```
|
|
96
99
|
|
|
97
|
-
To get all the
|
|
100
|
+
To get all the sheet names:
|
|
98
101
|
|
|
99
102
|
```ruby
|
|
100
103
|
sheet_names = x.sheets.map(&:name)
|
|
@@ -129,9 +132,11 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
|
129
132
|
Xsv is faster and more memory efficient than other gems because of two things: it only _reads values_ from Excel files and it's based on a SAX-based parser instead of a DOM-based parser. If you want to read some background on this, check out my blog post on
|
|
130
133
|
[Efficient XML parsing in Ruby](https://storck.io/posts/efficient-xml-parsing-in-ruby/).
|
|
131
134
|
|
|
132
|
-
Jamie Schembri did a shootout of Xsv against various other Excel reading gems comparing parsing speed, memory usage and allocations.
|
|
135
|
+
Jamie Schembri did a shootout of Xsv against various other Excel reading gems comparing parsing speed, memory usage, and allocations.
|
|
133
136
|
Check out his blog post: [Faster Excel parsing in Ruby](https://blog.schembri.me/post/faster-excel-parsing-in-ruby/).
|
|
134
137
|
|
|
138
|
+
Pre-1.0, Xsv used a native extension for XML parsing, which was faster than the pure Ruby parser (on MRI). Even with the pure Ruby parser, Xsv generally still outperforms other Ruby parsing gems.
|
|
139
|
+
|
|
135
140
|
## Contributing
|
|
136
141
|
|
|
137
142
|
Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
|
data/lib/xsv.rb
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
-
require "date"
|
|
3
|
-
require "ox"
|
|
4
2
|
|
|
5
|
-
require
|
|
6
|
-
|
|
7
|
-
require
|
|
8
|
-
require
|
|
9
|
-
require
|
|
10
|
-
require
|
|
11
|
-
require
|
|
12
|
-
require
|
|
13
|
-
require
|
|
14
|
-
require
|
|
3
|
+
require 'date'
|
|
4
|
+
|
|
5
|
+
require 'xsv/helpers'
|
|
6
|
+
require 'xsv/sax_parser'
|
|
7
|
+
require 'xsv/relationships_handler'
|
|
8
|
+
require 'xsv/shared_strings_parser'
|
|
9
|
+
require 'xsv/sheet'
|
|
10
|
+
require 'xsv/sheet_bounds_handler'
|
|
11
|
+
require 'xsv/sheet_rows_handler'
|
|
12
|
+
require 'xsv/sheets_ids_handler'
|
|
13
|
+
require 'xsv/styles_handler'
|
|
14
|
+
require 'xsv/version'
|
|
15
|
+
require 'xsv/workbook'
|
|
15
16
|
|
|
16
17
|
# XSV is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
17
18
|
# (commonly known as Excel or .xlsx files). It strives to be minimal in the
|
data/lib/xsv/helpers.rb
CHANGED
|
@@ -1,52 +1,54 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Xsv
|
|
3
4
|
module Helpers
|
|
4
5
|
# The default OOXML Spreadsheet number formats according to the ECMA standard
|
|
5
6
|
# User formats are appended from index 174 onward
|
|
6
7
|
BUILT_IN_NUMBER_FORMATS = {
|
|
7
|
-
1 =>
|
|
8
|
-
2 =>
|
|
9
|
-
3 =>
|
|
10
|
-
4 =>
|
|
11
|
-
5 =>
|
|
12
|
-
6 =>
|
|
13
|
-
7 =>
|
|
14
|
-
8 =>
|
|
15
|
-
9 =>
|
|
16
|
-
10 =>
|
|
17
|
-
11 =>
|
|
18
|
-
12 =>
|
|
19
|
-
13 =>
|
|
20
|
-
14 =>
|
|
21
|
-
15 =>
|
|
22
|
-
16 =>
|
|
23
|
-
17 =>
|
|
24
|
-
18 =>
|
|
25
|
-
19 =>
|
|
26
|
-
20 =>
|
|
27
|
-
21 =>
|
|
28
|
-
22 =>
|
|
29
|
-
37 =>
|
|
30
|
-
38 =>
|
|
31
|
-
39 =>
|
|
32
|
-
40 =>
|
|
33
|
-
45 =>
|
|
34
|
-
46 =>
|
|
35
|
-
47 =>
|
|
36
|
-
48 =>
|
|
37
|
-
49 =>
|
|
8
|
+
1 => '0',
|
|
9
|
+
2 => '0.00',
|
|
10
|
+
3 => '#, ##0',
|
|
11
|
+
4 => '#, ##0.00',
|
|
12
|
+
5 => '$#, ##0_);($#, ##0)',
|
|
13
|
+
6 => '$#, ##0_);[Red]($#, ##0)',
|
|
14
|
+
7 => '$#, ##0.00_);($#, ##0.00)',
|
|
15
|
+
8 => '$#, ##0.00_);[Red]($#, ##0.00)',
|
|
16
|
+
9 => '0%',
|
|
17
|
+
10 => '0.00%',
|
|
18
|
+
11 => '0.00E+00',
|
|
19
|
+
12 => '# ?/?',
|
|
20
|
+
13 => '# ??/??',
|
|
21
|
+
14 => 'm/d/yyyy',
|
|
22
|
+
15 => 'd-mmm-yy',
|
|
23
|
+
16 => 'd-mmm',
|
|
24
|
+
17 => 'mmm-yy',
|
|
25
|
+
18 => 'h:mm AM/PM',
|
|
26
|
+
19 => 'h:mm:ss AM/PM',
|
|
27
|
+
20 => 'h:mm',
|
|
28
|
+
21 => 'h:mm:ss',
|
|
29
|
+
22 => 'm/d/yyyy h:mm',
|
|
30
|
+
37 => '#, ##0_);(#, ##0)',
|
|
31
|
+
38 => '#, ##0_);[Red](#, ##0)',
|
|
32
|
+
39 => '#, ##0.00_);(#, ##0.00)',
|
|
33
|
+
40 => '#, ##0.00_);[Red](#, ##0.00)',
|
|
34
|
+
45 => 'mm:ss',
|
|
35
|
+
46 => '[h]:mm:ss',
|
|
36
|
+
47 => 'mm:ss.0',
|
|
37
|
+
48 => '##0.0E+0',
|
|
38
|
+
49 => '@'
|
|
38
39
|
}.freeze
|
|
39
40
|
|
|
40
|
-
MINUTE = 60
|
|
41
|
-
HOUR = 3600
|
|
42
|
-
A_CODEPOINT =
|
|
41
|
+
MINUTE = 60
|
|
42
|
+
HOUR = 3600
|
|
43
|
+
A_CODEPOINT = 'A'.ord.freeze
|
|
43
44
|
# The epoch for all dates in OOXML Spreadsheet documents
|
|
44
45
|
EPOCH = Date.new(1899, 12, 30).freeze
|
|
45
46
|
|
|
46
47
|
# Return the index number for the given Excel column name (i.e. "A1" => 0)
|
|
47
48
|
def column_index(col)
|
|
48
49
|
col.each_codepoint.reduce(0) do |sum, n|
|
|
49
|
-
break sum - 1 if n < A_CODEPOINT
|
|
50
|
+
break sum - 1 if n < A_CODEPOINT # reached a number
|
|
51
|
+
|
|
50
52
|
sum * 26 + (n - A_CODEPOINT + 1)
|
|
51
53
|
end
|
|
52
54
|
end
|
|
@@ -59,9 +61,7 @@ module Xsv
|
|
|
59
61
|
# Return a time as a string for the given Excel time value
|
|
60
62
|
def parse_time(number)
|
|
61
63
|
# Disregard date part
|
|
62
|
-
|
|
63
|
-
number = number - number.truncate
|
|
64
|
-
end
|
|
64
|
+
number -= number.truncate if number.positive?
|
|
65
65
|
|
|
66
66
|
base = number * 24
|
|
67
67
|
|
|
@@ -70,11 +70,11 @@ module Xsv
|
|
|
70
70
|
|
|
71
71
|
# Compensate for rounding errors
|
|
72
72
|
if minutes >= 60
|
|
73
|
-
hours
|
|
73
|
+
hours += (minutes / 60)
|
|
74
74
|
minutes = minutes % 60
|
|
75
75
|
end
|
|
76
76
|
|
|
77
|
-
|
|
77
|
+
format('%02d:%02d', hours, minutes)
|
|
78
78
|
end
|
|
79
79
|
|
|
80
80
|
# Returns a time including a date as a {Time} object
|
|
@@ -92,9 +92,9 @@ module Xsv
|
|
|
92
92
|
|
|
93
93
|
# Returns a number as either Integer or Float
|
|
94
94
|
def parse_number(string)
|
|
95
|
-
if string.include?
|
|
95
|
+
if string.include? '.'
|
|
96
96
|
string.to_f
|
|
97
|
-
elsif string.include?
|
|
97
|
+
elsif string.include? 'E'
|
|
98
98
|
Complex(string).to_f
|
|
99
99
|
else
|
|
100
100
|
string.to_i
|
|
@@ -105,11 +105,11 @@ module Xsv
|
|
|
105
105
|
def parse_number_format(number, format)
|
|
106
106
|
number = parse_number(number) if number.is_a?(String)
|
|
107
107
|
|
|
108
|
-
if
|
|
108
|
+
if datetime_format?(format)
|
|
109
109
|
parse_datetime(number)
|
|
110
|
-
elsif
|
|
110
|
+
elsif date_format?(format)
|
|
111
111
|
parse_date(number)
|
|
112
|
-
elsif
|
|
112
|
+
elsif time_format?(format)
|
|
113
113
|
parse_time(number)
|
|
114
114
|
else
|
|
115
115
|
number
|
|
@@ -117,20 +117,22 @@ module Xsv
|
|
|
117
117
|
end
|
|
118
118
|
|
|
119
119
|
# Tests if the given format string includes both date and time
|
|
120
|
-
def
|
|
121
|
-
|
|
120
|
+
def datetime_format?(format)
|
|
121
|
+
date_format?(format) && time_format?(format)
|
|
122
122
|
end
|
|
123
123
|
|
|
124
124
|
# Tests if the given format string is a date
|
|
125
|
-
def
|
|
125
|
+
def date_format?(format)
|
|
126
126
|
return false if format.nil?
|
|
127
|
+
|
|
127
128
|
# If it contains at least 2 sequences of d's, m's or y's it's a date!
|
|
128
129
|
format.scan(/[dmy]+/).length > 1
|
|
129
130
|
end
|
|
130
131
|
|
|
131
132
|
# Tests if the given format string is a time
|
|
132
|
-
def
|
|
133
|
+
def time_format?(format)
|
|
133
134
|
return false if format.nil?
|
|
135
|
+
|
|
134
136
|
# If it contains at least 2 sequences of h's, m's or s's it's a time!
|
|
135
137
|
format.scan(/[hms]+/).length > 1
|
|
136
138
|
end
|
|
@@ -1,40 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Xsv
|
|
3
4
|
# RelationshipsHandler parses the "xl/_rels/workbook.xml.rels" file to get the existing relationships.
|
|
4
5
|
# This is used internally when opening a workbook.
|
|
5
|
-
class RelationshipsHandler <
|
|
6
|
+
class RelationshipsHandler < SaxParser
|
|
6
7
|
def self.get_relations(io)
|
|
7
8
|
relations = []
|
|
8
|
-
handler = new do |relation|
|
|
9
|
-
relations << relation
|
|
10
|
-
end
|
|
11
9
|
|
|
12
|
-
|
|
13
|
-
return relations
|
|
14
|
-
end
|
|
10
|
+
new { |relation| relations << relation }.parse(io)
|
|
15
11
|
|
|
16
|
-
|
|
12
|
+
relations
|
|
13
|
+
end
|
|
17
14
|
|
|
18
15
|
def initialize(&block)
|
|
19
16
|
@block = block
|
|
20
|
-
@relationship = {}
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def start_element(name)
|
|
24
|
-
@relationship = {} if name == :Relationship
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
def attr(name, value)
|
|
28
|
-
case name
|
|
29
|
-
when :Id, :Type, :Target
|
|
30
|
-
@relationship[name] = value
|
|
31
|
-
end
|
|
32
17
|
end
|
|
33
18
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
@block.call(@relationship)
|
|
19
|
+
def start_element(name, attrs)
|
|
20
|
+
@block.call(attrs.slice(:Id, :Type, :Target)) if name == 'Relationship'
|
|
38
21
|
end
|
|
39
22
|
end
|
|
40
23
|
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Xsv
|
|
4
|
+
class SaxParser
|
|
5
|
+
ATTR_REGEX = /((\S+)="(.*?)")/m.freeze
|
|
6
|
+
|
|
7
|
+
def parse(io)
|
|
8
|
+
state = :look_start
|
|
9
|
+
if io.is_a?(String)
|
|
10
|
+
pbuf = io.dup
|
|
11
|
+
eof_reached = true
|
|
12
|
+
must_read = false
|
|
13
|
+
else
|
|
14
|
+
pbuf = String.new(capacity: 8192)
|
|
15
|
+
eof_reached = false
|
|
16
|
+
must_read = true
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
loop do
|
|
20
|
+
if must_read
|
|
21
|
+
begin
|
|
22
|
+
pbuf << io.sysread(2048)
|
|
23
|
+
rescue EOFError, TypeError
|
|
24
|
+
# EOFError is thrown by IO, rubyzip returns nil from sysread on EOF
|
|
25
|
+
eof_reached = true
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
must_read = false
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
if state == :look_start
|
|
32
|
+
if (o = pbuf.index('<'))
|
|
33
|
+
chars = pbuf.slice!(0, o + 1).chop!
|
|
34
|
+
characters(chars) unless chars.empty? || !respond_to?(:characters)
|
|
35
|
+
|
|
36
|
+
state = :look_end
|
|
37
|
+
elsif eof_reached
|
|
38
|
+
# Discard anything after the last tag in the document
|
|
39
|
+
break
|
|
40
|
+
else
|
|
41
|
+
# Continue loop to read more data into the buffer
|
|
42
|
+
must_read = true
|
|
43
|
+
next
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
if state == :look_end
|
|
48
|
+
if (o = pbuf.index('>'))
|
|
49
|
+
tag_name, args = pbuf.slice!(0, o + 1).chop!.split(' ', 2)
|
|
50
|
+
|
|
51
|
+
if tag_name.start_with?('/')
|
|
52
|
+
end_element(tag_name[1..-1]) if respond_to?(:end_element)
|
|
53
|
+
else
|
|
54
|
+
if args.nil?
|
|
55
|
+
start_element(tag_name, nil)
|
|
56
|
+
else
|
|
57
|
+
start_element(tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |m, h| h[m[1].to_sym] = m[2] })
|
|
58
|
+
end_element(tag_name) if args.end_with?('/') && respond_to?(:end_element)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
state = :look_start
|
|
63
|
+
elsif eof_reached
|
|
64
|
+
raise 'Malformed XML document, looking for end of tag beyond EOF'
|
|
65
|
+
else
|
|
66
|
+
must_read = true
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Xsv
|
|
3
4
|
# Interpret the sharedStrings.xml file from the workbook
|
|
4
5
|
# This is used internally when opening a sheet.
|
|
5
|
-
class SharedStringsParser <
|
|
6
|
+
class SharedStringsParser < SaxParser
|
|
6
7
|
def self.parse(io)
|
|
7
8
|
strings = []
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
return strings
|
|
9
|
+
new { |s| strings << s }.parse(io)
|
|
10
|
+
strings
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def initialize(&block)
|
|
@@ -15,24 +15,24 @@ module Xsv
|
|
|
15
15
|
@state = nil
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
def start_element(name)
|
|
18
|
+
def start_element(name, _attrs)
|
|
19
19
|
case name
|
|
20
|
-
when
|
|
21
|
-
@current_string =
|
|
22
|
-
when
|
|
20
|
+
when 'si'
|
|
21
|
+
@current_string = ''
|
|
22
|
+
when 't'
|
|
23
23
|
@state = name
|
|
24
24
|
end
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
def
|
|
28
|
-
@current_string += value if @state ==
|
|
27
|
+
def characters(value)
|
|
28
|
+
@current_string += value if @state == 't'
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
def end_element(name)
|
|
32
32
|
case name
|
|
33
|
-
when
|
|
33
|
+
when 'si'
|
|
34
34
|
@block.call(@current_string)
|
|
35
|
-
when
|
|
35
|
+
when 't'
|
|
36
36
|
@state = nil
|
|
37
37
|
end
|
|
38
38
|
end
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Xsv
|
|
3
4
|
# Sheet represents a single worksheet from a workbook and is normally accessed through {Workbook#sheets}
|
|
4
5
|
#
|
|
@@ -39,14 +40,14 @@ module Xsv
|
|
|
39
40
|
@headers = []
|
|
40
41
|
@mode = :array
|
|
41
42
|
@row_skip = 0
|
|
42
|
-
@hidden = ids[:state] ==
|
|
43
|
+
@hidden = ids[:state] == 'hidden'
|
|
43
44
|
|
|
44
45
|
@last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
|
|
45
46
|
end
|
|
46
47
|
|
|
47
48
|
# @return [String]
|
|
48
49
|
def inspect
|
|
49
|
-
"#<#{self.class.name}:#{
|
|
50
|
+
"#<#{self.class.name}:#{object_id}>"
|
|
50
51
|
end
|
|
51
52
|
|
|
52
53
|
# Returns true if the worksheet is hidden
|
|
@@ -60,15 +61,7 @@ module Xsv
|
|
|
60
61
|
|
|
61
62
|
handler = SheetRowsHandler.new(@mode, empty_row, @workbook, @row_skip, @last_row, &block)
|
|
62
63
|
|
|
63
|
-
|
|
64
|
-
# handed a string. For larger sheets this leads to awful performance.
|
|
65
|
-
# This is probably caused by either something in SheetRowsHandler or
|
|
66
|
-
# the interaction between Zip::InputStream and Ox
|
|
67
|
-
if @size > 100_000_000
|
|
68
|
-
Ox.sax_parse(handler, @io)
|
|
69
|
-
else
|
|
70
|
-
Ox.sax_parse(handler, @io.read)
|
|
71
|
-
end
|
|
64
|
+
handler.parse(@io)
|
|
72
65
|
|
|
73
66
|
true
|
|
74
67
|
end
|
|
@@ -82,7 +75,7 @@ module Xsv
|
|
|
82
75
|
return row if i == number
|
|
83
76
|
end
|
|
84
77
|
|
|
85
|
-
|
|
78
|
+
empty_row
|
|
86
79
|
end
|
|
87
80
|
|
|
88
81
|
# Load headers in the top row of the worksheet. After parsing of headers
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Xsv
|
|
3
4
|
# SheetBoundsHandler scans a sheet looking for the outer bounds of the content within.
|
|
4
5
|
# This is used internally when opening a sheet to deal with worksheets that do not
|
|
5
6
|
# have a correct dimension tag.
|
|
6
|
-
class SheetBoundsHandler <
|
|
7
|
+
class SheetBoundsHandler < SaxParser
|
|
7
8
|
include Xsv::Helpers
|
|
8
9
|
|
|
9
10
|
def self.get_bounds(sheet, workbook)
|
|
@@ -18,12 +19,11 @@ module Xsv
|
|
|
18
19
|
end
|
|
19
20
|
|
|
20
21
|
sheet.rewind
|
|
21
|
-
Ox.sax_parse(handler, sheet.read)
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
end
|
|
23
|
+
handler.parse(sheet)
|
|
25
24
|
|
|
26
|
-
|
|
25
|
+
[rows, cols]
|
|
26
|
+
end
|
|
27
27
|
|
|
28
28
|
def initialize(trim_empty_rows, &block)
|
|
29
29
|
@block = block
|
|
@@ -35,36 +35,22 @@ module Xsv
|
|
|
35
35
|
@trim_empty_rows = trim_empty_rows
|
|
36
36
|
end
|
|
37
37
|
|
|
38
|
-
def start_element(name)
|
|
38
|
+
def start_element(name, attrs)
|
|
39
39
|
case name
|
|
40
|
-
when
|
|
40
|
+
when 'c'
|
|
41
41
|
@state = name
|
|
42
|
-
@cell =
|
|
43
|
-
when
|
|
42
|
+
@cell = attrs[:r]
|
|
43
|
+
when 'v'
|
|
44
44
|
col = column_index(@cell)
|
|
45
45
|
@maxColumn = col if col > @maxColumn
|
|
46
46
|
@maxRow = @row if @row > @maxRow
|
|
47
|
-
when
|
|
47
|
+
when 'row'
|
|
48
48
|
@state = name
|
|
49
|
-
@row =
|
|
50
|
-
when
|
|
49
|
+
@row = attrs[:r].to_i
|
|
50
|
+
when 'dimension'
|
|
51
51
|
@state = name
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
def end_element(name)
|
|
56
|
-
if name == :sheetData
|
|
57
|
-
@block.call(@maxRow, @maxColumn)
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
52
|
|
|
61
|
-
|
|
62
|
-
if @state == :c && name == :r
|
|
63
|
-
@cell = value
|
|
64
|
-
elsif @state == :row && name == :r
|
|
65
|
-
@row = value.to_i
|
|
66
|
-
elsif @state == :dimension && name == :ref
|
|
67
|
-
_firstCell, lastCell = value.split(":")
|
|
53
|
+
_firstCell, lastCell = attrs[:ref].split(':')
|
|
68
54
|
|
|
69
55
|
if lastCell
|
|
70
56
|
@maxColumn = column_index(lastCell)
|
|
@@ -75,5 +61,9 @@ module Xsv
|
|
|
75
61
|
end
|
|
76
62
|
end
|
|
77
63
|
end
|
|
64
|
+
|
|
65
|
+
def end_element(name)
|
|
66
|
+
@block.call(@maxRow, @maxColumn) if name == 'sheetData'
|
|
67
|
+
end
|
|
78
68
|
end
|
|
79
69
|
end
|
|
@@ -1,100 +1,58 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Xsv
|
|
3
4
|
# This is the core worksheet parser, implemented as an Ox::Sax handler. This is
|
|
4
5
|
# used internally to enumerate rows.
|
|
5
|
-
class SheetRowsHandler <
|
|
6
|
+
class SheetRowsHandler < SaxParser
|
|
6
7
|
include Xsv::Helpers
|
|
7
8
|
|
|
8
|
-
def format_cell
|
|
9
|
-
return nil if @current_value.empty?
|
|
10
|
-
|
|
11
|
-
case @current_cell[:t]
|
|
12
|
-
when "s"
|
|
13
|
-
@workbook.shared_strings[@current_value.to_i]
|
|
14
|
-
when "str", "inlineStr"
|
|
15
|
-
@current_value.dup
|
|
16
|
-
when "e" # N/A
|
|
17
|
-
nil
|
|
18
|
-
when nil, "n"
|
|
19
|
-
if @current_cell[:s]
|
|
20
|
-
style = @workbook.xfs[@current_cell[:s].to_i]
|
|
21
|
-
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
|
22
|
-
|
|
23
|
-
parse_number_format(@current_value, numFmt)
|
|
24
|
-
else
|
|
25
|
-
parse_number(@current_value)
|
|
26
|
-
end
|
|
27
|
-
when "b"
|
|
28
|
-
@current_value == "1"
|
|
29
|
-
else
|
|
30
|
-
raise Xsv::Error, "Encountered unknown column type #{@current_cell[:t]}"
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# Ox::Sax implementation below
|
|
35
|
-
|
|
36
9
|
def initialize(mode, empty_row, workbook, row_skip, last_row, &block)
|
|
37
|
-
@block = block
|
|
38
|
-
|
|
39
|
-
# :sheetData
|
|
40
|
-
# :row
|
|
41
|
-
# :c
|
|
42
|
-
# :v
|
|
43
|
-
@state = nil
|
|
44
|
-
|
|
45
10
|
@mode = mode
|
|
46
11
|
@empty_row = empty_row
|
|
47
12
|
@workbook = workbook
|
|
48
13
|
@row_skip = row_skip
|
|
14
|
+
@last_row = last_row - @row_skip
|
|
15
|
+
@block = block
|
|
16
|
+
|
|
17
|
+
@state = nil
|
|
18
|
+
|
|
49
19
|
@row_index = 0
|
|
50
20
|
@current_row = {}
|
|
51
21
|
@current_row_attrs = {}
|
|
52
22
|
@current_cell = {}
|
|
53
23
|
@current_value = String.new
|
|
54
|
-
@last_row = last_row
|
|
55
24
|
|
|
56
|
-
if @mode == :hash
|
|
57
|
-
@headers = @empty_row.keys
|
|
58
|
-
end
|
|
25
|
+
@headers = @empty_row.keys if @mode == :hash
|
|
59
26
|
end
|
|
60
27
|
|
|
61
|
-
def start_element(name)
|
|
28
|
+
def start_element(name, attrs)
|
|
62
29
|
case name
|
|
63
|
-
when
|
|
30
|
+
when 'c'
|
|
64
31
|
@state = name
|
|
65
|
-
@current_cell
|
|
32
|
+
@current_cell = attrs
|
|
66
33
|
@current_value.clear
|
|
67
|
-
when
|
|
34
|
+
when 'v', 'is'
|
|
68
35
|
@state = name
|
|
69
|
-
when
|
|
36
|
+
when 'row'
|
|
70
37
|
@state = name
|
|
71
38
|
@current_row = @empty_row.dup
|
|
72
|
-
@current_row_attrs
|
|
73
|
-
when
|
|
74
|
-
@state = nil unless @state ==
|
|
39
|
+
@current_row_attrs = attrs
|
|
40
|
+
when 't'
|
|
41
|
+
@state = nil unless @state == 'is'
|
|
75
42
|
else
|
|
76
43
|
@state = nil
|
|
77
44
|
end
|
|
78
45
|
end
|
|
79
46
|
|
|
80
|
-
def
|
|
81
|
-
if @state ==
|
|
82
|
-
@current_value << value
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def attr(name, value)
|
|
87
|
-
case @state
|
|
88
|
-
when :c
|
|
89
|
-
@current_cell[name] = value
|
|
90
|
-
when :row
|
|
91
|
-
@current_row_attrs[name] = value
|
|
92
|
-
end
|
|
47
|
+
def characters(value)
|
|
48
|
+
@current_value << value if @state == 'v' || @state == 'is'
|
|
93
49
|
end
|
|
94
50
|
|
|
95
51
|
def end_element(name)
|
|
96
52
|
case name
|
|
97
|
-
when
|
|
53
|
+
when 'v'
|
|
54
|
+
@state = nil
|
|
55
|
+
when 'c'
|
|
98
56
|
col_index = column_index(@current_cell[:r])
|
|
99
57
|
|
|
100
58
|
case @mode
|
|
@@ -103,28 +61,54 @@ module Xsv
|
|
|
103
61
|
when :hash
|
|
104
62
|
@current_row[@headers[col_index]] = format_cell
|
|
105
63
|
end
|
|
106
|
-
when
|
|
107
|
-
|
|
108
|
-
|
|
64
|
+
when 'row'
|
|
65
|
+
real_row_number = @current_row_attrs[:r].to_i
|
|
66
|
+
adjusted_row_number = real_row_number - @row_skip
|
|
109
67
|
|
|
110
|
-
if
|
|
111
|
-
return
|
|
112
|
-
end
|
|
68
|
+
return if real_row_number <= @row_skip
|
|
113
69
|
|
|
114
70
|
@row_index += 1
|
|
115
71
|
|
|
116
72
|
# Skip first row if we're in hash mode
|
|
117
|
-
return if
|
|
73
|
+
return if adjusted_row_number == 1 && @mode == :hash
|
|
118
74
|
|
|
119
75
|
# Pad empty rows
|
|
120
|
-
while @row_index <
|
|
76
|
+
while @row_index < adjusted_row_number
|
|
121
77
|
@block.call(@empty_row)
|
|
122
78
|
@row_index += 1
|
|
123
79
|
next
|
|
124
80
|
end
|
|
125
81
|
|
|
126
82
|
# Do not return empty trailing rows
|
|
127
|
-
@block.call(@current_row) unless @row_index > @last_row
|
|
83
|
+
@block.call(@current_row) unless @row_index > @last_row
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
private
|
|
88
|
+
|
|
89
|
+
def format_cell
|
|
90
|
+
return nil if @current_value.empty?
|
|
91
|
+
|
|
92
|
+
case @current_cell[:t]
|
|
93
|
+
when 's'
|
|
94
|
+
@workbook.shared_strings[@current_value.to_i]
|
|
95
|
+
when 'str', 'inlineStr'
|
|
96
|
+
@current_value.strip
|
|
97
|
+
when 'e' # N/A
|
|
98
|
+
nil
|
|
99
|
+
when nil, 'n'
|
|
100
|
+
if @current_cell[:s]
|
|
101
|
+
style = @workbook.xfs[@current_cell[:s].to_i]
|
|
102
|
+
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
|
103
|
+
|
|
104
|
+
parse_number_format(@current_value, numFmt)
|
|
105
|
+
else
|
|
106
|
+
parse_number(@current_value)
|
|
107
|
+
end
|
|
108
|
+
when 'b'
|
|
109
|
+
@current_value == '1'
|
|
110
|
+
else
|
|
111
|
+
raise Xsv::Error, "Encountered unknown column type #{@current_cell[:t]}"
|
|
128
112
|
end
|
|
129
113
|
end
|
|
130
114
|
end
|
|
@@ -1,56 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Xsv
|
|
3
4
|
# SheetsIdsHandler interprets the relevant parts of workbook.xml
|
|
4
5
|
# This is used internally to get the sheets ids, relationship_ids, and names when opening a workbook.
|
|
5
|
-
class SheetsIdsHandler <
|
|
6
|
+
class SheetsIdsHandler < SaxParser
|
|
6
7
|
def self.get_sheets_ids(io)
|
|
7
8
|
sheets_ids = []
|
|
8
|
-
handler = new do |sheet_ids|
|
|
9
|
-
sheets_ids << sheet_ids
|
|
10
|
-
end
|
|
11
9
|
|
|
12
|
-
|
|
13
|
-
return sheets_ids
|
|
14
|
-
end
|
|
10
|
+
new { |sheet_ids| sheets_ids << sheet_ids }.parse(io)
|
|
15
11
|
|
|
16
|
-
|
|
12
|
+
sheets_ids
|
|
13
|
+
end
|
|
17
14
|
|
|
18
15
|
def initialize(&block)
|
|
19
16
|
@block = block
|
|
20
|
-
@parsing = false
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def start_element(name)
|
|
24
|
-
if name == :sheets
|
|
25
|
-
@parsing = true
|
|
26
|
-
return
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
return unless name == :sheet
|
|
30
|
-
|
|
31
|
-
@sheet_ids = {}
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def attr(name, value)
|
|
35
|
-
return unless @parsing
|
|
36
|
-
|
|
37
|
-
case name
|
|
38
|
-
when :name, :sheetId, :state
|
|
39
|
-
@sheet_ids[name] = value
|
|
40
|
-
when :'r:id'
|
|
41
|
-
@sheet_ids[:r_id] = value
|
|
42
|
-
end
|
|
43
17
|
end
|
|
44
18
|
|
|
45
|
-
def
|
|
46
|
-
if name ==
|
|
47
|
-
@parsing = false
|
|
48
|
-
return
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
return unless name == :sheet
|
|
52
|
-
|
|
53
|
-
@block.call(@sheet_ids)
|
|
19
|
+
def start_element(name, attrs)
|
|
20
|
+
@block.call(attrs.slice(:name, :sheetId, :state, :'r:id')) if name == 'sheet'
|
|
54
21
|
end
|
|
55
22
|
end
|
|
56
23
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
|
@@ -1,59 +1,43 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
module Xsv
|
|
3
4
|
# StylesHandler interprets the relevant parts of styles.xml
|
|
4
5
|
# This is used internally when opening a sheet.
|
|
5
|
-
class StylesHandler <
|
|
6
|
-
def self.get_styles(io
|
|
7
|
-
|
|
8
|
-
@numFmts = nil
|
|
9
|
-
handler = new(numFmts) do |xfs, numFmts|
|
|
6
|
+
class StylesHandler < SaxParser
|
|
7
|
+
def self.get_styles(io)
|
|
8
|
+
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, numFmts|
|
|
10
9
|
@xfs = xfs
|
|
11
10
|
@numFmts = numFmts
|
|
12
11
|
end
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
return @xfs, @numFmts
|
|
16
|
-
end
|
|
13
|
+
handler.parse(io)
|
|
17
14
|
|
|
18
|
-
|
|
15
|
+
[@xfs, @numFmts]
|
|
16
|
+
end
|
|
19
17
|
|
|
20
18
|
def initialize(numFmts, &block)
|
|
21
19
|
@block = block
|
|
22
20
|
@state = nil
|
|
23
21
|
@xfs = []
|
|
24
22
|
@numFmts = numFmts
|
|
25
|
-
|
|
26
|
-
@xf = {}
|
|
27
|
-
@numFmt = {}
|
|
28
23
|
end
|
|
29
24
|
|
|
30
|
-
def start_element(name)
|
|
25
|
+
def start_element(name, attrs)
|
|
31
26
|
case name
|
|
32
|
-
when
|
|
33
|
-
@state =
|
|
34
|
-
when
|
|
35
|
-
@
|
|
36
|
-
when
|
|
37
|
-
@
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def attr(name, value)
|
|
42
|
-
case @state
|
|
43
|
-
when :cellXfs
|
|
44
|
-
@xf[name] = value
|
|
45
|
-
when :numFmts
|
|
46
|
-
@numFmt[name] = value
|
|
27
|
+
when 'cellXfs'
|
|
28
|
+
@state = 'cellXfs'
|
|
29
|
+
when 'xf'
|
|
30
|
+
@xfs << attrs if @state == 'cellXfs'
|
|
31
|
+
when 'numFmt'
|
|
32
|
+
@numFmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
|
|
47
33
|
end
|
|
48
34
|
end
|
|
49
35
|
|
|
50
36
|
def end_element(name)
|
|
51
|
-
if
|
|
52
|
-
@xfs << @xf
|
|
53
|
-
elsif @state == :numFmts && name == :numFmt
|
|
54
|
-
@numFmts[@numFmt[:numFmtId].to_i] = @numFmt[:formatCode]
|
|
55
|
-
elsif name == :styleSheet
|
|
37
|
+
if name == 'styleSheet'
|
|
56
38
|
@block.call(@xfs, @numFmts)
|
|
39
|
+
elsif name == 'cellXfs'
|
|
40
|
+
@state = nil
|
|
57
41
|
end
|
|
58
42
|
end
|
|
59
43
|
end
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
require 'zip'
|
|
3
4
|
|
|
4
5
|
module Xsv
|
|
5
6
|
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
|
6
7
|
# multiple Sheets that are available in the array that's accessible through {#sheets}
|
|
7
8
|
class Workbook
|
|
8
|
-
|
|
9
9
|
# Access the Sheet objects contained in the workbook
|
|
10
10
|
# @return [Array<Sheet>]
|
|
11
11
|
attr_reader :sheets
|
|
@@ -15,13 +15,13 @@ module Xsv
|
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
|
16
16
|
# options see {.initialize}
|
|
17
17
|
def self.open(data, **kws)
|
|
18
|
-
if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
18
|
+
@workbook = if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
|
19
|
+
new(Zip::File.open_buffer(data), **kws)
|
|
20
|
+
elsif data.start_with?("PK\x03\x04") # is it a string containing a file?
|
|
21
|
+
new(Zip::File.open_buffer(data), **kws)
|
|
22
|
+
else # must be a filename
|
|
23
|
+
new(Zip::File.open(data), **kws)
|
|
24
|
+
end
|
|
25
25
|
end
|
|
26
26
|
|
|
27
27
|
# Open a workbook from an instance of {Zip::File}. Generally it's recommended
|
|
@@ -36,19 +36,16 @@ module Xsv
|
|
|
36
36
|
@trim_empty_rows = trim_empty_rows
|
|
37
37
|
|
|
38
38
|
@sheets = []
|
|
39
|
-
@xfs =
|
|
40
|
-
@
|
|
41
|
-
|
|
42
|
-
fetch_shared_strings
|
|
43
|
-
|
|
44
|
-
fetch_sheets_ids
|
|
45
|
-
fetch_relationships
|
|
46
|
-
fetch_sheets
|
|
39
|
+
@xfs, @numFmts = fetch_styles
|
|
40
|
+
@sheet_ids = fetch_sheet_ids
|
|
41
|
+
@relationships = fetch_relationships
|
|
42
|
+
@shared_strings = fetch_shared_strings
|
|
43
|
+
@sheets = fetch_sheets
|
|
47
44
|
end
|
|
48
45
|
|
|
49
46
|
# @return [String]
|
|
50
47
|
def inspect
|
|
51
|
-
"#<#{self.class.name}:#{
|
|
48
|
+
"#<#{self.class.name}:#{object_id}>"
|
|
52
49
|
end
|
|
53
50
|
|
|
54
51
|
# Close the handle to the workbook file and leave all resources for the GC to collect
|
|
@@ -60,7 +57,7 @@ module Xsv
|
|
|
60
57
|
@numFmts = nil
|
|
61
58
|
@relationships = nil
|
|
62
59
|
@shared_strings = nil
|
|
63
|
-
@
|
|
60
|
+
@sheet_ids = nil
|
|
64
61
|
|
|
65
62
|
true
|
|
66
63
|
end
|
|
@@ -75,42 +72,44 @@ module Xsv
|
|
|
75
72
|
private
|
|
76
73
|
|
|
77
74
|
def fetch_shared_strings
|
|
78
|
-
handle = @zip.glob(
|
|
75
|
+
handle = @zip.glob('xl/sharedStrings.xml').first
|
|
79
76
|
return if handle.nil?
|
|
80
77
|
|
|
81
78
|
stream = handle.get_input_stream
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
stream.close
|
|
79
|
+
SharedStringsParser.parse(stream)
|
|
80
|
+
ensure
|
|
81
|
+
stream.close if stream
|
|
85
82
|
end
|
|
86
83
|
|
|
87
84
|
def fetch_styles
|
|
88
|
-
stream = @zip.glob(
|
|
85
|
+
stream = @zip.glob('xl/styles.xml').first.get_input_stream
|
|
89
86
|
|
|
90
|
-
|
|
87
|
+
StylesHandler.get_styles(stream)
|
|
88
|
+
ensure
|
|
89
|
+
stream.close
|
|
91
90
|
end
|
|
92
91
|
|
|
93
92
|
def fetch_sheets
|
|
94
|
-
@zip.glob(
|
|
93
|
+
@zip.glob('xl/worksheets/sheet*.xml').sort do |a, b|
|
|
95
94
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
|
96
|
-
end.
|
|
97
|
-
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?(
|
|
98
|
-
sheet_ids = @
|
|
99
|
-
|
|
95
|
+
end.map do |entry|
|
|
96
|
+
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?('worksheet') }
|
|
97
|
+
sheet_ids = @sheet_ids.detect { |i| i[:"r:id"] == rel[:Id] }
|
|
98
|
+
Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
|
|
100
99
|
end
|
|
101
100
|
end
|
|
102
101
|
|
|
103
|
-
def
|
|
104
|
-
stream = @zip.glob(
|
|
105
|
-
|
|
106
|
-
|
|
102
|
+
def fetch_sheet_ids
|
|
103
|
+
stream = @zip.glob('xl/workbook.xml').first.get_input_stream
|
|
104
|
+
SheetsIdsHandler.get_sheets_ids(stream)
|
|
105
|
+
ensure
|
|
107
106
|
stream.close
|
|
108
107
|
end
|
|
109
108
|
|
|
110
109
|
def fetch_relationships
|
|
111
|
-
stream = @zip.glob(
|
|
112
|
-
|
|
113
|
-
|
|
110
|
+
stream = @zip.glob('xl/_rels/workbook.xml.rels').first.get_input_stream
|
|
111
|
+
RelationshipsHandler.get_relations(stream)
|
|
112
|
+
ensure
|
|
114
113
|
stream.close
|
|
115
114
|
end
|
|
116
115
|
end
|
data/xsv.gemspec
CHANGED
|
@@ -36,12 +36,11 @@ Gem::Specification.new do |spec|
|
|
|
36
36
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
37
37
|
spec.require_paths = ["lib"]
|
|
38
38
|
|
|
39
|
-
spec.required_ruby_version = "
|
|
39
|
+
spec.required_ruby_version = ">= 2.5"
|
|
40
40
|
|
|
41
41
|
spec.add_dependency "rubyzip", ">= 1.3", "< 3"
|
|
42
|
-
spec.add_dependency "ox", ">= 2.9"
|
|
43
42
|
|
|
44
43
|
spec.add_development_dependency "bundler", "< 3"
|
|
45
44
|
spec.add_development_dependency "rake", "~> 13.0"
|
|
46
|
-
spec.add_development_dependency "minitest", "~> 5.
|
|
45
|
+
spec.add_development_dependency "minitest", "~> 5.14.2"
|
|
47
46
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 1.0.0.pre
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2021-01-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -30,20 +30,6 @@ dependencies:
|
|
|
30
30
|
- - "<"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
32
|
version: '3'
|
|
33
|
-
- !ruby/object:Gem::Dependency
|
|
34
|
-
name: ox
|
|
35
|
-
requirement: !ruby/object:Gem::Requirement
|
|
36
|
-
requirements:
|
|
37
|
-
- - ">="
|
|
38
|
-
- !ruby/object:Gem::Version
|
|
39
|
-
version: '2.9'
|
|
40
|
-
type: :runtime
|
|
41
|
-
prerelease: false
|
|
42
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
43
|
-
requirements:
|
|
44
|
-
- - ">="
|
|
45
|
-
- !ruby/object:Gem::Version
|
|
46
|
-
version: '2.9'
|
|
47
33
|
- !ruby/object:Gem::Dependency
|
|
48
34
|
name: bundler
|
|
49
35
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -78,14 +64,14 @@ dependencies:
|
|
|
78
64
|
requirements:
|
|
79
65
|
- - "~>"
|
|
80
66
|
- !ruby/object:Gem::Version
|
|
81
|
-
version:
|
|
67
|
+
version: 5.14.2
|
|
82
68
|
type: :development
|
|
83
69
|
prerelease: false
|
|
84
70
|
version_requirements: !ruby/object:Gem::Requirement
|
|
85
71
|
requirements:
|
|
86
72
|
- - "~>"
|
|
87
73
|
- !ruby/object:Gem::Version
|
|
88
|
-
version:
|
|
74
|
+
version: 5.14.2
|
|
89
75
|
description: |2
|
|
90
76
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
91
77
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
@@ -109,6 +95,7 @@ files:
|
|
|
109
95
|
- lib/xsv.rb
|
|
110
96
|
- lib/xsv/helpers.rb
|
|
111
97
|
- lib/xsv/relationships_handler.rb
|
|
98
|
+
- lib/xsv/sax_parser.rb
|
|
112
99
|
- lib/xsv/shared_strings_parser.rb
|
|
113
100
|
- lib/xsv/sheet.rb
|
|
114
101
|
- lib/xsv/sheet_bounds_handler.rb
|
|
@@ -131,14 +118,14 @@ require_paths:
|
|
|
131
118
|
- lib
|
|
132
119
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
133
120
|
requirements:
|
|
134
|
-
- - "
|
|
121
|
+
- - ">="
|
|
135
122
|
- !ruby/object:Gem::Version
|
|
136
123
|
version: '2.5'
|
|
137
124
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
138
125
|
requirements:
|
|
139
|
-
- - "
|
|
126
|
+
- - ">"
|
|
140
127
|
- !ruby/object:Gem::Version
|
|
141
|
-
version:
|
|
128
|
+
version: 1.3.1
|
|
142
129
|
requirements: []
|
|
143
130
|
rubygems_version: 3.1.2
|
|
144
131
|
signing_key:
|