xsv 1.0.5 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +3 -3
- data/.standard.yml +1 -1
- data/CHANGELOG.md +15 -0
- data/README.md +48 -22
- data/benchmark.rb +51 -0
- data/lib/xsv/sax_parser.rb +23 -7
- data/lib/xsv/sheet.rb +1 -1
- data/lib/xsv/sheet_bounds_handler.rb +10 -10
- data/lib/xsv/sheet_rows_handler.rb +13 -24
- data/lib/xsv/sheets_ids_handler.rb +1 -1
- data/lib/xsv/styles_handler.rb +8 -8
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +25 -34
- data/lib/xsv.rb +27 -0
- data/xsv.gemspec +4 -3
- metadata +26 -11
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b2cc530ad96a5351ea6ab8a8b9d0f2ee9df0e1827a98e4244f239b3658bc2145
|
|
4
|
+
data.tar.gz: e286d74163ea3524dfcbd92553c0c4254b667e4ea13adae7468c0d1dc4c8089b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a89e58bc0447ecbd2eefdd81fe978a5e38438296bcce9dbf18b4dbc3fea5d5740016e35237d229fbab8201bb0e71b208ca66858c9d61e85f4d31d729e9048054
|
|
7
|
+
data.tar.gz: 45ddf90be0abe97dcd8aac6b08b9daeac2f68f7634062655efee6f59fbef50c6aad3eb69a461b4f7fcf9dd2751fa735de82e9a0821ec18d2f45cb4bada591698
|
data/.github/workflows/ruby.yml
CHANGED
|
@@ -9,9 +9,9 @@ name: Ruby
|
|
|
9
9
|
|
|
10
10
|
on:
|
|
11
11
|
push:
|
|
12
|
-
branches: [
|
|
12
|
+
branches: [ main ]
|
|
13
13
|
pull_request:
|
|
14
|
-
branches: [
|
|
14
|
+
branches: [ main ]
|
|
15
15
|
|
|
16
16
|
jobs:
|
|
17
17
|
test:
|
|
@@ -19,7 +19,7 @@ jobs:
|
|
|
19
19
|
runs-on: ubuntu-latest
|
|
20
20
|
strategy:
|
|
21
21
|
matrix:
|
|
22
|
-
ruby-version: ['2.6', '2.7', '3.0', 'jruby', 'truffleruby']
|
|
22
|
+
ruby-version: ['2.6', '2.7', '3.0', '3.1', 'jruby', 'truffleruby']
|
|
23
23
|
|
|
24
24
|
steps:
|
|
25
25
|
- uses: actions/checkout@v2
|
data/.standard.yml
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
ruby_version: 2.
|
|
1
|
+
ruby_version: 2.6.9
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# Xsv Changelog
|
|
2
2
|
|
|
3
|
+
## 1.1.1 2022-04-01
|
|
4
|
+
|
|
5
|
+
- Improve compatibility with files generated by the Open XML SDK (#40)
|
|
6
|
+
|
|
7
|
+
## 1.1.0 2022-02-13
|
|
8
|
+
|
|
9
|
+
- New, shorter `Xsv.open` syntax as a drop-in replacement for `Xsv::Workbook.open`, which is still supported
|
|
10
|
+
- Enable parsing of headers for all sheets by passing `parse_headers: true` to `Xsv.open`
|
|
11
|
+
- Improvements in performance and test coverage
|
|
12
|
+
- Dropped support for Ruby 2.5, which is EOL. Xsv 1.1.0 supports Ruby 2.6+, latest JRuby, latest TruffleRuby
|
|
13
|
+
|
|
14
|
+
## 1.0.6 2022-01-07
|
|
15
|
+
|
|
16
|
+
- Code cleanup, small performance improvements
|
|
17
|
+
|
|
3
18
|
## 1.0.5 2022-01-05
|
|
4
19
|
|
|
5
20
|
- Raise exception if given an empty buffer when opening workbook (thanks @kevin-j-m)
|
data/README.md
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# Xsv .xlsx reader
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
[](https://github.com/martijn/xsv/actions/workflows/ruby.yml)
|
|
5
|
+
[Code coverage](https://app.codecov.io/gh/martijn/xsv)
|
|
6
|
+
[API documentation](https://rubydoc.info/github/martijn/xsv)
|
|
7
|
+
[Gem version](https://badge.fury.io/rb/xsv)
|
|
6
8
|
|
|
7
9
|
Xsv is a fast, lightweight, pure Ruby parser for ISO/IEC 29500 Office Open XML spreadsheet files
|
|
8
10
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
@@ -41,17 +43,18 @@ when that becomes stable.
|
|
|
41
43
|
|
|
42
44
|
## Usage
|
|
43
45
|
|
|
46
|
+
### Array and hash mode
|
|
44
47
|
Xsv has two modes of operation. By default, it returns an array for
|
|
45
48
|
each row in the sheet:
|
|
46
49
|
|
|
47
50
|
```ruby
|
|
48
|
-
x = Xsv
|
|
51
|
+
x = Xsv.open("sheet.xlsx") # => #<Xsv::Workbook sheets=1>
|
|
49
52
|
|
|
50
53
|
sheet = x.sheets[0]
|
|
51
54
|
|
|
52
55
|
# Iterate over rows
|
|
53
|
-
sheet.
|
|
54
|
-
row # => ["header1", "header2"]
|
|
56
|
+
sheet.each do |row|
|
|
57
|
+
row # => ["header1", "header2"]
|
|
55
58
|
end
|
|
56
59
|
|
|
57
60
|
# Access row by index (zero-based)
|
|
@@ -59,40 +62,63 @@ sheet[1] # => ["value1", "value2"]
|
|
|
59
62
|
```
|
|
60
63
|
|
|
61
64
|
Alternatively, it can load the headers from the first row and return a hash
|
|
62
|
-
for every row
|
|
65
|
+
for every row by calling `parse_headers!` on the sheet or setting the `parse_headers`
|
|
66
|
+
option on open:
|
|
63
67
|
|
|
64
68
|
```ruby
|
|
65
|
-
|
|
69
|
+
# Parse headers for all sheets on open
|
|
70
|
+
|
|
71
|
+
x = Xsv.open("sheet.xlsx", parse_headers: true)
|
|
72
|
+
|
|
73
|
+
x.sheets[0][1] # => {"header1" => "value1", "header2" => "value2"}
|
|
74
|
+
|
|
75
|
+
# Manually parse headers for a single sheet
|
|
76
|
+
|
|
77
|
+
x = Xsv.open("sheet.xlsx")
|
|
66
78
|
|
|
67
79
|
sheet = x.sheets[0]
|
|
68
80
|
|
|
69
|
-
sheet
|
|
81
|
+
sheet[0] # => ["header1", "header2"]
|
|
70
82
|
|
|
71
|
-
# Parse headers and switch to hash mode
|
|
72
83
|
sheet.parse_headers!
|
|
73
84
|
|
|
74
|
-
sheet
|
|
85
|
+
sheet[0] # => {"header1" => "value1", "header2" => "value2"}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Be aware that hash mode will lead to unpredictable results if the worksheet
|
|
89
|
+
has multiple columns with the same header. `Xsv::Sheet` implements `Enumerable` so along with `#each`
|
|
90
|
+
you can call methods like `#first`, `#filter`/`#select`, and `#map` on it.
|
|
91
|
+
|
|
92
|
+
### Opening a string or buffer instead of filename
|
|
75
93
|
|
|
76
|
-
|
|
77
|
-
|
|
94
|
+
`Xsv.open` accepts a filename, or an IO or String containing a workbook. Optionally, you can pass a block
|
|
95
|
+
which will be called with the workbook as its parameter, like `File.open`. An example combining both:
|
|
96
|
+
|
|
97
|
+
```ruby
|
|
98
|
+
# Use an existing IO-like object as source
|
|
99
|
+
|
|
100
|
+
file = File.open("sheet.xlsx")
|
|
101
|
+
|
|
102
|
+
Xsv.open(file) do |workbook|
|
|
103
|
+
puts workbook.inspect
|
|
78
104
|
end
|
|
79
105
|
|
|
80
|
-
|
|
81
|
-
```
|
|
106
|
+
# or even:
|
|
82
107
|
|
|
83
|
-
|
|
84
|
-
|
|
108
|
+
Xsv.open(file.read) do |workbook|
|
|
109
|
+
puts workbook.inspect
|
|
110
|
+
end
|
|
111
|
+
```
|
|
85
112
|
|
|
86
|
-
`Xsv::Workbook.open`
|
|
87
|
-
|
|
113
|
+
Prior to Xsv 1.1.0, `Xsv::Workbook.open` was used instead of `Xsv.open`. The parameters are identical and
|
|
114
|
+
the former is maintained for backwards compatibility.
|
|
88
115
|
|
|
89
|
-
|
|
90
|
-
`#filter`/`#select`, and `#map` on it.
|
|
116
|
+
### Accessing sheets by name
|
|
91
117
|
|
|
92
118
|
The sheets can be accessed by index or by name:
|
|
93
119
|
|
|
94
120
|
```ruby
|
|
95
|
-
x = Xsv
|
|
121
|
+
x = Xsv.open("sheet.xlsx")
|
|
96
122
|
|
|
97
123
|
sheet = x.sheets[0] # gets sheet by index
|
|
98
124
|
|
data/benchmark.rb
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require "bundler/inline"
|
|
4
|
+
|
|
5
|
+
gemfile do
|
|
6
|
+
source "https://rubygems.org"
|
|
7
|
+
|
|
8
|
+
gemspec
|
|
9
|
+
gem "benchmark-memory"
|
|
10
|
+
gem "benchmark-perf"
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def bench_perf(sheet)
|
|
14
|
+
result = Benchmark::Perf.cpu(repeat: 5) do
|
|
15
|
+
sheet.each do |row|
|
|
16
|
+
row.each do |cell|
|
|
17
|
+
cell
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
puts "Performance benchmark: #{result.avg}s avg #{result.stdev}s stdev"
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def bench_mem(sheet)
|
|
26
|
+
Benchmark.memory do |bm|
|
|
27
|
+
bm.report do
|
|
28
|
+
sheet.each do |row|
|
|
29
|
+
row.each do |cell|
|
|
30
|
+
cell
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
file = File.read("test/files/10k-sheet.xlsx")
|
|
38
|
+
|
|
39
|
+
workbook = Xsv.open(file)
|
|
40
|
+
|
|
41
|
+
puts "--- ARRAY MODE ---"
|
|
42
|
+
|
|
43
|
+
bench_perf(workbook.sheets[0])
|
|
44
|
+
bench_mem(workbook.sheets[0])
|
|
45
|
+
|
|
46
|
+
puts "\n--- HASH MODE ---"
|
|
47
|
+
|
|
48
|
+
workbook.sheets[0].parse_headers!
|
|
49
|
+
|
|
50
|
+
bench_perf(workbook.sheets[0])
|
|
51
|
+
bench_mem(workbook.sheets[0])
|
data/lib/xsv/sax_parser.rb
CHANGED
|
@@ -2,9 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
module Xsv
|
|
4
4
|
class SaxParser
|
|
5
|
-
ATTR_REGEX = /((\
|
|
5
|
+
ATTR_REGEX = /((\p{Alnum}+)="(.*?)")/mn
|
|
6
6
|
|
|
7
7
|
def parse(io)
|
|
8
|
+
responds_to_end_element = respond_to?(:end_element)
|
|
9
|
+
responds_to_characters = respond_to?(:characters)
|
|
10
|
+
|
|
8
11
|
state = :look_start
|
|
9
12
|
if io.is_a?(String)
|
|
10
13
|
pbuf = io.dup
|
|
@@ -32,7 +35,7 @@ module Xsv
|
|
|
32
35
|
if (o = pbuf.index("<"))
|
|
33
36
|
chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
|
|
34
37
|
|
|
35
|
-
if
|
|
38
|
+
if responds_to_characters && !chars.empty?
|
|
36
39
|
if chars.index("&")
|
|
37
40
|
chars.gsub!("&amp;", "&")
|
|
38
41
|
chars.gsub!("&apos;", "'")
|
|
@@ -64,23 +67,36 @@ module Xsv
|
|
|
64
67
|
args = nil
|
|
65
68
|
end
|
|
66
69
|
|
|
70
|
+
stripped_tag_name = strip_namespace(tag_name)
|
|
71
|
+
|
|
67
72
|
if tag_name.start_with?("/")
|
|
68
|
-
end_element(tag_name[1
|
|
73
|
+
end_element(strip_namespace(tag_name[1..])) if responds_to_end_element
|
|
69
74
|
elsif args.nil?
|
|
70
|
-
start_element(
|
|
75
|
+
start_element(stripped_tag_name, nil)
|
|
71
76
|
else
|
|
72
|
-
start_element(
|
|
73
|
-
end_element(
|
|
77
|
+
start_element(stripped_tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |(_, k, v), h| h[k.to_sym] = v })
|
|
78
|
+
end_element(stripped_tag_name) if responds_to_end_element && args.end_with?("/")
|
|
74
79
|
end
|
|
75
80
|
|
|
76
81
|
state = :look_start
|
|
77
82
|
elsif eof_reached
|
|
78
|
-
raise "Malformed XML document, looking for end of tag beyond EOF"
|
|
83
|
+
raise Xsv::Error, "Malformed XML document, looking for end of tag beyond EOF"
|
|
79
84
|
else
|
|
80
85
|
must_read = true
|
|
81
86
|
end
|
|
82
87
|
end
|
|
83
88
|
end
|
|
84
89
|
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
# I am not proud of this, but there's simply no need to deal with xmlns for this application ¯\_(ツ)_/¯
|
|
94
|
+
def strip_namespace(tag)
|
|
95
|
+
if (offset = tag.index(":"))
|
|
96
|
+
tag[offset + 1..]
|
|
97
|
+
else
|
|
98
|
+
tag
|
|
99
|
+
end
|
|
100
|
+
end
|
|
85
101
|
end
|
|
86
102
|
end
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -30,8 +30,8 @@ module Xsv
|
|
|
30
30
|
@state = nil
|
|
31
31
|
@cell = nil
|
|
32
32
|
@row = nil
|
|
33
|
-
@
|
|
34
|
-
@
|
|
33
|
+
@max_row = 0
|
|
34
|
+
@max_column = 0
|
|
35
35
|
@trim_empty_rows = trim_empty_rows
|
|
36
36
|
end
|
|
37
37
|
|
|
@@ -42,28 +42,28 @@ module Xsv
|
|
|
42
42
|
@cell = attrs[:r]
|
|
43
43
|
when "v"
|
|
44
44
|
col = column_index(@cell)
|
|
45
|
-
@
|
|
46
|
-
@
|
|
45
|
+
@max_column = col if col > @max_column
|
|
46
|
+
@max_row = @row if @row > @max_row
|
|
47
47
|
when "row"
|
|
48
48
|
@state = name
|
|
49
49
|
@row = attrs[:r].to_i
|
|
50
50
|
when "dimension"
|
|
51
51
|
@state = name
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
_first_cell, last_cell = attrs[:ref].split(":")
|
|
54
54
|
|
|
55
|
-
if
|
|
56
|
-
@
|
|
55
|
+
if last_cell
|
|
56
|
+
@max_column = column_index(last_cell)
|
|
57
57
|
unless @trim_empty_rows
|
|
58
|
-
@
|
|
59
|
-
@block.call(@
|
|
58
|
+
@max_row = last_cell[/\d+$/].to_i
|
|
59
|
+
@block.call(@max_row, @max_column)
|
|
60
60
|
end
|
|
61
61
|
end
|
|
62
62
|
end
|
|
63
63
|
end
|
|
64
64
|
|
|
65
65
|
def end_element(name)
|
|
66
|
-
@block.call(@
|
|
66
|
+
@block.call(@max_row, @max_column) if name == "sheetData"
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
end
|
|
@@ -14,11 +14,11 @@ module Xsv
|
|
|
14
14
|
@last_row = last_row - @row_skip
|
|
15
15
|
@block = block
|
|
16
16
|
|
|
17
|
-
@
|
|
17
|
+
@store_characters = false
|
|
18
18
|
|
|
19
19
|
@row_index = 0
|
|
20
20
|
@current_row = {}
|
|
21
|
-
@
|
|
21
|
+
@current_row_number = 0
|
|
22
22
|
@current_cell = {}
|
|
23
23
|
@current_value = +""
|
|
24
24
|
|
|
@@ -28,44 +28,36 @@ module Xsv
|
|
|
28
28
|
def start_element(name, attrs)
|
|
29
29
|
case name
|
|
30
30
|
when "c"
|
|
31
|
-
@state = name
|
|
32
31
|
@current_cell = attrs
|
|
33
32
|
@current_value.clear
|
|
34
|
-
when "v", "is"
|
|
35
|
-
@
|
|
33
|
+
when "v", "is", "t"
|
|
34
|
+
@store_characters = true
|
|
36
35
|
when "row"
|
|
37
|
-
@state = name
|
|
38
36
|
@current_row = @empty_row.dup
|
|
39
|
-
@
|
|
40
|
-
when "t"
|
|
41
|
-
@state = nil unless @state == "is"
|
|
42
|
-
else
|
|
43
|
-
@state = nil
|
|
37
|
+
@current_row_number = attrs[:r].to_i
|
|
44
38
|
end
|
|
45
39
|
end
|
|
46
40
|
|
|
47
41
|
def characters(value)
|
|
48
|
-
@current_value << value if @
|
|
42
|
+
@current_value << value if @store_characters
|
|
49
43
|
end
|
|
50
44
|
|
|
51
45
|
def end_element(name)
|
|
52
46
|
case name
|
|
53
|
-
when "v"
|
|
54
|
-
@
|
|
47
|
+
when "v", "is", "t"
|
|
48
|
+
@store_characters = false
|
|
55
49
|
when "c"
|
|
56
50
|
col_index = column_index(@current_cell[:r])
|
|
57
51
|
|
|
58
|
-
|
|
59
|
-
when :array
|
|
52
|
+
if @mode == :array
|
|
60
53
|
@current_row[col_index] = format_cell
|
|
61
|
-
|
|
54
|
+
else
|
|
62
55
|
@current_row[@headers[col_index]] = format_cell
|
|
63
56
|
end
|
|
64
57
|
when "row"
|
|
65
|
-
|
|
66
|
-
adjusted_row_number = real_row_number - @row_skip
|
|
58
|
+
return if @current_row_number <= @row_skip
|
|
67
59
|
|
|
68
|
-
|
|
60
|
+
adjusted_row_number = @current_row_number - @row_skip
|
|
69
61
|
|
|
70
62
|
@row_index += 1
|
|
71
63
|
|
|
@@ -98,10 +90,7 @@ module Xsv
|
|
|
98
90
|
nil
|
|
99
91
|
when nil, "n"
|
|
100
92
|
if @current_cell[:s]
|
|
101
|
-
|
|
102
|
-
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
|
103
|
-
|
|
104
|
-
parse_number_format(@current_value, numFmt)
|
|
93
|
+
parse_number_format(@current_value, @workbook.get_num_fmt(@current_cell[:s].to_i))
|
|
105
94
|
else
|
|
106
95
|
parse_number(@current_value)
|
|
107
96
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
|
@@ -5,21 +5,21 @@ module Xsv
|
|
|
5
5
|
# This is used internally when opening a sheet.
|
|
6
6
|
class StylesHandler < SaxParser
|
|
7
7
|
def self.get_styles(io)
|
|
8
|
-
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs,
|
|
8
|
+
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, num_fmts|
|
|
9
9
|
@xfs = xfs
|
|
10
|
-
@
|
|
10
|
+
@num_fmts = num_fmts
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
handler.parse(io)
|
|
14
14
|
|
|
15
|
-
[@xfs, @
|
|
15
|
+
[@xfs, @num_fmts]
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
def initialize(
|
|
18
|
+
def initialize(num_fmts, &block)
|
|
19
19
|
@block = block
|
|
20
20
|
@state = nil
|
|
21
21
|
@xfs = []
|
|
22
|
-
@
|
|
22
|
+
@num_fmts = num_fmts
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
def start_element(name, attrs)
|
|
@@ -27,16 +27,16 @@ module Xsv
|
|
|
27
27
|
when "cellXfs"
|
|
28
28
|
@state = "cellXfs"
|
|
29
29
|
when "xf"
|
|
30
|
-
@xfs << attrs if @state == "cellXfs"
|
|
30
|
+
@xfs << attrs.transform_values(&:to_i) if @state == "cellXfs"
|
|
31
31
|
when "numFmt"
|
|
32
|
-
@
|
|
32
|
+
@num_fmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
|
|
36
36
|
def end_element(name)
|
|
37
37
|
case name
|
|
38
38
|
when "styleSheet"
|
|
39
|
-
@block.call(@xfs, @
|
|
39
|
+
@block.call(@xfs, @num_fmts)
|
|
40
40
|
when "cellXfs"
|
|
41
41
|
@state = nil
|
|
42
42
|
end
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -10,38 +10,19 @@ module Xsv
|
|
|
10
10
|
# @return [Array<Sheet>]
|
|
11
11
|
attr_reader :sheets
|
|
12
12
|
|
|
13
|
-
attr_reader :shared_strings, :xfs, :
|
|
14
|
-
|
|
15
|
-
# Open the workbook of the given filename, string or buffer. For additional
|
|
16
|
-
# options see {.initialize}
|
|
17
|
-
def self.open(data, **kws)
|
|
18
|
-
@workbook = if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
|
19
|
-
new(Zip::File.open_buffer(data), **kws)
|
|
20
|
-
elsif data.start_with?("PK\x03\x04") # is it a string containing a file?
|
|
21
|
-
new(Zip::File.open_buffer(data), **kws)
|
|
22
|
-
else # must be a filename
|
|
23
|
-
new(Zip::File.open(data), **kws)
|
|
24
|
-
end
|
|
13
|
+
attr_reader :shared_strings, :xfs, :num_fmts, :trim_empty_rows
|
|
25
14
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
ensure
|
|
30
|
-
@workbook.close
|
|
31
|
-
end
|
|
32
|
-
else
|
|
33
|
-
@workbook
|
|
34
|
-
end
|
|
15
|
+
# @deprecated Use {Xsv.open} instead
|
|
16
|
+
def self.open(data, **kws, &block)
|
|
17
|
+
Xsv.open(data, **kws, &block)
|
|
35
18
|
end
|
|
36
19
|
|
|
37
20
|
# Open a workbook from an instance of {Zip::File}. Generally it's recommended
|
|
38
21
|
# to use the {.open} method instead of the constructor.
|
|
39
22
|
#
|
|
40
|
-
#
|
|
41
|
-
#
|
|
42
|
-
|
|
43
|
-
#
|
|
44
|
-
def initialize(zip, trim_empty_rows: false)
|
|
23
|
+
# @param trim_empty_rows [Boolean] Scan sheet for end of content and don't return trailing rows
|
|
24
|
+
# @param parse_headers [Boolean] Call `parse_headers!` on all sheets on load
|
|
25
|
+
def initialize(zip, trim_empty_rows: false, parse_headers: false)
|
|
45
26
|
raise ArgumentError, "Passed argument is not an instance of Zip::File. Did you mean to use Workbook.open?" unless zip.is_a?(Zip::File)
|
|
46
27
|
raise Xsv::Error, "Zip::File is empty" if zip.size.zero?
|
|
47
28
|
|
|
@@ -49,16 +30,16 @@ module Xsv
|
|
|
49
30
|
@trim_empty_rows = trim_empty_rows
|
|
50
31
|
|
|
51
32
|
@sheets = []
|
|
52
|
-
@xfs, @
|
|
33
|
+
@xfs, @num_fmts = fetch_styles
|
|
53
34
|
@sheet_ids = fetch_sheet_ids
|
|
54
35
|
@relationships = fetch_relationships
|
|
55
36
|
@shared_strings = fetch_shared_strings
|
|
56
|
-
@sheets = fetch_sheets
|
|
37
|
+
@sheets = fetch_sheets(parse_headers ? :hash : :array)
|
|
57
38
|
end
|
|
58
39
|
|
|
59
40
|
# @return [String]
|
|
60
41
|
def inspect
|
|
61
|
-
"#<#{self.class.name}:#{object_id}>"
|
|
42
|
+
"#<#{self.class.name}:#{object_id} sheets=#{sheets.count} trim_empty_rows=#{@trim_empty_rows}>"
|
|
62
43
|
end
|
|
63
44
|
|
|
64
45
|
# Close the handle to the workbook file and leave all resources for the GC to collect
|
|
@@ -68,7 +49,7 @@ module Xsv
|
|
|
68
49
|
@zip = nil
|
|
69
50
|
@sheets = nil
|
|
70
51
|
@xfs = nil
|
|
71
|
-
@
|
|
52
|
+
@num_fmts = nil
|
|
72
53
|
@relationships = nil
|
|
73
54
|
@shared_strings = nil
|
|
74
55
|
@sheet_ids = nil
|
|
@@ -83,6 +64,11 @@ module Xsv
|
|
|
83
64
|
@sheets.select { |s| s.name == name }
|
|
84
65
|
end
|
|
85
66
|
|
|
67
|
+
# Get number format for given style index
|
|
68
|
+
def get_num_fmt(style)
|
|
69
|
+
@num_fmts[@xfs[style][:numFmtId]]
|
|
70
|
+
end
|
|
71
|
+
|
|
86
72
|
private
|
|
87
73
|
|
|
88
74
|
def fetch_shared_strings
|
|
@@ -103,13 +89,18 @@ module Xsv
|
|
|
103
89
|
stream.close
|
|
104
90
|
end
|
|
105
91
|
|
|
106
|
-
def fetch_sheets
|
|
92
|
+
def fetch_sheets(mode)
|
|
107
93
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
|
108
94
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
|
109
95
|
end.map do |entry|
|
|
110
|
-
rel = @relationships.detect
|
|
111
|
-
|
|
112
|
-
|
|
96
|
+
rel = @relationships.detect do |r|
|
|
97
|
+
entry.name.end_with?(r[:Target].sub(/^\//, "")) && # ignore leading / in some files
|
|
98
|
+
r[:Type].end_with?("worksheet")
|
|
99
|
+
end
|
|
100
|
+
sheet_ids = @sheet_ids.detect { |i| i[:id] == rel[:Id] }
|
|
101
|
+
Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids).tap do |sheet|
|
|
102
|
+
sheet.parse_headers! if mode == :hash
|
|
103
|
+
end
|
|
113
104
|
end
|
|
114
105
|
end
|
|
115
106
|
|
data/lib/xsv.rb
CHANGED
|
@@ -24,4 +24,31 @@ module Xsv
|
|
|
24
24
|
# An AssertionFailed error indicates an unexpected condition, meaning a bug
|
|
25
25
|
# or misinterpreted .xlsx document
|
|
26
26
|
class AssertionFailed < StandardError; end
|
|
27
|
+
|
|
28
|
+
# Open the workbook of the given filename, string or buffer.
|
|
29
|
+
# @param filename_or_string [String, IO] the contents or filename of a workbook
|
|
30
|
+
# @param trim_empty_rows [Boolean] Scan sheet for end of content and don't return trailing rows
|
|
31
|
+
# @param parse_headers [Boolean] Call `parse_headers!` on all sheets on load
|
|
32
|
+
# @return [Xsv::Workbook] The workbook instance
|
|
33
|
+
def self.open(filename_or_string, trim_empty_rows: false, parse_headers: false)
|
|
34
|
+
zip = if filename_or_string.is_a?(IO) || filename_or_string.respond_to?(:read) # is it a buffer?
|
|
35
|
+
Zip::File.open_buffer(filename_or_string)
|
|
36
|
+
elsif filename_or_string.start_with?("PK\x03\x04") # is it a string containing a file?
|
|
37
|
+
Zip::File.open_buffer(filename_or_string)
|
|
38
|
+
else # must be a filename
|
|
39
|
+
Zip::File.open(filename_or_string)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
workbook = Xsv::Workbook.new(zip, trim_empty_rows: trim_empty_rows, parse_headers: parse_headers)
|
|
43
|
+
|
|
44
|
+
if block_given?
|
|
45
|
+
begin
|
|
46
|
+
yield(workbook)
|
|
47
|
+
ensure
|
|
48
|
+
workbook.close
|
|
49
|
+
end
|
|
50
|
+
else
|
|
51
|
+
workbook
|
|
52
|
+
end
|
|
53
|
+
end
|
|
27
54
|
end
|
data/xsv.gemspec
CHANGED
|
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
if spec.respond_to?(:metadata)
|
|
22
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
23
23
|
spec.metadata["source_code_uri"] = "https://github.com/martijn/xsv"
|
|
24
|
-
spec.metadata["changelog_uri"] = "https://
|
|
24
|
+
spec.metadata["changelog_uri"] = "https://raw.githubusercontent.com/martijn/xsv/main/CHANGELOG.md"
|
|
25
25
|
else
|
|
26
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
|
27
27
|
"public gem pushes."
|
|
@@ -36,12 +36,13 @@ Gem::Specification.new do |spec|
|
|
|
36
36
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
37
37
|
spec.require_paths = ["lib"]
|
|
38
38
|
|
|
39
|
-
spec.required_ruby_version = ">= 2.
|
|
39
|
+
spec.required_ruby_version = ">= 2.6"
|
|
40
40
|
|
|
41
41
|
spec.add_dependency "rubyzip", ">= 1.3", "< 3"
|
|
42
42
|
|
|
43
43
|
spec.add_development_dependency "bundler", "< 3"
|
|
44
44
|
spec.add_development_dependency "rake", "~> 13.0"
|
|
45
45
|
spec.add_development_dependency "minitest", "~> 5.14.2"
|
|
46
|
-
spec.add_development_dependency "
|
|
46
|
+
spec.add_development_dependency "standard", "~> 1.6.0"
|
|
47
|
+
spec.add_development_dependency "codecov", ">= 0.6.0"
|
|
47
48
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-01
|
|
11
|
+
date: 2022-04-01 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -73,19 +73,33 @@ dependencies:
|
|
|
73
73
|
- !ruby/object:Gem::Version
|
|
74
74
|
version: 5.14.2
|
|
75
75
|
- !ruby/object:Gem::Dependency
|
|
76
|
-
name:
|
|
76
|
+
name: standard
|
|
77
77
|
requirement: !ruby/object:Gem::Requirement
|
|
78
78
|
requirements:
|
|
79
79
|
- - "~>"
|
|
80
80
|
- !ruby/object:Gem::Version
|
|
81
|
-
version:
|
|
81
|
+
version: 1.6.0
|
|
82
82
|
type: :development
|
|
83
83
|
prerelease: false
|
|
84
84
|
version_requirements: !ruby/object:Gem::Requirement
|
|
85
85
|
requirements:
|
|
86
86
|
- - "~>"
|
|
87
87
|
- !ruby/object:Gem::Version
|
|
88
|
-
version:
|
|
88
|
+
version: 1.6.0
|
|
89
|
+
- !ruby/object:Gem::Dependency
|
|
90
|
+
name: codecov
|
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - ">="
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: 0.6.0
|
|
96
|
+
type: :development
|
|
97
|
+
prerelease: false
|
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
99
|
+
requirements:
|
|
100
|
+
- - ">="
|
|
101
|
+
- !ruby/object:Gem::Version
|
|
102
|
+
version: 0.6.0
|
|
89
103
|
description: |2
|
|
90
104
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
91
105
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
@@ -105,6 +119,7 @@ files:
|
|
|
105
119
|
- LICENSE.txt
|
|
106
120
|
- README.md
|
|
107
121
|
- Rakefile
|
|
122
|
+
- benchmark.rb
|
|
108
123
|
- bin/console
|
|
109
124
|
- bin/setup
|
|
110
125
|
- lib/xsv.rb
|
|
@@ -126,8 +141,8 @@ licenses:
|
|
|
126
141
|
metadata:
|
|
127
142
|
homepage_uri: https://github.com/martijn/xsv
|
|
128
143
|
source_code_uri: https://github.com/martijn/xsv
|
|
129
|
-
changelog_uri: https://
|
|
130
|
-
post_install_message:
|
|
144
|
+
changelog_uri: https://raw.githubusercontent.com/martijn/xsv/main/CHANGELOG.md
|
|
145
|
+
post_install_message:
|
|
131
146
|
rdoc_options: []
|
|
132
147
|
require_paths:
|
|
133
148
|
- lib
|
|
@@ -135,15 +150,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
135
150
|
requirements:
|
|
136
151
|
- - ">="
|
|
137
152
|
- !ruby/object:Gem::Version
|
|
138
|
-
version: '2.
|
|
153
|
+
version: '2.6'
|
|
139
154
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
155
|
requirements:
|
|
141
156
|
- - ">="
|
|
142
157
|
- !ruby/object:Gem::Version
|
|
143
158
|
version: '0'
|
|
144
159
|
requirements: []
|
|
145
|
-
rubygems_version: 3.
|
|
146
|
-
signing_key:
|
|
160
|
+
rubygems_version: 3.2.3
|
|
161
|
+
signing_key:
|
|
147
162
|
specification_version: 4
|
|
148
163
|
summary: A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't
|
|
149
164
|
test_files: []
|