xsv 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +3 -3
- data/CHANGELOG.md +4 -0
- data/benchmark.rb +51 -0
- data/lib/xsv/sax_parser.rb +6 -3
- data/lib/xsv/sheet_bounds_handler.rb +10 -10
- data/lib/xsv/sheet_rows_handler.rb +13 -24
- data/lib/xsv/styles_handler.rb +8 -8
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +8 -3
- data/xsv.gemspec +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0755959285e8f4e588fc8f72f45c48904bc0b840c1abc7b250faf6bad978e7f0'
|
4
|
+
data.tar.gz: 482143461be2e72994e8d9758d1a971e87355acdd16cb027a5631956b7898927
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9a48303c59d254233e12994562a341854caffde500f78e5357edebfd16dca12cf7b9b39af6c3c9e1536491f1467456c0b8295bfebf4fddda0f315ab4fbe0875
|
7
|
+
data.tar.gz: db9fe14a1c829ca66d2d1daa59da9bab181c5b4ba17c89ebc0703369165310ad4390e8713effc329d0ad6ce9f932a822e368b7eec6ae1e3876b4ed27d4bc0969
|
data/.github/workflows/ruby.yml
CHANGED
@@ -9,9 +9,9 @@ name: Ruby
|
|
9
9
|
|
10
10
|
on:
|
11
11
|
push:
|
12
|
-
branches: [
|
12
|
+
branches: [ main ]
|
13
13
|
pull_request:
|
14
|
-
branches: [
|
14
|
+
branches: [ main ]
|
15
15
|
|
16
16
|
jobs:
|
17
17
|
test:
|
@@ -19,7 +19,7 @@ jobs:
|
|
19
19
|
runs-on: ubuntu-latest
|
20
20
|
strategy:
|
21
21
|
matrix:
|
22
|
-
ruby-version: ['2.6', '2.7', '3.0', 'jruby', 'truffleruby']
|
22
|
+
ruby-version: ['2.5', '2.6', '2.7', '3.0', '3.1', 'jruby', 'truffleruby']
|
23
23
|
|
24
24
|
steps:
|
25
25
|
- uses: actions/checkout@v2
|
data/CHANGELOG.md
CHANGED
data/benchmark.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/inline'
|
4
|
+
|
5
|
+
gemfile do
|
6
|
+
source "https://rubygems.org"
|
7
|
+
|
8
|
+
gemspec
|
9
|
+
gem "benchmark-memory"
|
10
|
+
gem "benchmark-perf"
|
11
|
+
end
|
12
|
+
|
13
|
+
def bench_perf(sheet)
|
14
|
+
result = Benchmark::Perf.cpu(repeat: 5) do
|
15
|
+
sheet.each do |row|
|
16
|
+
row.each do |cell|
|
17
|
+
cell
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
puts "Performance benchmark: #{result.avg}s avg #{result.stdev}s stdev"
|
23
|
+
end
|
24
|
+
|
25
|
+
def bench_mem(sheet)
|
26
|
+
Benchmark.memory do |bm|
|
27
|
+
bm.report do
|
28
|
+
sheet.each do |row|
|
29
|
+
row.each do |cell|
|
30
|
+
cell
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
file = File.read("test/files/10k-sheet.xlsx")
|
38
|
+
|
39
|
+
workbook = Xsv::Workbook.open(file)
|
40
|
+
|
41
|
+
puts "--- ARRAY MODE ---"
|
42
|
+
|
43
|
+
bench_perf(workbook.sheets[0])
|
44
|
+
bench_mem(workbook.sheets[0])
|
45
|
+
|
46
|
+
puts "\n--- HASH MODE ---"
|
47
|
+
|
48
|
+
workbook.sheets[0].parse_headers!
|
49
|
+
|
50
|
+
bench_perf(workbook.sheets[0])
|
51
|
+
bench_mem(workbook.sheets[0])
|
data/lib/xsv/sax_parser.rb
CHANGED
@@ -5,6 +5,9 @@ module Xsv
|
|
5
5
|
ATTR_REGEX = /((\S+)="(.*?)")/m
|
6
6
|
|
7
7
|
def parse(io)
|
8
|
+
responds_to_end_element = respond_to?(:end_element)
|
9
|
+
responds_to_characters = respond_to?(:characters)
|
10
|
+
|
8
11
|
state = :look_start
|
9
12
|
if io.is_a?(String)
|
10
13
|
pbuf = io.dup
|
@@ -32,7 +35,7 @@ module Xsv
|
|
32
35
|
if (o = pbuf.index("<"))
|
33
36
|
chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
|
34
37
|
|
35
|
-
if
|
38
|
+
if responds_to_characters && !chars.empty?
|
36
39
|
if chars.index("&")
|
37
40
|
chars.gsub!("&amp;", "&")
|
38
41
|
chars.gsub!("&apos;", "'")
|
@@ -65,12 +68,12 @@ module Xsv
|
|
65
68
|
end
|
66
69
|
|
67
70
|
if tag_name.start_with?("/")
|
68
|
-
end_element(tag_name[1..-1]) if
|
71
|
+
end_element(tag_name[1..-1]) if responds_to_end_element
|
69
72
|
elsif args.nil?
|
70
73
|
start_element(tag_name, nil)
|
71
74
|
else
|
72
75
|
start_element(tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |m, h| h[m[1].to_sym] = m[2] })
|
73
|
-
end_element(tag_name) if args.end_with?("/")
|
76
|
+
end_element(tag_name) if responds_to_end_element && args.end_with?("/")
|
74
77
|
end
|
75
78
|
|
76
79
|
state = :look_start
|
@@ -30,8 +30,8 @@ module Xsv
|
|
30
30
|
@state = nil
|
31
31
|
@cell = nil
|
32
32
|
@row = nil
|
33
|
-
@
|
34
|
-
@
|
33
|
+
@max_row = 0
|
34
|
+
@max_column = 0
|
35
35
|
@trim_empty_rows = trim_empty_rows
|
36
36
|
end
|
37
37
|
|
@@ -42,28 +42,28 @@ module Xsv
|
|
42
42
|
@cell = attrs[:r]
|
43
43
|
when "v"
|
44
44
|
col = column_index(@cell)
|
45
|
-
@
|
46
|
-
@
|
45
|
+
@max_column = col if col > @max_column
|
46
|
+
@max_row = @row if @row > @max_row
|
47
47
|
when "row"
|
48
48
|
@state = name
|
49
49
|
@row = attrs[:r].to_i
|
50
50
|
when "dimension"
|
51
51
|
@state = name
|
52
52
|
|
53
|
-
|
53
|
+
_first_cell, last_cell = attrs[:ref].split(":")
|
54
54
|
|
55
|
-
if
|
56
|
-
@
|
55
|
+
if last_cell
|
56
|
+
@max_column = column_index(last_cell)
|
57
57
|
unless @trim_empty_rows
|
58
|
-
@
|
59
|
-
@block.call(@
|
58
|
+
@max_row = last_cell[/\d+$/].to_i
|
59
|
+
@block.call(@max_row, @max_column)
|
60
60
|
end
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
65
|
def end_element(name)
|
66
|
-
@block.call(@
|
66
|
+
@block.call(@max_row, @max_column) if name == "sheetData"
|
67
67
|
end
|
68
68
|
end
|
69
69
|
end
|
@@ -14,11 +14,11 @@ module Xsv
|
|
14
14
|
@last_row = last_row - @row_skip
|
15
15
|
@block = block
|
16
16
|
|
17
|
-
@
|
17
|
+
@store_characters = false
|
18
18
|
|
19
19
|
@row_index = 0
|
20
20
|
@current_row = {}
|
21
|
-
@
|
21
|
+
@current_row_number = 0
|
22
22
|
@current_cell = {}
|
23
23
|
@current_value = +""
|
24
24
|
|
@@ -28,44 +28,36 @@ module Xsv
|
|
28
28
|
def start_element(name, attrs)
|
29
29
|
case name
|
30
30
|
when "c"
|
31
|
-
@state = name
|
32
31
|
@current_cell = attrs
|
33
32
|
@current_value.clear
|
34
|
-
when "v", "is"
|
35
|
-
@
|
33
|
+
when "v", "is", "t"
|
34
|
+
@store_characters = true
|
36
35
|
when "row"
|
37
|
-
@state = name
|
38
36
|
@current_row = @empty_row.dup
|
39
|
-
@
|
40
|
-
when "t"
|
41
|
-
@state = nil unless @state == "is"
|
42
|
-
else
|
43
|
-
@state = nil
|
37
|
+
@current_row_number = attrs[:r].to_i
|
44
38
|
end
|
45
39
|
end
|
46
40
|
|
47
41
|
def characters(value)
|
48
|
-
@current_value << value if @
|
42
|
+
@current_value << value if @store_characters
|
49
43
|
end
|
50
44
|
|
51
45
|
def end_element(name)
|
52
46
|
case name
|
53
|
-
when "v"
|
54
|
-
@
|
47
|
+
when "v", "is", "t"
|
48
|
+
@store_characters = false
|
55
49
|
when "c"
|
56
50
|
col_index = column_index(@current_cell[:r])
|
57
51
|
|
58
|
-
|
59
|
-
when :array
|
52
|
+
if @mode == :array
|
60
53
|
@current_row[col_index] = format_cell
|
61
|
-
|
54
|
+
else
|
62
55
|
@current_row[@headers[col_index]] = format_cell
|
63
56
|
end
|
64
57
|
when "row"
|
65
|
-
|
66
|
-
adjusted_row_number = real_row_number - @row_skip
|
58
|
+
return if @current_row_number <= @row_skip
|
67
59
|
|
68
|
-
|
60
|
+
adjusted_row_number = @current_row_number - @row_skip
|
69
61
|
|
70
62
|
@row_index += 1
|
71
63
|
|
@@ -98,10 +90,7 @@ module Xsv
|
|
98
90
|
nil
|
99
91
|
when nil, "n"
|
100
92
|
if @current_cell[:s]
|
101
|
-
|
102
|
-
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
103
|
-
|
104
|
-
parse_number_format(@current_value, numFmt)
|
93
|
+
parse_number_format(@current_value, @workbook.get_num_fmt(@current_cell[:s].to_i))
|
105
94
|
else
|
106
95
|
parse_number(@current_value)
|
107
96
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
@@ -5,21 +5,21 @@ module Xsv
|
|
5
5
|
# This is used internally when opening a sheet.
|
6
6
|
class StylesHandler < SaxParser
|
7
7
|
def self.get_styles(io)
|
8
|
-
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs,
|
8
|
+
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, num_fmts|
|
9
9
|
@xfs = xfs
|
10
|
-
@
|
10
|
+
@num_fmts = num_fmts
|
11
11
|
end
|
12
12
|
|
13
13
|
handler.parse(io)
|
14
14
|
|
15
|
-
[@xfs, @
|
15
|
+
[@xfs, @num_fmts]
|
16
16
|
end
|
17
17
|
|
18
|
-
def initialize(
|
18
|
+
def initialize(num_fmts, &block)
|
19
19
|
@block = block
|
20
20
|
@state = nil
|
21
21
|
@xfs = []
|
22
|
-
@
|
22
|
+
@num_fmts = num_fmts
|
23
23
|
end
|
24
24
|
|
25
25
|
def start_element(name, attrs)
|
@@ -27,16 +27,16 @@ module Xsv
|
|
27
27
|
when "cellXfs"
|
28
28
|
@state = "cellXfs"
|
29
29
|
when "xf"
|
30
|
-
@xfs << attrs if @state == "cellXfs"
|
30
|
+
@xfs << attrs.transform_values(&:to_i) if @state == "cellXfs"
|
31
31
|
when "numFmt"
|
32
|
-
@
|
32
|
+
@num_fmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
36
|
def end_element(name)
|
37
37
|
case name
|
38
38
|
when "styleSheet"
|
39
|
-
@block.call(@xfs, @
|
39
|
+
@block.call(@xfs, @num_fmts)
|
40
40
|
when "cellXfs"
|
41
41
|
@state = nil
|
42
42
|
end
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
@@ -10,7 +10,7 @@ module Xsv
|
|
10
10
|
# @return [Array<Sheet>]
|
11
11
|
attr_reader :sheets
|
12
12
|
|
13
|
-
attr_reader :shared_strings, :xfs, :
|
13
|
+
attr_reader :shared_strings, :xfs, :num_fmts, :trim_empty_rows
|
14
14
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
16
16
|
# options see {.initialize}
|
@@ -49,7 +49,7 @@ module Xsv
|
|
49
49
|
@trim_empty_rows = trim_empty_rows
|
50
50
|
|
51
51
|
@sheets = []
|
52
|
-
@xfs, @
|
52
|
+
@xfs, @num_fmts = fetch_styles
|
53
53
|
@sheet_ids = fetch_sheet_ids
|
54
54
|
@relationships = fetch_relationships
|
55
55
|
@shared_strings = fetch_shared_strings
|
@@ -68,7 +68,7 @@ module Xsv
|
|
68
68
|
@zip = nil
|
69
69
|
@sheets = nil
|
70
70
|
@xfs = nil
|
71
|
-
@
|
71
|
+
@num_fmts = nil
|
72
72
|
@relationships = nil
|
73
73
|
@shared_strings = nil
|
74
74
|
@sheet_ids = nil
|
@@ -83,6 +83,11 @@ module Xsv
|
|
83
83
|
@sheets.select { |s| s.name == name }
|
84
84
|
end
|
85
85
|
|
86
|
+
# Get number format for given style index
|
87
|
+
def get_num_fmt(style)
|
88
|
+
@num_fmts[@xfs[style][:numFmtId]]
|
89
|
+
end
|
90
|
+
|
86
91
|
private
|
87
92
|
|
88
93
|
def fetch_shared_strings
|
data/xsv.gemspec
CHANGED
@@ -43,5 +43,5 @@ Gem::Specification.new do |spec|
|
|
43
43
|
spec.add_development_dependency "bundler", "< 3"
|
44
44
|
spec.add_development_dependency "rake", "~> 13.0"
|
45
45
|
spec.add_development_dependency "minitest", "~> 5.14.2"
|
46
|
-
spec.add_development_dependency "
|
46
|
+
spec.add_development_dependency "standard", "~> 1.6.0"
|
47
47
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martijn Storck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-01-
|
11
|
+
date: 2022-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -73,19 +73,19 @@ dependencies:
|
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: 5.14.2
|
75
75
|
- !ruby/object:Gem::Dependency
|
76
|
-
name:
|
76
|
+
name: standard
|
77
77
|
requirement: !ruby/object:Gem::Requirement
|
78
78
|
requirements:
|
79
79
|
- - "~>"
|
80
80
|
- !ruby/object:Gem::Version
|
81
|
-
version:
|
81
|
+
version: 1.6.0
|
82
82
|
type: :development
|
83
83
|
prerelease: false
|
84
84
|
version_requirements: !ruby/object:Gem::Requirement
|
85
85
|
requirements:
|
86
86
|
- - "~>"
|
87
87
|
- !ruby/object:Gem::Version
|
88
|
-
version:
|
88
|
+
version: 1.6.0
|
89
89
|
description: |2
|
90
90
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
91
91
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
@@ -105,6 +105,7 @@ files:
|
|
105
105
|
- LICENSE.txt
|
106
106
|
- README.md
|
107
107
|
- Rakefile
|
108
|
+
- benchmark.rb
|
108
109
|
- bin/console
|
109
110
|
- bin/setup
|
110
111
|
- lib/xsv.rb
|