xsv 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +3 -3
- data/CHANGELOG.md +4 -0
- data/benchmark.rb +51 -0
- data/lib/xsv/sax_parser.rb +6 -3
- data/lib/xsv/sheet_bounds_handler.rb +10 -10
- data/lib/xsv/sheet_rows_handler.rb +13 -24
- data/lib/xsv/styles_handler.rb +8 -8
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +8 -3
- data/xsv.gemspec +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '0755959285e8f4e588fc8f72f45c48904bc0b840c1abc7b250faf6bad978e7f0'
|
|
4
|
+
data.tar.gz: 482143461be2e72994e8d9758d1a971e87355acdd16cb027a5631956b7898927
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a9a48303c59d254233e12994562a341854caffde500f78e5357edebfd16dca12cf7b9b39af6c3c9e1536491f1467456c0b8295bfebf4fddda0f315ab4fbe0875
|
|
7
|
+
data.tar.gz: db9fe14a1c829ca66d2d1daa59da9bab181c5b4ba17c89ebc0703369165310ad4390e8713effc329d0ad6ce9f932a822e368b7eec6ae1e3876b4ed27d4bc0969
|
data/.github/workflows/ruby.yml
CHANGED
|
@@ -9,9 +9,9 @@ name: Ruby
|
|
|
9
9
|
|
|
10
10
|
on:
|
|
11
11
|
push:
|
|
12
|
-
branches: [
|
|
12
|
+
branches: [ main ]
|
|
13
13
|
pull_request:
|
|
14
|
-
branches: [
|
|
14
|
+
branches: [ main ]
|
|
15
15
|
|
|
16
16
|
jobs:
|
|
17
17
|
test:
|
|
@@ -19,7 +19,7 @@ jobs:
|
|
|
19
19
|
runs-on: ubuntu-latest
|
|
20
20
|
strategy:
|
|
21
21
|
matrix:
|
|
22
|
-
ruby-version: ['2.6', '2.7', '3.0', 'jruby', 'truffleruby']
|
|
22
|
+
ruby-version: ['2.5', '2.6', '2.7', '3.0', '3.1', 'jruby', 'truffleruby']
|
|
23
23
|
|
|
24
24
|
steps:
|
|
25
25
|
- uses: actions/checkout@v2
|
data/CHANGELOG.md
CHANGED
data/benchmark.rb
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'bundler/inline'
|
|
4
|
+
|
|
5
|
+
gemfile do
|
|
6
|
+
source "https://rubygems.org"
|
|
7
|
+
|
|
8
|
+
gemspec
|
|
9
|
+
gem "benchmark-memory"
|
|
10
|
+
gem "benchmark-perf"
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def bench_perf(sheet)
|
|
14
|
+
result = Benchmark::Perf.cpu(repeat: 5) do
|
|
15
|
+
sheet.each do |row|
|
|
16
|
+
row.each do |cell|
|
|
17
|
+
cell
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
puts "Performance benchmark: #{result.avg}s avg #{result.stdev}s stdev"
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def bench_mem(sheet)
|
|
26
|
+
Benchmark.memory do |bm|
|
|
27
|
+
bm.report do
|
|
28
|
+
sheet.each do |row|
|
|
29
|
+
row.each do |cell|
|
|
30
|
+
cell
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
file = File.read("test/files/10k-sheet.xlsx")
|
|
38
|
+
|
|
39
|
+
workbook = Xsv::Workbook.open(file)
|
|
40
|
+
|
|
41
|
+
puts "--- ARRAY MODE ---"
|
|
42
|
+
|
|
43
|
+
bench_perf(workbook.sheets[0])
|
|
44
|
+
bench_mem(workbook.sheets[0])
|
|
45
|
+
|
|
46
|
+
puts "\n--- HASH MODE ---"
|
|
47
|
+
|
|
48
|
+
workbook.sheets[0].parse_headers!
|
|
49
|
+
|
|
50
|
+
bench_perf(workbook.sheets[0])
|
|
51
|
+
bench_mem(workbook.sheets[0])
|
data/lib/xsv/sax_parser.rb
CHANGED
|
@@ -5,6 +5,9 @@ module Xsv
|
|
|
5
5
|
ATTR_REGEX = /((\S+)="(.*?)")/m
|
|
6
6
|
|
|
7
7
|
def parse(io)
|
|
8
|
+
responds_to_end_element = respond_to?(:end_element)
|
|
9
|
+
responds_to_characters = respond_to?(:characters)
|
|
10
|
+
|
|
8
11
|
state = :look_start
|
|
9
12
|
if io.is_a?(String)
|
|
10
13
|
pbuf = io.dup
|
|
@@ -32,7 +35,7 @@ module Xsv
|
|
|
32
35
|
if (o = pbuf.index("<"))
|
|
33
36
|
chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
|
|
34
37
|
|
|
35
|
-
if
|
|
38
|
+
if responds_to_characters && !chars.empty?
|
|
36
39
|
if chars.index("&")
|
|
37
40
|
chars.gsub!("&amp;", "&")
|
|
38
41
|
chars.gsub!("&apos;", "'")
|
|
@@ -65,12 +68,12 @@ module Xsv
|
|
|
65
68
|
end
|
|
66
69
|
|
|
67
70
|
if tag_name.start_with?("/")
|
|
68
|
-
end_element(tag_name[1..-1]) if
|
|
71
|
+
end_element(tag_name[1..-1]) if responds_to_end_element
|
|
69
72
|
elsif args.nil?
|
|
70
73
|
start_element(tag_name, nil)
|
|
71
74
|
else
|
|
72
75
|
start_element(tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |m, h| h[m[1].to_sym] = m[2] })
|
|
73
|
-
end_element(tag_name) if args.end_with?("/")
|
|
76
|
+
end_element(tag_name) if responds_to_end_element && args.end_with?("/")
|
|
74
77
|
end
|
|
75
78
|
|
|
76
79
|
state = :look_start
|
|
@@ -30,8 +30,8 @@ module Xsv
|
|
|
30
30
|
@state = nil
|
|
31
31
|
@cell = nil
|
|
32
32
|
@row = nil
|
|
33
|
-
@
|
|
34
|
-
@
|
|
33
|
+
@max_row = 0
|
|
34
|
+
@max_column = 0
|
|
35
35
|
@trim_empty_rows = trim_empty_rows
|
|
36
36
|
end
|
|
37
37
|
|
|
@@ -42,28 +42,28 @@ module Xsv
|
|
|
42
42
|
@cell = attrs[:r]
|
|
43
43
|
when "v"
|
|
44
44
|
col = column_index(@cell)
|
|
45
|
-
@
|
|
46
|
-
@
|
|
45
|
+
@max_column = col if col > @max_column
|
|
46
|
+
@max_row = @row if @row > @max_row
|
|
47
47
|
when "row"
|
|
48
48
|
@state = name
|
|
49
49
|
@row = attrs[:r].to_i
|
|
50
50
|
when "dimension"
|
|
51
51
|
@state = name
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
_first_cell, last_cell = attrs[:ref].split(":")
|
|
54
54
|
|
|
55
|
-
if
|
|
56
|
-
@
|
|
55
|
+
if last_cell
|
|
56
|
+
@max_column = column_index(last_cell)
|
|
57
57
|
unless @trim_empty_rows
|
|
58
|
-
@
|
|
59
|
-
@block.call(@
|
|
58
|
+
@max_row = last_cell[/\d+$/].to_i
|
|
59
|
+
@block.call(@max_row, @max_column)
|
|
60
60
|
end
|
|
61
61
|
end
|
|
62
62
|
end
|
|
63
63
|
end
|
|
64
64
|
|
|
65
65
|
def end_element(name)
|
|
66
|
-
@block.call(@
|
|
66
|
+
@block.call(@max_row, @max_column) if name == "sheetData"
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
end
|
|
@@ -14,11 +14,11 @@ module Xsv
|
|
|
14
14
|
@last_row = last_row - @row_skip
|
|
15
15
|
@block = block
|
|
16
16
|
|
|
17
|
-
@
|
|
17
|
+
@store_characters = false
|
|
18
18
|
|
|
19
19
|
@row_index = 0
|
|
20
20
|
@current_row = {}
|
|
21
|
-
@
|
|
21
|
+
@current_row_number = 0
|
|
22
22
|
@current_cell = {}
|
|
23
23
|
@current_value = +""
|
|
24
24
|
|
|
@@ -28,44 +28,36 @@ module Xsv
|
|
|
28
28
|
def start_element(name, attrs)
|
|
29
29
|
case name
|
|
30
30
|
when "c"
|
|
31
|
-
@state = name
|
|
32
31
|
@current_cell = attrs
|
|
33
32
|
@current_value.clear
|
|
34
|
-
when "v", "is"
|
|
35
|
-
@
|
|
33
|
+
when "v", "is", "t"
|
|
34
|
+
@store_characters = true
|
|
36
35
|
when "row"
|
|
37
|
-
@state = name
|
|
38
36
|
@current_row = @empty_row.dup
|
|
39
|
-
@
|
|
40
|
-
when "t"
|
|
41
|
-
@state = nil unless @state == "is"
|
|
42
|
-
else
|
|
43
|
-
@state = nil
|
|
37
|
+
@current_row_number = attrs[:r].to_i
|
|
44
38
|
end
|
|
45
39
|
end
|
|
46
40
|
|
|
47
41
|
def characters(value)
|
|
48
|
-
@current_value << value if @
|
|
42
|
+
@current_value << value if @store_characters
|
|
49
43
|
end
|
|
50
44
|
|
|
51
45
|
def end_element(name)
|
|
52
46
|
case name
|
|
53
|
-
when "v"
|
|
54
|
-
@
|
|
47
|
+
when "v", "is", "t"
|
|
48
|
+
@store_characters = false
|
|
55
49
|
when "c"
|
|
56
50
|
col_index = column_index(@current_cell[:r])
|
|
57
51
|
|
|
58
|
-
|
|
59
|
-
when :array
|
|
52
|
+
if @mode == :array
|
|
60
53
|
@current_row[col_index] = format_cell
|
|
61
|
-
|
|
54
|
+
else
|
|
62
55
|
@current_row[@headers[col_index]] = format_cell
|
|
63
56
|
end
|
|
64
57
|
when "row"
|
|
65
|
-
|
|
66
|
-
adjusted_row_number = real_row_number - @row_skip
|
|
58
|
+
return if @current_row_number <= @row_skip
|
|
67
59
|
|
|
68
|
-
|
|
60
|
+
adjusted_row_number = @current_row_number - @row_skip
|
|
69
61
|
|
|
70
62
|
@row_index += 1
|
|
71
63
|
|
|
@@ -98,10 +90,7 @@ module Xsv
|
|
|
98
90
|
nil
|
|
99
91
|
when nil, "n"
|
|
100
92
|
if @current_cell[:s]
|
|
101
|
-
|
|
102
|
-
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
|
103
|
-
|
|
104
|
-
parse_number_format(@current_value, numFmt)
|
|
93
|
+
parse_number_format(@current_value, @workbook.get_num_fmt(@current_cell[:s].to_i))
|
|
105
94
|
else
|
|
106
95
|
parse_number(@current_value)
|
|
107
96
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
|
@@ -5,21 +5,21 @@ module Xsv
|
|
|
5
5
|
# This is used internally when opening a sheet.
|
|
6
6
|
class StylesHandler < SaxParser
|
|
7
7
|
def self.get_styles(io)
|
|
8
|
-
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs,
|
|
8
|
+
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, num_fmts|
|
|
9
9
|
@xfs = xfs
|
|
10
|
-
@
|
|
10
|
+
@num_fmts = num_fmts
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
handler.parse(io)
|
|
14
14
|
|
|
15
|
-
[@xfs, @
|
|
15
|
+
[@xfs, @num_fmts]
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
def initialize(
|
|
18
|
+
def initialize(num_fmts, &block)
|
|
19
19
|
@block = block
|
|
20
20
|
@state = nil
|
|
21
21
|
@xfs = []
|
|
22
|
-
@
|
|
22
|
+
@num_fmts = num_fmts
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
def start_element(name, attrs)
|
|
@@ -27,16 +27,16 @@ module Xsv
|
|
|
27
27
|
when "cellXfs"
|
|
28
28
|
@state = "cellXfs"
|
|
29
29
|
when "xf"
|
|
30
|
-
@xfs << attrs if @state == "cellXfs"
|
|
30
|
+
@xfs << attrs.transform_values(&:to_i) if @state == "cellXfs"
|
|
31
31
|
when "numFmt"
|
|
32
|
-
@
|
|
32
|
+
@num_fmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
|
|
36
36
|
def end_element(name)
|
|
37
37
|
case name
|
|
38
38
|
when "styleSheet"
|
|
39
|
-
@block.call(@xfs, @
|
|
39
|
+
@block.call(@xfs, @num_fmts)
|
|
40
40
|
when "cellXfs"
|
|
41
41
|
@state = nil
|
|
42
42
|
end
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -10,7 +10,7 @@ module Xsv
|
|
|
10
10
|
# @return [Array<Sheet>]
|
|
11
11
|
attr_reader :sheets
|
|
12
12
|
|
|
13
|
-
attr_reader :shared_strings, :xfs, :
|
|
13
|
+
attr_reader :shared_strings, :xfs, :num_fmts, :trim_empty_rows
|
|
14
14
|
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
|
16
16
|
# options see {.initialize}
|
|
@@ -49,7 +49,7 @@ module Xsv
|
|
|
49
49
|
@trim_empty_rows = trim_empty_rows
|
|
50
50
|
|
|
51
51
|
@sheets = []
|
|
52
|
-
@xfs, @
|
|
52
|
+
@xfs, @num_fmts = fetch_styles
|
|
53
53
|
@sheet_ids = fetch_sheet_ids
|
|
54
54
|
@relationships = fetch_relationships
|
|
55
55
|
@shared_strings = fetch_shared_strings
|
|
@@ -68,7 +68,7 @@ module Xsv
|
|
|
68
68
|
@zip = nil
|
|
69
69
|
@sheets = nil
|
|
70
70
|
@xfs = nil
|
|
71
|
-
@
|
|
71
|
+
@num_fmts = nil
|
|
72
72
|
@relationships = nil
|
|
73
73
|
@shared_strings = nil
|
|
74
74
|
@sheet_ids = nil
|
|
@@ -83,6 +83,11 @@ module Xsv
|
|
|
83
83
|
@sheets.select { |s| s.name == name }
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
# Get number format for given style index
|
|
87
|
+
def get_num_fmt(style)
|
|
88
|
+
@num_fmts[@xfs[style][:numFmtId]]
|
|
89
|
+
end
|
|
90
|
+
|
|
86
91
|
private
|
|
87
92
|
|
|
88
93
|
def fetch_shared_strings
|
data/xsv.gemspec
CHANGED
|
@@ -43,5 +43,5 @@ Gem::Specification.new do |spec|
|
|
|
43
43
|
spec.add_development_dependency "bundler", "< 3"
|
|
44
44
|
spec.add_development_dependency "rake", "~> 13.0"
|
|
45
45
|
spec.add_development_dependency "minitest", "~> 5.14.2"
|
|
46
|
-
spec.add_development_dependency "
|
|
46
|
+
spec.add_development_dependency "standard", "~> 1.6.0"
|
|
47
47
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.
|
|
4
|
+
version: 1.0.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-01-
|
|
11
|
+
date: 2022-01-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -73,19 +73,19 @@ dependencies:
|
|
|
73
73
|
- !ruby/object:Gem::Version
|
|
74
74
|
version: 5.14.2
|
|
75
75
|
- !ruby/object:Gem::Dependency
|
|
76
|
-
name:
|
|
76
|
+
name: standard
|
|
77
77
|
requirement: !ruby/object:Gem::Requirement
|
|
78
78
|
requirements:
|
|
79
79
|
- - "~>"
|
|
80
80
|
- !ruby/object:Gem::Version
|
|
81
|
-
version:
|
|
81
|
+
version: 1.6.0
|
|
82
82
|
type: :development
|
|
83
83
|
prerelease: false
|
|
84
84
|
version_requirements: !ruby/object:Gem::Requirement
|
|
85
85
|
requirements:
|
|
86
86
|
- - "~>"
|
|
87
87
|
- !ruby/object:Gem::Version
|
|
88
|
-
version:
|
|
88
|
+
version: 1.6.0
|
|
89
89
|
description: |2
|
|
90
90
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
91
91
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
@@ -105,6 +105,7 @@ files:
|
|
|
105
105
|
- LICENSE.txt
|
|
106
106
|
- README.md
|
|
107
107
|
- Rakefile
|
|
108
|
+
- benchmark.rb
|
|
108
109
|
- bin/console
|
|
109
110
|
- bin/setup
|
|
110
111
|
- lib/xsv.rb
|