xsv 1.0.2 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +32 -0
- data/.standard.yml +1 -0
- data/CHANGELOG.md +16 -0
- data/README.md +1 -1
- data/Rakefile +1 -1
- data/benchmark.rb +51 -0
- data/lib/xsv/helpers.rb +37 -35
- data/lib/xsv/relationships_handler.rb +1 -1
- data/lib/xsv/sax_parser.rb +18 -15
- data/lib/xsv/shared_strings_parser.rb +8 -8
- data/lib/xsv/sheet.rb +2 -2
- data/lib/xsv/sheet_bounds_handler.rb +14 -14
- data/lib/xsv/sheet_rows_handler.rb +26 -35
- data/lib/xsv/sheets_ids_handler.rb +1 -1
- data/lib/xsv/styles_handler.rb +14 -14
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +22 -16
- data/lib/xsv.rb +12 -12
- data/xsv.gemspec +3 -2
- metadata +24 -8
- data/.travis.yml +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0755959285e8f4e588fc8f72f45c48904bc0b840c1abc7b250faf6bad978e7f0'
|
4
|
+
data.tar.gz: 482143461be2e72994e8d9758d1a971e87355acdd16cb027a5631956b7898927
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9a48303c59d254233e12994562a341854caffde500f78e5357edebfd16dca12cf7b9b39af6c3c9e1536491f1467456c0b8295bfebf4fddda0f315ab4fbe0875
|
7
|
+
data.tar.gz: db9fe14a1c829ca66d2d1daa59da9bab181c5b4ba17c89ebc0703369165310ad4390e8713effc329d0ad6ce9f932a822e368b7eec6ae1e3876b4ed27d4bc0969
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ main ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ main ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.5', '2.6', '2.7', '3.0', '3.1', 'jruby', 'truffleruby']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
uses: ruby/setup-ruby@v1
|
28
|
+
with:
|
29
|
+
ruby-version: ${{ matrix.ruby-version }}
|
30
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
31
|
+
- name: Run tests
|
32
|
+
run: bundle exec rake
|
data/.standard.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby_version: 2.5.0
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
|
|
1
1
|
# Xsv Changelog
|
2
2
|
|
3
|
+
## 1.0.6 2022-01-07
|
4
|
+
|
5
|
+
- Code cleanup, small performance improvements
|
6
|
+
|
7
|
+
## 1.0.5 2022-01-05
|
8
|
+
|
9
|
+
- Raise exception if given an empty buffer when opening workbook (thanks @kevin-j-m)
|
10
|
+
|
11
|
+
## 1.0.4 2021-07-05
|
12
|
+
|
13
|
+
- Support for custom date/time columns
|
14
|
+
|
15
|
+
## 1.0.3 2021-05-06
|
16
|
+
|
17
|
+
- Handle nil number formats correctly (regression in Xsv 1.0.2, #29)
|
18
|
+
|
3
19
|
## 1.0.2 2021-05-01
|
4
20
|
|
5
21
|
- Ignore phonetic shared string data (thanks @sinoue-1003)
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
[![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://rubydoc.info/github/martijn/xsv)
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/xsv.svg)](https://badge.fury.io/rb/xsv)
|
6
6
|
|
7
|
-
Xsv is a fast, lightweight, pure Ruby parser for Office Open XML spreadsheet files
|
7
|
+
Xsv is a fast, lightweight, pure Ruby parser for ISO/IEC 29500 Office Open XML spreadsheet files
|
8
8
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
9
9
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
10
10
|
deals with minimal formatting and cannot create or modify documents.
|
data/Rakefile
CHANGED
data/benchmark.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/inline'
|
4
|
+
|
5
|
+
gemfile do
|
6
|
+
source "https://rubygems.org"
|
7
|
+
|
8
|
+
gemspec
|
9
|
+
gem "benchmark-memory"
|
10
|
+
gem "benchmark-perf"
|
11
|
+
end
|
12
|
+
|
13
|
+
def bench_perf(sheet)
|
14
|
+
result = Benchmark::Perf.cpu(repeat: 5) do
|
15
|
+
sheet.each do |row|
|
16
|
+
row.each do |cell|
|
17
|
+
cell
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
puts "Performance benchmark: #{result.avg}s avg #{result.stdev}s stdev"
|
23
|
+
end
|
24
|
+
|
25
|
+
def bench_mem(sheet)
|
26
|
+
Benchmark.memory do |bm|
|
27
|
+
bm.report do
|
28
|
+
sheet.each do |row|
|
29
|
+
row.each do |cell|
|
30
|
+
cell
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
file = File.read("test/files/10k-sheet.xlsx")
|
38
|
+
|
39
|
+
workbook = Xsv::Workbook.open(file)
|
40
|
+
|
41
|
+
puts "--- ARRAY MODE ---"
|
42
|
+
|
43
|
+
bench_perf(workbook.sheets[0])
|
44
|
+
bench_mem(workbook.sheets[0])
|
45
|
+
|
46
|
+
puts "\n--- HASH MODE ---"
|
47
|
+
|
48
|
+
workbook.sheets[0].parse_headers!
|
49
|
+
|
50
|
+
bench_perf(workbook.sheets[0])
|
51
|
+
bench_mem(workbook.sheets[0])
|
data/lib/xsv/helpers.rb
CHANGED
@@ -5,42 +5,42 @@ module Xsv
|
|
5
5
|
# The default OOXML Spreadsheet number formats according to the ECMA standard
|
6
6
|
# User formats are appended from index 174 onward
|
7
7
|
BUILT_IN_NUMBER_FORMATS = {
|
8
|
-
1 =>
|
9
|
-
2 =>
|
10
|
-
3 =>
|
11
|
-
4 =>
|
12
|
-
5 =>
|
13
|
-
6 =>
|
14
|
-
7 =>
|
15
|
-
8 =>
|
16
|
-
9 =>
|
17
|
-
10 =>
|
18
|
-
11 =>
|
19
|
-
12 =>
|
20
|
-
13 =>
|
21
|
-
14 =>
|
22
|
-
15 =>
|
23
|
-
16 =>
|
24
|
-
17 =>
|
25
|
-
18 =>
|
26
|
-
19 =>
|
27
|
-
20 =>
|
28
|
-
21 =>
|
29
|
-
22 =>
|
30
|
-
37 =>
|
31
|
-
38 =>
|
32
|
-
39 =>
|
33
|
-
40 =>
|
34
|
-
45 =>
|
35
|
-
46 =>
|
36
|
-
47 =>
|
37
|
-
48 =>
|
38
|
-
49 =>
|
8
|
+
1 => "0",
|
9
|
+
2 => "0.00",
|
10
|
+
3 => "#, ##0",
|
11
|
+
4 => "#, ##0.00",
|
12
|
+
5 => "$#, ##0_);($#, ##0)",
|
13
|
+
6 => "$#, ##0_);[Red]($#, ##0)",
|
14
|
+
7 => "$#, ##0.00_);($#, ##0.00)",
|
15
|
+
8 => "$#, ##0.00_);[Red]($#, ##0.00)",
|
16
|
+
9 => "0%",
|
17
|
+
10 => "0.00%",
|
18
|
+
11 => "0.00E+00",
|
19
|
+
12 => "# ?/?",
|
20
|
+
13 => "# ??/??",
|
21
|
+
14 => "m/d/yyyy",
|
22
|
+
15 => "d-mmm-yy",
|
23
|
+
16 => "d-mmm",
|
24
|
+
17 => "mmm-yy",
|
25
|
+
18 => "h:mm AM/PM",
|
26
|
+
19 => "h:mm:ss AM/PM",
|
27
|
+
20 => "h:mm",
|
28
|
+
21 => "h:mm:ss",
|
29
|
+
22 => "m/d/yyyy h:mm",
|
30
|
+
37 => "#, ##0_);(#, ##0)",
|
31
|
+
38 => "#, ##0_);[Red](#, ##0)",
|
32
|
+
39 => "#, ##0.00_);(#, ##0.00)",
|
33
|
+
40 => "#, ##0.00_);[Red](#, ##0.00)",
|
34
|
+
45 => "mm:ss",
|
35
|
+
46 => "[h]:mm:ss",
|
36
|
+
47 => "mm:ss.0",
|
37
|
+
48 => "##0.0E+0",
|
38
|
+
49 => "@"
|
39
39
|
}.freeze
|
40
40
|
|
41
41
|
MINUTE = 60
|
42
42
|
HOUR = 3600
|
43
|
-
A_CODEPOINT =
|
43
|
+
A_CODEPOINT = "A".ord.freeze
|
44
44
|
# The epoch for all dates in OOXML Spreadsheet documents
|
45
45
|
EPOCH = Date.new(1899, 12, 30).freeze
|
46
46
|
|
@@ -74,7 +74,7 @@ module Xsv
|
|
74
74
|
minutes = minutes % 60
|
75
75
|
end
|
76
76
|
|
77
|
-
format(
|
77
|
+
format("%02d:%02d", hours, minutes)
|
78
78
|
end
|
79
79
|
|
80
80
|
# Returns a time including a date as a {Time} object
|
@@ -92,9 +92,9 @@ module Xsv
|
|
92
92
|
|
93
93
|
# Returns a number as either Integer or Float
|
94
94
|
def parse_number(string)
|
95
|
-
if string.include?
|
95
|
+
if string.include? "."
|
96
96
|
string.to_f
|
97
|
-
elsif string.include?
|
97
|
+
elsif string.include? "E"
|
98
98
|
Complex(string).to_f
|
99
99
|
else
|
100
100
|
string.to_i
|
@@ -105,6 +105,8 @@ module Xsv
|
|
105
105
|
def parse_number_format(number, format)
|
106
106
|
number = parse_number(number) # number is always a string since it comes out of the Sax Parser
|
107
107
|
|
108
|
+
return number if format.nil?
|
109
|
+
|
108
110
|
is_date_format = format.scan(/[dmy]+/).length > 1
|
109
111
|
is_time_format = format.scan(/[hms]+/).length > 1
|
110
112
|
|
data/lib/xsv/sax_parser.rb
CHANGED
@@ -5,6 +5,9 @@ module Xsv
|
|
5
5
|
ATTR_REGEX = /((\S+)="(.*?)")/m
|
6
6
|
|
7
7
|
def parse(io)
|
8
|
+
responds_to_end_element = respond_to?(:end_element)
|
9
|
+
responds_to_characters = respond_to?(:characters)
|
10
|
+
|
8
11
|
state = :look_start
|
9
12
|
if io.is_a?(String)
|
10
13
|
pbuf = io.dup
|
@@ -29,16 +32,16 @@ module Xsv
|
|
29
32
|
end
|
30
33
|
|
31
34
|
if state == :look_start
|
32
|
-
if (o = pbuf.index(
|
33
|
-
chars = pbuf.slice!(0, o + 1).chop!.force_encoding(
|
35
|
+
if (o = pbuf.index("<"))
|
36
|
+
chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
|
34
37
|
|
35
|
-
if
|
36
|
-
if chars.index(
|
37
|
-
chars.gsub!(
|
38
|
-
chars.gsub!(
|
39
|
-
chars.gsub!(
|
40
|
-
chars.gsub!(
|
41
|
-
chars.gsub!(
|
38
|
+
if responds_to_characters && !chars.empty?
|
39
|
+
if chars.index("&")
|
40
|
+
chars.gsub!("&amp;", "&")
|
41
|
+
chars.gsub!("&apos;", "'")
|
42
|
+
chars.gsub!("&gt;", ">")
|
43
|
+
chars.gsub!("&lt;", "<")
|
44
|
+
chars.gsub!("&quot;", '"')
|
42
45
|
end
|
43
46
|
characters(chars)
|
44
47
|
end
|
@@ -55,8 +58,8 @@ module Xsv
|
|
55
58
|
end
|
56
59
|
|
57
60
|
if state == :look_end
|
58
|
-
if (o = pbuf.index(
|
59
|
-
if (s = pbuf.index(
|
61
|
+
if (o = pbuf.index(">"))
|
62
|
+
if (s = pbuf.index(" ")) && s < o
|
60
63
|
tag_name = pbuf.slice!(0, s + 1).chop!
|
61
64
|
args = pbuf.slice!(0, o - s)
|
62
65
|
else
|
@@ -64,18 +67,18 @@ module Xsv
|
|
64
67
|
args = nil
|
65
68
|
end
|
66
69
|
|
67
|
-
if tag_name.start_with?(
|
68
|
-
end_element(tag_name[1..-1]) if
|
70
|
+
if tag_name.start_with?("/")
|
71
|
+
end_element(tag_name[1..-1]) if responds_to_end_element
|
69
72
|
elsif args.nil?
|
70
73
|
start_element(tag_name, nil)
|
71
74
|
else
|
72
75
|
start_element(tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |m, h| h[m[1].to_sym] = m[2] })
|
73
|
-
end_element(tag_name) if args.end_with?(
|
76
|
+
end_element(tag_name) if responds_to_end_element && args.end_with?("/")
|
74
77
|
end
|
75
78
|
|
76
79
|
state = :look_start
|
77
80
|
elsif eof_reached
|
78
|
-
raise
|
81
|
+
raise "Malformed XML document, looking for end of tag beyond EOF"
|
79
82
|
else
|
80
83
|
must_read = true
|
81
84
|
end
|
@@ -18,29 +18,29 @@ module Xsv
|
|
18
18
|
|
19
19
|
def start_element(name, _attrs)
|
20
20
|
case name
|
21
|
-
when
|
22
|
-
@current_string =
|
21
|
+
when "si"
|
22
|
+
@current_string = ""
|
23
23
|
@skip = false
|
24
|
-
when
|
24
|
+
when "rPh"
|
25
25
|
@skip = true
|
26
|
-
when
|
26
|
+
when "t"
|
27
27
|
@state = name
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
31
|
def characters(value)
|
32
|
-
if @state ==
|
32
|
+
if @state == "t" && !@skip
|
33
33
|
@current_string += value
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
37
|
def end_element(name)
|
38
38
|
case name
|
39
|
-
when
|
39
|
+
when "si"
|
40
40
|
@block.call(@current_string)
|
41
|
-
when
|
41
|
+
when "rPh"
|
42
42
|
@skip = false
|
43
|
-
when
|
43
|
+
when "t"
|
44
44
|
@state = nil
|
45
45
|
end
|
46
46
|
end
|
data/lib/xsv/sheet.rb
CHANGED
@@ -40,7 +40,7 @@ module Xsv
|
|
40
40
|
@headers = []
|
41
41
|
@mode = :array
|
42
42
|
@row_skip = 0
|
43
|
-
@hidden = ids[:state] ==
|
43
|
+
@hidden = ids[:state] == "hidden"
|
44
44
|
|
45
45
|
@last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
|
46
46
|
end
|
@@ -66,7 +66,7 @@ module Xsv
|
|
66
66
|
true
|
67
67
|
end
|
68
68
|
|
69
|
-
|
69
|
+
alias_method :each, :each_row
|
70
70
|
|
71
71
|
# Get row by number, starting at 0. Returns either a hash or an array based on the current row.
|
72
72
|
# If the specified index is out of bounds an empty row is returned.
|
@@ -30,40 +30,40 @@ module Xsv
|
|
30
30
|
@state = nil
|
31
31
|
@cell = nil
|
32
32
|
@row = nil
|
33
|
-
@
|
34
|
-
@
|
33
|
+
@max_row = 0
|
34
|
+
@max_column = 0
|
35
35
|
@trim_empty_rows = trim_empty_rows
|
36
36
|
end
|
37
37
|
|
38
38
|
def start_element(name, attrs)
|
39
39
|
case name
|
40
|
-
when
|
40
|
+
when "c"
|
41
41
|
@state = name
|
42
42
|
@cell = attrs[:r]
|
43
|
-
when
|
43
|
+
when "v"
|
44
44
|
col = column_index(@cell)
|
45
|
-
@
|
46
|
-
@
|
47
|
-
when
|
45
|
+
@max_column = col if col > @max_column
|
46
|
+
@max_row = @row if @row > @max_row
|
47
|
+
when "row"
|
48
48
|
@state = name
|
49
49
|
@row = attrs[:r].to_i
|
50
|
-
when
|
50
|
+
when "dimension"
|
51
51
|
@state = name
|
52
52
|
|
53
|
-
|
53
|
+
_first_cell, last_cell = attrs[:ref].split(":")
|
54
54
|
|
55
|
-
if
|
56
|
-
@
|
55
|
+
if last_cell
|
56
|
+
@max_column = column_index(last_cell)
|
57
57
|
unless @trim_empty_rows
|
58
|
-
@
|
59
|
-
@block.call(@
|
58
|
+
@max_row = last_cell[/\d+$/].to_i
|
59
|
+
@block.call(@max_row, @max_column)
|
60
60
|
end
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
65
|
def end_element(name)
|
66
|
-
@block.call(@
|
66
|
+
@block.call(@max_row, @max_column) if name == "sheetData"
|
67
67
|
end
|
68
68
|
end
|
69
69
|
end
|
@@ -14,58 +14,50 @@ module Xsv
|
|
14
14
|
@last_row = last_row - @row_skip
|
15
15
|
@block = block
|
16
16
|
|
17
|
-
@
|
17
|
+
@store_characters = false
|
18
18
|
|
19
19
|
@row_index = 0
|
20
20
|
@current_row = {}
|
21
|
-
@
|
21
|
+
@current_row_number = 0
|
22
22
|
@current_cell = {}
|
23
|
-
@current_value =
|
23
|
+
@current_value = +""
|
24
24
|
|
25
25
|
@headers = @empty_row.keys if @mode == :hash
|
26
26
|
end
|
27
27
|
|
28
28
|
def start_element(name, attrs)
|
29
29
|
case name
|
30
|
-
when
|
31
|
-
@state = name
|
30
|
+
when "c"
|
32
31
|
@current_cell = attrs
|
33
32
|
@current_value.clear
|
34
|
-
when
|
35
|
-
@
|
36
|
-
when
|
37
|
-
@state = name
|
33
|
+
when "v", "is", "t"
|
34
|
+
@store_characters = true
|
35
|
+
when "row"
|
38
36
|
@current_row = @empty_row.dup
|
39
|
-
@
|
40
|
-
when 't'
|
41
|
-
@state = nil unless @state == 'is'
|
42
|
-
else
|
43
|
-
@state = nil
|
37
|
+
@current_row_number = attrs[:r].to_i
|
44
38
|
end
|
45
39
|
end
|
46
40
|
|
47
41
|
def characters(value)
|
48
|
-
@current_value << value if @
|
42
|
+
@current_value << value if @store_characters
|
49
43
|
end
|
50
44
|
|
51
45
|
def end_element(name)
|
52
46
|
case name
|
53
|
-
when
|
54
|
-
@
|
55
|
-
when
|
47
|
+
when "v", "is", "t"
|
48
|
+
@store_characters = false
|
49
|
+
when "c"
|
56
50
|
col_index = column_index(@current_cell[:r])
|
57
51
|
|
58
|
-
|
59
|
-
when :array
|
52
|
+
if @mode == :array
|
60
53
|
@current_row[col_index] = format_cell
|
61
|
-
|
54
|
+
else
|
62
55
|
@current_row[@headers[col_index]] = format_cell
|
63
56
|
end
|
64
|
-
when
|
65
|
-
|
66
|
-
adjusted_row_number = real_row_number - @row_skip
|
57
|
+
when "row"
|
58
|
+
return if @current_row_number <= @row_skip
|
67
59
|
|
68
|
-
|
60
|
+
adjusted_row_number = @current_row_number - @row_skip
|
69
61
|
|
70
62
|
@row_index += 1
|
71
63
|
|
@@ -90,23 +82,22 @@ module Xsv
|
|
90
82
|
return nil if @current_value.empty?
|
91
83
|
|
92
84
|
case @current_cell[:t]
|
93
|
-
when
|
85
|
+
when "s"
|
94
86
|
@workbook.shared_strings[@current_value.to_i]
|
95
|
-
when
|
87
|
+
when "str", "inlineStr"
|
96
88
|
@current_value.strip
|
97
|
-
when
|
89
|
+
when "e" # N/A
|
98
90
|
nil
|
99
|
-
when nil,
|
91
|
+
when nil, "n"
|
100
92
|
if @current_cell[:s]
|
101
|
-
|
102
|
-
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
103
|
-
|
104
|
-
parse_number_format(@current_value, numFmt)
|
93
|
+
parse_number_format(@current_value, @workbook.get_num_fmt(@current_cell[:s].to_i))
|
105
94
|
else
|
106
95
|
parse_number(@current_value)
|
107
96
|
end
|
108
|
-
when
|
109
|
-
@current_value ==
|
97
|
+
when "b"
|
98
|
+
@current_value == "1"
|
99
|
+
when "d"
|
100
|
+
DateTime.parse(@current_value)
|
110
101
|
else
|
111
102
|
raise Xsv::Error, "Encountered unknown column type #{@current_cell[:t]}"
|
112
103
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
@@ -5,39 +5,39 @@ module Xsv
|
|
5
5
|
# This is used internally when opening a sheet.
|
6
6
|
class StylesHandler < SaxParser
|
7
7
|
def self.get_styles(io)
|
8
|
-
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs,
|
8
|
+
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, num_fmts|
|
9
9
|
@xfs = xfs
|
10
|
-
@
|
10
|
+
@num_fmts = num_fmts
|
11
11
|
end
|
12
12
|
|
13
13
|
handler.parse(io)
|
14
14
|
|
15
|
-
[@xfs, @
|
15
|
+
[@xfs, @num_fmts]
|
16
16
|
end
|
17
17
|
|
18
|
-
def initialize(
|
18
|
+
def initialize(num_fmts, &block)
|
19
19
|
@block = block
|
20
20
|
@state = nil
|
21
21
|
@xfs = []
|
22
|
-
@
|
22
|
+
@num_fmts = num_fmts
|
23
23
|
end
|
24
24
|
|
25
25
|
def start_element(name, attrs)
|
26
26
|
case name
|
27
|
-
when
|
28
|
-
@state =
|
29
|
-
when
|
30
|
-
@xfs << attrs if @state ==
|
31
|
-
when
|
32
|
-
@
|
27
|
+
when "cellXfs"
|
28
|
+
@state = "cellXfs"
|
29
|
+
when "xf"
|
30
|
+
@xfs << attrs.transform_values(&:to_i) if @state == "cellXfs"
|
31
|
+
when "numFmt"
|
32
|
+
@num_fmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
36
|
def end_element(name)
|
37
37
|
case name
|
38
|
-
when
|
39
|
-
@block.call(@xfs, @
|
40
|
-
when
|
38
|
+
when "styleSheet"
|
39
|
+
@block.call(@xfs, @num_fmts)
|
40
|
+
when "cellXfs"
|
41
41
|
@state = nil
|
42
42
|
end
|
43
43
|
end
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "zip"
|
4
4
|
|
5
5
|
module Xsv
|
6
6
|
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
@@ -10,18 +10,18 @@ module Xsv
|
|
10
10
|
# @return [Array<Sheet>]
|
11
11
|
attr_reader :sheets
|
12
12
|
|
13
|
-
attr_reader :shared_strings, :xfs, :
|
13
|
+
attr_reader :shared_strings, :xfs, :num_fmts, :trim_empty_rows
|
14
14
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
16
16
|
# options see {.initialize}
|
17
17
|
def self.open(data, **kws)
|
18
18
|
@workbook = if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
new(Zip::File.open_buffer(data), **kws)
|
20
|
+
elsif data.start_with?("PK\x03\x04") # is it a string containing a file?
|
21
|
+
new(Zip::File.open_buffer(data), **kws)
|
22
|
+
else # must be a filename
|
23
|
+
new(Zip::File.open(data), **kws)
|
24
|
+
end
|
25
25
|
|
26
26
|
if block_given?
|
27
27
|
begin
|
@@ -43,12 +43,13 @@ module Xsv
|
|
43
43
|
#
|
44
44
|
def initialize(zip, trim_empty_rows: false)
|
45
45
|
raise ArgumentError, "Passed argument is not an instance of Zip::File. Did you mean to use Workbook.open?" unless zip.is_a?(Zip::File)
|
46
|
+
raise Xsv::Error, "Zip::File is empty" if zip.size.zero?
|
46
47
|
|
47
48
|
@zip = zip
|
48
49
|
@trim_empty_rows = trim_empty_rows
|
49
50
|
|
50
51
|
@sheets = []
|
51
|
-
@xfs, @
|
52
|
+
@xfs, @num_fmts = fetch_styles
|
52
53
|
@sheet_ids = fetch_sheet_ids
|
53
54
|
@relationships = fetch_relationships
|
54
55
|
@shared_strings = fetch_shared_strings
|
@@ -67,7 +68,7 @@ module Xsv
|
|
67
68
|
@zip = nil
|
68
69
|
@sheets = nil
|
69
70
|
@xfs = nil
|
70
|
-
@
|
71
|
+
@num_fmts = nil
|
71
72
|
@relationships = nil
|
72
73
|
@shared_strings = nil
|
73
74
|
@sheet_ids = nil
|
@@ -82,10 +83,15 @@ module Xsv
|
|
82
83
|
@sheets.select { |s| s.name == name }
|
83
84
|
end
|
84
85
|
|
86
|
+
# Get number format for given style index
|
87
|
+
def get_num_fmt(style)
|
88
|
+
@num_fmts[@xfs[style][:numFmtId]]
|
89
|
+
end
|
90
|
+
|
85
91
|
private
|
86
92
|
|
87
93
|
def fetch_shared_strings
|
88
|
-
handle = @zip.glob(
|
94
|
+
handle = @zip.glob("xl/sharedStrings.xml").first
|
89
95
|
return if handle.nil?
|
90
96
|
|
91
97
|
stream = handle.get_input_stream
|
@@ -95,7 +101,7 @@ module Xsv
|
|
95
101
|
end
|
96
102
|
|
97
103
|
def fetch_styles
|
98
|
-
stream = @zip.glob(
|
104
|
+
stream = @zip.glob("xl/styles.xml").first.get_input_stream
|
99
105
|
|
100
106
|
StylesHandler.get_styles(stream)
|
101
107
|
ensure
|
@@ -103,24 +109,24 @@ module Xsv
|
|
103
109
|
end
|
104
110
|
|
105
111
|
def fetch_sheets
|
106
|
-
@zip.glob(
|
112
|
+
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
107
113
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
108
114
|
end.map do |entry|
|
109
|
-
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?(
|
115
|
+
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
|
110
116
|
sheet_ids = @sheet_ids.detect { |i| i[:"r:id"] == rel[:Id] }
|
111
117
|
Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
|
112
118
|
end
|
113
119
|
end
|
114
120
|
|
115
121
|
def fetch_sheet_ids
|
116
|
-
stream = @zip.glob(
|
122
|
+
stream = @zip.glob("xl/workbook.xml").first.get_input_stream
|
117
123
|
SheetsIdsHandler.get_sheets_ids(stream)
|
118
124
|
ensure
|
119
125
|
stream.close
|
120
126
|
end
|
121
127
|
|
122
128
|
def fetch_relationships
|
123
|
-
stream = @zip.glob(
|
129
|
+
stream = @zip.glob("xl/_rels/workbook.xml.rels").first.get_input_stream
|
124
130
|
RelationshipsHandler.get_relations(stream)
|
125
131
|
ensure
|
126
132
|
stream.close
|
data/lib/xsv.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "date"
|
4
4
|
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
15
|
-
require
|
5
|
+
require "xsv/helpers"
|
6
|
+
require "xsv/sax_parser"
|
7
|
+
require "xsv/relationships_handler"
|
8
|
+
require "xsv/shared_strings_parser"
|
9
|
+
require "xsv/sheet"
|
10
|
+
require "xsv/sheet_bounds_handler"
|
11
|
+
require "xsv/sheet_rows_handler"
|
12
|
+
require "xsv/sheets_ids_handler"
|
13
|
+
require "xsv/styles_handler"
|
14
|
+
require "xsv/version"
|
15
|
+
require "xsv/workbook"
|
16
16
|
|
17
17
|
# XSV is a fast, lightweight parser for Office Open XML spreadsheet files
|
18
18
|
# (commonly known as Excel or .xlsx files). It strives to be minimal in the
|
data/xsv.gemspec
CHANGED
@@ -8,13 +8,13 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ["Martijn Storck"]
|
9
9
|
spec.email = ["martijn@storck.io"]
|
10
10
|
|
11
|
-
spec.summary = "A fast and
|
11
|
+
spec.summary = "A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't"
|
12
12
|
spec.description = <<-EOF
|
13
13
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
14
14
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
15
15
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
16
16
|
deals with minimal formatting and cannot create or modify documents.
|
17
|
-
|
17
|
+
EOF
|
18
18
|
spec.homepage = "https://github.com/martijn/xsv"
|
19
19
|
spec.license = "MIT"
|
20
20
|
|
@@ -43,4 +43,5 @@ Gem::Specification.new do |spec|
|
|
43
43
|
spec.add_development_dependency "bundler", "< 3"
|
44
44
|
spec.add_development_dependency "rake", "~> 13.0"
|
45
45
|
spec.add_development_dependency "minitest", "~> 5.14.2"
|
46
|
+
spec.add_development_dependency "standard", "~> 1.6.0"
|
46
47
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martijn Storck
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -72,6 +72,20 @@ dependencies:
|
|
72
72
|
- - "~>"
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: 5.14.2
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: standard
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: 1.6.0
|
82
|
+
type: :development
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 1.6.0
|
75
89
|
description: |2
|
76
90
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
77
91
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
@@ -83,13 +97,15 @@ executables: []
|
|
83
97
|
extensions: []
|
84
98
|
extra_rdoc_files: []
|
85
99
|
files:
|
100
|
+
- ".github/workflows/ruby.yml"
|
86
101
|
- ".gitignore"
|
87
|
-
- ".
|
102
|
+
- ".standard.yml"
|
88
103
|
- CHANGELOG.md
|
89
104
|
- Gemfile
|
90
105
|
- LICENSE.txt
|
91
106
|
- README.md
|
92
107
|
- Rakefile
|
108
|
+
- benchmark.rb
|
93
109
|
- bin/console
|
94
110
|
- bin/setup
|
95
111
|
- lib/xsv.rb
|
@@ -112,7 +128,7 @@ metadata:
|
|
112
128
|
homepage_uri: https://github.com/martijn/xsv
|
113
129
|
source_code_uri: https://github.com/martijn/xsv
|
114
130
|
changelog_uri: https://github.com/martijn/xsv/CHANGELOG.md
|
115
|
-
post_install_message:
|
131
|
+
post_install_message:
|
116
132
|
rdoc_options: []
|
117
133
|
require_paths:
|
118
134
|
- lib
|
@@ -127,8 +143,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
143
|
- !ruby/object:Gem::Version
|
128
144
|
version: '0'
|
129
145
|
requirements: []
|
130
|
-
rubygems_version: 3.
|
131
|
-
signing_key:
|
146
|
+
rubygems_version: 3.3.3
|
147
|
+
signing_key:
|
132
148
|
specification_version: 4
|
133
|
-
summary: A fast and
|
149
|
+
summary: A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't
|
134
150
|
test_files: []
|