xsv 1.0.2 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +32 -0
- data/.standard.yml +1 -0
- data/CHANGELOG.md +16 -0
- data/README.md +1 -1
- data/Rakefile +1 -1
- data/benchmark.rb +51 -0
- data/lib/xsv/helpers.rb +37 -35
- data/lib/xsv/relationships_handler.rb +1 -1
- data/lib/xsv/sax_parser.rb +18 -15
- data/lib/xsv/shared_strings_parser.rb +8 -8
- data/lib/xsv/sheet.rb +2 -2
- data/lib/xsv/sheet_bounds_handler.rb +14 -14
- data/lib/xsv/sheet_rows_handler.rb +26 -35
- data/lib/xsv/sheets_ids_handler.rb +1 -1
- data/lib/xsv/styles_handler.rb +14 -14
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +22 -16
- data/lib/xsv.rb +12 -12
- data/xsv.gemspec +3 -2
- metadata +24 -8
- data/.travis.yml +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0755959285e8f4e588fc8f72f45c48904bc0b840c1abc7b250faf6bad978e7f0'
|
4
|
+
data.tar.gz: 482143461be2e72994e8d9758d1a971e87355acdd16cb027a5631956b7898927
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9a48303c59d254233e12994562a341854caffde500f78e5357edebfd16dca12cf7b9b39af6c3c9e1536491f1467456c0b8295bfebf4fddda0f315ab4fbe0875
|
7
|
+
data.tar.gz: db9fe14a1c829ca66d2d1daa59da9bab181c5b4ba17c89ebc0703369165310ad4390e8713effc329d0ad6ce9f932a822e368b7eec6ae1e3876b4ed27d4bc0969
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ main ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ main ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.5', '2.6', '2.7', '3.0', '3.1', 'jruby', 'truffleruby']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
uses: ruby/setup-ruby@v1
|
28
|
+
with:
|
29
|
+
ruby-version: ${{ matrix.ruby-version }}
|
30
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
31
|
+
- name: Run tests
|
32
|
+
run: bundle exec rake
|
data/.standard.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby_version: 2.5.0
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
|
|
1
1
|
# Xsv Changelog
|
2
2
|
|
3
|
+
## 1.0.6 2022-01-07
|
4
|
+
|
5
|
+
- Code cleanup, small performance improvements
|
6
|
+
|
7
|
+
## 1.0.5 2022-01-05
|
8
|
+
|
9
|
+
- Raise exception if given an empty buffer when opening workbook (thanks @kevin-j-m)
|
10
|
+
|
11
|
+
## 1.0.4 2021-07-05
|
12
|
+
|
13
|
+
- Support for custom date/time columns
|
14
|
+
|
15
|
+
## 1.0.3 2021-05-06
|
16
|
+
|
17
|
+
- Handle nil number formats correctly (regression in Xsv 1.0.2, #29)
|
18
|
+
|
3
19
|
## 1.0.2 2021-05-01
|
4
20
|
|
5
21
|
- Ignore phonetic shared string data (thanks @sinoue-1003)
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
[](https://rubydoc.info/github/martijn/xsv)
|
5
5
|
[](https://badge.fury.io/rb/xsv)
|
6
6
|
|
7
|
-
Xsv is a fast, lightweight, pure Ruby parser for Office Open XML spreadsheet files
|
7
|
+
Xsv is a fast, lightweight, pure Ruby parser for ISO/IEC 29500 Office Open XML spreadsheet files
|
8
8
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
9
9
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
10
10
|
deals with minimal formatting and cannot create or modify documents.
|
data/Rakefile
CHANGED
data/benchmark.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/inline'
|
4
|
+
|
5
|
+
gemfile do
|
6
|
+
source "https://rubygems.org"
|
7
|
+
|
8
|
+
gemspec
|
9
|
+
gem "benchmark-memory"
|
10
|
+
gem "benchmark-perf"
|
11
|
+
end
|
12
|
+
|
13
|
+
def bench_perf(sheet)
|
14
|
+
result = Benchmark::Perf.cpu(repeat: 5) do
|
15
|
+
sheet.each do |row|
|
16
|
+
row.each do |cell|
|
17
|
+
cell
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
puts "Performance benchmark: #{result.avg}s avg #{result.stdev}s stdev"
|
23
|
+
end
|
24
|
+
|
25
|
+
def bench_mem(sheet)
|
26
|
+
Benchmark.memory do |bm|
|
27
|
+
bm.report do
|
28
|
+
sheet.each do |row|
|
29
|
+
row.each do |cell|
|
30
|
+
cell
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
file = File.read("test/files/10k-sheet.xlsx")
|
38
|
+
|
39
|
+
workbook = Xsv::Workbook.open(file)
|
40
|
+
|
41
|
+
puts "--- ARRAY MODE ---"
|
42
|
+
|
43
|
+
bench_perf(workbook.sheets[0])
|
44
|
+
bench_mem(workbook.sheets[0])
|
45
|
+
|
46
|
+
puts "\n--- HASH MODE ---"
|
47
|
+
|
48
|
+
workbook.sheets[0].parse_headers!
|
49
|
+
|
50
|
+
bench_perf(workbook.sheets[0])
|
51
|
+
bench_mem(workbook.sheets[0])
|
data/lib/xsv/helpers.rb
CHANGED
@@ -5,42 +5,42 @@ module Xsv
|
|
5
5
|
# The default OOXML Spreadheet number formats according to the ECMA standard
|
6
6
|
# User formats are appended from index 174 onward
|
7
7
|
BUILT_IN_NUMBER_FORMATS = {
|
8
|
-
1 =>
|
9
|
-
2 =>
|
10
|
-
3 =>
|
11
|
-
4 =>
|
12
|
-
5 =>
|
13
|
-
6 =>
|
14
|
-
7 =>
|
15
|
-
8 =>
|
16
|
-
9 =>
|
17
|
-
10 =>
|
18
|
-
11 =>
|
19
|
-
12 =>
|
20
|
-
13 =>
|
21
|
-
14 =>
|
22
|
-
15 =>
|
23
|
-
16 =>
|
24
|
-
17 =>
|
25
|
-
18 =>
|
26
|
-
19 =>
|
27
|
-
20 =>
|
28
|
-
21 =>
|
29
|
-
22 =>
|
30
|
-
37 =>
|
31
|
-
38 =>
|
32
|
-
39 =>
|
33
|
-
40 =>
|
34
|
-
45 =>
|
35
|
-
46 =>
|
36
|
-
47 =>
|
37
|
-
48 =>
|
38
|
-
49 =>
|
8
|
+
1 => "0",
|
9
|
+
2 => "0.00",
|
10
|
+
3 => "#, ##0",
|
11
|
+
4 => "#, ##0.00",
|
12
|
+
5 => "$#, ##0_);($#, ##0)",
|
13
|
+
6 => "$#, ##0_);[Red]($#, ##0)",
|
14
|
+
7 => "$#, ##0.00_);($#, ##0.00)",
|
15
|
+
8 => "$#, ##0.00_);[Red]($#, ##0.00)",
|
16
|
+
9 => "0%",
|
17
|
+
10 => "0.00%",
|
18
|
+
11 => "0.00E+00",
|
19
|
+
12 => "# ?/?",
|
20
|
+
13 => "# ??/??",
|
21
|
+
14 => "m/d/yyyy",
|
22
|
+
15 => "d-mmm-yy",
|
23
|
+
16 => "d-mmm",
|
24
|
+
17 => "mmm-yy",
|
25
|
+
18 => "h:mm AM/PM",
|
26
|
+
19 => "h:mm:ss AM/PM",
|
27
|
+
20 => "h:mm",
|
28
|
+
21 => "h:mm:ss",
|
29
|
+
22 => "m/d/yyyy h:mm",
|
30
|
+
37 => "#, ##0_);(#, ##0)",
|
31
|
+
38 => "#, ##0_);[Red](#, ##0)",
|
32
|
+
39 => "#, ##0.00_);(#, ##0.00)",
|
33
|
+
40 => "#, ##0.00_);[Red](#, ##0.00)",
|
34
|
+
45 => "mm:ss",
|
35
|
+
46 => "[h]:mm:ss",
|
36
|
+
47 => "mm:ss.0",
|
37
|
+
48 => "##0.0E+0",
|
38
|
+
49 => "@"
|
39
39
|
}.freeze
|
40
40
|
|
41
41
|
MINUTE = 60
|
42
42
|
HOUR = 3600
|
43
|
-
A_CODEPOINT =
|
43
|
+
A_CODEPOINT = "A".ord.freeze
|
44
44
|
# The epoch for all dates in OOXML Spreadsheet documents
|
45
45
|
EPOCH = Date.new(1899, 12, 30).freeze
|
46
46
|
|
@@ -74,7 +74,7 @@ module Xsv
|
|
74
74
|
minutes = minutes % 60
|
75
75
|
end
|
76
76
|
|
77
|
-
format(
|
77
|
+
format("%02d:%02d", hours, minutes)
|
78
78
|
end
|
79
79
|
|
80
80
|
# Returns a time including a date as a {Time} object
|
@@ -92,9 +92,9 @@ module Xsv
|
|
92
92
|
|
93
93
|
# Returns a number as either Integer or Float
|
94
94
|
def parse_number(string)
|
95
|
-
if string.include?
|
95
|
+
if string.include? "."
|
96
96
|
string.to_f
|
97
|
-
elsif string.include?
|
97
|
+
elsif string.include? "E"
|
98
98
|
Complex(string).to_f
|
99
99
|
else
|
100
100
|
string.to_i
|
@@ -105,6 +105,8 @@ module Xsv
|
|
105
105
|
def parse_number_format(number, format)
|
106
106
|
number = parse_number(number) # number is always a string since it comes out of the Sax Parser
|
107
107
|
|
108
|
+
return number if format.nil?
|
109
|
+
|
108
110
|
is_date_format = format.scan(/[dmy]+/).length > 1
|
109
111
|
is_time_format = format.scan(/[hms]+/).length > 1
|
110
112
|
|
data/lib/xsv/sax_parser.rb
CHANGED
@@ -5,6 +5,9 @@ module Xsv
|
|
5
5
|
ATTR_REGEX = /((\S+)="(.*?)")/m
|
6
6
|
|
7
7
|
def parse(io)
|
8
|
+
responds_to_end_element = respond_to?(:end_element)
|
9
|
+
responds_to_characters = respond_to?(:characters)
|
10
|
+
|
8
11
|
state = :look_start
|
9
12
|
if io.is_a?(String)
|
10
13
|
pbuf = io.dup
|
@@ -29,16 +32,16 @@ module Xsv
|
|
29
32
|
end
|
30
33
|
|
31
34
|
if state == :look_start
|
32
|
-
if (o = pbuf.index(
|
33
|
-
chars = pbuf.slice!(0, o + 1).chop!.force_encoding(
|
35
|
+
if (o = pbuf.index("<"))
|
36
|
+
chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")
|
34
37
|
|
35
|
-
if
|
36
|
-
if chars.index(
|
37
|
-
chars.gsub!(
|
38
|
-
chars.gsub!(
|
39
|
-
chars.gsub!(
|
40
|
-
chars.gsub!(
|
41
|
-
chars.gsub!(
|
38
|
+
if responds_to_characters && !chars.empty?
|
39
|
+
if chars.index("&")
|
40
|
+
chars.gsub!("&amp;", "&")
|
41
|
+
chars.gsub!("&apos;", "'")
|
42
|
+
chars.gsub!("&gt;", ">")
|
43
|
+
chars.gsub!("&lt;", "<")
|
44
|
+
chars.gsub!("&quot;", '"')
|
42
45
|
end
|
43
46
|
characters(chars)
|
44
47
|
end
|
@@ -55,8 +58,8 @@ module Xsv
|
|
55
58
|
end
|
56
59
|
|
57
60
|
if state == :look_end
|
58
|
-
if (o = pbuf.index(
|
59
|
-
if (s = pbuf.index(
|
61
|
+
if (o = pbuf.index(">"))
|
62
|
+
if (s = pbuf.index(" ")) && s < o
|
60
63
|
tag_name = pbuf.slice!(0, s + 1).chop!
|
61
64
|
args = pbuf.slice!(0, o - s)
|
62
65
|
else
|
@@ -64,18 +67,18 @@ module Xsv
|
|
64
67
|
args = nil
|
65
68
|
end
|
66
69
|
|
67
|
-
if tag_name.start_with?(
|
68
|
-
end_element(tag_name[1..-1]) if
|
70
|
+
if tag_name.start_with?("/")
|
71
|
+
end_element(tag_name[1..-1]) if responds_to_end_element
|
69
72
|
elsif args.nil?
|
70
73
|
start_element(tag_name, nil)
|
71
74
|
else
|
72
75
|
start_element(tag_name, args.scan(ATTR_REGEX).each_with_object({}) { |m, h| h[m[1].to_sym] = m[2] })
|
73
|
-
end_element(tag_name) if args.end_with?(
|
76
|
+
end_element(tag_name) if responds_to_end_element && args.end_with?("/")
|
74
77
|
end
|
75
78
|
|
76
79
|
state = :look_start
|
77
80
|
elsif eof_reached
|
78
|
-
raise
|
81
|
+
raise "Malformed XML document, looking for end of tag beyond EOF"
|
79
82
|
else
|
80
83
|
must_read = true
|
81
84
|
end
|
@@ -18,29 +18,29 @@ module Xsv
|
|
18
18
|
|
19
19
|
def start_element(name, _attrs)
|
20
20
|
case name
|
21
|
-
when
|
22
|
-
@current_string =
|
21
|
+
when "si"
|
22
|
+
@current_string = ""
|
23
23
|
@skip = false
|
24
|
-
when
|
24
|
+
when "rPh"
|
25
25
|
@skip = true
|
26
|
-
when
|
26
|
+
when "t"
|
27
27
|
@state = name
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
31
|
def characters(value)
|
32
|
-
if @state ==
|
32
|
+
if @state == "t" && !@skip
|
33
33
|
@current_string += value
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
37
|
def end_element(name)
|
38
38
|
case name
|
39
|
-
when
|
39
|
+
when "si"
|
40
40
|
@block.call(@current_string)
|
41
|
-
when
|
41
|
+
when "rPh"
|
42
42
|
@skip = false
|
43
|
-
when
|
43
|
+
when "t"
|
44
44
|
@state = nil
|
45
45
|
end
|
46
46
|
end
|
data/lib/xsv/sheet.rb
CHANGED
@@ -40,7 +40,7 @@ module Xsv
|
|
40
40
|
@headers = []
|
41
41
|
@mode = :array
|
42
42
|
@row_skip = 0
|
43
|
-
@hidden = ids[:state] ==
|
43
|
+
@hidden = ids[:state] == "hidden"
|
44
44
|
|
45
45
|
@last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
|
46
46
|
end
|
@@ -66,7 +66,7 @@ module Xsv
|
|
66
66
|
true
|
67
67
|
end
|
68
68
|
|
69
|
-
|
69
|
+
alias_method :each, :each_row
|
70
70
|
|
71
71
|
# Get row by number, starting at 0. Returns either a hash or an array based on the current row.
|
72
72
|
# If the specified index is out of bounds an empty row is returned.
|
@@ -30,40 +30,40 @@ module Xsv
|
|
30
30
|
@state = nil
|
31
31
|
@cell = nil
|
32
32
|
@row = nil
|
33
|
-
@
|
34
|
-
@
|
33
|
+
@max_row = 0
|
34
|
+
@max_column = 0
|
35
35
|
@trim_empty_rows = trim_empty_rows
|
36
36
|
end
|
37
37
|
|
38
38
|
def start_element(name, attrs)
|
39
39
|
case name
|
40
|
-
when
|
40
|
+
when "c"
|
41
41
|
@state = name
|
42
42
|
@cell = attrs[:r]
|
43
|
-
when
|
43
|
+
when "v"
|
44
44
|
col = column_index(@cell)
|
45
|
-
@
|
46
|
-
@
|
47
|
-
when
|
45
|
+
@max_column = col if col > @max_column
|
46
|
+
@max_row = @row if @row > @max_row
|
47
|
+
when "row"
|
48
48
|
@state = name
|
49
49
|
@row = attrs[:r].to_i
|
50
|
-
when
|
50
|
+
when "dimension"
|
51
51
|
@state = name
|
52
52
|
|
53
|
-
|
53
|
+
_first_cell, last_cell = attrs[:ref].split(":")
|
54
54
|
|
55
|
-
if
|
56
|
-
@
|
55
|
+
if last_cell
|
56
|
+
@max_column = column_index(last_cell)
|
57
57
|
unless @trim_empty_rows
|
58
|
-
@
|
59
|
-
@block.call(@
|
58
|
+
@max_row = last_cell[/\d+$/].to_i
|
59
|
+
@block.call(@max_row, @max_column)
|
60
60
|
end
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
65
|
def end_element(name)
|
66
|
-
@block.call(@
|
66
|
+
@block.call(@max_row, @max_column) if name == "sheetData"
|
67
67
|
end
|
68
68
|
end
|
69
69
|
end
|
@@ -14,58 +14,50 @@ module Xsv
|
|
14
14
|
@last_row = last_row - @row_skip
|
15
15
|
@block = block
|
16
16
|
|
17
|
-
@
|
17
|
+
@store_characters = false
|
18
18
|
|
19
19
|
@row_index = 0
|
20
20
|
@current_row = {}
|
21
|
-
@
|
21
|
+
@current_row_number = 0
|
22
22
|
@current_cell = {}
|
23
|
-
@current_value =
|
23
|
+
@current_value = +""
|
24
24
|
|
25
25
|
@headers = @empty_row.keys if @mode == :hash
|
26
26
|
end
|
27
27
|
|
28
28
|
def start_element(name, attrs)
|
29
29
|
case name
|
30
|
-
when
|
31
|
-
@state = name
|
30
|
+
when "c"
|
32
31
|
@current_cell = attrs
|
33
32
|
@current_value.clear
|
34
|
-
when
|
35
|
-
@
|
36
|
-
when
|
37
|
-
@state = name
|
33
|
+
when "v", "is", "t"
|
34
|
+
@store_characters = true
|
35
|
+
when "row"
|
38
36
|
@current_row = @empty_row.dup
|
39
|
-
@
|
40
|
-
when 't'
|
41
|
-
@state = nil unless @state == 'is'
|
42
|
-
else
|
43
|
-
@state = nil
|
37
|
+
@current_row_number = attrs[:r].to_i
|
44
38
|
end
|
45
39
|
end
|
46
40
|
|
47
41
|
def characters(value)
|
48
|
-
@current_value << value if @
|
42
|
+
@current_value << value if @store_characters
|
49
43
|
end
|
50
44
|
|
51
45
|
def end_element(name)
|
52
46
|
case name
|
53
|
-
when
|
54
|
-
@
|
55
|
-
when
|
47
|
+
when "v", "is", "t"
|
48
|
+
@store_characters = false
|
49
|
+
when "c"
|
56
50
|
col_index = column_index(@current_cell[:r])
|
57
51
|
|
58
|
-
|
59
|
-
when :array
|
52
|
+
if @mode == :array
|
60
53
|
@current_row[col_index] = format_cell
|
61
|
-
|
54
|
+
else
|
62
55
|
@current_row[@headers[col_index]] = format_cell
|
63
56
|
end
|
64
|
-
when
|
65
|
-
|
66
|
-
adjusted_row_number = real_row_number - @row_skip
|
57
|
+
when "row"
|
58
|
+
return if @current_row_number <= @row_skip
|
67
59
|
|
68
|
-
|
60
|
+
adjusted_row_number = @current_row_number - @row_skip
|
69
61
|
|
70
62
|
@row_index += 1
|
71
63
|
|
@@ -90,23 +82,22 @@ module Xsv
|
|
90
82
|
return nil if @current_value.empty?
|
91
83
|
|
92
84
|
case @current_cell[:t]
|
93
|
-
when
|
85
|
+
when "s"
|
94
86
|
@workbook.shared_strings[@current_value.to_i]
|
95
|
-
when
|
87
|
+
when "str", "inlineStr"
|
96
88
|
@current_value.strip
|
97
|
-
when
|
89
|
+
when "e" # N/A
|
98
90
|
nil
|
99
|
-
when nil,
|
91
|
+
when nil, "n"
|
100
92
|
if @current_cell[:s]
|
101
|
-
|
102
|
-
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
103
|
-
|
104
|
-
parse_number_format(@current_value, numFmt)
|
93
|
+
parse_number_format(@current_value, @workbook.get_num_fmt(@current_cell[:s].to_i))
|
105
94
|
else
|
106
95
|
parse_number(@current_value)
|
107
96
|
end
|
108
|
-
when
|
109
|
-
@current_value ==
|
97
|
+
when "b"
|
98
|
+
@current_value == "1"
|
99
|
+
when "d"
|
100
|
+
DateTime.parse(@current_value)
|
110
101
|
else
|
111
102
|
raise Xsv::Error, "Encountered unknown column type #{@current_cell[:t]}"
|
112
103
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
@@ -5,39 +5,39 @@ module Xsv
|
|
5
5
|
# This is used internally when opening a sheet.
|
6
6
|
class StylesHandler < SaxParser
|
7
7
|
def self.get_styles(io)
|
8
|
-
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs,
|
8
|
+
handler = new(Xsv::Helpers::BUILT_IN_NUMBER_FORMATS.dup) do |xfs, num_fmts|
|
9
9
|
@xfs = xfs
|
10
|
-
@
|
10
|
+
@num_fmts = num_fmts
|
11
11
|
end
|
12
12
|
|
13
13
|
handler.parse(io)
|
14
14
|
|
15
|
-
[@xfs, @
|
15
|
+
[@xfs, @num_fmts]
|
16
16
|
end
|
17
17
|
|
18
|
-
def initialize(
|
18
|
+
def initialize(num_fmts, &block)
|
19
19
|
@block = block
|
20
20
|
@state = nil
|
21
21
|
@xfs = []
|
22
|
-
@
|
22
|
+
@num_fmts = num_fmts
|
23
23
|
end
|
24
24
|
|
25
25
|
def start_element(name, attrs)
|
26
26
|
case name
|
27
|
-
when
|
28
|
-
@state =
|
29
|
-
when
|
30
|
-
@xfs << attrs if @state ==
|
31
|
-
when
|
32
|
-
@
|
27
|
+
when "cellXfs"
|
28
|
+
@state = "cellXfs"
|
29
|
+
when "xf"
|
30
|
+
@xfs << attrs.transform_values(&:to_i) if @state == "cellXfs"
|
31
|
+
when "numFmt"
|
32
|
+
@num_fmts[attrs[:numFmtId].to_i] = attrs[:formatCode]
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
36
|
def end_element(name)
|
37
37
|
case name
|
38
|
-
when
|
39
|
-
@block.call(@xfs, @
|
40
|
-
when
|
38
|
+
when "styleSheet"
|
39
|
+
@block.call(@xfs, @num_fmts)
|
40
|
+
when "cellXfs"
|
41
41
|
@state = nil
|
42
42
|
end
|
43
43
|
end
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "zip"
|
4
4
|
|
5
5
|
module Xsv
|
6
6
|
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
@@ -10,18 +10,18 @@ module Xsv
|
|
10
10
|
# @return [Array<Sheet>]
|
11
11
|
attr_reader :sheets
|
12
12
|
|
13
|
-
attr_reader :shared_strings, :xfs, :
|
13
|
+
attr_reader :shared_strings, :xfs, :num_fmts, :trim_empty_rows
|
14
14
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
16
16
|
# options see {.initialize}
|
17
17
|
def self.open(data, **kws)
|
18
18
|
@workbook = if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
new(Zip::File.open_buffer(data), **kws)
|
20
|
+
elsif data.start_with?("PK\x03\x04") # is it a string containing a file?
|
21
|
+
new(Zip::File.open_buffer(data), **kws)
|
22
|
+
else # must be a filename
|
23
|
+
new(Zip::File.open(data), **kws)
|
24
|
+
end
|
25
25
|
|
26
26
|
if block_given?
|
27
27
|
begin
|
@@ -43,12 +43,13 @@ module Xsv
|
|
43
43
|
#
|
44
44
|
def initialize(zip, trim_empty_rows: false)
|
45
45
|
raise ArgumentError, "Passed argument is not an instance of Zip::File. Did you mean to use Workbook.open?" unless zip.is_a?(Zip::File)
|
46
|
+
raise Xsv::Error, "Zip::File is empty" if zip.size.zero?
|
46
47
|
|
47
48
|
@zip = zip
|
48
49
|
@trim_empty_rows = trim_empty_rows
|
49
50
|
|
50
51
|
@sheets = []
|
51
|
-
@xfs, @
|
52
|
+
@xfs, @num_fmts = fetch_styles
|
52
53
|
@sheet_ids = fetch_sheet_ids
|
53
54
|
@relationships = fetch_relationships
|
54
55
|
@shared_strings = fetch_shared_strings
|
@@ -67,7 +68,7 @@ module Xsv
|
|
67
68
|
@zip = nil
|
68
69
|
@sheets = nil
|
69
70
|
@xfs = nil
|
70
|
-
@
|
71
|
+
@num_fmts = nil
|
71
72
|
@relationships = nil
|
72
73
|
@shared_strings = nil
|
73
74
|
@sheet_ids = nil
|
@@ -82,10 +83,15 @@ module Xsv
|
|
82
83
|
@sheets.select { |s| s.name == name }
|
83
84
|
end
|
84
85
|
|
86
|
+
# Get number format for given style index
|
87
|
+
def get_num_fmt(style)
|
88
|
+
@num_fmts[@xfs[style][:numFmtId]]
|
89
|
+
end
|
90
|
+
|
85
91
|
private
|
86
92
|
|
87
93
|
def fetch_shared_strings
|
88
|
-
handle = @zip.glob(
|
94
|
+
handle = @zip.glob("xl/sharedStrings.xml").first
|
89
95
|
return if handle.nil?
|
90
96
|
|
91
97
|
stream = handle.get_input_stream
|
@@ -95,7 +101,7 @@ module Xsv
|
|
95
101
|
end
|
96
102
|
|
97
103
|
def fetch_styles
|
98
|
-
stream = @zip.glob(
|
104
|
+
stream = @zip.glob("xl/styles.xml").first.get_input_stream
|
99
105
|
|
100
106
|
StylesHandler.get_styles(stream)
|
101
107
|
ensure
|
@@ -103,24 +109,24 @@ module Xsv
|
|
103
109
|
end
|
104
110
|
|
105
111
|
def fetch_sheets
|
106
|
-
@zip.glob(
|
112
|
+
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
107
113
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
108
114
|
end.map do |entry|
|
109
|
-
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?(
|
115
|
+
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
|
110
116
|
sheet_ids = @sheet_ids.detect { |i| i[:"r:id"] == rel[:Id] }
|
111
117
|
Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
|
112
118
|
end
|
113
119
|
end
|
114
120
|
|
115
121
|
def fetch_sheet_ids
|
116
|
-
stream = @zip.glob(
|
122
|
+
stream = @zip.glob("xl/workbook.xml").first.get_input_stream
|
117
123
|
SheetsIdsHandler.get_sheets_ids(stream)
|
118
124
|
ensure
|
119
125
|
stream.close
|
120
126
|
end
|
121
127
|
|
122
128
|
def fetch_relationships
|
123
|
-
stream = @zip.glob(
|
129
|
+
stream = @zip.glob("xl/_rels/workbook.xml.rels").first.get_input_stream
|
124
130
|
RelationshipsHandler.get_relations(stream)
|
125
131
|
ensure
|
126
132
|
stream.close
|
data/lib/xsv.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "date"
|
4
4
|
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
15
|
-
require
|
5
|
+
require "xsv/helpers"
|
6
|
+
require "xsv/sax_parser"
|
7
|
+
require "xsv/relationships_handler"
|
8
|
+
require "xsv/shared_strings_parser"
|
9
|
+
require "xsv/sheet"
|
10
|
+
require "xsv/sheet_bounds_handler"
|
11
|
+
require "xsv/sheet_rows_handler"
|
12
|
+
require "xsv/sheets_ids_handler"
|
13
|
+
require "xsv/styles_handler"
|
14
|
+
require "xsv/version"
|
15
|
+
require "xsv/workbook"
|
16
16
|
|
17
17
|
# XSV is a fast, lightweight parser for Office Open XML spreadsheet files
|
18
18
|
# (commonly known as Excel or .xlsx files). It strives to be minimal in the
|
data/xsv.gemspec
CHANGED
@@ -8,13 +8,13 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ["Martijn Storck"]
|
9
9
|
spec.email = ["martijn@storck.io"]
|
10
10
|
|
11
|
-
spec.summary = "A fast and
|
11
|
+
spec.summary = "A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't"
|
12
12
|
spec.description = <<-EOF
|
13
13
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
14
14
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
15
15
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
16
16
|
deals with minimal formatting and cannot create or modify documents.
|
17
|
-
|
17
|
+
EOF
|
18
18
|
spec.homepage = "https://github.com/martijn/xsv"
|
19
19
|
spec.license = "MIT"
|
20
20
|
|
@@ -43,4 +43,5 @@ Gem::Specification.new do |spec|
|
|
43
43
|
spec.add_development_dependency "bundler", "< 3"
|
44
44
|
spec.add_development_dependency "rake", "~> 13.0"
|
45
45
|
spec.add_development_dependency "minitest", "~> 5.14.2"
|
46
|
+
spec.add_development_dependency "standard", "~> 1.6.0"
|
46
47
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martijn Storck
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -72,6 +72,20 @@ dependencies:
|
|
72
72
|
- - "~>"
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: 5.14.2
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: standard
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: 1.6.0
|
82
|
+
type: :development
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 1.6.0
|
75
89
|
description: |2
|
76
90
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
77
91
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
@@ -83,13 +97,15 @@ executables: []
|
|
83
97
|
extensions: []
|
84
98
|
extra_rdoc_files: []
|
85
99
|
files:
|
100
|
+
- ".github/workflows/ruby.yml"
|
86
101
|
- ".gitignore"
|
87
|
-
- ".
|
102
|
+
- ".standard.yml"
|
88
103
|
- CHANGELOG.md
|
89
104
|
- Gemfile
|
90
105
|
- LICENSE.txt
|
91
106
|
- README.md
|
92
107
|
- Rakefile
|
108
|
+
- benchmark.rb
|
93
109
|
- bin/console
|
94
110
|
- bin/setup
|
95
111
|
- lib/xsv.rb
|
@@ -112,7 +128,7 @@ metadata:
|
|
112
128
|
homepage_uri: https://github.com/martijn/xsv
|
113
129
|
source_code_uri: https://github.com/martijn/xsv
|
114
130
|
changelog_uri: https://github.com/martijn/xsv/CHANGELOG.md
|
115
|
-
post_install_message:
|
131
|
+
post_install_message:
|
116
132
|
rdoc_options: []
|
117
133
|
require_paths:
|
118
134
|
- lib
|
@@ -127,8 +143,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
143
|
- !ruby/object:Gem::Version
|
128
144
|
version: '0'
|
129
145
|
requirements: []
|
130
|
-
rubygems_version: 3.
|
131
|
-
signing_key:
|
146
|
+
rubygems_version: 3.3.3
|
147
|
+
signing_key:
|
132
148
|
specification_version: 4
|
133
|
-
summary: A fast and
|
149
|
+
summary: A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't
|
134
150
|
test_files: []
|