xsv 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +2 -2
- data/Rakefile +9 -2
- data/lib/xsv/helpers.rb +11 -4
- data/lib/xsv/shared_strings_parser.rb +2 -0
- data/lib/xsv/sheet.rb +27 -3
- data/lib/xsv/sheet_bounds_handler.rb +3 -1
- data/lib/xsv/sheet_rows_handler.rb +4 -0
- data/lib/xsv/styles_handler.rb +1 -0
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +24 -3
- data/lib/xsv.rb +4 -0
- data/xsv.gemspec +8 -2
- metadata +11 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 14550b4494b57e9363e5b189222e242177e9c1328377d5458bb475231a6726b7
|
|
4
|
+
data.tar.gz: bddb0f2b94dcd67563f8d1b5525c6dffae2f3718fd08d4800e9e09ac21c57d59
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 38a39e5b4484214a1a530ddb5f5add47d56c3cff4df1b20e519bc4712e693c1ab6cca6270779f3dd24c7e019835f7b98daccc16f42fc5f905a66fe02623323c0
|
|
7
|
+
data.tar.gz: b60b28263fcf0352f85b254a61866463dca7ac7e077314ff1e3584ccfd6b90ccd03d7edf8e33f3f63441db17fa15f2ed7139cb4f55ae7c912a555ea5ff008e39
|
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
|
@@ -10,7 +10,7 @@ GEM
|
|
|
10
10
|
specs:
|
|
11
11
|
minitest (5.14.0)
|
|
12
12
|
ox (2.13.2)
|
|
13
|
-
rake (
|
|
13
|
+
rake (13.0.1)
|
|
14
14
|
rubyzip (2.2.0)
|
|
15
15
|
|
|
16
16
|
PLATFORMS
|
|
@@ -19,7 +19,7 @@ PLATFORMS
|
|
|
19
19
|
DEPENDENCIES
|
|
20
20
|
bundler (~> 1.17)
|
|
21
21
|
minitest (~> 5.0)
|
|
22
|
-
rake (~>
|
|
22
|
+
rake (~> 13.0)
|
|
23
23
|
xsv!
|
|
24
24
|
|
|
25
25
|
BUNDLED WITH
|
data/Rakefile
CHANGED
|
@@ -2,9 +2,16 @@ require "bundler/gem_tasks"
|
|
|
2
2
|
require "rake/testtask"
|
|
3
3
|
|
|
4
4
|
Rake::TestTask.new(:test) do |t|
|
|
5
|
-
t.libs << "test"
|
|
6
5
|
t.libs << "lib"
|
|
6
|
+
t.libs << "test"
|
|
7
7
|
t.test_files = FileList["test/**/*_test.rb"]
|
|
8
8
|
end
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Rake::TestTask.new(:bench) do |t|
|
|
11
|
+
t.libs << "lib"
|
|
12
|
+
t.libs << "test"
|
|
13
|
+
t.test_files = FileList["test/**/*_benchmark.rb"]
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
task :default => [:test, :bench]
|
|
17
|
+
|
data/lib/xsv/helpers.rb
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
module Xsv
|
|
3
3
|
module Helpers
|
|
4
|
+
# The default OOXML Spreadsheet number formats according to the ECMA standard
|
|
5
|
+
# User formats are appended from index 174 onward
|
|
4
6
|
BUILT_IN_NUMBER_FORMATS = {
|
|
5
7
|
1 => "0",
|
|
6
8
|
2 => "0.00",
|
|
@@ -38,13 +40,15 @@ module Xsv
|
|
|
38
40
|
MINUTE = 60.freeze
|
|
39
41
|
HOUR = 3600.freeze
|
|
40
42
|
A_CODEPOINT = 'A'.ord.freeze
|
|
43
|
+
# The epoch for all dates in OOXML Spreadsheet documents
|
|
41
44
|
EPOCH = Date.new(1899, 12, 30).freeze
|
|
42
45
|
|
|
43
|
-
# Return the index number for the given Excel column name
|
|
46
|
+
# Return the index number for the given Excel column name (e.g. "A1" => 0)
|
|
44
47
|
def column_index(col)
|
|
45
|
-
col
|
|
46
|
-
|
|
47
|
-
|
|
48
|
+
col.each_codepoint.reduce(0) do |sum, n|
|
|
49
|
+
break sum - 1 if n < A_CODEPOINT # reached a number
|
|
50
|
+
sum * 26 + (n - A_CODEPOINT + 1)
|
|
51
|
+
end
|
|
48
52
|
end
|
|
49
53
|
|
|
50
54
|
# Return a Date for the given Excel date value
|
|
@@ -73,6 +77,7 @@ module Xsv
|
|
|
73
77
|
"%02d:%02d" % [hours, minutes]
|
|
74
78
|
end
|
|
75
79
|
|
|
80
|
+
# Returns a time including a date as a {Time} object
|
|
76
81
|
def parse_datetime(number)
|
|
77
82
|
date_base = number.truncate
|
|
78
83
|
time = parse_date(date_base).to_time
|
|
@@ -85,6 +90,7 @@ module Xsv
|
|
|
85
90
|
time + hours * HOUR + minutes.round * MINUTE
|
|
86
91
|
end
|
|
87
92
|
|
|
93
|
+
# Returns a number as either Integer or Float
|
|
88
94
|
def parse_number(string)
|
|
89
95
|
if string.include? "."
|
|
90
96
|
string.to_f
|
|
@@ -93,6 +99,7 @@ module Xsv
|
|
|
93
99
|
end
|
|
94
100
|
end
|
|
95
101
|
|
|
102
|
+
# Apply date or time number formats, if applicable
|
|
96
103
|
def parse_number_format(number, format)
|
|
97
104
|
number = parse_number(number) if number.is_a?(String)
|
|
98
105
|
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -1,14 +1,34 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
module Xsv
|
|
3
|
+
# Sheet represents a single worksheet from a workbook and is normally accessed through {Workbook#sheets}
|
|
4
|
+
#
|
|
5
|
+
# Xsv is designed for worksheets with a single table of data, optionally with a header row. Because sheet implements
|
|
6
|
+
# {Enumerable} the rows in the worksheet can be iterated over using methods such as `#each` and `#map`
|
|
7
|
+
#
|
|
8
|
+
# By default Sheet will return rows as arrays. But by calling the {#parse_headers!} method the first row of the sheet
|
|
9
|
+
# will be parsed and Sheet will switch to hash mode, returning each row as a hash with the values from the first
|
|
10
|
+
# row as keys.
|
|
11
|
+
#
|
|
12
|
+
# If the sheet contains leading data before the first row of data or the header row, this can be skipped by setting the
|
|
13
|
+
# {row_skip} attribute.
|
|
3
14
|
class Sheet
|
|
4
15
|
include Enumerable
|
|
5
16
|
include Xsv::Helpers
|
|
6
17
|
|
|
18
|
+
# Returns the current mode. Call {#parse_headers!} to switch to `:hash` mode
|
|
19
|
+
# @return [Symbol] `:hash` or `:array`
|
|
7
20
|
attr_reader :mode
|
|
8
21
|
|
|
9
|
-
# Set a number of rows to skip at the top of the sheet (header row offset)
|
|
22
|
+
# Set a number of rows to skip at the top of the sheet (header row offset).
|
|
23
|
+
# For hash mode, do not skip the header row as this will be automatically
|
|
24
|
+
# skipped.
|
|
10
25
|
attr_accessor :row_skip
|
|
11
26
|
|
|
27
|
+
# Create a new instance of Sheet. This is used internally by the {Workbook}.
|
|
28
|
+
# There is no need to create Sheets from application code.
|
|
29
|
+
#
|
|
30
|
+
# @param workbook [Workbook] The Workbook with shared data such as shared strings and styles
|
|
31
|
+
# @param io [IO] A handle to an open worksheet XML file
|
|
12
32
|
def initialize(workbook, io)
|
|
13
33
|
@workbook = workbook
|
|
14
34
|
@io = io
|
|
@@ -19,11 +39,12 @@ module Xsv
|
|
|
19
39
|
@last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
|
|
20
40
|
end
|
|
21
41
|
|
|
42
|
+
# @return [String]
|
|
22
43
|
def inspect
|
|
23
44
|
"#<#{self.class.name}:#{self.object_id}>"
|
|
24
45
|
end
|
|
25
46
|
|
|
26
|
-
# Iterate over rows
|
|
47
|
+
# Iterate over rows, returning either hashes or arrays based on the current mode.
|
|
27
48
|
def each_row
|
|
28
49
|
@io.rewind
|
|
29
50
|
|
|
@@ -38,7 +59,8 @@ module Xsv
|
|
|
38
59
|
|
|
39
60
|
alias each each_row
|
|
40
61
|
|
|
41
|
-
# Get row by number, starting at 0
|
|
62
|
+
# Get row by number, starting at 0. Returns either a hash or an array based on the current mode.
|
|
63
|
+
# If the specified index is out of bounds an empty row is returned.
|
|
42
64
|
def [](number)
|
|
43
65
|
each_with_index do |row, i|
|
|
44
66
|
return row if i == number
|
|
@@ -49,6 +71,7 @@ module Xsv
|
|
|
49
71
|
|
|
50
72
|
# Load headers in the top row of the worksheet. After parsing of headers
|
|
51
73
|
# all methods return hashes instead of arrays
|
|
74
|
+
# @return [true]
|
|
52
75
|
def parse_headers!
|
|
53
76
|
@headers = parse_headers
|
|
54
77
|
@mode = :hash
|
|
@@ -56,6 +79,7 @@ module Xsv
|
|
|
56
79
|
true
|
|
57
80
|
end
|
|
58
81
|
|
|
82
|
+
# Return the headers of the sheet as an array
|
|
59
83
|
def headers
|
|
60
84
|
if @headers.any?
|
|
61
85
|
@headers
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
module Xsv
|
|
3
|
-
# SheetBoundsHandler scans a sheet looking for the outer bounds of the content within
|
|
3
|
+
# SheetBoundsHandler scans a sheet looking for the outer bounds of the content within.
|
|
4
|
+
# This is used internally when opening a sheet to deal with worksheets that do not
|
|
5
|
+
# have a correct dimension tag.
|
|
4
6
|
class SheetBoundsHandler < Ox::Sax
|
|
5
7
|
include Xsv::Helpers
|
|
6
8
|
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
module Xsv
|
|
3
|
+
# This is the core worksheet parser, implemented as an Ox::Sax handler. This is
|
|
4
|
+
# used internally to enumerate rows.
|
|
3
5
|
class SheetRowsHandler < Ox::Sax
|
|
4
6
|
include Xsv::Helpers
|
|
5
7
|
|
|
6
8
|
def format_cell
|
|
9
|
+
return nil if @current_value.empty?
|
|
10
|
+
|
|
7
11
|
case @current_cell[:t]
|
|
8
12
|
when "s"
|
|
9
13
|
@workbook.shared_strings[@current_value.to_i]
|
data/lib/xsv/styles_handler.rb
CHANGED
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -2,11 +2,18 @@
|
|
|
2
2
|
require 'zip'
|
|
3
3
|
|
|
4
4
|
module Xsv
|
|
5
|
+
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
|
6
|
+
# multiple Sheets that are available in the array that's accessible through {#sheets}
|
|
5
7
|
class Workbook
|
|
6
8
|
|
|
7
|
-
|
|
9
|
+
# Access the Sheet objects contained in the workbook
|
|
10
|
+
# @return [Array<Sheet>]
|
|
11
|
+
attr_reader :sheets
|
|
8
12
|
|
|
9
|
-
|
|
13
|
+
attr_reader :shared_strings, :xfs, :numFmts, :trim_empty_rows
|
|
14
|
+
|
|
15
|
+
# Open the workbook of the given filename, string or buffer. For additional
|
|
16
|
+
# options see {#initialize}
|
|
10
17
|
def self.open(data, **kws)
|
|
11
18
|
if data.is_a?(IO)
|
|
12
19
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
|
@@ -17,7 +24,8 @@ module Xsv
|
|
|
17
24
|
end
|
|
18
25
|
end
|
|
19
26
|
|
|
20
|
-
# Open a workbook from an instance of Zip::File
|
|
27
|
+
# Open a workbook from an instance of {Zip::File}. Generally it's recommended
|
|
28
|
+
# to use the {.open} method instead of the constructor.
|
|
21
29
|
#
|
|
22
30
|
# Options:
|
|
23
31
|
#
|
|
@@ -36,10 +44,23 @@ module Xsv
|
|
|
36
44
|
fetch_sheets
|
|
37
45
|
end
|
|
38
46
|
|
|
47
|
+
# @return [String]
|
|
39
48
|
def inspect
|
|
40
49
|
"#<#{self.class.name}:#{self.object_id}>"
|
|
41
50
|
end
|
|
42
51
|
|
|
52
|
+
# Close the handle to the workbook file and leave all resources for the GC to collect
|
|
53
|
+
# @return [true]
|
|
54
|
+
def close
|
|
55
|
+
@zip.close
|
|
56
|
+
@sheets = nil
|
|
57
|
+
@xfs = nil
|
|
58
|
+
@numFmts = nil
|
|
59
|
+
@shared_strings = nil
|
|
60
|
+
|
|
61
|
+
true
|
|
62
|
+
end
|
|
63
|
+
|
|
43
64
|
private
|
|
44
65
|
|
|
45
66
|
def fetch_shared_strings
|
data/lib/xsv.rb
CHANGED
|
@@ -11,6 +11,10 @@ require "xsv/styles_handler"
|
|
|
11
11
|
require "xsv/version"
|
|
12
12
|
require "xsv/workbook"
|
|
13
13
|
|
|
14
|
+
# XSV is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
15
|
+
# (commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
16
|
+
# sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
17
|
+
# deals with minimal formatting and cannot create or modify documents.
|
|
14
18
|
module Xsv
|
|
15
19
|
class Error < StandardError; end
|
|
16
20
|
# An AssertionFailed error indicates an unexpected condition, meaning a bug
|
data/xsv.gemspec
CHANGED
|
@@ -9,7 +9,13 @@ Gem::Specification.new do |spec|
|
|
|
9
9
|
spec.authors = ["Martijn Storck"]
|
|
10
10
|
spec.email = ["martijn@storck.io"]
|
|
11
11
|
|
|
12
|
-
spec.summary = "
|
|
12
|
+
spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
|
|
13
|
+
spec.description = <<-EOF
|
|
14
|
+
XSV is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
15
|
+
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
16
|
+
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
17
|
+
deals with minimal formatting and cannot create or modify documents.
|
|
18
|
+
EOF
|
|
13
19
|
spec.homepage = "https://github.com/martijn/xsv"
|
|
14
20
|
spec.license = "MIT"
|
|
15
21
|
|
|
@@ -37,6 +43,6 @@ Gem::Specification.new do |spec|
|
|
|
37
43
|
spec.add_dependency "ox", "~> 2.13"
|
|
38
44
|
|
|
39
45
|
spec.add_development_dependency "bundler", "~> 1.17"
|
|
40
|
-
spec.add_development_dependency "rake", "~>
|
|
46
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
|
41
47
|
spec.add_development_dependency "minitest", "~> 5.0"
|
|
42
48
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-02
|
|
11
|
+
date: 2020-03-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -58,14 +58,14 @@ dependencies:
|
|
|
58
58
|
requirements:
|
|
59
59
|
- - "~>"
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: '
|
|
61
|
+
version: '13.0'
|
|
62
62
|
type: :development
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
66
|
- - "~>"
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: '
|
|
68
|
+
version: '13.0'
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: minitest
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -80,7 +80,11 @@ dependencies:
|
|
|
80
80
|
- - "~>"
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
82
|
version: '5.0'
|
|
83
|
-
description:
|
|
83
|
+
description: |2
|
|
84
|
+
XSV is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
85
|
+
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
86
|
+
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
87
|
+
deals with minimal formatting and cannot create or modify documents.
|
|
84
88
|
email:
|
|
85
89
|
- martijn@storck.io
|
|
86
90
|
executables: []
|
|
@@ -89,6 +93,7 @@ extra_rdoc_files: []
|
|
|
89
93
|
files:
|
|
90
94
|
- ".gitignore"
|
|
91
95
|
- ".travis.yml"
|
|
96
|
+
- CHANGELOG.md
|
|
92
97
|
- Gemfile
|
|
93
98
|
- Gemfile.lock
|
|
94
99
|
- LICENSE.txt
|
|
@@ -132,5 +137,5 @@ requirements: []
|
|
|
132
137
|
rubygems_version: 3.1.2
|
|
133
138
|
signing_key:
|
|
134
139
|
specification_version: 4
|
|
135
|
-
summary:
|
|
140
|
+
summary: A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't
|
|
136
141
|
test_files: []
|