xsv 0.3.10 → 0.3.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile +1 -1
- data/README.md +17 -1
- data/Rakefile +0 -1
- data/lib/xsv.rb +3 -0
- data/lib/xsv/helpers.rb +2 -2
- data/lib/xsv/relationships_handler.rb +40 -0
- data/lib/xsv/shared_strings_parser.rb +1 -1
- data/lib/xsv/sheet.rb +10 -2
- data/lib/xsv/sheets_ids_handler.rb +56 -0
- data/lib/xsv/styles_handler.rb +2 -2
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +36 -6
- data/xsv.gemspec +13 -14
- metadata +5 -4
- data/test.sh +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84691bdf67411e4a1e2d28c23a04e6656285247820d65c3124382261b0aba27e
|
4
|
+
data.tar.gz: 2ae8332a7e5e857039b729abee1478f063663e29b4eb8b9f45a4ae3ac2632916
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 666a4b190de6a64d676fc5dd66d5d836255b7c5b8114f21f53ce57b9d09deaa1cfd4f644c31018a48619449d5962dd86b6bdd41355861462275601fc7486c386
|
7
|
+
data.tar.gz: 3c6c957f46e31dac95f7c77f533ab43c52db1733f1d309b7a6e6bdc7e503da72963a3fe4f80a8269d46db436977bbc37c0b425203c2d75c8a6ef1be889714eb3
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,29 @@
|
|
1
1
|
# Xsv Changelog
|
2
2
|
|
3
|
+
## 0.3.15 - 2020-06-02
|
4
|
+
|
5
|
+
- Fix issue with workbooks that don't contain shared strings (#15)
|
6
|
+
|
7
|
+
## 0.3.14 - 2020-05-22
|
8
|
+
|
9
|
+
- Allow opening workbooks from Tempfile and anything that responds to #read
|
10
|
+
|
11
|
+
- Preserve whitespace in text cells
|
12
|
+
|
13
|
+
## 0.3.13 - 2020-05-12
|
14
|
+
|
15
|
+
- Add Sheet#hidden?
|
16
|
+
|
17
|
+
- Clean up code; get rid of some deprecation warnings
|
18
|
+
|
19
|
+
## 0.3.12 - 2020-04-15
|
20
|
+
|
21
|
+
- Accessing worksheets by name (texpert)
|
22
|
+
|
23
|
+
## 0.3.11 - 2020-04-03
|
24
|
+
|
25
|
+
- Backward compatibility with Ruby 2.5 (texpert)
|
26
|
+
|
3
27
|
## 0.3.10 - 2020-03-19
|
4
28
|
|
5
29
|
- Relax version requirements for dependencies
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -81,7 +81,23 @@ columns with the same name!
|
|
81
81
|
`Xsv::Workbook.open` accepts a filename, or an IO or String containing a workbook.
|
82
82
|
|
83
83
|
`Xsv::Sheet` implements `Enumerable` so you can call methods like `#first`,
|
84
|
-
`#filter` and `#map` on it.
|
84
|
+
`#filter`/`#select` and `#map` on it.
|
85
|
+
|
86
|
+
Sheets can be accessed by index or by name:
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
x = Xsv::Workbook.open("sheet.xlsx")
|
90
|
+
|
91
|
+
sheet = x.sheets[0] # gets sheet by index
|
92
|
+
|
93
|
+
sheet = x.sheets_by_name('Name').first # gets sheet by name
|
94
|
+
```
|
95
|
+
|
96
|
+
To get the names of all the workbook's sheets:
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
sheet_names = x.sheets.map(&:name)
|
100
|
+
```
|
85
101
|
|
86
102
|
### Assumptions
|
87
103
|
|
data/Rakefile
CHANGED
data/lib/xsv.rb
CHANGED
@@ -3,10 +3,12 @@ require "date"
|
|
3
3
|
require "ox"
|
4
4
|
|
5
5
|
require "xsv/helpers"
|
6
|
+
require "xsv/relationships_handler"
|
6
7
|
require "xsv/shared_strings_parser"
|
7
8
|
require "xsv/sheet"
|
8
9
|
require "xsv/sheet_bounds_handler"
|
9
10
|
require "xsv/sheet_rows_handler"
|
11
|
+
require "xsv/sheets_ids_handler"
|
10
12
|
require "xsv/styles_handler"
|
11
13
|
require "xsv/version"
|
12
14
|
require "xsv/workbook"
|
@@ -17,6 +19,7 @@ require "xsv/workbook"
|
|
17
19
|
# deals with minimal formatting and cannot create or modify documents.
|
18
20
|
module Xsv
|
19
21
|
class Error < StandardError; end
|
22
|
+
|
20
23
|
# An AssertionFailed error indicates an unexpected condition, meaning a bug
|
21
24
|
# or misinterpreted .xlsx document
|
22
25
|
class AssertionFailed < StandardError; end
|
data/lib/xsv/helpers.rb
CHANGED
@@ -39,7 +39,7 @@ module Xsv
|
|
39
39
|
|
40
40
|
MINUTE = 60.freeze
|
41
41
|
HOUR = 3600.freeze
|
42
|
-
A_CODEPOINT =
|
42
|
+
A_CODEPOINT = "A".ord.freeze
|
43
43
|
# The epoch for all dates in OOXML Spreadsheet documents
|
44
44
|
EPOCH = Date.new(1899, 12, 30).freeze
|
45
45
|
|
@@ -53,7 +53,7 @@ module Xsv
|
|
53
53
|
|
54
54
|
# Return a Date for the given Excel date value
|
55
55
|
def parse_date(number)
|
56
|
-
|
56
|
+
EPOCH + number
|
57
57
|
end
|
58
58
|
|
59
59
|
# Return a time as a string for the given Excel time value
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Xsv
|
3
|
+
# RelationshipsHandler parses the "xl/_rels/workbook.xml.rels" file to get the existing relationships.
|
4
|
+
# This is used internally when opening a workbook.
|
5
|
+
class RelationshipsHandler < Ox::Sax
|
6
|
+
def self.get_relations(io)
|
7
|
+
relations = []
|
8
|
+
handler = new do |relation|
|
9
|
+
relations << relation
|
10
|
+
end
|
11
|
+
|
12
|
+
Ox.sax_parse(handler, io.read)
|
13
|
+
return relations
|
14
|
+
end
|
15
|
+
|
16
|
+
# Ox::Sax implementation
|
17
|
+
|
18
|
+
def initialize(&block)
|
19
|
+
@block = block
|
20
|
+
@relationship = {}
|
21
|
+
end
|
22
|
+
|
23
|
+
def start_element(name)
|
24
|
+
@relationship = {} if name == :Relationship
|
25
|
+
end
|
26
|
+
|
27
|
+
def attr(name, value)
|
28
|
+
case name
|
29
|
+
when :Id, :Type, :Target
|
30
|
+
@relationship[name] = value
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def end_element(name)
|
35
|
+
return unless name == :Relationship
|
36
|
+
|
37
|
+
@block.call(@relationship)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/xsv/sheet.rb
CHANGED
@@ -17,7 +17,7 @@ module Xsv
|
|
17
17
|
|
18
18
|
# Returns the current mode. Call {#parse_headers!} to switch to `:hash` mode
|
19
19
|
# @return [Symbol] `:hash` or `:array`
|
20
|
-
attr_reader :mode
|
20
|
+
attr_reader :id, :mode, :name
|
21
21
|
|
22
22
|
# Set a number of rows to skip at the top of the sheet (header row offset).
|
23
23
|
# For hash mode, do not skip the header row as this will be automatically
|
@@ -30,13 +30,16 @@ module Xsv
|
|
30
30
|
# @param workbook [Workbook] The Workbook with shared data such as shared strings and styles
|
31
31
|
# @param io [IO] A handle to an open worksheet XML file
|
32
32
|
# @param size [Number] size of the XML file
|
33
|
-
def initialize(workbook, io, size)
|
33
|
+
def initialize(workbook, io, size, ids)
|
34
34
|
@workbook = workbook
|
35
|
+
@id = ids[:sheetId].to_i
|
35
36
|
@io = io
|
37
|
+
@name = ids[:name]
|
36
38
|
@size = size
|
37
39
|
@headers = []
|
38
40
|
@mode = :array
|
39
41
|
@row_skip = 0
|
42
|
+
@hidden = ids[:state] == "hidden"
|
40
43
|
|
41
44
|
@last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
|
42
45
|
end
|
@@ -46,6 +49,11 @@ module Xsv
|
|
46
49
|
"#<#{self.class.name}:#{self.object_id}>"
|
47
50
|
end
|
48
51
|
|
52
|
+
# Returns true if the worksheet is hidden
|
53
|
+
def hidden?
|
54
|
+
@hidden
|
55
|
+
end
|
56
|
+
|
49
57
|
# Iterate over rows, returning either hashes or arrays based on the current mode.
|
50
58
|
def each_row(&block)
|
51
59
|
@io.rewind
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Xsv
|
3
|
+
# SheetsIdsHandler interprets the relevant parts of workbook.xml
|
4
|
+
# This is used internally to get the sheets ids, relationship_ids, and names when opening a workbook.
|
5
|
+
class SheetsIdsHandler < Ox::Sax
|
6
|
+
def self.get_sheets_ids(io)
|
7
|
+
sheets_ids = []
|
8
|
+
handler = new do |sheet_ids|
|
9
|
+
sheets_ids << sheet_ids
|
10
|
+
end
|
11
|
+
|
12
|
+
Ox.sax_parse(handler, io.read)
|
13
|
+
return sheets_ids
|
14
|
+
end
|
15
|
+
|
16
|
+
# Ox::Sax implementation
|
17
|
+
|
18
|
+
def initialize(&block)
|
19
|
+
@block = block
|
20
|
+
@parsing = false
|
21
|
+
end
|
22
|
+
|
23
|
+
def start_element(name)
|
24
|
+
if name == :sheets
|
25
|
+
@parsing = true
|
26
|
+
return
|
27
|
+
end
|
28
|
+
|
29
|
+
return unless name == :sheet
|
30
|
+
|
31
|
+
@sheet_ids = {}
|
32
|
+
end
|
33
|
+
|
34
|
+
def attr(name, value)
|
35
|
+
return unless @parsing
|
36
|
+
|
37
|
+
case name
|
38
|
+
when :name, :sheetId, :state
|
39
|
+
@sheet_ids[name] = value
|
40
|
+
when :'r:id'
|
41
|
+
@sheet_ids[:r_id] = value
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def end_element(name)
|
46
|
+
if name == :sheets
|
47
|
+
@parsing = false
|
48
|
+
return
|
49
|
+
end
|
50
|
+
|
51
|
+
return unless name == :sheet
|
52
|
+
|
53
|
+
@block.call(@sheet_ids)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/lib/xsv/styles_handler.rb
CHANGED
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require
|
2
|
+
require "zip"
|
3
3
|
|
4
4
|
module Xsv
|
5
5
|
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
@@ -15,11 +15,11 @@ module Xsv
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
16
16
|
# options see {.initialize}
|
17
17
|
def self.open(data, **kws)
|
18
|
-
if data.is_a?(IO)
|
18
|
+
if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
19
19
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
20
|
-
elsif data.start_with?("PK\x03\x04")
|
20
|
+
elsif data.start_with?("PK\x03\x04") # is it a string containing the workbook (zip) data?
|
21
21
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
22
|
-
else
|
22
|
+
else # must be a filename
|
23
23
|
@workbook = self.new(Zip::File.open(data), **kws)
|
24
24
|
end
|
25
25
|
end
|
@@ -41,6 +41,8 @@ module Xsv
|
|
41
41
|
|
42
42
|
fetch_shared_strings
|
43
43
|
fetch_styles
|
44
|
+
fetch_sheets_ids
|
45
|
+
fetch_relationships
|
44
46
|
fetch_sheets
|
45
47
|
end
|
46
48
|
|
@@ -56,15 +58,27 @@ module Xsv
|
|
56
58
|
@sheets = nil
|
57
59
|
@xfs = nil
|
58
60
|
@numFmts = nil
|
61
|
+
@relationships = nil
|
59
62
|
@shared_strings = nil
|
63
|
+
@sheets_ids = nil
|
60
64
|
|
61
65
|
true
|
62
66
|
end
|
63
67
|
|
68
|
+
# Returns all sheets with the given name, as an array because several sheets may share a name.
|
69
|
+
# @param [String] name
|
70
|
+
# @return [Array<Xsv::Sheet>]
|
71
|
+
def sheets_by_name(name)
|
72
|
+
@sheets.select { |s| s.name == name }
|
73
|
+
end
|
74
|
+
|
64
75
|
private
|
65
76
|
|
66
77
|
def fetch_shared_strings
|
67
|
-
|
78
|
+
handle = @zip.glob("xl/sharedStrings.xml").first
|
79
|
+
return if handle.nil?
|
80
|
+
|
81
|
+
stream = handle.get_input_stream
|
68
82
|
@shared_strings = SharedStringsParser.parse(stream)
|
69
83
|
|
70
84
|
stream.close
|
@@ -80,8 +94,24 @@ module Xsv
|
|
80
94
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
81
95
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
82
96
|
end.each do |entry|
|
83
|
-
|
97
|
+
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
|
98
|
+
sheet_ids = @sheets_ids.detect { |i| i[:r_id] == rel[:Id] }
|
99
|
+
@sheets << Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
|
84
100
|
end
|
85
101
|
end
|
102
|
+
|
103
|
+
def fetch_sheets_ids
|
104
|
+
stream = @zip.glob("xl/workbook.xml").first.get_input_stream
|
105
|
+
@sheets_ids = SheetsIdsHandler.get_sheets_ids(stream)
|
106
|
+
|
107
|
+
stream.close
|
108
|
+
end
|
109
|
+
|
110
|
+
def fetch_relationships
|
111
|
+
stream = @zip.glob("xl/_rels/workbook.xml.rels").first.get_input_stream
|
112
|
+
@relationships = RelationshipsHandler.get_relations(stream)
|
113
|
+
|
114
|
+
stream.close
|
115
|
+
end
|
86
116
|
end
|
87
117
|
end
|
data/xsv.gemspec
CHANGED
@@ -1,23 +1,22 @@
|
|
1
|
-
|
2
1
|
lib = File.expand_path("../lib", __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require "xsv/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
6
|
+
spec.name = "xsv"
|
7
|
+
spec.version = Xsv::VERSION
|
8
|
+
spec.authors = ["Martijn Storck"]
|
9
|
+
spec.email = ["martijn@storck.io"]
|
11
10
|
|
12
|
-
spec.summary
|
13
|
-
spec.description
|
11
|
+
spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
|
12
|
+
spec.description = <<-EOF
|
14
13
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
15
14
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
16
15
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
17
16
|
deals with minimal formatting and cannot create or modify documents.
|
18
17
|
EOF
|
19
|
-
spec.homepage
|
20
|
-
spec.license
|
18
|
+
spec.homepage = "https://github.com/martijn/xsv"
|
19
|
+
spec.license = "MIT"
|
21
20
|
|
22
21
|
if spec.respond_to?(:metadata)
|
23
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
@@ -25,19 +24,19 @@ Gem::Specification.new do |spec|
|
|
25
24
|
spec.metadata["changelog_uri"] = "https://github.com/martijn/xsv/CHANGELOG.md"
|
26
25
|
else
|
27
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
28
|
-
|
27
|
+
"public gem pushes."
|
29
28
|
end
|
30
29
|
|
31
30
|
# Specify which files should be added to the gem when it is released.
|
32
31
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
33
|
-
spec.files
|
32
|
+
spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
|
34
33
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
35
34
|
end
|
36
|
-
spec.bindir
|
37
|
-
spec.executables
|
35
|
+
spec.bindir = "exe"
|
36
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
38
37
|
spec.require_paths = ["lib"]
|
39
38
|
|
40
|
-
spec.required_ruby_version =
|
39
|
+
spec.required_ruby_version = "~> 2.5"
|
41
40
|
|
42
41
|
spec.add_dependency "rubyzip", ">= 1.3", "< 3"
|
43
42
|
spec.add_dependency "ox", ">= 2.9"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martijn Storck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -108,14 +108,15 @@ files:
|
|
108
108
|
- bin/setup
|
109
109
|
- lib/xsv.rb
|
110
110
|
- lib/xsv/helpers.rb
|
111
|
+
- lib/xsv/relationships_handler.rb
|
111
112
|
- lib/xsv/shared_strings_parser.rb
|
112
113
|
- lib/xsv/sheet.rb
|
113
114
|
- lib/xsv/sheet_bounds_handler.rb
|
114
115
|
- lib/xsv/sheet_rows_handler.rb
|
116
|
+
- lib/xsv/sheets_ids_handler.rb
|
115
117
|
- lib/xsv/styles_handler.rb
|
116
118
|
- lib/xsv/version.rb
|
117
119
|
- lib/xsv/workbook.rb
|
118
|
-
- test.sh
|
119
120
|
- xsv.gemspec
|
120
121
|
homepage: https://github.com/martijn/xsv
|
121
122
|
licenses:
|
@@ -132,7 +133,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
132
133
|
requirements:
|
133
134
|
- - "~>"
|
134
135
|
- !ruby/object:Gem::Version
|
135
|
-
version: '2.
|
136
|
+
version: '2.5'
|
136
137
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
138
|
requirements:
|
138
139
|
- - ">="
|
data/test.sh
DELETED