xsv 0.3.12 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile +1 -1
- data/README.md +8 -0
- data/Rakefile +0 -1
- data/lib/xsv.rb +2 -1
- data/lib/xsv/helpers.rb +4 -2
- data/lib/xsv/relationships_handler.rb +3 -2
- data/lib/xsv/shared_strings_parser.rb +1 -1
- data/lib/xsv/sheet.rb +7 -1
- data/lib/xsv/sheets_ids_handler.rb +13 -6
- data/lib/xsv/styles_handler.rb +2 -2
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +9 -6
- data/xsv.gemspec +13 -14
- metadata +2 -3
- data/test.sh +0 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e04167014a265ca3a3551f185b1a37849cae430bda9fa5cd60f7a302643c3722
|
|
4
|
+
data.tar.gz: 9aced295617b61bb29b6d8c5d087722f73176dc555490fb231a0c5ba05472349
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fb97e738cffac1e980a1cfedbd2ed3def2f66d72f78384623be80ccf89cb838bb571a369f10277c066eeb1bf1fb10c19feb542cedc1cb4ffd6463550a459e81c
|
|
7
|
+
data.tar.gz: 20c758923b7ed3efd3b847ca74bbe6f5683a7f97f2facca0fedd1ca75011b8fab00caab3b44cd909357d9fed5b50808fecadb06b439b463988a5cdd59f88d374
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
# Xsv Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.17 2020-07-03
|
|
4
|
+
|
|
5
|
+
- Fix parsing of empty worksheets (#17)
|
|
6
|
+
|
|
7
|
+
## 0.3.16 2020-06-03
|
|
8
|
+
|
|
9
|
+
- Support complex numbers (#16)
|
|
10
|
+
|
|
11
|
+
## 0.3.15 2020-06-02
|
|
12
|
+
|
|
13
|
+
- Fix issue with workbooks that don't contain shared strings (#15)
|
|
14
|
+
|
|
15
|
+
## 0.3.14 2020-05-22
|
|
16
|
+
|
|
17
|
+
- Allow opening workbooks from Tempfile and anything that responds to #read
|
|
18
|
+
|
|
19
|
+
- Preserve whitespace in text cells
|
|
20
|
+
|
|
21
|
+
## 0.3.13 2020-05-12
|
|
22
|
+
|
|
23
|
+
- Add Sheet#hidden?
|
|
24
|
+
|
|
25
|
+
- Clean up code; get rid of some deprecation warnings
|
|
26
|
+
|
|
3
27
|
## 0.3.12 - 2020-04-15
|
|
4
28
|
|
|
5
29
|
- Accessing worksheets by name (texpert)
|
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -123,6 +123,14 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
|
123
123
|
|
|
124
124
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
125
125
|
|
|
126
|
+
## Performance and Benchmarks
|
|
127
|
+
|
|
128
|
+
Xsv is faster and more memory efficient than other gems because of two things: it only _reads values_ from Excel files and it's based on a SAX-based parser instead of a DOM-based parser. If you want to read some background on this, check out my blog post on
|
|
129
|
+
[Efficient XML parsing in Ruby](https://storck.io/posts/efficient-xml-parsing-in-ruby/).
|
|
130
|
+
|
|
131
|
+
Jamie Schembri did a shootout of Xsv against various other Excel reading gems comparing parsing speed, memory usage and allocations.
|
|
132
|
+
Check out his blog post: [Faster Excel parsing in Ruby](https://blog.schembri.me/post/faster-excel-parsing-in-ruby/).
|
|
133
|
+
|
|
126
134
|
## Contributing
|
|
127
135
|
|
|
128
136
|
Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
|
data/Rakefile
CHANGED
data/lib/xsv.rb
CHANGED
|
@@ -8,7 +8,7 @@ require "xsv/shared_strings_parser"
|
|
|
8
8
|
require "xsv/sheet"
|
|
9
9
|
require "xsv/sheet_bounds_handler"
|
|
10
10
|
require "xsv/sheet_rows_handler"
|
|
11
|
-
require
|
|
11
|
+
require "xsv/sheets_ids_handler"
|
|
12
12
|
require "xsv/styles_handler"
|
|
13
13
|
require "xsv/version"
|
|
14
14
|
require "xsv/workbook"
|
|
@@ -19,6 +19,7 @@ require "xsv/workbook"
|
|
|
19
19
|
# deals with minimal formatting and cannot create or modify documents.
|
|
20
20
|
module Xsv
|
|
21
21
|
class Error < StandardError; end
|
|
22
|
+
|
|
22
23
|
# An AssertionFailed error indicates an unexpected condition, meaning a bug
|
|
23
24
|
# or misinterpreted .xlsx document
|
|
24
25
|
class AssertionFailed < StandardError; end
|
data/lib/xsv/helpers.rb
CHANGED
|
@@ -39,7 +39,7 @@ module Xsv
|
|
|
39
39
|
|
|
40
40
|
MINUTE = 60.freeze
|
|
41
41
|
HOUR = 3600.freeze
|
|
42
|
-
A_CODEPOINT =
|
|
42
|
+
A_CODEPOINT = "A".ord.freeze
|
|
43
43
|
# The epoch for all dates in OOXML Spreadsheet documents
|
|
44
44
|
EPOCH = Date.new(1899, 12, 30).freeze
|
|
45
45
|
|
|
@@ -53,7 +53,7 @@ module Xsv
|
|
|
53
53
|
|
|
54
54
|
# Return a Date for the given Excel date value
|
|
55
55
|
def parse_date(number)
|
|
56
|
-
|
|
56
|
+
EPOCH + number
|
|
57
57
|
end
|
|
58
58
|
|
|
59
59
|
# Return a time as a string for the given Excel time value
|
|
@@ -94,6 +94,8 @@ module Xsv
|
|
|
94
94
|
def parse_number(string)
|
|
95
95
|
if string.include? "."
|
|
96
96
|
string.to_f
|
|
97
|
+
elsif string.include? "E"
|
|
98
|
+
Complex(string).to_f
|
|
97
99
|
else
|
|
98
100
|
string.to_i
|
|
99
101
|
end
|
|
@@ -17,6 +17,7 @@ module Xsv
|
|
|
17
17
|
|
|
18
18
|
def initialize(&block)
|
|
19
19
|
@block = block
|
|
20
|
+
@relationship = {}
|
|
20
21
|
end
|
|
21
22
|
|
|
22
23
|
def start_element(name)
|
|
@@ -25,8 +26,8 @@ module Xsv
|
|
|
25
26
|
|
|
26
27
|
def attr(name, value)
|
|
27
28
|
case name
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
when :Id, :Type, :Target
|
|
30
|
+
@relationship[name] = value
|
|
30
31
|
end
|
|
31
32
|
end
|
|
32
33
|
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -39,6 +39,7 @@ module Xsv
|
|
|
39
39
|
@headers = []
|
|
40
40
|
@mode = :array
|
|
41
41
|
@row_skip = 0
|
|
42
|
+
@hidden = ids[:state] == "hidden"
|
|
42
43
|
|
|
43
44
|
@last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
|
|
44
45
|
end
|
|
@@ -48,6 +49,11 @@ module Xsv
|
|
|
48
49
|
"#<#{self.class.name}:#{self.object_id}>"
|
|
49
50
|
end
|
|
50
51
|
|
|
52
|
+
# Returns true if the worksheet is hidden
|
|
53
|
+
def hidden?
|
|
54
|
+
@hidden
|
|
55
|
+
end
|
|
56
|
+
|
|
51
57
|
# Iterate over rows, returning either hashes or arrays based on the current mode.
|
|
52
58
|
def each_row(&block)
|
|
53
59
|
@io.rewind
|
|
@@ -106,7 +112,7 @@ module Xsv
|
|
|
106
112
|
elsif @mode == :hash
|
|
107
113
|
@mode = :array
|
|
108
114
|
headers.tap { @mode = :hash }
|
|
109
|
-
end
|
|
115
|
+
end || []
|
|
110
116
|
end
|
|
111
117
|
|
|
112
118
|
def empty_row
|
|
@@ -17,10 +17,14 @@ module Xsv
|
|
|
17
17
|
|
|
18
18
|
def initialize(&block)
|
|
19
19
|
@block = block
|
|
20
|
+
@parsing = false
|
|
20
21
|
end
|
|
21
22
|
|
|
22
23
|
def start_element(name)
|
|
23
|
-
|
|
24
|
+
if name == :sheets
|
|
25
|
+
@parsing = true
|
|
26
|
+
return
|
|
27
|
+
end
|
|
24
28
|
|
|
25
29
|
return unless name == :sheet
|
|
26
30
|
|
|
@@ -31,15 +35,18 @@ module Xsv
|
|
|
31
35
|
return unless @parsing
|
|
32
36
|
|
|
33
37
|
case name
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
+
when :name, :sheetId, :state
|
|
39
|
+
@sheet_ids[name] = value
|
|
40
|
+
when :'r:id'
|
|
41
|
+
@sheet_ids[:r_id] = value
|
|
38
42
|
end
|
|
39
43
|
end
|
|
40
44
|
|
|
41
45
|
def end_element(name)
|
|
42
|
-
|
|
46
|
+
if name == :sheets
|
|
47
|
+
@parsing = false
|
|
48
|
+
return
|
|
49
|
+
end
|
|
43
50
|
|
|
44
51
|
return unless name == :sheet
|
|
45
52
|
|
data/lib/xsv/styles_handler.rb
CHANGED
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
-
require
|
|
2
|
+
require "zip"
|
|
3
3
|
|
|
4
4
|
module Xsv
|
|
5
5
|
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
|
@@ -15,11 +15,11 @@ module Xsv
|
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
|
16
16
|
# options see {.initialize}
|
|
17
17
|
def self.open(data, **kws)
|
|
18
|
-
if data.is_a?(IO)
|
|
18
|
+
if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
|
19
19
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
|
20
|
-
elsif data.start_with?("PK\x03\x04")
|
|
20
|
+
elsif data.start_with?("PK\x03\x04") # is it a string containing the ZIP file contents?
|
|
21
21
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
|
22
|
-
else
|
|
22
|
+
else # must be a filename
|
|
23
23
|
@workbook = self.new(Zip::File.open(data), **kws)
|
|
24
24
|
end
|
|
25
25
|
end
|
|
@@ -75,7 +75,10 @@ module Xsv
|
|
|
75
75
|
private
|
|
76
76
|
|
|
77
77
|
def fetch_shared_strings
|
|
78
|
-
|
|
78
|
+
handle = @zip.glob("xl/sharedStrings.xml").first
|
|
79
|
+
return if handle.nil?
|
|
80
|
+
|
|
81
|
+
stream = handle.get_input_stream
|
|
79
82
|
@shared_strings = SharedStringsParser.parse(stream)
|
|
80
83
|
|
|
81
84
|
stream.close
|
|
@@ -91,7 +94,7 @@ module Xsv
|
|
|
91
94
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
|
92
95
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
|
93
96
|
end.each do |entry|
|
|
94
|
-
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?(
|
|
97
|
+
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
|
|
95
98
|
sheet_ids = @sheets_ids.detect { |i| i[:r_id] == rel[:Id] }
|
|
96
99
|
@sheets << Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
|
|
97
100
|
end
|
data/xsv.gemspec
CHANGED
|
@@ -1,23 +1,22 @@
|
|
|
1
|
-
|
|
2
1
|
lib = File.expand_path("../lib", __FILE__)
|
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
3
|
require "xsv/version"
|
|
5
4
|
|
|
6
5
|
Gem::Specification.new do |spec|
|
|
7
|
-
spec.name
|
|
8
|
-
spec.version
|
|
9
|
-
spec.authors
|
|
10
|
-
spec.email
|
|
6
|
+
spec.name = "xsv"
|
|
7
|
+
spec.version = Xsv::VERSION
|
|
8
|
+
spec.authors = ["Martijn Storck"]
|
|
9
|
+
spec.email = ["martijn@storck.io"]
|
|
11
10
|
|
|
12
|
-
spec.summary
|
|
13
|
-
spec.description
|
|
11
|
+
spec.summary = "A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't"
|
|
12
|
+
spec.description = <<-EOF
|
|
14
13
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
15
14
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
16
15
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
17
16
|
deals with minimal formatting and cannot create or modify documents.
|
|
18
17
|
EOF
|
|
19
|
-
spec.homepage
|
|
20
|
-
spec.license
|
|
18
|
+
spec.homepage = "https://github.com/martijn/xsv"
|
|
19
|
+
spec.license = "MIT"
|
|
21
20
|
|
|
22
21
|
if spec.respond_to?(:metadata)
|
|
23
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
@@ -25,19 +24,19 @@ Gem::Specification.new do |spec|
|
|
|
25
24
|
spec.metadata["changelog_uri"] = "https://github.com/martijn/xsv/CHANGELOG.md"
|
|
26
25
|
else
|
|
27
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
|
28
|
-
|
|
27
|
+
"public gem pushes."
|
|
29
28
|
end
|
|
30
29
|
|
|
31
30
|
# Specify which files should be added to the gem when it is released.
|
|
32
31
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
33
|
-
spec.files
|
|
32
|
+
spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
|
|
34
33
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
|
35
34
|
end
|
|
36
|
-
spec.bindir
|
|
37
|
-
spec.executables
|
|
35
|
+
spec.bindir = "exe"
|
|
36
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
38
37
|
spec.require_paths = ["lib"]
|
|
39
38
|
|
|
40
|
-
spec.required_ruby_version =
|
|
39
|
+
spec.required_ruby_version = "~> 2.5"
|
|
41
40
|
|
|
42
41
|
spec.add_dependency "rubyzip", ">= 1.3", "< 3"
|
|
43
42
|
spec.add_dependency "ox", ">= 2.9"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.17
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-07-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -117,7 +117,6 @@ files:
|
|
|
117
117
|
- lib/xsv/styles_handler.rb
|
|
118
118
|
- lib/xsv/version.rb
|
|
119
119
|
- lib/xsv/workbook.rb
|
|
120
|
-
- test.sh
|
|
121
120
|
- xsv.gemspec
|
|
122
121
|
homepage: https://github.com/martijn/xsv
|
|
123
122
|
licenses:
|
data/test.sh
DELETED