xsv 0.3.12 → 0.3.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile +1 -1
- data/README.md +8 -0
- data/Rakefile +0 -1
- data/lib/xsv.rb +2 -1
- data/lib/xsv/helpers.rb +4 -2
- data/lib/xsv/relationships_handler.rb +3 -2
- data/lib/xsv/shared_strings_parser.rb +1 -1
- data/lib/xsv/sheet.rb +7 -1
- data/lib/xsv/sheets_ids_handler.rb +13 -6
- data/lib/xsv/styles_handler.rb +2 -2
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +9 -6
- data/xsv.gemspec +13 -14
- metadata +2 -3
- data/test.sh +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e04167014a265ca3a3551f185b1a37849cae430bda9fa5cd60f7a302643c3722
|
4
|
+
data.tar.gz: 9aced295617b61bb29b6d8c5d087722f73176dc555490fb231a0c5ba05472349
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb97e738cffac1e980a1cfedbd2ed3def2f66d72f78384623be80ccf89cb838bb571a369f10277c066eeb1bf1fb10c19feb542cedc1cb4ffd6463550a459e81c
|
7
|
+
data.tar.gz: 20c758923b7ed3efd3b847ca74bbe6f5683a7f97f2facca0fedd1ca75011b8fab00caab3b44cd909357d9fed5b50808fecadb06b439b463988a5cdd59f88d374
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,29 @@
|
|
1
1
|
# Xsv Changelog
|
2
2
|
|
3
|
+
## 0.3.17 2020-07-03
|
4
|
+
|
5
|
+
- Fix parsing of empty worksheets (#17)
|
6
|
+
|
7
|
+
## 0.3.16 2020-06-03
|
8
|
+
|
9
|
+
- Support complex numbers (#16)
|
10
|
+
|
11
|
+
## 0.3.15 2020-06-02
|
12
|
+
|
13
|
+
- Fix issue with workbooks that don't contain shared strings (#15)
|
14
|
+
|
15
|
+
## 0.3.14 2020-05-22
|
16
|
+
|
17
|
+
- Allow opening workbooks from Tempfile and anything that responds to #read
|
18
|
+
|
19
|
+
- Preserve whitespace in text cells
|
20
|
+
|
21
|
+
## 0.3.13 2020-05-12
|
22
|
+
|
23
|
+
- Add Sheet#hidden?
|
24
|
+
|
25
|
+
- Clean up code; get rid of some deprecation warnings
|
26
|
+
|
3
27
|
## 0.3.12 - 2020-04-15
|
4
28
|
|
5
29
|
- Accessing worksheets by name (texpert)
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -123,6 +123,14 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
123
123
|
|
124
124
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
125
125
|
|
126
|
+
## Performance and Benchmarks
|
127
|
+
|
128
|
+
Xsv is faster and more memory efficient than other gems because of two things: it only _reads values_ from Excel files and it's based on a SAX-based parser instead of a DOM-based parser. If you want to read some background on this, check out my blog post on
|
129
|
+
[Efficient XML parsing in Ruby](https://storck.io/posts/efficient-xml-parsing-in-ruby/).
|
130
|
+
|
131
|
+
Jamie Schembri did a shootout of Xsv against various other Excel reading gems comparing parsing speed, memory usage and allocations.
|
132
|
+
Check out his blog post: [Faster Excel parsing in Ruby](https://blog.schembri.me/post/faster-excel-parsing-in-ruby/).
|
133
|
+
|
126
134
|
## Contributing
|
127
135
|
|
128
136
|
Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
|
data/Rakefile
CHANGED
data/lib/xsv.rb
CHANGED
@@ -8,7 +8,7 @@ require "xsv/shared_strings_parser"
|
|
8
8
|
require "xsv/sheet"
|
9
9
|
require "xsv/sheet_bounds_handler"
|
10
10
|
require "xsv/sheet_rows_handler"
|
11
|
-
require
|
11
|
+
require "xsv/sheets_ids_handler"
|
12
12
|
require "xsv/styles_handler"
|
13
13
|
require "xsv/version"
|
14
14
|
require "xsv/workbook"
|
@@ -19,6 +19,7 @@ require "xsv/workbook"
|
|
19
19
|
# deals with minimal formatting and cannot create or modify documents.
|
20
20
|
module Xsv
|
21
21
|
class Error < StandardError; end
|
22
|
+
|
22
23
|
# An AssertionFailed error indicates an unexpected condition, meaning a bug
|
23
24
|
# or misinterpreted .xlsx document
|
24
25
|
class AssertionFailed < StandardError; end
|
data/lib/xsv/helpers.rb
CHANGED
@@ -39,7 +39,7 @@ module Xsv
|
|
39
39
|
|
40
40
|
MINUTE = 60.freeze
|
41
41
|
HOUR = 3600.freeze
|
42
|
-
A_CODEPOINT =
|
42
|
+
A_CODEPOINT = "A".ord.freeze
|
43
43
|
# The epoch for all dates in OOXML Spreadsheet documents
|
44
44
|
EPOCH = Date.new(1899, 12, 30).freeze
|
45
45
|
|
@@ -53,7 +53,7 @@ module Xsv
|
|
53
53
|
|
54
54
|
# Return a Date for the given Excel date value
|
55
55
|
def parse_date(number)
|
56
|
-
|
56
|
+
EPOCH + number
|
57
57
|
end
|
58
58
|
|
59
59
|
# Return a time as a string for the given Excel time value
|
@@ -94,6 +94,8 @@ module Xsv
|
|
94
94
|
def parse_number(string)
|
95
95
|
if string.include? "."
|
96
96
|
string.to_f
|
97
|
+
elsif string.include? "E"
|
98
|
+
Complex(string).to_f
|
97
99
|
else
|
98
100
|
string.to_i
|
99
101
|
end
|
@@ -17,6 +17,7 @@ module Xsv
|
|
17
17
|
|
18
18
|
def initialize(&block)
|
19
19
|
@block = block
|
20
|
+
@relationship = {}
|
20
21
|
end
|
21
22
|
|
22
23
|
def start_element(name)
|
@@ -25,8 +26,8 @@ module Xsv
|
|
25
26
|
|
26
27
|
def attr(name, value)
|
27
28
|
case name
|
28
|
-
|
29
|
-
|
29
|
+
when :Id, :Type, :Target
|
30
|
+
@relationship[name] = value
|
30
31
|
end
|
31
32
|
end
|
32
33
|
|
data/lib/xsv/sheet.rb
CHANGED
@@ -39,6 +39,7 @@ module Xsv
|
|
39
39
|
@headers = []
|
40
40
|
@mode = :array
|
41
41
|
@row_skip = 0
|
42
|
+
@hidden = ids[:state] == "hidden"
|
42
43
|
|
43
44
|
@last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
|
44
45
|
end
|
@@ -48,6 +49,11 @@ module Xsv
|
|
48
49
|
"#<#{self.class.name}:#{self.object_id}>"
|
49
50
|
end
|
50
51
|
|
52
|
+
# Returns true if the worksheet is hidden
|
53
|
+
def hidden?
|
54
|
+
@hidden
|
55
|
+
end
|
56
|
+
|
51
57
|
# Iterate over rows, returning either hashes or arrays based on the current mode.
|
52
58
|
def each_row(&block)
|
53
59
|
@io.rewind
|
@@ -106,7 +112,7 @@ module Xsv
|
|
106
112
|
elsif @mode == :hash
|
107
113
|
@mode = :array
|
108
114
|
headers.tap { @mode = :hash }
|
109
|
-
end
|
115
|
+
end || []
|
110
116
|
end
|
111
117
|
|
112
118
|
def empty_row
|
@@ -17,10 +17,14 @@ module Xsv
|
|
17
17
|
|
18
18
|
def initialize(&block)
|
19
19
|
@block = block
|
20
|
+
@parsing = false
|
20
21
|
end
|
21
22
|
|
22
23
|
def start_element(name)
|
23
|
-
|
24
|
+
if name == :sheets
|
25
|
+
@parsing = true
|
26
|
+
return
|
27
|
+
end
|
24
28
|
|
25
29
|
return unless name == :sheet
|
26
30
|
|
@@ -31,15 +35,18 @@ module Xsv
|
|
31
35
|
return unless @parsing
|
32
36
|
|
33
37
|
case name
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
+
when :name, :sheetId, :state
|
39
|
+
@sheet_ids[name] = value
|
40
|
+
when :'r:id'
|
41
|
+
@sheet_ids[:r_id] = value
|
38
42
|
end
|
39
43
|
end
|
40
44
|
|
41
45
|
def end_element(name)
|
42
|
-
|
46
|
+
if name == :sheets
|
47
|
+
@parsing = false
|
48
|
+
return
|
49
|
+
end
|
43
50
|
|
44
51
|
return unless name == :sheet
|
45
52
|
|
data/lib/xsv/styles_handler.rb
CHANGED
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require
|
2
|
+
require "zip"
|
3
3
|
|
4
4
|
module Xsv
|
5
5
|
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
@@ -15,11 +15,11 @@ module Xsv
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
16
16
|
# options see {.initialize}
|
17
17
|
def self.open(data, **kws)
|
18
|
-
if data.is_a?(IO)
|
18
|
+
if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
19
19
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
20
|
-
elsif data.start_with?("PK\x03\x04")
|
20
|
+
elsif data.start_with?("PK\x03\x04") # is it a string containing raw zip (xlsx) data?
|
21
21
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
22
|
-
else
|
22
|
+
else # must be a filename
|
23
23
|
@workbook = self.new(Zip::File.open(data), **kws)
|
24
24
|
end
|
25
25
|
end
|
@@ -75,7 +75,10 @@ module Xsv
|
|
75
75
|
private
|
76
76
|
|
77
77
|
def fetch_shared_strings
|
78
|
-
|
78
|
+
handle = @zip.glob("xl/sharedStrings.xml").first
|
79
|
+
return if handle.nil?
|
80
|
+
|
81
|
+
stream = handle.get_input_stream
|
79
82
|
@shared_strings = SharedStringsParser.parse(stream)
|
80
83
|
|
81
84
|
stream.close
|
@@ -91,7 +94,7 @@ module Xsv
|
|
91
94
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
92
95
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
93
96
|
end.each do |entry|
|
94
|
-
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?(
|
97
|
+
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
|
95
98
|
sheet_ids = @sheets_ids.detect { |i| i[:r_id] == rel[:Id] }
|
96
99
|
@sheets << Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
|
97
100
|
end
|
data/xsv.gemspec
CHANGED
@@ -1,23 +1,22 @@
|
|
1
|
-
|
2
1
|
lib = File.expand_path("../lib", __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require "xsv/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
6
|
+
spec.name = "xsv"
|
7
|
+
spec.version = Xsv::VERSION
|
8
|
+
spec.authors = ["Martijn Storck"]
|
9
|
+
spec.email = ["martijn@storck.io"]
|
11
10
|
|
12
|
-
spec.summary
|
13
|
-
spec.description
|
11
|
+
spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
|
12
|
+
spec.description = <<-EOF
|
14
13
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
15
14
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
16
15
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
17
16
|
deals with minimal formatting and cannot create or modify documents.
|
18
17
|
EOF
|
19
|
-
spec.homepage
|
20
|
-
spec.license
|
18
|
+
spec.homepage = "https://github.com/martijn/xsv"
|
19
|
+
spec.license = "MIT"
|
21
20
|
|
22
21
|
if spec.respond_to?(:metadata)
|
23
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
@@ -25,19 +24,19 @@ Gem::Specification.new do |spec|
|
|
25
24
|
spec.metadata["changelog_uri"] = "https://github.com/martijn/xsv/CHANGELOG.md"
|
26
25
|
else
|
27
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
28
|
-
|
27
|
+
"public gem pushes."
|
29
28
|
end
|
30
29
|
|
31
30
|
# Specify which files should be added to the gem when it is released.
|
32
31
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
33
|
-
spec.files
|
32
|
+
spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
|
34
33
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
35
34
|
end
|
36
|
-
spec.bindir
|
37
|
-
spec.executables
|
35
|
+
spec.bindir = "exe"
|
36
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
38
37
|
spec.require_paths = ["lib"]
|
39
38
|
|
40
|
-
spec.required_ruby_version =
|
39
|
+
spec.required_ruby_version = "~> 2.5"
|
41
40
|
|
42
41
|
spec.add_dependency "rubyzip", ">= 1.3", "< 3"
|
43
42
|
spec.add_dependency "ox", ">= 2.9"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martijn Storck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -117,7 +117,6 @@ files:
|
|
117
117
|
- lib/xsv/styles_handler.rb
|
118
118
|
- lib/xsv/version.rb
|
119
119
|
- lib/xsv/workbook.rb
|
120
|
-
- test.sh
|
121
120
|
- xsv.gemspec
|
122
121
|
homepage: https://github.com/martijn/xsv
|
123
122
|
licenses:
|
data/test.sh
DELETED