xsv 0.3.13 → 0.3.18
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/CHANGELOG.md +23 -1
- data/Gemfile +1 -1
- data/README.md +12 -3
- data/Rakefile +0 -1
- data/lib/xsv.rb +2 -1
- data/lib/xsv/helpers.rb +4 -2
- data/lib/xsv/shared_strings_parser.rb +1 -1
- data/lib/xsv/sheet.rb +1 -1
- data/lib/xsv/sheet_rows_handler.rb +4 -2
- data/lib/xsv/styles_handler.rb +2 -2
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +9 -6
- data/xsv.gemspec +13 -14
- metadata +2 -3
- data/test.sh +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75a753e746b55f6e75f168b41da7ded523a4373fab7ea4d696339efe8e05372e
|
4
|
+
data.tar.gz: 501389046d3a77d87e1287fba7e1d138fb5ca50fdfdbaffce7294e7131506970
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f569bae5677287af25722206784cba96057191cf518e868f1db719e53cdbe4dc75135925247f88b15337ea1cc8fb036a89ac0d2ffd88e4d4cebe63285ffa2326
|
7
|
+
data.tar.gz: 5a8b4084ca13108cbd9b3e4dab5eef27456aeb287241b600d65c78d0e0ef2cc5cd8fa54bf9ca3f1015d826d3e9740765f5bb9c3c349ea818591befd18ac5944d
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,28 @@
|
|
1
1
|
# Xsv Changelog
|
2
2
|
|
3
|
-
##
|
3
|
+
## 0.3.18 2020-09-30
|
4
|
+
|
5
|
+
- Improve inline string support (#18)
|
6
|
+
|
7
|
+
## 0.3.17 2020-07-03
|
8
|
+
|
9
|
+
- Fix parsing of empty worksheets (#17)
|
10
|
+
|
11
|
+
## 0.3.16 2020-06-03
|
12
|
+
|
13
|
+
- Support complex numbers (#16)
|
14
|
+
|
15
|
+
## 0.3.15 2020-06-02
|
16
|
+
|
17
|
+
- Fix issue with workbooks that don't contain shared strings (#15)
|
18
|
+
|
19
|
+
## 0.3.14 2020-05-22
|
20
|
+
|
21
|
+
- Allow opening workbooks from Tempfile and anything that responds to #read
|
22
|
+
|
23
|
+
- Preserve whitespace in text cells
|
24
|
+
|
25
|
+
## 0.3.13 2020-05-12
|
4
26
|
|
5
27
|
- Add Sheet#hidden?
|
6
28
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -11,8 +11,9 @@ deals with minimal formatting and cannot create or modify documents.
|
|
11
11
|
Xsv is designed for worksheets with a single table of data, optionally
|
12
12
|
with a header row. It only casts values to basic Ruby types (integer, float,
|
13
13
|
date and time) and does not deal with most formatting or more advanced
|
14
|
-
functionality.
|
15
|
-
minimal RAM and CPU consumption
|
14
|
+
functionality. It strives for fast processing of large worksheets with
|
15
|
+
minimal RAM and CPU consumption and has been in production use since the earliest
|
16
|
+
versions.
|
16
17
|
|
17
18
|
Xsv stands for 'Excel Separated Values', because Excel just gets in the way.
|
18
19
|
|
@@ -36,7 +37,7 @@ Xsv targets ruby ~> 2.6 and depends on `rubyzip` and `ox`.
|
|
36
37
|
|
37
38
|
## Usage
|
38
39
|
|
39
|
-
Xsv has two modes of operation. By default it returns an array for
|
40
|
+
Xsv has two modes of operation. By default, it returns an array for
|
40
41
|
each row in the sheet:
|
41
42
|
|
42
43
|
```ruby
|
@@ -123,6 +124,14 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
123
124
|
|
124
125
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
125
126
|
|
127
|
+
## Performance and Benchmarks
|
128
|
+
|
129
|
+
Xsv is faster and more memory efficient than other gems because of two things: it only _reads values_ from Excel files and it's based on a SAX-based parser instead of a DOM-based parser. If you want to read some background on this, check out my blog post on
|
130
|
+
[Efficient XML parsing in Ruby](https://storck.io/posts/efficient-xml-parsing-in-ruby/).
|
131
|
+
|
132
|
+
Jamie Schembri did a shootout of Xsv against various other Excel reading gems comparing parsing speed, memory usage and allocations.
|
133
|
+
Check out his blog post: [Faster Excel parsing in Ruby](https://blog.schembri.me/post/faster-excel-parsing-in-ruby/).
|
134
|
+
|
126
135
|
## Contributing
|
127
136
|
|
128
137
|
Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
|
data/Rakefile
CHANGED
data/lib/xsv.rb
CHANGED
@@ -8,7 +8,7 @@ require "xsv/shared_strings_parser"
|
|
8
8
|
require "xsv/sheet"
|
9
9
|
require "xsv/sheet_bounds_handler"
|
10
10
|
require "xsv/sheet_rows_handler"
|
11
|
-
require
|
11
|
+
require "xsv/sheets_ids_handler"
|
12
12
|
require "xsv/styles_handler"
|
13
13
|
require "xsv/version"
|
14
14
|
require "xsv/workbook"
|
@@ -19,6 +19,7 @@ require "xsv/workbook"
|
|
19
19
|
# deals with minimal formatting and cannot create or modify documents.
|
20
20
|
module Xsv
|
21
21
|
class Error < StandardError; end
|
22
|
+
|
22
23
|
# An AssertionFailed error indicates an unexpected condition, meaning a bug
|
23
24
|
# or misinterpreted .xlsx document
|
24
25
|
class AssertionFailed < StandardError; end
|
data/lib/xsv/helpers.rb
CHANGED
@@ -39,7 +39,7 @@ module Xsv
|
|
39
39
|
|
40
40
|
MINUTE = 60.freeze
|
41
41
|
HOUR = 3600.freeze
|
42
|
-
A_CODEPOINT =
|
42
|
+
A_CODEPOINT = "A".ord.freeze
|
43
43
|
# The epoch for all dates in OOXML Spreadsheet documents
|
44
44
|
EPOCH = Date.new(1899, 12, 30).freeze
|
45
45
|
|
@@ -53,7 +53,7 @@ module Xsv
|
|
53
53
|
|
54
54
|
# Return a Date for the given Excel date value
|
55
55
|
def parse_date(number)
|
56
|
-
|
56
|
+
EPOCH + number
|
57
57
|
end
|
58
58
|
|
59
59
|
# Return a time as a string for the given Excel time value
|
@@ -94,6 +94,8 @@ module Xsv
|
|
94
94
|
def parse_number(string)
|
95
95
|
if string.include? "."
|
96
96
|
string.to_f
|
97
|
+
elsif string.include? "E"
|
98
|
+
Complex(string).to_f
|
97
99
|
else
|
98
100
|
string.to_i
|
99
101
|
end
|
data/lib/xsv/sheet.rb
CHANGED
@@ -64,19 +64,21 @@ module Xsv
|
|
64
64
|
@state = name
|
65
65
|
@current_cell.clear
|
66
66
|
@current_value.clear
|
67
|
-
when :v
|
67
|
+
when :v, :is
|
68
68
|
@state = name
|
69
69
|
when :row
|
70
70
|
@state = name
|
71
71
|
@current_row = @empty_row.dup
|
72
72
|
@current_row_attrs.clear
|
73
|
+
when :t
|
74
|
+
@state = nil unless @state == :is
|
73
75
|
else
|
74
76
|
@state = nil
|
75
77
|
end
|
76
78
|
end
|
77
79
|
|
78
80
|
def text(value)
|
79
|
-
if @state == :v
|
81
|
+
if @state == :v || @state == :is
|
80
82
|
@current_value << value
|
81
83
|
end
|
82
84
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require
|
2
|
+
require "zip"
|
3
3
|
|
4
4
|
module Xsv
|
5
5
|
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
@@ -15,11 +15,11 @@ module Xsv
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
16
16
|
# options see {.initialize}
|
17
17
|
def self.open(data, **kws)
|
18
|
-
if data.is_a?(IO)
|
18
|
+
if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
19
19
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
20
|
-
elsif data.start_with?("PK\x03\x04")
|
20
|
+
elsif data.start_with?("PK\x03\x04") # is it a string containing the zip data itself?
|
21
21
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
22
|
-
else
|
22
|
+
else # must be a filename
|
23
23
|
@workbook = self.new(Zip::File.open(data), **kws)
|
24
24
|
end
|
25
25
|
end
|
@@ -75,7 +75,10 @@ module Xsv
|
|
75
75
|
private
|
76
76
|
|
77
77
|
def fetch_shared_strings
|
78
|
-
|
78
|
+
handle = @zip.glob("xl/sharedStrings.xml").first
|
79
|
+
return if handle.nil?
|
80
|
+
|
81
|
+
stream = handle.get_input_stream
|
79
82
|
@shared_strings = SharedStringsParser.parse(stream)
|
80
83
|
|
81
84
|
stream.close
|
@@ -91,7 +94,7 @@ module Xsv
|
|
91
94
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
92
95
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
93
96
|
end.each do |entry|
|
94
|
-
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?(
|
97
|
+
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
|
95
98
|
sheet_ids = @sheets_ids.detect { |i| i[:r_id] == rel[:Id] }
|
96
99
|
@sheets << Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
|
97
100
|
end
|
data/xsv.gemspec
CHANGED
@@ -1,23 +1,22 @@
|
|
1
|
-
|
2
1
|
lib = File.expand_path("../lib", __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require "xsv/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
6
|
+
spec.name = "xsv"
|
7
|
+
spec.version = Xsv::VERSION
|
8
|
+
spec.authors = ["Martijn Storck"]
|
9
|
+
spec.email = ["martijn@storck.io"]
|
11
10
|
|
12
|
-
spec.summary
|
13
|
-
spec.description
|
11
|
+
spec.summary = "A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't"
|
12
|
+
spec.description = <<-EOF
|
14
13
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
15
14
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
16
15
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
17
16
|
deals with minimal formatting and cannot create or modify documents.
|
18
17
|
EOF
|
19
|
-
spec.homepage
|
20
|
-
spec.license
|
18
|
+
spec.homepage = "https://github.com/martijn/xsv"
|
19
|
+
spec.license = "MIT"
|
21
20
|
|
22
21
|
if spec.respond_to?(:metadata)
|
23
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
@@ -25,19 +24,19 @@ Gem::Specification.new do |spec|
|
|
25
24
|
spec.metadata["changelog_uri"] = "https://github.com/martijn/xsv/CHANGELOG.md"
|
26
25
|
else
|
27
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
28
|
-
|
27
|
+
"public gem pushes."
|
29
28
|
end
|
30
29
|
|
31
30
|
# Specify which files should be added to the gem when it is released.
|
32
31
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
33
|
-
spec.files
|
32
|
+
spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
|
34
33
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
35
34
|
end
|
36
|
-
spec.bindir
|
37
|
-
spec.executables
|
35
|
+
spec.bindir = "exe"
|
36
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
38
37
|
spec.require_paths = ["lib"]
|
39
38
|
|
40
|
-
spec.required_ruby_version =
|
39
|
+
spec.required_ruby_version = "~> 2.5"
|
41
40
|
|
42
41
|
spec.add_dependency "rubyzip", ">= 1.3", "< 3"
|
43
42
|
spec.add_dependency "ox", ">= 2.9"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martijn Storck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -117,7 +117,6 @@ files:
|
|
117
117
|
- lib/xsv/styles_handler.rb
|
118
118
|
- lib/xsv/version.rb
|
119
119
|
- lib/xsv/workbook.rb
|
120
|
-
- test.sh
|
121
120
|
- xsv.gemspec
|
122
121
|
homepage: https://github.com/martijn/xsv
|
123
122
|
licenses:
|
data/test.sh
DELETED