xsv 0.3.13 → 0.3.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/CHANGELOG.md +23 -1
- data/Gemfile +1 -1
- data/README.md +12 -3
- data/Rakefile +0 -1
- data/lib/xsv.rb +2 -1
- data/lib/xsv/helpers.rb +4 -2
- data/lib/xsv/shared_strings_parser.rb +1 -1
- data/lib/xsv/sheet.rb +1 -1
- data/lib/xsv/sheet_rows_handler.rb +4 -2
- data/lib/xsv/styles_handler.rb +2 -2
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +9 -6
- data/xsv.gemspec +13 -14
- metadata +2 -3
- data/test.sh +0 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 75a753e746b55f6e75f168b41da7ded523a4373fab7ea4d696339efe8e05372e
|
|
4
|
+
data.tar.gz: 501389046d3a77d87e1287fba7e1d138fb5ca50fdfdbaffce7294e7131506970
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f569bae5677287af25722206784cba96057191cf518e868f1db719e53cdbe4dc75135925247f88b15337ea1cc8fb036a89ac0d2ffd88e4d4cebe63285ffa2326
|
|
7
|
+
data.tar.gz: 5a8b4084ca13108cbd9b3e4dab5eef27456aeb287241b600d65c78d0e0ef2cc5cd8fa54bf9ca3f1015d826d3e9740765f5bb9c3c349ea818591befd18ac5944d
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,28 @@
|
|
|
1
1
|
# Xsv Changelog
|
|
2
2
|
|
|
3
|
-
##
|
|
3
|
+
## 0.3.18 2020-09-30
|
|
4
|
+
|
|
5
|
+
- Improve inline string support (#18)
|
|
6
|
+
|
|
7
|
+
## 0.3.17 2020-07-03
|
|
8
|
+
|
|
9
|
+
- Fix parsing of empty worksheets (#17)
|
|
10
|
+
|
|
11
|
+
## 0.3.16 2020-06-03
|
|
12
|
+
|
|
13
|
+
- Support complex numbers (#16)
|
|
14
|
+
|
|
15
|
+
## 0.3.15 2020-06-02
|
|
16
|
+
|
|
17
|
+
- Fix issue with workbooks that don't contain shared strings (#15)
|
|
18
|
+
|
|
19
|
+
## 0.3.14 2020-05-22
|
|
20
|
+
|
|
21
|
+
- Allow opening workbooks from Tempfile and anything that responds to #read
|
|
22
|
+
|
|
23
|
+
- Preserve whitespace in text cells
|
|
24
|
+
|
|
25
|
+
## 0.3.13 2020-05-12
|
|
4
26
|
|
|
5
27
|
- Add Sheet#hidden?
|
|
6
28
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -11,8 +11,9 @@ deals with minimal formatting and cannot create or modify documents.
|
|
|
11
11
|
Xsv is designed for worksheets with a single table of data, optionally
|
|
12
12
|
with a header row. It only casts values to basic Ruby types (integer, float,
|
|
13
13
|
date and time) and does not deal with most formatting or more advanced
|
|
14
|
-
functionality.
|
|
15
|
-
minimal RAM and CPU consumption
|
|
14
|
+
functionality. It strives for fast processing of large worksheets with
|
|
15
|
+
minimal RAM and CPU consumption and has been in production use since the earliest
|
|
16
|
+
versions.
|
|
16
17
|
|
|
17
18
|
Xsv stands for 'Excel Separated Values', because Excel just gets in the way.
|
|
18
19
|
|
|
@@ -36,7 +37,7 @@ Xsv targets ruby ~> 2.6 and depends on `rubyzip` and `ox`.
|
|
|
36
37
|
|
|
37
38
|
## Usage
|
|
38
39
|
|
|
39
|
-
Xsv has two modes of operation. By default it returns an array for
|
|
40
|
+
Xsv has two modes of operation. By default, it returns an array for
|
|
40
41
|
each row in the sheet:
|
|
41
42
|
|
|
42
43
|
```ruby
|
|
@@ -123,6 +124,14 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
|
123
124
|
|
|
124
125
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
125
126
|
|
|
127
|
+
## Performance and Benchmarks
|
|
128
|
+
|
|
129
|
+
Xsv is faster and more memory efficient than other gems because of two things: it only _reads values_ from Excel files and it's based on a SAX-based parser instead of a DOM-based parser. If you want to read some background on this, check out my blog post on
|
|
130
|
+
[Efficient XML parsing in Ruby](https://storck.io/posts/efficient-xml-parsing-in-ruby/).
|
|
131
|
+
|
|
132
|
+
Jamie Schembri did a shootout of Xsv against various other Excel reading gems comparing parsing speed, memory usage and allocations.
|
|
133
|
+
Check out his blog post: [Faster Excel parsing in Ruby](https://blog.schembri.me/post/faster-excel-parsing-in-ruby/).
|
|
134
|
+
|
|
126
135
|
## Contributing
|
|
127
136
|
|
|
128
137
|
Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
|
data/Rakefile
CHANGED
data/lib/xsv.rb
CHANGED
|
@@ -8,7 +8,7 @@ require "xsv/shared_strings_parser"
|
|
|
8
8
|
require "xsv/sheet"
|
|
9
9
|
require "xsv/sheet_bounds_handler"
|
|
10
10
|
require "xsv/sheet_rows_handler"
|
|
11
|
-
require
|
|
11
|
+
require "xsv/sheets_ids_handler"
|
|
12
12
|
require "xsv/styles_handler"
|
|
13
13
|
require "xsv/version"
|
|
14
14
|
require "xsv/workbook"
|
|
@@ -19,6 +19,7 @@ require "xsv/workbook"
|
|
|
19
19
|
# deals with minimal formatting and cannot create or modify documents.
|
|
20
20
|
module Xsv
|
|
21
21
|
class Error < StandardError; end
|
|
22
|
+
|
|
22
23
|
# An AssertionFailed error indicates an unexpected condition, meaning a bug
|
|
23
24
|
# or misinterpreted .xlsx document
|
|
24
25
|
class AssertionFailed < StandardError; end
|
data/lib/xsv/helpers.rb
CHANGED
|
@@ -39,7 +39,7 @@ module Xsv
|
|
|
39
39
|
|
|
40
40
|
MINUTE = 60.freeze
|
|
41
41
|
HOUR = 3600.freeze
|
|
42
|
-
A_CODEPOINT =
|
|
42
|
+
A_CODEPOINT = "A".ord.freeze
|
|
43
43
|
# The epoch for all dates in OOXML Spreadsheet documents
|
|
44
44
|
EPOCH = Date.new(1899, 12, 30).freeze
|
|
45
45
|
|
|
@@ -53,7 +53,7 @@ module Xsv
|
|
|
53
53
|
|
|
54
54
|
# Return a Date for the given Excel date value
|
|
55
55
|
def parse_date(number)
|
|
56
|
-
|
|
56
|
+
EPOCH + number
|
|
57
57
|
end
|
|
58
58
|
|
|
59
59
|
# Return a time as a string for the given Excel time value
|
|
@@ -94,6 +94,8 @@ module Xsv
|
|
|
94
94
|
def parse_number(string)
|
|
95
95
|
if string.include? "."
|
|
96
96
|
string.to_f
|
|
97
|
+
elsif string.include? "E"
|
|
98
|
+
Complex(string).to_f
|
|
97
99
|
else
|
|
98
100
|
string.to_i
|
|
99
101
|
end
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -64,19 +64,21 @@ module Xsv
|
|
|
64
64
|
@state = name
|
|
65
65
|
@current_cell.clear
|
|
66
66
|
@current_value.clear
|
|
67
|
-
when :v
|
|
67
|
+
when :v, :is
|
|
68
68
|
@state = name
|
|
69
69
|
when :row
|
|
70
70
|
@state = name
|
|
71
71
|
@current_row = @empty_row.dup
|
|
72
72
|
@current_row_attrs.clear
|
|
73
|
+
when :t
|
|
74
|
+
@state = nil unless @state == :is
|
|
73
75
|
else
|
|
74
76
|
@state = nil
|
|
75
77
|
end
|
|
76
78
|
end
|
|
77
79
|
|
|
78
80
|
def text(value)
|
|
79
|
-
if @state == :v
|
|
81
|
+
if @state == :v || @state == :is
|
|
80
82
|
@current_value << value
|
|
81
83
|
end
|
|
82
84
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
-
require
|
|
2
|
+
require "zip"
|
|
3
3
|
|
|
4
4
|
module Xsv
|
|
5
5
|
# An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
|
|
@@ -15,11 +15,11 @@ module Xsv
|
|
|
15
15
|
# Open the workbook of the given filename, string or buffer. For additional
|
|
16
16
|
# options see {.initialize}
|
|
17
17
|
def self.open(data, **kws)
|
|
18
|
-
if data.is_a?(IO)
|
|
18
|
+
if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
|
|
19
19
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
|
20
|
-
elsif data.start_with?("PK\x03\x04")
|
|
20
|
+
elsif data.start_with?("PK\x03\x04") # is it a string containing the file's contents (ZIP signature)?
|
|
21
21
|
@workbook = self.new(Zip::File.open_buffer(data), **kws)
|
|
22
|
-
else
|
|
22
|
+
else # must be a filename
|
|
23
23
|
@workbook = self.new(Zip::File.open(data), **kws)
|
|
24
24
|
end
|
|
25
25
|
end
|
|
@@ -75,7 +75,10 @@ module Xsv
|
|
|
75
75
|
private
|
|
76
76
|
|
|
77
77
|
def fetch_shared_strings
|
|
78
|
-
|
|
78
|
+
handle = @zip.glob("xl/sharedStrings.xml").first
|
|
79
|
+
return if handle.nil?
|
|
80
|
+
|
|
81
|
+
stream = handle.get_input_stream
|
|
79
82
|
@shared_strings = SharedStringsParser.parse(stream)
|
|
80
83
|
|
|
81
84
|
stream.close
|
|
@@ -91,7 +94,7 @@ module Xsv
|
|
|
91
94
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
|
92
95
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
|
93
96
|
end.each do |entry|
|
|
94
|
-
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?(
|
|
97
|
+
rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
|
|
95
98
|
sheet_ids = @sheets_ids.detect { |i| i[:r_id] == rel[:Id] }
|
|
96
99
|
@sheets << Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
|
|
97
100
|
end
|
data/xsv.gemspec
CHANGED
|
@@ -1,23 +1,22 @@
|
|
|
1
|
-
|
|
2
1
|
lib = File.expand_path("../lib", __FILE__)
|
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
3
|
require "xsv/version"
|
|
5
4
|
|
|
6
5
|
Gem::Specification.new do |spec|
|
|
7
|
-
spec.name
|
|
8
|
-
spec.version
|
|
9
|
-
spec.authors
|
|
10
|
-
spec.email
|
|
6
|
+
spec.name = "xsv"
|
|
7
|
+
spec.version = Xsv::VERSION
|
|
8
|
+
spec.authors = ["Martijn Storck"]
|
|
9
|
+
spec.email = ["martijn@storck.io"]
|
|
11
10
|
|
|
12
|
-
spec.summary
|
|
13
|
-
spec.description
|
|
11
|
+
spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
|
|
12
|
+
spec.description = <<-EOF
|
|
14
13
|
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
15
14
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
16
15
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
17
16
|
deals with minimal formatting and cannot create or modify documents.
|
|
18
17
|
EOF
|
|
19
|
-
spec.homepage
|
|
20
|
-
spec.license
|
|
18
|
+
spec.homepage = "https://github.com/martijn/xsv"
|
|
19
|
+
spec.license = "MIT"
|
|
21
20
|
|
|
22
21
|
if spec.respond_to?(:metadata)
|
|
23
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
@@ -25,19 +24,19 @@ Gem::Specification.new do |spec|
|
|
|
25
24
|
spec.metadata["changelog_uri"] = "https://github.com/martijn/xsv/CHANGELOG.md"
|
|
26
25
|
else
|
|
27
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
|
28
|
-
|
|
27
|
+
"public gem pushes."
|
|
29
28
|
end
|
|
30
29
|
|
|
31
30
|
# Specify which files should be added to the gem when it is released.
|
|
32
31
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
33
|
-
spec.files
|
|
32
|
+
spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
|
|
34
33
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
|
35
34
|
end
|
|
36
|
-
spec.bindir
|
|
37
|
-
spec.executables
|
|
35
|
+
spec.bindir = "exe"
|
|
36
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
38
37
|
spec.require_paths = ["lib"]
|
|
39
38
|
|
|
40
|
-
spec.required_ruby_version =
|
|
39
|
+
spec.required_ruby_version = "~> 2.5"
|
|
41
40
|
|
|
42
41
|
spec.add_dependency "rubyzip", ">= 1.3", "< 3"
|
|
43
42
|
spec.add_dependency "ox", ">= 2.9"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.18
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-09-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -117,7 +117,6 @@ files:
|
|
|
117
117
|
- lib/xsv/styles_handler.rb
|
|
118
118
|
- lib/xsv/version.rb
|
|
119
119
|
- lib/xsv/workbook.rb
|
|
120
|
-
- test.sh
|
|
121
120
|
- xsv.gemspec
|
|
122
121
|
homepage: https://github.com/martijn/xsv
|
|
123
122
|
licenses:
|
data/test.sh
DELETED