xsv 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -7
- data/lib/xsv/shared_strings_parser.rb +1 -1
- data/lib/xsv/sheet.rb +4 -6
- data/lib/xsv/sheet_bounds_handler.rb +1 -1
- data/lib/xsv/sheet_rows_handler.rb +5 -7
- data/lib/xsv/styles_handler.rb +1 -1
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +9 -1
- data/xsv.gemspec +2 -2
- metadata +7 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4c6b42a947e6f518a9284de8262e1c1327267a6e1fdd55ea6d14fffba1f1b19d
|
|
4
|
+
data.tar.gz: '04887e8c4e50d3bb5762c6738237442d3b6360f4b96304b5a802215d7f4240ca'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e8ad674f6735a1711190469836f30911bb2ebce502d3e99bc268c1c439c7cf9b8f6841189c76950c1722a0861a769a00b6ee7f15e5ab1cb2763a3d6b05342057
|
|
7
|
+
data.tar.gz: 07c01bc46762348b87f745fd1c7b3aa9d16c0051916b46473a0c0c76bf4d1551f5d2cabc724a612069f3ce83eb959cbe06f4114bef361e615fc37b9ceae51193
|
data/README.md
CHANGED
|
@@ -1,10 +1,18 @@
|
|
|
1
1
|
# Xsv .xlsx reader
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
[Build status](https://travis-ci.org/martijn/xsv)
|
|
4
|
+
[API documentation](https://rubydoc.info/github/martijn/xsv)
|
|
5
|
+
|
|
6
|
+
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
7
|
+
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
8
|
+
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
9
|
+
deals with minimal formatting and cannot create or modify documents.
|
|
10
|
+
|
|
11
|
+
Xsv is designed for worksheets with a single table of data, optionally
|
|
12
|
+
with a header row. It only casts values to basic Ruby types (integer, float,
|
|
13
|
+
date and time) and does not deal with most formatting or more advanced
|
|
14
|
+
functionality. The goal is to allow for fast parsing of large worksheets with
|
|
15
|
+
minimal RAM and CPU consumption.
|
|
8
16
|
|
|
9
17
|
Xsv stands for 'Excel Separated Values', because Excel just gets in the way.
|
|
10
18
|
|
|
@@ -80,9 +88,9 @@ sheet:
|
|
|
80
88
|
|
|
81
89
|
- In array mode, your data starts on the first row
|
|
82
90
|
|
|
83
|
-
- In
|
|
91
|
+
- In hash mode the first row of the sheet contains headers, followed by rows of data
|
|
84
92
|
|
|
85
|
-
If your data or headers
|
|
93
|
+
If your data or headers do not start on the first row of the sheet you can
|
|
86
94
|
tell Xsv to skip a number of rows:
|
|
87
95
|
|
|
88
96
|
```ruby
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -28,7 +28,7 @@ module Xsv
|
|
|
28
28
|
# There is no need to create Sheets from application code.
|
|
29
29
|
#
|
|
30
30
|
# @param workbook [Workbook] The Workbook with shared data such as shared strings and styles
|
|
31
|
-
# @param io [IO] A handle to an open worksheet XML file
|
|
31
|
+
# @param io [IO] A handle to an open worksheet XML file or a string with the XML contents
|
|
32
32
|
def initialize(workbook, io)
|
|
33
33
|
@workbook = workbook
|
|
34
34
|
@io = io
|
|
@@ -45,12 +45,10 @@ module Xsv
|
|
|
45
45
|
end
|
|
46
46
|
|
|
47
47
|
# Iterate over rows, returning either hashes or arrays based on the current mode.
|
|
48
|
-
def each_row
|
|
49
|
-
@io.rewind
|
|
48
|
+
def each_row(&block)
|
|
49
|
+
@io.rewind if @io.respond_to?(:rewind)
|
|
50
50
|
|
|
51
|
-
handler = SheetRowsHandler.new(@mode, empty_row, @workbook, @row_skip, @last_row) do |row|
|
|
52
|
-
yield(row)
|
|
53
|
-
end
|
|
51
|
+
handler = SheetRowsHandler.new(@mode, empty_row, @workbook, @row_skip, @last_row, &block)
|
|
54
52
|
|
|
55
53
|
Ox.sax_parse(handler, @io)
|
|
56
54
|
|
|
data/lib/xsv/sheet_rows_handler.rb
CHANGED
|
@@ -16,9 +16,7 @@ module Xsv
|
|
|
16
16
|
when "e" # N/A
|
|
17
17
|
nil
|
|
18
18
|
when nil
|
|
19
|
-
if @
|
|
20
|
-
nil
|
|
21
|
-
elsif @current_cell[:s]
|
|
19
|
+
if @current_cell[:s]
|
|
22
20
|
style = @workbook.xfs[@current_cell[:s].to_i]
|
|
23
21
|
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
|
24
22
|
|
|
@@ -50,7 +48,7 @@ module Xsv
|
|
|
50
48
|
@current_row = {}
|
|
51
49
|
@current_row_attrs = {}
|
|
52
50
|
@current_cell = {}
|
|
53
|
-
@current_value =
|
|
51
|
+
@current_value = String.new
|
|
54
52
|
@last_row = last_row
|
|
55
53
|
|
|
56
54
|
if @mode == :hash
|
|
@@ -62,14 +60,14 @@ module Xsv
|
|
|
62
60
|
case name
|
|
63
61
|
when :c
|
|
64
62
|
@state = name
|
|
65
|
-
@current_cell
|
|
66
|
-
@current_value
|
|
63
|
+
@current_cell.clear
|
|
64
|
+
@current_value.clear
|
|
67
65
|
when :v
|
|
68
66
|
@state = name
|
|
69
67
|
when :row
|
|
70
68
|
@state = name
|
|
71
69
|
@current_row = @empty_row.dup
|
|
72
|
-
@current_row_attrs
|
|
70
|
+
@current_row_attrs.clear
|
|
73
71
|
else
|
|
74
72
|
@state = nil
|
|
75
73
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -80,7 +80,15 @@ module Xsv
|
|
|
80
80
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
|
81
81
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
|
82
82
|
end.each do |entry|
|
|
83
|
-
|
|
83
|
+
# For smaller sheets, memory performance is a lot better if Ox is
|
|
84
|
+
# handed a string. For larger sheets this leads to awful performance.
|
|
85
|
+
# This is probably caused by either something in SheetRowsHandler or
|
|
86
|
+
# the interaction between Zip::InputStream and Ox
|
|
87
|
+
if entry.size > 100_000_000
|
|
88
|
+
@sheets << Xsv::Sheet.new(self, entry.get_input_stream)
|
|
89
|
+
else
|
|
90
|
+
@sheets << Xsv::Sheet.new(self, entry.get_input_stream.read)
|
|
91
|
+
end
|
|
84
92
|
end
|
|
85
93
|
end
|
|
86
94
|
end
|
data/xsv.gemspec
CHANGED
|
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
|
11
11
|
|
|
12
12
|
spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
|
|
13
13
|
spec.description = <<-EOF
|
|
14
|
-
|
|
14
|
+
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
15
15
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
16
16
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
17
17
|
deals with minimal formatting and cannot create or modify documents.
|
|
@@ -42,7 +42,7 @@ Gem::Specification.new do |spec|
|
|
|
42
42
|
spec.add_dependency "rubyzip", "~> 2.2"
|
|
43
43
|
spec.add_dependency "ox", "~> 2.13"
|
|
44
44
|
|
|
45
|
-
spec.add_development_dependency "bundler", "
|
|
45
|
+
spec.add_development_dependency "bundler", "< 3"
|
|
46
46
|
spec.add_development_dependency "rake", "~> 13.0"
|
|
47
47
|
spec.add_development_dependency "minitest", "~> 5.0"
|
|
48
48
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.5
|
|
4
|
+
version: 0.3.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-03-
|
|
11
|
+
date: 2020-03-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -42,16 +42,16 @@ dependencies:
|
|
|
42
42
|
name: bundler
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
|
-
- - "
|
|
45
|
+
- - "<"
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: '
|
|
47
|
+
version: '3'
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
|
-
- - "
|
|
52
|
+
- - "<"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '
|
|
54
|
+
version: '3'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: rake
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -81,7 +81,7 @@ dependencies:
|
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
82
|
version: '5.0'
|
|
83
83
|
description: |2
|
|
84
|
-
|
|
84
|
+
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
|
85
85
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
|
86
86
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
|
87
87
|
deals with minimal formatting and cannot create or modify documents.
|