xsv 0.3.5 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +15 -7
- data/lib/xsv/shared_strings_parser.rb +1 -1
- data/lib/xsv/sheet.rb +4 -6
- data/lib/xsv/sheet_bounds_handler.rb +1 -1
- data/lib/xsv/sheet_rows_handler.rb +5 -7
- data/lib/xsv/styles_handler.rb +1 -1
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +9 -1
- data/xsv.gemspec +2 -2
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c6b42a947e6f518a9284de8262e1c1327267a6e1fdd55ea6d14fffba1f1b19d
|
4
|
+
data.tar.gz: '04887e8c4e50d3bb5762c6738237442d3b6360f4b96304b5a802215d7f4240ca'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8ad674f6735a1711190469836f30911bb2ebce502d3e99bc268c1c439c7cf9b8f6841189c76950c1722a0861a769a00b6ee7f15e5ab1cb2763a3d6b05342057
|
7
|
+
data.tar.gz: 07c01bc46762348b87f745fd1c7b3aa9d16c0051916b46473a0c0c76bf4d1551f5d2cabc724a612069f3ce83eb959cbe06f4114bef361e615fc37b9ceae51193
|
data/README.md
CHANGED
@@ -1,10 +1,18 @@
|
|
1
1
|
# Xsv .xlsx reader
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
[![Travis CI](https://img.shields.io/travis/martijn/xsv/master)](https://travis-ci.org/martijn/xsv)
|
4
|
+
[![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://rubydoc.info/github/martijn/xsv)
|
5
|
+
|
6
|
+
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
7
|
+
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
8
|
+
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
9
|
+
deals with minimal formatting and cannot create or modify documents.
|
10
|
+
|
11
|
+
Xsv is designed for worksheets with a single table of data, optionally
|
12
|
+
with a header row. It only casts values to basic Ruby types (integer, float,
|
13
|
+
date and time) and does not deal with most formatting or more advanced
|
14
|
+
functionality. The goal is to allow for fast parsing of large worksheets with
|
15
|
+
minimal RAM and CPU consumption.
|
8
16
|
|
9
17
|
Xsv stands for 'Excel Separated Values', because Excel just gets in the way.
|
10
18
|
|
@@ -80,9 +88,9 @@ sheet:
|
|
80
88
|
|
81
89
|
- In array mode, your data starts on the first row
|
82
90
|
|
83
|
-
- In
|
91
|
+
- In hash mode the first row of the sheet contains headers, followed by rows of data
|
84
92
|
|
85
|
-
If your data or headers
|
93
|
+
If your data or headers do not start on the first row of the sheet you can
|
86
94
|
tell Xsv to skip a number of rows:
|
87
95
|
|
88
96
|
```ruby
|
data/lib/xsv/sheet.rb
CHANGED
@@ -28,7 +28,7 @@ module Xsv
|
|
28
28
|
# There is no need to create Sheets from application code.
|
29
29
|
#
|
30
30
|
# @param workbook [Workbook] The Workbook with shared data such as shared strings and styles
|
31
|
-
# @param io [IO] A handle to an open worksheet XML file
|
31
|
+
# @param io [IO] A handle to an open worksheet XML file or a string with the XML contents
|
32
32
|
def initialize(workbook, io)
|
33
33
|
@workbook = workbook
|
34
34
|
@io = io
|
@@ -45,12 +45,10 @@ module Xsv
|
|
45
45
|
end
|
46
46
|
|
47
47
|
# Iterate over rows, returning either hashes or arrays based on the current mode.
|
48
|
-
def each_row
|
49
|
-
@io.rewind
|
48
|
+
def each_row(&block)
|
49
|
+
@io.rewind if @io.respond_to?(:rewind)
|
50
50
|
|
51
|
-
handler = SheetRowsHandler.new(@mode, empty_row, @workbook, @row_skip, @last_row)
|
52
|
-
yield(row)
|
53
|
-
end
|
51
|
+
handler = SheetRowsHandler.new(@mode, empty_row, @workbook, @row_skip, @last_row, &block)
|
54
52
|
|
55
53
|
Ox.sax_parse(handler, @io)
|
56
54
|
|
data/lib/xsv/sheet_rows_handler.rb
CHANGED
@@ -16,9 +16,7 @@ module Xsv
|
|
16
16
|
when "e" # N/A
|
17
17
|
nil
|
18
18
|
when nil
|
19
|
-
if @
|
20
|
-
nil
|
21
|
-
elsif @current_cell[:s]
|
19
|
+
if @current_cell[:s]
|
22
20
|
style = @workbook.xfs[@current_cell[:s].to_i]
|
23
21
|
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
24
22
|
|
@@ -50,7 +48,7 @@ module Xsv
|
|
50
48
|
@current_row = {}
|
51
49
|
@current_row_attrs = {}
|
52
50
|
@current_cell = {}
|
53
|
-
@current_value =
|
51
|
+
@current_value = String.new
|
54
52
|
@last_row = last_row
|
55
53
|
|
56
54
|
if @mode == :hash
|
@@ -62,14 +60,14 @@ module Xsv
|
|
62
60
|
case name
|
63
61
|
when :c
|
64
62
|
@state = name
|
65
|
-
@current_cell
|
66
|
-
@current_value
|
63
|
+
@current_cell.clear
|
64
|
+
@current_value.clear
|
67
65
|
when :v
|
68
66
|
@state = name
|
69
67
|
when :row
|
70
68
|
@state = name
|
71
69
|
@current_row = @empty_row.dup
|
72
|
-
@current_row_attrs
|
70
|
+
@current_row_attrs.clear
|
73
71
|
else
|
74
72
|
@state = nil
|
75
73
|
end
|
data/lib/xsv/styles_handler.rb
CHANGED
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
@@ -80,7 +80,15 @@ module Xsv
|
|
80
80
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
81
81
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
82
82
|
end.each do |entry|
|
83
|
-
|
83
|
+
# For smaller sheets, memory performance is a lot better if Ox is
|
84
|
+
# handed a string. For larger sheets this leads to awful performance.
|
85
|
+
# This is probably caused by either something in SheetRowsHandler or
|
86
|
+
# the interaction between Zip::InputStream and Ox
|
87
|
+
if entry.size > 100_000_000
|
88
|
+
@sheets << Xsv::Sheet.new(self, entry.get_input_stream)
|
89
|
+
else
|
90
|
+
@sheets << Xsv::Sheet.new(self, entry.get_input_stream.read)
|
91
|
+
end
|
84
92
|
end
|
85
93
|
end
|
86
94
|
end
|
data/xsv.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
|
12
12
|
spec.summary = "A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't"
|
13
13
|
spec.description = <<-EOF
|
14
|
-
|
14
|
+
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
15
15
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
16
16
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
17
17
|
deals with minimal formatting and cannot create or modify documents.
|
@@ -42,7 +42,7 @@ Gem::Specification.new do |spec|
|
|
42
42
|
spec.add_dependency "rubyzip", "~> 2.2"
|
43
43
|
spec.add_dependency "ox", "~> 2.13"
|
44
44
|
|
45
|
-
spec.add_development_dependency "bundler", "
|
45
|
+
spec.add_development_dependency "bundler", "< 3"
|
46
46
|
spec.add_development_dependency "rake", "~> 13.0"
|
47
47
|
spec.add_development_dependency "minitest", "~> 5.0"
|
48
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martijn Storck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -42,16 +42,16 @@ dependencies:
|
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "<"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '3'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "<"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '3'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,7 +81,7 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '5.0'
|
83
83
|
description: |2
|
84
|
-
|
84
|
+
Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
|
85
85
|
(commonly known as Excel or .xlsx files). It strives to be minimal in the
|
86
86
|
sense that it provides nothing a CSV reader wouldn't, meaning it only
|
87
87
|
deals with minimal formatting and cannot create or modify documents.
|