xsv 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -5
- data/README.md +9 -7
- data/lib/xsv.rb +5 -0
- data/lib/xsv/shared_strings_parser.rb +37 -0
- data/lib/xsv/sheet.rb +19 -105
- data/lib/xsv/sheet_bounds_handler.rb +76 -0
- data/lib/xsv/sheet_rows_handler.rb +121 -0
- data/lib/xsv/styles_handler.rb +58 -0
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/workbook.rb +15 -22
- data/xsv.gemspec +1 -1
- metadata +9 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 34a568089851462504ab294931b59d454ed2df2788282e8aa15cc166e0c45271
|
|
4
|
+
data.tar.gz: 48e148855403abc349d62093d8351d68497681b43726c5894dc45b87c964a9e7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c634494fbff9d65dc9f16af6cdc457fb8caddd56da46bdaf256562a4b4d96ffe271ece897cec1ec153befa30eb617c3d39da70c5b5250039838848464108d14c
|
|
7
|
+
data.tar.gz: f3bde0d89ddd1d8b5badf35b7da02025cddcc41b3f6127f48ec56bfcef31d22e16ff4378f3c3a79502c42089c41dea35bce8b0245920ba23c4ab7e303c6f9340
|
data/Gemfile.lock
CHANGED
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
xsv (0.2.
|
|
5
|
-
|
|
4
|
+
xsv (0.2.3)
|
|
5
|
+
ox (~> 2.13)
|
|
6
6
|
rubyzip (~> 2.2)
|
|
7
7
|
|
|
8
8
|
GEM
|
|
9
9
|
remote: https://rubygems.org/
|
|
10
10
|
specs:
|
|
11
|
-
mini_portile2 (2.4.0)
|
|
12
11
|
minitest (5.14.0)
|
|
13
|
-
|
|
14
|
-
mini_portile2 (~> 2.4.0)
|
|
12
|
+
ox (2.13.2)
|
|
15
13
|
rake (10.5.0)
|
|
16
14
|
rubyzip (2.2.0)
|
|
17
15
|
|
data/README.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# Xsv .xlsx reader
|
|
2
2
|
|
|
3
|
-
Xsv is a very basic parser for
|
|
4
|
-
provide feature parity with common CSV readers
|
|
5
|
-
it only parses values to basic Ruby types and does not
|
|
6
|
-
or more advanced functionality. The goal is to allow
|
|
7
|
-
worksheets with minimal RAM and CPU consumption.
|
|
3
|
+
Xsv is a very basic parser for Office Open XML spreadsheet files (.xlsx files)
|
|
4
|
+
that aims to provide feature parity with common CSV readers with high
|
|
5
|
+
performance. This means it only parses values to basic Ruby types and does not
|
|
6
|
+
deal with most formatting or more advanced functionality. The goal is to allow
|
|
7
|
+
for fast parsing of large worksheets with minimal RAM and CPU consumption.
|
|
8
8
|
|
|
9
|
-
Xsv stands for 'Excel Separated Values' because Excel just gets in the way.
|
|
9
|
+
Xsv stands for 'Excel Separated Values', because Excel just gets in the way.
|
|
10
10
|
|
|
11
11
|
## Installation
|
|
12
12
|
|
|
@@ -99,7 +99,9 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
|
99
99
|
|
|
100
100
|
## Contributing
|
|
101
101
|
|
|
102
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
|
102
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
|
|
103
|
+
Please provide an .xlsx file with a minimum breaking example that is acceptable
|
|
104
|
+
for inclusion in the source code repository.
|
|
103
105
|
|
|
104
106
|
## License
|
|
105
107
|
|
data/lib/xsv.rb
CHANGED
|
@@ -1,7 +1,12 @@
|
|
|
1
1
|
require "date"
|
|
2
|
+
require "ox"
|
|
2
3
|
|
|
3
4
|
require "xsv/helpers"
|
|
5
|
+
require "xsv/shared_strings_parser"
|
|
4
6
|
require "xsv/sheet"
|
|
7
|
+
require "xsv/sheet_bounds_handler"
|
|
8
|
+
require "xsv/sheet_rows_handler"
|
|
9
|
+
require "xsv/styles_handler"
|
|
5
10
|
require "xsv/version"
|
|
6
11
|
require "xsv/workbook"
|
|
7
12
|
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module Xsv
|
|
2
|
+
class SharedStringsParser < Ox::Sax
|
|
3
|
+
def self.parse(io)
|
|
4
|
+
strings = []
|
|
5
|
+
handler = new { |s| strings << s }
|
|
6
|
+
Ox.sax_parse(handler, io)
|
|
7
|
+
return strings
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def initialize(&block)
|
|
11
|
+
@block = block
|
|
12
|
+
@state = nil
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def start_element(name)
|
|
16
|
+
case name
|
|
17
|
+
when :si
|
|
18
|
+
@current_string = ""
|
|
19
|
+
when :t
|
|
20
|
+
@state = name
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def text(value)
|
|
25
|
+
@current_string += value if @state == :t
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def end_element(name)
|
|
29
|
+
case name
|
|
30
|
+
when :si
|
|
31
|
+
@block.call(@current_string)
|
|
32
|
+
when :t
|
|
33
|
+
@state = nil
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -3,26 +3,19 @@ module Xsv
|
|
|
3
3
|
include Enumerable
|
|
4
4
|
include Xsv::Helpers
|
|
5
5
|
|
|
6
|
-
attr_reader :
|
|
6
|
+
attr_reader :mode
|
|
7
7
|
|
|
8
8
|
# Set a number of rows to skip at the top of the sheet (header row offset)
|
|
9
9
|
attr_accessor :row_skip
|
|
10
10
|
|
|
11
|
-
def initialize(workbook,
|
|
11
|
+
def initialize(workbook, io)
|
|
12
12
|
@workbook = workbook
|
|
13
|
-
@
|
|
13
|
+
@io = io
|
|
14
14
|
@headers = []
|
|
15
15
|
@mode = :array
|
|
16
16
|
@row_skip = 0
|
|
17
17
|
|
|
18
|
-
@
|
|
19
|
-
|
|
20
|
-
if @has_cells
|
|
21
|
-
@column_count, @last_row = get_sheet_dimensions
|
|
22
|
-
else
|
|
23
|
-
@column_count = 0
|
|
24
|
-
@last_row = 0
|
|
25
|
-
end
|
|
18
|
+
@last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
|
|
26
19
|
end
|
|
27
20
|
|
|
28
21
|
def inspect
|
|
@@ -31,30 +24,14 @@ module Xsv
|
|
|
31
24
|
|
|
32
25
|
# Iterate over rows
|
|
33
26
|
def each_row
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@xml.css("sheetData row").each do |row_xml|
|
|
37
|
-
if row_index < 0
|
|
38
|
-
row_index += 1
|
|
39
|
-
next
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
row_index += 1
|
|
43
|
-
|
|
44
|
-
next if row_index == 1 && @mode == :hash
|
|
45
|
-
|
|
46
|
-
# pad empty rows
|
|
47
|
-
while row_index < row_xml["r"].to_i - @row_skip do
|
|
48
|
-
yield(empty_row)
|
|
49
|
-
row_index += 1
|
|
50
|
-
end
|
|
27
|
+
@io.rewind
|
|
51
28
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
# Do not return empty trailing rows
|
|
55
|
-
break if row_index == @last_row - @row_skip
|
|
29
|
+
handler = SheetRowsHandler.new(@mode, empty_row, @workbook, @row_skip, @last_row) do |row|
|
|
30
|
+
yield(row)
|
|
56
31
|
end
|
|
57
32
|
|
|
33
|
+
Ox.sax_parse(handler, @io)
|
|
34
|
+
|
|
58
35
|
true
|
|
59
36
|
end
|
|
60
37
|
|
|
@@ -62,13 +39,11 @@ module Xsv
|
|
|
62
39
|
|
|
63
40
|
# Get row by number, starting at 0
|
|
64
41
|
def [](number)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
if row_xml
|
|
68
|
-
parse_row(row_xml)
|
|
69
|
-
else
|
|
70
|
-
empty_row
|
|
42
|
+
each_with_index do |row, i|
|
|
43
|
+
return row if i == number
|
|
71
44
|
end
|
|
45
|
+
|
|
46
|
+
return empty_row
|
|
72
47
|
end
|
|
73
48
|
|
|
74
49
|
# Load headers in the top row of the worksheet. After parsing of headers
|
|
@@ -91,7 +66,12 @@ module Xsv
|
|
|
91
66
|
private
|
|
92
67
|
|
|
93
68
|
def parse_headers
|
|
94
|
-
|
|
69
|
+
if @mode == :array
|
|
70
|
+
first
|
|
71
|
+
elsif @mode == :hash
|
|
72
|
+
@mode == :array
|
|
73
|
+
headers.tap { @mode = :hash }
|
|
74
|
+
end
|
|
95
75
|
end
|
|
96
76
|
|
|
97
77
|
def empty_row
|
|
@@ -102,71 +82,5 @@ module Xsv
|
|
|
102
82
|
@headers.zip([]).to_h
|
|
103
83
|
end
|
|
104
84
|
end
|
|
105
|
-
|
|
106
|
-
def parse_row(xml, mode = nil)
|
|
107
|
-
mode ||= @mode
|
|
108
|
-
row = empty_row
|
|
109
|
-
|
|
110
|
-
xml.css("c").first(@column_count).each do |c_xml|
|
|
111
|
-
value = case c_xml["t"]
|
|
112
|
-
when "s"
|
|
113
|
-
@workbook.shared_strings[c_xml.css("v").inner_text.to_i]
|
|
114
|
-
when "str"
|
|
115
|
-
c_xml.css("v").inner_text.to_s
|
|
116
|
-
when "e" # N/A
|
|
117
|
-
nil
|
|
118
|
-
when nil
|
|
119
|
-
v = c_xml.at_css("v")
|
|
120
|
-
|
|
121
|
-
if v.nil?
|
|
122
|
-
nil
|
|
123
|
-
elsif c_xml["s"]
|
|
124
|
-
style = @workbook.xfs[c_xml["s"].to_i]
|
|
125
|
-
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
|
126
|
-
|
|
127
|
-
parse_number_format(v.inner_text, numFmt)
|
|
128
|
-
else
|
|
129
|
-
parse_number(v.inner_text)
|
|
130
|
-
end
|
|
131
|
-
else
|
|
132
|
-
raise Xsv::Error, "Encountered unknown column type #{c_xml["t"]}"
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
# Determine column position and pad row with nil values
|
|
136
|
-
col_index = column_index(c_xml["r"])
|
|
137
|
-
|
|
138
|
-
case mode
|
|
139
|
-
when :array
|
|
140
|
-
row[col_index] = value
|
|
141
|
-
when :hash
|
|
142
|
-
row[@headers[col_index]] = value
|
|
143
|
-
end
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
row
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
# Read or estimate outer bounds of sheet
|
|
150
|
-
def get_sheet_dimensions
|
|
151
|
-
dimension = xml.at_css("dimension")
|
|
152
|
-
|
|
153
|
-
if dimension
|
|
154
|
-
_firstCell, lastCell = dimension["ref"].split(":")
|
|
155
|
-
end
|
|
156
|
-
|
|
157
|
-
if lastCell
|
|
158
|
-
# Assume the dimension reflects the content
|
|
159
|
-
column_count = column_index(lastCell) + 1
|
|
160
|
-
else
|
|
161
|
-
# Find the last cell in every row that has a value
|
|
162
|
-
rightmost_cells = @xml.xpath("//xmlns:row/xmlns:c[*[local-name() = 'v']][last()]").map { |c| column_index(c["r"]) }
|
|
163
|
-
column_count = rightmost_cells.max + 1
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
# Find the last row that contains actual values
|
|
167
|
-
last_row = @xml.at_xpath("//xmlns:row[*[xmlns:v]][last()]")["r"].to_i
|
|
168
|
-
|
|
169
|
-
return [column_count, last_row]
|
|
170
|
-
end
|
|
171
85
|
end
|
|
172
86
|
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
module Xsv
|
|
2
|
+
# SheetBoundsHandler scans a sheet looking for the outer bounds of the content within
|
|
3
|
+
class SheetBoundsHandler < Ox::Sax
|
|
4
|
+
include Xsv::Helpers
|
|
5
|
+
|
|
6
|
+
def self.get_bounds(sheet, workbook)
|
|
7
|
+
rows = 0
|
|
8
|
+
cols = 0
|
|
9
|
+
|
|
10
|
+
handler = new(workbook.trim_empty_rows) do |row, col|
|
|
11
|
+
rows = row
|
|
12
|
+
cols = col == 0 ? 0 : col + 1
|
|
13
|
+
|
|
14
|
+
return rows, cols
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
sheet.rewind
|
|
18
|
+
Ox.sax_parse(handler, sheet)
|
|
19
|
+
|
|
20
|
+
return rows, cols
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Ox::Sax implementation
|
|
24
|
+
|
|
25
|
+
def initialize(trim_empty_rows, &block)
|
|
26
|
+
@block = block
|
|
27
|
+
@state = nil
|
|
28
|
+
@cell = nil
|
|
29
|
+
@row = nil
|
|
30
|
+
@maxRow = 0
|
|
31
|
+
@maxColumn = 0
|
|
32
|
+
@trim_empty_rows = trim_empty_rows
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def start_element(name)
|
|
36
|
+
case name
|
|
37
|
+
when :c
|
|
38
|
+
@state = name
|
|
39
|
+
@cell = nil
|
|
40
|
+
when :v
|
|
41
|
+
col = column_index(@cell)
|
|
42
|
+
@maxColumn = col if col > @maxColumn
|
|
43
|
+
@maxRow = @row if @row > @maxRow
|
|
44
|
+
when :row
|
|
45
|
+
@state = name
|
|
46
|
+
@row = nil
|
|
47
|
+
when :dimension
|
|
48
|
+
@state = name
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def end_element(name)
|
|
53
|
+
if name == :sheetData
|
|
54
|
+
@block.call(@maxRow, @maxColumn)
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def attr(name, value)
|
|
59
|
+
if @state == :c && name == :r
|
|
60
|
+
@cell = value
|
|
61
|
+
elsif @state == :row && name == :r
|
|
62
|
+
@row = value.to_i
|
|
63
|
+
elsif @state == :dimension && name == :ref
|
|
64
|
+
_firstCell, lastCell = value.split(":")
|
|
65
|
+
|
|
66
|
+
if lastCell
|
|
67
|
+
@maxColumn = column_index(lastCell)
|
|
68
|
+
unless @trim_empty_rows
|
|
69
|
+
@maxRow = lastCell[/\d+$/].to_i
|
|
70
|
+
@block.call(@maxRow, @maxColumn)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
module Xsv
|
|
2
|
+
class SheetRowsHandler < Ox::Sax
|
|
3
|
+
include Xsv::Helpers
|
|
4
|
+
|
|
5
|
+
def format_cell
|
|
6
|
+
case @current_cell[:t]
|
|
7
|
+
when "s"
|
|
8
|
+
@workbook.shared_strings[@current_value.to_i]
|
|
9
|
+
when "str"
|
|
10
|
+
@current_value
|
|
11
|
+
when "e" # N/A
|
|
12
|
+
nil
|
|
13
|
+
when nil
|
|
14
|
+
if @current_value == ""
|
|
15
|
+
nil
|
|
16
|
+
elsif @current_cell[:s]
|
|
17
|
+
style = @workbook.xfs[@current_cell[:s].to_i]
|
|
18
|
+
numFmt = @workbook.numFmts[style[:numFmtId].to_i]
|
|
19
|
+
|
|
20
|
+
parse_number_format(@current_value, numFmt)
|
|
21
|
+
else
|
|
22
|
+
parse_number(@current_value)
|
|
23
|
+
end
|
|
24
|
+
else
|
|
25
|
+
raise Xsv::Error, "Encountered unknown column type #{@current_cell[:t]}"
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Ox::Sax implementation below
|
|
30
|
+
|
|
31
|
+
def initialize(mode, empty_row, workbook, row_skip, last_row, &block)
|
|
32
|
+
@block = block
|
|
33
|
+
|
|
34
|
+
# :sheetData
|
|
35
|
+
# :row
|
|
36
|
+
# :c
|
|
37
|
+
# :v
|
|
38
|
+
@state = nil
|
|
39
|
+
|
|
40
|
+
@mode = mode
|
|
41
|
+
@empty_row = empty_row
|
|
42
|
+
@workbook = workbook
|
|
43
|
+
@row_skip = row_skip
|
|
44
|
+
@row_index = 0 - @row_skip
|
|
45
|
+
@current_row = {}
|
|
46
|
+
@current_row_attrs = {}
|
|
47
|
+
@current_cell = {}
|
|
48
|
+
@current_value = nil
|
|
49
|
+
@last_row = last_row
|
|
50
|
+
|
|
51
|
+
if @mode == :hash
|
|
52
|
+
@headers = @empty_row.keys
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def start_element(name)
|
|
57
|
+
case name
|
|
58
|
+
when :c
|
|
59
|
+
@state = name
|
|
60
|
+
@current_cell = {}
|
|
61
|
+
@current_value = ""
|
|
62
|
+
when :v
|
|
63
|
+
@state = name
|
|
64
|
+
when :row
|
|
65
|
+
@state = name
|
|
66
|
+
@current_row = @empty_row.dup
|
|
67
|
+
@current_row_attrs = {}
|
|
68
|
+
else
|
|
69
|
+
@state = nil
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def text(value)
|
|
74
|
+
if @state == :v
|
|
75
|
+
@current_value += value
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def attr(name, value)
|
|
80
|
+
case @state
|
|
81
|
+
when :c
|
|
82
|
+
@current_cell[name] = value
|
|
83
|
+
when :row
|
|
84
|
+
@current_row_attrs[name] = value
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def end_element(name)
|
|
89
|
+
case name
|
|
90
|
+
when :c
|
|
91
|
+
col_index = column_index(@current_cell[:r])
|
|
92
|
+
|
|
93
|
+
case @mode
|
|
94
|
+
when :array
|
|
95
|
+
@current_row[col_index] = format_cell
|
|
96
|
+
when :hash
|
|
97
|
+
@current_row[@headers[col_index]] = format_cell
|
|
98
|
+
end
|
|
99
|
+
when :row
|
|
100
|
+
if @row_index < 0
|
|
101
|
+
@row_index += 1
|
|
102
|
+
return
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
@row_index += 1
|
|
106
|
+
|
|
107
|
+
# Skip first row if we're in hash mode
|
|
108
|
+
return if @row_index == 1 && @mode == :hash
|
|
109
|
+
|
|
110
|
+
# Pad empty rows
|
|
111
|
+
while @row_index < @current_row_attrs[:r].to_i - @row_skip
|
|
112
|
+
@block.call(@empty_row)
|
|
113
|
+
@row_index += 1
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Do not return empty trailing rows
|
|
117
|
+
@block.call(@current_row) unless @row_index > @last_row - @row_skip
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
module Xsv
|
|
2
|
+
# StylesHandler interprets the relevant parts of styles.xml
|
|
3
|
+
class StylesHandler < Ox::Sax
|
|
4
|
+
def self.get_styles(io, numFmts)
|
|
5
|
+
@xfs = nil
|
|
6
|
+
@numFmts = nil
|
|
7
|
+
handler = new(numFmts) do |xfs, numFmts|
|
|
8
|
+
@xfs = xfs
|
|
9
|
+
@numFmts = numFmts
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
Ox.sax_parse(handler, io)
|
|
13
|
+
return @xfs, @numFmts
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Ox::Sax implementation
|
|
17
|
+
|
|
18
|
+
def initialize(numFmts, &block)
|
|
19
|
+
@block = block
|
|
20
|
+
@state = nil
|
|
21
|
+
@xfs = []
|
|
22
|
+
@numFmts = numFmts
|
|
23
|
+
|
|
24
|
+
@xf = {}
|
|
25
|
+
@numFmt = {}
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def start_element(name)
|
|
29
|
+
case name
|
|
30
|
+
when :cellXfs, :numFmts
|
|
31
|
+
@state = name
|
|
32
|
+
when :xf
|
|
33
|
+
@xf = {}
|
|
34
|
+
when :numFmt
|
|
35
|
+
@numFmt = {}
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def attr(name, value)
|
|
40
|
+
case @state
|
|
41
|
+
when :cellXfs
|
|
42
|
+
@xf[name] = value
|
|
43
|
+
when :numFmts
|
|
44
|
+
@numFmt[name] = value
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def end_element(name)
|
|
49
|
+
if @state == :cellXfs && name == :xf
|
|
50
|
+
@xfs << @xf
|
|
51
|
+
elsif @state == :numFmts && name == :numFmt
|
|
52
|
+
@numFmts[@numFmt[:numFmtId].to_i] = @numFmt[:formatCode]
|
|
53
|
+
elsif name == :styleSheet
|
|
54
|
+
@block.call(@xfs, @numFmts)
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/workbook.rb
CHANGED
|
@@ -4,22 +4,28 @@ require 'zip'
|
|
|
4
4
|
module Xsv
|
|
5
5
|
class Workbook
|
|
6
6
|
|
|
7
|
-
attr_reader :sheets, :shared_strings, :xfs, :numFmts
|
|
7
|
+
attr_reader :sheets, :shared_strings, :xfs, :numFmts, :trim_empty_rows
|
|
8
8
|
|
|
9
9
|
# Open the workbook of the given filename, string or buffer
|
|
10
|
-
def self.open(data)
|
|
10
|
+
def self.open(data, **kws)
|
|
11
11
|
if data.is_a?(IO)
|
|
12
|
-
@workbook = self.new(Zip::File.open_buffer(data))
|
|
12
|
+
@workbook = self.new(Zip::File.open_buffer(data), kws)
|
|
13
13
|
elsif data.start_with?("PK\x03\x04")
|
|
14
|
-
@workbook = self.new(Zip::File.open_buffer(data))
|
|
14
|
+
@workbook = self.new(Zip::File.open_buffer(data), kws)
|
|
15
15
|
else
|
|
16
|
-
@workbook = self.new(Zip::File.open(data))
|
|
16
|
+
@workbook = self.new(Zip::File.open(data), kws)
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
# Open a workbook from an instance of Zip::File
|
|
21
|
-
|
|
21
|
+
#
|
|
22
|
+
# Options:
|
|
23
|
+
#
|
|
24
|
+
# trim_empty_rows (false) Scan sheet for end of content and don't return trailing rows
|
|
25
|
+
#
|
|
26
|
+
def initialize(zip, trim_empty_rows: false)
|
|
22
27
|
@zip = zip
|
|
28
|
+
@trim_empty_rows = trim_empty_rows
|
|
23
29
|
|
|
24
30
|
@sheets = []
|
|
25
31
|
@xfs = []
|
|
@@ -38,35 +44,22 @@ module Xsv
|
|
|
38
44
|
|
|
39
45
|
def fetch_shared_strings
|
|
40
46
|
stream = @zip.glob("xl/sharedStrings.xml").first.get_input_stream
|
|
41
|
-
|
|
42
|
-
expected_count = xml.at_css("sst")["uniqueCount"].to_i
|
|
43
|
-
@shared_strings = xml.css("sst si").map { |si| si.css("t").map(&:inner_text).join }
|
|
44
|
-
|
|
45
|
-
if @shared_strings.count != expected_count
|
|
46
|
-
raise Xsv::AssertionFailed, "Mismatch in shared strings count! #{expected_count} <> #{@shared_strings.count}"
|
|
47
|
-
end
|
|
47
|
+
@shared_strings = SharedStringsParser.parse(stream)
|
|
48
48
|
|
|
49
49
|
stream.close
|
|
50
50
|
end
|
|
51
51
|
|
|
52
52
|
def fetch_styles
|
|
53
53
|
stream = @zip.glob("xl/styles.xml").first.get_input_stream
|
|
54
|
-
xml = Nokogiri::XML(stream)
|
|
55
54
|
|
|
56
|
-
|
|
57
|
-
@xfs << xf.attributes.map { |k, v| [k.to_sym, v.value] }.to_h
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
xml.css("numFmts numFmt").each do |numFmt|
|
|
61
|
-
@numFmts[numFmt["numFmtId"].to_i] = numFmt["formatCode"]
|
|
62
|
-
end
|
|
55
|
+
@xfs, @numFmts = StylesHandler.get_styles(stream, @numFmts)
|
|
63
56
|
end
|
|
64
57
|
|
|
65
58
|
def fetch_sheets
|
|
66
59
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
|
|
67
60
|
a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
|
|
68
61
|
end.each do |entry|
|
|
69
|
-
@sheets << Xsv::Sheet.new(self,
|
|
62
|
+
@sheets << Xsv::Sheet.new(self, entry.get_input_stream)
|
|
70
63
|
end
|
|
71
64
|
end
|
|
72
65
|
end
|
data/xsv.gemspec
CHANGED
|
@@ -34,7 +34,7 @@ Gem::Specification.new do |spec|
|
|
|
34
34
|
spec.required_ruby_version = '~> 2.6'
|
|
35
35
|
|
|
36
36
|
spec.add_dependency "rubyzip", "~> 2.2"
|
|
37
|
-
spec.add_dependency "
|
|
37
|
+
spec.add_dependency "ox", "~> 2.13"
|
|
38
38
|
|
|
39
39
|
spec.add_development_dependency "bundler", "~> 1.17"
|
|
40
40
|
spec.add_development_dependency "rake", "~> 10.0"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-02-
|
|
11
|
+
date: 2020-02-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -25,19 +25,19 @@ dependencies:
|
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '2.2'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
|
-
name:
|
|
28
|
+
name: ox
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
31
|
- - "~>"
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: '
|
|
33
|
+
version: '2.13'
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - "~>"
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: '
|
|
40
|
+
version: '2.13'
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: bundler
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -98,7 +98,11 @@ files:
|
|
|
98
98
|
- bin/setup
|
|
99
99
|
- lib/xsv.rb
|
|
100
100
|
- lib/xsv/helpers.rb
|
|
101
|
+
- lib/xsv/shared_strings_parser.rb
|
|
101
102
|
- lib/xsv/sheet.rb
|
|
103
|
+
- lib/xsv/sheet_bounds_handler.rb
|
|
104
|
+
- lib/xsv/sheet_rows_handler.rb
|
|
105
|
+
- lib/xsv/styles_handler.rb
|
|
102
106
|
- lib/xsv/version.rb
|
|
103
107
|
- lib/xsv/workbook.rb
|
|
104
108
|
- test.sh
|