xsv 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +40 -8
- data/lib/xsv/helpers.rb +71 -0
- data/lib/xsv/sheet.rb +60 -30
- data/lib/xsv/version.rb +1 -1
- data/lib/xsv/{file.rb → workbook.rb} +28 -9
- data/lib/xsv.rb +3 -1
- data/test.sh +3 -0
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 47e4ee16a95b100a1c1bbc526912235bbd1386601a33e3c3449320fe4ea8bc52
|
|
4
|
+
data.tar.gz: 5b0f8320ff29a3dd036cf4396052cbcee00556c2f116dce63c81eb6f4bb69e2a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 40bde712c1df13d4fd330b24fdd3cb9e40e5983b4271c172b6f77245aa4d57b8aadc470be5faffb5c759e807c6bf5a507f87139279b86395cddf0fc70b5446fb
|
|
7
|
+
data.tar.gz: 943f5436b416f226f8bcfa2a856dc8079f061be20ca8cc534a9e147692906a280f85f6eb300b7c4e4681ce70a6c5bb81029c6abb70deab87950919e21f03104a
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# Xsv .xlsx reader
|
|
2
2
|
|
|
3
3
|
Xsv is a very basic parser for Excel files in the .xlsx format that strives to
|
|
4
|
-
provide feature parity with common CSV readers and nothing more. This
|
|
5
|
-
|
|
4
|
+
provide feature parity with common CSV readers and nothing more. This means
|
|
5
|
+
it only parses values to basic Ruby types and does not deal with formatting
|
|
6
|
+
or more advanced functionality. The goal is to allow for fast parsing of large
|
|
7
|
+
worksheets with minimal RAM and CPU consumption.
|
|
6
8
|
|
|
7
9
|
Xsv stands for 'Excel Separated Values' because Excel just gets in the way.
|
|
8
10
|
|
|
@@ -24,18 +26,48 @@ Or install it yourself as:
|
|
|
24
26
|
|
|
25
27
|
## Usage
|
|
26
28
|
|
|
29
|
+
Xsv has two modes of operation. By default it returns an array for
|
|
30
|
+
each row in the sheet:
|
|
31
|
+
|
|
27
32
|
```ruby
|
|
28
|
-
x = Xsv::
|
|
33
|
+
x = Xsv::Workbook.open("sheet.xlsx")
|
|
34
|
+
|
|
35
|
+
sheet = x.sheets[0]
|
|
29
36
|
|
|
30
|
-
|
|
31
|
-
|
|
37
|
+
# Iterate over rows
|
|
38
|
+
sheet.each_row do |row|
|
|
39
|
+
row # => ["header1", "header2"], etc.
|
|
32
40
|
end
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
41
|
+
|
|
42
|
+
# Access row by index (zero-based)
|
|
43
|
+
sheet[1] # => ["value1", "value2"]
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Alternatively, it can load the headers from the first row and return a hash
|
|
47
|
+
for every row:
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
x = Xsv::Workbook.open("sheet.xlsx")
|
|
51
|
+
|
|
52
|
+
sheet = x.sheets[0]
|
|
53
|
+
|
|
54
|
+
sheet.mode # => :array
|
|
55
|
+
|
|
56
|
+
# Parse headers and switch to hash mode
|
|
57
|
+
sheet.parse_headers!
|
|
58
|
+
|
|
59
|
+
sheet.mode # => :hash
|
|
60
|
+
|
|
61
|
+
sheet.each_row do |row|
|
|
62
|
+
row # => {"header1" => "value1", "header2" => "value2"}, etc.
|
|
36
63
|
end
|
|
64
|
+
|
|
65
|
+
sheet[1] # => {"header1" => "value1", "header2" => "value2"}
|
|
37
66
|
```
|
|
38
67
|
|
|
68
|
+
Be aware that hash mode will lead to unpredictable results if you have multiple
|
|
69
|
+
columns with the same name!
|
|
70
|
+
|
|
39
71
|
## Development
|
|
40
72
|
|
|
41
73
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/xsv/helpers.rb
CHANGED
|
@@ -1,5 +1,39 @@
|
|
|
1
1
|
module Xsv
|
|
2
2
|
module Helpers
|
|
3
|
+
BUILT_IN_NUMBER_FORMATS = {
|
|
4
|
+
1 => "0",
|
|
5
|
+
2 => "0.00",
|
|
6
|
+
3 => "#, ##0",
|
|
7
|
+
4 => "#, ##0.00",
|
|
8
|
+
5 => "$#, ##0_);($#, ##0)",
|
|
9
|
+
6 => "$#, ##0_);[Red]($#, ##0)",
|
|
10
|
+
7 => "$#, ##0.00_);($#, ##0.00)",
|
|
11
|
+
8 => "$#, ##0.00_);[Red]($#, ##0.00)",
|
|
12
|
+
9 => "0%",
|
|
13
|
+
10 => "0.00%",
|
|
14
|
+
11 => "0.00E+00",
|
|
15
|
+
12 => "# ?/?",
|
|
16
|
+
13 => "# ??/??",
|
|
17
|
+
14 => "m/d/yyyy",
|
|
18
|
+
15 => "d-mmm-yy",
|
|
19
|
+
16 => "d-mmm",
|
|
20
|
+
17 => "mmm-yy",
|
|
21
|
+
18 => "h:mm AM/PM",
|
|
22
|
+
19 => "h:mm:ss AM/PM",
|
|
23
|
+
20 => "h:mm",
|
|
24
|
+
21 => "h:mm:ss",
|
|
25
|
+
22 => "m/d/yyyy h:mm",
|
|
26
|
+
37 => "#, ##0_);(#, ##0)",
|
|
27
|
+
38 => "#, ##0_);[Red](#, ##0)",
|
|
28
|
+
39 => "#, ##0.00_);(#, ##0.00)",
|
|
29
|
+
40 => "#, ##0.00_);[Red](#, ##0.00)",
|
|
30
|
+
45 => "mm:ss",
|
|
31
|
+
46 => "[h]:mm:ss",
|
|
32
|
+
47 => "mm:ss.0",
|
|
33
|
+
48 => "##0.0E+0",
|
|
34
|
+
49 => "@",
|
|
35
|
+
}
|
|
36
|
+
|
|
3
37
|
# Return the index number for the given Excel column name
|
|
4
38
|
def column_index(col)
|
|
5
39
|
val = 0
|
|
@@ -10,5 +44,42 @@ module Xsv
|
|
|
10
44
|
end
|
|
11
45
|
return val - 1
|
|
12
46
|
end
|
|
47
|
+
|
|
48
|
+
# Return a Date for the given Excel date value
|
|
49
|
+
def parse_date(number)
|
|
50
|
+
Date.new(1899, 12, 30) + number
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Return a time as a string for the given Excel time value
|
|
54
|
+
def parse_time(number)
|
|
55
|
+
base = number * 24
|
|
56
|
+
|
|
57
|
+
hours = base.truncate
|
|
58
|
+
minutes = (base - hours) * 60
|
|
59
|
+
|
|
60
|
+
"%02d:%02d" % [base, minutes.round]
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def parse_number(string)
|
|
64
|
+
if string.include? "."
|
|
65
|
+
string.to_f
|
|
66
|
+
else
|
|
67
|
+
string.to_i
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Tests if the given format string is a date
|
|
72
|
+
def is_date_format?(format)
|
|
73
|
+
return false if format.nil?
|
|
74
|
+
# If it contains at least 2 sequences of d's, m's or y's it's a date!
|
|
75
|
+
format.scan(/[dmy]+/).length > 1
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Tests if the given format string is a time
|
|
79
|
+
def is_time_format?(format)
|
|
80
|
+
return false if format.nil?
|
|
81
|
+
# If it contains at least 2 sequences of h's, m's or s's it's a time!
|
|
82
|
+
format.scan(/[hms]+/).length > 1
|
|
83
|
+
end
|
|
13
84
|
end
|
|
14
85
|
end
|
data/lib/xsv/sheet.rb
CHANGED
|
@@ -2,25 +2,37 @@ module Xsv
|
|
|
2
2
|
class Sheet
|
|
3
3
|
include Xsv::Helpers
|
|
4
4
|
|
|
5
|
-
attr_reader :xml
|
|
5
|
+
attr_reader :xml, :mode
|
|
6
6
|
|
|
7
7
|
def initialize(workbook, xml)
|
|
8
8
|
@workbook = workbook
|
|
9
9
|
@xml = xml
|
|
10
10
|
@headers = []
|
|
11
|
+
|
|
12
|
+
# Determine number of columns
|
|
13
|
+
bounds = @xml.css("cols col").map { |c| [c["min"].to_i, c["max"].to_i] }.flatten
|
|
14
|
+
@column_count = (bounds.max - bounds.min) + 1
|
|
15
|
+
|
|
16
|
+
@mode = :array
|
|
11
17
|
end
|
|
12
18
|
|
|
13
19
|
def inspect
|
|
14
20
|
"#<#{self.class.name}:#{self.object_id}>"
|
|
15
21
|
end
|
|
16
22
|
|
|
17
|
-
# Iterate over rows
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
@
|
|
23
|
+
# Iterate over rows
|
|
24
|
+
def each_row
|
|
25
|
+
row_index = 0
|
|
26
|
+
@xml.css("sheetData row").each do |row_xml|
|
|
27
|
+
row_index += 1
|
|
21
28
|
|
|
22
|
-
|
|
23
|
-
|
|
29
|
+
next if row_index == 1 && @mode == :hash
|
|
30
|
+
|
|
31
|
+
# pad empty rows
|
|
32
|
+
while row_index < row_xml["r"].to_i do
|
|
33
|
+
yield(empty_row)
|
|
34
|
+
row_index += 1
|
|
35
|
+
end
|
|
24
36
|
|
|
25
37
|
yield(parse_row(row_xml))
|
|
26
38
|
end
|
|
@@ -30,14 +42,23 @@ module Xsv
|
|
|
30
42
|
|
|
31
43
|
# Get row by number, starting at 0
|
|
32
44
|
def [](number)
|
|
33
|
-
|
|
45
|
+
row_xml = xml.css("sheetData row[r=#{number + 1}]").first
|
|
46
|
+
|
|
47
|
+
if row_xml
|
|
48
|
+
parse_row(row_xml)
|
|
49
|
+
else
|
|
50
|
+
empty_row
|
|
51
|
+
end
|
|
34
52
|
end
|
|
35
53
|
|
|
36
54
|
    # Load headers in the top row of the worksheet. After parsing of headers
    # all methods return hashes instead of arrays
    #
    # Always returns true.
    def parse_headers!
      # Force array mode while the header row itself is read: in hash mode
      # rows are keyed by @headers, which is only being filled in here.
      @mode = :array
      parse_headers

      # From now on rows are returned as hashes keyed by the header values.
      @mode = :hash

      true
    end
|
|
43
64
|
|
|
@@ -47,14 +68,17 @@ module Xsv
|
|
|
47
68
|
@headers = parse_row(@xml.css("sheetData row").first)
|
|
48
69
|
end
|
|
49
70
|
|
|
50
|
-
def
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
71
|
+
def empty_row
|
|
72
|
+
case @mode
|
|
73
|
+
when :array
|
|
74
|
+
[nil] * @column_count
|
|
75
|
+
when :hash
|
|
76
|
+
@headers.zip([]).to_h
|
|
55
77
|
end
|
|
78
|
+
end
|
|
56
79
|
|
|
57
|
-
|
|
80
|
+
def parse_row(xml)
|
|
81
|
+
row = empty_row
|
|
58
82
|
|
|
59
83
|
xml.css("c").each do |c_xml|
|
|
60
84
|
value = case c_xml["t"]
|
|
@@ -65,7 +89,23 @@ module Xsv
|
|
|
65
89
|
when "e" # N/A
|
|
66
90
|
nil
|
|
67
91
|
when nil
|
|
68
|
-
c_xml.css("v").inner_text
|
|
92
|
+
value = parse_number(c_xml.css("v").inner_text)
|
|
93
|
+
|
|
94
|
+
if c_xml["s"]
|
|
95
|
+
style = @workbook.xfs[c_xml["s"].to_i]
|
|
96
|
+
numFmtId = style[:numFmtId].to_i
|
|
97
|
+
if numFmtId == 0
|
|
98
|
+
value
|
|
99
|
+
elsif is_date_format?(@workbook.numFmts[numFmtId])
|
|
100
|
+
parse_date(value)
|
|
101
|
+
elsif is_time_format?(@workbook.numFmts[numFmtId])
|
|
102
|
+
parse_time(value)
|
|
103
|
+
else
|
|
104
|
+
value
|
|
105
|
+
end
|
|
106
|
+
else
|
|
107
|
+
value
|
|
108
|
+
end
|
|
69
109
|
else
|
|
70
110
|
raise Xsv::Error, "Encountered unknown column type #{c_xml["t"]}"
|
|
71
111
|
end
|
|
@@ -73,22 +113,12 @@ module Xsv
|
|
|
73
113
|
# Determine column position and pad row with nil values
|
|
74
114
|
col_index = column_index(c_xml["r"].scan(/^[A-Z]+/).first)
|
|
75
115
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
end
|
|
82
|
-
next_index += 1
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
if @headers.any?
|
|
86
|
-
row[@headers[next_index]] = value
|
|
87
|
-
else
|
|
88
|
-
row << value
|
|
116
|
+
case @mode
|
|
117
|
+
when :array
|
|
118
|
+
row[col_index] = value
|
|
119
|
+
when :hash
|
|
120
|
+
row[@headers[col_index]] = value
|
|
89
121
|
end
|
|
90
|
-
|
|
91
|
-
next_index += 1
|
|
92
122
|
end
|
|
93
123
|
|
|
94
124
|
row
|
data/lib/xsv/version.rb
CHANGED
data/lib/xsv/{file.rb → workbook.rb}
CHANGED
|
@@ -2,14 +2,25 @@ require 'nokogiri'
|
|
|
2
2
|
require 'zip'
|
|
3
3
|
|
|
4
4
|
module Xsv
|
|
5
|
-
class
|
|
5
|
+
class Workbook
|
|
6
6
|
|
|
7
|
-
attr_reader :sheets, :shared_strings
|
|
7
|
+
attr_reader :sheets, :shared_strings, :xfs, :numFmts
|
|
8
|
+
|
|
9
|
+
# Open the workbook of the given filename
|
|
10
|
+
def self.open(file)
|
|
11
|
+
@workbook = self.new(Zip::File.open(file))
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# Open a workbook from an instance of Zip::File
|
|
15
|
+
def initialize(zip)
|
|
16
|
+
@zip = zip
|
|
8
17
|
|
|
9
|
-
def initialize(file)
|
|
10
|
-
@zip = Zip::File.open(file)
|
|
11
18
|
@sheets = []
|
|
19
|
+
@xfs = []
|
|
20
|
+
@numFmts = Xsv::Helpers::BUILT_IN_NUMBER_FORMATS
|
|
21
|
+
|
|
12
22
|
fetch_shared_strings
|
|
23
|
+
fetch_styles
|
|
13
24
|
fetch_sheets
|
|
14
25
|
end
|
|
15
26
|
|
|
@@ -17,11 +28,6 @@ module Xsv
|
|
|
17
28
|
"#<#{self.class.name}:#{self.object_id}>"
|
|
18
29
|
end
|
|
19
30
|
|
|
20
|
-
def close
|
|
21
|
-
# FIXME @sheets.each { |s| s.xml.close }
|
|
22
|
-
@zip.close
|
|
23
|
-
end
|
|
24
|
-
|
|
25
31
|
private
|
|
26
32
|
|
|
27
33
|
def fetch_shared_strings
|
|
@@ -37,6 +43,19 @@ module Xsv
|
|
|
37
43
|
stream.close
|
|
38
44
|
end
|
|
39
45
|
|
|
46
|
+
def fetch_styles
|
|
47
|
+
stream = @zip.glob("xl/styles.xml").first.get_input_stream
|
|
48
|
+
xml = Nokogiri::XML(stream)
|
|
49
|
+
|
|
50
|
+
xml.css("cellXfs xf").each do |xf|
|
|
51
|
+
@xfs << xf.attributes.map { |k, v| [k.to_sym, v.value] }.to_h
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
xml.css("numFmts numFmt").each do |numFmt|
|
|
55
|
+
@numFmts[numFmt["numFmtId"].to_i] = numFmt["formatCode"]
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
40
59
|
def fetch_sheets
|
|
41
60
|
@zip.glob("xl/worksheets/sheet*.xml").sort do |entry|
|
|
42
61
|
entry.name.scan(/\d+/).first.to_i
|
data/lib/xsv.rb
CHANGED
data/test.sh
ADDED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martijn Storck
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-02-
|
|
11
|
+
date: 2020-02-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rubyzip
|
|
@@ -97,10 +97,11 @@ files:
|
|
|
97
97
|
- bin/console
|
|
98
98
|
- bin/setup
|
|
99
99
|
- lib/xsv.rb
|
|
100
|
-
- lib/xsv/file.rb
|
|
101
100
|
- lib/xsv/helpers.rb
|
|
102
101
|
- lib/xsv/sheet.rb
|
|
103
102
|
- lib/xsv/version.rb
|
|
103
|
+
- lib/xsv/workbook.rb
|
|
104
|
+
- test.sh
|
|
104
105
|
- xsv.gemspec
|
|
105
106
|
homepage: https://github.com/martijn/xsv
|
|
106
107
|
licenses:
|