saxlsx 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/saxlsx/file_system.rb +7 -2
- data/lib/saxlsx/rows_collection.rb +9 -2
- data/lib/saxlsx/rows_collection_parser.rb +100 -13
- data/lib/saxlsx/style_collection.rb +15 -0
- data/lib/saxlsx/style_collection_parser.rb +52 -0
- data/lib/saxlsx/version.rb +1 -1
- data/lib/saxlsx/workbook.rb +5 -1
- data/spec/data/Spec.xlsx +0 -0
- data/spec/sheet_spec.rb +21 -9
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b0cbd91196c90e22a5dc4ede2bf372792cc5e3f
|
4
|
+
data.tar.gz: 96bec50a0662ab5741c54e329c80c073c9c202e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e55094b6131ae28e50afb8bed91d91f15b200596f94cb6215f9bea6c5e9303d158f281ec6495d8cbb481528f3893ea614748d3fd3c9130b06555df6d98930e10
|
7
|
+
data.tar.gz: fbd741be1e3b91784bc11502351a4195d31d0459de3765bd7de6acc64cebf4b5580a9eb0fc19bab0ad844f27ab316c84085ff9b4155bf76d52e5cef2f2d05738
|
data/lib/saxlsx/file_system.rb
CHANGED
@@ -26,8 +26,13 @@ module Saxlsx
|
|
26
26
|
@zip.read('xl/sharedStrings.xml')
|
27
27
|
end
|
28
28
|
|
29
|
-
def
|
30
|
-
@zip.
|
29
|
+
def styles
|
30
|
+
@zip.read('xl/styles.xml')
|
31
|
+
end
|
32
|
+
|
33
|
+
def sheet(i)
|
34
|
+
f = @zip.glob('xl/worksheets/sheet*.xml').sort[i]
|
35
|
+
@zip.read(f)
|
31
36
|
end
|
32
37
|
|
33
38
|
end
|
@@ -7,14 +7,21 @@ module Saxlsx
|
|
7
7
|
@index = index
|
8
8
|
@file_system = file_system
|
9
9
|
@shared_strings = shared_strings
|
10
|
+
@sheet = file_system.sheet(index)
|
10
11
|
end
|
11
12
|
|
12
13
|
def each(&block)
|
13
|
-
RowsCollectionParser.parse @index, @
|
14
|
+
RowsCollectionParser.parse @index, @sheet, @shared_strings, &block
|
14
15
|
end
|
15
16
|
|
17
|
+
def count
|
18
|
+
@count ||= @sheet.match(/<dimension ref="[^:]+:[A-Z]*(\d+)"/)[1].to_i
|
19
|
+
end
|
20
|
+
|
21
|
+
alias :size :count
|
22
|
+
|
16
23
|
def [](value)
|
17
24
|
to_a[value]
|
18
25
|
end
|
19
26
|
end
|
20
|
-
end
|
27
|
+
end
|
@@ -1,12 +1,49 @@
|
|
1
1
|
module Saxlsx
|
2
2
|
class RowsCollectionParser < Ox::Sax
|
3
|
+
NUM_FORMATS = {
|
4
|
+
0 => :string, # General
|
5
|
+
1 => :fixnum, # 0
|
6
|
+
2 => :float, # 0.00
|
7
|
+
3 => :fixnum, # #,##0
|
8
|
+
4 => :float, # #,##0.00
|
9
|
+
5 => :unsupported, # $#,##0_);($#,##0)
|
10
|
+
6 => :unsupported, # $#,##0_);[Red]($#,##0)
|
11
|
+
7 => :unsupported, # $#,##0.00_);($#,##0.00)
|
12
|
+
8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
|
13
|
+
9 => :percentage, # 0%
|
14
|
+
10 => :percentage, # 0.00%
|
15
|
+
11 => :bignum, # 0.00E+00
|
16
|
+
12 => :unsupported, # # ?/?
|
17
|
+
13 => :unsupported, # # ??/??
|
18
|
+
14 => :date, # mm-dd-yy
|
19
|
+
15 => :date, # d-mmm-yy
|
20
|
+
16 => :date, # d-mmm
|
21
|
+
17 => :date, # mmm-yy
|
22
|
+
18 => :time, # h:mm AM/PM
|
23
|
+
19 => :time, # h:mm:ss AM/PM
|
24
|
+
20 => :time, # h:mm
|
25
|
+
21 => :time, # h:mm:ss
|
26
|
+
22 => :date_time, # m/d/yy h:mm
|
27
|
+
37 => :unsupported, # #,##0 ;(#,##0)
|
28
|
+
38 => :unsupported, # #,##0 ;[Red](#,##0)
|
29
|
+
39 => :unsupported, # #,##0.00;(#,##0.00)
|
30
|
+
40 => :unsupported, # #,##0.00;[Red](#,##0.00)
|
31
|
+
45 => :time, # mm:ss
|
32
|
+
46 => :time, # [h]:mm:ss
|
33
|
+
47 => :time, # mmss.0
|
34
|
+
48 => :bignum, # ##0.0E+0
|
35
|
+
49 => :unsupported # @
|
36
|
+
}
|
3
37
|
|
4
|
-
|
5
|
-
|
38
|
+
DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
|
39
|
+
|
40
|
+
def self.parse(index, data, workbook, &block)
|
41
|
+
SaxParser.parse self.new(workbook, &block), data
|
6
42
|
end
|
7
43
|
|
8
|
-
def initialize(
|
9
|
-
@shared_strings = shared_strings
|
44
|
+
def initialize(workbook, &block)
|
45
|
+
@shared_strings = workbook.shared_strings
|
46
|
+
@number_formats = workbook.number_formats
|
10
47
|
@block = block
|
11
48
|
end
|
12
49
|
|
@@ -18,7 +55,10 @@ module Saxlsx
|
|
18
55
|
@next_column = 'A'
|
19
56
|
end
|
20
57
|
|
21
|
-
|
58
|
+
if name == :c
|
59
|
+
@current_type = nil
|
60
|
+
@current_number_format = nil
|
61
|
+
end
|
22
62
|
end
|
23
63
|
|
24
64
|
def end_element(name)
|
@@ -30,8 +70,14 @@ module Saxlsx
|
|
30
70
|
|
31
71
|
def attr(name, value)
|
32
72
|
if @current_element == :c
|
33
|
-
|
34
|
-
|
73
|
+
case name
|
74
|
+
when :t
|
75
|
+
@current_type = value
|
76
|
+
when :r
|
77
|
+
@current_column = value.gsub(/\d/, '')
|
78
|
+
when :s
|
79
|
+
@current_number_format = detect_format_type(value.to_i)
|
80
|
+
end
|
35
81
|
end
|
36
82
|
end
|
37
83
|
|
@@ -50,17 +96,58 @@ module Saxlsx
|
|
50
96
|
|
51
97
|
def value_of(text)
|
52
98
|
case @current_type
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
99
|
+
when 's'
|
100
|
+
@shared_strings[text.to_i]
|
101
|
+
when 'b'
|
102
|
+
BooleanParser.parse text
|
103
|
+
else
|
104
|
+
case @current_number_format
|
105
|
+
when :date
|
106
|
+
DATE_SYSTEM_1900 + text.to_i
|
107
|
+
when :date_time
|
108
|
+
# Round time to seconds
|
109
|
+
date = DATE_SYSTEM_1900 + (text.to_f * 86400).round.fdiv(86400)
|
110
|
+
DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
|
111
|
+
when :fixnum
|
112
|
+
text.to_i
|
113
|
+
when :float
|
58
114
|
text.to_f
|
115
|
+
when :bignum
|
116
|
+
BigDecimal.new(text)
|
117
|
+
when :percentage
|
118
|
+
text.to_f / 100
|
59
119
|
else
|
60
|
-
|
120
|
+
if @current_type == 'n'
|
121
|
+
text.to_f
|
122
|
+
elsif text =~ /\A-?\d+(\.\d+(?:e[+-]\d+)?)?\Z/i # Auto convert numbers
|
123
|
+
$1 ? text.to_f : text.to_i
|
124
|
+
else
|
125
|
+
CGI.unescapeHTML(text)
|
126
|
+
end
|
127
|
+
end
|
61
128
|
end
|
129
|
+
end
|
62
130
|
|
131
|
+
def detect_format_type(index)
|
132
|
+
format = @number_formats[index]
|
133
|
+
NUM_FORMATS[format] || detect_custom_format_type(format)
|
63
134
|
end
|
64
135
|
|
136
|
+
# This is the least deterministic part of reading xlsx files. Due to
|
137
|
+
# custom styles, you can't know for sure when a date is a date other than
|
138
|
+
# looking at its format and guessing. It's not impossible to guess right,
|
139
|
+
# though.
|
140
|
+
#
|
141
|
+
# http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
|
142
|
+
def detect_custom_format_type(code)
|
143
|
+
code = code.gsub(/\[[^\]]+\]/, '') # Strip meta - [...]
|
144
|
+
if code =~ /0/
|
145
|
+
:float
|
146
|
+
elsif code =~ /[ymdhis]/i
|
147
|
+
:date_time
|
148
|
+
else
|
149
|
+
:unsupported
|
150
|
+
end
|
151
|
+
end
|
65
152
|
end
|
66
153
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Saxlsx
|
2
|
+
class StyleCollectionParser < Ox::Sax
|
3
|
+
def self.parse(file_system, &block)
|
4
|
+
SaxParser.parse self.new(&block), file_system.styles
|
5
|
+
end
|
6
|
+
|
7
|
+
def initialize(&block)
|
8
|
+
@block = block
|
9
|
+
@cell_styles = false
|
10
|
+
@custom_num_fmts = {}
|
11
|
+
end
|
12
|
+
|
13
|
+
def start_element(name)
|
14
|
+
case name
|
15
|
+
when :cellXfs
|
16
|
+
@cell_styles = true
|
17
|
+
when :xf
|
18
|
+
@num_fmt_id = nil
|
19
|
+
when :numFmt
|
20
|
+
@num_fmt_id = nil
|
21
|
+
@num_fmt_code = nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def end_element(name)
|
26
|
+
case name
|
27
|
+
when :cellXfs
|
28
|
+
@cell_styles = false
|
29
|
+
when :xf
|
30
|
+
if @cell_styles
|
31
|
+
custom_num_fmt_code = @custom_num_fmts[@num_fmt_id]
|
32
|
+
if custom_num_fmt_code
|
33
|
+
@block.call custom_num_fmt_code
|
34
|
+
else
|
35
|
+
@block.call @num_fmt_id
|
36
|
+
end
|
37
|
+
end
|
38
|
+
when :numFmt
|
39
|
+
@custom_num_fmts[@num_fmt_id] = @num_fmt_code
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def attr(name, value)
|
44
|
+
case name
|
45
|
+
when :numFmtId
|
46
|
+
@num_fmt_id = value.to_i
|
47
|
+
when :formatCode
|
48
|
+
@num_fmt_code = value
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/saxlsx/version.rb
CHANGED
data/lib/saxlsx/workbook.rb
CHANGED
@@ -19,7 +19,7 @@ module Saxlsx
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def sheets(name=nil)
|
22
|
-
@sheets ||= SheetCollection.new(@file_system,
|
22
|
+
@sheets ||= SheetCollection.new(@file_system, self).to_a
|
23
23
|
name.nil? ? @sheets : @sheets.detect { |s| s.name == name }
|
24
24
|
end
|
25
25
|
|
@@ -31,6 +31,10 @@ module Saxlsx
|
|
31
31
|
@shared_strings ||= SharedStringCollection.new(@file_system).to_a
|
32
32
|
end
|
33
33
|
|
34
|
+
def number_formats
|
35
|
+
@number_formats ||= StyleCollection.new(@file_system).to_a
|
36
|
+
end
|
37
|
+
|
34
38
|
def to_csv(path)
|
35
39
|
sheets.each { |s| s.to_csv path }
|
36
40
|
end
|
data/spec/data/Spec.xlsx
CHANGED
Binary file
|
data/spec/sheet_spec.rb
CHANGED
@@ -12,11 +12,11 @@ describe Sheet do
|
|
12
12
|
|
13
13
|
it 'Rows count' do
|
14
14
|
Workbook.open filename do |w|
|
15
|
-
w.sheets[0].should
|
16
|
-
w.sheets[1].should
|
17
|
-
w.sheets[2].should
|
18
|
-
w.sheets[3].should
|
19
|
-
w.sheets[4].should
|
15
|
+
w.sheets[0].rows.count.should eq 7
|
16
|
+
w.sheets[1].rows.count.should eq 9
|
17
|
+
w.sheets[2].rows.count.should eq 3
|
18
|
+
w.sheets[3].rows.count.should eq 2
|
19
|
+
w.sheets[4].rows.count.should eq 3
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -29,8 +29,20 @@ describe Sheet do
|
|
29
29
|
it 'Rows content' do
|
30
30
|
Workbook.open filename do |w|
|
31
31
|
w.sheets[0].tap do |s|
|
32
|
-
s.rows[0].should eq [
|
33
|
-
|
32
|
+
s.rows[0].should eq [
|
33
|
+
'LevenshteinDistance',
|
34
|
+
3.14,
|
35
|
+
3,
|
36
|
+
DateTime.new(1970, 1, 1, 1, 0, 0),
|
37
|
+
DateTime.new(1970, 1, 1),
|
38
|
+
BigDecimal.new('3.4028236692093801E+38')
|
39
|
+
]
|
40
|
+
s.rows[1].should eq [
|
41
|
+
'Case sensitive',
|
42
|
+
false,
|
43
|
+
3.0,
|
44
|
+
DateTime.new(1970, 1, 1, 1, 0, 0)
|
45
|
+
]
|
34
46
|
s.rows[2].should eq ['Fields', 'Type', 'URL Mining']
|
35
47
|
s.rows[3].should eq ['autor', 'text', false]
|
36
48
|
s.rows[4].should eq ['texto', 'text', false]
|
@@ -68,8 +80,8 @@ describe Sheet do
|
|
68
80
|
w.sheets[0].to_csv tmp_path
|
69
81
|
|
70
82
|
csv = File.open(csv_file, 'r') { |f| f.readlines }
|
71
|
-
csv[0].should eq "
|
72
|
-
csv[1].should eq "
|
83
|
+
csv[0].should eq %{"LevenshteinDistance","3.14","3","1970-01-01T01:00:00+00:00","1970-01-01T00:00:00+00:00","0.34028236692093801E39"\n}
|
84
|
+
csv[1].should eq %{"Case sensitive","false","3.0","1970-01-01T01:00:00+00:00"\n}
|
73
85
|
csv[2].should eq "\"Fields\",\"Type\",\"URL Mining\"\n"
|
74
86
|
csv[3].should eq "\"autor\",\"text\",\"false\"\n"
|
75
87
|
csv[4].should eq "\"texto\",\"text\",\"false\"\n"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxlsx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Edgars Beigarts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-03-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -119,6 +119,8 @@ files:
|
|
119
119
|
- lib/saxlsx/sheet.rb
|
120
120
|
- lib/saxlsx/sheet_collection.rb
|
121
121
|
- lib/saxlsx/sheet_collection_parser.rb
|
122
|
+
- lib/saxlsx/style_collection.rb
|
123
|
+
- lib/saxlsx/style_collection_parser.rb
|
122
124
|
- lib/saxlsx/version.rb
|
123
125
|
- lib/saxlsx/workbook.rb
|
124
126
|
- saxlsx.gemspec
|