saxlsx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/saxlsx/file_system.rb +7 -2
- data/lib/saxlsx/rows_collection.rb +9 -2
- data/lib/saxlsx/rows_collection_parser.rb +100 -13
- data/lib/saxlsx/style_collection.rb +15 -0
- data/lib/saxlsx/style_collection_parser.rb +52 -0
- data/lib/saxlsx/version.rb +1 -1
- data/lib/saxlsx/workbook.rb +5 -1
- data/spec/data/Spec.xlsx +0 -0
- data/spec/sheet_spec.rb +21 -9
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b0cbd91196c90e22a5dc4ede2bf372792cc5e3f
|
4
|
+
data.tar.gz: 96bec50a0662ab5741c54e329c80c073c9c202e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e55094b6131ae28e50afb8bed91d91f15b200596f94cb6215f9bea6c5e9303d158f281ec6495d8cbb481528f3893ea614748d3fd3c9130b06555df6d98930e10
|
7
|
+
data.tar.gz: fbd741be1e3b91784bc11502351a4195d31d0459de3765bd7de6acc64cebf4b5580a9eb0fc19bab0ad844f27ab316c84085ff9b4155bf76d52e5cef2f2d05738
|
data/lib/saxlsx/file_system.rb
CHANGED
@@ -26,8 +26,13 @@ module Saxlsx
|
|
26
26
|
@zip.read('xl/sharedStrings.xml')
|
27
27
|
end
|
28
28
|
|
29
|
-
def
|
30
|
-
@zip.
|
29
|
+
def styles
|
30
|
+
@zip.read('xl/styles.xml')
|
31
|
+
end
|
32
|
+
|
33
|
+
def sheet(i)
|
34
|
+
f = @zip.glob('xl/worksheets/sheet*.xml').sort[i]
|
35
|
+
@zip.read(f)
|
31
36
|
end
|
32
37
|
|
33
38
|
end
|
@@ -7,14 +7,21 @@ module Saxlsx
|
|
7
7
|
@index = index
|
8
8
|
@file_system = file_system
|
9
9
|
@shared_strings = shared_strings
|
10
|
+
@sheet = file_system.sheet(index)
|
10
11
|
end
|
11
12
|
|
12
13
|
def each(&block)
|
13
|
-
RowsCollectionParser.parse @index, @
|
14
|
+
RowsCollectionParser.parse @index, @sheet, @shared_strings, &block
|
14
15
|
end
|
15
16
|
|
17
|
+
def count
|
18
|
+
@count ||= @sheet.match(/<dimension ref="[^:]+:[A-Z]*(\d+)"/)[1].to_i
|
19
|
+
end
|
20
|
+
|
21
|
+
alias :size :count
|
22
|
+
|
16
23
|
def [](value)
|
17
24
|
to_a[value]
|
18
25
|
end
|
19
26
|
end
|
20
|
-
end
|
27
|
+
end
|
@@ -1,12 +1,49 @@
|
|
1
1
|
module Saxlsx
|
2
2
|
class RowsCollectionParser < Ox::Sax
|
3
|
+
NUM_FORMATS = {
|
4
|
+
0 => :string, # General
|
5
|
+
1 => :fixnum, # 0
|
6
|
+
2 => :float, # 0.00
|
7
|
+
3 => :fixnum, # #,##0
|
8
|
+
4 => :float, # #,##0.00
|
9
|
+
5 => :unsupported, # $#,##0_);($#,##0)
|
10
|
+
6 => :unsupported, # $#,##0_);[Red]($#,##0)
|
11
|
+
7 => :unsupported, # $#,##0.00_);($#,##0.00)
|
12
|
+
8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
|
13
|
+
9 => :percentage, # 0%
|
14
|
+
10 => :percentage, # 0.00%
|
15
|
+
11 => :bignum, # 0.00E+00
|
16
|
+
12 => :unsupported, # # ?/?
|
17
|
+
13 => :unsupported, # # ??/??
|
18
|
+
14 => :date, # mm-dd-yy
|
19
|
+
15 => :date, # d-mmm-yy
|
20
|
+
16 => :date, # d-mmm
|
21
|
+
17 => :date, # mmm-yy
|
22
|
+
18 => :time, # h:mm AM/PM
|
23
|
+
19 => :time, # h:mm:ss AM/PM
|
24
|
+
20 => :time, # h:mm
|
25
|
+
21 => :time, # h:mm:ss
|
26
|
+
22 => :date_time, # m/d/yy h:mm
|
27
|
+
37 => :unsupported, # #,##0 ;(#,##0)
|
28
|
+
38 => :unsupported, # #,##0 ;[Red](#,##0)
|
29
|
+
39 => :unsupported, # #,##0.00;(#,##0.00)
|
30
|
+
40 => :unsupported, # #,##0.00;[Red](#,##0.00)
|
31
|
+
45 => :time, # mm:ss
|
32
|
+
46 => :time, # [h]:mm:ss
|
33
|
+
47 => :time, # mmss.0
|
34
|
+
48 => :bignum, # ##0.0E+0
|
35
|
+
49 => :unsupported # @
|
36
|
+
}
|
3
37
|
|
4
|
-
|
5
|
-
|
38
|
+
DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
|
39
|
+
|
40
|
+
def self.parse(index, data, workbook, &block)
|
41
|
+
SaxParser.parse self.new(workbook, &block), data
|
6
42
|
end
|
7
43
|
|
8
|
-
def initialize(
|
9
|
-
@shared_strings = shared_strings
|
44
|
+
def initialize(workbook, &block)
|
45
|
+
@shared_strings = workbook.shared_strings
|
46
|
+
@number_formats = workbook.number_formats
|
10
47
|
@block = block
|
11
48
|
end
|
12
49
|
|
@@ -18,7 +55,10 @@ module Saxlsx
|
|
18
55
|
@next_column = 'A'
|
19
56
|
end
|
20
57
|
|
21
|
-
|
58
|
+
if name == :c
|
59
|
+
@current_type = nil
|
60
|
+
@current_number_format = nil
|
61
|
+
end
|
22
62
|
end
|
23
63
|
|
24
64
|
def end_element(name)
|
@@ -30,8 +70,14 @@ module Saxlsx
|
|
30
70
|
|
31
71
|
def attr(name, value)
|
32
72
|
if @current_element == :c
|
33
|
-
|
34
|
-
|
73
|
+
case name
|
74
|
+
when :t
|
75
|
+
@current_type = value
|
76
|
+
when :r
|
77
|
+
@current_column = value.gsub(/\d/, '')
|
78
|
+
when :s
|
79
|
+
@current_number_format = detect_format_type(value.to_i)
|
80
|
+
end
|
35
81
|
end
|
36
82
|
end
|
37
83
|
|
@@ -50,17 +96,58 @@ module Saxlsx
|
|
50
96
|
|
51
97
|
def value_of(text)
|
52
98
|
case @current_type
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
99
|
+
when 's'
|
100
|
+
@shared_strings[text.to_i]
|
101
|
+
when 'b'
|
102
|
+
BooleanParser.parse text
|
103
|
+
else
|
104
|
+
case @current_number_format
|
105
|
+
when :date
|
106
|
+
DATE_SYSTEM_1900 + text.to_i
|
107
|
+
when :date_time
|
108
|
+
# Round time to seconds
|
109
|
+
date = DATE_SYSTEM_1900 + (text.to_f * 86400).round.fdiv(86400)
|
110
|
+
DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
|
111
|
+
when :fixnum
|
112
|
+
text.to_i
|
113
|
+
when :float
|
58
114
|
text.to_f
|
115
|
+
when :bignum
|
116
|
+
BigDecimal.new(text)
|
117
|
+
when :percentage
|
118
|
+
text.to_f / 100
|
59
119
|
else
|
60
|
-
|
120
|
+
if @current_type == 'n'
|
121
|
+
text.to_f
|
122
|
+
elsif text =~ /\A-?\d+(\.\d+(?:e[+-]\d+)?)?\Z/i # Auto convert numbers
|
123
|
+
$1 ? text.to_f : text.to_i
|
124
|
+
else
|
125
|
+
CGI.unescapeHTML(text)
|
126
|
+
end
|
127
|
+
end
|
61
128
|
end
|
129
|
+
end
|
62
130
|
|
131
|
+
def detect_format_type(index)
|
132
|
+
format = @number_formats[index]
|
133
|
+
NUM_FORMATS[format] || detect_custom_format_type(format)
|
63
134
|
end
|
64
135
|
|
136
|
+
# This is the least deterministic part of reading xlsx files. Due to
|
137
|
+
# custom styles, you can't know for sure when a date is a date other than
|
138
|
+
# looking at its format and guessing. It's not impossible to guess right,
|
139
|
+
# though.
|
140
|
+
#
|
141
|
+
# http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
|
142
|
+
def detect_custom_format_type(code)
|
143
|
+
code = code.gsub(/\[[^\]]+\]/, '') # Strip meta - [...]
|
144
|
+
if code =~ /0/
|
145
|
+
:float
|
146
|
+
elsif code =~ /[ymdhis]/i
|
147
|
+
:date_time
|
148
|
+
else
|
149
|
+
:unsupported
|
150
|
+
end
|
151
|
+
end
|
65
152
|
end
|
66
153
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Saxlsx
|
2
|
+
class StyleCollectionParser < Ox::Sax
|
3
|
+
def self.parse(file_system, &block)
|
4
|
+
SaxParser.parse self.new(&block), file_system.styles
|
5
|
+
end
|
6
|
+
|
7
|
+
def initialize(&block)
|
8
|
+
@block = block
|
9
|
+
@cell_styles = false
|
10
|
+
@custom_num_fmts = {}
|
11
|
+
end
|
12
|
+
|
13
|
+
def start_element(name)
|
14
|
+
case name
|
15
|
+
when :cellXfs
|
16
|
+
@cell_styles = true
|
17
|
+
when :xf
|
18
|
+
@num_fmt_id = nil
|
19
|
+
when :numFmt
|
20
|
+
@num_fmt_id = nil
|
21
|
+
@num_fmt_code = nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def end_element(name)
|
26
|
+
case name
|
27
|
+
when :cellXfs
|
28
|
+
@cell_styles = false
|
29
|
+
when :xf
|
30
|
+
if @cell_styles
|
31
|
+
custom_num_fmt_code = @custom_num_fmts[@num_fmt_id]
|
32
|
+
if custom_num_fmt_code
|
33
|
+
@block.call custom_num_fmt_code
|
34
|
+
else
|
35
|
+
@block.call @num_fmt_id
|
36
|
+
end
|
37
|
+
end
|
38
|
+
when :numFmt
|
39
|
+
@custom_num_fmts[@num_fmt_id] = @num_fmt_code
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def attr(name, value)
|
44
|
+
case name
|
45
|
+
when :numFmtId
|
46
|
+
@num_fmt_id = value.to_i
|
47
|
+
when :formatCode
|
48
|
+
@num_fmt_code = value
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/saxlsx/version.rb
CHANGED
data/lib/saxlsx/workbook.rb
CHANGED
@@ -19,7 +19,7 @@ module Saxlsx
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def sheets(name=nil)
|
22
|
-
@sheets ||= SheetCollection.new(@file_system,
|
22
|
+
@sheets ||= SheetCollection.new(@file_system, self).to_a
|
23
23
|
name.nil? ? @sheets : @sheets.detect { |s| s.name == name }
|
24
24
|
end
|
25
25
|
|
@@ -31,6 +31,10 @@ module Saxlsx
|
|
31
31
|
@shared_strings ||= SharedStringCollection.new(@file_system).to_a
|
32
32
|
end
|
33
33
|
|
34
|
+
def number_formats
|
35
|
+
@number_formats ||= StyleCollection.new(@file_system).to_a
|
36
|
+
end
|
37
|
+
|
34
38
|
def to_csv(path)
|
35
39
|
sheets.each { |s| s.to_csv path }
|
36
40
|
end
|
data/spec/data/Spec.xlsx
CHANGED
Binary file
|
data/spec/sheet_spec.rb
CHANGED
@@ -12,11 +12,11 @@ describe Sheet do
|
|
12
12
|
|
13
13
|
it 'Rows count' do
|
14
14
|
Workbook.open filename do |w|
|
15
|
-
w.sheets[0].should
|
16
|
-
w.sheets[1].should
|
17
|
-
w.sheets[2].should
|
18
|
-
w.sheets[3].should
|
19
|
-
w.sheets[4].should
|
15
|
+
w.sheets[0].rows.count.should eq 7
|
16
|
+
w.sheets[1].rows.count.should eq 9
|
17
|
+
w.sheets[2].rows.count.should eq 3
|
18
|
+
w.sheets[3].rows.count.should eq 2
|
19
|
+
w.sheets[4].rows.count.should eq 3
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -29,8 +29,20 @@ describe Sheet do
|
|
29
29
|
it 'Rows content' do
|
30
30
|
Workbook.open filename do |w|
|
31
31
|
w.sheets[0].tap do |s|
|
32
|
-
s.rows[0].should eq [
|
33
|
-
|
32
|
+
s.rows[0].should eq [
|
33
|
+
'LevenshteinDistance',
|
34
|
+
3.14,
|
35
|
+
3,
|
36
|
+
DateTime.new(1970, 1, 1, 1, 0, 0),
|
37
|
+
DateTime.new(1970, 1, 1),
|
38
|
+
BigDecimal.new('3.4028236692093801E+38')
|
39
|
+
]
|
40
|
+
s.rows[1].should eq [
|
41
|
+
'Case sensitive',
|
42
|
+
false,
|
43
|
+
3.0,
|
44
|
+
DateTime.new(1970, 1, 1, 1, 0, 0)
|
45
|
+
]
|
34
46
|
s.rows[2].should eq ['Fields', 'Type', 'URL Mining']
|
35
47
|
s.rows[3].should eq ['autor', 'text', false]
|
36
48
|
s.rows[4].should eq ['texto', 'text', false]
|
@@ -68,8 +80,8 @@ describe Sheet do
|
|
68
80
|
w.sheets[0].to_csv tmp_path
|
69
81
|
|
70
82
|
csv = File.open(csv_file, 'r') { |f| f.readlines }
|
71
|
-
csv[0].should eq "
|
72
|
-
csv[1].should eq "
|
83
|
+
csv[0].should eq %{"LevenshteinDistance","3.14","3","1970-01-01T01:00:00+00:00","1970-01-01T00:00:00+00:00","0.34028236692093801E39"\n}
|
84
|
+
csv[1].should eq %{"Case sensitive","false","3.0","1970-01-01T01:00:00+00:00"\n}
|
73
85
|
csv[2].should eq "\"Fields\",\"Type\",\"URL Mining\"\n"
|
74
86
|
csv[3].should eq "\"autor\",\"text\",\"false\"\n"
|
75
87
|
csv[4].should eq "\"texto\",\"text\",\"false\"\n"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxlsx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Edgars Beigarts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-03-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -119,6 +119,8 @@ files:
|
|
119
119
|
- lib/saxlsx/sheet.rb
|
120
120
|
- lib/saxlsx/sheet_collection.rb
|
121
121
|
- lib/saxlsx/sheet_collection_parser.rb
|
122
|
+
- lib/saxlsx/style_collection.rb
|
123
|
+
- lib/saxlsx/style_collection_parser.rb
|
122
124
|
- lib/saxlsx/version.rb
|
123
125
|
- lib/saxlsx/workbook.rb
|
124
126
|
- saxlsx.gemspec
|