dullard 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +5 -0
- data/dullard.gemspec +5 -3
- data/lib/dullard/reader.rb +154 -14
- data/lib/dullard/version.rb +1 -1
- data/spec/dullard_spec.rb +73 -0
- data/spec/test.xlsx +0 -0
- metadata +29 -12
- data/specs/dullard_spec.rb +0 -34
- data/specs/test.xlsx +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d02ed091d258d5d690ba9e3ba350c392488664e
|
4
|
+
data.tar.gz: 9f61befa4e3003f7e469a26ff7c4412ffa4d6e55
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87d7d94d530d337ba92623146e5d0a115e44b04d15b1ee91fcf4c49bc76e90ace74bbf29f6e707ad3853d3e314e000cb4a3ec96a9b256576755adaeacce8022c
|
7
|
+
data.tar.gz: 2d10be78032321162cc1e2416771946c02cecea129f2959f12ec9a3bc49e2e9d68563ac18b9896159fef1a097d1d90fa5bf6ac7698985a2ec1fc3afcf8248297
|
data/Rakefile
CHANGED
data/dullard.gemspec
CHANGED
@@ -6,6 +6,7 @@ Gem::Specification.new do |gem|
|
|
6
6
|
gem.email = ["ted@shlashdot.org"]
|
7
7
|
gem.summary = %q{A fast XLSX parser using Nokogiri}
|
8
8
|
gem.homepage = "http://github.com/thirtyseven/dullard"
|
9
|
+
gem.license = "MIT"
|
9
10
|
|
10
11
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
11
12
|
gem.files = `git ls-files`.split("\n")
|
@@ -14,7 +15,8 @@ Gem::Specification.new do |gem|
|
|
14
15
|
gem.require_paths = ["lib"]
|
15
16
|
gem.version = Dullard::VERSION
|
16
17
|
|
17
|
-
gem.add_development_dependency "rspec", "~> 2.
|
18
|
-
gem.
|
19
|
-
gem.add_dependency "
|
18
|
+
gem.add_development_dependency "rspec", "~> 2.14"
|
19
|
+
gem.add_development_dependency "rake", "~> 10.1"
|
20
|
+
gem.add_dependency "nokogiri", "~> 1.6"
|
21
|
+
gem.add_dependency "rubyzip", "~> 1.1"
|
20
22
|
end
|
data/lib/dullard/reader.rb
CHANGED
@@ -1,12 +1,86 @@
|
|
1
|
-
require 'zip/
|
1
|
+
require 'zip/filesystem'
|
2
2
|
require 'nokogiri'
|
3
3
|
|
4
4
|
module Dullard; end
|
5
5
|
|
6
6
|
class Dullard::Workbook
|
7
|
-
|
7
|
+
# Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
|
8
|
+
# Some additional formats added by Paul Hendryx (phendryx@gmail.com) that are common in LibreOffice.
|
9
|
+
FORMATS = {
|
10
|
+
'general' => :float,
|
11
|
+
'0' => :float,
|
12
|
+
'0.00' => :float,
|
13
|
+
'#,##0' => :float,
|
14
|
+
'#,##0.00' => :float,
|
15
|
+
'0%' => :percentage,
|
16
|
+
'0.00%' => :percentage,
|
17
|
+
'0.00E+00' => :float,
|
18
|
+
'# ?/?' => :float, #??? TODO:
|
19
|
+
'# ??/??' => :float, #??? TODO:
|
20
|
+
'mm-dd-yy' => :date,
|
21
|
+
'd-mmm-yy' => :date,
|
22
|
+
'd-mmm' => :date,
|
23
|
+
'mmm-yy' => :date,
|
24
|
+
'h:mm am/pm' => :date,
|
25
|
+
'h:mm:ss am/pm' => :date,
|
26
|
+
'h:mm' => :time,
|
27
|
+
'h:mm:ss' => :time,
|
28
|
+
'm/d/yy h:mm' => :date,
|
29
|
+
'#,##0 ;(#,##0)' => :float,
|
30
|
+
'#,##0 ;[red](#,##0)' => :float,
|
31
|
+
'#,##0.00;(#,##0.00)' => :float,
|
32
|
+
'#,##0.00;[red](#,##0.00)' => :float,
|
33
|
+
'mm:ss' => :time,
|
34
|
+
'[h]:mm:ss' => :time,
|
35
|
+
'mmss.0' => :time,
|
36
|
+
'##0.0e+0' => :float,
|
37
|
+
'@' => :float,
|
38
|
+
#-- zusaetzliche Formate, die nicht standardmaessig definiert sind:
|
39
|
+
"yyyy\\-mm\\-dd" => :date,
|
40
|
+
'dd/mm/yy' => :date,
|
41
|
+
'hh:mm:ss' => :time,
|
42
|
+
"dd/mm/yy\\ hh:mm" => :datetime,
|
43
|
+
'm/d/yy' => :date,
|
44
|
+
'mm/dd/yy' => :date,
|
45
|
+
'mm/dd/yyyy' => :date,
|
46
|
+
}
|
47
|
+
|
48
|
+
STANDARD_FORMATS = {
|
49
|
+
0 => 'General',
|
50
|
+
1 => '0',
|
51
|
+
2 => '0.00',
|
52
|
+
3 => '#,##0',
|
53
|
+
4 => '#,##0.00',
|
54
|
+
9 => '0%',
|
55
|
+
10 => '0.00%',
|
56
|
+
11 => '0.00E+00',
|
57
|
+
12 => '# ?/?',
|
58
|
+
13 => '# ??/??',
|
59
|
+
14 => 'mm-dd-yy',
|
60
|
+
15 => 'd-mmm-yy',
|
61
|
+
16 => 'd-mmm',
|
62
|
+
17 => 'mmm-yy',
|
63
|
+
18 => 'h:mm AM/PM',
|
64
|
+
19 => 'h:mm:ss AM/PM',
|
65
|
+
20 => 'h:mm',
|
66
|
+
21 => 'h:mm:ss',
|
67
|
+
22 => 'm/d/yy h:mm',
|
68
|
+
37 => '#,##0 ;(#,##0)',
|
69
|
+
38 => '#,##0 ;[Red](#,##0)',
|
70
|
+
39 => '#,##0.00;(#,##0.00)',
|
71
|
+
40 => '#,##0.00;[Red](#,##0.00)',
|
72
|
+
45 => 'mm:ss',
|
73
|
+
46 => '[h]:mm:ss',
|
74
|
+
47 => 'mmss.0',
|
75
|
+
48 => '##0.0E+0',
|
76
|
+
49 => '@',
|
77
|
+
}
|
78
|
+
|
79
|
+
def initialize(file, user_defined_formats = {})
|
8
80
|
@file = file
|
9
|
-
@zipfs = Zip::
|
81
|
+
@zipfs = Zip::File.open(@file)
|
82
|
+
@user_defined_formats = user_defined_formats
|
83
|
+
read_styles
|
10
84
|
end
|
11
85
|
|
12
86
|
def sheets
|
@@ -33,6 +107,51 @@ class Dullard::Workbook
|
|
33
107
|
@string_table
|
34
108
|
end
|
35
109
|
|
110
|
+
def read_styles
|
111
|
+
doc = Nokogiri::XML(@zipfs.file.open("xl/styles.xml"))
|
112
|
+
|
113
|
+
@num_formats = {}
|
114
|
+
@cell_xfs = []
|
115
|
+
|
116
|
+
doc.css('/styleSheet/numFmts/numFmt').each do |numFmt|
|
117
|
+
numFmtId = numFmt.attributes['numFmtId'].value.to_i
|
118
|
+
formatCode = numFmt.attributes['formatCode'].value
|
119
|
+
@num_formats[numFmtId] = formatCode
|
120
|
+
end
|
121
|
+
|
122
|
+
doc.css('/styleSheet/cellXfs/xf').each do |xf|
|
123
|
+
numFmtId = xf.attributes['numFmtId'].value.to_i
|
124
|
+
@cell_xfs << numFmtId
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
# Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
|
130
|
+
# convert internal excelx attribute to a format
|
131
|
+
def attribute2format(s)
|
132
|
+
id = @cell_xfs[s.to_i].to_i
|
133
|
+
result = @num_formats[id]
|
134
|
+
|
135
|
+
if result == nil
|
136
|
+
if STANDARD_FORMATS.has_key? id
|
137
|
+
result = STANDARD_FORMATS[id]
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
result.downcase
|
142
|
+
end
|
143
|
+
|
144
|
+
# Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
|
145
|
+
def format2type(format)
|
146
|
+
if FORMATS.has_key? format
|
147
|
+
FORMATS[format]
|
148
|
+
elsif @user_defined_formats.has_key? format
|
149
|
+
@user_defined_formats[format]
|
150
|
+
else
|
151
|
+
:float
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
36
155
|
def zipfs
|
37
156
|
@zipfs
|
38
157
|
end
|
@@ -57,12 +176,13 @@ class Dullard::Sheet
|
|
57
176
|
end
|
58
177
|
|
59
178
|
def rows
|
60
|
-
Enumerator.new(
|
179
|
+
Enumerator.new(row_count) do |y|
|
61
180
|
next unless @file
|
62
181
|
@file.rewind
|
63
182
|
shared = false
|
64
183
|
row = nil
|
65
184
|
column = nil
|
185
|
+
cell_type = nil
|
66
186
|
Nokogiri::XML::Reader(@file).each do |node|
|
67
187
|
case node.node_type
|
68
188
|
when Nokogiri::XML::Reader::TYPE_ELEMENT
|
@@ -72,6 +192,11 @@ class Dullard::Sheet
|
|
72
192
|
column = 0
|
73
193
|
next
|
74
194
|
when "c"
|
195
|
+
if node.attributes['t'] != 's' && node.attributes['t'] != 'b'
|
196
|
+
cell_format_index = node.attributes['s'].to_i
|
197
|
+
cell_type = @workbook.format2type(@workbook.attribute2format(cell_format_index))
|
198
|
+
end
|
199
|
+
|
75
200
|
rcolumn = node.attributes["r"]
|
76
201
|
if rcolumn
|
77
202
|
rcolumn.delete!("0-9")
|
@@ -91,7 +216,21 @@ class Dullard::Sheet
|
|
91
216
|
end
|
92
217
|
end
|
93
218
|
value = node.value
|
219
|
+
|
94
220
|
if value
|
221
|
+
case cell_type
|
222
|
+
when :datetime
|
223
|
+
when :time
|
224
|
+
when :date
|
225
|
+
value = (DateTime.new(1899,12,30) + value.to_f)
|
226
|
+
when :percentage # ? TODO
|
227
|
+
when :float
|
228
|
+
value = value.to_f
|
229
|
+
else
|
230
|
+
# leave as string
|
231
|
+
end
|
232
|
+
cell_type = nil
|
233
|
+
|
95
234
|
row << (shared ? string_lookup(value.to_i) : value)
|
96
235
|
end
|
97
236
|
end
|
@@ -113,14 +252,9 @@ class Dullard::Sheet
|
|
113
252
|
end
|
114
253
|
end
|
115
254
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
120
|
-
|
121
|
-
def rows_size
|
122
|
-
if defined? @rows_size
|
123
|
-
@rows_size
|
255
|
+
def row_count
|
256
|
+
if defined? @row_count
|
257
|
+
@row_count
|
124
258
|
elsif @file
|
125
259
|
@file.rewind
|
126
260
|
Nokogiri::XML::Reader(@file).each do |node|
|
@@ -128,13 +262,19 @@ class Dullard::Sheet
|
|
128
262
|
case node.name
|
129
263
|
when "dimension"
|
130
264
|
if ref = node.attributes["ref"]
|
131
|
-
break @
|
265
|
+
break @row_count = ref.scan(/\d+$/).first.to_i
|
132
266
|
end
|
133
267
|
when "sheetData"
|
134
|
-
break @
|
268
|
+
break @row_count = nil
|
135
269
|
end
|
136
270
|
end
|
137
271
|
end
|
138
272
|
end
|
139
273
|
end
|
274
|
+
|
275
|
+
private
|
276
|
+
def path
|
277
|
+
"xl/worksheets/sheet#{@index}.xml"
|
278
|
+
end
|
279
|
+
|
140
280
|
end
|
data/lib/dullard/version.rb
CHANGED
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'dullard'
|
2
|
+
|
3
|
+
describe "dullard," do
|
4
|
+
before(:each) do
|
5
|
+
@file = File.open(File.expand_path("../test.xlsx", __FILE__))
|
6
|
+
end
|
7
|
+
|
8
|
+
describe "when it has no user defined formats," do
|
9
|
+
before(:each) do
|
10
|
+
@xlsx = Dullard::Workbook.new @file
|
11
|
+
end
|
12
|
+
|
13
|
+
it "can open a file" do
|
14
|
+
@xlsx.should_not be_nil
|
15
|
+
end
|
16
|
+
|
17
|
+
it "can find sheets" do
|
18
|
+
@xlsx.sheets.count.should == 1
|
19
|
+
end
|
20
|
+
|
21
|
+
it "reads the right number of columns, even with blanks" do
|
22
|
+
rows = @xlsx.sheets[0].rows
|
23
|
+
rows.next.count.should == 300
|
24
|
+
rows.next.count.should == 9
|
25
|
+
rows.next.count.should == 1
|
26
|
+
end
|
27
|
+
|
28
|
+
it "reads the right number of rows" do
|
29
|
+
@xlsx.sheets[0].row_count.should == 117
|
30
|
+
end
|
31
|
+
|
32
|
+
it "reads the right number of rows from the metadata when present" do
|
33
|
+
@xlsx.sheets[0].rows.size.should == 117
|
34
|
+
end
|
35
|
+
|
36
|
+
it "reads date/time properly" do
|
37
|
+
count = 0
|
38
|
+
@xlsx.sheets[0].rows.each do |row|
|
39
|
+
count += 1
|
40
|
+
|
41
|
+
if count == 116
|
42
|
+
row[0].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00"
|
43
|
+
row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58"
|
44
|
+
row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48"
|
45
|
+
row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
count.should == 117
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "when it has user defined formats," do
|
53
|
+
before(:each) do
|
54
|
+
@xlsx = Dullard::Workbook.new @file, {'GENERAL' => :string, 'm/d/yyyy' => :date, 'M/D/YYYY' => :date,}
|
55
|
+
end
|
56
|
+
|
57
|
+
it "converts the user defined formatted cells properly" do
|
58
|
+
count = 0
|
59
|
+
@xlsx.sheets[0].rows.each do |row|
|
60
|
+
count += 1
|
61
|
+
|
62
|
+
if count == 117
|
63
|
+
row[0].should == 'teststring'
|
64
|
+
row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00"
|
65
|
+
row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58"
|
66
|
+
row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48"
|
67
|
+
row[4].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
count.should == 117
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
data/spec/test.xlsx
ADDED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dullard
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ted Kaplan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -16,42 +16,56 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2.
|
19
|
+
version: '2.14'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2.
|
26
|
+
version: '2.14'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.1'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: nokogiri
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - ~>
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.
|
47
|
+
version: '1.6'
|
34
48
|
type: :runtime
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
52
|
- - ~>
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
54
|
+
version: '1.6'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rubyzip
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
59
|
- - ~>
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
61
|
+
version: '1.1'
|
48
62
|
type: :runtime
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
66
|
- - ~>
|
53
67
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
68
|
+
version: '1.1'
|
55
69
|
description:
|
56
70
|
email:
|
57
71
|
- ted@shlashdot.org
|
@@ -68,10 +82,11 @@ files:
|
|
68
82
|
- lib/dullard.rb
|
69
83
|
- lib/dullard/reader.rb
|
70
84
|
- lib/dullard/version.rb
|
71
|
-
-
|
72
|
-
-
|
85
|
+
- spec/dullard_spec.rb
|
86
|
+
- spec/test.xlsx
|
73
87
|
homepage: http://github.com/thirtyseven/dullard
|
74
|
-
licenses:
|
88
|
+
licenses:
|
89
|
+
- MIT
|
75
90
|
metadata: {}
|
76
91
|
post_install_message:
|
77
92
|
rdoc_options: []
|
@@ -93,4 +108,6 @@ rubygems_version: 2.0.5
|
|
93
108
|
signing_key:
|
94
109
|
specification_version: 4
|
95
110
|
summary: A fast XLSX parser using Nokogiri
|
96
|
-
test_files:
|
111
|
+
test_files:
|
112
|
+
- spec/dullard_spec.rb
|
113
|
+
- spec/test.xlsx
|
data/specs/dullard_spec.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
require 'dullard'
|
2
|
-
|
3
|
-
describe "dullard" do
|
4
|
-
before(:each) do
|
5
|
-
@file = File.open(File.expand_path("../test.xlsx", __FILE__))
|
6
|
-
@xlsx = Dullard::Workbook.new @file
|
7
|
-
end
|
8
|
-
it "can open a file" do
|
9
|
-
@xlsx.should_not be_nil
|
10
|
-
end
|
11
|
-
|
12
|
-
it "can find sheets" do
|
13
|
-
@xlsx.sheets.count.should == 1
|
14
|
-
end
|
15
|
-
|
16
|
-
it "reads the right number of columns, even with blanks" do
|
17
|
-
rows = @xlsx.sheets[0].rows
|
18
|
-
rows.next.count.should == 300
|
19
|
-
rows.next.count.should == 9
|
20
|
-
rows.next.count.should == 1
|
21
|
-
end
|
22
|
-
|
23
|
-
it "reads the right number of rows" do
|
24
|
-
count = 0
|
25
|
-
@xlsx.sheets[0].rows.each do |row|
|
26
|
-
count += 1
|
27
|
-
end
|
28
|
-
count.should == 115
|
29
|
-
end
|
30
|
-
|
31
|
-
it "reads the right number of rows from the metadata when present" do
|
32
|
-
@xlsx.sheets[0].rows.size.should == 115
|
33
|
-
end
|
34
|
-
end
|
data/specs/test.xlsx
DELETED
Binary file
|