dullard 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +5 -0
- data/dullard.gemspec +5 -3
- data/lib/dullard/reader.rb +154 -14
- data/lib/dullard/version.rb +1 -1
- data/spec/dullard_spec.rb +73 -0
- data/spec/test.xlsx +0 -0
- metadata +29 -12
- data/specs/dullard_spec.rb +0 -34
- data/specs/test.xlsx +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d02ed091d258d5d690ba9e3ba350c392488664e
|
4
|
+
data.tar.gz: 9f61befa4e3003f7e469a26ff7c4412ffa4d6e55
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87d7d94d530d337ba92623146e5d0a115e44b04d15b1ee91fcf4c49bc76e90ace74bbf29f6e707ad3853d3e314e000cb4a3ec96a9b256576755adaeacce8022c
|
7
|
+
data.tar.gz: 2d10be78032321162cc1e2416771946c02cecea129f2959f12ec9a3bc49e2e9d68563ac18b9896159fef1a097d1d90fa5bf6ac7698985a2ec1fc3afcf8248297
|
data/Rakefile
CHANGED
data/dullard.gemspec
CHANGED
@@ -6,6 +6,7 @@ Gem::Specification.new do |gem|
|
|
6
6
|
gem.email = ["ted@shlashdot.org"]
|
7
7
|
gem.summary = %q{A fast XLSX parser using Nokogiri}
|
8
8
|
gem.homepage = "http://github.com/thirtyseven/dullard"
|
9
|
+
gem.license = "MIT"
|
9
10
|
|
10
11
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
11
12
|
gem.files = `git ls-files`.split("\n")
|
@@ -14,7 +15,8 @@ Gem::Specification.new do |gem|
|
|
14
15
|
gem.require_paths = ["lib"]
|
15
16
|
gem.version = Dullard::VERSION
|
16
17
|
|
17
|
-
gem.add_development_dependency "rspec", "~> 2.
|
18
|
-
gem.
|
19
|
-
gem.add_dependency "
|
18
|
+
gem.add_development_dependency "rspec", "~> 2.14"
|
19
|
+
gem.add_development_dependency "rake", "~> 10.1"
|
20
|
+
gem.add_dependency "nokogiri", "~> 1.6"
|
21
|
+
gem.add_dependency "rubyzip", "~> 1.1"
|
20
22
|
end
|
data/lib/dullard/reader.rb
CHANGED
@@ -1,12 +1,86 @@
|
|
1
|
-
require 'zip/
|
1
|
+
require 'zip/filesystem'
|
2
2
|
require 'nokogiri'
|
3
3
|
|
4
4
|
module Dullard; end
|
5
5
|
|
6
6
|
class Dullard::Workbook
|
7
|
-
|
7
|
+
# Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
|
8
|
+
# Some additional formats added by Paul Hendryx (phendryx@gmail.com) that are common in LibreOffice.
|
9
|
+
FORMATS = {
|
10
|
+
'general' => :float,
|
11
|
+
'0' => :float,
|
12
|
+
'0.00' => :float,
|
13
|
+
'#,##0' => :float,
|
14
|
+
'#,##0.00' => :float,
|
15
|
+
'0%' => :percentage,
|
16
|
+
'0.00%' => :percentage,
|
17
|
+
'0.00E+00' => :float,
|
18
|
+
'# ?/?' => :float, #??? TODO:
|
19
|
+
'# ??/??' => :float, #??? TODO:
|
20
|
+
'mm-dd-yy' => :date,
|
21
|
+
'd-mmm-yy' => :date,
|
22
|
+
'd-mmm' => :date,
|
23
|
+
'mmm-yy' => :date,
|
24
|
+
'h:mm am/pm' => :date,
|
25
|
+
'h:mm:ss am/pm' => :date,
|
26
|
+
'h:mm' => :time,
|
27
|
+
'h:mm:ss' => :time,
|
28
|
+
'm/d/yy h:mm' => :date,
|
29
|
+
'#,##0 ;(#,##0)' => :float,
|
30
|
+
'#,##0 ;[red](#,##0)' => :float,
|
31
|
+
'#,##0.00;(#,##0.00)' => :float,
|
32
|
+
'#,##0.00;[red](#,##0.00)' => :float,
|
33
|
+
'mm:ss' => :time,
|
34
|
+
'[h]:mm:ss' => :time,
|
35
|
+
'mmss.0' => :time,
|
36
|
+
'##0.0e+0' => :float,
|
37
|
+
'@' => :float,
|
38
|
+
#-- zusaetzliche Formate, die nicht standardmaessig definiert sind:
|
39
|
+
"yyyy\\-mm\\-dd" => :date,
|
40
|
+
'dd/mm/yy' => :date,
|
41
|
+
'hh:mm:ss' => :time,
|
42
|
+
"dd/mm/yy\\ hh:mm" => :datetime,
|
43
|
+
'm/d/yy' => :date,
|
44
|
+
'mm/dd/yy' => :date,
|
45
|
+
'mm/dd/yyyy' => :date,
|
46
|
+
}
|
47
|
+
|
48
|
+
STANDARD_FORMATS = {
|
49
|
+
0 => 'General',
|
50
|
+
1 => '0',
|
51
|
+
2 => '0.00',
|
52
|
+
3 => '#,##0',
|
53
|
+
4 => '#,##0.00',
|
54
|
+
9 => '0%',
|
55
|
+
10 => '0.00%',
|
56
|
+
11 => '0.00E+00',
|
57
|
+
12 => '# ?/?',
|
58
|
+
13 => '# ??/??',
|
59
|
+
14 => 'mm-dd-yy',
|
60
|
+
15 => 'd-mmm-yy',
|
61
|
+
16 => 'd-mmm',
|
62
|
+
17 => 'mmm-yy',
|
63
|
+
18 => 'h:mm AM/PM',
|
64
|
+
19 => 'h:mm:ss AM/PM',
|
65
|
+
20 => 'h:mm',
|
66
|
+
21 => 'h:mm:ss',
|
67
|
+
22 => 'm/d/yy h:mm',
|
68
|
+
37 => '#,##0 ;(#,##0)',
|
69
|
+
38 => '#,##0 ;[Red](#,##0)',
|
70
|
+
39 => '#,##0.00;(#,##0.00)',
|
71
|
+
40 => '#,##0.00;[Red](#,##0.00)',
|
72
|
+
45 => 'mm:ss',
|
73
|
+
46 => '[h]:mm:ss',
|
74
|
+
47 => 'mmss.0',
|
75
|
+
48 => '##0.0E+0',
|
76
|
+
49 => '@',
|
77
|
+
}
|
78
|
+
|
79
|
+
def initialize(file, user_defined_formats = {})
|
8
80
|
@file = file
|
9
|
-
@zipfs = Zip::
|
81
|
+
@zipfs = Zip::File.open(@file)
|
82
|
+
@user_defined_formats = user_defined_formats
|
83
|
+
read_styles
|
10
84
|
end
|
11
85
|
|
12
86
|
def sheets
|
@@ -33,6 +107,51 @@ class Dullard::Workbook
|
|
33
107
|
@string_table
|
34
108
|
end
|
35
109
|
|
110
|
+
def read_styles
|
111
|
+
doc = Nokogiri::XML(@zipfs.file.open("xl/styles.xml"))
|
112
|
+
|
113
|
+
@num_formats = {}
|
114
|
+
@cell_xfs = []
|
115
|
+
|
116
|
+
doc.css('/styleSheet/numFmts/numFmt').each do |numFmt|
|
117
|
+
numFmtId = numFmt.attributes['numFmtId'].value.to_i
|
118
|
+
formatCode = numFmt.attributes['formatCode'].value
|
119
|
+
@num_formats[numFmtId] = formatCode
|
120
|
+
end
|
121
|
+
|
122
|
+
doc.css('/styleSheet/cellXfs/xf').each do |xf|
|
123
|
+
numFmtId = xf.attributes['numFmtId'].value.to_i
|
124
|
+
@cell_xfs << numFmtId
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
# Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
|
130
|
+
# convert internal excelx attribute to a format
|
131
|
+
def attribute2format(s)
|
132
|
+
id = @cell_xfs[s.to_i].to_i
|
133
|
+
result = @num_formats[id]
|
134
|
+
|
135
|
+
if result == nil
|
136
|
+
if STANDARD_FORMATS.has_key? id
|
137
|
+
result = STANDARD_FORMATS[id]
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
result.downcase
|
142
|
+
end
|
143
|
+
|
144
|
+
# Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
|
145
|
+
def format2type(format)
|
146
|
+
if FORMATS.has_key? format
|
147
|
+
FORMATS[format]
|
148
|
+
elsif @user_defined_formats.has_key? format
|
149
|
+
@user_defined_formats[format]
|
150
|
+
else
|
151
|
+
:float
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
36
155
|
def zipfs
|
37
156
|
@zipfs
|
38
157
|
end
|
@@ -57,12 +176,13 @@ class Dullard::Sheet
|
|
57
176
|
end
|
58
177
|
|
59
178
|
def rows
|
60
|
-
Enumerator.new(
|
179
|
+
Enumerator.new(row_count) do |y|
|
61
180
|
next unless @file
|
62
181
|
@file.rewind
|
63
182
|
shared = false
|
64
183
|
row = nil
|
65
184
|
column = nil
|
185
|
+
cell_type = nil
|
66
186
|
Nokogiri::XML::Reader(@file).each do |node|
|
67
187
|
case node.node_type
|
68
188
|
when Nokogiri::XML::Reader::TYPE_ELEMENT
|
@@ -72,6 +192,11 @@ class Dullard::Sheet
|
|
72
192
|
column = 0
|
73
193
|
next
|
74
194
|
when "c"
|
195
|
+
if node.attributes['t'] != 's' && node.attributes['t'] != 'b'
|
196
|
+
cell_format_index = node.attributes['s'].to_i
|
197
|
+
cell_type = @workbook.format2type(@workbook.attribute2format(cell_format_index))
|
198
|
+
end
|
199
|
+
|
75
200
|
rcolumn = node.attributes["r"]
|
76
201
|
if rcolumn
|
77
202
|
rcolumn.delete!("0-9")
|
@@ -91,7 +216,21 @@ class Dullard::Sheet
|
|
91
216
|
end
|
92
217
|
end
|
93
218
|
value = node.value
|
219
|
+
|
94
220
|
if value
|
221
|
+
case cell_type
|
222
|
+
when :datetime
|
223
|
+
when :time
|
224
|
+
when :date
|
225
|
+
value = (DateTime.new(1899,12,30) + value.to_f)
|
226
|
+
when :percentage # ? TODO
|
227
|
+
when :float
|
228
|
+
value = value.to_f
|
229
|
+
else
|
230
|
+
# leave as string
|
231
|
+
end
|
232
|
+
cell_type = nil
|
233
|
+
|
95
234
|
row << (shared ? string_lookup(value.to_i) : value)
|
96
235
|
end
|
97
236
|
end
|
@@ -113,14 +252,9 @@ class Dullard::Sheet
|
|
113
252
|
end
|
114
253
|
end
|
115
254
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
120
|
-
|
121
|
-
def rows_size
|
122
|
-
if defined? @rows_size
|
123
|
-
@rows_size
|
255
|
+
def row_count
|
256
|
+
if defined? @row_count
|
257
|
+
@row_count
|
124
258
|
elsif @file
|
125
259
|
@file.rewind
|
126
260
|
Nokogiri::XML::Reader(@file).each do |node|
|
@@ -128,13 +262,19 @@ class Dullard::Sheet
|
|
128
262
|
case node.name
|
129
263
|
when "dimension"
|
130
264
|
if ref = node.attributes["ref"]
|
131
|
-
break @
|
265
|
+
break @row_count = ref.scan(/\d+$/).first.to_i
|
132
266
|
end
|
133
267
|
when "sheetData"
|
134
|
-
break @
|
268
|
+
break @row_count = nil
|
135
269
|
end
|
136
270
|
end
|
137
271
|
end
|
138
272
|
end
|
139
273
|
end
|
274
|
+
|
275
|
+
private
|
276
|
+
def path
|
277
|
+
"xl/worksheets/sheet#{@index}.xml"
|
278
|
+
end
|
279
|
+
|
140
280
|
end
|
data/lib/dullard/version.rb
CHANGED
data/spec/dullard_spec.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'dullard'
|
2
|
+
|
3
|
+
describe "dullard," do
|
4
|
+
before(:each) do
|
5
|
+
@file = File.open(File.expand_path("../test.xlsx", __FILE__))
|
6
|
+
end
|
7
|
+
|
8
|
+
describe "when it has no user defined formats," do
|
9
|
+
before(:each) do
|
10
|
+
@xlsx = Dullard::Workbook.new @file
|
11
|
+
end
|
12
|
+
|
13
|
+
it "can open a file" do
|
14
|
+
@xlsx.should_not be_nil
|
15
|
+
end
|
16
|
+
|
17
|
+
it "can find sheets" do
|
18
|
+
@xlsx.sheets.count.should == 1
|
19
|
+
end
|
20
|
+
|
21
|
+
it "reads the right number of columns, even with blanks" do
|
22
|
+
rows = @xlsx.sheets[0].rows
|
23
|
+
rows.next.count.should == 300
|
24
|
+
rows.next.count.should == 9
|
25
|
+
rows.next.count.should == 1
|
26
|
+
end
|
27
|
+
|
28
|
+
it "reads the right number of rows" do
|
29
|
+
@xlsx.sheets[0].row_count.should == 117
|
30
|
+
end
|
31
|
+
|
32
|
+
it "reads the right number of rows from the metadata when present" do
|
33
|
+
@xlsx.sheets[0].rows.size.should == 117
|
34
|
+
end
|
35
|
+
|
36
|
+
it "reads date/time properly" do
|
37
|
+
count = 0
|
38
|
+
@xlsx.sheets[0].rows.each do |row|
|
39
|
+
count += 1
|
40
|
+
|
41
|
+
if count == 116
|
42
|
+
row[0].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00"
|
43
|
+
row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58"
|
44
|
+
row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48"
|
45
|
+
row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
count.should == 117
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "when it has user defined formats," do
|
53
|
+
before(:each) do
|
54
|
+
@xlsx = Dullard::Workbook.new @file, {'GENERAL' => :string, 'm/d/yyyy' => :date, 'M/D/YYYY' => :date,}
|
55
|
+
end
|
56
|
+
|
57
|
+
it "converts the user defined formatted cells properly" do
|
58
|
+
count = 0
|
59
|
+
@xlsx.sheets[0].rows.each do |row|
|
60
|
+
count += 1
|
61
|
+
|
62
|
+
if count == 117
|
63
|
+
row[0].should == 'teststring'
|
64
|
+
row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00"
|
65
|
+
row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58"
|
66
|
+
row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48"
|
67
|
+
row[4].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
count.should == 117
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
data/spec/test.xlsx
ADDED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dullard
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ted Kaplan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -16,42 +16,56 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2.
|
19
|
+
version: '2.14'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2.
|
26
|
+
version: '2.14'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.1'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: nokogiri
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - ~>
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.
|
47
|
+
version: '1.6'
|
34
48
|
type: :runtime
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
52
|
- - ~>
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
54
|
+
version: '1.6'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rubyzip
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
59
|
- - ~>
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
61
|
+
version: '1.1'
|
48
62
|
type: :runtime
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
66
|
- - ~>
|
53
67
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
68
|
+
version: '1.1'
|
55
69
|
description:
|
56
70
|
email:
|
57
71
|
- ted@shlashdot.org
|
@@ -68,10 +82,11 @@ files:
|
|
68
82
|
- lib/dullard.rb
|
69
83
|
- lib/dullard/reader.rb
|
70
84
|
- lib/dullard/version.rb
|
71
|
-
-
|
72
|
-
-
|
85
|
+
- spec/dullard_spec.rb
|
86
|
+
- spec/test.xlsx
|
73
87
|
homepage: http://github.com/thirtyseven/dullard
|
74
|
-
licenses:
|
88
|
+
licenses:
|
89
|
+
- MIT
|
75
90
|
metadata: {}
|
76
91
|
post_install_message:
|
77
92
|
rdoc_options: []
|
@@ -93,4 +108,6 @@ rubygems_version: 2.0.5
|
|
93
108
|
signing_key:
|
94
109
|
specification_version: 4
|
95
110
|
summary: A fast XLSX parser using Nokogiri
|
96
|
-
test_files:
|
111
|
+
test_files:
|
112
|
+
- spec/dullard_spec.rb
|
113
|
+
- spec/test.xlsx
|
data/specs/dullard_spec.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
require 'dullard'
|
2
|
-
|
3
|
-
describe "dullard" do
|
4
|
-
before(:each) do
|
5
|
-
@file = File.open(File.expand_path("../test.xlsx", __FILE__))
|
6
|
-
@xlsx = Dullard::Workbook.new @file
|
7
|
-
end
|
8
|
-
it "can open a file" do
|
9
|
-
@xlsx.should_not be_nil
|
10
|
-
end
|
11
|
-
|
12
|
-
it "can find sheets" do
|
13
|
-
@xlsx.sheets.count.should == 1
|
14
|
-
end
|
15
|
-
|
16
|
-
it "reads the right number of columns, even with blanks" do
|
17
|
-
rows = @xlsx.sheets[0].rows
|
18
|
-
rows.next.count.should == 300
|
19
|
-
rows.next.count.should == 9
|
20
|
-
rows.next.count.should == 1
|
21
|
-
end
|
22
|
-
|
23
|
-
it "reads the right number of rows" do
|
24
|
-
count = 0
|
25
|
-
@xlsx.sheets[0].rows.each do |row|
|
26
|
-
count += 1
|
27
|
-
end
|
28
|
-
count.should == 115
|
29
|
-
end
|
30
|
-
|
31
|
-
it "reads the right number of rows from the metadata when present" do
|
32
|
-
@xlsx.sheets[0].rows.size.should == 115
|
33
|
-
end
|
34
|
-
end
|
data/specs/test.xlsx
DELETED
Binary file
|