dullard 0.0.5 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 76b6de08a6a72d691a3f3deb06f8a82018c90049
4
- data.tar.gz: d56a5c10a24db975ce01c0e3e352739e128508f1
3
+ metadata.gz: 9d02ed091d258d5d690ba9e3ba350c392488664e
4
+ data.tar.gz: 9f61befa4e3003f7e469a26ff7c4412ffa4d6e55
5
5
  SHA512:
6
- metadata.gz: 943f8b15089aab39fb627fc84a45523e093b60e6980666cb139be7102d99bb2adf3f71a2ed32d5d693784d4612199390de7a62e7dd06de4d80495a7de832ccbc
7
- data.tar.gz: 3402538efd1a687823a4acc8e4a9fe811d381de1e20d40d354746cacb2e2ff557b6eb6259c5446f05b89375c61cb81efe0516fdc7cf26ec22800ed92325c45eb
6
+ metadata.gz: 87d7d94d530d337ba92623146e5d0a115e44b04d15b1ee91fcf4c49bc76e90ace74bbf29f6e707ad3853d3e314e000cb4a3ec96a9b256576755adaeacce8022c
7
+ data.tar.gz: 2d10be78032321162cc1e2416771946c02cecea129f2959f12ec9a3bc49e2e9d68563ac18b9896159fef1a097d1d90fa5bf6ac7698985a2ec1fc3afcf8248297
data/Rakefile CHANGED
@@ -1,2 +1,7 @@
1
1
  #!/usr/bin/env rake
2
2
  require "bundler/gem_tasks"
3
+ require "rspec/core/rake_task"
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task :default => :spec
@@ -6,6 +6,7 @@ Gem::Specification.new do |gem|
6
6
  gem.email = ["ted@shlashdot.org"]
7
7
  gem.summary = %q{A fast XLSX parser using Nokogiri}
8
8
  gem.homepage = "http://github.com/thirtyseven/dullard"
9
+ gem.license = "MIT"
9
10
 
10
11
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
11
12
  gem.files = `git ls-files`.split("\n")
@@ -14,7 +15,8 @@ Gem::Specification.new do |gem|
14
15
  gem.require_paths = ["lib"]
15
16
  gem.version = Dullard::VERSION
16
17
 
17
- gem.add_development_dependency "rspec", "~> 2.6"
18
- gem.add_dependency "nokogiri", "~> 1.5"
19
- gem.add_dependency "rubyzip", "~> 0.9.6"
18
+ gem.add_development_dependency "rspec", "~> 2.14"
19
+ gem.add_development_dependency "rake", "~> 10.1"
20
+ gem.add_dependency "nokogiri", "~> 1.6"
21
+ gem.add_dependency "rubyzip", "~> 1.1"
20
22
  end
@@ -1,12 +1,86 @@
1
- require 'zip/zipfilesystem'
1
+ require 'zip/filesystem'
2
2
  require 'nokogiri'
3
3
 
4
4
  module Dullard; end
5
5
 
6
6
  class Dullard::Workbook
7
- def initialize(file)
7
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
8
+ # Some additional formats added by Paul Hendryx (phendryx@gmail.com) that are common in LibreOffice.
9
+ FORMATS = {
10
+ 'general' => :float,
11
+ '0' => :float,
12
+ '0.00' => :float,
13
+ '#,##0' => :float,
14
+ '#,##0.00' => :float,
15
+ '0%' => :percentage,
16
+ '0.00%' => :percentage,
17
+ '0.00E+00' => :float,
18
+ '# ?/?' => :float, #??? TODO:
19
+ '# ??/??' => :float, #??? TODO:
20
+ 'mm-dd-yy' => :date,
21
+ 'd-mmm-yy' => :date,
22
+ 'd-mmm' => :date,
23
+ 'mmm-yy' => :date,
24
+ 'h:mm am/pm' => :date,
25
+ 'h:mm:ss am/pm' => :date,
26
+ 'h:mm' => :time,
27
+ 'h:mm:ss' => :time,
28
+ 'm/d/yy h:mm' => :date,
29
+ '#,##0 ;(#,##0)' => :float,
30
+ '#,##0 ;[red](#,##0)' => :float,
31
+ '#,##0.00;(#,##0.00)' => :float,
32
+ '#,##0.00;[red](#,##0.00)' => :float,
33
+ 'mm:ss' => :time,
34
+ '[h]:mm:ss' => :time,
35
+ 'mmss.0' => :time,
36
+ '##0.0e+0' => :float,
37
+ '@' => :float,
38
+ #-- zusaetzliche Formate, die nicht standardmaessig definiert sind:
39
+ "yyyy\\-mm\\-dd" => :date,
40
+ 'dd/mm/yy' => :date,
41
+ 'hh:mm:ss' => :time,
42
+ "dd/mm/yy\\ hh:mm" => :datetime,
43
+ 'm/d/yy' => :date,
44
+ 'mm/dd/yy' => :date,
45
+ 'mm/dd/yyyy' => :date,
46
+ }
47
+
48
+ STANDARD_FORMATS = {
49
+ 0 => 'General',
50
+ 1 => '0',
51
+ 2 => '0.00',
52
+ 3 => '#,##0',
53
+ 4 => '#,##0.00',
54
+ 9 => '0%',
55
+ 10 => '0.00%',
56
+ 11 => '0.00E+00',
57
+ 12 => '# ?/?',
58
+ 13 => '# ??/??',
59
+ 14 => 'mm-dd-yy',
60
+ 15 => 'd-mmm-yy',
61
+ 16 => 'd-mmm',
62
+ 17 => 'mmm-yy',
63
+ 18 => 'h:mm AM/PM',
64
+ 19 => 'h:mm:ss AM/PM',
65
+ 20 => 'h:mm',
66
+ 21 => 'h:mm:ss',
67
+ 22 => 'm/d/yy h:mm',
68
+ 37 => '#,##0 ;(#,##0)',
69
+ 38 => '#,##0 ;[Red](#,##0)',
70
+ 39 => '#,##0.00;(#,##0.00)',
71
+ 40 => '#,##0.00;[Red](#,##0.00)',
72
+ 45 => 'mm:ss',
73
+ 46 => '[h]:mm:ss',
74
+ 47 => 'mmss.0',
75
+ 48 => '##0.0E+0',
76
+ 49 => '@',
77
+ }
78
+
79
+ def initialize(file, user_defined_formats = {})
8
80
  @file = file
9
- @zipfs = Zip::ZipFile.open(@file)
81
+ @zipfs = Zip::File.open(@file)
82
+ @user_defined_formats = user_defined_formats
83
+ read_styles
10
84
  end
11
85
 
12
86
  def sheets
@@ -33,6 +107,51 @@ class Dullard::Workbook
33
107
  @string_table
34
108
  end
35
109
 
110
+ def read_styles
111
+ doc = Nokogiri::XML(@zipfs.file.open("xl/styles.xml"))
112
+
113
+ @num_formats = {}
114
+ @cell_xfs = []
115
+
116
+ doc.css('/styleSheet/numFmts/numFmt').each do |numFmt|
117
+ numFmtId = numFmt.attributes['numFmtId'].value.to_i
118
+ formatCode = numFmt.attributes['formatCode'].value
119
+ @num_formats[numFmtId] = formatCode
120
+ end
121
+
122
+ doc.css('/styleSheet/cellXfs/xf').each do |xf|
123
+ numFmtId = xf.attributes['numFmtId'].value.to_i
124
+ @cell_xfs << numFmtId
125
+ end
126
+ end
127
+
128
+
129
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
130
+ # convert internal excelx attribute to a format
131
+ def attribute2format(s)
132
+ id = @cell_xfs[s.to_i].to_i
133
+ result = @num_formats[id]
134
+
135
+ if result == nil
136
+ if STANDARD_FORMATS.has_key? id
137
+ result = STANDARD_FORMATS[id]
138
+ end
139
+ end
140
+
141
+ result.downcase
142
+ end
143
+
144
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
145
+ def format2type(format)
146
+ if FORMATS.has_key? format
147
+ FORMATS[format]
148
+ elsif @user_defined_formats.has_key? format
149
+ @user_defined_formats[format]
150
+ else
151
+ :float
152
+ end
153
+ end
154
+
36
155
  def zipfs
37
156
  @zipfs
38
157
  end
@@ -57,12 +176,13 @@ class Dullard::Sheet
57
176
  end
58
177
 
59
178
  def rows
60
- Enumerator.new(rows_size) do |y|
179
+ Enumerator.new(row_count) do |y|
61
180
  next unless @file
62
181
  @file.rewind
63
182
  shared = false
64
183
  row = nil
65
184
  column = nil
185
+ cell_type = nil
66
186
  Nokogiri::XML::Reader(@file).each do |node|
67
187
  case node.node_type
68
188
  when Nokogiri::XML::Reader::TYPE_ELEMENT
@@ -72,6 +192,11 @@ class Dullard::Sheet
72
192
  column = 0
73
193
  next
74
194
  when "c"
195
+ if node.attributes['t'] != 's' && node.attributes['t'] != 'b'
196
+ cell_format_index = node.attributes['s'].to_i
197
+ cell_type = @workbook.format2type(@workbook.attribute2format(cell_format_index))
198
+ end
199
+
75
200
  rcolumn = node.attributes["r"]
76
201
  if rcolumn
77
202
  rcolumn.delete!("0-9")
@@ -91,7 +216,21 @@ class Dullard::Sheet
91
216
  end
92
217
  end
93
218
  value = node.value
219
+
94
220
  if value
221
+ case cell_type
222
+ when :datetime
223
+ when :time
224
+ when :date
225
+ value = (DateTime.new(1899,12,30) + value.to_f)
226
+ when :percentage # ? TODO
227
+ when :float
228
+ value = value.to_f
229
+ else
230
+ # leave as string
231
+ end
232
+ cell_type = nil
233
+
95
234
  row << (shared ? string_lookup(value.to_i) : value)
96
235
  end
97
236
  end
@@ -113,14 +252,9 @@ class Dullard::Sheet
113
252
  end
114
253
  end
115
254
 
116
- private
117
- def path
118
- "xl/worksheets/sheet#{@index}.xml"
119
- end
120
-
121
- def rows_size
122
- if defined? @rows_size
123
- @rows_size
255
+ def row_count
256
+ if defined? @row_count
257
+ @row_count
124
258
  elsif @file
125
259
  @file.rewind
126
260
  Nokogiri::XML::Reader(@file).each do |node|
@@ -128,13 +262,19 @@ class Dullard::Sheet
128
262
  case node.name
129
263
  when "dimension"
130
264
  if ref = node.attributes["ref"]
131
- break @rows_size = ref.scan(/\d+$/).first.to_i
265
+ break @row_count = ref.scan(/\d+$/).first.to_i
132
266
  end
133
267
  when "sheetData"
134
- break @rows_size = nil
268
+ break @row_count = nil
135
269
  end
136
270
  end
137
271
  end
138
272
  end
139
273
  end
274
+
275
+ private
276
+ def path
277
+ "xl/worksheets/sheet#{@index}.xml"
278
+ end
279
+
140
280
  end
@@ -1,3 +1,3 @@
1
1
  module Dullard
2
- VERSION = "0.0.5"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,73 @@
1
+ require 'dullard'
2
+
3
+ describe "dullard," do
4
+ before(:each) do
5
+ @file = File.open(File.expand_path("../test.xlsx", __FILE__))
6
+ end
7
+
8
+ describe "when it has no user defined formats," do
9
+ before(:each) do
10
+ @xlsx = Dullard::Workbook.new @file
11
+ end
12
+
13
+ it "can open a file" do
14
+ @xlsx.should_not be_nil
15
+ end
16
+
17
+ it "can find sheets" do
18
+ @xlsx.sheets.count.should == 1
19
+ end
20
+
21
+ it "reads the right number of columns, even with blanks" do
22
+ rows = @xlsx.sheets[0].rows
23
+ rows.next.count.should == 300
24
+ rows.next.count.should == 9
25
+ rows.next.count.should == 1
26
+ end
27
+
28
+ it "reads the right number of rows" do
29
+ @xlsx.sheets[0].row_count.should == 117
30
+ end
31
+
32
+ it "reads the right number of rows from the metadata when present" do
33
+ @xlsx.sheets[0].rows.size.should == 117
34
+ end
35
+
36
+ it "reads date/time properly" do
37
+ count = 0
38
+ @xlsx.sheets[0].rows.each do |row|
39
+ count += 1
40
+
41
+ if count == 116
42
+ row[0].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00"
43
+ row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58"
44
+ row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48"
45
+ row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52"
46
+ end
47
+ end
48
+ count.should == 117
49
+ end
50
+ end
51
+
52
+ describe "when it has user defined formats," do
53
+ before(:each) do
54
+ @xlsx = Dullard::Workbook.new @file, {'GENERAL' => :string, 'm/d/yyyy' => :date, 'M/D/YYYY' => :date,}
55
+ end
56
+
57
+ it "converts the user defined formatted cells properly" do
58
+ count = 0
59
+ @xlsx.sheets[0].rows.each do |row|
60
+ count += 1
61
+
62
+ if count == 117
63
+ row[0].should == 'teststring'
64
+ row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00"
65
+ row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58"
66
+ row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48"
67
+ row[4].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52"
68
+ end
69
+ end
70
+ count.should == 117
71
+ end
72
+ end
73
+ end
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dullard
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ted Kaplan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-21 00:00:00.000000000 Z
11
+ date: 2014-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -16,42 +16,56 @@ dependencies:
16
16
  requirements:
17
17
  - - ~>
18
18
  - !ruby/object:Gem::Version
19
- version: '2.6'
19
+ version: '2.14'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
- version: '2.6'
26
+ version: '2.14'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.1'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: nokogiri
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - ~>
32
46
  - !ruby/object:Gem::Version
33
- version: '1.5'
47
+ version: '1.6'
34
48
  type: :runtime
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
52
  - - ~>
39
53
  - !ruby/object:Gem::Version
40
- version: '1.5'
54
+ version: '1.6'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rubyzip
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - ~>
46
60
  - !ruby/object:Gem::Version
47
- version: 0.9.6
61
+ version: '1.1'
48
62
  type: :runtime
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - ~>
53
67
  - !ruby/object:Gem::Version
54
- version: 0.9.6
68
+ version: '1.1'
55
69
  description:
56
70
  email:
57
71
  - ted@shlashdot.org
@@ -68,10 +82,11 @@ files:
68
82
  - lib/dullard.rb
69
83
  - lib/dullard/reader.rb
70
84
  - lib/dullard/version.rb
71
- - specs/dullard_spec.rb
72
- - specs/test.xlsx
85
+ - spec/dullard_spec.rb
86
+ - spec/test.xlsx
73
87
  homepage: http://github.com/thirtyseven/dullard
74
- licenses: []
88
+ licenses:
89
+ - MIT
75
90
  metadata: {}
76
91
  post_install_message:
77
92
  rdoc_options: []
@@ -93,4 +108,6 @@ rubygems_version: 2.0.5
93
108
  signing_key:
94
109
  specification_version: 4
95
110
  summary: A fast XLSX parser using Nokogiri
96
- test_files: []
111
+ test_files:
112
+ - spec/dullard_spec.rb
113
+ - spec/test.xlsx
@@ -1,34 +0,0 @@
1
- require 'dullard'
2
-
3
- describe "dullard" do
4
- before(:each) do
5
- @file = File.open(File.expand_path("../test.xlsx", __FILE__))
6
- @xlsx = Dullard::Workbook.new @file
7
- end
8
- it "can open a file" do
9
- @xlsx.should_not be_nil
10
- end
11
-
12
- it "can find sheets" do
13
- @xlsx.sheets.count.should == 1
14
- end
15
-
16
- it "reads the right number of columns, even with blanks" do
17
- rows = @xlsx.sheets[0].rows
18
- rows.next.count.should == 300
19
- rows.next.count.should == 9
20
- rows.next.count.should == 1
21
- end
22
-
23
- it "reads the right number of rows" do
24
- count = 0
25
- @xlsx.sheets[0].rows.each do |row|
26
- count += 1
27
- end
28
- count.should == 115
29
- end
30
-
31
- it "reads the right number of rows from the metadata when present" do
32
- @xlsx.sheets[0].rows.size.should == 115
33
- end
34
- end
Binary file