dullard 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 76b6de08a6a72d691a3f3deb06f8a82018c90049
4
- data.tar.gz: d56a5c10a24db975ce01c0e3e352739e128508f1
3
+ metadata.gz: 9d02ed091d258d5d690ba9e3ba350c392488664e
4
+ data.tar.gz: 9f61befa4e3003f7e469a26ff7c4412ffa4d6e55
5
5
  SHA512:
6
- metadata.gz: 943f8b15089aab39fb627fc84a45523e093b60e6980666cb139be7102d99bb2adf3f71a2ed32d5d693784d4612199390de7a62e7dd06de4d80495a7de832ccbc
7
- data.tar.gz: 3402538efd1a687823a4acc8e4a9fe811d381de1e20d40d354746cacb2e2ff557b6eb6259c5446f05b89375c61cb81efe0516fdc7cf26ec22800ed92325c45eb
6
+ metadata.gz: 87d7d94d530d337ba92623146e5d0a115e44b04d15b1ee91fcf4c49bc76e90ace74bbf29f6e707ad3853d3e314e000cb4a3ec96a9b256576755adaeacce8022c
7
+ data.tar.gz: 2d10be78032321162cc1e2416771946c02cecea129f2959f12ec9a3bc49e2e9d68563ac18b9896159fef1a097d1d90fa5bf6ac7698985a2ec1fc3afcf8248297
data/Rakefile CHANGED
@@ -1,2 +1,7 @@
1
1
  #!/usr/bin/env rake
2
2
  require "bundler/gem_tasks"
3
+ require "rspec/core/rake_task"
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task :default => :spec
@@ -6,6 +6,7 @@ Gem::Specification.new do |gem|
6
6
  gem.email = ["ted@shlashdot.org"]
7
7
  gem.summary = %q{A fast XLSX parser using Nokogiri}
8
8
  gem.homepage = "http://github.com/thirtyseven/dullard"
9
+ gem.license = "MIT"
9
10
 
10
11
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
11
12
  gem.files = `git ls-files`.split("\n")
@@ -14,7 +15,8 @@ Gem::Specification.new do |gem|
14
15
  gem.require_paths = ["lib"]
15
16
  gem.version = Dullard::VERSION
16
17
 
17
- gem.add_development_dependency "rspec", "~> 2.6"
18
- gem.add_dependency "nokogiri", "~> 1.5"
19
- gem.add_dependency "rubyzip", "~> 0.9.6"
18
+ gem.add_development_dependency "rspec", "~> 2.14"
19
+ gem.add_development_dependency "rake", "~> 10.1"
20
+ gem.add_dependency "nokogiri", "~> 1.6"
21
+ gem.add_dependency "rubyzip", "~> 1.1"
20
22
  end
@@ -1,12 +1,86 @@
1
- require 'zip/zipfilesystem'
1
+ require 'zip/filesystem'
2
2
  require 'nokogiri'
3
3
 
4
4
  module Dullard; end
5
5
 
6
6
  class Dullard::Workbook
7
- def initialize(file)
7
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
8
+ # Some additional formats added by Paul Hendryx (phendryx@gmail.com) that are common in LibreOffice.
9
+ FORMATS = {
10
+ 'general' => :float,
11
+ '0' => :float,
12
+ '0.00' => :float,
13
+ '#,##0' => :float,
14
+ '#,##0.00' => :float,
15
+ '0%' => :percentage,
16
+ '0.00%' => :percentage,
17
+ '0.00E+00' => :float,
18
+ '# ?/?' => :float, #??? TODO:
19
+ '# ??/??' => :float, #??? TODO:
20
+ 'mm-dd-yy' => :date,
21
+ 'd-mmm-yy' => :date,
22
+ 'd-mmm' => :date,
23
+ 'mmm-yy' => :date,
24
+ 'h:mm am/pm' => :date,
25
+ 'h:mm:ss am/pm' => :date,
26
+ 'h:mm' => :time,
27
+ 'h:mm:ss' => :time,
28
+ 'm/d/yy h:mm' => :date,
29
+ '#,##0 ;(#,##0)' => :float,
30
+ '#,##0 ;[red](#,##0)' => :float,
31
+ '#,##0.00;(#,##0.00)' => :float,
32
+ '#,##0.00;[red](#,##0.00)' => :float,
33
+ 'mm:ss' => :time,
34
+ '[h]:mm:ss' => :time,
35
+ 'mmss.0' => :time,
36
+ '##0.0e+0' => :float,
37
+ '@' => :float,
38
+ #-- zusaetzliche Formate, die nicht standardmaessig definiert sind:
39
+ "yyyy\\-mm\\-dd" => :date,
40
+ 'dd/mm/yy' => :date,
41
+ 'hh:mm:ss' => :time,
42
+ "dd/mm/yy\\ hh:mm" => :datetime,
43
+ 'm/d/yy' => :date,
44
+ 'mm/dd/yy' => :date,
45
+ 'mm/dd/yyyy' => :date,
46
+ }
47
+
48
+ STANDARD_FORMATS = {
49
+ 0 => 'General',
50
+ 1 => '0',
51
+ 2 => '0.00',
52
+ 3 => '#,##0',
53
+ 4 => '#,##0.00',
54
+ 9 => '0%',
55
+ 10 => '0.00%',
56
+ 11 => '0.00E+00',
57
+ 12 => '# ?/?',
58
+ 13 => '# ??/??',
59
+ 14 => 'mm-dd-yy',
60
+ 15 => 'd-mmm-yy',
61
+ 16 => 'd-mmm',
62
+ 17 => 'mmm-yy',
63
+ 18 => 'h:mm AM/PM',
64
+ 19 => 'h:mm:ss AM/PM',
65
+ 20 => 'h:mm',
66
+ 21 => 'h:mm:ss',
67
+ 22 => 'm/d/yy h:mm',
68
+ 37 => '#,##0 ;(#,##0)',
69
+ 38 => '#,##0 ;[Red](#,##0)',
70
+ 39 => '#,##0.00;(#,##0.00)',
71
+ 40 => '#,##0.00;[Red](#,##0.00)',
72
+ 45 => 'mm:ss',
73
+ 46 => '[h]:mm:ss',
74
+ 47 => 'mmss.0',
75
+ 48 => '##0.0E+0',
76
+ 49 => '@',
77
+ }
78
+
79
+ def initialize(file, user_defined_formats = {})
8
80
  @file = file
9
- @zipfs = Zip::ZipFile.open(@file)
81
+ @zipfs = Zip::File.open(@file)
82
+ @user_defined_formats = user_defined_formats
83
+ read_styles
10
84
  end
11
85
 
12
86
  def sheets
@@ -33,6 +107,51 @@ class Dullard::Workbook
33
107
  @string_table
34
108
  end
35
109
 
110
+ def read_styles
111
+ doc = Nokogiri::XML(@zipfs.file.open("xl/styles.xml"))
112
+
113
+ @num_formats = {}
114
+ @cell_xfs = []
115
+
116
+ doc.css('/styleSheet/numFmts/numFmt').each do |numFmt|
117
+ numFmtId = numFmt.attributes['numFmtId'].value.to_i
118
+ formatCode = numFmt.attributes['formatCode'].value
119
+ @num_formats[numFmtId] = formatCode
120
+ end
121
+
122
+ doc.css('/styleSheet/cellXfs/xf').each do |xf|
123
+ numFmtId = xf.attributes['numFmtId'].value.to_i
124
+ @cell_xfs << numFmtId
125
+ end
126
+ end
127
+
128
+
129
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
130
+ # convert internal excelx attribute to a format
131
+ def attribute2format(s)
132
+ id = @cell_xfs[s.to_i].to_i
133
+ result = @num_formats[id]
134
+
135
+ if result == nil
136
+ if STANDARD_FORMATS.has_key? id
137
+ result = STANDARD_FORMATS[id]
138
+ end
139
+ end
140
+
141
+ result.downcase
142
+ end
143
+
144
+ # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb)
145
+ def format2type(format)
146
+ if FORMATS.has_key? format
147
+ FORMATS[format]
148
+ elsif @user_defined_formats.has_key? format
149
+ @user_defined_formats[format]
150
+ else
151
+ :float
152
+ end
153
+ end
154
+
36
155
  def zipfs
37
156
  @zipfs
38
157
  end
@@ -57,12 +176,13 @@ class Dullard::Sheet
57
176
  end
58
177
 
59
178
  def rows
60
- Enumerator.new(rows_size) do |y|
179
+ Enumerator.new(row_count) do |y|
61
180
  next unless @file
62
181
  @file.rewind
63
182
  shared = false
64
183
  row = nil
65
184
  column = nil
185
+ cell_type = nil
66
186
  Nokogiri::XML::Reader(@file).each do |node|
67
187
  case node.node_type
68
188
  when Nokogiri::XML::Reader::TYPE_ELEMENT
@@ -72,6 +192,11 @@ class Dullard::Sheet
72
192
  column = 0
73
193
  next
74
194
  when "c"
195
+ if node.attributes['t'] != 's' && node.attributes['t'] != 'b'
196
+ cell_format_index = node.attributes['s'].to_i
197
+ cell_type = @workbook.format2type(@workbook.attribute2format(cell_format_index))
198
+ end
199
+
75
200
  rcolumn = node.attributes["r"]
76
201
  if rcolumn
77
202
  rcolumn.delete!("0-9")
@@ -91,7 +216,21 @@ class Dullard::Sheet
91
216
  end
92
217
  end
93
218
  value = node.value
219
+
94
220
  if value
221
+ case cell_type
222
+ when :datetime
223
+ when :time
224
+ when :date
225
+ value = (DateTime.new(1899,12,30) + value.to_f)
226
+ when :percentage # ? TODO
227
+ when :float
228
+ value = value.to_f
229
+ else
230
+ # leave as string
231
+ end
232
+ cell_type = nil
233
+
95
234
  row << (shared ? string_lookup(value.to_i) : value)
96
235
  end
97
236
  end
@@ -113,14 +252,9 @@ class Dullard::Sheet
113
252
  end
114
253
  end
115
254
 
116
- private
117
- def path
118
- "xl/worksheets/sheet#{@index}.xml"
119
- end
120
-
121
- def rows_size
122
- if defined? @rows_size
123
- @rows_size
255
+ def row_count
256
+ if defined? @row_count
257
+ @row_count
124
258
  elsif @file
125
259
  @file.rewind
126
260
  Nokogiri::XML::Reader(@file).each do |node|
@@ -128,13 +262,19 @@ class Dullard::Sheet
128
262
  case node.name
129
263
  when "dimension"
130
264
  if ref = node.attributes["ref"]
131
- break @rows_size = ref.scan(/\d+$/).first.to_i
265
+ break @row_count = ref.scan(/\d+$/).first.to_i
132
266
  end
133
267
  when "sheetData"
134
- break @rows_size = nil
268
+ break @row_count = nil
135
269
  end
136
270
  end
137
271
  end
138
272
  end
139
273
  end
274
+
275
+ private
276
+ def path
277
+ "xl/worksheets/sheet#{@index}.xml"
278
+ end
279
+
140
280
  end
@@ -1,3 +1,3 @@
1
1
  module Dullard
2
- VERSION = "0.0.5"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,73 @@
1
+ require 'dullard'
2
+
3
+ describe "dullard," do
4
+ before(:each) do
5
+ @file = File.open(File.expand_path("../test.xlsx", __FILE__))
6
+ end
7
+
8
+ describe "when it has no user defined formats," do
9
+ before(:each) do
10
+ @xlsx = Dullard::Workbook.new @file
11
+ end
12
+
13
+ it "can open a file" do
14
+ @xlsx.should_not be_nil
15
+ end
16
+
17
+ it "can find sheets" do
18
+ @xlsx.sheets.count.should == 1
19
+ end
20
+
21
+ it "reads the right number of columns, even with blanks" do
22
+ rows = @xlsx.sheets[0].rows
23
+ rows.next.count.should == 300
24
+ rows.next.count.should == 9
25
+ rows.next.count.should == 1
26
+ end
27
+
28
+ it "reads the right number of rows" do
29
+ @xlsx.sheets[0].row_count.should == 117
30
+ end
31
+
32
+ it "reads the right number of rows from the metadata when present" do
33
+ @xlsx.sheets[0].rows.size.should == 117
34
+ end
35
+
36
+ it "reads date/time properly" do
37
+ count = 0
38
+ @xlsx.sheets[0].rows.each do |row|
39
+ count += 1
40
+
41
+ if count == 116
42
+ row[0].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00"
43
+ row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58"
44
+ row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48"
45
+ row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52"
46
+ end
47
+ end
48
+ count.should == 117
49
+ end
50
+ end
51
+
52
+ describe "when it has user defined formats," do
53
+ before(:each) do
54
+ @xlsx = Dullard::Workbook.new @file, {'GENERAL' => :string, 'm/d/yyyy' => :date, 'M/D/YYYY' => :date,}
55
+ end
56
+
57
+ it "converts the user defined formatted cells properly" do
58
+ count = 0
59
+ @xlsx.sheets[0].rows.each do |row|
60
+ count += 1
61
+
62
+ if count == 117
63
+ row[0].should == 'teststring'
64
+ row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00"
65
+ row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58"
66
+ row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48"
67
+ row[4].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52"
68
+ end
69
+ end
70
+ count.should == 117
71
+ end
72
+ end
73
+ end
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dullard
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ted Kaplan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-21 00:00:00.000000000 Z
11
+ date: 2014-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -16,42 +16,56 @@ dependencies:
16
16
  requirements:
17
17
  - - ~>
18
18
  - !ruby/object:Gem::Version
19
- version: '2.6'
19
+ version: '2.14'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
- version: '2.6'
26
+ version: '2.14'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.1'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: nokogiri
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - ~>
32
46
  - !ruby/object:Gem::Version
33
- version: '1.5'
47
+ version: '1.6'
34
48
  type: :runtime
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
52
  - - ~>
39
53
  - !ruby/object:Gem::Version
40
- version: '1.5'
54
+ version: '1.6'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rubyzip
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - ~>
46
60
  - !ruby/object:Gem::Version
47
- version: 0.9.6
61
+ version: '1.1'
48
62
  type: :runtime
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - ~>
53
67
  - !ruby/object:Gem::Version
54
- version: 0.9.6
68
+ version: '1.1'
55
69
  description:
56
70
  email:
57
71
  - ted@shlashdot.org
@@ -68,10 +82,11 @@ files:
68
82
  - lib/dullard.rb
69
83
  - lib/dullard/reader.rb
70
84
  - lib/dullard/version.rb
71
- - specs/dullard_spec.rb
72
- - specs/test.xlsx
85
+ - spec/dullard_spec.rb
86
+ - spec/test.xlsx
73
87
  homepage: http://github.com/thirtyseven/dullard
74
- licenses: []
88
+ licenses:
89
+ - MIT
75
90
  metadata: {}
76
91
  post_install_message:
77
92
  rdoc_options: []
@@ -93,4 +108,6 @@ rubygems_version: 2.0.5
93
108
  signing_key:
94
109
  specification_version: 4
95
110
  summary: A fast XLSX parser using Nokogiri
96
- test_files: []
111
+ test_files:
112
+ - spec/dullard_spec.rb
113
+ - spec/test.xlsx
@@ -1,34 +0,0 @@
1
- require 'dullard'
2
-
3
- describe "dullard" do
4
- before(:each) do
5
- @file = File.open(File.expand_path("../test.xlsx", __FILE__))
6
- @xlsx = Dullard::Workbook.new @file
7
- end
8
- it "can open a file" do
9
- @xlsx.should_not be_nil
10
- end
11
-
12
- it "can find sheets" do
13
- @xlsx.sheets.count.should == 1
14
- end
15
-
16
- it "reads the right number of columns, even with blanks" do
17
- rows = @xlsx.sheets[0].rows
18
- rows.next.count.should == 300
19
- rows.next.count.should == 9
20
- rows.next.count.should == 1
21
- end
22
-
23
- it "reads the right number of rows" do
24
- count = 0
25
- @xlsx.sheets[0].rows.each do |row|
26
- count += 1
27
- end
28
- count.should == 115
29
- end
30
-
31
- it "reads the right number of rows from the metadata when present" do
32
- @xlsx.sheets[0].rows.size.should == 115
33
- end
34
- end
Binary file