dullard 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in dullard.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Edward Kaplan
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,8 @@
1
+ Super simple, super fast XLSX parsing.
2
+
3
+ require 'dullard'
4
+
5
+ workbook = Dullard::Workbook.new "file"
6
+ workbook.sheets[0].rows.each do |row|
7
+ puts row # => ["a","b","c",...]
8
+ end
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/dullard.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/dullard/version', __FILE__)
3
+
4
# Gem specification for dullard.
#
# File lists are derived from `git ls-files`, so the gem must be built from
# inside a git checkout.
Gem::Specification.new do |gem|
  gem.name          = "dullard"
  gem.version       = Dullard::VERSION
  gem.authors       = ["Ted Kaplan"]
  gem.email         = ["ted@shlashdot.org"]
  gem.summary       = %q{A fast XLSX parser using Nokogiri}
  gem.description   = %q{Super simple, super fast XLSX parsing.}
  gem.homepage      = "http://github.com/thirtyseven/dullard"
  # The bundled LICENSE file is the MIT license.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split("\n")
  # Derive the sub-lists from gem.files rather than relying on the shell to
  # brace-expand a glob (plain `sh` does not).
  gem.executables   = gem.files.grep(%r{\Abin/}).map { |f| File.basename(f) }
  # This gem keeps its specs under `specs/`, which the stock
  # {test,spec,features} pattern never matched.
  gem.test_files    = gem.files.grep(%r{\A(test|spec|specs|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rspec", "~> 2.6"
  gem.add_dependency "nokogiri", "~> 1.5"
  gem.add_dependency "rubyzip", "~> 0.9.6"
end
@@ -0,0 +1,83 @@
1
+ require 'zip/zipfilesystem'
2
+ require 'nokogiri'
3
+
4
+ module Dullard; end
5
+
6
module Dullard
  # Read-only view of an XLSX workbook stored in a zip archive.
  #
  # The archive is opened with Zip::ZipFile and its XML parts are parsed with
  # Nokogiri; both libraries are required at the top of this file.
  class Workbook
    # The open Zip::ZipFile backing this workbook (used by Dullard::Sheet).
    attr_reader :zipfs

    # file - whatever Zip::ZipFile.open accepts (the README passes a path).
    def initialize(file)
      @file = file
      @zipfs = Zip::ZipFile.open(@file)
    end

    # Returns an Array of Dullard::Sheet, one per <sheet> element found in
    # xl/workbook.xml. The list is built once and reused on later calls.
    def sheets
      @sheets ||= begin
        doc = nil
        # Block form so the zip entry stream is closed once parsing is done.
        @zipfs.file.open("xl/workbook.xml") { |io| doc = Nokogiri::XML::Document.parse(io) }
        doc.css("sheet").map { |n| Dullard::Sheet.new(self, n.attr("name"), n.attr("sheetId")) }
      end
    end

    # Returns the shared-string table (Array of String), reading it from the
    # archive on first use only.
    def string_table
      @string_table ||= read_string_table
    end

    # Parses xl/sharedStrings.xml and returns an Array with exactly one String
    # per <si> item, so that a cell's shared-string index maps directly onto
    # the array index. Text from every <t> inside an item (rich-text runs) is
    # concatenated into that item's entry.
    def read_string_table
      table = []
      entry = nil       # text collected for the <si> currently open
      in_text = false   # true while positioned inside a <t> element

      @zipfs.file.open("xl/sharedStrings.xml") do |io|
        Nokogiri::XML::Reader(io).each do |node|
          case node.node_type
          when Nokogiri::XML::Reader::TYPE_ELEMENT
            case node.name
            when "si"
              entry = "".dup
              # A self-closing <si/> produces no end-element event.
              table << entry if node.empty_element?
            when "t"
              in_text = !node.empty_element?
            end
          when Nokogiri::XML::Reader::TYPE_END_ELEMENT
            # The reader reports closing tags under the same name as the
            # opening tag, so they must be told apart by node type.
            case node.name
            when "si" then table << entry
            when "t"  then in_text = false
            end
          else
            entry << node.value if in_text && entry && node.value?
          end
        end
      end

      @string_table = table
    end
  end
end
42
+
43
module Dullard
  # A single worksheet of a Dullard::Workbook.
  class Sheet
    attr_reader :name, :workbook

    # workbook - the owning workbook; must respond to #zipfs and #string_table.
    # name     - the sheet's display name.
    # id       - the identifier used to build the worksheet path inside the
    #            archive ("xl/worksheets/sheet<id>.xml").
    def initialize(workbook, name, id)
      @workbook = workbook
      @name = name
      @id = id
    end

    # Returns the shared string stored at index i in the workbook's table.
    def string_lookup(i)
      @workbook.string_table[i]
    end

    # Returns an Enumerator that yields one Array per <row> element, with one
    # entry per <c> cell in document order:
    #
    # * cells marked t="s" are resolved through the shared-string table;
    # * other cells yield the raw text of their <v> (or inline <t>) element;
    # * cells without a value yield nil.
    #
    # The XML is streamed, so rows are produced lazily as the caller iterates.
    #
    # NOTE(review): the worksheet path is derived from the id passed to the
    # constructor (Workbook passes the sheetId attribute); confirm this matches
    # the actual part name for workbooks whose sheets were reordered/deleted.
    def rows
      Enumerator.new do |y|
        row = nil          # cells of the <row> currently open
        shared = false     # whether the current cell holds a shared-string index
        text = nil         # raw text collected for the current cell
        in_value = false   # true while inside a value-bearing element

        @workbook.zipfs.file.open("xl/worksheets/sheet#{@id}.xml") do |io|
          Nokogiri::XML::Reader(io).each do |node|
            case node.node_type
            when Nokogiri::XML::Reader::TYPE_ELEMENT
              case node.name
              when "row"
                row = []
                # A self-closing <row/> produces no end-element event.
                y << row if node.empty_element?
              when "c"
                shared = (node.attribute("t") == "s")
                text = nil
                row << nil if row && node.empty_element?
              when "v", "t"
                in_value = !node.empty_element?
              end
            when Nokogiri::XML::Reader::TYPE_END_ELEMENT
              # Closing tags carry the same name as opening tags, so they are
              # distinguished by node type rather than by name.
              case node.name
              when "row"
                y << row
                row = nil
              when "c"
                row << cell_value(text, shared) if row
              when "v", "t"
                in_value = false
              end
            else
              (text ||= "".dup) << node.value if in_value && node.value?
            end
          end
        end
      end
    end

    private

    # Converts the raw text of a cell into the value handed to callers.
    def cell_value(text, shared)
      return nil if text.nil?
      shared ? string_lookup(text.to_i) : text
    end
  end
end
83
+
@@ -0,0 +1,3 @@
1
module Dullard
  # Gem version, read by dullard.gemspec. Frozen so that callers cannot
  # mutate the shared constant in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,27 @@
1
+ require './dullard'
2
+
3
# Smoke tests for Dullard against the SHAPE5_CorePrePost.xlsx fixture, which
# must be present in the directory the specs are run from.
describe "dullard" do
  before(:each) do
    # Hand the workbook a path (as the README does) instead of a File object
    # that would be opened before every example and never closed.
    @xlsx = Dullard::Workbook.new "SHAPE5_CorePrePost.xlsx"
  end

  it "can open a file" do
    @xlsx.should_not be_nil
  end

  it "can find sheets" do
    @xlsx.sheets.count.should == 1
  end

  it "can read rows" do
    @xlsx.sheets[0].rows.first.count.should >= 300
  end

  it "reads the right number of rows" do
    @xlsx.sheets[0].rows.count.should == 115
  end
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dullard
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ted Kaplan
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-15 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &70212465314540 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2.6'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70212465314540
25
+ - !ruby/object:Gem::Dependency
26
+ name: nokogiri
27
+ requirement: &70212465295320 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: '1.5'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70212465295320
36
+ - !ruby/object:Gem::Dependency
37
+ name: rubyzip
38
+ requirement: &70212465295080 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 0.9.6
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70212465295080
47
+ description:
48
+ email:
49
+ - ted@shlashdot.org
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - LICENSE
57
+ - README.md
58
+ - Rakefile
59
+ - dullard.gemspec
60
+ - lib/dullard/dullard.rb
61
+ - lib/dullard/version.rb
62
+ - specs/dullard_spec.rb
63
+ homepage: http://github.com/thirtyseven/dullard
64
+ licenses: []
65
+ post_install_message:
66
+ rdoc_options: []
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ! '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ! '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ requirements: []
82
+ rubyforge_project:
83
+ rubygems_version: 1.8.17
84
+ signing_key:
85
+ specification_version: 3
86
+ summary: A fast XLSX parser using Nokogiri
87
+ test_files: []