dullard 0.0.3 → 0.0.5
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +2 -2
- data/README.md +6 -3
- data/lib/dullard/reader.rb +77 -12
- data/lib/dullard/version.rb +1 -1
- data/specs/dullard_spec.rb +11 -5
- data/specs/test.xlsx +0 -0
- metadata +26 -18
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 76b6de08a6a72d691a3f3deb06f8a82018c90049
|
4
|
+
data.tar.gz: d56a5c10a24db975ce01c0e3e352739e128508f1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 943f8b15089aab39fb627fc84a45523e093b60e6980666cb139be7102d99bb2adf3f71a2ed32d5d693784d4612199390de7a62e7dd06de4d80495a7de832ccbc
|
7
|
+
data.tar.gz: 3402538efd1a687823a4acc8e4a9fe811d381de1e20d40d354746cacb2e2ff557b6eb6259c5446f05b89375c61cb81efe0516fdc7cf26ec22800ed92325c45eb
|
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Copyright (c)
|
1
|
+
Copyright (c) 2013 Edward Kaplan
|
2
2
|
|
3
3
|
MIT License
|
4
4
|
|
@@ -19,4 +19,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
19
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
20
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
21
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
-
Super simple, super fast XLSX parsing.
|
1
|
+
Super simple, super fast stream-based XLSX parsing. Suitable for very large
|
2
|
+
files.
|
3
|
+
|
4
|
+
Requires Ruby 2.0.
|
2
5
|
|
3
6
|
require 'dullard'
|
4
7
|
|
5
|
-
workbook = Dullard::Workbook.new "file"
|
6
|
-
workbook.
|
8
|
+
workbook = Dullard::Workbook.new "file.xlsx"
|
9
|
+
workbook.sheets[0].rows.each do |row|
|
7
10
|
puts row # => ["a","b","c",...]
|
8
11
|
end
|
data/lib/dullard/reader.rb
CHANGED
@@ -11,7 +11,7 @@ class Dullard::Workbook
|
|
11
11
|
|
12
12
|
def sheets
|
13
13
|
workbook = Nokogiri::XML::Document.parse(@zipfs.file.open("xl/workbook.xml"))
|
14
|
-
@sheets = workbook.css("sheet").map {|n| Dullard::Sheet.new(self, n.attr("name"), n.attr("sheetId")) }
|
14
|
+
@sheets = workbook.css("sheet").each_with_index.map {|n,i| Dullard::Sheet.new(self, n.attr("name"), n.attr("sheetId"), i+1) }
|
15
15
|
end
|
16
16
|
|
17
17
|
def string_table
|
@@ -44,10 +44,12 @@ end
|
|
44
44
|
|
45
45
|
class Dullard::Sheet
|
46
46
|
attr_reader :name, :workbook
|
47
|
-
def initialize(workbook, name, id)
|
47
|
+
def initialize(workbook, name, id, index)
|
48
48
|
@workbook = workbook
|
49
49
|
@name = name
|
50
50
|
@id = id
|
51
|
+
@index = index
|
52
|
+
@file = @workbook.zipfs.file.open(path) if @workbook.zipfs.file.exist?(path)
|
51
53
|
end
|
52
54
|
|
53
55
|
def string_lookup(i)
|
@@ -55,21 +57,84 @@ class Dullard::Sheet
|
|
55
57
|
end
|
56
58
|
|
57
59
|
def rows
|
58
|
-
Enumerator.new do |y|
|
60
|
+
Enumerator.new(rows_size) do |y|
|
61
|
+
next unless @file
|
62
|
+
@file.rewind
|
59
63
|
shared = false
|
60
64
|
row = nil
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
65
|
+
column = nil
|
66
|
+
Nokogiri::XML::Reader(@file).each do |node|
|
67
|
+
case node.node_type
|
68
|
+
when Nokogiri::XML::Reader::TYPE_ELEMENT
|
69
|
+
case node.name
|
70
|
+
when "row"
|
71
|
+
row = []
|
72
|
+
column = 0
|
73
|
+
next
|
74
|
+
when "c"
|
75
|
+
rcolumn = node.attributes["r"]
|
76
|
+
if rcolumn
|
77
|
+
rcolumn.delete!("0-9")
|
78
|
+
while column < self.class.column_names.size and rcolumn != self.class.column_names[column]
|
79
|
+
row << nil
|
80
|
+
column += 1
|
81
|
+
end
|
82
|
+
end
|
67
83
|
shared = (node.attribute("t") == "s")
|
68
|
-
|
69
|
-
|
84
|
+
column += 1
|
85
|
+
next
|
86
|
+
end
|
87
|
+
when Nokogiri::XML::Reader::TYPE_END_ELEMENT
|
88
|
+
if node.name == "row"
|
89
|
+
y << row
|
90
|
+
next
|
91
|
+
end
|
92
|
+
end
|
93
|
+
value = node.value
|
94
|
+
if value
|
95
|
+
row << (shared ? string_lookup(value.to_i) : value)
|
70
96
|
end
|
71
97
|
end
|
72
98
|
end
|
73
99
|
end
|
74
|
-
end
|
75
100
|
|
101
|
+
# Returns A to ZZZ.
|
102
|
+
def self.column_names
|
103
|
+
if @column_names
|
104
|
+
@column_names
|
105
|
+
else
|
106
|
+
proc = Proc.new do |prev|
|
107
|
+
("#{prev}A".."#{prev}Z").to_a
|
108
|
+
end
|
109
|
+
x = proc.call("")
|
110
|
+
y = x.map(&proc).flatten
|
111
|
+
z = y.map(&proc).flatten
|
112
|
+
@column_names = x + y + z
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
private
|
117
|
+
def path
|
118
|
+
"xl/worksheets/sheet#{@index}.xml"
|
119
|
+
end
|
120
|
+
|
121
|
+
def rows_size
|
122
|
+
if defined? @rows_size
|
123
|
+
@rows_size
|
124
|
+
elsif @file
|
125
|
+
@file.rewind
|
126
|
+
Nokogiri::XML::Reader(@file).each do |node|
|
127
|
+
if node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
|
128
|
+
case node.name
|
129
|
+
when "dimension"
|
130
|
+
if ref = node.attributes["ref"]
|
131
|
+
break @rows_size = ref.scan(/\d+$/).first.to_i
|
132
|
+
end
|
133
|
+
when "sheetData"
|
134
|
+
break @rows_size = nil
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
data/lib/dullard/version.rb
CHANGED
data/specs/dullard_spec.rb
CHANGED
@@ -2,7 +2,7 @@ require 'dullard'
|
|
2
2
|
|
3
3
|
describe "dullard" do
|
4
4
|
before(:each) do
|
5
|
-
@file = File.open("
|
5
|
+
@file = File.open(File.expand_path("../test.xlsx", __FILE__))
|
6
6
|
@xlsx = Dullard::Workbook.new @file
|
7
7
|
end
|
8
8
|
it "can open a file" do
|
@@ -10,19 +10,25 @@ describe "dullard" do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
it "can find sheets" do
|
13
|
-
@xlsx.sheets.count.should ==
|
13
|
+
@xlsx.sheets.count.should == 1
|
14
14
|
end
|
15
15
|
|
16
|
-
it "
|
17
|
-
@xlsx.sheets[0].rows
|
16
|
+
it "reads the right number of columns, even with blanks" do
|
17
|
+
rows = @xlsx.sheets[0].rows
|
18
|
+
rows.next.count.should == 300
|
19
|
+
rows.next.count.should == 9
|
20
|
+
rows.next.count.should == 1
|
18
21
|
end
|
19
22
|
|
20
23
|
it "reads the right number of rows" do
|
21
24
|
count = 0
|
22
25
|
@xlsx.sheets[0].rows.each do |row|
|
23
|
-
puts row
|
24
26
|
count += 1
|
25
27
|
end
|
26
28
|
count.should == 115
|
27
29
|
end
|
30
|
+
|
31
|
+
it "reads the right number of rows from the metadata when present" do
|
32
|
+
@xlsx.sheets[0].rows.size.should == 115
|
33
|
+
end
|
28
34
|
end
|
data/specs/test.xlsx
ADDED
Binary file
|
metadata
CHANGED
@@ -1,49 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dullard
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.5
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Ted Kaplan
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-07-21 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rspec
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
17
|
- - ~>
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '2.6'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.6'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: nokogiri
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - ~>
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '1.5'
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.5'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: rubyzip
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
45
|
- - ~>
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: 0.9.6
|
44
48
|
type: :runtime
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.9.6
|
47
55
|
description:
|
48
56
|
email:
|
49
57
|
- ted@shlashdot.org
|
@@ -61,28 +69,28 @@ files:
|
|
61
69
|
- lib/dullard/reader.rb
|
62
70
|
- lib/dullard/version.rb
|
63
71
|
- specs/dullard_spec.rb
|
72
|
+
- specs/test.xlsx
|
64
73
|
homepage: http://github.com/thirtyseven/dullard
|
65
74
|
licenses: []
|
75
|
+
metadata: {}
|
66
76
|
post_install_message:
|
67
77
|
rdoc_options: []
|
68
78
|
require_paths:
|
69
79
|
- lib
|
70
80
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
-
none: false
|
72
81
|
requirements:
|
73
|
-
- -
|
82
|
+
- - '>='
|
74
83
|
- !ruby/object:Gem::Version
|
75
84
|
version: '0'
|
76
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
-
none: false
|
78
86
|
requirements:
|
79
|
-
- -
|
87
|
+
- - '>='
|
80
88
|
- !ruby/object:Gem::Version
|
81
89
|
version: '0'
|
82
90
|
requirements: []
|
83
91
|
rubyforge_project:
|
84
|
-
rubygems_version:
|
92
|
+
rubygems_version: 2.0.5
|
85
93
|
signing_key:
|
86
|
-
specification_version:
|
94
|
+
specification_version: 4
|
87
95
|
summary: A fast XLSX parser using Nokogiri
|
88
96
|
test_files: []
|