dullard 0.0.3 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 76b6de08a6a72d691a3f3deb06f8a82018c90049
4
+ data.tar.gz: d56a5c10a24db975ce01c0e3e352739e128508f1
5
+ SHA512:
6
+ metadata.gz: 943f8b15089aab39fb627fc84a45523e093b60e6980666cb139be7102d99bb2adf3f71a2ed32d5d693784d4612199390de7a62e7dd06de4d80495a7de832ccbc
7
+ data.tar.gz: 3402538efd1a687823a4acc8e4a9fe811d381de1e20d40d354746cacb2e2ff557b6eb6259c5446f05b89375c61cb81efe0516fdc7cf26ec22800ed92325c45eb
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012 Edward Kaplan
1
+ Copyright (c) 2013 Edward Kaplan
2
2
 
3
3
  MIT License
4
4
 
@@ -19,4 +19,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
19
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
20
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
21
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,8 +1,11 @@
1
- Super simple, super fast XLSX parsing.
1
+ Super simple, super fast stream-based XLSX parsing. Suitable for very large
2
+ files.
3
+
4
+ Requires Ruby 2.0.
2
5
 
3
6
  require 'dullard'
4
7
 
5
- workbook = Dullard::Workbook.new "file"
6
- workbook.sheet[0].rows.each do |row|
8
+ workbook = Dullard::Workbook.new "file.xlsx"
9
+ workbook.sheets[0].rows.each do |row|
7
10
  puts row # => ["a","b","c",...]
8
11
  end
@@ -11,7 +11,7 @@ class Dullard::Workbook
11
11
 
12
12
  def sheets
13
13
  workbook = Nokogiri::XML::Document.parse(@zipfs.file.open("xl/workbook.xml"))
14
- @sheets = workbook.css("sheet").map {|n| Dullard::Sheet.new(self, n.attr("name"), n.attr("sheetId")) }
14
+ @sheets = workbook.css("sheet").each_with_index.map {|n,i| Dullard::Sheet.new(self, n.attr("name"), n.attr("sheetId"), i+1) }
15
15
  end
16
16
 
17
17
  def string_table
@@ -44,10 +44,12 @@ end
44
44
 
45
45
  class Dullard::Sheet
46
46
  attr_reader :name, :workbook
47
- def initialize(workbook, name, id)
47
+ def initialize(workbook, name, id, index)
48
48
  @workbook = workbook
49
49
  @name = name
50
50
  @id = id
51
+ @index = index
52
+ @file = @workbook.zipfs.file.open(path) if @workbook.zipfs.file.exist?(path)
51
53
  end
52
54
 
53
55
  def string_lookup(i)
@@ -55,21 +57,84 @@ class Dullard::Sheet
55
57
  end
56
58
 
57
59
  def rows
58
- Enumerator.new do |y|
60
+ Enumerator.new(rows_size) do |y|
61
+ next unless @file
62
+ @file.rewind
59
63
  shared = false
60
64
  row = nil
61
- Nokogiri::XML::Reader(@workbook.zipfs.file.open("xl/worksheets/sheet#{@id}.xml")).each do |node|
62
- if node.name == "row" and node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
63
- row = []
64
- elsif node.name == "row" and node.node_type == Nokogiri::XML::Reader::TYPE_END_ELEMENT
65
- y << row
66
- elsif node.name == "c" and node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
65
+ column = nil
66
+ Nokogiri::XML::Reader(@file).each do |node|
67
+ case node.node_type
68
+ when Nokogiri::XML::Reader::TYPE_ELEMENT
69
+ case node.name
70
+ when "row"
71
+ row = []
72
+ column = 0
73
+ next
74
+ when "c"
75
+ rcolumn = node.attributes["r"]
76
+ if rcolumn
77
+ rcolumn.delete!("0-9")
78
+ while column < self.class.column_names.size and rcolumn != self.class.column_names[column]
79
+ row << nil
80
+ column += 1
81
+ end
82
+ end
67
83
  shared = (node.attribute("t") == "s")
68
- elsif node.value?
69
- row << (shared ? string_lookup(node.value.to_i) : node.value)
84
+ column += 1
85
+ next
86
+ end
87
+ when Nokogiri::XML::Reader::TYPE_END_ELEMENT
88
+ if node.name == "row"
89
+ y << row
90
+ next
91
+ end
92
+ end
93
+ value = node.value
94
+ if value
95
+ row << (shared ? string_lookup(value.to_i) : value)
70
96
  end
71
97
  end
72
98
  end
73
99
  end
74
- end
75
100
 
101
+ # Returns A to ZZZ.
102
+ def self.column_names
103
+ if @column_names
104
+ @column_names
105
+ else
106
+ proc = Proc.new do |prev|
107
+ ("#{prev}A".."#{prev}Z").to_a
108
+ end
109
+ x = proc.call("")
110
+ y = x.map(&proc).flatten
111
+ z = y.map(&proc).flatten
112
+ @column_names = x + y + z
113
+ end
114
+ end
115
+
116
+ private
117
+ def path
118
+ "xl/worksheets/sheet#{@index}.xml"
119
+ end
120
+
121
+ def rows_size
122
+ if defined? @rows_size
123
+ @rows_size
124
+ elsif @file
125
+ @file.rewind
126
+ Nokogiri::XML::Reader(@file).each do |node|
127
+ if node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
128
+ case node.name
129
+ when "dimension"
130
+ if ref = node.attributes["ref"]
131
+ break @rows_size = ref.scan(/\d+$/).first.to_i
132
+ end
133
+ when "sheetData"
134
+ break @rows_size = nil
135
+ end
136
+ end
137
+ end
138
+ end
139
+ end
140
+ end
@@ -1,3 +1,3 @@
1
1
  module Dullard
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -2,7 +2,7 @@ require 'dullard'
2
2
 
3
3
  describe "dullard" do
4
4
  before(:each) do
5
- @file = File.open("testfiles/SHAPE5_CorePrePost.xlsx")
5
+ @file = File.open(File.expand_path("../test.xlsx", __FILE__))
6
6
  @xlsx = Dullard::Workbook.new @file
7
7
  end
8
8
  it "can open a file" do
@@ -10,19 +10,25 @@ describe "dullard" do
10
10
  end
11
11
 
12
12
  it "can find sheets" do
13
- @xlsx.sheets.count.should == 3
13
+ @xlsx.sheets.count.should == 1
14
14
  end
15
15
 
16
- it "can read rows" do
17
- @xlsx.sheets[0].rows.first.should == ["S.NO", "PAPER ID", "TOPIC", "AUTHOR DETAILS", "DATE OF PRESENTATION"]
16
+ it "reads the right number of columns, even with blanks" do
17
+ rows = @xlsx.sheets[0].rows
18
+ rows.next.count.should == 300
19
+ rows.next.count.should == 9
20
+ rows.next.count.should == 1
18
21
  end
19
22
 
20
23
  it "reads the right number of rows" do
21
24
  count = 0
22
25
  @xlsx.sheets[0].rows.each do |row|
23
- puts row
24
26
  count += 1
25
27
  end
26
28
  count.should == 115
27
29
  end
30
+
31
+ it "reads the right number of rows from the metadata when present" do
32
+ @xlsx.sheets[0].rows.size.should == 115
33
+ end
28
34
  end
data/specs/test.xlsx ADDED
Binary file
metadata CHANGED
@@ -1,49 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dullard
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
5
- prerelease:
4
+ version: 0.0.5
6
5
  platform: ruby
7
6
  authors:
8
7
  - Ted Kaplan
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-03-24 00:00:00.000000000 Z
11
+ date: 2013-07-21 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rspec
16
- requirement: &70311325924940 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
17
  - - ~>
20
18
  - !ruby/object:Gem::Version
21
19
  version: '2.6'
22
20
  type: :development
23
21
  prerelease: false
24
- version_requirements: *70311325924940
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '2.6'
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: nokogiri
27
- requirement: &70311325924440 !ruby/object:Gem::Requirement
28
- none: false
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
31
  - - ~>
31
32
  - !ruby/object:Gem::Version
32
33
  version: '1.5'
33
34
  type: :runtime
34
35
  prerelease: false
35
- version_requirements: *70311325924440
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.5'
36
41
  - !ruby/object:Gem::Dependency
37
42
  name: rubyzip
38
- requirement: &70311325923940 !ruby/object:Gem::Requirement
39
- none: false
43
+ requirement: !ruby/object:Gem::Requirement
40
44
  requirements:
41
45
  - - ~>
42
46
  - !ruby/object:Gem::Version
43
47
  version: 0.9.6
44
48
  type: :runtime
45
49
  prerelease: false
46
- version_requirements: *70311325923940
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 0.9.6
47
55
  description:
48
56
  email:
49
57
  - ted@shlashdot.org
@@ -61,28 +69,28 @@ files:
61
69
  - lib/dullard/reader.rb
62
70
  - lib/dullard/version.rb
63
71
  - specs/dullard_spec.rb
72
+ - specs/test.xlsx
64
73
  homepage: http://github.com/thirtyseven/dullard
65
74
  licenses: []
75
+ metadata: {}
66
76
  post_install_message:
67
77
  rdoc_options: []
68
78
  require_paths:
69
79
  - lib
70
80
  required_ruby_version: !ruby/object:Gem::Requirement
71
- none: false
72
81
  requirements:
73
- - - ! '>='
82
+ - - '>='
74
83
  - !ruby/object:Gem::Version
75
84
  version: '0'
76
85
  required_rubygems_version: !ruby/object:Gem::Requirement
77
- none: false
78
86
  requirements:
79
- - - ! '>='
87
+ - - '>='
80
88
  - !ruby/object:Gem::Version
81
89
  version: '0'
82
90
  requirements: []
83
91
  rubyforge_project:
84
- rubygems_version: 1.8.17
92
+ rubygems_version: 2.0.5
85
93
  signing_key:
86
- specification_version: 3
94
+ specification_version: 4
87
95
  summary: A fast XLSX parser using Nokogiri
88
96
  test_files: []