saxerator 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Guardfile +1 -1
- data/README.md +14 -10
- data/lib/saxerator/configuration.rb +1 -2
- data/lib/saxerator/parser/nokogiri.rb +10 -15
- data/lib/saxerator/version.rb +1 -1
- data/saxerator.gemspec +9 -4
- data/spec/lib/saxerator_spec.rb +8 -0
- metadata +76 -20
data/Guardfile
CHANGED
@@ -5,7 +5,7 @@ guard :bundler do
|
|
5
5
|
watch(/^saxerator\.gemspec$/)
|
6
6
|
end
|
7
7
|
|
8
|
-
guard :rspec, :cli => '--color
|
8
|
+
guard :rspec, :cli => '--color' do
|
9
9
|
watch(%r{^spec/.+_spec\.rb$})
|
10
10
|
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
|
11
11
|
watch(%r{^spec/fixtures/.+\.xml$}) { :spec }
|
data/README.md
CHANGED
@@ -3,31 +3,35 @@ Saxerator
|
|
3
3
|
|
4
4
|
Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
|
5
5
|
dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
|
6
|
-
This approach is ideal for large xml files
|
6
|
+
This approach is ideal for large xml files containing a collection of elements that you can process
|
7
|
+
independently.
|
8
|
+
|
9
|
+
Each xml chunk is parsed into a JSON-like Ruby Hash structure for consumption.
|
7
10
|
|
8
11
|
Examples
|
9
12
|
--------
|
10
13
|
|
11
14
|
```ruby
|
12
|
-
Saxerator.parser(File.new("rss.xml"))
|
13
|
-
|
15
|
+
parser = Saxerator.parser(File.new("rss.xml"))
|
16
|
+
|
17
|
+
parser.for_tag(:item).each do |item|
|
18
|
+
# where the xml contains <item><title>...</title><author>...</author></item>
|
19
|
+
# item will look like {'title' => '...', 'author' => '...'}
|
20
|
+
puts "#{item['title']}: #{item['author']}"
|
14
21
|
end
|
22
|
+
|
23
|
+
# a String is returned here since the given element contains only character data
|
24
|
+
puts "First title: #{parser.for_tag(:title).first}"
|
15
25
|
```
|
16
26
|
|
17
27
|
Compatibility
|
18
28
|
-------------
|
19
|
-
|
29
|
+
Known compatible rubies:
|
20
30
|
|
21
31
|
* MRI 1.9.3-p125
|
22
32
|
* MRI 1.9.2-p318
|
23
33
|
* JRuby 1.6.7 (with JRUBY_OPTS=--1.9)
|
24
34
|
|
25
|
-
Saxerator may work with other versions with support for Fiber.
|
26
|
-
|
27
|
-
Known incompatible rubies:
|
28
|
-
|
29
|
-
* MRI 1.9.2-p290 (Fiber segfaults)
|
30
|
-
|
31
35
|
FAQ
|
32
36
|
---
|
33
37
|
Why the name 'Saxerator'?
|
data/lib/saxerator/parser/nokogiri.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require 'fiber'
|
3
2
|
|
4
3
|
module Saxerator
|
5
4
|
module Parser
|
@@ -13,27 +12,23 @@ module Saxerator
|
|
13
12
|
end
|
14
13
|
|
15
14
|
def each(&block)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
result = fiber.resume
|
24
|
-
yield(result) unless result.nil?
|
25
|
-
end
|
26
|
-
rescue FiberError
|
27
|
-
end
|
15
|
+
document = Document.new(@config, @tag, block)
|
16
|
+
parser = ::Nokogiri::XML::SAX::Parser.new document
|
17
|
+
|
18
|
+
# Always have to start at the beginning of a File
|
19
|
+
@source.rewind if(@source.is_a?(File))
|
20
|
+
|
21
|
+
parser.parse(@source)
|
28
22
|
end
|
29
23
|
|
30
24
|
class Document < ::Nokogiri::XML::SAX::Document
|
31
25
|
attr_accessor :stack
|
32
26
|
|
33
|
-
def initialize(config, tag)
|
27
|
+
def initialize(config, tag, block)
|
34
28
|
@config = config
|
35
29
|
@tag = tag
|
36
30
|
@stack = []
|
31
|
+
@block = block
|
37
32
|
end
|
38
33
|
|
39
34
|
def start_element(name, attrs = [])
|
@@ -47,7 +42,7 @@ module Saxerator
|
|
47
42
|
last = stack.pop
|
48
43
|
stack.last.add_node last
|
49
44
|
elsif stack.size == 1
|
50
|
-
|
45
|
+
@block.yield(stack.pop.to_hash)
|
51
46
|
end
|
52
47
|
end
|
53
48
|
|
data/lib/saxerator/version.rb
CHANGED
data/saxerator.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
$:.push File.expand_path('../lib', __FILE__)
|
3
3
|
require 'saxerator/version'
|
4
|
-
require '
|
4
|
+
require 'rake' # for FileList
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = 'saxerator'
|
@@ -10,9 +10,13 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.email = ['bradley.schaefer@gmail.com']
|
11
11
|
s.homepage = 'https://github.com/soulcutter/saxerator'
|
12
12
|
s.summary = 'A SAX-based XML parser for parsing large files into manageable chunks'
|
13
|
-
s.description =
|
14
|
-
|
15
|
-
|
13
|
+
s.description = <<-eos
|
14
|
+
Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
|
15
|
+
dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
|
16
|
+
This approach is ideal for large xml files containing a collection of elements that you can process
|
17
|
+
independently.
|
18
|
+
eos
|
19
|
+
s.license = 'MIT'
|
16
20
|
|
17
21
|
s.rubyforge_project = 'saxerator'
|
18
22
|
|
@@ -35,6 +39,7 @@ Gem::Specification.new do |s|
|
|
35
39
|
|
36
40
|
s.add_runtime_dependency 'nokogiri'
|
37
41
|
|
42
|
+
s.add_development_dependency 'rake'
|
38
43
|
s.add_development_dependency 'rspec'
|
39
44
|
s.add_development_dependency 'guard'
|
40
45
|
s.add_development_dependency 'guard-bundler'
|
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -38,6 +38,14 @@ describe Saxerator do
|
|
38
38
|
subject.for_tag(:blurb).each { |x| results << x }
|
39
39
|
results.should == ['one', 'two', 'three']
|
40
40
|
end
|
41
|
+
|
42
|
+
it "should allow multiple operations on the same parser" do
|
43
|
+
# This exposes a bug where if a File is not reset only the first
|
44
|
+
# Enumerable method works as expected
|
45
|
+
subject.for_tag(:blurb).first.should == 'one'
|
46
|
+
subject.for_tag(:blurb).first.should == 'one'
|
47
|
+
end
|
48
|
+
|
41
49
|
end
|
42
50
|
|
43
51
|
context "with a file with nested elements" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,31 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
25
46
|
- !ruby/object:Gem::Dependency
|
26
47
|
name: rspec
|
27
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
28
49
|
none: false
|
29
50
|
requirements:
|
30
51
|
- - ! '>='
|
@@ -32,10 +53,15 @@ dependencies:
|
|
32
53
|
version: '0'
|
33
54
|
type: :development
|
34
55
|
prerelease: false
|
35
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
36
62
|
- !ruby/object:Gem::Dependency
|
37
63
|
name: guard
|
38
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
39
65
|
none: false
|
40
66
|
requirements:
|
41
67
|
- - ! '>='
|
@@ -43,10 +69,15 @@ dependencies:
|
|
43
69
|
version: '0'
|
44
70
|
type: :development
|
45
71
|
prerelease: false
|
46
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
47
78
|
- !ruby/object:Gem::Dependency
|
48
79
|
name: guard-bundler
|
49
|
-
requirement:
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
50
81
|
none: false
|
51
82
|
requirements:
|
52
83
|
- - ! '>='
|
@@ -54,10 +85,15 @@ dependencies:
|
|
54
85
|
version: '0'
|
55
86
|
type: :development
|
56
87
|
prerelease: false
|
57
|
-
version_requirements:
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
58
94
|
- !ruby/object:Gem::Dependency
|
59
95
|
name: guard-rspec
|
60
|
-
requirement:
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
61
97
|
none: false
|
62
98
|
requirements:
|
63
99
|
- - ! '>='
|
@@ -65,10 +101,15 @@ dependencies:
|
|
65
101
|
version: '0'
|
66
102
|
type: :development
|
67
103
|
prerelease: false
|
68
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
69
110
|
- !ruby/object:Gem::Dependency
|
70
111
|
name: simplecov
|
71
|
-
requirement:
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
72
113
|
none: false
|
73
114
|
requirements:
|
74
115
|
- - ! '>='
|
@@ -76,10 +117,15 @@ dependencies:
|
|
76
117
|
version: '0'
|
77
118
|
type: :development
|
78
119
|
prerelease: false
|
79
|
-
version_requirements:
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
80
126
|
- !ruby/object:Gem::Dependency
|
81
127
|
name: ipsum
|
82
|
-
requirement:
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
83
129
|
none: false
|
84
130
|
requirements:
|
85
131
|
- - ! '>='
|
@@ -87,8 +133,17 @@ dependencies:
|
|
87
133
|
version: '0'
|
88
134
|
type: :development
|
89
135
|
prerelease: false
|
90
|
-
version_requirements:
|
91
|
-
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
description: ! " Saxerator is a SAX-based xml parser designed for parsing very
|
143
|
+
large files into manageable chunks. Rather than\n dealing directly with SAX callback
|
144
|
+
methods, Saxerator gives you Enumerable access to chunks of an xml document.\n This
|
145
|
+
approach is ideal for large xml files containing a collection of elements that you
|
146
|
+
can process\n independently.\n"
|
92
147
|
email:
|
93
148
|
- bradley.schaefer@gmail.com
|
94
149
|
executables: []
|
@@ -116,7 +171,8 @@ files:
|
|
116
171
|
- .rvmrc
|
117
172
|
- .gitignore
|
118
173
|
homepage: https://github.com/soulcutter/saxerator
|
119
|
-
licenses:
|
174
|
+
licenses:
|
175
|
+
- MIT
|
120
176
|
post_install_message:
|
121
177
|
rdoc_options: []
|
122
178
|
require_paths:
|
@@ -126,7 +182,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
126
182
|
requirements:
|
127
183
|
- - ! '>='
|
128
184
|
- !ruby/object:Gem::Version
|
129
|
-
version:
|
185
|
+
version: '0'
|
130
186
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
187
|
none: false
|
132
188
|
requirements:
|
@@ -135,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
191
|
version: '0'
|
136
192
|
requirements: []
|
137
193
|
rubyforge_project: saxerator
|
138
|
-
rubygems_version: 1.8.
|
194
|
+
rubygems_version: 1.8.21
|
139
195
|
signing_key:
|
140
196
|
specification_version: 3
|
141
197
|
summary: A SAX-based XML parser for parsing large files into manageable chunks
|