xml_stream_parser 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/History.txt +7 -0
- data/LICENSE +20 -0
- data/Manifest.txt +9 -0
- data/README.rdoc +17 -0
- data/README.txt +85 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/init.rb +3 -0
- data/lib/instance_exec.rb +34 -0
- data/lib/xml_stream_parser.rb +203 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/xml_stream_parser_spec.rb +414 -0
- data/xml_stream_parser.gemspec +61 -0
- metadata +93 -0
data/.document
ADDED
data/.gitignore
ADDED
data/History.txt
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 mccraigmccraig
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= xml_stream_parser
|
2
|
+
|
3
|
+
Description goes here.
|
4
|
+
|
5
|
+
== Note on Patches/Pull Requests
|
6
|
+
|
7
|
+
* Fork the project.
|
8
|
+
* Make your feature addition or bug fix.
|
9
|
+
* Add tests for it. This is important so I don't break it in a
|
10
|
+
future version unintentionally.
|
11
|
+
* Commit, do not mess with rakefile, version, or history.
|
12
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
13
|
+
* Send me a pull request. Bonus points for topic branches.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2010 mccraigmccraig. See LICENSE for details.
|
data/README.txt
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
= xml_stream_parser
|
2
|
+
|
3
|
+
this code was developed by trampoline systems [ http://trampolinesystems.com ]
|
4
|
+
as part of its sonar platform and released under a BSD licence for community use
|
5
|
+
|
6
|
+
http://www.github.com/mccraigmccraig/xml_stream_parser
|
7
|
+
|
8
|
+
== DESCRIPTION:
|
9
|
+
|
10
|
+
a basic library for pull parsing of large xml documents
|
11
|
+
|
12
|
+
== FEATURES:
|
13
|
+
|
14
|
+
- pull parsing of large xml documents with no dom construction
|
15
|
+
- provides simple operations for constructing higher level parsers
|
16
|
+
|
17
|
+
== PROBLEMS:
|
18
|
+
|
19
|
+
- it's very basic
|
20
|
+
- no validation
|
21
|
+
|
22
|
+
== SYNOPSIS:
|
23
|
+
|
24
|
+
require 'rubygems'
|
25
|
+
require 'xml_stream_parser'
|
26
|
+
|
27
|
+
# parse xml stream data, possibly never ending, and do things with it
|
28
|
+
|
29
|
+
doc = <<-EOF
|
30
|
+
<people>
|
31
|
+
<person name="alice">likes cheese</person>
|
32
|
+
<person name="bob">likes music</person>
|
33
|
+
<person name="charles">likes alice</person>
|
34
|
+
</people>
|
35
|
+
EOF
|
36
|
+
|
37
|
+
# can be parsed with
|
38
|
+
|
39
|
+
people = {}
|
40
|
+
XmlStreamParser.new.parse_dsl(doc) do
|
41
|
+
element "people" do |name,attrs|
|
42
|
+
elements "person" do |name, attrs|
|
43
|
+
people[attrs["name"]] = text
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
== REQUIREMENTS:
|
49
|
+
|
50
|
+
Ruby or JRuby
|
51
|
+
|
52
|
+
== INSTALL:
|
53
|
+
|
54
|
+
sudo gem sources -a http://gems.github.com
|
55
|
+
sudo gem install mccraigmccraig-xml_stream_parser
|
56
|
+
|
57
|
+
== LICENSE:
|
58
|
+
|
59
|
+
(The BSD License)
|
60
|
+
|
61
|
+
Copyright (c) 2009, Trampoline Systems Ltd, http://trampolinesystems.com/
|
62
|
+
All rights reserved.
|
63
|
+
|
64
|
+
Redistribution and use in source and binary forms, with or without modification,
|
65
|
+
are permitted provided that the following conditions are met:
|
66
|
+
|
67
|
+
* Redistributions of source code must retain the above copyright notice,
|
68
|
+
this list of conditions and the following disclaimer.
|
69
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
70
|
+
this list of conditions and the following disclaimer in the documentation
|
71
|
+
and/or other materials provided with the distribution.
|
72
|
+
* Neither the name of the <ORGANIZATION> nor the names of its contributors may
|
73
|
+
be used to endorse or promote products derived from this software without
|
74
|
+
specific prior written permission.
|
75
|
+
|
76
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
77
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
78
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
79
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
80
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
81
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
82
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
83
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
84
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
85
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "xml_stream_parser"
|
8
|
+
gem.summary = %Q{simple xml stream parser for ruby}
|
9
|
+
gem.description = %Q{easily parse xml documents of any size with ruby}
|
10
|
+
gem.email = "craig@trampolinesystems.com"
|
11
|
+
gem.homepage = "http://github.com/mccraigmccraig/xml_stream_parser"
|
12
|
+
gem.authors = ["mccraigmccraig"]
|
13
|
+
gem.add_development_dependency "rspec", ">= 1.2.8"
|
14
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
+
end
|
16
|
+
Jeweler::GemcutterTasks.new
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'spec/rake/spectask'
|
22
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
23
|
+
spec.libs << 'lib' << 'spec'
|
24
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
25
|
+
end
|
26
|
+
|
27
|
+
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
28
|
+
spec.libs << 'lib' << 'spec'
|
29
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
30
|
+
spec.rcov = true
|
31
|
+
end
|
32
|
+
|
33
|
+
task :spec => :check_dependencies
|
34
|
+
|
35
|
+
task :default => :spec
|
36
|
+
|
37
|
+
require 'rake/rdoctask'
|
38
|
+
Rake::RDocTask.new do |rdoc|
|
39
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
+
|
41
|
+
rdoc.rdoc_dir = 'rdoc'
|
42
|
+
rdoc.title = "xml_stream_parser #{version}"
|
43
|
+
rdoc.rdoc_files.include('README*')
|
44
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.3.0
|
data/init.rb
ADDED
data/lib/instance_exec.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# defines Object.instance_exec to permit call of a Proc with params
|
2
|
+
# in the context of an instance : instance.instance_exec( foo, bar, &proc )
|
3
|
+
# taken from rails 2.2
|
4
|
+
#
|
5
|
+
class Object
|
6
|
+
unless defined? instance_exec # 1.9
|
7
|
+
module InstanceExecMethods #:nodoc:
|
8
|
+
@mutex = Mutex.new
|
9
|
+
class << self
|
10
|
+
attr_reader :mutex
|
11
|
+
end
|
12
|
+
end
|
13
|
+
include InstanceExecMethods
|
14
|
+
|
15
|
+
# Evaluate the block with the given arguments within the context of
|
16
|
+
# this object, so self is set to the method receiver.
|
17
|
+
#
|
18
|
+
# From Mauricio's http://eigenclass.org/hiki/bounded+space+instance_exec
|
19
|
+
def instance_exec(*args, &block)
|
20
|
+
method_name = InstanceExecMethods.mutex.synchronize do
|
21
|
+
n = 0
|
22
|
+
n += 1 while respond_to?(method_name = "__instance_exec#{n}")
|
23
|
+
InstanceExecMethods.module_eval { define_method(method_name, &block) }
|
24
|
+
method_name
|
25
|
+
end
|
26
|
+
|
27
|
+
begin
|
28
|
+
send(method_name, *args)
|
29
|
+
ensure
|
30
|
+
InstanceExecMethods.module_eval { remove_method(method_name) } rescue nil
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/xml_stream_parser.rb
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'rexml/parsers/pullparser'
|
4
|
+
require File.join( File.dirname(__FILE__), 'instance_exec')
|
5
|
+
|
6
|
+
module REXML
|
7
|
+
module Parsers
|
8
|
+
class PullEvent
|
9
|
+
# PullEvent is missing the end_document? method, even tho
|
10
|
+
# the BaseParser produces the event
|
11
|
+
def end_document?
|
12
|
+
@contents[0] == :end_document
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class XmlStreamParser
|
19
|
+
|
20
|
+
VERSION = "0.2.0"
|
21
|
+
|
22
|
+
class Sentinel
|
23
|
+
def to_s
|
24
|
+
self.class.to_s
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
class Nothing < Sentinel
|
29
|
+
end
|
30
|
+
|
31
|
+
class EndContext < Sentinel
|
32
|
+
end
|
33
|
+
|
34
|
+
module Sentinels
|
35
|
+
NOTHING = Nothing.new
|
36
|
+
END_CONTEXT = EndContext.new
|
37
|
+
end
|
38
|
+
|
39
|
+
include Sentinels
|
40
|
+
|
41
|
+
# the REXML::Parsers::PullParser used internally
|
42
|
+
attr_reader :pull_parser
|
43
|
+
attr_reader :dsl
|
44
|
+
|
45
|
+
# parse retaining block context... permitting
|
46
|
+
# the parse to easily be split over multiple methods
|
47
|
+
def parse(data, &block)
|
48
|
+
parse_dsl(data, false, &block)
|
49
|
+
end
|
50
|
+
|
51
|
+
# parse with optional dsl mode
|
52
|
+
# if dsl is true [ default ] then the block will be instance_exec'd in
|
53
|
+
# the context of the parser, if dsl is false the block will be called
|
54
|
+
# retaining it's current context
|
55
|
+
def parse_dsl(data, dsl=true, &block)
|
56
|
+
io = case data
|
57
|
+
when IO
|
58
|
+
data
|
59
|
+
when StringIO
|
60
|
+
data
|
61
|
+
when String
|
62
|
+
StringIO.new(data)
|
63
|
+
end
|
64
|
+
|
65
|
+
@pull_parser = REXML::Parsers::PullParser.new( io )
|
66
|
+
@dsl = dsl
|
67
|
+
if self.dsl
|
68
|
+
self.instance_exec(&block)
|
69
|
+
else
|
70
|
+
block.call(self)
|
71
|
+
end
|
72
|
+
ensure
|
73
|
+
@pull_parser = nil
|
74
|
+
end
|
75
|
+
|
76
|
+
# find an element with name in element_names : inter-element whitespace is ignored
|
77
|
+
# - encountering end_element terminates and returns END_CONTEXT, leaving parser on end_element
|
78
|
+
# - encountering end_document terminates and returns END_CONTEXT
|
79
|
+
# - encountering start_element for an element not in element_names NOTHING, parser on start_element
|
80
|
+
# - encountering start_element for an element in element_names returns element name, parser on start_element
|
81
|
+
def find_element( element_names )
|
82
|
+
element_names = [ *element_names ]
|
83
|
+
|
84
|
+
while( true )
|
85
|
+
e = @pull_parser.peek
|
86
|
+
if e.start_element?
|
87
|
+
if element_names.include?( e[0] )
|
88
|
+
return e[0]
|
89
|
+
else
|
90
|
+
return NOTHING
|
91
|
+
end
|
92
|
+
elsif e.end_element?
|
93
|
+
return END_CONTEXT
|
94
|
+
elsif e.end_document?
|
95
|
+
return END_CONTEXT
|
96
|
+
elsif e.text?
|
97
|
+
# ignore whitespace between elements
|
98
|
+
raise "unexpected text content: #{e.inspect}" if e[0] !~ /[[:space:]]/
|
99
|
+
@pull_parser.pull
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# parse and throw away content until we escape the current context, either
|
105
|
+
# through end_element, or end_document
|
106
|
+
def discard()
|
107
|
+
element_stack = []
|
108
|
+
|
109
|
+
while(true)
|
110
|
+
e = @pull_parser.peek
|
111
|
+
name = e[0]
|
112
|
+
if e.start_element?
|
113
|
+
element_stack.push(name)
|
114
|
+
elsif e.end_element?
|
115
|
+
return nil if element_stack.size == 0
|
116
|
+
raise "mismatched end_element. expected </#{element_stack.last}>, got: #{e.inspect}" if name != element_stack.last
|
117
|
+
element_stack.pop
|
118
|
+
elsif e.end_document?
|
119
|
+
return nil if element_stack.size ==0
|
120
|
+
raise "mismatched end_element. expected </#{element_stack.last}>, got: #{e.inspect}"
|
121
|
+
end
|
122
|
+
@pull_parser.pull
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
# consume an element
|
127
|
+
# - if optional is false the element must be present
|
128
|
+
# - if optional is true and the element is not present then NOTHING/END_CONTEXT
|
129
|
+
# will be returned
|
130
|
+
# - consumes start_element, calls block on content, consumes end_element
|
131
|
+
def element( element_names, optional=false, &block )
|
132
|
+
element_names = [ *element_names ]
|
133
|
+
|
134
|
+
f = find_element(element_names)
|
135
|
+
e = @pull_parser.peek
|
136
|
+
|
137
|
+
if f.is_a? Sentinel
|
138
|
+
if optional
|
139
|
+
return f
|
140
|
+
else
|
141
|
+
raise "expected start element: <#{element_names.join('|')}, got: #{e.inspect}>"
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
e = @pull_parser.pull # consume the start tag
|
146
|
+
name = e[0]
|
147
|
+
attrs = e[1]
|
148
|
+
|
149
|
+
# block should consume all element content, and leave parser on end_element, or
|
150
|
+
# whitespace before it
|
151
|
+
err=false
|
152
|
+
begin
|
153
|
+
if self.dsl
|
154
|
+
v = self.instance_exec(name, attrs, &block)
|
155
|
+
else
|
156
|
+
v = block.call(name,attrs)
|
157
|
+
end
|
158
|
+
return v if ! v.is_a? Sentinel # do not propagate Sentinels. they confuse callers
|
159
|
+
rescue
|
160
|
+
err=true # note that we are erroring, so as not to mask the exception from ensure block
|
161
|
+
raise
|
162
|
+
ensure
|
163
|
+
if !err # if return was called in the block, ensure we consume the end_element
|
164
|
+
e = @pull_parser.pull
|
165
|
+
e = @pull_parser.pull if e.text? && e[0] =~ /[[:space:]]/
|
166
|
+
raise "expected end tag: #{name}, got: #{e.inspect}" if ! e.end_element? || e[0] != name
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
# find and consume elements, calling block on each one found
|
172
|
+
# return result of last find : NOTHING or END_CONTEXT sentinel
|
173
|
+
def elements( element_names, &block )
|
174
|
+
while true
|
175
|
+
break if element(element_names, true, &block).is_a? Sentinel
|
176
|
+
end
|
177
|
+
|
178
|
+
return nil
|
179
|
+
end
|
180
|
+
|
181
|
+
# consume text element
|
182
|
+
# returns the text, or nil if none
|
183
|
+
def text( &block )
|
184
|
+
e = @pull_parser.peek
|
185
|
+
raise "expected text node, got #{e.inspect}" if ! e.text? && ! e.end_element?
|
186
|
+
text = if e.text?
|
187
|
+
@pull_parser.pull
|
188
|
+
e[0]
|
189
|
+
else
|
190
|
+
nil
|
191
|
+
end
|
192
|
+
if block
|
193
|
+
if self.dsl
|
194
|
+
text = self.instance_exec( text , &block)
|
195
|
+
else
|
196
|
+
text = block.call(text)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
text
|
200
|
+
end
|
201
|
+
|
202
|
+
end
|
203
|
+
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
data/spec/xml_stream_parser_spec.rb
ADDED
@@ -0,0 +1,414 @@
|
|
1
|
+
#!/usr/bin/env spec
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'spec'
|
5
|
+
require 'set'
|
6
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
7
|
+
|
8
|
+
describe XmlStreamParser do
|
9
|
+
it "should work on a StringIO" do
|
10
|
+
io = StringIO.new( "<foo/>")
|
11
|
+
XmlStreamParser.new.parse_dsl(io) do
|
12
|
+
element("foo") do |name,attrs|
|
13
|
+
name.should ==("foo")
|
14
|
+
name
|
15
|
+
end
|
16
|
+
end.should ==("foo")
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should parse a simple one element document" do
|
20
|
+
XmlStreamParser.new.parse_dsl( "<foo></foo>" ) do
|
21
|
+
called = false
|
22
|
+
element("foo") { |name,attrs|
|
23
|
+
called = true
|
24
|
+
name.should ==("foo")
|
25
|
+
attrs.should ==({})
|
26
|
+
}
|
27
|
+
called.should ==(true)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "find_element" do
|
32
|
+
|
33
|
+
it "should skip whitespace to find an element" do
|
34
|
+
XmlStreamParser.new.parse_dsl( " \n\n\n<foo></foo>") do
|
35
|
+
name = find_element("foo")
|
36
|
+
name.should ==("foo")
|
37
|
+
e = pull_parser.pull
|
38
|
+
e.start_element?.should ==(true)
|
39
|
+
e[0].should == "foo"
|
40
|
+
e[1].should == {}
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should return NOTHING on unexpected elements" do
|
45
|
+
XmlStreamParser.new.parse_dsl( "<foo></foo>") do
|
46
|
+
find_element("bar")
|
47
|
+
end.should ==(XmlStreamParser::NOTHING)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should match one of multiple elements" do
|
51
|
+
XmlStreamParser.new.parse_dsl( "<foo></foo>" ) do
|
52
|
+
find_element( ["bar","foo" ] )
|
53
|
+
end.should ==("foo")
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
it "should return END_CONTEXT if element context terminates" do
|
58
|
+
called = false
|
59
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
|
60
|
+
element("foo") do |name,attrs|
|
61
|
+
name.should ==("foo")
|
62
|
+
|
63
|
+
n = find_element("bar")
|
64
|
+
n.should ==(XmlStreamParser::END_CONTEXT)
|
65
|
+
|
66
|
+
e = pull_parser.peek
|
67
|
+
e.end_element?.should ==(true)
|
68
|
+
e[0].should ==("foo")
|
69
|
+
|
70
|
+
called = true
|
71
|
+
end
|
72
|
+
end
|
73
|
+
called.should ==(true)
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should return END_CONTEXT if document ends" do
|
77
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>') do
|
78
|
+
element("foo") do |name,attrs|
|
79
|
+
end
|
80
|
+
f = find_element("bar")
|
81
|
+
f.should ==( XmlStreamParser::END_CONTEXT )
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
describe "discard" do
|
87
|
+
|
88
|
+
it "should discard text content of an element" do
|
89
|
+
XmlStreamParser.new.parse_dsl( '<foo>blah blah blah</foo>') do
|
90
|
+
element("foo") do |name,attrs|
|
91
|
+
discard
|
92
|
+
"foo"
|
93
|
+
end
|
94
|
+
end.should ==("foo")
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should discard element content of an element" do
|
98
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/><foobar></foobar></foo>') do
|
99
|
+
element("foo") do |name,attrs|
|
100
|
+
discard
|
101
|
+
"foo"
|
102
|
+
end
|
103
|
+
end.should ==("foo")
|
104
|
+
end
|
105
|
+
|
106
|
+
it "should discard mixed content of an element" do
|
107
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/>blah blah<foobar></foobar> blah blah </foo>') do
|
108
|
+
element("foo") do |name,attrs|
|
109
|
+
discard
|
110
|
+
"foo"
|
111
|
+
end
|
112
|
+
end.should ==("foo")
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
116
|
+
|
117
|
+
describe "element" do
|
118
|
+
|
119
|
+
it "should return NOTHING if optional and element not found" do
|
120
|
+
XmlStreamParser.new.parse_dsl( '<foo><foofoo/></foo>' ) do
|
121
|
+
element("foo") do |name,attrs|
|
122
|
+
element("bar",true) do |name,attrs|
|
123
|
+
"bar"
|
124
|
+
end.should ==(XmlStreamParser::NOTHING)
|
125
|
+
element("foofoo") do |name,attrs|
|
126
|
+
"foofoo"
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end.should ==("foofoo" )
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should return END_CONTEXT if optional and context ends" do
|
133
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
|
134
|
+
element("foo") do |name,attrs|
|
135
|
+
element("bar",true) do |name,attrs|
|
136
|
+
"bar"
|
137
|
+
end.should ==(XmlStreamParser::END_CONTEXT)
|
138
|
+
"foofoo"
|
139
|
+
end
|
140
|
+
end.should ==("foofoo")
|
141
|
+
end
|
142
|
+
|
143
|
+
it "should not propagate sentinel values up the call hierarchy" do
|
144
|
+
called = false
|
145
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
|
146
|
+
element("foo") do |name,attrs|
|
147
|
+
called = true
|
148
|
+
element("bar",true) do |name,attrs|
|
149
|
+
"bar"
|
150
|
+
end.should ==(XmlStreamParser::END_CONTEXT)
|
151
|
+
end
|
152
|
+
end.should_not ==(XmlStreamParser::END_CONTEXT)
|
153
|
+
called.should == (true)
|
154
|
+
end
|
155
|
+
|
156
|
+
class Foo
|
157
|
+
def self.parse_bar( p )
|
158
|
+
p.element("bar") do |name,attrs|
|
159
|
+
return "barbar"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
it "should consume the end tag even if block calls return" do
|
165
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/></foo>') do
|
166
|
+
element( "foo" ) do |name, attrs|
|
167
|
+
Foo.parse_bar( self )
|
168
|
+
end
|
169
|
+
end.should ==("barbar" )
|
170
|
+
end
|
171
|
+
|
172
|
+
it "should consume the end tag even if block calls break" do
|
173
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/></foo>') do
|
174
|
+
element( "foo" ) do |name, attrs|
|
175
|
+
element( "bar" ) do |name, attrs|
|
176
|
+
break
|
177
|
+
end
|
178
|
+
"foo"
|
179
|
+
end
|
180
|
+
end.should ==( "foo" )
|
181
|
+
end
|
182
|
+
|
183
|
+
it "should raise on premature document termination" do
|
184
|
+
lambda {
|
185
|
+
XmlStreamParser.new.parse_dsl( '<foo>' ) do
|
186
|
+
element("foo") do |name,attrs|
|
187
|
+
element("bar",false) do |name,attrs|
|
188
|
+
"bar"
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
192
|
+
}.should raise_error(RuntimeError)
|
193
|
+
end
|
194
|
+
|
195
|
+
it "should raise on premature context termination" do
|
196
|
+
lambda {
|
197
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
|
198
|
+
element("foo") do |name,attrs|
|
199
|
+
element("bar",false) do |name,attrs|
|
200
|
+
"bar"
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
}.should raise_error(RuntimeError)
|
205
|
+
end
|
206
|
+
|
207
|
+
it "should consume an element, giving name and attributes to the provided block and returning block result" do
|
208
|
+
XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"></foo>') do
|
209
|
+
element( "foo" ) do |name, attrs|
|
210
|
+
name.should ==("foo")
|
211
|
+
attrs.should ==({ "a"=>"one", "b"=>"two" })
|
212
|
+
"blockresult"
|
213
|
+
end.should ==("blockresult")
|
214
|
+
e = pull_parser.peek
|
215
|
+
e.end_document?.should ==(true)
|
216
|
+
"foofoo"
|
217
|
+
end.should ==("foofoo")
|
218
|
+
end
|
219
|
+
|
220
|
+
it "should consume one of many element names, giving name and attrs to block and returning block result" do
|
221
|
+
XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"></foo>') do
|
222
|
+
element( ["bar","foo"] ) do |name, attrs|
|
223
|
+
name.should ==("foo")
|
224
|
+
attrs.should ==({ "a"=>"one", "b"=>"two" })
|
225
|
+
"blockresult"
|
226
|
+
end
|
227
|
+
end.should ==("blockresult")
|
228
|
+
end
|
229
|
+
|
230
|
+
it "should ignore whitespace inside element" do
|
231
|
+
XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"> \n \n</foo>') do
|
232
|
+
element( "foo" ) do |name, attrs|
|
233
|
+
name.should ==("foo")
|
234
|
+
attrs.should ==({ "a"=>"one", "b"=>"two" })
|
235
|
+
"blockresult"
|
236
|
+
end.should ==("blockresult")
|
237
|
+
e = pull_parser.peek
|
238
|
+
e.end_document?.should ==(true)
|
239
|
+
"foofoo"
|
240
|
+
end.should ==("foofoo")
|
241
|
+
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
describe "text" do
|
246
|
+
it "should consume an element with text content and give it's name, attrs, text to the block and return the block result" do
|
247
|
+
XmlStreamParser.new.parse_dsl( '<foo a="bar">hello mum</foo>') do
|
248
|
+
element( "foo" ) do |name, attrs|
|
249
|
+
name.should ==("foo")
|
250
|
+
attrs.should ==({ "a"=>"bar" })
|
251
|
+
text
|
252
|
+
end
|
253
|
+
end.should ==("hello mum")
|
254
|
+
end
|
255
|
+
|
256
|
+
it "should raise if the element contains element content" do
|
257
|
+
lambda {
|
258
|
+
XmlStreamParser.new.parse_dsl( '<foo a="bar"><bar/></foo>') do
|
259
|
+
element("foo") do |name,attrs|
|
260
|
+
text()
|
261
|
+
end
|
262
|
+
end
|
263
|
+
}.should raise_error(RuntimeError)
|
264
|
+
end
|
265
|
+
|
266
|
+
it "should raise if the element contains mixed content" do
|
267
|
+
lambda {
|
268
|
+
XmlStreamParser.new.parse_dsl( '<foo a="bar">some <bar/> text</foo>') do
|
269
|
+
element("foo") do |name,attrs|
|
270
|
+
text()
|
271
|
+
end
|
272
|
+
end
|
273
|
+
}.should raise_error(RuntimeError)
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
describe "elements" do
|
278
|
+
it "should consume multiple elements" do
|
279
|
+
el_counts = Hash.new(0)
|
280
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/><bar/><foobar/></foo>') do
|
281
|
+
element("foo") do |name,attrs|
|
282
|
+
elements( ["bar","foobar"] ) do |name,attrs|
|
283
|
+
el_counts[name] += 1
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
287
|
+
el_counts.should ==({ "bar"=>2, "foobar"=>1 })
|
288
|
+
end
|
289
|
+
|
290
|
+
it "should not complain if there are no matching elements" do
|
291
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>') do
|
292
|
+
element("foo") do |name,attrs|
|
293
|
+
elements( ["bar","foobar"] ) do |name,attrs|
|
294
|
+
el_counts[name] += 1
|
295
|
+
end
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
299
|
+
|
300
|
+
end
|
301
|
+
|
302
|
+
describe "non-DSL mode" do
|
303
|
+
it "should pass the parser to the parse() block" do
|
304
|
+
def foo()
|
305
|
+
"foo"
|
306
|
+
end
|
307
|
+
|
308
|
+
XmlStreamParser.new.parse( '<foo></foo>') do |p|
|
309
|
+
p.should_not ==(nil)
|
310
|
+
foo()
|
311
|
+
end.should =="foo"
|
312
|
+
end
|
313
|
+
|
314
|
+
it "should retain contenxt for element blocks" do
|
315
|
+
def foo()
|
316
|
+
"foo"
|
317
|
+
end
|
318
|
+
XmlStreamParser.new.parse( '<foo></foo>') do |p|
|
319
|
+
p.element('foo') do |name,attrs|
|
320
|
+
name.should =='foo'
|
321
|
+
attrs.should == {}
|
322
|
+
foo()
|
323
|
+
end
|
324
|
+
end.should =="foo"
|
325
|
+
end
|
326
|
+
|
327
|
+
it "should retain context for text blocks" do
|
328
|
+
def bar()
|
329
|
+
"barbar"
|
330
|
+
end
|
331
|
+
XmlStreamParser.new.parse( '<foo>bar</foo>') do |p|
|
332
|
+
p.element('foo') do |name,attrs|
|
333
|
+
p.text{ |t| t.should =='bar' ; t }
|
334
|
+
bar()
|
335
|
+
end
|
336
|
+
end.should =='barbar'
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
describe "some more complex examples" do
|
341
|
+
|
342
|
+
it "should parse a list of people" do
|
343
|
+
doc = <<-EOF
|
344
|
+
<people>
|
345
|
+
<person name="alice">likes cheese</person>
|
346
|
+
<person name="bob">likes music</person>
|
347
|
+
<person name="charles">likes alice</person>
|
348
|
+
</people>
|
349
|
+
EOF
|
350
|
+
|
351
|
+
people = {}
|
352
|
+
|
353
|
+
XmlStreamParser.new.parse_dsl(doc) do
|
354
|
+
element("people") do |name,attrs|
|
355
|
+
elements("person") do |name, attrs|
|
356
|
+
people[attrs["name"]] = text
|
357
|
+
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
|
361
|
+
people.should ==({ "alice"=>"likes cheese",
|
362
|
+
"bob"=>"likes music",
|
363
|
+
"charles"=>"likes alice"})
|
364
|
+
end
|
365
|
+
|
366
|
+
it "should parse a list of people and their friends" do
|
367
|
+
doc = <<-EOF
|
368
|
+
<people>
|
369
|
+
<person name="alice">
|
370
|
+
<friend name="bob"/>
|
371
|
+
<likes>cheese</likes>
|
372
|
+
<friend name="charles"/>
|
373
|
+
</person>
|
374
|
+
<person name="bob">
|
375
|
+
<friend name="alice"/>
|
376
|
+
<likes>wolf dogs</likes>
|
377
|
+
</person>
|
378
|
+
<person name="charles">
|
379
|
+
<friend name="alice"/>
|
380
|
+
<likes>bach</likes>
|
381
|
+
</person>
|
382
|
+
</people>
|
383
|
+
EOF
|
384
|
+
|
385
|
+
people = Hash.new{ |h,k| h[k] = {:friends=>Set.new([]), :likes=>Set.new([]) } }
|
386
|
+
|
387
|
+
XmlStreamParser.new.parse_dsl(doc) do
|
388
|
+
element("people") do |name,attrs|
|
389
|
+
elements("person") do |name, attrs|
|
390
|
+
person_name = attrs["name"]
|
391
|
+
people[person_name]
|
392
|
+
|
393
|
+
elements(["friend","likes"]) do |name,attrs|
|
394
|
+
case name
|
395
|
+
when "friend" then
|
396
|
+
people[person_name][:friends] << attrs["name"]
|
397
|
+
when "likes" then
|
398
|
+
people[person_name][:likes] << text
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
people.should ==( {
|
406
|
+
"alice"=>{ :friends=>Set.new(["bob","charles"]), :likes=>Set.new(["cheese"])},
|
407
|
+
"bob"=>{ :friends=>Set.new(["alice"]), :likes=>Set.new(["wolf dogs"])},
|
408
|
+
"charles"=>{ :friends=>Set.new(["alice"]), :likes=>Set.new(["bach"])}
|
409
|
+
})
|
410
|
+
|
411
|
+
end
|
412
|
+
|
413
|
+
end
|
414
|
+
end
|
data/xml_stream_parser.gemspec
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{xml_stream_parser}
|
8
|
+
s.version = "0.3.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["mccraigmccraig"]
|
12
|
+
s.date = %q{2010-06-24}
|
13
|
+
s.description = %q{easily parse xml documents of any size with ruby}
|
14
|
+
s.email = %q{craig@trampolinesystems.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc",
|
18
|
+
"README.txt"
|
19
|
+
]
|
20
|
+
s.files = [
|
21
|
+
".document",
|
22
|
+
".gitignore",
|
23
|
+
"History.txt",
|
24
|
+
"LICENSE",
|
25
|
+
"Manifest.txt",
|
26
|
+
"README.rdoc",
|
27
|
+
"README.txt",
|
28
|
+
"Rakefile",
|
29
|
+
"VERSION",
|
30
|
+
"init.rb",
|
31
|
+
"lib/instance_exec.rb",
|
32
|
+
"lib/xml_stream_parser.rb",
|
33
|
+
"spec/spec.opts",
|
34
|
+
"spec/spec_helper.rb",
|
35
|
+
"spec/xml_stream_parser_spec.rb",
|
36
|
+
"xml_stream_parser.gemspec"
|
37
|
+
]
|
38
|
+
s.homepage = %q{http://github.com/mccraigmccraig/xml_stream_parser}
|
39
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
40
|
+
s.require_paths = ["lib"]
|
41
|
+
s.rubygems_version = %q{1.3.6}
|
42
|
+
s.summary = %q{simple xml stream parser for ruby}
|
43
|
+
s.test_files = [
|
44
|
+
"spec/spec_helper.rb",
|
45
|
+
"spec/xml_stream_parser_spec.rb"
|
46
|
+
]
|
47
|
+
|
48
|
+
if s.respond_to? :specification_version then
|
49
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
50
|
+
s.specification_version = 3
|
51
|
+
|
52
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
53
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.8"])
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<rspec>, [">= 1.2.8"])
|
56
|
+
end
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<rspec>, [">= 1.2.8"])
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
metadata
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xml_stream_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 3
|
8
|
+
- 0
|
9
|
+
version: 0.3.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- mccraigmccraig
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-06-24 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 1
|
29
|
+
- 2
|
30
|
+
- 8
|
31
|
+
version: 1.2.8
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
description: easily parse xml documents of any size with ruby
|
35
|
+
email: craig@trampolinesystems.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files:
|
41
|
+
- LICENSE
|
42
|
+
- README.rdoc
|
43
|
+
- README.txt
|
44
|
+
files:
|
45
|
+
- .document
|
46
|
+
- .gitignore
|
47
|
+
- History.txt
|
48
|
+
- LICENSE
|
49
|
+
- Manifest.txt
|
50
|
+
- README.rdoc
|
51
|
+
- README.txt
|
52
|
+
- Rakefile
|
53
|
+
- VERSION
|
54
|
+
- init.rb
|
55
|
+
- lib/instance_exec.rb
|
56
|
+
- lib/xml_stream_parser.rb
|
57
|
+
- spec/spec.opts
|
58
|
+
- spec/spec_helper.rb
|
59
|
+
- spec/xml_stream_parser_spec.rb
|
60
|
+
- xml_stream_parser.gemspec
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://github.com/mccraigmccraig/xml_stream_parser
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options:
|
67
|
+
- --charset=UTF-8
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
segments:
|
75
|
+
- 0
|
76
|
+
version: "0"
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
segments:
|
82
|
+
- 0
|
83
|
+
version: "0"
|
84
|
+
requirements: []
|
85
|
+
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 1.3.6
|
88
|
+
signing_key:
|
89
|
+
specification_version: 3
|
90
|
+
summary: simple xml stream parser for ruby
|
91
|
+
test_files:
|
92
|
+
- spec/spec_helper.rb
|
93
|
+
- spec/xml_stream_parser_spec.rb
|