xml_stream_parser 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/History.txt +7 -0
- data/LICENSE +20 -0
- data/Manifest.txt +9 -0
- data/README.rdoc +17 -0
- data/README.txt +85 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/init.rb +3 -0
- data/lib/instance_exec.rb +34 -0
- data/lib/xml_stream_parser.rb +203 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/xml_stream_parser_spec.rb +414 -0
- data/xml_stream_parser.gemspec +61 -0
- metadata +93 -0
data/.document
ADDED
data/.gitignore
ADDED
data/History.txt
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 mccraigmccraig
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= xml_stream_parser
|
2
|
+
|
3
|
+
A basic library for pull parsing of large XML documents.
|
4
|
+
|
5
|
+
== Note on Patches/Pull Requests
|
6
|
+
|
7
|
+
* Fork the project.
|
8
|
+
* Make your feature addition or bug fix.
|
9
|
+
* Add tests for it. This is important so I don't break it in a
|
10
|
+
future version unintentionally.
|
11
|
+
* Commit, do not mess with rakefile, version, or history.
|
12
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
13
|
+
* Send me a pull request. Bonus points for topic branches.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2010 mccraigmccraig. See LICENSE for details.
|
data/README.txt
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
= xml_stream_parser
|
2
|
+
|
3
|
+
this code was developed by trampoline systems [ http://trampolinesystems.com ]
|
4
|
+
as part of its sonar platform and released under a BSD licence for community use
|
5
|
+
|
6
|
+
http://www.github.com/mccraigmccraig/xml_stream_parser
|
7
|
+
|
8
|
+
== DESCRIPTION:
|
9
|
+
|
10
|
+
a basic library for pull parsing of large xml documents
|
11
|
+
|
12
|
+
== FEATURES:
|
13
|
+
|
14
|
+
- pull parsing of large xml documents with no dom construction
|
15
|
+
- provides simple operations for constructing higher level parsers
|
16
|
+
|
17
|
+
== PROBLEMS:
|
18
|
+
|
19
|
+
- it's very basic
|
20
|
+
- no validation
|
21
|
+
|
22
|
+
== SYNOPSIS:
|
23
|
+
|
24
|
+
require 'rubygems'
|
25
|
+
require 'xml_stream_parser'
|
26
|
+
|
27
|
+
# parse xml stream data, possibly never ending, and do things with it
|
28
|
+
|
29
|
+
doc = <<-EOF
|
30
|
+
<people>
|
31
|
+
<person name="alice">likes cheese</person>
|
32
|
+
<person name="bob">likes music</person>
|
33
|
+
<person name="charles">likes alice</person>
|
34
|
+
</people>
|
35
|
+
EOF
|
36
|
+
|
37
|
+
# can be parsed with
|
38
|
+
|
39
|
+
people = {}
|
40
|
+
XmlStreamParser.new.parse_dsl(doc) do
|
41
|
+
element "people" do |name,attrs|
|
42
|
+
elements "person" do |name, attrs|
|
43
|
+
people[attrs["name"]] = text
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
== REQUIREMENTS:
|
49
|
+
|
50
|
+
Ruby or JRuby
|
51
|
+
|
52
|
+
== INSTALL:
|
53
|
+
|
54
|
+
sudo gem sources -a http://gems.github.com
|
55
|
+
sudo gem install mccraigmccraig-xml_stream_parser
|
56
|
+
|
57
|
+
== LICENSE:
|
58
|
+
|
59
|
+
(The BSD License)
|
60
|
+
|
61
|
+
Copyright (c) 2009, Trampoline Systems Ltd, http://trampolinesystems.com/
|
62
|
+
All rights reserved.
|
63
|
+
|
64
|
+
Redistribution and use in source and binary forms, with or without modification,
|
65
|
+
are permitted provided that the following conditions are met:
|
66
|
+
|
67
|
+
* Redistributions of source code must retain the above copyright notice,
|
68
|
+
this list of conditions and the following disclaimer.
|
69
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
70
|
+
this list of conditions and the following disclaimer in the documentation
|
71
|
+
and/or other materials provided with the distribution.
|
72
|
+
* Neither the name of the <ORGANIZATION> nor the names of its contributors may
|
73
|
+
be used to endorse or promote products derived from this software without
|
74
|
+
specific prior written permission.
|
75
|
+
|
76
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
77
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
78
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
79
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
80
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
81
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
82
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
83
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
84
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
85
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Build script: gem packaging via Jeweler (optional), RSpec 1 spec/rcov tasks
# and rdoc generation.
require 'rubygems'
require 'rake'

# Jeweler is optional: when it cannot be loaded the packaging tasks are simply
# not defined and a hint is printed instead.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "xml_stream_parser"
    gem.summary = %Q{simple xml stream parser for ruby}
    gem.description = %Q{easily parse xml documents of any size with ruby}
    gem.email = "craig@trampolinesystems.com"
    gem.homepage = "http://github.com/mccraigmccraig/xml_stream_parser"
    gem.authors = ["mccraigmccraig"]
    gem.add_development_dependency "rspec", ">= 1.2.8"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# check_dependencies is only defined when the Jeweler tasks above were loaded;
# depending on it unconditionally makes `rake spec` fail with an unknown-task
# error whenever the LoadError branch was taken.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "xml_stream_parser #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.3.0
|
data/init.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# defines Object.instance_exec to permit call of a Proc with params
|
2
|
+
# in the context of an instance : instance.instance_exec( foo, bar, &proc )
|
3
|
+
# taken from rails 2.2
|
4
|
+
#
|
5
|
+
class Object
  # Backport: only installed when the running Ruby has no instance_exec of its own.
  unless defined? instance_exec # 1.9
    # Holds the temporary methods created by instance_exec, plus the lock
    # that serialises picking a free temporary method name.
    module InstanceExecMethods #:nodoc:
      @mutex = Mutex.new

      def self.mutex
        @mutex
      end
    end
    include InstanceExecMethods

    # Run the block with the given arguments and with self set to the
    # receiver: instance.instance_exec( foo, bar, &proc ).
    #
    # The block is turned into a temporarily named method on
    # InstanceExecMethods, invoked via send, and removed again afterwards.
    #
    # Taken from rails 2.2, which credits Mauricio's
    # http://eigenclass.org/hiki/bounded+space+instance_exec
    def instance_exec(*args, &block)
      temp_name = InstanceExecMethods.mutex.synchronize do
        # probe __instance_exec0, __instance_exec1, ... until a name is free
        index = 0
        candidate = "__instance_exec#{index}"
        while respond_to?(candidate)
          index += 1
          candidate = "__instance_exec#{index}"
        end
        InstanceExecMethods.module_eval { define_method(candidate, &block) }
        candidate
      end

      begin
        send(temp_name, *args)
      ensure
        # best-effort cleanup: a failure to remove the helper is ignored
        InstanceExecMethods.module_eval { remove_method(temp_name) } rescue nil
      end
    end
  end
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'rexml/parsers/pullparser'
|
4
|
+
require File.join( File.dirname(__FILE__), 'instance_exec')
|
5
|
+
|
6
|
+
module REXML
  module Parsers
    # Reopened to add the end_document? predicate, which PullEvent does not
    # provide even though (per the original author) the BaseParser produces
    # the :end_document event.
    class PullEvent
      # true when this event's type tag (first entry of @contents) is :end_document
      def end_document?
        :end_document == @contents.first
      end
    end
  end
end
|
17
|
+
|
18
|
+
# A small pull-parsing helper built on REXML::Parsers::PullParser.
#
# A parse is driven by nested blocks: element / elements consume start and end
# tags around a block, text consumes a text node, find_element looks ahead and
# discard throws away the remainder of the current element.
class XmlStreamParser

  VERSION = "0.3.0"

  # Base class of the marker objects returned instead of an element name
  # or a block result.
  class Sentinel
    def to_s
      self.class.to_s
    end
  end

  # marker: the next element is not one of the requested names
  class Nothing < Sentinel
  end

  # marker: the enclosing element (or the document) ended
  class EndContext < Sentinel
  end

  module Sentinels
    NOTHING = Nothing.new
    END_CONTEXT = EndContext.new
  end

  include Sentinels

  # the REXML::Parsers::PullParser used internally; nil outside of a parse
  attr_reader :pull_parser
  # whether blocks are instance_exec'd on the parser (true) or called as-is (false)
  attr_reader :dsl

  # parse retaining block context... permitting
  # the parse to easily be split over multiple methods.
  # the block receives the parser as its argument
  def parse(data, &block)
    parse_dsl(data, false, &block)
  end

  # parse with optional dsl mode
  # if dsl is true [ default ] then the block will be instance_exec'd in
  # the context of the parser, if dsl is false the block will be called
  # retaining its current context
  #
  # data may be a String, or any stream REXML accepts (an IO, a StringIO, ...).
  # returns the value of the block
  def parse_dsl(data, dsl=true, &block)
    # Strings are wrapped; everything else is handed to REXML unchanged so
    # that REXML itself accepts or rejects it. (Previously any input that was
    # not exactly an IO/StringIO/String was silently replaced by nil.)
    io = data.is_a?(String) ? StringIO.new(data) : data

    @pull_parser = REXML::Parsers::PullParser.new( io )
    @dsl = dsl
    if self.dsl
      self.instance_exec(&block)
    else
      block.call(self)
    end
  ensure
    @pull_parser = nil
  end

  # find an element with name in element_names : inter-element whitespace is ignored
  # - encountering end_element terminates and returns END_CONTEXT, leaving parser on end_element
  # - encountering end_document terminates and returns END_CONTEXT
  # - encountering start_element for an element not in element_names returns NOTHING, parser on start_element
  # - encountering start_element for an element in element_names returns element name, parser on start_element
  # - encountering non-whitespace text, or cdata, raises
  # - any other event (xml declaration, comment, processing instruction,
  #   doctype, ...) is consumed and skipped
  def find_element( element_names )
    element_names = [ *element_names ]

    loop do
      e = @pull_parser.peek
      if e.start_element?
        return element_names.include?( e[0] ) ? e[0] : NOTHING
      elsif e.end_element? || e.end_document?
        return END_CONTEXT
      elsif e.text?
        # only whitespace may sit between elements. the pattern must cover the
        # whole text: matching a single whitespace character anywhere would
        # let content such as "blah blah" slip through unnoticed
        raise "unexpected text content: #{e.inspect}" if e[0] !~ /\A[[:space:]]*\z/
      elsif e.cdata?
        raise "unexpected text content: #{e.inspect}"
      end

      # consume the skipped event. this has to happen for every event kind
      # that did not return above, otherwise the same event is peeked forever
      @pull_parser.pull
    end
  end

  # parse and throw away content until we escape the current context, either
  # through end_element, or end_document. the terminating event is left
  # unconsumed. returns nil.
  # raises if an end tag does not match the innermost open start tag, or if
  # the document ends while elements opened during the discard are still open
  def discard()
    element_stack = [] # names of the elements opened since discard started

    while(true)
      e = @pull_parser.peek
      name = e[0]
      if e.start_element?
        element_stack.push(name)
      elsif e.end_element?
        # an end tag with nothing open belongs to the enclosing element
        return nil if element_stack.empty?
        raise "mismatched end_element. expected </#{element_stack.last}>, got: #{e.inspect}" if name != element_stack.last
        element_stack.pop
      elsif e.end_document?
        return nil if element_stack.empty?
        raise "mismatched end_element. expected </#{element_stack.last}>, got: #{e.inspect}"
      end
      @pull_parser.pull
    end
  end

  # consume an element
  # - if optional is false the element must be present
  # - if optional is true and the element is not present then NOTHING/END_CONTEXT
  #   will be returned
  # - consumes start_element, calls block on content, consumes end_element
  # - the block receives the element name and its attributes
  # returns the block result, or nil if the block result was a Sentinel
  def element( element_names, optional=false, &block )
    element_names = [ *element_names ]

    f = find_element(element_names)
    e = @pull_parser.peek

    if f.is_a? Sentinel
      return f if optional
      raise "expected start element: <#{element_names.join('|')}>, got: #{e.inspect}"
    end

    e = @pull_parser.pull # consume the start tag
    name = e[0]
    attrs = e[1]

    # block should consume all element content, and leave parser on end_element, or
    # whitespace before it
    err=false
    begin
      if self.dsl
        v = self.instance_exec(name, attrs, &block)
      else
        v = block.call(name,attrs)
      end
      return v if ! v.is_a? Sentinel # do not propagate Sentinels. they confuse callers
    rescue
      err=true # note that we are erroring, so as not to mask the exception from ensure block
      raise
    ensure
      if !err # if return was called in the block, ensure we consume the end_element
        e = @pull_parser.pull
        # NOTE: this pattern matches any text containing a whitespace
        # character, not only all-whitespace text. it is deliberately left
        # as it was, since tightening it would reject input accepted so far
        e = @pull_parser.pull if e.text? && e[0] =~ /[[:space:]]/
        raise "expected end tag: #{name}, got: #{e.inspect}" if ! e.end_element? || e[0] != name
      end
    end
  end

  # find and consume elements, calling block on each one found.
  # stops at the first element call that yields a Sentinel; always returns nil
  def elements( element_names, &block )
    while true
      break if element(element_names, true, &block).is_a? Sentinel
    end

    return nil
  end

  # consume text element
  # returns the text, or nil if none [ the parser is on an end_element ]
  # if a block is given it receives the text and its result is returned instead
  # raises if the next event is neither text nor end_element
  def text( &block )
    e = @pull_parser.peek
    raise "expected text node, got #{e.inspect}" if ! e.text? && ! e.end_element?
    content = if e.text?
      @pull_parser.pull
      e[0]
    else
      nil
    end
    if block
      if self.dsl
        content = self.instance_exec( content , &block)
      else
        content = block.call(content)
      end
    end
    content
  end

end
|
203
|
+
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,414 @@
|
|
1
|
+
#!/usr/bin/env spec
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'spec'
|
5
|
+
require 'set'
|
6
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
7
|
+
|
8
|
+
describe XmlStreamParser do
|
9
|
+
it "should work on a StringIO" do
|
10
|
+
io = StringIO.new( "<foo/>")
|
11
|
+
XmlStreamParser.new.parse_dsl(io) do
|
12
|
+
element("foo") do |name,attrs|
|
13
|
+
name.should ==("foo")
|
14
|
+
name
|
15
|
+
end
|
16
|
+
end.should ==("foo")
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should parse a simple one element document" do
|
20
|
+
XmlStreamParser.new.parse_dsl( "<foo></foo>" ) do
|
21
|
+
called = false
|
22
|
+
element("foo") { |name,attrs|
|
23
|
+
called = true
|
24
|
+
name.should ==("foo")
|
25
|
+
attrs.should ==({})
|
26
|
+
}
|
27
|
+
called.should ==(true)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "find_element" do
|
32
|
+
|
33
|
+
it "should skip whitespace to find an element" do
|
34
|
+
XmlStreamParser.new.parse_dsl( " \n\n\n<foo></foo>") do
|
35
|
+
name = find_element("foo")
|
36
|
+
name.should ==("foo")
|
37
|
+
e = pull_parser.pull
|
38
|
+
e.start_element?.should ==(true)
|
39
|
+
e[0].should == "foo"
|
40
|
+
e[1].should == {}
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should return NOTHING on unexpected elements" do
|
45
|
+
XmlStreamParser.new.parse_dsl( "<foo></foo>") do
|
46
|
+
find_element("bar")
|
47
|
+
end.should ==(XmlStreamParser::NOTHING)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should match one of multiple elements" do
|
51
|
+
XmlStreamParser.new.parse_dsl( "<foo></foo>" ) do
|
52
|
+
find_element( ["bar","foo" ] )
|
53
|
+
end.should ==("foo")
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
it "should return END_CONTEXT if element context terminates" do
|
58
|
+
called = false
|
59
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
|
60
|
+
element("foo") do |name,attrs|
|
61
|
+
name.should ==("foo")
|
62
|
+
|
63
|
+
n = find_element("bar")
|
64
|
+
n.should ==(XmlStreamParser::END_CONTEXT)
|
65
|
+
|
66
|
+
e = pull_parser.peek
|
67
|
+
e.end_element?.should ==(true)
|
68
|
+
e[0].should ==("foo")
|
69
|
+
|
70
|
+
called = true
|
71
|
+
end
|
72
|
+
end
|
73
|
+
called.should ==(true)
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should return END_CONTEXT if document ends" do
|
77
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>') do
|
78
|
+
element("foo") do |name,attrs|
|
79
|
+
end
|
80
|
+
f = find_element("bar")
|
81
|
+
f.should ==( XmlStreamParser::END_CONTEXT )
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
describe "discard" do
|
87
|
+
|
88
|
+
it "should discard text content of an element" do
|
89
|
+
XmlStreamParser.new.parse_dsl( '<foo>blah blah blah</foo>') do
|
90
|
+
element("foo") do |name,attrs|
|
91
|
+
discard
|
92
|
+
"foo"
|
93
|
+
end
|
94
|
+
end.should ==("foo")
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should discard element content of an element" do
|
98
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/><foobar></foobar></foo>') do
|
99
|
+
element("foo") do |name,attrs|
|
100
|
+
discard
|
101
|
+
"foo"
|
102
|
+
end
|
103
|
+
end.should ==("foo")
|
104
|
+
end
|
105
|
+
|
106
|
+
it "should discard mixed content of an element" do
|
107
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/>blah blah<foobar></foobar> blah blah </foo>') do
|
108
|
+
element("foo") do |name,attrs|
|
109
|
+
discard
|
110
|
+
"foo"
|
111
|
+
end
|
112
|
+
end.should ==("foo")
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
116
|
+
|
117
|
+
describe "element" do
|
118
|
+
|
119
|
+
it "should return NOTHING if optional and element not found" do
|
120
|
+
XmlStreamParser.new.parse_dsl( '<foo><foofoo/></foo>' ) do
|
121
|
+
element("foo") do |name,attrs|
|
122
|
+
element("bar",true) do |name,attrs|
|
123
|
+
"bar"
|
124
|
+
end.should ==(XmlStreamParser::NOTHING)
|
125
|
+
element("foofoo") do |name,attrs|
|
126
|
+
"foofoo"
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end.should ==("foofoo" )
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should return END_CONTEXT if optional and context ends" do
|
133
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
|
134
|
+
element("foo") do |name,attrs|
|
135
|
+
element("bar",true) do |name,attrs|
|
136
|
+
"bar"
|
137
|
+
end.should ==(XmlStreamParser::END_CONTEXT)
|
138
|
+
"foofoo"
|
139
|
+
end
|
140
|
+
end.should ==("foofoo")
|
141
|
+
end
|
142
|
+
|
143
|
+
it "should not propagate sentinel values up the call hierarchy" do
|
144
|
+
called = false
|
145
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
|
146
|
+
element("foo") do |name,attrs|
|
147
|
+
called = true
|
148
|
+
element("bar",true) do |name,attrs|
|
149
|
+
"bar"
|
150
|
+
end.should ==(XmlStreamParser::END_CONTEXT)
|
151
|
+
end
|
152
|
+
end.should_not ==(XmlStreamParser::END_CONTEXT)
|
153
|
+
called.should == (true)
|
154
|
+
end
|
155
|
+
|
156
|
+
class Foo
|
157
|
+
def self.parse_bar( p )
|
158
|
+
p.element("bar") do |name,attrs|
|
159
|
+
return "barbar"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
it "should consume the end tag even if block calls return" do
|
165
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/></foo>') do
|
166
|
+
element( "foo" ) do |name, attrs|
|
167
|
+
Foo.parse_bar( self )
|
168
|
+
end
|
169
|
+
end.should ==("barbar" )
|
170
|
+
end
|
171
|
+
|
172
|
+
it "should consume the end tag even if block calls break" do
|
173
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/></foo>') do
|
174
|
+
element( "foo" ) do |name, attrs|
|
175
|
+
element( "bar" ) do |name, attrs|
|
176
|
+
break
|
177
|
+
end
|
178
|
+
"foo"
|
179
|
+
end
|
180
|
+
end.should ==( "foo" )
|
181
|
+
end
|
182
|
+
|
183
|
+
it "should raise on premature document termination" do
|
184
|
+
lambda {
|
185
|
+
XmlStreamParser.new.parse_dsl( '<foo>' ) do
|
186
|
+
element("foo") do |name,attrs|
|
187
|
+
element("bar",false) do |name,attrs|
|
188
|
+
"bar"
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
192
|
+
}.should raise_error(RuntimeError)
|
193
|
+
end
|
194
|
+
|
195
|
+
it "should raise on premature context termination" do
|
196
|
+
lambda {
|
197
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
|
198
|
+
element("foo") do |name,attrs|
|
199
|
+
element("bar",false) do |name,attrs|
|
200
|
+
"bar"
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
}.should raise_error(RuntimeError)
|
205
|
+
end
|
206
|
+
|
207
|
+
it "should consume an element, giving name and attributes to the provided block and returning block result" do
|
208
|
+
XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"></foo>') do
|
209
|
+
element( "foo" ) do |name, attrs|
|
210
|
+
name.should ==("foo")
|
211
|
+
attrs.should ==({ "a"=>"one", "b"=>"two" })
|
212
|
+
"blockresult"
|
213
|
+
end.should ==("blockresult")
|
214
|
+
e = pull_parser.peek
|
215
|
+
e.end_document?.should ==(true)
|
216
|
+
"foofoo"
|
217
|
+
end.should ==("foofoo")
|
218
|
+
end
|
219
|
+
|
220
|
+
it "should consume one of many element names, giving name and attrs to block and returning block result" do
|
221
|
+
XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"></foo>') do
|
222
|
+
element( ["bar","foo"] ) do |name, attrs|
|
223
|
+
name.should ==("foo")
|
224
|
+
attrs.should ==({ "a"=>"one", "b"=>"two" })
|
225
|
+
"blockresult"
|
226
|
+
end
|
227
|
+
end.should ==("blockresult")
|
228
|
+
end
|
229
|
+
|
230
|
+
it "should ignore whitespace inside element" do
|
231
|
+
XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"> \n \n</foo>') do
|
232
|
+
element( "foo" ) do |name, attrs|
|
233
|
+
name.should ==("foo")
|
234
|
+
attrs.should ==({ "a"=>"one", "b"=>"two" })
|
235
|
+
"blockresult"
|
236
|
+
end.should ==("blockresult")
|
237
|
+
e = pull_parser.peek
|
238
|
+
e.end_document?.should ==(true)
|
239
|
+
"foofoo"
|
240
|
+
end.should ==("foofoo")
|
241
|
+
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
describe "text" do
|
246
|
+
it "should consume an element with text content and give it's name, attrs, text to the block and return the block result" do
|
247
|
+
XmlStreamParser.new.parse_dsl( '<foo a="bar">hello mum</foo>') do
|
248
|
+
element( "foo" ) do |name, attrs|
|
249
|
+
name.should ==("foo")
|
250
|
+
attrs.should ==({ "a"=>"bar" })
|
251
|
+
text
|
252
|
+
end
|
253
|
+
end.should ==("hello mum")
|
254
|
+
end
|
255
|
+
|
256
|
+
it "should raise if the element contains element content" do
|
257
|
+
lambda {
|
258
|
+
XmlStreamParser.new.parse_dsl( '<foo a="bar"><bar/></foo>') do
|
259
|
+
element("foo") do |name,attrs|
|
260
|
+
text()
|
261
|
+
end
|
262
|
+
end
|
263
|
+
}.should raise_error(RuntimeError)
|
264
|
+
end
|
265
|
+
|
266
|
+
it "should raise if the element contains mixed content" do
|
267
|
+
lambda {
|
268
|
+
XmlStreamParser.new.parse_dsl( '<foo a="bar">some <bar/> text</foo>') do
|
269
|
+
element("foo") do |name,attrs|
|
270
|
+
text()
|
271
|
+
end
|
272
|
+
end
|
273
|
+
}.should raise_error(RuntimeError)
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
describe "elements" do
|
278
|
+
it "should consume multiple elements" do
|
279
|
+
el_counts = Hash.new(0)
|
280
|
+
XmlStreamParser.new.parse_dsl( '<foo><bar/><bar/><foobar/></foo>') do
|
281
|
+
element("foo") do |name,attrs|
|
282
|
+
elements( ["bar","foobar"] ) do |name,attrs|
|
283
|
+
el_counts[name] += 1
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
287
|
+
el_counts.should ==({ "bar"=>2, "foobar"=>1 })
|
288
|
+
end
|
289
|
+
|
290
|
+
it "should not complain if there are no matching elements" do
|
291
|
+
XmlStreamParser.new.parse_dsl( '<foo></foo>') do
|
292
|
+
element("foo") do |name,attrs|
|
293
|
+
elements( ["bar","foobar"] ) do |name,attrs|
|
294
|
+
el_counts[name] += 1
|
295
|
+
end
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
299
|
+
|
300
|
+
end
|
301
|
+
|
302
|
+
describe "non-DSL mode" do
|
303
|
+
it "should pass the parser to the parse() block" do
|
304
|
+
def foo()
|
305
|
+
"foo"
|
306
|
+
end
|
307
|
+
|
308
|
+
XmlStreamParser.new.parse( '<foo></foo>') do |p|
|
309
|
+
p.should_not ==(nil)
|
310
|
+
foo()
|
311
|
+
end.should =="foo"
|
312
|
+
end
|
313
|
+
|
314
|
+
it "should retain contenxt for element blocks" do
|
315
|
+
def foo()
|
316
|
+
"foo"
|
317
|
+
end
|
318
|
+
XmlStreamParser.new.parse( '<foo></foo>') do |p|
|
319
|
+
p.element('foo') do |name,attrs|
|
320
|
+
name.should =='foo'
|
321
|
+
attrs.should == {}
|
322
|
+
foo()
|
323
|
+
end
|
324
|
+
end.should =="foo"
|
325
|
+
end
|
326
|
+
|
327
|
+
it "should retain context for text blocks" do
|
328
|
+
def bar()
|
329
|
+
"barbar"
|
330
|
+
end
|
331
|
+
XmlStreamParser.new.parse( '<foo>bar</foo>') do |p|
|
332
|
+
p.element('foo') do |name,attrs|
|
333
|
+
p.text{ |t| t.should =='bar' ; t }
|
334
|
+
bar()
|
335
|
+
end
|
336
|
+
end.should =='barbar'
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
describe "some more complex examples" do
|
341
|
+
|
342
|
+
it "should parse a list of people" do
|
343
|
+
doc = <<-EOF
|
344
|
+
<people>
|
345
|
+
<person name="alice">likes cheese</person>
|
346
|
+
<person name="bob">likes music</person>
|
347
|
+
<person name="charles">likes alice</person>
|
348
|
+
</people>
|
349
|
+
EOF
|
350
|
+
|
351
|
+
people = {}
|
352
|
+
|
353
|
+
XmlStreamParser.new.parse_dsl(doc) do
|
354
|
+
element("people") do |name,attrs|
|
355
|
+
elements("person") do |name, attrs|
|
356
|
+
people[attrs["name"]] = text
|
357
|
+
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
|
361
|
+
people.should ==({ "alice"=>"likes cheese",
|
362
|
+
"bob"=>"likes music",
|
363
|
+
"charles"=>"likes alice"})
|
364
|
+
end
|
365
|
+
|
366
|
+
it "should parse a list of people and their friends" do
|
367
|
+
doc = <<-EOF
|
368
|
+
<people>
|
369
|
+
<person name="alice">
|
370
|
+
<friend name="bob"/>
|
371
|
+
<likes>cheese</likes>
|
372
|
+
<friend name="charles"/>
|
373
|
+
</person>
|
374
|
+
<person name="bob">
|
375
|
+
<friend name="alice"/>
|
376
|
+
<likes>wolf dogs</likes>
|
377
|
+
</person>
|
378
|
+
<person name="charles">
|
379
|
+
<friend name="alice"/>
|
380
|
+
<likes>bach</likes>
|
381
|
+
</person>
|
382
|
+
</people>
|
383
|
+
EOF
|
384
|
+
|
385
|
+
people = Hash.new{ |h,k| h[k] = {:friends=>Set.new([]), :likes=>Set.new([]) } }
|
386
|
+
|
387
|
+
XmlStreamParser.new.parse_dsl(doc) do
|
388
|
+
element("people") do |name,attrs|
|
389
|
+
elements("person") do |name, attrs|
|
390
|
+
person_name = attrs["name"]
|
391
|
+
people[person_name]
|
392
|
+
|
393
|
+
elements(["friend","likes"]) do |name,attrs|
|
394
|
+
case name
|
395
|
+
when "friend" then
|
396
|
+
people[person_name][:friends] << attrs["name"]
|
397
|
+
when "likes" then
|
398
|
+
people[person_name][:likes] << text
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
people.should ==( {
|
406
|
+
"alice"=>{ :friends=>Set.new(["bob","charles"]), :likes=>Set.new(["cheese"])},
|
407
|
+
"bob"=>{ :friends=>Set.new(["alice"]), :likes=>Set.new(["wolf dogs"])},
|
408
|
+
"charles"=>{ :friends=>Set.new(["alice"]), :likes=>Set.new(["bach"])}
|
409
|
+
})
|
410
|
+
|
411
|
+
end
|
412
|
+
|
413
|
+
end
|
414
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{xml_stream_parser}
|
8
|
+
s.version = "0.3.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["mccraigmccraig"]
|
12
|
+
s.date = %q{2010-06-24}
|
13
|
+
s.description = %q{easily parse xml documents of any size with ruby}
|
14
|
+
s.email = %q{craig@trampolinesystems.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc",
|
18
|
+
"README.txt"
|
19
|
+
]
|
20
|
+
s.files = [
|
21
|
+
".document",
|
22
|
+
".gitignore",
|
23
|
+
"History.txt",
|
24
|
+
"LICENSE",
|
25
|
+
"Manifest.txt",
|
26
|
+
"README.rdoc",
|
27
|
+
"README.txt",
|
28
|
+
"Rakefile",
|
29
|
+
"VERSION",
|
30
|
+
"init.rb",
|
31
|
+
"lib/instance_exec.rb",
|
32
|
+
"lib/xml_stream_parser.rb",
|
33
|
+
"spec/spec.opts",
|
34
|
+
"spec/spec_helper.rb",
|
35
|
+
"spec/xml_stream_parser_spec.rb",
|
36
|
+
"xml_stream_parser.gemspec"
|
37
|
+
]
|
38
|
+
s.homepage = %q{http://github.com/mccraigmccraig/xml_stream_parser}
|
39
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
40
|
+
s.require_paths = ["lib"]
|
41
|
+
s.rubygems_version = %q{1.3.6}
|
42
|
+
s.summary = %q{simple xml stream parser for ruby}
|
43
|
+
s.test_files = [
|
44
|
+
"spec/spec_helper.rb",
|
45
|
+
"spec/xml_stream_parser_spec.rb"
|
46
|
+
]
|
47
|
+
|
48
|
+
if s.respond_to? :specification_version then
|
49
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
50
|
+
s.specification_version = 3
|
51
|
+
|
52
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
53
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.8"])
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<rspec>, [">= 1.2.8"])
|
56
|
+
end
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<rspec>, [">= 1.2.8"])
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
metadata
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xml_stream_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 3
|
8
|
+
- 0
|
9
|
+
version: 0.3.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- mccraigmccraig
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-06-24 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 1
|
29
|
+
- 2
|
30
|
+
- 8
|
31
|
+
version: 1.2.8
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
description: easily parse xml documents of any size with ruby
|
35
|
+
email: craig@trampolinesystems.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files:
|
41
|
+
- LICENSE
|
42
|
+
- README.rdoc
|
43
|
+
- README.txt
|
44
|
+
files:
|
45
|
+
- .document
|
46
|
+
- .gitignore
|
47
|
+
- History.txt
|
48
|
+
- LICENSE
|
49
|
+
- Manifest.txt
|
50
|
+
- README.rdoc
|
51
|
+
- README.txt
|
52
|
+
- Rakefile
|
53
|
+
- VERSION
|
54
|
+
- init.rb
|
55
|
+
- lib/instance_exec.rb
|
56
|
+
- lib/xml_stream_parser.rb
|
57
|
+
- spec/spec.opts
|
58
|
+
- spec/spec_helper.rb
|
59
|
+
- spec/xml_stream_parser_spec.rb
|
60
|
+
- xml_stream_parser.gemspec
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://github.com/mccraigmccraig/xml_stream_parser
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options:
|
67
|
+
- --charset=UTF-8
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
segments:
|
75
|
+
- 0
|
76
|
+
version: "0"
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
segments:
|
82
|
+
- 0
|
83
|
+
version: "0"
|
84
|
+
requirements: []
|
85
|
+
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 1.3.6
|
88
|
+
signing_key:
|
89
|
+
specification_version: 3
|
90
|
+
summary: simple xml stream parser for ruby
|
91
|
+
test_files:
|
92
|
+
- spec/spec_helper.rb
|
93
|
+
- spec/xml_stream_parser_spec.rb
|