xml_stream_parser 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
@@ -0,0 +1,7 @@
1
+ === 0.1.0 / 2009-06-10
2
+
3
+ * initial release
4
+
5
+ === 0.2.0 / 2009-06-19
6
+
7
+ * more dsl like syntax
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 mccraigmccraig
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,9 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ init.rb
6
+ lib/instance_exec.rb
7
+ lib/xml_stream_parser.rb
8
+ spec/xml_stream_parser_spec.rb
9
+ xml_stream_parser.gemspec
@@ -0,0 +1,17 @@
1
+ = xml_stream_parser
2
+
3
+ A basic library for pull parsing of large XML documents, with no DOM construction.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 mccraigmccraig. See LICENSE for details.
@@ -0,0 +1,85 @@
1
+ = xml_stream_parser
2
+
3
+ this code was developed by trampoline systems [ http://trampolinesystems.com ]
4
+ as part of its sonar platform and released under a BSD licence for community use
5
+
6
+ http://www.github.com/mccraigmccraig/xml_stream_parser
7
+
8
+ == DESCRIPTION:
9
+
10
+ a basic library for pull parsing of large xml documents
11
+
12
+ == FEATURES:
13
+
14
+ - pull parsing of large xml documents with no dom construction
15
+ - provides simple operations for constructing higher level parsers
16
+
17
+ == PROBLEMS:
18
+
19
+ - it's very basic
20
+ - no validation
21
+
22
+ == SYNOPSIS:
23
+
24
+ require 'rubygems'
25
+ require 'xml_stream_parser'
26
+
27
+ # parse xml stream data, possibly never ending, and do things with it
28
+
29
+ doc = <<-EOF
30
+ <people>
31
+ <person name="alice">likes cheese</person>
32
+ <person name="bob">likes music</person>
33
+ <person name="charles">likes alice</person>
34
+ </people>
35
+ EOF
36
+
37
+ # can be parsed with
38
+
39
+ people = {}
40
+ XmlStreamParser.new.parse_dsl(doc) do
41
+ element "people" do |name,attrs|
42
+ elements "person" do |name, attrs|
43
+ people[attrs["name"]] = text
44
+ end
45
+ end
46
+ end
47
+
48
+ == REQUIREMENTS:
49
+
50
+ Ruby or JRuby
51
+
52
+ == INSTALL:
53
+
54
+ sudo gem install xml_stream_parser
55
+ (formerly distributed via http://gems.github.com as mccraigmccraig-xml_stream_parser)
56
+
57
+ == LICENSE:
58
+
59
+ (The BSD License)
60
+
61
+ Copyright (c) 2009, Trampoline Systems Ltd, http://trampolinesystems.com/
62
+ All rights reserved.
63
+
64
+ Redistribution and use in source and binary forms, with or without modification,
65
+ are permitted provided that the following conditions are met:
66
+
67
+ * Redistributions of source code must retain the above copyright notice,
68
+ this list of conditions and the following disclaimer.
69
+ * Redistributions in binary form must reproduce the above copyright notice,
70
+ this list of conditions and the following disclaimer in the documentation
71
+ and/or other materials provided with the distribution.
72
+ * Neither the name of Trampoline Systems Ltd nor the names of its contributors may
73
+ be used to endorse or promote products derived from this software without
74
+ specific prior written permission.
75
+
76
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
77
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
78
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
79
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
80
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
81
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
82
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
83
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
84
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
85
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,45 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
# packaging tasks come from jeweler. it is optional : without it only the
# spec / rdoc tasks below are defined
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    # gemspec is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
    gemspec.name = "xml_stream_parser"
    gemspec.summary = "simple xml stream parser for ruby"
    gemspec.description = "easily parse xml documents of any size with ruby"
    gemspec.email = "craig@trampolinesystems.com"
    gemspec.homepage = "http://github.com/mccraigmccraig/xml_stream_parser"
    gemspec.authors = ["mccraigmccraig"]
    gemspec.add_development_dependency "rspec", ">= 1.2.8"
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# rspec 1.x tasks : plain spec run, and the same specs under rcov
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs.push('lib', 'spec')
  t.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |t|
  t.libs.push('lib', 'spec')
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# :check_dependencies is expected to be provided by the jeweler tasks above
task :spec => :check_dependencies

task :default => :spec

# rdoc generation, titled with the contents of the VERSION file when present
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  version = if File.exist?('VERSION')
    File.read('VERSION')
  else
    ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "xml_stream_parser #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.3.0
data/init.rb ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.join( File.dirname(__FILE__), "lib", "xml_stream_parser" )
@@ -0,0 +1,34 @@
1
+ # defines Object.instance_exec to permit call of a Proc with params
2
+ # in the context of an instance : instance.instance_exec( foo, bar, &proc )
3
+ # taken from rails 2.2
4
+ #
5
class Object
  # only needed on rubies without a native instance_exec ( it is built in from 1.9 )
  unless defined? instance_exec # 1.9
    # holds the temporary methods, and the mutex guarding their creation
    module InstanceExecMethods #:nodoc:
      @mutex = Mutex.new
      class << self
        attr_reader :mutex
      end
    end
    include InstanceExecMethods

    # Run the block with self set to the receiver, passing args through
    # as the block parameters.
    #
    # The block is installed as a uniquely named temporary method, called,
    # and then removed again.
    #
    # From Mauricio's http://eigenclass.org/hiki/bounded+space+instance_exec
    def instance_exec(*args, &block)
      temp_name = InstanceExecMethods.mutex.synchronize do
        # pick the first __instance_execN name this object does not respond to
        counter = 0
        candidate = "__instance_exec#{counter}"
        while respond_to?(candidate)
          counter += 1
          candidate = "__instance_exec#{counter}"
        end
        InstanceExecMethods.module_eval { define_method(candidate, &block) }
        candidate
      end

      begin
        send(temp_name, *args)
      ensure
        # best effort removal : a failure here must not hide the block's result
        begin
          InstanceExecMethods.module_eval { remove_method(temp_name) }
        rescue StandardError
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,203 @@
1
+ require 'stringio'
2
+ require 'rexml/document'
3
+ require 'rexml/parsers/pullparser'
4
+ require File.join( File.dirname(__FILE__), 'instance_exec')
5
+
6
module REXML
  module Parsers
    class PullEvent
      # PullEvent has no end_document? predicate, even though
      # the BaseParser produces an :end_document event. the event
      # type is the first entry of @contents
      def end_document?
        @contents.first == :end_document
      end
    end
  end
end
17
+
18
# pull parser for large xml documents : a thin layer over
# REXML::Parsers::PullParser providing find_element / element / elements /
# text / discard operations from which higher level parsers are composed.
# no dom is built : events are consumed from the stream as the blocks run
class XmlStreamParser

  # the released gem version [ matches the VERSION file and the gemspec ]
  VERSION = "0.3.0"

  # base class of the marker values returned when no element was consumed
  class Sentinel
    def to_s
      self.class.to_s
    end
  end

  # an element was found, but it was not one of those asked for
  class Nothing < Sentinel
  end

  # the enclosing element, or the document, ended
  class EndContext < Sentinel
  end

  module Sentinels
    NOTHING = Nothing.new
    END_CONTEXT = EndContext.new
  end

  include Sentinels

  # the REXML::Parsers::PullParser used internally. only set during a parse
  attr_reader :pull_parser
  # true if blocks are instance_exec'd in the context of the parser
  attr_reader :dsl

  # parse retaining block context... permitting
  # the parse to easily be split over multiple methods.
  # the parser is passed to the block as its only parameter
  def parse(data, &block)
    parse_dsl(data, false, &block)
  end

  # parse with optional dsl mode
  # if dsl is true [ default ] then the block will be instance_exec'd in
  # the context of the parser, if dsl is false the block will be called
  # retaining its current context, and given the parser as a parameter
  # - data may be a String, or anything REXML accepts as an input stream
  #   [ IO, StringIO and other IO-like objects ]
  # - returns the result of the block
  def parse_dsl(data, dsl=true, &block)
    io = case data
         when String
           StringIO.new(data)
         else
           # handed to REXML unchanged : it accepts any IO-like object and
           # raises for input it can not read
           data
         end

    @pull_parser = REXML::Parsers::PullParser.new( io )
    @dsl = dsl
    if self.dsl
      self.instance_exec(&block)
    else
      block.call(self)
    end
  ensure
    @pull_parser = nil
  end

  # find an element with name in element_names : inter-element whitespace, and
  # events which carry no content [ xml declaration, comments, processing
  # instructions, doctype ] are skipped
  # - encountering end_element terminates and returns END_CONTEXT, leaving parser on end_element
  # - encountering end_document terminates and returns END_CONTEXT
  # - encountering start_element for an element not in element_names returns NOTHING, parser on start_element
  # - encountering start_element for an element in element_names returns element name, parser on start_element
  # - encountering text which is not just whitespace raises
  def find_element( element_names )
    element_names = [ *element_names ]

    loop do
      e = @pull_parser.peek
      if e.start_element?
        return element_names.include?( e[0] ) ? e[0] : NOTHING
      elsif e.end_element? || e.end_document?
        return END_CONTEXT
      elsif ! ignorable_event?( e )
        raise "unexpected text content: #{e.inspect}"
      end
      # every ignorable event must be consumed, or peek would return it forever
      @pull_parser.pull
    end
  end

  # parse and throw away content until we escape the current context, either
  # through end_element, or end_document. the parser is left on the event
  # which ends the context, and nil is returned
  def discard()
    open_elements = []

    loop do
      e = @pull_parser.peek
      name = e[0]
      if e.start_element?
        open_elements.push(name)
      elsif e.end_element?
        return nil if open_elements.empty?
        raise "mismatched end_element. expected </#{open_elements.last}>, got: #{e.inspect}" if name != open_elements.last
        open_elements.pop
      elsif e.end_document?
        return nil if open_elements.empty?
        raise "mismatched end_element. expected </#{open_elements.last}>, got: #{e.inspect}"
      end
      @pull_parser.pull
    end
  end

  # consume an element
  # - if optional is false the element must be present
  # - if optional is true and the element is not present then NOTHING/END_CONTEXT
  #   will be returned
  # - consumes start_element, calls block on content, consumes end_element
  # - returns the block result, or nil if the block result is a Sentinel
  def element( element_names, optional=false, &block )
    element_names = [ *element_names ]

    f = find_element(element_names)
    e = @pull_parser.peek

    if f.is_a? Sentinel
      return f if optional
      raise "expected start element: <#{element_names.join('|')}, got: #{e.inspect}>"
    end

    e = @pull_parser.pull # consume the start tag
    name = e[0]
    attrs = e[1]

    # block should consume all element content, and leave parser on end_element, or
    # whitespace before it
    err=false
    begin
      v = if self.dsl
            self.instance_exec(name, attrs, &block)
          else
            block.call(name,attrs)
          end
      return v if ! v.is_a? Sentinel # do not propagate Sentinels. they confuse callers
      nil
    rescue Exception
      # every exception is noted [ and re-raised untouched ], including those which
      # are not StandardErrors, so that the ensure block never masks one
      err=true
      raise
    ensure
      if !err # if return or break was called in the block, ensure we consume the end_element
        e = @pull_parser.pull
        e = @pull_parser.pull while ignorable_event?( e )
        raise "expected end tag: #{name}, got: #{e.inspect}" if ! e.end_element? || e[0] != name
      end
    end
  end

  # find and consume elements, calling block on each one found, until
  # an element not in element_names, or the end of the context, is reached
  # - always returns nil
  def elements( element_names, &block )
    loop do
      break if element(element_names, true, &block).is_a? Sentinel
    end

    nil
  end

  # consume text element
  # returns the text, or nil if none. if a block is given it is
  # called with the text, and its result is returned instead
  # - raises if the parser is on neither text nor end_element
  def text( &block )
    e = @pull_parser.peek
    raise "expected text node, got #{e.inspect}" if ! e.text? && ! e.end_element?

    content = nil
    if e.text?
      @pull_parser.pull
      content = e[0]
    end

    if block
      content = if self.dsl
                  self.instance_exec( content , &block)
                else
                  block.call(content)
                end
    end
    content
  end

  private

  # true if the event may be skipped between elements : whitespace-only text, or
  # any event which is neither an element boundary, end_document nor character data
  def ignorable_event?( e )
    return whitespace_only?( e[0] ) if e.text?
    ! ( e.start_element? || e.end_element? || e.end_document? || e.cdata? )
  end

  # true if str has no non-whitespace character
  def whitespace_only?( str )
    str !~ /[^[:space:]]/
  end

end
203
+
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'xml_stream_parser'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
@@ -0,0 +1,414 @@
1
+ #!/usr/bin/env spec
2
+
3
+ require 'rubygems'
4
+ require 'spec'
5
+ require 'set'
6
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
7
+
8
+ describe XmlStreamParser do
9
+ it "should work on a StringIO" do
10
+ io = StringIO.new( "<foo/>")
11
+ XmlStreamParser.new.parse_dsl(io) do
12
+ element("foo") do |name,attrs|
13
+ name.should ==("foo")
14
+ name
15
+ end
16
+ end.should ==("foo")
17
+ end
18
+
19
+ it "should parse a simple one element document" do
20
+ XmlStreamParser.new.parse_dsl( "<foo></foo>" ) do
21
+ called = false
22
+ element("foo") { |name,attrs|
23
+ called = true
24
+ name.should ==("foo")
25
+ attrs.should ==({})
26
+ }
27
+ called.should ==(true)
28
+ end
29
+ end
30
+
31
+ describe "find_element" do
32
+
33
+ it "should skip whitespace to find an element" do
34
+ XmlStreamParser.new.parse_dsl( " \n\n\n<foo></foo>") do
35
+ name = find_element("foo")
36
+ name.should ==("foo")
37
+ e = pull_parser.pull
38
+ e.start_element?.should ==(true)
39
+ e[0].should == "foo"
40
+ e[1].should == {}
41
+ end
42
+ end
43
+
44
+ it "should return NOTHING on unexpected elements" do
45
+ XmlStreamParser.new.parse_dsl( "<foo></foo>") do
46
+ find_element("bar")
47
+ end.should ==(XmlStreamParser::NOTHING)
48
+ end
49
+
50
+ it "should match one of multiple elements" do
51
+ XmlStreamParser.new.parse_dsl( "<foo></foo>" ) do
52
+ find_element( ["bar","foo" ] )
53
+ end.should ==("foo")
54
+ end
55
+
56
+
57
+ it "should return END_CONTEXT if element context terminates" do
58
+ called = false
59
+ XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
60
+ element("foo") do |name,attrs|
61
+ name.should ==("foo")
62
+
63
+ n = find_element("bar")
64
+ n.should ==(XmlStreamParser::END_CONTEXT)
65
+
66
+ e = pull_parser.peek
67
+ e.end_element?.should ==(true)
68
+ e[0].should ==("foo")
69
+
70
+ called = true
71
+ end
72
+ end
73
+ called.should ==(true)
74
+ end
75
+
76
+ it "should return END_CONTEXT if document ends" do
77
+ XmlStreamParser.new.parse_dsl( '<foo></foo>') do
78
+ element("foo") do |name,attrs|
79
+ end
80
+ f = find_element("bar")
81
+ f.should ==( XmlStreamParser::END_CONTEXT )
82
+ end
83
+ end
84
+ end
85
+
86
+ describe "discard" do
87
+
88
+ it "should discard text content of an element" do
89
+ XmlStreamParser.new.parse_dsl( '<foo>blah blah blah</foo>') do
90
+ element("foo") do |name,attrs|
91
+ discard
92
+ "foo"
93
+ end
94
+ end.should ==("foo")
95
+ end
96
+
97
+ it "should discard element content of an element" do
98
+ XmlStreamParser.new.parse_dsl( '<foo><bar/><foobar></foobar></foo>') do
99
+ element("foo") do |name,attrs|
100
+ discard
101
+ "foo"
102
+ end
103
+ end.should ==("foo")
104
+ end
105
+
106
+ it "should discard mixed content of an element" do
107
+ XmlStreamParser.new.parse_dsl( '<foo><bar/>blah blah<foobar></foobar> blah blah </foo>') do
108
+ element("foo") do |name,attrs|
109
+ discard
110
+ "foo"
111
+ end
112
+ end.should ==("foo")
113
+ end
114
+
115
+ end
116
+
117
+ describe "element" do
118
+
119
+ it "should return NOTHING if optional and element not found" do
120
+ XmlStreamParser.new.parse_dsl( '<foo><foofoo/></foo>' ) do
121
+ element("foo") do |name,attrs|
122
+ element("bar",true) do |name,attrs|
123
+ "bar"
124
+ end.should ==(XmlStreamParser::NOTHING)
125
+ element("foofoo") do |name,attrs|
126
+ "foofoo"
127
+ end
128
+ end
129
+ end.should ==("foofoo" )
130
+ end
131
+
132
+ it "should return END_CONTEXT if optional and context ends" do
133
+ XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
134
+ element("foo") do |name,attrs|
135
+ element("bar",true) do |name,attrs|
136
+ "bar"
137
+ end.should ==(XmlStreamParser::END_CONTEXT)
138
+ "foofoo"
139
+ end
140
+ end.should ==("foofoo")
141
+ end
142
+
143
+ it "should not propagate sentinel values up the call hierarchy" do
144
+ called = false
145
+ XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
146
+ element("foo") do |name,attrs|
147
+ called = true
148
+ element("bar",true) do |name,attrs|
149
+ "bar"
150
+ end.should ==(XmlStreamParser::END_CONTEXT)
151
+ end
152
+ end.should_not ==(XmlStreamParser::END_CONTEXT)
153
+ called.should == (true)
154
+ end
155
+
156
+ class Foo
157
+ def self.parse_bar( p )
158
+ p.element("bar") do |name,attrs|
159
+ return "barbar"
160
+ end
161
+ end
162
+ end
163
+
164
+ it "should consume the end tag even if block calls return" do
165
+ XmlStreamParser.new.parse_dsl( '<foo><bar/></foo>') do
166
+ element( "foo" ) do |name, attrs|
167
+ Foo.parse_bar( self )
168
+ end
169
+ end.should ==("barbar" )
170
+ end
171
+
172
+ it "should consume the end tag even if block calls break" do
173
+ XmlStreamParser.new.parse_dsl( '<foo><bar/></foo>') do
174
+ element( "foo" ) do |name, attrs|
175
+ element( "bar" ) do |name, attrs|
176
+ break
177
+ end
178
+ "foo"
179
+ end
180
+ end.should ==( "foo" )
181
+ end
182
+
183
+ it "should raise on premature document termination" do
184
+ lambda {
185
+ XmlStreamParser.new.parse_dsl( '<foo>' ) do
186
+ element("foo") do |name,attrs|
187
+ element("bar",false) do |name,attrs|
188
+ "bar"
189
+ end
190
+ end
191
+ end
192
+ }.should raise_error(RuntimeError)
193
+ end
194
+
195
+ it "should raise on premature context termination" do
196
+ lambda {
197
+ XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
198
+ element("foo") do |name,attrs|
199
+ element("bar",false) do |name,attrs|
200
+ "bar"
201
+ end
202
+ end
203
+ end
204
+ }.should raise_error(RuntimeError)
205
+ end
206
+
207
+ it "should consume an element, giving name and attributes to the provided block and returning block result" do
208
+ XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"></foo>') do
209
+ element( "foo" ) do |name, attrs|
210
+ name.should ==("foo")
211
+ attrs.should ==({ "a"=>"one", "b"=>"two" })
212
+ "blockresult"
213
+ end.should ==("blockresult")
214
+ e = pull_parser.peek
215
+ e.end_document?.should ==(true)
216
+ "foofoo"
217
+ end.should ==("foofoo")
218
+ end
219
+
220
+ it "should consume one of many element names, giving name and attrs to block and returning block result" do
221
+ XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"></foo>') do
222
+ element( ["bar","foo"] ) do |name, attrs|
223
+ name.should ==("foo")
224
+ attrs.should ==({ "a"=>"one", "b"=>"two" })
225
+ "blockresult"
226
+ end
227
+ end.should ==("blockresult")
228
+ end
229
+
230
+ it "should ignore whitespace inside element" do
231
+ XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"> \n \n</foo>') do
232
+ element( "foo" ) do |name, attrs|
233
+ name.should ==("foo")
234
+ attrs.should ==({ "a"=>"one", "b"=>"two" })
235
+ "blockresult"
236
+ end.should ==("blockresult")
237
+ e = pull_parser.peek
238
+ e.end_document?.should ==(true)
239
+ "foofoo"
240
+ end.should ==("foofoo")
241
+ end
242
+
243
+ end
244
+
245
+ describe "text" do
246
+ it "should consume an element with text content and give it's name, attrs, text to the block and return the block result" do
247
+ XmlStreamParser.new.parse_dsl( '<foo a="bar">hello mum</foo>') do
248
+ element( "foo" ) do |name, attrs|
249
+ name.should ==("foo")
250
+ attrs.should ==({ "a"=>"bar" })
251
+ text
252
+ end
253
+ end.should ==("hello mum")
254
+ end
255
+
256
+ it "should raise if the element contains element content" do
257
+ lambda {
258
+ XmlStreamParser.new.parse_dsl( '<foo a="bar"><bar/></foo>') do
259
+ element("foo") do |name,attrs|
260
+ text()
261
+ end
262
+ end
263
+ }.should raise_error(RuntimeError)
264
+ end
265
+
266
+ it "should raise if the element contains mixed content" do
267
+ lambda {
268
+ XmlStreamParser.new.parse_dsl( '<foo a="bar">some <bar/> text</foo>') do
269
+ element("foo") do |name,attrs|
270
+ text()
271
+ end
272
+ end
273
+ }.should raise_error(RuntimeError)
274
+ end
275
+ end
276
+
277
# elements() should call its block once per matching child element, and
# not at all when there are none
describe "elements" do
  it "should consume multiple elements" do
    el_counts = Hash.new(0)
    XmlStreamParser.new.parse_dsl( '<foo><bar/><bar/><foobar/></foo>') do
      element("foo") do |name,attrs|
        elements( ["bar","foobar"] ) do |name,attrs|
          el_counts[name] += 1
        end
      end
    end
    el_counts.should ==({ "bar"=>2, "foobar"=>1 })
  end

  it "should not complain if there are no matching elements" do
    # the counter must exist, so that a stray block call shows up as a
    # failed expectation rather than as a NameError
    el_counts = Hash.new(0)
    XmlStreamParser.new.parse_dsl( '<foo></foo>') do
      element("foo") do |name,attrs|
        elements( ["bar","foobar"] ) do |name,attrs|
          el_counts[name] += 1
        end
      end
    end
    el_counts.should ==({})
  end

end
301
+
302
+ describe "non-DSL mode" do
303
+ it "should pass the parser to the parse() block" do
304
+ def foo()
305
+ "foo"
306
+ end
307
+
308
+ XmlStreamParser.new.parse( '<foo></foo>') do |p|
309
+ p.should_not ==(nil)
310
+ foo()
311
+ end.should =="foo"
312
+ end
313
+
314
+ it "should retain contenxt for element blocks" do
315
+ def foo()
316
+ "foo"
317
+ end
318
+ XmlStreamParser.new.parse( '<foo></foo>') do |p|
319
+ p.element('foo') do |name,attrs|
320
+ name.should =='foo'
321
+ attrs.should == {}
322
+ foo()
323
+ end
324
+ end.should =="foo"
325
+ end
326
+
327
+ it "should retain context for text blocks" do
328
+ def bar()
329
+ "barbar"
330
+ end
331
+ XmlStreamParser.new.parse( '<foo>bar</foo>') do |p|
332
+ p.element('foo') do |name,attrs|
333
+ p.text{ |t| t.should =='bar' ; t }
334
+ bar()
335
+ end
336
+ end.should =='barbar'
337
+ end
338
+ end
339
+
340
+ describe "some more complex examples" do
341
+
342
+ it "should parse a list of people" do
343
+ doc = <<-EOF
344
+ <people>
345
+ <person name="alice">likes cheese</person>
346
+ <person name="bob">likes music</person>
347
+ <person name="charles">likes alice</person>
348
+ </people>
349
+ EOF
350
+
351
+ people = {}
352
+
353
+ XmlStreamParser.new.parse_dsl(doc) do
354
+ element("people") do |name,attrs|
355
+ elements("person") do |name, attrs|
356
+ people[attrs["name"]] = text
357
+ end
358
+ end
359
+ end
360
+
361
+ people.should ==({ "alice"=>"likes cheese",
362
+ "bob"=>"likes music",
363
+ "charles"=>"likes alice"})
364
+ end
365
+
366
+ it "should parse a list of people and their friends" do
367
+ doc = <<-EOF
368
+ <people>
369
+ <person name="alice">
370
+ <friend name="bob"/>
371
+ <likes>cheese</likes>
372
+ <friend name="charles"/>
373
+ </person>
374
+ <person name="bob">
375
+ <friend name="alice"/>
376
+ <likes>wolf dogs</likes>
377
+ </person>
378
+ <person name="charles">
379
+ <friend name="alice"/>
380
+ <likes>bach</likes>
381
+ </person>
382
+ </people>
383
+ EOF
384
+
385
+ people = Hash.new{ |h,k| h[k] = {:friends=>Set.new([]), :likes=>Set.new([]) } }
386
+
387
+ XmlStreamParser.new.parse_dsl(doc) do
388
+ element("people") do |name,attrs|
389
+ elements("person") do |name, attrs|
390
+ person_name = attrs["name"]
391
+ people[person_name]
392
+
393
+ elements(["friend","likes"]) do |name,attrs|
394
+ case name
395
+ when "friend" then
396
+ people[person_name][:friends] << attrs["name"]
397
+ when "likes" then
398
+ people[person_name][:likes] << text
399
+ end
400
+ end
401
+ end
402
+ end
403
+ end
404
+
405
+ people.should ==( {
406
+ "alice"=>{ :friends=>Set.new(["bob","charles"]), :likes=>Set.new(["cheese"])},
407
+ "bob"=>{ :friends=>Set.new(["alice"]), :likes=>Set.new(["wolf dogs"])},
408
+ "charles"=>{ :friends=>Set.new(["alice"]), :likes=>Set.new(["bach"])}
409
+ })
410
+
411
+ end
412
+
413
+ end
414
+ end
@@ -0,0 +1,61 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{xml_stream_parser}
8
+ s.version = "0.3.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["mccraigmccraig"]
12
+ s.date = %q{2010-06-24}
13
+ s.description = %q{easily parse xml documents of any size with ruby}
14
+ s.email = %q{craig@trampolinesystems.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc",
18
+ "README.txt"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".gitignore",
23
+ "History.txt",
24
+ "LICENSE",
25
+ "Manifest.txt",
26
+ "README.rdoc",
27
+ "README.txt",
28
+ "Rakefile",
29
+ "VERSION",
30
+ "init.rb",
31
+ "lib/instance_exec.rb",
32
+ "lib/xml_stream_parser.rb",
33
+ "spec/spec.opts",
34
+ "spec/spec_helper.rb",
35
+ "spec/xml_stream_parser_spec.rb",
36
+ "xml_stream_parser.gemspec"
37
+ ]
38
+ s.homepage = %q{http://github.com/mccraigmccraig/xml_stream_parser}
39
+ s.rdoc_options = ["--charset=UTF-8"]
40
+ s.require_paths = ["lib"]
41
+ s.rubygems_version = %q{1.3.6}
42
+ s.summary = %q{simple xml stream parser for ruby}
43
+ s.test_files = [
44
+ "spec/spec_helper.rb",
45
+ "spec/xml_stream_parser_spec.rb"
46
+ ]
47
+
48
+ if s.respond_to? :specification_version then
49
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
50
+ s.specification_version = 3
51
+
52
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
53
+ s.add_development_dependency(%q<rspec>, [">= 1.2.8"])
54
+ else
55
+ s.add_dependency(%q<rspec>, [">= 1.2.8"])
56
+ end
57
+ else
58
+ s.add_dependency(%q<rspec>, [">= 1.2.8"])
59
+ end
60
+ end
61
+
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xml_stream_parser
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 3
8
+ - 0
9
+ version: 0.3.0
10
+ platform: ruby
11
+ authors:
12
+ - mccraigmccraig
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-06-24 00:00:00 +01:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 2
30
+ - 8
31
+ version: 1.2.8
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: easily parse xml documents of any size with ruby
35
+ email: craig@trampolinesystems.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files:
41
+ - LICENSE
42
+ - README.rdoc
43
+ - README.txt
44
+ files:
45
+ - .document
46
+ - .gitignore
47
+ - History.txt
48
+ - LICENSE
49
+ - Manifest.txt
50
+ - README.rdoc
51
+ - README.txt
52
+ - Rakefile
53
+ - VERSION
54
+ - init.rb
55
+ - lib/instance_exec.rb
56
+ - lib/xml_stream_parser.rb
57
+ - spec/spec.opts
58
+ - spec/spec_helper.rb
59
+ - spec/xml_stream_parser_spec.rb
60
+ - xml_stream_parser.gemspec
61
+ has_rdoc: true
62
+ homepage: http://github.com/mccraigmccraig/xml_stream_parser
63
+ licenses: []
64
+
65
+ post_install_message:
66
+ rdoc_options:
67
+ - --charset=UTF-8
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.3.6
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: simple xml stream parser for ruby
91
+ test_files:
92
+ - spec/spec_helper.rb
93
+ - spec/xml_stream_parser_spec.rb