xml_stream_parser 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
@@ -0,0 +1,7 @@
1
+ === 0.1.0 / 2009-06-10
2
+
3
+ * initial release
4
+
5
+ === 0.2.0 / 2009-06-19
6
+
7
+ * more dsl like syntax
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 mccraigmccraig
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,9 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ init.rb
6
+ lib/instance_exec.rb
7
+ lib/xml_stream_parser.rb
8
+ spec/xml_stream_parser_spec.rb
9
+ xml_stream_parser.gemspec
@@ -0,0 +1,17 @@
1
+ = xml_stream_parser
2
+
3
+ A basic library for pull parsing of large xml documents, with no dom construction.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 mccraigmccraig. See LICENSE for details.
@@ -0,0 +1,85 @@
1
+ = xml_stream_parser
2
+
3
+ this code was developed by trampoline systems [ http://trampolinesystems.com ]
4
+ as part of its sonar platform and released under a BSD licence for community use
5
+
6
+ http://www.github.com/mccraigmccraig/xml_stream_parser
7
+
8
+ == DESCRIPTION:
9
+
10
+ a basic library for pull parsing of large xml documents
11
+
12
+ == FEATURES:
13
+
14
+ - pull parsing of large xml documents with no dom construction
15
+ - provides simple operations for constructing higher level parsers
16
+
17
+ == PROBLEMS:
18
+
19
+ - it's very basic
20
+ - no validation
21
+
22
+ == SYNOPSIS:
23
+
24
+ require 'rubygems'
25
+ require 'xml_stream_parser'
26
+
27
+ # parse xml stream data, possibly never ending, and do things with it
28
+
29
+ doc = <<-EOF
30
+ <people>
31
+ <person name="alice">likes cheese</person>
32
+ <person name="bob">likes music</person>
33
+ <person name="charles">likes alice</person>
34
+ </people>
35
+ EOF
36
+
37
+ # can be parsed with
38
+
39
+ people = {}
40
+ XmlStreamParser.new.parse_dsl(doc) do
41
+ element "people" do |name,attrs|
42
+ elements "person" do |name, attrs|
43
+ people[attrs["name"]] = text
44
+ end
45
+ end
46
+ end
47
+
48
+ == REQUIREMENTS:
49
+
50
+ Ruby or JRuby
51
+
52
+ == INSTALL:
53
+
54
+ sudo gem sources -a http://gems.github.com
55
+ sudo gem install mccraigmccraig-xml_stream_parser
56
+
57
+ == LICENSE:
58
+
59
+ (The BSD License)
60
+
61
+ Copyright (c) 2009, Trampoline Systems Ltd, http://trampolinesystems.com/
62
+ All rights reserved.
63
+
64
+ Redistribution and use in source and binary forms, with or without modification,
65
+ are permitted provided that the following conditions are met:
66
+
67
+ * Redistributions of source code must retain the above copyright notice,
68
+ this list of conditions and the following disclaimer.
69
+ * Redistributions in binary form must reproduce the above copyright notice,
70
+ this list of conditions and the following disclaimer in the documentation
71
+ and/or other materials provided with the distribution.
72
+ * Neither the name of the <ORGANIZATION> nor the names of its contributors may
73
+ be used to endorse or promote products derived from this software without
74
+ specific prior written permission.
75
+
76
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
77
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
78
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
79
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
80
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
81
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
82
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
83
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
84
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
85
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,45 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "xml_stream_parser"
8
+ gem.summary = %Q{simple xml stream parser for ruby}
9
+ gem.description = %Q{easily parse xml documents of any size with ruby}
10
+ gem.email = "craig@trampolinesystems.com"
11
+ gem.homepage = "http://github.com/mccraigmccraig/xml_stream_parser"
12
+ gem.authors = ["mccraigmccraig"]
13
+ gem.add_development_dependency "rspec", ">= 1.2.8"
14
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
+ end
16
+ Jeweler::GemcutterTasks.new
17
+ rescue LoadError
18
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
+ end
20
+
21
+ require 'spec/rake/spectask'
22
+ Spec::Rake::SpecTask.new(:spec) do |spec|
23
+ spec.libs << 'lib' << 'spec'
24
+ spec.spec_files = FileList['spec/**/*_spec.rb']
25
+ end
26
+
27
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
28
+ spec.libs << 'lib' << 'spec'
29
+ spec.pattern = 'spec/**/*_spec.rb'
30
+ spec.rcov = true
31
+ end
32
+
33
+ task :spec => :check_dependencies
34
+
35
+ task :default => :spec
36
+
37
+ require 'rake/rdoctask'
38
+ Rake::RDocTask.new do |rdoc|
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
40
+
41
+ rdoc.rdoc_dir = 'rdoc'
42
+ rdoc.title = "xml_stream_parser #{version}"
43
+ rdoc.rdoc_files.include('README*')
44
+ rdoc.rdoc_files.include('lib/**/*.rb')
45
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.3.0
data/init.rb ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.join( File.dirname(__FILE__), "lib", "xml_stream_parser" )
@@ -0,0 +1,34 @@
1
+ # defines Object.instance_exec to permit call of a Proc with params
2
+ # in the context of an instance : instance.instance_exec( foo, bar, &proc )
3
+ # taken from rails 2.2
4
+ #
5
class Object
  # Only install the fallback on rubies that lack a native instance_exec;
  # where the method already exists this whole block is a no-op.
  unless method_defined?(:instance_exec)
    # Mutex is provided by thread.rb on the old rubies that need this
    # fallback, so it must be loaded before the module body references it.
    require 'thread'

    module InstanceExecMethods #:nodoc:
      # guards the search for a free temporary method name
      @mutex = Mutex.new
      class << self
        attr_reader :mutex
      end
    end
    include InstanceExecMethods

    # Evaluate the block with the given arguments within the context of
    # this object, so self is set to the method receiver.
    #
    # The block is temporarily installed as a uniquely named method on
    # InstanceExecMethods, invoked with +args+, and removed again afterwards.
    # Returns whatever the block returns.
    #
    # From Mauricio's http://eigenclass.org/hiki/bounded+space+instance_exec
    def instance_exec(*args, &block)
      method_name = InstanceExecMethods.mutex.synchronize do
        n = 0
        # probe __instance_exec0, __instance_exec1, ... until a name is free
        n += 1 while respond_to?(candidate = "__instance_exec#{n}")
        InstanceExecMethods.module_eval { define_method(candidate, &block) }
        candidate
      end

      begin
        send(method_name, *args)
      ensure
        # best-effort cleanup: a failure to remove the temporary method must
        # not mask the block's own result or exception
        InstanceExecMethods.module_eval { remove_method(method_name) } rescue nil
      end
    end
  end
end
@@ -0,0 +1,203 @@
1
+ require 'stringio'
2
+ require 'rexml/document'
3
+ require 'rexml/parsers/pullparser'
4
+ require File.join( File.dirname(__FILE__), 'instance_exec')
5
+
6
module REXML
  module Parsers
    class PullEvent
      # PullEvent is missing the end_document? predicate, even though
      # the BaseParser produces the :end_document event.
      #
      # Only add it when REXML does not already provide one, so that a
      # native implementation is never overwritten by this patch.
      unless method_defined?(:end_document?)
        # true if this event marks the end of the document
        def end_document?
          @contents[0] == :end_document
        end
      end
    end
  end
end
17
+
18
# a small pull-parsing helper built on REXML::Parsers::PullParser.
# it offers a handful of operations ( find_element, element, elements,
# text, discard ) from which higher level parsers can be assembled
class XmlStreamParser

  # kept in step with the VERSION file / gemspec
  VERSION = "0.3.0"

  # matches text consisting solely of whitespace ( or nothing at all )
  WHITESPACE_ONLY = /\A[[:space:]]*\z/

  # base class of the marker values returned by find_element and element
  class Sentinel
    def to_s
      self.class.to_s
    end
  end

  # marker : an element was found, but not one of those asked for
  class Nothing < Sentinel
  end

  # marker : the current element, or the document, has ended
  class EndContext < Sentinel
  end

  module Sentinels
    NOTHING = Nothing.new
    END_CONTEXT = EndContext.new
  end

  include Sentinels

  # the REXML::Parsers::PullParser used internally ( nil outside of a parse )
  attr_reader :pull_parser
  # true if blocks are instance_exec'd in the context of the parser
  attr_reader :dsl

  # parse retaining block context... permitting
  # the parse to easily be split over multiple methods.
  # the parser is passed to the block, and the block result is returned
  def parse(data, &block)
    parse_dsl(data, false, &block)
  end

  # parse with optional dsl mode
  # if dsl is true [ default ] then the block will be instance_exec'd in
  # the context of the parser, if dsl is false the block will be called
  # retaining its current context, with the parser as the argument
  #
  # data may be a String, or any object responding to read ( IO, StringIO, ... ).
  # anything else raises a RuntimeError rather than being silently
  # turned into nil. returns the result of the block
  def parse_dsl(data, dsl=true, &block)
    io = case data
         when String
           StringIO.new(data)
         else
           unless data.respond_to?(:read)
             raise "cannot parse a #{data.class}: expected a String, an IO or an IO-like object"
           end
           data
         end

    @pull_parser = REXML::Parsers::PullParser.new( io )
    @dsl = dsl
    if self.dsl
      self.instance_exec(&block)
    else
      block.call(self)
    end
  ensure
    # the pull parser is only valid for the duration of one parse
    @pull_parser = nil
  end

  # find an element with name in element_names : inter-element whitespace,
  # comments, processing instructions and declarations are skipped
  # - encountering end_element terminates and returns END_CONTEXT, leaving parser on end_element
  # - encountering end_document terminates and returns END_CONTEXT
  # - encountering start_element for an element not in element_names returns NOTHING, parser on start_element
  # - encountering start_element for an element in element_names returns element name, parser on start_element
  # - encountering text which is not entirely whitespace raises a RuntimeError
  def find_element( element_names )
    element_names = [ *element_names ]

    e = skip_ignorable
    if e.start_element?
      element_names.include?( e[0] ) ? e[0] : NOTHING
    elsif e.end_element? || document_ended?(e)
      END_CONTEXT
    else
      # skip_ignorable only stops on elements, end of document or real text
      raise "unexpected text content: #{e.inspect}"
    end
  end

  # parse and throw away content until we escape the current context, either
  # through end_element, or end_document. the parser is left on the event
  # which ends the context. returns nil
  def discard()
    element_stack = []

    loop do
      e = @pull_parser.peek
      name = e[0]
      if e.start_element?
        element_stack.push(name)
      elsif e.end_element?
        # an end tag with nothing open belongs to the enclosing context
        return nil if element_stack.empty?
        raise "mismatched end_element. expected </#{element_stack.last}>, got: #{e.inspect}" if name != element_stack.last
        element_stack.pop
      elsif document_ended?(e)
        return nil if element_stack.empty?
        raise "unexpected end of document. expected </#{element_stack.last}>, got: #{e.inspect}"
      end
      @pull_parser.pull
    end
  end

  # consume an element
  # - if optional is false the element must be present, otherwise a
  #   RuntimeError is raised
  # - if optional is true and the element is not present then NOTHING/END_CONTEXT
  #   will be returned
  # - consumes start_element, calls block on content, consumes end_element
  # - returns the block result, or nil if the block result was a Sentinel
  def element( element_names, optional=false, &block )
    element_names = [ *element_names ]

    found = find_element(element_names)
    if found.is_a? Sentinel
      return found if optional
      raise "expected start element: <#{element_names.join('|')}>, got: #{@pull_parser.peek.inspect}"
    end

    start_tag = @pull_parser.pull # consume the start tag
    name = start_tag[0]
    attrs = start_tag[1]

    # block should consume all element content, and leave parser on end_element, or
    # on whitespace / comments before it
    failed = false
    begin
      v = if self.dsl
            self.instance_exec(name, attrs, &block)
          else
            block.call(name, attrs)
          end
      return v unless v.is_a? Sentinel # do not propagate Sentinels. they confuse callers
      nil
    rescue
      failed = true # note that we are erroring, so as not to mask the exception from ensure block
      raise
    ensure
      unless failed # also runs if return/break was called in the block, so the end_element is consumed
        skip_ignorable
        e = @pull_parser.pull
        raise "expected end tag: #{name}, got: #{e.inspect}" if ! e.end_element? || e[0] != name
      end
    end
  end

  # find and consume elements, calling block on each one found, until
  # an element not in element_names or the end of the context is reached.
  # always returns nil
  def elements( element_names, &block )
    loop do
      break if element(element_names, true, &block).is_a? Sentinel
    end

    nil
  end

  # consume text element
  # returns the text, or nil if the parser is on an end_element.
  # if a block is given it is called with the text ( or nil ), and its
  # result is returned instead. raises a RuntimeError if the parser is on
  # anything other than text or an end_element
  def text( &block )
    e = @pull_parser.peek
    raise "expected text node, got #{e.inspect}" if ! e.text? && ! e.end_element?
    content = e.text? ? @pull_parser.pull[0] : nil
    if block
      content = if self.dsl
                  self.instance_exec( content, &block )
                else
                  block.call(content)
                end
    end
    content
  end

  private

  # true if the event marks the end of the document. uses the event_type
  # reader so the check does not depend on a patched PullEvent
  def document_ended?( event )
    event.event_type == :end_document
  end

  # pull events until the parser is on a start_element, an end_element,
  # end_document, or text / cdata which is not entirely whitespace.
  # returns the event the parser has been left on, without consuming it
  def skip_ignorable
    loop do
      e = @pull_parser.peek
      if e.text? || e.cdata?
        return e unless e[0] =~ WHITESPACE_ONLY
      elsif e.start_element? || e.end_element? || document_ended?(e)
        return e
      end
      # whitespace, comment, processing instruction, declaration, ... : skip it
      @pull_parser.pull
    end
  end

end
203
+
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'xml_stream_parser'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
@@ -0,0 +1,414 @@
1
+ #!/usr/bin/env spec
2
+
3
+ require 'rubygems'
4
+ require 'spec'
5
+ require 'set'
6
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
7
+
8
+ describe XmlStreamParser do
9
+ it "should work on a StringIO" do
10
+ io = StringIO.new( "<foo/>")
11
+ XmlStreamParser.new.parse_dsl(io) do
12
+ element("foo") do |name,attrs|
13
+ name.should ==("foo")
14
+ name
15
+ end
16
+ end.should ==("foo")
17
+ end
18
+
19
+ it "should parse a simple one element document" do
20
+ XmlStreamParser.new.parse_dsl( "<foo></foo>" ) do
21
+ called = false
22
+ element("foo") { |name,attrs|
23
+ called = true
24
+ name.should ==("foo")
25
+ attrs.should ==({})
26
+ }
27
+ called.should ==(true)
28
+ end
29
+ end
30
+
31
+ describe "find_element" do
32
+
33
+ it "should skip whitespace to find an element" do
34
+ XmlStreamParser.new.parse_dsl( " \n\n\n<foo></foo>") do
35
+ name = find_element("foo")
36
+ name.should ==("foo")
37
+ e = pull_parser.pull
38
+ e.start_element?.should ==(true)
39
+ e[0].should == "foo"
40
+ e[1].should == {}
41
+ end
42
+ end
43
+
44
+ it "should return NOTHING on unexpected elements" do
45
+ XmlStreamParser.new.parse_dsl( "<foo></foo>") do
46
+ find_element("bar")
47
+ end.should ==(XmlStreamParser::NOTHING)
48
+ end
49
+
50
+ it "should match one of multiple elements" do
51
+ XmlStreamParser.new.parse_dsl( "<foo></foo>" ) do
52
+ find_element( ["bar","foo" ] )
53
+ end.should ==("foo")
54
+ end
55
+
56
+
57
+ it "should return END_CONTEXT if element context terminates" do
58
+ called = false
59
+ XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
60
+ element("foo") do |name,attrs|
61
+ name.should ==("foo")
62
+
63
+ n = find_element("bar")
64
+ n.should ==(XmlStreamParser::END_CONTEXT)
65
+
66
+ e = pull_parser.peek
67
+ e.end_element?.should ==(true)
68
+ e[0].should ==("foo")
69
+
70
+ called = true
71
+ end
72
+ end
73
+ called.should ==(true)
74
+ end
75
+
76
+ it "should return END_CONTEXT if document ends" do
77
+ XmlStreamParser.new.parse_dsl( '<foo></foo>') do
78
+ element("foo") do |name,attrs|
79
+ end
80
+ f = find_element("bar")
81
+ f.should ==( XmlStreamParser::END_CONTEXT )
82
+ end
83
+ end
84
+ end
85
+
86
+ describe "discard" do
87
+
88
+ it "should discard text content of an element" do
89
+ XmlStreamParser.new.parse_dsl( '<foo>blah blah blah</foo>') do
90
+ element("foo") do |name,attrs|
91
+ discard
92
+ "foo"
93
+ end
94
+ end.should ==("foo")
95
+ end
96
+
97
+ it "should discard element content of an element" do
98
+ XmlStreamParser.new.parse_dsl( '<foo><bar/><foobar></foobar></foo>') do
99
+ element("foo") do |name,attrs|
100
+ discard
101
+ "foo"
102
+ end
103
+ end.should ==("foo")
104
+ end
105
+
106
+ it "should discard mixed content of an element" do
107
+ XmlStreamParser.new.parse_dsl( '<foo><bar/>blah blah<foobar></foobar> blah blah </foo>') do
108
+ element("foo") do |name,attrs|
109
+ discard
110
+ "foo"
111
+ end
112
+ end.should ==("foo")
113
+ end
114
+
115
+ end
116
+
117
+ describe "element" do
118
+
119
+ it "should return NOTHING if optional and element not found" do
120
+ XmlStreamParser.new.parse_dsl( '<foo><foofoo/></foo>' ) do
121
+ element("foo") do |name,attrs|
122
+ element("bar",true) do |name,attrs|
123
+ "bar"
124
+ end.should ==(XmlStreamParser::NOTHING)
125
+ element("foofoo") do |name,attrs|
126
+ "foofoo"
127
+ end
128
+ end
129
+ end.should ==("foofoo" )
130
+ end
131
+
132
+ it "should return END_CONTEXT if optional and context ends" do
133
+ XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
134
+ element("foo") do |name,attrs|
135
+ element("bar",true) do |name,attrs|
136
+ "bar"
137
+ end.should ==(XmlStreamParser::END_CONTEXT)
138
+ "foofoo"
139
+ end
140
+ end.should ==("foofoo")
141
+ end
142
+
143
+ it "should not propagate sentinel values up the call hierarchy" do
144
+ called = false
145
+ XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
146
+ element("foo") do |name,attrs|
147
+ called = true
148
+ element("bar",true) do |name,attrs|
149
+ "bar"
150
+ end.should ==(XmlStreamParser::END_CONTEXT)
151
+ end
152
+ end.should_not ==(XmlStreamParser::END_CONTEXT)
153
+ called.should == (true)
154
+ end
155
+
156
+ class Foo
157
+ def self.parse_bar( p )
158
+ p.element("bar") do |name,attrs|
159
+ return "barbar"
160
+ end
161
+ end
162
+ end
163
+
164
+ it "should consume the end tag even if block calls return" do
165
+ XmlStreamParser.new.parse_dsl( '<foo><bar/></foo>') do
166
+ element( "foo" ) do |name, attrs|
167
+ Foo.parse_bar( self )
168
+ end
169
+ end.should ==("barbar" )
170
+ end
171
+
172
+ it "should consume the end tag even if block calls break" do
173
+ XmlStreamParser.new.parse_dsl( '<foo><bar/></foo>') do
174
+ element( "foo" ) do |name, attrs|
175
+ element( "bar" ) do |name, attrs|
176
+ break
177
+ end
178
+ "foo"
179
+ end
180
+ end.should ==( "foo" )
181
+ end
182
+
183
+ it "should raise on premature document termination" do
184
+ lambda {
185
+ XmlStreamParser.new.parse_dsl( '<foo>' ) do
186
+ element("foo") do |name,attrs|
187
+ element("bar",false) do |name,attrs|
188
+ "bar"
189
+ end
190
+ end
191
+ end
192
+ }.should raise_error(RuntimeError)
193
+ end
194
+
195
+ it "should raise on premature context termination" do
196
+ lambda {
197
+ XmlStreamParser.new.parse_dsl( '<foo></foo>' ) do
198
+ element("foo") do |name,attrs|
199
+ element("bar",false) do |name,attrs|
200
+ "bar"
201
+ end
202
+ end
203
+ end
204
+ }.should raise_error(RuntimeError)
205
+ end
206
+
207
+ it "should consume an element, giving name and attributes to the provided block and returning block result" do
208
+ XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"></foo>') do
209
+ element( "foo" ) do |name, attrs|
210
+ name.should ==("foo")
211
+ attrs.should ==({ "a"=>"one", "b"=>"two" })
212
+ "blockresult"
213
+ end.should ==("blockresult")
214
+ e = pull_parser.peek
215
+ e.end_document?.should ==(true)
216
+ "foofoo"
217
+ end.should ==("foofoo")
218
+ end
219
+
220
+ it "should consume one of many element names, giving name and attrs to block and returning block result" do
221
+ XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"></foo>') do
222
+ element( ["bar","foo"] ) do |name, attrs|
223
+ name.should ==("foo")
224
+ attrs.should ==({ "a"=>"one", "b"=>"two" })
225
+ "blockresult"
226
+ end
227
+ end.should ==("blockresult")
228
+ end
229
+
230
+ it "should ignore whitespace inside element" do
231
+ XmlStreamParser.new.parse_dsl( '<foo a="one" b="two"> \n \n</foo>') do
232
+ element( "foo" ) do |name, attrs|
233
+ name.should ==("foo")
234
+ attrs.should ==({ "a"=>"one", "b"=>"two" })
235
+ "blockresult"
236
+ end.should ==("blockresult")
237
+ e = pull_parser.peek
238
+ e.end_document?.should ==(true)
239
+ "foofoo"
240
+ end.should ==("foofoo")
241
+ end
242
+
243
+ end
244
+
245
+ describe "text" do
246
+ it "should consume an element with text content and give it's name, attrs, text to the block and return the block result" do
247
+ XmlStreamParser.new.parse_dsl( '<foo a="bar">hello mum</foo>') do
248
+ element( "foo" ) do |name, attrs|
249
+ name.should ==("foo")
250
+ attrs.should ==({ "a"=>"bar" })
251
+ text
252
+ end
253
+ end.should ==("hello mum")
254
+ end
255
+
256
+ it "should raise if the element contains element content" do
257
+ lambda {
258
+ XmlStreamParser.new.parse_dsl( '<foo a="bar"><bar/></foo>') do
259
+ element("foo") do |name,attrs|
260
+ text()
261
+ end
262
+ end
263
+ }.should raise_error(RuntimeError)
264
+ end
265
+
266
+ it "should raise if the element contains mixed content" do
267
+ lambda {
268
+ XmlStreamParser.new.parse_dsl( '<foo a="bar">some <bar/> text</foo>') do
269
+ element("foo") do |name,attrs|
270
+ text()
271
+ end
272
+ end
273
+ }.should raise_error(RuntimeError)
274
+ end
275
+ end
276
+
277
+ describe "elements" do
278
+ it "should consume multiple elements" do
279
+ el_counts = Hash.new(0)
280
+ XmlStreamParser.new.parse_dsl( '<foo><bar/><bar/><foobar/></foo>') do
281
+ element("foo") do |name,attrs|
282
+ elements( ["bar","foobar"] ) do |name,attrs|
283
+ el_counts[name] += 1
284
+ end
285
+ end
286
+ end
287
+ el_counts.should ==({ "bar"=>2, "foobar"=>1 })
288
+ end
289
+
290
+ it "should not complain if there are no matching elements" do
291
+ XmlStreamParser.new.parse_dsl( '<foo></foo>') do
292
+ element("foo") do |name,attrs|
293
+ elements( ["bar","foobar"] ) do |name,attrs|
294
+ el_counts[name] += 1
295
+ end
296
+ end
297
+ end
298
+ end
299
+
300
+ end
301
+
302
+ describe "non-DSL mode" do
303
+ it "should pass the parser to the parse() block" do
304
+ def foo()
305
+ "foo"
306
+ end
307
+
308
+ XmlStreamParser.new.parse( '<foo></foo>') do |p|
309
+ p.should_not ==(nil)
310
+ foo()
311
+ end.should =="foo"
312
+ end
313
+
314
+ it "should retain contenxt for element blocks" do
315
+ def foo()
316
+ "foo"
317
+ end
318
+ XmlStreamParser.new.parse( '<foo></foo>') do |p|
319
+ p.element('foo') do |name,attrs|
320
+ name.should =='foo'
321
+ attrs.should == {}
322
+ foo()
323
+ end
324
+ end.should =="foo"
325
+ end
326
+
327
+ it "should retain context for text blocks" do
328
+ def bar()
329
+ "barbar"
330
+ end
331
+ XmlStreamParser.new.parse( '<foo>bar</foo>') do |p|
332
+ p.element('foo') do |name,attrs|
333
+ p.text{ |t| t.should =='bar' ; t }
334
+ bar()
335
+ end
336
+ end.should =='barbar'
337
+ end
338
+ end
339
+
340
+ describe "some more complex examples" do
341
+
342
+ it "should parse a list of people" do
343
+ doc = <<-EOF
344
+ <people>
345
+ <person name="alice">likes cheese</person>
346
+ <person name="bob">likes music</person>
347
+ <person name="charles">likes alice</person>
348
+ </people>
349
+ EOF
350
+
351
+ people = {}
352
+
353
+ XmlStreamParser.new.parse_dsl(doc) do
354
+ element("people") do |name,attrs|
355
+ elements("person") do |name, attrs|
356
+ people[attrs["name"]] = text
357
+ end
358
+ end
359
+ end
360
+
361
+ people.should ==({ "alice"=>"likes cheese",
362
+ "bob"=>"likes music",
363
+ "charles"=>"likes alice"})
364
+ end
365
+
366
+ it "should parse a list of people and their friends" do
367
+ doc = <<-EOF
368
+ <people>
369
+ <person name="alice">
370
+ <friend name="bob"/>
371
+ <likes>cheese</likes>
372
+ <friend name="charles"/>
373
+ </person>
374
+ <person name="bob">
375
+ <friend name="alice"/>
376
+ <likes>wolf dogs</likes>
377
+ </person>
378
+ <person name="charles">
379
+ <friend name="alice"/>
380
+ <likes>bach</likes>
381
+ </person>
382
+ </people>
383
+ EOF
384
+
385
+ people = Hash.new{ |h,k| h[k] = {:friends=>Set.new([]), :likes=>Set.new([]) } }
386
+
387
+ XmlStreamParser.new.parse_dsl(doc) do
388
+ element("people") do |name,attrs|
389
+ elements("person") do |name, attrs|
390
+ person_name = attrs["name"]
391
+ people[person_name]
392
+
393
+ elements(["friend","likes"]) do |name,attrs|
394
+ case name
395
+ when "friend" then
396
+ people[person_name][:friends] << attrs["name"]
397
+ when "likes" then
398
+ people[person_name][:likes] << text
399
+ end
400
+ end
401
+ end
402
+ end
403
+ end
404
+
405
+ people.should ==( {
406
+ "alice"=>{ :friends=>Set.new(["bob","charles"]), :likes=>Set.new(["cheese"])},
407
+ "bob"=>{ :friends=>Set.new(["alice"]), :likes=>Set.new(["wolf dogs"])},
408
+ "charles"=>{ :friends=>Set.new(["alice"]), :likes=>Set.new(["bach"])}
409
+ })
410
+
411
+ end
412
+
413
+ end
414
+ end
@@ -0,0 +1,61 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{xml_stream_parser}
8
+ s.version = "0.3.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["mccraigmccraig"]
12
+ s.date = %q{2010-06-24}
13
+ s.description = %q{easily parse xml documents of any size with ruby}
14
+ s.email = %q{craig@trampolinesystems.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc",
18
+ "README.txt"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".gitignore",
23
+ "History.txt",
24
+ "LICENSE",
25
+ "Manifest.txt",
26
+ "README.rdoc",
27
+ "README.txt",
28
+ "Rakefile",
29
+ "VERSION",
30
+ "init.rb",
31
+ "lib/instance_exec.rb",
32
+ "lib/xml_stream_parser.rb",
33
+ "spec/spec.opts",
34
+ "spec/spec_helper.rb",
35
+ "spec/xml_stream_parser_spec.rb",
36
+ "xml_stream_parser.gemspec"
37
+ ]
38
+ s.homepage = %q{http://github.com/mccraigmccraig/xml_stream_parser}
39
+ s.rdoc_options = ["--charset=UTF-8"]
40
+ s.require_paths = ["lib"]
41
+ s.rubygems_version = %q{1.3.6}
42
+ s.summary = %q{simple xml stream parser for ruby}
43
+ s.test_files = [
44
+ "spec/spec_helper.rb",
45
+ "spec/xml_stream_parser_spec.rb"
46
+ ]
47
+
48
+ if s.respond_to? :specification_version then
49
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
50
+ s.specification_version = 3
51
+
52
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
53
+ s.add_development_dependency(%q<rspec>, [">= 1.2.8"])
54
+ else
55
+ s.add_dependency(%q<rspec>, [">= 1.2.8"])
56
+ end
57
+ else
58
+ s.add_dependency(%q<rspec>, [">= 1.2.8"])
59
+ end
60
+ end
61
+
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xml_stream_parser
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 3
8
+ - 0
9
+ version: 0.3.0
10
+ platform: ruby
11
+ authors:
12
+ - mccraigmccraig
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-06-24 00:00:00 +01:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 2
30
+ - 8
31
+ version: 1.2.8
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: easily parse xml documents of any size with ruby
35
+ email: craig@trampolinesystems.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files:
41
+ - LICENSE
42
+ - README.rdoc
43
+ - README.txt
44
+ files:
45
+ - .document
46
+ - .gitignore
47
+ - History.txt
48
+ - LICENSE
49
+ - Manifest.txt
50
+ - README.rdoc
51
+ - README.txt
52
+ - Rakefile
53
+ - VERSION
54
+ - init.rb
55
+ - lib/instance_exec.rb
56
+ - lib/xml_stream_parser.rb
57
+ - spec/spec.opts
58
+ - spec/spec_helper.rb
59
+ - spec/xml_stream_parser_spec.rb
60
+ - xml_stream_parser.gemspec
61
+ has_rdoc: true
62
+ homepage: http://github.com/mccraigmccraig/xml_stream_parser
63
+ licenses: []
64
+
65
+ post_install_message:
66
+ rdoc_options:
67
+ - --charset=UTF-8
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.3.6
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: simple xml stream parser for ruby
91
+ test_files:
92
+ - spec/spec_helper.rb
93
+ - spec/xml_stream_parser_spec.rb