xml-sax-machines 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +58 -0
- data/Rakefile +34 -0
- data/VERSION.yml +5 -0
- data/lib/xml-sax-machines/builder.rb +72 -0
- data/lib/xml-sax-machines/debug.rb +36 -0
- data/lib/xml-sax-machines/filter.rb +68 -0
- data/lib/xml-sax-machines/fragment_builder.rb +101 -0
- data/lib/xml-sax-machines/pipeline.rb +16 -0
- data/lib/xml-sax-machines.rb +15 -0
- data/test/helper.rb +6 -0
- data/test/test_builder.rb +51 -0
- data/test/test_debug.rb +56 -0
- data/test/test_filter.rb +15 -0
- data/test/test_fragment_builder.rb +24 -0
- data/test/test_pipeline.rb +18 -0
- data/xml-sax-machines.gemspec +64 -0
- metadata +99 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 "Shane Hanna"
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
= XML SAX Machines
|
2
|
+
|
3
|
+
* http://github.com/shanna/xml-sax-machines/tree/master
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Assorted XML SAX readers, filters and writers for nokogiri.
|
8
|
+
|
9
|
+
== Dependencies
|
10
|
+
|
11
|
+
Ruby::
|
12
|
+
* nokogiri >= 1.4
|
13
|
+
|
14
|
+
== Install
|
15
|
+
|
16
|
+
* Via git: git clone git://github.com/shanna/xml-sax-machines.git
|
17
|
+
* Via gem: gem install xml-sax-machines
|
18
|
+
|
19
|
+
== Flow
|
20
|
+
|
21
|
+
=== XML::SAX::Filter
|
22
|
+
Pass through filter. Base class for creating chainable SAX filters.
|
23
|
+
|
24
|
+
=== XML::SAX::Pipeline
|
25
|
+
A linear sequence of SAX filters.
|
26
|
+
|
27
|
+
== Filters
|
28
|
+
|
29
|
+
=== XML::SAX::Handler
|
30
|
+
Base class for creating chainable SAX handlers.
|
31
|
+
|
32
|
+
=== XML::SAX::Debug
|
33
|
+
Debugging output for SAX events.
|
34
|
+
|
35
|
+
=== XML::SAX::Builder
|
36
|
+
Build in-memory document trees from SAX streams.
|
37
|
+
|
38
|
+
=== XML::SAX::FragmentBuilder
|
39
|
+
Process in-memory record based document fragments. Builds well balanced XML chunks matching an XPath into a partial
|
40
|
+
in-memory document tree for processing by a callback block.
|
41
|
+
|
42
|
+
== Recognition
|
43
|
+
|
44
|
+
This library is my interpretation of patterns introduced to me through Perl's <tt>XML::SAX::Machines</tt>. All credit
|
45
|
+
to Barrie Slaymaker and contributors.
|
46
|
+
|
47
|
+
* http://search.cpan.org/dist/XML-SAX-Machines
|
48
|
+
|
49
|
+
== TODO
|
50
|
+
|
51
|
+
* Tee filter.
|
52
|
+
* Json reader/writer?
|
53
|
+
* Any other Ruby parsers that can generate a SAX stream?
|
54
|
+
* Namespace handling in XML::SAX::Builder and XML::SAX::FragmentBuilder.
|
55
|
+
|
56
|
+
== Copyright
|
57
|
+
|
58
|
+
Copyright (c) 2009 "Shane Hanna". See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'jeweler'
|
5
|
+
Jeweler::Tasks.new do |gem|
|
6
|
+
gem.name = 'xml-sax-machines'
|
7
|
+
gem.summary = %q{Assorted XML SAX readers, filters and writers.}
|
8
|
+
gem.description = %q{XML SAX Machines}
|
9
|
+
gem.email = 'shane.hanna@gmail.com'
|
10
|
+
gem.homepage = 'http://github.com/shanna/xml-sax-machines'
|
11
|
+
gem.authors = ['Shane Hanna']
|
12
|
+
gem.add_dependency 'nokogiri', ['>= 1.4']
|
13
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
14
|
+
end
|
15
|
+
Jeweler::GemcutterTasks.new
|
16
|
+
rescue LoadError
|
17
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
18
|
+
end
|
19
|
+
|
20
|
+
require 'rake/testtask'
|
21
|
+
Rake::TestTask.new(:test) do |test|
|
22
|
+
test.libs << 'lib' << 'test'
|
23
|
+
test.pattern = 'test/**/test_*.rb'
|
24
|
+
test.verbose = true
|
25
|
+
end
|
26
|
+
|
27
|
+
task :test => :check_dependencies
|
28
|
+
task :default => :test
|
29
|
+
|
30
|
+
require 'yard'
|
31
|
+
YARD::Rake::YardocTask.new do |yard|
|
32
|
+
yard.files = ['lib/**/*.rb']
|
33
|
+
end
|
34
|
+
|
data/VERSION.yml
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# Build a Nokogiri::XML::Document from a SAX stream.
|
5
|
+
#
|
6
|
+
# ==== Example
|
7
|
+
#
|
8
|
+
# builder = XML::SAX::Builder.new
|
9
|
+
# parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
10
|
+
# parser << %q{<root>xml content</root>}
|
11
|
+
# parser.finish
|
12
|
+
#
|
13
|
+
# puts builder.document.children.to_s #=> <root>xml content</root>
|
14
|
+
#
|
15
|
+
# ==== See
|
16
|
+
# * XML::SAX::Filter
|
17
|
+
#
|
18
|
+
# --
|
19
|
+
# TODO:
|
20
|
+
# * Namespaces.
|
21
|
+
class Builder < Filter
|
22
|
+
|
23
|
+
# The document object.
|
24
|
+
#
|
25
|
+
# ==== Returns
|
26
|
+
# Nokogiri::XML::Document
|
27
|
+
attr_reader :document
|
28
|
+
|
29
|
+
def start_document #:nodoc:
|
30
|
+
super
|
31
|
+
@document = Nokogiri::XML::Document.new
|
32
|
+
@context = @document
|
33
|
+
end
|
34
|
+
|
35
|
+
def start_element(name, attributes = []) #:nodoc:
|
36
|
+
super
|
37
|
+
el = Nokogiri::XML::Element.new(name, @document)
|
38
|
+
Hash[*attributes.flatten].each_pair{|k, v| el[k] = v}
|
39
|
+
@context = @context.add_child(el)
|
40
|
+
end
|
41
|
+
|
42
|
+
def end_element(name) #:nodoc:
|
43
|
+
super
|
44
|
+
raise "Unmatched closing element. Got '#{name}' but expected '#{@context.name}'" \
|
45
|
+
unless name == @context.name
|
46
|
+
@context = @context.parent
|
47
|
+
end
|
48
|
+
|
49
|
+
def characters(string) #:nodoc:
|
50
|
+
super
|
51
|
+
# http://nokogiri.lighthouseapp.com/projects/19607-nokogiri/tickets/68-xpath-incorrect-when-text-siblings-exist#ticket-68-1
|
52
|
+
sibling = @context.children.last
|
53
|
+
if sibling.kind_of?(Nokogiri::XML::Text)
|
54
|
+
sibling.content += string
|
55
|
+
else
|
56
|
+
@context.add_child(Nokogiri::XML::Text.new(string, @document))
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def cdata_block(string) #:nodoc:
|
61
|
+
super
|
62
|
+
@context.add_child(Nokogiri::XML::CDATA.new(@document, string))
|
63
|
+
end
|
64
|
+
|
65
|
+
def comment(string) #:nodoc:
|
66
|
+
super
|
67
|
+
@context.add_child(Nokogiri::XML::Comment.new(@document, string))
|
68
|
+
end
|
69
|
+
|
70
|
+
end # Builder
|
71
|
+
end # SAX
|
72
|
+
end # XML
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# SAX Debug filter.
|
5
|
+
#
|
6
|
+
# Warn all SAX event methods before calling the next filter in the chain. Handy as it can be placed anywhere in a
|
7
|
+
# pipeline to see what events are being passed to the next filter.
|
8
|
+
#
|
9
|
+
# ==== See
|
10
|
+
# * XML::SAX::Filter
|
11
|
+
#
|
12
|
+
class Debug < Filter
|
13
|
+
|
14
|
+
%w{
|
15
|
+
cdata_block
|
16
|
+
characters
|
17
|
+
comment
|
18
|
+
end_document
|
19
|
+
end_element
|
20
|
+
end_element_namespace
|
21
|
+
error
|
22
|
+
start_document
|
23
|
+
start_element
|
24
|
+
start_element_namespace
|
25
|
+
warning
|
26
|
+
xmldecl
|
27
|
+
}.each do |method|
|
28
|
+
define_method(method.to_sym) do |*args|
|
29
|
+
warn "#{method}: #{args.inspect}"
|
30
|
+
super(*args)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end # Debug
|
35
|
+
end # SAX
|
36
|
+
end # XML
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# SAX Filter base class.
|
5
|
+
#
|
6
|
+
# Chain SAX filters together by delegating missing SAX event methods to the next filter in the chain. Simply call
|
7
|
+
# super in any SAX event methods you overload to pass the call to the next filter in the chain.
|
8
|
+
#
|
9
|
+
# Extend this Class rather than <tt>Nokogiri::XML::SAX::Document</tt> which acts as a final filter.
|
10
|
+
#
|
11
|
+
# ==== Notes
|
12
|
+
# Filter chains are built in reverse by setting the filter attribute. Use <tt>XML::SAX::Pipeline</tt> to
|
13
|
+
# construct filter chains in a more logical order. This keeps the filter constructor clear of any prerequisite
|
14
|
+
# API in subclasses.
|
15
|
+
#
|
16
|
+
# ==== See
|
17
|
+
# * XML::SAX::Pipeline
|
18
|
+
# * Nokogiri::XML::SAX::Document
|
19
|
+
#
|
20
|
+
#--
|
21
|
+
# TODO:
|
22
|
+
# * Examples.
|
23
|
+
class Filter < Nokogiri::XML::SAX::Document
|
24
|
+
|
25
|
+
# Barf if the filter isn't a Nokogiri::XML::SAX::Document or XML::SAX::Filter.
|
26
|
+
# The next filter in the chain.
|
27
|
+
attr_accessor :filter
|
28
|
+
|
29
|
+
def cdata_block(string) #:nodoc:
|
30
|
+
@filter.cdata_block(string) if @filter
|
31
|
+
end
|
32
|
+
|
33
|
+
def characters(string) #:nodoc:
|
34
|
+
@filter.characters(string) if @filter
|
35
|
+
end
|
36
|
+
|
37
|
+
def comment(string) #:nodoc:
|
38
|
+
@filter.comment(string) if @filter
|
39
|
+
end
|
40
|
+
|
41
|
+
def end_document #:nodoc:
|
42
|
+
@filter.end_document if @filter
|
43
|
+
end
|
44
|
+
|
45
|
+
def end_element(name) #:nodoc:
|
46
|
+
@filter.end_element(name) if @filter
|
47
|
+
end
|
48
|
+
|
49
|
+
def error(string) #:nodoc:
|
50
|
+
@filter.error(string) if @filter
|
51
|
+
end
|
52
|
+
|
53
|
+
def start_document #:nodoc:
|
54
|
+
@filter.start_document if @filter
|
55
|
+
end
|
56
|
+
|
57
|
+
# Forward the start_element event to the next filter in the chain, if one is set.
#
# ==== Parameters
# name<String>:: Element name.
# attributes<Array>:: Attributes as received from the parser; passed on unchanged.
#
# The attributes must be forwarded as received. Writing `attributes = []` in the
# call would reassign the local and hand every downstream filter an empty list.
def start_element(name, attributes = []) #:nodoc:
  @filter.start_element(name, attributes) if @filter
end
|
60
|
+
|
61
|
+
def warning(string) #:nodoc:
|
62
|
+
@filter.warning(string) if @filter
|
63
|
+
end
|
64
|
+
|
65
|
+
end # Filter
|
66
|
+
end # SAX
|
67
|
+
end # XML
|
68
|
+
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# Build Nokogiri::XML::Document fragments that match an XPath.
|
5
|
+
#
|
6
|
+
# Stream large (or small) record based XML documents building each matching XPath into a document fragment making
|
7
|
+
# further manipulation of each record easier.
|
8
|
+
#
|
9
|
+
# ==== Notes
|
10
|
+
# * In order to save memory well balanced elements that do not match any XPath are unlinked. This means you *cannot*
|
11
|
+
# match records by position in relation to siblings.
|
12
|
+
# * Because we are parsing a SAX stream there is no read ahead. You *cannot* match records by any children the
|
13
|
+
# element may have once further events are pushed.
|
14
|
+
# * You can match by attributes of an element.
|
15
|
+
#
|
16
|
+
# ==== Example
|
17
|
+
#
|
18
|
+
# builder = XML::SAX::FragmentBuilder.new({
|
19
|
+
# '//record' => lambda{|record| puts record.to_s} # Process each matched record element.
|
20
|
+
# })
|
21
|
+
# parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
22
|
+
# parser << %q{
|
23
|
+
# <root>
|
24
|
+
# <record id="1">record one</record>
|
25
|
+
# <record id="2">record two</record>
|
26
|
+
# </root>
|
27
|
+
# }
|
28
|
+
# #=> <record id="1">record one</record>
|
29
|
+
# #=> <record id="2">record two</record>
|
30
|
+
# parser.finish
|
31
|
+
#
|
32
|
+
# ==== See
|
33
|
+
# * XML::SAX::Builder
|
34
|
+
# * XML::SAX::Filter
|
35
|
+
#
|
36
|
+
# --
|
37
|
+
# TODO:
|
38
|
+
# * Namespaces.
|
39
|
+
class FragmentBuilder < Builder
|
40
|
+
private :document # Would return an empty/partial document you really shouldn't mess with.
|
41
|
+
|
42
|
+
# ==== Parameters
|
43
|
+
# handler<Nokogiri::XML::SAX::Document>::
|
44
|
+
# Optional next <tt>XML::SAX::Filter</tt> or <tt>Nokogiri::XML::SAX::Document</tt> (final) in the chain.
|
45
|
+
# By default a <tt>Nokogiri::XML::SAX::Document</tt> will be used making the chain final.
|
46
|
+
#
|
47
|
+
# options<Hash>::
|
48
|
+
# {xpath<String> => &block<Proc>} pairs. The first element passed to the block will be the matching
|
49
|
+
# Nokogiri::XML::Node. Keep in mind the node will be unlinked after your block returns.
|
50
|
+
def initialize(options = {})
|
51
|
+
super()
|
52
|
+
@find = options
|
53
|
+
@found = {}
|
54
|
+
@buffer = 0
|
55
|
+
end
|
56
|
+
|
57
|
+
def start_element(name, attributes = []) #:nodoc:
|
58
|
+
super
|
59
|
+
@find.each_pair do |xpath, block|
|
60
|
+
if match = @document.at(xpath)
|
61
|
+
unless @found[match.path]
|
62
|
+
@buffer += 1
|
63
|
+
@found[match.path] = block
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def end_element(name) #:nodoc:
|
70
|
+
path = @context.path
|
71
|
+
if @buffer > 0 && block = @found.delete(path)
|
72
|
+
@buffer -= 1
|
73
|
+
block.call(@context)
|
74
|
+
end
|
75
|
+
super
|
76
|
+
|
77
|
+
if @buffer == 0 && !(path == '/')
|
78
|
+
@document.at(path).unlink
|
79
|
+
|
80
|
+
# Unlinked children are not garbage collected till the document they were created in is (I think).
|
81
|
+
# This hack job halves memory usage but it still grows too fast for my liking :(
|
82
|
+
@document = @document.dup
|
83
|
+
@context = @document.at(@context.path) rescue nil
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def characters(string) # :nodoc:
|
88
|
+
@buffer > 0 ? super : (filter && filter.characters(string))
|
89
|
+
end
|
90
|
+
|
91
|
+
def comment(string) # :nodoc:
|
92
|
+
@buffer > 0 ? super : (filter && filter.comment(string))
|
93
|
+
end
|
94
|
+
|
95
|
+
def cdata_block(string) # :nodoc:
|
96
|
+
@buffer > 0 ? super : (filter && filter.cdata_block(string))
|
97
|
+
end
|
98
|
+
|
99
|
+
end # FragmentBuilder
|
100
|
+
end # SAX
|
101
|
+
end # XML
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# Define a filter chain top to bottom.
|
5
|
+
#
|
6
|
+
#--
|
7
|
+
# TODO:
|
8
|
+
# * More enumerable methods so you can alter pipelines.
|
9
|
+
class Pipeline < Filter
|
10
|
+
def initialize(*filters)
|
11
|
+
self.filter = filters.each_with_index{|f, i| f.filter = filters[i + 1]}.first
|
12
|
+
end
|
13
|
+
end # Pipeline
|
14
|
+
|
15
|
+
end # SAX
|
16
|
+
end # XML
|
@@ -0,0 +1,15 @@
|
|
1
|
+
begin
|
2
|
+
require 'nokogiri'
|
3
|
+
rescue LoadError
|
4
|
+
require 'rubygems'
|
5
|
+
require 'nokogiri'
|
6
|
+
end
|
7
|
+
|
8
|
+
module XML
|
9
|
+
module SAX
|
10
|
+
end # SAX
|
11
|
+
end # XML
|
12
|
+
|
13
|
+
# TODO: Conditionally load some machines?
|
14
|
+
base = File.join(File.dirname(__FILE__), 'xml-sax-machines')
|
15
|
+
%w{filter pipeline debug builder fragment_builder}.each{|r| require File.join(base, r)}
|
data/test/helper.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'Builder' do
|
4
|
+
it 'creates root' do
|
5
|
+
assert_equal 'r', build('<r/>').root.name
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'creates comments' do
|
9
|
+
assert_equal '<!-- woot -->', build('<r><!-- woot --></r>').root.children.to_s
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'creates cdata_blocks' do
|
13
|
+
assert_equal '<![CDATA[ woot ]]>', build('<r><![CDATA[ woot ]]></r>').root.children.to_s
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'creates characters' do
|
17
|
+
assert_equal 'woot', build('<r>woot</r>').root.children.to_s
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'creates empty element' do
|
21
|
+
assert build('<r><foo/></r>').at('/r/foo')
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'creates element with attributes' do
|
25
|
+
el = build('<r><foo id="1"/></r>').at('/r/foo')
|
26
|
+
assert_equal '1', el['id']
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'creates element with child element' do
|
30
|
+
assert build('<r><foo><bar/></foo></r>').at('/r/foo/bar')
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'creates element with mixed content' do
|
34
|
+
el = build('<r><foo>text<bar/></foo></r>').at('/r/foo')
|
35
|
+
assert_equal 'text<bar/>', el.children.to_s
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'creates element siblings' do
|
39
|
+
el = build('<r><foo/><bar/></r>').root
|
40
|
+
assert_equal 2, el.children.length
|
41
|
+
end
|
42
|
+
|
43
|
+
protected
|
44
|
+
def build(string)
|
45
|
+
builder = XML::SAX::Builder.new
|
46
|
+
parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
47
|
+
parser << string
|
48
|
+
parser.finish
|
49
|
+
builder.document
|
50
|
+
end
|
51
|
+
end # Builder
|
data/test/test_debug.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'XML::SAX::Debug event method warning' do
|
4
|
+
it 'warns #start_document' do
|
5
|
+
assert_match regexp('start_document: []'), parse('<r/>')
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'warns #end_document' do
|
9
|
+
assert_match regexp('end_document: []'), parse('<r/>')
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'warns #start_element' do
|
13
|
+
assert_match regexp('start_element: ["r", []]'), parse('<r/>')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'warns #start_element with attributes' do
|
17
|
+
assert_match regexp('start_element: ["r", [["id", "1"]]]'), parse('<r id="1"/>')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'warns #end_element' do
|
21
|
+
assert_match regexp('end_element: ["r"]'), parse('<r/>')
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'warns #characters' do
|
25
|
+
assert_match regexp('characters: ["woot"]'), parse('<r>woot</r>')
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'warns #comment' do
|
29
|
+
assert_match regexp('comment: [" woot "]'), parse('<r><!-- woot --></r>')
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'warns #cdata_block' do
|
33
|
+
assert_match regexp('cdata_block: [" woot "]'), parse('<r><![CDATA[ woot ]]></r>')
|
34
|
+
end
|
35
|
+
|
36
|
+
protected
|
37
|
+
def parse(xml)
|
38
|
+
parser = Nokogiri::XML::SAX::PushParser.new(XML::SAX::Debug.new)
|
39
|
+
capture_stderr do
|
40
|
+
parser << xml
|
41
|
+
parser.finish
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def regexp(string)
|
46
|
+
Regexp.compile('^' + Regexp.escape(string))
|
47
|
+
end
|
48
|
+
|
49
|
+
# Run the given block with $stderr redirected to an in-memory buffer and
# return everything written to it as a String.
#
# The previous $stderr is restored in an ensure clause, so a block that
# raises does not leave the stream redirected for later code.
def capture_stderr
  previous = $stderr
  buffer = StringIO.new
  $stderr = buffer
  yield
  buffer.string
ensure
  $stderr = previous
end
|
56
|
+
end # DebugTest
|
data/test/test_filter.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'XML::SAX::Filter' do
|
4
|
+
it 'runs base filter without error' do
|
5
|
+
begin
|
6
|
+
parser = Nokogiri::XML::SAX::PushParser.new(XML::SAX::Filter.new)
|
7
|
+
parser << '<r/>'
|
8
|
+
parser.finish
|
9
|
+
rescue
|
10
|
+
assert false
|
11
|
+
else
|
12
|
+
assert true
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'XML::SAX::FragmentBuilder' do
|
4
|
+
it 'calls callback for record' do
|
5
|
+
builder = XML::SAX::FragmentBuilder.new(
|
6
|
+
'//foo' => lambda do |el|
|
7
|
+
assert_equal 'foo', el.name, 'foo element'
|
8
|
+
assert_equal 1, el.parent.children.length, 'no siblings'
|
9
|
+
end
|
10
|
+
)
|
11
|
+
parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
12
|
+
parser << '<r><foo/><foo/><foo/></r>'
|
13
|
+
parser.finish
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'buffers children for record' do
|
17
|
+
builder = XML::SAX::FragmentBuilder.new(
|
18
|
+
'//foo' => lambda{|el| assert_equal 2, el.children.length}
|
19
|
+
)
|
20
|
+
parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
21
|
+
parser << '<r><foo>text<el>el</el></foo></r>'
|
22
|
+
parser.finish
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'XML::SAX::Filter' do
|
4
|
+
it 'chains filters first to last' do
|
5
|
+
class Debug1 < XML::SAX::Debug; end
|
6
|
+
class Debug2 < XML::SAX::Debug; end
|
7
|
+
class Debug3 < XML::SAX::Debug; end
|
8
|
+
|
9
|
+
pipe = XML::SAX::Pipeline.new(
|
10
|
+
Debug1.new,
|
11
|
+
Debug2.new,
|
12
|
+
Debug3.new
|
13
|
+
)
|
14
|
+
assert_kind_of Debug1, pipe.filter
|
15
|
+
assert_kind_of Debug2, pipe.filter.filter
|
16
|
+
assert_kind_of Debug3, pipe.filter.filter.filter
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{xml-sax-machines}
|
8
|
+
s.version = "0.3.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Shane Hanna"]
|
12
|
+
s.date = %q{2010-11-29}
|
13
|
+
s.description = %q{XML SAX Machines}
|
14
|
+
s.email = %q{shane.hanna@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
"LICENSE",
|
21
|
+
"README.rdoc",
|
22
|
+
"Rakefile",
|
23
|
+
"VERSION.yml",
|
24
|
+
"lib/xml-sax-machines.rb",
|
25
|
+
"lib/xml-sax-machines/builder.rb",
|
26
|
+
"lib/xml-sax-machines/debug.rb",
|
27
|
+
"lib/xml-sax-machines/filter.rb",
|
28
|
+
"lib/xml-sax-machines/fragment_builder.rb",
|
29
|
+
"lib/xml-sax-machines/pipeline.rb",
|
30
|
+
"test/helper.rb",
|
31
|
+
"test/test_builder.rb",
|
32
|
+
"test/test_debug.rb",
|
33
|
+
"test/test_filter.rb",
|
34
|
+
"test/test_fragment_builder.rb",
|
35
|
+
"test/test_pipeline.rb",
|
36
|
+
"xml-sax-machines.gemspec"
|
37
|
+
]
|
38
|
+
s.homepage = %q{http://github.com/shanna/xml-sax-machines}
|
39
|
+
s.require_paths = ["lib"]
|
40
|
+
s.rubygems_version = %q{1.3.7}
|
41
|
+
s.summary = %q{Assorted XML SAX readers, filters and writers.}
|
42
|
+
s.test_files = [
|
43
|
+
"test/helper.rb",
|
44
|
+
"test/test_builder.rb",
|
45
|
+
"test/test_debug.rb",
|
46
|
+
"test/test_filter.rb",
|
47
|
+
"test/test_fragment_builder.rb",
|
48
|
+
"test/test_pipeline.rb"
|
49
|
+
]
|
50
|
+
|
51
|
+
if s.respond_to? :specification_version then
|
52
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
53
|
+
s.specification_version = 3
|
54
|
+
|
55
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
56
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.4"])
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<nokogiri>, [">= 1.4"])
|
59
|
+
end
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<nokogiri>, [">= 1.4"])
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xml-sax-machines
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 3
|
8
|
+
- 0
|
9
|
+
version: 0.3.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Shane Hanna
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-11-29 00:00:00 +11:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: nokogiri
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 4
|
31
|
+
version: "1.4"
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
description: XML SAX Machines
|
35
|
+
email: shane.hanna@gmail.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files:
|
41
|
+
- LICENSE
|
42
|
+
- README.rdoc
|
43
|
+
files:
|
44
|
+
- LICENSE
|
45
|
+
- README.rdoc
|
46
|
+
- Rakefile
|
47
|
+
- VERSION.yml
|
48
|
+
- lib/xml-sax-machines.rb
|
49
|
+
- lib/xml-sax-machines/builder.rb
|
50
|
+
- lib/xml-sax-machines/debug.rb
|
51
|
+
- lib/xml-sax-machines/filter.rb
|
52
|
+
- lib/xml-sax-machines/fragment_builder.rb
|
53
|
+
- lib/xml-sax-machines/pipeline.rb
|
54
|
+
- test/helper.rb
|
55
|
+
- test/test_builder.rb
|
56
|
+
- test/test_debug.rb
|
57
|
+
- test/test_filter.rb
|
58
|
+
- test/test_fragment_builder.rb
|
59
|
+
- test/test_pipeline.rb
|
60
|
+
- xml-sax-machines.gemspec
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://github.com/shanna/xml-sax-machines
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
none: false
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
segments:
|
84
|
+
- 0
|
85
|
+
version: "0"
|
86
|
+
requirements: []
|
87
|
+
|
88
|
+
rubyforge_project:
|
89
|
+
rubygems_version: 1.3.7
|
90
|
+
signing_key:
|
91
|
+
specification_version: 3
|
92
|
+
summary: Assorted XML SAX readers, filters and writers.
|
93
|
+
test_files:
|
94
|
+
- test/helper.rb
|
95
|
+
- test/test_builder.rb
|
96
|
+
- test/test_debug.rb
|
97
|
+
- test/test_filter.rb
|
98
|
+
- test/test_fragment_builder.rb
|
99
|
+
- test/test_pipeline.rb
|