xml-sax-machines 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.rdoc +58 -0
- data/Rakefile +34 -0
- data/VERSION.yml +5 -0
- data/lib/xml-sax-machines/builder.rb +72 -0
- data/lib/xml-sax-machines/debug.rb +36 -0
- data/lib/xml-sax-machines/filter.rb +68 -0
- data/lib/xml-sax-machines/fragment_builder.rb +101 -0
- data/lib/xml-sax-machines/pipeline.rb +16 -0
- data/lib/xml-sax-machines.rb +15 -0
- data/test/helper.rb +6 -0
- data/test/test_builder.rb +51 -0
- data/test/test_debug.rb +56 -0
- data/test/test_filter.rb +15 -0
- data/test/test_fragment_builder.rb +24 -0
- data/test/test_pipeline.rb +18 -0
- data/xml-sax-machines.gemspec +64 -0
- metadata +99 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 "Shane Hanna"
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
= XML SAX Machines
|
2
|
+
|
3
|
+
* http://github.com/shanna/xml-sax-machines/tree/master
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Assorted XML SAX readers, filters and writers for nokogiri.
|
8
|
+
|
9
|
+
== Dependencies
|
10
|
+
|
11
|
+
Ruby::
|
12
|
+
* nokogiri >= 1.4
|
13
|
+
|
14
|
+
== Install
|
15
|
+
|
16
|
+
* Via git: git clone git://github.com/shanna/xml-sax-machines.git
|
17
|
+
* Via gem: gem install xml-sax-machines
|
18
|
+
|
19
|
+
== Flow
|
20
|
+
|
21
|
+
=== XML::SAX::Filter
|
22
|
+
Pass through filter. Base class for creating chainable SAX filters.
|
23
|
+
|
24
|
+
=== XML::SAX::Pipeline
|
25
|
+
A linear sequence of SAX filters.
|
26
|
+
|
27
|
+
== Filters
|
28
|
+
|
29
|
+
=== XML::SAX::Handler
|
30
|
+
Base class for creating chainable SAX handlers.
|
31
|
+
|
32
|
+
=== XML::SAX::Debug
|
33
|
+
Debugging output for SAX events.
|
34
|
+
|
35
|
+
=== XML::SAX::Builder
|
36
|
+
Build in-memory document trees from SAX streams.
|
37
|
+
|
38
|
+
=== XML::SAX::FragmentBuilder
|
39
|
+
Process in-memory record based document fragments. Builds well balanced XML chunks matching an XPath into a partial
|
40
|
+
in-memory document tree for processing by a callback block.
|
41
|
+
|
42
|
+
== Recognition
|
43
|
+
|
44
|
+
This library is my interpretation of patterns introduced to me through Perl's <tt>XML::SAX::Machines</tt>. All credit
|
45
|
+
to Barrie Slaymaker and contributors.
|
46
|
+
|
47
|
+
* http://search.cpan.org/dist/XML-SAX-Machines
|
48
|
+
|
49
|
+
== TODO
|
50
|
+
|
51
|
+
* Tee filter.
|
52
|
+
* Json reader/writer?
|
53
|
+
* Any other Ruby parsers that can generate a SAX stream?
|
54
|
+
* Namespace handling in XML::SAX::Builder and XML::SAX::FragmentBuilder.
|
55
|
+
|
56
|
+
== Copyright
|
57
|
+
|
58
|
+
Copyright (c) 2009 "Shane Hanna". See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'jeweler'
|
5
|
+
Jeweler::Tasks.new do |gem|
|
6
|
+
gem.name = 'xml-sax-machines'
|
7
|
+
gem.summary = %q{Assorted XML SAX readers, filters and writers.}
|
8
|
+
gem.description = %q{XML SAX Machines}
|
9
|
+
gem.email = 'shane.hanna@gmail.com'
|
10
|
+
gem.homepage = 'http://github.com/shanna/xml-sax-machines'
|
11
|
+
gem.authors = ['Shane Hanna']
|
12
|
+
gem.add_dependency 'nokogiri', ['>= 1.4']
|
13
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
14
|
+
end
|
15
|
+
Jeweler::GemcutterTasks.new
|
16
|
+
rescue LoadError
|
17
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
18
|
+
end
|
19
|
+
|
20
|
+
require 'rake/testtask'
|
21
|
+
Rake::TestTask.new(:test) do |test|
|
22
|
+
test.libs << 'lib' << 'test'
|
23
|
+
test.pattern = 'test/**/test_*.rb'
|
24
|
+
test.verbose = true
|
25
|
+
end
|
26
|
+
|
27
|
+
task :test => :check_dependencies
|
28
|
+
task :default => :test
|
29
|
+
|
30
|
+
require 'yard'
|
31
|
+
YARD::Rake::YardocTask.new do |yard|
|
32
|
+
yard.files = ['lib/**/*.rb']
|
33
|
+
end
|
34
|
+
|
data/VERSION.yml
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# Build a Nokogiri::XML::Document from a SAX stream.
|
5
|
+
#
|
6
|
+
# ==== Example
|
7
|
+
#
|
8
|
+
# builder = XML::SAX::Builder.new
|
9
|
+
# parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
10
|
+
# parser << %q{<root>xml content</root>}
|
11
|
+
# parser.finish
|
12
|
+
#
|
13
|
+
# puts builder.document.root.children.to_s #=> xml content
|
14
|
+
#
|
15
|
+
# ==== See
|
16
|
+
# * XML::SAX::Filter
|
17
|
+
#
|
18
|
+
# --
|
19
|
+
# TODO:
|
20
|
+
# * Namespaces.
|
21
|
+
class Builder < Filter
|
22
|
+
|
23
|
+
# The document object.
|
24
|
+
#
|
25
|
+
# ==== Returns
|
26
|
+
# Nokogiri::XML::Document
|
27
|
+
attr_reader :document
|
28
|
+
|
29
|
+
def start_document #:nodoc:
|
30
|
+
super
|
31
|
+
@document = Nokogiri::XML::Document.new
|
32
|
+
@context = @document
|
33
|
+
end
|
34
|
+
|
35
|
+
def start_element(name, attributes = []) #:nodoc:
|
36
|
+
super
|
37
|
+
el = Nokogiri::XML::Element.new(name, @document)
|
38
|
+
Hash[*attributes.flatten].each_pair{|k, v| el[k] = v}
|
39
|
+
@context = @context.add_child(el)
|
40
|
+
end
|
41
|
+
|
42
|
+
def end_element(name) #:nodoc:
|
43
|
+
super
|
44
|
+
raise "Unmatched closing element. Got '#{name}' but expected '#{@context.name}'" \
|
45
|
+
unless name == @context.name
|
46
|
+
@context = @context.parent
|
47
|
+
end
|
48
|
+
|
49
|
+
def characters(string) #:nodoc:
|
50
|
+
super
|
51
|
+
# http://nokogiri.lighthouseapp.com/projects/19607-nokogiri/tickets/68-xpath-incorrect-when-text-siblings-exist#ticket-68-1
|
52
|
+
sibling = @context.children.last
|
53
|
+
if sibling.kind_of?(Nokogiri::XML::Text)
|
54
|
+
sibling.content += string
|
55
|
+
else
|
56
|
+
@context.add_child(Nokogiri::XML::Text.new(string, @document))
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def cdata_block(string) #:nodoc:
|
61
|
+
super
|
62
|
+
@context.add_child(Nokogiri::XML::CDATA.new(@document, string))
|
63
|
+
end
|
64
|
+
|
65
|
+
def comment(string) #:nodoc:
|
66
|
+
super
|
67
|
+
@context.add_child(Nokogiri::XML::Comment.new(@document, string))
|
68
|
+
end
|
69
|
+
|
70
|
+
end # Builder
|
71
|
+
end # SAX
|
72
|
+
end # XML
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# SAX Debug filter.
|
5
|
+
#
|
6
|
+
# Warn all SAX event methods before calling the next filter in the chain. Handy as it can be placed anywhere in a
|
7
|
+
# pipeline to see what events are being passed to the next filter.
|
8
|
+
#
|
9
|
+
# ==== See
|
10
|
+
# * XML::SAX::Filter
|
11
|
+
#
|
12
|
+
class Debug < Filter
|
13
|
+
|
14
|
+
%w{
|
15
|
+
cdata_block
|
16
|
+
characters
|
17
|
+
comment
|
18
|
+
end_document
|
19
|
+
end_element
|
20
|
+
end_element_namespace
|
21
|
+
error
|
22
|
+
start_document
|
23
|
+
start_element
|
24
|
+
start_element_namespace
|
25
|
+
warning
|
26
|
+
xmldecl
|
27
|
+
}.each do |method|
|
28
|
+
define_method(method.to_sym) do |*args|
|
29
|
+
warn "#{method}: #{args.inspect}"
|
30
|
+
super(*args)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end # Debug
|
35
|
+
end # SAX
|
36
|
+
end # XML
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# SAX Filter base class.
|
5
|
+
#
|
6
|
+
# Chain SAX filters together by delegating missing SAX event methods to the next filter in the chain. Simply call
|
7
|
+
# super in any SAX event methods you override to pass the call to the next filter in the chain.
|
8
|
+
#
|
9
|
+
# Extend this Class rather than <tt>Nokogiri::XML::SAX::Document</tt> which acts as a final filter.
|
10
|
+
#
|
11
|
+
# ==== Notes
|
12
|
+
# Filter chains are built in reverse by setting the filter attribute. Use <tt>XML::SAX::Pipeline</tt> to
|
13
|
+
# construct filter chains in a more logical order. This keeps the filter constructor clear of any prerequisite
|
14
|
+
# API in subclasses.
|
15
|
+
#
|
16
|
+
# ==== See
|
17
|
+
# * XML::SAX::Pipeline
|
18
|
+
# * Nokogiri::XML::SAX::Document
|
19
|
+
#
|
20
|
+
#--
|
21
|
+
# TODO:
|
22
|
+
# * Examples.
|
23
|
+
class Filter < Nokogiri::XML::SAX::Document
|
24
|
+
|
25
|
+
# Barf if the filter isn't a Nokogiri::XML::SAX::Document or XML::SAX::Filter.
|
26
|
+
# The next filter in the chain.
|
27
|
+
attr_accessor :filter
|
28
|
+
|
29
|
+
def cdata_block(string) #:nodoc:
|
30
|
+
@filter.cdata_block(string) if @filter
|
31
|
+
end
|
32
|
+
|
33
|
+
def characters(string) #:nodoc:
|
34
|
+
@filter.characters(string) if @filter
|
35
|
+
end
|
36
|
+
|
37
|
+
def comment(string) #:nodoc:
|
38
|
+
@filter.comment(string) if @filter
|
39
|
+
end
|
40
|
+
|
41
|
+
def end_document #:nodoc:
|
42
|
+
@filter.end_document if @filter
|
43
|
+
end
|
44
|
+
|
45
|
+
def end_element(name) #:nodoc:
|
46
|
+
@filter.end_element(name) if @filter
|
47
|
+
end
|
48
|
+
|
49
|
+
def error(string) #:nodoc:
|
50
|
+
@filter.error(string) if @filter
|
51
|
+
end
|
52
|
+
|
53
|
+
def start_document #:nodoc:
|
54
|
+
@filter.start_document if @filter
|
55
|
+
end
|
56
|
+
|
57
|
+
# Forward a start_element event to the next filter in the chain.
#
# ==== Parameters
# name<String>:: Name of the element being opened.
# attributes<Array>:: Attributes reported for the element. Defaults to none.
#
# Does nothing when no next filter has been set.
def start_element(name, attributes = []) #:nodoc:
  # Pass the received attributes through untouched. The call used to be written
  # as `start_element(name, attributes = [])`, which re-assigned the local and
  # always forwarded an empty list.
  @filter.start_element(name, attributes) if @filter
end
|
60
|
+
|
61
|
+
def warning(string) #:nodoc:
|
62
|
+
@filter.warning(string) if @filter
|
63
|
+
end
|
64
|
+
|
65
|
+
end # Filter
|
66
|
+
end # SAX
|
67
|
+
end # XML
|
68
|
+
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# Build Nokogiri::XML::Document fragments that match an XPath.
|
5
|
+
#
|
6
|
+
# Stream large (or small) record based XML documents building each matching XPath into a document fragment making
|
7
|
+
# further manipulation of each record easier.
|
8
|
+
#
|
9
|
+
# ==== Notes
|
10
|
+
# * In order to save memory well balanced elements that do not match any XPath are unlinked. This means you *cannot*
|
11
|
+
# match records by position in relation to siblings.
|
12
|
+
# * Because we are parsing a SAX stream there is no read ahead. You *cannot* match records by any children the
|
13
|
+
# element may have once further events are pushed.
|
14
|
+
# * You can match by attributes of an element.
|
15
|
+
#
|
16
|
+
# ==== Example
|
17
|
+
#
|
18
|
+
# builder = XML::SAX::FragmentBuilder.new({
|
19
|
+
# '//record' => lambda{|record| puts record.to_s} # Process each matched record element.
|
20
|
+
# })
|
21
|
+
# parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
22
|
+
# parser << %q{
|
23
|
+
# <root>
|
24
|
+
# <record id="1">record one</record>
|
25
|
+
# <record id="2">record two</record>
|
26
|
+
# </root>
|
27
|
+
# }
|
28
|
+
# #=> <record id="1">record one</record>
|
29
|
+
# #=> <record id="2">record two</record>
|
30
|
+
# parser.finish
|
31
|
+
#
|
32
|
+
# ==== See
|
33
|
+
# * XML::SAX::Builder
|
34
|
+
# * XML::SAX::Filter
|
35
|
+
#
|
36
|
+
# --
|
37
|
+
# TODO:
|
38
|
+
# * Namespaces.
|
39
|
+
class FragmentBuilder < Builder
|
40
|
+
private :document # Would return an empty/partial document you really shouldn't mess with.
|
41
|
+
|
42
|
+
# ==== Parameters
|
43
|
+
# handler<Nokogiri::XML::SAX::Document>::
|
44
|
+
# Optional next <tt>XML::SAX::Filter</tt> or <tt>Nokogiri::XML::SAX::Document</tt> (final) in the chain.
|
45
|
+
# By default a <tt>Nokogiri::XML::SAX::Document</tt> will be used making the chain final.
|
46
|
+
#
|
47
|
+
# options<Hash>::
|
48
|
+
# {xpath<String> => &block<Proc>} pairs. The first element passed to the block will be the matching
|
49
|
+
# Nokogiri::XML::Node. Keep in mind the node will be unlinked after your block returns.
|
50
|
+
def initialize(options = {})
|
51
|
+
super()
|
52
|
+
@find = options
|
53
|
+
@found = {}
|
54
|
+
@buffer = 0
|
55
|
+
end
|
56
|
+
|
57
|
+
def start_element(name, attributes = []) #:nodoc:
|
58
|
+
super
|
59
|
+
@find.each_pair do |xpath, block|
|
60
|
+
if match = @document.at(xpath)
|
61
|
+
unless @found[match.path]
|
62
|
+
@buffer += 1
|
63
|
+
@found[match.path] = block
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def end_element(name) #:nodoc:
|
70
|
+
path = @context.path
|
71
|
+
if @buffer > 0 && block = @found.delete(path)
|
72
|
+
@buffer -= 1
|
73
|
+
block.call(@context)
|
74
|
+
end
|
75
|
+
super
|
76
|
+
|
77
|
+
if @buffer == 0 && !(path == '/')
|
78
|
+
@document.at(path).unlink
|
79
|
+
|
80
|
+
# Unlinked children are not garbage collected till the document they were created in is (I think).
|
81
|
+
# This hack job halves memory usage but it still grows too fast for my liking :(
|
82
|
+
@document = @document.dup
|
83
|
+
@context = @document.at(@context.path) rescue nil
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def characters(string) # :nodoc:
|
88
|
+
@buffer > 0 ? super : (filter && filter.characters(string))
|
89
|
+
end
|
90
|
+
|
91
|
+
def comment(string) # :nodoc:
|
92
|
+
@buffer > 0 ? super : (filter && filter.comment(string))
|
93
|
+
end
|
94
|
+
|
95
|
+
def cdata_block(string) # :nodoc:
|
96
|
+
@buffer > 0 ? super : (filter && filter.cdata_block(string))
|
97
|
+
end
|
98
|
+
|
99
|
+
end # FragmentBuilder
|
100
|
+
end # SAX
|
101
|
+
end # XML
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module XML
|
2
|
+
module SAX
|
3
|
+
|
4
|
+
# Define a filter chain top to bottom.
|
5
|
+
#
|
6
|
+
#--
|
7
|
+
# TODO:
|
8
|
+
# * More enumerable methods so you can alter pipelines.
|
9
|
+
class Pipeline < Filter
|
10
|
+
def initialize(*filters)
|
11
|
+
self.filter = filters.each_with_index{|f, i| f.filter = filters[i + 1]}.first
|
12
|
+
end
|
13
|
+
end # Pipeline
|
14
|
+
|
15
|
+
end # SAX
|
16
|
+
end # XML
|
@@ -0,0 +1,15 @@
|
|
1
|
+
begin
|
2
|
+
require 'nokogiri'
|
3
|
+
rescue LoadError
|
4
|
+
require 'rubygems'
|
5
|
+
require 'nokogiri'
|
6
|
+
end
|
7
|
+
|
8
|
+
module XML
|
9
|
+
module SAX
|
10
|
+
end # SAX
|
11
|
+
end # XML
|
12
|
+
|
13
|
+
# TODO: Conditionally load some machines?
|
14
|
+
base = File.join(File.dirname(__FILE__), 'xml-sax-machines')
|
15
|
+
%w{filter pipeline debug builder fragment_builder}.each{|r| require File.join(base, r)}
|
data/test/helper.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'Builder' do
|
4
|
+
it 'creates root' do
|
5
|
+
assert_equal 'r', build('<r/>').root.name
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'creates comments' do
|
9
|
+
assert_equal '<!-- woot -->', build('<r><!-- woot --></r>').root.children.to_s
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'creates cdata_blocks' do
|
13
|
+
assert_equal '<![CDATA[ woot ]]>', build('<r><![CDATA[ woot ]]></r>').root.children.to_s
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'creates characters' do
|
17
|
+
assert_equal 'woot', build('<r>woot</r>').root.children.to_s
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'creates empty element' do
|
21
|
+
assert build('<r><foo/></r>').at('/r/foo')
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'creates element with attributes' do
|
25
|
+
el = build('<r><foo id="1"/></r>').at('/r/foo')
|
26
|
+
assert_equal '1', el['id']
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'creates element with child element' do
|
30
|
+
assert build('<r><foo><bar/></foo></r>').at('/r/foo/bar')
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'creates element with mixed content' do
|
34
|
+
el = build('<r><foo>text<bar/></foo></r>').at('/r/foo')
|
35
|
+
assert_equal 'text<bar/>', el.children.to_s
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'creates element siblings' do
|
39
|
+
el = build('<r><foo/><bar/></r>').root
|
40
|
+
assert_equal 2, el.children.length
|
41
|
+
end
|
42
|
+
|
43
|
+
protected
|
44
|
+
def build(string)
|
45
|
+
builder = XML::SAX::Builder.new
|
46
|
+
parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
47
|
+
parser << string
|
48
|
+
parser.finish
|
49
|
+
builder.document
|
50
|
+
end
|
51
|
+
end # Builder
|
data/test/test_debug.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'XML::SAX::Debug event method warning' do
|
4
|
+
it 'warns #start_document' do
|
5
|
+
assert_match regexp('start_document: []'), parse('<r/>')
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'warns #end_document' do
|
9
|
+
assert_match regexp('end_document: []'), parse('<r/>')
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'warns #start_element' do
|
13
|
+
assert_match regexp('start_element: ["r", []]'), parse('<r/>')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'warns #start_element with attributes' do
|
17
|
+
assert_match regexp('start_element: ["r", [["id", "1"]]]'), parse('<r id="1"/>')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'warns #end_element' do
|
21
|
+
assert_match regexp('end_element: ["r"]'), parse('<r/>')
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'warns #characters' do
|
25
|
+
assert_match regexp('characters: ["woot"]'), parse('<r>woot</r>')
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'warns #comment' do
|
29
|
+
assert_match regexp('comment: [" woot "]'), parse('<r><!-- woot --></r>')
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'warns #cdata_block' do
|
33
|
+
assert_match regexp('cdata_block: [" woot "]'), parse('<r><![CDATA[ woot ]]></r>')
|
34
|
+
end
|
35
|
+
|
36
|
+
protected
|
37
|
+
def parse(xml)
|
38
|
+
parser = Nokogiri::XML::SAX::PushParser.new(XML::SAX::Debug.new)
|
39
|
+
capture_stderr do
|
40
|
+
parser << xml
|
41
|
+
parser.finish
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def regexp(string)
|
46
|
+
Regexp.compile('^' + Regexp.escape(string))
|
47
|
+
end
|
48
|
+
|
49
|
+
# Run the given block with $stderr redirected to an in-memory buffer.
#
# ==== Returns
# String:: Everything written to $stderr while the block ran.
def capture_stderr(&block)
  previous = $stderr
  $stderr = StringIO.new
  yield
  $stderr.string
ensure
  # Always put the previous stream back, even when the block raises, so a
  # failing example cannot leave $stderr redirected for the rest of the run.
  $stderr = previous
end
|
56
|
+
end # DebugTest
|
data/test/test_filter.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'XML::SAX::Filter' do
|
4
|
+
it 'runs base filter without error' do
|
5
|
+
begin
|
6
|
+
parser = Nokogiri::XML::SAX::PushParser.new(XML::SAX::Filter.new)
|
7
|
+
parser << '<r/>'
|
8
|
+
parser.finish
|
9
|
+
rescue
|
10
|
+
assert false
|
11
|
+
else
|
12
|
+
assert true
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'XML::SAX::FragmentBuilder' do
|
4
|
+
it 'calls callback for record' do
|
5
|
+
builder = XML::SAX::FragmentBuilder.new(
|
6
|
+
'//foo' => lambda do |el|
|
7
|
+
assert_equal 'foo', el.name, 'foo element'
|
8
|
+
assert_equal 1, el.parent.children.length, 'no siblings'
|
9
|
+
end
|
10
|
+
)
|
11
|
+
parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
12
|
+
parser << '<r><foo/><foo/><foo/></r>'
|
13
|
+
parser.finish
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'buffers children for record' do
|
17
|
+
builder = XML::SAX::FragmentBuilder.new(
|
18
|
+
'//foo' => lambda{|el| assert_equal 2, el.children.length}
|
19
|
+
)
|
20
|
+
parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
21
|
+
parser << '<r><foo>text<el>el</el></foo></r>'
|
22
|
+
parser.finish
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe 'XML::SAX::Filter' do
|
4
|
+
it 'chains filters first to last' do
|
5
|
+
class Debug1 < XML::SAX::Debug; end
|
6
|
+
class Debug2 < XML::SAX::Debug; end
|
7
|
+
class Debug3 < XML::SAX::Debug; end
|
8
|
+
|
9
|
+
pipe = XML::SAX::Pipeline.new(
|
10
|
+
Debug1.new,
|
11
|
+
Debug2.new,
|
12
|
+
Debug3.new
|
13
|
+
)
|
14
|
+
assert_kind_of Debug1, pipe.filter
|
15
|
+
assert_kind_of Debug2, pipe.filter.filter
|
16
|
+
assert_kind_of Debug3, pipe.filter.filter.filter
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{xml-sax-machines}
|
8
|
+
s.version = "0.3.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Shane Hanna"]
|
12
|
+
s.date = %q{2010-11-29}
|
13
|
+
s.description = %q{XML SAX Machines}
|
14
|
+
s.email = %q{shane.hanna@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
"LICENSE",
|
21
|
+
"README.rdoc",
|
22
|
+
"Rakefile",
|
23
|
+
"VERSION.yml",
|
24
|
+
"lib/xml-sax-machines.rb",
|
25
|
+
"lib/xml-sax-machines/builder.rb",
|
26
|
+
"lib/xml-sax-machines/debug.rb",
|
27
|
+
"lib/xml-sax-machines/filter.rb",
|
28
|
+
"lib/xml-sax-machines/fragment_builder.rb",
|
29
|
+
"lib/xml-sax-machines/pipeline.rb",
|
30
|
+
"test/helper.rb",
|
31
|
+
"test/test_builder.rb",
|
32
|
+
"test/test_debug.rb",
|
33
|
+
"test/test_filter.rb",
|
34
|
+
"test/test_fragment_builder.rb",
|
35
|
+
"test/test_pipeline.rb",
|
36
|
+
"xml-sax-machines.gemspec"
|
37
|
+
]
|
38
|
+
s.homepage = %q{http://github.com/shanna/xml-sax-machines}
|
39
|
+
s.require_paths = ["lib"]
|
40
|
+
s.rubygems_version = %q{1.3.7}
|
41
|
+
s.summary = %q{Assorted XML SAX readers, filters and writers.}
|
42
|
+
s.test_files = [
|
43
|
+
"test/helper.rb",
|
44
|
+
"test/test_builder.rb",
|
45
|
+
"test/test_debug.rb",
|
46
|
+
"test/test_filter.rb",
|
47
|
+
"test/test_fragment_builder.rb",
|
48
|
+
"test/test_pipeline.rb"
|
49
|
+
]
|
50
|
+
|
51
|
+
if s.respond_to? :specification_version then
|
52
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
53
|
+
s.specification_version = 3
|
54
|
+
|
55
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
56
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.4"])
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<nokogiri>, [">= 1.4"])
|
59
|
+
end
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<nokogiri>, [">= 1.4"])
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xml-sax-machines
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 3
|
8
|
+
- 0
|
9
|
+
version: 0.3.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Shane Hanna
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-11-29 00:00:00 +11:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: nokogiri
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 4
|
31
|
+
version: "1.4"
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
description: XML SAX Machines
|
35
|
+
email: shane.hanna@gmail.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files:
|
41
|
+
- LICENSE
|
42
|
+
- README.rdoc
|
43
|
+
files:
|
44
|
+
- LICENSE
|
45
|
+
- README.rdoc
|
46
|
+
- Rakefile
|
47
|
+
- VERSION.yml
|
48
|
+
- lib/xml-sax-machines.rb
|
49
|
+
- lib/xml-sax-machines/builder.rb
|
50
|
+
- lib/xml-sax-machines/debug.rb
|
51
|
+
- lib/xml-sax-machines/filter.rb
|
52
|
+
- lib/xml-sax-machines/fragment_builder.rb
|
53
|
+
- lib/xml-sax-machines/pipeline.rb
|
54
|
+
- test/helper.rb
|
55
|
+
- test/test_builder.rb
|
56
|
+
- test/test_debug.rb
|
57
|
+
- test/test_filter.rb
|
58
|
+
- test/test_fragment_builder.rb
|
59
|
+
- test/test_pipeline.rb
|
60
|
+
- xml-sax-machines.gemspec
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://github.com/shanna/xml-sax-machines
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
none: false
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
segments:
|
84
|
+
- 0
|
85
|
+
version: "0"
|
86
|
+
requirements: []
|
87
|
+
|
88
|
+
rubyforge_project:
|
89
|
+
rubygems_version: 1.3.7
|
90
|
+
signing_key:
|
91
|
+
specification_version: 3
|
92
|
+
summary: Assorted XML SAX readers, filters and writers.
|
93
|
+
test_files:
|
94
|
+
- test/helper.rb
|
95
|
+
- test/test_builder.rb
|
96
|
+
- test/test_debug.rb
|
97
|
+
- test/test_filter.rb
|
98
|
+
- test/test_fragment_builder.rb
|
99
|
+
- test/test_pipeline.rb
|