shanna-xml-sax-machines 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +18 -3
- data/VERSION.yml +2 -2
- data/lib/xml-sax-machines.rb +1 -1
- data/lib/xml-sax-machines/filter.rb +7 -24
- data/lib/xml-sax-machines/fragment_builder.rb +2 -2
- data/lib/xml-sax-machines/pipeline.rb +16 -0
- data/test/fragment_builder_test.rb +4 -4
- data/test/pipeline_test.rb +22 -0
- metadata +1 -1
data/README.rdoc
CHANGED
|
@@ -16,10 +16,18 @@ Ruby::
|
|
|
16
16
|
* Via git: git clone git://github.com/shanna/xml-sax-machines.git
|
|
17
17
|
* Via gem: gem install shanna-xml-sax-machines -s http://gems.github.com
|
|
18
18
|
|
|
19
|
-
==
|
|
19
|
+
== Flow
|
|
20
20
|
|
|
21
21
|
=== XML::SAX::Filter
|
|
22
|
-
Base class for creating chainable SAX filters.
|
|
22
|
+
Pass through filter. Base class for creating chainable SAX filters.
|
|
23
|
+
|
|
24
|
+
=== XML::SAX::Pipeline
|
|
25
|
+
A linear sequence of SAX filters.
|
|
26
|
+
|
|
27
|
+
== Filters
|
|
28
|
+
|
|
29
|
+
=== XML::SAX::Handler
|
|
30
|
+
Base class for creating chainable SAX handlers.
|
|
23
31
|
|
|
24
32
|
=== XML::SAX::Debug
|
|
25
33
|
Debugging output for SAX events.
|
|
@@ -31,9 +39,16 @@ Build in-memory document trees from SAX streams.
|
|
|
31
39
|
Process in-memory record based document fragments. Builds well balanced XML chunks matching an XPath into a partial
|
|
32
40
|
in-memory document tree for processing by a callback block.
|
|
33
41
|
|
|
42
|
+
== Recognition
|
|
43
|
+
|
|
44
|
+
This library is my interpretation of patterns introduced to me through Perl's <tt>XML::SAX::Machines</tt>. All credit
|
|
45
|
+
to Barrie Slaymaker and contributors.
|
|
46
|
+
|
|
47
|
+
* http://search.cpan.org/dist/XML-SAX-Machines
|
|
48
|
+
|
|
34
49
|
== TODO
|
|
35
50
|
|
|
36
|
-
*
|
|
51
|
+
* Tee filter.
|
|
37
52
|
* Json reader/writer?
|
|
38
53
|
* Any other Ruby parsers that can generate a SAX stream?
|
|
39
54
|
* Namespace handling in XML::SAX::Builder and XML::SAX::FragmentBuilder.
|
data/VERSION.yml
CHANGED
data/lib/xml-sax-machines.rb
CHANGED
|
@@ -12,4 +12,4 @@ end # XML
|
|
|
12
12
|
|
|
13
13
|
# TODO: Conditionally load some machines?
|
|
14
14
|
base = File.join(File.dirname(__FILE__), 'xml-sax-machines')
|
|
15
|
-
%w{filter debug builder fragment_builder}.each{|r| require File.join(base, r)}
|
|
15
|
+
%w{filter pipeline debug builder fragment_builder}.each{|r| require File.join(base, r)}
|
|
@@ -8,7 +8,13 @@ module XML
|
|
|
8
8
|
#
|
|
9
9
|
# Extend this Class rather than <tt>Nokogiri::XML::SAX::Document</tt> which acts as a final filter.
|
|
10
10
|
#
|
|
11
|
+
# ==== Notes
|
|
12
|
+
# Filter chains are built in reverse by setting the filter attribute. Use <tt>XML::SAX::Pipeline</tt> to
|
|
13
|
+
# construct filter chains in a more logical order. This keeps the filter constructor clear of any prerequisite
|
|
14
|
+
# API in subclasses.
|
|
15
|
+
#
|
|
11
16
|
# ==== See
|
|
17
|
+
# * XML::SAX::Pipeline
|
|
12
18
|
# * Nokogiri::XML::SAX::Document
|
|
13
19
|
#
|
|
14
20
|
#--
|
|
@@ -16,33 +22,10 @@ module XML
|
|
|
16
22
|
# * Examples.
|
|
17
23
|
class Filter < Nokogiri::XML::SAX::Document
|
|
18
24
|
|
|
25
|
+
# Barf if the filter isn't a Nokogiri::XML::SAX::Document or XML::SAX::Filter.
|
|
19
26
|
# The next filter in the chain.
|
|
20
27
|
attr_accessor :filter
|
|
21
28
|
|
|
22
|
-
# New filter instance.
|
|
23
|
-
#
|
|
24
|
-
# ==== Notes
|
|
25
|
-
# Filter chains are built in reverse, the filter passed during construction is called *after* the current
|
|
26
|
-
# filter.
|
|
27
|
-
#
|
|
28
|
-
# ==== See
|
|
29
|
-
# * XML::SAX::Pipeline
|
|
30
|
-
#
|
|
31
|
-
# ==== Parameters
|
|
32
|
-
# filter<Nokogiri::XML::SAX::Document>::
|
|
33
|
-
#   Optional next <tt>XML::SAX::Filter</tt> or <tt>Nokogiri::XML::SAX::Document</tt> (final) in the chain.
|
|
34
|
-
# By default a <tt>Nokogiri::XML::SAX::Document</tt> will be used making the chain final.
|
|
35
|
-
#
|
|
36
|
-
# options<Hash>::
|
|
37
|
-
# Optional per-filter arguments.
|
|
38
|
-
#
|
|
39
|
-
#--
|
|
40
|
-
# TODO:
|
|
41
|
-
# * Barf if the filter isn't a Nokogiri::XML::SAX::Document or XML::SAX::Filter.
|
|
42
|
-
def initialize(filter = nil, options = {})
|
|
43
|
-
@filter = filter
|
|
44
|
-
end
|
|
45
|
-
|
|
46
29
|
def cdata_block(string) #:nodoc:
|
|
47
30
|
@filter.cdata_block(string) if @filter
|
|
48
31
|
end
|
|
@@ -47,8 +47,8 @@ module XML
|
|
|
47
47
|
# options<Hash>::
|
|
48
48
|
# {xpath<String> => &block<Proc>} pairs. The first element passed to the block will be the matching
|
|
49
49
|
# Nokogiri::XML::Node. Keep in mind the node will be unlinked after your block returns.
|
|
50
|
-
def initialize(
|
|
51
|
-
super(
|
|
50
|
+
def initialize(options = {})
|
|
51
|
+
super()
|
|
52
52
|
@find = options
|
|
53
53
|
@found = {}
|
|
54
54
|
@buffer = 0
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
module XML
|
|
2
|
+
module SAX
|
|
3
|
+
|
|
4
|
+
# Define a filter chain top to bottom.
|
|
5
|
+
#
|
|
6
|
+
#--
|
|
7
|
+
# TODO:
|
|
8
|
+
# * More enumerable methods so you can alter pipelines.
|
|
9
|
+
class Pipeline < Filter
|
|
10
|
+
def initialize(*filters)
|
|
11
|
+
self.filter = filters.each_with_index{|f, i| f.filter = filters[i + 1]}.first
|
|
12
|
+
end
|
|
13
|
+
end # Pipeline
|
|
14
|
+
|
|
15
|
+
end # SAX
|
|
16
|
+
end # XML
|
|
@@ -4,21 +4,21 @@ class FragmentBuilderTest < Test::Unit::TestCase
|
|
|
4
4
|
context 'XML::SAX::FragmentBuilder' do
|
|
5
5
|
|
|
6
6
|
should 'call callback for record' do
|
|
7
|
-
builder = XML::SAX::FragmentBuilder.new(
|
|
7
|
+
builder = XML::SAX::FragmentBuilder.new(
|
|
8
8
|
'//foo' => lambda do |el|
|
|
9
9
|
assert_equal 'foo', el.name, 'foo element'
|
|
10
10
|
assert_equal 1, el.parent.children.length, 'no siblings'
|
|
11
11
|
end
|
|
12
|
-
|
|
12
|
+
)
|
|
13
13
|
parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
|
14
14
|
parser << '<r><foo/><foo/><foo/></r>'
|
|
15
15
|
parser.finish
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
should 'have buffered children for record' do
|
|
19
|
-
builder = XML::SAX::FragmentBuilder.new(
|
|
19
|
+
builder = XML::SAX::FragmentBuilder.new(
|
|
20
20
|
'//foo' => lambda{|el| assert_equal 2, el.children.length}
|
|
21
|
-
|
|
21
|
+
)
|
|
22
22
|
parser = Nokogiri::XML::SAX::PushParser.new(builder)
|
|
23
23
|
parser << '<r><foo>text<el>el</el></foo></r>'
|
|
24
24
|
parser.finish
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
|
2
|
+
|
|
3
|
+
class PipelineTest < Test::Unit::TestCase
|
|
4
|
+
context 'XML::SAX::Filter' do
|
|
5
|
+
|
|
6
|
+
should 'chain filters first to last' do
|
|
7
|
+
class Debug1 < XML::SAX::Debug; end
|
|
8
|
+
class Debug2 < XML::SAX::Debug; end
|
|
9
|
+
class Debug3 < XML::SAX::Debug; end
|
|
10
|
+
|
|
11
|
+
pipe = XML::SAX::Pipeline.new(
|
|
12
|
+
Debug1.new,
|
|
13
|
+
Debug2.new,
|
|
14
|
+
Debug3.new
|
|
15
|
+
)
|
|
16
|
+
assert_kind_of Debug1, pipe.filter
|
|
17
|
+
assert_kind_of Debug2, pipe.filter.filter
|
|
18
|
+
assert_kind_of Debug3, pipe.filter.filter.filter
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
end
|
|
22
|
+
end # PipelineTest
|