superfeedr-sax-machine 0.0.22.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +87 -0
- data/Rakefile +14 -0
- data/lib/sax-machine/ns_stack.rb +38 -0
- data/lib/sax-machine/sax_collection_config.rb +56 -0
- data/lib/sax-machine/sax_config.rb +53 -0
- data/lib/sax-machine/sax_document.rb +107 -0
- data/lib/sax-machine/sax_element_config.rb +75 -0
- data/lib/sax-machine/sax_event_recorder.rb +35 -0
- data/lib/sax-machine/sax_handler.rb +124 -0
- data/lib/sax-machine.rb +12 -0
- data/spec/sax-machine/sax_document_spec.rb +657 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +13 -0
- metadata +79 -0
data/README.textile
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
h1. SAX Machine
|
2
|
+
|
3
|
+
"http://github.com/pauldix/sax-machine/wikis":http://github.com/pauldix/sax-machine/wikis
|
4
|
+
|
5
|
+
"http://github.com/pauldix/sax-machine/tree/master":http://github.com/pauldix/sax-machine/tree/master
|
6
|
+
|
7
|
+
h2. Description
|
8
|
+
|
9
|
+
A declarative SAX parsing library backed by Nokogiri
|
10
|
+
|
11
|
+
h2. Usage
|
12
|
+
|
13
|
+
<pre>
|
14
|
+
require 'sax-machine'
|
15
|
+
|
16
|
+
# Class for parsing an atom entry out of a feedburner atom feed
|
17
|
+
class AtomEntry
|
18
|
+
include SAXMachine
|
19
|
+
element :title
|
20
|
+
# the :as argument makes this available through atom_entry.author instead of .name
|
21
|
+
element :name, :as => :author
|
22
|
+
element "feedburner:origLink", :as => :url
|
23
|
+
element :summary
|
24
|
+
element :content
|
25
|
+
element :published
|
26
|
+
end
|
27
|
+
|
28
|
+
# Class for parsing Atom feeds
|
29
|
+
class Atom
|
30
|
+
include SAXMachine
|
31
|
+
element :title
|
32
|
+
# the :with argument means that you only match a link tag that has an attribute of :type => "text/html"
|
33
|
+
# the :value argument means that instead of setting the value to the text between the tag,
|
34
|
+
# it sets it to the attribute value of :href
|
35
|
+
element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
|
36
|
+
element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
|
37
|
+
elements :entry, :as => :entries, :class => AtomEntry
|
38
|
+
end
|
39
|
+
|
40
|
+
# you can then parse like this
|
41
|
+
feed = Atom.parse(xml_text)
|
42
|
+
# then you're ready to rock
|
43
|
+
feed.title # => whatever the title of the blog is
|
44
|
+
feed.url # => the main url of the blog
|
45
|
+
feed.feed_url # => goes to the feedburner feed
|
46
|
+
|
47
|
+
feed.entries.first.title # => title of the first entry
|
48
|
+
feed.entries.first.author # => the author of the first entry
|
49
|
+
feed.entries.first.url # => the permalink on the blog for this entry
|
50
|
+
# etc ...
|
51
|
+
|
52
|
+
# you can also use the elements method without specifying a class like so
|
53
|
+
class SomeServiceResponse
|
54
|
+
include SAXMachine
elements :message, :as => :messages
|
55
|
+
end
|
56
|
+
|
57
|
+
response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
|
58
|
+
response.messages.first # => "hi"
|
59
|
+
response.messages.last # => "world"
|
60
|
+
</pre>
|
61
|
+
|
62
|
+
h2. LICENSE
|
63
|
+
|
64
|
+
(The MIT License)
|
65
|
+
|
66
|
+
Copyright (c) 2009:
|
67
|
+
|
68
|
+
"Paul Dix":http://pauldix.net
|
69
|
+
|
70
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
71
|
+
a copy of this software and associated documentation files (the
|
72
|
+
'Software'), to deal in the Software without restriction, including
|
73
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
74
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
75
|
+
permit persons to whom the Software is furnished to do so, subject to
|
76
|
+
the following conditions:
|
77
|
+
|
78
|
+
The above copyright notice and this permission notice shall be
|
79
|
+
included in all copies or substantial portions of the Software.
|
80
|
+
|
81
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
82
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
83
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
84
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
85
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
86
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
87
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Rake tasks for sax-machine: run the spec suite, and build/install the gem.
require "spec"
require "spec/rake/spectask"
# Resolve the library relative to this file instead of relying on "." being
# on the load path (it is not on Ruby >= 1.9.2).
require File.expand_path("lib/sax-machine", File.dirname(__FILE__))

# Spec task: runs every *_spec.rb under spec/ with the options in spec/spec.opts.
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# Builds the gem from sax-machine.gemspec and installs it (uses sudo).
task :install do
  # rm_rf does not expand glob patterns itself, so expand "*.gem" first;
  # passing the literal string would only remove a file named "*.gem".
  rm_rf Dir.glob("*.gem")
  puts `gem build sax-machine.gemspec`
  puts `sudo gem install sax-machine-#{SAXMachine::VERSION}.gem`
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module SAXMachine
  # One frame of XML namespace bindings, stored as prefix => URI.
  #
  # The default namespace is kept under the empty-string prefix. Each frame
  # holds a reference to its parent frame, and lookups that miss locally are
  # delegated to the parent.
  class NSStack < Hash
    # parent - the enclosing NSStack frame, or nil for the outermost frame.
    # attrs  - attribute list of the element opening this frame. Only entries
    #          that are themselves Arrays ([key, value] pairs) are inspected;
    #          "xmlns" and "xmlns:prefix" keys become bindings.
    def initialize(parent=nil, attrs=[])
      super()
      @parent = parent

      attrs.each do |attr|
        next unless attr.kind_of?(Array)

        key, uri = attr
        prefix = case key
                 when 'xmlns' then ''
                 when /^xmlns:(.+)/ then $1
                 end
        self[prefix] = uri if prefix
      end
    end

    # Returns the URI bound to +name+ (converted with to_s, so nil looks up
    # the default namespace). Falls back to the parent frame, and finally to
    # the empty string when no frame defines the prefix.
    def [](name)
      local = super(name.to_s)
      return local if local

      @parent ? @parent[name] : ''
    end

    # Returns the parent frame (nil for the outermost frame). The receiver
    # itself is not modified.
    def pop
      @parent
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Configuration for one collection declaration (an `elements` call that
    # passes :class or :events). It knows which tag name and namespaces it
    # applies to, and builds the nested handler that parses each occurrence.
    class CollectionConfig
      attr_reader :name
      attr_reader :default_xmlns

      # name    - tag name of the collection element (stored as a String).
      # options - Hash; the keys read here are :class, :as, :xmlns (String or
      #           Array of namespace URIs), :default_xmlns and :events.
      def initialize(name, options)
        @name = name.to_s
        @class = options[:class]
        @as = options[:as].to_s
        @xmlns = case options[:xmlns]
                 # Copy the array: '' may be appended below and the caller's
                 # own array must not be modified.
                 when Array then options[:xmlns].dup
                 when String then [options[:xmlns]]
                 else nil
                 end
        @default_xmlns = options[:default_xmlns]
        # With a default namespace configured, elements in the empty
        # namespace must be accepted as well.
        if @default_xmlns && @xmlns && !@xmlns.include?('')
          @xmlns << ''
        end
        @record_events = options[:events]
      end

      # Builds the handler for one occurrence of the collection element.
      #
      # nsstack - the NSStack in effect at the element, or nil.
      #
      # When :default_xmlns is set and no default namespace is bound, a new
      # frame binding '' to the default is pushed on top of +nsstack+.
      # Returns a SAXEventRecorder when :events was given, otherwise a
      # SAXHandler wrapping a fresh instance of the configured :class.
      def handler(nsstack)
        if @default_xmlns && (nsstack.nil? || nsstack[''] == '')
          # The second NSStack argument is an attribute list, not a stack, so
          # only the parent is passed; this also keeps a nil nsstack working.
          nsstack = NSStack.new(nsstack)
          nsstack[''] = @default_xmlns
        end
        if @record_events
          SAXEventRecorder.new(nsstack)
        else
          SAXHandler.new(@class.new, nsstack)
        end
      end

      # Name of the method on the parent object that returns the collection.
      def accessor
        as
      end

      # True when no namespace restriction was configured, or when +ns+ is one
      # of the accepted namespace URIs.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      protected

      def as
        @as
      end

      # NOTE(review): this shadows Object#class with a protected method, so
      # `config.class` cannot be called from outside. Kept unchanged for
      # compatibility.
      def class
        @class || @name
      end
    end

  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require "sax-machine/sax_element_config"
|
2
|
+
require "sax-machine/sax_collection_config"
|
3
|
+
|
4
|
+
module SAXMachine
  # Holds the element and collection declarations of one SAXMachine class.
  # Both tables map a tag name (String) to the list of configs declared for
  # that tag, in declaration order.
  class SAXConfig
    attr_reader :top_level_elements, :collection_elements

    def initialize
      @top_level_elements = {}
      @collection_elements = {}
    end

    # Registers an ElementConfig built from +name+ and +options+.
    def add_top_level_element(name, options)
      key = name.to_s
      (@top_level_elements[key] ||= []) << ElementConfig.new(name, options)
    end

    # Registers a CollectionConfig built from +name+ and +options+.
    def add_collection_element(name, options)
      key = name.to_s
      (@collection_elements[key] ||= []) << CollectionConfig.new(name, options)
    end

    # Returns the first collection config declared for the tag +name+
    # (optionally "prefix:local") whose namespace restriction accepts the
    # namespace that +nsstack+ binds to the tag's prefix, or nil.
    def collection_config(name, nsstack)
      prefix, local = split_qualified_name(name)
      namespace = nsstack[prefix]
      candidates = @collection_elements[local.to_s] || []

      candidates.find do |config|
        config.name.to_s == local.to_s && config.xmlns_match?(namespace)
      end
    end

    # Returns every element config for the tag +name+ (prefix ignored) that
    # reads an attribute value and whose :with condition accepts +attrs+.
    def element_configs_for_attribute(name, attrs)
      local = name.split(':', 2).last
      candidates = @top_level_elements[local.to_s] || []
      candidates.select { |config| config.has_value_and_attrs_match?(attrs) }
    end

    # Returns the first element config for the tag +name+ that accepts both
    # the tag's namespace (resolved through +nsstack+) and +attrs+, or nil.
    def element_config_for_tag(name, attrs, nsstack)
      prefix, local = split_qualified_name(name)
      namespace = nsstack[prefix]
      candidates = @top_level_elements[local.to_s] || []

      candidates.find do |config|
        config.xmlns_match?(namespace) && config.attrs_match?(attrs)
      end
    end

    private

    # Splits "prefix:local" into [prefix, local]; a name without a colon
    # yields [nil, name].
    def split_qualified_name(qname)
      first, rest = qname.split(':', 2)
      rest ? [first, rest] : [nil, first]
    end

  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
# Mixin that gives a class a declarative SAX mapping: `element` / `elements`
# declarations at class level, and `parse` to feed XML into an instance.
module SAXMachine

  def self.included(base)
    base.extend ClassMethods
  end

  # Feeds +xml_text+ to this object's push parser, creating the parser (with a
  # SAXHandler bound to self) on first use. May be called repeatedly with
  # successive chunks. Returns self.
  def parse(xml_text)
    unless @parser
      sax_handler = SAXHandler.new(self)
      @parser = Nokogiri::XML::SAX::PushParser.new(sax_handler)
      # Enable recovery mode when the parser exposes its options.
      @parser.options |= Nokogiri::XML::ParseOptions::RECOVER if @parser.respond_to?(:options)
    end
    @parser << xml_text
    self
  end

  # Finishes the push parser if one was created. Returns self.
  def parse_finish
    @parser.finish if @parser
    self
  end

  module ClassMethods

    # Builds a new instance and feeds it +xml_text+. Returns the instance.
    def parse(xml_text)
      # It might be cleaner to additionally call parse_finish here, but
      # then Nokogiri/libxml2 barfs on incomplete documents. Desired
      # behaviour?
      new.parse(xml_text)
    end

    # Declares a single-valued element.
    #
    # name    - tag name to match.
    # options - Hash passed on to the element config; :as names the accessor
    #           and defaults to +name+. The caller's hash is not modified.
    def element(name, options = {})
      options = options.merge(:as => name) unless options[:as]
      accessor = options[:as]
      sax_config.add_top_level_element(name, options)

      # we only want to insert the getter and setter if they haven't defined it from elsewhere.
      # this is how we allow custom parsing behavior. So you could define the setter
      # and have it parse the string into a date or whatever.
      # method_defined? accepts both strings and symbols, unlike
      # instance_methods.include?, whose element type differs between Ruby
      # versions.
      attr_reader accessor unless method_defined?(accessor)
      attr_writer_once accessor unless method_defined?("#{accessor}=")
    end

    # All element configs declared on this class, as one flat Array.
    def columns
      sax_config.top_level_elements.values.flatten
    end

    # First element config declared for the tag +sym+, or nil.
    def column(sym)
      (sax_config.top_level_elements[sym.to_s] || []).first
    end

    # The :class option of the first config for +sym+.
    def data_class(sym)
      column(sym).data_class
    end

    # The :required option of the first config for +sym+.
    def required?(sym)
      column(sym).required?
    end

    # Accessor names of all declared elements.
    def column_names
      columns.map { |e| e.column }
    end

    # Declares a repeated element collected into an Array.
    #
    # With :class or :events the element is registered as a collection whose
    # occurrences are parsed by a nested handler; otherwise each occurrence's
    # value is appended through a generated add_<as> method.
    # The caller's options hash is not modified.
    def elements(name, options = {})
      options = options.merge(:as => name) unless options[:as]
      accessor = options[:as]
      ivar = "@#{accessor}"

      if options[:class] || options[:events]
        sax_config.add_collection_element(name, options)
      else
        define_method("add_#{accessor}") do |value|
          send(accessor) << value
        end
        sax_config.add_top_level_element(name, options.merge(:collection => true))
      end

      unless method_defined?(accessor)
        # Reader that lazily initialises the collection to an empty Array.
        define_method(accessor) do
          instance_variable_get(ivar) || instance_variable_set(ivar, [])
        end
      end

      attr_writer accessor unless method_defined?("#{accessor}=")
    end

    # The SAXConfig holding this class's declarations (created on first use).
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # Defines a setter that assigns only while the attribute is still unset
    # (nil or false), so the first value seen wins.
    def attr_writer_once(attr)
      ivar = "@#{attr}"
      define_method("#{attr}=") do |val|
        instance_variable_get(ivar) || instance_variable_set(ivar, val)
      end
    end
  end

end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Configuration for one `element` declaration (or an `elements`
    # declaration without :class/:events): which tag it applies to, which
    # attributes it requires, and which setter receives the value.
    class ElementConfig
      attr_reader :name, :setter, :data_class

      # name    - tag name (stored as a String).
      # options - Hash; the keys read here are :with, :value, :as,
      #           :collection, :class, :required and :xmlns.
      def initialize(name, options)
        @name = name.to_s

        # :with is flattened to a list of strings up front, for faster
        # comparisons later.
        @with = options.has_key?(:with) ? options[:with].to_a.flatten.map { |item| item.to_s } : nil
        @value = options.has_key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]
        # Collections append through add_<as>; single values use <as>=.
        @setter = @collection ? "add_#{options[:as]}" : "#{@as}="

        @data_class = options[:class]
        @required = options[:required]

        requested_ns = options[:xmlns]
        @xmlns = if requested_ns.is_a?(Array)
                   requested_ns
                 elsif requested_ns.is_a?(String)
                   [requested_ns]
                 end
      end

      # Accessor name: the :as option, or the tag name as a Symbol.
      def column
        @as || @name.to_sym
      end

      def required?
        @required
      end

      # Returns the entry that follows the first occurrence of the :value
      # name in +attrs+, or nil when the name does not occur.
      def value_from_attrs(attrs)
        position = attrs.index(@value)
        position ? attrs[position + 1] : nil
      end

      # True when no :with condition was given, or when every :with entry is
      # present in +attrs+.
      def attrs_match?(attrs)
        return true unless @with

        (@with & attrs) == @with
      end

      # True when a :value attribute was configured and +attrs+ satisfy :with.
      def has_value_and_attrs_match?(attrs)
        !@value.nil? && attrs_match?(attrs)
      end

      # True when no namespace restriction was configured, or when +ns+ is one
      # of the accepted namespace URIs.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      def collection?
        @collection
      end
    end

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module SAXMachine
  # SAXHandler variant that records the SAX events it receives as plain
  # arrays instead of populating an object. It is the handler used for
  # collections declared with the :events option.
  class SAXEventRecorder < SAXHandler
    def initialize(nsstack)
      super(nil, nsstack)
      @events = []
    end

    # The recorded events without the first and the last one, which belong
    # to the parent element.
    def object
      @events[1..-2]
    end

    # Records [:start_element, namespace_uri, local_name, attrs] after
    # pushing a namespace frame for the element.
    def start_element(name, attrs = [])
      @nsstack = NSStack.new(@nsstack, attrs)
      prefix, local = split_prefix(name)
      @events.push([:start_element, @nsstack[prefix], local, attrs])
    end

    # Records [:end_element, namespace_uri, local_name], then drops the
    # element's namespace frame.
    def end_element(name)
      prefix, local = split_prefix(name)
      @events.push([:end_element, @nsstack[prefix], local])
      @nsstack = @nsstack.pop
    end

    # Records [:chars, string].
    def characters(string)
      @events.push([:chars, string])
    end

    # A recorder has no declarations to consult; asking for them is an error.
    def sax_config
      raise
    end

    private

    # Splits "prefix:local" into [prefix, local]; a name without a colon
    # yields [nil, name].
    def split_prefix(qname)
      head, tail = qname.split(':', 2)
      tail ? [head, tail] : [nil, head]
    end
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "sax-machine/ns_stack"
|
3
|
+
|
4
|
+
module SAXMachine
  # Nokogiri SAX document handler that fills +object+ according to the
  # declarations in its class's sax_config. While inside a collection
  # element, events are forwarded to a nested handler for that collection.
  class SAXHandler < Nokogiri::XML::SAX::Document
    attr_reader :object

    # object  - the instance being populated.
    # nsstack - namespace stack to start from; a fresh NSStack when nil.
    def initialize(object, nsstack=nil)
      @object = object
      @nsstack = nsstack || NSStack.new
    end

    # Forwards text to the nested collection handler, or appends it to the
    # value being captured for the current element config.
    def characters(string)
      if parsing_collection?
        @collection_handler.characters(string)
      elsif @element_config
        @value << string
      end
    end

    # CDATA is handled exactly like character data.
    def cdata_block(string)
      characters(string)
    end

    # Handles an opening tag: pushes a namespace frame, then either forwards
    # to the active collection handler, opens a new collection, applies
    # attribute-valued element configs, or prepares to capture the element's
    # text.
    def start_element(name, attrs = [])

      @name   = name
      @attrs  = attrs.map { |a| SAXHandler.decode_xml(a) }
      @nsstack  = NSStack.new(@nsstack, @attrs)

      if parsing_collection?
        @collection_handler.start_element(@name, @attrs)

      elsif (@collection_config = sax_config.collection_config(@name, @nsstack))
        @collection_handler = @collection_config.handler(@nsstack)
        # Forward the opening tag unless the nested handler's object has the
        # same class as this handler's object.
        if @object.class != @collection_handler.object.class
          @collection_handler.start_element(@name, @attrs)
        end
      elsif (element_configs = sax_config.element_configs_for_attribute(@name, @attrs)).any?
        parse_element_attributes(element_configs)
        set_element_config_for_element_value

      else
        set_element_config_for_element_value
      end
    end

    # Handles a closing tag. A tag whose local name equals the active
    # collection's name closes the collection and appends its object to the
    # parent's accessor; other tags inside a collection are forwarded;
    # otherwise any captured text is passed to the element's setter.
    def end_element(name)
      if parsing_collection? && @collection_config.name == name.split(':').last
        @collection_handler.end_element(name)
        @object.send(@collection_config.accessor) << @collection_handler.object
        reset_current_collection

      elsif parsing_collection?
        @collection_handler.end_element(name)

      elsif characters_captured?
        @object.send(@element_config.setter, @value)
      end

      reset_current_tag
      @nsstack = @nsstack.pop
    end

    # True when non-empty text has been captured for the current element.
    def characters_captured?
      !@value.nil? && !@value.empty?
    end
    # The original, misspelled name is kept for existing callers.
    alias_method :characaters_captured?, :characters_captured?

    def parsing_collection?
      !@collection_handler.nil?
    end

    # Copies attributes onto the collection instance for every even-indexed
    # entry of @attrs that names a setter the instance responds to.
    def parse_collection_instance_attributes
      instance = @collection_handler.object
      @attrs.each_with_index do |attr_name, index|
        setter = "#{attr_name}="
        # respond_to? works with a String name on every Ruby version, whereas
        # methods.include?(String) never matches once `methods` returns symbols.
        instance.send(setter, @attrs[index + 1]) if index % 2 == 0 && instance.respond_to?(setter)
      end
    end

    # Applies every attribute-valued element config to the object.
    def parse_element_attributes(element_configs)
      element_configs.each do |ec|
        @object.send(ec.setter, ec.value_from_attrs(@attrs))
      end
      @element_config = nil
    end

    # Starts capturing text for the current tag, if a config matches it.
    def set_element_config_for_element_value
      @value = ""
      @element_config = sax_config.element_config_for_tag(@name, @attrs, @nsstack)
    end

    def reset_current_collection
      @collection_handler = nil
      @collection_config  = nil
    end

    def reset_current_tag
      @name   = nil
      @attrs  = nil
      @value  = nil
      @element_config = nil
    end

    # The declarations of the class of the object being populated.
    def sax_config
      @object.class.sax_config
    end

    ##
    # Decodes XML special characters.
    # Arrays are decoded element by element (recursively); anything else is
    # passed to CGI.unescapeHTML.
    def self.decode_xml(str)
      return str.map { |item| decode_xml(item) } if str.kind_of?(Array)

      CGI.unescapeHTML(str)
    end

  end
end
|
data/lib/sax-machine.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
3
|
+
require "cgi"
|
4
|
+
|
5
|
+
require "sax-machine/sax_document"
|
6
|
+
require "sax-machine/sax_handler"
|
7
|
+
require "sax-machine/sax_config"
|
8
|
+
require "sax-machine/sax_event_recorder"
|
9
|
+
|
10
|
+
module SAXMachine
  # Library version string; the Rakefile's install task uses it to name the
  # built gem file.
  # NOTE(review): the package listing this file came from is labelled
  # 0.0.22.1 - confirm whether this constant is stale.
  VERSION = "0.0.21"
end
|
@@ -0,0 +1,657 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe "SAXMachine" do
|
4
|
+
describe "element" do
|
5
|
+
describe "when parsing a single element" do
|
6
|
+
before :each do
|
7
|
+
@klass = Class.new do
|
8
|
+
include SAXMachine
|
9
|
+
element :title
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should provide an accessor" do
|
14
|
+
document = @klass.new
|
15
|
+
document.title = "Title"
|
16
|
+
document.title.should == "Title"
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should allow introspection of the elements" do
|
20
|
+
@klass.column_names.should =~ [:title]
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should not overwrite the setter if there is already one present" do
|
24
|
+
@klass = Class.new do
|
25
|
+
def title=(val)
|
26
|
+
@title = "#{val} **"
|
27
|
+
end
|
28
|
+
include SAXMachine
|
29
|
+
element :title
|
30
|
+
end
|
31
|
+
document = @klass.new
|
32
|
+
document.title = "Title"
|
33
|
+
document.title.should == "Title **"
|
34
|
+
end
|
35
|
+
describe "the class attribute" do
|
36
|
+
before(:each) do
|
37
|
+
@klass = Class.new do
|
38
|
+
include SAXMachine
|
39
|
+
element :date, :class => DateTime
|
40
|
+
end
|
41
|
+
@document = @klass.new
|
42
|
+
@document.date = DateTime.now.to_s
|
43
|
+
end
|
44
|
+
it "should be available" do
|
45
|
+
@klass.data_class(:date).should == DateTime
|
46
|
+
end
|
47
|
+
end
|
48
|
+
describe "the required attribute" do
|
49
|
+
it "should be available" do
|
50
|
+
@klass = Class.new do
|
51
|
+
include SAXMachine
|
52
|
+
element :date, :required => true
|
53
|
+
end
|
54
|
+
@klass.required?(:date).should be_true
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should not overwrite the accessor when the element is not present" do
|
59
|
+
document = @klass.new
|
60
|
+
document.title = "Title"
|
61
|
+
document.parse("<foo></foo>")
|
62
|
+
document.title.should == "Title"
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should *not* overwrite the value when the element is present (new behaviour!)" do
|
66
|
+
document = @klass.new
|
67
|
+
document.title = "Old title"
|
68
|
+
document.parse("<title>New title</title>")
|
69
|
+
document.title.should == "Old title"
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should save the element text into an accessor" do
|
73
|
+
document = @klass.parse("<title>My Title</title>")
|
74
|
+
document.title.should == "My Title"
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should save cdata into an accessor" do
|
78
|
+
document = @klass.parse("<title><![CDATA[A Title]]></title>")
|
79
|
+
document.title.should == "A Title"
|
80
|
+
end
|
81
|
+
|
82
|
+
it "should save the element text into an accessor when there are multiple elements" do
|
83
|
+
document = @klass.parse("<xml><title>My Title</title><foo>bar</foo></xml>")
|
84
|
+
document.title.should == "My Title"
|
85
|
+
end
|
86
|
+
|
87
|
+
it "should save the first element text when there are multiple of the same element" do
|
88
|
+
document = @klass.parse("<xml><title>My Title</title><title>bar</title></xml>")
|
89
|
+
document.title.should == "My Title"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "when parsing multiple elements" do
|
94
|
+
before :each do
|
95
|
+
@klass = Class.new do
|
96
|
+
include SAXMachine
|
97
|
+
element :title
|
98
|
+
element :name
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should save the element text for a second tag" do
|
103
|
+
document = @klass.parse("<xml><title>My Title</title><name>Paul</name></xml>")
|
104
|
+
document.name.should == "Paul"
|
105
|
+
document.title.should == "My Title"
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
describe "when using options for parsing elements" do
|
110
|
+
describe "using the 'as' option" do
|
111
|
+
before :each do
|
112
|
+
@klass = Class.new do
|
113
|
+
include SAXMachine
|
114
|
+
element :description, :as => :summary
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should provide an accessor using the 'as' name" do
|
119
|
+
document = @klass.new
|
120
|
+
document.summary = "a small summary"
|
121
|
+
document.summary.should == "a small summary"
|
122
|
+
end
|
123
|
+
|
124
|
+
it "should save the element text into the 'as' accessor" do
|
125
|
+
document = @klass.parse("<description>here is a description</description>")
|
126
|
+
document.summary.should == "here is a description"
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
describe "using the :with option" do
|
131
|
+
describe "and the :value option" do
|
132
|
+
before :each do
|
133
|
+
@klass = Class.new do
|
134
|
+
include SAXMachine
|
135
|
+
element :link, :value => :href, :with => {:foo => "bar"}
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
it "should escape correctly the ampersand" do
|
140
|
+
document = @klass.parse("<link href='http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&lang=en-us&format=atom' foo='bar'>asdf</link>")
|
141
|
+
document.link.should == "http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&lang=en-us&format=atom"
|
142
|
+
end
|
143
|
+
|
144
|
+
it "should save the value of a matching element" do
|
145
|
+
document = @klass.parse("<link href='test' foo='bar'>asdf</link>")
|
146
|
+
document.link.should == "test"
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should save the value of the first matching element" do
|
150
|
+
document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' foo='bar' /></xml>")
|
151
|
+
document.link.should == "first"
|
152
|
+
end
|
153
|
+
|
154
|
+
describe "and the :as option" do
|
155
|
+
before :each do
|
156
|
+
@klass = Class.new do
|
157
|
+
include SAXMachine
|
158
|
+
element :link, :value => :href, :as => :url, :with => {:foo => "bar"}
|
159
|
+
element :link, :value => :href, :as => :second_url, :with => {:asdf => "jkl"}
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
it "should save the value of the first matching element" do
|
164
|
+
document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' asdf='jkl' /><link href='second' foo='bar' /></xml>")
|
165
|
+
document.url.should == "first"
|
166
|
+
document.second_url.should == "second"
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
describe "with only one element" do
|
172
|
+
before :each do
|
173
|
+
@klass = Class.new do
|
174
|
+
include SAXMachine
|
175
|
+
element :link, :with => {:foo => "bar"}
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
it "should save the text of an element that has matching attributes" do
|
180
|
+
document = @klass.parse("<link foo=\"bar\">match</link>")
|
181
|
+
document.link.should == "match"
|
182
|
+
end
|
183
|
+
|
184
|
+
it "should not save the text of an element that doesn't have matching attributes" do
|
185
|
+
document = @klass.parse("<link>no match</link>")
|
186
|
+
document.link.should be_nil
|
187
|
+
end
|
188
|
+
|
189
|
+
it "should save the text of an element that has matching attributes when it is the second of that type" do
|
190
|
+
document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">match</link></xml>")
|
191
|
+
document.link.should == "match"
|
192
|
+
end
|
193
|
+
|
194
|
+
it "should save the text of an element that has matching attributes plus a few more" do
|
195
|
+
document = @klass.parse("<xml><link>no match</link><link asdf='jkl' foo='bar'>match</link>")
|
196
|
+
document.link.should == "match"
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
describe "with multiple elements of same tag" do
|
201
|
+
before :each do
|
202
|
+
@klass = Class.new do
|
203
|
+
include SAXMachine
|
204
|
+
element :link, :as => :first, :with => {:foo => "bar"}
|
205
|
+
element :link, :as => :second, :with => {:asdf => "jkl"}
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
it "should match the first element" do
|
210
|
+
document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">first match</link><link>no match</link></xml>")
|
211
|
+
document.first.should == "first match"
|
212
|
+
end
|
213
|
+
|
214
|
+
it "should match the second element" do
|
215
|
+
document = @klass.parse("<xml><link>no match</link><link foo='bar'>first match</link><link asdf='jkl'>second match</link><link>hi</link></xml>")
|
216
|
+
document.second.should == "second match"
|
217
|
+
end
|
218
|
+
end
|
219
|
+
end # using the 'with' option
|
220
|
+
|
221
|
+
describe "using the 'value' option" do
|
222
|
+
before :each do
|
223
|
+
@klass = Class.new do
|
224
|
+
include SAXMachine
|
225
|
+
element :link, :value => :foo
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
it "should save the attribute value" do
|
230
|
+
document = @klass.parse("<link foo='test'>hello</link>")
|
231
|
+
document.link.should == 'test'
|
232
|
+
end
|
233
|
+
|
234
|
+
it "should save the attribute value when there is no text enclosed by the tag" do
|
235
|
+
document = @klass.parse("<link foo='test'></link>")
|
236
|
+
document.link.should == 'test'
|
237
|
+
end
|
238
|
+
|
239
|
+
it "should save the attribute value when the tag close is in the open" do
|
240
|
+
document = @klass.parse("<link foo='test'/>")
|
241
|
+
document.link.should == 'test'
|
242
|
+
end
|
243
|
+
|
244
|
+
it "should save two different attribute values on a single tag" do
|
245
|
+
@klass = Class.new do
|
246
|
+
include SAXMachine
|
247
|
+
element :link, :value => :foo, :as => :first
|
248
|
+
element :link, :value => :bar, :as => :second
|
249
|
+
end
|
250
|
+
document = @klass.parse("<link foo='foo value' bar='bar value'></link>")
|
251
|
+
document.first.should == "foo value"
|
252
|
+
document.second.should == "bar value"
|
253
|
+
end
|
254
|
+
|
255
|
+
it "should not fail if one of the attribute hasn't been defined" do
|
256
|
+
@klass = Class.new do
|
257
|
+
include SAXMachine
|
258
|
+
element :link, :value => :foo, :as => :first
|
259
|
+
element :link, :value => :bar, :as => :second
|
260
|
+
end
|
261
|
+
document = @klass.parse("<link foo='foo value'></link>")
|
262
|
+
document.first.should == "foo value"
|
263
|
+
document.second.should be_nil
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
describe "when desiring both the content and attributes of an element" do
|
268
|
+
before :each do
|
269
|
+
@klass = Class.new do
|
270
|
+
include SAXMachine
|
271
|
+
element :link
|
272
|
+
element :link, :value => :foo, :as => :link_foo
|
273
|
+
element :link, :value => :bar, :as => :link_bar
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
it "should parse the element and attribute values" do
|
278
|
+
document = @klass.parse("<link foo='test1' bar='test2'>hello</link>")
|
279
|
+
document.link.should == 'hello'
|
280
|
+
document.link_foo.should == 'test1'
|
281
|
+
document.link_bar.should == 'test2'
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
describe "when specifying namespaces" do
|
286
|
+
before :all do
|
287
|
+
@klass = Class.new do
|
288
|
+
include SAXMachine
|
289
|
+
element :a, :xmlns => 'urn:test'
|
290
|
+
element :b, :xmlns => ['', 'urn:test']
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
it "should get the element with the xmlns" do
|
295
|
+
document = @klass.parse("<a xmlns='urn:test'>hello</a>")
|
296
|
+
document.a.should == 'hello'
|
297
|
+
end
|
298
|
+
|
299
|
+
it "shouldn't get the element without the xmlns" do
|
300
|
+
document = @klass.parse("<a>hello</a>")
|
301
|
+
document.a.should be_nil
|
302
|
+
end
|
303
|
+
|
304
|
+
it "shouldn't get the element with the wrong xmlns" do
|
305
|
+
document = @klass.parse("<a xmlns='urn:test2'>hello</a>")
|
306
|
+
document.a.should be_nil
|
307
|
+
end
|
308
|
+
|
309
|
+
it "should get an element without xmlns if the empty namespace is desired" do
|
310
|
+
document = @klass.parse("<b>hello</b>")
|
311
|
+
document.b.should == 'hello'
|
312
|
+
end
|
313
|
+
|
314
|
+
it "should get an element with the right prefix" do
|
315
|
+
document = @klass.parse("<p:a xmlns:p='urn:test'>hello</p:a>")
|
316
|
+
document.a.should == 'hello'
|
317
|
+
end
|
318
|
+
|
319
|
+
it "should not get an element with the wrong prefix" do
|
320
|
+
document = @klass.parse("<x:a xmlns:p='urn:test' xmlns:x='urn:test2'>hello</x:a>")
|
321
|
+
document.a.should be_nil
|
322
|
+
end
|
323
|
+
|
324
|
+
it "should get a prefixed element without xmlns if the empty namespace is desired" do
|
325
|
+
pending "this needs a less pickier nokogiri push parser"
|
326
|
+
document = @klass.parse("<x:b>hello</x:b>")
|
327
|
+
document.b.should == 'hello'
|
328
|
+
end
|
329
|
+
|
330
|
+
it "should get the namespaced element even it's not first" do
|
331
|
+
document = @klass.parse("<root xmlns:a='urn:test'><a>foo</a><a>foo</a><a:a>bar</a:a></root>")
|
332
|
+
document.a.should == 'bar'
|
333
|
+
end
|
334
|
+
|
335
|
+
it "should parse multiple namespaces" do
|
336
|
+
klass = Class.new do
|
337
|
+
include SAXMachine
|
338
|
+
element :a, :xmlns => 'urn:test'
|
339
|
+
element :b, :xmlns => 'urn:test2'
|
340
|
+
end
|
341
|
+
document = klass.parse("<root xmlns='urn:test' xmlns:b='urn:test2'><b:b>bar</b:b><a>foo</a></root>")
|
342
|
+
document.a.should == 'foo'
|
343
|
+
document.b.should == 'bar'
|
344
|
+
end
|
345
|
+
|
346
|
+
context "when passing a default namespace" do
  before :all do
    @xmlns = 'urn:test'

    # NOTE(review): inside `class Inner ... end` and inside the `Class.new do`
    # block, `self` is the class being defined (that is why `include` and
    # `element` work there). `@xmlns` in those bodies is therefore the class's
    # own instance variable, not the 'urn:test' assigned above, and no visible
    # code sets it -- so :xmlns / :default_xmlns presumably receive nil.
    # Confirm whether that is intended before changing these examples.
    class Inner
      include SAXMachine
      element :a, :xmlns => @xmlns
    end

    @outer = Class.new do
      include SAXMachine
      elements :root, :default_xmlns => @xmlns, :class => Inner
    end
  end

  it "should replace the empty namespace with a default" do
    parsed = @outer.parse("<root><a>Hello</a></root>")
    parsed.root[0].a.should == 'Hello'
  end

  it "should not replace another namespace" do
    parsed = @outer.parse("<root xmlns='urn:test2'><a>Hello</a></root>")
    parsed.root[0].a.should == 'Hello'
  end
end
|
369
|
+
end
|
370
|
+
|
371
|
+
end
|
372
|
+
end
|
373
|
+
|
374
|
+
# Examples for the `elements` (collection) declaration: plain text values,
# attribute values, nested classes via :class, and raw SAX events via :events.
describe "elements" do
  describe "when parsing multiple elements" do
    before :all do
      @klass = Class.new do
        include SAXMachine
        elements :entry, :as => :entries
      end
    end

    it "should provide a collection accessor" do
      document = @klass.new
      document.entries << :foo
      document.entries.should == [:foo]
    end

    it "should parse a single element" do
      document = @klass.parse("<entry>hello</entry>")
      document.entries.should == ["hello"]
    end

    it "should parse multiple elements" do
      document = @klass.parse("<xml><entry>hello</entry><entry>world</entry></xml>")
      document.entries.should == ["hello", "world"]
    end

    it "should parse multiple elements when taking an attribute value" do
      attribute_klass = Class.new do
        include SAXMachine
        elements :entry, :as => :entries, :value => :foo
      end
      doc = attribute_klass.parse("<xml><entry foo='asdf' /><entry foo='jkl' /></xml>")
      doc.entries.should == ["asdf", "jkl"]
    end
  end

  describe "when using the class option" do
    before :each do
      # Build fresh anonymous classes for every example. Reopening a global
      # `class Foo` here would re-register :title before each example and let
      # rules added by one example leak into the constant for the rest of the run.
      # A local is used (not only the ivar) because the Class.new block below
      # is evaluated with the new class as self and cannot see this group's ivars.
      entry_class = @entry_class = Class.new do
        include SAXMachine
        element :title
      end

      @klass = Class.new do
        include SAXMachine
        elements :entry, :as => :entries, :class => entry_class
      end
    end

    it "should parse a single element with children" do
      document = @klass.parse("<entry><title>a title</title></entry>")
      document.entries.size.should == 1
      document.entries.first.title.should == "a title"
    end

    it "should parse multiple elements with children" do
      document = @klass.parse("<xml><entry><title>title 1</title></entry><entry><title>title 2</title></entry></xml>")
      document.entries.size.should == 2
      document.entries.first.title.should == "title 1"
      document.entries.last.title.should == "title 2"
    end

    it "should not parse a top level element that is specified only in a child" do
      document = @klass.parse("<xml><title>no parse</title><entry><title>correct title</title></entry></xml>")
      document.entries.size.should == 1
      document.entries.first.title.should == "correct title"
    end

    it "should parse out an attribute value from the tag that starts the collection" do
      # Add the extra rule only to this example's entry class.
      @entry_class.class_eval do
        element :entry, :value => :href, :as => :url
      end
      document = @klass.parse("<xml><entry href='http://pauldix.net'><title>paul</title></entry></xml>")
      document.entries.size.should == 1
      document.entries.first.title.should == "paul"
      document.entries.first.url.should == "http://pauldix.net"
    end
  end

  describe "when desiring sax events" do
    XHTML_XMLNS = "http://www.w3.org/1999/xhtml"

    before :all do
      @klass = Class.new do
        include SAXMachine
        elements :body, :events => true
      end
    end

    # Each recorded event is an array: [:start_element, namespace, name, attributes],
    # [:chars, text] or [:end_element, namespace, name], as asserted below.
    it "should parse a simple child" do
      document = @klass.parse("<body><p/></body>")
      document.body[0].should == [[:start_element, "", "p", []],
                                  [:end_element, "", "p"]]
    end

    it "should parse a simple child with text" do
      document = @klass.parse("<body><p>Hello</p></body>")
      document.body[0].should == [[:start_element, "", "p", []],
                                  [:chars, "Hello"],
                                  [:end_element, "", "p"]]
    end

    it "should parse nested children" do
      document = @klass.parse("<body><p><span/></p></body>")
      document.body[0].should == [[:start_element, "", "p", []],
                                  [:start_element, "", "span", []],
                                  [:end_element, "", "span"],
                                  [:end_element, "", "p"]]
    end

    it "should parse multiple children" do
      document = @klass.parse("<body><p>Hello</p><p>World</p></body>")
      document.body[0].should == [[:start_element, "", "p", []],
                                  [:chars, "Hello"],
                                  [:end_element, "", "p"],
                                  [:start_element, "", "p", []],
                                  [:chars, "World"],
                                  [:end_element, "", "p"]]
    end

    it "should pass namespaces" do
      document = @klass.parse("<body xmlns='#{XHTML_XMLNS}'><p/></body>")
      document.body[0].should == [[:start_element, XHTML_XMLNS, "p", []],
                                  [:end_element, XHTML_XMLNS, "p"]]
    end
  end
end
|
495
|
+
|
496
|
+
# End-to-end example: parses the atom.xml fixture into an Atom feed object
# whose entries are AtomEntry instances.
describe "full example" do
  XMLNS_ATOM = "http://www.w3.org/2005/Atom"
  XMLNS_FEEDBURNER = "http://rssnamespace.org/feedburner/ext/1.0"

  before :each do
    # Resolve the fixture relative to this spec file (spec/sax-machine/) so the
    # examples do not depend on the directory the spec runner is started from.
    @xml = File.read(File.join(File.dirname(__FILE__), 'atom.xml'))

    # NOTE(review): these class bodies are re-evaluated before every example,
    # so each rule is declared again on the same constants; confirm that
    # repeated declarations are harmless for SAXMachine.
    class AtomEntry
      include SAXMachine
      element :title
      element :name, :as => :author
      element :origLink, :as => :orig_link, :xmlns => XMLNS_FEEDBURNER
      element :summary
      element :content
      element :published
    end

    class Atom
      include SAXMachine
      element :title
      element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
      element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
      elements :entry, :as => :entries, :class => AtomEntry, :xmlns => XMLNS_ATOM
    end
  end # before

  it "should parse the url" do
    f = Atom.parse(@xml)
    f.url.should == "http://www.pauldix.net/"
  end

  it "should parse all entries" do
    f = Atom.parse(@xml)
    f.entries.length.should == 5
  end

  it "should parse the feedburner:origLink" do
    f = Atom.parse(@xml)
    f.entries[0].orig_link.should == 'http://www.pauldix.net/2008/09/marshal-data-to.html'
  end
end
|
536
|
+
|
537
|
+
# End-to-end example: one Channel/Entry model declared for both RSS and Atom
# namespaces, selected through :default_xmlns on the root element.
describe "another full example" do
  RSS_XMLNS = 'http://purl.org/rss/1.0/'
  ATOM_XMLNS = 'http://www.w3.org/2005/Atom'

  # An item/entry: title and link are declared once per namespace.
  class Entry
    include SAXMachine
    element :title, :xmlns => RSS_XMLNS
    element :title, :xmlns => ATOM_XMLNS
    element :link, :xmlns => RSS_XMLNS
    element :link, :xmlns => ATOM_XMLNS, :value => 'href'
  end

  # A feed: same title/link rules, plus entries collected from <entry> and <item>.
  class Channel
    include SAXMachine
    element :title, :xmlns => RSS_XMLNS
    element :title, :xmlns => ATOM_XMLNS
    element :link, :xmlns => RSS_XMLNS
    element :link, :xmlns => ATOM_XMLNS, :value => 'href'
    elements :entry, :as => :entries, :class => Entry
    elements :item, :as => :entries, :class => Entry
  end

  # Document root: <rss> and <feed> both populate :channels, each with the
  # default namespace of its own format.
  class Root
    include SAXMachine
    elements :rss, :as => :channels, :default_xmlns => RSS_XMLNS, :class => Channel
    elements :feed, :as => :channels, :default_xmlns => ATOM_XMLNS, :class => Channel
  end

  context "when parsing a complex example" do
    before :all do
      # The XML is kept flush-left so the document text itself is unchanged.
      feed_xml = <<-eoxml
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/">
<channel>
<title>Delicious/tag/pubsubhubbub</title>
<atom:link rel="self" type="application/rss+xml" href="http://feeds.delicious.com/v2/rss/tag/pubsubhubbub?count=15"/>
<link>http://delicious.com/tag/pubsubhubbub</link>
<description>recent bookmarks tagged pubsubhubbub</description>
</channel>
</rss>
eoxml
      @document = Root.parse(feed_xml).channels[0]
    end

    it "should parse the title" do
      @document.title.should == 'Delicious/tag/pubsubhubbub'
    end

    it "should parse the link" do
      @document.link.should == 'http://feeds.delicious.com/v2/rss/tag/pubsubhubbub?count=15'
    end
  end
end
|
590
|
+
|
591
|
+
# End-to-end example: an RSS 2.0 feed (Twitter timeline) whose elements carry
# no namespace, matched by rules that accept either the RSS 1.0 namespace or
# the empty namespace.
describe "yet another full example" do
  context "when parsing a Twitter example" do
    before :all do
      # RSS_XMLNS and Entry are already defined at top level by the
      # "another full example" group in this file. Reassigning the constant
      # triggers an "already initialized constant" warning, and reopening Entry
      # would merge these rules into that group's class, so this group uses its
      # own names instead.
      TWITTER_RSS_XMLNS = ['http://purl.org/rss/1.0/', ''] unless defined? TWITTER_RSS_XMLNS
      ATOM_XMLNS = 'http://www.w3.org/2005/Atom' unless defined? ATOM_XMLNS

      class Link
        include SAXMachine
      end

      class TwitterEntry
        include SAXMachine
        element :title, :xmlns => TWITTER_RSS_XMLNS
        element :link, :xmlns => TWITTER_RSS_XMLNS, :as => :entry_link
        element :title, :xmlns => ATOM_XMLNS, :as => :title
        elements :link, :xmlns => ATOM_XMLNS, :as => :links, :class => Link
      end

      class Feed
        include SAXMachine
        element :title, :xmlns => TWITTER_RSS_XMLNS, :as => :title
        element :link, :xmlns => TWITTER_RSS_XMLNS, :as => :feed_link
        elements :item, :xmlns => TWITTER_RSS_XMLNS, :as => :entries, :class => TwitterEntry
        element :title, :xmlns => ATOM_XMLNS, :as => :title
        elements :link, :xmlns => ATOM_XMLNS, :as => :links, :class => Link
      end

      # The XML is kept flush-left so the document text itself is unchanged.
      @document = Feed.parse(<<-eoxml)
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<atom:link type="application/rss+xml" rel="self" href="http://twitter.com/statuses/user_timeline/5381582.rss"/>
<title>Twitter / julien51</title>
<link>http://twitter.com/julien51</link>
<description>Twitter updates from julien / julien51.</description>
<language>en-us</language>
<ttl>40</ttl>
<item>
<title>julien51: @github : I get an error when trying to build one of my gems (julien51-sax-machine), it seems related to another gem&apos;s gemspec.</title>
<description>julien51: @github : I get an error when trying to build one of my gems (julien51-sax-machine), it seems related to another gem&apos;s gemspec.</description>
<pubDate>Thu, 30 Jul 2009 01:00:30 +0000</pubDate>
<guid>http://twitter.com/julien51/statuses/2920716033</guid>
<link>http://twitter.com/julien51/statuses/2920716033</link>
</item>
<item>
<title>julien51: Hum, San Francisco&apos;s summer are delightful. http://bit.ly/VeXt4</title>
<description>julien51: Hum, San Francisco&apos;s summer are delightful. http://bit.ly/VeXt4</description>
<pubDate>Wed, 29 Jul 2009 23:07:32 +0000</pubDate>
<guid>http://twitter.com/julien51/statuses/2918869948</guid>
<link>http://twitter.com/julien51/statuses/2918869948</link>
</item>
</channel>
</rss>
eoxml
    end

    it "should parse the title" do
      @document.title.should == 'Twitter / julien51'
    end

    it "should find an entry" do
      @document.entries.length.should == 2
    end
  end
end
|
657
|
+
end
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Shared setup for the spec suite: loads the spec framework, optionally the
# redgreen output colorizer, and the library under test from this checkout.
require "rubygems"
require "spec"

# gem install redgreen for colored test output
begin
  require "redgreen" unless ENV['TM_CURRENT_LINE']
rescue LoadError
  # redgreen is optional; continue without colored output.
end

# Put this checkout's lib/ directory at the front of the load path.
path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
$LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)

# Require through the load path set up above. The previous
# `require "lib/sax-machine"` only resolved when the working directory was the
# project root and "." was on $LOAD_PATH (no longer the case since Ruby 1.9.2).
require "sax-machine"

# Spec::Runner.configure do |config|
# end
|
metadata
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: superfeedr-sax-machine
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.22.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paul Dix
|
8
|
+
- Stephan Maka
|
9
|
+
- julien51
|
10
|
+
- superfeedr
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
|
15
|
+
date: 2009-11-16 00:00:00 +01:00
|
16
|
+
default_executable:
|
17
|
+
dependencies:
|
18
|
+
- !ruby/object:Gem::Dependency
|
19
|
+
name: nokogiri
|
20
|
+
type: :runtime
|
21
|
+
version_requirement:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.0
|
27
|
+
version:
|
28
|
+
description:
|
29
|
+
email: paul@pauldix.net stephan@spaceboyz.net
|
30
|
+
executables: []
|
31
|
+
|
32
|
+
extensions: []
|
33
|
+
|
34
|
+
extra_rdoc_files: []
|
35
|
+
|
36
|
+
files:
|
37
|
+
- lib/sax-machine.rb
|
38
|
+
- lib/sax-machine/sax_config.rb
|
39
|
+
- lib/sax-machine/sax_collection_config.rb
|
40
|
+
- lib/sax-machine/sax_element_config.rb
|
41
|
+
- lib/sax-machine/sax_document.rb
|
42
|
+
- lib/sax-machine/sax_handler.rb
|
43
|
+
- lib/sax-machine/ns_stack.rb
|
44
|
+
- lib/sax-machine/sax_event_recorder.rb
|
45
|
+
- README.textile
|
46
|
+
- Rakefile
|
47
|
+
- spec/spec.opts
|
48
|
+
- spec/spec_helper.rb
|
49
|
+
- spec/sax-machine/sax_document_spec.rb
|
50
|
+
has_rdoc: true
|
51
|
+
homepage: http://github.com/pauldix/sax-machine
|
52
|
+
licenses: []
|
53
|
+
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: "0"
|
64
|
+
version:
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: "0"
|
70
|
+
version:
|
71
|
+
requirements: []
|
72
|
+
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 1.3.5
|
75
|
+
signing_key:
|
76
|
+
specification_version: 2
|
77
|
+
summary: Declarative SAX Parsing with Nokogiri
|
78
|
+
test_files: []
|
79
|
+
|