superfeedr-sax-machine 0.0.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.textile ADDED
@@ -0,0 +1,87 @@
1
+ h1. SAX Machine
2
+
3
+ "http://github.com/pauldix/sax-machine/wikis":http://github.com/pauldix/sax-machine/wikis
4
+
5
+ "http://github.com/pauldix/sax-machine/tree/master":http://github.com/pauldix/sax-machine/tree/master
6
+
7
+ h2. Description
8
+
9
+ A declarative SAX parsing library backed by Nokogiri
10
+
11
+ h2. Usage
12
+
13
+ <pre>
14
+ require 'sax-machine'
15
+
16
+ # Class for parsing an atom entry out of a feedburner atom feed
17
+ class AtomEntry
18
+ include SAXMachine
19
+ element :title
20
+ # the :as argument makes this available through atom_entry.author instead of .name
21
+ element :name, :as => :author
22
+ element "feedburner:origLink", :as => :url
23
+ element :summary
24
+ element :content
25
+ element :published
26
+ end
27
+
28
+ # Class for parsing Atom feeds
29
+ class Atom
30
+ include SAXMachine
31
+ element :title
32
+ # the :with argument means that you only match a link tag that has an attribute of :type => "text/html"
33
+ # the :value argument means that instead of setting the value to the text between the tag,
34
+ # it sets it to the attribute value of :href
35
+ element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
36
+ element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
37
+ elements :entry, :as => :entries, :class => AtomEntry
38
+ end
39
+
40
+ # you can then parse like this
41
+ feed = Atom.parse(xml_text)
42
+ # then you're ready to rock
43
+ feed.title # => whatever the title of the blog is
44
+ feed.url # => the main url of the blog
45
+ feed.feed_url # => goes to the feedburner feed
46
+
47
+ feed.entries.first.title # => title of the first entry
48
+ feed.entries.first.author # => the author of the first entry
49
+ feed.entries.first.url # => the permalink on the blog for this entry
50
+ # etc ...
51
+
52
+ # you can also use the elements method without specifying a class like so
53
+ class SomeServiceResponse
54
+ include SAXMachine
+ elements :message, :as => :messages
55
+ end
56
+
57
+ response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
58
+ response.messages.first # => "hi"
59
+ response.messages.last # => "world"
60
+ </pre>
61
+
62
+ h2. LICENSE
63
+
64
+ (The MIT License)
65
+
66
+ Copyright (c) 2009:
67
+
68
+ "Paul Dix":http://pauldix.net
69
+
70
+ Permission is hereby granted, free of charge, to any person obtaining
71
+ a copy of this software and associated documentation files (the
72
+ 'Software'), to deal in the Software without restriction, including
73
+ without limitation the rights to use, copy, modify, merge, publish,
74
+ distribute, sublicense, and/or sell copies of the Software, and to
75
+ permit persons to whom the Software is furnished to do so, subject to
76
+ the following conditions:
77
+
78
+ The above copyright notice and this permission notice shall be
79
+ included in all copies or substantial portions of the Software.
80
+
81
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
82
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
83
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
84
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
85
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
86
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
87
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+ require "spec"
2
+ require "spec/rake/spectask"
3
+ require 'lib/sax-machine.rb'
4
+
5
# Runs every *_spec.rb file below spec/ with the options stored in spec/spec.opts.
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# Builds the gem from sax-machine.gemspec and installs the resulting package.
task :install do
  # rm_rf does not expand wildcards, so passing the literal "*.gem" never
  # removed previously built packages; expand the pattern explicitly.
  rm_rf Dir["*.gem"]
  puts `gem build sax-machine.gemspec`
  puts `sudo gem install sax-machine-#{SAXMachine::VERSION}.gem`
end
@@ -0,0 +1,38 @@
1
module SAXMachine
  # One scope of XML namespace declarations, keyed by prefix. The default
  # namespace is stored under the empty string. Each instance only holds the
  # declarations passed to it and defers every other lookup to its parent.
  class NSStack < Hash
    # parent - the enclosing NSStack, or nil for the outermost scope.
    # attrs  - attribute list of the element opening this scope. Only entries
    #          that are [name, value] arrays are inspected, and of those only
    #          "xmlns" and "xmlns:prefix" names are recorded. nil is treated
    #          as an empty list.
    def initialize(parent=nil, attrs=[])
      super()
      @parent = parent

      (attrs || []).each do |attr|
        next unless attr.kind_of?(Array)

        key, uri = attr
        case key
        when 'xmlns' then self[''] = uri
        when /\Axmlns:(.+)/ then self[$1] = uri
        end
      end
    end

    # Returns the namespace bound to the prefix +name+ (nil or a Symbol are
    # converted with to_s, so nil addresses the default namespace). Falls back
    # to the parent scope, and finally to '' when nothing declares the prefix.
    def [](name)
      own = super(name.to_s)
      return own if own

      @parent ? @parent[name] : ''
    end

    # Leaves this scope: returns the parent stack (nil at the outermost scope).
    def pop
      @parent
    end
  end
end
@@ -0,0 +1,56 @@
1
module SAXMachine
  class SAXConfig

    # Settings for one collection declaration: the element name, the accessor
    # that receives the parsed items, the namespaces it applies to, and the
    # kind of handler that parses each occurrence.
    class CollectionConfig
      attr_reader :name
      attr_reader :default_xmlns

      # name    - element name (stored as a String).
      # options - Hash read for :class, :as, :xmlns (String or Array),
      #           :default_xmlns and :events.
      def initialize(name, options)
        @name = name.to_s
        @class = options[:class]
        @as = options[:as].to_s
        @xmlns = case options[:xmlns]
                 # Copy the array: '' may be appended below and the caller's
                 # own array must not change as a side effect.
                 when Array then options[:xmlns].dup
                 when String then [options[:xmlns]]
                 else nil
                 end
        @default_xmlns = options[:default_xmlns]
        # With a default namespace configured, elements in the empty
        # namespace have to be accepted as well.
        @xmlns << '' if @default_xmlns && @xmlns && !@xmlns.include?('')
        @record_events = options[:events]
      end

      # Builds the handler that parses one occurrence of the collection
      # element. When a default namespace is configured and the current
      # default namespace is empty (or no stack was given), a child scope
      # binding '' to the configured default is used instead of +nsstack+.
      def handler(nsstack)
        if @default_xmlns && (nsstack.nil? || nsstack[''] == '')
          # The previous code also passed +nsstack+ as the attribute list,
          # which raised when +nsstack+ was nil; the parent link is all the
          # new scope needs.
          nsstack = NSStack.new(nsstack)
          nsstack[''] = @default_xmlns
        end

        if @record_events
          SAXEventRecorder.new(nsstack)
        else
          SAXHandler.new(@class.new, nsstack)
        end
      end

      # Name of the method on the parent object that returns the collection.
      def accessor
        as
      end

      # True when no namespace restriction was configured, or +ns+ is one of
      # the accepted namespaces.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      protected

      def as
        @as
      end

      # NOTE(review): this shadows Object#class on config instances with a
      # protected method returning the configured class (or the element
      # name). Nothing in this class calls it - confirm it is still needed.
      def class
        @class || @name
      end
    end

  end
end
@@ -0,0 +1,53 @@
1
+ require "sax-machine/sax_element_config"
2
+ require "sax-machine/sax_collection_config"
3
+
4
module SAXMachine
  # Registry of the element and collection configurations declared on a
  # class, indexed by local element name.
  class SAXConfig
    attr_reader :top_level_elements, :collection_elements

    def initialize
      @top_level_elements = {}
      @collection_elements = {}
    end

    # Registers an ElementConfig for +name+; several configs may share a name.
    def add_top_level_element(name, options)
      (@top_level_elements[name.to_s] ||= []) << ElementConfig.new(name, options)
    end

    # Registers a CollectionConfig for +name+; several configs may share a name.
    def add_collection_element(name, options)
      (@collection_elements[name.to_s] ||= []) << CollectionConfig.new(name, options)
    end

    # Returns the first collection config whose name equals the local part of
    # +name+ and which accepts the namespace +nsstack+ binds to its prefix,
    # or nil when there is none.
    def collection_config(name, nsstack)
      namespace, local = resolve_tag(name, nsstack)

      (@collection_elements[local] || []).detect do |ce|
        ce.name.to_s == local && ce.xmlns_match?(namespace)
      end
    end

    # Returns every element config registered for the local part of +name+
    # that reads its value from an attribute and whose attribute conditions
    # hold for +attrs+. Namespaces are not considered here.
    def element_configs_for_attribute(name, attrs)
      local = name.to_s.split(':', 2).last
      (@top_level_elements[local.to_s] || []).select do |element_config|
        element_config.has_value_and_attrs_match?(attrs)
      end
    end

    # Returns the first element config for the local part of +name+ that
    # accepts both the resolved namespace and +attrs+, or nil.
    def element_config_for_tag(name, attrs, nsstack)
      namespace, local = resolve_tag(name, nsstack)

      (@top_level_elements[local] || []).detect do |element_config|
        element_config.xmlns_match?(namespace) &&
          element_config.attrs_match?(attrs)
      end
    end

    private

    # Splits a qualified tag name into prefix and local name and looks the
    # prefix up in +nsstack+ (a nil prefix is used for unprefixed names).
    # Returns [namespace, local_name].
    def resolve_tag(name, nsstack)
      prefix, local = name.to_s.split(':', 2)
      prefix, local = nil, prefix unless local # No prefix
      [nsstack[prefix], local.to_s]
    end

  end
end
@@ -0,0 +1,107 @@
1
+ require "nokogiri"
2
+
3
# Mixin that gives a class declarative SAX parsing: `element` / `elements`
# declarations at class level, and push-style `parse` on instances.
module SAXMachine

  def self.included(base)
    base.extend ClassMethods
  end

  # Feeds +xml_text+ to this object's push parser, creating the parser (and
  # the SAXHandler bound to this object) on first use. May be called
  # repeatedly with consecutive chunks. Returns self.
  def parse(xml_text)
    unless @parser
      sax_handler = SAXHandler.new(self)
      @parser = Nokogiri::XML::SAX::PushParser.new(sax_handler)
      @parser.options |= Nokogiri::XML::ParseOptions::RECOVER if @parser.respond_to?(:options)
    end
    @parser << xml_text
    self
  end

  # Tells the parser that no more input follows, if a parser was ever
  # created. Returns self.
  def parse_finish
    @parser.finish if @parser
    self
  end

  module ClassMethods

    # Creates a new instance and parses +xml_text+ into it.
    def parse(xml_text)
      # It might be cleaner to additionally call parse_finish here, but
      # then Nokogiri/libxml2 barfs on incomplete documents. Desired
      # behaviour?
      new.parse(xml_text)
    end

    # Declares a single-valued element. +options+ is handed on to the
    # SAXConfig; :as names the accessor (defaults to +name+).
    def element(name, options = {})
      # Work on a copy so the caller's hash is not modified.
      options = options.dup
      options[:as] ||= name
      sax_config.add_top_level_element(name, options)

      # we only want to insert the getter and setter if they haven't defined it from elsewhere.
      # this is how we allow custom parsing behavior. So you could define the setter
      # and have it parse the string into a date or whatever.
      # method_defined? takes names as Strings or Symbols, whereas
      # instance_methods holds Symbols on Ruby 1.9+ and never matched a String there.
      attr_reader options[:as] unless method_defined?(options[:as])
      attr_writer_once options[:as] unless method_defined?("#{options[:as]}=")
    end

    # All element configs declared on this class, as one flat array.
    def columns
      sax_config.top_level_elements.values.inject([]) { |all, configs| all + configs }
    end

    # First element config registered under the element name +sym+, or nil.
    def column(sym)
      (sax_config.top_level_elements[sym.to_s] || []).first
    end

    # The :class option given for element +sym+.
    def data_class(sym)
      column(sym).data_class
    end

    # The :required option given for element +sym+.
    def required?(sym)
      column(sym).required?
    end

    # The `column` value of every declared element config.
    def column_names
      columns.map { |e| e.column }
    end

    # Declares a repeated element collected into an array. With :class or
    # :events the element is registered as a collection; otherwise an
    # add_<as> method is generated and each value is appended through it.
    def elements(name, options = {})
      # Work on a copy so the caller's hash is not modified.
      options = options.dup
      options[:as] ||= name
      if options[:class] || options[:events]
        sax_config.add_collection_element(name, options)
      else
        class_eval <<-SRC, __FILE__, __LINE__ + 1
          def add_#{options[:as]}(value)
            #{options[:as]} << value
          end
        SRC
        sax_config.add_top_level_element(name, options.merge(:collection => true))
      end

      unless method_defined?(options[:as])
        class_eval <<-SRC, __FILE__, __LINE__ + 1
          def #{options[:as]}
            @#{options[:as]} ||= []
          end
        SRC
      end

      attr_writer options[:as] unless method_defined?("#{options[:as]}=")
    end

    # The SAXConfig holding this class's declarations, created on first use.
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # Defines a writer that only takes effect while the attribute is still
    # nil or false, so the first assigned value wins.
    def attr_writer_once(attr)
      class_eval <<-SRC, __FILE__, __LINE__ + 1
        def #{attr}=(val)
          @#{attr} ||= val
        end
      SRC
    end
  end

end
@@ -0,0 +1,75 @@
1
module SAXMachine
  class SAXConfig

    # Settings for one `element` declaration: which setter receives the
    # value, whether the value comes from an attribute, and the attribute and
    # namespace conditions a tag has to meet.
    class ElementConfig
      attr_reader :name, :setter, :data_class

      # name    - element name (stored as a String).
      # options - Hash read for :with, :value, :as, :collection, :class,
      #           :required and :xmlns (String or Array).
      def initialize(name, options)
        @name = name.to_s

        # Required attributes are kept as [name, value] string pairs so they
        # can be compared pair-wise against an element's attributes.
        @with = if options.has_key?(:with)
                  attr_pairs(options[:with]).map { |k, v| [k.to_s, v.to_s] }
                else
                  nil
                end

        @value = options.has_key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]
        @setter = @collection ? "add_#{options[:as]}" : "#{@as}="
        @data_class = options[:class]
        @required = options[:required]

        @xmlns = case options[:xmlns]
                 when Array then options[:xmlns]
                 when String then [options[:xmlns]]
                 else nil
                 end
      end

      # The :as option, or the element name as a Symbol when :as is unset.
      def column
        @as || @name.to_sym
      end

      def required?
        @required
      end

      # Returns the value of the attribute named by the :value option, or nil
      # when the element does not carry it. Only attribute names are
      # compared, so an attribute whose value happens to equal the wanted
      # name is no longer mistaken for it.
      def value_from_attrs(attrs)
        match = attr_pairs(attrs).assoc(@value)
        match && match[1]
      end

      # True when there are no :with conditions, or every required
      # name/value pair is present among +attrs+ as that exact pair.
      def attrs_match?(attrs)
        return true unless @with

        present = attr_pairs(attrs)
        @with.all? { |pair| present.include?(pair) }
      end

      def has_value_and_attrs_match?(attrs)
        !@value.nil? && attrs_match?(attrs)
      end

      # True when no namespace restriction was configured, or +ns+ is one of
      # the accepted namespaces.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      def collection?
        @collection
      end

      private

      # Normalizes an attribute list into [name, value] pairs. Entries that
      # are arrays count as ready-made pairs (NSStack reads namespace
      # declarations in that form); any other entry is taken as a name
      # followed by its value, the flat layout the index-based lookup relied
      # on. A Hash or nil is accepted as well.
      def attr_pairs(attrs)
        list = attrs.to_a
        pairs = []
        index = 0
        while index < list.size
          entry = list[index]
          if entry.kind_of?(Array)
            pairs << [entry[0], entry[1]]
            index += 1
          else
            pairs << [entry, list[index + 1]]
            index += 2
          end
        end
        pairs
      end
    end

  end
end
@@ -0,0 +1,35 @@
1
module SAXMachine
  # Handler that does not map XML onto an object but records the SAX events
  # it receives as plain arrays:
  #   [:start_element, namespace, local_name, attrs]
  #   [:end_element, namespace, local_name]
  #   [:chars, string]
  class SAXEventRecorder < SAXHandler
    def initialize(nsstack)
      super(nil, nsstack)
      @events = []
    end

    # The recorded events without the first and the last one.
    def object
      # First and last belong to the parent element
      @events[1..-2]
    end

    # Opens a namespace scope for the element and records its start, with the
    # prefix resolved to a namespace.
    def start_element(name, attrs = [])
      @nsstack = NSStack.new(@nsstack, attrs)
      prefix, local = split_qname(name)
      @events << [:start_element, @nsstack[prefix], local, attrs]
    end

    # Records the element's end, then leaves its namespace scope.
    def end_element(name)
      prefix, local = split_qname(name)
      @events << [:end_element, @nsstack[prefix], local]
      @nsstack = @nsstack.pop
    end

    def characters(string)
      @events << [:chars, string]
    end

    # A recorder is created without a target object, so there is no
    # configuration to consult; asking for one is an error.
    def sax_config
      raise "SAXEventRecorder only records events and has no sax_config"
    end

    private

    # Splits "prefix:local" into [prefix, local]; an unprefixed name yields
    # [nil, name].
    def split_qname(name)
      prefix, local = name.split(':', 2)
      local ? [prefix, local] : [nil, prefix]
    end
  end
end
@@ -0,0 +1,124 @@
1
+ require "nokogiri"
2
+ require "sax-machine/ns_stack"
3
+
4
module SAXMachine
  # SAX document handler that fills +object+ according to the sax_config of
  # its class. Elements configured as collections are delegated to a nested
  # handler until the collection element closes.
  class SAXHandler < Nokogiri::XML::SAX::Document
    attr_reader :object

    # object  - the instance that receives parsed values.
    # nsstack - namespace scope to start from (a fresh NSStack when nil).
    def initialize(object, nsstack=nil)
      @object = object
      @nsstack = nsstack || NSStack.new
      @collection_depth = 0
    end

    # Forwards text to the active collection handler, or appends it to the
    # value of the element currently being captured.
    def characters(string)
      if parsing_collection?
        @collection_handler.characters(string)
      elsif @element_config
        @value << string
      end
    end

    # CDATA content is treated exactly like ordinary character data.
    def cdata_block(string)
      characters(string)
    end

    def start_element(name, attrs = [])
      @name = name
      @attrs = attrs.map { |a| SAXHandler.decode_xml(a) }
      @nsstack = NSStack.new(@nsstack, @attrs)

      if parsing_collection?
        # Count the nesting inside the collection so that a descendant with
        # the same name as the collection element does not end it.
        @collection_depth += 1
        @collection_handler.start_element(@name, @attrs)

      elsif (@collection_config = sax_config.collection_config(@name, @nsstack))
        @collection_depth = 0
        @collection_handler = @collection_config.handler(@nsstack)
        # The opening tag is only forwarded when the nested handler's object
        # is of a different class than this handler's object.
        if @object.class != @collection_handler.object.class
          @collection_handler.start_element(@name, @attrs)
        end

      else
        element_configs = sax_config.element_configs_for_attribute(@name, @attrs)
        parse_element_attributes(element_configs) if element_configs.any?
        set_element_config_for_element_value
      end
    end

    def end_element(name)
      if parsing_collection?
        @collection_handler.end_element(name)

        if closes_collection?(name)
          @object.send(@collection_config.accessor) << @collection_handler.object
          reset_current_collection
        elsif @collection_depth > 0
          @collection_depth -= 1
        end

      elsif characters_captured?
        @object.send(@element_config.setter, @value)
      end

      reset_current_tag
      @nsstack = @nsstack.pop
    end

    # True when non-empty text was collected for the current element.
    def characters_captured?
      !@value.nil? && !@value.empty?
    end
    # The original, misspelled name is kept for existing callers.
    alias_method :characaters_captured?, :characters_captured?

    def parsing_collection?
      !@collection_handler.nil?
    end

    # Assigns each attribute of the current tag to the collection instance
    # through the setter of the same name, where one exists. @attrs is read
    # as a flat list: names at even positions, values right after them.
    def parse_collection_instance_attributes
      instance = @collection_handler.object
      @attrs.each_with_index do |attr_name, index|
        next unless index % 2 == 0

        # respond_to? works with String names on every Ruby version, unlike
        # methods.include?, which holds Symbols on 1.9+.
        setter = "#{attr_name}="
        instance.send(setter, @attrs[index + 1]) if instance.respond_to?(setter)
      end
    end

    # Stores the attribute value each config asks for via its setter.
    def parse_element_attributes(element_configs)
      element_configs.each do |ec|
        @object.send(ec.setter, ec.value_from_attrs(@attrs))
      end
      @element_config = nil
    end

    # Starts capturing text for the current tag if a config matches it.
    def set_element_config_for_element_value
      @value = ""
      @element_config = sax_config.element_config_for_tag(@name, @attrs, @nsstack)
    end

    def reset_current_collection
      @collection_handler = nil
      @collection_config = nil
      @collection_depth = 0
    end

    def reset_current_tag
      @name = nil
      @attrs = nil
      @value = nil
      @element_config = nil
    end

    def sax_config
      @object.class.sax_config
    end

    ##
    # Decodes XML special characters. Arrays are decoded element by element.
    def self.decode_xml(str)
      return str.map { |item| decode_xml(item) } if str.kind_of?(Array)

      CGI.unescapeHTML(str)
    end

    private

    # True when +name+ is the closing tag of the collection element itself:
    # no descendant is still open and the local name matches the config.
    def closes_collection?(name)
      @collection_depth.zero? && @collection_config.name == name.split(':').last
    end

  end
end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require "cgi"
4
+
5
+ require "sax-machine/sax_document"
6
+ require "sax-machine/sax_handler"
7
+ require "sax-machine/sax_config"
8
+ require "sax-machine/sax_event_recorder"
9
+
10
module SAXMachine
  # Library version string; frozen so the constant cannot be modified in place.
  VERSION = "0.0.21".freeze
end
@@ -0,0 +1,657 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "SAXMachine" do
4
+ describe "element" do
5
+ describe "when parsing a single element" do
6
+ before :each do
7
+ @klass = Class.new do
8
+ include SAXMachine
9
+ element :title
10
+ end
11
+ end
12
+
13
+ it "should provide an accessor" do
14
+ document = @klass.new
15
+ document.title = "Title"
16
+ document.title.should == "Title"
17
+ end
18
+
19
+ it "should allow introspection of the elements" do
20
+ @klass.column_names.should =~ [:title]
21
+ end
22
+
23
+ it "should not overwrite the setter if there is already one present" do
24
+ @klass = Class.new do
25
+ def title=(val)
26
+ @title = "#{val} **"
27
+ end
28
+ include SAXMachine
29
+ element :title
30
+ end
31
+ document = @klass.new
32
+ document.title = "Title"
33
+ document.title.should == "Title **"
34
+ end
35
+ describe "the class attribute" do
36
+ before(:each) do
37
+ @klass = Class.new do
38
+ include SAXMachine
39
+ element :date, :class => DateTime
40
+ end
41
+ @document = @klass.new
42
+ @document.date = DateTime.now.to_s
43
+ end
44
+ it "should be available" do
45
+ @klass.data_class(:date).should == DateTime
46
+ end
47
+ end
48
+ describe "the required attribute" do
49
+ it "should be available" do
50
+ @klass = Class.new do
51
+ include SAXMachine
52
+ element :date, :required => true
53
+ end
54
+ @klass.required?(:date).should be_true
55
+ end
56
+ end
57
+
58
+ it "should not overwrite the accessor when the element is not present" do
59
+ document = @klass.new
60
+ document.title = "Title"
61
+ document.parse("<foo></foo>")
62
+ document.title.should == "Title"
63
+ end
64
+
65
+ it "should *not* overwrite the value when the element is present (new behaviour!)" do
66
+ document = @klass.new
67
+ document.title = "Old title"
68
+ document.parse("<title>New title</title>")
69
+ document.title.should == "Old title"
70
+ end
71
+
72
+ it "should save the element text into an accessor" do
73
+ document = @klass.parse("<title>My Title</title>")
74
+ document.title.should == "My Title"
75
+ end
76
+
77
+ it "should save cdata into an accessor" do
78
+ document = @klass.parse("<title><![CDATA[A Title]]></title>")
79
+ document.title.should == "A Title"
80
+ end
81
+
82
+ it "should save the element text into an accessor when there are multiple elements" do
83
+ document = @klass.parse("<xml><title>My Title</title><foo>bar</foo></xml>")
84
+ document.title.should == "My Title"
85
+ end
86
+
87
+ it "should save the first element text when there are multiple of the same element" do
88
+ document = @klass.parse("<xml><title>My Title</title><title>bar</title></xml>")
89
+ document.title.should == "My Title"
90
+ end
91
+ end
92
+
93
+ describe "when parsing multiple elements" do
94
+ before :each do
95
+ @klass = Class.new do
96
+ include SAXMachine
97
+ element :title
98
+ element :name
99
+ end
100
+ end
101
+
102
+ it "should save the element text for a second tag" do
103
+ document = @klass.parse("<xml><title>My Title</title><name>Paul</name></xml>")
104
+ document.name.should == "Paul"
105
+ document.title.should == "My Title"
106
+ end
107
+ end
108
+
109
+ describe "when using options for parsing elements" do
110
+ describe "using the 'as' option" do
111
+ before :each do
112
+ @klass = Class.new do
113
+ include SAXMachine
114
+ element :description, :as => :summary
115
+ end
116
+ end
117
+
118
+ it "should provide an accessor using the 'as' name" do
119
+ document = @klass.new
120
+ document.summary = "a small summary"
121
+ document.summary.should == "a small summary"
122
+ end
123
+
124
+ it "should save the element text into the 'as' accessor" do
125
+ document = @klass.parse("<description>here is a description</description>")
126
+ document.summary.should == "here is a description"
127
+ end
128
+ end
129
+
130
+ describe "using the :with option" do
131
+ describe "and the :value option" do
132
+ before :each do
133
+ @klass = Class.new do
134
+ include SAXMachine
135
+ element :link, :value => :href, :with => {:foo => "bar"}
136
+ end
137
+ end
138
+
139
+ it "should escape correctly the ampersand" do
140
+ document = @klass.parse("<link href='http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&amp;lang=en-us&amp;format=atom' foo='bar'>asdf</link>")
141
+ document.link.should == "http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&lang=en-us&format=atom"
142
+ end
143
+
144
+ it "should save the value of a matching element" do
145
+ document = @klass.parse("<link href='test' foo='bar'>asdf</link>")
146
+ document.link.should == "test"
147
+ end
148
+
149
+ it "should save the value of the first matching element" do
150
+ document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' foo='bar' /></xml>")
151
+ document.link.should == "first"
152
+ end
153
+
154
+ describe "and the :as option" do
155
+ before :each do
156
+ @klass = Class.new do
157
+ include SAXMachine
158
+ element :link, :value => :href, :as => :url, :with => {:foo => "bar"}
159
+ element :link, :value => :href, :as => :second_url, :with => {:asdf => "jkl"}
160
+ end
161
+ end
162
+
163
+ it "should save the value of the first matching element" do
164
+ document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' asdf='jkl' /><link href='second' foo='bar' /></xml>")
165
+ document.url.should == "first"
166
+ document.second_url.should == "second"
167
+ end
168
+ end
169
+ end
170
+
171
+ describe "with only one element" do
172
+ before :each do
173
+ @klass = Class.new do
174
+ include SAXMachine
175
+ element :link, :with => {:foo => "bar"}
176
+ end
177
+ end
178
+
179
+ it "should save the text of an element that has matching attributes" do
180
+ document = @klass.parse("<link foo=\"bar\">match</link>")
181
+ document.link.should == "match"
182
+ end
183
+
184
+ it "should not save the text of an element that doesn't have matching attributes" do
185
+ document = @klass.parse("<link>no match</link>")
186
+ document.link.should be_nil
187
+ end
188
+
189
+ it "should save the text of an element that has matching attributes when it is the second of that type" do
190
+ document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">match</link></xml>")
191
+ document.link.should == "match"
192
+ end
193
+
194
+ it "should save the text of an element that has matching attributes plus a few more" do
195
+ document = @klass.parse("<xml><link>no match</link><link asdf='jkl' foo='bar'>match</link>")
196
+ document.link.should == "match"
197
+ end
198
+ end
199
+
200
+ describe "with multiple elements of same tag" do
201
+ before :each do
202
+ @klass = Class.new do
203
+ include SAXMachine
204
+ element :link, :as => :first, :with => {:foo => "bar"}
205
+ element :link, :as => :second, :with => {:asdf => "jkl"}
206
+ end
207
+ end
208
+
209
+ it "should match the first element" do
210
+ document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">first match</link><link>no match</link></xml>")
211
+ document.first.should == "first match"
212
+ end
213
+
214
+ it "should match the second element" do
215
+ document = @klass.parse("<xml><link>no match</link><link foo='bar'>first match</link><link asdf='jkl'>second match</link><link>hi</link></xml>")
216
+ document.second.should == "second match"
217
+ end
218
+ end
219
+ end # using the 'with' option
220
+
221
+ describe "using the 'value' option" do
222
+ before :each do
223
+ @klass = Class.new do
224
+ include SAXMachine
225
+ element :link, :value => :foo
226
+ end
227
+ end
228
+
229
+ it "should save the attribute value" do
230
+ document = @klass.parse("<link foo='test'>hello</link>")
231
+ document.link.should == 'test'
232
+ end
233
+
234
+ it "should save the attribute value when there is no text enclosed by the tag" do
235
+ document = @klass.parse("<link foo='test'></link>")
236
+ document.link.should == 'test'
237
+ end
238
+
239
+ it "should save the attribute value when the tag close is in the open" do
240
+ document = @klass.parse("<link foo='test'/>")
241
+ document.link.should == 'test'
242
+ end
243
+
244
+ it "should save two different attribute values on a single tag" do
245
+ @klass = Class.new do
246
+ include SAXMachine
247
+ element :link, :value => :foo, :as => :first
248
+ element :link, :value => :bar, :as => :second
249
+ end
250
+ document = @klass.parse("<link foo='foo value' bar='bar value'></link>")
251
+ document.first.should == "foo value"
252
+ document.second.should == "bar value"
253
+ end
254
+
255
+ it "should not fail if one of the attribute hasn't been defined" do
256
+ @klass = Class.new do
257
+ include SAXMachine
258
+ element :link, :value => :foo, :as => :first
259
+ element :link, :value => :bar, :as => :second
260
+ end
261
+ document = @klass.parse("<link foo='foo value'></link>")
262
+ document.first.should == "foo value"
263
+ document.second.should be_nil
264
+ end
265
+ end
266
+
267
+ describe "when desiring both the content and attributes of an element" do
268
+ before :each do
269
+ @klass = Class.new do
270
+ include SAXMachine
271
+ element :link
272
+ element :link, :value => :foo, :as => :link_foo
273
+ element :link, :value => :bar, :as => :link_bar
274
+ end
275
+ end
276
+
277
+ it "should parse the element and attribute values" do
278
+ document = @klass.parse("<link foo='test1' bar='test2'>hello</link>")
279
+ document.link.should == 'hello'
280
+ document.link_foo.should == 'test1'
281
+ document.link_bar.should == 'test2'
282
+ end
283
+ end
284
+
285
+ describe "when specifying namespaces" do
286
+ before :all do
287
+ @klass = Class.new do
288
+ include SAXMachine
289
+ element :a, :xmlns => 'urn:test'
290
+ element :b, :xmlns => ['', 'urn:test']
291
+ end
292
+ end
293
+
294
+ it "should get the element with the xmlns" do
295
+ document = @klass.parse("<a xmlns='urn:test'>hello</a>")
296
+ document.a.should == 'hello'
297
+ end
298
+
299
+ it "shouldn't get the element without the xmlns" do
300
+ document = @klass.parse("<a>hello</a>")
301
+ document.a.should be_nil
302
+ end
303
+
304
+ it "shouldn't get the element with the wrong xmlns" do
305
+ document = @klass.parse("<a xmlns='urn:test2'>hello</a>")
306
+ document.a.should be_nil
307
+ end
308
+
309
+ it "should get an element without xmlns if the empty namespace is desired" do
310
+ document = @klass.parse("<b>hello</b>")
311
+ document.b.should == 'hello'
312
+ end
313
+
314
+ it "should get an element with the right prefix" do
315
+ document = @klass.parse("<p:a xmlns:p='urn:test'>hello</p:a>")
316
+ document.a.should == 'hello'
317
+ end
318
+
319
+ it "should not get an element with the wrong prefix" do
320
+ document = @klass.parse("<x:a xmlns:p='urn:test' xmlns:x='urn:test2'>hello</x:a>")
321
+ document.a.should be_nil
322
+ end
323
+
324
+ it "should get a prefixed element without xmlns if the empty namespace is desired" do
325
+ pending "this needs a less pickier nokogiri push parser"
326
+ document = @klass.parse("<x:b>hello</x:b>")
327
+ document.b.should == 'hello'
328
+ end
329
+
330
+ it "should get the namespaced element even it's not first" do
331
+ document = @klass.parse("<root xmlns:a='urn:test'><a>foo</a><a>foo</a><a:a>bar</a:a></root>")
332
+ document.a.should == 'bar'
333
+ end
334
+
335
+ it "should parse multiple namespaces" do
336
+ klass = Class.new do
337
+ include SAXMachine
338
+ element :a, :xmlns => 'urn:test'
339
+ element :b, :xmlns => 'urn:test2'
340
+ end
341
+ document = klass.parse("<root xmlns='urn:test' xmlns:b='urn:test2'><b:b>bar</b:b><a>foo</a></root>")
342
+ document.a.should == 'foo'
343
+ document.b.should == 'bar'
344
+ end
345
+
346
+ context "when passing a default namespace" do
347
+ before :all do
348
+ @xmlns = 'urn:test'
349
+ class Inner
350
+ include SAXMachine
351
+ element :a, :xmlns => @xmlns
352
+ end
353
+ @outer = Class.new do
354
+ include SAXMachine
355
+ elements :root, :default_xmlns => @xmlns, :class => Inner
356
+ end
357
+ end
358
+
359
+ it "should replace the empty namespace with a default" do
360
+ document = @outer.parse("<root><a>Hello</a></root>")
361
+ document.root[0].a.should == 'Hello'
362
+ end
363
+
364
+ it "should not replace another namespace" do
365
+ document = @outer.parse("<root xmlns='urn:test2'><a>Hello</a></root>")
366
+ document.root[0].a.should == 'Hello'
367
+ end
368
+ end
369
+ end
370
+
371
+ end
372
+ end
373
+
374
+ describe "elements" do
375
+ describe "when parsing multiple elements" do
376
+ before :all do
377
+ @klass = Class.new do
378
+ include SAXMachine
379
+ elements :entry, :as => :entries
380
+ end
381
+ end
382
+
383
+ it "should provide a collection accessor" do
384
+ document = @klass.new
385
+ document.entries << :foo
386
+ document.entries.should == [:foo]
387
+ end
388
+
389
+ it "should parse a single element" do
390
+ document = @klass.parse("<entry>hello</entry>")
391
+ document.entries.should == ["hello"]
392
+ end
393
+
394
+ it "should parse multiple elements" do
395
+ document = @klass.parse("<xml><entry>hello</entry><entry>world</entry></xml>")
396
+ document.entries.should == ["hello", "world"]
397
+ end
398
+
399
+ it "should parse multiple elements when taking an attribute value" do
400
+ attribute_klass = Class.new do
401
+ include SAXMachine
402
+ elements :entry, :as => :entries, :value => :foo
403
+ end
404
+ doc = attribute_klass.parse("<xml><entry foo='asdf' /><entry foo='jkl' /></xml>")
405
+ doc.entries.should == ["asdf", "jkl"]
406
+ end
407
+ end
408
+
409
+ describe "when using the class option" do
410
+ before :each do
411
+ class Foo
412
+ include SAXMachine
413
+ element :title
414
+ end
415
+ @klass = Class.new do
416
+ include SAXMachine
417
+ elements :entry, :as => :entries, :class => Foo
418
+ end
419
+ end
420
+
421
+ it "should parse a single element with children" do
422
+ document = @klass.parse("<entry><title>a title</title></entry>")
423
+ document.entries.size.should == 1
424
+ document.entries.first.title.should == "a title"
425
+ end
426
+
427
+ it "should parse multiple elements with children" do
428
+ document = @klass.parse("<xml><entry><title>title 1</title></entry><entry><title>title 2</title></entry></xml>")
429
+ document.entries.size.should == 2
430
+ document.entries.first.title.should == "title 1"
431
+ document.entries.last.title.should == "title 2"
432
+ end
433
+
434
+ it "should not parse a top level element that is specified only in a child" do
435
+ document = @klass.parse("<xml><title>no parse</title><entry><title>correct title</title></entry></xml>")
436
+ document.entries.size.should == 1
437
+ document.entries.first.title.should == "correct title"
438
+ end
439
+
440
+ it "should parse out an attribute value from the tag that starts the collection" do
441
+ class Foo
442
+ element :entry, :value => :href, :as => :url
443
+ end
444
+ document = @klass.parse("<xml><entry href='http://pauldix.net'><title>paul</title></entry></xml>")
445
+ document.entries.size.should == 1
446
+ document.entries.first.title.should == "paul"
447
+ document.entries.first.url.should == "http://pauldix.net"
448
+ end
449
+ end
450
+
451
# Specs for `elements ..., :events => true`. Each example expects body[0] to
# be the list of SAX event tuples seen inside <body>:
#   [:start_element, namespace, name, attributes]
#   [:chars, text]
#   [:end_element, namespace, name]
describe "when desiring sax events" do
  XHTML_XMLNS = "http://www.w3.org/1999/xhtml"

  before :all do
    @klass = Class.new do
      include SAXMachine
      elements :body, :events => true
    end
  end

  it "should parse a simple child" do
    events = @klass.parse("<body><p/></body>").body[0]
    events.should == [
      [:start_element, "", "p", []],
      [:end_element, "", "p"]
    ]
  end

  it "should parse a simple child with text" do
    events = @klass.parse("<body><p>Hello</p></body>").body[0]
    events.should == [
      [:start_element, "", "p", []],
      [:chars, "Hello"],
      [:end_element, "", "p"]
    ]
  end

  it "should parse nested children" do
    events = @klass.parse("<body><p><span/></p></body>").body[0]
    events.should == [
      [:start_element, "", "p", []],
      [:start_element, "", "span", []],
      [:end_element, "", "span"],
      [:end_element, "", "p"]
    ]
  end

  it "should parse multiple children" do
    events = @klass.parse("<body><p>Hello</p><p>World</p></body>").body[0]
    events.should == [
      [:start_element, "", "p", []],
      [:chars, "Hello"],
      [:end_element, "", "p"],
      [:start_element, "", "p", []],
      [:chars, "World"],
      [:end_element, "", "p"]
    ]
  end

  # With a default xmlns on <body>, the child events are expected to carry
  # that namespace URI instead of the empty string.
  it "should pass namespaces" do
    events = @klass.parse("<body xmlns='#{XHTML_XMLNS}'><p/></body>").body[0]
    events.should == [
      [:start_element, XHTML_XMLNS, "p", []],
      [:end_element, XHTML_XMLNS, "p"]
    ]
  end
end
494
+ end
495
+
496
# End-to-end check: parses the Atom fixture (atom.xml, stored next to this
# spec file) with a feed class and a nested entry class.
describe "full example" do
  XMLNS_ATOM = "http://www.w3.org/2005/Atom"
  XMLNS_FEEDBURNER = "http://rssnamespace.org/feedburner/ext/1.0"

  # Declare the parser classes once for the whole group. Doing this in a
  # per-example hook would re-run every element/elements declaration on the
  # same (reopened) classes before each example.
  before :all do
    class AtomEntry
      include SAXMachine
      element :title
      element :name, :as => :author
      element :origLink, :as => :orig_link, :xmlns => XMLNS_FEEDBURNER
      element :summary
      element :content
      element :published
    end

    class Atom
      include SAXMachine
      element :title
      element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
      element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
      elements :entry, :as => :entries, :class => AtomEntry, :xmlns => XMLNS_ATOM
    end
  end

  before :each do
    # Resolve the fixture relative to this file so the suite does not depend
    # on being launched from the project root.
    @xml = File.read(File.join(File.dirname(__FILE__), 'atom.xml'))
  end

  it "should parse the url" do
    f = Atom.parse(@xml)
    f.url.should == "http://www.pauldix.net/"
  end

  it "should parse all entries" do
    f = Atom.parse(@xml)
    f.entries.length.should == 5
  end

  it "should parse the feedburner:origLink" do
    f = Atom.parse(@xml)
    f.entries[0].orig_link.should == 'http://www.pauldix.net/2008/09/marshal-data-to.html'
  end
end
536
+
537
# Namespace-aware parsing: the same Entry/Channel classes declare title and
# link for both the RSS and the Atom namespace, and Root maps <rss> and <feed>
# onto one :channels collection, each with its own :default_xmlns.
describe "another full example" do
  RSS_XMLNS = "http://purl.org/rss/1.0/"
  ATOM_XMLNS = "http://www.w3.org/2005/Atom"

  # One feed item; the Atom link is read from its href attribute.
  class Entry
    include SAXMachine

    element :title, :xmlns => RSS_XMLNS
    element :title, :xmlns => ATOM_XMLNS
    element :link, :xmlns => RSS_XMLNS
    element :link, :xmlns => ATOM_XMLNS, :value => 'href'
  end

  # Feed-level data plus its items (<entry> and <item> share :entries).
  class Channel
    include SAXMachine

    element :title, :xmlns => RSS_XMLNS
    element :title, :xmlns => ATOM_XMLNS
    element :link, :xmlns => RSS_XMLNS
    element :link, :xmlns => ATOM_XMLNS, :value => 'href'
    elements :entry, :as => :entries, :class => Entry
    elements :item, :as => :entries, :class => Entry
  end

  # Document root: either flavour of feed ends up in :channels.
  class Root
    include SAXMachine

    elements :rss, :as => :channels, :default_xmlns => RSS_XMLNS, :class => Channel
    elements :feed, :as => :channels, :default_xmlns => ATOM_XMLNS, :class => Channel
  end

  context "when parsing a complex example" do
    before :all do
      feed_xml = <<-eoxml
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/">
<channel>
<title>Delicious/tag/pubsubhubbub</title>
<atom:link rel="self" type="application/rss+xml" href="http://feeds.delicious.com/v2/rss/tag/pubsubhubbub?count=15"/>
<link>http://delicious.com/tag/pubsubhubbub</link>
<description>recent bookmarks tagged pubsubhubbub</description>
</channel>
</rss>
      eoxml

      # Only the first (and here only) channel is inspected by the examples.
      @document = Root.parse(feed_xml).channels[0]
    end

    it "should parse the title" do
      @document.title.should == 'Delicious/tag/pubsubhubbub'
    end

    # The expected value is the href of <atom:link>, not the text of <link>.
    it "should parse the link" do
      @document.link.should == 'http://feeds.delicious.com/v2/rss/tag/pubsubhubbub?count=15'
    end
  end
end
590
+
591
# Parses a Twitter RSS 2.0 timeline. The feed declares no default namespace,
# so the RSS declarations are given a list of namespaces that includes ''.
# NOTE(review): '' presumably matches un-namespaced elements -- confirm
# against the library's :xmlns handling.
#
# This group uses its own constant and class names (TWITTER_RSS_XMLNS,
# TwitterLink, TwitterEntry, TwitterFeed). Reusing RSS_XMLNS and Entry would
# reassign an already-initialized constant and reopen a class declared by
# another example group, leaking declarations between groups.
describe "yet another full example" do

  context "when parsing a Twitter example" do
    before :all do
      TWITTER_RSS_XMLNS = ['http://purl.org/rss/1.0/', ''] unless defined? TWITTER_RSS_XMLNS
      ATOM_XMLNS = 'http://www.w3.org/2005/Atom' unless defined? ATOM_XMLNS

      # Placeholder item class for Atom <link> elements; declares nothing.
      class TwitterLink
        include SAXMachine
      end

      # One <item> of the timeline.
      class TwitterEntry
        include SAXMachine
        element :title, :xmlns => TWITTER_RSS_XMLNS
        element :link, :xmlns => TWITTER_RSS_XMLNS, :as => :entry_link
        element :title, :xmlns => ATOM_XMLNS, :as => :title
        elements :link, :xmlns => ATOM_XMLNS, :as => :links, :class => TwitterLink
      end

      # The feed itself: title, link, items and Atom links.
      class TwitterFeed
        include SAXMachine
        element :title, :xmlns => TWITTER_RSS_XMLNS, :as => :title
        element :link, :xmlns => TWITTER_RSS_XMLNS, :as => :feed_link
        elements :item, :xmlns => TWITTER_RSS_XMLNS, :as => :entries, :class => TwitterEntry
        element :title, :xmlns => ATOM_XMLNS, :as => :title
        elements :link, :xmlns => ATOM_XMLNS, :as => :links, :class => TwitterLink
      end

      @document = TwitterFeed.parse(<<-eoxml)
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<atom:link type="application/rss+xml" rel="self" href="http://twitter.com/statuses/user_timeline/5381582.rss"/>
<title>Twitter / julien51</title>
<link>http://twitter.com/julien51</link>
<description>Twitter updates from julien / julien51.</description>
<language>en-us</language>
<ttl>40</ttl>
<item>
<title>julien51: @github : I get an error when trying to build one of my gems (julien51-sax-machine), it seems related to another gem's gemspec.</title>
<description>julien51: @github : I get an error when trying to build one of my gems (julien51-sax-machine), it seems related to another gem's gemspec.</description>
<pubDate>Thu, 30 Jul 2009 01:00:30 +0000</pubDate>
<guid>http://twitter.com/julien51/statuses/2920716033</guid>
<link>http://twitter.com/julien51/statuses/2920716033</link>
</item>
<item>
<title>julien51: Hum, San Francisco's summer are delightful. http://bit.ly/VeXt4</title>
<description>julien51: Hum, San Francisco's summer are delightful. http://bit.ly/VeXt4</description>
<pubDate>Wed, 29 Jul 2009 23:07:32 +0000</pubDate>
<guid>http://twitter.com/julien51/statuses/2918869948</guid>
<link>http://twitter.com/julien51/statuses/2918869948</link>
</item>
</channel>
</rss>
      eoxml
    end

    # The channel title is expected, not the title of either <item>.
    it "should parse the title" do
      @document.title.should == 'Twitter / julien51'
    end

    it "should find an entry" do
      @document.entries.length.should == 2
    end
  end
end
657
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,13 @@
1
# Shared bootstrap for the spec suite: loads RSpec, puts the project's lib/
# directory at the front of the load path, then loads the library under test.
require "rubygems"
require "spec"

# gem install redgreen for colored test output.
# Skipped when TM_CURRENT_LINE is set (presumably when run from TextMate);
# a missing gem is ignored because colored output is optional.
begin
  require "redgreen" unless ENV['TM_CURRENT_LINE']
rescue LoadError
  # redgreen is not installed; continue with plain output.
end

lib_dir = File.expand_path("../lib", File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

# Load through the lib/ entry added above. Requiring "lib/sax-machine" only
# resolves when the current working directory is itself on the load path,
# which is not the case on Ruby 1.9.2 and later.
require "sax-machine"

# Spec::Runner.configure do |config|
# end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: superfeedr-sax-machine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.22.1
5
+ platform: ruby
6
+ authors:
7
+ - Paul Dix
8
+ - Stephan Maka
9
+ - julien51
10
+ - superfeedr
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+
15
+ date: 2009-11-16 00:00:00 +01:00
16
+ default_executable:
17
+ dependencies:
18
+ - !ruby/object:Gem::Dependency
19
+ name: nokogiri
20
+ type: :runtime
21
+ version_requirement:
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.0
27
+ version:
28
+ description:
29
+ email: paul@pauldix.net stephan@spaceboyz.net
30
+ executables: []
31
+
32
+ extensions: []
33
+
34
+ extra_rdoc_files: []
35
+
36
+ files:
37
+ - lib/sax-machine.rb
38
+ - lib/sax-machine/sax_config.rb
39
+ - lib/sax-machine/sax_collection_config.rb
40
+ - lib/sax-machine/sax_element_config.rb
41
+ - lib/sax-machine/sax_document.rb
42
+ - lib/sax-machine/sax_handler.rb
43
+ - lib/sax-machine/ns_stack.rb
44
+ - lib/sax-machine/sax_event_recorder.rb
45
+ - README.textile
46
+ - Rakefile
47
+ - spec/spec.opts
48
+ - spec/spec_helper.rb
49
+ - spec/sax-machine/sax_document_spec.rb
50
+ has_rdoc: true
51
+ homepage: http://github.com/pauldix/sax-machine
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options: []
56
+
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: "0"
64
+ version:
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: "0"
70
+ version:
71
+ requirements: []
72
+
73
+ rubyforge_project:
74
+ rubygems_version: 1.3.5
75
+ signing_key:
76
+ specification_version: 2
77
+ summary: Declarative SAX Parsing with Nokogiri
78
+ test_files: []
79
+