superfeedr-sax-machine 0.0.22.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile ADDED
@@ -0,0 +1,87 @@
1
+ h1. SAX Machine
2
+
3
+ "http://github.com/pauldix/sax-machine/wikis":http://github.com/pauldix/sax-machine/wikis
4
+
5
+ "http://github.com/pauldix/sax-machine/tree/master":http://github.com/pauldix/sax-machine/tree/master
6
+
7
+ h2. Description
8
+
9
+ A declarative SAX parsing library backed by Nokogiri
10
+
11
+ h2. Usage
12
+
13
+ <pre>
14
+ require 'sax-machine'
15
+
16
+ # Class for parsing an atom entry out of a feedburner atom feed
17
+ class AtomEntry
18
+ include SAXMachine
19
+ element :title
20
+ # the :as argument makes this available through atom_entry.author instead of .name
21
+ element :name, :as => :author
22
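+ # prefixed tags are matched by local name plus namespace URI (the prefix itself is ignored)
+ element :origLink, :as => :url, :xmlns => "http://rssnamespace.org/feedburner/ext/1.0"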
23
+ element :summary
24
+ element :content
25
+ element :published
26
+ end
27
+
28
+ # Class for parsing Atom feeds
29
+ class Atom
30
+ include SAXMachine
31
+ element :title
32
+ # the :with argument means that you only match a link tag that has an attribute of :type => "text/html"
33
+ # the :value argument means that instead of setting the value to the text between the tag,
34
+ # it sets it to the attribute value of :href
35
+ element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
36
+ element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
37
+ elements :entry, :as => :entries, :class => AtomEntry
38
+ end
39
+
40
+ # you can then parse like this
41
+ feed = Atom.parse(xml_text)
42
+ # then you're ready to rock
43
+ feed.title # => whatever the title of the blog is
44
+ feed.url # => the main url of the blog
45
+ feed.feed_url # => goes to the feedburner feed
46
+
47
+ feed.entries.first.title # => title of the first entry
48
+ feed.entries.first.author # => the author of the first entry
49
+ feed.entries.first.url # => the permalink on the blog for this entry
50
+ # etc ...
51
+
52
+ # you can also use the elements method without specifying a class like so
53
+ class SomeServiceResponse
54
+ include SAXMachine
+ elements :message, :as => :messages
55
+ end
56
+
57
+ response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
58
+ response.messages.first # => "hi"
59
+ response.messages.last # => "world"
60
+ </pre>
61
+
62
+ h2. LICENSE
63
+
64
+ (The MIT License)
65
+
66
+ Copyright (c) 2009:
67
+
68
+ "Paul Dix":http://pauldix.net
69
+
70
+ Permission is hereby granted, free of charge, to any person obtaining
71
+ a copy of this software and associated documentation files (the
72
+ 'Software'), to deal in the Software without restriction, including
73
+ without limitation the rights to use, copy, modify, merge, publish,
74
+ distribute, sublicense, and/or sell copies of the Software, and to
75
+ permit persons to whom the Software is furnished to do so, subject to
76
+ the following conditions:
77
+
78
+ The above copyright notice and this permission notice shall be
79
+ included in all copies or substantial portions of the Software.
80
+
81
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
82
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
83
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
84
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
85
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
86
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
87
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
require "spec"
require "spec/rake/spectask"
# Resolve the library relative to this Rakefile so loading does not depend on
# the current directory being part of $LOAD_PATH.
require File.expand_path('lib/sax-machine', File.dirname(__FILE__))

# Default spec task: runs every *_spec.rb below spec/ with the options file
# that lives next to the specs.
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# Builds the gem from sax-machine.gemspec and installs it system-wide.
task :install do
  # rm_rf does not expand wildcards itself; expand the pattern first so that
  # previously built gem files are really removed.
  rm_rf Dir.glob("*.gem")
  puts `gem build sax-machine.gemspec`
  puts `sudo gem install sax-machine-#{SAXMachine::VERSION}.gem`
end
@@ -0,0 +1,38 @@
1
module SAXMachine
  # One scope of XML namespace declarations, stored as prefix => URI.
  # The default namespace is stored under the empty-string prefix. Scopes are
  # chained through a parent reference, so a lookup that misses locally falls
  # back to the enclosing scope.
  class NSStack < Hash
    # parent:: the enclosing NSStack, or nil for the outermost scope.
    # attrs::  attribute list of the element that opens this scope. Only
    #          entries that are [name, value] arrays are inspected; anything
    #          else is ignored. nil is treated as an empty list.
    def initialize(parent=nil, attrs=[])
      super()
      @parent = parent

      (attrs || []).each do |attr|
        next unless attr.kind_of?(Array)

        name, uri = attr
        case name
        when 'xmlns'       then self[''] = uri   # default namespace
        when /^xmlns:(.+)/ then self[$1] = uri   # prefixed namespace
        end
      end
    end

    # Returns the URI bound to +name+ (converted with to_s, so nil means the
    # default namespace). Falls back to the parent scope, and to '' when no
    # scope declares the prefix.
    def [](name)
      own = super(name.to_s)
      return own if own

      @parent ? @parent[name] : ''
    end

    # Leaves this scope: returns the parent scope (nil for the outermost one).
    # The receiver itself is not modified.
    def pop
      @parent
    end
  end
end
@@ -0,0 +1,56 @@
1
module SAXMachine
  class SAXConfig

    # Describes one +elements+ declaration whose items are handled by a
    # nested handler (either a handler for another class or an event
    # recorder).
    class CollectionConfig
      attr_reader :name
      attr_reader :default_xmlns

      # name::    tag name of the collection items (stored as a String).
      # options:: reads :class, :as, :xmlns (String or Array),
      #           :default_xmlns and :events.
      def initialize(name, options)
        @name = name.to_s
        @class = options[:class]
        @as = options[:as].to_s
        @xmlns = case options[:xmlns]
                 # Copy the list: it may be extended below and must not
                 # alter the array owned by the caller.
                 when Array then options[:xmlns].dup
                 when String then [options[:xmlns]]
                 else nil
                 end
        @default_xmlns = options[:default_xmlns]
        # With a default namespace configured, items without any namespace
        # have to be accepted as well.
        if @default_xmlns && @xmlns && !@xmlns.include?('')
          @xmlns << ''
        end
        @record_events = options[:events]
      end

      # Builds the handler that consumes one item of the collection.
      # When a default namespace is configured and the current scope has no
      # default namespace (or there is no scope at all), a new scope carrying
      # the configured default is pushed first.
      def handler(nsstack)
        if @default_xmlns && (nsstack.nil? || nsstack[''] == '')
          nsstack = NSStack.new(nsstack)
          nsstack[''] = @default_xmlns
        end

        if @record_events
          SAXEventRecorder.new(nsstack)
        else
          SAXHandler.new(@class.new, nsstack)
        end
      end

      # Name of the accessor on the parent object that holds the collection.
      def accessor
        as
      end

      # True when no namespace restriction was configured or +ns+ is one of
      # the accepted namespace URIs.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      protected

      def as
        @as
      end

      # NOTE(review): this shadows Object#class with a protected method, so
      # calling +class+ on a CollectionConfig from outside raises
      # NoMethodError. Kept unchanged for compatibility.
      def class
        @class || @name
      end
    end

  end
end
@@ -0,0 +1,53 @@
1
+ require "sax-machine/sax_element_config"
2
+ require "sax-machine/sax_collection_config"
3
+
4
module SAXMachine
  # Registry of the element and collection declarations made on one class.
  # Both tables map a tag name (String) to the list of configs declared for
  # that tag, in declaration order.
  class SAXConfig
    attr_reader :top_level_elements, :collection_elements

    def initialize
      @top_level_elements, @collection_elements = {}, {}
    end

    # Registers an ElementConfig for +name+.
    def add_top_level_element(name, options)
      key = name.to_s
      (@top_level_elements[key] ||= []) << ElementConfig.new(name, options)
    end

    # Registers a CollectionConfig for +name+.
    def add_collection_element(name, options)
      key = name.to_s
      (@collection_elements[key] ||= []) << CollectionConfig.new(name, options)
    end

    # First collection config declared for the local part of +name+ whose
    # namespace restriction accepts the namespace that +nsstack+ binds to the
    # tag's prefix; nil when there is none.
    def collection_config(name, nsstack)
      prefix, local = split_qname(name)
      namespace = nsstack[prefix]
      candidates = @collection_elements[local.to_s] || []

      candidates.detect do |config|
        config.name.to_s == local.to_s && config.xmlns_match?(namespace)
      end
    end

    # All element configs for the local part of +name+ that take their value
    # from an attribute and whose attribute conditions are met by +attrs+.
    def element_configs_for_attribute(name, attrs)
      local = name.split(':', 2).last
      candidates = @top_level_elements[local.to_s] || []

      candidates.select { |config| config.has_value_and_attrs_match?(attrs) }
    end

    # First element config for the local part of +name+ that accepts both the
    # tag's namespace and +attrs+; nil when there is none.
    def element_config_for_tag(name, attrs, nsstack)
      prefix, local = split_qname(name)
      namespace = nsstack[prefix]
      candidates = @top_level_elements[local.to_s] || []

      candidates.detect do |config|
        config.xmlns_match?(namespace) && config.attrs_match?(attrs)
      end
    end

    private

    # Splits "prefix:local" into [prefix, local]; an unprefixed name yields
    # [nil, name].
    def split_qname(qname)
      head, tail = qname.split(':', 2)
      tail ? [head, tail] : [nil, head]
    end

  end
end
@@ -0,0 +1,107 @@
1
+ require "nokogiri"
2
+
3
# Mixin that turns a class into a declarative SAX document: class-level
# +element+ / +elements+ declarations describe which tags populate which
# accessors, and +parse+ feeds XML text through a push parser.
module SAXMachine

  def self.included(base)
    base.extend ClassMethods
  end

  # Feeds +xml_text+ to this object's push parser, creating the parser on the
  # first call. May be called repeatedly with successive chunks. Returns self.
  def parse(xml_text)
    unless @parser
      sax_handler = SAXHandler.new(self)
      @parser = Nokogiri::XML::SAX::PushParser.new(sax_handler)
      # Only set the recover option when the parser exposes #options.
      @parser.options |= Nokogiri::XML::ParseOptions::RECOVER if @parser.respond_to?(:options)
    end
    @parser << xml_text
    self
  end

  # Finishes the push parser if one was created. Returns self.
  def parse_finish
    @parser.finish if @parser
    self
  end

  module ClassMethods

    # Creates a new instance and feeds it +xml_text+.
    def parse(xml_text)
      # It might be cleaner to additionally call parse_finish here, but
      # then Nokogiri/libxml2 barfs on incomplete documents. Desired
      # behaviour?
      new.parse(xml_text)
    end

    # Declares a single-valued element. +options+ is passed on to the
    # element config; :as names the accessor and defaults to +name+.
    def element(name, options = {})
      # Work on a copy so an options hash reused by the caller for several
      # declarations does not carry the first :as over to the next one.
      options = options.merge(:as => options[:as] || name)
      sax_config.add_top_level_element(name, options)

      # we only want to insert the getter and setter if they haven't defined it from elsewhere.
      # this is how we allow custom parsing behavior. So you could define the setter
      # and have it parse the string into a date or whatever.
      # method_defined? takes a name as String or Symbol, so this check does
      # not depend on whether instance_methods lists Strings or Symbols.
      attr_reader options[:as] unless method_defined?(options[:as].to_s)
      attr_writer_once options[:as] unless method_defined?("#{options[:as]}=")
    end

    # All element configs declared on this class, as one flat array.
    def columns
      sax_config.top_level_elements.values.inject([]) { |all, configs| all + configs }
    end

    # First element config declared for tag +sym+, or nil.
    def column(sym)
      (sax_config.top_level_elements[sym.to_s] || []).first
    end

    # The :class option of the first config declared for tag +sym+.
    def data_class(sym)
      column(sym).data_class
    end

    # The :required option of the first config declared for tag +sym+.
    def required?(sym)
      column(sym).required?
    end

    # Accessor names of all declared elements.
    def column_names
      columns.map { |config| config.column }
    end

    # Declares a multi-valued element collected into an array accessor.
    # With :class or :events the items are handled by a nested handler;
    # otherwise each value is appended through a generated add_<as> method.
    def elements(name, options = {})
      options = options.merge(:as => options[:as] || name)
      if options[:class] || options[:events]
        sax_config.add_collection_element(name, options)
      else
        class_eval <<-SRC
          def add_#{options[:as]}(value)
            #{options[:as]} << value
          end
        SRC
        sax_config.add_top_level_element(name, options.merge(:collection => true))
      end

      # Reader that lazily creates the backing array.
      unless method_defined?(options[:as].to_s)
        class_eval <<-SRC
          def #{options[:as]}
            @#{options[:as]} ||= []
          end
        SRC
      end

      attr_writer options[:as] unless method_defined?("#{options[:as]}=")
    end

    # The per-class SAXConfig, created on first use.
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # Defines a writer that only assigns while the instance variable is
    # still nil or false, so the first value that arrives wins.
    def attr_writer_once(attr)
      class_eval <<-SRC
        def #{attr}=(val)
          @#{attr} ||= val
        end
      SRC
    end
  end

end
@@ -0,0 +1,75 @@
1
module SAXMachine
  class SAXConfig

    # Describes one +element+ declaration (or an +elements+ declaration
    # without a nested handler): which tag it applies to, which attribute
    # conditions must hold, and which setter receives the value.
    class ElementConfig
      attr_reader :name, :setter, :data_class

      # name::    tag name (stored as a String).
      # options:: reads :with, :value, :as, :collection, :class, :required
      #           and :xmlns (String or Array).
      def initialize(name, options)
        @name = name.to_s

        # Required attributes are kept as [name, value] string pairs so they
        # can be compared pair by pair later.
        @with = if options.has_key?(:with)
          options[:with].to_a.flatten.collect { |o| o.to_s }.each_slice(2).to_a
        end

        @value = options.has_key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]
        @setter = @collection ? "add_#{options[:as]}" : "#{@as}="

        @data_class = options[:class]
        @required = options[:required]

        @xmlns = case options[:xmlns]
                 when Array then options[:xmlns]
                 when String then [options[:xmlns]]
                 else nil
                 end
      end

      # Accessor name this element is stored under.
      def column
        @as || @name.to_sym
      end

      def required?
        @required
      end

      # Value of the attribute named by the :value option, or nil when the
      # attribute is absent. Only attribute names are compared, so another
      # attribute whose value happens to equal the wanted name is skipped.
      def value_from_attrs(attrs)
        pair = attr_pairs(attrs).detect { |attr_name, _| attr_name == @value }
        pair && pair[1]
      end

      # True when every name/value pair given in :with occurs in +attrs+
      # (extra attributes are allowed), or when no :with was given.
      def attrs_match?(attrs)
        return true unless @with

        present = attr_pairs(attrs)
        @with.all? { |pair| present.include?(pair) }
      end

      def has_value_and_attrs_match?(attrs)
        !@value.nil? && attrs_match?(attrs)
      end

      # True when no namespace restriction was configured or +ns+ is one of
      # the accepted namespace URIs.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      def collection?
        @collection
      end

      private

      # Normalizes an attribute list to [name, value] pairs. Entries that are
      # already arrays are taken as pairs; the remaining entries are read as
      # a flat name, value, name, value, ... sequence.
      def attr_pairs(attrs)
        nested, flat = (attrs || []).partition { |a| a.kind_of?(Array) }
        nested + flat.each_slice(2).to_a
      end
    end

  end
end
@@ -0,0 +1,35 @@
1
module SAXMachine
  # Handler that, instead of filling an object, records the SAX events it
  # receives as plain arrays:
  #   [:start_element, namespace_uri, local_name, attrs]
  #   [:end_element, namespace_uri, local_name]
  #   [:chars, string]
  class SAXEventRecorder < SAXHandler
    def initialize(nsstack)
      super(nil, nsstack)
      @events = []
    end

    # The recorded events without the first and the last one, which belong
    # to the parent element.
    def object
      @events.slice(1..-2)
    end

    # Opens a namespace scope for the element and records its start.
    def start_element(name, attrs = [])
      @nsstack = NSStack.new(@nsstack, attrs)
      prefix, local = split_qname(name)
      @events.push([:start_element, @nsstack[prefix], local, attrs])
    end

    # Records the end of the element, then leaves its namespace scope.
    def end_element(name)
      prefix, local = split_qname(name)
      @events.push([:end_element, @nsstack[prefix], local])
      @nsstack = @nsstack.pop
    end

    def characters(string)
      @events.push([:chars, string])
    end

    # A recorder has no element declarations; asking for them is an error.
    def sax_config
      raise
    end

    private

    # Splits "prefix:local" into [prefix, local]; an unprefixed name yields
    # [nil, name].
    def split_qname(qname)
      head, tail = qname.split(':', 2)
      tail ? [head, tail] : [nil, head]
    end
  end
end
@@ -0,0 +1,124 @@
1
+ require "nokogiri"
2
+ require "sax-machine/ns_stack"
3
+
4
module SAXMachine
  # SAX document handler that fills +object+ according to the declarations
  # stored in its class's sax_config. While inside a collection item, all
  # events are forwarded to a nested handler created for that item.
  class SAXHandler < Nokogiri::XML::SAX::Document
    attr_reader :object

    # object::  the instance to populate.
    # nsstack:: namespace scope to start from; a fresh one is created when
    #           omitted.
    def initialize(object, nsstack=nil)
      @object = object
      @nsstack = nsstack || NSStack.new
    end

    # Appends text to the value being captured, or forwards it to the nested
    # collection handler.
    def characters(string)
      if parsing_collection?
        @collection_handler.characters(string)
      elsif @element_config
        @value << string
      end
    end

    # CDATA is treated exactly like character data.
    def cdata_block(string)
      characters(string)
    end

    def start_element(name, attrs = [])
      @name = name
      @attrs = attrs.map { |a| SAXHandler.decode_xml(a) }
      # Every element opens its own namespace scope; end_element pops it.
      @nsstack = NSStack.new(@nsstack, @attrs)

      if parsing_collection?
        @collection_handler.start_element(@name, @attrs)

      elsif (@collection_config = sax_config.collection_config(@name, @nsstack))
        @collection_handler = @collection_config.handler(@nsstack)
        # The opening tag is forwarded to the nested handler unless the
        # nested object is of the same class as the object being filled.
        if @object.class != @collection_handler.object.class
          @collection_handler.start_element(@name, @attrs)
        end

      elsif (element_configs = sax_config.element_configs_for_attribute(@name, @attrs)).any?
        parse_element_attributes(element_configs)
        set_element_config_for_element_value

      else
        set_element_config_for_element_value
      end
    end

    def end_element(name)
      if parsing_collection? && @collection_config.name == name.split(':').last
        # Closing tag of the collection item: hand the finished object over
        # to the collection accessor.
        @collection_handler.end_element(name)
        @object.send(@collection_config.accessor) << @collection_handler.object
        reset_current_collection

      elsif parsing_collection?
        @collection_handler.end_element(name)

      elsif characaters_captured?
        @object.send(@element_config.setter, @value)
      end

      reset_current_tag
      @nsstack = @nsstack.pop
    end

    # True when non-empty text has been collected for the current element.
    # (The method name is misspelled; it is kept for compatibility.)
    def characaters_captured?
      !@value.nil? && !@value.empty?
    end
    # Correctly spelled synonym.
    alias_method :characters_captured?, :characaters_captured?

    def parsing_collection?
      !@collection_handler.nil?
    end

    # Assigns every even-indexed entry of @attrs, taken as an attribute name,
    # to the setter of the same name on the nested object, if it has one.
    def parse_collection_instance_attributes
      instance = @collection_handler.object
      @attrs.each_with_index do |attr_name, index|
        next unless index % 2 == 0
        # respond_to? works no matter whether #methods lists Strings or
        # Symbols.
        instance.send("#{attr_name}=", @attrs[index + 1]) if instance.respond_to?("#{attr_name}=")
      end
    end

    # Stores the attribute value selected by each config through its setter.
    def parse_element_attributes(element_configs)
      element_configs.each do |ec|
        @object.send(ec.setter, ec.value_from_attrs(@attrs))
      end
      @element_config = nil
    end

    # Starts capturing text for the current tag if a config matches it.
    def set_element_config_for_element_value
      @value = ""
      @element_config = sax_config.element_config_for_tag(@name, @attrs, @nsstack)
    end

    def reset_current_collection
      @collection_handler = nil
      @collection_config = nil
    end

    def reset_current_tag
      @name = nil
      @attrs = nil
      @value = nil
      @element_config = nil
    end

    def sax_config
      @object.class.sax_config
    end

    ##
    # Decodes XML special characters. Arrays are decoded element by element.
    def self.decode_xml(str)
      return str.map { |s| decode_xml(s) } if str.kind_of?(Array)

      CGI.unescapeHTML(str)
    end

  end
end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require "cgi"
4
+
5
+ require "sax-machine/sax_document"
6
+ require "sax-machine/sax_handler"
7
+ require "sax-machine/sax_config"
8
+ require "sax-machine/sax_event_recorder"
9
+
10
+ module SAXMachine
11
+ VERSION = "0.0.21"
12
+ end
@@ -0,0 +1,657 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "SAXMachine" do
4
+ describe "element" do
5
+ describe "when parsing a single element" do
6
+ before :each do
7
+ @klass = Class.new do
8
+ include SAXMachine
9
+ element :title
10
+ end
11
+ end
12
+
13
+ it "should provide an accessor" do
14
+ document = @klass.new
15
+ document.title = "Title"
16
+ document.title.should == "Title"
17
+ end
18
+
19
+ it "should allow introspection of the elements" do
20
+ @klass.column_names.should =~ [:title]
21
+ end
22
+
23
+ it "should not overwrite the setter if there is already one present" do
24
+ @klass = Class.new do
25
+ def title=(val)
26
+ @title = "#{val} **"
27
+ end
28
+ include SAXMachine
29
+ element :title
30
+ end
31
+ document = @klass.new
32
+ document.title = "Title"
33
+ document.title.should == "Title **"
34
+ end
35
+ describe "the class attribute" do
36
+ before(:each) do
37
+ @klass = Class.new do
38
+ include SAXMachine
39
+ element :date, :class => DateTime
40
+ end
41
+ @document = @klass.new
42
+ @document.date = DateTime.now.to_s
43
+ end
44
+ it "should be available" do
45
+ @klass.data_class(:date).should == DateTime
46
+ end
47
+ end
48
+ describe "the required attribute" do
49
+ it "should be available" do
50
+ @klass = Class.new do
51
+ include SAXMachine
52
+ element :date, :required => true
53
+ end
54
+ @klass.required?(:date).should be_true
55
+ end
56
+ end
57
+
58
+ it "should not overwrite the accessor when the element is not present" do
59
+ document = @klass.new
60
+ document.title = "Title"
61
+ document.parse("<foo></foo>")
62
+ document.title.should == "Title"
63
+ end
64
+
65
+ it "should *not* overwrite the value when the element is present (new behaviour!)" do
66
+ document = @klass.new
67
+ document.title = "Old title"
68
+ document.parse("<title>New title</title>")
69
+ document.title.should == "Old title"
70
+ end
71
+
72
+ it "should save the element text into an accessor" do
73
+ document = @klass.parse("<title>My Title</title>")
74
+ document.title.should == "My Title"
75
+ end
76
+
77
+ it "should save cdata into an accessor" do
78
+ document = @klass.parse("<title><![CDATA[A Title]]></title>")
79
+ document.title.should == "A Title"
80
+ end
81
+
82
+ it "should save the element text into an accessor when there are multiple elements" do
83
+ document = @klass.parse("<xml><title>My Title</title><foo>bar</foo></xml>")
84
+ document.title.should == "My Title"
85
+ end
86
+
87
+ it "should save the first element text when there are multiple of the same element" do
88
+ document = @klass.parse("<xml><title>My Title</title><title>bar</title></xml>")
89
+ document.title.should == "My Title"
90
+ end
91
+ end
92
+
93
+ describe "when parsing multiple elements" do
94
+ before :each do
95
+ @klass = Class.new do
96
+ include SAXMachine
97
+ element :title
98
+ element :name
99
+ end
100
+ end
101
+
102
+ it "should save the element text for a second tag" do
103
+ document = @klass.parse("<xml><title>My Title</title><name>Paul</name></xml>")
104
+ document.name.should == "Paul"
105
+ document.title.should == "My Title"
106
+ end
107
+ end
108
+
109
+ describe "when using options for parsing elements" do
110
+ describe "using the 'as' option" do
111
+ before :each do
112
+ @klass = Class.new do
113
+ include SAXMachine
114
+ element :description, :as => :summary
115
+ end
116
+ end
117
+
118
+ it "should provide an accessor using the 'as' name" do
119
+ document = @klass.new
120
+ document.summary = "a small summary"
121
+ document.summary.should == "a small summary"
122
+ end
123
+
124
+ it "should save the element text into the 'as' accessor" do
125
+ document = @klass.parse("<description>here is a description</description>")
126
+ document.summary.should == "here is a description"
127
+ end
128
+ end
129
+
130
+ describe "using the :with option" do
131
+ describe "and the :value option" do
132
+ before :each do
133
+ @klass = Class.new do
134
+ include SAXMachine
135
+ element :link, :value => :href, :with => {:foo => "bar"}
136
+ end
137
+ end
138
+
139
+ it "should escape correctly the ampersand" do
140
+ document = @klass.parse("<link href='http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&amp;lang=en-us&amp;format=atom' foo='bar'>asdf</link>")
141
+ document.link.should == "http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&lang=en-us&format=atom"
142
+ end
143
+
144
+ it "should save the value of a matching element" do
145
+ document = @klass.parse("<link href='test' foo='bar'>asdf</link>")
146
+ document.link.should == "test"
147
+ end
148
+
149
+ it "should save the value of the first matching element" do
150
+ document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' foo='bar' /></xml>")
151
+ document.link.should == "first"
152
+ end
153
+
154
+ describe "and the :as option" do
155
+ before :each do
156
+ @klass = Class.new do
157
+ include SAXMachine
158
+ element :link, :value => :href, :as => :url, :with => {:foo => "bar"}
159
+ element :link, :value => :href, :as => :second_url, :with => {:asdf => "jkl"}
160
+ end
161
+ end
162
+
163
+ it "should save the value of the first matching element" do
164
+ document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' asdf='jkl' /><link href='second' foo='bar' /></xml>")
165
+ document.url.should == "first"
166
+ document.second_url.should == "second"
167
+ end
168
+ end
169
+ end
170
+
171
+ describe "with only one element" do
172
+ before :each do
173
+ @klass = Class.new do
174
+ include SAXMachine
175
+ element :link, :with => {:foo => "bar"}
176
+ end
177
+ end
178
+
179
+ it "should save the text of an element that has matching attributes" do
180
+ document = @klass.parse("<link foo=\"bar\">match</link>")
181
+ document.link.should == "match"
182
+ end
183
+
184
+ it "should not save the text of an element that doesn't have matching attributes" do
185
+ document = @klass.parse("<link>no match</link>")
186
+ document.link.should be_nil
187
+ end
188
+
189
+ it "should save the text of an element that has matching attributes when it is the second of that type" do
190
+ document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">match</link></xml>")
191
+ document.link.should == "match"
192
+ end
193
+
194
+ it "should save the text of an element that has matching attributes plus a few more" do
195
+ document = @klass.parse("<xml><link>no match</link><link asdf='jkl' foo='bar'>match</link>")
196
+ document.link.should == "match"
197
+ end
198
+ end
199
+
200
+ describe "with multiple elements of same tag" do
201
+ before :each do
202
+ @klass = Class.new do
203
+ include SAXMachine
204
+ element :link, :as => :first, :with => {:foo => "bar"}
205
+ element :link, :as => :second, :with => {:asdf => "jkl"}
206
+ end
207
+ end
208
+
209
+ it "should match the first element" do
210
+ document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">first match</link><link>no match</link></xml>")
211
+ document.first.should == "first match"
212
+ end
213
+
214
+ it "should match the second element" do
215
+ document = @klass.parse("<xml><link>no match</link><link foo='bar'>first match</link><link asdf='jkl'>second match</link><link>hi</link></xml>")
216
+ document.second.should == "second match"
217
+ end
218
+ end
219
+ end # using the 'with' option
220
+
221
+ describe "using the 'value' option" do
222
+ before :each do
223
+ @klass = Class.new do
224
+ include SAXMachine
225
+ element :link, :value => :foo
226
+ end
227
+ end
228
+
229
+ it "should save the attribute value" do
230
+ document = @klass.parse("<link foo='test'>hello</link>")
231
+ document.link.should == 'test'
232
+ end
233
+
234
+ it "should save the attribute value when there is no text enclosed by the tag" do
235
+ document = @klass.parse("<link foo='test'></link>")
236
+ document.link.should == 'test'
237
+ end
238
+
239
+ it "should save the attribute value when the tag close is in the open" do
240
+ document = @klass.parse("<link foo='test'/>")
241
+ document.link.should == 'test'
242
+ end
243
+
244
+ it "should save two different attribute values on a single tag" do
245
+ @klass = Class.new do
246
+ include SAXMachine
247
+ element :link, :value => :foo, :as => :first
248
+ element :link, :value => :bar, :as => :second
249
+ end
250
+ document = @klass.parse("<link foo='foo value' bar='bar value'></link>")
251
+ document.first.should == "foo value"
252
+ document.second.should == "bar value"
253
+ end
254
+
255
+ it "should not fail if one of the attribute hasn't been defined" do
256
+ @klass = Class.new do
257
+ include SAXMachine
258
+ element :link, :value => :foo, :as => :first
259
+ element :link, :value => :bar, :as => :second
260
+ end
261
+ document = @klass.parse("<link foo='foo value'></link>")
262
+ document.first.should == "foo value"
263
+ document.second.should be_nil
264
+ end
265
+ end
266
+
267
+ describe "when desiring both the content and attributes of an element" do
268
+ before :each do
269
+ @klass = Class.new do
270
+ include SAXMachine
271
+ element :link
272
+ element :link, :value => :foo, :as => :link_foo
273
+ element :link, :value => :bar, :as => :link_bar
274
+ end
275
+ end
276
+
277
+ it "should parse the element and attribute values" do
278
+ document = @klass.parse("<link foo='test1' bar='test2'>hello</link>")
279
+ document.link.should == 'hello'
280
+ document.link_foo.should == 'test1'
281
+ document.link_bar.should == 'test2'
282
+ end
283
+ end
284
+
285
+ describe "when specifying namespaces" do
286
+ before :all do
287
+ @klass = Class.new do
288
+ include SAXMachine
289
+ element :a, :xmlns => 'urn:test'
290
+ element :b, :xmlns => ['', 'urn:test']
291
+ end
292
+ end
293
+
294
+ it "should get the element with the xmlns" do
295
+ document = @klass.parse("<a xmlns='urn:test'>hello</a>")
296
+ document.a.should == 'hello'
297
+ end
298
+
299
+ it "shouldn't get the element without the xmlns" do
300
+ document = @klass.parse("<a>hello</a>")
301
+ document.a.should be_nil
302
+ end
303
+
304
+ it "shouldn't get the element with the wrong xmlns" do
305
+ document = @klass.parse("<a xmlns='urn:test2'>hello</a>")
306
+ document.a.should be_nil
307
+ end
308
+
309
+ it "should get an element without xmlns if the empty namespace is desired" do
310
+ document = @klass.parse("<b>hello</b>")
311
+ document.b.should == 'hello'
312
+ end
313
+
314
+ it "should get an element with the right prefix" do
315
+ document = @klass.parse("<p:a xmlns:p='urn:test'>hello</p:a>")
316
+ document.a.should == 'hello'
317
+ end
318
+
319
+ it "should not get an element with the wrong prefix" do
320
+ document = @klass.parse("<x:a xmlns:p='urn:test' xmlns:x='urn:test2'>hello</x:a>")
321
+ document.a.should be_nil
322
+ end
323
+
324
+ it "should get a prefixed element without xmlns if the empty namespace is desired" do
325
+ pending "this needs a less pickier nokogiri push parser"
326
+ document = @klass.parse("<x:b>hello</x:b>")
327
+ document.b.should == 'hello'
328
+ end
329
+
330
+ it "should get the namespaced element even it's not first" do
331
+ document = @klass.parse("<root xmlns:a='urn:test'><a>foo</a><a>foo</a><a:a>bar</a:a></root>")
332
+ document.a.should == 'bar'
333
+ end
334
+
335
+ it "should parse multiple namespaces" do
336
+ klass = Class.new do
337
+ include SAXMachine
338
+ element :a, :xmlns => 'urn:test'
339
+ element :b, :xmlns => 'urn:test2'
340
+ end
341
+ document = klass.parse("<root xmlns='urn:test' xmlns:b='urn:test2'><b:b>bar</b:b><a>foo</a></root>")
342
+ document.a.should == 'foo'
343
+ document.b.should == 'bar'
344
+ end
345
+
346
+ context "when passing a default namespace" do
347
+ before :all do
348
+ @xmlns = 'urn:test'
349
+ class Inner
350
+ include SAXMachine
351
+ element :a, :xmlns => @xmlns
352
+ end
353
+ @outer = Class.new do
354
+ include SAXMachine
355
+ elements :root, :default_xmlns => @xmlns, :class => Inner
356
+ end
357
+ end
358
+
359
+ it "should replace the empty namespace with a default" do
360
+ document = @outer.parse("<root><a>Hello</a></root>")
361
+ document.root[0].a.should == 'Hello'
362
+ end
363
+
364
+ it "should not replace another namespace" do
365
+ document = @outer.parse("<root xmlns='urn:test2'><a>Hello</a></root>")
366
+ document.root[0].a.should == 'Hello'
367
+ end
368
+ end
369
+ end
370
+
371
+ end
372
+ end
373
+
374
+ describe "elements" do
375
+ describe "when parsing multiple elements" do
376
+ before :all do
377
+ @klass = Class.new do
378
+ include SAXMachine
379
+ elements :entry, :as => :entries
380
+ end
381
+ end
382
+
383
+ it "should provide a collection accessor" do
384
+ document = @klass.new
385
+ document.entries << :foo
386
+ document.entries.should == [:foo]
387
+ end
388
+
389
+ it "should parse a single element" do
390
+ document = @klass.parse("<entry>hello</entry>")
391
+ document.entries.should == ["hello"]
392
+ end
393
+
394
+ it "should parse multiple elements" do
395
+ document = @klass.parse("<xml><entry>hello</entry><entry>world</entry></xml>")
396
+ document.entries.should == ["hello", "world"]
397
+ end
398
+
399
+ it "should parse multiple elements when taking an attribute value" do
400
+ attribute_klass = Class.new do
401
+ include SAXMachine
402
+ elements :entry, :as => :entries, :value => :foo
403
+ end
404
+ doc = attribute_klass.parse("<xml><entry foo='asdf' /><entry foo='jkl' /></xml>")
405
+ doc.entries.should == ["asdf", "jkl"]
406
+ end
407
+ end
408
+
409
+ describe "when using the class option" do
410
+ before :each do
411
+ class Foo
412
+ include SAXMachine
413
+ element :title
414
+ end
415
+ @klass = Class.new do
416
+ include SAXMachine
417
+ elements :entry, :as => :entries, :class => Foo
418
+ end
419
+ end
420
+
421
+ it "should parse a single element with children" do
422
+ document = @klass.parse("<entry><title>a title</title></entry>")
423
+ document.entries.size.should == 1
424
+ document.entries.first.title.should == "a title"
425
+ end
426
+
427
+ it "should parse multiple elements with children" do
428
+ document = @klass.parse("<xml><entry><title>title 1</title></entry><entry><title>title 2</title></entry></xml>")
429
+ document.entries.size.should == 2
430
+ document.entries.first.title.should == "title 1"
431
+ document.entries.last.title.should == "title 2"
432
+ end
433
+
434
+ it "should not parse a top level element that is specified only in a child" do
435
+ document = @klass.parse("<xml><title>no parse</title><entry><title>correct title</title></entry></xml>")
436
+ document.entries.size.should == 1
437
+ document.entries.first.title.should == "correct title"
438
+ end
439
+
440
+ it "should parse out an attribute value from the tag that starts the collection" do
441
+ class Foo
442
+ element :entry, :value => :href, :as => :url
443
+ end
444
+ document = @klass.parse("<xml><entry href='http://pauldix.net'><title>paul</title></entry></xml>")
445
+ document.entries.size.should == 1
446
+ document.entries.first.title.should == "paul"
447
+ document.entries.first.url.should == "http://pauldix.net"
448
+ end
449
+ end
450
+
451
+ describe "when desiring sax events" do # specs for a collection declared with :events => true; each example compares document.body[0] with a list of event arrays
452
+ XHTML_XMLNS = "http://www.w3.org/1999/xhtml" # namespace URI used by the "should pass namespaces" example
453
+
454
+ before :all do # builds the anonymous parser class once for the whole group
455
+ @klass = Class.new do
456
+ include SAXMachine
457
+ elements :body, :events => true
458
+ end
459
+ end
460
+
461
+ it "should parse a simple child" do # an empty <p/> yields a start event followed by an end event
462
+ document = @klass.parse("<body><p/></body>")
463
+ document.body[0].should == [[:start_element, "", "p", []], # second field is "" when no namespace is declared; the trailing [] is presumably the attribute list — confirm
464
+ [:end_element, "", "p"]]
465
+ end
466
+ it "should parse a simple child with text" do # text content appears as a :chars event between start and end
467
+ document = @klass.parse("<body><p>Hello</p></body>")
468
+ document.body[0].should == [[:start_element, "", "p", []],
469
+ [:chars, "Hello"],
470
+ [:end_element, "", "p"]]
471
+ end
472
+ it "should parse nested children" do # the inner element's events sit between the outer element's start and end
473
+ document = @klass.parse("<body><p><span/></p></body>")
474
+ document.body[0].should == [[:start_element, "", "p", []],
475
+ [:start_element, "", "span", []],
476
+ [:end_element, "", "span"],
477
+ [:end_element, "", "p"]]
478
+ end
479
+ it "should parse multiple children" do # sibling elements are recorded one after another in document order
480
+ document = @klass.parse("<body><p>Hello</p><p>World</p></body>")
481
+ document.body[0].should == [[:start_element, "", "p", []],
482
+ [:chars, "Hello"],
483
+ [:end_element, "", "p"],
484
+ [:start_element, "", "p", []],
485
+ [:chars, "World"],
486
+ [:end_element, "", "p"]]
487
+ end
488
+ it "should pass namespaces" do # the default namespace declared on <body> is reported for the child <p>
489
+ document = @klass.parse("<body xmlns='#{XHTML_XMLNS}'><p/></body>")
490
+ document.body[0].should == [[:start_element, XHTML_XMLNS, "p", []],
491
+ [:end_element, XHTML_XMLNS, "p"]]
492
+ end
493
+ end
494
+ end
495
+
496
+ describe "full example" do # end-to-end spec: parses the atom.xml fixture with the Atom/AtomEntry classes declared below
497
+ XMLNS_ATOM = "http://www.w3.org/2005/Atom" # namespace passed to the :entry collection
498
+ XMLNS_FEEDBURNER = "http://rssnamespace.org/feedburner/ext/1.0" # namespace passed to the origLink element
499
+
500
+ before :each do # NOTE(review): runs before every example, so both classes are reopened and their element declarations re-run each time — confirm this is harmless
501
+ @xml = File.read('spec/sax-machine/atom.xml') # relative path, resolved against the process working directory
502
+ class AtomEntry
503
+ include SAXMachine
504
+ element :title
505
+ element :name, :as => :author # <name> is exposed as #author
506
+ element :origLink, :as => :orig_link, :xmlns => XMLNS_FEEDBURNER # exposed as #orig_link
507
+ element :summary
508
+ element :content
509
+ element :published
510
+ end
511
+
512
+ class Atom
513
+ include SAXMachine
514
+ element :title
515
+ element :link, :value => :href, :as => :url, :with => {:type => "text/html"} # href of the link whose type is text/html, exposed as #url
516
+ element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"} # href of the link whose type is application/atom+xml, exposed as #feed_url
517
+ elements :entry, :as => :entries, :class => AtomEntry, :xmlns => XMLNS_ATOM # each <entry> is parsed with AtomEntry and collected under #entries
518
+ end
519
+ end # before
520
+
521
+ it "should parse the url" do
522
+ f = Atom.parse(@xml)
523
+ f.url.should == "http://www.pauldix.net/"
524
+ end
525
+
526
+ it "should parse all entries" do # the fixture is expected to contain five entries
527
+ f = Atom.parse(@xml)
528
+ f.entries.length.should == 5
529
+ end
530
+
531
+ it "should parse the feedburner:origLink" do # checks the namespaced element on the first entry
532
+ f = Atom.parse(@xml)
533
+ f.entries[0].orig_link.should == 'http://www.pauldix.net/2008/09/marshal-data-to.html'
534
+ end
535
+ end
536
+
537
+ describe "another full example" do # parses an RSS 2.0 document through Root/Channel/Entry, which declare both RSS and Atom namespaced elements
538
+
539
+ RSS_XMLNS = 'http://purl.org/rss/1.0/'
540
+ ATOM_XMLNS = 'http://www.w3.org/2005/Atom'
541
+ class Entry # title and link declared once per namespace; the Atom link takes its value from the href attribute
542
+ include SAXMachine
543
+ element :title, :xmlns => RSS_XMLNS
544
+ element :title, :xmlns => ATOM_XMLNS
545
+ element :link, :xmlns => RSS_XMLNS
546
+ element :link, :xmlns => ATOM_XMLNS, :value => 'href'
547
+ end
548
+ class Channel # same title/link declarations as Entry, plus the entry collections
549
+ include SAXMachine
550
+ element :title, :xmlns => RSS_XMLNS
551
+ element :title, :xmlns => ATOM_XMLNS
552
+ element :link, :xmlns => RSS_XMLNS
553
+ element :link, :xmlns => ATOM_XMLNS, :value => 'href'
554
+ elements :entry, :as => :entries, :class => Entry # <entry> and <item> are both collected under #entries
555
+ elements :item, :as => :entries, :class => Entry
556
+ end
557
+ class Root # <rss> and <feed> are both collected under #channels
558
+ include SAXMachine
559
+ elements :rss, :as => :channels, :default_xmlns => RSS_XMLNS, :class => Channel # :default_xmlns is presumably the namespace assumed for un-prefixed elements — confirm in SAXMachine
560
+ elements :feed, :as => :channels, :default_xmlns => ATOM_XMLNS, :class => Channel
561
+ end
562
+
563
+ context "when parsing a complex example" do
564
+ before :all do # parses the inline document once and keeps its first channel in @document
565
+ @document = Root.parse(<<-eoxml).channels[0]
566
+ <?xml version="1.0" encoding="UTF-8"?>
567
+ <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"
568
+ xmlns:content="http://purl.org/rss/1.0/modules/content/"
569
+ xmlns:wfw="http://wellformedweb.org/CommentAPI/"
570
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
571
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
572
+ xmlns:cc="http://web.resource.org/cc/">
573
+ <channel>
574
+ <title>Delicious/tag/pubsubhubbub</title>
575
+ <atom:link rel="self" type="application/rss+xml" href="http://feeds.delicious.com/v2/rss/tag/pubsubhubbub?count=15"/>
576
+ <link>http://delicious.com/tag/pubsubhubbub</link>
577
+ <description>recent bookmarks tagged pubsubhubbub</description>
578
+ </channel>
579
+ </rss>
580
+ eoxml
581
+ end
582
+ it "should parse the title" do
583
+ @document.title.should == 'Delicious/tag/pubsubhubbub'
584
+ end
585
+ it "should parse the link" do # the expected value is the href of <atom:link>, not the text of the plain <link>
586
+ @document.link.should == 'http://feeds.delicious.com/v2/rss/tag/pubsubhubbub?count=15'
587
+ end
588
+ end
589
+ end
590
+
591
+ describe "yet another full example" do
592
+
593
+ context "when parsing a Twitter example" do
594
+ before :all do
595
+
596
+ RSS_XMLNS = ['http://purl.org/rss/1.0/', '']
597
+
598
+ ATOM_XMLNS = 'http://www.w3.org/2005/Atom' unless defined? ATOM_XMLNS
599
+ class Link
600
+ include SAXMachine
601
+ end
602
+
603
+ class Entry
604
+ include SAXMachine
605
+ element :title, :xmlns => RSS_XMLNS
606
+ element :link, :xmlns => RSS_XMLNS, :as => :entry_link
607
+ element :title, :xmlns => ATOM_XMLNS, :as => :title
608
+ elements :link, :xmlns => ATOM_XMLNS, :as => :links, :class => Link
609
+ end
610
+
611
+ class Feed
612
+ include SAXMachine
613
+ element :title, :xmlns => RSS_XMLNS, :as => :title
614
+ element :link, :xmlns => RSS_XMLNS, :as => :feed_link
615
+ elements :item, :xmlns => RSS_XMLNS, :as => :entries, :class => Entry
616
+ element :title, :xmlns => ATOM_XMLNS, :as => :title
617
+ elements :link, :xmlns => ATOM_XMLNS, :as => :links, :class => Link
618
+ end
619
+
620
+ @document = Feed.parse(<<-eoxml)
621
+ <?xml version="1.0" encoding="UTF-8"?>
622
+ <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
623
+ <channel>
624
+ <atom:link type="application/rss+xml" rel="self" href="http://twitter.com/statuses/user_timeline/5381582.rss"/>
625
+ <title>Twitter / julien51</title>
626
+ <link>http://twitter.com/julien51</link>
627
+ <description>Twitter updates from julien / julien51.</description>
628
+ <language>en-us</language>
629
+ <ttl>40</ttl>
630
+ <item>
631
+ <title>julien51: @github : I get an error when trying to build one of my gems (julien51-sax-machine), it seems related to another gem's gemspec.</title>
632
+ <description>julien51: @github : I get an error when trying to build one of my gems (julien51-sax-machine), it seems related to another gem's gemspec.</description>
633
+ <pubDate>Thu, 30 Jul 2009 01:00:30 +0000</pubDate>
634
+ <guid>http://twitter.com/julien51/statuses/2920716033</guid>
635
+ <link>http://twitter.com/julien51/statuses/2920716033</link>
636
+ </item>
637
+ <item>
638
+ <title>julien51: Hum, San Francisco's summer are delightful. http://bit.ly/VeXt4</title>
639
+ <description>julien51: Hum, San Francisco's summer are delightful. http://bit.ly/VeXt4</description>
640
+ <pubDate>Wed, 29 Jul 2009 23:07:32 +0000</pubDate>
641
+ <guid>http://twitter.com/julien51/statuses/2918869948</guid>
642
+ <link>http://twitter.com/julien51/statuses/2918869948</link>
643
+ </item>
644
+ </channel>
645
+ </rss>
646
+ eoxml
647
+ end
648
+ it "should parse the title" do
649
+ @document.title.should == 'Twitter / julien51'
650
+ end
651
+
652
+ it "should find an entry" do
653
+ @document.entries.length.should == 2
654
+ end
655
+ end
656
+ end
657
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
@@ -0,0 +1,13 @@
1
+ require "rubygems"
2
+ require "spec"
3
+
4
+ # gem install redgreen for colored test output
5
+ begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end
6
+
7
+ path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
8
+ $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
9
+
10
+ require "lib/sax-machine"
11
+
12
+ # Spec::Runner.configure do |config|
13
+ # end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: superfeedr-sax-machine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.22.1
5
+ platform: ruby
6
+ authors:
7
+ - Paul Dix
8
+ - Stephan Maka
9
+ - julien51
10
+ - superfeedr
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+
15
+ date: 2009-11-16 00:00:00 +01:00
16
+ default_executable:
17
+ dependencies:
18
+ - !ruby/object:Gem::Dependency
19
+ name: nokogiri
20
+ type: :runtime
21
+ version_requirement:
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.0
27
+ version:
28
+ description:
29
+ email: paul@pauldix.net stephan@spaceboyz.net
30
+ executables: []
31
+
32
+ extensions: []
33
+
34
+ extra_rdoc_files: []
35
+
36
+ files:
37
+ - lib/sax-machine.rb
38
+ - lib/sax-machine/sax_config.rb
39
+ - lib/sax-machine/sax_collection_config.rb
40
+ - lib/sax-machine/sax_element_config.rb
41
+ - lib/sax-machine/sax_document.rb
42
+ - lib/sax-machine/sax_handler.rb
43
+ - lib/sax-machine/ns_stack.rb
44
+ - lib/sax-machine/sax_event_recorder.rb
45
+ - README.textile
46
+ - Rakefile
47
+ - spec/spec.opts
48
+ - spec/spec_helper.rb
49
+ - spec/sax-machine/sax_document_spec.rb
50
+ has_rdoc: true
51
+ homepage: http://github.com/pauldix/sax-machine
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options: []
56
+
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: "0"
64
+ version:
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: "0"
70
+ version:
71
+ requirements: []
72
+
73
+ rubyforge_project:
74
+ rubygems_version: 1.3.5
75
+ signing_key:
76
+ specification_version: 2
77
+ summary: Declarative SAX Parsing with Nokogiri
78
+ test_files: []
79
+