sax-machine-patched 0.2.0.rc2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/.travis.yml +12 -0
- data/Gemfile +9 -0
- data/Guardfile +5 -0
- data/HISTORY.md +13 -0
- data/README.md +107 -0
- data/Rakefile +6 -0
- data/lib/sax-machine.rb +8 -0
- data/lib/sax-machine/sax_ancestor_config.rb +21 -0
- data/lib/sax-machine/sax_attribute_config.rb +40 -0
- data/lib/sax-machine/sax_collection_config.rb +37 -0
- data/lib/sax-machine/sax_config.rb +74 -0
- data/lib/sax-machine/sax_configure.rb +38 -0
- data/lib/sax-machine/sax_document.rb +118 -0
- data/lib/sax-machine/sax_element_config.rb +65 -0
- data/lib/sax-machine/sax_element_value_config.rb +24 -0
- data/lib/sax-machine/sax_handler.rb +192 -0
- data/lib/sax-machine/version.rb +3 -0
- data/sax-machine.gemspec +24 -0
- data/spec/benchmarks/amazon.xml +40 -0
- data/spec/benchmarks/benchmark.rb +158 -0
- data/spec/benchmarks/public_timeline.xml +411 -0
- data/spec/sax-machine/atom.xml +165 -0
- data/spec/sax-machine/configure_sax_machine_spec.rb +53 -0
- data/spec/sax-machine/include_sax_machine_spec.rb +42 -0
- data/spec/sax-machine/sax_document_spec.rb +838 -0
- data/spec/spec_helper.rb +15 -0
- metadata +115 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
data/HISTORY.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# 0.2.0.rc1
|
2
|
+
* Tried to reduce the number of instances of respond_to? in the code by
|
3
|
+
pulling common uses of it out to methods. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
4
|
+
* The parse stack is now composed of simple objects instead of it being
|
5
|
+
an array of arrays. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
6
|
+
* Now using an identifier for an empty buffer instead of empty string. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
7
|
+
* Cleaned up several variables that were not being used. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
8
|
+
* Encapsulated stack so it's not being exposed as part of the API. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
9
|
+
* #cdata_block is now an alias instead of delegating to characters. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
10
|
+
|
11
|
+
# 0.1.0
|
12
|
+
* rename parent to ancestor
|
13
|
+
* added SAXMachine.configure
|
data/README.md
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
# SAX Machine [![Build Status](https://secure.travis-ci.org/pauldix/sax-machine.png?branch=master)](http://travis-ci.org/pauldix/sax-machine)
|
2
|
+
|
3
|
+
[Wiki](https://github.com/pauldix/sax-machine/wiki)
|
4
|
+
|
5
|
+
## Description
|
6
|
+
|
7
|
+
A declarative SAX parsing library backed by Nokogiri
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
```ruby
|
11
|
+
require 'sax-machine'
|
12
|
+
|
13
|
+
# Class for information associated with content parts in a feed.
|
14
|
+
# Ex: <content type="text">sample</content>
|
15
|
+
# instance.type will be "text", instance.text will be "sample"
|
16
|
+
class AtomContent
|
17
|
+
include SAXMachine
|
18
|
+
attribute :type
|
19
|
+
value :text
|
20
|
+
end
|
21
|
+
|
22
|
+
# Class for parsing an atom entry out of a feedburner atom feed
|
23
|
+
class AtomEntry
|
24
|
+
include SAXMachine
|
25
|
+
element :title
|
26
|
+
# the :as argument makes this available through atom_entry.author instead of .name
|
27
|
+
element :name, :as => :author
|
28
|
+
element "feedburner:origLink", :as => :url
|
29
|
+
element :summary
|
30
|
+
element :content, :class => AtomContent
|
31
|
+
element :published
|
32
|
+
ancestor :ancestor
|
33
|
+
end
|
34
|
+
|
35
|
+
# Class for parsing Atom feeds
|
36
|
+
class Atom
|
37
|
+
include SAXMachine
|
38
|
+
element :title
|
39
|
+
# the :with argument means that you only match a link tag that has an attribute of :type => "text/html"
|
40
|
+
# the :value argument means that instead of setting the value to the text between the tag,
|
41
|
+
# it sets it to the attribute value of :href
|
42
|
+
element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
|
43
|
+
element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
|
44
|
+
elements :entry, :as => :entries, :class => AtomEntry
|
45
|
+
end
|
46
|
+
|
47
|
+
# you can then parse like this
|
48
|
+
feed = Atom.parse(xml_text)
|
49
|
+
# then you're ready to rock
|
50
|
+
feed.title # => whatever the title of the blog is
|
51
|
+
feed.url # => the main url of the blog
|
52
|
+
feed.feed_url # => goes to the feedburner feed
|
53
|
+
|
54
|
+
feed.entries.first.title # => title of the first entry
|
55
|
+
feed.entries.first.author # => the author of the first entry
|
56
|
+
feed.entries.first.url # => the permalink on the blog for this entry
|
57
|
+
feed.entries.first.ancestor # => the Atom ancestor
|
58
|
+
# etc ...
|
59
|
+
|
60
|
+
# you can also use the elements method without specifying a class like so
|
61
|
+
class SomeServiceResponse
|
62
|
+
include SAXMachine
|
63
|
+
elements :message, :as => :messages
|
64
|
+
end
|
65
|
+
|
66
|
+
response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
|
67
|
+
response.messages.first # => "hi"
|
68
|
+
response.messages.last # => "world"
|
69
|
+
|
70
|
+
# To limit conflicts in the class used for mapping, you can use the alternate SAXMachine.configure syntax
|
71
|
+
|
72
|
+
class X < ActiveRecord::Base
|
73
|
+
# this way no element, elements or ancestor method will be added to X
|
74
|
+
SAXMachine.configure(X) do |c|
|
75
|
+
c.element :title
|
76
|
+
end
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
## LICENSE
|
81
|
+
|
82
|
+
The MIT License
|
83
|
+
|
84
|
+
Copyright (c) 2009-2012:
|
85
|
+
|
86
|
+
* [Paul Dix](http://www.pauldix.net)
|
87
|
+
* [Julien Kirch](http://www.archiloque.net)
|
88
|
+
* [Ezekiel Templin](http://zeke.templ.in)
|
89
|
+
|
90
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
91
|
+
a copy of this software and associated documentation files (the
|
92
|
+
'Software'), to deal in the Software without restriction, including
|
93
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
94
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
95
|
+
permit persons to whom the Software is furnished to do so, subject to
|
96
|
+
the following conditions:
|
97
|
+
|
98
|
+
The above copyright notice and this permission notice shall be
|
99
|
+
included in all copies or substantial portions of the Software.
|
100
|
+
|
101
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
102
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
103
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
104
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
105
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
106
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
107
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
data/lib/sax-machine.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Describes a single `ancestor` declaration: the declared name, the
    # writer method used to assign it, and the column it is exposed as.
    class AncestorConfig
      attr_reader :name, :setter

      # name    - declared name (String or Symbol); stored as a String.
      # options - Hash of declaration options; only :as is read here.
      def initialize(name, options)
        @name = name.to_s

        @as = options[:as]
        # Fall back to the declared name when no :as alias was supplied, the
        # same way #column does. Interpolating a nil @as produced the
        # unusable writer name "=".
        @setter = "#{@as || @name}="
      end

      # Returns the :as alias when one was given, otherwise the declared
      # name as a Symbol.
      def column
        @as || @name.to_sym
      end

    end

  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Describes a single `attribute` declaration: which XML attribute to
    # read, which writer method receives it, and whether it is required.
    class AttributeConfig
      attr_reader :name, :setter

      # name    - XML attribute name (String or Symbol); stored as a String.
      # options - Hash of declaration options; :as and :required are read.
      def initialize(name, options)
        @name = name.to_s
        @as = options[:as]
        # Fall back to the attribute name when no :as alias was supplied, the
        # same way #column does. Interpolating a nil @as produced the
        # unusable writer name "=".
        @setter = "#{@as || @name}="
        @required = options[:required]
      end

      # Returns the :as alias when one was given, otherwise the attribute
      # name as a Symbol.
      def column
        @as || @name.to_sym
      end

      # Returns the raw :required option (nil when it was not supplied).
      def required?
        @required
      end

      # Returns the value stored under this attribute's name in +attrs+, or
      # nil when the key is absent.
      def value_from_attrs(attrs)
        attrs.fetch(@name, nil)
      end

      # True when +attrs+ has this attribute's name as a key OR as a value.
      # NOTE(review): the value? branch also matches an element whose
      # attribute *value* happens to equal this name; presumably a leftover
      # from a different attrs representation - confirm before relying on it.
      def attrs_match?(attrs)
        attrs.key?(@name) || attrs.value?(@name)
      end

      # Same check as #attrs_match?; kept as a separate method so attribute
      # configs answer the same query the element configs are asked.
      def has_value_and_attrs_match?(attrs)
        attrs_match?(attrs)
      end

      # An attribute never represents a collection.
      def collection?
        false
      end
    end

  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Describes a single `elements` declaration that maps a repeated tag to
    # a collection of objects.
    class CollectionConfig
      attr_reader :name

      # name    - tag name (String or Symbol); stored as a String.
      # options - Hash of declaration options; :class, :as and :with are
      #           read. :with defaults to an empty Hash.
      def initialize(name, options)
        @name = name.to_s
        @class = options[:class]
        @as = options[:as].to_s
        @with = options.fetch(:with, {})
      end

      # Name of the collection accessor (the :as option as a String).
      def accessor
        as
      end

      # True when every :with condition matches the corresponding entry of
      # +attrs+. Conditions are compared with ===, and keys are looked up in
      # +attrs+ by their String form.
      def attrs_match?(attrs)
        @with.all? { |attr_name, expected| expected === attrs[attr_name.to_s] }
      end

      # The :class option when given, otherwise the tag name.
      def data_class
        @class || @name
      end

      protected

      # Stringified :as option; only reachable from instances of this class.
      attr_reader :as

    end

  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require "sax-machine/sax_attribute_config"
|
2
|
+
require "sax-machine/sax_element_value_config"
|
3
|
+
require "sax-machine/sax_element_config"
|
4
|
+
require "sax-machine/sax_collection_config"
|
5
|
+
require "sax-machine/sax_ancestor_config"
|
6
|
+
|
7
|
+
module SAXMachine
  # Holds every declaration made on a SAXMachine class: element, attribute,
  # element-value, collection and ancestor configs.
  class SAXConfig

    attr_accessor :top_level_elements, :top_level_attributes, :top_level_element_value, :collection_elements, :ancestors

    def initialize
      # Tag name => Array of configs; unknown tags default to an empty array.
      @top_level_elements = new_config_map
      @top_level_attributes = []
      @top_level_element_value = []
      @collection_elements = new_config_map
      @ancestors = []
    end

    # Every top-level element config, across all tag names, as a flat Array.
    def columns
      @top_level_elements.values.flatten
    end

    # Copies the declarations of +sax_config+ into this instance.
    #
    # The per-tag arrays inside the two hashes are duplicated as well. A
    # plain Hash#clone would leave them shared, so a declaration added to the
    # copy for an already-known tag would also show up in the original.
    def initialize_copy(sax_config)
      super
      @top_level_elements = copy_config_map(sax_config.top_level_elements)
      @top_level_attributes = sax_config.top_level_attributes.clone
      @top_level_element_value = sax_config.top_level_element_value.clone
      @collection_elements = copy_config_map(sax_config.collection_elements)
      @ancestors = sax_config.ancestors.clone
    end

    # Registers an ElementConfig under the stringified tag name.
    def add_top_level_element(name, options)
      @top_level_elements[name.to_s] << ElementConfig.new(name, options)
    end

    # Registers an AttributeConfig. The attribute name is taken from
    # options[:name], which is removed from +options+; the +name+ argument
    # itself is not used.
    def add_top_level_attribute(name, options)
      @top_level_attributes << AttributeConfig.new(options.delete(:name), options)
    end

    # Registers an ElementValueConfig. As with attributes, the name comes
    # from options[:name] and the +name+ argument is not used.
    def add_top_level_element_value(name, options)
      @top_level_element_value << ElementValueConfig.new(options.delete(:name), options)
    end

    # Registers a CollectionConfig under the stringified tag name.
    def add_collection_element(name, options)
      @collection_elements[name.to_s] << CollectionConfig.new(name, options)
    end

    # Registers an AncestorConfig.
    def add_ancestor(name, options)
      @ancestors << AncestorConfig.new(name, options)
    end

    # First collection config for +name+ whose conditions match +attrs+, or nil.
    def collection_config(name, attrs)
      @collection_elements[name.to_s].detect { |cc| cc.attrs_match?(attrs) }
    end

    # All attribute configs that match +attrs+.
    def attribute_configs_for_element(attrs)
      @top_level_attributes.select { |aa| aa.attrs_match?(attrs) }
    end

    # All element-value configs.
    def element_values_for_element
      @top_level_element_value
    end

    # All element configs for +name+ that answer true to
    # has_value_and_attrs_match?(attrs).
    def element_configs_for_attribute(name, attrs)
      @top_level_elements[name.to_s].select { |ec| ec.has_value_and_attrs_match?(attrs) }
    end

    # First element config for +name+ whose conditions match +attrs+, or nil.
    def element_config_for_tag(name, attrs)
      @top_level_elements[name.to_s].detect { |ec| ec.attrs_match?(attrs) }
    end

    private

    # Builds an empty tag-name => configs Hash whose missing keys default to
    # a fresh Array per key.
    def new_config_map
      Hash.new { |hash, key| hash[key] = [] }
    end

    # Returns a new config map holding a duplicate of every per-tag Array in
    # +map+ (the config objects themselves are not duplicated).
    def copy_config_map(map)
      copy = new_config_map
      map.each { |tag, configs| copy[tag] = configs.dup }
      copy
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module SAXMachine

  # Configures +clazz+ for parsing without adding the declaration methods
  # to it.
  #
  # An anonymous subclass of +clazz+ receives the full SAXMachine mixin and
  # is yielded to the block so declarations can be made on it. Accessors for
  # declared names are created on +clazz+ itself. Afterwards +clazz+ is
  # extended with LightWeightSaxMachine, given the subclass's sax_config,
  # and gets a singleton +parse+ that delegates to the subclass.
  def self.configure(clazz)
    extended_clazz = Class.new(clazz)
    extended_clazz.send(:include, SAXMachine)

    # override create_attr to create attributes on the original class
    def extended_clazz.create_attr real_name
      superclass.send(:attr_reader, real_name) unless superclass.method_defined?(real_name)
      superclass.send(:attr_writer, real_name) unless superclass.method_defined?("#{real_name}=")
    end

    yield(extended_clazz)

    clazz.extend LightWeightSaxMachine
    clazz.sax_config = extended_clazz.sax_config

    # Forward any extra arguments (the on_error / on_warning callbacks that
    # the subclass's parse accepts) instead of accepting only the XML text.
    (class << clazz; self; end).send(:define_method, :parse) do |xml_text, *callbacks|
      extended_clazz.parse(xml_text, *callbacks)
    end

  end

  # Minimal class-level support added to classes set up through
  # SAXMachine.configure: storage for the sax_config and propagation of it
  # to subclasses.
  module LightWeightSaxMachine

    attr_writer :sax_config

    # Lazily created configuration for the extended class.
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # Copies this class's configuration into each new subclass. Calls super
    # first so an +inherited+ hook defined further up the host class's
    # hierarchy still runs.
    def inherited(subclass)
      super
      subclass.sax_config.send(:initialize_copy, self.sax_config)
    end

  end

end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
module SAXMachine

  # Mixing in SAXMachine adds the hash-based constructor to instances and
  # the declaration DSL to the class.
  def self.included(base)
    base.send(:include, InstanceMethods)
    base.extend ClassMethods
  end

  # Parses +xml_text+ into this object and returns self.
  #
  # xml_text   - the XML handed to the Nokogiri SAX parser.
  # on_error   - passed through to SAXHandler (optional).
  # on_warning - passed through to SAXHandler (optional).
  def parse(xml_text, on_error = nil, on_warning = nil)
    sax_handler = SAXHandler.new(self, on_error, on_warning)
    parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
    parser.parse(xml_text) do |ctx|
      # NOTE(review): entity replacement is switched on for every document;
      # confirm this is acceptable when parsing untrusted XML.
      ctx.replace_entities = true
    end
    self
  end

  module InstanceMethods
    # Assigns each name => value pair through the matching writer method.
    def initialize(attributes = {})
      attributes.each do |name, value|
        send("#{name}=", value)
      end
    end
  end

  module ClassMethods

    # Copies this class's configuration into each new subclass. Calls super
    # first so an +inherited+ hook defined further up the host class's
    # hierarchy still runs.
    def inherited(subclass)
      super
      subclass.sax_config.send(:initialize_copy, self.sax_config)
    end

    # Builds a new instance and parses +xml_text+ into it.
    def parse(xml_text, on_error = nil, on_warning = nil)
      new.parse(xml_text, on_error, on_warning)
    end

    # Declares a single-valued element. The accessor is named after :as, or
    # after +name+ when :as is absent. The caller's options hash is left
    # untouched so it can be reused for other declarations.
    def element(name, options = {})
      options = options.merge(:as => options[:as] || name)
      sax_config.add_top_level_element(name, options)
      create_attr options[:as].to_s
    end

    # Declares an XML attribute of the element being parsed.
    def attribute(name, options = {})
      options = options.merge(:as => options[:as] || name)
      # The attribute name travels in options[:name]; this first argument is
      # kept exactly as it was (the string form of self.class).
      sax_config.add_top_level_attribute(self.class.to_s, options.merge(:name => name))
      create_attr options[:as].to_s
    end

    # Declares an accessor for the element's own value.
    def value(name, options = {})
      options = options.merge(:as => options[:as] || name)
      # As in #attribute, the real name travels in options[:name].
      sax_config.add_top_level_element_value(self.class.to_s, options.merge(:name => name))
      create_attr options[:as].to_s
    end

    # Declares an ancestor accessor.
    def ancestor(name, options = {})
      options = options.merge(:as => options[:as] || name)
      sax_config.add_ancestor(name, options)
      create_attr(options[:as].to_s)
    end

    # All top-level element configs of this class.
    def columns
      sax_config.columns
    end

    # The first element config whose column equals +sym+, or nil.
    def column(sym)
      columns.detect { |c| c.column == sym }
    end

    # data_class of the column named +sym+. Raises NoMethodError when no
    # such column exists, because #column then returns nil.
    def data_class(sym)
      column(sym).data_class
    end

    # required? of the column named +sym+. Raises NoMethodError when no
    # such column exists, because #column then returns nil.
    def required?(sym)
      column(sym).required?
    end

    # The column of every top-level element config.
    def column_names
      columns.map { |e| e.column }
    end

    # Declares a repeated element collected into an Array.
    #
    # With :class, the tag is registered as a collection element. Without
    # it, an add_<as> appender is generated and the tag is registered as a
    # top-level element flagged :collection => true. In both cases a lazily
    # initialised reader and a writer are defined unless already present.
    def elements(name, options = {})
      options = options.merge(:as => options[:as] || name)
      if options[:class]
        sax_config.add_collection_element(name, options)
      else
        class_eval <<-SRC
          def add_#{options[:as]}(value)
            #{options[:as]} << value
          end
        SRC
        sax_config.add_top_level_element(name, options.merge(:collection => true))
      end

      unless method_defined?(options[:as].to_s)
        class_eval <<-SRC
          def #{options[:as]}
            @#{options[:as]} ||= []
          end
        SRC
      end

      attr_writer options[:as] unless method_defined?("#{options[:as]}=")
    end

    # Lazily created configuration for this class.
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # we only want to insert the getter and setter if they haven't defined it from elsewhere.
    # this is how we allow custom parsing behavior. So you could define the setter
    # and have it parse the string into a date or whatever.
    def create_attr real_name
      attr_reader real_name unless method_defined?(real_name)
      attr_writer real_name unless method_defined?("#{real_name}=")
    end
  end

end
|