sax-machine-patched 0.2.0.rc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/.travis.yml +12 -0
- data/Gemfile +9 -0
- data/Guardfile +5 -0
- data/HISTORY.md +13 -0
- data/README.md +107 -0
- data/Rakefile +6 -0
- data/lib/sax-machine.rb +8 -0
- data/lib/sax-machine/sax_ancestor_config.rb +21 -0
- data/lib/sax-machine/sax_attribute_config.rb +40 -0
- data/lib/sax-machine/sax_collection_config.rb +37 -0
- data/lib/sax-machine/sax_config.rb +74 -0
- data/lib/sax-machine/sax_configure.rb +38 -0
- data/lib/sax-machine/sax_document.rb +118 -0
- data/lib/sax-machine/sax_element_config.rb +65 -0
- data/lib/sax-machine/sax_element_value_config.rb +24 -0
- data/lib/sax-machine/sax_handler.rb +192 -0
- data/lib/sax-machine/version.rb +3 -0
- data/sax-machine.gemspec +24 -0
- data/spec/benchmarks/amazon.xml +40 -0
- data/spec/benchmarks/benchmark.rb +158 -0
- data/spec/benchmarks/public_timeline.xml +411 -0
- data/spec/sax-machine/atom.xml +165 -0
- data/spec/sax-machine/configure_sax_machine_spec.rb +53 -0
- data/spec/sax-machine/include_sax_machine_spec.rb +42 -0
- data/spec/sax-machine/sax_document_spec.rb +838 -0
- data/spec/spec_helper.rb +15 -0
- metadata +115 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
data/HISTORY.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# 0.2.0.rc1
|
2
|
+
* Tried to reduce the number of instances of respond_to? in the code by
|
3
|
+
pulling common uses of it out to methods. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
4
|
+
* The parse stack is now composed of simple objects instead of it being
|
5
|
+
an array of arrays. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
6
|
+
* Now using an identifier for an empty buffer instead of empty string. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
7
|
+
* Cleaned up several variables that were not being used. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
8
|
+
* Encapsulated stack so it's not being exposed as part of the API. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
9
|
+
* #cdata_block is now an alias instead of delegating to characters. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
10
|
+
|
11
|
+
# 0.1.0
|
12
|
+
* rename parent to ancestor
|
13
|
+
* added SAXMachine.configure
|
data/README.md
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
# SAX Machine [Build Status](http://travis-ci.org/pauldix/sax-machine)
|
2
|
+
|
3
|
+
[Wiki](https://github.com/pauldix/sax-machine/wiki)
|
4
|
+
|
5
|
+
## Description
|
6
|
+
|
7
|
+
A declarative SAX parsing library backed by Nokogiri
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
```ruby
|
11
|
+
require 'sax-machine'
|
12
|
+
|
13
|
+
# Class for information associated with content parts in a feed.
|
14
|
+
# Ex: <content type="text">sample</content>
|
15
|
+
# instance.type will be "text", instance.text will be "sample"
|
16
|
+
class AtomContent
|
17
|
+
include SAXMachine
|
18
|
+
attribute :type
|
19
|
+
value :text
|
20
|
+
end
|
21
|
+
|
22
|
+
# Class for parsing an atom entry out of a feedburner atom feed
|
23
|
+
class AtomEntry
|
24
|
+
include SAXMachine
|
25
|
+
element :title
|
26
|
+
# the :as argument makes this available through atom_entry.author instead of .name
|
27
|
+
element :name, :as => :author
|
28
|
+
element "feedburner:origLink", :as => :url
|
29
|
+
element :summary
|
30
|
+
element :content, :class => AtomContent
|
31
|
+
element :published
|
32
|
+
ancestor :ancestor
|
33
|
+
end
|
34
|
+
|
35
|
+
# Class for parsing Atom feeds
|
36
|
+
class Atom
|
37
|
+
include SAXMachine
|
38
|
+
element :title
|
39
|
+
# the :with argument means that you only match a link tag that has an attribute of :type => "text/html"
|
40
|
+
# the :value argument means that instead of setting the value to the text between the tag,
|
41
|
+
# it sets it to the attribute value of :href
|
42
|
+
element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
|
43
|
+
element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
|
44
|
+
elements :entry, :as => :entries, :class => AtomEntry
|
45
|
+
end
|
46
|
+
|
47
|
+
# you can then parse like this
|
48
|
+
feed = Atom.parse(xml_text)
|
49
|
+
# then you're ready to rock
|
50
|
+
feed.title # => whatever the title of the blog is
|
51
|
+
feed.url # => the main url of the blog
|
52
|
+
feed.feed_url # => goes to the feedburner feed
|
53
|
+
|
54
|
+
feed.entries.first.title # => title of the first entry
|
55
|
+
feed.entries.first.author # => the author of the first entry
|
56
|
+
feed.entries.first.url # => the permalink on the blog for this entry
|
57
|
+
feed.entries.first.ancestor # => the Atom ancestor
|
58
|
+
# etc ...
|
59
|
+
|
60
|
+
# you can also use the elements method without specifying a class like so
|
61
|
+
class SomeServiceResponse
|
62
|
+
include SAXMachine
|
63
|
+
elements :message, :as => :messages
|
64
|
+
end
|
65
|
+
|
66
|
+
response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
|
67
|
+
response.messages.first # => "hi"
|
68
|
+
response.messages.last # => "world"
|
69
|
+
|
70
|
+
# To limit conflicts in the class used for mapping, you can use the alternate SAXMachine.configure syntax
|
71
|
+
|
72
|
+
class X < ActiveRecord::Base
|
73
|
+
# this way no element, elements or ancestor method will be added to X
|
74
|
+
SAXMachine.configure(X) do |c|
|
75
|
+
c.element :title
|
76
|
+
end
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
## LICENSE
|
81
|
+
|
82
|
+
The MIT License
|
83
|
+
|
84
|
+
Copyright (c) 2009-2012:
|
85
|
+
|
86
|
+
* [Paul Dix](http://www.pauldix.net)
|
87
|
+
* [Julien Kirch](http://www.archiloque.net)
|
88
|
+
* [Ezekiel Templin](http://zeke.templ.in)
|
89
|
+
|
90
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
91
|
+
a copy of this software and associated documentation files (the
|
92
|
+
'Software'), to deal in the Software without restriction, including
|
93
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
94
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
95
|
+
permit persons to whom the Software is furnished to do so, subject to
|
96
|
+
the following conditions:
|
97
|
+
|
98
|
+
The above copyright notice and this permission notice shall be
|
99
|
+
included in all copies or substantial portions of the Software.
|
100
|
+
|
101
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
102
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
103
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
104
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
105
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
106
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
107
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
data/lib/sax-machine.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Settings recorded for a single +ancestor+ declaration: the declared
    # name and the name of the writer method that goes with it.
    class AncestorConfig
      attr_reader :name, :setter

      # name    - declared name (String or Symbol); stored as a String.
      # options - Hash; +:as+ optionally names the attribute to expose.
      def initialize(name, options)
        @name = name.to_s

        @as = options[:as]
        # Build the writer name from the same identifier #column reports, so a
        # missing :as yields "<name>=" rather than a bare "=".
        @setter = "#{column}="
      end

      # Returns the +:as+ option when given, otherwise the name as a Symbol.
      def column
        @as || @name.to_sym
      end

    end

  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Settings recorded for a single +attribute+ declaration.
    class AttributeConfig
      attr_reader :name, :setter

      # name    - XML attribute name (String or Symbol); stored as a String.
      # options - Hash; +:as+ names the attribute to expose, +:required+ is
      #           stored as given and reported by #required?.
      def initialize(name, options)
        @name = name.to_s
        @as = options[:as]
        # Build the writer name from the same identifier #column reports, so a
        # missing :as yields "<name>=" rather than a bare "=".
        @setter = "#{column}="
        @required = options[:required]
      end

      # Returns the +:as+ option when given, otherwise the name as a Symbol.
      def column
        @as || @name.to_sym
      end

      # Returns the +:required+ option exactly as it was passed (nil if absent).
      def required?
        @required
      end

      # Returns the value stored under this attribute's name in +attrs+,
      # or nil when the name is not present.
      def value_from_attrs(attrs)
        attrs.fetch(@name, nil)
      end

      # True when +attrs+ has this name as a key or as a value.
      # NOTE(review): the value? branch also matches when any attribute *value*
      # equals the name - presumably intentional; confirm against the handler.
      def attrs_match?(attrs)
        attrs.key?(@name) || attrs.value?(@name)
      end

      # Same check as #attrs_match?; kept as a separate method so attribute
      # configs answer the same question element configs are asked.
      def has_value_and_attrs_match?(attrs)
        attrs_match?(attrs)
      end

      # An attribute never describes a collection.
      def collection?
        false
      end
    end

  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Settings recorded for a single +elements+ declaration that maps a
    # repeated tag onto a collection.
    class CollectionConfig
      attr_reader :name

      # name    - tag name (String or Symbol); stored as a String.
      # options - Hash with optional keys:
      #           :class - value returned by #data_class
      #           :as    - accessor name; defaults to +name+
      #           :with  - Hash of attribute name => matcher used by #attrs_match?
      def initialize(name, options)
        @name = name.to_s
        @class = options[:class]
        # Fall back to the tag name so #accessor is never an empty string
        # when :as is left out.
        @as = (options[:as] || name).to_s
        @with = options.fetch(:with, {})
      end

      # Name (String) of the accessor the collection is exposed through.
      def accessor
        as
      end

      # True when every +:with+ matcher accepts the corresponding entry of
      # +attrs+ (looked up by String key). Matching uses ===, so a matcher may
      # be a String, a Regexp, a Range, etc. With no matchers it is always true.
      def attrs_match?(attrs)
        @with.all? do |key, value|
          value === attrs[key.to_s]
        end
      end

      # Returns the +:class+ option when given, otherwise the tag name.
      def data_class
        @class || @name
      end

      protected

      def as
        @as
      end

    end

  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require "sax-machine/sax_attribute_config"
|
2
|
+
require "sax-machine/sax_element_value_config"
|
3
|
+
require "sax-machine/sax_element_config"
|
4
|
+
require "sax-machine/sax_collection_config"
|
5
|
+
require "sax-machine/sax_ancestor_config"
|
6
|
+
|
7
|
+
module SAXMachine
  # Registry of everything declared on a SAXMachine class: element,
  # attribute, element-value, collection and ancestor configurations.
  class SAXConfig

    attr_accessor :top_level_elements, :top_level_attributes, :top_level_element_value, :collection_elements, :ancestors

    def initialize
      # Both tables map a tag name (String) to an Array of configs and create
      # the Array on first access.
      @top_level_elements = new_config_table
      @top_level_attributes = []
      @top_level_element_value = []
      @collection_elements = new_config_table
      @ancestors = []
    end

    # All element configs, across every tag name, as one flat Array.
    def columns
      @top_level_elements.values.flatten
    end

    # Copies the state of +sax_config+ into the receiver.
    #
    # The per-tag Arrays are duplicated as well as the tables holding them.
    # A plain Hash#clone would leave both configs pointing at the same Arrays,
    # so adding a config for an already-known tag on the copy would also change
    # the config it was copied from.
    def initialize_copy(sax_config)
      super
      @top_level_elements = copy_config_table(sax_config.top_level_elements)
      @top_level_attributes = sax_config.top_level_attributes.clone
      @top_level_element_value = sax_config.top_level_element_value.clone
      @collection_elements = copy_config_table(sax_config.collection_elements)
      @ancestors = sax_config.ancestors.clone
    end

    def add_top_level_element(name, options)
      @top_level_elements[name.to_s] << ElementConfig.new(name, options)
    end

    # +name+ is not used; the attribute name is taken from (and removed from)
    # options[:name].
    def add_top_level_attribute(name, options)
      @top_level_attributes << AttributeConfig.new(options.delete(:name), options)
    end

    # +name+ is not used; the value name is taken from (and removed from)
    # options[:name].
    def add_top_level_element_value(name, options)
      @top_level_element_value << ElementValueConfig.new(options.delete(:name), options)
    end

    def add_collection_element(name, options)
      @collection_elements[name.to_s] << CollectionConfig.new(name, options)
    end

    def add_ancestor(name, options)
      @ancestors << AncestorConfig.new(name, options)
    end

    # First collection config registered for +name+ whose constraints accept
    # +attrs+, or nil.
    def collection_config(name, attrs)
      @collection_elements[name.to_s].detect { |cc| cc.attrs_match?(attrs) }
    end

    # Every attribute config that matches +attrs+.
    def attribute_configs_for_element(attrs)
      @top_level_attributes.select { |aa| aa.attrs_match?(attrs) }
    end

    # Every element-value config (no filtering is applied).
    def element_values_for_element
      @top_level_element_value
    end

    # Every element config registered for +name+ for which
    # has_value_and_attrs_match?(attrs) holds.
    def element_configs_for_attribute(name, attrs)
      @top_level_elements[name.to_s].select { |ec| ec.has_value_and_attrs_match?(attrs) }
    end

    # First element config registered for +name+ whose constraints accept
    # +attrs+, or nil.
    def element_config_for_tag(name, attrs)
      @top_level_elements[name.to_s].detect { |ec| ec.attrs_match?(attrs) }
    end

    private

    # Hash whose missing keys are initialised to a fresh empty Array.
    def new_config_table
      Hash.new { |hash, key| hash[key] = [] }
    end

    # Copy of +table+ that shares no Array with it.
    def copy_config_table(table)
      copy = new_config_table
      table.each { |key, configs| copy[key] = configs.dup }
      copy
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module SAXMachine

  # Adds SAX parsing to +clazz+ without adding the declaration methods
  # (element, elements, ancestor, ...) to it.
  #
  # An anonymous subclass of +clazz+ includes SAXMachine and is yielded to the
  # block so declarations can be made on it. Accessors created by those
  # declarations are defined on +clazz+ itself. Afterwards +clazz+ gains
  # +sax_config+ and a +parse+ class method that delegates to the subclass.
  #
  # clazz - the Class to configure.
  def self.configure(clazz)
    extended_clazz = Class.new(clazz)
    extended_clazz.send(:include, SAXMachine)

    # override create_attr to create attributes on the original class
    def extended_clazz.create_attr real_name
      superclass.send(:attr_reader, real_name) unless superclass.method_defined?(real_name)
      superclass.send(:attr_writer, real_name) unless superclass.method_defined?("#{real_name}=")
    end

    yield(extended_clazz)

    clazz.extend LightWeightSaxMachine
    clazz.sax_config = extended_clazz.sax_config

    # Forward any extra arguments (the optional error and warning handlers)
    # so this entry point accepts the same calls as the subclass's parse.
    (class << clazz; self; end).send(:define_method, :parse) do |xml_text, *handlers|
      extended_clazz.parse(xml_text, *handlers)
    end

  end

  # Class-level behaviour given to classes set up through SAXMachine.configure.
  module LightWeightSaxMachine

    attr_writer :sax_config

    # The class's SAXConfig, created on first use.
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # Gives each subclass its own copy of the parent's configuration.
    def inherited(subclass)
      # Keep inherited hooks defined further up the ancestor chain working.
      super
      subclass.sax_config.send(:initialize_copy, self.sax_config)
    end

  end

end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
module SAXMachine

  # Including SAXMachine adds the declaration DSL (ClassMethods) and the
  # attribute-hash constructor (InstanceMethods) to the including class.
  def self.included(base)
    base.send(:include, InstanceMethods)
    base.extend ClassMethods
  end

  # Parses +xml_text+ into the receiver and returns the receiver.
  #
  # xml_text   - the XML to parse.
  # on_error   - passed through to SAXHandler (optional).
  # on_warning - passed through to SAXHandler (optional).
  def parse(xml_text, on_error = nil, on_warning = nil)
    sax_handler = SAXHandler.new(self, on_error, on_warning)
    parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
    parser.parse(xml_text) do |ctx|
      ctx.replace_entities = true
    end
    self
  end

  module InstanceMethods
    # attributes - Hash of attribute name => value; each pair is assigned
    #              through the matching "<name>=" writer.
    def initialize(attributes = {})
      attributes.each do |name, value|
        send("#{name}=", value)
      end
    end
  end

  module ClassMethods

    # Gives each subclass its own copy of the parent's configuration.
    def inherited(subclass)
      # Keep inherited hooks defined further up the ancestor chain working.
      super
      subclass.sax_config.send(:initialize_copy, self.sax_config)
    end

    # Builds a new instance and parses +xml_text+ into it.
    def parse(xml_text, on_error = nil, on_warning = nil)
      new.parse(xml_text, on_error, on_warning)
    end

    # Declares a single element. options[:as] names the accessor and defaults
    # to +name+ (the default is written back into +options+).
    def element(name, options = {})
      real_name = (options[:as] ||= name).to_s
      sax_config.add_top_level_element(name, options)
      create_attr real_name
    end

    # Declares an attribute. The attribute name travels in options[:name];
    # the first argument handed to the config is not used by it.
    def attribute(name, options = {})
      real_name = (options[:as] ||= name).to_s
      sax_config.add_top_level_attribute(self.class.to_s, options.merge(:name => name))
      create_attr real_name
    end

    # Declares an element value. The name travels in options[:name];
    # the first argument handed to the config is not used by it.
    def value(name, options = {})
      real_name = (options[:as] ||= name).to_s
      sax_config.add_top_level_element_value(self.class.to_s, options.merge(:name => name))
      create_attr real_name
    end

    # Declares an ancestor accessor.
    def ancestor(name, options = {})
      real_name = (options[:as] ||= name).to_s
      sax_config.add_ancestor(name, options)
      create_attr(real_name)
    end

    # All element configs declared on this class.
    def columns
      sax_config.columns
    end

    # The first element config whose column equals +sym+, or nil.
    def column(sym)
      columns.detect { |c| c.column == sym }
    end

    def data_class(sym)
      column(sym).data_class
    end

    def required?(sym)
      column(sym).required?
    end

    def column_names
      columns.map { |e| e.column }
    end

    # Declares a repeated element exposed as an Array.
    #
    # With options[:class] the tag is registered as a collection element.
    # Without it, an add_<as> method that appends to the Array is generated
    # and the tag is registered as a top-level element flagged :collection.
    # A reader (defaulting to an empty Array) and a writer are added unless
    # methods with those names already exist.
    def elements(name, options = {})
      options[:as] ||= name
      if options[:class]
        sax_config.add_collection_element(name, options)
      else
        # File and line are passed so errors raised inside the generated
        # method point back here instead of at "(eval)".
        class_eval <<-SRC, __FILE__, __LINE__ + 1
          def add_#{options[:as]}(value)
            #{options[:as]} << value
          end
        SRC
        sax_config.add_top_level_element(name, options.merge(:collection => true))
      end

      unless method_defined?(options[:as].to_s)
        class_eval <<-SRC, __FILE__, __LINE__ + 1
          def #{options[:as]}
            @#{options[:as]} ||= []
          end
        SRC
      end

      attr_writer options[:as] unless method_defined?("#{options[:as]}=")
    end

    # The class's SAXConfig, created on first use.
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # we only want to insert the getter and setter if they haven't defined it from elsewhere.
    # this is how we allow custom parsing behavior. So you could define the setter
    # and have it parse the string into a date or whatever.
    def create_attr real_name
      attr_reader real_name unless method_defined?(real_name)
      attr_writer real_name unless method_defined?("#{real_name}=")
    end
  end

end
|