sax-machine-patched 0.2.0.rc2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/.travis.yml +12 -0
- data/Gemfile +9 -0
- data/Guardfile +5 -0
- data/HISTORY.md +13 -0
- data/README.md +107 -0
- data/Rakefile +6 -0
- data/lib/sax-machine.rb +8 -0
- data/lib/sax-machine/sax_ancestor_config.rb +21 -0
- data/lib/sax-machine/sax_attribute_config.rb +40 -0
- data/lib/sax-machine/sax_collection_config.rb +37 -0
- data/lib/sax-machine/sax_config.rb +74 -0
- data/lib/sax-machine/sax_configure.rb +38 -0
- data/lib/sax-machine/sax_document.rb +118 -0
- data/lib/sax-machine/sax_element_config.rb +65 -0
- data/lib/sax-machine/sax_element_value_config.rb +24 -0
- data/lib/sax-machine/sax_handler.rb +192 -0
- data/lib/sax-machine/version.rb +3 -0
- data/sax-machine.gemspec +24 -0
- data/spec/benchmarks/amazon.xml +40 -0
- data/spec/benchmarks/benchmark.rb +158 -0
- data/spec/benchmarks/public_timeline.xml +411 -0
- data/spec/sax-machine/atom.xml +165 -0
- data/spec/sax-machine/configure_sax_machine_spec.rb +53 -0
- data/spec/sax-machine/include_sax_machine_spec.rb +42 -0
- data/spec/sax-machine/sax_document_spec.rb +838 -0
- data/spec/spec_helper.rb +15 -0
- metadata +115 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Configuration for one element declaration: the tag name it applies to,
    # the attribute constraints (:with) that must hold, the attribute (:value)
    # to read from, and the setter that receives the result.
    class ElementConfig
      attr_reader :name, :setter, :data_class, :collection

      # name    - tag name; stored as a String.
      # options - Hash; the keys read here are :with, :value, :as,
      #           :collection, :class and :required.
      def initialize(name, options)
        @name = name.to_s
        @with = options.fetch(:with, {})

        # Only a missing :value key leaves @value nil; an explicitly supplied
        # value (even nil) is stringified.
        @value = options.has_key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]

        # Collections are appended through "add_<as>", single values are
        # assigned through "<as>=".
        @setter = @collection ? "add_#{@as}" : "#{@as}="

        @data_class = options[:class]
        @required = options[:required]
      end

      # True when a :value option was supplied at construction time.
      def value_configured?
        !@value.nil?
      end

      # Human-readable dump of every configured field.
      def to_s
        "name: #{@name} dataclass: #{@data_class} setter: #{@setter} required: #{@required} value: #{@value} as:#{@as} collection: #{@collection} with: #{@with}"
      end

      # The :as option when given, otherwise the tag name as a Symbol.
      def column
        @as || @name.to_sym
      end

      # Returns the raw :required option (truthy/falsy, not coerced).
      def required?
        @required
      end

      # Looks up the configured :value key in attrs; nil when it is absent.
      def value_from_attrs(attrs)
        attrs.fetch(@value, nil)
      end

      # True when every :with constraint matches (via ===) the attribute
      # stored under the stringified key. An empty :with always matches.
      def attrs_match?(attrs)
        @with.all? do |key, value|
          value === attrs[key.to_s]
        end
      end

      # True when a :value is configured and all :with constraints match.
      def has_value_and_attrs_match?(attrs)
        value_configured? && attrs_match?(attrs)
      end

      # Returns the raw :collection option (truthy/falsy, not coerced).
      def collection?
        @collection
      end
    end

  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Configuration for a declaration that stores an element's own value
    # through the "<as>=" setter.
    class ElementValueConfig
      attr_reader :name, :setter

      # name    - declared name; stored as a String.
      # options - Hash; the keys read here are :as and :required.
      def initialize(name, options)
        @name = name.to_s
        @as = options[:as]
        @setter = "#{@as}="
        @required = options[:required]
      end

      # The :as option when given, otherwise the name as a Symbol.
      def column
        @as || @name.to_sym
      end

      # Returns the raw :required option (truthy/falsy, not coerced).
      def required?
        @required
      end
    end

  end
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "time"
|
3
|
+
|
4
|
+
module SAXMachine
  # SAX document handler that populates an object graph from parser events,
  # driven by the `sax_config` exposed by each object's class.
  class SAXHandler < Nokogiri::XML::SAX::Document
    # Sentinel stored in a StackNode buffer until character data arrives.
    NO_BUFFER = :no_buffer

    # One entry of the parse stack: the object being filled, the config that
    # opened it (nil for the root node), and the text collected so far.
    class StackNode < Struct.new(:object, :config, :buffer)
      def initialize(object, config = nil, buffer = NO_BUFFER)
        self.object = object
        self.config = config
        self.buffer = buffer
      end
    end

    # object     - root object to populate.
    # on_error   - optional callable invoked with the message passed to #error.
    # on_warning - optional callable invoked with the message passed to #warning.
    def initialize(object, on_error = nil, on_warning = nil)
      @stack = [ StackNode.new(object) ]
      @parsed_configs = {}
      @on_error = on_error
      @on_warning = on_warning
    end

    # Appends character data to the buffer of the node on top of the stack.
    # The first chunk is dup'ed so later appends never mutate the caller's
    # string.
    def characters(data)
      node = stack.last

      if node.buffer == NO_BUFFER
        node.buffer = data.dup
      else
        node.buffer << data
      end
    end
    alias cdata_block characters

    # Handles an opening tag: pushes a new stack node when the current
    # object's config declares a collection or element for this tag, and
    # applies attribute-sourced values.
    #
    # attrs is converted with Hash[] — presumably an array of [name, value]
    # pairs; confirm against the parser in use.
    def start_element(name, attrs = [])
      name = normalize_name(name)
      node = stack.last
      object = node.object

      sax_config = sax_config_for(object)

      if sax_config
        attrs = Hash[attrs]

        if collection_config = sax_config.collection_config(name, attrs)
          # From here on, the new collection member is the object whose
          # config is consulted.
          object = collection_config.data_class.new
          sax_config = sax_config_for(object)

          stack.push(StackNode.new(object, collection_config))

          set_attributes_on(object, attrs)
        end

        sax_config.element_configs_for_attribute(name, attrs).each do |ec|
          unless parsed_config?(object, ec)
            object.send(ec.setter, ec.value_from_attrs(attrs))
            mark_as_parsed(object, ec)
          end
        end

        if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
          # Placeholder object for the stack node; the real value is derived
          # from the buffer in #end_element.
          new_object =
            case element_config.data_class.to_s
            when 'Integer' then 0
            when 'Float'   then 0.0
            when 'Time'    then Time.at(0)
            when ''        then object
            else
              element_config.data_class.new
            end

          stack.push(StackNode.new(new_object, element_config))

          set_attributes_on(new_object, attrs)
        end
      end
    end

    # Handles a closing tag: when it matches the config on top of the stack,
    # stores the collected value (or the built element) on the parent object
    # and pops the stack.
    def end_element(name)
      name = normalize_name(name)

      start_tag = stack[-2]
      close_tag = stack[-1]

      return unless start_tag && close_tag

      object = start_tag.object
      element = close_tag.object
      config = close_tag.config
      value = close_tag.buffer

      # Closing tag of an element that never pushed a node: nothing to do.
      return unless config.name == name

      unless parsed_config?(object, config)
        if (element_value_config = element_values_for(config))
          element_value_config.each { |evc| element.send(evc.setter, value) }
        end

        if config.respond_to?(:accessor)
          # Collection member: optionally store its own text, then append it
          # to the parent's collection.
          subconfig = sax_config_for(element)

          if econf = subconfig.element_config_for_tag(name, [])
            element.send(econf.setter, value) unless econf.value_configured?
          end

          object.send(config.accessor) << element
        else
          # Coerce only when character data was actually collected. Coercing
          # the NO_BUFFER sentinel used to store the text "no_buffer" for
          # String, and raise for Integer/Float/Time; leaving it untouched
          # lets the guard below skip the setter for empty elements.
          buffered = value != NO_BUFFER

          value =
            case config.data_class.to_s
            when 'String'  then buffered ? value.to_s : value
            when 'Integer' then buffered ? value.to_i : value
            when 'Float'   then buffered ? value.to_f : value
            # Assumes that time elements will be string-based and are not
            # something else, e.g. seconds since epoch
            when 'Time'    then buffered ? Time.parse(value.to_s) : value
            when ''        then value
            else
              element
            end

          object.send(config.setter, value) unless value == NO_BUFFER

          mark_as_parsed(object, config)
        end

        # try to set the ancestor
        if (sax_config = sax_config_for(element))
          sax_config.ancestors.each do |ancestor|
            element.send(ancestor.setter, object)
          end
        end
      end

      stack.pop
    end

    private

    # Remembers that element_config has been applied to object. Collection
    # configs are never marked, so they can be applied repeatedly.
    def mark_as_parsed(object, element_config)
      unless element_config.collection?
        @parsed_configs[[object.object_id, element_config.object_id]] = true
      end
    end

    # Truthy when element_config was already applied to object.
    def parsed_config?(object, element_config)
      @parsed_configs[[object.object_id, element_config.object_id]]
    end

    # Forwards a warning message to the on_warning callable, if one was given.
    def warning(string)
      if @on_warning
        @on_warning.call(string)
      end
    end

    # Forwards an error message to the on_error callable, if one was given.
    def error(string)
      if @on_error
        @on_error.call(string)
      end
    end


    # The sax_config of object's class, or nil when the class has none.
    def sax_config_for(object)
      if object.class.respond_to?(:sax_config)
        object.class.sax_config
      end
    end

    # The element-value configs of config's data class, or nil when the data
    # class exposes no sax_config.
    def element_values_for(config)
      if config.data_class.respond_to?(:sax_config)
        config.data_class.sax_config.element_values_for_element
      end
    end

    # Tag names are matched with dashes replaced by underscores.
    def normalize_name(name)
      name.gsub(/\-/, '_')
    end

    # Applies every attribute config of object's class that matches the
    # given attributes; a no-op when the class has no sax_config.
    def set_attributes_on(object, attributes)
      config = sax_config_for(object)

      if config
        config.attribute_configs_for_element(attributes).each do |ac|
          object.send(ac.setter, ac.value_from_attrs(attributes))
        end
      end
    end

    def stack
      @stack
    end
  end
end
|
data/sax-machine.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/sax-machine/version', __FILE__)

# Gem specification for the patched sax-machine release. The version comes
# from SAXMachine::VERSION, loaded by the require above.
Gem::Specification.new do |s|
  s.name    = 'sax-machine-patched'
  s.version = SAXMachine::VERSION

  s.authors  = ["Paul Dix", "Julien Kirch", "Ezekiel Templin"]
  s.email    = %q{paul@pauldix.net}
  s.homepage = %q{http://github.com/pauldix/sax-machine}

  s.summary = %q{Declarative SAX Parsing with Nokogiri Patch release}

  s.license = %q{MIT}

  # The packaged file lists are taken from whatever git currently tracks.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.require_paths = ["lib"]

  s.platform = Gem::Platform::RUBY

  # nokogiri is pinned to exactly 1.6.0.
  s.add_dependency 'nokogiri', '1.6.0'
  s.add_development_dependency "rspec", "~> 2.13.0"
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<ItemSearchResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
|
3
|
+
<OperationRequest>
|
4
|
+
<HTTPHeaders>
|
5
|
+
<Header Name="UserAgent">
|
6
|
+
</Header>
|
7
|
+
</HTTPHeaders>
|
8
|
+
<RequestId>16WRJBVEM155Q026KCV1</RequestId>
|
9
|
+
<Arguments>
|
10
|
+
<Argument Name="SearchIndex" Value="Books"></Argument>
|
11
|
+
<Argument Name="Service" Value="AWSECommerceService"></Argument>
|
12
|
+
<Argument Name="Title" Value="Ruby on Rails"></Argument>
|
13
|
+
<Argument Name="Operation" Value="ItemSearch"></Argument>
|
14
|
+
<Argument Name="AWSAccessKeyId" Value="dontbeaswoosh"></Argument>
|
15
|
+
</Arguments>
|
16
|
+
<RequestProcessingTime>0.064924955368042</RequestProcessingTime>
|
17
|
+
</OperationRequest>
|
18
|
+
<Items>
|
19
|
+
<Request>
|
20
|
+
<IsValid>True</IsValid>
|
21
|
+
<ItemSearchRequest>
|
22
|
+
<SearchIndex>Books</SearchIndex>
|
23
|
+
<Title>Ruby on Rails</Title>
|
24
|
+
</ItemSearchRequest>
|
25
|
+
</Request>
|
26
|
+
<TotalResults>22</TotalResults>
|
27
|
+
<TotalPages>3</TotalPages>
|
28
|
+
<Item>
|
29
|
+
<ASIN>0321480791</ASIN>
|
30
|
+
<DetailPageURL>http://www.amazon.com/gp/redirect.html%3FASIN=0321480791%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0321480791%253FSubscriptionId=dontbeaswoosh</DetailPageURL>
|
31
|
+
<ItemAttributes>
|
32
|
+
<Author>Michael Hartl</Author>
|
33
|
+
<Author>Aurelius Prochazka</Author>
|
34
|
+
<Manufacturer>Addison-Wesley Professional</Manufacturer>
|
35
|
+
<ProductGroup>Book</ProductGroup>
|
36
|
+
<Title>RailsSpace: Building a Social Networking Website with Ruby on Rails (Addison-Wesley Professional Ruby Series)</Title>
|
37
|
+
</ItemAttributes>
|
38
|
+
</Item>
|
39
|
+
</Items>
|
40
|
+
</ItemSearchResponse>
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'rubygems'
require 'benchmark'
require 'happymapper'
require 'sax-machine'
require 'rfeedparser'
include Benchmark

# Number of times each parser processes the feed per report.
benchmark_iterations = 100

module Feedzirra
  # Class for parsing a single Atom entry
  class AtomEntry
    include SAXMachine
    element :title
    element :name, :as => :author
    element "feedburner:origLink", :as => :url
    element :summary
    element :content
    element :published
  end

  # Class for parsing Atom feeds
  class Atom
    include SAXMachine
    element :title
    element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
    element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
    elements :entry, :as => :entries, :class => AtomEntry
  end
end

# Resolve the fixture relative to this script so the benchmark can be started
# from any working directory, not only from the repository root.
feed_text = File.read(File.expand_path("../../sax-machine/atom.xml", __FILE__))

benchmark do |t|
  t.report("feedzirra") do
    benchmark_iterations.times {
      Feedzirra::Atom.new.parse(feed_text)
    }
  end

  t.report("rfeedparser") do
    benchmark_iterations.times {
      FeedParser.parse(feed_text)
    }
  end
end
|
44
|
+
|
45
|
+
# class AtomEntry
|
46
|
+
# include SAXMachine
|
47
|
+
# element :title
|
48
|
+
# element :name, :as => :author
|
49
|
+
# element :summary
|
50
|
+
# end
|
51
|
+
# class Atom
|
52
|
+
# include SAXMachine
|
53
|
+
# element :title
|
54
|
+
# elements :entry, :as => :entries, :class => AtomEntry
|
55
|
+
# end
|
56
|
+
#
|
57
|
+
# class Entry
|
58
|
+
# include HappyMapper
|
59
|
+
# element :title, String
|
60
|
+
# element :name, String
|
61
|
+
# element :summary, String
|
62
|
+
# end
|
63
|
+
# class Feed
|
64
|
+
# include HappyMapper
|
65
|
+
# element :title, String
|
66
|
+
# has_many :entry, Entry
|
67
|
+
# end
|
68
|
+
# feed_text = File.read("spec/sax-machine/atom.xml")
|
69
|
+
#
|
70
|
+
# benchmark do |t|
|
71
|
+
# t.report("sax-machine") do
|
72
|
+
# benchmark_iterations.times {
|
73
|
+
# Atom.new.parse(feed_text)
|
74
|
+
# }
|
75
|
+
# end
|
76
|
+
#
|
77
|
+
# t.report("happymapper") do
|
78
|
+
# benchmark_iterations.times {
|
79
|
+
# Feed.parse(feed_text)
|
80
|
+
# }
|
81
|
+
# end
|
82
|
+
# end
|
83
|
+
|
84
|
+
# xml = File.read("spec/benchmarks/public_timeline.xml")
|
85
|
+
# class Status
|
86
|
+
# include HappyMapper
|
87
|
+
#
|
88
|
+
# element :text, String
|
89
|
+
# element :source, String
|
90
|
+
# end
|
91
|
+
#
|
92
|
+
# class Statuses
|
93
|
+
# include SAXMachine
|
94
|
+
#
|
95
|
+
# elements :status, {:as => :statuses, :class => Class.new do
|
96
|
+
# include SAXMachine
|
97
|
+
# element :text
|
98
|
+
# element :source
|
99
|
+
# end}
|
100
|
+
# end
|
101
|
+
#
|
102
|
+
# benchmark do |t|
|
103
|
+
# t.report("happy mapper") do
|
104
|
+
# benchmark_iterations.times {
|
105
|
+
# Status.parse(xml)
|
106
|
+
# }
|
107
|
+
# end
|
108
|
+
#
|
109
|
+
# t.report("sax-machine") do
|
110
|
+
# benchmark_iterations.times {
|
111
|
+
# Statuses.parse(xml)
|
112
|
+
# }
|
113
|
+
# end
|
114
|
+
# end
|
115
|
+
|
116
|
+
# xml = File.read("spec/benchmarks/amazon.xml")
|
117
|
+
# class HItem
|
118
|
+
# include HappyMapper
|
119
|
+
#
|
120
|
+
# tag 'Item' # if you put class in module you need tag
|
121
|
+
# element :asin, String, :tag => 'ASIN'
|
122
|
+
# element :detail_page_url, String, :tag => 'DetailPageURL'
|
123
|
+
# element :manufacturer, String, :tag => 'Manufacturer', :deep => true
|
124
|
+
# end
|
125
|
+
# class HItems
|
126
|
+
# include HappyMapper
|
127
|
+
#
|
128
|
+
# tag 'Items' # if you put class in module you need tag
|
129
|
+
# # element :total_results, Integer, :tag => 'TotalResults'
|
130
|
+
# # element :total_pages, Integer, :tag => 'TotalPages'
|
131
|
+
# has_many :items, Item
|
132
|
+
# end
|
133
|
+
#
|
134
|
+
# class Item
|
135
|
+
# include SAXMachine
|
136
|
+
#
|
137
|
+
# element :ASIN, :as => :asin
|
138
|
+
# element :DetailPageUrl, :as => :detail_page_url
|
139
|
+
# element :Manufacturer, :as => :manufacturer
|
140
|
+
# end
|
141
|
+
# class Items
|
142
|
+
# include SAXMachine
|
143
|
+
# elements :Item, :as => :items
|
144
|
+
# end
|
145
|
+
#
|
146
|
+
# benchmark do |t|
|
147
|
+
# t.report("sax-machine") do
|
148
|
+
# benchmark_iterations.times {
|
149
|
+
# Items.new.parse(xml)
|
150
|
+
# }
|
151
|
+
# end
|
152
|
+
#
|
153
|
+
# t.report("happymapper") do
|
154
|
+
# benchmark_iterations.times {
|
155
|
+
# HItems.parse(xml)
|
156
|
+
# }
|
157
|
+
# end
|
158
|
+
# end
|