sax-machine-patched 0.2.0.rc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/.travis.yml +12 -0
- data/Gemfile +9 -0
- data/Guardfile +5 -0
- data/HISTORY.md +13 -0
- data/README.md +107 -0
- data/Rakefile +6 -0
- data/lib/sax-machine.rb +8 -0
- data/lib/sax-machine/sax_ancestor_config.rb +21 -0
- data/lib/sax-machine/sax_attribute_config.rb +40 -0
- data/lib/sax-machine/sax_collection_config.rb +37 -0
- data/lib/sax-machine/sax_config.rb +74 -0
- data/lib/sax-machine/sax_configure.rb +38 -0
- data/lib/sax-machine/sax_document.rb +118 -0
- data/lib/sax-machine/sax_element_config.rb +65 -0
- data/lib/sax-machine/sax_element_value_config.rb +24 -0
- data/lib/sax-machine/sax_handler.rb +192 -0
- data/lib/sax-machine/version.rb +3 -0
- data/sax-machine.gemspec +24 -0
- data/spec/benchmarks/amazon.xml +40 -0
- data/spec/benchmarks/benchmark.rb +158 -0
- data/spec/benchmarks/public_timeline.xml +411 -0
- data/spec/sax-machine/atom.xml +165 -0
- data/spec/sax-machine/configure_sax_machine_spec.rb +53 -0
- data/spec/sax-machine/include_sax_machine_spec.rb +42 -0
- data/spec/sax-machine/sax_document_spec.rb +838 -0
- data/spec/spec_helper.rb +15 -0
- metadata +115 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Settings for a single element mapping: which tag it applies to, which
    # attribute (if any) supplies the value, which attribute values the tag
    # must carry to match, and which setter receives the result.
    class ElementConfig
      attr_reader :name, :setter, :data_class, :collection

      # name    - tag name; stored as a String.
      # options - Hash read for :with, :value, :as, :collection, :class and
      #           :required.
      def initialize(name, options)
        @name       = name.to_s
        @with       = options.fetch(:with, {})
        # An explicitly supplied :value (even nil) is stringified; only a
        # missing key leaves the value unconfigured.
        @value      = options.key?(:value) ? options[:value].to_s : nil
        @as         = options[:as]
        @collection = options[:collection]
        @setter     = @collection ? "add_#{@as}" : "#{@as}="
        @data_class = options[:class]
        @required   = options[:required]
      end

      # True when a :value option was supplied at construction time.
      def value_configured?
        !@value.nil?
      end

      # Human-readable dump of every setting held by this config.
      def to_s
        "name: #{@name} dataclass: #{@data_class} setter: #{@setter} required: #{@required} value: #{@value} as:#{@as} collection: #{@collection} with: #{@with}"
      end

      # The :as option when given, otherwise the tag name as a Symbol.
      def column
        return @as if @as

        @name.to_sym
      end

      # Returns the raw :required option (may be nil).
      def required?
        @required
      end

      # Looks up the configured value attribute in attrs; nil when absent.
      def value_from_attrs(attrs)
        attrs.fetch(@value) { nil }
      end

      # True when every :with entry matches (via ===) the attribute stored
      # under the stringified key in attrs.
      def attrs_match?(attrs)
        @with.all? { |attr_name, expected| expected === attrs[attr_name.to_s] }
      end

      # True when a value attribute is configured and the :with constraints
      # are satisfied by attrs.
      def has_value_and_attrs_match?(attrs)
        value_configured? && attrs_match?(attrs)
      end

      # Returns the raw :collection option (may be nil).
      def collection?
        @collection
      end
    end

  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Settings for capturing an element's own text value: the element name,
    # the setter that receives the value, and whether it is required.
    class ElementValueConfig
      attr_reader :name, :setter

      # name    - element name; stored as a String.
      # options - Hash read for :as and :required.
      def initialize(name, options)
        @name     = name.to_s
        @as       = options[:as]
        @required = options[:required]
        @setter   = "#{@as}="
      end

      # The :as option when given, otherwise the element name as a Symbol.
      def column
        return @as if @as

        @name.to_sym
      end

      # Returns the raw :required option (may be nil).
      def required?
        @required
      end
    end

  end
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "time"
|
3
|
+
|
4
|
+
module SAXMachine
  # SAX event handler (a Nokogiri::XML::SAX::Document subclass) that fills in
  # objects whose classes respond to +sax_config+.
  #
  # The handler keeps a stack of StackNode entries. Each entry holds the
  # object being populated, the config that caused it to be pushed, and the
  # character data buffered while it is on top of the stack.
  class SAXHandler < Nokogiri::XML::SAX::Document
    # Sentinel held in a node's buffer until character data arrives for it.
    NO_BUFFER = :no_buffer

    # One stack entry: target object, the config that opened it, text buffer.
    class StackNode < Struct.new(:object, :config, :buffer)
      def initialize(object, config = nil, buffer = NO_BUFFER)
        self.object = object
        self.config = config
        self.buffer = buffer
      end
    end

    # object     - root object to populate; becomes the bottom of the stack.
    # on_error   - optional callable invoked with the message passed to #error.
    # on_warning - optional callable invoked with the message passed to #warning.
    def initialize(object, on_error = nil, on_warning = nil)
      @stack = [StackNode.new(object)]
      @parsed_configs = {}
      @on_error = on_error
      @on_warning = on_warning
    end

    # Buffers character data on the node at the top of the stack. The first
    # chunk is duplicated so later appends do not mutate the caller's string.
    def characters(data)
      node = stack.last

      if node.buffer == NO_BUFFER
        node.buffer = data.dup
      else
        node.buffer << data
      end
    end
    alias cdata_block characters

    # Handles an opening tag.
    #
    # name  - tag name; dashes are converted to underscores before lookup.
    # attrs - attribute name/value pairs; converted to a Hash when the
    #         current object has a sax_config.
    #
    # Does nothing when the object on top of the stack has no sax_config.
    def start_element(name, attrs = [])
      name = normalize_name(name)
      node = stack.last
      object = node.object

      sax_config = sax_config_for(object)

      if sax_config
        attrs = Hash[attrs]

        # A matching collection config opens a fresh instance of its data
        # class; the rest of this call then works against that instance.
        if (collection_config = sax_config.collection_config(name, attrs))
          object = collection_config.data_class.new
          sax_config = sax_config_for(object)

          stack.push(StackNode.new(object, collection_config))

          set_attributes_on(object, attrs)
        end

        # Element configs whose value comes from an attribute are applied
        # immediately, at most once per (object, config) pair.
        sax_config.element_configs_for_attribute(name, attrs).each do |ec|
          unless parsed_config?(object, ec)
            object.send(ec.setter, ec.value_from_attrs(attrs))
            mark_as_parsed(object, ec)
          end
        end

        if !collection_config && (element_config = sax_config.element_config_for_tag(name, attrs))
          # Placeholder pushed for the element; for the built-in type names
          # the real value is produced from the text buffer in #end_element.
          new_object =
            case element_config.data_class.to_s
            when 'Integer' then 0
            when 'Float'   then 0.0
            when 'Time'    then Time.at(0)
            when ''        then object
            else
              element_config.data_class.new
            end

          stack.push(StackNode.new(new_object, element_config))

          set_attributes_on(new_object, attrs)
        end
      end
    end

    # Handles a closing tag.
    #
    # Returns early (without popping) when fewer than two nodes are on the
    # stack or when the top node's config name differs from the normalized
    # tag name. Otherwise assigns the element to the object one level down
    # the stack (unless that pair was already parsed) and pops the top node.
    def end_element(name)
      name = normalize_name(name)

      start_tag = stack[-2]
      close_tag = stack[-1]

      return unless start_tag && close_tag

      object  = start_tag.object
      element = close_tag.object
      config  = close_tag.config
      value   = close_tag.buffer

      return unless config.name == name

      unless parsed_config?(object, config)
        if (element_value_config = element_values_for(config))
          element_value_config.each { |evc| element.send(evc.setter, value) }
        end

        if config.respond_to?(:accessor)
          subconfig = sax_config_for(element)

          if (econf = subconfig.element_config_for_tag(name, []))
            element.send(econf.setter, value) unless econf.value_configured?
          end

          object.send(config.accessor) << element
        else
          value = cast_value(config, value, element)

          # NO_BUFFER means no character data was seen; leave the attribute
          # untouched in that case.
          object.send(config.setter, value) unless value == NO_BUFFER

          mark_as_parsed(object, config)
        end

        # try to set the ancestor
        if (sax_config = sax_config_for(element))
          sax_config.ancestors.each do |ancestor|
            element.send(ancestor.setter, object)
          end
        end
      end

      stack.pop
    end

    private

    # Converts the buffered text according to the config's data class.
    #
    # The NO_BUFFER sentinel is passed through unconverted for the built-in
    # type names. Converting it would store the text "no_buffer" for String,
    # and would raise for Integer/Float (Symbol has no to_i/to_f) and for
    # Time (Time.parse cannot parse "no_buffer").
    #
    # Returns the converted value, the untouched buffer, or +element+ when
    # the data class is none of the built-in names.
    def cast_value(config, buffer, element)
      unbuffered = (buffer == NO_BUFFER)

      case config.data_class.to_s
      when 'String'  then unbuffered ? buffer : buffer.to_s
      when 'Integer' then unbuffered ? buffer : buffer.to_i
      when 'Float'   then unbuffered ? buffer : buffer.to_f
      # Assumes that time elements will be string-based and are not
      # something else, e.g. seconds since epoch
      when 'Time'    then unbuffered ? buffer : Time.parse(buffer.to_s)
      when ''        then buffer
      else
        element
      end
    end

    # Records that element_config has been applied to object. Collection
    # configs are never recorded, so they can apply repeatedly.
    def mark_as_parsed(object, element_config)
      unless element_config.collection?
        @parsed_configs[[object.object_id, element_config.object_id]] = true
      end
    end

    # Truthy when element_config was already recorded for object.
    def parsed_config?(object, element_config)
      @parsed_configs[[object.object_id, element_config.object_id]]
    end

    # Forwards a warning message to the on_warning callable, if one was given.
    def warning(string)
      if @on_warning
        @on_warning.call(string)
      end
    end

    # Forwards an error message to the on_error callable, if one was given.
    def error(string)
      if @on_error
        @on_error.call(string)
      end
    end

    # Returns the sax_config of object's class, or nil when the class does
    # not respond to sax_config.
    def sax_config_for(object)
      if object.class.respond_to?(:sax_config)
        object.class.sax_config
      end
    end

    # Returns the element-value configs of the config's data class, or nil
    # when that class does not respond to sax_config.
    def element_values_for(config)
      if config.data_class.respond_to?(:sax_config)
        config.data_class.sax_config.element_values_for_element
      end
    end

    # Replaces every dash in a tag name with an underscore.
    def normalize_name(name)
      name.tr('-', '_')
    end

    # Applies each attribute config of object's sax_config that matches the
    # given attributes. No-op when the object has no sax_config.
    def set_attributes_on(object, attributes)
      config = sax_config_for(object)

      if config
        config.attribute_configs_for_element(attributes).each do |ac|
          object.send(ac.setter, ac.value_from_attrs(attributes))
        end
      end
    end

    def stack
      @stack
    end
  end
end
|
data/sax-machine.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for the patched sax-machine release.
require File.expand_path('lib/sax-machine/version', File.dirname(__FILE__))

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "sax-machine-patched"
  spec.version  = SAXMachine::VERSION
  spec.platform = Gem::Platform::RUBY
  spec.license  = "MIT"

  # Authorship
  spec.authors  = ["Paul Dix", "Julien Kirch", "Ezekiel Templin"]
  spec.email    = "paul@pauldix.net"
  spec.homepage = "http://github.com/pauldix/sax-machine"
  spec.summary  = "Declarative SAX Parsing with Nokogiri Patch release"

  # Packaged files are whatever git currently tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.require_paths = ["lib"]

  # Dependencies
  spec.add_dependency "nokogiri", "1.6.0"
  spec.add_development_dependency "rspec", "~> 2.13.0"
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<ItemSearchResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
|
3
|
+
<OperationRequest>
|
4
|
+
<HTTPHeaders>
|
5
|
+
<Header Name="UserAgent">
|
6
|
+
</Header>
|
7
|
+
</HTTPHeaders>
|
8
|
+
<RequestId>16WRJBVEM155Q026KCV1</RequestId>
|
9
|
+
<Arguments>
|
10
|
+
<Argument Name="SearchIndex" Value="Books"></Argument>
|
11
|
+
<Argument Name="Service" Value="AWSECommerceService"></Argument>
|
12
|
+
<Argument Name="Title" Value="Ruby on Rails"></Argument>
|
13
|
+
<Argument Name="Operation" Value="ItemSearch"></Argument>
|
14
|
+
<Argument Name="AWSAccessKeyId" Value="dontbeaswoosh"></Argument>
|
15
|
+
</Arguments>
|
16
|
+
<RequestProcessingTime>0.064924955368042</RequestProcessingTime>
|
17
|
+
</OperationRequest>
|
18
|
+
<Items>
|
19
|
+
<Request>
|
20
|
+
<IsValid>True</IsValid>
|
21
|
+
<ItemSearchRequest>
|
22
|
+
<SearchIndex>Books</SearchIndex>
|
23
|
+
<Title>Ruby on Rails</Title>
|
24
|
+
</ItemSearchRequest>
|
25
|
+
</Request>
|
26
|
+
<TotalResults>22</TotalResults>
|
27
|
+
<TotalPages>3</TotalPages>
|
28
|
+
<Item>
|
29
|
+
<ASIN>0321480791</ASIN>
|
30
|
+
<DetailPageURL>http://www.amazon.com/gp/redirect.html%3FASIN=0321480791%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0321480791%253FSubscriptionId=dontbeaswoosh</DetailPageURL>
|
31
|
+
<ItemAttributes>
|
32
|
+
<Author>Michael Hartl</Author>
|
33
|
+
<Author>Aurelius Prochazka</Author>
|
34
|
+
<Manufacturer>Addison-Wesley Professional</Manufacturer>
|
35
|
+
<ProductGroup>Book</ProductGroup>
|
36
|
+
<Title>RailsSpace: Building a Social Networking Website with Ruby on Rails (Addison-Wesley Professional Ruby Series)</Title>
|
37
|
+
</ItemAttributes>
|
38
|
+
</Item>
|
39
|
+
</Items>
|
40
|
+
</ItemSearchResponse>
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'benchmark'
|
3
|
+
require 'happymapper'
|
4
|
+
require 'sax-machine'
|
5
|
+
require 'rfeedparser'
|
6
|
+
include Benchmark

# Number of times each parser processes the feed per report.
benchmark_iterations = 100

module Feedzirra
  # One Atom <entry>, mapped with SAXMachine element declarations.
  class AtomEntry
    include SAXMachine
    element :title
    element :name, :as => :author
    element "feedburner:origLink", :as => :url
    element :summary
    element :content
    element :published
  end

  # Class for parsing Atom feeds
  class Atom
    include SAXMachine
    element :title
    # The two <link> mappings differ only in the required "type" attribute.
    element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
    element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
    elements :entry, :as => :entries, :class => AtomEntry
  end
end

# Locate the fixture relative to this script (spec/benchmarks/) so the
# benchmark does not depend on the current working directory.
feed_text = File.read(File.expand_path("../../sax-machine/atom.xml", __FILE__))

benchmark do |t|
  t.report("feedzirra") do
    benchmark_iterations.times {
      Feedzirra::Atom.new.parse(feed_text)
    }
  end

  t.report("rfeedparser") do
    benchmark_iterations.times {
      FeedParser.parse(feed_text)
    }
  end
end
|
44
|
+
|
45
|
+
# class AtomEntry
|
46
|
+
# include SAXMachine
|
47
|
+
# element :title
|
48
|
+
# element :name, :as => :author
|
49
|
+
# element :summary
|
50
|
+
# end
|
51
|
+
# class Atom
|
52
|
+
# include SAXMachine
|
53
|
+
# element :title
|
54
|
+
# elements :entry, :as => :entries, :class => AtomEntry
|
55
|
+
# end
|
56
|
+
#
|
57
|
+
# class Entry
|
58
|
+
# include HappyMapper
|
59
|
+
# element :title, String
|
60
|
+
# element :name, String
|
61
|
+
# element :summary, String
|
62
|
+
# end
|
63
|
+
# class Feed
|
64
|
+
# include HappyMapper
|
65
|
+
# element :title, String
|
66
|
+
# has_many :entry, Entry
|
67
|
+
# end
|
68
|
+
# feed_text = File.read("spec/sax-machine/atom.xml")
|
69
|
+
#
|
70
|
+
# benchmark do |t|
|
71
|
+
# t.report("sax-machine") do
|
72
|
+
# benchmark_iterations.times {
|
73
|
+
# Atom.new.parse(feed_text)
|
74
|
+
# }
|
75
|
+
# end
|
76
|
+
#
|
77
|
+
# t.report("happymapper") do
|
78
|
+
# benchmark_iterations.times {
|
79
|
+
# Feed.parse(feed_text)
|
80
|
+
# }
|
81
|
+
# end
|
82
|
+
# end
|
83
|
+
|
84
|
+
# xml = File.read("spec/benchmarks/public_timeline.xml")
|
85
|
+
# class Status
|
86
|
+
# include HappyMapper
|
87
|
+
#
|
88
|
+
# element :text, String
|
89
|
+
# element :source, String
|
90
|
+
# end
|
91
|
+
#
|
92
|
+
# class Statuses
|
93
|
+
# include SAXMachine
|
94
|
+
#
|
95
|
+
# elements :status, {:as => :statuses, :class => Class.new do
|
96
|
+
# include SAXMachine
|
97
|
+
# element :text
|
98
|
+
# element :source
|
99
|
+
# end}
|
100
|
+
# end
|
101
|
+
#
|
102
|
+
# benchmark do |t|
|
103
|
+
# t.report("happy mapper") do
|
104
|
+
# benchmark_iterations.times {
|
105
|
+
# Status.parse(xml)
|
106
|
+
# }
|
107
|
+
# end
|
108
|
+
#
|
109
|
+
# t.report("sax-machine") do
|
110
|
+
# benchmark_iterations.times {
|
111
|
+
# Statuses.parse(xml)
|
112
|
+
# }
|
113
|
+
# end
|
114
|
+
# end
|
115
|
+
|
116
|
+
# xml = File.read("spec/benchmarks/amazon.xml")
|
117
|
+
# class HItem
|
118
|
+
# include HappyMapper
|
119
|
+
#
|
120
|
+
# tag 'Item' # if you put class in module you need tag
|
121
|
+
# element :asin, String, :tag => 'ASIN'
|
122
|
+
# element :detail_page_url, String, :tag => 'DetailPageURL'
|
123
|
+
# element :manufacturer, String, :tag => 'Manufacturer', :deep => true
|
124
|
+
# end
|
125
|
+
# class HItems
|
126
|
+
# include HappyMapper
|
127
|
+
#
|
128
|
+
# tag 'Items' # if you put class in module you need tag
|
129
|
+
# # element :total_results, Integer, :tag => 'TotalResults'
|
130
|
+
# # element :total_pages, Integer, :tag => 'TotalPages'
|
131
|
+
# has_many :items, Item
|
132
|
+
# end
|
133
|
+
#
|
134
|
+
# class Item
|
135
|
+
# include SAXMachine
|
136
|
+
#
|
137
|
+
# element :ASIN, :as => :asin
|
138
|
+
# element :DetailPageUrl, :as => :detail_page_url
|
139
|
+
# element :Manufacturer, :as => :manufacturer
|
140
|
+
# end
|
141
|
+
# class Items
|
142
|
+
# include SAXMachine
|
143
|
+
# elements :Item, :as => :items
|
144
|
+
# end
|
145
|
+
#
|
146
|
+
# benchmark do |t|
|
147
|
+
# t.report("sax-machine") do
|
148
|
+
# benchmark_iterations.times {
|
149
|
+
# Items.new.parse(xml)
|
150
|
+
# }
|
151
|
+
# end
|
152
|
+
#
|
153
|
+
# t.report("happymapper") do
|
154
|
+
# benchmark_iterations.times {
|
155
|
+
# HItems.parse(xml)
|
156
|
+
# }
|
157
|
+
# end
|
158
|
+
# end
|