sax-machine-patched 0.2.0.rc2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,65 @@
1
module SAXMachine
  class SAXConfig

    # Configuration for mapping a single XML element (or one of its
    # attributes) onto a setter of the object being populated.
    #
    # Recognised option keys (all optional):
    #   :with       - Hash of attribute name => matcher; every matcher must
    #                 `===`-match the corresponding attribute (see #attrs_match?).
    #   :value      - name of the attribute whose value should be read
    #                 (stored as a String).
    #   :as         - name used to build the setter and reported by #column.
    #   :collection - truthy when the setter should be "add_<name>".
    #   :class      - stored verbatim and exposed as #data_class.
    #   :required   - stored verbatim and exposed through #required?.
    class ElementConfig
      attr_reader :name, :setter, :data_class, :collection

      # name    - element name; converted with #to_s.
      # options - Hash of the option keys listed on the class.
      def initialize(name, options)
        @name = name.to_s
        @with = options.fetch(:with, {})
        @value = options.key?(:value) ? options[:value].to_s : nil
        @as = options[:as]
        @collection = options[:collection]

        # Build the setter from #column so that it falls back to the element
        # name when :as is omitted, instead of degenerating to "=" / "add_".
        @setter = @collection ? "add_#{column}" : "#{column}="

        @data_class = options[:class]
        @required = options[:required]
      end

      # True when a :value option was supplied.
      def value_configured?
        !@value.nil?
      end

      # Human-readable dump of the configuration.
      def to_s
        "name: #{@name} dataclass: #{@data_class} setter: #{@setter} required: #{@required} value: #{@value} as:#{@as} collection: #{@collection} with: #{@with}"
      end

      # The :as option when given, otherwise the element name as a Symbol.
      def column
        @as || @name.to_sym
      end

      # Returns the :required option exactly as it was supplied.
      def required?
        @required
      end

      # Looks up the configured :value attribute in attrs; nil when absent.
      def value_from_attrs(attrs)
        attrs.fetch(@value, nil)
      end

      # True when every :with matcher `===`-matches the attribute stored in
      # attrs under the stringified key. An empty :with always matches.
      def attrs_match?(attrs)
        @with.all? do |key, value|
          value === attrs[key.to_s]
        end
      end

      # True when a :value option is configured and the :with conditions hold.
      def has_value_and_attrs_match?(attrs)
        value_configured? && attrs_match?(attrs)
      end

      # Returns the :collection option exactly as it was supplied.
      def collection?
        @collection
      end
    end

  end
end
@@ -0,0 +1,24 @@
1
module SAXMachine
  class SAXConfig

    # Configuration for storing an element's text value through a setter.
    #
    # Recognised option keys (both optional):
    #   :as       - name used to build the setter and reported by #column.
    #   :required - stored verbatim and exposed through #required?.
    class ElementValueConfig
      attr_reader :name, :setter

      # name    - element name; converted with #to_s.
      # options - Hash of the option keys listed on the class.
      def initialize(name, options)
        @name = name.to_s
        @as = options[:as]
        # Derive the setter from #column so a missing :as falls back to the
        # element name rather than producing the unusable setter "=".
        @setter = "#{column}="
        @required = options[:required]
      end

      # The :as option when given, otherwise the element name as a Symbol.
      def column
        @as || @name.to_sym
      end

      # Returns the :required option exactly as it was supplied.
      def required?
        @required
      end
    end

  end
end
@@ -0,0 +1,192 @@
1
+ require "nokogiri"
2
+ require "time"
3
+
4
module SAXMachine
  # SAX document handler that fills in a target object (and nested objects)
  # from element, attribute and character-data events, driven by the
  # `sax_config` exposed by each object's class.
  class SAXHandler < Nokogiri::XML::SAX::Document
    # Sentinel stored in a stack node's buffer until character data arrives.
    NO_BUFFER = :no_buffer

    # One frame of the parse stack: the object being populated, the config
    # that caused the frame to be pushed (nil for the root frame) and the
    # character data collected so far.
    class StackNode < Struct.new(:object, :config, :buffer)
      def initialize(object, config = nil, buffer = NO_BUFFER)
        self.object = object
        self.config = config
        self.buffer = buffer
      end
    end

    # object     - root object to populate.
    # on_error   - optional callable invoked with the message passed to #error.
    # on_warning - optional callable invoked with the message passed to #warning.
    def initialize(object, on_error = nil, on_warning = nil)
      @stack = [ StackNode.new(object) ]
      @parsed_configs = {}
      @on_error = on_error
      @on_warning = on_warning
    end

    # Appends character data to the buffer of the frame on top of the stack,
    # starting a fresh buffer (a copy of data) on the first chunk.
    def characters(data)
      node = stack.last

      if node.buffer == NO_BUFFER
        node.buffer = data.dup
      else
        node.buffer << data
      end
    end
    alias cdata_block characters

    # Handles an opening tag. Nothing happens unless the class of the object
    # on top of the stack exposes a sax_config.
    def start_element(name, attrs = [])
      name = normalize_name(name)
      node = stack.last
      object = node.object

      sax_config = sax_config_for(object)

      if sax_config
        attrs = Hash[attrs]

        # A collection match creates a new item object and makes it (and its
        # own config) the current target for the rest of this method.
        if (collection_config = sax_config.collection_config(name, attrs))
          object = collection_config.data_class.new
          sax_config = sax_config_for(object)

          stack.push(StackNode.new(object, collection_config))

          set_attributes_on(object, attrs)
        end

        # Elements whose value comes from an attribute are set immediately.
        sax_config.element_configs_for_attribute(name, attrs).each do |ec|
          unless parsed_config?(object, ec)
            object.send(ec.setter, ec.value_from_attrs(attrs))
            mark_as_parsed(object, ec)
          end
        end

        if !collection_config && (element_config = sax_config.element_config_for_tag(name, attrs))
          # Placeholder object for the new frame; the real value is
          # computed from the character buffer in #end_element.
          new_object =
            case element_config.data_class.to_s
            when 'Integer' then 0
            when 'Float'   then 0.0
            when 'Time'    then Time.at(0)
            when ''        then object
            else
              element_config.data_class.new
            end

          stack.push(StackNode.new(new_object, element_config))

          set_attributes_on(new_object, attrs)
        end
      end
    end

    # Handles a closing tag: if the top frame was opened for this tag, stores
    # its value on the parent frame's object and pops the frame.
    def end_element(name)
      name = normalize_name(name)

      start_tag = stack[-2]
      close_tag = stack[-1]

      return unless start_tag && close_tag

      object  = start_tag.object
      element = close_tag.object
      config  = close_tag.config
      value   = close_tag.buffer

      # The closing tag does not belong to the frame on top of the stack.
      return unless config.name == name

      unless parsed_config?(object, config)
        if (element_value_config = element_values_for(config))
          element_value_config.each { |evc| element.send(evc.setter, value) }
        end

        if config.respond_to?(:accessor)
          subconfig = sax_config_for(element)

          if (econf = subconfig.element_config_for_tag(name, []))
            element.send(econf.setter, value) unless econf.value_configured?
          end

          object.send(config.accessor) << element
        else
          value = typed_value(config, element, value)

          object.send(config.setter, value) unless value == NO_BUFFER

          mark_as_parsed(object, config)
        end

        # try to set the ancestor
        if (sax_config = sax_config_for(element))
          sax_config.ancestors.each do |ancestor|
            element.send(ancestor.setter, object)
          end
        end
      end

      stack.pop
    end

    private

    # Converts the collected buffer according to config.data_class.
    #
    # The NO_BUFFER sentinel (element without character data) is passed
    # through untouched for the typed cases so the caller skips the setter.
    # Converting the sentinel would raise for Integer/Float/Time (a Symbol has
    # no #to_i/#to_f and "no_buffer" is not a parseable time) and would store
    # the literal "no_buffer" for String.
    def typed_value(config, element, value)
      case config.data_class.to_s
      when 'String'  then value == NO_BUFFER ? value : value.to_s
      when 'Integer' then value == NO_BUFFER ? value : value.to_i
      when 'Float'   then value == NO_BUFFER ? value : value.to_f
      # Assumes that time elements will be string-based and are not
      # something else, e.g. seconds since epoch
      when 'Time'    then value == NO_BUFFER ? value : Time.parse(value.to_s)
      when ''        then value
      else
        element
      end
    end

    # Remembers that element_config has been applied to object, so later
    # occurrences are ignored. Collection configs are never recorded.
    def mark_as_parsed(object, element_config)
      unless element_config.collection?
        @parsed_configs[[object.object_id, element_config.object_id]] = true
      end
    end

    # Truthy when element_config has already been applied to object.
    def parsed_config?(object, element_config)
      @parsed_configs[[object.object_id, element_config.object_id]]
    end

    # Forwards a warning message to the on_warning callable, if one was given.
    def warning(string)
      if @on_warning
        @on_warning.call(string)
      end
    end

    # Forwards an error message to the on_error callable, if one was given.
    def error(string)
      if @on_error
        @on_error.call(string)
      end
    end

    # The sax_config of object's class, or nil when the class has none.
    def sax_config_for(object)
      if object.class.respond_to?(:sax_config)
        object.class.sax_config
      end
    end

    # The element-value configs declared by config.data_class, or nil when
    # the data class does not expose a sax_config.
    def element_values_for(config)
      if config.data_class.respond_to?(:sax_config)
        config.data_class.sax_config.element_values_for_element
      end
    end

    # Replaces hyphens in a tag name with underscores.
    def normalize_name(name)
      name.gsub(/\-/, '_')
    end

    # Applies every attribute config of object's class that matches the given
    # attributes; does nothing when the class has no sax_config.
    def set_attributes_on(object, attributes)
      config = sax_config_for(object)

      if config
        config.attribute_configs_for_element(attributes).each do |ac|
          object.send(ac.setter, ac.value_from_attrs(attributes))
        end
      end
    end

    # The stack of StackNode frames; the last entry is the innermost element.
    def stack
      @stack
    end
  end
end
@@ -0,0 +1,3 @@
1
module SAXMachine
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.2.0.rc2".freeze
end
@@ -0,0 +1,24 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for sax-machine-patched. The version number is read from
# lib/sax-machine/version.rb, resolved relative to this file.
require File.expand_path('../lib/sax-machine/version', __FILE__)

Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'sax-machine-patched'
  spec.version  = SAXMachine::VERSION
  spec.platform = Gem::Platform::RUBY

  # Authorship and project metadata
  spec.authors  = ['Paul Dix', 'Julien Kirch', 'Ezekiel Templin']
  spec.email    = 'paul@pauldix.net'
  spec.homepage = 'http://github.com/pauldix/sax-machine'
  spec.summary  = 'Declarative SAX Parsing with Nokogiri Patch release'
  spec.license  = 'MIT'

  # Packaged files are whatever git tracks; test files are the tracked
  # entries under test/, spec/ and features/.
  tracked_files = `git ls-files`
  tracked_tests = `git ls-files -- {test,spec,features}/*`
  spec.files         = tracked_files.split("\n")
  spec.test_files    = tracked_tests.split("\n")
  spec.require_paths = ['lib']

  # Dependencies
  spec.add_dependency 'nokogiri', '1.6.0'
  spec.add_development_dependency 'rspec', '~> 2.13.0'
end
@@ -0,0 +1,40 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <ItemSearchResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
3
+ <OperationRequest>
4
+ <HTTPHeaders>
5
+ <Header Name="UserAgent">
6
+ </Header>
7
+ </HTTPHeaders>
8
+ <RequestId>16WRJBVEM155Q026KCV1</RequestId>
9
+ <Arguments>
10
+ <Argument Name="SearchIndex" Value="Books"></Argument>
11
+ <Argument Name="Service" Value="AWSECommerceService"></Argument>
12
+ <Argument Name="Title" Value="Ruby on Rails"></Argument>
13
+ <Argument Name="Operation" Value="ItemSearch"></Argument>
14
+ <Argument Name="AWSAccessKeyId" Value="dontbeaswoosh"></Argument>
15
+ </Arguments>
16
+ <RequestProcessingTime>0.064924955368042</RequestProcessingTime>
17
+ </OperationRequest>
18
+ <Items>
19
+ <Request>
20
+ <IsValid>True</IsValid>
21
+ <ItemSearchRequest>
22
+ <SearchIndex>Books</SearchIndex>
23
+ <Title>Ruby on Rails</Title>
24
+ </ItemSearchRequest>
25
+ </Request>
26
+ <TotalResults>22</TotalResults>
27
+ <TotalPages>3</TotalPages>
28
+ <Item>
29
+ <ASIN>0321480791</ASIN>
30
+ <DetailPageURL>http://www.amazon.com/gp/redirect.html%3FASIN=0321480791%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0321480791%253FSubscriptionId=dontbeaswoosh</DetailPageURL>
31
+ <ItemAttributes>
32
+ <Author>Michael Hartl</Author>
33
+ <Author>Aurelius Prochazka</Author>
34
+ <Manufacturer>Addison-Wesley Professional</Manufacturer>
35
+ <ProductGroup>Book</ProductGroup>
36
+ <Title>RailsSpace: Building a Social Networking Website with Ruby on Rails (Addison-Wesley Professional Ruby Series)</Title>
37
+ </ItemAttributes>
38
+ </Item>
39
+ </Items>
40
+ </ItemSearchResponse>
@@ -0,0 +1,158 @@
1
require 'rubygems'
require 'benchmark'
require 'happymapper'
require 'sax-machine'
require 'rfeedparser'
include Benchmark

# Number of times each parser is run per report.
benchmark_iterations = 100

module Feedzirra
  # A single entry of an Atom feed.
  class AtomEntry
    include SAXMachine

    element :title
    element :name, as: :author
    element "feedburner:origLink", as: :url
    element :summary
    element :content
    element :published
  end

  # Class for parsing Atom feeds
  class Atom
    include SAXMachine

    element :title
    # Two <link> mappings distinguished by their `type` attribute; both read
    # the `href` attribute.
    element :link, value: :href, as: :url, with: { type: "text/html" }
    element :link, value: :href, as: :feed_url, with: { type: "application/atom+xml" }
    elements :entry, as: :entries, class: AtomEntry
  end
end

feed_text = File.read("spec/sax-machine/atom.xml")

# Parse the same feed text with each library and report the timings.
benchmark do |t|
  t.report("feedzirra") do
    benchmark_iterations.times do
      Feedzirra::Atom.new.parse(feed_text)
    end
  end

  t.report("rfeedparser") do
    benchmark_iterations.times do
      FeedParser.parse(feed_text)
    end
  end
end
44
+
45
+ # class AtomEntry
46
+ # include SAXMachine
47
+ # element :title
48
+ # element :name, :as => :author
49
+ # element :summary
50
+ # end
51
+ # class Atom
52
+ # include SAXMachine
53
+ # element :title
54
+ # elements :entry, :as => :entries, :class => AtomEntry
55
+ # end
56
+ #
57
+ # class Entry
58
+ # include HappyMapper
59
+ # element :title, String
60
+ # element :name, String
61
+ # element :summary, String
62
+ # end
63
+ # class Feed
64
+ # include HappyMapper
65
+ # element :title, String
66
+ # has_many :entry, Entry
67
+ # end
68
+ # feed_text = File.read("spec/sax-machine/atom.xml")
69
+ #
70
+ # benchmark do |t|
71
+ # t.report("sax-machine") do
72
+ # benchmark_iterations.times {
73
+ # Atom.new.parse(feed_text)
74
+ # }
75
+ # end
76
+ #
77
+ # t.report("happymapper") do
78
+ # benchmark_iterations.times {
79
+ # Feed.parse(feed_text)
80
+ # }
81
+ # end
82
+ # end
83
+
84
+ # xml = File.read("spec/benchmarks/public_timeline.xml")
85
+ # class Status
86
+ # include HappyMapper
87
+ #
88
+ # element :text, String
89
+ # element :source, String
90
+ # end
91
+ #
92
+ # class Statuses
93
+ # include SAXMachine
94
+ #
95
+ # elements :status, {:as => :statuses, :class => Class.new do
96
+ # include SAXMachine
97
+ # element :text
98
+ # element :source
99
+ # end}
100
+ # end
101
+ #
102
+ # benchmark do |t|
103
+ # t.report("happy mapper") do
104
+ # benchmark_iterations.times {
105
+ # Status.parse(xml)
106
+ # }
107
+ # end
108
+ #
109
+ # t.report("sax-machine") do
110
+ # benchmark_iterations.times {
111
+ # Statuses.parse(xml)
112
+ # }
113
+ # end
114
+ # end
115
+
116
+ # xml = File.read("spec/benchmarks/amazon.xml")
117
+ # class HItem
118
+ # include HappyMapper
119
+ #
120
+ # tag 'Item' # if you put class in module you need tag
121
+ # element :asin, String, :tag => 'ASIN'
122
+ # element :detail_page_url, String, :tag => 'DetailPageURL'
123
+ # element :manufacturer, String, :tag => 'Manufacturer', :deep => true
124
+ # end
125
+ # class HItems
126
+ # include HappyMapper
127
+ #
128
+ # tag 'Items' # if you put class in module you need tag
129
+ # # element :total_results, Integer, :tag => 'TotalResults'
130
+ # # element :total_pages, Integer, :tag => 'TotalPages'
131
+ # has_many :items, Item
132
+ # end
133
+ #
134
+ # class Item
135
+ # include SAXMachine
136
+ #
137
+ # element :ASIN, :as => :asin
138
+ # element :DetailPageUrl, :as => :detail_page_url
139
+ # element :Manufacturer, :as => :manufacturer
140
+ # end
141
+ # class Items
142
+ # include SAXMachine
143
+ # elements :Item, :as => :items
144
+ # end
145
+ #
146
+ # benchmark do |t|
147
+ # t.report("sax-machine") do
148
+ # benchmark_iterations.times {
149
+ # Items.new.parse(xml)
150
+ # }
151
+ # end
152
+ #
153
+ # t.report("happymapper") do
154
+ # benchmark_iterations.times {
155
+ # HItems.parse(xml)
156
+ # }
157
+ # end
158
+ # end