sax-machine-patched 0.2.0.rc2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,65 @@
1
module SAXMachine
  class SAXConfig

    # Describes how one XML element (or one attribute of that element) is
    # mapped onto the object being populated: which setter receives the
    # value, which attribute constraints must hold, and which class, if any,
    # the value is converted to.
    class ElementConfig
      attr_reader :name, :setter, :data_class, :collection

      # name    - element name (String or Symbol); stored as a String.
      # options - Hash; the keys read here are:
      #           :with       - Hash of attribute name => matcher (compared with ===)
      #           :value      - name of the attribute that supplies the value
      #           :as         - name of the target accessor
      #           :collection - truthy when the element feeds a collection
      #           :class      - class used for the parsed value
      #           :required   - flag returned by #required?
      def initialize(name, options)
        @name = name.to_s
        @with = options.fetch(:with, {})

        # Distinguish "no :value option" (nil) from an explicit one, which is
        # always stored as a String so it can be looked up in attribute hashes.
        @value = options.key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]

        # Build the setter from the same name #column reports, so an omitted
        # :as falls back to the element name instead of producing "=" / "add_".
        @setter = @collection ? "add_#{column}" : "#{column}="

        @data_class = options[:class]
        @required = options[:required]
      end

      # True when the config reads its value from an attribute (:value given).
      def value_configured?
        !@value.nil?
      end

      # Human-readable dump of every setting, intended for debugging.
      def to_s
        "name: #{@name} dataclass: #{@data_class} setter: #{@setter} required: #{@required} value: #{@value} as:#{@as} collection: #{@collection} with: #{@with}"
      end

      # Name of the target accessor: :as when given, else the element name as a Symbol.
      def column
        @as || @name.to_sym
      end

      # Returns the :required option exactly as it was passed in.
      def required?
        @required
      end

      # Looks up the configured :value attribute in attrs (a Hash keyed by
      # attribute name); returns nil when the attribute is absent.
      def value_from_attrs(attrs)
        attrs.fetch(@value, nil)
      end

      # True when every :with constraint matches the corresponding attribute.
      # Matchers are applied with ===, so Regexps, Ranges and classes work as
      # well as plain strings. An empty :with always matches.
      def attrs_match?(attrs)
        @with.all? { |key, matcher| matcher === attrs[key.to_s] }
      end

      # True when a :value attribute is configured and the :with constraints hold.
      def has_value_and_attrs_match?(attrs)
        value_configured? && attrs_match?(attrs)
      end

      # Returns the :collection option exactly as it was passed in.
      def collection?
        @collection
      end
    end

  end
end
@@ -0,0 +1,24 @@
1
module SAXMachine
  class SAXConfig

    # Describes the accessor that receives the text value of an element.
    class ElementValueConfig
      attr_reader :name, :setter

      # name    - element name (String or Symbol); stored as a String.
      # options - Hash; the keys read here are:
      #           :as       - name of the target accessor
      #           :required - flag returned by #required?
      def initialize(name, options)
        @name = name.to_s
        @as = options[:as]
        # Build the setter from the same name #column reports, so an omitted
        # :as falls back to the element name instead of producing a bare "=".
        @setter = "#{column}="
        @required = options[:required]
      end

      # Name of the target accessor: :as when given, else the element name as a Symbol.
      def column
        @as || @name.to_sym
      end

      # Returns the :required option exactly as it was passed in.
      def required?
        @required
      end
    end

  end
end
@@ -0,0 +1,192 @@
1
+ require "nokogiri"
2
+ require "time"
3
+
4
module SAXMachine
  # SAX event handler that fills an object graph from parser callbacks.
  #
  # A stack of StackNode entries tracks the object currently being populated,
  # the config that opened it and the text collected for it. Mapping rules are
  # read from the `sax_config` exposed by each object's class (defined outside
  # this file).
  class SAXHandler < Nokogiri::XML::SAX::Document
    # Sentinel kept in StackNode#buffer until character data arrives.
    NO_BUFFER = :no_buffer

    # One frame of the parse stack: the target object, the config that opened
    # the frame (nil for the root frame) and the text buffered so far.
    class StackNode < Struct.new(:object, :config, :buffer)
      def initialize(object, config = nil, buffer = NO_BUFFER)
        self.object = object
        self.config = config
        self.buffer = buffer
      end
    end

    # object     - root object to populate.
    # on_error   - optional callable invoked with the message passed to #error.
    # on_warning - optional callable invoked with the message passed to #warning.
    def initialize(object, on_error = nil, on_warning = nil)
      @stack = [ StackNode.new(object) ]
      @parsed_configs = {}
      @on_error = on_error
      @on_warning = on_warning
    end

    # Appends text to the buffer of the frame on top of the stack. The first
    # chunk is duplicated so later appends do not mutate the string handed in
    # by the caller.
    def characters(data)
      node = stack.last

      if node.buffer == NO_BUFFER
        node.buffer = data.dup
      else
        node.buffer << data
      end
    end
    alias cdata_block characters

    # Handles an opening tag: pushes a frame when the tag matches a collection
    # or element config of the current object, and applies attribute-driven
    # setters. Does nothing when the current object's class has no sax_config.
    #
    # name  - tag name; dashes are normalized to underscores.
    # attrs - attribute name/value pairs, converted with Hash[].
    def start_element(name, attrs = [])
      name = normalize_name(name)
      node = stack.last
      object = node.object

      sax_config = sax_config_for(object)
      return unless sax_config

      attrs = Hash[attrs]

      if (collection_config = sax_config.collection_config(name, attrs))
        # From here on the freshly created collection item is the object
        # whose config is consulted.
        object = collection_config.data_class.new
        sax_config = sax_config_for(object)

        stack.push(StackNode.new(object, collection_config))

        set_attributes_on(object, attrs)
      end

      sax_config.element_configs_for_attribute(name, attrs).each do |ec|
        unless parsed_config?(object, ec)
          object.send(ec.setter, ec.value_from_attrs(attrs))
          mark_as_parsed(object, ec)
        end
      end

      if !collection_config && (element_config = sax_config.element_config_for_tag(name, attrs))
        # Placeholder object for the new frame; for the scalar classes the
        # real value is produced from the buffered text in #end_element.
        new_object =
          case element_config.data_class.to_s
          when 'Integer' then 0
          when 'Float'   then 0.0
          when 'Time'    then Time.at(0)
          when ''        then object
          else
            element_config.data_class.new
          end

        stack.push(StackNode.new(new_object, element_config))

        set_attributes_on(new_object, attrs)
      end
    end

    # Handles a closing tag: when it closes the frame on top of the stack, the
    # buffered text (or the built element) is handed to the parent object and
    # the frame is popped. Tags that do not match the top frame are ignored.
    def end_element(name)
      name = normalize_name(name)

      start_tag = stack[-2]
      close_tag = stack[-1]

      return unless start_tag && close_tag

      object  = start_tag.object
      element = close_tag.object
      config  = close_tag.config
      value   = close_tag.buffer

      return unless config.name == name

      unless parsed_config?(object, config)
        if (element_value_config = element_values_for(config))
          element_value_config.each { |evc| element.send(evc.setter, value) }
        end

        if config.respond_to?(:accessor)
          # Configs that expose an accessor collect their elements: the
          # element is appended to whatever the accessor returns.
          subconfig = sax_config_for(element)

          if (econf = subconfig.element_config_for_tag(name, []))
            element.send(econf.setter, value) unless econf.value_configured?
          end

          object.send(config.accessor) << element
        else
          value = cast_value(config.data_class, value, element)

          object.send(config.setter, value) unless value == NO_BUFFER

          mark_as_parsed(object, config)
        end

        # try to set the ancestor
        if (sax_config = sax_config_for(element))
          sax_config.ancestors.each do |ancestor|
            element.send(ancestor.setter, object)
          end
        end
      end

      stack.pop
    end

    private

    # Chooses the value handed to the parent's setter for a closed element.
    # No data class: the raw buffer. Scalar data class: the buffer cast to it.
    # Any other data class: the element object itself.
    #
    # The NO_BUFFER sentinel is passed through uncast so the caller can skip
    # the setter for an element without text. Casting the sentinel would
    # store the text "no_buffer" (String) or raise (Integer, Float, Time).
    def cast_value(data_class, value, element)
      type_name = data_class.to_s

      case type_name
      when ''
        value
      when 'String', 'Integer', 'Float', 'Time'
        value == NO_BUFFER ? value : cast_text(type_name, value)
      else
        element
      end
    end

    # Converts buffered element text to the scalar class named by type_name.
    def cast_text(type_name, text)
      case type_name
      when 'String'  then text.to_s
      when 'Integer' then text.to_i
      when 'Float'   then text.to_f
      # Assumes that time elements will be string-based and are not
      # something else, e.g. seconds since epoch
      when 'Time'    then Time.parse(text.to_s)
      end
    end

    # Remembers that element_config has been applied to object so a later
    # occurrence of the same tag is ignored. Collection configs are never
    # marked, so they can fire repeatedly.
    def mark_as_parsed(object, element_config)
      unless element_config.collection?
        @parsed_configs[[object.object_id, element_config.object_id]] = true
      end
    end

    # Truthy when element_config was already applied to object.
    def parsed_config?(object, element_config)
      @parsed_configs[[object.object_id, element_config.object_id]]
    end

    # Forwards a warning message to the on_warning callable, when one was given.
    def warning(string)
      if @on_warning
        @on_warning.call(string)
      end
    end

    # Forwards an error message to the on_error callable, when one was given.
    def error(string)
      if @on_error
        @on_error.call(string)
      end
    end

    # Returns the sax_config of object's class, or nil when the class does not
    # respond to sax_config.
    def sax_config_for(object)
      if object.class.respond_to?(:sax_config)
        object.class.sax_config
      end
    end

    # Returns the element-value configs declared on config's data class, or
    # nil when the data class does not respond to sax_config.
    def element_values_for(config)
      if config.data_class.respond_to?(:sax_config)
        config.data_class.sax_config.element_values_for_element
      end
    end

    # Replaces dashes with underscores in a tag name.
    def normalize_name(name)
      name.tr('-', '_')
    end

    # Applies every attribute config of object's class that matches the given
    # attributes; a no-op when the class has no sax_config.
    def set_attributes_on(object, attributes)
      config = sax_config_for(object)

      if config
        config.attribute_configs_for_element(attributes).each do |ac|
          object.send(ac.setter, ac.value_from_attrs(attributes))
        end
      end
    end

    # The parse stack; the last entry is the frame currently being filled.
    def stack
      @stack
    end
  end
end
@@ -0,0 +1,3 @@
1
module SAXMachine
  # Version string of the gem. Frozen so it cannot be mutated in place at
  # runtime.
  VERSION = "0.2.0.rc2".freeze
end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/sax-machine/version', __FILE__)
3
+
4
# Gem specification for the patched sax-machine release.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "sax-machine-patched"
  spec.version  = SAXMachine::VERSION
  spec.platform = Gem::Platform::RUBY
  spec.license  = "MIT"

  # Authorship and project links
  spec.authors  = ["Paul Dix", "Julien Kirch", "Ezekiel Templin"]
  spec.email    = "paul@pauldix.net"
  spec.homepage = "http://github.com/pauldix/sax-machine"

  spec.summary  = "Declarative SAX Parsing with Nokogiri Patch release"

  # Packaged files are whatever git tracks at build time.
  tracked_files = `git ls-files`
  tracked_tests = `git ls-files -- {test,spec,features}/*`

  spec.files         = tracked_files.split("\n")
  spec.test_files    = tracked_tests.split("\n")
  spec.require_paths = ["lib"]

  # Runtime dependency is pinned to one exact nokogiri version.
  spec.add_dependency "nokogiri", "1.6.0"
  spec.add_development_dependency "rspec", "~> 2.13.0"
end
@@ -0,0 +1,40 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <ItemSearchResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
3
+ <OperationRequest>
4
+ <HTTPHeaders>
5
+ <Header Name="UserAgent">
6
+ </Header>
7
+ </HTTPHeaders>
8
+ <RequestId>16WRJBVEM155Q026KCV1</RequestId>
9
+ <Arguments>
10
+ <Argument Name="SearchIndex" Value="Books"></Argument>
11
+ <Argument Name="Service" Value="AWSECommerceService"></Argument>
12
+ <Argument Name="Title" Value="Ruby on Rails"></Argument>
13
+ <Argument Name="Operation" Value="ItemSearch"></Argument>
14
+ <Argument Name="AWSAccessKeyId" Value="dontbeaswoosh"></Argument>
15
+ </Arguments>
16
+ <RequestProcessingTime>0.064924955368042</RequestProcessingTime>
17
+ </OperationRequest>
18
+ <Items>
19
+ <Request>
20
+ <IsValid>True</IsValid>
21
+ <ItemSearchRequest>
22
+ <SearchIndex>Books</SearchIndex>
23
+ <Title>Ruby on Rails</Title>
24
+ </ItemSearchRequest>
25
+ </Request>
26
+ <TotalResults>22</TotalResults>
27
+ <TotalPages>3</TotalPages>
28
+ <Item>
29
+ <ASIN>0321480791</ASIN>
30
+ <DetailPageURL>http://www.amazon.com/gp/redirect.html%3FASIN=0321480791%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0321480791%253FSubscriptionId=dontbeaswoosh</DetailPageURL>
31
+ <ItemAttributes>
32
+ <Author>Michael Hartl</Author>
33
+ <Author>Aurelius Prochazka</Author>
34
+ <Manufacturer>Addison-Wesley Professional</Manufacturer>
35
+ <ProductGroup>Book</ProductGroup>
36
+ <Title>RailsSpace: Building a Social Networking Website with Ruby on Rails (Addison-Wesley Professional Ruby Series)</Title>
37
+ </ItemAttributes>
38
+ </Item>
39
+ </Items>
40
+ </ItemSearchResponse>
@@ -0,0 +1,158 @@
1
+ require 'rubygems'
2
+ require 'benchmark'
3
+ require 'happymapper'
4
+ require 'sax-machine'
5
+ require 'rfeedparser'
6
# Mix Benchmark into the top level so `benchmark` can be called directly.
include Benchmark

# Number of parse runs timed per report.
benchmark_iterations = 100

module Feedzirra
  # Class for parsing a single entry of an Atom feed
  class AtomEntry
    include SAXMachine

    element :title
    element :name, as: :author
    element "feedburner:origLink", as: :url
    element :summary
    element :content
    element :published
  end

  # Class for parsing Atom feeds
  class Atom
    include SAXMachine

    element :title
    element :link, value: :href, as: :url, with: { type: "text/html" }
    element :link, value: :href, as: :feed_url, with: { type: "application/atom+xml" }
    elements :entry, as: :entries, class: AtomEntry
  end
end

# The fixture is read once, so only parsing is measured.
feed_text = File.read("spec/sax-machine/atom.xml")

benchmark do |bm|
  bm.report("feedzirra") do
    benchmark_iterations.times { Feedzirra::Atom.new.parse(feed_text) }
  end

  bm.report("rfeedparser") do
    benchmark_iterations.times { FeedParser.parse(feed_text) }
  end
end
44
+
45
+ # class AtomEntry
46
+ # include SAXMachine
47
+ # element :title
48
+ # element :name, :as => :author
49
+ # element :summary
50
+ # end
51
+ # class Atom
52
+ # include SAXMachine
53
+ # element :title
54
+ # elements :entry, :as => :entries, :class => AtomEntry
55
+ # end
56
+ #
57
+ # class Entry
58
+ # include HappyMapper
59
+ # element :title, String
60
+ # element :name, String
61
+ # element :summary, String
62
+ # end
63
+ # class Feed
64
+ # include HappyMapper
65
+ # element :title, String
66
+ # has_many :entry, Entry
67
+ # end
68
+ # feed_text = File.read("spec/sax-machine/atom.xml")
69
+ #
70
+ # benchmark do |t|
71
+ # t.report("sax-machine") do
72
+ # benchmark_iterations.times {
73
+ # Atom.new.parse(feed_text)
74
+ # }
75
+ # end
76
+ #
77
+ # t.report("happymapper") do
78
+ # benchmark_iterations.times {
79
+ # Feed.parse(feed_text)
80
+ # }
81
+ # end
82
+ # end
83
+
84
+ # xml = File.read("spec/benchmarks/public_timeline.xml")
85
+ # class Status
86
+ # include HappyMapper
87
+ #
88
+ # element :text, String
89
+ # element :source, String
90
+ # end
91
+ #
92
+ # class Statuses
93
+ # include SAXMachine
94
+ #
95
+ # elements :status, {:as => :statuses, :class => Class.new do
96
+ # include SAXMachine
97
+ # element :text
98
+ # element :source
99
+ # end}
100
+ # end
101
+ #
102
+ # benchmark do |t|
103
+ # t.report("happy mapper") do
104
+ # benchmark_iterations.times {
105
+ # Status.parse(xml)
106
+ # }
107
+ # end
108
+ #
109
+ # t.report("sax-machine") do
110
+ # benchmark_iterations.times {
111
+ # Statuses.parse(xml)
112
+ # }
113
+ # end
114
+ # end
115
+
116
+ # xml = File.read("spec/benchmarks/amazon.xml")
117
+ # class HItem
118
+ # include HappyMapper
119
+ #
120
+ # tag 'Item' # if you put class in module you need tag
121
+ # element :asin, String, :tag => 'ASIN'
122
+ # element :detail_page_url, String, :tag => 'DetailPageURL'
123
+ # element :manufacturer, String, :tag => 'Manufacturer', :deep => true
124
+ # end
125
+ # class HItems
126
+ # include HappyMapper
127
+ #
128
+ # tag 'Items' # if you put class in module you need tag
129
+ # # element :total_results, Integer, :tag => 'TotalResults'
130
+ # # element :total_pages, Integer, :tag => 'TotalPages'
131
+ # has_many :items, Item
132
+ # end
133
+ #
134
+ # class Item
135
+ # include SAXMachine
136
+ #
137
+ # element :ASIN, :as => :asin
138
+ # element :DetailPageUrl, :as => :detail_page_url
139
+ # element :Manufacturer, :as => :manufacturer
140
+ # end
141
+ # class Items
142
+ # include SAXMachine
143
+ # elements :Item, :as => :items
144
+ # end
145
+ #
146
+ # benchmark do |t|
147
+ # t.report("sax-machine") do
148
+ # benchmark_iterations.times {
149
+ # Items.new.parse(xml)
150
+ # }
151
+ # end
152
+ #
153
+ # t.report("happymapper") do
154
+ # benchmark_iterations.times {
155
+ # HItems.parse(xml)
156
+ # }
157
+ # end
158
+ # end