sax-machine 0.0.16 → 0.2.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,67 +1,191 @@
1
1
  require "nokogiri"
2
+ require "time"
2
3
 
3
4
  module SAXMachine
4
5
  class SAXHandler < Nokogiri::XML::SAX::Document
5
- attr_reader :stack
6
+ NO_BUFFER = :no_buffer
6
7
 
7
- def initialize(object)
8
- @stack = [[object, nil, ""]]
9
- @parsed_configs = {}
8
# One frame of the parser stack: the object being populated, the config that
# opened the frame, and the character data buffered for it so far.
class StackNode < Struct.new(:object, :config, :buffer)
  # config defaults to nil and buffer to the NO_BUFFER sentinel, which marks
  # a frame that has not received any character data yet.
  def initialize(object, config = nil, buffer = NO_BUFFER)
    super(object, config, buffer)
  end
end
11
15
 
12
- def characters(string)
13
- object, config, value = stack.last
14
- value << string
16
# Prepares a handler that fills in +object+ while a document is parsed.
#
# object     - root object; it is wrapped in the bottom frame of the stack.
# on_error   - optional callable, stored for the #error callback.
# on_warning - optional callable, stored for the #warning callback.
def initialize(object, on_error = nil, on_warning = nil)
  @on_warning = on_warning
  @on_error = on_error
  @parsed_configs = {}
  @stack = [StackNode.new(object)]
end
16
22
 
17
- def cdata_block(string)
18
- characters(string)
23
# SAX callback for text content: accumulates +data+ in the buffer of the
# innermost stack frame. The first chunk is duplicated so that later appends
# never mutate the string handed in by the parser.
def characters(data)
  frame = stack.last

  unless frame.buffer == NO_BUFFER
    return frame.buffer << data
  end

  frame.buffer = data.dup
end
# CDATA sections are buffered exactly like plain character data.
alias cdata_block characters
20
33
 
21
34
  def start_element(name, attrs = [])
22
35
  attrs.flatten!
23
- object, config, value = stack.last
24
- sax_config = object.class.respond_to?(:sax_config) ? object.class.sax_config : nil
36
+
37
+ name = normalize_name(name)
38
+ node = stack.last
39
+ object = node.object
40
+
41
+ sax_config = sax_config_for(object)
25
42
 
26
43
  if sax_config
27
44
  if collection_config = sax_config.collection_config(name, attrs)
28
- stack.push [object = collection_config.data_class.new, collection_config, ""]
29
- object, sax_config, is_collection = object, object.class.sax_config, true
45
+ object = collection_config.data_class.new
46
+ sax_config = sax_config_for(object)
47
+
48
+ stack.push(StackNode.new(object, collection_config))
49
+
50
+ set_attributes_on(object, attrs)
30
51
  end
52
+
31
53
  sax_config.element_configs_for_attribute(name, attrs).each do |ec|
32
54
  unless parsed_config?(object, ec)
33
55
  object.send(ec.setter, ec.value_from_attrs(attrs))
34
56
  mark_as_parsed(object, ec)
35
57
  end
36
58
  end
59
+
37
60
  if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
38
- stack.push [element_config.data_class ? element_config.data_class.new : object, element_config, ""]
61
+ new_object =
62
+ case element_config.data_class.to_s
63
+ when 'Integer' then 0
64
+ when 'Float' then 0.0
65
+ when 'Time' then Time.at(0)
66
+ when '' then object
67
+ else
68
+ element_config.data_class.new
69
+ end
70
+
71
+ stack.push(StackNode.new(new_object, element_config))
72
+
73
+ set_attributes_on(new_object, attrs)
39
74
  end
40
75
  end
41
76
  end
42
77
 
43
78
  def end_element(name)
44
- (object, tag_config, _), (element, config, value) = stack[-2..-1]
45
- return unless stack.size > 1 && config && config.name.to_s == name.to_s
79
+ name = normalize_name(name)
80
+
81
+ start_tag = stack[-2]
82
+ close_tag = stack[-1]
83
+
84
+ return unless start_tag && close_tag
85
+
86
+ object = start_tag.object
87
+ element = close_tag.object
88
+ config = close_tag.config
89
+ value = close_tag.buffer
90
+
91
+ return unless config.name == name
46
92
 
47
93
  unless parsed_config?(object, config)
94
+ if (element_value_config = element_values_for(config))
95
+ element_value_config.each { |evc| element.send(evc.setter, value) }
96
+ end
97
+
48
98
  if config.respond_to?(:accessor)
99
+ subconfig = sax_config_for(element)
100
+
101
+ if econf = subconfig.element_config_for_tag(name, [])
102
+ element.send(econf.setter, value) unless econf.value_configured?
103
+ end
104
+
49
105
  object.send(config.accessor) << element
50
106
  else
51
- value = config.data_class ? element : value
52
- object.send(config.setter, value) unless value == ""
107
+ value =
108
+ case config.data_class.to_s
109
+ when 'String' then value.to_s
110
+ when 'Integer' then value.to_i
111
+ when 'Float' then value.to_f
112
+ # Assumes that time elements will be string-based and are not
113
+ # something else, e.g. seconds since epoch
114
+ when 'Time' then Time.parse(value.to_s)
115
+ when '' then value
116
+ else
117
+ element
118
+ end
119
+
120
+ object.send(config.setter, value) unless value == NO_BUFFER
121
+
53
122
  mark_as_parsed(object, config)
54
123
  end
124
+
125
+ # try to set the ancestor
126
+ if (sax_config = sax_config_for(element))
127
+ sax_config.ancestors.each do |ancestor|
128
+ element.send(ancestor.setter, object)
129
+ end
130
+ end
55
131
  end
132
+
56
133
  stack.pop
57
134
  end
58
135
 
136
+ private
137
+
59
138
  def mark_as_parsed(object, element_config)
60
- @parsed_configs[[object.object_id, element_config.object_id]] = true unless element_config.collection?
139
+ unless element_config.collection?
140
+ @parsed_configs[[object.object_id, element_config.object_id]] = true
141
+ end
61
142
  end
62
143
 
63
144
  def parsed_config?(object, element_config)
64
145
  @parsed_configs[[object.object_id, element_config.object_id]]
65
146
  end
147
+
148
# Warning callback: forwards +string+ to the on_warning callable when one
# was supplied, and does nothing otherwise.
def warning(string)
  @on_warning.call(string) if @on_warning
end
153
+
154
# Error callback: forwards +string+ to the on_error callable when one was
# supplied, and does nothing otherwise.
def error(string)
  @on_error.call(string) if @on_error
end
159
+
160
+
161
# Returns the sax_config of +object+'s class, or nil when the class does not
# respond to sax_config.
def sax_config_for(object)
  klass = object.class
  klass.sax_config if klass.respond_to?(:sax_config)
end
166
+
167
# Returns element_values_for_element from the sax_config of +config+'s data
# class, or nil when the data class does not respond to sax_config.
def element_values_for(config)
  data_class = config.data_class
  return unless data_class.respond_to?(:sax_config)

  data_class.sax_config.element_values_for_element
end
172
+
173
# Returns a copy of +name+ with every dash replaced by an underscore.
def normalize_name(name)
  name.tr('-', '_')
end
176
+
177
# Applies every attribute config that matches +attributes+ to +object+.
# Does nothing when the object's class has no sax_config.
def set_attributes_on(object, attributes)
  config = sax_config_for(object)
  return unless config

  config.attribute_configs_for_element(attributes).each do |attribute_config|
    object.send(attribute_config.setter, attribute_config.value_from_attrs(attributes))
  end
end
186
+
187
# Reader for the frame stack; new frames are pushed onto the end, so the
# innermost frame is the last entry.
def stack
  @stack
end
66
190
  end
67
- end
191
+ end
@@ -0,0 +1,3 @@
1
module SAXMachine
  # Release identifier of the gem. Frozen so the shared constant cannot be
  # mutated in place at runtime.
  VERSION = "0.2.0.rc1".freeze
end
data/lib/sax-machine.rb CHANGED
@@ -1,11 +1,8 @@
1
- require "rubygems"
2
-
3
- $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
4
-
1
+ require "sax-machine/version"
5
2
  require "sax-machine/sax_document"
3
+ require "sax-machine/sax_configure"
6
4
  require "sax-machine/sax_handler"
7
5
  require "sax-machine/sax_config"
8
6
 
9
7
  module SAXMachine
10
- VERSION = "0.0.16"
11
8
  end
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/sax-machine/version', __FILE__)
3
+
4
# Packaging metadata for the sax-machine gem.
Gem::Specification.new do |s|
  s.name        = 'sax-machine'
  s.version     = SAXMachine::VERSION

  s.authors     = ["Paul Dix", "Julien Kirch", "Ezekiel Templin"]
  # Time is always loaded, whereas Date needs the `date` library, which this
  # file never requires. It supplies the same year/month/day as Date.today.
  s.date        = Time.now
  s.email       = %q{paul@pauldix.net}
  s.homepage    = %q{http://github.com/pauldix/sax-machine}

  s.summary     = %q{Declarative SAX Parsing with Nokogiri}

  # The packaged file lists come from the git index.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.require_paths = ["lib"]

  s.platform    = Gem::Platform::RUBY

  s.add_dependency 'nokogiri', "~> 1.5.2"
  s.add_development_dependency "rspec", "~> 2.10.0"
end
@@ -0,0 +1,40 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <ItemSearchResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
3
+ <OperationRequest>
4
+ <HTTPHeaders>
5
+ <Header Name="UserAgent">
6
+ </Header>
7
+ </HTTPHeaders>
8
+ <RequestId>16WRJBVEM155Q026KCV1</RequestId>
9
+ <Arguments>
10
+ <Argument Name="SearchIndex" Value="Books"></Argument>
11
+ <Argument Name="Service" Value="AWSECommerceService"></Argument>
12
+ <Argument Name="Title" Value="Ruby on Rails"></Argument>
13
+ <Argument Name="Operation" Value="ItemSearch"></Argument>
14
+ <Argument Name="AWSAccessKeyId" Value="dontbeaswoosh"></Argument>
15
+ </Arguments>
16
+ <RequestProcessingTime>0.064924955368042</RequestProcessingTime>
17
+ </OperationRequest>
18
+ <Items>
19
+ <Request>
20
+ <IsValid>True</IsValid>
21
+ <ItemSearchRequest>
22
+ <SearchIndex>Books</SearchIndex>
23
+ <Title>Ruby on Rails</Title>
24
+ </ItemSearchRequest>
25
+ </Request>
26
+ <TotalResults>22</TotalResults>
27
+ <TotalPages>3</TotalPages>
28
+ <Item>
29
+ <ASIN>0321480791</ASIN>
30
+ <DetailPageURL>http://www.amazon.com/gp/redirect.html%3FASIN=0321480791%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0321480791%253FSubscriptionId=dontbeaswoosh</DetailPageURL>
31
+ <ItemAttributes>
32
+ <Author>Michael Hartl</Author>
33
+ <Author>Aurelius Prochazka</Author>
34
+ <Manufacturer>Addison-Wesley Professional</Manufacturer>
35
+ <ProductGroup>Book</ProductGroup>
36
+ <Title>RailsSpace: Building a Social Networking Website with Ruby on Rails (Addison-Wesley Professional Ruby Series)</Title>
37
+ </ItemAttributes>
38
+ </Item>
39
+ </Items>
40
+ </ItemSearchResponse>
@@ -0,0 +1,158 @@
1
+ require 'rubygems'
2
+ require 'benchmark'
3
+ require 'happymapper'
4
+ require 'sax-machine'
5
+ require 'rfeedparser'
6
+ include Benchmark
7
+ benchmark_iterations = 100
8
+
9
# Minimal Atom feed mappings used as the sax-machine side of the benchmark.
module Feedzirra
  # Mapping for a single Atom <entry>.
  class AtomEntry
    include SAXMachine

    element :title
    element :name, :as => :author
    element "feedburner:origLink", :as => :url
    element :summary
    element :content
    element :published
  end

  # Class for parsing Atom feeds
  class Atom
    include SAXMachine

    element :title
    # Two <link> mappings told apart by their type attribute; both read href.
    element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
    element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
    elements :entry, :as => :entries, :class => AtomEntry
  end
end
29
# Load the sample feed once so file I/O is not part of the timed work.
feed_text = File.read("spec/sax-machine/atom.xml")

# Time benchmark_iterations parses of the same feed with each library.
benchmark do |t|
  t.report("feedzirra") do
    benchmark_iterations.times do
      Feedzirra::Atom.new.parse(feed_text)
    end
  end

  t.report("rfeedparser") do
    benchmark_iterations.times do
      FeedParser.parse(feed_text)
    end
  end
end
44
+
45
+ # class AtomEntry
46
+ # include SAXMachine
47
+ # element :title
48
+ # element :name, :as => :author
49
+ # element :summary
50
+ # end
51
+ # class Atom
52
+ # include SAXMachine
53
+ # element :title
54
+ # elements :entry, :as => :entries, :class => AtomEntry
55
+ # end
56
+ #
57
+ # class Entry
58
+ # include HappyMapper
59
+ # element :title, String
60
+ # element :name, String
61
+ # element :summary, String
62
+ # end
63
+ # class Feed
64
+ # include HappyMapper
65
+ # element :title, String
66
+ # has_many :entry, Entry
67
+ # end
68
+ # feed_text = File.read("spec/sax-machine/atom.xml")
69
+ #
70
+ # benchmark do |t|
71
+ # t.report("sax-machine") do
72
+ # benchmark_iterations.times {
73
+ # Atom.new.parse(feed_text)
74
+ # }
75
+ # end
76
+ #
77
+ # t.report("happymapper") do
78
+ # benchmark_iterations.times {
79
+ # Feed.parse(feed_text)
80
+ # }
81
+ # end
82
+ # end
83
+
84
+ # xml = File.read("spec/benchmarks/public_timeline.xml")
85
+ # class Status
86
+ # include HappyMapper
87
+ #
88
+ # element :text, String
89
+ # element :source, String
90
+ # end
91
+ #
92
+ # class Statuses
93
+ # include SAXMachine
94
+ #
95
+ # elements :status, {:as => :statuses, :class => Class.new do
96
+ # include SAXMachine
97
+ # element :text
98
+ # element :source
99
+ # end}
100
+ # end
101
+ #
102
+ # benchmark do |t|
103
+ # t.report("happy mapper") do
104
+ # benchmark_iterations.times {
105
+ # Status.parse(xml)
106
+ # }
107
+ # end
108
+ #
109
+ # t.report("sax-machine") do
110
+ # benchmark_iterations.times {
111
+ # Statuses.parse(xml)
112
+ # }
113
+ # end
114
+ # end
115
+
116
+ # xml = File.read("spec/benchmarks/amazon.xml")
117
+ # class HItem
118
+ # include HappyMapper
119
+ #
120
+ # tag 'Item' # if you put class in module you need tag
121
+ # element :asin, String, :tag => 'ASIN'
122
+ # element :detail_page_url, String, :tag => 'DetailPageURL'
123
+ # element :manufacturer, String, :tag => 'Manufacturer', :deep => true
124
+ # end
125
+ # class HItems
126
+ # include HappyMapper
127
+ #
128
+ # tag 'Items' # if you put class in module you need tag
129
+ # # element :total_results, Integer, :tag => 'TotalResults'
130
+ # # element :total_pages, Integer, :tag => 'TotalPages'
131
+ # has_many :items, Item
132
+ # end
133
+ #
134
+ # class Item
135
+ # include SAXMachine
136
+ #
137
+ # element :ASIN, :as => :asin
138
+ # element :DetailPageUrl, :as => :detail_page_url
139
+ # element :Manufacturer, :as => :manufacturer
140
+ # end
141
+ # class Items
142
+ # include SAXMachine
143
+ # elements :Item, :as => :items
144
+ # end
145
+ #
146
+ # benchmark do |t|
147
+ # t.report("sax-machine") do
148
+ # benchmark_iterations.times {
149
+ # Items.new.parse(xml)
150
+ # }
151
+ # end
152
+ #
153
+ # t.report("happymapper") do
154
+ # benchmark_iterations.times {
155
+ # HItems.parse(xml)
156
+ # }
157
+ # end
158
+ # end