sax-machine 0.1.0 → 0.2.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/.travis.yml +9 -0
- data/Gemfile +8 -2
- data/Guardfile +5 -0
- data/HISTORY.md +13 -0
- data/{README.textile → README.md} +12 -15
- data/Rakefile +6 -21
- data/lib/sax-machine.rb +1 -5
- data/lib/sax-machine/sax_handler.rb +116 -32
- data/lib/sax-machine/version.rb +3 -0
- data/sax-machine.gemspec +23 -0
- data/spec/benchmarks/amazon.xml +40 -0
- data/spec/benchmarks/benchmark.rb +158 -0
- data/spec/benchmarks/public_timeline.xml +411 -0
- data/spec/sax-machine/atom.xml +165 -0
- data/spec/sax-machine/configure_sax_machine_spec.rb +1 -1
- data/spec/sax-machine/include_sax_machine_spec.rb +1 -1
- data/spec/sax-machine/sax_document_spec.rb +61 -2
- data/spec/spec_helper.rb +12 -8
- metadata +78 -58
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
gemspec
|
4
|
+
|
5
|
+
group :development, :test do
|
6
|
+
gem 'rake'
|
7
|
+
gem 'guard-rspec'
|
8
|
+
gem 'growl', :require => false
|
9
|
+
gem 'simplecov', :require => false, :platforms => :mri_19
|
10
|
+
end
|
data/Guardfile
ADDED
data/HISTORY.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# 0.2.0.rc1
|
2
|
+
* Tried to reduce the number of instances of respond_to? in the code by
|
3
|
+
pulling common uses of it out to methods. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
4
|
+
* The parse stack is now composed of simple objects instead of it being
|
5
|
+
an array of arrays. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
6
|
+
* Now using an identifier for an empty buffer instead of empty string. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
7
|
+
* Cleaned up several variables that were not being used. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
8
|
+
* Encapsulated stack so it's not being exposed as part of the API. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
9
|
+
* #cdata_block is now an alias instead of delegating to characters. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
10
|
+
|
11
|
+
# 0.1.0
|
12
|
+
* rename parent to ancestor
|
13
|
+
* added SAXMachine.configure
|
@@ -1,16 +1,13 @@
|
|
1
|
-
|
1
|
+
# SAX Machine [Build Status](http://travis-ci.org/pauldix/sax-machine)
|
2
2
|
|
3
|
-
|
3
|
+
[Wiki](https://github.com/pauldix/sax-machine/wiki)
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
h2. Description
|
5
|
+
## Description
|
8
6
|
|
9
7
|
A declarative SAX parsing library backed by Nokogiri
|
10
8
|
|
11
|
-
|
12
|
-
|
13
|
-
<pre>
|
9
|
+
## Usage
|
10
|
+
```ruby
|
14
11
|
require 'sax-machine'
|
15
12
|
|
16
13
|
# Class for information associated with content parts in a feed.
|
@@ -69,26 +66,26 @@ end
|
|
69
66
|
response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
|
70
67
|
response.messages.first # => "hi"
|
71
68
|
response.messages.last # => "world"
|
72
|
-
</pre>
|
73
69
|
|
74
70
|
# To limit conflicts in the class used for mapping, you can use the alternate SAXMachine.configure syntax
|
75
71
|
|
76
72
|
class X < ActiveRecord::Base
|
77
|
-
|
78
73
|
# this way no element, elements or ancestor method will be added to X
|
79
74
|
SAXMachine.configure(X) do |c|
|
80
75
|
c.element :title
|
81
76
|
end
|
82
|
-
|
83
77
|
end
|
78
|
+
```
|
84
79
|
|
85
|
-
|
80
|
+
## LICENSE
|
86
81
|
|
87
|
-
|
82
|
+
The MIT License
|
88
83
|
|
89
|
-
Copyright (c) 2009
|
84
|
+
Copyright (c) 2009-2012:
|
90
85
|
|
91
|
-
|
86
|
+
* [Paul Dix](http://www.pauldix.net)
|
87
|
+
* [Julien Kirch](http://www.archiloque.net)
|
88
|
+
* [Ezekiel Templin](http://zeke.templ.in)
|
92
89
|
|
93
90
|
Permission is hereby granted, free of charge, to any person obtaining
|
94
91
|
a copy of this software and associated documentation files (the
|
data/Rakefile
CHANGED
@@ -1,21 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
RSpec::Core::RakeTask.new do |t|
|
8
|
-
t.rspec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
9
|
-
end
|
10
|
-
|
11
|
-
task :default => [:spec]
|
12
|
-
|
13
|
-
task :test do
|
14
|
-
sh 'rspec spec'
|
15
|
-
end
|
16
|
-
|
17
|
-
task :install do
|
18
|
-
rm_rf "*.gem"
|
19
|
-
puts `gem build sax-machine.gemspec`
|
20
|
-
puts `sudo gem install sax-machine-#{SAXMachine::VERSION}.gem`
|
21
|
-
end
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
task :test => :spec
|
6
|
+
task :default => :test
|
data/lib/sax-machine.rb
CHANGED
@@ -1,12 +1,8 @@
|
|
1
|
-
require "
|
2
|
-
|
3
|
-
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
4
|
-
|
1
|
+
require "sax-machine/version"
|
5
2
|
require "sax-machine/sax_document"
|
6
3
|
require "sax-machine/sax_configure"
|
7
4
|
require "sax-machine/sax_handler"
|
8
5
|
require "sax-machine/sax_config"
|
9
6
|
|
10
7
|
module SAXMachine
|
11
|
-
VERSION = "0.0.16"
|
12
8
|
end
|
@@ -1,107 +1,191 @@
|
|
1
1
|
require "nokogiri"
|
2
|
+
require "time"
|
2
3
|
|
3
4
|
module SAXMachine
|
4
5
|
class SAXHandler < Nokogiri::XML::SAX::Document
|
5
|
-
|
6
|
+
NO_BUFFER = :no_buffer
|
7
|
+
|
8
|
+
class StackNode < Struct.new(:object, :config, :buffer)
|
9
|
+
def initialize(object, config = nil, buffer = NO_BUFFER)
|
10
|
+
self.object = object
|
11
|
+
self.config = config
|
12
|
+
self.buffer = buffer
|
13
|
+
end
|
14
|
+
end
|
6
15
|
|
7
16
|
def initialize(object, on_error = nil, on_warning = nil)
|
8
|
-
@stack = [
|
17
|
+
@stack = [ StackNode.new(object) ]
|
9
18
|
@parsed_configs = {}
|
10
19
|
@on_error = on_error
|
11
20
|
@on_warning = on_warning
|
12
21
|
end
|
13
22
|
|
14
|
-
def characters(
|
15
|
-
|
16
|
-
value << string
|
17
|
-
end
|
23
|
+
def characters(data)
|
24
|
+
node = stack.last
|
18
25
|
|
19
|
-
|
20
|
-
|
26
|
+
if node.buffer == NO_BUFFER
|
27
|
+
node.buffer = data.dup
|
28
|
+
else
|
29
|
+
node.buffer << data
|
30
|
+
end
|
21
31
|
end
|
32
|
+
alias cdata_block characters
|
22
33
|
|
23
34
|
def start_element(name, attrs = [])
|
24
35
|
attrs.flatten!
|
25
|
-
|
26
|
-
|
36
|
+
|
37
|
+
name = normalize_name(name)
|
38
|
+
node = stack.last
|
39
|
+
object = node.object
|
40
|
+
|
41
|
+
sax_config = sax_config_for(object)
|
27
42
|
|
28
43
|
if sax_config
|
29
44
|
if collection_config = sax_config.collection_config(name, attrs)
|
30
|
-
|
31
|
-
|
45
|
+
object = collection_config.data_class.new
|
46
|
+
sax_config = sax_config_for(object)
|
32
47
|
|
33
|
-
|
34
|
-
|
35
|
-
|
48
|
+
stack.push(StackNode.new(object, collection_config))
|
49
|
+
|
50
|
+
set_attributes_on(object, attrs)
|
36
51
|
end
|
52
|
+
|
37
53
|
sax_config.element_configs_for_attribute(name, attrs).each do |ec|
|
38
54
|
unless parsed_config?(object, ec)
|
39
55
|
object.send(ec.setter, ec.value_from_attrs(attrs))
|
40
56
|
mark_as_parsed(object, ec)
|
41
57
|
end
|
42
58
|
end
|
43
|
-
if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
|
44
|
-
new_object = element_config.data_class ? element_config.data_class.new : object
|
45
|
-
stack.push [new_object, element_config, String.new]
|
46
59
|
|
47
|
-
|
48
|
-
|
49
|
-
|
60
|
+
if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
|
61
|
+
new_object =
|
62
|
+
case element_config.data_class.to_s
|
63
|
+
when 'Integer' then 0
|
64
|
+
when 'Float' then 0.0
|
65
|
+
when 'Time' then Time.at(0)
|
66
|
+
when '' then object
|
67
|
+
else
|
68
|
+
element_config.data_class.new
|
69
|
+
end
|
70
|
+
|
71
|
+
stack.push(StackNode.new(new_object, element_config))
|
72
|
+
|
73
|
+
set_attributes_on(new_object, attrs)
|
50
74
|
end
|
51
75
|
end
|
52
76
|
end
|
53
77
|
|
54
78
|
def end_element(name)
|
55
|
-
|
56
|
-
|
79
|
+
name = normalize_name(name)
|
80
|
+
|
81
|
+
start_tag = stack[-2]
|
82
|
+
close_tag = stack[-1]
|
83
|
+
|
84
|
+
return unless start_tag && close_tag
|
85
|
+
|
86
|
+
object = start_tag.object
|
87
|
+
element = close_tag.object
|
88
|
+
config = close_tag.config
|
89
|
+
value = close_tag.buffer
|
90
|
+
|
91
|
+
return unless config.name == name
|
57
92
|
|
58
93
|
unless parsed_config?(object, config)
|
59
|
-
if (element_value_config =
|
94
|
+
if (element_value_config = element_values_for(config))
|
60
95
|
element_value_config.each { |evc| element.send(evc.setter, value) }
|
61
96
|
end
|
62
97
|
|
63
98
|
if config.respond_to?(:accessor)
|
64
|
-
subconfig = element
|
99
|
+
subconfig = sax_config_for(element)
|
100
|
+
|
65
101
|
if econf = subconfig.element_config_for_tag(name, [])
|
66
102
|
element.send(econf.setter, value) unless econf.value_configured?
|
67
103
|
end
|
104
|
+
|
68
105
|
object.send(config.accessor) << element
|
69
106
|
else
|
70
|
-
value =
|
71
|
-
|
107
|
+
value =
|
108
|
+
case config.data_class.to_s
|
109
|
+
when 'String' then value.to_s
|
110
|
+
when 'Integer' then value.to_i
|
111
|
+
when 'Float' then value.to_f
|
112
|
+
# Assumes that time elements will be string-based and are not
|
113
|
+
# something else, e.g. seconds since epoch
|
114
|
+
when 'Time' then Time.parse(value.to_s)
|
115
|
+
when '' then value
|
116
|
+
else
|
117
|
+
element
|
118
|
+
end
|
119
|
+
|
120
|
+
object.send(config.setter, value) unless value == NO_BUFFER
|
121
|
+
|
72
122
|
mark_as_parsed(object, config)
|
73
123
|
end
|
74
124
|
|
75
125
|
# try to set the ancestor
|
76
|
-
sax_config = element
|
77
|
-
if sax_config
|
126
|
+
if (sax_config = sax_config_for(element))
|
78
127
|
sax_config.ancestors.each do |ancestor|
|
79
128
|
element.send(ancestor.setter, object)
|
80
129
|
end
|
81
130
|
end
|
82
131
|
end
|
132
|
+
|
83
133
|
stack.pop
|
84
134
|
end
|
85
135
|
|
136
|
+
private
|
137
|
+
|
86
138
|
def mark_as_parsed(object, element_config)
|
87
|
-
|
139
|
+
unless element_config.collection?
|
140
|
+
@parsed_configs[[object.object_id, element_config.object_id]] = true
|
141
|
+
end
|
88
142
|
end
|
89
143
|
|
90
144
|
def parsed_config?(object, element_config)
|
91
145
|
@parsed_configs[[object.object_id, element_config.object_id]]
|
92
146
|
end
|
93
147
|
|
94
|
-
def warning
|
148
|
+
def warning(string)
|
95
149
|
if @on_warning
|
96
150
|
@on_warning.call(string)
|
97
151
|
end
|
98
152
|
end
|
99
153
|
|
100
|
-
def error
|
154
|
+
def error(string)
|
101
155
|
if @on_error
|
102
156
|
@on_error.call(string)
|
103
157
|
end
|
104
158
|
end
|
105
159
|
|
160
|
+
|
161
|
+
def sax_config_for(object)
|
162
|
+
if object.class.respond_to?(:sax_config)
|
163
|
+
object.class.sax_config
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def element_values_for(config)
|
168
|
+
if config.data_class.respond_to?(:sax_config)
|
169
|
+
config.data_class.sax_config.element_values_for_element
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def normalize_name(name)
|
174
|
+
name.gsub(/\-/, '_')
|
175
|
+
end
|
176
|
+
|
177
|
+
def set_attributes_on(object, attributes)
|
178
|
+
config = sax_config_for(object)
|
179
|
+
|
180
|
+
if config
|
181
|
+
config.attribute_configs_for_element(attributes).each do |ac|
|
182
|
+
object.send(ac.setter, ac.value_from_attrs(attributes))
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
def stack
|
188
|
+
@stack
|
189
|
+
end
|
106
190
|
end
|
107
191
|
end
|
data/sax-machine.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/sax-machine/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = 'sax-machine'
|
6
|
+
s.version = SAXMachine::VERSION
|
7
|
+
|
8
|
+
s.authors = ["Paul Dix", "Julien Kirch", "Ezekiel Templin"]
|
9
|
+
s.date = Date.today
|
10
|
+
s.email = %q{paul@pauldix.net}
|
11
|
+
s.homepage = %q{http://github.com/pauldix/sax-machine}
|
12
|
+
|
13
|
+
s.summary = %q{Declarative SAX Parsing with Nokogiri}
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.require_paths = ["lib"]
|
18
|
+
|
19
|
+
s.platform = Gem::Platform::RUBY
|
20
|
+
|
21
|
+
s.add_dependency 'nokogiri', "~> 1.5.2"
|
22
|
+
s.add_development_dependency "rspec", "~> 2.10.0"
|
23
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<ItemSearchResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
|
3
|
+
<OperationRequest>
|
4
|
+
<HTTPHeaders>
|
5
|
+
<Header Name="UserAgent">
|
6
|
+
</Header>
|
7
|
+
</HTTPHeaders>
|
8
|
+
<RequestId>16WRJBVEM155Q026KCV1</RequestId>
|
9
|
+
<Arguments>
|
10
|
+
<Argument Name="SearchIndex" Value="Books"></Argument>
|
11
|
+
<Argument Name="Service" Value="AWSECommerceService"></Argument>
|
12
|
+
<Argument Name="Title" Value="Ruby on Rails"></Argument>
|
13
|
+
<Argument Name="Operation" Value="ItemSearch"></Argument>
|
14
|
+
<Argument Name="AWSAccessKeyId" Value="dontbeaswoosh"></Argument>
|
15
|
+
</Arguments>
|
16
|
+
<RequestProcessingTime>0.064924955368042</RequestProcessingTime>
|
17
|
+
</OperationRequest>
|
18
|
+
<Items>
|
19
|
+
<Request>
|
20
|
+
<IsValid>True</IsValid>
|
21
|
+
<ItemSearchRequest>
|
22
|
+
<SearchIndex>Books</SearchIndex>
|
23
|
+
<Title>Ruby on Rails</Title>
|
24
|
+
</ItemSearchRequest>
|
25
|
+
</Request>
|
26
|
+
<TotalResults>22</TotalResults>
|
27
|
+
<TotalPages>3</TotalPages>
|
28
|
+
<Item>
|
29
|
+
<ASIN>0321480791</ASIN>
|
30
|
+
<DetailPageURL>http://www.amazon.com/gp/redirect.html%3FASIN=0321480791%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0321480791%253FSubscriptionId=dontbeaswoosh</DetailPageURL>
|
31
|
+
<ItemAttributes>
|
32
|
+
<Author>Michael Hartl</Author>
|
33
|
+
<Author>Aurelius Prochazka</Author>
|
34
|
+
<Manufacturer>Addison-Wesley Professional</Manufacturer>
|
35
|
+
<ProductGroup>Book</ProductGroup>
|
36
|
+
<Title>RailsSpace: Building a Social Networking Website with Ruby on Rails (Addison-Wesley Professional Ruby Series)</Title>
|
37
|
+
</ItemAttributes>
|
38
|
+
</Item>
|
39
|
+
</Items>
|
40
|
+
</ItemSearchResponse>
|