sax-machine 0.1.0 → 0.2.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/.travis.yml +9 -0
- data/Gemfile +8 -2
- data/Guardfile +5 -0
- data/HISTORY.md +13 -0
- data/{README.textile → README.md} +12 -15
- data/Rakefile +6 -21
- data/lib/sax-machine.rb +1 -5
- data/lib/sax-machine/sax_handler.rb +116 -32
- data/lib/sax-machine/version.rb +3 -0
- data/sax-machine.gemspec +23 -0
- data/spec/benchmarks/amazon.xml +40 -0
- data/spec/benchmarks/benchmark.rb +158 -0
- data/spec/benchmarks/public_timeline.xml +411 -0
- data/spec/sax-machine/atom.xml +165 -0
- data/spec/sax-machine/configure_sax_machine_spec.rb +1 -1
- data/spec/sax-machine/include_sax_machine_spec.rb +1 -1
- data/spec/sax-machine/sax_document_spec.rb +61 -2
- data/spec/spec_helper.rb +12 -8
- metadata +78 -58
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
gemspec
|
4
|
+
|
5
|
+
group :development, :test do
|
6
|
+
gem 'rake'
|
7
|
+
gem 'guard-rspec'
|
8
|
+
gem 'growl', :require => false
|
9
|
+
gem 'simplecov', :require => false, :platforms => :mri_19
|
10
|
+
end
|
data/Guardfile
ADDED
data/HISTORY.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# 0.2.0.rc1
|
2
|
+
* Tried to reduce the number of instances of respond_to? in the code by
|
3
|
+
pulling common uses of it out to methods. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
4
|
+
* The parse stack is now composed of simple objects instead of it being
|
5
|
+
an array of arrays. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
6
|
+
* Now using an identifier for an empty buffer instead of empty string. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
7
|
+
* Cleaned up several variables that were not being used. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
8
|
+
* Encapsulated stack so it's not being exposed as part of the API. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
9
|
+
* #cdata_block is now an alias instead of delegating to characters. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
10
|
+
|
11
|
+
# 0.1.0
|
12
|
+
* rename parent to ancestor
|
13
|
+
* added SAXMachine.configure
|
@@ -1,16 +1,13 @@
|
|
1
|
-
|
1
|
+
# SAX Machine [![Build Status](https://secure.travis-ci.org/pauldix/sax-machine.png?branch=master)](http://travis-ci.org/pauldix/sax-machine)
|
2
2
|
|
3
|
-
|
3
|
+
[Wiki](https://github.com/pauldix/sax-machine/wiki)
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
h2. Description
|
5
|
+
## Description
|
8
6
|
|
9
7
|
A declarative SAX parsing library backed by Nokogiri
|
10
8
|
|
11
|
-
|
12
|
-
|
13
|
-
<pre>
|
9
|
+
## Usage
|
10
|
+
```ruby
|
14
11
|
require 'sax-machine'
|
15
12
|
|
16
13
|
# Class for information associated with content parts in a feed.
|
@@ -69,26 +66,26 @@ end
|
|
69
66
|
response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
|
70
67
|
response.messages.first # => "hi"
|
71
68
|
response.messages.last # => "world"
|
72
|
-
</pre>
|
73
69
|
|
74
70
|
# To limit conflicts in the class used for mapping, you can use the alternate SAXMachine.configure syntax
|
75
71
|
|
76
72
|
class X < ActiveRecord::Base
|
77
|
-
|
78
73
|
# this way no element, elements or ancestor method will be added to X
|
79
74
|
SAXMachine.configure(X) do |c|
|
80
75
|
c.element :title
|
81
76
|
end
|
82
|
-
|
83
77
|
end
|
78
|
+
```
|
84
79
|
|
85
|
-
|
80
|
+
## LICENSE
|
86
81
|
|
87
|
-
|
82
|
+
The MIT License
|
88
83
|
|
89
|
-
Copyright (c) 2009
|
84
|
+
Copyright (c) 2009-2012:
|
90
85
|
|
91
|
-
|
86
|
+
* [Paul Dix](http://www.pauldix.net)
|
87
|
+
* [Julien Kirch](http://www.archiloque.net)
|
88
|
+
* [Ezekiel Templin](http://zeke.templ.in)
|
92
89
|
|
93
90
|
Permission is hereby granted, free of charge, to any person obtaining
|
94
91
|
a copy of this software and associated documentation files (the
|
data/Rakefile
CHANGED
@@ -1,21 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
RSpec::Core::RakeTask.new do |t|
|
8
|
-
t.rspec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
9
|
-
end
|
10
|
-
|
11
|
-
task :default => [:spec]
|
12
|
-
|
13
|
-
task :test do
|
14
|
-
sh 'rspec spec'
|
15
|
-
end
|
16
|
-
|
17
|
-
task :install do
|
18
|
-
rm_rf "*.gem"
|
19
|
-
puts `gem build sax-machine.gemspec`
|
20
|
-
puts `sudo gem install sax-machine-#{SAXMachine::VERSION}.gem`
|
21
|
-
end
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
task :test => :spec
|
6
|
+
task :default => :test
|
data/lib/sax-machine.rb
CHANGED
@@ -1,12 +1,8 @@
|
|
1
|
-
require "
|
2
|
-
|
3
|
-
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
4
|
-
|
1
|
+
require "sax-machine/version"
|
5
2
|
require "sax-machine/sax_document"
|
6
3
|
require "sax-machine/sax_configure"
|
7
4
|
require "sax-machine/sax_handler"
|
8
5
|
require "sax-machine/sax_config"
|
9
6
|
|
10
7
|
module SAXMachine
|
11
|
-
VERSION = "0.0.16"
|
12
8
|
end
|
@@ -1,107 +1,191 @@
|
|
1
1
|
require "nokogiri"
|
2
|
+
require "time"
|
2
3
|
|
3
4
|
module SAXMachine
|
4
5
|
class SAXHandler < Nokogiri::XML::SAX::Document
|
5
|
-
|
6
|
+
NO_BUFFER = :no_buffer
|
7
|
+
|
8
|
+
class StackNode < Struct.new(:object, :config, :buffer)
|
9
|
+
def initialize(object, config = nil, buffer = NO_BUFFER)
|
10
|
+
self.object = object
|
11
|
+
self.config = config
|
12
|
+
self.buffer = buffer
|
13
|
+
end
|
14
|
+
end
|
6
15
|
|
7
16
|
def initialize(object, on_error = nil, on_warning = nil)
|
8
|
-
@stack = [
|
17
|
+
@stack = [ StackNode.new(object) ]
|
9
18
|
@parsed_configs = {}
|
10
19
|
@on_error = on_error
|
11
20
|
@on_warning = on_warning
|
12
21
|
end
|
13
22
|
|
14
|
-
def characters(
|
15
|
-
|
16
|
-
value << string
|
17
|
-
end
|
23
|
+
def characters(data)
|
24
|
+
node = stack.last
|
18
25
|
|
19
|
-
|
20
|
-
|
26
|
+
if node.buffer == NO_BUFFER
|
27
|
+
node.buffer = data.dup
|
28
|
+
else
|
29
|
+
node.buffer << data
|
30
|
+
end
|
21
31
|
end
|
32
|
+
alias cdata_block characters
|
22
33
|
|
23
34
|
def start_element(name, attrs = [])
|
24
35
|
attrs.flatten!
|
25
|
-
|
26
|
-
|
36
|
+
|
37
|
+
name = normalize_name(name)
|
38
|
+
node = stack.last
|
39
|
+
object = node.object
|
40
|
+
|
41
|
+
sax_config = sax_config_for(object)
|
27
42
|
|
28
43
|
if sax_config
|
29
44
|
if collection_config = sax_config.collection_config(name, attrs)
|
30
|
-
|
31
|
-
|
45
|
+
object = collection_config.data_class.new
|
46
|
+
sax_config = sax_config_for(object)
|
32
47
|
|
33
|
-
|
34
|
-
|
35
|
-
|
48
|
+
stack.push(StackNode.new(object, collection_config))
|
49
|
+
|
50
|
+
set_attributes_on(object, attrs)
|
36
51
|
end
|
52
|
+
|
37
53
|
sax_config.element_configs_for_attribute(name, attrs).each do |ec|
|
38
54
|
unless parsed_config?(object, ec)
|
39
55
|
object.send(ec.setter, ec.value_from_attrs(attrs))
|
40
56
|
mark_as_parsed(object, ec)
|
41
57
|
end
|
42
58
|
end
|
43
|
-
if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
|
44
|
-
new_object = element_config.data_class ? element_config.data_class.new : object
|
45
|
-
stack.push [new_object, element_config, String.new]
|
46
59
|
|
47
|
-
|
48
|
-
|
49
|
-
|
60
|
+
if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
|
61
|
+
new_object =
|
62
|
+
case element_config.data_class.to_s
|
63
|
+
when 'Integer' then 0
|
64
|
+
when 'Float' then 0.0
|
65
|
+
when 'Time' then Time.at(0)
|
66
|
+
when '' then object
|
67
|
+
else
|
68
|
+
element_config.data_class.new
|
69
|
+
end
|
70
|
+
|
71
|
+
stack.push(StackNode.new(new_object, element_config))
|
72
|
+
|
73
|
+
set_attributes_on(new_object, attrs)
|
50
74
|
end
|
51
75
|
end
|
52
76
|
end
|
53
77
|
|
54
78
|
def end_element(name)
|
55
|
-
|
56
|
-
|
79
|
+
name = normalize_name(name)
|
80
|
+
|
81
|
+
start_tag = stack[-2]
|
82
|
+
close_tag = stack[-1]
|
83
|
+
|
84
|
+
return unless start_tag && close_tag
|
85
|
+
|
86
|
+
object = start_tag.object
|
87
|
+
element = close_tag.object
|
88
|
+
config = close_tag.config
|
89
|
+
value = close_tag.buffer
|
90
|
+
|
91
|
+
return unless config.name == name
|
57
92
|
|
58
93
|
unless parsed_config?(object, config)
|
59
|
-
if (element_value_config =
|
94
|
+
if (element_value_config = element_values_for(config))
|
60
95
|
element_value_config.each { |evc| element.send(evc.setter, value) }
|
61
96
|
end
|
62
97
|
|
63
98
|
if config.respond_to?(:accessor)
|
64
|
-
subconfig = element
|
99
|
+
subconfig = sax_config_for(element)
|
100
|
+
|
65
101
|
if econf = subconfig.element_config_for_tag(name, [])
|
66
102
|
element.send(econf.setter, value) unless econf.value_configured?
|
67
103
|
end
|
104
|
+
|
68
105
|
object.send(config.accessor) << element
|
69
106
|
else
|
70
|
-
value =
|
71
|
-
|
107
|
+
value =
|
108
|
+
case config.data_class.to_s
|
109
|
+
when 'String' then value.to_s
|
110
|
+
when 'Integer' then value.to_i
|
111
|
+
when 'Float' then value.to_f
|
112
|
+
# Assumes that time elements will be string-based and are not
|
113
|
+
# something else, e.g. seconds since epoch
|
114
|
+
when 'Time' then Time.parse(value.to_s)
|
115
|
+
when '' then value
|
116
|
+
else
|
117
|
+
element
|
118
|
+
end
|
119
|
+
|
120
|
+
object.send(config.setter, value) unless value == NO_BUFFER
|
121
|
+
|
72
122
|
mark_as_parsed(object, config)
|
73
123
|
end
|
74
124
|
|
75
125
|
# try to set the ancestor
|
76
|
-
sax_config = element
|
77
|
-
if sax_config
|
126
|
+
if (sax_config = sax_config_for(element))
|
78
127
|
sax_config.ancestors.each do |ancestor|
|
79
128
|
element.send(ancestor.setter, object)
|
80
129
|
end
|
81
130
|
end
|
82
131
|
end
|
132
|
+
|
83
133
|
stack.pop
|
84
134
|
end
|
85
135
|
|
136
|
+
private
|
137
|
+
|
86
138
|
def mark_as_parsed(object, element_config)
|
87
|
-
|
139
|
+
unless element_config.collection?
|
140
|
+
@parsed_configs[[object.object_id, element_config.object_id]] = true
|
141
|
+
end
|
88
142
|
end
|
89
143
|
|
90
144
|
def parsed_config?(object, element_config)
|
91
145
|
@parsed_configs[[object.object_id, element_config.object_id]]
|
92
146
|
end
|
93
147
|
|
94
|
-
def warning
|
148
|
+
def warning(string)
|
95
149
|
if @on_warning
|
96
150
|
@on_warning.call(string)
|
97
151
|
end
|
98
152
|
end
|
99
153
|
|
100
|
-
def error
|
154
|
+
def error(string)
|
101
155
|
if @on_error
|
102
156
|
@on_error.call(string)
|
103
157
|
end
|
104
158
|
end
|
105
159
|
|
160
|
+
|
161
|
+
def sax_config_for(object)
|
162
|
+
if object.class.respond_to?(:sax_config)
|
163
|
+
object.class.sax_config
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def element_values_for(config)
|
168
|
+
if config.data_class.respond_to?(:sax_config)
|
169
|
+
config.data_class.sax_config.element_values_for_element
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def normalize_name(name)
|
174
|
+
name.gsub(/\-/, '_')
|
175
|
+
end
|
176
|
+
|
177
|
+
def set_attributes_on(object, attributes)
|
178
|
+
config = sax_config_for(object)
|
179
|
+
|
180
|
+
if config
|
181
|
+
config.attribute_configs_for_element(attributes).each do |ac|
|
182
|
+
object.send(ac.setter, ac.value_from_attrs(attributes))
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
def stack
|
188
|
+
@stack
|
189
|
+
end
|
106
190
|
end
|
107
191
|
end
|
data/sax-machine.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/sax-machine/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = 'sax-machine'
|
6
|
+
s.version = SAXMachine::VERSION
|
7
|
+
|
8
|
+
s.authors = ["Paul Dix", "Julien Kirch", "Ezekiel Templin"]
|
9
|
+
s.date = Date.today
|
10
|
+
s.email = %q{paul@pauldix.net}
|
11
|
+
s.homepage = %q{http://github.com/pauldix/sax-machine}
|
12
|
+
|
13
|
+
s.summary = %q{Declarative SAX Parsing with Nokogiri}
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.require_paths = ["lib"]
|
18
|
+
|
19
|
+
s.platform = Gem::Platform::RUBY
|
20
|
+
|
21
|
+
s.add_dependency 'nokogiri', "~> 1.5.2"
|
22
|
+
s.add_development_dependency "rspec", "~> 2.10.0"
|
23
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<ItemSearchResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
|
3
|
+
<OperationRequest>
|
4
|
+
<HTTPHeaders>
|
5
|
+
<Header Name="UserAgent">
|
6
|
+
</Header>
|
7
|
+
</HTTPHeaders>
|
8
|
+
<RequestId>16WRJBVEM155Q026KCV1</RequestId>
|
9
|
+
<Arguments>
|
10
|
+
<Argument Name="SearchIndex" Value="Books"></Argument>
|
11
|
+
<Argument Name="Service" Value="AWSECommerceService"></Argument>
|
12
|
+
<Argument Name="Title" Value="Ruby on Rails"></Argument>
|
13
|
+
<Argument Name="Operation" Value="ItemSearch"></Argument>
|
14
|
+
<Argument Name="AWSAccessKeyId" Value="dontbeaswoosh"></Argument>
|
15
|
+
</Arguments>
|
16
|
+
<RequestProcessingTime>0.064924955368042</RequestProcessingTime>
|
17
|
+
</OperationRequest>
|
18
|
+
<Items>
|
19
|
+
<Request>
|
20
|
+
<IsValid>True</IsValid>
|
21
|
+
<ItemSearchRequest>
|
22
|
+
<SearchIndex>Books</SearchIndex>
|
23
|
+
<Title>Ruby on Rails</Title>
|
24
|
+
</ItemSearchRequest>
|
25
|
+
</Request>
|
26
|
+
<TotalResults>22</TotalResults>
|
27
|
+
<TotalPages>3</TotalPages>
|
28
|
+
<Item>
|
29
|
+
<ASIN>0321480791</ASIN>
|
30
|
+
<DetailPageURL>http://www.amazon.com/gp/redirect.html%3FASIN=0321480791%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0321480791%253FSubscriptionId=dontbeaswoosh</DetailPageURL>
|
31
|
+
<ItemAttributes>
|
32
|
+
<Author>Michael Hartl</Author>
|
33
|
+
<Author>Aurelius Prochazka</Author>
|
34
|
+
<Manufacturer>Addison-Wesley Professional</Manufacturer>
|
35
|
+
<ProductGroup>Book</ProductGroup>
|
36
|
+
<Title>RailsSpace: Building a Social Networking Website with Ruby on Rails (Addison-Wesley Professional Ruby Series)</Title>
|
37
|
+
</ItemAttributes>
|
38
|
+
</Item>
|
39
|
+
</Items>
|
40
|
+
</ItemSearchResponse>
|