sporkmonger-sax-machine 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/LICENSE +20 -0
- data/README.md +56 -0
- data/Rakefile +53 -0
- data/lib/sax-machine.rb +17 -0
- data/lib/sax-machine/ns_stack.rb +41 -0
- data/lib/sax-machine/sax_collection_config.rb +56 -0
- data/lib/sax-machine/sax_config.rb +57 -0
- data/lib/sax-machine/sax_document.rb +107 -0
- data/lib/sax-machine/sax_element_config.rb +80 -0
- data/lib/sax-machine/sax_event_recorder.rb +35 -0
- data/lib/sax-machine/sax_handler.rb +118 -0
- data/lib/sax-machine/version.rb +33 -0
- data/spec/benchmarks/amazon.xml +40 -0
- data/spec/benchmarks/benchmark.rb +158 -0
- data/spec/benchmarks/public_timeline.xml +411 -0
- data/spec/sax-machine/atom.xml +165 -0
- data/spec/sax-machine/sax_document_spec.rb +667 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +7 -0
- data/tasks/clobber.rake +2 -0
- data/tasks/gem.rake +69 -0
- data/tasks/git.rake +40 -0
- data/tasks/metrics.rake +22 -0
- data/tasks/rdoc.rake +26 -0
- data/tasks/spec.rake +70 -0
- data/tasks/yard.rake +26 -0
- metadata +175 -0
data/CHANGELOG
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
SAX Machine, Copyright (c) 2009 Paul Dix
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# SAX Machine
|
2
|
+
|
3
|
+
A declarative SAX parsing library backed by Nokogiri.
|
4
|
+
|
5
|
+
# Example Usage
|
6
|
+
|
7
|
+
require 'sax-machine'
|
8
|
+
|
9
|
+
# Class for parsing an atom entry out of a feedburner atom feed
|
10
|
+
class AtomEntry
|
11
|
+
include SAXMachine
|
12
|
+
element :title
|
13
|
+
# the :as argument makes this available through atom_entry.author instead of .name
|
14
|
+
element :name, :as => :author
|
15
|
+
element "feedburner:origLink", :as => :url
|
16
|
+
element :summary
|
17
|
+
element :content
|
18
|
+
element :published
|
19
|
+
end
|
20
|
+
|
21
|
+
# Class for parsing Atom feeds
|
22
|
+
class Atom
|
23
|
+
include SAXMachine
|
24
|
+
element :title
|
25
|
+
# the :with argument means that you only match a link tag that has an attribute of :type => "text/html"
|
26
|
+
# the :value argument means that instead of setting the value to the text between the tags,
|
27
|
+
# it sets it to the attribute value of :href
|
28
|
+
element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
|
29
|
+
element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
|
30
|
+
elements :entry, :as => :entries, :class => AtomEntry
|
31
|
+
end
|
32
|
+
|
33
|
+
# you can then parse like this
|
34
|
+
feed = Atom.parse(xml_text)
|
35
|
+
# then you're ready to rock
|
36
|
+
feed.title # => whatever the title of the blog is
|
37
|
+
feed.url # => the main url of the blog
|
38
|
+
feed.feed_url # => goes to the feedburner feed
|
39
|
+
|
40
|
+
feed.entries.first.title # => title of the first entry
|
41
|
+
feed.entries.first.author # => the author of the first entry
|
42
|
+
feed.entries.first.url # => the permalink on the blog for this entry
|
43
|
+
# etc ...
|
44
|
+
|
45
|
+
# you can also use the elements method without specifying a class like so
|
46
|
+
class SomeServiceResponse
|
47
|
+
include SAXMachine
elements :message, :as => :messages
|
48
|
+
end
|
49
|
+
|
50
|
+
response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
|
51
|
+
response.messages.first # => "hi"
|
52
|
+
response.messages.last # => "world"
|
53
|
+
|
54
|
+
# Install
|
55
|
+
|
56
|
+
* sudo gem install sporkmonger-sax-machine
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
lib_dir = File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
|
2
|
+
$:.unshift(lib_dir)
|
3
|
+
$:.uniq!
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'rake'
|
7
|
+
require 'rake/testtask'
|
8
|
+
require 'rake/rdoctask'
|
9
|
+
require 'rake/packagetask'
|
10
|
+
require 'rake/gempackagetask'
|
11
|
+
|
12
|
+
begin
|
13
|
+
require 'spec/rake/spectask'
|
14
|
+
rescue LoadError
|
15
|
+
STDERR.puts 'Please install rspec:'
|
16
|
+
STDERR.puts 'sudo gem install rspec'
|
17
|
+
exit(1)
|
18
|
+
end
|
19
|
+
|
20
|
+
require File.join(File.dirname(__FILE__), 'lib/sax-machine', 'version')
|
21
|
+
|
22
|
+
PKG_DISPLAY_NAME = 'SAX Machine'
|
23
|
+
PKG_NAME = 'sporkmonger-sax-machine'
|
24
|
+
PKG_VERSION = SAXMachine::VERSION::STRING
|
25
|
+
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
|
26
|
+
|
27
|
+
RELEASE_NAME = "REL #{PKG_VERSION}"
|
28
|
+
|
29
|
+
PKG_AUTHOR = 'Paul Dix'
|
30
|
+
PKG_AUTHOR_EMAIL = 'paul@pauldix.net'
|
31
|
+
PKG_HOMEPAGE = 'https://github.com/sporkmonger/sax-machine'
|
32
|
+
PKG_SUMMARY = 'A declarative sax parsing library backed by Nokogiri.'
|
33
|
+
PKG_DESCRIPTION = <<-TEXT
|
34
|
+
A declarative sax parsing library backed by Nokogiri.
|
35
|
+
TEXT
|
36
|
+
|
37
|
+
PKG_FILES = FileList[
|
38
|
+
'lib/**/*', 'spec/**/*', 'vendor/**/*',
|
39
|
+
'tasks/**/*', 'website/**/*',
|
40
|
+
'[A-Z]*', 'Rakefile'
|
41
|
+
].exclude(/database\.yml/).exclude(/[_\.]git$/)
|
42
|
+
|
43
|
+
RCOV_ENABLED = (RUBY_PLATFORM != 'java' && RUBY_VERSION =~ /^1\.8/)
|
44
|
+
if RCOV_ENABLED
|
45
|
+
task :default => 'spec:verify'
|
46
|
+
else
|
47
|
+
task :default => 'spec'
|
48
|
+
end
|
49
|
+
|
50
|
+
WINDOWS = (RUBY_PLATFORM =~ /mswin|win32|mingw|bccwin|cygwin/) rescue false
|
51
|
+
SUDO = WINDOWS ? '' : ('sudo' unless ENV['SUDOLESS'])
|
52
|
+
|
53
|
+
Dir['tasks/**/*.rake'].each { |rake| load rake }
|
data/lib/sax-machine.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
end
|
4
|
+
|
5
|
+
require 'sax-machine/version'
|
6
|
+
|
7
|
+
require 'cgi'
|
8
|
+
|
9
|
+
require 'sax-machine/sax_document'
|
10
|
+
require 'sax-machine/sax_handler'
|
11
|
+
require 'sax-machine/sax_config'
|
12
|
+
require 'sax-machine/sax_event_recorder'
|
13
|
+
|
14
|
+
# Top-level namespace of the library. The two constants are shared string
# literals used by the other files of the gem: COLON separates the prefix from
# the local name when tag names are split, and EMPTY_STRING is the key under
# which the default XML namespace is stored and the value returned for an
# unbound prefix.
module SAXMachine
|
15
|
+
EMPTY_STRING = ''
|
16
|
+
COLON = ':'
|
17
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module SAXMachine
  # Scoped lookup table mapping XML namespace prefixes to namespace values.
  #
  # Each instance stores the bindings found in one attribute list and keeps a
  # link to a parent NSStack; a lookup that misses locally is delegated to the
  # parent. The default namespace is stored under the empty-string prefix.
  class NSStack < Hash
    XMLNS = 'xmlns'

    # Builds a new scope on top of +parent+.
    #
    # parent - the enclosing NSStack, or nil when there is none.
    # attrs  - optional list of attributes. Only Array entries are examined
    #          (read as [name, value]): "xmlns" binds the default namespace
    #          and "xmlns:<prefix>" binds <prefix>. Other entries are ignored.
    def initialize(parent=nil, attrs=nil)
      super()
      @parent = parent

      (attrs || []).each do |entry|
        next unless entry.kind_of?(Array)

        attr_name, uri = entry
        case attr_name
        when XMLNS
          self[EMPTY_STRING] = uri
        when /^xmlns:(.+)/
          self[Regexp.last_match(1)] = uri
        end
      end
    end

    # Resolves a prefix to its namespace.
    #
    # The prefix is converted with #to_s first, so nil looks up the default
    # namespace. Falls back to the parent scope and finally to EMPTY_STRING
    # when no scope has a binding.
    def [](name)
      bound = super(name.to_s)
      return bound if bound

      @parent ? @parent[name] : EMPTY_STRING
    end

    # Returns the parent scope (nil when this is the outermost one).
    def pop
      @parent
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Configuration of one collection declaration: the tag name to look for,
    # the namespaces it may appear in, and how to build the handler that
    # processes each occurrence.
    class CollectionConfig
      attr_reader :name
      attr_reader :default_xmlns

      # name    - tag name (stored as a String).
      # options - Hash; the keys read here are :class, :as, :xmlns (String or
      #           Array of Strings), :default_xmlns and :events.
      def initialize(name, options)
        @name = name.to_s
        @class = options[:class]
        @as = options[:as].to_s
        @xmlns = case options[:xmlns]
                 # Copy the array: EMPTY_STRING may be appended below and the
                 # caller's array (possibly shared between several
                 # declarations) must not be modified.
                 when Array then options[:xmlns].dup
                 when String then [options[:xmlns]]
                 else nil
                 end
        @default_xmlns = options[:default_xmlns]
        # With a default namespace configured, elements in the empty
        # namespace are accepted as well.
        if @default_xmlns && @xmlns && !@xmlns.include?(EMPTY_STRING)
          @xmlns << EMPTY_STRING
        end
        @record_events = options[:events]
      end

      # Builds the handler for one occurrence of the collection element.
      #
      # nsstack - the current NSStack, or nil.
      #
      # When a :default_xmlns is configured and no default namespace is bound
      # (nsstack is nil or resolves the empty prefix to EMPTY_STRING), a child
      # scope binding the default namespace to :default_xmlns is pushed first.
      # Returns a SAXEventRecorder when :events was given, otherwise a
      # SAXHandler wrapping a new instance of the configured :class.
      def handler(nsstack)
        if @default_xmlns && (nsstack.nil? || nsstack[EMPTY_STRING] == EMPTY_STRING)
          # Only the parent link is needed; the parent stack is not an
          # attribute list, so it is no longer passed as one.
          nsstack = NSStack.new(nsstack)
          nsstack[EMPTY_STRING] = @default_xmlns
        end
        if @record_events
          SAXEventRecorder.new(nsstack)
        else
          SAXHandler.new(@class.new, nsstack)
        end
      end

      # Name of the accessor the parsed objects are stored under (:as option).
      def accessor
        as
      end

      # True when no :xmlns restriction was given or +ns+ is one of the
      # accepted namespaces.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      protected

      def as
        @as
      end

      # NOTE(review): this shadows Object#class with a protected method that
      # returns the configured :class (or the tag name). Kept unchanged for
      # compatibility; calling #class on a config from outside will raise.
      def class
        @class || @name
      end
    end

  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'sax-machine/sax_element_config'
|
2
|
+
require 'sax-machine/sax_collection_config'
|
3
|
+
|
4
|
+
module SAXMachine
  # Registry of the element and collection declarations of one class.
  # Both tables are Hashes keyed by tag name (String) whose values are Arrays
  # of configs, in declaration order.
  class SAXConfig
    attr_reader :top_level_elements, :collection_elements

    def initialize
      @top_level_elements = {}
      @collection_elements = {}
    end

    # Registers an ElementConfig for +name+.
    def add_top_level_element(name, options)
      key = name.to_s
      (@top_level_elements[key] ||= []) << ElementConfig.new(name, options)
    end

    # Registers a CollectionConfig for +name+.
    def add_collection_element(name, options)
      key = name.to_s
      (@collection_elements[key] ||= []) << CollectionConfig.new(name, options)
    end

    # Finds the first collection config registered for the (possibly
    # prefixed) tag +name+ whose namespace restriction accepts the namespace
    # the prefix resolves to in +nsstack+. Returns nil when none matches.
    def collection_config(name, nsstack)
      prefix, local_name = split_qualified_name(name)
      namespace = nsstack[prefix]

      candidates = @collection_elements[local_name.to_s]
      return nil unless candidates

      candidates.detect do |config|
        config.name.to_s == local_name.to_s && config.xmlns_match?(namespace)
      end
    end

    # Returns every element config registered for the local part of +name+
    # that takes its value from an attribute and whose :with clause matches
    # +attrs+. The namespace prefix is ignored here.
    def element_configs_for_attribute(name, attrs)
      local_name = name.split(COLON, 2).last

      candidates = @top_level_elements[local_name.to_s]
      return [] unless candidates

      candidates.select { |config| config.has_value_and_attrs_match?(attrs) }
    end

    # Finds the first element config registered for the (possibly prefixed)
    # tag +name+ that accepts both the resolved namespace and +attrs+.
    # Returns nil when none matches.
    def element_config_for_tag(name, attrs, nsstack)
      prefix, local_name = split_qualified_name(name)
      namespace = nsstack[prefix]

      candidates = @top_level_elements[local_name.to_s]
      return nil unless candidates

      candidates.detect do |config|
        config.xmlns_match?(namespace) && config.attrs_match?(attrs)
      end
    end

    private

    # Splits "prefix:local" into [prefix, local]; a name without a colon
    # yields [nil, name].
    def split_qualified_name(qualified)
      head, tail = qualified.split(COLON, 2)
      tail ? [head, tail] : [nil, head]
    end

  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module SAXMachine

  # Including SAXMachine gives the class the declarative DSL in ClassMethods
  # and gives instances the incremental #parse / #parse_finish methods.
  def self.included(base)
    base.extend ClassMethods
  end

  # Feeds a chunk of XML to this object's push parser, creating the parser
  # (with a SAXHandler bound to self) on first use. May be called repeatedly
  # with successive chunks. Returns self.
  def parse(xml_text)
    unless @parser
      sax_handler = SAXHandler.new(self)
      @parser = Nokogiri::XML::SAX::PushParser.new(sax_handler)
      # Ask for recovery from malformed input where the parser exposes options.
      @parser.options |= Nokogiri::XML::ParseOptions::RECOVER if @parser.respond_to?(:options)
    end
    @parser << xml_text
    self
  end

  # Tells the parser that no more input follows, if a parser was created.
  # Returns self.
  def parse_finish
    @parser.finish if @parser
    self
  end

  module ClassMethods

    # Creates a new instance and feeds it +xml_text+.
    def parse(xml_text)
      # It might be cleaner to additionally call parse_finish here, but
      # then Nokogiri/libxml2 barfs on incomplete documents. Desired
      # behaviour?
      new.parse(xml_text)
    end

    # Declares a single-valued element.
    #
    # name    - tag name to match.
    # options - Hash passed on to the element config; :as names the
    #           accessor and defaults to +name+.
    def element(name, options = {})
      # Work on a copy so a hash reused across declarations is not modified.
      options = options.merge(:as => options[:as] || name)
      as = options[:as]
      sax_config.add_top_level_element(name, options)

      # we only want to insert the getter and setter if they haven't defined it from elsewhere.
      # this is how we allow custom parsing behavior. So you could define the setter
      # and have it parse the string into a date or whatever.
      # method_defined? is used because instance_methods returns Symbols on
      # Ruby >= 1.9, which made the former String comparison always fail.
      attr_reader as unless method_defined?(as)
      attr_writer_once as unless method_defined?("#{as}=")
    end

    # All element configs declared on this class, as one flat Array.
    def columns
      sax_config.top_level_elements.inject([]) do |all, (_name, configs)|
        all + configs
      end
    end

    # First element config declared for tag +sym+, or nil.
    def column(sym)
      (sax_config.top_level_elements[sym.to_s] || []).first
    end

    # The :class option of the first config declared for +sym+.
    def data_class(sym)
      column(sym).data_class
    end

    # The :required option of the first config declared for +sym+.
    def required?(sym)
      column(sym).required?
    end

    # Accessor names of all declared elements.
    def column_names
      columns.map { |config| config.column }
    end

    # Declares a repeated element.
    #
    # With :class or :events the declaration is registered as a collection;
    # otherwise an add_<as> method is generated and the element is registered
    # as a top-level element flagged :collection. In both cases an
    # array-backed reader and a plain writer are generated unless methods of
    # those names already exist.
    def elements(name, options = {})
      # Work on a copy so a hash reused across declarations is not modified.
      options = options.merge(:as => options[:as] || name)
      as = options[:as]

      if options[:class] || options[:events]
        sax_config.add_collection_element(name, options)
      else
        class_eval <<-SRC
          def add_#{as}(value)
            #{as} << value
          end
        SRC
        sax_config.add_top_level_element(name, options.merge(:collection => true))
      end

      unless method_defined?(as)
        class_eval <<-SRC
          def #{as}
            @#{as} ||= []
          end
        SRC
      end

      attr_writer as unless method_defined?("#{as}=")
    end

    # The SAXConfig holding this class's declarations (created lazily).
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # Defines a writer that keeps the first truthy value assigned and ignores
    # later assignments.
    def attr_writer_once(attr)
      class_eval <<-SRC
        def #{attr}=(val)
          @#{attr} ||= val
        end
      SRC
    end
  end

end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig
    # Configuration of one element declaration: which tag it matches, which
    # attributes the tag must carry (:with), whether the value comes from an
    # attribute (:value) and which setter receives it.
    class ElementConfig
      attr_reader :name, :setter, :data_class

      # name    - tag name (stored as a String).
      # options - Hash; the keys read here are :with, :value, :as,
      #           :collection, :class, :required and :xmlns.
      def initialize(name, options)
        @name = name.to_s

        # Normalise :with into an Array of [name, value] String pairs so each
        # required attribute can be checked on its own. nil means "no :with
        # clause"; an empty Array means "a :with clause with no conditions".
        @with = if options.has_key?(:with)
          options[:with].to_a.flatten.collect { |o| o.to_s }.each_slice(2).to_a
        else
          nil
        end

        @value = options.has_key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]

        # Collections are filled through add_<as>, single values through <as>=.
        @setter = @collection ? "add_#{options[:as]}" : "#{@as}="

        @data_class = options[:class]
        @required = options[:required]

        @xmlns = case options[:xmlns]
                 when Array then options[:xmlns]
                 when String then [options[:xmlns]]
                 else nil
                 end
      end

      # Accessor name: the :as option, or the tag name as a Symbol.
      def column
        @as || @name.to_sym
      end

      def required?
        @required
      end

      # Returns the value of the attribute named by :value, or nil when the
      # attribute is absent. +attrs+ is enumerated as [name, value] pairs.
      def value_from_attrs(attrs)
        pair = attrs.detect { |k, v| k == @value }
        pair ? pair.last : nil
      end

      # True when +attrs+ satisfies the :with clause.
      #
      # attrs - Array of [name, value] String pairs, or nil.
      #
      # Without a :with clause everything matches. Otherwise every required
      # pair must be present in +attrs+; with nil attrs only an empty clause
      # matches.
      def attrs_match?(attrs)
        return true unless @with
        # If no attributes, match only if the :with clause is empty.
        return @with.empty? if attrs.nil?

        # Attributes must contain every pair of the :with clause.
        @with.all? { |pair| attrs.include?(pair) }
      end

      # True when the value is taken from an attribute and +attrs+ matches.
      def has_value_and_attrs_match?(attrs)
        !@value.nil? && attrs_match?(attrs)
      end

      # True when no :xmlns restriction was given or +ns+ is one of the
      # accepted namespaces.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      def collection?
        @collection
      end
    end
  end
end
|