sporkmonger-sax-machine 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/LICENSE +20 -0
- data/README.md +56 -0
- data/Rakefile +53 -0
- data/lib/sax-machine.rb +17 -0
- data/lib/sax-machine/ns_stack.rb +41 -0
- data/lib/sax-machine/sax_collection_config.rb +56 -0
- data/lib/sax-machine/sax_config.rb +57 -0
- data/lib/sax-machine/sax_document.rb +107 -0
- data/lib/sax-machine/sax_element_config.rb +80 -0
- data/lib/sax-machine/sax_event_recorder.rb +35 -0
- data/lib/sax-machine/sax_handler.rb +118 -0
- data/lib/sax-machine/version.rb +33 -0
- data/spec/benchmarks/amazon.xml +40 -0
- data/spec/benchmarks/benchmark.rb +158 -0
- data/spec/benchmarks/public_timeline.xml +411 -0
- data/spec/sax-machine/atom.xml +165 -0
- data/spec/sax-machine/sax_document_spec.rb +667 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +7 -0
- data/tasks/clobber.rake +2 -0
- data/tasks/gem.rake +69 -0
- data/tasks/git.rake +40 -0
- data/tasks/metrics.rake +22 -0
- data/tasks/rdoc.rake +26 -0
- data/tasks/spec.rake +70 -0
- data/tasks/yard.rake +26 -0
- metadata +175 -0
data/CHANGELOG
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
SAX Machine, Copyright (c) 2009 Paul Dix
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# SAX Machine
|
2
|
+
|
3
|
+
A declarative sax parsing library backed by Nokogiri.
|
4
|
+
|
5
|
+
# Example Usage
|
6
|
+
|
7
|
+
require 'sax-machine'
|
8
|
+
|
9
|
+
# Class for parsing an atom entry out of a feedburner atom feed
|
10
|
+
class AtomEntry
|
11
|
+
include SAXMachine
|
12
|
+
element :title
|
13
|
+
# the :as argument makes this available through atom_entry.author instead of .name
|
14
|
+
element :name, :as => :author
|
15
|
+
element "feedburner:origLink", :as => :url
|
16
|
+
element :summary
|
17
|
+
element :content
|
18
|
+
element :published
|
19
|
+
end
|
20
|
+
|
21
|
+
# Class for parsing Atom feeds
|
22
|
+
class Atom
|
23
|
+
include SAXMachine
|
24
|
+
element :title
|
25
|
+
# the :with argument means that you only match a link tag that has an attribute of :type => "text/html"
|
26
|
+
# the :value argument means that instead of setting the value to the text between the tag,
|
27
|
+
# it sets it to the attribute value of :href
|
28
|
+
element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
|
29
|
+
element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
|
30
|
+
elements :entry, :as => :entries, :class => AtomEntry
|
31
|
+
end
|
32
|
+
|
33
|
+
# you can then parse like this
|
34
|
+
feed = Atom.parse(xml_text)
|
35
|
+
# then you're ready to rock
|
36
|
+
feed.title # => whatever the title of the blog is
|
37
|
+
feed.url # => the main url of the blog
|
38
|
+
feed.feed_url # => goes to the feedburner feed
|
39
|
+
|
40
|
+
feed.entries.first.title # => title of the first entry
|
41
|
+
feed.entries.first.author # => the author of the first entry
|
42
|
+
feed.entries.first.url # => the permalink on the blog for this entry
|
43
|
+
# etc ...
|
44
|
+
|
45
|
+
# you can also use the elements method without specifying a class like so
|
46
|
+
class SomeServiceResponse
include SAXMachine
|
47
|
+
elements :message, :as => :messages
|
48
|
+
end
|
49
|
+
|
50
|
+
response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
|
51
|
+
response.messages.first # => "hi"
|
52
|
+
response.messages.last # => "world"
|
53
|
+
|
54
|
+
# Install
|
55
|
+
|
56
|
+
* sudo gem install sporkmonger-sax-machine
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# Rakefile for the sporkmonger-sax-machine gem.
#
# Puts lib/ on the load path, loads rake and rspec's rake task, defines the
# packaging constants read by the task files, picks the default task, and
# finally loads every task definition under tasks/.

lib_dir = File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
$:.unshift(lib_dir)
$:.uniq!

require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'

# The spec tasks cannot be defined without rspec, so bail out with install
# instructions instead of failing later with an obscure error.
begin
  require 'spec/rake/spectask'
rescue LoadError
  STDERR.puts 'Please install rspec:'
  STDERR.puts 'sudo gem install rspec'
  exit(1)
end

require File.join(File.dirname(__FILE__), 'lib/sax-machine', 'version')

# Package metadata.
PKG_DISPLAY_NAME = 'SAX Machine'
PKG_NAME = 'sporkmonger-sax-machine'
PKG_VERSION = SAXMachine::VERSION::STRING
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

RELEASE_NAME = "REL #{PKG_VERSION}"

PKG_AUTHOR = 'Paul Dix'
PKG_AUTHOR_EMAIL = 'paul@pauldix.net'
PKG_HOMEPAGE = 'https://github.com/sporkmonger/sax-machine'
PKG_SUMMARY = 'A declarative sax parsing library backed by Nokogiri.'
PKG_DESCRIPTION = <<-TEXT
A declarative sax parsing library backed by Nokogiri.
TEXT

# Files shipped in the package; database.yml and *_git / *.git entries are
# left out.
PKG_FILES = FileList[
  'lib/**/*', 'spec/**/*', 'vendor/**/*',
  'tasks/**/*', 'website/**/*',
  '[A-Z]*', 'Rakefile'
].exclude(/database\.yml/).exclude(/[_\.]git$/)

# The default task is 'spec:verify' only on non-JRuby Ruby 1.8; everywhere
# else it is plain 'spec'.
RCOV_ENABLED = (RUBY_PLATFORM != 'java' && RUBY_VERSION =~ /^1\.8/)
if RCOV_ENABLED
  task :default => 'spec:verify'
else
  task :default => 'spec'
end

# Truthy (match offset) on Windows-like platforms, nil otherwise. A regexp
# match against a String cannot raise, so no rescue modifier is needed here.
WINDOWS = (RUBY_PLATFORM =~ /mswin|win32|mingw|bccwin|cygwin/)
# Command prefix for privileged commands: '' on Windows, nil when the SUDOLESS
# environment variable is set, 'sudo' otherwise.
SUDO = WINDOWS ? '' : ('sudo' unless ENV['SUDOLESS'])

Dir['tasks/**/*.rake'].each { |rake| load rake }
|
data/lib/sax-machine.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
end
|
4
|
+
|
5
|
+
require 'sax-machine/version'
|
6
|
+
|
7
|
+
require 'cgi'
|
8
|
+
|
9
|
+
require 'sax-machine/sax_document'
|
10
|
+
require 'sax-machine/sax_handler'
|
11
|
+
require 'sax-machine/sax_config'
|
12
|
+
require 'sax-machine/sax_event_recorder'
|
13
|
+
|
14
|
+
# Shared string constants for the SAX Machine library.
module SAXMachine
|
15
|
+
# The empty string. NSStack stores the default namespace under this key and
# returns it when a prefix has no declared namespace.
EMPTY_STRING = ''
|
16
|
+
# Separator SAXConfig splits on to separate a namespace prefix from the
# local part of an element name.
COLON = ':'
|
17
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module SAXMachine
  # One frame of XML namespace declarations, linked to the frame of the
  # enclosing element.
  #
  # Keys are namespace prefixes (the default namespace is stored under
  # EMPTY_STRING) and values are namespace names. Lookups that miss in this
  # frame are delegated to the parent frame.
  class NSStack < Hash
    XMLNS = 'xmlns'

    # parent:: the enclosing NSStack, or nil for the outermost frame.
    # attrs::  an enumerable of [name, value] attribute pairs, or nil. Only
    #          entries that are Arrays are inspected; "xmlns" and
    #          "xmlns:prefix" attributes are recorded, everything else is
    #          ignored.
    def initialize(parent = nil, attrs = nil)
      super()
      @parent = parent
      return unless attrs

      attrs.each do |attr|
        next unless attr.kind_of?(Array)

        key, value = attr
        case key
        when XMLNS then self[EMPTY_STRING] = value
        # \A (not ^) so that only names that *start* with "xmlns:" count.
        when /\Axmlns:(.+)/ then self[$1] = value
        end
      end
    end

    # Returns the namespace bound to +name+ (converted with to_s, so nil
    # looks up the default namespace). Falls back to the parent frame, and
    # finally to EMPTY_STRING when the prefix is declared nowhere.
    def [](name)
      if (ns = super(name.to_s))
        ns
      elsif @parent
        @parent[name]
      else
        EMPTY_STRING
      end
    end

    # Returns the parent frame (nil for the outermost frame). The receiver
    # itself is left untouched.
    def pop
      @parent
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig

    # Configuration for one collection declared with a :class or :events
    # option: which element name it matches, in which namespaces, and which
    # kind of handler parses each matching element.
    class CollectionConfig
      attr_reader :name
      attr_reader :default_xmlns

      # name::    element name; stored as a String.
      # options:: Hash read for the keys :class, :as, :xmlns (a String or an
      #           Array of namespace names), :default_xmlns and :events.
      def initialize(name, options)
        @name = name.to_s
        @class = options[:class]
        @as = options[:as].to_s
        @xmlns = case options[:xmlns]
                 # Copy the list: it may get EMPTY_STRING appended below, and
                 # the caller's array (possibly shared between several
                 # declarations) must not change as a side effect.
                 when Array then options[:xmlns].dup
                 when String then [options[:xmlns]]
                 else nil
                 end
        @default_xmlns = options[:default_xmlns]
        # With a default namespace configured, elements in the empty
        # namespace are accepted as well.
        if @default_xmlns && @xmlns && !@xmlns.include?(EMPTY_STRING)
          @xmlns << EMPTY_STRING
        end
        @record_events = options[:events]
      end

      # Builds the handler for one matching element.
      #
      # When a default namespace is configured and +nsstack+ is nil or has no
      # default namespace, a new NSStack frame whose default namespace is
      # :default_xmlns is pushed first. Returns a SAXEventRecorder when the
      # :events option was given, otherwise a SAXHandler wrapping a new
      # instance of the :class option.
      def handler(nsstack)
        if @default_xmlns && (nsstack.nil? || nsstack[EMPTY_STRING] == EMPTY_STRING)
          nsstack = NSStack.new(nsstack, nsstack)
          nsstack[EMPTY_STRING] = @default_xmlns
        end

        if @record_events
          SAXEventRecorder.new(nsstack)
        else
          SAXHandler.new(@class.new, nsstack)
        end
      end

      # Name of the accessor the collection is exposed under (the :as option
      # as a String).
      def accessor
        as
      end

      # True when no namespace restriction was configured or +ns+ is one of
      # the accepted namespaces.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      protected

      def as
        @as
      end

      # NOTE(review): this shadows Object#class with a protected method, so
      # calling .class on an instance from outside raises NoMethodError. It
      # is kept only so existing behaviour is unchanged.
      def class
        @class || @name
      end
    end

  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'sax-machine/sax_element_config'
|
2
|
+
require 'sax-machine/sax_collection_config'
|
3
|
+
|
4
|
+
module SAXMachine
  # Registry of the element and collection declarations made on one class,
  # with lookups from a (possibly prefixed) tag name to the matching
  # configuration.
  class SAXConfig
    attr_reader :top_level_elements, :collection_elements

    def initialize
      @top_level_elements = {}
      @collection_elements = {}
    end

    # Registers an ElementConfig under the element's name. Several
    # configurations may share one name.
    def add_top_level_element(name, options)
      (@top_level_elements[name.to_s] ||= []) << ElementConfig.new(name, options)
    end

    # Registers a CollectionConfig under the element's name.
    def add_collection_element(name, options)
      (@collection_elements[name.to_s] ||= []) << CollectionConfig.new(name, options)
    end

    # Returns the first collection configuration for the tag +name+
    # ("local" or "prefix:local") whose namespace restriction accepts the
    # namespace the prefix resolves to in +nsstack+, or nil.
    def collection_config(name, nsstack)
      namespace, local = resolve_name(name, nsstack)
      candidates = @collection_elements[local]
      return nil unless candidates

      candidates.detect do |config|
        config.name.to_s == local && config.xmlns_match?(namespace)
      end
    end

    # Returns every element configuration for the tag +name+ that reads its
    # value from an attribute and whose :with clause accepts +attrs+. The
    # namespace prefix of +name+ is ignored here.
    def element_configs_for_attribute(name, attrs)
      local = name.split(COLON, 2).last.to_s
      candidates = @top_level_elements[local]
      return [] unless candidates

      candidates.select do |element_config|
        element_config.has_value_and_attrs_match?(attrs)
      end
    end

    # Returns the first element configuration for the tag +name+ whose
    # namespace restriction and :with clause both match, or nil.
    def element_config_for_tag(name, attrs, nsstack)
      namespace, local = resolve_name(name, nsstack)
      candidates = @top_level_elements[local]
      return nil unless candidates

      candidates.detect do |element_config|
        element_config.xmlns_match?(namespace) &&
          element_config.attrs_match?(attrs)
      end
    end

    private

    # Splits +qname+ at the first colon and resolves its prefix.
    #
    # Returns [namespace, local_name]. A name without a colon has a nil
    # prefix. A nil +nsstack+ is treated as "no namespaces declared" and
    # yields EMPTY_STRING, which is also what NSStack returns for an
    # undeclared prefix.
    def resolve_name(qname, nsstack)
      prefix, local = qname.split(COLON, 2)
      prefix, local = nil, prefix unless local # No prefix
      namespace = nsstack ? nsstack[prefix] : EMPTY_STRING
      [namespace, local.to_s]
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Mixin that turns a class into a declarative SAX document: including it adds
# the class-level +element+ / +elements+ declarations and the instance-level
# push-parsing methods.
module SAXMachine

  def self.included(base)
    base.extend ClassMethods
  end

  # Feeds +xml_text+ to this object's push parser and returns self.
  #
  # The parser (a Nokogiri push parser driving a SAXHandler for this object)
  # is created on the first call and reused afterwards, so a document may be
  # supplied in several chunks. The RECOVER parse option is switched on when
  # the parser exposes +options+.
  def parse(xml_text)
    unless @parser
      sax_handler = SAXHandler.new(self)
      @parser = Nokogiri::XML::SAX::PushParser.new(sax_handler)
      @parser.options |= Nokogiri::XML::ParseOptions::RECOVER if @parser.respond_to?(:options)
    end
    @parser << xml_text
    self
  end

  # Tells the push parser that the document is complete; does nothing when
  # nothing has been parsed yet. Returns self.
  def parse_finish
    @parser.finish if @parser
    self
  end

  module ClassMethods

    # Builds a new instance and feeds it +xml_text+.
    def parse(xml_text)
      # It might be cleaner to additionally call parse_finish here, but
      # then Nokogiri/libxml2 barfs on incomplete documents. Desired
      # behaviour?
      new.parse(xml_text)
    end

    # Declares a single-valued element.
    #
    # name::    tag name to match.
    # options:: forwarded to the element configuration; :as (defaulting to
    #           +name+) names the accessor. The caller's hash is not
    #           modified, so one options hash can be reused for several
    #           declarations.
    def element(name, options = {})
      options = options.merge(:as => name) unless options[:as]
      as = options[:as]
      sax_config.add_top_level_element(name, options)

      # we only want to insert the getter and setter if they haven't defined it from elsewhere.
      # this is how we allow custom parsing behavior. So you could define the setter
      # and have it parse the string into a date or whatever.
      #
      # method_defined? takes Strings and Symbols alike; comparing Strings
      # against instance_methods never matches on Ruby 1.9+, where that list
      # holds Symbols.
      attr_reader as unless method_defined?(as)
      attr_writer_once as unless method_defined?("#{as}=")
    end

    # All element configurations declared on this class, as one flat Array.
    def columns
      sax_config.top_level_elements.values.inject([]) do |all, configs|
        all + configs
      end
    end

    # The first element configuration declared for the tag +sym+, or nil.
    def column(sym)
      (sax_config.top_level_elements[sym.to_s] || []).first
    end

    # The :class option of the element declared for +sym+. Raises
    # NoMethodError when no such element was declared.
    def data_class(sym)
      column(sym).data_class
    end

    # The :required option of the element declared for +sym+. Raises
    # NoMethodError when no such element was declared.
    def required?(sym)
      column(sym).required?
    end

    # The accessor names of all declared elements.
    def column_names
      columns.map { |e| e.column }
    end

    # Declares a repeated element that is collected into an Array.
    #
    # With a :class or :events option the element is registered as a
    # collection; otherwise an +add_<as>+ method is generated that appends
    # each value to the Array. A reader that lazily creates the Array and a
    # plain writer are added unless methods of those names already exist.
    # The caller's options hash is not modified.
    def elements(name, options = {})
      options = options.merge(:as => name) unless options[:as]
      as = options[:as]

      if options[:class] || options[:events]
        sax_config.add_collection_element(name, options)
      else
        class_eval <<-SRC
          def add_#{as}(value)
            #{as} << value
          end
        SRC
        sax_config.add_top_level_element(name, options.merge(:collection => true))
      end

      unless method_defined?(as)
        class_eval <<-SRC
          def #{as}
            @#{as} ||= []
          end
        SRC
      end

      attr_writer as unless method_defined?("#{as}=")
    end

    # The SAXConfig holding this class's declarations, created on first use.
    def sax_config
      @sax_config ||= SAXConfig.new
    end

    # Defines a writer that only assigns while the attribute is still nil or
    # false, so the first value assigned is the one that is kept.
    def attr_writer_once(attr)
      class_eval <<-SRC
        def #{attr}=(val)
          @#{attr} ||= val
        end
      SRC
    end
  end

end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module SAXMachine
  class SAXConfig
    # Configuration for one declared element: the tag it matches, the
    # attribute and namespace conditions under which it matches, and the
    # setter that receives its value.
    class ElementConfig
      attr_reader :name, :setter, :data_class

      # name::    tag name; stored as a String.
      # options:: Hash read for these keys:
      #           :with       - attribute name => value pairs that must all
      #                         be present on the tag
      #           :value      - attribute to read the value from
      #           :as         - accessor name
      #           :collection - truthy when values are appended via add_<as>
      #           :class, :required
      #           :xmlns      - a String or Array of accepted namespace names
      def initialize(name, options)
        @name = name.to_s

        if options.has_key?(:with)
          # Stored as [name, value] String pairs so each condition can be
          # compared directly against the parser's attribute pairs.
          flat = options[:with].to_a.flatten.collect { |o| o.to_s }
          @with = flat.each_slice(2).to_a
        else
          @with = nil
        end

        @value = options.has_key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]
        @setter = @collection ? "add_#{options[:as]}" : "#{@as}="

        @data_class = options[:class]
        @required = options[:required]

        @xmlns = case options[:xmlns]
                 when Array then options[:xmlns]
                 when String then [options[:xmlns]]
                 else nil
                 end
      end

      # Accessor name: the :as option, or the tag name as a Symbol.
      def column
        @as || @name.to_sym
      end

      def required?
        @required
      end

      # Value of the attribute named by the :value option, taken from
      # +attrs+ (an enumerable of [name, value] pairs), or nil when the
      # attribute is absent.
      def value_from_attrs(attrs)
        pair = attrs.detect { |k, v| k == @value }
        pair ? pair.last : nil
      end

      # True when the tag's attributes satisfy the :with clause.
      #
      # Without a :with clause every tag matches. With one, every configured
      # [name, value] pair must be present in +attrs+; a nil +attrs+ only
      # matches an empty clause.
      def attrs_match?(attrs)
        return true unless @with
        return @with.empty? if attrs.nil?

        @with.all? { |pair| attrs.include?(pair) }
      end

      # True when a :value attribute is configured and the :with clause
      # matches +attrs+.
      def has_value_and_attrs_match?(attrs)
        !@value.nil? && attrs_match?(attrs)
      end

      # True when no namespace restriction was configured or +ns+ is one of
      # the accepted namespaces.
      def xmlns_match?(ns)
        @xmlns.nil? || @xmlns.include?(ns)
      end

      def collection?
        @collection
      end
    end
  end
end
|