rind 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +22 -0
- data/README.rdoc +111 -0
- data/lib/rind.rb +15 -0
- data/lib/rind/document.rb +81 -0
- data/lib/rind/equality.rb +9 -0
- data/lib/rind/html.rb +41 -0
- data/lib/rind/manipulate.rb +31 -0
- data/lib/rind/nodes.rb +234 -0
- data/lib/rind/parser.rb +141 -0
- data/lib/rind/traverse.rb +86 -0
- data/lib/rind/xml.rb +28 -0
- data/lib/rind/xpath.rb +144 -0
- data/test/all_test.rb +14 -0
- data/test/cdata_test.rb +9 -0
- data/test/children_test.rb +50 -0
- data/test/comment_test.rb +9 -0
- data/test/document_test.rb +23 -0
- data/test/element_test.rb +49 -0
- data/test/equality_test.rb +19 -0
- data/test/files/document_test.html +8 -0
- data/test/files/traverse_test.html +13 -0
- data/test/html_test.rb +16 -0
- data/test/manipulate_test.rb +23 -0
- data/test/nodes_test.rb +16 -0
- data/test/parser_test.rb +7 -0
- data/test/traverse_test.rb +57 -0
- data/test/xml_test.rb +14 -0
- data/test/xpath_test.rb +27 -0
- metadata +109 -0
data/lib/rind/parser.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
module Rind
  # Token type tags. Every token produced by tokenize is an array whose
  # first element is one of these values.
  TEXT = 0
  CDATA = 1
  COMMENT = 2
  DOCTYPE = 3
  PRO_INST = 4
  END_TAG = 5
  START_TAG = 6

  # Reads +file_name+, splits its content into tokens and builds the node
  # tree from them.
  #
  # file_name::          path of the document to read
  # type::               used to build the fallback class namespace
  #                      "Rind::#{type.capitalize}"
  # base_namespace::     namespace assumed for tags without a prefix
  # namespaces_allowed:: collection of namespaces whose own library classes
  #                      may be instantiated; nil disables the lookup
  #
  # Returns the list of top level nodes.
  def self.parse(file_name, type, base_namespace, namespaces_allowed)
    create_tree(tokenize(file_name, type), type, base_namespace, namespaces_allowed)
  end

  # Splits the content of +file_name+ into a flat list of tokens in
  # document order. Each token is [kind, capture, ...] where kind is one of
  # the constants above. +type+ is accepted for interface symmetry with
  # parse and is not used here.
  def self.tokenize(file_name, type)
    content = File.read(file_name)

    # tag types
    # A name starts with a letter or underscore and may continue with
    # digits, so tags such as <h1> are read as "h1" instead of "h" followed
    # by a bogus attribute string "1".
    name = /[a-zA-Z_][a-zA-Z0-9_]*/
    cdata = /<!\[CDATA\[(.*?)\]\]>/m
    comment = /<!--(.*?)-->/m
    doctype = /<!DOCTYPE(.*?)>/m
    processing_instruction = /<\?(.*?)>/m
    end_tag = /<\s*\/\s*((?:#{name}:)?#{name})\s*>/m
    start_tag = /<\s*((?:#{name}:)?#{name})\s*(.*?)?\/?>/m

    # Whole-string anchors: with ^ and $ a single blank line inside a text
    # run would make the entire run look empty and it would be dropped.
    blank = /\A\s*\z/

    # extract tokens from the file content
    tokens = []
    text_start = 0
    content.scan(/#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{end_tag}|#{start_tag}/o) do |token|
      # remove nil entries from the unmatched tag checks
      token.compact!
      # get match object
      match = $~

      # create a preceding text token if one exists
      text_end = match.begin(0)
      if text_start < text_end
        text = content[text_start...text_end]
        tokens.push([TEXT, text]) if text !~ blank
      end
      text_start = match.end(0)

      # The capture group that took part in the match identifies the tag
      # type: groups 1-5 belong to cdata..end_tag, group 6 to start_tag.
      kind =
        if match.begin(1) then CDATA
        elsif match.begin(2) then COMMENT
        elsif match.begin(3) then DOCTYPE
        elsif match.begin(4) then PRO_INST
        elsif match.begin(5) then END_TAG
        elsif match.begin(6) then START_TAG
        end
      tokens.push([kind, token].flatten) unless kind.nil?
    end

    # text that follows the last tag would otherwise be lost
    if text_start < content.length
      text = content[text_start..-1]
      tokens.push([TEXT, text]) if text !~ blank
    end

    tokens
  end
  private_class_method :tokenize

  # Builds the node list for +tokens+. Tokens are consumed from the end of
  # the array (so they are processed in reverse document order) and the
  # collected nodes are reversed before returning.
  #
  # When +complete_tag+ is given the call was made for an end tag and stops
  # right after the start tag with that name has been consumed; the start
  # tag is then the first entry of the returned list.
  def self.create_tree(tokens, type, base_namespace, namespaces_allowed, complete_tag = nil)
    dom = Rind::Nodes.new

    # create the nodes and push them onto the dom tree
    while 0 < tokens.length
      token = tokens.pop

      case token[0]
      when TEXT
        dom.push(Rind::Text.new(token[1]))
      when CDATA
        # NOTE(review): within this module Rind::CDATA is the integer token
        # constant defined above (1), so this call cannot build a node.
        # The node class presumably has another name -- confirm against
        # the node definitions before changing it.
        dom.push(Rind::CDATA.new(token[1]))
      when COMMENT
        dom.push(Rind::Comment.new(token[1]))
      when DOCTYPE
        dom.push(Rind::DocType.new(token[1]))
      when PRO_INST
        dom.push(Rind::ProcessingInstruction.new(token[1]))
      when END_TAG
        # recursively retrieve all the children and the matching start tag
        children = create_tree(tokens, type, base_namespace, namespaces_allowed, token[1])
        start_tag = children.shift

        start_tag.children.replace(children)

        dom.push(start_tag)
      when START_TAG
        namespace_name, local_name = extract_name_and_namespace(token[1], base_namespace)

        # create the element
        if !namespaces_allowed.nil? && (namespace_name == base_namespace || namespaces_allowed.include?(namespace_name))
          passed_namespace = namespace_name == base_namespace ? nil : namespace_name
          begin
            node = create_node(get_library_name(namespace_name), local_name.capitalize, token[2], nil, passed_namespace)
          rescue
            # the namespace has no usable library class: fall back to the
            # generic class namespace for this document type
            node = create_node("Rind::#{type.capitalize}", local_name.capitalize, token[2], nil, passed_namespace)
          end
        else
          node = create_node("Rind::#{type.capitalize}", local_name.capitalize, token[2], nil, namespace_name)
        end
        dom.push(node)

        # break if this tag completes the grouping from an end tag
        break if complete_tag.eql?(token[1])
      end
    end

    # output in the correct order
    dom.reverse
  end
  private_class_method :create_tree

  # Splits a tag name such as "svg:rect" into ["svg", "rect"]. A name
  # without a prefix gets +base_namespace+ as its namespace.
  def self.extract_name_and_namespace(tag_name, base_namespace)
    tag_name =~ /^(?:([\w:]+):)?(\w+)$/
    namespace_name, local_name = $1, $2
    namespace_name = base_namespace if namespace_name.nil?

    [namespace_name, local_name]
  end
  private_class_method :extract_name_and_namespace

  # Converts a namespace such as "rind:html" into the constant path
  # "Rind::Html".
  def self.get_library_name(namespace_name)
    namespace_name.split(/:/).collect{|ns| ns.capitalize}.join('::')
  end
  private_class_method :get_library_name

  # Instantiates "#{class_namespace}::#{class_name.capitalize}" with an
  # options hash holding :attributes, :namespace_name and, when not nil,
  # :children.
  #
  # NOTE(review): the class is resolved by evaluating a string assembled
  # from names taken out of the parsed document. The tokenizer limits those
  # names to letters, digits, underscores and colons, but string evaluation
  # of document content deserves a second look.
  def self.create_node(class_namespace, class_name, attributes, children, namespace_name)
    command = "#{class_namespace}::#{class_name.capitalize}.new("
    # the evaluated string refers to the local variables attributes and
    # children by name, so those names must not change
    options = [":attributes => attributes"]
    options.push(":namespace_name => '#{namespace_name}'")
    options.push(":children => children") if not children.nil?
    command.concat(options.join(','))
    command.concat(')')

    class_eval(command)
  end
  private_class_method :create_node
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# Navigation helpers for nodes that live in a tree. The including class
# gets a +parent+ accessor; nodes that can hold other nodes are expected to
# respond to +children+.
module Traverse
  attr_accessor :parent

  # Creates a Rind::Nodes list of all ancestors, nearest first. If an Xpath
  # is provided it will only return the nodes that match. Returns nil when
  # the node has no parent.
  def ancestors(path = nil)
    if not self.parent.nil?
      ancestors = Rind::Nodes.new([self.parent])

      parent_ancestors = self.parent.ancestors
      ancestors.push(*parent_ancestors) if not parent_ancestors.nil?

      path.nil? ? ancestors : ancestors.filter(path)
    end
  end

  # Creates a Rind::Nodes list of all descendants: the direct children
  # first, followed by the descendants of each child. If an Xpath is
  # provided it will only return the nodes that match. Returns nil when the
  # node has no children.
  def descendants(path = nil)
    if self.respond_to? :children and not self.children.empty?
      descendants = Rind::Nodes.new(self.children)

      self.children.each do |child|
        child_descendants = child.descendants
        descendants.push(*child_descendants) if not child_descendants.nil?
      end

      path.nil? ? descendants : descendants.filter(path)
    end
  end

  # Returns the first descendant node. If an Xpath is provided
  # it will return the first one that matches.
  def down(path = nil)
    descendants = self.descendants(path)
    descendants.first if not descendants.nil?
  end

  # Returns the first sibling that follows the current node in
  # the list of siblings. If an Xpath is provided it will return
  # the first one that matches.
  def next(path = nil)
    siblings = self.next_siblings(path)
    siblings.first if not siblings.nil?
  end

  # Creates a Rind::Nodes list of all siblings that follow the
  # current node in the list of siblings. If an Xpath is provided
  # it will only return the nodes that match. A node without a parent has
  # no siblings, so the list is empty in that case.
  def next_siblings(path = nil)
    following = []
    unless self.parent.nil?
      children = self.parent.children
      position = children.index(self)
      # position is nil when the parent does not list this node
      following = children[position + 1..children.length - 1] unless position.nil?
    end

    siblings = Rind::Nodes.new(following)
    path.nil? ? siblings : siblings.filter(path)
  end

  # Returns the last entry of prev_siblings, i.e. the sibling closest to
  # the current node among those that precede it. If an Xpath is provided
  # only matching siblings are considered.
  def prev(path = nil)
    siblings = self.prev_siblings(path)
    siblings.last if not siblings.nil?
  end

  # Creates a Rind::Nodes list of all siblings that precede the
  # current node in the list of siblings. If an Xpath is provided
  # it will only return the nodes that match. A node without a parent has
  # no siblings, so the list is empty in that case.
  def prev_siblings(path = nil)
    preceding = []
    unless self.parent.nil?
      children = self.parent.children
      position = children.index(self)
      # position is nil when the parent does not list this node
      preceding = children[0...position] unless position.nil?
    end

    siblings = Rind::Nodes.new(preceding)
    path.nil? ? siblings : siblings.filter(path)
  end

  # Creates a Rind::Nodes list of all siblings. If an Xpath is
  # provided it will only return the nodes that match. The current node is
  # excluded by object identity. A node without a parent yields an empty
  # list.
  def siblings(path = nil)
    others = self.parent.nil? ? [] : self.parent.children.find_all{|child| not child.equal? self}

    siblings = Rind::Nodes.new(others)
    path.nil? ? siblings : siblings.filter(path)
  end

  # Returns the first ancestor node. If an Xpath is provided
  # it will return the first one that matches.
  def up(path = nil)
    ancestors = self.ancestors(path)
    ancestors.first if not ancestors.nil?
  end
end
|
data/lib/rind/xml.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module Rind
  # Rind::Xml will dynamically create any XML element.
  module Xml
    # Invoked for any constant under Rind::Xml that is not defined yet.
    # Builds a subclass of Element, registers it under the requested name
    # and returns it.
    def self.const_missing(full_class_name, options={}) # :nodoc:
      element_class = Class.new(Element)

      element_class.class_eval do
        # <b>Parent:</b> Element
        # === Example
        #  Rind::Xml::Foo.new(
        #    :attributes => {:id => "bar"},
        #    :children   => "Hello World!"
        #  )
        def initialize(options={})
          super(options)
        end

        # Name used to refer to the element: the bare local name when the
        # namespace is missing, empty or starts with "xml" / "rind:xml",
        # otherwise "namespace:local".
        def expanded_name # :nodoc:
          return @local_name if @namespace_name.nil?
          return @local_name if @namespace_name == ''
          return @local_name if @namespace_name =~ /^(?:rind:)?xml/

          [@namespace_name, @local_name].join(':')
        end
      end

      const_set(full_class_name, element_class)
      element_class
    end
  end
end
|
data/lib/rind/xpath.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
# Current Xpath support is fairly basic but covers almost all axes and node tests.
|
2
|
+
# Predicates are limited to attribute and position checks. I intend to expand support
|
3
|
+
# but that should cover most of the needed functionality.
|
4
|
+
module Xpath
  # Xpath search of a node that returns a list of matching nodes as a
  # Rind::Nodes, or nil as soon as a step matches nothing. The +path+
  # argument is never modified.
  #
  # NOTE(review): the search starts from a clone of the receiver, so a step
  # that selects the context node itself yields the clone, not the
  # receiver -- confirm this is intended.
  def s(path)
    node = self.clone

    # absolute paths to the top
    if '/' == path[0,1]
      while not node.parent.nil?
        node = node.parent
      end
      # "//x" keeps one slash so that split produces an empty first step
      # (descendant-or-self); "/x" becomes "self::x" on the top node.
      # A new string is built so the caller's path stays untouched.
      rest = path[1..-1]
      path = ('/' == rest[0,1]) ? rest : 'self::' + rest
    end

    # node check
    nodes = [node]
    path.split('/').each do |step|
      case step
      when ''
        step = 'descendant-or-self::node()'
      when '.'
        step = 'self::node()'
      when '..'
        step = 'parent::node()'
      end

      step = step.gsub(/^@/, 'attribute::')

      step =~ /^(?:(.*?)::)?(.+?)(\[.*?)?$/
      axis, node_test, predicates = $1, $2, $3
      axis = 'child' if axis.nil?

      # find matching nodes
      nodes = nodes.collect{|candidate| candidate.find_matching_nodes(axis, node_test)}.flatten.compact

      # check predicates
      if not predicates.nil?
        # true() and false()
        predicates = predicates.gsub(/(true|false)\(\)/, '\1')
        # A lone "=" becomes "=="; "!=", "<=", ">=" and "==" are left as
        # they are so that they remain valid comparisons.
        predicates = predicates.gsub(/([!<>=]?)=/) { $1.empty? ? '==' : $& }

        predicates.scan(/\[(.*?)\]/) do |captures|
          # last()
          predicate = captures[0].gsub(/last\(\)/, nodes.length.to_s)

          # Positions are taken from the iteration index: looking each node
          # up again with Array#index costs a scan per node and reports the
          # wrong position when two nodes compare equal.
          kept = []
          nodes.each_with_index do |candidate, index|
            kept.push(candidate) if candidate.validate_predicate(predicate, index + 1)
          end
          nodes = kept
          break if nodes.empty?
        end
      end

      return nil if nodes.empty?
    end

    Rind::Nodes.new(nodes)
  end

  # Xpath search returning only the first matching node in the list, or nil
  # when nothing matches.
  def sf(path)
    nodes = self.s(path)
    nodes.nil? ? nil : nodes.first
  end

  # Returns the nodes reachable from self along +axis+ that satisfy
  # +node_test+. Raises a RuntimeError for an unknown axis. An axis that
  # has nothing to offer (no parent, no ancestors, no descendants) yields
  # an empty array.
  def find_matching_nodes(axis, node_test) # :nodoc:
    case axis
    when 'ancestor'
      # the ancestors list may be nil when there is no parent
      (self.ancestors || []).find_all{|node| node.is_matching_node?(node_test)}
    when 'ancestor-or-self'
      self.find_matching_nodes('self', node_test) + self.find_matching_nodes('ancestor', node_test)
    when 'attribute'
      '*' == node_test ? self[] : self[node_test] || []
    when 'child'
      if self.respond_to? :children
        self.children.find_all{|node| node.is_matching_node?(node_test)}
      else
        []
      end
    when 'descendant'
      # the descendants list may be nil when there are no children
      (self.descendants || []).find_all{|node| node.is_matching_node?(node_test)}
    when 'descendant-or-self'
      self.find_matching_nodes('self', node_test) + self.find_matching_nodes('descendant', node_test)
    when 'following-sibling'
      # without a parent there is no sibling list to consult
      self.parent.nil? ? [] : self.next_siblings.find_all{|node| node.is_matching_node?(node_test)}
    when 'parent'
      (not self.parent.nil? and self.parent.is_matching_node?(node_test)) ? [self.parent] : []
    when 'preceding-sibling'
      # without a parent there is no sibling list to consult
      self.parent.nil? ? [] : self.prev_siblings.find_all{|node| node.is_matching_node?(node_test)}
    when 'self'
      self.is_matching_node?(node_test) ? [self] : []
    else
      raise "Invalid axis: #{axis}"
    end
  end
  protected :find_matching_nodes

  # Tells whether self satisfies the Xpath node test +node_test+. Any test
  # other than "*" and the node type tests is treated as an element name.
  def is_matching_node?(node_test) # :nodoc:
    case node_test
    when '*'
      !self.is_a?(Rind::Text)
    when 'comment()'
      self.is_a?(Rind::Comment)
    when 'node()'
      true
    when 'processing-instruction()'
      self.is_a?(Rind::ProcessingInstruction)
    when 'text()'
      self.is_a?(Rind::Text)
    else
      if self.is_a?(Rind::Element)
        # elements in the 'rind:html' namespace match on their local name;
        # everything else has to match on the expanded name
        (self.namespace_name == 'rind:html' && self.local_name == node_test) ||
          self.expanded_name == node_test
      else
        false
      end
    end
  end
  protected :is_matching_node?

  # Evaluates one predicate (the text between the square brackets) for self
  # at the 1-based +position+. "@name" reads the attribute through
  # self[:name] and "position()" is replaced by +position+. An integer
  # result is treated as a position request. The +predicate+ string is not
  # modified.
  #
  # NOTE(review): the predicate text is run through eval, so paths must
  # never be built from untrusted input.
  def validate_predicate(predicate, position) # :nodoc:
    has_attributes = self.respond_to?(:[])

    # attribute replacement
    expression = predicate.gsub(/@([0-9a-zA-Z]+)/){has_attributes ? "self[:#{$1}]" : 'nil'}
    # position()
    expression = expression.gsub(/position\(\)/, position.to_s)

    valid = eval expression
    # a number indicates a position request
    if valid.is_a? Integer
      valid == position
    else
      valid
    end
  end
  protected :validate_predicate
end
|
data/test/all_test.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'children_test'
|
3
|
+
require 'cdata_test'
|
4
|
+
require 'comment_test'
|
5
|
+
require 'document_test'
|
6
|
+
require 'equality_test'
|
7
|
+
require 'element_test'
|
8
|
+
require 'html_test'
|
9
|
+
require 'xml_test'
|
10
|
+
require 'nodes_test'
|
11
|
+
require 'parser_test'
|
12
|
+
require 'xpath_test'
|
13
|
+
require 'traverse_test'
|
14
|
+
require 'manipulate_test'
|
data/test/cdata_test.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'rind'
|
3
|
+
|
4
|
+
# Exercises Rind::Children: removing and adding entries has to keep each
# child's parent reference in step with the list it belongs to.
# Assertions follow the Test::Unit argument order (expected, actual) so
# that failure messages label the two values correctly.
class ChildrenTest < Test::Unit::TestCase
  # Builds a parent element and a Children list holding three paragraphs.
  def setup
    @parent = Rind::Html::Html.new()
    @one = Rind::Html::P.new(:attributes => {:id => '1'})
    @two = Rind::Html::P.new(:attributes => {:id => '2'})
    @three = Rind::Html::P.new(:attributes => {:id => '3'})
    @children = Rind::Children.new(@parent, @one, @two, @three)
  end

  def test_delete
    # normal
    child = @children.delete(@one)
    assert_same(@one, child)
    assert_nil(child.parent)
    assert_equal(2, @children.length)

    # item not found
    child = @children.delete('a')
    assert_nil(child)

    # item not found with block
    child = @children.delete('a') {'not found'}
    assert_equal('not found', child)
  end

  # testing the clearing of the parent using internal wrapper
  def test_pop
    child = @children.pop
    assert_same(@three, child)
    assert_nil(child.parent)
    assert_equal(2, @children.length)
  end

  # testing the addition of the parent using internal wrapper
  def test_push
    children = Rind::Children.new(@parent)
    children.push(@one, @two, @three)
    assert_equal(@children, children)
    children.each do |child|
      assert_same(@parent, child.parent)
    end
  end

  def test_to_s
    assert_equal('<p id="1"></p><p id="2"></p><p id="3"></p>', @children.to_s)
  end
end
|