rind 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
# Tokenizer and tree builder that turn the markup in a file into Rind nodes.
module Rind
  # Token type identifiers. Every token is an array whose first element is one
  # of these values; the remaining elements are the strings captured for it.
  TEXT = 0
  CDATA = 1
  COMMENT = 2
  DOCTYPE = 3
  PRO_INST = 4
  END_TAG = 5
  START_TAG = 6

  # Reads +file_name+ and returns the node tree built from its content.
  #
  # file_name::          path of the file to read
  # type::               document type name; it is capitalized to form the
  #                      fallback class namespace <tt>Rind::<Type></tt>
  # base_namespace::     namespace assigned to tags that carry no prefix
  # namespaces_allowed:: collection (anything responding to +include?+) of
  #                      namespaces whose own element library may be used,
  #                      or +nil+ to always use the fallback namespace
  def self.parse(file_name, type, base_namespace, namespaces_allowed)
    create_tree(tokenize(file_name, type), type, base_namespace, namespaces_allowed)
  end

  # Splits the content of +file_name+ into a flat list of tokens in document
  # order. +type+ is accepted for interface compatibility and is not used.
  #
  # Each token is <tt>[kind, captured_text, ...]</tt>; start tags carry the tag
  # name followed by the raw attribute string.
  def self.tokenize(file_name, type)
    content = File.read(file_name)

    # tag types
    # A name starts with a letter or underscore and may continue with digits,
    # so that tags such as <h1> are recognised as a whole.
    name = /[a-zA-Z_]\w*/
    cdata = /<!\[CDATA\[(.*?)\]\]>/m
    comment = /<!--(.*?)-->/m
    doctype = /<!DOCTYPE(.*?)>/m
    processing_instruction = /<\?(.*?)>/m
    end_tag = /<\s*\/\s*((?:#{name}:)?#{name})\s*>/m
    start_tag = /<\s*((?:#{name}:)?#{name})\s*(.*?)?\/?>/m
    markup = /#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{end_tag}|#{start_tag}/

    # token kind for capture groups 1..6 of the combined expression
    kinds = [CDATA, COMMENT, DOCTYPE, PRO_INST, END_TAG, START_TAG]

    # extract tokens from the file content
    tokens = []
    text_start = 0
    content.scan(markup) do |captures|
      match = Regexp.last_match

      # create a preceding text token if one exists
      append_text_token(tokens, content[text_start...match.begin(0)])
      text_start = match.end(0)

      # the first capture group that took part in the match identifies the tag
      group = (1..kinds.length).find { |index| match.begin(index) }
      tokens.push([kinds[group - 1], *captures.compact])
    end

    # text that follows the last tag (or a file without any tags)
    append_text_token(tokens, content[text_start..-1])

    tokens
  end
  private_class_method :tokenize

  # Appends a TEXT token for +text+ unless it is nil or whitespace only.
  # \A and \z anchor the whole string; line anchors would treat any text that
  # merely contains an empty line as blank.
  def self.append_text_token(tokens, text)
    tokens.push([TEXT, text]) unless text.nil? || text =~ /\A\s*\z/
  end
  private_class_method :append_text_token

  # Builds the node list for +tokens+. Tokens are consumed from the end of the
  # list, i.e. in reverse document order, and +tokens+ is emptied in the process.
  #
  # An end tag triggers a recursive call that collects everything back to the
  # start tag named +complete_tag+; those nodes become that element's children.
  def self.create_tree(tokens, type, base_namespace, namespaces_allowed, complete_tag = nil)
    dom = Rind::Nodes.new

    # create the nodes and push them onto the dom tree
    until tokens.empty?
      token = tokens.pop

      case token[0]
      when TEXT
        dom.push(Rind::Text.new(token[1]))
      when CDATA
        # Rind::CDATA is the integer token constant above; the node class is Rind::Cdata.
        dom.push(Rind::Cdata.new(token[1]))
      when COMMENT
        dom.push(Rind::Comment.new(token[1]))
      when DOCTYPE
        dom.push(Rind::DocType.new(token[1]))
      when PRO_INST
        dom.push(Rind::ProcessingInstruction.new(token[1]))
      when END_TAG
        # recursively retrieve all the children and the matching start tag
        children = create_tree(tokens, type, base_namespace, namespaces_allowed, token[1])
        start_tag = children.shift

        if start_tag.respond_to?(:children)
          start_tag.children.replace(children)
          dom.push(start_tag)
        else
          # stray end tag: nothing that can hold children was found, so the
          # collected nodes stay at this level (dom is in reverse order)
          children.reverse.each { |node| dom.push(node) }
          dom.push(start_tag) unless start_tag.nil?
        end
      when START_TAG
        namespace_name, local_name = extract_name_and_namespace(token[1], base_namespace)

        # create the element
        if !namespaces_allowed.nil? && (namespace_name == base_namespace || namespaces_allowed.include?(namespace_name))
          passed_namespace = namespace_name == base_namespace ? nil : namespace_name
          begin
            node = create_node(get_library_name(namespace_name), local_name.capitalize, token[2], nil, passed_namespace)
          rescue StandardError
            # the namespace has no usable library class; fall back to the generic one
            node = create_node("Rind::#{type.capitalize}", local_name.capitalize, token[2], nil, passed_namespace)
          end
        else
          node = create_node("Rind::#{type.capitalize}", local_name.capitalize, token[2], nil, namespace_name)
        end
        dom.push(node)

        # break if this tag completes the grouping from an end tag
        break if complete_tag.eql?(token[1])
      end
    end

    # output in the correct order
    dom.reverse
  end
  private_class_method :create_tree

  # Splits a tag name into <tt>[namespace_name, local_name]</tt>. Names without
  # a prefix get +base_namespace+; +local_name+ is nil when +tag_name+ is not
  # a valid name.
  def self.extract_name_and_namespace(tag_name, base_namespace)
    parts = /\A(?:([\w:]+):)?(\w+)\z/.match(tag_name)
    namespace_name, local_name = parts ? parts.captures : [nil, nil]

    [namespace_name || base_namespace, local_name]
  end
  private_class_method :extract_name_and_namespace

  # Converts a colon separated namespace ("rind:html") into the name of the
  # module expected to hold its element classes ("Rind::Html").
  def self.get_library_name(namespace_name)
    namespace_name.split(':').collect { |part| part.capitalize }.join('::')
  end
  private_class_method :get_library_name

  # Instantiates <tt><class_namespace>::<Class_name></tt> with the given
  # attributes, namespace and (when not nil) children.
  #
  # Raises NameError when the resulting class path is not a plain constant path.
  def self.create_node(class_namespace, class_name, attributes, children, namespace_name)
    class_path = "#{class_namespace}::#{class_name.capitalize}"

    # NOTE(security): the class path is built from tag names found in the parsed
    # file and is evaluated below. Only plain constant paths are let through.
    unless class_path =~ /\A[A-Z]\w*(?:::[A-Z]\w*)*\z/
      raise NameError, "invalid node class name: #{class_path}"
    end

    # The options refer to the local variables instead of embedding their
    # values in the evaluated string; a nil namespace is passed as ''.
    options = [':attributes => attributes', ':namespace_name => namespace_name.to_s']
    options.push(':children => children') unless children.nil?

    class_eval("#{class_path}.new(#{options.join(',')})")
  end
  private_class_method :create_node
end
@@ -0,0 +1,86 @@
1
# Navigation helpers for nodes that live in a tree. The including object is
# expected to expose its children (when it has any) through +children+.
module Traverse
  # The node that holds this node, or nil for a node without a parent.
  attr_accessor :parent

  # Creates a Rind::Nodes list of all ancestors, nearest first. If an Xpath
  # is provided it will only return the nodes that match.
  # Returns nil when the node has no parent.
  def ancestors(path = nil)
    return nil if self.parent.nil?

    found = Rind::Nodes.new([self.parent])

    inherited = self.parent.ancestors
    found.push(*inherited) unless inherited.nil?

    path.nil? ? found : found.filter(path)
  end

  # Creates a Rind::Nodes list of all descendants: the direct children
  # first, followed by the descendants of each child. If an Xpath
  # is provided it will only return the nodes that match.
  # Returns nil when the node has no children.
  def descendants(path = nil)
    return nil unless self.respond_to?(:children) && !self.children.empty?

    found = Rind::Nodes.new(self.children)

    self.children.each do |child|
      nested = child.descendants
      found.push(*nested) unless nested.nil?
    end

    path.nil? ? found : found.filter(path)
  end

  # Returns the first descendant node. If an Xpath is provided
  # it will return the first one that matches.
  def down(path = nil)
    found = self.descendants(path)
    found.first unless found.nil?
  end

  # Returns the first sibling that follows the current node in
  # the list of siblings. If an Xpath is provided it will return
  # the first one that matches.
  def next(path = nil)
    found = self.next_siblings(path)
    found.first unless found.nil?
  end

  # Creates a Rind::Nodes list of all siblings that follow the
  # current node in the list of siblings. If an Xpath is provided
  # it will only return the nodes that match.
  # A node without a parent has no siblings.
  def next_siblings(path = nil)
    children = self.parent.nil? ? [] : self.parent.children
    # locate this very object; an equal-looking sibling must not be mistaken for it
    position = children.find_index { |child| child.equal?(self) }
    following = position.nil? ? [] : children[(position + 1)..(children.length - 1)]

    siblings = Rind::Nodes.new(following)
    path.nil? ? siblings : siblings.filter(path)
  end

  # Returns the first sibling that precedes the current node in
  # the list of siblings, i.e. the closest one before it. If an Xpath
  # is provided it will return the closest one that matches.
  def prev(path = nil)
    found = self.prev_siblings(path)
    found.last unless found.nil?
  end

  # Creates a Rind::Nodes list of all siblings that precede the
  # current node in the list of siblings. If an Xpath is provided
  # it will only return the nodes that match.
  # A node without a parent has no siblings.
  def prev_siblings(path = nil)
    children = self.parent.nil? ? [] : self.parent.children
    # locate this very object; an equal-looking sibling must not be mistaken for it
    position = children.find_index { |child| child.equal?(self) }
    preceding = position.nil? ? [] : children[0...position]

    siblings = Rind::Nodes.new(preceding)
    path.nil? ? siblings : siblings.filter(path)
  end

  # Creates a Rind::Nodes list of all siblings. If an Xpath is
  # provided it will only return the nodes that match.
  # A node without a parent has no siblings.
  def siblings(path = nil)
    children = self.parent.nil? ? [] : self.parent.children

    others = Rind::Nodes.new(children.find_all { |child| !child.equal?(self) })
    path.nil? ? others : others.filter(path)
  end

  # Returns the first ancestor node. If an Xpath is provided
  # it will return the first one that matches.
  def up(path = nil)
    found = self.ancestors(path)
    found.first unless found.nil?
  end
end
data/lib/rind/xml.rb ADDED
@@ -0,0 +1,28 @@
1
module Rind
  # Rind::Xml builds an element class on demand for any constant that is
  # looked up inside it, so every XML element name is available.
  module Xml
    # Defines, registers and returns the class for the missing constant.
    def self.const_missing(full_class_name, options={}) # :nodoc:
      element_class = Class.new(Element) do
        # <b>Parent:</b> Element
        # === Example
        #  Rind::Xml::Foo.new(
        #    :attributes => {:id => "bar"},
        #    :children => "Hello World!"
        #  )
        def initialize(options={})
          super(options)
        end

        # The local name, prefixed with the namespace unless the namespace
        # is missing, empty or starts with "xml" / "rind:xml".
        def expanded_name # :nodoc:
          unprefixed = @namespace_name.nil? || @namespace_name == '' || @namespace_name =~ /^(?:rind:)?xml/
          return @local_name if unprefixed

          [@namespace_name, @local_name].join(':')
        end
      end

      const_set(full_class_name, element_class)
      element_class
    end
  end
end
data/lib/rind/xpath.rb ADDED
@@ -0,0 +1,144 @@
1
# Current Xpath support is fairly basic but covers almost all axes and node tests.
# Predicates are limited to attribute and position checks. I intend to expand support
# but that should cover most of the needed functionality.
#
# The path is split on "/" before predicates are read, so a predicate that
# itself contains a "/" is not supported.
module Xpath
  # Xpath search of a node that returns a list of matching nodes.
  #
  # path:: the Xpath expression. A leading "/" starts the search at the
  #        top-most ancestor and a leading "//" searches everything below
  #        it. The string passed in is never modified.
  #
  # Returns a Rind::Nodes list, or nil when nothing matches.
  def s(path)
    node = self.clone

    # absolute paths to the top
    if '/' == path[0, 1]
      node = node.parent until node.parent.nil?

      # "//x" keeps one slash (an empty first step); "/x" becomes "self::x"
      remainder = path[1..-1]
      path = '/' == remainder[0, 1] ? remainder : "self::#{remainder}"
    end

    # node check
    nodes = [node]
    path.split('/').each do |step|
      step = case step
             when '' then 'descendant-or-self::node()'
             when '.' then 'self::node()'
             when '..' then 'parent::node()'
             else step.sub(/\A@/, 'attribute::')
             end

      parts = /^(?:(.*?)::)?(.+?)(\[.*?)?$/.match(step)
      axis, node_test, predicates = parts ? parts.captures : [nil, nil, nil]
      axis = 'child' if axis.nil?

      # find matching nodes
      nodes = nodes.collect { |candidate| candidate.find_matching_nodes(axis, node_test) }.flatten.compact

      # check predicates
      unless predicates.nil?
        # true() and false()
        predicates = predicates.gsub(/(true|false)\(\)/, '\1')
        # a lone "=" becomes "=="; "!=", "<=", ">=" and "==" are left alone
        predicates = predicates.gsub(/(?<![!<>=])=(?!=)/, '==')

        predicates.scan(/\[(.*?)\]/) do |captures|
          # last()
          predicate = captures[0].gsub(/last\(\)/, nodes.length.to_s)

          # the position is the 1-based place in the current list
          kept = []
          nodes.each_with_index do |candidate, index|
            kept.push(candidate) if candidate.validate_predicate(predicate.clone, index + 1)
          end
          nodes = kept
          break if nodes.empty?
        end
      end

      return nil if nodes.empty?
    end

    Rind::Nodes.new(nodes)
  end

  # Xpath search returning only the first matching node in the list,
  # or nil when nothing matches.
  def sf(path)
    nodes = self.s(path)
    nodes.nil? ? nil : nodes.first
  end

  # Returns the nodes on +axis+ (relative to this node) that pass +node_test+.
  # Raises a RuntimeError for an axis that is not supported.
  def find_matching_nodes(axis, node_test) # :nodoc:
    # the traversal helpers may return nil when there is nothing to return
    matching = lambda do |list|
      (list || []).find_all { |node| node.is_matching_node?(node_test) }
    end

    case axis
    when 'ancestor'
      matching.call(self.ancestors)
    when 'ancestor-or-self'
      self.find_matching_nodes('self', node_test) + self.find_matching_nodes('ancestor', node_test)
    when 'attribute'
      '*' == node_test ? self[] : self[node_test] || []
    when 'child'
      self.respond_to?(:children) ? matching.call(self.children) : []
    when 'descendant'
      matching.call(self.descendants)
    when 'descendant-or-self'
      self.find_matching_nodes('self', node_test) + self.find_matching_nodes('descendant', node_test)
    when 'following-sibling'
      # a node without a parent has no siblings
      self.parent.nil? ? [] : matching.call(self.next_siblings)
    when 'parent'
      !self.parent.nil? && self.parent.is_matching_node?(node_test) ? [self.parent] : []
    when 'preceding-sibling'
      # a node without a parent has no siblings
      self.parent.nil? ? [] : matching.call(self.prev_siblings)
    when 'self'
      self.is_matching_node?(node_test) ? [self] : []
    else
      raise "Invalid axis: #{axis}"
    end
  end
  protected :find_matching_nodes

  # Tells whether this node passes +node_test+: "*", one of the node type
  # tests, or an element name. Always returns true or false.
  def is_matching_node?(node_test) # :nodoc:
    case node_test
    when '*'
      !self.is_a?(Rind::Text)
    when 'comment()'
      self.is_a?(Rind::Comment)
    when 'node()'
      true
    when 'processing-instruction()'
      self.is_a?(Rind::ProcessingInstruction)
    when 'text()'
      self.is_a?(Rind::Text)
    else
      return false unless self.is_a?(Rind::Element)

      # elements in the rind:html namespace are matched by their local name
      (self.namespace_name == 'rind:html' && self.local_name == node_test) ||
        self.expanded_name == node_test
    end
  end
  protected :is_matching_node?

  # Evaluates one predicate (the text between "[" and "]") for this node.
  # +position+ is the node's 1-based place in the list being filtered.
  # +predicate+ is not modified.
  def validate_predicate(predicate, position) # :nodoc:
    # attribute replacement
    expression = predicate.gsub(/@([0-9a-zA-Z]+)/) do
      self.respond_to?(:[]) ? "self[:#{Regexp.last_match(1)}]" : 'nil'
    end
    # position()
    expression = expression.gsub(/position\(\)/, position.to_s)

    # NOTE(security): the predicate text is executed as Ruby code. Never pass
    # an Xpath that comes from an untrusted source.
    valid = eval expression
    # a number indicates a position request
    if valid.is_a? Integer
      valid == position
    else
      valid
    end
  end
  protected :validate_predicate
end
data/test/all_test.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'test/unit'
2
+ require 'children_test'
3
+ require 'cdata_test'
4
+ require 'comment_test'
5
+ require 'document_test'
6
+ require 'equality_test'
7
+ require 'element_test'
8
+ require 'html_test'
9
+ require 'xml_test'
10
+ require 'nodes_test'
11
+ require 'parser_test'
12
+ require 'xpath_test'
13
+ require 'traverse_test'
14
+ require 'manipulate_test'
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+ require 'rind'
3
+
4
# Unit tests for Rind::Cdata.
class CdataTest < Test::Unit::TestCase
  # The content is written out wrapped in a CDATA section.
  def test_to_s
    cdata = Rind::Cdata.new('foo')
    # assert_equal takes the expected value first
    assert_equal('<![CDATA[foo]]>', cdata.to_s)
  end
end
@@ -0,0 +1,50 @@
1
+ require 'test/unit'
2
+ require 'rind'
3
+
4
# Unit tests for Rind::Children. assert_equal takes the expected value
# first, so failure messages report the values the right way round.
class ChildrenTest < Test::Unit::TestCase
  # Builds a parent element and a children list holding three paragraphs.
  def setup
    @parent = Rind::Html::Html.new()
    @one = Rind::Html::P.new(:attributes => {:id => '1'})
    @two = Rind::Html::P.new(:attributes => {:id => '2'})
    @three = Rind::Html::P.new(:attributes => {:id => '3'})
    @children = Rind::Children.new(@parent, @one, @two, @three)
  end

  def test_delete
    # normal
    child = @children.delete(@one)
    assert_same(@one, child)
    assert_nil(child.parent)
    assert_equal(2, @children.length)

    # item not found
    child = @children.delete('a')
    assert_nil(child)

    # item not found with block
    child = @children.delete('a') {'not found'}
    assert_equal('not found', child)
  end

  # testing the clearing of the parent using internal wrapper
  def test_pop
    child = @children.pop
    assert_same(@three, child)
    assert_nil(child.parent)
    assert_equal(2, @children.length)
  end

  # testing the addition of the parent using internal wrapper
  def test_push
    children = Rind::Children.new(@parent)
    children.push(@one, @two, @three)
    assert_equal(@children, children)
    children.each do |child|
      assert_same(@parent, child.parent)
    end
  end

  def test_to_s
    assert_equal('<p id="1"></p><p id="2"></p><p id="3"></p>', @children.to_s)
  end
end