html5 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +58 -0
- data/README +9 -0
- data/Rakefile.rb +17 -0
- data/lib/html5/constants.rb +818 -0
- data/lib/html5/filters/base.rb +10 -0
- data/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/html5/filters/whitespace.rb +36 -0
- data/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
- data/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/html5/html5parser/in_body_phase.rb +607 -0
- data/lib/html5/html5parser/in_caption_phase.rb +68 -0
- data/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/html5/html5parser/in_row_phase.rb +87 -0
- data/lib/html5/html5parser/in_select_phase.rb +84 -0
- data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
- data/lib/html5/html5parser/in_table_phase.rb +110 -0
- data/lib/html5/html5parser/initial_phase.rb +134 -0
- data/lib/html5/html5parser/phase.rb +158 -0
- data/lib/html5/html5parser/root_element_phase.rb +42 -0
- data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/html5/html5parser.rb +248 -0
- data/lib/html5/inputstream.rb +654 -0
- data/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/html5/sanitizer.rb +188 -0
- data/lib/html5/serializer/htmlserializer.rb +180 -0
- data/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/html5/serializer.rb +2 -0
- data/lib/html5/tokenizer.rb +968 -0
- data/lib/html5/treebuilders/base.rb +334 -0
- data/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/html5/treebuilders/rexml.rb +208 -0
- data/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/html5/treebuilders.rb +24 -0
- data/lib/html5/treewalkers/base.rb +154 -0
- data/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/html5/treewalkers.rb +26 -0
- data/lib/html5.rb +13 -0
- data/parse.rb +217 -0
- data/tests/preamble.rb +82 -0
- data/tests/test_encoding.rb +35 -0
- data/tests/test_lxp.rb +263 -0
- data/tests/test_parser.rb +68 -0
- data/tests/test_sanitizer.rb +142 -0
- data/tests/test_serializer.rb +68 -0
- data/tests/test_stream.rb +62 -0
- data/tests/test_tokenizer.rb +94 -0
- data/tests/test_treewalkers.rb +116 -0
- data/tests/tokenizer_test_parser.rb +63 -0
- metadata +120 -0
@@ -0,0 +1,208 @@
|
|
1
|
+
require 'html5/treebuilders/base'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module HTML5
|
6
|
+
module TreeBuilders
|
7
|
+
module REXML
|
8
|
+
|
9
|
+
# Tree-builder node that wraps a REXML object (+rxobj+) and mirrors every
# structural change made to the html5 node tree onto the REXML tree.
class Node < Base::Node
  extend Forwardable

  # +name+ and +attributes+ are answered by the wrapped REXML object.
  def_delegators :@rxobj, :name, :attributes
  attr_accessor :rxobj

  # Creates the node and a REXML object of the subclass-specific +rxclass+
  # carrying the same name.
  def initialize(name)
    super(name)
    @rxobj = self.class.rxclass.new(name)
  end

  # Appends +node+ as the last child. A text node that follows another text
  # node is folded into the existing one instead of being added separately.
  def appendChild(node)
    last_child = childNodes.last
    if node.kind_of?(TextNode) && last_child.kind_of?(TextNode)
      last_child.rxobj.value = last_child.rxobj.to_s + node.rxobj.to_s
      last_child.rxobj.raw = true
    else
      childNodes.push(node)
      rxobj.add(node.rxobj)
    end
    node.parent = self
  end

  # Detaches +node+ from both the html5 child list and the REXML tree.
  def removeChild(node)
    childNodes.delete(node)
    rxobj.delete(node.rxobj)
    node.parent = nil
  end

  # Inserts character +data+ before the child +before+, or at the end of the
  # children when +before+ is nil.
  def insertText(data, before=nil)
    text_node = TextNode.new(data)
    if before
      insertBefore(text_node, before)
    else
      appendChild(text_node)
    end
  end

  # Inserts +node+ immediately before the existing child +refNode+. A text
  # node landing right after another text node is merged into it.
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 && childNodes[index - 1].kind_of?(TextNode)
      previous = childNodes[index - 1]
      previous.rxobj.value = previous.rxobj.to_s + node.rxobj.to_s
      previous.rxobj.raw = true
    else
      childNodes.insert(index, node)
      refNode.rxobj.parent.insert_before(refNode.rxobj, node.rxobj)
    end
    # Record the new parent exactly as appendChild does; previously an
    # inserted node kept a stale (or nil) parent.
    node.parent = self
  end

  # True when this node has at least one child.
  def hasContent
    !childNodes.empty?
  end
end
|
59
|
+
|
60
|
+
# Element node backed by a REXML::Element.
class Element < Node
  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Element
  end

  def initialize(name)
    super(name)
  end

  # Returns a childless copy with the same name and attributes.
  def cloneNode
    copy = self.class.new(name)
    attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
    copy
  end

  # Copies every name/value pair of +value+ onto the REXML element.
  def attributes=(value)
    value.each { |attr_name, attr_value| rxobj.attributes[attr_name] = attr_value }
  end

  # Renders this element, its attributes (except +xmlns+) and its subtree in
  # the "\n|<indent>..." test-tree format.
  def printTree(indent=0)
    tree = "\n|#{' ' * indent}<#{name}>"
    inner = indent + 2
    attributes.each do |attr_name, attr_value|
      next if attr_name == 'xmlns'
      tree += "\n|#{' ' * inner}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.inject(tree) { |out, child| out + child.printTree(inner) }
  end
end
|
92
|
+
|
93
|
+
# Document root backed by a REXML::Document.
class Document < Node
  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Document
  end

  # A document has no name.
  def initialize
    super(nil)
  end

  # Appends +node+; an +html+ element additionally gets the XHTML namespace
  # declared on its REXML object first.
  def appendChild(node)
    is_html_root = node.kind_of?(Element) && node.name == 'html'
    node.rxobj.add_namespace('http://www.w3.org/1999/xhtml') if is_html_root
    super(node)
  end

  # Renders "#document" followed by every child subtree.
  def printTree(indent=0)
    childNodes.inject("#document") { |tree, child| tree + child.printTree(indent + 2) }
  end
end
|
117
|
+
|
118
|
+
# Doctype node backed by a REXML::DocType.
class DocumentType < Node
  # +public_id+ / +system_id+ read through to REXML's +public+ / +system+.
  def_delegator :@rxobj, :public, :public_id

  def_delegator :@rxobj, :system, :system_id

  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::DocType
  end

  # Builds the doctype. Node#initialize first creates a plain DocType from
  # +name+; it is then replaced by one carrying whichever identifiers were
  # supplied.
  def initialize(name, public_id, system_id)
    super(name)
    spec =
      if public_id
        [name, ::REXML::DocType::PUBLIC, public_id, system_id]
      elsif system_id
        [name, ::REXML::DocType::SYSTEM, nil, system_id]
      else
        name
      end
    @rxobj = ::REXML::DocType.new(spec)
  end

  # Renders the doctype line in the test-tree format.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}<!DOCTYPE #{name}>"
  end
end
|
142
|
+
|
143
|
+
# Nameless element used as the container for a parsed fragment.
class DocumentFragment < Element
  def initialize
    super(nil)
  end

  # Renders only the children; the fragment itself prints nothing.
  def printTree(indent=0)
    childNodes.inject("") { |tree, child| tree + child.printTree(indent + 2) }
  end
end
|
156
|
+
|
157
|
+
# Text node backed by a REXML::Text.
class TextNode < Node
  # Wraps the character +data+ in a raw-mode REXML::Text.
  #
  # Node#initialize is deliberately not invoked: it would call
  # +self.class.rxclass+, which this class does not define.
  def initialize(data)
    # The Text is created with raw = true, so REXML takes the string as
    # already-escaped markup. The markup-significant characters therefore have
    # to be escaped here; '&' goes first so the entities produced for '<' and
    # '>' are not escaped a second time.
    raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
    @rxobj = ::REXML::Text.new(raw, true, nil, true)
  end

  # Renders the quoted text value in the test-tree format.
  def printTree(indent=0)
    "\n|#{' ' * indent}\"#{rxobj.value}\""
  end
end
|
167
|
+
|
168
|
+
# Comment node backed by a REXML::Comment.
class CommentNode < Node
  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Comment
  end

  # Renders the comment in the test-tree format.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}<!-- #{rxobj.string} -->"
  end
end
|
177
|
+
|
178
|
+
# Tree builder that produces REXML-backed nodes.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes this builder instantiates.
  def initialize
    @documentClass, @doctypeClass = Document, DocumentType
    @elementClass,  @commentClass = Element, CommentNode
    @fragmentClass = DocumentFragment
  end

  # Creates a doctype carrying both identifiers and appends it to the document.
  def insertDoctype(name, public_id, system_id)
    @document.appendChild(@doctypeClass.new(name, public_id, system_id))
  end

  # Serializes +node+ in the test-tree format.
  def testSerializer(node)
    node.printTree
  end

  # Returns the underlying REXML object of the document.
  def get_document
    @document.rxobj
  end

  # Stores the inherited get_fragment result as the current document and
  # returns the children of its REXML object.
  def get_fragment
    @document = super
    @document.rxobj.children
  end
end
|
205
|
+
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
require 'html5/treebuilders/base'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
module TreeBuilders
|
5
|
+
module SimpleTree
|
6
|
+
|
7
|
+
# Node representing an item in the simple (pure Ruby) tree.
class Node < Base::Node
  # The tag name associated with the node.
  attr_accessor :name

  # The value of the node (applies to text nodes and comments).
  attr_accessor :value

  # A hash holding name => value pairs for the attributes of the node.
  attr_accessor :attributes

  # Creates a node called +name+ with no value and no attributes.
  def initialize(name)
    super(name)
    @name = name
    @value = nil
    @attributes = {}
  end

  # Appends +node+ as the last child. A text node that follows another text
  # node is merged into it instead of being added separately.
  def appendChild(node)
    last_child = childNodes.last
    if node.kind_of?(TextNode) && last_child.kind_of?(TextNode)
      last_child.value += node.value
    else
      childNodes << node
    end
    node.parent = self
  end

  # Removes +node+ from the children and clears its parent.
  def removeChild(node)
    childNodes.delete(node)
    node.parent = nil
  end

  # Returns a childless copy with the same name, attributes and value.
  def cloneNode
    copy = self.class.new(name)
    attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
    copy.value = value
    copy
  end

  # Inserts character +data+ before the child +before+, or at the end of the
  # children when +before+ is nil.
  def insertText(data, before=nil)
    text_node = TextNode.new(data)
    if before
      insertBefore(text_node, before)
    else
      appendChild(text_node)
    end
  end

  # Inserts +node+ immediately before the existing child +refNode+. A text
  # node landing right after another text node is merged into it.
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 && childNodes[index - 1].kind_of?(TextNode)
      childNodes[index - 1].value += node.value
    else
      childNodes.insert(index, node)
    end
    # Record the new parent exactly as appendChild does; previously an
    # inserted node kept a stale (or nil) parent.
    node.parent = self
  end

  # Renders this node (via to_s) and its subtree in the test-tree format.
  def printTree(indent=0)
    tree = "\n|#{' ' * indent}#{self}"
    childNodes.each { |child| tree += child.printTree(indent + 2) }
    tree
  end

  # True when this node has at least one child.
  def hasContent
    !childNodes.empty?
  end
end
|
77
|
+
|
78
|
+
# Element node of the simple tree.
class Element < Node
  # The element rendered as an opening tag without attributes.
  def to_s
    '<' + name.to_s + '>'
  end

  # Renders the element, then its attributes and children two columns deeper.
  def printTree(indent=0)
    tree = "\n|#{' ' * indent}#{self}"
    inner = indent + 2
    attributes.each do |attr_name, attr_value|
      tree += "\n|#{' ' * inner}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.inject(tree) { |out, child| out + child.printTree(inner) }
  end
end
|
95
|
+
|
96
|
+
# Root node of the simple tree.
class Document < Node
  # A document has no name.
  def initialize
    super(nil)
  end

  def to_s
    "#document"
  end

  # Renders "#document" followed by every child subtree.
  def printTree(indent=0)
    childNodes.inject(to_s) { |tree, child| tree + child.printTree(indent + 2) }
  end
end
|
113
|
+
|
114
|
+
# Doctype node of the simple tree; both identifiers start out as nil.
class DocumentType < Node
  attr_accessor :public_id, :system_id

  def initialize(name)
    super(name)
    @public_id = @system_id = nil
  end

  def to_s
    "<!DOCTYPE #{name}>"
  end
end
|
127
|
+
|
128
|
+
# Nameless element used as the container for a parsed fragment.
class DocumentFragment < Element
  def initialize
    super(nil)
  end

  # Renders only the children; the fragment itself prints nothing.
  def printTree(indent=0)
    childNodes.inject("") { |tree, child| tree + child.printTree(indent + 2) }
  end
end
|
141
|
+
|
142
|
+
# Text node of the simple tree; the character data lives in +value+.
class TextNode < Node
  def initialize(value)
    super(nil)
    self.value = value
  end

  # The text wrapped in double quotes.
  def to_s
    '"%s"' % value
  end
end
|
152
|
+
|
153
|
+
# Comment node of the simple tree; the comment text lives in +value+.
class CommentNode < Node
  def initialize(value)
    super(nil)
    self.value = value
  end

  # The comment rendered with its delimiters.
  def to_s
    "<!-- %s -->" % value
  end
end
|
163
|
+
|
164
|
+
# Tree builder that produces simple-tree nodes.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes this builder instantiates.
  def initialize
    @documentClass, @doctypeClass = Document, DocumentType
    @elementClass,  @commentClass = Element, CommentNode
    @fragmentClass = DocumentFragment
  end

  # Serializes +node+ in the test-tree format.
  def testSerializer(node)
    node.printTree
  end

  # Stores the inherited get_fragment result as the current document and
  # returns its child nodes.
  def get_fragment
    @document = super
    @document.childNodes
  end
end
|
182
|
+
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module HTML5
  module TreeBuilders

    class << self
      # Looks up a tree builder class by (case-insensitive) name, loading its
      # implementation file on first use.
      #
      # name - anything responding to to_s: 'simpletree', 'rexml' or 'hpricot'.
      #
      # Returns the TreeBuilder class; raises RuntimeError for any other name.
      def [](name)
        key = name.to_s.downcase
        if key == 'simpletree'
          require 'html5/treebuilders/simpletree'
          SimpleTree::TreeBuilder
        elsif key == 'rexml'
          require 'html5/treebuilders/rexml'
          REXML::TreeBuilder
        elsif key == 'hpricot'
          require 'html5/treebuilders/hpricot'
          Hpricot::TreeBuilder
        else
          raise "Unknown TreeBuilder #{name}"
        end
      end

      alias :get_tree_builder :[]
    end
  end
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'html5/constants'
|
2
|
+
module HTML5
|
3
|
+
module TreeWalkers
|
4
|
+
|
5
|
+
# Factory methods that build the token hashes emitted by the tree walkers.
module TokenConstructor
  # Builds a serialization error token carrying +msg+.
  def error(msg)
    {:type => "SerializeError", :data => msg}
  end

  # Converts an attribute collection into an array via +to_a+.
  def normalize_attrs(attrs)
    attrs.to_a
  end

  # Builds an :EmptyTag token for a void element.
  #
  # NOTE(review): when +has_children+ is true an error token is built but
  # neither returned nor yielded, so callers never see it. Left as is because
  # surfacing it would change what this method returns.
  def empty_tag(name, attrs, has_children=false)
    error(_("Void element has children")) if has_children
    {:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
  end

  # Builds a :StartTag token.
  def start_tag(name, attrs)
    {:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
  end

  # Builds an :EndTag token (end tags carry no attributes).
  def end_tag(name)
    {:type => :EndTag, :name => name, :data => []}
  end

  # Splits +data+ into tokens and yields each one in order: an optional
  # leading :SpaceCharacters run, a :Characters token for the remainder, and
  # an optional trailing :SpaceCharacters run. Data consisting solely of
  # space characters yields a single :SpaceCharacters token.
  def text(data)
    # The 'o' option interpolates SPACE_CHARACTERS once, so the patterns are
    # no longer rebuilt on every call.
    if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/mo
      leading = Regexp.last_match(1)
      yield({:type => :SpaceCharacters, :data => leading})
      data = data[leading.length .. -1]
      return if data.empty?
    end

    if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/mo
      trailing = Regexp.last_match(1)
      yield({:type => :Characters, :data => data[0 ... -trailing.length]})
      yield({:type => :SpaceCharacters, :data => trailing})
    else
      yield({:type => :Characters, :data => data})
    end
  end

  # Builds a :Comment token.
  def comment(data)
    {:type => :Comment, :data => data}
  end

  # Builds a :Doctype token.
  def doctype(name, public_id, system_id, correct=nil)
    {:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
  end

  # Builds an error token reporting an unrecognised node type.
  def unknown(nodeType)
    error(_("Unknown node type: ") + nodeType.to_s)
  end

  # Message hook; currently returns +str+ unchanged.
  def _(str)
    str
  end
end
|
58
|
+
|
59
|
+
# Abstract tree walker: holds the tree and exposes the token constructors.
class Base
  include TokenConstructor

  # tree - the root object the walker will traverse.
  def initialize(tree)
    @tree = tree
  end

  # Subclasses must yield the token stream for @tree.
  def each
    raise NotImplementedError
  end

  # +walk+ is bound to this class's +each+ at the point of aliasing.
  alias_method :walk, :each
end
|
72
|
+
|
73
|
+
# Walks a tree iteratively (no recursion) using four navigation hooks that
# subclasses supply, yielding serializer tokens in document order.
class NonRecursiveTreeWalker < TreeWalkers::Base
  # Must return an array whose first entry is the node kind (:DOCTYPE, :TEXT,
  # :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT, nil to skip the node,
  # or anything else for "unknown") followed by the kind-specific details
  # consumed in #each.
  def node_details(node)
    raise NotImplementedError
  end

  # Must return the first child of +node+, or nil.
  def first_child(node)
    raise NotImplementedError
  end

  # Must return the sibling following +node+, or nil.
  def next_sibling(node)
    raise NotImplementedError
  end

  # Must return the parent of +node+, or nil.
  def parent(node)
    raise NotImplementedError
  end

  # Yields one token hash per event while walking @tree depth-first.
  #
  # Presence of a node is tested with +nil?+ rather than +!= nil+: the latter
  # dispatches to the node's own +==+, and some node classes compare by value
  # (REXML::Text mixes in Comparable over its string form, so an empty text
  # node would compare equal to nil and end the walk early).
  def each
    current_node = @tree
    until current_node.nil?
      details = node_details(current_node)
      has_children = false

      case details.shift
      when :DOCTYPE
        yield doctype(*details)

      when :TEXT
        text(*details) { |token| yield token }

      when :ELEMENT
        name, attributes, has_children = details
        if VOID_ELEMENTS.include?(name)
          yield empty_tag(name, attributes.to_a, has_children)
          # Void elements are never descended into.
          has_children = false
        else
          yield start_tag(name, attributes.to_a)
        end

      when :COMMENT
        yield comment(details[0])

      when :DOCUMENT, :DOCUMENT_FRAGMENT
        has_children = true

      when nil
        # ignore (REXML::XMLDecl is an example)

      else
        yield unknown(details[0])
      end

      child = has_children ? first_child(current_node) : nil
      unless child.nil?
        current_node = child
        next
      end

      # No children to visit: close the current element, then move to the next
      # sibling, climbing (and closing ancestors) until one exists or the root
      # of the walk is reached.
      until current_node.nil?
        kind, name = node_details(current_node)
        yield end_tag(name) if kind == :ELEMENT && !VOID_ELEMENTS.include?(name)

        if @tree == current_node
          current_node = nil
        else
          sibling = next_sibling(current_node)
          unless sibling.nil?
            current_node = sibling
            break
          end

          current_node = parent(current_node)
        end
      end
    end
  end
end
|
152
|
+
|
153
|
+
end
|
154
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'html5/treewalkers/base'
|
2
|
+
require 'rexml/document'
|
3
|
+
|
4
|
+
module HTML5
|
5
|
+
module TreeWalkers
|
6
|
+
module Hpricot
|
7
|
+
# Tree walker over an Hpricot document.
class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

  # Maps an Hpricot node to the [kind, *details] array expected by
  # NonRecursiveTreeWalker#each. An element with an empty name is reported
  # as a document fragment.
  def node_details(node)
    case node
    when ::Hpricot::Elem
      return [:DOCUMENT_FRAGMENT] if node.name.empty?

      attribute_pairs = node.attributes.map { |attr_name, attr_value| [attr_name, attr_value] }
      [:ELEMENT, node.name, attribute_pairs, !node.empty?]
    when ::Hpricot::Text    then [:TEXT, node.content]
    when ::Hpricot::Comment then [:COMMENT, node.content]
    when ::Hpricot::Doc     then [:DOCUMENT]
    when ::Hpricot::DocType then [:DOCTYPE, node.target, node.public_id, node.system_id]
    when ::Hpricot::XMLDecl then [nil]
    else
      [:UNKNOWN, node.class.inspect]
    end
  end

  # First entry of the node's children.
  def first_child(node)
    kids = node.children
    kids.first
  end

  # The node following +node+, as reported by Hpricot's +next_node+.
  def next_sibling(node)
    node.next_node
  end

  # The node's parent.
  def parent(node)
    node.parent
  end
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'html5/treewalkers/base'
|
2
|
+
require 'rexml/document'
|
3
|
+
|
4
|
+
module HTML5
|
5
|
+
module TreeWalkers
|
6
|
+
module REXML
|
7
|
+
# Tree walker over a REXML document.
class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

  # Maps a REXML node to the [kind, *details] array expected by
  # NonRecursiveTreeWalker#each. An element without a name is reported as a
  # document fragment. Document is tested before Element, as in the original
  # ordering.
  def node_details(node)
    case node
    when ::REXML::Document
      [:DOCUMENT]
    when ::REXML::Element
      return [:DOCUMENT_FRAGMENT] unless node.name

      attribute_pairs = node.attributes.map { |attr_name, attr_value| [attr_name, attr_value] }
      [:ELEMENT, node.name, attribute_pairs, node.has_elements? || node.has_text?]
    when ::REXML::Text    then [:TEXT, node.value]
    when ::REXML::Comment then [:COMMENT, node.string]
    when ::REXML::DocType then [:DOCTYPE, node.name, node.public, node.system]
    when ::REXML::XMLDecl then [nil]
    else
      [:UNKNOWN, node.class.inspect]
    end
  end

  # First entry of the node's children.
  def first_child(node)
    kids = node.children
    kids.first
  end

  # The sibling following +node+.
  def next_sibling(node)
    node.next_sibling
  end

  # The node's parent.
  def parent(node)
    node.parent
  end
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'html5/treewalkers/base'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
module TreeWalkers
|
5
|
+
module SimpleTree
|
6
|
+
# Recursive tree walker over a SimpleTree node tree.
class TreeWalker < HTML5::TreeWalkers::Base
  # Brings the SimpleTree node classes (Document, Element, ...) into scope.
  include HTML5::TreeBuilders::SimpleTree

  # Yields the tokens for +node+ and, for non-void elements, its subtree.
  #
  # Document and DocumentFragment nodes yield nothing here; #each iterates
  # the root's children itself. They are matched before Element because
  # DocumentFragment is an Element subclass.
  def walk(node)
    case node
    when Document, DocumentFragment
      return

    when DocumentType
      yield doctype(node.name, node.public_id, node.system_id)

    when TextNode
      text(node.value) { |token| yield token }

    when Element
      if VOID_ELEMENTS.include?(node.name)
        yield empty_tag(node.name, node.attributes, node.hasContent())
      else
        yield start_tag(node.name, node.attributes)
        node.childNodes.each do |child|
          walk(child) { |token| yield token }
        end
        yield end_tag(node.name)
      end

    when CommentNode
      yield comment(node.value)

    else
      # Unknown nodes are reported through the token stream only; the stray
      # debug print to stdout that used to precede this was removed.
      yield unknown(node.class)
    end
  end

  # Yields the token stream for every child of the tree root.
  def each
    @tree.childNodes.each do |child|
      walk(child) { |token| yield token }
    end
  end
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|