spk-html5 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +10 -0
- data/Manifest.txt +73 -0
- data/README +45 -0
- data/Rakefile.rb +33 -0
- data/bin/html5 +7 -0
- data/lib/html5.rb +13 -0
- data/lib/html5/cli.rb +248 -0
- data/lib/html5/constants.rb +1061 -0
- data/lib/html5/filters/base.rb +10 -0
- data/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/html5/filters/iso639codes.rb +755 -0
- data/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/html5/filters/rfc2046.rb +31 -0
- data/lib/html5/filters/rfc3987.rb +91 -0
- data/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/html5/filters/validator.rb +834 -0
- data/lib/html5/filters/whitespace.rb +36 -0
- data/lib/html5/html5parser.rb +247 -0
- data/lib/html5/html5parser/after_after_body_phase.rb +43 -0
- data/lib/html5/html5parser/after_after_frameset_phase.rb +32 -0
- data/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/html5/html5parser/after_head_phase.rb +55 -0
- data/lib/html5/html5parser/before_head_phase.rb +44 -0
- data/lib/html5/html5parser/before_html_phase.rb +41 -0
- data/lib/html5/html5parser/in_body_phase.rb +636 -0
- data/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/html5/html5parser/in_foreign_content_phase.rb +50 -0
- data/lib/html5/html5parser/in_frameset_phase.rb +56 -0
- data/lib/html5/html5parser/in_head_phase.rb +143 -0
- data/lib/html5/html5parser/in_row_phase.rb +96 -0
- data/lib/html5/html5parser/in_select_phase.rb +90 -0
- data/lib/html5/html5parser/in_select_table_phase.rb +35 -0
- data/lib/html5/html5parser/in_table_body_phase.rb +92 -0
- data/lib/html5/html5parser/in_table_phase.rb +177 -0
- data/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/html5/html5parser/phase.rb +171 -0
- data/lib/html5/inputstream.rb +735 -0
- data/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/html5/sanitizer.rb +209 -0
- data/lib/html5/serializer.rb +2 -0
- data/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/html5/sniffer.rb +45 -0
- data/lib/html5/tokenizer.rb +1059 -0
- data/lib/html5/treebuilders.rb +24 -0
- data/lib/html5/treebuilders/base.rb +339 -0
- data/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/html5/treebuilders/rexml.rb +215 -0
- data/lib/html5/treebuilders/simpletree.rb +191 -0
- data/lib/html5/treewalkers.rb +26 -0
- data/lib/html5/treewalkers/base.rb +162 -0
- data/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/html5/version.rb +3 -0
- data/test/preamble.rb +69 -0
- data/test/test_cli.rb +16 -0
- data/test/test_encoding.rb +35 -0
- data/test/test_input_stream.rb +26 -0
- data/test/test_lxp.rb +283 -0
- data/test/test_parser.rb +63 -0
- data/test/test_sanitizer.rb +173 -0
- data/test/test_serializer.rb +67 -0
- data/test/test_sniffer.rb +27 -0
- data/test/test_stream.rb +71 -0
- data/test/test_tokenizer.rb +95 -0
- data/test/test_treewalkers.rb +135 -0
- data/test/test_validator.rb +31 -0
- data/test/tokenizer_test_parser.rb +67 -0
- data/test19.rb +38 -0
- metadata +198 -0
@@ -0,0 +1,215 @@
|
|
1
|
+
require 'html5/treebuilders/base'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module HTML5
|
6
|
+
module TreeBuilders
|
7
|
+
module REXML
|
8
|
+
|
9
|
+
# Tree-builder node backed by a REXML object.
#
# Each node keeps two structures in step: the builder's own +childNodes+
# list (inherited from Base::Node) and the wrapped REXML object in +rxobj+.
# +name+ and +attributes+ are delegated to the wrapped object.
class Node < Base::Node
  extend Forwardable
  def_delegators :@rxobj, :name, :attributes
  attr_accessor :rxobj

  # Builds the wrapped REXML object by instantiating the concrete
  # subclass's +rxclass+ with +name+.
  def initialize name
    super name
    @rxobj = self.class.rxclass.new name
  end

  # Appends +node+ as the last child of this node.
  #
  # When +node+ and the current last child are both TextNodes the text is
  # folded into the existing child instead of adding a second text node.
  def appendChild node
    last_child = childNodes.last
    if node.kind_of?(TextNode) && childNodes.length > 0 && last_child.kind_of?(TextNode)
      merge_text last_child, node
    else
      childNodes.push node
      rxobj.add node.rxobj
    end
    node.parent = self
  end

  # Detaches +node+ from both the builder tree and the REXML tree.
  def removeChild node
    childNodes.delete node
    rxobj.delete node.rxobj
    node.parent = nil
  end

  # Inserts +data+ as text, either before the child +before+ or, when
  # +before+ is nil, at the end of this node.
  def insertText data, before=nil
    if before
      insertBefore TextNode.new(data), before
    else
      appendChild TextNode.new(data)
    end
  end

  # Inserts +node+ immediately before +refNode+, which must already be a
  # child of this node.
  #
  # A TextNode inserted directly after an existing TextNode is merged into
  # it, mirroring appendChild. The inserted node's parent is set to this
  # node in both cases, as appendChild does; previously it was left unset.
  def insertBefore node, refNode
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
      merge_text childNodes[index-1], node
    else
      childNodes.insert index, node
      refNode.rxobj.parent.insert_before(refNode.rxobj, node.rxobj)
    end
    node.parent = self
  end

  # True when this node has at least one child.
  def hasContent
    (childNodes.length > 0)
  end

  private

  # Appends the text of +addition+ to +target+'s REXML text object and
  # flags the result as raw.
  def merge_text target, addition
    target.rxobj.value = target.rxobj.to_s + addition.rxobj.to_s
    target.rxobj.raw = true
  end
end
|
59
|
+
|
60
|
+
# Element node wrapping a ::REXML::Element, with an optional namespace
# label kept on the wrapper.
class Element < Node
  attr_reader :namespace

  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Element
  end

  def initialize name, namespace=nil
    super name
    @namespace = namespace
  end

  # Returns a childless copy of this element carrying the same name,
  # namespace and attributes. The namespace used to be dropped, so a cloned
  # namespaced element printed as if it had none.
  def cloneNode
    copy = self.class.new name, namespace
    attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
    copy
  end

  # Copies every name/value pair of +value+ onto the wrapped element's
  # attributes; attributes already present are not removed.
  def attributes= value
    value.each { |attr_name, attr_value| rxobj.attributes[attr_name] = attr_value }
  end

  # Renders this element, its attributes (except 'xmlns') and its subtree
  # in the line-per-node "\n|<indent>..." format used by the other
  # printTree methods in this file.
  def printTree indent=0
    tree = "\n|#{' ' * indent}<#{namespace ? namespace.to_s + ' ' : ''}#{name}>"
    indent += 2
    attributes.each do |attr_name, attr_value|
      next if attr_name == 'xmlns'
      tree += "\n|#{' ' * indent}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.each do |child|
      tree += child.printTree(indent)
    end
    tree
  end
end
|
94
|
+
|
95
|
+
# Root node of a parsed document; wraps a ::REXML::Document.
class Document < Node
  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Document
  end

  # A document has no name, so nil is handed to Node#initialize.
  def initialize
    super(nil)
  end

  # ryansking: an appendChild override that added the XHTML namespace
  # ('http://www.w3.org/1999/xhtml') to the 'html' element used to live
  # here; removing it did not cause any tests to fail.

  # Renders "#document" followed by every child's printTree output,
  # with children indented two columns deeper than +indent+.
  def printTree indent=0
    childNodes.inject("#document") do |rendered, child|
      rendered + child.printTree(indent + 2)
    end
  end
end
|
120
|
+
|
121
|
+
# DOCTYPE node wrapping a ::REXML::DocType.
class DocumentType < Node
  # public_id / system_id read straight from the wrapped DocType.
  def_delegator :@rxobj, :public, :public_id
  def_delegator :@rxobj, :system, :system_id

  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::DocType
  end

  # Node#initialize first builds a plain DocType from +name+; it is then
  # replaced by one carrying whichever identifiers were supplied.
  def initialize name, public_id, system_id
    super(name)
    doctype_source =
      if public_id
        [name, ::REXML::DocType::PUBLIC, public_id, system_id]
      elsif system_id
        [name, ::REXML::DocType::SYSTEM, nil, system_id]
      else
        name
      end
    @rxobj = ::REXML::DocType.new doctype_source
  end

  # Renders "<!DOCTYPE name>", adding both quoted identifiers when at
  # least one of them is set.
  def printTree indent=0
    identifiers = ""
    if [public_id, system_id].any?
      identifiers = " \"#{public_id}\" \"#{system_id}\""
    end
    "\n|#{' ' * indent}<!DOCTYPE #{name}" + identifiers + ">"
  end
end
|
145
|
+
|
146
|
+
# Nameless element used as the container for fragment parsing.
class DocumentFragment < Element
  def initialize
    super(nil)
  end

  # Renders only the children (two columns deeper than +indent+); the
  # fragment itself contributes no line of its own.
  def printTree indent=0
    childNodes.inject("") do |rendered, child|
      rendered + child.printTree(indent+2)
    end
  end
end
|
159
|
+
|
160
|
+
# Text node wrapping a ::REXML::Text.
class TextNode < Node
  # Wraps +data+ in a REXML text object created in raw mode.
  #
  # The fourth argument to REXML::Text.new marks the string as raw, i.e.
  # already entity-escaped, so the markup-significant characters are
  # escaped here first. '&' has to be replaced before '<' and '>' so that
  # the ampersands introduced by the later replacements are not escaped a
  # second time. (The replacements had degenerated into no-ops such as
  # gsub('&', '&'), leaving '&', '<' and '>' unescaped in a raw text node.)
  #
  # Node#initialize is deliberately not invoked: this class defines no
  # +rxclass+ for it to instantiate.
  def initialize data
    raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
    # Only Rubies whose String knows about encodings get the UTF-8 tag.
    raw.force_encoding('UTF-8') if String.method_defined? :encode
    @rxobj = ::REXML::Text.new(raw, true, nil, true)
  end

  # Renders the text value in double quotes on its own line.
  def printTree indent=0
    "\n|#{' ' * indent}\"#{rxobj.value}\""
  end
end
|
174
|
+
|
175
|
+
# Comment node wrapping a ::REXML::Comment.
class CommentNode < Node
  # REXML class instantiated by Node#initialize for this node type.
  def self.rxclass
    ::REXML::Comment
  end

  # Renders the comment text between "<!-- " and " -->" on its own line.
  def printTree indent=0
    padding = ' ' * indent
    "\n|" + padding + "<!-- #{rxobj.string} -->"
  end
end
|
184
|
+
|
185
|
+
# Tree builder that produces REXML-backed nodes.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes defined in this module for each kind of
  # node the builder creates.
  def initialize
    @documentClass, @doctypeClass, @elementClass, @commentClass, @fragmentClass =
      Document, DocumentType, Element, CommentNode, DocumentFragment
  end

  # Creates a doctype node from the three identifiers and appends it to
  # the document.
  def insertDoctype(name, public_id, system_id)
    @document.appendChild(@doctypeClass.new(name, public_id, system_id))
  end

  # Returns the printTree rendering of +node+.
  def testSerializer node
    node.printTree
  end

  # Returns the underlying REXML object of the document.
  def get_document
    @document.rxobj
  end

  # Stores the fragment returned by the superclass as the current
  # document and returns the children of its REXML object.
  def get_fragment
    @document = super
    @document.rxobj.children
  end
end
|
212
|
+
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
require 'html5/treebuilders/base'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
module TreeBuilders
|
5
|
+
module SimpleTree
|
6
|
+
|
7
|
+
# Node representing an item in the tree.
class Node < Base::Node
  # name - The tag name associated with the node
  attr_accessor :name

  # The value of the current node (applies to text nodes and comments)
  attr_accessor :value

  # a hash holding name, value pairs for attributes of the node
  attr_accessor :attributes

  def initialize name
    super(name)
    @name = name
    @value = nil
    @attributes = {}
  end

  # Appends +node+ as the last child. A TextNode appended directly after
  # another TextNode is merged into it rather than stored separately.
  def appendChild node
    if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
      childNodes.last.value += node.value
    else
      childNodes << node
    end
    node.parent = self
  end

  # Removes +node+ from the children and clears its parent link.
  def removeChild node
    childNodes.delete node
    node.parent = nil
  end

  # Returns a childless copy built with this node's name, attributes and
  # value.
  def cloneNode
    copy = self.class.new name
    attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
    copy.value = value
    copy
  end

  # Inserts +data+ as text, either before the child +before+ or, when
  # +before+ is nil, at the end of this node.
  def insertText data, before=nil
    if before
      insertBefore TextNode.new(data), before
    else
      appendChild TextNode.new(data)
    end
  end

  # Inserts +node+ immediately before +refNode+, which must already be a
  # child of this node.
  #
  # A TextNode inserted directly after an existing TextNode is merged into
  # it, mirroring appendChild. The inserted node's parent is set to this
  # node in both cases, as appendChild does; previously it was left unset.
  def insertBefore node, refNode
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
      childNodes[index-1].value += node.value
    else
      childNodes.insert index, node
    end
    node.parent = self
  end

  # Renders this node via to_s on its own "\n|<indent>" line, followed by
  # its children two columns deeper.
  def printTree indent=0
    tree = "\n|%s%s" % [' '* indent, self.to_s]
    childNodes.each do |child|
      tree += child.printTree(indent + 2)
    end
    tree
  end

  # True when this node has at least one child.
  def hasContent
    childNodes.length > 0
  end
end
|
77
|
+
|
78
|
+
# Element node with an optional namespace label.
class Element < Node
  attr_accessor :namespace

  def initialize(name, namespace=nil)
    super(name)
    @namespace = namespace
  end

  # Copies the element as the superclass does and additionally carries the
  # namespace over, which the inherited implementation has no knowledge of
  # and therefore dropped.
  def cloneNode
    copy = super
    copy.namespace = namespace
    copy
  end

  # "<name>", or "<namespace name>" when a namespace is set.
  def to_s
    "<#{namespace ? namespace.to_s + ' ' : ''}#{name}>"
  end

  # Renders the element line, then one line per attribute and the child
  # subtrees, all two columns deeper than the element itself.
  def printTree indent=0
    tree = "\n|%s%s" % [' '* indent, self.to_s]
    indent += 2
    attributes.each do |attr_name, attr_value|
      tree += "\n|%s%s=\"%s\"" % [' ' * indent, attr_name, attr_value]
    end
    childNodes.each do |child|
      tree += child.printTree(indent)
    end
    tree
  end
end
|
101
|
+
|
102
|
+
# Root node of a parsed document.
class Document < Node
  # A document has no name, so nil is handed to Node#initialize.
  def initialize
    super(nil)
  end

  def to_s
    "#document"
  end

  # Renders "#document" followed by every child's printTree output,
  # with children indented two columns deeper than +indent+.
  def printTree indent=0
    childNodes.inject(to_s) do |rendered, child|
      rendered + child.printTree(indent + 2)
    end
  end
end
|
119
|
+
|
120
|
+
# DOCTYPE node; the public and system identifiers start out nil and are
# assigned through the accessors.
class DocumentType < Node
  attr_accessor :public_id, :system_id

  def initialize name
    super(name)
    @public_id = nil
    @system_id = nil
  end

  # "<!DOCTYPE name>", with both quoted identifiers added when at least
  # one of them is set.
  def to_s
    identifiers = ''
    if [@public_id, @system_id].any?
      identifiers = " \"#{@public_id}\" \"#{@system_id}\""
    end
    "<!DOCTYPE #{name}" + identifiers + ">"
  end
end
|
133
|
+
|
134
|
+
# Nameless element used as the container for fragment parsing.
class DocumentFragment < Element
  def initialize
    super(nil)
  end

  # Renders only the children (two columns deeper than +indent+); the
  # fragment itself contributes no line of its own.
  def printTree indent=0
    childNodes.inject("") do |rendered, child|
      rendered + child.printTree(indent+2)
    end
  end
end
|
147
|
+
|
148
|
+
# Nameless node whose +value+ holds the text.
class TextNode < Node
  def initialize value
    super(nil)
    @value = value
  end

  # The text wrapped in double quotes.
  def to_s
    format('"%s"', value)
  end
end
|
158
|
+
|
159
|
+
# Nameless node whose +value+ holds the comment text.
class CommentNode < Node
  def initialize value
    super(nil)
    @value = value
  end

  # The comment text between "<!-- " and " -->".
  def to_s
    format("<!-- %s -->", value)
  end
end
|
169
|
+
|
170
|
+
# Tree builder that produces SimpleTree nodes.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes defined in this module for each kind of
  # node the builder creates.
  def initialize
    @documentClass, @doctypeClass, @elementClass, @commentClass, @fragmentClass =
      Document, DocumentType, Element, CommentNode, DocumentFragment
  end

  # Returns the printTree rendering of +node+.
  def testSerializer node
    node.printTree
  end

  # Stores the fragment returned by the superclass as the current
  # document and returns it.
  def get_fragment
    @document = super
  end
end
|
188
|
+
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'html5/treewalkers/base'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
module TreeWalkers
|
5
|
+
|
6
|
+
class << self
  # Looks up a tree walker class by name (compared case-insensitively
  # after to_s). The implementation file is required only when its walker
  # is requested. Raises a RuntimeError for any other name.
  def [](name)
    key = name.to_s.downcase
    if key == 'simpletree'
      require 'html5/treewalkers/simpletree'
      SimpleTree::TreeWalker
    elsif key == 'rexml'
      require 'html5/treewalkers/rexml'
      REXML::TreeWalker
    elsif key == 'hpricot'
      require 'html5/treewalkers/hpricot'
      Hpricot::TreeWalker
    else
      raise "Unknown TreeWalker #{name}"
    end
  end

  alias :get_tree_walker :[]
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'html5/constants'
|
2
|
+
module HTML5
|
3
|
+
module TreeWalkers
|
4
|
+
|
5
|
+
# Factory methods for the token hashes emitted by tree walkers.
module TokenConstructor
  # Token reporting a serialization problem described by +msg+.
  def error(msg)
    {:type => "SerializeError", :data => msg}
  end

  # Attributes are carried in tokens as whatever +attrs.to_a+ returns.
  def normalize_attrs(attrs)
    attrs.to_a
  end

  # Token for a void element.
  # NOTE(review): when +has_children+ is true an error token is built but
  # its value is discarded; only the EmptyTag token is returned.
  def empty_tag(name, attrs, has_children=false)
    if has_children
      error(_("Void element has children"))
    end
    {:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
  end

  def start_tag(name, attrs)
    {:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
  end

  def end_tag(name)
    {:type => :EndTag, :name => name, :data => []}
  end

  # Splits +data+ into tokens and yields them in order: an optional
  # SpaceCharacters token for leading whitespace, a Characters token for
  # the remainder, and an optional SpaceCharacters token for trailing
  # whitespace. Text consisting solely of whitespace yields a single
  # SpaceCharacters token.
  def text(data)
    space_run = "[#{SPACE_CHARACTERS.join('')}]+"

    leading = /\A(#{space_run})/m.match(data)
    if leading
      yield({:type => :SpaceCharacters, :data => leading[1]})
      data = data[leading[1].length .. -1]
      return if data.empty?
    end

    trailing = /(#{space_run})\Z/m.match(data)
    if trailing
      yield({:type => :Characters, :data => data[0 ... -trailing[1].length]})
      yield({:type => :SpaceCharacters, :data => trailing[1]})
    else
      yield({:type => :Characters, :data => data})
    end
  end

  def comment(data)
    {:type => :Comment, :data => data}
  end

  def doctype(name, public_id, system_id, correct=nil)
    {:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
  end

  # Error token for a node kind the walker does not recognise.
  def unknown(nodeType)
    error(_("Unknown node type: ") + nodeType.to_s)
  end

  # Message hook; returns +str+ unchanged.
  def _(str)
    str
  end
end
|
58
|
+
|
59
|
+
# Common base for tree walkers: holds the tree and defines the iteration
# interface that concrete walkers implement through +each+.
class Base
  include TokenConstructor

  # tree - the root object the walker will traverse.
  def initialize(tree)
    @tree = tree
  end

  # Yields each token of the tree in turn; concrete walkers must override.
  def each
    raise NotImplementedError
  end

  # Same as +each+.
  #
  # Defined as a forwarding method rather than `alias walk each`: an alias
  # is bound to Base#each at definition time, so +walk+ kept raising
  # NotImplementedError even on subclasses that override +each+.
  def walk(&block)
    each(&block)
  end

  # Collects every token produced by +each+ into an array.
  def to_ary
    tokens = []
    each { |token| tokens << token }
    tokens
  end
end
|
80
|
+
|
81
|
+
# Walks a tree iteratively (no recursion), relying on four navigation
# hooks that concrete walkers supply.
class NonRecursiveTreeWalker < TreeWalkers::Base
  # Must return an array whose first element is the node kind
  # (:DOCTYPE, :TEXT, :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT,
  # or nil for nodes to ignore); the remaining elements are the arguments
  # for the matching token constructor. For :ELEMENT they are
  # name, attributes, has_children.
  def node_details(node)
    raise NotImplementedError
  end

  # Must return the first child of +node+, or nil.
  def first_child(node)
    raise NotImplementedError
  end

  # Must return the sibling following +node+, or nil.
  def next_sibling(node)
    raise NotImplementedError
  end

  # Must return the parent of +node+, or nil.
  def parent(node)
    raise NotImplementedError
  end

  # Yields the tokens for the whole tree rooted at @tree, in document order.
  #
  # Absence of a node is tested with nil? instead of `!= nil`. The latter
  # dispatches to the node's own == (REXML::Text, for one, is Comparable on
  # its string value), so a node that compares equal to nil could end the
  # walk early.
  def each
    current_node = @tree
    until current_node.nil?
      details = node_details(current_node)
      has_children = false

      case details.shift
      when :DOCTYPE
        yield doctype(*details)

      when :TEXT
        text(*details) { |token| yield token }

      when :ELEMENT
        name, attributes, has_children = details
        if VOID_ELEMENTS.include?(name)
          yield empty_tag(name, attributes.to_a, has_children)
          # Children of a void element are never descended into.
          has_children = false
        else
          yield start_tag(name, attributes.to_a)
        end

      when :COMMENT
        yield comment(details[0])

      when :DOCUMENT, :DOCUMENT_FRAGMENT
        has_children = true

      when nil
        # ignore (REXML::XMLDecl is an example)

      else
        yield unknown(details[0])
      end

      child = has_children ? first_child(current_node) : nil
      unless child.nil?
        current_node = child
        next
      end

      # No child to descend into: close elements while climbing until a
      # next sibling is found or the root has been closed.
      until current_node.nil?
        details = node_details(current_node)
        if details.shift == :ELEMENT
          name = details.first
          yield end_tag(name) unless VOID_ELEMENTS.include?(name)
        end

        if @tree == current_node
          current_node = nil
        else
          sibling = next_sibling(current_node)
          unless sibling.nil?
            current_node = sibling
            break
          end

          current_node = parent(current_node)
        end
      end
    end
  end
end
|
160
|
+
|
161
|
+
end
|
162
|
+
end
|