spk-html5 0.10.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +10 -0
- data/Manifest.txt +73 -0
- data/README +45 -0
- data/Rakefile.rb +33 -0
- data/bin/html5 +7 -0
- data/lib/html5.rb +13 -0
- data/lib/html5/cli.rb +248 -0
- data/lib/html5/constants.rb +1061 -0
- data/lib/html5/filters/base.rb +10 -0
- data/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/html5/filters/iso639codes.rb +755 -0
- data/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/html5/filters/rfc2046.rb +31 -0
- data/lib/html5/filters/rfc3987.rb +91 -0
- data/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/html5/filters/validator.rb +834 -0
- data/lib/html5/filters/whitespace.rb +36 -0
- data/lib/html5/html5parser.rb +247 -0
- data/lib/html5/html5parser/after_after_body_phase.rb +43 -0
- data/lib/html5/html5parser/after_after_frameset_phase.rb +32 -0
- data/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/html5/html5parser/after_head_phase.rb +55 -0
- data/lib/html5/html5parser/before_head_phase.rb +44 -0
- data/lib/html5/html5parser/before_html_phase.rb +41 -0
- data/lib/html5/html5parser/in_body_phase.rb +636 -0
- data/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/html5/html5parser/in_foreign_content_phase.rb +50 -0
- data/lib/html5/html5parser/in_frameset_phase.rb +56 -0
- data/lib/html5/html5parser/in_head_phase.rb +143 -0
- data/lib/html5/html5parser/in_row_phase.rb +96 -0
- data/lib/html5/html5parser/in_select_phase.rb +90 -0
- data/lib/html5/html5parser/in_select_table_phase.rb +35 -0
- data/lib/html5/html5parser/in_table_body_phase.rb +92 -0
- data/lib/html5/html5parser/in_table_phase.rb +177 -0
- data/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/html5/html5parser/phase.rb +171 -0
- data/lib/html5/inputstream.rb +735 -0
- data/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/html5/sanitizer.rb +209 -0
- data/lib/html5/serializer.rb +2 -0
- data/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/html5/sniffer.rb +45 -0
- data/lib/html5/tokenizer.rb +1059 -0
- data/lib/html5/treebuilders.rb +24 -0
- data/lib/html5/treebuilders/base.rb +339 -0
- data/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/html5/treebuilders/rexml.rb +215 -0
- data/lib/html5/treebuilders/simpletree.rb +191 -0
- data/lib/html5/treewalkers.rb +26 -0
- data/lib/html5/treewalkers/base.rb +162 -0
- data/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/html5/version.rb +3 -0
- data/test/preamble.rb +69 -0
- data/test/test_cli.rb +16 -0
- data/test/test_encoding.rb +35 -0
- data/test/test_input_stream.rb +26 -0
- data/test/test_lxp.rb +283 -0
- data/test/test_parser.rb +63 -0
- data/test/test_sanitizer.rb +173 -0
- data/test/test_serializer.rb +67 -0
- data/test/test_sniffer.rb +27 -0
- data/test/test_stream.rb +71 -0
- data/test/test_tokenizer.rb +95 -0
- data/test/test_treewalkers.rb +135 -0
- data/test/test_validator.rb +31 -0
- data/test/tokenizer_test_parser.rb +67 -0
- data/test19.rb +38 -0
- metadata +198 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
module HTML5
  module TreeBuilders

    class << self
      # Look up a tree builder class by name.
      #
      # name - anything responding to to_s; it is compared
      #        case-insensitively against 'simpletree', 'rexml' and
      #        'hpricot'.
      #
      # The file implementing the requested backend is required lazily,
      # so a backend is only loaded once it is actually asked for.
      #
      # Returns the backend's TreeBuilder class.
      # Raises RuntimeError for any other name.
      def [](name)
        backend = name.to_s.downcase

        if backend == 'simpletree'
          require 'html5/treebuilders/simpletree'
          return SimpleTree::TreeBuilder
        end

        if backend == 'rexml'
          require 'html5/treebuilders/rexml'
          return REXML::TreeBuilder
        end

        if backend == 'hpricot'
          require 'html5/treebuilders/hpricot'
          return Hpricot::TreeBuilder
        end

        raise "Unknown TreeBuilder #{name}"
      end

      # Method-style spelling of TreeBuilders[name].
      alias_method :get_tree_builder, :[]
    end
  end
end
|
@@ -0,0 +1,339 @@
|
|
1
|
+
require 'html5/constants'
|
2
|
+
|
3
|
+
#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
|
4
|
+
|
5
|
+
module HTML5
|
6
|
+
|
7
|
+
# The scope markers are inserted when entering buttons, object elements,
|
8
|
+
# marquees, table cells, and table captions, and are used to prevent formatting
|
9
|
+
# from "leaking" into tables, buttons, object elements, and marquees.
|
10
|
+
Marker = nil
|
11
|
+
|
12
|
+
module TreeBuilders
|
13
|
+
module Base
|
14
|
+
|
15
|
+
# Abstract node of a tree under construction.
#
# It only stores the generic bookkeeping (parent, children, flags); every
# operation that depends on the concrete tree library raises
# NotImplementedError and has to be supplied by a subclass.
class Node
  # parent     - the node this one is attached to (nil when unattached,
  #              e.g. for the document node)
  # childNodes - list of child nodes; must contain all elements but not
  #              necessarily other node types
  # flags      - list of miscellaneous flags that can be set on the node
  attr_accessor :parent, :childNodes, :flags

  # name is accepted for the benefit of subclasses; this base class does
  # not store it.
  def initialize(name)
    @parent, @childNodes, @flags = nil, [], []
  end

  # Attach node as the last child of this node.
  def appendChild(node)
    raise NotImplementedError
  end

  # Add data as text to this node, either in front of the child
  # insertBefore or, when that is nil, at the end of the node's text.
  def insertText(data, insertBefore=nil)
    raise NotImplementedError
  end

  # Attach node as a child of this node, placed directly in front of
  # refNode in the child list. The contract is that an error is raised
  # when refNode is not a child of this node.
  def insertBefore(node, refNode)
    raise NotImplementedError
  end

  # Detach node from this node's children.
  def removeChild(node)
    raise NotImplementedError
  end

  # Hand every child of this node over to newParent (through its
  # appendChild) and leave this node with an empty child list.
  # Trees that do not store text as nodes override this so that text is
  # moved correctly as well.
  def reparentChildren(newParent)
    #XXX - should this method be made more general?
    @childNodes.each do |child|
      newParent.appendChild(child)
    end
    @childNodes = []
  end

  # Produce a shallow copy: same name and attributes, but no parent and
  # no children.
  def cloneNode
    raise NotImplementedError
  end

  # Tell whether the node has any children or text.
  def hasContent
    raise NotImplementedError
  end
end
|
75
|
+
|
76
|
+
# Base treebuilder implementation.
#
# Keeps the stack of open elements and the list of active formatting
# elements, and offers the insertion helpers built on top of them.
#
# The node classes to instantiate are read from instance variables that a
# subclass is expected to assign before #reset runs:
#
#   @documentClass - class to use for the document root
#   @elementClass  - class to use for HTML elements
#   @commentClass  - class to use for comments
#   @doctypeClass  - class to use for doctypes
#   @fragmentClass - class to use for fragments
class TreeBuilder

  # Stack of open elements; new elements are appended to its last entry.
  attr_accessor :open_elements

  # List of active formatting elements, with Marker entries in between.
  attr_accessor :activeFormattingElements

  # Root node created by #reset.
  attr_accessor :document

  attr_accessor :head_pointer

  attr_accessor :formPointer

  # Whether the misnested-table insertion strategy is active
  # (see #insert_from_table=).
  attr_reader :insert_from_table

  def initialize
    reset
  end

  # Put the builder back into its initial state and create a fresh
  # document node from @documentClass.
  def reset
    @open_elements = []
    @activeFormattingElements = []

    #XXX - rename these to headElement, formElement
    @head_pointer = nil
    @formPointer = nil

    self.insert_from_table = false

    @document = @documentClass.new
  end

  # Tell whether an element named target is in scope.
  #
  # The stack of open elements is walked from the top down. The search
  # succeeds on the first element named target and fails on a 'table',
  # on 'html', or - unless tableVariant is true - on any element listed
  # in SCOPING_ELEMENTS.
  #
  # Returns true or false. An empty or exhausted stack yields false (the
  # original ended in an undefined `assert` call and therefore raised
  # NoMethodError when the stack ran out).
  def elementInScope(target, tableVariant=false)
    @open_elements.reverse_each do |element|
      name = element.name
      return true  if name == target
      return false if name == 'table'
      return false if !tableVariant && SCOPING_ELEMENTS.include?(name)
      return false if name == 'html'
    end
    false
  end

  # Re-open the trailing active formatting elements that are no longer on
  # the stack of open elements: each one is cloned, the clone is inserted
  # with #insert_element, and the list entry is replaced by the new
  # element.
  #
  # Within this algorithm the order of steps described in the
  # specification is not quite the same as the order of steps in the
  # code. It should still do the same though.
  def reconstructActiveFormattingElements
    # Step 1: stop the algorithm when there's nothing to do.
    return if @activeFormattingElements.empty?

    # Step 2 and step 3: we start with the last element. So i is -1.
    i = -1
    entry = @activeFormattingElements[i]
    return if entry == Marker || @open_elements.include?(entry)

    # Steps 4 to 6: rewind until a Marker, an element that is still open,
    # or the start of the list is found. Array#[] returns nil instead of
    # raising when the index runs off the front, so the bound is checked
    # explicitly rather than with a rescue that can never fire.
    first = -@activeFormattingElements.length
    until entry == Marker || @open_elements.include?(entry)
      # Step 5: let entry be one earlier in the list.
      i -= 1
      break if i < first
      entry = @activeFormattingElements[i]
    end

    loop do
      # Step 7
      i += 1

      # Step 8
      copy = @activeFormattingElements[i].cloneNode

      # Step 9
      element = insert_element(copy.name, copy.attributes)

      # Step 10
      @activeFormattingElements[i] = element

      # Step 11
      break if element == @activeFormattingElements[-1]
    end
  end

  # Pop active formatting elements up to and including the last Marker
  # (or until the list is empty).
  def clearActiveFormattingElements
    until @activeFormattingElements.empty?
      break if @activeFormattingElements.pop == Marker
    end
  end

  # Check if an element exists between the end of the active
  # formatting elements and the last marker. If it does, return it, else
  # return false
  def elementInActiveFormattingElements(name)
    @activeFormattingElements.reverse_each do |element|
      # Check for Marker first because if it's a Marker it doesn't have a
      # name attribute.
      break if element == Marker
      return element if element.name == name
    end
    false
  end

  # Create a doctype node from @doctypeClass and append it to the document.
  def insertDoctype(name, public_id, system_id)
    doctype = @doctypeClass.new(name)
    doctype.public_id = public_id
    doctype.system_id = system_id
    @document.appendChild(doctype)
  end

  # Append a comment node to parent, or to the last open element when no
  # parent is given.
  def insert_comment(data, parent=nil)
    parent = @open_elements[-1] if parent.nil?
    parent.appendChild(@commentClass.new(data))
  end

  # Create an element but don't insert it anywhere
  def createElement(name, attributes)
    element = @elementClass.new(name)
    element.attributes = attributes
    element
  end

  # Switch the function used to insert an element from the
  # normal one to the misnested table one and back again
  def insert_from_table=(value)
    @insert_from_table = value
    @insert_element = value ? :insert_elementTable : :insert_elementNormal
  end

  # Insert an element using whichever strategy is currently selected by
  # #insert_from_table=.
  def insert_element(name, attributes, namespace = nil)
    send(@insert_element, name, attributes, namespace)
  end

  def insert_foreign_element(name, attributes, namespace)
    insert_element(name, attributes, namespace)
  end

  # Create an element, append it to the last open element and push it
  # onto the stack of open elements. Returns the new element.
  def insert_elementNormal(name, attributes, namespace=nil)
    element = @elementClass.new(name, namespace)
    element.attributes = attributes
    @open_elements.last.appendChild(element)
    @open_elements.push(element)
    element
  end

  # Create an element and insert it into the tree, relocating it to the
  # position given by #getTableMisnestedNodePosition when the last open
  # element is listed in TABLE_INSERT_MODE_ELEMENTS. Returns the new
  # element.
  def insert_elementTable(name, attributes, namespace=nil)
    unless TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
      # Forward the namespace too; it used to be dropped on this path.
      return insert_elementNormal(name, attributes, namespace)
    end

    element = @elementClass.new(name, namespace)
    element.attributes = attributes

    #We should be in the InTable mode. This means we want to do
    #special magic element rearranging
    parent, sibling = getTableMisnestedNodePosition
    if sibling.nil?
      parent.appendChild(element)
    else
      parent.insertBefore(element, sibling)
    end
    @open_elements.push(element)
    element
  end

  # Insert text into parent (default: the last open element), or at the
  # misnested-table position when table insertion is active and the last
  # open element is listed in TABLE_INSERT_MODE_ELEMENTS.
  def insertText(data, parent=nil)
    parent = @open_elements[-1] if parent.nil?

    if !@insert_from_table || !TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)
      parent.insertText(data)
    else
      #We should be in the InTable mode. This means we want to do
      #special magic element rearranging
      parent, sibling = getTableMisnestedNodePosition
      parent.insertText(data, sibling)
    end
  end

  # Get the foster parent element, and sibling to insert before
  # (or nil) when inserting a misnested table node
  #
  # Returns a two-element array [fosterParent, insertBefore].
  def getTableMisnestedNodePosition
    #The foster parent element is the one which comes before the most
    #recently opened table element
    last_table = @open_elements.reverse.find { |element| element.name == "table" }

    # No table open at all: use the first open element.
    return [@open_elements[0], nil] unless last_table

    #XXX - we should really check that this parent is actually a
    #node here
    return [last_table.parent, last_table] if last_table.parent

    # Table without a parent: use the element opened just before it.
    [@open_elements[@open_elements.index(last_table) - 1], nil]
  end

  # Pop elements off the stack of open elements while the last one is a
  # dd, dt, li, p, td, th or tr and is not named exclude. Stops quietly
  # when the stack becomes empty.
  def generateImpliedEndTags(exclude=nil)
    # XXX td, th and tr are not actually needed
    implied = %w[dd dt li p td th tr]

    # XXX This is not entirely what the specification says. We should
    # investigate it more closely.
    until @open_elements.empty?
      name = @open_elements[-1].name
      break unless implied.include?(name) && name != exclude
      @open_elements.pop
    end
  end

  def get_document
    @document
  end

  # Build a fragment from @fragmentClass and move the children of the
  # first open element into it.
  def get_fragment
    #assert @inner_html
    fragment = @fragmentClass.new
    @open_elements[0].reparentChildren(fragment)
    fragment
  end

  # Serialize the subtree of node in the format required by unit tests
  # node - the node from which to start serializing
  def testSerializer(node)
    raise NotImplementedError
  end

end
|
337
|
+
end
|
338
|
+
end
|
339
|
+
end
|
@@ -0,0 +1,231 @@
|
|
1
|
+
require 'html5/treebuilders/base'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'hpricot'
|
4
|
+
require 'forwardable'
|
5
|
+
|
6
|
+
module HTML5
|
7
|
+
module TreeBuilders
|
8
|
+
module Hpricot
|
9
|
+
|
10
|
+
# Common behaviour of the Hpricot-backed nodes.
#
# Every wrapper keeps two parallel structures in sync: its own
# childNodes/parent (wrapper objects) and the children/parent of the
# wrapped Hpricot object held in @hpricot. Subclasses provide
# self.hpricot_class, the Hpricot class instantiated by #initialize.
class Node < Base::Node
  extend Forwardable

  def_delegators :@hpricot, :name

  # The wrapped Hpricot object.
  attr_accessor :hpricot

  def initialize(name)
    super(name)
    @hpricot = self.class.hpricot_class.new name
  end

  # Append node as the last child. A text node appended directly after
  # another text node is merged into it instead of being added.
  def appendChild(node)
    if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
      childNodes.last.hpricot.content = childNodes.last.hpricot.content + node.hpricot.content
    else
      childNodes << node
      hpricot.children << node.hpricot
    end
    # Detach the Hpricot object from its previous parent, if it had one.
    if (oldparent = node.hpricot.parent) != nil
      oldparent.children.delete_at(oldparent.children.index(node.hpricot))
    end
    node.hpricot.parent = hpricot
    node.parent = self
  end

  # Remove node from both the wrapper and the Hpricot child lists and
  # clear its parent references.
  def removeChild(node)
    childNodes.delete(node)
    hpricot.children.delete_at(hpricot.children.index(node.hpricot))
    node.hpricot.parent = nil
    node.parent = nil
  end

  # Insert data as a text node, in front of the child `before` when given
  # and at the end otherwise.
  def insertText(data, before=nil)
    if before
      insertBefore(TextNode.new(data), before)
    else
      appendChild(TextNode.new(data))
    end
  end

  # Insert node directly in front of the child refNode. A text node that
  # would end up right after another text node is merged into it.
  #
  # Raises ArgumentError when refNode is not a child of this node
  # (previously this surfaced as a NoMethodError on nil).
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    raise ArgumentError, 'refNode is not a child of this node' if index.nil?

    if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
      # NOTE(review): appendChild merges via #content while this branch
      # uses #to_s - confirm whether the difference is intentional.
      childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
    else
      refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
      childNodes.insert(index, node)
      # Keep the wrapper's parent in step with its new position, as
      # appendChild does; it used to be left untouched here.
      node.parent = self
    end
  end

  # True when the node has at least one child.
  def hasContent
    childNodes.any?
  end
end
|
65
|
+
|
66
|
+
# Element node backed by an Hpricot::Elem.
class Element < Node
  # Namespace given at construction time (nil when none was given).
  attr_reader :namespace

  def self.hpricot_class
    ::Hpricot::Elem
  end

  def initialize(name, namespace=nil)
    super(name)

    @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
    @namespace = namespace
  end

  # The element name, read from the wrapped element's start tag.
  def name
    @hpricot.stag.name
  end

  # Return a new element of the same class with the same name, namespace
  # and attributes, but without parent or children. (The namespace used
  # to be dropped by the copy.)
  def cloneNode
    copy = self.class.new(name, @namespace)
    attributes.each { |attr_name, value| copy.hpricot[attr_name] = value }
    copy
  end

  # A call to Hpricot::Elem#raw_attributes is built dynamically,
  # so alterations to the returned value (a hash) will be lost.
  #
  # AttributeProxy works around this by forwarding :[]= calls
  # to the raw_attributes accessor on the element start tag.
  #
  # Every other message is forwarded to the element's attributes.
  class AttributeProxy
    def initialize(hpricot)
      @hpricot = hpricot
    end

    def []=(k, v)
      @hpricot.stag.send(stag_attributes_method)[k] = v
    end

    def stag_attributes_method
      # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
      @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
    end

    def method_missing(*a, &b)
      @hpricot.attributes.send(*a, &b)
    end

    # Keep respond_to? truthful about the messages method_missing forwards.
    def respond_to_missing?(method_name, include_private = false)
      @hpricot.attributes.respond_to?(method_name, include_private) || super
    end
  end

  # A proxy giving read and write access to the element's attributes.
  def attributes
    AttributeProxy.new(@hpricot)
  end

  # Copy every name/value pair of attrs onto the wrapped element.
  def attributes=(attrs)
    attrs.each { |attr_name, value| @hpricot[attr_name] = value }
  end

  # Render this element, its attributes (except xmlns) and its subtree in
  # the test-suite tree format, one "\n|"-prefixed line per item.
  def printTree(indent=0)
    tree = "\n|#{' ' * indent}<#{!@namespace.nil? ? @namespace.to_s + ' ' : ''}#{name}>"
    indent += 2
    attributes.each do |attr_name, value|
      next if attr_name == 'xmlns'
      tree << "\n|#{' ' * indent}#{attr_name}=\"#{value}\""
    end
    childNodes.each { |child| tree << child.printTree(indent) }
    tree
  end
end
|
134
|
+
|
135
|
+
# Document root node backed by an Hpricot::Doc.
class Document < Node
  def self.hpricot_class
    ::Hpricot::Doc
  end

  # The document has no name of its own, so nil is handed to Node.
  def initialize
    super(nil)
  end

  # Render "#document" followed by every child's tree, each child being
  # printed two columns deeper than indent.
  def printTree(indent=0)
    tree = '#document'
    childNodes.each do |child|
      tree += child.printTree(indent + 2)
    end
    tree
  end
end
|
148
|
+
|
149
|
+
# Doctype node backed by an Hpricot::DocType.
class DocumentType < Node
  def_delegators :@hpricot, :public_id, :system_id

  def self.hpricot_class
    ::Hpricot::DocType
  end

  # Builds the Hpricot doctype directly from all three values; the Node
  # constructor is not invoked.
  def initialize(name, public_id, system_id)
    @hpricot = self.class.hpricot_class.new(name, public_id, system_id)
  end

  # Render the doctype in the test-suite tree format. The public and
  # system identifiers are printed (both, quoted) as soon as either of
  # them is set; a missing or empty target prints as "<!DOCTYPE >".
  def printTree(indent=0)
    target = hpricot.target
    # `target.any?` only worked while String was Enumerable (Ruby 1.8);
    # checking for emptiness behaves the same and works on later Rubies.
    if target && !target.empty?
      "\n|#{' ' * indent}<!DOCTYPE #{target}" +
      ([hpricot.public_id, hpricot.system_id].any? ? " \"#{hpricot.public_id}\" \"#{hpricot.system_id}\"" : '') +
      '>'
    else
      "\n|#{' ' * indent}<!DOCTYPE >"
    end
  end
end
|
170
|
+
|
171
|
+
# Fragment container: an Element with an empty name whose only job is to
# hold children.
class DocumentFragment < Element
  def initialize
    super('')
  end

  # Render only the children (the fragment itself prints nothing), each
  # child two columns deeper than indent.
  def printTree(indent=0)
    tree = ''
    childNodes.each do |child|
      tree += child.printTree(indent + 2)
    end
    tree
  end
end
|
180
|
+
|
181
|
+
# Text node backed by an Hpricot::Text.
class TextNode < Node
  # Wraps data in an Hpricot::Text; the Node constructor is not invoked.
  def initialize(data)
    @hpricot = ::Hpricot::Text.new(data)
  end

  # Render the text content, double-quoted, in the test-suite tree format.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}\"#{hpricot.content}\""
  end
end
|
190
|
+
|
191
|
+
# Comment node backed by an Hpricot::Comment.
class CommentNode < Node
  def self.hpricot_class
    ::Hpricot::Comment
  end

  # Render the comment content between "<!-- " and " -->" in the
  # test-suite tree format.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}<!-- #{hpricot.content} -->"
  end
end
|
200
|
+
|
201
|
+
# Tree builder producing Hpricot documents.
class TreeBuilder < Base::TreeBuilder
  # Select the Hpricot-backed node classes, then let the base class
  # initialise the builder state. Without the call to super the base
  # constructor (which runs #reset) was skipped, leaving a new builder
  # without a document or element stack until #reset was called by hand.
  def initialize
    @documentClass = Document
    @doctypeClass = DocumentType
    @elementClass = Element
    @commentClass = CommentNode
    @fragmentClass = DocumentFragment
    super
  end

  # Create the doctype node with all three values at once (the Hpricot
  # DocumentType takes them in its constructor) and append it to the
  # document.
  def insertDoctype(name, public_id, system_id)
    doctype = @doctypeClass.new(name, public_id, system_id)
    @document.appendChild(doctype)
  end

  # Serialize node in the test-suite tree format via its printTree.
  def testSerializer(node)
    node.printTree
  end

  # Return the underlying Hpricot object of the document, not the wrapper.
  def get_document
    @document.hpricot
  end

  # Build the fragment via the base class, keep it as the current
  # document, and return the Hpricot children it contains.
  def get_fragment
    @document = super
    @document.hpricot.children
  end
end
|
228
|
+
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|