hpricot 0.6-jruby
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +62 -0
- data/COPYING +18 -0
- data/README +284 -0
- data/Rakefile +211 -0
- data/ext/hpricot_scan/HpricotScanService.java +1340 -0
- data/ext/hpricot_scan/extconf.rb +6 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_scan.c +5976 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
- data/ext/hpricot_scan/hpricot_scan.rl +273 -0
- data/extras/mingw-rbconfig.rb +176 -0
- data/lib/hpricot.rb +26 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +200 -0
- data/lib/hpricot/elements.rb +510 -0
- data/lib/hpricot/htmlinfo.rb +672 -0
- data/lib/hpricot/inspect.rb +107 -0
- data/lib/hpricot/modules.rb +37 -0
- data/lib/hpricot/parse.rb +297 -0
- data/lib/hpricot/tag.rb +228 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +821 -0
- data/lib/hpricot/xchar.rb +94 -0
- data/lib/i686-linux/hpricot_scan.jar +0 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/test_alter.rb +65 -0
- data/test/test_builder.rb +24 -0
- data/test/test_parser.rb +379 -0
- data/test/test_paths.rb +16 -0
- data/test/test_preserved.rb +66 -0
- data/test/test_xml.rb +28 -0
- metadata +98 -0
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'pp'
|
2
|
+
|
3
|
+
module Hpricot
|
4
|
+
# :stopdoc:
|
5
|
+
class Elements
  # Pretty-prints the collection inside an object group ("#<ClassName ...>"),
  # delegating the body to the superclass implementation of pretty_print.
  def pretty_print(q)
    q.object_group(self) do
      super
    end
  end

  # inspect produces the single-line form of pretty_print.
  alias_method :inspect, :pretty_print_inspect
end
|
11
|
+
|
12
|
+
class Doc
  # Pretty-prints the document as an object group containing each child
  # node, every child preceded by a breakable separator.
  def pretty_print(q)
    q.object_group(self) do
      @children.each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  # inspect produces the single-line form of pretty_print.
  alias_method :inspect, :pretty_print_inspect
end
|
18
|
+
|
19
|
+
class Elem
  # Pretty-prints the element. A childless element is shown as
  # "{emptyelem <stag>}"; otherwise as "{elem <stag> children... <etag>}",
  # where the end tag is printed only when one is present.
  def pretty_print(q)
    if empty?
      q.group(1, '{emptyelem', '}') do
        q.breakable
        q.pp(@stag)
      end
    else
      q.group(1, "{elem", "}") do
        q.breakable
        q.pp(@stag)
        (@children || []).each do |child|
          q.breakable
          q.pp(child)
        end
        if @etag
          q.breakable
          q.pp(@etag)
        end
      end
    end
  end

  # inspect produces the single-line form of pretty_print.
  alias_method :inspect, :pretty_print_inspect
end
|
39
|
+
|
40
|
+
module Leaf
  # Pretty-prints a leaf node as "{<kind> ...}", where <kind> is the
  # lower-cased class name without its namespace. When a raw source string
  # is stored it is printed line by line (line terminators kept with their
  # line); otherwise the node's to_s is printed.
  def pretty_print(q)
    q.group(1, '{', '}') do
      kind = self.class.name.sub(/.*::/,'').downcase
      q.text kind
      raw = @raw_string
      if raw
        raw.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) do |chunk|
          q.breakable
          q.pp chunk
        end
      elsif self.respond_to?(:to_s)
        q.breakable
        q.text self.to_s
      end
    end
  end

  # inspect produces the single-line form of pretty_print.
  alias_method :inspect, :pretty_print_inspect
end
|
57
|
+
|
58
|
+
class STag
  # Pretty-prints the start tag as "<name attr="value" flag>". Attribute
  # values are passed through Hpricot.uxs; attributes without a value are
  # printed as a bare name.
  def pretty_print(q)
    q.group(1, '<', '>') do
      q.text @name

      (@raw_attributes || {}).each do |attr_name, attr_value|
        q.breakable
        if attr_value
          q.text "#{attr_name}=\"#{Hpricot.uxs(attr_value)}\""
        else
          q.text attr_name
        end
      end
    end
  end

  # inspect produces the single-line form of pretty_print.
  alias_method :inspect, :pretty_print_inspect
end
|
77
|
+
|
78
|
+
class ETag
  # Pretty-prints the end tag as "</name>".
  def pretty_print(q)
    q.group(1, '</', '>') do
      q.text @name
    end
  end

  # inspect produces the single-line form of pretty_print.
  alias_method :inspect, :pretty_print_inspect
end
|
86
|
+
|
87
|
+
class Text
  # Pretty-prints the text node as the dumped (quoted, escaped) form of its
  # content string.
  def pretty_print(q)
    dumped = @content.dump
    q.text dumped
  end
end
|
92
|
+
|
93
|
+
class BogusETag
  # Pretty-prints an unmatched end tag as "{bogusetag ...}". The stored raw
  # string is printed after a breakable separator when present; otherwise
  # a "</name>" tag is synthesized and appended directly.
  def pretty_print(q)
    q.group(1, '{', '}') do
      kind = self.class.name.sub(/.*::/,'').downcase
      q.text kind
      raw = @raw_string
      if raw
        q.breakable
        q.text raw
      else
        q.text "</#{@name}>"
      end
    end
  end
end
|
106
|
+
# :startdoc:
|
107
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Declares the Hpricot class/module skeleton and wires up the mixins:
# every node class includes either Container or Leaf (both of which include
# Node), and each gets its own nested Trav module built on the matching
# Container::Trav or Leaf::Trav.
module Hpricot
  class Name
    include Hpricot
  end

  class Context
    include Hpricot
  end

  # :stopdoc:
  module Tag
    include Hpricot
  end

  class STag
    include Tag
  end

  class ETag
    include Tag
  end
  # :startdoc:

  module Node
    include Hpricot
  end

  module Container
    include Node
  end

  class Doc
    include Container
  end

  class Elem
    include Container
  end

  module Leaf
    include Node
  end

  class Text
    include Leaf
  end

  class XMLDecl
    include Leaf
  end

  class DocType
    include Leaf
  end

  class ProcIns
    include Leaf
  end

  class Comment
    include Leaf
  end

  class BogusETag
    include Leaf
  end

  # Traversal mixins: one shared base, one per node family, one per class.
  module Traverse
  end

  module Container::Trav
    include Traverse
  end

  module Leaf::Trav
    include Traverse
  end

  class Doc
    module Trav
      include Container::Trav
    end
    include Trav
  end

  class Elem
    module Trav
      include Container::Trav
    end
    include Trav
  end

  class Text
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class XMLDecl
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class DocType
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class ProcIns
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class Comment
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class BogusETag
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  # Base class for Hpricot-specific errors.
  class Error < StandardError
  end
end
|
37
|
+
|
@@ -0,0 +1,297 @@
|
|
1
|
+
require 'hpricot/htmlinfo'
|
2
|
+
|
3
|
+
# Shortcut for Hpricot.parse: forwards the input, the options hash and the
# optional block unchanged and returns whatever Hpricot.parse returns.
def Hpricot(input = nil, opts = {}, &blk)
  ::Hpricot.parse(input, opts, &blk)
end
|
6
|
+
|
7
|
+
module Hpricot
|
8
|
+
# Raised for errors related to deficiencies in the system when handling
# the character encoding of a document.
class EncodingError < StandardError
end
|
11
|
+
|
12
|
+
# Hpricot.parse parses <i>input</i> (or evaluates the given block) and
# returns a document tree represented by Hpricot::Doc.
def Hpricot.parse(input = nil, opts = {}, &blk)
  nodes = make(input, opts, &blk)
  Doc.new(nodes)
end
|
17
|
+
|
18
|
+
# Hpricot::XML parses <i>input</i>, disregarding all the HTML rules,
# and returns a document tree. The caller's options hash is not modified;
# a merged copy with :xml => true is used.
def Hpricot.XML(input, opts = {})
  xml_opts = opts.merge(:xml => true)
  nodes = make(input, xml_opts)
  Doc.new(nodes)
end
|
23
|
+
|
24
|
+
# :stopdoc:
|
25
|
+
|
26
|
+
# Builds the list of top-level nodes for a document.
#
# When +input+ is given it is tokenized with Hpricot.scan and the tokens are
# assembled into nested structures on a stack, which are finally converted
# to node objects with build_node. When only a block is given, the nodes
# come from Hpricot.build.
#
# Options read here:
#   :xml          - keep tag/attribute names as scanned and skip HTML rules
#   :encoding     - nil or 'utf-8' (the latter needs Encoding::Character::UTF8)
#   :fixup_tags   - re-parent tags that the enclosing element cannot contain
#   :xhtml_strict - implies :fixup_tags; unknown tags become <div class="...">
#
# Raises ArgumentError when neither input nor block is given, and
# EncodingError for an unsupported or unavailable :encoding.
def Hpricot.make(input = nil, opts = {}, &blk)
  # Work on a copy so the caller's hash is never modified.
  opts = {:fixup_tags => false}.merge(opts)
  unless input or blk
    raise ArgumentError, "An Hpricot document must be built from an input source (a String) or a block."
  end

  # Tag names are lower-cased for HTML and left untouched for XML.
  conv = opts[:xml] ? :to_s : :downcase

  fragment =
  if input
    case opts[:encoding]
    when nil
    when 'utf-8'
      unless defined? Encoding::Character::UTF8
        raise EncodingError, "The ruby-character-encodings library could not be found for utf-8 mode."
      end
    else
      raise EncodingError, "No encoding option `#{opts[:encoding]}' is available."
    end

    if opts[:xhtml_strict]
      opts[:fixup_tags] = true
    end

    # Each stack entry is
    #   [tag name, token, children, excluded tags, included tags, uncontainable tags]
    # and the first entry is the nameless document root.
    stack = [[nil, nil, [], [], [], []]]
    Hpricot.scan(input) do |token|
      # Inside a CDATA-content element everything except processing
      # instructions, comments, CDATA sections and the matching end tag is
      # treated as plain text.
      if stack.last[5] == :CDATA and ![:procins, :comment, :cdata].include?(token[0]) and
         !(token[0] == :etag and token[1].casecmp(stack.last[0]).zero?)
        token[0] = :text
        token[1] = token[3] if token[3]
      end

      # In HTML, a self-closed tag only stays empty if the element is
      # declared EMPTY; otherwise it is handled as an ordinary start tag.
      if !opts[:xml] and token[0] == :emptytag
        token[1] = token[1].send(conv)
        if ElementContent[token[1].downcase] != :EMPTY
          token[0] = :stag
        end
      end

      # TODO: downcase instead when parsing attributes?
      if !opts[:xml] and token[2].is_a?(Hash)
        token[2] = token[2].inject({}) { |hsh,(k,v)| hsh[k.downcase] = v; hsh }
      end

      case token[0]
      when :stag
        case opts[:encoding] when 'utf-8'
          # Convert only the String members; every other member (notably the
          # attribute Hash in token[2]) must be kept as it is rather than
          # being replaced by nil.
          token.map! { |str| str.is_a?(String) ? u(str) : str }
        end

        stagname = token[0] = token[1] = token[1].send(conv)
        if ElementContent[stagname] == :EMPTY and !opts[:xml]
          token[0] = :emptytag
          stack.last[2] << token
        else
          unless opts[:xml]
            if opts[:fixup_tags]
              # obey the tag rules set up by the current element
              if ElementContent.has_key? stagname
                trans = nil
                # Walk outwards while the open elements cannot contain this
                # tag; trans ends up as the outermost such stack index.
                (stack.length-1).downto(0) do |i|
                  untags = stack[i][5]
                  break unless untags.include? stagname
                  # puts "** ILLEGAL #{stagname} IN #{stack[i][0]}"
                  trans = i
                end
                if trans.to_i > 1
                  # Close the offending open elements by moving them into
                  # the children of the element below them.
                  eles = stack.slice!(trans..-1)
                  stack.last[2] += eles
                  # puts "** TRANSPLANTED #{stagname} TO #{stack.last[0]}"
                end
              elsif opts[:xhtml_strict]
                # Unknown tag in strict mode: keep the name as a class on a div.
                token[2] = {'class' => stagname}
                stagname = token[0] = "div"
              end
            end

            # setup tag rules for inside this element
            if ElementContent[stagname] == :CDATA
              uncontainable_tags = :CDATA
            elsif opts[:fixup_tags]
              possible_tags = ElementContent[stagname]
              excluded_tags, included_tags = stack.last[3..4]
              if possible_tags
                excluded_tags = excluded_tags | (ElementExclusions[stagname] || [])
                included_tags = included_tags | (ElementInclusions[stagname] || [])
                containable_tags = (possible_tags | included_tags) - excluded_tags
                uncontainable_tags = ElementContent.keys - containable_tags
              else
                # If the tagname is unknown, it is assumed that any element
                # except excluded can be contained.
                uncontainable_tags = excluded_tags
              end
            end
          end
          unless opts[:xml]
            case token[2] when Hash
              token[2] = token[2].inject({}) { |hsh,(k,v)| hsh[k.downcase] = v; hsh }
            end
          end
          stack << [stagname, token, [], excluded_tags, included_tags, uncontainable_tags]
        end
      when :etag
        etagname = token[0] = token[1].send(conv)
        if opts[:xhtml_strict] and not ElementContent.has_key? etagname
          etagname = token[0] = "div"
        end
        matched_elem = nil
        # Find the innermost open element with this name; anything opened
        # after it is implicitly closed and becomes its child.
        (stack.length-1).downto(0) do |i|
          stagname, = stack[i]
          if stagname == etagname
            matched_elem = stack[i]
            # Append the end-tag token to the start-tag token so build_node
            # can recover both raw strings.
            stack[i][1] += token
            eles = stack.slice!((i+1)..-1)
            stack.last[2] += eles
            break
          end
        end
        unless matched_elem
          stack.last[2] << [:bogus_etag, token.first, token.last]
        else
          ele = stack.pop
          stack.last[2] << ele
        end
      when :text
        # Merge adjacent text tokens into one.
        l = stack.last[2].last
        if l and l[0] == :text
          l[1] += token[1]
        else
          stack.last[2] << token
        end
      else
        stack.last[2] << token
      end
    end

    # Close every element that is still open at the end of the input.
    while 1 < stack.length
      ele = stack.pop
      stack.last[2] << ele
    end

    structure_list = stack[0][2]
    structure_list.map {|s| build_node(s, opts) }
  elsif blk
    Hpricot.build(&blk).children
  end
end
|
173
|
+
|
174
|
+
# Converts one parsed structure (as assembled by Hpricot.make) into a node
# object.
#
# A structure whose first member is a String is an element stack entry:
# structure[1] is the start-tag token, with the end-tag token appended when
# the element was explicitly closed, and structure[2] holds the child
# structures. Any other structure is a scanner token whose first member is
# a Symbol naming the node type.
#
# Raises Hpricot::Error for an unrecognized structure.
def Hpricot.build_node(structure, opts = {})
  case structure[0]
  when String
    # Members 0-3 come from the start tag, members 4-7 from the end tag.
    tagname, _, attrs, sraw, _, _, _, eraw = structure[1]
    children = structure[2]
    etag = eraw && ETag.parse(tagname, eraw)
    stag = STag.parse(tagname, attrs, sraw, true)
    if !children.empty? || etag
      Elem.new(stag,
               children.map {|c| build_node(c, opts) },
               etag)
    else
      Elem.new(stag)
    end
  when :text
    Text.parse_pcdata(structure[1])
  when :emptytag
    Elem.new(STag.parse(structure[1], structure[2], structure[3], false))
  when :bogus_etag
    BogusETag.parse(structure[1], structure[2])
  when :xmldecl
    XMLDecl.parse(structure[2], structure[3])
  when :doctype
    if opts[:xhtml_strict]
      # The identifier hash may be missing (DocType.parse guards for that
      # too), so create it before forcing the strict identifiers.
      structure[2] ||= {}
      structure[2]['system_id'] = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
      structure[2]['public_id'] = "-//W3C//DTD XHTML 1.0 Strict//EN"
    end
    DocType.parse(structure[1], structure[2], structure[3])
  when :procins
    ProcIns.parse(structure[1])
  when :comment
    Comment.parse(structure[1])
  when :cdata_content
    Text.parse_cdata_content(structure[1])
  when :cdata
    Text.parse_cdata_section(structure[1])
  else
    # Raise the library's StandardError subclass rather than Exception so
    # an ordinary rescue clause can handle it.
    raise Hpricot::Error, "[bug] unknown structure: #{structure.inspect}"
  end
end
|
214
|
+
|
215
|
+
# Builds an STag from a tag name and attribute hash and records the raw
# source text on it. The is_stag argument is accepted but not used.
def STag.parse(qname, attrs, raw_string, is_stag)
  stag = STag.new(qname, attrs)
  stag.raw_string = raw_string
  stag
end
|
220
|
+
|
221
|
+
# Builds an end tag for the given name and records its raw source text.
def ETag.parse(qname, raw_string)
  etag = new(qname)
  etag.raw_string = raw_string
  etag
end
|
226
|
+
|
227
|
+
# Builds a node for an end tag that matched no open element and records
# its raw source text.
def BogusETag.parse(qname, raw_string)
  bogus = new(qname)
  bogus.raw_string = raw_string
  bogus
end
|
232
|
+
|
233
|
+
# Wraps a run of parsed character data in a Text node.
def Text.parse_pcdata(raw_string)
  Text.new(raw_string)
end
|
237
|
+
|
238
|
+
# Wraps the given string in a CData node.
def Text.parse_cdata_content(raw_string)
  CData.new(raw_string)
end
|
242
|
+
|
243
|
+
# Wraps the content of a CDATA section in a CData node.
def Text.parse_cdata_section(content)
  CData.new(content)
end
|
247
|
+
|
248
|
+
# Builds an XMLDecl from the declaration's attribute hash (nil is treated
# as empty) and records the raw source text. The 'standalone' attribute is
# mapped to true for 'yes', false for 'no' and nil for anything else.
def XMLDecl.parse(attrs, raw_string)
  attrs ||= {}
  flag = attrs['standalone']
  standalone =
    if 'yes' == flag
      true
    elsif 'no' == flag
      false
    else
      nil
    end

  decl = XMLDecl.new(attrs['version'], attrs['encoding'], standalone)
  decl.raw_string = raw_string
  decl
end
|
265
|
+
|
266
|
+
# Builds a DocType from the root element name (lower-cased) and the
# optional identifier hash, reading its 'public_id' and 'system_id' keys,
# and records the raw source text. Both identifiers are nil when no hash
# is given.
def DocType.parse(root_element_name, attrs, raw_string)
  ids = attrs || {}
  public_identifier = ids['public_id']
  system_identifier = ids['system_id']

  doctype = DocType.new(root_element_name.downcase, public_identifier, system_identifier)
  doctype.raw_string = raw_string
  doctype
end
|
278
|
+
|
279
|
+
# Builds a ProcIns by splitting the raw text into a target (the
# non-whitespace run right after "<?") and the content following the
# whitespace after it. Both are nil when the text does not match.
def ProcIns.parse(raw_string)
  matched = raw_string.match(/\A<\?(\S+)\s+(.+)/m)
  parts = matched ? matched.to_a : []
  target = parts[1]
  content = parts[2]
  ProcIns.new(target, content)
end
|
284
|
+
|
285
|
+
# Wraps comment content in a Comment node.
def Comment.parse(content)
  Comment.new(content)
end
|
289
|
+
|
290
|
+
# Regular-expression fragments for names and name tokens.
module Pat
  # A single character allowed inside a name.
  NameChar = /[-A-Za-z0-9._:]/

  # A name: a letter, underscore or colon followed by name characters.
  Name     = /[A-Za-z_:]#{NameChar}*/

  # A name token: one or more name characters.
  Nmtoken  = /#{NameChar}+/
end
|
295
|
+
|
296
|
+
# :startdoc:
|
297
|
+
end
|
data/lib/hpricot/tag.rb
ADDED
@@ -0,0 +1,228 @@
|
|
1
|
+
module Hpricot
|
2
|
+
# :stopdoc:
|
3
|
+
|
4
|
+
# Root of a parsed document: an ordered list of child nodes.
class Doc
  attr_accessor :children

  # Stores the children (nil becomes an empty list) and makes this
  # document the parent of each of them.
  def initialize(children = [])
    @children =
      if children
        children.each { |child| child.parent = self }
      else
        []
      end
  end

  # Appends the output of every child to +out+ and returns +out+.
  def output(out, opts = {})
    @children.each { |node| node.output(out, opts) }
    out
  end

  # A document keeps no raw source text, so there is nothing to discard.
  def altered!
  end
end
|
17
|
+
|
18
|
+
# Shared behaviour for tags and leaf nodes: access to the raw source text
# and parent, and helpers for serializing.
class BaseEle
  attr_accessor :raw_string, :parent

  # Wraps +str+ in double quotes, putting a backslash before each embedded
  # double quote.
  def html_quote(str)
    escaped = str.gsub('"', '\\"')
    "\"" + escaped + "\""
  end

  # Returns the stored raw source text when opts[:preserve] is set and such
  # text exists; otherwise yields opts and returns the block's result.
  def if_output(opts)
    return @raw_string if opts[:preserve] and not @raw_string.nil?
    yield opts
  end

  def pathname
    self.name
  end

  # Discards the raw source text so output is regenerated from the fields.
  def altered!
    @raw_string = nil
  end

  # Declares readable fields whose writers call altered! before storing
  # the new value.
  def self.alterable(*fields)
    attr_reader(*fields)
    fields.each do |field|
      define_method("#{field}=") do |value|
        altered!
        instance_variable_set("@#{field}", value)
      end
    end
  end
end
|
44
|
+
|
45
|
+
# An element: a start tag, child nodes and an optional end tag.
class Elem
  attr_accessor :stag, :etag, :children

  # Stores the tags and children (nil becomes an empty list) and makes this
  # element the parent of each child.
  def initialize(stag, children=nil, etag=nil)
    @stag, @etag = stag, etag
    @children = children ? children.each { |c| c.parent = self } : []
  end

  def empty?; @children.empty? end

  # Delegated readers and writers (name, raw_attributes, parent, altered!
  # and their "=" variants). Each call is sent to the end tag and then to
  # the start tag, skipping a tag that is missing or does not respond to
  # it, and returns the start tag's result. Both tags must be visited:
  # an inject without an initial value uses the first member only as the
  # memo, so the end tag would never be renamed, re-parented or marked as
  # altered.
  [:name, :raw_attributes, :parent, :altered!].each do |m|
    [m, "#{m}="].each do |m2|
      define_method(m2) do |*a|
        result = nil
        [@etag, @stag].each do |tag|
          result = (tag.send(m2, *a) if tag and tag.respond_to?(m2))
        end
        result
      end
    end
  end

  # Returns the attributes with every value passed through Hpricot.uxs,
  # or nil when there are no raw attributes.
  def attributes
    if raw_attributes
      raw_attributes.inject({}) do |hsh, (k, v)|
        hsh[k] = Hpricot.uxs(v)
        hsh
      end
    end
  end

  # Plain-text rendering with special cases for br, p, a[href] and
  # img[src]; everything else defers to the inherited implementation.
  def to_plain_text
    if self.name == 'br'
      "\n"
    elsif self.name == 'p'
      "\n\n" + super + "\n\n"
    elsif self.name == 'a' and self.has_attribute?('href')
      "#{super} [#{self['href']}]"
    elsif self.name == 'img' and self.has_attribute?('src')
      "[img:#{self['src']}]"
    else
      super
    end
  end

  def pathname; self.name end

  # Appends the serialized element to +out+ and returns +out+. A childless
  # element declared EMPTY is written as a single empty-style tag. In every
  # other case the start tag and children are written, followed by the
  # stored end tag, or by a generated one unless opts[:preserve] is set.
  def output(out, opts = {})
    if empty? and ElementContent[@stag.name] == :EMPTY
      @stag.output(out, opts.merge(:style => :empty))
    else
      @stag.output(out, opts)
      @children.each { |n| n.output(out, opts) }
      if @etag
        @etag.output(out, opts)
      elsif !opts[:preserve]
        ETag.new(@stag.name).output(out, opts)
      end
    end
    out
  end
end
|
92
|
+
|
93
|
+
# A start tag: a name plus the raw (unprocessed) attribute values.
class STag < BaseEle
  # The name is stored as a String; missing attributes become an empty hash.
  def initialize(name, attributes=nil)
    @name = name.to_s
    @raw_attributes = attributes || {}
  end

  alterable :name, :raw_attributes

  # Returns the attributes as ' name="value"' pairs joined together, with a
  # bare ' name' for attributes without a value. Returns nil when there are
  # no raw attributes.
  def attributes_as_html
    return nil unless @raw_attributes
    pairs = @raw_attributes.map do |aname, aval|
      aval ? " #{aname}=\"#{aval}\"" : " #{aname}"
    end
    pairs.join
  end

  # Appends the tag to +out+: the raw source text when it is being
  # preserved, otherwise generated markup, closed with " /" when
  # opts[:style] is :empty.
  def output(out, opts = {})
    markup = if_output(opts) do
      closer = (opts[:style] == :empty) ? " /" : ""
      "<#{@name}#{attributes_as_html}#{closer}>"
    end
    out << markup
  end
end
|
116
|
+
|
117
|
+
# An end tag.
class ETag < BaseEle
  # The name is stored as a String.
  def initialize(qualified_name)
    @name = qualified_name.to_s
  end

  alterable :name

  # Appends the tag to +out+: the raw source text when it is being
  # preserved, otherwise "</name>".
  def output(out, opts = {})
    markup = if_output(opts) { "</#{@name}>" }
    out << markup
  end
end
|
129
|
+
|
130
|
+
# An end tag that matched no open element.
class BogusETag < ETag
  # Appends the raw source text to +out+ when it is being preserved, and
  # an empty string otherwise.
  def output(out, opts = {})
    markup = if_output(opts) { '' }
    out << markup
  end
end
|
133
|
+
|
134
|
+
# A run of character data.
class Text < BaseEle
  def initialize(text)
    @content = text
  end

  alterable :content

  def pathname
    "text()"
  end

  # The content passed through Hpricot.uxs.
  def to_s
    Hpricot.uxs(@content)
  end

  alias inner_text to_s
  alias to_plain_text to_s

  # Appends the text to +out+: the raw source text when it is being
  # preserved, otherwise the stored content as it is.
  def output(out, opts = {})
    markup = if_output(opts) { @content }
    out << markup
  end
end
|
152
|
+
|
153
|
+
# A CDATA section; its string forms are the content itself.
class CData < Text
  alias to_s content
  alias to_plain_text content

  # Appends the section to +out+: the raw source text when it is being
  # preserved, otherwise the content wrapped in CDATA delimiters.
  def output(out, opts = {})
    markup = if_output(opts) { "<![CDATA[#{@content}]]>" }
    out << markup
  end
end
|
163
|
+
|
164
|
+
# An XML declaration.
class XMLDecl < BaseEle
  def initialize(version, encoding, standalone)
    @version, @encoding, @standalone = version, encoding, standalone
  end

  alterable :version, :encoding, :standalone

  def pathname
    "xmldecl()"
  end

  # Appends the declaration to +out+: the raw source text when it is being
  # preserved, otherwise a generated declaration. The encoding is written
  # only when set and standalone only when it is not nil.
  def output(out, opts = {})
    markup = if_output(opts) do
      decl = "<?xml version=\"#{@version}\""
      decl << " encoding=\"#{encoding}\"" if @encoding
      decl << " standalone=\"#{standalone ? 'yes' : 'no'}\"" if @standalone != nil
      decl << "?>"
    end
    out << markup
  end
end
|
180
|
+
|
181
|
+
# A document type declaration.
class DocType < BaseEle
  def initialize(target, pubid, sysid)
    @target, @public_id, @system_id = target, pubid, sysid
  end

  alterable :target, :public_id, :system_id

  def pathname; "doctype()" end

  # Appends the declaration to +out+: the raw source text when it is being
  # preserved, otherwise a generated declaration.
  #
  # The PUBLIC keyword is written when a public identifier is set and the
  # SYSTEM keyword when only a system identifier is set. With neither
  # identifier the result is just "<!DOCTYPE name>", because a keyword
  # with nothing after it is not a valid declaration.
  def output(out, opts = {})
    out <<
      if_output(opts) do
        decl = "<!DOCTYPE #{@target}"
        if @public_id
          decl << " PUBLIC \"#{@public_id}\""
        elsif @system_id
          decl << " SYSTEM"
        end
        decl << " #{html_quote(@system_id)}" if @system_id
        decl << ">"
      end
  end
end
|
196
|
+
|
197
|
+
# A processing instruction.
class ProcIns < BaseEle
  def initialize(target, content)
    @target, @content = target, content
  end

  def pathname
    "procins()"
  end

  alterable :target, :content

  # Appends the instruction to +out+: the raw source text when it is being
  # preserved, otherwise "<?target content?>", with the content and its
  # leading space left out when there is no content.
  def output(out, opts = {})
    markup = if_output(opts) do
      body = @content ? " #{@content}" : ""
      "<?#{@target}" + body + "?>"
    end
    out << markup
  end
end
|
212
|
+
|
213
|
+
# A comment.
class Comment < BaseEle
  def initialize(content)
    @content = content
  end

  def pathname
    "comment()"
  end

  alterable :content

  # Appends the comment to +out+: the raw source text when it is being
  # preserved, otherwise the content wrapped in comment delimiters.
  def output(out, opts = {})
    markup = if_output(opts) { "<!--#{@content}-->" }
    out << markup
  end
end
|
226
|
+
|
227
|
+
# :startdoc:
|
228
|
+
end
|