htmltools 1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,323 @@
1
+ # This module is a mix-in that provides parent/child behavior to real
2
+ # Element classes. Because it defines <tt>each()</tt> and includes Enumerable,
3
+ # you can iterate through a tree using the usual Enumerable methods.
4
+ #
5
+ # Copyright:: Copyright (C) 2003, Johannes Brodwall <johannes@brodwall.com>,
6
+ # Copyright (C) 2002, Ned Konz <ned@bike-nomad.com>
7
+ # License:: Same as Ruby's
8
+ # CVS ID: $Id: element.rb,v 1.4 2004/09/24 23:28:55 jhannes Exp $
9
+
10
+ require 'html/tags'
11
+
12
module HTMLTree
  # Mix-in that provides parent/child behavior to the real element classes.
  # Because it defines <tt>each()</tt> and includes Enumerable, a tree can be
  # walked with the usual Enumerable methods.
  #
  # Including classes are expected to call <tt>initialize_tree_element</tt>
  # from their constructor.
  module TreeElement
    include Enumerable

    protected

    # Set up the tree state of a freshly created node and, when a parent is
    # given, register this node as one of its children.
    #
    # parent_or_nil::   node to attach to, or nil for a detached node
    # contents_or_nil:: an Array (the node may have children) or the node's
    #                   data
    def initialize_tree_element(parent_or_nil = nil, contents_or_nil = nil)
      @_content = contents_or_nil
      @_parent = parent_or_nil
      parent_or_nil.add_child(self) if parent_or_nil
    end

    # Raw parent link. Protected so that only other tree nodes can rewire it;
    # outside code should use <tt>parent=</tt>.
    attr_accessor :_parent

    public

    # Add one or more children to this node.
    # Raises ArgumentError if this node cannot have children.
    def add_child(*children_to_add)
      raise ArgumentError, 'node cannot have children' unless can_have_children?

      children_to_add.each do |child|
        @_content << child
        child._parent = self
      end
    end

    alias_method :add_children, :add_child

    # Remove one or more children from this node. Nodes that are not actually
    # children of this node are left untouched.
    # Raises ArgumentError if this node cannot have children.
    def remove_child(*children_to_remove)
      raise ArgumentError, 'node cannot have children' unless can_have_children?

      children_to_remove.each do |child|
        # Only clear the back-link when the child really was ours.
        child._parent = nil if @_content.delete(child)
      end
    end

    alias_method :remove_children, :remove_child

    # Change my parent. Disconnects from prior parent, if any.
    def parent=(parent_or_nil)
      @_parent.remove_child(self) if @_parent
      parent_or_nil.add_child(self) if parent_or_nil
    end

    # Return true if my content is a collection of Elements
    # rather than actual data.
    def can_have_children?
      @_content.kind_of?(Array)
    end

    # Return a collection of my children. Returns an empty Array if I am a
    # data element, just to keep other methods simple.
    def children
      can_have_children? ? @_content : []
    end

    # Return my content; either my children or my data.
    def content
      @_content
    end

    # Return my parent element.
    def parent
      @_parent
    end

    # Default path for nodes that do not compute one themselves.
    def path
      "/"
    end

    # Return the ultimate parent: walk up the parent links until a node
    # without a parent is found. A detached node is its own root.
    def root
      @_parent ? @_parent.root : self
    end

    # Return true if I have any children.
    def has_children?
      !children.empty?
    end

    # Depth-first (pre-order) iterator, required by Enumerable: yields this
    # node, then recurses into each child in order. Returns an Enumerator
    # when called without a block.
    def each(&block)
      return enum_for(:each) unless block

      block.call(self)
      children.each { |ch| ch.each(&block) }
    end

    # Print out to $stdout (or given IO or String)
    # a formatted dump of my structure, one node per line, indented by one
    # space per tree level.
    def dump(indent = 0, io = $stdout)
      io << " " * indent
      io << to_s
      io << "\n"
      children.each { |ea| ea.dump(indent + 1, io) }
    end
  end

  # This is a Element that represents the whole document (and makes a
  # scope for the DTD declaration)
  class Document
    include TreeElement

    def initialize
      initialize_tree_element(nil, [])
    end

    # The document node itself has no textual representation.
    def to_s
      ''
    end

    # Iterate over all descendants; unlike TreeElement#each the document
    # node itself is not yielded. Returns an Enumerator when called without
    # a block.
    def each(&block)
      return enum_for(:each) unless block

      children.each { |ch| ch.each(&block) }
    end

    # Print all my children (and their descendants) on the given IO stream.
    def write(io)
      children.each { |t| t.write(io) }
    end

    # Return my (empty) tag String.
    def tag
      ''
    end

    # Return my child <html> node, if any.
    def html_node
      children.detect { |ea| ea.tag == 'html' }
    end
  end

  # This is a TreeElement that represents tagged items in an HTML
  # document.
  class Element
    include TreeElement

    # parent_or_nil:: TreeElement or nil
    # tag_name:: String
    def initialize(parent_or_nil = nil, tag_name = nil)
      initialize_tree_element(parent_or_nil, [])
      @_tag = tag_name
      @_attributes = {}
      @_attribute_order = []
    end

    def can_have_children?; true; end

    # Return true if I'm data instead of a tag
    def data?; false; end

    # Return my start tag, with attributes in the order they were added.
    def to_s
      # NOTE(review): an Array-valued attribute is rendered with Array#to_s,
      # which is the inspect form on current Rubies -- confirm the intended
      # rendering for multi-valued attributes.
      attrs = @_attribute_order.map { |k| " #{k}=\"#{@_attributes[k]}\"" }
      "<#{tag}#{attrs.join('')}>"
    end

    # Append an attribute. <tt>values</tt> are first flattened into an Array,
    # then converted into strings.
    #
    # If there is a single attribute value, it will appear as a String,
    # otherwise it will be an Array of Strings. Adding to an attribute that
    # already exists appends to its values and keeps its original position
    # in the attribute order.
    #
    # Example:
    #   element.add_attribute("width", "123")
    #   element.add_attribute("value", [ "a", "b" ])
    #
    # Returns self.
    def add_attribute(name, *values)
      values = values.flatten.collect { |ea| ea.to_s.strip }
      name = name.downcase
      if @_attributes.include?(name)
        # Array() copes with a String, an Array or nil as the current value
        # (String#to_a no longer exists).
        @_attributes[name] = Array(@_attributes[name]) + values
      else
        @_attributes[name] = values.size > 1 ? values : values[0]
        # Record the name only once so to_s does not repeat the attribute.
        @_attribute_order << name
      end
      self
    end

    # Return my tag (should be a String)
    def tag; @_tag; end

    # Return an HTML::Tag for further information, or nil if this is an
    # unknown tag.
    def tag_info
      begin
        HTML::Tag.named(@_tag)
      rescue NoSuchHTMLTagError
        nil
      end
    end

    # Return the path to this element from the root: the tags of all my
    # ancestors and myself, joined with ".".
    # NOTE(review): TreeElement#path uses "/" -- confirm "." is the intended
    # separator here.
    def path
      path = []
      node = self
      while node do
        path.unshift node.tag
        node = node.parent
      end
      path.join(".")
    end

    # Print the paths of this element and all element descendants on
    # $stdout, indenting each level by two more spaces. Returns nil.
    def show_structure(indent = 0)
      # Build the whole line first; puts returns nil, so nothing may be
      # appended to its result.
      puts((' ' * indent) + path)
      elements.each { |node| node.show_structure(indent + 2) }
      nil
    end

    # Return the children of this node that are elements (not data)
    def elements
      children.select { |node| node.is_a? Element }
    end

    # Return my attributes Hash.
    def attributes; @_attributes; end

    # Return the order of my attributes
    def attribute_order; @_attribute_order; end

    # Return the value of a single attribute (a String or Array).
    def attribute(name); @_attributes[name]; end

    # Return the value of a single attribute (a String or Array).
    def [](name); attribute(name); end

    # Replace an attribute. The attribute is moved to the end of the
    # attribute order.
    def []=(name, *values)
      @_attributes[name] = values.size > 1 ? values : values[0]
      @_attribute_order.delete(name)
      # Re-register the name; otherwise to_s would never print it.
      @_attribute_order << name
      self
    end

    # Print me (and my descendents) on the given IO stream. A closing tag is
    # written unless the tag is known to be an empty element; unknown tags
    # (no tag_info) always get a closing tag.
    def write(io)
      io << self
      children.each { |t| t.write(io) }
      info = tag_info
      io.puts("</#{tag}>") unless info && info.is_empty_element
    end
  end

  # This is a TreeElement that represents leaf data nodes (CDATA, scripts,
  # comments, processing directives). It forwards unknown messages to the
  # content element, so it otherwise behaves like a String.
  class Data
    include TreeElement

    # parent_or_nil:: parent, TreeElement or nil
    # str:: contents, String
    def initialize(parent_or_nil = nil, str = '')
      initialize_tree_element(parent_or_nil, str)
    end

    # Return true because I am a data Element.
    def data?; true; end

    # Return false because I have no children.
    def can_have_children?; false; end

    # Return an empty collection because I have no children.
    def children; []; end

    # Return my (empty) tag String.
    def tag; ''; end

    # Return my (empty) attributes Hash.
    def attributes; {}; end

    # Return my content.
    def to_s
      @_content
    end

    # Print me on the given IO stream.
    def write(io)
      io << self
    end

    # Forward all other (public) methods to my content, so I can otherwise
    # behave like a String. Blocks are forwarded too. Messages my content
    # does not understand raise NoMethodError as usual.
    def method_missing(sym, *args, &block)
      if @_content.respond_to?(sym)
        @_content.send(sym, *args, &block)
      else
        super
      end
    end

    # Keep respond_to? consistent with method_missing.
    def respond_to_missing?(sym, include_private = false)
      @_content.respond_to?(sym) || super
    end
  end

  # A Data node that is rendered as an HTML comment.
  class Comment < Data
    def to_s
      '<!--' + @_content + '-->'
    end
  end

  # A Data node that is rendered between angle brackets.
  class Special < Data
    def to_s
      '<' + @_content + '>'
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # This module adds method 'full_path' to REXML hierarchy
2
+ # This method will print out the path to the given node, for example
3
+ # /html/body/p[1]/text()
4
+ #
5
+ # Copyright:: Copyright (C) 2004, Johannes Brodwall <johannes@brodwall.com>
6
+ # License:: Same as Ruby's
7
+ # CVS ID: $Id: rexml-nodepath.rb,v 1.5 2005/05/25 17:38:45 jhannes Exp $
8
+
9
+
10
module REXML
  class Child
    # Return the full path of my parent, or '' when I have no parent.
    def parent_path
      parent ? parent.full_path : ''
    end
  end

  class Document
    # The document is the root of every path and contributes nothing to it.
    def full_path
      ''
    end
  end

  class Element
    # Return my 1-based position among the sibling elements that share my
    # expanded name, formatted as "[n]". Returns '' when I have no parent or
    # when no sibling shares my name, so unambiguous steps stay unindexed.
    def child_index
      return '' unless parent

      siblings = parent.to_a.select do |node|
        node.kind_of?(Element) && node.expanded_name == expanded_name
      end
      return '' if siblings.size < 2

      "[#{siblings.index(self) + 1}]"
    end

    # Return the path from the document root to this element.
    def full_path
      parent_path + '/' + expanded_name + child_index
    end
  end

  class Text
    # Return the path to this text node: the parent's path followed by the
    # text() node test. Nothing else is appended: no expanded_name is
    # defined for Text in this file.
    def full_path
      parent_path + '/text()'
    end
  end

  class Attribute
    # Return the path to this attribute: the owning element's path followed
    # by "/@" and the attribute name.
    def full_path
      element.full_path + '/@' + name
    end
  end
end
@@ -0,0 +1,372 @@
1
+ # A parser for SGML, using the derived class as static DTD.
2
+ #
3
+ # Taken from http://raa.ruby-lang.org/list.rhtml?name=html-parser-2
4
+ # This file seems to be included in the current install of Ruby,
5
+ # but with a bug related to attributes quoted with '"', so I have
6
+ # included it in the HTML package of this distribution
7
+ #
8
+ # Copyright:: Copyright (C) 2003, Johannes Brodwall <johannes@brodwall.com>,
9
+ # Copyright (C) 2002, Ned Konz <ned@bike-nomad.com>
10
+ # License:: Same as Ruby's
11
+ # CVS ID: $Id: sgml-parser.rb,v 1.4 2004/09/24 23:28:55 jhannes Exp $
12
+
13
+
14
module HTML

  # An event-driven SGML parser that uses the derived class as a static DTD.
  #
  # Text is supplied with <tt>feed</tt> (any number of times) and finished
  # with <tt>close</tt>. For a start tag <tt>foo</tt> the parser calls
  # <tt>start_foo(attrs)</tt> if the subclass defines it (the tag is then
  # pushed on the open-tag stack), else <tt>do_foo(attrs)</tt>, else
  # <tt>unknown_starttag</tt>. End tags call <tt>end_foo</tt>. Character
  # data, comments, declarations and references are reported through the
  # <tt>handle_*</tt> / <tt>unknown_*</tt> hooks, which do nothing here.
  class SGMLParser

    # Range (absolute offsets since the last reset) of the source text that
    # produced the event currently being reported.
    attr_reader :src_range

    # Regular expressions used for parsing:
    Interesting = /[&<]/
    Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                                '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                                '![^<>]*)?')

    Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/
    Charref = /&#([0-9]+)[^0-9]/

    Starttagopen = /<[>a-zA-Z]/
    Endtagopen = /<\/[<>a-zA-Z]/
    Endbracket = /[<>]/
    Special = /<![^<>]*>/
    Commentopen = /<!--/
    Commentclose = /--[ \t\n]*>/
    Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
    Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
                              '(\s*=\s*' +
                              "('[^']*'" +
                              '|"[^"]*"' +
                              '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')
    Endtagfind = /\s*\/\s*>/
    Entitydefs =
      {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}

    # verbose:: when true, report_unbalanced prints diagnostics
    def initialize(verbose=false)
      @verbose = verbose
      reset
    end

    # Discard all buffered input and parser state.
    def reset
      @rawdata = ''
      @stack = []
      @lasttag = '???'
      @nomoretags = false
      @literal = false
      @offset = 0
      @ranges = []
    end

    # Return the still-buffered source text for +range+, a Range of absolute
    # offsets such as <tt>src_range</tt>.
    def get_source(range)
      first = range.first - @offset
      last = range.end - @offset
      @rawdata[Range.new(first, last, range.exclude_end?)]
    end

    # Record <tt>src_range</tt> for the text between the buffer-relative
    # indexes +start+ (inclusive) and +end_index+ (exclusive).
    def set_range(start, end_index)
      @src_range = Range.new(start + @offset, end_index + @offset, true)
    end

    # Return true if a tag named +gi+ is currently open.
    def has_context(gi)
      @stack.include? gi
    end

    # Treat all remaining input as character data.
    def setnomoretags
      @nomoretags = true
      @literal = true
    end

    # Treat start tags, comments and declarations as character data until
    # the next end tag.
    def setliteral(*args)
      @literal = true
    end

    # Append +data+ to the buffer and parse as much of it as is complete.
    def feed(data)
      @rawdata << data
      goahead(false)
    end

    # Parse whatever is left in the buffer, treating incomplete constructs
    # as character data.
    def close
      goahead(true)
    end

    # Report rawdata[start...end_index] as character data (if non-empty) and
    # return +end_index+.
    def handle_data_range(rawdata, start, end_index)
      if end_index > start
        set_range(start, end_index)
        handle_data(rawdata[start...end_index])
      end
      end_index
    end

    # Main scanning loop. Consumes the buffer from the front, dispatching
    # to the parse_* / handle_* methods, and keeps any incomplete trailing
    # construct buffered for the next feed. When +_end+ is true the
    # remainder is flushed as character data instead.
    def goahead(_end)
      rawdata = @rawdata
      i = 0
      n = rawdata.length
      while i < n
        if @nomoretags
          i = handle_data_range(rawdata, i, n)
          break
        end
        # Everything up to the next '<' or '&' is plain data.
        j = rawdata.index(Interesting, i) || n
        i = handle_data_range(rawdata, i, j)
        break if i == n
        if rawdata[i] == ?<
          if rawdata.index(Starttagopen, i) == i
            if @literal
              i = handle_data_range(rawdata, i, i + 1)
              next
            end
            k = parse_starttag(i)
            break unless k
            i = k            # parse_starttag returns an absolute index
            next
          end
          if rawdata.index(Endtagopen, i) == i
            k = parse_endtag(i)
            break unless k
            i = k            # parse_endtag returns an absolute index
            @literal = false
            next
          end
          if rawdata.index(Commentopen, i) == i
            if @literal
              i = handle_data_range(rawdata, i, i + 1)
              next
            end
            k = parse_comment(i)
            break unless k
            i += k           # parse_comment returns a length
            next
          end
          if rawdata.index(Special, i) == i
            if @literal
              i = handle_data_range(rawdata, i, i + 1)
              next
            end
            k = parse_special(i)
            break unless k
            i += k           # parse_special returns a length
            next
          end
        elsif rawdata[i] == ?&
          if rawdata.index(Charref, i) == i
            ref = $1
            end_index = i + $&.length
            # The pattern consumes one terminating character; keep it in
            # the input unless it is the ';' that closes the reference.
            end_index -= 1 unless rawdata[end_index - 1] == ?;
            set_range(i, end_index)
            handle_charref(ref)
            i = end_index
            next
          end
          if rawdata.index(Entityref, i) == i
            ref = $1
            end_index = i + $&.length
            end_index -= 1 unless rawdata[end_index - 1] == ?;
            set_range(i, end_index)
            handle_entityref(ref)
            i = end_index
            next
          end
        else
          raise RuntimeError, 'neither < nor & ??'
        end
        # We get here only if incomplete matches but
        # nothing else
        match = rawdata.index(Incomplete, i)
        unless match == i
          i = handle_data_range(rawdata, i, i + 1)
          next
        end
        j = match + $&.length
        break if j == n # Really incomplete
        i = handle_data_range(rawdata, i, j)
      end
      # end while
      if _end && i < n
        i = handle_data_range(rawdata, i, n)
      end
      @rawdata = rawdata[i..-1]
      @offset += i
    end

    # Parse the comment starting at index +i+ and report its text through
    # handle_comment. Returns the number of characters consumed, or nil if
    # the comment is not yet complete.
    def parse_comment(i)
      rawdata = @rawdata
      if rawdata[i, 4] != '<!--'
        raise RuntimeError, 'unexpected call to handle_comment'
      end
      match = rawdata.index(Commentclose, i)
      return nil unless match
      src_length = match + $&.length - i
      set_range(i, i + src_length)
      handle_comment(rawdata[i + 4..(match - 1)])
      src_length
    end

    # Parse the start tag beginning at index +i+, collect its attributes as
    # [name, value] pairs (names downcased, surrounding quotes removed, ''
    # for value-less attributes) and dispatch it via finish_starttag.
    # Returns the index just past the tag, or nil if the tag is incomplete.
    def parse_starttag(i)
      rawdata = @rawdata
      j = rawdata.index(Endbracket, i + 1)
      return nil unless j
      attrs = []
      if rawdata[i + 1] == ?>
        # SGML shorthand: <> == <last open tag seen>
        k = j
        tag = @lasttag
      else
        unless rawdata.index(Tagfind, i + 1)
          raise RuntimeError, 'unexpected call to parse_starttag'
        end
        k = i + 1 + $&.length
        tag = $&.downcase
        @lasttag = tag
      end
      while k < j
        # Stop at the "/>" that closes *this* tag; the match must start
        # exactly at k, not somewhere later in the buffer.
        break if rawdata.index(Endtagfind, k) == k
        pos = rawdata.index(Attrfind, k)
        # Ignore matches that begin beyond this tag's closing bracket.
        break unless pos && pos < j
        matched_length = $&.length
        attrname, rest, attrvalue = $1, $2, $3
        if !rest
          attrvalue = '' # was: = attrname
        elsif (attrvalue[0] == ?' && attrvalue[-1] == ?') ||
              (attrvalue[0] == ?" && attrvalue[-1] == ?")
          attrvalue = attrvalue[1..-2]
        end
        attrs << [attrname.downcase, attrvalue]
        # Continue after the match itself (which may start past k).
        k = pos + matched_length
      end
      j += 1 if rawdata[j] == ?>
      set_range(i, j)
      finish_starttag(tag, attrs)
      j
    end

    # Parse the end tag beginning at index +i+ and dispatch it via
    # finish_endtag. Returns the index just past the tag, or nil if the tag
    # is incomplete.
    def parse_endtag(i)
      rawdata = @rawdata
      j = rawdata.index(Endbracket, i + 1)
      return nil unless j
      tag = rawdata[i + 2..j - 1].strip.downcase
      j += 1 if rawdata[j] == ?>
      set_range(i, j)
      finish_endtag(tag)
      j
    end

    # Dispatch a start tag. Returns 1 if a start_* handler was called (the
    # tag is pushed on the stack), 0 if a do_* handler was called, and -1
    # if the tag is unknown.
    def finish_starttag(tag, attrs)
      method = 'start_' + tag
      if respond_to?(method)
        @stack << tag
        handle_starttag(tag, method, attrs)
        return 1
      end
      method = 'do_' + tag
      if respond_to?(method)
        handle_starttag(tag, method, attrs)
        return 0
      end
      unknown_starttag(tag, attrs)
      -1
    end

    # Dispatch an end tag. An empty +tag+ closes the most recently opened
    # tag. Otherwise the innermost open tag with that name is closed,
    # together with everything opened after it. End tags for elements that
    # are not open are reported via unknown_endtag when no end_* handler
    # exists, and ignored otherwise.
    def finish_endtag(tag)
      if tag == ''
        found = @stack.length - 1
        if found < 0
          unknown_endtag(tag)
          return
        end
      else
        unless @stack.include? tag
          method = 'end_' + tag
          unknown_endtag(tag) unless respond_to?(method)
          return
        end
        # Innermost (last) occurrence, so nested same-name tags close one
        # level at a time.
        found = @stack.rindex(tag)
      end
      while @stack.length > found
        tag = @stack[-1]
        method = 'end_' + tag
        if respond_to?(method)
          handle_endtag(tag, method)
        else
          unknown_endtag(tag)
        end
        @stack.pop
      end
    end

    # Parse the <!...> declaration beginning at index +i+ and report its
    # text through handle_special. Returns the number of characters
    # consumed, or nil if the declaration is incomplete.
    def parse_special(i)
      rawdata = @rawdata
      match = rawdata.index(Endbracket, i + 1)
      return nil unless match
      src_length = match - i + $&.length
      set_range(i, i + src_length)
      handle_special(rawdata[i + 1..(match - 1)])
      src_length
    end

    # Call the start_* / do_* handler named +method+ with +attrs+.
    def handle_starttag(tag, method, attrs)
      send(method, attrs)
    end

    # Call the end_* handler named +method+.
    def handle_endtag(tag, method)
      send(method)
    end

    # In verbose mode, print a diagnostic about an unbalanced end tag
    # together with the current open-tag stack.
    def report_unbalanced(tag)
      if @verbose
        print '*** Unbalanced </' + tag + '>', "\n"
        print '*** Stack:', @stack, "\n"
      end
    end

    # Handle the numeric character reference &#name;. Values 0..255 are
    # reported as a one-character string through handle_data; anything else
    # goes to unknown_charref.
    def handle_charref(name)
      # Character references are decimal; force base 10 so that leading
      # zeros are not interpreted as an octal prefix.
      n = Integer(name, 10)
      if !(0 <= n && n <= 255)
        unknown_charref(name)
        return
      end
      handle_data(n.chr)
    end

    # Handle the entity reference &name;. Entities listed in Entitydefs are
    # reported as data; others go to unknown_entityref.
    def handle_entityref(name)
      table = Entitydefs
      if table.include?(name)
        handle_data(table[name])
      else
        unknown_entityref(name)
        return
      end
    end

    # Hook: character data. Override in subclasses.
    def handle_data(data)
    end

    # Hook: comment text (without the delimiters). Override in subclasses.
    def handle_comment(data)
    end

    # Hook: declaration text (without the angle brackets). Override in
    # subclasses.
    def handle_special(data)
    end

    # Hook: start tag without a start_* / do_* handler.
    def unknown_starttag(tag, attrs)
    end

    # Hook: end tag without an end_* handler.
    def unknown_endtag(tag)
    end

    # Hook: character reference outside 0..255.
    def unknown_charref(ref)
    end

    # Hook: entity reference not listed in Entitydefs.
    def unknown_entityref(ref)
    end

  end
end