ruby-web 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. data/ChangeLog +474 -0
  2. data/INSTALL.txt +9 -0
  3. data/InstalledFiles +180 -0
  4. data/LICENSE.txt +74 -0
  5. data/Rakefile +529 -0
  6. data/TODO +65 -0
  7. data/doc/additional.xml +149 -0
  8. data/doc/core.xml +652 -0
  9. data/doc/credits/index.xml +52 -0
  10. data/doc/credits/php.contributors.xml +118 -0
  11. data/doc/credits/php.language-snippets.ent +622 -0
  12. data/doc/install/index.xml +136 -0
  13. data/doc/install/mac/index.xml +21 -0
  14. data/doc/install/ruby-web.install.rb.instructions.xml +7 -0
  15. data/doc/install/unix/index.xml +46 -0
  16. data/doc/install/win/apache1.xml +166 -0
  17. data/doc/install/win/apache2.xml +141 -0
  18. data/doc/install/win/iis.xml +162 -0
  19. data/doc/install/win/index.xml +24 -0
  20. data/doc/install/win/installer.xml +31 -0
  21. data/doc/install/win/manual.xml +43 -0
  22. data/doc/manual.xml +69 -0
  23. data/doc/old/apache_cgi.txt +23 -0
  24. data/doc/old/fastcgi.txt +23 -0
  25. data/doc/old/mod_ruby.txt +21 -0
  26. data/doc/old/snippets.rdoc +183 -0
  27. data/doc/old/webrick.txt +23 -0
  28. data/doc/old/windows_cgi.txt +9 -0
  29. data/doc/tutorial.xml +14 -0
  30. data/doc/xsl/manual-multi.xsl +10 -0
  31. data/doc/xsl/manual-pdf.xsl +6 -0
  32. data/doc/xsl/manual-single.xsl +6 -0
  33. data/doc/xsl/manual.css +22 -0
  34. data/install.rb +1022 -0
  35. data/lib/formatter.rb +314 -0
  36. data/lib/html-parser.rb +429 -0
  37. data/lib/htmlrepair.rb +113 -0
  38. data/lib/htmlsplit.rb +842 -0
  39. data/lib/sgml-parser.rb +332 -0
  40. data/lib/web.rb +68 -0
  41. data/lib/web/assertinclude.rb +129 -0
  42. data/lib/web/config.rb +50 -0
  43. data/lib/web/connection.rb +1070 -0
  44. data/lib/web/convenience.rb +154 -0
  45. data/lib/web/formreader.rb +318 -0
  46. data/lib/web/htmlparser/html-parser.rb +429 -0
  47. data/lib/web/htmlparser/sgml-parser.rb +332 -0
  48. data/lib/web/htmltools/element.rb +296 -0
  49. data/lib/web/htmltools/stparser.rb +276 -0
  50. data/lib/web/htmltools/tags.rb +286 -0
  51. data/lib/web/htmltools/tree.rb +139 -0
  52. data/lib/web/htmltools/xmltree.rb +160 -0
  53. data/lib/web/htmltools/xpath.rb +71 -0
  54. data/lib/web/info.rb +63 -0
  55. data/lib/web/load.rb +210 -0
  56. data/lib/web/mime.rb +87 -0
  57. data/lib/web/phprb.rb +340 -0
  58. data/lib/web/resources/test/cookie.rb +33 -0
  59. data/lib/web/resources/test/counter.rb +20 -0
  60. data/lib/web/resources/test/multipart.rb +14 -0
  61. data/lib/web/resources/test/redirect.rb +8 -0
  62. data/lib/web/resources/test/stock.rb +33 -0
  63. data/lib/web/sapi/apache.rb +129 -0
  64. data/lib/web/sapi/fastcgi.rb +22 -0
  65. data/lib/web/sapi/install/apache.rb +180 -0
  66. data/lib/web/sapi/install/iis.rb +93 -0
  67. data/lib/web/sapi/install/macosx.rb +90 -0
  68. data/lib/web/sapi/webrick.rb +86 -0
  69. data/lib/web/session.rb +83 -0
  70. data/lib/web/shim/cgi.rb +129 -0
  71. data/lib/web/shim/rails.rb +175 -0
  72. data/lib/web/stringio.rb +78 -0
  73. data/lib/web/strscanparser.rb +24 -0
  74. data/lib/web/tagparser.rb +96 -0
  75. data/lib/web/testing.rb +666 -0
  76. data/lib/web/traceoutput.rb +75 -0
  77. data/lib/web/unit.rb +56 -0
  78. data/lib/web/upload.rb +59 -0
  79. data/lib/web/validate.rb +52 -0
  80. data/lib/web/wiki.rb +557 -0
  81. data/lib/web/wiki/linker.rb +72 -0
  82. data/lib/web/wiki/page.rb +201 -0
  83. data/lib/webunit.rb +27 -0
  84. data/lib/webunit/assert.rb +152 -0
  85. data/lib/webunit/converter.rb +154 -0
  86. data/lib/webunit/cookie.rb +118 -0
  87. data/lib/webunit/domwalker.rb +185 -0
  88. data/lib/webunit/exception.rb +14 -0
  89. data/lib/webunit/form.rb +116 -0
  90. data/lib/webunit/frame.rb +37 -0
  91. data/lib/webunit/htmlelem.rb +122 -0
  92. data/lib/webunit/image.rb +26 -0
  93. data/lib/webunit/jscript.rb +31 -0
  94. data/lib/webunit/link.rb +33 -0
  95. data/lib/webunit/params.rb +321 -0
  96. data/lib/webunit/parser.rb +229 -0
  97. data/lib/webunit/response.rb +464 -0
  98. data/lib/webunit/runtest.rb +41 -0
  99. data/lib/webunit/table.rb +148 -0
  100. data/lib/webunit/testcase.rb +45 -0
  101. data/lib/webunit/ui/cui/testrunner.rb +50 -0
  102. data/lib/webunit/utils.rb +68 -0
  103. data/lib/webunit/webunit.rb +28 -0
  104. data/test/dev/action.rb +83 -0
  105. data/test/dev/forms.rb +104 -0
  106. data/test/dev/forms2.rb +104 -0
  107. data/test/dev/parser.rb +17 -0
  108. data/test/dev/scripts/dump.rb +24 -0
  109. data/test/dev/scripts/makedist.rb +62 -0
  110. data/test/dev/scripts/uri.rb +41 -0
  111. data/test/dev/scripts/uri/common.rb +432 -0
  112. data/test/dev/scripts/uri/ftp.rb +149 -0
  113. data/test/dev/scripts/uri/generic.rb +1106 -0
  114. data/test/dev/scripts/uri/http.rb +76 -0
  115. data/test/dev/scripts/uri/https.rb +26 -0
  116. data/test/dev/scripts/uri/ldap.rb +238 -0
  117. data/test/dev/scripts/uri/mailto.rb +260 -0
  118. data/test/dev/scripts/urireg.rb +174 -0
  119. data/test/dev/simpledispatcher.rb +156 -0
  120. data/test/dev/test.action.rb +146 -0
  121. data/test/dev/test.formreader.rb +463 -0
  122. data/test/dev/test.simpledispatcher.rb +186 -0
  123. data/test/dev/webunit/conv/digit-0.rb +21 -0
  124. data/test/dev/webunit/conv/digit-1.rb +17 -0
  125. data/test/dev/webunit/conv/digit.rb +23 -0
  126. data/test/dev/webunit/conv/test_digit-0.rb +16 -0
  127. data/test/dev/webunit/conv/test_digit-1.rb +19 -0
  128. data/test/dev/webunit/conv/test_digit.rb +26 -0
  129. data/test/dev/webunit/conv/test_digit_view-0.rb +76 -0
  130. data/test/dev/webunit/conv/test_digit_view-1.rb +102 -0
  131. data/test/dev/webunit/conv/test_digit_view.rb +134 -0
  132. data/test/installation/htdocs/cgi_test.rb +296 -0
  133. data/test/installation/htdocs/test_install.rb +4 -0
  134. data/test/installation/runwebtest.rb +5 -0
  135. data/test/installation/test_cookie.rb +128 -0
  136. data/test/installation/test_form.rb +47 -0
  137. data/test/installation/test_multipart.rb +51 -0
  138. data/test/installation/test_request.rb +24 -0
  139. data/test/installation/test_response.rb +35 -0
  140. data/test/unit/htdocs/cookie.rb +32 -0
  141. data/test/unit/htdocs/multipart.rb +28 -0
  142. data/test/unit/htdocs/redirect.rb +12 -0
  143. data/test/unit/htdocs/simple.rb +13 -0
  144. data/test/unit/htdocs/stock.rb +33 -0
  145. data/test/unit/test_assert.rb +162 -0
  146. data/test/unit/test_cookie.rb +114 -0
  147. data/test/unit/test_domwalker.rb +77 -0
  148. data/test/unit/test_form.rb +42 -0
  149. data/test/unit/test_frame.rb +40 -0
  150. data/test/unit/test_htmlelem.rb +74 -0
  151. data/test/unit/test_image.rb +45 -0
  152. data/test/unit/test_jscript.rb +57 -0
  153. data/test/unit/test_link.rb +85 -0
  154. data/test/unit/test_multipart.rb +51 -0
  155. data/test/unit/test_params.rb +210 -0
  156. data/test/unit/test_parser.rb +53 -0
  157. data/test/unit/test_response.rb +150 -0
  158. data/test/unit/test_table.rb +70 -0
  159. data/test/unit/test_utils.rb +106 -0
  160. data/test/unit/test_webunit.rb +28 -0
  161. data/test/web/mod_ruby_stub.rb +39 -0
  162. data/test/web/test.assertinclude.rb +109 -0
  163. data/test/web/test.buffer.rb +182 -0
  164. data/test/web/test.code.loader.rb +78 -0
  165. data/test/web/test.config.rb +31 -0
  166. data/test/web/test.error.handling.rb +91 -0
  167. data/test/web/test.formreader-2.0.rb +352 -0
  168. data/test/web/test.load.rb +125 -0
  169. data/test/web/test.mime-type.rb +23 -0
  170. data/test/web/test.narf.cgi.rb +106 -0
  171. data/test/web/test.phprb.rb +239 -0
  172. data/test/web/test.request.rb +368 -0
  173. data/test/web/test.response.rb +637 -0
  174. data/test/web/test.ruby-web.rb +10 -0
  175. data/test/web/test.session.rb +50 -0
  176. data/test/web/test.shim.cgi.rb +96 -0
  177. data/test/web/test.tagparser.rb +65 -0
  178. data/test/web/test.template2.rb +297 -0
  179. data/test/web/test.testing2.rb +318 -0
  180. data/test/web/test.upload.rb +45 -0
  181. data/test/web/test.validate.rb +46 -0
  182. data/test/web/test.web.test.rb +495 -0
  183. data/test/wiki/test.history.rb +297 -0
  184. data/test/wiki/test.illustration_page.rb +287 -0
  185. data/test/wiki/test.linker.rb +197 -0
  186. data/test/wiki/test.tarpit.rb +56 -0
  187. data/test/wiki/test.wiki.rb +300 -0
  188. data/test/wikitestroot/admin.rb +7 -0
  189. data/test/wikitestroot/wiki.rb +6 -0
  190. metadata +234 -0
@@ -0,0 +1,296 @@
1
+ # Copyright:: Copyright (C) 2002, Ned Konz <ned@bike-nomad.com>
2
+ # License:: Same as Ruby's
3
+ # CVS ID: $Id: element.rb,v 1.8 2002/06/04 01:55:59 ned Exp $
4
+
5
+ # This module is a mix-in that provides parent/child behavior to real
6
+ # Element classes. Because it defines <tt>each()</tt> and includes Enumerable,
7
+ # you can iterate through a tree using the usual Enumerable methods.
8
+
9
+ require 'web/htmltools/tags'
10
+
11
module HTMLTree #:nodoc: all
  # Mix-in that gives parent/child (tree) behaviour to the concrete node
  # classes below. It keeps two instance variables on the including object:
  # <tt>@_parent</tt> (the owning node or nil) and <tt>@_content</tt> (an
  # Array of child nodes for container nodes, or the raw data otherwise).
  # Because it defines <tt>each()</tt> and includes Enumerable, a subtree can
  # be walked with the usual Enumerable methods.
  module TreeElement
    include Enumerable

    protected

    # Set up the tree state; meant to be called from the including class's
    # <tt>initialize</tt>.
    #
    # parent_or_nil::   node to attach to, or nil for a detached node
    # contents_or_nil:: an Array (node may have children) or the node's data
    def initialize_tree_element(parent_or_nil = nil, contents_or_nil = nil)
      @_content, @_parent = contents_or_nil, parent_or_nil
      parent_or_nil.add_child(self) if parent_or_nil
    end

    # Raw parent accessor used by add_child/remove_child to re-link nodes.
    # Protected so only other tree nodes can touch it.
    attr_accessor :_parent

    public

    # Add one or more children to this node and make this node their parent.
    # Raises ArgumentError if this node cannot have children.
    def add_child(*children_to_add)
      raise ArgumentError, 'node cannot have children' unless can_have_children?

      children_to_add.each do |child|
        @_content << child
        child._parent = self
      end
    end

    alias_method(:add_children, :add_child)

    # Remove one or more children from this node. A child's parent link is
    # cleared only if it really was one of my children.
    # Raises ArgumentError if this node cannot have children.
    def remove_child(*children_to_remove)
      raise ArgumentError, 'node cannot have children' unless can_have_children?

      children_to_remove.each do |child|
        child._parent = nil if @_content.delete(child)
      end
    end

    alias_method(:remove_children, :remove_child)

    # Change my parent. Disconnects from the prior parent, if any, then
    # attaches to the new one (nil just detaches).
    def parent=(parent_or_nil)
      @_parent.remove_child(self) if @_parent
      parent_or_nil.add_child(self) if parent_or_nil
    end

    # Return true if my content is a collection of Elements
    # rather than actual data.
    def can_have_children?
      @_content.kind_of?(Array)
    end

    # Return a collection of my children. Returns an empty Array if I am a
    # data element, just to keep other methods simple.
    def children
      can_have_children? ? @_content : []
    end

    # Return my content; either my children or my data.
    def content
      @_content
    end

    # Return my parent element (nil when detached or at the top).
    def parent
      @_parent
    end

    # Return the ultimate parent: walk up the parent links until a node
    # without a parent is found. A parentless node is its own root.
    def root
      @_parent ? @_parent.root : self
    end

    # Return true if I have any children.
    def has_children?
      children.size > 0
    end

    # Depth-first, pre-order iterator (required by Enumerable): yields this
    # node first, then each child's subtree in order. Without a block an
    # Enumerator is returned.
    def each(&block)
      return to_enum(:each) unless block

      block.call(self)
      children.each { |ch| ch.each(&block) }
    end

    # Append a formatted dump of my structure, one node per line and
    # indented one space per level, to $stdout (or the given IO or String).
    def dump(indent=0, io=$stdout)
      io << " " * indent
      io << self.to_s
      io << "\n"
      children.each { |ea| ea.dump(indent+1, io) }
    end
  end

  # This is an Element that represents the whole document (and makes a
  # scope for the DTD declaration).
  class Document
    include TreeElement

    def initialize
      initialize_tree_element(nil, [])
    end

    # The document node itself has no textual form.
    def to_s
      ''
    end

    # Iterate over every node below the document; unlike TreeElement#each
    # the document node itself is not yielded. Without a block an
    # Enumerator is returned.
    def each(&block)
      return to_enum(:each) unless block

      children.each { |ch| ch.each(&block) }
    end

    # Write every top-level child (and its descendants) to +io+.
    def write(io)
      children.each { |t| t.write(io) }
    end

    # Return my (empty) tag String.
    def tag
      ''
    end

    # Return my child <html> node, if any.
    def html_node
      children.detect { |ea| ea.tag == 'html' }
    end
  end

  # This is a TreeElement that represents tagged items in an HTML
  # document.
  class Element
    include TreeElement

    protected

    # parent_or_nil:: TreeElement or nil
    # tag_name:: String
    def initialize(parent_or_nil = nil, tag_name = nil)
      initialize_tree_element(parent_or_nil, [])
      @_tag = tag_name
      @_attributes = {}
      @_attribute_order = []
    end

    public

    def can_have_children?; true; end

    # Return true if I'm data instead of a tag
    def data?; false; end

    # Return my start tag, with attributes in the order recorded in
    # attribute_order.
    def to_s
      parts = [ "<", tag ]
      @_attribute_order.each do |k|
        # Array() copes with both single (String) and multiple (Array) values.
        # NOTE(review): multiple values are joined with one space; confirm
        # that is the serialisation callers expect.
        parts << " #{k}=\"#{Array(@_attributes[k]).join(' ')}\""
      end
      parts << ">"
      parts.join('')
    end

    # Append an attribute. <tt>values</tt> are first flattened into an Array,
    # then converted into stripped strings. The name is lower-cased.
    #
    # If there is a single attribute value, it will appear as a String,
    # otherwise it will be an Array of Strings. Adding to a name that already
    # exists appends to its values; the name keeps its original position in
    # attribute_order and is listed only once.
    #
    # Returns self so calls can be chained.
    #
    # Example:
    #   element.add_attribute("width", "123")
    #   element.add_attribute("value", [ "a", "b" ])
    def add_attribute(name, *values)
      values = values.flatten.collect { |ea| ea.to_s.strip }
      name = name.downcase
      if @_attributes.include?(name)
        # Array() instead of #to_a: String no longer responds to #to_a.
        @_attributes[name] = Array(@_attributes[name]) + values
      else
        @_attributes[name] = values.size > 1 ? values : values[0]
      end
      @_attribute_order << name unless @_attribute_order.include?(name)
      self
    end

    # Return my tag (should be a String)
    def tag; @_tag; end

    # Return an HTML::Tag for further information, or nil if this is an
    # unknown tag.
    def tag_info
      begin
        HTML::Tag.named(@_tag)
      rescue NoSuchHTMLTagError
        nil
      end
    end

    # Return my attributes Hash.
    def attributes; @_attributes; end

    # Return the order of my attributes
    def attribute_order; @_attribute_order; end

    # Return the value of a single attribute (a String or Array).
    # The name is looked up as given (it is not lower-cased here).
    def attribute(name); @_attributes[name]; end

    # Return the value of a single attribute (a String or Array).
    def [](name); attribute(name); end

    # Replace an attribute. The name is moved to the end of attribute_order
    # so the new value is still emitted by to_s.
    def []=(name, *values)
      @_attributes[name] = values.size > 1 ? values : values[0]
      @_attribute_order.delete(name)
      @_attribute_order << name
      self
    end

    # Print me (and my descendents) on the given IO stream. An end tag is
    # written unless the tag is known to be an empty element; unknown tags
    # (tag_info is nil) are treated as non-empty.
    def write(io)
      io << to_s
      children.each { |t| t.write(io) }
      info = tag_info
      io.puts( "</#{tag()}>" ) unless info && info.is_empty_element
    end
  end

  # This is a TreeElement that represents leaf data nodes (CDATA, scripts,
  # comments, processing directives). It forwards unknown messages to the
  # content element, so it otherwise behaves like a String.
  class Data
    include TreeElement

    protected

    # parent_or_nil:: parent, TreeElement or nil
    # str:: contents, String
    def initialize(parent_or_nil = nil, str = '')
      initialize_tree_element(parent_or_nil, str)
    end

    public

    # Return true because I am a data Element.
    def data?; true; end

    # Return false because I have no children.
    def can_have_children?; false; end

    # Return an empty collection because I have no children.
    def children; []; end

    # Return my (empty) tag String.
    def tag; ''; end

    # Return my (empty) attributes Hash.
    def attributes; {}; end

    def to_s
      @_content
    end

    # Print me on the given IO stream.
    def write(io)
      io << to_s
    end

    # Forward all other methods (including any block) to my content, so I
    # can otherwise behave like a String.
    def method_missing(sym, *args, &block)
      @_content.method(sym).call(*args, &block)
    end

    # Keep respond_to? truthful about the messages method_missing forwards.
    def respond_to_missing?(sym, include_private = false)
      @_content.respond_to?(sym, include_private) || super
    end
  end

  # Data node rendered as an HTML comment.
  class Comment < Data
    def to_s
      '<!--' + @_content + '-->'
    end
  end

  # Data node rendered between angle brackets (e.g. declarations).
  class Special < Data
    def to_s
      '<' + @_content + '>'
    end
  end
end
@@ -0,0 +1,276 @@
1
+ # Copyright:: Copyright(C) 2002 Ned Konz
2
+ # License:: Ruby's License
3
+ # CVS ID:: $Id: stparser.rb,v 1.6 2002/06/04 01:55:59 ned Exp $
4
+
5
+ require 'web/htmlparser/sgml-parser'
6
+ require 'web/htmltools/tags'
7
+
8
+ # This is an SGMLParser subclass that knows about HTML 4.0 rules
9
+ # and can spot empty tags and deal with tags that may have omitted endtags.
10
module HTML #:nodoc: all
  # SGMLParser subclass that maintains a stack of the currently open tags.
  # It consults HTML::Tag to spot empty elements and to close tags whose end
  # tags were omitted, and reports what it finds through the handle_*
  # callbacks defined at the bottom of the class.
  class StackingParser < SGMLParser
    # accessors

    # The stack of currently open (lower-cased) tag names, innermost last.
    def stack; @tagStack; end

    # Innermost open tag, or 'html' when the stack is empty.
    def last_tag; @tagStack[-1] || 'html'; end

    # Tag enclosing the innermost one, or 'html' when there is none.
    def parent_tag; @tagStack[-2] || 'html'; end

    # Turn whitespace stripping of character data and comments on or off.
    def strip_whitespace=(flag); @stripWhitespace = flag; end

    # input methods

    # Open and parse the given file, feeding it in 64 KiB chunks.
    def parse_file_named(name)
      File.open(name) do |f|
        while bytes = f.read(65536)
          feed(bytes)
        end
      end
    end

    # Feed some more data to the parser. Anything skip_script set aside in
    # @saved (the text following a script body) is fed back in until
    # nothing is left over.
    def feed(string)
      super
      while @saved.size > 0
        saved = @saved
        @saved = ''
        super(saved)
      end
    end

    # available only to subclasses
    private

    # Debug trace, indented by stack depth. Only does anything when the file
    # was loaded with $DEBUG set and the parser is verbose.
    if $DEBUG
      def dprint(*stuff)
        # Splat the arguments: printing the Array itself would emit its
        # inspect form on current Rubies.
        print((" " * @tagStack.size), *stuff) if @verbose
      end
    else
      def dprint(*stuff); end
    end

    # Write a warning to $stderr when verbose.
    def warn(msg)
      $stderr.print(msg) if @verbose
    end

    # verbose::     passed to SGMLParser; also enables warnings/debug output
    # strip_white:: initial value for the strip_whitespace flag
    def initialize(verbose=false, strip_white=false)
      super(verbose)
      @tagStack = []
      @saved = ''
      @stripWhitespace = strip_white
    end

    # handle_data will call this while inside a script. Everything up to the
    # first thing that looks like an end tag is script text; the remainder
    # is saved so feed can re-parse it with tag recognition switched on.
    def skip_script(data)
      # is the end of the script in this buffer?
      if m = data.index(%r{</[A-Za-z]})
        @nomoretags = false
        @saved = data[m..-1]
        handle_script(data[0,m]) # call user handler
      else
        handle_script(data)
      end
    end

    # Unfortunately, sgml-parser calls this and there's important work to do in
    # it. So the user handler has to be named something different
    # (handle_cdata, or handle_script for script bodies).
    def handle_data(data)
      # need to handle scripts
      if last_tag() == 'script' && @nomoretags
        skip_script(data)
      else
        if @stripWhitespace
          begin
            data.strip! if HTML::Tag.named(last_tag()).can_ignore_whitespace
          rescue NoSuchHTMLTagError
            # unknown enclosing tag: strip anyway
            data.strip!
          end
        end
        handle_cdata(data) if data.size > 0 # call user handler
      end
    end

    # Called for every start tag. Closes the current tag first if it cannot
    # contain the new one, pushes the new tag, and dispatches to
    # handle_empty_tag, handle_start_tag or handle_unknown_tag.
    def finish_starttag(tag, attrs)
      dprint "*START* #{tag} #{attrs.inspect}\n"
      begin
        unless HTML::Tag.named(last_tag()).can_contain(tag, parent_tag())
          dprint "-INSERT-\n"
          finish_endtag(last_tag())
        end
      rescue NoSuchHTMLTagError
        # hmm.. last_tag was unknown.
        # Assume it doesn't have an optional endtag.
      end

      push(tag)

      begin
        if HTML::Tag.named(tag).is_empty_element
          dprint "-EMPTY-\n"
          handle_empty_tag(tag, attrs) # call user handler
          drop_to_tag(tag)
        else
          handle_start_tag(tag, attrs) # call user handler
        end

        # From here until skip_script sees an end tag, data is script text.
        if tag.downcase == 'script'
          @nomoretags = true
        end
      rescue NoSuchHTMLTagError
        # hmm... the start tag is unknown.
        # And we pushed it.
        # If it's empty, we'll get rid of it at the next end tag.
        handle_unknown_tag(tag, attrs)
      end
    end

    # Pop the stack down to and including the most recent +tag+, reporting
    # any popped tag whose end tag was required but missing.
    # Returns true if tag is not extra (i.e. it was found on the stack).
    def drop_to_tag(tag)
      # The stack holds lower-cased names (see push), so compare in lower
      # case throughout; the handlers still get the tag as it was given.
      wanted = tag.downcase
      dropped = @tagStack.size - (@tagStack.rindex(wanted) || @tagStack.size)
      if dropped == 0 # got an end tag but we haven't seen start tag?
        handle_extra_end_tag(tag) # call user handler
        return false
      end
      dropped.times do
        begin
          # detect missing end tag
          if last_tag != wanted && !HTML::Tag.named(last_tag).can_omit_end_tag
            handle_missing_end_tag(last_tag) # call user handler
          end
        rescue NoSuchHTMLTagError
          # oops, don't recognize last_tag.
        end
        pop
      end
      return true
    end

    # Called for every end tag; the user handler is only invoked when the
    # tag was actually open.
    def finish_endtag(tag)
      dprint "*END* #{tag}\n"
      if drop_to_tag(tag)
        dprint "-END- #{tag} #{@tagStack.inspect}\n"
        handle_end_tag(tag) # call user handler
      end
    end

    # Push the lower-cased tag name on the stack.
    def push(tag)
      @tagStack.push(tag.downcase)
      dprint "*PUSH* #{tag} => #{@tagStack.inspect}\n"
    end

    # Pop and return the innermost tag name.
    def pop
      tag = @tagStack.pop
      dprint "*POP* #{tag} => #{@tagStack.inspect}\n"
      tag
    end

    def unknown_charref(name)
      handle_unknown_character(name)
    end

    def unknown_entityref(name)
      handle_unknown_entity(name)
    end

    # callbacks: can be overridden in subclasses

    def handle_start_tag(tag, attrs)
    end

    def handle_end_tag(tag)
    end

    # by default, an empty tag is handled as a start tag
    # with an inserted end tag.
    def handle_empty_tag(tag, attrs)
      handle_start_tag(tag, attrs)
      handle_end_tag(tag)
    end

    def handle_unknown_tag(tag, attrs)
      warn("warning: unknown tag #{tag}\n")
    end

    def handle_missing_end_tag(tag)
      warn("warning: missing end tag </#{tag}>\n")
    end

    def handle_extra_end_tag(tag)
      warn("warning: extra end tag </#{tag}>\n")
    end

    def handle_cdata(data)
    end

    def handle_script(data)
    end

    def handle_unknown_character(name)
    end

    def handle_unknown_entity(name)
    end

    # call super if you want the data stripped
    # (the String is stripped in place, so the caller's +data+ changes).
    def handle_comment(data)
      data.strip! if @stripWhitespace
    end

    def handle_special(data)
    end
  end
end
227
+
228
+ # test script
229
if __FILE__ == $0
  $stdout.sync = true

  # Minimal StackingParser subclass for manual testing: it prints start
  # tags, non-style character data (prefixed with the open-tag path),
  # unknown character/entity references, comments and specials.
  class TestStackingParser < HTML::StackingParser #:nodoc: all
    # Print the open tags, outermost first, each followed by '/'.
    def dump_stack
      stack.each do |open_tag|
        print open_tag, '/'
      end
    end

    def handle_start_tag(tag, attrs)
      print("START: #{tag} #{attrs.inspect}\n")
    end

    # End tags are deliberately not reported.
    def handle_end_tag(tag)
    end

    # Empty tags are deliberately not reported.
    def handle_empty_tag(tag, attrs)
    end

    # Show non-blank text outside <style>, preceded by the tag path.
    def handle_cdata(data)
      return if last_tag() == 'style'

      text = data.strip
      return unless text.size > 0

      dump_stack
      print(text.inspect, "\n")
    end

    # Script bodies are deliberately not reported.
    def handle_script(data)
    end

    def handle_unknown_character(name)
      print("UNKC: #{name}\n")
    end

    def handle_unknown_entity(name)
      print("UNKE: #{name}\n")
    end

    # super strips the comment in place when whitespace stripping is on.
    def handle_comment(data)
      super
      print("COMMENT: #{data}\n")
    end

    def handle_special(data)
      print("SPECIAL: #{data}\n")
    end
  end

  $DEBUG = false
  input_name = ARGV[0] || 'ebay.html'
  parser = TestStackingParser.new(true)
  parser.parse_file_named(input_name)
end