ruby-web 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +474 -0
- data/INSTALL.txt +9 -0
- data/InstalledFiles +180 -0
- data/LICENSE.txt +74 -0
- data/Rakefile +529 -0
- data/TODO +65 -0
- data/doc/additional.xml +149 -0
- data/doc/core.xml +652 -0
- data/doc/credits/index.xml +52 -0
- data/doc/credits/php.contributors.xml +118 -0
- data/doc/credits/php.language-snippets.ent +622 -0
- data/doc/install/index.xml +136 -0
- data/doc/install/mac/index.xml +21 -0
- data/doc/install/ruby-web.install.rb.instructions.xml +7 -0
- data/doc/install/unix/index.xml +46 -0
- data/doc/install/win/apache1.xml +166 -0
- data/doc/install/win/apache2.xml +141 -0
- data/doc/install/win/iis.xml +162 -0
- data/doc/install/win/index.xml +24 -0
- data/doc/install/win/installer.xml +31 -0
- data/doc/install/win/manual.xml +43 -0
- data/doc/manual.xml +69 -0
- data/doc/old/apache_cgi.txt +23 -0
- data/doc/old/fastcgi.txt +23 -0
- data/doc/old/mod_ruby.txt +21 -0
- data/doc/old/snippets.rdoc +183 -0
- data/doc/old/webrick.txt +23 -0
- data/doc/old/windows_cgi.txt +9 -0
- data/doc/tutorial.xml +14 -0
- data/doc/xsl/manual-multi.xsl +10 -0
- data/doc/xsl/manual-pdf.xsl +6 -0
- data/doc/xsl/manual-single.xsl +6 -0
- data/doc/xsl/manual.css +22 -0
- data/install.rb +1022 -0
- data/lib/formatter.rb +314 -0
- data/lib/html-parser.rb +429 -0
- data/lib/htmlrepair.rb +113 -0
- data/lib/htmlsplit.rb +842 -0
- data/lib/sgml-parser.rb +332 -0
- data/lib/web.rb +68 -0
- data/lib/web/assertinclude.rb +129 -0
- data/lib/web/config.rb +50 -0
- data/lib/web/connection.rb +1070 -0
- data/lib/web/convenience.rb +154 -0
- data/lib/web/formreader.rb +318 -0
- data/lib/web/htmlparser/html-parser.rb +429 -0
- data/lib/web/htmlparser/sgml-parser.rb +332 -0
- data/lib/web/htmltools/element.rb +296 -0
- data/lib/web/htmltools/stparser.rb +276 -0
- data/lib/web/htmltools/tags.rb +286 -0
- data/lib/web/htmltools/tree.rb +139 -0
- data/lib/web/htmltools/xmltree.rb +160 -0
- data/lib/web/htmltools/xpath.rb +71 -0
- data/lib/web/info.rb +63 -0
- data/lib/web/load.rb +210 -0
- data/lib/web/mime.rb +87 -0
- data/lib/web/phprb.rb +340 -0
- data/lib/web/resources/test/cookie.rb +33 -0
- data/lib/web/resources/test/counter.rb +20 -0
- data/lib/web/resources/test/multipart.rb +14 -0
- data/lib/web/resources/test/redirect.rb +8 -0
- data/lib/web/resources/test/stock.rb +33 -0
- data/lib/web/sapi/apache.rb +129 -0
- data/lib/web/sapi/fastcgi.rb +22 -0
- data/lib/web/sapi/install/apache.rb +180 -0
- data/lib/web/sapi/install/iis.rb +93 -0
- data/lib/web/sapi/install/macosx.rb +90 -0
- data/lib/web/sapi/webrick.rb +86 -0
- data/lib/web/session.rb +83 -0
- data/lib/web/shim/cgi.rb +129 -0
- data/lib/web/shim/rails.rb +175 -0
- data/lib/web/stringio.rb +78 -0
- data/lib/web/strscanparser.rb +24 -0
- data/lib/web/tagparser.rb +96 -0
- data/lib/web/testing.rb +666 -0
- data/lib/web/traceoutput.rb +75 -0
- data/lib/web/unit.rb +56 -0
- data/lib/web/upload.rb +59 -0
- data/lib/web/validate.rb +52 -0
- data/lib/web/wiki.rb +557 -0
- data/lib/web/wiki/linker.rb +72 -0
- data/lib/web/wiki/page.rb +201 -0
- data/lib/webunit.rb +27 -0
- data/lib/webunit/assert.rb +152 -0
- data/lib/webunit/converter.rb +154 -0
- data/lib/webunit/cookie.rb +118 -0
- data/lib/webunit/domwalker.rb +185 -0
- data/lib/webunit/exception.rb +14 -0
- data/lib/webunit/form.rb +116 -0
- data/lib/webunit/frame.rb +37 -0
- data/lib/webunit/htmlelem.rb +122 -0
- data/lib/webunit/image.rb +26 -0
- data/lib/webunit/jscript.rb +31 -0
- data/lib/webunit/link.rb +33 -0
- data/lib/webunit/params.rb +321 -0
- data/lib/webunit/parser.rb +229 -0
- data/lib/webunit/response.rb +464 -0
- data/lib/webunit/runtest.rb +41 -0
- data/lib/webunit/table.rb +148 -0
- data/lib/webunit/testcase.rb +45 -0
- data/lib/webunit/ui/cui/testrunner.rb +50 -0
- data/lib/webunit/utils.rb +68 -0
- data/lib/webunit/webunit.rb +28 -0
- data/test/dev/action.rb +83 -0
- data/test/dev/forms.rb +104 -0
- data/test/dev/forms2.rb +104 -0
- data/test/dev/parser.rb +17 -0
- data/test/dev/scripts/dump.rb +24 -0
- data/test/dev/scripts/makedist.rb +62 -0
- data/test/dev/scripts/uri.rb +41 -0
- data/test/dev/scripts/uri/common.rb +432 -0
- data/test/dev/scripts/uri/ftp.rb +149 -0
- data/test/dev/scripts/uri/generic.rb +1106 -0
- data/test/dev/scripts/uri/http.rb +76 -0
- data/test/dev/scripts/uri/https.rb +26 -0
- data/test/dev/scripts/uri/ldap.rb +238 -0
- data/test/dev/scripts/uri/mailto.rb +260 -0
- data/test/dev/scripts/urireg.rb +174 -0
- data/test/dev/simpledispatcher.rb +156 -0
- data/test/dev/test.action.rb +146 -0
- data/test/dev/test.formreader.rb +463 -0
- data/test/dev/test.simpledispatcher.rb +186 -0
- data/test/dev/webunit/conv/digit-0.rb +21 -0
- data/test/dev/webunit/conv/digit-1.rb +17 -0
- data/test/dev/webunit/conv/digit.rb +23 -0
- data/test/dev/webunit/conv/test_digit-0.rb +16 -0
- data/test/dev/webunit/conv/test_digit-1.rb +19 -0
- data/test/dev/webunit/conv/test_digit.rb +26 -0
- data/test/dev/webunit/conv/test_digit_view-0.rb +76 -0
- data/test/dev/webunit/conv/test_digit_view-1.rb +102 -0
- data/test/dev/webunit/conv/test_digit_view.rb +134 -0
- data/test/installation/htdocs/cgi_test.rb +296 -0
- data/test/installation/htdocs/test_install.rb +4 -0
- data/test/installation/runwebtest.rb +5 -0
- data/test/installation/test_cookie.rb +128 -0
- data/test/installation/test_form.rb +47 -0
- data/test/installation/test_multipart.rb +51 -0
- data/test/installation/test_request.rb +24 -0
- data/test/installation/test_response.rb +35 -0
- data/test/unit/htdocs/cookie.rb +32 -0
- data/test/unit/htdocs/multipart.rb +28 -0
- data/test/unit/htdocs/redirect.rb +12 -0
- data/test/unit/htdocs/simple.rb +13 -0
- data/test/unit/htdocs/stock.rb +33 -0
- data/test/unit/test_assert.rb +162 -0
- data/test/unit/test_cookie.rb +114 -0
- data/test/unit/test_domwalker.rb +77 -0
- data/test/unit/test_form.rb +42 -0
- data/test/unit/test_frame.rb +40 -0
- data/test/unit/test_htmlelem.rb +74 -0
- data/test/unit/test_image.rb +45 -0
- data/test/unit/test_jscript.rb +57 -0
- data/test/unit/test_link.rb +85 -0
- data/test/unit/test_multipart.rb +51 -0
- data/test/unit/test_params.rb +210 -0
- data/test/unit/test_parser.rb +53 -0
- data/test/unit/test_response.rb +150 -0
- data/test/unit/test_table.rb +70 -0
- data/test/unit/test_utils.rb +106 -0
- data/test/unit/test_webunit.rb +28 -0
- data/test/web/mod_ruby_stub.rb +39 -0
- data/test/web/test.assertinclude.rb +109 -0
- data/test/web/test.buffer.rb +182 -0
- data/test/web/test.code.loader.rb +78 -0
- data/test/web/test.config.rb +31 -0
- data/test/web/test.error.handling.rb +91 -0
- data/test/web/test.formreader-2.0.rb +352 -0
- data/test/web/test.load.rb +125 -0
- data/test/web/test.mime-type.rb +23 -0
- data/test/web/test.narf.cgi.rb +106 -0
- data/test/web/test.phprb.rb +239 -0
- data/test/web/test.request.rb +368 -0
- data/test/web/test.response.rb +637 -0
- data/test/web/test.ruby-web.rb +10 -0
- data/test/web/test.session.rb +50 -0
- data/test/web/test.shim.cgi.rb +96 -0
- data/test/web/test.tagparser.rb +65 -0
- data/test/web/test.template2.rb +297 -0
- data/test/web/test.testing2.rb +318 -0
- data/test/web/test.upload.rb +45 -0
- data/test/web/test.validate.rb +46 -0
- data/test/web/test.web.test.rb +495 -0
- data/test/wiki/test.history.rb +297 -0
- data/test/wiki/test.illustration_page.rb +287 -0
- data/test/wiki/test.linker.rb +197 -0
- data/test/wiki/test.tarpit.rb +56 -0
- data/test/wiki/test.wiki.rb +300 -0
- data/test/wikitestroot/admin.rb +7 -0
- data/test/wikitestroot/wiki.rb +6 -0
- metadata +234 -0
@@ -0,0 +1,296 @@
|
|
1
|
+
# Copyright:: Copyright (C) 2002, Ned Konz <ned@bike-nomad.com>
|
2
|
+
# License:: Same as Ruby's
|
3
|
+
# CVS ID: $Id: element.rb,v 1.8 2002/06/04 01:55:59 ned Exp $
|
4
|
+
|
5
|
+
# This module is a mix-in that provides parent/child behavior to real
|
6
|
+
# Element classes. Because it defines <tt>each()</tt> and includes Enumerable,
|
7
|
+
# you can iterate through a tree using the usual Enumerable methods.
|
8
|
+
|
9
|
+
require 'web/htmltools/tags'
|
10
|
+
|
11
|
+
module HTMLTree #:nodoc: all
  # Mix-in that gives parent/child behavior to real element classes.
  # Including classes call +initialize_tree_element+ from their own
  # constructor. Because the module defines <tt>each()</tt> and includes
  # Enumerable, a tree can be walked with the usual Enumerable methods.
  module TreeElement
    include Enumerable

    protected

    # Set up the tree state of a freshly created node and, when a parent is
    # given, register this node as one of the parent's children.
    #
    # parent_or_nil::   TreeElement to attach to, or nil for a detached node
    # contents_or_nil:: an Array (the node can hold children) or the node's data
    def initialize_tree_element(parent_or_nil = nil, contents_or_nil = nil)
      @_content, @_parent = contents_or_nil, parent_or_nil
      parent_or_nil.add_child(self) if parent_or_nil
    end

    # Raw parent link; only other tree elements may rewrite it.
    attr_accessor :_parent

    public

    # Add one or more children to this node.
    # Raises ArgumentError if this node cannot have children.
    def add_child(*children_to_add)
      raise ArgumentError, 'node cannot have children' unless can_have_children?

      children_to_add.each do |child|
        @_content << child
        child._parent = self
      end
    end

    alias_method(:add_children, :add_child)

    # Remove one or more children from this node. Only children that were
    # actually found in my content have their parent link cleared.
    # Raises ArgumentError if this node cannot have children.
    def remove_child(*children_to_remove)
      raise ArgumentError, 'node cannot have children' unless can_have_children?

      children_to_remove.each do |child|
        child._parent = nil if @_content.delete(child)
      end
    end

    alias_method(:remove_children, :remove_child)

    # Change my parent. Disconnects from prior parent, if any.
    # Raises ArgumentError if the new parent cannot have children.
    def parent=(parent_or_nil)
      # Check the new parent before detaching, so a rejected move does not
      # leave this node orphaned.
      if parent_or_nil && !parent_or_nil.can_have_children?
        raise ArgumentError, 'node cannot have children'
      end

      @_parent.remove_child(self) if @_parent
      parent_or_nil.add_child(self) if parent_or_nil
    end

    # Return true if my content is a collection of Elements
    # rather than actual data.
    def can_have_children?
      @_content.kind_of?(Array)
    end

    # Return a collection of my children. Returns an empty Array if I am a
    # data element, just to keep other methods simple.
    def children
      can_have_children? ? @_content : []
    end

    # Return my content; either my children or my data.
    def content
      @_content
    end

    # Return my parent element (nil when detached).
    def parent
      @_parent
    end

    # Return the ultimate parent: walk up the parent links until a node
    # without a parent is reached. A detached node is its own root.
    def root
      @_parent ? @_parent.root : self
    end

    # Return true if I have any children.
    def has_children?
      !children.empty?
    end

    # Depth-first (pre-order) iterator, required by Enumerable: yields this
    # node, then each child's subtree in order. Returns an Enumerator when
    # called without a block.
    def each(&block)
      return to_enum(:each) unless block

      block.call(self)
      children.each { |ch| ch.each(&block) }
    end

    # Print out to $stdout (or given IO or String)
    # a formatted dump of my structure, one node per line, indented by depth.
    def dump(indent=0, io=$stdout)
      io << " " * indent
      io << self.to_s
      io << "\n"
      children.each { |ea| ea.dump(indent+1, io) }
    end
  end

  # This is a Element that represents the whole document (and makes a
  # scope for the DTD declaration)
  class Document
    include TreeElement

    # Create an empty, parentless document node.
    def initialize
      initialize_tree_element(nil, [])
    end

    # The document node itself has no textual representation.
    def to_s
      ''
    end

    # Iterate over every node below the document; the document node itself
    # is not yielded. Returns an Enumerator when called without a block.
    def each(&block)
      return to_enum(:each) unless block

      children.each { |ch| ch.each(&block) }
    end

    # Write each of my children to the given IO stream.
    def write(io)
      children.each { |t| t.write(io) }
    end

    # Return my (empty) tag String.
    def tag
      ''
    end

    # Return my child <html> node, if any.
    def html_node
      children.detect { |ea| ea.tag == 'html' }
    end
  end

  # This is a TreeElement that represents tagged items in an HTML
  # document.
  class Element
    include TreeElement

    # parent_or_nil:: TreeElement or nil
    # tag_name:: String
    def initialize(parent_or_nil = nil, tag_name = nil)
      initialize_tree_element(parent_or_nil, [])
      @_tag = tag_name
      @_attributes = {}
      @_attribute_order = []
    end

    # Elements can always hold children.
    def can_have_children?; true; end

    # Return true if I'm data instead of a tag
    def data?; false; end

    # Return my start tag, with attributes in the order they were added.
    # NOTE(review): a multi-valued attribute is rendered with Array#to_s;
    # confirm that this is the intended markup.
    def to_s
      rendered = @_attribute_order.map { |k| " #{k}=\"#{@_attributes[k]}\"" }
      "<#{tag}#{rendered.join}>"
    end

    # Append an attribute. <tt>values</tt> are first flattened into an Array,
    # then converted into strings. The name is lower-cased.
    #
    # If there is a single attribute value, it will appear as a String,
    # otherwise it will be an Array of Strings. Adding to a name that already
    # exists appends the new values to the existing ones.
    #
    # Example:
    #   element.add_attribute("width", "123")
    #   element.add_attribute("value", [ "a", "b" ])
    def add_attribute(name, *values)
      values = values.flatten.collect { |ea| ea.to_s.strip }
      name = name.downcase
      if @_attributes.include?(name)
        # Array() wraps a lone String; String#to_a is gone in Ruby 1.9+.
        @_attributes[name] = Array(@_attributes[name]) + values
      else
        @_attributes[name] = values.size > 1 ? values : values[0]
      end
      # Record each name once so to_s does not print the attribute twice.
      @_attribute_order << name unless @_attribute_order.include?(name)
      self
    end

    # Return my tag (should be a String)
    def tag; @_tag; end

    # Return an HTML::Tag for further information, or nil if this is an
    # unknown tag.
    def tag_info
      HTML::Tag.named(@_tag)
    rescue NoSuchHTMLTagError
      nil
    end

    # Return my attributes Hash.
    def attributes; @_attributes; end

    # Return the order of my attributes
    def attribute_order; @_attribute_order; end

    # Return the value of a single attribute (a String or Array).
    def attribute(name); @_attributes[name]; end

    # Return the value of a single attribute (a String or Array).
    def [](name); attribute(name); end

    # Replace an attribute. The name is used as given (not lower-cased) and
    # moves to the end of the attribute order.
    def []=(name, *values)
      @_attributes[name] = values.size > 1 ? values : values[0]
      @_attribute_order.delete(name)
      # Re-register the name; otherwise to_s would drop the attribute.
      @_attribute_order << name
      self
    end

    # Print me (and my descendents) on the given IO stream. The end tag is
    # omitted only for tags known to be empty elements; unknown tags get one.
    def write(io)
      io << self
      children.each { |t| t.write(io) }
      info = tag_info # nil when the tag is unknown
      io.puts("</#{tag}>") unless info && info.is_empty_element
    end
  end

  # This is a TreeElement that represents leaf data nodes (CDATA, scripts,
  # comments, processing directives). It forwards unknown messages to the
  # content element, so it otherwise behaves like a String.
  class Data
    include TreeElement

    # parent_or_nil:: parent, TreeElement or nil
    # str:: contents, String
    def initialize(parent_or_nil = nil, str = '')
      initialize_tree_element(parent_or_nil, str)
    end

    # Return true because I am a data Element.
    def data?; true; end

    # Return false because I have no children.
    def can_have_children?; false; end

    # Return an empty collection because I have no children.
    def children; []; end

    # Return my (empty) tag String.
    def tag; ''; end

    # Return my (empty) attributes Hash.
    def attributes; {}; end

    # Return my content.
    def to_s
      @_content
    end

    # Print me on the given IO stream.
    def write(io)
      io << self
    end

    # Forward all other methods to my content, so I can otherwise behave
    # like a String. The caller's block is passed along; messages my content
    # does not understand fall through to the normal NoMethodError.
    def method_missing(sym, *args, &block)
      return super unless @_content.respond_to?(sym, true)

      @_content.__send__(sym, *args, &block)
    end

    # Report forwarded methods through respond_to?.
    def respond_to_missing?(sym, include_private = false)
      @_content.respond_to?(sym, include_private) || super
    end
  end

  # Data node rendered as an HTML comment.
  class Comment < Data
    def to_s
      "<!--#{@_content}-->"
    end
  end

  # Data node rendered between angle brackets.
  class Special < Data
    def to_s
      "<#{@_content}>"
    end
  end
end
|
@@ -0,0 +1,276 @@
|
|
1
|
+
# Copyright:: Copyright(C) 2002 Ned Konz
|
2
|
+
# License:: Ruby's License
|
3
|
+
# CVS ID:: $Id: stparser.rb,v 1.6 2002/06/04 01:55:59 ned Exp $
|
4
|
+
|
5
|
+
require 'web/htmlparser/sgml-parser'
|
6
|
+
require 'web/htmltools/tags'
|
7
|
+
|
8
|
+
# This is an SGMLParser subclass that knows about HTML 4.0 rules
|
9
|
+
# and can spot empty tags and deal with tags that may have omitted endtags.
|
10
|
+
module HTML #:nodoc: all
  # SGMLParser subclass that keeps a stack of the currently open tags. It
  # consults HTML::Tag to spot empty elements and to close tags whose end
  # tag was omitted, and reports what it finds through the handle_* callbacks
  # near the end of the class.
  class StackingParser < SGMLParser
    # accessors

    # The stack of open tag names (lower-cased), outermost first.
    def stack; @tagStack; end

    # Innermost open tag, or 'html' when the stack is empty.
    def last_tag; @tagStack[-1] || 'html'; end

    # Tag enclosing the innermost open tag, or 'html' when there is none.
    def parent_tag; @tagStack[-2] || 'html'; end

    # Turn whitespace stripping of character data and comments on or off.
    def strip_whitespace=(flag); @stripWhitespace = flag; end

    # input methods

    # Open and parse the given file, feeding it in 64 KiB chunks.
    def parse_file_named(name)
      File.open(name) { |f|
        while bytes = f.read(65536)
          feed(bytes)
        end
      }
    end

    # Feed some more data to the parser. Once the superclass has consumed
    # the string, any text that skip_script stashed in @saved is fed back in
    # until nothing is left over.
    def feed(string)
      super
      until @saved.empty?
        pending = @saved
        @saved = ''
        super(pending)
      end
    end

    # available only to subclasses
    private

    # Debug trace, indented by stack depth. Which definition is used is
    # decided by the value of $DEBUG when this file is loaded.
    if $DEBUG
      def dprint(*stuff)
        print((" " * @tagStack.size), stuff) if @verbose
      end
    else
      def dprint(*stuff); end
    end

    # Print a warning on $stderr, but only when @verbose is set.
    def warn(msg)
      $stderr.print(msg) if @verbose
    end

    # verbose::     passed on to SGMLParser; also gates warn and dprint
    # strip_white:: initial value of the whitespace-stripping flag
    def initialize(verbose=false, strip_white=false)
      super(verbose)
      @tagStack = []
      @saved = ''
      @stripWhitespace = strip_white
    end

    # handle_data will call this.
    # Hands script text to handle_script. When the buffer contains what looks
    # like an end tag, only the text before it is treated as script; the rest
    # is saved so feed can parse it as markup again.
    def skip_script(data)
      # is the end of the script in this buffer?
      if m = data.index(%r{</[A-Za-z]})
        # presumably tells SGMLParser to resume recognizing tags -- confirm
        @nomoretags = false
        @saved = data[m..-1]
        handle_script(data[0,m]) # call user handler
      else
        handle_script(data)
      end
    end

    # Unfortunately, sgml-parser calls this and there's important work to do in
    # it. So the user handler has to be named something different.
    # Script content goes to skip_script; everything else is optionally
    # stripped (in place) and passed to handle_cdata if anything remains.
    def handle_data(data)
      # need to handle scripts
      if last_tag() == 'script' && @nomoretags
        skip_script(data)
      else
        if @stripWhitespace
          begin
            data.strip! if HTML::Tag.named(last_tag()).can_ignore_whitespace
          rescue NoSuchHTMLTagError
            # unknown enclosing tag: strip anyway
            data.strip!
          end
        end
        handle_cdata(data) if data.size > 0 # call user handler
      end
    end

    # Handle a start tag: close the innermost open tag if it cannot contain
    # the new one, push the new tag, then call the matching user handler.
    def finish_starttag(tag, attrs)
      dprint "*START* #{tag} #{attrs.inspect}\n"
      # dprint "-START- #{tag}\n"
      begin
        unless HTML::Tag.named(last_tag()).can_contain(tag, parent_tag())
          dprint "-INSERT-\n"
          finish_endtag(last_tag())
        end
      rescue NoSuchHTMLTagError
        # hmm.. last_tag was unknown.
        # Assume it doesn't have an optional endtag.
      end

      push(tag)

      begin
        if HTML::Tag.named(tag).is_empty_element
          dprint "-EMPTY-\n"
          handle_empty_tag(tag, attrs) # call user handler
          drop_to_tag(tag)
        else
          handle_start_tag(tag, attrs) # call user handler
        end

        if tag.downcase == 'script'
          @nomoretags = true
        end
      rescue NoSuchHTMLTagError
        # hmm... the start tag is unknown.
        # And we pushed it.
        # If it's empty, we'll get rid of it at the next end tag.
        handle_unknown_tag(tag, attrs)
      end
    end

    # Pop the stack down to and including the most recent occurrence of tag.
    # Calls handle_missing_end_tag for every popped tag other than the target
    # whose end tag may not be omitted.
    # return true if tag is not extra
    def drop_to_tag(tag)
      # The stack holds lower-cased names, so compare in lower case throughout.
      wanted = tag.downcase
      dropped = @tagStack.size - (@tagStack.rindex(wanted) || @tagStack.size)
      if dropped == 0 # got an end tag but we haven't seen start tag?
        handle_extra_end_tag(tag) # call user handler
        return false
      end
      dropped.times do
        begin
          # detect missing end tag
          if last_tag != wanted && !HTML::Tag.named(last_tag).can_omit_end_tag
            handle_missing_end_tag(last_tag) # call user handler
          end
        rescue NoSuchHTMLTagError
          # oops, don't recognize last_tag.
        end
        pop
      end
      return true
    end

    # Handle an end tag: unwind the stack and, unless the end tag had no
    # matching start tag, call handle_end_tag.
    def finish_endtag(tag)
      dprint "*END* #{tag}\n"
      if drop_to_tag(tag)
        dprint "-END- #{tag} #{@tagStack.inspect}\n"
        handle_end_tag(tag) # call user handler
      end
    end

    # Push the lower-cased tag name onto the stack.
    def push(tag)
      @tagStack.push(tag.downcase)
      dprint "*PUSH* #{tag} => #{@tagStack.inspect}\n"
    end

    # Pop and return the innermost tag name.
    def pop
      tag = @tagStack.pop
      dprint "*POP* #{tag} => #{@tagStack.inspect}\n"
      tag
    end

    # Forwards to the user handler.
    def unknown_charref(name)
      handle_unknown_character(name)
    end

    # Forwards to the user handler.
    def unknown_entityref(name)
      handle_unknown_entity(name)
    end

    # callbacks: can be overridden in subclasses

    def handle_start_tag(tag, attrs)
    end

    def handle_end_tag(tag)
    end

    # by default, an empty tag is handled as a start tag
    # with an inserted end tag.
    def handle_empty_tag(tag, attrs)
      handle_start_tag(tag, attrs)
      handle_end_tag(tag)
    end

    def handle_unknown_tag(tag, attrs)
      warn("warning: unknown tag #{tag}\n")
    end

    def handle_missing_end_tag(tag)
      warn("warning: missing end tag </#{tag}>\n")
    end

    def handle_extra_end_tag(tag)
      warn("warning: extra end tag </#{tag}>\n")
    end

    def handle_cdata(data)
    end

    def handle_script(data)
    end

    def handle_unknown_character(name)
    end

    def handle_unknown_entity(name)
    end

    # call super if you want the data stripped
    # (the string is stripped in place, so the caller's copy changes too)
    def handle_comment(data)
      data.strip! if @stripWhitespace
    end

    def handle_special(data)
    end

  end
end
|
227
|
+
|
228
|
+
# test script
|
229
|
+
if $0 == __FILE__
  $stdout.sync = true

  # Demo parser: prints start tags, non-empty character data (prefixed with
  # the open-tag path), unknown references, comments and specials.
  class TestStackingParser < HTML::StackingParser #:nodoc: all
    # Print the open-tag stack as a '/'-terminated path.
    def dump_stack
      stack.each { |name| print(name, '/') }
    end

    def handle_start_tag(tag, attrs)
      print("START: #{tag} #{attrs.inspect}\n")
    end

    def handle_end_tag(tag)
      # print("END: #{tag}\n")
    end

    def handle_empty_tag(tag, attrs)
      # print("EMPTY: #{tag} #{attrs.inspect}\n")
    end

    # Show character data, except inside <style> or when it is only whitespace.
    def handle_cdata(data)
      # print("DATA: #{data.size} chars\n")
      return if last_tag() == 'style'

      text = data.strip
      return if text.empty?

      dump_stack
      print(text.inspect, "\n")
    end

    def handle_script(data)
      # print("SCRIPT: #{data.size} chars\n")
    end

    def handle_unknown_character(name)
      print("UNKC: #{name}\n")
    end

    def handle_unknown_entity(name)
      print("UNKE: #{name}\n")
    end

    # super strips the comment in place when whitespace stripping is enabled.
    def handle_comment(data)
      super
      print("COMMENT: #{data}\n")
    end

    def handle_special(data)
      print("SPECIAL: #{data}\n")
    end
  end

  $DEBUG = false
  parser = TestStackingParser.new(true)
  input_name = ARGV[0] || 'ebay.html'
  parser.parse_file_named(input_name)
end
|