ruby-web 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +474 -0
- data/INSTALL.txt +9 -0
- data/InstalledFiles +180 -0
- data/LICENSE.txt +74 -0
- data/Rakefile +529 -0
- data/TODO +65 -0
- data/doc/additional.xml +149 -0
- data/doc/core.xml +652 -0
- data/doc/credits/index.xml +52 -0
- data/doc/credits/php.contributors.xml +118 -0
- data/doc/credits/php.language-snippets.ent +622 -0
- data/doc/install/index.xml +136 -0
- data/doc/install/mac/index.xml +21 -0
- data/doc/install/ruby-web.install.rb.instructions.xml +7 -0
- data/doc/install/unix/index.xml +46 -0
- data/doc/install/win/apache1.xml +166 -0
- data/doc/install/win/apache2.xml +141 -0
- data/doc/install/win/iis.xml +162 -0
- data/doc/install/win/index.xml +24 -0
- data/doc/install/win/installer.xml +31 -0
- data/doc/install/win/manual.xml +43 -0
- data/doc/manual.xml +69 -0
- data/doc/old/apache_cgi.txt +23 -0
- data/doc/old/fastcgi.txt +23 -0
- data/doc/old/mod_ruby.txt +21 -0
- data/doc/old/snippets.rdoc +183 -0
- data/doc/old/webrick.txt +23 -0
- data/doc/old/windows_cgi.txt +9 -0
- data/doc/tutorial.xml +14 -0
- data/doc/xsl/manual-multi.xsl +10 -0
- data/doc/xsl/manual-pdf.xsl +6 -0
- data/doc/xsl/manual-single.xsl +6 -0
- data/doc/xsl/manual.css +22 -0
- data/install.rb +1022 -0
- data/lib/formatter.rb +314 -0
- data/lib/html-parser.rb +429 -0
- data/lib/htmlrepair.rb +113 -0
- data/lib/htmlsplit.rb +842 -0
- data/lib/sgml-parser.rb +332 -0
- data/lib/web.rb +68 -0
- data/lib/web/assertinclude.rb +129 -0
- data/lib/web/config.rb +50 -0
- data/lib/web/connection.rb +1070 -0
- data/lib/web/convenience.rb +154 -0
- data/lib/web/formreader.rb +318 -0
- data/lib/web/htmlparser/html-parser.rb +429 -0
- data/lib/web/htmlparser/sgml-parser.rb +332 -0
- data/lib/web/htmltools/element.rb +296 -0
- data/lib/web/htmltools/stparser.rb +276 -0
- data/lib/web/htmltools/tags.rb +286 -0
- data/lib/web/htmltools/tree.rb +139 -0
- data/lib/web/htmltools/xmltree.rb +160 -0
- data/lib/web/htmltools/xpath.rb +71 -0
- data/lib/web/info.rb +63 -0
- data/lib/web/load.rb +210 -0
- data/lib/web/mime.rb +87 -0
- data/lib/web/phprb.rb +340 -0
- data/lib/web/resources/test/cookie.rb +33 -0
- data/lib/web/resources/test/counter.rb +20 -0
- data/lib/web/resources/test/multipart.rb +14 -0
- data/lib/web/resources/test/redirect.rb +8 -0
- data/lib/web/resources/test/stock.rb +33 -0
- data/lib/web/sapi/apache.rb +129 -0
- data/lib/web/sapi/fastcgi.rb +22 -0
- data/lib/web/sapi/install/apache.rb +180 -0
- data/lib/web/sapi/install/iis.rb +93 -0
- data/lib/web/sapi/install/macosx.rb +90 -0
- data/lib/web/sapi/webrick.rb +86 -0
- data/lib/web/session.rb +83 -0
- data/lib/web/shim/cgi.rb +129 -0
- data/lib/web/shim/rails.rb +175 -0
- data/lib/web/stringio.rb +78 -0
- data/lib/web/strscanparser.rb +24 -0
- data/lib/web/tagparser.rb +96 -0
- data/lib/web/testing.rb +666 -0
- data/lib/web/traceoutput.rb +75 -0
- data/lib/web/unit.rb +56 -0
- data/lib/web/upload.rb +59 -0
- data/lib/web/validate.rb +52 -0
- data/lib/web/wiki.rb +557 -0
- data/lib/web/wiki/linker.rb +72 -0
- data/lib/web/wiki/page.rb +201 -0
- data/lib/webunit.rb +27 -0
- data/lib/webunit/assert.rb +152 -0
- data/lib/webunit/converter.rb +154 -0
- data/lib/webunit/cookie.rb +118 -0
- data/lib/webunit/domwalker.rb +185 -0
- data/lib/webunit/exception.rb +14 -0
- data/lib/webunit/form.rb +116 -0
- data/lib/webunit/frame.rb +37 -0
- data/lib/webunit/htmlelem.rb +122 -0
- data/lib/webunit/image.rb +26 -0
- data/lib/webunit/jscript.rb +31 -0
- data/lib/webunit/link.rb +33 -0
- data/lib/webunit/params.rb +321 -0
- data/lib/webunit/parser.rb +229 -0
- data/lib/webunit/response.rb +464 -0
- data/lib/webunit/runtest.rb +41 -0
- data/lib/webunit/table.rb +148 -0
- data/lib/webunit/testcase.rb +45 -0
- data/lib/webunit/ui/cui/testrunner.rb +50 -0
- data/lib/webunit/utils.rb +68 -0
- data/lib/webunit/webunit.rb +28 -0
- data/test/dev/action.rb +83 -0
- data/test/dev/forms.rb +104 -0
- data/test/dev/forms2.rb +104 -0
- data/test/dev/parser.rb +17 -0
- data/test/dev/scripts/dump.rb +24 -0
- data/test/dev/scripts/makedist.rb +62 -0
- data/test/dev/scripts/uri.rb +41 -0
- data/test/dev/scripts/uri/common.rb +432 -0
- data/test/dev/scripts/uri/ftp.rb +149 -0
- data/test/dev/scripts/uri/generic.rb +1106 -0
- data/test/dev/scripts/uri/http.rb +76 -0
- data/test/dev/scripts/uri/https.rb +26 -0
- data/test/dev/scripts/uri/ldap.rb +238 -0
- data/test/dev/scripts/uri/mailto.rb +260 -0
- data/test/dev/scripts/urireg.rb +174 -0
- data/test/dev/simpledispatcher.rb +156 -0
- data/test/dev/test.action.rb +146 -0
- data/test/dev/test.formreader.rb +463 -0
- data/test/dev/test.simpledispatcher.rb +186 -0
- data/test/dev/webunit/conv/digit-0.rb +21 -0
- data/test/dev/webunit/conv/digit-1.rb +17 -0
- data/test/dev/webunit/conv/digit.rb +23 -0
- data/test/dev/webunit/conv/test_digit-0.rb +16 -0
- data/test/dev/webunit/conv/test_digit-1.rb +19 -0
- data/test/dev/webunit/conv/test_digit.rb +26 -0
- data/test/dev/webunit/conv/test_digit_view-0.rb +76 -0
- data/test/dev/webunit/conv/test_digit_view-1.rb +102 -0
- data/test/dev/webunit/conv/test_digit_view.rb +134 -0
- data/test/installation/htdocs/cgi_test.rb +296 -0
- data/test/installation/htdocs/test_install.rb +4 -0
- data/test/installation/runwebtest.rb +5 -0
- data/test/installation/test_cookie.rb +128 -0
- data/test/installation/test_form.rb +47 -0
- data/test/installation/test_multipart.rb +51 -0
- data/test/installation/test_request.rb +24 -0
- data/test/installation/test_response.rb +35 -0
- data/test/unit/htdocs/cookie.rb +32 -0
- data/test/unit/htdocs/multipart.rb +28 -0
- data/test/unit/htdocs/redirect.rb +12 -0
- data/test/unit/htdocs/simple.rb +13 -0
- data/test/unit/htdocs/stock.rb +33 -0
- data/test/unit/test_assert.rb +162 -0
- data/test/unit/test_cookie.rb +114 -0
- data/test/unit/test_domwalker.rb +77 -0
- data/test/unit/test_form.rb +42 -0
- data/test/unit/test_frame.rb +40 -0
- data/test/unit/test_htmlelem.rb +74 -0
- data/test/unit/test_image.rb +45 -0
- data/test/unit/test_jscript.rb +57 -0
- data/test/unit/test_link.rb +85 -0
- data/test/unit/test_multipart.rb +51 -0
- data/test/unit/test_params.rb +210 -0
- data/test/unit/test_parser.rb +53 -0
- data/test/unit/test_response.rb +150 -0
- data/test/unit/test_table.rb +70 -0
- data/test/unit/test_utils.rb +106 -0
- data/test/unit/test_webunit.rb +28 -0
- data/test/web/mod_ruby_stub.rb +39 -0
- data/test/web/test.assertinclude.rb +109 -0
- data/test/web/test.buffer.rb +182 -0
- data/test/web/test.code.loader.rb +78 -0
- data/test/web/test.config.rb +31 -0
- data/test/web/test.error.handling.rb +91 -0
- data/test/web/test.formreader-2.0.rb +352 -0
- data/test/web/test.load.rb +125 -0
- data/test/web/test.mime-type.rb +23 -0
- data/test/web/test.narf.cgi.rb +106 -0
- data/test/web/test.phprb.rb +239 -0
- data/test/web/test.request.rb +368 -0
- data/test/web/test.response.rb +637 -0
- data/test/web/test.ruby-web.rb +10 -0
- data/test/web/test.session.rb +50 -0
- data/test/web/test.shim.cgi.rb +96 -0
- data/test/web/test.tagparser.rb +65 -0
- data/test/web/test.template2.rb +297 -0
- data/test/web/test.testing2.rb +318 -0
- data/test/web/test.upload.rb +45 -0
- data/test/web/test.validate.rb +46 -0
- data/test/web/test.web.test.rb +495 -0
- data/test/wiki/test.history.rb +297 -0
- data/test/wiki/test.illustration_page.rb +287 -0
- data/test/wiki/test.linker.rb +197 -0
- data/test/wiki/test.tarpit.rb +56 -0
- data/test/wiki/test.wiki.rb +300 -0
- data/test/wikitestroot/admin.rb +7 -0
- data/test/wikitestroot/wiki.rb +6 -0
- metadata +234 -0
@@ -0,0 +1,429 @@
|
|
1
|
+
# HTML parser
|
2
|
+
|
3
|
+
require 'sgml-parser'
|
4
|
+
|
5
|
+
# Tag-level HTML handler layered on SGMLParser.
#
# SGMLParser dispatches every start tag to a +start_<tag>+ or +do_<tag>+
# method and every end tag to +end_<tag>+; this class supplies those
# methods for common HTML elements and forwards the result to the
# formatter object handed to #initialize.
class HTMLParser < SGMLParser #:nodoc: all

  # formatter:: receiver of the rendering calls made by the handlers below
  #             (add_flowing_data, add_literal_data, push_font, ...).
  # verbose::   passed through unchanged to SGMLParser#initialize.
  def initialize(formatter, verbose=nil)
    super(verbose)
    @formatter = formatter
    @savedata = nil     # text buffer, non-nil only between save_bgn/save_end
    @isindex = 0        # becomes 1 once do_isindex has run
    @title = nil        # filled in by end_title
    @base = nil         # href of the last <base> tag seen
    @anchor = nil       # href of the <a> currently open, if any
    @anchorlist = []    # every href passed to anchor_bgn, in order
    @nofill = 0         # nesting depth of <pre>-like elements
    @list_stack = []    # entries are [kind, label, counter]
  end


  # Routes character data: into the save buffer while one is active,
  # otherwise to the formatter as literal data (inside <pre>-like
  # elements) or flowing data.
  def handle_data(data)
    if @savedata
      @savedata = @savedata + data
    elsif @nofill != 0
      @formatter.add_literal_data(data)
    else
      @formatter.add_flowing_data(data)
    end
  end

  # Starts capturing character data instead of sending it to the formatter.
  def save_bgn
    @savedata = ''
  end

  # Stops capturing and returns the captured text ('' if nothing was being
  # captured). Outside <pre>-like elements, runs of whitespace are
  # collapsed to single spaces.
  def save_end
    data = @savedata || ''
    @savedata = nil
    data = data.split.join(" ") if @nofill == 0
    data
  end

  # Called by start_a. Remembers the href and, when there is one, appends
  # it to @anchorlist. +name+ and +type+ are accepted but unused here.
  def anchor_bgn(href, name, type)
    @anchor = href
    @anchorlist << href if @anchor
  end

  # Called by end_a; forgets the current anchor.
  def anchor_end
    if @anchor
      #handle_data(format "[%d]", @anchorlist.length)
      @anchor = nil
    end
  end

  # Called by do_img. Emits the alt text as ordinary data; the remaining
  # arguments (ismap, align, width, height) are ignored by this class.
  def handle_image(src, alt, *args)
    handle_data(alt)
  end

  # Structural elements with nothing to render.
  def start_html(attrs) end
  def end_html() end

  def start_head(attrs) end
  def end_head() end

  def start_body(attrs) end
  def end_body() end

  # <title> text is captured rather than rendered.
  def start_title(attrs)
    save_bgn
  end

  def end_title
    @title = save_end
  end

  # Records the href attribute of <base>; the last one wins.
  def do_base(attrs)
    attrs.each do |a, v|
      @base = v if a == 'href'
    end
  end

  def do_isindex(attrs)
    @isindex = 1
  end

  def do_link(attrs)
  end

  def do_meta(attrs)
  end

  def do_nextid(attrs) # Deprecated
  end


  # Headings h1..h6: each ends the current paragraph and pushes a font
  # whose first component is the heading name and whose third component
  # (the one start_b sets) is 1.
  def start_h1(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h1', 0, 1, 0)
  end

  def end_h1
    @formatter.end_paragraph(1)
    @formatter.pop_font()
  end

  def start_h2(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h2', 0, 1, 0)
  end

  def end_h2
    @formatter.end_paragraph(1)
    @formatter.pop_font()
  end

  def start_h3(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h3', 0, 1, 0)
  end

  def end_h3
    @formatter.end_paragraph(1)
    @formatter.pop_font()
  end

  def start_h4(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h4', 0, 1, 0)
  end

  def end_h4
    @formatter.end_paragraph(1)
    @formatter.pop_font()
  end

  def start_h5(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h5', 0, 1, 0)
  end

  def end_h5
    @formatter.end_paragraph(1)
    @formatter.pop_font()
  end

  def start_h6(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h6', 0, 1, 0)
  end

  def end_h6
    @formatter.end_paragraph(1)
    @formatter.pop_font()
  end

  # <p> is handled as an empty element that just ends the paragraph.
  def do_p(attrs)
    @formatter.end_paragraph(1)
  end

  # <pre>: pushes the same font start_tt uses and raises @nofill so that
  # handle_data/save_end stop treating whitespace as collapsible.
  def start_pre(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font(nil, nil, nil, 1)
    @nofill = @nofill + 1
  end

  def end_pre
    @formatter.end_paragraph(1)
    @formatter.pop_font()
    # Clamp at zero so a stray </pre> cannot drive the depth negative.
    @nofill = [@nofill - 1, 0].max
  end

  # <xmp> and <listing> behave like <pre> and additionally switch the
  # underlying SGML parser into literal mode.
  def start_xmp(attrs)
    start_pre(attrs)
    setliteral('xmp') # Tell SGML parser
  end

  def end_xmp
    end_pre
  end

  def start_listing(attrs)
    start_pre(attrs)
    setliteral('listing') # Tell SGML parser
  end

  def end_listing
    end_pre
  end

  # <address>: uses the same font start_i pushes.
  def start_address(attrs)
    @formatter.end_paragraph(0)
    @formatter.push_font(nil, 1, nil, nil)
  end

  def end_address
    @formatter.end_paragraph(0)
    @formatter.pop_font()
  end

  def start_blockquote(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_margin('blockquote')
  end

  def end_blockquote
    @formatter.end_paragraph(1)
    @formatter.pop_margin()
  end

  # Unordered list: every item is labelled '*'.
  def start_ul(attrs)
    @formatter.end_paragraph(0)
    @formatter.push_margin('ul')
    @list_stack << ['ul', '*', 0]
  end

  def end_ul
    @list_stack.pop if @list_stack
    @formatter.end_paragraph(0)
    @formatter.pop_margin
  end

  # List item: bumps the counter of the innermost list and hands its label
  # plus the new counter to the formatter. Outside any list the label is
  # '*' and the counter 0.
  def do_li(attrs)
    @formatter.end_paragraph(0)
    if @list_stack && @list_stack.size > 0
      top = @list_stack[-1]
      label = top[1]
      counter = top[2] + 1
      top[2] = counter
    else
      label, counter = '*', 0
    end
    @formatter.add_label_data(label, counter)
  end

  # Ordered list. The label defaults to '1.'; a one-character +type+
  # attribute replaces it with that character followed by '.'.
  def start_ol(attrs)
    @formatter.end_paragraph(0)
    @formatter.push_margin('ol')
    label = '1.'
    attrs.each do |a, v|
      label = v + '.' if a == 'type' && v.length == 1
    end
    @list_stack << ['ol', label, 0]
  end

  def end_ol
    @list_stack.pop if @list_stack
    @formatter.end_paragraph(0)
    @formatter.pop_margin
  end

  # <menu> and <dir> are rendered exactly like <ul>.
  def start_menu(attrs)
    start_ul(attrs)
  end

  def end_menu
    end_ul
  end

  def start_dir(attrs)
    start_ul(attrs)
  end

  def end_dir
    end_ul
  end

  # Definition list.
  def start_dl(attrs)
    @formatter.end_paragraph(1)
    @list_stack << ['dl', '', 0]
  end

  def end_dl
    ddpop(1)
    @list_stack.pop if @list_stack.length > 0
  end

  def do_dt(attrs)
    ddpop
  end

  # <dd> opens an indented region that stays open until the next
  # <dt>/<dd> or the end of the list (see ddpop).
  def do_dd(attrs)
    ddpop
    @formatter.push_margin('dd')
    @list_stack << ['dd', '', 0]
  end

  # Ends the paragraph (passing +bl+ to the formatter) and, when the
  # innermost list entry is a 'dd', removes it and pops its margin.
  def ddpop(bl=0)
    @formatter.end_paragraph(bl)
    if @list_stack.length > 0 && @list_stack[-1][0] == 'dd'
      @list_stack.pop
      @formatter.pop_margin
    end
  end

  # Phrase elements, each mapped onto one of <i>, <b> or <tt>.
  def start_cite(attrs) start_i(attrs) end
  def end_cite() end_i end

  def start_code(attrs) start_tt(attrs) end
  def end_code() end_tt end

  def start_em(attrs) start_i(attrs) end
  def end_em() end_i end

  def start_kbd(attrs) start_tt(attrs) end
  def end_kbd() end_tt end

  def start_samp(attrs) start_tt(attrs) end
  def end_samp() end_tt end

  def start_strong(attrs) start_b(attrs) end
  def end_strong() end_b end

  def start_var(attrs) start_i(attrs) end
  def end_var() end_i end

  # Font elements: each sets a different one of the last three push_font
  # components to 1 and leaves the rest nil.
  def start_i(attrs)
    @formatter.push_font(nil, 1, nil, nil)
  end
  def end_i
    @formatter.pop_font
  end

  def start_b(attrs)
    @formatter.push_font(nil, nil, 1, nil)
  end
  def end_b
    @formatter.pop_font
  end

  def start_tt(attrs)
    @formatter.push_font(nil, nil, nil, 1)
  end
  def end_tt
    @formatter.pop_font
  end

  # <a>: collects href, name and (lower-cased) type, all stripped of
  # surrounding whitespace, and passes them to anchor_bgn.
  def start_a(attrs)
    href = nil
    name = nil
    type = nil
    attrs.each do |attrname, value|
      value = value.strip
      case attrname
      when 'href' then href = value
      when 'name' then name = value
      when 'type' then type = value.downcase
      end
    end
    anchor_bgn(href, name, type)
  end

  def end_a
    anchor_end
  end

  def do_br(attrs)
    @formatter.add_line_break
  end

  def do_hr(attrs)
    @formatter.add_hor_rule
  end

  # <img>: gathers the image attributes and passes them to handle_image.
  # +alt+ defaults to '(image)', +width+ and +height+ to 0. A width or
  # height that is not an integer (e.g. "100%" or an empty value) is
  # ignored instead of aborting the parse with ArgumentError.
  def do_img(attrs)
    align = nil
    alt = '(image)'
    ismap = nil
    src = nil
    width = 0
    height = 0
    attrs.each do |attrname, value|
      case attrname
      when 'align'  then align = value
      when 'alt'    then alt = value
      when 'ismap'  then ismap = value
      when 'src'    then src = value
      when 'width'  then width = image_dimension(value, width)
      when 'height' then height = image_dimension(value, height)
      end
    end
    handle_image(src, alt, ismap, align, width, height)
  end

  # <plaintext>: like <pre>, and tells the SGML parser that no further
  # markup follows.
  def do_plaintext(attrs)
    start_pre(attrs)
    setnomoretags # Tell SGML parser
  end

  # Unknown tags are silently ignored.
  def unknown_starttag(tag, attrs)
  end

  def unknown_endtag(tag)
  end

  private

  # Converts an <img> width/height attribute to an Integer, returning
  # +fallback+ when the text is not a valid integer.
  def image_dimension(value, fallback)
    Integer(value)
  rescue ArgumentError
    fallback
  end

end
|
@@ -0,0 +1,332 @@
|
|
1
|
+
# A parser for SGML, using the derived class as static DTD.
|
2
|
+
|
3
|
+
# Incremental SGML tokenizer. Text is supplied through #feed (and flushed
# with #close); for each piece of markup the parser calls a handler method
# on itself. A subclass acts as the "DTD" by defining:
#
# start_<tag>(attrs) / end_<tag>:: for elements with an end tag
# do_<tag>(attrs)::                for empty elements
#
# plus any of the handle_* / unknown_* hooks at the bottom of this class.
# +attrs+ is an array of [lower-cased name, value] pairs.
class SGMLParser #:nodoc: all

  # Regular expressions used for parsing:
  Interesting = /[&<]/
  Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                              '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                              '![^<>]*)?')

  Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/
  Charref = /&#([0-9]+)[^0-9]/

  Starttagopen = /<[>a-zA-Z]/
  Endtagopen = /<\/[<>a-zA-Z]/
  Endbracket = /[<>]/
  Special = /<![^<>]*>/
  Commentopen = /<!--/
  Commentclose = /--[ \t\n]*>/
  Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
  Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
                            '(\s*=\s*' +
                            "('[^']*'" +
                            '|"[^"]*"' +
                            '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')

  # Entity names understood by handle_entityref.
  Entitydefs =
    {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}

  # verbose:: when true, report_unbalanced prints diagnostics.
  def initialize(verbose=false)
    @verbose = verbose
    reset
  end

  # Discards buffered input and returns the parser to its initial state.
  def reset
    @rawdata = ''        # input received but not yet consumed
    @stack = []          # names of currently open start_<tag> elements
    @lasttag = '???'     # last start tag seen, used for the <> shorthand
    @nomoretags = false
    @literal = false
  end

  # True when an element named +gi+ is currently open.
  def has_context(gi)
    @stack.include? gi
  end

  # After this call all remaining input is passed to handle_data verbatim.
  def setnomoretags
    @nomoretags = true
    @literal = true
  end

  # Enters literal mode: start tags, comments and <!...> constructs are
  # emitted as data until the next end tag is parsed. Arguments are ignored.
  def setliteral(*args)
    @literal = true
  end

  # Appends +data+ to the input buffer and processes as much of it as can
  # be parsed unambiguously; an incomplete trailing construct is kept for
  # the next call.
  def feed(data)
    @rawdata << data
    goahead(false)
  end

  # Processes whatever is left in the buffer, emitting any incomplete
  # trailing construct as plain data.
  def close
    goahead(true)
  end

  # Main scanning loop. +_end+ is true when no more input will follow, in
  # which case unparsed leftovers are flushed through handle_data.
  def goahead(_end)
    rawdata = @rawdata
    i = 0
    n = rawdata.length
    while i < n
      if @nomoretags
        handle_data(rawdata[i..(n-1)])
        i = n
        break
      end
      # Everything up to the next '&' or '<' is plain data.
      j = rawdata.index(Interesting, i)
      j = n unless j
      if i < j
        handle_data(rawdata[i..(j-1)])
      end
      i = j
      break if (i == n)
      if rawdata[i] == ?< #
        if rawdata.index(Starttagopen, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_starttag(i)   # absolute index just past the tag
          break unless k
          i = k
          next
        end
        if rawdata.index(Endtagopen, i) == i
          k = parse_endtag(i)     # absolute index just past the tag
          break unless k
          i = k
          @literal = false
          next
        end
        if rawdata.index(Commentopen, i) == i
          if @literal
            handle_data(rawdata[i,1])
            i += 1
            next
          end
          k = parse_comment(i)    # length of the comment, not an index
          break unless k
          i += k
          next
        end
        if rawdata.index(Special, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_special(i)    # length of the construct, not an index
          break unless k
          i += k
          next
        end
      elsif rawdata[i] == ?& #
        if rawdata.index(Charref, i) == i
          i += $&.length
          handle_charref($1)
          # The pattern swallows one terminating character; give it back
          # unless it was the ';' that belongs to the reference.
          i -= 1 unless rawdata[i-1] == ?;
          next
        end
        if rawdata.index(Entityref, i) == i
          i += $&.length
          handle_entityref($1)
          i -= 1 unless rawdata[i-1] == ?;
          next
        end
      else
        raise RuntimeError, 'neither < nor & ??'
      end
      # We get here only if incomplete matches but
      # nothing else
      match = rawdata.index(Incomplete, i)
      unless match == i
        handle_data(rawdata[i, 1])
        i += 1
        next
      end
      j = match + $&.length
      break if j == n # Really incomplete
      handle_data(rawdata[i..(j-1)])
      i = j
    end
    # end while
    if _end and i < n
      handle_data(@rawdata[i..(n-1)])
      i = n
    end
    @rawdata = rawdata[i..-1]
  end

  # Parses the comment starting at index +i+ and passes its text to
  # handle_comment. Returns the number of characters consumed, or nil when
  # the closing "-->" has not arrived yet.
  def parse_comment(i)
    rawdata = @rawdata
    if rawdata[i, 4] != '<!--'
      raise RuntimeError, 'unexpected call to handle_comment'
    end
    match = rawdata.index(Commentclose, i)
    return nil unless match
    matched_length = $&.length
    handle_comment(rawdata[i+4..(match-1)])
    return match + matched_length - i
  end

  # Parses the start tag at index +i+, collects its attributes and
  # dispatches it through finish_starttag. Returns the index just past the
  # tag, or nil when the tag is not complete yet.
  def parse_starttag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    attrs = []
    if rawdata[i+1] == ?> #
      # SGML shorthand: <> == <last open tag seen>
      k = j
      tag = @lasttag
    else
      match = rawdata.index(Tagfind, i + 1)
      unless match
        raise RuntimeError, 'unexpected call to parse_starttag'
      end
      k = i + 1 + ($&.length)
      tag = $&.downcase
      @lasttag = tag
    end
    while k < j
      # An attribute has to begin exactly at k. An unanchored search would
      # skip over junk such as the '/' in "<br />" and report words from
      # the text after the tag as attributes.
      break unless rawdata.index(Attrfind, k) == k
      matched_length = $&.length
      attrname, rest, attrvalue = $1, $2, $3
      if not rest
        attrvalue = '' # was: = attrname
      elsif (attrvalue[0] == ?' && attrvalue[-1] == ?') or
          (attrvalue[0] == ?" && attrvalue[-1] == ?")
        attrvalue = attrvalue[1..-2]   # drop the surrounding quotes
      end
      attrs << [attrname.downcase, attrvalue]
      k += matched_length
    end
    if rawdata[j] == ?> #
      j += 1
    end
    finish_starttag(tag, attrs)
    return j
  end

  # Parses the end tag at index +i+ and dispatches it through
  # finish_endtag. Returns the index just past the tag, or nil when the tag
  # is not complete yet.
  def parse_endtag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    tag = (rawdata[i+2..j-1].strip).downcase
    if rawdata[j] == ?> #
      j += 1
    end
    finish_endtag(tag)
    return j
  end

  # Dispatches a start tag. Returns 1 when start_<tag> handled it (the tag
  # is then pushed on the open-element stack), 0 when do_<tag> handled it,
  # and -1 when it went to unknown_starttag.
  def finish_starttag(tag, attrs)
    method = 'start_' + tag
    if self.respond_to?(method)
      @stack << tag
      handle_starttag(tag, method, attrs)
      return 1
    else
      method = 'do_' + tag
      if self.respond_to?(method)
        handle_starttag(tag, method, attrs)
        return 0
      else
        unknown_starttag(tag, attrs)
        return -1
      end
    end
  end

  # Dispatches an end tag. An empty tag name ("</>") closes the innermost
  # open element. A named tag closes its innermost open occurrence together
  # with everything opened after it; if no such element is open the tag is
  # ignored, or reported via unknown_endtag when no end_<tag> exists.
  def finish_endtag(tag)
    if tag == ''
      found = @stack.length - 1
      if found < 0
        unknown_endtag(tag)
        return
      end
    else
      unless @stack.include? tag
        method = 'end_' + tag
        unless self.respond_to?(method)
          unknown_endtag(tag)
        end
        return
      end
      # Innermost occurrence: with nested elements of the same name
      # (<b>..<b>..</b>) one end tag must close only the inner one.
      found = @stack.rindex(tag)
    end
    while @stack.length > found
      tag = @stack[-1]
      method = 'end_' + tag
      if respond_to?(method)
        handle_endtag(tag, method)
      else
        unknown_endtag(tag)
      end
      @stack.pop
    end
  end

  # Parses the "<!...>" construct at index +i+ and passes its contents
  # (without the angle brackets) to handle_special. Returns the number of
  # characters consumed, or nil when the construct is not complete yet.
  def parse_special(i)
    rawdata = @rawdata
    match = rawdata.index(Endbracket, i+1)
    return nil unless match
    matched_length = $&.length
    handle_special(rawdata[i+1..(match-1)])
    return match - i + matched_length
  end

  # Invokes the handler +method+ chosen by finish_starttag.
  def handle_starttag(tag, method, attrs)
    self.send(method, attrs)
  end

  # Invokes the handler +method+ chosen by finish_endtag.
  def handle_endtag(tag, method)
    self.send(method)
  end

  # Prints a diagnostic about an unbalanced end tag when the parser was
  # created with verbose set. Nothing in this class calls it.
  def report_unbalanced(tag)
    if @verbose
      print '*** Unbalanced </' + tag + '>', "\n"
      print '*** Stack:', @stack.inspect, "\n"
    end
  end

  # Handles a numeric character reference; +name+ is the digit string.
  # Values 0..255 are converted to a one-character string and passed to
  # handle_data, anything else goes to unknown_charref.
  def handle_charref(name)
    # Character references are decimal. Integer() treats a leading zero as
    # an octal prefix ("010" => 8, "08" => ArgumentError), so strip leading
    # zeros before converting.
    n = Integer(name.sub(/\A0+(?=[0-9])/, ''))
    if !(0 <= n && n <= 255)
      unknown_charref(name)
      return
    end
    # NOTE(review): on Ruby 1.9+ Integer#chr yields a binary-encoded string
    # for 128..255 -- confirm that downstream consumers cope with that.
    handle_data(n.chr)
  end

  # Handles a named entity reference: names listed in Entitydefs are
  # replaced by their text, all others go to unknown_entityref.
  def handle_entityref(name)
    table = Entitydefs
    if table.include?(name)
      handle_data(table[name])
    else
      unknown_entityref(name)
      return
    end
  end

  # Hooks meant to be overridden; all of them do nothing here.
  def handle_data(data)
  end

  def handle_comment(data)
  end

  def handle_special(data)
  end

  def unknown_starttag(tag, attrs)
  end
  def unknown_endtag(tag)
  end
  def unknown_charref(ref)
  end
  def unknown_entityref(ref)
  end

end
|