ruby-web 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (190)
  1. data/ChangeLog +474 -0
  2. data/INSTALL.txt +9 -0
  3. data/InstalledFiles +180 -0
  4. data/LICENSE.txt +74 -0
  5. data/Rakefile +529 -0
  6. data/TODO +65 -0
  7. data/doc/additional.xml +149 -0
  8. data/doc/core.xml +652 -0
  9. data/doc/credits/index.xml +52 -0
  10. data/doc/credits/php.contributors.xml +118 -0
  11. data/doc/credits/php.language-snippets.ent +622 -0
  12. data/doc/install/index.xml +136 -0
  13. data/doc/install/mac/index.xml +21 -0
  14. data/doc/install/ruby-web.install.rb.instructions.xml +7 -0
  15. data/doc/install/unix/index.xml +46 -0
  16. data/doc/install/win/apache1.xml +166 -0
  17. data/doc/install/win/apache2.xml +141 -0
  18. data/doc/install/win/iis.xml +162 -0
  19. data/doc/install/win/index.xml +24 -0
  20. data/doc/install/win/installer.xml +31 -0
  21. data/doc/install/win/manual.xml +43 -0
  22. data/doc/manual.xml +69 -0
  23. data/doc/old/apache_cgi.txt +23 -0
  24. data/doc/old/fastcgi.txt +23 -0
  25. data/doc/old/mod_ruby.txt +21 -0
  26. data/doc/old/snippets.rdoc +183 -0
  27. data/doc/old/webrick.txt +23 -0
  28. data/doc/old/windows_cgi.txt +9 -0
  29. data/doc/tutorial.xml +14 -0
  30. data/doc/xsl/manual-multi.xsl +10 -0
  31. data/doc/xsl/manual-pdf.xsl +6 -0
  32. data/doc/xsl/manual-single.xsl +6 -0
  33. data/doc/xsl/manual.css +22 -0
  34. data/install.rb +1022 -0
  35. data/lib/formatter.rb +314 -0
  36. data/lib/html-parser.rb +429 -0
  37. data/lib/htmlrepair.rb +113 -0
  38. data/lib/htmlsplit.rb +842 -0
  39. data/lib/sgml-parser.rb +332 -0
  40. data/lib/web.rb +68 -0
  41. data/lib/web/assertinclude.rb +129 -0
  42. data/lib/web/config.rb +50 -0
  43. data/lib/web/connection.rb +1070 -0
  44. data/lib/web/convenience.rb +154 -0
  45. data/lib/web/formreader.rb +318 -0
  46. data/lib/web/htmlparser/html-parser.rb +429 -0
  47. data/lib/web/htmlparser/sgml-parser.rb +332 -0
  48. data/lib/web/htmltools/element.rb +296 -0
  49. data/lib/web/htmltools/stparser.rb +276 -0
  50. data/lib/web/htmltools/tags.rb +286 -0
  51. data/lib/web/htmltools/tree.rb +139 -0
  52. data/lib/web/htmltools/xmltree.rb +160 -0
  53. data/lib/web/htmltools/xpath.rb +71 -0
  54. data/lib/web/info.rb +63 -0
  55. data/lib/web/load.rb +210 -0
  56. data/lib/web/mime.rb +87 -0
  57. data/lib/web/phprb.rb +340 -0
  58. data/lib/web/resources/test/cookie.rb +33 -0
  59. data/lib/web/resources/test/counter.rb +20 -0
  60. data/lib/web/resources/test/multipart.rb +14 -0
  61. data/lib/web/resources/test/redirect.rb +8 -0
  62. data/lib/web/resources/test/stock.rb +33 -0
  63. data/lib/web/sapi/apache.rb +129 -0
  64. data/lib/web/sapi/fastcgi.rb +22 -0
  65. data/lib/web/sapi/install/apache.rb +180 -0
  66. data/lib/web/sapi/install/iis.rb +93 -0
  67. data/lib/web/sapi/install/macosx.rb +90 -0
  68. data/lib/web/sapi/webrick.rb +86 -0
  69. data/lib/web/session.rb +83 -0
  70. data/lib/web/shim/cgi.rb +129 -0
  71. data/lib/web/shim/rails.rb +175 -0
  72. data/lib/web/stringio.rb +78 -0
  73. data/lib/web/strscanparser.rb +24 -0
  74. data/lib/web/tagparser.rb +96 -0
  75. data/lib/web/testing.rb +666 -0
  76. data/lib/web/traceoutput.rb +75 -0
  77. data/lib/web/unit.rb +56 -0
  78. data/lib/web/upload.rb +59 -0
  79. data/lib/web/validate.rb +52 -0
  80. data/lib/web/wiki.rb +557 -0
  81. data/lib/web/wiki/linker.rb +72 -0
  82. data/lib/web/wiki/page.rb +201 -0
  83. data/lib/webunit.rb +27 -0
  84. data/lib/webunit/assert.rb +152 -0
  85. data/lib/webunit/converter.rb +154 -0
  86. data/lib/webunit/cookie.rb +118 -0
  87. data/lib/webunit/domwalker.rb +185 -0
  88. data/lib/webunit/exception.rb +14 -0
  89. data/lib/webunit/form.rb +116 -0
  90. data/lib/webunit/frame.rb +37 -0
  91. data/lib/webunit/htmlelem.rb +122 -0
  92. data/lib/webunit/image.rb +26 -0
  93. data/lib/webunit/jscript.rb +31 -0
  94. data/lib/webunit/link.rb +33 -0
  95. data/lib/webunit/params.rb +321 -0
  96. data/lib/webunit/parser.rb +229 -0
  97. data/lib/webunit/response.rb +464 -0
  98. data/lib/webunit/runtest.rb +41 -0
  99. data/lib/webunit/table.rb +148 -0
  100. data/lib/webunit/testcase.rb +45 -0
  101. data/lib/webunit/ui/cui/testrunner.rb +50 -0
  102. data/lib/webunit/utils.rb +68 -0
  103. data/lib/webunit/webunit.rb +28 -0
  104. data/test/dev/action.rb +83 -0
  105. data/test/dev/forms.rb +104 -0
  106. data/test/dev/forms2.rb +104 -0
  107. data/test/dev/parser.rb +17 -0
  108. data/test/dev/scripts/dump.rb +24 -0
  109. data/test/dev/scripts/makedist.rb +62 -0
  110. data/test/dev/scripts/uri.rb +41 -0
  111. data/test/dev/scripts/uri/common.rb +432 -0
  112. data/test/dev/scripts/uri/ftp.rb +149 -0
  113. data/test/dev/scripts/uri/generic.rb +1106 -0
  114. data/test/dev/scripts/uri/http.rb +76 -0
  115. data/test/dev/scripts/uri/https.rb +26 -0
  116. data/test/dev/scripts/uri/ldap.rb +238 -0
  117. data/test/dev/scripts/uri/mailto.rb +260 -0
  118. data/test/dev/scripts/urireg.rb +174 -0
  119. data/test/dev/simpledispatcher.rb +156 -0
  120. data/test/dev/test.action.rb +146 -0
  121. data/test/dev/test.formreader.rb +463 -0
  122. data/test/dev/test.simpledispatcher.rb +186 -0
  123. data/test/dev/webunit/conv/digit-0.rb +21 -0
  124. data/test/dev/webunit/conv/digit-1.rb +17 -0
  125. data/test/dev/webunit/conv/digit.rb +23 -0
  126. data/test/dev/webunit/conv/test_digit-0.rb +16 -0
  127. data/test/dev/webunit/conv/test_digit-1.rb +19 -0
  128. data/test/dev/webunit/conv/test_digit.rb +26 -0
  129. data/test/dev/webunit/conv/test_digit_view-0.rb +76 -0
  130. data/test/dev/webunit/conv/test_digit_view-1.rb +102 -0
  131. data/test/dev/webunit/conv/test_digit_view.rb +134 -0
  132. data/test/installation/htdocs/cgi_test.rb +296 -0
  133. data/test/installation/htdocs/test_install.rb +4 -0
  134. data/test/installation/runwebtest.rb +5 -0
  135. data/test/installation/test_cookie.rb +128 -0
  136. data/test/installation/test_form.rb +47 -0
  137. data/test/installation/test_multipart.rb +51 -0
  138. data/test/installation/test_request.rb +24 -0
  139. data/test/installation/test_response.rb +35 -0
  140. data/test/unit/htdocs/cookie.rb +32 -0
  141. data/test/unit/htdocs/multipart.rb +28 -0
  142. data/test/unit/htdocs/redirect.rb +12 -0
  143. data/test/unit/htdocs/simple.rb +13 -0
  144. data/test/unit/htdocs/stock.rb +33 -0
  145. data/test/unit/test_assert.rb +162 -0
  146. data/test/unit/test_cookie.rb +114 -0
  147. data/test/unit/test_domwalker.rb +77 -0
  148. data/test/unit/test_form.rb +42 -0
  149. data/test/unit/test_frame.rb +40 -0
  150. data/test/unit/test_htmlelem.rb +74 -0
  151. data/test/unit/test_image.rb +45 -0
  152. data/test/unit/test_jscript.rb +57 -0
  153. data/test/unit/test_link.rb +85 -0
  154. data/test/unit/test_multipart.rb +51 -0
  155. data/test/unit/test_params.rb +210 -0
  156. data/test/unit/test_parser.rb +53 -0
  157. data/test/unit/test_response.rb +150 -0
  158. data/test/unit/test_table.rb +70 -0
  159. data/test/unit/test_utils.rb +106 -0
  160. data/test/unit/test_webunit.rb +28 -0
  161. data/test/web/mod_ruby_stub.rb +39 -0
  162. data/test/web/test.assertinclude.rb +109 -0
  163. data/test/web/test.buffer.rb +182 -0
  164. data/test/web/test.code.loader.rb +78 -0
  165. data/test/web/test.config.rb +31 -0
  166. data/test/web/test.error.handling.rb +91 -0
  167. data/test/web/test.formreader-2.0.rb +352 -0
  168. data/test/web/test.load.rb +125 -0
  169. data/test/web/test.mime-type.rb +23 -0
  170. data/test/web/test.narf.cgi.rb +106 -0
  171. data/test/web/test.phprb.rb +239 -0
  172. data/test/web/test.request.rb +368 -0
  173. data/test/web/test.response.rb +637 -0
  174. data/test/web/test.ruby-web.rb +10 -0
  175. data/test/web/test.session.rb +50 -0
  176. data/test/web/test.shim.cgi.rb +96 -0
  177. data/test/web/test.tagparser.rb +65 -0
  178. data/test/web/test.template2.rb +297 -0
  179. data/test/web/test.testing2.rb +318 -0
  180. data/test/web/test.upload.rb +45 -0
  181. data/test/web/test.validate.rb +46 -0
  182. data/test/web/test.web.test.rb +495 -0
  183. data/test/wiki/test.history.rb +297 -0
  184. data/test/wiki/test.illustration_page.rb +287 -0
  185. data/test/wiki/test.linker.rb +197 -0
  186. data/test/wiki/test.tarpit.rb +56 -0
  187. data/test/wiki/test.wiki.rb +300 -0
  188. data/test/wikitestroot/admin.rb +7 -0
  189. data/test/wikitestroot/wiki.rb +6 -0
  190. metadata +234 -0
@@ -0,0 +1,429 @@
1
+ # HTML parser
2
+
3
+ require 'sgml-parser'
4
+
5
+ class HTMLParser < SGMLParser #:nodoc: all
6
+
7
+ def initialize(formatter, verbose=nil)
8
+ super(verbose)
9
+ @formatter = formatter
10
+ @savedata = nil
11
+ @isindex = 0
12
+ @title = nil
13
+ @base = nil
14
+ @anchor = nil
15
+ @anchorlist = []
16
+ @nofill = 0
17
+ @list_stack = []
18
+ end
19
+
20
+
21
+ def handle_data(data)
22
+ if @savedata
23
+ @savedata = @savedata + data
24
+ else
25
+ if @nofill != 0
26
+ @formatter.add_literal_data(data)
27
+ else
28
+ @formatter.add_flowing_data(data)
29
+ end
30
+ end
31
+ end
32
+
33
+ def save_bgn
34
+ @savedata = ''
35
+ end
36
+
37
+ def save_end
38
+ data = @savedata
39
+ @savedata = nil
40
+ data = '' if data == nil
41
+ if @nofill == 0
42
+ data = data.split.join(" ")
43
+ end
44
+ return data
45
+ end
46
+
47
+ def anchor_bgn(href, name, type)
48
+ @anchor = href
49
+ if @anchor
50
+ @anchorlist << href
51
+ end
52
+ end
53
+
54
+ def anchor_end
55
+ if @anchor
56
+ #handle_data(format "[%d]", @anchorlist.length)
57
+ @anchor = nil
58
+ end
59
+ end
60
+
61
+ def handle_image(src, alt, *args)
62
+ handle_data(alt)
63
+ end
64
+
65
+ def start_html(attrs) end
66
+ def end_html() end
67
+
68
+ def start_head(attrs) end
69
+ def end_head() end
70
+
71
+ def start_body(attrs) end
72
+ def end_body() end
73
+
74
+ def start_title(attrs)
75
+ save_bgn
76
+ end
77
+
78
+ def end_title
79
+ @title = save_end
80
+ end
81
+
82
+ def do_base(attrs)
83
+ for a, v in attrs
84
+ if a == 'href'
85
+ @base = v
86
+ end
87
+ end
88
+ end
89
+
90
+ def do_isindex(attrs)
91
+ @isindex = 1
92
+ end
93
+
94
+ def do_link(attrs)
95
+ end
96
+
97
+ def do_meta(attrs)
98
+ end
99
+
100
+ def do_nextid(attrs) # Deprecated
101
+ end
102
+
103
+
104
+ def start_h1(attrs)
105
+ @formatter.end_paragraph(1)
106
+ @formatter.push_font('h1', 0, 1, 0)
107
+ end
108
+
109
+ def end_h1
110
+ @formatter.end_paragraph(1)
111
+ @formatter.pop_font()
112
+ end
113
+
114
+ def start_h2(attrs)
115
+ @formatter.end_paragraph(1)
116
+ @formatter.push_font('h2', 0, 1, 0)
117
+ end
118
+
119
+ def end_h2
120
+ @formatter.end_paragraph(1)
121
+ @formatter.pop_font()
122
+ end
123
+
124
+ def start_h3(attrs)
125
+ @formatter.end_paragraph(1)
126
+ @formatter.push_font('h3', 0, 1, 0)
127
+ end
128
+
129
+ def end_h3
130
+ @formatter.end_paragraph(1)
131
+ @formatter.pop_font()
132
+ end
133
+
134
+ def start_h4(attrs)
135
+ @formatter.end_paragraph(1)
136
+ @formatter.push_font('h4', 0, 1, 0)
137
+ end
138
+
139
+ def end_h4
140
+ @formatter.end_paragraph(1)
141
+ @formatter.pop_font()
142
+ end
143
+
144
+ def start_h5(attrs)
145
+ @formatter.end_paragraph(1)
146
+ @formatter.push_font('h5', 0, 1, 0)
147
+ end
148
+
149
+ def end_h5
150
+ @formatter.end_paragraph(1)
151
+ @formatter.pop_font()
152
+ end
153
+
154
+ def start_h6(attrs)
155
+ @formatter.end_paragraph(1)
156
+ @formatter.push_font('h6', 0, 1, 0)
157
+ end
158
+
159
+ def end_h6
160
+ @formatter.end_paragraph(1)
161
+ @formatter.pop_font()
162
+ end
163
+
164
+ def do_p(attrs)
165
+ @formatter.end_paragraph(1)
166
+ end
167
+
168
+ def start_pre(attrs)
169
+ @formatter.end_paragraph(1)
170
+ @formatter.push_font(nil, nil, nil, 1)
171
+ @nofill = @nofill + 1
172
+ end
173
+
174
+ def end_pre
175
+ @formatter.end_paragraph(1)
176
+ @formatter.pop_font()
177
+ @nofill = @nofill - 1
178
+ if @nofill < 0 then @nofill = 0 end
179
+ end
180
+
181
+ def start_xmp(attrs)
182
+ start_pre(attrs)
183
+ setliteral('xmp') # Tell SGML parser
184
+ end
185
+
186
+ def end_xmp
187
+ end_pre
188
+ end
189
+
190
+ def start_listing(attrs)
191
+ start_pre(attrs)
192
+ setliteral('listing') # Tell SGML parser
193
+ end
194
+
195
+ def end_listing
196
+ end_pre
197
+ end
198
+
199
+ def start_address(attrs)
200
+ @formatter.end_paragraph(0)
201
+ @formatter.push_font(nil, 1, nil, nil)
202
+ end
203
+
204
+ def end_address
205
+ @formatter.end_paragraph(0)
206
+ @formatter.pop_font()
207
+ end
208
+
209
+ def start_blockquote(attrs)
210
+ @formatter.end_paragraph(1)
211
+ @formatter.push_margin('blockquote')
212
+ end
213
+
214
+ def end_blockquote
215
+ @formatter.end_paragraph(1)
216
+ @formatter.pop_margin()
217
+ end
218
+
219
+ def start_ul(attrs)
220
+ @formatter.end_paragraph(0)
221
+ @formatter.push_margin('ul')
222
+ @list_stack << ['ul', '*', 0]
223
+ end
224
+
225
+ def end_ul
226
+ if @list_stack
227
+ @list_stack.pop
228
+ end
229
+ @formatter.end_paragraph(0)
230
+ @formatter.pop_margin
231
+ end
232
+
233
+ def do_li(attrs)
234
+ @formatter.end_paragraph(0)
235
+ if @list_stack && @list_stack.size > 0
236
+ dummy, label, counter = top = @list_stack[-1]
237
+ top[2] = counter = counter+1
238
+ else
239
+ label, counter = '*', 0
240
+ end
241
+ @formatter.add_label_data(label, counter)
242
+ end
243
+
244
+ def start_ol(attrs)
245
+ @formatter.end_paragraph(0)
246
+ @formatter.push_margin('ol')
247
+ label = '1.'
248
+ for a, v in attrs
249
+ if a == 'type'
250
+ if v.length == 1
251
+ v = v + '.'
252
+ label = v
253
+ end
254
+ end
255
+ end
256
+ @list_stack << ['ol', label, 0]
257
+ end
258
+
259
+ def end_ol
260
+ if @list_stack
261
+ @list_stack.pop
262
+ end
263
+ @formatter.end_paragraph(0)
264
+ @formatter.pop_margin
265
+ end
266
+
267
+ def start_menu(attrs)
268
+ start_ul(attrs)
269
+ end
270
+
271
+ def end_menu
272
+ end_ul
273
+ end
274
+
275
+ def start_dir(attrs)
276
+ start_ul(attrs)
277
+ end
278
+
279
+ def end_dir
280
+ end_ul
281
+ end
282
+
283
+ def start_dl(attrs)
284
+ @formatter.end_paragraph(1)
285
+ @list_stack << ['dl', '', 0]
286
+ end
287
+
288
+ def end_dl
289
+ ddpop(1)
290
+ if @list_stack.length > 0
291
+ @list_stack.pop
292
+ end
293
+ end
294
+
295
+ def do_dt(attrs)
296
+ ddpop
297
+ end
298
+
299
+ def do_dd(attrs)
300
+ ddpop
301
+ @formatter.push_margin('dd')
302
+ @list_stack << ['dd', '', 0]
303
+ end
304
+
305
+ def ddpop(bl=0)
306
+ @formatter.end_paragraph(bl)
307
+ if @list_stack.length > 0
308
+ if @list_stack[-1][0] == 'dd'
309
+ @list_stack.pop
310
+ @formatter.pop_margin
311
+ end
312
+ end
313
+ end
314
+
315
+ def start_cite(attrs) start_i(attrs) end
316
+ def end_cite() end_i end
317
+
318
+ def start_code(attrs) start_tt(attrs) end
319
+ def end_code() end_tt end
320
+
321
+ def start_em(attrs) start_i(attrs) end
322
+ def end_em() end_i end
323
+
324
+ def start_kbd(attrs) start_tt(attrs) end
325
+ def end_kbd() end_tt end
326
+
327
+ def start_samp(attrs) start_tt(attrs) end
328
+ def end_samp() end_tt end
329
+
330
+ def start_strong(attrs) start_b(attrs) end
331
+ def end_strong() end_b end
332
+
333
+ def start_var(attrs) start_i(attrs) end
334
+ def end_var() end_i end
335
+
336
+ def start_i(attrs)
337
+ @formatter.push_font(nil, 1, nil, nil)
338
+ end
339
+ def end_i
340
+ @formatter.pop_font
341
+ end
342
+
343
+ def start_b(attrs)
344
+ @formatter.push_font(nil, nil, 1, nil)
345
+ end
346
+ def end_b
347
+ @formatter.pop_font
348
+ end
349
+
350
+ def start_tt(attrs)
351
+ @formatter.push_font(nil, nil, nil, 1)
352
+ end
353
+ def end_tt
354
+ @formatter.pop_font
355
+ end
356
+
357
+ def start_a(attrs)
358
+ href = nil
359
+ name = nil
360
+ type = nil
361
+ for attrname, value in attrs
362
+ value = value.strip
363
+ if attrname == 'href'
364
+ href = value
365
+ end
366
+ if attrname == 'name'
367
+ name = value
368
+ end
369
+ if attrname == 'type'
370
+ type = value.downcase
371
+ end
372
+ end
373
+ anchor_bgn(href, name, type)
374
+ end
375
+
376
+ def end_a
377
+ anchor_end
378
+ end
379
+
380
+ def do_br(attrs)
381
+ @formatter.add_line_break
382
+ end
383
+
384
+ def do_hr(attrs)
385
+ @formatter.add_hor_rule
386
+ end
387
+
388
+ def do_img(attrs)
389
+ align = nil
390
+ alt = '(image)'
391
+ ismap = nil
392
+ src = nil
393
+ width = 0
394
+ height = 0
395
+ for attrname, value in attrs
396
+ if attrname == 'align'
397
+ align = value
398
+ end
399
+ if attrname == 'alt'
400
+ alt = value
401
+ end
402
+ if attrname == 'ismap'
403
+ ismap = value
404
+ end
405
+ if attrname == 'src'
406
+ src = value
407
+ end
408
+ if attrname == 'width'
409
+ width = Integer(value)
410
+ end
411
+ if attrname == 'height'
412
+ height = Integer(value)
413
+ end
414
+ end
415
+ handle_image(src, alt, ismap, align, width, height)
416
+ end
417
+
418
+ def do_plaintext(attrs)
419
+ start_pre(attrs)
420
+ setnomoretags # Tell SGML parser
421
+ end
422
+
423
+ def unknown_starttag(tag, attrs)
424
+ end
425
+
426
+ def unknown_endtag(tag)
427
+ end
428
+
429
+ end
@@ -0,0 +1,332 @@
1
+ # A parser for SGML, using the derived class as static DTD.
2
+
3
+ class SGMLParser #:nodoc: all
4
+
5
+ # Regular expressions used for parsing:
6
+ Interesting = /[&<]/
7
+ Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
8
+ '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
9
+ '![^<>]*)?')
10
+
11
+ Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/
12
+ Charref = /&#([0-9]+)[^0-9]/
13
+
14
+ Starttagopen = /<[>a-zA-Z]/
15
+ Endtagopen = /<\/[<>a-zA-Z]/
16
+ Endbracket = /[<>]/
17
+ Special = /<![^<>]*>/
18
+ Commentopen = /<!--/
19
+ Commentclose = /--[ \t\n]*>/
20
+ Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
21
+ Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
22
+ '(\s*=\s*' +
23
+ "('[^']*'" +
24
+ '|"[^"]*"' +
25
+ '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')
26
+
27
+ Entitydefs =
28
+ {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}
29
+
30
+ def initialize(verbose=false)
31
+ @verbose = verbose
32
+ reset
33
+ end
34
+
35
+ def reset
36
+ @rawdata = ''
37
+ @stack = []
38
+ @lasttag = '???'
39
+ @nomoretags = false
40
+ @literal = false
41
+ end
42
+
43
+ def has_context(gi)
44
+ @stack.include? gi
45
+ end
46
+
47
+ def setnomoretags
48
+ @nomoretags = true
49
+ @literal = true
50
+ end
51
+
52
+ def setliteral(*args)
53
+ @literal = true
54
+ end
55
+
56
+ def feed(data)
57
+ @rawdata << data
58
+ goahead(false)
59
+ end
60
+
61
+ def close
62
+ goahead(true)
63
+ end
64
+
65
+ def goahead(_end)
66
+ rawdata = @rawdata
67
+ i = 0
68
+ n = rawdata.length
69
+ while i < n
70
+ if @nomoretags
71
+ handle_data(rawdata[i..(n-1)])
72
+ i = n
73
+ break
74
+ end
75
+ j = rawdata.index(Interesting, i)
76
+ j = n unless j
77
+ if i < j
78
+ handle_data(rawdata[i..(j-1)])
79
+ end
80
+ i = j
81
+ break if (i == n)
82
+ if rawdata[i] == ?< #
83
+ if rawdata.index(Starttagopen, i) == i
84
+ if @literal
85
+ handle_data(rawdata[i, 1])
86
+ i += 1
87
+ next
88
+ end
89
+ k = parse_starttag(i)
90
+ break unless k
91
+ i = k
92
+ next
93
+ end
94
+ if rawdata.index(Endtagopen, i) == i
95
+ k = parse_endtag(i)
96
+ break unless k
97
+ i = k
98
+ @literal = false
99
+ next
100
+ end
101
+ if rawdata.index(Commentopen, i) == i
102
+ if @literal
103
+ handle_data(rawdata[i,1])
104
+ i += 1
105
+ next
106
+ end
107
+ k = parse_comment(i)
108
+ break unless k
109
+ i += k
110
+ next
111
+ end
112
+ if rawdata.index(Special, i) == i
113
+ if @literal
114
+ handle_data(rawdata[i, 1])
115
+ i += 1
116
+ next
117
+ end
118
+ k = parse_special(i)
119
+ break unless k
120
+ i += k
121
+ next
122
+ end
123
+ elsif rawdata[i] == ?& #
124
+ if rawdata.index(Charref, i) == i
125
+ i += $&.length
126
+ handle_charref($1)
127
+ i -= 1 unless rawdata[i-1] == ?;
128
+ next
129
+ end
130
+ if rawdata.index(Entityref, i) == i
131
+ i += $&.length
132
+ handle_entityref($1)
133
+ i -= 1 unless rawdata[i-1] == ?;
134
+ next
135
+ end
136
+ else
137
+ raise RuntimeError, 'neither < nor & ??'
138
+ end
139
+ # We get here only if incomplete matches but
140
+ # nothing else
141
+ match = rawdata.index(Incomplete, i)
142
+ unless match == i
143
+ handle_data(rawdata[i, 1])
144
+ i += 1
145
+ next
146
+ end
147
+ j = match + $&.length
148
+ break if j == n # Really incomplete
149
+ handle_data(rawdata[i..(j-1)])
150
+ i = j
151
+ end
152
+ # end while
153
+ if _end and i < n
154
+ handle_data(@rawdata[i..(n-1)])
155
+ i = n
156
+ end
157
+ @rawdata = rawdata[i..-1]
158
+ end
159
+
160
# Parses the comment that starts at @rawdata[i].
#
# Returns the number of characters the comment occupies (from the opening
# '<!--' through the closing bracket), or nil when the comment terminator
# has not been seen yet. The text between '<!--' and the terminator is
# passed to handle_comment.
#
# Raises RuntimeError when @rawdata[i, 4] is not '<!--'.
def parse_comment(i)
  rawdata = @rawdata
  if rawdata[i, 4] != '<!--'
    # The message previously named handle_comment, which is a different method.
    raise RuntimeError, 'unexpected call to parse_comment'
  end
  match = rawdata.index(Commentclose, i)
  return nil unless match
  matched_length = $&.length
  handle_comment(rawdata[i+4..(match-1)])
  return match + matched_length - i
end
173
+
174
# Parses the start tag that begins at @rawdata[i] (the '<' character).
#
# Returns the index just past the tag, or nil when no closing '<' or '>' has
# been received yet. The lower-cased tag name and its attributes (a list of
# [name, value] pairs, names lower-cased, surrounding quotes removed, ''
# for valueless attributes) are handed to finish_starttag.
#
# Raises RuntimeError when no tag name can be found after position i.
def parse_starttag(i)
  rawdata = @rawdata
  j = rawdata.index(Endbracket, i + 1)
  return nil unless j
  attrs = []
  if rawdata[i+1] == ?>
    # SGML shorthand: <> == <last open tag seen>
    k = j
    tag = @lasttag
  else
    match = rawdata.index(Tagfind, i + 1)
    unless match
      raise RuntimeError, 'unexpected call to parse_starttag'
    end
    k = i + 1 + ($&.length)
    tag = $&.downcase
    @lasttag = tag
  end
  while k < j
    # The attribute must start exactly at k. String#index scans forward, so
    # without this check a tag such as "<br />hello" would skip the "/>" and
    # record the text following the tag as an attribute.
    break unless rawdata.index(Attrfind, k) == k
    matched_length = $&.length
    attrname, rest, attrvalue = $1, $2, $3
    if not rest
      attrvalue = '' # was: = attrname
    elsif (attrvalue[0] == ?' && attrvalue[-1] == ?') or
          (attrvalue[0] == ?" && attrvalue[-1] == ?")
      attrvalue = attrvalue[1..-2]
    end
    attrs << [attrname.downcase, attrvalue]
    k += matched_length
  end
  if rawdata[j] == ?>
    j += 1
  end
  finish_starttag(tag, attrs)
  return j
end
211
+
212
+ def parse_endtag(i)
213
+ rawdata = @rawdata
214
+ j = rawdata.index(Endbracket, i + 1)
215
+ return nil unless j
216
+ tag = (rawdata[i+2..j-1].strip).downcase
217
+ if rawdata[j] == ?> #
218
+ j += 1
219
+ end
220
+ finish_endtag(tag)
221
+ return j
222
+ end
223
+
224
+ def finish_starttag(tag, attrs)
225
+ method = 'start_' + tag
226
+ if self.respond_to?(method)
227
+ @stack << tag
228
+ handle_starttag(tag, method, attrs)
229
+ return 1
230
+ else
231
+ method = 'do_' + tag
232
+ if self.respond_to?(method)
233
+ handle_starttag(tag, method, attrs)
234
+ return 0
235
+ else
236
+ unknown_starttag(tag, attrs)
237
+ return -1
238
+ end
239
+ end
240
+ end
241
+
242
# Closes open elements in response to the end tag +tag+.
#
# An empty +tag+ (the SGML shorthand "</>") closes the innermost open
# element. Otherwise every element from the top of @stack down to and
# including the innermost open +tag+ is closed: for each one, end_<tag> is
# invoked through handle_endtag when the parser responds to it, or
# unknown_endtag is called when it does not.
#
# When +tag+ is not open at all, unknown_endtag is called only if the parser
# has no end_<tag> method; the stack is left untouched.
def finish_endtag(tag)
  if tag == ''
    found = @stack.length - 1
    if found < 0
      unknown_endtag(tag)
      return
    end
  else
    unless @stack.include?(tag)
      unknown_endtag(tag) unless respond_to?('end_' + tag)
      return
    end
    # rindex, not index: with nested elements of the same name (e.g. a <ul>
    # inside a <ul>) only the innermost one must be closed.
    found = @stack.rindex(tag)
  end
  while @stack.length > found
    open_tag = @stack[-1]
    method = 'end_' + open_tag
    if respond_to?(method)
      handle_endtag(open_tag, method)
    else
      unknown_endtag(open_tag)
    end
    @stack.pop
  end
end
270
+
271
+ def parse_special(i)
272
+ rawdata = @rawdata
273
+ match = rawdata.index(Endbracket, i+1)
274
+ return nil unless match
275
+ matched_length = $&.length
276
+ handle_special(rawdata[i+1..(match-1)])
277
+ return match - i + matched_length
278
+ end
279
+
280
+ def handle_starttag(tag, method, attrs)
281
+ self.send(method, attrs)
282
+ end
283
+
284
+ def handle_endtag(tag, method)
285
+ self.send(method)
286
+ end
287
+
288
# Prints a diagnostic for an end tag that has no matching open element.
# Does nothing unless the parser was created with a true +verbose+ flag.
#
# The open-element stack is read from @stack directly; the class defines no
# +stack+ accessor, so the previous self.stack call raised NoMethodError.
def report_unbalanced(tag)
  return unless @verbose
  print '*** Unbalanced </' + tag + '>', "\n"
  print '*** Stack: ', @stack.inspect, "\n"
end
294
+
295
# Handles a numeric character reference such as "&#65;".
#
# +name+ is the digit string between "&#" and the terminator. References in
# the range 0..255 are converted to the corresponding one-character string
# and passed to handle_data; everything else (out of range or not a decimal
# number) is passed to unknown_charref.
def handle_charref(name)
  digits = name.to_s
  # Character references are decimal. Kernel#Integer honours radix prefixes,
  # so it read "010" as octal 8 and raised ArgumentError on "08" and "09".
  n = digits =~ /\A[0-9]+\z/ ? digits.to_i : -1
  if !(0 <= n && n <= 255)
    unknown_charref(name)
    return
  end
  handle_data(n.chr)
end
303
+
304
+ def handle_entityref(name)
305
+ table = Entitydefs
306
+ if table.include?(name)
307
+ handle_data(table[name])
308
+ else
309
+ unknown_entityref(name)
310
+ return
311
+ end
312
+ end
313
+
314
+ def handle_data(data)
315
+ end
316
+
317
+ def handle_comment(data)
318
+ end
319
+
320
+ def handle_special(data)
321
+ end
322
+
323
+ def unknown_starttag(tag, attrs)
324
+ end
325
+ def unknown_endtag(tag)
326
+ end
327
+ def unknown_charref(ref)
328
+ end
329
+ def unknown_entityref(ref)
330
+ end
331
+
332
+ end