ruby-web 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. data/ChangeLog +474 -0
  2. data/INSTALL.txt +9 -0
  3. data/InstalledFiles +180 -0
  4. data/LICENSE.txt +74 -0
  5. data/Rakefile +529 -0
  6. data/TODO +65 -0
  7. data/doc/additional.xml +149 -0
  8. data/doc/core.xml +652 -0
  9. data/doc/credits/index.xml +52 -0
  10. data/doc/credits/php.contributors.xml +118 -0
  11. data/doc/credits/php.language-snippets.ent +622 -0
  12. data/doc/install/index.xml +136 -0
  13. data/doc/install/mac/index.xml +21 -0
  14. data/doc/install/ruby-web.install.rb.instructions.xml +7 -0
  15. data/doc/install/unix/index.xml +46 -0
  16. data/doc/install/win/apache1.xml +166 -0
  17. data/doc/install/win/apache2.xml +141 -0
  18. data/doc/install/win/iis.xml +162 -0
  19. data/doc/install/win/index.xml +24 -0
  20. data/doc/install/win/installer.xml +31 -0
  21. data/doc/install/win/manual.xml +43 -0
  22. data/doc/manual.xml +69 -0
  23. data/doc/old/apache_cgi.txt +23 -0
  24. data/doc/old/fastcgi.txt +23 -0
  25. data/doc/old/mod_ruby.txt +21 -0
  26. data/doc/old/snippets.rdoc +183 -0
  27. data/doc/old/webrick.txt +23 -0
  28. data/doc/old/windows_cgi.txt +9 -0
  29. data/doc/tutorial.xml +14 -0
  30. data/doc/xsl/manual-multi.xsl +10 -0
  31. data/doc/xsl/manual-pdf.xsl +6 -0
  32. data/doc/xsl/manual-single.xsl +6 -0
  33. data/doc/xsl/manual.css +22 -0
  34. data/install.rb +1022 -0
  35. data/lib/formatter.rb +314 -0
  36. data/lib/html-parser.rb +429 -0
  37. data/lib/htmlrepair.rb +113 -0
  38. data/lib/htmlsplit.rb +842 -0
  39. data/lib/sgml-parser.rb +332 -0
  40. data/lib/web.rb +68 -0
  41. data/lib/web/assertinclude.rb +129 -0
  42. data/lib/web/config.rb +50 -0
  43. data/lib/web/connection.rb +1070 -0
  44. data/lib/web/convenience.rb +154 -0
  45. data/lib/web/formreader.rb +318 -0
  46. data/lib/web/htmlparser/html-parser.rb +429 -0
  47. data/lib/web/htmlparser/sgml-parser.rb +332 -0
  48. data/lib/web/htmltools/element.rb +296 -0
  49. data/lib/web/htmltools/stparser.rb +276 -0
  50. data/lib/web/htmltools/tags.rb +286 -0
  51. data/lib/web/htmltools/tree.rb +139 -0
  52. data/lib/web/htmltools/xmltree.rb +160 -0
  53. data/lib/web/htmltools/xpath.rb +71 -0
  54. data/lib/web/info.rb +63 -0
  55. data/lib/web/load.rb +210 -0
  56. data/lib/web/mime.rb +87 -0
  57. data/lib/web/phprb.rb +340 -0
  58. data/lib/web/resources/test/cookie.rb +33 -0
  59. data/lib/web/resources/test/counter.rb +20 -0
  60. data/lib/web/resources/test/multipart.rb +14 -0
  61. data/lib/web/resources/test/redirect.rb +8 -0
  62. data/lib/web/resources/test/stock.rb +33 -0
  63. data/lib/web/sapi/apache.rb +129 -0
  64. data/lib/web/sapi/fastcgi.rb +22 -0
  65. data/lib/web/sapi/install/apache.rb +180 -0
  66. data/lib/web/sapi/install/iis.rb +93 -0
  67. data/lib/web/sapi/install/macosx.rb +90 -0
  68. data/lib/web/sapi/webrick.rb +86 -0
  69. data/lib/web/session.rb +83 -0
  70. data/lib/web/shim/cgi.rb +129 -0
  71. data/lib/web/shim/rails.rb +175 -0
  72. data/lib/web/stringio.rb +78 -0
  73. data/lib/web/strscanparser.rb +24 -0
  74. data/lib/web/tagparser.rb +96 -0
  75. data/lib/web/testing.rb +666 -0
  76. data/lib/web/traceoutput.rb +75 -0
  77. data/lib/web/unit.rb +56 -0
  78. data/lib/web/upload.rb +59 -0
  79. data/lib/web/validate.rb +52 -0
  80. data/lib/web/wiki.rb +557 -0
  81. data/lib/web/wiki/linker.rb +72 -0
  82. data/lib/web/wiki/page.rb +201 -0
  83. data/lib/webunit.rb +27 -0
  84. data/lib/webunit/assert.rb +152 -0
  85. data/lib/webunit/converter.rb +154 -0
  86. data/lib/webunit/cookie.rb +118 -0
  87. data/lib/webunit/domwalker.rb +185 -0
  88. data/lib/webunit/exception.rb +14 -0
  89. data/lib/webunit/form.rb +116 -0
  90. data/lib/webunit/frame.rb +37 -0
  91. data/lib/webunit/htmlelem.rb +122 -0
  92. data/lib/webunit/image.rb +26 -0
  93. data/lib/webunit/jscript.rb +31 -0
  94. data/lib/webunit/link.rb +33 -0
  95. data/lib/webunit/params.rb +321 -0
  96. data/lib/webunit/parser.rb +229 -0
  97. data/lib/webunit/response.rb +464 -0
  98. data/lib/webunit/runtest.rb +41 -0
  99. data/lib/webunit/table.rb +148 -0
  100. data/lib/webunit/testcase.rb +45 -0
  101. data/lib/webunit/ui/cui/testrunner.rb +50 -0
  102. data/lib/webunit/utils.rb +68 -0
  103. data/lib/webunit/webunit.rb +28 -0
  104. data/test/dev/action.rb +83 -0
  105. data/test/dev/forms.rb +104 -0
  106. data/test/dev/forms2.rb +104 -0
  107. data/test/dev/parser.rb +17 -0
  108. data/test/dev/scripts/dump.rb +24 -0
  109. data/test/dev/scripts/makedist.rb +62 -0
  110. data/test/dev/scripts/uri.rb +41 -0
  111. data/test/dev/scripts/uri/common.rb +432 -0
  112. data/test/dev/scripts/uri/ftp.rb +149 -0
  113. data/test/dev/scripts/uri/generic.rb +1106 -0
  114. data/test/dev/scripts/uri/http.rb +76 -0
  115. data/test/dev/scripts/uri/https.rb +26 -0
  116. data/test/dev/scripts/uri/ldap.rb +238 -0
  117. data/test/dev/scripts/uri/mailto.rb +260 -0
  118. data/test/dev/scripts/urireg.rb +174 -0
  119. data/test/dev/simpledispatcher.rb +156 -0
  120. data/test/dev/test.action.rb +146 -0
  121. data/test/dev/test.formreader.rb +463 -0
  122. data/test/dev/test.simpledispatcher.rb +186 -0
  123. data/test/dev/webunit/conv/digit-0.rb +21 -0
  124. data/test/dev/webunit/conv/digit-1.rb +17 -0
  125. data/test/dev/webunit/conv/digit.rb +23 -0
  126. data/test/dev/webunit/conv/test_digit-0.rb +16 -0
  127. data/test/dev/webunit/conv/test_digit-1.rb +19 -0
  128. data/test/dev/webunit/conv/test_digit.rb +26 -0
  129. data/test/dev/webunit/conv/test_digit_view-0.rb +76 -0
  130. data/test/dev/webunit/conv/test_digit_view-1.rb +102 -0
  131. data/test/dev/webunit/conv/test_digit_view.rb +134 -0
  132. data/test/installation/htdocs/cgi_test.rb +296 -0
  133. data/test/installation/htdocs/test_install.rb +4 -0
  134. data/test/installation/runwebtest.rb +5 -0
  135. data/test/installation/test_cookie.rb +128 -0
  136. data/test/installation/test_form.rb +47 -0
  137. data/test/installation/test_multipart.rb +51 -0
  138. data/test/installation/test_request.rb +24 -0
  139. data/test/installation/test_response.rb +35 -0
  140. data/test/unit/htdocs/cookie.rb +32 -0
  141. data/test/unit/htdocs/multipart.rb +28 -0
  142. data/test/unit/htdocs/redirect.rb +12 -0
  143. data/test/unit/htdocs/simple.rb +13 -0
  144. data/test/unit/htdocs/stock.rb +33 -0
  145. data/test/unit/test_assert.rb +162 -0
  146. data/test/unit/test_cookie.rb +114 -0
  147. data/test/unit/test_domwalker.rb +77 -0
  148. data/test/unit/test_form.rb +42 -0
  149. data/test/unit/test_frame.rb +40 -0
  150. data/test/unit/test_htmlelem.rb +74 -0
  151. data/test/unit/test_image.rb +45 -0
  152. data/test/unit/test_jscript.rb +57 -0
  153. data/test/unit/test_link.rb +85 -0
  154. data/test/unit/test_multipart.rb +51 -0
  155. data/test/unit/test_params.rb +210 -0
  156. data/test/unit/test_parser.rb +53 -0
  157. data/test/unit/test_response.rb +150 -0
  158. data/test/unit/test_table.rb +70 -0
  159. data/test/unit/test_utils.rb +106 -0
  160. data/test/unit/test_webunit.rb +28 -0
  161. data/test/web/mod_ruby_stub.rb +39 -0
  162. data/test/web/test.assertinclude.rb +109 -0
  163. data/test/web/test.buffer.rb +182 -0
  164. data/test/web/test.code.loader.rb +78 -0
  165. data/test/web/test.config.rb +31 -0
  166. data/test/web/test.error.handling.rb +91 -0
  167. data/test/web/test.formreader-2.0.rb +352 -0
  168. data/test/web/test.load.rb +125 -0
  169. data/test/web/test.mime-type.rb +23 -0
  170. data/test/web/test.narf.cgi.rb +106 -0
  171. data/test/web/test.phprb.rb +239 -0
  172. data/test/web/test.request.rb +368 -0
  173. data/test/web/test.response.rb +637 -0
  174. data/test/web/test.ruby-web.rb +10 -0
  175. data/test/web/test.session.rb +50 -0
  176. data/test/web/test.shim.cgi.rb +96 -0
  177. data/test/web/test.tagparser.rb +65 -0
  178. data/test/web/test.template2.rb +297 -0
  179. data/test/web/test.testing2.rb +318 -0
  180. data/test/web/test.upload.rb +45 -0
  181. data/test/web/test.validate.rb +46 -0
  182. data/test/web/test.web.test.rb +495 -0
  183. data/test/wiki/test.history.rb +297 -0
  184. data/test/wiki/test.illustration_page.rb +287 -0
  185. data/test/wiki/test.linker.rb +197 -0
  186. data/test/wiki/test.tarpit.rb +56 -0
  187. data/test/wiki/test.wiki.rb +300 -0
  188. data/test/wikitestroot/admin.rb +7 -0
  189. data/test/wikitestroot/wiki.rb +6 -0
  190. metadata +234 -0
@@ -0,0 +1,429 @@
1
+ # HTML parser
2
+
3
+ require 'sgml-parser'
4
+
5
# HTML parser built on SGMLParser.
#
# SGMLParser dispatches every recognised tag to a +start_<tag>+ / +end_<tag>+
# pair (container elements) or to a +do_<tag>+ method (empty elements); this
# class supplies those handlers for common HTML tags and forwards the
# resulting layout events to a formatter object.
#
# The formatter passed to #initialize must respond to the calls made below:
# add_flowing_data, add_literal_data, add_label_data, add_line_break,
# add_hor_rule, end_paragraph, push_font, pop_font, push_margin, pop_margin.
class HTMLParser < SGMLParser #:nodoc: all

  # formatter:: object receiving the layout events (see class comment).
  # verbose::   passed through unchanged to SGMLParser#initialize.
  def initialize(formatter, verbose=nil)
    super(verbose)
    @formatter = formatter
    @savedata = nil      # text buffer while save_bgn/save_end is active, else nil
    @isindex = 0         # set to 1 once an <isindex> tag has been seen
    @title = nil         # collapsed text of the <title> element
    @base = nil          # href of the last <base> tag
    @anchor = nil        # href of the <a> element currently open, if any
    @anchorlist = []     # every non-nil href seen so far, in document order
    @nofill = 0          # nesting depth of literal (<pre>-like) sections
    @list_stack = []     # entries are [kind, label, counter]
  end


  # --- Text handling

  # Routes character data either into the save buffer (when save_bgn is
  # active) or to the formatter, as literal data inside <pre>-like sections
  # and as flowing data otherwise.
  def handle_data(data)
    if @savedata
      @savedata = @savedata + data
    elsif @nofill != 0
      @formatter.add_literal_data(data)
    else
      @formatter.add_flowing_data(data)
    end
  end

  # Starts capturing character data instead of sending it to the formatter.
  def save_bgn
    @savedata = ''
  end

  # Stops capturing and returns the captured text ('' if nothing was being
  # captured). Outside literal sections, runs of whitespace are collapsed to
  # single spaces and leading/trailing whitespace is dropped.
  def save_end
    data = @savedata
    @savedata = nil
    data = '' if data.nil?
    data = data.split.join(" ") if @nofill == 0
    data
  end

  # --- Hooks meant to be overridden by subclasses

  # Called for <a>; remembers the href and appends it to @anchorlist.
  # +name+ and +type+ are accepted but unused here.
  def anchor_bgn(href, name, type)
    @anchor = href
    @anchorlist << href if @anchor
  end

  # Called for </a>; forgets the current anchor.
  def anchor_end
    @anchor = nil
  end

  # Called for <img>; this implementation only emits the alt text.
  def handle_image(src, alt, *args)
    handle_data(alt)
  end

  # --- Top-level elements (no formatting effect)

  def start_html(attrs); end
  def end_html; end

  def start_head(attrs); end
  def end_head; end

  def start_body(attrs); end
  def end_body; end

  # --- Head elements

  # The title text is captured rather than rendered.
  def start_title(attrs)
    save_bgn
  end

  def end_title
    @title = save_end
  end

  # Records the href attribute of <base> (the last one wins).
  def do_base(attrs)
    attrs.each do |a, v|
      @base = v if a == 'href'
    end
  end

  def do_isindex(attrs)
    @isindex = 1
  end

  def do_link(attrs)
  end

  def do_meta(attrs)
  end

  def do_nextid(attrs) # Deprecated
  end

  # --- Headings: each ends the current paragraph and pushes a font whose
  # first argument is the heading tag name.

  def start_h1(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h1', 0, 1, 0)
  end

  def end_h1
    @formatter.end_paragraph(1)
    @formatter.pop_font
  end

  def start_h2(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h2', 0, 1, 0)
  end

  def end_h2
    @formatter.end_paragraph(1)
    @formatter.pop_font
  end

  def start_h3(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h3', 0, 1, 0)
  end

  def end_h3
    @formatter.end_paragraph(1)
    @formatter.pop_font
  end

  def start_h4(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h4', 0, 1, 0)
  end

  def end_h4
    @formatter.end_paragraph(1)
    @formatter.pop_font
  end

  def start_h5(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h5', 0, 1, 0)
  end

  def end_h5
    @formatter.end_paragraph(1)
    @formatter.pop_font
  end

  def start_h6(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font('h6', 0, 1, 0)
  end

  def end_h6
    @formatter.end_paragraph(1)
    @formatter.pop_font
  end

  # --- Block structuring elements

  def do_p(attrs)
    @formatter.end_paragraph(1)
  end

  # <pre> switches handle_data to literal output until the matching end tag.
  def start_pre(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_font(nil, nil, nil, 1)
    @nofill = @nofill + 1
  end

  def end_pre
    @formatter.end_paragraph(1)
    @formatter.pop_font
    @nofill = @nofill - 1
    # A stray </pre> must not leave the counter negative.
    @nofill = 0 if @nofill < 0
  end

  def start_xmp(attrs)
    start_pre(attrs)
    setliteral('xmp') # Tell SGML parser
  end

  def end_xmp
    end_pre
  end

  def start_listing(attrs)
    start_pre(attrs)
    setliteral('listing') # Tell SGML parser
  end

  def end_listing
    end_pre
  end

  def start_address(attrs)
    @formatter.end_paragraph(0)
    @formatter.push_font(nil, 1, nil, nil)
  end

  def end_address
    @formatter.end_paragraph(0)
    @formatter.pop_font
  end

  def start_blockquote(attrs)
    @formatter.end_paragraph(1)
    @formatter.push_margin('blockquote')
  end

  def end_blockquote
    @formatter.end_paragraph(1)
    @formatter.pop_margin
  end

  # --- Lists

  def start_ul(attrs)
    @formatter.end_paragraph(0)
    @formatter.push_margin('ul')
    @list_stack << ['ul', '*', 0]
  end

  def end_ul
    @list_stack.pop if @list_stack
    @formatter.end_paragraph(0)
    @formatter.pop_margin
  end

  # Emits the label of the innermost list and increments that list's item
  # counter; outside any list the label is '*' with counter 0.
  def do_li(attrs)
    @formatter.end_paragraph(0)
    if @list_stack && @list_stack.size > 0
      top = @list_stack[-1]
      label = top[1]
      counter = top[2] + 1
      top[2] = counter
    else
      label, counter = '*', 0
    end
    @formatter.add_label_data(label, counter)
  end

  # The label defaults to '1.'; a one-character type attribute (e.g. "a")
  # replaces it with that character followed by a dot.
  def start_ol(attrs)
    @formatter.end_paragraph(0)
    @formatter.push_margin('ol')
    label = '1.'
    attrs.each do |a, v|
      label = v + '.' if a == 'type' && v.length == 1
    end
    @list_stack << ['ol', label, 0]
  end

  def end_ol
    @list_stack.pop if @list_stack
    @formatter.end_paragraph(0)
    @formatter.pop_margin
  end

  def start_menu(attrs)
    start_ul(attrs)
  end

  def end_menu
    end_ul
  end

  def start_dir(attrs)
    start_ul(attrs)
  end

  def end_dir
    end_ul
  end

  def start_dl(attrs)
    @formatter.end_paragraph(1)
    @list_stack << ['dl', '', 0]
  end

  def end_dl
    ddpop(1)
    @list_stack.pop if @list_stack.length > 0
  end

  def do_dt(attrs)
    ddpop
  end

  def do_dd(attrs)
    ddpop
    @formatter.push_margin('dd')
    @list_stack << ['dd', '', 0]
  end

  # Ends the current paragraph and, if a <dd> is the innermost open list
  # entry, closes it (pops its stack entry and its margin).
  def ddpop(bl=0)
    @formatter.end_paragraph(bl)
    if @list_stack.length > 0 && @list_stack[-1][0] == 'dd'
      @list_stack.pop
      @formatter.pop_margin
    end
  end

  # --- Phrase markup: aliases for the typographic handlers below

  def start_cite(attrs) start_i(attrs) end
  def end_cite() end_i end

  def start_code(attrs) start_tt(attrs) end
  def end_code() end_tt end

  def start_em(attrs) start_i(attrs) end
  def end_em() end_i end

  def start_kbd(attrs) start_tt(attrs) end
  def end_kbd() end_tt end

  def start_samp(attrs) start_tt(attrs) end
  def end_samp() end_tt end

  def start_strong(attrs) start_b(attrs) end
  def end_strong() end_b end

  def start_var(attrs) start_i(attrs) end
  def end_var() end_i end

  # --- Typographic markup

  def start_i(attrs)
    @formatter.push_font(nil, 1, nil, nil)
  end

  def end_i
    @formatter.pop_font
  end

  def start_b(attrs)
    @formatter.push_font(nil, nil, 1, nil)
  end

  def end_b
    @formatter.pop_font
  end

  def start_tt(attrs)
    @formatter.push_font(nil, nil, nil, 1)
  end

  def end_tt
    @formatter.pop_font
  end

  # --- Anchors

  # Extracts href, name and (lower-cased) type and hands them to anchor_bgn;
  # attributes that are absent are passed as nil.
  def start_a(attrs)
    href = nil
    name = nil
    type = nil
    attrs.each do |attrname, value|
      value = value.strip
      case attrname
      when 'href' then href = value
      when 'name' then name = value
      when 'type' then type = value.downcase
      end
    end
    anchor_bgn(href, name, type)
  end

  def end_a
    anchor_end
  end

  # --- Line breaks, rules, images

  def do_br(attrs)
    @formatter.add_line_break
  end

  def do_hr(attrs)
    @formatter.add_hor_rule
  end

  # Collects the image attributes and calls
  # handle_image(src, alt, ismap, align, width, height).
  #
  # alt defaults to '(image)'; width and height default to 0 and keep that
  # default when the attribute is not a plain decimal integer (e.g. "100%",
  # "50px" or an empty string), so malformed markup cannot abort the parse.
  def do_img(attrs)
    align = nil
    alt = '(image)'
    ismap = nil
    src = nil
    width = 0
    height = 0
    attrs.each do |attrname, value|
      case attrname
      when 'align'  then align = value
      when 'alt'    then alt = value
      when 'ismap'  then ismap = value
      when 'src'    then src = value
      when 'width'  then width = image_dimension(value, width)
      when 'height' then height = image_dimension(value, height)
      end
    end
    handle_image(src, alt, ismap, align, width, height)
  end

  # Everything after <plaintext> is literal text.
  def do_plaintext(attrs)
    start_pre(attrs)
    setnomoretags # Tell SGML parser
  end

  # --- Unhandled tags are ignored

  def unknown_starttag(tag, attrs)
  end

  def unknown_endtag(tag)
  end

  private

  # Parses +value+ as a base-10 integer, returning +fallback+ when it is not
  # one. Base 10 is forced so that a leading zero is not read as octal.
  def image_dimension(value, fallback)
    Integer(value, 10)
  rescue ArgumentError, TypeError
    fallback
  end

end
@@ -0,0 +1,332 @@
1
+ # A parser for SGML, using the derived class as static DTD.
2
+
3
# A parser for SGML, using the derived class as static DTD.
#
# Feed text with #feed (any number of times) and finish with #close.
# For a tag named +foo+ the parser looks on +self+ for
#
#   start_foo(attrs) / end_foo   container element, tracked on a tag stack
#   do_foo(attrs)                empty element, not tracked
#
# and falls back to unknown_starttag / unknown_endtag. +attrs+ is an array
# of [lower-cased name, value] pairs. Character data, comments, declarations
# and references are delivered through the handle_* / unknown_* hooks at the
# bottom of the class, all of which do nothing by default.
class SGMLParser #:nodoc: all

  # Regular expressions used for parsing:
  Interesting = /[&<]/
  Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                              '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                              '![^<>]*)?')

  Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/
  Charref = /&#([0-9]+)[^0-9]/

  Starttagopen = /<[>a-zA-Z]/
  Endtagopen = /<\/[<>a-zA-Z]/
  Endbracket = /[<>]/
  Special = /<![^<>]*>/
  Commentopen = /<!--/
  Commentclose = /--[ \t\n]*>/
  Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
  Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
                            '(\s*=\s*' +
                            "('[^']*'" +
                            '|"[^"]*"' +
                            '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')

  # Entity names replaced by handle_entityref.
  Entitydefs =
    {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}

  # verbose:: when truthy, report_unbalanced prints diagnostics.
  def initialize(verbose=false)
    @verbose = verbose
    reset
  end

  # Discards unprocessed input and all parser state.
  def reset
    @rawdata = ''.dup   # unprocessed input; appended to in place by #feed
    @stack = []         # names of the currently open container elements
    @lasttag = '???'    # last start tag seen, used by the "<>" shorthand
    @nomoretags = false
    @literal = false
  end

  # True when an element named +gi+ is currently open.
  def has_context(gi)
    @stack.include? gi
  end

  # After this call all remaining input is treated as character data.
  def setnomoretags
    @nomoretags = true
    @literal = true
  end

  # Enters literal mode: start tags, comments and declarations are passed
  # through as character data until the next end tag. Arguments are ignored.
  def setliteral(*args)
    @literal = true
  end

  # Appends +data+ to the input buffer and parses as much as is complete.
  def feed(data)
    @rawdata << data
    goahead(false)
  end

  # Parses the remaining buffer, flushing any incomplete trailing construct
  # as character data.
  def close
    goahead(true)
  end

  # Main loop. Consumes @rawdata from the front, dispatching to the handlers,
  # and leaves any incomplete trailing construct in @rawdata for the next
  # call -- unless +_end+ is true, in which case the rest is emitted as data.
  def goahead(_end)
    rawdata = @rawdata
    i = 0
    n = rawdata.length
    while i < n
      if @nomoretags
        handle_data(rawdata[i..(n-1)])
        i = n
        break
      end
      # Plain text up to the next '<' or '&' goes out as one data chunk.
      j = rawdata.index(Interesting, i) || n
      handle_data(rawdata[i..(j-1)]) if i < j
      i = j
      break if i == n
      if rawdata[i, 1] == '<'
        if rawdata.index(Starttagopen, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_starttag(i)
          break unless k   # tag not complete yet: wait for more input
          i = k
          next
        end
        if rawdata.index(Endtagopen, i) == i
          k = parse_endtag(i)
          break unless k
          i = k
          @literal = false
          next
        end
        if rawdata.index(Commentopen, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_comment(i)   # returns a length, not a position
          break unless k
          i += k
          next
        end
        if rawdata.index(Special, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_special(i)   # returns a length, not a position
          break unless k
          i += k
          next
        end
      elsif rawdata[i, 1] == '&'
        if rawdata.index(Charref, i) == i
          found = Regexp.last_match
          i += found[0].length
          handle_charref(found[1])
          # The pattern swallows one terminating character; give it back
          # unless it was the ';' that belongs to the reference.
          i -= 1 unless rawdata[i - 1, 1] == ';'
          next
        end
        if rawdata.index(Entityref, i) == i
          found = Regexp.last_match
          i += found[0].length
          handle_entityref(found[1])
          i -= 1 unless rawdata[i - 1, 1] == ';'
          next
        end
      else
        raise RuntimeError, 'neither < nor & ??'
      end
      # We get here only if incomplete matches but
      # nothing else
      match = rawdata.index(Incomplete, i)
      unless match == i
        handle_data(rawdata[i, 1])
        i += 1
        next
      end
      j = match + Regexp.last_match(0).length
      break if j == n # Really incomplete
      handle_data(rawdata[i..(j-1)])
      i = j
    end
    if _end and i < n
      handle_data(@rawdata[i..(n-1)])
      i = n
    end
    @rawdata = rawdata[i..-1]
  end

  # Parses the comment starting at index +i+ and passes its text to
  # handle_comment. Returns the number of characters consumed, or nil when
  # the closing "-->" has not arrived yet.
  def parse_comment(i)
    rawdata = @rawdata
    if rawdata[i, 4] != '<!--'
      raise RuntimeError, 'unexpected call to handle_comment'
    end
    match = rawdata.index(Commentclose, i)
    return nil unless match
    matched_length = Regexp.last_match(0).length
    handle_comment(rawdata[i+4..(match-1)])
    match + matched_length - i
  end

  # Parses the start tag at index +i+ and dispatches it via finish_starttag.
  # Returns the index just past the tag, or nil when the tag is incomplete.
  def parse_starttag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    attrs = []
    if rawdata[i + 1, 1] == '>'
      # SGML shorthand: <> == <last open tag seen>
      k = j
      tag = @lasttag
    else
      unless rawdata.index(Tagfind, i + 1)
        raise RuntimeError, 'unexpected call to parse_starttag'
      end
      name = Regexp.last_match(0)
      k = i + 1 + name.length
      tag = name.downcase
      @lasttag = tag
    end
    while k < j
      # The attribute must start exactly at k. An unanchored search would
      # run past the end of the tag (e.g. for "<br />" or a trailing space)
      # and report following document text as an attribute.
      break unless rawdata.index(Attrfind, k) == k
      found = Regexp.last_match
      attrname, rest, attrvalue = found[1], found[2], found[3]
      if not rest
        attrvalue = '' # was: = attrname
      else
        first = attrvalue[0, 1]
        last = attrvalue[-1, 1]
        if (first == "'" && last == "'") || (first == '"' && last == '"')
          attrvalue = attrvalue[1..-2]   # strip the surrounding quotes
        end
      end
      attrs << [attrname.downcase, attrvalue]
      k += found[0].length
    end
    # When the tag was ended by '<' that character starts the next tag and
    # must not be consumed.
    j += 1 if rawdata[j, 1] == '>'
    finish_starttag(tag, attrs)
    j
  end

  # Parses the end tag at index +i+ and dispatches it via finish_endtag.
  # Returns the index just past the tag, or nil when the tag is incomplete.
  def parse_endtag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    tag = (rawdata[i+2..j-1].strip).downcase
    j += 1 if rawdata[j, 1] == '>'
    finish_endtag(tag)
    j
  end

  # Dispatches a start tag. Returns 1 for a container element (start_<tag>
  # exists; the tag is pushed on the stack), 0 for an empty element
  # (do_<tag> exists) and -1 for an unknown tag.
  def finish_starttag(tag, attrs)
    method = 'start_' + tag
    if self.respond_to?(method)
      @stack << tag
      handle_starttag(tag, method, attrs)
      return 1
    end
    method = 'do_' + tag
    if self.respond_to?(method)
      handle_starttag(tag, method, attrs)
      return 0
    end
    unknown_starttag(tag, attrs)
    -1
  end

  # Dispatches an end tag. An empty tag name ("</>") closes the innermost
  # open element. A named tag closes its innermost open occurrence together
  # with every element opened inside it. A tag that is not open is reported
  # through report_unbalanced when an end_<tag> handler exists, and through
  # unknown_endtag otherwise.
  def finish_endtag(tag)
    if tag == ''
      found = @stack.length - 1
      if found < 0
        unknown_endtag(tag)
        return
      end
    else
      # rindex, not index: with nested elements of the same name only the
      # innermost one may be closed.
      found = @stack.rindex(tag)
      unless found
        if self.respond_to?('end_' + tag)
          report_unbalanced(tag)
        else
          unknown_endtag(tag)
        end
        return
      end
    end
    while @stack.length > found
      tag = @stack[-1]
      method = 'end_' + tag
      if respond_to?(method)
        handle_endtag(tag, method)
      else
        unknown_endtag(tag)
      end
      @stack.pop
    end
  end

  # Parses the "<!...>" declaration at index +i+ and passes its content
  # (without the angle brackets) to handle_special. Returns the number of
  # characters consumed, or nil when the declaration is incomplete.
  def parse_special(i)
    rawdata = @rawdata
    match = rawdata.index(Endbracket, i+1)
    return nil unless match
    matched_length = Regexp.last_match(0).length
    handle_special(rawdata[i+1..(match-1)])
    match - i + matched_length
  end

  # Invokes the start_<tag> / do_<tag> handler named by +method+.
  def handle_starttag(tag, method, attrs)
    self.send(method, attrs)
  end

  # Invokes the end_<tag> handler named by +method+.
  def handle_endtag(tag, method)
    self.send(method)
  end

  # Prints a diagnostic for an end tag without a matching open element;
  # silent unless the parser was created with a truthy +verbose+.
  def report_unbalanced(tag)
    if @verbose
      print '*** Unbalanced </' + tag + '>', "\n"
      print '*** Stack:', @stack.inspect, "\n"
    end
  end

  # Handles a numeric character reference; +name+ holds the decimal digits.
  # Values 0..255 are emitted as a single character through handle_data;
  # anything else, including text that is not a decimal number, goes to
  # unknown_charref.
  def handle_charref(name)
    begin
      # Base 10 is forced: a leading zero must not switch to octal.
      n = Integer(name, 10)
    rescue ArgumentError, TypeError
      unknown_charref(name)
      return
    end
    if !(0 <= n && n <= 255)
      unknown_charref(name)
      return
    end
    handle_data(n.chr)
  end

  # Handles a named entity reference: names found in Entitydefs are emitted
  # as their replacement text, others go to unknown_entityref.
  def handle_entityref(name)
    table = Entitydefs
    if table.include?(name)
      handle_data(table[name])
    else
      unknown_entityref(name)
      return
    end
  end

  # --- Hooks for subclasses; all are no-ops here.

  def handle_data(data)
  end

  def handle_comment(data)
  end

  def handle_special(data)
  end

  def unknown_starttag(tag, attrs)
  end

  def unknown_endtag(tag)
  end

  def unknown_charref(ref)
  end

  def unknown_entityref(ref)
  end

end