ruby-web 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. data/ChangeLog +474 -0
  2. data/INSTALL.txt +9 -0
  3. data/InstalledFiles +180 -0
  4. data/LICENSE.txt +74 -0
  5. data/Rakefile +529 -0
  6. data/TODO +65 -0
  7. data/doc/additional.xml +149 -0
  8. data/doc/core.xml +652 -0
  9. data/doc/credits/index.xml +52 -0
  10. data/doc/credits/php.contributors.xml +118 -0
  11. data/doc/credits/php.language-snippets.ent +622 -0
  12. data/doc/install/index.xml +136 -0
  13. data/doc/install/mac/index.xml +21 -0
  14. data/doc/install/ruby-web.install.rb.instructions.xml +7 -0
  15. data/doc/install/unix/index.xml +46 -0
  16. data/doc/install/win/apache1.xml +166 -0
  17. data/doc/install/win/apache2.xml +141 -0
  18. data/doc/install/win/iis.xml +162 -0
  19. data/doc/install/win/index.xml +24 -0
  20. data/doc/install/win/installer.xml +31 -0
  21. data/doc/install/win/manual.xml +43 -0
  22. data/doc/manual.xml +69 -0
  23. data/doc/old/apache_cgi.txt +23 -0
  24. data/doc/old/fastcgi.txt +23 -0
  25. data/doc/old/mod_ruby.txt +21 -0
  26. data/doc/old/snippets.rdoc +183 -0
  27. data/doc/old/webrick.txt +23 -0
  28. data/doc/old/windows_cgi.txt +9 -0
  29. data/doc/tutorial.xml +14 -0
  30. data/doc/xsl/manual-multi.xsl +10 -0
  31. data/doc/xsl/manual-pdf.xsl +6 -0
  32. data/doc/xsl/manual-single.xsl +6 -0
  33. data/doc/xsl/manual.css +22 -0
  34. data/install.rb +1022 -0
  35. data/lib/formatter.rb +314 -0
  36. data/lib/html-parser.rb +429 -0
  37. data/lib/htmlrepair.rb +113 -0
  38. data/lib/htmlsplit.rb +842 -0
  39. data/lib/sgml-parser.rb +332 -0
  40. data/lib/web.rb +68 -0
  41. data/lib/web/assertinclude.rb +129 -0
  42. data/lib/web/config.rb +50 -0
  43. data/lib/web/connection.rb +1070 -0
  44. data/lib/web/convenience.rb +154 -0
  45. data/lib/web/formreader.rb +318 -0
  46. data/lib/web/htmlparser/html-parser.rb +429 -0
  47. data/lib/web/htmlparser/sgml-parser.rb +332 -0
  48. data/lib/web/htmltools/element.rb +296 -0
  49. data/lib/web/htmltools/stparser.rb +276 -0
  50. data/lib/web/htmltools/tags.rb +286 -0
  51. data/lib/web/htmltools/tree.rb +139 -0
  52. data/lib/web/htmltools/xmltree.rb +160 -0
  53. data/lib/web/htmltools/xpath.rb +71 -0
  54. data/lib/web/info.rb +63 -0
  55. data/lib/web/load.rb +210 -0
  56. data/lib/web/mime.rb +87 -0
  57. data/lib/web/phprb.rb +340 -0
  58. data/lib/web/resources/test/cookie.rb +33 -0
  59. data/lib/web/resources/test/counter.rb +20 -0
  60. data/lib/web/resources/test/multipart.rb +14 -0
  61. data/lib/web/resources/test/redirect.rb +8 -0
  62. data/lib/web/resources/test/stock.rb +33 -0
  63. data/lib/web/sapi/apache.rb +129 -0
  64. data/lib/web/sapi/fastcgi.rb +22 -0
  65. data/lib/web/sapi/install/apache.rb +180 -0
  66. data/lib/web/sapi/install/iis.rb +93 -0
  67. data/lib/web/sapi/install/macosx.rb +90 -0
  68. data/lib/web/sapi/webrick.rb +86 -0
  69. data/lib/web/session.rb +83 -0
  70. data/lib/web/shim/cgi.rb +129 -0
  71. data/lib/web/shim/rails.rb +175 -0
  72. data/lib/web/stringio.rb +78 -0
  73. data/lib/web/strscanparser.rb +24 -0
  74. data/lib/web/tagparser.rb +96 -0
  75. data/lib/web/testing.rb +666 -0
  76. data/lib/web/traceoutput.rb +75 -0
  77. data/lib/web/unit.rb +56 -0
  78. data/lib/web/upload.rb +59 -0
  79. data/lib/web/validate.rb +52 -0
  80. data/lib/web/wiki.rb +557 -0
  81. data/lib/web/wiki/linker.rb +72 -0
  82. data/lib/web/wiki/page.rb +201 -0
  83. data/lib/webunit.rb +27 -0
  84. data/lib/webunit/assert.rb +152 -0
  85. data/lib/webunit/converter.rb +154 -0
  86. data/lib/webunit/cookie.rb +118 -0
  87. data/lib/webunit/domwalker.rb +185 -0
  88. data/lib/webunit/exception.rb +14 -0
  89. data/lib/webunit/form.rb +116 -0
  90. data/lib/webunit/frame.rb +37 -0
  91. data/lib/webunit/htmlelem.rb +122 -0
  92. data/lib/webunit/image.rb +26 -0
  93. data/lib/webunit/jscript.rb +31 -0
  94. data/lib/webunit/link.rb +33 -0
  95. data/lib/webunit/params.rb +321 -0
  96. data/lib/webunit/parser.rb +229 -0
  97. data/lib/webunit/response.rb +464 -0
  98. data/lib/webunit/runtest.rb +41 -0
  99. data/lib/webunit/table.rb +148 -0
  100. data/lib/webunit/testcase.rb +45 -0
  101. data/lib/webunit/ui/cui/testrunner.rb +50 -0
  102. data/lib/webunit/utils.rb +68 -0
  103. data/lib/webunit/webunit.rb +28 -0
  104. data/test/dev/action.rb +83 -0
  105. data/test/dev/forms.rb +104 -0
  106. data/test/dev/forms2.rb +104 -0
  107. data/test/dev/parser.rb +17 -0
  108. data/test/dev/scripts/dump.rb +24 -0
  109. data/test/dev/scripts/makedist.rb +62 -0
  110. data/test/dev/scripts/uri.rb +41 -0
  111. data/test/dev/scripts/uri/common.rb +432 -0
  112. data/test/dev/scripts/uri/ftp.rb +149 -0
  113. data/test/dev/scripts/uri/generic.rb +1106 -0
  114. data/test/dev/scripts/uri/http.rb +76 -0
  115. data/test/dev/scripts/uri/https.rb +26 -0
  116. data/test/dev/scripts/uri/ldap.rb +238 -0
  117. data/test/dev/scripts/uri/mailto.rb +260 -0
  118. data/test/dev/scripts/urireg.rb +174 -0
  119. data/test/dev/simpledispatcher.rb +156 -0
  120. data/test/dev/test.action.rb +146 -0
  121. data/test/dev/test.formreader.rb +463 -0
  122. data/test/dev/test.simpledispatcher.rb +186 -0
  123. data/test/dev/webunit/conv/digit-0.rb +21 -0
  124. data/test/dev/webunit/conv/digit-1.rb +17 -0
  125. data/test/dev/webunit/conv/digit.rb +23 -0
  126. data/test/dev/webunit/conv/test_digit-0.rb +16 -0
  127. data/test/dev/webunit/conv/test_digit-1.rb +19 -0
  128. data/test/dev/webunit/conv/test_digit.rb +26 -0
  129. data/test/dev/webunit/conv/test_digit_view-0.rb +76 -0
  130. data/test/dev/webunit/conv/test_digit_view-1.rb +102 -0
  131. data/test/dev/webunit/conv/test_digit_view.rb +134 -0
  132. data/test/installation/htdocs/cgi_test.rb +296 -0
  133. data/test/installation/htdocs/test_install.rb +4 -0
  134. data/test/installation/runwebtest.rb +5 -0
  135. data/test/installation/test_cookie.rb +128 -0
  136. data/test/installation/test_form.rb +47 -0
  137. data/test/installation/test_multipart.rb +51 -0
  138. data/test/installation/test_request.rb +24 -0
  139. data/test/installation/test_response.rb +35 -0
  140. data/test/unit/htdocs/cookie.rb +32 -0
  141. data/test/unit/htdocs/multipart.rb +28 -0
  142. data/test/unit/htdocs/redirect.rb +12 -0
  143. data/test/unit/htdocs/simple.rb +13 -0
  144. data/test/unit/htdocs/stock.rb +33 -0
  145. data/test/unit/test_assert.rb +162 -0
  146. data/test/unit/test_cookie.rb +114 -0
  147. data/test/unit/test_domwalker.rb +77 -0
  148. data/test/unit/test_form.rb +42 -0
  149. data/test/unit/test_frame.rb +40 -0
  150. data/test/unit/test_htmlelem.rb +74 -0
  151. data/test/unit/test_image.rb +45 -0
  152. data/test/unit/test_jscript.rb +57 -0
  153. data/test/unit/test_link.rb +85 -0
  154. data/test/unit/test_multipart.rb +51 -0
  155. data/test/unit/test_params.rb +210 -0
  156. data/test/unit/test_parser.rb +53 -0
  157. data/test/unit/test_response.rb +150 -0
  158. data/test/unit/test_table.rb +70 -0
  159. data/test/unit/test_utils.rb +106 -0
  160. data/test/unit/test_webunit.rb +28 -0
  161. data/test/web/mod_ruby_stub.rb +39 -0
  162. data/test/web/test.assertinclude.rb +109 -0
  163. data/test/web/test.buffer.rb +182 -0
  164. data/test/web/test.code.loader.rb +78 -0
  165. data/test/web/test.config.rb +31 -0
  166. data/test/web/test.error.handling.rb +91 -0
  167. data/test/web/test.formreader-2.0.rb +352 -0
  168. data/test/web/test.load.rb +125 -0
  169. data/test/web/test.mime-type.rb +23 -0
  170. data/test/web/test.narf.cgi.rb +106 -0
  171. data/test/web/test.phprb.rb +239 -0
  172. data/test/web/test.request.rb +368 -0
  173. data/test/web/test.response.rb +637 -0
  174. data/test/web/test.ruby-web.rb +10 -0
  175. data/test/web/test.session.rb +50 -0
  176. data/test/web/test.shim.cgi.rb +96 -0
  177. data/test/web/test.tagparser.rb +65 -0
  178. data/test/web/test.template2.rb +297 -0
  179. data/test/web/test.testing2.rb +318 -0
  180. data/test/web/test.upload.rb +45 -0
  181. data/test/web/test.validate.rb +46 -0
  182. data/test/web/test.web.test.rb +495 -0
  183. data/test/wiki/test.history.rb +297 -0
  184. data/test/wiki/test.illustration_page.rb +287 -0
  185. data/test/wiki/test.linker.rb +197 -0
  186. data/test/wiki/test.tarpit.rb +56 -0
  187. data/test/wiki/test.wiki.rb +300 -0
  188. data/test/wikitestroot/admin.rb +7 -0
  189. data/test/wikitestroot/wiki.rb +6 -0
  190. metadata +234 -0
@@ -0,0 +1,113 @@
1
+ =begin
2
+
3
+ = HTML Repair Library
4
+
5
+ htmlrepair.rb
6
+
7
+ Version 1.0.1
8
+
9
+ Copyright (C) 2000 MoonWolf Development
10
+
11
+ MoonWolf <moonwolf-ruby@moonwolf.com>
12
+
13
+ 省略された終了タグを補う。 (Supplies end tags that were omitted from the HTML.)
14
+
15
+ == 使い方 (Usage)
16
+
17
+ obj = HTMLSplit.new(html)
18
+ obj.repair
19
+
20
+ =end
21
+
22
+ require "htmlsplit"
23
+
24
class HTMLSplit
  # For each tag whose end tag is commonly omitted, the list of tags that may
  # legitimately sit between two nested occurrences of it. When a start tag is
  # seen while the same tag is still open and none of these parents has been
  # opened in between, the earlier occurrence is considered unterminated.
  PARENTTAG = {
    'p'        => %w(body table),
    'a'        => %w(body),
    'thead'    => %w(table),
    'tfoot'    => %w(table),
    'tbody'    => %w(table),
    # Fixed: the original listed 'tfoor', so a <tr> inside <tfoot> was never
    # recognised as properly nested.
    'tr'       => %w(table thead tfoot tbody),
    'td'       => %w(tr),
    'th'       => %w(tr),
    'li'       => %w(ol ul),
    'dt'       => %w(dl),
    'dd'       => %w(dl),
    'col'      => %w(colgroup),
    'param'    => %w(applet),
    'area'     => %w(map),
    'input'    => %w(form),
    'textarea' => %w(form),
    'button'   => %w(form),
    'select'   => %w(form),
    'keygen'   => %w(form),
    'label'    => %w(form),
    'fieldset' => %w(form),
    'legend'   => %w(fieldset),
    'option'   => %w(select),
  }

  # Rewrites @document so that every StartTag has a matching EndTag.
  #
  # * A start tag listed in PARENTTAG that re-opens a still-open tag of the
  #   same name (without an allowed parent in between) first closes everything
  #   back to, and including, that earlier tag.
  # * An end tag closes any tags opened after its matching start tag.
  #   An end tag with no matching start tag is kept as-is.
  # * Tags still open at the end of the document are closed in reverse order.
  #
  # Every other node is copied through unchanged.
  # Returns the new document array (also stored in @document).
  def repair
    open_tags = []   # names of start tags that have not been closed yet
    repaired = []

    @document.each do |node|
      case node
      when StartTag
        close_omitted_end_tags(node.name, open_tags, repaired)
        open_tags.push node.name
      when EndTag
        if open_tags.include?(node.name)
          # Close everything opened after the matching start tag; the matching
          # name itself is popped but its end tag is the node pushed below.
          while (name = open_tags.pop) && name != node.name
            repaired.push EndTag.new(name)
          end
        end
      end
      repaired.push node
    end

    while (name = open_tags.pop)
      repaired.push EndTag.new(name)
    end
    @document = repaired
  end

  private

  # Emits the end tags implied by opening +name+ while an earlier +name+ is
  # still open, popping them from +open_tags+ and appending them to +out+.
  def close_omitted_end_tags(name, open_tags, out)
    parents = PARENTTAG[name]
    return unless parents

    previous = open_tags.rindex(name)
    return unless previous
    # A permitted parent opened since the previous occurrence means this is
    # ordinary nesting (e.g. <li><ul><li>), not an omitted end tag.
    return if open_tags[previous..-1].any? { |tag| parents.include?(tag) }

    while (tag = open_tags.pop)
      out.push EndTag.new(tag)
      break if tag == name
    end
  end
end
@@ -0,0 +1,842 @@
1
+ =begin Start of Document
2
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
3
+ <html>
4
+ <head>
5
+ <title>htmlsplit.rb</title>
6
+ <link href="rubydoc.css" rel="stylesheet">
7
+ </head>
8
+ <body>
9
+ | <a href="./">���</a> |
10
+ <h1>HTML Split Library</h1>
11
+ <p> HTML���ɤ߽񤭤��롣 �ɤ߹����ʸ��ϥ�����ʸ���������ˤʤ롣 to_s�᥽�åɤ�HTML���᤹���Ȥ�����롣</p>
12
+ <h2>���饹����</h2>
13
+ <table bgcolor="#FFFFFF" border="1">
14
+ <tr><td><a href="#HTMLSplit">HTMLSplit</a><td>HTML�򥿥���ʸ���ǡ�����ʬ�䤹�롣
15
+ <tr>
16
+ <td><a href="#CharacterData">CharacterData</a>
17
+ <td>ʸ���ǡ���
18
+ <tr><td><a href="#EmptyElementTag">EmptyElementTag</a>
19
+ <td>�����ǤΥ���
20
+ <tr>
21
+ <td><a href="#StartTag">StartTag</a>
22
+ <td>���ϥ���
23
+ <tr>
24
+ <td><a href="#EndTag">EndTag</a>
25
+ <td>�����
26
+ <tr>
27
+ <td><a href="#Comment">Comment</a>
28
+ <td>������
29
+ <tr>
30
+ <td><a href="#Declaration">Declaration</a>
31
+ <td>���(DOCTYPE)
32
+ <tr>
33
+ <td><a href="#SSI">SSI</a>
34
+ <td>SSI��
35
+ <tr>
36
+ <td><a href="#ERuby">ERuby</a>
37
+ <td>eRuby/ASP/JSP������ץȢ�
38
+ <tr>
39
+ <td><a href="#PHP">PHP</a>
40
+ <td>PHP������ץȢ�
41
+ </table>
42
+ ��������°���ͤʤɤ������ޤ줿������ץȤ�ǧ���Ǥ��ޤ���
43
+ <h2>�Ȥ��� </h2>
44
+ <h3>�ɤ߹���</h3>
45
+ <pre class="Exception"><samp>#!/usr/bin/ruby
46
+ require "htmlsplit"
47
+
48
+ obj = HTMLSplit.new(ARGF.read)</samp></pre>
49
+ <h3>����</h3>
50
+ <pre>
51
+ obj.document.each {|e|
52
+ print e.to_s
53
+ }
54
+ </pre>
55
+ <h3>°��������</h3>
56
+ <pre>
57
+ img = Tag('img/')
58
+ img['src']='xxx.png' #&lt;img src="xxx.png"&gt;
59
+
60
+ o = Tag('option')
61
+ o['selected']=true #&lt;option selected&gt;
62
+ </pre>
63
+ =end
64
+
65
+ require "cgi"
66
+ require "kconv"
67
+
68
+ =begin EmptyElementTag
69
+
70
+ <h2><a name="EmptyElementTag">EmptyElementTag</a></h2>
71
+ �����ǤΥ���
72
+ <h3>���饹�᥽�å�</h3>
73
+ <dl compact>
74
+ <dt>new(<var class="String">name</var>[,<var class="Hash">attr</var>])
75
+ <dd>���������֥������Ȥ��������롣 <var class="String">name</var>�ϥ�����̾�� <var class="Hash">attr</var>�ϥ�����°��nil�ޤ���Hash
76
+ </dl>
77
+ <h3>�᥽�å�</h3>
78
+ <dl compact>
79
+ <dt class="String">name
80
+ <dd>����̾���֤���
81
+ <dt class="Hash">attr
82
+ <dd>°�����֤���
83
+ <dt class="String">to_s
84
+ <dd>HTML���֤���
85
+ <dt class="String">self[<var class="String">key</var>]
86
+ <dd>key�˴�Ϣ�Ť���줿°���ͤ��֤��ޤ���
87
+ �������륭������Ͽ����Ƥ��ʤ����ˤϡ�nil���֤��ޤ���
88
+ <dt class="String">self[<var class="String">key</var>]= <var class="String">value</var>
89
+ <dd><var class="String">key</var>���Ф���<var class="String">value</var>���Ϣ�Ť��ޤ���
90
+ <var class="String">value</var>��nil�λ���<var class="String">key</var>���Ф����Ϣ��������ޤ���
91
+ </dl>
92
+ =end
93
# Tag of an element that has no content and no end tag (e.g. <br>, <img>).
class EmptyElementTag
  attr_accessor :name
  attr_accessor :attr

  # name - tag name; stored lower-cased.
  # attr - Hash of attribute name => value (true for a valueless attribute),
  #        or nil when the tag has no attributes.
  def initialize(name, attr = nil)
    @name = name.downcase
    @attr = attr
  end

  # Renders the tag as HTML with attributes in sorted name order.
  # Attributes whose value is true are written bare; all other values are
  # converted to a String and HTML-escaped.
  def to_s
    return "<#{@name}>" unless @attr

    rendered = @attr.keys.sort.map do |key|
      value = @attr[key]
      if value == true
        " #{key}"
      else
        %( #{key}="#{CGI.escapeHTML(value.to_s)}")
      end
    end
    # join, not Array#to_s: since Ruby 1.9 Array#to_s returns inspect output.
    "<#{@name}#{rendered.join}>"
  end

  # Returns the value of attribute +key+, or nil when it is not set.
  def [](key)
    @attr && @attr[key]
  end

  # Sets attribute +key+ to +value+; a nil +value+ removes the attribute.
  # The attribute hash is created on first use. (The original assigned to a
  # local variable here, so the attribute was lost when @attr was nil.)
  def []=(key, value)
    if value.nil?
      @attr.delete(key) if @attr
    else
      @attr ||= {}
      @attr[key] = value
    end
  end
end
125
+ =begin StartTag
126
+ <h2>StartTag</h2>
127
+ ���ϥ���
128
+ <h3>���饹�᥽�å�</h3>
129
+ <dl compact>
130
+ <dt>new(<var class="String">name</var>[,<var class="Hash">attr</var>])
131
+ <dd>���������֥������Ȥ��������롣 <var class="String">name</var>�ϥ�����̾�� <var class="Hash">attr</var>�ϥ�����°��nil�ޤ���Hash
132
+ </dl>
133
+ <h3>�᥽�å�</h3>
134
+ <dl compact>
135
+ <dt class="String">name
136
+ <dd>����̾���֤���
137
+ <dt class="Hash">attr
138
+ <dd>°�����֤���
139
+ <dt class="String">to_s
140
+ <dd>HTML���֤���
141
+ <dt class="String">self[<var class="String">key</var>]
142
+ <dd>key�˴�Ϣ�Ť���줿°���ͤ��֤��ޤ���
143
+ �������륭������Ͽ����Ƥ��ʤ����ˤϡ�nil���֤��ޤ���
144
+ <dt class="String">self[<var class="String">key</var>]= <var class="String">value</var>
145
+ <dd><var class="String">key</var>���Ф���<var class="String">value</var>���Ϣ�Ť��ޤ���
146
+ <var class="String">value</var>��nil�λ���<var class="String">key</var>���Ф����Ϣ��������ޤ���
147
+ </dl>
148
+ =end
149
# Opening tag of an element (e.g. <p class="x">).
class StartTag
  attr_accessor :name
  attr_accessor :attr

  # name - tag name; stored lower-cased.
  # attr - Hash of attribute name => value (true for a valueless attribute),
  #        or nil when the tag has no attributes.
  def initialize(name, attr = nil)
    @name = name.downcase
    @attr = attr
  end

  # Renders the tag as HTML with attributes in sorted name order.
  # Attributes whose value is true are written bare; all other values are
  # converted to a String and HTML-escaped.
  def to_s
    return "<#{@name}>" unless @attr

    rendered = @attr.keys.sort.map do |key|
      value = @attr[key]
      if value == true
        " #{key}"
      else
        %( #{key}="#{CGI.escapeHTML(value.to_s)}")
      end
    end
    # join, not Array#to_s: since Ruby 1.9 Array#to_s returns inspect output.
    "<#{@name}#{rendered.join}>"
  end

  # Returns the value of attribute +key+, or nil when it is not set.
  def [](key)
    @attr && @attr[key]
  end

  # Sets attribute +key+ to +value+; a nil +value+ removes the attribute.
  # The attribute hash is created on first use. (The original assigned to a
  # local variable here, so the attribute was lost when @attr was nil.)
  def []=(key, value)
    if value.nil?
      @attr.delete(key) if @attr
    else
      @attr ||= {}
      @attr[key] = value
    end
  end
end
181
+ =begin EndTag
182
+ <h2>EndTag</h2>
183
+ �����
184
+ <h3>���饹�᥽�å�</h3>
185
+ <dl compact>
186
+ <dt>new(<var class="String">name</var>)
187
+ <dd>���������֥������Ȥ��������롣 <var class="String">name</var>�ϥ�����̾��
188
+ </dl>
189
+ <h3>�᥽�å�</h3>
190
+ <dl compact>
191
+ <dt class="String">name
192
+ <dd>����̾���֤���
193
+ <dt class="String">to_s
194
+ <dd>HTML���֤���
195
+ </dl>
196
+ =end
197
# Closing tag of an element (e.g. </p>).
class EndTag
  attr_reader :name
  attr_writer :name

  # name - tag name; stored lower-cased.
  def initialize(name)
    lowered = name.downcase
    @name = lowered
  end

  # Renders the tag as HTML.
  def to_s
    '</' + @name.to_s + '>'
  end
end
206
+ =begin CharacterData
207
+ <h2>CharacterData</h2>
208
+ ʸ���ǡ���
209
+ <h3>���饹�᥽�å�</h3>
210
+ <dl compact>
211
+ <dt>new(<var class="String">text</var>)
212
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
213
+ </dl>
214
+ <h3>�᥽�å�</h3>
215
+ <dl compact>
216
+ <dt class="String">text
217
+ <dd>�ƥ����Ȥ��֤���
218
+ <dt class="String">to_s
219
+ <dd>HTML���֤���
220
+ </dl>
221
+ =end
222
# A run of text found between tags.
class CharacterData
  attr_reader :text
  attr_writer :text

  # text - the text, stored as given.
  def initialize(text)
    self.text = text
  end

  # Returns the stored text object itself, without copying or escaping.
  def to_s
    text
  end
end
231
+ =begin Declaration
232
+ <h2>Declaration</h2>
233
+ SGML���
234
+ <h3>���饹�᥽�å�</h3>
235
+ <dl compact>
236
+ <dt>new(<var class="String">text</var>)
237
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
238
+ </dl>
239
+ <h3>�᥽�å�</h3>
240
+ <dl compact>
241
+ <dt class="String">text
242
+ <dd>�ƥ����Ȥ��֤���
243
+ <dt class="String">to_s
244
+ <dd>HTML���֤���
245
+ </dl>
246
+ =end
247
# Markup declaration such as <!DOCTYPE ...>; +text+ is what lies between
# "<!" and ">".
class Declaration
  attr_reader :text
  attr_writer :text

  # text - the declaration body, stored as given.
  def initialize(text)
    self.text = text
  end

  # Renders the declaration as HTML.
  def to_s
    '<!' + @text.to_s + '>'
  end
end
256
+ =begin Comment
257
+ <h2>Comment</h2>
258
+ ������
259
+ <h3>���饹�᥽�å�</h3>
260
+ <dl compact>
261
+ <dt>new(<var class="String">text</var>)
262
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
263
+ </dl>
264
+ <h3>�᥽�å�</h3>
265
+ <dl compact>
266
+ <dt class="String">text
267
+ <dd>�ƥ����Ȥ��֤���
268
+ <dt class="String">to_s
269
+ <dd>HTML���֤���
270
+ </dl>
271
+ =end
272
# HTML comment; +text+ is what lies between "<!--" and "-->".
class Comment
  attr_reader :text
  attr_writer :text

  # text - the comment body, stored as given.
  def initialize(text)
    self.text = text
  end

  # Renders the comment as HTML.
  def to_s
    '<!--' + @text.to_s + '-->'
  end
end
281
+ =begin SSI
282
+ <h2>SSI</h2>
283
+ SSI
284
+ <h3>���饹�᥽�å�</h3>
285
+ <dl compact>
286
+ <dt>new(<var class="String">text</var>)
287
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
288
+ </dl>
289
+ <h3>�᥽�å�</h3>
290
+ <dl compact>
291
+ <dt class="String">text
292
+ <dd>�ƥ����Ȥ��֤���
293
+ <dt class="String">to_s
294
+ <dd>HTML���֤���
295
+ </dl>
296
+ =end
297
# Server-side include directive written as a comment (e.g. <!--#include ...-->);
# +text+ is what lies between "<!--" and "-->".
class SSI
  attr_reader :text
  attr_writer :text

  # text - the directive body, stored as given.
  def initialize(text)
    self.text = text
  end

  # Renders the directive in comment syntax.
  def to_s
    '<!--' + @text.to_s + '-->'
  end
end
306
+ =begin ERuby
307
+ <h2>ERuby</h2>
308
+ eRuby/ASP/JSP������ץ�
309
+ <h3>���饹�᥽�å�</h3>
310
+ <dl compact>
311
+ <dt>new(<var class="String">text</var>)
312
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
313
+ </dl>
314
+ <h3>�᥽�å�</h3>
315
+ <dl compact>
316
+ <dt class="String">text
317
+ <dd>�ƥ����Ȥ��֤���
318
+ <dt class="String">to_s
319
+ <dd>HTML���֤���
320
+ </dl>
321
+ =end
322
# Embedded script delimited by <% ... %> (eRuby/ASP/JSP style); +text+ is the
# part between the delimiters.
class ERuby
  attr_reader :text
  attr_writer :text

  # text - the script body, stored as given.
  def initialize(text)
    self.text = text
  end

  # Renders the script block with its delimiters.
  def to_s
    '<%' + @text.to_s + '%>'
  end
end
331
+ =begin PHP
332
+ <h2>PHP</h2>
333
+ PHP������ץ�
334
+ <h3>���饹�᥽�å�</h3>
335
+ <dl compact>
336
+ <dt>new(<var class="String">text</var>)
337
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
338
+ </dl>
339
+ <h3>�᥽�å�</h3>
340
+ <dl compact>
341
+ <dt class="String">text
342
+ <dd>�ƥ����Ȥ��֤���
343
+ <dt class="String">to_s
344
+ <dd>HTML���֤���
345
+ </dl>
346
+ =end
347
# Embedded script delimited by <? ... ?> (PHP style); +text+ is the part
# between the delimiters.
class PHP
  attr_reader :text
  attr_writer :text

  # text - the script body, stored as given.
  def initialize(text)
    self.text = text
  end

  # Renders the script block with its delimiters.
  def to_s
    '<?' + @text.to_s + '?>'
  end
end
356
+ =begin HTMLSplit
357
+
358
+ <h2><a name="HTMLSplit">HTMLSplit</a></h2>
359
+ HTML�ɤ߽�
360
+ <h3>���饹�᥽�å�</h3>
361
+ <dl compact>
362
+ <dt>new(<var class="String">html</var>)
363
+ <dd>���������֥������Ȥ��������롣 <var class="String">html</var>��HTMLʸ��
364
+ </dl>
365
+ <h3>�᥽�å�</h3>
366
+ <dl compact>
367
+ <dt class="Array">document
368
+ <dd>�ɥ�����Ȥ�������֤���
369
+ <dt class="String">to_s
370
+ <dd>HTML���֤���
371
+ <dt class="Iterator">each {|<var>obj</var>,<var class="Array">tag</var>| ...}
372
+ <dd>�ɥ�����Ȥγƥ��֥�������(<var>obj</var>)���Ф��ƥ֥��å���ɾ�����ޤ���
373
+ <var class="Array">tag</var>�ϳ��ϥ����Υꥹ�ȡ� [ StartTag , <var class="Integer">����ǥ���</var>] ��
374
+ <dt class="Integer">index(<var class="Class">class</var>, <var class="Integer">start</var>, <var class="Integer">end</var>, <var>value</var>, <var class="Integer">count</var>) {|obj| ...}
375
+ <dd><var class="Integer">start</var>����<var class="Integer">end</var>�ޤǤ����Ǥ�<var class="Class">class</var>��������<var class="Integer">count</var>���ܤ����Ǥΰ��֤��֤��ޤ���
376
+ ���������Ǥ��ҤȤĤ�ʤ��ä����ˤ�nil���֤��ޤ���<br>
377
+ <var>value</var>��nil�ʳ����ͤ���ꤷ�����ˤ����Ǥ�<var>value</var>���������������å���Ԥ��ޤ���<var class="Class">class</var>��EmptyElementTag,StartTag,EndTag�λ��ϥ���̾������ʳ��ϥƥ����Ȥˤ�ä���Ӥ��ޤ���<br>
378
+ �֥��å�����ꤷ�ƸƤӽФ��줿���ˤϥ֥��å������Ǥ���������ɾ�����롣
379
+ <dt class="Integer">end_index(<var class="Integer">start</var>)
380
+ <dd><var class="Integer">start</var>���б�����EndTag�Υ���ǥ������֤��ޤ���
381
+ �б��������Ǥ��ʤ��ä����ˤ�nil���֤��ޤ���<br>
382
+ </dl>
383
+ =end
384
# Splits an HTML string into a flat array of node objects (StartTag, EndTag,
# EmptyElementTag, CharacterData, Declaration, Comment, SSI, ERuby, PHP) and
# can serialise that array back to HTML with #to_s.
class HTMLSplit
  # Names of elements that never take an end tag.
  EMPTY = %w(area base basefont bgsound br col frame hr img input isindex
             keygen link meta nextid param spacer wbr)

  # Characters treated as whitespace inside a tag (the set matched by /\s/).
  SPACE_CHARS = [' ', "\t", "\r", "\n", "\f", "\v"]

  # The parsed document: an Array of node objects in source order.
  attr_accessor :document

  # Parses +html+ with a character-driven state machine.
  #
  # The input is scanned per character (not per byte) so that text and
  # attribute values keep the encoding of +html+ and non-ASCII content
  # round-trips through #to_s.
  #
  # Input that ends in the middle of a construct is still emitted: an
  # unfinished tag name becomes CharacterData ("<" + name), an unfinished tag
  # with attributes becomes a tag with the attributes read so far, and
  # unfinished comments/scripts/declarations become the corresponding node.
  def initialize(html)
    @document = []        # list of parsed nodes
    name = ''.dup         # tag name being read (a leading "/" marks an end tag)
    text = ''.dup         # character data / attribute value / comment body
    attr = {}             # attributes of the tag being read
    attrname = ''.dup     # attribute name being read
    state = :TEXT

    html.each_char do |char|
      case state
      when :TEXT
        if char == '<'
          @document << CharacterData.new(text) unless text.empty?
          name = ''.dup
          attr = {}
          state = :TAGNAME
        else
          text << char
        end

      when :TAGNAME
        case char
        when '>'
          emit_bare_tag(name)
          text = ''.dup
          state = :TEXT
        when '!'
          text = ''.dup
          state = :DECLARE
        when '%'
          text = ''.dup
          state = :ERUBY
        when '?'
          text = ''.dup
          state = :PHP
        when *SPACE_CHARS
          text = ''.dup
          state = :SPACE
        else
          name << char
        end

      when :SPACE # whitespace between attributes
        case char
        when '>'
          emit_tag(name, attr)
          text = ''.dup
          state = :TEXT
        when *SPACE_CHARS
          # skip
        else
          attrname = char.dup
          state = :ATTRNAME
        end

      when :ATTRNAME # attribute name
        case char
        when *SPACE_CHARS
          state = :BEFOREEQUAL
        when '='
          state = :AFTEREQUAL
        when '>'
          attr[attrname.downcase] = true
          if attrname == '/' && !EMPTY.include?(name.downcase)
            # "<x />": self-closing form of a non-empty element.
            attr.delete('/')
            @document << StartTag.new(name, attr)
            @document << EndTag.new(name)
          else
            emit_tag(name, attr)
          end
          text = ''.dup
          state = :TEXT
        else
          attrname << char
        end

      when :BEFOREEQUAL # after an attribute name, before a possible "="
        case char
        when '='
          state = :AFTEREQUAL
        when '>'
          attr[attrname.downcase] = true
          emit_tag(name, attr)
          text = ''.dup
          state = :TEXT
        when *SPACE_CHARS
          # skip
        else
          # No "=" followed: the previous attribute was valueless.
          attr[attrname.downcase] = true
          attrname = char.dup
          state = :ATTRNAME
        end

      when :AFTEREQUAL # after "=", before the value
        case char
        when "'"
          text = ''.dup
          state = :SQVALUE
        when '"'
          text = ''.dup
          state = :DQVALUE
        when '>'
          attr[attrname.downcase] = true
          emit_tag(name, attr)
          text = ''.dup
          state = :TEXT
        when *SPACE_CHARS
          # skip
        else
          text = char.dup
          state = :VALUE
        end

      when :VALUE # unquoted value
        case char
        when *SPACE_CHARS
          attr[attrname.downcase] = CGI.unescapeHTML(text)
          state = :SPACE
        when '>'
          attr[attrname.downcase] = CGI.unescapeHTML(text)
          emit_tag(name, attr)
          text = ''.dup
          state = :TEXT
        else
          text << char
        end

      when :SQVALUE, :DQVALUE # quoted value
        if char == (state == :SQVALUE ? "'" : '"')
          attr[attrname.downcase] = CGI.unescapeHTML(text)
          state = :SPACE
        else
          text << char
        end

      when :COMMENT
        if char == '>' && text[-2, 2] == '--' # end of comment
          @document << comment_node(text[0..-3], /^#[a-z]+/)
          text = ''.dup
          state = :TEXT
        else
          text << char
        end

      when :ERUBY, :PHP
        closer = state == :ERUBY ? '%' : '?'
        if char == '>' && text[-1, 1] == closer # end of script block
          body = text[0..-2]
          @document << (state == :ERUBY ? ERuby.new(body) : PHP.new(body))
          text = ''.dup
          state = :TEXT
        else
          text << char
        end

      when :DECLARE
        if char == '>'
          @document << Declaration.new(text)
          text = ''.dup
          state = :TEXT
        else
          text << char
          if text == '--' # "<!--" starts a comment
            text = ''.dup
            state = :COMMENT
          end
        end
      end
    end

    # End of input: flush whatever construct was being read.
    case state
    when :TEXT
      @document << CharacterData.new(text) unless text.empty?
    when :TAGNAME
      # Fixed: the original emitted the stale preceding text instead of the
      # partial tag name.
      @document << CharacterData.new('<' + name)
    when :SPACE
      emit_tag(name, attr)
    when :ATTRNAME, :BEFOREEQUAL, :AFTEREQUAL
      attr[attrname.downcase] = true
      emit_tag(name, attr)
    when :VALUE, :SQVALUE, :DQVALUE
      # Fixed: for quoted values the original recorded the attribute but
      # never emitted the tag, dropping it from the document.
      attr[attrname.downcase] = CGI.unescapeHTML(text)
      emit_tag(name, attr)
    when :COMMENT
      # NOTE(review): this pattern accepts upper-case letters while the
      # in-stream one does not; kept as in the original - confirm intent.
      @document << comment_node(text, /^#[a-zA-Z]+/)
    when :ERUBY
      @document << ERuby.new(text)
    when :PHP
      @document << PHP.new(text)
    when :DECLARE
      @document << Declaration.new(text)
    end
  end

  # Returns the document serialised back to HTML.
  def to_s
    @document.each_with_object(''.dup) { |node, out| out << node.to_s }
  end

  # Yields each node together with the list of currently open start tags.
  # The list holds [StartTag, document_index] pairs, outermost first. A
  # StartTag is already on the list when it is yielded; an EndTag has already
  # removed its matching StartTag (and anything opened after it).
  def each
    open_tags = []
    @document.each_with_index do |node, i|
      case node
      when StartTag
        open_tags.push [node, i]
      when EndTag
        match = open_tags.rindex { |tag, _| tag.name == node.name }
        open_tags = open_tags[0, match] if match
      end
      yield node, open_tags
    end
  end

  # Returns the document index of the +count+-th node between +_start+ and
  # +_end+ (inclusive) whose class is exactly +_class+, or nil if there is
  # none.
  #
  # value - when given, the node must also satisfy <tt>value === x</tt>, where
  #         x is the tag name for StartTag/EmptyElementTag/EndTag and the text
  #         for every other node.
  # block - when given, the node must also make the block return true.
  def index(_class, _start = 0, _end = -1, value = nil, count = 1)
    position = _start
    # `|| []`: a start index past the end of the document yields nil.
    (@document[_start.._end] || []).each do |obj|
      if obj.class == _class && value_matches?(value, obj) &&
         (!block_given? || yield(obj))
        count -= 1
        return position if count <= 0
      end
      position += 1
    end
    nil
  end

  # Scans forward from +start_index+ and returns the index of the first EndTag
  # that leaves no start tag (seen since +start_index+) open - i.e. the end
  # tag matching the StartTag at +start_index+. Returns nil if none is found.
  def end_index(start_index)
    open_names = []
    (start_index...@document.size).each do |idx|
      node = @document[idx]
      case node
      when StartTag
        open_names.push node.name
      when EndTag
        match = open_names.rindex(node.name)
        open_names = open_names[0, match] if match
        return idx if open_names.empty?
      end
    end
    nil
  end

  private

  # Appends the node for a finished tag: EmptyElementTag for EMPTY names,
  # EndTag when the name starts with "/", StartTag otherwise.
  def emit_tag(name, attr)
    name = name.downcase
    @document <<
      if EMPTY.include?(name)
        EmptyElementTag.new(name, attr)
      elsif name[0, 1] == '/'
        EndTag.new(name[1..-1])
      else
        StartTag.new(name, attr)
      end
  end

  # Appends the node(s) for a tag closed directly after its name.
  # "<x/>" becomes StartTag + EndTag, except for EMPTY elements ("<br/>"),
  # which become one EmptyElementTag carrying the same "/" marker attribute
  # that "<br />" produces.
  def emit_bare_tag(name)
    name = name.downcase
    if name[-1, 1] == '/'
      base = name[0..-2]
      if EMPTY.include?(base)
        @document << EmptyElementTag.new(base, '/' => true)
      else
        @document << StartTag.new(base, nil)
        @document << EndTag.new(base)
      end
    else
      emit_tag(name, nil)
    end
  end

  # Builds an SSI node when +body+ matches +ssi_pattern+, else a Comment.
  # The match runs on a binary copy so invalid byte sequences cannot raise.
  def comment_node(body, ssi_pattern)
    body.b =~ ssi_pattern ? SSI.new(body) : Comment.new(body)
  end

  # True when no +value+ filter is given or the node's name/text satisfies it.
  def value_matches?(value, obj)
    return true unless value

    case obj
    when StartTag, EmptyElementTag, EndTag
      value === obj.name
    else
      value === obj.text
    end
  end
end
839
+ =begin End of Document
840
+ </body>
841
+ </html>
842
+ =end