ruby-web 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (190) hide show
  1. data/ChangeLog +474 -0
  2. data/INSTALL.txt +9 -0
  3. data/InstalledFiles +180 -0
  4. data/LICENSE.txt +74 -0
  5. data/Rakefile +529 -0
  6. data/TODO +65 -0
  7. data/doc/additional.xml +149 -0
  8. data/doc/core.xml +652 -0
  9. data/doc/credits/index.xml +52 -0
  10. data/doc/credits/php.contributors.xml +118 -0
  11. data/doc/credits/php.language-snippets.ent +622 -0
  12. data/doc/install/index.xml +136 -0
  13. data/doc/install/mac/index.xml +21 -0
  14. data/doc/install/ruby-web.install.rb.instructions.xml +7 -0
  15. data/doc/install/unix/index.xml +46 -0
  16. data/doc/install/win/apache1.xml +166 -0
  17. data/doc/install/win/apache2.xml +141 -0
  18. data/doc/install/win/iis.xml +162 -0
  19. data/doc/install/win/index.xml +24 -0
  20. data/doc/install/win/installer.xml +31 -0
  21. data/doc/install/win/manual.xml +43 -0
  22. data/doc/manual.xml +69 -0
  23. data/doc/old/apache_cgi.txt +23 -0
  24. data/doc/old/fastcgi.txt +23 -0
  25. data/doc/old/mod_ruby.txt +21 -0
  26. data/doc/old/snippets.rdoc +183 -0
  27. data/doc/old/webrick.txt +23 -0
  28. data/doc/old/windows_cgi.txt +9 -0
  29. data/doc/tutorial.xml +14 -0
  30. data/doc/xsl/manual-multi.xsl +10 -0
  31. data/doc/xsl/manual-pdf.xsl +6 -0
  32. data/doc/xsl/manual-single.xsl +6 -0
  33. data/doc/xsl/manual.css +22 -0
  34. data/install.rb +1022 -0
  35. data/lib/formatter.rb +314 -0
  36. data/lib/html-parser.rb +429 -0
  37. data/lib/htmlrepair.rb +113 -0
  38. data/lib/htmlsplit.rb +842 -0
  39. data/lib/sgml-parser.rb +332 -0
  40. data/lib/web.rb +68 -0
  41. data/lib/web/assertinclude.rb +129 -0
  42. data/lib/web/config.rb +50 -0
  43. data/lib/web/connection.rb +1070 -0
  44. data/lib/web/convenience.rb +154 -0
  45. data/lib/web/formreader.rb +318 -0
  46. data/lib/web/htmlparser/html-parser.rb +429 -0
  47. data/lib/web/htmlparser/sgml-parser.rb +332 -0
  48. data/lib/web/htmltools/element.rb +296 -0
  49. data/lib/web/htmltools/stparser.rb +276 -0
  50. data/lib/web/htmltools/tags.rb +286 -0
  51. data/lib/web/htmltools/tree.rb +139 -0
  52. data/lib/web/htmltools/xmltree.rb +160 -0
  53. data/lib/web/htmltools/xpath.rb +71 -0
  54. data/lib/web/info.rb +63 -0
  55. data/lib/web/load.rb +210 -0
  56. data/lib/web/mime.rb +87 -0
  57. data/lib/web/phprb.rb +340 -0
  58. data/lib/web/resources/test/cookie.rb +33 -0
  59. data/lib/web/resources/test/counter.rb +20 -0
  60. data/lib/web/resources/test/multipart.rb +14 -0
  61. data/lib/web/resources/test/redirect.rb +8 -0
  62. data/lib/web/resources/test/stock.rb +33 -0
  63. data/lib/web/sapi/apache.rb +129 -0
  64. data/lib/web/sapi/fastcgi.rb +22 -0
  65. data/lib/web/sapi/install/apache.rb +180 -0
  66. data/lib/web/sapi/install/iis.rb +93 -0
  67. data/lib/web/sapi/install/macosx.rb +90 -0
  68. data/lib/web/sapi/webrick.rb +86 -0
  69. data/lib/web/session.rb +83 -0
  70. data/lib/web/shim/cgi.rb +129 -0
  71. data/lib/web/shim/rails.rb +175 -0
  72. data/lib/web/stringio.rb +78 -0
  73. data/lib/web/strscanparser.rb +24 -0
  74. data/lib/web/tagparser.rb +96 -0
  75. data/lib/web/testing.rb +666 -0
  76. data/lib/web/traceoutput.rb +75 -0
  77. data/lib/web/unit.rb +56 -0
  78. data/lib/web/upload.rb +59 -0
  79. data/lib/web/validate.rb +52 -0
  80. data/lib/web/wiki.rb +557 -0
  81. data/lib/web/wiki/linker.rb +72 -0
  82. data/lib/web/wiki/page.rb +201 -0
  83. data/lib/webunit.rb +27 -0
  84. data/lib/webunit/assert.rb +152 -0
  85. data/lib/webunit/converter.rb +154 -0
  86. data/lib/webunit/cookie.rb +118 -0
  87. data/lib/webunit/domwalker.rb +185 -0
  88. data/lib/webunit/exception.rb +14 -0
  89. data/lib/webunit/form.rb +116 -0
  90. data/lib/webunit/frame.rb +37 -0
  91. data/lib/webunit/htmlelem.rb +122 -0
  92. data/lib/webunit/image.rb +26 -0
  93. data/lib/webunit/jscript.rb +31 -0
  94. data/lib/webunit/link.rb +33 -0
  95. data/lib/webunit/params.rb +321 -0
  96. data/lib/webunit/parser.rb +229 -0
  97. data/lib/webunit/response.rb +464 -0
  98. data/lib/webunit/runtest.rb +41 -0
  99. data/lib/webunit/table.rb +148 -0
  100. data/lib/webunit/testcase.rb +45 -0
  101. data/lib/webunit/ui/cui/testrunner.rb +50 -0
  102. data/lib/webunit/utils.rb +68 -0
  103. data/lib/webunit/webunit.rb +28 -0
  104. data/test/dev/action.rb +83 -0
  105. data/test/dev/forms.rb +104 -0
  106. data/test/dev/forms2.rb +104 -0
  107. data/test/dev/parser.rb +17 -0
  108. data/test/dev/scripts/dump.rb +24 -0
  109. data/test/dev/scripts/makedist.rb +62 -0
  110. data/test/dev/scripts/uri.rb +41 -0
  111. data/test/dev/scripts/uri/common.rb +432 -0
  112. data/test/dev/scripts/uri/ftp.rb +149 -0
  113. data/test/dev/scripts/uri/generic.rb +1106 -0
  114. data/test/dev/scripts/uri/http.rb +76 -0
  115. data/test/dev/scripts/uri/https.rb +26 -0
  116. data/test/dev/scripts/uri/ldap.rb +238 -0
  117. data/test/dev/scripts/uri/mailto.rb +260 -0
  118. data/test/dev/scripts/urireg.rb +174 -0
  119. data/test/dev/simpledispatcher.rb +156 -0
  120. data/test/dev/test.action.rb +146 -0
  121. data/test/dev/test.formreader.rb +463 -0
  122. data/test/dev/test.simpledispatcher.rb +186 -0
  123. data/test/dev/webunit/conv/digit-0.rb +21 -0
  124. data/test/dev/webunit/conv/digit-1.rb +17 -0
  125. data/test/dev/webunit/conv/digit.rb +23 -0
  126. data/test/dev/webunit/conv/test_digit-0.rb +16 -0
  127. data/test/dev/webunit/conv/test_digit-1.rb +19 -0
  128. data/test/dev/webunit/conv/test_digit.rb +26 -0
  129. data/test/dev/webunit/conv/test_digit_view-0.rb +76 -0
  130. data/test/dev/webunit/conv/test_digit_view-1.rb +102 -0
  131. data/test/dev/webunit/conv/test_digit_view.rb +134 -0
  132. data/test/installation/htdocs/cgi_test.rb +296 -0
  133. data/test/installation/htdocs/test_install.rb +4 -0
  134. data/test/installation/runwebtest.rb +5 -0
  135. data/test/installation/test_cookie.rb +128 -0
  136. data/test/installation/test_form.rb +47 -0
  137. data/test/installation/test_multipart.rb +51 -0
  138. data/test/installation/test_request.rb +24 -0
  139. data/test/installation/test_response.rb +35 -0
  140. data/test/unit/htdocs/cookie.rb +32 -0
  141. data/test/unit/htdocs/multipart.rb +28 -0
  142. data/test/unit/htdocs/redirect.rb +12 -0
  143. data/test/unit/htdocs/simple.rb +13 -0
  144. data/test/unit/htdocs/stock.rb +33 -0
  145. data/test/unit/test_assert.rb +162 -0
  146. data/test/unit/test_cookie.rb +114 -0
  147. data/test/unit/test_domwalker.rb +77 -0
  148. data/test/unit/test_form.rb +42 -0
  149. data/test/unit/test_frame.rb +40 -0
  150. data/test/unit/test_htmlelem.rb +74 -0
  151. data/test/unit/test_image.rb +45 -0
  152. data/test/unit/test_jscript.rb +57 -0
  153. data/test/unit/test_link.rb +85 -0
  154. data/test/unit/test_multipart.rb +51 -0
  155. data/test/unit/test_params.rb +210 -0
  156. data/test/unit/test_parser.rb +53 -0
  157. data/test/unit/test_response.rb +150 -0
  158. data/test/unit/test_table.rb +70 -0
  159. data/test/unit/test_utils.rb +106 -0
  160. data/test/unit/test_webunit.rb +28 -0
  161. data/test/web/mod_ruby_stub.rb +39 -0
  162. data/test/web/test.assertinclude.rb +109 -0
  163. data/test/web/test.buffer.rb +182 -0
  164. data/test/web/test.code.loader.rb +78 -0
  165. data/test/web/test.config.rb +31 -0
  166. data/test/web/test.error.handling.rb +91 -0
  167. data/test/web/test.formreader-2.0.rb +352 -0
  168. data/test/web/test.load.rb +125 -0
  169. data/test/web/test.mime-type.rb +23 -0
  170. data/test/web/test.narf.cgi.rb +106 -0
  171. data/test/web/test.phprb.rb +239 -0
  172. data/test/web/test.request.rb +368 -0
  173. data/test/web/test.response.rb +637 -0
  174. data/test/web/test.ruby-web.rb +10 -0
  175. data/test/web/test.session.rb +50 -0
  176. data/test/web/test.shim.cgi.rb +96 -0
  177. data/test/web/test.tagparser.rb +65 -0
  178. data/test/web/test.template2.rb +297 -0
  179. data/test/web/test.testing2.rb +318 -0
  180. data/test/web/test.upload.rb +45 -0
  181. data/test/web/test.validate.rb +46 -0
  182. data/test/web/test.web.test.rb +495 -0
  183. data/test/wiki/test.history.rb +297 -0
  184. data/test/wiki/test.illustration_page.rb +287 -0
  185. data/test/wiki/test.linker.rb +197 -0
  186. data/test/wiki/test.tarpit.rb +56 -0
  187. data/test/wiki/test.wiki.rb +300 -0
  188. data/test/wikitestroot/admin.rb +7 -0
  189. data/test/wikitestroot/wiki.rb +6 -0
  190. metadata +234 -0
@@ -0,0 +1,113 @@
1
+ =begin
2
+
3
+ = HTML Repair Library
4
+
5
+ htmlrepair.rb
6
+
7
+ Version 1.0.1
8
+
9
+ Copyright (C) 2000 MoonWolf Development
10
+
11
+ MoonWolf <moonwolf-ruby@moonwolf.com>
12
+
13
+ Supplies end tags that were omitted from the HTML.
14
+
15
+ == Usage
16
+
17
+ obj = HTMLSplit.new(html)
18
+ obj.repair
19
+
20
+ =end
21
+
22
+ require "htmlsplit"
23
+
24
class HTMLSplit

  # Maps an element name to the element names it may legitimately be nested
  # inside.  #repair consults it when a start tag arrives while an element of
  # the same name is still open: if none of the listed parents was opened
  # since that earlier element, the earlier element's end tag is treated as
  # omitted and is supplied.
  PARENTTAG = {
    'p'        => %w(body table),
    'a'        => %w(body),
    'thead'    => %w(table),
    'tfoot'    => %w(table),
    'tbody'    => %w(table),
    'tr'       => %w(table thead tfoot tbody),
    'td'       => %w(tr),
    'th'       => %w(tr),
    'li'       => %w(ol ul),
    'dt'       => %w(dl),
    'dd'       => %w(dl),
    'col'      => %w(colgroup),
    'param'    => %w(applet),
    'area'     => %w(map),
    'input'    => %w(form),
    'textarea' => %w(form),
    'button'   => %w(form),
    'select'   => %w(form),
    'keygen'   => %w(form),
    'label'    => %w(form),
    'fieldset' => %w(form),
    'legend'   => %w(fieldset),
    'option'   => %w(select),
  }

  # Rebuilds @document so that every StartTag has a matching EndTag.
  #
  # * A StartTag whose name is a PARENTTAG key closes the still-open element
  #   of the same name (and everything opened inside it) unless one of its
  #   permitted parents was opened in between.
  # * An EndTag that matches an open element first closes every element
  #   opened after it.  An EndTag that matches nothing is kept as it is.
  # * Elements still open at the end of the document are closed, innermost
  #   first.
  #
  # All other elements are copied through unchanged.  Returns the repaired
  # element array, which also replaces @document.
  def repair
    open_names = []   # names of the elements currently open, outermost first
    repaired   = []

    @document.each do |e|
      case e
      when StartTag
        parents  = PARENTTAG[e.name]
        previous = parents && open_names.rindex(e.name)
        if previous && open_names[previous..-1].none? { |n| parents.include?(n) }
          # The earlier element of this name was never closed: close it now,
          # together with anything opened inside it.
          while (closed = open_names.pop)
            repaired.push EndTag.new(closed)
            break if closed == e.name
          end
        end
        open_names.push e.name
      when EndTag
        if open_names.include?(e.name)
          # Close the elements nested inside the one being ended.
          until (closed = open_names.pop) == e.name
            repaired.push EndTag.new(closed)
          end
        end
      end
      repaired.push e
    end

    while (closed = open_names.pop)
      repaired.push EndTag.new(closed)
    end
    @document = repaired
  end
end
@@ -0,0 +1,842 @@
1
+ =begin Start of Document
2
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
3
+ <html>
4
+ <head>
5
+ <title>htmlsplit.rb</title>
6
+ <link href="rubydoc.css" rel="stylesheet">
7
+ </head>
8
+ <body>
9
+ | <a href="./">���</a> |
10
+ <h1>HTML Split Library</h1>
11
+ <p>Reads and writes HTML. A document that has been read becomes an array of tags and character data. The to_s method turns it back into HTML.</p>
12
+ <h2>���饹����</h2>
13
+ <table bgcolor="#FFFFFF" border="1">
14
+ <tr><td><a href="#HTMLSplit">HTMLSplit</a><td>HTML�򥿥���ʸ���ǡ�����ʬ�䤹�롣
15
+ <tr>
16
+ <td><a href="#CharacterData">CharacterData</a>
17
+ <td>ʸ���ǡ���
18
+ <tr><td><a href="#EmptyElementTag">EmptyElementTag</a>
19
+ <td>�����ǤΥ���
20
+ <tr>
21
+ <td><a href="#StartTag">StartTag</a>
22
+ <td>���ϥ���
23
+ <tr>
24
+ <td><a href="#EndTag">EndTag</a>
25
+ <td>�����
26
+ <tr>
27
+ <td><a href="#Comment">Comment</a>
28
+ <td>������
29
+ <tr>
30
+ <td><a href="#Declaration">Declaration</a>
31
+ <td>���(DOCTYPE)
32
+ <tr>
33
+ <td><a href="#SSI">SSI</a>
34
+ <td>SSI��
35
+ <tr>
36
+ <td><a href="#ERuby">ERuby</a>
37
+ <td>eRuby/ASP/JSP������ץȢ�
38
+ <tr>
39
+ <td><a href="#PHP">PHP</a>
40
+ <td>PHP������ץȢ�
41
+ </table>
42
+ * Scripts embedded inside tag attribute values and the like cannot be recognized.
43
+ <h2>�Ȥ��� </h2>
44
+ <h3>�ɤ߹���</h3>
45
+ <pre class="Exception"><samp>#!/usr/bin/ruby
46
+ require "htmlsplit"
47
+
48
+ obj = HTMLSplit.new(ARGF.read)</samp></pre>
49
+ <h3>����</h3>
50
+ <pre>
51
+ obj.document.each {|e|
52
+ print e.to_s
53
+ }
54
+ </pre>
55
+ <h3>°��������</h3>
56
+ <pre>
57
+ img = Tag('img/')
58
+ img['src']='xxx.png' #&lt;img src="xxx.png"&gt;
59
+
60
+ o = Tag('option')
61
+ o['selected']=true #&lt;option selected&gt;
62
+ </pre>
63
+ =end
64
+
65
+ require "cgi"
66
+ require "kconv"
67
+
68
+ =begin EmptyElementTag
69
+
70
+ <h2><a name="EmptyElementTag">EmptyElementTag</a></h2>
71
+ �����ǤΥ���
72
+ <h3>���饹�᥽�å�</h3>
73
+ <dl compact>
74
+ <dt>new(<var class="String">name</var>[,<var class="Hash">attr</var>])
75
+ <dd>���������֥������Ȥ��������롣 <var class="String">name</var>�ϥ�����̾�� <var class="Hash">attr</var>�ϥ�����°��nil�ޤ���Hash
76
+ </dl>
77
+ <h3>�᥽�å�</h3>
78
+ <dl compact>
79
+ <dt class="String">name
80
+ <dd>����̾���֤���
81
+ <dt class="Hash">attr
82
+ <dd>°�����֤���
83
+ <dt class="String">to_s
84
+ <dd>HTML���֤���
85
+ <dt class="String">self[<var class="String">key</var>]
86
+ <dd>key�˴�Ϣ�Ť���줿°���ͤ��֤��ޤ���
87
+ �������륭������Ͽ����Ƥ��ʤ����ˤϡ�nil���֤��ޤ���
88
+ <dt class="String">self[<var class="String">key</var>]= <var class="String">value</var>
89
+ <dd><var class="String">key</var>���Ф���<var class="String">value</var>���Ϣ�Ť��ޤ���
90
+ <var class="String">value</var>��nil�λ���<var class="String">key</var>���Ф����Ϣ��������ޤ���
91
+ </dl>
92
+ =end
93
# Tag of an element that has no content and no end tag (e.g. <br>, <img>).
class EmptyElementTag
  # Element name; the constructor stores it lower-cased.
  attr_accessor :name
  # Attribute Hash (attribute name => String value, or true for a
  # value-less attribute), or nil when the tag has no attributes.
  attr_accessor :attr

  # name - element name (stored lower-cased).
  # attr - nil or a Hash of attributes.
  def initialize(name, attr = nil)
    @name = name.downcase
    @attr = attr
  end

  # Returns the tag as HTML.  Attributes are written in sorted key order; a
  # value of true is written as a bare attribute name, any other value is
  # HTML-escaped and double-quoted.
  def to_s
    return "<#{@name}>" unless @attr

    # Join explicitly: Array#to_s only concatenated the elements on Ruby 1.8.
    rendered = @attr.keys.sort.map do |key|
      value = @attr[key]
      if value == true
        " #{key}"
      else
        %( #{key}="#{CGI.escapeHTML(value.to_s)}")
      end
    end
    "<#{@name}#{rendered.join}>"
  end

  # Returns the value stored for attribute +key+, or nil when there is none.
  def [](key)
    @attr && @attr[key]
  end

  # Associates +value+ with attribute +key+.  A nil +value+ removes the
  # attribute, as the library's documentation for this method describes.
  # The attribute Hash is created on first use.
  def []=(key, value)
    if value.nil?
      @attr.delete(key) if @attr
    else
      (@attr ||= {})[key] = value
    end
  end
end
125
+ =begin StartTag
126
+ <h2>StartTag</h2>
127
+ ���ϥ���
128
+ <h3>���饹�᥽�å�</h3>
129
+ <dl compact>
130
+ <dt>new(<var class="String">name</var>[,<var class="Hash">attr</var>])
131
+ <dd>���������֥������Ȥ��������롣 <var class="String">name</var>�ϥ�����̾�� <var class="Hash">attr</var>�ϥ�����°��nil�ޤ���Hash
132
+ </dl>
133
+ <h3>�᥽�å�</h3>
134
+ <dl compact>
135
+ <dt class="String">name
136
+ <dd>����̾���֤���
137
+ <dt class="Hash">attr
138
+ <dd>°�����֤���
139
+ <dt class="String">to_s
140
+ <dd>HTML���֤���
141
+ <dt class="String">self[<var class="String">key</var>]
142
+ <dd>key�˴�Ϣ�Ť���줿°���ͤ��֤��ޤ���
143
+ �������륭������Ͽ����Ƥ��ʤ����ˤϡ�nil���֤��ޤ���
144
+ <dt class="String">self[<var class="String">key</var>]= <var class="String">value</var>
145
+ <dd><var class="String">key</var>���Ф���<var class="String">value</var>���Ϣ�Ť��ޤ���
146
+ <var class="String">value</var>��nil�λ���<var class="String">key</var>���Ф����Ϣ��������ޤ���
147
+ </dl>
148
+ =end
149
# Start tag of an element, e.g. <a href="...">.
class StartTag
  # Element name; the constructor stores it lower-cased.
  attr_accessor :name
  # Attribute Hash (attribute name => String value, or true for a
  # value-less attribute), or nil when the tag has no attributes.
  attr_accessor :attr

  # name - element name (stored lower-cased).
  # attr - nil or a Hash of attributes.
  def initialize(name, attr = nil)
    @name = name.downcase
    @attr = attr
  end

  # Returns the tag as HTML.  Attributes are written in sorted key order; a
  # value of true is written as a bare attribute name, any other value is
  # HTML-escaped and double-quoted.
  def to_s
    return "<#{@name}>" unless @attr

    # Join explicitly: Array#to_s only concatenated the elements on Ruby 1.8.
    rendered = @attr.keys.sort.map do |key|
      value = @attr[key]
      if value == true
        " #{key}"
      else
        %( #{key}="#{CGI.escapeHTML(value.to_s)}")
      end
    end
    "<#{@name}#{rendered.join}>"
  end

  # Returns the value stored for attribute +key+, or nil when there is none.
  def [](key)
    @attr && @attr[key]
  end

  # Associates +value+ with attribute +key+.  A nil +value+ removes the
  # attribute, as the library's documentation for this method describes.
  # The attribute Hash is created on first use.
  def []=(key, value)
    if value.nil?
      @attr.delete(key) if @attr
    else
      (@attr ||= {})[key] = value
    end
  end
end
181
+ =begin EndTag
182
+ <h2>EndTag</h2>
183
+ �����
184
+ <h3>���饹�᥽�å�</h3>
185
+ <dl compact>
186
+ <dt>new(<var class="String">name</var>)
187
+ <dd>���������֥������Ȥ��������롣 <var class="String">name</var>�ϥ�����̾��
188
+ </dl>
189
+ <h3>�᥽�å�</h3>
190
+ <dl compact>
191
+ <dt class="String">name
192
+ <dd>����̾���֤���
193
+ <dt class="String">to_s
194
+ <dd>HTML���֤���
195
+ </dl>
196
+ =end
197
# End tag of an element, e.g. </p>.
class EndTag
  # Element name.  The constructor stores it lower-cased; the writer stores
  # its argument unchanged.
  attr_accessor :name

  # name - element name without the leading slash.
  def initialize(name)
    @name = name.downcase
  end

  # Returns the tag as HTML, e.g. "</p>".
  def to_s
    "</#{@name}>"
  end
end
206
+ =begin CharacterData
207
+ <h2>CharacterData</h2>
208
+ ʸ���ǡ���
209
+ <h3>���饹�᥽�å�</h3>
210
+ <dl compact>
211
+ <dt>new(<var class="String">text</var>)
212
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
213
+ </dl>
214
+ <h3>�᥽�å�</h3>
215
+ <dl compact>
216
+ <dt class="String">text
217
+ <dd>�ƥ����Ȥ��֤���
218
+ <dt class="String">to_s
219
+ <dd>HTML���֤���
220
+ </dl>
221
+ =end
222
# A run of character data (text between tags).
class CharacterData
  # The text, exactly as given to the constructor.
  attr_accessor :text

  # text - the character data to wrap.
  def initialize(text)
    @text = text
  end

  # Returns the text itself (the same object, not a copy).
  def to_s
    @text
  end
end
231
+ =begin Declaration
232
+ <h2>Declaration</h2>
233
+ SGML���
234
+ <h3>���饹�᥽�å�</h3>
235
+ <dl compact>
236
+ <dt>new(<var class="String">text</var>)
237
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
238
+ </dl>
239
+ <h3>�᥽�å�</h3>
240
+ <dl compact>
241
+ <dt class="String">text
242
+ <dd>�ƥ����Ȥ��֤���
243
+ <dt class="String">to_s
244
+ <dd>HTML���֤���
245
+ </dl>
246
+ =end
247
# A markup declaration such as <!DOCTYPE ...>.
class Declaration
  # The declaration body, without the surrounding "<!" and ">".
  attr_accessor :text

  # text - everything between "<!" and ">".
  def initialize(text)
    @text = text
  end

  # Returns the declaration as HTML: the text wrapped in "<!" and ">".
  def to_s
    "<!#{@text}>"
  end
end
256
+ =begin Comment
257
+ <h2>Comment</h2>
258
+ ������
259
+ <h3>���饹�᥽�å�</h3>
260
+ <dl compact>
261
+ <dt>new(<var class="String">text</var>)
262
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
263
+ </dl>
264
+ <h3>�᥽�å�</h3>
265
+ <dl compact>
266
+ <dt class="String">text
267
+ <dd>�ƥ����Ȥ��֤���
268
+ <dt class="String">to_s
269
+ <dd>HTML���֤���
270
+ </dl>
271
+ =end
272
# An HTML comment.
class Comment
  # The comment body, without the surrounding "<!--" and "-->".
  attr_accessor :text

  # text - everything between "<!--" and "-->".
  def initialize(text)
    @text = text
  end

  # Returns the comment as HTML: the text wrapped in "<!--" and "-->".
  def to_s
    "<!--#{@text}-->"
  end
end
281
+ =begin SSI
282
+ <h2>SSI</h2>
283
+ SSI
284
+ <h3>���饹�᥽�å�</h3>
285
+ <dl compact>
286
+ <dt>new(<var class="String">text</var>)
287
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
288
+ </dl>
289
+ <h3>�᥽�å�</h3>
290
+ <dl compact>
291
+ <dt class="String">text
292
+ <dd>�ƥ����Ȥ��֤���
293
+ <dt class="String">to_s
294
+ <dd>HTML���֤���
295
+ </dl>
296
+ =end
297
# A server-side include directive, written as a comment whose body starts
# with "#" (e.g. <!--#include ... -->).
class SSI
  # The directive body, without the surrounding "<!--" and "-->".
  attr_accessor :text

  # text - everything between "<!--" and "-->", including the leading "#".
  def initialize(text)
    @text = text
  end

  # Returns the directive as HTML: the text wrapped in "<!--" and "-->".
  def to_s
    "<!--#{@text}-->"
  end
end
306
+ =begin ERuby
307
+ <h2>ERuby</h2>
308
+ eRuby/ASP/JSP������ץ�
309
+ <h3>���饹�᥽�å�</h3>
310
+ <dl compact>
311
+ <dt>new(<var class="String">text</var>)
312
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
313
+ </dl>
314
+ <h3>�᥽�å�</h3>
315
+ <dl compact>
316
+ <dt class="String">text
317
+ <dd>�ƥ����Ȥ��֤���
318
+ <dt class="String">to_s
319
+ <dd>HTML���֤���
320
+ </dl>
321
+ =end
322
# An embedded script section delimited by "<%" and "%>" (eRuby/ASP/JSP
# style).
class ERuby
  # The script body, without the surrounding "<%" and "%>".
  attr_accessor :text

  # text - everything between "<%" and "%>".
  def initialize(text)
    @text = text
  end

  # Returns the section as source text: the body wrapped in "<%" and "%>".
  def to_s
    "<%#{@text}%>"
  end
end
331
+ =begin PHP
332
+ <h2>PHP</h2>
333
+ PHP������ץ�
334
+ <h3>���饹�᥽�å�</h3>
335
+ <dl compact>
336
+ <dt>new(<var class="String">text</var>)
337
+ <dd>���������֥������Ȥ��������롣 <var class="String">text</var>�ϥƥ�����
338
+ </dl>
339
+ <h3>�᥽�å�</h3>
340
+ <dl compact>
341
+ <dt class="String">text
342
+ <dd>�ƥ����Ȥ��֤���
343
+ <dt class="String">to_s
344
+ <dd>HTML���֤���
345
+ </dl>
346
+ =end
347
# An embedded script section delimited by "<?" and "?>" (PHP style).
class PHP
  # The script body, without the surrounding "<?" and "?>".
  attr_accessor :text

  # text - everything between "<?" and "?>".
  def initialize(text)
    @text = text
  end

  # Returns the section as source text: the body wrapped in "<?" and "?>".
  def to_s
    "<?#{@text}?>"
  end
end
356
+ =begin HTMLSplit
357
+
358
+ <h2><a name="HTMLSplit">HTMLSplit</a></h2>
359
+ HTML�ɤ߽�
360
+ <h3>���饹�᥽�å�</h3>
361
+ <dl compact>
362
+ <dt>new(<var class="String">html</var>)
363
+ <dd>���������֥������Ȥ��������롣 <var class="String">html</var>��HTMLʸ��
364
+ </dl>
365
+ <h3>�᥽�å�</h3>
366
+ <dl compact>
367
+ <dt class="Array">document
368
+ <dd>�ɥ�����Ȥ�������֤���
369
+ <dt class="String">to_s
370
+ <dd>HTML���֤���
371
+ <dt class="Iterator">each {|<var>obj</var>,<var class="Array">tag</var>| ...}
372
+ <dd>�ɥ�����Ȥγƥ��֥�������(<var>obj</var>)���Ф��ƥ֥��å���ɾ�����ޤ���
373
+ <var class="Array">tag</var>�ϳ��ϥ����Υꥹ�ȡ� [ StartTag , <var class="Integer">����ǥ���</var>] ��
374
+ <dt class="Integer">index(<var class="Class">class</var>, <var class="Integer">start</var>, <var class="Integer">end</var>, <var>value</var>, <var class="Integer">count</var>) {|obj| ...}
375
+ <dd><var class="Integer">start</var>����<var class="Integer">end</var>�ޤǤ����Ǥ�<var class="Class">class</var>��������<var class="Integer">count</var>���ܤ����Ǥΰ��֤��֤��ޤ���
376
+ ���������Ǥ��ҤȤĤ�ʤ��ä����ˤ�nil���֤��ޤ���<br>
377
+ <var>value</var>��nil�ʳ����ͤ���ꤷ�����ˤ����Ǥ�<var>value</var>���������������å���Ԥ��ޤ���<var class="Class">class</var>��EmptyElementTag,StartTag,EndTag�λ��ϥ���̾������ʳ��ϥƥ����Ȥˤ�ä���Ӥ��ޤ���<br>
378
+ �֥��å�����ꤷ�ƸƤӽФ��줿���ˤϥ֥��å������Ǥ���������ɾ�����롣
379
+ <dt class="Integer">end_index(<var class="Integer">start</var>)
380
+ <dd><var class="Integer">start</var>���б�����EndTag�Υ���ǥ������֤��ޤ���
381
+ �б��������Ǥ��ʤ��ä����ˤ�nil���֤��ޤ���<br>
382
+ </dl>
383
+ =end
384
# Splits an HTML string into a flat array of elements (StartTag, EndTag,
# EmptyElementTag, CharacterData, Declaration, Comment, SSI, ERuby, PHP) and
# offers helpers for walking and searching that array.
class HTMLSplit
  # Element names that are emitted as EmptyElementTag instead of StartTag.
  EMPTY = %w(area base basefont bgsound br col frame hr img input isindex
             keygen link meta nextid param spacer wbr)

  # A comment body that begins with "#" and a letter is treated as a
  # server-side include directive.
  SSI_DIRECTIVE = /\A#[a-zA-Z]+/

  # The parsed elements, in document order.
  attr_accessor :document

  # Parses +html+ one character at a time with a small state machine and
  # fills @document.
  #
  # States: :TEXT (outside any tag), :TAGNAME, :SPACE (between attributes),
  # :ATTRNAME, :BEFOREEQUAL, :AFTEREQUAL, :VALUE (unquoted value), :SQVALUE /
  # :DQVALUE (quoted values), :DECLARE ("<!..."), :COMMENT, :ERUBY, :PHP.
  #
  # Attribute names are lower-cased; attribute values are HTML-unescaped; an
  # attribute without a value is stored as true.  Input that stops in the
  # middle of a construct is flushed as whatever was being read.
  def initialize(html)
    @document = []
    name     = ''.dup
    text     = ''.dup
    attr     = {}
    attrname = ''.dup
    state    = :TEXT

    # Walk characters, not bytes, so multi-byte text keeps its encoding.
    # Input that is not valid in its declared encoding is scanned as raw
    # bytes so that no regexp is ever matched against a broken string.
    source = html.valid_encoding? ? html : html.dup.force_encoding(Encoding::BINARY)

    source.each_char do |char|
      case state
      when :TEXT
        if char == '<'
          @document << CharacterData.new(text) unless text.empty?
          name  = ''.dup
          text  = ''.dup
          attr  = {}
          state = :TAGNAME
        else
          text << char
        end

      when :TAGNAME
        case char
        when '>'
          if name[-1, 1] == '/'
            # "<x/>": an empty element records the slash the same way
            # "<x />" does; any other element becomes a start/end pair.
            bare  = name[0..-2]
            slash = EMPTY.include?(bare.downcase) ? { '/' => true } : nil
            push_tag(bare, slash, true)
          else
            push_tag(name, nil)
          end
          text  = ''.dup
          state = :TEXT
        when '!'
          text  = ''.dup
          state = :DECLARE
        when '%'
          text  = ''.dup
          state = :ERUBY
        when '?'
          text  = ''.dup
          state = :PHP
        when /\s/
          text  = ''.dup
          state = :SPACE
        else
          name << char
        end

      when :SPACE            # whitespace between attributes
        case char
        when '>'
          push_tag(name, attr)
          text  = ''.dup
          state = :TEXT
        when /\s/
          # keep skipping
        else
          attrname = char.dup
          state    = :ATTRNAME
        end

      when :ATTRNAME
        case char
        when /\s/
          state = :BEFOREEQUAL
        when '='
          state = :AFTEREQUAL
        when '>'
          attr[attrname.downcase] = true
          # A lone "/" as the last attribute marks a self-closed tag.
          push_tag(name, attr, attrname == '/')
          text  = ''.dup
          state = :TEXT
        else
          attrname << char
        end

      when :BEFOREEQUAL      # after an attribute name, "=" not yet seen
        case char
        when '='
          state = :AFTEREQUAL
        when '>'
          attr[attrname.downcase] = true
          push_tag(name, attr)
          text  = ''.dup
          state = :TEXT
        when /\s/
          # keep skipping
        else
          # The previous attribute had no value; a new name starts here.
          attr[attrname.downcase] = true
          attrname = char.dup
          state    = :ATTRNAME
        end

      when :AFTEREQUAL       # after "=", value not yet started
        case char
        when "'"
          text  = ''.dup
          state = :SQVALUE
        when '"'
          text  = ''.dup
          state = :DQVALUE
        when '>'
          attr[attrname.downcase] = true
          push_tag(name, attr)
          text  = ''.dup
          state = :TEXT
        when /\s/
          # keep skipping
        else
          text  = char.dup
          state = :VALUE
        end

      when :VALUE            # unquoted attribute value
        case char
        when /\s/
          attr[attrname.downcase] = CGI.unescapeHTML(text)
          state = :SPACE
        when '>'
          attr[attrname.downcase] = CGI.unescapeHTML(text)
          push_tag(name, attr)
          text  = ''.dup
          state = :TEXT
        else
          text << char
        end

      when :SQVALUE          # 'single-quoted' attribute value
        if char == "'"
          attr[attrname.downcase] = CGI.unescapeHTML(text)
          state = :SPACE
        else
          text << char
        end

      when :DQVALUE          # "double-quoted" attribute value
        if char == '"'
          attr[attrname.downcase] = CGI.unescapeHTML(text)
          state = :SPACE
        else
          text << char
        end

      when :COMMENT
        if char == '>' && text.end_with?('--')   # end of comment
          push_comment(text[0..-3])
          text  = ''.dup
          state = :TEXT
        else
          text << char
        end

      when :ERUBY
        if char == '>' && text.end_with?('%')    # end of <% ... %>
          @document << ERuby.new(text[0..-2])
          text  = ''.dup
          state = :TEXT
        else
          text << char
        end

      when :PHP
        if char == '>' && text.end_with?('?')    # end of <? ... ?>
          @document << PHP.new(text[0..-2])
          text  = ''.dup
          state = :TEXT
        else
          text << char
        end

      when :DECLARE
        if char == '>'
          @document << Declaration.new(text)
          text  = ''.dup
          state = :TEXT
        else
          text << char
          if text == '--'                        # "<!--" opens a comment
            text  = ''.dup
            state = :COMMENT
          end
        end
      end
    end

    # End of input: flush whatever construct was still being read.
    case state
    when :TEXT
      @document << CharacterData.new(text) unless text.empty?
    when :TAGNAME
      # An unfinished "<name" is kept as literal text.
      @document << CharacterData.new('<' + name)
    when :SPACE
      push_tag(name, attr)
    when :ATTRNAME, :BEFOREEQUAL, :AFTEREQUAL
      attr[attrname.downcase] = true
      push_tag(name, attr)
    when :VALUE, :SQVALUE, :DQVALUE
      attr[attrname.downcase] = CGI.unescapeHTML(text)
      push_tag(name, attr)
    when :COMMENT
      push_comment(text)
    when :ERUBY
      @document << ERuby.new(text)
    when :PHP
      @document << PHP.new(text)
    when :DECLARE
      @document << Declaration.new(text)
    end
  end

  # Returns the document as HTML: the to_s of every element, concatenated.
  def to_s
    @document.map { |e| e.to_s }.join
  end

  # Yields every element together with the list of start tags open at that
  # point, as [StartTag, index-in-document] pairs, outermost first.  A
  # StartTag is already on the list when it is yielded; an EndTag has
  # already removed its matching StartTag and everything opened after it.
  # Returns @document.
  def each
    open_tags = []
    @document.each_with_index do |e, i|
      case e
      when StartTag
        open_tags.push [e, i]
      when EndTag
        depth = open_tags.rindex { |start, _| start.name == e.name }
        # A new array is built so that lists yielded earlier are not
        # truncated behind the caller's back.
        open_tags = open_tags[0, depth] if depth
      end
      yield e, open_tags
    end
  end

  # Returns the document index of the +count+-th element between +_start+
  # and +_end+ whose class is exactly +_class+, or nil when there is none.
  #
  # value - when not nil, the element must also satisfy value === name (for
  #         StartTag, EmptyElementTag and EndTag) or value === text (for
  #         every other class).
  # block - when given, the element must also make the block return a true
  #         value.
  def index(_class, _start = 0, _end = -1, value = nil, count = 1)
    candidates = @document[_start.._end] || []   # nil when _start is past the end
    candidates.each_with_index do |obj, offset|
      next unless obj.instance_of?(_class)
      if value
        subject = case obj
                  when StartTag, EmptyElementTag, EndTag then obj.name
                  else obj.text
                  end
        next unless value === subject
      end
      next if block_given? && !yield(obj)
      count -= 1
      return _start + offset if count <= 0
    end
    nil
  end

  # Scans forward from +start_index+ and returns the index of the first
  # EndTag after which no StartTag met during the scan is still open -
  # i.e. the end tag matching the StartTag at +start_index+.  Returns nil
  # when the document ends first.
  def end_index(start_index)
    open_names = []
    (start_index...@document.size).each do |idx|
      e = @document[idx]
      case e
      when StartTag
        open_names.push e.name
      when EndTag
        depth = open_names.rindex(e.name)
        open_names = open_names[0, depth] if depth
        return idx if open_names.empty?
      end
    end
    nil
  end

  private

  # Appends the element(s) for a completed tag to @document.
  #
  # raw_name    - tag name as written; a leading "/" marks an end tag.
  # attr        - attribute Hash or nil.
  # self_closed - true when the tag was written with a trailing "/".  A
  #               self-closed non-empty element becomes a StartTag followed
  #               by its EndTag, and the "/" pseudo-attribute is dropped.
  def push_tag(raw_name, attr, self_closed = false)
    name = raw_name.downcase
    if EMPTY.include?(name)
      @document << EmptyElementTag.new(name, attr)
    elsif self_closed
      attr.delete('/') if attr
      @document << StartTag.new(name, attr)
      @document << EndTag.new(name)
    elsif name[0, 1] == '/'
      @document << EndTag.new(name[1..-1])
    else
      @document << StartTag.new(name, attr)
    end
  end

  # Appends a comment body as an SSI directive or as a plain Comment.
  def push_comment(body)
    @document << (body =~ SSI_DIRECTIVE ? SSI.new(body) : Comment.new(body))
  end
end
839
+ =begin End of Document
840
+ </body>
841
+ </html>
842
+ =end