html5 0.1.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. data/History.txt +9 -2
  2. data/Manifest.txt +61 -2
  3. data/README +41 -5
  4. data/Rakefile.rb +22 -6
  5. data/{parse.rb → bin/html5} +11 -11
  6. data/lib/core_ext/string.rb +17 -0
  7. data/lib/html5/constants.rb +228 -0
  8. data/lib/html5/filters/iso639codes.rb +752 -0
  9. data/lib/html5/filters/rfc2046.rb +30 -0
  10. data/lib/html5/filters/rfc3987.rb +89 -0
  11. data/lib/html5/filters/validator.rb +830 -0
  12. data/lib/html5/html5parser.rb +25 -25
  13. data/lib/html5/html5parser/after_body_phase.rb +3 -3
  14. data/lib/html5/html5parser/after_frameset_phase.rb +3 -4
  15. data/lib/html5/html5parser/after_head_phase.rb +6 -6
  16. data/lib/html5/html5parser/before_head_phase.rb +1 -1
  17. data/lib/html5/html5parser/in_body_phase.rb +54 -48
  18. data/lib/html5/html5parser/in_caption_phase.rb +7 -6
  19. data/lib/html5/html5parser/in_cell_phase.rb +3 -3
  20. data/lib/html5/html5parser/in_column_group_phase.rb +1 -1
  21. data/lib/html5/html5parser/in_frameset_phase.rb +5 -5
  22. data/lib/html5/html5parser/in_head_phase.rb +10 -10
  23. data/lib/html5/html5parser/in_row_phase.rb +4 -2
  24. data/lib/html5/html5parser/in_select_phase.rb +7 -6
  25. data/lib/html5/html5parser/in_table_body_phase.rb +8 -5
  26. data/lib/html5/html5parser/in_table_phase.rb +12 -7
  27. data/lib/html5/html5parser/initial_phase.rb +5 -6
  28. data/lib/html5/html5parser/phase.rb +5 -9
  29. data/lib/html5/html5parser/root_element_phase.rb +1 -2
  30. data/lib/html5/html5parser/trailing_end_phase.rb +3 -3
  31. data/lib/html5/inputstream.rb +25 -31
  32. data/lib/html5/liberalxmlparser.rb +2 -2
  33. data/lib/html5/sanitizer.rb +6 -6
  34. data/lib/html5/serializer/htmlserializer.rb +2 -3
  35. data/lib/html5/sniffer.rb +45 -0
  36. data/lib/html5/tokenizer.rb +57 -59
  37. data/lib/html5/treebuilders/rexml.rb +7 -6
  38. data/lib/html5/treebuilders/simpletree.rb +1 -1
  39. data/lib/html5/treewalkers/base.rb +8 -0
  40. data/lib/html5/version.rb +3 -0
  41. data/testdata/encoding/chardet/test_big5.txt +51 -0
  42. data/testdata/encoding/test-yahoo-jp.dat +10 -0
  43. data/testdata/encoding/tests1.dat +394 -0
  44. data/testdata/encoding/tests2.dat +81 -0
  45. data/testdata/sanitizer/tests1.dat +416 -0
  46. data/testdata/serializer/core.test +104 -0
  47. data/testdata/serializer/injectmeta.test +65 -0
  48. data/testdata/serializer/optionaltags.test +900 -0
  49. data/testdata/serializer/options.test +60 -0
  50. data/testdata/serializer/whitespace.test +51 -0
  51. data/testdata/sites/google-results.htm +1 -0
  52. data/testdata/sites/python-ref-import.htm +1 -0
  53. data/testdata/sites/web-apps-old.htm +1 -0
  54. data/testdata/sites/web-apps.htm +34275 -0
  55. data/testdata/sniffer/htmlOrFeed.json +43 -0
  56. data/testdata/tokenizer/contentModelFlags.test +48 -0
  57. data/testdata/tokenizer/entities.test +2339 -0
  58. data/testdata/tokenizer/escapeFlag.test +21 -0
  59. data/testdata/tokenizer/test1.test +172 -0
  60. data/testdata/tokenizer/test2.test +129 -0
  61. data/testdata/tokenizer/test3.test +367 -0
  62. data/testdata/tokenizer/test4.test +198 -0
  63. data/testdata/tree-construction/tests1.dat +1950 -0
  64. data/testdata/tree-construction/tests2.dat +773 -0
  65. data/testdata/tree-construction/tests3.dat +270 -0
  66. data/testdata/tree-construction/tests4.dat +60 -0
  67. data/testdata/tree-construction/tests5.dat +175 -0
  68. data/testdata/tree-construction/tests6.dat +196 -0
  69. data/testdata/validator/attributes.test +1035 -0
  70. data/testdata/validator/base-href-attribute.test +787 -0
  71. data/testdata/validator/base-target-attribute.test +35 -0
  72. data/testdata/validator/blockquote-cite-attribute.test +7 -0
  73. data/testdata/validator/classattribute.test +152 -0
  74. data/testdata/validator/contenteditableattribute.test +59 -0
  75. data/testdata/validator/contextmenuattribute.test +115 -0
  76. data/testdata/validator/dirattribute.test +59 -0
  77. data/testdata/validator/draggableattribute.test +63 -0
  78. data/testdata/validator/html-xmlns-attribute.test +23 -0
  79. data/testdata/validator/idattribute.test +115 -0
  80. data/testdata/validator/inputattributes.test +2795 -0
  81. data/testdata/validator/irrelevantattribute.test +63 -0
  82. data/testdata/validator/langattribute.test +5579 -0
  83. data/testdata/validator/li-value-attribute.test +7 -0
  84. data/testdata/validator/link-href-attribute.test +7 -0
  85. data/testdata/validator/link-hreflang-attribute.test +7 -0
  86. data/testdata/validator/link-rel-attribute.test +271 -0
  87. data/testdata/validator/ol-start-attribute.test +7 -0
  88. data/testdata/validator/starttags.test +375 -0
  89. data/testdata/validator/style-scoped-attribute.test +7 -0
  90. data/testdata/validator/tabindexattribute.test +79 -0
  91. data/tests/preamble.rb +7 -17
  92. data/tests/test_encoding.rb +1 -1
  93. data/tests/test_lxp.rb +16 -0
  94. data/tests/test_parser.rb +2 -2
  95. data/tests/test_sniffer.rb +27 -0
  96. data/tests/test_treewalkers.rb +41 -22
  97. data/tests/test_validator.rb +31 -0
  98. metadata +65 -6
@@ -0,0 +1,81 @@
1
+ #data
2
+ <meta
3
+ #encoding
4
+ windows-1252
5
+
6
+ #data
7
+ <
8
+ #encoding
9
+ windows-1252
10
+
11
+ #data
12
+ <!
13
+ #encoding
14
+ windows-1252
15
+
16
+ #data
17
+ <meta charset = "
18
+ #encoding
19
+ windows-1252
20
+
21
+ #data
22
+ <meta charset=EUC-jp
23
+ #encoding
24
+ windows-1252
25
+
26
+ #data
27
+ <meta <meta charset='EUC-jp'>
28
+ #encoding
29
+ EUC-jp
30
+
31
+ #data
32
+ <meta charset = 'EUC-jp'>
33
+ #encoding
34
+ EUC-jp
35
+
36
+ #data
37
+ <!-- -->
38
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
39
+ #encoding
40
+ utf-8
41
+
42
+ #data
43
+ <!-- -->
44
+ <meta http-equiv="Content-Type" content="text/html; charset=utf
45
+ #encoding
46
+ windows-1252
47
+
48
+ #data
49
+ <meta http-equiv="Content-Type<meta charset="utf-8">
50
+ #encoding
51
+ windows-1252
52
+
53
+ #data
54
+ <meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
55
+ #encoding
56
+ utf-8
57
+
58
+ #data
59
+ <meta http-equiv="Content-Type" content="text/html; charset='utf-8">
60
+ #encoding
61
+ windows-1252
62
+
63
+ #data
64
+ <meta
65
+ #encoding
66
+ windows-1252
67
+
68
+ #data
69
+ <meta charset =
70
+ #encoding
71
+ windows-1252
72
+
73
+ #data
74
+ <meta charset= utf-8
75
+ #encoding
76
+ windows-1252
77
+
78
+ #data
79
+ <meta content = "text/html;
80
+ #encoding
81
+ windows-1252
@@ -0,0 +1,416 @@
1
+ [
2
+ {
3
+ "name": "IE_Comments",
4
+ "input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
5
+ "output": ""
6
+ },
7
+
8
+ {
9
+ "name": "IE_Comments_2",
10
+ "input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
11
+ "output": "&lt;script&gt;alert('XSS');&lt;/script&gt;",
12
+ "rexml": "Ill-formed XHTML!"
13
+ },
14
+
15
+ {
16
+ "name": "allow_colons_in_path_component",
17
+ "input": "<a href=\"./this:that\">foo</a>",
18
+ "output": "<a href='./this:that'>foo</a>"
19
+ },
20
+
21
+ {
22
+ "name": "background_attribute",
23
+ "input": "<div background=\"javascript:alert('XSS')\"></div>",
24
+ "output": "<div/>",
25
+ "xhtml": "<div></div>",
26
+ "rexml": "<div></div>"
27
+ },
28
+
29
+ {
30
+ "name": "bgsound",
31
+ "input": "<bgsound src=\"javascript:alert('XSS');\" />",
32
+ "output": "&lt;bgsound src=\"javascript:alert('XSS');\"/&gt;",
33
+ "rexml": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
34
+ },
35
+
36
+ {
37
+ "name": "div_background_image_unicode_encoded",
38
+ "input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
39
+ "output": "<div style=''>foo</div>"
40
+ },
41
+
42
+ {
43
+ "name": "div_expression",
44
+ "input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
45
+ "output": "<div style=''>foo</div>"
46
+ },
47
+
48
+ {
49
+ "name": "double_open_angle_brackets",
50
+ "input": "<img src=http://ha.ckers.org/scriptlet.html <",
51
+ "output": "<img src='http://ha.ckers.org/scriptlet.html'/>",
52
+ "rexml": "Ill-formed XHTML!"
53
+ },
54
+
55
+ {
56
+ "name": "double_open_angle_brackets_2",
57
+ "input": "<script src=http://ha.ckers.org/scriptlet.html <",
58
+ "output": "&lt;script src=\"http://ha.ckers.org/scriptlet.html\" &lt;=\"\"&gt;",
59
+ "rexml": "Ill-formed XHTML!"
60
+ },
61
+
62
+ {
63
+ "name": "grave_accents",
64
+ "input": "<img src=`javascript:alert('XSS')` />",
65
+ "output": "<img/>",
66
+ "rexml": "Ill-formed XHTML!"
67
+ },
68
+
69
+ {
70
+ "name": "img_dynsrc_lowsrc",
71
+ "input": "<img dynsrc=\"javascript:alert('XSS')\" />",
72
+ "output": "<img/>",
73
+ "rexml": "<img />"
74
+ },
75
+
76
+ {
77
+ "name": "img_vbscript",
78
+ "input": "<img src='vbscript:msgbox(\"XSS\")' />",
79
+ "output": "<img/>",
80
+ "rexml": "<img />"
81
+ },
82
+
83
+ {
84
+ "name": "input_image",
85
+ "input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
86
+ "output": "<input type='image'/>",
87
+ "rexml": "<input type='image' />"
88
+ },
89
+
90
+ {
91
+ "name": "link_stylesheets",
92
+ "input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
93
+ "output": "&lt;link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/&gt;",
94
+ "rexml": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/&gt;"
95
+ },
96
+
97
+ {
98
+ "name": "link_stylesheets_2",
99
+ "input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
100
+ "output": "&lt;link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/&gt;",
101
+ "rexml": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/&gt;"
102
+ },
103
+
104
+ {
105
+ "name": "list_style_image",
106
+ "input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
107
+ "output": "<li style=''>foo</li>"
108
+ },
109
+
110
+ {
111
+ "name": "no_closing_script_tags",
112
+ "input": "<script src=http://ha.ckers.org/xss.js?<b>",
113
+ "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;",
114
+ "rexml": "Ill-formed XHTML!"
115
+ },
116
+
117
+ {
118
+ "name": "non_alpha_non_digit",
119
+ "input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
120
+ "output": "&lt;script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
121
+ "rexml": "Ill-formed XHTML!"
122
+ },
123
+
124
+ {
125
+ "name": "non_alpha_non_digit_2",
126
+ "input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
127
+ "output": "<a>foo</a>",
128
+ "rexml": "Ill-formed XHTML!"
129
+ },
130
+
131
+ {
132
+ "name": "non_alpha_non_digit_3",
133
+ "input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
134
+ "output": "<img src='http://ha.ckers.org/xss.js'/>",
135
+ "rexml": "Ill-formed XHTML!"
136
+ },
137
+
138
+ {
139
+ "name": "non_alpha_non_digit_II",
140
+ "input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
141
+ "output": "<a>foo</a>",
142
+ "rexml": "Ill-formed XHTML!"
143
+ },
144
+
145
+ {
146
+ "name": "non_alpha_non_digit_III",
147
+ "input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
148
+ "output": "<a>foo</a>",
149
+ "rexml": "Ill-formed XHTML!"
150
+ },
151
+
152
+ {
153
+ "name": "platypus",
154
+ "input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
155
+ "output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
156
+ },
157
+
158
+ {
159
+ "name": "protocol_resolution_in_script_tag",
160
+ "input": "<script src=//ha.ckers.org/.j></script>",
161
+ "output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;",
162
+ "rexml": "Ill-formed XHTML!"
163
+ },
164
+
165
+ {
166
+ "name": "should_allow_anchors",
167
+ "input": "<a href='foo' onclick='bar'><script>baz</script></a>",
168
+ "output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
169
+ },
170
+
171
+ {
172
+ "name": "should_allow_image_alt_attribute",
173
+ "input": "<img alt='foo' onclick='bar' />",
174
+ "output": "<img alt='foo'/>",
175
+ "rexml": "<img alt='foo' />"
176
+ },
177
+
178
+ {
179
+ "name": "should_allow_image_height_attribute",
180
+ "input": "<img height='foo' onclick='bar' />",
181
+ "output": "<img height='foo'/>",
182
+ "rexml": "<img height='foo' />"
183
+ },
184
+
185
+ {
186
+ "name": "should_allow_image_src_attribute",
187
+ "input": "<img src='foo' onclick='bar' />",
188
+ "output": "<img src='foo'/>",
189
+ "rexml": "<img src='foo' />"
190
+ },
191
+
192
+ {
193
+ "name": "should_allow_image_width_attribute",
194
+ "input": "<img width='foo' onclick='bar' />",
195
+ "output": "<img width='foo'/>",
196
+ "rexml": "<img width='foo' />"
197
+ },
198
+
199
+ {
200
+ "name": "should_handle_blank_text",
201
+ "input": "",
202
+ "output": ""
203
+ },
204
+
205
+ {
206
+ "name": "should_handle_malformed_image_tags",
207
+ "input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
208
+ "output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;",
209
+ "rexml": "Ill-formed XHTML!"
210
+ },
211
+
212
+ {
213
+ "name": "should_handle_non_html",
214
+ "input": "abc",
215
+ "output": "abc"
216
+ },
217
+
218
+ {
219
+ "name": "should_not_fall_for_ridiculous_hack",
220
+ "input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
221
+ "output": "<img/>",
222
+ "rexml": "<img />"
223
+ },
224
+
225
+ {
226
+ "name": "should_not_fall_for_xss_image_hack_0",
227
+ "input": "<img src=\"javascript:alert('XSS');\" />",
228
+ "output": "<img/>",
229
+ "rexml": "<img />"
230
+ },
231
+
232
+ {
233
+ "name": "should_not_fall_for_xss_image_hack_1",
234
+ "input": "<img src=javascript:alert('XSS') />",
235
+ "output": "<img/>",
236
+ "rexml": "Ill-formed XHTML!"
237
+ },
238
+
239
+ {
240
+ "name": "should_not_fall_for_xss_image_hack_10",
241
+ "input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
242
+ "output": "<img/>",
243
+ "rexml": "<img />"
244
+ },
245
+
246
+ {
247
+ "name": "should_not_fall_for_xss_image_hack_11",
248
+ "input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
249
+ "output": "<img/>",
250
+ "rexml": "<img />"
251
+ },
252
+
253
+ {
254
+ "name": "should_not_fall_for_xss_image_hack_12",
255
+ "input": "<img src=\" &#14; javascript:alert('XSS');\" />",
256
+ "output": "<img/>",
257
+ "rexml": "<img />"
258
+ },
259
+
260
+ {
261
+ "name": "should_not_fall_for_xss_image_hack_13",
262
+ "input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
263
+ "output": "<img/>",
264
+ "rexml": "<img />"
265
+ },
266
+
267
+ {
268
+ "name": "should_not_fall_for_xss_image_hack_14",
269
+ "input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
270
+ "output": "<img/>",
271
+ "rexml": "<img />"
272
+ },
273
+
274
+ {
275
+ "name": "should_not_fall_for_xss_image_hack_2",
276
+ "input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
277
+ "output": "<img/>",
278
+ "rexml": "<img />"
279
+ },
280
+
281
+ {
282
+ "name": "should_not_fall_for_xss_image_hack_3",
283
+ "input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
284
+ "output": "<img/>",
285
+ "rexml": "<img />"
286
+ },
287
+
288
+ {
289
+ "name": "should_not_fall_for_xss_image_hack_4",
290
+ "input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
291
+ "output": "<img/>",
292
+ "rexml": "<img />"
293
+ },
294
+
295
+ {
296
+ "name": "should_not_fall_for_xss_image_hack_5",
297
+ "input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
298
+ "output": "<img/>",
299
+ "rexml": "<img />"
300
+ },
301
+
302
+ {
303
+ "name": "should_not_fall_for_xss_image_hack_6",
304
+ "input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
305
+ "output": "<img/>",
306
+ "rexml": "<img />"
307
+ },
308
+
309
+ {
310
+ "name": "should_not_fall_for_xss_image_hack_7",
311
+ "input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
312
+ "output": "<img/>",
313
+ "rexml": "<img />"
314
+ },
315
+
316
+ {
317
+ "name": "should_not_fall_for_xss_image_hack_8",
318
+ "input": "<img src=\"jav\tascript:alert('XSS');\" />",
319
+ "output": "<img/>",
320
+ "rexml": "<img />"
321
+ },
322
+
323
+ {
324
+ "name": "should_not_fall_for_xss_image_hack_9",
325
+ "input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
326
+ "output": "<img/>",
327
+ "rexml": "<img />"
328
+ },
329
+
330
+ {
331
+ "name": "should_sanitize_half_open_scripts",
332
+ "input": "<img src=\"javascript:alert('XSS')\"",
333
+ "output": "<img/>",
334
+ "rexml": "Ill-formed XHTML!"
335
+ },
336
+
337
+ {
338
+ "name": "should_sanitize_invalid_script_tag",
339
+ "input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
340
+ "output": "&lt;script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
341
+ "rexml": "Ill-formed XHTML!"
342
+ },
343
+
344
+ {
345
+ "name": "should_sanitize_script_tag_with_multiple_open_brackets",
346
+ "input": "<<script>alert(\"XSS\");//<</script>",
347
+ "output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
348
+ "rexml": "Ill-formed XHTML!"
349
+ },
350
+
351
+ {
352
+ "name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
353
+ "input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
354
+ "output": "&lt;iframe src=\"http://ha.ckers.org/scriptlet.html\" &lt;=\"\"&gt;",
355
+ "rexml": "Ill-formed XHTML!"
356
+ },
357
+
358
+ {
359
+ "name": "should_sanitize_tag_broken_up_by_null",
360
+ "input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
361
+ "output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;",
362
+ "rexml": "Ill-formed XHTML!"
363
+ },
364
+
365
+ {
366
+ "name": "should_sanitize_unclosed_script",
367
+ "input": "<script src=http://ha.ckers.org/xss.js?<b>",
368
+ "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;",
369
+ "rexml": "Ill-formed XHTML!"
370
+ },
371
+
372
+ {
373
+ "name": "should_strip_href_attribute_in_a_with_bad_protocols",
374
+ "input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
375
+ "output": "<a title='1'>boo</a>"
376
+ },
377
+
378
+ {
379
+ "name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
380
+ "input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
381
+ "output": "<a title='1'>boo</a>"
382
+ },
383
+
384
+ {
385
+ "name": "should_strip_src_attribute_in_img_with_bad_protocols",
386
+ "input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
387
+ "output": "<img title='1'/>boo",
388
+ "rexml": "<img title='1' />"
389
+ },
390
+
391
+ {
392
+ "name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
393
+ "input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
394
+ "output": "<img title='1'/>boo",
395
+ "rexml": "<img title='1' />"
396
+ },
397
+
398
+ {
399
+ "name": "xml_base",
400
+ "input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
401
+ "output": "<div>foo</div>"
402
+ },
403
+
404
+ {
405
+ "name": "xul",
406
+ "input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
407
+ "output": "<p style=''>fubar</p>"
408
+ },
409
+
410
+ {
411
+ "name": "quotes_in_attributes",
412
+ "input": "<img src='foo' title='\"foo\" bar' />",
413
+ "rexml": "<img src='foo' title='\"foo\" bar' />",
414
+ "output": "<img title='&quot;foo&quot; bar' src='foo'/>"
415
+ }
416
+ ]