whitewash 2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +676 -0
- data/ChangeLog.mtn +64 -0
- data/README.rdoc +50 -0
- data/data/whitewash/html5_whitelist.yaml +392 -0
- data/data/whitewash/whitelist.yaml +214 -0
- data/lib/whitewash.rb +129 -0
- data/setup.rb +1360 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/whitewash_spec.rb +99 -0
- data/whitewash.gemspec +18 -0
- metadata +78 -0
data/ChangeLog.mtn
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
-----------------------------------------------------------------
|
2
|
+
Revision: 43067aa79f57dfb92165fd94da151d45f56ed87c
|
3
|
+
Ancestor: 3a0dff129b6a5408055b65dd804f3831d8d16f15
|
4
|
+
Author: angdraug@debian.org
|
5
|
+
Date: 2009-08-22T12:36:05
|
6
|
+
Branch: whitewash-head
|
7
|
+
|
8
|
+
Modified files:
|
9
|
+
lib/whitewash.rb
|
10
|
+
|
11
|
+
ChangeLog:
|
12
|
+
|
13
|
+
wrap global variables handling in Thread.exclusive
|
14
|
+
|
15
|
+
-----------------------------------------------------------------
|
16
|
+
Revision: 3a0dff129b6a5408055b65dd804f3831d8d16f15
|
17
|
+
Ancestor: b544d3c9fe594e3862cf518ae51421e1f3816cc9
|
18
|
+
Author: angdraug@debian.org
|
19
|
+
Date: 2009-08-13T11:01:26
|
20
|
+
Branch: whitewash-head
|
21
|
+
|
22
|
+
Modified files:
|
23
|
+
README.rdoc lib/whitewash.rb
|
24
|
+
|
25
|
+
ChangeLog:
|
26
|
+
|
27
|
+
made it easier to override tidypath
|
28
|
+
|
29
|
+
* made default_whitelist a public class method so that it can be used
|
30
|
+
in Whitewash.new() invocation
|
31
|
+
* documented the way Whitewash looks for Tidy
|
32
|
+
|
33
|
+
-----------------------------------------------------------------
|
34
|
+
Revision: b544d3c9fe594e3862cf518ae51421e1f3816cc9
|
35
|
+
Ancestor: c6398a8b6433921353ec5b0a1cf616804a550961
|
36
|
+
Author: angdraug@debian.org
|
37
|
+
Date: 2009-07-28T11:25:34
|
38
|
+
Branch: whitewash-head
|
39
|
+
|
40
|
+
Modified files:
|
41
|
+
lib/whitewash.rb
|
42
|
+
|
43
|
+
ChangeLog:
|
44
|
+
|
45
|
+
require rbconfig for access to Config::CONFIG
|
46
|
+
|
47
|
+
-----------------------------------------------------------------
|
48
|
+
Revision: c6398a8b6433921353ec5b0a1cf616804a550961
|
49
|
+
Ancestor:
|
50
|
+
Author: angdraug@debian.org
|
51
|
+
Date: 2009-07-27T16:21:20
|
52
|
+
Branch: whitewash-head
|
53
|
+
|
54
|
+
Added files:
|
55
|
+
COPYING README.rdoc data/whitewash/whitelist.yaml
|
56
|
+
lib/whitewash.rb lib/whitewash_rexml_attribute_patch.rb
|
57
|
+
setup.rb
|
58
|
+
Added directories:
|
59
|
+
. data data/whitewash lib
|
60
|
+
|
61
|
+
ChangeLog:
|
62
|
+
|
63
|
+
initial checkin: Whitewash is a spin-off of sanitize.rb from Samizdat project
|
64
|
+
|
data/README.rdoc
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
= Whitewash - whitelist-based HTML filter for Ruby
|
2
|
+
|
3
|
+
This module allows Ruby programs to clean up any HTML document or
|
4
|
+
fragment coming from an untrusted source and to remove all dangerous
|
5
|
+
constructs that could be used for cross-site scripting or request
|
6
|
+
forgery.
|
7
|
+
|
8
|
+
|
9
|
+
== Synopsis
|
10
|
+
|
11
|
+
require 'whitewash'
|
12
|
+
|
13
|
+
whitewash = Whitewash.new # use default whitelist.yaml
|
14
|
+
clean_xhtml = whitewash.sanitize(html)
|
15
|
+
|
16
|
+
|
17
|
+
== Summary
|
18
|
+
|
19
|
+
All HTML tags, attribute names and values, and CSS properties are
|
20
|
+
filtered through a whitelist that defines which names and what kinds of
|
21
|
+
values are allowed, everything that doesn't match the whitelist is
|
22
|
+
removed.
|
23
|
+
|
24
|
+
The whitelist is provided externally, default whitelist is loaded from
|
25
|
+
the whitelist.yaml shipped with Whitewash. The default is most strict
|
26
|
+
(for example, it does not allow cross-site links to images in IMG tags)
|
27
|
+
and can be considered safe for all uses. If you find that it lets
|
28
|
+
anything exploitable through, please report it as a bug to Whitewash
|
29
|
+
developers.
|
30
|
+
|
31
|
+
An alternative whitelist is provided for HTML5 in html5_whitelist.yaml.
|
32
|
+
This whitelist is as much a work in progress as HTML5 standard itself,
|
33
|
+
use at your own risk.
|
34
|
+
|
35
|
+
|
36
|
+
== External Dependencies
|
37
|
+
|
38
|
+
Whitewash relies on Nokogiri to parse arbitrary HTML and put it back
|
39
|
+
together as valid XHTML:
|
40
|
+
|
41
|
+
* http://nokogiri.org/
|
42
|
+
|
43
|
+
|
44
|
+
== Copying
|
45
|
+
|
46
|
+
Copyright (c) 2002-2011 Dmitry Borodaenko <angdraug@debian.org>
|
47
|
+
|
48
|
+
This program is free software.
|
49
|
+
You can distribute/modify this program under the terms of the GNU
|
50
|
+
General Public License version 3 or later.
|
@@ -0,0 +1,392 @@
|
|
1
|
+
---
|
2
|
+
# html5_whitelist.yaml
|
3
|
+
#
|
4
|
+
# Allowed HTML5 tags and attributes.
|
5
|
+
# HTML5 is still under development, and this file
|
6
|
+
# definition is current as of October 2011.
|
7
|
+
|
8
|
+
# _common defines attributes that can be present in any tag
|
9
|
+
_common:
|
10
|
+
accesskey: !ruby/regexp /\A[:alnum:]\z/
|
11
|
+
class: &name !ruby/regexp /\A[a-z0-9 .:_-]+\z/i
|
12
|
+
contenteditable: !ruby/regexp /\Atrue|false|inherit\z/
|
13
|
+
contextmenu: *name
|
14
|
+
dir: !ruby/regexp /\Altr|rtl|auto\z/
|
15
|
+
draggable: !ruby/regexp /\Atrue|false|auto\z/
|
16
|
+
dropzone: !ruby/regexp /\Acopy|move|link\z/
|
17
|
+
hidden: "hidden"
|
18
|
+
id: *name
|
19
|
+
lang: &lang !ruby/regexp /\A[a-z]+(-[a-z]*)?\z/i
|
20
|
+
spellcheck: !ruby/regexp /\Atrue|false\z/
|
21
|
+
style: &cdata !ruby/regexp /\A[^'"]*\z/
|
22
|
+
tabindex: &number !ruby/regexp /\A[0-9]+\z/
|
23
|
+
title: *cdata
|
24
|
+
|
25
|
+
# _css lists CSS properties allowed inside "style" attribute
|
26
|
+
_css: [ animation, animation-name, animation-duration,
|
27
|
+
animation-timing-function, animation-delay, animation-iteration-count,
|
28
|
+
animation-direction, animation-play-state, background, background-attachment,
|
29
|
+
background-color, background-image, background-position, background-repeat,
|
30
|
+
background-clip, background-origin, background-size, border, border-bottom,
|
31
|
+
border-bottom-color, border-bottom-style, border-bottom-width, border-color,
|
32
|
+
border-left, border-left-color, border-left-style, border-left-width,
|
33
|
+
border-right, border-right-color, border-right-style, border-right-width,
|
34
|
+
border-style, border-top, border-top-color, border-top-style, border-top-width,
|
35
|
+
border-width, outline, outline-color, outline-style, outline-width,
|
36
|
+
border-bottom-left-radius, border-bottom-right-radius, border-image,
|
37
|
+
border-image-outset, border-image-repeat, border-image-slice,
|
38
|
+
border-image-source, border-image-width, border-radius, border-top-left-radius,
|
39
|
+
border-top-right-radius, box-decoration-break, box-shadow, overflow-x,
|
40
|
+
overflow-y, overflow-style, rotation, rotation-point, color-profile, opacity,
|
41
|
+
rendering-intent, bookmark-label, bookmark-level, bookmark-target,
|
42
|
+
float-offset, hyphenate-after, hyphenate-before, hyphenate-character,
|
43
|
+
hyphenate-lines, hyphenate-resource, hyphens, image-resolution, marks,
|
44
|
+
string-set, height, max-height, max-width, min-height, min-width, width,
|
45
|
+
box-align, box-direction, box-flex, box-flex-group, box-lines,
|
46
|
+
box-ordinal-group, box-orient, box-pack, font, font-family, font-size,
|
47
|
+
font-style, font-variant, font-weight, font-size-adjust, font-stretch, content,
|
48
|
+
counter-increment, counter-reset, quotes, crop, move-to, page-policy,
|
49
|
+
grid-columns, grid-rows, target, target-name, target-new, target-position,
|
50
|
+
alignment-adjust, alignment-baseline, baseline-shift, dominant-baseline,
|
51
|
+
drop-initial-after-adjust, drop-initial-after-align,
|
52
|
+
drop-initial-before-adjust, drop-initial-before-align, drop-initial-size,
|
53
|
+
drop-initial-value, inline-box-align, line-stacking, line-stacking-ruby,
|
54
|
+
line-stacking-shift, line-stacking-strategy, text-height, list-style,
|
55
|
+
list-style-image, list-style-position, list-style-type, margin, margin-bottom,
|
56
|
+
margin-left, margin-right, margin-top, marquee-direction, marquee-play-count,
|
57
|
+
marquee-speed, marquee-style, column-count, column-fill, column-gap,
|
58
|
+
column-rule, column-rule-color, column-rule-style, column-rule-width,
|
59
|
+
column-span, column-width, columns, padding, padding-bottom, padding-left,
|
60
|
+
padding-right, padding-top, fit, fit-position, image-orientation, page, size,
|
61
|
+
bottom, clear, clip, cursor, display, float, left, overflow, position, right,
|
62
|
+
top, visibility, z-index, orphans, page-break-after, page-break-before,
|
63
|
+
page-break-inside, widows, ruby-align, ruby-overhang, ruby-position, ruby-span,
|
64
|
+
mark, mark-after, mark-before, phonemes, rest, rest-after, rest-before,
|
65
|
+
voice-balance, voice-duration, voice-pitch, voice-pitch-range, voice-rate,
|
66
|
+
voice-stress, voice-volume, border-collapse, border-spacing, caption-side,
|
67
|
+
empty-cells, table-layout, color, direction, letter-spacing, line-height,
|
68
|
+
text-align, text-decoration, text-indent, text-transform, unicode-bidi,
|
69
|
+
vertical-align, white-space, word-spacing, hanging-punctuation,
|
70
|
+
punctuation-trim, text-align-last, text-justify, text-outline, text-overflow,
|
71
|
+
text-shadow, text-wrap, word-break, word-wrap, transform, transform-origin,
|
72
|
+
transform-style, perspective, perspective-origin, backface-visibility,
|
73
|
+
transition, transition-property, transition-duration,
|
74
|
+
transition-timing-function, transition-delay, appearance, box-sizing, icon,
|
75
|
+
nav-down, nav-index, nav-left, nav-right, nav-up, outline-offset, resize ]
|
76
|
+
|
77
|
+
a:
|
78
|
+
href: &uri !ruby/regexp /\A((http|https|ftp|mailto):[^"\s]+|[^:\s]+)\z/i
|
79
|
+
hreflang: *lang
|
80
|
+
media: &keyword !ruby/regexp /\A[a-z ]+\z/i
|
81
|
+
rel: *keyword
|
82
|
+
target: *keyword
|
83
|
+
type: &type !ruby/regexp /\A[ ,a-z0-9\/+-]+\z/i
|
84
|
+
abbr:
|
85
|
+
address:
|
86
|
+
area:
|
87
|
+
alt: *cdata
|
88
|
+
coords: !ruby/regexp /\A[0-9,]+\z/i
|
89
|
+
href: *uri
|
90
|
+
media: *keyword
|
91
|
+
rel: *keyword
|
92
|
+
target: *keyword
|
93
|
+
type: *type
|
94
|
+
article:
|
95
|
+
aside:
|
96
|
+
audio:
|
97
|
+
autoplay: "autoplay"
|
98
|
+
controls: "controls"
|
99
|
+
loop: "loop"
|
100
|
+
preload: *keyword
|
101
|
+
src: *uri
|
102
|
+
b:
|
103
|
+
base:
|
104
|
+
href: *uri
|
105
|
+
target: *keyword
|
106
|
+
bdi:
|
107
|
+
bdo:
|
108
|
+
dir: *keyword
|
109
|
+
blockquote:
|
110
|
+
cite: *uri
|
111
|
+
body:
|
112
|
+
br:
|
113
|
+
button:
|
114
|
+
autofocus: "autofocus"
|
115
|
+
disabled: "disabled"
|
116
|
+
form: *name
|
117
|
+
formaction: *uri
|
118
|
+
formenctype: *type
|
119
|
+
formmethod: &method !ruby/regexp /\Aget|post\z/i
|
120
|
+
formnovalidate: "formnovalidate"
|
121
|
+
formtarget: *keyword
|
122
|
+
name: *cdata
|
123
|
+
type: *keyword
|
124
|
+
value: *cdata
|
125
|
+
canvas:
|
126
|
+
width: &length !ruby/regexp /\A[0-9]+(%|px)?\z/
|
127
|
+
height: *length
|
128
|
+
caption:
|
129
|
+
cite:
|
130
|
+
code:
|
131
|
+
col:
|
132
|
+
span: *number
|
133
|
+
colgroup:
|
134
|
+
span: *number
|
135
|
+
command:
|
136
|
+
checked: "checked"
|
137
|
+
disabled: "disabled"
|
138
|
+
icon: *uri
|
139
|
+
label: *cdata
|
140
|
+
radiogroup: *name
|
141
|
+
type: *keyword
|
142
|
+
datalist:
|
143
|
+
dd:
|
144
|
+
del:
|
145
|
+
cite: *uri
|
146
|
+
datetime: &datetime !ruby/regexp /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[A-Z]+\z/i
|
147
|
+
details:
|
148
|
+
open: "open"
|
149
|
+
dfn:
|
150
|
+
div:
|
151
|
+
dl:
|
152
|
+
dt:
|
153
|
+
em:
|
154
|
+
embed:
|
155
|
+
height: *length
|
156
|
+
src: *uri
|
157
|
+
type: *type
|
158
|
+
width: *length
|
159
|
+
fieldset:
|
160
|
+
disabled: "disabled"
|
161
|
+
form: *name
|
162
|
+
name: *cdata
|
163
|
+
figcaption:
|
164
|
+
figure:
|
165
|
+
footer:
|
166
|
+
form:
|
167
|
+
action: *uri
|
168
|
+
"accept-charset": *cdata
|
169
|
+
autocomplete : &onoff !ruby/regexp /\Aon|off\z/
|
170
|
+
enctype: *type
|
171
|
+
method: *method
|
172
|
+
name: *cdata
|
173
|
+
novalidate: "novalidate"
|
174
|
+
target: *keyword
|
175
|
+
h1:
|
176
|
+
h2:
|
177
|
+
h3:
|
178
|
+
h4:
|
179
|
+
h5:
|
180
|
+
h6:
|
181
|
+
head:
|
182
|
+
header:
|
183
|
+
hgroup:
|
184
|
+
hr:
|
185
|
+
html:
|
186
|
+
manifest: *uri
|
187
|
+
xmlns: "http://www.w3.org/1999/xhtml"
|
188
|
+
i:
|
189
|
+
iframe:
|
190
|
+
height: *length
|
191
|
+
name: *name
|
192
|
+
sandbox: *keyword
|
193
|
+
seamless: "seamless"
|
194
|
+
src: *uri
|
195
|
+
srcdoc: *cdata
|
196
|
+
width: *length
|
197
|
+
img:
|
198
|
+
alt: *cdata
|
199
|
+
src: *uri
|
200
|
+
height: *length
|
201
|
+
usemap: &usemap !ruby/regexp /\A#[a-z0-9 .:_-]+\z/i
|
202
|
+
ismap: "ismap"
|
203
|
+
width: *length
|
204
|
+
input:
|
205
|
+
accept: *type
|
206
|
+
alt: *cdata
|
207
|
+
autocomplete: *onoff
|
208
|
+
autofocus: "autofocus"
|
209
|
+
checked: "checked"
|
210
|
+
disabled: "disabled"
|
211
|
+
form: *name
|
212
|
+
formaction: *uri
|
213
|
+
formenctype: *type
|
214
|
+
formmethod: *method
|
215
|
+
formnovalidate: "formnovalidate"
|
216
|
+
formtarget: *keyword
|
217
|
+
height: *length
|
218
|
+
list: *name
|
219
|
+
max: *number # date
|
220
|
+
multiple: "multiple"
|
221
|
+
name: *cdata
|
222
|
+
pattern: *cdata
|
223
|
+
placeholder: *cdata
|
224
|
+
readonly: "readonly"
|
225
|
+
required: "required"
|
226
|
+
size: *number
|
227
|
+
src: *uri
|
228
|
+
step: *number
|
229
|
+
type: *keyword
|
230
|
+
value: *cdata
|
231
|
+
width: *length
|
232
|
+
ins:
|
233
|
+
cite: *uri
|
234
|
+
datetime: *datetime
|
235
|
+
keygen:
|
236
|
+
autofocus: "autofocus"
|
237
|
+
challenge: "challenge"
|
238
|
+
disabled: "disabled"
|
239
|
+
form: *name
|
240
|
+
keytype: *keyword
|
241
|
+
name: *name
|
242
|
+
kbd:
|
243
|
+
label:
|
244
|
+
for: *name
|
245
|
+
form: *name
|
246
|
+
legend:
|
247
|
+
li:
|
248
|
+
value: *cdata
|
249
|
+
link:
|
250
|
+
href: *uri
|
251
|
+
hreflang: *lang
|
252
|
+
media: *keyword
|
253
|
+
rel: *keyword
|
254
|
+
sizes: &sizes !ruby/regexp /\A[0-9]+x[0-9]+|any\z/
|
255
|
+
target: *keyword
|
256
|
+
type: *type
|
257
|
+
map:
|
258
|
+
name: *name
|
259
|
+
mark:
|
260
|
+
menu:
|
261
|
+
label: *cdata
|
262
|
+
type: *keyword
|
263
|
+
meta:
|
264
|
+
charset: *cdata
|
265
|
+
name: *name
|
266
|
+
"http-equiv": *name
|
267
|
+
content: *cdata
|
268
|
+
meter:
|
269
|
+
form: *name
|
270
|
+
high: &float !ruby/regexp /\A[0-9]+(\.[0-9]+)?\z/
|
271
|
+
low: *float
|
272
|
+
max: *float
|
273
|
+
min: *float
|
274
|
+
optimum: *float
|
275
|
+
value: *float
|
276
|
+
nav:
|
277
|
+
noscript:
|
278
|
+
object:
|
279
|
+
data: *uri
|
280
|
+
form: *name
|
281
|
+
height: *number
|
282
|
+
hspace: *number
|
283
|
+
name: *name
|
284
|
+
standby: *cdata
|
285
|
+
type: *type
|
286
|
+
usemap: *usemap
|
287
|
+
width: *number
|
288
|
+
ol:
|
289
|
+
reversed: "reversed"
|
290
|
+
start: *number
|
291
|
+
type: &listtype !ruby/regexp /\A[1AaIi]\z/
|
292
|
+
optgroup:
|
293
|
+
label: *cdata
|
294
|
+
disabled: "disabled"
|
295
|
+
option:
|
296
|
+
label: *cdata
|
297
|
+
value: *cdata
|
298
|
+
selected: "selected"
|
299
|
+
disabled: "disabled"
|
300
|
+
output:
|
301
|
+
for: *name
|
302
|
+
form: *name
|
303
|
+
name: *name
|
304
|
+
p:
|
305
|
+
param:
|
306
|
+
name: *name
|
307
|
+
value: *cdata
|
308
|
+
pre:
|
309
|
+
progress:
|
310
|
+
max: *float
|
311
|
+
value: *float
|
312
|
+
q:
|
313
|
+
cite: *uri
|
314
|
+
rp:
|
315
|
+
rt:
|
316
|
+
ruby:
|
317
|
+
s:
|
318
|
+
samp:
|
319
|
+
script:
|
320
|
+
async: "async"
|
321
|
+
defer: "defer"
|
322
|
+
type: *type
|
323
|
+
charset: *cdata
|
324
|
+
src: *uri
|
325
|
+
section:
|
326
|
+
select:
|
327
|
+
autofocus: "autofocus"
|
328
|
+
disabled: "disabled"
|
329
|
+
form: *name
|
330
|
+
multiple: "multiple"
|
331
|
+
name: *name
|
332
|
+
size: *number
|
333
|
+
small:
|
334
|
+
source:
|
335
|
+
media: *keyword
|
336
|
+
src: *uri
|
337
|
+
type: *type
|
338
|
+
span:
|
339
|
+
strong:
|
340
|
+
style:
|
341
|
+
type: "text/css"
|
342
|
+
media: *keyword
|
343
|
+
scoped: "scoped"
|
344
|
+
sub:
|
345
|
+
summary:
|
346
|
+
sup:
|
347
|
+
table:
|
348
|
+
border: !ruby/regexp /\A1?\z/
|
349
|
+
tbody:
|
350
|
+
td:
|
351
|
+
colspan: *number
|
352
|
+
headers: &idrefs !ruby/regexp /\A[a-z0-9 ,#.:_-]+\z/i
|
353
|
+
rowspan: *number
|
354
|
+
textarea:
|
355
|
+
autofocus: "autofocus"
|
356
|
+
cols: *number
|
357
|
+
disabled: "disabled"
|
358
|
+
dirname: *name
|
359
|
+
form: *name
|
360
|
+
maxlength: *number
|
361
|
+
name: *name
|
362
|
+
placeholder: *cdata
|
363
|
+
readonly: "readonly"
|
364
|
+
required: "required"
|
365
|
+
rows: *number
|
366
|
+
wrap: &wrap !ruby/regexp /\Ahard|soft\z/i
|
367
|
+
tfoot:
|
368
|
+
th:
|
369
|
+
colspan: *number
|
370
|
+
headers: *idrefs
|
371
|
+
rowspan: *number
|
372
|
+
scope: *keyword
|
373
|
+
thead:
|
374
|
+
time:
|
375
|
+
datetime: *datetime
|
376
|
+
pubdate: "pubdate"
|
377
|
+
title:
|
378
|
+
tr:
|
379
|
+
u:
|
380
|
+
ul:
|
381
|
+
var:
|
382
|
+
video:
|
383
|
+
audio: "muted"
|
384
|
+
autoplay: "autoplay"
|
385
|
+
controls: "controls"
|
386
|
+
height: *number
|
387
|
+
loop: "loop"
|
388
|
+
poster: *uri
|
389
|
+
preload: !ruby/regexp /\Aauto|metadata|none\z/
|
390
|
+
src: *uri
|
391
|
+
width: *number
|
392
|
+
wbr:
|