whitewash 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +676 -0
- data/ChangeLog.mtn +64 -0
- data/README.rdoc +50 -0
- data/data/whitewash/html5_whitelist.yaml +392 -0
- data/data/whitewash/whitelist.yaml +214 -0
- data/lib/whitewash.rb +129 -0
- data/setup.rb +1360 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/whitewash_spec.rb +99 -0
- data/whitewash.gemspec +18 -0
- metadata +78 -0
data/ChangeLog.mtn
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
-----------------------------------------------------------------
|
2
|
+
Revision: 43067aa79f57dfb92165fd94da151d45f56ed87c
|
3
|
+
Ancestor: 3a0dff129b6a5408055b65dd804f3831d8d16f15
|
4
|
+
Author: angdraug@debian.org
|
5
|
+
Date: 2009-08-22T12:36:05
|
6
|
+
Branch: whitewash-head
|
7
|
+
|
8
|
+
Modified files:
|
9
|
+
lib/whitewash.rb
|
10
|
+
|
11
|
+
ChangeLog:
|
12
|
+
|
13
|
+
wrap global variables handling in Thread.exclusive
|
14
|
+
|
15
|
+
-----------------------------------------------------------------
|
16
|
+
Revision: 3a0dff129b6a5408055b65dd804f3831d8d16f15
|
17
|
+
Ancestor: b544d3c9fe594e3862cf518ae51421e1f3816cc9
|
18
|
+
Author: angdraug@debian.org
|
19
|
+
Date: 2009-08-13T11:01:26
|
20
|
+
Branch: whitewash-head
|
21
|
+
|
22
|
+
Modified files:
|
23
|
+
README.rdoc lib/whitewash.rb
|
24
|
+
|
25
|
+
ChangeLog:
|
26
|
+
|
27
|
+
made it easier to override tidypath
|
28
|
+
|
29
|
+
* made default_whitelist a public class method so that it can be used
|
30
|
+
in Whitewash.new() invocation
|
31
|
+
* documented the way Whitewash looks for Tidy
|
32
|
+
|
33
|
+
-----------------------------------------------------------------
|
34
|
+
Revision: b544d3c9fe594e3862cf518ae51421e1f3816cc9
|
35
|
+
Ancestor: c6398a8b6433921353ec5b0a1cf616804a550961
|
36
|
+
Author: angdraug@debian.org
|
37
|
+
Date: 2009-07-28T11:25:34
|
38
|
+
Branch: whitewash-head
|
39
|
+
|
40
|
+
Modified files:
|
41
|
+
lib/whitewash.rb
|
42
|
+
|
43
|
+
ChangeLog:
|
44
|
+
|
45
|
+
require rbconfig for access to Config::CONFIG
|
46
|
+
|
47
|
+
-----------------------------------------------------------------
|
48
|
+
Revision: c6398a8b6433921353ec5b0a1cf616804a550961
|
49
|
+
Ancestor:
|
50
|
+
Author: angdraug@debian.org
|
51
|
+
Date: 2009-07-27T16:21:20
|
52
|
+
Branch: whitewash-head
|
53
|
+
|
54
|
+
Added files:
|
55
|
+
COPYING README.rdoc data/whitewash/whitelist.yaml
|
56
|
+
lib/whitewash.rb lib/whitewash_rexml_attribute_patch.rb
|
57
|
+
setup.rb
|
58
|
+
Added directories:
|
59
|
+
. data data/whitewash lib
|
60
|
+
|
61
|
+
ChangeLog:
|
62
|
+
|
63
|
+
initial checkin: Whitewash is a spin-off of sanitize.rb from Samizdat project
|
64
|
+
|
data/README.rdoc
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
= Whitewash - whitelist-based HTML filter for Ruby
|
2
|
+
|
3
|
+
This module allows Ruby programs to clean up any HTML document or
|
4
|
+
fragment coming from an untrusted source and to remove all dangerous
|
5
|
+
constructs that could be used for cross-site scripting or request
|
6
|
+
forgery.
|
7
|
+
|
8
|
+
|
9
|
+
== Synopsis
|
10
|
+
|
11
|
+
require 'whitewash'
|
12
|
+
|
13
|
+
whitewash = Whitewash.new # use default whitelist.yaml
|
14
|
+
clean_xhtml = whitewash.sanitize(html)
|
15
|
+
|
16
|
+
|
17
|
+
== Summary
|
18
|
+
|
19
|
+
All HTML tags, attribute names and values, and CSS properties are
|
20
|
+
filtered through a whitelist that defines which names and what kinds of
|
21
|
+
values are allowed; everything that doesn't match the whitelist is
|
22
|
+
removed.
|
23
|
+
|
24
|
+
The whitelist is provided externally; the default whitelist is loaded from
|
25
|
+
the whitelist.yaml shipped with Whitewash. The default is most strict
|
26
|
+
(for example, it does not allow cross-site links to images in IMG tags)
|
27
|
+
and can be considered safe for all uses. If you find that it lets
|
28
|
+
anything exploitable through, please report it as a bug to Whitewash
|
29
|
+
developers.
|
30
|
+
|
31
|
+
An alternative whitelist is provided for HTML5 in html5_whitelist.yaml.
|
32
|
+
This whitelist is as much a work in progress as the HTML5 standard itself,
|
33
|
+
use at your own risk.
|
34
|
+
|
35
|
+
|
36
|
+
== External Dependencies
|
37
|
+
|
38
|
+
Whitewash relies on Nokogiri to parse arbitrary HTML and put it back
|
39
|
+
together as valid XHTML:
|
40
|
+
|
41
|
+
* http://nokogiri.org/
|
42
|
+
|
43
|
+
|
44
|
+
== Copying
|
45
|
+
|
46
|
+
Copyright (c) 2002-2011 Dmitry Borodaenko <angdraug@debian.org>
|
47
|
+
|
48
|
+
This program is free software.
|
49
|
+
You can distribute/modify this program under the terms of the GNU
|
50
|
+
General Public License version 3 or later.
|
@@ -0,0 +1,392 @@
|
|
1
|
+
---
|
2
|
+
# html5_whitelist.yaml
|
3
|
+
#
|
4
|
+
# Allowed HTML5 tags and attributes.
|
5
|
+
# HTML5 is still under development, and this file
|
6
|
+
# definition is current as of October 2011.
|
7
|
+
|
8
|
+
# _common defines attributes that can be present in any tag
|
9
|
+
_common:
|
10
|
+
accesskey: !ruby/regexp /\A[[:alnum:]]\z/
|
11
|
+
class: &name !ruby/regexp /\A[a-z0-9 .:_-]+\z/i
|
12
|
+
contenteditable: !ruby/regexp /\A(true|false|inherit)\z/
|
13
|
+
contextmenu: *name
|
14
|
+
dir: !ruby/regexp /\A(ltr|rtl|auto)\z/
|
15
|
+
draggable: !ruby/regexp /\A(true|false|auto)\z/
|
16
|
+
dropzone: !ruby/regexp /\A(copy|move|link)\z/
|
17
|
+
hidden: "hidden"
|
18
|
+
id: *name
|
19
|
+
lang: &lang !ruby/regexp /\A[a-z]+(-[a-z]*)?\z/i
|
20
|
+
spellcheck: !ruby/regexp /\A(true|false)\z/
|
21
|
+
style: &cdata !ruby/regexp /\A[^'"]*\z/
|
22
|
+
tabindex: &number !ruby/regexp /\A[0-9]+\z/
|
23
|
+
title: *cdata
|
24
|
+
|
25
|
+
# _css lists CSS properties allowed inside "style" attribute
|
26
|
+
_css: [ animation, animation-name, animation-duration,
|
27
|
+
animation-timing-function, animation-delay, animation-iteration-count,
|
28
|
+
animation-direction, animation-play-state, background, background-attachment,
|
29
|
+
background-color, background-image, background-position, background-repeat,
|
30
|
+
background-clip, background-origin, background-size, border, border-bottom,
|
31
|
+
border-bottom-color, border-bottom-style, border-bottom-width, border-color,
|
32
|
+
border-left, border-left-color, border-left-style, border-left-width,
|
33
|
+
border-right, border-right-color, border-right-style, border-right-width,
|
34
|
+
border-style, border-top, border-top-color, border-top-style, border-top-width,
|
35
|
+
border-width, outline, outline-color, outline-style, outline-width,
|
36
|
+
border-bottom-left-radius, border-bottom-right-radius, border-image,
|
37
|
+
border-image-outset, border-image-repeat, border-image-slice,
|
38
|
+
border-image-source, border-image-width, border-radius, border-top-left-radius,
|
39
|
+
border-top-right-radius, box-decoration-break, box-shadow, overflow-x,
|
40
|
+
overflow-y, overflow-style, rotation, rotation-point, color-profile, opacity,
|
41
|
+
rendering-intent, bookmark-label, bookmark-level, bookmark-target,
|
42
|
+
float-offset, hyphenate-after, hyphenate-before, hyphenate-character,
|
43
|
+
hyphenate-lines, hyphenate-resource, hyphens, image-resolution, marks,
|
44
|
+
string-set, height, max-height, max-width, min-height, min-width, width,
|
45
|
+
box-align, box-direction, box-flex, box-flex-group, box-lines,
|
46
|
+
box-ordinal-group, box-orient, box-pack, font, font-family, font-size,
|
47
|
+
font-style, font-variant, font-weight, font-size-adjust, font-stretch, content,
|
48
|
+
counter-increment, counter-reset, quotes, crop, move-to, page-policy,
|
49
|
+
grid-columns, grid-rows, target, target-name, target-new, target-position,
|
50
|
+
alignment-adjust, alignment-baseline, baseline-shift, dominant-baseline,
|
51
|
+
drop-initial-after-adjust, drop-initial-after-align,
|
52
|
+
drop-initial-before-adjust, drop-initial-before-align, drop-initial-size,
|
53
|
+
drop-initial-value, inline-box-align, line-stacking, line-stacking-ruby,
|
54
|
+
line-stacking-shift, line-stacking-strategy, text-height, list-style,
|
55
|
+
list-style-image, list-style-position, list-style-type, margin, margin-bottom,
|
56
|
+
margin-left, margin-right, margin-top, marquee-direction, marquee-play-count,
|
57
|
+
marquee-speed, marquee-style, column-count, column-fill, column-gap,
|
58
|
+
column-rule, column-rule-color, column-rule-style, column-rule-width,
|
59
|
+
column-span, column-width, columns, padding, padding-bottom, padding-left,
|
60
|
+
padding-right, padding-top, fit, fit-position, image-orientation, page, size,
|
61
|
+
bottom, clear, clip, cursor, display, float, left, overflow, position, right,
|
62
|
+
top, visibility, z-index, orphans, page-break-after, page-break-before,
|
63
|
+
page-break-inside, widows, ruby-align, ruby-overhang, ruby-position, ruby-span,
|
64
|
+
mark, mark-after, mark-before, phonemes, rest, rest-after, rest-before,
|
65
|
+
voice-balance, voice-duration, voice-pitch, voice-pitch-range, voice-rate,
|
66
|
+
voice-stress, voice-volume, border-collapse, border-spacing, caption-side,
|
67
|
+
empty-cells, table-layout, color, direction, letter-spacing, line-height,
|
68
|
+
text-align, text-decoration, text-indent, text-transform, unicode-bidi,
|
69
|
+
vertical-align, white-space, word-spacing, hanging-punctuation,
|
70
|
+
punctuation-trim, text-align-last, text-justify, text-outline, text-overflow,
|
71
|
+
text-shadow, text-wrap, word-break, word-wrap, transform, transform-origin,
|
72
|
+
transform-style, perspective, perspective-origin, backface-visibility,
|
73
|
+
transition, transition-property, transition-duration,
|
74
|
+
transition-timing-function, transition-delay, appearance, box-sizing, icon,
|
75
|
+
nav-down, nav-index, nav-left, nav-right, nav-up, outline-offset, resize ]
|
76
|
+
|
77
|
+
a:
|
78
|
+
href: &uri !ruby/regexp /\A((http|https|ftp|mailto):[^"\s]+|[^:\s]+)\z/i
|
79
|
+
hreflang: *lang
|
80
|
+
media: &keyword !ruby/regexp /\A[a-z ]+\z/i
|
81
|
+
rel: *keyword
|
82
|
+
target: *keyword
|
83
|
+
type: &type !ruby/regexp /\A[ ,a-z0-9\/+-]+\z/i
|
84
|
+
abbr:
|
85
|
+
address:
|
86
|
+
area:
|
87
|
+
alt: *cdata
|
88
|
+
coords: !ruby/regexp /\A[0-9,]+\z/i
|
89
|
+
href: *uri
|
90
|
+
media: *keyword
|
91
|
+
rel: *keyword
|
92
|
+
target: *keyword
|
93
|
+
type: *type
|
94
|
+
article:
|
95
|
+
aside:
|
96
|
+
audio:
|
97
|
+
autoplay: "autoplay"
|
98
|
+
controls: "controls"
|
99
|
+
loop: "loop"
|
100
|
+
preload: *keyword
|
101
|
+
src: *uri
|
102
|
+
b:
|
103
|
+
base:
|
104
|
+
href: *uri
|
105
|
+
target: *keyword
|
106
|
+
bdi:
|
107
|
+
bdo:
|
108
|
+
dir: *keyword
|
109
|
+
blockquote:
|
110
|
+
cite: *uri
|
111
|
+
body:
|
112
|
+
br:
|
113
|
+
button:
|
114
|
+
autofocus: "autofocus"
|
115
|
+
disabled: "disabled"
|
116
|
+
form: *name
|
117
|
+
formaction: *uri
|
118
|
+
formenctype: *type
|
119
|
+
formmethod: &method !ruby/regexp /\A(get|post)\z/i
|
120
|
+
formnovalidate: "formnovalidate"
|
121
|
+
formtarget: *keyword
|
122
|
+
name: *cdata
|
123
|
+
type: *keyword
|
124
|
+
value: *cdata
|
125
|
+
canvas:
|
126
|
+
width: &length !ruby/regexp /\A[0-9]+(%|px)?\z/
|
127
|
+
height: *length
|
128
|
+
caption:
|
129
|
+
cite:
|
130
|
+
code:
|
131
|
+
col:
|
132
|
+
span: *number
|
133
|
+
colgroup:
|
134
|
+
span: *number
|
135
|
+
command:
|
136
|
+
checked: "checked"
|
137
|
+
disabled: "disabled"
|
138
|
+
icon: *uri
|
139
|
+
label: *cdata
|
140
|
+
radiogroup: *name
|
141
|
+
type: *keyword
|
142
|
+
datalist:
|
143
|
+
dd:
|
144
|
+
del:
|
145
|
+
cite: *uri
|
146
|
+
datetime: &datetime !ruby/regexp /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[A-Z]+\z/i
|
147
|
+
details:
|
148
|
+
open: "open"
|
149
|
+
dfn:
|
150
|
+
div:
|
151
|
+
dl:
|
152
|
+
dt:
|
153
|
+
em:
|
154
|
+
embed:
|
155
|
+
height: *length
|
156
|
+
src: *uri
|
157
|
+
type: *type
|
158
|
+
width: *length
|
159
|
+
fieldset:
|
160
|
+
disabled: "disabled"
|
161
|
+
form: *name
|
162
|
+
name: *cdata
|
163
|
+
figcaption:
|
164
|
+
figure:
|
165
|
+
footer:
|
166
|
+
form:
|
167
|
+
action: *uri
|
168
|
+
"accept-charset": *cdata
|
169
|
+
autocomplete: &onoff !ruby/regexp /\A(on|off)\z/
|
170
|
+
enctype: *type
|
171
|
+
method: *method
|
172
|
+
name: *cdata
|
173
|
+
novalidate: "novalidate"
|
174
|
+
target: *keyword
|
175
|
+
h1:
|
176
|
+
h2:
|
177
|
+
h3:
|
178
|
+
h4:
|
179
|
+
h5:
|
180
|
+
h6:
|
181
|
+
head:
|
182
|
+
header:
|
183
|
+
hgroup:
|
184
|
+
hr:
|
185
|
+
html:
|
186
|
+
manifest: *uri
|
187
|
+
xmlns: "http://www.w3.org/1999/xhtml"
|
188
|
+
i:
|
189
|
+
iframe:
|
190
|
+
height: *length
|
191
|
+
name: *name
|
192
|
+
sandbox: *keyword
|
193
|
+
seamless: "seamless"
|
194
|
+
src: *uri
|
195
|
+
srcdoc: *cdata
|
196
|
+
width: *length
|
197
|
+
img:
|
198
|
+
alt: *cdata
|
199
|
+
src: *uri
|
200
|
+
height: *length
|
201
|
+
usemap: &usemap !ruby/regexp /\A#[a-z0-9 .:_-]+\z/i
|
202
|
+
ismap: "ismap"
|
203
|
+
width: *length
|
204
|
+
input:
|
205
|
+
accept: *type
|
206
|
+
alt: *cdata
|
207
|
+
autocomplete: *onoff
|
208
|
+
autofocus: "autofocus"
|
209
|
+
checked: "checked"
|
210
|
+
disabled: "disabled"
|
211
|
+
form: *name
|
212
|
+
formaction: *uri
|
213
|
+
formenctype: *type
|
214
|
+
formmethod: *method
|
215
|
+
formnovalidate: "formnovalidate"
|
216
|
+
formtarget: *keyword
|
217
|
+
height: *length
|
218
|
+
list: *name
|
219
|
+
max: *number # date
|
220
|
+
multiple: "multiple"
|
221
|
+
name: *cdata
|
222
|
+
pattern: *cdata
|
223
|
+
placeholder: *cdata
|
224
|
+
readonly: "readonly"
|
225
|
+
required: "required"
|
226
|
+
size: *number
|
227
|
+
src: *uri
|
228
|
+
step: *number
|
229
|
+
type: *keyword
|
230
|
+
value: *cdata
|
231
|
+
width: *length
|
232
|
+
ins:
|
233
|
+
cite: *uri
|
234
|
+
datetime: *datetime
|
235
|
+
keygen:
|
236
|
+
autofocus: "autofocus"
|
237
|
+
challenge: "challenge"
|
238
|
+
disabled: "disabled"
|
239
|
+
form: *name
|
240
|
+
keytype: *keyword
|
241
|
+
name: *name
|
242
|
+
kbd:
|
243
|
+
label:
|
244
|
+
for: *name
|
245
|
+
form: *name
|
246
|
+
legend:
|
247
|
+
li:
|
248
|
+
value: *cdata
|
249
|
+
link:
|
250
|
+
href: *uri
|
251
|
+
hreflang: *lang
|
252
|
+
media: *keyword
|
253
|
+
rel: *keyword
|
254
|
+
sizes: &sizes !ruby/regexp /\A([0-9]+x[0-9]+|any)\z/
|
255
|
+
target: *keyword
|
256
|
+
type: *type
|
257
|
+
map:
|
258
|
+
name: *name
|
259
|
+
mark:
|
260
|
+
menu:
|
261
|
+
label: *cdata
|
262
|
+
type: *keyword
|
263
|
+
meta:
|
264
|
+
charset: *cdata
|
265
|
+
name: *name
|
266
|
+
"http-equiv": *name
|
267
|
+
content: *cdata
|
268
|
+
meter:
|
269
|
+
form: *name
|
270
|
+
high: &float !ruby/regexp /\A[0-9]+(\.[0-9]+)?\z/
|
271
|
+
low: *float
|
272
|
+
max: *float
|
273
|
+
min: *float
|
274
|
+
optimum: *float
|
275
|
+
value: *float
|
276
|
+
nav:
|
277
|
+
noscript:
|
278
|
+
object:
|
279
|
+
data: *uri
|
280
|
+
form: *name
|
281
|
+
height: *number
|
282
|
+
hspace: *number
|
283
|
+
name: *name
|
284
|
+
standby: *cdata
|
285
|
+
type: *type
|
286
|
+
usemap: *usemap
|
287
|
+
width: *number
|
288
|
+
ol:
|
289
|
+
reversed: "reversed"
|
290
|
+
start: *number
|
291
|
+
type: &listtype !ruby/regexp /\A[1AaIi]\z/
|
292
|
+
optgroup:
|
293
|
+
label: *cdata
|
294
|
+
disabled: "disabled"
|
295
|
+
option:
|
296
|
+
label: *cdata
|
297
|
+
value: *cdata
|
298
|
+
selected: "selected"
|
299
|
+
disabled: "disabled"
|
300
|
+
output:
|
301
|
+
for: *name
|
302
|
+
form: *name
|
303
|
+
name: *name
|
304
|
+
p:
|
305
|
+
param:
|
306
|
+
name: *name
|
307
|
+
value: *cdata
|
308
|
+
pre:
|
309
|
+
progress:
|
310
|
+
max: *float
|
311
|
+
value: *float
|
312
|
+
q:
|
313
|
+
cite: *uri
|
314
|
+
rp:
|
315
|
+
rt:
|
316
|
+
ruby:
|
317
|
+
s:
|
318
|
+
samp:
|
319
|
+
script:
|
320
|
+
async: "async"
|
321
|
+
defer: "defer"
|
322
|
+
type: *type
|
323
|
+
charset: *cdata
|
324
|
+
src: *uri
|
325
|
+
section:
|
326
|
+
select:
|
327
|
+
autofocus: "autofocus"
|
328
|
+
disabled: "disabled"
|
329
|
+
form: *name
|
330
|
+
multiple: "multiple"
|
331
|
+
name: *name
|
332
|
+
size: *number
|
333
|
+
small:
|
334
|
+
source:
|
335
|
+
media: *keyword
|
336
|
+
src: *uri
|
337
|
+
type: *type
|
338
|
+
span:
|
339
|
+
strong:
|
340
|
+
style:
|
341
|
+
type: "text/css"
|
342
|
+
media: *keyword
|
343
|
+
scoped: "scoped"
|
344
|
+
sub:
|
345
|
+
summary:
|
346
|
+
sup:
|
347
|
+
table:
|
348
|
+
border: !ruby/regexp /\A1?\z/
|
349
|
+
tbody:
|
350
|
+
td:
|
351
|
+
colspan: *number
|
352
|
+
headers: &idrefs !ruby/regexp /\A[a-z0-9 ,#.:_-]+\z/i
|
353
|
+
rowspan: *number
|
354
|
+
textarea:
|
355
|
+
autofocus: "autofocus"
|
356
|
+
cols: *number
|
357
|
+
disabled: "disabled"
|
358
|
+
dirname: *name
|
359
|
+
form: *name
|
360
|
+
maxlength: *number
|
361
|
+
name: *name
|
362
|
+
placeholder: *cdata
|
363
|
+
readonly: "readonly"
|
364
|
+
required: "required"
|
365
|
+
rows: *number
|
366
|
+
wrap: &wrap !ruby/regexp /\A(hard|soft)\z/i
|
367
|
+
tfoot:
|
368
|
+
th:
|
369
|
+
colspan: *number
|
370
|
+
headers: *idrefs
|
371
|
+
rowspan: *number
|
372
|
+
scope: *keyword
|
373
|
+
thead:
|
374
|
+
time:
|
375
|
+
datetime: *datetime
|
376
|
+
pubdate: "pubdate"
|
377
|
+
title:
|
378
|
+
tr:
|
379
|
+
u:
|
380
|
+
ul:
|
381
|
+
var:
|
382
|
+
video:
|
383
|
+
audio: "muted"
|
384
|
+
autoplay: "autoplay"
|
385
|
+
controls: "controls"
|
386
|
+
height: *number
|
387
|
+
loop: "loop"
|
388
|
+
poster: *uri
|
389
|
+
preload: !ruby/regexp /\A(auto|metadata|none)\z/
|
390
|
+
src: *uri
|
391
|
+
width: *number
|
392
|
+
wbr:
|