whitewash 2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,64 @@
1
+ -----------------------------------------------------------------
2
+ Revision: 43067aa79f57dfb92165fd94da151d45f56ed87c
3
+ Ancestor: 3a0dff129b6a5408055b65dd804f3831d8d16f15
4
+ Author: angdraug@debian.org
5
+ Date: 2009-08-22T12:36:05
6
+ Branch: whitewash-head
7
+
8
+ Modified files:
9
+ lib/whitewash.rb
10
+
11
+ ChangeLog:
12
+
13
+ wrap global variables handling in Thread.exclusive
14
+
15
+ -----------------------------------------------------------------
16
+ Revision: 3a0dff129b6a5408055b65dd804f3831d8d16f15
17
+ Ancestor: b544d3c9fe594e3862cf518ae51421e1f3816cc9
18
+ Author: angdraug@debian.org
19
+ Date: 2009-08-13T11:01:26
20
+ Branch: whitewash-head
21
+
22
+ Modified files:
23
+ README.rdoc lib/whitewash.rb
24
+
25
+ ChangeLog:
26
+
27
+ made it easier to override tidypath
28
+
29
+ * made default_whitelist a public class method so that it can be used
30
+ in Whitewash.new() invocation
31
+ * documented the way Whitewash looks for Tidy
32
+
33
+ -----------------------------------------------------------------
34
+ Revision: b544d3c9fe594e3862cf518ae51421e1f3816cc9
35
+ Ancestor: c6398a8b6433921353ec5b0a1cf616804a550961
36
+ Author: angdraug@debian.org
37
+ Date: 2009-07-28T11:25:34
38
+ Branch: whitewash-head
39
+
40
+ Modified files:
41
+ lib/whitewash.rb
42
+
43
+ ChangeLog:
44
+
45
+ require rbconfig for access to Config::CONFIG
46
+
47
+ -----------------------------------------------------------------
48
+ Revision: c6398a8b6433921353ec5b0a1cf616804a550961
49
+ Ancestor:
50
+ Author: angdraug@debian.org
51
+ Date: 2009-07-27T16:21:20
52
+ Branch: whitewash-head
53
+
54
+ Added files:
55
+ COPYING README.rdoc data/whitewash/whitelist.yaml
56
+ lib/whitewash.rb lib/whitewash_rexml_attribute_patch.rb
57
+ setup.rb
58
+ Added directories:
59
+ . data data/whitewash lib
60
+
61
+ ChangeLog:
62
+
63
+ initial checkin: Whitewash is a spin-off of sanitize.rb from Samizdat project
64
+
@@ -0,0 +1,50 @@
1
+ = Whitewash - whitelist-based HTML filter for Ruby
2
+
3
+ This module allows Ruby programs to clean up any HTML document or
4
+ fragment coming from an untrusted source and to remove all dangerous
5
+ constructs that could be used for cross-site scripting or request
6
+ forgery.
7
+
8
+
9
+ == Synopsis
10
+
11
+ require 'whitewash'
12
+
13
+ whitewash = Whitewash.new # use default whitelist.yaml
14
+ clean_xhtml = whitewash.sanitize(html)
15
+
16
+
17
+ == Summary
18
+
19
+ All HTML tags, attribute names and values, and CSS properties are
20
+ filtered through a whitelist that defines which names and what kinds of
21
+ values are allowed; everything that doesn't match the whitelist is
22
+ removed.
23
+
24
+ The whitelist is provided externally; the default whitelist is loaded from
25
+ the whitelist.yaml shipped with Whitewash. The default is most strict
26
+ (for example, it does not allow cross-site links to images in IMG tags)
27
+ and can be considered safe for all uses. If you find that it lets
28
+ anything exploitable through, please report it as a bug to Whitewash
29
+ developers.
30
+
31
+ An alternative whitelist is provided for HTML5 in html5_whitelist.yaml.
32
+ This whitelist is as much a work in progress as the HTML5 standard itself;
33
+ use at your own risk.
34
+
35
+
36
+ == External Dependencies
37
+
38
+ Whitewash relies on Nokogiri to parse arbitrary HTML and put it back
39
+ together as valid XHTML:
40
+
41
+ * http://nokogiri.org/
42
+
43
+
44
+ == Copying
45
+
46
+ Copyright (c) 2002-2011 Dmitry Borodaenko <angdraug@debian.org>
47
+
48
+ This program is free software.
49
+ You can distribute/modify this program under the terms of the GNU
50
+ General Public License version 3 or later.
@@ -0,0 +1,392 @@
1
+ ---
2
+ # html5_whitelist.yaml
3
+ #
4
+ # Allowed HTML5 tags and attributes.
5
+ # HTML5 is still under development, and this file
6
+ # definition is current as of October 2011.
7
+
8
+ # _common defines attributes that can be present in any tag
9
+ _common:
10
+ accesskey: !ruby/regexp /\A[[:alnum:]]\z/
11
+ class: &name !ruby/regexp /\A[a-z0-9 .:_-]+\z/i
12
+ contenteditable: !ruby/regexp /\A(?:true|false|inherit)\z/
13
+ contextmenu: *name
14
+ dir: !ruby/regexp /\A(?:ltr|rtl|auto)\z/
15
+ draggable: !ruby/regexp /\A(?:true|false|auto)\z/
16
+ dropzone: !ruby/regexp /\A(?:copy|move|link)\z/
17
+ hidden: "hidden"
18
+ id: *name
19
+ lang: &lang !ruby/regexp /\A[a-z]+(-[a-z]*)?\z/i
20
+ spellcheck: !ruby/regexp /\A(?:true|false)\z/
21
+ style: &cdata !ruby/regexp /\A[^'"]*\z/
22
+ tabindex: &number !ruby/regexp /\A[0-9]+\z/
23
+ title: *cdata
24
+
25
+ # _css lists CSS properties allowed inside "style" attribute
26
+ _css: [ animation, animation-name, animation-duration,
27
+ animation-timing-function, animation-delay, animation-iteration-count,
28
+ animation-direction, animation-play-state, background, background-attachment,
29
+ background-color, background-image, background-position, background-repeat,
30
+ background-clip, background-origin, background-size, border, border-bottom,
31
+ border-bottom-color, border-bottom-style, border-bottom-width, border-color,
32
+ border-left, border-left-color, border-left-style, border-left-width,
33
+ border-right, border-right-color, border-right-style, border-right-width,
34
+ border-style, border-top, border-top-color, border-top-style, border-top-width,
35
+ border-width, outline, outline-color, outline-style, outline-width,
36
+ border-bottom-left-radius, border-bottom-right-radius, border-image,
37
+ border-image-outset, border-image-repeat, border-image-slice,
38
+ border-image-source, border-image-width, border-radius, border-top-left-radius,
39
+ border-top-right-radius, box-decoration-break, box-shadow, overflow-x,
40
+ overflow-y, overflow-style, rotation, rotation-point, color-profile, opacity,
41
+ rendering-intent, bookmark-label, bookmark-level, bookmark-target,
42
+ float-offset, hyphenate-after, hyphenate-before, hyphenate-character,
43
+ hyphenate-lines, hyphenate-resource, hyphens, image-resolution, marks,
44
+ string-set, height, max-height, max-width, min-height, min-width, width,
45
+ box-align, box-direction, box-flex, box-flex-group, box-lines,
46
+ box-ordinal-group, box-orient, box-pack, font, font-family, font-size,
47
+ font-style, font-variant, font-weight, font-size-adjust, font-stretch, content,
48
+ counter-increment, counter-reset, quotes, crop, move-to, page-policy,
49
+ grid-columns, grid-rows, target, target-name, target-new, target-position,
50
+ alignment-adjust, alignment-baseline, baseline-shift, dominant-baseline,
51
+ drop-initial-after-adjust, drop-initial-after-align,
52
+ drop-initial-before-adjust, drop-initial-before-align, drop-initial-size,
53
+ drop-initial-value, inline-box-align, line-stacking, line-stacking-ruby,
54
+ line-stacking-shift, line-stacking-strategy, text-height, list-style,
55
+ list-style-image, list-style-position, list-style-type, margin, margin-bottom,
56
+ margin-left, margin-right, margin-top, marquee-direction, marquee-play-count,
57
+ marquee-speed, marquee-style, column-count, column-fill, column-gap,
58
+ column-rule, column-rule-color, column-rule-style, column-rule-width,
59
+ column-span, column-width, columns, padding, padding-bottom, padding-left,
60
+ padding-right, padding-top, fit, fit-position, image-orientation, page, size,
61
+ bottom, clear, clip, cursor, display, float, left, overflow, position, right,
62
+ top, visibility, z-index, orphans, page-break-after, page-break-before,
63
+ page-break-inside, widows, ruby-align, ruby-overhang, ruby-position, ruby-span,
64
+ mark, mark-after, mark-before, phonemes, rest, rest-after, rest-before,
65
+ voice-balance, voice-duration, voice-pitch, voice-pitch-range, voice-rate,
66
+ voice-stress, voice-volume, border-collapse, border-spacing, caption-side,
67
+ empty-cells, table-layout, color, direction, letter-spacing, line-height,
68
+ text-align, text-decoration, text-indent, text-transform, unicode-bidi,
69
+ vertical-align, white-space, word-spacing, hanging-punctuation,
70
+ punctuation-trim, text-align-last, text-justify, text-outline, text-overflow,
71
+ text-shadow, text-wrap, word-break, word-wrap, transform, transform-origin,
72
+ transform-style, perspective, perspective-origin, backface-visibility,
73
+ transition, transition-property, transition-duration,
74
+ transition-timing-function, transition-delay, appearance, box-sizing, icon,
75
+ nav-down, nav-index, nav-left, nav-right, nav-up, outline-offset, resize ]
76
+
77
+ a:
78
+ href: &uri !ruby/regexp /\A((http|https|ftp|mailto):[^"\s]+|[^:\s]+)\z/i
79
+ hreflang: *lang
80
+ media: &keyword !ruby/regexp /\A[a-z ]+\z/i
81
+ rel: *keyword
82
+ target: *keyword
83
+ type: &type !ruby/regexp /\A[ ,a-z0-9\/+-]+\z/i
84
+ abbr:
85
+ address:
86
+ area:
87
+ alt: *cdata
88
+ coords: !ruby/regexp /\A[0-9,]+\z/i
89
+ href: *uri
90
+ media: *keyword
91
+ rel: *keyword
92
+ target: *keyword
93
+ type: *type
94
+ article:
95
+ aside:
96
+ audio:
97
+ autoplay: "autoplay"
98
+ controls: "controls"
99
+ loop: "loop"
100
+ preload: *keyword
101
+ src: *uri
102
+ b:
103
+ base:
104
+ href: *uri
105
+ target: *keyword
106
+ bdi:
107
+ bdo:
108
+ dir: *keyword
109
+ blockquote:
110
+ cite: *uri
111
+ body:
112
+ br:
113
+ button:
114
+ autofocus: "autofocus"
115
+ disabled: "disabled"
116
+ form: *name
117
+ formaction: *uri
118
+ formenctype: *type
119
+ formmethod: &method !ruby/regexp /\A(?:get|post)\z/i
120
+ formnovalidate: "formnovalidate"
121
+ formtarget: *keyword
122
+ name: *cdata
123
+ type: *keyword
124
+ value: *cdata
125
+ canvas:
126
+ width: &length !ruby/regexp /\A[0-9]+(%|px)?\z/
127
+ height: *length
128
+ caption:
129
+ cite:
130
+ code:
131
+ col:
132
+ span: *number
133
+ colgroup:
134
+ span: *number
135
+ command:
136
+ checked: "checked"
137
+ disabled: "disabled"
138
+ icon: *uri
139
+ label: *cdata
140
+ radiogroup: *name
141
+ type: *keyword
142
+ datalist:
143
+ dd:
144
+ del:
145
+ cite: *uri
146
+ datetime: &datetime !ruby/regexp /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[A-Z]+\z/i
147
+ details:
148
+ open: "open"
149
+ dfn:
150
+ div:
151
+ dl:
152
+ dt:
153
+ em:
154
+ embed:
155
+ height: *length
156
+ src: *uri
157
+ type: *type
158
+ width: *length
159
+ fieldset:
160
+ disabled: "disabled"
161
+ form: *name
162
+ name: *cdata
163
+ figcaption:
164
+ figure:
165
+ footer:
166
+ form:
167
+ action: *uri
168
+ "accept-charset": *cdata
169
+ autocomplete : &onoff !ruby/regexp /\A(?:on|off)\z/
170
+ enctype: *type
171
+ method: *method
172
+ name: *cdata
173
+ novalidate: "novalidate"
174
+ target: *keyword
175
+ h1:
176
+ h2:
177
+ h3:
178
+ h4:
179
+ h5:
180
+ h6:
181
+ head:
182
+ header:
183
+ hgroup:
184
+ hr:
185
+ html:
186
+ manifest: *uri
187
+ xmlns: "http://www.w3.org/1999/xhtml"
188
+ i:
189
+ iframe:
190
+ height: *length
191
+ name: *name
192
+ sandbox: *keyword
193
+ seamless: "seamless"
194
+ src: *uri
195
+ srcdoc: *cdata
196
+ width: *length
197
+ img:
198
+ alt: *cdata
199
+ src: *uri
200
+ height: *length
201
+ usemap: &usemap !ruby/regexp /\A#[a-z0-9 .:_-]+\z/i
202
+ ismap: "ismap"
203
+ width: *length
204
+ input:
205
+ accept: *type
206
+ alt: *cdata
207
+ autocomplete: *onoff
208
+ autofocus: "autofocus"
209
+ checked: "checked"
210
+ disabled: "disabled"
211
+ form: *name
212
+ formaction: *uri
213
+ formenctype: *type
214
+ formmethod: *method
215
+ formnovalidate: "formnovalidate"
216
+ formtarget: *keyword
217
+ height: *length
218
+ list: *name
219
+ max: *number # date
220
+ multiple: "multiple"
221
+ name: *cdata
222
+ pattern: *cdata
223
+ placeholder: *cdata
224
+ readonly: "readonly"
225
+ required: "required"
226
+ size: *number
227
+ src: *uri
228
+ step: *number
229
+ type: *keyword
230
+ value: *cdata
231
+ width: *length
232
+ ins:
233
+ cite: *uri
234
+ datetime: *datetime
235
+ keygen:
236
+ autofocus: "autofocus"
237
+ challenge: "challenge"
238
+ disabled: "disabled"
239
+ form: *name
240
+ keytype: *keyword
241
+ name: *name
242
+ kbd:
243
+ label:
244
+ for: *name
245
+ form: *name
246
+ legend:
247
+ li:
248
+ value: *cdata
249
+ link:
250
+ href: *uri
251
+ hreflang: *lang
252
+ media: *keyword
253
+ rel: *keyword
254
+ sizes: &sizes !ruby/regexp /\A(?:[0-9]+x[0-9]+|any)\z/
255
+ target: *keyword
256
+ type: *type
257
+ map:
258
+ name: *name
259
+ mark:
260
+ menu:
261
+ label: *cdata
262
+ type: *keyword
263
+ meta:
264
+ charset: *cdata
265
+ name: *name
266
+ "http-equiv": *name
267
+ content: *cdata
268
+ meter:
269
+ form: *name
270
+ high: &float !ruby/regexp /\A[0-9]+(\.[0-9]+)?\z/
271
+ low: *float
272
+ max: *float
273
+ min: *float
274
+ optimum: *float
275
+ value: *float
276
+ nav:
277
+ noscript:
278
+ object:
279
+ data: *uri
280
+ form: *name
281
+ height: *number
282
+ hspace: *number
283
+ name: *name
284
+ standby: *cdata
285
+ type: *type
286
+ usemap: *usemap
287
+ width: *number
288
+ ol:
289
+ reversed: "reversed"
290
+ start: *number
291
+ type: &listtype !ruby/regexp /\A[1AaIi]\z/
292
+ optgroup:
293
+ label: *cdata
294
+ disabled: "disabled"
295
+ option:
296
+ label: *cdata
297
+ value: *cdata
298
+ selected: "selected"
299
+ disabled: "disabled"
300
+ output:
301
+ for: *name
302
+ form: *name
303
+ name: *name
304
+ p:
305
+ param:
306
+ name: *name
307
+ value: *cdata
308
+ pre:
309
+ progress:
310
+ max: *float
311
+ value: *float
312
+ q:
313
+ cite: *uri
314
+ rp:
315
+ rt:
316
+ ruby:
317
+ s:
318
+ samp:
319
+ script:
320
+ async: "async"
321
+ defer: "defer"
322
+ type: *type
323
+ charset: *cdata
324
+ src: *uri
325
+ section:
326
+ select:
327
+ autofocus: "autofocus"
328
+ disabled: "disabled"
329
+ form: *name
330
+ multiple: "multiple"
331
+ name: *name
332
+ size: *number
333
+ small:
334
+ source:
335
+ media: *keyword
336
+ src: *uri
337
+ type: *type
338
+ span:
339
+ strong:
340
+ style:
341
+ type: "text/css"
342
+ media: *keyword
343
+ scoped: "scoped"
344
+ sub:
345
+ summary:
346
+ sup:
347
+ table:
348
+ border: !ruby/regexp /\A1?\z/
349
+ tbody:
350
+ td:
351
+ colspan: *number
352
+ headers: &idrefs !ruby/regexp /\A[a-z0-9 ,#.:_-]+\z/i
353
+ rowspan: *number
354
+ textarea:
355
+ autofocus: "autofocus"
356
+ cols: *number
357
+ disabled: "disabled"
358
+ dirname: *name
359
+ form: *name
360
+ maxlength: *number
361
+ name: *name
362
+ placeholder: *cdata
363
+ readonly: "readonly"
364
+ required: "required"
365
+ rows: *number
366
+ wrap: &wrap !ruby/regexp /\A(?:hard|soft)\z/i
367
+ tfoot:
368
+ th:
369
+ colspan: *number
370
+ headers: *idrefs
371
+ rowspan: *number
372
+ scope: *keyword
373
+ thead:
374
+ time:
375
+ datetime: *datetime
376
+ pubdate: "pubdate"
377
+ title:
378
+ tr:
379
+ u:
380
+ ul:
381
+ var:
382
+ video:
383
+ audio: "muted"
384
+ autoplay: "autoplay"
385
+ controls: "controls"
386
+ height: *number
387
+ loop: "loop"
388
+ poster: *uri
389
+ preload: !ruby/regexp /\A(?:auto|metadata|none)\z/
390
+ src: *uri
391
+ width: *number
392
+ wbr: