whitewash 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ -----------------------------------------------------------------
2
+ Revision: 43067aa79f57dfb92165fd94da151d45f56ed87c
3
+ Ancestor: 3a0dff129b6a5408055b65dd804f3831d8d16f15
4
+ Author: angdraug@debian.org
5
+ Date: 2009-08-22T12:36:05
6
+ Branch: whitewash-head
7
+
8
+ Modified files:
9
+ lib/whitewash.rb
10
+
11
+ ChangeLog:
12
+
13
+ wrap global variables handling in Thread.exclusive
14
+
15
+ -----------------------------------------------------------------
16
+ Revision: 3a0dff129b6a5408055b65dd804f3831d8d16f15
17
+ Ancestor: b544d3c9fe594e3862cf518ae51421e1f3816cc9
18
+ Author: angdraug@debian.org
19
+ Date: 2009-08-13T11:01:26
20
+ Branch: whitewash-head
21
+
22
+ Modified files:
23
+ README.rdoc lib/whitewash.rb
24
+
25
+ ChangeLog:
26
+
27
+ made it easier to override tidypath
28
+
29
+ * made default_whitelist a public class method so that it can be used
30
+ in Whitewash.new() invocation
31
+ * documented the way Whitewash looks for Tidy
32
+
33
+ -----------------------------------------------------------------
34
+ Revision: b544d3c9fe594e3862cf518ae51421e1f3816cc9
35
+ Ancestor: c6398a8b6433921353ec5b0a1cf616804a550961
36
+ Author: angdraug@debian.org
37
+ Date: 2009-07-28T11:25:34
38
+ Branch: whitewash-head
39
+
40
+ Modified files:
41
+ lib/whitewash.rb
42
+
43
+ ChangeLog:
44
+
45
+ require rbconfig for access to Config::CONFIG
46
+
47
+ -----------------------------------------------------------------
48
+ Revision: c6398a8b6433921353ec5b0a1cf616804a550961
49
+ Ancestor:
50
+ Author: angdraug@debian.org
51
+ Date: 2009-07-27T16:21:20
52
+ Branch: whitewash-head
53
+
54
+ Added files:
55
+ COPYING README.rdoc data/whitewash/whitelist.yaml
56
+ lib/whitewash.rb lib/whitewash_rexml_attribute_patch.rb
57
+ setup.rb
58
+ Added directories:
59
+ . data data/whitewash lib
60
+
61
+ ChangeLog:
62
+
63
+ initial checkin: Whitewash is a spin-off of sanitize.rb from Samizdat project
64
+
@@ -0,0 +1,50 @@
1
+ = Whitewash - whitelist-based HTML filter for Ruby
2
+
3
+ This module allows Ruby programs to clean up any HTML document or
4
+ fragment coming from an untrusted source and to remove all dangerous
5
+ constructs that could be used for cross-site scripting or request
6
+ forgery.
7
+
8
+
9
+ == Synopsis
10
+
11
+ require 'whitewash'
12
+
13
+ whitewash = Whitewash.new # use default whitelist.yaml
14
+ clean_xhtml = whitewash.sanitize(html)
15
+
16
+
17
+ == Summary
18
+
19
+ All HTML tags, attribute names and values, and CSS properties are
20
+ filtered through a whitelist that defines which names and what kinds of
21
+ values are allowed; everything that doesn't match the whitelist is
22
+ removed.
23
+
24
+ The whitelist is provided externally; the default whitelist is loaded from
25
+ the whitelist.yaml shipped with Whitewash. The default is most strict
26
+ (for example, it does not allow cross-site links to images in IMG tags)
27
+ and can be considered safe for all uses. If you find that it lets
28
+ anything exploitable through, please report it as a bug to Whitewash
29
+ developers.
30
+
31
+ An alternative whitelist is provided for HTML5 in html5_whitelist.yaml.
32
+ This whitelist is as much a work in progress as the HTML5 standard itself;
33
+ use at your own risk.
34
+
35
+
36
+ == External Dependencies
37
+
38
+ Whitewash relies on Nokogiri to parse arbitrary HTML and put it back
39
+ together as valid XHTML:
40
+
41
+ * http://nokogiri.org/
42
+
43
+
44
+ == Copying
45
+
46
+ Copyright (c) 2002-2011 Dmitry Borodaenko <angdraug@debian.org>
47
+
48
+ This program is free software.
49
+ You can distribute/modify this program under the terms of the GNU
50
+ General Public License version 3 or later.
@@ -0,0 +1,392 @@
1
+ ---
2
+ # html5_whitelist.yaml
3
+ #
4
+ # Allowed HTML5 tags and attributes.
5
+ # HTML5 is still under development, and this file
6
+ # definition is current as of October 2011.
7
+
8
+ # _common defines attributes that can be present in any tag
9
+ _common:
10
+ accesskey: !ruby/regexp /\A[[:alnum:]]\z/
11
+ class: &name !ruby/regexp /\A[a-z0-9 .:_-]+\z/i
12
+ contenteditable: !ruby/regexp /\A(true|false|inherit)\z/
13
+ contextmenu: *name
14
+ dir: !ruby/regexp /\A(ltr|rtl|auto)\z/
15
+ draggable: !ruby/regexp /\A(true|false|auto)\z/
16
+ dropzone: !ruby/regexp /\A(copy|move|link)\z/
17
+ hidden: "hidden"
18
+ id: *name
19
+ lang: &lang !ruby/regexp /\A[a-z]+(-[a-z]*)?\z/i
20
+ spellcheck: !ruby/regexp /\A(true|false)\z/
21
+ style: &cdata !ruby/regexp /\A[^'"]*\z/
22
+ tabindex: &number !ruby/regexp /\A[0-9]+\z/
23
+ title: *cdata
24
+
25
+ # _css lists CSS properties allowed inside "style" attribute
26
+ _css: [ animation, animation-name, animation-duration,
27
+ animation-timing-function, animation-delay, animation-iteration-count,
28
+ animation-direction, animation-play-state, background, background-attachment,
29
+ background-color, background-image, background-position, background-repeat,
30
+ background-clip, background-origin, background-size, border, border-bottom,
31
+ border-bottom-color, border-bottom-style, border-bottom-width, border-color,
32
+ border-left, border-left-color, border-left-style, border-left-width,
33
+ border-right, border-right-color, border-right-style, border-right-width,
34
+ border-style, border-top, border-top-color, border-top-style, border-top-width,
35
+ border-width, outline, outline-color, outline-style, outline-width,
36
+ border-bottom-left-radius, border-bottom-right-radius, border-image,
37
+ border-image-outset, border-image-repeat, border-image-slice,
38
+ border-image-source, border-image-width, border-radius, border-top-left-radius,
39
+ border-top-right-radius, box-decoration-break, box-shadow, overflow-x,
40
+ overflow-y, overflow-style, rotation, rotation-point, color-profile, opacity,
41
+ rendering-intent, bookmark-label, bookmark-level, bookmark-target,
42
+ float-offset, hyphenate-after, hyphenate-before, hyphenate-character,
43
+ hyphenate-lines, hyphenate-resource, hyphens, image-resolution, marks,
44
+ string-set, height, max-height, max-width, min-height, min-width, width,
45
+ box-align, box-direction, box-flex, box-flex-group, box-lines,
46
+ box-ordinal-group, box-orient, box-pack, font, font-family, font-size,
47
+ font-style, font-variant, font-weight, font-size-adjust, font-stretch, content,
48
+ counter-increment, counter-reset, quotes, crop, move-to, page-policy,
49
+ grid-columns, grid-rows, target, target-name, target-new, target-position,
50
+ alignment-adjust, alignment-baseline, baseline-shift, dominant-baseline,
51
+ drop-initial-after-adjust, drop-initial-after-align,
52
+ drop-initial-before-adjust, drop-initial-before-align, drop-initial-size,
53
+ drop-initial-value, inline-box-align, line-stacking, line-stacking-ruby,
54
+ line-stacking-shift, line-stacking-strategy, text-height, list-style,
55
+ list-style-image, list-style-position, list-style-type, margin, margin-bottom,
56
+ margin-left, margin-right, margin-top, marquee-direction, marquee-play-count,
57
+ marquee-speed, marquee-style, column-count, column-fill, column-gap,
58
+ column-rule, column-rule-color, column-rule-style, column-rule-width,
59
+ column-span, column-width, columns, padding, padding-bottom, padding-left,
60
+ padding-right, padding-top, fit, fit-position, image-orientation, page, size,
61
+ bottom, clear, clip, cursor, display, float, left, overflow, position, right,
62
+ top, visibility, z-index, orphans, page-break-after, page-break-before,
63
+ page-break-inside, widows, ruby-align, ruby-overhang, ruby-position, ruby-span,
64
+ mark, mark-after, mark-before, phonemes, rest, rest-after, rest-before,
65
+ voice-balance, voice-duration, voice-pitch, voice-pitch-range, voice-rate,
66
+ voice-stress, voice-volume, border-collapse, border-spacing, caption-side,
67
+ empty-cells, table-layout, color, direction, letter-spacing, line-height,
68
+ text-align, text-decoration, text-indent, text-transform, unicode-bidi,
69
+ vertical-align, white-space, word-spacing, hanging-punctuation,
70
+ punctuation-trim, text-align-last, text-justify, text-outline, text-overflow,
71
+ text-shadow, text-wrap, word-break, word-wrap, transform, transform-origin,
72
+ transform-style, perspective, perspective-origin, backface-visibility,
73
+ transition, transition-property, transition-duration,
74
+ transition-timing-function, transition-delay, appearance, box-sizing, icon,
75
+ nav-down, nav-index, nav-left, nav-right, nav-up, outline-offset, resize ]
76
+
77
+ a:
78
+ href: &uri !ruby/regexp /\A((http|https|ftp|mailto):[^"\s]+|[^:\s]+)\z/i
79
+ hreflang: *lang
80
+ media: &keyword !ruby/regexp /\A[a-z ]+\z/i
81
+ rel: *keyword
82
+ target: *keyword
83
+ type: &type !ruby/regexp /\A[ ,a-z0-9\/+-]+\z/i
84
+ abbr:
85
+ address:
86
+ area:
87
+ alt: *cdata
88
+ coords: !ruby/regexp /\A[0-9,]+\z/i
89
+ href: *uri
90
+ media: *keyword
91
+ rel: *keyword
92
+ target: *keyword
93
+ type: *type
94
+ article:
95
+ aside:
96
+ audio:
97
+ autoplay: "autoplay"
98
+ controls: "controls"
99
+ loop: "loop"
100
+ preload: *keyword
101
+ src: *uri
102
+ b:
103
+ base:
104
+ href: *uri
105
+ target: *keyword
106
+ bdi:
107
+ bdo:
108
+ dir: *keyword
109
+ blockquote:
110
+ cite: *uri
111
+ body:
112
+ br:
113
+ button:
114
+ autofocus: "autofocus"
115
+ disabled: "disabled"
116
+ form: *name
117
+ formaction: *uri
118
+ formenctype: *type
119
+ formmethod: &method !ruby/regexp /\A(get|post)\z/i
120
+ formnovalidate: "formnovalidate"
121
+ formtarget: *keyword
122
+ name: *cdata
123
+ type: *keyword
124
+ value: *cdata
125
+ canvas:
126
+ width: &length !ruby/regexp /\A[0-9]+(%|px)?\z/
127
+ height: *length
128
+ caption:
129
+ cite:
130
+ code:
131
+ col:
132
+ span: *number
133
+ colgroup:
134
+ span: *number
135
+ command:
136
+ checked: "checked"
137
+ disabled: "disabled"
138
+ icon: *uri
139
+ label: *cdata
140
+ radiogroup: *name
141
+ type: *keyword
142
+ datalist:
143
+ dd:
144
+ del:
145
+ cite: *uri
146
+ datetime: &datetime !ruby/regexp /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[A-Z]+\z/i
147
+ details:
148
+ open: "open"
149
+ dfn:
150
+ div:
151
+ dl:
152
+ dt:
153
+ em:
154
+ embed:
155
+ height: *length
156
+ src: *uri
157
+ type: *type
158
+ width: *length
159
+ fieldset:
160
+ disabled: "disabled"
161
+ form: *name
162
+ name: *cdata
163
+ figcaption:
164
+ figure:
165
+ footer:
166
+ form:
167
+ action: *uri
168
+ "accept-charset": *cdata
169
+ autocomplete : &onoff !ruby/regexp /\A(on|off)\z/
170
+ enctype: *type
171
+ method: *method
172
+ name: *cdata
173
+ novalidate: "novalidate"
174
+ target: *keyword
175
+ h1:
176
+ h2:
177
+ h3:
178
+ h4:
179
+ h5:
180
+ h6:
181
+ head:
182
+ header:
183
+ hgroup:
184
+ hr:
185
+ html:
186
+ manifest: *uri
187
+ xmlns: "http://www.w3.org/1999/xhtml"
188
+ i:
189
+ iframe:
190
+ height: *length
191
+ name: *name
192
+ sandbox: *keyword
193
+ seamless: "seamless"
194
+ src: *uri
195
+ srcdoc: *cdata
196
+ width: *length
197
+ img:
198
+ alt: *cdata
199
+ src: *uri
200
+ height: *length
201
+ usemap: &usemap !ruby/regexp /\A#[a-z0-9 .:_-]+\z/i
202
+ ismap: "ismap"
203
+ width: *length
204
+ input:
205
+ accept: *type
206
+ alt: *cdata
207
+ autocomplete: *onoff
208
+ autofocus: "autofocus"
209
+ checked: "checked"
210
+ disabled: "disabled"
211
+ form: *name
212
+ formaction: *uri
213
+ formenctype: *type
214
+ formmethod: *method
215
+ formnovalidate: "formnovalidate"
216
+ formtarget: *keyword
217
+ height: *length
218
+ list: *name
219
+ max: *number # date
220
+ multiple: "multiple"
221
+ name: *cdata
222
+ pattern: *cdata
223
+ placeholder: *cdata
224
+ readonly: "readonly"
225
+ required: "required"
226
+ size: *number
227
+ src: *uri
228
+ step: *number
229
+ type: *keyword
230
+ value: *cdata
231
+ width: *length
232
+ ins:
233
+ cite: *uri
234
+ datetime: *datetime
235
+ keygen:
236
+ autofocus: "autofocus"
237
+ challenge: "challenge"
238
+ disabled: "disabled"
239
+ form: *name
240
+ keytype: *keyword
241
+ name: *name
242
+ kbd:
243
+ label:
244
+ for: *name
245
+ form: *name
246
+ legend:
247
+ li:
248
+ value: *cdata
249
+ link:
250
+ href: *uri
251
+ hreflang: *lang
252
+ media: *keyword
253
+ rel: *keyword
254
+ sizes: &sizes !ruby/regexp /\A([0-9]+x[0-9]+|any)\z/
255
+ target: *keyword
256
+ type: *type
257
+ map:
258
+ name: *name
259
+ mark:
260
+ menu:
261
+ label: *cdata
262
+ type: *keyword
263
+ meta:
264
+ charset: *cdata
265
+ name: *name
266
+ "http-equiv": *name
267
+ content: *cdata
268
+ meter:
269
+ form: *name
270
+ high: &float !ruby/regexp /\A[0-9]+(\.[0-9]+)?\z/
271
+ low: *float
272
+ max: *float
273
+ min: *float
274
+ optimum: *float
275
+ value: *float
276
+ nav:
277
+ noscript:
278
+ object:
279
+ data: *uri
280
+ form: *name
281
+ height: *number
282
+ hspace: *number
283
+ name: *name
284
+ standby: *cdata
285
+ type: *type
286
+ usemap: *usemap
287
+ width: *number
288
+ ol:
289
+ reversed: "reversed"
290
+ start: *number
291
+ type: &listtype !ruby/regexp /\A[1AaIi]\z/
292
+ optgroup:
293
+ label: *cdata
294
+ disabled: "disabled"
295
+ option:
296
+ label: *cdata
297
+ value: *cdata
298
+ selected: "selected"
299
+ disabled: "disabled"
300
+ output:
301
+ for: *name
302
+ form: *name
303
+ name: *name
304
+ p:
305
+ param:
306
+ name: *name
307
+ value: *cdata
308
+ pre:
309
+ progress:
310
+ max: *float
311
+ value: *float
312
+ q:
313
+ cite: *uri
314
+ rp:
315
+ rt:
316
+ ruby:
317
+ s:
318
+ samp:
319
+ script:
320
+ async: "async"
321
+ defer: "defer"
322
+ type: *type
323
+ charset: *cdata
324
+ src: *uri
325
+ section:
326
+ select:
327
+ autofocus: "autofocus"
328
+ disabled: "disabled"
329
+ form: *name
330
+ multiple: "multiple"
331
+ name: *name
332
+ size: *number
333
+ small:
334
+ source:
335
+ media: *keyword
336
+ src: *uri
337
+ type: *type
338
+ span:
339
+ strong:
340
+ style:
341
+ type: "text/css"
342
+ media: *keyword
343
+ scoped: "scoped"
344
+ sub:
345
+ summary:
346
+ sup:
347
+ table:
348
+ border: !ruby/regexp /\A1?\z/
349
+ tbody:
350
+ td:
351
+ colspan: *number
352
+ headers: &idrefs !ruby/regexp /\A[a-z0-9 ,#.:_-]+\z/i
353
+ rowspan: *number
354
+ textarea:
355
+ autofocus: "autofocus"
356
+ cols: *number
357
+ disabled: "disabled"
358
+ dirname: *name
359
+ form: *name
360
+ maxlength: *number
361
+ name: *name
362
+ placeholder: *cdata
363
+ readonly: "readonly"
364
+ required: "required"
365
+ rows: *number
366
+ wrap: &wrap !ruby/regexp /\A(hard|soft)\z/i
367
+ tfoot:
368
+ th:
369
+ colspan: *number
370
+ headers: *idrefs
371
+ rowspan: *number
372
+ scope: *keyword
373
+ thead:
374
+ time:
375
+ datetime: *datetime
376
+ pubdate: "pubdate"
377
+ title:
378
+ tr:
379
+ u:
380
+ ul:
381
+ var:
382
+ video:
383
+ audio: "muted"
384
+ autoplay: "autoplay"
385
+ controls: "controls"
386
+ height: *number
387
+ loop: "loop"
388
+ poster: *uri
389
+ preload: !ruby/regexp /\A(auto|metadata|none)\z/
390
+ src: *uri
391
+ width: *number
392
+ wbr: