dandruff 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +23 -0
- data/CHANGELOG.md +69 -0
- data/COMPARISON.md +175 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +142 -0
- data/LICENSE.txt +21 -0
- data/Makefile +41 -0
- data/README.md +1196 -0
- data/Rakefile +12 -0
- data/examples/basic_usage.rb +84 -0
- data/examples/email_sanitization_example.md +268 -0
- data/failed-expectations.md +192 -0
- data/lib/dandruff/attributes.rb +223 -0
- data/lib/dandruff/config.rb +500 -0
- data/lib/dandruff/expressions.rb +103 -0
- data/lib/dandruff/tags.rb +160 -0
- data/lib/dandruff/utils.rb +27 -0
- data/lib/dandruff/version.rb +5 -0
- data/lib/dandruff.rb +1095 -0
- metadata +134 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Dandruff
  # Attribute allowlists and security denylists for HTML sanitization
  #
  # This module defines attribute allowlists for different content types
  # (HTML, SVG, MathML) and security-focused denylists for dangerous attributes and
  # DOM clobbering attack vectors. These lists are based on DOMPurify's security
  # model and web standards.
  #
  # Every constant is a frozen Array of lowercase attribute-name Strings, so a
  # caller comparing against them must normalise the candidate name to lowercase
  # first.
  #
  # @example Using attribute lists in configuration
  #   dandruff.configure do |config|
  #     config.allowed_attributes = Dandruff::Attributes::HTML
  #   end
  #
  # @see Config Configuration class that uses these attribute lists
  module Attributes
    # Standard HTML attribute allowlist
    #
    # Safe HTML attributes for standard web content. These attributes cover forms,
    # media, accessibility, styling, and interactive elements while excluding
    # event handlers and script execution vectors.
    #
    # **Includes:** Layout and presentation (width, height, align, style, class, id),
    # links (href, hreflang, rel), forms (type, name, value, placeholder),
    # media (src, controls, poster), accessibility (alt, title, role, tabindex, lang),
    # and HTML5 features (autocomplete, loading)
    #
    # **Excludes:** Event handlers (onclick, onload, onerror). `target` is NOT part
    # of this list; only HTML_EMAIL adds it.
    #
    # **Security:** This constant only names attributes. Validation of URI-like
    # values (href, src) and of style content is performed elsewhere in the library.
    #
    # @example Standard HTML content
    #   dandruff.configure do |config|
    #     config.allowed_attributes = Dandruff::Attributes::HTML
    #   end
    HTML = %w[
      accept action align alt autocapitalize autocomplete autopictureinpicture autoplay
      background bgcolor border capture cellpadding cellspacing checked cite class clear
      color cols colspan controls controlslist coords crossorigin datetime decoding
      default dir disabled disablepictureinpicture disableremoteplayback download
      draggable enctype enterkeyhint exportparts face for headers height hidden high
      href hreflang id inert inputmode integrity ismap kind label lang list loading
      loop low max maxlength media method min minlength multiple muted name nonce
      noshade novalidate nowrap open optimum part pattern placeholder playsinline
      popover popovertarget popovertargetaction poster preload pubdate radiogroup
      readonly rel required rev reversed role rows rowspan spellcheck scope selected
      shape size sizes slot span srclang start src srcset step style summary tabindex
      title translate type usemap valign value width wrap xmlns
    ].freeze

    # SVG attribute allowlist
    #
    # Attributes for SVG (Scalable Vector Graphics) elements: presentation,
    # animation, filter, and transformation attributes.
    #
    # **Includes:** Geometric properties (x, y, cx, cy, width, height, r, rx, ry),
    # styling (fill, stroke, opacity, color), transformations (transform, rotate, scale),
    # gradients (gradienttransform, spreadmethod), filter primitives
    # (stddeviation, kernelmatrix, ...), and text rendering (font-*, text-*)
    #
    # **Security:** Intended to be combined with tag validation; namespace handling
    # is implemented outside this file.
    #
    # @example SVG graphics
    #   dandruff.configure do |config|
    #     config.use_profiles = { svg: true }  # Includes SVG attributes
    #   end
    SVG = %w[
      accent-height accumulate additive alignment-baseline amplitude ascent attributename
      attributetype azimuth basefrequency baseline-shift begin bias by class clip
      clippathunits clip-path clip-rule color color-interpolation color-interpolation-filters
      color-profile color-rendering cx cy d dx dy diffuseconstant direction display
      divisor dur edgemode elevation end exponent fill fill-opacity fill-rule filter
      filterunits flood-color flood-opacity font-family font-size font-size-adjust
      font-stretch font-style font-variant font-weight fx fy g1 g2 glyph-name
      glyphref gradientunits gradienttransform height href id image-rendering in in2
      intercept k k1 k2 k3 k4 kerning keypoints keysplines keytimes lang lengthadjust
      letter-spacing kernelmatrix kernelunitlength lighting-color local marker-end
      marker-mid marker-start markerheight markerunits markerwidth mask mask-type
      media method mode min name numoctaves offset operator opacity order orient
      orientation origin overflow paint-order path pathlength patterncontentunits
      patterntransform patternunits points preservealpha preserveaspectratio
      primitiveunits r rx ry radius refx refy repeatcount repeatdur restart result
      rotate scale seed shape-rendering slope specularconstant specularexponent
      spreadmethod startoffset stddeviation stitchtiles stop-color stop-opacity
      stroke-dasharray stroke-dashoffset stroke-linecap stroke-linejoin stroke-miterlimit
      stroke-opacity stroke stroke-width style surfacescale systemlanguage tabindex
      tablevalues targetx targety transform transform-origin text-anchor
      text-decoration text-rendering textlength type u1 u2 unicode values viewbox
      visibility version vert-adv-y vert-origin-x vert-origin-y width word-spacing
      wrap writing-mode xchannelselector ychannelselector x x1 x2 xmlns y y1 y2 z
      zoomandpan
    ].freeze

    # MathML attribute allowlist
    #
    # Attributes for MathML (Mathematical Markup Language) elements: mathematical
    # notation, spacing, alignment, and styling.
    #
    # **Includes:** Spacing and alignment (lspace, rspace, linethickness, rowspacing),
    # sizing (mathsize, minsize, maxsize), styling (mathcolor, mathbackground),
    # notation (notation, accent, fence), structural (displaystyle, scriptlevel),
    # and table layout (columnalign, columnspan, rowalign, rowspan)
    #
    # `columnalign` and `columnspan` are the attribute names MathML defines for
    # table alignment and cell spanning. The `columnsalign` and `colspan` spellings
    # are retained so configurations that relied on them keep working.
    #
    # **Security:** Intended for mathematical notation when properly namespaced;
    # namespace validation is implemented outside this file.
    #
    # @example Mathematical formulas
    #   dandruff.configure do |config|
    #     config.use_profiles = { math_ml: true }  # Includes MathML attributes
    #   end
    MATH_ML = %w[
      accent accentunder align bevelled close columnalign columnsalign columnlines
      columnspan colspan denomalign depth dir display displaystyle encoding fence frame
      height href id largeop length linethickness lspace lquote mathbackground mathcolor
      mathsize mathvariant maxsize minsize movablelimits notation numalign open rowalign
      rowlines rowspacing rowspan rspace rquote scriptlevel scriptminsize
      scriptsizemultiplier selection separator separators stretchy subscriptshift
      supscriptshift symmetric voffset width xmlns
    ].freeze

    # XML namespace attributes
    #
    # Attributes used for XML namespace declarations and XLink href references.
    # Required for SVG linking and namespace handling.
    #
    # **Includes:** xlink:href, xml:id, xlink:title, xml:space, xmlns:xlink, xmlns
    #
    # **Security:** This constant only names the attributes; validation of their
    # values happens elsewhere in the library.
    #
    # @api private
    XML = %w[
      xlink:href xml:id xlink:title xml:space xmlns:xlink xmlns
    ].freeze

    # HTML Email attribute allowlist (includes legacy presentation attributes)
    #
    # Extended attribute list for HTML email rendering: everything in HTML plus the
    # legacy presentational attributes email clients rely on.
    #
    # **Includes:** HTML attributes + link target, body colours (text, link, vlink,
    # alink), legacy table attributes (cellpadding, cellspacing, bgcolor, valign),
    # font attributes (face, size, color), layout attributes (leftmargin, topmargin,
    # marginwidth, marginheight), and meta attributes (content)
    #
    # Several of the legacy names are already present in HTML. They stay listed here
    # so this profile keeps them even if HTML is trimmed later; the set union (`|`)
    # guarantees each name appears exactly once, in first-occurrence order.
    #
    # **Security:** Designed for sandboxed email contexts. Use with the html_email
    # profile for per-tag restrictions.
    #
    # **Note:** Email clients vary widely - test thoroughly across clients.
    #
    # @example Email sanitization
    #   dandruff.configure do |config|
    #     config.use_profiles = { html_email: true }
    #   end
    HTML_EMAIL = (HTML | %w[
      target bgcolor text link vlink alink background border cellpadding cellspacing
      width height align valign face size color content leftmargin topmargin marginwidth marginheight
    ]).freeze

    # Dangerous event handler and script protocol patterns
    #
    # List of attribute names and URI scheme prefixes that enable script execution.
    #
    # **Includes:**
    # - Event handlers: onclick, onload, onerror, onmouseover, onfocus, etc.
    # - URI protocols: javascript:, vbscript:, data:text/html
    #
    # **Security:** This is a security-critical denylist.
    #
    # NOTE(review): this is a fixed enumeration. Event handlers not listed here
    # (for example pointer, touch, or animation events) are only rejected if the
    # sanitizer also filters `on*` names generically - confirm against the caller.
    #
    # @example Blocked patterns
    #   # <a onclick="alert(1)"> - onclick blocked
    #   # <img src="javascript:alert(1)"> - javascript: blocked
    #   # <link href="vbscript:msgbox(1)"> - vbscript: blocked
    #
    # @api private
    DANGEROUS = %w[
      onclick ondblclick onmousedown onmouseup onmouseover onmousemove
      onmouseout onkeypress onkeydown onkeyup onload onunload onabort
      onerror onfocus onblur onchange onsubmit onreset onselect
      onscroll onresize oncopy oncut onpaste ondrag ondrop
      javascript: vbscript: data:text/html
    ].freeze

    # DOM clobbering attack attribute values
    #
    # List of id/name attribute values that can be used for DOM clobbering attacks.
    # These values would allow attackers to override built-in DOM properties and
    # methods, potentially bypassing security checks.
    #
    # **Includes:** Browser object properties (window, document, location, alert),
    # DOM properties (innerHTML, outerHTML, attributes, children), prototype chain
    # (__proto__, constructor, prototype), and critical methods (getElementById,
    # createElement, setAttribute, etc.)
    #
    # Entries are written in their familiar DOM spelling below but are stored
    # lowercased, de-duplicated, and individually frozen. Compare against this list
    # with a lowercased candidate value.
    #
    # **Security:** Used together with the `sanitize_dom` configuration option to
    # block these values in id and name attributes.
    #
    # **Background:** DOM clobbering occurs when HTML attributes like id/name override
    # built-in browser objects, e.g., `<img id="document">` makes `document` refer to
    # the image instead of the DOM document object.
    #
    # @example Prevented attacks
    #   # <form name="document"> - blocked, would clobber window.document
    #   # <img id="location"> - blocked, would clobber window.location
    #   # <div id="alert"> - blocked, would clobber window.alert
    #
    # @see Config#sanitize_dom Configuration option to enable/disable DOM clobbering protection
    # @api private
    DOM_CLOBBERING = %w[
      __proto__ __parent__ constructor prototype contentwindow contentdocument parentnode ownerdocument location
      attributes nodevalue innerhtml outerhtml localname documenturi srcdoc url
      createelement renamenode appendchild insertbefore replacechild removechild normalize clonenode
      alert document window frames frame form forms elements children documentelement implementation
      cookie body adoptNode activeElement firstElementChild submit acceptCharset hasChildNodes namespaceURI
      getElementById setAttribute removeAttributeNode nodeType nodeName parentNode
    ].map { |name| name.downcase.freeze }.uniq.freeze
    # `uniq` matters: "parentnode" and "parentNode" collapse to the same entry once
    # lowercased. `downcase` returns a new String, so each element is frozen
    # explicitly to keep the whole constant immutable.
  end
end
|