dandruff 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +23 -0
- data/CHANGELOG.md +69 -0
- data/COMPARISON.md +175 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +142 -0
- data/LICENSE.txt +21 -0
- data/Makefile +41 -0
- data/README.md +1196 -0
- data/Rakefile +12 -0
- data/examples/basic_usage.rb +84 -0
- data/examples/email_sanitization_example.md +268 -0
- data/failed-expectations.md +192 -0
- data/lib/dandruff/attributes.rb +223 -0
- data/lib/dandruff/config.rb +500 -0
- data/lib/dandruff/expressions.rb +103 -0
- data/lib/dandruff/tags.rb +160 -0
- data/lib/dandruff/utils.rb +27 -0
- data/lib/dandruff/version.rb +5 -0
- data/lib/dandruff.rb +1095 -0
- metadata +134 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Dandruff
  # Tag allowlists for HTML sanitization
  #
  # This module defines tag allowlists for different content types. Each constant
  # is a frozen Array of tag-name Strings that can be used based on your
  # sanitization requirements. The lists are modelled on DOMPurify's allowlists.
  #
  # @example Using tag lists in configuration
  #   dandruff.configure do |config|
  #     config.allowed_tags = Dandruff::Tags::MINIMAL_HTML
  #   end
  #
  # @see Config Configuration class that uses these tag lists
  module Tags
    # Minimal HTML tag set for basic formatted text
    #
    # Use this for simple user-generated content where you only need basic formatting
    # like bold, italic, headings, links, and lists. This is the most restrictive
    # allowlist in this module and provides the smallest attack surface.
    #
    # **Includes:** Text formatting (b, i, em, strong), headings (h1-h6), links (a),
    # lists (ul, ol, li), code blocks (code, pre), basic structure (div, span, p, br),
    # tables (table, tr, td, th, tbody, thead), images (img), quotes (blockquote)
    #
    # **Security:** Minimal surface area; contains no form elements, no script tag,
    # and no audio/video/embedding elements (only `img` is allowed as media).
    #
    # @example Minimal blog comments
    #   dandruff.configure do |config|
    #     config.allowed_tags = Dandruff::Tags::MINIMAL_HTML
    #   end
    MINIMAL_HTML = %w[
      a b blockquote br code div em h1 h2 h3 h4 h5 h6 i img li ol p pre span strong table tbody td th thead tr ul
    ].freeze

    # Comprehensive HTML5 tag allowlist modelled on DOMPurify defaults
    #
    # This is documented as the default allowlist used when no specific tags are
    # configured. It includes standard HTML5 semantic and structural elements, media
    # elements, and form controls.
    #
    # **Includes:** Semantic HTML5 (article, section, nav, aside, header, footer, main),
    # media elements (audio, video, picture, canvas), form elements (form, input, select, textarea),
    # interactive elements (button, details, dialog), and comprehensive text formatting
    #
    # **Security:** This list itself DOES contain `script`, `iframe`, `object` and
    # `embed`. The sanitizer is expected to remove those tags during sanitization even
    # though they are listed here.
    # NOTE(review): that removal is implemented outside this file - confirm before
    # relying on this constant as a standalone allowlist.
    #
    # @example Default rich content
    #   # This is used automatically when no allowed_tags are specified
    #   dandruff = Dandruff.new
    #   clean = dandruff.sanitize(html)  # Uses HTML allowlist
    HTML = %w[
      a abbr address area article aside audio b bdi bdo blockquote body br button canvas caption cite code
      col colgroup data datalist dd del details dfn dialog div dl dt em embed fieldset figcaption figure footer form
      h1 h2 h3 h4 h5 h6 head header hgroup hr html i iframe img input ins kbd label legend li main map mark
      meter nav noscript object ol optgroup option output p param picture pre progress q rp rt ruby s samp
      script section search select small source span strong sub summary sup table tbody td template textarea tfoot
      th thead time title tr track u ul var video wbr
    ].freeze

    # SVG (Scalable Vector Graphics) tag allowlist
    #
    # Use this when you need to support inline SVG content. Includes core SVG elements
    # for shapes, paths, gradients, and basic filters. Combine with `use_profiles: { svg: true }`
    # configuration for complete SVG support including attributes.
    #
    # **Includes:** Basic shapes (rect, circle, ellipse, line, polygon, polyline, path),
    # grouping (g, defs, symbol, use), gradients (linearGradient, radialGradient, stop),
    # text (text, tspan, textPath), and structural elements (svg, pattern, marker, mask, filter)
    #
    # **Security:** Requires a corresponding attribute allowlist; this constant only
    # restricts tag names.
    #
    # @example SVG icons and graphics
    #   dandruff.configure do |config|
    #     config.use_profiles = { html: true, svg: true }
    #   end
    SVG = %w[svg g path rect circle ellipse line polyline polygon text tspan textPath marker pattern defs desc mask
             linearGradient radialGradient stop use image view symbol feImage filter a title].freeze

    # SVG filter effects tag allowlist
    #
    # SVG filter primitives for visual effects like blur, color manipulation,
    # lighting, and compositing. Use this in addition to SVG tags when you need
    # filter effects support.
    #
    # **Includes:** SVG filter primitives (feBlend, feColorMatrix, feGaussianBlur,
    # feDropShadow, feMorphology, etc.) plus the `filter` container element
    #
    # **Security:** Intended to be combined with proper attribute sanitization.
    #
    # @example SVG with filters
    #   dandruff.configure do |config|
    #     config.use_profiles = { svg: true, svg_filters: true }
    #   end
    SVG_FILTERS = %w[
      filter feBlend feColorMatrix feComponentTransfer feComposite feConvolveMatrix feDiffuseLighting
      feDisplacementMap feDropShadow feFlood feFuncA feFuncB feFuncG feFuncR feGaussianBlur feImage feMerge
      feMergeNode feMorphology feOffset feSpecularLighting feTile feTurbulence
    ].freeze

    # MathML (Mathematical Markup Language) tag allowlist
    #
    # Use this for mathematical and scientific content. Includes core MathML elements
    # for rendering mathematical notation and formulas.
    #
    # **Includes:** Numbers and identifiers (mi, mn, mo, ms, mtext), layout elements
    # (mrow, mfrac, msqrt, mroot, mstyle), tables (mtable, mtr, mtd), spacing (mspace, mpadded)
    #
    # **Security:** Only restricts tag names; protection against MathML namespace
    # confusion (mXSS) has to come from the sanitizer itself.
    #
    # @example Mathematical content
    #   dandruff.configure do |config|
    #     config.use_profiles = { html: true, math_ml: true }
    #   end
    MATH_ML = %w[
      math mi mn mo ms mtext mspace menclose mstyle mfrac msqrt mroot mtable mtr mtd maligngroup malignmark
      mpadded mphantom mrow
    ].freeze

    # HTML Email tag allowlist (includes legacy presentational tags)
    #
    # Specialized tag list for HTML email rendering. Starts from {HTML}, adds legacy
    # presentational tags (font, center) and document-level tags (meta, style), then
    # removes interactive elements (forms, buttons) and script/embedding vectors.
    #
    # **Includes:** HTML tags + meta, style, center, font (head and title are already
    # part of HTML and are kept)
    # **Excludes:** script, form, input, select, textarea, button, object, embed, iframe, frame, frameset
    #
    # **Security:** Designed for sandboxed email rendering contexts. Allows the style
    # tag (required for email); its content still has to be sanitized elsewhere.
    #
    # **Note:** Email clients have inconsistent rendering, so test thoroughly. Use with
    # `use_profiles: { html_email: true }` for complete email configuration including
    # per-tag attribute restrictions.
    #
    # @example Email content sanitization
    #   dandruff.configure do |config|
    #     config.use_profiles = { html_email: true }
    #   end
    #
    # `uniq` is required because HTML already lists `head` and `title`; without it
    # the concatenation would contain both tags twice.
    HTML_EMAIL = (HTML + %w[
      head meta title style center font
    ] - %w[
      script form input select textarea button object embed iframe frame frameset
    ]).uniq.freeze

    # Text node marker
    #
    # Special marker for text nodes in the DOM. Used internally for text content handling.
    #
    # @api private
    TEXT = %w[#text].freeze
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Dandruff
  # Shared helper routines for the Dandruff sanitizer
  module Utils
    module_function

    # Recursively copies a nested Hash/Array structure.
    #
    # * Hash  - returns a new hash with the same keys and deep-copied values
    #           (the keys themselves are not copied).
    # * Array - returns a new array whose elements are deep-copied.
    # * other - returns `obj.dup`; if `dup` raises a StandardError the original
    #           object is returned unchanged.
    #
    # @param obj [Object] the object to duplicate
    # @return [Object] the deep duplicated object
    def deep_dup(obj)
      case obj
      when Hash  then obj.transform_values { |value| deep_dup(value) }
      when Array then obj.map { |element| deep_dup(element) }
      else shallow_copy(obj)
      end
    end

    # Best-effort `dup`: falls back to the object itself when it cannot be duplicated.
    def shallow_copy(obj)
      obj.dup
    rescue StandardError
      obj
    end
    private_class_method :shallow_copy
  end
end
|