dandruff 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +23 -0
- data/CHANGELOG.md +69 -0
- data/COMPARISON.md +175 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +142 -0
- data/LICENSE.txt +21 -0
- data/Makefile +41 -0
- data/README.md +1196 -0
- data/Rakefile +12 -0
- data/examples/basic_usage.rb +84 -0
- data/examples/email_sanitization_example.md +268 -0
- data/failed-expectations.md +192 -0
- data/lib/dandruff/attributes.rb +223 -0
- data/lib/dandruff/config.rb +500 -0
- data/lib/dandruff/expressions.rb +103 -0
- data/lib/dandruff/tags.rb +160 -0
- data/lib/dandruff/utils.rb +27 -0
- data/lib/dandruff/version.rb +5 -0
- data/lib/dandruff.rb +1095 -0
- metadata +134 -0
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Dandruff
|
|
4
|
+
# Configuration class for the Dandruff sanitizer
|
|
5
|
+
#
|
|
6
|
+
# This class manages all configuration options for customizing HTML sanitization behavior.
|
|
7
|
+
# It provides sensible security-focused defaults and allows fine-grained control through
|
|
8
|
+
# numerous configuration options. Configuration can be set during initialization or modified
|
|
9
|
+
# later through accessor methods.
|
|
10
|
+
#
|
|
11
|
+
# @example Basic configuration
|
|
12
|
+
# config = Dandruff::Config.new(
|
|
13
|
+
# allowed_tags: ['p', 'strong', 'em'],
|
|
14
|
+
# allowed_attributes: ['class', 'href']
|
|
15
|
+
# )
|
|
16
|
+
#
|
|
17
|
+
# @example Using profiles
|
|
18
|
+
# config = Dandruff::Config.new(use_profiles: { html: true, svg: true })
|
|
19
|
+
#
|
|
20
|
+
# @example Block configuration
|
|
21
|
+
# dandruff = Dandruff.new do |config|
|
|
22
|
+
# config.allowed_tags = ['p', 'a']
|
|
23
|
+
# config.forbidden_attributes = ['onclick']
|
|
24
|
+
# end
|
|
25
|
+
#
|
|
26
|
+
# @see Sanitizer Main sanitizer class that uses this configuration
|
|
27
|
+
class Config
|
|
28
|
+
# @!attribute [rw] additional_attributes
|
|
29
|
+
# Additional attributes to allow beyond defaults
|
|
30
|
+
# @return [Array<String>] array of attribute names to add to allowlist
|
|
31
|
+
# @example
|
|
32
|
+
# config.additional_attributes = ['data-custom', 'aria-label']
|
|
33
|
+
|
|
34
|
+
# @!attribute [rw] additional_tags
|
|
35
|
+
# Additional tags to allow beyond defaults
|
|
36
|
+
# @return [Array<String>] array of tag names to add to allowlist
|
|
37
|
+
# @example
|
|
38
|
+
# config.additional_tags = ['custom-element', 'web-component']
|
|
39
|
+
|
|
40
|
+
# @!attribute [rw] additional_uri_safe_attributes
|
|
41
|
+
# Additional attributes that should be treated as URIs and validated
|
|
42
|
+
# @return [Array<String>] array of attribute names
|
|
43
|
+
# @example
|
|
44
|
+
# config.additional_uri_safe_attributes = ['data-link', 'poster']
|
|
45
|
+
|
|
46
|
+
# @!attribute [rw] allow_aria_attributes
|
|
47
|
+
# Allow aria-* attributes for accessibility
|
|
48
|
+
# @return [Boolean] true to allow aria attributes (default: true)
|
|
49
|
+
|
|
50
|
+
# @!attribute [rw] allow_data_attributes
|
|
51
|
+
# Allow data-* attributes for custom data
|
|
52
|
+
# @return [Boolean] true to allow data attributes (default: true)
|
|
53
|
+
|
|
54
|
+
# @!attribute [rw] allow_data_uri
|
|
55
|
+
# Allow data: URIs in src and other URI attributes
|
|
56
|
+
# @return [Boolean] true to allow data URIs (default: true for safe elements)
|
|
57
|
+
# @note Data URIs can be large and may pose security risks if not validated
|
|
58
|
+
|
|
59
|
+
# @!attribute [rw] allow_unknown_protocols
|
|
60
|
+
# Allow URI protocols not in the default safe list
|
|
61
|
+
# @return [Boolean] true to allow unknown protocols (default: false)
|
|
62
|
+
# @note Enabling this reduces security - use with caution
|
|
63
|
+
|
|
64
|
+
# @!attribute [rw] allow_self_close_in_attributes
|
|
65
|
+
# Allow self-closing syntax in attributes
|
|
66
|
+
# @return [Boolean] (default: true)
|
|
67
|
+
|
|
68
|
+
# @!attribute [rw] allowed_attributes
|
|
69
|
+
# Exact allowlist of attributes (replaces defaults when set)
|
|
70
|
+
# @return [Array<String>, nil] array of allowed attributes or nil to use defaults
|
|
71
|
+
# @example
|
|
72
|
+
# config.allowed_attributes = ['href', 'class', 'id']
|
|
73
|
+
|
|
74
|
+
# @!attribute [rw] allowed_attributes_per_tag
|
|
75
|
+
# Per-tag attribute restrictions for fine-grained control
|
|
76
|
+
# @return [Hash<String, Array<String>>, nil] hash mapping tag names to allowed attributes
|
|
77
|
+
# @example
|
|
78
|
+
# config.allowed_attributes_per_tag = {
|
|
79
|
+
# 'a' => ['href', 'title'],
|
|
80
|
+
# 'img' => ['src', 'alt']
|
|
81
|
+
# }
|
|
82
|
+
|
|
83
|
+
# @!attribute [rw] allowed_tags
|
|
84
|
+
# Exact allowlist of tags (replaces defaults when set)
|
|
85
|
+
# @return [Array<String>, nil] array of allowed tags or nil to use defaults
|
|
86
|
+
# @example
|
|
87
|
+
# config.allowed_tags = ['p', 'strong', 'em', 'a']
|
|
88
|
+
|
|
89
|
+
# @!attribute [rw] allowed_uri_regexp
|
|
90
|
+
# Custom regexp for validating URI attributes
|
|
91
|
+
# @return [Regexp, nil] custom URI validation pattern or nil for default
|
|
92
|
+
# @example Only allow HTTPS
|
|
93
|
+
# config.allowed_uri_regexp = /^https:/
|
|
94
|
+
|
|
95
|
+
# @!attribute [rw] forbidden_attributes
|
|
96
|
+
# Attributes that are always removed (takes precedence over allowed)
|
|
97
|
+
# @return [Array<String>] array of forbidden attribute names
|
|
98
|
+
# @example
|
|
99
|
+
# config.forbidden_attributes = ['onclick', 'onerror']
|
|
100
|
+
|
|
101
|
+
# @!attribute [rw] forbidden_tags
|
|
102
|
+
# Tags that are always removed (takes precedence over allowed)
|
|
103
|
+
# @return [Array<String>] array of forbidden tag names
|
|
104
|
+
# @example
|
|
105
|
+
# config.forbidden_tags = ['script', 'iframe']
|
|
106
|
+
|
|
107
|
+
# @!attribute [rw] allow_style_tags
|
|
108
|
+
# Allow <style> tags with content sanitization
|
|
109
|
+
# @return [Boolean] true to allow style tags (default: true)
|
|
110
|
+
# @note Style tag content is scanned for unsafe patterns
|
|
111
|
+
|
|
112
|
+
# @!attribute [rw] allow_document_elements
|
|
113
|
+
# Allow html/head/body document structure elements
|
|
114
|
+
# @return [Boolean] true to allow document elements (default: false)
|
|
115
|
+
|
|
116
|
+
# @!attribute [rw] keep_content
|
|
117
|
+
# Keep text content when removing disallowed tags
|
|
118
|
+
# @return [Boolean] true to preserve content (default: true)
|
|
119
|
+
# @example
|
|
120
|
+
# # With keep_content: true
|
|
121
|
+
# # <script>alert()</script>Hello -> Hello
|
|
122
|
+
# # With keep_content: false
|
|
123
|
+
# # <script>alert()</script>Hello -> (empty)
|
|
124
|
+
|
|
125
|
+
# @!attribute [rw] return_dom
|
|
126
|
+
# Return Nokogiri document instead of HTML string
|
|
127
|
+
# @return [Boolean] true to return DOM (default: false)
|
|
128
|
+
|
|
129
|
+
# @!attribute [rw] return_dom_fragment
|
|
130
|
+
# Return Nokogiri fragment instead of HTML string
|
|
131
|
+
# @return [Boolean] true to return fragment (default: false)
|
|
132
|
+
|
|
133
|
+
# @!attribute [rw] whole_document
|
|
134
|
+
# Parse and sanitize as complete HTML document
|
|
135
|
+
# @return [Boolean] true for whole document (default: false)
|
|
136
|
+
|
|
137
|
+
# @!attribute [rw] safe_for_templates
|
|
138
|
+
# Remove template expressions ({{, <%= , ${)
|
|
139
|
+
# @return [Boolean] true to remove templates (default: false)
|
|
140
|
+
|
|
141
|
+
# @!attribute [rw] safe_for_xml
|
|
142
|
+
# Remove comments in XML contexts
|
|
143
|
+
# @return [Boolean] true to remove XML comments (default: true)
|
|
144
|
+
|
|
145
|
+
# @!attribute [rw] sanitize_dom
|
|
146
|
+
# Enable DOM clobbering protection
|
|
147
|
+
# @return [Boolean] true for protection (default: true)
|
|
148
|
+
# @note Prevents id/name values from clobbering built-in DOM properties
|
|
149
|
+
|
|
150
|
+
# @!attribute [rw] sanitize_until_stable
|
|
151
|
+
# Re-sanitize multiple passes to prevent mXSS
|
|
152
|
+
# @return [Boolean] true for multi-pass (default: true)
|
|
153
|
+
# @note Important for preventing mutation-based XSS attacks
|
|
154
|
+
|
|
155
|
+
# @!attribute [rw] mutation_max_passes
|
|
156
|
+
# Maximum sanitization passes for stability
|
|
157
|
+
# @return [Integer] max passes (default: 2)
|
|
158
|
+
# @note Higher values increase security but reduce performance
|
|
159
|
+
|
|
160
|
+
# @!attribute [rw] namespace
|
|
161
|
+
# XML namespace for document parsing
|
|
162
|
+
# @return [String] namespace URI (default: 'http://www.w3.org/1999/xhtml')
|
|
163
|
+
|
|
164
|
+
# @!attribute [rw] parser_media_type
|
|
165
|
+
# Parser media type for content parsing
|
|
166
|
+
# @return [String] media type (default: 'text/html')
|
|
167
|
+
|
|
168
|
+
# @!attribute [rw] minimal_profile
|
|
169
|
+
# Use minimal HTML-only profile (excludes SVG/MathML)
|
|
170
|
+
# @return [Boolean] true for minimal (default: false)
|
|
171
|
+
|
|
172
|
+
# @!attribute [rw] force_body
|
|
173
|
+
# Force body context when parsing
|
|
174
|
+
# @return [Boolean] (default: false)
|
|
175
|
+
|
|
176
|
+
# @!attribute [rw] in_place
|
|
177
|
+
# Attempt to sanitize in place (experimental)
|
|
178
|
+
# @return [Boolean] (default: false)
|
|
179
|
+
|
|
180
|
+
# Public reader/writer pairs for every configuration option.
#
# Besides the options documented above, this list also exposes legacy-style
# names (add_attributes, add_uri_safe_attributes, forbid_attributes, ...).
# NOTE(review): these accessors write their own instance variables; only hash
# keys that go through CONFIG_MAPPING are redirected to the canonical setters
# (e.g. 'add_attributes' => additional_attributes=). Confirm which of the two
# the sanitizer actually reads before relying on the legacy writers.
#
# There is deliberately no generated accessor for use_profiles: its writer is
# defined by hand further down in this class.
attr_accessor :additional_attributes, :add_attributes, :add_data_uri_tags,
              :additional_tags, :additional_uri_safe_attributes, :add_uri_safe_attributes,
              :allow_aria_attributes, :allow_data_attributes, :allow_data_uri, :allow_unknown_protocols,
              :allow_self_close_in_attributes, :allowed_attributes, :allowed_attributes_per_tag, :allowed_tags,
              :allowed_namespaces, :allowed_uri_regexp, :custom_element_handling,
              :forbidden_attributes, :forbid_attributes, :forbid_contents, :add_forbid_contents, :forbidden_tags,
              :force_body, :html_integration_points, :in_place, :keep_content,
              :mathml_text_integration_points, :namespace, :parser_media_type,
              :return_dom_fragment, :return_dom,
              :safe_for_templates, :safe_for_xml, :sanitize_dom, :sanitize_until_stable, :mutation_max_passes,
              :sanitize_named_props, :trusted_types_policy, :allow_style_tags, :minimal_profile,
              :whole_document, :allow_document_elements
|
|
192
|
+
|
|
193
|
+
# Initializes a new configuration instance
|
|
194
|
+
#
|
|
195
|
+
# @param cfg [Hash] configuration options to apply
|
|
196
|
+
def initialize(cfg = {})
  # Built-in defaults, written straight to the instance variables so that no
  # setter side effects (such as the profile reset in use_profiles=) run here.
  defaults = {
    # Attribute handling
    allow_aria_attributes: true,          # aria-* attributes are permitted
    allow_data_attributes: true,          # data-* attributes are permitted
    allow_self_close_in_attributes: true,

    # URIs and protocols
    allow_data_uri: true,                 # data URIs allowed for safe elements
    allow_unknown_protocols: false,       # unknown protocols are blocked

    # Parsing and output
    safe_for_templates: false,
    safe_for_xml: true,
    whole_document: false,
    allow_document_elements: false,
    force_body: false,
    return_dom: false,
    return_dom_fragment: false,

    # Sanitization behaviour
    sanitize_dom: true,                   # DOM clobbering protection on
    sanitize_named_props: false,
    sanitize_until_stable: true,          # repeat passes to deter mXSS
    mutation_max_passes: 2,               # conservative pass limit
    keep_content: true,
    in_place: false,
    minimal_profile: false,
    allow_style_tags: true,

    # Profiles and namespaces
    use_profiles: {},
    namespace: 'http://www.w3.org/1999/xhtml',
    parser_media_type: 'text/html',

    # Tag/attribute allow and forbid lists
    forbidden_tags: %w[base link meta annotation-xml noscript],
    allowed_attributes: nil
  }
  defaults.each { |name, value| instance_variable_set(:"@#{name}", value) }

  # Caller-supplied options override the defaults; profiles are then expanded
  # into concrete tag/attribute lists when any were requested.
  apply_config(cfg)
  process_profiles unless @use_profiles.empty?
end
|
|
238
|
+
|
|
239
|
+
# Configuration key normalization mapping
|
|
240
|
+
#
|
|
241
|
+
# Maps configuration hash keys (including legacy aliases) to their corresponding
|
|
242
|
+
# setter methods. This allows flexible configuration key naming while maintaining
|
|
243
|
+
# backward compatibility with older key names.
|
|
244
|
+
#
|
|
245
|
+
# Keys are normalized to lowercase before lookup, so configuration is case-insensitive.
|
|
246
|
+
#
|
|
247
|
+
# @example Using different key styles
|
|
248
|
+
# Config.new(allowed_tags: ['p']) # Modern style
|
|
249
|
+
# Config.new('allowed_tags' => ['p']) # String keys
|
|
250
|
+
# Config.new(add_tags: ['custom']) # Legacy alias
|
|
251
|
+
#
|
|
252
|
+
# @api private
|
|
253
|
+
# Built from a "canonical option => accepted hash keys" table. Every accepted
# key (canonical name or backward-compatible alias) maps to the setter of the
# canonical option. Keys keep the same order as the option groups listed here.
CONFIG_MAPPING = {
  additional_tags: %w[add_tags additional_tags],
  additional_attributes: %w[add_attr additional_attributes add_attributes],
  additional_uri_safe_attributes: %w[add_uri_safe_attr additional_uri_safe_attributes add_uri_safe_attributes],
  allowed_tags: %w[allowed_tags],
  allowed_attributes: %w[allowed_attr allowed_attributes],
  allowed_attributes_per_tag: %w[allowed_attributes_per_tag],
  forbidden_tags: %w[forbidden_tags forbid_tags],
  forbidden_attributes: %w[forbidden_attr forbidden_attributes forbid_attributes],
  allow_data_uri: %w[allow_data_uri],
  allow_aria_attributes: %w[allow_aria_attr allow_aria_attributes],
  allow_data_attributes: %w[allow_data_attr allow_data_attributes],
  allow_self_close_in_attributes: %w[allow_self_close_in_attr allow_self_close_in_attributes],
  allow_style_tags: %w[allow_style_tags],
  allow_document_elements: %w[allow_document_elements],
  minimal_profile: %w[minimal_profile],
  mutation_max_passes: %w[pass_limit]
}.flat_map { |option, keys| keys.map { |key| [key, :"#{option}="] } }.to_h.freeze
|
|
283
|
+
|
|
284
|
+
# Per-tag attribute restrictions for HTML email profile
|
|
285
|
+
#
|
|
286
|
+
# Defines which attributes are allowed on specific tags when using the html_email profile.
|
|
287
|
+
# This provides fine-grained security control by limiting each tag to only its appropriate
|
|
288
|
+
# attributes, preventing attribute confusion attacks where dangerous attributes appear on
|
|
289
|
+
# unexpected tags.
|
|
290
|
+
#
|
|
291
|
+
# **Security rationale:** Email clients have inconsistent rendering behavior, and allowing
|
|
292
|
+
# arbitrary attributes on any tag can lead to security issues. For example, allowing 'href'
|
|
293
|
+
# on 'img' tags or 'src' on 'a' tags could enable attacks. Per-tag restrictions prevent this.
|
|
294
|
+
#
|
|
295
|
+
# **Usage:** This constant is automatically used when `use_profiles: { html_email: true }`
|
|
296
|
+
# is configured. You can also use it as a template for your own per-tag attribute rules.
|
|
297
|
+
#
|
|
298
|
+
# @example Using email profile
|
|
299
|
+
# config = Config.new(use_profiles: { html_email: true })
|
|
300
|
+
# # Automatically uses HTML_EMAIL_ATTRIBUTES for per-tag control
|
|
301
|
+
#
|
|
302
|
+
# @example Custom per-tag attributes
|
|
303
|
+
# config.allowed_attributes_per_tag = {
|
|
304
|
+
# 'a' => ['href', 'title'],
|
|
305
|
+
# 'img' => ['src', 'alt', 'width', 'height']
|
|
306
|
+
# }
|
|
307
|
+
#
|
|
308
|
+
# @see #allowed_attributes_per_tag Configuration option for per-tag control
|
|
309
|
+
# Maps a lowercase tag name to the list of attribute names allowed on that tag.
# Tags mapped to an empty list ('head', 'title') get no attributes at all.
#
# NOTE: the trailing .freeze freezes only the outer Hash. The per-tag arrays
# are not frozen here, so code that obtains one of them can still mutate it.
HTML_EMAIL_ATTRIBUTES = {
  # Document structure
  'body' => %w[bgcolor text link vlink alink background style class id leftmargin topmargin marginwidth
               marginheight],
  'html' => %w[lang dir xmlns],
  'head' => [],
  'meta' => %w[name content charset],
  'title' => [],
  'style' => %w[type],

  # Table elements (core of email layouts)
  'table' => %w[width height border cellpadding cellspacing align bgcolor background style class id role summary],
  'thead' => %w[align class id style dir lang title],
  'tbody' => %w[align class id style dir lang title],
  'tfoot' => %w[align class id style dir lang title],
  'tr' => %w[height bgcolor background valign align style class id],
  'td' => %w[width height colspan rowspan align valign bgcolor background style class id headers scope],
  'th' => %w[width height colspan rowspan align valign bgcolor background style class id headers scope],

  # Legacy presentation elements
  'font' => %w[face size color style],
  'center' => %w[align class id style dir lang title],

  # Links and media
  'a' => %w[href target title class id style name rel],
  'img' => %w[src alt width height border align style class id],

  # Headings
  'h1' => %w[align class id style dir lang title],
  'h2' => %w[align class id style dir lang title],
  'h3' => %w[align class id style dir lang title],
  'h4' => %w[align class id style dir lang title],
  'h5' => %w[align class id style dir lang title],
  'h6' => %w[align class id style dir lang title],

  # Block elements
  'p' => %w[align class id style dir lang title],
  'div' => %w[align class id style dir lang title],
  'span' => %w[align class id style dir lang title],
  'blockquote' => %w[align class id style dir lang title cite],
  'pre' => %w[align class id style dir lang title],
  'code' => %w[align class id style dir lang title],

  # Lists
  'ul' => %w[align class id style dir lang title type],
  'ol' => %w[align class id style dir lang title type start],
  'li' => %w[align class id style dir lang title value],

  # Inline formatting
  'strong' => %w[align class id style dir lang title],
  'em' => %w[align class id style dir lang title],
  'b' => %w[align class id style dir lang title],
  'i' => %w[align class id style dir lang title],
  'u' => %w[align class id style dir lang title],
  's' => %w[align class id style dir lang title],
  'strike' => %w[align class id style dir lang title],
  'sup' => %w[align class id style dir lang title],
  'sub' => %w[align class id style dir lang title],
  'small' => %w[align class id style dir lang title],
  'big' => %w[align class id style dir lang title],
  'mark' => %w[align class id style dir lang title],
  'del' => %w[align class id style dir lang title cite datetime],
  'ins' => %w[align class id style dir lang title cite datetime],

  # Empty elements
  'br' => %w[class style],
  'hr' => %w[align class id style dir lang title width size noshade]
}.freeze
|
|
377
|
+
|
|
378
|
+
# Sets content type profiles and rebuilds configuration
|
|
379
|
+
#
|
|
380
|
+
# Profiles are pre-configured sets of tags and attributes for common content types.
|
|
381
|
+
# When you set profiles, the configuration automatically enables the appropriate
|
|
382
|
+
# tags, attributes, and security settings for those content types.
|
|
383
|
+
#
|
|
384
|
+
# **Available profiles:**
|
|
385
|
+
# - `:html` - Standard HTML5 content
|
|
386
|
+
# - `:svg` - SVG graphics
|
|
387
|
+
# - `:svg_filters` - SVG filter effects
|
|
388
|
+
# - `:math_ml` - Mathematical notation
|
|
389
|
+
# - `:html_email` - HTML email with legacy attributes
|
|
390
|
+
#
|
|
391
|
+
# @param profiles [Hash<Symbol, Boolean>] hash of profile names to enable
|
|
392
|
+
# @return [Hash] the set profiles
|
|
393
|
+
#
|
|
394
|
+
# @example Enable multiple profiles
|
|
395
|
+
# config.use_profiles = { html: true, svg: true }
|
|
396
|
+
#
|
|
397
|
+
# @example Email profile
|
|
398
|
+
# config.use_profiles = { html_email: true }
|
|
399
|
+
#
|
|
400
|
+
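# @note Setting profiles resets allowed_tags, allowed_attributes and allowed_attributes_per_tag to nil (and restores the other profile-dependent options such as forbidden_tags to their defaults),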
|
|
401
|
+
# allowing the profile configuration to take effect
|
|
402
|
+
def use_profiles=(profiles)
  # A nil/false argument means "no profiles".
  @use_profiles = {}
  @use_profiles = profiles if profiles

  # Drop everything a previous profile selection may have configured, then
  # rebuild from the new selection (nothing to rebuild when it is empty).
  reset_profile_dependent_settings
  return if @use_profiles.empty?

  process_profiles
end
|
|
407
|
+
|
|
408
|
+
private
|
|
409
|
+
|
|
410
|
+
# Applies configuration options from a hash
|
|
411
|
+
#
|
|
412
|
+
# @param cfg [Hash] configuration hash
|
|
413
|
+
# Applies configuration options from a hash.
#
# Keys may be Strings or Symbols in any letter case. Each key is lowercased and
# resolved through CONFIG_MAPPING (which also covers legacy aliases); keys not
# listed there fall back to a setter named "<key>=". Keys for which the
# instance has no public setter are ignored.
#
# The use_profiles option is always applied first. Its setter resets the
# profile-dependent options (allowed_tags, forbidden_tags, ...), so applying
# it after them would silently discard explicit values depending on nothing
# but the key order of the hash.
#
# @param cfg [Hash, nil] configuration hash (nil is treated as empty)
# @return [void]
def apply_config(cfg)
  entries = (cfg || {}).map { |key, value| [key.to_s.downcase, value] }
  profile_entries, other_entries = entries.partition { |name, _value| name == 'use_profiles' }

  (profile_entries + other_entries).each do |name, value|
    setter = CONFIG_MAPPING[name] || :"#{name}="
    public_send(setter, value) if respond_to?(setter)
  end
end
|
|
420
|
+
|
|
421
|
+
# Resets configuration settings that depend on profiles
|
|
422
|
+
#
|
|
423
|
+
# Called when profiles are changed to clear out profile-dependent settings
|
|
424
|
+
# before applying new profile configuration.
|
|
425
|
+
#
|
|
426
|
+
# @return [void]
|
|
427
|
+
# @api private
|
|
428
|
+
# Restores every option that a profile may have changed to its default value.
#
# The tag/attribute allowlists go back to nil so that the next profile run
# rebuilds them. The flags touched by the html_email profile are reset as
# well, including sanitize_dom: that profile turns DOM clobbering protection
# off, and without resetting it here the protection would stay disabled after
# switching to a different profile.
#
# @return [void]
# @api private
def reset_profile_dependent_settings
  @allowed_tags = nil
  @allowed_attributes = nil
  @allowed_attributes_per_tag = nil
  @allow_style_tags = true
  @allow_document_elements = false
  @allow_unknown_protocols = false
  @whole_document = false
  @sanitize_dom = true
  @forbidden_tags = %w[base link meta annotation-xml noscript]
end
|
|
438
|
+
|
|
439
|
+
# Processes profile configurations to set allowed tags and attributes
|
|
440
|
+
#
|
|
441
|
+
# @return [void]
|
|
442
|
+
def process_profiles
  # An explicitly assigned allowlist (anything other than nil) is kept as is;
  # only missing lists are derived from the active profiles.
  if @allowed_tags.nil?
    configure_allowed_tags
  end

  if @allowed_attributes.nil?
    configure_allowed_attributes
  end
end
|
|
446
|
+
|
|
447
|
+
# Configures allowed tags based on active profiles
|
|
448
|
+
#
|
|
449
|
+
# Builds the allowed tags list by combining tags from each enabled profile.
|
|
450
|
+
# Always includes '#text' for text content handling.
|
|
451
|
+
#
|
|
452
|
+
# @return [void]
|
|
453
|
+
# @api private
|
|
454
|
+
def configure_allowed_tags
  # '#text' is always present; each enabled profile appends its own tag list.
  tags = ['#text']
  tags.concat(Tags::HTML) if @use_profiles[:html]
  tags.concat(Tags::SVG) if @use_profiles[:svg]
  tags.concat(Tags::SVG_FILTERS) if @use_profiles[:svg_filters]
  tags.concat(Tags::MATH_ML) if @use_profiles[:math_ml]
  @allowed_tags = tags

  # The email profile extends @allowed_tags itself and adjusts related flags,
  # so it has to run after the list above has been stored.
  configure_html_email_tags if @use_profiles[:html_email]
end
|
|
462
|
+
|
|
463
|
+
# Configures settings specific to HTML email profile
|
|
464
|
+
#
|
|
465
|
+
# Email rendering requires special handling:
|
|
466
|
+
# - Allows style tags (required for email styling)
|
|
467
|
+
# - Allows document elements (html, head, body)
|
|
468
|
+
# - Treats as whole document
|
|
469
|
+
# - Disables DOM clobbering protection (emails are sandboxed)
|
|
470
|
+
# - Removes meta and style from the forbidden tags list so that both are permitted
|
|
471
|
+
#
|
|
472
|
+
# @return [void]
|
|
473
|
+
# @api private
|
|
474
|
+
def configure_html_email_tags
  @allowed_tags = @allowed_tags + Tags::HTML_EMAIL

  # Emails are complete documents that carry their own <style> blocks.
  @whole_document = true
  @allow_document_elements = true
  @allow_style_tags = true
  @allow_unknown_protocols = false

  # Emails use IDs for styling and are rendered in sandboxed contexts.
  @sanitize_dom = false

  # meta/style must not be stripped as forbidden tags in this profile.
  @forbidden_tags = @forbidden_tags - %w[meta style]
end
|
|
483
|
+
|
|
484
|
+
# Configures allowed attributes based on active profiles
|
|
485
|
+
#
|
|
486
|
+
# Builds the allowed attributes list by combining attributes from each enabled profile.
|
|
487
|
+
# For html_email profile, uses per-tag attribute restrictions instead of global list.
|
|
488
|
+
#
|
|
489
|
+
# @return [void]
|
|
490
|
+
# @api private
|
|
491
|
+
# Builds the global attribute allowlist from the active profiles.
#
# - :html contributes Attributes::HTML
# - :svg and :svg_filters each contribute Attributes::SVG + Attributes::XML
# - :math_ml contributes Attributes::MATH_ML + Attributes::XML
#
# Several profiles contribute the same lists, so the result is de-duplicated
# (first occurrence wins, order otherwise unchanged).
#
# For :html_email the per-tag rules are set from HTML_EMAIL_ATTRIBUTES. Each
# configuration receives its own copy of the hash and of every per-tag array,
# so adjusting the rules on one instance cannot alter the shared constant.
#
# @return [void]
# @api private
def configure_allowed_attributes
  attributes = []
  attributes.concat(Attributes::HTML) if @use_profiles[:html]
  attributes.concat(Attributes::SVG, Attributes::XML) if @use_profiles[:svg] || @use_profiles[:svg_filters]
  attributes.concat(Attributes::MATH_ML, Attributes::XML) if @use_profiles[:math_ml]
  @allowed_attributes = attributes.uniq

  @allowed_attributes_per_tag = HTML_EMAIL_ATTRIBUTES.transform_values(&:dup) if @use_profiles[:html_email]
end
|
|
499
|
+
end
|
|
500
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Dandruff
|
|
4
|
+
# Regular expressions for attribute matching and content validation
|
|
5
|
+
#
|
|
6
|
+
# This module contains regular expressions used throughout Dandruff for validating
|
|
7
|
+
# attributes, detecting template expressions, and checking URI protocols. These patterns
|
|
8
|
+
# are critical for security and should not be modified without careful consideration.
|
|
9
|
+
#
|
|
10
|
+
# @api private
|
|
11
|
+
module Expressions
  # Matches HTML5 data attributes (data-*)
  #
  # Validates attribute names that follow the data attribute specification.
  # Data attributes must start with 'data-' followed by one or more word characters or hyphens.
  # The pattern is anchored to the whole string (\A ... \z), so a name that merely
  # contains a conforming line is not accepted.
  #
  # @example Matching data attributes
  #   'data-user-id' =~ Expressions::DATA_ATTR  # matches
  #   'data-toggle' =~ Expressions::DATA_ATTR   # matches
  #   'data' =~ Expressions::DATA_ATTR          # does not match
  #   'data-' =~ Expressions::DATA_ATTR         # does not match
  DATA_ATTR = /\Adata-[\w-]+\z/

  # Matches ARIA accessibility attributes (aria-*)
  #
  # Validates attribute names that follow the ARIA specification.
  # ARIA attributes must start with 'aria-' followed by one or more word characters or hyphens.
  # Anchored to the whole string, like DATA_ATTR.
  #
  # @example Matching aria attributes
  #   'aria-label' =~ Expressions::ARIA_ATTR   # matches
  #   'aria-hidden' =~ Expressions::ARIA_ATTR  # matches
  #   'aria' =~ Expressions::ARIA_ATTR         # does not match
  ARIA_ATTR = /\Aaria-[\w-]+\z/

  # Matches Mustache/Handlebars template expressions
  #
  # Detects template expressions in the format `{{ expression }}`. Used when
  # `safe_for_templates` is enabled to prevent template injection attacks.
  #
  # NOTE(review): the expression body may not contain `}`, so `{{ {a: 1}.a }}`
  # is not matched - confirm whether that is acceptable for safe_for_templates.
  #
  # @example Matching mustache expressions
  #   '{{ user.name }}' =~ Expressions::MUSTACHE_EXPR  # matches
  #   '{{value}}' =~ Expressions::MUSTACHE_EXPR        # matches
  #
  # @see Config#safe_for_templates
  MUSTACHE_EXPR = /\{\{[^}]+\}\}/

  # Matches ERB (Embedded Ruby) template expressions
  #
  # Detects ERB expressions in the format `<% expression %>`, `<%= expression %>`,
  # or `<%- expression %>`. Used when `safe_for_templates` is enabled.
  #
  # NOTE(review): the expression body may not contain `%`, so `<%= a % b %>`
  # is not matched - confirm whether that is acceptable for safe_for_templates.
  #
  # @example Matching ERB expressions
  #   '<%= user.name %>' =~ Expressions::ERB_EXPR  # matches
  #   '<% if admin? %>' =~ Expressions::ERB_EXPR   # matches
  #   '<%- value -%>' =~ Expressions::ERB_EXPR     # matches
  #
  # @see Config#safe_for_templates
  ERB_EXPR = /<%[=-]?[^%]+%>/

  # Matches JavaScript template literal expressions
  #
  # Detects template expressions in the format `${ expression }`. Used when
  # `safe_for_templates` is enabled to prevent template injection.
  #
  # @example Matching template literals
  #   '${user.name}' =~ Expressions::TMPLIT_EXPR  # matches
  #   '${value}' =~ Expressions::TMPLIT_EXPR      # matches
  #
  # @see Config#safe_for_templates
  TMPLIT_EXPR = /\$\{[^}]+\}/

  # Validates allowed URI protocols and relative URLs
  #
  # This is the default URI validation pattern, modelled on DOMPurify's.
  #
  # **Allowed:** http, https, mailto, ftp, tel; values that do not start with a
  # letter (/, ./, ../, #, ?); and values whose leading run of scheme characters
  # (letters, '+', '.', '-') is not followed by ':' - i.e. relative URLs such as
  # 'images/pic.png'.
  # **Blocked:** every other scheme (javascript, vbscript, data, view-source, ...).
  #
  # Implementation notes:
  # - The hyphen in both character classes is escaped. Unescaped, `.-:` inside
  #   the negated class is read as the range '.' through ':', which rejects
  #   relative URLs containing '/' or digits and lets hyphenated schemes through.
  # - The pattern is anchored with \A / \z rather than ^ / $, which in Ruby
  #   match at every line boundary; otherwise a multi-line value would be
  #   accepted as soon as any single line of it looked safe.
  #
  # @example Valid URIs
  #   'https://example.com' =~ Expressions::IS_ALLOWED_URI       # matches
  #   'mailto:user@example.com' =~ Expressions::IS_ALLOWED_URI   # matches
  #   '/path/to/page' =~ Expressions::IS_ALLOWED_URI             # matches
  #   'images/pic.png' =~ Expressions::IS_ALLOWED_URI            # matches
  #   'javascript:alert(1)' =~ Expressions::IS_ALLOWED_URI       # does not match
  #   'view-source:x' =~ Expressions::IS_ALLOWED_URI             # does not match
  #
  # @see Config#allowed_uri_regexp Custom URI pattern override
  IS_ALLOWED_URI = /\A(?:(?:https?|mailto|ftp|tel):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|\z))/i

  # Detects dangerous JavaScript and data:text/html URIs
  #
  # Matches URIs that start with `javascript:` or `data:text/html` protocols,
  # which are common XSS attack vectors. Whitespace before the protocol is also
  # detected.
  #
  # NOTE: `^` is a line anchor in Ruby, so this also fires when a later line of
  # a multi-line value starts with one of these protocols. For a deny pattern
  # that only makes the check stricter, so the anchor is left unchanged.
  #
  # @example Dangerous URIs
  #   'javascript:alert(1)' =~ Expressions::IS_SCRIPT_OR_DATA      # matches
  #   '  javascript:void(0)' =~ Expressions::IS_SCRIPT_OR_DATA     # matches (whitespace)
  #   'data:text/html,<script>' =~ Expressions::IS_SCRIPT_OR_DATA  # matches
  #   'data:image/png;base64' =~ Expressions::IS_SCRIPT_OR_DATA    # does not match
  IS_SCRIPT_OR_DATA = %r{^(?:\s*javascript:|\s*data:text/html)}i
end
|
|
103
|
+
end
|