dandruff 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,500 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dandruff
4
+ # Configuration class for the Dandruff sanitizer
5
+ #
6
+ # This class manages all configuration options for customizing HTML sanitization behavior.
7
+ # It provides sensible security-focused defaults and allows fine-grained control through
8
+ # numerous configuration options. Configuration can be set during initialization or modified
9
+ # later through accessor methods.
10
+ #
11
+ # @example Basic configuration
12
+ # config = Dandruff::Config.new(
13
+ # allowed_tags: ['p', 'strong', 'em'],
14
+ # allowed_attributes: ['class', 'href']
15
+ # )
16
+ #
17
+ # @example Using profiles
18
+ # config = Dandruff::Config.new(use_profiles: { html: true, svg: true })
19
+ #
20
+ # @example Block configuration
21
+ # dandruff = Dandruff.new do |config|
22
+ # config.allowed_tags = ['p', 'a']
23
+ # config.forbidden_attributes = ['onclick']
24
+ # end
25
+ #
26
+ # @see Sanitizer Main sanitizer class that uses this configuration
27
+ class Config
28
+ # @!attribute [rw] additional_attributes
29
+ # Additional attributes to allow beyond defaults
30
+ # @return [Array<String>] array of attribute names to add to allowlist
31
+ # @example
32
+ # config.additional_attributes = ['data-custom', 'aria-label']
33
+
34
+ # @!attribute [rw] additional_tags
35
+ # Additional tags to allow beyond defaults
36
+ # @return [Array<String>] array of tag names to add to allowlist
37
+ # @example
38
+ # config.additional_tags = ['custom-element', 'web-component']
39
+
40
+ # @!attribute [rw] additional_uri_safe_attributes
41
+ # Additional attributes that should be treated as URIs and validated
42
+ # @return [Array<String>] array of attribute names
43
+ # @example
44
+ # config.additional_uri_safe_attributes = ['data-link', 'poster']
45
+
46
+ # @!attribute [rw] allow_aria_attributes
47
+ # Allow aria-* attributes for accessibility
48
+ # @return [Boolean] true to allow aria attributes (default: true)
49
+
50
+ # @!attribute [rw] allow_data_attributes
51
+ # Allow data-* attributes for custom data
52
+ # @return [Boolean] true to allow data attributes (default: true)
53
+
54
+ # @!attribute [rw] allow_data_uri
55
+ # Allow data: URIs in src and other URI attributes
56
+ # @return [Boolean] true to allow data URIs (default: true for safe elements)
57
+ # @note Data URIs can be large and may pose security risks if not validated
58
+
59
+ # @!attribute [rw] allow_unknown_protocols
60
+ # Allow URI protocols not in the default safe list
61
+ # @return [Boolean] true to allow unknown protocols (default: false)
62
+ # @note Enabling this reduces security - use with caution
63
+
64
+ # @!attribute [rw] allow_self_close_in_attributes
65
+ # Allow self-closing syntax in attributes
66
+ # @return [Boolean] (default: true)
67
+
68
+ # @!attribute [rw] allowed_attributes
69
+ # Exact allowlist of attributes (replaces defaults when set)
70
+ # @return [Array<String>, nil] array of allowed attributes or nil to use defaults
71
+ # @example
72
+ # config.allowed_attributes = ['href', 'class', 'id']
73
+
74
+ # @!attribute [rw] allowed_attributes_per_tag
75
+ # Per-tag attribute restrictions for fine-grained control
76
+ # @return [Hash<String, Array<String>>, nil] hash mapping tag names to allowed attributes
77
+ # @example
78
+ # config.allowed_attributes_per_tag = {
79
+ # 'a' => ['href', 'title'],
80
+ # 'img' => ['src', 'alt']
81
+ # }
82
+
83
+ # @!attribute [rw] allowed_tags
84
+ # Exact allowlist of tags (replaces defaults when set)
85
+ # @return [Array<String>, nil] array of allowed tags or nil to use defaults
86
+ # @example
87
+ # config.allowed_tags = ['p', 'strong', 'em', 'a']
88
+
89
+ # @!attribute [rw] allowed_uri_regexp
90
+ # Custom regexp for validating URI attributes
91
+ # @return [Regexp, nil] custom URI validation pattern or nil for default
92
+ # @example Only allow HTTPS
93
+ # config.allowed_uri_regexp = /^https:/
94
+
95
+ # @!attribute [rw] forbidden_attributes
96
+ # Attributes that are always removed (takes precedence over allowed)
97
+ # @return [Array<String>] array of forbidden attribute names
98
+ # @example
99
+ # config.forbidden_attributes = ['onclick', 'onerror']
100
+
101
+ # @!attribute [rw] forbidden_tags
102
+ # Tags that are always removed (takes precedence over allowed)
103
+ # @return [Array<String>] array of forbidden tag names
104
+ # @example
105
+ # config.forbidden_tags = ['script', 'iframe']
106
+
107
+ # @!attribute [rw] allow_style_tags
108
+ # Allow <style> tags with content sanitization
109
+ # @return [Boolean] true to allow style tags (default: true)
110
+ # @note Style tag content is scanned for unsafe patterns
111
+
112
+ # @!attribute [rw] allow_document_elements
113
+ # Allow html/head/body document structure elements
114
+ # @return [Boolean] true to allow document elements (default: false)
115
+
116
+ # @!attribute [rw] keep_content
117
+ # Keep text content when removing disallowed tags
118
+ # @return [Boolean] true to preserve content (default: true)
119
+ # @example
120
+ # # With keep_content: true
121
+ # # <script>alert()</script>Hello -> Hello
122
+ # # With keep_content: false
123
+ # # <script>alert()</script>Hello -> (empty)
124
+
125
+ # @!attribute [rw] return_dom
126
+ # Return Nokogiri document instead of HTML string
127
+ # @return [Boolean] true to return DOM (default: false)
128
+
129
+ # @!attribute [rw] return_dom_fragment
130
+ # Return Nokogiri fragment instead of HTML string
131
+ # @return [Boolean] true to return fragment (default: false)
132
+
133
+ # @!attribute [rw] whole_document
134
+ # Parse and sanitize as complete HTML document
135
+ # @return [Boolean] true for whole document (default: false)
136
+
137
+ # @!attribute [rw] safe_for_templates
138
+ # Remove template expressions ({{, <%= , ${)
139
+ # @return [Boolean] true to remove templates (default: false)
140
+
141
+ # @!attribute [rw] safe_for_xml
142
+ # Remove comments in XML contexts
143
+ # @return [Boolean] true to remove XML comments (default: true)
144
+
145
+ # @!attribute [rw] sanitize_dom
146
+ # Enable DOM clobbering protection
147
+ # @return [Boolean] true for protection (default: true)
148
+ # @note Prevents id/name values from clobbering built-in DOM properties
149
+
150
+ # @!attribute [rw] sanitize_until_stable
151
+ # Re-sanitize multiple passes to prevent mXSS
152
+ # @return [Boolean] true for multi-pass (default: true)
153
+ # @note Important for preventing mutation-based XSS attacks
154
+
155
+ # @!attribute [rw] mutation_max_passes
156
+ # Maximum sanitization passes for stability
157
+ # @return [Integer] max passes (default: 2)
158
+ # @note Higher values increase security but reduce performance
159
+
160
+ # @!attribute [rw] namespace
161
+ # XML namespace for document parsing
162
+ # @return [String] namespace URI (default: 'http://www.w3.org/1999/xhtml')
163
+
164
+ # @!attribute [rw] parser_media_type
165
+ # Parser media type for content parsing
166
+ # @return [String] media type (default: 'text/html')
167
+
168
+ # @!attribute [rw] minimal_profile
169
+ # Use minimal HTML-only profile (excludes SVG/MathML)
170
+ # @return [Boolean] true for minimal (default: false)
171
+
172
+ # @!attribute [rw] force_body
173
+ # Force body context when parsing
174
+ # @return [Boolean] (default: false)
175
+
176
+ # @!attribute [rw] in_place
177
+ # Attempt to sanitize in place (experimental)
178
+ # @return [Boolean] (default: false)
179
+
180
# Tag allow/forbid lists
attr_accessor :allowed_tags, :additional_tags, :forbidden_tags,
              :forbid_contents, :add_forbid_contents, :add_data_uri_tags

# Attribute allow/forbid lists (the add_*/forbid_* names are separate accessors)
attr_accessor :allowed_attributes, :allowed_attributes_per_tag,
              :additional_attributes, :add_attributes,
              :additional_uri_safe_attributes, :add_uri_safe_attributes,
              :forbidden_attributes, :forbid_attributes

# Attribute and URI policy switches
attr_accessor :allow_aria_attributes, :allow_data_attributes, :allow_data_uri,
              :allow_unknown_protocols, :allow_self_close_in_attributes,
              :allowed_uri_regexp

# Namespaces, parsing and element handling
attr_accessor :allowed_namespaces, :namespace, :parser_media_type,
              :custom_element_handling, :html_integration_points,
              :mathml_text_integration_points, :force_body, :whole_document,
              :allow_document_elements, :allow_style_tags, :minimal_profile

# Output shape
attr_accessor :return_dom, :return_dom_fragment, :in_place, :keep_content,
              :trusted_types_policy

# Sanitization hardening
attr_accessor :safe_for_templates, :safe_for_xml, :sanitize_dom,
              :sanitize_named_props, :sanitize_until_stable, :mutation_max_passes
192
+
193
# Builds a configuration pre-loaded with the default settings, then overlays
# the caller's options on top of them.
#
# @param cfg [Hash] option names (symbols or strings) mapped to their values
def initialize(cfg = {})
  # Profiles, namespace and parser
  @use_profiles = {}
  @namespace = 'http://www.w3.org/1999/xhtml'
  @parser_media_type = 'text/html'

  # Allow/forbid lists
  @allowed_attributes = nil
  @forbidden_tags = %w[base link meta annotation-xml noscript]

  # Switches that start enabled
  @allow_aria_attributes = true
  @allow_data_attributes = true
  @allow_self_close_in_attributes = true
  @allow_data_uri = true
  @allow_style_tags = true
  @keep_content = true
  @safe_for_xml = true
  @sanitize_dom = true
  @sanitize_until_stable = true

  # Switches that start disabled
  @allow_unknown_protocols = false
  @allow_document_elements = false
  @whole_document = false
  @force_body = false
  @in_place = false
  @minimal_profile = false
  @return_dom = false
  @return_dom_fragment = false
  @safe_for_templates = false
  @sanitize_named_props = false

  # Upper bound on re-sanitization passes
  @mutation_max_passes = 2

  apply_config(cfg)
  return if @use_profiles.empty?

  process_profiles
end
238
+
239
# Lookup table from configuration hash keys to setter method names.
#
# Declared as "setter attribute => accepted keys" and expanded into a flat
# `key => :setter=` hash. Besides the canonical option name, most entries list
# legacy aliases (for example `add_tags`, `allowed_attr`, `pass_limit`) that are
# kept for backward compatibility.
#
# Callers downcase the key before the lookup, so matching is case-insensitive.
#
# @example Using different key styles
#   Config.new(allowed_tags: ['p'])          # Modern style
#   Config.new('allowed_tags' => ['p'])      # String keys
#   Config.new(add_tags: ['custom'])         # Legacy alias
#
# @api private
CONFIG_MAPPING = {
  additional_tags: %w[add_tags additional_tags],
  additional_attributes: %w[add_attr additional_attributes add_attributes],
  additional_uri_safe_attributes: %w[add_uri_safe_attr additional_uri_safe_attributes add_uri_safe_attributes],
  allowed_tags: %w[allowed_tags],
  allowed_attributes: %w[allowed_attr allowed_attributes],
  allowed_attributes_per_tag: %w[allowed_attributes_per_tag],
  forbidden_tags: %w[forbidden_tags forbid_tags],
  forbidden_attributes: %w[forbidden_attr forbidden_attributes forbid_attributes],
  allow_data_uri: %w[allow_data_uri],
  allow_aria_attributes: %w[allow_aria_attr allow_aria_attributes],
  allow_data_attributes: %w[allow_data_attr allow_data_attributes],
  allow_self_close_in_attributes: %w[allow_self_close_in_attr allow_self_close_in_attributes],
  allow_style_tags: %w[allow_style_tags],
  allow_document_elements: %w[allow_document_elements],
  minimal_profile: %w[minimal_profile],
  mutation_max_passes: %w[pass_limit]
}.flat_map { |attribute, keys| keys.map { |key| [key, :"#{attribute}="] } }.to_h.freeze
283
+
284
# Per-tag attribute allowlist used by the html_email profile.
#
# Maps each tag name to the only attributes accepted on that tag, so that an
# attribute which is fine on one element (for example `href` on `a`) is not
# automatically accepted on every other element.
#
# The hash, every attribute list and every attribute name are frozen: this
# constant is assigned directly as `allowed_attributes_per_tag` when the
# html_email profile is enabled, so any in-place mutation through a config
# object would otherwise alter the shared defaults for the whole process.
# To customise, build a new hash (e.g. `HTML_EMAIL_ATTRIBUTES.merge(...)`).
#
# @example Using email profile
#   config = Config.new(use_profiles: { html_email: true })
#
# @example Custom per-tag attributes
#   config.allowed_attributes_per_tag = {
#     'a' => ['href', 'title'],
#     'img' => ['src', 'alt', 'width', 'height']
#   }
#
# @see #allowed_attributes_per_tag Configuration option for per-tag control
HTML_EMAIL_ATTRIBUTES = begin
  # Presentation/global attributes shared by most text-level and block elements
  common = %w[align class id style dir lang title]
  # Attributes shared by table cells (td/th)
  cell = %w[width height colspan rowspan align valign bgcolor background style class id headers scope]

  {
    # Document structure
    'body' => %w[bgcolor text link vlink alink background style class id leftmargin topmargin marginwidth
                 marginheight],
    'html' => %w[lang dir xmlns],
    'head' => [],
    'meta' => %w[name content charset],
    'title' => [],
    'style' => %w[type],

    # Table elements (core of email layouts)
    'table' => %w[width height border cellpadding cellspacing align bgcolor background style class id role summary],
    'thead' => common,
    'tbody' => common,
    'tfoot' => common,
    'tr' => %w[height bgcolor background valign align style class id],
    'td' => cell,
    'th' => cell,

    # Legacy presentation elements
    'font' => %w[face size color style],
    'center' => common,

    # Links and media
    'a' => %w[href target title class id style name rel],
    'img' => %w[src alt width height border align style class id],

    # Headings
    'h1' => common,
    'h2' => common,
    'h3' => common,
    'h4' => common,
    'h5' => common,
    'h6' => common,

    # Block elements
    'p' => common,
    'div' => common,
    'span' => common,
    'blockquote' => common + %w[cite],
    'pre' => common,
    'code' => common,

    # Lists
    'ul' => common + %w[type],
    'ol' => common + %w[type start],
    'li' => common + %w[value],

    # Inline formatting
    'strong' => common,
    'em' => common,
    'b' => common,
    'i' => common,
    'u' => common,
    's' => common,
    'strike' => common,
    'sup' => common,
    'sub' => common,
    'small' => common,
    'big' => common,
    'mark' => common,
    'del' => common + %w[cite datetime],
    'ins' => common + %w[cite datetime],

    # Empty elements
    'br' => %w[class style],
    'hr' => common + %w[width size noshade]
  }.each_value { |attributes| attributes.each(&:freeze).freeze }.freeze
end
377
+
378
# Returns the currently active content type profiles.
#
# @return [Hash<Symbol, Boolean>] profile names mapped to their enabled flag
def use_profiles
  @use_profiles
end

# Sets content type profiles and rebuilds the profile-derived configuration.
#
# Profiles are pre-configured sets of tags and attributes for common content
# types. Assigning profiles first resets every profile-dependent setting and
# then, unless the hash is empty, rebuilds the allowed tags and attributes
# from the enabled profiles.
#
# **Available profiles:**
# - `:html` - Standard HTML5 content
# - `:svg` - SVG graphics
# - `:svg_filters` - SVG filter effects
# - `:math_ml` - Mathematical notation
# - `:html_email` - HTML email with legacy attributes
#
# Profile names may be given as symbols or strings; they are stored as
# symbols because the profile lookups use symbol keys. Without this, a
# string-keyed hash (for example one loaded from JSON or YAML) would be
# accepted but enable nothing.
#
# @param profiles [Hash<Symbol, Boolean>, Hash<String, Boolean>, nil] profiles
#   to enable; nil is treated as an empty hash
# @return [void]
#
# @example Enable multiple profiles
#   config.use_profiles = { html: true, svg: true }
#
# @example Email profile
#   config.use_profiles = { 'html_email' => true }
#
# @note Setting profiles resets allowed_tags and allowed_attributes to nil,
#   allowing the profile configuration to take effect
def use_profiles=(profiles)
  @use_profiles = (profiles || {}).transform_keys do |name|
    name.respond_to?(:to_sym) ? name.to_sym : name
  end
  reset_profile_dependent_settings
  process_profiles unless @use_profiles.empty?
end
407
+
408
+ private
409
+
410
# Applies configuration options from a hash by calling the matching setters.
#
# Each key is stringified and downcased, resolved through CONFIG_MAPPING
# (falling back to "<key>=") and dispatched only when the object responds to
# that setter publicly; unrecognised keys are ignored.
#
# The `use_profiles` option is always applied first. Its setter resets the
# profile-dependent settings (allowed tags, allowed attributes, ...), so
# applying it after an explicit option such as `allowed_tags` would silently
# discard that option and make the result depend on hash key order.
#
# @param cfg [Hash, nil] configuration hash; nil is treated as empty
# @return [void]
def apply_config(cfg)
  settings = (cfg || {}).map { |key, value| [key.to_s.downcase, value] }
  profile_settings, other_settings = settings.partition { |name, _value| name == 'use_profiles' }

  (profile_settings + other_settings).each do |name, value|
    setter = CONFIG_MAPPING[name] || :"#{name}="
    # respond_to? only sees public methods, so private helpers stay unreachable
    send(setter, value) if respond_to?(setter)
  end
end
420
+
421
# Restores every profile-dependent setting to its default value.
#
# Called when profiles are changed so that nothing configured by the previous
# profile set leaks into the new one. This includes DOM clobbering protection
# (`sanitize_dom`), which the html_email profile switches off: without
# resetting it here, moving from html_email to another profile would leave
# that protection disabled.
#
# @return [void]
# @api private
def reset_profile_dependent_settings
  @allowed_tags = nil
  @allowed_attributes = nil
  @allowed_attributes_per_tag = nil
  @allow_style_tags = true
  @allow_document_elements = false
  @allow_unknown_protocols = false
  @whole_document = false
  @sanitize_dom = true
  @forbidden_tags = %w[base link meta annotation-xml noscript]
end
438
+
439
# Derives the allowed tag and attribute lists from the active profiles.
#
# A list is only built while it is still nil, so a list that has already been
# set is left untouched.
#
# @return [void]
def process_profiles
  if @allowed_tags.nil?
    configure_allowed_tags
  end

  if @allowed_attributes.nil?
    configure_allowed_attributes
  end
end
446
+
447
# Builds the allowed tag list from the enabled profiles.
#
# The list always starts with '#text' and is then extended, in a fixed order,
# with the tag set of each enabled profile (html, svg, svg_filters, math_ml).
# When the html_email profile is enabled, its dedicated setup runs last.
#
# @return [void]
# @api private
def configure_allowed_tags
  tags = ['#text']
  tags.concat(Tags::HTML) if @use_profiles[:html]
  tags.concat(Tags::SVG) if @use_profiles[:svg]
  tags.concat(Tags::SVG_FILTERS) if @use_profiles[:svg_filters]
  tags.concat(Tags::MATH_ML) if @use_profiles[:math_ml]
  @allowed_tags = tags

  return unless @use_profiles[:html_email]

  configure_html_email_tags
end
462
+
463
# Applies the settings specific to the html_email profile.
#
# - Adds the email tag set to the allowed tags
# - Removes `meta` and `style` from the forbidden tags list
# - Treats input as a whole document and allows document elements
#   (html, head, body)
# - Allows style tags
# - Keeps unknown protocols blocked
# - Disables DOM clobbering protection
#
# @return [void]
# @api private
def configure_html_email_tags
  @allowed_tags = @allowed_tags + Tags::HTML_EMAIL
  @forbidden_tags = @forbidden_tags - %w[meta style]

  @whole_document = true
  @allow_document_elements = true
  @allow_style_tags = true
  @allow_unknown_protocols = false

  # Emails use IDs for styling, rendered in sandboxed contexts
  @sanitize_dom = false
end
483
+
484
# Builds the allowed attribute list from the enabled profiles.
#
# Attribute sets are collected in a fixed order (html, then svg/svg_filters,
# then math_ml) and de-duplicated. The svg and svg_filters profiles contribute
# the same SVG + XML sets, and math_ml contributes the XML set again, so
# without de-duplication enabling several of them would repeat every shared
# attribute in the list.
#
# For the html_email profile the per-tag restrictions in HTML_EMAIL_ATTRIBUTES
# are installed instead of adding to the global list.
#
# @return [void]
# @api private
def configure_allowed_attributes
  attributes = []
  attributes.concat(Attributes::HTML) if @use_profiles[:html]
  attributes.concat(Attributes::SVG, Attributes::XML) if @use_profiles[:svg] || @use_profiles[:svg_filters]
  attributes.concat(Attributes::MATH_ML, Attributes::XML) if @use_profiles[:math_ml]

  # uniq keeps the first occurrence, so the relative order is unchanged
  @allowed_attributes = attributes.uniq
  @allowed_attributes_per_tag = HTML_EMAIL_ATTRIBUTES if @use_profiles[:html_email]
end
499
+ end
500
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dandruff
4
# Regular expressions for attribute matching and content validation
#
# This module contains the patterns used for validating attribute names,
# detecting template expressions, and checking URI protocols. These patterns
# are security-sensitive and should not be modified without careful
# consideration.
#
# Allowlist patterns are anchored with `\A`/`\z` (whole string). In Ruby, `^`
# and `$` match at every line boundary, so a multi-line value could otherwise
# satisfy an allowlist through any one of its lines.
#
# @api private
module Expressions
  # Matches HTML5 data attribute names (data-*)
  #
  # The whole name must be 'data-' followed by one or more word characters
  # or hyphens.
  #
  # @example Matching data attributes
  #   'data-user-id' =~ Expressions::DATA_ATTR  # matches
  #   'data-toggle' =~ Expressions::DATA_ATTR   # matches
  #   'data' =~ Expressions::DATA_ATTR          # does not match
  #   'data-' =~ Expressions::DATA_ATTR         # does not match
  DATA_ATTR = /\Adata-[\w-]+\z/

  # Matches ARIA accessibility attribute names (aria-*)
  #
  # The whole name must be 'aria-' followed by one or more word characters
  # or hyphens.
  #
  # @example Matching aria attributes
  #   'aria-label' =~ Expressions::ARIA_ATTR   # matches
  #   'aria-hidden' =~ Expressions::ARIA_ATTR  # matches
  #   'aria' =~ Expressions::ARIA_ATTR         # does not match
  ARIA_ATTR = /\Aaria-[\w-]+\z/

  # Matches Mustache/Handlebars template expressions
  #
  # Detects template expressions in the format `{{ expression }}` whose body
  # contains no `}` character.
  #
  # @example Matching mustache expressions
  #   '{{ user.name }}' =~ Expressions::MUSTACHE_EXPR  # matches
  #   '{{value}}' =~ Expressions::MUSTACHE_EXPR        # matches
  #
  # @see Config#safe_for_templates
  MUSTACHE_EXPR = /\{\{[^}]+\}\}/

  # Matches ERB (Embedded Ruby) template expressions
  #
  # Detects ERB expressions in the format `<% expression %>`,
  # `<%= expression %>`, or `<%- expression %>` whose body contains no `%`
  # character.
  #
  # @example Matching ERB expressions
  #   '<%= user.name %>' =~ Expressions::ERB_EXPR  # matches
  #   '<% if admin? %>' =~ Expressions::ERB_EXPR   # matches
  #   '<%- value -%>' =~ Expressions::ERB_EXPR     # matches
  #
  # @see Config#safe_for_templates
  ERB_EXPR = /<%[=-]?[^%]+%>/

  # Matches JavaScript template literal expressions
  #
  # Detects template expressions in the format `${ expression }` whose body
  # contains no `}` character.
  #
  # @example Matching template literals
  #   '${user.name}' =~ Expressions::TMPLIT_EXPR  # matches
  #   '${value}' =~ Expressions::TMPLIT_EXPR      # matches
  #
  # @see Config#safe_for_templates
  TMPLIT_EXPR = /\$\{[^}]+\}/

  # Validates allowed URI protocols and relative URLs
  #
  # A value is accepted when, from the very start of the string, it either
  # - begins with one of the allowed schemes (http, https, mailto, ftp, tel),
  # - begins with a non-letter (e.g. `/path`, `./x`, `#anchor`, `?query`), or
  # - begins with a run of scheme characters that is NOT followed by `:`
  #   (a scheme-less relative URL such as `path/to/page` or `page2.html`).
  #
  # Any other `scheme:` prefix (javascript, vbscript, data, ...) is rejected.
  #
  # The hyphen inside the negated class is escaped on purpose: written as
  # `.-:` it forms a character range that also covers `/` and the digits,
  # which makes ordinary relative URLs fail the check.
  #
  # @example Valid URIs
  #   'https://example.com' =~ Expressions::IS_ALLOWED_URI      # matches
  #   'mailto:user@example.com' =~ Expressions::IS_ALLOWED_URI  # matches
  #   '/path/to/page' =~ Expressions::IS_ALLOWED_URI            # matches
  #   'path/to/page' =~ Expressions::IS_ALLOWED_URI             # matches
  #   'javascript:alert(1)' =~ Expressions::IS_ALLOWED_URI      # does not match
  #
  # @see Config#allowed_uri_regexp Custom URI pattern override
  IS_ALLOWED_URI = /\A(?:(?:https?|mailto|ftp|tel):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|\z))/i

  # Detects dangerous JavaScript and data:text/html URIs
  #
  # Matches values in which a line starts, after optional whitespace, with
  # `javascript:` or `data:text/html`. The line anchor `^` is kept here
  # deliberately: for a blocklist, also flagging a dangerous prefix at the
  # start of a later line errs on the side of rejecting.
  #
  # @example Dangerous URIs
  #   'javascript:alert(1)' =~ Expressions::IS_SCRIPT_OR_DATA      # matches
  #   ' javascript:void(0)' =~ Expressions::IS_SCRIPT_OR_DATA      # matches (whitespace)
  #   'data:text/html,<script>' =~ Expressions::IS_SCRIPT_OR_DATA  # matches
  #   'data:image/png;base64' =~ Expressions::IS_SCRIPT_OR_DATA    # does not match
  IS_SCRIPT_OR_DATA = %r{^(?:\s*javascript:|\s*data:text/html)}i
end
103
+ end