selma 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,25 @@
1
1
  use lol_html::{
2
2
  doc_comments, doctype, element,
3
3
  html_content::{Element, TextChunk},
4
- text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
4
+ text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, MemorySettings, Selector,
5
+ Settings,
5
6
  };
6
7
  use magnus::{
7
- exception, function, method, scan_args,
8
+ exception, function, method,
9
+ r_hash::ForEach,
10
+ scan_args,
8
11
  typed_data::Obj,
9
12
  value::{Opaque, ReprValue},
10
- Module, Object, RArray, RModule, Ruby, Value,
13
+ Integer, IntoValue, Module, Object, RArray, RHash, RModule, Ruby, Symbol, Value,
11
14
  };
12
15
 
13
- use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
16
+ use std::{
17
+ borrow::Cow,
18
+ cell::{Ref, RefCell},
19
+ ops::Deref,
20
+ primitive::str,
21
+ rc::Rc,
22
+ };
14
23
 
15
24
  use crate::{
16
25
  html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
@@ -19,9 +28,26 @@ use crate::{
19
28
  tags::Tag,
20
29
  };
21
30
 
31
+ #[derive(Copy, Clone)]
32
+ pub struct ObjectValue {
33
+ pub inner: Opaque<Value>,
34
+ }
35
+
36
+ impl IntoValue for ObjectValue {
37
+ fn into_value_with(self, _: &Ruby) -> Value {
38
+ Ruby::get().unwrap().get_inner(self.inner)
39
+ }
40
+ }
41
+
42
+ impl From<Value> for ObjectValue {
43
+ fn from(v: Value) -> Self {
44
+ Self { inner: v.into() }
45
+ }
46
+ }
47
+
22
48
  #[derive(Clone)]
23
49
  pub struct Handler {
24
- rb_handler: Opaque<Value>,
50
+ rb_handler: ObjectValue,
25
51
  rb_selector: Opaque<Obj<SelmaSelector>>,
26
52
  // total_element_handler_calls: usize,
27
53
  // total_elapsed_element_handlers: f64,
@@ -30,16 +56,25 @@ pub struct Handler {
30
56
  // total_elapsed_text_handlers: f64,
31
57
  }
32
58
 
59
+ struct RewriterOptions {
60
+ memory_options: MemorySettings,
61
+ }
62
+
33
63
  pub struct Rewriter {
34
64
  sanitizer: Option<SelmaSanitizer>,
35
65
  handlers: Vec<Handler>,
66
+ options: RewriterOptions,
36
67
  // total_elapsed: f64,
37
68
  }
38
69
 
39
70
  #[magnus::wrap(class = "Selma::Rewriter")]
40
71
  pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
41
72
 
42
- type RewriterValues = (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>);
73
+ type RewriterValues = (
74
+ Option<Option<Obj<SelmaSanitizer>>>,
75
+ Option<RArray>,
76
+ Option<RHash>,
77
+ );
43
78
 
44
79
  impl SelmaRewriter {
45
80
  const SELMA_ON_END_TAG: &'static str = "on_end_tag";
@@ -50,9 +85,10 @@ impl SelmaRewriter {
50
85
  /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
51
86
  /// @param sanitizer [Selma::Sanitizer] The sanitizer which performs the initial cleanup
52
87
  /// @param handlers [Array<Selma::Selector>] The handlers to use to perform HTML rewriting
88
+ /// @param options [Hash] Any additional options to pass to the rewriter
53
89
  /// @return [Selma::Rewriter]
54
90
  fn new(args: &[Value]) -> Result<Self, magnus::Error> {
55
- let (rb_sanitizer, rb_handlers) = Self::scan_parse_args(args)?;
91
+ let (rb_sanitizer, rb_handlers, rb_options) = Self::scan_parse_args(args)?;
56
92
 
57
93
  let sanitizer = match rb_sanitizer {
58
94
  None => {
@@ -60,13 +96,13 @@ impl SelmaRewriter {
60
96
  let default_sanitizer = SelmaSanitizer::new(&[])?;
61
97
  let wrapped_sanitizer = Obj::wrap(default_sanitizer);
62
98
  wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
63
- Some(wrapped_sanitizer.get().to_owned())
99
+ Some(wrapped_sanitizer.deref().to_owned())
64
100
  }
65
101
  Some(sanitizer_value) => match sanitizer_value {
66
102
  None => None, // no `sanitizer:` provided, use default
67
103
  Some(sanitizer) => {
68
104
  sanitizer.funcall::<&str, (), Value>("setup", ())?;
69
- Some(sanitizer.get().to_owned())
105
+ Some(sanitizer.deref().to_owned())
70
106
  }
71
107
  },
72
108
  };
@@ -101,7 +137,7 @@ impl SelmaRewriter {
101
137
  Ok(rb_selector) => rb_selector,
102
138
  };
103
139
  let handler = Handler {
104
- rb_handler: Opaque::from(rb_handler),
140
+ rb_handler: ObjectValue::from(rb_handler),
105
141
  rb_selector: Opaque::from(rb_selector),
106
142
  // total_element_handler_calls: 0,
107
143
  // total_elapsed_element_handlers: 0.0,
@@ -122,9 +158,88 @@ impl SelmaRewriter {
122
158
  ));
123
159
  }
124
160
 
161
+ let mut rewriter_options = RewriterOptions::new();
162
+
163
+ match rb_options {
164
+ None => {}
165
+ Some(options) => {
166
+ options.foreach(|key: Symbol, value: RHash| {
167
+ let key = key.to_string();
168
+ match key.as_str() {
169
+ "memory" => {
170
+ let max_allowed_memory_usage = value.get(Symbol::new("max_allowed_memory_usage"));
171
+ if max_allowed_memory_usage.is_some() {
172
+ let max_allowed_memory_usage = max_allowed_memory_usage.unwrap();
173
+ let max_allowed_memory_usage =
174
+ Integer::from_value(max_allowed_memory_usage);
175
+ if max_allowed_memory_usage.is_some() {
176
+ match max_allowed_memory_usage.unwrap().to_u64() {
177
+ Ok(max_allowed_memory_usage) => {
178
+ rewriter_options.memory_options.max_allowed_memory_usage =
179
+ max_allowed_memory_usage as usize;
180
+ }
181
+ Err(_e) => {
182
+ return Err(magnus::Error::new(
183
+ exception::arg_error(),
184
+ "max_allowed_memory_usage must be a positive integer",
185
+ ));
186
+ }
187
+ }
188
+ } else {
189
+ rewriter_options.memory_options.max_allowed_memory_usage = MemorySettings::default().max_allowed_memory_usage;
190
+ }
191
+ }
192
+
193
+ let preallocated_parsing_buffer_size = value.get(Symbol::new("preallocated_parsing_buffer_size"));
194
+ if preallocated_parsing_buffer_size.is_some() {
195
+ let preallocated_parsing_buffer_size = preallocated_parsing_buffer_size.unwrap();
196
+ let preallocated_parsing_buffer_size =
197
+ Integer::from_value(preallocated_parsing_buffer_size);
198
+ if preallocated_parsing_buffer_size.is_some() {
199
+ match preallocated_parsing_buffer_size.unwrap().to_u64() {
200
+ Ok(preallocated_parsing_buffer_size) => {
201
+ rewriter_options.memory_options.preallocated_parsing_buffer_size =
202
+ preallocated_parsing_buffer_size as usize;
203
+ }
204
+ Err(_e) => {
205
+ return Err(magnus::Error::new(
206
+ exception::arg_error(),
207
+ "preallocated_parsing_buffer_size must be a positive integer",
208
+ ));
209
+ }
210
+ }
211
+ } else {
212
+ rewriter_options.memory_options.preallocated_parsing_buffer_size = MemorySettings::default().preallocated_parsing_buffer_size;
213
+ }
214
+ }
215
+ }
216
+ _ => {
217
+ return Err(magnus::Error::new(
218
+ exception::arg_error(),
219
+ format!("Unknown option: {key:?}"),
220
+ ));
221
+ }
222
+ }
223
+ Ok(ForEach::Continue)
224
+ })?;
225
+ }
226
+ }
227
+
228
+ if rewriter_options
229
+ .memory_options
230
+ .preallocated_parsing_buffer_size
231
+ > rewriter_options.memory_options.max_allowed_memory_usage
232
+ {
233
+ return Err(magnus::Error::new(
234
+ exception::arg_error(),
235
+ "max_allowed_memory_usage must be greater than preallocated_parsing_buffer_size",
236
+ ));
237
+ }
238
+
125
239
  Ok(Self(std::cell::RefCell::new(Rewriter {
126
240
  sanitizer,
127
241
  handlers,
242
+ options: rewriter_options,
128
243
  // total_elapsed: 0.0,
129
244
  })))
130
245
  }
@@ -141,123 +256,122 @@ impl SelmaRewriter {
141
256
  let kwargs = scan_args::get_kwargs::<
142
257
  _,
143
258
  (),
144
- (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>),
259
+ (
260
+ Option<Option<Obj<SelmaSanitizer>>>,
261
+ Option<RArray>,
262
+ Option<RHash>,
263
+ ),
145
264
  (),
146
- >(args.keywords, &[], &["sanitizer", "handlers"])?;
147
- let (rb_sanitizer, rb_handlers) = kwargs.optional;
265
+ >(args.keywords, &[], &["sanitizer", "handlers", "options"])?;
266
+ let (rb_sanitizer, rb_handlers, rb_options) = kwargs.optional;
148
267
 
149
- Ok((rb_sanitizer, rb_handlers))
268
+ Ok((rb_sanitizer, rb_handlers, rb_options))
150
269
  }
151
270
 
152
271
  /// Perform HTML rewrite sequence.
153
272
  fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
154
- let sanitized_html = match &self.0.borrow().sanitizer {
155
- None => Ok(html),
156
- Some(sanitizer) => {
157
- let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
158
- Ok(sanitized_html) => sanitized_html,
159
- Err(err) => return Err(err),
160
- };
273
+ let binding = self.0.borrow();
161
274
 
162
- String::from_utf8(sanitized_html)
275
+ let mut sanitizer_document_content_handlers: Vec<DocumentContentHandlers> = vec![];
276
+ let mut sanitizer_initial_element_content_handlers: Vec<(
277
+ Cow<Selector>,
278
+ ElementContentHandlers,
279
+ )> = vec![];
280
+
281
+ match &binding.sanitizer {
282
+ None => (),
283
+ Some(sanitizer) => {
284
+ if !sanitizer.get_allow_doctype() {
285
+ sanitizer_document_content_handlers.push(doctype!(|d| {
286
+ sanitizer.remove_doctype(d);
287
+ Ok(())
288
+ }));
289
+ }
290
+ if !sanitizer.get_allow_comments() {
291
+ sanitizer_document_content_handlers.push(doc_comments!(|c| {
292
+ sanitizer.remove_comment(c);
293
+ Ok(())
294
+ }));
295
+ }
296
+ sanitizer_initial_element_content_handlers.push(element!("*", |el| {
297
+ sanitizer.try_remove_element(el);
298
+ if el.removed() {
299
+ return Ok(());
300
+ }
301
+ match sanitizer.sanitize_attributes(el) {
302
+ Ok(_) => Ok(()),
303
+ Err(err) => Err(err.to_string().into()),
304
+ }
305
+ }));
163
306
  }
164
307
  };
165
- let binding = self.0.borrow_mut();
308
+
166
309
  let handlers = &binding.handlers;
167
310
 
168
- match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
169
- Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
311
+ match Self::perform_handler_rewrite(
312
+ self,
313
+ sanitizer_document_content_handlers,
314
+ sanitizer_initial_element_content_handlers,
315
+ handlers,
316
+ html,
317
+ ) {
318
+ Ok(rewritten_html) => match &binding.sanitizer {
319
+ None => match String::from_utf8(rewritten_html) {
320
+ Ok(output) => Ok(output),
321
+ Err(err) => Err(magnus::Error::new(
322
+ exception::runtime_error(),
323
+ format!("{err:?}"),
324
+ )),
325
+ },
326
+ Some(sanitizer) => {
327
+ Self::perform_final_sanitization(self, sanitizer, rewritten_html)
328
+ }
329
+ },
170
330
  Err(err) => Err(err),
171
331
  }
172
332
  }
173
333
 
174
- fn perform_sanitization(
334
+ // to get rid of some really nasty edge cases with dangerous tags, we perform one more
335
+ // sanitization pass at the end
336
+ fn perform_final_sanitization(
337
+ &self,
175
338
  sanitizer: &SelmaSanitizer,
176
- html: &String,
177
- ) -> Result<Vec<u8>, magnus::Error> {
178
- let mut first_pass_html = vec![];
179
- {
180
- let mut document_content_handlers: Vec<DocumentContentHandlers> = vec![];
181
- if !sanitizer.get_allow_doctype() {
182
- document_content_handlers.push(doctype!(|d| {
183
- sanitizer.remove_doctype(d);
184
- Ok(())
185
- }));
186
- }
187
- if !sanitizer.get_allow_comments() {
188
- document_content_handlers.push(doc_comments!(|c| {
189
- sanitizer.remove_comment(c);
190
- Ok(())
191
- }));
192
- }
193
- let mut rewriter = HtmlRewriter::new(
194
- Settings {
195
- document_content_handlers,
196
- element_content_handlers: vec![element!("*", |el| {
197
- sanitizer.try_remove_element(el);
198
- if el.removed() {
199
- return Ok(());
200
- }
201
- match sanitizer.sanitize_attributes(el) {
202
- Ok(_) => Ok(()),
203
- Err(err) => Err(err.to_string().into()),
204
- }
205
- })],
206
- // TODO: allow for MemorySettings to be defined
207
- ..Settings::default()
208
- },
209
- |c: &[u8]| first_pass_html.extend_from_slice(c),
210
- );
211
-
212
- let result = rewriter.write(html.as_bytes());
213
- if result.is_err() {
214
- return Err(magnus::Error::new(
215
- exception::runtime_error(),
216
- format!("Failed to sanitize HTML: {}", result.unwrap_err()),
217
- ));
218
- }
219
- }
220
-
221
- let mut output = vec![];
222
- {
223
- let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
224
- if sanitizer.get_escape_tagfilter() {
225
- element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
226
- let should_remove = sanitizer.allow_element(el);
227
- if should_remove {
228
- sanitizer.force_remove_element(el);
229
- }
339
+ html: Vec<u8>,
340
+ ) -> Result<String, magnus::Error> {
341
+ // TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
342
+ let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
230
343
 
231
- Ok(())
232
- }));
233
- }
344
+ if sanitizer.get_escape_tagfilter() {
345
+ element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
346
+ let should_remove = sanitizer.allow_element(el);
347
+ if should_remove {
348
+ sanitizer.force_remove_element(el);
349
+ }
234
350
 
235
- let mut rewriter = HtmlRewriter::new(
236
- Settings {
237
- element_content_handlers,
238
- ..Settings::default()
239
- },
240
- |c: &[u8]| output.extend_from_slice(c),
241
- );
351
+ Ok(())
352
+ }));
353
+ }
242
354
 
243
- let result = rewriter.write(first_pass_html.as_slice());
244
- if result.is_err() {
245
- return Err(magnus::Error::new(
355
+ match Self::run_rewrite(self, vec![], element_content_handlers, html.as_slice()) {
356
+ Ok(rewritten_html) => match String::from_utf8(rewritten_html) {
357
+ Ok(output) => Ok(output),
358
+ Err(err) => Err(magnus::Error::new(
246
359
  exception::runtime_error(),
247
- format!("Failed to sanitize HTML: {}", result.unwrap_err()),
248
- ));
249
- }
360
+ format!("{err:?}"),
361
+ )),
362
+ },
363
+ Err(err) => Err(err),
250
364
  }
251
-
252
- Ok(output)
253
365
  }
254
366
 
255
367
  pub fn perform_handler_rewrite(
256
368
  &self,
369
+ sanitizer_document_content_handlers: Vec<DocumentContentHandlers>,
370
+ sanitizer_initial_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)>,
257
371
  handlers: &[Handler],
258
372
  html: String,
259
373
  ) -> Result<Vec<u8>, magnus::Error> {
260
- // TODO: this should ideally be done ahead of time, not on every `#rewrite` call
374
+ // TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
261
375
  let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
262
376
 
263
377
  handlers.iter().for_each(|handler| {
@@ -275,7 +389,7 @@ impl SelmaRewriter {
275
389
  selector.match_element().unwrap(),
276
390
  move |el| {
277
391
  match Self::process_element_handlers(
278
- ruby.get_inner(handler.rb_handler),
392
+ handler.rb_handler,
279
393
  el,
280
394
  &closure_element_stack.borrow(),
281
395
  ) {
@@ -306,9 +420,7 @@ impl SelmaRewriter {
306
420
  }
307
421
  }
308
422
 
309
- let ruby = Ruby::get().unwrap();
310
- match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
311
- {
423
+ match Self::process_text_handlers(handler.rb_handler, text) {
312
424
  Ok(_) => Ok(()),
313
425
  Err(err) => Err(err.to_string().into()),
314
426
  }
@@ -341,16 +453,35 @@ impl SelmaRewriter {
341
453
  }));
342
454
  });
343
455
 
456
+ element_content_handlers.extend(sanitizer_initial_element_content_handlers);
457
+
458
+ Self::run_rewrite(
459
+ self,
460
+ sanitizer_document_content_handlers,
461
+ element_content_handlers,
462
+ html.as_bytes(),
463
+ )
464
+ }
465
+
466
+ fn run_rewrite(
467
+ &self,
468
+ document_content_handlers: Vec<DocumentContentHandlers>,
469
+ element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)>,
470
+ html: &[u8],
471
+ ) -> Result<Vec<u8>, magnus::Error> {
472
+ let binding = &self.0.borrow();
344
473
  let mut output = vec![];
345
474
  {
346
475
  let mut rewriter = HtmlRewriter::new(
347
476
  Settings {
477
+ document_content_handlers,
348
478
  element_content_handlers,
479
+ memory_settings: Self::get_memory_options(binding),
349
480
  ..Settings::default()
350
481
  },
351
482
  |c: &[u8]| output.extend_from_slice(c),
352
483
  );
353
- match rewriter.write(html.as_bytes()) {
484
+ match rewriter.write(html) {
354
485
  Ok(_) => {}
355
486
  Err(err) => {
356
487
  return Err(magnus::Error::new(
@@ -364,10 +495,12 @@ impl SelmaRewriter {
364
495
  }
365
496
 
366
497
  fn process_element_handlers(
367
- rb_handler: Value,
498
+ obj_rb_handler: ObjectValue,
368
499
  element: &mut Element,
369
500
  ancestors: &[String],
370
501
  ) -> Result<(), magnus::Error> {
502
+ let rb_handler = Ruby::get().unwrap().get_inner(obj_rb_handler.inner);
503
+
371
504
  // if `on_end_tag` function is defined, call it
372
505
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
373
506
  // TODO: error here is an "EndTagError"
@@ -394,9 +527,11 @@ impl SelmaRewriter {
394
527
  }
395
528
 
396
529
  fn process_text_handlers(
397
- rb_handler: Value,
530
+ obj_rb_handler: ObjectValue,
398
531
  text_chunk: &mut TextChunk,
399
532
  ) -> Result<(), magnus::Error> {
533
+ let rb_handler = Ruby::get().unwrap().get_inner(obj_rb_handler.inner);
534
+
400
535
  // prevents missing `handle_text_chunk` function
401
536
  let content = text_chunk.as_str();
402
537
 
@@ -414,6 +549,22 @@ impl SelmaRewriter {
414
549
  )),
415
550
  }
416
551
  }
552
+
553
+ fn get_memory_options(binding: &Ref<Rewriter>) -> MemorySettings {
554
+ let options = &binding.options.memory_options;
555
+ MemorySettings {
556
+ max_allowed_memory_usage: options.max_allowed_memory_usage,
557
+ preallocated_parsing_buffer_size: options.preallocated_parsing_buffer_size,
558
+ }
559
+ }
560
+ }
561
+
562
+ impl RewriterOptions {
563
+ pub fn new() -> Self {
564
+ Self {
565
+ memory_options: MemorySettings::default(),
566
+ }
567
+ }
417
568
  }
418
569
 
419
570
  pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
@@ -211,20 +211,23 @@ impl SelmaSanitizer {
211
211
  }
212
212
  Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
213
213
  }
214
- } else if allowed_protocol.is_kind_of(class::symbol())
215
- && allowed_protocol.inspect() == ":relative"
216
- {
217
- match protocol_list {
218
- None => {
219
- protocol_sanitizers.insert(
220
- attr_name.to_string(),
221
- vec!["#".to_string(), "/".to_string()],
222
- );
223
- }
224
- Some(protocol_list) => {
225
- protocol_list.push("#".to_string());
226
- protocol_list.push("/".to_string());
214
+ } else if allowed_protocol.is_kind_of(class::symbol()) {
215
+ let protocol_config = allowed_protocol.inspect();
216
+ if protocol_config == ":relative" {
217
+ match protocol_list {
218
+ None => {
219
+ protocol_sanitizers.insert(
220
+ attr_name.to_string(),
221
+ vec!["#".to_string(), "/".to_string()],
222
+ );
223
+ }
224
+ Some(protocol_list) => {
225
+ protocol_list.push("#".to_string());
226
+ protocol_list.push("/".to_string());
227
+ }
227
228
  }
229
+ } else if protocol_config == ":all" {
230
+ protocol_sanitizers.insert(attr_name.to_string(), vec!["all".to_string()]);
228
231
  }
229
232
  }
230
233
  }
@@ -335,7 +338,7 @@ impl SelmaSanitizer {
335
338
  element: &mut Element,
336
339
  element_sanitizer: &ElementSanitizer,
337
340
  attr_name: &String,
338
- attr_val: &String,
341
+ attr_val: &str,
339
342
  ) -> Result<bool, AttributeNameError> {
340
343
  let mut allowed: bool = false;
341
344
  let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
@@ -387,7 +390,11 @@ impl SelmaSanitizer {
387
390
  attr_val.contains("://")
388
391
  }
389
392
 
390
- fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &String) -> bool {
393
+ fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
394
+ if protocols_allowed.contains(&"all".to_string()) {
395
+ return true;
396
+ }
397
+
391
398
  // FIXME: is there a more idiomatic way to do this?
392
399
  let mut pos: usize = 0;
393
400
  let mut chars = attr_val.chars();
@@ -542,7 +549,7 @@ impl SelmaSanitizer {
542
549
  ) -> &'a mut ElementSanitizer {
543
550
  element_sanitizers
544
551
  .entry(element_name.to_string())
545
- .or_insert_with(ElementSanitizer::default)
552
+ .or_default()
546
553
  }
547
554
  }
548
555
 
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Selma
4
+ module Config
5
+ OPTIONS = {
6
+ memory: {
7
+ max_allowed_memory_usage: nil,
8
+ preallocated_parsing_buffer_size: nil,
9
+ },
10
+ }
11
+ end
12
+ end
@@ -28,7 +28,7 @@ module Selma
28
28
 
29
29
  # URL handling protocols to allow in specific attributes. By default, no
30
30
  # protocols are allowed. Use :relative in place of a protocol if you want
31
- # to allow relative URLs sans protocol.
31
+ # to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
32
32
  protocols: {},
33
33
 
34
34
  # An Array of element names whose contents will be removed. The contents
@@ -16,6 +16,7 @@ module Selma
16
16
  "colgroup",
17
17
  "data",
18
18
  "del",
19
+ "details",
19
20
  "div",
20
21
  "figcaption",
21
22
  "figure",
@@ -66,7 +66,12 @@ module Selma
66
66
  end
67
67
 
68
68
  def allow_protocol(element, attr, protos)
69
- protos = [protos] unless protos.is_a?(Array)
69
+ if protos.is_a?(Array)
70
+ raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
71
+ else
72
+ protos = [protos]
73
+ end
74
+
70
75
  set_allowed_protocols(element, attr, protos)
71
76
  end
72
77
 
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.2.2"
4
+ VERSION = "0.4.0"
5
5
  end