selma 0.2.2 → 0.4.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,19 +1,31 @@
1
1
  use lol_html::{
2
2
  doc_comments, doctype, element,
3
3
  html_content::{Element, TextChunk},
4
- text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
4
+ text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, MemorySettings, Selector,
5
+ Settings,
5
6
  };
6
7
  use magnus::{
7
- exception, function, method, scan_args,
8
+ exception, function, gc, method,
9
+ r_hash::ForEach,
10
+ scan_args,
8
11
  typed_data::Obj,
9
12
  value::{Opaque, ReprValue},
10
- Module, Object, RArray, RModule, Ruby, Value,
13
+ DataTypeFunctions, Integer, IntoValue, Module, Object, RArray, RHash, RModule, Ruby, Symbol,
14
+ TypedData, Value,
11
15
  };
12
16
 
13
- use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
17
+ use std::{
18
+ borrow::Cow,
19
+ cell::{Ref, RefCell},
20
+ mem,
21
+ ops::Deref,
22
+ primitive::str,
23
+ rc::Rc,
24
+ };
14
25
 
15
26
  use crate::{
16
27
  html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
28
+ native_ref_wrap::NativeRefWrap,
17
29
  sanitizer::SelmaSanitizer,
18
30
  selector::SelmaSelector,
19
31
  tags::Tag,
@@ -30,16 +42,34 @@ pub struct Handler {
30
42
  // total_elapsed_text_handlers: f64,
31
43
  }
32
44
 
45
+ struct RewriterOptions {
46
+ memory_options: MemorySettings,
47
+ }
48
+
33
49
  pub struct Rewriter {
34
50
  sanitizer: Option<SelmaSanitizer>,
35
51
  handlers: Vec<Handler>,
52
+ options: RewriterOptions,
36
53
  // total_elapsed: f64,
37
54
  }
38
55
 
39
- #[magnus::wrap(class = "Selma::Rewriter")]
56
+ #[derive(TypedData)]
57
+ #[magnus(class = "Selma::Rewriter", free_immediately, mark)]
40
58
  pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
41
59
 
42
- type RewriterValues = (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>);
60
+ impl DataTypeFunctions for SelmaRewriter {
61
+ fn mark(&self, marker: &gc::Marker) {
62
+ self.0.borrow().handlers.iter().for_each(|handler| {
63
+ marker.mark(handler.rb_handler);
64
+ });
65
+ }
66
+ }
67
+
68
+ type RewriterValues = (
69
+ Option<Option<Obj<SelmaSanitizer>>>,
70
+ Option<RArray>,
71
+ Option<RHash>,
72
+ );
43
73
 
44
74
  impl SelmaRewriter {
45
75
  const SELMA_ON_END_TAG: &'static str = "on_end_tag";
@@ -50,25 +80,20 @@ impl SelmaRewriter {
50
80
  /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
51
81
  /// @param sanitizer [Selma::Sanitizer] The sanitizer which performs the initial cleanup
52
82
  /// @param handlers [Array<Selma::Selector>] The handlers to use to perform HTML rewriting
83
+ /// @param options [Hash] Any additional options to pass to the rewriter
53
84
  /// @return [Selma::Rewriter]
54
85
  fn new(args: &[Value]) -> Result<Self, magnus::Error> {
55
- let (rb_sanitizer, rb_handlers) = Self::scan_parse_args(args)?;
86
+ let (rb_sanitizer, rb_handlers, rb_options) = Self::scan_parse_args(args)?;
56
87
 
57
88
  let sanitizer = match rb_sanitizer {
58
89
  None => {
59
- // no `sanitizer:` provided, use default
90
+ // no `sanitizer:` kwarg provided, use default
60
91
  let default_sanitizer = SelmaSanitizer::new(&[])?;
61
92
  let wrapped_sanitizer = Obj::wrap(default_sanitizer);
62
- wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
63
- Some(wrapped_sanitizer.get().to_owned())
93
+ // wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
94
+ Some(wrapped_sanitizer.deref().to_owned())
64
95
  }
65
- Some(sanitizer_value) => match sanitizer_value {
66
- None => None, // no `sanitizer:` provided, use default
67
- Some(sanitizer) => {
68
- sanitizer.funcall::<&str, (), Value>("setup", ())?;
69
- Some(sanitizer.get().to_owned())
70
- }
71
- },
96
+ Some(sanitizer_value) => sanitizer_value.map(|sanitizer| sanitizer.deref().to_owned()),
72
97
  };
73
98
 
74
99
  let handlers = match rb_handlers {
@@ -76,9 +101,7 @@ impl SelmaRewriter {
76
101
  Some(rb_handlers) => {
77
102
  let mut handlers: Vec<Handler> = vec![];
78
103
 
79
- for h in rb_handlers.each() {
80
- let rb_handler = h.unwrap();
81
-
104
+ for rb_handler in rb_handlers.into_iter() {
82
105
  // prevents missing #selector from ruining things
83
106
  if !rb_handler.respond_to("selector", true).unwrap() {
84
107
  let classname = unsafe { rb_handler.classname() };
@@ -122,9 +145,88 @@ impl SelmaRewriter {
122
145
  ));
123
146
  }
124
147
 
148
+ let mut rewriter_options = RewriterOptions::new();
149
+
150
+ match rb_options {
151
+ None => {}
152
+ Some(options) => {
153
+ options.foreach(|key: Symbol, value: RHash| {
154
+ let key = key.to_string();
155
+ match key.as_str() {
156
+ "memory" => {
157
+ let max_allowed_memory_usage = value.get(Symbol::new("max_allowed_memory_usage"));
158
+ if max_allowed_memory_usage.is_some() {
159
+ let max_allowed_memory_usage = max_allowed_memory_usage.unwrap();
160
+ let max_allowed_memory_usage =
161
+ Integer::from_value(max_allowed_memory_usage);
162
+ if max_allowed_memory_usage.is_some() {
163
+ match max_allowed_memory_usage.unwrap().to_u64() {
164
+ Ok(max_allowed_memory_usage) => {
165
+ rewriter_options.memory_options.max_allowed_memory_usage =
166
+ max_allowed_memory_usage as usize;
167
+ }
168
+ Err(_e) => {
169
+ return Err(magnus::Error::new(
170
+ exception::arg_error(),
171
+ "max_allowed_memory_usage must be a positive integer",
172
+ ));
173
+ }
174
+ }
175
+ } else {
176
+ rewriter_options.memory_options.max_allowed_memory_usage = MemorySettings::default().max_allowed_memory_usage;
177
+ }
178
+ }
179
+
180
+ let preallocated_parsing_buffer_size = value.get(Symbol::new("preallocated_parsing_buffer_size"));
181
+ if preallocated_parsing_buffer_size.is_some() {
182
+ let preallocated_parsing_buffer_size = preallocated_parsing_buffer_size.unwrap();
183
+ let preallocated_parsing_buffer_size =
184
+ Integer::from_value(preallocated_parsing_buffer_size);
185
+ if preallocated_parsing_buffer_size.is_some() {
186
+ match preallocated_parsing_buffer_size.unwrap().to_u64() {
187
+ Ok(preallocated_parsing_buffer_size) => {
188
+ rewriter_options.memory_options.preallocated_parsing_buffer_size =
189
+ preallocated_parsing_buffer_size as usize;
190
+ }
191
+ Err(_e) => {
192
+ return Err(magnus::Error::new(
193
+ exception::arg_error(),
194
+ "preallocated_parsing_buffer_size must be a positive integer",
195
+ ));
196
+ }
197
+ }
198
+ } else {
199
+ rewriter_options.memory_options.preallocated_parsing_buffer_size = MemorySettings::default().preallocated_parsing_buffer_size;
200
+ }
201
+ }
202
+ }
203
+ _ => {
204
+ return Err(magnus::Error::new(
205
+ exception::arg_error(),
206
+ format!("Unknown option: {key:?}"),
207
+ ));
208
+ }
209
+ }
210
+ Ok(ForEach::Continue)
211
+ })?;
212
+ }
213
+ }
214
+
215
+ if rewriter_options
216
+ .memory_options
217
+ .preallocated_parsing_buffer_size
218
+ > rewriter_options.memory_options.max_allowed_memory_usage
219
+ {
220
+ return Err(magnus::Error::new(
221
+ exception::arg_error(),
222
+ "max_allowed_memory_usage must be greater than preallocated_parsing_buffer_size",
223
+ ));
224
+ }
225
+
125
226
  Ok(Self(std::cell::RefCell::new(Rewriter {
126
227
  sanitizer,
127
228
  handlers,
229
+ options: rewriter_options,
128
230
  // total_elapsed: 0.0,
129
231
  })))
130
232
  }
@@ -141,125 +243,126 @@ impl SelmaRewriter {
141
243
  let kwargs = scan_args::get_kwargs::<
142
244
  _,
143
245
  (),
144
- (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>),
246
+ (
247
+ Option<Option<Obj<SelmaSanitizer>>>,
248
+ Option<RArray>,
249
+ Option<RHash>,
250
+ ),
145
251
  (),
146
- >(args.keywords, &[], &["sanitizer", "handlers"])?;
147
- let (rb_sanitizer, rb_handlers) = kwargs.optional;
252
+ >(args.keywords, &[], &["sanitizer", "handlers", "options"])?;
253
+ let (rb_sanitizer, rb_handlers, rb_options) = kwargs.optional;
148
254
 
149
- Ok((rb_sanitizer, rb_handlers))
255
+ Ok((rb_sanitizer, rb_handlers, rb_options))
150
256
  }
151
257
 
152
258
  /// Perform HTML rewrite sequence.
153
259
  fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
154
- let sanitized_html = match &self.0.borrow().sanitizer {
155
- None => Ok(html),
156
- Some(sanitizer) => {
157
- let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
158
- Ok(sanitized_html) => sanitized_html,
159
- Err(err) => return Err(err),
160
- };
260
+ let binding = self.0.borrow();
161
261
 
162
- String::from_utf8(sanitized_html)
262
+ let mut sanitizer_document_content_handlers: Vec<DocumentContentHandlers> = vec![];
263
+ let mut sanitizer_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> =
264
+ vec![];
265
+
266
+ match &binding.sanitizer {
267
+ None => (),
268
+ Some(sanitizer) => {
269
+ if !sanitizer.get_allow_doctype() {
270
+ sanitizer_document_content_handlers.push(doctype!(|d| {
271
+ sanitizer.remove_doctype(d);
272
+ Ok(())
273
+ }));
274
+ }
275
+ if !sanitizer.get_allow_comments() {
276
+ sanitizer_document_content_handlers.push(doc_comments!(|c| {
277
+ sanitizer.remove_comment(c);
278
+ Ok(())
279
+ }));
280
+ }
281
+ sanitizer_element_content_handlers.push(element!("*", |el| {
282
+ sanitizer.try_remove_element(el);
283
+ if el.removed() {
284
+ return Ok(());
285
+ }
286
+ // if it was removed, there are no attributes to sanitize
287
+ match sanitizer.sanitize_attributes(el) {
288
+ Ok(_) => Ok(()),
289
+ Err(err) => Err(err.to_string().into()),
290
+ }
291
+ }));
163
292
  }
164
293
  };
165
- let binding = self.0.borrow_mut();
166
- let handlers = &binding.handlers;
167
294
 
168
- match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
169
- Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
295
+ let handlers: &Vec<Handler> = &binding.handlers;
296
+
297
+ match Self::perform_handler_rewrite(
298
+ self,
299
+ sanitizer_document_content_handlers,
300
+ sanitizer_element_content_handlers,
301
+ handlers,
302
+ html,
303
+ ) {
304
+ Ok(rewritten_html) => match &binding.sanitizer {
305
+ None => match String::from_utf8(rewritten_html) {
306
+ Ok(output) => Ok(output),
307
+ Err(err) => Err(magnus::Error::new(
308
+ exception::runtime_error(),
309
+ format!("{err:?}"),
310
+ )),
311
+ },
312
+ Some(sanitizer) => {
313
+ Self::perform_final_sanitization(self, sanitizer, rewritten_html)
314
+ }
315
+ },
170
316
  Err(err) => Err(err),
171
317
  }
172
318
  }
173
319
 
174
- fn perform_sanitization(
320
+ // to get rid of some really nasty edge cases with dangerous tags, we perform one more
321
+ // sanitization pass at the end
322
+ fn perform_final_sanitization(
323
+ &self,
175
324
  sanitizer: &SelmaSanitizer,
176
- html: &String,
177
- ) -> Result<Vec<u8>, magnus::Error> {
178
- let mut first_pass_html = vec![];
179
- {
180
- let mut document_content_handlers: Vec<DocumentContentHandlers> = vec![];
181
- if !sanitizer.get_allow_doctype() {
182
- document_content_handlers.push(doctype!(|d| {
183
- sanitizer.remove_doctype(d);
184
- Ok(())
185
- }));
186
- }
187
- if !sanitizer.get_allow_comments() {
188
- document_content_handlers.push(doc_comments!(|c| {
189
- sanitizer.remove_comment(c);
190
- Ok(())
191
- }));
192
- }
193
- let mut rewriter = HtmlRewriter::new(
194
- Settings {
195
- document_content_handlers,
196
- element_content_handlers: vec![element!("*", |el| {
197
- sanitizer.try_remove_element(el);
198
- if el.removed() {
199
- return Ok(());
200
- }
201
- match sanitizer.sanitize_attributes(el) {
202
- Ok(_) => Ok(()),
203
- Err(err) => Err(err.to_string().into()),
204
- }
205
- })],
206
- // TODO: allow for MemorySettings to be defined
207
- ..Settings::default()
208
- },
209
- |c: &[u8]| first_pass_html.extend_from_slice(c),
210
- );
211
-
212
- let result = rewriter.write(html.as_bytes());
213
- if result.is_err() {
214
- return Err(magnus::Error::new(
215
- exception::runtime_error(),
216
- format!("Failed to sanitize HTML: {}", result.unwrap_err()),
217
- ));
218
- }
219
- }
220
-
221
- let mut output = vec![];
222
- {
223
- let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
224
- if sanitizer.get_escape_tagfilter() {
225
- element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
226
- let should_remove = sanitizer.allow_element(el);
227
- if should_remove {
228
- sanitizer.force_remove_element(el);
229
- }
325
+ html: Vec<u8>,
326
+ ) -> Result<String, magnus::Error> {
327
+ // TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
328
+ let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
230
329
 
231
- Ok(())
232
- }));
233
- }
330
+ if sanitizer.get_escape_tagfilter() {
331
+ element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
332
+ let should_remove = sanitizer.allow_element(el);
333
+ if should_remove {
334
+ sanitizer.force_remove_element(el);
335
+ }
234
336
 
235
- let mut rewriter = HtmlRewriter::new(
236
- Settings {
237
- element_content_handlers,
238
- ..Settings::default()
239
- },
240
- |c: &[u8]| output.extend_from_slice(c),
241
- );
337
+ Ok(())
338
+ }));
339
+ }
242
340
 
243
- let result = rewriter.write(first_pass_html.as_slice());
244
- if result.is_err() {
245
- return Err(magnus::Error::new(
341
+ match Self::run_rewrite(self, vec![], element_content_handlers, html.as_slice()) {
342
+ Ok(rewritten_html) => match String::from_utf8(rewritten_html) {
343
+ Ok(output) => Ok(output),
344
+ Err(err) => Err(magnus::Error::new(
246
345
  exception::runtime_error(),
247
- format!("Failed to sanitize HTML: {}", result.unwrap_err()),
248
- ));
249
- }
346
+ format!("{err:?}"),
347
+ )),
348
+ },
349
+ Err(err) => Err(err),
250
350
  }
251
-
252
- Ok(output)
253
351
  }
254
352
 
255
- pub fn perform_handler_rewrite(
353
+ pub fn perform_handler_rewrite<'a>(
256
354
  &self,
257
- handlers: &[Handler],
355
+ sanitizer_document_content_handlers: Vec<DocumentContentHandlers<'a>>,
356
+ sanitizer_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers<'a>)>,
357
+ handlers: &'a [Handler],
258
358
  html: String,
259
359
  ) -> Result<Vec<u8>, magnus::Error> {
260
- // TODO: this should ideally be done ahead of time, not on every `#rewrite` call
360
+ // TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
261
361
  let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
262
362
 
363
+ // have sanitization happen first
364
+ element_content_handlers.extend(sanitizer_element_content_handlers);
365
+
263
366
  handlers.iter().for_each(|handler| {
264
367
  let element_stack: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(vec![]));
265
368
 
@@ -275,7 +378,7 @@ impl SelmaRewriter {
275
378
  selector.match_element().unwrap(),
276
379
  move |el| {
277
380
  match Self::process_element_handlers(
278
- ruby.get_inner(handler.rb_handler),
381
+ handler,
279
382
  el,
280
383
  &closure_element_stack.borrow(),
281
384
  ) {
@@ -294,21 +397,19 @@ impl SelmaRewriter {
294
397
  move |text| {
295
398
  let element_stack = closure_element_stack.as_ref().borrow();
296
399
  if selector.ignore_text_within().is_some() {
297
- // check if current tag is a tag we should be ignoring text within
298
- let head_tag_name = element_stack.last().unwrap().to_string();
400
+ // check if current tag is a tag we should be ignoring text within;
401
+ // also checks if tag is within an ancestry of ignored tags
299
402
  if selector
300
403
  .ignore_text_within()
301
404
  .unwrap()
302
405
  .iter()
303
- .any(|f| f == &head_tag_name)
406
+ .any(|t| element_stack.contains(t))
304
407
  {
305
408
  return Ok(());
306
409
  }
307
410
  }
308
411
 
309
- let ruby = Ruby::get().unwrap();
310
- match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
311
- {
412
+ match Self::process_text_handlers(handler, text) {
312
413
  Ok(_) => Ok(()),
313
414
  Err(err) => Err(err.to_string().into()),
314
415
  }
@@ -329,28 +430,46 @@ impl SelmaRewriter {
329
430
 
330
431
  let closure_element_stack = element_stack.clone();
331
432
 
332
- el.end_tag_handlers()
333
- .unwrap()
334
- .push(Box::new(move |_end_tag| {
335
- let mut stack = closure_element_stack.as_ref().borrow_mut();
336
- stack.pop();
337
- Ok(())
338
- }));
433
+ if let Some(end_tag_handlers) = el.end_tag_handlers() {
434
+ end_tag_handlers.push(lol_html::EndTagHandler::into(Box::new(
435
+ move |_end_tag| {
436
+ closure_element_stack.as_ref().borrow_mut().pop();
437
+ Ok(())
438
+ },
439
+ )));
440
+ }
339
441
 
340
442
  Ok(())
341
443
  }));
342
444
  });
343
445
 
446
+ Self::run_rewrite(
447
+ self,
448
+ sanitizer_document_content_handlers,
449
+ element_content_handlers,
450
+ html.as_bytes(),
451
+ )
452
+ }
453
+
454
+ fn run_rewrite<'a>(
455
+ &self,
456
+ document_content_handlers: Vec<DocumentContentHandlers<'a>>,
457
+ element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers<'a>)>,
458
+ html: &[u8],
459
+ ) -> Result<Vec<u8>, magnus::Error> {
460
+ let binding = &self.0.borrow();
344
461
  let mut output = vec![];
345
462
  {
346
463
  let mut rewriter = HtmlRewriter::new(
347
464
  Settings {
465
+ document_content_handlers,
348
466
  element_content_handlers,
467
+ memory_settings: Self::get_memory_options(binding),
349
468
  ..Settings::default()
350
469
  },
351
470
  |c: &[u8]| output.extend_from_slice(c),
352
471
  );
353
- match rewriter.write(html.as_bytes()) {
472
+ match rewriter.write(html) {
354
473
  Ok(_) => {}
355
474
  Err(err) => {
356
475
  return Err(magnus::Error::new(
@@ -364,10 +483,12 @@ impl SelmaRewriter {
364
483
  }
365
484
 
366
485
  fn process_element_handlers(
367
- rb_handler: Value,
486
+ handler: &Handler,
368
487
  element: &mut Element,
369
488
  ancestors: &[String],
370
489
  ) -> Result<(), magnus::Error> {
490
+ let rb_handler = handler.rb_handler.into_value();
491
+
371
492
  // if `on_end_tag` function is defined, call it
372
493
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
373
494
  // TODO: error here is an "EndTagError"
@@ -375,38 +496,61 @@ impl SelmaRewriter {
375
496
  .end_tag_handlers()
376
497
  .unwrap()
377
498
  .push(Box::new(move |end_tag| {
378
- let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
499
+ let (ref_wrap, anchor) = NativeRefWrap::wrap(end_tag);
500
+
501
+ let rb_end_tag = SelmaHTMLEndTag::new(ref_wrap);
379
502
 
380
- match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
503
+ let result =
504
+ rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,));
505
+
506
+ mem::drop(anchor);
507
+
508
+ match result {
381
509
  Ok(_) => Ok(()),
382
510
  Err(err) => Err(err.to_string().into()),
383
511
  }
384
512
  }));
385
513
  }
386
514
 
387
- let rb_element = SelmaHTMLElement::new(element, ancestors);
388
- let rb_result =
389
- rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
390
- match rb_result {
515
+ let (ref_wrap, anchor) = NativeRefWrap::wrap(element);
516
+ let rb_element = SelmaHTMLElement::new(ref_wrap, ancestors);
517
+ let result = rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
518
+
519
+ mem::drop(anchor);
520
+
521
+ match result {
391
522
  Ok(_) => Ok(()),
392
- Err(err) => Err(err),
523
+ Err(err) => Err(magnus::Error::new(
524
+ exception::runtime_error(),
525
+ format!("{err:?}"),
526
+ )),
393
527
  }
394
528
  }
395
529
 
396
530
  fn process_text_handlers(
397
- rb_handler: Value,
531
+ handler: &Handler,
398
532
  text_chunk: &mut TextChunk,
399
533
  ) -> Result<(), magnus::Error> {
534
+ let rb_handler = handler.rb_handler.into_value();
535
+
400
536
  // prevents missing `handle_text_chunk` function
401
537
  let content = text_chunk.as_str();
402
538
 
403
- // seems that sometimes lol-html returns blank text / EOLs?
539
+ // lol-html sometimes returns blank text if
540
+ // last_in_text_node() is true
404
541
  if content.is_empty() {
405
542
  return Ok(());
406
543
  }
407
544
 
408
- let rb_text_chunk = SelmaHTMLTextChunk::new(text_chunk);
409
- match rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,)) {
545
+ let (ref_wrap, anchor) = NativeRefWrap::wrap(text_chunk);
546
+
547
+ let rb_text_chunk = SelmaHTMLTextChunk::new(ref_wrap);
548
+ let result =
549
+ rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,));
550
+
551
+ mem::drop(anchor);
552
+
553
+ match result {
410
554
  Ok(_) => Ok(()),
411
555
  Err(err) => Err(magnus::Error::new(
412
556
  exception::runtime_error(),
@@ -414,6 +558,22 @@ impl SelmaRewriter {
414
558
  )),
415
559
  }
416
560
  }
561
+
562
+ fn get_memory_options(binding: &Ref<Rewriter>) -> MemorySettings {
563
+ let options = &binding.options.memory_options;
564
+ MemorySettings {
565
+ max_allowed_memory_usage: options.max_allowed_memory_usage,
566
+ preallocated_parsing_buffer_size: options.preallocated_parsing_buffer_size,
567
+ }
568
+ }
569
+ }
570
+
571
+ impl RewriterOptions {
572
+ pub fn new() -> Self {
573
+ Self {
574
+ memory_options: MemorySettings::default(),
575
+ }
576
+ }
417
577
  }
418
578
 
419
579
  pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {