selma 0.2.2 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,31 @@
1
1
  use lol_html::{
2
2
  doc_comments, doctype, element,
3
3
  html_content::{Element, TextChunk},
4
- text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
4
+ text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, MemorySettings, Selector,
5
+ Settings,
5
6
  };
6
7
  use magnus::{
7
- exception, function, method, scan_args,
8
+ exception, function, gc, method,
9
+ r_hash::ForEach,
10
+ scan_args,
8
11
  typed_data::Obj,
9
12
  value::{Opaque, ReprValue},
10
- Module, Object, RArray, RModule, Ruby, Value,
13
+ DataTypeFunctions, Integer, IntoValue, Module, Object, RArray, RHash, RModule, Ruby, Symbol,
14
+ TypedData, Value,
11
15
  };
12
16
 
13
- use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
17
+ use std::{
18
+ borrow::Cow,
19
+ cell::{Ref, RefCell},
20
+ mem,
21
+ ops::Deref,
22
+ primitive::str,
23
+ rc::Rc,
24
+ };
14
25
 
15
26
  use crate::{
16
27
  html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
28
+ native_ref_wrap::NativeRefWrap,
17
29
  sanitizer::SelmaSanitizer,
18
30
  selector::SelmaSelector,
19
31
  tags::Tag,
@@ -30,16 +42,34 @@ pub struct Handler {
30
42
  // total_elapsed_text_handlers: f64,
31
43
  }
32
44
 
45
+ struct RewriterOptions {
46
+ memory_options: MemorySettings,
47
+ }
48
+
33
49
  pub struct Rewriter {
34
50
  sanitizer: Option<SelmaSanitizer>,
35
51
  handlers: Vec<Handler>,
52
+ options: RewriterOptions,
36
53
  // total_elapsed: f64,
37
54
  }
38
55
 
39
- #[magnus::wrap(class = "Selma::Rewriter")]
56
+ #[derive(TypedData)]
57
+ #[magnus(class = "Selma::Rewriter", free_immediately, mark)]
40
58
  pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
41
59
 
42
- type RewriterValues = (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>);
60
+ impl DataTypeFunctions for SelmaRewriter {
61
+ fn mark(&self, marker: &gc::Marker) {
62
+ self.0.borrow().handlers.iter().for_each(|handler| {
63
+ marker.mark(handler.rb_handler);
64
+ });
65
+ }
66
+ }
67
+
68
+ type RewriterValues = (
69
+ Option<Option<Obj<SelmaSanitizer>>>,
70
+ Option<RArray>,
71
+ Option<RHash>,
72
+ );
43
73
 
44
74
  impl SelmaRewriter {
45
75
  const SELMA_ON_END_TAG: &'static str = "on_end_tag";
@@ -50,25 +80,20 @@ impl SelmaRewriter {
50
80
  /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
51
81
  /// @param sanitizer [Selma::Sanitizer] The sanitizer which performs the initial cleanup
52
82
  /// @param handlers [Array<Selma::Selector>] The handlers to use to perform HTML rewriting
83
+ /// @param options [Hash] Any additional options to pass to the rewriter
53
84
  /// @return [Selma::Rewriter]
54
85
  fn new(args: &[Value]) -> Result<Self, magnus::Error> {
55
- let (rb_sanitizer, rb_handlers) = Self::scan_parse_args(args)?;
86
+ let (rb_sanitizer, rb_handlers, rb_options) = Self::scan_parse_args(args)?;
56
87
 
57
88
  let sanitizer = match rb_sanitizer {
58
89
  None => {
59
- // no `sanitizer:` provided, use default
90
+ // no `sanitizer:` kwarg provided, use default
60
91
  let default_sanitizer = SelmaSanitizer::new(&[])?;
61
92
  let wrapped_sanitizer = Obj::wrap(default_sanitizer);
62
- wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
63
- Some(wrapped_sanitizer.get().to_owned())
93
+ // wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
94
+ Some(wrapped_sanitizer.deref().to_owned())
64
95
  }
65
- Some(sanitizer_value) => match sanitizer_value {
66
- None => None, // no `sanitizer:` provided, use default
67
- Some(sanitizer) => {
68
- sanitizer.funcall::<&str, (), Value>("setup", ())?;
69
- Some(sanitizer.get().to_owned())
70
- }
71
- },
96
+ Some(sanitizer_value) => sanitizer_value.map(|sanitizer| sanitizer.deref().to_owned()),
72
97
  };
73
98
 
74
99
  let handlers = match rb_handlers {
@@ -76,9 +101,7 @@ impl SelmaRewriter {
76
101
  Some(rb_handlers) => {
77
102
  let mut handlers: Vec<Handler> = vec![];
78
103
 
79
- for h in rb_handlers.each() {
80
- let rb_handler = h.unwrap();
81
-
104
+ for rb_handler in rb_handlers.into_iter() {
82
105
  // prevents missing #selector from ruining things
83
106
  if !rb_handler.respond_to("selector", true).unwrap() {
84
107
  let classname = unsafe { rb_handler.classname() };
@@ -122,9 +145,88 @@ impl SelmaRewriter {
122
145
  ));
123
146
  }
124
147
 
148
+ let mut rewriter_options = RewriterOptions::new();
149
+
150
+ match rb_options {
151
+ None => {}
152
+ Some(options) => {
153
+ options.foreach(|key: Symbol, value: RHash| {
154
+ let key = key.to_string();
155
+ match key.as_str() {
156
+ "memory" => {
157
+ let max_allowed_memory_usage = value.get(Symbol::new("max_allowed_memory_usage"));
158
+ if max_allowed_memory_usage.is_some() {
159
+ let max_allowed_memory_usage = max_allowed_memory_usage.unwrap();
160
+ let max_allowed_memory_usage =
161
+ Integer::from_value(max_allowed_memory_usage);
162
+ if max_allowed_memory_usage.is_some() {
163
+ match max_allowed_memory_usage.unwrap().to_u64() {
164
+ Ok(max_allowed_memory_usage) => {
165
+ rewriter_options.memory_options.max_allowed_memory_usage =
166
+ max_allowed_memory_usage as usize;
167
+ }
168
+ Err(_e) => {
169
+ return Err(magnus::Error::new(
170
+ exception::arg_error(),
171
+ "max_allowed_memory_usage must be a positive integer",
172
+ ));
173
+ }
174
+ }
175
+ } else {
176
+ rewriter_options.memory_options.max_allowed_memory_usage = MemorySettings::default().max_allowed_memory_usage;
177
+ }
178
+ }
179
+
180
+ let preallocated_parsing_buffer_size = value.get(Symbol::new("preallocated_parsing_buffer_size"));
181
+ if preallocated_parsing_buffer_size.is_some() {
182
+ let preallocated_parsing_buffer_size = preallocated_parsing_buffer_size.unwrap();
183
+ let preallocated_parsing_buffer_size =
184
+ Integer::from_value(preallocated_parsing_buffer_size);
185
+ if preallocated_parsing_buffer_size.is_some() {
186
+ match preallocated_parsing_buffer_size.unwrap().to_u64() {
187
+ Ok(preallocated_parsing_buffer_size) => {
188
+ rewriter_options.memory_options.preallocated_parsing_buffer_size =
189
+ preallocated_parsing_buffer_size as usize;
190
+ }
191
+ Err(_e) => {
192
+ return Err(magnus::Error::new(
193
+ exception::arg_error(),
194
+ "preallocated_parsing_buffer_size must be a positive integer",
195
+ ));
196
+ }
197
+ }
198
+ } else {
199
+ rewriter_options.memory_options.preallocated_parsing_buffer_size = MemorySettings::default().preallocated_parsing_buffer_size;
200
+ }
201
+ }
202
+ }
203
+ _ => {
204
+ return Err(magnus::Error::new(
205
+ exception::arg_error(),
206
+ format!("Unknown option: {key:?}"),
207
+ ));
208
+ }
209
+ }
210
+ Ok(ForEach::Continue)
211
+ })?;
212
+ }
213
+ }
214
+
215
+ if rewriter_options
216
+ .memory_options
217
+ .preallocated_parsing_buffer_size
218
+ > rewriter_options.memory_options.max_allowed_memory_usage
219
+ {
220
+ return Err(magnus::Error::new(
221
+ exception::arg_error(),
222
+ "max_allowed_memory_usage must be greater than preallocated_parsing_buffer_size",
223
+ ));
224
+ }
225
+
125
226
  Ok(Self(std::cell::RefCell::new(Rewriter {
126
227
  sanitizer,
127
228
  handlers,
229
+ options: rewriter_options,
128
230
  // total_elapsed: 0.0,
129
231
  })))
130
232
  }
@@ -141,125 +243,126 @@ impl SelmaRewriter {
141
243
  let kwargs = scan_args::get_kwargs::<
142
244
  _,
143
245
  (),
144
- (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>),
246
+ (
247
+ Option<Option<Obj<SelmaSanitizer>>>,
248
+ Option<RArray>,
249
+ Option<RHash>,
250
+ ),
145
251
  (),
146
- >(args.keywords, &[], &["sanitizer", "handlers"])?;
147
- let (rb_sanitizer, rb_handlers) = kwargs.optional;
252
+ >(args.keywords, &[], &["sanitizer", "handlers", "options"])?;
253
+ let (rb_sanitizer, rb_handlers, rb_options) = kwargs.optional;
148
254
 
149
- Ok((rb_sanitizer, rb_handlers))
255
+ Ok((rb_sanitizer, rb_handlers, rb_options))
150
256
  }
151
257
 
152
258
  /// Perform HTML rewrite sequence.
153
259
  fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
154
- let sanitized_html = match &self.0.borrow().sanitizer {
155
- None => Ok(html),
156
- Some(sanitizer) => {
157
- let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
158
- Ok(sanitized_html) => sanitized_html,
159
- Err(err) => return Err(err),
160
- };
260
+ let binding = self.0.borrow();
161
261
 
162
- String::from_utf8(sanitized_html)
262
+ let mut sanitizer_document_content_handlers: Vec<DocumentContentHandlers> = vec![];
263
+ let mut sanitizer_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> =
264
+ vec![];
265
+
266
+ match &binding.sanitizer {
267
+ None => (),
268
+ Some(sanitizer) => {
269
+ if !sanitizer.get_allow_doctype() {
270
+ sanitizer_document_content_handlers.push(doctype!(|d| {
271
+ sanitizer.remove_doctype(d);
272
+ Ok(())
273
+ }));
274
+ }
275
+ if !sanitizer.get_allow_comments() {
276
+ sanitizer_document_content_handlers.push(doc_comments!(|c| {
277
+ sanitizer.remove_comment(c);
278
+ Ok(())
279
+ }));
280
+ }
281
+ sanitizer_element_content_handlers.push(element!("*", |el| {
282
+ sanitizer.try_remove_element(el);
283
+ if el.removed() {
284
+ return Ok(());
285
+ }
286
+ // if it was removed, there are no attributes to sanitize
287
+ match sanitizer.sanitize_attributes(el) {
288
+ Ok(_) => Ok(()),
289
+ Err(err) => Err(err.to_string().into()),
290
+ }
291
+ }));
163
292
  }
164
293
  };
165
- let binding = self.0.borrow_mut();
166
- let handlers = &binding.handlers;
167
294
 
168
- match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
169
- Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
295
+ let handlers: &Vec<Handler> = &binding.handlers;
296
+
297
+ match Self::perform_handler_rewrite(
298
+ self,
299
+ sanitizer_document_content_handlers,
300
+ sanitizer_element_content_handlers,
301
+ handlers,
302
+ html,
303
+ ) {
304
+ Ok(rewritten_html) => match &binding.sanitizer {
305
+ None => match String::from_utf8(rewritten_html) {
306
+ Ok(output) => Ok(output),
307
+ Err(err) => Err(magnus::Error::new(
308
+ exception::runtime_error(),
309
+ format!("{err:?}"),
310
+ )),
311
+ },
312
+ Some(sanitizer) => {
313
+ Self::perform_final_sanitization(self, sanitizer, rewritten_html)
314
+ }
315
+ },
170
316
  Err(err) => Err(err),
171
317
  }
172
318
  }
173
319
 
174
- fn perform_sanitization(
320
+ // to get rid of some really nasty edge cases with dangerous tags, we perform one more
321
+ // sanitization pass at the end
322
+ fn perform_final_sanitization(
323
+ &self,
175
324
  sanitizer: &SelmaSanitizer,
176
- html: &String,
177
- ) -> Result<Vec<u8>, magnus::Error> {
178
- let mut first_pass_html = vec![];
179
- {
180
- let mut document_content_handlers: Vec<DocumentContentHandlers> = vec![];
181
- if !sanitizer.get_allow_doctype() {
182
- document_content_handlers.push(doctype!(|d| {
183
- sanitizer.remove_doctype(d);
184
- Ok(())
185
- }));
186
- }
187
- if !sanitizer.get_allow_comments() {
188
- document_content_handlers.push(doc_comments!(|c| {
189
- sanitizer.remove_comment(c);
190
- Ok(())
191
- }));
192
- }
193
- let mut rewriter = HtmlRewriter::new(
194
- Settings {
195
- document_content_handlers,
196
- element_content_handlers: vec![element!("*", |el| {
197
- sanitizer.try_remove_element(el);
198
- if el.removed() {
199
- return Ok(());
200
- }
201
- match sanitizer.sanitize_attributes(el) {
202
- Ok(_) => Ok(()),
203
- Err(err) => Err(err.to_string().into()),
204
- }
205
- })],
206
- // TODO: allow for MemorySettings to be defined
207
- ..Settings::default()
208
- },
209
- |c: &[u8]| first_pass_html.extend_from_slice(c),
210
- );
211
-
212
- let result = rewriter.write(html.as_bytes());
213
- if result.is_err() {
214
- return Err(magnus::Error::new(
215
- exception::runtime_error(),
216
- format!("Failed to sanitize HTML: {}", result.unwrap_err()),
217
- ));
218
- }
219
- }
220
-
221
- let mut output = vec![];
222
- {
223
- let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
224
- if sanitizer.get_escape_tagfilter() {
225
- element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
226
- let should_remove = sanitizer.allow_element(el);
227
- if should_remove {
228
- sanitizer.force_remove_element(el);
229
- }
325
+ html: Vec<u8>,
326
+ ) -> Result<String, magnus::Error> {
327
+ // TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
328
+ let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
230
329
 
231
- Ok(())
232
- }));
233
- }
330
+ if sanitizer.get_escape_tagfilter() {
331
+ element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
332
+ let should_remove = sanitizer.allow_element(el);
333
+ if should_remove {
334
+ sanitizer.force_remove_element(el);
335
+ }
234
336
 
235
- let mut rewriter = HtmlRewriter::new(
236
- Settings {
237
- element_content_handlers,
238
- ..Settings::default()
239
- },
240
- |c: &[u8]| output.extend_from_slice(c),
241
- );
337
+ Ok(())
338
+ }));
339
+ }
242
340
 
243
- let result = rewriter.write(first_pass_html.as_slice());
244
- if result.is_err() {
245
- return Err(magnus::Error::new(
341
+ match Self::run_rewrite(self, vec![], element_content_handlers, html.as_slice()) {
342
+ Ok(rewritten_html) => match String::from_utf8(rewritten_html) {
343
+ Ok(output) => Ok(output),
344
+ Err(err) => Err(magnus::Error::new(
246
345
  exception::runtime_error(),
247
- format!("Failed to sanitize HTML: {}", result.unwrap_err()),
248
- ));
249
- }
346
+ format!("{err:?}"),
347
+ )),
348
+ },
349
+ Err(err) => Err(err),
250
350
  }
251
-
252
- Ok(output)
253
351
  }
254
352
 
255
- pub fn perform_handler_rewrite(
353
+ pub fn perform_handler_rewrite<'a>(
256
354
  &self,
257
- handlers: &[Handler],
355
+ sanitizer_document_content_handlers: Vec<DocumentContentHandlers<'a>>,
356
+ sanitizer_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers<'a>)>,
357
+ handlers: &'a [Handler],
258
358
  html: String,
259
359
  ) -> Result<Vec<u8>, magnus::Error> {
260
- // TODO: this should ideally be done ahead of time, not on every `#rewrite` call
360
+ // TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
261
361
  let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
262
362
 
363
+ // have sanitization happen first
364
+ element_content_handlers.extend(sanitizer_element_content_handlers);
365
+
263
366
  handlers.iter().for_each(|handler| {
264
367
  let element_stack: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(vec![]));
265
368
 
@@ -275,7 +378,7 @@ impl SelmaRewriter {
275
378
  selector.match_element().unwrap(),
276
379
  move |el| {
277
380
  match Self::process_element_handlers(
278
- ruby.get_inner(handler.rb_handler),
381
+ handler,
279
382
  el,
280
383
  &closure_element_stack.borrow(),
281
384
  ) {
@@ -294,21 +397,19 @@ impl SelmaRewriter {
294
397
  move |text| {
295
398
  let element_stack = closure_element_stack.as_ref().borrow();
296
399
  if selector.ignore_text_within().is_some() {
297
- // check if current tag is a tag we should be ignoring text within
298
- let head_tag_name = element_stack.last().unwrap().to_string();
400
+ // check if current tag is a tag we should be ignoring text within;
401
+ // also checks if tag is within an ancestery of ignored tags
299
402
  if selector
300
403
  .ignore_text_within()
301
404
  .unwrap()
302
405
  .iter()
303
- .any(|f| f == &head_tag_name)
406
+ .any(|t| element_stack.contains(t))
304
407
  {
305
408
  return Ok(());
306
409
  }
307
410
  }
308
411
 
309
- let ruby = Ruby::get().unwrap();
310
- match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
311
- {
412
+ match Self::process_text_handlers(handler, text) {
312
413
  Ok(_) => Ok(()),
313
414
  Err(err) => Err(err.to_string().into()),
314
415
  }
@@ -329,28 +430,46 @@ impl SelmaRewriter {
329
430
 
330
431
  let closure_element_stack = element_stack.clone();
331
432
 
332
- el.end_tag_handlers()
333
- .unwrap()
334
- .push(Box::new(move |_end_tag| {
335
- let mut stack = closure_element_stack.as_ref().borrow_mut();
336
- stack.pop();
337
- Ok(())
338
- }));
433
+ if let Some(end_tag_handlers) = el.end_tag_handlers() {
434
+ end_tag_handlers.push(lol_html::EndTagHandler::into(Box::new(
435
+ move |_end_tag| {
436
+ closure_element_stack.as_ref().borrow_mut().pop();
437
+ Ok(())
438
+ },
439
+ )));
440
+ }
339
441
 
340
442
  Ok(())
341
443
  }));
342
444
  });
343
445
 
446
+ Self::run_rewrite(
447
+ self,
448
+ sanitizer_document_content_handlers,
449
+ element_content_handlers,
450
+ html.as_bytes(),
451
+ )
452
+ }
453
+
454
+ fn run_rewrite<'a>(
455
+ &self,
456
+ document_content_handlers: Vec<DocumentContentHandlers<'a>>,
457
+ element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers<'a>)>,
458
+ html: &[u8],
459
+ ) -> Result<Vec<u8>, magnus::Error> {
460
+ let binding = &self.0.borrow();
344
461
  let mut output = vec![];
345
462
  {
346
463
  let mut rewriter = HtmlRewriter::new(
347
464
  Settings {
465
+ document_content_handlers,
348
466
  element_content_handlers,
467
+ memory_settings: Self::get_memory_options(binding),
349
468
  ..Settings::default()
350
469
  },
351
470
  |c: &[u8]| output.extend_from_slice(c),
352
471
  );
353
- match rewriter.write(html.as_bytes()) {
472
+ match rewriter.write(html) {
354
473
  Ok(_) => {}
355
474
  Err(err) => {
356
475
  return Err(magnus::Error::new(
@@ -364,10 +483,12 @@ impl SelmaRewriter {
364
483
  }
365
484
 
366
485
  fn process_element_handlers(
367
- rb_handler: Value,
486
+ handler: &Handler,
368
487
  element: &mut Element,
369
488
  ancestors: &[String],
370
489
  ) -> Result<(), magnus::Error> {
490
+ let rb_handler = handler.rb_handler.into_value();
491
+
371
492
  // if `on_end_tag` function is defined, call it
372
493
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
373
494
  // TODO: error here is an "EndTagError"
@@ -375,38 +496,61 @@ impl SelmaRewriter {
375
496
  .end_tag_handlers()
376
497
  .unwrap()
377
498
  .push(Box::new(move |end_tag| {
378
- let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
499
+ let (ref_wrap, anchor) = NativeRefWrap::wrap(end_tag);
500
+
501
+ let rb_end_tag = SelmaHTMLEndTag::new(ref_wrap);
379
502
 
380
- match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
503
+ let result =
504
+ rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,));
505
+
506
+ mem::drop(anchor);
507
+
508
+ match result {
381
509
  Ok(_) => Ok(()),
382
510
  Err(err) => Err(err.to_string().into()),
383
511
  }
384
512
  }));
385
513
  }
386
514
 
387
- let rb_element = SelmaHTMLElement::new(element, ancestors);
388
- let rb_result =
389
- rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
390
- match rb_result {
515
+ let (ref_wrap, anchor) = NativeRefWrap::wrap(element);
516
+ let rb_element = SelmaHTMLElement::new(ref_wrap, ancestors);
517
+ let result = rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
518
+
519
+ mem::drop(anchor);
520
+
521
+ match result {
391
522
  Ok(_) => Ok(()),
392
- Err(err) => Err(err),
523
+ Err(err) => Err(magnus::Error::new(
524
+ exception::runtime_error(),
525
+ format!("{err:?}"),
526
+ )),
393
527
  }
394
528
  }
395
529
 
396
530
  fn process_text_handlers(
397
- rb_handler: Value,
531
+ handler: &Handler,
398
532
  text_chunk: &mut TextChunk,
399
533
  ) -> Result<(), magnus::Error> {
534
+ let rb_handler = handler.rb_handler.into_value();
535
+
400
536
  // prevents missing `handle_text_chunk` function
401
537
  let content = text_chunk.as_str();
402
538
 
403
- // seems that sometimes lol-html returns blank text / EOLs?
539
+ // lol-html sometimes returns blank text if
540
+ // last_in_text_node() is true
404
541
  if content.is_empty() {
405
542
  return Ok(());
406
543
  }
407
544
 
408
- let rb_text_chunk = SelmaHTMLTextChunk::new(text_chunk);
409
- match rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,)) {
545
+ let (ref_wrap, anchor) = NativeRefWrap::wrap(text_chunk);
546
+
547
+ let rb_text_chunk = SelmaHTMLTextChunk::new(ref_wrap);
548
+ let result =
549
+ rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,));
550
+
551
+ mem::drop(anchor);
552
+
553
+ match result {
410
554
  Ok(_) => Ok(()),
411
555
  Err(err) => Err(magnus::Error::new(
412
556
  exception::runtime_error(),
@@ -414,6 +558,22 @@ impl SelmaRewriter {
414
558
  )),
415
559
  }
416
560
  }
561
+
562
+ fn get_memory_options(binding: &Ref<Rewriter>) -> MemorySettings {
563
+ let options = &binding.options.memory_options;
564
+ MemorySettings {
565
+ max_allowed_memory_usage: options.max_allowed_memory_usage,
566
+ preallocated_parsing_buffer_size: options.preallocated_parsing_buffer_size,
567
+ }
568
+ }
569
+ }
570
+
571
+ impl RewriterOptions {
572
+ pub fn new() -> Self {
573
+ Self {
574
+ memory_options: MemorySettings::default(),
575
+ }
576
+ }
417
577
  }
418
578
 
419
579
  pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {