selma 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +107 -99
- data/README.md +57 -35
- data/ext/selma/Cargo.toml +4 -1
- data/ext/selma/src/html/element.rs +11 -6
- data/ext/selma/src/native_ref_wrap.rs +15 -12
- data/ext/selma/src/rewriter.rs +266 -106
- data/ext/selma/src/sanitizer.rs +3 -3
- data/lib/selma/config.rb +12 -0
- data/lib/selma/version.rb +1 -1
- metadata +3 -2
data/ext/selma/src/rewriter.rs
CHANGED
@@ -1,16 +1,25 @@
|
|
1
1
|
use lol_html::{
|
2
2
|
doc_comments, doctype, element,
|
3
3
|
html_content::{Element, TextChunk},
|
4
|
-
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter,
|
4
|
+
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, MemorySettings, Selector,
|
5
|
+
Settings,
|
5
6
|
};
|
6
7
|
use magnus::{
|
7
|
-
exception, function, method,
|
8
|
+
exception, function, method,
|
9
|
+
r_hash::ForEach,
|
10
|
+
scan_args,
|
8
11
|
typed_data::Obj,
|
9
12
|
value::{Opaque, ReprValue},
|
10
|
-
Module, Object, RArray, RModule, Ruby, Value,
|
13
|
+
Integer, IntoValue, Module, Object, RArray, RHash, RModule, Ruby, Symbol, Value,
|
11
14
|
};
|
12
15
|
|
13
|
-
use std::{
|
16
|
+
use std::{
|
17
|
+
borrow::Cow,
|
18
|
+
cell::{Ref, RefCell},
|
19
|
+
ops::Deref,
|
20
|
+
primitive::str,
|
21
|
+
rc::Rc,
|
22
|
+
};
|
14
23
|
|
15
24
|
use crate::{
|
16
25
|
html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
|
@@ -19,9 +28,26 @@ use crate::{
|
|
19
28
|
tags::Tag,
|
20
29
|
};
|
21
30
|
|
31
|
+
#[derive(Copy, Clone)]
|
32
|
+
pub struct ObjectValue {
|
33
|
+
pub inner: Opaque<Value>,
|
34
|
+
}
|
35
|
+
|
36
|
+
impl IntoValue for ObjectValue {
|
37
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
38
|
+
Ruby::get().unwrap().get_inner(self.inner)
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
impl From<Value> for ObjectValue {
|
43
|
+
fn from(v: Value) -> Self {
|
44
|
+
Self { inner: v.into() }
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
22
48
|
#[derive(Clone)]
|
23
49
|
pub struct Handler {
|
24
|
-
rb_handler:
|
50
|
+
rb_handler: ObjectValue,
|
25
51
|
rb_selector: Opaque<Obj<SelmaSelector>>,
|
26
52
|
// total_element_handler_calls: usize,
|
27
53
|
// total_elapsed_element_handlers: f64,
|
@@ -30,16 +56,25 @@ pub struct Handler {
|
|
30
56
|
// total_elapsed_text_handlers: f64,
|
31
57
|
}
|
32
58
|
|
59
|
+
struct RewriterOptions {
|
60
|
+
memory_options: MemorySettings,
|
61
|
+
}
|
62
|
+
|
33
63
|
pub struct Rewriter {
|
34
64
|
sanitizer: Option<SelmaSanitizer>,
|
35
65
|
handlers: Vec<Handler>,
|
66
|
+
options: RewriterOptions,
|
36
67
|
// total_elapsed: f64,
|
37
68
|
}
|
38
69
|
|
39
70
|
#[magnus::wrap(class = "Selma::Rewriter")]
|
40
71
|
pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
|
41
72
|
|
42
|
-
type RewriterValues = (
|
73
|
+
type RewriterValues = (
|
74
|
+
Option<Option<Obj<SelmaSanitizer>>>,
|
75
|
+
Option<RArray>,
|
76
|
+
Option<RHash>,
|
77
|
+
);
|
43
78
|
|
44
79
|
impl SelmaRewriter {
|
45
80
|
const SELMA_ON_END_TAG: &'static str = "on_end_tag";
|
@@ -50,9 +85,10 @@ impl SelmaRewriter {
|
|
50
85
|
/// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
|
51
86
|
/// @param sanitizer [Selma::Sanitizer] The sanitizer which performs the initial cleanup
|
52
87
|
/// @param handlers [Array<Selma::Selector>] The handlers to use to perform HTML rewriting
|
88
|
+
/// @param options [Hash] Any additional options to pass to the rewriter
|
53
89
|
/// @return [Selma::Rewriter]
|
54
90
|
fn new(args: &[Value]) -> Result<Self, magnus::Error> {
|
55
|
-
let (rb_sanitizer, rb_handlers) = Self::scan_parse_args(args)?;
|
91
|
+
let (rb_sanitizer, rb_handlers, rb_options) = Self::scan_parse_args(args)?;
|
56
92
|
|
57
93
|
let sanitizer = match rb_sanitizer {
|
58
94
|
None => {
|
@@ -60,13 +96,13 @@ impl SelmaRewriter {
|
|
60
96
|
let default_sanitizer = SelmaSanitizer::new(&[])?;
|
61
97
|
let wrapped_sanitizer = Obj::wrap(default_sanitizer);
|
62
98
|
wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
63
|
-
Some(wrapped_sanitizer.
|
99
|
+
Some(wrapped_sanitizer.deref().to_owned())
|
64
100
|
}
|
65
101
|
Some(sanitizer_value) => match sanitizer_value {
|
66
102
|
None => None, // no `sanitizer:` provided, use default
|
67
103
|
Some(sanitizer) => {
|
68
104
|
sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
69
|
-
Some(sanitizer.
|
105
|
+
Some(sanitizer.deref().to_owned())
|
70
106
|
}
|
71
107
|
},
|
72
108
|
};
|
@@ -101,7 +137,7 @@ impl SelmaRewriter {
|
|
101
137
|
Ok(rb_selector) => rb_selector,
|
102
138
|
};
|
103
139
|
let handler = Handler {
|
104
|
-
rb_handler:
|
140
|
+
rb_handler: ObjectValue::from(rb_handler),
|
105
141
|
rb_selector: Opaque::from(rb_selector),
|
106
142
|
// total_element_handler_calls: 0,
|
107
143
|
// total_elapsed_element_handlers: 0.0,
|
@@ -122,9 +158,88 @@ impl SelmaRewriter {
|
|
122
158
|
));
|
123
159
|
}
|
124
160
|
|
161
|
+
let mut rewriter_options = RewriterOptions::new();
|
162
|
+
|
163
|
+
match rb_options {
|
164
|
+
None => {}
|
165
|
+
Some(options) => {
|
166
|
+
options.foreach(|key: Symbol, value: RHash| {
|
167
|
+
let key = key.to_string();
|
168
|
+
match key.as_str() {
|
169
|
+
"memory" => {
|
170
|
+
let max_allowed_memory_usage = value.get(Symbol::new("max_allowed_memory_usage"));
|
171
|
+
if max_allowed_memory_usage.is_some() {
|
172
|
+
let max_allowed_memory_usage = max_allowed_memory_usage.unwrap();
|
173
|
+
let max_allowed_memory_usage =
|
174
|
+
Integer::from_value(max_allowed_memory_usage);
|
175
|
+
if max_allowed_memory_usage.is_some() {
|
176
|
+
match max_allowed_memory_usage.unwrap().to_u64() {
|
177
|
+
Ok(max_allowed_memory_usage) => {
|
178
|
+
rewriter_options.memory_options.max_allowed_memory_usage =
|
179
|
+
max_allowed_memory_usage as usize;
|
180
|
+
}
|
181
|
+
Err(_e) => {
|
182
|
+
return Err(magnus::Error::new(
|
183
|
+
exception::arg_error(),
|
184
|
+
"max_allowed_memory_usage must be a positive integer",
|
185
|
+
));
|
186
|
+
}
|
187
|
+
}
|
188
|
+
} else {
|
189
|
+
rewriter_options.memory_options.max_allowed_memory_usage = MemorySettings::default().max_allowed_memory_usage;
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
let preallocated_parsing_buffer_size = value.get(Symbol::new("preallocated_parsing_buffer_size"));
|
194
|
+
if preallocated_parsing_buffer_size.is_some() {
|
195
|
+
let preallocated_parsing_buffer_size = preallocated_parsing_buffer_size.unwrap();
|
196
|
+
let preallocated_parsing_buffer_size =
|
197
|
+
Integer::from_value(preallocated_parsing_buffer_size);
|
198
|
+
if preallocated_parsing_buffer_size.is_some() {
|
199
|
+
match preallocated_parsing_buffer_size.unwrap().to_u64() {
|
200
|
+
Ok(preallocated_parsing_buffer_size) => {
|
201
|
+
rewriter_options.memory_options.preallocated_parsing_buffer_size =
|
202
|
+
preallocated_parsing_buffer_size as usize;
|
203
|
+
}
|
204
|
+
Err(_e) => {
|
205
|
+
return Err(magnus::Error::new(
|
206
|
+
exception::arg_error(),
|
207
|
+
"preallocated_parsing_buffer_size must be a positive integer",
|
208
|
+
));
|
209
|
+
}
|
210
|
+
}
|
211
|
+
} else {
|
212
|
+
rewriter_options.memory_options.preallocated_parsing_buffer_size = MemorySettings::default().preallocated_parsing_buffer_size;
|
213
|
+
}
|
214
|
+
}
|
215
|
+
}
|
216
|
+
_ => {
|
217
|
+
return Err(magnus::Error::new(
|
218
|
+
exception::arg_error(),
|
219
|
+
format!("Unknown option: {key:?}"),
|
220
|
+
));
|
221
|
+
}
|
222
|
+
}
|
223
|
+
Ok(ForEach::Continue)
|
224
|
+
})?;
|
225
|
+
}
|
226
|
+
}
|
227
|
+
|
228
|
+
if rewriter_options
|
229
|
+
.memory_options
|
230
|
+
.preallocated_parsing_buffer_size
|
231
|
+
> rewriter_options.memory_options.max_allowed_memory_usage
|
232
|
+
{
|
233
|
+
return Err(magnus::Error::new(
|
234
|
+
exception::arg_error(),
|
235
|
+
"max_allowed_memory_usage must be greater than preallocated_parsing_buffer_size",
|
236
|
+
));
|
237
|
+
}
|
238
|
+
|
125
239
|
Ok(Self(std::cell::RefCell::new(Rewriter {
|
126
240
|
sanitizer,
|
127
241
|
handlers,
|
242
|
+
options: rewriter_options,
|
128
243
|
// total_elapsed: 0.0,
|
129
244
|
})))
|
130
245
|
}
|
@@ -141,123 +256,120 @@ impl SelmaRewriter {
|
|
141
256
|
let kwargs = scan_args::get_kwargs::<
|
142
257
|
_,
|
143
258
|
(),
|
144
|
-
(
|
259
|
+
(
|
260
|
+
Option<Option<Obj<SelmaSanitizer>>>,
|
261
|
+
Option<RArray>,
|
262
|
+
Option<RHash>,
|
263
|
+
),
|
145
264
|
(),
|
146
|
-
>(args.keywords, &[], &["sanitizer", "handlers"])?;
|
147
|
-
let (rb_sanitizer, rb_handlers) = kwargs.optional;
|
265
|
+
>(args.keywords, &[], &["sanitizer", "handlers", "options"])?;
|
266
|
+
let (rb_sanitizer, rb_handlers, rb_options) = kwargs.optional;
|
148
267
|
|
149
|
-
Ok((rb_sanitizer, rb_handlers))
|
268
|
+
Ok((rb_sanitizer, rb_handlers, rb_options))
|
150
269
|
}
|
151
270
|
|
152
271
|
/// Perform HTML rewrite sequence.
|
153
272
|
fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
|
154
|
-
let
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
Err(err) => return Err(err),
|
160
|
-
};
|
273
|
+
let binding = self.0.borrow();
|
274
|
+
|
275
|
+
let mut sanitizer_document_content_handlers: Vec<DocumentContentHandlers> = vec![];
|
276
|
+
let mut sanitizer_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> =
|
277
|
+
vec![];
|
161
278
|
|
162
|
-
|
279
|
+
match &binding.sanitizer {
|
280
|
+
None => (),
|
281
|
+
Some(sanitizer) => {
|
282
|
+
if !sanitizer.get_allow_doctype() {
|
283
|
+
sanitizer_document_content_handlers.push(doctype!(|d| {
|
284
|
+
sanitizer.remove_doctype(d);
|
285
|
+
Ok(())
|
286
|
+
}));
|
287
|
+
}
|
288
|
+
if !sanitizer.get_allow_comments() {
|
289
|
+
sanitizer_document_content_handlers.push(doc_comments!(|c| {
|
290
|
+
sanitizer.remove_comment(c);
|
291
|
+
Ok(())
|
292
|
+
}));
|
293
|
+
}
|
294
|
+
sanitizer_element_content_handlers.push(element!("*", |el| {
|
295
|
+
sanitizer.try_remove_element(el);
|
296
|
+
if el.removed() {
|
297
|
+
return Ok(());
|
298
|
+
}
|
299
|
+
match sanitizer.sanitize_attributes(el) {
|
300
|
+
Ok(_) => Ok(()),
|
301
|
+
Err(err) => Err(err.to_string().into()),
|
302
|
+
}
|
303
|
+
}));
|
163
304
|
}
|
164
305
|
};
|
165
|
-
|
306
|
+
|
166
307
|
let handlers = &binding.handlers;
|
167
308
|
|
168
|
-
match Self::perform_handler_rewrite(
|
169
|
-
|
309
|
+
match Self::perform_handler_rewrite(
|
310
|
+
self,
|
311
|
+
sanitizer_document_content_handlers,
|
312
|
+
sanitizer_element_content_handlers,
|
313
|
+
handlers,
|
314
|
+
html,
|
315
|
+
) {
|
316
|
+
Ok(rewritten_html) => match &binding.sanitizer {
|
317
|
+
None => match String::from_utf8(rewritten_html) {
|
318
|
+
Ok(output) => Ok(output),
|
319
|
+
Err(err) => Err(magnus::Error::new(
|
320
|
+
exception::runtime_error(),
|
321
|
+
format!("{err:?}"),
|
322
|
+
)),
|
323
|
+
},
|
324
|
+
Some(sanitizer) => {
|
325
|
+
Self::perform_final_sanitization(self, sanitizer, rewritten_html)
|
326
|
+
}
|
327
|
+
},
|
170
328
|
Err(err) => Err(err),
|
171
329
|
}
|
172
330
|
}
|
173
331
|
|
174
|
-
|
332
|
+
// to get rid of some really nasty edge cases with dangerous tags, we perform one more
|
333
|
+
// sanitization pass at the end
|
334
|
+
fn perform_final_sanitization(
|
335
|
+
&self,
|
175
336
|
sanitizer: &SelmaSanitizer,
|
176
|
-
html:
|
177
|
-
) -> Result<
|
178
|
-
|
179
|
-
|
180
|
-
let mut document_content_handlers: Vec<DocumentContentHandlers> = vec![];
|
181
|
-
if !sanitizer.get_allow_doctype() {
|
182
|
-
document_content_handlers.push(doctype!(|d| {
|
183
|
-
sanitizer.remove_doctype(d);
|
184
|
-
Ok(())
|
185
|
-
}));
|
186
|
-
}
|
187
|
-
if !sanitizer.get_allow_comments() {
|
188
|
-
document_content_handlers.push(doc_comments!(|c| {
|
189
|
-
sanitizer.remove_comment(c);
|
190
|
-
Ok(())
|
191
|
-
}));
|
192
|
-
}
|
193
|
-
let mut rewriter = HtmlRewriter::new(
|
194
|
-
Settings {
|
195
|
-
document_content_handlers,
|
196
|
-
element_content_handlers: vec![element!("*", |el| {
|
197
|
-
sanitizer.try_remove_element(el);
|
198
|
-
if el.removed() {
|
199
|
-
return Ok(());
|
200
|
-
}
|
201
|
-
match sanitizer.sanitize_attributes(el) {
|
202
|
-
Ok(_) => Ok(()),
|
203
|
-
Err(err) => Err(err.to_string().into()),
|
204
|
-
}
|
205
|
-
})],
|
206
|
-
// TODO: allow for MemorySettings to be defined
|
207
|
-
..Settings::default()
|
208
|
-
},
|
209
|
-
|c: &[u8]| first_pass_html.extend_from_slice(c),
|
210
|
-
);
|
211
|
-
|
212
|
-
let result = rewriter.write(html.as_bytes());
|
213
|
-
if result.is_err() {
|
214
|
-
return Err(magnus::Error::new(
|
215
|
-
exception::runtime_error(),
|
216
|
-
format!("Failed to sanitize HTML: {}", result.unwrap_err()),
|
217
|
-
));
|
218
|
-
}
|
219
|
-
}
|
220
|
-
|
221
|
-
let mut output = vec![];
|
222
|
-
{
|
223
|
-
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
224
|
-
if sanitizer.get_escape_tagfilter() {
|
225
|
-
element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
|
226
|
-
let should_remove = sanitizer.allow_element(el);
|
227
|
-
if should_remove {
|
228
|
-
sanitizer.force_remove_element(el);
|
229
|
-
}
|
337
|
+
html: Vec<u8>,
|
338
|
+
) -> Result<String, magnus::Error> {
|
339
|
+
// TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
|
340
|
+
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
230
341
|
|
231
|
-
|
232
|
-
|
233
|
-
|
342
|
+
if sanitizer.get_escape_tagfilter() {
|
343
|
+
element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
|
344
|
+
let should_remove = sanitizer.allow_element(el);
|
345
|
+
if should_remove {
|
346
|
+
sanitizer.force_remove_element(el);
|
347
|
+
}
|
234
348
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
..Settings::default()
|
239
|
-
},
|
240
|
-
|c: &[u8]| output.extend_from_slice(c),
|
241
|
-
);
|
349
|
+
Ok(())
|
350
|
+
}));
|
351
|
+
}
|
242
352
|
|
243
|
-
|
244
|
-
|
245
|
-
|
353
|
+
match Self::run_rewrite(self, vec![], element_content_handlers, html.as_slice()) {
|
354
|
+
Ok(rewritten_html) => match String::from_utf8(rewritten_html) {
|
355
|
+
Ok(output) => Ok(output),
|
356
|
+
Err(err) => Err(magnus::Error::new(
|
246
357
|
exception::runtime_error(),
|
247
|
-
format!("
|
248
|
-
))
|
249
|
-
}
|
358
|
+
format!("{err:?}"),
|
359
|
+
)),
|
360
|
+
},
|
361
|
+
Err(err) => Err(err),
|
250
362
|
}
|
251
|
-
|
252
|
-
Ok(output)
|
253
363
|
}
|
254
364
|
|
255
365
|
pub fn perform_handler_rewrite(
|
256
366
|
&self,
|
367
|
+
sanitizer_document_content_handlers: Vec<DocumentContentHandlers>,
|
368
|
+
sanitizer_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)>,
|
257
369
|
handlers: &[Handler],
|
258
370
|
html: String,
|
259
371
|
) -> Result<Vec<u8>, magnus::Error> {
|
260
|
-
// TODO: this should ideally be done ahead of time
|
372
|
+
// TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
|
261
373
|
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
262
374
|
|
263
375
|
handlers.iter().for_each(|handler| {
|
@@ -275,7 +387,7 @@ impl SelmaRewriter {
|
|
275
387
|
selector.match_element().unwrap(),
|
276
388
|
move |el| {
|
277
389
|
match Self::process_element_handlers(
|
278
|
-
|
390
|
+
handler.rb_handler,
|
279
391
|
el,
|
280
392
|
&closure_element_stack.borrow(),
|
281
393
|
) {
|
@@ -306,9 +418,7 @@ impl SelmaRewriter {
|
|
306
418
|
}
|
307
419
|
}
|
308
420
|
|
309
|
-
|
310
|
-
match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
|
311
|
-
{
|
421
|
+
match Self::process_text_handlers(handler.rb_handler, text) {
|
312
422
|
Ok(_) => Ok(()),
|
313
423
|
Err(err) => Err(err.to_string().into()),
|
314
424
|
}
|
@@ -341,16 +451,46 @@ impl SelmaRewriter {
|
|
341
451
|
}));
|
342
452
|
});
|
343
453
|
|
454
|
+
let rewritten_html = Self::run_rewrite(
|
455
|
+
self,
|
456
|
+
sanitizer_document_content_handlers,
|
457
|
+
element_content_handlers,
|
458
|
+
html.as_bytes(),
|
459
|
+
);
|
460
|
+
|
461
|
+
// sanitization must happen separately, because text chunks
|
462
|
+
// could potentially have rewritten the html. ideally we'd
|
463
|
+
// be able to sanitize around the `process_text_handlers` call
|
464
|
+
match rewritten_html {
|
465
|
+
Ok(rewritten_html) => Self::run_rewrite(
|
466
|
+
self,
|
467
|
+
vec![],
|
468
|
+
sanitizer_element_content_handlers,
|
469
|
+
rewritten_html.as_slice(),
|
470
|
+
),
|
471
|
+
Err(err) => Err(err),
|
472
|
+
}
|
473
|
+
}
|
474
|
+
|
475
|
+
fn run_rewrite(
|
476
|
+
&self,
|
477
|
+
document_content_handlers: Vec<DocumentContentHandlers>,
|
478
|
+
element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)>,
|
479
|
+
html: &[u8],
|
480
|
+
) -> Result<Vec<u8>, magnus::Error> {
|
481
|
+
let binding = &self.0.borrow();
|
344
482
|
let mut output = vec![];
|
345
483
|
{
|
346
484
|
let mut rewriter = HtmlRewriter::new(
|
347
485
|
Settings {
|
486
|
+
document_content_handlers,
|
348
487
|
element_content_handlers,
|
488
|
+
memory_settings: Self::get_memory_options(binding),
|
349
489
|
..Settings::default()
|
350
490
|
},
|
351
491
|
|c: &[u8]| output.extend_from_slice(c),
|
352
492
|
);
|
353
|
-
match rewriter.write(html
|
493
|
+
match rewriter.write(html) {
|
354
494
|
Ok(_) => {}
|
355
495
|
Err(err) => {
|
356
496
|
return Err(magnus::Error::new(
|
@@ -364,10 +504,12 @@ impl SelmaRewriter {
|
|
364
504
|
}
|
365
505
|
|
366
506
|
fn process_element_handlers(
|
367
|
-
|
507
|
+
obj_rb_handler: ObjectValue,
|
368
508
|
element: &mut Element,
|
369
509
|
ancestors: &[String],
|
370
510
|
) -> Result<(), magnus::Error> {
|
511
|
+
let rb_handler = Ruby::get().unwrap().get_inner(obj_rb_handler.inner);
|
512
|
+
|
371
513
|
// if `on_end_tag` function is defined, call it
|
372
514
|
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
373
515
|
// TODO: error here is an "EndTagError"
|
@@ -394,9 +536,11 @@ impl SelmaRewriter {
|
|
394
536
|
}
|
395
537
|
|
396
538
|
fn process_text_handlers(
|
397
|
-
|
539
|
+
obj_rb_handler: ObjectValue,
|
398
540
|
text_chunk: &mut TextChunk,
|
399
541
|
) -> Result<(), magnus::Error> {
|
542
|
+
let rb_handler = Ruby::get().unwrap().get_inner(obj_rb_handler.inner);
|
543
|
+
|
400
544
|
// prevents missing `handle_text_chunk` function
|
401
545
|
let content = text_chunk.as_str();
|
402
546
|
|
@@ -414,6 +558,22 @@ impl SelmaRewriter {
|
|
414
558
|
)),
|
415
559
|
}
|
416
560
|
}
|
561
|
+
|
562
|
+
fn get_memory_options(binding: &Ref<Rewriter>) -> MemorySettings {
|
563
|
+
let options = &binding.options.memory_options;
|
564
|
+
MemorySettings {
|
565
|
+
max_allowed_memory_usage: options.max_allowed_memory_usage,
|
566
|
+
preallocated_parsing_buffer_size: options.preallocated_parsing_buffer_size,
|
567
|
+
}
|
568
|
+
}
|
569
|
+
}
|
570
|
+
|
571
|
+
impl RewriterOptions {
|
572
|
+
pub fn new() -> Self {
|
573
|
+
Self {
|
574
|
+
memory_options: MemorySettings::default(),
|
575
|
+
}
|
576
|
+
}
|
417
577
|
}
|
418
578
|
|
419
579
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -338,7 +338,7 @@ impl SelmaSanitizer {
|
|
338
338
|
element: &mut Element,
|
339
339
|
element_sanitizer: &ElementSanitizer,
|
340
340
|
attr_name: &String,
|
341
|
-
attr_val: &
|
341
|
+
attr_val: &str,
|
342
342
|
) -> Result<bool, AttributeNameError> {
|
343
343
|
let mut allowed: bool = false;
|
344
344
|
let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
|
@@ -390,7 +390,7 @@ impl SelmaSanitizer {
|
|
390
390
|
attr_val.contains("://")
|
391
391
|
}
|
392
392
|
|
393
|
-
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &
|
393
|
+
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
|
394
394
|
if protocols_allowed.contains(&"all".to_string()) {
|
395
395
|
return true;
|
396
396
|
}
|
@@ -549,7 +549,7 @@ impl SelmaSanitizer {
|
|
549
549
|
) -> &'a mut ElementSanitizer {
|
550
550
|
element_sanitizers
|
551
551
|
.entry(element_name.to_string())
|
552
|
-
.
|
552
|
+
.or_default()
|
553
553
|
}
|
554
554
|
}
|
555
555
|
|
data/lib/selma/config.rb
ADDED
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -77,6 +77,7 @@ files:
|
|
77
77
|
- ext/selma/src/selector.rs
|
78
78
|
- ext/selma/src/tags.rs
|
79
79
|
- lib/selma.rb
|
80
|
+
- lib/selma/config.rb
|
80
81
|
- lib/selma/extension.rb
|
81
82
|
- lib/selma/html.rb
|
82
83
|
- lib/selma/rewriter.rb
|