selma 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +115 -114
- data/README.md +122 -24
- data/ext/selma/Cargo.toml +5 -2
- data/ext/selma/src/html/element.rs +11 -6
- data/ext/selma/src/native_ref_wrap.rs +15 -12
- data/ext/selma/src/rewriter.rs +257 -106
- data/ext/selma/src/sanitizer.rs +23 -16
- data/lib/selma/config.rb +12 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer.rb +6 -1
- data/lib/selma/version.rb +1 -1
- metadata +8 -7
data/ext/selma/src/rewriter.rs
CHANGED
@@ -1,16 +1,25 @@
|
|
1
1
|
use lol_html::{
|
2
2
|
doc_comments, doctype, element,
|
3
3
|
html_content::{Element, TextChunk},
|
4
|
-
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter,
|
4
|
+
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, MemorySettings, Selector,
|
5
|
+
Settings,
|
5
6
|
};
|
6
7
|
use magnus::{
|
7
|
-
exception, function, method,
|
8
|
+
exception, function, method,
|
9
|
+
r_hash::ForEach,
|
10
|
+
scan_args,
|
8
11
|
typed_data::Obj,
|
9
12
|
value::{Opaque, ReprValue},
|
10
|
-
Module, Object, RArray, RModule, Ruby, Value,
|
13
|
+
Integer, IntoValue, Module, Object, RArray, RHash, RModule, Ruby, Symbol, Value,
|
11
14
|
};
|
12
15
|
|
13
|
-
use std::{
|
16
|
+
use std::{
|
17
|
+
borrow::Cow,
|
18
|
+
cell::{Ref, RefCell},
|
19
|
+
ops::Deref,
|
20
|
+
primitive::str,
|
21
|
+
rc::Rc,
|
22
|
+
};
|
14
23
|
|
15
24
|
use crate::{
|
16
25
|
html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
|
@@ -19,9 +28,26 @@ use crate::{
|
|
19
28
|
tags::Tag,
|
20
29
|
};
|
21
30
|
|
31
|
+
#[derive(Copy, Clone)]
|
32
|
+
pub struct ObjectValue {
|
33
|
+
pub inner: Opaque<Value>,
|
34
|
+
}
|
35
|
+
|
36
|
+
impl IntoValue for ObjectValue {
|
37
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
38
|
+
Ruby::get().unwrap().get_inner(self.inner)
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
impl From<Value> for ObjectValue {
|
43
|
+
fn from(v: Value) -> Self {
|
44
|
+
Self { inner: v.into() }
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
22
48
|
#[derive(Clone)]
|
23
49
|
pub struct Handler {
|
24
|
-
rb_handler:
|
50
|
+
rb_handler: ObjectValue,
|
25
51
|
rb_selector: Opaque<Obj<SelmaSelector>>,
|
26
52
|
// total_element_handler_calls: usize,
|
27
53
|
// total_elapsed_element_handlers: f64,
|
@@ -30,16 +56,25 @@ pub struct Handler {
|
|
30
56
|
// total_elapsed_text_handlers: f64,
|
31
57
|
}
|
32
58
|
|
59
|
+
struct RewriterOptions {
|
60
|
+
memory_options: MemorySettings,
|
61
|
+
}
|
62
|
+
|
33
63
|
pub struct Rewriter {
|
34
64
|
sanitizer: Option<SelmaSanitizer>,
|
35
65
|
handlers: Vec<Handler>,
|
66
|
+
options: RewriterOptions,
|
36
67
|
// total_elapsed: f64,
|
37
68
|
}
|
38
69
|
|
39
70
|
#[magnus::wrap(class = "Selma::Rewriter")]
|
40
71
|
pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
|
41
72
|
|
42
|
-
type RewriterValues = (
|
73
|
+
type RewriterValues = (
|
74
|
+
Option<Option<Obj<SelmaSanitizer>>>,
|
75
|
+
Option<RArray>,
|
76
|
+
Option<RHash>,
|
77
|
+
);
|
43
78
|
|
44
79
|
impl SelmaRewriter {
|
45
80
|
const SELMA_ON_END_TAG: &'static str = "on_end_tag";
|
@@ -50,9 +85,10 @@ impl SelmaRewriter {
|
|
50
85
|
/// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
|
51
86
|
/// @param sanitizer [Selma::Sanitizer] The sanitizer which performs the initial cleanup
|
52
87
|
/// @param handlers [Array<Selma::Selector>] The handlers to use to perform HTML rewriting
|
88
|
+
/// @param options [Hash] Any additional options to pass to the rewriter
|
53
89
|
/// @return [Selma::Rewriter]
|
54
90
|
fn new(args: &[Value]) -> Result<Self, magnus::Error> {
|
55
|
-
let (rb_sanitizer, rb_handlers) = Self::scan_parse_args(args)?;
|
91
|
+
let (rb_sanitizer, rb_handlers, rb_options) = Self::scan_parse_args(args)?;
|
56
92
|
|
57
93
|
let sanitizer = match rb_sanitizer {
|
58
94
|
None => {
|
@@ -60,13 +96,13 @@ impl SelmaRewriter {
|
|
60
96
|
let default_sanitizer = SelmaSanitizer::new(&[])?;
|
61
97
|
let wrapped_sanitizer = Obj::wrap(default_sanitizer);
|
62
98
|
wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
63
|
-
Some(wrapped_sanitizer.
|
99
|
+
Some(wrapped_sanitizer.deref().to_owned())
|
64
100
|
}
|
65
101
|
Some(sanitizer_value) => match sanitizer_value {
|
66
102
|
None => None, // no `sanitizer:` provided, use default
|
67
103
|
Some(sanitizer) => {
|
68
104
|
sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
69
|
-
Some(sanitizer.
|
105
|
+
Some(sanitizer.deref().to_owned())
|
70
106
|
}
|
71
107
|
},
|
72
108
|
};
|
@@ -101,7 +137,7 @@ impl SelmaRewriter {
|
|
101
137
|
Ok(rb_selector) => rb_selector,
|
102
138
|
};
|
103
139
|
let handler = Handler {
|
104
|
-
rb_handler:
|
140
|
+
rb_handler: ObjectValue::from(rb_handler),
|
105
141
|
rb_selector: Opaque::from(rb_selector),
|
106
142
|
// total_element_handler_calls: 0,
|
107
143
|
// total_elapsed_element_handlers: 0.0,
|
@@ -122,9 +158,88 @@ impl SelmaRewriter {
|
|
122
158
|
));
|
123
159
|
}
|
124
160
|
|
161
|
+
let mut rewriter_options = RewriterOptions::new();
|
162
|
+
|
163
|
+
match rb_options {
|
164
|
+
None => {}
|
165
|
+
Some(options) => {
|
166
|
+
options.foreach(|key: Symbol, value: RHash| {
|
167
|
+
let key = key.to_string();
|
168
|
+
match key.as_str() {
|
169
|
+
"memory" => {
|
170
|
+
let max_allowed_memory_usage = value.get(Symbol::new("max_allowed_memory_usage"));
|
171
|
+
if max_allowed_memory_usage.is_some() {
|
172
|
+
let max_allowed_memory_usage = max_allowed_memory_usage.unwrap();
|
173
|
+
let max_allowed_memory_usage =
|
174
|
+
Integer::from_value(max_allowed_memory_usage);
|
175
|
+
if max_allowed_memory_usage.is_some() {
|
176
|
+
match max_allowed_memory_usage.unwrap().to_u64() {
|
177
|
+
Ok(max_allowed_memory_usage) => {
|
178
|
+
rewriter_options.memory_options.max_allowed_memory_usage =
|
179
|
+
max_allowed_memory_usage as usize;
|
180
|
+
}
|
181
|
+
Err(_e) => {
|
182
|
+
return Err(magnus::Error::new(
|
183
|
+
exception::arg_error(),
|
184
|
+
"max_allowed_memory_usage must be a positive integer",
|
185
|
+
));
|
186
|
+
}
|
187
|
+
}
|
188
|
+
} else {
|
189
|
+
rewriter_options.memory_options.max_allowed_memory_usage = MemorySettings::default().max_allowed_memory_usage;
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
let preallocated_parsing_buffer_size = value.get(Symbol::new("preallocated_parsing_buffer_size"));
|
194
|
+
if preallocated_parsing_buffer_size.is_some() {
|
195
|
+
let preallocated_parsing_buffer_size = preallocated_parsing_buffer_size.unwrap();
|
196
|
+
let preallocated_parsing_buffer_size =
|
197
|
+
Integer::from_value(preallocated_parsing_buffer_size);
|
198
|
+
if preallocated_parsing_buffer_size.is_some() {
|
199
|
+
match preallocated_parsing_buffer_size.unwrap().to_u64() {
|
200
|
+
Ok(preallocated_parsing_buffer_size) => {
|
201
|
+
rewriter_options.memory_options.preallocated_parsing_buffer_size =
|
202
|
+
preallocated_parsing_buffer_size as usize;
|
203
|
+
}
|
204
|
+
Err(_e) => {
|
205
|
+
return Err(magnus::Error::new(
|
206
|
+
exception::arg_error(),
|
207
|
+
"preallocated_parsing_buffer_size must be a positive integer",
|
208
|
+
));
|
209
|
+
}
|
210
|
+
}
|
211
|
+
} else {
|
212
|
+
rewriter_options.memory_options.preallocated_parsing_buffer_size = MemorySettings::default().preallocated_parsing_buffer_size;
|
213
|
+
}
|
214
|
+
}
|
215
|
+
}
|
216
|
+
_ => {
|
217
|
+
return Err(magnus::Error::new(
|
218
|
+
exception::arg_error(),
|
219
|
+
format!("Unknown option: {key:?}"),
|
220
|
+
));
|
221
|
+
}
|
222
|
+
}
|
223
|
+
Ok(ForEach::Continue)
|
224
|
+
})?;
|
225
|
+
}
|
226
|
+
}
|
227
|
+
|
228
|
+
if rewriter_options
|
229
|
+
.memory_options
|
230
|
+
.preallocated_parsing_buffer_size
|
231
|
+
> rewriter_options.memory_options.max_allowed_memory_usage
|
232
|
+
{
|
233
|
+
return Err(magnus::Error::new(
|
234
|
+
exception::arg_error(),
|
235
|
+
"max_allowed_memory_usage must be greater than preallocated_parsing_buffer_size",
|
236
|
+
));
|
237
|
+
}
|
238
|
+
|
125
239
|
Ok(Self(std::cell::RefCell::new(Rewriter {
|
126
240
|
sanitizer,
|
127
241
|
handlers,
|
242
|
+
options: rewriter_options,
|
128
243
|
// total_elapsed: 0.0,
|
129
244
|
})))
|
130
245
|
}
|
@@ -141,123 +256,122 @@ impl SelmaRewriter {
|
|
141
256
|
let kwargs = scan_args::get_kwargs::<
|
142
257
|
_,
|
143
258
|
(),
|
144
|
-
(
|
259
|
+
(
|
260
|
+
Option<Option<Obj<SelmaSanitizer>>>,
|
261
|
+
Option<RArray>,
|
262
|
+
Option<RHash>,
|
263
|
+
),
|
145
264
|
(),
|
146
|
-
>(args.keywords, &[], &["sanitizer", "handlers"])?;
|
147
|
-
let (rb_sanitizer, rb_handlers) = kwargs.optional;
|
265
|
+
>(args.keywords, &[], &["sanitizer", "handlers", "options"])?;
|
266
|
+
let (rb_sanitizer, rb_handlers, rb_options) = kwargs.optional;
|
148
267
|
|
149
|
-
Ok((rb_sanitizer, rb_handlers))
|
268
|
+
Ok((rb_sanitizer, rb_handlers, rb_options))
|
150
269
|
}
|
151
270
|
|
152
271
|
/// Perform HTML rewrite sequence.
|
153
272
|
fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
|
154
|
-
let
|
155
|
-
None => Ok(html),
|
156
|
-
Some(sanitizer) => {
|
157
|
-
let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
|
158
|
-
Ok(sanitized_html) => sanitized_html,
|
159
|
-
Err(err) => return Err(err),
|
160
|
-
};
|
273
|
+
let binding = self.0.borrow();
|
161
274
|
|
162
|
-
|
275
|
+
let mut sanitizer_document_content_handlers: Vec<DocumentContentHandlers> = vec![];
|
276
|
+
let mut sanitizer_initial_element_content_handlers: Vec<(
|
277
|
+
Cow<Selector>,
|
278
|
+
ElementContentHandlers,
|
279
|
+
)> = vec![];
|
280
|
+
|
281
|
+
match &binding.sanitizer {
|
282
|
+
None => (),
|
283
|
+
Some(sanitizer) => {
|
284
|
+
if !sanitizer.get_allow_doctype() {
|
285
|
+
sanitizer_document_content_handlers.push(doctype!(|d| {
|
286
|
+
sanitizer.remove_doctype(d);
|
287
|
+
Ok(())
|
288
|
+
}));
|
289
|
+
}
|
290
|
+
if !sanitizer.get_allow_comments() {
|
291
|
+
sanitizer_document_content_handlers.push(doc_comments!(|c| {
|
292
|
+
sanitizer.remove_comment(c);
|
293
|
+
Ok(())
|
294
|
+
}));
|
295
|
+
}
|
296
|
+
sanitizer_initial_element_content_handlers.push(element!("*", |el| {
|
297
|
+
sanitizer.try_remove_element(el);
|
298
|
+
if el.removed() {
|
299
|
+
return Ok(());
|
300
|
+
}
|
301
|
+
match sanitizer.sanitize_attributes(el) {
|
302
|
+
Ok(_) => Ok(()),
|
303
|
+
Err(err) => Err(err.to_string().into()),
|
304
|
+
}
|
305
|
+
}));
|
163
306
|
}
|
164
307
|
};
|
165
|
-
|
308
|
+
|
166
309
|
let handlers = &binding.handlers;
|
167
310
|
|
168
|
-
match Self::perform_handler_rewrite(
|
169
|
-
|
311
|
+
match Self::perform_handler_rewrite(
|
312
|
+
self,
|
313
|
+
sanitizer_document_content_handlers,
|
314
|
+
sanitizer_initial_element_content_handlers,
|
315
|
+
handlers,
|
316
|
+
html,
|
317
|
+
) {
|
318
|
+
Ok(rewritten_html) => match &binding.sanitizer {
|
319
|
+
None => match String::from_utf8(rewritten_html) {
|
320
|
+
Ok(output) => Ok(output),
|
321
|
+
Err(err) => Err(magnus::Error::new(
|
322
|
+
exception::runtime_error(),
|
323
|
+
format!("{err:?}"),
|
324
|
+
)),
|
325
|
+
},
|
326
|
+
Some(sanitizer) => {
|
327
|
+
Self::perform_final_sanitization(self, sanitizer, rewritten_html)
|
328
|
+
}
|
329
|
+
},
|
170
330
|
Err(err) => Err(err),
|
171
331
|
}
|
172
332
|
}
|
173
333
|
|
174
|
-
|
334
|
+
// to get rid of some really nasty edge cases with dangerous tags, we perform one more
|
335
|
+
// sanitization pass at the end
|
336
|
+
fn perform_final_sanitization(
|
337
|
+
&self,
|
175
338
|
sanitizer: &SelmaSanitizer,
|
176
|
-
html:
|
177
|
-
) -> Result<
|
178
|
-
|
179
|
-
|
180
|
-
let mut document_content_handlers: Vec<DocumentContentHandlers> = vec![];
|
181
|
-
if !sanitizer.get_allow_doctype() {
|
182
|
-
document_content_handlers.push(doctype!(|d| {
|
183
|
-
sanitizer.remove_doctype(d);
|
184
|
-
Ok(())
|
185
|
-
}));
|
186
|
-
}
|
187
|
-
if !sanitizer.get_allow_comments() {
|
188
|
-
document_content_handlers.push(doc_comments!(|c| {
|
189
|
-
sanitizer.remove_comment(c);
|
190
|
-
Ok(())
|
191
|
-
}));
|
192
|
-
}
|
193
|
-
let mut rewriter = HtmlRewriter::new(
|
194
|
-
Settings {
|
195
|
-
document_content_handlers,
|
196
|
-
element_content_handlers: vec![element!("*", |el| {
|
197
|
-
sanitizer.try_remove_element(el);
|
198
|
-
if el.removed() {
|
199
|
-
return Ok(());
|
200
|
-
}
|
201
|
-
match sanitizer.sanitize_attributes(el) {
|
202
|
-
Ok(_) => Ok(()),
|
203
|
-
Err(err) => Err(err.to_string().into()),
|
204
|
-
}
|
205
|
-
})],
|
206
|
-
// TODO: allow for MemorySettings to be defined
|
207
|
-
..Settings::default()
|
208
|
-
},
|
209
|
-
|c: &[u8]| first_pass_html.extend_from_slice(c),
|
210
|
-
);
|
211
|
-
|
212
|
-
let result = rewriter.write(html.as_bytes());
|
213
|
-
if result.is_err() {
|
214
|
-
return Err(magnus::Error::new(
|
215
|
-
exception::runtime_error(),
|
216
|
-
format!("Failed to sanitize HTML: {}", result.unwrap_err()),
|
217
|
-
));
|
218
|
-
}
|
219
|
-
}
|
220
|
-
|
221
|
-
let mut output = vec![];
|
222
|
-
{
|
223
|
-
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
224
|
-
if sanitizer.get_escape_tagfilter() {
|
225
|
-
element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
|
226
|
-
let should_remove = sanitizer.allow_element(el);
|
227
|
-
if should_remove {
|
228
|
-
sanitizer.force_remove_element(el);
|
229
|
-
}
|
339
|
+
html: Vec<u8>,
|
340
|
+
) -> Result<String, magnus::Error> {
|
341
|
+
// TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
|
342
|
+
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
230
343
|
|
231
|
-
|
232
|
-
|
233
|
-
|
344
|
+
if sanitizer.get_escape_tagfilter() {
|
345
|
+
element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
|
346
|
+
let should_remove = sanitizer.allow_element(el);
|
347
|
+
if should_remove {
|
348
|
+
sanitizer.force_remove_element(el);
|
349
|
+
}
|
234
350
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
..Settings::default()
|
239
|
-
},
|
240
|
-
|c: &[u8]| output.extend_from_slice(c),
|
241
|
-
);
|
351
|
+
Ok(())
|
352
|
+
}));
|
353
|
+
}
|
242
354
|
|
243
|
-
|
244
|
-
|
245
|
-
|
355
|
+
match Self::run_rewrite(self, vec![], element_content_handlers, html.as_slice()) {
|
356
|
+
Ok(rewritten_html) => match String::from_utf8(rewritten_html) {
|
357
|
+
Ok(output) => Ok(output),
|
358
|
+
Err(err) => Err(magnus::Error::new(
|
246
359
|
exception::runtime_error(),
|
247
|
-
format!("
|
248
|
-
))
|
249
|
-
}
|
360
|
+
format!("{err:?}"),
|
361
|
+
)),
|
362
|
+
},
|
363
|
+
Err(err) => Err(err),
|
250
364
|
}
|
251
|
-
|
252
|
-
Ok(output)
|
253
365
|
}
|
254
366
|
|
255
367
|
pub fn perform_handler_rewrite(
|
256
368
|
&self,
|
369
|
+
sanitizer_document_content_handlers: Vec<DocumentContentHandlers>,
|
370
|
+
sanitizer_initial_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)>,
|
257
371
|
handlers: &[Handler],
|
258
372
|
html: String,
|
259
373
|
) -> Result<Vec<u8>, magnus::Error> {
|
260
|
-
// TODO: this should ideally be done ahead of time
|
374
|
+
// TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
|
261
375
|
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
262
376
|
|
263
377
|
handlers.iter().for_each(|handler| {
|
@@ -275,7 +389,7 @@ impl SelmaRewriter {
|
|
275
389
|
selector.match_element().unwrap(),
|
276
390
|
move |el| {
|
277
391
|
match Self::process_element_handlers(
|
278
|
-
|
392
|
+
handler.rb_handler,
|
279
393
|
el,
|
280
394
|
&closure_element_stack.borrow(),
|
281
395
|
) {
|
@@ -306,9 +420,7 @@ impl SelmaRewriter {
|
|
306
420
|
}
|
307
421
|
}
|
308
422
|
|
309
|
-
|
310
|
-
match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
|
311
|
-
{
|
423
|
+
match Self::process_text_handlers(handler.rb_handler, text) {
|
312
424
|
Ok(_) => Ok(()),
|
313
425
|
Err(err) => Err(err.to_string().into()),
|
314
426
|
}
|
@@ -341,16 +453,35 @@ impl SelmaRewriter {
|
|
341
453
|
}));
|
342
454
|
});
|
343
455
|
|
456
|
+
element_content_handlers.extend(sanitizer_initial_element_content_handlers);
|
457
|
+
|
458
|
+
Self::run_rewrite(
|
459
|
+
self,
|
460
|
+
sanitizer_document_content_handlers,
|
461
|
+
element_content_handlers,
|
462
|
+
html.as_bytes(),
|
463
|
+
)
|
464
|
+
}
|
465
|
+
|
466
|
+
fn run_rewrite(
|
467
|
+
&self,
|
468
|
+
document_content_handlers: Vec<DocumentContentHandlers>,
|
469
|
+
element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)>,
|
470
|
+
html: &[u8],
|
471
|
+
) -> Result<Vec<u8>, magnus::Error> {
|
472
|
+
let binding = &self.0.borrow();
|
344
473
|
let mut output = vec![];
|
345
474
|
{
|
346
475
|
let mut rewriter = HtmlRewriter::new(
|
347
476
|
Settings {
|
477
|
+
document_content_handlers,
|
348
478
|
element_content_handlers,
|
479
|
+
memory_settings: Self::get_memory_options(binding),
|
349
480
|
..Settings::default()
|
350
481
|
},
|
351
482
|
|c: &[u8]| output.extend_from_slice(c),
|
352
483
|
);
|
353
|
-
match rewriter.write(html
|
484
|
+
match rewriter.write(html) {
|
354
485
|
Ok(_) => {}
|
355
486
|
Err(err) => {
|
356
487
|
return Err(magnus::Error::new(
|
@@ -364,10 +495,12 @@ impl SelmaRewriter {
|
|
364
495
|
}
|
365
496
|
|
366
497
|
fn process_element_handlers(
|
367
|
-
|
498
|
+
obj_rb_handler: ObjectValue,
|
368
499
|
element: &mut Element,
|
369
500
|
ancestors: &[String],
|
370
501
|
) -> Result<(), magnus::Error> {
|
502
|
+
let rb_handler = Ruby::get().unwrap().get_inner(obj_rb_handler.inner);
|
503
|
+
|
371
504
|
// if `on_end_tag` function is defined, call it
|
372
505
|
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
373
506
|
// TODO: error here is an "EndTagError"
|
@@ -394,9 +527,11 @@ impl SelmaRewriter {
|
|
394
527
|
}
|
395
528
|
|
396
529
|
fn process_text_handlers(
|
397
|
-
|
530
|
+
obj_rb_handler: ObjectValue,
|
398
531
|
text_chunk: &mut TextChunk,
|
399
532
|
) -> Result<(), magnus::Error> {
|
533
|
+
let rb_handler = Ruby::get().unwrap().get_inner(obj_rb_handler.inner);
|
534
|
+
|
400
535
|
// prevents missing `handle_text_chunk` function
|
401
536
|
let content = text_chunk.as_str();
|
402
537
|
|
@@ -414,6 +549,22 @@ impl SelmaRewriter {
|
|
414
549
|
)),
|
415
550
|
}
|
416
551
|
}
|
552
|
+
|
553
|
+
fn get_memory_options(binding: &Ref<Rewriter>) -> MemorySettings {
|
554
|
+
let options = &binding.options.memory_options;
|
555
|
+
MemorySettings {
|
556
|
+
max_allowed_memory_usage: options.max_allowed_memory_usage,
|
557
|
+
preallocated_parsing_buffer_size: options.preallocated_parsing_buffer_size,
|
558
|
+
}
|
559
|
+
}
|
560
|
+
}
|
561
|
+
|
562
|
+
impl RewriterOptions {
|
563
|
+
pub fn new() -> Self {
|
564
|
+
Self {
|
565
|
+
memory_options: MemorySettings::default(),
|
566
|
+
}
|
567
|
+
}
|
417
568
|
}
|
418
569
|
|
419
570
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -211,20 +211,23 @@ impl SelmaSanitizer {
|
|
211
211
|
}
|
212
212
|
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
213
213
|
}
|
214
|
-
} else if allowed_protocol.is_kind_of(class::symbol())
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
214
|
+
} else if allowed_protocol.is_kind_of(class::symbol()) {
|
215
|
+
let protocol_config = allowed_protocol.inspect();
|
216
|
+
if protocol_config == ":relative" {
|
217
|
+
match protocol_list {
|
218
|
+
None => {
|
219
|
+
protocol_sanitizers.insert(
|
220
|
+
attr_name.to_string(),
|
221
|
+
vec!["#".to_string(), "/".to_string()],
|
222
|
+
);
|
223
|
+
}
|
224
|
+
Some(protocol_list) => {
|
225
|
+
protocol_list.push("#".to_string());
|
226
|
+
protocol_list.push("/".to_string());
|
227
|
+
}
|
227
228
|
}
|
229
|
+
} else if protocol_config == ":all" {
|
230
|
+
protocol_sanitizers.insert(attr_name.to_string(), vec!["all".to_string()]);
|
228
231
|
}
|
229
232
|
}
|
230
233
|
}
|
@@ -335,7 +338,7 @@ impl SelmaSanitizer {
|
|
335
338
|
element: &mut Element,
|
336
339
|
element_sanitizer: &ElementSanitizer,
|
337
340
|
attr_name: &String,
|
338
|
-
attr_val: &
|
341
|
+
attr_val: &str,
|
339
342
|
) -> Result<bool, AttributeNameError> {
|
340
343
|
let mut allowed: bool = false;
|
341
344
|
let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
|
@@ -387,7 +390,11 @@ impl SelmaSanitizer {
|
|
387
390
|
attr_val.contains("://")
|
388
391
|
}
|
389
392
|
|
390
|
-
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &
|
393
|
+
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
|
394
|
+
if protocols_allowed.contains(&"all".to_string()) {
|
395
|
+
return true;
|
396
|
+
}
|
397
|
+
|
391
398
|
// FIXME: is there a more idiomatic way to do this?
|
392
399
|
let mut pos: usize = 0;
|
393
400
|
let mut chars = attr_val.chars();
|
@@ -542,7 +549,7 @@ impl SelmaSanitizer {
|
|
542
549
|
) -> &'a mut ElementSanitizer {
|
543
550
|
element_sanitizers
|
544
551
|
.entry(element_name.to_string())
|
545
|
-
.
|
552
|
+
.or_default()
|
546
553
|
}
|
547
554
|
}
|
548
555
|
|
data/lib/selma/config.rb
ADDED
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
|
29
29
|
# URL handling protocols to allow in specific attributes. By default, no
|
30
30
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
31
|
-
# to allow relative URLs sans protocol.
|
31
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
32
32
|
protocols: {},
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
data/lib/selma/sanitizer.rb
CHANGED
@@ -66,7 +66,12 @@ module Selma
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def allow_protocol(element, attr, protos)
|
69
|
-
|
69
|
+
if protos.is_a?(Array)
|
70
|
+
raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
|
71
|
+
else
|
72
|
+
protos = [protos]
|
73
|
+
end
|
74
|
+
|
70
75
|
set_allowed_protocols(element, attr, protos)
|
71
76
|
end
|
72
77
|
|
data/lib/selma/version.rb
CHANGED