selma 0.2.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +115 -114
- data/README.md +122 -24
- data/ext/selma/Cargo.toml +5 -2
- data/ext/selma/src/html/element.rs +11 -6
- data/ext/selma/src/native_ref_wrap.rs +15 -12
- data/ext/selma/src/rewriter.rs +257 -106
- data/ext/selma/src/sanitizer.rs +23 -16
- data/lib/selma/config.rb +12 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer.rb +6 -1
- data/lib/selma/version.rb +1 -1
- metadata +8 -7
data/ext/selma/src/rewriter.rs
CHANGED
@@ -1,16 +1,25 @@
|
|
1
1
|
use lol_html::{
|
2
2
|
doc_comments, doctype, element,
|
3
3
|
html_content::{Element, TextChunk},
|
4
|
-
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter,
|
4
|
+
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, MemorySettings, Selector,
|
5
|
+
Settings,
|
5
6
|
};
|
6
7
|
use magnus::{
|
7
|
-
exception, function, method,
|
8
|
+
exception, function, method,
|
9
|
+
r_hash::ForEach,
|
10
|
+
scan_args,
|
8
11
|
typed_data::Obj,
|
9
12
|
value::{Opaque, ReprValue},
|
10
|
-
Module, Object, RArray, RModule, Ruby, Value,
|
13
|
+
Integer, IntoValue, Module, Object, RArray, RHash, RModule, Ruby, Symbol, Value,
|
11
14
|
};
|
12
15
|
|
13
|
-
use std::{
|
16
|
+
use std::{
|
17
|
+
borrow::Cow,
|
18
|
+
cell::{Ref, RefCell},
|
19
|
+
ops::Deref,
|
20
|
+
primitive::str,
|
21
|
+
rc::Rc,
|
22
|
+
};
|
14
23
|
|
15
24
|
use crate::{
|
16
25
|
html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
|
@@ -19,9 +28,26 @@ use crate::{
|
|
19
28
|
tags::Tag,
|
20
29
|
};
|
21
30
|
|
31
|
+
#[derive(Copy, Clone)]
|
32
|
+
pub struct ObjectValue {
|
33
|
+
pub inner: Opaque<Value>,
|
34
|
+
}
|
35
|
+
|
36
|
+
impl IntoValue for ObjectValue {
|
37
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
38
|
+
Ruby::get().unwrap().get_inner(self.inner)
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
impl From<Value> for ObjectValue {
|
43
|
+
fn from(v: Value) -> Self {
|
44
|
+
Self { inner: v.into() }
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
22
48
|
#[derive(Clone)]
|
23
49
|
pub struct Handler {
|
24
|
-
rb_handler:
|
50
|
+
rb_handler: ObjectValue,
|
25
51
|
rb_selector: Opaque<Obj<SelmaSelector>>,
|
26
52
|
// total_element_handler_calls: usize,
|
27
53
|
// total_elapsed_element_handlers: f64,
|
@@ -30,16 +56,25 @@ pub struct Handler {
|
|
30
56
|
// total_elapsed_text_handlers: f64,
|
31
57
|
}
|
32
58
|
|
59
|
+
struct RewriterOptions {
|
60
|
+
memory_options: MemorySettings,
|
61
|
+
}
|
62
|
+
|
33
63
|
pub struct Rewriter {
|
34
64
|
sanitizer: Option<SelmaSanitizer>,
|
35
65
|
handlers: Vec<Handler>,
|
66
|
+
options: RewriterOptions,
|
36
67
|
// total_elapsed: f64,
|
37
68
|
}
|
38
69
|
|
39
70
|
#[magnus::wrap(class = "Selma::Rewriter")]
|
40
71
|
pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
|
41
72
|
|
42
|
-
type RewriterValues = (
|
73
|
+
type RewriterValues = (
|
74
|
+
Option<Option<Obj<SelmaSanitizer>>>,
|
75
|
+
Option<RArray>,
|
76
|
+
Option<RHash>,
|
77
|
+
);
|
43
78
|
|
44
79
|
impl SelmaRewriter {
|
45
80
|
const SELMA_ON_END_TAG: &'static str = "on_end_tag";
|
@@ -50,9 +85,10 @@ impl SelmaRewriter {
|
|
50
85
|
/// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [], options: {})
|
51
86
|
/// @param sanitizer [Selma::Sanitizer] The sanitizer which performs the initial cleanup
|
52
87
|
/// @param handlers [Array<Selma::Selector>] The handlers to use to perform HTML rewriting
|
88
|
+
/// @param options [Hash] Any additional options to pass to the rewriter
|
53
89
|
/// @return [Selma::Rewriter]
|
54
90
|
fn new(args: &[Value]) -> Result<Self, magnus::Error> {
|
55
|
-
let (rb_sanitizer, rb_handlers) = Self::scan_parse_args(args)?;
|
91
|
+
let (rb_sanitizer, rb_handlers, rb_options) = Self::scan_parse_args(args)?;
|
56
92
|
|
57
93
|
let sanitizer = match rb_sanitizer {
|
58
94
|
None => {
|
@@ -60,13 +96,13 @@ impl SelmaRewriter {
|
|
60
96
|
let default_sanitizer = SelmaSanitizer::new(&[])?;
|
61
97
|
let wrapped_sanitizer = Obj::wrap(default_sanitizer);
|
62
98
|
wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
63
|
-
Some(wrapped_sanitizer.
|
99
|
+
Some(wrapped_sanitizer.deref().to_owned())
|
64
100
|
}
|
65
101
|
Some(sanitizer_value) => match sanitizer_value {
|
66
102
|
None => None, // `sanitizer: nil` was passed explicitly, so no sanitizer is used
|
67
103
|
Some(sanitizer) => {
|
68
104
|
sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
69
|
-
Some(sanitizer.
|
105
|
+
Some(sanitizer.deref().to_owned())
|
70
106
|
}
|
71
107
|
},
|
72
108
|
};
|
@@ -101,7 +137,7 @@ impl SelmaRewriter {
|
|
101
137
|
Ok(rb_selector) => rb_selector,
|
102
138
|
};
|
103
139
|
let handler = Handler {
|
104
|
-
rb_handler:
|
140
|
+
rb_handler: ObjectValue::from(rb_handler),
|
105
141
|
rb_selector: Opaque::from(rb_selector),
|
106
142
|
// total_element_handler_calls: 0,
|
107
143
|
// total_elapsed_element_handlers: 0.0,
|
@@ -122,9 +158,88 @@ impl SelmaRewriter {
|
|
122
158
|
));
|
123
159
|
}
|
124
160
|
|
161
|
+
let mut rewriter_options = RewriterOptions::new();
|
162
|
+
|
163
|
+
match rb_options {
|
164
|
+
None => {}
|
165
|
+
Some(options) => {
|
166
|
+
options.foreach(|key: Symbol, value: RHash| {
|
167
|
+
let key = key.to_string();
|
168
|
+
match key.as_str() {
|
169
|
+
"memory" => {
|
170
|
+
let max_allowed_memory_usage = value.get(Symbol::new("max_allowed_memory_usage"));
|
171
|
+
if max_allowed_memory_usage.is_some() {
|
172
|
+
let max_allowed_memory_usage = max_allowed_memory_usage.unwrap();
|
173
|
+
let max_allowed_memory_usage =
|
174
|
+
Integer::from_value(max_allowed_memory_usage);
|
175
|
+
if max_allowed_memory_usage.is_some() {
|
176
|
+
match max_allowed_memory_usage.unwrap().to_u64() {
|
177
|
+
Ok(max_allowed_memory_usage) => {
|
178
|
+
rewriter_options.memory_options.max_allowed_memory_usage =
|
179
|
+
max_allowed_memory_usage as usize;
|
180
|
+
}
|
181
|
+
Err(_e) => {
|
182
|
+
return Err(magnus::Error::new(
|
183
|
+
exception::arg_error(),
|
184
|
+
"max_allowed_memory_usage must be a positive integer",
|
185
|
+
));
|
186
|
+
}
|
187
|
+
}
|
188
|
+
} else {
|
189
|
+
rewriter_options.memory_options.max_allowed_memory_usage = MemorySettings::default().max_allowed_memory_usage;
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
let preallocated_parsing_buffer_size = value.get(Symbol::new("preallocated_parsing_buffer_size"));
|
194
|
+
if preallocated_parsing_buffer_size.is_some() {
|
195
|
+
let preallocated_parsing_buffer_size = preallocated_parsing_buffer_size.unwrap();
|
196
|
+
let preallocated_parsing_buffer_size =
|
197
|
+
Integer::from_value(preallocated_parsing_buffer_size);
|
198
|
+
if preallocated_parsing_buffer_size.is_some() {
|
199
|
+
match preallocated_parsing_buffer_size.unwrap().to_u64() {
|
200
|
+
Ok(preallocated_parsing_buffer_size) => {
|
201
|
+
rewriter_options.memory_options.preallocated_parsing_buffer_size =
|
202
|
+
preallocated_parsing_buffer_size as usize;
|
203
|
+
}
|
204
|
+
Err(_e) => {
|
205
|
+
return Err(magnus::Error::new(
|
206
|
+
exception::arg_error(),
|
207
|
+
"preallocated_parsing_buffer_size must be a positive integer",
|
208
|
+
));
|
209
|
+
}
|
210
|
+
}
|
211
|
+
} else {
|
212
|
+
rewriter_options.memory_options.preallocated_parsing_buffer_size = MemorySettings::default().preallocated_parsing_buffer_size;
|
213
|
+
}
|
214
|
+
}
|
215
|
+
}
|
216
|
+
_ => {
|
217
|
+
return Err(magnus::Error::new(
|
218
|
+
exception::arg_error(),
|
219
|
+
format!("Unknown option: {key:?}"),
|
220
|
+
));
|
221
|
+
}
|
222
|
+
}
|
223
|
+
Ok(ForEach::Continue)
|
224
|
+
})?;
|
225
|
+
}
|
226
|
+
}
|
227
|
+
|
228
|
+
if rewriter_options
|
229
|
+
.memory_options
|
230
|
+
.preallocated_parsing_buffer_size
|
231
|
+
> rewriter_options.memory_options.max_allowed_memory_usage
|
232
|
+
{
|
233
|
+
return Err(magnus::Error::new(
|
234
|
+
exception::arg_error(),
|
235
|
+
"max_allowed_memory_usage must be greater than preallocated_parsing_buffer_size",
|
236
|
+
));
|
237
|
+
}
|
238
|
+
|
125
239
|
Ok(Self(std::cell::RefCell::new(Rewriter {
|
126
240
|
sanitizer,
|
127
241
|
handlers,
|
242
|
+
options: rewriter_options,
|
128
243
|
// total_elapsed: 0.0,
|
129
244
|
})))
|
130
245
|
}
|
@@ -141,123 +256,122 @@ impl SelmaRewriter {
|
|
141
256
|
let kwargs = scan_args::get_kwargs::<
|
142
257
|
_,
|
143
258
|
(),
|
144
|
-
(
|
259
|
+
(
|
260
|
+
Option<Option<Obj<SelmaSanitizer>>>,
|
261
|
+
Option<RArray>,
|
262
|
+
Option<RHash>,
|
263
|
+
),
|
145
264
|
(),
|
146
|
-
>(args.keywords, &[], &["sanitizer", "handlers"])?;
|
147
|
-
let (rb_sanitizer, rb_handlers) = kwargs.optional;
|
265
|
+
>(args.keywords, &[], &["sanitizer", "handlers", "options"])?;
|
266
|
+
let (rb_sanitizer, rb_handlers, rb_options) = kwargs.optional;
|
148
267
|
|
149
|
-
Ok((rb_sanitizer, rb_handlers))
|
268
|
+
Ok((rb_sanitizer, rb_handlers, rb_options))
|
150
269
|
}
|
151
270
|
|
152
271
|
/// Perform HTML rewrite sequence.
|
153
272
|
fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
|
154
|
-
let
|
155
|
-
None => Ok(html),
|
156
|
-
Some(sanitizer) => {
|
157
|
-
let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
|
158
|
-
Ok(sanitized_html) => sanitized_html,
|
159
|
-
Err(err) => return Err(err),
|
160
|
-
};
|
273
|
+
let binding = self.0.borrow();
|
161
274
|
|
162
|
-
|
275
|
+
let mut sanitizer_document_content_handlers: Vec<DocumentContentHandlers> = vec![];
|
276
|
+
let mut sanitizer_initial_element_content_handlers: Vec<(
|
277
|
+
Cow<Selector>,
|
278
|
+
ElementContentHandlers,
|
279
|
+
)> = vec![];
|
280
|
+
|
281
|
+
match &binding.sanitizer {
|
282
|
+
None => (),
|
283
|
+
Some(sanitizer) => {
|
284
|
+
if !sanitizer.get_allow_doctype() {
|
285
|
+
sanitizer_document_content_handlers.push(doctype!(|d| {
|
286
|
+
sanitizer.remove_doctype(d);
|
287
|
+
Ok(())
|
288
|
+
}));
|
289
|
+
}
|
290
|
+
if !sanitizer.get_allow_comments() {
|
291
|
+
sanitizer_document_content_handlers.push(doc_comments!(|c| {
|
292
|
+
sanitizer.remove_comment(c);
|
293
|
+
Ok(())
|
294
|
+
}));
|
295
|
+
}
|
296
|
+
sanitizer_initial_element_content_handlers.push(element!("*", |el| {
|
297
|
+
sanitizer.try_remove_element(el);
|
298
|
+
if el.removed() {
|
299
|
+
return Ok(());
|
300
|
+
}
|
301
|
+
match sanitizer.sanitize_attributes(el) {
|
302
|
+
Ok(_) => Ok(()),
|
303
|
+
Err(err) => Err(err.to_string().into()),
|
304
|
+
}
|
305
|
+
}));
|
163
306
|
}
|
164
307
|
};
|
165
|
-
|
308
|
+
|
166
309
|
let handlers = &binding.handlers;
|
167
310
|
|
168
|
-
match Self::perform_handler_rewrite(
|
169
|
-
|
311
|
+
match Self::perform_handler_rewrite(
|
312
|
+
self,
|
313
|
+
sanitizer_document_content_handlers,
|
314
|
+
sanitizer_initial_element_content_handlers,
|
315
|
+
handlers,
|
316
|
+
html,
|
317
|
+
) {
|
318
|
+
Ok(rewritten_html) => match &binding.sanitizer {
|
319
|
+
None => match String::from_utf8(rewritten_html) {
|
320
|
+
Ok(output) => Ok(output),
|
321
|
+
Err(err) => Err(magnus::Error::new(
|
322
|
+
exception::runtime_error(),
|
323
|
+
format!("{err:?}"),
|
324
|
+
)),
|
325
|
+
},
|
326
|
+
Some(sanitizer) => {
|
327
|
+
Self::perform_final_sanitization(self, sanitizer, rewritten_html)
|
328
|
+
}
|
329
|
+
},
|
170
330
|
Err(err) => Err(err),
|
171
331
|
}
|
172
332
|
}
|
173
333
|
|
174
|
-
|
334
|
+
// to get rid of some really nasty edge cases with dangerous tags, we perform one more
|
335
|
+
// sanitization pass at the end
|
336
|
+
fn perform_final_sanitization(
|
337
|
+
&self,
|
175
338
|
sanitizer: &SelmaSanitizer,
|
176
|
-
html:
|
177
|
-
) -> Result<
|
178
|
-
|
179
|
-
|
180
|
-
let mut document_content_handlers: Vec<DocumentContentHandlers> = vec![];
|
181
|
-
if !sanitizer.get_allow_doctype() {
|
182
|
-
document_content_handlers.push(doctype!(|d| {
|
183
|
-
sanitizer.remove_doctype(d);
|
184
|
-
Ok(())
|
185
|
-
}));
|
186
|
-
}
|
187
|
-
if !sanitizer.get_allow_comments() {
|
188
|
-
document_content_handlers.push(doc_comments!(|c| {
|
189
|
-
sanitizer.remove_comment(c);
|
190
|
-
Ok(())
|
191
|
-
}));
|
192
|
-
}
|
193
|
-
let mut rewriter = HtmlRewriter::new(
|
194
|
-
Settings {
|
195
|
-
document_content_handlers,
|
196
|
-
element_content_handlers: vec![element!("*", |el| {
|
197
|
-
sanitizer.try_remove_element(el);
|
198
|
-
if el.removed() {
|
199
|
-
return Ok(());
|
200
|
-
}
|
201
|
-
match sanitizer.sanitize_attributes(el) {
|
202
|
-
Ok(_) => Ok(()),
|
203
|
-
Err(err) => Err(err.to_string().into()),
|
204
|
-
}
|
205
|
-
})],
|
206
|
-
// TODO: allow for MemorySettings to be defined
|
207
|
-
..Settings::default()
|
208
|
-
},
|
209
|
-
|c: &[u8]| first_pass_html.extend_from_slice(c),
|
210
|
-
);
|
211
|
-
|
212
|
-
let result = rewriter.write(html.as_bytes());
|
213
|
-
if result.is_err() {
|
214
|
-
return Err(magnus::Error::new(
|
215
|
-
exception::runtime_error(),
|
216
|
-
format!("Failed to sanitize HTML: {}", result.unwrap_err()),
|
217
|
-
));
|
218
|
-
}
|
219
|
-
}
|
220
|
-
|
221
|
-
let mut output = vec![];
|
222
|
-
{
|
223
|
-
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
224
|
-
if sanitizer.get_escape_tagfilter() {
|
225
|
-
element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
|
226
|
-
let should_remove = sanitizer.allow_element(el);
|
227
|
-
if should_remove {
|
228
|
-
sanitizer.force_remove_element(el);
|
229
|
-
}
|
339
|
+
html: Vec<u8>,
|
340
|
+
) -> Result<String, magnus::Error> {
|
341
|
+
// TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
|
342
|
+
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
230
343
|
|
231
|
-
|
232
|
-
|
233
|
-
|
344
|
+
if sanitizer.get_escape_tagfilter() {
|
345
|
+
element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
|
346
|
+
let should_remove = sanitizer.allow_element(el);
|
347
|
+
if should_remove {
|
348
|
+
sanitizer.force_remove_element(el);
|
349
|
+
}
|
234
350
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
..Settings::default()
|
239
|
-
},
|
240
|
-
|c: &[u8]| output.extend_from_slice(c),
|
241
|
-
);
|
351
|
+
Ok(())
|
352
|
+
}));
|
353
|
+
}
|
242
354
|
|
243
|
-
|
244
|
-
|
245
|
-
|
355
|
+
match Self::run_rewrite(self, vec![], element_content_handlers, html.as_slice()) {
|
356
|
+
Ok(rewritten_html) => match String::from_utf8(rewritten_html) {
|
357
|
+
Ok(output) => Ok(output),
|
358
|
+
Err(err) => Err(magnus::Error::new(
|
246
359
|
exception::runtime_error(),
|
247
|
-
format!("
|
248
|
-
))
|
249
|
-
}
|
360
|
+
format!("{err:?}"),
|
361
|
+
)),
|
362
|
+
},
|
363
|
+
Err(err) => Err(err),
|
250
364
|
}
|
251
|
-
|
252
|
-
Ok(output)
|
253
365
|
}
|
254
366
|
|
255
367
|
pub fn perform_handler_rewrite(
|
256
368
|
&self,
|
369
|
+
sanitizer_document_content_handlers: Vec<DocumentContentHandlers>,
|
370
|
+
sanitizer_initial_element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)>,
|
257
371
|
handlers: &[Handler],
|
258
372
|
html: String,
|
259
373
|
) -> Result<Vec<u8>, magnus::Error> {
|
260
|
-
// TODO: this should ideally be done ahead of time
|
374
|
+
// TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
|
261
375
|
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
262
376
|
|
263
377
|
handlers.iter().for_each(|handler| {
|
@@ -275,7 +389,7 @@ impl SelmaRewriter {
|
|
275
389
|
selector.match_element().unwrap(),
|
276
390
|
move |el| {
|
277
391
|
match Self::process_element_handlers(
|
278
|
-
|
392
|
+
handler.rb_handler,
|
279
393
|
el,
|
280
394
|
&closure_element_stack.borrow(),
|
281
395
|
) {
|
@@ -306,9 +420,7 @@ impl SelmaRewriter {
|
|
306
420
|
}
|
307
421
|
}
|
308
422
|
|
309
|
-
|
310
|
-
match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
|
311
|
-
{
|
423
|
+
match Self::process_text_handlers(handler.rb_handler, text) {
|
312
424
|
Ok(_) => Ok(()),
|
313
425
|
Err(err) => Err(err.to_string().into()),
|
314
426
|
}
|
@@ -341,16 +453,35 @@ impl SelmaRewriter {
|
|
341
453
|
}));
|
342
454
|
});
|
343
455
|
|
456
|
+
element_content_handlers.extend(sanitizer_initial_element_content_handlers);
|
457
|
+
|
458
|
+
Self::run_rewrite(
|
459
|
+
self,
|
460
|
+
sanitizer_document_content_handlers,
|
461
|
+
element_content_handlers,
|
462
|
+
html.as_bytes(),
|
463
|
+
)
|
464
|
+
}
|
465
|
+
|
466
|
+
fn run_rewrite(
|
467
|
+
&self,
|
468
|
+
document_content_handlers: Vec<DocumentContentHandlers>,
|
469
|
+
element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)>,
|
470
|
+
html: &[u8],
|
471
|
+
) -> Result<Vec<u8>, magnus::Error> {
|
472
|
+
let binding = &self.0.borrow();
|
344
473
|
let mut output = vec![];
|
345
474
|
{
|
346
475
|
let mut rewriter = HtmlRewriter::new(
|
347
476
|
Settings {
|
477
|
+
document_content_handlers,
|
348
478
|
element_content_handlers,
|
479
|
+
memory_settings: Self::get_memory_options(binding),
|
349
480
|
..Settings::default()
|
350
481
|
},
|
351
482
|
|c: &[u8]| output.extend_from_slice(c),
|
352
483
|
);
|
353
|
-
match rewriter.write(html
|
484
|
+
match rewriter.write(html) {
|
354
485
|
Ok(_) => {}
|
355
486
|
Err(err) => {
|
356
487
|
return Err(magnus::Error::new(
|
@@ -364,10 +495,12 @@ impl SelmaRewriter {
|
|
364
495
|
}
|
365
496
|
|
366
497
|
fn process_element_handlers(
|
367
|
-
|
498
|
+
obj_rb_handler: ObjectValue,
|
368
499
|
element: &mut Element,
|
369
500
|
ancestors: &[String],
|
370
501
|
) -> Result<(), magnus::Error> {
|
502
|
+
let rb_handler = Ruby::get().unwrap().get_inner(obj_rb_handler.inner);
|
503
|
+
|
371
504
|
// if `on_end_tag` function is defined, call it
|
372
505
|
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
373
506
|
// TODO: error here is an "EndTagError"
|
@@ -394,9 +527,11 @@ impl SelmaRewriter {
|
|
394
527
|
}
|
395
528
|
|
396
529
|
fn process_text_handlers(
|
397
|
-
|
530
|
+
obj_rb_handler: ObjectValue,
|
398
531
|
text_chunk: &mut TextChunk,
|
399
532
|
) -> Result<(), magnus::Error> {
|
533
|
+
let rb_handler = Ruby::get().unwrap().get_inner(obj_rb_handler.inner);
|
534
|
+
|
400
535
|
// prevents missing `handle_text_chunk` function
|
401
536
|
let content = text_chunk.as_str();
|
402
537
|
|
@@ -414,6 +549,22 @@ impl SelmaRewriter {
|
|
414
549
|
)),
|
415
550
|
}
|
416
551
|
}
|
552
|
+
|
553
|
+
fn get_memory_options(binding: &Ref<Rewriter>) -> MemorySettings {
|
554
|
+
let options = &binding.options.memory_options;
|
555
|
+
MemorySettings {
|
556
|
+
max_allowed_memory_usage: options.max_allowed_memory_usage,
|
557
|
+
preallocated_parsing_buffer_size: options.preallocated_parsing_buffer_size,
|
558
|
+
}
|
559
|
+
}
|
560
|
+
}
|
561
|
+
|
562
|
+
impl RewriterOptions {
|
563
|
+
pub fn new() -> Self {
|
564
|
+
Self {
|
565
|
+
memory_options: MemorySettings::default(),
|
566
|
+
}
|
567
|
+
}
|
417
568
|
}
|
418
569
|
|
419
570
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -211,20 +211,23 @@ impl SelmaSanitizer {
|
|
211
211
|
}
|
212
212
|
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
213
213
|
}
|
214
|
-
} else if allowed_protocol.is_kind_of(class::symbol())
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
214
|
+
} else if allowed_protocol.is_kind_of(class::symbol()) {
|
215
|
+
let protocol_config = allowed_protocol.inspect();
|
216
|
+
if protocol_config == ":relative" {
|
217
|
+
match protocol_list {
|
218
|
+
None => {
|
219
|
+
protocol_sanitizers.insert(
|
220
|
+
attr_name.to_string(),
|
221
|
+
vec!["#".to_string(), "/".to_string()],
|
222
|
+
);
|
223
|
+
}
|
224
|
+
Some(protocol_list) => {
|
225
|
+
protocol_list.push("#".to_string());
|
226
|
+
protocol_list.push("/".to_string());
|
227
|
+
}
|
227
228
|
}
|
229
|
+
} else if protocol_config == ":all" {
|
230
|
+
protocol_sanitizers.insert(attr_name.to_string(), vec!["all".to_string()]);
|
228
231
|
}
|
229
232
|
}
|
230
233
|
}
|
@@ -335,7 +338,7 @@ impl SelmaSanitizer {
|
|
335
338
|
element: &mut Element,
|
336
339
|
element_sanitizer: &ElementSanitizer,
|
337
340
|
attr_name: &String,
|
338
|
-
attr_val: &
|
341
|
+
attr_val: &str,
|
339
342
|
) -> Result<bool, AttributeNameError> {
|
340
343
|
let mut allowed: bool = false;
|
341
344
|
let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
|
@@ -387,7 +390,11 @@ impl SelmaSanitizer {
|
|
387
390
|
attr_val.contains("://")
|
388
391
|
}
|
389
392
|
|
390
|
-
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &
|
393
|
+
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
|
394
|
+
if protocols_allowed.contains(&"all".to_string()) {
|
395
|
+
return true;
|
396
|
+
}
|
397
|
+
|
391
398
|
// FIXME: is there a more idiomatic way to do this?
|
392
399
|
let mut pos: usize = 0;
|
393
400
|
let mut chars = attr_val.chars();
|
@@ -542,7 +549,7 @@ impl SelmaSanitizer {
|
|
542
549
|
) -> &'a mut ElementSanitizer {
|
543
550
|
element_sanitizers
|
544
551
|
.entry(element_name.to_string())
|
545
|
-
.
|
552
|
+
.or_default()
|
546
553
|
}
|
547
554
|
}
|
548
555
|
|
data/lib/selma/config.rb
ADDED
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
|
29
29
|
# URL handling protocols to allow in specific attributes. By default, no
|
30
30
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
31
|
-
# to allow relative URLs sans protocol.
|
31
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
32
32
|
protocols: {},
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
data/lib/selma/sanitizer.rb
CHANGED
@@ -66,7 +66,12 @@ module Selma
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def allow_protocol(element, attr, protos)
|
69
|
-
|
69
|
+
if protos.is_a?(Array)
|
70
|
+
raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
|
71
|
+
else
|
72
|
+
protos = [protos]
|
73
|
+
end
|
74
|
+
|
70
75
|
set_allowed_protocols(element, attr, protos)
|
71
76
|
end
|
72
77
|
|
data/lib/selma/version.rb
CHANGED