selma 0.0.2-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +173 -0
- data/ext/selma/Cargo.toml +14 -0
- data/ext/selma/_util.rb +102 -0
- data/ext/selma/extconf.rb +6 -0
- data/ext/selma/src/html/element.rs +195 -0
- data/ext/selma/src/html/end_tag.rs +35 -0
- data/ext/selma/src/html.rs +17 -0
- data/ext/selma/src/lib.rs +23 -0
- data/ext/selma/src/native_ref_wrap.rs +79 -0
- data/ext/selma/src/rewriter.rs +441 -0
- data/ext/selma/src/sanitizer.rs +578 -0
- data/ext/selma/src/selector.rs +115 -0
- data/ext/selma/src/tags.rs +1133 -0
- data/ext/selma/src/wrapped_struct.rs +92 -0
- data/lib/selma/3.1/selma.bundle +0 -0
- data/lib/selma/extension.rb +14 -0
- data/lib/selma/html.rb +6 -0
- data/lib/selma/rewriter.rb +6 -0
- data/lib/selma/sanitizer/config/basic.rb +27 -0
- data/lib/selma/sanitizer/config/default.rb +42 -0
- data/lib/selma/sanitizer/config/relaxed.rb +37 -0
- data/lib/selma/sanitizer/config/restricted.rb +13 -0
- data/lib/selma/sanitizer/config.rb +67 -0
- data/lib/selma/sanitizer.rb +85 -0
- data/lib/selma/selector.rb +6 -0
- data/lib/selma/version.rb +5 -0
- data/lib/selma.rb +13 -0
- data/selma.gemspec +41 -0
- metadata +136 -0
@@ -0,0 +1,578 @@
|
|
1
|
+
use std::{borrow::BorrowMut, cell::RefMut, collections::HashMap};
|
2
|
+
|
3
|
+
use lol_html::html_content::{Comment, ContentType, Doctype, Element, EndTag};
|
4
|
+
use magnus::{
|
5
|
+
class, exception, function, method, scan_args, Error, Module, Object, RArray, RHash, RModule,
|
6
|
+
Value,
|
7
|
+
};
|
8
|
+
|
9
|
+
use crate::tags::Tag;
|
10
|
+
|
11
|
+
/// Allow-list settings that apply to a single element name.
#[derive(Clone, Debug)]
struct ElementSanitizer {
    /// Attribute names permitted on this element.
    allowed_attrs: Vec<String>,
    /// Attribute names scanned at the end of `sanitize_attributes`; `"*"` is
    /// treated there as a wildcard.
    required_attrs: Vec<String>,
    /// Class names permitted in this element's `class` attribute.
    allowed_classes: Vec<String>,
    /// Maps an attribute name to the protocols accepted in that attribute's value.
    protocol_sanitizers: HashMap<String, Vec<String>>,
}
|
18
|
+
|
19
|
+
#[derive(Clone, Debug)]
|
20
|
+
pub struct Sanitizer {
|
21
|
+
flags: [u8; Tag::TAG_COUNT],
|
22
|
+
allowed_attrs: Vec<String>,
|
23
|
+
allowed_classes: Vec<String>,
|
24
|
+
element_sanitizers: HashMap<String, ElementSanitizer>,
|
25
|
+
|
26
|
+
pub escape_tagfilter: bool,
|
27
|
+
pub allow_comments: bool,
|
28
|
+
pub allow_doctype: bool,
|
29
|
+
config: RHash,
|
30
|
+
}
|
31
|
+
|
32
|
+
#[derive(Clone, Debug)]
|
33
|
+
#[magnus::wrap(class = "Selma::Sanitizer")]
|
34
|
+
pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
|
35
|
+
|
36
|
+
impl SelmaSanitizer {
|
37
|
+
/// Bit tested by `allow_element`.
const SELMA_SANITIZER_ALLOW: u8 = 1 << 0;
/// Tag-filter bit. NOTE(review): not read anywhere in this impl — confirm its users.
const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = 1 << 1;
/// When set, `remove_element` drops the element together with its content.
const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = 1 << 2;
/// When set, `remove_element` inserts a space around the element it unwraps.
const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = 1 << 3;
|
41
|
+
|
42
|
+
pub fn new(arguments: &[Value]) -> Result<Self, Error> {
|
43
|
+
let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
|
44
|
+
let (opt_config,): (Option<RHash>,) = args.optional;
|
45
|
+
|
46
|
+
let config = match opt_config {
|
47
|
+
Some(config) => config,
|
48
|
+
// TODO: this seems like a hack to fix?
|
49
|
+
None => magnus::eval::<RHash>(r#"Selma::Sanitizer::Config::DEFAULT"#).unwrap(),
|
50
|
+
};
|
51
|
+
|
52
|
+
let mut element_sanitizers = HashMap::new();
|
53
|
+
Tag::html_tags().iter().for_each(|html_tag| {
|
54
|
+
let es = ElementSanitizer {
|
55
|
+
allowed_attrs: vec![],
|
56
|
+
allowed_classes: vec![],
|
57
|
+
required_attrs: vec![],
|
58
|
+
|
59
|
+
protocol_sanitizers: HashMap::new(),
|
60
|
+
};
|
61
|
+
element_sanitizers.insert(Tag::element_name_from_enum(html_tag).to_string(), es);
|
62
|
+
});
|
63
|
+
|
64
|
+
Ok(Self(std::cell::RefCell::new(Sanitizer {
|
65
|
+
flags: [0; Tag::TAG_COUNT],
|
66
|
+
allowed_attrs: vec![],
|
67
|
+
allowed_classes: vec![],
|
68
|
+
element_sanitizers,
|
69
|
+
|
70
|
+
escape_tagfilter: true,
|
71
|
+
allow_comments: false,
|
72
|
+
allow_doctype: true,
|
73
|
+
config,
|
74
|
+
})))
|
75
|
+
}
|
76
|
+
|
77
|
+
/// Returns the Ruby configuration hash stored when the sanitizer was created.
fn get_config(&self) -> Result<RHash, Error> {
    let config = self.0.borrow().config;
    Ok(config)
}
|
82
|
+
|
83
|
+
/// Toggle a sanitizer option on or off.
|
84
|
+
fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
|
85
|
+
let tag = Tag::tag_from_tag_name(tag_name.as_str());
|
86
|
+
if set {
|
87
|
+
self.0.borrow_mut().flags[tag.index] |= flag;
|
88
|
+
} else {
|
89
|
+
self.0.borrow_mut().flags[tag.index] &= !flag;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
/// Toggles all sanitization options on or off.
|
94
|
+
fn set_all_flags(&self, flag: u8, set: bool) {
|
95
|
+
if set {
|
96
|
+
Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
|
97
|
+
self.0.borrow_mut().flags[iter] |= flag;
|
98
|
+
});
|
99
|
+
} else {
|
100
|
+
Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
|
101
|
+
self.0.borrow_mut().flags[iter] &= flag;
|
102
|
+
});
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
/// Whether or not to keep dangerous HTML tags.
|
107
|
+
fn set_escape_tagfilter(&self, allow: bool) -> bool {
|
108
|
+
self.0.borrow_mut().escape_tagfilter = allow;
|
109
|
+
allow
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
|
113
|
+
if self.0.borrow().escape_tagfilter {
|
114
|
+
let tag = Tag::tag_from_element(e);
|
115
|
+
if Tag::is_tag_escapeworthy(tag) {
|
116
|
+
e.remove();
|
117
|
+
return true;
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
false
|
122
|
+
}
|
123
|
+
|
124
|
+
pub fn get_escape_tagfilter(&self) -> bool {
|
125
|
+
self.0.borrow().escape_tagfilter
|
126
|
+
}
|
127
|
+
|
128
|
+
/// Whether or not to keep HTML comments.
|
129
|
+
fn set_allow_comments(&self, allow: bool) -> bool {
|
130
|
+
self.0.borrow_mut().allow_comments = allow;
|
131
|
+
allow
|
132
|
+
}
|
133
|
+
|
134
|
+
pub fn get_allow_comments(&self) -> bool {
|
135
|
+
self.0.borrow().allow_comments
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn remove_comment(&self, c: &mut Comment) {
|
139
|
+
c.remove();
|
140
|
+
}
|
141
|
+
|
142
|
+
/// Whether or not to keep HTML doctype.
|
143
|
+
fn set_allow_doctype(&self, allow: bool) -> bool {
|
144
|
+
self.0.borrow_mut().allow_doctype = allow;
|
145
|
+
allow
|
146
|
+
}
|
147
|
+
|
148
|
+
/// Whether or not to keep HTML doctype.
|
149
|
+
pub fn get_allow_doctype(&self) -> bool {
|
150
|
+
self.0.borrow().allow_doctype
|
151
|
+
}
|
152
|
+
|
153
|
+
pub fn remove_doctype(&self, d: &mut Doctype) {
|
154
|
+
d.remove();
|
155
|
+
}
|
156
|
+
|
157
|
+
fn set_allowed_attribute(&self, eln: Value, attr_name: String, allow: bool) -> bool {
|
158
|
+
let mut binding = self.0.borrow_mut();
|
159
|
+
|
160
|
+
let element_name = eln.to_r_string().unwrap().to_string().unwrap();
|
161
|
+
if element_name == "all" {
|
162
|
+
let allowed_attrs = &mut binding.allowed_attrs;
|
163
|
+
Self::set_allowed(allowed_attrs, &attr_name, allow);
|
164
|
+
} else {
|
165
|
+
let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
|
166
|
+
|
167
|
+
element_sanitizer.allowed_attrs.push(attr_name);
|
168
|
+
}
|
169
|
+
|
170
|
+
allow
|
171
|
+
}
|
172
|
+
|
173
|
+
fn set_allowed_class(&self, element_name: String, class_name: String, allow: bool) -> bool {
|
174
|
+
let mut binding = self.0.borrow_mut();
|
175
|
+
if element_name == "all" {
|
176
|
+
let allowed_classes = &mut binding.allowed_classes;
|
177
|
+
Self::set_allowed(allowed_classes, &class_name, allow);
|
178
|
+
} else {
|
179
|
+
let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
|
180
|
+
|
181
|
+
let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
|
182
|
+
Self::set_allowed(allowed_classes, &class_name, allow)
|
183
|
+
}
|
184
|
+
allow
|
185
|
+
}
|
186
|
+
|
187
|
+
fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
|
188
|
+
let mut binding = self.0.borrow_mut();
|
189
|
+
|
190
|
+
let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
|
191
|
+
|
192
|
+
let protocol_sanitizers = element_sanitizer.protocol_sanitizers.borrow_mut();
|
193
|
+
|
194
|
+
for opt_allowed_protocol in allow_list.each() {
|
195
|
+
let allowed_protocol = opt_allowed_protocol.unwrap();
|
196
|
+
let protocol_list = protocol_sanitizers.get_mut(&attr_name);
|
197
|
+
if allowed_protocol.is_kind_of(class::string()) {
|
198
|
+
match protocol_list {
|
199
|
+
None => {
|
200
|
+
protocol_sanitizers
|
201
|
+
.insert(attr_name.to_string(), vec![allowed_protocol.to_string()]);
|
202
|
+
}
|
203
|
+
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
204
|
+
}
|
205
|
+
} else if allowed_protocol.is_kind_of(class::symbol())
|
206
|
+
&& allowed_protocol.inspect() == ":relative"
|
207
|
+
{
|
208
|
+
match protocol_list {
|
209
|
+
None => {
|
210
|
+
protocol_sanitizers.insert(
|
211
|
+
attr_name.to_string(),
|
212
|
+
vec!["#".to_string(), "/".to_string()],
|
213
|
+
);
|
214
|
+
}
|
215
|
+
Some(protocol_list) => {
|
216
|
+
protocol_list.push("#".to_string());
|
217
|
+
protocol_list.push("/".to_string());
|
218
|
+
}
|
219
|
+
}
|
220
|
+
}
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
/// Adds `attr_name` to `set` (`allow == true`) or removes it (`allow == false`).
///
/// Adding a name that is already present is a no-op, and removal drops every
/// occurrence, so one disallow always reverts any number of earlier allows.
/// Removal keeps the relative order of the remaining entries.
fn set_allowed(set: &mut Vec<String>, attr_name: &str, allow: bool) {
    if allow {
        if !set.iter().any(|existing| existing == attr_name) {
            set.push(attr_name.to_string());
        }
    } else {
        set.retain(|existing| existing != attr_name);
    }
}
|
231
|
+
|
232
|
+
pub fn sanitize_attributes(&self, element: &mut Element) {
|
233
|
+
let binding = self.0.borrow_mut();
|
234
|
+
let tag = Tag::tag_from_element(element);
|
235
|
+
let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
|
236
|
+
|
237
|
+
// FIXME: This is a hack to get around the fact that we can't borrow
|
238
|
+
let attribute_map: HashMap<String, String> = element
|
239
|
+
.attributes()
|
240
|
+
.iter()
|
241
|
+
.map(|a| (a.name(), a.value()))
|
242
|
+
.collect();
|
243
|
+
|
244
|
+
for (attr_name, attr_val) in attribute_map.iter() {
|
245
|
+
// you can actually embed <!-- ... --> inside
|
246
|
+
// an HTML tag to pass malicious data. If this is
|
247
|
+
// encountered, remove the entire element to be safe.
|
248
|
+
if attr_name.starts_with("<!--") {
|
249
|
+
Self::force_remove_element(self, element);
|
250
|
+
return;
|
251
|
+
}
|
252
|
+
|
253
|
+
// first, trim leading spaces and unescape any encodings
|
254
|
+
let trimmed = attr_val.trim_start();
|
255
|
+
let x = escapist::unescape_html(trimmed.as_bytes());
|
256
|
+
let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
|
257
|
+
|
258
|
+
if !Self::should_keep_attribute(
|
259
|
+
&binding,
|
260
|
+
element,
|
261
|
+
element_sanitizer,
|
262
|
+
attr_name,
|
263
|
+
&unescaped_attr_val,
|
264
|
+
) {
|
265
|
+
element.remove_attribute(attr_name);
|
266
|
+
} else {
|
267
|
+
// Prevent the use of `<meta>` elements that set a charset other than UTF-8,
|
268
|
+
// since output is always UTF-8.
|
269
|
+
if Tag::is_meta(tag) {
|
270
|
+
if attr_name == "charset" && unescaped_attr_val != "utf-8" {
|
271
|
+
element.set_attribute(attr_name, "utf-8");
|
272
|
+
}
|
273
|
+
} else if !unescaped_attr_val.is_empty() {
|
274
|
+
let mut buf = String::new();
|
275
|
+
// ...then, escape any special characters, for security
|
276
|
+
if attr_name == "href" {
|
277
|
+
// FIXME: gross--------------vvvv
|
278
|
+
escapist::escape_href(&mut buf, unescaped_attr_val.to_string().as_str());
|
279
|
+
} else {
|
280
|
+
escapist::escape_html(&mut buf, unescaped_attr_val.to_string().as_str());
|
281
|
+
};
|
282
|
+
|
283
|
+
element.set_attribute(attr_name, &buf);
|
284
|
+
}
|
285
|
+
}
|
286
|
+
}
|
287
|
+
|
288
|
+
let required = &element_sanitizer.required_attrs;
|
289
|
+
if required.contains(&"*".to_string()) {
|
290
|
+
return;
|
291
|
+
}
|
292
|
+
for attr in element.attributes().iter() {
|
293
|
+
let attr_name = &attr.name();
|
294
|
+
if required.contains(attr_name) {
|
295
|
+
return;
|
296
|
+
}
|
297
|
+
}
|
298
|
+
}
|
299
|
+
|
300
|
+
fn should_keep_attribute(
|
301
|
+
binding: &RefMut<Sanitizer>,
|
302
|
+
element: &mut Element,
|
303
|
+
element_sanitizer: &ElementSanitizer,
|
304
|
+
attr_name: &String,
|
305
|
+
attr_val: &String,
|
306
|
+
) -> bool {
|
307
|
+
let mut allowed: bool = false;
|
308
|
+
let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
|
309
|
+
let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
|
310
|
+
|
311
|
+
if element_allowed_attrs {
|
312
|
+
allowed = true;
|
313
|
+
}
|
314
|
+
|
315
|
+
if !allowed && sanitizer_allowed_attrs {
|
316
|
+
allowed = true;
|
317
|
+
}
|
318
|
+
|
319
|
+
if !allowed {
|
320
|
+
return false;
|
321
|
+
}
|
322
|
+
|
323
|
+
let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
|
324
|
+
match protocol_sanitizer_values {
|
325
|
+
None => {
|
326
|
+
// has a protocol, but no sanitization list
|
327
|
+
if !attr_val.is_empty() && Self::has_protocol(attr_val) {
|
328
|
+
return false;
|
329
|
+
}
|
330
|
+
}
|
331
|
+
Some(protocol_sanitizer_values) => {
|
332
|
+
if !attr_val.is_empty()
|
333
|
+
&& !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
|
334
|
+
{
|
335
|
+
return false;
|
336
|
+
}
|
337
|
+
}
|
338
|
+
}
|
339
|
+
|
340
|
+
if attr_name == "class"
|
341
|
+
&& !Self::sanitize_class_attribute(
|
342
|
+
binding,
|
343
|
+
element,
|
344
|
+
element_sanitizer,
|
345
|
+
attr_name,
|
346
|
+
attr_val,
|
347
|
+
)
|
348
|
+
.unwrap()
|
349
|
+
{
|
350
|
+
return false;
|
351
|
+
}
|
352
|
+
|
353
|
+
true
|
354
|
+
}
|
355
|
+
|
356
|
+
/// Reports whether `attr_val` contains the sequence `://` anywhere.
///
/// Only that form is detected; scheme-only values such as `mailto:…` return
/// `false`.
fn has_protocol(attr_val: &str) -> bool {
    attr_val.find("://").is_some()
}
|
359
|
+
|
360
|
+
/// Checks the leading part of `attr_val` against `protocols_allowed`.
///
/// The value is scanned for the first `:`, `/` or `#`:
/// * `/` first — accepted only if `"/"` is in the list;
/// * `#` first — accepted only if `"#"` is in the list;
/// * `:` first — the text before it is lower-cased and must be in the list;
/// * none of them (including the empty string) — rejected.
///
/// The scan uses byte offsets on character boundaries, so non-ASCII values
/// neither panic nor get cut mid-character.
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
    let delimiter = attr_val
        .char_indices()
        .find(|&(_, c)| matches!(c, ':' | '/' | '#'));

    match delimiter {
        Some((_, '/')) => protocols_allowed.iter().any(|p| p == "/"),
        Some((_, '#')) => protocols_allowed.iter().any(|p| p == "#"),
        Some((end, _)) => {
            // Allow protocol name to be case-insensitive
            let protocol = attr_val[..end].to_lowercase();
            protocols_allowed.contains(&protocol)
        }
        // No delimiter means no scheme to compare against the list.
        None => false,
    }
}
|
389
|
+
|
390
|
+
fn sanitize_class_attribute(
|
391
|
+
binding: &RefMut<Sanitizer>,
|
392
|
+
element: &mut Element,
|
393
|
+
element_sanitizer: &ElementSanitizer,
|
394
|
+
attr_name: &str,
|
395
|
+
attr_val: &str,
|
396
|
+
) -> Result<bool, Error> {
|
397
|
+
let allowed_global = &binding.allowed_classes;
|
398
|
+
|
399
|
+
let mut valid_classes: Vec<String> = vec![];
|
400
|
+
|
401
|
+
let allowed_local = &element_sanitizer.allowed_classes;
|
402
|
+
|
403
|
+
// No class filters, so everything goes through
|
404
|
+
if allowed_global.is_empty() && allowed_local.is_empty() {
|
405
|
+
return Ok(true);
|
406
|
+
}
|
407
|
+
|
408
|
+
let attr_value = attr_val.trim_start();
|
409
|
+
attr_value
|
410
|
+
.split_whitespace()
|
411
|
+
.map(|s| s.to_string())
|
412
|
+
.for_each(|class| {
|
413
|
+
if allowed_global.contains(&class) || allowed_local.contains(&class) {
|
414
|
+
valid_classes.push(class);
|
415
|
+
}
|
416
|
+
});
|
417
|
+
|
418
|
+
if valid_classes.is_empty() {
|
419
|
+
return Ok(false);
|
420
|
+
}
|
421
|
+
|
422
|
+
match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
|
423
|
+
Ok(_) => Ok(true),
|
424
|
+
Err(err) => Err(Error::new(
|
425
|
+
exception::runtime_error(),
|
426
|
+
format!("AttributeNameError: {}", err),
|
427
|
+
)),
|
428
|
+
}
|
429
|
+
}
|
430
|
+
|
431
|
+
pub fn allow_element(&self, element: &mut Element) -> bool {
|
432
|
+
let tag = Tag::tag_from_element(element);
|
433
|
+
let flags: u8 = self.0.borrow().flags[tag.index];
|
434
|
+
|
435
|
+
(flags & Self::SELMA_SANITIZER_ALLOW) == 0
|
436
|
+
}
|
437
|
+
|
438
|
+
pub fn try_remove_element(&self, element: &mut Element) -> bool {
|
439
|
+
let tag = Tag::tag_from_element(element);
|
440
|
+
let flags: u8 = self.0.borrow().flags[tag.index];
|
441
|
+
|
442
|
+
let should_remove = !element.removed() && self.allow_element(element);
|
443
|
+
|
444
|
+
if should_remove {
|
445
|
+
if Tag::has_text_content(tag) {
|
446
|
+
Self::remove_element(
|
447
|
+
element,
|
448
|
+
tag.self_closing,
|
449
|
+
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
450
|
+
);
|
451
|
+
} else {
|
452
|
+
Self::remove_element(element, tag.self_closing, flags);
|
453
|
+
}
|
454
|
+
|
455
|
+
Self::check_if_end_tag_needs_removal(element);
|
456
|
+
} else {
|
457
|
+
// anything in <iframe> must be removed, if it's kept
|
458
|
+
if Tag::is_iframe(tag) {
|
459
|
+
if self.0.borrow().flags[tag.index] != 0 {
|
460
|
+
element.set_inner_content(" ", ContentType::Text);
|
461
|
+
} else {
|
462
|
+
element.set_inner_content("", ContentType::Text);
|
463
|
+
}
|
464
|
+
}
|
465
|
+
}
|
466
|
+
|
467
|
+
should_remove
|
468
|
+
}
|
469
|
+
|
470
|
+
fn remove_element(element: &mut Element, self_closing: bool, flags: u8) {
|
471
|
+
let wrap_whitespace = (flags & Self::SELMA_SANITIZER_WRAP_WHITESPACE) != 0;
|
472
|
+
let remove_contents = (flags & Self::SELMA_SANITIZER_REMOVE_CONTENTS) != 0;
|
473
|
+
|
474
|
+
if remove_contents {
|
475
|
+
element.remove();
|
476
|
+
} else {
|
477
|
+
if wrap_whitespace {
|
478
|
+
if self_closing {
|
479
|
+
element.after(" ", ContentType::Text);
|
480
|
+
} else {
|
481
|
+
element.before(" ", ContentType::Text);
|
482
|
+
element.after(" ", ContentType::Text);
|
483
|
+
}
|
484
|
+
}
|
485
|
+
element.remove_and_keep_content();
|
486
|
+
}
|
487
|
+
}
|
488
|
+
|
489
|
+
pub fn force_remove_element(&self, element: &mut Element) {
|
490
|
+
let tag = Tag::tag_from_element(element);
|
491
|
+
let self_closing = tag.self_closing;
|
492
|
+
Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
|
493
|
+
Self::check_if_end_tag_needs_removal(element);
|
494
|
+
}
|
495
|
+
|
496
|
+
fn check_if_end_tag_needs_removal(element: &mut Element) {
|
497
|
+
if element.removed() && !Tag::tag_from_element(element).self_closing {
|
498
|
+
element
|
499
|
+
.on_end_tag(move |end| {
|
500
|
+
Self::remove_end_tag(end);
|
501
|
+
Ok(())
|
502
|
+
})
|
503
|
+
.unwrap();
|
504
|
+
}
|
505
|
+
}
|
506
|
+
|
507
|
+
fn remove_end_tag(end_tag: &mut EndTag) {
|
508
|
+
end_tag.remove();
|
509
|
+
}
|
510
|
+
|
511
|
+
fn get_element_sanitizer<'a>(
|
512
|
+
binding: &'a RefMut<Sanitizer>,
|
513
|
+
element_name: &str,
|
514
|
+
) -> &'a ElementSanitizer {
|
515
|
+
binding.element_sanitizers.get(element_name).unwrap()
|
516
|
+
}
|
517
|
+
|
518
|
+
fn get_mut_element_sanitizer<'a>(
|
519
|
+
binding: &'a mut Sanitizer,
|
520
|
+
element_name: &str,
|
521
|
+
) -> &'a mut ElementSanitizer {
|
522
|
+
binding.element_sanitizers.get_mut(element_name).unwrap()
|
523
|
+
}
|
524
|
+
}
|
525
|
+
|
526
|
+
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
527
|
+
let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;
|
528
|
+
|
529
|
+
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|
530
|
+
c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;
|
531
|
+
|
532
|
+
c_sanitizer.define_method("set_flag", method!(SelmaSanitizer::set_flag, 3))?;
|
533
|
+
c_sanitizer.define_method("set_all_flags", method!(SelmaSanitizer::set_all_flags, 2))?;
|
534
|
+
|
535
|
+
c_sanitizer.define_method(
|
536
|
+
"set_escape_tagfilter",
|
537
|
+
method!(SelmaSanitizer::set_escape_tagfilter, 1),
|
538
|
+
)?;
|
539
|
+
c_sanitizer.define_method(
|
540
|
+
"escape_tagfilter",
|
541
|
+
method!(SelmaSanitizer::get_escape_tagfilter, 0),
|
542
|
+
)?;
|
543
|
+
|
544
|
+
c_sanitizer.define_method(
|
545
|
+
"set_allow_comments",
|
546
|
+
method!(SelmaSanitizer::set_allow_comments, 1),
|
547
|
+
)?;
|
548
|
+
c_sanitizer.define_method(
|
549
|
+
"allow_comments",
|
550
|
+
method!(SelmaSanitizer::get_allow_comments, 0),
|
551
|
+
)?;
|
552
|
+
|
553
|
+
c_sanitizer.define_method(
|
554
|
+
"set_allow_doctype",
|
555
|
+
method!(SelmaSanitizer::set_allow_doctype, 1),
|
556
|
+
)?;
|
557
|
+
c_sanitizer.define_method(
|
558
|
+
"allow_doctype",
|
559
|
+
method!(SelmaSanitizer::get_allow_doctype, 0),
|
560
|
+
)?;
|
561
|
+
|
562
|
+
c_sanitizer.define_method(
|
563
|
+
"set_allowed_attribute",
|
564
|
+
method!(SelmaSanitizer::set_allowed_attribute, 3),
|
565
|
+
)?;
|
566
|
+
|
567
|
+
c_sanitizer.define_method(
|
568
|
+
"set_allowed_class",
|
569
|
+
method!(SelmaSanitizer::set_allowed_class, 3),
|
570
|
+
)?;
|
571
|
+
|
572
|
+
c_sanitizer.define_method(
|
573
|
+
"set_allowed_protocols",
|
574
|
+
method!(SelmaSanitizer::set_allowed_protocols, 3),
|
575
|
+
)?;
|
576
|
+
|
577
|
+
Ok(())
|
578
|
+
}
|
@@ -0,0 +1,115 @@
|
|
1
|
+
use magnus::{exception, function, scan_args, Error, Module, Object, RModule, Value};
|
2
|
+
|
3
|
+
#[derive(Clone, Debug)]
|
4
|
+
#[magnus::wrap(class = "Selma::Selector")]
|
5
|
+
pub struct SelmaSelector {
|
6
|
+
match_element: Option<String>,
|
7
|
+
match_text_within: Option<String>,
|
8
|
+
ignore_text_within: Option<Vec<String>>,
|
9
|
+
}
|
10
|
+
|
11
|
+
impl SelmaSelector {
|
12
|
+
fn new(args: &[Value]) -> Result<Self, Error> {
|
13
|
+
let (match_element, match_text_within, rb_ignore_text_within) =
|
14
|
+
Self::scan_parse_args(args)?;
|
15
|
+
|
16
|
+
if match_element.is_none() && match_text_within.is_none() {
|
17
|
+
return Err(Error::new(
|
18
|
+
exception::arg_error(),
|
19
|
+
"Neither `match_element` nor `match_text_within` option given",
|
20
|
+
));
|
21
|
+
}
|
22
|
+
|
23
|
+
// FIXME: not excited about this double parse work (`element!` does it too),
|
24
|
+
// but at least we can bail ASAP if the CSS is invalid
|
25
|
+
if match_element.is_some() {
|
26
|
+
let css = match_element.as_ref().unwrap();
|
27
|
+
if css.parse::<lol_html::Selector>().is_err() {
|
28
|
+
return Err(Error::new(
|
29
|
+
exception::arg_error(),
|
30
|
+
format!("Could not parse `match_element` (`{}`) as valid CSS", css),
|
31
|
+
));
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
if match_text_within.is_some() {
|
36
|
+
let css = match_text_within.as_ref().unwrap();
|
37
|
+
if css.parse::<lol_html::Selector>().is_err() {
|
38
|
+
return Err(Error::new(
|
39
|
+
exception::arg_error(),
|
40
|
+
format!(
|
41
|
+
"Could not parse `match_text_within` (`{}`) as valid CSS",
|
42
|
+
css
|
43
|
+
),
|
44
|
+
));
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
let ignore_text_within = match rb_ignore_text_within {
|
49
|
+
None => None,
|
50
|
+
Some(rb_ignore_text_within) => {
|
51
|
+
let mut ignore_text_within = vec![];
|
52
|
+
rb_ignore_text_within.iter().for_each(|i| {
|
53
|
+
// TODO: test this against malice
|
54
|
+
let ignore_text_within_tag_name = i.to_string();
|
55
|
+
ignore_text_within.push(ignore_text_within_tag_name);
|
56
|
+
});
|
57
|
+
Some(ignore_text_within)
|
58
|
+
}
|
59
|
+
};
|
60
|
+
|
61
|
+
Ok(Self {
|
62
|
+
match_element,
|
63
|
+
match_text_within,
|
64
|
+
ignore_text_within,
|
65
|
+
})
|
66
|
+
}
|
67
|
+
|
68
|
+
#[allow(clippy::let_unit_value)]
|
69
|
+
fn scan_parse_args(
|
70
|
+
args: &[Value],
|
71
|
+
) -> Result<(Option<String>, Option<String>, Option<Vec<String>>), Error> {
|
72
|
+
let args = scan_args::scan_args(args)?;
|
73
|
+
let _: () = args.required;
|
74
|
+
let _: () = args.optional;
|
75
|
+
let _: () = args.splat;
|
76
|
+
let _: () = args.trailing;
|
77
|
+
let _: () = args.block;
|
78
|
+
|
79
|
+
let kw = scan_args::get_kwargs::<
|
80
|
+
_,
|
81
|
+
(),
|
82
|
+
(Option<String>, Option<String>, Option<Vec<String>>),
|
83
|
+
(),
|
84
|
+
>(
|
85
|
+
args.keywords,
|
86
|
+
&[],
|
87
|
+
&["match_element", "match_text_within", "ignore_text_within"],
|
88
|
+
)?;
|
89
|
+
let (match_element, match_text_within, rb_ignore_text_within) = kw.optional;
|
90
|
+
|
91
|
+
Ok((match_element, match_text_within, rb_ignore_text_within))
|
92
|
+
}
|
93
|
+
|
94
|
+
pub fn match_element(&self) -> Option<String> {
|
95
|
+
self.match_element.clone()
|
96
|
+
}
|
97
|
+
|
98
|
+
pub fn match_text_within(&self) -> Option<String> {
|
99
|
+
self.match_text_within.clone()
|
100
|
+
}
|
101
|
+
|
102
|
+
pub fn ignore_text_within(&self) -> Option<Vec<String>> {
|
103
|
+
self.ignore_text_within.clone()
|
104
|
+
}
|
105
|
+
}
|
106
|
+
|
107
|
+
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
108
|
+
let c_selector = m_selma
|
109
|
+
.define_class("Selector", Default::default())
|
110
|
+
.expect("cannot define class Selma::Selector");
|
111
|
+
|
112
|
+
c_selector.define_singleton_method("new", function!(SelmaSelector::new, -1))?;
|
113
|
+
|
114
|
+
Ok(())
|
115
|
+
}
|