selma 0.0.7-x86_64-darwin → 0.1.4-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/selma/3.1/selma.bundle +0 -0
- data/lib/selma/3.2/selma.bundle +0 -0
- data/lib/selma/version.rb +1 -1
- metadata +3 -46
- data/ext/selma/Cargo.toml +0 -14
- data/ext/selma/_util.rb +0 -102
- data/ext/selma/extconf.rb +0 -6
- data/ext/selma/src/html/element.rs +0 -254
- data/ext/selma/src/html/end_tag.rs +0 -35
- data/ext/selma/src/html/text_chunk.rs +0 -113
- data/ext/selma/src/html.rs +0 -19
- data/ext/selma/src/lib.rs +0 -50
- data/ext/selma/src/native_ref_wrap.rs +0 -79
- data/ext/selma/src/rewriter.rs +0 -429
- data/ext/selma/src/sanitizer.rs +0 -607
- data/ext/selma/src/selector.rs +0 -112
- data/ext/selma/src/tags.rs +0 -1136
- data/ext/selma/src/wrapped_struct.rs +0 -92
- data/selma.gemspec +0 -41
data/ext/selma/src/sanitizer.rs
DELETED
@@ -1,607 +0,0 @@
|
|
1
|
-
use std::{borrow::BorrowMut, collections::HashMap};
|
2
|
-
|
3
|
-
use lol_html::{
|
4
|
-
errors::AttributeNameError,
|
5
|
-
html_content::{Comment, ContentType, Doctype, Element, EndTag},
|
6
|
-
};
|
7
|
-
use magnus::{class, function, method, scan_args, Module, Object, RArray, RHash, RModule, Value};
|
8
|
-
|
9
|
-
/// Sanitization rules that apply to one element name.
///
/// Instances live in `Sanitizer::element_sanitizers`, keyed by element name.
/// The derived `Default` produces a rule set in which every list and the
/// protocol map are empty.
#[derive(Clone, Debug, Default)]
struct ElementSanitizer {
    /// Attribute names permitted on this element.
    allowed_attrs: Vec<String>,
    /// Attribute names recorded as required for this element.
    required_attrs: Vec<String>,
    /// Class names permitted on this element.
    allowed_classes: Vec<String>,
    /// Maps an attribute name to the protocols its value may start with.
    protocol_sanitizers: HashMap<String, Vec<String>>,
}
|
28
|
-
|
29
|
-
/// Mutable sanitizer state held inside `SelmaSanitizer`.
#[derive(Clone, Debug)]
|
30
|
-
pub struct Sanitizer {
|
31
|
-
/// One byte of `SELMA_SANITIZER_*` flag bits per tag, indexed by the tag's `index`.
flags: [u8; crate::tags::Tag::TAG_COUNT],
|
32
|
-
/// Attribute names allowed on every element (registered under the element name "all").
allowed_attrs: Vec<String>,
|
33
|
-
/// Class names allowed on every element (registered under the element name "all").
allowed_classes: Vec<String>,
|
34
|
-
/// Per-element rules, keyed by element name.
element_sanitizers: HashMap<String, ElementSanitizer>,
|
35
|
-
|
36
|
-
/// When true, `escape_tagfilter` removes tags that `Tag::is_tag_escapeworthy` reports.
pub escape_tagfilter: bool,
|
37
|
-
/// Whether HTML comments are kept.
pub allow_comments: bool,
|
38
|
-
/// Whether the HTML doctype is kept.
pub allow_doctype: bool,
|
39
|
-
/// The Ruby configuration hash this sanitizer was constructed with.
config: RHash,
|
40
|
-
}
|
41
|
-
|
42
|
-
/// Ruby-visible wrapper (`Selma::Sanitizer`) around a `Sanitizer`.
///
/// The `RefCell` lets methods that only receive `&self` mutate the wrapped state.
#[derive(Clone, Debug)]
|
43
|
-
#[magnus::wrap(class = "Selma::Sanitizer")]
|
44
|
-
pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
|
45
|
-
|
46
|
-
impl SelmaSanitizer {
|
47
|
-
/// Bit 0: the per-tag bit that `allow_element` tests.
const SELMA_SANITIZER_ALLOW: u8 = 0b0000_0001;
// Bit 1 is not in use; the constant that would occupy it is disabled:
// const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
/// Bit 2: when removing an element, drop its contents along with it.
const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = 0b0000_0100;
/// Bit 3: when removing an element but keeping its contents, pad with spaces.
const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = 0b0000_1000;
|
51
|
-
|
52
|
-
pub fn new(arguments: &[Value]) -> Result<Self, magnus::Error> {
|
53
|
-
let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
|
54
|
-
let (opt_config,): (Option<RHash>,) = args.optional;
|
55
|
-
|
56
|
-
let config = match opt_config {
|
57
|
-
Some(config) => config,
|
58
|
-
// TODO: this seems like a hack to fix?
|
59
|
-
None => magnus::eval::<RHash>(r#"Selma::Sanitizer::Config::DEFAULT"#).unwrap(),
|
60
|
-
};
|
61
|
-
|
62
|
-
let mut element_sanitizers = HashMap::new();
|
63
|
-
crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
|
64
|
-
let es = ElementSanitizer::default();
|
65
|
-
element_sanitizers.insert(
|
66
|
-
crate::tags::Tag::element_name_from_enum(html_tag).to_string(),
|
67
|
-
es,
|
68
|
-
);
|
69
|
-
});
|
70
|
-
|
71
|
-
Ok(Self(std::cell::RefCell::new(Sanitizer {
|
72
|
-
flags: [0; crate::tags::Tag::TAG_COUNT],
|
73
|
-
allowed_attrs: vec![],
|
74
|
-
allowed_classes: vec![],
|
75
|
-
element_sanitizers,
|
76
|
-
|
77
|
-
escape_tagfilter: true,
|
78
|
-
allow_comments: false,
|
79
|
-
allow_doctype: true,
|
80
|
-
config,
|
81
|
-
})))
|
82
|
-
}
|
83
|
-
|
84
|
-
/// Returns the Ruby configuration hash stored in this sanitizer.
fn get_config(&self) -> Result<RHash, magnus::Error> {
    let config = self.0.borrow().config;
    Ok(config)
}
|
89
|
-
|
90
|
-
/// Toggle a sanitizer option on or off.
|
91
|
-
fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
|
92
|
-
let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
|
93
|
-
if set {
|
94
|
-
self.0.borrow_mut().flags[tag.index] |= flag;
|
95
|
-
} else {
|
96
|
-
self.0.borrow_mut().flags[tag.index] &= !flag;
|
97
|
-
}
|
98
|
-
}
|
99
|
-
|
100
|
-
/// Toggles all sanitization options on or off.
|
101
|
-
fn set_all_flags(&self, flag: u8, set: bool) {
|
102
|
-
if set {
|
103
|
-
crate::tags::Tag::html_tags()
|
104
|
-
.iter()
|
105
|
-
.enumerate()
|
106
|
-
.for_each(|(iter, _)| {
|
107
|
-
self.0.borrow_mut().flags[iter] |= flag;
|
108
|
-
});
|
109
|
-
} else {
|
110
|
-
crate::tags::Tag::html_tags()
|
111
|
-
.iter()
|
112
|
-
.enumerate()
|
113
|
-
.for_each(|(iter, _)| {
|
114
|
-
self.0.borrow_mut().flags[iter] &= flag;
|
115
|
-
});
|
116
|
-
}
|
117
|
-
}
|
118
|
-
|
119
|
-
/// Whether or not to keep dangerous HTML tags.
|
120
|
-
fn set_escape_tagfilter(&self, allow: bool) -> bool {
|
121
|
-
self.0.borrow_mut().escape_tagfilter = allow;
|
122
|
-
allow
|
123
|
-
}
|
124
|
-
|
125
|
-
pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
|
126
|
-
if self.0.borrow().escape_tagfilter {
|
127
|
-
let tag = crate::tags::Tag::tag_from_element(e);
|
128
|
-
if crate::tags::Tag::is_tag_escapeworthy(tag) {
|
129
|
-
e.remove();
|
130
|
-
return true;
|
131
|
-
}
|
132
|
-
}
|
133
|
-
|
134
|
-
false
|
135
|
-
}
|
136
|
-
|
137
|
-
pub fn get_escape_tagfilter(&self) -> bool {
|
138
|
-
self.0.borrow().escape_tagfilter
|
139
|
-
}
|
140
|
-
|
141
|
-
/// Whether or not to keep HTML comments.
|
142
|
-
fn set_allow_comments(&self, allow: bool) -> bool {
|
143
|
-
self.0.borrow_mut().allow_comments = allow;
|
144
|
-
allow
|
145
|
-
}
|
146
|
-
|
147
|
-
pub fn get_allow_comments(&self) -> bool {
|
148
|
-
self.0.borrow().allow_comments
|
149
|
-
}
|
150
|
-
|
151
|
-
pub fn remove_comment(&self, c: &mut Comment) {
|
152
|
-
c.remove();
|
153
|
-
}
|
154
|
-
|
155
|
-
/// Whether or not to keep HTML doctype.
|
156
|
-
fn set_allow_doctype(&self, allow: bool) -> bool {
|
157
|
-
self.0.borrow_mut().allow_doctype = allow;
|
158
|
-
allow
|
159
|
-
}
|
160
|
-
|
161
|
-
/// Whether or not to keep HTML doctype.
|
162
|
-
pub fn get_allow_doctype(&self) -> bool {
|
163
|
-
self.0.borrow().allow_doctype
|
164
|
-
}
|
165
|
-
|
166
|
-
pub fn remove_doctype(&self, d: &mut Doctype) {
|
167
|
-
d.remove();
|
168
|
-
}
|
169
|
-
|
170
|
-
fn set_allowed_attribute(&self, eln: Value, attr_name: String, allow: bool) -> bool {
|
171
|
-
let mut binding = self.0.borrow_mut();
|
172
|
-
|
173
|
-
let element_name = eln.to_r_string().unwrap().to_string().unwrap();
|
174
|
-
if element_name == "all" {
|
175
|
-
let allowed_attrs = &mut binding.allowed_attrs;
|
176
|
-
Self::set_allowed(allowed_attrs, &attr_name, allow);
|
177
|
-
} else {
|
178
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
179
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
180
|
-
|
181
|
-
element_sanitizer.allowed_attrs.push(attr_name);
|
182
|
-
}
|
183
|
-
|
184
|
-
allow
|
185
|
-
}
|
186
|
-
|
187
|
-
fn set_allowed_class(&self, element_name: String, class_name: String, allow: bool) -> bool {
|
188
|
-
let mut binding = self.0.borrow_mut();
|
189
|
-
if element_name == "all" {
|
190
|
-
let allowed_classes = &mut binding.allowed_classes;
|
191
|
-
Self::set_allowed(allowed_classes, &class_name, allow);
|
192
|
-
} else {
|
193
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
194
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
195
|
-
|
196
|
-
let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
|
197
|
-
Self::set_allowed(allowed_classes, &class_name, allow)
|
198
|
-
}
|
199
|
-
allow
|
200
|
-
}
|
201
|
-
|
202
|
-
fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
|
203
|
-
let mut binding = self.0.borrow_mut();
|
204
|
-
|
205
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
206
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
207
|
-
|
208
|
-
let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
|
209
|
-
|
210
|
-
for opt_allowed_protocol in allow_list.each() {
|
211
|
-
let allowed_protocol = opt_allowed_protocol.unwrap();
|
212
|
-
let protocol_list = protocol_sanitizers.get_mut(&attr_name);
|
213
|
-
if allowed_protocol.is_kind_of(class::string()) {
|
214
|
-
match protocol_list {
|
215
|
-
None => {
|
216
|
-
protocol_sanitizers
|
217
|
-
.insert(attr_name.to_string(), vec![allowed_protocol.to_string()]);
|
218
|
-
}
|
219
|
-
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
220
|
-
}
|
221
|
-
} else if allowed_protocol.is_kind_of(class::symbol())
|
222
|
-
&& allowed_protocol.inspect() == ":relative"
|
223
|
-
{
|
224
|
-
match protocol_list {
|
225
|
-
None => {
|
226
|
-
protocol_sanitizers.insert(
|
227
|
-
attr_name.to_string(),
|
228
|
-
vec!["#".to_string(), "/".to_string()],
|
229
|
-
);
|
230
|
-
}
|
231
|
-
Some(protocol_list) => {
|
232
|
-
protocol_list.push("#".to_string());
|
233
|
-
protocol_list.push("/".to_string());
|
234
|
-
}
|
235
|
-
}
|
236
|
-
}
|
237
|
-
}
|
238
|
-
}
|
239
|
-
|
240
|
-
/// Adds `attr_name` to `set` (`allow == true`) or removes it
/// (`allow == false`).
///
/// Adding a name that is already present leaves `set` unchanged, and
/// removing drops every occurrence, so a single disallow always takes
/// effect. Removing a name that is absent is a no-op.
fn set_allowed(set: &mut Vec<String>, attr_name: &String, allow: bool) {
    if allow {
        if !set.contains(attr_name) {
            set.push(attr_name.to_string());
        }
    } else {
        set.retain(|existing| existing != attr_name);
    }
}
|
247
|
-
|
248
|
-
pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), AttributeNameError> {
|
249
|
-
let tag = crate::tags::Tag::tag_from_element(element);
|
250
|
-
let tag_name = &element.tag_name();
|
251
|
-
let element_sanitizer = {
|
252
|
-
let mut binding = self.0.borrow_mut();
|
253
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
254
|
-
Self::get_element_sanitizer(element_sanitizers, tag_name).clone()
|
255
|
-
};
|
256
|
-
|
257
|
-
let binding = self.0.borrow();
|
258
|
-
|
259
|
-
// FIXME: This is a hack to get around the fact that we can't borrow
|
260
|
-
let attribute_map: HashMap<String, String> = element
|
261
|
-
.attributes()
|
262
|
-
.iter()
|
263
|
-
.map(|a| (a.name(), a.value()))
|
264
|
-
.collect();
|
265
|
-
|
266
|
-
for (attr_name, attr_val) in attribute_map.iter() {
|
267
|
-
// you can actually embed <!-- ... --> inside
|
268
|
-
// an HTML tag to pass malicious data. If this is
|
269
|
-
// encountered, remove the entire element to be safe.
|
270
|
-
if attr_name.starts_with("<!--") {
|
271
|
-
Self::force_remove_element(self, element);
|
272
|
-
return Ok(());
|
273
|
-
}
|
274
|
-
|
275
|
-
// first, trim leading spaces and unescape any encodings
|
276
|
-
let trimmed = attr_val.trim_start();
|
277
|
-
let x = escapist::unescape_html(trimmed.as_bytes());
|
278
|
-
let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
|
279
|
-
|
280
|
-
let should_keep_attrubute = match Self::should_keep_attribute(
|
281
|
-
&binding,
|
282
|
-
element,
|
283
|
-
&element_sanitizer,
|
284
|
-
attr_name,
|
285
|
-
&unescaped_attr_val,
|
286
|
-
) {
|
287
|
-
Ok(should_keep) => should_keep,
|
288
|
-
Err(e) => {
|
289
|
-
return Err(e);
|
290
|
-
}
|
291
|
-
};
|
292
|
-
|
293
|
-
if !should_keep_attrubute {
|
294
|
-
element.remove_attribute(attr_name);
|
295
|
-
} else {
|
296
|
-
// Prevent the use of `<meta>` elements that set a charset other than UTF-8,
|
297
|
-
// since output is always UTF-8.
|
298
|
-
if crate::tags::Tag::is_meta(tag) {
|
299
|
-
if attr_name == "charset" && unescaped_attr_val != "utf-8" {
|
300
|
-
match element.set_attribute(attr_name, "utf-8") {
|
301
|
-
Ok(_) => {}
|
302
|
-
Err(err) => {
|
303
|
-
return Err(err);
|
304
|
-
}
|
305
|
-
}
|
306
|
-
}
|
307
|
-
} else if !unescaped_attr_val.is_empty() {
|
308
|
-
let mut buf = String::new();
|
309
|
-
// ...then, escape any special characters, for security
|
310
|
-
if attr_name == "href" {
|
311
|
-
escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
|
312
|
-
} else {
|
313
|
-
escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
|
314
|
-
};
|
315
|
-
|
316
|
-
match element.set_attribute(attr_name, &buf) {
|
317
|
-
Ok(_) => {}
|
318
|
-
Err(err) => {
|
319
|
-
return Err(err);
|
320
|
-
}
|
321
|
-
}
|
322
|
-
}
|
323
|
-
}
|
324
|
-
}
|
325
|
-
|
326
|
-
let required = &element_sanitizer.required_attrs;
|
327
|
-
if required.contains(&"*".to_string()) {
|
328
|
-
return Ok(());
|
329
|
-
}
|
330
|
-
for attr in element.attributes().iter() {
|
331
|
-
let attr_name = &attr.name();
|
332
|
-
if required.contains(attr_name) {
|
333
|
-
return Ok(());
|
334
|
-
}
|
335
|
-
}
|
336
|
-
|
337
|
-
Ok(())
|
338
|
-
}
|
339
|
-
|
340
|
-
fn should_keep_attribute(
|
341
|
-
binding: &Sanitizer,
|
342
|
-
element: &mut Element,
|
343
|
-
element_sanitizer: &ElementSanitizer,
|
344
|
-
attr_name: &String,
|
345
|
-
attr_val: &String,
|
346
|
-
) -> Result<bool, AttributeNameError> {
|
347
|
-
let mut allowed: bool = false;
|
348
|
-
let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
|
349
|
-
let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
|
350
|
-
|
351
|
-
if element_allowed_attrs {
|
352
|
-
allowed = true;
|
353
|
-
}
|
354
|
-
|
355
|
-
if !allowed && sanitizer_allowed_attrs {
|
356
|
-
allowed = true;
|
357
|
-
}
|
358
|
-
|
359
|
-
if !allowed {
|
360
|
-
return Ok(false);
|
361
|
-
}
|
362
|
-
|
363
|
-
let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
|
364
|
-
match protocol_sanitizer_values {
|
365
|
-
None => {
|
366
|
-
// has a protocol, but no sanitization list
|
367
|
-
if !attr_val.is_empty() && Self::has_protocol(attr_val) {
|
368
|
-
return Ok(false);
|
369
|
-
}
|
370
|
-
}
|
371
|
-
Some(protocol_sanitizer_values) => {
|
372
|
-
if !attr_val.is_empty()
|
373
|
-
&& !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
|
374
|
-
{
|
375
|
-
return Ok(false);
|
376
|
-
}
|
377
|
-
}
|
378
|
-
}
|
379
|
-
|
380
|
-
if attr_name == "class" {
|
381
|
-
return Self::sanitize_class_attribute(
|
382
|
-
binding,
|
383
|
-
element,
|
384
|
-
element_sanitizer,
|
385
|
-
attr_name,
|
386
|
-
attr_val,
|
387
|
-
);
|
388
|
-
}
|
389
|
-
|
390
|
-
Ok(true)
|
391
|
-
}
|
392
|
-
|
393
|
-
/// Reports whether `attr_val` contains the substring "://".
fn has_protocol(attr_val: &str) -> bool {
    attr_val.find("://").is_some()
}
|
396
|
-
|
397
|
-
/// Reports whether the leading protocol (or relative marker) of `attr_val`
/// is listed in `protocols_allowed`.
///
/// The value is scanned for its first `:`, `/` or `#`; when none is present
/// the last character of the value is used as the split point.
/// * `/` at the split point: allowed only if "/" is in the list.
/// * `#` at the split point: allowed only if "#" is in the list.
/// * otherwise: the text before the split point, lowercased, must be in the
///   list (protocol names are matched case-insensitively).
///
/// Offsets are byte offsets taken from `char_indices`, so non-ASCII values
/// are split on character boundaries. An empty value returns `false`.
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &String) -> bool {
    let first_delimiter = attr_val
        .char_indices()
        .find(|&(_, c)| matches!(c, ':' | '/' | '#'));

    let (pos, marker) = match first_delimiter.or_else(|| attr_val.char_indices().last()) {
        Some(found) => found,
        // Empty value: there is nothing to match against the list.
        None => return false,
    };

    match marker {
        '/' => protocols_allowed.iter().any(|p| p == "/"),
        '#' => protocols_allowed.iter().any(|p| p == "#"),
        _ => {
            // Allow protocol name to be case-insensitive
            let protocol = attr_val[..pos].to_lowercase();
            protocols_allowed.contains(&protocol)
        }
    }
}
|
426
|
-
|
427
|
-
fn sanitize_class_attribute(
|
428
|
-
binding: &Sanitizer,
|
429
|
-
element: &mut Element,
|
430
|
-
element_sanitizer: &ElementSanitizer,
|
431
|
-
attr_name: &str,
|
432
|
-
attr_val: &str,
|
433
|
-
) -> Result<bool, lol_html::errors::AttributeNameError> {
|
434
|
-
let allowed_global = &binding.allowed_classes;
|
435
|
-
|
436
|
-
let mut valid_classes: Vec<String> = vec![];
|
437
|
-
|
438
|
-
let allowed_local = &element_sanitizer.allowed_classes;
|
439
|
-
|
440
|
-
// No class filters, so everything goes through
|
441
|
-
if allowed_global.is_empty() && allowed_local.is_empty() {
|
442
|
-
return Ok(true);
|
443
|
-
}
|
444
|
-
|
445
|
-
let attr_value = attr_val.trim_start();
|
446
|
-
attr_value
|
447
|
-
.split_whitespace()
|
448
|
-
.map(|s| s.to_string())
|
449
|
-
.for_each(|class| {
|
450
|
-
if allowed_global.contains(&class) || allowed_local.contains(&class) {
|
451
|
-
valid_classes.push(class);
|
452
|
-
}
|
453
|
-
});
|
454
|
-
|
455
|
-
if valid_classes.is_empty() {
|
456
|
-
return Ok(false);
|
457
|
-
}
|
458
|
-
|
459
|
-
match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
|
460
|
-
Ok(_) => Ok(true),
|
461
|
-
Err(err) => Err(err),
|
462
|
-
}
|
463
|
-
}
|
464
|
-
|
465
|
-
pub fn allow_element(&self, element: &mut Element) -> bool {
|
466
|
-
let tag = crate::tags::Tag::tag_from_element(element);
|
467
|
-
let flags: u8 = self.0.borrow().flags[tag.index];
|
468
|
-
|
469
|
-
(flags & Self::SELMA_SANITIZER_ALLOW) == 0
|
470
|
-
}
|
471
|
-
|
472
|
-
pub fn try_remove_element(&self, element: &mut Element) -> bool {
|
473
|
-
let tag = crate::tags::Tag::tag_from_element(element);
|
474
|
-
let flags: u8 = self.0.borrow().flags[tag.index];
|
475
|
-
|
476
|
-
let should_remove = !element.removed() && self.allow_element(element);
|
477
|
-
|
478
|
-
if should_remove {
|
479
|
-
if crate::tags::Tag::has_text_content(tag) {
|
480
|
-
Self::remove_element(
|
481
|
-
element,
|
482
|
-
tag.self_closing,
|
483
|
-
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
484
|
-
);
|
485
|
-
} else {
|
486
|
-
Self::remove_element(element, tag.self_closing, flags);
|
487
|
-
}
|
488
|
-
|
489
|
-
Self::check_if_end_tag_needs_removal(element);
|
490
|
-
} else {
|
491
|
-
// anything in <iframe> must be removed, if it's kept
|
492
|
-
if crate::tags::Tag::is_iframe(tag) {
|
493
|
-
if self.0.borrow().flags[tag.index] != 0 {
|
494
|
-
element.set_inner_content(" ", ContentType::Text);
|
495
|
-
} else {
|
496
|
-
element.set_inner_content("", ContentType::Text);
|
497
|
-
}
|
498
|
-
}
|
499
|
-
}
|
500
|
-
|
501
|
-
should_remove
|
502
|
-
}
|
503
|
-
|
504
|
-
fn remove_element(element: &mut Element, self_closing: bool, flags: u8) {
|
505
|
-
let wrap_whitespace = (flags & Self::SELMA_SANITIZER_WRAP_WHITESPACE) != 0;
|
506
|
-
let remove_contents = (flags & Self::SELMA_SANITIZER_REMOVE_CONTENTS) != 0;
|
507
|
-
|
508
|
-
if remove_contents {
|
509
|
-
element.remove();
|
510
|
-
} else {
|
511
|
-
if wrap_whitespace {
|
512
|
-
if self_closing {
|
513
|
-
element.after(" ", ContentType::Text);
|
514
|
-
} else {
|
515
|
-
element.before(" ", ContentType::Text);
|
516
|
-
element.after(" ", ContentType::Text);
|
517
|
-
}
|
518
|
-
}
|
519
|
-
element.remove_and_keep_content();
|
520
|
-
}
|
521
|
-
}
|
522
|
-
|
523
|
-
/// Removes `element` together with its contents, regardless of its flags,
/// then arranges for its end tag to be removed if needed.
pub fn force_remove_element(&self, element: &mut Element) {
    let self_closing = crate::tags::Tag::tag_from_element(element).self_closing;
    Self::remove_element(
        element,
        self_closing,
        Self::SELMA_SANITIZER_REMOVE_CONTENTS,
    );
    Self::check_if_end_tag_needs_removal(element);
}
|
529
|
-
|
530
|
-
fn check_if_end_tag_needs_removal(element: &mut Element) {
|
531
|
-
if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
|
532
|
-
element
|
533
|
-
.on_end_tag(move |end| {
|
534
|
-
Self::remove_end_tag(end);
|
535
|
-
Ok(())
|
536
|
-
})
|
537
|
-
.unwrap();
|
538
|
-
}
|
539
|
-
}
|
540
|
-
|
541
|
-
fn remove_end_tag(end_tag: &mut EndTag) {
|
542
|
-
end_tag.remove();
|
543
|
-
}
|
544
|
-
|
545
|
-
fn get_element_sanitizer<'a>(
|
546
|
-
element_sanitizers: &'a mut HashMap<String, ElementSanitizer>,
|
547
|
-
element_name: &str,
|
548
|
-
) -> &'a mut ElementSanitizer {
|
549
|
-
element_sanitizers
|
550
|
-
.entry(element_name.to_string())
|
551
|
-
.or_insert_with(ElementSanitizer::default)
|
552
|
-
}
|
553
|
-
}
|
554
|
-
|
555
|
-
/// Defines the `Sanitizer` class under `m_selma` and registers its Ruby
/// methods.
///
/// # Errors
///
/// Returns the first `magnus::Error` raised while defining the class or one
/// of its methods.
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
    let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;

    // Construction and configuration access.
    c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
    c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;

    // Per-tag flag bits.
    c_sanitizer.define_method("set_flag", method!(SelmaSanitizer::set_flag, 3))?;
    c_sanitizer.define_method("set_all_flags", method!(SelmaSanitizer::set_all_flags, 2))?;

    // Tag filter toggle.
    let set_escape_tagfilter = method!(SelmaSanitizer::set_escape_tagfilter, 1);
    c_sanitizer.define_method("set_escape_tagfilter", set_escape_tagfilter)?;
    let get_escape_tagfilter = method!(SelmaSanitizer::get_escape_tagfilter, 0);
    c_sanitizer.define_method("escape_tagfilter", get_escape_tagfilter)?;

    // Comment toggle.
    let set_allow_comments = method!(SelmaSanitizer::set_allow_comments, 1);
    c_sanitizer.define_method("set_allow_comments", set_allow_comments)?;
    let get_allow_comments = method!(SelmaSanitizer::get_allow_comments, 0);
    c_sanitizer.define_method("allow_comments", get_allow_comments)?;

    // Doctype toggle.
    let set_allow_doctype = method!(SelmaSanitizer::set_allow_doctype, 1);
    c_sanitizer.define_method("set_allow_doctype", set_allow_doctype)?;
    let get_allow_doctype = method!(SelmaSanitizer::get_allow_doctype, 0);
    c_sanitizer.define_method("allow_doctype", get_allow_doctype)?;

    // Allow lists.
    let set_allowed_attribute = method!(SelmaSanitizer::set_allowed_attribute, 3);
    c_sanitizer.define_method("set_allowed_attribute", set_allowed_attribute)?;
    let set_allowed_class = method!(SelmaSanitizer::set_allowed_class, 3);
    c_sanitizer.define_method("set_allowed_class", set_allowed_class)?;
    let set_allowed_protocols = method!(SelmaSanitizer::set_allowed_protocols, 3);
    c_sanitizer.define_method("set_allowed_protocols", set_allowed_protocols)?;

    Ok(())
}
|