selma 0.0.2-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +173 -0
- data/ext/selma/Cargo.toml +14 -0
- data/ext/selma/_util.rb +102 -0
- data/ext/selma/extconf.rb +6 -0
- data/ext/selma/src/html/element.rs +195 -0
- data/ext/selma/src/html/end_tag.rs +35 -0
- data/ext/selma/src/html.rs +17 -0
- data/ext/selma/src/lib.rs +23 -0
- data/ext/selma/src/native_ref_wrap.rs +79 -0
- data/ext/selma/src/rewriter.rs +441 -0
- data/ext/selma/src/sanitizer.rs +578 -0
- data/ext/selma/src/selector.rs +115 -0
- data/ext/selma/src/tags.rs +1133 -0
- data/ext/selma/src/wrapped_struct.rs +92 -0
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/extension.rb +14 -0
- data/lib/selma/html.rb +6 -0
- data/lib/selma/rewriter.rb +6 -0
- data/lib/selma/sanitizer/config/basic.rb +27 -0
- data/lib/selma/sanitizer/config/default.rb +42 -0
- data/lib/selma/sanitizer/config/relaxed.rb +37 -0
- data/lib/selma/sanitizer/config/restricted.rb +13 -0
- data/lib/selma/sanitizer/config.rb +67 -0
- data/lib/selma/sanitizer.rb +85 -0
- data/lib/selma/selector.rb +6 -0
- data/lib/selma/version.rb +5 -0
- data/lib/selma.rb +13 -0
- data/selma.gemspec +41 -0
- metadata +136 -0
@@ -0,0 +1,578 @@
|
|
1
|
+
use std::{borrow::BorrowMut, cell::RefMut, collections::HashMap};
|
2
|
+
|
3
|
+
use lol_html::html_content::{Comment, ContentType, Doctype, Element, EndTag};
|
4
|
+
use magnus::{
|
5
|
+
class, exception, function, method, scan_args, Error, Module, Object, RArray, RHash, RModule,
|
6
|
+
Value,
|
7
|
+
};
|
8
|
+
|
9
|
+
use crate::tags::Tag;
|
10
|
+
|
11
|
+
/// Sanitization rules that apply to a single element type.
///
/// One instance is stored per tag name in `Sanitizer::element_sanitizers`.
#[derive(Debug, Clone)]
struct ElementSanitizer {
    /// Attribute names permitted on this element.
    allowed_attrs: Vec<String>,
    /// Required attribute names for this element; `"*"` is checked for as a
    /// special entry by `sanitize_attributes`.
    required_attrs: Vec<String>,
    /// Class names permitted on this element.
    allowed_classes: Vec<String>,
    /// Maps an attribute name to the list of protocols its value may use.
    protocol_sanitizers: HashMap<String, Vec<String>>,
}
|
18
|
+
|
19
|
+
#[derive(Clone, Debug)]
|
20
|
+
pub struct Sanitizer {
|
21
|
+
flags: [u8; Tag::TAG_COUNT],
|
22
|
+
allowed_attrs: Vec<String>,
|
23
|
+
allowed_classes: Vec<String>,
|
24
|
+
element_sanitizers: HashMap<String, ElementSanitizer>,
|
25
|
+
|
26
|
+
pub escape_tagfilter: bool,
|
27
|
+
pub allow_comments: bool,
|
28
|
+
pub allow_doctype: bool,
|
29
|
+
config: RHash,
|
30
|
+
}
|
31
|
+
|
32
|
+
#[derive(Clone, Debug)]
|
33
|
+
#[magnus::wrap(class = "Selma::Sanitizer")]
|
34
|
+
pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
|
35
|
+
|
36
|
+
impl SelmaSanitizer {
|
37
|
+
/// Bit tested by `allow_element` when deciding how to treat an element.
const SELMA_SANITIZER_ALLOW: u8 = 0b0001;
/// Tag-filter bit; not read anywhere in this file's visible code.
const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = 0b0010;
/// When set, `remove_element` drops the element together with its contents.
const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = 0b0100;
/// When set, `remove_element` pads a removed element with spaces.
const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = 0b1000;
|
41
|
+
|
42
|
+
pub fn new(arguments: &[Value]) -> Result<Self, Error> {
|
43
|
+
let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
|
44
|
+
let (opt_config,): (Option<RHash>,) = args.optional;
|
45
|
+
|
46
|
+
let config = match opt_config {
|
47
|
+
Some(config) => config,
|
48
|
+
// TODO: this seems like a hack to fix?
|
49
|
+
None => magnus::eval::<RHash>(r#"Selma::Sanitizer::Config::DEFAULT"#).unwrap(),
|
50
|
+
};
|
51
|
+
|
52
|
+
let mut element_sanitizers = HashMap::new();
|
53
|
+
Tag::html_tags().iter().for_each(|html_tag| {
|
54
|
+
let es = ElementSanitizer {
|
55
|
+
allowed_attrs: vec![],
|
56
|
+
allowed_classes: vec![],
|
57
|
+
required_attrs: vec![],
|
58
|
+
|
59
|
+
protocol_sanitizers: HashMap::new(),
|
60
|
+
};
|
61
|
+
element_sanitizers.insert(Tag::element_name_from_enum(html_tag).to_string(), es);
|
62
|
+
});
|
63
|
+
|
64
|
+
Ok(Self(std::cell::RefCell::new(Sanitizer {
|
65
|
+
flags: [0; Tag::TAG_COUNT],
|
66
|
+
allowed_attrs: vec![],
|
67
|
+
allowed_classes: vec![],
|
68
|
+
element_sanitizers,
|
69
|
+
|
70
|
+
escape_tagfilter: true,
|
71
|
+
allow_comments: false,
|
72
|
+
allow_doctype: true,
|
73
|
+
config,
|
74
|
+
})))
|
75
|
+
}
|
76
|
+
|
77
|
+
/// Returns the Ruby configuration hash stored at construction time.
fn get_config(&self) -> Result<RHash, Error> {
    let config = self.0.borrow().config;
    Ok(config)
}
|
82
|
+
|
83
|
+
/// Toggle a sanitizer option on or off.
|
84
|
+
fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
|
85
|
+
let tag = Tag::tag_from_tag_name(tag_name.as_str());
|
86
|
+
if set {
|
87
|
+
self.0.borrow_mut().flags[tag.index] |= flag;
|
88
|
+
} else {
|
89
|
+
self.0.borrow_mut().flags[tag.index] &= !flag;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
/// Toggles all sanitization options on or off.
|
94
|
+
fn set_all_flags(&self, flag: u8, set: bool) {
|
95
|
+
if set {
|
96
|
+
Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
|
97
|
+
self.0.borrow_mut().flags[iter] |= flag;
|
98
|
+
});
|
99
|
+
} else {
|
100
|
+
Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
|
101
|
+
self.0.borrow_mut().flags[iter] &= flag;
|
102
|
+
});
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
/// Whether or not to keep dangerous HTML tags.
|
107
|
+
fn set_escape_tagfilter(&self, allow: bool) -> bool {
|
108
|
+
self.0.borrow_mut().escape_tagfilter = allow;
|
109
|
+
allow
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
|
113
|
+
if self.0.borrow().escape_tagfilter {
|
114
|
+
let tag = Tag::tag_from_element(e);
|
115
|
+
if Tag::is_tag_escapeworthy(tag) {
|
116
|
+
e.remove();
|
117
|
+
return true;
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
false
|
122
|
+
}
|
123
|
+
|
124
|
+
pub fn get_escape_tagfilter(&self) -> bool {
|
125
|
+
self.0.borrow().escape_tagfilter
|
126
|
+
}
|
127
|
+
|
128
|
+
/// Whether or not to keep HTML comments.
|
129
|
+
fn set_allow_comments(&self, allow: bool) -> bool {
|
130
|
+
self.0.borrow_mut().allow_comments = allow;
|
131
|
+
allow
|
132
|
+
}
|
133
|
+
|
134
|
+
pub fn get_allow_comments(&self) -> bool {
|
135
|
+
self.0.borrow().allow_comments
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn remove_comment(&self, c: &mut Comment) {
|
139
|
+
c.remove();
|
140
|
+
}
|
141
|
+
|
142
|
+
/// Whether or not to keep HTML doctype.
|
143
|
+
fn set_allow_doctype(&self, allow: bool) -> bool {
|
144
|
+
self.0.borrow_mut().allow_doctype = allow;
|
145
|
+
allow
|
146
|
+
}
|
147
|
+
|
148
|
+
/// Whether or not to keep HTML doctype.
|
149
|
+
pub fn get_allow_doctype(&self) -> bool {
|
150
|
+
self.0.borrow().allow_doctype
|
151
|
+
}
|
152
|
+
|
153
|
+
pub fn remove_doctype(&self, d: &mut Doctype) {
|
154
|
+
d.remove();
|
155
|
+
}
|
156
|
+
|
157
|
+
fn set_allowed_attribute(&self, eln: Value, attr_name: String, allow: bool) -> bool {
|
158
|
+
let mut binding = self.0.borrow_mut();
|
159
|
+
|
160
|
+
let element_name = eln.to_r_string().unwrap().to_string().unwrap();
|
161
|
+
if element_name == "all" {
|
162
|
+
let allowed_attrs = &mut binding.allowed_attrs;
|
163
|
+
Self::set_allowed(allowed_attrs, &attr_name, allow);
|
164
|
+
} else {
|
165
|
+
let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
|
166
|
+
|
167
|
+
element_sanitizer.allowed_attrs.push(attr_name);
|
168
|
+
}
|
169
|
+
|
170
|
+
allow
|
171
|
+
}
|
172
|
+
|
173
|
+
fn set_allowed_class(&self, element_name: String, class_name: String, allow: bool) -> bool {
|
174
|
+
let mut binding = self.0.borrow_mut();
|
175
|
+
if element_name == "all" {
|
176
|
+
let allowed_classes = &mut binding.allowed_classes;
|
177
|
+
Self::set_allowed(allowed_classes, &class_name, allow);
|
178
|
+
} else {
|
179
|
+
let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
|
180
|
+
|
181
|
+
let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
|
182
|
+
Self::set_allowed(allowed_classes, &class_name, allow)
|
183
|
+
}
|
184
|
+
allow
|
185
|
+
}
|
186
|
+
|
187
|
+
fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
|
188
|
+
let mut binding = self.0.borrow_mut();
|
189
|
+
|
190
|
+
let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
|
191
|
+
|
192
|
+
let protocol_sanitizers = element_sanitizer.protocol_sanitizers.borrow_mut();
|
193
|
+
|
194
|
+
for opt_allowed_protocol in allow_list.each() {
|
195
|
+
let allowed_protocol = opt_allowed_protocol.unwrap();
|
196
|
+
let protocol_list = protocol_sanitizers.get_mut(&attr_name);
|
197
|
+
if allowed_protocol.is_kind_of(class::string()) {
|
198
|
+
match protocol_list {
|
199
|
+
None => {
|
200
|
+
protocol_sanitizers
|
201
|
+
.insert(attr_name.to_string(), vec![allowed_protocol.to_string()]);
|
202
|
+
}
|
203
|
+
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
204
|
+
}
|
205
|
+
} else if allowed_protocol.is_kind_of(class::symbol())
|
206
|
+
&& allowed_protocol.inspect() == ":relative"
|
207
|
+
{
|
208
|
+
match protocol_list {
|
209
|
+
None => {
|
210
|
+
protocol_sanitizers.insert(
|
211
|
+
attr_name.to_string(),
|
212
|
+
vec!["#".to_string(), "/".to_string()],
|
213
|
+
);
|
214
|
+
}
|
215
|
+
Some(protocol_list) => {
|
216
|
+
protocol_list.push("#".to_string());
|
217
|
+
protocol_list.push("/".to_string());
|
218
|
+
}
|
219
|
+
}
|
220
|
+
}
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
/// Adds `attr_name` to `set` (`allow == true`) or removes it
/// (`allow == false`).
///
/// `set` is treated as a set: a name is stored at most once, and removal
/// deletes every occurrence. Previously, repeated allows created duplicates
/// and a single disallow removed only one of them, leaving the name allowed.
fn set_allowed(set: &mut Vec<String>, attr_name: &String, allow: bool) {
    if !allow {
        set.retain(|existing| existing != attr_name);
    } else if !set.contains(attr_name) {
        set.push(attr_name.to_string());
    }
}
|
231
|
+
|
232
|
+
pub fn sanitize_attributes(&self, element: &mut Element) {
|
233
|
+
let binding = self.0.borrow_mut();
|
234
|
+
let tag = Tag::tag_from_element(element);
|
235
|
+
let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
|
236
|
+
|
237
|
+
// FIXME: This is a hack to get around the fact that we can't borrow
|
238
|
+
let attribute_map: HashMap<String, String> = element
|
239
|
+
.attributes()
|
240
|
+
.iter()
|
241
|
+
.map(|a| (a.name(), a.value()))
|
242
|
+
.collect();
|
243
|
+
|
244
|
+
for (attr_name, attr_val) in attribute_map.iter() {
|
245
|
+
// you can actually embed <!-- ... --> inside
|
246
|
+
// an HTML tag to pass malicious data. If this is
|
247
|
+
// encountered, remove the entire element to be safe.
|
248
|
+
if attr_name.starts_with("<!--") {
|
249
|
+
Self::force_remove_element(self, element);
|
250
|
+
return;
|
251
|
+
}
|
252
|
+
|
253
|
+
// first, trim leading spaces and unescape any encodings
|
254
|
+
let trimmed = attr_val.trim_start();
|
255
|
+
let x = escapist::unescape_html(trimmed.as_bytes());
|
256
|
+
let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
|
257
|
+
|
258
|
+
if !Self::should_keep_attribute(
|
259
|
+
&binding,
|
260
|
+
element,
|
261
|
+
element_sanitizer,
|
262
|
+
attr_name,
|
263
|
+
&unescaped_attr_val,
|
264
|
+
) {
|
265
|
+
element.remove_attribute(attr_name);
|
266
|
+
} else {
|
267
|
+
// Prevent the use of `<meta>` elements that set a charset other than UTF-8,
|
268
|
+
// since output is always UTF-8.
|
269
|
+
if Tag::is_meta(tag) {
|
270
|
+
if attr_name == "charset" && unescaped_attr_val != "utf-8" {
|
271
|
+
element.set_attribute(attr_name, "utf-8");
|
272
|
+
}
|
273
|
+
} else if !unescaped_attr_val.is_empty() {
|
274
|
+
let mut buf = String::new();
|
275
|
+
// ...then, escape any special characters, for security
|
276
|
+
if attr_name == "href" {
|
277
|
+
// FIXME: gross--------------vvvv
|
278
|
+
escapist::escape_href(&mut buf, unescaped_attr_val.to_string().as_str());
|
279
|
+
} else {
|
280
|
+
escapist::escape_html(&mut buf, unescaped_attr_val.to_string().as_str());
|
281
|
+
};
|
282
|
+
|
283
|
+
element.set_attribute(attr_name, &buf);
|
284
|
+
}
|
285
|
+
}
|
286
|
+
}
|
287
|
+
|
288
|
+
let required = &element_sanitizer.required_attrs;
|
289
|
+
if required.contains(&"*".to_string()) {
|
290
|
+
return;
|
291
|
+
}
|
292
|
+
for attr in element.attributes().iter() {
|
293
|
+
let attr_name = &attr.name();
|
294
|
+
if required.contains(attr_name) {
|
295
|
+
return;
|
296
|
+
}
|
297
|
+
}
|
298
|
+
}
|
299
|
+
|
300
|
+
fn should_keep_attribute(
|
301
|
+
binding: &RefMut<Sanitizer>,
|
302
|
+
element: &mut Element,
|
303
|
+
element_sanitizer: &ElementSanitizer,
|
304
|
+
attr_name: &String,
|
305
|
+
attr_val: &String,
|
306
|
+
) -> bool {
|
307
|
+
let mut allowed: bool = false;
|
308
|
+
let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
|
309
|
+
let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
|
310
|
+
|
311
|
+
if element_allowed_attrs {
|
312
|
+
allowed = true;
|
313
|
+
}
|
314
|
+
|
315
|
+
if !allowed && sanitizer_allowed_attrs {
|
316
|
+
allowed = true;
|
317
|
+
}
|
318
|
+
|
319
|
+
if !allowed {
|
320
|
+
return false;
|
321
|
+
}
|
322
|
+
|
323
|
+
let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
|
324
|
+
match protocol_sanitizer_values {
|
325
|
+
None => {
|
326
|
+
// has a protocol, but no sanitization list
|
327
|
+
if !attr_val.is_empty() && Self::has_protocol(attr_val) {
|
328
|
+
return false;
|
329
|
+
}
|
330
|
+
}
|
331
|
+
Some(protocol_sanitizer_values) => {
|
332
|
+
if !attr_val.is_empty()
|
333
|
+
&& !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
|
334
|
+
{
|
335
|
+
return false;
|
336
|
+
}
|
337
|
+
}
|
338
|
+
}
|
339
|
+
|
340
|
+
if attr_name == "class"
|
341
|
+
&& !Self::sanitize_class_attribute(
|
342
|
+
binding,
|
343
|
+
element,
|
344
|
+
element_sanitizer,
|
345
|
+
attr_name,
|
346
|
+
attr_val,
|
347
|
+
)
|
348
|
+
.unwrap()
|
349
|
+
{
|
350
|
+
return false;
|
351
|
+
}
|
352
|
+
|
353
|
+
true
|
354
|
+
}
|
355
|
+
|
356
|
+
/// Returns `true` when `attr_val` contains the sequence `://` anywhere.
fn has_protocol(attr_val: &str) -> bool {
    attr_val.find("://").is_some()
}
|
359
|
+
|
360
|
+
/// Checks the leading part of `attr_val` against `protocols_allowed`.
///
/// The value is scanned for its first `:`, `/` or `#`:
/// - `/` first: allowed only if the list contains `"/"`;
/// - `#` first: allowed only if the list contains `"#"`;
/// - `:` first: the text before it, lowercased, must be in the list;
/// - none of them (including an empty value): rejected.
///
/// Offsets are byte offsets taken from `char_indices`, so slicing always
/// lands on a character boundary. The previous code mixed character counts
/// with byte lengths and could panic on multi-byte or empty input; with no
/// delimiter it compared the value minus its last character.
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &String) -> bool {
    let first_delimiter = attr_val
        .char_indices()
        .find(|&(_, c)| matches!(c, ':' | '/' | '#'));

    let (delimiter_at, delimiter) = match first_delimiter {
        Some(hit) => hit,
        None => return false,
    };

    match delimiter {
        '/' => protocols_allowed.iter().any(|p| p == "/"),
        '#' => protocols_allowed.iter().any(|p| p == "#"),
        _ => {
            // Allow protocol name to be case-insensitive
            let protocol = attr_val[..delimiter_at].to_lowercase();
            protocols_allowed.contains(&protocol)
        }
    }
}
|
389
|
+
|
390
|
+
fn sanitize_class_attribute(
|
391
|
+
binding: &RefMut<Sanitizer>,
|
392
|
+
element: &mut Element,
|
393
|
+
element_sanitizer: &ElementSanitizer,
|
394
|
+
attr_name: &str,
|
395
|
+
attr_val: &str,
|
396
|
+
) -> Result<bool, Error> {
|
397
|
+
let allowed_global = &binding.allowed_classes;
|
398
|
+
|
399
|
+
let mut valid_classes: Vec<String> = vec![];
|
400
|
+
|
401
|
+
let allowed_local = &element_sanitizer.allowed_classes;
|
402
|
+
|
403
|
+
// No class filters, so everything goes through
|
404
|
+
if allowed_global.is_empty() && allowed_local.is_empty() {
|
405
|
+
return Ok(true);
|
406
|
+
}
|
407
|
+
|
408
|
+
let attr_value = attr_val.trim_start();
|
409
|
+
attr_value
|
410
|
+
.split_whitespace()
|
411
|
+
.map(|s| s.to_string())
|
412
|
+
.for_each(|class| {
|
413
|
+
if allowed_global.contains(&class) || allowed_local.contains(&class) {
|
414
|
+
valid_classes.push(class);
|
415
|
+
}
|
416
|
+
});
|
417
|
+
|
418
|
+
if valid_classes.is_empty() {
|
419
|
+
return Ok(false);
|
420
|
+
}
|
421
|
+
|
422
|
+
match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
|
423
|
+
Ok(_) => Ok(true),
|
424
|
+
Err(err) => Err(Error::new(
|
425
|
+
exception::runtime_error(),
|
426
|
+
format!("AttributeNameError: {}", err),
|
427
|
+
)),
|
428
|
+
}
|
429
|
+
}
|
430
|
+
|
431
|
+
pub fn allow_element(&self, element: &mut Element) -> bool {
|
432
|
+
let tag = Tag::tag_from_element(element);
|
433
|
+
let flags: u8 = self.0.borrow().flags[tag.index];
|
434
|
+
|
435
|
+
(flags & Self::SELMA_SANITIZER_ALLOW) == 0
|
436
|
+
}
|
437
|
+
|
438
|
+
pub fn try_remove_element(&self, element: &mut Element) -> bool {
|
439
|
+
let tag = Tag::tag_from_element(element);
|
440
|
+
let flags: u8 = self.0.borrow().flags[tag.index];
|
441
|
+
|
442
|
+
let should_remove = !element.removed() && self.allow_element(element);
|
443
|
+
|
444
|
+
if should_remove {
|
445
|
+
if Tag::has_text_content(tag) {
|
446
|
+
Self::remove_element(
|
447
|
+
element,
|
448
|
+
tag.self_closing,
|
449
|
+
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
450
|
+
);
|
451
|
+
} else {
|
452
|
+
Self::remove_element(element, tag.self_closing, flags);
|
453
|
+
}
|
454
|
+
|
455
|
+
Self::check_if_end_tag_needs_removal(element);
|
456
|
+
} else {
|
457
|
+
// anything in <iframe> must be removed, if it's kept
|
458
|
+
if Tag::is_iframe(tag) {
|
459
|
+
if self.0.borrow().flags[tag.index] != 0 {
|
460
|
+
element.set_inner_content(" ", ContentType::Text);
|
461
|
+
} else {
|
462
|
+
element.set_inner_content("", ContentType::Text);
|
463
|
+
}
|
464
|
+
}
|
465
|
+
}
|
466
|
+
|
467
|
+
should_remove
|
468
|
+
}
|
469
|
+
|
470
|
+
fn remove_element(element: &mut Element, self_closing: bool, flags: u8) {
|
471
|
+
let wrap_whitespace = (flags & Self::SELMA_SANITIZER_WRAP_WHITESPACE) != 0;
|
472
|
+
let remove_contents = (flags & Self::SELMA_SANITIZER_REMOVE_CONTENTS) != 0;
|
473
|
+
|
474
|
+
if remove_contents {
|
475
|
+
element.remove();
|
476
|
+
} else {
|
477
|
+
if wrap_whitespace {
|
478
|
+
if self_closing {
|
479
|
+
element.after(" ", ContentType::Text);
|
480
|
+
} else {
|
481
|
+
element.before(" ", ContentType::Text);
|
482
|
+
element.after(" ", ContentType::Text);
|
483
|
+
}
|
484
|
+
}
|
485
|
+
element.remove_and_keep_content();
|
486
|
+
}
|
487
|
+
}
|
488
|
+
|
489
|
+
pub fn force_remove_element(&self, element: &mut Element) {
|
490
|
+
let tag = Tag::tag_from_element(element);
|
491
|
+
let self_closing = tag.self_closing;
|
492
|
+
Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
|
493
|
+
Self::check_if_end_tag_needs_removal(element);
|
494
|
+
}
|
495
|
+
|
496
|
+
fn check_if_end_tag_needs_removal(element: &mut Element) {
|
497
|
+
if element.removed() && !Tag::tag_from_element(element).self_closing {
|
498
|
+
element
|
499
|
+
.on_end_tag(move |end| {
|
500
|
+
Self::remove_end_tag(end);
|
501
|
+
Ok(())
|
502
|
+
})
|
503
|
+
.unwrap();
|
504
|
+
}
|
505
|
+
}
|
506
|
+
|
507
|
+
fn remove_end_tag(end_tag: &mut EndTag) {
|
508
|
+
end_tag.remove();
|
509
|
+
}
|
510
|
+
|
511
|
+
fn get_element_sanitizer<'a>(
|
512
|
+
binding: &'a RefMut<Sanitizer>,
|
513
|
+
element_name: &str,
|
514
|
+
) -> &'a ElementSanitizer {
|
515
|
+
binding.element_sanitizers.get(element_name).unwrap()
|
516
|
+
}
|
517
|
+
|
518
|
+
fn get_mut_element_sanitizer<'a>(
|
519
|
+
binding: &'a mut Sanitizer,
|
520
|
+
element_name: &str,
|
521
|
+
) -> &'a mut ElementSanitizer {
|
522
|
+
binding.element_sanitizers.get_mut(element_name).unwrap()
|
523
|
+
}
|
524
|
+
}
|
525
|
+
|
526
|
+
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
527
|
+
let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;
|
528
|
+
|
529
|
+
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|
530
|
+
c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;
|
531
|
+
|
532
|
+
c_sanitizer.define_method("set_flag", method!(SelmaSanitizer::set_flag, 3))?;
|
533
|
+
c_sanitizer.define_method("set_all_flags", method!(SelmaSanitizer::set_all_flags, 2))?;
|
534
|
+
|
535
|
+
c_sanitizer.define_method(
|
536
|
+
"set_escape_tagfilter",
|
537
|
+
method!(SelmaSanitizer::set_escape_tagfilter, 1),
|
538
|
+
)?;
|
539
|
+
c_sanitizer.define_method(
|
540
|
+
"escape_tagfilter",
|
541
|
+
method!(SelmaSanitizer::get_escape_tagfilter, 0),
|
542
|
+
)?;
|
543
|
+
|
544
|
+
c_sanitizer.define_method(
|
545
|
+
"set_allow_comments",
|
546
|
+
method!(SelmaSanitizer::set_allow_comments, 1),
|
547
|
+
)?;
|
548
|
+
c_sanitizer.define_method(
|
549
|
+
"allow_comments",
|
550
|
+
method!(SelmaSanitizer::get_allow_comments, 0),
|
551
|
+
)?;
|
552
|
+
|
553
|
+
c_sanitizer.define_method(
|
554
|
+
"set_allow_doctype",
|
555
|
+
method!(SelmaSanitizer::set_allow_doctype, 1),
|
556
|
+
)?;
|
557
|
+
c_sanitizer.define_method(
|
558
|
+
"allow_doctype",
|
559
|
+
method!(SelmaSanitizer::get_allow_doctype, 0),
|
560
|
+
)?;
|
561
|
+
|
562
|
+
c_sanitizer.define_method(
|
563
|
+
"set_allowed_attribute",
|
564
|
+
method!(SelmaSanitizer::set_allowed_attribute, 3),
|
565
|
+
)?;
|
566
|
+
|
567
|
+
c_sanitizer.define_method(
|
568
|
+
"set_allowed_class",
|
569
|
+
method!(SelmaSanitizer::set_allowed_class, 3),
|
570
|
+
)?;
|
571
|
+
|
572
|
+
c_sanitizer.define_method(
|
573
|
+
"set_allowed_protocols",
|
574
|
+
method!(SelmaSanitizer::set_allowed_protocols, 3),
|
575
|
+
)?;
|
576
|
+
|
577
|
+
Ok(())
|
578
|
+
}
|
@@ -0,0 +1,115 @@
|
|
1
|
+
use magnus::{exception, function, scan_args, Error, Module, Object, RModule, Value};
|
2
|
+
|
3
|
+
#[derive(Clone, Debug)]
|
4
|
+
#[magnus::wrap(class = "Selma::Selector")]
|
5
|
+
pub struct SelmaSelector {
|
6
|
+
match_element: Option<String>,
|
7
|
+
match_text_within: Option<String>,
|
8
|
+
ignore_text_within: Option<Vec<String>>,
|
9
|
+
}
|
10
|
+
|
11
|
+
impl SelmaSelector {
|
12
|
+
fn new(args: &[Value]) -> Result<Self, Error> {
|
13
|
+
let (match_element, match_text_within, rb_ignore_text_within) =
|
14
|
+
Self::scan_parse_args(args)?;
|
15
|
+
|
16
|
+
if match_element.is_none() && match_text_within.is_none() {
|
17
|
+
return Err(Error::new(
|
18
|
+
exception::arg_error(),
|
19
|
+
"Neither `match_element` nor `match_text_within` option given",
|
20
|
+
));
|
21
|
+
}
|
22
|
+
|
23
|
+
// FIXME: not excited about this double parse work (`element!` does it too),
|
24
|
+
// but at least we can bail ASAP if the CSS is invalid
|
25
|
+
if match_element.is_some() {
|
26
|
+
let css = match_element.as_ref().unwrap();
|
27
|
+
if css.parse::<lol_html::Selector>().is_err() {
|
28
|
+
return Err(Error::new(
|
29
|
+
exception::arg_error(),
|
30
|
+
format!("Could not parse `match_element` (`{}`) as valid CSS", css),
|
31
|
+
));
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
if match_text_within.is_some() {
|
36
|
+
let css = match_text_within.as_ref().unwrap();
|
37
|
+
if css.parse::<lol_html::Selector>().is_err() {
|
38
|
+
return Err(Error::new(
|
39
|
+
exception::arg_error(),
|
40
|
+
format!(
|
41
|
+
"Could not parse `match_text_within` (`{}`) as valid CSS",
|
42
|
+
css
|
43
|
+
),
|
44
|
+
));
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
let ignore_text_within = match rb_ignore_text_within {
|
49
|
+
None => None,
|
50
|
+
Some(rb_ignore_text_within) => {
|
51
|
+
let mut ignore_text_within = vec![];
|
52
|
+
rb_ignore_text_within.iter().for_each(|i| {
|
53
|
+
// TODO: test this against malice
|
54
|
+
let ignore_text_within_tag_name = i.to_string();
|
55
|
+
ignore_text_within.push(ignore_text_within_tag_name);
|
56
|
+
});
|
57
|
+
Some(ignore_text_within)
|
58
|
+
}
|
59
|
+
};
|
60
|
+
|
61
|
+
Ok(Self {
|
62
|
+
match_element,
|
63
|
+
match_text_within,
|
64
|
+
ignore_text_within,
|
65
|
+
})
|
66
|
+
}
|
67
|
+
|
68
|
+
#[allow(clippy::let_unit_value)]
|
69
|
+
fn scan_parse_args(
|
70
|
+
args: &[Value],
|
71
|
+
) -> Result<(Option<String>, Option<String>, Option<Vec<String>>), Error> {
|
72
|
+
let args = scan_args::scan_args(args)?;
|
73
|
+
let _: () = args.required;
|
74
|
+
let _: () = args.optional;
|
75
|
+
let _: () = args.splat;
|
76
|
+
let _: () = args.trailing;
|
77
|
+
let _: () = args.block;
|
78
|
+
|
79
|
+
let kw = scan_args::get_kwargs::<
|
80
|
+
_,
|
81
|
+
(),
|
82
|
+
(Option<String>, Option<String>, Option<Vec<String>>),
|
83
|
+
(),
|
84
|
+
>(
|
85
|
+
args.keywords,
|
86
|
+
&[],
|
87
|
+
&["match_element", "match_text_within", "ignore_text_within"],
|
88
|
+
)?;
|
89
|
+
let (match_element, match_text_within, rb_ignore_text_within) = kw.optional;
|
90
|
+
|
91
|
+
Ok((match_element, match_text_within, rb_ignore_text_within))
|
92
|
+
}
|
93
|
+
|
94
|
+
pub fn match_element(&self) -> Option<String> {
|
95
|
+
self.match_element.clone()
|
96
|
+
}
|
97
|
+
|
98
|
+
pub fn match_text_within(&self) -> Option<String> {
|
99
|
+
self.match_text_within.clone()
|
100
|
+
}
|
101
|
+
|
102
|
+
pub fn ignore_text_within(&self) -> Option<Vec<String>> {
|
103
|
+
self.ignore_text_within.clone()
|
104
|
+
}
|
105
|
+
}
|
106
|
+
|
107
|
+
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
108
|
+
let c_selector = m_selma
|
109
|
+
.define_class("Selector", Default::default())
|
110
|
+
.expect("cannot define class Selma::Selector");
|
111
|
+
|
112
|
+
c_selector.define_singleton_method("new", function!(SelmaSelector::new, -1))?;
|
113
|
+
|
114
|
+
Ok(())
|
115
|
+
}
|