selma 0.0.7-aarch64-linux → 0.1.4-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,607 +0,0 @@
1
- use std::{borrow::BorrowMut, collections::HashMap};
2
-
3
- use lol_html::{
4
- errors::AttributeNameError,
5
- html_content::{Comment, ContentType, Doctype, Element, EndTag},
6
- };
7
- use magnus::{class, function, method, scan_args, Module, Object, RArray, RHash, RModule, Value};
8
-
9
/// Sanitization rules that apply to a single HTML element name.
///
/// Every collection starts out empty; the derived `Default` replaces the
/// previous hand-written implementation, which produced exactly the same value.
#[derive(Clone, Debug, Default)]
struct ElementSanitizer {
    /// Attribute names permitted on this element.
    allowed_attrs: Vec<String>,
    /// Attribute names consulted at the end of `sanitize_attributes`.
    required_attrs: Vec<String>,
    /// Class names permitted on this element.
    allowed_classes: Vec<String>,
    /// Allowed protocol prefixes, keyed by attribute name.
    protocol_sanitizers: HashMap<String, Vec<String>>,
}
28
-
29
/// Mutable sanitizer state held inside `SelmaSanitizer`'s `RefCell`.
#[derive(Clone, Debug)]
pub struct Sanitizer {
    /// One flag byte per tag, indexed by `Tag::index`.
    flags: [u8; crate::tags::Tag::TAG_COUNT],
    /// Attribute names allowed on every element (the "all" bucket).
    allowed_attrs: Vec<String>,
    /// Class names allowed on every element (the "all" bucket).
    allowed_classes: Vec<String>,
    /// Per-element rules, keyed by element name.
    element_sanitizers: HashMap<String, ElementSanitizer>,

    /// When true, `escape_tagfilter` removes elements whose tag is reported
    /// as escape-worthy by `Tag::is_tag_escapeworthy`.
    pub escape_tagfilter: bool,
    /// Stored flag exposed through `get_allow_comments`.
    pub allow_comments: bool,
    /// Stored flag exposed through `get_allow_doctype`.
    pub allow_doctype: bool,
    /// The Ruby configuration hash supplied (or defaulted) at construction.
    config: RHash,
}
41
-
42
/// Ruby-visible wrapper (`Selma::Sanitizer`) around the sanitizer state.
///
/// The state lives in a `RefCell` so the `&self` methods can mutate it.
#[derive(Clone, Debug)]
#[magnus::wrap(class = "Selma::Sanitizer")]
pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
45
-
46
- impl SelmaSanitizer {
47
/// Flag bit tested by `allow_element`.
const SELMA_SANITIZER_ALLOW: u8 = 1 << 0;
// Bit 1 (`1 << 1`) was set aside for an escape-tagfilter flag and is not used here.
/// Flag bit: drop the element together with its contents.
const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = 1 << 2;
/// Flag bit: surround a removed element with single spaces.
const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = 1 << 3;
51
-
52
- pub fn new(arguments: &[Value]) -> Result<Self, magnus::Error> {
53
- let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
54
- let (opt_config,): (Option<RHash>,) = args.optional;
55
-
56
- let config = match opt_config {
57
- Some(config) => config,
58
- // TODO: this seems like a hack to fix?
59
- None => magnus::eval::<RHash>(r#"Selma::Sanitizer::Config::DEFAULT"#).unwrap(),
60
- };
61
-
62
- let mut element_sanitizers = HashMap::new();
63
- crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
64
- let es = ElementSanitizer::default();
65
- element_sanitizers.insert(
66
- crate::tags::Tag::element_name_from_enum(html_tag).to_string(),
67
- es,
68
- );
69
- });
70
-
71
- Ok(Self(std::cell::RefCell::new(Sanitizer {
72
- flags: [0; crate::tags::Tag::TAG_COUNT],
73
- allowed_attrs: vec![],
74
- allowed_classes: vec![],
75
- element_sanitizers,
76
-
77
- escape_tagfilter: true,
78
- allow_comments: false,
79
- allow_doctype: true,
80
- config,
81
- })))
82
- }
83
-
84
- fn get_config(&self) -> Result<RHash, magnus::Error> {
85
- let binding = self.0.borrow();
86
-
87
- Ok(binding.config)
88
- }
89
-
90
- /// Toggle a sanitizer option on or off.
91
- fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
92
- let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
93
- if set {
94
- self.0.borrow_mut().flags[tag.index] |= flag;
95
- } else {
96
- self.0.borrow_mut().flags[tag.index] &= !flag;
97
- }
98
- }
99
-
100
- /// Toggles all sanitization options on or off.
101
- fn set_all_flags(&self, flag: u8, set: bool) {
102
- if set {
103
- crate::tags::Tag::html_tags()
104
- .iter()
105
- .enumerate()
106
- .for_each(|(iter, _)| {
107
- self.0.borrow_mut().flags[iter] |= flag;
108
- });
109
- } else {
110
- crate::tags::Tag::html_tags()
111
- .iter()
112
- .enumerate()
113
- .for_each(|(iter, _)| {
114
- self.0.borrow_mut().flags[iter] &= flag;
115
- });
116
- }
117
- }
118
-
119
- /// Whether or not to keep dangerous HTML tags.
120
- fn set_escape_tagfilter(&self, allow: bool) -> bool {
121
- self.0.borrow_mut().escape_tagfilter = allow;
122
- allow
123
- }
124
-
125
- pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
126
- if self.0.borrow().escape_tagfilter {
127
- let tag = crate::tags::Tag::tag_from_element(e);
128
- if crate::tags::Tag::is_tag_escapeworthy(tag) {
129
- e.remove();
130
- return true;
131
- }
132
- }
133
-
134
- false
135
- }
136
-
137
- pub fn get_escape_tagfilter(&self) -> bool {
138
- self.0.borrow().escape_tagfilter
139
- }
140
-
141
- /// Whether or not to keep HTML comments.
142
- fn set_allow_comments(&self, allow: bool) -> bool {
143
- self.0.borrow_mut().allow_comments = allow;
144
- allow
145
- }
146
-
147
- pub fn get_allow_comments(&self) -> bool {
148
- self.0.borrow().allow_comments
149
- }
150
-
151
- pub fn remove_comment(&self, c: &mut Comment) {
152
- c.remove();
153
- }
154
-
155
- /// Whether or not to keep HTML doctype.
156
- fn set_allow_doctype(&self, allow: bool) -> bool {
157
- self.0.borrow_mut().allow_doctype = allow;
158
- allow
159
- }
160
-
161
- /// Whether or not to keep HTML doctype.
162
- pub fn get_allow_doctype(&self) -> bool {
163
- self.0.borrow().allow_doctype
164
- }
165
-
166
- pub fn remove_doctype(&self, d: &mut Doctype) {
167
- d.remove();
168
- }
169
-
170
- fn set_allowed_attribute(&self, eln: Value, attr_name: String, allow: bool) -> bool {
171
- let mut binding = self.0.borrow_mut();
172
-
173
- let element_name = eln.to_r_string().unwrap().to_string().unwrap();
174
- if element_name == "all" {
175
- let allowed_attrs = &mut binding.allowed_attrs;
176
- Self::set_allowed(allowed_attrs, &attr_name, allow);
177
- } else {
178
- let element_sanitizers = &mut binding.element_sanitizers;
179
- let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
180
-
181
- element_sanitizer.allowed_attrs.push(attr_name);
182
- }
183
-
184
- allow
185
- }
186
-
187
- fn set_allowed_class(&self, element_name: String, class_name: String, allow: bool) -> bool {
188
- let mut binding = self.0.borrow_mut();
189
- if element_name == "all" {
190
- let allowed_classes = &mut binding.allowed_classes;
191
- Self::set_allowed(allowed_classes, &class_name, allow);
192
- } else {
193
- let element_sanitizers = &mut binding.element_sanitizers;
194
- let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
195
-
196
- let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
197
- Self::set_allowed(allowed_classes, &class_name, allow)
198
- }
199
- allow
200
- }
201
-
202
- fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
203
- let mut binding = self.0.borrow_mut();
204
-
205
- let element_sanitizers = &mut binding.element_sanitizers;
206
- let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
207
-
208
- let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
209
-
210
- for opt_allowed_protocol in allow_list.each() {
211
- let allowed_protocol = opt_allowed_protocol.unwrap();
212
- let protocol_list = protocol_sanitizers.get_mut(&attr_name);
213
- if allowed_protocol.is_kind_of(class::string()) {
214
- match protocol_list {
215
- None => {
216
- protocol_sanitizers
217
- .insert(attr_name.to_string(), vec![allowed_protocol.to_string()]);
218
- }
219
- Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
220
- }
221
- } else if allowed_protocol.is_kind_of(class::symbol())
222
- && allowed_protocol.inspect() == ":relative"
223
- {
224
- match protocol_list {
225
- None => {
226
- protocol_sanitizers.insert(
227
- attr_name.to_string(),
228
- vec!["#".to_string(), "/".to_string()],
229
- );
230
- }
231
- Some(protocol_list) => {
232
- protocol_list.push("#".to_string());
233
- protocol_list.push("/".to_string());
234
- }
235
- }
236
- }
237
- }
238
- }
239
-
240
/// Adds `attr_name` to `set` (`allow == true`) or removes it (`allow == false`).
///
/// Adding a name that is already present is a no-op, and removal drops every
/// occurrence. The earlier version pushed duplicates and removed only one
/// copy, so a name allowed twice stayed allowed after being disallowed once.
fn set_allowed(set: &mut Vec<String>, attr_name: &str, allow: bool) {
    if allow {
        if !set.iter().any(|existing| existing == attr_name) {
            set.push(attr_name.to_string());
        }
    } else {
        set.retain(|existing| existing != attr_name);
    }
}
247
-
248
- pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), AttributeNameError> {
249
- let tag = crate::tags::Tag::tag_from_element(element);
250
- let tag_name = &element.tag_name();
251
- let element_sanitizer = {
252
- let mut binding = self.0.borrow_mut();
253
- let element_sanitizers = &mut binding.element_sanitizers;
254
- Self::get_element_sanitizer(element_sanitizers, tag_name).clone()
255
- };
256
-
257
- let binding = self.0.borrow();
258
-
259
- // FIXME: This is a hack to get around the fact that we can't borrow
260
- let attribute_map: HashMap<String, String> = element
261
- .attributes()
262
- .iter()
263
- .map(|a| (a.name(), a.value()))
264
- .collect();
265
-
266
- for (attr_name, attr_val) in attribute_map.iter() {
267
- // you can actually embed <!-- ... --> inside
268
- // an HTML tag to pass malicious data. If this is
269
- // encountered, remove the entire element to be safe.
270
- if attr_name.starts_with("<!--") {
271
- Self::force_remove_element(self, element);
272
- return Ok(());
273
- }
274
-
275
- // first, trim leading spaces and unescape any encodings
276
- let trimmed = attr_val.trim_start();
277
- let x = escapist::unescape_html(trimmed.as_bytes());
278
- let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
279
-
280
- let should_keep_attrubute = match Self::should_keep_attribute(
281
- &binding,
282
- element,
283
- &element_sanitizer,
284
- attr_name,
285
- &unescaped_attr_val,
286
- ) {
287
- Ok(should_keep) => should_keep,
288
- Err(e) => {
289
- return Err(e);
290
- }
291
- };
292
-
293
- if !should_keep_attrubute {
294
- element.remove_attribute(attr_name);
295
- } else {
296
- // Prevent the use of `<meta>` elements that set a charset other than UTF-8,
297
- // since output is always UTF-8.
298
- if crate::tags::Tag::is_meta(tag) {
299
- if attr_name == "charset" && unescaped_attr_val != "utf-8" {
300
- match element.set_attribute(attr_name, "utf-8") {
301
- Ok(_) => {}
302
- Err(err) => {
303
- return Err(err);
304
- }
305
- }
306
- }
307
- } else if !unescaped_attr_val.is_empty() {
308
- let mut buf = String::new();
309
- // ...then, escape any special characters, for security
310
- if attr_name == "href" {
311
- escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
312
- } else {
313
- escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
314
- };
315
-
316
- match element.set_attribute(attr_name, &buf) {
317
- Ok(_) => {}
318
- Err(err) => {
319
- return Err(err);
320
- }
321
- }
322
- }
323
- }
324
- }
325
-
326
- let required = &element_sanitizer.required_attrs;
327
- if required.contains(&"*".to_string()) {
328
- return Ok(());
329
- }
330
- for attr in element.attributes().iter() {
331
- let attr_name = &attr.name();
332
- if required.contains(attr_name) {
333
- return Ok(());
334
- }
335
- }
336
-
337
- Ok(())
338
- }
339
-
340
- fn should_keep_attribute(
341
- binding: &Sanitizer,
342
- element: &mut Element,
343
- element_sanitizer: &ElementSanitizer,
344
- attr_name: &String,
345
- attr_val: &String,
346
- ) -> Result<bool, AttributeNameError> {
347
- let mut allowed: bool = false;
348
- let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
349
- let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
350
-
351
- if element_allowed_attrs {
352
- allowed = true;
353
- }
354
-
355
- if !allowed && sanitizer_allowed_attrs {
356
- allowed = true;
357
- }
358
-
359
- if !allowed {
360
- return Ok(false);
361
- }
362
-
363
- let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
364
- match protocol_sanitizer_values {
365
- None => {
366
- // has a protocol, but no sanitization list
367
- if !attr_val.is_empty() && Self::has_protocol(attr_val) {
368
- return Ok(false);
369
- }
370
- }
371
- Some(protocol_sanitizer_values) => {
372
- if !attr_val.is_empty()
373
- && !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
374
- {
375
- return Ok(false);
376
- }
377
- }
378
- }
379
-
380
- if attr_name == "class" {
381
- return Self::sanitize_class_attribute(
382
- binding,
383
- element,
384
- element_sanitizer,
385
- attr_name,
386
- attr_val,
387
- );
388
- }
389
-
390
- Ok(true)
391
- }
392
-
393
/// Returns true when `attr_val` contains the sequence `://`.
///
/// Values such as `mailto:x` or `javascript:x`, which have a scheme but no
/// `//`, are not detected by this check.
fn has_protocol(attr_val: &str) -> bool {
    attr_val.find("://").is_some()
}
396
-
397
/// Checks `attr_val` against the list of allowed protocols.
///
/// The first `:`, `/` or `#` in the value decides the outcome:
/// * `/` – allowed only if the list contains `"/"`;
/// * `#` – allowed only if the list contains `"#"`;
/// * `:` – the text before it, lower-cased, must be in the list.
///
/// A value with none of these characters is compared using everything except
/// its final character; this keeps the behaviour of the earlier
/// implementation for ASCII input. An empty value is never allowed.
///
/// Unlike the earlier implementation, which mixed character counts with byte
/// offsets, this version works on character boundaries and cannot panic on
/// non-ASCII or empty input.
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
    let is_allowed = |needle: &str| protocols_allowed.iter().any(|p| p == needle);

    let first_delimiter = attr_val
        .char_indices()
        .find(|&(_, c)| matches!(c, ':' | '/' | '#'));

    match first_delimiter {
        Some((_, '/')) => is_allowed("/"),
        Some((_, '#')) => is_allowed("#"),
        // Allow protocol name to be case-insensitive
        Some((idx, _)) => is_allowed(&attr_val[..idx].to_lowercase()),
        None => match attr_val.char_indices().last() {
            Some((last_idx, _)) => is_allowed(&attr_val[..last_idx].to_lowercase()),
            None => false,
        },
    }
}
426
-
427
- fn sanitize_class_attribute(
428
- binding: &Sanitizer,
429
- element: &mut Element,
430
- element_sanitizer: &ElementSanitizer,
431
- attr_name: &str,
432
- attr_val: &str,
433
- ) -> Result<bool, lol_html::errors::AttributeNameError> {
434
- let allowed_global = &binding.allowed_classes;
435
-
436
- let mut valid_classes: Vec<String> = vec![];
437
-
438
- let allowed_local = &element_sanitizer.allowed_classes;
439
-
440
- // No class filters, so everything goes through
441
- if allowed_global.is_empty() && allowed_local.is_empty() {
442
- return Ok(true);
443
- }
444
-
445
- let attr_value = attr_val.trim_start();
446
- attr_value
447
- .split_whitespace()
448
- .map(|s| s.to_string())
449
- .for_each(|class| {
450
- if allowed_global.contains(&class) || allowed_local.contains(&class) {
451
- valid_classes.push(class);
452
- }
453
- });
454
-
455
- if valid_classes.is_empty() {
456
- return Ok(false);
457
- }
458
-
459
- match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
460
- Ok(_) => Ok(true),
461
- Err(err) => Err(err),
462
- }
463
- }
464
-
465
- pub fn allow_element(&self, element: &mut Element) -> bool {
466
- let tag = crate::tags::Tag::tag_from_element(element);
467
- let flags: u8 = self.0.borrow().flags[tag.index];
468
-
469
- (flags & Self::SELMA_SANITIZER_ALLOW) == 0
470
- }
471
-
472
- pub fn try_remove_element(&self, element: &mut Element) -> bool {
473
- let tag = crate::tags::Tag::tag_from_element(element);
474
- let flags: u8 = self.0.borrow().flags[tag.index];
475
-
476
- let should_remove = !element.removed() && self.allow_element(element);
477
-
478
- if should_remove {
479
- if crate::tags::Tag::has_text_content(tag) {
480
- Self::remove_element(
481
- element,
482
- tag.self_closing,
483
- Self::SELMA_SANITIZER_REMOVE_CONTENTS,
484
- );
485
- } else {
486
- Self::remove_element(element, tag.self_closing, flags);
487
- }
488
-
489
- Self::check_if_end_tag_needs_removal(element);
490
- } else {
491
- // anything in <iframe> must be removed, if it's kept
492
- if crate::tags::Tag::is_iframe(tag) {
493
- if self.0.borrow().flags[tag.index] != 0 {
494
- element.set_inner_content(" ", ContentType::Text);
495
- } else {
496
- element.set_inner_content("", ContentType::Text);
497
- }
498
- }
499
- }
500
-
501
- should_remove
502
- }
503
-
504
- fn remove_element(element: &mut Element, self_closing: bool, flags: u8) {
505
- let wrap_whitespace = (flags & Self::SELMA_SANITIZER_WRAP_WHITESPACE) != 0;
506
- let remove_contents = (flags & Self::SELMA_SANITIZER_REMOVE_CONTENTS) != 0;
507
-
508
- if remove_contents {
509
- element.remove();
510
- } else {
511
- if wrap_whitespace {
512
- if self_closing {
513
- element.after(" ", ContentType::Text);
514
- } else {
515
- element.before(" ", ContentType::Text);
516
- element.after(" ", ContentType::Text);
517
- }
518
- }
519
- element.remove_and_keep_content();
520
- }
521
- }
522
-
523
- pub fn force_remove_element(&self, element: &mut Element) {
524
- let tag = crate::tags::Tag::tag_from_element(element);
525
- let self_closing = tag.self_closing;
526
- Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
527
- Self::check_if_end_tag_needs_removal(element);
528
- }
529
-
530
- fn check_if_end_tag_needs_removal(element: &mut Element) {
531
- if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
532
- element
533
- .on_end_tag(move |end| {
534
- Self::remove_end_tag(end);
535
- Ok(())
536
- })
537
- .unwrap();
538
- }
539
- }
540
-
541
- fn remove_end_tag(end_tag: &mut EndTag) {
542
- end_tag.remove();
543
- }
544
-
545
- fn get_element_sanitizer<'a>(
546
- element_sanitizers: &'a mut HashMap<String, ElementSanitizer>,
547
- element_name: &str,
548
- ) -> &'a mut ElementSanitizer {
549
- element_sanitizers
550
- .entry(element_name.to_string())
551
- .or_insert_with(ElementSanitizer::default)
552
- }
553
- }
554
-
555
/// Defines the `Sanitizer` class under the given Ruby module and registers
/// its constructor and instance methods.
///
/// The trailing number in each `function!`/`method!` call is the arity
/// handed to magnus (`-1` for `new`, which scans its own arguments).
///
/// # Errors
///
/// Returns the first `magnus::Error` produced while defining the class or
/// any of its methods.
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
    let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;

    c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
    c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;

    c_sanitizer.define_method("set_flag", method!(SelmaSanitizer::set_flag, 3))?;
    c_sanitizer.define_method("set_all_flags", method!(SelmaSanitizer::set_all_flags, 2))?;

    c_sanitizer.define_method(
        "set_escape_tagfilter",
        method!(SelmaSanitizer::set_escape_tagfilter, 1),
    )?;
    c_sanitizer.define_method(
        "escape_tagfilter",
        method!(SelmaSanitizer::get_escape_tagfilter, 0),
    )?;

    c_sanitizer.define_method(
        "set_allow_comments",
        method!(SelmaSanitizer::set_allow_comments, 1),
    )?;
    c_sanitizer.define_method(
        "allow_comments",
        method!(SelmaSanitizer::get_allow_comments, 0),
    )?;

    c_sanitizer.define_method(
        "set_allow_doctype",
        method!(SelmaSanitizer::set_allow_doctype, 1),
    )?;
    c_sanitizer.define_method(
        "allow_doctype",
        method!(SelmaSanitizer::get_allow_doctype, 0),
    )?;

    c_sanitizer.define_method(
        "set_allowed_attribute",
        method!(SelmaSanitizer::set_allowed_attribute, 3),
    )?;

    c_sanitizer.define_method(
        "set_allowed_class",
        method!(SelmaSanitizer::set_allowed_class, 3),
    )?;

    c_sanitizer.define_method(
        "set_allowed_protocols",
        method!(SelmaSanitizer::set_allowed_protocols, 3),
    )?;

    Ok(())
}