selma 0.0.7-x86_64-darwin → 0.1.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,607 +0,0 @@
1
- use std::{borrow::BorrowMut, collections::HashMap};
2
-
3
- use lol_html::{
4
- errors::AttributeNameError,
5
- html_content::{Comment, ContentType, Doctype, Element, EndTag},
6
- };
7
- use magnus::{class, function, method, scan_args, Module, Object, RArray, RHash, RModule, Value};
8
-
9
/// Sanitization rules that apply to a single element name.
#[derive(Clone, Debug)]
struct ElementSanitizer {
    /// Attribute names permitted on this element.
    allowed_attrs: Vec<String>,
    /// Attribute names recorded as required for this element.
    required_attrs: Vec<String>,
    /// Class names permitted on this element.
    allowed_classes: Vec<String>,
    /// Accepted protocol prefixes, keyed by attribute name.
    protocol_sanitizers: HashMap<String, Vec<String>>,
}
16
-
17
- impl Default for ElementSanitizer {
18
- fn default() -> Self {
19
- ElementSanitizer {
20
- allowed_attrs: vec![],
21
- allowed_classes: vec![],
22
- required_attrs: vec![],
23
-
24
- protocol_sanitizers: HashMap::new(),
25
- }
26
- }
27
- }
28
-
29
- #[derive(Clone, Debug)]
30
- pub struct Sanitizer {
31
- flags: [u8; crate::tags::Tag::TAG_COUNT],
32
- allowed_attrs: Vec<String>,
33
- allowed_classes: Vec<String>,
34
- element_sanitizers: HashMap<String, ElementSanitizer>,
35
-
36
- pub escape_tagfilter: bool,
37
- pub allow_comments: bool,
38
- pub allow_doctype: bool,
39
- config: RHash,
40
- }
41
-
42
- #[derive(Clone, Debug)]
43
- #[magnus::wrap(class = "Selma::Sanitizer")]
44
- pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
45
-
46
- impl SelmaSanitizer {
47
/// Bit 0 of a tag's flag byte; `allow_element` tests this bit.
const SELMA_SANITIZER_ALLOW: u8 = 0b0001;
// Bit 1 is currently unused:
// const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
/// Bit 2: `remove_element` drops the element together with its contents.
const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = 0b0100;
/// Bit 3: `remove_element` inserts spaces around an element whose contents are kept.
const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = 0b1000;
51
-
52
- pub fn new(arguments: &[Value]) -> Result<Self, magnus::Error> {
53
- let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
54
- let (opt_config,): (Option<RHash>,) = args.optional;
55
-
56
- let config = match opt_config {
57
- Some(config) => config,
58
- // TODO: this seems like a hack to fix?
59
- None => magnus::eval::<RHash>(r#"Selma::Sanitizer::Config::DEFAULT"#).unwrap(),
60
- };
61
-
62
- let mut element_sanitizers = HashMap::new();
63
- crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
64
- let es = ElementSanitizer::default();
65
- element_sanitizers.insert(
66
- crate::tags::Tag::element_name_from_enum(html_tag).to_string(),
67
- es,
68
- );
69
- });
70
-
71
- Ok(Self(std::cell::RefCell::new(Sanitizer {
72
- flags: [0; crate::tags::Tag::TAG_COUNT],
73
- allowed_attrs: vec![],
74
- allowed_classes: vec![],
75
- element_sanitizers,
76
-
77
- escape_tagfilter: true,
78
- allow_comments: false,
79
- allow_doctype: true,
80
- config,
81
- })))
82
- }
83
-
84
- fn get_config(&self) -> Result<RHash, magnus::Error> {
85
- let binding = self.0.borrow();
86
-
87
- Ok(binding.config)
88
- }
89
-
90
- /// Toggle a sanitizer option on or off.
91
- fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
92
- let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
93
- if set {
94
- self.0.borrow_mut().flags[tag.index] |= flag;
95
- } else {
96
- self.0.borrow_mut().flags[tag.index] &= !flag;
97
- }
98
- }
99
-
100
- /// Toggles all sanitization options on or off.
101
- fn set_all_flags(&self, flag: u8, set: bool) {
102
- if set {
103
- crate::tags::Tag::html_tags()
104
- .iter()
105
- .enumerate()
106
- .for_each(|(iter, _)| {
107
- self.0.borrow_mut().flags[iter] |= flag;
108
- });
109
- } else {
110
- crate::tags::Tag::html_tags()
111
- .iter()
112
- .enumerate()
113
- .for_each(|(iter, _)| {
114
- self.0.borrow_mut().flags[iter] &= flag;
115
- });
116
- }
117
- }
118
-
119
- /// Whether or not to keep dangerous HTML tags.
120
- fn set_escape_tagfilter(&self, allow: bool) -> bool {
121
- self.0.borrow_mut().escape_tagfilter = allow;
122
- allow
123
- }
124
-
125
- pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
126
- if self.0.borrow().escape_tagfilter {
127
- let tag = crate::tags::Tag::tag_from_element(e);
128
- if crate::tags::Tag::is_tag_escapeworthy(tag) {
129
- e.remove();
130
- return true;
131
- }
132
- }
133
-
134
- false
135
- }
136
-
137
- pub fn get_escape_tagfilter(&self) -> bool {
138
- self.0.borrow().escape_tagfilter
139
- }
140
-
141
- /// Whether or not to keep HTML comments.
142
- fn set_allow_comments(&self, allow: bool) -> bool {
143
- self.0.borrow_mut().allow_comments = allow;
144
- allow
145
- }
146
-
147
- pub fn get_allow_comments(&self) -> bool {
148
- self.0.borrow().allow_comments
149
- }
150
-
151
- pub fn remove_comment(&self, c: &mut Comment) {
152
- c.remove();
153
- }
154
-
155
- /// Whether or not to keep HTML doctype.
156
- fn set_allow_doctype(&self, allow: bool) -> bool {
157
- self.0.borrow_mut().allow_doctype = allow;
158
- allow
159
- }
160
-
161
- /// Whether or not to keep HTML doctype.
162
- pub fn get_allow_doctype(&self) -> bool {
163
- self.0.borrow().allow_doctype
164
- }
165
-
166
- pub fn remove_doctype(&self, d: &mut Doctype) {
167
- d.remove();
168
- }
169
-
170
- fn set_allowed_attribute(&self, eln: Value, attr_name: String, allow: bool) -> bool {
171
- let mut binding = self.0.borrow_mut();
172
-
173
- let element_name = eln.to_r_string().unwrap().to_string().unwrap();
174
- if element_name == "all" {
175
- let allowed_attrs = &mut binding.allowed_attrs;
176
- Self::set_allowed(allowed_attrs, &attr_name, allow);
177
- } else {
178
- let element_sanitizers = &mut binding.element_sanitizers;
179
- let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
180
-
181
- element_sanitizer.allowed_attrs.push(attr_name);
182
- }
183
-
184
- allow
185
- }
186
-
187
- fn set_allowed_class(&self, element_name: String, class_name: String, allow: bool) -> bool {
188
- let mut binding = self.0.borrow_mut();
189
- if element_name == "all" {
190
- let allowed_classes = &mut binding.allowed_classes;
191
- Self::set_allowed(allowed_classes, &class_name, allow);
192
- } else {
193
- let element_sanitizers = &mut binding.element_sanitizers;
194
- let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
195
-
196
- let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
197
- Self::set_allowed(allowed_classes, &class_name, allow)
198
- }
199
- allow
200
- }
201
-
202
- fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
203
- let mut binding = self.0.borrow_mut();
204
-
205
- let element_sanitizers = &mut binding.element_sanitizers;
206
- let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
207
-
208
- let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
209
-
210
- for opt_allowed_protocol in allow_list.each() {
211
- let allowed_protocol = opt_allowed_protocol.unwrap();
212
- let protocol_list = protocol_sanitizers.get_mut(&attr_name);
213
- if allowed_protocol.is_kind_of(class::string()) {
214
- match protocol_list {
215
- None => {
216
- protocol_sanitizers
217
- .insert(attr_name.to_string(), vec![allowed_protocol.to_string()]);
218
- }
219
- Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
220
- }
221
- } else if allowed_protocol.is_kind_of(class::symbol())
222
- && allowed_protocol.inspect() == ":relative"
223
- {
224
- match protocol_list {
225
- None => {
226
- protocol_sanitizers.insert(
227
- attr_name.to_string(),
228
- vec!["#".to_string(), "/".to_string()],
229
- );
230
- }
231
- Some(protocol_list) => {
232
- protocol_list.push("#".to_string());
233
- protocol_list.push("/".to_string());
234
- }
235
- }
236
- }
237
- }
238
- }
239
-
240
/// Adds `attr_name` to `set` (when `allow` is true) or removes it (when false).
///
/// The list is treated as a set: allowing a name that is already present is a
/// no-op, and disallowing removes every occurrence. The previous version pushed
/// duplicates and removed only one of them, so a name allowed twice stayed
/// allowed after a single disallow. The order of the remaining entries is kept.
fn set_allowed(set: &mut Vec<String>, attr_name: &str, allow: bool) {
    if allow {
        if !set.iter().any(|existing| existing == attr_name) {
            set.push(attr_name.to_string());
        }
    } else {
        set.retain(|existing| existing != attr_name);
    }
}
247
-
248
- pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), AttributeNameError> {
249
- let tag = crate::tags::Tag::tag_from_element(element);
250
- let tag_name = &element.tag_name();
251
- let element_sanitizer = {
252
- let mut binding = self.0.borrow_mut();
253
- let element_sanitizers = &mut binding.element_sanitizers;
254
- Self::get_element_sanitizer(element_sanitizers, tag_name).clone()
255
- };
256
-
257
- let binding = self.0.borrow();
258
-
259
- // FIXME: This is a hack to get around the fact that we can't borrow
260
- let attribute_map: HashMap<String, String> = element
261
- .attributes()
262
- .iter()
263
- .map(|a| (a.name(), a.value()))
264
- .collect();
265
-
266
- for (attr_name, attr_val) in attribute_map.iter() {
267
- // you can actually embed <!-- ... --> inside
268
- // an HTML tag to pass malicious data. If this is
269
- // encountered, remove the entire element to be safe.
270
- if attr_name.starts_with("<!--") {
271
- Self::force_remove_element(self, element);
272
- return Ok(());
273
- }
274
-
275
- // first, trim leading spaces and unescape any encodings
276
- let trimmed = attr_val.trim_start();
277
- let x = escapist::unescape_html(trimmed.as_bytes());
278
- let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
279
-
280
- let should_keep_attrubute = match Self::should_keep_attribute(
281
- &binding,
282
- element,
283
- &element_sanitizer,
284
- attr_name,
285
- &unescaped_attr_val,
286
- ) {
287
- Ok(should_keep) => should_keep,
288
- Err(e) => {
289
- return Err(e);
290
- }
291
- };
292
-
293
- if !should_keep_attrubute {
294
- element.remove_attribute(attr_name);
295
- } else {
296
- // Prevent the use of `<meta>` elements that set a charset other than UTF-8,
297
- // since output is always UTF-8.
298
- if crate::tags::Tag::is_meta(tag) {
299
- if attr_name == "charset" && unescaped_attr_val != "utf-8" {
300
- match element.set_attribute(attr_name, "utf-8") {
301
- Ok(_) => {}
302
- Err(err) => {
303
- return Err(err);
304
- }
305
- }
306
- }
307
- } else if !unescaped_attr_val.is_empty() {
308
- let mut buf = String::new();
309
- // ...then, escape any special characters, for security
310
- if attr_name == "href" {
311
- escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
312
- } else {
313
- escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
314
- };
315
-
316
- match element.set_attribute(attr_name, &buf) {
317
- Ok(_) => {}
318
- Err(err) => {
319
- return Err(err);
320
- }
321
- }
322
- }
323
- }
324
- }
325
-
326
- let required = &element_sanitizer.required_attrs;
327
- if required.contains(&"*".to_string()) {
328
- return Ok(());
329
- }
330
- for attr in element.attributes().iter() {
331
- let attr_name = &attr.name();
332
- if required.contains(attr_name) {
333
- return Ok(());
334
- }
335
- }
336
-
337
- Ok(())
338
- }
339
-
340
- fn should_keep_attribute(
341
- binding: &Sanitizer,
342
- element: &mut Element,
343
- element_sanitizer: &ElementSanitizer,
344
- attr_name: &String,
345
- attr_val: &String,
346
- ) -> Result<bool, AttributeNameError> {
347
- let mut allowed: bool = false;
348
- let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
349
- let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
350
-
351
- if element_allowed_attrs {
352
- allowed = true;
353
- }
354
-
355
- if !allowed && sanitizer_allowed_attrs {
356
- allowed = true;
357
- }
358
-
359
- if !allowed {
360
- return Ok(false);
361
- }
362
-
363
- let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
364
- match protocol_sanitizer_values {
365
- None => {
366
- // has a protocol, but no sanitization list
367
- if !attr_val.is_empty() && Self::has_protocol(attr_val) {
368
- return Ok(false);
369
- }
370
- }
371
- Some(protocol_sanitizer_values) => {
372
- if !attr_val.is_empty()
373
- && !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
374
- {
375
- return Ok(false);
376
- }
377
- }
378
- }
379
-
380
- if attr_name == "class" {
381
- return Self::sanitize_class_attribute(
382
- binding,
383
- element,
384
- element_sanitizer,
385
- attr_name,
386
- attr_val,
387
- );
388
- }
389
-
390
- Ok(true)
391
- }
392
-
393
/// Reports whether `attr_val` contains the substring `://` anywhere.
///
/// NOTE(review): values such as `javascript:alert(1)` have no `//` and are
/// therefore not detected here — confirm that is intended.
fn has_protocol(attr_val: &str) -> bool {
    attr_val.find("://").is_some()
}
396
-
397
/// Checks the leading protocol (or relative marker) of `attr_val` against
/// `protocols_allowed`.
///
/// The value is scanned for the first `:`, `/` or `#`:
/// * `/` first — accepted only if `"/"` is in the allow-list;
/// * `#` first — accepted only if `"#"` is in the allow-list;
/// * `:` first — the text before it, lower-cased, must be in the allow-list;
/// * none of them (including the empty string) — rejected.
///
/// The earlier implementation mixed character positions with byte lengths and
/// byte slicing, so it panicked on empty input and on values containing
/// multi-byte characters. For delimiter-free values it compared the value minus
/// its last character against the allow-list; such values are now always
/// rejected.
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
    let first_delimiter = attr_val
        .char_indices()
        .find(|&(_, c)| matches!(c, ':' | '/' | '#'));

    let (delimiter_at, delimiter) = match first_delimiter {
        Some(found) => found,
        None => return false,
    };

    match delimiter {
        '/' => protocols_allowed.iter().any(|allowed| allowed == "/"),
        '#' => protocols_allowed.iter().any(|allowed| allowed == "#"),
        _ => {
            // `delimiter_at` is a byte offset on a char boundary, so slicing is safe.
            // Protocol names are matched case-insensitively.
            let protocol = attr_val[..delimiter_at].to_lowercase();
            protocols_allowed.contains(&protocol)
        }
    }
}
426
-
427
- fn sanitize_class_attribute(
428
- binding: &Sanitizer,
429
- element: &mut Element,
430
- element_sanitizer: &ElementSanitizer,
431
- attr_name: &str,
432
- attr_val: &str,
433
- ) -> Result<bool, lol_html::errors::AttributeNameError> {
434
- let allowed_global = &binding.allowed_classes;
435
-
436
- let mut valid_classes: Vec<String> = vec![];
437
-
438
- let allowed_local = &element_sanitizer.allowed_classes;
439
-
440
- // No class filters, so everything goes through
441
- if allowed_global.is_empty() && allowed_local.is_empty() {
442
- return Ok(true);
443
- }
444
-
445
- let attr_value = attr_val.trim_start();
446
- attr_value
447
- .split_whitespace()
448
- .map(|s| s.to_string())
449
- .for_each(|class| {
450
- if allowed_global.contains(&class) || allowed_local.contains(&class) {
451
- valid_classes.push(class);
452
- }
453
- });
454
-
455
- if valid_classes.is_empty() {
456
- return Ok(false);
457
- }
458
-
459
- match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
460
- Ok(_) => Ok(true),
461
- Err(err) => Err(err),
462
- }
463
- }
464
-
465
- pub fn allow_element(&self, element: &mut Element) -> bool {
466
- let tag = crate::tags::Tag::tag_from_element(element);
467
- let flags: u8 = self.0.borrow().flags[tag.index];
468
-
469
- (flags & Self::SELMA_SANITIZER_ALLOW) == 0
470
- }
471
-
472
- pub fn try_remove_element(&self, element: &mut Element) -> bool {
473
- let tag = crate::tags::Tag::tag_from_element(element);
474
- let flags: u8 = self.0.borrow().flags[tag.index];
475
-
476
- let should_remove = !element.removed() && self.allow_element(element);
477
-
478
- if should_remove {
479
- if crate::tags::Tag::has_text_content(tag) {
480
- Self::remove_element(
481
- element,
482
- tag.self_closing,
483
- Self::SELMA_SANITIZER_REMOVE_CONTENTS,
484
- );
485
- } else {
486
- Self::remove_element(element, tag.self_closing, flags);
487
- }
488
-
489
- Self::check_if_end_tag_needs_removal(element);
490
- } else {
491
- // anything in <iframe> must be removed, if it's kept
492
- if crate::tags::Tag::is_iframe(tag) {
493
- if self.0.borrow().flags[tag.index] != 0 {
494
- element.set_inner_content(" ", ContentType::Text);
495
- } else {
496
- element.set_inner_content("", ContentType::Text);
497
- }
498
- }
499
- }
500
-
501
- should_remove
502
- }
503
-
504
- fn remove_element(element: &mut Element, self_closing: bool, flags: u8) {
505
- let wrap_whitespace = (flags & Self::SELMA_SANITIZER_WRAP_WHITESPACE) != 0;
506
- let remove_contents = (flags & Self::SELMA_SANITIZER_REMOVE_CONTENTS) != 0;
507
-
508
- if remove_contents {
509
- element.remove();
510
- } else {
511
- if wrap_whitespace {
512
- if self_closing {
513
- element.after(" ", ContentType::Text);
514
- } else {
515
- element.before(" ", ContentType::Text);
516
- element.after(" ", ContentType::Text);
517
- }
518
- }
519
- element.remove_and_keep_content();
520
- }
521
- }
522
-
523
- pub fn force_remove_element(&self, element: &mut Element) {
524
- let tag = crate::tags::Tag::tag_from_element(element);
525
- let self_closing = tag.self_closing;
526
- Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
527
- Self::check_if_end_tag_needs_removal(element);
528
- }
529
-
530
- fn check_if_end_tag_needs_removal(element: &mut Element) {
531
- if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
532
- element
533
- .on_end_tag(move |end| {
534
- Self::remove_end_tag(end);
535
- Ok(())
536
- })
537
- .unwrap();
538
- }
539
- }
540
-
541
- fn remove_end_tag(end_tag: &mut EndTag) {
542
- end_tag.remove();
543
- }
544
-
545
- fn get_element_sanitizer<'a>(
546
- element_sanitizers: &'a mut HashMap<String, ElementSanitizer>,
547
- element_name: &str,
548
- ) -> &'a mut ElementSanitizer {
549
- element_sanitizers
550
- .entry(element_name.to_string())
551
- .or_insert_with(ElementSanitizer::default)
552
- }
553
- }
554
-
555
- pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
556
- let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;
557
-
558
- c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
559
- c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;
560
-
561
- c_sanitizer.define_method("set_flag", method!(SelmaSanitizer::set_flag, 3))?;
562
- c_sanitizer.define_method("set_all_flags", method!(SelmaSanitizer::set_all_flags, 2))?;
563
-
564
- c_sanitizer.define_method(
565
- "set_escape_tagfilter",
566
- method!(SelmaSanitizer::set_escape_tagfilter, 1),
567
- )?;
568
- c_sanitizer.define_method(
569
- "escape_tagfilter",
570
- method!(SelmaSanitizer::get_escape_tagfilter, 0),
571
- )?;
572
-
573
- c_sanitizer.define_method(
574
- "set_allow_comments",
575
- method!(SelmaSanitizer::set_allow_comments, 1),
576
- )?;
577
- c_sanitizer.define_method(
578
- "allow_comments",
579
- method!(SelmaSanitizer::get_allow_comments, 0),
580
- )?;
581
-
582
- c_sanitizer.define_method(
583
- "set_allow_doctype",
584
- method!(SelmaSanitizer::set_allow_doctype, 1),
585
- )?;
586
- c_sanitizer.define_method(
587
- "allow_doctype",
588
- method!(SelmaSanitizer::get_allow_doctype, 0),
589
- )?;
590
-
591
- c_sanitizer.define_method(
592
- "set_allowed_attribute",
593
- method!(SelmaSanitizer::set_allowed_attribute, 3),
594
- )?;
595
-
596
- c_sanitizer.define_method(
597
- "set_allowed_class",
598
- method!(SelmaSanitizer::set_allowed_class, 3),
599
- )?;
600
-
601
- c_sanitizer.define_method(
602
- "set_allowed_protocols",
603
- method!(SelmaSanitizer::set_allowed_protocols, 3),
604
- )?;
605
-
606
- Ok(())
607
- }