selma 0.0.2-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/LICENSE.txt +21 -0
 - data/README.md +173 -0
 - data/ext/selma/Cargo.toml +14 -0
 - data/ext/selma/_util.rb +102 -0
 - data/ext/selma/extconf.rb +6 -0
 - data/ext/selma/src/html/element.rs +195 -0
 - data/ext/selma/src/html/end_tag.rs +35 -0
 - data/ext/selma/src/html.rs +17 -0
 - data/ext/selma/src/lib.rs +23 -0
 - data/ext/selma/src/native_ref_wrap.rs +79 -0
 - data/ext/selma/src/rewriter.rs +441 -0
 - data/ext/selma/src/sanitizer.rs +578 -0
 - data/ext/selma/src/selector.rs +115 -0
 - data/ext/selma/src/tags.rs +1133 -0
 - data/ext/selma/src/wrapped_struct.rs +92 -0
 - data/lib/selma/3.1/selma.bundle +0 -0
 - data/lib/selma/extension.rb +14 -0
 - data/lib/selma/html.rb +6 -0
 - data/lib/selma/rewriter.rb +6 -0
 - data/lib/selma/sanitizer/config/basic.rb +27 -0
 - data/lib/selma/sanitizer/config/default.rb +42 -0
 - data/lib/selma/sanitizer/config/relaxed.rb +37 -0
 - data/lib/selma/sanitizer/config/restricted.rb +13 -0
 - data/lib/selma/sanitizer/config.rb +67 -0
 - data/lib/selma/sanitizer.rb +85 -0
 - data/lib/selma/selector.rb +6 -0
 - data/lib/selma/version.rb +5 -0
 - data/lib/selma.rb +13 -0
 - data/selma.gemspec +41 -0
 - metadata +136 -0
 
| 
         @@ -0,0 +1,441 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            use std::{borrow::Cow, cell::RefCell, rc::Rc};
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            use lol_html::{
         
     | 
| 
      
 4 
     | 
    
         
            +
                doc_comments, doctype, element,
         
     | 
| 
      
 5 
     | 
    
         
            +
                html_content::{ContentType, Element, EndTag, TextChunk},
         
     | 
| 
      
 6 
     | 
    
         
            +
                text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
         
     | 
| 
      
 7 
     | 
    
         
            +
            };
         
     | 
| 
      
 8 
     | 
    
         
            +
            use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            use crate::{
         
     | 
| 
      
 11 
     | 
    
         
            +
                html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
         
     | 
| 
      
 12 
     | 
    
         
            +
                sanitizer::SelmaSanitizer,
         
     | 
| 
      
 13 
     | 
    
         
            +
                selector::SelmaSelector,
         
     | 
| 
      
 14 
     | 
    
         
            +
                tags::Tag,
         
     | 
| 
      
 15 
     | 
    
         
            +
                wrapped_struct::WrappedStruct,
         
     | 
| 
      
 16 
     | 
    
         
            +
            };
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            #[derive(Clone, Debug)]
         
     | 
| 
      
 19 
     | 
    
         
            +
            pub struct Handler {
         
     | 
| 
      
 20 
     | 
    
         
            +
                rb_handler: Value,
         
     | 
| 
      
 21 
     | 
    
         
            +
                rb_selector: WrappedStruct<SelmaSelector>,
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
                total_element_handler_calls: usize,
         
     | 
| 
      
 24 
     | 
    
         
            +
                total_elapsed_element_handlers: f64,
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                total_text_handler_calls: usize,
         
     | 
| 
      
 27 
     | 
    
         
            +
                total_elapsed_text_handlers: f64,
         
     | 
| 
      
 28 
     | 
    
         
            +
            }
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            pub struct Rewriter {
         
     | 
| 
      
 31 
     | 
    
         
            +
                sanitizer: Option<SelmaSanitizer>,
         
     | 
| 
      
 32 
     | 
    
         
            +
                handlers: Vec<Handler>,
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                total_elapsed: f64,
         
     | 
| 
      
 35 
     | 
    
         
            +
            }
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
            #[magnus::wrap(class = "Selma::Rewriter")]
         
     | 
| 
      
 38 
     | 
    
         
            +
            pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
            /// SAFETY: This is safe because we only access this data when the GVL is held.
         
     | 
| 
      
 41 
     | 
    
         
            +
            unsafe impl Send for SelmaRewriter {}
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            impl SelmaRewriter {
         
     | 
| 
      
 44 
     | 
    
         
            +
                // The three names below are presumably the Ruby callback method names
                // looked up on handler objects; their use sites are outside this
                // excerpt (NOTE(review): confirm). The `'static` lifetime is spelled
                // out because eliding it in an associated constant is deprecated.

                /// Method name `on_end_tag`.
                const SELMA_ON_END_TAG: &'static str = "on_end_tag";
                /// Method name `handle_element`.
                const SELMA_HANDLE_ELEMENT: &'static str = "handle_element";
                /// Method name `handle_text`.
                const SELMA_HANDLE_TEXT: &'static str = "handle_text";
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
                /// @yard
         
     | 
| 
      
 49 
     | 
    
         
            +
                /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
         
     | 
| 
      
 50 
     | 
    
         
            +
                /// @param sanitizer [Selma::Sanitizer] The sanitizer which performs the initial cleanup
         
     | 
| 
      
 51 
     | 
    
         
            +
                /// @param handlers  [Array<Selma::Selector>] The handlers to use to perform HTML rewriting
         
     | 
| 
      
 52 
     | 
    
         
            +
                /// @return [Selma::Rewriter]
         
     | 
| 
      
 53 
     | 
    
         
            +
                fn new(args: &[Value]) -> Result<Self, magnus::Error> {
         
     | 
| 
      
 54 
     | 
    
         
            +
                    let (rb_sanitizer, rb_handlers) = Self::scan_parse_args(args)?;
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                    let sanitizer = match rb_sanitizer {
         
     | 
| 
      
 57 
     | 
    
         
            +
                        None => {
         
     | 
| 
      
 58 
     | 
    
         
            +
                            let default_sanitizer = SelmaSanitizer::new(&[])?;
         
     | 
| 
      
 59 
     | 
    
         
            +
                            let wrapped_sanitizer = WrappedStruct::from(default_sanitizer);
         
     | 
| 
      
 60 
     | 
    
         
            +
                            wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
         
     | 
| 
      
 61 
     | 
    
         
            +
                            Some(wrapped_sanitizer.get().unwrap().to_owned())
         
     | 
| 
      
 62 
     | 
    
         
            +
                        }
         
     | 
| 
      
 63 
     | 
    
         
            +
                        Some(sanitizer_value) => match sanitizer_value {
         
     | 
| 
      
 64 
     | 
    
         
            +
                            None => None,
         
     | 
| 
      
 65 
     | 
    
         
            +
                            Some(sanitizer) => {
         
     | 
| 
      
 66 
     | 
    
         
            +
                                sanitizer.funcall::<&str, (), Value>("setup", ())?;
         
     | 
| 
      
 67 
     | 
    
         
            +
                                Some(sanitizer.get().unwrap().to_owned())
         
     | 
| 
      
 68 
     | 
    
         
            +
                            }
         
     | 
| 
      
 69 
     | 
    
         
            +
                        },
         
     | 
| 
      
 70 
     | 
    
         
            +
                    };
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
                    let handlers = match rb_handlers {
         
     | 
| 
      
 73 
     | 
    
         
            +
                        None => vec![],
         
     | 
| 
      
 74 
     | 
    
         
            +
                        Some(rb_handlers) => {
         
     | 
| 
      
 75 
     | 
    
         
            +
                            let mut handlers: Vec<Handler> = vec![];
         
     | 
| 
      
 76 
     | 
    
         
            +
             
     | 
| 
      
 77 
     | 
    
         
            +
                            for h in rb_handlers.each() {
         
     | 
| 
      
 78 
     | 
    
         
            +
                                let rb_handler = h.unwrap();
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
                                // prevents missing #selector from ruining things
         
     | 
| 
      
 81 
     | 
    
         
            +
                                if !rb_handler.respond_to("selector", true).unwrap() {
         
     | 
| 
      
 82 
     | 
    
         
            +
                                    let classname = unsafe { rb_handler.classname() };
         
     | 
| 
      
 83 
     | 
    
         
            +
                                    return Err(magnus::Error::new(
         
     | 
| 
      
 84 
     | 
    
         
            +
                                        exception::no_method_error(),
         
     | 
| 
      
 85 
     | 
    
         
            +
                                        format!(
         
     | 
| 
      
 86 
     | 
    
         
            +
                                            "Could not call #selector on {:?}; is this an object that defines it?",
         
     | 
| 
      
 87 
     | 
    
         
            +
                                            classname
         
     | 
| 
      
 88 
     | 
    
         
            +
                                        ),
         
     | 
| 
      
 89 
     | 
    
         
            +
                                    ));
         
     | 
| 
      
 90 
     | 
    
         
            +
                                }
         
     | 
| 
      
 91 
     | 
    
         
            +
             
     | 
| 
      
 92 
     | 
    
         
            +
                                let rb_selector: WrappedStruct<SelmaSelector> =
         
     | 
| 
      
 93 
     | 
    
         
            +
                                    match rb_handler.funcall("selector", ()) {
         
     | 
| 
      
 94 
     | 
    
         
            +
                                        Err(e) => {
         
     | 
| 
      
 95 
     | 
    
         
            +
                                            return Err(magnus::Error::new(
         
     | 
| 
      
 96 
     | 
    
         
            +
                                                exception::type_error(),
         
     | 
| 
      
 97 
     | 
    
         
            +
                                                format!("Error instantiating selector: {}", e),
         
     | 
| 
      
 98 
     | 
    
         
            +
                                            ));
         
     | 
| 
      
 99 
     | 
    
         
            +
                                        }
         
     | 
| 
      
 100 
     | 
    
         
            +
                                        Ok(rb_selector) => rb_selector,
         
     | 
| 
      
 101 
     | 
    
         
            +
                                    };
         
     | 
| 
      
 102 
     | 
    
         
            +
                                let handler = Handler {
         
     | 
| 
      
 103 
     | 
    
         
            +
                                    rb_handler,
         
     | 
| 
      
 104 
     | 
    
         
            +
                                    rb_selector,
         
     | 
| 
      
 105 
     | 
    
         
            +
                                    total_element_handler_calls: 0,
         
     | 
| 
      
 106 
     | 
    
         
            +
                                    total_elapsed_element_handlers: 0.0,
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
                                    total_text_handler_calls: 0,
         
     | 
| 
      
 109 
     | 
    
         
            +
                                    total_elapsed_text_handlers: 0.0,
         
     | 
| 
      
 110 
     | 
    
         
            +
                                };
         
     | 
| 
      
 111 
     | 
    
         
            +
                                handlers.push(handler);
         
     | 
| 
      
 112 
     | 
    
         
            +
                            }
         
     | 
| 
      
 113 
     | 
    
         
            +
                            handlers
         
     | 
| 
      
 114 
     | 
    
         
            +
                        }
         
     | 
| 
      
 115 
     | 
    
         
            +
                    };
         
     | 
| 
      
 116 
     | 
    
         
            +
             
     | 
| 
      
 117 
     | 
    
         
            +
                    if sanitizer.is_none() && handlers.is_empty() {
         
     | 
| 
      
 118 
     | 
    
         
            +
                        return Err(magnus::Error::new(
         
     | 
| 
      
 119 
     | 
    
         
            +
                            exception::arg_error(),
         
     | 
| 
      
 120 
     | 
    
         
            +
                            "Must provide a sanitizer or a handler",
         
     | 
| 
      
 121 
     | 
    
         
            +
                        ));
         
     | 
| 
      
 122 
     | 
    
         
            +
                    }
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
                    Ok(Self(std::cell::RefCell::new(Rewriter {
         
     | 
| 
      
 125 
     | 
    
         
            +
                        sanitizer,
         
     | 
| 
      
 126 
     | 
    
         
            +
                        handlers,
         
     | 
| 
      
 127 
     | 
    
         
            +
                        total_elapsed: 0.0,
         
     | 
| 
      
 128 
     | 
    
         
            +
                    })))
         
     | 
| 
      
 129 
     | 
    
         
            +
                }
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
      
 131 
     | 
    
         
            +
                #[allow(clippy::let_unit_value)]
         
     | 
| 
      
 132 
     | 
    
         
            +
                fn scan_parse_args(
         
     | 
| 
      
 133 
     | 
    
         
            +
                    args: &[Value],
         
     | 
| 
      
 134 
     | 
    
         
            +
                ) -> Result<
         
     | 
| 
      
 135 
     | 
    
         
            +
                    (
         
     | 
| 
      
 136 
     | 
    
         
            +
                        Option<Option<WrappedStruct<SelmaSanitizer>>>,
         
     | 
| 
      
 137 
     | 
    
         
            +
                        Option<RArray>,
         
     | 
| 
      
 138 
     | 
    
         
            +
                    ),
         
     | 
| 
      
 139 
     | 
    
         
            +
                    magnus::Error,
         
     | 
| 
      
 140 
     | 
    
         
            +
                > {
         
     | 
| 
      
 141 
     | 
    
         
            +
                    let args = scan_args::scan_args(args)?;
         
     | 
| 
      
 142 
     | 
    
         
            +
                    let _: () = args.required;
         
     | 
| 
      
 143 
     | 
    
         
            +
                    let _: () = args.optional;
         
     | 
| 
      
 144 
     | 
    
         
            +
                    let _: () = args.splat;
         
     | 
| 
      
 145 
     | 
    
         
            +
                    let _: () = args.trailing;
         
     | 
| 
      
 146 
     | 
    
         
            +
                    let _: () = args.block;
         
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
                    let kw = scan_args::get_kwargs::<
         
     | 
| 
      
 149 
     | 
    
         
            +
                        _,
         
     | 
| 
      
 150 
     | 
    
         
            +
                        (),
         
     | 
| 
      
 151 
     | 
    
         
            +
                        (
         
     | 
| 
      
 152 
     | 
    
         
            +
                            Option<Option<WrappedStruct<SelmaSanitizer>>>,
         
     | 
| 
      
 153 
     | 
    
         
            +
                            Option<RArray>,
         
     | 
| 
      
 154 
     | 
    
         
            +
                        ),
         
     | 
| 
      
 155 
     | 
    
         
            +
                        (),
         
     | 
| 
      
 156 
     | 
    
         
            +
                    >(args.keywords, &[], &["sanitizer", "handlers"])?;
         
     | 
| 
      
 157 
     | 
    
         
            +
                    let (rb_sanitizer, rb_handlers) = kw.optional;
         
     | 
| 
      
 158 
     | 
    
         
            +
             
     | 
| 
      
 159 
     | 
    
         
            +
                    Ok((rb_sanitizer, rb_handlers))
         
     | 
| 
      
 160 
     | 
    
         
            +
                }
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
                /// Perform HTML rewrite sequence.
         
     | 
| 
      
 163 
     | 
    
         
            +
                fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
         
     | 
| 
      
 164 
     | 
    
         
            +
                    let sanitized_html = match &self.0.borrow().sanitizer {
         
     | 
| 
      
 165 
     | 
    
         
            +
                        None => html,
         
     | 
| 
      
 166 
     | 
    
         
            +
                        Some(sanitizer) => {
         
     | 
| 
      
 167 
     | 
    
         
            +
                            // let first_pass_html = Self::perform_initial_sanitization(sanitizer, &html).unwrap();
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
                            // due to malicious html crafting
         
     | 
| 
      
 170 
     | 
    
         
            +
                            // (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
         
     | 
| 
      
 171 
     | 
    
         
            +
                            // we need to run sanitization several times to truly remove unwanted tags,
         
     | 
| 
      
 172 
     | 
    
         
            +
                            // because lol-html happily accepts this garbage (by design?)
         
     | 
| 
      
 173 
     | 
    
         
            +
                            let sanitized_html = Self::perform_sanitization(sanitizer, &html).unwrap();
         
     | 
| 
      
 174 
     | 
    
         
            +
             
     | 
| 
      
 175 
     | 
    
         
            +
                            String::from_utf8(sanitized_html).unwrap()
         
     | 
| 
      
 176 
     | 
    
         
            +
                        }
         
     | 
| 
      
 177 
     | 
    
         
            +
                    };
         
     | 
| 
      
 178 
     | 
    
         
            +
                    let binding = self.0.borrow_mut();
         
     | 
| 
      
 179 
     | 
    
         
            +
                    let handlers = &binding.handlers;
         
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
                    match Self::perform_handler_rewrite(self, handlers, sanitized_html) {
         
     | 
| 
      
 182 
     | 
    
         
            +
                        Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
         
     | 
| 
      
 183 
     | 
    
         
            +
                        Err(err) => Err(magnus::Error::new(
         
     | 
| 
      
 184 
     | 
    
         
            +
                            exception::runtime_error(),
         
     | 
| 
      
 185 
     | 
    
         
            +
                            format!("{}", err),
         
     | 
| 
      
 186 
     | 
    
         
            +
                        )),
         
     | 
| 
      
 187 
     | 
    
         
            +
                    }
         
     | 
| 
      
 188 
     | 
    
         
            +
                }
         
     | 
| 
      
 189 
     | 
    
         
            +
             
     | 
| 
      
 190 
     | 
    
         
            +
                fn perform_sanitization(
         
     | 
| 
      
 191 
     | 
    
         
            +
                    sanitizer: &SelmaSanitizer,
         
     | 
| 
      
 192 
     | 
    
         
            +
                    html: &String,
         
     | 
| 
      
 193 
     | 
    
         
            +
                ) -> Result<Vec<u8>, magnus::Error> {
         
     | 
| 
      
 194 
     | 
    
         
            +
                    let mut first_pass_html = vec![];
         
     | 
| 
      
 195 
     | 
    
         
            +
                    {
         
     | 
| 
      
 196 
     | 
    
         
            +
                        let mut document_content_handlers: Vec<DocumentContentHandlers> = vec![];
         
     | 
| 
      
 197 
     | 
    
         
            +
                        if !sanitizer.get_allow_doctype() {
         
     | 
| 
      
 198 
     | 
    
         
            +
                            document_content_handlers.push(doctype!(|d| {
         
     | 
| 
      
 199 
     | 
    
         
            +
                                sanitizer.remove_doctype(d);
         
     | 
| 
      
 200 
     | 
    
         
            +
                                Ok(())
         
     | 
| 
      
 201 
     | 
    
         
            +
                            }));
         
     | 
| 
      
 202 
     | 
    
         
            +
                        }
         
     | 
| 
      
 203 
     | 
    
         
            +
                        if !sanitizer.get_allow_comments() {
         
     | 
| 
      
 204 
     | 
    
         
            +
                            document_content_handlers.push(doc_comments!(|c| {
         
     | 
| 
      
 205 
     | 
    
         
            +
                                sanitizer.remove_comment(c);
         
     | 
| 
      
 206 
     | 
    
         
            +
                                Ok(())
         
     | 
| 
      
 207 
     | 
    
         
            +
                            }));
         
     | 
| 
      
 208 
     | 
    
         
            +
                        }
         
     | 
| 
      
 209 
     | 
    
         
            +
                        let mut rewriter = HtmlRewriter::new(
         
     | 
| 
      
 210 
     | 
    
         
            +
                            Settings {
         
     | 
| 
      
 211 
     | 
    
         
            +
                                document_content_handlers,
         
     | 
| 
      
 212 
     | 
    
         
            +
                                element_content_handlers: vec![element!("*", |el| {
         
     | 
| 
      
 213 
     | 
    
         
            +
                                    sanitizer.try_remove_element(el);
         
     | 
| 
      
 214 
     | 
    
         
            +
                                    if el.removed() {
         
     | 
| 
      
 215 
     | 
    
         
            +
                                        return Ok(());
         
     | 
| 
      
 216 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 217 
     | 
    
         
            +
                                    sanitizer.sanitize_attributes(el);
         
     | 
| 
      
 218 
     | 
    
         
            +
             
     | 
| 
      
 219 
     | 
    
         
            +
                                    Ok(())
         
     | 
| 
      
 220 
     | 
    
         
            +
                                })],
         
     | 
| 
      
 221 
     | 
    
         
            +
                                ..Settings::default()
         
     | 
| 
      
 222 
     | 
    
         
            +
                            },
         
     | 
| 
      
 223 
     | 
    
         
            +
                            |c: &[u8]| first_pass_html.extend_from_slice(c),
         
     | 
| 
      
 224 
     | 
    
         
            +
                        );
         
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
                        let result = rewriter.write(html.as_bytes());
         
     | 
| 
      
 227 
     | 
    
         
            +
                        if result.is_err() {
         
     | 
| 
      
 228 
     | 
    
         
            +
                            return Err(magnus::Error::new(
         
     | 
| 
      
 229 
     | 
    
         
            +
                                exception::runtime_error(),
         
     | 
| 
      
 230 
     | 
    
         
            +
                                format!("Failed to sanitize HTML: {}", result.unwrap_err()),
         
     | 
| 
      
 231 
     | 
    
         
            +
                            ));
         
     | 
| 
      
 232 
     | 
    
         
            +
                        }
         
     | 
| 
      
 233 
     | 
    
         
            +
                    }
         
     | 
| 
      
 234 
     | 
    
         
            +
             
     | 
| 
      
 235 
     | 
    
         
            +
                    let mut output = vec![];
         
     | 
| 
      
 236 
     | 
    
         
            +
                    {
         
     | 
| 
      
 237 
     | 
    
         
            +
                        let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
         
     | 
| 
      
 238 
     | 
    
         
            +
                        if sanitizer.get_escape_tagfilter() {
         
     | 
| 
      
 239 
     | 
    
         
            +
                            element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
         
     | 
| 
      
 240 
     | 
    
         
            +
                                let should_remove = sanitizer.allow_element(el);
         
     | 
| 
      
 241 
     | 
    
         
            +
                                if should_remove {
         
     | 
| 
      
 242 
     | 
    
         
            +
                                    sanitizer.force_remove_element(el);
         
     | 
| 
      
 243 
     | 
    
         
            +
                                }
         
     | 
| 
      
 244 
     | 
    
         
            +
             
     | 
| 
      
 245 
     | 
    
         
            +
                                Ok(())
         
     | 
| 
      
 246 
     | 
    
         
            +
                            }));
         
     | 
| 
      
 247 
     | 
    
         
            +
                        }
         
     | 
| 
      
 248 
     | 
    
         
            +
             
     | 
| 
      
 249 
     | 
    
         
            +
                        let mut rewriter = HtmlRewriter::new(
         
     | 
| 
      
 250 
     | 
    
         
            +
                            Settings {
         
     | 
| 
      
 251 
     | 
    
         
            +
                                element_content_handlers,
         
     | 
| 
      
 252 
     | 
    
         
            +
                                ..Settings::default()
         
     | 
| 
      
 253 
     | 
    
         
            +
                            },
         
     | 
| 
      
 254 
     | 
    
         
            +
                            |c: &[u8]| output.extend_from_slice(c),
         
     | 
| 
      
 255 
     | 
    
         
            +
                        );
         
     | 
| 
      
 256 
     | 
    
         
            +
             
     | 
| 
      
 257 
     | 
    
         
            +
                        let result = rewriter.write(first_pass_html.as_slice());
         
     | 
| 
      
 258 
     | 
    
         
            +
                        if result.is_err() {
         
     | 
| 
      
 259 
     | 
    
         
            +
                            return Err(magnus::Error::new(
         
     | 
| 
      
 260 
     | 
    
         
            +
                                exception::runtime_error(),
         
     | 
| 
      
 261 
     | 
    
         
            +
                                format!("Failed to sanitize HTML: {}", result.unwrap_err()),
         
     | 
| 
      
 262 
     | 
    
         
            +
                            ));
         
     | 
| 
      
 263 
     | 
    
         
            +
                        }
         
     | 
| 
      
 264 
     | 
    
         
            +
                    }
         
     | 
| 
      
 265 
     | 
    
         
            +
             
     | 
| 
      
 266 
     | 
    
         
            +
                    Ok(output)
         
     | 
| 
      
 267 
     | 
    
         
            +
                }
         
     | 
| 
      
 268 
     | 
    
         
            +
             
     | 
| 
      
 269 
     | 
    
         
            +
                pub fn perform_handler_rewrite(
         
     | 
| 
      
 270 
     | 
    
         
            +
                    &self,
         
     | 
| 
      
 271 
     | 
    
         
            +
                    handlers: &[Handler],
         
     | 
| 
      
 272 
     | 
    
         
            +
                    html: String,
         
     | 
| 
      
 273 
     | 
    
         
            +
                ) -> Result<Vec<u8>, magnus::Error> {
         
     | 
| 
      
 274 
     | 
    
         
            +
                    // TODO: this should ideally be done ahead of time, not on every `#rewrite` call
         
     | 
| 
      
 275 
     | 
    
         
            +
                    let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
         
     | 
| 
      
 276 
     | 
    
         
            +
             
     | 
| 
      
 277 
     | 
    
         
            +
                    handlers.iter().for_each(|handler| {
         
     | 
| 
      
 278 
     | 
    
         
            +
                        let element_stack: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(vec![]));
         
     | 
| 
      
 279 
     | 
    
         
            +
             
     | 
| 
      
 280 
     | 
    
         
            +
                        let selector = handler.rb_selector.get_static().unwrap();
         
     | 
| 
      
 281 
     | 
    
         
            +
             
     | 
| 
      
 282 
     | 
    
         
            +
                        // TODO: test final raise by simulating errors
         
     | 
| 
      
 283 
     | 
    
         
            +
                        if selector.match_element().is_some() {
         
     | 
| 
      
 284 
     | 
    
         
            +
                            let closure_element_stack = element_stack.clone();
         
     | 
| 
      
 285 
     | 
    
         
            +
             
     | 
| 
      
 286 
     | 
    
         
            +
                            element_content_handlers.push(element!(
         
     | 
| 
      
 287 
     | 
    
         
            +
                                selector.match_element().unwrap(),
         
     | 
| 
      
 288 
     | 
    
         
            +
                                move |el| {
         
     | 
| 
      
 289 
     | 
    
         
            +
                                    match Self::process_element_handlers(
         
     | 
| 
      
 290 
     | 
    
         
            +
                                        handler.rb_handler,
         
     | 
| 
      
 291 
     | 
    
         
            +
                                        el,
         
     | 
| 
      
 292 
     | 
    
         
            +
                                        &closure_element_stack.borrow(),
         
     | 
| 
      
 293 
     | 
    
         
            +
                                    ) {
         
     | 
| 
      
 294 
     | 
    
         
            +
                                        Ok(_) => Ok(()),
         
     | 
| 
      
 295 
     | 
    
         
            +
                                        Err(err) => Err(err.to_string().into()),
         
     | 
| 
      
 296 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 297 
     | 
    
         
            +
                                }
         
     | 
| 
      
 298 
     | 
    
         
            +
                            ));
         
     | 
| 
      
 299 
     | 
    
         
            +
                        }
         
     | 
| 
      
 300 
     | 
    
         
            +
             
     | 
| 
      
 301 
     | 
    
         
            +
                        if selector.match_text_within().is_some() {
         
     | 
| 
      
 302 
     | 
    
         
            +
                            let closure_element_stack = element_stack.clone();
         
     | 
| 
      
 303 
     | 
    
         
            +
             
     | 
| 
      
 304 
     | 
    
         
            +
                            element_content_handlers.push(text!(
         
     | 
| 
      
 305 
     | 
    
         
            +
                                selector.match_text_within().unwrap(),
         
     | 
| 
      
 306 
     | 
    
         
            +
                                move |text| {
         
     | 
| 
      
 307 
     | 
    
         
            +
                                    let element_stack = closure_element_stack.as_ref().borrow();
         
     | 
| 
      
 308 
     | 
    
         
            +
                                    if selector.ignore_text_within().is_some() {
         
     | 
| 
      
 309 
     | 
    
         
            +
                                        // check if current tag is a tag we should be ignoring text within
         
     | 
| 
      
 310 
     | 
    
         
            +
                                        let head_tag_name = element_stack.last().unwrap().to_string();
         
     | 
| 
      
 311 
     | 
    
         
            +
                                        if selector
         
     | 
| 
      
 312 
     | 
    
         
            +
                                            .ignore_text_within()
         
     | 
| 
      
 313 
     | 
    
         
            +
                                            .unwrap()
         
     | 
| 
      
 314 
     | 
    
         
            +
                                            .iter()
         
     | 
| 
      
 315 
     | 
    
         
            +
                                            .any(|f| f == &head_tag_name)
         
     | 
| 
      
 316 
     | 
    
         
            +
                                        {
         
     | 
| 
      
 317 
     | 
    
         
            +
                                            return Ok(());
         
     | 
| 
      
 318 
     | 
    
         
            +
                                        }
         
     | 
| 
      
 319 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 320 
     | 
    
         
            +
             
     | 
| 
      
 321 
     | 
    
         
            +
                                    match Self::process_text_handlers(handler.rb_handler, text) {
         
     | 
| 
      
 322 
     | 
    
         
            +
                                        Ok(_) => Ok(()),
         
     | 
| 
      
 323 
     | 
    
         
            +
                                        Err(err) => Err(err.to_string().into()),
         
     | 
| 
      
 324 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 325 
     | 
    
         
            +
                                }
         
     | 
| 
      
 326 
     | 
    
         
            +
                            ));
         
     | 
| 
      
 327 
     | 
    
         
            +
                        }
         
     | 
| 
      
 328 
     | 
    
         
            +
             
     | 
| 
      
 329 
     | 
    
         
            +
                        // we need to check *every* element we iterate over, to create a stack of elements
         
     | 
| 
      
 330 
     | 
    
         
            +
                        element_content_handlers.push(element!("*", move |el| {
         
     | 
| 
      
 331 
     | 
    
         
            +
                            let tag_name = el.tag_name().to_lowercase();
         
     | 
| 
      
 332 
     | 
    
         
            +
             
     | 
| 
      
 333 
     | 
    
         
            +
                            // no need to track self-closing tags
         
     | 
| 
      
 334 
     | 
    
         
            +
                            if Tag::tag_from_tag_name(&tag_name).self_closing {
         
     | 
| 
      
 335 
     | 
    
         
            +
                                return Ok(());
         
     | 
| 
      
 336 
     | 
    
         
            +
                            };
         
     | 
| 
      
 337 
     | 
    
         
            +
             
     | 
| 
      
 338 
     | 
    
         
            +
                            element_stack.as_ref().borrow_mut().push(tag_name);
         
     | 
| 
      
 339 
     | 
    
         
            +
             
     | 
| 
      
 340 
     | 
    
         
            +
                            let closure_element_stack = element_stack.clone();
         
     | 
| 
      
 341 
     | 
    
         
            +
                            el.on_end_tag(move |_end_tag: &mut EndTag| {
         
     | 
| 
      
 342 
     | 
    
         
            +
                                let mut stack = closure_element_stack.as_ref().borrow_mut();
         
     | 
| 
      
 343 
     | 
    
         
            +
                                stack.pop();
         
     | 
| 
      
 344 
     | 
    
         
            +
                                Ok(())
         
     | 
| 
      
 345 
     | 
    
         
            +
                            });
         
     | 
| 
      
 346 
     | 
    
         
            +
                            Ok(())
         
     | 
| 
      
 347 
     | 
    
         
            +
                        }));
         
     | 
| 
      
 348 
     | 
    
         
            +
                    });
         
     | 
| 
      
 349 
     | 
    
         
            +
             
     | 
| 
      
 350 
     | 
    
         
            +
                    let mut output = vec![];
         
     | 
| 
      
 351 
     | 
    
         
            +
                    {
         
     | 
| 
      
 352 
     | 
    
         
            +
                        let mut rewriter = HtmlRewriter::new(
         
     | 
| 
      
 353 
     | 
    
         
            +
                            Settings {
         
     | 
| 
      
 354 
     | 
    
         
            +
                                element_content_handlers,
         
     | 
| 
      
 355 
     | 
    
         
            +
                                ..Settings::default()
         
     | 
| 
      
 356 
     | 
    
         
            +
                            },
         
     | 
| 
      
 357 
     | 
    
         
            +
                            |c: &[u8]| output.extend_from_slice(c),
         
     | 
| 
      
 358 
     | 
    
         
            +
                        );
         
     | 
| 
      
 359 
     | 
    
         
            +
                        match rewriter.write(html.as_bytes()) {
         
     | 
| 
      
 360 
     | 
    
         
            +
                            Ok(_) => {}
         
     | 
| 
      
 361 
     | 
    
         
            +
                            Err(err) => {
         
     | 
| 
      
 362 
     | 
    
         
            +
                                return Err(magnus::Error::new(
         
     | 
| 
      
 363 
     | 
    
         
            +
                                    exception::runtime_error(),
         
     | 
| 
      
 364 
     | 
    
         
            +
                                    format!("{}", err),
         
     | 
| 
      
 365 
     | 
    
         
            +
                                ));
         
     | 
| 
      
 366 
     | 
    
         
            +
                            }
         
     | 
| 
      
 367 
     | 
    
         
            +
                        }
         
     | 
| 
      
 368 
     | 
    
         
            +
                    }
         
     | 
| 
      
 369 
     | 
    
         
            +
                    Ok(output)
         
     | 
| 
      
 370 
     | 
    
         
            +
                }
         
     | 
| 
      
 371 
     | 
    
         
            +
             
     | 
| 
      
 372 
     | 
    
         
            +
                fn process_element_handlers(
         
     | 
| 
      
 373 
     | 
    
         
            +
                    rb_handler: Value,
         
     | 
| 
      
 374 
     | 
    
         
            +
                    element: &mut Element,
         
     | 
| 
      
 375 
     | 
    
         
            +
                    ancestors: &Vec<String>,
         
     | 
| 
      
 376 
     | 
    
         
            +
                ) -> Result<(), magnus::Error> {
         
     | 
| 
      
 377 
     | 
    
         
            +
                    // if `on_end_tag` function is defined, call it
         
     | 
| 
      
 378 
     | 
    
         
            +
                    if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
         
     | 
| 
      
 379 
     | 
    
         
            +
                        element.on_end_tag(move |end_tag| {
         
     | 
| 
      
 380 
     | 
    
         
            +
                            let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
         
     | 
| 
      
 381 
     | 
    
         
            +
             
     | 
| 
      
 382 
     | 
    
         
            +
                            rb_handler
         
     | 
| 
      
 383 
     | 
    
         
            +
                                .funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,))
         
     | 
| 
      
 384 
     | 
    
         
            +
                                .unwrap();
         
     | 
| 
      
 385 
     | 
    
         
            +
                            Ok(())
         
     | 
| 
      
 386 
     | 
    
         
            +
                        });
         
     | 
| 
      
 387 
     | 
    
         
            +
                    }
         
     | 
| 
      
 388 
     | 
    
         
            +
             
     | 
| 
      
 389 
     | 
    
         
            +
                    let rb_element = SelmaHTMLElement::new(element, ancestors);
         
     | 
| 
      
 390 
     | 
    
         
            +
                    let rb_result =
         
     | 
| 
      
 391 
     | 
    
         
            +
                        rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
         
     | 
| 
      
 392 
     | 
    
         
            +
                    match rb_result {
         
     | 
| 
      
 393 
     | 
    
         
            +
                        Ok(_) => Ok(()),
         
     | 
| 
      
 394 
     | 
    
         
            +
                        Err(err) => Err(magnus::Error::new(
         
     | 
| 
      
 395 
     | 
    
         
            +
                            exception::runtime_error(),
         
     | 
| 
      
 396 
     | 
    
         
            +
                            format!("{}", err),
         
     | 
| 
      
 397 
     | 
    
         
            +
                        )),
         
     | 
| 
      
 398 
     | 
    
         
            +
                    }
         
     | 
| 
      
 399 
     | 
    
         
            +
                }
         
     | 
| 
      
 400 
     | 
    
         
            +
             
     | 
| 
      
 401 
     | 
    
         
            +
                fn process_text_handlers(rb_handler: Value, text: &mut TextChunk) -> Result<(), magnus::Error> {
         
     | 
| 
      
 402 
     | 
    
         
            +
                    // prevents missing `handle_text` function
         
     | 
| 
      
 403 
     | 
    
         
            +
                    let content = text.as_str();
         
     | 
| 
      
 404 
     | 
    
         
            +
             
     | 
| 
      
 405 
     | 
    
         
            +
                    // FIXME: why does this happen?
         
     | 
| 
      
 406 
     | 
    
         
            +
                    if content.is_empty() {
         
     | 
| 
      
 407 
     | 
    
         
            +
                        return Ok(());
         
     | 
| 
      
 408 
     | 
    
         
            +
                    }
         
     | 
| 
      
 409 
     | 
    
         
            +
                    let rb_result = rb_handler.funcall(Self::SELMA_HANDLE_TEXT, (content,));
         
     | 
| 
      
 410 
     | 
    
         
            +
             
     | 
| 
      
 411 
     | 
    
         
            +
                    if rb_result.is_err() {
         
     | 
| 
      
 412 
     | 
    
         
            +
                        return Err(magnus::Error::new(
         
     | 
| 
      
 413 
     | 
    
         
            +
                            exception::type_error(),
         
     | 
| 
      
 414 
     | 
    
         
            +
                            format!(
         
     | 
| 
      
 415 
     | 
    
         
            +
                                "Expected #{:?} to return a string: {:?}",
         
     | 
| 
      
 416 
     | 
    
         
            +
                                Self::SELMA_HANDLE_TEXT,
         
     | 
| 
      
 417 
     | 
    
         
            +
                                rb_result.err().unwrap()
         
     | 
| 
      
 418 
     | 
    
         
            +
                            ),
         
     | 
| 
      
 419 
     | 
    
         
            +
                        ));
         
     | 
| 
      
 420 
     | 
    
         
            +
                    }
         
     | 
| 
      
 421 
     | 
    
         
            +
             
     | 
| 
      
 422 
     | 
    
         
            +
                    let new_content: String = rb_result.unwrap();
         
     | 
| 
      
 423 
     | 
    
         
            +
                    // TODO: can this be an option?
         
     | 
| 
      
 424 
     | 
    
         
            +
                    text.replace(&new_content, ContentType::Html);
         
     | 
| 
      
 425 
     | 
    
         
            +
             
     | 
| 
      
 426 
     | 
    
         
            +
                    Ok(())
         
     | 
| 
      
 427 
     | 
    
         
            +
                }
         
     | 
| 
      
 428 
     | 
    
         
            +
            }
         
     | 
| 
      
 429 
     | 
    
         
            +
             
     | 
| 
      
 430 
     | 
    
         
            +
            pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
         
     | 
| 
      
 431 
     | 
    
         
            +
                let c_rewriter = m_selma
         
     | 
| 
      
 432 
     | 
    
         
            +
                    .define_class("Rewriter", Default::default())
         
     | 
| 
      
 433 
     | 
    
         
            +
                    .expect("cannot find class Selma::Rewriter");
         
     | 
| 
      
 434 
     | 
    
         
            +
             
     | 
| 
      
 435 
     | 
    
         
            +
                c_rewriter.define_singleton_method("new", function!(SelmaRewriter::new, -1))?;
         
     | 
| 
      
 436 
     | 
    
         
            +
                c_rewriter
         
     | 
| 
      
 437 
     | 
    
         
            +
                    .define_method("rewrite", method!(SelmaRewriter::rewrite, 1))
         
     | 
| 
      
 438 
     | 
    
         
            +
                    .expect("cannot define method `rewrite`");
         
     | 
| 
      
 439 
     | 
    
         
            +
             
     | 
| 
      
 440 
     | 
    
         
            +
                Ok(())
         
     | 
| 
      
 441 
     | 
    
         
            +
            }
         
     |