selma 0.0.3-aarch64-linux → 0.0.5-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -10
- data/ext/selma/Cargo.toml +1 -1
- data/ext/selma/src/html/element.rs +103 -44
- data/ext/selma/src/html/end_tag.rs +2 -2
- data/ext/selma/src/html/text_chunk.rs +113 -0
- data/ext/selma/src/html.rs +2 -0
- data/ext/selma/src/lib.rs +28 -1
- data/ext/selma/src/rewriter.rs +37 -49
- data/ext/selma/src/sanitizer.rs +102 -83
- data/ext/selma/src/tags.rs +7 -4
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/sanitizer/config/default.rb +4 -0
- data/lib/selma/version.rb +1 -1
- metadata +3 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 1cc01b8b795db9625ed5104ae6266d3147955def7b1729f91944bb254d5d2fbb
         | 
| 4 | 
            +
              data.tar.gz: 3d2006ada6dc72d43d5eeb8070a368420863a4839ad3132416f4262004f3c9e8
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 345c650c379049924fd084a07047875ddf3982a442208e08fcf67128c590c9e9ac1f851429a759a63cd7ca901b16f82dc81527ae54f9792077db6326cc0c4633
         | 
| 7 | 
            +
              data.tar.gz: 7506ae8bcd4a854c549b2b2e0664b9a5d6971ecf8eb54b4e4aad5cb0fa329815ab9ee6f9bcefd27725880518f8752338e24dbf4b08a2c579a6893c6f68f82e3c
         | 
    
        data/README.md
    CHANGED
    
    | @@ -56,6 +56,10 @@ allow_comments: false, | |
| 56 56 | 
             
            # "<!DOCTYPE html>" when sanitizing a document.
         | 
| 57 57 | 
             
            allow_doctype: false,
         | 
| 58 58 |  | 
| 59 | 
            +
            # HTML elements to allow. By default, no elements are allowed (which means
         | 
| 60 | 
            +
            # that all HTML will be stripped).
         | 
| 61 | 
            +
            elements: ["a", "b", "img", ],
         | 
| 62 | 
            +
             | 
| 59 63 | 
             
            # HTML attributes to allow in specific elements. The key is the name of the element,
         | 
| 60 64 | 
             
            # and the value is an array of allowed attributes. By default, no attributes
         | 
| 61 65 | 
             
            # are allowed.
         | 
| @@ -64,14 +68,10 @@ attributes: { | |
| 64 68 | 
             
                "img" => ["src"],
         | 
| 65 69 | 
             
            },
         | 
| 66 70 |  | 
| 67 | 
            -
            # HTML elements to allow. By default, no elements are allowed (which means
         | 
| 68 | 
            -
            # that all HTML will be stripped).
         | 
| 69 | 
            -
            elements: ["a", "b", "img", ],
         | 
| 70 | 
            -
             | 
| 71 71 | 
             
            # URL handling protocols to allow in specific attributes. By default, no
         | 
| 72 72 | 
             
            # protocols are allowed. Use :relative in place of a protocol if you want
         | 
| 73 73 | 
             
            # to allow relative URLs sans protocol.
         | 
| 74 | 
            -
             | 
| 74 | 
            +
            protocols: {
         | 
| 75 75 | 
             
                "a" => { "href" => ["http", "https", "mailto", :relative] },
         | 
| 76 76 | 
             
                "img" => { "href" => ["http", "https"] },
         | 
| 77 77 | 
             
            },
         | 
| @@ -91,7 +91,7 @@ The real power in Selma comes in its use of handlers. A handler is simply an obj | |
| 91 91 |  | 
| 92 92 | 
             
            - `selector`, a method which MUST return an instance of `Selma::Selector` which defines the CSS classes to match
         | 
| 93 93 | 
             
            - `handle_element`, a method that's called on each matched element
         | 
| 94 | 
            -
            - ` | 
| 94 | 
            +
            - `handle_text_chunk`, a method that's called on each matched text node; this MUST return a string
         | 
| 95 95 |  | 
| 96 96 | 
             
            Here's an example which rewrites the `href` attribute on `a` and the `src` attribute on `img` to be `https` rather than `http`.
         | 
| 97 97 |  | 
| @@ -118,7 +118,7 @@ rewriter = Selma::Rewriter.new(handlers: [MatchAttribute.new]) | |
| 118 118 | 
             
            The `Selma::Selector` object has three possible kwargs:
         | 
| 119 119 |  | 
| 120 120 | 
             
            - `match_element`: any element which matches this CSS rule will be passed on to `handle_element`
         | 
| 121 | 
            -
            - `match_text_within`: any element which matches this CSS rule will be passed on to ` | 
| 121 | 
            +
            - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text_chunk`
         | 
| 122 122 | 
             
            - `ignore_text_within`: this is an array of element names whose text contents will be ignored
         | 
| 123 123 |  | 
| 124 124 | 
             
            You've seen an example of `match_element`; here's one for `match_text_within` which changes strings in various elements which are _not_ `pre` or `code`:
         | 
| @@ -132,7 +132,7 @@ class MatchText | |
| 132 132 | 
             
                SELECTOR
         | 
| 133 133 | 
             
              end
         | 
| 134 134 |  | 
| 135 | 
            -
              def  | 
| 135 | 
            +
              def handle_text_chunk(text)
         | 
| 136 136 | 
             
                string.sub(/@.+/, "<a href=\"www.yetto.app/#{Regexp.last_match}\">")
         | 
| 137 137 | 
             
              end
         | 
| 138 138 | 
             
            end
         | 
| @@ -150,8 +150,9 @@ The `element` argument in `handle_element` has the following methods: | |
| 150 150 | 
             
            - `remove_attribute`: remove an attribute
         | 
| 151 151 | 
             
            - `attributes`: list all the attributes
         | 
| 152 152 | 
             
            - `ancestors`: list all the ancestors
         | 
| 153 | 
            -
            - `append(content, content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
         | 
| 154 | 
            -
            - ` | 
| 153 | 
            +
            - `append(content, as: content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
         | 
| 154 | 
            +
            - `before(content, as: content_type)`: Inserts `content` before the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
         | 
| 155 | 
            +
            - `after(content, as: content_type)`: Inserts `content` after the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
         | 
| 155 156 | 
             
            - `set_inner_content(content, as: content_type)`: replaces inner content of the element with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
         | 
| 156 157 |  | 
| 157 158 | 
             
            ## Benchmarks
         | 
    
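        data/ext/selma/Cargo.toml
    CHANGED
    
    (the +1 -1 hunk for this file was not captured in this rendering)
    
        data/ext/selma/src/html/element.rs
    CHANGED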
    
    
| @@ -1,8 +1,6 @@ | |
| 1 | 
            -
            use std::borrow::Cow;
         | 
| 2 | 
            -
             | 
| 3 1 | 
             
            use crate::native_ref_wrap::NativeRefWrap;
         | 
| 4 | 
            -
            use lol_html::html_content:: | 
| 5 | 
            -
            use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString,  | 
| 2 | 
            +
            use lol_html::html_content::Element;
         | 
| 3 | 
            +
            use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Value};
         | 
| 6 4 |  | 
| 7 5 | 
             
            struct HTMLElement {
         | 
| 8 6 | 
             
                element: NativeRefWrap<Element<'static, 'static>>,
         | 
| @@ -38,6 +36,48 @@ impl SelmaHTMLElement { | |
| 38 36 | 
             
                    }
         | 
| 39 37 | 
             
                }
         | 
| 40 38 |  | 
| 39 | 
            +
                fn set_tag_name(&self, name: String) -> Result<(), Error> {
         | 
| 40 | 
            +
                    let mut binding = self.0.borrow_mut();
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                    if let Ok(element) = binding.element.get_mut() {
         | 
| 43 | 
            +
                        match element.set_tag_name(&name) {
         | 
| 44 | 
            +
                            Ok(_) => Ok(()),
         | 
| 45 | 
            +
                            Err(err) => Err(Error::new(exception::runtime_error(), format!("{err:?}"))),
         | 
| 46 | 
            +
                        }
         | 
| 47 | 
            +
                    } else {
         | 
| 48 | 
            +
                        Err(Error::new(
         | 
| 49 | 
            +
                            exception::runtime_error(),
         | 
| 50 | 
            +
                            "`set_tag_name` is not available",
         | 
| 51 | 
            +
                        ))
         | 
| 52 | 
            +
                    }
         | 
| 53 | 
            +
                }
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                fn is_self_closing(&self) -> Result<bool, Error> {
         | 
| 56 | 
            +
                    let binding = self.0.borrow();
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                    if let Ok(e) = binding.element.get() {
         | 
| 59 | 
            +
                        Ok(e.is_self_closing())
         | 
| 60 | 
            +
                    } else {
         | 
| 61 | 
            +
                        Err(Error::new(
         | 
| 62 | 
            +
                            exception::runtime_error(),
         | 
| 63 | 
            +
                            "`is_self_closing` is not available",
         | 
| 64 | 
            +
                        ))
         | 
| 65 | 
            +
                    }
         | 
| 66 | 
            +
                }
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                fn has_attribute(&self, attr: String) -> Result<bool, Error> {
         | 
| 69 | 
            +
                    let binding = self.0.borrow();
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                    if let Ok(e) = binding.element.get() {
         | 
| 72 | 
            +
                        Ok(e.has_attribute(&attr))
         | 
| 73 | 
            +
                    } else {
         | 
| 74 | 
            +
                        Err(Error::new(
         | 
| 75 | 
            +
                            exception::runtime_error(),
         | 
| 76 | 
            +
                            "`is_self_closing` is not available",
         | 
| 77 | 
            +
                        ))
         | 
| 78 | 
            +
                    }
         | 
| 79 | 
            +
                }
         | 
| 80 | 
            +
             | 
| 41 81 | 
             
                fn get_attribute(&self, attr: String) -> Option<String> {
         | 
| 42 82 | 
             
                    let binding = self.0.borrow();
         | 
| 43 83 | 
             
                    let element = binding.element.get();
         | 
| @@ -106,89 +146,108 @@ impl SelmaHTMLElement { | |
| 106 146 | 
             
                    Ok(array)
         | 
| 107 147 | 
             
                }
         | 
| 108 148 |  | 
| 109 | 
            -
                fn  | 
| 149 | 
            +
                fn before(&self, args: &[Value]) -> Result<(), Error> {
         | 
| 110 150 | 
             
                    let mut binding = self.0.borrow_mut();
         | 
| 111 151 | 
             
                    let element = binding.element.get_mut().unwrap();
         | 
| 112 152 |  | 
| 113 | 
            -
                    let text_str =  | 
| 153 | 
            +
                    let (text_str, content_type) = match crate::scan_text_args(args) {
         | 
| 154 | 
            +
                        Ok((text_str, content_type)) => (text_str, content_type),
         | 
| 155 | 
            +
                        Err(err) => return Err(err),
         | 
| 156 | 
            +
                    };
         | 
| 114 157 |  | 
| 115 | 
            -
                     | 
| 116 | 
            -
             | 
| 117 | 
            -
                    element.append(text_str, content_type);
         | 
| 158 | 
            +
                    element.before(&text_str, content_type);
         | 
| 118 159 |  | 
| 119 160 | 
             
                    Ok(())
         | 
| 120 161 | 
             
                }
         | 
| 121 162 |  | 
| 122 | 
            -
                fn  | 
| 123 | 
            -
                    &self,
         | 
| 124 | 
            -
                    start_text: String,
         | 
| 125 | 
            -
                    end_text: String,
         | 
| 126 | 
            -
                    content_type: Symbol,
         | 
| 127 | 
            -
                ) -> Result<(), Error> {
         | 
| 163 | 
            +
                fn after(&self, args: &[Value]) -> Result<(), Error> {
         | 
| 128 164 | 
             
                    let mut binding = self.0.borrow_mut();
         | 
| 129 165 | 
             
                    let element = binding.element.get_mut().unwrap();
         | 
| 130 166 |  | 
| 131 | 
            -
                    let  | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 134 | 
            -
                     | 
| 167 | 
            +
                    let (text_str, content_type) = match crate::scan_text_args(args) {
         | 
| 168 | 
            +
                        Ok((text_str, content_type)) => (text_str, content_type),
         | 
| 169 | 
            +
                        Err(err) => return Err(err),
         | 
| 170 | 
            +
                    };
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                    element.after(&text_str, content_type);
         | 
| 135 173 |  | 
| 136 174 | 
             
                    Ok(())
         | 
| 137 175 | 
             
                }
         | 
| 138 176 |  | 
| 139 | 
            -
                fn  | 
| 177 | 
            +
                fn prepend(&self, args: &[Value]) -> Result<(), Error> {
         | 
| 140 178 | 
             
                    let mut binding = self.0.borrow_mut();
         | 
| 141 179 | 
             
                    let element = binding.element.get_mut().unwrap();
         | 
| 142 180 |  | 
| 143 | 
            -
                    let text_str =  | 
| 181 | 
            +
                    let (text_str, content_type) = match crate::scan_text_args(args) {
         | 
| 182 | 
            +
                        Ok((text_str, content_type)) => (text_str, content_type),
         | 
| 183 | 
            +
                        Err(err) => return Err(err),
         | 
| 184 | 
            +
                    };
         | 
| 185 | 
            +
             | 
| 186 | 
            +
                    element.prepend(&text_str, content_type);
         | 
| 144 187 |  | 
| 145 | 
            -
                     | 
| 188 | 
            +
                    Ok(())
         | 
| 189 | 
            +
                }
         | 
| 146 190 |  | 
| 147 | 
            -
             | 
| 191 | 
            +
                fn append(&self, args: &[Value]) -> Result<(), Error> {
         | 
| 192 | 
            +
                    let mut binding = self.0.borrow_mut();
         | 
| 193 | 
            +
                    let element = binding.element.get_mut().unwrap();
         | 
| 194 | 
            +
             | 
| 195 | 
            +
                    let (text_str, content_type) = match crate::scan_text_args(args) {
         | 
| 196 | 
            +
                        Ok((text_str, content_type)) => (text_str, content_type),
         | 
| 197 | 
            +
                        Err(err) => return Err(err),
         | 
| 198 | 
            +
                    };
         | 
| 199 | 
            +
             | 
| 200 | 
            +
                    element.append(&text_str, content_type);
         | 
| 148 201 |  | 
| 149 202 | 
             
                    Ok(())
         | 
| 150 203 | 
             
                }
         | 
| 151 204 |  | 
| 152 | 
            -
                fn  | 
| 153 | 
            -
                     | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 158 | 
            -
             | 
| 159 | 
            -
             | 
| 160 | 
            -
             | 
| 161 | 
            -
             | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
                            exception::runtime_error(),
         | 
| 165 | 
            -
                            format!("Could not unwrap symbol: {err:?}"),
         | 
| 166 | 
            -
                        ))
         | 
| 167 | 
            -
                        .unwrap(),
         | 
| 168 | 
            -
                    }
         | 
| 205 | 
            +
                fn set_inner_content(&self, args: &[Value]) -> Result<(), Error> {
         | 
| 206 | 
            +
                    let mut binding = self.0.borrow_mut();
         | 
| 207 | 
            +
                    let element = binding.element.get_mut().unwrap();
         | 
| 208 | 
            +
             | 
| 209 | 
            +
                    let (inner_content, content_type) = match crate::scan_text_args(args) {
         | 
| 210 | 
            +
                        Ok((inner_content, content_type)) => (inner_content, content_type),
         | 
| 211 | 
            +
                        Err(err) => return Err(err),
         | 
| 212 | 
            +
                    };
         | 
| 213 | 
            +
             | 
| 214 | 
            +
                    element.set_inner_content(&inner_content, content_type);
         | 
| 215 | 
            +
             | 
| 216 | 
            +
                    Ok(())
         | 
| 169 217 | 
             
                }
         | 
| 170 218 | 
             
            }
         | 
| 171 219 |  | 
| 172 220 | 
             
            pub fn init(c_html: RClass) -> Result<(), Error> {
         | 
| 173 221 | 
             
                let c_element = c_html
         | 
| 174 222 | 
             
                    .define_class("Element", Default::default())
         | 
| 175 | 
            -
                    .expect("cannot find class Selma::Element");
         | 
| 223 | 
            +
                    .expect("cannot find class Selma::HTML::Element");
         | 
| 176 224 |  | 
| 177 225 | 
             
                c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
         | 
| 226 | 
            +
                c_element.define_method("tag_name=", method!(SelmaHTMLElement::set_tag_name, 1))?;
         | 
| 227 | 
            +
                c_element.define_method(
         | 
| 228 | 
            +
                    "self_closing?",
         | 
| 229 | 
            +
                    method!(SelmaHTMLElement::is_self_closing, 0),
         | 
| 230 | 
            +
                )?;
         | 
| 178 231 | 
             
                c_element.define_method("[]", method!(SelmaHTMLElement::get_attribute, 1))?;
         | 
| 179 232 | 
             
                c_element.define_method("[]=", method!(SelmaHTMLElement::set_attribute, 2))?;
         | 
| 180 233 | 
             
                c_element.define_method(
         | 
| 181 234 | 
             
                    "remove_attribute",
         | 
| 182 235 | 
             
                    method!(SelmaHTMLElement::remove_attribute, 1),
         | 
| 183 236 | 
             
                )?;
         | 
| 237 | 
            +
                c_element.define_method(
         | 
| 238 | 
            +
                    "has_attribute?",
         | 
| 239 | 
            +
                    method!(SelmaHTMLElement::has_attribute, 1),
         | 
| 240 | 
            +
                )?;
         | 
| 184 241 | 
             
                c_element.define_method("attributes", method!(SelmaHTMLElement::get_attributes, 0))?;
         | 
| 185 242 | 
             
                c_element.define_method("ancestors", method!(SelmaHTMLElement::get_ancestors, 0))?;
         | 
| 186 243 |  | 
| 187 | 
            -
                c_element.define_method(" | 
| 188 | 
            -
                c_element.define_method(" | 
| 244 | 
            +
                c_element.define_method("before", method!(SelmaHTMLElement::before, -1))?;
         | 
| 245 | 
            +
                c_element.define_method("after", method!(SelmaHTMLElement::after, -1))?;
         | 
| 246 | 
            +
                c_element.define_method("prepend", method!(SelmaHTMLElement::prepend, -1))?;
         | 
| 247 | 
            +
                c_element.define_method("append", method!(SelmaHTMLElement::append, -1))?;
         | 
| 189 248 | 
             
                c_element.define_method(
         | 
| 190 249 | 
             
                    "set_inner_content",
         | 
| 191 | 
            -
                    method!(SelmaHTMLElement::set_inner_content,  | 
| 250 | 
            +
                    method!(SelmaHTMLElement::set_inner_content, -1),
         | 
| 192 251 | 
             
                )?;
         | 
| 193 252 |  | 
| 194 253 | 
             
                Ok(())
         | 
    
        data/ext/selma/src/html/end_tag.rs
    CHANGED
    
    | @@ -6,7 +6,7 @@ struct HTMLEndTag { | |
| 6 6 | 
             
                end_tag: NativeRefWrap<EndTag<'static>>,
         | 
| 7 7 | 
             
            }
         | 
| 8 8 |  | 
| 9 | 
            -
            #[magnus::wrap(class = "Selma::HTML:: | 
| 9 | 
            +
            #[magnus::wrap(class = "Selma::HTML::EndTag")]
         | 
| 10 10 | 
             
            pub struct SelmaHTMLEndTag(std::cell::RefCell<HTMLEndTag>);
         | 
| 11 11 |  | 
| 12 12 | 
             
            /// SAFETY: This is safe because we only access this data when the GVL is held.
         | 
| @@ -27,7 +27,7 @@ impl SelmaHTMLEndTag { | |
| 27 27 | 
             
            pub fn init(c_html: RClass) -> Result<(), Error> {
         | 
| 28 28 | 
             
                let c_end_tag = c_html
         | 
| 29 29 | 
             
                    .define_class("EndTag", Default::default())
         | 
| 30 | 
            -
                    .expect("cannot find class Selma::EndTag");
         | 
| 30 | 
            +
                    .expect("cannot find class Selma::HTML::EndTag");
         | 
| 31 31 |  | 
| 32 32 | 
             
                c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
         | 
| 33 33 |  | 
    
        data/ext/selma/src/html/text_chunk.rs
    ADDED
    
    | @@ -0,0 +1,113 @@ | |
| 1 | 
            +
            use crate::native_ref_wrap::NativeRefWrap;
         | 
| 2 | 
            +
            use lol_html::html_content::{TextChunk, TextType};
         | 
| 3 | 
            +
            use magnus::{exception, method, Error, Module, RClass, Symbol, Value};
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            struct HTMLTextChunk {
         | 
| 6 | 
            +
                text_chunk: NativeRefWrap<TextChunk<'static>>,
         | 
| 7 | 
            +
            }
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            #[magnus::wrap(class = "Selma::HTML::TextChunk")]
         | 
| 10 | 
            +
            pub struct SelmaHTMLTextChunk(std::cell::RefCell<HTMLTextChunk>);
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            /// SAFETY: This is safe because we only access this data when the GVL is held.
         | 
| 13 | 
            +
            unsafe impl Send for SelmaHTMLTextChunk {}
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            impl SelmaHTMLTextChunk {
         | 
| 16 | 
            +
                pub fn new(text_chunk: &mut TextChunk) -> Self {
         | 
| 17 | 
            +
                    let (ref_wrap, _anchor) = NativeRefWrap::wrap_mut(text_chunk);
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                    Self(std::cell::RefCell::new(HTMLTextChunk {
         | 
| 20 | 
            +
                        text_chunk: ref_wrap,
         | 
| 21 | 
            +
                    }))
         | 
| 22 | 
            +
                }
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                fn to_s(&self) -> Result<String, Error> {
         | 
| 25 | 
            +
                    let binding = self.0.borrow();
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                    if let Ok(tc) = binding.text_chunk.get() {
         | 
| 28 | 
            +
                        Ok(tc.as_str().to_string())
         | 
| 29 | 
            +
                    } else {
         | 
| 30 | 
            +
                        Err(Error::new(
         | 
| 31 | 
            +
                            exception::runtime_error(),
         | 
| 32 | 
            +
                            "`to_s` is not available",
         | 
| 33 | 
            +
                        ))
         | 
| 34 | 
            +
                    }
         | 
| 35 | 
            +
                }
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                fn text_type(&self) -> Result<Symbol, Error> {
         | 
| 38 | 
            +
                    let binding = self.0.borrow();
         | 
| 39 | 
            +
             | 
| 40 | 
            +
                    if let Ok(tc) = binding.text_chunk.get() {
         | 
| 41 | 
            +
                        match tc.text_type() {
         | 
| 42 | 
            +
                            TextType::Data => Ok(Symbol::from("data")),
         | 
| 43 | 
            +
                            TextType::PlainText => Ok(Symbol::from("plain_text")),
         | 
| 44 | 
            +
                            TextType::RawText => Ok(Symbol::from("raw_text")),
         | 
| 45 | 
            +
                            TextType::ScriptData => Ok(Symbol::from("script")),
         | 
| 46 | 
            +
                            TextType::RCData => Ok(Symbol::from("rc_data")),
         | 
| 47 | 
            +
                            TextType::CDataSection => Ok(Symbol::from("cdata_section")),
         | 
| 48 | 
            +
                        }
         | 
| 49 | 
            +
                    } else {
         | 
| 50 | 
            +
                        Err(Error::new(
         | 
| 51 | 
            +
                            exception::runtime_error(),
         | 
| 52 | 
            +
                            "`text_type` is not available",
         | 
| 53 | 
            +
                        ))
         | 
| 54 | 
            +
                    }
         | 
| 55 | 
            +
                }
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                fn before(&self, args: &[Value]) -> Result<(), Error> {
         | 
| 58 | 
            +
                    let mut binding = self.0.borrow_mut();
         | 
| 59 | 
            +
                    let text_chunk = binding.text_chunk.get_mut().unwrap();
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                    let (text_str, content_type) = match crate::scan_text_args(args) {
         | 
| 62 | 
            +
                        Ok((text_str, content_type)) => (text_str, content_type),
         | 
| 63 | 
            +
                        Err(err) => return Err(err),
         | 
| 64 | 
            +
                    };
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                    text_chunk.before(&text_str, content_type);
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                    Ok(())
         | 
| 69 | 
            +
                }
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                fn after(&self, args: &[Value]) -> Result<(), Error> {
         | 
| 72 | 
            +
                    let mut binding = self.0.borrow_mut();
         | 
| 73 | 
            +
                    let text_chunk = binding.text_chunk.get_mut().unwrap();
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                    let (text_str, content_type) = match crate::scan_text_args(args) {
         | 
| 76 | 
            +
                        Ok((text_str, content_type)) => (text_str, content_type),
         | 
| 77 | 
            +
                        Err(err) => return Err(err),
         | 
| 78 | 
            +
                    };
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                    text_chunk.after(&text_str, content_type);
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                    Ok(())
         | 
| 83 | 
            +
                }
         | 
| 84 | 
            +
             | 
| 85 | 
            +
                fn replace(&self, args: &[Value]) -> Result<(), Error> {
         | 
| 86 | 
            +
                    let mut binding = self.0.borrow_mut();
         | 
| 87 | 
            +
                    let text_chunk = binding.text_chunk.get_mut().unwrap();
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                    let (text_str, content_type) = match crate::scan_text_args(args) {
         | 
| 90 | 
            +
                        Ok((text_str, content_type)) => (text_str, content_type),
         | 
| 91 | 
            +
                        Err(err) => return Err(err),
         | 
| 92 | 
            +
                    };
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                    text_chunk.replace(&text_str, content_type);
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                    Ok(())
         | 
| 97 | 
            +
                }
         | 
| 98 | 
            +
            }
         | 
| 99 | 
            +
             | 
| 100 | 
            +
            pub fn init(c_html: RClass) -> Result<(), Error> {
         | 
| 101 | 
            +
                let c_text_chunk = c_html
         | 
| 102 | 
            +
                    .define_class("TextChunk", Default::default())
         | 
| 103 | 
            +
                    .expect("cannot find class Selma::HTML::TextChunk");
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
         | 
| 106 | 
            +
                c_text_chunk.define_method("content", method!(SelmaHTMLTextChunk::to_s, 0))?;
         | 
| 107 | 
            +
                c_text_chunk.define_method("text_type", method!(SelmaHTMLTextChunk::text_type, 0))?;
         | 
| 108 | 
            +
                c_text_chunk.define_method("before", method!(SelmaHTMLTextChunk::before, -1))?;
         | 
| 109 | 
            +
                c_text_chunk.define_method("after", method!(SelmaHTMLTextChunk::after, -1))?;
         | 
| 110 | 
            +
                c_text_chunk.define_method("replace", method!(SelmaHTMLTextChunk::replace, -1))?;
         | 
| 111 | 
            +
             | 
| 112 | 
            +
                Ok(())
         | 
| 113 | 
            +
            }
         | 
    
        data/ext/selma/src/html.rs
    CHANGED
    
    | @@ -9,9 +9,11 @@ pub fn init(m_selma: RModule) -> Result<(), Error> { | |
| 9 9 |  | 
| 10 10 | 
             
                element::init(c_html).expect("cannot define Selma::HTML::Element class");
         | 
| 11 11 | 
             
                end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
         | 
| 12 | 
            +
                text_chunk::init(c_html).expect("cannot define Selma::HTML::TextChunk class");
         | 
| 12 13 |  | 
| 13 14 | 
             
                Ok(())
         | 
| 14 15 | 
             
            }
         | 
| 15 16 |  | 
| 16 17 | 
             
            pub mod element;
         | 
| 17 18 | 
             
            pub mod end_tag;
         | 
| 19 | 
            +
            pub mod text_chunk;
         | 
    
        data/ext/selma/src/lib.rs
    CHANGED
    
    | @@ -1,6 +1,7 @@ | |
| 1 1 | 
             
            extern crate core;
         | 
| 2 2 |  | 
| 3 | 
            -
            use  | 
| 3 | 
            +
            use lol_html::html_content::ContentType;
         | 
| 4 | 
            +
            use magnus::{define_module, exception, scan_args, Error, Symbol, Value};
         | 
| 4 5 |  | 
| 5 6 | 
             
            pub mod html;
         | 
| 6 7 | 
             
            pub mod native_ref_wrap;
         | 
| @@ -10,6 +11,32 @@ pub mod selector; | |
| 10 11 | 
             
            pub mod tags;
         | 
| 11 12 | 
             
            pub mod wrapped_struct;
         | 
| 12 13 |  | 
| 14 | 
            +
            #[allow(clippy::let_unit_value)]
         | 
| 15 | 
            +
            fn scan_text_args(args: &[Value]) -> Result<(String, ContentType), magnus::Error> {
         | 
| 16 | 
            +
                let args = scan_args::scan_args(args)?;
         | 
| 17 | 
            +
                let (text,): (String,) = args.required;
         | 
| 18 | 
            +
                let _: () = args.optional;
         | 
| 19 | 
            +
                let _: () = args.splat;
         | 
| 20 | 
            +
                let _: () = args.trailing;
         | 
| 21 | 
            +
                let _: () = args.block;
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                let kwargs = scan_args::get_kwargs::<_, (Symbol,), (), ()>(args.keywords, &["as"], &[])?;
         | 
| 24 | 
            +
                let as_sym = kwargs.required.0;
         | 
| 25 | 
            +
                let as_sym_str = as_sym.name().unwrap();
         | 
| 26 | 
            +
                let content_type = if as_sym_str == "text" {
         | 
| 27 | 
            +
                    ContentType::Text
         | 
| 28 | 
            +
                } else if as_sym_str == "html" {
         | 
| 29 | 
            +
                    ContentType::Html
         | 
| 30 | 
            +
                } else {
         | 
| 31 | 
            +
                    return Err(Error::new(
         | 
| 32 | 
            +
                        exception::runtime_error(),
         | 
| 33 | 
            +
                        format!("unknown symbol `{as_sym_str:?}`"),
         | 
| 34 | 
            +
                    ));
         | 
| 35 | 
            +
                };
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                Ok((text, content_type))
         | 
| 38 | 
            +
            }
         | 
| 39 | 
            +
             | 
| 13 40 | 
             
            #[magnus::init]
         | 
| 14 41 | 
             
            fn init() -> Result<(), Error> {
         | 
| 15 42 | 
             
                let m_selma = define_module("Selma").expect("cannot define ::Selma module");
         | 
    
        data/ext/selma/src/rewriter.rs
    CHANGED
    
    | @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            use lol_html::{
         | 
| 2 2 | 
             
                doc_comments, doctype, element,
         | 
| 3 | 
            -
                html_content::{ | 
| 3 | 
            +
                html_content::{Element, EndTag, TextChunk},
         | 
| 4 4 | 
             
                text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
         | 
| 5 5 | 
             
            };
         | 
| 6 6 | 
             
            use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
         | 
| @@ -8,7 +8,7 @@ use magnus::{exception, function, method, scan_args, Module, Object, RArray, RMo | |
| 8 8 | 
             
            use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
         | 
| 9 9 |  | 
| 10 10 | 
             
            use crate::{
         | 
| 11 | 
            -
                html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
         | 
| 11 | 
            +
                html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
         | 
| 12 12 | 
             
                sanitizer::SelmaSanitizer,
         | 
| 13 13 | 
             
                selector::SelmaSelector,
         | 
| 14 14 | 
             
                tags::Tag,
         | 
| @@ -43,7 +43,7 @@ unsafe impl Send for SelmaRewriter {} | |
| 43 43 | 
             
            impl SelmaRewriter {
         | 
| 44 44 | 
             
                const SELMA_ON_END_TAG: &str = "on_end_tag";
         | 
| 45 45 | 
             
                const SELMA_HANDLE_ELEMENT: &str = "handle_element";
         | 
| 46 | 
            -
                const  | 
| 46 | 
            +
                const SELMA_HANDLE_TEXT_CHUNK: &str = "handle_text_chunk";
         | 
| 47 47 |  | 
| 48 48 | 
             
                /// @yard
         | 
| 49 49 | 
             
                /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
         | 
| @@ -145,7 +145,7 @@ impl SelmaRewriter { | |
| 145 145 | 
             
                    let _: () = args.trailing;
         | 
| 146 146 | 
             
                    let _: () = args.block;
         | 
| 147 147 |  | 
| 148 | 
            -
                    let  | 
| 148 | 
            +
                    let kwargs = scan_args::get_kwargs::<
         | 
| 149 149 | 
             
                        _,
         | 
| 150 150 | 
             
                        (),
         | 
| 151 151 | 
             
                        (
         | 
| @@ -154,7 +154,7 @@ impl SelmaRewriter { | |
| 154 154 | 
             
                        ),
         | 
| 155 155 | 
             
                        (),
         | 
| 156 156 | 
             
                    >(args.keywords, &[], &["sanitizer", "handlers"])?;
         | 
| 157 | 
            -
                    let (rb_sanitizer, rb_handlers) =  | 
| 157 | 
            +
                    let (rb_sanitizer, rb_handlers) = kwargs.optional;
         | 
| 158 158 |  | 
| 159 159 | 
             
                    Ok((rb_sanitizer, rb_handlers))
         | 
| 160 160 | 
             
                }
         | 
| @@ -162,26 +162,22 @@ impl SelmaRewriter { | |
| 162 162 | 
             
                /// Perform HTML rewrite sequence.
         | 
| 163 163 | 
             
                fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
         | 
| 164 164 | 
             
                    let sanitized_html = match &self.0.borrow().sanitizer {
         | 
| 165 | 
            -
                        None => html,
         | 
| 165 | 
            +
                        None => Ok(html),
         | 
| 166 166 | 
             
                        Some(sanitizer) => {
         | 
| 167 | 
            -
                             | 
| 168 | 
            -
             | 
| 169 | 
            -
             | 
| 170 | 
            -
                             | 
| 171 | 
            -
                            let sanitized_html = Self::perform_sanitization(sanitizer, &html).unwrap();
         | 
| 167 | 
            +
                            let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
         | 
| 168 | 
            +
                                Ok(sanitized_html) => sanitized_html,
         | 
| 169 | 
            +
                                Err(err) => return Err(err),
         | 
| 170 | 
            +
                            };
         | 
| 172 171 |  | 
| 173 | 
            -
                            String::from_utf8(sanitized_html) | 
| 172 | 
            +
                            String::from_utf8(sanitized_html)
         | 
| 174 173 | 
             
                        }
         | 
| 175 174 | 
             
                    };
         | 
| 176 175 | 
             
                    let binding = self.0.borrow_mut();
         | 
| 177 176 | 
             
                    let handlers = &binding.handlers;
         | 
| 178 177 |  | 
| 179 | 
            -
                    match Self::perform_handler_rewrite(self, handlers, sanitized_html) {
         | 
| 178 | 
            +
                    match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
         | 
| 180 179 | 
             
                        Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
         | 
| 181 | 
            -
                        Err(err) => Err( | 
| 182 | 
            -
                            exception::runtime_error(),
         | 
| 183 | 
            -
                            format!("{err:?}"),
         | 
| 184 | 
            -
                        )),
         | 
| 180 | 
            +
                        Err(err) => Err(err),
         | 
| 185 181 | 
             
                    }
         | 
| 186 182 | 
             
                }
         | 
| 187 183 |  | 
| @@ -212,9 +208,10 @@ impl SelmaRewriter { | |
| 212 208 | 
             
                                    if el.removed() {
         | 
| 213 209 | 
             
                                        return Ok(());
         | 
| 214 210 | 
             
                                    }
         | 
| 215 | 
            -
                                    sanitizer.sanitize_attributes(el) | 
| 216 | 
            -
             | 
| 217 | 
            -
             | 
| 211 | 
            +
                                    match sanitizer.sanitize_attributes(el) {
         | 
| 212 | 
            +
                                        Ok(_) => Ok(()),
         | 
| 213 | 
            +
                                        Err(err) => Err(err.to_string().into()),
         | 
| 214 | 
            +
                                    }
         | 
| 218 215 | 
             
                                })],
         | 
| 219 216 | 
             
                                // TODO: allow for MemorySettings to be defined
         | 
| 220 217 | 
             
                                ..Settings::default()
         | 
| @@ -341,7 +338,7 @@ impl SelmaRewriter { | |
| 341 338 | 
             
                                let mut stack = closure_element_stack.as_ref().borrow_mut();
         | 
| 342 339 | 
             
                                stack.pop();
         | 
| 343 340 | 
             
                                Ok(())
         | 
| 344 | 
            -
                            }) | 
| 341 | 
            +
                            })?;
         | 
| 345 342 | 
             
                            Ok(())
         | 
| 346 343 | 
             
                        }));
         | 
| 347 344 | 
             
                    });
         | 
| @@ -375,13 +372,14 @@ impl SelmaRewriter { | |
| 375 372 | 
             
                ) -> Result<(), magnus::Error> {
         | 
| 376 373 | 
             
                    // if `on_end_tag` function is defined, call it
         | 
| 377 374 | 
             
                    if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
         | 
| 375 | 
            +
                        // TODO: error here is an "EndTagError"
         | 
| 378 376 | 
             
                        element.on_end_tag(move |end_tag| {
         | 
| 379 377 | 
             
                            let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
         | 
| 380 378 |  | 
| 381 | 
            -
                            rb_handler
         | 
| 382 | 
            -
                                 | 
| 383 | 
            -
                                . | 
| 384 | 
            -
                             | 
| 379 | 
            +
                            match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
         | 
| 380 | 
            +
                                Ok(_) => Ok(()),
         | 
| 381 | 
            +
                                Err(err) => Err(err.to_string().into()),
         | 
| 382 | 
            +
                            }
         | 
| 385 383 | 
             
                        });
         | 
| 386 384 | 
             
                    }
         | 
| 387 385 |  | 
| @@ -390,40 +388,30 @@ impl SelmaRewriter { | |
| 390 388 | 
             
                        rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
         | 
| 391 389 | 
             
                    match rb_result {
         | 
| 392 390 | 
             
                        Ok(_) => Ok(()),
         | 
| 393 | 
            -
                        Err(err) => Err( | 
| 394 | 
            -
                            exception::runtime_error(),
         | 
| 395 | 
            -
                            format!("{err:?}"),
         | 
| 396 | 
            -
                        )),
         | 
| 391 | 
            +
                        Err(err) => Err(err),
         | 
| 397 392 | 
             
                    }
         | 
| 398 393 | 
             
                }
         | 
| 399 394 |  | 
| 400 | 
            -
                fn process_text_handlers( | 
| 401 | 
            -
                     | 
| 402 | 
            -
                     | 
| 395 | 
            +
                fn process_text_handlers(
         | 
| 396 | 
            +
                    rb_handler: Value,
         | 
| 397 | 
            +
                    text_chunk: &mut TextChunk,
         | 
| 398 | 
            +
                ) -> Result<(), magnus::Error> {
         | 
| 399 | 
            +
                    // prevents missing `handle_text_chunk` function
         | 
| 400 | 
            +
                    let content = text_chunk.as_str();
         | 
| 403 401 |  | 
| 404 402 | 
             
                    // seems that sometimes lol-html returns blank text / EOLs?
         | 
| 405 403 | 
             
                    if content.is_empty() {
         | 
| 406 404 | 
             
                        return Ok(());
         | 
| 407 405 | 
             
                    }
         | 
| 408 406 |  | 
| 409 | 
            -
                    let  | 
| 410 | 
            -
             | 
| 411 | 
            -
             | 
| 412 | 
            -
                         | 
| 413 | 
            -
                            exception:: | 
| 414 | 
            -
                            format!(
         | 
| 415 | 
            -
             | 
| 416 | 
            -
                                Self::SELMA_HANDLE_TEXT,
         | 
| 417 | 
            -
                                rb_result.err().unwrap()
         | 
| 418 | 
            -
                            ),
         | 
| 419 | 
            -
                        ));
         | 
| 407 | 
            +
                    let rb_text_chunk = SelmaHTMLTextChunk::new(text_chunk);
         | 
| 408 | 
            +
                    match rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,)) {
         | 
| 409 | 
            +
                        Ok(_) => Ok(()),
         | 
| 410 | 
            +
                        Err(err) => Err(magnus::Error::new(
         | 
| 411 | 
            +
                            exception::runtime_error(),
         | 
| 412 | 
            +
                            format!("{err:?}"),
         | 
| 413 | 
            +
                        )),
         | 
| 420 414 | 
             
                    }
         | 
| 421 | 
            -
             | 
| 422 | 
            -
                    let new_content = rb_result.unwrap();
         | 
| 423 | 
            -
                    // TODO: can this be an option?
         | 
| 424 | 
            -
                    text.replace(&new_content, ContentType::Html);
         | 
| 425 | 
            -
             | 
| 426 | 
            -
                    Ok(())
         | 
| 427 415 | 
             
                }
         | 
| 428 416 | 
             
            }
         | 
| 429 417 |  | 
    
        data/ext/selma/src/sanitizer.rs
    CHANGED
    
    | @@ -1,12 +1,10 @@ | |
| 1 | 
            -
            use std::{borrow::BorrowMut,  | 
| 1 | 
            +
            use std::{borrow::BorrowMut, collections::HashMap};
         | 
| 2 2 |  | 
| 3 | 
            -
            use lol_html:: | 
| 4 | 
            -
             | 
| 5 | 
            -
                 | 
| 6 | 
            -
                Value,
         | 
| 3 | 
            +
            use lol_html::{
         | 
| 4 | 
            +
                errors::AttributeNameError,
         | 
| 5 | 
            +
                html_content::{Comment, ContentType, Doctype, Element, EndTag},
         | 
| 7 6 | 
             
            };
         | 
| 8 | 
            -
             | 
| 9 | 
            -
            use crate::tags::Tag;
         | 
| 7 | 
            +
            use magnus::{class, function, method, scan_args, Module, Object, RArray, RHash, RModule, Value};
         | 
| 10 8 |  | 
| 11 9 | 
             
            #[derive(Clone, Debug)]
         | 
| 12 10 | 
             
            struct ElementSanitizer {
         | 
| @@ -16,9 +14,21 @@ struct ElementSanitizer { | |
| 16 14 | 
             
                protocol_sanitizers: HashMap<String, Vec<String>>,
         | 
| 17 15 | 
             
            }
         | 
| 18 16 |  | 
| 17 | 
            +
            impl Default for ElementSanitizer {
         | 
| 18 | 
            +
                fn default() -> Self {
         | 
| 19 | 
            +
                    ElementSanitizer {
         | 
| 20 | 
            +
                        allowed_attrs: vec![],
         | 
| 21 | 
            +
                        allowed_classes: vec![],
         | 
| 22 | 
            +
                        required_attrs: vec![],
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                        protocol_sanitizers: HashMap::new(),
         | 
| 25 | 
            +
                    }
         | 
| 26 | 
            +
                }
         | 
| 27 | 
            +
            }
         | 
| 28 | 
            +
             | 
| 19 29 | 
             
            #[derive(Clone, Debug)]
         | 
| 20 30 | 
             
            pub struct Sanitizer {
         | 
| 21 | 
            -
                flags: [u8; Tag::TAG_COUNT],
         | 
| 31 | 
            +
                flags: [u8; crate::tags::Tag::TAG_COUNT],
         | 
| 22 32 | 
             
                allowed_attrs: Vec<String>,
         | 
| 23 33 | 
             
                allowed_classes: Vec<String>,
         | 
| 24 34 | 
             
                element_sanitizers: HashMap<String, ElementSanitizer>,
         | 
| @@ -39,7 +49,7 @@ impl SelmaSanitizer { | |
| 39 49 | 
             
                const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
         | 
| 40 50 | 
             
                const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
         | 
| 41 51 |  | 
| 42 | 
            -
                pub fn new(arguments: &[Value]) -> Result<Self, Error> {
         | 
| 52 | 
            +
                pub fn new(arguments: &[Value]) -> Result<Self, magnus::Error> {
         | 
| 43 53 | 
             
                    let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
         | 
| 44 54 | 
             
                    let (opt_config,): (Option<RHash>,) = args.optional;
         | 
| 45 55 |  | 
| @@ -50,19 +60,16 @@ impl SelmaSanitizer { | |
| 50 60 | 
             
                    };
         | 
| 51 61 |  | 
| 52 62 | 
             
                    let mut element_sanitizers = HashMap::new();
         | 
| 53 | 
            -
                    Tag::html_tags().iter().for_each(|html_tag| {
         | 
| 54 | 
            -
                        let es = ElementSanitizer | 
| 55 | 
            -
             | 
| 56 | 
            -
                             | 
| 57 | 
            -
                             | 
| 58 | 
            -
             | 
| 59 | 
            -
                            protocol_sanitizers: HashMap::new(),
         | 
| 60 | 
            -
                        };
         | 
| 61 | 
            -
                        element_sanitizers.insert(Tag::element_name_from_enum(html_tag).to_string(), es);
         | 
| 63 | 
            +
                    crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
         | 
| 64 | 
            +
                        let es = ElementSanitizer::default();
         | 
| 65 | 
            +
                        element_sanitizers.insert(
         | 
| 66 | 
            +
                            crate::tags::Tag::element_name_from_enum(html_tag).to_string(),
         | 
| 67 | 
            +
                            es,
         | 
| 68 | 
            +
                        );
         | 
| 62 69 | 
             
                    });
         | 
| 63 70 |  | 
| 64 71 | 
             
                    Ok(Self(std::cell::RefCell::new(Sanitizer {
         | 
| 65 | 
            -
                        flags: [0; Tag::TAG_COUNT],
         | 
| 72 | 
            +
                        flags: [0; crate::tags::Tag::TAG_COUNT],
         | 
| 66 73 | 
             
                        allowed_attrs: vec![],
         | 
| 67 74 | 
             
                        allowed_classes: vec![],
         | 
| 68 75 | 
             
                        element_sanitizers,
         | 
| @@ -74,7 +81,7 @@ impl SelmaSanitizer { | |
| 74 81 | 
             
                    })))
         | 
| 75 82 | 
             
                }
         | 
| 76 83 |  | 
| 77 | 
            -
                fn get_config(&self) -> Result<RHash, Error> {
         | 
| 84 | 
            +
                fn get_config(&self) -> Result<RHash, magnus::Error> {
         | 
| 78 85 | 
             
                    let binding = self.0.borrow();
         | 
| 79 86 |  | 
| 80 87 | 
             
                    Ok(binding.config)
         | 
| @@ -82,7 +89,7 @@ impl SelmaSanitizer { | |
| 82 89 |  | 
| 83 90 | 
             
                /// Toggle a sanitizer option on or off.
         | 
| 84 91 | 
             
                fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
         | 
| 85 | 
            -
                    let tag = Tag::tag_from_tag_name(tag_name.as_str());
         | 
| 92 | 
            +
                    let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
         | 
| 86 93 | 
             
                    if set {
         | 
| 87 94 | 
             
                        self.0.borrow_mut().flags[tag.index] |= flag;
         | 
| 88 95 | 
             
                    } else {
         | 
| @@ -93,13 +100,19 @@ impl SelmaSanitizer { | |
| 93 100 | 
             
                /// Toggles all sanitization options on or off.
         | 
| 94 101 | 
             
                fn set_all_flags(&self, flag: u8, set: bool) {
         | 
| 95 102 | 
             
                    if set {
         | 
| 96 | 
            -
                        Tag::html_tags() | 
| 97 | 
            -
                             | 
| 98 | 
            -
             | 
| 103 | 
            +
                        crate::tags::Tag::html_tags()
         | 
| 104 | 
            +
                            .iter()
         | 
| 105 | 
            +
                            .enumerate()
         | 
| 106 | 
            +
                            .for_each(|(iter, _)| {
         | 
| 107 | 
            +
                                self.0.borrow_mut().flags[iter] |= flag;
         | 
| 108 | 
            +
                            });
         | 
| 99 109 | 
             
                    } else {
         | 
| 100 | 
            -
                        Tag::html_tags() | 
| 101 | 
            -
                             | 
| 102 | 
            -
             | 
| 110 | 
            +
                        crate::tags::Tag::html_tags()
         | 
| 111 | 
            +
                            .iter()
         | 
| 112 | 
            +
                            .enumerate()
         | 
| 113 | 
            +
                            .for_each(|(iter, _)| {
         | 
| 114 | 
            +
                                self.0.borrow_mut().flags[iter] &= flag;
         | 
| 115 | 
            +
                            });
         | 
| 103 116 | 
             
                    }
         | 
| 104 117 | 
             
                }
         | 
| 105 118 |  | 
| @@ -111,8 +124,8 @@ impl SelmaSanitizer { | |
| 111 124 |  | 
| 112 125 | 
             
                pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
         | 
| 113 126 | 
             
                    if self.0.borrow().escape_tagfilter {
         | 
| 114 | 
            -
                        let tag = Tag::tag_from_element(e);
         | 
| 115 | 
            -
                        if Tag::is_tag_escapeworthy(tag) {
         | 
| 127 | 
            +
                        let tag = crate::tags::Tag::tag_from_element(e);
         | 
| 128 | 
            +
                        if crate::tags::Tag::is_tag_escapeworthy(tag) {
         | 
| 116 129 | 
             
                            e.remove();
         | 
| 117 130 | 
             
                            return true;
         | 
| 118 131 | 
             
                        }
         | 
| @@ -162,7 +175,8 @@ impl SelmaSanitizer { | |
| 162 175 | 
             
                        let allowed_attrs = &mut binding.allowed_attrs;
         | 
| 163 176 | 
             
                        Self::set_allowed(allowed_attrs, &attr_name, allow);
         | 
| 164 177 | 
             
                    } else {
         | 
| 165 | 
            -
                        let  | 
| 178 | 
            +
                        let element_sanitizers = &mut binding.element_sanitizers;
         | 
| 179 | 
            +
                        let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
         | 
| 166 180 |  | 
| 167 181 | 
             
                        element_sanitizer.allowed_attrs.push(attr_name);
         | 
| 168 182 | 
             
                    }
         | 
| @@ -176,7 +190,8 @@ impl SelmaSanitizer { | |
| 176 190 | 
             
                        let allowed_classes = &mut binding.allowed_classes;
         | 
| 177 191 | 
             
                        Self::set_allowed(allowed_classes, &class_name, allow);
         | 
| 178 192 | 
             
                    } else {
         | 
| 179 | 
            -
                        let  | 
| 193 | 
            +
                        let element_sanitizers = &mut binding.element_sanitizers;
         | 
| 194 | 
            +
                        let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
         | 
| 180 195 |  | 
| 181 196 | 
             
                        let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
         | 
| 182 197 | 
             
                        Self::set_allowed(allowed_classes, &class_name, allow)
         | 
| @@ -187,9 +202,10 @@ impl SelmaSanitizer { | |
| 187 202 | 
             
                fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
         | 
| 188 203 | 
             
                    let mut binding = self.0.borrow_mut();
         | 
| 189 204 |  | 
| 190 | 
            -
                    let  | 
| 205 | 
            +
                    let element_sanitizers = &mut binding.element_sanitizers;
         | 
| 206 | 
            +
                    let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
         | 
| 191 207 |  | 
| 192 | 
            -
                    let protocol_sanitizers = element_sanitizer.protocol_sanitizers.borrow_mut();
         | 
| 208 | 
            +
                    let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
         | 
| 193 209 |  | 
| 194 210 | 
             
                    for opt_allowed_protocol in allow_list.each() {
         | 
| 195 211 | 
             
                        let allowed_protocol = opt_allowed_protocol.unwrap();
         | 
| @@ -229,10 +245,16 @@ impl SelmaSanitizer { | |
| 229 245 | 
             
                    }
         | 
| 230 246 | 
             
                }
         | 
| 231 247 |  | 
| 232 | 
            -
                pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(),  | 
| 233 | 
            -
                    let  | 
| 234 | 
            -
                    let  | 
| 235 | 
            -
                    let element_sanitizer =  | 
| 248 | 
            +
                pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), AttributeNameError> {
         | 
| 249 | 
            +
                    let tag = crate::tags::Tag::tag_from_element(element);
         | 
| 250 | 
            +
                    let tag_name = &element.tag_name();
         | 
| 251 | 
            +
                    let element_sanitizer = {
         | 
| 252 | 
            +
                        let mut binding = self.0.borrow_mut();
         | 
| 253 | 
            +
                        let element_sanitizers = &mut binding.element_sanitizers;
         | 
| 254 | 
            +
                        Self::get_element_sanitizer(element_sanitizers, tag_name).clone()
         | 
| 255 | 
            +
                    };
         | 
| 256 | 
            +
             | 
| 257 | 
            +
                    let binding = self.0.borrow();
         | 
| 236 258 |  | 
| 237 259 | 
             
                    // FIXME: This is a hack to get around the fact that we can't borrow
         | 
| 238 260 | 
             
                    let attribute_map: HashMap<String, String> = element
         | 
| @@ -255,26 +277,30 @@ impl SelmaSanitizer { | |
| 255 277 | 
             
                        let x = escapist::unescape_html(trimmed.as_bytes());
         | 
| 256 278 | 
             
                        let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
         | 
| 257 279 |  | 
| 258 | 
            -
                         | 
| 280 | 
            +
                        let should_keep_attrubute = match Self::should_keep_attribute(
         | 
| 259 281 | 
             
                            &binding,
         | 
| 260 282 | 
             
                            element,
         | 
| 261 | 
            -
                            element_sanitizer,
         | 
| 283 | 
            +
                            &element_sanitizer,
         | 
| 262 284 | 
             
                            attr_name,
         | 
| 263 285 | 
             
                            &unescaped_attr_val,
         | 
| 264 286 | 
             
                        ) {
         | 
| 287 | 
            +
                            Ok(should_keep) => should_keep,
         | 
| 288 | 
            +
                            Err(e) => {
         | 
| 289 | 
            +
                                return Err(e);
         | 
| 290 | 
            +
                            }
         | 
| 291 | 
            +
                        };
         | 
| 292 | 
            +
             | 
| 293 | 
            +
                        if !should_keep_attrubute {
         | 
| 265 294 | 
             
                            element.remove_attribute(attr_name);
         | 
| 266 295 | 
             
                        } else {
         | 
| 267 296 | 
             
                            // Prevent the use of `<meta>` elements that set a charset other than UTF-8,
         | 
| 268 297 | 
             
                            // since output is always UTF-8.
         | 
| 269 | 
            -
                            if Tag::is_meta(tag) {
         | 
| 298 | 
            +
                            if crate::tags::Tag::is_meta(tag) {
         | 
| 270 299 | 
             
                                if attr_name == "charset" && unescaped_attr_val != "utf-8" {
         | 
| 271 300 | 
             
                                    match element.set_attribute(attr_name, "utf-8") {
         | 
| 272 301 | 
             
                                        Ok(_) => {}
         | 
| 273 | 
            -
                                        Err( | 
| 274 | 
            -
                                            return Err( | 
| 275 | 
            -
                                                exception::runtime_error(),
         | 
| 276 | 
            -
                                                format!("Unable to change {attr_name:?}"),
         | 
| 277 | 
            -
                                            ));
         | 
| 302 | 
            +
                                        Err(err) => {
         | 
| 303 | 
            +
                                            return Err(err);
         | 
| 278 304 | 
             
                                        }
         | 
| 279 305 | 
             
                                    }
         | 
| 280 306 | 
             
                                }
         | 
| @@ -282,13 +308,17 @@ impl SelmaSanitizer { | |
| 282 308 | 
             
                                let mut buf = String::new();
         | 
| 283 309 | 
             
                                // ...then, escape any special characters, for security
         | 
| 284 310 | 
             
                                if attr_name == "href" {
         | 
| 285 | 
            -
                                     | 
| 286 | 
            -
                                    escapist::escape_href(&mut buf, unescaped_attr_val.to_string().as_str());
         | 
| 311 | 
            +
                                    escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
         | 
| 287 312 | 
             
                                } else {
         | 
| 288 | 
            -
                                    escapist::escape_html(&mut buf, unescaped_attr_val. | 
| 313 | 
            +
                                    escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
         | 
| 289 314 | 
             
                                };
         | 
| 290 315 |  | 
| 291 | 
            -
                                element.set_attribute(attr_name, &buf) | 
| 316 | 
            +
                                match element.set_attribute(attr_name, &buf) {
         | 
| 317 | 
            +
                                    Ok(_) => {}
         | 
| 318 | 
            +
                                    Err(err) => {
         | 
| 319 | 
            +
                                        return Err(err);
         | 
| 320 | 
            +
                                    }
         | 
| 321 | 
            +
                                }
         | 
| 292 322 | 
             
                            }
         | 
| 293 323 | 
             
                        }
         | 
| 294 324 | 
             
                    }
         | 
| @@ -308,12 +338,12 @@ impl SelmaSanitizer { | |
| 308 338 | 
             
                }
         | 
| 309 339 |  | 
| 310 340 | 
             
                fn should_keep_attribute(
         | 
| 311 | 
            -
                    binding: & | 
| 341 | 
            +
                    binding: &Sanitizer,
         | 
| 312 342 | 
             
                    element: &mut Element,
         | 
| 313 343 | 
             
                    element_sanitizer: &ElementSanitizer,
         | 
| 314 344 | 
             
                    attr_name: &String,
         | 
| 315 345 | 
             
                    attr_val: &String,
         | 
| 316 | 
            -
                ) -> bool {
         | 
| 346 | 
            +
                ) -> Result<bool, AttributeNameError> {
         | 
| 317 347 | 
             
                    let mut allowed: bool = false;
         | 
| 318 348 | 
             
                    let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
         | 
| 319 349 | 
             
                    let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
         | 
| @@ -327,7 +357,7 @@ impl SelmaSanitizer { | |
| 327 357 | 
             
                    }
         | 
| 328 358 |  | 
| 329 359 | 
             
                    if !allowed {
         | 
| 330 | 
            -
                        return false;
         | 
| 360 | 
            +
                        return Ok(false);
         | 
| 331 361 | 
             
                    }
         | 
| 332 362 |  | 
| 333 363 | 
             
                    let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
         | 
| @@ -335,32 +365,29 @@ impl SelmaSanitizer { | |
| 335 365 | 
             
                        None => {
         | 
| 336 366 | 
             
                            // has a protocol, but no sanitization list
         | 
| 337 367 | 
             
                            if !attr_val.is_empty() && Self::has_protocol(attr_val) {
         | 
| 338 | 
            -
                                return false;
         | 
| 368 | 
            +
                                return Ok(false);
         | 
| 339 369 | 
             
                            }
         | 
| 340 370 | 
             
                        }
         | 
| 341 371 | 
             
                        Some(protocol_sanitizer_values) => {
         | 
| 342 372 | 
             
                            if !attr_val.is_empty()
         | 
| 343 373 | 
             
                                && !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
         | 
| 344 374 | 
             
                            {
         | 
| 345 | 
            -
                                return false;
         | 
| 375 | 
            +
                                return Ok(false);
         | 
| 346 376 | 
             
                            }
         | 
| 347 377 | 
             
                        }
         | 
| 348 378 | 
             
                    }
         | 
| 349 379 |  | 
| 350 | 
            -
                    if attr_name == "class"
         | 
| 351 | 
            -
                         | 
| 380 | 
            +
                    if attr_name == "class" {
         | 
| 381 | 
            +
                        return Self::sanitize_class_attribute(
         | 
| 352 382 | 
             
                            binding,
         | 
| 353 383 | 
             
                            element,
         | 
| 354 384 | 
             
                            element_sanitizer,
         | 
| 355 385 | 
             
                            attr_name,
         | 
| 356 386 | 
             
                            attr_val,
         | 
| 357 | 
            -
                        )
         | 
| 358 | 
            -
                        .unwrap()
         | 
| 359 | 
            -
                    {
         | 
| 360 | 
            -
                        return false;
         | 
| 387 | 
            +
                        );
         | 
| 361 388 | 
             
                    }
         | 
| 362 389 |  | 
| 363 | 
            -
                    true
         | 
| 390 | 
            +
                    Ok(true)
         | 
| 364 391 | 
             
                }
         | 
| 365 392 |  | 
| 366 393 | 
             
                fn has_protocol(attr_val: &str) -> bool {
         | 
| @@ -398,12 +425,12 @@ impl SelmaSanitizer { | |
| 398 425 | 
             
                }
         | 
| 399 426 |  | 
| 400 427 | 
             
                fn sanitize_class_attribute(
         | 
| 401 | 
            -
                    binding: & | 
| 428 | 
            +
                    binding: &Sanitizer,
         | 
| 402 429 | 
             
                    element: &mut Element,
         | 
| 403 430 | 
             
                    element_sanitizer: &ElementSanitizer,
         | 
| 404 431 | 
             
                    attr_name: &str,
         | 
| 405 432 | 
             
                    attr_val: &str,
         | 
| 406 | 
            -
                ) -> Result<bool,  | 
| 433 | 
            +
                ) -> Result<bool, lol_html::errors::AttributeNameError> {
         | 
| 407 434 | 
             
                    let allowed_global = &binding.allowed_classes;
         | 
| 408 435 |  | 
| 409 436 | 
             
                    let mut valid_classes: Vec<String> = vec![];
         | 
| @@ -431,28 +458,25 @@ impl SelmaSanitizer { | |
| 431 458 |  | 
| 432 459 | 
             
                    match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
         | 
| 433 460 | 
             
                        Ok(_) => Ok(true),
         | 
| 434 | 
            -
                        Err(err) => Err( | 
| 435 | 
            -
                            exception::runtime_error(),
         | 
| 436 | 
            -
                            format!("AttributeNameError: {err:?}"),
         | 
| 437 | 
            -
                        )),
         | 
| 461 | 
            +
                        Err(err) => Err(err),
         | 
| 438 462 | 
             
                    }
         | 
| 439 463 | 
             
                }
         | 
| 440 464 |  | 
| 441 465 | 
             
                pub fn allow_element(&self, element: &mut Element) -> bool {
         | 
| 442 | 
            -
                    let tag = Tag::tag_from_element(element);
         | 
| 466 | 
            +
                    let tag = crate::tags::Tag::tag_from_element(element);
         | 
| 443 467 | 
             
                    let flags: u8 = self.0.borrow().flags[tag.index];
         | 
| 444 468 |  | 
| 445 469 | 
             
                    (flags & Self::SELMA_SANITIZER_ALLOW) == 0
         | 
| 446 470 | 
             
                }
         | 
| 447 471 |  | 
| 448 472 | 
             
                pub fn try_remove_element(&self, element: &mut Element) -> bool {
         | 
| 449 | 
            -
                    let tag = Tag::tag_from_element(element);
         | 
| 473 | 
            +
                    let tag = crate::tags::Tag::tag_from_element(element);
         | 
| 450 474 | 
             
                    let flags: u8 = self.0.borrow().flags[tag.index];
         | 
| 451 475 |  | 
| 452 476 | 
             
                    let should_remove = !element.removed() && self.allow_element(element);
         | 
| 453 477 |  | 
| 454 478 | 
             
                    if should_remove {
         | 
| 455 | 
            -
                        if Tag::has_text_content(tag) {
         | 
| 479 | 
            +
                        if crate::tags::Tag::has_text_content(tag) {
         | 
| 456 480 | 
             
                            Self::remove_element(
         | 
| 457 481 | 
             
                                element,
         | 
| 458 482 | 
             
                                tag.self_closing,
         | 
| @@ -465,7 +489,7 @@ impl SelmaSanitizer { | |
| 465 489 | 
             
                        Self::check_if_end_tag_needs_removal(element);
         | 
| 466 490 | 
             
                    } else {
         | 
| 467 491 | 
             
                        // anything in <iframe> must be removed, if it's kept
         | 
| 468 | 
            -
                        if Tag::is_iframe(tag) {
         | 
| 492 | 
            +
                        if crate::tags::Tag::is_iframe(tag) {
         | 
| 469 493 | 
             
                            if self.0.borrow().flags[tag.index] != 0 {
         | 
| 470 494 | 
             
                                element.set_inner_content(" ", ContentType::Text);
         | 
| 471 495 | 
             
                            } else {
         | 
| @@ -497,14 +521,14 @@ impl SelmaSanitizer { | |
| 497 521 | 
             
                }
         | 
| 498 522 |  | 
| 499 523 | 
             
                pub fn force_remove_element(&self, element: &mut Element) {
         | 
| 500 | 
            -
                    let tag = Tag::tag_from_element(element);
         | 
| 524 | 
            +
                    let tag = crate::tags::Tag::tag_from_element(element);
         | 
| 501 525 | 
             
                    let self_closing = tag.self_closing;
         | 
| 502 526 | 
             
                    Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
         | 
| 503 527 | 
             
                    Self::check_if_end_tag_needs_removal(element);
         | 
| 504 528 | 
             
                }
         | 
| 505 529 |  | 
| 506 530 | 
             
                fn check_if_end_tag_needs_removal(element: &mut Element) {
         | 
| 507 | 
            -
                    if element.removed() && !Tag::tag_from_element(element).self_closing {
         | 
| 531 | 
            +
                    if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
         | 
| 508 532 | 
             
                        element
         | 
| 509 533 | 
             
                            .on_end_tag(move |end| {
         | 
| 510 534 | 
             
                                Self::remove_end_tag(end);
         | 
| @@ -519,21 +543,16 @@ impl SelmaSanitizer { | |
| 519 543 | 
             
                }
         | 
| 520 544 |  | 
| 521 545 | 
             
                fn get_element_sanitizer<'a>(
         | 
| 522 | 
            -
                     | 
| 523 | 
            -
                    element_name: &str,
         | 
| 524 | 
            -
                ) -> &'a ElementSanitizer {
         | 
| 525 | 
            -
                    binding.element_sanitizers.get(element_name).unwrap()
         | 
| 526 | 
            -
                }
         | 
| 527 | 
            -
             | 
| 528 | 
            -
                fn get_mut_element_sanitizer<'a>(
         | 
| 529 | 
            -
                    binding: &'a mut Sanitizer,
         | 
| 546 | 
            +
                    element_sanitizers: &'a mut HashMap<String, ElementSanitizer>,
         | 
| 530 547 | 
             
                    element_name: &str,
         | 
| 531 548 | 
             
                ) -> &'a mut ElementSanitizer {
         | 
| 532 | 
            -
                     | 
| 549 | 
            +
                    element_sanitizers
         | 
| 550 | 
            +
                        .entry(element_name.to_string())
         | 
| 551 | 
            +
                        .or_insert_with(ElementSanitizer::default)
         | 
| 533 552 | 
             
                }
         | 
| 534 553 | 
             
            }
         | 
| 535 554 |  | 
| 536 | 
            -
            pub fn init(m_selma: RModule) -> Result<(), Error> {
         | 
| 555 | 
            +
            pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
         | 
| 537 556 | 
             
                let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;
         | 
| 538 557 |  | 
| 539 558 | 
             
                c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
         | 
    
        data/ext/selma/src/tags.rs
    CHANGED
    
    | @@ -192,14 +192,17 @@ impl Tag { | |
| 192 192 | 
             
                /// Is this tag something which needs to be removed?
         | 
| 193 193 | 
             
                pub fn is_tag_escapeworthy(tag: Tag) -> bool {
         | 
| 194 194 | 
             
                    tag.index == HTMLTag::TITLE as usize
         | 
| 195 | 
            -
                        || tag.index == HTMLTag::TEXTAREA as usize
         | 
| 196 | 
            -
                        || tag.index == HTMLTag::STYLE as usize
         | 
| 197 | 
            -
                        || tag.index == HTMLTag::XMP as usize
         | 
| 198 195 | 
             
                        || tag.index == HTMLTag::IFRAME as usize
         | 
| 196 | 
            +
                        || tag.index == HTMLTag::MATH as usize
         | 
| 199 197 | 
             
                        || tag.index == HTMLTag::NOEMBED as usize
         | 
| 200 198 | 
             
                        || tag.index == HTMLTag::NOFRAMES as usize
         | 
| 201 | 
            -
                        || tag.index == HTMLTag:: | 
| 199 | 
            +
                        || tag.index == HTMLTag::NOSCRIPT as usize
         | 
| 202 200 | 
             
                        || tag.index == HTMLTag::PLAINTEXT as usize
         | 
| 201 | 
            +
                        || tag.index == HTMLTag::SCRIPT as usize
         | 
| 202 | 
            +
                        || tag.index == HTMLTag::STYLE as usize
         | 
| 203 | 
            +
                        || tag.index == HTMLTag::SVG as usize
         | 
| 204 | 
            +
                        || tag.index == HTMLTag::TEXTAREA as usize
         | 
| 205 | 
            +
                        || tag.index == HTMLTag::XMP as usize
         | 
| 203 206 | 
             
                }
         | 
| 204 207 |  | 
| 205 208 | 
             
                pub const ESCAPEWORTHY_TAGS_CSS: &str =
         | 
    
        data/lib/selma/3.1/selma.so
    CHANGED
    
    | Binary file | 
| @@ -3,6 +3,10 @@ | |
| 3 3 | 
             
            module Selma
         | 
| 4 4 | 
             
              class Sanitizer
         | 
| 5 5 | 
             
                module Config
         | 
| 6 | 
            +
                  # although there are many more protocol types, eg., ftp, xmpp, etc.,
         | 
| 7 | 
            +
                  # these are the only ones that are allowed by default
         | 
| 8 | 
            +
                  VALID_PROTOCOLS = ["http", "https", "mailto", :relative]
         | 
| 9 | 
            +
             | 
| 6 10 | 
             
                  DEFAULT = freeze_config(
         | 
| 7 11 | 
             
                    # Whether or not to allow HTML comments. Allowing comments is strongly
         | 
| 8 12 | 
             
                    # discouraged, since IE allows script execution within conditional
         | 
    
        data/lib/selma/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: selma
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0. | 
| 4 | 
            +
              version: 0.0.5
         | 
| 5 5 | 
             
            platform: aarch64-linux
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Garen J. Torikian
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2022-12- | 
| 11 | 
            +
            date: 2022-12-27 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rb_sys
         | 
| @@ -81,6 +81,7 @@ files: | |
| 81 81 | 
             
            - ext/selma/src/html.rs
         | 
| 82 82 | 
             
            - ext/selma/src/html/element.rs
         | 
| 83 83 | 
             
            - ext/selma/src/html/end_tag.rs
         | 
| 84 | 
            +
            - ext/selma/src/html/text_chunk.rs
         | 
| 84 85 | 
             
            - ext/selma/src/lib.rs
         | 
| 85 86 | 
             
            - ext/selma/src/native_ref_wrap.rs
         | 
| 86 87 | 
             
            - ext/selma/src/rewriter.rs
         |