selma 0.0.2-arm64-darwin → 0.0.3-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/selma/Cargo.toml +2 -2
- data/ext/selma/src/html/element.rs +6 -6
- data/ext/selma/src/native_ref_wrap.rs +3 -3
- data/ext/selma/src/rewriter.rs +15 -15
- data/ext/selma/src/sanitizer.rs +17 -7
- data/ext/selma/src/selector.rs +2 -5
- data/lib/selma/3.1/selma.bundle +0 -0
- data/lib/selma/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: f9939e73d5832b8d53b08f867bf218ab761211a6390f2da824889a7ec9d95516
         | 
| 4 | 
            +
              data.tar.gz: e605aa93a58224fef7dc76c6fe587903a57cc03339f68da3cfc7f801bfa0228c
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 71c3adb9c05a949eaf4079c72db7e5d1a2c8c1a3bbc20190dbc0dc62fc7d1105452c222ae0b4594694008efaa06bdb2ed57ae54927ed93d2b3716b2f742983f2
         | 
| 7 | 
            +
              data.tar.gz: 431c58a0824a3b711724e95809323009061ca750bf9fa9a70999f31e454b194d537edfefdd287aa887416e3c624a1e9e354b28251c6caa0fe2b144e9bb75687d
         | 
    
        data/ext/selma/Cargo.toml
    CHANGED
    
    | @@ -6,8 +6,8 @@ edition = "2021" | |
| 6 6 | 
             
            [dependencies]
         | 
| 7 7 | 
             
            enum-iterator = "1.2"
         | 
| 8 8 | 
             
            escapist = "0.0.1"
         | 
| 9 | 
            -
            magnus = " | 
| 10 | 
            -
            lol_html =  | 
| 9 | 
            +
            magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
         | 
| 10 | 
            +
            lol_html = "0.3"
         | 
| 11 11 |  | 
| 12 12 | 
             
            [lib]
         | 
| 13 13 | 
             
            name = "selma"
         | 
    
        data/ext/selma/src/html/element.rs
    CHANGED
    
    | @@ -51,7 +51,7 @@ impl SelmaHTMLElement { | |
| 51 51 | 
             
                            Ok(_) => Ok(value),
         | 
| 52 52 | 
             
                            Err(err) => Err(Error::new(
         | 
| 53 53 | 
             
                                exception::runtime_error(),
         | 
| 54 | 
            -
                                format!("AttributeNameError: {}" | 
| 54 | 
            +
                                format!("AttributeNameError: {err:?}"),
         | 
| 55 55 | 
             
                            )),
         | 
| 56 56 | 
             
                        }
         | 
| 57 57 | 
             
                    } else {
         | 
| @@ -81,7 +81,7 @@ impl SelmaHTMLElement { | |
| 81 81 | 
             
                                Ok(_) => {}
         | 
| 82 82 | 
             
                                Err(err) => Err(Error::new(
         | 
| 83 83 | 
             
                                    exception::runtime_error(),
         | 
| 84 | 
            -
                                    format!("AttributeNameError: {}" | 
| 84 | 
            +
                                    format!("AttributeNameError: {err:?}"),
         | 
| 85 85 | 
             
                                ))
         | 
| 86 86 | 
             
                                .unwrap(),
         | 
| 87 87 | 
             
                            });
         | 
| @@ -99,7 +99,7 @@ impl SelmaHTMLElement { | |
| 99 99 | 
             
                        .for_each(|ancestor| match array.push(RString::new(ancestor)) {
         | 
| 100 100 | 
             
                            Ok(_) => {}
         | 
| 101 101 | 
             
                            Err(err) => {
         | 
| 102 | 
            -
                                Err(Error::new(exception::runtime_error(), format!("{}" | 
| 102 | 
            +
                                Err(Error::new(exception::runtime_error(), format!("{err:?}"))).unwrap()
         | 
| 103 103 | 
             
                            }
         | 
| 104 104 | 
             
                        });
         | 
| 105 105 |  | 
| @@ -151,18 +151,18 @@ impl SelmaHTMLElement { | |
| 151 151 |  | 
| 152 152 | 
             
                fn find_content_type(content_type: Symbol) -> ContentType {
         | 
| 153 153 | 
             
                    match content_type.name() {
         | 
| 154 | 
            -
                        Ok(name) => match  | 
| 154 | 
            +
                        Ok(name) => match name {
         | 
| 155 155 | 
             
                            Cow::Borrowed("as_text") => ContentType::Text,
         | 
| 156 156 | 
             
                            Cow::Borrowed("as_html") => ContentType::Html,
         | 
| 157 157 | 
             
                            _ => Err(Error::new(
         | 
| 158 158 | 
             
                                exception::runtime_error(),
         | 
| 159 | 
            -
                                format!("unknown symbol `{}`" | 
| 159 | 
            +
                                format!("unknown symbol `{name:?}`"),
         | 
| 160 160 | 
             
                            ))
         | 
| 161 161 | 
             
                            .unwrap(),
         | 
| 162 162 | 
             
                        },
         | 
| 163 163 | 
             
                        Err(err) => Err(Error::new(
         | 
| 164 164 | 
             
                            exception::runtime_error(),
         | 
| 165 | 
            -
                            format!("Could not unwrap symbol"),
         | 
| 165 | 
            +
                            format!("Could not unwrap symbol: {err:?}"),
         | 
| 166 166 | 
             
                        ))
         | 
| 167 167 | 
             
                        .unwrap(),
         | 
| 168 168 | 
             
                    }
         | 
    
        data/ext/selma/src/native_ref_wrap.rs
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            use std::{cell::Cell, marker::PhantomData,  | 
| 1 | 
            +
            use std::{cell::Cell, marker::PhantomData, rc::Rc};
         | 
| 2 2 |  | 
| 3 3 | 
             
            // NOTE: My Rust isn't good enough to know what any of this does,
         | 
| 4 4 | 
             
            // but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
         | 
| @@ -37,7 +37,7 @@ pub struct NativeRefWrap<R> { | |
| 37 37 | 
             
            impl<R> NativeRefWrap<R> {
         | 
| 38 38 | 
             
                pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
         | 
| 39 39 | 
             
                    let wrap = NativeRefWrap {
         | 
| 40 | 
            -
                        inner_ptr:  | 
| 40 | 
            +
                        inner_ptr: inner as *const I as *mut R,
         | 
| 41 41 | 
             
                        poisoned: Rc::new(Cell::new(false)),
         | 
| 42 42 | 
             
                    };
         | 
| 43 43 |  | 
| @@ -48,7 +48,7 @@ impl<R> NativeRefWrap<R> { | |
| 48 48 |  | 
| 49 49 | 
             
                pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor) {
         | 
| 50 50 | 
             
                    let wrap = NativeRefWrap {
         | 
| 51 | 
            -
                        inner_ptr:  | 
| 51 | 
            +
                        inner_ptr: inner as *mut I as *mut R,
         | 
| 52 52 | 
             
                        poisoned: Rc::new(Cell::new(false)),
         | 
| 53 53 | 
             
                    };
         | 
| 54 54 |  | 
    
        data/ext/selma/src/rewriter.rs
    CHANGED
    
    | @@ -1,5 +1,3 @@ | |
| 1 | 
            -
            use std::{borrow::Cow, cell::RefCell, rc::Rc};
         | 
| 2 | 
            -
             | 
| 3 1 | 
             
            use lol_html::{
         | 
| 4 2 | 
             
                doc_comments, doctype, element,
         | 
| 5 3 | 
             
                html_content::{ContentType, Element, EndTag, TextChunk},
         | 
| @@ -7,6 +5,8 @@ use lol_html::{ | |
| 7 5 | 
             
            };
         | 
| 8 6 | 
             
            use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
         | 
| 9 7 |  | 
| 8 | 
            +
            use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
         | 
| 9 | 
            +
             | 
| 10 10 | 
             
            use crate::{
         | 
| 11 11 | 
             
                html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
         | 
| 12 12 | 
             
                sanitizer::SelmaSanitizer,
         | 
| @@ -83,18 +83,18 @@ impl SelmaRewriter { | |
| 83 83 | 
             
                                    return Err(magnus::Error::new(
         | 
| 84 84 | 
             
                                        exception::no_method_error(),
         | 
| 85 85 | 
             
                                        format!(
         | 
| 86 | 
            -
                                            "Could not call #selector on {:?}; is this an object that defines it?",
         | 
| 87 | 
            -
             | 
| 86 | 
            +
                                            "Could not call #selector on {classname:?}; is this an object that defines it?",
         | 
| 87 | 
            +
             | 
| 88 88 | 
             
                                        ),
         | 
| 89 89 | 
             
                                    ));
         | 
| 90 90 | 
             
                                }
         | 
| 91 91 |  | 
| 92 92 | 
             
                                let rb_selector: WrappedStruct<SelmaSelector> =
         | 
| 93 93 | 
             
                                    match rb_handler.funcall("selector", ()) {
         | 
| 94 | 
            -
                                        Err( | 
| 94 | 
            +
                                        Err(err) => {
         | 
| 95 95 | 
             
                                            return Err(magnus::Error::new(
         | 
| 96 96 | 
             
                                                exception::type_error(),
         | 
| 97 | 
            -
                                                format!("Error instantiating selector: {}" | 
| 97 | 
            +
                                                format!("Error instantiating selector: {err:?}"),
         | 
| 98 98 | 
             
                                            ));
         | 
| 99 99 | 
             
                                        }
         | 
| 100 100 | 
             
                                        Ok(rb_selector) => rb_selector,
         | 
| @@ -164,8 +164,6 @@ impl SelmaRewriter { | |
| 164 164 | 
             
                    let sanitized_html = match &self.0.borrow().sanitizer {
         | 
| 165 165 | 
             
                        None => html,
         | 
| 166 166 | 
             
                        Some(sanitizer) => {
         | 
| 167 | 
            -
                            // let first_pass_html = Self::perform_initial_sanitization(sanitizer, &html).unwrap();
         | 
| 168 | 
            -
             | 
| 169 167 | 
             
                            // due to malicious html crafting
         | 
| 170 168 | 
             
                            // (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
         | 
| 171 169 | 
             
                            // we need to run sanitization several times to truly remove unwanted tags,
         | 
| @@ -182,7 +180,7 @@ impl SelmaRewriter { | |
| 182 180 | 
             
                        Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
         | 
| 183 181 | 
             
                        Err(err) => Err(magnus::Error::new(
         | 
| 184 182 | 
             
                            exception::runtime_error(),
         | 
| 185 | 
            -
                            format!("{}" | 
| 183 | 
            +
                            format!("{err:?}"),
         | 
| 186 184 | 
             
                        )),
         | 
| 187 185 | 
             
                    }
         | 
| 188 186 | 
             
                }
         | 
| @@ -218,6 +216,7 @@ impl SelmaRewriter { | |
| 218 216 |  | 
| 219 217 | 
             
                                    Ok(())
         | 
| 220 218 | 
             
                                })],
         | 
| 219 | 
            +
                                // TODO: allow for MemorySettings to be defined
         | 
| 221 220 | 
             
                                ..Settings::default()
         | 
| 222 221 | 
             
                            },
         | 
| 223 222 | 
             
                            |c: &[u8]| first_pass_html.extend_from_slice(c),
         | 
| @@ -361,7 +360,7 @@ impl SelmaRewriter { | |
| 361 360 | 
             
                            Err(err) => {
         | 
| 362 361 | 
             
                                return Err(magnus::Error::new(
         | 
| 363 362 | 
             
                                    exception::runtime_error(),
         | 
| 364 | 
            -
                                    format!("{}" | 
| 363 | 
            +
                                    format!("{err:?}"),
         | 
| 365 364 | 
             
                                ));
         | 
| 366 365 | 
             
                            }
         | 
| 367 366 | 
             
                        }
         | 
| @@ -372,7 +371,7 @@ impl SelmaRewriter { | |
| 372 371 | 
             
                fn process_element_handlers(
         | 
| 373 372 | 
             
                    rb_handler: Value,
         | 
| 374 373 | 
             
                    element: &mut Element,
         | 
| 375 | 
            -
                    ancestors: & | 
| 374 | 
            +
                    ancestors: &[String],
         | 
| 376 375 | 
             
                ) -> Result<(), magnus::Error> {
         | 
| 377 376 | 
             
                    // if `on_end_tag` function is defined, call it
         | 
| 378 377 | 
             
                    if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
         | 
| @@ -393,7 +392,7 @@ impl SelmaRewriter { | |
| 393 392 | 
             
                        Ok(_) => Ok(()),
         | 
| 394 393 | 
             
                        Err(err) => Err(magnus::Error::new(
         | 
| 395 394 | 
             
                            exception::runtime_error(),
         | 
| 396 | 
            -
                            format!("{}" | 
| 395 | 
            +
                            format!("{err:?}"),
         | 
| 397 396 | 
             
                        )),
         | 
| 398 397 | 
             
                    }
         | 
| 399 398 | 
             
                }
         | 
| @@ -402,11 +401,12 @@ impl SelmaRewriter { | |
| 402 401 | 
             
                    // prevents missing `handle_text` function
         | 
| 403 402 | 
             
                    let content = text.as_str();
         | 
| 404 403 |  | 
| 405 | 
            -
                    //  | 
| 404 | 
            +
                    // seems that sometimes lol-html returns blank text / EOLs?
         | 
| 406 405 | 
             
                    if content.is_empty() {
         | 
| 407 406 | 
             
                        return Ok(());
         | 
| 408 407 | 
             
                    }
         | 
| 409 | 
            -
             | 
| 408 | 
            +
             | 
| 409 | 
            +
                    let rb_result = rb_handler.funcall::<_, _, String>(Self::SELMA_HANDLE_TEXT, (content,));
         | 
| 410 410 |  | 
| 411 411 | 
             
                    if rb_result.is_err() {
         | 
| 412 412 | 
             
                        return Err(magnus::Error::new(
         | 
| @@ -419,7 +419,7 @@ impl SelmaRewriter { | |
| 419 419 | 
             
                        ));
         | 
| 420 420 | 
             
                    }
         | 
| 421 421 |  | 
| 422 | 
            -
                    let new_content | 
| 422 | 
            +
                    let new_content = rb_result.unwrap();
         | 
| 423 423 | 
             
                    // TODO: can this be an option?
         | 
| 424 424 | 
             
                    text.replace(&new_content, ContentType::Html);
         | 
| 425 425 |  | 
    
        data/ext/selma/src/sanitizer.rs
    CHANGED
    
    | @@ -35,7 +35,7 @@ pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>); | |
| 35 35 |  | 
| 36 36 | 
             
            impl SelmaSanitizer {
         | 
| 37 37 | 
             
                const SELMA_SANITIZER_ALLOW: u8 = (1 << 0);
         | 
| 38 | 
            -
                const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
         | 
| 38 | 
            +
                // const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
         | 
| 39 39 | 
             
                const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
         | 
| 40 40 | 
             
                const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
         | 
| 41 41 |  | 
| @@ -229,7 +229,7 @@ impl SelmaSanitizer { | |
| 229 229 | 
             
                    }
         | 
| 230 230 | 
             
                }
         | 
| 231 231 |  | 
| 232 | 
            -
                pub fn sanitize_attributes(&self, element: &mut Element) {
         | 
| 232 | 
            +
                pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), magnus::Error> {
         | 
| 233 233 | 
             
                    let binding = self.0.borrow_mut();
         | 
| 234 234 | 
             
                    let tag = Tag::tag_from_element(element);
         | 
| 235 235 | 
             
                    let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
         | 
| @@ -247,7 +247,7 @@ impl SelmaSanitizer { | |
| 247 247 | 
             
                        // encountered, remove the entire element to be safe.
         | 
| 248 248 | 
             
                        if attr_name.starts_with("<!--") {
         | 
| 249 249 | 
             
                            Self::force_remove_element(self, element);
         | 
| 250 | 
            -
                            return;
         | 
| 250 | 
            +
                            return Ok(());
         | 
| 251 251 | 
             
                        }
         | 
| 252 252 |  | 
| 253 253 | 
             
                        // first, trim leading spaces and unescape any encodings
         | 
| @@ -268,7 +268,15 @@ impl SelmaSanitizer { | |
| 268 268 | 
             
                            // since output is always UTF-8.
         | 
| 269 269 | 
             
                            if Tag::is_meta(tag) {
         | 
| 270 270 | 
             
                                if attr_name == "charset" && unescaped_attr_val != "utf-8" {
         | 
| 271 | 
            -
                                    element.set_attribute(attr_name, "utf-8") | 
| 271 | 
            +
                                    match element.set_attribute(attr_name, "utf-8") {
         | 
| 272 | 
            +
                                        Ok(_) => {}
         | 
| 273 | 
            +
                                        Err(_) => {
         | 
| 274 | 
            +
                                            return Err(magnus::Error::new(
         | 
| 275 | 
            +
                                                exception::runtime_error(),
         | 
| 276 | 
            +
                                                format!("Unable to change {attr_name:?}"),
         | 
| 277 | 
            +
                                            ));
         | 
| 278 | 
            +
                                        }
         | 
| 279 | 
            +
                                    }
         | 
| 272 280 | 
             
                                }
         | 
| 273 281 | 
             
                            } else if !unescaped_attr_val.is_empty() {
         | 
| 274 282 | 
             
                                let mut buf = String::new();
         | 
| @@ -287,14 +295,16 @@ impl SelmaSanitizer { | |
| 287 295 |  | 
| 288 296 | 
             
                    let required = &element_sanitizer.required_attrs;
         | 
| 289 297 | 
             
                    if required.contains(&"*".to_string()) {
         | 
| 290 | 
            -
                        return;
         | 
| 298 | 
            +
                        return Ok(());
         | 
| 291 299 | 
             
                    }
         | 
| 292 300 | 
             
                    for attr in element.attributes().iter() {
         | 
| 293 301 | 
             
                        let attr_name = &attr.name();
         | 
| 294 302 | 
             
                        if required.contains(attr_name) {
         | 
| 295 | 
            -
                            return;
         | 
| 303 | 
            +
                            return Ok(());
         | 
| 296 304 | 
             
                        }
         | 
| 297 305 | 
             
                    }
         | 
| 306 | 
            +
             | 
| 307 | 
            +
                    Ok(())
         | 
| 298 308 | 
             
                }
         | 
| 299 309 |  | 
| 300 310 | 
             
                fn should_keep_attribute(
         | 
| @@ -423,7 +433,7 @@ impl SelmaSanitizer { | |
| 423 433 | 
             
                        Ok(_) => Ok(true),
         | 
| 424 434 | 
             
                        Err(err) => Err(Error::new(
         | 
| 425 435 | 
             
                            exception::runtime_error(),
         | 
| 426 | 
            -
                            format!("AttributeNameError: {}" | 
| 436 | 
            +
                            format!("AttributeNameError: {err:?}"),
         | 
| 427 437 | 
             
                        )),
         | 
| 428 438 | 
             
                    }
         | 
| 429 439 | 
             
                }
         | 
    
        data/ext/selma/src/selector.rs
    CHANGED
    
    | @@ -27,7 +27,7 @@ impl SelmaSelector { | |
| 27 27 | 
             
                        if css.parse::<lol_html::Selector>().is_err() {
         | 
| 28 28 | 
             
                            return Err(Error::new(
         | 
| 29 29 | 
             
                                exception::arg_error(),
         | 
| 30 | 
            -
                                format!("Could not parse `match_element` (`{}`) as valid CSS" | 
| 30 | 
            +
                                format!("Could not parse `match_element` (`{css:?}`) as valid CSS"),
         | 
| 31 31 | 
             
                            ));
         | 
| 32 32 | 
             
                        }
         | 
| 33 33 | 
             
                    }
         | 
| @@ -37,10 +37,7 @@ impl SelmaSelector { | |
| 37 37 | 
             
                        if css.parse::<lol_html::Selector>().is_err() {
         | 
| 38 38 | 
             
                            return Err(Error::new(
         | 
| 39 39 | 
             
                                exception::arg_error(),
         | 
| 40 | 
            -
                                format!(
         | 
| 41 | 
            -
                                    "Could not parse `match_text_within` (`{}`) as valid CSS",
         | 
| 42 | 
            -
                                    css
         | 
| 43 | 
            -
                                ),
         | 
| 40 | 
            +
                                format!("Could not parse `match_text_within` (`{css:?}`) as valid CSS",),
         | 
| 44 41 | 
             
                            ));
         | 
| 45 42 | 
             
                        }
         | 
| 46 43 | 
             
                    }
         | 
    
        data/lib/selma/3.1/selma.bundle
    CHANGED
    
    | Binary file | 
    
        data/lib/selma/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: selma
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.2 | 
| 4 | 
            +
              version: 0.0.3
         | 
| 5 5 | 
             
            platform: arm64-darwin
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Garen J. Torikian
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2022-12- | 
| 11 | 
            +
            date: 2022-12-24 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rb_sys
         |