selma 0.0.2-x86_64-darwin → 0.0.3-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c540a4c66965c519eb3e1df6dd3438d26e2f73095485133b21b38022330a552e
4
- data.tar.gz: 198d8db194c08247858f1e64e7398a05bdb3f175ec0cb484f40a33437df3ac62
3
+ metadata.gz: ceee7cb6b7fd8b7b020dfff56815454535ec7f67323b1b045f3de1c523fd3fec
4
+ data.tar.gz: da0126a559cf6d95b0317accc345d4a1bf44809c983348ab53b1d21121d13188
5
5
  SHA512:
6
- metadata.gz: 7d6d478be88892c0aa07283dbfa2ae26d6dcdfbcfca9e6d0dd1312ab77685cb98343519b0942b904f85955373c1233d455b47f45c6f10411f5baeabe0ece4681
7
- data.tar.gz: b912f88b531a1ef49afeaa40aab13ab9e84e9b45694865f5d18d70ce50c40c380b6297bafc0f90d35737acf543351aee92eacc9e2c79e239281d460e1ffc827a
6
+ metadata.gz: 8a5dc1a03436bbb409d8a9071d21aecc5841b6fd8889e56ef75081711bdc8f1d1250374917252f238bbd7130151d894624268e871bab6e686f0ca2919b20d265
7
+ data.tar.gz: abf419dd76970608f8dd296f7b6736a3022704bf0b4db68132f44e62b5b373ccc1346672a7aa3bbaaabe883938fe5024beb3645c6af85511e33b18f8fd2f94b9
data/ext/selma/Cargo.toml CHANGED
@@ -6,8 +6,8 @@ edition = "2021"
6
6
  [dependencies]
7
7
  enum-iterator = "1.2"
8
8
  escapist = "0.0.1"
9
- magnus = "0.4"
10
- lol_html = { git = "https://github.com/cloudflare/lol-html", rev = "b09b7afbbcecb944f4bf338b0e669c430d91061e" }
9
+ magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
10
+ lol_html = "0.3"
11
11
 
12
12
  [lib]
13
13
  name = "selma"
@@ -51,7 +51,7 @@ impl SelmaHTMLElement {
51
51
  Ok(_) => Ok(value),
52
52
  Err(err) => Err(Error::new(
53
53
  exception::runtime_error(),
54
- format!("AttributeNameError: {}", err),
54
+ format!("AttributeNameError: {err:?}"),
55
55
  )),
56
56
  }
57
57
  } else {
@@ -81,7 +81,7 @@ impl SelmaHTMLElement {
81
81
  Ok(_) => {}
82
82
  Err(err) => Err(Error::new(
83
83
  exception::runtime_error(),
84
- format!("AttributeNameError: {}", err),
84
+ format!("AttributeNameError: {err:?}"),
85
85
  ))
86
86
  .unwrap(),
87
87
  });
@@ -99,7 +99,7 @@ impl SelmaHTMLElement {
99
99
  .for_each(|ancestor| match array.push(RString::new(ancestor)) {
100
100
  Ok(_) => {}
101
101
  Err(err) => {
102
- Err(Error::new(exception::runtime_error(), format!("{}", err))).unwrap()
102
+ Err(Error::new(exception::runtime_error(), format!("{err:?}"))).unwrap()
103
103
  }
104
104
  });
105
105
 
@@ -151,18 +151,18 @@ impl SelmaHTMLElement {
151
151
 
152
152
  fn find_content_type(content_type: Symbol) -> ContentType {
153
153
  match content_type.name() {
154
- Ok(name) => match (name) {
154
+ Ok(name) => match name {
155
155
  Cow::Borrowed("as_text") => ContentType::Text,
156
156
  Cow::Borrowed("as_html") => ContentType::Html,
157
157
  _ => Err(Error::new(
158
158
  exception::runtime_error(),
159
- format!("unknown symbol `{}`", name),
159
+ format!("unknown symbol `{name:?}`"),
160
160
  ))
161
161
  .unwrap(),
162
162
  },
163
163
  Err(err) => Err(Error::new(
164
164
  exception::runtime_error(),
165
- format!("Could not unwrap symbol"),
165
+ format!("Could not unwrap symbol: {err:?}"),
166
166
  ))
167
167
  .unwrap(),
168
168
  }
@@ -1,4 +1,4 @@
1
- use std::{cell::Cell, marker::PhantomData, mem, rc::Rc};
1
+ use std::{cell::Cell, marker::PhantomData, rc::Rc};
2
2
 
3
3
  // NOTE: My Rust isn't good enough to know what any of this does,
4
4
  // but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
@@ -37,7 +37,7 @@ pub struct NativeRefWrap<R> {
37
37
  impl<R> NativeRefWrap<R> {
38
38
  pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
39
39
  let wrap = NativeRefWrap {
40
- inner_ptr: unsafe { mem::transmute(inner) },
40
+ inner_ptr: inner as *const I as *mut R,
41
41
  poisoned: Rc::new(Cell::new(false)),
42
42
  };
43
43
 
@@ -48,7 +48,7 @@ impl<R> NativeRefWrap<R> {
48
48
 
49
49
  pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor) {
50
50
  let wrap = NativeRefWrap {
51
- inner_ptr: unsafe { mem::transmute(inner) },
51
+ inner_ptr: inner as *mut I as *mut R,
52
52
  poisoned: Rc::new(Cell::new(false)),
53
53
  };
54
54
 
@@ -1,5 +1,3 @@
1
- use std::{borrow::Cow, cell::RefCell, rc::Rc};
2
-
3
1
  use lol_html::{
4
2
  doc_comments, doctype, element,
5
3
  html_content::{ContentType, Element, EndTag, TextChunk},
@@ -7,6 +5,8 @@ use lol_html::{
7
5
  };
8
6
  use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
9
7
 
8
+ use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
9
+
10
10
  use crate::{
11
11
  html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
12
12
  sanitizer::SelmaSanitizer,
@@ -83,18 +83,18 @@ impl SelmaRewriter {
83
83
  return Err(magnus::Error::new(
84
84
  exception::no_method_error(),
85
85
  format!(
86
- "Could not call #selector on {:?}; is this an object that defines it?",
87
- classname
86
+ "Could not call #selector on {classname:?}; is this an object that defines it?",
87
+
88
88
  ),
89
89
  ));
90
90
  }
91
91
 
92
92
  let rb_selector: WrappedStruct<SelmaSelector> =
93
93
  match rb_handler.funcall("selector", ()) {
94
- Err(e) => {
94
+ Err(err) => {
95
95
  return Err(magnus::Error::new(
96
96
  exception::type_error(),
97
- format!("Error instantiating selector: {}", e),
97
+ format!("Error instantiating selector: {err:?}"),
98
98
  ));
99
99
  }
100
100
  Ok(rb_selector) => rb_selector,
@@ -164,8 +164,6 @@ impl SelmaRewriter {
164
164
  let sanitized_html = match &self.0.borrow().sanitizer {
165
165
  None => html,
166
166
  Some(sanitizer) => {
167
- // let first_pass_html = Self::perform_initial_sanitization(sanitizer, &html).unwrap();
168
-
169
167
  // due to malicious html crafting
170
168
  // (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
171
169
  // we need to run sanitization several times to truly remove unwanted tags,
@@ -182,7 +180,7 @@ impl SelmaRewriter {
182
180
  Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
183
181
  Err(err) => Err(magnus::Error::new(
184
182
  exception::runtime_error(),
185
- format!("{}", err),
183
+ format!("{err:?}"),
186
184
  )),
187
185
  }
188
186
  }
@@ -218,6 +216,7 @@ impl SelmaRewriter {
218
216
 
219
217
  Ok(())
220
218
  })],
219
+ // TODO: allow for MemorySettings to be defined
221
220
  ..Settings::default()
222
221
  },
223
222
  |c: &[u8]| first_pass_html.extend_from_slice(c),
@@ -361,7 +360,7 @@ impl SelmaRewriter {
361
360
  Err(err) => {
362
361
  return Err(magnus::Error::new(
363
362
  exception::runtime_error(),
364
- format!("{}", err),
363
+ format!("{err:?}"),
365
364
  ));
366
365
  }
367
366
  }
@@ -372,7 +371,7 @@ impl SelmaRewriter {
372
371
  fn process_element_handlers(
373
372
  rb_handler: Value,
374
373
  element: &mut Element,
375
- ancestors: &Vec<String>,
374
+ ancestors: &[String],
376
375
  ) -> Result<(), magnus::Error> {
377
376
  // if `on_end_tag` function is defined, call it
378
377
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
@@ -393,7 +392,7 @@ impl SelmaRewriter {
393
392
  Ok(_) => Ok(()),
394
393
  Err(err) => Err(magnus::Error::new(
395
394
  exception::runtime_error(),
396
- format!("{}", err),
395
+ format!("{err:?}"),
397
396
  )),
398
397
  }
399
398
  }
@@ -402,11 +401,12 @@ impl SelmaRewriter {
402
401
  // prevents missing `handle_text` function
403
402
  let content = text.as_str();
404
403
 
405
- // FIXME: why does this happen?
404
+ // seems that sometimes lol-html returns blank text / EOLs?
406
405
  if content.is_empty() {
407
406
  return Ok(());
408
407
  }
409
- let rb_result = rb_handler.funcall(Self::SELMA_HANDLE_TEXT, (content,));
408
+
409
+ let rb_result = rb_handler.funcall::<_, _, String>(Self::SELMA_HANDLE_TEXT, (content,));
410
410
 
411
411
  if rb_result.is_err() {
412
412
  return Err(magnus::Error::new(
@@ -419,7 +419,7 @@ impl SelmaRewriter {
419
419
  ));
420
420
  }
421
421
 
422
- let new_content: String = rb_result.unwrap();
422
+ let new_content = rb_result.unwrap();
423
423
  // TODO: can this be an option?
424
424
  text.replace(&new_content, ContentType::Html);
425
425
 
@@ -35,7 +35,7 @@ pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
35
35
 
36
36
  impl SelmaSanitizer {
37
37
  const SELMA_SANITIZER_ALLOW: u8 = (1 << 0);
38
- const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
38
+ // const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
39
39
  const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
40
40
  const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
41
41
 
@@ -229,7 +229,7 @@ impl SelmaSanitizer {
229
229
  }
230
230
  }
231
231
 
232
- pub fn sanitize_attributes(&self, element: &mut Element) {
232
+ pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), magnus::Error> {
233
233
  let binding = self.0.borrow_mut();
234
234
  let tag = Tag::tag_from_element(element);
235
235
  let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
@@ -247,7 +247,7 @@ impl SelmaSanitizer {
247
247
  // encountered, remove the entire element to be safe.
248
248
  if attr_name.starts_with("<!--") {
249
249
  Self::force_remove_element(self, element);
250
- return;
250
+ return Ok(());
251
251
  }
252
252
 
253
253
  // first, trim leading spaces and unescape any encodings
@@ -268,7 +268,15 @@ impl SelmaSanitizer {
268
268
  // since output is always UTF-8.
269
269
  if Tag::is_meta(tag) {
270
270
  if attr_name == "charset" && unescaped_attr_val != "utf-8" {
271
- element.set_attribute(attr_name, "utf-8");
271
+ match element.set_attribute(attr_name, "utf-8") {
272
+ Ok(_) => {}
273
+ Err(_) => {
274
+ return Err(magnus::Error::new(
275
+ exception::runtime_error(),
276
+ format!("Unable to change {attr_name:?}"),
277
+ ));
278
+ }
279
+ }
272
280
  }
273
281
  } else if !unescaped_attr_val.is_empty() {
274
282
  let mut buf = String::new();
@@ -287,14 +295,16 @@ impl SelmaSanitizer {
287
295
 
288
296
  let required = &element_sanitizer.required_attrs;
289
297
  if required.contains(&"*".to_string()) {
290
- return;
298
+ return Ok(());
291
299
  }
292
300
  for attr in element.attributes().iter() {
293
301
  let attr_name = &attr.name();
294
302
  if required.contains(attr_name) {
295
- return;
303
+ return Ok(());
296
304
  }
297
305
  }
306
+
307
+ Ok(())
298
308
  }
299
309
 
300
310
  fn should_keep_attribute(
@@ -423,7 +433,7 @@ impl SelmaSanitizer {
423
433
  Ok(_) => Ok(true),
424
434
  Err(err) => Err(Error::new(
425
435
  exception::runtime_error(),
426
- format!("AttributeNameError: {}", err),
436
+ format!("AttributeNameError: {err:?}"),
427
437
  )),
428
438
  }
429
439
  }
@@ -27,7 +27,7 @@ impl SelmaSelector {
27
27
  if css.parse::<lol_html::Selector>().is_err() {
28
28
  return Err(Error::new(
29
29
  exception::arg_error(),
30
- format!("Could not parse `match_element` (`{}`) as valid CSS", css),
30
+ format!("Could not parse `match_element` (`{css:?}`) as valid CSS"),
31
31
  ));
32
32
  }
33
33
  }
@@ -37,10 +37,7 @@ impl SelmaSelector {
37
37
  if css.parse::<lol_html::Selector>().is_err() {
38
38
  return Err(Error::new(
39
39
  exception::arg_error(),
40
- format!(
41
- "Could not parse `match_text_within` (`{}`) as valid CSS",
42
- css
43
- ),
40
+ format!("Could not parse `match_text_within` (`{css:?}`) as valid CSS",),
44
41
  ));
45
42
  }
46
43
  }
Binary file
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.2"
4
+ VERSION = "0.0.3"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Garen J. Torikian
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-21 00:00:00.000000000 Z
11
+ date: 2022-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys