selma 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62e8e007de6eb92bda551ab4247b57d7643b48854874dd6ba8ce3ba6eb066138
4
- data.tar.gz: 4dc2e07f7cc02547797bbe57c2c87574c25bc15d42fa2c998974c006799ed4d9
3
+ metadata.gz: 5f579c1ac9c0e6e24d6c5919f27072940adb9e31c5a0cc4429b6338cc24f6315
4
+ data.tar.gz: 90b190ebde12c3a38fb682af3627d83fe3e704ad7b9bc301d70eb81da6d1c63c
5
5
  SHA512:
6
- metadata.gz: f60ba708bb9274513f781b1aa90659acb3b858ff7b39de2dffbd089fa29a667c923934f6e87520d435b2ee51429469317a71312fbdce5f82d1aa718ec8cca1c7
7
- data.tar.gz: 83dd64c6cc55dd26ceeb4e13ae6d239b433946ee2e54d477ac58f8fa3c483b95cda7365f1d4b5e8c79751004e5b51bd527aa31acf77a59a145f6a895f6d05234
6
+ metadata.gz: 4da6d3776c6c4ad04c73bdfa43e3cef4ce1ed71ada3d5653041b388c41cf4b1c77dc65671238ea28f36092d68bd4dcf3b9585ae35f7ea8f280c68a0db6b93fed
7
+ data.tar.gz: e29bb6f28f5fb9123946b97948aef0e0857938b4a5869fce451cba3f1fdb95af1033908418f22ad454f5feb64295db40cc8df63fb183f6a628019bbd3618547e
data/ext/selma/Cargo.toml CHANGED
@@ -6,8 +6,8 @@ edition = "2021"
6
6
  [dependencies]
7
7
  enum-iterator = "1.2"
8
8
  escapist = "0.0.1"
9
- magnus = "0.4"
10
- lol_html = { git = "https://github.com/cloudflare/lol-html", rev = "b09b7afbbcecb944f4bf338b0e669c430d91061e" }
9
+ magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
10
+ lol_html = "0.3"
11
11
 
12
12
  [lib]
13
13
  name = "selma"
@@ -51,7 +51,7 @@ impl SelmaHTMLElement {
51
51
  Ok(_) => Ok(value),
52
52
  Err(err) => Err(Error::new(
53
53
  exception::runtime_error(),
54
- format!("AttributeNameError: {}", err),
54
+ format!("AttributeNameError: {err:?}"),
55
55
  )),
56
56
  }
57
57
  } else {
@@ -81,7 +81,7 @@ impl SelmaHTMLElement {
81
81
  Ok(_) => {}
82
82
  Err(err) => Err(Error::new(
83
83
  exception::runtime_error(),
84
- format!("AttributeNameError: {}", err),
84
+ format!("AttributeNameError: {err:?}"),
85
85
  ))
86
86
  .unwrap(),
87
87
  });
@@ -99,7 +99,7 @@ impl SelmaHTMLElement {
99
99
  .for_each(|ancestor| match array.push(RString::new(ancestor)) {
100
100
  Ok(_) => {}
101
101
  Err(err) => {
102
- Err(Error::new(exception::runtime_error(), format!("{}", err))).unwrap()
102
+ Err(Error::new(exception::runtime_error(), format!("{err:?}"))).unwrap()
103
103
  }
104
104
  });
105
105
 
@@ -151,18 +151,18 @@ impl SelmaHTMLElement {
151
151
 
152
152
  fn find_content_type(content_type: Symbol) -> ContentType {
153
153
  match content_type.name() {
154
- Ok(name) => match (name) {
154
+ Ok(name) => match name {
155
155
  Cow::Borrowed("as_text") => ContentType::Text,
156
156
  Cow::Borrowed("as_html") => ContentType::Html,
157
157
  _ => Err(Error::new(
158
158
  exception::runtime_error(),
159
- format!("unknown symbol `{}`", name),
159
+ format!("unknown symbol `{name:?}`"),
160
160
  ))
161
161
  .unwrap(),
162
162
  },
163
163
  Err(err) => Err(Error::new(
164
164
  exception::runtime_error(),
165
- format!("Could not unwrap symbol"),
165
+ format!("Could not unwrap symbol: {err:?}"),
166
166
  ))
167
167
  .unwrap(),
168
168
  }
@@ -1,4 +1,4 @@
1
- use std::{cell::Cell, marker::PhantomData, mem, rc::Rc};
1
+ use std::{cell::Cell, marker::PhantomData, rc::Rc};
2
2
 
3
3
  // NOTE: My Rust isn't good enough to know what any of this does,
4
4
  // but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
@@ -37,7 +37,7 @@ pub struct NativeRefWrap<R> {
37
37
  impl<R> NativeRefWrap<R> {
38
38
  pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
39
39
  let wrap = NativeRefWrap {
40
- inner_ptr: unsafe { mem::transmute(inner) },
40
+ inner_ptr: inner as *const I as *mut R,
41
41
  poisoned: Rc::new(Cell::new(false)),
42
42
  };
43
43
 
@@ -48,7 +48,7 @@ impl<R> NativeRefWrap<R> {
48
48
 
49
49
  pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor) {
50
50
  let wrap = NativeRefWrap {
51
- inner_ptr: unsafe { mem::transmute(inner) },
51
+ inner_ptr: inner as *mut I as *mut R,
52
52
  poisoned: Rc::new(Cell::new(false)),
53
53
  };
54
54
 
@@ -1,5 +1,3 @@
1
- use std::{borrow::Cow, cell::RefCell, rc::Rc};
2
-
3
1
  use lol_html::{
4
2
  doc_comments, doctype, element,
5
3
  html_content::{ContentType, Element, EndTag, TextChunk},
@@ -7,6 +5,8 @@ use lol_html::{
7
5
  };
8
6
  use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
9
7
 
8
+ use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
9
+
10
10
  use crate::{
11
11
  html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
12
12
  sanitizer::SelmaSanitizer,
@@ -83,18 +83,18 @@ impl SelmaRewriter {
83
83
  return Err(magnus::Error::new(
84
84
  exception::no_method_error(),
85
85
  format!(
86
- "Could not call #selector on {:?}; is this an object that defines it?",
87
- classname
86
+ "Could not call #selector on {classname:?}; is this an object that defines it?",
87
+
88
88
  ),
89
89
  ));
90
90
  }
91
91
 
92
92
  let rb_selector: WrappedStruct<SelmaSelector> =
93
93
  match rb_handler.funcall("selector", ()) {
94
- Err(e) => {
94
+ Err(err) => {
95
95
  return Err(magnus::Error::new(
96
96
  exception::type_error(),
97
- format!("Error instantiating selector: {}", e),
97
+ format!("Error instantiating selector: {err:?}"),
98
98
  ));
99
99
  }
100
100
  Ok(rb_selector) => rb_selector,
@@ -164,8 +164,6 @@ impl SelmaRewriter {
164
164
  let sanitized_html = match &self.0.borrow().sanitizer {
165
165
  None => html,
166
166
  Some(sanitizer) => {
167
- // let first_pass_html = Self::perform_initial_sanitization(sanitizer, &html).unwrap();
168
-
169
167
  // due to malicious html crafting
170
168
  // (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
171
169
  // we need to run sanitization several times to truly remove unwanted tags,
@@ -182,7 +180,7 @@ impl SelmaRewriter {
182
180
  Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
183
181
  Err(err) => Err(magnus::Error::new(
184
182
  exception::runtime_error(),
185
- format!("{}", err),
183
+ format!("{err:?}"),
186
184
  )),
187
185
  }
188
186
  }
@@ -218,6 +216,7 @@ impl SelmaRewriter {
218
216
 
219
217
  Ok(())
220
218
  })],
219
+ // TODO: allow for MemorySettings to be defined
221
220
  ..Settings::default()
222
221
  },
223
222
  |c: &[u8]| first_pass_html.extend_from_slice(c),
@@ -361,7 +360,7 @@ impl SelmaRewriter {
361
360
  Err(err) => {
362
361
  return Err(magnus::Error::new(
363
362
  exception::runtime_error(),
364
- format!("{}", err),
363
+ format!("{err:?}"),
365
364
  ));
366
365
  }
367
366
  }
@@ -372,7 +371,7 @@ impl SelmaRewriter {
372
371
  fn process_element_handlers(
373
372
  rb_handler: Value,
374
373
  element: &mut Element,
375
- ancestors: &Vec<String>,
374
+ ancestors: &[String],
376
375
  ) -> Result<(), magnus::Error> {
377
376
  // if `on_end_tag` function is defined, call it
378
377
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
@@ -393,7 +392,7 @@ impl SelmaRewriter {
393
392
  Ok(_) => Ok(()),
394
393
  Err(err) => Err(magnus::Error::new(
395
394
  exception::runtime_error(),
396
- format!("{}", err),
395
+ format!("{err:?}"),
397
396
  )),
398
397
  }
399
398
  }
@@ -402,11 +401,12 @@ impl SelmaRewriter {
402
401
  // prevents missing `handle_text` function
403
402
  let content = text.as_str();
404
403
 
405
- // FIXME: why does this happen?
404
+ // seems that sometimes lol-html returns blank text / EOLs?
406
405
  if content.is_empty() {
407
406
  return Ok(());
408
407
  }
409
- let rb_result = rb_handler.funcall(Self::SELMA_HANDLE_TEXT, (content,));
408
+
409
+ let rb_result = rb_handler.funcall::<_, _, String>(Self::SELMA_HANDLE_TEXT, (content,));
410
410
 
411
411
  if rb_result.is_err() {
412
412
  return Err(magnus::Error::new(
@@ -419,7 +419,7 @@ impl SelmaRewriter {
419
419
  ));
420
420
  }
421
421
 
422
- let new_content: String = rb_result.unwrap();
422
+ let new_content = rb_result.unwrap();
423
423
  // TODO: can this be an option?
424
424
  text.replace(&new_content, ContentType::Html);
425
425
 
@@ -35,7 +35,7 @@ pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
35
35
 
36
36
  impl SelmaSanitizer {
37
37
  const SELMA_SANITIZER_ALLOW: u8 = (1 << 0);
38
- const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
38
+ // const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
39
39
  const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
40
40
  const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
41
41
 
@@ -229,7 +229,7 @@ impl SelmaSanitizer {
229
229
  }
230
230
  }
231
231
 
232
- pub fn sanitize_attributes(&self, element: &mut Element) {
232
+ pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), magnus::Error> {
233
233
  let binding = self.0.borrow_mut();
234
234
  let tag = Tag::tag_from_element(element);
235
235
  let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
@@ -247,7 +247,7 @@ impl SelmaSanitizer {
247
247
  // encountered, remove the entire element to be safe.
248
248
  if attr_name.starts_with("<!--") {
249
249
  Self::force_remove_element(self, element);
250
- return;
250
+ return Ok(());
251
251
  }
252
252
 
253
253
  // first, trim leading spaces and unescape any encodings
@@ -268,7 +268,15 @@ impl SelmaSanitizer {
268
268
  // since output is always UTF-8.
269
269
  if Tag::is_meta(tag) {
270
270
  if attr_name == "charset" && unescaped_attr_val != "utf-8" {
271
- element.set_attribute(attr_name, "utf-8");
271
+ match element.set_attribute(attr_name, "utf-8") {
272
+ Ok(_) => {}
273
+ Err(_) => {
274
+ return Err(magnus::Error::new(
275
+ exception::runtime_error(),
276
+ format!("Unable to change {attr_name:?}"),
277
+ ));
278
+ }
279
+ }
272
280
  }
273
281
  } else if !unescaped_attr_val.is_empty() {
274
282
  let mut buf = String::new();
@@ -287,14 +295,16 @@ impl SelmaSanitizer {
287
295
 
288
296
  let required = &element_sanitizer.required_attrs;
289
297
  if required.contains(&"*".to_string()) {
290
- return;
298
+ return Ok(());
291
299
  }
292
300
  for attr in element.attributes().iter() {
293
301
  let attr_name = &attr.name();
294
302
  if required.contains(attr_name) {
295
- return;
303
+ return Ok(());
296
304
  }
297
305
  }
306
+
307
+ Ok(())
298
308
  }
299
309
 
300
310
  fn should_keep_attribute(
@@ -423,7 +433,7 @@ impl SelmaSanitizer {
423
433
  Ok(_) => Ok(true),
424
434
  Err(err) => Err(Error::new(
425
435
  exception::runtime_error(),
426
- format!("AttributeNameError: {}", err),
436
+ format!("AttributeNameError: {err:?}"),
427
437
  )),
428
438
  }
429
439
  }
@@ -27,7 +27,7 @@ impl SelmaSelector {
27
27
  if css.parse::<lol_html::Selector>().is_err() {
28
28
  return Err(Error::new(
29
29
  exception::arg_error(),
30
- format!("Could not parse `match_element` (`{}`) as valid CSS", css),
30
+ format!("Could not parse `match_element` (`{css:?}`) as valid CSS"),
31
31
  ));
32
32
  }
33
33
  }
@@ -37,10 +37,7 @@ impl SelmaSelector {
37
37
  if css.parse::<lol_html::Selector>().is_err() {
38
38
  return Err(Error::new(
39
39
  exception::arg_error(),
40
- format!(
41
- "Could not parse `match_text_within` (`{}`) as valid CSS",
42
- css
43
- ),
40
+ format!("Could not parse `match_text_within` (`{css:?}`) as valid CSS",),
44
41
  ));
45
42
  }
46
43
  }
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.2"
4
+ VERSION = "0.0.3"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen J. Torikian
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-21 00:00:00.000000000 Z
11
+ date: 2022-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys