selma 0.0.2-aarch64-linux → 0.0.4-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1209a332b4168277dabc2ae40f6c8d9d0539909e5744dd63d93143dbd79d768d
4
- data.tar.gz: fd1e485e7306bd8a42df084effce4e4c4c281d296ddcea4ea5a1fb17b66bf7a5
3
+ metadata.gz: 0e2e415b1b174378d6b7635b815401e021cab9edbe6b9d1e10cc6527ad491d8d
4
+ data.tar.gz: 163ac28f3268425a7d6fdd36e3ad1537e44437c0757fb069004c7e0943de0a5e
5
5
  SHA512:
6
- metadata.gz: 834bbe0129ba7facc08a64e6637b629225f329f295f7e7bfeeaca78d8651c2b5673ddefe577e24513c9981f8c5aba89c8c324a74ea46a8df229584c02de58516
7
- data.tar.gz: e699befa331a48a993650b48ed2b7f682c32e5e2ebcae39cdcdd562566c50c44ac23f860e322d099a364affde1aa9e79126af1ea0212e918cff89860982721fd
6
+ metadata.gz: 217a5647fab0fb789d9a12a55d31dab047537608871809520adf0e1377dde5380aba031798e936c03d96e83c3b3aae2087eedae975e0a058e96996bb2a6d6a38
7
+ data.tar.gz: 9b00064e46f201878e3aa95e5fed8aac4f6758f3e38db146c9536357d7e4c5ec508c05589e57513dfcffab9b2f365645425d87f8597299db21111332cbcbe4ba
data/README.md CHANGED
@@ -56,6 +56,10 @@ allow_comments: false,
56
56
  # "<!DOCTYPE html>" when sanitizing a document.
57
57
  allow_doctype: false,
58
58
 
59
+ # HTML elements to allow. By default, no elements are allowed (which means
60
+ # that all HTML will be stripped).
61
+ elements: ["a", "b", "img", ],
62
+
59
63
  # HTML attributes to allow in specific elements. The key is the name of the element,
60
64
  # and the value is an array of allowed attributes. By default, no attributes
61
65
  # are allowed.
@@ -64,14 +68,10 @@ attributes: {
64
68
  "img" => ["src"],
65
69
  },
66
70
 
67
- # HTML elements to allow. By default, no elements are allowed (which means
68
- # that all HTML will be stripped).
69
- elements: ["a", "b", "img", ],
70
-
71
71
  # URL handling protocols to allow in specific attributes. By default, no
72
72
  # protocols are allowed. Use :relative in place of a protocol if you want
73
73
  # to allow relative URLs sans protocol.
74
- protocols: {
74
+ protocols: {
75
75
  "a" => { "href" => ["http", "https", "mailto", :relative] },
76
76
  "img" => { "href" => ["http", "https"] },
77
77
  },
@@ -91,7 +91,7 @@ The real power in Selma comes in its use of handlers. A handler is simply an obj
91
91
 
92
92
  - `selector`, a method which MUST return an instance of `Selma::Selector` which defines the CSS classes to match
93
93
  - `handle_element`, a method that's called on each matched element
94
- - `handle_text`, a method that's called on each matched text node; this MUST return a string
94
+ - `handle_text_chunk`, a method that's called on each matched text node; this MUST return a string
95
95
 
96
96
  Here's an example which rewrites the `href` attribute on `a` and the `src` attribute on `img` to be `https` rather than `http`.
97
97
 
@@ -118,7 +118,7 @@ rewriter = Selma::Rewriter.new(handlers: [MatchAttribute.new])
118
118
  The `Selma::Selector` object has three possible kwargs:
119
119
 
120
120
  - `match_element`: any element which matches this CSS rule will be passed on to `handle_element`
121
- - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text`
121
+ - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text_chunk`
122
122
  - `ignore_text_within`: this is an array of element names whose text contents will be ignored
123
123
 
124
124
  You've seen an example of `match_element`; here's one for `match_text_within` which changes strings in various elements which are _not_ `pre` or `code`:
@@ -132,7 +132,7 @@ class MatchText
132
132
  SELECTOR
133
133
  end
134
134
 
135
- def handle_text(text)
135
+ def handle_text_chunk(text)
136
136
  string.sub(/@.+/, "<a href=\"www.yetto.app/#{Regexp.last_match}\">")
137
137
  end
138
138
  end
@@ -150,8 +150,9 @@ The `element` argument in `handle_element` has the following methods:
150
150
  - `remove_attribute`: remove an attribute
151
151
  - `attributes`: list all the attributes
152
152
  - `ancestors`: list all the ancestors
153
- - `append(content, content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
154
- - `wrap(start_text, end_text, content_type)`: adds `start_text` before an element and `end_text` after an element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
153
+ - `append(content, as: content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
154
+ - `before(content, as: content_type)`: Inserts `content` before the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
155
+ - `after(content, as: content_type)`: Inserts `content` after the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
155
156
  - `set_inner_content`: replaces inner content of the element with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
156
157
 
157
158
  ## Benchmarks
data/ext/selma/Cargo.toml CHANGED
@@ -5,9 +5,9 @@ edition = "2021"
5
5
 
6
6
  [dependencies]
7
7
  enum-iterator = "1.2"
8
- escapist = "0.0.1"
9
- magnus = "0.4"
10
- lol_html = { git = "https://github.com/cloudflare/lol-html", rev = "b09b7afbbcecb944f4bf338b0e669c430d91061e" }
8
+ escapist = "0.0.2"
9
+ magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
10
+ lol_html = "0.3"
11
11
 
12
12
  [lib]
13
13
  name = "selma"
@@ -1,8 +1,6 @@
1
- use std::borrow::Cow;
2
-
3
1
  use crate::native_ref_wrap::NativeRefWrap;
4
- use lol_html::html_content::{ContentType, Element};
5
- use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Symbol};
2
+ use lol_html::html_content::Element;
3
+ use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Value};
6
4
 
7
5
  struct HTMLElement {
8
6
  element: NativeRefWrap<Element<'static, 'static>>,
@@ -51,7 +49,7 @@ impl SelmaHTMLElement {
51
49
  Ok(_) => Ok(value),
52
50
  Err(err) => Err(Error::new(
53
51
  exception::runtime_error(),
54
- format!("AttributeNameError: {}", err),
52
+ format!("AttributeNameError: {err:?}"),
55
53
  )),
56
54
  }
57
55
  } else {
@@ -81,7 +79,7 @@ impl SelmaHTMLElement {
81
79
  Ok(_) => {}
82
80
  Err(err) => Err(Error::new(
83
81
  exception::runtime_error(),
84
- format!("AttributeNameError: {}", err),
82
+ format!("AttributeNameError: {err:?}"),
85
83
  ))
86
84
  .unwrap(),
87
85
  });
@@ -99,80 +97,74 @@ impl SelmaHTMLElement {
99
97
  .for_each(|ancestor| match array.push(RString::new(ancestor)) {
100
98
  Ok(_) => {}
101
99
  Err(err) => {
102
- Err(Error::new(exception::runtime_error(), format!("{}", err))).unwrap()
100
+ Err(Error::new(exception::runtime_error(), format!("{err:?}"))).unwrap()
103
101
  }
104
102
  });
105
103
 
106
104
  Ok(array)
107
105
  }
108
106
 
109
- fn append(&self, text_to_append: String, content_type: Symbol) -> Result<(), Error> {
107
+ fn before(&self, args: &[Value]) -> Result<(), Error> {
110
108
  let mut binding = self.0.borrow_mut();
111
109
  let element = binding.element.get_mut().unwrap();
112
110
 
113
- let text_str = text_to_append.as_str();
114
-
115
- let content_type = Self::find_content_type(content_type);
111
+ let (text_str, content_type) = match crate::scan_text_args(args) {
112
+ Ok((text_str, content_type)) => (text_str, content_type),
113
+ Err(err) => return Err(err),
114
+ };
116
115
 
117
- element.append(text_str, content_type);
116
+ element.before(&text_str, content_type);
118
117
 
119
118
  Ok(())
120
119
  }
121
120
 
122
- fn wrap(
123
- &self,
124
- start_text: String,
125
- end_text: String,
126
- content_type: Symbol,
127
- ) -> Result<(), Error> {
121
+ fn after(&self, args: &[Value]) -> Result<(), Error> {
128
122
  let mut binding = self.0.borrow_mut();
129
123
  let element = binding.element.get_mut().unwrap();
130
124
 
131
- let before_content_type = Self::find_content_type(content_type);
132
- let after_content_type = Self::find_content_type(content_type);
133
- element.before(&start_text, before_content_type);
134
- element.after(&end_text, after_content_type);
125
+ let (text_str, content_type) = match crate::scan_text_args(args) {
126
+ Ok((text_str, content_type)) => (text_str, content_type),
127
+ Err(err) => return Err(err),
128
+ };
129
+
130
+ element.after(&text_str, content_type);
135
131
 
136
132
  Ok(())
137
133
  }
138
134
 
139
- fn set_inner_content(&self, text_to_set: String, content_type: Symbol) -> Result<(), Error> {
135
+ fn append(&self, args: &[Value]) -> Result<(), Error> {
140
136
  let mut binding = self.0.borrow_mut();
141
137
  let element = binding.element.get_mut().unwrap();
142
138
 
143
- let text_str = text_to_set.as_str();
144
-
145
- let content_type = Self::find_content_type(content_type);
139
+ let (text_str, content_type) = match crate::scan_text_args(args) {
140
+ Ok((text_str, content_type)) => (text_str, content_type),
141
+ Err(err) => return Err(err),
142
+ };
146
143
 
147
- element.set_inner_content(text_str, content_type);
144
+ element.append(&text_str, content_type);
148
145
 
149
146
  Ok(())
150
147
  }
151
148
 
152
- fn find_content_type(content_type: Symbol) -> ContentType {
153
- match content_type.name() {
154
- Ok(name) => match (name) {
155
- Cow::Borrowed("as_text") => ContentType::Text,
156
- Cow::Borrowed("as_html") => ContentType::Html,
157
- _ => Err(Error::new(
158
- exception::runtime_error(),
159
- format!("unknown symbol `{}`", name),
160
- ))
161
- .unwrap(),
162
- },
163
- Err(err) => Err(Error::new(
164
- exception::runtime_error(),
165
- format!("Could not unwrap symbol"),
166
- ))
167
- .unwrap(),
168
- }
149
+ fn set_inner_content(&self, args: &[Value]) -> Result<(), Error> {
150
+ let mut binding = self.0.borrow_mut();
151
+ let element = binding.element.get_mut().unwrap();
152
+
153
+ let (inner_content, content_type) = match crate::scan_text_args(args) {
154
+ Ok((inner_content, content_type)) => (inner_content, content_type),
155
+ Err(err) => return Err(err),
156
+ };
157
+
158
+ element.set_inner_content(&inner_content, content_type);
159
+
160
+ Ok(())
169
161
  }
170
162
  }
171
163
 
172
164
  pub fn init(c_html: RClass) -> Result<(), Error> {
173
165
  let c_element = c_html
174
166
  .define_class("Element", Default::default())
175
- .expect("cannot find class Selma::Element");
167
+ .expect("cannot find class Selma::HTML::Element");
176
168
 
177
169
  c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
178
170
  c_element.define_method("[]", method!(SelmaHTMLElement::get_attribute, 1))?;
@@ -184,11 +176,12 @@ pub fn init(c_html: RClass) -> Result<(), Error> {
184
176
  c_element.define_method("attributes", method!(SelmaHTMLElement::get_attributes, 0))?;
185
177
  c_element.define_method("ancestors", method!(SelmaHTMLElement::get_ancestors, 0))?;
186
178
 
187
- c_element.define_method("append", method!(SelmaHTMLElement::append, 2))?;
188
- c_element.define_method("wrap", method!(SelmaHTMLElement::wrap, 3))?;
179
+ c_element.define_method("before", method!(SelmaHTMLElement::before, -1))?;
180
+ c_element.define_method("after", method!(SelmaHTMLElement::after, -1))?;
181
+ c_element.define_method("append", method!(SelmaHTMLElement::append, -1))?;
189
182
  c_element.define_method(
190
183
  "set_inner_content",
191
- method!(SelmaHTMLElement::set_inner_content, 2),
184
+ method!(SelmaHTMLElement::set_inner_content, -1),
192
185
  )?;
193
186
 
194
187
  Ok(())
@@ -6,7 +6,7 @@ struct HTMLEndTag {
6
6
  end_tag: NativeRefWrap<EndTag<'static>>,
7
7
  }
8
8
 
9
- #[magnus::wrap(class = "Selma::HTML::Element")]
9
+ #[magnus::wrap(class = "Selma::HTML::EndTag")]
10
10
  pub struct SelmaHTMLEndTag(std::cell::RefCell<HTMLEndTag>);
11
11
 
12
12
  /// SAFETY: This is safe because we only access this data when the GVL is held.
@@ -27,7 +27,7 @@ impl SelmaHTMLEndTag {
27
27
  pub fn init(c_html: RClass) -> Result<(), Error> {
28
28
  let c_end_tag = c_html
29
29
  .define_class("EndTag", Default::default())
30
- .expect("cannot find class Selma::EndTag");
30
+ .expect("cannot find class Selma::HTML::EndTag");
31
31
 
32
32
  c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
33
33
 
@@ -0,0 +1,83 @@
1
+ use crate::native_ref_wrap::NativeRefWrap;
2
+ use lol_html::html_content::{TextChunk, TextType};
3
+ use magnus::{exception, method, Error, Module, RClass, Symbol, Value};
4
+
5
+ struct HTMLTextChunk {
6
+ text_chunk: NativeRefWrap<TextChunk<'static>>,
7
+ }
8
+
9
+ #[magnus::wrap(class = "Selma::HTML::TextChunk")]
10
+ pub struct SelmaHTMLTextChunk(std::cell::RefCell<HTMLTextChunk>);
11
+
12
+ /// SAFETY: This is safe because we only access this data when the GVL is held.
13
+ unsafe impl Send for SelmaHTMLTextChunk {}
14
+
15
+ impl SelmaHTMLTextChunk {
16
+ pub fn new(text_chunk: &mut TextChunk) -> Self {
17
+ let (ref_wrap, _anchor) = NativeRefWrap::wrap_mut(text_chunk);
18
+
19
+ Self(std::cell::RefCell::new(HTMLTextChunk {
20
+ text_chunk: ref_wrap,
21
+ }))
22
+ }
23
+
24
+ fn to_s(&self) -> Result<String, Error> {
25
+ let binding = self.0.borrow();
26
+
27
+ if let Ok(tc) = binding.text_chunk.get() {
28
+ Ok(tc.as_str().to_string())
29
+ } else {
30
+ Err(Error::new(
31
+ exception::runtime_error(),
32
+ "`to_s` is not available",
33
+ ))
34
+ }
35
+ }
36
+
37
+ fn text_type(&self) -> Result<Symbol, Error> {
38
+ let binding = self.0.borrow();
39
+
40
+ if let Ok(tc) = binding.text_chunk.get() {
41
+ match tc.text_type() {
42
+ TextType::Data => Ok(Symbol::from("data")),
43
+ TextType::PlainText => Ok(Symbol::from("plain_text")),
44
+ TextType::RawText => Ok(Symbol::from("raw_text")),
45
+ TextType::ScriptData => Ok(Symbol::from("script")),
46
+ TextType::RCData => Ok(Symbol::from("rc_data")),
47
+ TextType::CDataSection => Ok(Symbol::from("cdata_section")),
48
+ }
49
+ } else {
50
+ Err(Error::new(
51
+ exception::runtime_error(),
52
+ "`text_type` is not available",
53
+ ))
54
+ }
55
+ }
56
+
57
+ fn replace(&self, args: &[Value]) -> Result<(), Error> {
58
+ let mut binding = self.0.borrow_mut();
59
+ let text_chunk = binding.text_chunk.get_mut().unwrap();
60
+
61
+ let (text_str, content_type) = match crate::scan_text_args(args) {
62
+ Ok((text_str, content_type)) => (text_str, content_type),
63
+ Err(err) => return Err(err),
64
+ };
65
+
66
+ text_chunk.replace(&text_str, content_type);
67
+
68
+ Ok(())
69
+ }
70
+ }
71
+
72
+ pub fn init(c_html: RClass) -> Result<(), Error> {
73
+ let c_text_chunk = c_html
74
+ .define_class("TextChunk", Default::default())
75
+ .expect("cannot find class Selma::HTML::TextChunk");
76
+
77
+ c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
78
+ c_text_chunk.define_method("content", method!(SelmaHTMLTextChunk::to_s, 0))?;
79
+ c_text_chunk.define_method("text_type", method!(SelmaHTMLTextChunk::text_type, 0))?;
80
+ c_text_chunk.define_method("replace", method!(SelmaHTMLTextChunk::replace, -1))?;
81
+
82
+ Ok(())
83
+ }
@@ -9,9 +9,11 @@ pub fn init(m_selma: RModule) -> Result<(), Error> {
9
9
 
10
10
  element::init(c_html).expect("cannot define Selma::HTML::Element class");
11
11
  end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
12
+ text_chunk::init(c_html).expect("cannot define Selma::HTML::TextChunk class");
12
13
 
13
14
  Ok(())
14
15
  }
15
16
 
16
17
  pub mod element;
17
18
  pub mod end_tag;
19
+ pub mod text_chunk;
data/ext/selma/src/lib.rs CHANGED
@@ -1,6 +1,7 @@
1
1
  extern crate core;
2
2
 
3
- use magnus::{define_module, Error};
3
+ use lol_html::html_content::ContentType;
4
+ use magnus::{define_module, exception, scan_args, Error, Symbol, Value};
4
5
 
5
6
  pub mod html;
6
7
  pub mod native_ref_wrap;
@@ -10,6 +11,32 @@ pub mod selector;
10
11
  pub mod tags;
11
12
  pub mod wrapped_struct;
12
13
 
14
+ #[allow(clippy::let_unit_value)]
15
+ fn scan_text_args(args: &[Value]) -> Result<(String, ContentType), magnus::Error> {
16
+ let args = scan_args::scan_args(args)?;
17
+ let (text,): (String,) = args.required;
18
+ let _: () = args.optional;
19
+ let _: () = args.splat;
20
+ let _: () = args.trailing;
21
+ let _: () = args.block;
22
+
23
+ let kwargs = scan_args::get_kwargs::<_, (Symbol,), (), ()>(args.keywords, &["as"], &[])?;
24
+ let as_sym = kwargs.required.0;
25
+ let as_sym_str = as_sym.name().unwrap();
26
+ let content_type = if as_sym_str == "text" {
27
+ ContentType::Text
28
+ } else if as_sym_str == "html" {
29
+ ContentType::Html
30
+ } else {
31
+ return Err(Error::new(
32
+ exception::runtime_error(),
33
+ format!("unknown symbol `{as_sym_str:?}`"),
34
+ ));
35
+ };
36
+
37
+ Ok((text, content_type))
38
+ }
39
+
13
40
  #[magnus::init]
14
41
  fn init() -> Result<(), Error> {
15
42
  let m_selma = define_module("Selma").expect("cannot define ::Selma module");
@@ -1,4 +1,4 @@
1
- use std::{cell::Cell, marker::PhantomData, mem, rc::Rc};
1
+ use std::{cell::Cell, marker::PhantomData, rc::Rc};
2
2
 
3
3
  // NOTE: My Rust isn't good enough to know what any of this does,
4
4
  // but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
@@ -37,7 +37,7 @@ pub struct NativeRefWrap<R> {
37
37
  impl<R> NativeRefWrap<R> {
38
38
  pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
39
39
  let wrap = NativeRefWrap {
40
- inner_ptr: unsafe { mem::transmute(inner) },
40
+ inner_ptr: inner as *const I as *mut R,
41
41
  poisoned: Rc::new(Cell::new(false)),
42
42
  };
43
43
 
@@ -48,7 +48,7 @@ impl<R> NativeRefWrap<R> {
48
48
 
49
49
  pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor) {
50
50
  let wrap = NativeRefWrap {
51
- inner_ptr: unsafe { mem::transmute(inner) },
51
+ inner_ptr: inner as *mut I as *mut R,
52
52
  poisoned: Rc::new(Cell::new(false)),
53
53
  };
54
54
 
@@ -1,14 +1,14 @@
1
- use std::{borrow::Cow, cell::RefCell, rc::Rc};
2
-
3
1
  use lol_html::{
4
2
  doc_comments, doctype, element,
5
- html_content::{ContentType, Element, EndTag, TextChunk},
3
+ html_content::{Element, EndTag, TextChunk},
6
4
  text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
7
5
  };
8
6
  use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
9
7
 
8
+ use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
9
+
10
10
  use crate::{
11
- html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
11
+ html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
12
12
  sanitizer::SelmaSanitizer,
13
13
  selector::SelmaSelector,
14
14
  tags::Tag,
@@ -43,7 +43,7 @@ unsafe impl Send for SelmaRewriter {}
43
43
  impl SelmaRewriter {
44
44
  const SELMA_ON_END_TAG: &str = "on_end_tag";
45
45
  const SELMA_HANDLE_ELEMENT: &str = "handle_element";
46
- const SELMA_HANDLE_TEXT: &str = "handle_text";
46
+ const SELMA_HANDLE_TEXT_CHUNK: &str = "handle_text_chunk";
47
47
 
48
48
  /// @yard
49
49
  /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
@@ -83,18 +83,18 @@ impl SelmaRewriter {
83
83
  return Err(magnus::Error::new(
84
84
  exception::no_method_error(),
85
85
  format!(
86
- "Could not call #selector on {:?}; is this an object that defines it?",
87
- classname
86
+ "Could not call #selector on {classname:?}; is this an object that defines it?",
87
+
88
88
  ),
89
89
  ));
90
90
  }
91
91
 
92
92
  let rb_selector: WrappedStruct<SelmaSelector> =
93
93
  match rb_handler.funcall("selector", ()) {
94
- Err(e) => {
94
+ Err(err) => {
95
95
  return Err(magnus::Error::new(
96
96
  exception::type_error(),
97
- format!("Error instantiating selector: {}", e),
97
+ format!("Error instantiating selector: {err:?}"),
98
98
  ));
99
99
  }
100
100
  Ok(rb_selector) => rb_selector,
@@ -145,7 +145,7 @@ impl SelmaRewriter {
145
145
  let _: () = args.trailing;
146
146
  let _: () = args.block;
147
147
 
148
- let kw = scan_args::get_kwargs::<
148
+ let kwargs = scan_args::get_kwargs::<
149
149
  _,
150
150
  (),
151
151
  (
@@ -154,7 +154,7 @@ impl SelmaRewriter {
154
154
  ),
155
155
  (),
156
156
  >(args.keywords, &[], &["sanitizer", "handlers"])?;
157
- let (rb_sanitizer, rb_handlers) = kw.optional;
157
+ let (rb_sanitizer, rb_handlers) = kwargs.optional;
158
158
 
159
159
  Ok((rb_sanitizer, rb_handlers))
160
160
  }
@@ -162,28 +162,22 @@ impl SelmaRewriter {
162
162
  /// Perform HTML rewrite sequence.
163
163
  fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
164
164
  let sanitized_html = match &self.0.borrow().sanitizer {
165
- None => html,
165
+ None => Ok(html),
166
166
  Some(sanitizer) => {
167
- // let first_pass_html = Self::perform_initial_sanitization(sanitizer, &html).unwrap();
168
-
169
- // due to malicious html crafting
170
- // (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
171
- // we need to run sanitization several times to truly remove unwanted tags,
172
- // because lol-html happily accepts this garbage (by design?)
173
- let sanitized_html = Self::perform_sanitization(sanitizer, &html).unwrap();
167
+ let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
168
+ Ok(sanitized_html) => sanitized_html,
169
+ Err(err) => return Err(err),
170
+ };
174
171
 
175
- String::from_utf8(sanitized_html).unwrap()
172
+ String::from_utf8(sanitized_html)
176
173
  }
177
174
  };
178
175
  let binding = self.0.borrow_mut();
179
176
  let handlers = &binding.handlers;
180
177
 
181
- match Self::perform_handler_rewrite(self, handlers, sanitized_html) {
178
+ match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
182
179
  Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
183
- Err(err) => Err(magnus::Error::new(
184
- exception::runtime_error(),
185
- format!("{}", err),
186
- )),
180
+ Err(err) => Err(err),
187
181
  }
188
182
  }
189
183
 
@@ -214,10 +208,12 @@ impl SelmaRewriter {
214
208
  if el.removed() {
215
209
  return Ok(());
216
210
  }
217
- sanitizer.sanitize_attributes(el);
218
-
219
- Ok(())
211
+ match sanitizer.sanitize_attributes(el) {
212
+ Ok(_) => Ok(()),
213
+ Err(err) => Err(err.to_string().into()),
214
+ }
220
215
  })],
216
+ // TODO: allow for MemorySettings to be defined
221
217
  ..Settings::default()
222
218
  },
223
219
  |c: &[u8]| first_pass_html.extend_from_slice(c),
@@ -342,7 +338,7 @@ impl SelmaRewriter {
342
338
  let mut stack = closure_element_stack.as_ref().borrow_mut();
343
339
  stack.pop();
344
340
  Ok(())
345
- });
341
+ })?;
346
342
  Ok(())
347
343
  }));
348
344
  });
@@ -361,7 +357,7 @@ impl SelmaRewriter {
361
357
  Err(err) => {
362
358
  return Err(magnus::Error::new(
363
359
  exception::runtime_error(),
364
- format!("{}", err),
360
+ format!("{err:?}"),
365
361
  ));
366
362
  }
367
363
  }
@@ -372,17 +368,18 @@ impl SelmaRewriter {
372
368
  fn process_element_handlers(
373
369
  rb_handler: Value,
374
370
  element: &mut Element,
375
- ancestors: &Vec<String>,
371
+ ancestors: &[String],
376
372
  ) -> Result<(), magnus::Error> {
377
373
  // if `on_end_tag` function is defined, call it
378
374
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
375
+ // TODO: error here is an "EndTagError"
379
376
  element.on_end_tag(move |end_tag| {
380
377
  let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
381
378
 
382
- rb_handler
383
- .funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,))
384
- .unwrap();
385
- Ok(())
379
+ match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
380
+ Ok(_) => Ok(()),
381
+ Err(err) => Err(err.to_string().into()),
382
+ }
386
383
  });
387
384
  }
388
385
 
@@ -391,39 +388,30 @@ impl SelmaRewriter {
391
388
  rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
392
389
  match rb_result {
393
390
  Ok(_) => Ok(()),
394
- Err(err) => Err(magnus::Error::new(
395
- exception::runtime_error(),
396
- format!("{}", err),
397
- )),
391
+ Err(err) => Err(err),
398
392
  }
399
393
  }
400
394
 
401
- fn process_text_handlers(rb_handler: Value, text: &mut TextChunk) -> Result<(), magnus::Error> {
402
- // prevents missing `handle_text` function
403
- let content = text.as_str();
395
+ fn process_text_handlers(
396
+ rb_handler: Value,
397
+ text_chunk: &mut TextChunk,
398
+ ) -> Result<(), magnus::Error> {
399
+ // prevents missing `handle_text_chunk` function
400
+ let content = text_chunk.as_str();
404
401
 
405
- // FIXME: why does this happen?
402
+ // seems that sometimes lol-html returns blank text / EOLs?
406
403
  if content.is_empty() {
407
404
  return Ok(());
408
405
  }
409
- let rb_result = rb_handler.funcall(Self::SELMA_HANDLE_TEXT, (content,));
410
406
 
411
- if rb_result.is_err() {
412
- return Err(magnus::Error::new(
413
- exception::type_error(),
414
- format!(
415
- "Expected #{:?} to return a string: {:?}",
416
- Self::SELMA_HANDLE_TEXT,
417
- rb_result.err().unwrap()
418
- ),
419
- ));
407
+ let rb_text_chunk = SelmaHTMLTextChunk::new(text_chunk);
408
+ match rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,)) {
409
+ Ok(_) => Ok(()),
410
+ Err(err) => Err(magnus::Error::new(
411
+ exception::runtime_error(),
412
+ format!("{err:?}"),
413
+ )),
420
414
  }
421
-
422
- let new_content: String = rb_result.unwrap();
423
- // TODO: can this be an option?
424
- text.replace(&new_content, ContentType::Html);
425
-
426
- Ok(())
427
415
  }
428
416
  }
429
417
 
@@ -1,12 +1,10 @@
1
1
  use std::{borrow::BorrowMut, cell::RefMut, collections::HashMap};
2
2
 
3
- use lol_html::html_content::{Comment, ContentType, Doctype, Element, EndTag};
4
- use magnus::{
5
- class, exception, function, method, scan_args, Error, Module, Object, RArray, RHash, RModule,
6
- Value,
3
+ use lol_html::{
4
+ errors::AttributeNameError,
5
+ html_content::{Comment, ContentType, Doctype, Element, EndTag},
7
6
  };
8
-
9
- use crate::tags::Tag;
7
+ use magnus::{class, function, method, scan_args, Module, Object, RArray, RHash, RModule, Value};
10
8
 
11
9
  #[derive(Clone, Debug)]
12
10
  struct ElementSanitizer {
@@ -18,7 +16,7 @@ struct ElementSanitizer {
18
16
 
19
17
  #[derive(Clone, Debug)]
20
18
  pub struct Sanitizer {
21
- flags: [u8; Tag::TAG_COUNT],
19
+ flags: [u8; crate::tags::Tag::TAG_COUNT],
22
20
  allowed_attrs: Vec<String>,
23
21
  allowed_classes: Vec<String>,
24
22
  element_sanitizers: HashMap<String, ElementSanitizer>,
@@ -35,11 +33,11 @@ pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
35
33
 
36
34
  impl SelmaSanitizer {
37
35
  const SELMA_SANITIZER_ALLOW: u8 = (1 << 0);
38
- const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
36
+ // const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
39
37
  const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
40
38
  const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
41
39
 
42
- pub fn new(arguments: &[Value]) -> Result<Self, Error> {
40
+ pub fn new(arguments: &[Value]) -> Result<Self, magnus::Error> {
43
41
  let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
44
42
  let (opt_config,): (Option<RHash>,) = args.optional;
45
43
 
@@ -50,7 +48,7 @@ impl SelmaSanitizer {
50
48
  };
51
49
 
52
50
  let mut element_sanitizers = HashMap::new();
53
- Tag::html_tags().iter().for_each(|html_tag| {
51
+ crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
54
52
  let es = ElementSanitizer {
55
53
  allowed_attrs: vec![],
56
54
  allowed_classes: vec![],
@@ -58,11 +56,14 @@ impl SelmaSanitizer {
58
56
 
59
57
  protocol_sanitizers: HashMap::new(),
60
58
  };
61
- element_sanitizers.insert(Tag::element_name_from_enum(html_tag).to_string(), es);
59
+ element_sanitizers.insert(
60
+ crate::tags::Tag::element_name_from_enum(html_tag).to_string(),
61
+ es,
62
+ );
62
63
  });
63
64
 
64
65
  Ok(Self(std::cell::RefCell::new(Sanitizer {
65
- flags: [0; Tag::TAG_COUNT],
66
+ flags: [0; crate::tags::Tag::TAG_COUNT],
66
67
  allowed_attrs: vec![],
67
68
  allowed_classes: vec![],
68
69
  element_sanitizers,
@@ -74,7 +75,7 @@ impl SelmaSanitizer {
74
75
  })))
75
76
  }
76
77
 
77
- fn get_config(&self) -> Result<RHash, Error> {
78
+ fn get_config(&self) -> Result<RHash, magnus::Error> {
78
79
  let binding = self.0.borrow();
79
80
 
80
81
  Ok(binding.config)
@@ -82,7 +83,7 @@ impl SelmaSanitizer {
82
83
 
83
84
  /// Toggle a sanitizer option on or off.
84
85
  fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
85
- let tag = Tag::tag_from_tag_name(tag_name.as_str());
86
+ let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
86
87
  if set {
87
88
  self.0.borrow_mut().flags[tag.index] |= flag;
88
89
  } else {
@@ -93,13 +94,19 @@ impl SelmaSanitizer {
93
94
  /// Toggles all sanitization options on or off.
94
95
  fn set_all_flags(&self, flag: u8, set: bool) {
95
96
  if set {
96
- Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
97
- self.0.borrow_mut().flags[iter] |= flag;
98
- });
97
+ crate::tags::Tag::html_tags()
98
+ .iter()
99
+ .enumerate()
100
+ .for_each(|(iter, _)| {
101
+ self.0.borrow_mut().flags[iter] |= flag;
102
+ });
99
103
  } else {
100
- Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
101
- self.0.borrow_mut().flags[iter] &= flag;
102
- });
104
+ crate::tags::Tag::html_tags()
105
+ .iter()
106
+ .enumerate()
107
+ .for_each(|(iter, _)| {
108
+ self.0.borrow_mut().flags[iter] &= flag;
109
+ });
103
110
  }
104
111
  }
105
112
 
@@ -111,8 +118,8 @@ impl SelmaSanitizer {
111
118
 
112
119
  pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
113
120
  if self.0.borrow().escape_tagfilter {
114
- let tag = Tag::tag_from_element(e);
115
- if Tag::is_tag_escapeworthy(tag) {
121
+ let tag = crate::tags::Tag::tag_from_element(e);
122
+ if crate::tags::Tag::is_tag_escapeworthy(tag) {
116
123
  e.remove();
117
124
  return true;
118
125
  }
@@ -229,9 +236,9 @@ impl SelmaSanitizer {
229
236
  }
230
237
  }
231
238
 
232
- pub fn sanitize_attributes(&self, element: &mut Element) {
239
+ pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), AttributeNameError> {
233
240
  let binding = self.0.borrow_mut();
234
- let tag = Tag::tag_from_element(element);
241
+ let tag = crate::tags::Tag::tag_from_element(element);
235
242
  let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
236
243
 
237
244
  // FIXME: This is a hack to get around the fact that we can't borrow
@@ -247,7 +254,7 @@ impl SelmaSanitizer {
247
254
  // encountered, remove the entire element to be safe.
248
255
  if attr_name.starts_with("<!--") {
249
256
  Self::force_remove_element(self, element);
250
- return;
257
+ return Ok(());
251
258
  }
252
259
 
253
260
  // first, trim leading spaces and unescape any encodings
@@ -255,46 +262,64 @@ impl SelmaSanitizer {
255
262
  let x = escapist::unescape_html(trimmed.as_bytes());
256
263
  let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
257
264
 
258
- if !Self::should_keep_attribute(
265
+ let should_keep_attrubute = match Self::should_keep_attribute(
259
266
  &binding,
260
267
  element,
261
268
  element_sanitizer,
262
269
  attr_name,
263
270
  &unescaped_attr_val,
264
271
  ) {
272
+ Ok(should_keep) => should_keep,
273
+ Err(e) => {
274
+ return Err(e);
275
+ }
276
+ };
277
+
278
+ if !should_keep_attrubute {
265
279
  element.remove_attribute(attr_name);
266
280
  } else {
267
281
  // Prevent the use of `<meta>` elements that set a charset other than UTF-8,
268
282
  // since output is always UTF-8.
269
- if Tag::is_meta(tag) {
283
+ if crate::tags::Tag::is_meta(tag) {
270
284
  if attr_name == "charset" && unescaped_attr_val != "utf-8" {
271
- element.set_attribute(attr_name, "utf-8");
285
+ match element.set_attribute(attr_name, "utf-8") {
286
+ Ok(_) => {}
287
+ Err(err) => {
288
+ return Err(err);
289
+ }
290
+ }
272
291
  }
273
292
  } else if !unescaped_attr_val.is_empty() {
274
293
  let mut buf = String::new();
275
294
  // ...then, escape any special characters, for security
276
295
  if attr_name == "href" {
277
- // FIXME: gross--------------vvvv
278
- escapist::escape_href(&mut buf, unescaped_attr_val.to_string().as_str());
296
+ escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
279
297
  } else {
280
- escapist::escape_html(&mut buf, unescaped_attr_val.to_string().as_str());
298
+ escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
281
299
  };
282
300
 
283
- element.set_attribute(attr_name, &buf);
301
+ match element.set_attribute(attr_name, &buf) {
302
+ Ok(_) => {}
303
+ Err(err) => {
304
+ return Err(err);
305
+ }
306
+ }
284
307
  }
285
308
  }
286
309
  }
287
310
 
288
311
  let required = &element_sanitizer.required_attrs;
289
312
  if required.contains(&"*".to_string()) {
290
- return;
313
+ return Ok(());
291
314
  }
292
315
  for attr in element.attributes().iter() {
293
316
  let attr_name = &attr.name();
294
317
  if required.contains(attr_name) {
295
- return;
318
+ return Ok(());
296
319
  }
297
320
  }
321
+
322
+ Ok(())
298
323
  }
299
324
 
300
325
  fn should_keep_attribute(
@@ -303,7 +328,7 @@ impl SelmaSanitizer {
303
328
  element_sanitizer: &ElementSanitizer,
304
329
  attr_name: &String,
305
330
  attr_val: &String,
306
- ) -> bool {
331
+ ) -> Result<bool, AttributeNameError> {
307
332
  let mut allowed: bool = false;
308
333
  let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
309
334
  let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
@@ -317,7 +342,7 @@ impl SelmaSanitizer {
317
342
  }
318
343
 
319
344
  if !allowed {
320
- return false;
345
+ return Ok(false);
321
346
  }
322
347
 
323
348
  let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
@@ -325,32 +350,29 @@ impl SelmaSanitizer {
325
350
  None => {
326
351
  // has a protocol, but no sanitization list
327
352
  if !attr_val.is_empty() && Self::has_protocol(attr_val) {
328
- return false;
353
+ return Ok(false);
329
354
  }
330
355
  }
331
356
  Some(protocol_sanitizer_values) => {
332
357
  if !attr_val.is_empty()
333
358
  && !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
334
359
  {
335
- return false;
360
+ return Ok(false);
336
361
  }
337
362
  }
338
363
  }
339
364
 
340
- if attr_name == "class"
341
- && !Self::sanitize_class_attribute(
365
+ if attr_name == "class" {
366
+ return Self::sanitize_class_attribute(
342
367
  binding,
343
368
  element,
344
369
  element_sanitizer,
345
370
  attr_name,
346
371
  attr_val,
347
- )
348
- .unwrap()
349
- {
350
- return false;
372
+ );
351
373
  }
352
374
 
353
- true
375
+ Ok(true)
354
376
  }
355
377
 
356
378
  fn has_protocol(attr_val: &str) -> bool {
@@ -393,7 +415,7 @@ impl SelmaSanitizer {
393
415
  element_sanitizer: &ElementSanitizer,
394
416
  attr_name: &str,
395
417
  attr_val: &str,
396
- ) -> Result<bool, Error> {
418
+ ) -> Result<bool, lol_html::errors::AttributeNameError> {
397
419
  let allowed_global = &binding.allowed_classes;
398
420
 
399
421
  let mut valid_classes: Vec<String> = vec![];
@@ -421,28 +443,25 @@ impl SelmaSanitizer {
421
443
 
422
444
  match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
423
445
  Ok(_) => Ok(true),
424
- Err(err) => Err(Error::new(
425
- exception::runtime_error(),
426
- format!("AttributeNameError: {}", err),
427
- )),
446
+ Err(err) => Err(err),
428
447
  }
429
448
  }
430
449
 
431
450
  pub fn allow_element(&self, element: &mut Element) -> bool {
432
- let tag = Tag::tag_from_element(element);
451
+ let tag = crate::tags::Tag::tag_from_element(element);
433
452
  let flags: u8 = self.0.borrow().flags[tag.index];
434
453
 
435
454
  (flags & Self::SELMA_SANITIZER_ALLOW) == 0
436
455
  }
437
456
 
438
457
  pub fn try_remove_element(&self, element: &mut Element) -> bool {
439
- let tag = Tag::tag_from_element(element);
458
+ let tag = crate::tags::Tag::tag_from_element(element);
440
459
  let flags: u8 = self.0.borrow().flags[tag.index];
441
460
 
442
461
  let should_remove = !element.removed() && self.allow_element(element);
443
462
 
444
463
  if should_remove {
445
- if Tag::has_text_content(tag) {
464
+ if crate::tags::Tag::has_text_content(tag) {
446
465
  Self::remove_element(
447
466
  element,
448
467
  tag.self_closing,
@@ -455,7 +474,7 @@ impl SelmaSanitizer {
455
474
  Self::check_if_end_tag_needs_removal(element);
456
475
  } else {
457
476
  // anything in <iframe> must be removed, if it's kept
458
- if Tag::is_iframe(tag) {
477
+ if crate::tags::Tag::is_iframe(tag) {
459
478
  if self.0.borrow().flags[tag.index] != 0 {
460
479
  element.set_inner_content(" ", ContentType::Text);
461
480
  } else {
@@ -487,14 +506,14 @@ impl SelmaSanitizer {
487
506
  }
488
507
 
489
508
  pub fn force_remove_element(&self, element: &mut Element) {
490
- let tag = Tag::tag_from_element(element);
509
+ let tag = crate::tags::Tag::tag_from_element(element);
491
510
  let self_closing = tag.self_closing;
492
511
  Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
493
512
  Self::check_if_end_tag_needs_removal(element);
494
513
  }
495
514
 
496
515
  fn check_if_end_tag_needs_removal(element: &mut Element) {
497
- if element.removed() && !Tag::tag_from_element(element).self_closing {
516
+ if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
498
517
  element
499
518
  .on_end_tag(move |end| {
500
519
  Self::remove_end_tag(end);
@@ -523,7 +542,7 @@ impl SelmaSanitizer {
523
542
  }
524
543
  }
525
544
 
526
- pub fn init(m_selma: RModule) -> Result<(), Error> {
545
+ pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
527
546
  let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;
528
547
 
529
548
  c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
@@ -27,7 +27,7 @@ impl SelmaSelector {
27
27
  if css.parse::<lol_html::Selector>().is_err() {
28
28
  return Err(Error::new(
29
29
  exception::arg_error(),
30
- format!("Could not parse `match_element` (`{}`) as valid CSS", css),
30
+ format!("Could not parse `match_element` (`{css:?}`) as valid CSS"),
31
31
  ));
32
32
  }
33
33
  }
@@ -37,10 +37,7 @@ impl SelmaSelector {
37
37
  if css.parse::<lol_html::Selector>().is_err() {
38
38
  return Err(Error::new(
39
39
  exception::arg_error(),
40
- format!(
41
- "Could not parse `match_text_within` (`{}`) as valid CSS",
42
- css
43
- ),
40
+ format!("Could not parse `match_text_within` (`{css:?}`) as valid CSS",),
44
41
  ));
45
42
  }
46
43
  }
@@ -192,14 +192,17 @@ impl Tag {
192
192
  /// Is this tag something which needs to be removed?
193
193
  pub fn is_tag_escapeworthy(tag: Tag) -> bool {
194
194
  tag.index == HTMLTag::TITLE as usize
195
- || tag.index == HTMLTag::TEXTAREA as usize
196
- || tag.index == HTMLTag::STYLE as usize
197
- || tag.index == HTMLTag::XMP as usize
198
195
  || tag.index == HTMLTag::IFRAME as usize
196
+ || tag.index == HTMLTag::MATH as usize
199
197
  || tag.index == HTMLTag::NOEMBED as usize
200
198
  || tag.index == HTMLTag::NOFRAMES as usize
201
- || tag.index == HTMLTag::SCRIPT as usize
199
+ || tag.index == HTMLTag::NOSCRIPT as usize
202
200
  || tag.index == HTMLTag::PLAINTEXT as usize
201
+ || tag.index == HTMLTag::SCRIPT as usize
202
+ || tag.index == HTMLTag::STYLE as usize
203
+ || tag.index == HTMLTag::SVG as usize
204
+ || tag.index == HTMLTag::TEXTAREA as usize
205
+ || tag.index == HTMLTag::XMP as usize
203
206
  }
204
207
 
205
208
  pub const ESCAPEWORTHY_TAGS_CSS: &str =
Binary file
@@ -3,6 +3,10 @@
3
3
  module Selma
4
4
  class Sanitizer
5
5
  module Config
6
+ # although there are many more protocol types, eg., ftp, xmpp, etc.,
7
+ # these are the only ones that are allowed by default
8
+ VALID_PROTOCOLS = ["http", "https", "mailto", :relative]
9
+
6
10
  DEFAULT = freeze_config(
7
11
  # Whether or not to allow HTML comments. Allowing comments is strongly
8
12
  # discouraged, since IE allows script execution within conditional
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.2"
4
+ VERSION = "0.0.4"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Garen J. Torikian
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-21 00:00:00.000000000 Z
11
+ date: 2022-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -81,6 +81,7 @@ files:
81
81
  - ext/selma/src/html.rs
82
82
  - ext/selma/src/html/element.rs
83
83
  - ext/selma/src/html/end_tag.rs
84
+ - ext/selma/src/html/text_chunk.rs
84
85
  - ext/selma/src/lib.rs
85
86
  - ext/selma/src/native_ref_wrap.rs
86
87
  - ext/selma/src/rewriter.rs