selma 0.0.2-x86_64-darwin → 0.0.4-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c540a4c66965c519eb3e1df6dd3438d26e2f73095485133b21b38022330a552e
4
- data.tar.gz: 198d8db194c08247858f1e64e7398a05bdb3f175ec0cb484f40a33437df3ac62
3
+ metadata.gz: 5355712312797e3aabbf2c958807f12c397250836d7c4575848bf57852b9cecb
4
+ data.tar.gz: 5c8bf43375c4785fa152d6ea467efc3f7ef34f7fa0801f5473cc2b7f62296cf6
5
5
  SHA512:
6
- metadata.gz: 7d6d478be88892c0aa07283dbfa2ae26d6dcdfbcfca9e6d0dd1312ab77685cb98343519b0942b904f85955373c1233d455b47f45c6f10411f5baeabe0ece4681
7
- data.tar.gz: b912f88b531a1ef49afeaa40aab13ab9e84e9b45694865f5d18d70ce50c40c380b6297bafc0f90d35737acf543351aee92eacc9e2c79e239281d460e1ffc827a
6
+ metadata.gz: 29137ce7a58700f8b56414a11054ab295643d56c1fe079921fe69bf9f80c3879e22c3afd60e25961315e0a37b4dbbf4b1a278951404166ffb3571c55b0e75bef
7
+ data.tar.gz: 4dee67b422acddcd03f4168ae52e27ee9c403cab9233886d861a121ca26a0ee01968d0134b112409a7650141990aa5530053188880d644739b43181357ddfc15
data/README.md CHANGED
@@ -56,6 +56,10 @@ allow_comments: false,
56
56
  # "<!DOCTYPE html>" when sanitizing a document.
57
57
  allow_doctype: false,
58
58
 
59
+ # HTML elements to allow. By default, no elements are allowed (which means
60
+ # that all HTML will be stripped).
61
+ elements: ["a", "b", "img", ],
62
+
59
63
  # HTML attributes to allow in specific elements. The key is the name of the element,
60
64
  # and the value is an array of allowed attributes. By default, no attributes
61
65
  # are allowed.
@@ -64,14 +68,10 @@ attributes: {
64
68
  "img" => ["src"],
65
69
  },
66
70
 
67
- # HTML elements to allow. By default, no elements are allowed (which means
68
- # that all HTML will be stripped).
69
- elements: ["a", "b", "img", ],
70
-
71
71
  # URL handling protocols to allow in specific attributes. By default, no
72
72
  # protocols are allowed. Use :relative in place of a protocol if you want
73
73
  # to allow relative URLs sans protocol.
74
- protocols: {
74
+ protocols: {
75
75
  "a" => { "href" => ["http", "https", "mailto", :relative] },
76
76
  "img" => { "href" => ["http", "https"] },
77
77
  },
@@ -91,7 +91,7 @@ The real power in Selma comes in its use of handlers. A handler is simply an obj
91
91
 
92
92
  - `selector`, a method which MUST return an instance of `Selma::Selector` which defines the CSS classes to match
93
93
  - `handle_element`, a method that's called on each matched element
94
- - `handle_text`, a method that's called on each matched text node; this MUST return a string
94
+ - `handle_text_chunk`, a method that's called on each matched text node; this MUST return a string
95
95
 
96
96
  Here's an example which rewrites the `href` attribute on `a` and the `src` attribute on `img` to be `https` rather than `http`.
97
97
 
@@ -118,7 +118,7 @@ rewriter = Selma::Rewriter.new(handlers: [MatchAttribute.new])
118
118
  The `Selma::Selector` object has three possible kwargs:
119
119
 
120
120
  - `match_element`: any element which matches this CSS rule will be passed on to `handle_element`
121
- - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text`
121
+ - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text_chunk`
122
122
  - `ignore_text_within`: this is an array of element names whose text contents will be ignored
123
123
 
124
124
  You've seen an example of `match_element`; here's one for `match_text_within` which changes strings in various elements which are _not_ `pre` or `code`:
@@ -132,7 +132,7 @@ class MatchText
132
132
  SELECTOR
133
133
  end
134
134
 
135
- def handle_text(text)
135
+ def handle_text_chunk(text)
136
136
  string.sub(/@.+/, "<a href=\"www.yetto.app/#{Regexp.last_match}\">")
137
137
  end
138
138
  end
@@ -150,8 +150,9 @@ The `element` argument in `handle_element` has the following methods:
150
150
  - `remove_attribute`: remove an attribute
151
151
  - `attributes`: list all the attributes
152
152
  - `ancestors`: list all the ancestors
153
- - `append(content, content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
154
- - `wrap(start_text, end_text, content_type)`: adds `start_text` before an element and `end_text` after an element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
153
+ - `append(content, as: content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
154
+ - `before(content, as: content_type)`: Inserts `content` before the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
155
+ - `after(content, as: content_type)`: Inserts `content` after the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
155
156
  - `set_inner_content(content, as: content_type)`: replaces the inner content of the element with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
156
157
 
157
158
  ## Benchmarks
data/ext/selma/Cargo.toml CHANGED
@@ -5,9 +5,9 @@ edition = "2021"
5
5
 
6
6
  [dependencies]
7
7
  enum-iterator = "1.2"
8
- escapist = "0.0.1"
9
- magnus = "0.4"
10
- lol_html = { git = "https://github.com/cloudflare/lol-html", rev = "b09b7afbbcecb944f4bf338b0e669c430d91061e" }
8
+ escapist = "0.0.2"
9
+ magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
10
+ lol_html = "0.3"
11
11
 
12
12
  [lib]
13
13
  name = "selma"
@@ -1,8 +1,6 @@
1
- use std::borrow::Cow;
2
-
3
1
  use crate::native_ref_wrap::NativeRefWrap;
4
- use lol_html::html_content::{ContentType, Element};
5
- use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Symbol};
2
+ use lol_html::html_content::Element;
3
+ use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Value};
6
4
 
7
5
  struct HTMLElement {
8
6
  element: NativeRefWrap<Element<'static, 'static>>,
@@ -51,7 +49,7 @@ impl SelmaHTMLElement {
51
49
  Ok(_) => Ok(value),
52
50
  Err(err) => Err(Error::new(
53
51
  exception::runtime_error(),
54
- format!("AttributeNameError: {}", err),
52
+ format!("AttributeNameError: {err:?}"),
55
53
  )),
56
54
  }
57
55
  } else {
@@ -81,7 +79,7 @@ impl SelmaHTMLElement {
81
79
  Ok(_) => {}
82
80
  Err(err) => Err(Error::new(
83
81
  exception::runtime_error(),
84
- format!("AttributeNameError: {}", err),
82
+ format!("AttributeNameError: {err:?}"),
85
83
  ))
86
84
  .unwrap(),
87
85
  });
@@ -99,80 +97,74 @@ impl SelmaHTMLElement {
99
97
  .for_each(|ancestor| match array.push(RString::new(ancestor)) {
100
98
  Ok(_) => {}
101
99
  Err(err) => {
102
- Err(Error::new(exception::runtime_error(), format!("{}", err))).unwrap()
100
+ Err(Error::new(exception::runtime_error(), format!("{err:?}"))).unwrap()
103
101
  }
104
102
  });
105
103
 
106
104
  Ok(array)
107
105
  }
108
106
 
109
- fn append(&self, text_to_append: String, content_type: Symbol) -> Result<(), Error> {
107
+ fn before(&self, args: &[Value]) -> Result<(), Error> {
110
108
  let mut binding = self.0.borrow_mut();
111
109
  let element = binding.element.get_mut().unwrap();
112
110
 
113
- let text_str = text_to_append.as_str();
114
-
115
- let content_type = Self::find_content_type(content_type);
111
+ let (text_str, content_type) = match crate::scan_text_args(args) {
112
+ Ok((text_str, content_type)) => (text_str, content_type),
113
+ Err(err) => return Err(err),
114
+ };
116
115
 
117
- element.append(text_str, content_type);
116
+ element.before(&text_str, content_type);
118
117
 
119
118
  Ok(())
120
119
  }
121
120
 
122
- fn wrap(
123
- &self,
124
- start_text: String,
125
- end_text: String,
126
- content_type: Symbol,
127
- ) -> Result<(), Error> {
121
+ fn after(&self, args: &[Value]) -> Result<(), Error> {
128
122
  let mut binding = self.0.borrow_mut();
129
123
  let element = binding.element.get_mut().unwrap();
130
124
 
131
- let before_content_type = Self::find_content_type(content_type);
132
- let after_content_type = Self::find_content_type(content_type);
133
- element.before(&start_text, before_content_type);
134
- element.after(&end_text, after_content_type);
125
+ let (text_str, content_type) = match crate::scan_text_args(args) {
126
+ Ok((text_str, content_type)) => (text_str, content_type),
127
+ Err(err) => return Err(err),
128
+ };
129
+
130
+ element.after(&text_str, content_type);
135
131
 
136
132
  Ok(())
137
133
  }
138
134
 
139
- fn set_inner_content(&self, text_to_set: String, content_type: Symbol) -> Result<(), Error> {
135
+ fn append(&self, args: &[Value]) -> Result<(), Error> {
140
136
  let mut binding = self.0.borrow_mut();
141
137
  let element = binding.element.get_mut().unwrap();
142
138
 
143
- let text_str = text_to_set.as_str();
144
-
145
- let content_type = Self::find_content_type(content_type);
139
+ let (text_str, content_type) = match crate::scan_text_args(args) {
140
+ Ok((text_str, content_type)) => (text_str, content_type),
141
+ Err(err) => return Err(err),
142
+ };
146
143
 
147
- element.set_inner_content(text_str, content_type);
144
+ element.append(&text_str, content_type);
148
145
 
149
146
  Ok(())
150
147
  }
151
148
 
152
- fn find_content_type(content_type: Symbol) -> ContentType {
153
- match content_type.name() {
154
- Ok(name) => match (name) {
155
- Cow::Borrowed("as_text") => ContentType::Text,
156
- Cow::Borrowed("as_html") => ContentType::Html,
157
- _ => Err(Error::new(
158
- exception::runtime_error(),
159
- format!("unknown symbol `{}`", name),
160
- ))
161
- .unwrap(),
162
- },
163
- Err(err) => Err(Error::new(
164
- exception::runtime_error(),
165
- format!("Could not unwrap symbol"),
166
- ))
167
- .unwrap(),
168
- }
149
+ fn set_inner_content(&self, args: &[Value]) -> Result<(), Error> {
150
+ let mut binding = self.0.borrow_mut();
151
+ let element = binding.element.get_mut().unwrap();
152
+
153
+ let (inner_content, content_type) = match crate::scan_text_args(args) {
154
+ Ok((inner_content, content_type)) => (inner_content, content_type),
155
+ Err(err) => return Err(err),
156
+ };
157
+
158
+ element.set_inner_content(&inner_content, content_type);
159
+
160
+ Ok(())
169
161
  }
170
162
  }
171
163
 
172
164
  pub fn init(c_html: RClass) -> Result<(), Error> {
173
165
  let c_element = c_html
174
166
  .define_class("Element", Default::default())
175
- .expect("cannot find class Selma::Element");
167
+ .expect("cannot find class Selma::HTML::Element");
176
168
 
177
169
  c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
178
170
  c_element.define_method("[]", method!(SelmaHTMLElement::get_attribute, 1))?;
@@ -184,11 +176,12 @@ pub fn init(c_html: RClass) -> Result<(), Error> {
184
176
  c_element.define_method("attributes", method!(SelmaHTMLElement::get_attributes, 0))?;
185
177
  c_element.define_method("ancestors", method!(SelmaHTMLElement::get_ancestors, 0))?;
186
178
 
187
- c_element.define_method("append", method!(SelmaHTMLElement::append, 2))?;
188
- c_element.define_method("wrap", method!(SelmaHTMLElement::wrap, 3))?;
179
+ c_element.define_method("before", method!(SelmaHTMLElement::before, -1))?;
180
+ c_element.define_method("after", method!(SelmaHTMLElement::after, -1))?;
181
+ c_element.define_method("append", method!(SelmaHTMLElement::append, -1))?;
189
182
  c_element.define_method(
190
183
  "set_inner_content",
191
- method!(SelmaHTMLElement::set_inner_content, 2),
184
+ method!(SelmaHTMLElement::set_inner_content, -1),
192
185
  )?;
193
186
 
194
187
  Ok(())
@@ -6,7 +6,7 @@ struct HTMLEndTag {
6
6
  end_tag: NativeRefWrap<EndTag<'static>>,
7
7
  }
8
8
 
9
- #[magnus::wrap(class = "Selma::HTML::Element")]
9
+ #[magnus::wrap(class = "Selma::HTML::EndTag")]
10
10
  pub struct SelmaHTMLEndTag(std::cell::RefCell<HTMLEndTag>);
11
11
 
12
12
  /// SAFETY: This is safe because we only access this data when the GVL is held.
@@ -27,7 +27,7 @@ impl SelmaHTMLEndTag {
27
27
  pub fn init(c_html: RClass) -> Result<(), Error> {
28
28
  let c_end_tag = c_html
29
29
  .define_class("EndTag", Default::default())
30
- .expect("cannot find class Selma::EndTag");
30
+ .expect("cannot find class Selma::HTML::EndTag");
31
31
 
32
32
  c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
33
33
 
@@ -0,0 +1,83 @@
1
+ use crate::native_ref_wrap::NativeRefWrap;
2
+ use lol_html::html_content::{TextChunk, TextType};
3
+ use magnus::{exception, method, Error, Module, RClass, Symbol, Value};
4
+
5
+ struct HTMLTextChunk {
6
+ text_chunk: NativeRefWrap<TextChunk<'static>>,
7
+ }
8
+
9
+ #[magnus::wrap(class = "Selma::HTML::TextChunk")]
10
+ pub struct SelmaHTMLTextChunk(std::cell::RefCell<HTMLTextChunk>);
11
+
12
+ /// SAFETY: This is safe because we only access this data when the GVL is held.
13
+ unsafe impl Send for SelmaHTMLTextChunk {}
14
+
15
+ impl SelmaHTMLTextChunk {
16
+ pub fn new(text_chunk: &mut TextChunk) -> Self {
17
+ let (ref_wrap, _anchor) = NativeRefWrap::wrap_mut(text_chunk);
18
+
19
+ Self(std::cell::RefCell::new(HTMLTextChunk {
20
+ text_chunk: ref_wrap,
21
+ }))
22
+ }
23
+
24
+ fn to_s(&self) -> Result<String, Error> {
25
+ let binding = self.0.borrow();
26
+
27
+ if let Ok(tc) = binding.text_chunk.get() {
28
+ Ok(tc.as_str().to_string())
29
+ } else {
30
+ Err(Error::new(
31
+ exception::runtime_error(),
32
+ "`to_s` is not available",
33
+ ))
34
+ }
35
+ }
36
+
37
+ fn text_type(&self) -> Result<Symbol, Error> {
38
+ let binding = self.0.borrow();
39
+
40
+ if let Ok(tc) = binding.text_chunk.get() {
41
+ match tc.text_type() {
42
+ TextType::Data => Ok(Symbol::from("data")),
43
+ TextType::PlainText => Ok(Symbol::from("plain_text")),
44
+ TextType::RawText => Ok(Symbol::from("raw_text")),
45
+ TextType::ScriptData => Ok(Symbol::from("script")),
46
+ TextType::RCData => Ok(Symbol::from("rc_data")),
47
+ TextType::CDataSection => Ok(Symbol::from("cdata_section")),
48
+ }
49
+ } else {
50
+ Err(Error::new(
51
+ exception::runtime_error(),
52
+ "`text_type` is not available",
53
+ ))
54
+ }
55
+ }
56
+
57
+ fn replace(&self, args: &[Value]) -> Result<(), Error> {
58
+ let mut binding = self.0.borrow_mut();
59
+ let text_chunk = binding.text_chunk.get_mut().unwrap();
60
+
61
+ let (text_str, content_type) = match crate::scan_text_args(args) {
62
+ Ok((text_str, content_type)) => (text_str, content_type),
63
+ Err(err) => return Err(err),
64
+ };
65
+
66
+ text_chunk.replace(&text_str, content_type);
67
+
68
+ Ok(())
69
+ }
70
+ }
71
+
72
+ pub fn init(c_html: RClass) -> Result<(), Error> {
73
+ let c_text_chunk = c_html
74
+ .define_class("TextChunk", Default::default())
75
+ .expect("cannot find class Selma::HTML::TextChunk");
76
+
77
+ c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
78
+ c_text_chunk.define_method("content", method!(SelmaHTMLTextChunk::to_s, 0))?;
79
+ c_text_chunk.define_method("text_type", method!(SelmaHTMLTextChunk::text_type, 0))?;
80
+ c_text_chunk.define_method("replace", method!(SelmaHTMLTextChunk::replace, -1))?;
81
+
82
+ Ok(())
83
+ }
@@ -9,9 +9,11 @@ pub fn init(m_selma: RModule) -> Result<(), Error> {
9
9
 
10
10
  element::init(c_html).expect("cannot define Selma::HTML::Element class");
11
11
  end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
12
+ text_chunk::init(c_html).expect("cannot define Selma::HTML::TextChunk class");
12
13
 
13
14
  Ok(())
14
15
  }
15
16
 
16
17
  pub mod element;
17
18
  pub mod end_tag;
19
+ pub mod text_chunk;
data/ext/selma/src/lib.rs CHANGED
@@ -1,6 +1,7 @@
1
1
  extern crate core;
2
2
 
3
- use magnus::{define_module, Error};
3
+ use lol_html::html_content::ContentType;
4
+ use magnus::{define_module, exception, scan_args, Error, Symbol, Value};
4
5
 
5
6
  pub mod html;
6
7
  pub mod native_ref_wrap;
@@ -10,6 +11,32 @@ pub mod selector;
10
11
  pub mod tags;
11
12
  pub mod wrapped_struct;
12
13
 
14
+ #[allow(clippy::let_unit_value)]
15
+ fn scan_text_args(args: &[Value]) -> Result<(String, ContentType), magnus::Error> {
16
+ let args = scan_args::scan_args(args)?;
17
+ let (text,): (String,) = args.required;
18
+ let _: () = args.optional;
19
+ let _: () = args.splat;
20
+ let _: () = args.trailing;
21
+ let _: () = args.block;
22
+
23
+ let kwargs = scan_args::get_kwargs::<_, (Symbol,), (), ()>(args.keywords, &["as"], &[])?;
24
+ let as_sym = kwargs.required.0;
25
+ let as_sym_str = as_sym.name().unwrap();
26
+ let content_type = if as_sym_str == "text" {
27
+ ContentType::Text
28
+ } else if as_sym_str == "html" {
29
+ ContentType::Html
30
+ } else {
31
+ return Err(Error::new(
32
+ exception::runtime_error(),
33
+ format!("unknown symbol `{as_sym_str:?}`"),
34
+ ));
35
+ };
36
+
37
+ Ok((text, content_type))
38
+ }
39
+
13
40
  #[magnus::init]
14
41
  fn init() -> Result<(), Error> {
15
42
  let m_selma = define_module("Selma").expect("cannot define ::Selma module");
@@ -1,4 +1,4 @@
1
- use std::{cell::Cell, marker::PhantomData, mem, rc::Rc};
1
+ use std::{cell::Cell, marker::PhantomData, rc::Rc};
2
2
 
3
3
  // NOTE: My Rust isn't good enough to know what any of this does,
4
4
  // but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
@@ -37,7 +37,7 @@ pub struct NativeRefWrap<R> {
37
37
  impl<R> NativeRefWrap<R> {
38
38
  pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
39
39
  let wrap = NativeRefWrap {
40
- inner_ptr: unsafe { mem::transmute(inner) },
40
+ inner_ptr: inner as *const I as *mut R,
41
41
  poisoned: Rc::new(Cell::new(false)),
42
42
  };
43
43
 
@@ -48,7 +48,7 @@ impl<R> NativeRefWrap<R> {
48
48
 
49
49
  pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor) {
50
50
  let wrap = NativeRefWrap {
51
- inner_ptr: unsafe { mem::transmute(inner) },
51
+ inner_ptr: inner as *mut I as *mut R,
52
52
  poisoned: Rc::new(Cell::new(false)),
53
53
  };
54
54
 
@@ -1,14 +1,14 @@
1
- use std::{borrow::Cow, cell::RefCell, rc::Rc};
2
-
3
1
  use lol_html::{
4
2
  doc_comments, doctype, element,
5
- html_content::{ContentType, Element, EndTag, TextChunk},
3
+ html_content::{Element, EndTag, TextChunk},
6
4
  text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
7
5
  };
8
6
  use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
9
7
 
8
+ use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
9
+
10
10
  use crate::{
11
- html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
11
+ html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
12
12
  sanitizer::SelmaSanitizer,
13
13
  selector::SelmaSelector,
14
14
  tags::Tag,
@@ -43,7 +43,7 @@ unsafe impl Send for SelmaRewriter {}
43
43
  impl SelmaRewriter {
44
44
  const SELMA_ON_END_TAG: &str = "on_end_tag";
45
45
  const SELMA_HANDLE_ELEMENT: &str = "handle_element";
46
- const SELMA_HANDLE_TEXT: &str = "handle_text";
46
+ const SELMA_HANDLE_TEXT_CHUNK: &str = "handle_text_chunk";
47
47
 
48
48
  /// @yard
49
49
  /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
@@ -83,18 +83,18 @@ impl SelmaRewriter {
83
83
  return Err(magnus::Error::new(
84
84
  exception::no_method_error(),
85
85
  format!(
86
- "Could not call #selector on {:?}; is this an object that defines it?",
87
- classname
86
+ "Could not call #selector on {classname:?}; is this an object that defines it?",
87
+
88
88
  ),
89
89
  ));
90
90
  }
91
91
 
92
92
  let rb_selector: WrappedStruct<SelmaSelector> =
93
93
  match rb_handler.funcall("selector", ()) {
94
- Err(e) => {
94
+ Err(err) => {
95
95
  return Err(magnus::Error::new(
96
96
  exception::type_error(),
97
- format!("Error instantiating selector: {}", e),
97
+ format!("Error instantiating selector: {err:?}"),
98
98
  ));
99
99
  }
100
100
  Ok(rb_selector) => rb_selector,
@@ -145,7 +145,7 @@ impl SelmaRewriter {
145
145
  let _: () = args.trailing;
146
146
  let _: () = args.block;
147
147
 
148
- let kw = scan_args::get_kwargs::<
148
+ let kwargs = scan_args::get_kwargs::<
149
149
  _,
150
150
  (),
151
151
  (
@@ -154,7 +154,7 @@ impl SelmaRewriter {
154
154
  ),
155
155
  (),
156
156
  >(args.keywords, &[], &["sanitizer", "handlers"])?;
157
- let (rb_sanitizer, rb_handlers) = kw.optional;
157
+ let (rb_sanitizer, rb_handlers) = kwargs.optional;
158
158
 
159
159
  Ok((rb_sanitizer, rb_handlers))
160
160
  }
@@ -162,28 +162,22 @@ impl SelmaRewriter {
162
162
  /// Perform HTML rewrite sequence.
163
163
  fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
164
164
  let sanitized_html = match &self.0.borrow().sanitizer {
165
- None => html,
165
+ None => Ok(html),
166
166
  Some(sanitizer) => {
167
- // let first_pass_html = Self::perform_initial_sanitization(sanitizer, &html).unwrap();
168
-
169
- // due to malicious html crafting
170
- // (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
171
- // we need to run sanitization several times to truly remove unwanted tags,
172
- // because lol-html happily accepts this garbage (by design?)
173
- let sanitized_html = Self::perform_sanitization(sanitizer, &html).unwrap();
167
+ let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
168
+ Ok(sanitized_html) => sanitized_html,
169
+ Err(err) => return Err(err),
170
+ };
174
171
 
175
- String::from_utf8(sanitized_html).unwrap()
172
+ String::from_utf8(sanitized_html)
176
173
  }
177
174
  };
178
175
  let binding = self.0.borrow_mut();
179
176
  let handlers = &binding.handlers;
180
177
 
181
- match Self::perform_handler_rewrite(self, handlers, sanitized_html) {
178
+ match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
182
179
  Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
183
- Err(err) => Err(magnus::Error::new(
184
- exception::runtime_error(),
185
- format!("{}", err),
186
- )),
180
+ Err(err) => Err(err),
187
181
  }
188
182
  }
189
183
 
@@ -214,10 +208,12 @@ impl SelmaRewriter {
214
208
  if el.removed() {
215
209
  return Ok(());
216
210
  }
217
- sanitizer.sanitize_attributes(el);
218
-
219
- Ok(())
211
+ match sanitizer.sanitize_attributes(el) {
212
+ Ok(_) => Ok(()),
213
+ Err(err) => Err(err.to_string().into()),
214
+ }
220
215
  })],
216
+ // TODO: allow for MemorySettings to be defined
221
217
  ..Settings::default()
222
218
  },
223
219
  |c: &[u8]| first_pass_html.extend_from_slice(c),
@@ -342,7 +338,7 @@ impl SelmaRewriter {
342
338
  let mut stack = closure_element_stack.as_ref().borrow_mut();
343
339
  stack.pop();
344
340
  Ok(())
345
- });
341
+ })?;
346
342
  Ok(())
347
343
  }));
348
344
  });
@@ -361,7 +357,7 @@ impl SelmaRewriter {
361
357
  Err(err) => {
362
358
  return Err(magnus::Error::new(
363
359
  exception::runtime_error(),
364
- format!("{}", err),
360
+ format!("{err:?}"),
365
361
  ));
366
362
  }
367
363
  }
@@ -372,17 +368,18 @@ impl SelmaRewriter {
372
368
  fn process_element_handlers(
373
369
  rb_handler: Value,
374
370
  element: &mut Element,
375
- ancestors: &Vec<String>,
371
+ ancestors: &[String],
376
372
  ) -> Result<(), magnus::Error> {
377
373
  // if `on_end_tag` function is defined, call it
378
374
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
375
+ // TODO: error here is an "EndTagError"
379
376
  element.on_end_tag(move |end_tag| {
380
377
  let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
381
378
 
382
- rb_handler
383
- .funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,))
384
- .unwrap();
385
- Ok(())
379
+ match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
380
+ Ok(_) => Ok(()),
381
+ Err(err) => Err(err.to_string().into()),
382
+ }
386
383
  });
387
384
  }
388
385
 
@@ -391,39 +388,30 @@ impl SelmaRewriter {
391
388
  rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
392
389
  match rb_result {
393
390
  Ok(_) => Ok(()),
394
- Err(err) => Err(magnus::Error::new(
395
- exception::runtime_error(),
396
- format!("{}", err),
397
- )),
391
+ Err(err) => Err(err),
398
392
  }
399
393
  }
400
394
 
401
- fn process_text_handlers(rb_handler: Value, text: &mut TextChunk) -> Result<(), magnus::Error> {
402
- // prevents missing `handle_text` function
403
- let content = text.as_str();
395
+ fn process_text_handlers(
396
+ rb_handler: Value,
397
+ text_chunk: &mut TextChunk,
398
+ ) -> Result<(), magnus::Error> {
399
+ // prevents missing `handle_text_chunk` function
400
+ let content = text_chunk.as_str();
404
401
 
405
- // FIXME: why does this happen?
402
+ // seems that sometimes lol-html returns blank text / EOLs?
406
403
  if content.is_empty() {
407
404
  return Ok(());
408
405
  }
409
- let rb_result = rb_handler.funcall(Self::SELMA_HANDLE_TEXT, (content,));
410
406
 
411
- if rb_result.is_err() {
412
- return Err(magnus::Error::new(
413
- exception::type_error(),
414
- format!(
415
- "Expected #{:?} to return a string: {:?}",
416
- Self::SELMA_HANDLE_TEXT,
417
- rb_result.err().unwrap()
418
- ),
419
- ));
407
+ let rb_text_chunk = SelmaHTMLTextChunk::new(text_chunk);
408
+ match rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,)) {
409
+ Ok(_) => Ok(()),
410
+ Err(err) => Err(magnus::Error::new(
411
+ exception::runtime_error(),
412
+ format!("{err:?}"),
413
+ )),
420
414
  }
421
-
422
- let new_content: String = rb_result.unwrap();
423
- // TODO: can this be an option?
424
- text.replace(&new_content, ContentType::Html);
425
-
426
- Ok(())
427
415
  }
428
416
  }
429
417
 
@@ -1,12 +1,10 @@
1
1
  use std::{borrow::BorrowMut, cell::RefMut, collections::HashMap};
2
2
 
3
- use lol_html::html_content::{Comment, ContentType, Doctype, Element, EndTag};
4
- use magnus::{
5
- class, exception, function, method, scan_args, Error, Module, Object, RArray, RHash, RModule,
6
- Value,
3
+ use lol_html::{
4
+ errors::AttributeNameError,
5
+ html_content::{Comment, ContentType, Doctype, Element, EndTag},
7
6
  };
8
-
9
- use crate::tags::Tag;
7
+ use magnus::{class, function, method, scan_args, Module, Object, RArray, RHash, RModule, Value};
10
8
 
11
9
  #[derive(Clone, Debug)]
12
10
  struct ElementSanitizer {
@@ -18,7 +16,7 @@ struct ElementSanitizer {
18
16
 
19
17
  #[derive(Clone, Debug)]
20
18
  pub struct Sanitizer {
21
- flags: [u8; Tag::TAG_COUNT],
19
+ flags: [u8; crate::tags::Tag::TAG_COUNT],
22
20
  allowed_attrs: Vec<String>,
23
21
  allowed_classes: Vec<String>,
24
22
  element_sanitizers: HashMap<String, ElementSanitizer>,
@@ -35,11 +33,11 @@ pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
35
33
 
36
34
  impl SelmaSanitizer {
37
35
  const SELMA_SANITIZER_ALLOW: u8 = (1 << 0);
38
- const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
36
+ // const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
39
37
  const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
40
38
  const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
41
39
 
42
- pub fn new(arguments: &[Value]) -> Result<Self, Error> {
40
+ pub fn new(arguments: &[Value]) -> Result<Self, magnus::Error> {
43
41
  let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
44
42
  let (opt_config,): (Option<RHash>,) = args.optional;
45
43
 
@@ -50,7 +48,7 @@ impl SelmaSanitizer {
50
48
  };
51
49
 
52
50
  let mut element_sanitizers = HashMap::new();
53
- Tag::html_tags().iter().for_each(|html_tag| {
51
+ crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
54
52
  let es = ElementSanitizer {
55
53
  allowed_attrs: vec![],
56
54
  allowed_classes: vec![],
@@ -58,11 +56,14 @@ impl SelmaSanitizer {
58
56
 
59
57
  protocol_sanitizers: HashMap::new(),
60
58
  };
61
- element_sanitizers.insert(Tag::element_name_from_enum(html_tag).to_string(), es);
59
+ element_sanitizers.insert(
60
+ crate::tags::Tag::element_name_from_enum(html_tag).to_string(),
61
+ es,
62
+ );
62
63
  });
63
64
 
64
65
  Ok(Self(std::cell::RefCell::new(Sanitizer {
65
- flags: [0; Tag::TAG_COUNT],
66
+ flags: [0; crate::tags::Tag::TAG_COUNT],
66
67
  allowed_attrs: vec![],
67
68
  allowed_classes: vec![],
68
69
  element_sanitizers,
@@ -74,7 +75,7 @@ impl SelmaSanitizer {
74
75
  })))
75
76
  }
76
77
 
77
- fn get_config(&self) -> Result<RHash, Error> {
78
+ fn get_config(&self) -> Result<RHash, magnus::Error> {
78
79
  let binding = self.0.borrow();
79
80
 
80
81
  Ok(binding.config)
@@ -82,7 +83,7 @@ impl SelmaSanitizer {
82
83
 
83
84
  /// Toggle a sanitizer option on or off.
84
85
  fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
85
- let tag = Tag::tag_from_tag_name(tag_name.as_str());
86
+ let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
86
87
  if set {
87
88
  self.0.borrow_mut().flags[tag.index] |= flag;
88
89
  } else {
@@ -93,13 +94,19 @@ impl SelmaSanitizer {
93
94
  /// Toggles all sanitization options on or off.
94
95
  fn set_all_flags(&self, flag: u8, set: bool) {
95
96
  if set {
96
- Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
97
- self.0.borrow_mut().flags[iter] |= flag;
98
- });
97
+ crate::tags::Tag::html_tags()
98
+ .iter()
99
+ .enumerate()
100
+ .for_each(|(iter, _)| {
101
+ self.0.borrow_mut().flags[iter] |= flag;
102
+ });
99
103
  } else {
100
- Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
101
- self.0.borrow_mut().flags[iter] &= flag;
102
- });
104
+ crate::tags::Tag::html_tags()
105
+ .iter()
106
+ .enumerate()
107
+ .for_each(|(iter, _)| {
108
+ self.0.borrow_mut().flags[iter] &= flag;
109
+ });
103
110
  }
104
111
  }
105
112
 
@@ -111,8 +118,8 @@ impl SelmaSanitizer {
111
118
 
112
119
  pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
113
120
  if self.0.borrow().escape_tagfilter {
114
- let tag = Tag::tag_from_element(e);
115
- if Tag::is_tag_escapeworthy(tag) {
121
+ let tag = crate::tags::Tag::tag_from_element(e);
122
+ if crate::tags::Tag::is_tag_escapeworthy(tag) {
116
123
  e.remove();
117
124
  return true;
118
125
  }
@@ -229,9 +236,9 @@ impl SelmaSanitizer {
229
236
  }
230
237
  }
231
238
 
232
- pub fn sanitize_attributes(&self, element: &mut Element) {
239
+ pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), AttributeNameError> {
233
240
  let binding = self.0.borrow_mut();
234
- let tag = Tag::tag_from_element(element);
241
+ let tag = crate::tags::Tag::tag_from_element(element);
235
242
  let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
236
243
 
237
244
  // FIXME: This is a hack to get around the fact that we can't borrow
@@ -247,7 +254,7 @@ impl SelmaSanitizer {
247
254
  // encountered, remove the entire element to be safe.
248
255
  if attr_name.starts_with("<!--") {
249
256
  Self::force_remove_element(self, element);
250
- return;
257
+ return Ok(());
251
258
  }
252
259
 
253
260
  // first, trim leading spaces and unescape any encodings
@@ -255,46 +262,64 @@ impl SelmaSanitizer {
255
262
  let x = escapist::unescape_html(trimmed.as_bytes());
256
263
  let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
257
264
 
258
- if !Self::should_keep_attribute(
265
+ let should_keep_attrubute = match Self::should_keep_attribute(
259
266
  &binding,
260
267
  element,
261
268
  element_sanitizer,
262
269
  attr_name,
263
270
  &unescaped_attr_val,
264
271
  ) {
272
+ Ok(should_keep) => should_keep,
273
+ Err(e) => {
274
+ return Err(e);
275
+ }
276
+ };
277
+
278
+ if !should_keep_attrubute {
265
279
  element.remove_attribute(attr_name);
266
280
  } else {
267
281
  // Prevent the use of `<meta>` elements that set a charset other than UTF-8,
268
282
  // since output is always UTF-8.
269
- if Tag::is_meta(tag) {
283
+ if crate::tags::Tag::is_meta(tag) {
270
284
  if attr_name == "charset" && unescaped_attr_val != "utf-8" {
271
- element.set_attribute(attr_name, "utf-8");
285
+ match element.set_attribute(attr_name, "utf-8") {
286
+ Ok(_) => {}
287
+ Err(err) => {
288
+ return Err(err);
289
+ }
290
+ }
272
291
  }
273
292
  } else if !unescaped_attr_val.is_empty() {
274
293
  let mut buf = String::new();
275
294
  // ...then, escape any special characters, for security
276
295
  if attr_name == "href" {
277
- // FIXME: gross--------------vvvv
278
- escapist::escape_href(&mut buf, unescaped_attr_val.to_string().as_str());
296
+ escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
279
297
  } else {
280
- escapist::escape_html(&mut buf, unescaped_attr_val.to_string().as_str());
298
+ escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
281
299
  };
282
300
 
283
- element.set_attribute(attr_name, &buf);
301
+ match element.set_attribute(attr_name, &buf) {
302
+ Ok(_) => {}
303
+ Err(err) => {
304
+ return Err(err);
305
+ }
306
+ }
284
307
  }
285
308
  }
286
309
  }
287
310
 
288
311
  let required = &element_sanitizer.required_attrs;
289
312
  if required.contains(&"*".to_string()) {
290
- return;
313
+ return Ok(());
291
314
  }
292
315
  for attr in element.attributes().iter() {
293
316
  let attr_name = &attr.name();
294
317
  if required.contains(attr_name) {
295
- return;
318
+ return Ok(());
296
319
  }
297
320
  }
321
+
322
+ Ok(())
298
323
  }
299
324
 
300
325
  fn should_keep_attribute(
@@ -303,7 +328,7 @@ impl SelmaSanitizer {
303
328
  element_sanitizer: &ElementSanitizer,
304
329
  attr_name: &String,
305
330
  attr_val: &String,
306
- ) -> bool {
331
+ ) -> Result<bool, AttributeNameError> {
307
332
  let mut allowed: bool = false;
308
333
  let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
309
334
  let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
@@ -317,7 +342,7 @@ impl SelmaSanitizer {
317
342
  }
318
343
 
319
344
  if !allowed {
320
- return false;
345
+ return Ok(false);
321
346
  }
322
347
 
323
348
  let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
@@ -325,32 +350,29 @@ impl SelmaSanitizer {
325
350
  None => {
326
351
  // has a protocol, but no sanitization list
327
352
  if !attr_val.is_empty() && Self::has_protocol(attr_val) {
328
- return false;
353
+ return Ok(false);
329
354
  }
330
355
  }
331
356
  Some(protocol_sanitizer_values) => {
332
357
  if !attr_val.is_empty()
333
358
  && !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
334
359
  {
335
- return false;
360
+ return Ok(false);
336
361
  }
337
362
  }
338
363
  }
339
364
 
340
- if attr_name == "class"
341
- && !Self::sanitize_class_attribute(
365
+ if attr_name == "class" {
366
+ return Self::sanitize_class_attribute(
342
367
  binding,
343
368
  element,
344
369
  element_sanitizer,
345
370
  attr_name,
346
371
  attr_val,
347
- )
348
- .unwrap()
349
- {
350
- return false;
372
+ );
351
373
  }
352
374
 
353
- true
375
+ Ok(true)
354
376
  }
355
377
 
356
378
  fn has_protocol(attr_val: &str) -> bool {
@@ -393,7 +415,7 @@ impl SelmaSanitizer {
393
415
  element_sanitizer: &ElementSanitizer,
394
416
  attr_name: &str,
395
417
  attr_val: &str,
396
- ) -> Result<bool, Error> {
418
+ ) -> Result<bool, lol_html::errors::AttributeNameError> {
397
419
  let allowed_global = &binding.allowed_classes;
398
420
 
399
421
  let mut valid_classes: Vec<String> = vec![];
@@ -421,28 +443,25 @@ impl SelmaSanitizer {
421
443
 
422
444
  match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
423
445
  Ok(_) => Ok(true),
424
- Err(err) => Err(Error::new(
425
- exception::runtime_error(),
426
- format!("AttributeNameError: {}", err),
427
- )),
446
+ Err(err) => Err(err),
428
447
  }
429
448
  }
430
449
 
431
450
  pub fn allow_element(&self, element: &mut Element) -> bool {
432
- let tag = Tag::tag_from_element(element);
451
+ let tag = crate::tags::Tag::tag_from_element(element);
433
452
  let flags: u8 = self.0.borrow().flags[tag.index];
434
453
 
435
454
  (flags & Self::SELMA_SANITIZER_ALLOW) == 0
436
455
  }
437
456
 
438
457
  pub fn try_remove_element(&self, element: &mut Element) -> bool {
439
- let tag = Tag::tag_from_element(element);
458
+ let tag = crate::tags::Tag::tag_from_element(element);
440
459
  let flags: u8 = self.0.borrow().flags[tag.index];
441
460
 
442
461
  let should_remove = !element.removed() && self.allow_element(element);
443
462
 
444
463
  if should_remove {
445
- if Tag::has_text_content(tag) {
464
+ if crate::tags::Tag::has_text_content(tag) {
446
465
  Self::remove_element(
447
466
  element,
448
467
  tag.self_closing,
@@ -455,7 +474,7 @@ impl SelmaSanitizer {
455
474
  Self::check_if_end_tag_needs_removal(element);
456
475
  } else {
457
476
  // anything in <iframe> must be removed, if it's kept
458
- if Tag::is_iframe(tag) {
477
+ if crate::tags::Tag::is_iframe(tag) {
459
478
  if self.0.borrow().flags[tag.index] != 0 {
460
479
  element.set_inner_content(" ", ContentType::Text);
461
480
  } else {
@@ -487,14 +506,14 @@ impl SelmaSanitizer {
487
506
  }
488
507
 
489
508
  pub fn force_remove_element(&self, element: &mut Element) {
490
- let tag = Tag::tag_from_element(element);
509
+ let tag = crate::tags::Tag::tag_from_element(element);
491
510
  let self_closing = tag.self_closing;
492
511
  Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
493
512
  Self::check_if_end_tag_needs_removal(element);
494
513
  }
495
514
 
496
515
  fn check_if_end_tag_needs_removal(element: &mut Element) {
497
- if element.removed() && !Tag::tag_from_element(element).self_closing {
516
+ if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
498
517
  element
499
518
  .on_end_tag(move |end| {
500
519
  Self::remove_end_tag(end);
@@ -523,7 +542,7 @@ impl SelmaSanitizer {
523
542
  }
524
543
  }
525
544
 
526
- pub fn init(m_selma: RModule) -> Result<(), Error> {
545
+ pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
527
546
  let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;
528
547
 
529
548
  c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
@@ -27,7 +27,7 @@ impl SelmaSelector {
27
27
  if css.parse::<lol_html::Selector>().is_err() {
28
28
  return Err(Error::new(
29
29
  exception::arg_error(),
30
- format!("Could not parse `match_element` (`{}`) as valid CSS", css),
30
+ format!("Could not parse `match_element` (`{css:?}`) as valid CSS"),
31
31
  ));
32
32
  }
33
33
  }
@@ -37,10 +37,7 @@ impl SelmaSelector {
37
37
  if css.parse::<lol_html::Selector>().is_err() {
38
38
  return Err(Error::new(
39
39
  exception::arg_error(),
40
- format!(
41
- "Could not parse `match_text_within` (`{}`) as valid CSS",
42
- css
43
- ),
40
+ format!("Could not parse `match_text_within` (`{css:?}`) as valid CSS",),
44
41
  ));
45
42
  }
46
43
  }
@@ -192,14 +192,17 @@ impl Tag {
192
192
  /// Is this tag something which needs to be removed?
193
193
  pub fn is_tag_escapeworthy(tag: Tag) -> bool {
194
194
  tag.index == HTMLTag::TITLE as usize
195
- || tag.index == HTMLTag::TEXTAREA as usize
196
- || tag.index == HTMLTag::STYLE as usize
197
- || tag.index == HTMLTag::XMP as usize
198
195
  || tag.index == HTMLTag::IFRAME as usize
196
+ || tag.index == HTMLTag::MATH as usize
199
197
  || tag.index == HTMLTag::NOEMBED as usize
200
198
  || tag.index == HTMLTag::NOFRAMES as usize
201
- || tag.index == HTMLTag::SCRIPT as usize
199
+ || tag.index == HTMLTag::NOSCRIPT as usize
202
200
  || tag.index == HTMLTag::PLAINTEXT as usize
201
+ || tag.index == HTMLTag::SCRIPT as usize
202
+ || tag.index == HTMLTag::STYLE as usize
203
+ || tag.index == HTMLTag::SVG as usize
204
+ || tag.index == HTMLTag::TEXTAREA as usize
205
+ || tag.index == HTMLTag::XMP as usize
203
206
  }
204
207
 
205
208
  pub const ESCAPEWORTHY_TAGS_CSS: &str =
Binary file
@@ -3,6 +3,10 @@
3
3
  module Selma
4
4
  class Sanitizer
5
5
  module Config
6
+ # although there are many more protocol types, eg., ftp, xmpp, etc.,
7
+ # these are the only ones that are allowed by default
8
+ VALID_PROTOCOLS = ["http", "https", "mailto", :relative]
9
+
6
10
  DEFAULT = freeze_config(
7
11
  # Whether or not to allow HTML comments. Allowing comments is strongly
8
12
  # discouraged, since IE allows script execution within conditional
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.2"
4
+ VERSION = "0.0.4"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Garen J. Torikian
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-21 00:00:00.000000000 Z
11
+ date: 2022-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -81,6 +81,7 @@ files:
81
81
  - ext/selma/src/html.rs
82
82
  - ext/selma/src/html/element.rs
83
83
  - ext/selma/src/html/end_tag.rs
84
+ - ext/selma/src/html/text_chunk.rs
84
85
  - ext/selma/src/lib.rs
85
86
  - ext/selma/src/native_ref_wrap.rs
86
87
  - ext/selma/src/rewriter.rs