selma 0.0.3-arm64-darwin → 0.0.5-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f9939e73d5832b8d53b08f867bf218ab761211a6390f2da824889a7ec9d95516
4
- data.tar.gz: e605aa93a58224fef7dc76c6fe587903a57cc03339f68da3cfc7f801bfa0228c
3
+ metadata.gz: 6a8ae1537672de0e7cca2dcecf7c19d470e9f481130fa9c3dad37eef2bc91507
4
+ data.tar.gz: 4d897c25589773b5a5bcf987d7b420aa82dd69bcb4d832686f87ea14e40610a7
5
5
  SHA512:
6
- metadata.gz: 71c3adb9c05a949eaf4079c72db7e5d1a2c8c1a3bbc20190dbc0dc62fc7d1105452c222ae0b4594694008efaa06bdb2ed57ae54927ed93d2b3716b2f742983f2
7
- data.tar.gz: 431c58a0824a3b711724e95809323009061ca750bf9fa9a70999f31e454b194d537edfefdd287aa887416e3c624a1e9e354b28251c6caa0fe2b144e9bb75687d
6
+ metadata.gz: 183446b3cf5e97ef5f61e96da4db1198a8468f7971d6ae61dd9f0130cd019bde29b7975c1a46ff822b055543ed3657597bd02dc0c0a9234e24cefaa93e3e39fb
7
+ data.tar.gz: d105ad92ef51135b6d8fe675febe539eb41368bde7f08e10aba9f118d6c8950328d766c874202f759becb1ae75236ebf4e91c756ea1ba12c688941742801401d
data/README.md CHANGED
@@ -56,6 +56,10 @@ allow_comments: false,
56
56
  # "<!DOCTYPE html>" when sanitizing a document.
57
57
  allow_doctype: false,
58
58
 
59
+ # HTML elements to allow. By default, no elements are allowed (which means
60
+ # that all HTML will be stripped).
61
+ elements: ["a", "b", "img", ],
62
+
59
63
  # HTML attributes to allow in specific elements. The key is the name of the element,
60
64
  # and the value is an array of allowed attributes. By default, no attributes
61
65
  # are allowed.
@@ -64,14 +68,10 @@ attributes: {
64
68
  "img" => ["src"],
65
69
  },
66
70
 
67
- # HTML elements to allow. By default, no elements are allowed (which means
68
- # that all HTML will be stripped).
69
- elements: ["a", "b", "img", ],
70
-
71
71
  # URL handling protocols to allow in specific attributes. By default, no
72
72
  # protocols are allowed. Use :relative in place of a protocol if you want
73
73
  # to allow relative URLs sans protocol.
74
- protocols: {
74
+ protocols: {
75
75
  "a" => { "href" => ["http", "https", "mailto", :relative] },
76
76
  "img" => { "href" => ["http", "https"] },
77
77
  },
@@ -91,7 +91,7 @@ The real power in Selma comes in its use of handlers. A handler is simply an obj
91
91
 
92
92
  - `selector`, a method which MUST return an instance of `Selma::Selector` which defines the CSS classes to match
93
93
  - `handle_element`, a method that's called on each matched element
94
- - `handle_text`, a method that's called on each matched text node; this MUST return a string
94
+ - `handle_text_chunk`, a method that's called on each matched text node; this MUST return a string
95
95
 
96
96
  Here's an example which rewrites the `href` attribute on `a` and the `src` attribute on `img` to be `https` rather than `http`.
97
97
 
@@ -118,7 +118,7 @@ rewriter = Selma::Rewriter.new(handlers: [MatchAttribute.new])
118
118
  The `Selma::Selector` object has three possible kwargs:
119
119
 
120
120
  - `match_element`: any element which matches this CSS rule will be passed on to `handle_element`
121
- - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text`
121
+ - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text_chunk`
122
122
  - `ignore_text_within`: this is an array of element names whose text contents will be ignored
123
123
 
124
124
  You've seen an example of `match_element`; here's one for `match_text_within` which changes strings in various elements which are _not_ `pre` or `code`:
@@ -132,7 +132,7 @@ class MatchText
132
132
  SELECTOR
133
133
  end
134
134
 
135
- def handle_text(text)
135
+ def handle_text_chunk(text)
136
136
  string.sub(/@.+/, "<a href=\"www.yetto.app/#{Regexp.last_match}\">")
137
137
  end
138
138
  end
@@ -150,8 +150,9 @@ The `element` argument in `handle_element` has the following methods:
150
150
  - `remove_attribute`: remove an attribute
151
151
  - `attributes`: list all the attributes
152
152
  - `ancestors`: list all the ancestors
153
- - `append(content, content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
154
- - `wrap(start_text, end_text, content_type)`: adds `start_text` before an element and `end_text` after an element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
153
+ - `append(content, as: content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
154
+ - `before(content, as: content_type)`: Inserts `content` before the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
155
+ - `after(content, as: content_type)`: Inserts `content` after the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
155
156
  - `set_inner_content`: replaces inner content of the element with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
156
157
 
157
158
  ## Benchmarks
data/ext/selma/Cargo.toml CHANGED
@@ -5,7 +5,7 @@ edition = "2021"
5
5
 
6
6
  [dependencies]
7
7
  enum-iterator = "1.2"
8
- escapist = "0.0.1"
8
+ escapist = "0.0.2"
9
9
  magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
10
10
  lol_html = "0.3"
11
11
 
@@ -1,8 +1,6 @@
1
- use std::borrow::Cow;
2
-
3
1
  use crate::native_ref_wrap::NativeRefWrap;
4
- use lol_html::html_content::{ContentType, Element};
5
- use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Symbol};
2
+ use lol_html::html_content::Element;
3
+ use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Value};
6
4
 
7
5
  struct HTMLElement {
8
6
  element: NativeRefWrap<Element<'static, 'static>>,
@@ -38,6 +36,48 @@ impl SelmaHTMLElement {
38
36
  }
39
37
  }
40
38
 
39
+ fn set_tag_name(&self, name: String) -> Result<(), Error> {
40
+ let mut binding = self.0.borrow_mut();
41
+
42
+ if let Ok(element) = binding.element.get_mut() {
43
+ match element.set_tag_name(&name) {
44
+ Ok(_) => Ok(()),
45
+ Err(err) => Err(Error::new(exception::runtime_error(), format!("{err:?}"))),
46
+ }
47
+ } else {
48
+ Err(Error::new(
49
+ exception::runtime_error(),
50
+ "`set_tag_name` is not available",
51
+ ))
52
+ }
53
+ }
54
+
55
+ fn is_self_closing(&self) -> Result<bool, Error> {
56
+ let binding = self.0.borrow();
57
+
58
+ if let Ok(e) = binding.element.get() {
59
+ Ok(e.is_self_closing())
60
+ } else {
61
+ Err(Error::new(
62
+ exception::runtime_error(),
63
+ "`is_self_closing` is not available",
64
+ ))
65
+ }
66
+ }
67
+
68
+ fn has_attribute(&self, attr: String) -> Result<bool, Error> {
69
+ let binding = self.0.borrow();
70
+
71
+ if let Ok(e) = binding.element.get() {
72
+ Ok(e.has_attribute(&attr))
73
+ } else {
74
+ Err(Error::new(
75
+ exception::runtime_error(),
76
+ "`is_self_closing` is not available",
77
+ ))
78
+ }
79
+ }
80
+
41
81
  fn get_attribute(&self, attr: String) -> Option<String> {
42
82
  let binding = self.0.borrow();
43
83
  let element = binding.element.get();
@@ -106,89 +146,108 @@ impl SelmaHTMLElement {
106
146
  Ok(array)
107
147
  }
108
148
 
109
- fn append(&self, text_to_append: String, content_type: Symbol) -> Result<(), Error> {
149
+ fn before(&self, args: &[Value]) -> Result<(), Error> {
110
150
  let mut binding = self.0.borrow_mut();
111
151
  let element = binding.element.get_mut().unwrap();
112
152
 
113
- let text_str = text_to_append.as_str();
153
+ let (text_str, content_type) = match crate::scan_text_args(args) {
154
+ Ok((text_str, content_type)) => (text_str, content_type),
155
+ Err(err) => return Err(err),
156
+ };
114
157
 
115
- let content_type = Self::find_content_type(content_type);
116
-
117
- element.append(text_str, content_type);
158
+ element.before(&text_str, content_type);
118
159
 
119
160
  Ok(())
120
161
  }
121
162
 
122
- fn wrap(
123
- &self,
124
- start_text: String,
125
- end_text: String,
126
- content_type: Symbol,
127
- ) -> Result<(), Error> {
163
+ fn after(&self, args: &[Value]) -> Result<(), Error> {
128
164
  let mut binding = self.0.borrow_mut();
129
165
  let element = binding.element.get_mut().unwrap();
130
166
 
131
- let before_content_type = Self::find_content_type(content_type);
132
- let after_content_type = Self::find_content_type(content_type);
133
- element.before(&start_text, before_content_type);
134
- element.after(&end_text, after_content_type);
167
+ let (text_str, content_type) = match crate::scan_text_args(args) {
168
+ Ok((text_str, content_type)) => (text_str, content_type),
169
+ Err(err) => return Err(err),
170
+ };
171
+
172
+ element.after(&text_str, content_type);
135
173
 
136
174
  Ok(())
137
175
  }
138
176
 
139
- fn set_inner_content(&self, text_to_set: String, content_type: Symbol) -> Result<(), Error> {
177
+ fn prepend(&self, args: &[Value]) -> Result<(), Error> {
140
178
  let mut binding = self.0.borrow_mut();
141
179
  let element = binding.element.get_mut().unwrap();
142
180
 
143
- let text_str = text_to_set.as_str();
181
+ let (text_str, content_type) = match crate::scan_text_args(args) {
182
+ Ok((text_str, content_type)) => (text_str, content_type),
183
+ Err(err) => return Err(err),
184
+ };
185
+
186
+ element.prepend(&text_str, content_type);
144
187
 
145
- let content_type = Self::find_content_type(content_type);
188
+ Ok(())
189
+ }
146
190
 
147
- element.set_inner_content(text_str, content_type);
191
+ fn append(&self, args: &[Value]) -> Result<(), Error> {
192
+ let mut binding = self.0.borrow_mut();
193
+ let element = binding.element.get_mut().unwrap();
194
+
195
+ let (text_str, content_type) = match crate::scan_text_args(args) {
196
+ Ok((text_str, content_type)) => (text_str, content_type),
197
+ Err(err) => return Err(err),
198
+ };
199
+
200
+ element.append(&text_str, content_type);
148
201
 
149
202
  Ok(())
150
203
  }
151
204
 
152
- fn find_content_type(content_type: Symbol) -> ContentType {
153
- match content_type.name() {
154
- Ok(name) => match name {
155
- Cow::Borrowed("as_text") => ContentType::Text,
156
- Cow::Borrowed("as_html") => ContentType::Html,
157
- _ => Err(Error::new(
158
- exception::runtime_error(),
159
- format!("unknown symbol `{name:?}`"),
160
- ))
161
- .unwrap(),
162
- },
163
- Err(err) => Err(Error::new(
164
- exception::runtime_error(),
165
- format!("Could not unwrap symbol: {err:?}"),
166
- ))
167
- .unwrap(),
168
- }
205
+ fn set_inner_content(&self, args: &[Value]) -> Result<(), Error> {
206
+ let mut binding = self.0.borrow_mut();
207
+ let element = binding.element.get_mut().unwrap();
208
+
209
+ let (inner_content, content_type) = match crate::scan_text_args(args) {
210
+ Ok((inner_content, content_type)) => (inner_content, content_type),
211
+ Err(err) => return Err(err),
212
+ };
213
+
214
+ element.set_inner_content(&inner_content, content_type);
215
+
216
+ Ok(())
169
217
  }
170
218
  }
171
219
 
172
220
  pub fn init(c_html: RClass) -> Result<(), Error> {
173
221
  let c_element = c_html
174
222
  .define_class("Element", Default::default())
175
- .expect("cannot find class Selma::Element");
223
+ .expect("cannot find class Selma::HTML::Element");
176
224
 
177
225
  c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
226
+ c_element.define_method("tag_name=", method!(SelmaHTMLElement::set_tag_name, 1))?;
227
+ c_element.define_method(
228
+ "self_closing?",
229
+ method!(SelmaHTMLElement::is_self_closing, 0),
230
+ )?;
178
231
  c_element.define_method("[]", method!(SelmaHTMLElement::get_attribute, 1))?;
179
232
  c_element.define_method("[]=", method!(SelmaHTMLElement::set_attribute, 2))?;
180
233
  c_element.define_method(
181
234
  "remove_attribute",
182
235
  method!(SelmaHTMLElement::remove_attribute, 1),
183
236
  )?;
237
+ c_element.define_method(
238
+ "has_attribute?",
239
+ method!(SelmaHTMLElement::has_attribute, 1),
240
+ )?;
184
241
  c_element.define_method("attributes", method!(SelmaHTMLElement::get_attributes, 0))?;
185
242
  c_element.define_method("ancestors", method!(SelmaHTMLElement::get_ancestors, 0))?;
186
243
 
187
- c_element.define_method("append", method!(SelmaHTMLElement::append, 2))?;
188
- c_element.define_method("wrap", method!(SelmaHTMLElement::wrap, 3))?;
244
+ c_element.define_method("before", method!(SelmaHTMLElement::before, -1))?;
245
+ c_element.define_method("after", method!(SelmaHTMLElement::after, -1))?;
246
+ c_element.define_method("prepend", method!(SelmaHTMLElement::prepend, -1))?;
247
+ c_element.define_method("append", method!(SelmaHTMLElement::append, -1))?;
189
248
  c_element.define_method(
190
249
  "set_inner_content",
191
- method!(SelmaHTMLElement::set_inner_content, 2),
250
+ method!(SelmaHTMLElement::set_inner_content, -1),
192
251
  )?;
193
252
 
194
253
  Ok(())
@@ -6,7 +6,7 @@ struct HTMLEndTag {
6
6
  end_tag: NativeRefWrap<EndTag<'static>>,
7
7
  }
8
8
 
9
- #[magnus::wrap(class = "Selma::HTML::Element")]
9
+ #[magnus::wrap(class = "Selma::HTML::EndTag")]
10
10
  pub struct SelmaHTMLEndTag(std::cell::RefCell<HTMLEndTag>);
11
11
 
12
12
  /// SAFETY: This is safe because we only access this data when the GVL is held.
@@ -27,7 +27,7 @@ impl SelmaHTMLEndTag {
27
27
  pub fn init(c_html: RClass) -> Result<(), Error> {
28
28
  let c_end_tag = c_html
29
29
  .define_class("EndTag", Default::default())
30
- .expect("cannot find class Selma::EndTag");
30
+ .expect("cannot find class Selma::HTML::EndTag");
31
31
 
32
32
  c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
33
33
 
@@ -0,0 +1,113 @@
1
+ use crate::native_ref_wrap::NativeRefWrap;
2
+ use lol_html::html_content::{TextChunk, TextType};
3
+ use magnus::{exception, method, Error, Module, RClass, Symbol, Value};
4
+
5
+ struct HTMLTextChunk {
6
+ text_chunk: NativeRefWrap<TextChunk<'static>>,
7
+ }
8
+
9
+ #[magnus::wrap(class = "Selma::HTML::TextChunk")]
10
+ pub struct SelmaHTMLTextChunk(std::cell::RefCell<HTMLTextChunk>);
11
+
12
+ /// SAFETY: This is safe because we only access this data when the GVL is held.
13
+ unsafe impl Send for SelmaHTMLTextChunk {}
14
+
15
+ impl SelmaHTMLTextChunk {
16
+ pub fn new(text_chunk: &mut TextChunk) -> Self {
17
+ let (ref_wrap, _anchor) = NativeRefWrap::wrap_mut(text_chunk);
18
+
19
+ Self(std::cell::RefCell::new(HTMLTextChunk {
20
+ text_chunk: ref_wrap,
21
+ }))
22
+ }
23
+
24
+ fn to_s(&self) -> Result<String, Error> {
25
+ let binding = self.0.borrow();
26
+
27
+ if let Ok(tc) = binding.text_chunk.get() {
28
+ Ok(tc.as_str().to_string())
29
+ } else {
30
+ Err(Error::new(
31
+ exception::runtime_error(),
32
+ "`to_s` is not available",
33
+ ))
34
+ }
35
+ }
36
+
37
+ fn text_type(&self) -> Result<Symbol, Error> {
38
+ let binding = self.0.borrow();
39
+
40
+ if let Ok(tc) = binding.text_chunk.get() {
41
+ match tc.text_type() {
42
+ TextType::Data => Ok(Symbol::from("data")),
43
+ TextType::PlainText => Ok(Symbol::from("plain_text")),
44
+ TextType::RawText => Ok(Symbol::from("raw_text")),
45
+ TextType::ScriptData => Ok(Symbol::from("script")),
46
+ TextType::RCData => Ok(Symbol::from("rc_data")),
47
+ TextType::CDataSection => Ok(Symbol::from("cdata_section")),
48
+ }
49
+ } else {
50
+ Err(Error::new(
51
+ exception::runtime_error(),
52
+ "`text_type` is not available",
53
+ ))
54
+ }
55
+ }
56
+
57
+ fn before(&self, args: &[Value]) -> Result<(), Error> {
58
+ let mut binding = self.0.borrow_mut();
59
+ let text_chunk = binding.text_chunk.get_mut().unwrap();
60
+
61
+ let (text_str, content_type) = match crate::scan_text_args(args) {
62
+ Ok((text_str, content_type)) => (text_str, content_type),
63
+ Err(err) => return Err(err),
64
+ };
65
+
66
+ text_chunk.before(&text_str, content_type);
67
+
68
+ Ok(())
69
+ }
70
+
71
+ fn after(&self, args: &[Value]) -> Result<(), Error> {
72
+ let mut binding = self.0.borrow_mut();
73
+ let text_chunk = binding.text_chunk.get_mut().unwrap();
74
+
75
+ let (text_str, content_type) = match crate::scan_text_args(args) {
76
+ Ok((text_str, content_type)) => (text_str, content_type),
77
+ Err(err) => return Err(err),
78
+ };
79
+
80
+ text_chunk.after(&text_str, content_type);
81
+
82
+ Ok(())
83
+ }
84
+
85
+ fn replace(&self, args: &[Value]) -> Result<(), Error> {
86
+ let mut binding = self.0.borrow_mut();
87
+ let text_chunk = binding.text_chunk.get_mut().unwrap();
88
+
89
+ let (text_str, content_type) = match crate::scan_text_args(args) {
90
+ Ok((text_str, content_type)) => (text_str, content_type),
91
+ Err(err) => return Err(err),
92
+ };
93
+
94
+ text_chunk.replace(&text_str, content_type);
95
+
96
+ Ok(())
97
+ }
98
+ }
99
+
100
+ pub fn init(c_html: RClass) -> Result<(), Error> {
101
+ let c_text_chunk = c_html
102
+ .define_class("TextChunk", Default::default())
103
+ .expect("cannot find class Selma::HTML::TextChunk");
104
+
105
+ c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
106
+ c_text_chunk.define_method("content", method!(SelmaHTMLTextChunk::to_s, 0))?;
107
+ c_text_chunk.define_method("text_type", method!(SelmaHTMLTextChunk::text_type, 0))?;
108
+ c_text_chunk.define_method("before", method!(SelmaHTMLTextChunk::before, -1))?;
109
+ c_text_chunk.define_method("after", method!(SelmaHTMLTextChunk::after, -1))?;
110
+ c_text_chunk.define_method("replace", method!(SelmaHTMLTextChunk::replace, -1))?;
111
+
112
+ Ok(())
113
+ }
@@ -9,9 +9,11 @@ pub fn init(m_selma: RModule) -> Result<(), Error> {
9
9
 
10
10
  element::init(c_html).expect("cannot define Selma::HTML::Element class");
11
11
  end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
12
+ text_chunk::init(c_html).expect("cannot define Selma::HTML::TextChunk class");
12
13
 
13
14
  Ok(())
14
15
  }
15
16
 
16
17
  pub mod element;
17
18
  pub mod end_tag;
19
+ pub mod text_chunk;
data/ext/selma/src/lib.rs CHANGED
@@ -1,6 +1,7 @@
1
1
  extern crate core;
2
2
 
3
- use magnus::{define_module, Error};
3
+ use lol_html::html_content::ContentType;
4
+ use magnus::{define_module, exception, scan_args, Error, Symbol, Value};
4
5
 
5
6
  pub mod html;
6
7
  pub mod native_ref_wrap;
@@ -10,6 +11,32 @@ pub mod selector;
10
11
  pub mod tags;
11
12
  pub mod wrapped_struct;
12
13
 
14
+ #[allow(clippy::let_unit_value)]
15
+ fn scan_text_args(args: &[Value]) -> Result<(String, ContentType), magnus::Error> {
16
+ let args = scan_args::scan_args(args)?;
17
+ let (text,): (String,) = args.required;
18
+ let _: () = args.optional;
19
+ let _: () = args.splat;
20
+ let _: () = args.trailing;
21
+ let _: () = args.block;
22
+
23
+ let kwargs = scan_args::get_kwargs::<_, (Symbol,), (), ()>(args.keywords, &["as"], &[])?;
24
+ let as_sym = kwargs.required.0;
25
+ let as_sym_str = as_sym.name().unwrap();
26
+ let content_type = if as_sym_str == "text" {
27
+ ContentType::Text
28
+ } else if as_sym_str == "html" {
29
+ ContentType::Html
30
+ } else {
31
+ return Err(Error::new(
32
+ exception::runtime_error(),
33
+ format!("unknown symbol `{as_sym_str:?}`"),
34
+ ));
35
+ };
36
+
37
+ Ok((text, content_type))
38
+ }
39
+
13
40
  #[magnus::init]
14
41
  fn init() -> Result<(), Error> {
15
42
  let m_selma = define_module("Selma").expect("cannot define ::Selma module");
@@ -1,6 +1,6 @@
1
1
  use lol_html::{
2
2
  doc_comments, doctype, element,
3
- html_content::{ContentType, Element, EndTag, TextChunk},
3
+ html_content::{Element, EndTag, TextChunk},
4
4
  text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
5
5
  };
6
6
  use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
@@ -8,7 +8,7 @@ use magnus::{exception, function, method, scan_args, Module, Object, RArray, RMo
8
8
  use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
9
9
 
10
10
  use crate::{
11
- html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
11
+ html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
12
12
  sanitizer::SelmaSanitizer,
13
13
  selector::SelmaSelector,
14
14
  tags::Tag,
@@ -43,7 +43,7 @@ unsafe impl Send for SelmaRewriter {}
43
43
  impl SelmaRewriter {
44
44
  const SELMA_ON_END_TAG: &str = "on_end_tag";
45
45
  const SELMA_HANDLE_ELEMENT: &str = "handle_element";
46
- const SELMA_HANDLE_TEXT: &str = "handle_text";
46
+ const SELMA_HANDLE_TEXT_CHUNK: &str = "handle_text_chunk";
47
47
 
48
48
  /// @yard
49
49
  /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
@@ -145,7 +145,7 @@ impl SelmaRewriter {
145
145
  let _: () = args.trailing;
146
146
  let _: () = args.block;
147
147
 
148
- let kw = scan_args::get_kwargs::<
148
+ let kwargs = scan_args::get_kwargs::<
149
149
  _,
150
150
  (),
151
151
  (
@@ -154,7 +154,7 @@ impl SelmaRewriter {
154
154
  ),
155
155
  (),
156
156
  >(args.keywords, &[], &["sanitizer", "handlers"])?;
157
- let (rb_sanitizer, rb_handlers) = kw.optional;
157
+ let (rb_sanitizer, rb_handlers) = kwargs.optional;
158
158
 
159
159
  Ok((rb_sanitizer, rb_handlers))
160
160
  }
@@ -162,26 +162,22 @@ impl SelmaRewriter {
162
162
  /// Perform HTML rewrite sequence.
163
163
  fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
164
164
  let sanitized_html = match &self.0.borrow().sanitizer {
165
- None => html,
165
+ None => Ok(html),
166
166
  Some(sanitizer) => {
167
- // due to malicious html crafting
168
- // (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
169
- // we need to run sanitization several times to truly remove unwanted tags,
170
- // because lol-html happily accepts this garbage (by design?)
171
- let sanitized_html = Self::perform_sanitization(sanitizer, &html).unwrap();
167
+ let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
168
+ Ok(sanitized_html) => sanitized_html,
169
+ Err(err) => return Err(err),
170
+ };
172
171
 
173
- String::from_utf8(sanitized_html).unwrap()
172
+ String::from_utf8(sanitized_html)
174
173
  }
175
174
  };
176
175
  let binding = self.0.borrow_mut();
177
176
  let handlers = &binding.handlers;
178
177
 
179
- match Self::perform_handler_rewrite(self, handlers, sanitized_html) {
178
+ match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
180
179
  Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
181
- Err(err) => Err(magnus::Error::new(
182
- exception::runtime_error(),
183
- format!("{err:?}"),
184
- )),
180
+ Err(err) => Err(err),
185
181
  }
186
182
  }
187
183
 
@@ -212,9 +208,10 @@ impl SelmaRewriter {
212
208
  if el.removed() {
213
209
  return Ok(());
214
210
  }
215
- sanitizer.sanitize_attributes(el);
216
-
217
- Ok(())
211
+ match sanitizer.sanitize_attributes(el) {
212
+ Ok(_) => Ok(()),
213
+ Err(err) => Err(err.to_string().into()),
214
+ }
218
215
  })],
219
216
  // TODO: allow for MemorySettings to be defined
220
217
  ..Settings::default()
@@ -341,7 +338,7 @@ impl SelmaRewriter {
341
338
  let mut stack = closure_element_stack.as_ref().borrow_mut();
342
339
  stack.pop();
343
340
  Ok(())
344
- });
341
+ })?;
345
342
  Ok(())
346
343
  }));
347
344
  });
@@ -375,13 +372,14 @@ impl SelmaRewriter {
375
372
  ) -> Result<(), magnus::Error> {
376
373
  // if `on_end_tag` function is defined, call it
377
374
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
375
+ // TODO: error here is an "EndTagError"
378
376
  element.on_end_tag(move |end_tag| {
379
377
  let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
380
378
 
381
- rb_handler
382
- .funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,))
383
- .unwrap();
384
- Ok(())
379
+ match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
380
+ Ok(_) => Ok(()),
381
+ Err(err) => Err(err.to_string().into()),
382
+ }
385
383
  });
386
384
  }
387
385
 
@@ -390,40 +388,30 @@ impl SelmaRewriter {
390
388
  rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
391
389
  match rb_result {
392
390
  Ok(_) => Ok(()),
393
- Err(err) => Err(magnus::Error::new(
394
- exception::runtime_error(),
395
- format!("{err:?}"),
396
- )),
391
+ Err(err) => Err(err),
397
392
  }
398
393
  }
399
394
 
400
- fn process_text_handlers(rb_handler: Value, text: &mut TextChunk) -> Result<(), magnus::Error> {
401
- // prevents missing `handle_text` function
402
- let content = text.as_str();
395
+ fn process_text_handlers(
396
+ rb_handler: Value,
397
+ text_chunk: &mut TextChunk,
398
+ ) -> Result<(), magnus::Error> {
399
+ // prevents missing `handle_text_chunk` function
400
+ let content = text_chunk.as_str();
403
401
 
404
402
  // seems that sometimes lol-html returns blank text / EOLs?
405
403
  if content.is_empty() {
406
404
  return Ok(());
407
405
  }
408
406
 
409
- let rb_result = rb_handler.funcall::<_, _, String>(Self::SELMA_HANDLE_TEXT, (content,));
410
-
411
- if rb_result.is_err() {
412
- return Err(magnus::Error::new(
413
- exception::type_error(),
414
- format!(
415
- "Expected #{:?} to return a string: {:?}",
416
- Self::SELMA_HANDLE_TEXT,
417
- rb_result.err().unwrap()
418
- ),
419
- ));
407
+ let rb_text_chunk = SelmaHTMLTextChunk::new(text_chunk);
408
+ match rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,)) {
409
+ Ok(_) => Ok(()),
410
+ Err(err) => Err(magnus::Error::new(
411
+ exception::runtime_error(),
412
+ format!("{err:?}"),
413
+ )),
420
414
  }
421
-
422
- let new_content = rb_result.unwrap();
423
- // TODO: can this be an option?
424
- text.replace(&new_content, ContentType::Html);
425
-
426
- Ok(())
427
415
  }
428
416
  }
429
417
 
@@ -1,12 +1,10 @@
1
- use std::{borrow::BorrowMut, cell::RefMut, collections::HashMap};
1
+ use std::{borrow::BorrowMut, collections::HashMap};
2
2
 
3
- use lol_html::html_content::{Comment, ContentType, Doctype, Element, EndTag};
4
- use magnus::{
5
- class, exception, function, method, scan_args, Error, Module, Object, RArray, RHash, RModule,
6
- Value,
3
+ use lol_html::{
4
+ errors::AttributeNameError,
5
+ html_content::{Comment, ContentType, Doctype, Element, EndTag},
7
6
  };
8
-
9
- use crate::tags::Tag;
7
+ use magnus::{class, function, method, scan_args, Module, Object, RArray, RHash, RModule, Value};
10
8
 
11
9
  #[derive(Clone, Debug)]
12
10
  struct ElementSanitizer {
@@ -16,9 +14,21 @@ struct ElementSanitizer {
16
14
  protocol_sanitizers: HashMap<String, Vec<String>>,
17
15
  }
18
16
 
17
+ impl Default for ElementSanitizer {
18
+ fn default() -> Self {
19
+ ElementSanitizer {
20
+ allowed_attrs: vec![],
21
+ allowed_classes: vec![],
22
+ required_attrs: vec![],
23
+
24
+ protocol_sanitizers: HashMap::new(),
25
+ }
26
+ }
27
+ }
28
+
19
29
  #[derive(Clone, Debug)]
20
30
  pub struct Sanitizer {
21
- flags: [u8; Tag::TAG_COUNT],
31
+ flags: [u8; crate::tags::Tag::TAG_COUNT],
22
32
  allowed_attrs: Vec<String>,
23
33
  allowed_classes: Vec<String>,
24
34
  element_sanitizers: HashMap<String, ElementSanitizer>,
@@ -39,7 +49,7 @@ impl SelmaSanitizer {
39
49
  const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
40
50
  const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
41
51
 
42
- pub fn new(arguments: &[Value]) -> Result<Self, Error> {
52
+ pub fn new(arguments: &[Value]) -> Result<Self, magnus::Error> {
43
53
  let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
44
54
  let (opt_config,): (Option<RHash>,) = args.optional;
45
55
 
@@ -50,19 +60,16 @@ impl SelmaSanitizer {
50
60
  };
51
61
 
52
62
  let mut element_sanitizers = HashMap::new();
53
- Tag::html_tags().iter().for_each(|html_tag| {
54
- let es = ElementSanitizer {
55
- allowed_attrs: vec![],
56
- allowed_classes: vec![],
57
- required_attrs: vec![],
58
-
59
- protocol_sanitizers: HashMap::new(),
60
- };
61
- element_sanitizers.insert(Tag::element_name_from_enum(html_tag).to_string(), es);
63
+ crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
64
+ let es = ElementSanitizer::default();
65
+ element_sanitizers.insert(
66
+ crate::tags::Tag::element_name_from_enum(html_tag).to_string(),
67
+ es,
68
+ );
62
69
  });
63
70
 
64
71
  Ok(Self(std::cell::RefCell::new(Sanitizer {
65
- flags: [0; Tag::TAG_COUNT],
72
+ flags: [0; crate::tags::Tag::TAG_COUNT],
66
73
  allowed_attrs: vec![],
67
74
  allowed_classes: vec![],
68
75
  element_sanitizers,
@@ -74,7 +81,7 @@ impl SelmaSanitizer {
74
81
  })))
75
82
  }
76
83
 
77
- fn get_config(&self) -> Result<RHash, Error> {
84
+ fn get_config(&self) -> Result<RHash, magnus::Error> {
78
85
  let binding = self.0.borrow();
79
86
 
80
87
  Ok(binding.config)
@@ -82,7 +89,7 @@ impl SelmaSanitizer {
82
89
 
83
90
  /// Toggle a sanitizer option on or off.
84
91
  fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
85
- let tag = Tag::tag_from_tag_name(tag_name.as_str());
92
+ let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
86
93
  if set {
87
94
  self.0.borrow_mut().flags[tag.index] |= flag;
88
95
  } else {
@@ -93,13 +100,19 @@ impl SelmaSanitizer {
93
100
  /// Toggles all sanitization options on or off.
94
101
  fn set_all_flags(&self, flag: u8, set: bool) {
95
102
  if set {
96
- Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
97
- self.0.borrow_mut().flags[iter] |= flag;
98
- });
103
+ crate::tags::Tag::html_tags()
104
+ .iter()
105
+ .enumerate()
106
+ .for_each(|(iter, _)| {
107
+ self.0.borrow_mut().flags[iter] |= flag;
108
+ });
99
109
  } else {
100
- Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
101
- self.0.borrow_mut().flags[iter] &= flag;
102
- });
110
+ crate::tags::Tag::html_tags()
111
+ .iter()
112
+ .enumerate()
113
+ .for_each(|(iter, _)| {
114
+ self.0.borrow_mut().flags[iter] &= flag;
115
+ });
103
116
  }
104
117
  }
105
118
 
@@ -111,8 +124,8 @@ impl SelmaSanitizer {
111
124
 
112
125
  pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
113
126
  if self.0.borrow().escape_tagfilter {
114
- let tag = Tag::tag_from_element(e);
115
- if Tag::is_tag_escapeworthy(tag) {
127
+ let tag = crate::tags::Tag::tag_from_element(e);
128
+ if crate::tags::Tag::is_tag_escapeworthy(tag) {
116
129
  e.remove();
117
130
  return true;
118
131
  }
@@ -162,7 +175,8 @@ impl SelmaSanitizer {
162
175
  let allowed_attrs = &mut binding.allowed_attrs;
163
176
  Self::set_allowed(allowed_attrs, &attr_name, allow);
164
177
  } else {
165
- let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
178
+ let element_sanitizers = &mut binding.element_sanitizers;
179
+ let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
166
180
 
167
181
  element_sanitizer.allowed_attrs.push(attr_name);
168
182
  }
@@ -176,7 +190,8 @@ impl SelmaSanitizer {
176
190
  let allowed_classes = &mut binding.allowed_classes;
177
191
  Self::set_allowed(allowed_classes, &class_name, allow);
178
192
  } else {
179
- let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
193
+ let element_sanitizers = &mut binding.element_sanitizers;
194
+ let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
180
195
 
181
196
  let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
182
197
  Self::set_allowed(allowed_classes, &class_name, allow)
@@ -187,9 +202,10 @@ impl SelmaSanitizer {
187
202
  fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
188
203
  let mut binding = self.0.borrow_mut();
189
204
 
190
- let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
205
+ let element_sanitizers = &mut binding.element_sanitizers;
206
+ let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
191
207
 
192
- let protocol_sanitizers = element_sanitizer.protocol_sanitizers.borrow_mut();
208
+ let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
193
209
 
194
210
  for opt_allowed_protocol in allow_list.each() {
195
211
  let allowed_protocol = opt_allowed_protocol.unwrap();
@@ -229,10 +245,16 @@ impl SelmaSanitizer {
229
245
  }
230
246
  }
231
247
 
232
- pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), magnus::Error> {
233
- let binding = self.0.borrow_mut();
234
- let tag = Tag::tag_from_element(element);
235
- let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
248
+ pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), AttributeNameError> {
249
+ let tag = crate::tags::Tag::tag_from_element(element);
250
+ let tag_name = &element.tag_name();
251
+ let element_sanitizer = {
252
+ let mut binding = self.0.borrow_mut();
253
+ let element_sanitizers = &mut binding.element_sanitizers;
254
+ Self::get_element_sanitizer(element_sanitizers, tag_name).clone()
255
+ };
256
+
257
+ let binding = self.0.borrow();
236
258
 
237
259
  // FIXME: This is a hack to get around the fact that we can't borrow
238
260
  let attribute_map: HashMap<String, String> = element
@@ -255,26 +277,30 @@ impl SelmaSanitizer {
255
277
  let x = escapist::unescape_html(trimmed.as_bytes());
256
278
  let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
257
279
 
258
- if !Self::should_keep_attribute(
280
+ let should_keep_attrubute = match Self::should_keep_attribute(
259
281
  &binding,
260
282
  element,
261
- element_sanitizer,
283
+ &element_sanitizer,
262
284
  attr_name,
263
285
  &unescaped_attr_val,
264
286
  ) {
287
+ Ok(should_keep) => should_keep,
288
+ Err(e) => {
289
+ return Err(e);
290
+ }
291
+ };
292
+
293
+ if !should_keep_attrubute {
265
294
  element.remove_attribute(attr_name);
266
295
  } else {
267
296
  // Prevent the use of `<meta>` elements that set a charset other than UTF-8,
268
297
  // since output is always UTF-8.
269
- if Tag::is_meta(tag) {
298
+ if crate::tags::Tag::is_meta(tag) {
270
299
  if attr_name == "charset" && unescaped_attr_val != "utf-8" {
271
300
  match element.set_attribute(attr_name, "utf-8") {
272
301
  Ok(_) => {}
273
- Err(_) => {
274
- return Err(magnus::Error::new(
275
- exception::runtime_error(),
276
- format!("Unable to change {attr_name:?}"),
277
- ));
302
+ Err(err) => {
303
+ return Err(err);
278
304
  }
279
305
  }
280
306
  }
@@ -282,13 +308,17 @@ impl SelmaSanitizer {
282
308
  let mut buf = String::new();
283
309
  // ...then, escape any special characters, for security
284
310
  if attr_name == "href" {
285
- // FIXME: gross--------------vvvv
286
- escapist::escape_href(&mut buf, unescaped_attr_val.to_string().as_str());
311
+ escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
287
312
  } else {
288
- escapist::escape_html(&mut buf, unescaped_attr_val.to_string().as_str());
313
+ escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
289
314
  };
290
315
 
291
- element.set_attribute(attr_name, &buf);
316
+ match element.set_attribute(attr_name, &buf) {
317
+ Ok(_) => {}
318
+ Err(err) => {
319
+ return Err(err);
320
+ }
321
+ }
292
322
  }
293
323
  }
294
324
  }
@@ -308,12 +338,12 @@ impl SelmaSanitizer {
308
338
  }
309
339
 
310
340
  fn should_keep_attribute(
311
- binding: &RefMut<Sanitizer>,
341
+ binding: &Sanitizer,
312
342
  element: &mut Element,
313
343
  element_sanitizer: &ElementSanitizer,
314
344
  attr_name: &String,
315
345
  attr_val: &String,
316
- ) -> bool {
346
+ ) -> Result<bool, AttributeNameError> {
317
347
  let mut allowed: bool = false;
318
348
  let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
319
349
  let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
@@ -327,7 +357,7 @@ impl SelmaSanitizer {
327
357
  }
328
358
 
329
359
  if !allowed {
330
- return false;
360
+ return Ok(false);
331
361
  }
332
362
 
333
363
  let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
@@ -335,32 +365,29 @@ impl SelmaSanitizer {
335
365
  None => {
336
366
  // has a protocol, but no sanitization list
337
367
  if !attr_val.is_empty() && Self::has_protocol(attr_val) {
338
- return false;
368
+ return Ok(false);
339
369
  }
340
370
  }
341
371
  Some(protocol_sanitizer_values) => {
342
372
  if !attr_val.is_empty()
343
373
  && !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
344
374
  {
345
- return false;
375
+ return Ok(false);
346
376
  }
347
377
  }
348
378
  }
349
379
 
350
- if attr_name == "class"
351
- && !Self::sanitize_class_attribute(
380
+ if attr_name == "class" {
381
+ return Self::sanitize_class_attribute(
352
382
  binding,
353
383
  element,
354
384
  element_sanitizer,
355
385
  attr_name,
356
386
  attr_val,
357
- )
358
- .unwrap()
359
- {
360
- return false;
387
+ );
361
388
  }
362
389
 
363
- true
390
+ Ok(true)
364
391
  }
365
392
 
366
393
  fn has_protocol(attr_val: &str) -> bool {
@@ -398,12 +425,12 @@ impl SelmaSanitizer {
398
425
  }
399
426
 
400
427
  fn sanitize_class_attribute(
401
- binding: &RefMut<Sanitizer>,
428
+ binding: &Sanitizer,
402
429
  element: &mut Element,
403
430
  element_sanitizer: &ElementSanitizer,
404
431
  attr_name: &str,
405
432
  attr_val: &str,
406
- ) -> Result<bool, Error> {
433
+ ) -> Result<bool, lol_html::errors::AttributeNameError> {
407
434
  let allowed_global = &binding.allowed_classes;
408
435
 
409
436
  let mut valid_classes: Vec<String> = vec![];
@@ -431,28 +458,25 @@ impl SelmaSanitizer {
431
458
 
432
459
  match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
433
460
  Ok(_) => Ok(true),
434
- Err(err) => Err(Error::new(
435
- exception::runtime_error(),
436
- format!("AttributeNameError: {err:?}"),
437
- )),
461
+ Err(err) => Err(err),
438
462
  }
439
463
  }
440
464
 
441
465
  pub fn allow_element(&self, element: &mut Element) -> bool {
442
- let tag = Tag::tag_from_element(element);
466
+ let tag = crate::tags::Tag::tag_from_element(element);
443
467
  let flags: u8 = self.0.borrow().flags[tag.index];
444
468
 
445
469
  (flags & Self::SELMA_SANITIZER_ALLOW) == 0
446
470
  }
447
471
 
448
472
  pub fn try_remove_element(&self, element: &mut Element) -> bool {
449
- let tag = Tag::tag_from_element(element);
473
+ let tag = crate::tags::Tag::tag_from_element(element);
450
474
  let flags: u8 = self.0.borrow().flags[tag.index];
451
475
 
452
476
  let should_remove = !element.removed() && self.allow_element(element);
453
477
 
454
478
  if should_remove {
455
- if Tag::has_text_content(tag) {
479
+ if crate::tags::Tag::has_text_content(tag) {
456
480
  Self::remove_element(
457
481
  element,
458
482
  tag.self_closing,
@@ -465,7 +489,7 @@ impl SelmaSanitizer {
465
489
  Self::check_if_end_tag_needs_removal(element);
466
490
  } else {
467
491
  // anything in <iframe> must be removed, if it's kept
468
- if Tag::is_iframe(tag) {
492
+ if crate::tags::Tag::is_iframe(tag) {
469
493
  if self.0.borrow().flags[tag.index] != 0 {
470
494
  element.set_inner_content(" ", ContentType::Text);
471
495
  } else {
@@ -497,14 +521,14 @@ impl SelmaSanitizer {
497
521
  }
498
522
 
499
523
  pub fn force_remove_element(&self, element: &mut Element) {
500
- let tag = Tag::tag_from_element(element);
524
+ let tag = crate::tags::Tag::tag_from_element(element);
501
525
  let self_closing = tag.self_closing;
502
526
  Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
503
527
  Self::check_if_end_tag_needs_removal(element);
504
528
  }
505
529
 
506
530
  fn check_if_end_tag_needs_removal(element: &mut Element) {
507
- if element.removed() && !Tag::tag_from_element(element).self_closing {
531
+ if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
508
532
  element
509
533
  .on_end_tag(move |end| {
510
534
  Self::remove_end_tag(end);
@@ -519,21 +543,16 @@ impl SelmaSanitizer {
519
543
  }
520
544
 
521
545
  fn get_element_sanitizer<'a>(
522
- binding: &'a RefMut<Sanitizer>,
523
- element_name: &str,
524
- ) -> &'a ElementSanitizer {
525
- binding.element_sanitizers.get(element_name).unwrap()
526
- }
527
-
528
- fn get_mut_element_sanitizer<'a>(
529
- binding: &'a mut Sanitizer,
546
+ element_sanitizers: &'a mut HashMap<String, ElementSanitizer>,
530
547
  element_name: &str,
531
548
  ) -> &'a mut ElementSanitizer {
532
- binding.element_sanitizers.get_mut(element_name).unwrap()
549
+ element_sanitizers
550
+ .entry(element_name.to_string())
551
+ .or_insert_with(ElementSanitizer::default)
533
552
  }
534
553
  }
535
554
 
536
- pub fn init(m_selma: RModule) -> Result<(), Error> {
555
+ pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
537
556
  let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;
538
557
 
539
558
  c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
@@ -192,14 +192,17 @@ impl Tag {
192
192
  /// Is this tag something which needs to be removed?
193
193
  pub fn is_tag_escapeworthy(tag: Tag) -> bool {
194
194
  tag.index == HTMLTag::TITLE as usize
195
- || tag.index == HTMLTag::TEXTAREA as usize
196
- || tag.index == HTMLTag::STYLE as usize
197
- || tag.index == HTMLTag::XMP as usize
198
195
  || tag.index == HTMLTag::IFRAME as usize
196
+ || tag.index == HTMLTag::MATH as usize
199
197
  || tag.index == HTMLTag::NOEMBED as usize
200
198
  || tag.index == HTMLTag::NOFRAMES as usize
201
- || tag.index == HTMLTag::SCRIPT as usize
199
+ || tag.index == HTMLTag::NOSCRIPT as usize
202
200
  || tag.index == HTMLTag::PLAINTEXT as usize
201
+ || tag.index == HTMLTag::SCRIPT as usize
202
+ || tag.index == HTMLTag::STYLE as usize
203
+ || tag.index == HTMLTag::SVG as usize
204
+ || tag.index == HTMLTag::TEXTAREA as usize
205
+ || tag.index == HTMLTag::XMP as usize
203
206
  }
204
207
 
205
208
  pub const ESCAPEWORTHY_TAGS_CSS: &str =
Binary file
@@ -3,6 +3,10 @@
3
3
  module Selma
4
4
  class Sanitizer
5
5
  module Config
6
+ # although there are many more protocol types, eg., ftp, xmpp, etc.,
7
+ # these are the only ones that are allowed by default
8
+ VALID_PROTOCOLS = ["http", "https", "mailto", :relative]
9
+
6
10
  DEFAULT = freeze_config(
7
11
  # Whether or not to allow HTML comments. Allowing comments is strongly
8
12
  # discouraged, since IE allows script execution within conditional
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.3"
4
+ VERSION = "0.0.5"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.5
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Garen J. Torikian
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-24 00:00:00.000000000 Z
11
+ date: 2022-12-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -81,6 +81,7 @@ files:
81
81
  - ext/selma/src/html.rs
82
82
  - ext/selma/src/html/element.rs
83
83
  - ext/selma/src/html/end_tag.rs
84
+ - ext/selma/src/html/text_chunk.rs
84
85
  - ext/selma/src/lib.rs
85
86
  - ext/selma/src/native_ref_wrap.rs
86
87
  - ext/selma/src/rewriter.rs