selma 0.0.3-aarch64-linux → 0.0.5-aarch64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce81cc62ed12675d4362d65bc2983ef707664f4cd161a3af6bab496de99ee929
4
- data.tar.gz: fdf0b59a4bbc06e9967389c8c069d7da49c835a0ca92080c7fb9bfe73f5d324f
3
+ metadata.gz: 1cc01b8b795db9625ed5104ae6266d3147955def7b1729f91944bb254d5d2fbb
4
+ data.tar.gz: 3d2006ada6dc72d43d5eeb8070a368420863a4839ad3132416f4262004f3c9e8
5
5
  SHA512:
6
- metadata.gz: 4b089a159b06035dfe0a46df235a65f448110908b94092d75bf749a7c7bd590296a128a43ada24258eff411953f57686df8970646dd5b517ce8c2a3a67a66e5c
7
- data.tar.gz: 7312a40c011c715536b91ebe27c1b08707cceffe89f8f81e3dba9457a04c3db07d981351c916c0c2a4340e8389c64ea325f78efdcf0cebfe31e9dcf88ba7a960
6
+ metadata.gz: 345c650c379049924fd084a07047875ddf3982a442208e08fcf67128c590c9e9ac1f851429a759a63cd7ca901b16f82dc81527ae54f9792077db6326cc0c4633
7
+ data.tar.gz: 7506ae8bcd4a854c549b2b2e0664b9a5d6971ecf8eb54b4e4aad5cb0fa329815ab9ee6f9bcefd27725880518f8752338e24dbf4b08a2c579a6893c6f68f82e3c
data/README.md CHANGED
@@ -56,6 +56,10 @@ allow_comments: false,
56
56
  # "<!DOCTYPE html>" when sanitizing a document.
57
57
  allow_doctype: false,
58
58
 
59
+ # HTML elements to allow. By default, no elements are allowed (which means
60
+ # that all HTML will be stripped).
61
+ elements: ["a", "b", "img", ],
62
+
59
63
  # HTML attributes to allow in specific elements. The key is the name of the element,
60
64
  # and the value is an array of allowed attributes. By default, no attributes
61
65
  # are allowed.
@@ -64,14 +68,10 @@ attributes: {
64
68
  "img" => ["src"],
65
69
  },
66
70
 
67
- # HTML elements to allow. By default, no elements are allowed (which means
68
- # that all HTML will be stripped).
69
- elements: ["a", "b", "img", ],
70
-
71
71
  # URL handling protocols to allow in specific attributes. By default, no
72
72
  # protocols are allowed. Use :relative in place of a protocol if you want
73
73
  # to allow relative URLs sans protocol.
74
- protocols: {
74
+ protocols: {
75
75
  "a" => { "href" => ["http", "https", "mailto", :relative] },
76
76
  "img" => { "href" => ["http", "https"] },
77
77
  },
@@ -91,7 +91,7 @@ The real power in Selma comes in its use of handlers. A handler is simply an obj
91
91
 
92
92
  - `selector`, a method which MUST return an instance of `Selma::Selector` which defines the CSS classes to match
93
93
  - `handle_element`, a method that's called on each matched element
94
- - `handle_text`, a method that's called on each matched text node; this MUST return a string
94
+ - `handle_text_chunk`, a method that's called on each matched text node; this MUST return a string
95
95
 
96
96
  Here's an example which rewrites the `href` attribute on `a` and the `src` attribute on `img` to be `https` rather than `http`.
97
97
 
@@ -118,7 +118,7 @@ rewriter = Selma::Rewriter.new(handlers: [MatchAttribute.new])
118
118
  The `Selma::Selector` object has three possible kwargs:
119
119
 
120
120
  - `match_element`: any element which matches this CSS rule will be passed on to `handle_element`
121
- - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text`
121
+ - `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text_chunk`
122
122
  - `ignore_text_within`: this is an array of element names whose text contents will be ignored
123
123
 
124
124
  You've seen an example of `match_element`; here's one for `match_text_within` which changes strings in various elements which are _not_ `pre` or `code`:
@@ -132,7 +132,7 @@ class MatchText
132
132
  SELECTOR
133
133
  end
134
134
 
135
- def handle_text(text)
135
+ def handle_text_chunk(text)
136
136
  string.sub(/@.+/, "<a href=\"www.yetto.app/#{Regexp.last_match}\">")
137
137
  end
138
138
  end
@@ -150,8 +150,9 @@ The `element` argument in `handle_element` has the following methods:
150
150
  - `remove_attribute`: remove an attribute
151
151
  - `attributes`: list all the attributes
152
152
  - `ancestors`: list all the ancestors
153
- - `append(content, content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
154
- - `wrap(start_text, end_text, content_type)`: adds `start_text` before an element and `end_text` after an element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
153
+ - `append(content, as: content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
154
+ - `before(content, as: content_type)`: Inserts `content` before the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
155
+ - `after(content, as: content_type)`: Inserts `content` after the element. `content_type` is either `:text` or `:html` and determines how the content will be applied.
155
156
  - `set_inner_content(content, as: content_type)`: replaces the inner content of the element with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
156
157
 
157
158
  ## Benchmarks
data/ext/selma/Cargo.toml CHANGED
@@ -5,7 +5,7 @@ edition = "2021"
5
5
 
6
6
  [dependencies]
7
7
  enum-iterator = "1.2"
8
- escapist = "0.0.1"
8
+ escapist = "0.0.2"
9
9
  magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
10
10
  lol_html = "0.3"
11
11
 
@@ -1,8 +1,6 @@
1
- use std::borrow::Cow;
2
-
3
1
  use crate::native_ref_wrap::NativeRefWrap;
4
- use lol_html::html_content::{ContentType, Element};
5
- use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Symbol};
2
+ use lol_html::html_content::Element;
3
+ use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Value};
6
4
 
7
5
  struct HTMLElement {
8
6
  element: NativeRefWrap<Element<'static, 'static>>,
@@ -38,6 +36,48 @@ impl SelmaHTMLElement {
38
36
  }
39
37
  }
40
38
 
39
+ fn set_tag_name(&self, name: String) -> Result<(), Error> {
40
+ let mut binding = self.0.borrow_mut();
41
+
42
+ if let Ok(element) = binding.element.get_mut() {
43
+ match element.set_tag_name(&name) {
44
+ Ok(_) => Ok(()),
45
+ Err(err) => Err(Error::new(exception::runtime_error(), format!("{err:?}"))),
46
+ }
47
+ } else {
48
+ Err(Error::new(
49
+ exception::runtime_error(),
50
+ "`set_tag_name` is not available",
51
+ ))
52
+ }
53
+ }
54
+
55
+ fn is_self_closing(&self) -> Result<bool, Error> {
56
+ let binding = self.0.borrow();
57
+
58
+ if let Ok(e) = binding.element.get() {
59
+ Ok(e.is_self_closing())
60
+ } else {
61
+ Err(Error::new(
62
+ exception::runtime_error(),
63
+ "`is_self_closing` is not available",
64
+ ))
65
+ }
66
+ }
67
+
68
+ fn has_attribute(&self, attr: String) -> Result<bool, Error> {
69
+ let binding = self.0.borrow();
70
+
71
+ if let Ok(e) = binding.element.get() {
72
+ Ok(e.has_attribute(&attr))
73
+ } else {
74
+ Err(Error::new(
75
+ exception::runtime_error(),
76
+ "`is_self_closing` is not available",
77
+ ))
78
+ }
79
+ }
80
+
41
81
  fn get_attribute(&self, attr: String) -> Option<String> {
42
82
  let binding = self.0.borrow();
43
83
  let element = binding.element.get();
@@ -106,89 +146,108 @@ impl SelmaHTMLElement {
106
146
  Ok(array)
107
147
  }
108
148
 
109
- fn append(&self, text_to_append: String, content_type: Symbol) -> Result<(), Error> {
149
+ fn before(&self, args: &[Value]) -> Result<(), Error> {
110
150
  let mut binding = self.0.borrow_mut();
111
151
  let element = binding.element.get_mut().unwrap();
112
152
 
113
- let text_str = text_to_append.as_str();
153
+ let (text_str, content_type) = match crate::scan_text_args(args) {
154
+ Ok((text_str, content_type)) => (text_str, content_type),
155
+ Err(err) => return Err(err),
156
+ };
114
157
 
115
- let content_type = Self::find_content_type(content_type);
116
-
117
- element.append(text_str, content_type);
158
+ element.before(&text_str, content_type);
118
159
 
119
160
  Ok(())
120
161
  }
121
162
 
122
- fn wrap(
123
- &self,
124
- start_text: String,
125
- end_text: String,
126
- content_type: Symbol,
127
- ) -> Result<(), Error> {
163
+ fn after(&self, args: &[Value]) -> Result<(), Error> {
128
164
  let mut binding = self.0.borrow_mut();
129
165
  let element = binding.element.get_mut().unwrap();
130
166
 
131
- let before_content_type = Self::find_content_type(content_type);
132
- let after_content_type = Self::find_content_type(content_type);
133
- element.before(&start_text, before_content_type);
134
- element.after(&end_text, after_content_type);
167
+ let (text_str, content_type) = match crate::scan_text_args(args) {
168
+ Ok((text_str, content_type)) => (text_str, content_type),
169
+ Err(err) => return Err(err),
170
+ };
171
+
172
+ element.after(&text_str, content_type);
135
173
 
136
174
  Ok(())
137
175
  }
138
176
 
139
- fn set_inner_content(&self, text_to_set: String, content_type: Symbol) -> Result<(), Error> {
177
+ fn prepend(&self, args: &[Value]) -> Result<(), Error> {
140
178
  let mut binding = self.0.borrow_mut();
141
179
  let element = binding.element.get_mut().unwrap();
142
180
 
143
- let text_str = text_to_set.as_str();
181
+ let (text_str, content_type) = match crate::scan_text_args(args) {
182
+ Ok((text_str, content_type)) => (text_str, content_type),
183
+ Err(err) => return Err(err),
184
+ };
185
+
186
+ element.prepend(&text_str, content_type);
144
187
 
145
- let content_type = Self::find_content_type(content_type);
188
+ Ok(())
189
+ }
146
190
 
147
- element.set_inner_content(text_str, content_type);
191
+ fn append(&self, args: &[Value]) -> Result<(), Error> {
192
+ let mut binding = self.0.borrow_mut();
193
+ let element = binding.element.get_mut().unwrap();
194
+
195
+ let (text_str, content_type) = match crate::scan_text_args(args) {
196
+ Ok((text_str, content_type)) => (text_str, content_type),
197
+ Err(err) => return Err(err),
198
+ };
199
+
200
+ element.append(&text_str, content_type);
148
201
 
149
202
  Ok(())
150
203
  }
151
204
 
152
- fn find_content_type(content_type: Symbol) -> ContentType {
153
- match content_type.name() {
154
- Ok(name) => match name {
155
- Cow::Borrowed("as_text") => ContentType::Text,
156
- Cow::Borrowed("as_html") => ContentType::Html,
157
- _ => Err(Error::new(
158
- exception::runtime_error(),
159
- format!("unknown symbol `{name:?}`"),
160
- ))
161
- .unwrap(),
162
- },
163
- Err(err) => Err(Error::new(
164
- exception::runtime_error(),
165
- format!("Could not unwrap symbol: {err:?}"),
166
- ))
167
- .unwrap(),
168
- }
205
+ fn set_inner_content(&self, args: &[Value]) -> Result<(), Error> {
206
+ let mut binding = self.0.borrow_mut();
207
+ let element = binding.element.get_mut().unwrap();
208
+
209
+ let (inner_content, content_type) = match crate::scan_text_args(args) {
210
+ Ok((inner_content, content_type)) => (inner_content, content_type),
211
+ Err(err) => return Err(err),
212
+ };
213
+
214
+ element.set_inner_content(&inner_content, content_type);
215
+
216
+ Ok(())
169
217
  }
170
218
  }
171
219
 
172
220
  pub fn init(c_html: RClass) -> Result<(), Error> {
173
221
  let c_element = c_html
174
222
  .define_class("Element", Default::default())
175
- .expect("cannot find class Selma::Element");
223
+ .expect("cannot find class Selma::HTML::Element");
176
224
 
177
225
  c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
226
+ c_element.define_method("tag_name=", method!(SelmaHTMLElement::set_tag_name, 1))?;
227
+ c_element.define_method(
228
+ "self_closing?",
229
+ method!(SelmaHTMLElement::is_self_closing, 0),
230
+ )?;
178
231
  c_element.define_method("[]", method!(SelmaHTMLElement::get_attribute, 1))?;
179
232
  c_element.define_method("[]=", method!(SelmaHTMLElement::set_attribute, 2))?;
180
233
  c_element.define_method(
181
234
  "remove_attribute",
182
235
  method!(SelmaHTMLElement::remove_attribute, 1),
183
236
  )?;
237
+ c_element.define_method(
238
+ "has_attribute?",
239
+ method!(SelmaHTMLElement::has_attribute, 1),
240
+ )?;
184
241
  c_element.define_method("attributes", method!(SelmaHTMLElement::get_attributes, 0))?;
185
242
  c_element.define_method("ancestors", method!(SelmaHTMLElement::get_ancestors, 0))?;
186
243
 
187
- c_element.define_method("append", method!(SelmaHTMLElement::append, 2))?;
188
- c_element.define_method("wrap", method!(SelmaHTMLElement::wrap, 3))?;
244
+ c_element.define_method("before", method!(SelmaHTMLElement::before, -1))?;
245
+ c_element.define_method("after", method!(SelmaHTMLElement::after, -1))?;
246
+ c_element.define_method("prepend", method!(SelmaHTMLElement::prepend, -1))?;
247
+ c_element.define_method("append", method!(SelmaHTMLElement::append, -1))?;
189
248
  c_element.define_method(
190
249
  "set_inner_content",
191
- method!(SelmaHTMLElement::set_inner_content, 2),
250
+ method!(SelmaHTMLElement::set_inner_content, -1),
192
251
  )?;
193
252
 
194
253
  Ok(())
@@ -6,7 +6,7 @@ struct HTMLEndTag {
6
6
  end_tag: NativeRefWrap<EndTag<'static>>,
7
7
  }
8
8
 
9
- #[magnus::wrap(class = "Selma::HTML::Element")]
9
+ #[magnus::wrap(class = "Selma::HTML::EndTag")]
10
10
  pub struct SelmaHTMLEndTag(std::cell::RefCell<HTMLEndTag>);
11
11
 
12
12
  /// SAFETY: This is safe because we only access this data when the GVL is held.
@@ -27,7 +27,7 @@ impl SelmaHTMLEndTag {
27
27
  pub fn init(c_html: RClass) -> Result<(), Error> {
28
28
  let c_end_tag = c_html
29
29
  .define_class("EndTag", Default::default())
30
- .expect("cannot find class Selma::EndTag");
30
+ .expect("cannot find class Selma::HTML::EndTag");
31
31
 
32
32
  c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
33
33
 
@@ -0,0 +1,113 @@
1
+ use crate::native_ref_wrap::NativeRefWrap;
2
+ use lol_html::html_content::{TextChunk, TextType};
3
+ use magnus::{exception, method, Error, Module, RClass, Symbol, Value};
4
+
5
+ struct HTMLTextChunk {
6
+ text_chunk: NativeRefWrap<TextChunk<'static>>,
7
+ }
8
+
9
+ #[magnus::wrap(class = "Selma::HTML::TextChunk")]
10
+ pub struct SelmaHTMLTextChunk(std::cell::RefCell<HTMLTextChunk>);
11
+
12
+ /// SAFETY: This is safe because we only access this data when the GVL is held.
13
+ unsafe impl Send for SelmaHTMLTextChunk {}
14
+
15
+ impl SelmaHTMLTextChunk {
16
+ pub fn new(text_chunk: &mut TextChunk) -> Self {
17
+ let (ref_wrap, _anchor) = NativeRefWrap::wrap_mut(text_chunk);
18
+
19
+ Self(std::cell::RefCell::new(HTMLTextChunk {
20
+ text_chunk: ref_wrap,
21
+ }))
22
+ }
23
+
24
+ fn to_s(&self) -> Result<String, Error> {
25
+ let binding = self.0.borrow();
26
+
27
+ if let Ok(tc) = binding.text_chunk.get() {
28
+ Ok(tc.as_str().to_string())
29
+ } else {
30
+ Err(Error::new(
31
+ exception::runtime_error(),
32
+ "`to_s` is not available",
33
+ ))
34
+ }
35
+ }
36
+
37
+ fn text_type(&self) -> Result<Symbol, Error> {
38
+ let binding = self.0.borrow();
39
+
40
+ if let Ok(tc) = binding.text_chunk.get() {
41
+ match tc.text_type() {
42
+ TextType::Data => Ok(Symbol::from("data")),
43
+ TextType::PlainText => Ok(Symbol::from("plain_text")),
44
+ TextType::RawText => Ok(Symbol::from("raw_text")),
45
+ TextType::ScriptData => Ok(Symbol::from("script")),
46
+ TextType::RCData => Ok(Symbol::from("rc_data")),
47
+ TextType::CDataSection => Ok(Symbol::from("cdata_section")),
48
+ }
49
+ } else {
50
+ Err(Error::new(
51
+ exception::runtime_error(),
52
+ "`text_type` is not available",
53
+ ))
54
+ }
55
+ }
56
+
57
+ fn before(&self, args: &[Value]) -> Result<(), Error> {
58
+ let mut binding = self.0.borrow_mut();
59
+ let text_chunk = binding.text_chunk.get_mut().unwrap();
60
+
61
+ let (text_str, content_type) = match crate::scan_text_args(args) {
62
+ Ok((text_str, content_type)) => (text_str, content_type),
63
+ Err(err) => return Err(err),
64
+ };
65
+
66
+ text_chunk.before(&text_str, content_type);
67
+
68
+ Ok(())
69
+ }
70
+
71
+ fn after(&self, args: &[Value]) -> Result<(), Error> {
72
+ let mut binding = self.0.borrow_mut();
73
+ let text_chunk = binding.text_chunk.get_mut().unwrap();
74
+
75
+ let (text_str, content_type) = match crate::scan_text_args(args) {
76
+ Ok((text_str, content_type)) => (text_str, content_type),
77
+ Err(err) => return Err(err),
78
+ };
79
+
80
+ text_chunk.after(&text_str, content_type);
81
+
82
+ Ok(())
83
+ }
84
+
85
+ fn replace(&self, args: &[Value]) -> Result<(), Error> {
86
+ let mut binding = self.0.borrow_mut();
87
+ let text_chunk = binding.text_chunk.get_mut().unwrap();
88
+
89
+ let (text_str, content_type) = match crate::scan_text_args(args) {
90
+ Ok((text_str, content_type)) => (text_str, content_type),
91
+ Err(err) => return Err(err),
92
+ };
93
+
94
+ text_chunk.replace(&text_str, content_type);
95
+
96
+ Ok(())
97
+ }
98
+ }
99
+
100
+ pub fn init(c_html: RClass) -> Result<(), Error> {
101
+ let c_text_chunk = c_html
102
+ .define_class("TextChunk", Default::default())
103
+ .expect("cannot find class Selma::HTML::TextChunk");
104
+
105
+ c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
106
+ c_text_chunk.define_method("content", method!(SelmaHTMLTextChunk::to_s, 0))?;
107
+ c_text_chunk.define_method("text_type", method!(SelmaHTMLTextChunk::text_type, 0))?;
108
+ c_text_chunk.define_method("before", method!(SelmaHTMLTextChunk::before, -1))?;
109
+ c_text_chunk.define_method("after", method!(SelmaHTMLTextChunk::after, -1))?;
110
+ c_text_chunk.define_method("replace", method!(SelmaHTMLTextChunk::replace, -1))?;
111
+
112
+ Ok(())
113
+ }
@@ -9,9 +9,11 @@ pub fn init(m_selma: RModule) -> Result<(), Error> {
9
9
 
10
10
  element::init(c_html).expect("cannot define Selma::HTML::Element class");
11
11
  end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
12
+ text_chunk::init(c_html).expect("cannot define Selma::HTML::TextChunk class");
12
13
 
13
14
  Ok(())
14
15
  }
15
16
 
16
17
  pub mod element;
17
18
  pub mod end_tag;
19
+ pub mod text_chunk;
data/ext/selma/src/lib.rs CHANGED
@@ -1,6 +1,7 @@
1
1
  extern crate core;
2
2
 
3
- use magnus::{define_module, Error};
3
+ use lol_html::html_content::ContentType;
4
+ use magnus::{define_module, exception, scan_args, Error, Symbol, Value};
4
5
 
5
6
  pub mod html;
6
7
  pub mod native_ref_wrap;
@@ -10,6 +11,32 @@ pub mod selector;
10
11
  pub mod tags;
11
12
  pub mod wrapped_struct;
12
13
 
14
+ #[allow(clippy::let_unit_value)]
15
+ fn scan_text_args(args: &[Value]) -> Result<(String, ContentType), magnus::Error> {
16
+ let args = scan_args::scan_args(args)?;
17
+ let (text,): (String,) = args.required;
18
+ let _: () = args.optional;
19
+ let _: () = args.splat;
20
+ let _: () = args.trailing;
21
+ let _: () = args.block;
22
+
23
+ let kwargs = scan_args::get_kwargs::<_, (Symbol,), (), ()>(args.keywords, &["as"], &[])?;
24
+ let as_sym = kwargs.required.0;
25
+ let as_sym_str = as_sym.name().unwrap();
26
+ let content_type = if as_sym_str == "text" {
27
+ ContentType::Text
28
+ } else if as_sym_str == "html" {
29
+ ContentType::Html
30
+ } else {
31
+ return Err(Error::new(
32
+ exception::runtime_error(),
33
+ format!("unknown symbol `{as_sym_str:?}`"),
34
+ ));
35
+ };
36
+
37
+ Ok((text, content_type))
38
+ }
39
+
13
40
  #[magnus::init]
14
41
  fn init() -> Result<(), Error> {
15
42
  let m_selma = define_module("Selma").expect("cannot define ::Selma module");
@@ -1,6 +1,6 @@
1
1
  use lol_html::{
2
2
  doc_comments, doctype, element,
3
- html_content::{ContentType, Element, EndTag, TextChunk},
3
+ html_content::{Element, EndTag, TextChunk},
4
4
  text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
5
5
  };
6
6
  use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
@@ -8,7 +8,7 @@ use magnus::{exception, function, method, scan_args, Module, Object, RArray, RMo
8
8
  use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
9
9
 
10
10
  use crate::{
11
- html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
11
+ html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
12
12
  sanitizer::SelmaSanitizer,
13
13
  selector::SelmaSelector,
14
14
  tags::Tag,
@@ -43,7 +43,7 @@ unsafe impl Send for SelmaRewriter {}
43
43
  impl SelmaRewriter {
44
44
  const SELMA_ON_END_TAG: &str = "on_end_tag";
45
45
  const SELMA_HANDLE_ELEMENT: &str = "handle_element";
46
- const SELMA_HANDLE_TEXT: &str = "handle_text";
46
+ const SELMA_HANDLE_TEXT_CHUNK: &str = "handle_text_chunk";
47
47
 
48
48
  /// @yard
49
49
  /// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
@@ -145,7 +145,7 @@ impl SelmaRewriter {
145
145
  let _: () = args.trailing;
146
146
  let _: () = args.block;
147
147
 
148
- let kw = scan_args::get_kwargs::<
148
+ let kwargs = scan_args::get_kwargs::<
149
149
  _,
150
150
  (),
151
151
  (
@@ -154,7 +154,7 @@ impl SelmaRewriter {
154
154
  ),
155
155
  (),
156
156
  >(args.keywords, &[], &["sanitizer", "handlers"])?;
157
- let (rb_sanitizer, rb_handlers) = kw.optional;
157
+ let (rb_sanitizer, rb_handlers) = kwargs.optional;
158
158
 
159
159
  Ok((rb_sanitizer, rb_handlers))
160
160
  }
@@ -162,26 +162,22 @@ impl SelmaRewriter {
162
162
  /// Perform HTML rewrite sequence.
163
163
  fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
164
164
  let sanitized_html = match &self.0.borrow().sanitizer {
165
- None => html,
165
+ None => Ok(html),
166
166
  Some(sanitizer) => {
167
- // due to malicious html crafting
168
- // (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
169
- // we need to run sanitization several times to truly remove unwanted tags,
170
- // because lol-html happily accepts this garbage (by design?)
171
- let sanitized_html = Self::perform_sanitization(sanitizer, &html).unwrap();
167
+ let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
168
+ Ok(sanitized_html) => sanitized_html,
169
+ Err(err) => return Err(err),
170
+ };
172
171
 
173
- String::from_utf8(sanitized_html).unwrap()
172
+ String::from_utf8(sanitized_html)
174
173
  }
175
174
  };
176
175
  let binding = self.0.borrow_mut();
177
176
  let handlers = &binding.handlers;
178
177
 
179
- match Self::perform_handler_rewrite(self, handlers, sanitized_html) {
178
+ match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
180
179
  Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
181
- Err(err) => Err(magnus::Error::new(
182
- exception::runtime_error(),
183
- format!("{err:?}"),
184
- )),
180
+ Err(err) => Err(err),
185
181
  }
186
182
  }
187
183
 
@@ -212,9 +208,10 @@ impl SelmaRewriter {
212
208
  if el.removed() {
213
209
  return Ok(());
214
210
  }
215
- sanitizer.sanitize_attributes(el);
216
-
217
- Ok(())
211
+ match sanitizer.sanitize_attributes(el) {
212
+ Ok(_) => Ok(()),
213
+ Err(err) => Err(err.to_string().into()),
214
+ }
218
215
  })],
219
216
  // TODO: allow for MemorySettings to be defined
220
217
  ..Settings::default()
@@ -341,7 +338,7 @@ impl SelmaRewriter {
341
338
  let mut stack = closure_element_stack.as_ref().borrow_mut();
342
339
  stack.pop();
343
340
  Ok(())
344
- });
341
+ })?;
345
342
  Ok(())
346
343
  }));
347
344
  });
@@ -375,13 +372,14 @@ impl SelmaRewriter {
375
372
  ) -> Result<(), magnus::Error> {
376
373
  // if `on_end_tag` function is defined, call it
377
374
  if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
375
+ // TODO: error here is an "EndTagError"
378
376
  element.on_end_tag(move |end_tag| {
379
377
  let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
380
378
 
381
- rb_handler
382
- .funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,))
383
- .unwrap();
384
- Ok(())
379
+ match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
380
+ Ok(_) => Ok(()),
381
+ Err(err) => Err(err.to_string().into()),
382
+ }
385
383
  });
386
384
  }
387
385
 
@@ -390,40 +388,30 @@ impl SelmaRewriter {
390
388
  rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
391
389
  match rb_result {
392
390
  Ok(_) => Ok(()),
393
- Err(err) => Err(magnus::Error::new(
394
- exception::runtime_error(),
395
- format!("{err:?}"),
396
- )),
391
+ Err(err) => Err(err),
397
392
  }
398
393
  }
399
394
 
400
- fn process_text_handlers(rb_handler: Value, text: &mut TextChunk) -> Result<(), magnus::Error> {
401
- // prevents missing `handle_text` function
402
- let content = text.as_str();
395
+ fn process_text_handlers(
396
+ rb_handler: Value,
397
+ text_chunk: &mut TextChunk,
398
+ ) -> Result<(), magnus::Error> {
399
+ // prevents missing `handle_text_chunk` function
400
+ let content = text_chunk.as_str();
403
401
 
404
402
  // seems that sometimes lol-html returns blank text / EOLs?
405
403
  if content.is_empty() {
406
404
  return Ok(());
407
405
  }
408
406
 
409
- let rb_result = rb_handler.funcall::<_, _, String>(Self::SELMA_HANDLE_TEXT, (content,));
410
-
411
- if rb_result.is_err() {
412
- return Err(magnus::Error::new(
413
- exception::type_error(),
414
- format!(
415
- "Expected #{:?} to return a string: {:?}",
416
- Self::SELMA_HANDLE_TEXT,
417
- rb_result.err().unwrap()
418
- ),
419
- ));
407
+ let rb_text_chunk = SelmaHTMLTextChunk::new(text_chunk);
408
+ match rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,)) {
409
+ Ok(_) => Ok(()),
410
+ Err(err) => Err(magnus::Error::new(
411
+ exception::runtime_error(),
412
+ format!("{err:?}"),
413
+ )),
420
414
  }
421
-
422
- let new_content = rb_result.unwrap();
423
- // TODO: can this be an option?
424
- text.replace(&new_content, ContentType::Html);
425
-
426
- Ok(())
427
415
  }
428
416
  }
429
417
 
@@ -1,12 +1,10 @@
1
- use std::{borrow::BorrowMut, cell::RefMut, collections::HashMap};
1
+ use std::{borrow::BorrowMut, collections::HashMap};
2
2
 
3
- use lol_html::html_content::{Comment, ContentType, Doctype, Element, EndTag};
4
- use magnus::{
5
- class, exception, function, method, scan_args, Error, Module, Object, RArray, RHash, RModule,
6
- Value,
3
+ use lol_html::{
4
+ errors::AttributeNameError,
5
+ html_content::{Comment, ContentType, Doctype, Element, EndTag},
7
6
  };
8
-
9
- use crate::tags::Tag;
7
+ use magnus::{class, function, method, scan_args, Module, Object, RArray, RHash, RModule, Value};
10
8
 
11
9
  #[derive(Clone, Debug)]
12
10
  struct ElementSanitizer {
@@ -16,9 +14,21 @@ struct ElementSanitizer {
16
14
  protocol_sanitizers: HashMap<String, Vec<String>>,
17
15
  }
18
16
 
17
+ impl Default for ElementSanitizer {
18
+ fn default() -> Self {
19
+ ElementSanitizer {
20
+ allowed_attrs: vec![],
21
+ allowed_classes: vec![],
22
+ required_attrs: vec![],
23
+
24
+ protocol_sanitizers: HashMap::new(),
25
+ }
26
+ }
27
+ }
28
+
19
29
  #[derive(Clone, Debug)]
20
30
  pub struct Sanitizer {
21
- flags: [u8; Tag::TAG_COUNT],
31
+ flags: [u8; crate::tags::Tag::TAG_COUNT],
22
32
  allowed_attrs: Vec<String>,
23
33
  allowed_classes: Vec<String>,
24
34
  element_sanitizers: HashMap<String, ElementSanitizer>,
@@ -39,7 +49,7 @@ impl SelmaSanitizer {
39
49
  const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
40
50
  const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
41
51
 
42
- pub fn new(arguments: &[Value]) -> Result<Self, Error> {
52
+ pub fn new(arguments: &[Value]) -> Result<Self, magnus::Error> {
43
53
  let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
44
54
  let (opt_config,): (Option<RHash>,) = args.optional;
45
55
 
@@ -50,19 +60,16 @@ impl SelmaSanitizer {
50
60
  };
51
61
 
52
62
  let mut element_sanitizers = HashMap::new();
53
- Tag::html_tags().iter().for_each(|html_tag| {
54
- let es = ElementSanitizer {
55
- allowed_attrs: vec![],
56
- allowed_classes: vec![],
57
- required_attrs: vec![],
58
-
59
- protocol_sanitizers: HashMap::new(),
60
- };
61
- element_sanitizers.insert(Tag::element_name_from_enum(html_tag).to_string(), es);
63
+ crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
64
+ let es = ElementSanitizer::default();
65
+ element_sanitizers.insert(
66
+ crate::tags::Tag::element_name_from_enum(html_tag).to_string(),
67
+ es,
68
+ );
62
69
  });
63
70
 
64
71
  Ok(Self(std::cell::RefCell::new(Sanitizer {
65
- flags: [0; Tag::TAG_COUNT],
72
+ flags: [0; crate::tags::Tag::TAG_COUNT],
66
73
  allowed_attrs: vec![],
67
74
  allowed_classes: vec![],
68
75
  element_sanitizers,
@@ -74,7 +81,7 @@ impl SelmaSanitizer {
74
81
  })))
75
82
  }
76
83
 
77
- fn get_config(&self) -> Result<RHash, Error> {
84
+ fn get_config(&self) -> Result<RHash, magnus::Error> {
78
85
  let binding = self.0.borrow();
79
86
 
80
87
  Ok(binding.config)
@@ -82,7 +89,7 @@ impl SelmaSanitizer {
82
89
 
83
90
  /// Toggle a sanitizer option on or off.
84
91
  fn set_flag(&self, tag_name: String, flag: u8, set: bool) {
85
- let tag = Tag::tag_from_tag_name(tag_name.as_str());
92
+ let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
86
93
  if set {
87
94
  self.0.borrow_mut().flags[tag.index] |= flag;
88
95
  } else {
@@ -93,13 +100,19 @@ impl SelmaSanitizer {
93
100
  /// Toggles all sanitization options on or off.
94
101
  fn set_all_flags(&self, flag: u8, set: bool) {
95
102
  if set {
96
- Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
97
- self.0.borrow_mut().flags[iter] |= flag;
98
- });
103
+ crate::tags::Tag::html_tags()
104
+ .iter()
105
+ .enumerate()
106
+ .for_each(|(iter, _)| {
107
+ self.0.borrow_mut().flags[iter] |= flag;
108
+ });
99
109
  } else {
100
- Tag::html_tags().iter().enumerate().for_each(|(iter, _)| {
101
- self.0.borrow_mut().flags[iter] &= flag;
102
- });
110
+ crate::tags::Tag::html_tags()
111
+ .iter()
112
+ .enumerate()
113
+ .for_each(|(iter, _)| {
114
+ self.0.borrow_mut().flags[iter] &= flag;
115
+ });
103
116
  }
104
117
  }
105
118
 
@@ -111,8 +124,8 @@ impl SelmaSanitizer {
111
124
 
112
125
  pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
113
126
  if self.0.borrow().escape_tagfilter {
114
- let tag = Tag::tag_from_element(e);
115
- if Tag::is_tag_escapeworthy(tag) {
127
+ let tag = crate::tags::Tag::tag_from_element(e);
128
+ if crate::tags::Tag::is_tag_escapeworthy(tag) {
116
129
  e.remove();
117
130
  return true;
118
131
  }
@@ -162,7 +175,8 @@ impl SelmaSanitizer {
162
175
  let allowed_attrs = &mut binding.allowed_attrs;
163
176
  Self::set_allowed(allowed_attrs, &attr_name, allow);
164
177
  } else {
165
- let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
178
+ let element_sanitizers = &mut binding.element_sanitizers;
179
+ let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
166
180
 
167
181
  element_sanitizer.allowed_attrs.push(attr_name);
168
182
  }
@@ -176,7 +190,8 @@ impl SelmaSanitizer {
176
190
  let allowed_classes = &mut binding.allowed_classes;
177
191
  Self::set_allowed(allowed_classes, &class_name, allow);
178
192
  } else {
179
- let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
193
+ let element_sanitizers = &mut binding.element_sanitizers;
194
+ let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
180
195
 
181
196
  let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
182
197
  Self::set_allowed(allowed_classes, &class_name, allow)
@@ -187,9 +202,10 @@ impl SelmaSanitizer {
187
202
  fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
188
203
  let mut binding = self.0.borrow_mut();
189
204
 
190
- let element_sanitizer = Self::get_mut_element_sanitizer(&mut binding, &element_name);
205
+ let element_sanitizers = &mut binding.element_sanitizers;
206
+ let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
191
207
 
192
- let protocol_sanitizers = element_sanitizer.protocol_sanitizers.borrow_mut();
208
+ let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
193
209
 
194
210
  for opt_allowed_protocol in allow_list.each() {
195
211
  let allowed_protocol = opt_allowed_protocol.unwrap();
@@ -229,10 +245,16 @@ impl SelmaSanitizer {
229
245
  }
230
246
  }
231
247
 
232
- pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), magnus::Error> {
233
- let binding = self.0.borrow_mut();
234
- let tag = Tag::tag_from_element(element);
235
- let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
248
+ pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), AttributeNameError> {
249
+ let tag = crate::tags::Tag::tag_from_element(element);
250
+ let tag_name = &element.tag_name();
251
+ let element_sanitizer = {
252
+ let mut binding = self.0.borrow_mut();
253
+ let element_sanitizers = &mut binding.element_sanitizers;
254
+ Self::get_element_sanitizer(element_sanitizers, tag_name).clone()
255
+ };
256
+
257
+ let binding = self.0.borrow();
236
258
 
237
259
  // FIXME: This is a hack to get around the fact that we can't borrow
238
260
  let attribute_map: HashMap<String, String> = element
@@ -255,26 +277,30 @@ impl SelmaSanitizer {
255
277
  let x = escapist::unescape_html(trimmed.as_bytes());
256
278
  let unescaped_attr_val = String::from_utf8_lossy(&x).to_string();
257
279
 
258
- if !Self::should_keep_attribute(
280
+ let should_keep_attrubute = match Self::should_keep_attribute(
259
281
  &binding,
260
282
  element,
261
- element_sanitizer,
283
+ &element_sanitizer,
262
284
  attr_name,
263
285
  &unescaped_attr_val,
264
286
  ) {
287
+ Ok(should_keep) => should_keep,
288
+ Err(e) => {
289
+ return Err(e);
290
+ }
291
+ };
292
+
293
+ if !should_keep_attrubute {
265
294
  element.remove_attribute(attr_name);
266
295
  } else {
267
296
  // Prevent the use of `<meta>` elements that set a charset other than UTF-8,
268
297
  // since output is always UTF-8.
269
- if Tag::is_meta(tag) {
298
+ if crate::tags::Tag::is_meta(tag) {
270
299
  if attr_name == "charset" && unescaped_attr_val != "utf-8" {
271
300
  match element.set_attribute(attr_name, "utf-8") {
272
301
  Ok(_) => {}
273
- Err(_) => {
274
- return Err(magnus::Error::new(
275
- exception::runtime_error(),
276
- format!("Unable to change {attr_name:?}"),
277
- ));
302
+ Err(err) => {
303
+ return Err(err);
278
304
  }
279
305
  }
280
306
  }
@@ -282,13 +308,17 @@ impl SelmaSanitizer {
282
308
  let mut buf = String::new();
283
309
  // ...then, escape any special characters, for security
284
310
  if attr_name == "href" {
285
- // FIXME: gross--------------vvvv
286
- escapist::escape_href(&mut buf, unescaped_attr_val.to_string().as_str());
311
+ escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
287
312
  } else {
288
- escapist::escape_html(&mut buf, unescaped_attr_val.to_string().as_str());
313
+ escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
289
314
  };
290
315
 
291
- element.set_attribute(attr_name, &buf);
316
+ match element.set_attribute(attr_name, &buf) {
317
+ Ok(_) => {}
318
+ Err(err) => {
319
+ return Err(err);
320
+ }
321
+ }
292
322
  }
293
323
  }
294
324
  }
@@ -308,12 +338,12 @@ impl SelmaSanitizer {
308
338
  }
309
339
 
310
340
  fn should_keep_attribute(
311
- binding: &RefMut<Sanitizer>,
341
+ binding: &Sanitizer,
312
342
  element: &mut Element,
313
343
  element_sanitizer: &ElementSanitizer,
314
344
  attr_name: &String,
315
345
  attr_val: &String,
316
- ) -> bool {
346
+ ) -> Result<bool, AttributeNameError> {
317
347
  let mut allowed: bool = false;
318
348
  let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
319
349
  let sanitizer_allowed_attrs = binding.allowed_attrs.contains(attr_name);
@@ -327,7 +357,7 @@ impl SelmaSanitizer {
327
357
  }
328
358
 
329
359
  if !allowed {
330
- return false;
360
+ return Ok(false);
331
361
  }
332
362
 
333
363
  let protocol_sanitizer_values = element_sanitizer.protocol_sanitizers.get(attr_name);
@@ -335,32 +365,29 @@ impl SelmaSanitizer {
335
365
  None => {
336
366
  // has a protocol, but no sanitization list
337
367
  if !attr_val.is_empty() && Self::has_protocol(attr_val) {
338
- return false;
368
+ return Ok(false);
339
369
  }
340
370
  }
341
371
  Some(protocol_sanitizer_values) => {
342
372
  if !attr_val.is_empty()
343
373
  && !Self::has_allowed_protocol(protocol_sanitizer_values, attr_val)
344
374
  {
345
- return false;
375
+ return Ok(false);
346
376
  }
347
377
  }
348
378
  }
349
379
 
350
- if attr_name == "class"
351
- && !Self::sanitize_class_attribute(
380
+ if attr_name == "class" {
381
+ return Self::sanitize_class_attribute(
352
382
  binding,
353
383
  element,
354
384
  element_sanitizer,
355
385
  attr_name,
356
386
  attr_val,
357
- )
358
- .unwrap()
359
- {
360
- return false;
387
+ );
361
388
  }
362
389
 
363
- true
390
+ Ok(true)
364
391
  }
365
392
 
366
393
  fn has_protocol(attr_val: &str) -> bool {
@@ -398,12 +425,12 @@ impl SelmaSanitizer {
398
425
  }
399
426
 
400
427
  fn sanitize_class_attribute(
401
- binding: &RefMut<Sanitizer>,
428
+ binding: &Sanitizer,
402
429
  element: &mut Element,
403
430
  element_sanitizer: &ElementSanitizer,
404
431
  attr_name: &str,
405
432
  attr_val: &str,
406
- ) -> Result<bool, Error> {
433
+ ) -> Result<bool, lol_html::errors::AttributeNameError> {
407
434
  let allowed_global = &binding.allowed_classes;
408
435
 
409
436
  let mut valid_classes: Vec<String> = vec![];
@@ -431,28 +458,25 @@ impl SelmaSanitizer {
431
458
 
432
459
  match element.set_attribute(attr_name, valid_classes.join(" ").as_str()) {
433
460
  Ok(_) => Ok(true),
434
- Err(err) => Err(Error::new(
435
- exception::runtime_error(),
436
- format!("AttributeNameError: {err:?}"),
437
- )),
461
+ Err(err) => Err(err),
438
462
  }
439
463
  }
440
464
 
441
465
  pub fn allow_element(&self, element: &mut Element) -> bool {
442
- let tag = Tag::tag_from_element(element);
466
+ let tag = crate::tags::Tag::tag_from_element(element);
443
467
  let flags: u8 = self.0.borrow().flags[tag.index];
444
468
 
445
469
  (flags & Self::SELMA_SANITIZER_ALLOW) == 0
446
470
  }
447
471
 
448
472
  pub fn try_remove_element(&self, element: &mut Element) -> bool {
449
- let tag = Tag::tag_from_element(element);
473
+ let tag = crate::tags::Tag::tag_from_element(element);
450
474
  let flags: u8 = self.0.borrow().flags[tag.index];
451
475
 
452
476
  let should_remove = !element.removed() && self.allow_element(element);
453
477
 
454
478
  if should_remove {
455
- if Tag::has_text_content(tag) {
479
+ if crate::tags::Tag::has_text_content(tag) {
456
480
  Self::remove_element(
457
481
  element,
458
482
  tag.self_closing,
@@ -465,7 +489,7 @@ impl SelmaSanitizer {
465
489
  Self::check_if_end_tag_needs_removal(element);
466
490
  } else {
467
491
  // anything in <iframe> must be removed, if it's kept
468
- if Tag::is_iframe(tag) {
492
+ if crate::tags::Tag::is_iframe(tag) {
469
493
  if self.0.borrow().flags[tag.index] != 0 {
470
494
  element.set_inner_content(" ", ContentType::Text);
471
495
  } else {
@@ -497,14 +521,14 @@ impl SelmaSanitizer {
497
521
  }
498
522
 
499
523
  pub fn force_remove_element(&self, element: &mut Element) {
500
- let tag = Tag::tag_from_element(element);
524
+ let tag = crate::tags::Tag::tag_from_element(element);
501
525
  let self_closing = tag.self_closing;
502
526
  Self::remove_element(element, self_closing, Self::SELMA_SANITIZER_REMOVE_CONTENTS);
503
527
  Self::check_if_end_tag_needs_removal(element);
504
528
  }
505
529
 
506
530
  fn check_if_end_tag_needs_removal(element: &mut Element) {
507
- if element.removed() && !Tag::tag_from_element(element).self_closing {
531
+ if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
508
532
  element
509
533
  .on_end_tag(move |end| {
510
534
  Self::remove_end_tag(end);
@@ -519,21 +543,16 @@ impl SelmaSanitizer {
519
543
  }
520
544
 
521
545
  fn get_element_sanitizer<'a>(
522
- binding: &'a RefMut<Sanitizer>,
523
- element_name: &str,
524
- ) -> &'a ElementSanitizer {
525
- binding.element_sanitizers.get(element_name).unwrap()
526
- }
527
-
528
- fn get_mut_element_sanitizer<'a>(
529
- binding: &'a mut Sanitizer,
546
+ element_sanitizers: &'a mut HashMap<String, ElementSanitizer>,
530
547
  element_name: &str,
531
548
  ) -> &'a mut ElementSanitizer {
532
- binding.element_sanitizers.get_mut(element_name).unwrap()
549
+ element_sanitizers
550
+ .entry(element_name.to_string())
551
+ .or_insert_with(ElementSanitizer::default)
533
552
  }
534
553
  }
535
554
 
536
- pub fn init(m_selma: RModule) -> Result<(), Error> {
555
+ pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
537
556
  let c_sanitizer = m_selma.define_class("Sanitizer", Default::default())?;
538
557
 
539
558
  c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
@@ -192,14 +192,17 @@ impl Tag {
192
192
  /// Is this tag something which needs to be removed?
193
193
  pub fn is_tag_escapeworthy(tag: Tag) -> bool {
194
194
  tag.index == HTMLTag::TITLE as usize
195
- || tag.index == HTMLTag::TEXTAREA as usize
196
- || tag.index == HTMLTag::STYLE as usize
197
- || tag.index == HTMLTag::XMP as usize
198
195
  || tag.index == HTMLTag::IFRAME as usize
196
+ || tag.index == HTMLTag::MATH as usize
199
197
  || tag.index == HTMLTag::NOEMBED as usize
200
198
  || tag.index == HTMLTag::NOFRAMES as usize
201
- || tag.index == HTMLTag::SCRIPT as usize
199
+ || tag.index == HTMLTag::NOSCRIPT as usize
202
200
  || tag.index == HTMLTag::PLAINTEXT as usize
201
+ || tag.index == HTMLTag::SCRIPT as usize
202
+ || tag.index == HTMLTag::STYLE as usize
203
+ || tag.index == HTMLTag::SVG as usize
204
+ || tag.index == HTMLTag::TEXTAREA as usize
205
+ || tag.index == HTMLTag::XMP as usize
203
206
  }
204
207
 
205
208
  pub const ESCAPEWORTHY_TAGS_CSS: &str =
Binary file
@@ -3,6 +3,10 @@
3
3
  module Selma
4
4
  class Sanitizer
5
5
  module Config
6
+ # although there are many more protocol types, eg., ftp, xmpp, etc.,
7
+ # these are the only ones that are allowed by default
8
+ VALID_PROTOCOLS = ["http", "https", "mailto", :relative]
9
+
6
10
  DEFAULT = freeze_config(
7
11
  # Whether or not to allow HTML comments. Allowing comments is strongly
8
12
  # discouraged, since IE allows script execution within conditional
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.0.3"
4
+ VERSION = "0.0.5"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.5
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Garen J. Torikian
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-24 00:00:00.000000000 Z
11
+ date: 2022-12-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -81,6 +81,7 @@ files:
81
81
  - ext/selma/src/html.rs
82
82
  - ext/selma/src/html/element.rs
83
83
  - ext/selma/src/html/end_tag.rs
84
+ - ext/selma/src/html/text_chunk.rs
84
85
  - ext/selma/src/lib.rs
85
86
  - ext/selma/src/native_ref_wrap.rs
86
87
  - ext/selma/src/rewriter.rs