selma 0.0.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +753 -0
- data/Cargo.toml +6 -0
- data/README.md +4 -1
- data/ext/selma/Cargo.toml +3 -3
- data/ext/selma/extconf.rb +0 -2
- data/ext/selma/src/html/element.rs +38 -2
- data/ext/selma/src/html/end_tag.rs +2 -2
- data/ext/selma/src/html/text_chunk.rs +8 -8
- data/ext/selma/src/html.rs +3 -1
- data/ext/selma/src/lib.rs +0 -1
- data/ext/selma/src/rewriter.rs +66 -66
- data/ext/selma/src/sanitizer.rs +21 -25
- data/ext/selma/src/selector.rs +4 -4
- data/lib/selma/version.rb +1 -1
- metadata +5 -20
- data/ext/selma/_util.rb +0 -102
- data/ext/selma/src/wrapped_struct.rs +0 -92
- data/selma.gemspec +0 -41
data/Cargo.toml
ADDED
data/README.md
CHANGED
@@ -29,7 +29,7 @@ Selma can perform two different actions, either independently or together:
|
|
29
29
|
- Sanitize HTML, through a [Sanitize](https://github.com/rgrove/sanitize)-like allowlist syntax; and
|
30
30
|
- Select HTML using CSS rules, and manipulate elements and text nodes along the way.
|
31
31
|
|
32
|
-
It does this through two
|
32
|
+
It does this through two kwargs: `sanitizer` and `handlers`. The basic API for Selma looks like this:
|
33
33
|
|
34
34
|
```ruby
|
35
35
|
sanitizer_config = {
|
@@ -164,6 +164,9 @@ The `element` argument in `handle_element` has the following methods:
|
|
164
164
|
- `prepend(content, as: content_type)`: prepends `content` to the element's inner content, i.e. inserts content right after the element's start tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
165
165
|
- `append(content, as: content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
166
166
|
- `set_inner_content`: Replaces inner content of the element with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
167
|
+
- `remove`: Removes the element and its inner content.
|
168
|
+
- `remove_and_keep_content`: Removes the element, but keeps its content. I.e. remove start and end tags of the element.
|
169
|
+
- `removed?`: A bool which identifies if the element has been removed or replaced with some content.
|
167
170
|
|
168
171
|
#### `text_chunk` methods
|
169
172
|
|
data/ext/selma/Cargo.toml
CHANGED
@@ -4,10 +4,10 @@ version = "1.0.0"
|
|
4
4
|
edition = "2021"
|
5
5
|
|
6
6
|
[dependencies]
|
7
|
-
enum-iterator = "1.
|
7
|
+
enum-iterator = "1.4"
|
8
8
|
escapist = "0.0.2"
|
9
|
-
magnus =
|
10
|
-
lol_html = "
|
9
|
+
magnus = "0.6"
|
10
|
+
lol_html = "1.2"
|
11
11
|
|
12
12
|
[lib]
|
13
13
|
name = "selma"
|
data/ext/selma/extconf.rb
CHANGED
@@ -215,12 +215,41 @@ impl SelmaHTMLElement {
|
|
215
215
|
|
216
216
|
Ok(())
|
217
217
|
}
|
218
|
+
|
219
|
+
fn remove(&self) {
|
220
|
+
let mut binding = self.0.borrow_mut();
|
221
|
+
|
222
|
+
if let Ok(e) = binding.element.get_mut() {
|
223
|
+
e.remove()
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
fn remove_and_keep_content(&self) {
|
228
|
+
let mut binding = self.0.borrow_mut();
|
229
|
+
|
230
|
+
if let Ok(e) = binding.element.get_mut() {
|
231
|
+
e.remove_and_keep_content()
|
232
|
+
}
|
233
|
+
}
|
234
|
+
|
235
|
+
fn is_removed(&self) -> Result<bool, Error> {
|
236
|
+
let binding = self.0.borrow();
|
237
|
+
|
238
|
+
if let Ok(e) = binding.element.get() {
|
239
|
+
Ok(e.removed())
|
240
|
+
} else {
|
241
|
+
Err(Error::new(
|
242
|
+
exception::runtime_error(),
|
243
|
+
"`is_removed` is not available",
|
244
|
+
))
|
245
|
+
}
|
246
|
+
}
|
218
247
|
}
|
219
248
|
|
220
249
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
221
250
|
let c_element = c_html
|
222
|
-
.define_class("Element",
|
223
|
-
.expect("cannot
|
251
|
+
.define_class("Element", magnus::class::object())
|
252
|
+
.expect("cannot define class Selma::HTML::Element");
|
224
253
|
|
225
254
|
c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
|
226
255
|
c_element.define_method("tag_name=", method!(SelmaHTMLElement::set_tag_name, 1))?;
|
@@ -250,5 +279,12 @@ pub fn init(c_html: RClass) -> Result<(), Error> {
|
|
250
279
|
method!(SelmaHTMLElement::set_inner_content, -1),
|
251
280
|
)?;
|
252
281
|
|
282
|
+
c_element.define_method("remove", method!(SelmaHTMLElement::remove, 0))?;
|
283
|
+
c_element.define_method(
|
284
|
+
"remove_and_keep_content",
|
285
|
+
method!(SelmaHTMLElement::remove_and_keep_content, 0),
|
286
|
+
)?;
|
287
|
+
c_element.define_method("removed?", method!(SelmaHTMLElement::is_removed, 0))?;
|
288
|
+
|
253
289
|
Ok(())
|
254
290
|
}
|
@@ -26,8 +26,8 @@ impl SelmaHTMLEndTag {
|
|
26
26
|
|
27
27
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
28
28
|
let c_end_tag = c_html
|
29
|
-
.define_class("EndTag",
|
30
|
-
.expect("cannot
|
29
|
+
.define_class("EndTag", magnus::class::object())
|
30
|
+
.expect("cannot define class Selma::HTML::EndTag");
|
31
31
|
|
32
32
|
c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
|
33
33
|
|
@@ -39,12 +39,12 @@ impl SelmaHTMLTextChunk {
|
|
39
39
|
|
40
40
|
if let Ok(tc) = binding.text_chunk.get() {
|
41
41
|
match tc.text_type() {
|
42
|
-
TextType::Data => Ok(Symbol::
|
43
|
-
TextType::PlainText => Ok(Symbol::
|
44
|
-
TextType::RawText => Ok(Symbol::
|
45
|
-
TextType::ScriptData => Ok(Symbol::
|
46
|
-
TextType::RCData => Ok(Symbol::
|
47
|
-
TextType::CDataSection => Ok(Symbol::
|
42
|
+
TextType::Data => Ok(Symbol::new("data")),
|
43
|
+
TextType::PlainText => Ok(Symbol::new("plain_text")),
|
44
|
+
TextType::RawText => Ok(Symbol::new("raw_text")),
|
45
|
+
TextType::ScriptData => Ok(Symbol::new("script")),
|
46
|
+
TextType::RCData => Ok(Symbol::new("rc_data")),
|
47
|
+
TextType::CDataSection => Ok(Symbol::new("cdata_section")),
|
48
48
|
}
|
49
49
|
} else {
|
50
50
|
Err(Error::new(
|
@@ -99,8 +99,8 @@ impl SelmaHTMLTextChunk {
|
|
99
99
|
|
100
100
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
101
101
|
let c_text_chunk = c_html
|
102
|
-
.define_class("TextChunk",
|
103
|
-
.expect("cannot
|
102
|
+
.define_class("TextChunk", magnus::class::object())
|
103
|
+
.expect("cannot define class Selma::HTML::TextChunk");
|
104
104
|
|
105
105
|
c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
|
106
106
|
c_text_chunk.define_method("content", method!(SelmaHTMLTextChunk::to_s, 0))?;
|
data/ext/selma/src/html.rs
CHANGED
@@ -5,7 +5,9 @@ use magnus::{Error, Module, RModule};
|
|
5
5
|
pub(crate) struct SelmaHTML {}
|
6
6
|
|
7
7
|
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
8
|
-
let c_html = m_selma
|
8
|
+
let c_html = m_selma
|
9
|
+
.define_class("HTML", magnus::class::object())
|
10
|
+
.expect("cannot define class Selma::HTML");
|
9
11
|
|
10
12
|
element::init(c_html).expect("cannot define Selma::HTML::Element class");
|
11
13
|
end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
|
data/ext/selma/src/lib.rs
CHANGED
data/ext/selma/src/rewriter.rs
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
use lol_html::{
|
2
2
|
doc_comments, doctype, element,
|
3
|
-
html_content::{Element,
|
3
|
+
html_content::{Element, TextChunk},
|
4
4
|
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
|
5
5
|
};
|
6
|
-
use magnus::{
|
6
|
+
use magnus::{
|
7
|
+
exception, function, method, scan_args,
|
8
|
+
typed_data::Obj,
|
9
|
+
value::{Opaque, ReprValue},
|
10
|
+
Module, Object, RArray, RModule, Ruby, Value,
|
11
|
+
};
|
7
12
|
|
8
13
|
use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
|
9
14
|
|
@@ -12,33 +17,29 @@ use crate::{
|
|
12
17
|
sanitizer::SelmaSanitizer,
|
13
18
|
selector::SelmaSelector,
|
14
19
|
tags::Tag,
|
15
|
-
wrapped_struct::WrappedStruct,
|
16
20
|
};
|
17
21
|
|
18
|
-
#[derive(Clone
|
22
|
+
#[derive(Clone)]
|
19
23
|
pub struct Handler {
|
20
|
-
rb_handler: Value
|
21
|
-
rb_selector:
|
22
|
-
|
23
|
-
|
24
|
-
total_elapsed_element_handlers: f64,
|
24
|
+
rb_handler: Opaque<Value>,
|
25
|
+
rb_selector: Opaque<Obj<SelmaSelector>>,
|
26
|
+
// total_element_handler_calls: usize,
|
27
|
+
// total_elapsed_element_handlers: f64,
|
25
28
|
|
26
|
-
total_text_handler_calls: usize,
|
27
|
-
total_elapsed_text_handlers: f64,
|
29
|
+
// total_text_handler_calls: usize,
|
30
|
+
// total_elapsed_text_handlers: f64,
|
28
31
|
}
|
29
32
|
|
30
33
|
pub struct Rewriter {
|
31
34
|
sanitizer: Option<SelmaSanitizer>,
|
32
35
|
handlers: Vec<Handler>,
|
33
|
-
|
34
|
-
total_elapsed: f64,
|
36
|
+
// total_elapsed: f64,
|
35
37
|
}
|
36
38
|
|
37
39
|
#[magnus::wrap(class = "Selma::Rewriter")]
|
38
40
|
pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
|
39
41
|
|
40
|
-
|
41
|
-
unsafe impl Send for SelmaRewriter {}
|
42
|
+
type RewriterValues = (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>);
|
42
43
|
|
43
44
|
impl SelmaRewriter {
|
44
45
|
const SELMA_ON_END_TAG: &str = "on_end_tag";
|
@@ -56,15 +57,15 @@ impl SelmaRewriter {
|
|
56
57
|
let sanitizer = match rb_sanitizer {
|
57
58
|
None => {
|
58
59
|
let default_sanitizer = SelmaSanitizer::new(&[])?;
|
59
|
-
let wrapped_sanitizer =
|
60
|
+
let wrapped_sanitizer = Obj::wrap(default_sanitizer);
|
60
61
|
wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
61
|
-
Some(wrapped_sanitizer.get().
|
62
|
+
Some(wrapped_sanitizer.get().to_owned())
|
62
63
|
}
|
63
64
|
Some(sanitizer_value) => match sanitizer_value {
|
64
65
|
None => None,
|
65
66
|
Some(sanitizer) => {
|
66
67
|
sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
67
|
-
Some(sanitizer.get().
|
68
|
+
Some(sanitizer.get().to_owned())
|
68
69
|
}
|
69
70
|
},
|
70
71
|
};
|
@@ -89,24 +90,23 @@ impl SelmaRewriter {
|
|
89
90
|
));
|
90
91
|
}
|
91
92
|
|
92
|
-
let rb_selector:
|
93
|
-
|
94
|
-
Err(
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
};
|
93
|
+
let rb_selector: Obj<SelmaSelector> = match rb_handler.funcall("selector", ()) {
|
94
|
+
Err(err) => {
|
95
|
+
return Err(magnus::Error::new(
|
96
|
+
exception::type_error(),
|
97
|
+
format!("Error instantiating selector: {err:?}"),
|
98
|
+
));
|
99
|
+
}
|
100
|
+
Ok(rb_selector) => rb_selector,
|
101
|
+
};
|
102
102
|
let handler = Handler {
|
103
|
-
rb_handler,
|
104
|
-
rb_selector,
|
105
|
-
total_element_handler_calls: 0,
|
106
|
-
total_elapsed_element_handlers: 0.0,
|
103
|
+
rb_handler: Opaque::from(rb_handler),
|
104
|
+
rb_selector: Opaque::from(rb_selector),
|
105
|
+
// total_element_handler_calls: 0,
|
106
|
+
// total_elapsed_element_handlers: 0.0,
|
107
107
|
|
108
|
-
total_text_handler_calls: 0,
|
109
|
-
total_elapsed_text_handlers: 0.0,
|
108
|
+
// total_text_handler_calls: 0,
|
109
|
+
// total_elapsed_text_handlers: 0.0,
|
110
110
|
};
|
111
111
|
handlers.push(handler);
|
112
112
|
}
|
@@ -124,20 +124,12 @@ impl SelmaRewriter {
|
|
124
124
|
Ok(Self(std::cell::RefCell::new(Rewriter {
|
125
125
|
sanitizer,
|
126
126
|
handlers,
|
127
|
-
total_elapsed: 0.0,
|
127
|
+
// total_elapsed: 0.0,
|
128
128
|
})))
|
129
129
|
}
|
130
130
|
|
131
131
|
#[allow(clippy::let_unit_value)]
|
132
|
-
fn scan_parse_args(
|
133
|
-
args: &[Value],
|
134
|
-
) -> Result<
|
135
|
-
(
|
136
|
-
Option<Option<WrappedStruct<SelmaSanitizer>>>,
|
137
|
-
Option<RArray>,
|
138
|
-
),
|
139
|
-
magnus::Error,
|
140
|
-
> {
|
132
|
+
fn scan_parse_args(args: &[Value]) -> Result<RewriterValues, magnus::Error> {
|
141
133
|
let args = scan_args::scan_args(args)?;
|
142
134
|
let _: () = args.required;
|
143
135
|
let _: () = args.optional;
|
@@ -148,10 +140,7 @@ impl SelmaRewriter {
|
|
148
140
|
let kwargs = scan_args::get_kwargs::<
|
149
141
|
_,
|
150
142
|
(),
|
151
|
-
(
|
152
|
-
Option<Option<WrappedStruct<SelmaSanitizer>>>,
|
153
|
-
Option<RArray>,
|
154
|
-
),
|
143
|
+
(Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>),
|
155
144
|
(),
|
156
145
|
>(args.keywords, &[], &["sanitizer", "handlers"])?;
|
157
146
|
let (rb_sanitizer, rb_handlers) = kwargs.optional;
|
@@ -273,7 +262,9 @@ impl SelmaRewriter {
|
|
273
262
|
handlers.iter().for_each(|handler| {
|
274
263
|
let element_stack: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(vec![]));
|
275
264
|
|
276
|
-
let
|
265
|
+
let ruby = Ruby::get().unwrap();
|
266
|
+
|
267
|
+
let selector = ruby.get_inner(handler.rb_selector);
|
277
268
|
|
278
269
|
// TODO: test final raise by simulating errors
|
279
270
|
if selector.match_element().is_some() {
|
@@ -283,7 +274,7 @@ impl SelmaRewriter {
|
|
283
274
|
selector.match_element().unwrap(),
|
284
275
|
move |el| {
|
285
276
|
match Self::process_element_handlers(
|
286
|
-
handler.rb_handler,
|
277
|
+
ruby.get_inner(handler.rb_handler),
|
287
278
|
el,
|
288
279
|
&closure_element_stack.borrow(),
|
289
280
|
) {
|
@@ -314,7 +305,9 @@ impl SelmaRewriter {
|
|
314
305
|
}
|
315
306
|
}
|
316
307
|
|
317
|
-
|
308
|
+
let ruby = Ruby::get().unwrap();
|
309
|
+
match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
|
310
|
+
{
|
318
311
|
Ok(_) => Ok(()),
|
319
312
|
Err(err) => Err(err.to_string().into()),
|
320
313
|
}
|
@@ -334,11 +327,15 @@ impl SelmaRewriter {
|
|
334
327
|
element_stack.as_ref().borrow_mut().push(tag_name);
|
335
328
|
|
336
329
|
let closure_element_stack = element_stack.clone();
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
330
|
+
|
331
|
+
el.end_tag_handlers()
|
332
|
+
.unwrap()
|
333
|
+
.push(Box::new(move |_end_tag| {
|
334
|
+
let mut stack = closure_element_stack.as_ref().borrow_mut();
|
335
|
+
stack.pop();
|
336
|
+
Ok(())
|
337
|
+
}));
|
338
|
+
|
342
339
|
Ok(())
|
343
340
|
}));
|
344
341
|
});
|
@@ -373,14 +370,17 @@ impl SelmaRewriter {
|
|
373
370
|
// if `on_end_tag` function is defined, call it
|
374
371
|
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
375
372
|
// TODO: error here is an "EndTagError"
|
376
|
-
element
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
373
|
+
element
|
374
|
+
.end_tag_handlers()
|
375
|
+
.unwrap()
|
376
|
+
.push(Box::new(move |end_tag| {
|
377
|
+
let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
|
378
|
+
|
379
|
+
match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
|
380
|
+
Ok(_) => Ok(()),
|
381
|
+
Err(err) => Err(err.to_string().into()),
|
382
|
+
}
|
383
|
+
}));
|
384
384
|
}
|
385
385
|
|
386
386
|
let rb_element = SelmaHTMLElement::new(element, ancestors);
|
@@ -417,8 +417,8 @@ impl SelmaRewriter {
|
|
417
417
|
|
418
418
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
419
419
|
let c_rewriter = m_selma
|
420
|
-
.define_class("Rewriter",
|
421
|
-
.expect("cannot
|
420
|
+
.define_class("Rewriter", magnus::class::object())
|
421
|
+
.expect("cannot define class Selma::Rewriter");
|
422
422
|
|
423
423
|
c_rewriter.define_singleton_method("new", function!(SelmaRewriter::new, -1))?;
|
424
424
|
c_rewriter
|
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -4,9 +4,13 @@ use lol_html::{
|
|
4
4
|
errors::AttributeNameError,
|
5
5
|
html_content::{Comment, ContentType, Doctype, Element, EndTag},
|
6
6
|
};
|
7
|
-
use magnus::{
|
7
|
+
use magnus::{
|
8
|
+
class, function, method, scan_args,
|
9
|
+
value::{Opaque, ReprValue},
|
10
|
+
Module, Object, RArray, RHash, RModule, Ruby, Value,
|
11
|
+
};
|
8
12
|
|
9
|
-
#[derive(Clone, Debug)]
|
13
|
+
#[derive(Clone, Debug, Default)]
|
10
14
|
struct ElementSanitizer {
|
11
15
|
allowed_attrs: Vec<String>,
|
12
16
|
required_attrs: Vec<String>,
|
@@ -14,19 +18,7 @@ struct ElementSanitizer {
|
|
14
18
|
protocol_sanitizers: HashMap<String, Vec<String>>,
|
15
19
|
}
|
16
20
|
|
17
|
-
|
18
|
-
fn default() -> Self {
|
19
|
-
ElementSanitizer {
|
20
|
-
allowed_attrs: vec![],
|
21
|
-
allowed_classes: vec![],
|
22
|
-
required_attrs: vec![],
|
23
|
-
|
24
|
-
protocol_sanitizers: HashMap::new(),
|
25
|
-
}
|
26
|
-
}
|
27
|
-
}
|
28
|
-
|
29
|
-
#[derive(Clone, Debug)]
|
21
|
+
#[derive(Clone)]
|
30
22
|
pub struct Sanitizer {
|
31
23
|
flags: [u8; crate::tags::Tag::TAG_COUNT],
|
32
24
|
allowed_attrs: Vec<String>,
|
@@ -36,10 +28,10 @@ pub struct Sanitizer {
|
|
36
28
|
pub escape_tagfilter: bool,
|
37
29
|
pub allow_comments: bool,
|
38
30
|
pub allow_doctype: bool,
|
39
|
-
config: RHash
|
31
|
+
config: Opaque<RHash>,
|
40
32
|
}
|
41
33
|
|
42
|
-
#[derive(Clone
|
34
|
+
#[derive(Clone)]
|
43
35
|
#[magnus::wrap(class = "Selma::Sanitizer")]
|
44
36
|
pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
|
45
37
|
|
@@ -77,14 +69,15 @@ impl SelmaSanitizer {
|
|
77
69
|
escape_tagfilter: true,
|
78
70
|
allow_comments: false,
|
79
71
|
allow_doctype: true,
|
80
|
-
config,
|
72
|
+
config: config.into(),
|
81
73
|
})))
|
82
74
|
}
|
83
75
|
|
84
76
|
fn get_config(&self) -> Result<RHash, magnus::Error> {
|
85
77
|
let binding = self.0.borrow();
|
78
|
+
let ruby = Ruby::get().unwrap();
|
86
79
|
|
87
|
-
Ok(binding.config)
|
80
|
+
Ok(ruby.get_inner(binding.config))
|
88
81
|
}
|
89
82
|
|
90
83
|
/// Toggle a sanitizer option on or off.
|
@@ -308,9 +301,9 @@ impl SelmaSanitizer {
|
|
308
301
|
let mut buf = String::new();
|
309
302
|
// ...then, escape any special characters, for security
|
310
303
|
if attr_name == "href" {
|
311
|
-
escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
|
304
|
+
escapist::escape_href(&mut buf, unescaped_attr_val.as_str()).unwrap();
|
312
305
|
} else {
|
313
|
-
escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
|
306
|
+
escapist::escape_html(&mut buf, unescaped_attr_val.as_str()).unwrap();
|
314
307
|
};
|
315
308
|
|
316
309
|
match element.set_attribute(attr_name, &buf) {
|
@@ -530,11 +523,12 @@ impl SelmaSanitizer {
|
|
530
523
|
fn check_if_end_tag_needs_removal(element: &mut Element) {
|
531
524
|
if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
|
532
525
|
element
|
533
|
-
.
|
526
|
+
.end_tag_handlers()
|
527
|
+
.unwrap()
|
528
|
+
.push(Box::new(move |end| {
|
534
529
|
Self::remove_end_tag(end);
|
535
530
|
Ok(())
|
536
|
-
})
|
537
|
-
.unwrap();
|
531
|
+
}));
|
538
532
|
}
|
539
533
|
}
|
540
534
|
|
@@ -553,7 +547,9 @@ impl SelmaSanitizer {
|
|
553
547
|
}
|
554
548
|
|
555
549
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
556
|
-
let c_sanitizer = m_selma
|
550
|
+
let c_sanitizer = m_selma
|
551
|
+
.define_class("Sanitizer", magnus::class::object())
|
552
|
+
.expect("cannot define class Selma::Sanitizer");
|
557
553
|
|
558
554
|
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|
559
555
|
c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;
|
data/ext/selma/src/selector.rs
CHANGED
@@ -8,6 +8,8 @@ pub struct SelmaSelector {
|
|
8
8
|
ignore_text_within: Option<Vec<String>>,
|
9
9
|
}
|
10
10
|
|
11
|
+
type SelectorMatches = (Option<String>, Option<String>, Option<Vec<String>>);
|
12
|
+
|
11
13
|
impl SelmaSelector {
|
12
14
|
fn new(args: &[Value]) -> Result<Self, Error> {
|
13
15
|
let (match_element, match_text_within, rb_ignore_text_within) =
|
@@ -63,9 +65,7 @@ impl SelmaSelector {
|
|
63
65
|
}
|
64
66
|
|
65
67
|
#[allow(clippy::let_unit_value)]
|
66
|
-
fn scan_parse_args(
|
67
|
-
args: &[Value],
|
68
|
-
) -> Result<(Option<String>, Option<String>, Option<Vec<String>>), Error> {
|
68
|
+
fn scan_parse_args(args: &[Value]) -> Result<SelectorMatches, Error> {
|
69
69
|
let args = scan_args::scan_args(args)?;
|
70
70
|
let _: () = args.required;
|
71
71
|
let _: () = args.optional;
|
@@ -103,7 +103,7 @@ impl SelmaSelector {
|
|
103
103
|
|
104
104
|
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
105
105
|
let c_selector = m_selma
|
106
|
-
.define_class("Selector",
|
106
|
+
.define_class("Selector", magnus::class::object())
|
107
107
|
.expect("cannot define class Selma::Selector");
|
108
108
|
|
109
109
|
c_selector.define_singleton_method("new", function!(SelmaSelector::new, -1))?;
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.2'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake-compiler-dock
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '1.2'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '1.2'
|
69
55
|
description:
|
70
56
|
email:
|
71
57
|
- gjtorikian@gmail.com
|
@@ -74,10 +60,11 @@ extensions:
|
|
74
60
|
- ext/selma/extconf.rb
|
75
61
|
extra_rdoc_files: []
|
76
62
|
files:
|
63
|
+
- Cargo.lock
|
64
|
+
- Cargo.toml
|
77
65
|
- LICENSE.txt
|
78
66
|
- README.md
|
79
67
|
- ext/selma/Cargo.toml
|
80
|
-
- ext/selma/_util.rb
|
81
68
|
- ext/selma/extconf.rb
|
82
69
|
- ext/selma/src/html.rs
|
83
70
|
- ext/selma/src/html/element.rs
|
@@ -89,7 +76,6 @@ files:
|
|
89
76
|
- ext/selma/src/sanitizer.rs
|
90
77
|
- ext/selma/src/selector.rs
|
91
78
|
- ext/selma/src/tags.rs
|
92
|
-
- ext/selma/src/wrapped_struct.rs
|
93
79
|
- lib/selma.rb
|
94
80
|
- lib/selma/extension.rb
|
95
81
|
- lib/selma/html.rb
|
@@ -102,7 +88,6 @@ files:
|
|
102
88
|
- lib/selma/sanitizer/config/restricted.rb
|
103
89
|
- lib/selma/selector.rb
|
104
90
|
- lib/selma/version.rb
|
105
|
-
- selma.gemspec
|
106
91
|
homepage:
|
107
92
|
licenses:
|
108
93
|
- MIT
|
@@ -126,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
126
111
|
- !ruby/object:Gem::Version
|
127
112
|
version: 3.3.22
|
128
113
|
requirements: []
|
129
|
-
rubygems_version: 3.4.
|
114
|
+
rubygems_version: 3.4.20
|
130
115
|
signing_key:
|
131
116
|
specification_version: 4
|
132
117
|
summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
|