selma 0.0.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +753 -0
- data/Cargo.toml +6 -0
- data/README.md +4 -1
- data/ext/selma/Cargo.toml +3 -3
- data/ext/selma/extconf.rb +0 -2
- data/ext/selma/src/html/element.rs +38 -2
- data/ext/selma/src/html/end_tag.rs +2 -2
- data/ext/selma/src/html/text_chunk.rs +8 -8
- data/ext/selma/src/html.rs +3 -1
- data/ext/selma/src/lib.rs +0 -1
- data/ext/selma/src/rewriter.rs +66 -66
- data/ext/selma/src/sanitizer.rs +21 -25
- data/ext/selma/src/selector.rs +4 -4
- data/lib/selma/version.rb +1 -1
- metadata +5 -20
- data/ext/selma/_util.rb +0 -102
- data/ext/selma/src/wrapped_struct.rs +0 -92
- data/selma.gemspec +0 -41
data/Cargo.toml
ADDED
data/README.md
CHANGED
@@ -29,7 +29,7 @@ Selma can perform two different actions, either independently or together:
|
|
29
29
|
- Sanitize HTML, through a [Sanitize](https://github.com/rgrove/sanitize)-like allowlist syntax; and
|
30
30
|
- Select HTML using CSS rules, and manipulate elements and text nodes along the way.
|
31
31
|
|
32
|
-
It does this through two
|
32
|
+
It does this through two kwargs: `sanitizer` and `handlers`. The basic API for Selma looks like this:
|
33
33
|
|
34
34
|
```ruby
|
35
35
|
sanitizer_config = {
|
@@ -164,6 +164,9 @@ The `element` argument in `handle_element` has the following methods:
|
|
164
164
|
- `prepend(content, as: content_type)`: prepends `content` to the element's inner content, i.e. inserts content right after the element's start tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
165
165
|
- `append(content, as: content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
166
166
|
- `set_inner_content`: Replaces inner content of the element with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
167
|
+
- `remove`: Removes the element and its inner content.
|
168
|
+
- `remove_and_keep_content`: Removes the element, but keeps its content. I.e. remove start and end tags of the element.
|
169
|
+
- `removed?`: A bool which identifies if the element has been removed or replaced with some content.
|
167
170
|
|
168
171
|
#### `text_chunk` methods
|
169
172
|
|
data/ext/selma/Cargo.toml
CHANGED
@@ -4,10 +4,10 @@ version = "1.0.0"
|
|
4
4
|
edition = "2021"
|
5
5
|
|
6
6
|
[dependencies]
|
7
|
-
enum-iterator = "1.
|
7
|
+
enum-iterator = "1.4"
|
8
8
|
escapist = "0.0.2"
|
9
|
-
magnus =
|
10
|
-
lol_html = "
|
9
|
+
magnus = "0.6"
|
10
|
+
lol_html = "1.2"
|
11
11
|
|
12
12
|
[lib]
|
13
13
|
name = "selma"
|
data/ext/selma/extconf.rb
CHANGED
data/ext/selma/src/html/element.rs
CHANGED
@@ -215,12 +215,41 @@ impl SelmaHTMLElement {
|
|
215
215
|
|
216
216
|
Ok(())
|
217
217
|
}
|
218
|
+
|
219
|
+
fn remove(&self) {
|
220
|
+
let mut binding = self.0.borrow_mut();
|
221
|
+
|
222
|
+
if let Ok(e) = binding.element.get_mut() {
|
223
|
+
e.remove()
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
fn remove_and_keep_content(&self) {
|
228
|
+
let mut binding = self.0.borrow_mut();
|
229
|
+
|
230
|
+
if let Ok(e) = binding.element.get_mut() {
|
231
|
+
e.remove_and_keep_content()
|
232
|
+
}
|
233
|
+
}
|
234
|
+
|
235
|
+
fn is_removed(&self) -> Result<bool, Error> {
|
236
|
+
let binding = self.0.borrow();
|
237
|
+
|
238
|
+
if let Ok(e) = binding.element.get() {
|
239
|
+
Ok(e.removed())
|
240
|
+
} else {
|
241
|
+
Err(Error::new(
|
242
|
+
exception::runtime_error(),
|
243
|
+
"`is_removed` is not available",
|
244
|
+
))
|
245
|
+
}
|
246
|
+
}
|
218
247
|
}
|
219
248
|
|
220
249
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
221
250
|
let c_element = c_html
|
222
|
-
.define_class("Element",
|
223
|
-
.expect("cannot
|
251
|
+
.define_class("Element", magnus::class::object())
|
252
|
+
.expect("cannot define class Selma::HTML::Element");
|
224
253
|
|
225
254
|
c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
|
226
255
|
c_element.define_method("tag_name=", method!(SelmaHTMLElement::set_tag_name, 1))?;
|
@@ -250,5 +279,12 @@ pub fn init(c_html: RClass) -> Result<(), Error> {
|
|
250
279
|
method!(SelmaHTMLElement::set_inner_content, -1),
|
251
280
|
)?;
|
252
281
|
|
282
|
+
c_element.define_method("remove", method!(SelmaHTMLElement::remove, 0))?;
|
283
|
+
c_element.define_method(
|
284
|
+
"remove_and_keep_content",
|
285
|
+
method!(SelmaHTMLElement::remove_and_keep_content, 0),
|
286
|
+
)?;
|
287
|
+
c_element.define_method("removed?", method!(SelmaHTMLElement::is_removed, 0))?;
|
288
|
+
|
253
289
|
Ok(())
|
254
290
|
}
|
data/ext/selma/src/html/end_tag.rs
CHANGED
@@ -26,8 +26,8 @@ impl SelmaHTMLEndTag {
|
|
26
26
|
|
27
27
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
28
28
|
let c_end_tag = c_html
|
29
|
-
.define_class("EndTag",
|
30
|
-
.expect("cannot
|
29
|
+
.define_class("EndTag", magnus::class::object())
|
30
|
+
.expect("cannot define class Selma::HTML::EndTag");
|
31
31
|
|
32
32
|
c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
|
33
33
|
|
data/ext/selma/src/html/text_chunk.rs
CHANGED
@@ -39,12 +39,12 @@ impl SelmaHTMLTextChunk {
|
|
39
39
|
|
40
40
|
if let Ok(tc) = binding.text_chunk.get() {
|
41
41
|
match tc.text_type() {
|
42
|
-
TextType::Data => Ok(Symbol::
|
43
|
-
TextType::PlainText => Ok(Symbol::
|
44
|
-
TextType::RawText => Ok(Symbol::
|
45
|
-
TextType::ScriptData => Ok(Symbol::
|
46
|
-
TextType::RCData => Ok(Symbol::
|
47
|
-
TextType::CDataSection => Ok(Symbol::
|
42
|
+
TextType::Data => Ok(Symbol::new("data")),
|
43
|
+
TextType::PlainText => Ok(Symbol::new("plain_text")),
|
44
|
+
TextType::RawText => Ok(Symbol::new("raw_text")),
|
45
|
+
TextType::ScriptData => Ok(Symbol::new("script")),
|
46
|
+
TextType::RCData => Ok(Symbol::new("rc_data")),
|
47
|
+
TextType::CDataSection => Ok(Symbol::new("cdata_section")),
|
48
48
|
}
|
49
49
|
} else {
|
50
50
|
Err(Error::new(
|
@@ -99,8 +99,8 @@ impl SelmaHTMLTextChunk {
|
|
99
99
|
|
100
100
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
101
101
|
let c_text_chunk = c_html
|
102
|
-
.define_class("TextChunk",
|
103
|
-
.expect("cannot
|
102
|
+
.define_class("TextChunk", magnus::class::object())
|
103
|
+
.expect("cannot define class Selma::HTML::TextChunk");
|
104
104
|
|
105
105
|
c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
|
106
106
|
c_text_chunk.define_method("content", method!(SelmaHTMLTextChunk::to_s, 0))?;
|
data/ext/selma/src/html.rs
CHANGED
@@ -5,7 +5,9 @@ use magnus::{Error, Module, RModule};
|
|
5
5
|
pub(crate) struct SelmaHTML {}
|
6
6
|
|
7
7
|
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
8
|
-
let c_html = m_selma
|
8
|
+
let c_html = m_selma
|
9
|
+
.define_class("HTML", magnus::class::object())
|
10
|
+
.expect("cannot define class Selma::HTML");
|
9
11
|
|
10
12
|
element::init(c_html).expect("cannot define Selma::HTML::Element class");
|
11
13
|
end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
|
data/ext/selma/src/lib.rs
CHANGED
data/ext/selma/src/rewriter.rs
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
use lol_html::{
|
2
2
|
doc_comments, doctype, element,
|
3
|
-
html_content::{Element,
|
3
|
+
html_content::{Element, TextChunk},
|
4
4
|
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
|
5
5
|
};
|
6
|
-
use magnus::{
|
6
|
+
use magnus::{
|
7
|
+
exception, function, method, scan_args,
|
8
|
+
typed_data::Obj,
|
9
|
+
value::{Opaque, ReprValue},
|
10
|
+
Module, Object, RArray, RModule, Ruby, Value,
|
11
|
+
};
|
7
12
|
|
8
13
|
use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
|
9
14
|
|
@@ -12,33 +17,29 @@ use crate::{
|
|
12
17
|
sanitizer::SelmaSanitizer,
|
13
18
|
selector::SelmaSelector,
|
14
19
|
tags::Tag,
|
15
|
-
wrapped_struct::WrappedStruct,
|
16
20
|
};
|
17
21
|
|
18
|
-
#[derive(Clone
|
22
|
+
#[derive(Clone)]
|
19
23
|
pub struct Handler {
|
20
|
-
rb_handler: Value
|
21
|
-
rb_selector:
|
22
|
-
|
23
|
-
|
24
|
-
total_elapsed_element_handlers: f64,
|
24
|
+
rb_handler: Opaque<Value>,
|
25
|
+
rb_selector: Opaque<Obj<SelmaSelector>>,
|
26
|
+
// total_element_handler_calls: usize,
|
27
|
+
// total_elapsed_element_handlers: f64,
|
25
28
|
|
26
|
-
total_text_handler_calls: usize,
|
27
|
-
total_elapsed_text_handlers: f64,
|
29
|
+
// total_text_handler_calls: usize,
|
30
|
+
// total_elapsed_text_handlers: f64,
|
28
31
|
}
|
29
32
|
|
30
33
|
pub struct Rewriter {
|
31
34
|
sanitizer: Option<SelmaSanitizer>,
|
32
35
|
handlers: Vec<Handler>,
|
33
|
-
|
34
|
-
total_elapsed: f64,
|
36
|
+
// total_elapsed: f64,
|
35
37
|
}
|
36
38
|
|
37
39
|
#[magnus::wrap(class = "Selma::Rewriter")]
|
38
40
|
pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
|
39
41
|
|
40
|
-
|
41
|
-
unsafe impl Send for SelmaRewriter {}
|
42
|
+
type RewriterValues = (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>);
|
42
43
|
|
43
44
|
impl SelmaRewriter {
|
44
45
|
const SELMA_ON_END_TAG: &str = "on_end_tag";
|
@@ -56,15 +57,15 @@ impl SelmaRewriter {
|
|
56
57
|
let sanitizer = match rb_sanitizer {
|
57
58
|
None => {
|
58
59
|
let default_sanitizer = SelmaSanitizer::new(&[])?;
|
59
|
-
let wrapped_sanitizer =
|
60
|
+
let wrapped_sanitizer = Obj::wrap(default_sanitizer);
|
60
61
|
wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
61
|
-
Some(wrapped_sanitizer.get().
|
62
|
+
Some(wrapped_sanitizer.get().to_owned())
|
62
63
|
}
|
63
64
|
Some(sanitizer_value) => match sanitizer_value {
|
64
65
|
None => None,
|
65
66
|
Some(sanitizer) => {
|
66
67
|
sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
67
|
-
Some(sanitizer.get().
|
68
|
+
Some(sanitizer.get().to_owned())
|
68
69
|
}
|
69
70
|
},
|
70
71
|
};
|
@@ -89,24 +90,23 @@ impl SelmaRewriter {
|
|
89
90
|
));
|
90
91
|
}
|
91
92
|
|
92
|
-
let rb_selector:
|
93
|
-
|
94
|
-
Err(
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
};
|
93
|
+
let rb_selector: Obj<SelmaSelector> = match rb_handler.funcall("selector", ()) {
|
94
|
+
Err(err) => {
|
95
|
+
return Err(magnus::Error::new(
|
96
|
+
exception::type_error(),
|
97
|
+
format!("Error instantiating selector: {err:?}"),
|
98
|
+
));
|
99
|
+
}
|
100
|
+
Ok(rb_selector) => rb_selector,
|
101
|
+
};
|
102
102
|
let handler = Handler {
|
103
|
-
rb_handler,
|
104
|
-
rb_selector,
|
105
|
-
total_element_handler_calls: 0,
|
106
|
-
total_elapsed_element_handlers: 0.0,
|
103
|
+
rb_handler: Opaque::from(rb_handler),
|
104
|
+
rb_selector: Opaque::from(rb_selector),
|
105
|
+
// total_element_handler_calls: 0,
|
106
|
+
// total_elapsed_element_handlers: 0.0,
|
107
107
|
|
108
|
-
total_text_handler_calls: 0,
|
109
|
-
total_elapsed_text_handlers: 0.0,
|
108
|
+
// total_text_handler_calls: 0,
|
109
|
+
// total_elapsed_text_handlers: 0.0,
|
110
110
|
};
|
111
111
|
handlers.push(handler);
|
112
112
|
}
|
@@ -124,20 +124,12 @@ impl SelmaRewriter {
|
|
124
124
|
Ok(Self(std::cell::RefCell::new(Rewriter {
|
125
125
|
sanitizer,
|
126
126
|
handlers,
|
127
|
-
total_elapsed: 0.0,
|
127
|
+
// total_elapsed: 0.0,
|
128
128
|
})))
|
129
129
|
}
|
130
130
|
|
131
131
|
#[allow(clippy::let_unit_value)]
|
132
|
-
fn scan_parse_args(
|
133
|
-
args: &[Value],
|
134
|
-
) -> Result<
|
135
|
-
(
|
136
|
-
Option<Option<WrappedStruct<SelmaSanitizer>>>,
|
137
|
-
Option<RArray>,
|
138
|
-
),
|
139
|
-
magnus::Error,
|
140
|
-
> {
|
132
|
+
fn scan_parse_args(args: &[Value]) -> Result<RewriterValues, magnus::Error> {
|
141
133
|
let args = scan_args::scan_args(args)?;
|
142
134
|
let _: () = args.required;
|
143
135
|
let _: () = args.optional;
|
@@ -148,10 +140,7 @@ impl SelmaRewriter {
|
|
148
140
|
let kwargs = scan_args::get_kwargs::<
|
149
141
|
_,
|
150
142
|
(),
|
151
|
-
(
|
152
|
-
Option<Option<WrappedStruct<SelmaSanitizer>>>,
|
153
|
-
Option<RArray>,
|
154
|
-
),
|
143
|
+
(Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>),
|
155
144
|
(),
|
156
145
|
>(args.keywords, &[], &["sanitizer", "handlers"])?;
|
157
146
|
let (rb_sanitizer, rb_handlers) = kwargs.optional;
|
@@ -273,7 +262,9 @@ impl SelmaRewriter {
|
|
273
262
|
handlers.iter().for_each(|handler| {
|
274
263
|
let element_stack: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(vec![]));
|
275
264
|
|
276
|
-
let
|
265
|
+
let ruby = Ruby::get().unwrap();
|
266
|
+
|
267
|
+
let selector = ruby.get_inner(handler.rb_selector);
|
277
268
|
|
278
269
|
// TODO: test final raise by simulating errors
|
279
270
|
if selector.match_element().is_some() {
|
@@ -283,7 +274,7 @@ impl SelmaRewriter {
|
|
283
274
|
selector.match_element().unwrap(),
|
284
275
|
move |el| {
|
285
276
|
match Self::process_element_handlers(
|
286
|
-
handler.rb_handler,
|
277
|
+
ruby.get_inner(handler.rb_handler),
|
287
278
|
el,
|
288
279
|
&closure_element_stack.borrow(),
|
289
280
|
) {
|
@@ -314,7 +305,9 @@ impl SelmaRewriter {
|
|
314
305
|
}
|
315
306
|
}
|
316
307
|
|
317
|
-
|
308
|
+
let ruby = Ruby::get().unwrap();
|
309
|
+
match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
|
310
|
+
{
|
318
311
|
Ok(_) => Ok(()),
|
319
312
|
Err(err) => Err(err.to_string().into()),
|
320
313
|
}
|
@@ -334,11 +327,15 @@ impl SelmaRewriter {
|
|
334
327
|
element_stack.as_ref().borrow_mut().push(tag_name);
|
335
328
|
|
336
329
|
let closure_element_stack = element_stack.clone();
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
330
|
+
|
331
|
+
el.end_tag_handlers()
|
332
|
+
.unwrap()
|
333
|
+
.push(Box::new(move |_end_tag| {
|
334
|
+
let mut stack = closure_element_stack.as_ref().borrow_mut();
|
335
|
+
stack.pop();
|
336
|
+
Ok(())
|
337
|
+
}));
|
338
|
+
|
342
339
|
Ok(())
|
343
340
|
}));
|
344
341
|
});
|
@@ -373,14 +370,17 @@ impl SelmaRewriter {
|
|
373
370
|
// if `on_end_tag` function is defined, call it
|
374
371
|
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
375
372
|
// TODO: error here is an "EndTagError"
|
376
|
-
element
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
373
|
+
element
|
374
|
+
.end_tag_handlers()
|
375
|
+
.unwrap()
|
376
|
+
.push(Box::new(move |end_tag| {
|
377
|
+
let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
|
378
|
+
|
379
|
+
match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
|
380
|
+
Ok(_) => Ok(()),
|
381
|
+
Err(err) => Err(err.to_string().into()),
|
382
|
+
}
|
383
|
+
}));
|
384
384
|
}
|
385
385
|
|
386
386
|
let rb_element = SelmaHTMLElement::new(element, ancestors);
|
@@ -417,8 +417,8 @@ impl SelmaRewriter {
|
|
417
417
|
|
418
418
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
419
419
|
let c_rewriter = m_selma
|
420
|
-
.define_class("Rewriter",
|
421
|
-
.expect("cannot
|
420
|
+
.define_class("Rewriter", magnus::class::object())
|
421
|
+
.expect("cannot define class Selma::Rewriter");
|
422
422
|
|
423
423
|
c_rewriter.define_singleton_method("new", function!(SelmaRewriter::new, -1))?;
|
424
424
|
c_rewriter
|
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -4,9 +4,13 @@ use lol_html::{
|
|
4
4
|
errors::AttributeNameError,
|
5
5
|
html_content::{Comment, ContentType, Doctype, Element, EndTag},
|
6
6
|
};
|
7
|
-
use magnus::{
|
7
|
+
use magnus::{
|
8
|
+
class, function, method, scan_args,
|
9
|
+
value::{Opaque, ReprValue},
|
10
|
+
Module, Object, RArray, RHash, RModule, Ruby, Value,
|
11
|
+
};
|
8
12
|
|
9
|
-
#[derive(Clone, Debug)]
|
13
|
+
#[derive(Clone, Debug, Default)]
|
10
14
|
struct ElementSanitizer {
|
11
15
|
allowed_attrs: Vec<String>,
|
12
16
|
required_attrs: Vec<String>,
|
@@ -14,19 +18,7 @@ struct ElementSanitizer {
|
|
14
18
|
protocol_sanitizers: HashMap<String, Vec<String>>,
|
15
19
|
}
|
16
20
|
|
17
|
-
|
18
|
-
fn default() -> Self {
|
19
|
-
ElementSanitizer {
|
20
|
-
allowed_attrs: vec![],
|
21
|
-
allowed_classes: vec![],
|
22
|
-
required_attrs: vec![],
|
23
|
-
|
24
|
-
protocol_sanitizers: HashMap::new(),
|
25
|
-
}
|
26
|
-
}
|
27
|
-
}
|
28
|
-
|
29
|
-
#[derive(Clone, Debug)]
|
21
|
+
#[derive(Clone)]
|
30
22
|
pub struct Sanitizer {
|
31
23
|
flags: [u8; crate::tags::Tag::TAG_COUNT],
|
32
24
|
allowed_attrs: Vec<String>,
|
@@ -36,10 +28,10 @@ pub struct Sanitizer {
|
|
36
28
|
pub escape_tagfilter: bool,
|
37
29
|
pub allow_comments: bool,
|
38
30
|
pub allow_doctype: bool,
|
39
|
-
config: RHash
|
31
|
+
config: Opaque<RHash>,
|
40
32
|
}
|
41
33
|
|
42
|
-
#[derive(Clone
|
34
|
+
#[derive(Clone)]
|
43
35
|
#[magnus::wrap(class = "Selma::Sanitizer")]
|
44
36
|
pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
|
45
37
|
|
@@ -77,14 +69,15 @@ impl SelmaSanitizer {
|
|
77
69
|
escape_tagfilter: true,
|
78
70
|
allow_comments: false,
|
79
71
|
allow_doctype: true,
|
80
|
-
config,
|
72
|
+
config: config.into(),
|
81
73
|
})))
|
82
74
|
}
|
83
75
|
|
84
76
|
fn get_config(&self) -> Result<RHash, magnus::Error> {
|
85
77
|
let binding = self.0.borrow();
|
78
|
+
let ruby = Ruby::get().unwrap();
|
86
79
|
|
87
|
-
Ok(binding.config)
|
80
|
+
Ok(ruby.get_inner(binding.config))
|
88
81
|
}
|
89
82
|
|
90
83
|
/// Toggle a sanitizer option on or off.
|
@@ -308,9 +301,9 @@ impl SelmaSanitizer {
|
|
308
301
|
let mut buf = String::new();
|
309
302
|
// ...then, escape any special characters, for security
|
310
303
|
if attr_name == "href" {
|
311
|
-
escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
|
304
|
+
escapist::escape_href(&mut buf, unescaped_attr_val.as_str()).unwrap();
|
312
305
|
} else {
|
313
|
-
escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
|
306
|
+
escapist::escape_html(&mut buf, unescaped_attr_val.as_str()).unwrap();
|
314
307
|
};
|
315
308
|
|
316
309
|
match element.set_attribute(attr_name, &buf) {
|
@@ -530,11 +523,12 @@ impl SelmaSanitizer {
|
|
530
523
|
fn check_if_end_tag_needs_removal(element: &mut Element) {
|
531
524
|
if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
|
532
525
|
element
|
533
|
-
.
|
526
|
+
.end_tag_handlers()
|
527
|
+
.unwrap()
|
528
|
+
.push(Box::new(move |end| {
|
534
529
|
Self::remove_end_tag(end);
|
535
530
|
Ok(())
|
536
|
-
})
|
537
|
-
.unwrap();
|
531
|
+
}));
|
538
532
|
}
|
539
533
|
}
|
540
534
|
|
@@ -553,7 +547,9 @@ impl SelmaSanitizer {
|
|
553
547
|
}
|
554
548
|
|
555
549
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
556
|
-
let c_sanitizer = m_selma
|
550
|
+
let c_sanitizer = m_selma
|
551
|
+
.define_class("Sanitizer", magnus::class::object())
|
552
|
+
.expect("cannot define class Selma::Sanitizer");
|
557
553
|
|
558
554
|
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|
559
555
|
c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;
|
data/ext/selma/src/selector.rs
CHANGED
@@ -8,6 +8,8 @@ pub struct SelmaSelector {
|
|
8
8
|
ignore_text_within: Option<Vec<String>>,
|
9
9
|
}
|
10
10
|
|
11
|
+
type SelectorMatches = (Option<String>, Option<String>, Option<Vec<String>>);
|
12
|
+
|
11
13
|
impl SelmaSelector {
|
12
14
|
fn new(args: &[Value]) -> Result<Self, Error> {
|
13
15
|
let (match_element, match_text_within, rb_ignore_text_within) =
|
@@ -63,9 +65,7 @@ impl SelmaSelector {
|
|
63
65
|
}
|
64
66
|
|
65
67
|
#[allow(clippy::let_unit_value)]
|
66
|
-
fn scan_parse_args(
|
67
|
-
args: &[Value],
|
68
|
-
) -> Result<(Option<String>, Option<String>, Option<Vec<String>>), Error> {
|
68
|
+
fn scan_parse_args(args: &[Value]) -> Result<SelectorMatches, Error> {
|
69
69
|
let args = scan_args::scan_args(args)?;
|
70
70
|
let _: () = args.required;
|
71
71
|
let _: () = args.optional;
|
@@ -103,7 +103,7 @@ impl SelmaSelector {
|
|
103
103
|
|
104
104
|
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
105
105
|
let c_selector = m_selma
|
106
|
-
.define_class("Selector",
|
106
|
+
.define_class("Selector", magnus::class::object())
|
107
107
|
.expect("cannot define class Selma::Selector");
|
108
108
|
|
109
109
|
c_selector.define_singleton_method("new", function!(SelmaSelector::new, -1))?;
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.2'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake-compiler-dock
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '1.2'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '1.2'
|
69
55
|
description:
|
70
56
|
email:
|
71
57
|
- gjtorikian@gmail.com
|
@@ -74,10 +60,11 @@ extensions:
|
|
74
60
|
- ext/selma/extconf.rb
|
75
61
|
extra_rdoc_files: []
|
76
62
|
files:
|
63
|
+
- Cargo.lock
|
64
|
+
- Cargo.toml
|
77
65
|
- LICENSE.txt
|
78
66
|
- README.md
|
79
67
|
- ext/selma/Cargo.toml
|
80
|
-
- ext/selma/_util.rb
|
81
68
|
- ext/selma/extconf.rb
|
82
69
|
- ext/selma/src/html.rs
|
83
70
|
- ext/selma/src/html/element.rs
|
@@ -89,7 +76,6 @@ files:
|
|
89
76
|
- ext/selma/src/sanitizer.rs
|
90
77
|
- ext/selma/src/selector.rs
|
91
78
|
- ext/selma/src/tags.rs
|
92
|
-
- ext/selma/src/wrapped_struct.rs
|
93
79
|
- lib/selma.rb
|
94
80
|
- lib/selma/extension.rb
|
95
81
|
- lib/selma/html.rb
|
@@ -102,7 +88,6 @@ files:
|
|
102
88
|
- lib/selma/sanitizer/config/restricted.rb
|
103
89
|
- lib/selma/selector.rb
|
104
90
|
- lib/selma/version.rb
|
105
|
-
- selma.gemspec
|
106
91
|
homepage:
|
107
92
|
licenses:
|
108
93
|
- MIT
|
@@ -126,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
126
111
|
- !ruby/object:Gem::Version
|
127
112
|
version: 3.3.22
|
128
113
|
requirements: []
|
129
|
-
rubygems_version: 3.4.
|
114
|
+
rubygems_version: 3.4.20
|
130
115
|
signing_key:
|
131
116
|
specification_version: 4
|
132
117
|
summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
|