selma 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/selma/Cargo.toml +2 -2
- data/ext/selma/src/html/element.rs +6 -6
- data/ext/selma/src/native_ref_wrap.rs +3 -3
- data/ext/selma/src/rewriter.rs +15 -15
- data/ext/selma/src/sanitizer.rs +17 -7
- data/ext/selma/src/selector.rs +2 -5
- data/lib/selma/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5f579c1ac9c0e6e24d6c5919f27072940adb9e31c5a0cc4429b6338cc24f6315
|
4
|
+
data.tar.gz: 90b190ebde12c3a38fb682af3627d83fe3e704ad7b9bc301d70eb81da6d1c63c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4da6d3776c6c4ad04c73bdfa43e3cef4ce1ed71ada3d5653041b388c41cf4b1c77dc65671238ea28f36092d68bd4dcf3b9585ae35f7ea8f280c68a0db6b93fed
|
7
|
+
data.tar.gz: e29bb6f28f5fb9123946b97948aef0e0857938b4a5869fce451cba3f1fdb95af1033908418f22ad454f5feb64295db40cc8df63fb183f6a628019bbd3618547e
|
data/ext/selma/Cargo.toml
CHANGED
@@ -6,8 +6,8 @@ edition = "2021"
|
|
6
6
|
[dependencies]
|
7
7
|
enum-iterator = "1.2"
|
8
8
|
escapist = "0.0.1"
|
9
|
-
magnus = "
|
10
|
-
lol_html =
|
9
|
+
magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
|
10
|
+
lol_html = "0.3"
|
11
11
|
|
12
12
|
[lib]
|
13
13
|
name = "selma"
|
@@ -51,7 +51,7 @@ impl SelmaHTMLElement {
|
|
51
51
|
Ok(_) => Ok(value),
|
52
52
|
Err(err) => Err(Error::new(
|
53
53
|
exception::runtime_error(),
|
54
|
-
format!("AttributeNameError: {}"
|
54
|
+
format!("AttributeNameError: {err:?}"),
|
55
55
|
)),
|
56
56
|
}
|
57
57
|
} else {
|
@@ -81,7 +81,7 @@ impl SelmaHTMLElement {
|
|
81
81
|
Ok(_) => {}
|
82
82
|
Err(err) => Err(Error::new(
|
83
83
|
exception::runtime_error(),
|
84
|
-
format!("AttributeNameError: {}"
|
84
|
+
format!("AttributeNameError: {err:?}"),
|
85
85
|
))
|
86
86
|
.unwrap(),
|
87
87
|
});
|
@@ -99,7 +99,7 @@ impl SelmaHTMLElement {
|
|
99
99
|
.for_each(|ancestor| match array.push(RString::new(ancestor)) {
|
100
100
|
Ok(_) => {}
|
101
101
|
Err(err) => {
|
102
|
-
Err(Error::new(exception::runtime_error(), format!("{}"
|
102
|
+
Err(Error::new(exception::runtime_error(), format!("{err:?}"))).unwrap()
|
103
103
|
}
|
104
104
|
});
|
105
105
|
|
@@ -151,18 +151,18 @@ impl SelmaHTMLElement {
|
|
151
151
|
|
152
152
|
fn find_content_type(content_type: Symbol) -> ContentType {
|
153
153
|
match content_type.name() {
|
154
|
-
Ok(name) => match
|
154
|
+
Ok(name) => match name {
|
155
155
|
Cow::Borrowed("as_text") => ContentType::Text,
|
156
156
|
Cow::Borrowed("as_html") => ContentType::Html,
|
157
157
|
_ => Err(Error::new(
|
158
158
|
exception::runtime_error(),
|
159
|
-
format!("unknown symbol `{}`"
|
159
|
+
format!("unknown symbol `{name:?}`"),
|
160
160
|
))
|
161
161
|
.unwrap(),
|
162
162
|
},
|
163
163
|
Err(err) => Err(Error::new(
|
164
164
|
exception::runtime_error(),
|
165
|
-
format!("Could not unwrap symbol"),
|
165
|
+
format!("Could not unwrap symbol: {err:?}"),
|
166
166
|
))
|
167
167
|
.unwrap(),
|
168
168
|
}
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use std::{cell::Cell, marker::PhantomData,
|
1
|
+
use std::{cell::Cell, marker::PhantomData, rc::Rc};
|
2
2
|
|
3
3
|
// NOTE: My Rust isn't good enough to know what any of this does,
|
4
4
|
// but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
|
@@ -37,7 +37,7 @@ pub struct NativeRefWrap<R> {
|
|
37
37
|
impl<R> NativeRefWrap<R> {
|
38
38
|
pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
|
39
39
|
let wrap = NativeRefWrap {
|
40
|
-
inner_ptr:
|
40
|
+
inner_ptr: inner as *const I as *mut R,
|
41
41
|
poisoned: Rc::new(Cell::new(false)),
|
42
42
|
};
|
43
43
|
|
@@ -48,7 +48,7 @@ impl<R> NativeRefWrap<R> {
|
|
48
48
|
|
49
49
|
pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor) {
|
50
50
|
let wrap = NativeRefWrap {
|
51
|
-
inner_ptr:
|
51
|
+
inner_ptr: inner as *mut I as *mut R,
|
52
52
|
poisoned: Rc::new(Cell::new(false)),
|
53
53
|
};
|
54
54
|
|
data/ext/selma/src/rewriter.rs
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
use std::{borrow::Cow, cell::RefCell, rc::Rc};
|
2
|
-
|
3
1
|
use lol_html::{
|
4
2
|
doc_comments, doctype, element,
|
5
3
|
html_content::{ContentType, Element, EndTag, TextChunk},
|
@@ -7,6 +5,8 @@ use lol_html::{
|
|
7
5
|
};
|
8
6
|
use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
|
9
7
|
|
8
|
+
use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
|
9
|
+
|
10
10
|
use crate::{
|
11
11
|
html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
|
12
12
|
sanitizer::SelmaSanitizer,
|
@@ -83,18 +83,18 @@ impl SelmaRewriter {
|
|
83
83
|
return Err(magnus::Error::new(
|
84
84
|
exception::no_method_error(),
|
85
85
|
format!(
|
86
|
-
"Could not call #selector on {:?}; is this an object that defines it?",
|
87
|
-
|
86
|
+
"Could not call #selector on {classname:?}; is this an object that defines it?",
|
87
|
+
|
88
88
|
),
|
89
89
|
));
|
90
90
|
}
|
91
91
|
|
92
92
|
let rb_selector: WrappedStruct<SelmaSelector> =
|
93
93
|
match rb_handler.funcall("selector", ()) {
|
94
|
-
Err(
|
94
|
+
Err(err) => {
|
95
95
|
return Err(magnus::Error::new(
|
96
96
|
exception::type_error(),
|
97
|
-
format!("Error instantiating selector: {}"
|
97
|
+
format!("Error instantiating selector: {err:?}"),
|
98
98
|
));
|
99
99
|
}
|
100
100
|
Ok(rb_selector) => rb_selector,
|
@@ -164,8 +164,6 @@ impl SelmaRewriter {
|
|
164
164
|
let sanitized_html = match &self.0.borrow().sanitizer {
|
165
165
|
None => html,
|
166
166
|
Some(sanitizer) => {
|
167
|
-
// let first_pass_html = Self::perform_initial_sanitization(sanitizer, &html).unwrap();
|
168
|
-
|
169
167
|
// due to malicious html crafting
|
170
168
|
// (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
|
171
169
|
// we need to run sanitization several times to truly remove unwanted tags,
|
@@ -182,7 +180,7 @@ impl SelmaRewriter {
|
|
182
180
|
Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
|
183
181
|
Err(err) => Err(magnus::Error::new(
|
184
182
|
exception::runtime_error(),
|
185
|
-
format!("{}"
|
183
|
+
format!("{err:?}"),
|
186
184
|
)),
|
187
185
|
}
|
188
186
|
}
|
@@ -218,6 +216,7 @@ impl SelmaRewriter {
|
|
218
216
|
|
219
217
|
Ok(())
|
220
218
|
})],
|
219
|
+
// TODO: allow for MemorySettings to be defined
|
221
220
|
..Settings::default()
|
222
221
|
},
|
223
222
|
|c: &[u8]| first_pass_html.extend_from_slice(c),
|
@@ -361,7 +360,7 @@ impl SelmaRewriter {
|
|
361
360
|
Err(err) => {
|
362
361
|
return Err(magnus::Error::new(
|
363
362
|
exception::runtime_error(),
|
364
|
-
format!("{}"
|
363
|
+
format!("{err:?}"),
|
365
364
|
));
|
366
365
|
}
|
367
366
|
}
|
@@ -372,7 +371,7 @@ impl SelmaRewriter {
|
|
372
371
|
fn process_element_handlers(
|
373
372
|
rb_handler: Value,
|
374
373
|
element: &mut Element,
|
375
|
-
ancestors: &
|
374
|
+
ancestors: &[String],
|
376
375
|
) -> Result<(), magnus::Error> {
|
377
376
|
// if `on_end_tag` function is defined, call it
|
378
377
|
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
@@ -393,7 +392,7 @@ impl SelmaRewriter {
|
|
393
392
|
Ok(_) => Ok(()),
|
394
393
|
Err(err) => Err(magnus::Error::new(
|
395
394
|
exception::runtime_error(),
|
396
|
-
format!("{}"
|
395
|
+
format!("{err:?}"),
|
397
396
|
)),
|
398
397
|
}
|
399
398
|
}
|
@@ -402,11 +401,12 @@ impl SelmaRewriter {
|
|
402
401
|
// prevents missing `handle_text` function
|
403
402
|
let content = text.as_str();
|
404
403
|
|
405
|
-
//
|
404
|
+
// seems that sometimes lol-html returns blank text / EOLs?
|
406
405
|
if content.is_empty() {
|
407
406
|
return Ok(());
|
408
407
|
}
|
409
|
-
|
408
|
+
|
409
|
+
let rb_result = rb_handler.funcall::<_, _, String>(Self::SELMA_HANDLE_TEXT, (content,));
|
410
410
|
|
411
411
|
if rb_result.is_err() {
|
412
412
|
return Err(magnus::Error::new(
|
@@ -419,7 +419,7 @@ impl SelmaRewriter {
|
|
419
419
|
));
|
420
420
|
}
|
421
421
|
|
422
|
-
let new_content
|
422
|
+
let new_content = rb_result.unwrap();
|
423
423
|
// TODO: can this be an option?
|
424
424
|
text.replace(&new_content, ContentType::Html);
|
425
425
|
|
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -35,7 +35,7 @@ pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
|
|
35
35
|
|
36
36
|
impl SelmaSanitizer {
|
37
37
|
const SELMA_SANITIZER_ALLOW: u8 = (1 << 0);
|
38
|
-
const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
|
38
|
+
// const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
|
39
39
|
const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
|
40
40
|
const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
|
41
41
|
|
@@ -229,7 +229,7 @@ impl SelmaSanitizer {
|
|
229
229
|
}
|
230
230
|
}
|
231
231
|
|
232
|
-
pub fn sanitize_attributes(&self, element: &mut Element) {
|
232
|
+
pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), magnus::Error> {
|
233
233
|
let binding = self.0.borrow_mut();
|
234
234
|
let tag = Tag::tag_from_element(element);
|
235
235
|
let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
|
@@ -247,7 +247,7 @@ impl SelmaSanitizer {
|
|
247
247
|
// encountered, remove the entire element to be safe.
|
248
248
|
if attr_name.starts_with("<!--") {
|
249
249
|
Self::force_remove_element(self, element);
|
250
|
-
return;
|
250
|
+
return Ok(());
|
251
251
|
}
|
252
252
|
|
253
253
|
// first, trim leading spaces and unescape any encodings
|
@@ -268,7 +268,15 @@ impl SelmaSanitizer {
|
|
268
268
|
// since output is always UTF-8.
|
269
269
|
if Tag::is_meta(tag) {
|
270
270
|
if attr_name == "charset" && unescaped_attr_val != "utf-8" {
|
271
|
-
element.set_attribute(attr_name, "utf-8")
|
271
|
+
match element.set_attribute(attr_name, "utf-8") {
|
272
|
+
Ok(_) => {}
|
273
|
+
Err(_) => {
|
274
|
+
return Err(magnus::Error::new(
|
275
|
+
exception::runtime_error(),
|
276
|
+
format!("Unable to change {attr_name:?}"),
|
277
|
+
));
|
278
|
+
}
|
279
|
+
}
|
272
280
|
}
|
273
281
|
} else if !unescaped_attr_val.is_empty() {
|
274
282
|
let mut buf = String::new();
|
@@ -287,14 +295,16 @@ impl SelmaSanitizer {
|
|
287
295
|
|
288
296
|
let required = &element_sanitizer.required_attrs;
|
289
297
|
if required.contains(&"*".to_string()) {
|
290
|
-
return;
|
298
|
+
return Ok(());
|
291
299
|
}
|
292
300
|
for attr in element.attributes().iter() {
|
293
301
|
let attr_name = &attr.name();
|
294
302
|
if required.contains(attr_name) {
|
295
|
-
return;
|
303
|
+
return Ok(());
|
296
304
|
}
|
297
305
|
}
|
306
|
+
|
307
|
+
Ok(())
|
298
308
|
}
|
299
309
|
|
300
310
|
fn should_keep_attribute(
|
@@ -423,7 +433,7 @@ impl SelmaSanitizer {
|
|
423
433
|
Ok(_) => Ok(true),
|
424
434
|
Err(err) => Err(Error::new(
|
425
435
|
exception::runtime_error(),
|
426
|
-
format!("AttributeNameError: {}"
|
436
|
+
format!("AttributeNameError: {err:?}"),
|
427
437
|
)),
|
428
438
|
}
|
429
439
|
}
|
data/ext/selma/src/selector.rs
CHANGED
@@ -27,7 +27,7 @@ impl SelmaSelector {
|
|
27
27
|
if css.parse::<lol_html::Selector>().is_err() {
|
28
28
|
return Err(Error::new(
|
29
29
|
exception::arg_error(),
|
30
|
-
format!("Could not parse `match_element` (`{}`) as valid CSS"
|
30
|
+
format!("Could not parse `match_element` (`{css:?}`) as valid CSS"),
|
31
31
|
));
|
32
32
|
}
|
33
33
|
}
|
@@ -37,10 +37,7 @@ impl SelmaSelector {
|
|
37
37
|
if css.parse::<lol_html::Selector>().is_err() {
|
38
38
|
return Err(Error::new(
|
39
39
|
exception::arg_error(),
|
40
|
-
format!(
|
41
|
-
"Could not parse `match_text_within` (`{}`) as valid CSS",
|
42
|
-
css
|
43
|
-
),
|
40
|
+
format!("Could not parse `match_text_within` (`{css:?}`) as valid CSS",),
|
44
41
|
));
|
45
42
|
}
|
46
43
|
}
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|