selma 0.0.2-x64-mingw-ucrt → 0.0.3-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/selma/Cargo.toml +2 -2
- data/ext/selma/src/html/element.rs +6 -6
- data/ext/selma/src/native_ref_wrap.rs +3 -3
- data/ext/selma/src/rewriter.rs +15 -15
- data/ext/selma/src/sanitizer.rs +17 -7
- data/ext/selma/src/selector.rs +2 -5
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c778a1189fea09803ab20f414e31b02ebb9502e14b9325e7f4af235526cbba4c
|
4
|
+
data.tar.gz: 3985ee4a86fb0d9dfcb591675569841f59a5adf3f6ce565e839ecb8b3d5493de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1809c68d71d5b945fa757ec3b9aab66e935b44106be7e6e4c177822c23cda06fa9e29d05a0c03805eb6404eea5cad2ba7d34255e4f9faf06d8aff28f0902eede
|
7
|
+
data.tar.gz: 32cb02767c6f241970d745b62f7c6b36fc77bf1b19446b2e4170b109a6a36ed4efda7b3f6b33a5e4d7850091c23c0b0b3692c40849c735d786dd199b2c1e7824
|
data/ext/selma/Cargo.toml
CHANGED
@@ -6,8 +6,8 @@ edition = "2021"
|
|
6
6
|
[dependencies]
|
7
7
|
enum-iterator = "1.2"
|
8
8
|
escapist = "0.0.1"
|
9
|
-
magnus = "
|
10
|
-
lol_html =
|
9
|
+
magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
|
10
|
+
lol_html = "0.3"
|
11
11
|
|
12
12
|
[lib]
|
13
13
|
name = "selma"
|
@@ -51,7 +51,7 @@ impl SelmaHTMLElement {
|
|
51
51
|
Ok(_) => Ok(value),
|
52
52
|
Err(err) => Err(Error::new(
|
53
53
|
exception::runtime_error(),
|
54
|
-
format!("AttributeNameError: {}"
|
54
|
+
format!("AttributeNameError: {err:?}"),
|
55
55
|
)),
|
56
56
|
}
|
57
57
|
} else {
|
@@ -81,7 +81,7 @@ impl SelmaHTMLElement {
|
|
81
81
|
Ok(_) => {}
|
82
82
|
Err(err) => Err(Error::new(
|
83
83
|
exception::runtime_error(),
|
84
|
-
format!("AttributeNameError: {}"
|
84
|
+
format!("AttributeNameError: {err:?}"),
|
85
85
|
))
|
86
86
|
.unwrap(),
|
87
87
|
});
|
@@ -99,7 +99,7 @@ impl SelmaHTMLElement {
|
|
99
99
|
.for_each(|ancestor| match array.push(RString::new(ancestor)) {
|
100
100
|
Ok(_) => {}
|
101
101
|
Err(err) => {
|
102
|
-
Err(Error::new(exception::runtime_error(), format!("{}"
|
102
|
+
Err(Error::new(exception::runtime_error(), format!("{err:?}"))).unwrap()
|
103
103
|
}
|
104
104
|
});
|
105
105
|
|
@@ -151,18 +151,18 @@ impl SelmaHTMLElement {
|
|
151
151
|
|
152
152
|
fn find_content_type(content_type: Symbol) -> ContentType {
|
153
153
|
match content_type.name() {
|
154
|
-
Ok(name) => match
|
154
|
+
Ok(name) => match name {
|
155
155
|
Cow::Borrowed("as_text") => ContentType::Text,
|
156
156
|
Cow::Borrowed("as_html") => ContentType::Html,
|
157
157
|
_ => Err(Error::new(
|
158
158
|
exception::runtime_error(),
|
159
|
-
format!("unknown symbol `{}`"
|
159
|
+
format!("unknown symbol `{name:?}`"),
|
160
160
|
))
|
161
161
|
.unwrap(),
|
162
162
|
},
|
163
163
|
Err(err) => Err(Error::new(
|
164
164
|
exception::runtime_error(),
|
165
|
-
format!("Could not unwrap symbol"),
|
165
|
+
format!("Could not unwrap symbol: {err:?}"),
|
166
166
|
))
|
167
167
|
.unwrap(),
|
168
168
|
}
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use std::{cell::Cell, marker::PhantomData,
|
1
|
+
use std::{cell::Cell, marker::PhantomData, rc::Rc};
|
2
2
|
|
3
3
|
// NOTE: My Rust isn't good enough to know what any of this does,
|
4
4
|
// but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
|
@@ -37,7 +37,7 @@ pub struct NativeRefWrap<R> {
|
|
37
37
|
impl<R> NativeRefWrap<R> {
|
38
38
|
pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
|
39
39
|
let wrap = NativeRefWrap {
|
40
|
-
inner_ptr:
|
40
|
+
inner_ptr: inner as *const I as *mut R,
|
41
41
|
poisoned: Rc::new(Cell::new(false)),
|
42
42
|
};
|
43
43
|
|
@@ -48,7 +48,7 @@ impl<R> NativeRefWrap<R> {
|
|
48
48
|
|
49
49
|
pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor) {
|
50
50
|
let wrap = NativeRefWrap {
|
51
|
-
inner_ptr:
|
51
|
+
inner_ptr: inner as *mut I as *mut R,
|
52
52
|
poisoned: Rc::new(Cell::new(false)),
|
53
53
|
};
|
54
54
|
|
data/ext/selma/src/rewriter.rs
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
use std::{borrow::Cow, cell::RefCell, rc::Rc};
|
2
|
-
|
3
1
|
use lol_html::{
|
4
2
|
doc_comments, doctype, element,
|
5
3
|
html_content::{ContentType, Element, EndTag, TextChunk},
|
@@ -7,6 +5,8 @@ use lol_html::{
|
|
7
5
|
};
|
8
6
|
use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
|
9
7
|
|
8
|
+
use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
|
9
|
+
|
10
10
|
use crate::{
|
11
11
|
html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag},
|
12
12
|
sanitizer::SelmaSanitizer,
|
@@ -83,18 +83,18 @@ impl SelmaRewriter {
|
|
83
83
|
return Err(magnus::Error::new(
|
84
84
|
exception::no_method_error(),
|
85
85
|
format!(
|
86
|
-
"Could not call #selector on {:?}; is this an object that defines it?",
|
87
|
-
|
86
|
+
"Could not call #selector on {classname:?}; is this an object that defines it?",
|
87
|
+
|
88
88
|
),
|
89
89
|
));
|
90
90
|
}
|
91
91
|
|
92
92
|
let rb_selector: WrappedStruct<SelmaSelector> =
|
93
93
|
match rb_handler.funcall("selector", ()) {
|
94
|
-
Err(
|
94
|
+
Err(err) => {
|
95
95
|
return Err(magnus::Error::new(
|
96
96
|
exception::type_error(),
|
97
|
-
format!("Error instantiating selector: {}"
|
97
|
+
format!("Error instantiating selector: {err:?}"),
|
98
98
|
));
|
99
99
|
}
|
100
100
|
Ok(rb_selector) => rb_selector,
|
@@ -164,8 +164,6 @@ impl SelmaRewriter {
|
|
164
164
|
let sanitized_html = match &self.0.borrow().sanitizer {
|
165
165
|
None => html,
|
166
166
|
Some(sanitizer) => {
|
167
|
-
// let first_pass_html = Self::perform_initial_sanitization(sanitizer, &html).unwrap();
|
168
|
-
|
169
167
|
// due to malicious html crafting
|
170
168
|
// (e.g. <<foo>script>...</script>, or <div <!-- comment -->> as in tests),
|
171
169
|
// we need to run sanitization several times to truly remove unwanted tags,
|
@@ -182,7 +180,7 @@ impl SelmaRewriter {
|
|
182
180
|
Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
|
183
181
|
Err(err) => Err(magnus::Error::new(
|
184
182
|
exception::runtime_error(),
|
185
|
-
format!("{}"
|
183
|
+
format!("{err:?}"),
|
186
184
|
)),
|
187
185
|
}
|
188
186
|
}
|
@@ -218,6 +216,7 @@ impl SelmaRewriter {
|
|
218
216
|
|
219
217
|
Ok(())
|
220
218
|
})],
|
219
|
+
// TODO: allow for MemorySettings to be defined
|
221
220
|
..Settings::default()
|
222
221
|
},
|
223
222
|
|c: &[u8]| first_pass_html.extend_from_slice(c),
|
@@ -361,7 +360,7 @@ impl SelmaRewriter {
|
|
361
360
|
Err(err) => {
|
362
361
|
return Err(magnus::Error::new(
|
363
362
|
exception::runtime_error(),
|
364
|
-
format!("{}"
|
363
|
+
format!("{err:?}"),
|
365
364
|
));
|
366
365
|
}
|
367
366
|
}
|
@@ -372,7 +371,7 @@ impl SelmaRewriter {
|
|
372
371
|
fn process_element_handlers(
|
373
372
|
rb_handler: Value,
|
374
373
|
element: &mut Element,
|
375
|
-
ancestors: &
|
374
|
+
ancestors: &[String],
|
376
375
|
) -> Result<(), magnus::Error> {
|
377
376
|
// if `on_end_tag` function is defined, call it
|
378
377
|
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
@@ -393,7 +392,7 @@ impl SelmaRewriter {
|
|
393
392
|
Ok(_) => Ok(()),
|
394
393
|
Err(err) => Err(magnus::Error::new(
|
395
394
|
exception::runtime_error(),
|
396
|
-
format!("{}"
|
395
|
+
format!("{err:?}"),
|
397
396
|
)),
|
398
397
|
}
|
399
398
|
}
|
@@ -402,11 +401,12 @@ impl SelmaRewriter {
|
|
402
401
|
// prevents missing `handle_text` function
|
403
402
|
let content = text.as_str();
|
404
403
|
|
405
|
-
//
|
404
|
+
// seems that sometimes lol-html returns blank text / EOLs?
|
406
405
|
if content.is_empty() {
|
407
406
|
return Ok(());
|
408
407
|
}
|
409
|
-
|
408
|
+
|
409
|
+
let rb_result = rb_handler.funcall::<_, _, String>(Self::SELMA_HANDLE_TEXT, (content,));
|
410
410
|
|
411
411
|
if rb_result.is_err() {
|
412
412
|
return Err(magnus::Error::new(
|
@@ -419,7 +419,7 @@ impl SelmaRewriter {
|
|
419
419
|
));
|
420
420
|
}
|
421
421
|
|
422
|
-
let new_content
|
422
|
+
let new_content = rb_result.unwrap();
|
423
423
|
// TODO: can this be an option?
|
424
424
|
text.replace(&new_content, ContentType::Html);
|
425
425
|
|
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -35,7 +35,7 @@ pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
|
|
35
35
|
|
36
36
|
impl SelmaSanitizer {
|
37
37
|
const SELMA_SANITIZER_ALLOW: u8 = (1 << 0);
|
38
|
-
const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
|
38
|
+
// const SELMA_SANITIZER_ESCAPE_TAGFILTER: u8 = (1 << 1);
|
39
39
|
const SELMA_SANITIZER_REMOVE_CONTENTS: u8 = (1 << 2);
|
40
40
|
const SELMA_SANITIZER_WRAP_WHITESPACE: u8 = (1 << 3);
|
41
41
|
|
@@ -229,7 +229,7 @@ impl SelmaSanitizer {
|
|
229
229
|
}
|
230
230
|
}
|
231
231
|
|
232
|
-
pub fn sanitize_attributes(&self, element: &mut Element) {
|
232
|
+
pub fn sanitize_attributes(&self, element: &mut Element) -> Result<(), magnus::Error> {
|
233
233
|
let binding = self.0.borrow_mut();
|
234
234
|
let tag = Tag::tag_from_element(element);
|
235
235
|
let element_sanitizer = Self::get_element_sanitizer(&binding, &element.tag_name());
|
@@ -247,7 +247,7 @@ impl SelmaSanitizer {
|
|
247
247
|
// encountered, remove the entire element to be safe.
|
248
248
|
if attr_name.starts_with("<!--") {
|
249
249
|
Self::force_remove_element(self, element);
|
250
|
-
return;
|
250
|
+
return Ok(());
|
251
251
|
}
|
252
252
|
|
253
253
|
// first, trim leading spaces and unescape any encodings
|
@@ -268,7 +268,15 @@ impl SelmaSanitizer {
|
|
268
268
|
// since output is always UTF-8.
|
269
269
|
if Tag::is_meta(tag) {
|
270
270
|
if attr_name == "charset" && unescaped_attr_val != "utf-8" {
|
271
|
-
element.set_attribute(attr_name, "utf-8")
|
271
|
+
match element.set_attribute(attr_name, "utf-8") {
|
272
|
+
Ok(_) => {}
|
273
|
+
Err(_) => {
|
274
|
+
return Err(magnus::Error::new(
|
275
|
+
exception::runtime_error(),
|
276
|
+
format!("Unable to change {attr_name:?}"),
|
277
|
+
));
|
278
|
+
}
|
279
|
+
}
|
272
280
|
}
|
273
281
|
} else if !unescaped_attr_val.is_empty() {
|
274
282
|
let mut buf = String::new();
|
@@ -287,14 +295,16 @@ impl SelmaSanitizer {
|
|
287
295
|
|
288
296
|
let required = &element_sanitizer.required_attrs;
|
289
297
|
if required.contains(&"*".to_string()) {
|
290
|
-
return;
|
298
|
+
return Ok(());
|
291
299
|
}
|
292
300
|
for attr in element.attributes().iter() {
|
293
301
|
let attr_name = &attr.name();
|
294
302
|
if required.contains(attr_name) {
|
295
|
-
return;
|
303
|
+
return Ok(());
|
296
304
|
}
|
297
305
|
}
|
306
|
+
|
307
|
+
Ok(())
|
298
308
|
}
|
299
309
|
|
300
310
|
fn should_keep_attribute(
|
@@ -423,7 +433,7 @@ impl SelmaSanitizer {
|
|
423
433
|
Ok(_) => Ok(true),
|
424
434
|
Err(err) => Err(Error::new(
|
425
435
|
exception::runtime_error(),
|
426
|
-
format!("AttributeNameError: {}"
|
436
|
+
format!("AttributeNameError: {err:?}"),
|
427
437
|
)),
|
428
438
|
}
|
429
439
|
}
|
data/ext/selma/src/selector.rs
CHANGED
@@ -27,7 +27,7 @@ impl SelmaSelector {
|
|
27
27
|
if css.parse::<lol_html::Selector>().is_err() {
|
28
28
|
return Err(Error::new(
|
29
29
|
exception::arg_error(),
|
30
|
-
format!("Could not parse `match_element` (`{}`) as valid CSS"
|
30
|
+
format!("Could not parse `match_element` (`{css:?}`) as valid CSS"),
|
31
31
|
));
|
32
32
|
}
|
33
33
|
}
|
@@ -37,10 +37,7 @@ impl SelmaSelector {
|
|
37
37
|
if css.parse::<lol_html::Selector>().is_err() {
|
38
38
|
return Err(Error::new(
|
39
39
|
exception::arg_error(),
|
40
|
-
format!(
|
41
|
-
"Could not parse `match_text_within` (`{}`) as valid CSS",
|
42
|
-
css
|
43
|
-
),
|
40
|
+
format!("Could not parse `match_text_within` (`{css:?}`) as valid CSS",),
|
44
41
|
));
|
45
42
|
}
|
46
43
|
}
|
data/lib/selma/3.1/selma.so
CHANGED
Binary file
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: x64-mingw-ucrt
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|