selma 0.4.14 → 0.5.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +145 -179
- data/ext/selma/Cargo.toml +1 -1
- data/ext/selma/src/html/element.rs +23 -15
- data/ext/selma/src/html/end_tag.rs +3 -2
- data/ext/selma/src/html/text_chunk.rs +13 -11
- data/ext/selma/src/html.rs +3 -6
- data/ext/selma/src/lib.rs +7 -4
- data/ext/selma/src/native_ref_wrap.rs +1 -1
- data/ext/selma/src/rewriter.rs +42 -40
- data/ext/selma/src/sanitizer.rs +20 -20
- data/ext/selma/src/selector.rs +9 -9
- data/lib/selma/version.rb +1 -1
- metadata +3 -3
|
@@ -2,7 +2,7 @@ use std::cell::RefCell;
|
|
|
2
2
|
|
|
3
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
|
4
4
|
use lol_html::html_content::Element;
|
|
5
|
-
use magnus::{
|
|
5
|
+
use magnus::{method, Error, Module, RArray, RClass, RHash, Ruby, Value};
|
|
6
6
|
|
|
7
7
|
struct HTMLElement {
|
|
8
8
|
element: NativeRefWrap<Element<'static, 'static>>,
|
|
@@ -29,7 +29,7 @@ impl SelmaHTMLElement {
|
|
|
29
29
|
match binding.element.get() {
|
|
30
30
|
Ok(e) => Ok(e.tag_name().to_string()),
|
|
31
31
|
Err(_) => Err(Error::new(
|
|
32
|
-
|
|
32
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
33
33
|
"`tag_name` is not available",
|
|
34
34
|
)),
|
|
35
35
|
}
|
|
@@ -37,15 +37,19 @@ impl SelmaHTMLElement {
|
|
|
37
37
|
|
|
38
38
|
fn set_tag_name(&self, name: String) -> Result<(), Error> {
|
|
39
39
|
let mut binding = self.0.borrow_mut();
|
|
40
|
+
let ruby = Ruby::get().unwrap();
|
|
40
41
|
|
|
41
42
|
if let Ok(element) = binding.element.get_mut() {
|
|
42
43
|
match element.set_tag_name(&name) {
|
|
43
44
|
Ok(_) => Ok(()),
|
|
44
|
-
Err(err) => Err(Error::new(
|
|
45
|
+
Err(err) => Err(Error::new(
|
|
46
|
+
ruby.exception_runtime_error(),
|
|
47
|
+
format!("{err:?}"),
|
|
48
|
+
)),
|
|
45
49
|
}
|
|
46
50
|
} else {
|
|
47
51
|
Err(Error::new(
|
|
48
|
-
|
|
52
|
+
ruby.exception_runtime_error(),
|
|
49
53
|
"`set_tag_name` is not available",
|
|
50
54
|
))
|
|
51
55
|
}
|
|
@@ -58,7 +62,7 @@ impl SelmaHTMLElement {
|
|
|
58
62
|
Ok(e.is_self_closing())
|
|
59
63
|
} else {
|
|
60
64
|
Err(Error::new(
|
|
61
|
-
|
|
65
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
62
66
|
"`is_self_closing` is not available",
|
|
63
67
|
))
|
|
64
68
|
}
|
|
@@ -71,7 +75,7 @@ impl SelmaHTMLElement {
|
|
|
71
75
|
Ok(e.has_attribute(&attr))
|
|
72
76
|
} else {
|
|
73
77
|
Err(Error::new(
|
|
74
|
-
|
|
78
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
75
79
|
"`is_self_closing` is not available",
|
|
76
80
|
))
|
|
77
81
|
}
|
|
@@ -85,17 +89,18 @@ impl SelmaHTMLElement {
|
|
|
85
89
|
|
|
86
90
|
fn set_attribute(&self, attr: String, value: String) -> Result<String, Error> {
|
|
87
91
|
let mut binding = self.0.borrow_mut();
|
|
92
|
+
let ruby = Ruby::get().unwrap();
|
|
88
93
|
if let Ok(element) = binding.element.get_mut() {
|
|
89
94
|
match element.set_attribute(&attr, &value) {
|
|
90
95
|
Ok(_) => Ok(value),
|
|
91
96
|
Err(err) => Err(Error::new(
|
|
92
|
-
|
|
97
|
+
ruby.exception_runtime_error(),
|
|
93
98
|
format!("AttributeNameError: {err:?}"),
|
|
94
99
|
)),
|
|
95
100
|
}
|
|
96
101
|
} else {
|
|
97
102
|
Err(Error::new(
|
|
98
|
-
|
|
103
|
+
ruby.exception_runtime_error(),
|
|
99
104
|
"`tag_name` is not available",
|
|
100
105
|
))
|
|
101
106
|
}
|
|
@@ -111,7 +116,8 @@ impl SelmaHTMLElement {
|
|
|
111
116
|
|
|
112
117
|
fn get_attributes(&self) -> Result<RHash, Error> {
|
|
113
118
|
let binding = self.0.borrow();
|
|
114
|
-
let
|
|
119
|
+
let ruby = Ruby::get().unwrap();
|
|
120
|
+
let hash = ruby.hash_new();
|
|
115
121
|
|
|
116
122
|
if let Ok(e) = binding.element.get() {
|
|
117
123
|
e.attributes()
|
|
@@ -121,7 +127,7 @@ impl SelmaHTMLElement {
|
|
|
121
127
|
Err(err) => panic!(
|
|
122
128
|
"{:?}",
|
|
123
129
|
Error::new(
|
|
124
|
-
|
|
130
|
+
ruby.exception_runtime_error(),
|
|
125
131
|
format!("AttributeNameError: {err:?}"),
|
|
126
132
|
)
|
|
127
133
|
),
|
|
@@ -132,17 +138,18 @@ impl SelmaHTMLElement {
|
|
|
132
138
|
|
|
133
139
|
fn get_ancestors(&self) -> Result<RArray, Error> {
|
|
134
140
|
let binding = self.0.borrow();
|
|
135
|
-
let
|
|
141
|
+
let ruby = Ruby::get().unwrap();
|
|
142
|
+
let array = ruby.ary_new();
|
|
136
143
|
|
|
137
144
|
binding
|
|
138
145
|
.ancestors
|
|
139
146
|
.iter()
|
|
140
|
-
.for_each(|ancestor| match array.push(
|
|
147
|
+
.for_each(|ancestor| match array.push(ruby.str_new(ancestor)) {
|
|
141
148
|
Ok(_) => {}
|
|
142
149
|
Err(err) => {
|
|
143
150
|
panic!(
|
|
144
151
|
"{:?}",
|
|
145
|
-
Error::new(
|
|
152
|
+
Error::new(ruby.exception_runtime_error(), format!("{err:?}"))
|
|
146
153
|
)
|
|
147
154
|
}
|
|
148
155
|
});
|
|
@@ -244,7 +251,7 @@ impl SelmaHTMLElement {
|
|
|
244
251
|
match binding.element.get() {
|
|
245
252
|
Ok(e) => Ok(e.removed()),
|
|
246
253
|
Err(_) => Err(Error::new(
|
|
247
|
-
|
|
254
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
248
255
|
"`is_removed` is not available",
|
|
249
256
|
)),
|
|
250
257
|
}
|
|
@@ -252,8 +259,9 @@ impl SelmaHTMLElement {
|
|
|
252
259
|
}
|
|
253
260
|
|
|
254
261
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
|
262
|
+
let ruby = Ruby::get().unwrap();
|
|
255
263
|
let c_element = c_html
|
|
256
|
-
.define_class("Element",
|
|
264
|
+
.define_class("Element", ruby.class_object())
|
|
257
265
|
.expect("cannot define class Selma::HTML::Element");
|
|
258
266
|
|
|
259
267
|
c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
|
|
@@ -2,7 +2,7 @@ use std::cell::RefCell;
|
|
|
2
2
|
|
|
3
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
|
4
4
|
use lol_html::html_content::EndTag;
|
|
5
|
-
use magnus::{method, Error, Module, RClass};
|
|
5
|
+
use magnus::{method, Error, Module, RClass, Ruby};
|
|
6
6
|
|
|
7
7
|
struct HTMLEndTag {
|
|
8
8
|
end_tag: NativeRefWrap<EndTag<'static>>,
|
|
@@ -25,8 +25,9 @@ impl SelmaHTMLEndTag {
|
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
|
28
|
+
let ruby = Ruby::get().unwrap();
|
|
28
29
|
let c_end_tag = c_html
|
|
29
|
-
.define_class("EndTag",
|
|
30
|
+
.define_class("EndTag", ruby.class_object())
|
|
30
31
|
.expect("cannot define class Selma::HTML::EndTag");
|
|
31
32
|
|
|
32
33
|
c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
|
|
@@ -2,7 +2,7 @@ use std::cell::RefCell;
|
|
|
2
2
|
|
|
3
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
|
4
4
|
use lol_html::html_content::{TextChunk, TextType};
|
|
5
|
-
use magnus::{
|
|
5
|
+
use magnus::{method, Error, Module, RClass, Ruby, Symbol, Value};
|
|
6
6
|
|
|
7
7
|
struct HTMLTextChunk {
|
|
8
8
|
text_chunk: NativeRefWrap<TextChunk<'static>>,
|
|
@@ -49,7 +49,7 @@ impl SelmaHTMLTextChunk {
|
|
|
49
49
|
Ok(tc.as_str().to_string())
|
|
50
50
|
} else {
|
|
51
51
|
Err(Error::new(
|
|
52
|
-
|
|
52
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
53
53
|
"`to_s` is not available",
|
|
54
54
|
))
|
|
55
55
|
}
|
|
@@ -57,19 +57,20 @@ impl SelmaHTMLTextChunk {
|
|
|
57
57
|
|
|
58
58
|
fn text_type(&self) -> Result<Symbol, Error> {
|
|
59
59
|
let binding = self.0.borrow();
|
|
60
|
+
let ruby = Ruby::get().unwrap();
|
|
60
61
|
|
|
61
62
|
if let Ok(tc) = binding.text_chunk.get() {
|
|
62
63
|
match tc.text_type() {
|
|
63
|
-
TextType::Data => Ok(
|
|
64
|
-
TextType::PlainText => Ok(
|
|
65
|
-
TextType::RawText => Ok(
|
|
66
|
-
TextType::ScriptData => Ok(
|
|
67
|
-
TextType::RCData => Ok(
|
|
68
|
-
TextType::CDataSection => Ok(
|
|
64
|
+
TextType::Data => Ok(ruby.to_symbol("data")),
|
|
65
|
+
TextType::PlainText => Ok(ruby.to_symbol("plain_text")),
|
|
66
|
+
TextType::RawText => Ok(ruby.to_symbol("raw_text")),
|
|
67
|
+
TextType::ScriptData => Ok(ruby.to_symbol("script")),
|
|
68
|
+
TextType::RCData => Ok(ruby.to_symbol("rc_data")),
|
|
69
|
+
TextType::CDataSection => Ok(ruby.to_symbol("cdata_section")),
|
|
69
70
|
}
|
|
70
71
|
} else {
|
|
71
72
|
Err(Error::new(
|
|
72
|
-
|
|
73
|
+
ruby.exception_runtime_error(),
|
|
73
74
|
"`text_type` is not available",
|
|
74
75
|
))
|
|
75
76
|
}
|
|
@@ -81,7 +82,7 @@ impl SelmaHTMLTextChunk {
|
|
|
81
82
|
match binding.text_chunk.get() {
|
|
82
83
|
Ok(tc) => Ok(tc.removed()),
|
|
83
84
|
Err(_) => Err(Error::new(
|
|
84
|
-
|
|
85
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
85
86
|
"`is_removed` is not available",
|
|
86
87
|
)),
|
|
87
88
|
}
|
|
@@ -140,8 +141,9 @@ impl SelmaHTMLTextChunk {
|
|
|
140
141
|
}
|
|
141
142
|
|
|
142
143
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
|
144
|
+
let ruby = Ruby::get().unwrap();
|
|
143
145
|
let c_text_chunk = c_html
|
|
144
|
-
.define_class("TextChunk",
|
|
146
|
+
.define_class("TextChunk", ruby.class_object())
|
|
145
147
|
.expect("cannot define class Selma::HTML::TextChunk");
|
|
146
148
|
|
|
147
149
|
c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
|
data/ext/selma/src/html.rs
CHANGED
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
use magnus::{Error, Module, RModule};
|
|
2
|
-
|
|
3
|
-
#[derive(Clone, Debug)]
|
|
4
|
-
#[magnus::wrap(class = "Selma::HTML")]
|
|
5
|
-
pub(crate) struct SelmaHTML {}
|
|
1
|
+
use magnus::{Error, Module, RModule, Ruby};
|
|
6
2
|
|
|
7
3
|
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
|
4
|
+
let ruby = Ruby::get().unwrap();
|
|
8
5
|
let c_html = m_selma
|
|
9
|
-
.define_class("HTML",
|
|
6
|
+
.define_class("HTML", ruby.class_object())
|
|
10
7
|
.expect("cannot define class Selma::HTML");
|
|
11
8
|
|
|
12
9
|
element::init(c_html).expect("cannot define Selma::HTML::Element class");
|
data/ext/selma/src/lib.rs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
extern crate core;
|
|
2
2
|
|
|
3
3
|
use lol_html::html_content::ContentType;
|
|
4
|
-
use magnus::{
|
|
4
|
+
use magnus::{scan_args, Error, Ruby, Symbol, Value};
|
|
5
5
|
|
|
6
6
|
pub mod html;
|
|
7
7
|
pub mod native_ref_wrap;
|
|
@@ -27,8 +27,9 @@ fn scan_text_args(args: &[Value]) -> Result<(String, ContentType), magnus::Error
|
|
|
27
27
|
} else if as_sym_str == "html" {
|
|
28
28
|
ContentType::Html
|
|
29
29
|
} else {
|
|
30
|
+
let ruby = Ruby::get().unwrap();
|
|
30
31
|
return Err(Error::new(
|
|
31
|
-
|
|
32
|
+
ruby.exception_runtime_error(),
|
|
32
33
|
format!("unknown symbol `{as_sym_str:?}`"),
|
|
33
34
|
));
|
|
34
35
|
};
|
|
@@ -37,8 +38,10 @@ fn scan_text_args(args: &[Value]) -> Result<(String, ContentType), magnus::Error
|
|
|
37
38
|
}
|
|
38
39
|
|
|
39
40
|
#[magnus::init]
|
|
40
|
-
fn init() -> Result<(), Error> {
|
|
41
|
-
let m_selma =
|
|
41
|
+
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
42
|
+
let m_selma = ruby
|
|
43
|
+
.define_module("Selma")
|
|
44
|
+
.expect("cannot define ::Selma module");
|
|
42
45
|
|
|
43
46
|
sanitizer::init(m_selma).expect("cannot define Selma::Sanitizer class");
|
|
44
47
|
rewriter::init(m_selma).expect("cannot define Selma::Rewriter class");
|
|
@@ -39,7 +39,7 @@ pub struct NativeRefWrap<R> {
|
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
impl<R> NativeRefWrap<R> {
|
|
42
|
-
pub fn wrap<I>(inner: &mut I) -> (Self, Anchor) {
|
|
42
|
+
pub fn wrap<I>(inner: &mut I) -> (Self, Anchor<'_>) {
|
|
43
43
|
let wrap = NativeRefWrap {
|
|
44
44
|
inner_ptr: inner as *mut I as *mut R,
|
|
45
45
|
poisoned: Arc::new(Mutex::new(false)),
|
data/ext/selma/src/rewriter.rs
CHANGED
|
@@ -5,7 +5,7 @@ use lol_html::{
|
|
|
5
5
|
Settings,
|
|
6
6
|
};
|
|
7
7
|
use magnus::{
|
|
8
|
-
|
|
8
|
+
function, gc, method,
|
|
9
9
|
r_hash::ForEach,
|
|
10
10
|
scan_args,
|
|
11
11
|
typed_data::Obj,
|
|
@@ -84,12 +84,13 @@ impl SelmaRewriter {
|
|
|
84
84
|
/// @return [Selma::Rewriter]
|
|
85
85
|
fn new(args: &[Value]) -> Result<Self, magnus::Error> {
|
|
86
86
|
let (rb_sanitizer, rb_handlers, rb_options) = Self::scan_parse_args(args)?;
|
|
87
|
+
let ruby = Ruby::get().unwrap();
|
|
87
88
|
|
|
88
89
|
let sanitizer = match rb_sanitizer {
|
|
89
90
|
None => {
|
|
90
91
|
// no `sanitizer:` kwarg provided, use default
|
|
91
92
|
let default_sanitizer = SelmaSanitizer::new(&[])?;
|
|
92
|
-
let wrapped_sanitizer =
|
|
93
|
+
let wrapped_sanitizer = ruby.obj_wrap(default_sanitizer);
|
|
93
94
|
// wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
|
94
95
|
Some(wrapped_sanitizer.deref().to_owned())
|
|
95
96
|
}
|
|
@@ -106,7 +107,7 @@ impl SelmaRewriter {
|
|
|
106
107
|
if !rb_handler.respond_to("selector", true).unwrap() {
|
|
107
108
|
let classname = unsafe { rb_handler.classname() };
|
|
108
109
|
return Err(magnus::Error::new(
|
|
109
|
-
|
|
110
|
+
ruby.exception_no_method_error(),
|
|
110
111
|
format!(
|
|
111
112
|
"Could not call #selector on {classname:?}; is this an object that defines it?",
|
|
112
113
|
|
|
@@ -117,7 +118,7 @@ impl SelmaRewriter {
|
|
|
117
118
|
let rb_selector: Obj<SelmaSelector> = match rb_handler.funcall("selector", ()) {
|
|
118
119
|
Err(err) => {
|
|
119
120
|
return Err(magnus::Error::new(
|
|
120
|
-
|
|
121
|
+
ruby.exception_type_error(),
|
|
121
122
|
format!("Error instantiating selector: {err:?}"),
|
|
122
123
|
));
|
|
123
124
|
}
|
|
@@ -140,7 +141,7 @@ impl SelmaRewriter {
|
|
|
140
141
|
|
|
141
142
|
if sanitizer.is_none() && handlers.is_empty() {
|
|
142
143
|
return Err(magnus::Error::new(
|
|
143
|
-
|
|
144
|
+
ruby.exception_arg_error(),
|
|
144
145
|
"Must provide a sanitizer or a handler",
|
|
145
146
|
));
|
|
146
147
|
}
|
|
@@ -151,23 +152,22 @@ impl SelmaRewriter {
|
|
|
151
152
|
None => {}
|
|
152
153
|
Some(options) => {
|
|
153
154
|
options.foreach(|key: Symbol, value: RHash| {
|
|
155
|
+
let ruby = Ruby::get().unwrap();
|
|
154
156
|
let key = key.to_string();
|
|
155
157
|
match key.as_str() {
|
|
156
158
|
"memory" => {
|
|
157
|
-
let max_allowed_memory_usage = value.get(
|
|
158
|
-
if max_allowed_memory_usage.is_some() {
|
|
159
|
-
let max_allowed_memory_usage = max_allowed_memory_usage.unwrap();
|
|
159
|
+
if let Some(max_allowed_memory_usage) = value.get(ruby.to_symbol("max_allowed_memory_usage")) {
|
|
160
160
|
let max_allowed_memory_usage =
|
|
161
161
|
Integer::from_value(max_allowed_memory_usage);
|
|
162
|
-
if max_allowed_memory_usage
|
|
163
|
-
match max_allowed_memory_usage.
|
|
162
|
+
if let Some(max_allowed_memory_usage) = max_allowed_memory_usage {
|
|
163
|
+
match max_allowed_memory_usage.to_u64() {
|
|
164
164
|
Ok(max_allowed_memory_usage) => {
|
|
165
165
|
rewriter_options.memory_options.max_allowed_memory_usage =
|
|
166
166
|
max_allowed_memory_usage as usize;
|
|
167
167
|
}
|
|
168
168
|
Err(_e) => {
|
|
169
169
|
return Err(magnus::Error::new(
|
|
170
|
-
|
|
170
|
+
ruby.exception_arg_error(),
|
|
171
171
|
"max_allowed_memory_usage must be a positive integer",
|
|
172
172
|
));
|
|
173
173
|
}
|
|
@@ -177,20 +177,18 @@ impl SelmaRewriter {
|
|
|
177
177
|
}
|
|
178
178
|
}
|
|
179
179
|
|
|
180
|
-
let preallocated_parsing_buffer_size = value.get(
|
|
181
|
-
if preallocated_parsing_buffer_size.is_some() {
|
|
182
|
-
let preallocated_parsing_buffer_size = preallocated_parsing_buffer_size.unwrap();
|
|
180
|
+
if let Some(preallocated_parsing_buffer_size) = value.get(ruby.to_symbol("preallocated_parsing_buffer_size")) {
|
|
183
181
|
let preallocated_parsing_buffer_size =
|
|
184
182
|
Integer::from_value(preallocated_parsing_buffer_size);
|
|
185
|
-
if preallocated_parsing_buffer_size
|
|
186
|
-
match preallocated_parsing_buffer_size.
|
|
183
|
+
if let Some(preallocated_parsing_buffer_size) = preallocated_parsing_buffer_size {
|
|
184
|
+
match preallocated_parsing_buffer_size.to_u64() {
|
|
187
185
|
Ok(preallocated_parsing_buffer_size) => {
|
|
188
186
|
rewriter_options.memory_options.preallocated_parsing_buffer_size =
|
|
189
187
|
preallocated_parsing_buffer_size as usize;
|
|
190
188
|
}
|
|
191
189
|
Err(_e) => {
|
|
192
190
|
return Err(magnus::Error::new(
|
|
193
|
-
|
|
191
|
+
ruby.exception_arg_error(),
|
|
194
192
|
"preallocated_parsing_buffer_size must be a positive integer",
|
|
195
193
|
));
|
|
196
194
|
}
|
|
@@ -202,7 +200,7 @@ impl SelmaRewriter {
|
|
|
202
200
|
}
|
|
203
201
|
_ => {
|
|
204
202
|
return Err(magnus::Error::new(
|
|
205
|
-
|
|
203
|
+
ruby.exception_arg_error(),
|
|
206
204
|
format!("Unknown option: {key:?}"),
|
|
207
205
|
));
|
|
208
206
|
}
|
|
@@ -218,7 +216,7 @@ impl SelmaRewriter {
|
|
|
218
216
|
> rewriter_options.memory_options.max_allowed_memory_usage
|
|
219
217
|
{
|
|
220
218
|
return Err(magnus::Error::new(
|
|
221
|
-
|
|
219
|
+
ruby.exception_arg_error(),
|
|
222
220
|
"max_allowed_memory_usage must be greater than preallocated_parsing_buffer_size",
|
|
223
221
|
));
|
|
224
222
|
}
|
|
@@ -305,7 +303,7 @@ impl SelmaRewriter {
|
|
|
305
303
|
None => match String::from_utf8(rewritten_html) {
|
|
306
304
|
Ok(output) => Ok(output),
|
|
307
305
|
Err(err) => Err(magnus::Error::new(
|
|
308
|
-
|
|
306
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
309
307
|
format!("{err:?}"),
|
|
310
308
|
)),
|
|
311
309
|
},
|
|
@@ -342,7 +340,7 @@ impl SelmaRewriter {
|
|
|
342
340
|
Ok(rewritten_html) => match String::from_utf8(rewritten_html) {
|
|
343
341
|
Ok(output) => Ok(output),
|
|
344
342
|
Err(err) => Err(magnus::Error::new(
|
|
345
|
-
|
|
343
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
346
344
|
format!("{err:?}"),
|
|
347
345
|
)),
|
|
348
346
|
},
|
|
@@ -430,14 +428,12 @@ impl SelmaRewriter {
|
|
|
430
428
|
|
|
431
429
|
let closure_element_stack = element_stack.clone();
|
|
432
430
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
)));
|
|
440
|
-
}
|
|
431
|
+
let handler: lol_html::EndTagHandler<'static> = Box::new(move |_end_tag| {
|
|
432
|
+
closure_element_stack.as_ref().borrow_mut().pop();
|
|
433
|
+
Ok(())
|
|
434
|
+
});
|
|
435
|
+
// ignore void elements (lol_html's void list may differ from selma's `self_closing`)
|
|
436
|
+
let _ = el.on_end_tag(handler);
|
|
441
437
|
|
|
442
438
|
Ok(())
|
|
443
439
|
}));
|
|
@@ -473,7 +469,7 @@ impl SelmaRewriter {
|
|
|
473
469
|
Ok(_) => {}
|
|
474
470
|
Err(err) => {
|
|
475
471
|
return Err(magnus::Error::new(
|
|
476
|
-
|
|
472
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
477
473
|
format!("{err:?}"),
|
|
478
474
|
));
|
|
479
475
|
}
|
|
@@ -487,15 +483,13 @@ impl SelmaRewriter {
|
|
|
487
483
|
element: &mut Element,
|
|
488
484
|
ancestors: &[String],
|
|
489
485
|
) -> Result<(), magnus::Error> {
|
|
490
|
-
let
|
|
486
|
+
let ruby = Ruby::get().unwrap();
|
|
487
|
+
let rb_handler = handler.rb_handler.into_value_with(&ruby);
|
|
491
488
|
|
|
492
489
|
// if `on_end_tag` function is defined, call it
|
|
493
490
|
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
|
494
|
-
// TODO: error here is an "EndTagError"
|
|
495
491
|
element
|
|
496
|
-
.
|
|
497
|
-
.unwrap()
|
|
498
|
-
.push(Box::new(move |end_tag| {
|
|
492
|
+
.on_end_tag(Box::new(move |end_tag| {
|
|
499
493
|
let (ref_wrap, anchor) = NativeRefWrap::wrap(end_tag);
|
|
500
494
|
|
|
501
495
|
let rb_end_tag = SelmaHTMLEndTag::new(ref_wrap);
|
|
@@ -509,7 +503,13 @@ impl SelmaRewriter {
|
|
|
509
503
|
Ok(_) => Ok(()),
|
|
510
504
|
Err(err) => Err(err.to_string().into()),
|
|
511
505
|
}
|
|
512
|
-
}))
|
|
506
|
+
}))
|
|
507
|
+
.map_err(|err| {
|
|
508
|
+
magnus::Error::new(
|
|
509
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
|
510
|
+
err.to_string(),
|
|
511
|
+
)
|
|
512
|
+
})?;
|
|
513
513
|
}
|
|
514
514
|
|
|
515
515
|
let (ref_wrap, anchor) = NativeRefWrap::wrap(element);
|
|
@@ -521,7 +521,7 @@ impl SelmaRewriter {
|
|
|
521
521
|
match result {
|
|
522
522
|
Ok(_) => Ok(()),
|
|
523
523
|
Err(err) => Err(magnus::Error::new(
|
|
524
|
-
|
|
524
|
+
ruby.exception_runtime_error(),
|
|
525
525
|
format!("{err:?}"),
|
|
526
526
|
)),
|
|
527
527
|
}
|
|
@@ -531,7 +531,8 @@ impl SelmaRewriter {
|
|
|
531
531
|
handler: &Handler,
|
|
532
532
|
text_chunk: &mut TextChunk,
|
|
533
533
|
) -> Result<(), magnus::Error> {
|
|
534
|
-
let
|
|
534
|
+
let ruby = Ruby::get().unwrap();
|
|
535
|
+
let rb_handler = handler.rb_handler.into_value_with(&ruby);
|
|
535
536
|
|
|
536
537
|
// prevents missing `handle_text_chunk` function
|
|
537
538
|
let content = text_chunk.as_str();
|
|
@@ -553,7 +554,7 @@ impl SelmaRewriter {
|
|
|
553
554
|
match result {
|
|
554
555
|
Ok(_) => Ok(()),
|
|
555
556
|
Err(err) => Err(magnus::Error::new(
|
|
556
|
-
|
|
557
|
+
ruby.exception_runtime_error(),
|
|
557
558
|
format!("{err:?}"),
|
|
558
559
|
)),
|
|
559
560
|
}
|
|
@@ -577,8 +578,9 @@ impl RewriterOptions {
|
|
|
577
578
|
}
|
|
578
579
|
|
|
579
580
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
|
581
|
+
let ruby = Ruby::get().unwrap();
|
|
580
582
|
let c_rewriter = m_selma
|
|
581
|
-
.define_class("Rewriter",
|
|
583
|
+
.define_class("Rewriter", ruby.class_object())
|
|
582
584
|
.expect("cannot define class Selma::Rewriter");
|
|
583
585
|
|
|
584
586
|
c_rewriter.define_singleton_method("new", function!(SelmaRewriter::new, -1))?;
|
data/ext/selma/src/sanitizer.rs
CHANGED
|
@@ -5,7 +5,7 @@ use lol_html::{
|
|
|
5
5
|
html_content::{Comment, ContentType, Doctype, Element, EndTag},
|
|
6
6
|
};
|
|
7
7
|
use magnus::{
|
|
8
|
-
|
|
8
|
+
eval, function, method,
|
|
9
9
|
r_hash::ForEach,
|
|
10
10
|
scan_args,
|
|
11
11
|
value::{Opaque, ReprValue},
|
|
@@ -125,22 +125,22 @@ impl SelmaSanitizer {
|
|
|
125
125
|
allowed_protocols.foreach(|element_name: String, protocols: RHash| {
|
|
126
126
|
protocols.foreach(|attribute_name: String, protocol_list: Value| {
|
|
127
127
|
let protocols: RArray;
|
|
128
|
-
if protocol_list.is_kind_of(
|
|
128
|
+
if protocol_list.is_kind_of(ruby.class_array()) {
|
|
129
129
|
protocols = RArray::from_value(protocol_list).unwrap();
|
|
130
130
|
if protocols.includes(ruby.to_symbol("all")) {
|
|
131
131
|
return Err(magnus::Error::new(
|
|
132
|
-
|
|
132
|
+
ruby.exception_arg_error(),
|
|
133
133
|
"`:all` must be passed outside of an array".to_string(),
|
|
134
134
|
));
|
|
135
135
|
}
|
|
136
|
-
} else if protocol_list.is_kind_of(
|
|
136
|
+
} else if protocol_list.is_kind_of(ruby.class_symbol())
|
|
137
137
|
&& Symbol::from_value(protocol_list) == eval(":all").unwrap()
|
|
138
138
|
{
|
|
139
|
-
protocols =
|
|
139
|
+
protocols = ruby.ary_new();
|
|
140
140
|
protocols.push(ruby.to_symbol("all"))?;
|
|
141
141
|
} else {
|
|
142
142
|
return Err(magnus::Error::new(
|
|
143
|
-
|
|
143
|
+
ruby.exception_arg_error(),
|
|
144
144
|
"Protocol list must be an array, or just `:all`".to_string(),
|
|
145
145
|
));
|
|
146
146
|
}
|
|
@@ -220,15 +220,15 @@ impl SelmaSanitizer {
|
|
|
220
220
|
// end
|
|
221
221
|
// end
|
|
222
222
|
if let Some(remove_contents) = config.get(ruby.to_symbol("remove_contents")) {
|
|
223
|
-
if remove_contents.is_kind_of(
|
|
224
|
-
|| remove_contents.is_kind_of(
|
|
223
|
+
if remove_contents.is_kind_of(ruby.class_true_class())
|
|
224
|
+
|| remove_contents.is_kind_of(ruby.class_false_class())
|
|
225
225
|
{
|
|
226
226
|
Self::set_all_flags(
|
|
227
227
|
flags,
|
|
228
228
|
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
|
229
229
|
remove_contents.to_bool(),
|
|
230
230
|
);
|
|
231
|
-
} else if remove_contents.is_kind_of(
|
|
231
|
+
} else if remove_contents.is_kind_of(ruby.class_array()) {
|
|
232
232
|
let elements = RArray::from_value(remove_contents).unwrap();
|
|
233
233
|
elements
|
|
234
234
|
.into_iter()
|
|
@@ -245,7 +245,7 @@ impl SelmaSanitizer {
|
|
|
245
245
|
});
|
|
246
246
|
} else {
|
|
247
247
|
return Err(magnus::Error::new(
|
|
248
|
-
|
|
248
|
+
ruby.exception_arg_error(),
|
|
249
249
|
"remove_contents must be `true`, `false`, or an array".to_string(),
|
|
250
250
|
));
|
|
251
251
|
}
|
|
@@ -354,11 +354,12 @@ impl SelmaSanitizer {
|
|
|
354
354
|
attr_name: String,
|
|
355
355
|
allow_list: RArray,
|
|
356
356
|
) {
|
|
357
|
+
let ruby = Ruby::get().unwrap();
|
|
357
358
|
let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
|
|
358
359
|
|
|
359
360
|
for allowed_protocol in allow_list.into_iter() {
|
|
360
361
|
let protocol_list = protocol_sanitizers.get_mut(&attr_name);
|
|
361
|
-
if allowed_protocol.is_kind_of(
|
|
362
|
+
if allowed_protocol.is_kind_of(ruby.class_string()) {
|
|
362
363
|
match protocol_list {
|
|
363
364
|
None => {
|
|
364
365
|
protocol_sanitizers
|
|
@@ -366,7 +367,7 @@ impl SelmaSanitizer {
|
|
|
366
367
|
}
|
|
367
368
|
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
|
368
369
|
}
|
|
369
|
-
} else if allowed_protocol.is_kind_of(
|
|
370
|
+
} else if allowed_protocol.is_kind_of(ruby.class_symbol()) {
|
|
370
371
|
let protocol_config = allowed_protocol.inspect();
|
|
371
372
|
if protocol_config == ":relative" {
|
|
372
373
|
match protocol_list {
|
|
@@ -685,13 +686,11 @@ impl SelmaSanitizer {
|
|
|
685
686
|
|
|
686
687
|
fn check_if_end_tag_needs_removal(element: &mut Element) {
|
|
687
688
|
if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
Ok(())
|
|
694
|
-
}));
|
|
689
|
+
// ignore void elements (lol_html's void list may differ from selma's `self_closing`)
|
|
690
|
+
let _ = element.on_end_tag(Box::new(move |end| {
|
|
691
|
+
Self::remove_end_tag(end);
|
|
692
|
+
Ok(())
|
|
693
|
+
}));
|
|
695
694
|
}
|
|
696
695
|
}
|
|
697
696
|
|
|
@@ -710,8 +709,9 @@ impl SelmaSanitizer {
|
|
|
710
709
|
}
|
|
711
710
|
|
|
712
711
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
|
712
|
+
let ruby = Ruby::get().unwrap();
|
|
713
713
|
let c_sanitizer = m_selma
|
|
714
|
-
.define_class("Sanitizer",
|
|
714
|
+
.define_class("Sanitizer", ruby.class_object())
|
|
715
715
|
.expect("cannot define class Selma::Sanitizer");
|
|
716
716
|
|
|
717
717
|
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|