selma 0.0.7-x64-mingw-ucrt → 0.1.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/3.2/selma.so +0 -0
- data/lib/selma/version.rb +1 -1
- metadata +3 -46
- data/ext/selma/Cargo.toml +0 -14
- data/ext/selma/_util.rb +0 -102
- data/ext/selma/extconf.rb +0 -6
- data/ext/selma/src/html/element.rs +0 -254
- data/ext/selma/src/html/end_tag.rs +0 -35
- data/ext/selma/src/html/text_chunk.rs +0 -113
- data/ext/selma/src/html.rs +0 -19
- data/ext/selma/src/lib.rs +0 -50
- data/ext/selma/src/native_ref_wrap.rs +0 -79
- data/ext/selma/src/rewriter.rs +0 -429
- data/ext/selma/src/sanitizer.rs +0 -607
- data/ext/selma/src/selector.rs +0 -112
- data/ext/selma/src/tags.rs +0 -1136
- data/ext/selma/src/wrapped_struct.rs +0 -92
- data/selma.gemspec +0 -41
data/ext/selma/src/lib.rs
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
extern crate core;
|
2
|
-
|
3
|
-
use lol_html::html_content::ContentType;
|
4
|
-
use magnus::{define_module, exception, scan_args, Error, Symbol, Value};
|
5
|
-
|
6
|
-
pub mod html;
|
7
|
-
pub mod native_ref_wrap;
|
8
|
-
pub mod rewriter;
|
9
|
-
pub mod sanitizer;
|
10
|
-
pub mod selector;
|
11
|
-
pub mod tags;
|
12
|
-
pub mod wrapped_struct;
|
13
|
-
|
14
|
-
#[allow(clippy::let_unit_value)]
|
15
|
-
fn scan_text_args(args: &[Value]) -> Result<(String, ContentType), magnus::Error> {
|
16
|
-
let args = scan_args::scan_args(args)?;
|
17
|
-
let (text,): (String,) = args.required;
|
18
|
-
let _: () = args.optional;
|
19
|
-
let _: () = args.splat;
|
20
|
-
let _: () = args.trailing;
|
21
|
-
let _: () = args.block;
|
22
|
-
|
23
|
-
let kwargs = scan_args::get_kwargs::<_, (Symbol,), (), ()>(args.keywords, &["as"], &[])?;
|
24
|
-
let as_sym = kwargs.required.0;
|
25
|
-
let as_sym_str = as_sym.name().unwrap();
|
26
|
-
let content_type = if as_sym_str == "text" {
|
27
|
-
ContentType::Text
|
28
|
-
} else if as_sym_str == "html" {
|
29
|
-
ContentType::Html
|
30
|
-
} else {
|
31
|
-
return Err(Error::new(
|
32
|
-
exception::runtime_error(),
|
33
|
-
format!("unknown symbol `{as_sym_str:?}`"),
|
34
|
-
));
|
35
|
-
};
|
36
|
-
|
37
|
-
Ok((text, content_type))
|
38
|
-
}
|
39
|
-
|
40
|
-
#[magnus::init]
|
41
|
-
fn init() -> Result<(), Error> {
|
42
|
-
let m_selma = define_module("Selma").expect("cannot define ::Selma module");
|
43
|
-
|
44
|
-
sanitizer::init(m_selma).expect("cannot define Selma::Sanitizer class");
|
45
|
-
rewriter::init(m_selma).expect("cannot define Selma::Rewriter class");
|
46
|
-
html::init(m_selma).expect("cannot define Selma::HTML class");
|
47
|
-
selector::init(m_selma).expect("cannot define Selma::Selector class");
|
48
|
-
|
49
|
-
Ok(())
|
50
|
-
}
|
@@ -1,79 +0,0 @@
|
|
1
|
-
use std::{cell::Cell, marker::PhantomData, rc::Rc};
|
2
|
-
|
3
|
-
// NOTE: My Rust isn't good enough to know what any of this does,
|
4
|
-
// but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
|
5
|
-
|
6
|
-
/// Marker returned together with a [`NativeRefWrap`].
///
/// It carries the lifetime of the reference the wrapper was created from and
/// shares the wrapper's `poisoned` flag.
pub struct Anchor<'r> {
    // Flag shared with the `NativeRefWrap` created alongside this anchor.
    poisoned: Rc<Cell<bool>>,
    // Zero-sized marker that makes the anchor behave as if it held `&'r mut ()`.
    lifetime: PhantomData<&'r mut ()>,
}

impl<'r> Anchor<'r> {
    /// Creates an anchor that shares `poisoned` with its wrapper.
    pub fn new(poisoned: Rc<Cell<bool>>) -> Self {
        Self {
            poisoned,
            lifetime: PhantomData,
        }
    }
}

// NOTE(review): the `Drop` impl below is disabled, so nothing in this file ever
// sets `poisoned` to `true` and `assert_not_poisoned` currently always succeeds.
// Confirm whether that is intentional before relying on the poisoning check.
//
// impl Drop for Anchor<'_> {
//     fn drop(&mut self) {
//         self.poisoned.replace(true);
//     }
// }

/// Lifetime-erased handle to a borrowed value.
///
/// The wrapper stores a raw pointer to the value passed to [`NativeRefWrap::wrap`]
/// or [`NativeRefWrap::wrap_mut`], reinterpreted as `R`, plus a `poisoned` flag
/// shared with the returned [`Anchor`]. Accessors refuse to hand out the inner
/// reference once the flag is set.
///
/// The design is intended to poison the wrapper when the anchor goes out of
/// scope; see the note on the disabled `Drop` impl above.
pub struct NativeRefWrap<R> {
    inner_ptr: *mut R,
    poisoned: Rc<Cell<bool>>,
}

impl<R> NativeRefWrap<R> {
    /// Wraps a shared reference.
    ///
    /// The pointer to `inner` is cast to `*mut R` without any check that `I`
    /// and `R` are layout-compatible; callers are responsible for that.
    pub fn wrap<I>(inner: &I) -> (Self, Anchor<'_>) {
        Self::from_raw(inner as *const I as *mut R)
    }

    /// Wraps a mutable reference.
    ///
    /// The pointer to `inner` is cast to `*mut R` without any check that `I`
    /// and `R` are layout-compatible; callers are responsible for that.
    pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor<'_>) {
        Self::from_raw(inner as *mut I as *mut R)
    }

    /// Builds the wrapper/anchor pair around an already-cast pointer.
    fn from_raw<'r>(inner_ptr: *mut R) -> (Self, Anchor<'r>) {
        let poisoned = Rc::new(Cell::new(false));
        let anchor = Anchor::new(Rc::clone(&poisoned));

        (
            NativeRefWrap {
                inner_ptr,
                poisoned,
            },
            anchor,
        )
    }

    /// Returns the wrapped value as a shared reference.
    ///
    /// # Errors
    ///
    /// Fails when the wrapper is poisoned or the stored pointer is null.
    pub fn get(&self) -> Result<&R, &'static str> {
        self.assert_not_poisoned()?;

        // SAFETY: `inner_ptr` was derived from a reference in `wrap`/`wrap_mut`.
        // Nothing here verifies that the referent is still alive or that it is
        // really an `R`; both are obligations of whoever created the wrapper.
        unsafe { self.inner_ptr.as_ref() }.ok_or("The wrapped pointer is null.")
    }

    /// Returns the wrapped value as a mutable reference.
    ///
    /// # Errors
    ///
    /// Fails when the wrapper is poisoned or the stored pointer is null.
    pub fn get_mut(&mut self) -> Result<&mut R, &'static str> {
        self.assert_not_poisoned()?;

        // SAFETY: as in `get`. In addition, the wrapper must have been created
        // with `wrap_mut` for mutation through this pointer to be permitted;
        // that is not checked here.
        unsafe { self.inner_ptr.as_mut() }.ok_or("The wrapped pointer is null.")
    }

    /// Checks the shared `poisoned` flag.
    fn assert_not_poisoned(&self) -> Result<(), &'static str> {
        if self.poisoned.get() {
            Err("The object has been freed and can't be used anymore.")
        } else {
            Ok(())
        }
    }
}
|
data/ext/selma/src/rewriter.rs
DELETED
@@ -1,429 +0,0 @@
|
|
1
|
-
use lol_html::{
|
2
|
-
doc_comments, doctype, element,
|
3
|
-
html_content::{Element, EndTag, TextChunk},
|
4
|
-
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
|
5
|
-
};
|
6
|
-
use magnus::{exception, function, method, scan_args, Module, Object, RArray, RModule, Value};
|
7
|
-
|
8
|
-
use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
|
9
|
-
|
10
|
-
use crate::{
|
11
|
-
html::{element::SelmaHTMLElement, end_tag::SelmaHTMLEndTag, text_chunk::SelmaHTMLTextChunk},
|
12
|
-
sanitizer::SelmaSanitizer,
|
13
|
-
selector::SelmaSelector,
|
14
|
-
tags::Tag,
|
15
|
-
wrapped_struct::WrappedStruct,
|
16
|
-
};
|
17
|
-
|
18
|
-
#[derive(Clone, Debug)]
|
19
|
-
pub struct Handler {
|
20
|
-
rb_handler: Value,
|
21
|
-
rb_selector: WrappedStruct<SelmaSelector>,
|
22
|
-
|
23
|
-
total_element_handler_calls: usize,
|
24
|
-
total_elapsed_element_handlers: f64,
|
25
|
-
|
26
|
-
total_text_handler_calls: usize,
|
27
|
-
total_elapsed_text_handlers: f64,
|
28
|
-
}
|
29
|
-
|
30
|
-
pub struct Rewriter {
|
31
|
-
sanitizer: Option<SelmaSanitizer>,
|
32
|
-
handlers: Vec<Handler>,
|
33
|
-
|
34
|
-
total_elapsed: f64,
|
35
|
-
}
|
36
|
-
|
37
|
-
#[magnus::wrap(class = "Selma::Rewriter")]
|
38
|
-
pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
|
39
|
-
|
40
|
-
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
41
|
-
unsafe impl Send for SelmaRewriter {}
|
42
|
-
|
43
|
-
impl SelmaRewriter {
|
44
|
-
const SELMA_ON_END_TAG: &str = "on_end_tag";
|
45
|
-
const SELMA_HANDLE_ELEMENT: &str = "handle_element";
|
46
|
-
const SELMA_HANDLE_TEXT_CHUNK: &str = "handle_text_chunk";
|
47
|
-
|
48
|
-
/// @yard
|
49
|
-
/// @def new(sanitizer: Selma::Sanitizer.new(Selma::Sanitizer::Config::DEFAULT), handlers: [])
|
50
|
-
/// @param sanitizer [Selma::Sanitizer] The sanitizer which performs the initial cleanup
|
51
|
-
/// @param handlers [Array<Selma::Selector>] The handlers to use to perform HTML rewriting
|
52
|
-
/// @return [Selma::Rewriter]
|
53
|
-
fn new(args: &[Value]) -> Result<Self, magnus::Error> {
|
54
|
-
let (rb_sanitizer, rb_handlers) = Self::scan_parse_args(args)?;
|
55
|
-
|
56
|
-
let sanitizer = match rb_sanitizer {
|
57
|
-
None => {
|
58
|
-
let default_sanitizer = SelmaSanitizer::new(&[])?;
|
59
|
-
let wrapped_sanitizer = WrappedStruct::from(default_sanitizer);
|
60
|
-
wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
61
|
-
Some(wrapped_sanitizer.get().unwrap().to_owned())
|
62
|
-
}
|
63
|
-
Some(sanitizer_value) => match sanitizer_value {
|
64
|
-
None => None,
|
65
|
-
Some(sanitizer) => {
|
66
|
-
sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
67
|
-
Some(sanitizer.get().unwrap().to_owned())
|
68
|
-
}
|
69
|
-
},
|
70
|
-
};
|
71
|
-
|
72
|
-
let handlers = match rb_handlers {
|
73
|
-
None => vec![],
|
74
|
-
Some(rb_handlers) => {
|
75
|
-
let mut handlers: Vec<Handler> = vec![];
|
76
|
-
|
77
|
-
for h in rb_handlers.each() {
|
78
|
-
let rb_handler = h.unwrap();
|
79
|
-
|
80
|
-
// prevents missing #selector from ruining things
|
81
|
-
if !rb_handler.respond_to("selector", true).unwrap() {
|
82
|
-
let classname = unsafe { rb_handler.classname() };
|
83
|
-
return Err(magnus::Error::new(
|
84
|
-
exception::no_method_error(),
|
85
|
-
format!(
|
86
|
-
"Could not call #selector on {classname:?}; is this an object that defines it?",
|
87
|
-
|
88
|
-
),
|
89
|
-
));
|
90
|
-
}
|
91
|
-
|
92
|
-
let rb_selector: WrappedStruct<SelmaSelector> =
|
93
|
-
match rb_handler.funcall("selector", ()) {
|
94
|
-
Err(err) => {
|
95
|
-
return Err(magnus::Error::new(
|
96
|
-
exception::type_error(),
|
97
|
-
format!("Error instantiating selector: {err:?}"),
|
98
|
-
));
|
99
|
-
}
|
100
|
-
Ok(rb_selector) => rb_selector,
|
101
|
-
};
|
102
|
-
let handler = Handler {
|
103
|
-
rb_handler,
|
104
|
-
rb_selector,
|
105
|
-
total_element_handler_calls: 0,
|
106
|
-
total_elapsed_element_handlers: 0.0,
|
107
|
-
|
108
|
-
total_text_handler_calls: 0,
|
109
|
-
total_elapsed_text_handlers: 0.0,
|
110
|
-
};
|
111
|
-
handlers.push(handler);
|
112
|
-
}
|
113
|
-
handlers
|
114
|
-
}
|
115
|
-
};
|
116
|
-
|
117
|
-
if sanitizer.is_none() && handlers.is_empty() {
|
118
|
-
return Err(magnus::Error::new(
|
119
|
-
exception::arg_error(),
|
120
|
-
"Must provide a sanitizer or a handler",
|
121
|
-
));
|
122
|
-
}
|
123
|
-
|
124
|
-
Ok(Self(std::cell::RefCell::new(Rewriter {
|
125
|
-
sanitizer,
|
126
|
-
handlers,
|
127
|
-
total_elapsed: 0.0,
|
128
|
-
})))
|
129
|
-
}
|
130
|
-
|
131
|
-
#[allow(clippy::let_unit_value)]
|
132
|
-
fn scan_parse_args(
|
133
|
-
args: &[Value],
|
134
|
-
) -> Result<
|
135
|
-
(
|
136
|
-
Option<Option<WrappedStruct<SelmaSanitizer>>>,
|
137
|
-
Option<RArray>,
|
138
|
-
),
|
139
|
-
magnus::Error,
|
140
|
-
> {
|
141
|
-
let args = scan_args::scan_args(args)?;
|
142
|
-
let _: () = args.required;
|
143
|
-
let _: () = args.optional;
|
144
|
-
let _: () = args.splat;
|
145
|
-
let _: () = args.trailing;
|
146
|
-
let _: () = args.block;
|
147
|
-
|
148
|
-
let kwargs = scan_args::get_kwargs::<
|
149
|
-
_,
|
150
|
-
(),
|
151
|
-
(
|
152
|
-
Option<Option<WrappedStruct<SelmaSanitizer>>>,
|
153
|
-
Option<RArray>,
|
154
|
-
),
|
155
|
-
(),
|
156
|
-
>(args.keywords, &[], &["sanitizer", "handlers"])?;
|
157
|
-
let (rb_sanitizer, rb_handlers) = kwargs.optional;
|
158
|
-
|
159
|
-
Ok((rb_sanitizer, rb_handlers))
|
160
|
-
}
|
161
|
-
|
162
|
-
/// Perform HTML rewrite sequence.
|
163
|
-
fn rewrite(&self, html: String) -> Result<String, magnus::Error> {
|
164
|
-
let sanitized_html = match &self.0.borrow().sanitizer {
|
165
|
-
None => Ok(html),
|
166
|
-
Some(sanitizer) => {
|
167
|
-
let sanitized_html = match Self::perform_sanitization(sanitizer, &html) {
|
168
|
-
Ok(sanitized_html) => sanitized_html,
|
169
|
-
Err(err) => return Err(err),
|
170
|
-
};
|
171
|
-
|
172
|
-
String::from_utf8(sanitized_html)
|
173
|
-
}
|
174
|
-
};
|
175
|
-
let binding = self.0.borrow_mut();
|
176
|
-
let handlers = &binding.handlers;
|
177
|
-
|
178
|
-
match Self::perform_handler_rewrite(self, handlers, sanitized_html.unwrap()) {
|
179
|
-
Ok(rewritten_html) => Ok(String::from_utf8(rewritten_html).unwrap()),
|
180
|
-
Err(err) => Err(err),
|
181
|
-
}
|
182
|
-
}
|
183
|
-
|
184
|
-
fn perform_sanitization(
|
185
|
-
sanitizer: &SelmaSanitizer,
|
186
|
-
html: &String,
|
187
|
-
) -> Result<Vec<u8>, magnus::Error> {
|
188
|
-
let mut first_pass_html = vec![];
|
189
|
-
{
|
190
|
-
let mut document_content_handlers: Vec<DocumentContentHandlers> = vec![];
|
191
|
-
if !sanitizer.get_allow_doctype() {
|
192
|
-
document_content_handlers.push(doctype!(|d| {
|
193
|
-
sanitizer.remove_doctype(d);
|
194
|
-
Ok(())
|
195
|
-
}));
|
196
|
-
}
|
197
|
-
if !sanitizer.get_allow_comments() {
|
198
|
-
document_content_handlers.push(doc_comments!(|c| {
|
199
|
-
sanitizer.remove_comment(c);
|
200
|
-
Ok(())
|
201
|
-
}));
|
202
|
-
}
|
203
|
-
let mut rewriter = HtmlRewriter::new(
|
204
|
-
Settings {
|
205
|
-
document_content_handlers,
|
206
|
-
element_content_handlers: vec![element!("*", |el| {
|
207
|
-
sanitizer.try_remove_element(el);
|
208
|
-
if el.removed() {
|
209
|
-
return Ok(());
|
210
|
-
}
|
211
|
-
match sanitizer.sanitize_attributes(el) {
|
212
|
-
Ok(_) => Ok(()),
|
213
|
-
Err(err) => Err(err.to_string().into()),
|
214
|
-
}
|
215
|
-
})],
|
216
|
-
// TODO: allow for MemorySettings to be defined
|
217
|
-
..Settings::default()
|
218
|
-
},
|
219
|
-
|c: &[u8]| first_pass_html.extend_from_slice(c),
|
220
|
-
);
|
221
|
-
|
222
|
-
let result = rewriter.write(html.as_bytes());
|
223
|
-
if result.is_err() {
|
224
|
-
return Err(magnus::Error::new(
|
225
|
-
exception::runtime_error(),
|
226
|
-
format!("Failed to sanitize HTML: {}", result.unwrap_err()),
|
227
|
-
));
|
228
|
-
}
|
229
|
-
}
|
230
|
-
|
231
|
-
let mut output = vec![];
|
232
|
-
{
|
233
|
-
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
234
|
-
if sanitizer.get_escape_tagfilter() {
|
235
|
-
element_content_handlers.push(element!(Tag::ESCAPEWORTHY_TAGS_CSS, |el| {
|
236
|
-
let should_remove = sanitizer.allow_element(el);
|
237
|
-
if should_remove {
|
238
|
-
sanitizer.force_remove_element(el);
|
239
|
-
}
|
240
|
-
|
241
|
-
Ok(())
|
242
|
-
}));
|
243
|
-
}
|
244
|
-
|
245
|
-
let mut rewriter = HtmlRewriter::new(
|
246
|
-
Settings {
|
247
|
-
element_content_handlers,
|
248
|
-
..Settings::default()
|
249
|
-
},
|
250
|
-
|c: &[u8]| output.extend_from_slice(c),
|
251
|
-
);
|
252
|
-
|
253
|
-
let result = rewriter.write(first_pass_html.as_slice());
|
254
|
-
if result.is_err() {
|
255
|
-
return Err(magnus::Error::new(
|
256
|
-
exception::runtime_error(),
|
257
|
-
format!("Failed to sanitize HTML: {}", result.unwrap_err()),
|
258
|
-
));
|
259
|
-
}
|
260
|
-
}
|
261
|
-
|
262
|
-
Ok(output)
|
263
|
-
}
|
264
|
-
|
265
|
-
pub fn perform_handler_rewrite(
|
266
|
-
&self,
|
267
|
-
handlers: &[Handler],
|
268
|
-
html: String,
|
269
|
-
) -> Result<Vec<u8>, magnus::Error> {
|
270
|
-
// TODO: this should ideally be done ahead of time, not on every `#rewrite` call
|
271
|
-
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];
|
272
|
-
|
273
|
-
handlers.iter().for_each(|handler| {
|
274
|
-
let element_stack: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(vec![]));
|
275
|
-
|
276
|
-
let selector = handler.rb_selector.get_static().unwrap();
|
277
|
-
|
278
|
-
// TODO: test final raise by simulating errors
|
279
|
-
if selector.match_element().is_some() {
|
280
|
-
let closure_element_stack = element_stack.clone();
|
281
|
-
|
282
|
-
element_content_handlers.push(element!(
|
283
|
-
selector.match_element().unwrap(),
|
284
|
-
move |el| {
|
285
|
-
match Self::process_element_handlers(
|
286
|
-
handler.rb_handler,
|
287
|
-
el,
|
288
|
-
&closure_element_stack.borrow(),
|
289
|
-
) {
|
290
|
-
Ok(_) => Ok(()),
|
291
|
-
Err(err) => Err(err.to_string().into()),
|
292
|
-
}
|
293
|
-
}
|
294
|
-
));
|
295
|
-
}
|
296
|
-
|
297
|
-
if selector.match_text_within().is_some() {
|
298
|
-
let closure_element_stack = element_stack.clone();
|
299
|
-
|
300
|
-
element_content_handlers.push(text!(
|
301
|
-
selector.match_text_within().unwrap(),
|
302
|
-
move |text| {
|
303
|
-
let element_stack = closure_element_stack.as_ref().borrow();
|
304
|
-
if selector.ignore_text_within().is_some() {
|
305
|
-
// check if current tag is a tag we should be ignoring text within
|
306
|
-
let head_tag_name = element_stack.last().unwrap().to_string();
|
307
|
-
if selector
|
308
|
-
.ignore_text_within()
|
309
|
-
.unwrap()
|
310
|
-
.iter()
|
311
|
-
.any(|f| f == &head_tag_name)
|
312
|
-
{
|
313
|
-
return Ok(());
|
314
|
-
}
|
315
|
-
}
|
316
|
-
|
317
|
-
match Self::process_text_handlers(handler.rb_handler, text) {
|
318
|
-
Ok(_) => Ok(()),
|
319
|
-
Err(err) => Err(err.to_string().into()),
|
320
|
-
}
|
321
|
-
}
|
322
|
-
));
|
323
|
-
}
|
324
|
-
|
325
|
-
// we need to check *every* element we iterate over, to create a stack of elements
|
326
|
-
element_content_handlers.push(element!("*", move |el| {
|
327
|
-
let tag_name = el.tag_name().to_lowercase();
|
328
|
-
|
329
|
-
// no need to track self-closing tags
|
330
|
-
if Tag::tag_from_tag_name(&tag_name).self_closing {
|
331
|
-
return Ok(());
|
332
|
-
};
|
333
|
-
|
334
|
-
element_stack.as_ref().borrow_mut().push(tag_name);
|
335
|
-
|
336
|
-
let closure_element_stack = element_stack.clone();
|
337
|
-
el.on_end_tag(move |_end_tag: &mut EndTag| {
|
338
|
-
let mut stack = closure_element_stack.as_ref().borrow_mut();
|
339
|
-
stack.pop();
|
340
|
-
Ok(())
|
341
|
-
})?;
|
342
|
-
Ok(())
|
343
|
-
}));
|
344
|
-
});
|
345
|
-
|
346
|
-
let mut output = vec![];
|
347
|
-
{
|
348
|
-
let mut rewriter = HtmlRewriter::new(
|
349
|
-
Settings {
|
350
|
-
element_content_handlers,
|
351
|
-
..Settings::default()
|
352
|
-
},
|
353
|
-
|c: &[u8]| output.extend_from_slice(c),
|
354
|
-
);
|
355
|
-
match rewriter.write(html.as_bytes()) {
|
356
|
-
Ok(_) => {}
|
357
|
-
Err(err) => {
|
358
|
-
return Err(magnus::Error::new(
|
359
|
-
exception::runtime_error(),
|
360
|
-
format!("{err:?}"),
|
361
|
-
));
|
362
|
-
}
|
363
|
-
}
|
364
|
-
}
|
365
|
-
Ok(output)
|
366
|
-
}
|
367
|
-
|
368
|
-
fn process_element_handlers(
|
369
|
-
rb_handler: Value,
|
370
|
-
element: &mut Element,
|
371
|
-
ancestors: &[String],
|
372
|
-
) -> Result<(), magnus::Error> {
|
373
|
-
// if `on_end_tag` function is defined, call it
|
374
|
-
if rb_handler.respond_to(Self::SELMA_ON_END_TAG, true).unwrap() {
|
375
|
-
// TODO: error here is an "EndTagError"
|
376
|
-
element.on_end_tag(move |end_tag| {
|
377
|
-
let rb_end_tag = SelmaHTMLEndTag::new(end_tag);
|
378
|
-
|
379
|
-
match rb_handler.funcall::<_, _, Value>(Self::SELMA_ON_END_TAG, (rb_end_tag,)) {
|
380
|
-
Ok(_) => Ok(()),
|
381
|
-
Err(err) => Err(err.to_string().into()),
|
382
|
-
}
|
383
|
-
});
|
384
|
-
}
|
385
|
-
|
386
|
-
let rb_element = SelmaHTMLElement::new(element, ancestors);
|
387
|
-
let rb_result =
|
388
|
-
rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_ELEMENT, (rb_element,));
|
389
|
-
match rb_result {
|
390
|
-
Ok(_) => Ok(()),
|
391
|
-
Err(err) => Err(err),
|
392
|
-
}
|
393
|
-
}
|
394
|
-
|
395
|
-
fn process_text_handlers(
|
396
|
-
rb_handler: Value,
|
397
|
-
text_chunk: &mut TextChunk,
|
398
|
-
) -> Result<(), magnus::Error> {
|
399
|
-
// prevents missing `handle_text_chunk` function
|
400
|
-
let content = text_chunk.as_str();
|
401
|
-
|
402
|
-
// seems that sometimes lol-html returns blank text / EOLs?
|
403
|
-
if content.is_empty() {
|
404
|
-
return Ok(());
|
405
|
-
}
|
406
|
-
|
407
|
-
let rb_text_chunk = SelmaHTMLTextChunk::new(text_chunk);
|
408
|
-
match rb_handler.funcall::<_, _, Value>(Self::SELMA_HANDLE_TEXT_CHUNK, (rb_text_chunk,)) {
|
409
|
-
Ok(_) => Ok(()),
|
410
|
-
Err(err) => Err(magnus::Error::new(
|
411
|
-
exception::runtime_error(),
|
412
|
-
format!("{err:?}"),
|
413
|
-
)),
|
414
|
-
}
|
415
|
-
}
|
416
|
-
}
|
417
|
-
|
418
|
-
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
419
|
-
let c_rewriter = m_selma
|
420
|
-
.define_class("Rewriter", Default::default())
|
421
|
-
.expect("cannot find class Selma::Rewriter");
|
422
|
-
|
423
|
-
c_rewriter.define_singleton_method("new", function!(SelmaRewriter::new, -1))?;
|
424
|
-
c_rewriter
|
425
|
-
.define_method("rewrite", method!(SelmaRewriter::rewrite, 1))
|
426
|
-
.expect("cannot define method `rewrite`");
|
427
|
-
|
428
|
-
Ok(())
|
429
|
-
}
|