selma 0.2.2 → 0.4.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +129 -124
- data/README.md +133 -25
- data/ext/selma/Cargo.toml +6 -3
- data/ext/selma/src/html/element.rs +32 -27
- data/ext/selma/src/html/end_tag.rs +5 -5
- data/ext/selma/src/html/text_chunk.rs +55 -12
- data/ext/selma/src/native_ref_wrap.rs +30 -33
- data/ext/selma/src/rewriter.rs +299 -139
- data/ext/selma/src/sanitizer.rs +256 -138
- data/lib/selma/config.rb +12 -0
- data/lib/selma/html/element.rb +11 -0
- data/lib/selma/html.rb +2 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer/config.rb +2 -2
- data/lib/selma/sanitizer.rb +0 -77
- data/lib/selma/version.rb +1 -1
- metadata +9 -7
data/README.md
CHANGED
@@ -76,7 +76,7 @@ attributes: {
|
|
76
76
|
|
77
77
|
# URL handling protocols to allow in specific attributes. By default, no
|
78
78
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
79
|
-
# to allow relative URLs sans protocol.
|
79
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
80
80
|
protocols: {
|
81
81
|
"a" => { "href" => ["http", "https", "mailto", :relative] },
|
82
82
|
"img" => { "href" => ["http", "https"] },
|
@@ -103,7 +103,11 @@ Here's an example which rewrites the `href` attribute on `a` and the `src` attri
|
|
103
103
|
|
104
104
|
```ruby
|
105
105
|
class MatchAttribute
|
106
|
-
SELECTOR = Selma::Selector(match_element: %(a[href^="http:"], img[src^="http:"]"))
|
106
|
+
SELECTOR = Selma::Selector.new(match_element: %(a[href^="http:"], img[src^="http:"]))
|
107
|
+
|
108
|
+
def selector
|
109
|
+
SELECTOR
|
110
|
+
end
|
107
111
|
|
108
112
|
def handle_element(element)
|
109
113
|
if element.tag_name == "a"
|
@@ -130,7 +134,6 @@ The `Selma::Selector` object has three possible kwargs:
|
|
130
134
|
Here's an example for `handle_text_chunk` which changes strings in various elements which are _not_ `pre` or `code`:
|
131
135
|
|
132
136
|
```ruby
|
133
|
-
|
134
137
|
class MatchText
|
135
138
|
SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: ["pre", "code"])
|
136
139
|
|
@@ -176,40 +179,145 @@ The `element` argument in `handle_element` has the following methods:
|
|
176
179
|
- `after(content, as: content_type)`: Inserts `content` after the text. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
177
180
|
- `replace(content, as: content_type)`: Replaces the text node with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
178
181
|
|
182
|
+
## Security
|
183
|
+
|
184
|
+
Theoretically, a malicious user can provide a very large document for processing, which can exhaust the memory of the host machine. To set a limit on how much string content is processed at once, you can provide `memory` options:
|
185
|
+
|
186
|
+
```ruby
|
187
|
+
Selma::Rewriter.new(options: { memory: { max_allowed_memory_usage: 1_000_000 } }) # ~1MB
|
188
|
+
```
|
189
|
+
|
190
|
+
The structure of the `memory` options looks like this:
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
{
|
194
|
+
memory: {
|
195
|
+
max_allowed_memory_usage: 1000,
|
196
|
+
preallocated_parsing_buffer_size: 100,
|
197
|
+
}
|
198
|
+
}
|
199
|
+
```
|
200
|
+
|
201
|
+
Note that `preallocated_parsing_buffer_size` must always be less than `max_allowed_memory_usage`. See [the `lol_html` project documentation](https://docs.rs/lol_html/1.2.1/lol_html/struct.MemorySettings.html) to learn more about the default values.
|
202
|
+
|
179
203
|
## Benchmarks
|
180
204
|
|
205
|
+
When you run `bundle exec rake benchmark`, two different benchmarks are calculated. Here are the results on my machine.
|
206
|
+
|
207
|
+
### Benchmarks for just the sanitization process
|
208
|
+
|
209
|
+
Comparing Selma against popular Ruby sanitization gems:
|
210
|
+
|
211
|
+
<!-- prettier-ignore-start -->
|
212
|
+
<details>
|
213
|
+
<pre>
|
214
|
+
input size = 25309 bytes, 0.03 MB
|
215
|
+
|
216
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
217
|
+
Warming up --------------------------------------
|
218
|
+
sanitize-sm 15.000 i/100ms
|
219
|
+
selma-sm 127.000 i/100ms
|
220
|
+
Calculating -------------------------------------
|
221
|
+
sanitize-sm 157.643 (± 1.9%) i/s - 4.740k in 30.077172s
|
222
|
+
selma-sm 1.278k (± 1.5%) i/s - 38.354k in 30.019722s
|
223
|
+
|
224
|
+
Comparison:
|
225
|
+
selma-sm: 1277.9 i/s
|
226
|
+
sanitize-sm: 157.6 i/s - 8.11x slower
|
227
|
+
|
228
|
+
input size = 86686 bytes, 0.09 MB
|
229
|
+
|
230
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
231
|
+
Warming up --------------------------------------
|
232
|
+
sanitize-md 4.000 i/100ms
|
233
|
+
selma-md 33.000 i/100ms
|
234
|
+
Calculating -------------------------------------
|
235
|
+
sanitize-md 40.034 (± 5.0%) i/s - 1.200k in 30.043322s
|
236
|
+
selma-md 332.959 (± 2.1%) i/s - 9.999k in 30.045733s
|
237
|
+
|
238
|
+
Comparison:
|
239
|
+
selma-md: 333.0 i/s
|
240
|
+
sanitize-md: 40.0 i/s - 8.32x slower
|
241
|
+
|
242
|
+
input size = 7172510 bytes, 7.17 MB
|
243
|
+
|
244
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
245
|
+
Warming up --------------------------------------
|
246
|
+
sanitize-lg 1.000 i/100ms
|
247
|
+
selma-lg 1.000 i/100ms
|
248
|
+
Calculating -------------------------------------
|
249
|
+
sanitize-lg 0.141 (± 0.0%) i/s - 5.000 in 35.426127s
|
250
|
+
selma-lg 3.963 (± 0.0%) i/s - 119.000 in 30.037386s
|
251
|
+
|
252
|
+
Comparison:
|
253
|
+
selma-lg: 4.0 i/s
|
254
|
+
sanitize-lg: 0.1 i/s - 28.03x slower
|
255
|
+
|
256
|
+
</pre>
|
257
|
+
</details>
|
258
|
+
<!-- prettier-ignore-end -->
|
259
|
+
|
260
|
+
### Benchmarks for just the rewriting process
|
261
|
+
|
262
|
+
Comparing Selma against popular Ruby HTML parsing gems:
|
263
|
+
|
264
|
+
<!-- prettier-ignore-start -->
|
181
265
|
<details>
|
182
266
|
<pre>
|
183
|
-
|
184
|
-
|
267
|
+
input size = 25309 bytes, 0.03 MB
|
268
|
+
|
269
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
185
270
|
Warming up --------------------------------------
|
186
|
-
|
187
|
-
|
188
|
-
|
271
|
+
nokogiri-sm 79.000 i/100ms
|
272
|
+
nokolexbor-sm 295.000 i/100ms
|
273
|
+
selma-sm 237.000 i/100ms
|
189
274
|
Calculating -------------------------------------
|
190
|
-
|
191
|
-
|
192
|
-
|
275
|
+
nokogiri-sm 800.531 (± 2.2%) i/s - 24.016k in 30.016056s
|
276
|
+
nokolexbor-sm 3.033k (± 3.6%) i/s - 91.155k in 30.094884s
|
277
|
+
selma-sm 2.386k (± 1.6%) i/s - 71.574k in 30.001701s
|
278
|
+
|
279
|
+
Comparison:
|
280
|
+
nokolexbor-sm: 3033.1 i/s
|
281
|
+
selma-sm: 2386.3 i/s - 1.27x slower
|
282
|
+
nokogiri-sm: 800.5 i/s - 3.79x slower
|
283
|
+
|
284
|
+
input size = 86686 bytes, 0.09 MB
|
285
|
+
|
286
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
193
287
|
Warming up --------------------------------------
|
194
|
-
|
195
|
-
|
196
|
-
selma-
|
197
|
-
22.000 i/100ms
|
288
|
+
nokogiri-md 8.000 i/100ms
|
289
|
+
nokolexbor-md 43.000 i/100ms
|
290
|
+
selma-md 38.000 i/100ms
|
198
291
|
Calculating -------------------------------------
|
199
|
-
|
200
|
-
|
201
|
-
selma-
|
202
|
-
|
292
|
+
nokogiri-md 85.013 (± 8.2%) i/s - 2.024k in 52.257472s
|
293
|
+
nokolexbor-md 416.074 (±11.1%) i/s - 12.341k in 30.111613s
|
294
|
+
selma-md 361.471 (± 4.7%) i/s - 10.830k in 30.033997s
|
295
|
+
|
296
|
+
Comparison:
|
297
|
+
nokolexbor-md: 416.1 i/s
|
298
|
+
selma-md: 361.5 i/s - same-ish: difference falls within error
|
299
|
+
nokogiri-md: 85.0 i/s - 4.89x slower
|
300
|
+
|
301
|
+
input size = 7172510 bytes, 7.17 MB
|
302
|
+
|
303
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
203
304
|
Warming up --------------------------------------
|
204
|
-
|
205
|
-
|
206
|
-
selma-
|
305
|
+
nokogiri-lg 1.000 i/100ms
|
306
|
+
nokolexbor-lg 1.000 i/100ms
|
307
|
+
selma-lg 1.000 i/100ms
|
207
308
|
Calculating -------------------------------------
|
208
|
-
|
209
|
-
|
210
|
-
selma-
|
309
|
+
nokogiri-lg 0.805 (± 0.0%) i/s - 25.000 in 31.148730s
|
310
|
+
nokolexbor-lg 2.194 (± 0.0%) i/s - 66.000 in 30.278108s
|
311
|
+
selma-lg 5.541 (± 0.0%) i/s - 166.000 in 30.037197s
|
312
|
+
|
313
|
+
Comparison:
|
314
|
+
selma-lg: 5.5 i/s
|
315
|
+
nokolexbor-lg: 2.2 i/s - 2.53x slower
|
316
|
+
nokogiri-lg: 0.8 i/s - 6.88x slower
|
317
|
+
|
211
318
|
</pre>
|
212
319
|
</details>
|
320
|
+
<!-- prettier-ignore-end -->
|
213
321
|
|
214
322
|
## Contributing
|
215
323
|
|
data/ext/selma/Cargo.toml
CHANGED
@@ -6,10 +6,13 @@ rust-version = "1.75.0"
|
|
6
6
|
publish = false
|
7
7
|
|
8
8
|
[dependencies]
|
9
|
-
enum-iterator = "1
|
9
|
+
enum-iterator = "2.1"
|
10
10
|
escapist = "0.0.2"
|
11
|
-
magnus = "0.
|
12
|
-
|
11
|
+
magnus = { version = "0.7", features = ["rb-sys"] }
|
12
|
+
rb-sys = { version = "*", default-features = false, features = [
|
13
|
+
"stable-api-compiled-fallback",
|
14
|
+
] }
|
15
|
+
lol_html = "2.0"
|
13
16
|
|
14
17
|
[lib]
|
15
18
|
name = "selma"
|
@@ -1,3 +1,5 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
|
1
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
2
4
|
use lol_html::html_content::Element;
|
3
5
|
use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Value};
|
@@ -8,16 +10,14 @@ struct HTMLElement {
|
|
8
10
|
}
|
9
11
|
|
10
12
|
#[magnus::wrap(class = "Selma::HTML::Element")]
|
11
|
-
pub struct SelmaHTMLElement(
|
13
|
+
pub struct SelmaHTMLElement(RefCell<HTMLElement>);
|
12
14
|
|
13
15
|
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
14
16
|
unsafe impl Send for SelmaHTMLElement {}
|
15
17
|
|
16
18
|
impl SelmaHTMLElement {
|
17
|
-
pub fn new(
|
18
|
-
|
19
|
-
|
20
|
-
Self(std::cell::RefCell::new(HTMLElement {
|
19
|
+
pub fn new(ref_wrap: NativeRefWrap<Element<'static, 'static>>, ancestors: &[String]) -> Self {
|
20
|
+
Self(RefCell::new(HTMLElement {
|
21
21
|
element: ref_wrap,
|
22
22
|
ancestors: ancestors.to_owned(),
|
23
23
|
}))
|
@@ -26,13 +26,12 @@ impl SelmaHTMLElement {
|
|
26
26
|
fn tag_name(&self) -> Result<String, Error> {
|
27
27
|
let binding = self.0.borrow();
|
28
28
|
|
29
|
-
|
30
|
-
Ok(e.tag_name())
|
31
|
-
|
32
|
-
Err(Error::new(
|
29
|
+
match binding.element.get() {
|
30
|
+
Ok(e) => Ok(e.tag_name().to_string()),
|
31
|
+
Err(_) => Err(Error::new(
|
33
32
|
exception::runtime_error(),
|
34
33
|
"`tag_name` is not available",
|
35
|
-
))
|
34
|
+
)),
|
36
35
|
}
|
37
36
|
}
|
38
37
|
|
@@ -119,11 +118,13 @@ impl SelmaHTMLElement {
|
|
119
118
|
.iter()
|
120
119
|
.for_each(|attr| match hash.aset(attr.name(), attr.value()) {
|
121
120
|
Ok(_) => {}
|
122
|
-
Err(err) =>
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
121
|
+
Err(err) => panic!(
|
122
|
+
"{:?}",
|
123
|
+
Error::new(
|
124
|
+
exception::runtime_error(),
|
125
|
+
format!("AttributeNameError: {err:?}"),
|
126
|
+
)
|
127
|
+
),
|
127
128
|
});
|
128
129
|
}
|
129
130
|
Ok(hash)
|
@@ -139,7 +140,10 @@ impl SelmaHTMLElement {
|
|
139
140
|
.for_each(|ancestor| match array.push(RString::new(ancestor)) {
|
140
141
|
Ok(_) => {}
|
141
142
|
Err(err) => {
|
142
|
-
|
143
|
+
panic!(
|
144
|
+
"{:?}",
|
145
|
+
Error::new(exception::runtime_error(), format!("{err:?}"))
|
146
|
+
)
|
143
147
|
}
|
144
148
|
});
|
145
149
|
|
@@ -224,24 +228,25 @@ impl SelmaHTMLElement {
|
|
224
228
|
}
|
225
229
|
}
|
226
230
|
|
227
|
-
fn remove_and_keep_content(&self) {
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
231
|
+
fn remove_and_keep_content(&self) -> Result<(), Error> {
|
232
|
+
self.0
|
233
|
+
.borrow_mut()
|
234
|
+
.element
|
235
|
+
.get_mut()
|
236
|
+
.unwrap()
|
237
|
+
.remove_and_keep_content();
|
238
|
+
Ok(())
|
233
239
|
}
|
234
240
|
|
235
241
|
fn is_removed(&self) -> Result<bool, Error> {
|
236
242
|
let binding = self.0.borrow();
|
237
243
|
|
238
|
-
|
239
|
-
Ok(e.removed())
|
240
|
-
|
241
|
-
Err(Error::new(
|
244
|
+
match binding.element.get() {
|
245
|
+
Ok(e) => Ok(e.removed()),
|
246
|
+
Err(_) => Err(Error::new(
|
242
247
|
exception::runtime_error(),
|
243
248
|
"`is_removed` is not available",
|
244
|
-
))
|
249
|
+
)),
|
245
250
|
}
|
246
251
|
}
|
247
252
|
}
|
@@ -1,3 +1,5 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
|
1
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
2
4
|
use lol_html::html_content::EndTag;
|
3
5
|
use magnus::{method, Error, Module, RClass};
|
@@ -7,16 +9,14 @@ struct HTMLEndTag {
|
|
7
9
|
}
|
8
10
|
|
9
11
|
#[magnus::wrap(class = "Selma::HTML::EndTag")]
|
10
|
-
pub struct SelmaHTMLEndTag(
|
12
|
+
pub struct SelmaHTMLEndTag(RefCell<HTMLEndTag>);
|
11
13
|
|
12
14
|
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
13
15
|
unsafe impl Send for SelmaHTMLEndTag {}
|
14
16
|
|
15
17
|
impl SelmaHTMLEndTag {
|
16
|
-
pub fn new(
|
17
|
-
|
18
|
-
|
19
|
-
Self(std::cell::RefCell::new(HTMLEndTag { end_tag: ref_wrap }))
|
18
|
+
pub fn new(ref_wrap: NativeRefWrap<EndTag<'static>>) -> Self {
|
19
|
+
Self(RefCell::new(HTMLEndTag { end_tag: ref_wrap }))
|
20
20
|
}
|
21
21
|
|
22
22
|
fn tag_name(&self) -> String {
|
@@ -1,23 +1,44 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
|
1
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
2
4
|
use lol_html::html_content::{TextChunk, TextType};
|
3
5
|
use magnus::{exception, method, Error, Module, RClass, Symbol, Value};
|
4
6
|
|
5
7
|
struct HTMLTextChunk {
|
6
8
|
text_chunk: NativeRefWrap<TextChunk<'static>>,
|
9
|
+
buffer: String,
|
10
|
+
}
|
11
|
+
|
12
|
+
macro_rules! clone_buffer_if_not_empty {
|
13
|
+
($binding:expr, $buffer:expr) => {
|
14
|
+
if !$binding.buffer.is_empty() {
|
15
|
+
$buffer.clone_from(&$binding.buffer);
|
16
|
+
}
|
17
|
+
};
|
18
|
+
}
|
19
|
+
|
20
|
+
// if this is the first time we're processing this text chunk (buffer is empty),
|
21
|
+
// we carry on. Otherwise, we need to use the buffer text, not the text chunk,
|
22
|
+
// because lol-html is not designed in such a way to keep track of text chunks.
|
23
|
+
macro_rules! set_text_chunk_to_buffer {
|
24
|
+
($text_chunk:expr, $buffer:expr) => {
|
25
|
+
if !$buffer.is_empty() {
|
26
|
+
$text_chunk.set_str($buffer);
|
27
|
+
}
|
28
|
+
};
|
7
29
|
}
|
8
30
|
|
9
31
|
#[magnus::wrap(class = "Selma::HTML::TextChunk")]
|
10
|
-
pub struct SelmaHTMLTextChunk(
|
32
|
+
pub struct SelmaHTMLTextChunk(RefCell<HTMLTextChunk>);
|
11
33
|
|
12
34
|
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
13
35
|
unsafe impl Send for SelmaHTMLTextChunk {}
|
14
36
|
|
15
37
|
impl SelmaHTMLTextChunk {
|
16
|
-
pub fn new(
|
17
|
-
|
18
|
-
|
19
|
-
Self(std::cell::RefCell::new(HTMLTextChunk {
|
38
|
+
pub fn new(ref_wrap: NativeRefWrap<TextChunk<'static>>) -> Self {
|
39
|
+
Self(RefCell::new(HTMLTextChunk {
|
20
40
|
text_chunk: ref_wrap,
|
41
|
+
buffer: String::new(),
|
21
42
|
}))
|
22
43
|
}
|
23
44
|
|
@@ -54,7 +75,19 @@ impl SelmaHTMLTextChunk {
|
|
54
75
|
}
|
55
76
|
}
|
56
77
|
|
57
|
-
fn
|
78
|
+
fn is_removed(&self) -> Result<bool, Error> {
|
79
|
+
let binding = self.0.borrow();
|
80
|
+
|
81
|
+
match binding.text_chunk.get() {
|
82
|
+
Ok(tc) => Ok(tc.removed()),
|
83
|
+
Err(_) => Err(Error::new(
|
84
|
+
exception::runtime_error(),
|
85
|
+
"`is_removed` is not available",
|
86
|
+
)),
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
fn before(&self, args: &[Value]) -> Result<String, Error> {
|
58
91
|
let mut binding = self.0.borrow_mut();
|
59
92
|
let text_chunk = binding.text_chunk.get_mut().unwrap();
|
60
93
|
|
@@ -65,10 +98,10 @@ impl SelmaHTMLTextChunk {
|
|
65
98
|
|
66
99
|
text_chunk.before(&text_str, content_type);
|
67
100
|
|
68
|
-
Ok(())
|
101
|
+
Ok(text_chunk.as_str().to_string())
|
69
102
|
}
|
70
103
|
|
71
|
-
fn after(&self, args: &[Value]) -> Result<
|
104
|
+
fn after(&self, args: &[Value]) -> Result<String, Error> {
|
72
105
|
let mut binding = self.0.borrow_mut();
|
73
106
|
let text_chunk = binding.text_chunk.get_mut().unwrap();
|
74
107
|
|
@@ -79,21 +112,30 @@ impl SelmaHTMLTextChunk {
|
|
79
112
|
|
80
113
|
text_chunk.after(&text_str, content_type);
|
81
114
|
|
82
|
-
Ok(())
|
115
|
+
Ok(text_chunk.as_str().to_string())
|
83
116
|
}
|
84
117
|
|
85
|
-
fn replace(&self, args: &[Value]) -> Result<
|
118
|
+
fn replace(&self, args: &[Value]) -> Result<String, Error> {
|
86
119
|
let mut binding = self.0.borrow_mut();
|
120
|
+
let mut buffer = String::new();
|
121
|
+
|
122
|
+
clone_buffer_if_not_empty!(binding, buffer);
|
123
|
+
|
87
124
|
let text_chunk = binding.text_chunk.get_mut().unwrap();
|
88
125
|
|
126
|
+
set_text_chunk_to_buffer!(text_chunk, buffer);
|
127
|
+
|
89
128
|
let (text_str, content_type) = match crate::scan_text_args(args) {
|
90
129
|
Ok((text_str, content_type)) => (text_str, content_type),
|
91
130
|
Err(err) => return Err(err),
|
92
131
|
};
|
93
|
-
|
94
132
|
text_chunk.replace(&text_str, content_type);
|
95
133
|
|
96
|
-
|
134
|
+
text_chunk.set_str(text_str.clone());
|
135
|
+
|
136
|
+
binding.buffer = text_chunk.as_str().to_string();
|
137
|
+
|
138
|
+
Ok(text_str)
|
97
139
|
}
|
98
140
|
}
|
99
141
|
|
@@ -108,6 +150,7 @@ pub fn init(c_html: RClass) -> Result<(), Error> {
|
|
108
150
|
c_text_chunk.define_method("before", method!(SelmaHTMLTextChunk::before, -1))?;
|
109
151
|
c_text_chunk.define_method("after", method!(SelmaHTMLTextChunk::after, -1))?;
|
110
152
|
c_text_chunk.define_method("replace", method!(SelmaHTMLTextChunk::replace, -1))?;
|
153
|
+
c_text_chunk.define_method("removed?", method!(SelmaHTMLTextChunk::is_removed, 0))?;
|
111
154
|
|
112
155
|
Ok(())
|
113
156
|
}
|
@@ -1,15 +1,18 @@
|
|
1
|
-
use std::{
|
1
|
+
use std::{
|
2
|
+
marker::PhantomData,
|
3
|
+
sync::{Arc, Mutex},
|
4
|
+
};
|
2
5
|
|
3
|
-
// NOTE:
|
4
|
-
//
|
6
|
+
// NOTE: this was inspired by
|
7
|
+
// https://github.com/worker-tools/html-rewriter-wasm/blob/92bafdfa34c809c37036f57cb282184cada3bbc9/src/handlers.rs
|
5
8
|
|
6
9
|
pub struct Anchor<'r> {
|
7
|
-
poisoned:
|
10
|
+
poisoned: Arc<Mutex<bool>>,
|
8
11
|
lifetime: PhantomData<&'r mut ()>,
|
9
12
|
}
|
10
13
|
|
11
14
|
impl<'r> Anchor<'r> {
|
12
|
-
pub fn new(poisoned:
|
15
|
+
pub fn new(poisoned: Arc<Mutex<bool>>) -> Self {
|
13
16
|
Anchor {
|
14
17
|
poisoned,
|
15
18
|
lifetime: PhantomData,
|
@@ -17,44 +20,46 @@ impl<'r> Anchor<'r> {
|
|
17
20
|
}
|
18
21
|
}
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
23
|
+
impl Drop for Anchor<'_> {
|
24
|
+
fn drop(&mut self) {
|
25
|
+
*self.poisoned.lock().unwrap() = true;
|
26
|
+
}
|
27
|
+
}
|
25
28
|
|
26
|
-
// NOTE:
|
27
|
-
// we create a wrapper that erases all the lifetime information from the inner reference
|
29
|
+
// NOTE: So far as I understand it, there's no great way to work between lol_html's lifetimes and FFI.
|
30
|
+
// To work around that, we create a wrapper that erases all the lifetime information from the inner reference
|
28
31
|
// and provides an anchor object that keeps track of the lifetime in the runtime.
|
29
32
|
//
|
30
33
|
// When anchor goes out of scope, wrapper becomes poisoned and any attempt to get inner
|
31
34
|
// object results in exception.
|
35
|
+
#[derive(Clone)]
|
32
36
|
pub struct NativeRefWrap<R> {
|
33
37
|
inner_ptr: *mut R,
|
34
|
-
poisoned:
|
38
|
+
poisoned: Arc<Mutex<bool>>,
|
35
39
|
}
|
36
40
|
|
37
41
|
impl<R> NativeRefWrap<R> {
|
38
|
-
pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
|
42
|
+
pub fn wrap<I>(inner: &mut I) -> (Self, Anchor) {
|
39
43
|
let wrap = NativeRefWrap {
|
40
|
-
inner_ptr: inner as *
|
41
|
-
poisoned:
|
44
|
+
inner_ptr: inner as *mut I as *mut R,
|
45
|
+
poisoned: Arc::new(Mutex::new(false)),
|
42
46
|
};
|
43
47
|
|
44
|
-
let anchor = Anchor::new(
|
48
|
+
let anchor = Anchor::new(Arc::clone(&wrap.poisoned));
|
45
49
|
|
46
50
|
(wrap, anchor)
|
47
51
|
}
|
48
52
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
fn assert_not_poisoned(&self) -> Result<(), &'static str> {
|
54
|
+
if self.is_poisoned() {
|
55
|
+
Err("The object has been freed and can't be used anymore.")
|
56
|
+
} else {
|
57
|
+
Ok(())
|
58
|
+
}
|
59
|
+
}
|
56
60
|
|
57
|
-
|
61
|
+
pub fn is_poisoned(&self) -> bool {
|
62
|
+
*self.poisoned.lock().unwrap()
|
58
63
|
}
|
59
64
|
|
60
65
|
pub fn get(&self) -> Result<&R, &'static str> {
|
@@ -68,12 +73,4 @@ impl<R> NativeRefWrap<R> {
|
|
68
73
|
|
69
74
|
Ok(unsafe { self.inner_ptr.as_mut() }.unwrap())
|
70
75
|
}
|
71
|
-
|
72
|
-
fn assert_not_poisoned(&self) -> Result<(), &'static str> {
|
73
|
-
if self.poisoned.get() {
|
74
|
-
Err("The object has been freed and can't be used anymore.")
|
75
|
-
} else {
|
76
|
-
Ok(())
|
77
|
-
}
|
78
|
-
}
|
79
76
|
}
|