selma 0.2.2 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +129 -124
- data/README.md +133 -25
- data/ext/selma/Cargo.toml +6 -3
- data/ext/selma/src/html/element.rs +32 -27
- data/ext/selma/src/html/end_tag.rs +5 -5
- data/ext/selma/src/html/text_chunk.rs +55 -12
- data/ext/selma/src/native_ref_wrap.rs +30 -33
- data/ext/selma/src/rewriter.rs +299 -139
- data/ext/selma/src/sanitizer.rs +256 -138
- data/lib/selma/config.rb +12 -0
- data/lib/selma/html/element.rb +11 -0
- data/lib/selma/html.rb +2 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer/config.rb +2 -2
- data/lib/selma/sanitizer.rb +0 -77
- data/lib/selma/version.rb +1 -1
- metadata +9 -7
data/README.md
CHANGED
@@ -76,7 +76,7 @@ attributes: {
|
|
76
76
|
|
77
77
|
# URL handling protocols to allow in specific attributes. By default, no
|
78
78
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
79
|
-
# to allow relative URLs sans protocol.
|
79
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
80
80
|
protocols: {
|
81
81
|
"a" => { "href" => ["http", "https", "mailto", :relative] },
|
82
82
|
"img" => { "href" => ["http", "https"] },
|
@@ -103,7 +103,11 @@ Here's an example which rewrites the `href` attribute on `a` and the `src` attri
|
|
103
103
|
|
104
104
|
```ruby
|
105
105
|
class MatchAttribute
|
106
|
-
SELECTOR = Selma::Selector(match_element: %(a[href^="http:"], img[src^="http:"]"))
|
106
|
+
SELECTOR = Selma::Selector.new(match_element: %(a[href^="http:"], img[src^="http:"]"))
|
107
|
+
|
108
|
+
def selector
|
109
|
+
SELECTOR
|
110
|
+
end
|
107
111
|
|
108
112
|
def handle_element(element)
|
109
113
|
if element.tag_name == "a"
|
@@ -130,7 +134,6 @@ The `Selma::Selector` object has three possible kwargs:
|
|
130
134
|
Here's an example for `handle_text_chunk` which changes strings in various elements which are _not_ `pre` or `code`:
|
131
135
|
|
132
136
|
```ruby
|
133
|
-
|
134
137
|
class MatchText
|
135
138
|
SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: ["pre", "code"])
|
136
139
|
|
@@ -176,40 +179,145 @@ The `element` argument in `handle_element` has the following methods:
|
|
176
179
|
- `after(content, as: content_type)`: Inserts `content` after the text. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
177
180
|
- `replace(content, as: content_type)`: Replaces the text node with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
178
181
|
|
182
|
+
## Security
|
183
|
+
|
184
|
+
Theoretically, a malicious user can provide a very large document for processing, which can exhaust the memory of the host machine. To set a limit on how much string content is processed at once, you can provide `memory` options:
|
185
|
+
|
186
|
+
```ruby
|
187
|
+
Selma::Rewriter.new(options: { memory: { max_allowed_memory_usage: 1_000_000 } }) # ~1MB
|
188
|
+
```
|
189
|
+
|
190
|
+
The structure of the `memory` options looks like this:
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
{
|
194
|
+
memory: {
|
195
|
+
max_allowed_memory_usage: 1000,
|
196
|
+
preallocated_parsing_buffer_size: 100,
|
197
|
+
}
|
198
|
+
}
|
199
|
+
```
|
200
|
+
|
201
|
+
Note that `preallocated_parsing_buffer_size` must always be less than `max_allowed_memory_usage`. See [the `lol_html` project documentation](https://docs.rs/lol_html/1.2.1/lol_html/struct.MemorySettings.html) to learn more about the default values.
|
202
|
+
|
179
203
|
## Benchmarks
|
180
204
|
|
205
|
+
When you run `bundle exec rake benchmark`, two different benchmarks are calculated. Here are those results on my machine.
|
206
|
+
|
207
|
+
### Benchmarks for just the sanitization process
|
208
|
+
|
209
|
+
Comparing Selma against popular Ruby sanitization gems:
|
210
|
+
|
211
|
+
<!-- prettier-ignore-start -->
|
212
|
+
<details>
|
213
|
+
<pre>
|
214
|
+
input size = 25309 bytes, 0.03 MB
|
215
|
+
|
216
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
217
|
+
Warming up --------------------------------------
|
218
|
+
sanitize-sm 15.000 i/100ms
|
219
|
+
selma-sm 127.000 i/100ms
|
220
|
+
Calculating -------------------------------------
|
221
|
+
sanitize-sm 157.643 (± 1.9%) i/s - 4.740k in 30.077172s
|
222
|
+
selma-sm 1.278k (± 1.5%) i/s - 38.354k in 30.019722s
|
223
|
+
|
224
|
+
Comparison:
|
225
|
+
selma-sm: 1277.9 i/s
|
226
|
+
sanitize-sm: 157.6 i/s - 8.11x slower
|
227
|
+
|
228
|
+
input size = 86686 bytes, 0.09 MB
|
229
|
+
|
230
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
231
|
+
Warming up --------------------------------------
|
232
|
+
sanitize-md 4.000 i/100ms
|
233
|
+
selma-md 33.000 i/100ms
|
234
|
+
Calculating -------------------------------------
|
235
|
+
sanitize-md 40.034 (± 5.0%) i/s - 1.200k in 30.043322s
|
236
|
+
selma-md 332.959 (± 2.1%) i/s - 9.999k in 30.045733s
|
237
|
+
|
238
|
+
Comparison:
|
239
|
+
selma-md: 333.0 i/s
|
240
|
+
sanitize-md: 40.0 i/s - 8.32x slower
|
241
|
+
|
242
|
+
input size = 7172510 bytes, 7.17 MB
|
243
|
+
|
244
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
245
|
+
Warming up --------------------------------------
|
246
|
+
sanitize-lg 1.000 i/100ms
|
247
|
+
selma-lg 1.000 i/100ms
|
248
|
+
Calculating -------------------------------------
|
249
|
+
sanitize-lg 0.141 (± 0.0%) i/s - 5.000 in 35.426127s
|
250
|
+
selma-lg 3.963 (± 0.0%) i/s - 119.000 in 30.037386s
|
251
|
+
|
252
|
+
Comparison:
|
253
|
+
selma-lg: 4.0 i/s
|
254
|
+
sanitize-lg: 0.1 i/s - 28.03x slower
|
255
|
+
|
256
|
+
</pre>
|
257
|
+
</details>
|
258
|
+
<!-- prettier-ignore-end -->
|
259
|
+
|
260
|
+
### Benchmarks for just the rewriting process
|
261
|
+
|
262
|
+
Comparing Selma against popular Ruby HTML parsing gems:
|
263
|
+
|
264
|
+
<!-- prettier-ignore-start -->
|
181
265
|
<details>
|
182
266
|
<pre>
|
183
|
-
|
184
|
-
|
267
|
+
input size = 25309 bytes, 0.03 MB
|
268
|
+
|
269
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
185
270
|
Warming up --------------------------------------
|
186
|
-
|
187
|
-
|
188
|
-
|
271
|
+
nokogiri-sm 79.000 i/100ms
|
272
|
+
nokolexbor-sm 295.000 i/100ms
|
273
|
+
selma-sm 237.000 i/100ms
|
189
274
|
Calculating -------------------------------------
|
190
|
-
|
191
|
-
|
192
|
-
|
275
|
+
nokogiri-sm 800.531 (± 2.2%) i/s - 24.016k in 30.016056s
|
276
|
+
nokolexbor-sm 3.033k (± 3.6%) i/s - 91.155k in 30.094884s
|
277
|
+
selma-sm 2.386k (± 1.6%) i/s - 71.574k in 30.001701s
|
278
|
+
|
279
|
+
Comparison:
|
280
|
+
nokolexbor-sm: 3033.1 i/s
|
281
|
+
selma-sm: 2386.3 i/s - 1.27x slower
|
282
|
+
nokogiri-sm: 800.5 i/s - 3.79x slower
|
283
|
+
|
284
|
+
input size = 86686 bytes, 0.09 MB
|
285
|
+
|
286
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
193
287
|
Warming up --------------------------------------
|
194
|
-
|
195
|
-
|
196
|
-
selma-
|
197
|
-
22.000 i/100ms
|
288
|
+
nokogiri-md 8.000 i/100ms
|
289
|
+
nokolexbor-md 43.000 i/100ms
|
290
|
+
selma-md 38.000 i/100ms
|
198
291
|
Calculating -------------------------------------
|
199
|
-
|
200
|
-
|
201
|
-
selma-
|
202
|
-
|
292
|
+
nokogiri-md 85.013 (± 8.2%) i/s - 2.024k in 52.257472s
|
293
|
+
nokolexbor-md 416.074 (±11.1%) i/s - 12.341k in 30.111613s
|
294
|
+
selma-md 361.471 (± 4.7%) i/s - 10.830k in 30.033997s
|
295
|
+
|
296
|
+
Comparison:
|
297
|
+
nokolexbor-md: 416.1 i/s
|
298
|
+
selma-md: 361.5 i/s - same-ish: difference falls within error
|
299
|
+
nokogiri-md: 85.0 i/s - 4.89x slower
|
300
|
+
|
301
|
+
input size = 7172510 bytes, 7.17 MB
|
302
|
+
|
303
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
203
304
|
Warming up --------------------------------------
|
204
|
-
|
205
|
-
|
206
|
-
selma-
|
305
|
+
nokogiri-lg 1.000 i/100ms
|
306
|
+
nokolexbor-lg 1.000 i/100ms
|
307
|
+
selma-lg 1.000 i/100ms
|
207
308
|
Calculating -------------------------------------
|
208
|
-
|
209
|
-
|
210
|
-
selma-
|
309
|
+
nokogiri-lg 0.805 (± 0.0%) i/s - 25.000 in 31.148730s
|
310
|
+
nokolexbor-lg 2.194 (± 0.0%) i/s - 66.000 in 30.278108s
|
311
|
+
selma-lg 5.541 (± 0.0%) i/s - 166.000 in 30.037197s
|
312
|
+
|
313
|
+
Comparison:
|
314
|
+
selma-lg: 5.5 i/s
|
315
|
+
nokolexbor-lg: 2.2 i/s - 2.53x slower
|
316
|
+
nokogiri-lg: 0.8 i/s - 6.88x slower
|
317
|
+
|
211
318
|
</pre>
|
212
319
|
</details>
|
320
|
+
<!-- prettier-ignore-end -->
|
213
321
|
|
214
322
|
## Contributing
|
215
323
|
|
data/ext/selma/Cargo.toml
CHANGED
@@ -6,10 +6,13 @@ rust-version = "1.75.0"
|
|
6
6
|
publish = false
|
7
7
|
|
8
8
|
[dependencies]
|
9
|
-
enum-iterator = "1
|
9
|
+
enum-iterator = "2.1"
|
10
10
|
escapist = "0.0.2"
|
11
|
-
magnus = "0.
|
12
|
-
|
11
|
+
magnus = { version = "0.7", features = ["rb-sys"] }
|
12
|
+
rb-sys = { version = "*", default-features = false, features = [
|
13
|
+
"stable-api-compiled-fallback",
|
14
|
+
] }
|
15
|
+
lol_html = "2.0"
|
13
16
|
|
14
17
|
[lib]
|
15
18
|
name = "selma"
|
@@ -1,3 +1,5 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
|
1
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
2
4
|
use lol_html::html_content::Element;
|
3
5
|
use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Value};
|
@@ -8,16 +10,14 @@ struct HTMLElement {
|
|
8
10
|
}
|
9
11
|
|
10
12
|
#[magnus::wrap(class = "Selma::HTML::Element")]
|
11
|
-
pub struct SelmaHTMLElement(
|
13
|
+
pub struct SelmaHTMLElement(RefCell<HTMLElement>);
|
12
14
|
|
13
15
|
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
14
16
|
unsafe impl Send for SelmaHTMLElement {}
|
15
17
|
|
16
18
|
impl SelmaHTMLElement {
|
17
|
-
pub fn new(
|
18
|
-
|
19
|
-
|
20
|
-
Self(std::cell::RefCell::new(HTMLElement {
|
19
|
+
pub fn new(ref_wrap: NativeRefWrap<Element<'static, 'static>>, ancestors: &[String]) -> Self {
|
20
|
+
Self(RefCell::new(HTMLElement {
|
21
21
|
element: ref_wrap,
|
22
22
|
ancestors: ancestors.to_owned(),
|
23
23
|
}))
|
@@ -26,13 +26,12 @@ impl SelmaHTMLElement {
|
|
26
26
|
fn tag_name(&self) -> Result<String, Error> {
|
27
27
|
let binding = self.0.borrow();
|
28
28
|
|
29
|
-
|
30
|
-
Ok(e.tag_name())
|
31
|
-
|
32
|
-
Err(Error::new(
|
29
|
+
match binding.element.get() {
|
30
|
+
Ok(e) => Ok(e.tag_name().to_string()),
|
31
|
+
Err(_) => Err(Error::new(
|
33
32
|
exception::runtime_error(),
|
34
33
|
"`tag_name` is not available",
|
35
|
-
))
|
34
|
+
)),
|
36
35
|
}
|
37
36
|
}
|
38
37
|
|
@@ -119,11 +118,13 @@ impl SelmaHTMLElement {
|
|
119
118
|
.iter()
|
120
119
|
.for_each(|attr| match hash.aset(attr.name(), attr.value()) {
|
121
120
|
Ok(_) => {}
|
122
|
-
Err(err) =>
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
121
|
+
Err(err) => panic!(
|
122
|
+
"{:?}",
|
123
|
+
Error::new(
|
124
|
+
exception::runtime_error(),
|
125
|
+
format!("AttributeNameError: {err:?}"),
|
126
|
+
)
|
127
|
+
),
|
127
128
|
});
|
128
129
|
}
|
129
130
|
Ok(hash)
|
@@ -139,7 +140,10 @@ impl SelmaHTMLElement {
|
|
139
140
|
.for_each(|ancestor| match array.push(RString::new(ancestor)) {
|
140
141
|
Ok(_) => {}
|
141
142
|
Err(err) => {
|
142
|
-
|
143
|
+
panic!(
|
144
|
+
"{:?}",
|
145
|
+
Error::new(exception::runtime_error(), format!("{err:?}"))
|
146
|
+
)
|
143
147
|
}
|
144
148
|
});
|
145
149
|
|
@@ -224,24 +228,25 @@ impl SelmaHTMLElement {
|
|
224
228
|
}
|
225
229
|
}
|
226
230
|
|
227
|
-
fn remove_and_keep_content(&self) {
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
231
|
+
fn remove_and_keep_content(&self) -> Result<(), Error> {
|
232
|
+
self.0
|
233
|
+
.borrow_mut()
|
234
|
+
.element
|
235
|
+
.get_mut()
|
236
|
+
.unwrap()
|
237
|
+
.remove_and_keep_content();
|
238
|
+
Ok(())
|
233
239
|
}
|
234
240
|
|
235
241
|
fn is_removed(&self) -> Result<bool, Error> {
|
236
242
|
let binding = self.0.borrow();
|
237
243
|
|
238
|
-
|
239
|
-
Ok(e.removed())
|
240
|
-
|
241
|
-
Err(Error::new(
|
244
|
+
match binding.element.get() {
|
245
|
+
Ok(e) => Ok(e.removed()),
|
246
|
+
Err(_) => Err(Error::new(
|
242
247
|
exception::runtime_error(),
|
243
248
|
"`is_removed` is not available",
|
244
|
-
))
|
249
|
+
)),
|
245
250
|
}
|
246
251
|
}
|
247
252
|
}
|
@@ -1,3 +1,5 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
|
1
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
2
4
|
use lol_html::html_content::EndTag;
|
3
5
|
use magnus::{method, Error, Module, RClass};
|
@@ -7,16 +9,14 @@ struct HTMLEndTag {
|
|
7
9
|
}
|
8
10
|
|
9
11
|
#[magnus::wrap(class = "Selma::HTML::EndTag")]
|
10
|
-
pub struct SelmaHTMLEndTag(
|
12
|
+
pub struct SelmaHTMLEndTag(RefCell<HTMLEndTag>);
|
11
13
|
|
12
14
|
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
13
15
|
unsafe impl Send for SelmaHTMLEndTag {}
|
14
16
|
|
15
17
|
impl SelmaHTMLEndTag {
|
16
|
-
pub fn new(
|
17
|
-
|
18
|
-
|
19
|
-
Self(std::cell::RefCell::new(HTMLEndTag { end_tag: ref_wrap }))
|
18
|
+
pub fn new(ref_wrap: NativeRefWrap<EndTag<'static>>) -> Self {
|
19
|
+
Self(RefCell::new(HTMLEndTag { end_tag: ref_wrap }))
|
20
20
|
}
|
21
21
|
|
22
22
|
fn tag_name(&self) -> String {
|
@@ -1,23 +1,44 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
|
1
3
|
use crate::native_ref_wrap::NativeRefWrap;
|
2
4
|
use lol_html::html_content::{TextChunk, TextType};
|
3
5
|
use magnus::{exception, method, Error, Module, RClass, Symbol, Value};
|
4
6
|
|
5
7
|
struct HTMLTextChunk {
|
6
8
|
text_chunk: NativeRefWrap<TextChunk<'static>>,
|
9
|
+
buffer: String,
|
10
|
+
}
|
11
|
+
|
12
|
+
macro_rules! clone_buffer_if_not_empty {
|
13
|
+
($binding:expr, $buffer:expr) => {
|
14
|
+
if !$binding.buffer.is_empty() {
|
15
|
+
$buffer.clone_from(&$binding.buffer);
|
16
|
+
}
|
17
|
+
};
|
18
|
+
}
|
19
|
+
|
20
|
+
// if this is the first time we're processing this text chunk (buffer is empty),
|
21
|
+
// we carry on. Otherwise, we need to use the buffer text, not the text chunk,
|
22
|
+
// because lol-html is not designed in such a way to keep track of text chunks.
|
23
|
+
macro_rules! set_text_chunk_to_buffer {
|
24
|
+
($text_chunk:expr, $buffer:expr) => {
|
25
|
+
if !$buffer.is_empty() {
|
26
|
+
$text_chunk.set_str($buffer);
|
27
|
+
}
|
28
|
+
};
|
7
29
|
}
|
8
30
|
|
9
31
|
#[magnus::wrap(class = "Selma::HTML::TextChunk")]
|
10
|
-
pub struct SelmaHTMLTextChunk(
|
32
|
+
pub struct SelmaHTMLTextChunk(RefCell<HTMLTextChunk>);
|
11
33
|
|
12
34
|
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
13
35
|
unsafe impl Send for SelmaHTMLTextChunk {}
|
14
36
|
|
15
37
|
impl SelmaHTMLTextChunk {
|
16
|
-
pub fn new(
|
17
|
-
|
18
|
-
|
19
|
-
Self(std::cell::RefCell::new(HTMLTextChunk {
|
38
|
+
pub fn new(ref_wrap: NativeRefWrap<TextChunk<'static>>) -> Self {
|
39
|
+
Self(RefCell::new(HTMLTextChunk {
|
20
40
|
text_chunk: ref_wrap,
|
41
|
+
buffer: String::new(),
|
21
42
|
}))
|
22
43
|
}
|
23
44
|
|
@@ -54,7 +75,19 @@ impl SelmaHTMLTextChunk {
|
|
54
75
|
}
|
55
76
|
}
|
56
77
|
|
57
|
-
fn
|
78
|
+
fn is_removed(&self) -> Result<bool, Error> {
|
79
|
+
let binding = self.0.borrow();
|
80
|
+
|
81
|
+
match binding.text_chunk.get() {
|
82
|
+
Ok(tc) => Ok(tc.removed()),
|
83
|
+
Err(_) => Err(Error::new(
|
84
|
+
exception::runtime_error(),
|
85
|
+
"`is_removed` is not available",
|
86
|
+
)),
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
fn before(&self, args: &[Value]) -> Result<String, Error> {
|
58
91
|
let mut binding = self.0.borrow_mut();
|
59
92
|
let text_chunk = binding.text_chunk.get_mut().unwrap();
|
60
93
|
|
@@ -65,10 +98,10 @@ impl SelmaHTMLTextChunk {
|
|
65
98
|
|
66
99
|
text_chunk.before(&text_str, content_type);
|
67
100
|
|
68
|
-
Ok(())
|
101
|
+
Ok(text_chunk.as_str().to_string())
|
69
102
|
}
|
70
103
|
|
71
|
-
fn after(&self, args: &[Value]) -> Result<
|
104
|
+
fn after(&self, args: &[Value]) -> Result<String, Error> {
|
72
105
|
let mut binding = self.0.borrow_mut();
|
73
106
|
let text_chunk = binding.text_chunk.get_mut().unwrap();
|
74
107
|
|
@@ -79,21 +112,30 @@ impl SelmaHTMLTextChunk {
|
|
79
112
|
|
80
113
|
text_chunk.after(&text_str, content_type);
|
81
114
|
|
82
|
-
Ok(())
|
115
|
+
Ok(text_chunk.as_str().to_string())
|
83
116
|
}
|
84
117
|
|
85
|
-
fn replace(&self, args: &[Value]) -> Result<
|
118
|
+
fn replace(&self, args: &[Value]) -> Result<String, Error> {
|
86
119
|
let mut binding = self.0.borrow_mut();
|
120
|
+
let mut buffer = String::new();
|
121
|
+
|
122
|
+
clone_buffer_if_not_empty!(binding, buffer);
|
123
|
+
|
87
124
|
let text_chunk = binding.text_chunk.get_mut().unwrap();
|
88
125
|
|
126
|
+
set_text_chunk_to_buffer!(text_chunk, buffer);
|
127
|
+
|
89
128
|
let (text_str, content_type) = match crate::scan_text_args(args) {
|
90
129
|
Ok((text_str, content_type)) => (text_str, content_type),
|
91
130
|
Err(err) => return Err(err),
|
92
131
|
};
|
93
|
-
|
94
132
|
text_chunk.replace(&text_str, content_type);
|
95
133
|
|
96
|
-
|
134
|
+
text_chunk.set_str(text_str.clone());
|
135
|
+
|
136
|
+
binding.buffer = text_chunk.as_str().to_string();
|
137
|
+
|
138
|
+
Ok(text_str)
|
97
139
|
}
|
98
140
|
}
|
99
141
|
|
@@ -108,6 +150,7 @@ pub fn init(c_html: RClass) -> Result<(), Error> {
|
|
108
150
|
c_text_chunk.define_method("before", method!(SelmaHTMLTextChunk::before, -1))?;
|
109
151
|
c_text_chunk.define_method("after", method!(SelmaHTMLTextChunk::after, -1))?;
|
110
152
|
c_text_chunk.define_method("replace", method!(SelmaHTMLTextChunk::replace, -1))?;
|
153
|
+
c_text_chunk.define_method("removed?", method!(SelmaHTMLTextChunk::is_removed, 0))?;
|
111
154
|
|
112
155
|
Ok(())
|
113
156
|
}
|
@@ -1,15 +1,18 @@
|
|
1
|
-
use std::{
|
1
|
+
use std::{
|
2
|
+
marker::PhantomData,
|
3
|
+
sync::{Arc, Mutex},
|
4
|
+
};
|
2
5
|
|
3
|
-
// NOTE:
|
4
|
-
//
|
6
|
+
// NOTE: this was inspired by
|
7
|
+
// https://github.com/worker-tools/html-rewriter-wasm/blob/92bafdfa34c809c37036f57cb282184cada3bbc9/src/handlers.rs
|
5
8
|
|
6
9
|
pub struct Anchor<'r> {
|
7
|
-
poisoned:
|
10
|
+
poisoned: Arc<Mutex<bool>>,
|
8
11
|
lifetime: PhantomData<&'r mut ()>,
|
9
12
|
}
|
10
13
|
|
11
14
|
impl<'r> Anchor<'r> {
|
12
|
-
pub fn new(poisoned:
|
15
|
+
pub fn new(poisoned: Arc<Mutex<bool>>) -> Self {
|
13
16
|
Anchor {
|
14
17
|
poisoned,
|
15
18
|
lifetime: PhantomData,
|
@@ -17,44 +20,46 @@ impl<'r> Anchor<'r> {
|
|
17
20
|
}
|
18
21
|
}
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
23
|
+
impl Drop for Anchor<'_> {
|
24
|
+
fn drop(&mut self) {
|
25
|
+
*self.poisoned.lock().unwrap() = true;
|
26
|
+
}
|
27
|
+
}
|
25
28
|
|
26
|
-
// NOTE:
|
27
|
-
// we create a wrapper that erases all the lifetime information from the inner reference
|
29
|
+
// NOTE: So far as I understand it, there's no great way to work between lol_html's lifetimes and FFI.
|
30
|
+
// To work around that, we create a wrapper that erases all the lifetime information from the inner reference
|
28
31
|
// and provides an anchor object that keeps track of the lifetime in the runtime.
|
29
32
|
//
|
30
33
|
// When anchor goes out of scope, wrapper becomes poisoned and any attempt to get inner
|
31
34
|
// object results in exception.
|
35
|
+
#[derive(Clone)]
|
32
36
|
pub struct NativeRefWrap<R> {
|
33
37
|
inner_ptr: *mut R,
|
34
|
-
poisoned:
|
38
|
+
poisoned: Arc<Mutex<bool>>,
|
35
39
|
}
|
36
40
|
|
37
41
|
impl<R> NativeRefWrap<R> {
|
38
|
-
pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
|
42
|
+
pub fn wrap<I>(inner: &mut I) -> (Self, Anchor) {
|
39
43
|
let wrap = NativeRefWrap {
|
40
|
-
inner_ptr: inner as *
|
41
|
-
poisoned:
|
44
|
+
inner_ptr: inner as *mut I as *mut R,
|
45
|
+
poisoned: Arc::new(Mutex::new(false)),
|
42
46
|
};
|
43
47
|
|
44
|
-
let anchor = Anchor::new(
|
48
|
+
let anchor = Anchor::new(Arc::clone(&wrap.poisoned));
|
45
49
|
|
46
50
|
(wrap, anchor)
|
47
51
|
}
|
48
52
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
fn assert_not_poisoned(&self) -> Result<(), &'static str> {
|
54
|
+
if self.is_poisoned() {
|
55
|
+
Err("The object has been freed and can't be used anymore.")
|
56
|
+
} else {
|
57
|
+
Ok(())
|
58
|
+
}
|
59
|
+
}
|
56
60
|
|
57
|
-
|
61
|
+
pub fn is_poisoned(&self) -> bool {
|
62
|
+
*self.poisoned.lock().unwrap()
|
58
63
|
}
|
59
64
|
|
60
65
|
pub fn get(&self) -> Result<&R, &'static str> {
|
@@ -68,12 +73,4 @@ impl<R> NativeRefWrap<R> {
|
|
68
73
|
|
69
74
|
Ok(unsafe { self.inner_ptr.as_mut() }.unwrap())
|
70
75
|
}
|
71
|
-
|
72
|
-
fn assert_not_poisoned(&self) -> Result<(), &'static str> {
|
73
|
-
if self.poisoned.get() {
|
74
|
-
Err("The object has been freed and can't be used anymore.")
|
75
|
-
} else {
|
76
|
-
Ok(())
|
77
|
-
}
|
78
|
-
}
|
79
76
|
}
|