selma 0.4.11 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +164 -348
- data/README.md +3 -3
- data/ext/selma/Cargo.toml +3 -3
- data/ext/selma/src/html/element.rs +23 -15
- data/ext/selma/src/html/end_tag.rs +3 -2
- data/ext/selma/src/html/text_chunk.rs +13 -11
- data/ext/selma/src/html.rs +3 -6
- data/ext/selma/src/lib.rs +7 -4
- data/ext/selma/src/native_ref_wrap.rs +1 -1
- data/ext/selma/src/rewriter.rs +42 -40
- data/ext/selma/src/sanitizer.rs +20 -20
- data/ext/selma/src/selector.rs +9 -9
- data/lib/selma/version.rb +1 -1
- metadata +9 -6
data/ext/selma/src/sanitizer.rs
CHANGED
|
@@ -5,7 +5,7 @@ use lol_html::{
|
|
|
5
5
|
html_content::{Comment, ContentType, Doctype, Element, EndTag},
|
|
6
6
|
};
|
|
7
7
|
use magnus::{
|
|
8
|
-
|
|
8
|
+
eval, function, method,
|
|
9
9
|
r_hash::ForEach,
|
|
10
10
|
scan_args,
|
|
11
11
|
value::{Opaque, ReprValue},
|
|
@@ -125,22 +125,22 @@ impl SelmaSanitizer {
|
|
|
125
125
|
allowed_protocols.foreach(|element_name: String, protocols: RHash| {
|
|
126
126
|
protocols.foreach(|attribute_name: String, protocol_list: Value| {
|
|
127
127
|
let protocols: RArray;
|
|
128
|
-
if protocol_list.is_kind_of(
|
|
128
|
+
if protocol_list.is_kind_of(ruby.class_array()) {
|
|
129
129
|
protocols = RArray::from_value(protocol_list).unwrap();
|
|
130
130
|
if protocols.includes(ruby.to_symbol("all")) {
|
|
131
131
|
return Err(magnus::Error::new(
|
|
132
|
-
|
|
132
|
+
ruby.exception_arg_error(),
|
|
133
133
|
"`:all` must be passed outside of an array".to_string(),
|
|
134
134
|
));
|
|
135
135
|
}
|
|
136
|
-
} else if protocol_list.is_kind_of(
|
|
136
|
+
} else if protocol_list.is_kind_of(ruby.class_symbol())
|
|
137
137
|
&& Symbol::from_value(protocol_list) == eval(":all").unwrap()
|
|
138
138
|
{
|
|
139
|
-
protocols =
|
|
139
|
+
protocols = ruby.ary_new();
|
|
140
140
|
protocols.push(ruby.to_symbol("all"))?;
|
|
141
141
|
} else {
|
|
142
142
|
return Err(magnus::Error::new(
|
|
143
|
-
|
|
143
|
+
ruby.exception_arg_error(),
|
|
144
144
|
"Protocol list must be an array, or just `:all`".to_string(),
|
|
145
145
|
));
|
|
146
146
|
}
|
|
@@ -220,15 +220,15 @@ impl SelmaSanitizer {
|
|
|
220
220
|
// end
|
|
221
221
|
// end
|
|
222
222
|
if let Some(remove_contents) = config.get(ruby.to_symbol("remove_contents")) {
|
|
223
|
-
if remove_contents.is_kind_of(
|
|
224
|
-
|| remove_contents.is_kind_of(
|
|
223
|
+
if remove_contents.is_kind_of(ruby.class_true_class())
|
|
224
|
+
|| remove_contents.is_kind_of(ruby.class_false_class())
|
|
225
225
|
{
|
|
226
226
|
Self::set_all_flags(
|
|
227
227
|
flags,
|
|
228
228
|
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
|
229
229
|
remove_contents.to_bool(),
|
|
230
230
|
);
|
|
231
|
-
} else if remove_contents.is_kind_of(
|
|
231
|
+
} else if remove_contents.is_kind_of(ruby.class_array()) {
|
|
232
232
|
let elements = RArray::from_value(remove_contents).unwrap();
|
|
233
233
|
elements
|
|
234
234
|
.into_iter()
|
|
@@ -245,7 +245,7 @@ impl SelmaSanitizer {
|
|
|
245
245
|
});
|
|
246
246
|
} else {
|
|
247
247
|
return Err(magnus::Error::new(
|
|
248
|
-
|
|
248
|
+
ruby.exception_arg_error(),
|
|
249
249
|
"remove_contents must be `true`, `false`, or an array".to_string(),
|
|
250
250
|
));
|
|
251
251
|
}
|
|
@@ -354,11 +354,12 @@ impl SelmaSanitizer {
|
|
|
354
354
|
attr_name: String,
|
|
355
355
|
allow_list: RArray,
|
|
356
356
|
) {
|
|
357
|
+
let ruby = Ruby::get().unwrap();
|
|
357
358
|
let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
|
|
358
359
|
|
|
359
360
|
for allowed_protocol in allow_list.into_iter() {
|
|
360
361
|
let protocol_list = protocol_sanitizers.get_mut(&attr_name);
|
|
361
|
-
if allowed_protocol.is_kind_of(
|
|
362
|
+
if allowed_protocol.is_kind_of(ruby.class_string()) {
|
|
362
363
|
match protocol_list {
|
|
363
364
|
None => {
|
|
364
365
|
protocol_sanitizers
|
|
@@ -366,7 +367,7 @@ impl SelmaSanitizer {
|
|
|
366
367
|
}
|
|
367
368
|
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
|
368
369
|
}
|
|
369
|
-
} else if allowed_protocol.is_kind_of(
|
|
370
|
+
} else if allowed_protocol.is_kind_of(ruby.class_symbol()) {
|
|
370
371
|
let protocol_config = allowed_protocol.inspect();
|
|
371
372
|
if protocol_config == ":relative" {
|
|
372
373
|
match protocol_list {
|
|
@@ -685,13 +686,11 @@ impl SelmaSanitizer {
|
|
|
685
686
|
|
|
686
687
|
fn check_if_end_tag_needs_removal(element: &mut Element) {
|
|
687
688
|
if element.removed() && !crate::tags::Tag::tag_from_element(element).self_closing {
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
Ok(())
|
|
694
|
-
}));
|
|
689
|
+
// ignore void elements (lol_html's void list may differ from selma's `self_closing`)
|
|
690
|
+
let _ = element.on_end_tag(Box::new(move |end| {
|
|
691
|
+
Self::remove_end_tag(end);
|
|
692
|
+
Ok(())
|
|
693
|
+
}));
|
|
695
694
|
}
|
|
696
695
|
}
|
|
697
696
|
|
|
@@ -710,8 +709,9 @@ impl SelmaSanitizer {
|
|
|
710
709
|
}
|
|
711
710
|
|
|
712
711
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
|
712
|
+
let ruby = Ruby::get().unwrap();
|
|
713
713
|
let c_sanitizer = m_selma
|
|
714
|
-
.define_class("Sanitizer",
|
|
714
|
+
.define_class("Sanitizer", ruby.class_object())
|
|
715
715
|
.expect("cannot define class Selma::Sanitizer");
|
|
716
716
|
|
|
717
717
|
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|
data/ext/selma/src/selector.rs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use magnus::{
|
|
1
|
+
use magnus::{function, scan_args, Error, Module, Object, RModule, Ruby, Value};
|
|
2
2
|
|
|
3
3
|
#[derive(Clone, Debug)]
|
|
4
4
|
#[magnus::wrap(class = "Selma::Selector")]
|
|
@@ -14,31 +14,30 @@ impl SelmaSelector {
|
|
|
14
14
|
fn new(args: &[Value]) -> Result<Self, Error> {
|
|
15
15
|
let (match_element, match_text_within, rb_ignore_text_within) =
|
|
16
16
|
Self::scan_parse_args(args)?;
|
|
17
|
+
let ruby = Ruby::get().unwrap();
|
|
17
18
|
|
|
18
19
|
if match_element.is_none() && match_text_within.is_none() {
|
|
19
20
|
return Err(Error::new(
|
|
20
|
-
|
|
21
|
+
ruby.exception_arg_error(),
|
|
21
22
|
"Neither `match_element` nor `match_text_within` option given",
|
|
22
23
|
));
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
// FIXME: not excited about this double parse work (`element!` does it too),
|
|
26
27
|
// but at least we can bail ASAP if the CSS is invalid
|
|
27
|
-
if
|
|
28
|
-
let css = match_element.as_ref().unwrap();
|
|
28
|
+
if let Some(css) = &match_element {
|
|
29
29
|
if css.parse::<lol_html::Selector>().is_err() {
|
|
30
30
|
return Err(Error::new(
|
|
31
|
-
|
|
31
|
+
ruby.exception_arg_error(),
|
|
32
32
|
format!("Could not parse `match_element` (`{css:?}`) as valid CSS"),
|
|
33
33
|
));
|
|
34
34
|
}
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
if
|
|
38
|
-
let css = match_text_within.as_ref().unwrap();
|
|
37
|
+
if let Some(css) = &match_text_within {
|
|
39
38
|
if css.parse::<lol_html::Selector>().is_err() {
|
|
40
39
|
return Err(Error::new(
|
|
41
|
-
|
|
40
|
+
ruby.exception_arg_error(),
|
|
42
41
|
format!("Could not parse `match_text_within` (`{css:?}`) as valid CSS",),
|
|
43
42
|
));
|
|
44
43
|
}
|
|
@@ -102,8 +101,9 @@ impl SelmaSelector {
|
|
|
102
101
|
}
|
|
103
102
|
|
|
104
103
|
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
|
104
|
+
let ruby = Ruby::get().unwrap();
|
|
105
105
|
let c_selector = m_selma
|
|
106
|
-
.define_class("Selector",
|
|
106
|
+
.define_class("Selector", ruby.class_object())
|
|
107
107
|
.expect("cannot define class Selma::Selector");
|
|
108
108
|
|
|
109
109
|
c_selector.define_singleton_method("new", function!(SelmaSelector::new, -1))?;
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: selma
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.11
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Garen J. Torikian
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: rb_sys
|
|
@@ -100,16 +100,19 @@ require_paths:
|
|
|
100
100
|
- lib
|
|
101
101
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
102
102
|
requirements:
|
|
103
|
-
- - "
|
|
103
|
+
- - ">="
|
|
104
104
|
- !ruby/object:Gem::Version
|
|
105
|
-
version: '3.
|
|
105
|
+
version: '3.2'
|
|
106
|
+
- - "<"
|
|
107
|
+
- !ruby/object:Gem::Version
|
|
108
|
+
version: '5'
|
|
106
109
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
110
|
requirements:
|
|
108
|
-
- - "
|
|
111
|
+
- - ">="
|
|
109
112
|
- !ruby/object:Gem::Version
|
|
110
113
|
version: '3.4'
|
|
111
114
|
requirements: []
|
|
112
|
-
rubygems_version:
|
|
115
|
+
rubygems_version: 4.0.6
|
|
113
116
|
specification_version: 4
|
|
114
117
|
summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
|
|
115
118
|
parser.
|