selma 0.2.2 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +129 -124
- data/README.md +133 -25
- data/ext/selma/Cargo.toml +6 -3
- data/ext/selma/src/html/element.rs +32 -27
- data/ext/selma/src/html/end_tag.rs +5 -5
- data/ext/selma/src/html/text_chunk.rs +55 -12
- data/ext/selma/src/native_ref_wrap.rs +30 -33
- data/ext/selma/src/rewriter.rs +299 -139
- data/ext/selma/src/sanitizer.rs +256 -138
- data/lib/selma/config.rb +12 -0
- data/lib/selma/html/element.rb +11 -0
- data/lib/selma/html.rb +2 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer/config.rb +2 -2
- data/lib/selma/sanitizer.rb +0 -77
- data/lib/selma/version.rb +1 -1
- metadata +9 -7
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -5,9 +5,11 @@ use lol_html::{
|
|
5
5
|
html_content::{Comment, ContentType, Doctype, Element, EndTag},
|
6
6
|
};
|
7
7
|
use magnus::{
|
8
|
-
class, function, method,
|
8
|
+
class, eval, exception, function, method,
|
9
|
+
r_hash::ForEach,
|
10
|
+
scan_args,
|
9
11
|
value::{Opaque, ReprValue},
|
10
|
-
Module, Object, RArray, RHash, RModule, Ruby, Value,
|
12
|
+
Module, Object, RArray, RHash, RModule, RString, Ruby, Symbol, Value,
|
11
13
|
};
|
12
14
|
|
13
15
|
#[derive(Clone, Debug, Default)]
|
@@ -45,34 +47,234 @@ impl SelmaSanitizer {
|
|
45
47
|
let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
|
46
48
|
let (opt_config,): (Option<RHash>,) = args.optional;
|
47
49
|
|
50
|
+
let ruby = Ruby::get().unwrap();
|
51
|
+
|
48
52
|
let config = match opt_config {
|
49
53
|
Some(config) => config,
|
50
54
|
// TODO: this seems like a hack to fix?
|
51
55
|
None => magnus::eval::<RHash>(r#"Selma::Sanitizer::Config::DEFAULT"#).unwrap(),
|
52
56
|
};
|
53
57
|
|
58
|
+
let mut flags = [0; crate::tags::Tag::TAG_COUNT];
|
59
|
+
let mut sanitizer_allowed_attrs = vec![];
|
60
|
+
let sanitizer_allowed_classes = vec![];
|
61
|
+
match Self::setup_config(&mut flags, config) {
|
62
|
+
Ok(_) => {}
|
63
|
+
Err(e) => {
|
64
|
+
return Err(e);
|
65
|
+
}
|
66
|
+
};
|
67
|
+
|
54
68
|
let mut element_sanitizers = HashMap::new();
|
69
|
+
|
70
|
+
// TODO: set up default tags; do we need this?
|
55
71
|
crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
|
56
|
-
let
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
);
|
72
|
+
let element_name = crate::tags::Tag::element_name_from_enum(html_tag).to_string();
|
73
|
+
|
74
|
+
let element_sanitizer = ElementSanitizer::default();
|
75
|
+
element_sanitizers.insert(element_name, element_sanitizer);
|
61
76
|
});
|
62
77
|
|
78
|
+
// def allow_attribute(element, attrs)
|
79
|
+
// attrs.flatten.each { |attr| set_allowed_attribute(element, attr, true) }
|
80
|
+
// end
|
81
|
+
if let Some(value) = config.get(ruby.to_symbol("attributes")) {
|
82
|
+
if let Some(allowed_attributes) = RHash::from_value(value) {
|
83
|
+
allowed_attributes.foreach(|element_value: Value, attributes: RArray| {
|
84
|
+
attributes.into_iter().for_each(|attr: Value| {
|
85
|
+
match RString::from_value(attr) {
|
86
|
+
None => {}
|
87
|
+
Some(attribute_name) => {
|
88
|
+
let attr_name = attribute_name.to_string().unwrap();
|
89
|
+
let element = match element_value.to_r_string() {
|
90
|
+
Err(_) => "".to_string(),
|
91
|
+
Ok(element_name) => element_name.to_string().unwrap(),
|
92
|
+
};
|
93
|
+
if element == "all" {
|
94
|
+
Self::set_allowed(
|
95
|
+
&mut sanitizer_allowed_attrs,
|
96
|
+
&attr_name,
|
97
|
+
true,
|
98
|
+
);
|
99
|
+
} else {
|
100
|
+
let element_sanitizer = Self::get_element_sanitizer(
|
101
|
+
&mut element_sanitizers,
|
102
|
+
&element,
|
103
|
+
);
|
104
|
+
element_sanitizer.allowed_attrs.push(attr_name);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
}
|
108
|
+
});
|
109
|
+
|
110
|
+
Ok(ForEach::Continue)
|
111
|
+
})?;
|
112
|
+
}
|
113
|
+
};
|
114
|
+
|
115
|
+
// def allow_protocol(element, attr, protos)
|
116
|
+
// if protos.is_a?(Array)
|
117
|
+
// raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
|
118
|
+
// else
|
119
|
+
// protos = [protos]
|
120
|
+
// end
|
121
|
+
// set_allowed_protocols(element, attr, protos)
|
122
|
+
// end
|
123
|
+
if let Some(value) = config.get(ruby.to_symbol("protocols")) {
|
124
|
+
if let Some(allowed_protocols) = RHash::from_value(value) {
|
125
|
+
allowed_protocols.foreach(|element_name: String, protocols: RHash| {
|
126
|
+
protocols.foreach(|attribute_name: String, protocol_list: Value| {
|
127
|
+
let protocols: RArray;
|
128
|
+
if protocol_list.is_kind_of(class::array()) {
|
129
|
+
protocols = RArray::from_value(protocol_list).unwrap();
|
130
|
+
if protocols.includes(ruby.to_symbol("all")) {
|
131
|
+
return Err(magnus::Error::new(
|
132
|
+
exception::arg_error(),
|
133
|
+
"`:all` must be passed outside of an array".to_string(),
|
134
|
+
));
|
135
|
+
}
|
136
|
+
} else if protocol_list.is_kind_of(class::symbol())
|
137
|
+
&& Symbol::from_value(protocol_list) == eval(":all").unwrap()
|
138
|
+
{
|
139
|
+
protocols = RArray::new();
|
140
|
+
protocols.push(ruby.to_symbol("all"))?;
|
141
|
+
} else {
|
142
|
+
return Err(magnus::Error::new(
|
143
|
+
exception::arg_error(),
|
144
|
+
"Protocol list must be an array, or just `:all`".to_string(),
|
145
|
+
));
|
146
|
+
}
|
147
|
+
|
148
|
+
let element_sanitizer =
|
149
|
+
Self::get_element_sanitizer(&mut element_sanitizers, &element_name);
|
150
|
+
|
151
|
+
Self::set_allowed_protocols(element_sanitizer, attribute_name, protocols);
|
152
|
+
Ok(ForEach::Continue)
|
153
|
+
})?;
|
154
|
+
|
155
|
+
Ok(ForEach::Continue)
|
156
|
+
})?;
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
let escape_tagfilter = match config.get(ruby.to_symbol("escape_tagfilter")) {
|
161
|
+
Some(value) => value.to_bool(),
|
162
|
+
None => true,
|
163
|
+
};
|
164
|
+
|
165
|
+
let allow_comments = match config.get(ruby.to_symbol("allow_comments")) {
|
166
|
+
Some(value) => value.to_bool(),
|
167
|
+
None => false,
|
168
|
+
};
|
169
|
+
|
170
|
+
let allow_doctype = match config.get(ruby.to_symbol("allow_doctype")) {
|
171
|
+
Some(value) => value.to_bool(),
|
172
|
+
None => true,
|
173
|
+
};
|
174
|
+
|
63
175
|
Ok(Self(std::cell::RefCell::new(Sanitizer {
|
64
|
-
flags
|
65
|
-
allowed_attrs:
|
66
|
-
allowed_classes:
|
176
|
+
flags,
|
177
|
+
allowed_attrs: sanitizer_allowed_attrs,
|
178
|
+
allowed_classes: sanitizer_allowed_classes,
|
67
179
|
element_sanitizers,
|
68
180
|
|
69
|
-
escape_tagfilter
|
70
|
-
allow_comments
|
71
|
-
allow_doctype
|
181
|
+
escape_tagfilter,
|
182
|
+
allow_comments,
|
183
|
+
allow_doctype,
|
72
184
|
config: config.into(),
|
73
185
|
})))
|
74
186
|
}
|
75
187
|
|
188
|
+
fn setup_config(
|
189
|
+
flags: &mut [u8; crate::tags::Tag::TAG_COUNT],
|
190
|
+
config: RHash,
|
191
|
+
) -> Result<(), magnus::Error> {
|
192
|
+
let ruby = Ruby::get().unwrap();
|
193
|
+
|
194
|
+
// def allow_element(elements)
|
195
|
+
// elements.flatten.each { |e| set_flag(e, ALLOW, true) }
|
196
|
+
// end
|
197
|
+
if let Some(value) = config.get(ruby.to_symbol("elements")) {
|
198
|
+
if let Some(elements) = RArray::from_value(value) {
|
199
|
+
elements
|
200
|
+
.into_iter()
|
201
|
+
.for_each(|element| match RString::from_value(element) {
|
202
|
+
None => {}
|
203
|
+
Some(element_name) => {
|
204
|
+
Self::set_flag(
|
205
|
+
element_name.to_string().unwrap(),
|
206
|
+
flags,
|
207
|
+
Self::SELMA_SANITIZER_ALLOW,
|
208
|
+
true,
|
209
|
+
);
|
210
|
+
}
|
211
|
+
});
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
// def remove_contents(elements)
|
216
|
+
// if elements.is_a?(TrueClass) || elements.is_a?(FalseClass)
|
217
|
+
// set_all_flags(REMOVE_CONTENTS, elements)
|
218
|
+
// else
|
219
|
+
// elements.flatten.each { |e| set_flag(e, REMOVE_CONTENTS, true) }
|
220
|
+
// end
|
221
|
+
// end
|
222
|
+
if let Some(remove_contents) = config.get(ruby.to_symbol("remove_contents")) {
|
223
|
+
if remove_contents.is_kind_of(class::true_class())
|
224
|
+
|| remove_contents.is_kind_of(class::false_class())
|
225
|
+
{
|
226
|
+
Self::set_all_flags(
|
227
|
+
flags,
|
228
|
+
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
229
|
+
remove_contents.to_bool(),
|
230
|
+
);
|
231
|
+
} else if remove_contents.is_kind_of(class::array()) {
|
232
|
+
let elements = RArray::from_value(remove_contents).unwrap();
|
233
|
+
elements
|
234
|
+
.into_iter()
|
235
|
+
.for_each(|element| match RString::from_value(element) {
|
236
|
+
None => {}
|
237
|
+
Some(element_name) => {
|
238
|
+
Self::set_flag(
|
239
|
+
element_name.to_string().unwrap(),
|
240
|
+
flags,
|
241
|
+
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
242
|
+
true,
|
243
|
+
);
|
244
|
+
}
|
245
|
+
});
|
246
|
+
} else {
|
247
|
+
return Err(magnus::Error::new(
|
248
|
+
exception::arg_error(),
|
249
|
+
"remove_contents must be `true`, `false`, or an array".to_string(),
|
250
|
+
));
|
251
|
+
}
|
252
|
+
}
|
253
|
+
|
254
|
+
// def wrap_with_whitespace(elements)
|
255
|
+
// elements.flatten.each { |e| set_flag(e, WRAP_WHITESPACE, true) }
|
256
|
+
// end
|
257
|
+
if let Some(value) = config.get(ruby.to_symbol("whitespace_elements")) {
|
258
|
+
if let Some(elements) = RArray::from_value(value) {
|
259
|
+
elements
|
260
|
+
.into_iter()
|
261
|
+
.for_each(|element| match RString::from_value(element) {
|
262
|
+
None => {}
|
263
|
+
Some(element_name) => {
|
264
|
+
Self::set_flag(
|
265
|
+
element_name.to_string().unwrap(),
|
266
|
+
flags,
|
267
|
+
Self::SELMA_SANITIZER_WRAP_WHITESPACE,
|
268
|
+
true,
|
269
|
+
);
|
270
|
+
}
|
271
|
+
});
|
272
|
+
}
|
273
|
+
};
|
274
|
+
|
275
|
+
Ok(())
|
276
|
+
}
|
277
|
+
|
76
278
|
fn get_config(&self) -> Result<RHash, magnus::Error> {
|
77
279
|
let binding = self.0.borrow();
|
78
280
|
let ruby = Ruby::get().unwrap();
|
@@ -81,40 +283,39 @@ impl SelmaSanitizer {
|
|
81
283
|
}
|
82
284
|
|
83
285
|
/// Toggle a sanitizer option on or off.
|
84
|
-
fn set_flag(
|
286
|
+
fn set_flag(
|
287
|
+
tag_name: String,
|
288
|
+
flags: &mut [u8; crate::tags::Tag::TAG_COUNT],
|
289
|
+
flag: u8,
|
290
|
+
set: bool,
|
291
|
+
) {
|
85
292
|
let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
|
86
293
|
if set {
|
87
|
-
|
294
|
+
flags[tag.index] |= flag;
|
88
295
|
} else {
|
89
|
-
|
296
|
+
flags[tag.index] &= !flag;
|
90
297
|
}
|
91
298
|
}
|
92
299
|
|
93
300
|
/// Toggles all sanitization options on or off.
|
94
|
-
fn set_all_flags(&
|
301
|
+
fn set_all_flags(flags: &mut [u8; crate::tags::Tag::TAG_COUNT], flag: u8, set: bool) {
|
95
302
|
if set {
|
96
303
|
crate::tags::Tag::html_tags()
|
97
304
|
.iter()
|
98
305
|
.enumerate()
|
99
306
|
.for_each(|(iter, _)| {
|
100
|
-
|
307
|
+
flags[iter] |= flag;
|
101
308
|
});
|
102
309
|
} else {
|
103
310
|
crate::tags::Tag::html_tags()
|
104
311
|
.iter()
|
105
312
|
.enumerate()
|
106
313
|
.for_each(|(iter, _)| {
|
107
|
-
|
314
|
+
flags[iter] &= flag;
|
108
315
|
});
|
109
316
|
}
|
110
317
|
}
|
111
318
|
|
112
|
-
/// Whether or not to keep dangerous HTML tags.
|
113
|
-
fn set_escape_tagfilter(&self, allow: bool) -> bool {
|
114
|
-
self.0.borrow_mut().escape_tagfilter = allow;
|
115
|
-
allow
|
116
|
-
}
|
117
|
-
|
118
319
|
pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
|
119
320
|
if self.0.borrow().escape_tagfilter {
|
120
321
|
let tag = crate::tags::Tag::tag_from_element(e);
|
@@ -131,12 +332,6 @@ impl SelmaSanitizer {
|
|
131
332
|
self.0.borrow().escape_tagfilter
|
132
333
|
}
|
133
334
|
|
134
|
-
/// Whether or not to keep HTML comments.
|
135
|
-
fn set_allow_comments(&self, allow: bool) -> bool {
|
136
|
-
self.0.borrow_mut().allow_comments = allow;
|
137
|
-
allow
|
138
|
-
}
|
139
|
-
|
140
335
|
pub fn get_allow_comments(&self) -> bool {
|
141
336
|
self.0.borrow().allow_comments
|
142
337
|
}
|
@@ -145,12 +340,6 @@ impl SelmaSanitizer {
|
|
145
340
|
c.remove();
|
146
341
|
}
|
147
342
|
|
148
|
-
/// Whether or not to keep HTML doctype.
|
149
|
-
fn set_allow_doctype(&self, allow: bool) -> bool {
|
150
|
-
self.0.borrow_mut().allow_doctype = allow;
|
151
|
-
allow
|
152
|
-
}
|
153
|
-
|
154
343
|
/// Whether or not to keep HTML doctype.
|
155
344
|
pub fn get_allow_doctype(&self) -> bool {
|
156
345
|
self.0.borrow().allow_doctype
|
@@ -160,48 +349,14 @@ impl SelmaSanitizer {
|
|
160
349
|
d.remove();
|
161
350
|
}
|
162
351
|
|
163
|
-
fn
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
let allowed_attrs = &mut binding.allowed_attrs;
|
169
|
-
Self::set_allowed(allowed_attrs, &attr_name, allow);
|
170
|
-
} else {
|
171
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
172
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
173
|
-
|
174
|
-
element_sanitizer.allowed_attrs.push(attr_name);
|
175
|
-
}
|
176
|
-
|
177
|
-
allow
|
178
|
-
}
|
179
|
-
|
180
|
-
fn set_allowed_class(&self, element_name: String, class_name: String, allow: bool) -> bool {
|
181
|
-
let mut binding = self.0.borrow_mut();
|
182
|
-
if element_name == "all" {
|
183
|
-
let allowed_classes = &mut binding.allowed_classes;
|
184
|
-
Self::set_allowed(allowed_classes, &class_name, allow);
|
185
|
-
} else {
|
186
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
187
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
188
|
-
|
189
|
-
let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
|
190
|
-
Self::set_allowed(allowed_classes, &class_name, allow)
|
191
|
-
}
|
192
|
-
allow
|
193
|
-
}
|
194
|
-
|
195
|
-
fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
|
196
|
-
let mut binding = self.0.borrow_mut();
|
197
|
-
|
198
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
199
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
200
|
-
|
352
|
+
fn set_allowed_protocols(
|
353
|
+
element_sanitizer: &mut ElementSanitizer,
|
354
|
+
attr_name: String,
|
355
|
+
allow_list: RArray,
|
356
|
+
) {
|
201
357
|
let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
|
202
358
|
|
203
|
-
for
|
204
|
-
let allowed_protocol = opt_allowed_protocol.unwrap();
|
359
|
+
for allowed_protocol in allow_list.into_iter() {
|
205
360
|
let protocol_list = protocol_sanitizers.get_mut(&attr_name);
|
206
361
|
if allowed_protocol.is_kind_of(class::string()) {
|
207
362
|
match protocol_list {
|
@@ -211,20 +366,23 @@ impl SelmaSanitizer {
|
|
211
366
|
}
|
212
367
|
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
213
368
|
}
|
214
|
-
} else if allowed_protocol.is_kind_of(class::symbol())
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
369
|
+
} else if allowed_protocol.is_kind_of(class::symbol()) {
|
370
|
+
let protocol_config = allowed_protocol.inspect();
|
371
|
+
if protocol_config == ":relative" {
|
372
|
+
match protocol_list {
|
373
|
+
None => {
|
374
|
+
protocol_sanitizers.insert(
|
375
|
+
attr_name.to_string(),
|
376
|
+
vec!["#".to_string(), "/".to_string()],
|
377
|
+
);
|
378
|
+
}
|
379
|
+
Some(protocol_list) => {
|
380
|
+
protocol_list.push("#".to_string());
|
381
|
+
protocol_list.push("/".to_string());
|
382
|
+
}
|
227
383
|
}
|
384
|
+
} else if protocol_config == ":all" {
|
385
|
+
protocol_sanitizers.insert(attr_name.to_string(), vec!["all".to_string()]);
|
228
386
|
}
|
229
387
|
}
|
230
388
|
}
|
@@ -335,7 +493,7 @@ impl SelmaSanitizer {
|
|
335
493
|
element: &mut Element,
|
336
494
|
element_sanitizer: &ElementSanitizer,
|
337
495
|
attr_name: &String,
|
338
|
-
attr_val: &
|
496
|
+
attr_val: &str,
|
339
497
|
) -> Result<bool, AttributeNameError> {
|
340
498
|
let mut allowed: bool = false;
|
341
499
|
let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
|
@@ -387,7 +545,11 @@ impl SelmaSanitizer {
|
|
387
545
|
attr_val.contains("://")
|
388
546
|
}
|
389
547
|
|
390
|
-
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &
|
548
|
+
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
|
549
|
+
if protocols_allowed.contains(&"all".to_string()) {
|
550
|
+
return true;
|
551
|
+
}
|
552
|
+
|
391
553
|
// FIXME: is there a more idiomatic way to do this?
|
392
554
|
let mut pos: usize = 0;
|
393
555
|
let mut chars = attr_val.chars();
|
@@ -509,6 +671,7 @@ impl SelmaSanitizer {
|
|
509
671
|
element.after(" ", ContentType::Text);
|
510
672
|
}
|
511
673
|
}
|
674
|
+
|
512
675
|
element.remove_and_keep_content();
|
513
676
|
}
|
514
677
|
}
|
@@ -542,7 +705,7 @@ impl SelmaSanitizer {
|
|
542
705
|
) -> &'a mut ElementSanitizer {
|
543
706
|
element_sanitizers
|
544
707
|
.entry(element_name.to_string())
|
545
|
-
.
|
708
|
+
.or_default()
|
546
709
|
}
|
547
710
|
}
|
548
711
|
|
@@ -554,50 +717,5 @@ pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
|
554
717
|
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|
555
718
|
c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;
|
556
719
|
|
557
|
-
c_sanitizer.define_method("set_flag", method!(SelmaSanitizer::set_flag, 3))?;
|
558
|
-
c_sanitizer.define_method("set_all_flags", method!(SelmaSanitizer::set_all_flags, 2))?;
|
559
|
-
|
560
|
-
c_sanitizer.define_method(
|
561
|
-
"set_escape_tagfilter",
|
562
|
-
method!(SelmaSanitizer::set_escape_tagfilter, 1),
|
563
|
-
)?;
|
564
|
-
c_sanitizer.define_method(
|
565
|
-
"escape_tagfilter",
|
566
|
-
method!(SelmaSanitizer::get_escape_tagfilter, 0),
|
567
|
-
)?;
|
568
|
-
|
569
|
-
c_sanitizer.define_method(
|
570
|
-
"set_allow_comments",
|
571
|
-
method!(SelmaSanitizer::set_allow_comments, 1),
|
572
|
-
)?;
|
573
|
-
c_sanitizer.define_method(
|
574
|
-
"allow_comments",
|
575
|
-
method!(SelmaSanitizer::get_allow_comments, 0),
|
576
|
-
)?;
|
577
|
-
|
578
|
-
c_sanitizer.define_method(
|
579
|
-
"set_allow_doctype",
|
580
|
-
method!(SelmaSanitizer::set_allow_doctype, 1),
|
581
|
-
)?;
|
582
|
-
c_sanitizer.define_method(
|
583
|
-
"allow_doctype",
|
584
|
-
method!(SelmaSanitizer::get_allow_doctype, 0),
|
585
|
-
)?;
|
586
|
-
|
587
|
-
c_sanitizer.define_method(
|
588
|
-
"set_allowed_attribute",
|
589
|
-
method!(SelmaSanitizer::set_allowed_attribute, 3),
|
590
|
-
)?;
|
591
|
-
|
592
|
-
c_sanitizer.define_method(
|
593
|
-
"set_allowed_class",
|
594
|
-
method!(SelmaSanitizer::set_allowed_class, 3),
|
595
|
-
)?;
|
596
|
-
|
597
|
-
c_sanitizer.define_method(
|
598
|
-
"set_allowed_protocols",
|
599
|
-
method!(SelmaSanitizer::set_allowed_protocols, 3),
|
600
|
-
)?;
|
601
|
-
|
602
720
|
Ok(())
|
603
721
|
}
|
data/lib/selma/config.rb
ADDED
data/lib/selma/html.rb
CHANGED
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
|
29
29
|
# URL handling protocols to allow in specific attributes. By default, no
|
30
30
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
31
|
-
# to allow relative URLs sans protocol.
|
31
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
32
32
|
protocols: {},
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
raise ArgumentError, "other_config must be a Hash" unless other_config.is_a?(Hash)
|
29
29
|
|
30
30
|
merged = {}
|
31
|
-
keys = Set.new(config.keys + other_config.keys)
|
31
|
+
keys = Set.new(config.keys + other_config.keys).to_a
|
32
32
|
|
33
33
|
keys.each do |key|
|
34
34
|
oldval = config[key]
|
@@ -39,7 +39,7 @@ module Selma
|
|
39
39
|
merged[key] = if oldval.is_a?(Hash) && newval.is_a?(Hash)
|
40
40
|
oldval.empty? ? newval.dup : merge(oldval, newval)
|
41
41
|
elsif newval.is_a?(Array) && key != :transformers
|
42
|
-
Set.new(newval)
|
42
|
+
Set.new(newval).to_a
|
43
43
|
else
|
44
44
|
can_dupe?(newval) ? newval.dup : newval
|
45
45
|
end
|
data/lib/selma/sanitizer.rb
CHANGED
@@ -4,82 +4,5 @@ require "selma/sanitizer/config"
|
|
4
4
|
|
5
5
|
module Selma
|
6
6
|
class Sanitizer
|
7
|
-
ALLOW = 1 << 0
|
8
|
-
ESCAPE_TAGFILTER = (1 << 1)
|
9
|
-
REMOVE_CONTENTS = (1 << 2)
|
10
|
-
WRAP_WHITESPACE = (1 << 3)
|
11
|
-
|
12
|
-
# initialize is in Rust, this just helps manage config setup in Ruby
|
13
|
-
# TODO: could this just become initialize?
|
14
|
-
def setup
|
15
|
-
allow_element(config[:elements] || [])
|
16
|
-
|
17
|
-
(config[:attributes] || {}).each do |element, attrs|
|
18
|
-
allow_attribute(element, attrs)
|
19
|
-
end
|
20
|
-
|
21
|
-
(config[:protocols] || {}).each do |element, protocols|
|
22
|
-
protocols.each do |attribute, pr|
|
23
|
-
allow_protocol(element, attribute, pr)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
remove_contents(config[:remove_contents]) if config.include?(:remove_contents)
|
28
|
-
|
29
|
-
wrap_with_whitespace(config[:whitespace_elements]) if config.include?(:whitespace_elements)
|
30
|
-
|
31
|
-
set_escape_tagfilter(config.fetch(:escape_tagfilter, true))
|
32
|
-
set_allow_comments(config.fetch(:allow_comments, false))
|
33
|
-
set_allow_doctype(config.fetch(:allow_doctype, true))
|
34
|
-
end
|
35
|
-
|
36
|
-
def elements
|
37
|
-
config[:elements]
|
38
|
-
end
|
39
|
-
|
40
|
-
def allow_element(elements)
|
41
|
-
elements.flatten.each { |e| set_flag(e, ALLOW, true) }
|
42
|
-
end
|
43
|
-
|
44
|
-
def disallow_element(elements)
|
45
|
-
elements.flatten.each { |e| set_flag(e, ALLOW, false) }
|
46
|
-
end
|
47
|
-
|
48
|
-
def allow_attribute(element, attrs)
|
49
|
-
attrs.flatten.each { |attr| set_allowed_attribute(element, attr, true) }
|
50
|
-
end
|
51
|
-
|
52
|
-
def require_any_attributes(element, attrs)
|
53
|
-
if attr.empty?
|
54
|
-
set_required_attribute(element, "*", true)
|
55
|
-
else
|
56
|
-
attrs.flatten.each { |attr| set_required_attribute(element, attr, true) }
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def disallow_attribute(element, attrs)
|
61
|
-
attrs.flatten.each { |attr| set_allowed_attribute(element, attr, false) }
|
62
|
-
end
|
63
|
-
|
64
|
-
def allow_class(element, *klass)
|
65
|
-
klass.flatten.each { |k| set_allowed_class(element, k, true) }
|
66
|
-
end
|
67
|
-
|
68
|
-
def allow_protocol(element, attr, protos)
|
69
|
-
protos = [protos] unless protos.is_a?(Array)
|
70
|
-
set_allowed_protocols(element, attr, protos)
|
71
|
-
end
|
72
|
-
|
73
|
-
def remove_contents(elements)
|
74
|
-
if elements.is_a?(TrueClass) || elements.is_a?(FalseClass)
|
75
|
-
set_all_flags(REMOVE_CONTENTS, elements)
|
76
|
-
else
|
77
|
-
elements.flatten.each { |e| set_flag(e, REMOVE_CONTENTS, true) }
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def wrap_with_whitespace(elements)
|
82
|
-
elements.flatten.each { |e| set_flag(e, WRAP_WHITESPACE, true) }
|
83
|
-
end
|
84
7
|
end
|
85
8
|
end
|
data/lib/selma/version.rb
CHANGED