selma 0.2.2 → 0.4.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +129 -124
- data/README.md +133 -25
- data/ext/selma/Cargo.toml +6 -3
- data/ext/selma/src/html/element.rs +32 -27
- data/ext/selma/src/html/end_tag.rs +5 -5
- data/ext/selma/src/html/text_chunk.rs +55 -12
- data/ext/selma/src/native_ref_wrap.rs +30 -33
- data/ext/selma/src/rewriter.rs +299 -139
- data/ext/selma/src/sanitizer.rs +256 -138
- data/lib/selma/config.rb +12 -0
- data/lib/selma/html/element.rb +11 -0
- data/lib/selma/html.rb +2 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer/config.rb +2 -2
- data/lib/selma/sanitizer.rb +0 -77
- data/lib/selma/version.rb +1 -1
- metadata +9 -7
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -5,9 +5,11 @@ use lol_html::{
|
|
5
5
|
html_content::{Comment, ContentType, Doctype, Element, EndTag},
|
6
6
|
};
|
7
7
|
use magnus::{
|
8
|
-
class, function, method,
|
8
|
+
class, eval, exception, function, method,
|
9
|
+
r_hash::ForEach,
|
10
|
+
scan_args,
|
9
11
|
value::{Opaque, ReprValue},
|
10
|
-
Module, Object, RArray, RHash, RModule, Ruby, Value,
|
12
|
+
Module, Object, RArray, RHash, RModule, RString, Ruby, Symbol, Value,
|
11
13
|
};
|
12
14
|
|
13
15
|
#[derive(Clone, Debug, Default)]
|
@@ -45,34 +47,234 @@ impl SelmaSanitizer {
|
|
45
47
|
let args = scan_args::scan_args::<(), (Option<RHash>,), (), (), (), ()>(arguments)?;
|
46
48
|
let (opt_config,): (Option<RHash>,) = args.optional;
|
47
49
|
|
50
|
+
let ruby = Ruby::get().unwrap();
|
51
|
+
|
48
52
|
let config = match opt_config {
|
49
53
|
Some(config) => config,
|
50
54
|
// TODO: this seems like a hack to fix?
|
51
55
|
None => magnus::eval::<RHash>(r#"Selma::Sanitizer::Config::DEFAULT"#).unwrap(),
|
52
56
|
};
|
53
57
|
|
58
|
+
let mut flags = [0; crate::tags::Tag::TAG_COUNT];
|
59
|
+
let mut sanitizer_allowed_attrs = vec![];
|
60
|
+
let sanitizer_allowed_classes = vec![];
|
61
|
+
match Self::setup_config(&mut flags, config) {
|
62
|
+
Ok(_) => {}
|
63
|
+
Err(e) => {
|
64
|
+
return Err(e);
|
65
|
+
}
|
66
|
+
};
|
67
|
+
|
54
68
|
let mut element_sanitizers = HashMap::new();
|
69
|
+
|
70
|
+
// TODO: set up default tags; do we need this?
|
55
71
|
crate::tags::Tag::html_tags().iter().for_each(|html_tag| {
|
56
|
-
let
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
);
|
72
|
+
let element_name = crate::tags::Tag::element_name_from_enum(html_tag).to_string();
|
73
|
+
|
74
|
+
let element_sanitizer = ElementSanitizer::default();
|
75
|
+
element_sanitizers.insert(element_name, element_sanitizer);
|
61
76
|
});
|
62
77
|
|
78
|
+
// def allow_attribute(element, attrs)
|
79
|
+
// attrs.flatten.each { |attr| set_allowed_attribute(element, attr, true) }
|
80
|
+
// end
|
81
|
+
if let Some(value) = config.get(ruby.to_symbol("attributes")) {
|
82
|
+
if let Some(allowed_attributes) = RHash::from_value(value) {
|
83
|
+
allowed_attributes.foreach(|element_value: Value, attributes: RArray| {
|
84
|
+
attributes.into_iter().for_each(|attr: Value| {
|
85
|
+
match RString::from_value(attr) {
|
86
|
+
None => {}
|
87
|
+
Some(attribute_name) => {
|
88
|
+
let attr_name = attribute_name.to_string().unwrap();
|
89
|
+
let element = match element_value.to_r_string() {
|
90
|
+
Err(_) => "".to_string(),
|
91
|
+
Ok(element_name) => element_name.to_string().unwrap(),
|
92
|
+
};
|
93
|
+
if element == "all" {
|
94
|
+
Self::set_allowed(
|
95
|
+
&mut sanitizer_allowed_attrs,
|
96
|
+
&attr_name,
|
97
|
+
true,
|
98
|
+
);
|
99
|
+
} else {
|
100
|
+
let element_sanitizer = Self::get_element_sanitizer(
|
101
|
+
&mut element_sanitizers,
|
102
|
+
&element,
|
103
|
+
);
|
104
|
+
element_sanitizer.allowed_attrs.push(attr_name);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
}
|
108
|
+
});
|
109
|
+
|
110
|
+
Ok(ForEach::Continue)
|
111
|
+
})?;
|
112
|
+
}
|
113
|
+
};
|
114
|
+
|
115
|
+
// def allow_protocol(element, attr, protos)
|
116
|
+
// if protos.is_a?(Array)
|
117
|
+
// raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
|
118
|
+
// else
|
119
|
+
// protos = [protos]
|
120
|
+
// end
|
121
|
+
// set_allowed_protocols(element, attr, protos)
|
122
|
+
// end
|
123
|
+
if let Some(value) = config.get(ruby.to_symbol("protocols")) {
|
124
|
+
if let Some(allowed_protocols) = RHash::from_value(value) {
|
125
|
+
allowed_protocols.foreach(|element_name: String, protocols: RHash| {
|
126
|
+
protocols.foreach(|attribute_name: String, protocol_list: Value| {
|
127
|
+
let protocols: RArray;
|
128
|
+
if protocol_list.is_kind_of(class::array()) {
|
129
|
+
protocols = RArray::from_value(protocol_list).unwrap();
|
130
|
+
if protocols.includes(ruby.to_symbol("all")) {
|
131
|
+
return Err(magnus::Error::new(
|
132
|
+
exception::arg_error(),
|
133
|
+
"`:all` must be passed outside of an array".to_string(),
|
134
|
+
));
|
135
|
+
}
|
136
|
+
} else if protocol_list.is_kind_of(class::symbol())
|
137
|
+
&& Symbol::from_value(protocol_list) == eval(":all").unwrap()
|
138
|
+
{
|
139
|
+
protocols = RArray::new();
|
140
|
+
protocols.push(ruby.to_symbol("all"))?;
|
141
|
+
} else {
|
142
|
+
return Err(magnus::Error::new(
|
143
|
+
exception::arg_error(),
|
144
|
+
"Protocol list must be an array, or just `:all`".to_string(),
|
145
|
+
));
|
146
|
+
}
|
147
|
+
|
148
|
+
let element_sanitizer =
|
149
|
+
Self::get_element_sanitizer(&mut element_sanitizers, &element_name);
|
150
|
+
|
151
|
+
Self::set_allowed_protocols(element_sanitizer, attribute_name, protocols);
|
152
|
+
Ok(ForEach::Continue)
|
153
|
+
})?;
|
154
|
+
|
155
|
+
Ok(ForEach::Continue)
|
156
|
+
})?;
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
let escape_tagfilter = match config.get(ruby.to_symbol("escape_tagfilter")) {
|
161
|
+
Some(value) => value.to_bool(),
|
162
|
+
None => true,
|
163
|
+
};
|
164
|
+
|
165
|
+
let allow_comments = match config.get(ruby.to_symbol("allow_comments")) {
|
166
|
+
Some(value) => value.to_bool(),
|
167
|
+
None => false,
|
168
|
+
};
|
169
|
+
|
170
|
+
let allow_doctype = match config.get(ruby.to_symbol("allow_doctype")) {
|
171
|
+
Some(value) => value.to_bool(),
|
172
|
+
None => true,
|
173
|
+
};
|
174
|
+
|
63
175
|
Ok(Self(std::cell::RefCell::new(Sanitizer {
|
64
|
-
flags
|
65
|
-
allowed_attrs:
|
66
|
-
allowed_classes:
|
176
|
+
flags,
|
177
|
+
allowed_attrs: sanitizer_allowed_attrs,
|
178
|
+
allowed_classes: sanitizer_allowed_classes,
|
67
179
|
element_sanitizers,
|
68
180
|
|
69
|
-
escape_tagfilter
|
70
|
-
allow_comments
|
71
|
-
allow_doctype
|
181
|
+
escape_tagfilter,
|
182
|
+
allow_comments,
|
183
|
+
allow_doctype,
|
72
184
|
config: config.into(),
|
73
185
|
})))
|
74
186
|
}
|
75
187
|
|
188
|
+
fn setup_config(
|
189
|
+
flags: &mut [u8; crate::tags::Tag::TAG_COUNT],
|
190
|
+
config: RHash,
|
191
|
+
) -> Result<(), magnus::Error> {
|
192
|
+
let ruby = Ruby::get().unwrap();
|
193
|
+
|
194
|
+
// def allow_element(elements)
|
195
|
+
// elements.flatten.each { |e| set_flag(e, ALLOW, true) }
|
196
|
+
// end
|
197
|
+
if let Some(value) = config.get(ruby.to_symbol("elements")) {
|
198
|
+
if let Some(elements) = RArray::from_value(value) {
|
199
|
+
elements
|
200
|
+
.into_iter()
|
201
|
+
.for_each(|element| match RString::from_value(element) {
|
202
|
+
None => {}
|
203
|
+
Some(element_name) => {
|
204
|
+
Self::set_flag(
|
205
|
+
element_name.to_string().unwrap(),
|
206
|
+
flags,
|
207
|
+
Self::SELMA_SANITIZER_ALLOW,
|
208
|
+
true,
|
209
|
+
);
|
210
|
+
}
|
211
|
+
});
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
// def remove_contents(elements)
|
216
|
+
// if elements.is_a?(TrueClass) || elements.is_a?(FalseClass)
|
217
|
+
// set_all_flags(REMOVE_CONTENTS, elements)
|
218
|
+
// else
|
219
|
+
// elements.flatten.each { |e| set_flag(e, REMOVE_CONTENTS, true) }
|
220
|
+
// end
|
221
|
+
// end
|
222
|
+
if let Some(remove_contents) = config.get(ruby.to_symbol("remove_contents")) {
|
223
|
+
if remove_contents.is_kind_of(class::true_class())
|
224
|
+
|| remove_contents.is_kind_of(class::false_class())
|
225
|
+
{
|
226
|
+
Self::set_all_flags(
|
227
|
+
flags,
|
228
|
+
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
229
|
+
remove_contents.to_bool(),
|
230
|
+
);
|
231
|
+
} else if remove_contents.is_kind_of(class::array()) {
|
232
|
+
let elements = RArray::from_value(remove_contents).unwrap();
|
233
|
+
elements
|
234
|
+
.into_iter()
|
235
|
+
.for_each(|element| match RString::from_value(element) {
|
236
|
+
None => {}
|
237
|
+
Some(element_name) => {
|
238
|
+
Self::set_flag(
|
239
|
+
element_name.to_string().unwrap(),
|
240
|
+
flags,
|
241
|
+
Self::SELMA_SANITIZER_REMOVE_CONTENTS,
|
242
|
+
true,
|
243
|
+
);
|
244
|
+
}
|
245
|
+
});
|
246
|
+
} else {
|
247
|
+
return Err(magnus::Error::new(
|
248
|
+
exception::arg_error(),
|
249
|
+
"remove_contents must be `true`, `false`, or an array".to_string(),
|
250
|
+
));
|
251
|
+
}
|
252
|
+
}
|
253
|
+
|
254
|
+
// def wrap_with_whitespace(elements)
|
255
|
+
// elements.flatten.each { |e| set_flag(e, WRAP_WHITESPACE, true) }
|
256
|
+
// end
|
257
|
+
if let Some(value) = config.get(ruby.to_symbol("whitespace_elements")) {
|
258
|
+
if let Some(elements) = RArray::from_value(value) {
|
259
|
+
elements
|
260
|
+
.into_iter()
|
261
|
+
.for_each(|element| match RString::from_value(element) {
|
262
|
+
None => {}
|
263
|
+
Some(element_name) => {
|
264
|
+
Self::set_flag(
|
265
|
+
element_name.to_string().unwrap(),
|
266
|
+
flags,
|
267
|
+
Self::SELMA_SANITIZER_WRAP_WHITESPACE,
|
268
|
+
true,
|
269
|
+
);
|
270
|
+
}
|
271
|
+
});
|
272
|
+
}
|
273
|
+
};
|
274
|
+
|
275
|
+
Ok(())
|
276
|
+
}
|
277
|
+
|
76
278
|
fn get_config(&self) -> Result<RHash, magnus::Error> {
|
77
279
|
let binding = self.0.borrow();
|
78
280
|
let ruby = Ruby::get().unwrap();
|
@@ -81,40 +283,39 @@ impl SelmaSanitizer {
|
|
81
283
|
}
|
82
284
|
|
83
285
|
/// Toggle a sanitizer option on or off.
|
84
|
-
fn set_flag(
|
286
|
+
fn set_flag(
|
287
|
+
tag_name: String,
|
288
|
+
flags: &mut [u8; crate::tags::Tag::TAG_COUNT],
|
289
|
+
flag: u8,
|
290
|
+
set: bool,
|
291
|
+
) {
|
85
292
|
let tag = crate::tags::Tag::tag_from_tag_name(tag_name.as_str());
|
86
293
|
if set {
|
87
|
-
|
294
|
+
flags[tag.index] |= flag;
|
88
295
|
} else {
|
89
|
-
|
296
|
+
flags[tag.index] &= !flag;
|
90
297
|
}
|
91
298
|
}
|
92
299
|
|
93
300
|
/// Toggles all sanitization options on or off.
|
94
|
-
fn set_all_flags(&
|
301
|
+
fn set_all_flags(flags: &mut [u8; crate::tags::Tag::TAG_COUNT], flag: u8, set: bool) {
|
95
302
|
if set {
|
96
303
|
crate::tags::Tag::html_tags()
|
97
304
|
.iter()
|
98
305
|
.enumerate()
|
99
306
|
.for_each(|(iter, _)| {
|
100
|
-
|
307
|
+
flags[iter] |= flag;
|
101
308
|
});
|
102
309
|
} else {
|
103
310
|
crate::tags::Tag::html_tags()
|
104
311
|
.iter()
|
105
312
|
.enumerate()
|
106
313
|
.for_each(|(iter, _)| {
|
107
|
-
|
314
|
+
flags[iter] &= flag;
|
108
315
|
});
|
109
316
|
}
|
110
317
|
}
|
111
318
|
|
112
|
-
/// Whether or not to keep dangerous HTML tags.
|
113
|
-
fn set_escape_tagfilter(&self, allow: bool) -> bool {
|
114
|
-
self.0.borrow_mut().escape_tagfilter = allow;
|
115
|
-
allow
|
116
|
-
}
|
117
|
-
|
118
319
|
pub fn escape_tagfilter(&self, e: &mut Element) -> bool {
|
119
320
|
if self.0.borrow().escape_tagfilter {
|
120
321
|
let tag = crate::tags::Tag::tag_from_element(e);
|
@@ -131,12 +332,6 @@ impl SelmaSanitizer {
|
|
131
332
|
self.0.borrow().escape_tagfilter
|
132
333
|
}
|
133
334
|
|
134
|
-
/// Whether or not to keep HTML comments.
|
135
|
-
fn set_allow_comments(&self, allow: bool) -> bool {
|
136
|
-
self.0.borrow_mut().allow_comments = allow;
|
137
|
-
allow
|
138
|
-
}
|
139
|
-
|
140
335
|
pub fn get_allow_comments(&self) -> bool {
|
141
336
|
self.0.borrow().allow_comments
|
142
337
|
}
|
@@ -145,12 +340,6 @@ impl SelmaSanitizer {
|
|
145
340
|
c.remove();
|
146
341
|
}
|
147
342
|
|
148
|
-
/// Whether or not to keep HTML doctype.
|
149
|
-
fn set_allow_doctype(&self, allow: bool) -> bool {
|
150
|
-
self.0.borrow_mut().allow_doctype = allow;
|
151
|
-
allow
|
152
|
-
}
|
153
|
-
|
154
343
|
/// Whether or not to keep HTML doctype.
|
155
344
|
pub fn get_allow_doctype(&self) -> bool {
|
156
345
|
self.0.borrow().allow_doctype
|
@@ -160,48 +349,14 @@ impl SelmaSanitizer {
|
|
160
349
|
d.remove();
|
161
350
|
}
|
162
351
|
|
163
|
-
fn
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
let allowed_attrs = &mut binding.allowed_attrs;
|
169
|
-
Self::set_allowed(allowed_attrs, &attr_name, allow);
|
170
|
-
} else {
|
171
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
172
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
173
|
-
|
174
|
-
element_sanitizer.allowed_attrs.push(attr_name);
|
175
|
-
}
|
176
|
-
|
177
|
-
allow
|
178
|
-
}
|
179
|
-
|
180
|
-
fn set_allowed_class(&self, element_name: String, class_name: String, allow: bool) -> bool {
|
181
|
-
let mut binding = self.0.borrow_mut();
|
182
|
-
if element_name == "all" {
|
183
|
-
let allowed_classes = &mut binding.allowed_classes;
|
184
|
-
Self::set_allowed(allowed_classes, &class_name, allow);
|
185
|
-
} else {
|
186
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
187
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
188
|
-
|
189
|
-
let allowed_classes = element_sanitizer.allowed_classes.borrow_mut();
|
190
|
-
Self::set_allowed(allowed_classes, &class_name, allow)
|
191
|
-
}
|
192
|
-
allow
|
193
|
-
}
|
194
|
-
|
195
|
-
fn set_allowed_protocols(&self, element_name: String, attr_name: String, allow_list: RArray) {
|
196
|
-
let mut binding = self.0.borrow_mut();
|
197
|
-
|
198
|
-
let element_sanitizers = &mut binding.element_sanitizers;
|
199
|
-
let element_sanitizer = Self::get_element_sanitizer(element_sanitizers, &element_name);
|
200
|
-
|
352
|
+
fn set_allowed_protocols(
|
353
|
+
element_sanitizer: &mut ElementSanitizer,
|
354
|
+
attr_name: String,
|
355
|
+
allow_list: RArray,
|
356
|
+
) {
|
201
357
|
let protocol_sanitizers = &mut element_sanitizer.protocol_sanitizers.borrow_mut();
|
202
358
|
|
203
|
-
for
|
204
|
-
let allowed_protocol = opt_allowed_protocol.unwrap();
|
359
|
+
for allowed_protocol in allow_list.into_iter() {
|
205
360
|
let protocol_list = protocol_sanitizers.get_mut(&attr_name);
|
206
361
|
if allowed_protocol.is_kind_of(class::string()) {
|
207
362
|
match protocol_list {
|
@@ -211,20 +366,23 @@ impl SelmaSanitizer {
|
|
211
366
|
}
|
212
367
|
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
|
213
368
|
}
|
214
|
-
} else if allowed_protocol.is_kind_of(class::symbol())
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
369
|
+
} else if allowed_protocol.is_kind_of(class::symbol()) {
|
370
|
+
let protocol_config = allowed_protocol.inspect();
|
371
|
+
if protocol_config == ":relative" {
|
372
|
+
match protocol_list {
|
373
|
+
None => {
|
374
|
+
protocol_sanitizers.insert(
|
375
|
+
attr_name.to_string(),
|
376
|
+
vec!["#".to_string(), "/".to_string()],
|
377
|
+
);
|
378
|
+
}
|
379
|
+
Some(protocol_list) => {
|
380
|
+
protocol_list.push("#".to_string());
|
381
|
+
protocol_list.push("/".to_string());
|
382
|
+
}
|
227
383
|
}
|
384
|
+
} else if protocol_config == ":all" {
|
385
|
+
protocol_sanitizers.insert(attr_name.to_string(), vec!["all".to_string()]);
|
228
386
|
}
|
229
387
|
}
|
230
388
|
}
|
@@ -335,7 +493,7 @@ impl SelmaSanitizer {
|
|
335
493
|
element: &mut Element,
|
336
494
|
element_sanitizer: &ElementSanitizer,
|
337
495
|
attr_name: &String,
|
338
|
-
attr_val: &
|
496
|
+
attr_val: &str,
|
339
497
|
) -> Result<bool, AttributeNameError> {
|
340
498
|
let mut allowed: bool = false;
|
341
499
|
let element_allowed_attrs = element_sanitizer.allowed_attrs.contains(attr_name);
|
@@ -387,7 +545,11 @@ impl SelmaSanitizer {
|
|
387
545
|
attr_val.contains("://")
|
388
546
|
}
|
389
547
|
|
390
|
-
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &
|
548
|
+
fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &str) -> bool {
|
549
|
+
if protocols_allowed.contains(&"all".to_string()) {
|
550
|
+
return true;
|
551
|
+
}
|
552
|
+
|
391
553
|
// FIXME: is there a more idiomatic way to do this?
|
392
554
|
let mut pos: usize = 0;
|
393
555
|
let mut chars = attr_val.chars();
|
@@ -509,6 +671,7 @@ impl SelmaSanitizer {
|
|
509
671
|
element.after(" ", ContentType::Text);
|
510
672
|
}
|
511
673
|
}
|
674
|
+
|
512
675
|
element.remove_and_keep_content();
|
513
676
|
}
|
514
677
|
}
|
@@ -542,7 +705,7 @@ impl SelmaSanitizer {
|
|
542
705
|
) -> &'a mut ElementSanitizer {
|
543
706
|
element_sanitizers
|
544
707
|
.entry(element_name.to_string())
|
545
|
-
.
|
708
|
+
.or_default()
|
546
709
|
}
|
547
710
|
}
|
548
711
|
|
@@ -554,50 +717,5 @@ pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
|
554
717
|
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|
555
718
|
c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;
|
556
719
|
|
557
|
-
c_sanitizer.define_method("set_flag", method!(SelmaSanitizer::set_flag, 3))?;
|
558
|
-
c_sanitizer.define_method("set_all_flags", method!(SelmaSanitizer::set_all_flags, 2))?;
|
559
|
-
|
560
|
-
c_sanitizer.define_method(
|
561
|
-
"set_escape_tagfilter",
|
562
|
-
method!(SelmaSanitizer::set_escape_tagfilter, 1),
|
563
|
-
)?;
|
564
|
-
c_sanitizer.define_method(
|
565
|
-
"escape_tagfilter",
|
566
|
-
method!(SelmaSanitizer::get_escape_tagfilter, 0),
|
567
|
-
)?;
|
568
|
-
|
569
|
-
c_sanitizer.define_method(
|
570
|
-
"set_allow_comments",
|
571
|
-
method!(SelmaSanitizer::set_allow_comments, 1),
|
572
|
-
)?;
|
573
|
-
c_sanitizer.define_method(
|
574
|
-
"allow_comments",
|
575
|
-
method!(SelmaSanitizer::get_allow_comments, 0),
|
576
|
-
)?;
|
577
|
-
|
578
|
-
c_sanitizer.define_method(
|
579
|
-
"set_allow_doctype",
|
580
|
-
method!(SelmaSanitizer::set_allow_doctype, 1),
|
581
|
-
)?;
|
582
|
-
c_sanitizer.define_method(
|
583
|
-
"allow_doctype",
|
584
|
-
method!(SelmaSanitizer::get_allow_doctype, 0),
|
585
|
-
)?;
|
586
|
-
|
587
|
-
c_sanitizer.define_method(
|
588
|
-
"set_allowed_attribute",
|
589
|
-
method!(SelmaSanitizer::set_allowed_attribute, 3),
|
590
|
-
)?;
|
591
|
-
|
592
|
-
c_sanitizer.define_method(
|
593
|
-
"set_allowed_class",
|
594
|
-
method!(SelmaSanitizer::set_allowed_class, 3),
|
595
|
-
)?;
|
596
|
-
|
597
|
-
c_sanitizer.define_method(
|
598
|
-
"set_allowed_protocols",
|
599
|
-
method!(SelmaSanitizer::set_allowed_protocols, 3),
|
600
|
-
)?;
|
601
|
-
|
602
720
|
Ok(())
|
603
721
|
}
|
data/lib/selma/config.rb
ADDED
data/lib/selma/html.rb
CHANGED
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
|
29
29
|
# URL handling protocols to allow in specific attributes. By default, no
|
30
30
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
31
|
-
# to allow relative URLs sans protocol.
|
31
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
32
32
|
protocols: {},
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
raise ArgumentError, "other_config must be a Hash" unless other_config.is_a?(Hash)
|
29
29
|
|
30
30
|
merged = {}
|
31
|
-
keys = Set.new(config.keys + other_config.keys)
|
31
|
+
keys = Set.new(config.keys + other_config.keys).to_a
|
32
32
|
|
33
33
|
keys.each do |key|
|
34
34
|
oldval = config[key]
|
@@ -39,7 +39,7 @@ module Selma
|
|
39
39
|
merged[key] = if oldval.is_a?(Hash) && newval.is_a?(Hash)
|
40
40
|
oldval.empty? ? newval.dup : merge(oldval, newval)
|
41
41
|
elsif newval.is_a?(Array) && key != :transformers
|
42
|
-
Set.new(newval)
|
42
|
+
Set.new(newval).to_a
|
43
43
|
else
|
44
44
|
can_dupe?(newval) ? newval.dup : newval
|
45
45
|
end
|
data/lib/selma/sanitizer.rb
CHANGED
@@ -4,82 +4,5 @@ require "selma/sanitizer/config"
|
|
4
4
|
|
5
5
|
module Selma
|
6
6
|
class Sanitizer
|
7
|
-
ALLOW = 1 << 0
|
8
|
-
ESCAPE_TAGFILTER = (1 << 1)
|
9
|
-
REMOVE_CONTENTS = (1 << 2)
|
10
|
-
WRAP_WHITESPACE = (1 << 3)
|
11
|
-
|
12
|
-
# initialize is in Rust, this just helps manage config setup in Ruby
|
13
|
-
# TODO: could this just become initialize?
|
14
|
-
def setup
|
15
|
-
allow_element(config[:elements] || [])
|
16
|
-
|
17
|
-
(config[:attributes] || {}).each do |element, attrs|
|
18
|
-
allow_attribute(element, attrs)
|
19
|
-
end
|
20
|
-
|
21
|
-
(config[:protocols] || {}).each do |element, protocols|
|
22
|
-
protocols.each do |attribute, pr|
|
23
|
-
allow_protocol(element, attribute, pr)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
remove_contents(config[:remove_contents]) if config.include?(:remove_contents)
|
28
|
-
|
29
|
-
wrap_with_whitespace(config[:whitespace_elements]) if config.include?(:whitespace_elements)
|
30
|
-
|
31
|
-
set_escape_tagfilter(config.fetch(:escape_tagfilter, true))
|
32
|
-
set_allow_comments(config.fetch(:allow_comments, false))
|
33
|
-
set_allow_doctype(config.fetch(:allow_doctype, true))
|
34
|
-
end
|
35
|
-
|
36
|
-
def elements
|
37
|
-
config[:elements]
|
38
|
-
end
|
39
|
-
|
40
|
-
def allow_element(elements)
|
41
|
-
elements.flatten.each { |e| set_flag(e, ALLOW, true) }
|
42
|
-
end
|
43
|
-
|
44
|
-
def disallow_element(elements)
|
45
|
-
elements.flatten.each { |e| set_flag(e, ALLOW, false) }
|
46
|
-
end
|
47
|
-
|
48
|
-
def allow_attribute(element, attrs)
|
49
|
-
attrs.flatten.each { |attr| set_allowed_attribute(element, attr, true) }
|
50
|
-
end
|
51
|
-
|
52
|
-
def require_any_attributes(element, attrs)
|
53
|
-
if attr.empty?
|
54
|
-
set_required_attribute(element, "*", true)
|
55
|
-
else
|
56
|
-
attrs.flatten.each { |attr| set_required_attribute(element, attr, true) }
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def disallow_attribute(element, attrs)
|
61
|
-
attrs.flatten.each { |attr| set_allowed_attribute(element, attr, false) }
|
62
|
-
end
|
63
|
-
|
64
|
-
def allow_class(element, *klass)
|
65
|
-
klass.flatten.each { |k| set_allowed_class(element, k, true) }
|
66
|
-
end
|
67
|
-
|
68
|
-
def allow_protocol(element, attr, protos)
|
69
|
-
protos = [protos] unless protos.is_a?(Array)
|
70
|
-
set_allowed_protocols(element, attr, protos)
|
71
|
-
end
|
72
|
-
|
73
|
-
def remove_contents(elements)
|
74
|
-
if elements.is_a?(TrueClass) || elements.is_a?(FalseClass)
|
75
|
-
set_all_flags(REMOVE_CONTENTS, elements)
|
76
|
-
else
|
77
|
-
elements.flatten.each { |e| set_flag(e, REMOVE_CONTENTS, true) }
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def wrap_with_whitespace(elements)
|
82
|
-
elements.flatten.each { |e| set_flag(e, WRAP_WHITESPACE, true) }
|
83
|
-
end
|
84
7
|
end
|
85
8
|
end
|
data/lib/selma/version.rb
CHANGED