rails-html-sanitizer 1.4.3 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +114 -0
- data/MIT-LICENSE +1 -1
- data/README.md +163 -34
- data/lib/rails/html/sanitizer/version.rb +4 -2
- data/lib/rails/html/sanitizer.rb +371 -121
- data/lib/rails/html/scrubbers.rb +78 -78
- data/lib/rails-html-sanitizer.rb +7 -23
- data/test/rails_api_test.rb +88 -0
- data/test/sanitizer_test.rb +925 -505
- data/test/scrubbers_test.rb +57 -30
- metadata +19 -57
data/lib/rails/html/sanitizer.rb
CHANGED
@@ -1,172 +1,422 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rails
|
2
|
-
module
|
3
|
-
|
4
|
+
module HTML
|
5
|
+
class Sanitizer
|
6
|
+
class << self
|
7
|
+
def html5_support?
|
8
|
+
return @html5_support if defined?(@html5_support)
|
9
|
+
|
10
|
+
@html5_support = Loofah.respond_to?(:html5_support?) && Loofah.html5_support?
|
11
|
+
end
|
12
|
+
|
13
|
+
def best_supported_vendor
|
14
|
+
html5_support? ? Rails::HTML5::Sanitizer : Rails::HTML4::Sanitizer
|
15
|
+
end
|
16
|
+
end
|
4
17
|
|
5
|
-
class Sanitizer # :nodoc:
|
6
18
|
def sanitize(html, options = {})
|
7
19
|
raise NotImplementedError, "subclasses must implement sanitize method."
|
8
20
|
end
|
9
21
|
|
10
22
|
private
|
23
|
+
def remove_xpaths(node, xpaths)
|
24
|
+
node.xpath(*xpaths).remove
|
25
|
+
node
|
26
|
+
end
|
27
|
+
|
28
|
+
def properly_encode(fragment, options)
|
29
|
+
fragment.xml? ? fragment.to_xml(options) : fragment.to_html(options)
|
30
|
+
end
|
31
|
+
end
|
11
32
|
|
12
|
-
|
13
|
-
|
14
|
-
|
33
|
+
module Concern
|
34
|
+
module ComposedSanitize
|
35
|
+
def sanitize(html, options = {})
|
36
|
+
return unless html
|
37
|
+
return html if html.empty?
|
38
|
+
|
39
|
+
serialize(scrub(parse_fragment(html), options))
|
40
|
+
end
|
15
41
|
end
|
16
42
|
|
17
|
-
|
18
|
-
|
43
|
+
module Parser
|
44
|
+
module HTML4
|
45
|
+
def parse_fragment(html)
|
46
|
+
Loofah.html4_fragment(html)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
module HTML5
|
51
|
+
def parse_fragment(html)
|
52
|
+
Loofah.html5_fragment(html)
|
53
|
+
end
|
54
|
+
end if Rails::HTML::Sanitizer.html5_support?
|
55
|
+
end
|
56
|
+
|
57
|
+
module Scrubber
|
58
|
+
module Full
|
59
|
+
def scrub(fragment, options = {})
|
60
|
+
fragment.scrub!(TextOnlyScrubber.new)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
module Link
|
65
|
+
def initialize
|
66
|
+
super
|
67
|
+
@link_scrubber = TargetScrubber.new
|
68
|
+
@link_scrubber.tags = %w(a)
|
69
|
+
@link_scrubber.attributes = %w(href)
|
70
|
+
end
|
71
|
+
|
72
|
+
def scrub(fragment, options = {})
|
73
|
+
fragment.scrub!(@link_scrubber)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
module SafeList
|
78
|
+
# The default safe list for tags
|
79
|
+
DEFAULT_ALLOWED_TAGS = Set.new([
|
80
|
+
"a",
|
81
|
+
"abbr",
|
82
|
+
"acronym",
|
83
|
+
"address",
|
84
|
+
"b",
|
85
|
+
"big",
|
86
|
+
"blockquote",
|
87
|
+
"br",
|
88
|
+
"cite",
|
89
|
+
"code",
|
90
|
+
"dd",
|
91
|
+
"del",
|
92
|
+
"dfn",
|
93
|
+
"div",
|
94
|
+
"dl",
|
95
|
+
"dt",
|
96
|
+
"em",
|
97
|
+
"h1",
|
98
|
+
"h2",
|
99
|
+
"h3",
|
100
|
+
"h4",
|
101
|
+
"h5",
|
102
|
+
"h6",
|
103
|
+
"hr",
|
104
|
+
"i",
|
105
|
+
"img",
|
106
|
+
"ins",
|
107
|
+
"kbd",
|
108
|
+
"li",
|
109
|
+
"ol",
|
110
|
+
"p",
|
111
|
+
"pre",
|
112
|
+
"samp",
|
113
|
+
"small",
|
114
|
+
"span",
|
115
|
+
"strong",
|
116
|
+
"sub",
|
117
|
+
"sup",
|
118
|
+
"time",
|
119
|
+
"tt",
|
120
|
+
"ul",
|
121
|
+
"var",
|
122
|
+
]).freeze
|
123
|
+
|
124
|
+
# The default safe list for attributes
|
125
|
+
DEFAULT_ALLOWED_ATTRIBUTES = Set.new([
|
126
|
+
"abbr",
|
127
|
+
"alt",
|
128
|
+
"cite",
|
129
|
+
"class",
|
130
|
+
"datetime",
|
131
|
+
"height",
|
132
|
+
"href",
|
133
|
+
"lang",
|
134
|
+
"name",
|
135
|
+
"src",
|
136
|
+
"title",
|
137
|
+
"width",
|
138
|
+
"xml:lang",
|
139
|
+
]).freeze
|
140
|
+
|
141
|
+
def self.included(klass)
|
142
|
+
class << klass
|
143
|
+
attr_accessor :allowed_tags
|
144
|
+
attr_accessor :allowed_attributes
|
145
|
+
end
|
146
|
+
|
147
|
+
klass.allowed_tags = DEFAULT_ALLOWED_TAGS.dup
|
148
|
+
klass.allowed_attributes = DEFAULT_ALLOWED_ATTRIBUTES.dup
|
149
|
+
end
|
150
|
+
|
151
|
+
def initialize(prune: false)
|
152
|
+
@permit_scrubber = PermitScrubber.new(prune: prune)
|
153
|
+
end
|
154
|
+
|
155
|
+
def scrub(fragment, options = {})
|
156
|
+
if scrubber = options[:scrubber]
|
157
|
+
# No duck typing, Loofah ensures subclass of Loofah::Scrubber
|
158
|
+
fragment.scrub!(scrubber)
|
159
|
+
elsif allowed_tags(options) || allowed_attributes(options)
|
160
|
+
@permit_scrubber.tags = allowed_tags(options)
|
161
|
+
@permit_scrubber.attributes = allowed_attributes(options)
|
162
|
+
fragment.scrub!(@permit_scrubber)
|
163
|
+
else
|
164
|
+
fragment.scrub!(:strip)
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
def sanitize_css(style_string)
|
169
|
+
Loofah::HTML5::Scrub.scrub_css(style_string)
|
170
|
+
end
|
171
|
+
|
172
|
+
private
|
173
|
+
def allowed_tags(options)
|
174
|
+
options[:tags] || self.class.allowed_tags
|
175
|
+
end
|
176
|
+
|
177
|
+
def allowed_attributes(options)
|
178
|
+
options[:attributes] || self.class.allowed_attributes
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
module Serializer
|
184
|
+
module UTF8Encode
|
185
|
+
def serialize(fragment)
|
186
|
+
properly_encode(fragment, encoding: "UTF-8")
|
187
|
+
end
|
188
|
+
end
|
19
189
|
end
|
20
190
|
end
|
191
|
+
end
|
21
192
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
class FullSanitizer < Sanitizer
|
29
|
-
def sanitize(html, options = {})
|
30
|
-
return unless html
|
31
|
-
return html if html.empty?
|
193
|
+
module HTML4
|
194
|
+
module Sanitizer
|
195
|
+
module VendorMethods
|
196
|
+
def full_sanitizer
|
197
|
+
Rails::HTML4::FullSanitizer
|
198
|
+
end
|
32
199
|
|
33
|
-
|
200
|
+
def link_sanitizer
|
201
|
+
Rails::HTML4::LinkSanitizer
|
202
|
+
end
|
34
203
|
|
35
|
-
|
36
|
-
|
204
|
+
def safe_list_sanitizer
|
205
|
+
Rails::HTML4::SafeListSanitizer
|
206
|
+
end
|
37
207
|
|
38
|
-
|
208
|
+
def white_list_sanitizer # :nodoc:
|
209
|
+
safe_list_sanitizer
|
210
|
+
end
|
39
211
|
end
|
212
|
+
|
213
|
+
extend VendorMethods
|
40
214
|
end
|
41
215
|
|
42
|
-
#
|
43
|
-
# Removes +a+ tags and +href+ attributes leaving only the link text.
|
216
|
+
# == Rails::HTML4::FullSanitizer
|
44
217
|
#
|
45
|
-
#
|
46
|
-
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
218
|
+
# Removes all tags from HTML4 but strips out scripts, forms and comments.
|
47
219
|
#
|
48
|
-
#
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
220
|
+
# full_sanitizer = Rails::HTML4::FullSanitizer.new
|
221
|
+
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
|
222
|
+
# # => "Bold no more! See more here..."
|
223
|
+
#
|
224
|
+
class FullSanitizer < Rails::HTML::Sanitizer
|
225
|
+
include HTML::Concern::ComposedSanitize
|
226
|
+
include HTML::Concern::Parser::HTML4
|
227
|
+
include HTML::Concern::Scrubber::Full
|
228
|
+
include HTML::Concern::Serializer::UTF8Encode
|
229
|
+
end
|
55
230
|
|
56
|
-
|
57
|
-
|
58
|
-
|
231
|
+
# == Rails::HTML4::LinkSanitizer
|
232
|
+
#
|
233
|
+
# Removes +a+ tags and +href+ attributes from HTML4 leaving only the link text.
|
234
|
+
#
|
235
|
+
# link_sanitizer = Rails::HTML4::LinkSanitizer.new
|
236
|
+
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
237
|
+
# # => "Only the link text will be kept."
|
238
|
+
#
|
239
|
+
class LinkSanitizer < Rails::HTML::Sanitizer
|
240
|
+
include HTML::Concern::ComposedSanitize
|
241
|
+
include HTML::Concern::Parser::HTML4
|
242
|
+
include HTML::Concern::Scrubber::Link
|
243
|
+
include HTML::Concern::Serializer::UTF8Encode
|
59
244
|
end
|
60
245
|
|
61
|
-
#
|
62
|
-
#
|
246
|
+
# == Rails::HTML4::SafeListSanitizer
|
247
|
+
#
|
248
|
+
# Sanitizes HTML4 and CSS from an extensive safe list.
|
63
249
|
#
|
64
250
|
# === Whitespace
|
65
|
-
# We can't make any guarantees about whitespace being kept or stripped.
|
66
|
-
# Loofah uses Nokogiri, which wraps either a C or Java parser for the
|
67
|
-
# respective Ruby implementation.
|
68
|
-
# Those two parsers determine how whitespace is ultimately handled.
|
69
251
|
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
252
|
+
# We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri,
|
253
|
+
# which wraps either a C or Java parser for the respective Ruby implementation. Those two
|
254
|
+
# parsers determine how whitespace is ultimately handled.
|
255
|
+
#
|
256
|
+
# When the stripped markup is rendered, the user's browser won't take whitespace into account
|
257
|
+
# anyway. It might be better to suggest your users wrap their whitespace sensitive content in
|
258
|
+
# pre tags or that you do so automatically.
|
74
259
|
#
|
75
260
|
# === Options
|
76
|
-
# Sanitizes both html and css via the safe lists found here:
|
77
|
-
# https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/safelist.rb
|
78
261
|
#
|
79
|
-
#
|
80
|
-
#
|
262
|
+
# Sanitizes both html and css via the safe lists found in
|
263
|
+
# Rails::HTML::Concern::Scrubber::SafeList
|
264
|
+
#
|
265
|
+
# SafeListSanitizer also accepts options to configure the safe list used when sanitizing html.
|
81
266
|
# There's a class level option:
|
82
|
-
# Rails::Html::SafeListSanitizer.allowed_tags = %w(table tr td)
|
83
|
-
# Rails::Html::SafeListSanitizer.allowed_attributes = %w(id class style)
|
84
267
|
#
|
85
|
-
#
|
86
|
-
#
|
268
|
+
# Rails::HTML4::SafeListSanitizer.allowed_tags = %w(table tr td)
|
269
|
+
# Rails::HTML4::SafeListSanitizer.allowed_attributes = %w(id class style)
|
270
|
+
#
|
271
|
+
# Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the
|
272
|
+
# class level options.
|
87
273
|
#
|
88
274
|
# === Examples
|
89
|
-
# safe_list_sanitizer = Rails::Html::SafeListSanitizer.new
|
90
275
|
#
|
91
|
-
#
|
92
|
-
# safe_list_sanitizer.sanitize_css('background-color: #000;')
|
276
|
+
# safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new
|
93
277
|
#
|
94
|
-
#
|
95
|
-
#
|
278
|
+
# # default: sanitize via an extensive safe list of allowed elements
|
279
|
+
# safe_list_sanitizer.sanitize(@article.body)
|
96
280
|
#
|
97
|
-
#
|
98
|
-
#
|
99
|
-
#
|
281
|
+
# # sanitize via the supplied tags and attributes
|
282
|
+
# safe_list_sanitizer.sanitize(
|
283
|
+
# @article.body,
|
284
|
+
# tags: %w(table tr td),
|
285
|
+
# attributes: %w(id class style),
|
286
|
+
# )
|
100
287
|
#
|
101
|
-
#
|
102
|
-
#
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
def sanitize(html, options = {})
|
118
|
-
return unless html
|
119
|
-
return html if html.empty?
|
120
|
-
|
121
|
-
loofah_fragment = Loofah.fragment(html)
|
288
|
+
# # sanitize via a custom Loofah scrubber
|
289
|
+
# safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
|
290
|
+
#
|
291
|
+
# # prune nodes from the tree instead of stripping tags and leaving inner content
|
292
|
+
# safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new(prune: true)
|
293
|
+
#
|
294
|
+
# # the sanitizer can also sanitize CSS
|
295
|
+
# safe_list_sanitizer.sanitize_css('background-color: #000;')
|
296
|
+
#
|
297
|
+
class SafeListSanitizer < Rails::HTML::Sanitizer
|
298
|
+
include HTML::Concern::ComposedSanitize
|
299
|
+
include HTML::Concern::Parser::HTML4
|
300
|
+
include HTML::Concern::Scrubber::SafeList
|
301
|
+
include HTML::Concern::Serializer::UTF8Encode
|
302
|
+
end
|
303
|
+
end
|
122
304
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
@permit_scrubber.attributes = allowed_attributes(options)
|
129
|
-
loofah_fragment.scrub!(@permit_scrubber)
|
130
|
-
else
|
131
|
-
remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE)
|
132
|
-
loofah_fragment.scrub!(:strip)
|
305
|
+
module HTML5
|
306
|
+
class Sanitizer
|
307
|
+
class << self
|
308
|
+
def full_sanitizer
|
309
|
+
Rails::HTML5::FullSanitizer
|
133
310
|
end
|
134
311
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
def sanitize_css(style_string)
|
139
|
-
Loofah::HTML5::Scrub.scrub_css(style_string)
|
140
|
-
end
|
141
|
-
|
142
|
-
private
|
143
|
-
|
144
|
-
def loofah_using_html5?
|
145
|
-
# future-proofing, see https://github.com/flavorjones/loofah/pull/239
|
146
|
-
Loofah.respond_to?(:html5_mode?) && Loofah.html5_mode?
|
147
|
-
end
|
312
|
+
def link_sanitizer
|
313
|
+
Rails::HTML5::LinkSanitizer
|
314
|
+
end
|
148
315
|
|
149
|
-
|
150
|
-
|
151
|
-
warn("WARNING: #{self.class}: removing 'style' from safelist, should not be combined with 'select'")
|
152
|
-
tags.delete("style")
|
316
|
+
def safe_list_sanitizer
|
317
|
+
Rails::HTML5::SafeListSanitizer
|
153
318
|
end
|
154
|
-
tags
|
155
|
-
end
|
156
319
|
|
157
|
-
|
158
|
-
|
159
|
-
remove_safelist_tag_combinations(options[:tags])
|
160
|
-
else
|
161
|
-
self.class.allowed_tags
|
320
|
+
def white_list_sanitizer # :nodoc:
|
321
|
+
safe_list_sanitizer
|
162
322
|
end
|
163
323
|
end
|
324
|
+
end
|
164
325
|
|
165
|
-
|
166
|
-
|
167
|
-
|
326
|
+
# == Rails::HTML5::FullSanitizer
|
327
|
+
#
|
328
|
+
# Removes all tags from HTML5 but strips out scripts, forms and comments.
|
329
|
+
#
|
330
|
+
# full_sanitizer = Rails::HTML5::FullSanitizer.new
|
331
|
+
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
|
332
|
+
# # => "Bold no more! See more here..."
|
333
|
+
#
|
334
|
+
class FullSanitizer < Rails::HTML::Sanitizer
|
335
|
+
include HTML::Concern::ComposedSanitize
|
336
|
+
include HTML::Concern::Parser::HTML5
|
337
|
+
include HTML::Concern::Scrubber::Full
|
338
|
+
include HTML::Concern::Serializer::UTF8Encode
|
339
|
+
end
|
340
|
+
|
341
|
+
# == Rails::HTML5::LinkSanitizer
|
342
|
+
#
|
343
|
+
# Removes +a+ tags and +href+ attributes from HTML5 leaving only the link text.
|
344
|
+
#
|
345
|
+
# link_sanitizer = Rails::HTML5::LinkSanitizer.new
|
346
|
+
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
347
|
+
# # => "Only the link text will be kept."
|
348
|
+
#
|
349
|
+
class LinkSanitizer < Rails::HTML::Sanitizer
|
350
|
+
include HTML::Concern::ComposedSanitize
|
351
|
+
include HTML::Concern::Parser::HTML5
|
352
|
+
include HTML::Concern::Scrubber::Link
|
353
|
+
include HTML::Concern::Serializer::UTF8Encode
|
354
|
+
end
|
355
|
+
|
356
|
+
# == Rails::HTML5::SafeListSanitizer
|
357
|
+
#
|
358
|
+
# Sanitizes HTML5 and CSS from an extensive safe list.
|
359
|
+
#
|
360
|
+
# === Whitespace
|
361
|
+
#
|
362
|
+
# We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri,
|
363
|
+
# which wraps either a C or Java parser for the respective Ruby implementation. Those two
|
364
|
+
# parsers determine how whitespace is ultimately handled.
|
365
|
+
#
|
366
|
+
# When the stripped markup is rendered, the user's browser won't take whitespace into account
|
367
|
+
# anyway. It might be better to suggest your users wrap their whitespace sensitive content in
|
368
|
+
# pre tags or that you do so automatically.
|
369
|
+
#
|
370
|
+
# === Options
|
371
|
+
#
|
372
|
+
# Sanitizes both html and css via the safe lists found in
|
373
|
+
# Rails::HTML::Concern::Scrubber::SafeList
|
374
|
+
#
|
375
|
+
# SafeListSanitizer also accepts options to configure the safe list used when sanitizing html.
|
376
|
+
# There's a class level option:
|
377
|
+
#
|
378
|
+
# Rails::HTML5::SafeListSanitizer.allowed_tags = %w(table tr td)
|
379
|
+
# Rails::HTML5::SafeListSanitizer.allowed_attributes = %w(id class style)
|
380
|
+
#
|
381
|
+
# Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the
|
382
|
+
# class level options.
|
383
|
+
#
|
384
|
+
# === Examples
|
385
|
+
#
|
386
|
+
# safe_list_sanitizer = Rails::HTML5::SafeListSanitizer.new
|
387
|
+
#
|
388
|
+
# # default: sanitize via an extensive safe list of allowed elements
|
389
|
+
# safe_list_sanitizer.sanitize(@article.body)
|
390
|
+
#
|
391
|
+
# # sanitize via the supplied tags and attributes
|
392
|
+
# safe_list_sanitizer.sanitize(
|
393
|
+
# @article.body,
|
394
|
+
# tags: %w(table tr td),
|
395
|
+
# attributes: %w(id class style),
|
396
|
+
# )
|
397
|
+
#
|
398
|
+
# # sanitize via a custom Loofah scrubber
|
399
|
+
# safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
|
400
|
+
#
|
401
|
+
# # prune nodes from the tree instead of stripping tags and leaving inner content
|
402
|
+
# safe_list_sanitizer = Rails::HTML5::SafeListSanitizer.new(prune: true)
|
403
|
+
#
|
404
|
+
# # the sanitizer can also sanitize CSS
|
405
|
+
# safe_list_sanitizer.sanitize_css('background-color: #000;')
|
406
|
+
#
|
407
|
+
class SafeListSanitizer < Rails::HTML::Sanitizer
|
408
|
+
include HTML::Concern::ComposedSanitize
|
409
|
+
include HTML::Concern::Parser::HTML5
|
410
|
+
include HTML::Concern::Scrubber::SafeList
|
411
|
+
include HTML::Concern::Serializer::UTF8Encode
|
168
412
|
end
|
413
|
+
end if Rails::HTML::Sanitizer.html5_support?
|
169
414
|
|
170
|
-
|
415
|
+
module HTML
|
416
|
+
Sanitizer.extend(HTML4::Sanitizer::VendorMethods) # :nodoc:
|
417
|
+
FullSanitizer = HTML4::FullSanitizer # :nodoc:
|
418
|
+
LinkSanitizer = HTML4::LinkSanitizer # :nodoc:
|
419
|
+
SafeListSanitizer = HTML4::SafeListSanitizer # :nodoc:
|
420
|
+
WhiteListSanitizer = SafeListSanitizer # :nodoc:
|
171
421
|
end
|
172
422
|
end
|