rails-html-sanitizer 1.5.0 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +125 -0
- data/MIT-LICENSE +1 -1
- data/README.md +124 -72
- data/lib/rails/html/sanitizer/version.rb +4 -2
- data/lib/rails/html/sanitizer.rb +372 -104
- data/lib/rails/html/scrubbers.rb +98 -73
- data/lib/rails-html-sanitizer.rb +7 -23
- data/test/rails_api_test.rb +88 -0
- data/test/sanitizer_test.rb +1095 -584
- data/test/scrubbers_test.rb +129 -38
- metadata +68 -58
data/lib/rails/html/sanitizer.rb
CHANGED
@@ -1,155 +1,423 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rails
|
2
|
-
module
|
3
|
-
|
4
|
+
module HTML
|
5
|
+
class Sanitizer
|
6
|
+
class << self
|
7
|
+
def html5_support?
|
8
|
+
return @html5_support if defined?(@html5_support)
|
9
|
+
|
10
|
+
@html5_support = Loofah.respond_to?(:html5_support?) && Loofah.html5_support?
|
11
|
+
end
|
12
|
+
|
13
|
+
def best_supported_vendor
|
14
|
+
html5_support? ? Rails::HTML5::Sanitizer : Rails::HTML4::Sanitizer
|
15
|
+
end
|
16
|
+
end
|
4
17
|
|
5
|
-
class Sanitizer # :nodoc:
|
6
18
|
def sanitize(html, options = {})
|
7
19
|
raise NotImplementedError, "subclasses must implement sanitize method."
|
8
20
|
end
|
9
21
|
|
10
22
|
private
|
23
|
+
def remove_xpaths(node, xpaths)
|
24
|
+
node.xpath(*xpaths).remove
|
25
|
+
node
|
26
|
+
end
|
27
|
+
|
28
|
+
def properly_encode(fragment, options)
|
29
|
+
fragment.xml? ? fragment.to_xml(options) : fragment.to_html(options)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
module Concern
|
34
|
+
module ComposedSanitize
|
35
|
+
def sanitize(html, options = {})
|
36
|
+
return unless html
|
37
|
+
return html if html.empty?
|
38
|
+
|
39
|
+
serialize(scrub(parse_fragment(html), options))
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
module Parser
|
44
|
+
module HTML4
|
45
|
+
def parse_fragment(html)
|
46
|
+
Loofah.html4_fragment(html)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
module HTML5
|
51
|
+
def parse_fragment(html)
|
52
|
+
Loofah.html5_fragment(html)
|
53
|
+
end
|
54
|
+
end if Rails::HTML::Sanitizer.html5_support?
|
55
|
+
end
|
56
|
+
|
57
|
+
module Scrubber
|
58
|
+
module Full
|
59
|
+
def scrub(fragment, options = {})
|
60
|
+
fragment.scrub!(TextOnlyScrubber.new)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
module Link
|
65
|
+
def initialize
|
66
|
+
super
|
67
|
+
@link_scrubber = TargetScrubber.new
|
68
|
+
@link_scrubber.tags = %w(a)
|
69
|
+
@link_scrubber.attributes = %w(href)
|
70
|
+
end
|
71
|
+
|
72
|
+
def scrub(fragment, options = {})
|
73
|
+
fragment.scrub!(@link_scrubber)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
module SafeList
|
78
|
+
# The default safe list for tags
|
79
|
+
DEFAULT_ALLOWED_TAGS = Set.new([
|
80
|
+
"a",
|
81
|
+
"abbr",
|
82
|
+
"acronym",
|
83
|
+
"address",
|
84
|
+
"b",
|
85
|
+
"big",
|
86
|
+
"blockquote",
|
87
|
+
"br",
|
88
|
+
"cite",
|
89
|
+
"code",
|
90
|
+
"dd",
|
91
|
+
"del",
|
92
|
+
"dfn",
|
93
|
+
"div",
|
94
|
+
"dl",
|
95
|
+
"dt",
|
96
|
+
"em",
|
97
|
+
"h1",
|
98
|
+
"h2",
|
99
|
+
"h3",
|
100
|
+
"h4",
|
101
|
+
"h5",
|
102
|
+
"h6",
|
103
|
+
"hr",
|
104
|
+
"i",
|
105
|
+
"img",
|
106
|
+
"ins",
|
107
|
+
"kbd",
|
108
|
+
"li",
|
109
|
+
"mark",
|
110
|
+
"ol",
|
111
|
+
"p",
|
112
|
+
"pre",
|
113
|
+
"samp",
|
114
|
+
"small",
|
115
|
+
"span",
|
116
|
+
"strong",
|
117
|
+
"sub",
|
118
|
+
"sup",
|
119
|
+
"time",
|
120
|
+
"tt",
|
121
|
+
"ul",
|
122
|
+
"var",
|
123
|
+
]).freeze
|
124
|
+
|
125
|
+
# The default safe list for attributes
|
126
|
+
DEFAULT_ALLOWED_ATTRIBUTES = Set.new([
|
127
|
+
"abbr",
|
128
|
+
"alt",
|
129
|
+
"cite",
|
130
|
+
"class",
|
131
|
+
"datetime",
|
132
|
+
"height",
|
133
|
+
"href",
|
134
|
+
"lang",
|
135
|
+
"name",
|
136
|
+
"src",
|
137
|
+
"title",
|
138
|
+
"width",
|
139
|
+
"xml:lang",
|
140
|
+
]).freeze
|
11
141
|
|
12
|
-
|
13
|
-
|
14
|
-
|
142
|
+
def self.included(klass)
|
143
|
+
class << klass
|
144
|
+
attr_accessor :allowed_tags
|
145
|
+
attr_accessor :allowed_attributes
|
146
|
+
end
|
147
|
+
|
148
|
+
klass.allowed_tags = DEFAULT_ALLOWED_TAGS.dup
|
149
|
+
klass.allowed_attributes = DEFAULT_ALLOWED_ATTRIBUTES.dup
|
150
|
+
end
|
151
|
+
|
152
|
+
def initialize(prune: false)
|
153
|
+
@permit_scrubber = PermitScrubber.new(prune: prune)
|
154
|
+
end
|
155
|
+
|
156
|
+
def scrub(fragment, options = {})
|
157
|
+
if scrubber = options[:scrubber]
|
158
|
+
# No duck typing, Loofah ensures subclass of Loofah::Scrubber
|
159
|
+
fragment.scrub!(scrubber)
|
160
|
+
elsif allowed_tags(options) || allowed_attributes(options)
|
161
|
+
@permit_scrubber.tags = allowed_tags(options)
|
162
|
+
@permit_scrubber.attributes = allowed_attributes(options)
|
163
|
+
fragment.scrub!(@permit_scrubber)
|
164
|
+
else
|
165
|
+
fragment.scrub!(:strip)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
def sanitize_css(style_string)
|
170
|
+
Loofah::HTML5::Scrub.scrub_css(style_string)
|
171
|
+
end
|
172
|
+
|
173
|
+
private
|
174
|
+
def allowed_tags(options)
|
175
|
+
options[:tags] || self.class.allowed_tags
|
176
|
+
end
|
177
|
+
|
178
|
+
def allowed_attributes(options)
|
179
|
+
options[:attributes] || self.class.allowed_attributes
|
180
|
+
end
|
181
|
+
end
|
15
182
|
end
|
16
183
|
|
17
|
-
|
18
|
-
|
184
|
+
module Serializer
|
185
|
+
module UTF8Encode
|
186
|
+
def serialize(fragment)
|
187
|
+
properly_encode(fragment, encoding: "UTF-8")
|
188
|
+
end
|
189
|
+
end
|
19
190
|
end
|
20
191
|
end
|
192
|
+
end
|
21
193
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
class FullSanitizer < Sanitizer
|
29
|
-
def sanitize(html, options = {})
|
30
|
-
return unless html
|
31
|
-
return html if html.empty?
|
194
|
+
module HTML4
|
195
|
+
module Sanitizer
|
196
|
+
module VendorMethods
|
197
|
+
def full_sanitizer
|
198
|
+
Rails::HTML4::FullSanitizer
|
199
|
+
end
|
32
200
|
|
33
|
-
|
201
|
+
def link_sanitizer
|
202
|
+
Rails::HTML4::LinkSanitizer
|
203
|
+
end
|
34
204
|
|
35
|
-
|
36
|
-
|
205
|
+
def safe_list_sanitizer
|
206
|
+
Rails::HTML4::SafeListSanitizer
|
207
|
+
end
|
37
208
|
|
38
|
-
|
209
|
+
def white_list_sanitizer # :nodoc:
|
210
|
+
safe_list_sanitizer
|
211
|
+
end
|
39
212
|
end
|
213
|
+
|
214
|
+
extend VendorMethods
|
40
215
|
end
|
41
216
|
|
42
|
-
#
|
43
|
-
# Removes +a+ tags and +href+ attributes leaving only the link text.
|
217
|
+
# == Rails::HTML4::FullSanitizer
|
44
218
|
#
|
45
|
-
#
|
46
|
-
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
219
|
+
# Removes all tags from HTML4 but strips out scripts, forms and comments.
|
47
220
|
#
|
48
|
-
#
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
221
|
+
# full_sanitizer = Rails::HTML4::FullSanitizer.new
|
222
|
+
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
|
223
|
+
# # => "Bold no more! See more here..."
|
224
|
+
#
|
225
|
+
class FullSanitizer < Rails::HTML::Sanitizer
|
226
|
+
include HTML::Concern::ComposedSanitize
|
227
|
+
include HTML::Concern::Parser::HTML4
|
228
|
+
include HTML::Concern::Scrubber::Full
|
229
|
+
include HTML::Concern::Serializer::UTF8Encode
|
230
|
+
end
|
55
231
|
|
56
|
-
|
57
|
-
|
58
|
-
|
232
|
+
# == Rails::HTML4::LinkSanitizer
|
233
|
+
#
|
234
|
+
# Removes +a+ tags and +href+ attributes from HTML4 leaving only the link text.
|
235
|
+
#
|
236
|
+
# link_sanitizer = Rails::HTML4::LinkSanitizer.new
|
237
|
+
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
238
|
+
# # => "Only the link text will be kept."
|
239
|
+
#
|
240
|
+
class LinkSanitizer < Rails::HTML::Sanitizer
|
241
|
+
include HTML::Concern::ComposedSanitize
|
242
|
+
include HTML::Concern::Parser::HTML4
|
243
|
+
include HTML::Concern::Scrubber::Link
|
244
|
+
include HTML::Concern::Serializer::UTF8Encode
|
59
245
|
end
|
60
246
|
|
61
|
-
#
|
62
|
-
#
|
247
|
+
# == Rails::HTML4::SafeListSanitizer
|
248
|
+
#
|
249
|
+
# Sanitizes HTML4 and CSS from an extensive safe list.
|
63
250
|
#
|
64
251
|
# === Whitespace
|
65
|
-
# We can't make any guarantees about whitespace being kept or stripped.
|
66
|
-
# Loofah uses Nokogiri, which wraps either a C or Java parser for the
|
67
|
-
# respective Ruby implementation.
|
68
|
-
# Those two parsers determine how whitespace is ultimately handled.
|
69
252
|
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
253
|
+
# We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri,
|
254
|
+
# which wraps either a C or Java parser for the respective Ruby implementation. Those two
|
255
|
+
# parsers determine how whitespace is ultimately handled.
|
256
|
+
#
|
257
|
+
# When the stripped markup will be rendered the user's browser won't take whitespace into account
|
258
|
+
# anyway. It might be better to suggest your users wrap their whitespace-sensitive content in
|
259
|
+
# pre tags or that you do so automatically.
|
74
260
|
#
|
75
261
|
# === Options
|
76
|
-
# Sanitizes both html and css via the safe lists found here:
|
77
|
-
# https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/safelist.rb
|
78
262
|
#
|
79
|
-
#
|
80
|
-
#
|
263
|
+
# Sanitizes both html and css via the safe lists found in
|
264
|
+
# Rails::HTML::Concern::Scrubber::SafeList
|
265
|
+
#
|
266
|
+
# SafeListSanitizer also accepts options to configure the safe list used when sanitizing html.
|
81
267
|
# There's a class level option:
|
82
|
-
# Rails::Html::SafeListSanitizer.allowed_tags = %w(table tr td)
|
83
|
-
# Rails::Html::SafeListSanitizer.allowed_attributes = %w(id class style)
|
84
268
|
#
|
85
|
-
#
|
86
|
-
#
|
269
|
+
# Rails::HTML4::SafeListSanitizer.allowed_tags = %w(table tr td)
|
270
|
+
# Rails::HTML4::SafeListSanitizer.allowed_attributes = %w(id class style)
|
271
|
+
#
|
272
|
+
# Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the
|
273
|
+
# class level options.
|
87
274
|
#
|
88
275
|
# === Examples
|
89
|
-
# safe_list_sanitizer = Rails::Html::SafeListSanitizer.new
|
90
276
|
#
|
91
|
-
#
|
92
|
-
# safe_list_sanitizer.sanitize_css('background-color: #000;')
|
277
|
+
# safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new
|
93
278
|
#
|
94
|
-
#
|
95
|
-
#
|
279
|
+
# # default: sanitize via an extensive safe list of allowed elements
|
280
|
+
# safe_list_sanitizer.sanitize(@article.body)
|
96
281
|
#
|
97
|
-
#
|
98
|
-
#
|
99
|
-
#
|
282
|
+
# # sanitize via the supplied tags and attributes
|
283
|
+
# safe_list_sanitizer.sanitize(
|
284
|
+
# @article.body,
|
285
|
+
# tags: %w(table tr td),
|
286
|
+
# attributes: %w(id class style),
|
287
|
+
# )
|
100
288
|
#
|
101
|
-
#
|
102
|
-
#
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
def sanitize(html, options = {})
|
118
|
-
return unless html
|
119
|
-
return html if html.empty?
|
289
|
+
# # sanitize via a custom Loofah scrubber
|
290
|
+
# safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
|
291
|
+
#
|
292
|
+
# # prune nodes from the tree instead of stripping tags and leaving inner content
|
293
|
+
# safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new(prune: true)
|
294
|
+
#
|
295
|
+
# # the sanitizer can also sanitize CSS
|
296
|
+
# safe_list_sanitizer.sanitize_css('background-color: #000;')
|
297
|
+
#
|
298
|
+
class SafeListSanitizer < Rails::HTML::Sanitizer
|
299
|
+
include HTML::Concern::ComposedSanitize
|
300
|
+
include HTML::Concern::Parser::HTML4
|
301
|
+
include HTML::Concern::Scrubber::SafeList
|
302
|
+
include HTML::Concern::Serializer::UTF8Encode
|
303
|
+
end
|
304
|
+
end
|
120
305
|
|
121
|
-
|
306
|
+
module HTML5
|
307
|
+
class Sanitizer
|
308
|
+
class << self
|
309
|
+
def full_sanitizer
|
310
|
+
Rails::HTML5::FullSanitizer
|
311
|
+
end
|
122
312
|
|
123
|
-
|
124
|
-
|
125
|
-
loofah_fragment.scrub!(scrubber)
|
126
|
-
elsif allowed_tags(options) || allowed_attributes(options)
|
127
|
-
@permit_scrubber.tags = allowed_tags(options)
|
128
|
-
@permit_scrubber.attributes = allowed_attributes(options)
|
129
|
-
loofah_fragment.scrub!(@permit_scrubber)
|
130
|
-
else
|
131
|
-
remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE)
|
132
|
-
loofah_fragment.scrub!(:strip)
|
313
|
+
def link_sanitizer
|
314
|
+
Rails::HTML5::LinkSanitizer
|
133
315
|
end
|
134
316
|
|
135
|
-
|
136
|
-
|
317
|
+
def safe_list_sanitizer
|
318
|
+
Rails::HTML5::SafeListSanitizer
|
319
|
+
end
|
137
320
|
|
138
|
-
|
139
|
-
|
321
|
+
def white_list_sanitizer # :nodoc:
|
322
|
+
safe_list_sanitizer
|
323
|
+
end
|
140
324
|
end
|
325
|
+
end
|
141
326
|
|
142
|
-
|
327
|
+
# == Rails::HTML5::FullSanitizer
|
328
|
+
#
|
329
|
+
# Removes all tags from HTML5 but strips out scripts, forms and comments.
|
330
|
+
#
|
331
|
+
# full_sanitizer = Rails::HTML5::FullSanitizer.new
|
332
|
+
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
|
333
|
+
# # => "Bold no more! See more here..."
|
334
|
+
#
|
335
|
+
class FullSanitizer < Rails::HTML::Sanitizer
|
336
|
+
include HTML::Concern::ComposedSanitize
|
337
|
+
include HTML::Concern::Parser::HTML5
|
338
|
+
include HTML::Concern::Scrubber::Full
|
339
|
+
include HTML::Concern::Serializer::UTF8Encode
|
340
|
+
end
|
143
341
|
|
144
|
-
|
145
|
-
|
146
|
-
|
342
|
+
# == Rails::HTML5::LinkSanitizer
|
343
|
+
#
|
344
|
+
# Removes +a+ tags and +href+ attributes from HTML5 leaving only the link text.
|
345
|
+
#
|
346
|
+
# link_sanitizer = Rails::HTML5::LinkSanitizer.new
|
347
|
+
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
|
348
|
+
# # => "Only the link text will be kept."
|
349
|
+
#
|
350
|
+
class LinkSanitizer < Rails::HTML::Sanitizer
|
351
|
+
include HTML::Concern::ComposedSanitize
|
352
|
+
include HTML::Concern::Parser::HTML5
|
353
|
+
include HTML::Concern::Scrubber::Link
|
354
|
+
include HTML::Concern::Serializer::UTF8Encode
|
355
|
+
end
|
147
356
|
|
148
|
-
|
149
|
-
|
150
|
-
|
357
|
+
# == Rails::HTML5::SafeListSanitizer
|
358
|
+
#
|
359
|
+
# Sanitizes HTML5 and CSS from an extensive safe list.
|
360
|
+
#
|
361
|
+
# === Whitespace
|
362
|
+
#
|
363
|
+
# We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri,
|
364
|
+
# which wraps either a C or Java parser for the respective Ruby implementation. Those two
|
365
|
+
# parsers determine how whitespace is ultimately handled.
|
366
|
+
#
|
367
|
+
# When the stripped markup will be rendered the user's browser won't take whitespace into account
|
368
|
+
# anyway. It might be better to suggest your users wrap their whitespace-sensitive content in
|
369
|
+
# pre tags or that you do so automatically.
|
370
|
+
#
|
371
|
+
# === Options
|
372
|
+
#
|
373
|
+
# Sanitizes both html and css via the safe lists found in
|
374
|
+
# Rails::HTML::Concern::Scrubber::SafeList
|
375
|
+
#
|
376
|
+
# SafeListSanitizer also accepts options to configure the safe list used when sanitizing html.
|
377
|
+
# There's a class level option:
|
378
|
+
#
|
379
|
+
# Rails::HTML5::SafeListSanitizer.allowed_tags = %w(table tr td)
|
380
|
+
# Rails::HTML5::SafeListSanitizer.allowed_attributes = %w(id class style)
|
381
|
+
#
|
382
|
+
# Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the
|
383
|
+
# class level options.
|
384
|
+
#
|
385
|
+
# === Examples
|
386
|
+
#
|
387
|
+
# safe_list_sanitizer = Rails::HTML5::SafeListSanitizer.new
|
388
|
+
#
|
389
|
+
# # default: sanitize via an extensive safe list of allowed elements
|
390
|
+
# safe_list_sanitizer.sanitize(@article.body)
|
391
|
+
#
|
392
|
+
# # sanitize via the supplied tags and attributes
|
393
|
+
# safe_list_sanitizer.sanitize(
|
394
|
+
# @article.body,
|
395
|
+
# tags: %w(table tr td),
|
396
|
+
# attributes: %w(id class style),
|
397
|
+
# )
|
398
|
+
#
|
399
|
+
# # sanitize via a custom Loofah scrubber
|
400
|
+
# safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
|
401
|
+
#
|
402
|
+
# # prune nodes from the tree instead of stripping tags and leaving inner content
|
403
|
+
# safe_list_sanitizer = Rails::HTML5::SafeListSanitizer.new(prune: true)
|
404
|
+
#
|
405
|
+
# # the sanitizer can also sanitize CSS
|
406
|
+
# safe_list_sanitizer.sanitize_css('background-color: #000;')
|
407
|
+
#
|
408
|
+
class SafeListSanitizer < Rails::HTML::Sanitizer
|
409
|
+
include HTML::Concern::ComposedSanitize
|
410
|
+
include HTML::Concern::Parser::HTML5
|
411
|
+
include HTML::Concern::Scrubber::SafeList
|
412
|
+
include HTML::Concern::Serializer::UTF8Encode
|
151
413
|
end
|
414
|
+
end if Rails::HTML::Sanitizer.html5_support?
|
152
415
|
|
153
|
-
|
416
|
+
module HTML
|
417
|
+
Sanitizer.extend(HTML4::Sanitizer::VendorMethods) # :nodoc:
|
418
|
+
FullSanitizer = HTML4::FullSanitizer # :nodoc:
|
419
|
+
LinkSanitizer = HTML4::LinkSanitizer # :nodoc:
|
420
|
+
SafeListSanitizer = HTML4::SafeListSanitizer # :nodoc:
|
421
|
+
WhiteListSanitizer = SafeListSanitizer # :nodoc:
|
154
422
|
end
|
155
423
|
end
|