rails-html-sanitizer 1.5.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,155 +1,423 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rails
2
- module Html
3
- XPATHS_TO_REMOVE = %w{.//script .//form comment()}
4
+ module HTML
5
+ class Sanitizer
6
+ class << self
7
+ def html5_support?
8
+ return @html5_support if defined?(@html5_support)
9
+
10
+ @html5_support = Loofah.respond_to?(:html5_support?) && Loofah.html5_support?
11
+ end
12
+
13
+ def best_supported_vendor
14
+ html5_support? ? Rails::HTML5::Sanitizer : Rails::HTML4::Sanitizer
15
+ end
16
+ end
4
17
 
5
- class Sanitizer # :nodoc:
6
18
  def sanitize(html, options = {})
7
19
  raise NotImplementedError, "subclasses must implement sanitize method."
8
20
  end
9
21
 
10
22
  private
23
+ def remove_xpaths(node, xpaths)
24
+ node.xpath(*xpaths).remove
25
+ node
26
+ end
27
+
28
+ def properly_encode(fragment, options)
29
+ fragment.xml? ? fragment.to_xml(options) : fragment.to_html(options)
30
+ end
31
+ end
32
+
33
+ module Concern
34
+ module ComposedSanitize
35
+ def sanitize(html, options = {})
36
+ return unless html
37
+ return html if html.empty?
38
+
39
+ serialize(scrub(parse_fragment(html), options))
40
+ end
41
+ end
42
+
43
+ module Parser
44
+ module HTML4
45
+ def parse_fragment(html)
46
+ Loofah.html4_fragment(html)
47
+ end
48
+ end
49
+
50
+ module HTML5
51
+ def parse_fragment(html)
52
+ Loofah.html5_fragment(html)
53
+ end
54
+ end if Rails::HTML::Sanitizer.html5_support?
55
+ end
56
+
57
+ module Scrubber
58
+ module Full
59
+ def scrub(fragment, options = {})
60
+ fragment.scrub!(TextOnlyScrubber.new)
61
+ end
62
+ end
63
+
64
+ module Link
65
+ def initialize
66
+ super
67
+ @link_scrubber = TargetScrubber.new
68
+ @link_scrubber.tags = %w(a)
69
+ @link_scrubber.attributes = %w(href)
70
+ end
71
+
72
+ def scrub(fragment, options = {})
73
+ fragment.scrub!(@link_scrubber)
74
+ end
75
+ end
76
+
77
+ module SafeList
78
+ # The default safe list for tags
79
+ DEFAULT_ALLOWED_TAGS = Set.new([
80
+ "a",
81
+ "abbr",
82
+ "acronym",
83
+ "address",
84
+ "b",
85
+ "big",
86
+ "blockquote",
87
+ "br",
88
+ "cite",
89
+ "code",
90
+ "dd",
91
+ "del",
92
+ "dfn",
93
+ "div",
94
+ "dl",
95
+ "dt",
96
+ "em",
97
+ "h1",
98
+ "h2",
99
+ "h3",
100
+ "h4",
101
+ "h5",
102
+ "h6",
103
+ "hr",
104
+ "i",
105
+ "img",
106
+ "ins",
107
+ "kbd",
108
+ "li",
109
+ "mark",
110
+ "ol",
111
+ "p",
112
+ "pre",
113
+ "samp",
114
+ "small",
115
+ "span",
116
+ "strong",
117
+ "sub",
118
+ "sup",
119
+ "time",
120
+ "tt",
121
+ "ul",
122
+ "var",
123
+ ]).freeze
124
+
125
+ # The default safe list for attributes
126
+ DEFAULT_ALLOWED_ATTRIBUTES = Set.new([
127
+ "abbr",
128
+ "alt",
129
+ "cite",
130
+ "class",
131
+ "datetime",
132
+ "height",
133
+ "href",
134
+ "lang",
135
+ "name",
136
+ "src",
137
+ "title",
138
+ "width",
139
+ "xml:lang",
140
+ ]).freeze
11
141
 
12
- def remove_xpaths(node, xpaths)
13
- node.xpath(*xpaths).remove
14
- node
142
+ def self.included(klass)
143
+ class << klass
144
+ attr_accessor :allowed_tags
145
+ attr_accessor :allowed_attributes
146
+ end
147
+
148
+ klass.allowed_tags = DEFAULT_ALLOWED_TAGS.dup
149
+ klass.allowed_attributes = DEFAULT_ALLOWED_ATTRIBUTES.dup
150
+ end
151
+
152
+ def initialize(prune: false)
153
+ @permit_scrubber = PermitScrubber.new(prune: prune)
154
+ end
155
+
156
+ def scrub(fragment, options = {})
157
+ if scrubber = options[:scrubber]
158
+ # No duck typing, Loofah ensures subclass of Loofah::Scrubber
159
+ fragment.scrub!(scrubber)
160
+ elsif allowed_tags(options) || allowed_attributes(options)
161
+ @permit_scrubber.tags = allowed_tags(options)
162
+ @permit_scrubber.attributes = allowed_attributes(options)
163
+ fragment.scrub!(@permit_scrubber)
164
+ else
165
+ fragment.scrub!(:strip)
166
+ end
167
+ end
168
+
169
+ def sanitize_css(style_string)
170
+ Loofah::HTML5::Scrub.scrub_css(style_string)
171
+ end
172
+
173
+ private
174
+ def allowed_tags(options)
175
+ options[:tags] || self.class.allowed_tags
176
+ end
177
+
178
+ def allowed_attributes(options)
179
+ options[:attributes] || self.class.allowed_attributes
180
+ end
181
+ end
15
182
  end
16
183
 
17
- def properly_encode(fragment, options)
18
- fragment.xml? ? fragment.to_xml(options) : fragment.to_html(options)
184
+ module Serializer
185
+ module UTF8Encode
186
+ def serialize(fragment)
187
+ properly_encode(fragment, encoding: "UTF-8")
188
+ end
189
+ end
19
190
  end
20
191
  end
192
+ end
21
193
 
22
- # === Rails::Html::FullSanitizer
23
- # Removes all tags but strips out scripts, forms and comments.
24
- #
25
- # full_sanitizer = Rails::Html::FullSanitizer.new
26
- # full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
27
- # # => Bold no more! See more here...
28
- class FullSanitizer < Sanitizer
29
- def sanitize(html, options = {})
30
- return unless html
31
- return html if html.empty?
194
+ module HTML4
195
+ module Sanitizer
196
+ module VendorMethods
197
+ def full_sanitizer
198
+ Rails::HTML4::FullSanitizer
199
+ end
32
200
 
33
- loofah_fragment = Loofah.fragment(html)
201
+ def link_sanitizer
202
+ Rails::HTML4::LinkSanitizer
203
+ end
34
204
 
35
- remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE)
36
- loofah_fragment.scrub!(TextOnlyScrubber.new)
205
+ def safe_list_sanitizer
206
+ Rails::HTML4::SafeListSanitizer
207
+ end
37
208
 
38
- properly_encode(loofah_fragment, encoding: 'UTF-8')
209
+ def white_list_sanitizer # :nodoc:
210
+ safe_list_sanitizer
211
+ end
39
212
  end
213
+
214
+ extend VendorMethods
40
215
  end
41
216
 
42
- # === Rails::Html::LinkSanitizer
43
- # Removes +a+ tags and +href+ attributes leaving only the link text.
217
+ # == Rails::HTML4::FullSanitizer
44
218
  #
45
- # link_sanitizer = Rails::Html::LinkSanitizer.new
46
- # link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
219
+ # Removes all tags from HTML4 but strips out scripts, forms and comments.
47
220
  #
48
- # => 'Only the link text will be kept.'
49
- class LinkSanitizer < Sanitizer
50
- def initialize
51
- @link_scrubber = TargetScrubber.new
52
- @link_scrubber.tags = %w(a)
53
- @link_scrubber.attributes = %w(href)
54
- end
221
+ # full_sanitizer = Rails::HTML4::FullSanitizer.new
222
+ # full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
223
+ # # => "Bold no more! See more here..."
224
+ #
225
+ class FullSanitizer < Rails::HTML::Sanitizer
226
+ include HTML::Concern::ComposedSanitize
227
+ include HTML::Concern::Parser::HTML4
228
+ include HTML::Concern::Scrubber::Full
229
+ include HTML::Concern::Serializer::UTF8Encode
230
+ end
55
231
 
56
- def sanitize(html, options = {})
57
- Loofah.scrub_fragment(html, @link_scrubber).to_s
58
- end
232
+ # == Rails::HTML4::LinkSanitizer
233
+ #
234
+ # Removes +a+ tags and +href+ attributes from HTML4 leaving only the link text.
235
+ #
236
+ # link_sanitizer = Rails::HTML4::LinkSanitizer.new
237
+ # link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
238
+ # # => "Only the link text will be kept."
239
+ #
240
+ class LinkSanitizer < Rails::HTML::Sanitizer
241
+ include HTML::Concern::ComposedSanitize
242
+ include HTML::Concern::Parser::HTML4
243
+ include HTML::Concern::Scrubber::Link
244
+ include HTML::Concern::Serializer::UTF8Encode
59
245
  end
60
246
 
61
- # === Rails::Html::SafeListSanitizer
62
- # Sanitizes html and css from an extensive safe list (see link further down).
247
+ # == Rails::HTML4::SafeListSanitizer
248
+ #
249
+ # Sanitizes HTML4 and CSS from an extensive safe list.
63
250
  #
64
251
  # === Whitespace
65
- # We can't make any guarantees about whitespace being kept or stripped.
66
- # Loofah uses Nokogiri, which wraps either a C or Java parser for the
67
- # respective Ruby implementation.
68
- # Those two parsers determine how whitespace is ultimately handled.
69
252
  #
70
- # When the stripped markup will be rendered the users browser won't take
71
- # whitespace into account anyway. It might be better to suggest your users
72
- # wrap their whitespace sensitive content in pre tags or that you do
73
- # so automatically.
253
+ # We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri,
254
+ # which wraps either a C or Java parser for the respective Ruby implementation. Those two
255
+ # parsers determine how whitespace is ultimately handled.
256
+ #
257
+ # When the stripped markup is rendered, the user's browser won't take whitespace into account
258
+ # anyway. It might be better to suggest that your users wrap their whitespace-sensitive content in
259
+ # pre tags, or that you do so automatically.
74
260
  #
75
261
  # === Options
76
- # Sanitizes both html and css via the safe lists found here:
77
- # https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/safelist.rb
78
262
  #
79
- # SafeListSanitizer also accepts options to configure
80
- # the safe list used when sanitizing html.
263
+ # Sanitizes both html and css via the safe lists found in
264
+ # Rails::HTML::Concern::Scrubber::SafeList
265
+ #
266
+ # SafeListSanitizer also accepts options to configure the safe list used when sanitizing html.
81
267
  # There's a class level option:
82
- # Rails::Html::SafeListSanitizer.allowed_tags = %w(table tr td)
83
- # Rails::Html::SafeListSanitizer.allowed_attributes = %w(id class style)
84
268
  #
85
- # Tags and attributes can also be passed to +sanitize+.
86
- # Passed options take precedence over the class level options.
269
+ # Rails::HTML4::SafeListSanitizer.allowed_tags = %w(table tr td)
270
+ # Rails::HTML4::SafeListSanitizer.allowed_attributes = %w(id class style)
271
+ #
272
+ # Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the
273
+ # class level options.
87
274
  #
88
275
  # === Examples
89
- # safe_list_sanitizer = Rails::Html::SafeListSanitizer.new
90
276
  #
91
- # Sanitize css doesn't take options
92
- # safe_list_sanitizer.sanitize_css('background-color: #000;')
277
+ # safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new
93
278
  #
94
- # Default: sanitize via a extensive safe list of allowed elements
95
- # safe_list_sanitizer.sanitize(@article.body)
279
+ # # default: sanitize via an extensive safe list of allowed elements
280
+ # safe_list_sanitizer.sanitize(@article.body)
96
281
  #
97
- # Safe list via the supplied tags and attributes
98
- # safe_list_sanitizer.sanitize(@article.body, tags: %w(table tr td),
99
- # attributes: %w(id class style))
282
+ # # sanitize via the supplied tags and attributes
283
+ # safe_list_sanitizer.sanitize(
284
+ # @article.body,
285
+ # tags: %w(table tr td),
286
+ # attributes: %w(id class style),
287
+ # )
100
288
  #
101
- # Safe list via a custom scrubber
102
- # safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
103
- class SafeListSanitizer < Sanitizer
104
- class << self
105
- attr_accessor :allowed_tags
106
- attr_accessor :allowed_attributes
107
- end
108
- self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
109
- sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
110
- acronym a img blockquote del ins))
111
- self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
112
-
113
- def initialize(prune: false)
114
- @permit_scrubber = PermitScrubber.new(prune: prune)
115
- end
116
-
117
- def sanitize(html, options = {})
118
- return unless html
119
- return html if html.empty?
289
+ # # sanitize via a custom Loofah scrubber
290
+ # safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
291
+ #
292
+ # # prune nodes from the tree instead of stripping tags and leaving inner content
293
+ # safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new(prune: true)
294
+ #
295
+ # # the sanitizer can also sanitize CSS
296
+ # safe_list_sanitizer.sanitize_css('background-color: #000;')
297
+ #
298
+ class SafeListSanitizer < Rails::HTML::Sanitizer
299
+ include HTML::Concern::ComposedSanitize
300
+ include HTML::Concern::Parser::HTML4
301
+ include HTML::Concern::Scrubber::SafeList
302
+ include HTML::Concern::Serializer::UTF8Encode
303
+ end
304
+ end
120
305
 
121
- loofah_fragment = Loofah.fragment(html)
306
+ module HTML5
307
+ class Sanitizer
308
+ class << self
309
+ def full_sanitizer
310
+ Rails::HTML5::FullSanitizer
311
+ end
122
312
 
123
- if scrubber = options[:scrubber]
124
- # No duck typing, Loofah ensures subclass of Loofah::Scrubber
125
- loofah_fragment.scrub!(scrubber)
126
- elsif allowed_tags(options) || allowed_attributes(options)
127
- @permit_scrubber.tags = allowed_tags(options)
128
- @permit_scrubber.attributes = allowed_attributes(options)
129
- loofah_fragment.scrub!(@permit_scrubber)
130
- else
131
- remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE)
132
- loofah_fragment.scrub!(:strip)
313
+ def link_sanitizer
314
+ Rails::HTML5::LinkSanitizer
133
315
  end
134
316
 
135
- properly_encode(loofah_fragment, encoding: 'UTF-8')
136
- end
317
+ def safe_list_sanitizer
318
+ Rails::HTML5::SafeListSanitizer
319
+ end
137
320
 
138
- def sanitize_css(style_string)
139
- Loofah::HTML5::Scrub.scrub_css(style_string)
321
+ def white_list_sanitizer # :nodoc:
322
+ safe_list_sanitizer
323
+ end
140
324
  end
325
+ end
141
326
 
142
- private
327
+ # == Rails::HTML5::FullSanitizer
328
+ #
329
+ # Removes all tags from HTML5 but strips out scripts, forms and comments.
330
+ #
331
+ # full_sanitizer = Rails::HTML5::FullSanitizer.new
332
+ # full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
333
+ # # => "Bold no more! See more here..."
334
+ #
335
+ class FullSanitizer < Rails::HTML::Sanitizer
336
+ include HTML::Concern::ComposedSanitize
337
+ include HTML::Concern::Parser::HTML5
338
+ include HTML::Concern::Scrubber::Full
339
+ include HTML::Concern::Serializer::UTF8Encode
340
+ end
143
341
 
144
- def allowed_tags(options)
145
- options[:tags] || self.class.allowed_tags
146
- end
342
+ # == Rails::HTML5::LinkSanitizer
343
+ #
344
+ # Removes +a+ tags and +href+ attributes from HTML5 leaving only the link text.
345
+ #
346
+ # link_sanitizer = Rails::HTML5::LinkSanitizer.new
347
+ # link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
348
+ # # => "Only the link text will be kept."
349
+ #
350
+ class LinkSanitizer < Rails::HTML::Sanitizer
351
+ include HTML::Concern::ComposedSanitize
352
+ include HTML::Concern::Parser::HTML5
353
+ include HTML::Concern::Scrubber::Link
354
+ include HTML::Concern::Serializer::UTF8Encode
355
+ end
147
356
 
148
- def allowed_attributes(options)
149
- options[:attributes] || self.class.allowed_attributes
150
- end
357
+ # == Rails::HTML5::SafeListSanitizer
358
+ #
359
+ # Sanitizes HTML5 and CSS from an extensive safe list.
360
+ #
361
+ # === Whitespace
362
+ #
363
+ # We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri,
364
+ # which wraps either a C or Java parser for the respective Ruby implementation. Those two
365
+ # parsers determine how whitespace is ultimately handled.
366
+ #
367
+ # When the stripped markup is rendered, the user's browser won't take whitespace into account
368
+ # anyway. It might be better to suggest that your users wrap their whitespace-sensitive content in
369
+ # pre tags, or that you do so automatically.
370
+ #
371
+ # === Options
372
+ #
373
+ # Sanitizes both html and css via the safe lists found in
374
+ # Rails::HTML::Concern::Scrubber::SafeList
375
+ #
376
+ # SafeListSanitizer also accepts options to configure the safe list used when sanitizing html.
377
+ # There's a class level option:
378
+ #
379
+ # Rails::HTML5::SafeListSanitizer.allowed_tags = %w(table tr td)
380
+ # Rails::HTML5::SafeListSanitizer.allowed_attributes = %w(id class style)
381
+ #
382
+ # Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the
383
+ # class level options.
384
+ #
385
+ # === Examples
386
+ #
387
+ # safe_list_sanitizer = Rails::HTML5::SafeListSanitizer.new
388
+ #
389
+ # # default: sanitize via an extensive safe list of allowed elements
390
+ # safe_list_sanitizer.sanitize(@article.body)
391
+ #
392
+ # # sanitize via the supplied tags and attributes
393
+ # safe_list_sanitizer.sanitize(
394
+ # @article.body,
395
+ # tags: %w(table tr td),
396
+ # attributes: %w(id class style),
397
+ # )
398
+ #
399
+ # # sanitize via a custom Loofah scrubber
400
+ # safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
401
+ #
402
+ # # prune nodes from the tree instead of stripping tags and leaving inner content
403
+ # safe_list_sanitizer = Rails::HTML5::SafeListSanitizer.new(prune: true)
404
+ #
405
+ # # the sanitizer can also sanitize CSS
406
+ # safe_list_sanitizer.sanitize_css('background-color: #000;')
407
+ #
408
+ class SafeListSanitizer < Rails::HTML::Sanitizer
409
+ include HTML::Concern::ComposedSanitize
410
+ include HTML::Concern::Parser::HTML5
411
+ include HTML::Concern::Scrubber::SafeList
412
+ include HTML::Concern::Serializer::UTF8Encode
151
413
  end
414
+ end if Rails::HTML::Sanitizer.html5_support?
152
415
 
153
- WhiteListSanitizer = SafeListSanitizer
416
+ module HTML
417
+ Sanitizer.extend(HTML4::Sanitizer::VendorMethods) # :nodoc:
418
+ FullSanitizer = HTML4::FullSanitizer # :nodoc:
419
+ LinkSanitizer = HTML4::LinkSanitizer # :nodoc:
420
+ SafeListSanitizer = HTML4::SafeListSanitizer # :nodoc:
421
+ WhiteListSanitizer = SafeListSanitizer # :nodoc:
154
422
  end
155
423
  end