loofah 2.19.0 → 2.23.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "cgi"
3
4
  require "crass"
4
5
 
@@ -6,9 +7,10 @@ module Loofah
6
7
  module HTML5 # :nodoc:
7
8
  module Scrub
8
9
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
9
- CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
10
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/ # rubocop:disable Layout/LineLength
10
11
  CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
11
- CSS_IMPORTANT = '!important'
12
+ CSS_IMPORTANT = "!important"
13
+ CSS_WHITESPACE = " "
12
14
  CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
13
15
  DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
14
16
 
@@ -26,7 +28,7 @@ module Loofah
26
28
  attr_node.node_name
27
29
  end
28
30
 
29
- if attr_name =~ DATA_ATTRIBUTE_NAME
31
+ if DATA_ATTRIBUTE_NAME.match?(attr_name)
30
32
  next
31
33
  end
32
34
 
@@ -36,28 +38,19 @@ module Loofah
36
38
  end
37
39
 
38
40
  if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
39
- # this block lifted nearly verbatim from HTML5 sanitization
40
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
41
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
42
- attr_node.remove
43
- next
44
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
45
- # permit only allowed data mediatypes
46
- mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
47
- mediatype, _ = mediatype.split(";")[0..1] if mediatype
48
- if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
49
- attr_node.remove
50
- next
51
- end
52
- end
41
+ next if scrub_uri_attribute(attr_node)
53
42
  end
43
+
54
44
  if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
55
- attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
56
- end
57
- if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
58
- attr_node.remove
59
- next
45
+ scrub_attribute_that_allows_local_ref(attr_node)
60
46
  end
47
+
48
+ next unless SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
49
+ attr_name == "xlink:href" &&
50
+ attr_node.value =~ /^\s*[^#\s].*/m
51
+
52
+ attr_node.remove
53
+ next
61
54
  end
62
55
 
63
56
  scrub_css_attribute(node)
@@ -77,29 +70,28 @@ module Loofah
77
70
  end
78
71
 
79
72
  def scrub_css(style)
73
+ url_flags = [:url, :bad_url]
80
74
  style_tree = Crass.parse_properties(style)
81
75
  sanitized_tree = []
82
76
 
83
77
  style_tree.each do |node|
84
78
  next unless node[:node] == :property
85
79
  next if node[:children].any? do |child|
86
- [:url, :bad_url].include?(child[:node])
80
+ url_flags.include?(child[:node])
87
81
  end
88
82
 
89
83
  name = node[:name].downcase
90
84
  next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
91
- SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
92
- SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
85
+ SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
86
+ SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
93
87
 
94
88
  value = node[:children].map do |child|
95
89
  case child[:node]
96
90
  when :whitespace
97
- nil
91
+ CSS_WHITESPACE
98
92
  when :string
99
- if child[:raw] =~ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES
93
+ if CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES.match?(child[:raw])
100
94
  Crass::Parser.stringify(child)
101
- else
102
- nil
103
95
  end
104
96
  when :function
105
97
  if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
@@ -108,18 +100,19 @@ module Loofah
108
100
  when :ident
109
101
  keyword = child[:value]
110
102
  if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
111
- SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
112
- (keyword =~ CSS_KEYWORDISH)
103
+ SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
104
+ (keyword =~ CSS_KEYWORDISH)
113
105
  keyword
114
106
  end
115
107
  else
116
108
  child[:raw]
117
109
  end
118
- end.compact
110
+ end.compact.join.strip
119
111
 
120
112
  next if value.empty?
121
- value << CSS_IMPORTANT if node[:important]
122
- propstring = format("%s:%s", name, value.join(" "))
113
+
114
+ value << CSS_WHITESPACE << CSS_IMPORTANT if node[:important]
115
+ propstring = format("%s:%s", name, value)
123
116
  sanitized_node = Crass.parse_properties(propstring).first
124
117
  sanitized_tree << sanitized_node << CRASS_SEMICOLON
125
118
  end
@@ -127,6 +120,44 @@ module Loofah
127
120
  Crass::Parser.stringify(sanitized_tree)
128
121
  end
129
122
 
123
+ def scrub_attribute_that_allows_local_ref(attr_node)
124
+ return unless attr_node.value
125
+
126
+ nodes = Crass::Parser.new(attr_node.value).parse_component_values
127
+
128
+ values = nodes.map do |node|
129
+ case node[:node]
130
+ when :url
131
+ if node[:value].start_with?("#")
132
+ node[:raw]
133
+ end
134
+ when :hash, :ident, :string
135
+ node[:raw]
136
+ end
137
+ end.compact
138
+
139
+ attr_node.value = values.join(" ")
140
+ end
141
+
142
+ def scrub_uri_attribute(attr_node)
143
+ # this block lifted nearly verbatim from HTML5 sanitization
144
+ val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
145
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
146
+ !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
147
+ attr_node.remove
148
+ return true
149
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
150
+ # permit only allowed data mediatypes
151
+ mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
152
+ mediatype, _ = mediatype.split(";")[0..1] if mediatype
153
+ if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
154
+ attr_node.remove
155
+ return true
156
+ end
157
+ end
158
+ false
159
+ end
160
+
130
161
  #
131
162
  # libxml2 >= 2.9.2 fails to escape comments within some attributes.
132
163
  #
@@ -152,6 +183,46 @@ module Loofah
152
183
  end.force_encoding(encoding)
153
184
  end
154
185
  end
186
+
187
+ def cdata_needs_escaping?(node)
188
+ # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` tag as cdata, but it acts that way
189
+ node.cdata? || (Nokogiri.jruby? && node.text? && node.parent.name == "style")
190
+ end
191
+
192
+ def cdata_escape(node)
193
+ escaped_text = escape_tags(node.text)
194
+ if Nokogiri.jruby?
195
+ node.document.create_text_node(escaped_text)
196
+ else
197
+ node.document.create_cdata(escaped_text)
198
+ end
199
+ end
200
+
201
+ TABLE_FOR_ESCAPE_HTML__ = {
202
+ "<" => "&lt;",
203
+ ">" => "&gt;",
204
+ "&" => "&amp;",
205
+ }
206
+
207
+ def escape_tags(string)
208
+ # modified version of CGI.escapeHTML from ruby 3.1
209
+ enc = string.encoding
210
+ if enc.ascii_compatible?
211
+ string = string.b
212
+ string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
213
+ string.force_encoding(enc)
214
+ else
215
+ if enc.dummy?
216
+ origenc = enc
217
+ enc = Encoding::Converter.asciicompat_encoding(enc)
218
+ string = enc ? string.encode(enc) : string.b
219
+ end
220
+ table = Hash[TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }]
221
+ string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
222
+ string.encode!(origenc) if origenc
223
+ string
224
+ end
225
+ end
155
226
  end
156
227
  end
157
228
  end
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  module MetaHelpers # :nodoc:
4
- def self.add_downcased_set_members_to_all_set_constants(mojule)
5
- mojule.constants.each do |constant_sym|
6
- constant = mojule.const_get constant_sym
7
- next unless Set === constant
8
- constant.dup.each do |member|
9
- constant.add member.downcase
5
+ class << self
6
+ def add_downcased_set_members_to_all_set_constants(mojule)
7
+ mojule.constants.each do |constant_sym|
8
+ constant = mojule.const_get(constant_sym)
9
+ next unless Set === constant
10
+
11
+ constant.dup.each do |member|
12
+ constant.add(member.downcase)
13
+ end
10
14
  end
11
15
  end
12
16
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  #
4
5
  # A RuntimeError raised when Loofah could not find an appropriate scrubber.
@@ -24,7 +25,7 @@ module Loofah
24
25
  #
25
26
  # This can then be run on a document:
26
27
  #
27
- # Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
28
+ # Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
28
29
  # # => "<div>foo</div><p>bar</p>"
29
30
  #
30
31
  # Scrubbers can be run on a document in either a top-down traversal (the
@@ -32,7 +33,6 @@ module Loofah
32
33
  # Scrubber::STOP to terminate the traversal of a subtree.
33
34
  #
34
35
  class Scrubber
35
-
36
36
  # Top-down Scrubbers may return CONTINUE to indicate that the subtree should be traversed.
37
37
  CONTINUE = Object.new.freeze
38
38
 
@@ -67,7 +67,9 @@ module Loofah
67
67
  unless [:top_down, :bottom_up].include?(direction)
68
68
  raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
69
69
  end
70
- @direction, @block = direction, block
70
+
71
+ @direction = direction
72
+ @block = block
71
73
  end
72
74
 
73
75
  #
@@ -84,7 +86,7 @@ module Loofah
84
86
  # +scrub+, which will be called for each document node.
85
87
  #
86
88
  def scrub(node)
87
- raise ScrubberNotFound, "No scrub method has been defined on #{self.class.to_s}"
89
+ raise ScrubberNotFound, "No scrub method has been defined on #{self.class}"
88
90
  end
89
91
 
90
92
  #
@@ -103,11 +105,15 @@ module Loofah
103
105
  def html5lib_sanitize(node)
104
106
  case node.type
105
107
  when Nokogiri::XML::Node::ELEMENT_NODE
106
- if HTML5::Scrub.allowed_element? node.name
107
- HTML5::Scrub.scrub_attributes node
108
+ if HTML5::Scrub.allowed_element?(node.name)
109
+ HTML5::Scrub.scrub_attributes(node)
108
110
  return Scrubber::CONTINUE
109
111
  end
110
112
  when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
113
+ if HTML5::Scrub.cdata_needs_escaping?(node)
114
+ node.before(HTML5::Scrub.cdata_escape(node))
115
+ return Scrubber::STOP
116
+ end
111
117
  return Scrubber::CONTINUE
112
118
  end
113
119
  Scrubber::STOP
@@ -116,8 +122,8 @@ module Loofah
116
122
  def traverse_conditionally_top_down(node)
117
123
  if block
118
124
  return if block.call(node) == STOP
119
- else
120
- return if scrub(node) == STOP
125
+ elsif scrub(node) == STOP
126
+ return
121
127
  end
122
128
  node.children.each { |j| traverse_conditionally_top_down(j) }
123
129
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  #
4
5
  # Loofah provides some built-in scrubbers for sanitizing with
@@ -11,7 +12,7 @@ module Loofah
11
12
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
12
13
  #
13
14
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
14
- # Loofah.fragment(unsafe_html).scrub!(:strip)
15
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
15
16
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
16
17
  #
17
18
  #
@@ -20,7 +21,7 @@ module Loofah
20
21
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
21
22
  #
22
23
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
23
- # Loofah.fragment(unsafe_html).scrub!(:prune)
24
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
24
25
  # => "ohai! <div>div is safe</div> "
25
26
  #
26
27
  #
@@ -29,7 +30,7 @@ module Loofah
29
30
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
30
31
  #
31
32
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
32
- # Loofah.fragment(unsafe_html).scrub!(:escape)
33
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
33
34
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
34
35
  #
35
36
  #
@@ -41,7 +42,7 @@ module Loofah
41
42
  # layer of paint on top of the HTML input to make it look nice.
42
43
  #
43
44
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
44
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
45
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
45
46
  # => "ohai! <div>div with attributes</div>"
46
47
  #
47
48
  # One use case for this scrubber is to clean up HTML that was
@@ -56,25 +57,42 @@ module Loofah
56
57
  # +:nofollow+ adds a rel="nofollow" attribute to all links
57
58
  #
58
59
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
59
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
60
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
60
61
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
61
62
  #
62
63
  #
64
+ # === Loofah::Scrubbers::TargetBlank / scrub!(:targetblank)
65
+ #
66
+ # +:targetblank+ adds a target="_blank" attribute to all links
67
+ #
68
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
69
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
70
+ # => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
71
+ #
72
+ #
63
73
  # === Loofah::Scrubbers::NoOpener / scrub!(:noopener)
64
74
  #
65
75
  # +:noopener+ adds a rel="noopener" attribute to all links
66
76
  #
67
77
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
68
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
78
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
69
79
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
70
80
  #
81
+ # === Loofah::Scrubbers::NoReferrer / scrub!(:noreferrer)
82
+ #
83
+ # +:noreferrer+ adds a rel="noreferrer" attribute to all links
84
+ #
85
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
86
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
87
+ # => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
88
+ #
71
89
  #
72
90
  # === Loofah::Scrubbers::Unprintable / scrub!(:unprintable)
73
91
  #
74
92
  # +:unprintable+ removes unprintable Unicode characters.
75
93
  #
76
94
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
77
- # Loofah.fragment(markup).scrub!(:unprintable)
95
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
78
96
  # => "<p>Some text with an unprintable character at the end</p>"
79
97
  #
80
98
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -90,23 +108,20 @@ module Loofah
90
108
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
91
109
  #
92
110
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
93
- # Loofah.fragment(unsafe_html).scrub!(:strip)
111
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
94
112
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
95
113
  #
96
114
  class Strip < Scrubber
97
- def initialize
115
+ def initialize # rubocop:disable Lint/MissingSuper
98
116
  @direction = :bottom_up
99
117
  end
100
118
 
101
119
  def scrub(node)
102
120
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
103
- if node.children.length == 1 && node.children.first.cdata?
104
- sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
105
- node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
106
- else
107
- node.before node.children
108
- end
121
+
122
+ node.before(node.children)
109
123
  node.remove
124
+ STOP
110
125
  end
111
126
  end
112
127
 
@@ -116,18 +131,19 @@ module Loofah
116
131
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
117
132
  #
118
133
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
119
- # Loofah.fragment(unsafe_html).scrub!(:prune)
134
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
120
135
  # => "ohai! <div>div is safe</div> "
121
136
  #
122
137
  class Prune < Scrubber
123
- def initialize
138
+ def initialize # rubocop:disable Lint/MissingSuper
124
139
  @direction = :top_down
125
140
  end
126
141
 
127
142
  def scrub(node)
128
143
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
144
+
129
145
  node.remove
130
- return STOP
146
+ STOP
131
147
  end
132
148
  end
133
149
 
@@ -137,19 +153,20 @@ module Loofah
137
153
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
138
154
  #
139
155
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
140
- # Loofah.fragment(unsafe_html).scrub!(:escape)
156
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
141
157
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
142
158
  #
143
159
  class Escape < Scrubber
144
- def initialize
160
+ def initialize # rubocop:disable Lint/MissingSuper
145
161
  @direction = :top_down
146
162
  end
147
163
 
148
164
  def scrub(node)
149
165
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
150
- node.add_next_sibling Nokogiri::XML::Text.new(node.to_s, node.document)
166
+
167
+ node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
151
168
  node.remove
152
- return STOP
169
+ STOP
153
170
  end
154
171
  end
155
172
 
@@ -162,7 +179,7 @@ module Loofah
162
179
  # layer of paint on top of the HTML input to make it look nice.
163
180
  #
164
181
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
165
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
182
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
166
183
  # => "ohai! <div>div with attributes</div>"
167
184
  #
168
185
  # One use case for this scrubber is to clean up HTML that was
@@ -172,14 +189,14 @@ module Loofah
172
189
  # Certainly not me.
173
190
  #
174
191
  class Whitewash < Scrubber
175
- def initialize
192
+ def initialize # rubocop:disable Lint/MissingSuper
176
193
  @direction = :top_down
177
194
  end
178
195
 
179
196
  def scrub(node)
180
197
  case node.type
181
198
  when Nokogiri::XML::Node::ELEMENT_NODE
182
- if HTML5::Scrub.allowed_element? node.name
199
+ if HTML5::Scrub.allowed_element?(node.name)
183
200
  node.attributes.each { |attr| node.remove_attribute(attr.first) }
184
201
  return CONTINUE if node.namespaces.empty?
185
202
  end
@@ -197,18 +214,46 @@ module Loofah
197
214
  # +:nofollow+ adds a rel="nofollow" attribute to all links
198
215
  #
199
216
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
200
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
217
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
201
218
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
202
219
  #
203
220
  class NoFollow < Scrubber
204
- def initialize
221
+ def initialize # rubocop:disable Lint/MissingSuper
205
222
  @direction = :top_down
206
223
  end
207
224
 
208
225
  def scrub(node)
209
226
  return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
227
+
210
228
  append_attribute(node, "rel", "nofollow")
211
- return STOP
229
+ STOP
230
+ end
231
+ end
232
+
233
+ #
234
+ # === scrub!(:targetblank)
235
+ #
236
+ # +:targetblank+ adds a target="_blank" attribute to all links.
237
+ # If there is a target already set, replaces it with target="_blank".
238
+ #
239
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
240
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
241
+ # => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
242
+ #
243
+ # On modern browsers, setting target="_blank" on anchor elements implicitly provides the same
244
+ # behavior as setting rel="noopener".
245
+ #
246
+ class TargetBlank < Scrubber
247
+ def initialize # rubocop:disable Lint/MissingSuper
248
+ @direction = :top_down
249
+ end
250
+
251
+ def scrub(node)
252
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
253
+
254
+ node.set_attribute("target", "_blank")
255
+
256
+ STOP
212
257
  end
213
258
  end
214
259
 
@@ -218,35 +263,59 @@ module Loofah
218
263
  # +:noopener+ adds a rel="noopener" attribute to all links
219
264
  #
220
265
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
221
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
266
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
222
267
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
223
268
  #
224
269
  class NoOpener < Scrubber
225
- def initialize
270
+ def initialize # rubocop:disable Lint/MissingSuper
226
271
  @direction = :top_down
227
272
  end
228
273
 
229
274
  def scrub(node)
230
275
  return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
276
+
231
277
  append_attribute(node, "rel", "noopener")
232
- return STOP
278
+ STOP
279
+ end
280
+ end
281
+
282
+ #
283
+ # === scrub!(:noreferrer)
284
+ #
285
+ # +:noreferrer+ adds a rel="noreferrer" attribute to all links
286
+ #
287
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
288
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
289
+ # => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
290
+ #
291
+ class NoReferrer < Scrubber
292
+ def initialize # rubocop:disable Lint/MissingSuper
293
+ @direction = :top_down
294
+ end
295
+
296
+ def scrub(node)
297
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
298
+
299
+ append_attribute(node, "rel", "noreferrer")
300
+ STOP
233
301
  end
234
302
  end
235
303
 
236
304
  # This class probably isn't useful publicly, but is used for #to_text's current implementation
237
305
  class NewlineBlockElements < Scrubber # :nodoc:
238
- def initialize
306
+ def initialize # rubocop:disable Lint/MissingSuper
239
307
  @direction = :bottom_up
240
308
  end
241
309
 
242
310
  def scrub(node)
243
311
  return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
312
+
244
313
  replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
245
314
  "\n"
246
315
  else
247
316
  "\n#{node.content}\n"
248
317
  end
249
- node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
318
+ node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
250
319
  node.remove
251
320
  end
252
321
  end
@@ -257,7 +326,7 @@ module Loofah
257
326
  # +:unprintable+ removes unprintable Unicode characters.
258
327
  #
259
328
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
260
- # Loofah.fragment(markup).scrub!(:unprintable)
329
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
261
330
  # => "<p>Some text with an unprintable character at the end</p>"
262
331
  #
263
332
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -267,7 +336,7 @@ module Loofah
267
336
  # http://timelessrepo.com/json-isnt-a-javascript-subset
268
337
  #
269
338
  class Unprintable < Scrubber
270
- def initialize
339
+ def initialize # rubocop:disable Lint/MissingSuper
271
340
  @direction = :top_down
272
341
  end
273
342
 
@@ -283,21 +352,25 @@ module Loofah
283
352
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
284
353
  #
285
354
  MAP = {
286
- :escape => Escape,
287
- :prune => Prune,
288
- :whitewash => Whitewash,
289
- :strip => Strip,
290
- :nofollow => NoFollow,
291
- :noopener => NoOpener,
292
- :newline_block_elements => NewlineBlockElements,
293
- :unprintable => Unprintable,
355
+ escape: Escape,
356
+ prune: Prune,
357
+ whitewash: Whitewash,
358
+ strip: Strip,
359
+ nofollow: NoFollow,
360
+ noopener: NoOpener,
361
+ noreferrer: NoReferrer,
362
+ targetblank: TargetBlank,
363
+ newline_block_elements: NewlineBlockElements,
364
+ unprintable: Unprintable,
294
365
  }
295
366
 
296
- #
297
- # Returns an array of symbols representing the built-in scrubbers
298
- #
299
- def self.scrubber_symbols
300
- MAP.keys
367
+ class << self
368
+ #
369
+ # Returns an array of symbols representing the built-in scrubbers
370
+ #
371
+ def scrubber_symbols
372
+ MAP.keys
373
+ end
301
374
  end
302
375
  end
303
376
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  # The version of Loofah you are using
4
- VERSION = "2.19.0"
5
+ VERSION = "2.23.1"
5
6
  end