loofah 2.19.0 → 2.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "cgi"
3
4
  require "crass"
4
5
 
@@ -6,9 +7,10 @@ module Loofah
6
7
  module HTML5 # :nodoc:
7
8
  module Scrub
8
9
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
9
- CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
10
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/ # rubocop:disable Layout/LineLength
10
11
  CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
11
- CSS_IMPORTANT = '!important'
12
+ CSS_IMPORTANT = "!important"
13
+ CSS_WHITESPACE = " "
12
14
  CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
13
15
  DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
14
16
 
@@ -26,7 +28,7 @@ module Loofah
26
28
  attr_node.node_name
27
29
  end
28
30
 
29
- if attr_name =~ DATA_ATTRIBUTE_NAME
31
+ if DATA_ATTRIBUTE_NAME.match?(attr_name)
30
32
  next
31
33
  end
32
34
 
@@ -36,28 +38,19 @@ module Loofah
36
38
  end
37
39
 
38
40
  if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
39
- # this block lifted nearly verbatim from HTML5 sanitization
40
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
41
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
42
- attr_node.remove
43
- next
44
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
45
- # permit only allowed data mediatypes
46
- mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
47
- mediatype, _ = mediatype.split(";")[0..1] if mediatype
48
- if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
49
- attr_node.remove
50
- next
51
- end
52
- end
41
+ next if scrub_uri_attribute(attr_node)
53
42
  end
43
+
54
44
  if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
55
- attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
56
- end
57
- if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
58
- attr_node.remove
59
- next
45
+ scrub_attribute_that_allows_local_ref(attr_node)
60
46
  end
47
+
48
+ next unless SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
49
+ attr_name == "xlink:href" &&
50
+ attr_node.value =~ /^\s*[^#\s].*/m
51
+
52
+ attr_node.remove
53
+ next
61
54
  end
62
55
 
63
56
  scrub_css_attribute(node)
@@ -77,29 +70,28 @@ module Loofah
77
70
  end
78
71
 
79
72
  def scrub_css(style)
73
+ url_flags = [:url, :bad_url]
80
74
  style_tree = Crass.parse_properties(style)
81
75
  sanitized_tree = []
82
76
 
83
77
  style_tree.each do |node|
84
78
  next unless node[:node] == :property
85
79
  next if node[:children].any? do |child|
86
- [:url, :bad_url].include?(child[:node])
80
+ url_flags.include?(child[:node])
87
81
  end
88
82
 
89
83
  name = node[:name].downcase
90
84
  next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
91
- SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
92
- SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
85
+ SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
86
+ SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
93
87
 
94
88
  value = node[:children].map do |child|
95
89
  case child[:node]
96
90
  when :whitespace
97
- nil
91
+ CSS_WHITESPACE
98
92
  when :string
99
- if child[:raw] =~ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES
93
+ if CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES.match?(child[:raw])
100
94
  Crass::Parser.stringify(child)
101
- else
102
- nil
103
95
  end
104
96
  when :function
105
97
  if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
@@ -108,18 +100,19 @@ module Loofah
108
100
  when :ident
109
101
  keyword = child[:value]
110
102
  if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
111
- SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
112
- (keyword =~ CSS_KEYWORDISH)
103
+ SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
104
+ (keyword =~ CSS_KEYWORDISH)
113
105
  keyword
114
106
  end
115
107
  else
116
108
  child[:raw]
117
109
  end
118
- end.compact
110
+ end.compact.join.strip
119
111
 
120
112
  next if value.empty?
121
- value << CSS_IMPORTANT if node[:important]
122
- propstring = format("%s:%s", name, value.join(" "))
113
+
114
+ value << CSS_WHITESPACE << CSS_IMPORTANT if node[:important]
115
+ propstring = format("%s:%s", name, value)
123
116
  sanitized_node = Crass.parse_properties(propstring).first
124
117
  sanitized_tree << sanitized_node << CRASS_SEMICOLON
125
118
  end
@@ -127,6 +120,44 @@ module Loofah
127
120
  Crass::Parser.stringify(sanitized_tree)
128
121
  end
129
122
 
123
+ def scrub_attribute_that_allows_local_ref(attr_node)
124
+ return unless attr_node.value
125
+
126
+ nodes = Crass::Parser.new(attr_node.value).parse_component_values
127
+
128
+ values = nodes.map do |node|
129
+ case node[:node]
130
+ when :url
131
+ if node[:value].start_with?("#")
132
+ node[:raw]
133
+ end
134
+ when :hash, :ident, :string
135
+ node[:raw]
136
+ end
137
+ end.compact
138
+
139
+ attr_node.value = values.join(" ")
140
+ end
141
+
142
+ def scrub_uri_attribute(attr_node)
143
+ # this block lifted nearly verbatim from HTML5 sanitization
144
+ val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
145
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
146
+ !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
147
+ attr_node.remove
148
+ return true
149
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
150
+ # permit only allowed data mediatypes
151
+ mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
152
+ mediatype, _ = mediatype.split(";")[0..1] if mediatype
153
+ if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
154
+ attr_node.remove
155
+ return true
156
+ end
157
+ end
158
+ false
159
+ end
160
+
130
161
  #
131
162
  # libxml2 >= 2.9.2 fails to escape comments within some attributes.
132
163
  #
@@ -152,6 +183,46 @@ module Loofah
152
183
  end.force_encoding(encoding)
153
184
  end
154
185
  end
186
+
187
+ def cdata_needs_escaping?(node)
188
+ # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` tag as cdata, but it acts that way
189
+ node.cdata? || (Nokogiri.jruby? && node.text? && node.parent.name == "style")
190
+ end
191
+
192
+ def cdata_escape(node)
193
+ escaped_text = escape_tags(node.text)
194
+ if Nokogiri.jruby?
195
+ node.document.create_text_node(escaped_text)
196
+ else
197
+ node.document.create_cdata(escaped_text)
198
+ end
199
+ end
200
+
201
+ TABLE_FOR_ESCAPE_HTML__ = {
202
+ "<" => "&lt;",
203
+ ">" => "&gt;",
204
+ "&" => "&amp;",
205
+ }
206
+
207
+ def escape_tags(string)
208
+ # modified version of CGI.escapeHTML from ruby 3.1
209
+ enc = string.encoding
210
+ if enc.ascii_compatible?
211
+ string = string.b
212
+ string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
213
+ string.force_encoding(enc)
214
+ else
215
+ if enc.dummy?
216
+ origenc = enc
217
+ enc = Encoding::Converter.asciicompat_encoding(enc)
218
+ string = enc ? string.encode(enc) : string.b
219
+ end
220
+ table = Hash[TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }]
221
+ string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
222
+ string.encode!(origenc) if origenc
223
+ string
224
+ end
225
+ end
155
226
  end
156
227
  end
157
228
  end
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  module MetaHelpers # :nodoc:
4
- def self.add_downcased_set_members_to_all_set_constants(mojule)
5
- mojule.constants.each do |constant_sym|
6
- constant = mojule.const_get constant_sym
7
- next unless Set === constant
8
- constant.dup.each do |member|
9
- constant.add member.downcase
5
+ class << self
6
+ def add_downcased_set_members_to_all_set_constants(mojule)
7
+ mojule.constants.each do |constant_sym|
8
+ constant = mojule.const_get(constant_sym)
9
+ next unless Set === constant
10
+
11
+ constant.dup.each do |member|
12
+ constant.add(member.downcase)
13
+ end
10
14
  end
11
15
  end
12
16
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  #
4
5
  # A RuntimeError raised when Loofah could not find an appropriate scrubber.
@@ -24,7 +25,7 @@ module Loofah
24
25
  #
25
26
  # This can then be run on a document:
26
27
  #
27
- # Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
28
+ # Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
28
29
  # # => "<div>foo</div><p>bar</p>"
29
30
  #
30
31
  # Scrubbers can be run on a document in either a top-down traversal (the
@@ -32,7 +33,6 @@ module Loofah
32
33
  # Scrubber::STOP to terminate the traversal of a subtree.
33
34
  #
34
35
  class Scrubber
35
-
36
36
  # Top-down Scrubbers may return CONTINUE to indicate that the subtree should be traversed.
37
37
  CONTINUE = Object.new.freeze
38
38
 
@@ -67,7 +67,9 @@ module Loofah
67
67
  unless [:top_down, :bottom_up].include?(direction)
68
68
  raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
69
69
  end
70
- @direction, @block = direction, block
70
+
71
+ @direction = direction
72
+ @block = block
71
73
  end
72
74
 
73
75
  #
@@ -84,7 +86,7 @@ module Loofah
84
86
  # +scrub+, which will be called for each document node.
85
87
  #
86
88
  def scrub(node)
87
- raise ScrubberNotFound, "No scrub method has been defined on #{self.class.to_s}"
89
+ raise ScrubberNotFound, "No scrub method has been defined on #{self.class}"
88
90
  end
89
91
 
90
92
  #
@@ -103,11 +105,15 @@ module Loofah
103
105
  def html5lib_sanitize(node)
104
106
  case node.type
105
107
  when Nokogiri::XML::Node::ELEMENT_NODE
106
- if HTML5::Scrub.allowed_element? node.name
107
- HTML5::Scrub.scrub_attributes node
108
+ if HTML5::Scrub.allowed_element?(node.name)
109
+ HTML5::Scrub.scrub_attributes(node)
108
110
  return Scrubber::CONTINUE
109
111
  end
110
112
  when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
113
+ if HTML5::Scrub.cdata_needs_escaping?(node)
114
+ node.before(HTML5::Scrub.cdata_escape(node))
115
+ return Scrubber::STOP
116
+ end
111
117
  return Scrubber::CONTINUE
112
118
  end
113
119
  Scrubber::STOP
@@ -116,8 +122,8 @@ module Loofah
116
122
  def traverse_conditionally_top_down(node)
117
123
  if block
118
124
  return if block.call(node) == STOP
119
- else
120
- return if scrub(node) == STOP
125
+ elsif scrub(node) == STOP
126
+ return
121
127
  end
122
128
  node.children.each { |j| traverse_conditionally_top_down(j) }
123
129
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  #
4
5
  # Loofah provides some built-in scrubbers for sanitizing with
@@ -11,7 +12,7 @@ module Loofah
11
12
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
12
13
  #
13
14
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
14
- # Loofah.fragment(unsafe_html).scrub!(:strip)
15
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
15
16
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
16
17
  #
17
18
  #
@@ -20,7 +21,7 @@ module Loofah
20
21
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
21
22
  #
22
23
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
23
- # Loofah.fragment(unsafe_html).scrub!(:prune)
24
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
24
25
  # => "ohai! <div>div is safe</div> "
25
26
  #
26
27
  #
@@ -29,7 +30,7 @@ module Loofah
29
30
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
30
31
  #
31
32
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
32
- # Loofah.fragment(unsafe_html).scrub!(:escape)
33
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
33
34
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
34
35
  #
35
36
  #
@@ -41,7 +42,7 @@ module Loofah
41
42
  # layer of paint on top of the HTML input to make it look nice.
42
43
  #
43
44
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
44
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
45
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
45
46
  # => "ohai! <div>div with attributes</div>"
46
47
  #
47
48
  # One use case for this scrubber is to clean up HTML that was
@@ -56,25 +57,42 @@ module Loofah
56
57
  # +:nofollow+ adds a rel="nofollow" attribute to all links
57
58
  #
58
59
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
59
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
60
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
60
61
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
61
62
  #
62
63
  #
64
+ # === Loofah::Scrubbers::TargetBlank / scrub!(:targetblank)
65
+ #
66
+ # +:targetblank+ adds a target="_blank" attribute to all links
67
+ #
68
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
69
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
70
+ # => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
71
+ #
72
+ #
63
73
  # === Loofah::Scrubbers::NoOpener / scrub!(:noopener)
64
74
  #
65
75
  # +:noopener+ adds a rel="noopener" attribute to all links
66
76
  #
67
77
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
68
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
78
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
69
79
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
70
80
  #
81
+ # === Loofah::Scrubbers::NoReferrer / scrub!(:noreferrer)
82
+ #
83
+ # +:noreferrer+ adds a rel="noreferrer" attribute to all links
84
+ #
85
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
86
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
87
+ # => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
88
+ #
71
89
  #
72
90
  # === Loofah::Scrubbers::Unprintable / scrub!(:unprintable)
73
91
  #
74
92
  # +:unprintable+ removes unprintable Unicode characters.
75
93
  #
76
94
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
77
- # Loofah.fragment(markup).scrub!(:unprintable)
95
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
78
96
  # => "<p>Some text with an unprintable character at the end</p>"
79
97
  #
80
98
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -90,23 +108,20 @@ module Loofah
90
108
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
91
109
  #
92
110
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
93
- # Loofah.fragment(unsafe_html).scrub!(:strip)
111
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
94
112
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
95
113
  #
96
114
  class Strip < Scrubber
97
- def initialize
115
+ def initialize # rubocop:disable Lint/MissingSuper
98
116
  @direction = :bottom_up
99
117
  end
100
118
 
101
119
  def scrub(node)
102
120
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
103
- if node.children.length == 1 && node.children.first.cdata?
104
- sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
105
- node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
106
- else
107
- node.before node.children
108
- end
121
+
122
+ node.before(node.children)
109
123
  node.remove
124
+ STOP
110
125
  end
111
126
  end
112
127
 
@@ -116,18 +131,19 @@ module Loofah
116
131
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
117
132
  #
118
133
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
119
- # Loofah.fragment(unsafe_html).scrub!(:prune)
134
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
120
135
  # => "ohai! <div>div is safe</div> "
121
136
  #
122
137
  class Prune < Scrubber
123
- def initialize
138
+ def initialize # rubocop:disable Lint/MissingSuper
124
139
  @direction = :top_down
125
140
  end
126
141
 
127
142
  def scrub(node)
128
143
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
144
+
129
145
  node.remove
130
- return STOP
146
+ STOP
131
147
  end
132
148
  end
133
149
 
@@ -137,19 +153,20 @@ module Loofah
137
153
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
138
154
  #
139
155
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
140
- # Loofah.fragment(unsafe_html).scrub!(:escape)
156
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
141
157
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
142
158
  #
143
159
  class Escape < Scrubber
144
- def initialize
160
+ def initialize # rubocop:disable Lint/MissingSuper
145
161
  @direction = :top_down
146
162
  end
147
163
 
148
164
  def scrub(node)
149
165
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
150
- node.add_next_sibling Nokogiri::XML::Text.new(node.to_s, node.document)
166
+
167
+ node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
151
168
  node.remove
152
- return STOP
169
+ STOP
153
170
  end
154
171
  end
155
172
 
@@ -162,7 +179,7 @@ module Loofah
162
179
  # layer of paint on top of the HTML input to make it look nice.
163
180
  #
164
181
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
165
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
182
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
166
183
  # => "ohai! <div>div with attributes</div>"
167
184
  #
168
185
  # One use case for this scrubber is to clean up HTML that was
@@ -172,14 +189,14 @@ module Loofah
172
189
  # Certainly not me.
173
190
  #
174
191
  class Whitewash < Scrubber
175
- def initialize
192
+ def initialize # rubocop:disable Lint/MissingSuper
176
193
  @direction = :top_down
177
194
  end
178
195
 
179
196
  def scrub(node)
180
197
  case node.type
181
198
  when Nokogiri::XML::Node::ELEMENT_NODE
182
- if HTML5::Scrub.allowed_element? node.name
199
+ if HTML5::Scrub.allowed_element?(node.name)
183
200
  node.attributes.each { |attr| node.remove_attribute(attr.first) }
184
201
  return CONTINUE if node.namespaces.empty?
185
202
  end
@@ -197,18 +214,46 @@ module Loofah
197
214
  # +:nofollow+ adds a rel="nofollow" attribute to all links
198
215
  #
199
216
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
200
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
217
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
201
218
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
202
219
  #
203
220
  class NoFollow < Scrubber
204
- def initialize
221
+ def initialize # rubocop:disable Lint/MissingSuper
205
222
  @direction = :top_down
206
223
  end
207
224
 
208
225
  def scrub(node)
209
226
  return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
227
+
210
228
  append_attribute(node, "rel", "nofollow")
211
- return STOP
229
+ STOP
230
+ end
231
+ end
232
+
233
+ #
234
+ # === scrub!(:targetblank)
235
+ #
236
+ # +:targetblank+ adds a target="_blank" attribute to all links.
237
+ # If there is a target already set, replaces it with target="_blank".
238
+ #
239
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
240
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
241
+ # => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
242
+ #
243
+ # On modern browsers, setting target="_blank" on anchor elements implicitly provides the same
244
+ # behavior as setting rel="noopener".
245
+ #
246
+ class TargetBlank < Scrubber
247
+ def initialize # rubocop:disable Lint/MissingSuper
248
+ @direction = :top_down
249
+ end
250
+
251
+ def scrub(node)
252
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
253
+
254
+ node.set_attribute("target", "_blank")
255
+
256
+ STOP
212
257
  end
213
258
  end
214
259
 
@@ -218,35 +263,59 @@ module Loofah
218
263
  # +:noopener+ adds a rel="noopener" attribute to all links
219
264
  #
220
265
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
221
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
266
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
222
267
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
223
268
  #
224
269
  class NoOpener < Scrubber
225
- def initialize
270
+ def initialize # rubocop:disable Lint/MissingSuper
226
271
  @direction = :top_down
227
272
  end
228
273
 
229
274
  def scrub(node)
230
275
  return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
276
+
231
277
  append_attribute(node, "rel", "noopener")
232
- return STOP
278
+ STOP
279
+ end
280
+ end
281
+
282
+ #
283
+ # === scrub!(:noreferrer)
284
+ #
285
+ # +:noreferrer+ adds a rel="noreferrer" attribute to all links
286
+ #
287
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
288
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
289
+ # => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
290
+ #
291
+ class NoReferrer < Scrubber
292
+ def initialize # rubocop:disable Lint/MissingSuper
293
+ @direction = :top_down
294
+ end
295
+
296
+ def scrub(node)
297
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
298
+
299
+ append_attribute(node, "rel", "noreferrer")
300
+ STOP
233
301
  end
234
302
  end
235
303
 
236
304
  # This class probably isn't useful publicly, but is used for #to_text's current implementation
237
305
  class NewlineBlockElements < Scrubber # :nodoc:
238
- def initialize
306
+ def initialize # rubocop:disable Lint/MissingSuper
239
307
  @direction = :bottom_up
240
308
  end
241
309
 
242
310
  def scrub(node)
243
311
  return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
312
+
244
313
  replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
245
314
  "\n"
246
315
  else
247
316
  "\n#{node.content}\n"
248
317
  end
249
- node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
318
+ node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
250
319
  node.remove
251
320
  end
252
321
  end
@@ -257,7 +326,7 @@ module Loofah
257
326
  # +:unprintable+ removes unprintable Unicode characters.
258
327
  #
259
328
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
260
- # Loofah.fragment(markup).scrub!(:unprintable)
329
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
261
330
  # => "<p>Some text with an unprintable character at the end</p>"
262
331
  #
263
332
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -267,7 +336,7 @@ module Loofah
267
336
  # http://timelessrepo.com/json-isnt-a-javascript-subset
268
337
  #
269
338
  class Unprintable < Scrubber
270
- def initialize
339
+ def initialize # rubocop:disable Lint/MissingSuper
271
340
  @direction = :top_down
272
341
  end
273
342
 
@@ -283,21 +352,25 @@ module Loofah
283
352
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
284
353
  #
285
354
  MAP = {
286
- :escape => Escape,
287
- :prune => Prune,
288
- :whitewash => Whitewash,
289
- :strip => Strip,
290
- :nofollow => NoFollow,
291
- :noopener => NoOpener,
292
- :newline_block_elements => NewlineBlockElements,
293
- :unprintable => Unprintable,
355
+ escape: Escape,
356
+ prune: Prune,
357
+ whitewash: Whitewash,
358
+ strip: Strip,
359
+ nofollow: NoFollow,
360
+ noopener: NoOpener,
361
+ noreferrer: NoReferrer,
362
+ targetblank: TargetBlank,
363
+ newline_block_elements: NewlineBlockElements,
364
+ unprintable: Unprintable,
294
365
  }
295
366
 
296
- #
297
- # Returns an array of symbols representing the built-in scrubbers
298
- #
299
- def self.scrubber_symbols
300
- MAP.keys
367
+ class << self
368
+ #
369
+ # Returns an array of symbols representing the built-in scrubbers
370
+ #
371
+ def scrubber_symbols
372
+ MAP.keys
373
+ end
301
374
  end
302
375
  end
303
376
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  # The version of Loofah you are using
4
- VERSION = "2.19.0"
5
+ VERSION = "2.23.1"
5
6
  end