loofah 2.19.1 → 2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "cgi"
3
4
  require "crass"
4
5
 
@@ -6,9 +7,10 @@ module Loofah
6
7
  module HTML5 # :nodoc:
7
8
  module Scrub
8
9
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
9
- CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
10
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/ # rubocop:disable Layout/LineLength
10
11
  CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
11
- CSS_IMPORTANT = '!important'
12
+ CSS_IMPORTANT = "!important"
13
+ CSS_WHITESPACE = " "
12
14
  CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
13
15
  DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
14
16
 
@@ -26,7 +28,7 @@ module Loofah
26
28
  attr_node.node_name
27
29
  end
28
30
 
29
- if attr_name =~ DATA_ATTRIBUTE_NAME
31
+ if DATA_ATTRIBUTE_NAME.match?(attr_name)
30
32
  next
31
33
  end
32
34
 
@@ -43,10 +45,12 @@ module Loofah
43
45
  scrub_attribute_that_allows_local_ref(attr_node)
44
46
  end
45
47
 
46
- if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
47
- attr_node.remove
48
- next
49
- end
48
+ next unless SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
49
+ attr_name == "xlink:href" &&
50
+ attr_node.value =~ /^\s*[^#\s].*/m
51
+
52
+ attr_node.remove
53
+ next
50
54
  end
51
55
 
52
56
  scrub_css_attribute(node)
@@ -66,29 +70,28 @@ module Loofah
66
70
  end
67
71
 
68
72
  def scrub_css(style)
73
+ url_flags = [:url, :bad_url]
69
74
  style_tree = Crass.parse_properties(style)
70
75
  sanitized_tree = []
71
76
 
72
77
  style_tree.each do |node|
73
78
  next unless node[:node] == :property
74
79
  next if node[:children].any? do |child|
75
- [:url, :bad_url].include?(child[:node])
80
+ url_flags.include?(child[:node])
76
81
  end
77
82
 
78
83
  name = node[:name].downcase
79
84
  next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
80
- SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
81
- SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
85
+ SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
86
+ SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
82
87
 
83
88
  value = node[:children].map do |child|
84
89
  case child[:node]
85
90
  when :whitespace
86
- nil
91
+ CSS_WHITESPACE
87
92
  when :string
88
- if child[:raw] =~ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES
93
+ if CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES.match?(child[:raw])
89
94
  Crass::Parser.stringify(child)
90
- else
91
- nil
92
95
  end
93
96
  when :function
94
97
  if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
@@ -97,18 +100,19 @@ module Loofah
97
100
  when :ident
98
101
  keyword = child[:value]
99
102
  if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
100
- SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
101
- (keyword =~ CSS_KEYWORDISH)
103
+ SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
104
+ (keyword =~ CSS_KEYWORDISH)
102
105
  keyword
103
106
  end
104
107
  else
105
108
  child[:raw]
106
109
  end
107
- end.compact
110
+ end.compact.join.strip
108
111
 
109
112
  next if value.empty?
110
- value << CSS_IMPORTANT if node[:important]
111
- propstring = format("%s:%s", name, value.join(" "))
113
+
114
+ value << CSS_WHITESPACE << CSS_IMPORTANT if node[:important]
115
+ propstring = format("%s:%s", name, value)
112
116
  sanitized_node = Crass.parse_properties(propstring).first
113
117
  sanitized_tree << sanitized_node << CRASS_SEMICOLON
114
118
  end
@@ -126,13 +130,9 @@ module Loofah
126
130
  when :url
127
131
  if node[:value].start_with?("#")
128
132
  node[:raw]
129
- else
130
- nil
131
133
  end
132
134
  when :hash, :ident, :string
133
135
  node[:raw]
134
- else
135
- nil
136
136
  end
137
137
  end.compact
138
138
 
@@ -142,7 +142,8 @@ module Loofah
142
142
  def scrub_uri_attribute(attr_node)
143
143
  # this block lifted nearly verbatim from HTML5 sanitization
144
144
  val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
145
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
145
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
146
+ !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
146
147
  attr_node.remove
147
148
  return true
148
149
  elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
@@ -184,8 +185,8 @@ module Loofah
184
185
  end
185
186
 
186
187
  def cdata_needs_escaping?(node)
187
- # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
188
- node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
188
+ # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` tag as cdata, but it acts that way
189
+ node.cdata? || (Nokogiri.jruby? && node.text? && node.parent.name == "style")
189
190
  end
190
191
 
191
192
  def cdata_escape(node)
@@ -198,28 +199,28 @@ module Loofah
198
199
  end
199
200
 
200
201
  TABLE_FOR_ESCAPE_HTML__ = {
201
- '<' => '&lt;',
202
- '>' => '&gt;',
203
- '&' => '&amp;',
202
+ "<" => "&lt;",
203
+ ">" => "&gt;",
204
+ "&" => "&amp;",
204
205
  }
205
206
 
206
207
  def escape_tags(string)
207
208
  # modified version of CGI.escapeHTML from ruby 3.1
208
209
  enc = string.encoding
209
- unless enc.ascii_compatible?
210
+ if enc.ascii_compatible?
211
+ string = string.b
212
+ string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
213
+ string.force_encoding(enc)
214
+ else
210
215
  if enc.dummy?
211
216
  origenc = enc
212
217
  enc = Encoding::Converter.asciicompat_encoding(enc)
213
218
  string = enc ? string.encode(enc) : string.b
214
219
  end
215
- table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
220
+ table = Hash[TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }]
216
221
  string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
217
222
  string.encode!(origenc) if origenc
218
223
  string
219
- else
220
- string = string.b
221
- string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
222
- string.force_encoding(enc)
223
224
  end
224
225
  end
225
226
  end
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  module MetaHelpers # :nodoc:
4
- def self.add_downcased_set_members_to_all_set_constants(mojule)
5
- mojule.constants.each do |constant_sym|
6
- constant = mojule.const_get constant_sym
7
- next unless Set === constant
8
- constant.dup.each do |member|
9
- constant.add member.downcase
5
+ class << self
6
+ def add_downcased_set_members_to_all_set_constants(mojule)
7
+ mojule.constants.each do |constant_sym|
8
+ constant = mojule.const_get(constant_sym)
9
+ next unless Set === constant
10
+
11
+ constant.dup.each do |member|
12
+ constant.add(member.downcase)
13
+ end
10
14
  end
11
15
  end
12
16
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  #
4
5
  # A RuntimeError raised when Loofah could not find an appropriate scrubber.
@@ -24,7 +25,7 @@ module Loofah
24
25
  #
25
26
  # This can then be run on a document:
26
27
  #
27
- # Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
28
+ # Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
28
29
  # # => "<div>foo</div><p>bar</p>"
29
30
  #
30
31
  # Scrubbers can be run on a document in either a top-down traversal (the
@@ -32,7 +33,6 @@ module Loofah
32
33
  # Scrubber::STOP to terminate the traversal of a subtree.
33
34
  #
34
35
  class Scrubber
35
-
36
36
  # Top-down Scrubbers may return CONTINUE to indicate that the subtree should be traversed.
37
37
  CONTINUE = Object.new.freeze
38
38
 
@@ -67,7 +67,9 @@ module Loofah
67
67
  unless [:top_down, :bottom_up].include?(direction)
68
68
  raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
69
69
  end
70
- @direction, @block = direction, block
70
+
71
+ @direction = direction
72
+ @block = block
71
73
  end
72
74
 
73
75
  #
@@ -84,7 +86,7 @@ module Loofah
84
86
  # +scrub+, which will be called for each document node.
85
87
  #
86
88
  def scrub(node)
87
- raise ScrubberNotFound, "No scrub method has been defined on #{self.class.to_s}"
89
+ raise ScrubberNotFound, "No scrub method has been defined on #{self.class}"
88
90
  end
89
91
 
90
92
  #
@@ -103,8 +105,8 @@ module Loofah
103
105
  def html5lib_sanitize(node)
104
106
  case node.type
105
107
  when Nokogiri::XML::Node::ELEMENT_NODE
106
- if HTML5::Scrub.allowed_element? node.name
107
- HTML5::Scrub.scrub_attributes node
108
+ if HTML5::Scrub.allowed_element?(node.name)
109
+ HTML5::Scrub.scrub_attributes(node)
108
110
  return Scrubber::CONTINUE
109
111
  end
110
112
  when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
@@ -120,8 +122,8 @@ module Loofah
120
122
  def traverse_conditionally_top_down(node)
121
123
  if block
122
124
  return if block.call(node) == STOP
123
- else
124
- return if scrub(node) == STOP
125
+ elsif scrub(node) == STOP
126
+ return
125
127
  end
126
128
  node.children.each { |j| traverse_conditionally_top_down(j) }
127
129
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  #
4
5
  # Loofah provides some built-in scrubbers for sanitizing with
@@ -11,7 +12,7 @@ module Loofah
11
12
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
12
13
  #
13
14
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
14
- # Loofah.fragment(unsafe_html).scrub!(:strip)
15
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
15
16
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
16
17
  #
17
18
  #
@@ -20,7 +21,7 @@ module Loofah
20
21
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
21
22
  #
22
23
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
23
- # Loofah.fragment(unsafe_html).scrub!(:prune)
24
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
24
25
  # => "ohai! <div>div is safe</div> "
25
26
  #
26
27
  #
@@ -29,7 +30,7 @@ module Loofah
29
30
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
30
31
  #
31
32
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
32
- # Loofah.fragment(unsafe_html).scrub!(:escape)
33
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
33
34
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
34
35
  #
35
36
  #
@@ -41,7 +42,7 @@ module Loofah
41
42
  # layer of paint on top of the HTML input to make it look nice.
42
43
  #
43
44
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
44
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
45
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
45
46
  # => "ohai! <div>div with attributes</div>"
46
47
  #
47
48
  # One use case for this scrubber is to clean up HTML that was
@@ -56,25 +57,42 @@ module Loofah
56
57
  # +:nofollow+ adds a rel="nofollow" attribute to all links
57
58
  #
58
59
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
59
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
60
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
60
61
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
61
62
  #
62
63
  #
64
+ # === Loofah::Scrubbers::TargetBlank / scrub!(:targetblank)
65
+ #
66
+ # +:targetblank+ adds a target="_blank" attribute to every link that has an href not starting with "#"
67
+ #
68
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
69
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
70
+ # => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
71
+ #
72
+ #
63
73
  # === Loofah::Scrubbers::NoOpener / scrub!(:noopener)
64
74
  #
65
75
  # +:noopener+ adds a rel="noopener" attribute to all links
66
76
  #
67
77
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
68
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
78
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
69
79
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
70
80
  #
81
+ # === Loofah::Scrubbers::NoReferrer / scrub!(:noreferrer)
82
+ #
83
+ # +:noreferrer+ adds a rel="noreferrer" attribute to all links
84
+ #
85
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
86
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
87
+ # => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
88
+ #
71
89
  #
72
90
  # === Loofah::Scrubbers::Unprintable / scrub!(:unprintable)
73
91
  #
74
92
  # +:unprintable+ removes unprintable Unicode characters.
75
93
  #
76
94
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
77
- # Loofah.fragment(markup).scrub!(:unprintable)
95
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
78
96
  # => "<p>Some text with an unprintable character at the end</p>"
79
97
  #
80
98
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -90,19 +108,20 @@ module Loofah
90
108
  # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
91
109
  #
92
110
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
93
- # Loofah.fragment(unsafe_html).scrub!(:strip)
111
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip)
94
112
  # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
95
113
  #
96
114
  class Strip < Scrubber
97
- def initialize
115
+ def initialize # rubocop:disable Lint/MissingSuper
98
116
  @direction = :bottom_up
99
117
  end
100
118
 
101
119
  def scrub(node)
102
120
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
121
+
103
122
  node.before(node.children)
104
123
  node.remove
105
- return STOP
124
+ STOP
106
125
  end
107
126
  end
108
127
 
@@ -112,18 +131,19 @@ module Loofah
112
131
  # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
113
132
  #
114
133
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
115
- # Loofah.fragment(unsafe_html).scrub!(:prune)
134
+ # Loofah.html5_fragment(unsafe_html).scrub!(:prune)
116
135
  # => "ohai! <div>div is safe</div> "
117
136
  #
118
137
  class Prune < Scrubber
119
- def initialize
138
+ def initialize # rubocop:disable Lint/MissingSuper
120
139
  @direction = :top_down
121
140
  end
122
141
 
123
142
  def scrub(node)
124
143
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
144
+
125
145
  node.remove
126
- return STOP
146
+ STOP
127
147
  end
128
148
  end
129
149
 
@@ -133,19 +153,20 @@ module Loofah
133
153
  # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
134
154
  #
135
155
  # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
136
- # Loofah.fragment(unsafe_html).scrub!(:escape)
156
+ # Loofah.html5_fragment(unsafe_html).scrub!(:escape)
137
157
  # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
138
158
  #
139
159
  class Escape < Scrubber
140
- def initialize
160
+ def initialize # rubocop:disable Lint/MissingSuper
141
161
  @direction = :top_down
142
162
  end
143
163
 
144
164
  def scrub(node)
145
165
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
146
- node.add_next_sibling Nokogiri::XML::Text.new(node.to_s, node.document)
166
+
167
+ node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
147
168
  node.remove
148
- return STOP
169
+ STOP
149
170
  end
150
171
  end
151
172
 
@@ -158,7 +179,7 @@ module Loofah
158
179
  # layer of paint on top of the HTML input to make it look nice.
159
180
  #
160
181
  # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
161
- # Loofah.fragment(messy_markup).scrub!(:whitewash)
182
+ # Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
162
183
  # => "ohai! <div>div with attributes</div>"
163
184
  #
164
185
  # One use case for this scrubber is to clean up HTML that was
@@ -168,14 +189,14 @@ module Loofah
168
189
  # Certainly not me.
169
190
  #
170
191
  class Whitewash < Scrubber
171
- def initialize
192
+ def initialize # rubocop:disable Lint/MissingSuper
172
193
  @direction = :top_down
173
194
  end
174
195
 
175
196
  def scrub(node)
176
197
  case node.type
177
198
  when Nokogiri::XML::Node::ELEMENT_NODE
178
- if HTML5::Scrub.allowed_element? node.name
199
+ if HTML5::Scrub.allowed_element?(node.name)
179
200
  node.attributes.each { |attr| node.remove_attribute(attr.first) }
180
201
  return CONTINUE if node.namespaces.empty?
181
202
  end
@@ -193,18 +214,48 @@ module Loofah
193
214
  # +:nofollow+ adds a rel="nofollow" attribute to all links
194
215
  #
195
216
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
196
- # Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
217
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
197
218
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
198
219
  #
199
220
  class NoFollow < Scrubber
200
- def initialize
221
+ def initialize # rubocop:disable Lint/MissingSuper
201
222
  @direction = :top_down
202
223
  end
203
224
 
204
225
  def scrub(node)
205
226
  return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
227
+
206
228
  append_attribute(node, "rel", "nofollow")
207
- return STOP
229
+ STOP
230
+ end
231
+ end
232
+
233
+ #
234
+ # === scrub!(:targetblank)
235
+ #
236
+ # +:targetblank+ adds a target="_blank" attribute to every link that has an href not starting with "#".
237
+ # If there is a target already set, replaces it with target="_blank".
238
+ #
239
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
240
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
241
+ # => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
242
+ #
243
+ # On modern browsers, setting target="_blank" on anchor elements implicitly provides the same
244
+ # behavior as setting rel="noopener".
245
+ #
246
+ class TargetBlank < Scrubber
247
+ def initialize # rubocop:disable Lint/MissingSuper
248
+ @direction = :top_down
249
+ end
250
+
251
+ def scrub(node)
252
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
253
+
254
+ href = node["href"]
255
+
256
+ node.set_attribute("target", "_blank") if href && href[0] != "#"
257
+
258
+ STOP
208
259
  end
209
260
  end
210
261
 
@@ -214,35 +265,59 @@ module Loofah
214
265
  # +:noopener+ adds a rel="noopener" attribute to all links
215
266
  #
216
267
  # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
217
- # Loofah.fragment(link_farmers_markup).scrub!(:noopener)
268
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
218
269
  # => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
219
270
  #
220
271
  class NoOpener < Scrubber
221
- def initialize
272
+ def initialize # rubocop:disable Lint/MissingSuper
222
273
  @direction = :top_down
223
274
  end
224
275
 
225
276
  def scrub(node)
226
277
  return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
278
+
227
279
  append_attribute(node, "rel", "noopener")
228
- return STOP
280
+ STOP
281
+ end
282
+ end
283
+
284
+ #
285
+ # === scrub!(:noreferrer)
286
+ #
287
+ # +:noreferrer+ adds a rel="noreferrer" attribute to all links
288
+ #
289
+ # link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
290
+ # Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
291
+ # => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
292
+ #
293
+ class NoReferrer < Scrubber
294
+ def initialize # rubocop:disable Lint/MissingSuper
295
+ @direction = :top_down
296
+ end
297
+
298
+ def scrub(node)
299
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
300
+
301
+ append_attribute(node, "rel", "noreferrer")
302
+ STOP
229
303
  end
230
304
  end
231
305
 
232
306
  # This class probably isn't useful publicly, but is used for #to_text's current implementation
233
307
  class NewlineBlockElements < Scrubber # :nodoc:
234
- def initialize
308
+ def initialize # rubocop:disable Lint/MissingSuper
235
309
  @direction = :bottom_up
236
310
  end
237
311
 
238
312
  def scrub(node)
239
313
  return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
314
+
240
315
  replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
241
316
  "\n"
242
317
  else
243
318
  "\n#{node.content}\n"
244
319
  end
245
- node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
320
+ node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
246
321
  node.remove
247
322
  end
248
323
  end
@@ -253,7 +328,7 @@ module Loofah
253
328
  # +:unprintable+ removes unprintable Unicode characters.
254
329
  #
255
330
  # markup = "<p>Some text with an unprintable character at the end\u2028</p>"
256
- # Loofah.fragment(markup).scrub!(:unprintable)
331
+ # Loofah.html5_fragment(markup).scrub!(:unprintable)
257
332
  # => "<p>Some text with an unprintable character at the end</p>"
258
333
  #
259
334
  # You may not be able to see the unprintable character in the above example, but there is a
@@ -263,7 +338,7 @@ module Loofah
263
338
  # http://timelessrepo.com/json-isnt-a-javascript-subset
264
339
  #
265
340
  class Unprintable < Scrubber
266
- def initialize
341
+ def initialize # rubocop:disable Lint/MissingSuper
267
342
  @direction = :top_down
268
343
  end
269
344
 
@@ -275,25 +350,81 @@ module Loofah
275
350
  end
276
351
  end
277
352
 
353
+ #
354
+ # === scrub!(:double_breakpoint)
355
+ #
356
+ # +:double_breakpoint+ replaces double-break tags with closing/opening paragraph tags.
357
+ #
358
+ # markup = "<p>Some text here in a logical paragraph.<br><br>Some more text, apparently a second paragraph.</p>"
359
+ # Loofah.html5_fragment(markup).scrub!(:double_breakpoint)
360
+ # => "<p>Some text here in a logical paragraph.</p><p>Some more text, apparently a second paragraph.</p>"
361
+ #
362
+ class DoubleBreakpoint < Scrubber
363
+ def initialize # rubocop:disable Lint/MissingSuper
364
+ @direction = :top_down
365
+ end
366
+
367
+ def scrub(node)
368
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "p")
369
+
370
+ paragraph_with_break_point_nodes = node.xpath("//p[br[following-sibling::br]]")
371
+
372
+ paragraph_with_break_point_nodes.each do |paragraph_node|
373
+ new_paragraph = paragraph_node.add_previous_sibling("<p>").first
374
+
375
+ paragraph_node.children.each do |child|
376
+ remove_blank_text_nodes(child)
377
+ end
378
+
379
+ paragraph_node.children.each do |child|
380
+ # already unlinked
381
+ next if child.parent.nil?
382
+
383
+ if child.name == "br" && child.next_sibling.name == "br"
384
+ new_paragraph = paragraph_node.add_previous_sibling("<p>").first
385
+ child.next_sibling.unlink
386
+ child.unlink
387
+ else
388
+ child.parent = new_paragraph
389
+ end
390
+ end
391
+
392
+ paragraph_node.unlink
393
+ end
394
+
395
+ CONTINUE
396
+ end
397
+
398
+ private
399
+
400
+ def remove_blank_text_nodes(node)
401
+ node.unlink if node.text? && node.blank?
402
+ end
403
+ end
278
404
  #
279
405
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
280
406
  #
281
407
  MAP = {
282
- :escape => Escape,
283
- :prune => Prune,
284
- :whitewash => Whitewash,
285
- :strip => Strip,
286
- :nofollow => NoFollow,
287
- :noopener => NoOpener,
288
- :newline_block_elements => NewlineBlockElements,
289
- :unprintable => Unprintable,
408
+ escape: Escape,
409
+ prune: Prune,
410
+ whitewash: Whitewash,
411
+ strip: Strip,
412
+ nofollow: NoFollow,
413
+ noopener: NoOpener,
414
+ noreferrer: NoReferrer,
415
+ targetblank: TargetBlank,
416
+ newline_block_elements: NewlineBlockElements,
417
+ unprintable: Unprintable,
418
+ double_breakpoint: DoubleBreakpoint,
290
419
  }
291
420
 
292
- #
293
- # Returns an array of symbols representing the built-in scrubbers
294
- #
295
- def self.scrubber_symbols
296
- MAP.keys
421
+ class << self
422
+ #
423
+ # Returns an array of symbols representing the built-in scrubbers
424
+ #
425
+ def scrubber_symbols
426
+ MAP.keys
427
+ end
297
428
  end
298
429
  end
299
430
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  # The version of Loofah you are using
4
- VERSION = "2.19.1"
5
+ VERSION = "2.24.0"
5
6
  end