loofah 2.3.1 → 2.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +193 -40
- data/README.md +13 -12
- data/lib/loofah/elements.rb +79 -75
- data/lib/loofah/helpers.rb +5 -4
- data/lib/loofah/html/document.rb +1 -0
- data/lib/loofah/html/document_fragment.rb +4 -2
- data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
- data/lib/loofah/html5/safelist.rb +273 -27
- data/lib/loofah/html5/scrub.rb +147 -52
- data/lib/loofah/instance_methods.rb +14 -8
- data/lib/loofah/metahelpers.rb +2 -1
- data/lib/loofah/scrubber.rb +12 -7
- data/lib/loofah/scrubbers.rb +20 -18
- data/lib/loofah/version.rb +5 -0
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -1
- data/lib/loofah.rb +33 -16
- metadata +45 -125
- data/.gemtest +0 -0
- data/Gemfile +0 -22
- data/Manifest.txt +0 -41
- data/Rakefile +0 -81
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -401
- data/test/html5/test_scrub.rb +0 -10
- data/test/integration/test_ad_hoc.rb +0 -220
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -1,30 +1,32 @@
|
|
1
|
-
|
2
|
-
require
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "cgi"
|
3
|
+
require "crass"
|
3
4
|
|
4
5
|
module Loofah
|
5
6
|
module HTML5 # :nodoc:
|
6
7
|
module Scrub
|
7
|
-
|
8
8
|
CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
|
9
|
-
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
|
10
|
-
CRASS_SEMICOLON = {:
|
9
|
+
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
|
10
|
+
CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
|
11
|
+
CSS_IMPORTANT = '!important'
|
12
|
+
CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
|
13
|
+
DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
|
11
14
|
|
12
15
|
class << self
|
13
|
-
|
14
|
-
|
15
|
-
::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
|
16
|
+
def allowed_element?(element_name)
|
17
|
+
::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include?(element_name)
|
16
18
|
end
|
17
19
|
|
18
20
|
# alternative implementation of the html5lib attribute scrubbing algorithm
|
19
|
-
def scrub_attributes
|
21
|
+
def scrub_attributes(node)
|
20
22
|
node.attribute_nodes.each do |attr_node|
|
21
23
|
attr_name = if attr_node.namespace
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
24
|
+
"#{attr_node.namespace.prefix}:#{attr_node.node_name}"
|
25
|
+
else
|
26
|
+
attr_node.node_name
|
27
|
+
end
|
26
28
|
|
27
|
-
if attr_name =~
|
29
|
+
if attr_name =~ DATA_ATTRIBUTE_NAME
|
28
30
|
next
|
29
31
|
end
|
30
32
|
|
@@ -34,71 +36,125 @@ module Loofah
|
|
34
36
|
end
|
35
37
|
|
36
38
|
if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
|
37
|
-
|
38
|
-
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
|
39
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
40
|
-
attr_node.remove
|
41
|
-
next
|
42
|
-
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
|
43
|
-
# permit only allowed data mediatypes
|
44
|
-
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
45
|
-
mediatype, _ = mediatype.split(';')[0..1] if mediatype
|
46
|
-
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
47
|
-
attr_node.remove
|
48
|
-
next
|
49
|
-
end
|
50
|
-
end
|
39
|
+
next if scrub_uri_attribute(attr_node)
|
51
40
|
end
|
41
|
+
|
52
42
|
if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
|
53
|
-
attr_node
|
43
|
+
scrub_attribute_that_allows_local_ref(attr_node)
|
54
44
|
end
|
55
|
-
|
45
|
+
|
46
|
+
if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
|
56
47
|
attr_node.remove
|
57
48
|
next
|
58
49
|
end
|
59
50
|
end
|
60
51
|
|
61
|
-
scrub_css_attribute
|
52
|
+
scrub_css_attribute(node)
|
62
53
|
|
63
54
|
node.attribute_nodes.each do |attr_node|
|
64
|
-
|
55
|
+
if attr_node.value !~ /[^[:space:]]/ && attr_node.name !~ DATA_ATTRIBUTE_NAME
|
56
|
+
node.remove_attribute(attr_node.name)
|
57
|
+
end
|
65
58
|
end
|
66
59
|
|
67
|
-
force_correct_attribute_escaping!
|
60
|
+
force_correct_attribute_escaping!(node)
|
68
61
|
end
|
69
62
|
|
70
|
-
def scrub_css_attribute
|
71
|
-
style = node.attributes[
|
63
|
+
def scrub_css_attribute(node)
|
64
|
+
style = node.attributes["style"]
|
72
65
|
style.value = scrub_css(style.value) if style
|
73
66
|
end
|
74
67
|
|
75
|
-
def scrub_css
|
76
|
-
style_tree = Crass.parse_properties
|
68
|
+
def scrub_css(style)
|
69
|
+
style_tree = Crass.parse_properties(style)
|
77
70
|
sanitized_tree = []
|
78
71
|
|
79
72
|
style_tree.each do |node|
|
80
73
|
next unless node[:node] == :property
|
81
74
|
next if node[:children].any? do |child|
|
82
|
-
[:url, :bad_url].include?(child[:node])
|
75
|
+
[:url, :bad_url].include?(child[:node])
|
83
76
|
end
|
77
|
+
|
84
78
|
name = node[:name].downcase
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
79
|
+
next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
|
80
|
+
SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
|
81
|
+
SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
|
82
|
+
|
83
|
+
value = node[:children].map do |child|
|
84
|
+
case child[:node]
|
85
|
+
when :whitespace
|
86
|
+
nil
|
87
|
+
when :string
|
88
|
+
if child[:raw] =~ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES
|
89
|
+
Crass::Parser.stringify(child)
|
90
|
+
else
|
91
|
+
nil
|
92
|
+
end
|
93
|
+
when :function
|
94
|
+
if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
|
95
|
+
Crass::Parser.stringify(child)
|
96
|
+
end
|
97
|
+
when :ident
|
98
|
+
keyword = child[:value]
|
99
|
+
if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
|
100
|
+
SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
|
101
|
+
(keyword =~ CSS_KEYWORDISH)
|
90
102
|
keyword
|
91
103
|
end
|
92
|
-
|
93
|
-
|
94
|
-
propstring = sprintf "%s:%s", name, value.join(" ")
|
95
|
-
sanitized_node = Crass.parse_properties(propstring).first
|
96
|
-
sanitized_tree << sanitized_node << CRASS_SEMICOLON
|
104
|
+
else
|
105
|
+
child[:raw]
|
97
106
|
end
|
98
|
-
end
|
107
|
+
end.compact
|
108
|
+
|
109
|
+
next if value.empty?
|
110
|
+
value << CSS_IMPORTANT if node[:important]
|
111
|
+
propstring = format("%s:%s", name, value.join(" "))
|
112
|
+
sanitized_node = Crass.parse_properties(propstring).first
|
113
|
+
sanitized_tree << sanitized_node << CRASS_SEMICOLON
|
99
114
|
end
|
100
115
|
|
101
|
-
Crass::Parser.stringify
|
116
|
+
Crass::Parser.stringify(sanitized_tree)
|
117
|
+
end
|
118
|
+
|
119
|
+
def scrub_attribute_that_allows_local_ref(attr_node)
|
120
|
+
return unless attr_node.value
|
121
|
+
|
122
|
+
nodes = Crass::Parser.new(attr_node.value).parse_component_values
|
123
|
+
|
124
|
+
values = nodes.map do |node|
|
125
|
+
case node[:node]
|
126
|
+
when :url
|
127
|
+
if node[:value].start_with?("#")
|
128
|
+
node[:raw]
|
129
|
+
else
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
when :hash, :ident, :string
|
133
|
+
node[:raw]
|
134
|
+
else
|
135
|
+
nil
|
136
|
+
end
|
137
|
+
end.compact
|
138
|
+
|
139
|
+
attr_node.value = values.join(" ")
|
140
|
+
end
|
141
|
+
|
142
|
+
def scrub_uri_attribute(attr_node)
|
143
|
+
# this block lifted nearly verbatim from HTML5 sanitization
|
144
|
+
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
145
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
146
|
+
attr_node.remove
|
147
|
+
return true
|
148
|
+
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
149
|
+
# permit only allowed data mediatypes
|
150
|
+
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
151
|
+
mediatype, _ = mediatype.split(";")[0..1] if mediatype
|
152
|
+
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
153
|
+
attr_node.remove
|
154
|
+
return true
|
155
|
+
end
|
156
|
+
end
|
157
|
+
false
|
102
158
|
end
|
103
159
|
|
104
160
|
#
|
@@ -106,7 +162,7 @@ module Loofah
|
|
106
162
|
#
|
107
163
|
# see comments about CVE-2018-8048 within the tests for more information
|
108
164
|
#
|
109
|
-
def force_correct_attribute_escaping!
|
165
|
+
def force_correct_attribute_escaping!(node)
|
110
166
|
return unless Nokogiri::VersionInfo.instance.libxml2?
|
111
167
|
|
112
168
|
node.attribute_nodes.each do |attr_node|
|
@@ -122,11 +178,50 @@ module Loofah
|
|
122
178
|
#
|
123
179
|
encoding = attr_node.value.encoding
|
124
180
|
attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
|
125
|
-
|
181
|
+
"%" + m.unpack("H2" * m.bytesize).join("%").upcase
|
126
182
|
end.force_encoding(encoding)
|
127
183
|
end
|
128
184
|
end
|
129
185
|
|
186
|
+
def cdata_needs_escaping?(node)
|
187
|
+
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
|
188
|
+
node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
|
189
|
+
end
|
190
|
+
|
191
|
+
def cdata_escape(node)
|
192
|
+
escaped_text = escape_tags(node.text)
|
193
|
+
if Nokogiri.jruby?
|
194
|
+
node.document.create_text_node(escaped_text)
|
195
|
+
else
|
196
|
+
node.document.create_cdata(escaped_text)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
TABLE_FOR_ESCAPE_HTML__ = {
|
201
|
+
'<' => '&lt;',
|
202
|
+
'>' => '&gt;',
|
203
|
+
'&' => '&amp;',
|
204
|
+
}
|
205
|
+
|
206
|
+
def escape_tags(string)
|
207
|
+
# modified version of CGI.escapeHTML from ruby 3.1
|
208
|
+
enc = string.encoding
|
209
|
+
unless enc.ascii_compatible?
|
210
|
+
if enc.dummy?
|
211
|
+
origenc = enc
|
212
|
+
enc = Encoding::Converter.asciicompat_encoding(enc)
|
213
|
+
string = enc ? string.encode(enc) : string.b
|
214
|
+
end
|
215
|
+
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
|
216
|
+
string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
|
217
|
+
string.encode!(origenc) if origenc
|
218
|
+
string
|
219
|
+
else
|
220
|
+
string = string.b
|
221
|
+
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
|
222
|
+
string.force_encoding(enc)
|
223
|
+
end
|
224
|
+
end
|
130
225
|
end
|
131
226
|
end
|
132
227
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Loofah
|
2
3
|
#
|
3
4
|
# Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
|
@@ -91,28 +92,33 @@ module Loofah
|
|
91
92
|
# # decidedly not ok for browser:
|
92
93
|
# frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
|
93
94
|
#
|
94
|
-
def text(options={})
|
95
|
-
result = serialize_root
|
95
|
+
def text(options = {})
|
96
|
+
result = if serialize_root
|
97
|
+
serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
|
98
|
+
else
|
99
|
+
""
|
100
|
+
end
|
96
101
|
if options[:encode_special_chars] == false
|
97
102
|
result # possibly dangerous if rendered in a browser
|
98
103
|
else
|
99
104
|
encode_special_chars result
|
100
105
|
end
|
101
106
|
end
|
107
|
+
|
102
108
|
alias :inner_text :text
|
103
|
-
alias :to_str
|
109
|
+
alias :to_str :text
|
104
110
|
|
105
111
|
#
|
106
112
|
# Returns a plain-text version of the markup contained by the
|
107
113
|
# fragment, with HTML entities encoded.
|
108
114
|
#
|
109
|
-
# This method is slower than #
|
110
|
-
# whitespace around block elements.
|
115
|
+
# This method is slower than #text, but is clever about
|
116
|
+
# whitespace around block elements and line break elements.
|
111
117
|
#
|
112
|
-
# Loofah.document("<h1>Title</h1><div>Content</div>").to_text
|
113
|
-
# # => "\nTitle\n\nContent\n"
|
118
|
+
# Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
|
119
|
+
# # => "\nTitle\n\nContent\nNext line\n"
|
114
120
|
#
|
115
|
-
def to_text(options={})
|
121
|
+
def to_text(options = {})
|
116
122
|
Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
|
117
123
|
end
|
118
124
|
end
|
data/lib/loofah/metahelpers.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Loofah
|
2
3
|
module MetaHelpers # :nodoc:
|
3
|
-
def self.add_downcased_set_members_to_all_set_constants
|
4
|
+
def self.add_downcased_set_members_to_all_set_constants(mojule)
|
4
5
|
mojule.constants.each do |constant_sym|
|
5
6
|
constant = mojule.const_get constant_sym
|
6
7
|
next unless Set === constant
|
data/lib/loofah/scrubber.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Loofah
|
2
3
|
#
|
3
4
|
# A RuntimeError raised when Loofah could not find an appropriate scrubber.
|
4
5
|
#
|
5
|
-
class ScrubberNotFound < RuntimeError
|
6
|
+
class ScrubberNotFound < RuntimeError; end
|
6
7
|
|
7
8
|
#
|
8
9
|
# A Scrubber wraps up a block (or method) that is run on an HTML node (element):
|
@@ -36,7 +37,7 @@ module Loofah
|
|
36
37
|
CONTINUE = Object.new.freeze
|
37
38
|
|
38
39
|
# Top-down Scrubbers may return STOP to indicate that the subtree should not be traversed.
|
39
|
-
STOP
|
40
|
+
STOP = Object.new.freeze
|
40
41
|
|
41
42
|
# When a scrubber is initialized, the :direction may be specified
|
42
43
|
# as :top_down (the default) or :bottom_up.
|
@@ -64,7 +65,7 @@ module Loofah
|
|
64
65
|
def initialize(options = {}, &block)
|
65
66
|
direction = options[:direction] || :top_down
|
66
67
|
unless [:top_down, :bottom_up].include?(direction)
|
67
|
-
raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
|
68
|
+
raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
|
68
69
|
end
|
69
70
|
@direction, @block = direction, block
|
70
71
|
end
|
@@ -91,10 +92,10 @@ module Loofah
|
|
91
92
|
# If the attribute is set, don't overwrite the existing value
|
92
93
|
#
|
93
94
|
def append_attribute(node, attribute, value)
|
94
|
-
current_value = node.get_attribute(attribute) ||
|
95
|
+
current_value = node.get_attribute(attribute) || ""
|
95
96
|
current_values = current_value.split(/\s+/)
|
96
97
|
updated_value = current_values | [value]
|
97
|
-
node.set_attribute(attribute, updated_value.join(
|
98
|
+
node.set_attribute(attribute, updated_value.join(" "))
|
98
99
|
end
|
99
100
|
|
100
101
|
private
|
@@ -107,6 +108,10 @@ module Loofah
|
|
107
108
|
return Scrubber::CONTINUE
|
108
109
|
end
|
109
110
|
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
111
|
+
if HTML5::Scrub.cdata_needs_escaping?(node)
|
112
|
+
node.before(HTML5::Scrub.cdata_escape(node))
|
113
|
+
return Scrubber::STOP
|
114
|
+
end
|
110
115
|
return Scrubber::CONTINUE
|
111
116
|
end
|
112
117
|
Scrubber::STOP
|
@@ -118,11 +123,11 @@ module Loofah
|
|
118
123
|
else
|
119
124
|
return if scrub(node) == STOP
|
120
125
|
end
|
121
|
-
node.children.each {|j| traverse_conditionally_top_down(j)}
|
126
|
+
node.children.each { |j| traverse_conditionally_top_down(j) }
|
122
127
|
end
|
123
128
|
|
124
129
|
def traverse_conditionally_bottom_up(node)
|
125
|
-
node.children.each {|j| traverse_conditionally_bottom_up(j)}
|
130
|
+
node.children.each { |j| traverse_conditionally_bottom_up(j) }
|
126
131
|
if block
|
127
132
|
block.call(node)
|
128
133
|
else
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Loofah
|
2
3
|
#
|
3
4
|
# Loofah provides some built-in scrubbers for sanitizing with
|
@@ -99,13 +100,9 @@ module Loofah
|
|
99
100
|
|
100
101
|
def scrub(node)
|
101
102
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
102
|
-
|
103
|
-
sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
|
104
|
-
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
|
105
|
-
else
|
106
|
-
node.before node.children
|
107
|
-
end
|
103
|
+
node.before(node.children)
|
108
104
|
node.remove
|
105
|
+
return STOP
|
109
106
|
end
|
110
107
|
end
|
111
108
|
|
@@ -205,8 +202,8 @@ module Loofah
|
|
205
202
|
end
|
206
203
|
|
207
204
|
def scrub(node)
|
208
|
-
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name ==
|
209
|
-
append_attribute(node,
|
205
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
206
|
+
append_attribute(node, "rel", "nofollow")
|
210
207
|
return STOP
|
211
208
|
end
|
212
209
|
end
|
@@ -226,8 +223,8 @@ module Loofah
|
|
226
223
|
end
|
227
224
|
|
228
225
|
def scrub(node)
|
229
|
-
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name ==
|
230
|
-
append_attribute(node,
|
226
|
+
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
|
227
|
+
append_attribute(node, "rel", "noopener")
|
231
228
|
return STOP
|
232
229
|
end
|
233
230
|
end
|
@@ -239,8 +236,13 @@ module Loofah
|
|
239
236
|
end
|
240
237
|
|
241
238
|
def scrub(node)
|
242
|
-
return CONTINUE unless Loofah::Elements::
|
243
|
-
|
239
|
+
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
240
|
+
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
241
|
+
"\n"
|
242
|
+
else
|
243
|
+
"\n#{node.content}\n"
|
244
|
+
end
|
245
|
+
node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
|
244
246
|
node.remove
|
245
247
|
end
|
246
248
|
end
|
@@ -267,7 +269,7 @@ module Loofah
|
|
267
269
|
|
268
270
|
def scrub(node)
|
269
271
|
if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
|
270
|
-
node.content = node.content.gsub(/\u2028|\u2029/,
|
272
|
+
node.content = node.content.gsub(/\u2028|\u2029/, "")
|
271
273
|
end
|
272
274
|
CONTINUE
|
273
275
|
end
|
@@ -277,14 +279,14 @@ module Loofah
|
|
277
279
|
# A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
|
278
280
|
#
|
279
281
|
MAP = {
|
280
|
-
:escape
|
281
|
-
:prune
|
282
|
+
:escape => Escape,
|
283
|
+
:prune => Prune,
|
282
284
|
:whitewash => Whitewash,
|
283
|
-
:strip
|
284
|
-
:nofollow
|
285
|
+
:strip => Strip,
|
286
|
+
:nofollow => NoFollow,
|
285
287
|
:noopener => NoOpener,
|
286
288
|
:newline_block_elements => NewlineBlockElements,
|
287
|
-
:unprintable => Unprintable
|
289
|
+
:unprintable => Unprintable,
|
288
290
|
}
|
289
291
|
|
290
292
|
#
|
data/lib/loofah/xml/document.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Loofah
|
2
3
|
module XML # :nodoc:
|
3
4
|
#
|
@@ -12,7 +13,7 @@ module Loofah
|
|
12
13
|
# constructor. Applications should use Loofah.fragment to
|
13
14
|
# parse a fragment.
|
14
15
|
#
|
15
|
-
def parse
|
16
|
+
def parse(tags)
|
16
17
|
doc = Loofah::XML::Document.new
|
17
18
|
doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
|
18
19
|
self.new(doc, tags)
|
data/lib/loofah.rb
CHANGED
@@ -1,22 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
3
|
|
3
4
|
require "nokogiri"
|
4
5
|
|
5
|
-
|
6
|
-
|
6
|
+
require_relative "loofah/version"
|
7
|
+
require_relative "loofah/metahelpers"
|
8
|
+
require_relative "loofah/elements"
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
-
|
10
|
+
require_relative "loofah/html5/safelist"
|
11
|
+
require_relative "loofah/html5/libxml2_workarounds"
|
12
|
+
require_relative "loofah/html5/scrub"
|
11
13
|
|
12
|
-
|
13
|
-
|
14
|
+
require_relative "loofah/scrubber"
|
15
|
+
require_relative "loofah/scrubbers"
|
14
16
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
require_relative "loofah/instance_methods"
|
18
|
+
require_relative "loofah/xml/document"
|
19
|
+
require_relative "loofah/xml/document_fragment"
|
20
|
+
require_relative "loofah/html/document"
|
21
|
+
require_relative "loofah/html/document_fragment"
|
20
22
|
|
21
23
|
# == Strings and IO Objects as Input
|
22
24
|
#
|
@@ -27,14 +29,11 @@ require "loofah/html/document_fragment"
|
|
27
29
|
# quantities of docs.
|
28
30
|
#
|
29
31
|
module Loofah
|
30
|
-
# The version of Loofah you are using
|
31
|
-
VERSION = "2.3.1"
|
32
|
-
|
33
32
|
class << self
|
34
33
|
# Shortcut for Loofah::HTML::Document.parse
|
35
34
|
# This method accepts the same parameters as Nokogiri::HTML::Document.parse
|
36
35
|
def document(*args, &block)
|
37
|
-
Loofah::HTML::Document.parse(*args, &block)
|
36
|
+
remove_comments_before_html_element Loofah::HTML::Document.parse(*args, &block)
|
38
37
|
end
|
39
38
|
|
40
39
|
# Shortcut for Loofah::HTML::DocumentFragment.parse
|
@@ -79,5 +78,23 @@ module Loofah
|
|
79
78
|
def remove_extraneous_whitespace(string)
|
80
79
|
string.gsub(/\n\s*\n\s*\n/, "\n\n")
|
81
80
|
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# remove comments that exist outside of the HTML element.
|
85
|
+
#
|
86
|
+
# these comments are allowed by the HTML spec:
|
87
|
+
#
|
88
|
+
# https://www.w3.org/TR/html401/struct/global.html#h-7.1
|
89
|
+
#
|
90
|
+
# but are not scrubbed by Loofah because these nodes don't meet
|
91
|
+
# the contract that scrubbers expect of a node (e.g., it can be
|
92
|
+
# replaced, sibling and children nodes can be created).
|
93
|
+
def remove_comments_before_html_element(doc)
|
94
|
+
doc.children.each do |child|
|
95
|
+
child.unlink if child.comment?
|
96
|
+
end
|
97
|
+
doc
|
98
|
+
end
|
82
99
|
end
|
83
100
|
end
|