loofah 2.19.0 → 2.19.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/lib/loofah/html5/safelist.rb +0 -1
- data/lib/loofah/html5/scrub.rb +85 -15
- data/lib/loofah/scrubber.rb +4 -0
- data/lib/loofah/scrubbers.rb +2 -6
- data/lib/loofah/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd3edb0acdf2359d82564aca0bc13710d9f6c49157963d18953ff55bd7c14413
|
4
|
+
data.tar.gz: 3a6e11b7deb9cfb469aaf6ec919062687bd4215ef11980bded72ca298807610c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4970a6aa72265f60556dd6fd254375c86d3f83be23f3bbcc8b04df00ce0e801e8ef9e67d0a77ca6a21915be89226131c16a7f3540f02538cc2b9a369950dfebf
|
7
|
+
data.tar.gz: 27e3a06cc391ec3d9e3c966efdb6b4ce58e98c397ec87490d418406c17757e5cb0193edabaced30a9f24320c729e6730308e346610859f9f7c6d5fcc6f72cd56
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.19.1 / 2022-12-13
|
4
|
+
|
5
|
+
### Security
|
6
|
+
|
7
|
+
* Address CVE-2022-23514, inefficient regular expression complexity. See [GHSA-486f-hjj9-9vhh](https://github.com/flavorjones/loofah/security/advisories/GHSA-486f-hjj9-9vhh) for more information.
|
8
|
+
* Address CVE-2022-23515, improper neutralization of data URIs. See [GHSA-228g-948r-83gx](https://github.com/flavorjones/loofah/security/advisories/GHSA-228g-948r-83gx) for more information.
|
9
|
+
* Address CVE-2022-23516, uncontrolled recursion. See [GHSA-3x8r-x6xp-q4vm](https://github.com/flavorjones/loofah/security/advisories/GHSA-3x8r-x6xp-q4vm) for more information.
|
10
|
+
|
11
|
+
|
3
12
|
## 2.19.0 / 2022-09-14
|
4
13
|
|
5
14
|
### Features
|
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -36,24 +36,13 @@ module Loofah
|
|
36
36
|
end
|
37
37
|
|
38
38
|
if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
|
39
|
-
|
40
|
-
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
41
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
42
|
-
attr_node.remove
|
43
|
-
next
|
44
|
-
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
45
|
-
# permit only allowed data mediatypes
|
46
|
-
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
47
|
-
mediatype, _ = mediatype.split(";")[0..1] if mediatype
|
48
|
-
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
49
|
-
attr_node.remove
|
50
|
-
next
|
51
|
-
end
|
52
|
-
end
|
39
|
+
next if scrub_uri_attribute(attr_node)
|
53
40
|
end
|
41
|
+
|
54
42
|
if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
|
55
|
-
attr_node
|
43
|
+
scrub_attribute_that_allows_local_ref(attr_node)
|
56
44
|
end
|
45
|
+
|
57
46
|
if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
|
58
47
|
attr_node.remove
|
59
48
|
next
|
@@ -127,6 +116,47 @@ module Loofah
|
|
127
116
|
Crass::Parser.stringify(sanitized_tree)
|
128
117
|
end
|
129
118
|
|
119
|
+
def scrub_attribute_that_allows_local_ref(attr_node)
|
120
|
+
return unless attr_node.value
|
121
|
+
|
122
|
+
nodes = Crass::Parser.new(attr_node.value).parse_component_values
|
123
|
+
|
124
|
+
values = nodes.map do |node|
|
125
|
+
case node[:node]
|
126
|
+
when :url
|
127
|
+
if node[:value].start_with?("#")
|
128
|
+
node[:raw]
|
129
|
+
else
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
when :hash, :ident, :string
|
133
|
+
node[:raw]
|
134
|
+
else
|
135
|
+
nil
|
136
|
+
end
|
137
|
+
end.compact
|
138
|
+
|
139
|
+
attr_node.value = values.join(" ")
|
140
|
+
end
|
141
|
+
|
142
|
+
def scrub_uri_attribute(attr_node)
|
143
|
+
# this block lifted nearly verbatim from HTML5 sanitization
|
144
|
+
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
145
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
146
|
+
attr_node.remove
|
147
|
+
return true
|
148
|
+
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
149
|
+
# permit only allowed data mediatypes
|
150
|
+
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
151
|
+
mediatype, _ = mediatype.split(";")[0..1] if mediatype
|
152
|
+
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
153
|
+
attr_node.remove
|
154
|
+
return true
|
155
|
+
end
|
156
|
+
end
|
157
|
+
false
|
158
|
+
end
|
159
|
+
|
130
160
|
#
|
131
161
|
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
|
132
162
|
#
|
@@ -152,6 +182,46 @@ module Loofah
|
|
152
182
|
end.force_encoding(encoding)
|
153
183
|
end
|
154
184
|
end
|
185
|
+
|
186
|
+
def cdata_needs_escaping?(node)
|
187
|
+
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
|
188
|
+
node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
|
189
|
+
end
|
190
|
+
|
191
|
+
def cdata_escape(node)
|
192
|
+
escaped_text = escape_tags(node.text)
|
193
|
+
if Nokogiri.jruby?
|
194
|
+
node.document.create_text_node(escaped_text)
|
195
|
+
else
|
196
|
+
node.document.create_cdata(escaped_text)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
TABLE_FOR_ESCAPE_HTML__ = {
|
201
|
+
'<' => '&lt;',
|
202
|
+
'>' => '&gt;',
|
203
|
+
'&' => '&amp;',
|
204
|
+
}
|
205
|
+
|
206
|
+
def escape_tags(string)
|
207
|
+
# modified version of CGI.escapeHTML from ruby 3.1
|
208
|
+
enc = string.encoding
|
209
|
+
unless enc.ascii_compatible?
|
210
|
+
if enc.dummy?
|
211
|
+
origenc = enc
|
212
|
+
enc = Encoding::Converter.asciicompat_encoding(enc)
|
213
|
+
string = enc ? string.encode(enc) : string.b
|
214
|
+
end
|
215
|
+
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
|
216
|
+
string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
|
217
|
+
string.encode!(origenc) if origenc
|
218
|
+
string
|
219
|
+
else
|
220
|
+
string = string.b
|
221
|
+
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
|
222
|
+
string.force_encoding(enc)
|
223
|
+
end
|
224
|
+
end
|
155
225
|
end
|
156
226
|
end
|
157
227
|
end
|
data/lib/loofah/scrubber.rb
CHANGED
@@ -108,6 +108,10 @@ module Loofah
|
|
108
108
|
return Scrubber::CONTINUE
|
109
109
|
end
|
110
110
|
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
111
|
+
if HTML5::Scrub.cdata_needs_escaping?(node)
|
112
|
+
node.before(HTML5::Scrub.cdata_escape(node))
|
113
|
+
return Scrubber::STOP
|
114
|
+
end
|
111
115
|
return Scrubber::CONTINUE
|
112
116
|
end
|
113
117
|
Scrubber::STOP
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -100,13 +100,9 @@ module Loofah
|
|
100
100
|
|
101
101
|
def scrub(node)
|
102
102
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
103
|
-
|
104
|
-
sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
|
105
|
-
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
|
106
|
-
else
|
107
|
-
node.before node.children
|
108
|
-
end
|
103
|
+
node.before(node.children)
|
109
104
|
node.remove
|
105
|
+
return STOP
|
110
106
|
end
|
111
107
|
end
|
112
108
|
|
data/lib/loofah/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.19.0
|
4
|
+
version: 2.19.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-12-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: crass
|