loofah 2.19.0 → 2.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/lib/loofah/html5/safelist.rb +0 -1
- data/lib/loofah/html5/scrub.rb +85 -15
- data/lib/loofah/scrubber.rb +4 -0
- data/lib/loofah/scrubbers.rb +2 -6
- data/lib/loofah/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd3edb0acdf2359d82564aca0bc13710d9f6c49157963d18953ff55bd7c14413
|
4
|
+
data.tar.gz: 3a6e11b7deb9cfb469aaf6ec919062687bd4215ef11980bded72ca298807610c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4970a6aa72265f60556dd6fd254375c86d3f83be23f3bbcc8b04df00ce0e801e8ef9e67d0a77ca6a21915be89226131c16a7f3540f02538cc2b9a369950dfebf
|
7
|
+
data.tar.gz: 27e3a06cc391ec3d9e3c966efdb6b4ce58e98c397ec87490d418406c17757e5cb0193edabaced30a9f24320c729e6730308e346610859f9f7c6d5fcc6f72cd56
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.19.1 / 2022-12-13
|
4
|
+
|
5
|
+
### Security
|
6
|
+
|
7
|
+
* Address CVE-2022-23514, inefficient regular expression complexity. See [GHSA-486f-hjj9-9vhh](https://github.com/flavorjones/loofah/security/advisories/GHSA-486f-hjj9-9vhh) for more information.
|
8
|
+
* Address CVE-2022-23515, improper neutralization of data URIs. See [GHSA-228g-948r-83gx](https://github.com/flavorjones/loofah/security/advisories/GHSA-228g-948r-83gx) for more information.
|
9
|
+
* Address CVE-2022-23516, uncontrolled recursion. See [GHSA-3x8r-x6xp-q4vm](https://github.com/flavorjones/loofah/security/advisories/GHSA-3x8r-x6xp-q4vm) for more information.
|
10
|
+
|
11
|
+
|
3
12
|
## 2.19.0 / 2022-09-14
|
4
13
|
|
5
14
|
### Features
|
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -36,24 +36,13 @@ module Loofah
|
|
36
36
|
end
|
37
37
|
|
38
38
|
if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
|
39
|
-
|
40
|
-
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
41
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
42
|
-
attr_node.remove
|
43
|
-
next
|
44
|
-
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
45
|
-
# permit only allowed data mediatypes
|
46
|
-
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
47
|
-
mediatype, _ = mediatype.split(";")[0..1] if mediatype
|
48
|
-
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
49
|
-
attr_node.remove
|
50
|
-
next
|
51
|
-
end
|
52
|
-
end
|
39
|
+
next if scrub_uri_attribute(attr_node)
|
53
40
|
end
|
41
|
+
|
54
42
|
if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
|
55
|
-
attr_node
|
43
|
+
scrub_attribute_that_allows_local_ref(attr_node)
|
56
44
|
end
|
45
|
+
|
57
46
|
if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
|
58
47
|
attr_node.remove
|
59
48
|
next
|
@@ -127,6 +116,47 @@ module Loofah
|
|
127
116
|
Crass::Parser.stringify(sanitized_tree)
|
128
117
|
end
|
129
118
|
|
119
|
+
def scrub_attribute_that_allows_local_ref(attr_node)
|
120
|
+
return unless attr_node.value
|
121
|
+
|
122
|
+
nodes = Crass::Parser.new(attr_node.value).parse_component_values
|
123
|
+
|
124
|
+
values = nodes.map do |node|
|
125
|
+
case node[:node]
|
126
|
+
when :url
|
127
|
+
if node[:value].start_with?("#")
|
128
|
+
node[:raw]
|
129
|
+
else
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
when :hash, :ident, :string
|
133
|
+
node[:raw]
|
134
|
+
else
|
135
|
+
nil
|
136
|
+
end
|
137
|
+
end.compact
|
138
|
+
|
139
|
+
attr_node.value = values.join(" ")
|
140
|
+
end
|
141
|
+
|
142
|
+
def scrub_uri_attribute(attr_node)
|
143
|
+
# this block lifted nearly verbatim from HTML5 sanitization
|
144
|
+
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
145
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
146
|
+
attr_node.remove
|
147
|
+
return true
|
148
|
+
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
149
|
+
# permit only allowed data mediatypes
|
150
|
+
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
151
|
+
mediatype, _ = mediatype.split(";")[0..1] if mediatype
|
152
|
+
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
153
|
+
attr_node.remove
|
154
|
+
return true
|
155
|
+
end
|
156
|
+
end
|
157
|
+
false
|
158
|
+
end
|
159
|
+
|
130
160
|
#
|
131
161
|
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
|
132
162
|
#
|
@@ -152,6 +182,46 @@ module Loofah
|
|
152
182
|
end.force_encoding(encoding)
|
153
183
|
end
|
154
184
|
end
|
185
|
+
|
186
|
+
def cdata_needs_escaping?(node)
|
187
|
+
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
|
188
|
+
node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
|
189
|
+
end
|
190
|
+
|
191
|
+
def cdata_escape(node)
|
192
|
+
escaped_text = escape_tags(node.text)
|
193
|
+
if Nokogiri.jruby?
|
194
|
+
node.document.create_text_node(escaped_text)
|
195
|
+
else
|
196
|
+
node.document.create_cdata(escaped_text)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
TABLE_FOR_ESCAPE_HTML__ = {
|
201
|
+
'<' => '&lt;',
|
202
|
+
'>' => '&gt;',
|
203
|
+
'&' => '&amp;',
|
204
|
+
}
|
205
|
+
|
206
|
+
def escape_tags(string)
|
207
|
+
# modified version of CGI.escapeHTML from ruby 3.1
|
208
|
+
enc = string.encoding
|
209
|
+
unless enc.ascii_compatible?
|
210
|
+
if enc.dummy?
|
211
|
+
origenc = enc
|
212
|
+
enc = Encoding::Converter.asciicompat_encoding(enc)
|
213
|
+
string = enc ? string.encode(enc) : string.b
|
214
|
+
end
|
215
|
+
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
|
216
|
+
string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
|
217
|
+
string.encode!(origenc) if origenc
|
218
|
+
string
|
219
|
+
else
|
220
|
+
string = string.b
|
221
|
+
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
|
222
|
+
string.force_encoding(enc)
|
223
|
+
end
|
224
|
+
end
|
155
225
|
end
|
156
226
|
end
|
157
227
|
end
|
data/lib/loofah/scrubber.rb
CHANGED
@@ -108,6 +108,10 @@ module Loofah
|
|
108
108
|
return Scrubber::CONTINUE
|
109
109
|
end
|
110
110
|
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
111
|
+
if HTML5::Scrub.cdata_needs_escaping?(node)
|
112
|
+
node.before(HTML5::Scrub.cdata_escape(node))
|
113
|
+
return Scrubber::STOP
|
114
|
+
end
|
111
115
|
return Scrubber::CONTINUE
|
112
116
|
end
|
113
117
|
Scrubber::STOP
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -100,13 +100,9 @@ module Loofah
|
|
100
100
|
|
101
101
|
def scrub(node)
|
102
102
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
103
|
-
|
104
|
-
sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
|
105
|
-
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
|
106
|
-
else
|
107
|
-
node.before node.children
|
108
|
-
end
|
103
|
+
node.before(node.children)
|
109
104
|
node.remove
|
105
|
+
return STOP
|
110
106
|
end
|
111
107
|
end
|
112
108
|
|
data/lib/loofah/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.19.0
|
4
|
+
version: 2.19.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-12-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: crass
|