loofah 2.19.0 → 2.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d59ed56910860de60170e919b3ab77b382f00eadc5d37518a7a395edabc8a4f
4
- data.tar.gz: d0ed6a2362ec8b366f4739a67c2197a24c45e0681cba6e5bd6b7b55617d492dc
3
+ metadata.gz: bd3edb0acdf2359d82564aca0bc13710d9f6c49157963d18953ff55bd7c14413
4
+ data.tar.gz: 3a6e11b7deb9cfb469aaf6ec919062687bd4215ef11980bded72ca298807610c
5
5
  SHA512:
6
- metadata.gz: dabaf4204cf846132d0b2962cef11534e3043ae8b2be39cbf23dea2fabc3722d83fba8805a5453fca6f2ec80f13c48c62726751f6acf06d3fdfd427297f07968
7
- data.tar.gz: 84d3442b65227346d62df8ea24ef0febe3212b1a1bdb61266f22cafc356467637f2a3a050d4c52672d55e081a3e040d2cb423961d571cf364978265398742f47
6
+ metadata.gz: 4970a6aa72265f60556dd6fd254375c86d3f83be23f3bbcc8b04df00ce0e801e8ef9e67d0a77ca6a21915be89226131c16a7f3540f02538cc2b9a369950dfebf
7
+ data.tar.gz: 27e3a06cc391ec3d9e3c966efdb6b4ce58e98c397ec87490d418406c17757e5cb0193edabaced30a9f24320c729e6730308e346610859f9f7c6d5fcc6f72cd56
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.19.1 / 2022-12-13
4
+
5
+ ### Security
6
+
7
+ * Address CVE-2022-23514, inefficient regular expression complexity. See [GHSA-486f-hjj9-9vhh](https://github.com/flavorjones/loofah/security/advisories/GHSA-486f-hjj9-9vhh) for more information.
8
+ * Address CVE-2022-23515, improper neutralization of data URIs. See [GHSA-228g-948r-83gx](https://github.com/flavorjones/loofah/security/advisories/GHSA-228g-948r-83gx) for more information.
9
+ * Address CVE-2022-23516, uncontrolled recursion. See [GHSA-3x8r-x6xp-q4vm](https://github.com/flavorjones/loofah/security/advisories/GHSA-3x8r-x6xp-q4vm) for more information.
10
+
11
+
3
12
  ## 2.19.0 / 2022-09-14
4
13
 
5
14
  ### Features
@@ -999,7 +999,6 @@ module Loofah
999
999
  "image/gif",
1000
1000
  "image/jpeg",
1001
1001
  "image/png",
1002
- "image/svg+xml",
1003
1002
  "text/css",
1004
1003
  "text/plain",
1005
1004
  ])
@@ -36,24 +36,13 @@ module Loofah
36
36
  end
37
37
 
38
38
  if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
39
- # this block lifted nearly verbatim from HTML5 sanitization
40
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
41
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
42
- attr_node.remove
43
- next
44
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
45
- # permit only allowed data mediatypes
46
- mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
47
- mediatype, _ = mediatype.split(";")[0..1] if mediatype
48
- if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
49
- attr_node.remove
50
- next
51
- end
52
- end
39
+ next if scrub_uri_attribute(attr_node)
53
40
  end
41
+
54
42
  if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
55
- attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
43
+ scrub_attribute_that_allows_local_ref(attr_node)
56
44
  end
45
+
57
46
  if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
58
47
  attr_node.remove
59
48
  next
@@ -127,6 +116,47 @@ module Loofah
127
116
  Crass::Parser.stringify(sanitized_tree)
128
117
  end
129
118
 
119
+ def scrub_attribute_that_allows_local_ref(attr_node)
120
+ return unless attr_node.value
121
+
122
+ nodes = Crass::Parser.new(attr_node.value).parse_component_values
123
+
124
+ values = nodes.map do |node|
125
+ case node[:node]
126
+ when :url
127
+ if node[:value].start_with?("#")
128
+ node[:raw]
129
+ else
130
+ nil
131
+ end
132
+ when :hash, :ident, :string
133
+ node[:raw]
134
+ else
135
+ nil
136
+ end
137
+ end.compact
138
+
139
+ attr_node.value = values.join(" ")
140
+ end
141
+
142
+ def scrub_uri_attribute(attr_node)
143
+ # this block lifted nearly verbatim from HTML5 sanitization
144
+ val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
145
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
146
+ attr_node.remove
147
+ return true
148
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
149
+ # permit only allowed data mediatypes
150
+ mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
151
+ mediatype, _ = mediatype.split(";")[0..1] if mediatype
152
+ if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
153
+ attr_node.remove
154
+ return true
155
+ end
156
+ end
157
+ false
158
+ end
159
+
130
160
  #
131
161
  # libxml2 >= 2.9.2 fails to escape comments within some attributes.
132
162
  #
@@ -152,6 +182,46 @@ module Loofah
152
182
  end.force_encoding(encoding)
153
183
  end
154
184
  end
185
+
186
+ def cdata_needs_escaping?(node)
187
+ # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
188
+ node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
189
+ end
190
+
191
+ def cdata_escape(node)
192
+ escaped_text = escape_tags(node.text)
193
+ if Nokogiri.jruby?
194
+ node.document.create_text_node(escaped_text)
195
+ else
196
+ node.document.create_cdata(escaped_text)
197
+ end
198
+ end
199
+
200
+ TABLE_FOR_ESCAPE_HTML__ = {
201
+ '<' => '&lt;',
202
+ '>' => '&gt;',
203
+ '&' => '&amp;',
204
+ }
205
+
206
+ def escape_tags(string)
207
+ # modified version of CGI.escapeHTML from ruby 3.1
208
+ enc = string.encoding
209
+ unless enc.ascii_compatible?
210
+ if enc.dummy?
211
+ origenc = enc
212
+ enc = Encoding::Converter.asciicompat_encoding(enc)
213
+ string = enc ? string.encode(enc) : string.b
214
+ end
215
+ table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
216
+ string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
217
+ string.encode!(origenc) if origenc
218
+ string
219
+ else
220
+ string = string.b
221
+ string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
222
+ string.force_encoding(enc)
223
+ end
224
+ end
155
225
  end
156
226
  end
157
227
  end
@@ -108,6 +108,10 @@ module Loofah
108
108
  return Scrubber::CONTINUE
109
109
  end
110
110
  when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
111
+ if HTML5::Scrub.cdata_needs_escaping?(node)
112
+ node.before(HTML5::Scrub.cdata_escape(node))
113
+ return Scrubber::STOP
114
+ end
111
115
  return Scrubber::CONTINUE
112
116
  end
113
117
  Scrubber::STOP
@@ -100,13 +100,9 @@ module Loofah
100
100
 
101
101
  def scrub(node)
102
102
  return CONTINUE if html5lib_sanitize(node) == CONTINUE
103
- if node.children.length == 1 && node.children.first.cdata?
104
- sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
105
- node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
106
- else
107
- node.before node.children
108
- end
103
+ node.before(node.children)
109
104
  node.remove
105
+ return STOP
110
106
  end
111
107
  end
112
108
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  module Loofah
3
3
  # The version of Loofah you are using
4
- VERSION = "2.19.0"
4
+ VERSION = "2.19.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loofah
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.19.0
4
+ version: 2.19.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-09-14 00:00:00.000000000 Z
12
+ date: 2022-12-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: crass