loofah 2.24.1 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 16850a48486ab3e9191ceff0a4fd6d768f82151049332ae162068f6712efccb8
4
- data.tar.gz: 6ccd67672b489120796711e08643cbaec9c88648622fc0c3a1ac013e49534b25
3
+ metadata.gz: 6494f909053083504e78578fee2ee3eea7ea3779f56dcc190f55bf2e0944a534
4
+ data.tar.gz: c8eb417d4a46efcffb41256ff928d51bd7503f20356450c27853c9c1b1d06539
5
5
  SHA512:
6
- metadata.gz: b2a4f569f20365f63d548506946736a20ee195a3b4149228489c39f1d6fddf2fe9c774ded5d88d0d3bd547a00110b42ab37d582f8701a01eb2a047070cc2b440
7
- data.tar.gz: 2bca5a9c58d363251e8ca5b3803a57b73e51506e9d294e45d69d1fef376b658f31901a359315c6d60974d469047e78307e7cd33005884314e98bf9d2775bd36a
6
+ metadata.gz: 89fafc68ced95a9dfa715e52a0033804e67e1935fbf0a7b4ec74708d6d1b975b1498507d2eb43cbba481562e9817db5f4d80bd797ba8470099ab7754dc0a8ef6
7
+ data.tar.gz: 6ef9cec163006ad1d7c995828cb7036ec8b7923611737c3ee2b1a98f69d2cb973847b8a39eb6b198039e258c0d10644bc5c817946524643b12c4d31e37b4e4e3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.25.0 / 2025-12-15
4
+
5
+ * Extract `Loofah::HTML5::Scrub.allowed_uri?` which operates on a string. Previously this logic was coupled to the parsed tree in `.scrub_uri_attribute`. #300 @flavorjones
6
+ * Tighten up how entities and control characters are handled when detecting allowed URIs. #301 @flavorjones
7
+
8
+
3
9
  ## 2.24.1 / 2025-05-12
4
10
 
5
11
  ### Ruby support
@@ -14,6 +14,7 @@ module Loofah
14
14
  CSS_WHITESPACE = " "
15
15
  CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
16
16
  DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
17
+ URI_PROTOCOL_REGEX = /\A[a-z][a-z0-9+\-.]*:/ # RFC 3986
17
18
 
18
19
  class << self
19
20
  def allowed_element?(element_name)
@@ -140,23 +141,33 @@ module Loofah
140
141
  attr_node.value = values.join(" ")
141
142
  end
142
143
 
144
+ # Returns true if the given URI string is safe, false otherwise.
145
+ # This method can be used to validate URI attribute values without
146
+ # requiring a Nokogiri DOM node.
147
+ def allowed_uri?(uri_string)
148
+ # this logic lifted nearly verbatim from HTML5 sanitization
149
+ val_unescaped = CGI.unescapeHTML(uri_string.gsub(CONTROL_CHARACTERS, "")).gsub("&colon;", ":").downcase
150
+ if URI_PROTOCOL_REGEX.match?(val_unescaped)
151
+ protocol = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0]
152
+ return false unless SafeList::ALLOWED_PROTOCOLS.include?(protocol)
153
+
154
+ if protocol == "data"
155
+ # permit only allowed data mediatypes
156
+ mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
157
+ mediatype, _ = mediatype.split(/[;,]/)[0..1] if mediatype
158
+ return false if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
159
+ end
160
+ end
161
+ true
162
+ end
163
+
143
164
  def scrub_uri_attribute(attr_node)
144
- # this block lifted nearly verbatim from HTML5 sanitization
145
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
146
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
147
- !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
165
+ if allowed_uri?(attr_node.value)
166
+ false
167
+ else
148
168
  attr_node.remove
149
- return true
150
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
151
- # permit only allowed data mediatypes
152
- mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
153
- mediatype, _ = mediatype.split(";")[0..1] if mediatype
154
- if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
155
- attr_node.remove
156
- return true
157
- end
169
+ true
158
170
  end
159
- false
160
171
  end
161
172
 
162
173
  #
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Loofah
4
4
  # The version of Loofah you are using
5
- VERSION = "2.24.1"
5
+ VERSION = "2.25.0"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loofah
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.24.1
4
+ version: 2.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
@@ -96,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
96
96
  - !ruby/object:Gem::Version
97
97
  version: '0'
98
98
  requirements: []
99
- rubygems_version: 3.6.8
99
+ rubygems_version: 3.6.9
100
100
  specification_version: 4
101
101
  summary: Loofah is a general library for manipulating and transforming HTML/XML documents
102
102
  and fragments, built on top of Nokogiri.