loofah 2.24.0 → 2.25.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e003942bbd3be7a5d576d4b48002964664c844b3aa66aa24a6b55e536ecd0662
4
- data.tar.gz: f959fadbc762a26167cc46563d92e8be0d085372467b774dcbb3f18dabc6d8c8
3
+ metadata.gz: 6494f909053083504e78578fee2ee3eea7ea3779f56dcc190f55bf2e0944a534
4
+ data.tar.gz: c8eb417d4a46efcffb41256ff928d51bd7503f20356450c27853c9c1b1d06539
5
5
  SHA512:
6
- metadata.gz: 254caef30657885b063ae85c0ac01ba386aa387571a7f5587dfee57f4e803047f56579d11425bb01a6aa50209b3b1755d7e2c613412f4783f3ce5e6d4fd70b70
7
- data.tar.gz: 4a531c380c0ce45a9c3e9ea1a87c08da487331bf56db20e6bf42b903ca1e1f3e043f72ddfc1ad82076fffcad6bd08055131952b62c40f0b6067fcac3a552cfd7
6
+ metadata.gz: 89fafc68ced95a9dfa715e52a0033804e67e1935fbf0a7b4ec74708d6d1b975b1498507d2eb43cbba481562e9817db5f4d80bd797ba8470099ab7754dc0a8ef6
7
+ data.tar.gz: 6ef9cec163006ad1d7c995828cb7036ec8b7923611737c3ee2b1a98f69d2cb973847b8a39eb6b198039e258c0d10644bc5c817946524643b12c4d31e37b4e4e3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.25.0 / 2025-12-15
4
+
5
+ * Extract `Loofah::HTML5::Scrub.allowed_uri?` which operates on a string. Previously this logic was coupled to the parsed tree in `.scrub_uri_attribute`. #300 @flavorjones
6
+ * Tightened up how entities and control characters are handled when detecting allowed URIs. #301 @flavorjones
7
+
8
+
9
+ ## 2.24.1 / 2025-05-12
10
+
11
+ ### Ruby support
12
+
13
+ * Import only what's needed from `cgi` to support Ruby 3.5. #296 @Earlopain
14
+
15
+
3
16
  ## 2.24.0 / 2024-12-24
4
17
 
5
18
  ### Added
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "cgi"
3
+ require "cgi/escape"
4
+ require "cgi/util" if RUBY_VERSION < "3.5"
4
5
  require "crass"
5
6
 
6
7
  module Loofah
@@ -13,6 +14,7 @@ module Loofah
13
14
  CSS_WHITESPACE = " "
14
15
  CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
15
16
  DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
17
+ URI_PROTOCOL_REGEX = /\A[a-z][a-z0-9+\-.]*:/ # RFC 3986
16
18
 
17
19
  class << self
18
20
  def allowed_element?(element_name)
@@ -139,23 +141,33 @@ module Loofah
139
141
  attr_node.value = values.join(" ")
140
142
  end
141
143
 
144
+ # Returns true if the given URI string is safe, false otherwise.
145
+ # This method can be used to validate URI attribute values without
146
+ # requiring a Nokogiri DOM node.
147
+ def allowed_uri?(uri_string)
148
+ # this logic lifted nearly verbatim from HTML5 sanitization
149
+ val_unescaped = CGI.unescapeHTML(uri_string.gsub(CONTROL_CHARACTERS, "")).gsub("&colon;", ":").downcase
150
+ if URI_PROTOCOL_REGEX.match?(val_unescaped)
151
+ protocol = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0]
152
+ return false unless SafeList::ALLOWED_PROTOCOLS.include?(protocol)
153
+
154
+ if protocol == "data"
155
+ # permit only allowed data mediatypes
156
+ mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
157
+ mediatype, _ = mediatype.split(/[;,]/)[0..1] if mediatype
158
+ return false if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
159
+ end
160
+ end
161
+ true
162
+ end
163
+
142
164
  def scrub_uri_attribute(attr_node)
143
- # this block lifted nearly verbatim from HTML5 sanitization
144
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
145
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
146
- !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
165
+ if allowed_uri?(attr_node.value)
166
+ false
167
+ else
147
168
  attr_node.remove
148
- return true
149
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
150
- # permit only allowed data mediatypes
151
- mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
152
- mediatype, _ = mediatype.split(";")[0..1] if mediatype
153
- if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
154
- attr_node.remove
155
- return true
156
- end
169
+ true
157
170
  end
158
- false
159
171
  end
160
172
 
161
173
  #
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Loofah
4
4
  # The version of Loofah you are using
5
- VERSION = "2.24.0"
5
+ VERSION = "2.25.0"
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loofah
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.24.0
4
+ version: 2.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
8
8
  - Bryan Helmkamp
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-01 00:00:00.000000000 Z
11
+ date: 1980-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: crass
@@ -96,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
96
96
  - !ruby/object:Gem::Version
97
97
  version: '0'
98
98
  requirements: []
99
- rubygems_version: 3.6.2
99
+ rubygems_version: 3.6.9
100
100
  specification_version: 4
101
101
  summary: Loofah is a general library for manipulating and transforming HTML/XML documents
102
102
  and fragments, built on top of Nokogiri.