loofah 2.3.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.gemtest +0 -0
- data/CHANGELOG.md +336 -0
- data/Gemfile +22 -0
- data/MIT-LICENSE.txt +23 -0
- data/Manifest.txt +41 -0
- data/README.md +363 -0
- data/Rakefile +81 -0
- data/SECURITY.md +18 -0
- data/benchmark/benchmark.rb +149 -0
- data/benchmark/fragment.html +96 -0
- data/benchmark/helper.rb +73 -0
- data/benchmark/www.slashdot.com.html +2560 -0
- data/lib/loofah.rb +83 -0
- data/lib/loofah/elements.rb +92 -0
- data/lib/loofah/helpers.rb +103 -0
- data/lib/loofah/html/document.rb +18 -0
- data/lib/loofah/html/document_fragment.rb +40 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +26 -0
- data/lib/loofah/html5/safelist.rb +796 -0
- data/lib/loofah/html5/scrub.rb +133 -0
- data/lib/loofah/instance_methods.rb +127 -0
- data/lib/loofah/metahelpers.rb +13 -0
- data/lib/loofah/scrubber.rb +133 -0
- data/lib/loofah/scrubbers.rb +297 -0
- data/lib/loofah/xml/document.rb +13 -0
- data/lib/loofah/xml/document_fragment.rb +23 -0
- data/test/assets/msword.html +63 -0
- data/test/assets/testdata_sanitizer_tests1.dat +502 -0
- data/test/helper.rb +18 -0
- data/test/html5/test_sanitizer.rb +401 -0
- data/test/html5/test_scrub.rb +10 -0
- data/test/integration/test_ad_hoc.rb +220 -0
- data/test/integration/test_helpers.rb +43 -0
- data/test/integration/test_html.rb +72 -0
- data/test/integration/test_scrubbers.rb +400 -0
- data/test/integration/test_xml.rb +55 -0
- data/test/unit/test_api.rb +142 -0
- data/test/unit/test_encoding.rb +20 -0
- data/test/unit/test_helpers.rb +62 -0
- data/test/unit/test_scrubber.rb +229 -0
- data/test/unit/test_scrubbers.rb +14 -0
- metadata +287 -0
data/lib/loofah.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
3
|
+
require "nokogiri"
|
4
|
+
|
5
|
+
require "loofah/metahelpers"
|
6
|
+
require "loofah/elements"
|
7
|
+
|
8
|
+
require "loofah/html5/safelist"
|
9
|
+
require "loofah/html5/libxml2_workarounds"
|
10
|
+
require "loofah/html5/scrub"
|
11
|
+
|
12
|
+
require "loofah/scrubber"
|
13
|
+
require "loofah/scrubbers"
|
14
|
+
|
15
|
+
require "loofah/instance_methods"
|
16
|
+
require "loofah/xml/document"
|
17
|
+
require "loofah/xml/document_fragment"
|
18
|
+
require "loofah/html/document"
|
19
|
+
require "loofah/html/document_fragment"
|
20
|
+
|
21
|
+
# == Strings and IO Objects as Input
|
22
|
+
#
|
23
|
+
# Loofah.document and Loofah.fragment accept any IO object in addition
|
24
|
+
# to accepting a string. That IO object could be a file, or a socket,
|
25
|
+
# or a StringIO, or anything that responds to +read+ and
|
26
|
+
# +close+. Which makes it particularly easy to sanitize mass
|
27
|
+
# quantities of docs.
|
28
|
+
#
|
29
|
+
module Loofah
  # The version of Loofah you are using
  VERSION = "2.3.1"

  class << self
    # Shortcut for Loofah::HTML::Document.parse
    #
    # All positional arguments and the optional block are forwarded
    # untouched, so this method accepts the same parameters as
    # Nokogiri::HTML::Document.parse.
    def document(*args, &block)
      Loofah::HTML::Document.parse(*args, &block)
    end

    # Shortcut for Loofah::HTML::DocumentFragment.parse
    #
    # All positional arguments and the optional block are forwarded
    # untouched, so this method accepts the same parameters as
    # Nokogiri::HTML::DocumentFragment.parse.
    def fragment(*args, &block)
      Loofah::HTML::DocumentFragment.parse(*args, &block)
    end

    # Shortcut for Loofah.fragment(string_or_io).scrub!(method)
    #
    # Returns whatever +scrub!+ returns on the parsed fragment.
    def scrub_fragment(string_or_io, method)
      Loofah.fragment(string_or_io).scrub!(method)
    end

    # Shortcut for Loofah.document(string_or_io).scrub!(method)
    #
    # Returns whatever +scrub!+ returns on the parsed document.
    def scrub_document(string_or_io, method)
      Loofah.document(string_or_io).scrub!(method)
    end

    # Shortcut for Loofah::XML::Document.parse
    #
    # All positional arguments and the optional block are forwarded
    # untouched, so this method accepts the same parameters as
    # Nokogiri::XML::Document.parse.
    def xml_document(*args, &block)
      Loofah::XML::Document.parse(*args, &block)
    end

    # Shortcut for Loofah::XML::DocumentFragment.parse
    #
    # All positional arguments and the optional block are forwarded
    # untouched, so this method accepts the same parameters as
    # Nokogiri::XML::DocumentFragment.parse.
    def xml_fragment(*args, &block)
      Loofah::XML::DocumentFragment.parse(*args, &block)
    end

    # Shortcut for Loofah.xml_fragment(string_or_io).scrub!(method)
    #
    # Returns whatever +scrub!+ returns on the parsed fragment.
    def scrub_xml_fragment(string_or_io, method)
      Loofah.xml_fragment(string_or_io).scrub!(method)
    end

    # Shortcut for Loofah.xml_document(string_or_io).scrub!(method)
    #
    # Returns whatever +scrub!+ returns on the parsed document.
    def scrub_xml_document(string_or_io, method)
      Loofah.xml_document(string_or_io).scrub!(method)
    end

    # A helper to remove extraneous whitespace from text-ified HTML
    #
    # Any span that starts and ends with a newline, contains at least
    # one more newline, and otherwise holds only whitespace is replaced
    # by a single blank line ("\n\n"). Returns a new string; the
    # argument is not modified.
    def remove_extraneous_whitespace(string)
      string.gsub(/\n\s*\n\s*\n/, "\n\n")
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Loofah
  # Sets of HTML element names grouped by whether they are block-level.
  module Elements
    # Block-level element names for HTML4.
    STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
      address
      blockquote
      center
      dir
      div
      dl
      fieldset
      form
      h1
      h2
      h3
      h4
      h5
      h6
      hr
      isindex
      menu
      noframes
      noscript
      ol
      p
      pre
      table
      ul
    ]

    # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
    STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
      address
      article
      aside
      blockquote
      canvas
      dd
      div
      dl
      dt
      fieldset
      figcaption
      figure
      footer
      form
      h1
      h2
      h3
      h4
      h5
      h6
      header
      hgroup
      hr
      li
      main
      nav
      noscript
      ol
      output
      p
      pre
      section
      table
      tfoot
      ul
      video
    ]

    # Union of the HTML4 and HTML5 strict block-level sets.
    STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5

    # The following elements may also be considered block-level
    # elements since they may contain block-level elements
    LOOSE_BLOCK_LEVEL = Set.new %w[
      dd
      dt
      frameset
      li
      tbody
      td
      tfoot
      th
      thead
      tr
    ]

    # Every element name treated as block-level, strict or loose.
    BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
  end

  # The sets above are deliberately left unfrozen: this helper is handed
  # the module so it can post-process its Set constants.
  ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::Elements
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module Loofah
  # Convenience helpers intended as replacements for Rails view sanitizers.
  module Helpers
    class << self
      #
      #  A replacement for Rails's built-in +strip_tags+ helper.
      #
      #   Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
      #
      def strip_tags(string_or_io)
        Loofah.fragment(string_or_io).text
      end

      #
      #  A replacement for Rails's built-in +sanitize+ helper.
      #
      #  Parses the input as a fragment, scrubs it with the +:strip+
      #  scrubber, removes every top-level +form+ element, and returns
      #  the serialized result.
      #
      #   Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>") # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
      #
      #  NOTE(review): the expected output above was restored from
      #  entity-decoded text; confirm it against what the :strip
      #  scrubber actually returns.
      #
      def sanitize(string_or_io)
        loofah_fragment = Loofah.fragment(string_or_io)
        loofah_fragment.scrub!(:strip)
        loofah_fragment.xpath("./form").each(&:remove)
        loofah_fragment.to_s
      end

      #
      #  A replacement for Rails's built-in +sanitize_css+ helper.
      #
      #   Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
      #
      def sanitize_css(style_string)
        ::Loofah::HTML5::Scrub.scrub_css style_string
      end

      #
      #  A helper to remove extraneous whitespace from text-ified HTML
      #  TODO: remove this in a future major-point-release.
      #
      #  Delegates to Loofah.remove_extraneous_whitespace.
      #
      def remove_extraneous_whitespace(string)
        Loofah.remove_extraneous_whitespace string
      end
    end

    module ActionView
      module ClassMethods # :nodoc:
        # Memoized FullSanitizer instance for the extending object.
        def full_sanitizer
          @full_sanitizer ||= ::Loofah::Helpers::ActionView::FullSanitizer.new
        end

        # Memoized SafeListSanitizer instance for the extending object.
        def safe_list_sanitizer
          @safe_list_sanitizer ||= ::Loofah::Helpers::ActionView::SafeListSanitizer.new
        end

        # Deprecated alias for +safe_list_sanitizer+; emits a warning on every call.
        def white_list_sanitizer
          warn "warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead."
          safe_list_sanitizer
        end
      end

      #
      #  Replacement class for Rails's HTML::FullSanitizer.
      #
      #  To use by default, call this in an application initializer:
      #
      #    ActionView::Helpers::SanitizeHelper.full_sanitizer = ::Loofah::Helpers::ActionView::FullSanitizer.new
      #
      #  Or, to generally opt-in to Loofah's view sanitizers:
      #
      #    Loofah::Helpers::ActionView.set_as_default_sanitizer
      #
      #  NOTE(review): no +set_as_default_sanitizer+ method is defined in
      #  this module as shown here; confirm it exists before relying on it.
      #
      class FullSanitizer
        # Strips all tags from +html+. Extra arguments are accepted and ignored.
        def sanitize(html, *args)
          Loofah::Helpers.strip_tags html
        end
      end

      #
      #  Replacement class for Rails's HTML::WhiteListSanitizer.
      #
      #  To use by default, call this in an application initializer:
      #
      #    ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = ::Loofah::Helpers::ActionView::SafeListSanitizer.new
      #
      #  Or, to generally opt-in to Loofah's view sanitizers:
      #
      #    Loofah::Helpers::ActionView.set_as_default_sanitizer
      #
      #  NOTE(review): no +set_as_default_sanitizer+ method is defined in
      #  this module as shown here; confirm it exists before relying on it.
      #
      class SafeListSanitizer
        # Sanitizes +html+ via Loofah::Helpers.sanitize. Extra arguments are accepted and ignored.
        def sanitize(html, *args)
          Loofah::Helpers.sanitize html
        end

        # Sanitizes +style_string+ via Loofah::Helpers.sanitize_css. Extra arguments are accepted and ignored.
        def sanitize_css(style_string, *args)
          Loofah::Helpers.sanitize_css style_string
        end
      end

      # Backward-compatible name for SafeListSanitizer, flagged as
      # deprecated when the running Ruby supports deprecate_constant.
      WhiteListSanitizer = SafeListSanitizer
      if Object.respond_to?(:deprecate_constant)
        deprecate_constant :WhiteListSanitizer
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Loofah
  module HTML # :nodoc:
    #
    #  Subclass of Nokogiri::HTML::Document.
    #
    #  See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
    #
    class Document < Nokogiri::HTML::Document
      include Loofah::ScrubBehavior::Node
      include Loofah::DocumentDecorator
      include Loofah::TextBehavior

      # Returns the node matched by the XPath "/html/body", or whatever
      # +at_xpath+ yields when nothing matches.
      def serialize_root
        at_xpath("/html/body")
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Loofah
  module HTML # :nodoc:
    #
    #  Subclass of Nokogiri::HTML::DocumentFragment.
    #
    #  See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
    #
    class DocumentFragment < Nokogiri::HTML::DocumentFragment
      include Loofah::TextBehavior

      class << self
        #
        #  Overridden Nokogiri::HTML::DocumentFragment
        #  constructor. Applications should use Loofah.fragment to
        #  parse a fragment.
        #
        #  A fresh Loofah::HTML::Document is created to own the fragment.
        #  Its encoding is the explicit +encoding+ argument when given;
        #  otherwise the encoding name of +tags+ if it responds to
        #  +encoding+; otherwise 'UTF-8'.
        #
        def parse(tags, encoding = nil)
          doc = Loofah::HTML::Document.new

          encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
          doc.encoding = encoding

          new(doc, tags)
        end
      end

      #
      #  Returns the HTML markup contained by the fragment
      #
      def to_s
        serialize_root.children.to_s
      end
      alias :serialize :to_s

      # Returns the fragment's direct +body+ child when one exists,
      # otherwise the fragment itself.
      def serialize_root
        at_xpath("./body") || self
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
module Loofah
  #
  #  constants related to working around unhelpful libxml2 behavior
  #
  #  ಠ_ಠ
  #
  module LibxmlWorkarounds
    #
    #  these attributes and qualifying parent tags are determined by the code at:
    #
    #    https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
    #
    #  see comments about CVE-2018-8048 within the tests for more information
    #
    BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
      href
      action
      src
      name
    ]

    # Maps an attribute name to the single tag name that qualifies it.
    # Only "name" has an entry, and it is qualified by "a".
    BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
  end
end
|
@@ -0,0 +1,796 @@
|
|
1
|
+
require "set"
|
2
|
+
|
3
|
+
module Loofah
|
4
|
+
module HTML5 # :nodoc:
|
5
|
+
#
|
6
|
+
# HTML safelist lifted from HTML5lib sanitizer code:
|
7
|
+
#
|
8
|
+
# http://code.google.com/p/html5lib/
|
9
|
+
#
|
10
|
+
# <html5_license>
|
11
|
+
#
|
12
|
+
# Copyright (c) 2006-2008 The Authors
|
13
|
+
#
|
14
|
+
# Contributors:
|
15
|
+
# James Graham - jg307@cam.ac.uk
|
16
|
+
# Anne van Kesteren - annevankesteren@gmail.com
|
17
|
+
# Lachlan Hunt - lachlan.hunt@lachy.id.au
|
18
|
+
# Matt McDonald - kanashii@kanashii.ca
|
19
|
+
# Sam Ruby - rubys@intertwingly.net
|
20
|
+
# Ian Hickson (Google) - ian@hixie.ch
|
21
|
+
# Thomas Broyer - t.broyer@ltgt.net
|
22
|
+
# Jacques Distler - distler@golem.ph.utexas.edu
|
23
|
+
# Henri Sivonen - hsivonen@iki.fi
|
24
|
+
# The Mozilla Foundation (contributions from Henri Sivonen since 2008)
|
25
|
+
#
|
26
|
+
# Permission is hereby granted, free of charge, to any person
|
27
|
+
# obtaining a copy of this software and associated documentation
|
28
|
+
# files (the "Software"), to deal in the Software without
|
29
|
+
# restriction, including without limitation the rights to use, copy,
|
30
|
+
# modify, merge, publish, distribute, sublicense, and/or sell copies
|
31
|
+
# of the Software, and to permit persons to whom the Software is
|
32
|
+
# furnished to do so, subject to the following conditions:
|
33
|
+
#
|
34
|
+
# The above copyright notice and this permission notice shall be
|
35
|
+
# included in all copies or substantial portions of the Software.
|
36
|
+
#
|
37
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
38
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
39
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
40
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
41
|
+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
42
|
+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
43
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
44
|
+
# DEALINGS IN THE SOFTWARE.
|
45
|
+
#
|
46
|
+
# </html5_license>
|
47
|
+
module SafeList
|
48
|
+
ACCEPTABLE_ELEMENTS = Set.new([
|
49
|
+
"a",
|
50
|
+
"abbr",
|
51
|
+
"acronym",
|
52
|
+
"address",
|
53
|
+
"area",
|
54
|
+
"article",
|
55
|
+
"aside",
|
56
|
+
"audio",
|
57
|
+
"b",
|
58
|
+
"bdi",
|
59
|
+
"bdo",
|
60
|
+
"big",
|
61
|
+
"blockquote",
|
62
|
+
"br",
|
63
|
+
"button",
|
64
|
+
"canvas",
|
65
|
+
"caption",
|
66
|
+
"center",
|
67
|
+
"cite",
|
68
|
+
"code",
|
69
|
+
"col",
|
70
|
+
"colgroup",
|
71
|
+
"command",
|
72
|
+
"datalist",
|
73
|
+
"dd",
|
74
|
+
"del",
|
75
|
+
"details",
|
76
|
+
"dfn",
|
77
|
+
"dir",
|
78
|
+
"div",
|
79
|
+
"dl",
|
80
|
+
"dt",
|
81
|
+
"em",
|
82
|
+
"fieldset",
|
83
|
+
"figcaption",
|
84
|
+
"figure",
|
85
|
+
"font",
|
86
|
+
"footer",
|
87
|
+
"form",
|
88
|
+
"h1",
|
89
|
+
"h2",
|
90
|
+
"h3",
|
91
|
+
"h4",
|
92
|
+
"h5",
|
93
|
+
"h6",
|
94
|
+
"header",
|
95
|
+
"hr",
|
96
|
+
"i",
|
97
|
+
"img",
|
98
|
+
"input",
|
99
|
+
"ins",
|
100
|
+
"kbd",
|
101
|
+
"label",
|
102
|
+
"legend",
|
103
|
+
"li",
|
104
|
+
"main",
|
105
|
+
"map",
|
106
|
+
"mark",
|
107
|
+
"menu",
|
108
|
+
"meter",
|
109
|
+
"nav",
|
110
|
+
"ol",
|
111
|
+
"optgroup",
|
112
|
+
"option",
|
113
|
+
"output",
|
114
|
+
"p",
|
115
|
+
"pre",
|
116
|
+
"q",
|
117
|
+
"s",
|
118
|
+
"samp",
|
119
|
+
"section",
|
120
|
+
"select",
|
121
|
+
"small",
|
122
|
+
"span",
|
123
|
+
"strike",
|
124
|
+
"strong",
|
125
|
+
"sub",
|
126
|
+
"summary",
|
127
|
+
"sup",
|
128
|
+
"table",
|
129
|
+
"tbody",
|
130
|
+
"td",
|
131
|
+
"textarea",
|
132
|
+
"tfoot",
|
133
|
+
"th",
|
134
|
+
"thead",
|
135
|
+
"time",
|
136
|
+
"tr",
|
137
|
+
"tt",
|
138
|
+
"u",
|
139
|
+
"ul",
|
140
|
+
"var",
|
141
|
+
"video",
|
142
|
+
])
|
143
|
+
|
144
|
+
MATHML_ELEMENTS = Set.new([
|
145
|
+
"annotation",
|
146
|
+
"annotation-xml",
|
147
|
+
"maction",
|
148
|
+
"math",
|
149
|
+
"merror",
|
150
|
+
"mfenced",
|
151
|
+
"mfrac",
|
152
|
+
"mi",
|
153
|
+
"mmultiscripts",
|
154
|
+
"mn",
|
155
|
+
"mo",
|
156
|
+
"mover",
|
157
|
+
"mpadded",
|
158
|
+
"mphantom",
|
159
|
+
"mprescripts",
|
160
|
+
"mroot",
|
161
|
+
"mrow",
|
162
|
+
"mspace",
|
163
|
+
"msqrt",
|
164
|
+
"mstyle",
|
165
|
+
"msub",
|
166
|
+
"msubsup",
|
167
|
+
"msup",
|
168
|
+
"mtable",
|
169
|
+
"mtd",
|
170
|
+
"mtext",
|
171
|
+
"mtr",
|
172
|
+
"munder",
|
173
|
+
"munderover",
|
174
|
+
"none",
|
175
|
+
"semantics",
|
176
|
+
])
|
177
|
+
|
178
|
+
SVG_ELEMENTS = Set.new([
|
179
|
+
"a",
|
180
|
+
"animate",
|
181
|
+
"animateColor",
|
182
|
+
"animateMotion",
|
183
|
+
"animateTransform",
|
184
|
+
"circle",
|
185
|
+
"clipPath",
|
186
|
+
"defs",
|
187
|
+
"desc",
|
188
|
+
"ellipse",
|
189
|
+
"feGaussianBlur",
|
190
|
+
"filter",
|
191
|
+
"font-face",
|
192
|
+
"font-face-name",
|
193
|
+
"font-face-src",
|
194
|
+
"foreignObject",
|
195
|
+
"g",
|
196
|
+
"glyph",
|
197
|
+
"hkern",
|
198
|
+
"line",
|
199
|
+
"linearGradient",
|
200
|
+
"marker",
|
201
|
+
"mask",
|
202
|
+
"metadata",
|
203
|
+
"missing-glyph",
|
204
|
+
"mpath",
|
205
|
+
"path",
|
206
|
+
"polygon",
|
207
|
+
"polyline",
|
208
|
+
"radialGradient",
|
209
|
+
"rect",
|
210
|
+
"set",
|
211
|
+
"stop",
|
212
|
+
"svg",
|
213
|
+
"switch",
|
214
|
+
"symbol",
|
215
|
+
"text",
|
216
|
+
"textPath",
|
217
|
+
"title",
|
218
|
+
"tspan",
|
219
|
+
"use",
|
220
|
+
])
|
221
|
+
|
222
|
+
ACCEPTABLE_ATTRIBUTES = Set.new([
|
223
|
+
"abbr",
|
224
|
+
"accept",
|
225
|
+
"accept-charset",
|
226
|
+
"accesskey",
|
227
|
+
"action",
|
228
|
+
"align",
|
229
|
+
"alt",
|
230
|
+
"axis",
|
231
|
+
"border",
|
232
|
+
"cellpadding",
|
233
|
+
"cellspacing",
|
234
|
+
"char",
|
235
|
+
"charoff",
|
236
|
+
"charset",
|
237
|
+
"checked",
|
238
|
+
"cite",
|
239
|
+
"class",
|
240
|
+
"clear",
|
241
|
+
"color",
|
242
|
+
"cols",
|
243
|
+
"colspan",
|
244
|
+
"compact",
|
245
|
+
"contenteditable",
|
246
|
+
"coords",
|
247
|
+
"datetime",
|
248
|
+
"dir",
|
249
|
+
"disabled",
|
250
|
+
"enctype",
|
251
|
+
"for",
|
252
|
+
"frame",
|
253
|
+
"headers",
|
254
|
+
"height",
|
255
|
+
"href",
|
256
|
+
"hreflang",
|
257
|
+
"hspace",
|
258
|
+
"id",
|
259
|
+
"ismap",
|
260
|
+
"label",
|
261
|
+
"lang",
|
262
|
+
"longdesc",
|
263
|
+
"loop",
|
264
|
+
"loopcount",
|
265
|
+
"loopend",
|
266
|
+
"loopstart",
|
267
|
+
"maxlength",
|
268
|
+
"media",
|
269
|
+
"method",
|
270
|
+
"multiple",
|
271
|
+
"name",
|
272
|
+
"nohref",
|
273
|
+
"noshade",
|
274
|
+
"nowrap",
|
275
|
+
"poster",
|
276
|
+
"preload",
|
277
|
+
"prompt",
|
278
|
+
"readonly",
|
279
|
+
"rel",
|
280
|
+
"rev",
|
281
|
+
"rows",
|
282
|
+
"rowspan",
|
283
|
+
"rules",
|
284
|
+
"scope",
|
285
|
+
"selected",
|
286
|
+
"shape",
|
287
|
+
"size",
|
288
|
+
"span",
|
289
|
+
"src",
|
290
|
+
"start",
|
291
|
+
"style",
|
292
|
+
"summary",
|
293
|
+
"tabindex",
|
294
|
+
"target",
|
295
|
+
"title",
|
296
|
+
"type",
|
297
|
+
"usemap",
|
298
|
+
"valign",
|
299
|
+
"value",
|
300
|
+
"vspace",
|
301
|
+
"width",
|
302
|
+
"xml:lang",
|
303
|
+
])
|
304
|
+
|
305
|
+
MATHML_ATTRIBUTES = Set.new([
|
306
|
+
"actiontype",
|
307
|
+
"align",
|
308
|
+
"close",
|
309
|
+
"columnalign",
|
310
|
+
"columnlines",
|
311
|
+
"columnspacing",
|
312
|
+
"columnspan",
|
313
|
+
"depth",
|
314
|
+
"display",
|
315
|
+
"displaystyle",
|
316
|
+
"encoding",
|
317
|
+
"equalcolumns",
|
318
|
+
"equalrows",
|
319
|
+
"fence",
|
320
|
+
"fontstyle",
|
321
|
+
"fontweight",
|
322
|
+
"frame",
|
323
|
+
"height",
|
324
|
+
"linethickness",
|
325
|
+
"lspace",
|
326
|
+
"mathbackground",
|
327
|
+
"mathcolor",
|
328
|
+
"mathvariant",
|
329
|
+
"maxsize",
|
330
|
+
"minsize",
|
331
|
+
"open",
|
332
|
+
"other",
|
333
|
+
"rowalign",
|
334
|
+
"rowlines",
|
335
|
+
"rowspacing",
|
336
|
+
"rowspan",
|
337
|
+
"rspace",
|
338
|
+
"scriptlevel",
|
339
|
+
"selection",
|
340
|
+
"separator",
|
341
|
+
"separators",
|
342
|
+
"stretchy",
|
343
|
+
"width",
|
344
|
+
"xlink:href",
|
345
|
+
"xlink:show",
|
346
|
+
"xlink:type",
|
347
|
+
"xmlns",
|
348
|
+
"xmlns:xlink",
|
349
|
+
])
|
350
|
+
|
351
|
+
SVG_ATTRIBUTES = Set.new([
|
352
|
+
"accent-height",
|
353
|
+
"accumulate",
|
354
|
+
"additive",
|
355
|
+
"alphabetic",
|
356
|
+
"arabic-form",
|
357
|
+
"ascent",
|
358
|
+
"attributeName",
|
359
|
+
"attributeType",
|
360
|
+
"baseProfile",
|
361
|
+
"bbox",
|
362
|
+
"begin",
|
363
|
+
"calcMode",
|
364
|
+
"cap-height",
|
365
|
+
"class",
|
366
|
+
"clip-path",
|
367
|
+
"clip-rule",
|
368
|
+
"color",
|
369
|
+
"color-interpolation-filters",
|
370
|
+
"color-rendering",
|
371
|
+
"content",
|
372
|
+
"cx",
|
373
|
+
"cy",
|
374
|
+
"d",
|
375
|
+
"descent",
|
376
|
+
"display",
|
377
|
+
"dur",
|
378
|
+
"dx",
|
379
|
+
"dy",
|
380
|
+
"end",
|
381
|
+
"fill",
|
382
|
+
"fill-opacity",
|
383
|
+
"fill-rule",
|
384
|
+
"filterRes",
|
385
|
+
"filterUnits",
|
386
|
+
"font-family",
|
387
|
+
"font-size",
|
388
|
+
"font-stretch",
|
389
|
+
"font-style",
|
390
|
+
"font-variant",
|
391
|
+
"font-weight",
|
392
|
+
"fx",
|
393
|
+
"fy",
|
394
|
+
"g1",
|
395
|
+
"g2",
|
396
|
+
"glyph-name",
|
397
|
+
"gradientUnits",
|
398
|
+
"hanging",
|
399
|
+
"height",
|
400
|
+
"horiz-adv-x",
|
401
|
+
"horiz-origin-x",
|
402
|
+
"id",
|
403
|
+
"ideographic",
|
404
|
+
"k",
|
405
|
+
"keyPoints",
|
406
|
+
"keySplines",
|
407
|
+
"keyTimes",
|
408
|
+
"lang",
|
409
|
+
"marker-end",
|
410
|
+
"marker-mid",
|
411
|
+
"marker-start",
|
412
|
+
"markerHeight",
|
413
|
+
"markerUnits",
|
414
|
+
"markerWidth",
|
415
|
+
"maskContentUnits",
|
416
|
+
"maskUnits",
|
417
|
+
"mathematical",
|
418
|
+
"max",
|
419
|
+
"method",
|
420
|
+
"min",
|
421
|
+
"name",
|
422
|
+
"offset",
|
423
|
+
"opacity",
|
424
|
+
"orient",
|
425
|
+
"origin",
|
426
|
+
"overline-position",
|
427
|
+
"overline-thickness",
|
428
|
+
"panose-1",
|
429
|
+
"path",
|
430
|
+
"pathLength",
|
431
|
+
"patternContentUnits",
|
432
|
+
"patternTransform",
|
433
|
+
"patternUnits",
|
434
|
+
"points",
|
435
|
+
"preserveAspectRatio",
|
436
|
+
"primitiveUnits",
|
437
|
+
"r",
|
438
|
+
"refX",
|
439
|
+
"refY",
|
440
|
+
"repeatCount",
|
441
|
+
"repeatDur",
|
442
|
+
"requiredExtensions",
|
443
|
+
"requiredFeatures",
|
444
|
+
"restart",
|
445
|
+
"rotate",
|
446
|
+
"rx",
|
447
|
+
"ry",
|
448
|
+
"slope",
|
449
|
+
"spacing",
|
450
|
+
"startOffset",
|
451
|
+
"stdDeviation",
|
452
|
+
"stemh",
|
453
|
+
"stemv",
|
454
|
+
"stop-color",
|
455
|
+
"stop-opacity",
|
456
|
+
"strikethrough-position",
|
457
|
+
"strikethrough-thickness",
|
458
|
+
"stroke",
|
459
|
+
"stroke-dasharray",
|
460
|
+
"stroke-dashoffset",
|
461
|
+
"stroke-linecap",
|
462
|
+
"stroke-linejoin",
|
463
|
+
"stroke-miterlimit",
|
464
|
+
"stroke-opacity",
|
465
|
+
"stroke-width",
|
466
|
+
"systemLanguage",
|
467
|
+
"target",
|
468
|
+
"text-anchor",
|
469
|
+
"transform",
|
470
|
+
"type",
|
471
|
+
"u1",
|
472
|
+
"u2",
|
473
|
+
"underline-position",
|
474
|
+
"underline-thickness",
|
475
|
+
"unicode",
|
476
|
+
"unicode-range",
|
477
|
+
"units-per-em",
|
478
|
+
"version",
|
479
|
+
"viewBox",
|
480
|
+
"visibility",
|
481
|
+
"width",
|
482
|
+
"widths",
|
483
|
+
"x",
|
484
|
+
"x-height",
|
485
|
+
"x1",
|
486
|
+
"x2",
|
487
|
+
"xlink:actuate",
|
488
|
+
"xlink:arcrole",
|
489
|
+
"xlink:href",
|
490
|
+
"xlink:role",
|
491
|
+
"xlink:show",
|
492
|
+
"xlink:title",
|
493
|
+
"xlink:type",
|
494
|
+
"xml:base",
|
495
|
+
"xml:lang",
|
496
|
+
"xml:space",
|
497
|
+
"xmlns",
|
498
|
+
"xmlns:xlink",
|
499
|
+
"y",
|
500
|
+
"y1",
|
501
|
+
"y2",
|
502
|
+
"zoomAndPan",
|
503
|
+
])
|
504
|
+
|
505
|
+
ATTR_VAL_IS_URI = Set.new([
|
506
|
+
"action",
|
507
|
+
"cite",
|
508
|
+
"href",
|
509
|
+
"longdesc",
|
510
|
+
"poster",
|
511
|
+
"preload",
|
512
|
+
"src",
|
513
|
+
"xlink:href",
|
514
|
+
"xml:base",
|
515
|
+
])
|
516
|
+
|
517
|
+
SVG_ATTR_VAL_ALLOWS_REF = Set.new([
|
518
|
+
"clip-path",
|
519
|
+
"color-profile",
|
520
|
+
"cursor",
|
521
|
+
"fill",
|
522
|
+
"filter",
|
523
|
+
"marker",
|
524
|
+
"marker-end",
|
525
|
+
"marker-mid",
|
526
|
+
"marker-start",
|
527
|
+
"mask",
|
528
|
+
"stroke",
|
529
|
+
])
|
530
|
+
|
531
|
+
SVG_ALLOW_LOCAL_HREF = Set.new([
|
532
|
+
"altGlyph",
|
533
|
+
"animate",
|
534
|
+
"animateColor",
|
535
|
+
"animateMotion",
|
536
|
+
"animateTransform",
|
537
|
+
"cursor",
|
538
|
+
"feImage",
|
539
|
+
"filter",
|
540
|
+
"linearGradient",
|
541
|
+
"pattern",
|
542
|
+
"radialGradient",
|
543
|
+
"set",
|
544
|
+
"textpath",
|
545
|
+
"tref",
|
546
|
+
"use",
|
547
|
+
])
|
548
|
+
|
549
|
+
ACCEPTABLE_CSS_PROPERTIES = Set.new([
|
550
|
+
"azimuth",
|
551
|
+
"background-color",
|
552
|
+
"border-bottom-color",
|
553
|
+
"border-collapse",
|
554
|
+
"border-color",
|
555
|
+
"border-left-color",
|
556
|
+
"border-right-color",
|
557
|
+
"border-top-color",
|
558
|
+
"clear",
|
559
|
+
"color",
|
560
|
+
"cursor",
|
561
|
+
"direction",
|
562
|
+
"display",
|
563
|
+
"elevation",
|
564
|
+
"float",
|
565
|
+
"font",
|
566
|
+
"font-family",
|
567
|
+
"font-size",
|
568
|
+
"font-style",
|
569
|
+
"font-variant",
|
570
|
+
"font-weight",
|
571
|
+
"height",
|
572
|
+
"letter-spacing",
|
573
|
+
"line-height",
|
574
|
+
"list-style",
|
575
|
+
"list-style-type",
|
576
|
+
"overflow",
|
577
|
+
"pause",
|
578
|
+
"pause-after",
|
579
|
+
"pause-before",
|
580
|
+
"pitch",
|
581
|
+
"pitch-range",
|
582
|
+
"richness",
|
583
|
+
"speak",
|
584
|
+
"speak-header",
|
585
|
+
"speak-numeral",
|
586
|
+
"speak-punctuation",
|
587
|
+
"speech-rate",
|
588
|
+
"stress",
|
589
|
+
"text-align",
|
590
|
+
"text-decoration",
|
591
|
+
"text-indent",
|
592
|
+
"unicode-bidi",
|
593
|
+
"vertical-align",
|
594
|
+
"voice-family",
|
595
|
+
"volume",
|
596
|
+
"white-space",
|
597
|
+
"width",
|
598
|
+
])
|
599
|
+
|
600
|
+
ACCEPTABLE_CSS_KEYWORDS = Set.new([
|
601
|
+
"!important",
|
602
|
+
"aqua",
|
603
|
+
"auto",
|
604
|
+
"black",
|
605
|
+
"block",
|
606
|
+
"blue",
|
607
|
+
"bold",
|
608
|
+
"both",
|
609
|
+
"bottom",
|
610
|
+
"brown",
|
611
|
+
"center",
|
612
|
+
"collapse",
|
613
|
+
"dashed",
|
614
|
+
"dotted",
|
615
|
+
"fuchsia",
|
616
|
+
"gray",
|
617
|
+
"green",
|
618
|
+
"italic",
|
619
|
+
"left",
|
620
|
+
"lime",
|
621
|
+
"maroon",
|
622
|
+
"medium",
|
623
|
+
"navy",
|
624
|
+
"none",
|
625
|
+
"normal",
|
626
|
+
"nowrap",
|
627
|
+
"olive",
|
628
|
+
"pointer",
|
629
|
+
"purple",
|
630
|
+
"red",
|
631
|
+
"right",
|
632
|
+
"silver",
|
633
|
+
"solid",
|
634
|
+
"teal",
|
635
|
+
"thin",
|
636
|
+
"thick",
|
637
|
+
"top",
|
638
|
+
"transparent",
|
639
|
+
"underline",
|
640
|
+
"white",
|
641
|
+
"yellow",
|
642
|
+
])
|
643
|
+
|
644
|
+
# see https://www.quackit.com/css/functions/
|
645
|
+
# omit `url` and `image` from that list
|
646
|
+
ACCEPTABLE_CSS_FUNCTIONS = Set.new([
|
647
|
+
"attr",
|
648
|
+
"blur",
|
649
|
+
"brightness",
|
650
|
+
"calc",
|
651
|
+
"circle",
|
652
|
+
"contrast",
|
653
|
+
"counter",
|
654
|
+
"counters",
|
655
|
+
"cubic-bezier",
|
656
|
+
"drop-shadow",
|
657
|
+
"ellipse",
|
658
|
+
"grayscale",
|
659
|
+
"hsl",
|
660
|
+
"hsla",
|
661
|
+
"hue-rotate",
|
662
|
+
"hwb",
|
663
|
+
"inset",
|
664
|
+
"invert",
|
665
|
+
"linear-gradient",
|
666
|
+
"matrix",
|
667
|
+
"matrix3d",
|
668
|
+
"opacity",
|
669
|
+
"perspective",
|
670
|
+
"polygon",
|
671
|
+
"radial-gradient",
|
672
|
+
"repeating-linear-gradient",
|
673
|
+
"repeating-radial-gradient",
|
674
|
+
"rgb",
|
675
|
+
"rgba",
|
676
|
+
"rotate",
|
677
|
+
"rotate3d",
|
678
|
+
"rotateX",
|
679
|
+
"rotateY",
|
680
|
+
"rotateZ",
|
681
|
+
"saturate",
|
682
|
+
"sepia",
|
683
|
+
"scale",
|
684
|
+
"scale3d",
|
685
|
+
"scaleX",
|
686
|
+
"scaleY",
|
687
|
+
"scaleZ",
|
688
|
+
"skew",
|
689
|
+
"skewX",
|
690
|
+
"skewY",
|
691
|
+
"symbols",
|
692
|
+
"translate",
|
693
|
+
"translate3d",
|
694
|
+
"translateX",
|
695
|
+
"translateY",
|
696
|
+
"translateZ",
|
697
|
+
])
|
698
|
+
|
699
|
+
SHORTHAND_CSS_PROPERTIES = Set.new([
|
700
|
+
"background",
|
701
|
+
"border",
|
702
|
+
"margin",
|
703
|
+
"padding",
|
704
|
+
])
|
705
|
+
|
706
|
+
ACCEPTABLE_SVG_PROPERTIES = Set.new([
|
707
|
+
"fill",
|
708
|
+
"fill-opacity",
|
709
|
+
"fill-rule",
|
710
|
+
"stroke",
|
711
|
+
"stroke-width",
|
712
|
+
"stroke-linecap",
|
713
|
+
"stroke-linejoin",
|
714
|
+
"stroke-opacity",
|
715
|
+
])
|
716
|
+
|
717
|
+
# Matches a URI scheme separator: a literal ":", its decimal character
# reference (&#58 with optional zero padding), its hex character reference
# (&#x3a with optional zero padding), the literal text "&#x70", or the
# percent-encoded form "%3A" where the "%" may itself be written "&#37;".
# The entity alternatives are spelled out as literal text on purpose, so
# that encoded separators in attribute values are still detected.
PROTOCOL_SEPARATOR = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i
|
718
|
+
|
719
|
+
ACCEPTABLE_PROTOCOLS = Set.new([
|
720
|
+
"afs",
|
721
|
+
"aim",
|
722
|
+
"callto",
|
723
|
+
"data",
|
724
|
+
"ed2k",
|
725
|
+
"feed",
|
726
|
+
"ftp",
|
727
|
+
"gopher",
|
728
|
+
"http",
|
729
|
+
"https",
|
730
|
+
"irc",
|
731
|
+
"line",
|
732
|
+
"mailto",
|
733
|
+
"news",
|
734
|
+
"nntp",
|
735
|
+
"rsync",
|
736
|
+
"rtsp",
|
737
|
+
"sftp",
|
738
|
+
"ssh",
|
739
|
+
"tag",
|
740
|
+
"tel",
|
741
|
+
"telnet",
|
742
|
+
"urn",
|
743
|
+
"webcal",
|
744
|
+
"xmpp",
|
745
|
+
])
|
746
|
+
|
747
|
+
ACCEPTABLE_URI_DATA_MEDIATYPES = Set.new([
|
748
|
+
"image/gif",
|
749
|
+
"image/jpeg",
|
750
|
+
"image/png",
|
751
|
+
"image/svg+xml",
|
752
|
+
"text/css",
|
753
|
+
"text/plain",
|
754
|
+
])
|
755
|
+
|
756
|
+
# subclasses may define their own versions of these constants
|
757
|
+
ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
|
758
|
+
ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
|
759
|
+
ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
|
760
|
+
ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
|
761
|
+
ALLOWED_CSS_FUNCTIONS = ACCEPTABLE_CSS_FUNCTIONS
|
762
|
+
ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
|
763
|
+
ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
|
764
|
+
ALLOWED_URI_DATA_MEDIATYPES = ACCEPTABLE_URI_DATA_MEDIATYPES
|
765
|
+
|
766
|
+
VOID_ELEMENTS = Set.new([
|
767
|
+
"area",
|
768
|
+
"base",
|
769
|
+
"br",
|
770
|
+
"col",
|
771
|
+
"embed",
|
772
|
+
"hr",
|
773
|
+
"img",
|
774
|
+
"input",
|
775
|
+
"link",
|
776
|
+
"meta",
|
777
|
+
"param",
|
778
|
+
])
|
779
|
+
|
780
|
+
# additional tags we should consider safe since we have libxml2 fixing up our documents.
|
781
|
+
TAGS_SAFE_WITH_LIBXML2 = Set.new([
|
782
|
+
"body",
|
783
|
+
"head",
|
784
|
+
"html",
|
785
|
+
])
|
786
|
+
ALLOWED_ELEMENTS_WITH_LIBXML2 = ALLOWED_ELEMENTS + TAGS_SAFE_WITH_LIBXML2
|
787
|
+
end
|
788
|
+
|
789
|
+
WhiteList = SafeList
|
790
|
+
if Object.respond_to?(:deprecate_constant)
|
791
|
+
deprecate_constant :WhiteList
|
792
|
+
end
|
793
|
+
|
794
|
+
::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::HTML5::SafeList
|
795
|
+
end
|
796
|
+
end
|