loofah 2.2.3 → 2.21.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +269 -31
- data/README.md +109 -124
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +85 -79
- data/lib/loofah/helpers.rb +37 -20
- data/lib/loofah/{html → html4}/document.rb +6 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
- data/lib/loofah/html5/safelist.rb +1055 -0
- data/lib/loofah/html5/scrub.rb +153 -58
- data/lib/loofah/metahelpers.rb +11 -6
- data/lib/loofah/scrubber.rb +22 -15
- data/lib/loofah/scrubbers.rb +66 -55
- data/lib/loofah/version.rb +6 -0
- data/lib/loofah/xml/document.rb +2 -0
- data/lib/loofah/xml/document_fragment.rb +4 -7
- data/lib/loofah.rb +131 -38
- metadata +28 -216
- data/.gemtest +0 -0
- data/Gemfile +0 -22
- data/Manifest.txt +0 -40
- data/Rakefile +0 -79
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/lib/loofah/html/document_fragment.rb +0 -40
- data/lib/loofah/html5/whitelist.rb +0 -186
- data/lib/loofah/instance_methods.rb +0 -127
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -382
- data/test/integration/test_ad_hoc.rb +0 -204
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
data/lib/loofah/elements.rb
CHANGED
@@ -1,92 +1,98 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
2
4
|
|
3
5
|
module Loofah
|
4
6
|
module Elements
|
5
|
-
STRICT_BLOCK_LEVEL_HTML4 = Set.new
|
6
|
-
address
|
7
|
-
blockquote
|
8
|
-
center
|
9
|
-
dir
|
10
|
-
div
|
11
|
-
dl
|
12
|
-
fieldset
|
13
|
-
form
|
14
|
-
h1
|
15
|
-
h2
|
16
|
-
h3
|
17
|
-
h4
|
18
|
-
h5
|
19
|
-
h6
|
20
|
-
hr
|
21
|
-
isindex
|
22
|
-
menu
|
23
|
-
noframes
|
24
|
-
noscript
|
25
|
-
ol
|
26
|
-
p
|
27
|
-
pre
|
28
|
-
table
|
29
|
-
ul
|
30
|
-
]
|
7
|
+
STRICT_BLOCK_LEVEL_HTML4 = Set.new([
|
8
|
+
"address",
|
9
|
+
"blockquote",
|
10
|
+
"center",
|
11
|
+
"dir",
|
12
|
+
"div",
|
13
|
+
"dl",
|
14
|
+
"fieldset",
|
15
|
+
"form",
|
16
|
+
"h1",
|
17
|
+
"h2",
|
18
|
+
"h3",
|
19
|
+
"h4",
|
20
|
+
"h5",
|
21
|
+
"h6",
|
22
|
+
"hr",
|
23
|
+
"isindex",
|
24
|
+
"menu",
|
25
|
+
"noframes",
|
26
|
+
"noscript",
|
27
|
+
"ol",
|
28
|
+
"p",
|
29
|
+
"pre",
|
30
|
+
"table",
|
31
|
+
"ul",
|
32
|
+
])
|
31
33
|
|
32
34
|
# https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
33
|
-
STRICT_BLOCK_LEVEL_HTML5 = Set.new
|
34
|
-
address
|
35
|
-
article
|
36
|
-
aside
|
37
|
-
blockquote
|
38
|
-
canvas
|
39
|
-
dd
|
40
|
-
div
|
41
|
-
dl
|
42
|
-
dt
|
43
|
-
fieldset
|
44
|
-
figcaption
|
45
|
-
figure
|
46
|
-
footer
|
47
|
-
form
|
48
|
-
h1
|
49
|
-
h2
|
50
|
-
h3
|
51
|
-
h4
|
52
|
-
h5
|
53
|
-
h6
|
54
|
-
header
|
55
|
-
hgroup
|
56
|
-
hr
|
57
|
-
li
|
58
|
-
main
|
59
|
-
nav
|
60
|
-
noscript
|
61
|
-
ol
|
62
|
-
output
|
63
|
-
p
|
64
|
-
pre
|
65
|
-
section
|
66
|
-
table
|
67
|
-
tfoot
|
68
|
-
ul
|
69
|
-
video
|
70
|
-
]
|
71
|
-
|
72
|
-
STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
|
35
|
+
STRICT_BLOCK_LEVEL_HTML5 = Set.new([
|
36
|
+
"address",
|
37
|
+
"article",
|
38
|
+
"aside",
|
39
|
+
"blockquote",
|
40
|
+
"canvas",
|
41
|
+
"dd",
|
42
|
+
"div",
|
43
|
+
"dl",
|
44
|
+
"dt",
|
45
|
+
"fieldset",
|
46
|
+
"figcaption",
|
47
|
+
"figure",
|
48
|
+
"footer",
|
49
|
+
"form",
|
50
|
+
"h1",
|
51
|
+
"h2",
|
52
|
+
"h3",
|
53
|
+
"h4",
|
54
|
+
"h5",
|
55
|
+
"h6",
|
56
|
+
"header",
|
57
|
+
"hgroup",
|
58
|
+
"hr",
|
59
|
+
"li",
|
60
|
+
"main",
|
61
|
+
"nav",
|
62
|
+
"noscript",
|
63
|
+
"ol",
|
64
|
+
"output",
|
65
|
+
"p",
|
66
|
+
"pre",
|
67
|
+
"section",
|
68
|
+
"table",
|
69
|
+
"tfoot",
|
70
|
+
"ul",
|
71
|
+
"video",
|
72
|
+
])
|
73
73
|
|
74
74
|
# The following elements may also be considered block-level
|
75
75
|
# elements since they may contain block-level elements
|
76
|
-
LOOSE_BLOCK_LEVEL = Set.new
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
76
|
+
LOOSE_BLOCK_LEVEL = Set.new([
|
77
|
+
"dd",
|
78
|
+
"dt",
|
79
|
+
"frameset",
|
80
|
+
"li",
|
81
|
+
"tbody",
|
82
|
+
"td",
|
83
|
+
"tfoot",
|
84
|
+
"th",
|
85
|
+
"thead",
|
86
|
+
"tr",
|
87
|
+
])
|
87
88
|
|
89
|
+
# Elements that aren't block but should generate a newline in #to_text
|
90
|
+
INLINE_LINE_BREAK = Set.new(["br"])
|
91
|
+
|
92
|
+
STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
|
88
93
|
BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
|
94
|
+
LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
|
89
95
|
end
|
90
96
|
|
91
|
-
::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants
|
97
|
+
::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants(::Loofah::Elements)
|
92
98
|
end
|
data/lib/loofah/helpers.rb
CHANGED
@@ -1,42 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Loofah
|
2
4
|
module Helpers
|
3
5
|
class << self
|
4
6
|
#
|
5
7
|
# A replacement for Rails's built-in +strip_tags+ helper.
|
6
8
|
#
|
7
|
-
#
|
9
|
+
# Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
|
8
10
|
#
|
9
11
|
def strip_tags(string_or_io)
|
10
|
-
Loofah.
|
12
|
+
Loofah.html4_fragment(string_or_io).text
|
11
13
|
end
|
12
14
|
|
13
15
|
#
|
14
16
|
# A replacement for Rails's built-in +sanitize+ helper.
|
15
17
|
#
|
16
|
-
#
|
18
|
+
# Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>")
|
19
|
+
# # => "<script src=\"http://ha.ckers.org/xss.js\"></script>"
|
17
20
|
#
|
18
21
|
def sanitize(string_or_io)
|
19
|
-
loofah_fragment = Loofah.
|
22
|
+
loofah_fragment = Loofah.html4_fragment(string_or_io)
|
20
23
|
loofah_fragment.scrub!(:strip)
|
21
|
-
loofah_fragment.xpath("./form").each
|
24
|
+
loofah_fragment.xpath("./form").each(&:remove)
|
22
25
|
loofah_fragment.to_s
|
23
26
|
end
|
24
27
|
|
25
28
|
#
|
26
29
|
# A replacement for Rails's built-in +sanitize_css+ helper.
|
27
30
|
#
|
28
|
-
# Loofah::Helpers.sanitize_css("display:block;background-image:url(http://
|
31
|
+
# Loofah::Helpers.sanitize_css("display:block;background-image:url(http://example.com/foo.jpg)")
|
32
|
+
# # => "display: block;"
|
29
33
|
#
|
30
|
-
def sanitize_css
|
31
|
-
::Loofah::HTML5::Scrub.scrub_css
|
34
|
+
def sanitize_css(style_string)
|
35
|
+
::Loofah::HTML5::Scrub.scrub_css(style_string)
|
32
36
|
end
|
33
37
|
|
34
38
|
#
|
35
|
-
# A helper to remove extraneous whitespace from text-ified HTML
|
39
|
+
# A helper to remove extraneous whitespace from text-ified HTML.
|
40
|
+
#
|
36
41
|
# TODO: remove this in a future major-point-release.
|
37
42
|
#
|
38
43
|
def remove_extraneous_whitespace(string)
|
39
|
-
Loofah.remove_extraneous_whitespace
|
44
|
+
Loofah.remove_extraneous_whitespace(string)
|
40
45
|
end
|
41
46
|
end
|
42
47
|
|
@@ -46,8 +51,13 @@ module Loofah
|
|
46
51
|
@full_sanitizer ||= ::Loofah::Helpers::ActionView::FullSanitizer.new
|
47
52
|
end
|
48
53
|
|
54
|
+
def safe_list_sanitizer
|
55
|
+
@safe_list_sanitizer ||= ::Loofah::Helpers::ActionView::SafeListSanitizer.new
|
56
|
+
end
|
57
|
+
|
49
58
|
def white_list_sanitizer
|
50
|
-
|
59
|
+
warn("warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead.")
|
60
|
+
safe_list_sanitizer
|
51
61
|
end
|
52
62
|
end
|
53
63
|
|
@@ -56,15 +66,16 @@ module Loofah
|
|
56
66
|
#
|
57
67
|
# To use by default, call this in an application initializer:
|
58
68
|
#
|
59
|
-
# ActionView::Helpers::SanitizeHelper.full_sanitizer =
|
69
|
+
# ActionView::Helpers::SanitizeHelper.full_sanitizer = \
|
70
|
+
# Loofah::Helpers::ActionView::FullSanitizer.new
|
60
71
|
#
|
61
72
|
# Or, to generally opt-in to Loofah's view sanitizers:
|
62
73
|
#
|
63
74
|
# Loofah::Helpers::ActionView.set_as_default_sanitizer
|
64
75
|
#
|
65
76
|
class FullSanitizer
|
66
|
-
def sanitize
|
67
|
-
Loofah::Helpers.strip_tags
|
77
|
+
def sanitize(html, *args)
|
78
|
+
Loofah::Helpers.strip_tags(html)
|
68
79
|
end
|
69
80
|
end
|
70
81
|
|
@@ -73,21 +84,27 @@ module Loofah
|
|
73
84
|
#
|
74
85
|
# To use by default, call this in an application initializer:
|
75
86
|
#
|
76
|
-
# ActionView::Helpers::SanitizeHelper.
|
87
|
+
# ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = \
|
88
|
+
# Loofah::Helpers::ActionView::SafeListSanitizer.new
|
77
89
|
#
|
78
90
|
# Or, to generally opt-in to Loofah's view sanitizers:
|
79
91
|
#
|
80
92
|
# Loofah::Helpers::ActionView.set_as_default_sanitizer
|
81
93
|
#
|
82
|
-
class
|
83
|
-
def sanitize
|
84
|
-
Loofah::Helpers.sanitize
|
94
|
+
class SafeListSanitizer
|
95
|
+
def sanitize(html, *args)
|
96
|
+
Loofah::Helpers.sanitize(html)
|
85
97
|
end
|
86
98
|
|
87
|
-
def sanitize_css
|
88
|
-
Loofah::Helpers.sanitize_css
|
99
|
+
def sanitize_css(style_string, *args)
|
100
|
+
Loofah::Helpers.sanitize_css(style_string)
|
89
101
|
end
|
90
102
|
end
|
103
|
+
|
104
|
+
WhiteListSanitizer = SafeListSanitizer
|
105
|
+
if Object.respond_to?(:deprecate_constant)
|
106
|
+
deprecate_constant :WhiteListSanitizer
|
107
|
+
end
|
91
108
|
end
|
92
109
|
end
|
93
110
|
end
|
@@ -1,18 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Loofah
|
2
|
-
module
|
4
|
+
module HTML4 # :nodoc:
|
3
5
|
#
|
4
|
-
# Subclass of Nokogiri::
|
6
|
+
# Subclass of Nokogiri::HTML4::Document.
|
5
7
|
#
|
6
8
|
# See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
|
7
9
|
#
|
8
|
-
class Document < Nokogiri::
|
10
|
+
class Document < Nokogiri::HTML4::Document
|
9
11
|
include Loofah::ScrubBehavior::Node
|
10
12
|
include Loofah::DocumentDecorator
|
11
13
|
include Loofah::TextBehavior
|
12
|
-
|
13
|
-
def serialize_root
|
14
|
-
at_xpath("/html/body")
|
15
|
-
end
|
14
|
+
include Loofah::HtmlDocumentBehavior
|
16
15
|
end
|
17
16
|
end
|
18
17
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Loofah
|
4
|
+
module HTML4 # :nodoc:
|
5
|
+
#
|
6
|
+
# Subclass of Nokogiri::HTML4::DocumentFragment.
|
7
|
+
#
|
8
|
+
# See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
|
9
|
+
#
|
10
|
+
class DocumentFragment < Nokogiri::HTML4::DocumentFragment
|
11
|
+
include Loofah::TextBehavior
|
12
|
+
include Loofah::HtmlFragmentBehavior
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Loofah
|
4
|
+
module HTML5 # :nodoc:
|
5
|
+
#
|
6
|
+
# Subclass of Nokogiri::HTML5::Document.
|
7
|
+
#
|
8
|
+
# See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
|
9
|
+
#
|
10
|
+
class Document < Nokogiri::HTML5::Document
|
11
|
+
include Loofah::ScrubBehavior::Node
|
12
|
+
include Loofah::DocumentDecorator
|
13
|
+
include Loofah::TextBehavior
|
14
|
+
include Loofah::HtmlDocumentBehavior
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Loofah
|
4
|
+
module HTML5 # :nodoc:
|
5
|
+
#
|
6
|
+
# Subclass of Nokogiri::HTML5::DocumentFragment.
|
7
|
+
#
|
8
|
+
# See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
|
9
|
+
#
|
10
|
+
class DocumentFragment < Nokogiri::HTML5::DocumentFragment
|
11
|
+
include Loofah::TextBehavior
|
12
|
+
include Loofah::HtmlFragmentBehavior
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "set"
|
3
5
|
|
4
6
|
module Loofah
|
5
7
|
#
|
@@ -15,12 +17,12 @@ module Loofah
|
|
15
17
|
#
|
16
18
|
# see comments about CVE-2018-8048 within the tests for more information
|
17
19
|
#
|
18
|
-
BROKEN_ESCAPING_ATTRIBUTES = Set.new
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
|
20
|
+
BROKEN_ESCAPING_ATTRIBUTES = Set.new([
|
21
|
+
"href",
|
22
|
+
"action",
|
23
|
+
"src",
|
24
|
+
"name",
|
25
|
+
])
|
26
|
+
BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
|
25
27
|
end
|
26
28
|
end
|