loofah 2.2.3 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +102 -31
- data/Gemfile +11 -9
- data/Manifest.txt +1 -16
- data/README.md +6 -13
- data/Rakefile +39 -21
- data/benchmark/benchmark.rb +6 -1
- data/benchmark/helper.rb +15 -15
- data/lib/loofah.rb +35 -16
- data/lib/loofah/elements.rb +74 -73
- data/lib/loofah/helpers.rb +18 -7
- data/lib/loofah/html/document.rb +1 -0
- data/lib/loofah/html/document_fragment.rb +4 -2
- data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
- data/lib/loofah/html5/safelist.rb +804 -0
- data/lib/loofah/html5/scrub.rb +28 -30
- data/lib/loofah/instance_methods.rb +5 -3
- data/lib/loofah/metahelpers.rb +2 -1
- data/lib/loofah/scrubber.rb +8 -7
- data/lib/loofah/scrubbers.rb +12 -11
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -1
- metadata +69 -58
- data/.gemtest +0 -0
- data/lib/loofah/html5/whitelist.rb +0 -186
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -382
- data/test/integration/test_ad_hoc.rb +0 -204
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
data/benchmark/benchmark.rb
CHANGED
@@ -23,7 +23,7 @@ def compare_scrub_methods
|
|
23
23
|
end
|
24
24
|
|
25
25
|
module TestSet
|
26
|
-
def test_set
|
26
|
+
def test_set(options = {})
|
27
27
|
scale = options[:rehearse] ? 10 : 1
|
28
28
|
puts self.class.name
|
29
29
|
|
@@ -49,6 +49,7 @@ end
|
|
49
49
|
|
50
50
|
class HeadToHeadRailsSanitize < Measure
|
51
51
|
include TestSet
|
52
|
+
|
52
53
|
def bench(content, ntimes, fragment_p)
|
53
54
|
clear_measure
|
54
55
|
|
@@ -65,6 +66,7 @@ end
|
|
65
66
|
|
66
67
|
class HeadToHeadRailsStripTags < Measure
|
67
68
|
include TestSet
|
69
|
+
|
68
70
|
def bench(content, ntimes, fragment_p)
|
69
71
|
clear_measure
|
70
72
|
|
@@ -81,6 +83,7 @@ end
|
|
81
83
|
|
82
84
|
class HeadToHeadSanitizerSanitize < Measure
|
83
85
|
include TestSet
|
86
|
+
|
84
87
|
def bench(content, ntimes, fragment_p)
|
85
88
|
clear_measure
|
86
89
|
|
@@ -100,6 +103,7 @@ end
|
|
100
103
|
|
101
104
|
class HeadToHeadHtml5LibSanitize < Measure
|
102
105
|
include TestSet
|
106
|
+
|
103
107
|
def bench(content, ntimes, fragment_p)
|
104
108
|
clear_measure
|
105
109
|
|
@@ -120,6 +124,7 @@ end
|
|
120
124
|
|
121
125
|
class HeadToHeadHTMLFilter < Measure
|
122
126
|
include TestSet
|
127
|
+
|
123
128
|
def bench(content, ntimes, fragment_p)
|
124
129
|
clear_measure
|
125
130
|
|
data/benchmark/helper.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "rubygems"
|
2
|
+
require "open-uri"
|
3
|
+
require "hpricot"
|
4
4
|
require File.expand_path(File.dirname(__FILE__) + "/../lib/loofah")
|
5
|
-
require
|
5
|
+
require "benchmark"
|
6
6
|
require "action_view"
|
7
7
|
require "action_controller/vendor/html-scanner"
|
8
8
|
require "sanitize"
|
9
|
-
require
|
10
|
-
require
|
9
|
+
require "hitimes"
|
10
|
+
require "htmlfilter"
|
11
11
|
|
12
12
|
unless defined?(HTMLFilter)
|
13
13
|
HTMLFilter = HtmlFilter
|
@@ -19,20 +19,20 @@ class RailsSanitize
|
|
19
19
|
end
|
20
20
|
|
21
21
|
class HTML5libSanitize
|
22
|
-
require
|
23
|
-
require
|
24
|
-
require
|
25
|
-
require
|
26
|
-
require
|
27
|
-
require
|
22
|
+
require "html5/html5parser"
|
23
|
+
require "html5/liberalxmlparser"
|
24
|
+
require "html5/treewalkers"
|
25
|
+
require "html5/treebuilders"
|
26
|
+
require "html5/serializer"
|
27
|
+
require "html5/sanitizer"
|
28
28
|
|
29
29
|
include HTML5
|
30
30
|
|
31
31
|
def sanitize(html)
|
32
32
|
HTMLParser.parse_fragment(html, {
|
33
|
-
:tokenizer
|
34
|
-
:encoding
|
35
|
-
:tree
|
33
|
+
:tokenizer => HTMLSanitizer,
|
34
|
+
:encoding => "utf-8",
|
35
|
+
:tree => TreeBuilders::REXML::TreeBuilder,
|
36
36
|
}).to_s
|
37
37
|
end
|
38
38
|
end
|
data/lib/loofah.rb
CHANGED
@@ -1,22 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
3
|
|
3
|
-
require
|
4
|
+
require "nokogiri"
|
4
5
|
|
5
|
-
require
|
6
|
-
require
|
6
|
+
require "loofah/metahelpers"
|
7
|
+
require "loofah/elements"
|
7
8
|
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
9
|
+
require "loofah/html5/safelist"
|
10
|
+
require "loofah/html5/libxml2_workarounds"
|
11
|
+
require "loofah/html5/scrub"
|
11
12
|
|
12
|
-
require
|
13
|
-
require
|
13
|
+
require "loofah/scrubber"
|
14
|
+
require "loofah/scrubbers"
|
14
15
|
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
18
|
-
require
|
19
|
-
require
|
16
|
+
require "loofah/instance_methods"
|
17
|
+
require "loofah/xml/document"
|
18
|
+
require "loofah/xml/document_fragment"
|
19
|
+
require "loofah/html/document"
|
20
|
+
require "loofah/html/document_fragment"
|
20
21
|
|
21
22
|
# == Strings and IO Objects as Input
|
22
23
|
#
|
@@ -28,13 +29,13 @@ require 'loofah/html/document_fragment'
|
|
28
29
|
#
|
29
30
|
module Loofah
|
30
31
|
# The version of Loofah you are using
|
31
|
-
VERSION = '2.2.3'
|
32
|
+
VERSION = "2.6.0"
|
32
33
|
|
33
34
|
class << self
|
34
35
|
# Shortcut for Loofah::HTML::Document.parse
|
35
36
|
# This method accepts the same parameters as Nokogiri::HTML::Document.parse
|
36
37
|
def document(*args, &block)
|
37
|
-
Loofah::HTML::Document.parse(*args, &block)
|
38
|
+
remove_comments_before_html_element Loofah::HTML::Document.parse(*args, &block)
|
38
39
|
end
|
39
40
|
|
40
41
|
# Shortcut for Loofah::HTML::DocumentFragment.parse
|
@@ -77,7 +78,25 @@ module Loofah
|
|
77
78
|
|
78
79
|
# A helper to remove extraneous whitespace from text-ified HTML
|
79
80
|
def remove_extraneous_whitespace(string)
|
80
|
-
string.gsub(/\n\s*\n\s*\n/,"\n\n")
|
81
|
+
string.gsub(/\n\s*\n\s*\n/, "\n\n")
|
82
|
+
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
# remove comments that exist outside of the HTML element.
|
87
|
+
#
|
88
|
+
# these comments are allowed by the HTML spec:
|
89
|
+
#
|
90
|
+
# https://www.w3.org/TR/html401/struct/global.html#h-7.1
|
91
|
+
#
|
92
|
+
# but are not scrubbed by Loofah because these nodes don't meet
|
93
|
+
# the contract that scrubbers expect of a node (e.g., it can be
|
94
|
+
# replaced, sibling and children nodes can be created).
|
95
|
+
def remove_comments_before_html_element(doc)
|
96
|
+
doc.children.each do |child|
|
97
|
+
child.unlink if child.comment?
|
98
|
+
end
|
99
|
+
doc
|
81
100
|
end
|
82
101
|
end
|
83
102
|
end
|
data/lib/loofah/elements.rb
CHANGED
@@ -1,89 +1,90 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "set"
|
2
3
|
|
3
4
|
module Loofah
|
4
5
|
module Elements
|
5
6
|
STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
7
|
+
address
|
8
|
+
blockquote
|
9
|
+
center
|
10
|
+
dir
|
11
|
+
div
|
12
|
+
dl
|
13
|
+
fieldset
|
14
|
+
form
|
15
|
+
h1
|
16
|
+
h2
|
17
|
+
h3
|
18
|
+
h4
|
19
|
+
h5
|
20
|
+
h6
|
21
|
+
hr
|
22
|
+
isindex
|
23
|
+
menu
|
24
|
+
noframes
|
25
|
+
noscript
|
26
|
+
ol
|
27
|
+
p
|
28
|
+
pre
|
29
|
+
table
|
30
|
+
ul
|
31
|
+
]
|
31
32
|
|
32
33
|
# https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
33
34
|
STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
35
|
+
address
|
36
|
+
article
|
37
|
+
aside
|
38
|
+
blockquote
|
39
|
+
canvas
|
40
|
+
dd
|
41
|
+
div
|
42
|
+
dl
|
43
|
+
dt
|
44
|
+
fieldset
|
45
|
+
figcaption
|
46
|
+
figure
|
47
|
+
footer
|
48
|
+
form
|
49
|
+
h1
|
50
|
+
h2
|
51
|
+
h3
|
52
|
+
h4
|
53
|
+
h5
|
54
|
+
h6
|
55
|
+
header
|
56
|
+
hgroup
|
57
|
+
hr
|
58
|
+
li
|
59
|
+
main
|
60
|
+
nav
|
61
|
+
noscript
|
62
|
+
ol
|
63
|
+
output
|
64
|
+
p
|
65
|
+
pre
|
66
|
+
section
|
67
|
+
table
|
68
|
+
tfoot
|
69
|
+
ul
|
70
|
+
video
|
71
|
+
]
|
71
72
|
|
72
73
|
STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
|
73
74
|
|
74
75
|
# The following elements may also be considered block-level
|
75
76
|
# elements since they may contain block-level elements
|
76
77
|
LOOSE_BLOCK_LEVEL = Set.new %w[dd
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
78
|
+
dt
|
79
|
+
frameset
|
80
|
+
li
|
81
|
+
tbody
|
82
|
+
td
|
83
|
+
tfoot
|
84
|
+
th
|
85
|
+
thead
|
86
|
+
tr
|
87
|
+
]
|
87
88
|
|
88
89
|
BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
|
89
90
|
end
|
data/lib/loofah/helpers.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Loofah
|
2
3
|
module Helpers
|
3
4
|
class << self
|
@@ -27,7 +28,7 @@ module Loofah
|
|
27
28
|
#
|
28
29
|
# Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
|
29
30
|
#
|
30
|
-
def sanitize_css
|
31
|
+
def sanitize_css(style_string)
|
31
32
|
::Loofah::HTML5::Scrub.scrub_css style_string
|
32
33
|
end
|
33
34
|
|
@@ -46,8 +47,13 @@ module Loofah
|
|
46
47
|
@full_sanitizer ||= ::Loofah::Helpers::ActionView::FullSanitizer.new
|
47
48
|
end
|
48
49
|
|
50
|
+
def safe_list_sanitizer
|
51
|
+
@safe_list_sanitizer ||= ::Loofah::Helpers::ActionView::SafeListSanitizer.new
|
52
|
+
end
|
53
|
+
|
49
54
|
def white_list_sanitizer
|
50
|
-
|
55
|
+
warn "warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead."
|
56
|
+
safe_list_sanitizer
|
51
57
|
end
|
52
58
|
end
|
53
59
|
|
@@ -63,7 +69,7 @@ module Loofah
|
|
63
69
|
# Loofah::Helpers::ActionView.set_as_default_sanitizer
|
64
70
|
#
|
65
71
|
class FullSanitizer
|
66
|
-
def sanitize
|
72
|
+
def sanitize(html, *args)
|
67
73
|
Loofah::Helpers.strip_tags html
|
68
74
|
end
|
69
75
|
end
|
@@ -73,21 +79,26 @@ module Loofah
|
|
73
79
|
#
|
74
80
|
# To use by default, call this in an application initializer:
|
75
81
|
#
|
76
|
-
# ActionView::Helpers::SanitizeHelper.
|
82
|
+
# ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = ::Loofah::Helpers::ActionView::SafeListSanitizer.new
|
77
83
|
#
|
78
84
|
# Or, to generally opt-in to Loofah's view sanitizers:
|
79
85
|
#
|
80
86
|
# Loofah::Helpers::ActionView.set_as_default_sanitizer
|
81
87
|
#
|
82
|
-
class
|
83
|
-
def sanitize
|
88
|
+
class SafeListSanitizer
|
89
|
+
def sanitize(html, *args)
|
84
90
|
Loofah::Helpers.sanitize html
|
85
91
|
end
|
86
92
|
|
87
|
-
def sanitize_css
|
93
|
+
def sanitize_css(style_string, *args)
|
88
94
|
Loofah::Helpers.sanitize_css style_string
|
89
95
|
end
|
90
96
|
end
|
97
|
+
|
98
|
+
WhiteListSanitizer = SafeListSanitizer
|
99
|
+
if Object.respond_to?(:deprecate_constant)
|
100
|
+
deprecate_constant :WhiteListSanitizer
|
101
|
+
end
|
91
102
|
end
|
92
103
|
end
|
93
104
|
end
|
data/lib/loofah/html/document.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Loofah
|
2
3
|
module HTML # :nodoc:
|
3
4
|
#
|
data/lib/loofah/html/document_fragment.rb
CHANGED
@@ -14,10 +15,10 @@ module Loofah
|
|
14
15
|
# constructor. Applications should use Loofah.fragment to
|
15
16
|
# parse a fragment.
|
16
17
|
#
|
17
|
-
def parse
|
18
|
+
def parse(tags, encoding = nil)
|
18
19
|
doc = Loofah::HTML::Document.new
|
19
20
|
|
20
|
-
encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
|
21
|
+
encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
|
21
22
|
doc.encoding = encoding
|
22
23
|
|
23
24
|
new(doc, tags)
|
@@ -30,6 +31,7 @@ module Loofah
|
|
30
31
|
def to_s
|
31
32
|
serialize_root.children.to_s
|
32
33
|
end
|
34
|
+
|
33
35
|
alias :serialize :to_s
|
34
36
|
|
35
37
|
def serialize_root
|
data/lib/loofah/html5/libxml2_workarounds.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
require "set"
|
3
4
|
|
4
5
|
module Loofah
|
5
6
|
#
|
@@ -16,11 +17,11 @@ module Loofah
|
|
16
17
|
# see comments about CVE-2018-8048 within the tests for more information
|
17
18
|
#
|
18
19
|
BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
|
20
|
+
href
|
21
|
+
action
|
22
|
+
src
|
23
|
+
name
|
24
|
+
]
|
25
|
+
BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
|
25
26
|
end
|
26
27
|
end
|