loofah 2.19.0 → 2.23.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ #
5
+ # Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
6
+ #
7
+ # Traverse the document or fragment, invoking the +scrubber+ on each node.
8
+ #
9
+ # +scrubber+ must either be one of the symbols representing the built-in scrubbers (see
10
+ # Scrubbers), or a Scrubber instance.
11
+ #
12
+ # span2div = Loofah::Scrubber.new do |node|
13
+ # node.name = "div" if node.name == "span"
14
+ # end
15
+ # Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
16
+ # # => "<div>foo</div><p>bar</p>"
17
+ #
18
+ # or
19
+ #
20
+ # unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
21
+ # Loofah.html5_fragment(unsafe_html).scrub!(:strip).to_s
22
+ # # => "ohai! <div>div is safe</div> "
23
+ #
24
+ # Note that this method is called implicitly from the shortcuts Loofah.scrub_html5_fragment et
25
+ # al.
26
+ #
27
+ # Please see Scrubber for more information on implementation and traversal, and README.rdoc for
28
+ # more example usage.
29
+ #
30
module ScrubBehavior
  # +scrub!+ for documents, document fragments and plain nodes.
  module Node # :nodoc:
    # Resolves +scrubber+ via ScrubBehavior.resolve_scrubber and applies it to the receiver.
    #
    # Raises Loofah::ScrubberNotFound when +scrubber+ cannot be resolved. Returns the receiver.
    def scrub!(scrubber)
      # Ideally this would be three methods in three modules. However, nokogiri decides whether
      # to decorate based on whether the module name has already been included, and documents
      # are decorated just like the nodes they contain, so every receiver type is handled here.
      resolved = ScrubBehavior.resolve_scrubber(scrubber)

      if is_a?(Nokogiri::XML::Document)
        # a document without a root has nothing to traverse
        resolved.traverse(root) if root
      elsif is_a?(Nokogiri::XML::DocumentFragment)
        # delegate to NodeSet#scrub!, which scrubs each child in turn
        children.scrub!(resolved)
      else
        resolved.traverse(self)
      end

      self
    end
  end

  # +scrub!+ for node sets.
  module NodeSet # :nodoc:
    # Calls +scrub!+ with +scrubber+ on every member of the set, then returns the set.
    def scrub!(scrubber)
      each do |member|
        member.scrub!(scrubber)
      end
      self
    end
  end

  # Turns +scrubber+ into a Loofah::Scrubber instance.
  #
  # When +scrubber+ is a key of Scrubbers::MAP, a new instance of the mapped class is used;
  # otherwise +scrubber+ itself is used. Raises Loofah::ScrubberNotFound if the outcome is not
  # a Loofah::Scrubber.
  def self.resolve_scrubber(scrubber) # :nodoc:
    builtin = Scrubbers::MAP[scrubber]
    scrubber = builtin.new if builtin
    return scrubber if scrubber.is_a?(Loofah::Scrubber)

    raise Loofah::ScrubberNotFound, "not a Scrubber or a scrubber name: #{scrubber.inspect}"
  end
end
69
+
70
+ #
71
+ # Overrides +text+ in Document and DocumentFragment classes, and mixes in +to_text+.
72
+ #
73
module TextBehavior
  #
  #  Plain-text rendering of the markup under +serialize_root+, with HTML entities encoded
  #  unless the caller opts out.
  #
  #  Comment nodes directly under +serialize_root+ are skipped. When there is no
  #  +serialize_root+ the text is the empty string.
  #
  #  Considerably faster than #to_text, but makes no attempt to insert whitespace around block
  #  elements:
  #
  #    Loofah.html5_document("<h1>Title</h1><div>Content</div>").text
  #    # => "TitleContent"
  #
  #  Pass <tt>:encode_special_chars => false</tt> to get the text back without entity encoding.
  #  Only do so if you understand that the result is unsafe to render in a browser:
  #
  #    frag = Loofah.html5_fragment("&lt;script&gt;alert('EVIL');&lt;/script&gt;")
  #    # ok for browser:
  #    frag.text                                 # => "&lt;script&gt;alert('EVIL');&lt;/script&gt;"
  #    # decidedly not ok for browser:
  #    frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
  #
  def text(options = {})
    root = serialize_root
    plain = root ? root.children.reject(&:comment?).map(&:inner_text).join("") : ""

    # only an explicit +false+ disables encoding; the raw text is possibly dangerous if
    # rendered in a browser
    return plain if options[:encode_special_chars] == false

    encode_special_chars(plain)
  end

  alias_method :inner_text, :text
  alias_method :to_str, :text

  #
  #  Plain-text rendering that, unlike #text, is clever about whitespace around block elements
  #  and line break elements. HTML entities are encoded exactly as in #text, and +options+ is
  #  handed to #text unchanged.
  #
  #  Slower than #text: it scrubs a copy of the receiver with the +:newline_block_elements+
  #  scrubber and then strips extraneous whitespace from the copy's text.
  #
  #    Loofah.html5_document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
  #    # => "\nTitle\n\nContent\nNext line\n"
  #
  def to_text(options = {})
    scrubbed = dup.scrub!(:newline_block_elements)
    Loofah.remove_extraneous_whitespace(scrubbed.text(options))
  end
end
124
+
125
module DocumentDecorator # :nodoc:
  # After normal initialization, registers the scrub behaviors as decorators on the document:
  # ScrubBehavior::Node for Nokogiri::XML::Node and ScrubBehavior::NodeSet for
  # Nokogiri::XML::NodeSet.
  def initialize(*args, &block)
    super

    {
      Nokogiri::XML::Node => ScrubBehavior::Node,
      Nokogiri::XML::NodeSet => ScrubBehavior::NodeSet,
    }.each do |decorated_class, behavior|
      decorators(decorated_class) << behavior
    end
  end
end
132
+
133
module HtmlDocumentBehavior # :nodoc:
  # Class-level methods added to whichever class includes HtmlDocumentBehavior.
  module ClassMethods
    # Parses via the superclass implementation, then drops the document's top-level comments.
    def parse(*args, &block)
      parsed = super
      remove_comments_before_html_element(parsed)
    end

    private

    # Unlinks every direct child of +doc+ that is a comment, then returns +doc+.
    #
    # Comments outside of the HTML element are permitted by the HTML spec
    #
    #   https://www.w3.org/TR/html401/struct/global.html#h-7.1
    #
    # but Loofah does not scrub them: such nodes don't satisfy the contract scrubbers expect
    # of a node (e.g., that it can be replaced, and that sibling and children nodes can be
    # created).
    def remove_comments_before_html_element(doc)
      doc.children.select(&:comment?).each(&:unlink)
      doc
    end
  end

  # Hook: extend the including class with ClassMethods.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # The node whose children get serialized: the document's <body> element.
  def serialize_root
    at_xpath("/html/body")
  end
end
168
+
169
module HtmlFragmentBehavior # :nodoc:
  # Class-level methods added to whichever class includes HtmlFragmentBehavior.
  module ClassMethods
    # Builds a fragment from +tags+ inside a freshly created owner document.
    #
    # The owner document's encoding is +encoding+ when given; otherwise the name of
    # +tags.encoding+ if +tags+ responds to +encoding+; otherwise "UTF-8".
    def parse(tags, encoding = nil)
      owner = document_klass.new
      owner.encoding = encoding || (tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8")

      new(owner, tags)
    end

    # The Loofah document class matching this fragment class (memoized per class).
    #
    # Raises ArgumentError for any class other than the Loofah HTML4/HTML5 fragment classes.
    def document_klass
      @document_klass ||= begin
        # the HTML5 constant is only consulted when HTML5 support is reported
        html5_fragment = Loofah.html5_support? && self == Loofah::HTML5::DocumentFragment

        if html5_fragment
          Loofah::HTML5::Document
        elsif self == Loofah::HTML4::DocumentFragment
          Loofah::HTML4::Document
        else
          raise ArgumentError, "unexpected class: #{self}"
        end
      end
    end
  end

  # Hook: extend the including class with ClassMethods.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Serializes the children of +serialize_root+ to a string.
  def to_s
    root = serialize_root
    root.children.to_s
  end

  alias_method :serialize, :to_s

  # The node whose children get serialized: a direct <body> child when one exists, else the
  # fragment itself.
  def serialize_root
    at_xpath("./body") || self
  end
end
207
+ end
@@ -1,88 +1,90 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "set"
3
4
 
4
5
  module Loofah
5
6
  module Elements
6
- STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
7
- address
8
- blockquote
9
- center
10
- dir
11
- div
12
- dl
13
- fieldset
14
- form
15
- h1
16
- h2
17
- h3
18
- h4
19
- h5
20
- h6
21
- hr
22
- isindex
23
- menu
24
- noframes
25
- noscript
26
- ol
27
- p
28
- pre
29
- table
30
- ul
31
- ]
7
+ STRICT_BLOCK_LEVEL_HTML4 = Set.new([
8
+ "address",
9
+ "blockquote",
10
+ "center",
11
+ "dir",
12
+ "div",
13
+ "dl",
14
+ "fieldset",
15
+ "form",
16
+ "h1",
17
+ "h2",
18
+ "h3",
19
+ "h4",
20
+ "h5",
21
+ "h6",
22
+ "hr",
23
+ "isindex",
24
+ "menu",
25
+ "noframes",
26
+ "noscript",
27
+ "ol",
28
+ "p",
29
+ "pre",
30
+ "table",
31
+ "ul",
32
+ ])
32
33
 
33
34
  # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
34
- STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
35
- address
36
- article
37
- aside
38
- blockquote
39
- canvas
40
- dd
41
- div
42
- dl
43
- dt
44
- fieldset
45
- figcaption
46
- figure
47
- footer
48
- form
49
- h1
50
- h2
51
- h3
52
- h4
53
- h5
54
- h6
55
- header
56
- hgroup
57
- hr
58
- li
59
- main
60
- nav
61
- noscript
62
- ol
63
- output
64
- p
65
- pre
66
- section
67
- table
68
- tfoot
69
- ul
70
- video
71
- ]
35
+ STRICT_BLOCK_LEVEL_HTML5 = Set.new([
36
+ "address",
37
+ "article",
38
+ "aside",
39
+ "blockquote",
40
+ "canvas",
41
+ "dd",
42
+ "div",
43
+ "dl",
44
+ "dt",
45
+ "fieldset",
46
+ "figcaption",
47
+ "figure",
48
+ "footer",
49
+ "form",
50
+ "h1",
51
+ "h2",
52
+ "h3",
53
+ "h4",
54
+ "h5",
55
+ "h6",
56
+ "header",
57
+ "hgroup",
58
+ "hr",
59
+ "li",
60
+ "main",
61
+ "nav",
62
+ "noscript",
63
+ "ol",
64
+ "output",
65
+ "p",
66
+ "pre",
67
+ "section",
68
+ "table",
69
+ "tfoot",
70
+ "ul",
71
+ "video",
72
+ ])
72
73
 
73
74
  # The following elements may also be considered block-level
74
75
  # elements since they may contain block-level elements
75
- LOOSE_BLOCK_LEVEL = Set.new %w[dd
76
- dt
77
- frameset
78
- li
79
- tbody
80
- td
81
- tfoot
82
- th
83
- thead
84
- tr
85
- ]
76
+ LOOSE_BLOCK_LEVEL = Set.new([
77
+ "dd",
78
+ "dt",
79
+ "frameset",
80
+ "li",
81
+ "tbody",
82
+ "td",
83
+ "tfoot",
84
+ "th",
85
+ "thead",
86
+ "tr",
87
+ ])
86
88
 
87
89
  # Elements that aren't block but should generate a newline in #to_text
88
90
  INLINE_LINE_BREAK = Set.new(["br"])
@@ -92,5 +94,5 @@ module Loofah
92
94
  LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
93
95
  end
94
96
 
95
- ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::Elements
97
+ ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants(::Loofah::Elements)
96
98
  end
@@ -1,43 +1,47 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
4
  module Helpers
4
5
  class << self
5
6
  #
6
7
  # A replacement for Rails's built-in +strip_tags+ helper.
7
8
  #
8
- # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
9
+ # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
9
10
  #
10
11
  def strip_tags(string_or_io)
11
- Loofah.fragment(string_or_io).text
12
+ Loofah.html4_fragment(string_or_io).text
12
13
  end
13
14
 
14
15
  #
15
16
  # A replacement for Rails's built-in +sanitize+ helper.
16
17
  #
17
- # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>") # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
18
+ # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>")
19
+ # # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
18
20
  #
19
21
  def sanitize(string_or_io)
20
- loofah_fragment = Loofah.fragment(string_or_io)
22
+ loofah_fragment = Loofah.html4_fragment(string_or_io)
21
23
  loofah_fragment.scrub!(:strip)
22
- loofah_fragment.xpath("./form").each { |form| form.remove }
24
+ loofah_fragment.xpath("./form").each(&:remove)
23
25
  loofah_fragment.to_s
24
26
  end
25
27
 
26
28
  #
27
29
  # A replacement for Rails's built-in +sanitize_css+ helper.
28
30
  #
29
- # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
31
+ # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://example.com/foo.jpg)")
32
+ # # => "display: block;"
30
33
  #
31
34
  def sanitize_css(style_string)
32
- ::Loofah::HTML5::Scrub.scrub_css style_string
35
+ ::Loofah::HTML5::Scrub.scrub_css(style_string)
33
36
  end
34
37
 
35
38
  #
36
- # A helper to remove extraneous whitespace from text-ified HTML
39
+ # A helper to remove extraneous whitespace from text-ified HTML.
40
+ #
37
41
  # TODO: remove this in a future major-point-release.
38
42
  #
39
43
  def remove_extraneous_whitespace(string)
40
- Loofah.remove_extraneous_whitespace string
44
+ Loofah.remove_extraneous_whitespace(string)
41
45
  end
42
46
  end
43
47
 
@@ -52,7 +56,7 @@ module Loofah
52
56
  end
53
57
 
54
58
  def white_list_sanitizer
55
- warn "warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead."
59
+ warn("warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead.")
56
60
  safe_list_sanitizer
57
61
  end
58
62
  end
@@ -62,7 +66,8 @@ module Loofah
62
66
  #
63
67
  # To use by default, call this in an application initializer:
64
68
  #
65
- # ActionView::Helpers::SanitizeHelper.full_sanitizer = ::Loofah::Helpers::ActionView::FullSanitizer.new
69
+ # ActionView::Helpers::SanitizeHelper.full_sanitizer = \
70
+ # Loofah::Helpers::ActionView::FullSanitizer.new
66
71
  #
67
72
  # Or, to generally opt-in to Loofah's view sanitizers:
68
73
  #
@@ -70,7 +75,7 @@ module Loofah
70
75
  #
71
76
  class FullSanitizer
72
77
  def sanitize(html, *args)
73
- Loofah::Helpers.strip_tags html
78
+ Loofah::Helpers.strip_tags(html)
74
79
  end
75
80
  end
76
81
 
@@ -79,7 +84,8 @@ module Loofah
79
84
  #
80
85
  # To use by default, call this in an application initializer:
81
86
  #
82
- # ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = ::Loofah::Helpers::ActionView::SafeListSanitizer.new
87
+ # ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = \
88
+ # Loofah::Helpers::ActionView::SafeListSanitizer.new
83
89
  #
84
90
  # Or, to generally opt-in to Loofah's view sanitizers:
85
91
  #
@@ -87,11 +93,11 @@ module Loofah
87
93
  #
88
94
  class SafeListSanitizer
89
95
  def sanitize(html, *args)
90
- Loofah::Helpers.sanitize html
96
+ Loofah::Helpers.sanitize(html)
91
97
  end
92
98
 
93
99
  def sanitize_css(style_string, *args)
94
- Loofah::Helpers.sanitize_css style_string
100
+ Loofah::Helpers.sanitize_css(style_string)
95
101
  end
96
102
  end
97
103
 
@@ -1,19 +1,17 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Loofah
3
- module HTML # :nodoc:
4
+ module HTML4 # :nodoc:
4
5
  #
5
- # Subclass of Nokogiri::HTML::Document.
6
+ # Subclass of Nokogiri::HTML4::Document.
6
7
  #
7
8
  # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
8
9
  #
9
- class Document < Nokogiri::HTML::Document
10
+ class Document < Nokogiri::HTML4::Document
10
11
  include Loofah::ScrubBehavior::Node
11
12
  include Loofah::DocumentDecorator
12
13
  include Loofah::TextBehavior
13
-
14
- def serialize_root
15
- at_xpath("/html/body")
16
- end
14
+ include Loofah::HtmlDocumentBehavior
17
15
  end
18
16
  end
19
17
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module Loofah
  module HTML4 # :nodoc:
    #
    #  A Nokogiri::HTML4::DocumentFragment subclass carrying Loofah's text and HTML-fragment
    #  behaviors.
    #
    #  Additional methods come from Loofah::ScrubBehavior and Loofah::TextBehavior.
    #
    class DocumentFragment < Nokogiri::HTML4::DocumentFragment
      include Loofah::TextBehavior
      include Loofah::HtmlFragmentBehavior
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module Loofah
  module HTML5 # :nodoc:
    #
    #  A Nokogiri::HTML5::Document subclass carrying Loofah's scrub, decorator, text and
    #  HTML-document behaviors.
    #
    #  Additional methods come from Loofah::ScrubBehavior and Loofah::TextBehavior.
    #
    class Document < Nokogiri::HTML5::Document
      include Loofah::ScrubBehavior::Node
      include Loofah::DocumentDecorator
      include Loofah::TextBehavior
      include Loofah::HtmlDocumentBehavior
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module Loofah
  module HTML5 # :nodoc:
    #
    #  A Nokogiri::HTML5::DocumentFragment subclass carrying Loofah's text and HTML-fragment
    #  behaviors.
    #
    #  Additional methods come from Loofah::ScrubBehavior and Loofah::TextBehavior.
    #
    class DocumentFragment < Nokogiri::HTML5::DocumentFragment
      include Loofah::TextBehavior
      include Loofah::HtmlFragmentBehavior
    end
  end
end
@@ -1,5 +1,6 @@
1
1
  # coding: utf-8
2
2
  # frozen_string_literal: true
3
+
3
4
  require "set"
4
5
 
5
6
  module Loofah
@@ -16,12 +17,12 @@ module Loofah
16
17
  #
17
18
  # see comments about CVE-2018-8048 within the tests for more information
18
19
  #
19
- BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
20
- href
21
- action
22
- src
23
- name
24
- ]
20
+ BROKEN_ESCAPING_ATTRIBUTES = Set.new([
21
+ "href",
22
+ "action",
23
+ "src",
24
+ "name",
25
+ ])
25
26
  BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
26
27
  end
27
28
  end