loofah 2.2.3 → 2.21.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +269 -31
  3. data/README.md +109 -124
  4. data/lib/loofah/concerns.rb +207 -0
  5. data/lib/loofah/elements.rb +85 -79
  6. data/lib/loofah/helpers.rb +37 -20
  7. data/lib/loofah/{html → html4}/document.rb +6 -7
  8. data/lib/loofah/html4/document_fragment.rb +15 -0
  9. data/lib/loofah/html5/document.rb +17 -0
  10. data/lib/loofah/html5/document_fragment.rb +15 -0
  11. data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
  12. data/lib/loofah/html5/safelist.rb +1055 -0
  13. data/lib/loofah/html5/scrub.rb +153 -58
  14. data/lib/loofah/metahelpers.rb +11 -6
  15. data/lib/loofah/scrubber.rb +22 -15
  16. data/lib/loofah/scrubbers.rb +66 -55
  17. data/lib/loofah/version.rb +6 -0
  18. data/lib/loofah/xml/document.rb +2 -0
  19. data/lib/loofah/xml/document_fragment.rb +4 -7
  20. data/lib/loofah.rb +131 -38
  21. metadata +28 -216
  22. data/.gemtest +0 -0
  23. data/Gemfile +0 -22
  24. data/Manifest.txt +0 -40
  25. data/Rakefile +0 -79
  26. data/benchmark/benchmark.rb +0 -149
  27. data/benchmark/fragment.html +0 -96
  28. data/benchmark/helper.rb +0 -73
  29. data/benchmark/www.slashdot.com.html +0 -2560
  30. data/lib/loofah/html/document_fragment.rb +0 -40
  31. data/lib/loofah/html5/whitelist.rb +0 -186
  32. data/lib/loofah/instance_methods.rb +0 -127
  33. data/test/assets/msword.html +0 -63
  34. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  35. data/test/helper.rb +0 -18
  36. data/test/html5/test_sanitizer.rb +0 -382
  37. data/test/integration/test_ad_hoc.rb +0 -204
  38. data/test/integration/test_helpers.rb +0 -43
  39. data/test/integration/test_html.rb +0 -72
  40. data/test/integration/test_scrubbers.rb +0 -400
  41. data/test/integration/test_xml.rb +0 -55
  42. data/test/unit/test_api.rb +0 -142
  43. data/test/unit/test_encoding.rb +0 -20
  44. data/test/unit/test_helpers.rb +0 -62
  45. data/test/unit/test_scrubber.rb +0 -229
  46. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,92 +1,98 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
2
4
 
3
5
  module Loofah
4
6
  module Elements
5
- STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
6
- address
7
- blockquote
8
- center
9
- dir
10
- div
11
- dl
12
- fieldset
13
- form
14
- h1
15
- h2
16
- h3
17
- h4
18
- h5
19
- h6
20
- hr
21
- isindex
22
- menu
23
- noframes
24
- noscript
25
- ol
26
- p
27
- pre
28
- table
29
- ul
30
- ]
7
+ STRICT_BLOCK_LEVEL_HTML4 = Set.new([
8
+ "address",
9
+ "blockquote",
10
+ "center",
11
+ "dir",
12
+ "div",
13
+ "dl",
14
+ "fieldset",
15
+ "form",
16
+ "h1",
17
+ "h2",
18
+ "h3",
19
+ "h4",
20
+ "h5",
21
+ "h6",
22
+ "hr",
23
+ "isindex",
24
+ "menu",
25
+ "noframes",
26
+ "noscript",
27
+ "ol",
28
+ "p",
29
+ "pre",
30
+ "table",
31
+ "ul",
32
+ ])
31
33
 
32
34
  # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
33
- STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
34
- address
35
- article
36
- aside
37
- blockquote
38
- canvas
39
- dd
40
- div
41
- dl
42
- dt
43
- fieldset
44
- figcaption
45
- figure
46
- footer
47
- form
48
- h1
49
- h2
50
- h3
51
- h4
52
- h5
53
- h6
54
- header
55
- hgroup
56
- hr
57
- li
58
- main
59
- nav
60
- noscript
61
- ol
62
- output
63
- p
64
- pre
65
- section
66
- table
67
- tfoot
68
- ul
69
- video
70
- ]
71
-
72
- STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
35
+ STRICT_BLOCK_LEVEL_HTML5 = Set.new([
36
+ "address",
37
+ "article",
38
+ "aside",
39
+ "blockquote",
40
+ "canvas",
41
+ "dd",
42
+ "div",
43
+ "dl",
44
+ "dt",
45
+ "fieldset",
46
+ "figcaption",
47
+ "figure",
48
+ "footer",
49
+ "form",
50
+ "h1",
51
+ "h2",
52
+ "h3",
53
+ "h4",
54
+ "h5",
55
+ "h6",
56
+ "header",
57
+ "hgroup",
58
+ "hr",
59
+ "li",
60
+ "main",
61
+ "nav",
62
+ "noscript",
63
+ "ol",
64
+ "output",
65
+ "p",
66
+ "pre",
67
+ "section",
68
+ "table",
69
+ "tfoot",
70
+ "ul",
71
+ "video",
72
+ ])
73
73
 
74
74
  # The following elements may also be considered block-level
75
75
  # elements since they may contain block-level elements
76
- LOOSE_BLOCK_LEVEL = Set.new %w[dd
77
- dt
78
- frameset
79
- li
80
- tbody
81
- td
82
- tfoot
83
- th
84
- thead
85
- tr
86
- ]
76
+ LOOSE_BLOCK_LEVEL = Set.new([
77
+ "dd",
78
+ "dt",
79
+ "frameset",
80
+ "li",
81
+ "tbody",
82
+ "td",
83
+ "tfoot",
84
+ "th",
85
+ "thead",
86
+ "tr",
87
+ ])
87
88
 
89
+ # Elements that aren't block but should generate a newline in #to_text
90
+ INLINE_LINE_BREAK = Set.new(["br"])
91
+
92
+ STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
88
93
  BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
94
+ LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
89
95
  end
90
96
 
91
- ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::Elements
97
+ ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants(::Loofah::Elements)
92
98
  end
@@ -1,42 +1,47 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  module Helpers
3
5
  class << self
4
6
  #
5
7
  # A replacement for Rails's built-in +strip_tags+ helper.
6
8
  #
7
- # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
9
+ # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
8
10
  #
9
11
  def strip_tags(string_or_io)
10
- Loofah.fragment(string_or_io).text
12
+ Loofah.html4_fragment(string_or_io).text
11
13
  end
12
14
 
13
15
  #
14
16
  # A replacement for Rails's built-in +sanitize+ helper.
15
17
  #
16
- # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>") # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
18
+ # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>")
19
+ # # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
17
20
  #
18
21
  def sanitize(string_or_io)
19
- loofah_fragment = Loofah.fragment(string_or_io)
22
+ loofah_fragment = Loofah.html4_fragment(string_or_io)
20
23
  loofah_fragment.scrub!(:strip)
21
- loofah_fragment.xpath("./form").each { |form| form.remove }
24
+ loofah_fragment.xpath("./form").each(&:remove)
22
25
  loofah_fragment.to_s
23
26
  end
24
27
 
25
28
  #
26
29
  # A replacement for Rails's built-in +sanitize_css+ helper.
27
30
  #
28
- # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
31
+ # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://example.com/foo.jpg)")
32
+ # # => "display: block;"
29
33
  #
30
- def sanitize_css style_string
31
- ::Loofah::HTML5::Scrub.scrub_css style_string
34
+ def sanitize_css(style_string)
35
+ ::Loofah::HTML5::Scrub.scrub_css(style_string)
32
36
  end
33
37
 
34
38
  #
35
- # A helper to remove extraneous whitespace from text-ified HTML
39
+ # A helper to remove extraneous whitespace from text-ified HTML.
40
+ #
36
41
  # TODO: remove this in a future major-point-release.
37
42
  #
38
43
  def remove_extraneous_whitespace(string)
39
- Loofah.remove_extraneous_whitespace string
44
+ Loofah.remove_extraneous_whitespace(string)
40
45
  end
41
46
  end
42
47
 
@@ -46,8 +51,13 @@ module Loofah
46
51
  @full_sanitizer ||= ::Loofah::Helpers::ActionView::FullSanitizer.new
47
52
  end
48
53
 
54
+ def safe_list_sanitizer
55
+ @safe_list_sanitizer ||= ::Loofah::Helpers::ActionView::SafeListSanitizer.new
56
+ end
57
+
49
58
  def white_list_sanitizer
50
- @white_list_sanitizer ||= ::Loofah::Helpers::ActionView::WhiteListSanitizer.new
59
+ warn("warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead.")
60
+ safe_list_sanitizer
51
61
  end
52
62
  end
53
63
 
@@ -56,15 +66,16 @@ module Loofah
56
66
  #
57
67
  # To use by default, call this in an application initializer:
58
68
  #
59
- # ActionView::Helpers::SanitizeHelper.full_sanitizer = ::Loofah::Helpers::ActionView::FullSanitizer.new
69
+ # ActionView::Helpers::SanitizeHelper.full_sanitizer = \
70
+ # Loofah::Helpers::ActionView::FullSanitizer.new
60
71
  #
61
72
  # Or, to generally opt-in to Loofah's view sanitizers:
62
73
  #
63
74
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
64
75
  #
65
76
  class FullSanitizer
66
- def sanitize html, *args
67
- Loofah::Helpers.strip_tags html
77
+ def sanitize(html, *args)
78
+ Loofah::Helpers.strip_tags(html)
68
79
  end
69
80
  end
70
81
 
@@ -73,21 +84,27 @@ module Loofah
73
84
  #
74
85
  # To use by default, call this in an application initializer:
75
86
  #
76
- # ActionView::Helpers::SanitizeHelper.white_list_sanitizer = ::Loofah::Helpers::ActionView::WhiteListSanitizer.new
87
+ # ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = \
88
+ # Loofah::Helpers::ActionView::SafeListSanitizer.new
77
89
  #
78
90
  # Or, to generally opt-in to Loofah's view sanitizers:
79
91
  #
80
92
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
81
93
  #
82
- class WhiteListSanitizer
83
- def sanitize html, *args
84
- Loofah::Helpers.sanitize html
94
+ class SafeListSanitizer
95
+ def sanitize(html, *args)
96
+ Loofah::Helpers.sanitize(html)
85
97
  end
86
98
 
87
- def sanitize_css style_string, *args
88
- Loofah::Helpers.sanitize_css style_string
99
+ def sanitize_css(style_string, *args)
100
+ Loofah::Helpers.sanitize_css(style_string)
89
101
  end
90
102
  end
103
+
104
+ WhiteListSanitizer = SafeListSanitizer
105
+ if Object.respond_to?(:deprecate_constant)
106
+ deprecate_constant :WhiteListSanitizer
107
+ end
91
108
  end
92
109
  end
93
110
  end
@@ -1,18 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
- module HTML # :nodoc:
4
+ module HTML4 # :nodoc:
3
5
  #
4
- # Subclass of Nokogiri::HTML::Document.
6
+ # Subclass of Nokogiri::HTML4::Document.
5
7
  #
6
8
  # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
7
9
  #
8
- class Document < Nokogiri::HTML::Document
10
+ class Document < Nokogiri::HTML4::Document
9
11
  include Loofah::ScrubBehavior::Node
10
12
  include Loofah::DocumentDecorator
11
13
  include Loofah::TextBehavior
12
-
13
- def serialize_root
14
- at_xpath("/html/body")
15
- end
14
+ include Loofah::HtmlDocumentBehavior
16
15
  end
17
16
  end
18
17
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML4 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML4::DocumentFragment.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class DocumentFragment < Nokogiri::HTML4::DocumentFragment
11
+ include Loofah::TextBehavior
12
+ include Loofah::HtmlFragmentBehavior
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML5 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML5::Document.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class Document < Nokogiri::HTML5::Document
11
+ include Loofah::ScrubBehavior::Node
12
+ include Loofah::DocumentDecorator
13
+ include Loofah::TextBehavior
14
+ include Loofah::HtmlDocumentBehavior
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML5 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML5::DocumentFragment.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class DocumentFragment < Nokogiri::HTML5::DocumentFragment
11
+ include Loofah::TextBehavior
12
+ include Loofah::HtmlFragmentBehavior
13
+ end
14
+ end
15
+ end
@@ -1,5 +1,7 @@
1
1
  # coding: utf-8
2
- require 'set'
2
+ # frozen_string_literal: true
3
+
4
+ require "set"
3
5
 
4
6
  module Loofah
5
7
  #
@@ -15,12 +17,12 @@ module Loofah
15
17
  #
16
18
  # see comments about CVE-2018-8048 within the tests for more information
17
19
  #
18
- BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
19
- href
20
- action
21
- src
22
- name
23
- ]
24
- BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
20
+ BROKEN_ESCAPING_ATTRIBUTES = Set.new([
21
+ "href",
22
+ "action",
23
+ "src",
24
+ "name",
25
+ ])
26
+ BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
25
27
  end
26
28
  end