loofah 2.2.3 → 2.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +269 -31
  3. data/README.md +109 -124
  4. data/lib/loofah/concerns.rb +207 -0
  5. data/lib/loofah/elements.rb +85 -79
  6. data/lib/loofah/helpers.rb +37 -20
  7. data/lib/loofah/{html → html4}/document.rb +6 -7
  8. data/lib/loofah/html4/document_fragment.rb +15 -0
  9. data/lib/loofah/html5/document.rb +17 -0
  10. data/lib/loofah/html5/document_fragment.rb +15 -0
  11. data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
  12. data/lib/loofah/html5/safelist.rb +1055 -0
  13. data/lib/loofah/html5/scrub.rb +153 -58
  14. data/lib/loofah/metahelpers.rb +11 -6
  15. data/lib/loofah/scrubber.rb +22 -15
  16. data/lib/loofah/scrubbers.rb +66 -55
  17. data/lib/loofah/version.rb +6 -0
  18. data/lib/loofah/xml/document.rb +2 -0
  19. data/lib/loofah/xml/document_fragment.rb +4 -7
  20. data/lib/loofah.rb +131 -38
  21. metadata +28 -216
  22. data/.gemtest +0 -0
  23. data/Gemfile +0 -22
  24. data/Manifest.txt +0 -40
  25. data/Rakefile +0 -79
  26. data/benchmark/benchmark.rb +0 -149
  27. data/benchmark/fragment.html +0 -96
  28. data/benchmark/helper.rb +0 -73
  29. data/benchmark/www.slashdot.com.html +0 -2560
  30. data/lib/loofah/html/document_fragment.rb +0 -40
  31. data/lib/loofah/html5/whitelist.rb +0 -186
  32. data/lib/loofah/instance_methods.rb +0 -127
  33. data/test/assets/msword.html +0 -63
  34. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  35. data/test/helper.rb +0 -18
  36. data/test/html5/test_sanitizer.rb +0 -382
  37. data/test/integration/test_ad_hoc.rb +0 -204
  38. data/test/integration/test_helpers.rb +0 -43
  39. data/test/integration/test_html.rb +0 -72
  40. data/test/integration/test_scrubbers.rb +0 -400
  41. data/test/integration/test_xml.rb +0 -55
  42. data/test/unit/test_api.rb +0 -142
  43. data/test/unit/test_encoding.rb +0 -20
  44. data/test/unit/test_helpers.rb +0 -62
  45. data/test/unit/test_scrubber.rb +0 -229
  46. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,92 +1,98 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
2
4
 
3
5
  module Loofah
4
6
  module Elements
5
- STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
6
- address
7
- blockquote
8
- center
9
- dir
10
- div
11
- dl
12
- fieldset
13
- form
14
- h1
15
- h2
16
- h3
17
- h4
18
- h5
19
- h6
20
- hr
21
- isindex
22
- menu
23
- noframes
24
- noscript
25
- ol
26
- p
27
- pre
28
- table
29
- ul
30
- ]
7
+ STRICT_BLOCK_LEVEL_HTML4 = Set.new([
8
+ "address",
9
+ "blockquote",
10
+ "center",
11
+ "dir",
12
+ "div",
13
+ "dl",
14
+ "fieldset",
15
+ "form",
16
+ "h1",
17
+ "h2",
18
+ "h3",
19
+ "h4",
20
+ "h5",
21
+ "h6",
22
+ "hr",
23
+ "isindex",
24
+ "menu",
25
+ "noframes",
26
+ "noscript",
27
+ "ol",
28
+ "p",
29
+ "pre",
30
+ "table",
31
+ "ul",
32
+ ])
31
33
 
32
34
  # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
33
- STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
34
- address
35
- article
36
- aside
37
- blockquote
38
- canvas
39
- dd
40
- div
41
- dl
42
- dt
43
- fieldset
44
- figcaption
45
- figure
46
- footer
47
- form
48
- h1
49
- h2
50
- h3
51
- h4
52
- h5
53
- h6
54
- header
55
- hgroup
56
- hr
57
- li
58
- main
59
- nav
60
- noscript
61
- ol
62
- output
63
- p
64
- pre
65
- section
66
- table
67
- tfoot
68
- ul
69
- video
70
- ]
71
-
72
- STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
35
+ STRICT_BLOCK_LEVEL_HTML5 = Set.new([
36
+ "address",
37
+ "article",
38
+ "aside",
39
+ "blockquote",
40
+ "canvas",
41
+ "dd",
42
+ "div",
43
+ "dl",
44
+ "dt",
45
+ "fieldset",
46
+ "figcaption",
47
+ "figure",
48
+ "footer",
49
+ "form",
50
+ "h1",
51
+ "h2",
52
+ "h3",
53
+ "h4",
54
+ "h5",
55
+ "h6",
56
+ "header",
57
+ "hgroup",
58
+ "hr",
59
+ "li",
60
+ "main",
61
+ "nav",
62
+ "noscript",
63
+ "ol",
64
+ "output",
65
+ "p",
66
+ "pre",
67
+ "section",
68
+ "table",
69
+ "tfoot",
70
+ "ul",
71
+ "video",
72
+ ])
73
73
 
74
74
  # The following elements may also be considered block-level
75
75
  # elements since they may contain block-level elements
76
- LOOSE_BLOCK_LEVEL = Set.new %w[dd
77
- dt
78
- frameset
79
- li
80
- tbody
81
- td
82
- tfoot
83
- th
84
- thead
85
- tr
86
- ]
76
+ LOOSE_BLOCK_LEVEL = Set.new([
77
+ "dd",
78
+ "dt",
79
+ "frameset",
80
+ "li",
81
+ "tbody",
82
+ "td",
83
+ "tfoot",
84
+ "th",
85
+ "thead",
86
+ "tr",
87
+ ])
87
88
 
89
+ # Elements that aren't block but should generate a newline in #to_text
90
+ INLINE_LINE_BREAK = Set.new(["br"])
91
+
92
+ STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
88
93
  BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
94
+ LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
89
95
  end
90
96
 
91
- ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::Elements
97
+ ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants(::Loofah::Elements)
92
98
  end
@@ -1,42 +1,47 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  module Helpers
3
5
  class << self
4
6
  #
5
7
  # A replacement for Rails's built-in +strip_tags+ helper.
6
8
  #
7
- # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
9
+ # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
8
10
  #
9
11
  def strip_tags(string_or_io)
10
- Loofah.fragment(string_or_io).text
12
+ Loofah.html4_fragment(string_or_io).text
11
13
  end
12
14
 
13
15
  #
14
16
  # A replacement for Rails's built-in +sanitize+ helper.
15
17
  #
16
- # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>") # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
18
+ # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>")
19
+ # # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
17
20
  #
18
21
  def sanitize(string_or_io)
19
- loofah_fragment = Loofah.fragment(string_or_io)
22
+ loofah_fragment = Loofah.html4_fragment(string_or_io)
20
23
  loofah_fragment.scrub!(:strip)
21
- loofah_fragment.xpath("./form").each { |form| form.remove }
24
+ loofah_fragment.xpath("./form").each(&:remove)
22
25
  loofah_fragment.to_s
23
26
  end
24
27
 
25
28
  #
26
29
  # A replacement for Rails's built-in +sanitize_css+ helper.
27
30
  #
28
- # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
31
+ # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://example.com/foo.jpg)")
32
+ # # => "display: block;"
29
33
  #
30
- def sanitize_css style_string
31
- ::Loofah::HTML5::Scrub.scrub_css style_string
34
+ def sanitize_css(style_string)
35
+ ::Loofah::HTML5::Scrub.scrub_css(style_string)
32
36
  end
33
37
 
34
38
  #
35
- # A helper to remove extraneous whitespace from text-ified HTML
39
+ # A helper to remove extraneous whitespace from text-ified HTML.
40
+ #
36
41
  # TODO: remove this in a future major-point-release.
37
42
  #
38
43
  def remove_extraneous_whitespace(string)
39
- Loofah.remove_extraneous_whitespace string
44
+ Loofah.remove_extraneous_whitespace(string)
40
45
  end
41
46
  end
42
47
 
@@ -46,8 +51,13 @@ module Loofah
46
51
  @full_sanitizer ||= ::Loofah::Helpers::ActionView::FullSanitizer.new
47
52
  end
48
53
 
54
+ def safe_list_sanitizer
55
+ @safe_list_sanitizer ||= ::Loofah::Helpers::ActionView::SafeListSanitizer.new
56
+ end
57
+
49
58
  def white_list_sanitizer
50
- @white_list_sanitizer ||= ::Loofah::Helpers::ActionView::WhiteListSanitizer.new
59
+ warn("warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead.")
60
+ safe_list_sanitizer
51
61
  end
52
62
  end
53
63
 
@@ -56,15 +66,16 @@ module Loofah
56
66
  #
57
67
  # To use by default, call this in an application initializer:
58
68
  #
59
- # ActionView::Helpers::SanitizeHelper.full_sanitizer = ::Loofah::Helpers::ActionView::FullSanitizer.new
69
+ # ActionView::Helpers::SanitizeHelper.full_sanitizer = \
70
+ # Loofah::Helpers::ActionView::FullSanitizer.new
60
71
  #
61
72
  # Or, to generally opt-in to Loofah's view sanitizers:
62
73
  #
63
74
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
64
75
  #
65
76
  class FullSanitizer
66
- def sanitize html, *args
67
- Loofah::Helpers.strip_tags html
77
+ def sanitize(html, *args)
78
+ Loofah::Helpers.strip_tags(html)
68
79
  end
69
80
  end
70
81
 
@@ -73,21 +84,27 @@ module Loofah
73
84
  #
74
85
  # To use by default, call this in an application initializer:
75
86
  #
76
- # ActionView::Helpers::SanitizeHelper.white_list_sanitizer = ::Loofah::Helpers::ActionView::WhiteListSanitizer.new
87
+ # ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = \
88
+ # Loofah::Helpers::ActionView::SafeListSanitizer.new
77
89
  #
78
90
  # Or, to generally opt-in to Loofah's view sanitizers:
79
91
  #
80
92
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
81
93
  #
82
- class WhiteListSanitizer
83
- def sanitize html, *args
84
- Loofah::Helpers.sanitize html
94
+ class SafeListSanitizer
95
+ def sanitize(html, *args)
96
+ Loofah::Helpers.sanitize(html)
85
97
  end
86
98
 
87
- def sanitize_css style_string, *args
88
- Loofah::Helpers.sanitize_css style_string
99
+ def sanitize_css(style_string, *args)
100
+ Loofah::Helpers.sanitize_css(style_string)
89
101
  end
90
102
  end
103
+
104
+ WhiteListSanitizer = SafeListSanitizer
105
+ if Object.respond_to?(:deprecate_constant)
106
+ deprecate_constant :WhiteListSanitizer
107
+ end
91
108
  end
92
109
  end
93
110
  end
@@ -1,18 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
- module HTML # :nodoc:
4
+ module HTML4 # :nodoc:
3
5
  #
4
- # Subclass of Nokogiri::HTML::Document.
6
+ # Subclass of Nokogiri::HTML4::Document.
5
7
  #
6
8
  # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
7
9
  #
8
- class Document < Nokogiri::HTML::Document
10
+ class Document < Nokogiri::HTML4::Document
9
11
  include Loofah::ScrubBehavior::Node
10
12
  include Loofah::DocumentDecorator
11
13
  include Loofah::TextBehavior
12
-
13
- def serialize_root
14
- at_xpath("/html/body")
15
- end
14
+ include Loofah::HtmlDocumentBehavior
16
15
  end
17
16
  end
18
17
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML4 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML4::DocumentFragment.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class DocumentFragment < Nokogiri::HTML4::DocumentFragment
11
+ include Loofah::TextBehavior
12
+ include Loofah::HtmlFragmentBehavior
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML5 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML5::Document.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class Document < Nokogiri::HTML5::Document
11
+ include Loofah::ScrubBehavior::Node
12
+ include Loofah::DocumentDecorator
13
+ include Loofah::TextBehavior
14
+ include Loofah::HtmlDocumentBehavior
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML5 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML5::DocumentFragment.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class DocumentFragment < Nokogiri::HTML5::DocumentFragment
11
+ include Loofah::TextBehavior
12
+ include Loofah::HtmlFragmentBehavior
13
+ end
14
+ end
15
+ end
@@ -1,5 +1,7 @@
1
1
  # coding: utf-8
2
- require 'set'
2
+ # frozen_string_literal: true
3
+
4
+ require "set"
3
5
 
4
6
  module Loofah
5
7
  #
@@ -15,12 +17,12 @@ module Loofah
15
17
  #
16
18
  # see comments about CVE-2018-8048 within the tests for more information
17
19
  #
18
- BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
19
- href
20
- action
21
- src
22
- name
23
- ]
24
- BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
20
+ BROKEN_ESCAPING_ATTRIBUTES = Set.new([
21
+ "href",
22
+ "action",
23
+ "src",
24
+ "name",
25
+ ])
26
+ BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
25
27
  end
26
28
  end