loofah 0.4.2 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +604 -0
  3. data/MIT-LICENSE.txt +3 -1
  4. data/README.md +410 -0
  5. data/SECURITY.md +18 -0
  6. data/lib/loofah/concerns.rb +207 -0
  7. data/lib/loofah/elements.rb +98 -0
  8. data/lib/loofah/helpers.rb +91 -4
  9. data/lib/loofah/html4/document.rb +17 -0
  10. data/lib/loofah/html4/document_fragment.rb +15 -0
  11. data/lib/loofah/html5/document.rb +17 -0
  12. data/lib/loofah/html5/document_fragment.rb +15 -0
  13. data/lib/loofah/html5/libxml2_workarounds.rb +28 -0
  14. data/lib/loofah/html5/safelist.rb +1058 -0
  15. data/lib/loofah/html5/scrub.rb +211 -40
  16. data/lib/loofah/metahelpers.rb +18 -0
  17. data/lib/loofah/scrubber.rb +31 -13
  18. data/lib/loofah/scrubbers.rb +262 -31
  19. data/lib/loofah/version.rb +6 -0
  20. data/lib/loofah/xml/document.rb +2 -0
  21. data/lib/loofah/xml/document_fragment.rb +6 -9
  22. data/lib/loofah.rb +131 -52
  23. metadata +79 -158
  24. data/CHANGELOG.rdoc +0 -92
  25. data/DEPRECATED.rdoc +0 -12
  26. data/Manifest.txt +0 -34
  27. data/README.rdoc +0 -330
  28. data/Rakefile +0 -61
  29. data/TODO.rdoc +0 -4
  30. data/benchmark/benchmark.rb +0 -149
  31. data/benchmark/fragment.html +0 -96
  32. data/benchmark/helper.rb +0 -73
  33. data/benchmark/www.slashdot.com.html +0 -2560
  34. data/init.rb +0 -1
  35. data/lib/loofah/active_record.rb +0 -62
  36. data/lib/loofah/html/document.rb +0 -22
  37. data/lib/loofah/html/document_fragment.rb +0 -46
  38. data/lib/loofah/html5/whitelist.rb +0 -174
  39. data/lib/loofah/instance_methods.rb +0 -77
  40. data/lib/loofah/xss_foliate.rb +0 -212
  41. data/test/helper.rb +0 -8
  42. data/test/html5/test_sanitizer.rb +0 -248
  43. data/test/test_active_record.rb +0 -146
  44. data/test/test_ad_hoc.rb +0 -272
  45. data/test/test_api.rb +0 -128
  46. data/test/test_helpers.rb +0 -28
  47. data/test/test_scrubber.rb +0 -227
  48. data/test/test_scrubbers.rb +0 -144
  49. data/test/test_xss_foliate.rb +0 -171
  50. data.tar.gz.sig +0 -0
  51. metadata.gz.sig +0 -2
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ module Loofah
6
+ module Elements
7
+ STRICT_BLOCK_LEVEL_HTML4 = Set.new([
8
+ "address",
9
+ "blockquote",
10
+ "center",
11
+ "dir",
12
+ "div",
13
+ "dl",
14
+ "fieldset",
15
+ "form",
16
+ "h1",
17
+ "h2",
18
+ "h3",
19
+ "h4",
20
+ "h5",
21
+ "h6",
22
+ "hr",
23
+ "isindex",
24
+ "menu",
25
+ "noframes",
26
+ "noscript",
27
+ "ol",
28
+ "p",
29
+ "pre",
30
+ "table",
31
+ "ul",
32
+ ])
33
+
34
+ # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
35
+ STRICT_BLOCK_LEVEL_HTML5 = Set.new([
36
+ "address",
37
+ "article",
38
+ "aside",
39
+ "blockquote",
40
+ "canvas",
41
+ "dd",
42
+ "div",
43
+ "dl",
44
+ "dt",
45
+ "fieldset",
46
+ "figcaption",
47
+ "figure",
48
+ "footer",
49
+ "form",
50
+ "h1",
51
+ "h2",
52
+ "h3",
53
+ "h4",
54
+ "h5",
55
+ "h6",
56
+ "header",
57
+ "hgroup",
58
+ "hr",
59
+ "li",
60
+ "main",
61
+ "nav",
62
+ "noscript",
63
+ "ol",
64
+ "output",
65
+ "p",
66
+ "pre",
67
+ "section",
68
+ "table",
69
+ "tfoot",
70
+ "ul",
71
+ "video",
72
+ ])
73
+
74
+ # The following elements may also be considered block-level
75
+ # elements since they may contain block-level elements
76
+ LOOSE_BLOCK_LEVEL = Set.new([
77
+ "dd",
78
+ "dt",
79
+ "frameset",
80
+ "li",
81
+ "tbody",
82
+ "td",
83
+ "tfoot",
84
+ "th",
85
+ "thead",
86
+ "tr",
87
+ ])
88
+
89
+ # Elements that aren't block but should generate a newline in #to_text
90
+ INLINE_LINE_BREAK = Set.new(["br"])
91
+
92
+ STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
93
+ BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
94
+ LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
95
+ end
96
+
97
+ ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants(::Loofah::Elements)
98
+ end
@@ -1,22 +1,109 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loofah
2
4
  module Helpers
3
5
  class << self
4
6
  #
5
7
  # A replacement for Rails's built-in +strip_tags+ helper.
6
8
  #
7
- # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
9
+ # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
8
10
  #
9
11
  def strip_tags(string_or_io)
10
- Loofah.fragment(string_or_io).text
12
+ Loofah.html4_fragment(string_or_io).text
11
13
  end
12
14
 
13
15
  #
14
16
  # A replacement for Rails's built-in +sanitize+ helper.
15
17
  #
16
- # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>") # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
18
+ # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>")
19
+ # # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
17
20
  #
18
21
  def sanitize(string_or_io)
19
- Loofah.scrub_fragment(string_or_io, :strip).to_s
22
+ loofah_fragment = Loofah.html4_fragment(string_or_io)
23
+ loofah_fragment.scrub!(:strip)
24
+ loofah_fragment.xpath("./form").each(&:remove)
25
+ loofah_fragment.to_s
26
+ end
27
+
28
+ #
29
+ # A replacement for Rails's built-in +sanitize_css+ helper.
30
+ #
31
+ # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://example.com/foo.jpg)")
32
+ # # => "display: block;"
33
+ #
34
+ def sanitize_css(style_string)
35
+ ::Loofah::HTML5::Scrub.scrub_css(style_string)
36
+ end
37
+
38
+ #
39
+ # A helper to remove extraneous whitespace from text-ified HTML.
40
+ #
41
+ # TODO: remove this in a future major-point-release.
42
+ #
43
+ def remove_extraneous_whitespace(string)
44
+ Loofah.remove_extraneous_whitespace(string)
45
+ end
46
+ end
47
+
48
+ module ActionView
49
+ module ClassMethods # :nodoc:
50
+ def full_sanitizer
51
+ @full_sanitizer ||= ::Loofah::Helpers::ActionView::FullSanitizer.new
52
+ end
53
+
54
+ def safe_list_sanitizer
55
+ @safe_list_sanitizer ||= ::Loofah::Helpers::ActionView::SafeListSanitizer.new
56
+ end
57
+
58
+ def white_list_sanitizer
59
+ warn("warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead.")
60
+ safe_list_sanitizer
61
+ end
62
+ end
63
+
64
+ #
65
+ # Replacement class for Rails's HTML::FullSanitizer.
66
+ #
67
+ # To use by default, call this in an application initializer:
68
+ #
69
+ # ActionView::Helpers::SanitizeHelper.full_sanitizer = \
70
+ # Loofah::Helpers::ActionView::FullSanitizer.new
71
+ #
72
+ # Or, to generally opt-in to Loofah's view sanitizers:
73
+ #
74
+ # Loofah::Helpers::ActionView.set_as_default_sanitizer
75
+ #
76
+ class FullSanitizer
77
+ def sanitize(html, *args)
78
+ Loofah::Helpers.strip_tags(html)
79
+ end
80
+ end
81
+
82
+ #
83
+ # Replacement class for Rails's HTML::WhiteListSanitizer.
84
+ #
85
+ # To use by default, call this in an application initializer:
86
+ #
87
+ # ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = \
88
+ # Loofah::Helpers::ActionView::SafeListSanitizer.new
89
+ #
90
+ # Or, to generally opt-in to Loofah's view sanitizers:
91
+ #
92
+ # Loofah::Helpers::ActionView.set_as_default_sanitizer
93
+ #
94
+ class SafeListSanitizer
95
+ def sanitize(html, *args)
96
+ Loofah::Helpers.sanitize(html)
97
+ end
98
+
99
+ def sanitize_css(style_string, *args)
100
+ Loofah::Helpers.sanitize_css(style_string)
101
+ end
102
+ end
103
+
104
+ WhiteListSanitizer = SafeListSanitizer
105
+ if Object.respond_to?(:deprecate_constant)
106
+ deprecate_constant :WhiteListSanitizer
20
107
  end
21
108
  end
22
109
  end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML4 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML4::Document.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class Document < Nokogiri::HTML4::Document
11
+ include Loofah::ScrubBehavior::Node
12
+ include Loofah::DocumentDecorator
13
+ include Loofah::TextBehavior
14
+ include Loofah::HtmlDocumentBehavior
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML4 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML4::DocumentFragment.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class DocumentFragment < Nokogiri::HTML4::DocumentFragment
11
+ include Loofah::TextBehavior
12
+ include Loofah::HtmlFragmentBehavior
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML5 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML5::Document.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class Document < Nokogiri::HTML5::Document
11
+ include Loofah::ScrubBehavior::Node
12
+ include Loofah::DocumentDecorator
13
+ include Loofah::TextBehavior
14
+ include Loofah::HtmlDocumentBehavior
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loofah
4
+ module HTML5 # :nodoc:
5
+ #
6
+ # Subclass of Nokogiri::HTML5::DocumentFragment.
7
+ #
8
+ # See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
9
+ #
10
+ class DocumentFragment < Nokogiri::HTML5::DocumentFragment
11
+ include Loofah::TextBehavior
12
+ include Loofah::HtmlFragmentBehavior
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "set"
5
+
6
+ module Loofah
7
+ #
8
+ # constants related to working around unhelpful libxml2 behavior
9
+ #
10
+ # ಠ_ಠ
11
+ #
12
+ module LibxmlWorkarounds
13
+ #
14
+ # these attributes and qualifying parent tags are determined by the code at:
15
+ #
16
+ # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
17
+ #
18
+ # see comments about CVE-2018-8048 within the tests for more information
19
+ #
20
+ BROKEN_ESCAPING_ATTRIBUTES = Set.new([
21
+ "href",
22
+ "action",
23
+ "src",
24
+ "name",
25
+ ])
26
+ BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
27
+ end
28
+ end