loofah 2.2.3 → 2.6.0

Sign up to get free protection for your applications and access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -23,7 +23,7 @@ def compare_scrub_methods
23
23
  end
24
24
 
25
25
  module TestSet
26
- def test_set options={}
26
+ def test_set(options = {})
27
27
  scale = options[:rehearse] ? 10 : 1
28
28
  puts self.class.name
29
29
 
@@ -49,6 +49,7 @@ end
49
49
 
50
50
  class HeadToHeadRailsSanitize < Measure
51
51
  include TestSet
52
+
52
53
  def bench(content, ntimes, fragment_p)
53
54
  clear_measure
54
55
 
@@ -65,6 +66,7 @@ end
65
66
 
66
67
  class HeadToHeadRailsStripTags < Measure
67
68
  include TestSet
69
+
68
70
  def bench(content, ntimes, fragment_p)
69
71
  clear_measure
70
72
 
@@ -81,6 +83,7 @@ end
81
83
 
82
84
  class HeadToHeadSanitizerSanitize < Measure
83
85
  include TestSet
86
+
84
87
  def bench(content, ntimes, fragment_p)
85
88
  clear_measure
86
89
 
@@ -100,6 +103,7 @@ end
100
103
 
101
104
  class HeadToHeadHtml5LibSanitize < Measure
102
105
  include TestSet
106
+
103
107
  def bench(content, ntimes, fragment_p)
104
108
  clear_measure
105
109
 
@@ -120,6 +124,7 @@ end
120
124
 
121
125
  class HeadToHeadHTMLFilter < Measure
122
126
  include TestSet
127
+
123
128
  def bench(content, ntimes, fragment_p)
124
129
  clear_measure
125
130
 
@@ -1,13 +1,13 @@
1
- require 'rubygems'
2
- require 'open-uri'
3
- require 'hpricot'
1
+ require "rubygems"
2
+ require "open-uri"
3
+ require "hpricot"
4
4
  require File.expand_path(File.dirname(__FILE__) + "/../lib/loofah")
5
- require 'benchmark'
5
+ require "benchmark"
6
6
  require "action_view"
7
7
  require "action_controller/vendor/html-scanner"
8
8
  require "sanitize"
9
- require 'hitimes'
10
- require 'htmlfilter'
9
+ require "hitimes"
10
+ require "htmlfilter"
11
11
 
12
12
  unless defined?(HTMLFilter)
13
13
  HTMLFilter = HtmlFilter
@@ -19,20 +19,20 @@ class RailsSanitize
19
19
  end
20
20
 
21
21
  class HTML5libSanitize
22
- require 'html5/html5parser'
23
- require 'html5/liberalxmlparser'
24
- require 'html5/treewalkers'
25
- require 'html5/treebuilders'
26
- require 'html5/serializer'
27
- require 'html5/sanitizer'
22
+ require "html5/html5parser"
23
+ require "html5/liberalxmlparser"
24
+ require "html5/treewalkers"
25
+ require "html5/treebuilders"
26
+ require "html5/serializer"
27
+ require "html5/sanitizer"
28
28
 
29
29
  include HTML5
30
30
 
31
31
  def sanitize(html)
32
32
  HTMLParser.parse_fragment(html, {
33
- :tokenizer => HTMLSanitizer,
34
- :encoding => 'utf-8',
35
- :tree => TreeBuilders::REXML::TreeBuilder
33
+ :tokenizer => HTMLSanitizer,
34
+ :encoding => "utf-8",
35
+ :tree => TreeBuilders::REXML::TreeBuilder,
36
36
  }).to_s
37
37
  end
38
38
  end
@@ -1,22 +1,23 @@
1
+ # frozen_string_literal: true
1
2
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
3
 
3
- require 'nokogiri'
4
+ require "nokogiri"
4
5
 
5
- require 'loofah/metahelpers'
6
- require 'loofah/elements'
6
+ require "loofah/metahelpers"
7
+ require "loofah/elements"
7
8
 
8
- require 'loofah/html5/whitelist'
9
- require 'loofah/html5/libxml2_workarounds'
10
- require 'loofah/html5/scrub'
9
+ require "loofah/html5/safelist"
10
+ require "loofah/html5/libxml2_workarounds"
11
+ require "loofah/html5/scrub"
11
12
 
12
- require 'loofah/scrubber'
13
- require 'loofah/scrubbers'
13
+ require "loofah/scrubber"
14
+ require "loofah/scrubbers"
14
15
 
15
- require 'loofah/instance_methods'
16
- require 'loofah/xml/document'
17
- require 'loofah/xml/document_fragment'
18
- require 'loofah/html/document'
19
- require 'loofah/html/document_fragment'
16
+ require "loofah/instance_methods"
17
+ require "loofah/xml/document"
18
+ require "loofah/xml/document_fragment"
19
+ require "loofah/html/document"
20
+ require "loofah/html/document_fragment"
20
21
 
21
22
  # == Strings and IO Objects as Input
22
23
  #
@@ -28,13 +29,13 @@ require 'loofah/html/document_fragment'
28
29
  #
29
30
  module Loofah
30
31
  # The version of Loofah you are using
31
- VERSION = '2.2.3'
32
+ VERSION = "2.6.0"
32
33
 
33
34
  class << self
34
35
  # Shortcut for Loofah::HTML::Document.parse
35
36
  # This method accepts the same parameters as Nokogiri::HTML::Document.parse
36
37
  def document(*args, &block)
37
- Loofah::HTML::Document.parse(*args, &block)
38
+ remove_comments_before_html_element Loofah::HTML::Document.parse(*args, &block)
38
39
  end
39
40
 
40
41
  # Shortcut for Loofah::HTML::DocumentFragment.parse
@@ -77,7 +78,25 @@ module Loofah
77
78
 
78
79
  # A helper to remove extraneous whitespace from text-ified HTML
79
80
  def remove_extraneous_whitespace(string)
80
- string.gsub(/\n\s*\n\s*\n/,"\n\n")
81
+ string.gsub(/\n\s*\n\s*\n/, "\n\n")
82
+ end
83
+
84
+ private
85
+
86
+ # remove comments that exist outside of the HTML element.
87
+ #
88
+ # these comments are allowed by the HTML spec:
89
+ #
90
+ # https://www.w3.org/TR/html401/struct/global.html#h-7.1
91
+ #
92
+ # but are not scrubbed by Loofah because these nodes don't meet
93
+ # the contract that scrubbers expect of a node (e.g., it can be
94
+ # replaced, sibling and children nodes can be created).
95
+ def remove_comments_before_html_element(doc)
96
+ doc.children.each do |child|
97
+ child.unlink if child.comment?
98
+ end
99
+ doc
81
100
  end
82
101
  end
83
102
  end
@@ -1,89 +1,90 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+ require "set"
2
3
 
3
4
  module Loofah
4
5
  module Elements
5
6
  STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
6
- address
7
- blockquote
8
- center
9
- dir
10
- div
11
- dl
12
- fieldset
13
- form
14
- h1
15
- h2
16
- h3
17
- h4
18
- h5
19
- h6
20
- hr
21
- isindex
22
- menu
23
- noframes
24
- noscript
25
- ol
26
- p
27
- pre
28
- table
29
- ul
30
- ]
7
+ address
8
+ blockquote
9
+ center
10
+ dir
11
+ div
12
+ dl
13
+ fieldset
14
+ form
15
+ h1
16
+ h2
17
+ h3
18
+ h4
19
+ h5
20
+ h6
21
+ hr
22
+ isindex
23
+ menu
24
+ noframes
25
+ noscript
26
+ ol
27
+ p
28
+ pre
29
+ table
30
+ ul
31
+ ]
31
32
 
32
33
  # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
33
34
  STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
34
- address
35
- article
36
- aside
37
- blockquote
38
- canvas
39
- dd
40
- div
41
- dl
42
- dt
43
- fieldset
44
- figcaption
45
- figure
46
- footer
47
- form
48
- h1
49
- h2
50
- h3
51
- h4
52
- h5
53
- h6
54
- header
55
- hgroup
56
- hr
57
- li
58
- main
59
- nav
60
- noscript
61
- ol
62
- output
63
- p
64
- pre
65
- section
66
- table
67
- tfoot
68
- ul
69
- video
70
- ]
35
+ address
36
+ article
37
+ aside
38
+ blockquote
39
+ canvas
40
+ dd
41
+ div
42
+ dl
43
+ dt
44
+ fieldset
45
+ figcaption
46
+ figure
47
+ footer
48
+ form
49
+ h1
50
+ h2
51
+ h3
52
+ h4
53
+ h5
54
+ h6
55
+ header
56
+ hgroup
57
+ hr
58
+ li
59
+ main
60
+ nav
61
+ noscript
62
+ ol
63
+ output
64
+ p
65
+ pre
66
+ section
67
+ table
68
+ tfoot
69
+ ul
70
+ video
71
+ ]
71
72
 
72
73
  STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
73
74
 
74
75
  # The following elements may also be considered block-level
75
76
  # elements since they may contain block-level elements
76
77
  LOOSE_BLOCK_LEVEL = Set.new %w[dd
77
- dt
78
- frameset
79
- li
80
- tbody
81
- td
82
- tfoot
83
- th
84
- thead
85
- tr
86
- ]
78
+ dt
79
+ frameset
80
+ li
81
+ tbody
82
+ td
83
+ tfoot
84
+ th
85
+ thead
86
+ tr
87
+ ]
87
88
 
88
89
  BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
89
90
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module Helpers
3
4
  class << self
@@ -27,7 +28,7 @@ module Loofah
27
28
  #
28
29
  # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
29
30
  #
30
- def sanitize_css style_string
31
+ def sanitize_css(style_string)
31
32
  ::Loofah::HTML5::Scrub.scrub_css style_string
32
33
  end
33
34
 
@@ -46,8 +47,13 @@ module Loofah
46
47
  @full_sanitizer ||= ::Loofah::Helpers::ActionView::FullSanitizer.new
47
48
  end
48
49
 
50
+ def safe_list_sanitizer
51
+ @safe_list_sanitizer ||= ::Loofah::Helpers::ActionView::SafeListSanitizer.new
52
+ end
53
+
49
54
  def white_list_sanitizer
50
- @white_list_sanitizer ||= ::Loofah::Helpers::ActionView::WhiteListSanitizer.new
55
+ warn "warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead."
56
+ safe_list_sanitizer
51
57
  end
52
58
  end
53
59
 
@@ -63,7 +69,7 @@ module Loofah
63
69
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
64
70
  #
65
71
  class FullSanitizer
66
- def sanitize html, *args
72
+ def sanitize(html, *args)
67
73
  Loofah::Helpers.strip_tags html
68
74
  end
69
75
  end
@@ -73,21 +79,26 @@ module Loofah
73
79
  #
74
80
  # To use by default, call this in an application initializer:
75
81
  #
76
- # ActionView::Helpers::SanitizeHelper.white_list_sanitizer = ::Loofah::Helpers::ActionView::WhiteListSanitizer.new
82
+ # ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = ::Loofah::Helpers::ActionView::SafeListSanitizer.new
77
83
  #
78
84
  # Or, to generally opt-in to Loofah's view sanitizers:
79
85
  #
80
86
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
81
87
  #
82
- class WhiteListSanitizer
83
- def sanitize html, *args
88
+ class SafeListSanitizer
89
+ def sanitize(html, *args)
84
90
  Loofah::Helpers.sanitize html
85
91
  end
86
92
 
87
- def sanitize_css style_string, *args
93
+ def sanitize_css(style_string, *args)
88
94
  Loofah::Helpers.sanitize_css style_string
89
95
  end
90
96
  end
97
+
98
+ WhiteListSanitizer = SafeListSanitizer
99
+ if Object.respond_to?(:deprecate_constant)
100
+ deprecate_constant :WhiteListSanitizer
101
+ end
91
102
  end
92
103
  end
93
104
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module HTML # :nodoc:
3
4
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module HTML # :nodoc:
3
4
  #
@@ -14,10 +15,10 @@ module Loofah
14
15
  # constructor. Applications should use Loofah.fragment to
15
16
  # parse a fragment.
16
17
  #
17
- def parse tags, encoding = nil
18
+ def parse(tags, encoding = nil)
18
19
  doc = Loofah::HTML::Document.new
19
20
 
20
- encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
21
+ encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
21
22
  doc.encoding = encoding
22
23
 
23
24
  new(doc, tags)
@@ -30,6 +31,7 @@ module Loofah
30
31
  def to_s
31
32
  serialize_root.children.to_s
32
33
  end
34
+
33
35
  alias :serialize :to_s
34
36
 
35
37
  def serialize_root
@@ -1,5 +1,6 @@
1
1
  # coding: utf-8
2
- require 'set'
2
+ # frozen_string_literal: true
3
+ require "set"
3
4
 
4
5
  module Loofah
5
6
  #
@@ -16,11 +17,11 @@ module Loofah
16
17
  # see comments about CVE-2018-8048 within the tests for more information
17
18
  #
18
19
  BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
19
- href
20
- action
21
- src
22
- name
23
- ]
24
- BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
20
+ href
21
+ action
22
+ src
23
+ name
24
+ ]
25
+ BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
25
26
  end
26
27
  end