loofah 2.3.0 → 2.7.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -1,13 +1,13 @@
1
- require 'rubygems'
2
- require 'open-uri'
3
- require 'hpricot'
1
+ require "rubygems"
2
+ require "open-uri"
3
+ require "hpricot"
4
4
  require File.expand_path(File.dirname(__FILE__) + "/../lib/loofah")
5
- require 'benchmark'
5
+ require "benchmark"
6
6
  require "action_view"
7
7
  require "action_controller/vendor/html-scanner"
8
8
  require "sanitize"
9
- require 'hitimes'
10
- require 'htmlfilter'
9
+ require "hitimes"
10
+ require "htmlfilter"
11
11
 
12
12
  unless defined?(HTMLFilter)
13
13
  HTMLFilter = HtmlFilter
@@ -19,20 +19,20 @@ class RailsSanitize
19
19
  end
20
20
 
21
21
  class HTML5libSanitize
22
- require 'html5/html5parser'
23
- require 'html5/liberalxmlparser'
24
- require 'html5/treewalkers'
25
- require 'html5/treebuilders'
26
- require 'html5/serializer'
27
- require 'html5/sanitizer'
22
+ require "html5/html5parser"
23
+ require "html5/liberalxmlparser"
24
+ require "html5/treewalkers"
25
+ require "html5/treebuilders"
26
+ require "html5/serializer"
27
+ require "html5/sanitizer"
28
28
 
29
29
  include HTML5
30
30
 
31
31
  def sanitize(html)
32
32
  HTMLParser.parse_fragment(html, {
33
- :tokenizer => HTMLSanitizer,
34
- :encoding => 'utf-8',
35
- :tree => TreeBuilders::REXML::TreeBuilder
33
+ :tokenizer => HTMLSanitizer,
34
+ :encoding => "utf-8",
35
+ :tree => TreeBuilders::REXML::TreeBuilder,
36
36
  }).to_s
37
37
  end
38
38
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
3
 
3
4
  require "nokogiri"
@@ -28,13 +29,13 @@ require "loofah/html/document_fragment"
28
29
  #
29
30
  module Loofah
30
31
  # The version of Loofah you are using
31
- VERSION = "2.3.0"
32
+ VERSION = "2.7.0"
32
33
 
33
34
  class << self
34
35
  # Shortcut for Loofah::HTML::Document.parse
35
36
  # This method accepts the same parameters as Nokogiri::HTML::Document.parse
36
37
  def document(*args, &block)
37
- Loofah::HTML::Document.parse(*args, &block)
38
+ remove_comments_before_html_element Loofah::HTML::Document.parse(*args, &block)
38
39
  end
39
40
 
40
41
  # Shortcut for Loofah::HTML::DocumentFragment.parse
@@ -79,5 +80,23 @@ module Loofah
79
80
  def remove_extraneous_whitespace(string)
80
81
  string.gsub(/\n\s*\n\s*\n/, "\n\n")
81
82
  end
83
+
84
+ private
85
+
86
+ # remove comments that exist outside of the HTML element.
87
+ #
88
+ # these comments are allowed by the HTML spec:
89
+ #
90
+ # https://www.w3.org/TR/html401/struct/global.html#h-7.1
91
+ #
92
+ # but are not scrubbed by Loofah because these nodes don't meet
93
+ # the contract that scrubbers expect of a node (e.g., it can be
94
+ # replaced, sibling and children nodes can be created).
95
+ def remove_comments_before_html_element(doc)
96
+ doc.children.each do |child|
97
+ child.unlink if child.comment?
98
+ end
99
+ doc
100
+ end
82
101
  end
83
102
  end
@@ -1,89 +1,90 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+ require "set"
2
3
 
3
4
  module Loofah
4
5
  module Elements
5
6
  STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
6
- address
7
- blockquote
8
- center
9
- dir
10
- div
11
- dl
12
- fieldset
13
- form
14
- h1
15
- h2
16
- h3
17
- h4
18
- h5
19
- h6
20
- hr
21
- isindex
22
- menu
23
- noframes
24
- noscript
25
- ol
26
- p
27
- pre
28
- table
29
- ul
30
- ]
7
+ address
8
+ blockquote
9
+ center
10
+ dir
11
+ div
12
+ dl
13
+ fieldset
14
+ form
15
+ h1
16
+ h2
17
+ h3
18
+ h4
19
+ h5
20
+ h6
21
+ hr
22
+ isindex
23
+ menu
24
+ noframes
25
+ noscript
26
+ ol
27
+ p
28
+ pre
29
+ table
30
+ ul
31
+ ]
31
32
 
32
33
  # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
33
34
  STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
34
- address
35
- article
36
- aside
37
- blockquote
38
- canvas
39
- dd
40
- div
41
- dl
42
- dt
43
- fieldset
44
- figcaption
45
- figure
46
- footer
47
- form
48
- h1
49
- h2
50
- h3
51
- h4
52
- h5
53
- h6
54
- header
55
- hgroup
56
- hr
57
- li
58
- main
59
- nav
60
- noscript
61
- ol
62
- output
63
- p
64
- pre
65
- section
66
- table
67
- tfoot
68
- ul
69
- video
70
- ]
35
+ address
36
+ article
37
+ aside
38
+ blockquote
39
+ canvas
40
+ dd
41
+ div
42
+ dl
43
+ dt
44
+ fieldset
45
+ figcaption
46
+ figure
47
+ footer
48
+ form
49
+ h1
50
+ h2
51
+ h3
52
+ h4
53
+ h5
54
+ h6
55
+ header
56
+ hgroup
57
+ hr
58
+ li
59
+ main
60
+ nav
61
+ noscript
62
+ ol
63
+ output
64
+ p
65
+ pre
66
+ section
67
+ table
68
+ tfoot
69
+ ul
70
+ video
71
+ ]
71
72
 
72
73
  STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
73
74
 
74
75
  # The following elements may also be considered block-level
75
76
  # elements since they may contain block-level elements
76
77
  LOOSE_BLOCK_LEVEL = Set.new %w[dd
77
- dt
78
- frameset
79
- li
80
- tbody
81
- td
82
- tfoot
83
- th
84
- thead
85
- tr
86
- ]
78
+ dt
79
+ frameset
80
+ li
81
+ tbody
82
+ td
83
+ tfoot
84
+ th
85
+ thead
86
+ tr
87
+ ]
87
88
 
88
89
  BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
89
90
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module Helpers
3
4
  class << self
@@ -27,7 +28,7 @@ module Loofah
27
28
  #
28
29
  # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
29
30
  #
30
- def sanitize_css style_string
31
+ def sanitize_css(style_string)
31
32
  ::Loofah::HTML5::Scrub.scrub_css style_string
32
33
  end
33
34
 
@@ -68,7 +69,7 @@ module Loofah
68
69
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
69
70
  #
70
71
  class FullSanitizer
71
- def sanitize html, *args
72
+ def sanitize(html, *args)
72
73
  Loofah::Helpers.strip_tags html
73
74
  end
74
75
  end
@@ -85,11 +86,11 @@ module Loofah
85
86
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
86
87
  #
87
88
  class SafeListSanitizer
88
- def sanitize html, *args
89
+ def sanitize(html, *args)
89
90
  Loofah::Helpers.sanitize html
90
91
  end
91
92
 
92
- def sanitize_css style_string, *args
93
+ def sanitize_css(style_string, *args)
93
94
  Loofah::Helpers.sanitize_css style_string
94
95
  end
95
96
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module HTML # :nodoc:
3
4
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module HTML # :nodoc:
3
4
  #
@@ -14,10 +15,10 @@ module Loofah
14
15
  # constructor. Applications should use Loofah.fragment to
15
16
  # parse a fragment.
16
17
  #
17
- def parse tags, encoding = nil
18
+ def parse(tags, encoding = nil)
18
19
  doc = Loofah::HTML::Document.new
19
20
 
20
- encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
21
+ encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
21
22
  doc.encoding = encoding
22
23
 
23
24
  new(doc, tags)
@@ -30,6 +31,7 @@ module Loofah
30
31
  def to_s
31
32
  serialize_root.children.to_s
32
33
  end
34
+
33
35
  alias :serialize :to_s
34
36
 
35
37
  def serialize_root
@@ -1,5 +1,6 @@
1
1
  # coding: utf-8
2
- require 'set'
2
+ # frozen_string_literal: true
3
+ require "set"
3
4
 
4
5
  module Loofah
5
6
  #
@@ -16,11 +17,11 @@ module Loofah
16
17
  # see comments about CVE-2018-8048 within the tests for more information
17
18
  #
18
19
  BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
19
- href
20
- action
21
- src
22
- name
23
- ]
24
- BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
20
+ href
21
+ action
22
+ src
23
+ name
24
+ ]
25
+ BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
25
26
  end
26
27
  end
@@ -1,4 +1,5 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+ require "set"
2
3
 
3
4
  module Loofah
4
5
  module HTML5 # :nodoc:
@@ -45,7 +46,6 @@ module Loofah
45
46
  #
46
47
  # </html5_license>
47
48
  module SafeList
48
-
49
49
  ACCEPTABLE_ELEMENTS = Set.new([
50
50
  "a",
51
51
  "abbr",
@@ -361,7 +361,6 @@ module Loofah
361
361
  "baseProfile",
362
362
  "bbox",
363
363
  "begin",
364
- "by",
365
364
  "calcMode",
366
365
  "cap-height",
367
366
  "class",
@@ -468,7 +467,6 @@ module Loofah
468
467
  "systemLanguage",
469
468
  "target",
470
469
  "text-anchor",
471
- "to",
472
470
  "transform",
473
471
  "type",
474
472
  "u1",
@@ -478,7 +476,6 @@ module Loofah
478
476
  "unicode",
479
477
  "unicode-range",
480
478
  "units-per-em",
481
- "values",
482
479
  "version",
483
480
  "viewBox",
484
481
  "visibility",
@@ -577,7 +574,11 @@ module Loofah
577
574
  "line-height",
578
575
  "list-style",
579
576
  "list-style-type",
577
+ "max-width",
580
578
  "overflow",
579
+ "page-break-after",
580
+ "page-break-before",
581
+ "page-break-inside",
581
582
  "pause",
582
583
  "pause-after",
583
584
  "pause-before",
@@ -616,9 +617,13 @@ module Loofah
616
617
  "collapse",
617
618
  "dashed",
618
619
  "dotted",
620
+ "double",
619
621
  "fuchsia",
620
622
  "gray",
621
623
  "green",
624
+ "groove",
625
+ "hidden",
626
+ "inset",
622
627
  "italic",
623
628
  "left",
624
629
  "lime",
@@ -629,9 +634,11 @@ module Loofah
629
634
  "normal",
630
635
  "nowrap",
631
636
  "olive",
637
+ "outset",
632
638
  "pointer",
633
639
  "purple",
634
640
  "red",
641
+ "ridge",
635
642
  "right",
636
643
  "silver",
637
644
  "solid",
@@ -1,22 +1,22 @@
1
- require 'cgi'
2
- require 'crass'
1
+ # frozen_string_literal: true
2
+ require "cgi"
3
+ require "crass"
3
4
 
4
5
  module Loofah
5
6
  module HTML5 # :nodoc:
6
7
  module Scrub
7
-
8
8
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
9
- CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
10
- CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
9
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
10
+ CRASS_SEMICOLON = { :node => :semicolon, :raw => ";" }
11
+ CSS_IMPORTANT = '!important'
11
12
 
12
13
  class << self
13
-
14
- def allowed_element? element_name
14
+ def allowed_element?(element_name)
15
15
  ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
16
16
  end
17
17
 
18
18
  # alternative implementation of the html5lib attribute scrubbing algorithm
19
- def scrub_attributes node
19
+ def scrub_attributes(node)
20
20
  node.attribute_nodes.each do |attr_node|
21
21
  attr_name = if attr_node.namespace
22
22
  "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
@@ -35,14 +35,14 @@ module Loofah
35
35
 
36
36
  if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
37
37
  # this block lifted nearly verbatim from HTML5 sanitization
38
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
39
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
38
+ val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
39
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
40
40
  attr_node.remove
41
41
  next
42
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
42
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
43
43
  # permit only allowed data mediatypes
44
44
  mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
45
- mediatype, _ = mediatype.split(';')[0..1] if mediatype
45
+ mediatype, _ = mediatype.split(";")[0..1] if mediatype
46
46
  if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
47
47
  attr_node.remove
48
48
  next
@@ -50,9 +50,9 @@ module Loofah
50
50
  end
51
51
  end
52
52
  if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
53
- attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
53
+ attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
54
54
  end
55
- if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
55
+ if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
56
56
  attr_node.remove
57
57
  next
58
58
  end
@@ -67,12 +67,12 @@ module Loofah
67
67
  force_correct_attribute_escaping! node
68
68
  end
69
69
 
70
- def scrub_css_attribute node
71
- style = node.attributes['style']
70
+ def scrub_css_attribute(node)
71
+ style = node.attributes["style"]
72
72
  style.value = scrub_css(style.value) if style
73
73
  end
74
74
 
75
- def scrub_css style
75
+ def scrub_css(style)
76
76
  style_tree = Crass.parse_properties style
77
77
  sanitized_tree = []
78
78
 
@@ -84,13 +84,14 @@ module Loofah
84
84
  name = node[:name].downcase
85
85
  if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
86
86
  sanitized_tree << node << CRASS_SEMICOLON
87
- elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
87
+ elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
88
88
  value = node[:value].split.map do |keyword|
89
89
  if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
90
90
  keyword
91
91
  end
92
92
  end.compact
93
93
  unless value.empty?
94
+ value << CSS_IMPORTANT if node[:important]
94
95
  propstring = sprintf "%s:%s", name, value.join(" ")
95
96
  sanitized_node = Crass.parse_properties(propstring).first
96
97
  sanitized_tree << sanitized_node << CRASS_SEMICOLON
@@ -106,7 +107,7 @@ module Loofah
106
107
  #
107
108
  # see comments about CVE-2018-8048 within the tests for more information
108
109
  #
109
- def force_correct_attribute_escaping! node
110
+ def force_correct_attribute_escaping!(node)
110
111
  return unless Nokogiri::VersionInfo.instance.libxml2?
111
112
 
112
113
  node.attribute_nodes.each do |attr_node|
@@ -122,11 +123,10 @@ module Loofah
122
123
  #
123
124
  encoding = attr_node.value.encoding
124
125
  attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
125
- '%' + m.unpack('H2' * m.bytesize).join('%').upcase
126
+ "%" + m.unpack("H2" * m.bytesize).join("%").upcase
126
127
  end.force_encoding(encoding)
127
128
  end
128
129
  end
129
-
130
130
  end
131
131
  end
132
132
  end