loofah 2.4.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -28,7 +28,7 @@ module Loofah
28
28
  #
29
29
  # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
30
30
  #
31
- def sanitize_css style_string
31
+ def sanitize_css(style_string)
32
32
  ::Loofah::HTML5::Scrub.scrub_css style_string
33
33
  end
34
34
 
@@ -69,7 +69,7 @@ module Loofah
69
69
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
70
70
  #
71
71
  class FullSanitizer
72
- def sanitize html, *args
72
+ def sanitize(html, *args)
73
73
  Loofah::Helpers.strip_tags html
74
74
  end
75
75
  end
@@ -86,11 +86,11 @@ module Loofah
86
86
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
87
87
  #
88
88
  class SafeListSanitizer
89
- def sanitize html, *args
89
+ def sanitize(html, *args)
90
90
  Loofah::Helpers.sanitize html
91
91
  end
92
92
 
93
- def sanitize_css style_string, *args
93
+ def sanitize_css(style_string, *args)
94
94
  Loofah::Helpers.sanitize_css style_string
95
95
  end
96
96
  end
@@ -15,10 +15,10 @@ module Loofah
15
15
  # constructor. Applications should use Loofah.fragment to
16
16
  # parse a fragment.
17
17
  #
18
- def parse tags, encoding = nil
18
+ def parse(tags, encoding = nil)
19
19
  doc = Loofah::HTML::Document.new
20
20
 
21
- encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
21
+ encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
22
22
  doc.encoding = encoding
23
23
 
24
24
  new(doc, tags)
@@ -31,6 +31,7 @@ module Loofah
31
31
  def to_s
32
32
  serialize_root.children.to_s
33
33
  end
34
+
34
35
  alias :serialize :to_s
35
36
 
36
37
  def serialize_root
@@ -1,6 +1,6 @@
1
1
  # coding: utf-8
2
2
  # frozen_string_literal: true
3
- require 'set'
3
+ require "set"
4
4
 
5
5
  module Loofah
6
6
  #
@@ -17,11 +17,11 @@ module Loofah
17
17
  # see comments about CVE-2018-8048 within the tests for more information
18
18
  #
19
19
  BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
20
- href
21
- action
22
- src
23
- name
24
- ]
25
- BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
20
+ href
21
+ action
22
+ src
23
+ name
24
+ ]
25
+ BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
26
26
  end
27
27
  end
@@ -549,6 +549,9 @@ module Loofah
549
549
 
550
550
  ACCEPTABLE_CSS_PROPERTIES = Set.new([
551
551
  "azimuth",
552
+ "align-content",
553
+ "align-items",
554
+ "align-self",
552
555
  "background-color",
553
556
  "border-bottom-color",
554
557
  "border-collapse",
@@ -562,6 +565,13 @@ module Loofah
562
565
  "direction",
563
566
  "display",
564
567
  "elevation",
568
+ "flex",
569
+ "flex-basis",
570
+ "flex-direction",
571
+ "flex-flow",
572
+ "flex-grow",
573
+ "flex-shrink",
574
+ "flex-wrap",
565
575
  "float",
566
576
  "font",
567
577
  "font-family",
@@ -570,12 +580,17 @@ module Loofah
570
580
  "font-variant",
571
581
  "font-weight",
572
582
  "height",
583
+ "justify-content",
573
584
  "letter-spacing",
574
585
  "line-height",
575
586
  "list-style",
576
587
  "list-style-type",
577
588
  "max-width",
589
+ "order",
578
590
  "overflow",
591
+ "page-break-after",
592
+ "page-break-before",
593
+ "page-break-inside",
579
594
  "pause",
580
595
  "pause-after",
581
596
  "pause-before",
@@ -614,9 +629,13 @@ module Loofah
614
629
  "collapse",
615
630
  "dashed",
616
631
  "dotted",
632
+ "double",
617
633
  "fuchsia",
618
634
  "gray",
619
635
  "green",
636
+ "groove",
637
+ "hidden",
638
+ "inset",
620
639
  "italic",
621
640
  "left",
622
641
  "lime",
@@ -627,9 +646,11 @@ module Loofah
627
646
  "normal",
628
647
  "nowrap",
629
648
  "olive",
649
+ "outset",
630
650
  "pointer",
631
651
  "purple",
632
652
  "red",
653
+ "ridge",
633
654
  "right",
634
655
  "silver",
635
656
  "solid",
@@ -1,29 +1,28 @@
1
1
  # frozen_string_literal: true
2
- require 'cgi'
3
- require 'crass'
2
+ require "cgi"
3
+ require "crass"
4
4
 
5
5
  module Loofah
6
6
  module HTML5 # :nodoc:
7
7
  module Scrub
8
-
9
8
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
10
- CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|r?em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
11
- CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
9
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
10
+ CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
11
+ CSS_IMPORTANT = '!important'
12
12
 
13
13
  class << self
14
-
15
- def allowed_element? element_name
16
- ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
14
+ def allowed_element?(element_name)
15
+ ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include?(element_name)
17
16
  end
18
17
 
19
18
  # alternative implementation of the html5lib attribute scrubbing algorithm
20
- def scrub_attributes node
19
+ def scrub_attributes(node)
21
20
  node.attribute_nodes.each do |attr_node|
22
21
  attr_name = if attr_node.namespace
23
- "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
24
- else
25
- attr_node.node_name
26
- end
22
+ "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
23
+ else
24
+ attr_node.node_name
25
+ end
27
26
 
28
27
  if attr_name =~ /\Adata-[\w-]+\z/
29
28
  next
@@ -36,14 +35,14 @@ module Loofah
36
35
 
37
36
  if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
38
37
  # this block lifted nearly verbatim from HTML5 sanitization
39
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
40
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
38
+ val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
39
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
41
40
  attr_node.remove
42
41
  next
43
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
42
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
44
43
  # permit only allowed data mediatypes
45
44
  mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
46
- mediatype, _ = mediatype.split(';')[0..1] if mediatype
45
+ mediatype, _ = mediatype.split(";")[0..1] if mediatype
47
46
  if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
48
47
  attr_node.remove
49
48
  next
@@ -51,55 +50,73 @@ module Loofah
51
50
  end
52
51
  end
53
52
  if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
54
- attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
53
+ attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
55
54
  end
56
- if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
55
+ if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
57
56
  attr_node.remove
58
57
  next
59
58
  end
60
59
  end
61
60
 
62
- scrub_css_attribute node
61
+ scrub_css_attribute(node)
63
62
 
64
63
  node.attribute_nodes.each do |attr_node|
65
64
  node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/
66
65
  end
67
66
 
68
- force_correct_attribute_escaping! node
67
+ force_correct_attribute_escaping!(node)
69
68
  end
70
69
 
71
- def scrub_css_attribute node
72
- style = node.attributes['style']
70
+ def scrub_css_attribute(node)
71
+ style = node.attributes["style"]
73
72
  style.value = scrub_css(style.value) if style
74
73
  end
75
74
 
76
- def scrub_css style
77
- style_tree = Crass.parse_properties style
75
+ def scrub_css(style)
76
+ style_tree = Crass.parse_properties(style)
78
77
  sanitized_tree = []
79
78
 
80
79
  style_tree.each do |node|
81
80
  next unless node[:node] == :property
82
81
  next if node[:children].any? do |child|
83
- [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
82
+ [:url, :bad_url].include?(child[:node])
84
83
  end
84
+
85
85
  name = node[:name].downcase
86
- if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
87
- sanitized_tree << node << CRASS_SEMICOLON
88
- elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
89
- value = node[:value].split.map do |keyword|
90
- if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
86
+ next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
87
+ SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
88
+ SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
89
+
90
+ value = node[:children].map do |child|
91
+ case child[:node]
92
+ when :whitespace
93
+ nil
94
+ when :string
95
+ nil
96
+ when :function
97
+ if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
98
+ Crass::Parser.stringify(child)
99
+ end
100
+ when :ident
101
+ keyword = child[:value]
102
+ if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
103
+ SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
104
+ (keyword =~ CSS_KEYWORDISH)
91
105
  keyword
92
106
  end
93
- end.compact
94
- unless value.empty?
95
- propstring = sprintf "%s:%s", name, value.join(" ")
96
- sanitized_node = Crass.parse_properties(propstring).first
97
- sanitized_tree << sanitized_node << CRASS_SEMICOLON
107
+ else
108
+ child[:raw]
98
109
  end
99
- end
110
+ end.compact
111
+
112
+ next if value.empty?
113
+ value << CSS_IMPORTANT if node[:important]
114
+ propstring = format("%s:%s", name, value.join(" "))
115
+ sanitized_node = Crass.parse_properties(propstring).first
116
+ sanitized_tree << sanitized_node << CRASS_SEMICOLON
100
117
  end
101
118
 
102
- Crass::Parser.stringify sanitized_tree
119
+ Crass::Parser.stringify(sanitized_tree)
103
120
  end
104
121
 
105
122
  #
@@ -107,7 +124,7 @@ module Loofah
107
124
  #
108
125
  # see comments about CVE-2018-8048 within the tests for more information
109
126
  #
110
- def force_correct_attribute_escaping! node
127
+ def force_correct_attribute_escaping!(node)
111
128
  return unless Nokogiri::VersionInfo.instance.libxml2?
112
129
 
113
130
  node.attribute_nodes.each do |attr_node|
@@ -123,11 +140,10 @@ module Loofah
123
140
  #
124
141
  encoding = attr_node.value.encoding
125
142
  attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
126
- '%' + m.unpack('H2' * m.bytesize).join('%').upcase
143
+ "%" + m.unpack("H2" * m.bytesize).join("%").upcase
127
144
  end.force_encoding(encoding)
128
145
  end
129
146
  end
130
-
131
147
  end
132
148
  end
133
149
  end
@@ -92,7 +92,7 @@ module Loofah
92
92
  # # decidedly not ok for browser:
93
93
  # frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
94
94
  #
95
- def text(options={})
95
+ def text(options = {})
96
96
  result = serialize_root.children.inner_text rescue ""
97
97
  if options[:encode_special_chars] == false
98
98
  result # possibly dangerous if rendered in a browser
@@ -100,8 +100,9 @@ module Loofah
100
100
  encode_special_chars result
101
101
  end
102
102
  end
103
+
103
104
  alias :inner_text :text
104
- alias :to_str :text
105
+ alias :to_str :text
105
106
 
106
107
  #
107
108
  # Returns a plain-text version of the markup contained by the
@@ -113,7 +114,7 @@ module Loofah
113
114
  # Loofah.document("<h1>Title</h1><div>Content</div>").to_text
114
115
  # # => "\nTitle\n\nContent\n"
115
116
  #
116
- def to_text(options={})
117
+ def to_text(options = {})
117
118
  Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
118
119
  end
119
120
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  module Loofah
3
3
  module MetaHelpers # :nodoc:
4
- def self.add_downcased_set_members_to_all_set_constants mojule
4
+ def self.add_downcased_set_members_to_all_set_constants(mojule)
5
5
  mojule.constants.each do |constant_sym|
6
6
  constant = mojule.const_get constant_sym
7
7
  next unless Set === constant
@@ -3,7 +3,7 @@ module Loofah
3
3
  #
4
4
  # A RuntimeError raised when Loofah could not find an appropriate scrubber.
5
5
  #
6
- class ScrubberNotFound < RuntimeError ; end
6
+ class ScrubberNotFound < RuntimeError; end
7
7
 
8
8
  #
9
9
  # A Scrubber wraps up a block (or method) that is run on an HTML node (element):
@@ -37,7 +37,7 @@ module Loofah
37
37
  CONTINUE = Object.new.freeze
38
38
 
39
39
  # Top-down Scrubbers may return STOP to indicate that the subtree should not be traversed.
40
- STOP = Object.new.freeze
40
+ STOP = Object.new.freeze
41
41
 
42
42
  # When a scrubber is initialized, the :direction may be specified
43
43
  # as :top_down (the default) or :bottom_up.
@@ -65,7 +65,7 @@ module Loofah
65
65
  def initialize(options = {}, &block)
66
66
  direction = options[:direction] || :top_down
67
67
  unless [:top_down, :bottom_up].include?(direction)
68
- raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
68
+ raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
69
69
  end
70
70
  @direction, @block = direction, block
71
71
  end
@@ -92,10 +92,10 @@ module Loofah
92
92
  # If the attribute is set, don't overwrite the existing value
93
93
  #
94
94
  def append_attribute(node, attribute, value)
95
- current_value = node.get_attribute(attribute) || ''
95
+ current_value = node.get_attribute(attribute) || ""
96
96
  current_values = current_value.split(/\s+/)
97
97
  updated_value = current_values | [value]
98
- node.set_attribute(attribute, updated_value.join(' '))
98
+ node.set_attribute(attribute, updated_value.join(" "))
99
99
  end
100
100
 
101
101
  private
@@ -119,11 +119,11 @@ module Loofah
119
119
  else
120
120
  return if scrub(node) == STOP
121
121
  end
122
- node.children.each {|j| traverse_conditionally_top_down(j)}
122
+ node.children.each { |j| traverse_conditionally_top_down(j) }
123
123
  end
124
124
 
125
125
  def traverse_conditionally_bottom_up(node)
126
- node.children.each {|j| traverse_conditionally_bottom_up(j)}
126
+ node.children.each { |j| traverse_conditionally_bottom_up(j) }
127
127
  if block
128
128
  block.call(node)
129
129
  else
@@ -206,8 +206,8 @@ module Loofah
206
206
  end
207
207
 
208
208
  def scrub(node)
209
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
210
- append_attribute(node, 'rel', 'nofollow')
209
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
210
+ append_attribute(node, "rel", "nofollow")
211
211
  return STOP
212
212
  end
213
213
  end
@@ -227,8 +227,8 @@ module Loofah
227
227
  end
228
228
 
229
229
  def scrub(node)
230
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
231
- append_attribute(node, 'rel', 'noopener')
230
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
231
+ append_attribute(node, "rel", "noopener")
232
232
  return STOP
233
233
  end
234
234
  end
@@ -268,7 +268,7 @@ module Loofah
268
268
 
269
269
  def scrub(node)
270
270
  if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
271
- node.content = node.content.gsub(/\u2028|\u2029/, '')
271
+ node.content = node.content.gsub(/\u2028|\u2029/, "")
272
272
  end
273
273
  CONTINUE
274
274
  end
@@ -278,14 +278,14 @@ module Loofah
278
278
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
279
279
  #
280
280
  MAP = {
281
- :escape => Escape,
282
- :prune => Prune,
281
+ :escape => Escape,
282
+ :prune => Prune,
283
283
  :whitewash => Whitewash,
284
- :strip => Strip,
285
- :nofollow => NoFollow,
284
+ :strip => Strip,
285
+ :nofollow => NoFollow,
286
286
  :noopener => NoOpener,
287
287
  :newline_block_elements => NewlineBlockElements,
288
- :unprintable => Unprintable
288
+ :unprintable => Unprintable,
289
289
  }
290
290
 
291
291
  #