loofah 2.3.1 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +91 -40
  3. data/README.md +7 -4
  4. data/lib/loofah.rb +33 -16
  5. data/lib/loofah/elements.rb +74 -73
  6. data/lib/loofah/helpers.rb +5 -4
  7. data/lib/loofah/html/document.rb +1 -0
  8. data/lib/loofah/html/document_fragment.rb +4 -2
  9. data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
  10. data/lib/loofah/html5/safelist.rb +23 -0
  11. data/lib/loofah/html5/scrub.rb +21 -21
  12. data/lib/loofah/instance_methods.rb +5 -3
  13. data/lib/loofah/metahelpers.rb +2 -1
  14. data/lib/loofah/scrubber.rb +8 -7
  15. data/lib/loofah/scrubbers.rb +11 -10
  16. data/lib/loofah/version.rb +5 -0
  17. data/lib/loofah/xml/document.rb +1 -0
  18. data/lib/loofah/xml/document_fragment.rb +2 -1
  19. metadata +27 -93
  20. data/.gemtest +0 -0
  21. data/Gemfile +0 -22
  22. data/Manifest.txt +0 -41
  23. data/Rakefile +0 -81
  24. data/benchmark/benchmark.rb +0 -149
  25. data/benchmark/fragment.html +0 -96
  26. data/benchmark/helper.rb +0 -73
  27. data/benchmark/www.slashdot.com.html +0 -2560
  28. data/test/assets/msword.html +0 -63
  29. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  30. data/test/helper.rb +0 -18
  31. data/test/html5/test_sanitizer.rb +0 -401
  32. data/test/html5/test_scrub.rb +0 -10
  33. data/test/integration/test_ad_hoc.rb +0 -220
  34. data/test/integration/test_helpers.rb +0 -43
  35. data/test/integration/test_html.rb +0 -72
  36. data/test/integration/test_scrubbers.rb +0 -400
  37. data/test/integration/test_xml.rb +0 -55
  38. data/test/unit/test_api.rb +0 -142
  39. data/test/unit/test_encoding.rb +0 -20
  40. data/test/unit/test_helpers.rb +0 -62
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module Helpers
3
4
  class << self
@@ -27,7 +28,7 @@ module Loofah
27
28
  #
28
29
  # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
29
30
  #
30
- def sanitize_css style_string
31
+ def sanitize_css(style_string)
31
32
  ::Loofah::HTML5::Scrub.scrub_css style_string
32
33
  end
33
34
 
@@ -68,7 +69,7 @@ module Loofah
68
69
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
69
70
  #
70
71
  class FullSanitizer
71
- def sanitize html, *args
72
+ def sanitize(html, *args)
72
73
  Loofah::Helpers.strip_tags html
73
74
  end
74
75
  end
@@ -85,11 +86,11 @@ module Loofah
85
86
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
86
87
  #
87
88
  class SafeListSanitizer
88
- def sanitize html, *args
89
+ def sanitize(html, *args)
89
90
  Loofah::Helpers.sanitize html
90
91
  end
91
92
 
92
- def sanitize_css style_string, *args
93
+ def sanitize_css(style_string, *args)
93
94
  Loofah::Helpers.sanitize_css style_string
94
95
  end
95
96
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module HTML # :nodoc:
3
4
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module HTML # :nodoc:
3
4
  #
@@ -14,10 +15,10 @@ module Loofah
14
15
  # constructor. Applications should use Loofah.fragment to
15
16
  # parse a fragment.
16
17
  #
17
- def parse tags, encoding = nil
18
+ def parse(tags, encoding = nil)
18
19
  doc = Loofah::HTML::Document.new
19
20
 
20
- encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
21
+ encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
21
22
  doc.encoding = encoding
22
23
 
23
24
  new(doc, tags)
@@ -30,6 +31,7 @@ module Loofah
30
31
  def to_s
31
32
  serialize_root.children.to_s
32
33
  end
34
+
33
35
  alias :serialize :to_s
34
36
 
35
37
  def serialize_root
@@ -1,5 +1,6 @@
1
1
  # coding: utf-8
2
- require 'set'
2
+ # frozen_string_literal: true
3
+ require "set"
3
4
 
4
5
  module Loofah
5
6
  #
@@ -16,11 +17,11 @@ module Loofah
16
17
  # see comments about CVE-2018-8048 within the tests for more information
17
18
  #
18
19
  BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
19
- href
20
- action
21
- src
22
- name
23
- ]
24
- BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
20
+ href
21
+ action
22
+ src
23
+ name
24
+ ]
25
+ BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
25
26
  end
26
27
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require "set"
2
3
 
3
4
  module Loofah
@@ -548,6 +549,9 @@ module Loofah
548
549
 
549
550
  ACCEPTABLE_CSS_PROPERTIES = Set.new([
550
551
  "azimuth",
552
+ "align-content",
553
+ "align-items",
554
+ "align-self",
551
555
  "background-color",
552
556
  "border-bottom-color",
553
557
  "border-collapse",
@@ -561,6 +565,13 @@ module Loofah
561
565
  "direction",
562
566
  "display",
563
567
  "elevation",
568
+ "flex",
569
+ "flex-basis",
570
+ "flex-direction",
571
+ "flex-flow",
572
+ "flex-grow",
573
+ "flex-shrink",
574
+ "flex-wrap",
564
575
  "float",
565
576
  "font",
566
577
  "font-family",
@@ -569,11 +580,17 @@ module Loofah
569
580
  "font-variant",
570
581
  "font-weight",
571
582
  "height",
583
+ "justify-content",
572
584
  "letter-spacing",
573
585
  "line-height",
574
586
  "list-style",
575
587
  "list-style-type",
588
+ "max-width",
589
+ "order",
576
590
  "overflow",
591
+ "page-break-after",
592
+ "page-break-before",
593
+ "page-break-inside",
577
594
  "pause",
578
595
  "pause-after",
579
596
  "pause-before",
@@ -612,9 +629,13 @@ module Loofah
612
629
  "collapse",
613
630
  "dashed",
614
631
  "dotted",
632
+ "double",
615
633
  "fuchsia",
616
634
  "gray",
617
635
  "green",
636
+ "groove",
637
+ "hidden",
638
+ "inset",
618
639
  "italic",
619
640
  "left",
620
641
  "lime",
@@ -625,9 +646,11 @@ module Loofah
625
646
  "normal",
626
647
  "nowrap",
627
648
  "olive",
649
+ "outset",
628
650
  "pointer",
629
651
  "purple",
630
652
  "red",
653
+ "ridge",
631
654
  "right",
632
655
  "silver",
633
656
  "solid",
@@ -1,22 +1,22 @@
1
- require 'cgi'
2
- require 'crass'
1
+ # frozen_string_literal: true
2
+ require "cgi"
3
+ require "crass"
3
4
 
4
5
  module Loofah
5
6
  module HTML5 # :nodoc:
6
7
  module Scrub
7
-
8
8
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
9
- CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
10
- CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
9
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
10
+ CRASS_SEMICOLON = { :node => :semicolon, :raw => ";" }
11
+ CSS_IMPORTANT = '!important'
11
12
 
12
13
  class << self
13
-
14
- def allowed_element? element_name
14
+ def allowed_element?(element_name)
15
15
  ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
16
16
  end
17
17
 
18
18
  # alternative implementation of the html5lib attribute scrubbing algorithm
19
- def scrub_attributes node
19
+ def scrub_attributes(node)
20
20
  node.attribute_nodes.each do |attr_node|
21
21
  attr_name = if attr_node.namespace
22
22
  "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
@@ -35,14 +35,14 @@ module Loofah
35
35
 
36
36
  if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
37
37
  # this block lifted nearly verbatim from HTML5 sanitization
38
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
39
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
38
+ val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
39
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
40
40
  attr_node.remove
41
41
  next
42
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
42
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
43
43
  # permit only allowed data mediatypes
44
44
  mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
45
- mediatype, _ = mediatype.split(';')[0..1] if mediatype
45
+ mediatype, _ = mediatype.split(";")[0..1] if mediatype
46
46
  if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
47
47
  attr_node.remove
48
48
  next
@@ -50,9 +50,9 @@ module Loofah
50
50
  end
51
51
  end
52
52
  if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
53
- attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
53
+ attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
54
54
  end
55
- if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
55
+ if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
56
56
  attr_node.remove
57
57
  next
58
58
  end
@@ -67,12 +67,12 @@ module Loofah
67
67
  force_correct_attribute_escaping! node
68
68
  end
69
69
 
70
- def scrub_css_attribute node
71
- style = node.attributes['style']
70
+ def scrub_css_attribute(node)
71
+ style = node.attributes["style"]
72
72
  style.value = scrub_css(style.value) if style
73
73
  end
74
74
 
75
- def scrub_css style
75
+ def scrub_css(style)
76
76
  style_tree = Crass.parse_properties style
77
77
  sanitized_tree = []
78
78
 
@@ -84,13 +84,14 @@ module Loofah
84
84
  name = node[:name].downcase
85
85
  if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
86
86
  sanitized_tree << node << CRASS_SEMICOLON
87
- elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
87
+ elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
88
88
  value = node[:value].split.map do |keyword|
89
89
  if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
90
90
  keyword
91
91
  end
92
92
  end.compact
93
93
  unless value.empty?
94
+ value << CSS_IMPORTANT if node[:important]
94
95
  propstring = sprintf "%s:%s", name, value.join(" ")
95
96
  sanitized_node = Crass.parse_properties(propstring).first
96
97
  sanitized_tree << sanitized_node << CRASS_SEMICOLON
@@ -106,7 +107,7 @@ module Loofah
106
107
  #
107
108
  # see comments about CVE-2018-8048 within the tests for more information
108
109
  #
109
- def force_correct_attribute_escaping! node
110
+ def force_correct_attribute_escaping!(node)
110
111
  return unless Nokogiri::VersionInfo.instance.libxml2?
111
112
 
112
113
  node.attribute_nodes.each do |attr_node|
@@ -122,11 +123,10 @@ module Loofah
122
123
  #
123
124
  encoding = attr_node.value.encoding
124
125
  attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
125
- '%' + m.unpack('H2' * m.bytesize).join('%').upcase
126
+ "%" + m.unpack("H2" * m.bytesize).join("%").upcase
126
127
  end.force_encoding(encoding)
127
128
  end
128
129
  end
129
-
130
130
  end
131
131
  end
132
132
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  #
3
4
  # Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
@@ -91,7 +92,7 @@ module Loofah
91
92
  # # decidedly not ok for browser:
92
93
  # frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
93
94
  #
94
- def text(options={})
95
+ def text(options = {})
95
96
  result = serialize_root.children.inner_text rescue ""
96
97
  if options[:encode_special_chars] == false
97
98
  result # possibly dangerous if rendered in a browser
@@ -99,8 +100,9 @@ module Loofah
99
100
  encode_special_chars result
100
101
  end
101
102
  end
103
+
102
104
  alias :inner_text :text
103
- alias :to_str :text
105
+ alias :to_str :text
104
106
 
105
107
  #
106
108
  # Returns a plain-text version of the markup contained by the
@@ -112,7 +114,7 @@ module Loofah
112
114
  # Loofah.document("<h1>Title</h1><div>Content</div>").to_text
113
115
  # # => "\nTitle\n\nContent\n"
114
116
  #
115
- def to_text(options={})
117
+ def to_text(options = {})
116
118
  Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
117
119
  end
118
120
  end
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module MetaHelpers # :nodoc:
3
- def self.add_downcased_set_members_to_all_set_constants mojule
4
+ def self.add_downcased_set_members_to_all_set_constants(mojule)
4
5
  mojule.constants.each do |constant_sym|
5
6
  constant = mojule.const_get constant_sym
6
7
  next unless Set === constant
@@ -1,8 +1,9 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  #
3
4
  # A RuntimeError raised when Loofah could not find an appropriate scrubber.
4
5
  #
5
- class ScrubberNotFound < RuntimeError ; end
6
+ class ScrubberNotFound < RuntimeError; end
6
7
 
7
8
  #
8
9
  # A Scrubber wraps up a block (or method) that is run on an HTML node (element):
@@ -36,7 +37,7 @@ module Loofah
36
37
  CONTINUE = Object.new.freeze
37
38
 
38
39
  # Top-down Scrubbers may return STOP to indicate that the subtree should not be traversed.
39
- STOP = Object.new.freeze
40
+ STOP = Object.new.freeze
40
41
 
41
42
  # When a scrubber is initialized, the :direction may be specified
42
43
  # as :top_down (the default) or :bottom_up.
@@ -64,7 +65,7 @@ module Loofah
64
65
  def initialize(options = {}, &block)
65
66
  direction = options[:direction] || :top_down
66
67
  unless [:top_down, :bottom_up].include?(direction)
67
- raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
68
+ raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
68
69
  end
69
70
  @direction, @block = direction, block
70
71
  end
@@ -91,10 +92,10 @@ module Loofah
91
92
  # If the attribute is set, don't overwrite the existing value
92
93
  #
93
94
  def append_attribute(node, attribute, value)
94
- current_value = node.get_attribute(attribute) || ''
95
+ current_value = node.get_attribute(attribute) || ""
95
96
  current_values = current_value.split(/\s+/)
96
97
  updated_value = current_values | [value]
97
- node.set_attribute(attribute, updated_value.join(' '))
98
+ node.set_attribute(attribute, updated_value.join(" "))
98
99
  end
99
100
 
100
101
  private
@@ -118,11 +119,11 @@ module Loofah
118
119
  else
119
120
  return if scrub(node) == STOP
120
121
  end
121
- node.children.each {|j| traverse_conditionally_top_down(j)}
122
+ node.children.each { |j| traverse_conditionally_top_down(j) }
122
123
  end
123
124
 
124
125
  def traverse_conditionally_bottom_up(node)
125
- node.children.each {|j| traverse_conditionally_bottom_up(j)}
126
+ node.children.each { |j| traverse_conditionally_bottom_up(j) }
126
127
  if block
127
128
  block.call(node)
128
129
  else
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  #
3
4
  # Loofah provides some built-in scrubbers for sanitizing with
@@ -205,8 +206,8 @@ module Loofah
205
206
  end
206
207
 
207
208
  def scrub(node)
208
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
209
- append_attribute(node, 'rel', 'nofollow')
209
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
210
+ append_attribute(node, "rel", "nofollow")
210
211
  return STOP
211
212
  end
212
213
  end
@@ -226,8 +227,8 @@ module Loofah
226
227
  end
227
228
 
228
229
  def scrub(node)
229
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
230
- append_attribute(node, 'rel', 'noopener')
230
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
231
+ append_attribute(node, "rel", "noopener")
231
232
  return STOP
232
233
  end
233
234
  end
@@ -267,7 +268,7 @@ module Loofah
267
268
 
268
269
  def scrub(node)
269
270
  if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
270
- node.content = node.content.gsub(/\u2028|\u2029/, '')
271
+ node.content = node.content.gsub(/\u2028|\u2029/, "")
271
272
  end
272
273
  CONTINUE
273
274
  end
@@ -277,14 +278,14 @@ module Loofah
277
278
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
278
279
  #
279
280
  MAP = {
280
- :escape => Escape,
281
- :prune => Prune,
281
+ :escape => Escape,
282
+ :prune => Prune,
282
283
  :whitewash => Whitewash,
283
- :strip => Strip,
284
- :nofollow => NoFollow,
284
+ :strip => Strip,
285
+ :nofollow => NoFollow,
285
286
  :noopener => NoOpener,
286
287
  :newline_block_elements => NewlineBlockElements,
287
- :unprintable => Unprintable
288
+ :unprintable => Unprintable,
288
289
  }
289
290
 
290
291
  #
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ module Loofah
3
+ # The version of Loofah you are using
4
+ VERSION = "2.8.0"
5
+ end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module XML # :nodoc:
3
4
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module XML # :nodoc:
3
4
  #
@@ -12,7 +13,7 @@ module Loofah
12
13
  # constructor. Applications should use Loofah.fragment to
13
14
  # parse a fragment.
14
15
  #
15
- def parse tags
16
+ def parse(tags)
16
17
  doc = Loofah::XML::Document.new
17
18
  doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
18
19
  self.new(doc, tags)