loofah 2.5.0 → 2.18.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -140,6 +140,7 @@ module Loofah
140
140
  "ul",
141
141
  "var",
142
142
  "video",
143
+ "wbr",
143
144
  ])
144
145
 
145
146
  MATHML_ELEMENTS = Set.new([
@@ -147,6 +148,7 @@ module Loofah
147
148
  "annotation-xml",
148
149
  "maction",
149
150
  "math",
151
+ "menclose",
150
152
  "merror",
151
153
  "mfenced",
152
154
  "mfrac",
@@ -160,6 +162,7 @@ module Loofah
160
162
  "mprescripts",
161
163
  "mroot",
162
164
  "mrow",
165
+ "ms",
163
166
  "mspace",
164
167
  "msqrt",
165
168
  "mstyle",
@@ -312,6 +315,7 @@ module Loofah
312
315
  "columnspacing",
313
316
  "columnspan",
314
317
  "depth",
318
+ "dir",
315
319
  "display",
316
320
  "displaystyle",
317
321
  "encoding",
@@ -322,19 +326,24 @@ module Loofah
322
326
  "fontweight",
323
327
  "frame",
324
328
  "height",
329
+ "href",
325
330
  "linethickness",
331
+ "lquote",
326
332
  "lspace",
327
333
  "mathbackground",
328
334
  "mathcolor",
335
+ "mathsize",
329
336
  "mathvariant",
330
337
  "maxsize",
331
338
  "minsize",
339
+ "notation",
332
340
  "open",
333
341
  "other",
334
342
  "rowalign",
335
343
  "rowlines",
336
344
  "rowspacing",
337
345
  "rowspan",
346
+ "rquote",
338
347
  "rspace",
339
348
  "scriptlevel",
340
349
  "selection",
@@ -503,6 +512,62 @@ module Loofah
503
512
  "zoomAndPan",
504
513
  ])
505
514
 
515
+ ARIA_ATTRIBUTES = Set.new([
516
+ "aria-activedescendant",
517
+ "aria-atomic",
518
+ "aria-autocomplete",
519
+ "aria-braillelabel",
520
+ "aria-brailleroledescription",
521
+ "aria-busy",
522
+ "aria-checked",
523
+ "aria-colcount",
524
+ "aria-colindex",
525
+ "aria-colindextext",
526
+ "aria-colspan",
527
+ "aria-controls",
528
+ "aria-current",
529
+ "aria-describedby",
530
+ "aria-description",
531
+ "aria-details",
532
+ "aria-disabled",
533
+ "aria-dropeffect",
534
+ "aria-errormessage",
535
+ "aria-expanded",
536
+ "aria-flowto",
537
+ "aria-grabbed",
538
+ "aria-haspopup",
539
+ "aria-hidden",
540
+ "aria-invalid",
541
+ "aria-keyshortcuts",
542
+ "aria-label",
543
+ "aria-labelledby",
544
+ "aria-level",
545
+ "aria-live",
546
+ "aria-multiline",
547
+ "aria-multiselectable",
548
+ "aria-orientation",
549
+ "aria-owns",
550
+ "aria-placeholder",
551
+ "aria-posinset",
552
+ "aria-pressed",
553
+ "aria-readonly",
554
+ "aria-relevant",
555
+ "aria-required",
556
+ "aria-roledescription",
557
+ "aria-rowcount",
558
+ "aria-rowindex",
559
+ "aria-rowindextext",
560
+ "aria-rowspan",
561
+ "aria-selected",
562
+ "aria-setsize",
563
+ "aria-sort",
564
+ "aria-valuemax",
565
+ "aria-valuemin",
566
+ "aria-valuenow",
567
+ "aria-valuetext",
568
+ "role",
569
+ ])
570
+
506
571
  ATTR_VAL_IS_URI = Set.new([
507
572
  "action",
508
573
  "cite",
@@ -549,6 +614,10 @@ module Loofah
549
614
 
550
615
  ACCEPTABLE_CSS_PROPERTIES = Set.new([
551
616
  "azimuth",
617
+ "align-content",
618
+ "align-items",
619
+ "align-self",
620
+ "aspect-ratio",
552
621
  "background-color",
553
622
  "border-bottom-color",
554
623
  "border-collapse",
@@ -562,6 +631,13 @@ module Loofah
562
631
  "direction",
563
632
  "display",
564
633
  "elevation",
634
+ "flex",
635
+ "flex-basis",
636
+ "flex-direction",
637
+ "flex-flow",
638
+ "flex-grow",
639
+ "flex-shrink",
640
+ "flex-wrap",
565
641
  "float",
566
642
  "font",
567
643
  "font-family",
@@ -570,12 +646,19 @@ module Loofah
570
646
  "font-variant",
571
647
  "font-weight",
572
648
  "height",
649
+ "justify-content",
573
650
  "letter-spacing",
574
651
  "line-height",
575
652
  "list-style",
576
653
  "list-style-type",
577
654
  "max-width",
655
+ "order",
578
656
  "overflow",
657
+ "overflow-x",
658
+ "overflow-y",
659
+ "page-break-after",
660
+ "page-break-before",
661
+ "page-break-inside",
579
662
  "pause",
580
663
  "pause-after",
581
664
  "pause-before",
@@ -614,9 +697,15 @@ module Loofah
614
697
  "collapse",
615
698
  "dashed",
616
699
  "dotted",
700
+ "double",
617
701
  "fuchsia",
618
702
  "gray",
619
703
  "green",
704
+ "groove",
705
+ "hidden",
706
+ "inherit",
707
+ "initial",
708
+ "inset",
620
709
  "italic",
621
710
  "left",
622
711
  "lime",
@@ -627,18 +716,23 @@ module Loofah
627
716
  "normal",
628
717
  "nowrap",
629
718
  "olive",
719
+ "outset",
630
720
  "pointer",
631
721
  "purple",
632
722
  "red",
723
+ "revert",
724
+ "ridge",
633
725
  "right",
726
+ "separate",
634
727
  "silver",
635
728
  "solid",
636
729
  "teal",
637
- "thin",
638
730
  "thick",
731
+ "thin",
639
732
  "top",
640
733
  "transparent",
641
734
  "underline",
735
+ "unset",
642
736
  "white",
643
737
  "yellow",
644
738
  ])
@@ -737,6 +831,7 @@ module Loofah
737
831
  "rsync",
738
832
  "rtsp",
739
833
  "sftp",
834
+ "sms",
740
835
  "ssh",
741
836
  "tag",
742
837
  "tel",
@@ -757,7 +852,7 @@ module Loofah
757
852
 
758
853
  # subclasses may define their own versions of these constants
759
854
  ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
760
- ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
855
+ ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES + ARIA_ATTRIBUTES
761
856
  ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
762
857
  ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
763
858
  ALLOWED_CSS_FUNCTIONS = ACCEPTABLE_CSS_FUNCTIONS
@@ -765,18 +860,14 @@ module Loofah
765
860
  ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
766
861
  ALLOWED_URI_DATA_MEDIATYPES = ACCEPTABLE_URI_DATA_MEDIATYPES
767
862
 
863
+ # TODO: remove VOID_ELEMENTS in a future major release
864
+ # and put it in the tests (it is used only for testing, not for functional behavior)
768
865
  VOID_ELEMENTS = Set.new([
769
866
  "area",
770
- "base",
771
867
  "br",
772
- "col",
773
- "embed",
774
868
  "hr",
775
869
  "img",
776
870
  "input",
777
- "link",
778
- "meta",
779
- "param",
780
871
  ])
781
872
 
782
873
  # additional tags we should consider safe since we have libxml2 fixing up our documents.
@@ -7,23 +7,26 @@ module Loofah
7
7
  module Scrub
8
8
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
9
9
  CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
10
- CRASS_SEMICOLON = { :node => :semicolon, :raw => ";" }
10
+ CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
11
+ CSS_IMPORTANT = '!important'
12
+ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
13
+ DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
11
14
 
12
15
  class << self
13
16
  def allowed_element?(element_name)
14
- ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
17
+ ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include?(element_name)
15
18
  end
16
19
 
17
20
  # alternative implementation of the html5lib attribute scrubbing algorithm
18
21
  def scrub_attributes(node)
19
22
  node.attribute_nodes.each do |attr_node|
20
23
  attr_name = if attr_node.namespace
21
- "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
22
- else
23
- attr_node.node_name
24
- end
24
+ "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
25
+ else
26
+ attr_node.node_name
27
+ end
25
28
 
26
- if attr_name =~ /\Adata-[\w-]+\z/
29
+ if attr_name =~ DATA_ATTRIBUTE_NAME
27
30
  next
28
31
  end
29
32
 
@@ -57,13 +60,15 @@ module Loofah
57
60
  end
58
61
  end
59
62
 
60
- scrub_css_attribute node
63
+ scrub_css_attribute(node)
61
64
 
62
65
  node.attribute_nodes.each do |attr_node|
63
- node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/
66
+ if attr_node.value !~ /[^[:space:]]/ && attr_node.name !~ DATA_ATTRIBUTE_NAME
67
+ node.remove_attribute(attr_node.name)
68
+ end
64
69
  end
65
70
 
66
- force_correct_attribute_escaping! node
71
+ force_correct_attribute_escaping!(node)
67
72
  end
68
73
 
69
74
  def scrub_css_attribute(node)
@@ -72,32 +77,54 @@ module Loofah
72
77
  end
73
78
 
74
79
  def scrub_css(style)
75
- style_tree = Crass.parse_properties style
80
+ style_tree = Crass.parse_properties(style)
76
81
  sanitized_tree = []
77
82
 
78
83
  style_tree.each do |node|
79
84
  next unless node[:node] == :property
80
85
  next if node[:children].any? do |child|
81
- [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
86
+ [:url, :bad_url].include?(child[:node])
82
87
  end
88
+
83
89
  name = node[:name].downcase
84
- if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
85
- sanitized_tree << node << CRASS_SEMICOLON
86
- elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
87
- value = node[:value].split.map do |keyword|
88
- if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
90
+ next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
91
+ SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
92
+ SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
93
+
94
+ value = node[:children].map do |child|
95
+ case child[:node]
96
+ when :whitespace
97
+ nil
98
+ when :string
99
+ if child[:raw] =~ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES
100
+ Crass::Parser.stringify(child)
101
+ else
102
+ nil
103
+ end
104
+ when :function
105
+ if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
106
+ Crass::Parser.stringify(child)
107
+ end
108
+ when :ident
109
+ keyword = child[:value]
110
+ if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
111
+ SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
112
+ (keyword =~ CSS_KEYWORDISH)
89
113
  keyword
90
114
  end
91
- end.compact
92
- unless value.empty?
93
- propstring = sprintf "%s:%s", name, value.join(" ")
94
- sanitized_node = Crass.parse_properties(propstring).first
95
- sanitized_tree << sanitized_node << CRASS_SEMICOLON
115
+ else
116
+ child[:raw]
96
117
  end
97
- end
118
+ end.compact
119
+
120
+ next if value.empty?
121
+ value << CSS_IMPORTANT if node[:important]
122
+ propstring = format("%s:%s", name, value.join(" "))
123
+ sanitized_node = Crass.parse_properties(propstring).first
124
+ sanitized_tree << sanitized_node << CRASS_SEMICOLON
98
125
  end
99
126
 
100
- Crass::Parser.stringify sanitized_tree
127
+ Crass::Parser.stringify(sanitized_tree)
101
128
  end
102
129
 
103
130
  #
@@ -93,7 +93,11 @@ module Loofah
93
93
  # frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
94
94
  #
95
95
  def text(options = {})
96
- result = serialize_root.children.inner_text rescue ""
96
+ result = if serialize_root
97
+ serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
98
+ else
99
+ ""
100
+ end
97
101
  if options[:encode_special_chars] == false
98
102
  result # possibly dangerous if rendered in a browser
99
103
  else
@@ -108,11 +112,11 @@ module Loofah
108
112
  # Returns a plain-text version of the markup contained by the
109
113
  # fragment, with HTML entities encoded.
110
114
  #
111
- # This method is slower than #to_text, but is clever about
112
- # whitespace around block elements.
115
+ # This method is slower than #text, but is clever about
116
+ # whitespace around block elements and line break elements.
113
117
  #
114
- # Loofah.document("<h1>Title</h1><div>Content</div>").to_text
115
- # # => "\nTitle\n\nContent\n"
118
+ # Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
119
+ # # => "\nTitle\n\nContent\nNext line\n"
116
120
  #
117
121
  def to_text(options = {})
118
122
  Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
@@ -240,8 +240,13 @@ module Loofah
240
240
  end
241
241
 
242
242
  def scrub(node)
243
- return CONTINUE unless Loofah::Elements::BLOCK_LEVEL.include?(node.name)
244
- node.add_next_sibling Nokogiri::XML::Text.new("\n#{node.content}\n", node.document)
243
+ return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
244
+ replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
245
+ "\n"
246
+ else
247
+ "\n#{node.content}\n"
248
+ end
249
+ node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
245
250
  node.remove
246
251
  end
247
252
  end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ module Loofah
3
+ # The version of Loofah you are using
4
+ VERSION = "2.18.0"
5
+ end
data/lib/loofah.rb CHANGED
@@ -3,21 +3,22 @@ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.i
3
3
 
4
4
  require "nokogiri"
5
5
 
6
- require "loofah/metahelpers"
7
- require "loofah/elements"
6
+ require_relative "loofah/version"
7
+ require_relative "loofah/metahelpers"
8
+ require_relative "loofah/elements"
8
9
 
9
- require "loofah/html5/safelist"
10
- require "loofah/html5/libxml2_workarounds"
11
- require "loofah/html5/scrub"
10
+ require_relative "loofah/html5/safelist"
11
+ require_relative "loofah/html5/libxml2_workarounds"
12
+ require_relative "loofah/html5/scrub"
12
13
 
13
- require "loofah/scrubber"
14
- require "loofah/scrubbers"
14
+ require_relative "loofah/scrubber"
15
+ require_relative "loofah/scrubbers"
15
16
 
16
- require "loofah/instance_methods"
17
- require "loofah/xml/document"
18
- require "loofah/xml/document_fragment"
19
- require "loofah/html/document"
20
- require "loofah/html/document_fragment"
17
+ require_relative "loofah/instance_methods"
18
+ require_relative "loofah/xml/document"
19
+ require_relative "loofah/xml/document_fragment"
20
+ require_relative "loofah/html/document"
21
+ require_relative "loofah/html/document_fragment"
21
22
 
22
23
  # == Strings and IO Objects as Input
23
24
  #
@@ -28,9 +29,6 @@ require "loofah/html/document_fragment"
28
29
  # quantities of docs.
29
30
  #
30
31
  module Loofah
31
- # The version of Loofah you are using
32
- VERSION = "2.5.0"
33
-
34
32
  class << self
35
33
  # Shortcut for Loofah::HTML::Document.parse
36
34
  # This method accepts the same parameters as Nokogiri::HTML::Document.parse