loofah 2.5.0 → 2.18.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +157 -47
- data/README.md +13 -12
- data/lib/loofah/elements.rb +5 -2
- data/lib/loofah/html5/safelist.rb +99 -8
- data/lib/loofah/html5/scrub.rb +51 -24
- data/lib/loofah/instance_methods.rb +9 -5
- data/lib/loofah/scrubbers.rb +7 -2
- data/lib/loofah/version.rb +5 -0
- data/lib/loofah.rb +13 -15
- metadata +41 -124
- data/Gemfile +0 -23
- data/Manifest.txt +0 -25
- data/Rakefile +0 -97
- data/benchmark/benchmark.rb +0 -154
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
@@ -140,6 +140,7 @@ module Loofah
|
|
140
140
|
"ul",
|
141
141
|
"var",
|
142
142
|
"video",
|
143
|
+
"wbr",
|
143
144
|
])
|
144
145
|
|
145
146
|
MATHML_ELEMENTS = Set.new([
|
@@ -147,6 +148,7 @@ module Loofah
|
|
147
148
|
"annotation-xml",
|
148
149
|
"maction",
|
149
150
|
"math",
|
151
|
+
"menclose",
|
150
152
|
"merror",
|
151
153
|
"mfenced",
|
152
154
|
"mfrac",
|
@@ -160,6 +162,7 @@ module Loofah
|
|
160
162
|
"mprescripts",
|
161
163
|
"mroot",
|
162
164
|
"mrow",
|
165
|
+
"ms",
|
163
166
|
"mspace",
|
164
167
|
"msqrt",
|
165
168
|
"mstyle",
|
@@ -312,6 +315,7 @@ module Loofah
|
|
312
315
|
"columnspacing",
|
313
316
|
"columnspan",
|
314
317
|
"depth",
|
318
|
+
"dir",
|
315
319
|
"display",
|
316
320
|
"displaystyle",
|
317
321
|
"encoding",
|
@@ -322,19 +326,24 @@ module Loofah
|
|
322
326
|
"fontweight",
|
323
327
|
"frame",
|
324
328
|
"height",
|
329
|
+
"href",
|
325
330
|
"linethickness",
|
331
|
+
"lquote",
|
326
332
|
"lspace",
|
327
333
|
"mathbackground",
|
328
334
|
"mathcolor",
|
335
|
+
"mathsize",
|
329
336
|
"mathvariant",
|
330
337
|
"maxsize",
|
331
338
|
"minsize",
|
339
|
+
"notation",
|
332
340
|
"open",
|
333
341
|
"other",
|
334
342
|
"rowalign",
|
335
343
|
"rowlines",
|
336
344
|
"rowspacing",
|
337
345
|
"rowspan",
|
346
|
+
"rquote",
|
338
347
|
"rspace",
|
339
348
|
"scriptlevel",
|
340
349
|
"selection",
|
@@ -503,6 +512,62 @@ module Loofah
|
|
503
512
|
"zoomAndPan",
|
504
513
|
])
|
505
514
|
|
515
|
+
ARIA_ATTRIBUTES = Set.new([
|
516
|
+
"aria-activedescendant",
|
517
|
+
"aria-atomic",
|
518
|
+
"aria-autocomplete",
|
519
|
+
"aria-braillelabel",
|
520
|
+
"aria-brailleroledescription",
|
521
|
+
"aria-busy",
|
522
|
+
"aria-checked",
|
523
|
+
"aria-colcount",
|
524
|
+
"aria-colindex",
|
525
|
+
"aria-colindextext",
|
526
|
+
"aria-colspan",
|
527
|
+
"aria-controls",
|
528
|
+
"aria-current",
|
529
|
+
"aria-describedby",
|
530
|
+
"aria-description",
|
531
|
+
"aria-details",
|
532
|
+
"aria-disabled",
|
533
|
+
"aria-dropeffect",
|
534
|
+
"aria-errormessage",
|
535
|
+
"aria-expanded",
|
536
|
+
"aria-flowto",
|
537
|
+
"aria-grabbed",
|
538
|
+
"aria-haspopup",
|
539
|
+
"aria-hidden",
|
540
|
+
"aria-invalid",
|
541
|
+
"aria-keyshortcuts",
|
542
|
+
"aria-label",
|
543
|
+
"aria-labelledby",
|
544
|
+
"aria-level",
|
545
|
+
"aria-live",
|
546
|
+
"aria-multiline",
|
547
|
+
"aria-multiselectable",
|
548
|
+
"aria-orientation",
|
549
|
+
"aria-owns",
|
550
|
+
"aria-placeholder",
|
551
|
+
"aria-posinset",
|
552
|
+
"aria-pressed",
|
553
|
+
"aria-readonly",
|
554
|
+
"aria-relevant",
|
555
|
+
"aria-required",
|
556
|
+
"aria-roledescription",
|
557
|
+
"aria-rowcount",
|
558
|
+
"aria-rowindex",
|
559
|
+
"aria-rowindextext",
|
560
|
+
"aria-rowspan",
|
561
|
+
"aria-selected",
|
562
|
+
"aria-setsize",
|
563
|
+
"aria-sort",
|
564
|
+
"aria-valuemax",
|
565
|
+
"aria-valuemin",
|
566
|
+
"aria-valuenow",
|
567
|
+
"aria-valuetext",
|
568
|
+
"role",
|
569
|
+
])
|
570
|
+
|
506
571
|
ATTR_VAL_IS_URI = Set.new([
|
507
572
|
"action",
|
508
573
|
"cite",
|
@@ -549,6 +614,10 @@ module Loofah
|
|
549
614
|
|
550
615
|
ACCEPTABLE_CSS_PROPERTIES = Set.new([
|
551
616
|
"azimuth",
|
617
|
+
"align-content",
|
618
|
+
"align-items",
|
619
|
+
"align-self",
|
620
|
+
"aspect-ratio",
|
552
621
|
"background-color",
|
553
622
|
"border-bottom-color",
|
554
623
|
"border-collapse",
|
@@ -562,6 +631,13 @@ module Loofah
|
|
562
631
|
"direction",
|
563
632
|
"display",
|
564
633
|
"elevation",
|
634
|
+
"flex",
|
635
|
+
"flex-basis",
|
636
|
+
"flex-direction",
|
637
|
+
"flex-flow",
|
638
|
+
"flex-grow",
|
639
|
+
"flex-shrink",
|
640
|
+
"flex-wrap",
|
565
641
|
"float",
|
566
642
|
"font",
|
567
643
|
"font-family",
|
@@ -570,12 +646,19 @@ module Loofah
|
|
570
646
|
"font-variant",
|
571
647
|
"font-weight",
|
572
648
|
"height",
|
649
|
+
"justify-content",
|
573
650
|
"letter-spacing",
|
574
651
|
"line-height",
|
575
652
|
"list-style",
|
576
653
|
"list-style-type",
|
577
654
|
"max-width",
|
655
|
+
"order",
|
578
656
|
"overflow",
|
657
|
+
"overflow-x",
|
658
|
+
"overflow-y",
|
659
|
+
"page-break-after",
|
660
|
+
"page-break-before",
|
661
|
+
"page-break-inside",
|
579
662
|
"pause",
|
580
663
|
"pause-after",
|
581
664
|
"pause-before",
|
@@ -614,9 +697,15 @@ module Loofah
|
|
614
697
|
"collapse",
|
615
698
|
"dashed",
|
616
699
|
"dotted",
|
700
|
+
"double",
|
617
701
|
"fuchsia",
|
618
702
|
"gray",
|
619
703
|
"green",
|
704
|
+
"groove",
|
705
|
+
"hidden",
|
706
|
+
"inherit",
|
707
|
+
"initial",
|
708
|
+
"inset",
|
620
709
|
"italic",
|
621
710
|
"left",
|
622
711
|
"lime",
|
@@ -627,18 +716,23 @@ module Loofah
|
|
627
716
|
"normal",
|
628
717
|
"nowrap",
|
629
718
|
"olive",
|
719
|
+
"outset",
|
630
720
|
"pointer",
|
631
721
|
"purple",
|
632
722
|
"red",
|
723
|
+
"revert",
|
724
|
+
"ridge",
|
633
725
|
"right",
|
726
|
+
"separate",
|
634
727
|
"silver",
|
635
728
|
"solid",
|
636
729
|
"teal",
|
637
|
-
"thin",
|
638
730
|
"thick",
|
731
|
+
"thin",
|
639
732
|
"top",
|
640
733
|
"transparent",
|
641
734
|
"underline",
|
735
|
+
"unset",
|
642
736
|
"white",
|
643
737
|
"yellow",
|
644
738
|
])
|
@@ -737,6 +831,7 @@ module Loofah
|
|
737
831
|
"rsync",
|
738
832
|
"rtsp",
|
739
833
|
"sftp",
|
834
|
+
"sms",
|
740
835
|
"ssh",
|
741
836
|
"tag",
|
742
837
|
"tel",
|
@@ -757,7 +852,7 @@ module Loofah
|
|
757
852
|
|
758
853
|
# subclasses may define their own versions of these constants
|
759
854
|
ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
|
760
|
-
ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
|
855
|
+
ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES + ARIA_ATTRIBUTES
|
761
856
|
ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
|
762
857
|
ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
|
763
858
|
ALLOWED_CSS_FUNCTIONS = ACCEPTABLE_CSS_FUNCTIONS
|
@@ -765,18 +860,14 @@ module Loofah
|
|
765
860
|
ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
|
766
861
|
ALLOWED_URI_DATA_MEDIATYPES = ACCEPTABLE_URI_DATA_MEDIATYPES
|
767
862
|
|
863
|
+
# TODO: remove VOID_ELEMENTS in a future major release
|
864
|
+
# and put it in the tests (it is used only for testing, not for functional behavior)
|
768
865
|
VOID_ELEMENTS = Set.new([
|
769
866
|
"area",
|
770
|
-
"base",
|
771
867
|
"br",
|
772
|
-
"col",
|
773
|
-
"embed",
|
774
868
|
"hr",
|
775
869
|
"img",
|
776
870
|
"input",
|
777
|
-
"link",
|
778
|
-
"meta",
|
779
|
-
"param",
|
780
871
|
])
|
781
872
|
|
782
873
|
# additional tags we should consider safe since we have libxml2 fixing up our documents.
|
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -7,23 +7,26 @@ module Loofah
|
|
7
7
|
module Scrub
|
8
8
|
CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
|
9
9
|
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
|
10
|
-
CRASS_SEMICOLON = { :node => :semicolon, :raw => ";" }
|
10
|
+
CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
|
11
|
+
CSS_IMPORTANT = '!important'
|
12
|
+
CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
|
13
|
+
DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
|
11
14
|
|
12
15
|
class << self
|
13
16
|
def allowed_element?(element_name)
|
14
|
-
::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
|
17
|
+
::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include?(element_name)
|
15
18
|
end
|
16
19
|
|
17
20
|
# alternative implementation of the html5lib attribute scrubbing algorithm
|
18
21
|
def scrub_attributes(node)
|
19
22
|
node.attribute_nodes.each do |attr_node|
|
20
23
|
attr_name = if attr_node.namespace
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
"#{attr_node.namespace.prefix}:#{attr_node.node_name}"
|
25
|
+
else
|
26
|
+
attr_node.node_name
|
27
|
+
end
|
25
28
|
|
26
|
-
if attr_name =~ /\Adata-[\w-]+\z/
|
29
|
+
if attr_name =~ DATA_ATTRIBUTE_NAME
|
27
30
|
next
|
28
31
|
end
|
29
32
|
|
@@ -57,13 +60,15 @@ module Loofah
|
|
57
60
|
end
|
58
61
|
end
|
59
62
|
|
60
|
-
scrub_css_attribute node
|
63
|
+
scrub_css_attribute(node)
|
61
64
|
|
62
65
|
node.attribute_nodes.each do |attr_node|
|
63
|
-
|
66
|
+
if attr_node.value !~ /[^[:space:]]/ && attr_node.name !~ DATA_ATTRIBUTE_NAME
|
67
|
+
node.remove_attribute(attr_node.name)
|
68
|
+
end
|
64
69
|
end
|
65
70
|
|
66
|
-
force_correct_attribute_escaping! node
|
71
|
+
force_correct_attribute_escaping!(node)
|
67
72
|
end
|
68
73
|
|
69
74
|
def scrub_css_attribute(node)
|
@@ -72,32 +77,54 @@ module Loofah
|
|
72
77
|
end
|
73
78
|
|
74
79
|
def scrub_css(style)
|
75
|
-
style_tree = Crass.parse_properties style
|
80
|
+
style_tree = Crass.parse_properties(style)
|
76
81
|
sanitized_tree = []
|
77
82
|
|
78
83
|
style_tree.each do |node|
|
79
84
|
next unless node[:node] == :property
|
80
85
|
next if node[:children].any? do |child|
|
81
|
-
[:url, :bad_url].include?(child[:node])
|
86
|
+
[:url, :bad_url].include?(child[:node])
|
82
87
|
end
|
88
|
+
|
83
89
|
name = node[:name].downcase
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
90
|
+
next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
|
91
|
+
SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
|
92
|
+
SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
|
93
|
+
|
94
|
+
value = node[:children].map do |child|
|
95
|
+
case child[:node]
|
96
|
+
when :whitespace
|
97
|
+
nil
|
98
|
+
when :string
|
99
|
+
if child[:raw] =~ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES
|
100
|
+
Crass::Parser.stringify(child)
|
101
|
+
else
|
102
|
+
nil
|
103
|
+
end
|
104
|
+
when :function
|
105
|
+
if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
|
106
|
+
Crass::Parser.stringify(child)
|
107
|
+
end
|
108
|
+
when :ident
|
109
|
+
keyword = child[:value]
|
110
|
+
if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
|
111
|
+
SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
|
112
|
+
(keyword =~ CSS_KEYWORDISH)
|
89
113
|
keyword
|
90
114
|
end
|
91
|
-
|
92
|
-
|
93
|
-
propstring = sprintf "%s:%s", name, value.join(" ")
|
94
|
-
sanitized_node = Crass.parse_properties(propstring).first
|
95
|
-
sanitized_tree << sanitized_node << CRASS_SEMICOLON
|
115
|
+
else
|
116
|
+
child[:raw]
|
96
117
|
end
|
97
|
-
end
|
118
|
+
end.compact
|
119
|
+
|
120
|
+
next if value.empty?
|
121
|
+
value << CSS_IMPORTANT if node[:important]
|
122
|
+
propstring = format("%s:%s", name, value.join(" "))
|
123
|
+
sanitized_node = Crass.parse_properties(propstring).first
|
124
|
+
sanitized_tree << sanitized_node << CRASS_SEMICOLON
|
98
125
|
end
|
99
126
|
|
100
|
-
Crass::Parser.stringify sanitized_tree
|
127
|
+
Crass::Parser.stringify(sanitized_tree)
|
101
128
|
end
|
102
129
|
|
103
130
|
#
|
@@ -93,7 +93,11 @@ module Loofah
|
|
93
93
|
# frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
|
94
94
|
#
|
95
95
|
def text(options = {})
|
96
|
-
result = serialize_root
|
96
|
+
result = if serialize_root
|
97
|
+
serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
|
98
|
+
else
|
99
|
+
""
|
100
|
+
end
|
97
101
|
if options[:encode_special_chars] == false
|
98
102
|
result # possibly dangerous if rendered in a browser
|
99
103
|
else
|
@@ -108,11 +112,11 @@ module Loofah
|
|
108
112
|
# Returns a plain-text version of the markup contained by the
|
109
113
|
# fragment, with HTML entities encoded.
|
110
114
|
#
|
111
|
-
# This method is slower than #to_s, but is clever about
|
112
|
-
# whitespace around block elements.
|
115
|
+
# This method is slower than #text, but is clever about
|
116
|
+
# whitespace around block elements and line break elements.
|
113
117
|
#
|
114
|
-
# Loofah.document("<h1>Title</h1><div>Content</div>").to_text
|
115
|
-
# # => "\nTitle\n\nContent\n"
|
118
|
+
# Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
|
119
|
+
# # => "\nTitle\n\nContent\nNext line\n"
|
116
120
|
#
|
117
121
|
def to_text(options = {})
|
118
122
|
Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -240,8 +240,13 @@ module Loofah
|
|
240
240
|
end
|
241
241
|
|
242
242
|
def scrub(node)
|
243
|
-
return CONTINUE unless Loofah::Elements::
|
244
|
-
|
243
|
+
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
244
|
+
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
245
|
+
"\n"
|
246
|
+
else
|
247
|
+
"\n#{node.content}\n"
|
248
|
+
end
|
249
|
+
node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
|
245
250
|
node.remove
|
246
251
|
end
|
247
252
|
end
|
data/lib/loofah.rb
CHANGED
@@ -3,21 +3,22 @@ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.i
|
|
3
3
|
|
4
4
|
require "nokogiri"
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
require_relative "loofah/version"
|
7
|
+
require_relative "loofah/metahelpers"
|
8
|
+
require_relative "loofah/elements"
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
require_relative "loofah/html5/safelist"
|
11
|
+
require_relative "loofah/html5/libxml2_workarounds"
|
12
|
+
require_relative "loofah/html5/scrub"
|
12
13
|
|
13
|
-
|
14
|
-
|
14
|
+
require_relative "loofah/scrubber"
|
15
|
+
require_relative "loofah/scrubbers"
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
17
|
+
require_relative "loofah/instance_methods"
|
18
|
+
require_relative "loofah/xml/document"
|
19
|
+
require_relative "loofah/xml/document_fragment"
|
20
|
+
require_relative "loofah/html/document"
|
21
|
+
require_relative "loofah/html/document_fragment"
|
21
22
|
|
22
23
|
# == Strings and IO Objects as Input
|
23
24
|
#
|
@@ -28,9 +29,6 @@ require "loofah/html/document_fragment"
|
|
28
29
|
# quantities of docs.
|
29
30
|
#
|
30
31
|
module Loofah
|
31
|
-
# The version of Loofah you are using
|
32
|
-
VERSION = "2.5.0"
|
33
|
-
|
34
32
|
class << self
|
35
33
|
# Shortcut for Loofah::HTML::Document.parse
|
36
34
|
# This method accepts the same parameters as Nokogiri::HTML::Document.parse
|