loofah 2.3.1 → 2.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +193 -40
  3. data/README.md +13 -12
  4. data/lib/loofah/elements.rb +79 -75
  5. data/lib/loofah/helpers.rb +5 -4
  6. data/lib/loofah/html/document.rb +1 -0
  7. data/lib/loofah/html/document_fragment.rb +4 -2
  8. data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
  9. data/lib/loofah/html5/safelist.rb +273 -27
  10. data/lib/loofah/html5/scrub.rb +147 -52
  11. data/lib/loofah/instance_methods.rb +14 -8
  12. data/lib/loofah/metahelpers.rb +2 -1
  13. data/lib/loofah/scrubber.rb +12 -7
  14. data/lib/loofah/scrubbers.rb +20 -18
  15. data/lib/loofah/version.rb +5 -0
  16. data/lib/loofah/xml/document.rb +1 -0
  17. data/lib/loofah/xml/document_fragment.rb +2 -1
  18. data/lib/loofah.rb +33 -16
  19. metadata +45 -125
  20. data/.gemtest +0 -0
  21. data/Gemfile +0 -22
  22. data/Manifest.txt +0 -41
  23. data/Rakefile +0 -81
  24. data/benchmark/benchmark.rb +0 -149
  25. data/benchmark/fragment.html +0 -96
  26. data/benchmark/helper.rb +0 -73
  27. data/benchmark/www.slashdot.com.html +0 -2560
  28. data/test/assets/msword.html +0 -63
  29. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  30. data/test/helper.rb +0 -18
  31. data/test/html5/test_sanitizer.rb +0 -401
  32. data/test/html5/test_scrub.rb +0 -10
  33. data/test/integration/test_ad_hoc.rb +0 -220
  34. data/test/integration/test_helpers.rb +0 -43
  35. data/test/integration/test_html.rb +0 -72
  36. data/test/integration/test_scrubbers.rb +0 -400
  37. data/test/integration/test_xml.rb +0 -55
  38. data/test/unit/test_api.rb +0 -142
  39. data/test/unit/test_encoding.rb +0 -20
  40. data/test/unit/test_helpers.rb +0 -62
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module Helpers
3
4
  class << self
@@ -27,7 +28,7 @@ module Loofah
27
28
  #
28
29
  # Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
29
30
  #
30
- def sanitize_css style_string
31
+ def sanitize_css(style_string)
31
32
  ::Loofah::HTML5::Scrub.scrub_css style_string
32
33
  end
33
34
 
@@ -68,7 +69,7 @@ module Loofah
68
69
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
69
70
  #
70
71
  class FullSanitizer
71
- def sanitize html, *args
72
+ def sanitize(html, *args)
72
73
  Loofah::Helpers.strip_tags html
73
74
  end
74
75
  end
@@ -85,11 +86,11 @@ module Loofah
85
86
  # Loofah::Helpers::ActionView.set_as_default_sanitizer
86
87
  #
87
88
  class SafeListSanitizer
88
- def sanitize html, *args
89
+ def sanitize(html, *args)
89
90
  Loofah::Helpers.sanitize html
90
91
  end
91
92
 
92
- def sanitize_css style_string, *args
93
+ def sanitize_css(style_string, *args)
93
94
  Loofah::Helpers.sanitize_css style_string
94
95
  end
95
96
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module HTML # :nodoc:
3
4
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module HTML # :nodoc:
3
4
  #
@@ -14,10 +15,10 @@ module Loofah
14
15
  # constructor. Applications should use Loofah.fragment to
15
16
  # parse a fragment.
16
17
  #
17
- def parse tags, encoding = nil
18
+ def parse(tags, encoding = nil)
18
19
  doc = Loofah::HTML::Document.new
19
20
 
20
- encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
21
+ encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
21
22
  doc.encoding = encoding
22
23
 
23
24
  new(doc, tags)
@@ -30,6 +31,7 @@ module Loofah
30
31
  def to_s
31
32
  serialize_root.children.to_s
32
33
  end
34
+
33
35
  alias :serialize :to_s
34
36
 
35
37
  def serialize_root
@@ -1,5 +1,6 @@
1
1
  # coding: utf-8
2
- require 'set'
2
+ # frozen_string_literal: true
3
+ require "set"
3
4
 
4
5
  module Loofah
5
6
  #
@@ -16,11 +17,11 @@ module Loofah
16
17
  # see comments about CVE-2018-8048 within the tests for more information
17
18
  #
18
19
  BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
19
- href
20
- action
21
- src
22
- name
23
- ]
24
- BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
20
+ href
21
+ action
22
+ src
23
+ name
24
+ ]
25
+ BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
25
26
  end
26
27
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require "set"
2
3
 
3
4
  module Loofah
@@ -139,6 +140,7 @@ module Loofah
139
140
  "ul",
140
141
  "var",
141
142
  "video",
143
+ "wbr",
142
144
  ])
143
145
 
144
146
  MATHML_ELEMENTS = Set.new([
@@ -146,6 +148,7 @@ module Loofah
146
148
  "annotation-xml",
147
149
  "maction",
148
150
  "math",
151
+ "menclose",
149
152
  "merror",
150
153
  "mfenced",
151
154
  "mfrac",
@@ -159,6 +162,7 @@ module Loofah
159
162
  "mprescripts",
160
163
  "mroot",
161
164
  "mrow",
165
+ "ms",
162
166
  "mspace",
163
167
  "msqrt",
164
168
  "mstyle",
@@ -311,6 +315,7 @@ module Loofah
311
315
  "columnspacing",
312
316
  "columnspan",
313
317
  "depth",
318
+ "dir",
314
319
  "display",
315
320
  "displaystyle",
316
321
  "encoding",
@@ -321,19 +326,24 @@ module Loofah
321
326
  "fontweight",
322
327
  "frame",
323
328
  "height",
329
+ "href",
324
330
  "linethickness",
331
+ "lquote",
325
332
  "lspace",
326
333
  "mathbackground",
327
334
  "mathcolor",
335
+ "mathsize",
328
336
  "mathvariant",
329
337
  "maxsize",
330
338
  "minsize",
339
+ "notation",
331
340
  "open",
332
341
  "other",
333
342
  "rowalign",
334
343
  "rowlines",
335
344
  "rowspacing",
336
345
  "rowspan",
346
+ "rquote",
337
347
  "rspace",
338
348
  "scriptlevel",
339
349
  "selection",
@@ -502,6 +512,62 @@ module Loofah
502
512
  "zoomAndPan",
503
513
  ])
504
514
 
515
+ ARIA_ATTRIBUTES = Set.new([
516
+ "aria-activedescendant",
517
+ "aria-atomic",
518
+ "aria-autocomplete",
519
+ "aria-braillelabel",
520
+ "aria-brailleroledescription",
521
+ "aria-busy",
522
+ "aria-checked",
523
+ "aria-colcount",
524
+ "aria-colindex",
525
+ "aria-colindextext",
526
+ "aria-colspan",
527
+ "aria-controls",
528
+ "aria-current",
529
+ "aria-describedby",
530
+ "aria-description",
531
+ "aria-details",
532
+ "aria-disabled",
533
+ "aria-dropeffect",
534
+ "aria-errormessage",
535
+ "aria-expanded",
536
+ "aria-flowto",
537
+ "aria-grabbed",
538
+ "aria-haspopup",
539
+ "aria-hidden",
540
+ "aria-invalid",
541
+ "aria-keyshortcuts",
542
+ "aria-label",
543
+ "aria-labelledby",
544
+ "aria-level",
545
+ "aria-live",
546
+ "aria-multiline",
547
+ "aria-multiselectable",
548
+ "aria-orientation",
549
+ "aria-owns",
550
+ "aria-placeholder",
551
+ "aria-posinset",
552
+ "aria-pressed",
553
+ "aria-readonly",
554
+ "aria-relevant",
555
+ "aria-required",
556
+ "aria-roledescription",
557
+ "aria-rowcount",
558
+ "aria-rowindex",
559
+ "aria-rowindextext",
560
+ "aria-rowspan",
561
+ "aria-selected",
562
+ "aria-setsize",
563
+ "aria-sort",
564
+ "aria-valuemax",
565
+ "aria-valuemin",
566
+ "aria-valuenow",
567
+ "aria-valuetext",
568
+ "role",
569
+ ])
570
+
505
571
  ATTR_VAL_IS_URI = Set.new([
506
572
  "action",
507
573
  "cite",
@@ -548,6 +614,10 @@ module Loofah
548
614
 
549
615
  ACCEPTABLE_CSS_PROPERTIES = Set.new([
550
616
  "azimuth",
617
+ "align-content",
618
+ "align-items",
619
+ "align-self",
620
+ "aspect-ratio",
551
621
  "background-color",
552
622
  "border-bottom-color",
553
623
  "border-collapse",
@@ -561,6 +631,13 @@ module Loofah
561
631
  "direction",
562
632
  "display",
563
633
  "elevation",
634
+ "flex",
635
+ "flex-basis",
636
+ "flex-direction",
637
+ "flex-flow",
638
+ "flex-grow",
639
+ "flex-shrink",
640
+ "flex-wrap",
564
641
  "float",
565
642
  "font",
566
643
  "font-family",
@@ -569,11 +646,19 @@ module Loofah
569
646
  "font-variant",
570
647
  "font-weight",
571
648
  "height",
649
+ "justify-content",
572
650
  "letter-spacing",
573
651
  "line-height",
574
652
  "list-style",
575
653
  "list-style-type",
654
+ "max-width",
655
+ "order",
576
656
  "overflow",
657
+ "overflow-x",
658
+ "overflow-y",
659
+ "page-break-after",
660
+ "page-break-before",
661
+ "page-break-inside",
577
662
  "pause",
578
663
  "pause-after",
579
664
  "pause-before",
@@ -599,48 +684,213 @@ module Loofah
599
684
 
600
685
  ACCEPTABLE_CSS_KEYWORDS = Set.new([
601
686
  "!important",
602
- "aqua",
603
687
  "auto",
604
- "black",
605
688
  "block",
606
- "blue",
607
689
  "bold",
608
690
  "both",
609
691
  "bottom",
610
- "brown",
611
692
  "center",
612
693
  "collapse",
613
694
  "dashed",
614
695
  "dotted",
615
- "fuchsia",
616
- "gray",
617
- "green",
696
+ "double",
697
+ "groove",
698
+ "hidden",
699
+ "inherit",
700
+ "initial",
701
+ "inset",
618
702
  "italic",
619
703
  "left",
620
- "lime",
621
- "maroon",
622
704
  "medium",
623
- "navy",
624
705
  "none",
625
706
  "normal",
626
707
  "nowrap",
627
- "olive",
708
+ "outset",
628
709
  "pointer",
629
- "purple",
630
- "red",
710
+ "revert",
711
+ "ridge",
631
712
  "right",
632
- "silver",
713
+ "separate",
633
714
  "solid",
634
- "teal",
635
- "thin",
636
715
  "thick",
716
+ "thin",
637
717
  "top",
638
718
  "transparent",
639
719
  "underline",
640
- "white",
641
- "yellow",
720
+ "unset",
642
721
  ])
643
722
 
723
+ # https://www.w3.org/TR/css-color-3/#html4
724
+ ACCEPTABLE_CSS_COLORS = Set.new([
725
+ "aqua",
726
+ "black",
727
+ "blue",
728
+ "fuchsia",
729
+ "gray",
730
+ "green",
731
+ "lime",
732
+ "maroon",
733
+ "navy",
734
+ "olive",
735
+ "purple",
736
+ "red",
737
+ "silver",
738
+ "teal",
739
+ "white",
740
+ "yellow",
741
+ ])
742
+
743
+ # https://www.w3.org/TR/css-color-3/#svg-color
744
+ ACCEPTABLE_CSS_EXTENDED_COLORS = Set.new([
745
+ "aliceblue",
746
+ "antiquewhite",
747
+ "aqua",
748
+ "aquamarine",
749
+ "azure",
750
+ "beige",
751
+ "bisque",
752
+ "black",
753
+ "blanchedalmond",
754
+ "blue",
755
+ "blueviolet",
756
+ "brown",
757
+ "burlywood",
758
+ "cadetblue",
759
+ "chartreuse",
760
+ "chocolate",
761
+ "coral",
762
+ "cornflowerblue",
763
+ "cornsilk",
764
+ "crimson",
765
+ "cyan",
766
+ "darkblue",
767
+ "darkcyan",
768
+ "darkgoldenrod",
769
+ "darkgray",
770
+ "darkgreen",
771
+ "darkgrey",
772
+ "darkkhaki",
773
+ "darkmagenta",
774
+ "darkolivegreen",
775
+ "darkorange",
776
+ "darkorchid",
777
+ "darkred",
778
+ "darksalmon",
779
+ "darkseagreen",
780
+ "darkslateblue",
781
+ "darkslategray",
782
+ "darkslategrey",
783
+ "darkturquoise",
784
+ "darkviolet",
785
+ "deeppink",
786
+ "deepskyblue",
787
+ "dimgray",
788
+ "dimgrey",
789
+ "dodgerblue",
790
+ "firebrick",
791
+ "floralwhite",
792
+ "forestgreen",
793
+ "fuchsia",
794
+ "gainsboro",
795
+ "ghostwhite",
796
+ "gold",
797
+ "goldenrod",
798
+ "gray",
799
+ "green",
800
+ "greenyellow",
801
+ "grey",
802
+ "honeydew",
803
+ "hotpink",
804
+ "indianred",
805
+ "indigo",
806
+ "ivory",
807
+ "khaki",
808
+ "lavender",
809
+ "lavenderblush",
810
+ "lawngreen",
811
+ "lemonchiffon",
812
+ "lightblue",
813
+ "lightcoral",
814
+ "lightcyan",
815
+ "lightgoldenrodyellow",
816
+ "lightgray",
817
+ "lightgreen",
818
+ "lightgrey",
819
+ "lightpink",
820
+ "lightsalmon",
821
+ "lightseagreen",
822
+ "lightskyblue",
823
+ "lightslategray",
824
+ "lightslategrey",
825
+ "lightsteelblue",
826
+ "lightyellow",
827
+ "lime",
828
+ "limegreen",
829
+ "linen",
830
+ "magenta",
831
+ "maroon",
832
+ "mediumaquamarine",
833
+ "mediumblue",
834
+ "mediumorchid",
835
+ "mediumpurple",
836
+ "mediumseagreen",
837
+ "mediumslateblue",
838
+ "mediumspringgreen",
839
+ "mediumturquoise",
840
+ "mediumvioletred",
841
+ "midnightblue",
842
+ "mintcream",
843
+ "mistyrose",
844
+ "moccasin",
845
+ "navajowhite",
846
+ "navy",
847
+ "oldlace",
848
+ "olive",
849
+ "olivedrab",
850
+ "orange",
851
+ "orangered",
852
+ "orchid",
853
+ "palegoldenrod",
854
+ "palegreen",
855
+ "paleturquoise",
856
+ "palevioletred",
857
+ "papayawhip",
858
+ "peachpuff",
859
+ "peru",
860
+ "pink",
861
+ "plum",
862
+ "powderblue",
863
+ "purple",
864
+ "red",
865
+ "rosybrown",
866
+ "royalblue",
867
+ "saddlebrown",
868
+ "salmon",
869
+ "sandybrown",
870
+ "seagreen",
871
+ "seashell",
872
+ "sienna",
873
+ "silver",
874
+ "skyblue",
875
+ "slateblue",
876
+ "slategray",
877
+ "slategrey",
878
+ "snow",
879
+ "springgreen",
880
+ "steelblue",
881
+ "tan",
882
+ "teal",
883
+ "thistle",
884
+ "tomato",
885
+ "turquoise",
886
+ "violet",
887
+ "wheat",
888
+ "white",
889
+ "whitesmoke",
890
+ "yellow",
891
+ "yellowgreen",
892
+ ])
893
+
644
894
  # see https://www.quackit.com/css/functions/
645
895
  # omit `url` and `image` from that list
646
896
  ACCEPTABLE_CSS_FUNCTIONS = Set.new([
@@ -735,6 +985,7 @@ module Loofah
735
985
  "rsync",
736
986
  "rtsp",
737
987
  "sftp",
988
+ "sms",
738
989
  "ssh",
739
990
  "tag",
740
991
  "tel",
@@ -748,33 +999,28 @@ module Loofah
748
999
  "image/gif",
749
1000
  "image/jpeg",
750
1001
  "image/png",
751
- "image/svg+xml",
752
1002
  "text/css",
753
1003
  "text/plain",
754
1004
  ])
755
1005
 
756
1006
  # subclasses may define their own versions of these constants
757
1007
  ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
758
- ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
1008
+ ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES + ARIA_ATTRIBUTES
759
1009
  ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
760
- ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
1010
+ ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS + ACCEPTABLE_CSS_COLORS + ACCEPTABLE_CSS_EXTENDED_COLORS
761
1011
  ALLOWED_CSS_FUNCTIONS = ACCEPTABLE_CSS_FUNCTIONS
762
1012
  ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
763
1013
  ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
764
1014
  ALLOWED_URI_DATA_MEDIATYPES = ACCEPTABLE_URI_DATA_MEDIATYPES
765
1015
 
1016
+ # TODO: remove VOID_ELEMENTS in a future major release
1017
+ # and put it in the tests (it is used only for testing, not for functional behavior)
766
1018
  VOID_ELEMENTS = Set.new([
767
1019
  "area",
768
- "base",
769
1020
  "br",
770
- "col",
771
- "embed",
772
1021
  "hr",
773
1022
  "img",
774
1023
  "input",
775
- "link",
776
- "meta",
777
- "param",
778
1024
  ])
779
1025
 
780
1026
  # additional tags we should consider safe since we have libxml2 fixing up our documents.