loofah 2.12.0 → 2.19.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +58 -0
- data/README.md +5 -6
- data/lib/loofah/elements.rb +5 -2
- data/lib/loofah/html5/safelist.rb +239 -20
- data/lib/loofah/html5/scrub.rb +85 -15
- data/lib/loofah/instance_methods.rb +9 -5
- data/lib/loofah/scrubber.rb +4 -0
- data/lib/loofah/scrubbers.rb +9 -8
- data/lib/loofah/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd3edb0acdf2359d82564aca0bc13710d9f6c49157963d18953ff55bd7c14413
|
4
|
+
data.tar.gz: 3a6e11b7deb9cfb469aaf6ec919062687bd4215ef11980bded72ca298807610c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4970a6aa72265f60556dd6fd254375c86d3f83be23f3bbcc8b04df00ce0e801e8ef9e67d0a77ca6a21915be89226131c16a7f3540f02538cc2b9a369950dfebf
|
7
|
+
data.tar.gz: 27e3a06cc391ec3d9e3c966efdb6b4ce58e98c397ec87490d418406c17757e5cb0193edabaced30a9f24320c729e6730308e346610859f9f7c6d5fcc6f72cd56
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,63 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.19.1 / 2022-12-13
|
4
|
+
|
5
|
+
### Security
|
6
|
+
|
7
|
+
* Address CVE-2022-23514, inefficient regular expression complexity. See [GHSA-486f-hjj9-9vhh](https://github.com/flavorjones/loofah/security/advisories/GHSA-486f-hjj9-9vhh) for more information.
|
8
|
+
* Address CVE-2022-23515, improper neutralization of data URIs. See [GHSA-228g-948r-83gx](https://github.com/flavorjones/loofah/security/advisories/GHSA-228g-948r-83gx) for more information.
|
9
|
+
* Address CVE-2022-23516, uncontrolled recursion. See [GHSA-3x8r-x6xp-q4vm](https://github.com/flavorjones/loofah/security/advisories/GHSA-3x8r-x6xp-q4vm) for more information.
|
10
|
+
|
11
|
+
|
12
|
+
## 2.19.0 / 2022-09-14
|
13
|
+
|
14
|
+
### Features
|
15
|
+
|
16
|
+
* Allow SVG 1.0 color keyword names in CSS attributes. These colors are part of the [CSS Color Module Level 3](https://www.w3.org/TR/css-color-3/#svg-color) recommendation released 2022-01-18. [[#243](https://github.com/flavorjones/loofah/issues/243)]
|
17
|
+
|
18
|
+
|
19
|
+
## 2.18.0 / 2022-05-11
|
20
|
+
|
21
|
+
### Features
|
22
|
+
|
23
|
+
* Allow CSS property `aspect-ratio`. [[#236](https://github.com/flavorjones/loofah/issues/236)] (Thanks, [@louim](https://github.com/louim)!)
|
24
|
+
|
25
|
+
|
26
|
+
## 2.17.0 / 2022-04-28
|
27
|
+
|
28
|
+
### Features
|
29
|
+
|
30
|
+
* Allow ARIA attributes. [[#232](https://github.com/flavorjones/loofah/issues/232), [#233](https://github.com/flavorjones/loofah/issues/233)] (Thanks, [@nick-desteffen](https://github.com/nick-desteffen)!)
|
31
|
+
|
32
|
+
|
33
|
+
## 2.16.0 / 2022-04-01
|
34
|
+
|
35
|
+
### Features
|
36
|
+
|
37
|
+
* Allow MathML elements `menclose` and `ms`, and MathML attributes `dir`, `href`, `lquote`, `mathsize`, `notation`, and `rquote`. [[#231](https://github.com/flavorjones/loofah/issues/231)] (Thanks, [@nick-desteffen](https://github.com/nick-desteffen)!)
|
38
|
+
|
39
|
+
|
40
|
+
## 2.15.0 / 2022-03-14
|
41
|
+
|
42
|
+
### Features
|
43
|
+
|
44
|
+
* Expand set of allowed protocols to include `sms:`. [[#228](https://github.com/flavorjones/loofah/issues/228)] (Thanks, [@brendon](https://github.com/brendon)!)
|
45
|
+
|
46
|
+
|
47
|
+
## 2.14.0 / 2022-02-11
|
48
|
+
|
49
|
+
### Features
|
50
|
+
|
51
|
+
* The `#to_text` method on `Loofah::HTML::{Document,DocumentFragment}` replaces `<br>` line break elements with a newline. [[#225](https://github.com/flavorjones/loofah/issues/225)]
|
52
|
+
|
53
|
+
|
54
|
+
## 2.13.0 / 2021-12-10
|
55
|
+
|
56
|
+
### Bug fixes
|
57
|
+
|
58
|
+
* Loofah::HTML::DocumentFragment#text no longer serializes top-level comment children. [[#221](https://github.com/flavorjones/loofah/issues/221)]
|
59
|
+
|
60
|
+
|
3
61
|
## 2.12.0 / 2021-08-11
|
4
62
|
|
5
63
|
### Features
|
data/README.md
CHANGED
@@ -133,13 +133,12 @@ and `text` to return plain text:
|
|
133
133
|
doc.text # => "ohai! div is safe "
|
134
134
|
```
|
135
135
|
|
136
|
-
Also, `to_text` is available, which does the right thing with
|
137
|
-
whitespace around block-level elements.
|
136
|
+
Also, `to_text` is available, which does the right thing with whitespace around block-level and line break elements.
|
138
137
|
|
139
138
|
``` ruby
|
140
|
-
doc = Loofah.fragment("<h1>Title</h1><div>Content</div>")
|
141
|
-
doc.text # => "TitleContent" # probably not what you want
|
142
|
-
doc.to_text # => "\nTitle\n\nContent\n"
|
139
|
+
doc = Loofah.fragment("<h1>Title</h1><div>Content<br>Next line</div>")
|
140
|
+
doc.text # => "TitleContentNext line" # probably not what you want
|
141
|
+
doc.to_text # => "\nTitle\n\nContent\nNext line\n" # better
|
143
142
|
```
|
144
143
|
|
145
144
|
### Loofah::XML::Document and Loofah::XML::DocumentFragment
|
@@ -349,7 +348,7 @@ And a big shout-out to Corey Innis for the name, and feedback on the API.
|
|
349
348
|
|
350
349
|
## Thank You
|
351
350
|
|
352
|
-
The following people have generously
|
351
|
+
The following people have generously funded Loofah:
|
353
352
|
|
354
353
|
* Bill Harding
|
355
354
|
|
data/lib/loofah/elements.rb
CHANGED
@@ -70,8 +70,6 @@ module Loofah
|
|
70
70
|
video
|
71
71
|
]
|
72
72
|
|
73
|
-
STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
|
74
|
-
|
75
73
|
# The following elements may also be considered block-level
|
76
74
|
# elements since they may contain block-level elements
|
77
75
|
LOOSE_BLOCK_LEVEL = Set.new %w[dd
|
@@ -86,7 +84,12 @@ module Loofah
|
|
86
84
|
tr
|
87
85
|
]
|
88
86
|
|
87
|
+
# Elements that aren't block but should generate a newline in #to_text
|
88
|
+
INLINE_LINE_BREAK = Set.new(["br"])
|
89
|
+
|
90
|
+
STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
|
89
91
|
BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
|
92
|
+
LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
|
90
93
|
end
|
91
94
|
|
92
95
|
::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::Elements
|
data/lib/loofah/html5/safelist.rb
CHANGED
@@ -148,6 +148,7 @@ module Loofah
|
|
148
148
|
"annotation-xml",
|
149
149
|
"maction",
|
150
150
|
"math",
|
151
|
+
"menclose",
|
151
152
|
"merror",
|
152
153
|
"mfenced",
|
153
154
|
"mfrac",
|
@@ -161,6 +162,7 @@ module Loofah
|
|
161
162
|
"mprescripts",
|
162
163
|
"mroot",
|
163
164
|
"mrow",
|
165
|
+
"ms",
|
164
166
|
"mspace",
|
165
167
|
"msqrt",
|
166
168
|
"mstyle",
|
@@ -313,6 +315,7 @@ module Loofah
|
|
313
315
|
"columnspacing",
|
314
316
|
"columnspan",
|
315
317
|
"depth",
|
318
|
+
"dir",
|
316
319
|
"display",
|
317
320
|
"displaystyle",
|
318
321
|
"encoding",
|
@@ -323,19 +326,24 @@ module Loofah
|
|
323
326
|
"fontweight",
|
324
327
|
"frame",
|
325
328
|
"height",
|
329
|
+
"href",
|
326
330
|
"linethickness",
|
331
|
+
"lquote",
|
327
332
|
"lspace",
|
328
333
|
"mathbackground",
|
329
334
|
"mathcolor",
|
335
|
+
"mathsize",
|
330
336
|
"mathvariant",
|
331
337
|
"maxsize",
|
332
338
|
"minsize",
|
339
|
+
"notation",
|
333
340
|
"open",
|
334
341
|
"other",
|
335
342
|
"rowalign",
|
336
343
|
"rowlines",
|
337
344
|
"rowspacing",
|
338
345
|
"rowspan",
|
346
|
+
"rquote",
|
339
347
|
"rspace",
|
340
348
|
"scriptlevel",
|
341
349
|
"selection",
|
@@ -504,6 +512,62 @@ module Loofah
|
|
504
512
|
"zoomAndPan",
|
505
513
|
])
|
506
514
|
|
515
|
+
ARIA_ATTRIBUTES = Set.new([
|
516
|
+
"aria-activedescendant",
|
517
|
+
"aria-atomic",
|
518
|
+
"aria-autocomplete",
|
519
|
+
"aria-braillelabel",
|
520
|
+
"aria-brailleroledescription",
|
521
|
+
"aria-busy",
|
522
|
+
"aria-checked",
|
523
|
+
"aria-colcount",
|
524
|
+
"aria-colindex",
|
525
|
+
"aria-colindextext",
|
526
|
+
"aria-colspan",
|
527
|
+
"aria-controls",
|
528
|
+
"aria-current",
|
529
|
+
"aria-describedby",
|
530
|
+
"aria-description",
|
531
|
+
"aria-details",
|
532
|
+
"aria-disabled",
|
533
|
+
"aria-dropeffect",
|
534
|
+
"aria-errormessage",
|
535
|
+
"aria-expanded",
|
536
|
+
"aria-flowto",
|
537
|
+
"aria-grabbed",
|
538
|
+
"aria-haspopup",
|
539
|
+
"aria-hidden",
|
540
|
+
"aria-invalid",
|
541
|
+
"aria-keyshortcuts",
|
542
|
+
"aria-label",
|
543
|
+
"aria-labelledby",
|
544
|
+
"aria-level",
|
545
|
+
"aria-live",
|
546
|
+
"aria-multiline",
|
547
|
+
"aria-multiselectable",
|
548
|
+
"aria-orientation",
|
549
|
+
"aria-owns",
|
550
|
+
"aria-placeholder",
|
551
|
+
"aria-posinset",
|
552
|
+
"aria-pressed",
|
553
|
+
"aria-readonly",
|
554
|
+
"aria-relevant",
|
555
|
+
"aria-required",
|
556
|
+
"aria-roledescription",
|
557
|
+
"aria-rowcount",
|
558
|
+
"aria-rowindex",
|
559
|
+
"aria-rowindextext",
|
560
|
+
"aria-rowspan",
|
561
|
+
"aria-selected",
|
562
|
+
"aria-setsize",
|
563
|
+
"aria-sort",
|
564
|
+
"aria-valuemax",
|
565
|
+
"aria-valuemin",
|
566
|
+
"aria-valuenow",
|
567
|
+
"aria-valuetext",
|
568
|
+
"role",
|
569
|
+
])
|
570
|
+
|
507
571
|
ATTR_VAL_IS_URI = Set.new([
|
508
572
|
"action",
|
509
573
|
"cite",
|
@@ -553,6 +617,7 @@ module Loofah
|
|
553
617
|
"align-content",
|
554
618
|
"align-items",
|
555
619
|
"align-self",
|
620
|
+
"aspect-ratio",
|
556
621
|
"background-color",
|
557
622
|
"border-bottom-color",
|
558
623
|
"border-collapse",
|
@@ -619,23 +684,16 @@ module Loofah
|
|
619
684
|
|
620
685
|
ACCEPTABLE_CSS_KEYWORDS = Set.new([
|
621
686
|
"!important",
|
622
|
-
"aqua",
|
623
687
|
"auto",
|
624
|
-
"black",
|
625
688
|
"block",
|
626
|
-
"blue",
|
627
689
|
"bold",
|
628
690
|
"both",
|
629
691
|
"bottom",
|
630
|
-
"brown",
|
631
692
|
"center",
|
632
693
|
"collapse",
|
633
694
|
"dashed",
|
634
695
|
"dotted",
|
635
696
|
"double",
|
636
|
-
"fuchsia",
|
637
|
-
"gray",
|
638
|
-
"green",
|
639
697
|
"groove",
|
640
698
|
"hidden",
|
641
699
|
"inherit",
|
@@ -643,35 +701,196 @@ module Loofah
|
|
643
701
|
"inset",
|
644
702
|
"italic",
|
645
703
|
"left",
|
646
|
-
"lime",
|
647
|
-
"maroon",
|
648
704
|
"medium",
|
649
|
-
"navy",
|
650
705
|
"none",
|
651
706
|
"normal",
|
652
707
|
"nowrap",
|
653
|
-
"olive",
|
654
708
|
"outset",
|
655
709
|
"pointer",
|
656
|
-
"purple",
|
657
|
-
"red",
|
658
710
|
"revert",
|
659
711
|
"ridge",
|
660
712
|
"right",
|
661
713
|
"separate",
|
662
|
-
"silver",
|
663
714
|
"solid",
|
664
|
-
"teal",
|
665
715
|
"thick",
|
666
716
|
"thin",
|
667
717
|
"top",
|
668
718
|
"transparent",
|
669
719
|
"underline",
|
670
720
|
"unset",
|
671
|
-
"white",
|
672
|
-
"yellow",
|
673
721
|
])
|
674
722
|
|
723
|
+
# https://www.w3.org/TR/css-color-3/#html4
|
724
|
+
ACCEPTABLE_CSS_COLORS = Set.new([
|
725
|
+
"aqua",
|
726
|
+
"black",
|
727
|
+
"blue",
|
728
|
+
"fuchsia",
|
729
|
+
"gray",
|
730
|
+
"green",
|
731
|
+
"lime",
|
732
|
+
"maroon",
|
733
|
+
"navy",
|
734
|
+
"olive",
|
735
|
+
"purple",
|
736
|
+
"red",
|
737
|
+
"silver",
|
738
|
+
"teal",
|
739
|
+
"white",
|
740
|
+
"yellow",
|
741
|
+
])
|
742
|
+
|
743
|
+
# https://www.w3.org/TR/css-color-3/#svg-color
|
744
|
+
ACCEPTABLE_CSS_EXTENDED_COLORS = Set.new([
|
745
|
+
"aliceblue",
|
746
|
+
"antiquewhite",
|
747
|
+
"aqua",
|
748
|
+
"aquamarine",
|
749
|
+
"azure",
|
750
|
+
"beige",
|
751
|
+
"bisque",
|
752
|
+
"black",
|
753
|
+
"blanchedalmond",
|
754
|
+
"blue",
|
755
|
+
"blueviolet",
|
756
|
+
"brown",
|
757
|
+
"burlywood",
|
758
|
+
"cadetblue",
|
759
|
+
"chartreuse",
|
760
|
+
"chocolate",
|
761
|
+
"coral",
|
762
|
+
"cornflowerblue",
|
763
|
+
"cornsilk",
|
764
|
+
"crimson",
|
765
|
+
"cyan",
|
766
|
+
"darkblue",
|
767
|
+
"darkcyan",
|
768
|
+
"darkgoldenrod",
|
769
|
+
"darkgray",
|
770
|
+
"darkgreen",
|
771
|
+
"darkgrey",
|
772
|
+
"darkkhaki",
|
773
|
+
"darkmagenta",
|
774
|
+
"darkolivegreen",
|
775
|
+
"darkorange",
|
776
|
+
"darkorchid",
|
777
|
+
"darkred",
|
778
|
+
"darksalmon",
|
779
|
+
"darkseagreen",
|
780
|
+
"darkslateblue",
|
781
|
+
"darkslategray",
|
782
|
+
"darkslategrey",
|
783
|
+
"darkturquoise",
|
784
|
+
"darkviolet",
|
785
|
+
"deeppink",
|
786
|
+
"deepskyblue",
|
787
|
+
"dimgray",
|
788
|
+
"dimgrey",
|
789
|
+
"dodgerblue",
|
790
|
+
"firebrick",
|
791
|
+
"floralwhite",
|
792
|
+
"forestgreen",
|
793
|
+
"fuchsia",
|
794
|
+
"gainsboro",
|
795
|
+
"ghostwhite",
|
796
|
+
"gold",
|
797
|
+
"goldenrod",
|
798
|
+
"gray",
|
799
|
+
"green",
|
800
|
+
"greenyellow",
|
801
|
+
"grey",
|
802
|
+
"honeydew",
|
803
|
+
"hotpink",
|
804
|
+
"indianred",
|
805
|
+
"indigo",
|
806
|
+
"ivory",
|
807
|
+
"khaki",
|
808
|
+
"lavender",
|
809
|
+
"lavenderblush",
|
810
|
+
"lawngreen",
|
811
|
+
"lemonchiffon",
|
812
|
+
"lightblue",
|
813
|
+
"lightcoral",
|
814
|
+
"lightcyan",
|
815
|
+
"lightgoldenrodyellow",
|
816
|
+
"lightgray",
|
817
|
+
"lightgreen",
|
818
|
+
"lightgrey",
|
819
|
+
"lightpink",
|
820
|
+
"lightsalmon",
|
821
|
+
"lightseagreen",
|
822
|
+
"lightskyblue",
|
823
|
+
"lightslategray",
|
824
|
+
"lightslategrey",
|
825
|
+
"lightsteelblue",
|
826
|
+
"lightyellow",
|
827
|
+
"lime",
|
828
|
+
"limegreen",
|
829
|
+
"linen",
|
830
|
+
"magenta",
|
831
|
+
"maroon",
|
832
|
+
"mediumaquamarine",
|
833
|
+
"mediumblue",
|
834
|
+
"mediumorchid",
|
835
|
+
"mediumpurple",
|
836
|
+
"mediumseagreen",
|
837
|
+
"mediumslateblue",
|
838
|
+
"mediumspringgreen",
|
839
|
+
"mediumturquoise",
|
840
|
+
"mediumvioletred",
|
841
|
+
"midnightblue",
|
842
|
+
"mintcream",
|
843
|
+
"mistyrose",
|
844
|
+
"moccasin",
|
845
|
+
"navajowhite",
|
846
|
+
"navy",
|
847
|
+
"oldlace",
|
848
|
+
"olive",
|
849
|
+
"olivedrab",
|
850
|
+
"orange",
|
851
|
+
"orangered",
|
852
|
+
"orchid",
|
853
|
+
"palegoldenrod",
|
854
|
+
"palegreen",
|
855
|
+
"paleturquoise",
|
856
|
+
"palevioletred",
|
857
|
+
"papayawhip",
|
858
|
+
"peachpuff",
|
859
|
+
"peru",
|
860
|
+
"pink",
|
861
|
+
"plum",
|
862
|
+
"powderblue",
|
863
|
+
"purple",
|
864
|
+
"red",
|
865
|
+
"rosybrown",
|
866
|
+
"royalblue",
|
867
|
+
"saddlebrown",
|
868
|
+
"salmon",
|
869
|
+
"sandybrown",
|
870
|
+
"seagreen",
|
871
|
+
"seashell",
|
872
|
+
"sienna",
|
873
|
+
"silver",
|
874
|
+
"skyblue",
|
875
|
+
"slateblue",
|
876
|
+
"slategray",
|
877
|
+
"slategrey",
|
878
|
+
"snow",
|
879
|
+
"springgreen",
|
880
|
+
"steelblue",
|
881
|
+
"tan",
|
882
|
+
"teal",
|
883
|
+
"thistle",
|
884
|
+
"tomato",
|
885
|
+
"turquoise",
|
886
|
+
"violet",
|
887
|
+
"wheat",
|
888
|
+
"white",
|
889
|
+
"whitesmoke",
|
890
|
+
"yellow",
|
891
|
+
"yellowgreen",
|
892
|
+
])
|
893
|
+
|
675
894
|
# see https://www.quackit.com/css/functions/
|
676
895
|
# omit `url` and `image` from that list
|
677
896
|
ACCEPTABLE_CSS_FUNCTIONS = Set.new([
|
@@ -766,6 +985,7 @@ module Loofah
|
|
766
985
|
"rsync",
|
767
986
|
"rtsp",
|
768
987
|
"sftp",
|
988
|
+
"sms",
|
769
989
|
"ssh",
|
770
990
|
"tag",
|
771
991
|
"tel",
|
@@ -779,16 +999,15 @@ module Loofah
|
|
779
999
|
"image/gif",
|
780
1000
|
"image/jpeg",
|
781
1001
|
"image/png",
|
782
|
-
"image/svg+xml",
|
783
1002
|
"text/css",
|
784
1003
|
"text/plain",
|
785
1004
|
])
|
786
1005
|
|
787
1006
|
# subclasses may define their own versions of these constants
|
788
1007
|
ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
|
789
|
-
ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
|
1008
|
+
ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES + ARIA_ATTRIBUTES
|
790
1009
|
ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
|
791
|
-
ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
|
1010
|
+
ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS + ACCEPTABLE_CSS_COLORS + ACCEPTABLE_CSS_EXTENDED_COLORS
|
792
1011
|
ALLOWED_CSS_FUNCTIONS = ACCEPTABLE_CSS_FUNCTIONS
|
793
1012
|
ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
|
794
1013
|
ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
|
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -36,24 +36,13 @@ module Loofah
|
|
36
36
|
end
|
37
37
|
|
38
38
|
if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
|
39
|
-
# this block lifted nearly verbatim from HTML5 sanitization
|
40
|
-
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
41
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
42
|
-
attr_node.remove
|
43
|
-
next
|
44
|
-
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
45
|
-
# permit only allowed data mediatypes
|
46
|
-
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
47
|
-
mediatype, _ = mediatype.split(";")[0..1] if mediatype
|
48
|
-
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
49
|
-
attr_node.remove
|
50
|
-
next
|
51
|
-
end
|
52
|
-
end
|
39
|
+
next if scrub_uri_attribute(attr_node)
|
53
40
|
end
|
41
|
+
|
54
42
|
if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
|
55
|
-
attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
|
43
|
+
scrub_attribute_that_allows_local_ref(attr_node)
|
56
44
|
end
|
45
|
+
|
57
46
|
if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
|
58
47
|
attr_node.remove
|
59
48
|
next
|
@@ -127,6 +116,47 @@ module Loofah
|
|
127
116
|
Crass::Parser.stringify(sanitized_tree)
|
128
117
|
end
|
129
118
|
|
119
|
+
def scrub_attribute_that_allows_local_ref(attr_node)
|
120
|
+
return unless attr_node.value
|
121
|
+
|
122
|
+
nodes = Crass::Parser.new(attr_node.value).parse_component_values
|
123
|
+
|
124
|
+
values = nodes.map do |node|
|
125
|
+
case node[:node]
|
126
|
+
when :url
|
127
|
+
if node[:value].start_with?("#")
|
128
|
+
node[:raw]
|
129
|
+
else
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
when :hash, :ident, :string
|
133
|
+
node[:raw]
|
134
|
+
else
|
135
|
+
nil
|
136
|
+
end
|
137
|
+
end.compact
|
138
|
+
|
139
|
+
attr_node.value = values.join(" ")
|
140
|
+
end
|
141
|
+
|
142
|
+
def scrub_uri_attribute(attr_node)
|
143
|
+
# this block lifted nearly verbatim from HTML5 sanitization
|
144
|
+
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
|
145
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
146
|
+
attr_node.remove
|
147
|
+
return true
|
148
|
+
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
|
149
|
+
# permit only allowed data mediatypes
|
150
|
+
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
151
|
+
mediatype, _ = mediatype.split(";")[0..1] if mediatype
|
152
|
+
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
153
|
+
attr_node.remove
|
154
|
+
return true
|
155
|
+
end
|
156
|
+
end
|
157
|
+
false
|
158
|
+
end
|
159
|
+
|
130
160
|
#
|
131
161
|
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
|
132
162
|
#
|
@@ -152,6 +182,46 @@ module Loofah
|
|
152
182
|
end.force_encoding(encoding)
|
153
183
|
end
|
154
184
|
end
|
185
|
+
|
186
|
+
def cdata_needs_escaping?(node)
|
187
|
+
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
|
188
|
+
node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
|
189
|
+
end
|
190
|
+
|
191
|
+
def cdata_escape(node)
|
192
|
+
escaped_text = escape_tags(node.text)
|
193
|
+
if Nokogiri.jruby?
|
194
|
+
node.document.create_text_node(escaped_text)
|
195
|
+
else
|
196
|
+
node.document.create_cdata(escaped_text)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
TABLE_FOR_ESCAPE_HTML__ = {
|
201
|
+
'<' => '&lt;',
|
202
|
+
'>' => '&gt;',
|
203
|
+
'&' => '&amp;',
|
204
|
+
}
|
205
|
+
|
206
|
+
def escape_tags(string)
|
207
|
+
# modified version of CGI.escapeHTML from ruby 3.1
|
208
|
+
enc = string.encoding
|
209
|
+
unless enc.ascii_compatible?
|
210
|
+
if enc.dummy?
|
211
|
+
origenc = enc
|
212
|
+
enc = Encoding::Converter.asciicompat_encoding(enc)
|
213
|
+
string = enc ? string.encode(enc) : string.b
|
214
|
+
end
|
215
|
+
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
|
216
|
+
string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
|
217
|
+
string.encode!(origenc) if origenc
|
218
|
+
string
|
219
|
+
else
|
220
|
+
string = string.b
|
221
|
+
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
|
222
|
+
string.force_encoding(enc)
|
223
|
+
end
|
224
|
+
end
|
155
225
|
end
|
156
226
|
end
|
157
227
|
end
|
data/lib/loofah/instance_methods.rb
CHANGED
@@ -93,7 +93,11 @@ module Loofah
|
|
93
93
|
# frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
|
94
94
|
#
|
95
95
|
def text(options = {})
|
96
|
-
result = serialize_root.children.inner_text rescue ""
|
96
|
+
result = if serialize_root
|
97
|
+
serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
|
98
|
+
else
|
99
|
+
""
|
100
|
+
end
|
97
101
|
if options[:encode_special_chars] == false
|
98
102
|
result # possibly dangerous if rendered in a browser
|
99
103
|
else
|
@@ -108,11 +112,11 @@ module Loofah
|
|
108
112
|
# Returns a plain-text version of the markup contained by the
|
109
113
|
# fragment, with HTML entities encoded.
|
110
114
|
#
|
111
|
-
# This method is slower than #to_s, but is clever about
|
112
|
-
# whitespace around block elements.
|
115
|
+
# This method is slower than #text, but is clever about
|
116
|
+
# whitespace around block elements and line break elements.
|
113
117
|
#
|
114
|
-
# Loofah.document("<h1>Title</h1><div>Content</div>").to_text
|
115
|
-
# # => "\nTitle\n\nContent\n"
|
118
|
+
# Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
|
119
|
+
# # => "\nTitle\n\nContent\nNext line\n"
|
116
120
|
#
|
117
121
|
def to_text(options = {})
|
118
122
|
Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
|
data/lib/loofah/scrubber.rb
CHANGED
@@ -108,6 +108,10 @@ module Loofah
|
|
108
108
|
return Scrubber::CONTINUE
|
109
109
|
end
|
110
110
|
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
111
|
+
if HTML5::Scrub.cdata_needs_escaping?(node)
|
112
|
+
node.before(HTML5::Scrub.cdata_escape(node))
|
113
|
+
return Scrubber::STOP
|
114
|
+
end
|
111
115
|
return Scrubber::CONTINUE
|
112
116
|
end
|
113
117
|
Scrubber::STOP
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -100,13 +100,9 @@ module Loofah
|
|
100
100
|
|
101
101
|
def scrub(node)
|
102
102
|
return CONTINUE if html5lib_sanitize(node) == CONTINUE
|
103
|
-
if node.children.length == 1 && node.children.first.cdata?
|
104
|
-
sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
|
105
|
-
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
|
106
|
-
else
|
107
|
-
node.before node.children
|
108
|
-
end
|
103
|
+
node.before(node.children)
|
109
104
|
node.remove
|
105
|
+
return STOP
|
110
106
|
end
|
111
107
|
end
|
112
108
|
|
@@ -240,8 +236,13 @@ module Loofah
|
|
240
236
|
end
|
241
237
|
|
242
238
|
def scrub(node)
|
243
|
-
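return CONTINUE unless Loofah::Elements::BLOCK_LEVEL.include?(node.name)
|
244
|
-
node.add_next_sibling Nokogiri::XML::Text.new("\n#{node.content}\n", node.document)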
|
239
|
+
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
240
|
+
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
241
|
+
"\n"
|
242
|
+
else
|
243
|
+
"\n#{node.content}\n"
|
244
|
+
end
|
245
|
+
node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
|
245
246
|
node.remove
|
246
247
|
end
|
247
248
|
end
|
data/lib/loofah/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.12.0
|
4
|
+
version: 2.19.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-12-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: crass
|
@@ -199,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
199
199
|
- !ruby/object:Gem::Version
|
200
200
|
version: '0'
|
201
201
|
requirements: []
|
202
|
-
rubygems_version: 3.
|
202
|
+
rubygems_version: 3.3.7
|
203
203
|
signing_key:
|
204
204
|
specification_version: 4
|
205
205
|
summary: Loofah is a general library for manipulating and transforming HTML/XML documents
|