loofah 2.2.3 → 2.19.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +221 -31
  3. data/README.md +18 -24
  4. data/lib/loofah/elements.rb +79 -75
  5. data/lib/loofah/helpers.rb +18 -7
  6. data/lib/loofah/html/document.rb +1 -0
  7. data/lib/loofah/html/document_fragment.rb +4 -2
  8. data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
  9. data/lib/loofah/html5/safelist.rb +1042 -0
  10. data/lib/loofah/html5/scrub.rb +150 -55
  11. data/lib/loofah/instance_methods.rb +14 -8
  12. data/lib/loofah/metahelpers.rb +2 -1
  13. data/lib/loofah/scrubber.rb +12 -7
  14. data/lib/loofah/scrubbers.rb +21 -19
  15. data/lib/loofah/version.rb +5 -0
  16. data/lib/loofah/xml/document.rb +1 -0
  17. data/lib/loofah/xml/document_fragment.rb +2 -1
  18. data/lib/loofah.rb +35 -18
  19. metadata +52 -138
  20. data/.gemtest +0 -0
  21. data/Gemfile +0 -22
  22. data/Manifest.txt +0 -40
  23. data/Rakefile +0 -79
  24. data/benchmark/benchmark.rb +0 -149
  25. data/benchmark/fragment.html +0 -96
  26. data/benchmark/helper.rb +0 -73
  27. data/benchmark/www.slashdot.com.html +0 -2560
  28. data/lib/loofah/html5/whitelist.rb +0 -186
  29. data/test/assets/msword.html +0 -63
  30. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  31. data/test/helper.rb +0 -18
  32. data/test/html5/test_sanitizer.rb +0 -382
  33. data/test/integration/test_ad_hoc.rb +0 -204
  34. data/test/integration/test_helpers.rb +0 -43
  35. data/test/integration/test_html.rb +0 -72
  36. data/test/integration/test_scrubbers.rb +0 -400
  37. data/test/integration/test_xml.rb +0 -55
  38. data/test/unit/test_api.rb +0 -142
  39. data/test/unit/test_encoding.rb +0 -20
  40. data/test/unit/test_helpers.rb +0 -62
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
data/lib/loofah/html5/whitelist.rb (file removed)
@@ -1,186 +0,0 @@
1
- require 'set'
2
-
3
- module Loofah
4
- module HTML5 # :nodoc:
5
- #
6
- # HTML whitelist lifted from HTML5lib sanitizer code:
7
- #
8
- # http://code.google.com/p/html5lib/
9
- #
10
- # <html5_license>
11
- #
12
- # Copyright (c) 2006-2008 The Authors
13
- #
14
- # Contributors:
15
- # James Graham - jg307@cam.ac.uk
16
- # Anne van Kesteren - annevankesteren@gmail.com
17
- # Lachlan Hunt - lachlan.hunt@lachy.id.au
18
- # Matt McDonald - kanashii@kanashii.ca
19
- # Sam Ruby - rubys@intertwingly.net
20
- # Ian Hickson (Google) - ian@hixie.ch
21
- # Thomas Broyer - t.broyer@ltgt.net
22
- # Jacques Distler - distler@golem.ph.utexas.edu
23
- # Henri Sivonen - hsivonen@iki.fi
24
- # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
25
- #
26
- # Permission is hereby granted, free of charge, to any person
27
- # obtaining a copy of this software and associated documentation
28
- # files (the "Software"), to deal in the Software without
29
- # restriction, including without limitation the rights to use, copy,
30
- # modify, merge, publish, distribute, sublicense, and/or sell copies
31
- # of the Software, and to permit persons to whom the Software is
32
- # furnished to do so, subject to the following conditions:
33
- #
34
- # The above copyright notice and this permission notice shall be
35
- # included in all copies or substantial portions of the Software.
36
- #
37
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
38
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
39
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
40
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
41
- # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
42
- # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
43
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
44
- # DEALINGS IN THE SOFTWARE.
45
- #
46
- # </html5_license>
47
- module WhiteList
48
-
49
- ACCEPTABLE_ELEMENTS = Set.new %w[a abbr acronym address area
50
- article aside audio b bdi bdo big blockquote br button canvas
51
- caption center cite code col colgroup command datalist dd del
52
- details dfn dir div dl dt em fieldset figcaption figure footer
53
- font form h1 h2 h3 h4 h5 h6 header hr i img input ins kbd label
54
- legend li main map mark menu meter nav ol output optgroup option p
55
- pre q s samp section select small span strike strong sub summary
56
- sup table tbody td textarea tfoot th thead time tr tt u ul var
57
- video]
58
-
59
- MATHML_ELEMENTS = Set.new %w[annotation annotation-xml maction math merror mfrac
60
- mfenced mi mmultiscripts mn mo mover mpadded mphantom mprescripts mroot mrow
61
- mspace msqrt mstyle msub msubsup msup mtable mtd mtext mtr munder
62
- munderover none semantics]
63
-
64
- SVG_ELEMENTS = Set.new %w[a animate animateColor animateMotion animateTransform
65
- circle clipPath defs desc ellipse feGaussianBlur filter font-face
66
- font-face-name font-face-src foreignObject
67
- g glyph hkern linearGradient line marker mask metadata missing-glyph
68
- mpath path polygon polyline radialGradient rect set stop svg switch symbol
69
- text textPath title tspan use]
70
-
71
- ACCEPTABLE_ATTRIBUTES = Set.new %w[abbr accept accept-charset accesskey action
72
- align alt axis border cellpadding cellspacing char charoff charset
73
- checked cite class clear cols colspan color compact coords datetime
74
- dir disabled enctype for frame headers height href hreflang hspace id
75
- ismap label lang longdesc loop loopcount loopend loopstart
76
- maxlength media method multiple name nohref
77
- noshade nowrap poster preload prompt readonly rel rev rows rowspan rules scope
78
- selected shape size span src start style summary tabindex target title
79
- type usemap valign value vspace width xml:lang]
80
-
81
- MATHML_ATTRIBUTES = Set.new %w[actiontype align close
82
- columnalign columnlines columnspacing columnspan depth display
83
- displaystyle encoding equalcolumns equalrows fence fontstyle fontweight
84
- frame height linethickness lspace mathbackground mathcolor mathvariant
85
- maxsize minsize open other rowalign rowlines
86
- rowspacing rowspan rspace scriptlevel selection separator separators
87
- stretchy width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
88
-
89
- SVG_ATTRIBUTES = Set.new %w[accent-height accumulate additive alphabetic
90
- arabic-form ascent attributeName attributeType baseProfile bbox begin
91
- by calcMode cap-height class clip-path clip-rule color
92
- color-interpolation-filters color-rendering content cx cy d dx
93
- dy descent display dur end fill fill-opacity fill-rule
94
- filterRes filterUnits font-family
95
- font-size font-stretch font-style font-variant font-weight fx fy g1
96
- g2 glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
97
- ideographic k keyPoints keySplines keyTimes lang marker-end
98
- marker-mid marker-start markerHeight markerUnits markerWidth
99
- maskContentUnits maskUnits mathematical max method min name offset opacity orient origin
100
- overline-position overline-thickness panose-1 path pathLength
101
- patternContentUnits patternTransform patternUnits points
102
- preserveAspectRatio primitiveUnits r refX refY repeatCount repeatDur
103
- requiredExtensions requiredFeatures restart rotate rx ry slope spacing
104
- startOffset stdDeviation stemh
105
- stemv stop-color stop-opacity strikethrough-position
106
- strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
107
- stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
108
- stroke-width systemLanguage target text-anchor to transform type u1
109
- u2 underline-position underline-thickness unicode unicode-range
110
- units-per-em values version viewBox visibility width widths x
111
- x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
112
- xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
113
- xmlns:xlink y y1 y2 zoomAndPan]
114
-
115
- ATTR_VAL_IS_URI = Set.new %w[href src cite action longdesc xlink:href xml:base poster preload]
116
-
117
- SVG_ATTR_VAL_ALLOWS_REF = Set.new %w[clip-path color-profile cursor fill
118
- filter marker marker-start marker-mid marker-end mask stroke]
119
-
120
- SVG_ALLOW_LOCAL_HREF = Set.new %w[altGlyph animate animateColor animateMotion
121
- animateTransform cursor feImage filter linearGradient pattern
122
- radialGradient textpath tref set use]
123
-
124
- ACCEPTABLE_CSS_PROPERTIES = Set.new %w[azimuth background-color
125
- border-bottom-color border-collapse border-color border-left-color
126
- border-right-color border-top-color clear color cursor direction
127
- display elevation float font font-family font-size font-style
128
- font-variant font-weight height letter-spacing line-height list-style-type
129
- overflow pause pause-after pause-before pitch pitch-range richness speak
130
- speak-header speak-numeral speak-punctuation speech-rate stress
131
- text-align text-decoration text-indent unicode-bidi vertical-align
132
- voice-family volume white-space width]
133
-
134
- ACCEPTABLE_CSS_KEYWORDS = Set.new %w[auto aqua black block blue bold both bottom
135
- brown center collapse dashed dotted fuchsia gray green !important
136
- italic left lime maroon medium none navy normal nowrap olive pointer
137
- purple red right solid silver teal top transparent underline white
138
- yellow]
139
-
140
- ACCEPTABLE_CSS_FUNCTIONS = Set.new %w[calc rgb]
141
-
142
- SHORTHAND_CSS_PROPERTIES = Set.new %w[background border margin padding]
143
-
144
- ACCEPTABLE_SVG_PROPERTIES = Set.new %w[fill fill-opacity fill-rule stroke
145
- stroke-width stroke-linecap stroke-linejoin stroke-opacity]
146
-
147
- PROTOCOL_SEPARATOR = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i
148
-
149
- ACCEPTABLE_PROTOCOLS = Set.new %w[ed2k ftp http https irc mailto news gopher nntp
150
- telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs data]
151
-
152
- ACCEPTABLE_URI_DATA_MEDIATYPES = Set.new %w[text/plain text/css image/png image/gif
153
- image/jpeg image/svg+xml]
154
-
155
- # subclasses may define their own versions of these constants
156
- ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
157
- ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
158
- ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
159
- ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
160
- ALLOWED_CSS_FUNCTIONS = ACCEPTABLE_CSS_FUNCTIONS
161
- ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
162
- ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
163
- ALLOWED_URI_DATA_MEDIATYPES = ACCEPTABLE_URI_DATA_MEDIATYPES
164
-
165
- VOID_ELEMENTS = Set.new %w[
166
- base
167
- link
168
- meta
169
- hr
170
- br
171
- img
172
- embed
173
- param
174
- area
175
- col
176
- input
177
- ]
178
-
179
- # additional tags we should consider safe since we have libxml2 fixing up our documents.
180
- TAGS_SAFE_WITH_LIBXML2 = Set.new %w[html head body]
181
- ALLOWED_ELEMENTS_WITH_LIBXML2 = ALLOWED_ELEMENTS + TAGS_SAFE_WITH_LIBXML2
182
- end
183
-
184
- ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::HTML5::WhiteList
185
- end
186
- end
data/test/assets/msword.html (file removed)
@@ -1,63 +0,0 @@
1
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
2
- <w:WordDocument>
3
- <w:View>Normal</w:View>
4
- <w:Zoom>0</w:Zoom>
5
- <w:PunctuationKerning/>
6
- <w:ValidateAgainstSchemas/>
7
- <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
8
- <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
9
- <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
10
- <w:Compatibility>
11
- <w:BreakWrappedTables/>
12
- <w:SnapToGridInCell/>
13
- <w:WrapTextWithPunct/>
14
- <w:UseAsianBreakRules/>
15
- <w:DontGrowAutofit/>
16
- </w:Compatibility>
17
- <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
18
- </w:WordDocument>
19
- </xml><![endif]--><!--[if gte mso 9]><xml>
20
- <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
21
- </w:LatentStyles>
22
- </xml><![endif]--><style>
23
- <!--
24
- /* Style Definitions */
25
- p.MsoNormal, li.MsoNormal, div.MsoNormal
26
- {mso-style-parent:"";
27
- margin:0in;
28
- margin-bottom:.0001pt;
29
- mso-pagination:widow-orphan;
30
- font-size:12.0pt;
31
- font-family:"Times New Roman";
32
- mso-fareast-font-family:"Times New Roman";}
33
- @page Section1
34
- {size:8.5in 11.0in;
35
- margin:1.0in 1.25in 1.0in 1.25in;
36
- mso-header-margin:.5in;
37
- mso-footer-margin:.5in;
38
- mso-paper-source:0;}
39
- div.Section1
40
- {page:Section1;}
41
- -->
42
- </style><!--[if gte mso 10]>
43
- <style>
44
- /* Style Definitions */
45
- table.MsoNormalTable
46
- {mso-style-name:"Table Normal";
47
- mso-tstyle-rowband-size:0;
48
- mso-tstyle-colband-size:0;
49
- mso-style-noshow:yes;
50
- mso-style-parent:"";
51
- mso-padding-alt:0in 5.4pt 0in 5.4pt;
52
- mso-para-margin:0in;
53
- mso-para-margin-bottom:.0001pt;
54
- mso-pagination:widow-orphan;
55
- font-size:10.0pt;
56
- font-family:"Times New Roman";
57
- mso-ansi-language:#0400;
58
- mso-fareast-language:#0400;
59
- mso-bidi-language:#0400;}
60
- </style>
61
- <![endif]-->
62
-
63
- <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>