loofah 0.2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

data/init.rb ADDED
@@ -0,0 +1,2 @@
1
+ require "loofah/rails_extension" # NOTE(review): no loofah/rails_extension file is visible in this changeset (the ActiveRecord hooks shown live in loofah/active_record) -- confirm this path exists
2
+ ActiveRecord::Base.send(:include, Loofah::RailsExtension) # includes the extension into ActiveRecord::Base; NOTE(review): only Loofah::ActiveRecord is visible here -- confirm Loofah::RailsExtension is defined
@@ -0,0 +1,197 @@
1
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'rubygems'
4
+ require 'nokogiri'
5
+
6
+ require 'loofah/html5/whitelist'
7
+ require 'loofah/html5/scrub'
8
+
9
+ require 'loofah/scrubber'
10
+
11
+ require 'loofah/html/document'
12
+ require 'loofah/html/document_fragment'
13
+
14
+ require 'loofah/deprecated'
15
+
16
+
17
+ #
18
+ # Loofah is an HTML sanitizer wrapped around Nokogiri[http://nokogiri.org], an excellent
19
+ # HTML/XML parser. If you don't know how Nokogiri[http://nokogiri.org]
20
+ # works, you might want to pause for a moment and go check it out. I'll
21
+ # wait.
22
+ #
23
+ # A Loofah::HTML::Document is a subclass of Nokogiri::HTML::Document,
24
+ # so a parsed document gives you all the markup fixer-uppery and API
25
+ # goodness of Nokogiri.
26
+ #
27
+ # Loofah.document(unsafe_html).is_a?(Nokogiri::HTML::Document) # => true
28
+ # Loofah.fragment(unsafe_html).is_a?(Nokogiri::HTML::DocumentFragment) # => true
29
+ #
30
+ # Loofah adds a +scrub!+ method, which can clean up your HTML in a few
31
+ # different ways by modifying the document in-place:
32
+ #
33
+ # doc.scrub!(:strip) # replaces unknown/unsafe tags with their inner text
34
+ # doc.scrub!(:prune) # removes unknown/unsafe tags and their children
35
+ # doc.scrub!(:whitewash) # removes unknown/unsafe/namespaced tags and their children,
36
+ # # and strips all node attributes
37
+ # doc.scrub!(:escape) # escapes unknown/unsafe tags, like this: &lt;script&gt;
38
+ #
39
+ # Loofah overrides +to_s+ to return html:
40
+ #
41
+ # unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
42
+ #
43
+ # doc = Loofah.fragment(unsafe_html).scrub!(:prune)
44
+ # doc.to_s # => "ohai! <div>div is safe</div> "
45
+ #
46
+ # and +text+ to return plain text:
47
+ #
48
+ # doc.text # => "ohai! div is safe "
49
+ #
50
+ # Or, if you prefer, you can use the shorthand methods +scrub_fragment+ and +scrub_document+:
51
+ #
52
+ # Loofah.scrub_fragment(unsafe_html, :prune).to_s
53
+ # Loofah.scrub_document(unsafe_html, :strip).text
54
+ #
55
+ # == Usage
56
+ #
57
+ # Let's say you have a Web 2.0 application, and you allow people to
58
+ # send HTML snippets to each other.
59
+ #
60
+ # Let's also say some script-kiddie from Norland sends this to your
61
+ # users, in an effort to swipe some credit cards:
62
+ #
63
+ # <script src=http://ha.ckers.org/xss.js></script>
64
+ #
65
+ # Oooh, that could be bad. Here's how to fix it:
66
+ #
67
+ # Loofah.scrub_fragment(dangerous_html, :escape).to_s
68
+ #
69
+ # # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
70
+ #
71
+ # Loofah also makes available the sanitized markup in both HTML and
72
+ # plain-text formats without incurring the overhead of multiple
73
+ # parsings:
74
+ #
75
+ # safe_fragment = Loofah.scrub_fragment(dangerous_html, :strip)
76
+ # safe_fragment.to_s # => HTML output
77
+ # safe_fragment.text # => plain text output
78
+ #
79
+ # And you can modify the HTML using Nokogiri's API, if you like:
80
+ #
81
+ # stylized_fragment = Loofah.fragment(dangerous_html)
82
+ # stylized_fragment.xpath("//a/text()").wrap("<span></span>")
83
+ # stylized_fragment.scrub!(:strip)
84
+ #
85
+ # == Fragments vs Documents
86
+ #
87
+ # Generally speaking, unless you expect to have \&lt;html\&gt; and
88
+ # \&lt;body\&gt; tags in your HTML, you don't have a *document*, you
89
+ # have a *fragment*.
90
+ #
91
+ # For parsing fragments, you should use Loofah.fragment. Nokogiri
92
+ # won't wrap the result in +html+ and +body+ tags, and will ignore
93
+ # +head+ elements.
94
+ #
95
+ # Full HTML documents should be parsed with Loofah.document, which
96
+ # will add the DOCTYPE declaration, and properly handle +head+ and
97
+ # +body+ elements.
98
+ #
99
+ # == Strings and IO Objects as Input
100
+ #
101
+ # Loofah.document and Loofah.fragment accept any IO object in addition
102
+ # to accepting a string. That IO object could be a file, or a socket,
103
+ # or a StringIO, or anything that responds to +read+ and
104
+ # +close+, which makes it particularly easy to sanitize mass
105
+ # quantities of docs.
106
+ #
107
+ # == Scrubbing Methods
108
+ #
109
+ # Given:
110
+ # unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
111
+ #
112
+ # === scrub!(:strip)
113
+ #
114
+ # +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
115
+ #
116
+ # Loofah.fragment(unsafe_html).scrub!(:strip)
117
+ # # or
118
+ # Loofah.scrub_fragment(unsafe_html, :strip)
119
+ #
120
+ # => "ohai! <div>div is safe</div> but foo is <b>not</b>"
121
+ #
122
+ # === scrub!(:prune)
123
+ #
124
+ # +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
125
+ #
126
+ # Loofah.fragment(unsafe_html).scrub!(:prune)
127
+ # # or
128
+ # Loofah.scrub_fragment(unsafe_html, :prune)
129
+ #
130
+ # => "ohai! <div>div is safe</div> "
131
+ #
132
+ # === scrub!(:escape)
133
+ #
134
+ # +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
135
+ #
136
+ # Loofah.fragment(unsafe_html).scrub!(:escape)
137
+ # # or
138
+ # Loofah.scrub_fragment(unsafe_html, :escape)
139
+ #
140
+ # => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
141
+ #
142
+ # === scrub!(:whitewash)
143
+ #
144
+ # +:whitewash+ removes all comments, styling and attributes in
145
+ # addition to doing markup-fixer-uppery and pruning unsafe tags. I
146
+ # like to call this "whitewashing", since it's like putting a new
147
+ # layer of paint on top of the HTML input to make it look nice.
148
+ #
149
+ # messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
150
+ #
151
+ # Loofah.fragment(messy_markup).scrub!(:whitewash)
152
+ # # or
153
+ # Loofah.scrub_fragment(messy_markup, :whitewash)
154
+ #
155
+ # => "ohai! <div>div with attributes</div>"
156
+ #
157
+ # One use case for this feature is to clean up HTML that was
158
+ # cut-and-pasted from Microsoft Word into a WYSIWYG editor or a rich
159
+ # text editor. Microsoft's software is famous for injecting all kinds
160
+ # of cruft into its HTML output. Who needs that? Certainly not me.
161
+ #
162
module Loofah
  # The version of Loofah you are using.
  # Frozen so the shared constant cannot be mutated in place by accident.
  VERSION = '0.2.0'.freeze

  # The minimum required version of Nokogiri.
  REQUIRED_NOKOGIRI_VERSION = '1.3.3'.freeze

  class << self
    # Shortcut for Loofah::HTML::Document.parse.
    #
    # All arguments and the optional block are forwarded unchanged; the
    # return value is whatever Loofah::HTML::Document.parse returns.
    def document(*args, &block)
      Loofah::HTML::Document.parse(*args, &block)
    end

    # Shortcut for Loofah::HTML::DocumentFragment.parse.
    #
    # All arguments and the optional block are forwarded unchanged; the
    # return value is whatever Loofah::HTML::DocumentFragment.parse returns.
    def fragment(*args, &block)
      Loofah::HTML::DocumentFragment.parse(*args, &block)
    end

    # Shortcut for Loofah.fragment(string_or_io).scrub!(method)
    #
    # Returns the result of +scrub!+, so callers can chain +to_s+ or +text+.
    def scrub_fragment(string_or_io, method)
      Loofah.fragment(string_or_io).scrub!(method)
    end

    # Shortcut for Loofah.document(string_or_io).scrub!(method)
    #
    # Returns the result of +scrub!+, so callers can chain +to_s+ or +text+.
    def scrub_document(string_or_io, method)
      Loofah.document(string_or_io).scrub!(method)
    end
  end
end
194
+
195
# Fail fast at load time when the installed Nokogiri is older than Loofah needs.
# Versions are compared with Gem::Version rather than as plain strings: a
# lexical comparison would treat "1.10.0" as older than "1.3.3" and wrongly
# reject newer Nokogiri releases.
if Gem::Version.new(Nokogiri::VERSION) < Gem::Version.new(Loofah::REQUIRED_NOKOGIRI_VERSION)
  raise RuntimeError, "Loofah requires Nokogiri #{Loofah::REQUIRED_NOKOGIRI_VERSION} or later (currently #{Nokogiri::VERSION})"
end
@@ -0,0 +1,44 @@
1
module Loofah
  #
  #  Loofah can scrub ActiveRecord attributes in a before_save callback:
  #
  #    # in environment.rb
  #    require 'loofah/active_record'
  #
  #    # db/schema.rb
  #    create_table "posts" do |t|
  #      t.string  "title"
  #      t.string  "body"
  #    end
  #
  #    # app/model/post.rb
  #    class Post < ActiveRecord::Base
  #      html_fragment :body, :scrub => :prune  # scrubs 'body' in a before_save
  #    end
  #
  module ActiveRecord
    #
    #  Scrub the ActiveRecord attribute +attr+ as an HTML fragment
    #  using the method specified in the required +:scrub+ option.
    #
    #  Registers a before_save callback that replaces the attribute with the
    #  scrubbed markup as a String. Attributes that are nil are left alone.
    #
    #  Raises ArgumentError when the +:scrub+ option is missing.
    #
    def html_fragment(attr, options={})
      scrubber = options[:scrub]
      raise ArgumentError, "html_fragment requires :scrub option" unless scrubber

      before_save do |record|
        # nothing to scrub; also avoids turning a NULL column into ""
        next if record[attr].nil?

        # store the serialized markup, not the Nokogiri object itself
        record[attr] = Loofah.scrub_fragment(record[attr], scrubber).to_s
      end
    end

    #
    #  Scrub the ActiveRecord attribute +attr+ as an HTML document
    #  using the method specified in the required +:scrub+ option.
    #
    #  Registers a before_save callback that replaces the attribute with the
    #  scrubbed markup as a String. Attributes that are nil are left alone.
    #
    #  Raises ArgumentError when the +:scrub+ option is missing.
    #
    def html_document(attr, options={})
      scrubber = options[:scrub]
      raise ArgumentError, "html_document requires :scrub option" unless scrubber

      before_save do |record|
        # nothing to scrub; also avoids turning a NULL column into ""
        next if record[attr].nil?

        # store the serialized markup, not the Nokogiri object itself
        record[attr] = Loofah.scrub_document(record[attr], scrubber).to_s
      end
    end
  end
end
43
+
44
+ ActiveRecord::Base.extend(Loofah::ActiveRecord) # exposes html_fragment / html_document as class-level methods on ActiveRecord::Base
@@ -0,0 +1,38 @@
1
module Loofah
  class << self
    # Deprecated: returns the plain text of +string_or_io+ after pruning it as a document.
    # Emits a one-time deprecation warning on stderr.
    def strip_tags(string_or_io) # :nodoc:
      warn_once "WARNING: Loofah.strip_tags is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_document(string_or_io, :prune).text"
      Loofah.scrub_document(string_or_io, :prune).text
    end

    # Deprecated: returns +to_s+ of +string_or_io+ whitewashed as a fragment.
    # Emits a one-time deprecation warning on stderr.
    def whitewash(string_or_io) # :nodoc:
      warn_once "WARNING: Loofah.whitewash is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_fragment(string_or_io, :whitewash).to_s"
      Loofah.scrub_fragment(string_or_io, :whitewash).to_s
    end

    # Deprecated: returns +to_s+ of +string_or_io+ whitewashed as a document.
    # Emits a one-time deprecation warning on stderr.
    def whitewash_document(string_or_io) # :nodoc:
      warn_once "WARNING: Loofah.whitewash_document is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_document(string_or_io, :whitewash).to_s"
      Loofah.scrub_document(string_or_io, :whitewash).to_s
    end

    # Deprecated: returns +to_xml+ of +string_or_io+ escaped as a fragment.
    # Emits a one-time deprecation warning on stderr.
    def sanitize(string_or_io) # :nodoc:
      warn_once "WARNING: Loofah.sanitize is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_fragment(string_or_io, :escape).to_xml"
      Loofah.scrub_fragment(string_or_io, :escape).to_xml
    end

    # Deprecated: returns +to_xml+ of +string_or_io+ escaped as a document.
    # Emits a one-time deprecation warning on stderr.
    def sanitize_document(string_or_io) # :nodoc:
      warn_once "WARNING: Loofah.sanitize_document is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_document(string_or_io, :escape).to_xml"
      Loofah.scrub_document(string_or_io, :escape).to_xml
    end

    private

    # Prints +message+ via Kernel#warn the first time it is seen and stays
    # silent for every later call with the same text.
    def warn_once(message)
      @aooga ||= {}
      return if @aooga.key?(message)

      warn message
      @aooga[message] = true
    end
  end
end
@@ -0,0 +1,19 @@
1
+ module Loofah
2
+ module HTML
3
+ #
4
+ # Subclass of Nokogiri::HTML::Document that mixes in Loofah::ScrubberInstanceMethods.
5
+ #
6
+ # See Loofah::ScrubberInstanceMethods for additional methods.
7
+ #
8
+ class Document < Nokogiri::HTML::Document
9
+ include Loofah::ScrubberInstanceMethods
10
+
11
+ private
12
+ # Returns the nodes matched by /html/head and /html/body. NOTE(review): presumably the subtrees the scrubber walks -- confirm in loofah/scrubber.
13
+ def __sanitize_roots # :nodoc:
14
+ xpath("/html/head","/html/body")
15
+ end
16
+
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,30 @@
1
+ module Loofah
2
+ module HTML
3
+ #
4
+ # Subclass of Nokogiri::HTML::DocumentFragment. Also includes Loofah::ScrubberInstanceMethods.
5
+ #
6
+ # See Loofah::ScrubberInstanceMethods for additional methods.
7
+ #
8
+ class DocumentFragment < Nokogiri::HTML::DocumentFragment
9
+ include Loofah::ScrubberInstanceMethods
10
+
11
+ class << self
12
+ # Builds a fragment from +tags+, attached to a newly created Loofah::HTML::Document.
13
+ # Overridden Nokogiri::HTML::DocumentFragment
14
+ # constructor. Applications should use Loofah.fragment to
15
+ # parse a fragment.
16
+ # NOTE(review): takes exactly one argument, while Loofah.fragment forwards *args and a block -- extra arguments raise ArgumentError and a block is never used here.
17
+ def parse tags
18
+ self.new(Loofah::HTML::Document.new, tags)
19
+ end
20
+ end
21
+
22
+ private
23
+ # Returns the first node matching ./body when there is one, otherwise the fragment itself.
24
+ def __sanitize_roots # :nodoc:
25
+ xpath("./body").first || self
26
+ end
27
+
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,70 @@
1
+ require 'cgi'
2
+
3
module Loofah
  module HTML5
    #
    #  Attribute and inline-CSS scrubbing, adapted from the html5lib sanitizer.
    #  All lookups go through the HashedWhiteList tables.
    #
    module Scrub

      class << self

        # alternative implementation of the html5lib attribute scrubbing algorithm
        #
        # Walks every attribute of +node+ and, in order:
        # * removes attributes that are not in ALLOWED_ATTRIBUTES
        # * removes URI-valued attributes whose scheme is not in ALLOWED_PROTOCOLS
        # * blanks non-local url(...) references in SVG attributes that allow refs
        # * removes non-local xlink:href on SVG elements limited to local hrefs
        #
        # Finally, a surviving +style+ attribute is rewritten through scrub_css.
        # +node+ is modified in place.
        def scrub_attributes(node)
          node.attribute_nodes.each do |attr_node|
            attr_name = if attr_node.namespace
                          "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
                        else
                          attr_node.node_name
                        end

            # a removed attribute needs no further inspection
            unless HashedWhiteList::ALLOWED_ATTRIBUTES[attr_name]
              attr_node.remove
              next
            end

            if HashedWhiteList::ATTR_VAL_IS_URI[attr_name]
              # this block lifted nearly verbatim from HTML5 sanitization
              #
              # Backticks, control characters, whitespace and U+0080..U+00A0 are
              # stripped before the scheme is inspected so "java\tscript:" cannot
              # slip through. The code-point range replaces the original
              # byte-wise \302[\200-\240], which is not a valid regexp on
              # Ruby 1.9+; it assumes the value is UTF-8 (or ASCII) text.
              val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(/`|[\000-\040\177\s]+|[\u0080-\u00a0]/, '').downcase
              has_scheme = val_unescaped =~ /\A[a-z0-9][-+.a-z0-9]*:/
              if has_scheme && HashedWhiteList::ALLOWED_PROTOCOLS[val_unescaped.split(':')[0]].nil?
                attr_node.remove
                next
              end
            end

            if HashedWhiteList::SVG_ATTR_VAL_ALLOWS_REF[attr_name]
              # keep url(#local) references, blank out everything else
              attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
            end

            if HashedWhiteList::SVG_ALLOW_LOCAL_HREF[node.name] && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
              attr_node.remove
            end
          end

          if node.attributes['style']
            node['style'] = scrub_css(node.attributes['style'])
          end
        end

        # lifted nearly verbatim from html5lib
        #
        # Returns a sanitized copy of the CSS declaration list +style+ (anything
        # responding to +to_s+). Returns '' when the input fails the syntax
        # gauntlet; otherwise returns only the declarations whose property is
        # whitelisted, joined by single spaces.
        def scrub_css(style)
          # disallow urls
          style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')

          # gauntlet
          #
          # Anchored with \A and \z so the WHOLE value must pass. With ^ and $
          # a single harmless line was enough to approve a multi-line value,
          # letting e.g. "color: red;\nwidth: expression(...)" through.
          return '' unless style =~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
          return '' unless style =~ /\A\s*([-\w]+\s*:[^:;]*(;\s*|\z))*\z/

          clean = []
          style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
            next if val.empty?
            prop.downcase!
            if HashedWhiteList::ALLOWED_CSS_PROPERTIES[prop]
              clean << "#{prop}: #{val};"
            elsif %w[background border margin padding].include?(prop.split('-')[0])
              # shorthand properties: every keyword must be whitelisted or look
              # like a colour / length
              unsafe = val.split.any? do |keyword|
                HashedWhiteList::ALLOWED_CSS_KEYWORDS[keyword].nil? &&
                  keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
              end
              clean << "#{prop}: #{val};" unless unsafe
            elsif HashedWhiteList::ALLOWED_SVG_PROPERTIES[prop]
              clean << "#{prop}: #{val};"
            end
          end

          clean.join(' ')
        end

      end

    end
  end
end
70
+
@@ -0,0 +1,170 @@
1
module Loofah
  module HTML5
    #
    #  HTML whitelist lifted from HTML5lib sanitizer code:
    #
    #    http://code.google.com/p/html5lib/
    #
    # <html5_license>
    #
    #   Copyright (c) 2006-2008 The Authors
    #
    #   Contributors:
    #   James Graham - jg307@cam.ac.uk
    #   Anne van Kesteren - annevankesteren@gmail.com
    #   Lachlan Hunt - lachlan.hunt@lachy.id.au
    #   Matt McDonald - kanashii@kanashii.ca
    #   Sam Ruby - rubys@intertwingly.net
    #   Ian Hickson (Google) - ian@hixie.ch
    #   Thomas Broyer - t.broyer@ltgt.net
    #   Jacques Distler - distler@golem.ph.utexas.edu
    #   Henri Sivonen - hsivonen@iki.fi
    #   The Mozilla Foundation (contributions from Henri Sivonen since 2008)
    #
    #   Permission is hereby granted, free of charge, to any person
    #   obtaining a copy of this software and associated documentation
    #   files (the "Software"), to deal in the Software without
    #   restriction, including without limitation the rights to use, copy,
    #   modify, merge, publish, distribute, sublicense, and/or sell copies
    #   of the Software, and to permit persons to whom the Software is
    #   furnished to do so, subject to the following conditions:
    #
    #   The above copyright notice and this permission notice shall be
    #   included in all copies or substantial portions of the Software.
    #
    #   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    #   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    #   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    #   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    #   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    #   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    #   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    #   DEALINGS IN THE SOFTWARE.
    #
    # </html5_license>
    module WhiteList
      ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
      button caption center cite code col colgroup dd del dfn dir div dl dt
      em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
      legend li map menu ol optgroup option p pre q s samp select small span
      strike strong sub sup table tbody td textarea tfoot th thead tr tt u
      ul var]

      MATHML_ELEMENTS = %w[annotation annotation-xml maction math merror mfrac
      mfenced mi mmultiscripts mn mo mover mpadded mphantom mprescripts mroot mrow
      mspace msqrt mstyle msub msubsup msup mtable mtd mtext mtr munder
      munderover none semantics]

      SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
      circle defs desc ellipse font-face font-face-name font-face-src foreignObject
      g glyph hkern linearGradient line marker metadata missing-glyph
      mpath path polygon polyline radialGradient rect set stop svg switch
      text title tspan use]

      ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
      align alt axis border cellpadding cellspacing char charoff charset
      checked cite class clear cols colspan color compact coords datetime
      dir disabled enctype for frame headers height href hreflang hspace id
      ismap label lang longdesc maxlength media method multiple name nohref
      noshade nowrap prompt readonly rel rev rows rowspan rules scope
      selected shape size span src start style summary tabindex target title
      type usemap valign value vspace width xml:lang]

      MATHML_ATTRIBUTES = %w[actiontype align close columnalign columnalign
      columnalign columnlines columnspacing columnspan depth display
      displaystyle encoding equalcolumns equalrows fence fontstyle fontweight
      frame height linethickness lspace mathbackground mathcolor mathvariant
      mathvariant maxsize minsize open other rowalign rowalign rowalign rowlines
      rowspacing rowspan rspace scriptlevel selection separator separators
      stretchy width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]

      SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
      arabic-form ascent attributeName attributeType baseProfile bbox begin
      by calcMode cap-height class color color-rendering content cx cy d dx
      dy descent display dur end fill fill-opacity fill-rule font-family
      font-size font-stretch font-style font-variant font-weight from fx fy g1
      g2 glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
      ideographic k keyPoints keySplines keyTimes lang marker-end
      marker-mid marker-start markerHeight markerUnits markerWidth
      mathematical max min name offset opacity orient origin
      overline-position overline-thickness panose-1 path pathLength points
      preserveAspectRatio r refX refY repeatCount repeatDur
      requiredExtensions requiredFeatures restart rotate rx ry slope stemh
      stemv stop-color stop-opacity strikethrough-position
      strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
      stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
      stroke-width systemLanguage target text-anchor to transform type u1
      u2 underline-position underline-thickness unicode unicode-range
      units-per-em values version viewBox visibility width widths x
      x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
      xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
      xmlns:xlink y y1 y2 zoomAndPan]

      ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]

      SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path color-profile cursor fill
      filter marker marker-start marker-mid marker-end mask stroke]

      SVG_ALLOW_LOCAL_HREF = %w[altGlyph animate animateColor animateMotion
      animateTransform cursor feImage filter linearGradient pattern
      radialGradient textpath tref set use]

      ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
      border-bottom-color border-collapse border-color border-left-color
      border-right-color border-top-color clear color cursor direction
      display elevation float font font-family font-size font-style
      font-variant font-weight height letter-spacing line-height overflow
      pause pause-after pause-before pitch pitch-range richness speak
      speak-header speak-numeral speak-punctuation speech-rate stress
      text-align text-decoration text-indent unicode-bidi vertical-align
      voice-family volume white-space width]

      ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
      brown center collapse dashed dotted fuchsia gray green !important
      italic left lime maroon medium none navy normal nowrap olive pointer
      purple red right solid silver teal top transparent underline white
      yellow]

      ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
      stroke-width stroke-linecap stroke-linejoin stroke-opacity]

      ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
      telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]

      # subclasses may define their own versions of these constants
      ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
      ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
      ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
      ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
      ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
      ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS

      VOID_ELEMENTS = %w[
        base
        link
        meta
        hr
        br
        img
        embed
        param
        area
        col
        input
      ]
    end

    #
    #  The HTML5lib whitelist arrays, transformed into hashes for faster lookup.
    #
    #  For every Array constant in WhiteList a constant of the same name is
    #  defined here, mapping each entry (as written and downcased) to +true+.
    #
    module HashedWhiteList
      WhiteList.constants.each do |constant|
        entries = WhiteList.const_get(constant)
        next unless entries.is_a?(Array)

        lookup = {}
        entries.each do |entry|
          lookup[entry] = true
          # mixed-case names such as "animateColor" are also reachable downcased
          lookup[entry.downcase] = true
        end

        # const_get / const_set replace the original string module_eval
        const_set(constant, lookup)
      end
    end
  end
end