loofah 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.rdoc +18 -0
- data/MIT-LICENSE.txt +21 -0
- data/Manifest.txt +28 -0
- data/README.rdoc +110 -0
- data/Rakefile +16 -0
- data/TODO.rdoc +9 -0
- data/benchmark/benchmark.rb +72 -0
- data/benchmark/fragment.html +96 -0
- data/benchmark/www.slashdot.com.html +2560 -0
- data/init.rb +2 -0
- data/lib/loofah.rb +197 -0
- data/lib/loofah/active_record.rb +44 -0
- data/lib/loofah/deprecated.rb +38 -0
- data/lib/loofah/html/document.rb +19 -0
- data/lib/loofah/html/document_fragment.rb +30 -0
- data/lib/loofah/html5/scrub.rb +70 -0
- data/lib/loofah/html5/whitelist.rb +170 -0
- data/lib/loofah/scrubber.rb +108 -0
- data/test/helper.rb +8 -0
- data/test/html5/test_deprecated_sanitizer.rb +185 -0
- data/test/html5/test_sanitizer.rb +245 -0
- data/test/html5/testdata/tests1.dat +501 -0
- data/test/test_active_record.rb +71 -0
- data/test/test_api.rb +51 -0
- data/test/test_deprecated_basic.rb +68 -0
- data/test/test_microsofty.rb +91 -0
- data/test/test_scrubber.rb +100 -0
- data/test/test_strip_tags.rb +36 -0
- metadata +148 -0
- metadata.gz.sig +0 -0
data/init.rb
ADDED
data/lib/loofah.rb
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'nokogiri'
|
5
|
+
|
6
|
+
require 'loofah/html5/whitelist'
|
7
|
+
require 'loofah/html5/scrub'
|
8
|
+
|
9
|
+
require 'loofah/scrubber'
|
10
|
+
|
11
|
+
require 'loofah/html/document'
|
12
|
+
require 'loofah/html/document_fragment'
|
13
|
+
|
14
|
+
require 'loofah/deprecated'
|
15
|
+
|
16
|
+
|
17
|
+
#
|
18
|
+
# Loofah is an HTML sanitizer wrapped around Nokogiri[http://nokogiri.org], an excellent
|
19
|
+
# HTML/XML parser. If you don't know how Nokogiri[http://nokogiri.org]
|
20
|
+
# works, you might want to pause for a moment and go check it out. I'll
|
21
|
+
# wait.
|
22
|
+
#
|
23
|
+
# A Loofah::HTML::Document is a subclass of Nokogiri::HTML::Document,
|
24
|
+
# so a parsed document gives you all the markup fixer-uppery and API
|
25
|
+
# goodness of Nokogiri.
|
26
|
+
#
|
27
|
+
# Loofah.document(unsafe_html).is_a?(Nokogiri::HTML::Document) # => true
|
28
|
+
# Loofah.fragment(unsafe_html).is_a?(Nokogiri::HTML::DocumentFragment) # => true
|
29
|
+
#
|
30
|
+
# Loofah adds a +scrub!+ method, which can clean up your HTML in a few
|
31
|
+
# different ways by modifying the document in-place:
|
32
|
+
#
|
33
|
+
# doc.scrub!(:strip) # replaces unknown/unsafe tags with their inner text
|
34
|
+
# doc.scrub!(:prune) # removes unknown/unsafe tags and their children
|
35
|
+
# doc.scrub!(:whitewash) # removes unknown/unsafe/namespaced tags and their children,
|
36
|
+
# # and strips all node attributes
|
37
|
+
# doc.scrub!(:escape) # escapes unknown/unsafe tags, like this: &lt;script&gt;
|
38
|
+
#
|
39
|
+
# Loofah overrides +to_s+ to return html:
|
40
|
+
#
|
41
|
+
# unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
|
42
|
+
#
|
43
|
+
# doc = Loofah.fragment(unsafe_html).scrub!(:strip)
|
44
|
+
# doc.to_s # => "ohai! <div>div is safe</div> "
|
45
|
+
#
|
46
|
+
# and +text+ to return plain text:
|
47
|
+
#
|
48
|
+
# doc.text # => "ohai! div is safe "
|
49
|
+
#
|
50
|
+
# Or, if you prefer, you can use the shorthand methods +scrub_fragment+ and +scrub_document+:
|
51
|
+
#
|
52
|
+
# Loofah.scrub_fragment(unsafe_html, :prune).to_s
|
53
|
+
# Loofah.scrub_document(unsafe_html, :strip).text
|
54
|
+
#
|
55
|
+
# == Usage
|
56
|
+
#
|
57
|
+
# Let's say you have a Web 2.0 application, and you allow people to
|
58
|
+
# send HTML snippets to each other.
|
59
|
+
#
|
60
|
+
# Let's also say some script-kiddie from Norland sends this to your
|
61
|
+
# users, in an effort to swipe some credit cards:
|
62
|
+
#
|
63
|
+
# <script src=http://ha.ckers.org/xss.js></script>
|
64
|
+
#
|
65
|
+
# Oooh, that could be bad. Here's how to fix it:
|
66
|
+
#
|
67
|
+
# Loofah.scrub_fragment(dangerous_html, :escape).to_s
|
68
|
+
#
|
69
|
+
# # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
|
70
|
+
#
|
71
|
+
# Loofah also makes available the sanitized markup in both HTML and
|
72
|
+
# plain-text formats without incurring the overhead of multiple
|
73
|
+
# parsings:
|
74
|
+
#
|
75
|
+
# safe_fragment = Loofah.scrub_fragment(dangerous_html, :strip)
|
76
|
+
# safe_fragment.to_s # => HTML output
|
77
|
+
# safe_fragment.text # => plain text output
|
78
|
+
#
|
79
|
+
# And you can modify the HTML using Nokogiri's API, if you like:
|
80
|
+
#
|
81
|
+
# stylized_fragment = Loofah.fragment(dangerous_html)
|
82
|
+
# stylized_fragment.xpath("//a/text()").wrap("<span></span>")
|
83
|
+
# stylized_fragment.scrub!(:strip)
|
84
|
+
#
|
85
|
+
# == Fragments vs Documents
|
86
|
+
#
|
87
|
+
# Generally speaking, unless you expect to have \<html\> and
|
88
|
+
# \<body\> tags in your HTML, you don't have a *document*, you
|
89
|
+
# have a *fragment*.
|
90
|
+
#
|
91
|
+
# For parsing fragments, you should use Loofah.fragment. Nokogiri
|
92
|
+
# won't wrap the result in +html+ and +body+ tags, and will ignore
|
93
|
+
# +head+ elements.
|
94
|
+
#
|
95
|
+
# Full HTML documents should be parsed with Loofah.document, which
|
96
|
+
# will add the DOCTYPE declaration, and properly handle +head+ and
|
97
|
+
# +body+ elements.
|
98
|
+
#
|
99
|
+
# == Strings and IO Objects as Input
|
100
|
+
#
|
101
|
+
# Loofah.document and Loofah.fragment accept any IO object in addition
|
102
|
+
# to accepting a string. That IO object could be a file, or a socket,
|
103
|
+
# or a StringIO, or anything that responds to +read+ and
|
104
|
+
# +close+. Which makes it particularly easy to sanitize mass
|
105
|
+
# quantities of docs.
|
106
|
+
#
|
107
|
+
# == Scrubbing Methods
|
108
|
+
#
|
109
|
+
# Given:
|
110
|
+
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
111
|
+
#
|
112
|
+
# === scrub!(:strip)
|
113
|
+
#
|
114
|
+
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
115
|
+
#
|
116
|
+
# Loofah.fragment(unsafe_html).scrub!(:strip)
|
117
|
+
# # or
|
118
|
+
# Loofah.scrub_fragment(unsafe_html, :strip)
|
119
|
+
#
|
120
|
+
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
121
|
+
#
|
122
|
+
# === scrub!(:prune)
|
123
|
+
#
|
124
|
+
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
125
|
+
#
|
126
|
+
# Loofah.fragment(unsafe_html).scrub!(:prune)
|
127
|
+
# # or
|
128
|
+
# Loofah.scrub_fragment(unsafe_html, :prune)
|
129
|
+
#
|
130
|
+
# => "ohai! <div>div is safe</div> "
|
131
|
+
#
|
132
|
+
# === scrub!(:escape)
|
133
|
+
#
|
134
|
+
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
135
|
+
#
|
136
|
+
# Loofah.fragment(unsafe_html).scrub!(:escape)
|
137
|
+
# # or
|
138
|
+
# Loofah.scrub_fragment(unsafe_html, :escape)
|
139
|
+
#
|
140
|
+
# => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
|
141
|
+
#
|
142
|
+
# === scrub!(:whitewash)
|
143
|
+
#
|
144
|
+
# +:whitewash+ removes all comments, styling and attributes in
|
145
|
+
# addition to doing markup-fixer-uppery and pruning unsafe tags. I
|
146
|
+
# like to call this "whitewashing", since it's like putting a new
|
147
|
+
# layer of paint on top of the HTML input to make it look nice.
|
148
|
+
#
|
149
|
+
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
150
|
+
#
|
151
|
+
# Loofah.fragment(messy_markup).scrub!(:whitewash)
|
152
|
+
# # or
|
153
|
+
# Loofah.scrub_fragment(messy_markup, :whitewash)
|
154
|
+
#
|
155
|
+
# => "ohai! <div>div with attributes</div>"
|
156
|
+
#
|
157
|
+
# One use case for this feature is to clean up HTML that was
|
158
|
+
# cut-and-pasted from Microsoft Word into a WYSIWYG editor or a rich
|
159
|
+
# text editor. Microsoft's software is famous for injecting all kinds
|
160
|
+
# of cruft into its HTML output. Who needs that? Certainly not me.
|
161
|
+
#
|
162
|
+
module Loofah
  # The version of Loofah you are using
  VERSION = '0.2.0'

  # The minimum required version of Nokogiri
  REQUIRED_NOKOGIRI_VERSION = '1.3.3'

  # Parse a complete HTML document.
  # Forwards every argument (and any block) to Loofah::HTML::Document.parse.
  def self.document(*args, &block)
    Loofah::HTML::Document.parse(*args, &block)
  end

  # Parse an HTML fragment.
  # Forwards every argument (and any block) to Loofah::HTML::DocumentFragment.parse.
  def self.fragment(*args, &block)
    Loofah::HTML::DocumentFragment.parse(*args, &block)
  end

  # Parse +string_or_io+ as a fragment and scrub it with +method+.
  # Returns whatever scrub! returns for the parsed fragment.
  def self.scrub_fragment(string_or_io, method)
    parsed = fragment(string_or_io)
    parsed.scrub!(method)
  end

  # Parse +string_or_io+ as a document and scrub it with +method+.
  # Returns whatever scrub! returns for the parsed document.
  def self.scrub_document(string_or_io, method)
    parsed = document(string_or_io)
    parsed.scrub!(method)
  end
end
|
194
|
+
|
195
|
+
# Refuse to load against a Nokogiri older than the minimum supported release.
# The versions are compared as Gem::Version objects rather than raw Strings:
# a lexicographic String comparison orders "1.10.0" before "1.3.3" and would
# wrongly reject newer Nokogiri releases.
if Gem::Version.new(Nokogiri::VERSION) < Gem::Version.new(Loofah::REQUIRED_NOKOGIRI_VERSION)
  raise RuntimeError, "Loofah requires Nokogiri #{Loofah::REQUIRED_NOKOGIRI_VERSION} or later (currently #{Nokogiri::VERSION})"
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Loofah
  #
  #  Loofah can scrub ActiveRecord attributes in a before_save callback:
  #
  #    # in environment.rb
  #    require 'loofah/active_record'
  #
  #    # db/schema.rb
  #    create_table "posts" do |t|
  #      t.string "title"
  #      t.string "body"
  #    end
  #
  #    # app/model/post.rb
  #    class Post < ActiveRecord::Base
  #      html_fragment :body, :scrub => :prune  # scrubs 'body' in a before_save
  #    end
  #
  module ActiveRecord
    #
    #  scrub an ActiveRecord attribute +attr+ as an HTML fragment
    #  using the method specified in the required +:scrub+ option.
    #
    #  Raises ArgumentError when the +:scrub+ option is missing.
    #
    def html_fragment(attr, options={})
      scrub_method = options[:scrub]
      raise ArgumentError, "html_fragment requires :scrub option" unless scrub_method
      before_save do |record|
        # Store the scrubbed markup as a String; without to_s the attribute
        # would be assigned the object returned by Loofah.scrub_fragment.
        record[attr] = Loofah.scrub_fragment(record[attr], scrub_method).to_s
      end
    end

    #
    #  scrub an ActiveRecord attribute +attr+ as an HTML document
    #  using the method specified in the required +:scrub+ option.
    #
    #  Raises ArgumentError when the +:scrub+ option is missing.
    #
    def html_document(attr, options={})
      scrub_method = options[:scrub]
      raise ArgumentError, "html_document requires :scrub option" unless scrub_method
      before_save do |record|
        # Store the scrubbed markup as a String; without to_s the attribute
        # would be assigned the object returned by Loofah.scrub_document.
        record[attr] = Loofah.scrub_document(record[attr], scrub_method).to_s
      end
    end
  end
end
|
43
|
+
|
44
|
+
# Expose html_fragment and html_document as class-level methods on ActiveRecord::Base.
ActiveRecord::Base.extend(Loofah::ActiveRecord)
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Loofah
  # Deprecated shortcut: prune a document and return its plain text.
  def self.strip_tags(string_or_io) # :nodoc:
    warn_once "WARNING: Loofah.strip_tags is deprecated and will be removed in Loofah 0.3.0.  Please switch to Loofah.scrub_document(string_or_io, :prune)"
    pruned = Loofah.scrub_document(string_or_io, :prune)
    pruned.text
  end

  # Deprecated shortcut: whitewash a fragment and return it via to_s.
  def self.whitewash(string_or_io) # :nodoc:
    warn_once "WARNING: Loofah.whitewash is deprecated and will be removed in Loofah 0.3.0.  Please switch to Loofah.scrub_fragment(string_or_io, :whitewash)"
    washed = Loofah.scrub_fragment(string_or_io, :whitewash)
    washed.to_s
  end

  # Deprecated shortcut: whitewash a document and return it via to_s.
  def self.whitewash_document(string_or_io) # :nodoc:
    warn_once "WARNING: Loofah.whitewash_document is deprecated and will be removed in Loofah 0.3.0.  Please switch to Loofah.scrub_document(string_or_io, :whitewash)"
    washed = Loofah.scrub_document(string_or_io, :whitewash)
    washed.to_s
  end

  # Deprecated shortcut: escape a fragment and return it via to_xml.
  def self.sanitize(string_or_io) # :nodoc:
    warn_once "WARNING: Loofah.sanitize is deprecated and will be removed in Loofah 0.3.0.  Please switch to Loofah.scrub_fragment(string_or_io, :escape)"
    escaped = Loofah.scrub_fragment(string_or_io, :escape)
    escaped.to_xml
  end

  # Deprecated shortcut: escape a document and return it via to_xml.
  def self.sanitize_document(string_or_io) # :nodoc:
    warn_once "WARNING: Loofah.sanitize_document is deprecated and will be removed in Loofah 0.3.0.  Please switch to Loofah.scrub_document(string_or_io, :escape)"
    escaped = Loofah.scrub_document(string_or_io, :escape)
    escaped.to_xml
  end

  # Emit +message+ through Kernel#warn the first time it is seen; later calls
  # with the same message are silent. Seen messages are remembered in @aooga.
  def self.warn_once(message)
    @aooga ||= {}
    return if @aooga.key?(message)
    warn message
    @aooga[message] = true
  end
  private_class_method :warn_once
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Loofah
  module HTML
    #
    #  Subclass of Nokogiri::HTML::Document.
    #
    #  See Loofah::ScrubberInstanceMethods for additional methods.
    #
    class Document < Nokogiri::HTML::Document
      include Loofah::ScrubberInstanceMethods

      # Returns the result of an XPath query for the document's
      # /html/head and /html/body nodes.
      def __sanitize_roots # :nodoc:
        xpath("/html/head", "/html/body")
      end
      private :__sanitize_roots
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Loofah
  module HTML
    #
    #  Subclass of Nokogiri::HTML::DocumentFragment.  Also includes Loofah::ScrubberInstanceMethods.
    #
    #  See Loofah::ScrubberInstanceMethods for additional methods.
    #
    class DocumentFragment < Nokogiri::HTML::DocumentFragment
      include Loofah::ScrubberInstanceMethods

      #
      #  Overridden Nokogiri::HTML::DocumentFragment
      #  constructor. Applications should use Loofah.fragment to
      #  parse a fragment.
      #
      #  Builds the fragment from +tags+ inside a new, empty
      #  Loofah::HTML::Document.
      #
      def self.parse(tags)
        owner = Loofah::HTML::Document.new
        new(owner, tags)
      end

      private

      # Returns the first ./body node when one exists, otherwise the
      # fragment itself.
      def __sanitize_roots # :nodoc:
        body = xpath("./body").first
        body || self
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
module Loofah
  module HTML5
    #
    #  Attribute and inline-CSS scrubbing driven by the lookup tables in
    #  HashedWhiteList.
    #
    module Scrub

      class << self

        #
        #  alternative implementation of the html5lib attribute scrubbing algorithm
        #
        #  Walks every attribute of +node+ and removes it when:
        #  * its (namespace-prefixed) name is not in ALLOWED_ATTRIBUTES,
        #  * it is a URI attribute whose scheme is not in ALLOWED_PROTOCOLS, or
        #  * it is an xlink:href on an SVG_ALLOW_LOCAL_HREF element that does
        #    not point at a local (#...) reference.
        #  url(...) references that are not local are blanked out of
        #  SVG_ATTR_VAL_ALLOWS_REF attributes, and a remaining +style+
        #  attribute is rewritten through scrub_css.
        #
        def scrub_attributes(node)
          node.attribute_nodes.each do |attr_node|
            attr_name = if attr_node.namespace
                          "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
                        else
                          attr_node.node_name
                        end

            # Nothing further to check once the attribute has been removed.
            unless HashedWhiteList::ALLOWED_ATTRIBUTES[attr_name]
              attr_node.remove
              next
            end

            if HashedWhiteList::ATTR_VAL_IS_URI[attr_name]
              # this block lifted nearly verbatim from HTML5 sanitization.
              # Backticks, ASCII control/whitespace characters and U+0080..U+00A0
              # are stripped before the scheme is inspected, so "java\tscript:"
              # is recognised. The range is written as code points because the
              # byte form \302[\200-\240] is an invalid multibyte escape in
              # UTF-8 source.
              val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(/`|[\000-\040\177\s]+|[\u0080-\u00a0]/, '').downcase
              if val_unescaped =~ /\A[a-z0-9][-+.a-z0-9]*:/ && HashedWhiteList::ALLOWED_PROTOCOLS[val_unescaped.split(':')[0]].nil?
                attr_node.remove
                next
              end
            end

            if HashedWhiteList::SVG_ATTR_VAL_ALLOWS_REF[attr_name]
              attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
            end

            if HashedWhiteList::SVG_ALLOW_LOCAL_HREF[node.name] && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
              attr_node.remove
            end
          end

          if node.attributes['style']
            node['style'] = scrub_css(node.attributes['style'])
          end
        end

        #
        #  lifted nearly verbatim from html5lib
        #
        #  Returns a sanitized copy of the CSS declaration list +style+
        #  (anything responding to +to_s+). Returns '' when the input fails
        #  either gauntlet check; otherwise returns the surviving
        #  "prop: value;" declarations joined by single spaces.
        #
        def scrub_css(style)
          # disallow urls
          style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')

          # gauntlet: the WHOLE string must match. \A and \z are used because
          # ^ and $ match at every line boundary in Ruby, which would let
          # "color: red;\nwidth: expression(...)" through on its first line.
          return '' unless style =~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
          return '' unless style =~ /\A\s*([-\w]+\s*:[^:;]*(;\s*|\z))*\z/

          clean = []
          style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
            next if val.empty?
            prop = prop.downcase
            if HashedWhiteList::ALLOWED_CSS_PROPERTIES[prop]
              clean << "#{prop}: #{val};"
            elsif %w[background border margin padding].include?(prop.split('-')[0])
              # Shorthand families are kept only if every whitespace-separated
              # token is a whitelisted keyword, a colour, or a short length.
              rejected = val.split.any? do |keyword|
                HashedWhiteList::ALLOWED_CSS_KEYWORDS[keyword].nil? &&
                  keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
              end
              clean << "#{prop}: #{val};" unless rejected
            elsif HashedWhiteList::ALLOWED_SVG_PROPERTIES[prop]
              clean << "#{prop}: #{val};"
            end
          end

          clean.join(' ')
        end

      end

    end
  end
end
|
70
|
+
|
@@ -0,0 +1,170 @@
|
|
1
|
+
module Loofah
  module HTML5
    #
    #  HTML whitelist lifted from HTML5lib sanitizer code:
    #
    #    http://code.google.com/p/html5lib/
    #
    # <html5_license>
    #
    #   Copyright (c) 2006-2008 The Authors
    #
    #   Contributors:
    #   James Graham - jg307@cam.ac.uk
    #   Anne van Kesteren - annevankesteren@gmail.com
    #   Lachlan Hunt - lachlan.hunt@lachy.id.au
    #   Matt McDonald - kanashii@kanashii.ca
    #   Sam Ruby - rubys@intertwingly.net
    #   Ian Hickson (Google) - ian@hixie.ch
    #   Thomas Broyer - t.broyer@ltgt.net
    #   Jacques Distler - distler@golem.ph.utexas.edu
    #   Henri Sivonen - hsivonen@iki.fi
    #   The Mozilla Foundation (contributions from Henri Sivonen since 2008)
    #
    #   Permission is hereby granted, free of charge, to any person
    #   obtaining a copy of this software and associated documentation
    #   files (the "Software"), to deal in the Software without
    #   restriction, including without limitation the rights to use, copy,
    #   modify, merge, publish, distribute, sublicense, and/or sell copies
    #   of the Software, and to permit persons to whom the Software is
    #   furnished to do so, subject to the following conditions:
    #
    #   The above copyright notice and this permission notice shall be
    #   included in all copies or substantial portions of the Software.
    #
    #   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    #   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    #   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    #   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    #   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    #   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    #   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    #   DEALINGS IN THE SOFTWARE.
    #
    # </html5_license>
    module WhiteList
      ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
      button caption center cite code col colgroup dd del dfn dir div dl dt
      em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
      legend li map menu ol optgroup option p pre q s samp select small span
      strike strong sub sup table tbody td textarea tfoot th thead tr tt u
      ul var]

      MATHML_ELEMENTS = %w[annotation annotation-xml maction math merror mfrac
      mfenced mi mmultiscripts mn mo mover mpadded mphantom mprescripts mroot mrow
      mspace msqrt mstyle msub msubsup msup mtable mtd mtext mtr munder
      munderover none semantics]

      SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
      circle defs desc ellipse font-face font-face-name font-face-src foreignObject
      g glyph hkern linearGradient line marker metadata missing-glyph
      mpath path polygon polyline radialGradient rect set stop svg switch
      text title tspan use]

      ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
      align alt axis border cellpadding cellspacing char charoff charset
      checked cite class clear cols colspan color compact coords datetime
      dir disabled enctype for frame headers height href hreflang hspace id
      ismap label lang longdesc maxlength media method multiple name nohref
      noshade nowrap prompt readonly rel rev rows rowspan rules scope
      selected shape size span src start style summary tabindex target title
      type usemap valign value vspace width xml:lang]

      MATHML_ATTRIBUTES = %w[actiontype align close columnalign columnalign
      columnalign columnlines columnspacing columnspan depth display
      displaystyle encoding equalcolumns equalrows fence fontstyle fontweight
      frame height linethickness lspace mathbackground mathcolor mathvariant
      mathvariant maxsize minsize open other rowalign rowalign rowalign rowlines
      rowspacing rowspan rspace scriptlevel selection separator separators
      stretchy width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]

      SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
      arabic-form ascent attributeName attributeType baseProfile bbox begin
      by calcMode cap-height class color color-rendering content cx cy d dx
      dy descent display dur end fill fill-opacity fill-rule font-family
      font-size font-stretch font-style font-variant font-weight from fx fy g1
      g2 glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
      ideographic k keyPoints keySplines keyTimes lang marker-end
      marker-mid marker-start markerHeight markerUnits markerWidth
      mathematical max min name offset opacity orient origin
      overline-position overline-thickness panose-1 path pathLength points
      preserveAspectRatio r refX refY repeatCount repeatDur
      requiredExtensions requiredFeatures restart rotate rx ry slope stemh
      stemv stop-color stop-opacity strikethrough-position
      strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
      stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
      stroke-width systemLanguage target text-anchor to transform type u1
      u2 underline-position underline-thickness unicode unicode-range
      units-per-em values version viewBox visibility width widths x
      x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
      xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
      xmlns:xlink y y1 y2 zoomAndPan]

      ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]

      SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path color-profile cursor fill
      filter marker marker-start marker-mid marker-end mask stroke]

      SVG_ALLOW_LOCAL_HREF = %w[altGlyph animate animateColor animateMotion
      animateTransform cursor feImage filter linearGradient pattern
      radialGradient textpath tref set use]

      ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
      border-bottom-color border-collapse border-color border-left-color
      border-right-color border-top-color clear color cursor direction
      display elevation float font font-family font-size font-style
      font-variant font-weight height letter-spacing line-height overflow
      pause pause-after pause-before pitch pitch-range richness speak
      speak-header speak-numeral speak-punctuation speech-rate stress
      text-align text-decoration text-indent unicode-bidi vertical-align
      voice-family volume white-space width]

      ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
      brown center collapse dashed dotted fuchsia gray green !important
      italic left lime maroon medium none navy normal nowrap olive pointer
      purple red right solid silver teal top transparent underline white
      yellow]

      ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
      stroke-width stroke-linecap stroke-linejoin stroke-opacity]

      ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
      telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]

      # subclasses may define their own versions of these constants
      ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
      ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
      ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
      ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
      ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
      ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS

      VOID_ELEMENTS = %w[
        base
        link
        meta
        hr
        br
        img
        embed
        param
        area
        col
        input
      ]
    end

    #
    #  The HTML5lib whitelist arrays, transformed into hashes for faster lookup.
    #
    #  For every Array constant in WhiteList, a constant of the same name is
    #  defined here holding a Hash that maps each entry, and its downcased
    #  form, to +true+.
    #
    module HashedWhiteList
      WhiteList.constants.each do |constant|
        list = WhiteList.const_get(constant)
        next unless list.is_a?(Array)

        lookup = {}
        list.each do |entry|
          lookup[entry] = true
          lookup[entry.downcase] = true
        end
        # const_set replaces the former string module_eval; no code is
        # generated from constant names any more.
        const_set(constant, lookup)
      end
    end
  end
end
|