loofah 1.0.0 → 2.19.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +489 -0
- data/MIT-LICENSE.txt +3 -1
- data/README.md +364 -0
- data/SECURITY.md +18 -0
- data/lib/loofah/elements.rb +88 -11
- data/lib/loofah/helpers.rb +76 -2
- data/lib/loofah/html/document.rb +1 -0
- data/lib/loofah/html/document_fragment.rb +9 -2
- data/lib/loofah/html5/libxml2_workarounds.rb +27 -0
- data/lib/loofah/html5/safelist.rb +1042 -0
- data/lib/loofah/html5/scrub.rb +198 -40
- data/lib/loofah/instance_methods.rb +16 -10
- data/lib/loofah/metahelpers.rb +9 -10
- data/lib/loofah/scrubber.rb +22 -6
- data/lib/loofah/scrubbers.rb +96 -16
- data/lib/loofah/version.rb +5 -0
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +5 -2
- data/lib/loofah.rb +38 -25
- metadata +159 -172
- data/CHANGELOG.rdoc +0 -134
- data/Gemfile +0 -1
- data/Manifest.txt +0 -34
- data/README.rdoc +0 -312
- data/Rakefile +0 -53
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/lib/loofah/html5/whitelist.rb +0 -168
- data/test/helper.rb +0 -7
- data/test/html5/test_sanitizer.rb +0 -248
- data/test/integration/test_ad_hoc.rb +0 -176
- data/test/integration/test_helpers.rb +0 -33
- data/test/integration/test_html.rb +0 -51
- data/test/integration/test_scrubbers.rb +0 -331
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -138
- data/test/unit/test_helpers.rb +0 -27
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
data/README.rdoc
DELETED
@@ -1,312 +0,0 @@
|
|
1
|
-
= Loofah
|
2
|
-
|
3
|
-
* http://github.com/flavorjones/loofah
|
4
|
-
* http://loofah.rubyforge.org
|
5
|
-
* http://rubyforge.org/projects/loofah
|
6
|
-
|
7
|
-
== Description
|
8
|
-
|
9
|
-
Loofah is a general library for manipulating and transforming HTML/XML
|
10
|
-
documents and fragments. It's built on top of Nokogiri and libxml2, so
|
11
|
-
it's fast and has a nice API.
|
12
|
-
|
13
|
-
Loofah excels at HTML sanitization (XSS prevention). It includes some
|
14
|
-
nice HTML sanitizers, which are based on HTML5lib's whitelist, so it
|
15
|
-
most likely won't make your codes less secure. (These statements have
|
16
|
-
not been evaluated by Netexperts.)
|
17
|
-
|
18
|
-
ActiveRecord extensions for sanitization are available in the
|
19
|
-
`loofah-activerecord` gem (see
|
20
|
-
http://github.com/flavorjones/loofah-activerecord).
|
21
|
-
|
22
|
-
== Features
|
23
|
-
|
24
|
-
* Easily write custom scrubbers for HTML/XML leveraging the sweetness of Nokogiri (and HTML5lib's whitelists).
|
25
|
-
* Common HTML sanitizing tasks are built-in:
|
26
|
-
* _Strip_ unsafe tags, leaving behind only the inner text.
|
27
|
-
* _Prune_ unsafe tags and their subtrees, removing all traces that they ever existed.
|
28
|
-
* _Escape_ unsafe tags and their subtrees, leaving behind lots of <tt>&lt;</tt> and <tt>&gt;</tt> entities.
|
29
|
-
* _Whitewash_ the markup, removing all attributes and namespaced nodes.
|
30
|
-
* Common HTML transformation tasks are built-in:
|
31
|
-
* Add the _nofollow_ attribute to all hyperlinks.
|
32
|
-
* Format markup as plain text, with or without sensible whitespace handling around block elements.
|
33
|
-
* Replace Rails's +strip_tags+ and +sanitize+ view helper methods.
|
34
|
-
|
35
|
-
== Compare and Contrast
|
36
|
-
|
37
|
-
Loofah is one of two known Ruby XSS/sanitization solutions that
|
38
|
-
guarantees well-formed and valid markup (the other is Sanitize, which
|
39
|
-
also uses Nokogiri).
|
40
|
-
|
41
|
-
Loofah works on XML, XHTML and HTML documents.
|
42
|
-
|
43
|
-
Also, it's pretty fast. Here is a benchmark comparing Loofah to other
|
44
|
-
commonly-used libraries (ActionView, Sanitize, HTML5lib and HTMLfilter):
|
45
|
-
|
46
|
-
* http://gist.github.com/170193
|
47
|
-
|
48
|
-
Lastly, Loofah is extensible. It's super-easy to write your own custom
|
49
|
-
scrubbers for whatever document manipulation you need. You don't like
|
50
|
-
the built-in scrubbers? Build your own, like a boss.
|
51
|
-
|
52
|
-
== The Basics
|
53
|
-
|
54
|
-
Loofah wraps Nokogiri[http://nokogiri.org] in a loving
|
55
|
-
embrace. Nokogiri[http://nokogiri.org] is an excellent HTML/XML
|
56
|
-
parser. If you don't know how Nokogiri[http://nokogiri.org] works, you
|
57
|
-
might want to pause for a moment and go check it out. I'll wait.
|
58
|
-
|
59
|
-
Loofah presents the following classes:
|
60
|
-
|
61
|
-
* Loofah::HTML::Document and Loofah::HTML::DocumentFragment
|
62
|
-
* Loofah::XML::Document and Loofah::XML::DocumentFragment
|
63
|
-
* Loofah::Scrubber
|
64
|
-
|
65
|
-
The documents and fragments are subclasses of the similar Nokogiri classes.
|
66
|
-
|
67
|
-
The Scrubber represents the document manipulation, either by wrapping
|
68
|
-
a block,
|
69
|
-
|
70
|
-
span2div = Loofah::Scrubber.new do |node|
|
71
|
-
node.name = "div" if node.name == "span"
|
72
|
-
end
|
73
|
-
|
74
|
-
or by implementing a method.
|
75
|
-
|
76
|
-
=== Side Note: Fragments vs Documents
|
77
|
-
|
78
|
-
Generally speaking, unless you expect to have a DOCTYPE and a single
|
79
|
-
root node, you don't have a *document*, you have a *fragment*. For
|
80
|
-
HTML, another rule of thumb is that *documents* have \<html\>
|
81
|
-
and \<body\> tags, and *fragments* usually do not.
|
82
|
-
|
83
|
-
HTML fragments should be parsed with Loofah.fragment. The result won't
|
84
|
-
be wrapped in +html+ or +body+ tags, won't have a DOCTYPE declaration,
|
85
|
-
+head+ elements will be silently ignored, and multiple root nodes are
|
86
|
-
allowed.
|
87
|
-
|
88
|
-
XML fragments should be parsed with Loofah.xml_fragment. The result
|
89
|
-
won't have a DOCTYPE declaration, and multiple root nodes are allowed.
|
90
|
-
|
91
|
-
HTML documents should be parsed with Loofah.document. The result will
|
92
|
-
have a DOCTYPE declaration, along with +html+, +head+ and +body+ tags.
|
93
|
-
|
94
|
-
XML documents should be parsed with Loofah.xml_document. The result
|
95
|
-
will have a DOCTYPE declaration and a single root node.
|
96
|
-
|
97
|
-
=== Loofah::HTML::Document and Loofah::HTML::DocumentFragment
|
98
|
-
|
99
|
-
These classes are subclasses of Nokogiri::HTML::Document and
|
100
|
-
Nokogiri::HTML::DocumentFragment, so you get all the markup
|
101
|
-
fixer-uppery and API goodness of Nokogiri.
|
102
|
-
|
103
|
-
The module methods Loofah.document and Loofah.fragment will parse an
|
104
|
-
HTML document and an HTML fragment, respectively.
|
105
|
-
|
106
|
-
Loofah.document(unsafe_html).is_a?(Nokogiri::HTML::Document) # => true
|
107
|
-
Loofah.fragment(unsafe_html).is_a?(Nokogiri::HTML::DocumentFragment) # => true
|
108
|
-
|
109
|
-
Loofah injects a +scrub!+ method, which takes either a symbol (for
|
110
|
-
built-in scrubbers) or a Loofah::Scrubber object (for custom
|
111
|
-
scrubbers), and modifies the document in-place.
|
112
|
-
|
113
|
-
Loofah overrides +to_s+ to return HTML:
|
114
|
-
|
115
|
-
unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
|
116
|
-
|
117
|
-
doc = Loofah.fragment(unsafe_html).scrub!(:strip)
|
118
|
-
doc.to_s # => "ohai! <div>div is safe</div> "
|
119
|
-
|
120
|
-
and +text+ to return plain text:
|
121
|
-
|
122
|
-
doc.text # => "ohai! div is safe "
|
123
|
-
|
124
|
-
Also, +to_text+ is available, which does the right thing with
|
125
|
-
whitespace around block-level elements.
|
126
|
-
|
127
|
-
doc = Loofah.fragment("<h1>Title</h1><div>Content</div>")
|
128
|
-
doc.text # => "TitleContent" # probably not what you want
|
129
|
-
doc.to_text # => "\nTitle\n\nContent\n" # better
|
130
|
-
|
131
|
-
=== Loofah::XML::Document and Loofah::XML::DocumentFragment
|
132
|
-
|
133
|
-
These classes are subclasses of Nokogiri::XML::Document and
|
134
|
-
Nokogiri::XML::DocumentFragment, so you get all the markup
|
135
|
-
fixer-uppery and API goodness of Nokogiri.
|
136
|
-
|
137
|
-
The module methods Loofah.xml_document and Loofah.xml_fragment will
|
138
|
-
parse an XML document and an XML fragment, respectively.
|
139
|
-
|
140
|
-
Loofah.xml_document(bad_xml).is_a?(Nokogiri::XML::Document) # => true
|
141
|
-
Loofah.xml_fragment(bad_xml).is_a?(Nokogiri::XML::DocumentFragment) # => true
|
142
|
-
|
143
|
-
=== Nodes and NodeSets
|
144
|
-
|
145
|
-
Nokogiri::XML::Node and Nokogiri::XML::NodeSet also get a +scrub!+
|
146
|
-
method, which makes it easy to scrub subtrees.
|
147
|
-
|
148
|
-
The following code will apply the +employee_scrubber+ only to the
|
149
|
-
+employee+ nodes (and their subtrees) in the document:
|
150
|
-
|
151
|
-
Loofah.xml_document(bad_xml).xpath("//employee").scrub!(employee_scrubber)
|
152
|
-
|
153
|
-
And this code will only scrub the first +employee+ node and its subtree:
|
154
|
-
|
155
|
-
Loofah.xml_document(bad_xml).at_xpath("//employee").scrub!(employee_scrubber)
|
156
|
-
|
157
|
-
=== Loofah::Scrubber
|
158
|
-
|
159
|
-
A Scrubber wraps up a block (or method) that is run on a document node:
|
160
|
-
|
161
|
-
# change all <span> tags to <div> tags
|
162
|
-
span2div = Loofah::Scrubber.new do |node|
|
163
|
-
node.name = "div" if node.name == "span"
|
164
|
-
end
|
165
|
-
|
166
|
-
This can then be run on a document:
|
167
|
-
|
168
|
-
Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
169
|
-
# => "<div>foo</div><p>bar</p>"
|
170
|
-
|
171
|
-
Scrubbers can be run on a document in either a top-down traversal (the
|
172
|
-
default) or bottom-up. Top-down scrubbers can optionally return
|
173
|
-
Scrubber::STOP to terminate the traversal of a subtree. Read below and
|
174
|
-
in the Loofah::Scrubber class for more detailed usage.
|
175
|
-
|
176
|
-
Here's an XML example:
|
177
|
-
|
178
|
-
# remove all <employee> tags that have a "deceased" attribute set to true
|
179
|
-
bring_out_your_dead = Loofah::Scrubber.new do |node|
|
180
|
-
if node.name == "employee" and node["deceased"] == "true"
|
181
|
-
node.remove
|
182
|
-
Loofah::Scrubber::STOP # don't bother with the rest of the subtree
|
183
|
-
end
|
184
|
-
end
|
185
|
-
Loofah.xml_document(File.read('plague.xml')).scrub!(bring_out_your_dead)
|
186
|
-
|
187
|
-
=== Built-In HTML Scrubbers
|
188
|
-
|
189
|
-
Loofah comes with a set of sanitizing scrubbers that use HTML5lib's
|
190
|
-
whitelist algorithm:
|
191
|
-
|
192
|
-
doc.scrub!(:strip) # replaces unknown/unsafe tags with their inner text
|
193
|
-
doc.scrub!(:prune) # removes unknown/unsafe tags and their children
|
194
|
-
doc.scrub!(:escape) # escapes unknown/unsafe tags, like this: &lt;script&gt;
|
195
|
-
doc.scrub!(:whitewash) # removes unknown/unsafe/namespaced tags and their children,
|
196
|
-
# and strips all node attributes
|
197
|
-
|
198
|
-
Loofah also comes with some common transformation tasks:
|
199
|
-
|
200
|
-
doc.scrub!(:nofollow) # adds rel="nofollow" attribute to links
|
201
|
-
|
202
|
-
See Loofah::Scrubbers for more details and example usage.
|
203
|
-
|
204
|
-
=== Chaining Scrubbers
|
205
|
-
|
206
|
-
You can chain scrubbers:
|
207
|
-
|
208
|
-
Loofah.fragment("<span>hello</span> <script>alert('OHAI')</script>") \
|
209
|
-
.scrub!(:prune) \
|
210
|
-
.scrub!(span2div).to_s
|
211
|
-
# => "<div>hello</div> "
|
212
|
-
|
213
|
-
=== Shorthand
|
214
|
-
|
215
|
-
The class methods Loofah.scrub_fragment and Loofah.scrub_document are
|
216
|
-
shorthand.
|
217
|
-
|
218
|
-
Loofah.scrub_fragment(unsafe_html, :prune)
|
219
|
-
Loofah.scrub_document(unsafe_html, :prune)
|
220
|
-
Loofah.scrub_xml_fragment(bad_xml, custom_scrubber)
|
221
|
-
Loofah.scrub_xml_document(bad_xml, custom_scrubber)
|
222
|
-
|
223
|
-
are the same thing as (and arguably semantically clearer than):
|
224
|
-
|
225
|
-
Loofah.fragment(unsafe_html).scrub!(:prune)
|
226
|
-
Loofah.document(unsafe_html).scrub!(:prune)
|
227
|
-
Loofah.xml_fragment(bad_xml).scrub!(custom_scrubber)
|
228
|
-
Loofah.xml_document(bad_xml).scrub!(custom_scrubber)
|
229
|
-
|
230
|
-
=== View Helpers
|
231
|
-
|
232
|
-
Loofah has two "view helpers": Loofah::Helpers.sanitize and
|
233
|
-
Loofah::Helpers.strip_tags, both of which are drop-in replacements for
|
234
|
-
the Rails ActionView helpers of the same name.
|
235
|
-
|
236
|
-
== Requirements
|
237
|
-
|
238
|
-
* Nokogiri >= 1.3.3
|
239
|
-
|
240
|
-
== Installation
|
241
|
-
|
242
|
-
Unsurprisingly:
|
243
|
-
|
244
|
-
* gem install loofah
|
245
|
-
|
246
|
-
== Support
|
247
|
-
|
248
|
-
The bug tracker is available here:
|
249
|
-
|
250
|
-
* http://github.com/flavorjones/loofah/issues
|
251
|
-
|
252
|
-
And the mailing list is on librelist:
|
253
|
-
|
254
|
-
* loofah@librelist.com / http://librelist.com
|
255
|
-
|
256
|
-
And the IRC channel is \#loofah on freenode.
|
257
|
-
|
258
|
-
== Related Links
|
259
|
-
|
260
|
-
* Nokogiri: http://nokogiri.org
|
261
|
-
* libxml2: http://xmlsoft.org
|
262
|
-
* html5lib: http://code.google.com/p/html5lib
|
263
|
-
|
264
|
-
== Authors
|
265
|
-
|
266
|
-
* {Mike Dalessio}[http://mike.daless.io] (@flavorjones[http://twitter.com/flavorjones])
|
267
|
-
* Bryan Helmkamp
|
268
|
-
|
269
|
-
Featuring code contributed by:
|
270
|
-
|
271
|
-
* Aaron Patterson
|
272
|
-
* John Barnette
|
273
|
-
* Josh Owens
|
274
|
-
* Paul Dix
|
275
|
-
* Luke Melia
|
276
|
-
|
277
|
-
And a big shout-out to Corey Innis for the name, and feedback on the API.
|
278
|
-
|
279
|
-
== Thank You
|
280
|
-
|
281
|
-
The following people have generously donated via the Pledgie[http://pledgie.com] badge on the {Loofah github page}[http://github.com/flavorjones/loofah]:
|
282
|
-
|
283
|
-
* Bill Harding
|
284
|
-
|
285
|
-
== Historical Note
|
286
|
-
|
287
|
-
This library was formerly known as Dryopteris, which was a very bad
|
288
|
-
name that nobody could spell properly.
|
289
|
-
|
290
|
-
== License
|
291
|
-
|
292
|
-
The MIT License
|
293
|
-
|
294
|
-
Copyright (c) 2009, 2010 by Mike Dalessio, Bryan Helmkamp
|
295
|
-
|
296
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
297
|
-
of this software and associated documentation files (the "Software"), to deal
|
298
|
-
in the Software without restriction, including without limitation the rights
|
299
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
300
|
-
copies of the Software, and to permit persons to whom the Software is
|
301
|
-
furnished to do so, subject to the following conditions:
|
302
|
-
|
303
|
-
The above copyright notice and this permission notice shall be included in
|
304
|
-
all copies or substantial portions of the Software.
|
305
|
-
|
306
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
307
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
308
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
309
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
310
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
311
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
312
|
-
THE SOFTWARE.
|
data/Rakefile
DELETED
@@ -1,53 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
gem 'hoe', '>= 2.3.0'
|
3
|
-
require 'hoe'
|
4
|
-
|
5
|
-
Hoe.plugin :git
|
6
|
-
Hoe.plugin :gemspec
|
7
|
-
|
8
|
-
Hoe.spec "loofah" do
|
9
|
-
developer "Mike Dalessio", "mike.dalessio@gmail.com"
|
10
|
-
developer "Bryan Helmkamp", "bryan@brynary.com"
|
11
|
-
|
12
|
-
self.extra_rdoc_files = FileList["*.rdoc"]
|
13
|
-
self.history_file = "CHANGELOG.rdoc"
|
14
|
-
self.readme_file = "README.rdoc"
|
15
|
-
|
16
|
-
extra_deps << ["nokogiri", ">=1.3.3"]
|
17
|
-
extra_dev_deps << ["mocha", ">=0.9"]
|
18
|
-
extra_dev_deps << ["shoulda", ">=2.10"]
|
19
|
-
extra_dev_deps << ["rake", ">=0.8"]
|
20
|
-
end
|
21
|
-
|
22
|
-
task :gemspec do
|
23
|
-
system %q(rake debug_gem | grep -v "^\(in " > loofah.gemspec)
|
24
|
-
end
|
25
|
-
|
26
|
-
task :redocs => :fix_css
|
27
|
-
task :docs => :fix_css
|
28
|
-
task :fix_css do
|
29
|
-
better_css = <<-EOT
|
30
|
-
.method-description pre {
|
31
|
-
margin : 1em 0 ;
|
32
|
-
}
|
33
|
-
|
34
|
-
.method-description ul {
|
35
|
-
padding : .5em 0 .5em 2em ;
|
36
|
-
}
|
37
|
-
|
38
|
-
.method-description p {
|
39
|
-
margin-top : .5em ;
|
40
|
-
}
|
41
|
-
|
42
|
-
#main ul, div#documentation ul {
|
43
|
-
list-style-type : disc ! IMPORTANT ;
|
44
|
-
list-style-position : inside ! IMPORTANT ;
|
45
|
-
}
|
46
|
-
|
47
|
-
h2 + ul {
|
48
|
-
margin-top : 1em;
|
49
|
-
}
|
50
|
-
EOT
|
51
|
-
puts "* fixing css"
|
52
|
-
File.open("doc/rdoc.css", "a") { |f| f.write better_css }
|
53
|
-
end
|
data/benchmark/benchmark.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require "#{File.dirname(__FILE__)}/helper.rb"
|
3
|
-
|
4
|
-
def compare_scrub_methods
|
5
|
-
snip = "<div>foo</div><foo>fuxx <b>quux</b></foo><script>i have a chair</script>"
|
6
|
-
puts "starting with:\n#{snip}"
|
7
|
-
puts
|
8
|
-
puts RailsSanitize.new.sanitize(snip) # => Rails.sanitize / scrub!(:prune).to_s
|
9
|
-
puts Loofah::Helpers.sanitize(snip)
|
10
|
-
puts "--"
|
11
|
-
puts RailsSanitize.new.strip_tags(snip) # => Rails.strip_tags / parse().text
|
12
|
-
puts Loofah::Helpers.strip_tags(snip)
|
13
|
-
puts "--"
|
14
|
-
puts Sanitize.clean(snip, Sanitize::Config::RELAXED) # => scrub!(:strip).to_s
|
15
|
-
puts Loofah.scrub_fragment(snip, :strip).to_s
|
16
|
-
puts "--"
|
17
|
-
puts HTML5libSanitize.new.sanitize(snip) # => scrub!(:escape).to_s
|
18
|
-
puts Loofah.scrub_fragment(snip, :escape).to_s
|
19
|
-
puts "--"
|
20
|
-
puts HTMLFilter.new.filter(snip)
|
21
|
-
puts Loofah.scrub_fragment(snip, :strip).to_s
|
22
|
-
puts
|
23
|
-
end
|
24
|
-
|
25
|
-
module TestSet
|
26
|
-
def test_set options={}
|
27
|
-
scale = options[:rehearse] ? 10 : 1
|
28
|
-
puts self.class.name
|
29
|
-
|
30
|
-
n = 100 / scale
|
31
|
-
puts " Large document, #{BIG_FILE.length} bytes (x#{n})"
|
32
|
-
bench BIG_FILE, n, false
|
33
|
-
puts
|
34
|
-
|
35
|
-
n = 1000 / scale
|
36
|
-
puts " Small fragment, #{FRAGMENT.length} bytes (x#{n})"
|
37
|
-
bench FRAGMENT, n, true
|
38
|
-
puts
|
39
|
-
|
40
|
-
n = 10_000 / scale
|
41
|
-
puts " Text snippet, #{SNIPPET.length} bytes (x#{n})"
|
42
|
-
bench SNIPPET, n, true
|
43
|
-
puts
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
class HeadToHead < Measure
|
48
|
-
end
|
49
|
-
|
50
|
-
class HeadToHeadRailsSanitize < Measure
|
51
|
-
include TestSet
|
52
|
-
def bench(content, ntimes, fragment_p)
|
53
|
-
clear_measure
|
54
|
-
|
55
|
-
measure "Loofah::Helpers.sanitize", ntimes do
|
56
|
-
Loofah::Helpers.sanitize content
|
57
|
-
end
|
58
|
-
|
59
|
-
sanitizer = RailsSanitize.new
|
60
|
-
measure "ActionView sanitize", ntimes do
|
61
|
-
sanitizer.sanitize(content)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
class HeadToHeadRailsStripTags < Measure
|
67
|
-
include TestSet
|
68
|
-
def bench(content, ntimes, fragment_p)
|
69
|
-
clear_measure
|
70
|
-
|
71
|
-
measure "Loofah::Helpers.strip_tags", ntimes do
|
72
|
-
Loofah::Helpers.strip_tags content
|
73
|
-
end
|
74
|
-
|
75
|
-
sanitizer = RailsSanitize.new
|
76
|
-
measure "ActionView strip_tags", ntimes do
|
77
|
-
sanitizer.strip_tags(content)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
class HeadToHeadSanitizerSanitize < Measure
|
83
|
-
include TestSet
|
84
|
-
def bench(content, ntimes, fragment_p)
|
85
|
-
clear_measure
|
86
|
-
|
87
|
-
measure "Loofah :strip", ntimes do
|
88
|
-
if fragment_p
|
89
|
-
Loofah.scrub_fragment(content, :strip).to_s
|
90
|
-
else
|
91
|
-
Loofah.scrub_document(content, :strip).to_s
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
measure "Sanitize.clean", ntimes do
|
96
|
-
Sanitize.clean(content, Sanitize::Config::RELAXED)
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
class HeadToHeadHtml5LibSanitize < Measure
|
102
|
-
include TestSet
|
103
|
-
def bench(content, ntimes, fragment_p)
|
104
|
-
clear_measure
|
105
|
-
|
106
|
-
measure "Loofah :escape", ntimes do
|
107
|
-
if fragment_p
|
108
|
-
Loofah.scrub_fragment(content, :escape).to_s
|
109
|
-
else
|
110
|
-
Loofah.scrub_document(content, :escape).to_s
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
html5_sanitizer = HTML5libSanitize.new
|
115
|
-
measure "HTML5lib.sanitize", ntimes do
|
116
|
-
html5_sanitizer.sanitize(content)
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
class HeadToHeadHTMLFilter < Measure
|
122
|
-
include TestSet
|
123
|
-
def bench(content, ntimes, fragment_p)
|
124
|
-
clear_measure
|
125
|
-
|
126
|
-
measure "Loofah::Helpers.sanitize", ntimes do
|
127
|
-
Loofah::Helpers.sanitize content
|
128
|
-
end
|
129
|
-
|
130
|
-
sanitizer = HTMLFilter.new
|
131
|
-
measure "HTMLFilter.filter", ntimes do
|
132
|
-
sanitizer.filter(content)
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
puts "Nokogiri version: #{Nokogiri::VERSION_INFO.inspect}"
|
138
|
-
puts "Loofah version: #{Loofah::VERSION.inspect}"
|
139
|
-
|
140
|
-
benches = []
|
141
|
-
benches << HeadToHeadRailsSanitize.new
|
142
|
-
benches << HeadToHeadRailsStripTags.new
|
143
|
-
benches << HeadToHeadSanitizerSanitize.new
|
144
|
-
benches << HeadToHeadHtml5LibSanitize.new
|
145
|
-
benches << HeadToHeadHTMLFilter.new
|
146
|
-
puts "---------- rehearsal ----------"
|
147
|
-
benches.each { |bench| bench.test_set :rehearse => true }
|
148
|
-
puts "---------- realsies ----------"
|
149
|
-
benches.each { |bench| bench.test_set }
|
data/benchmark/fragment.html
DELETED
@@ -1,96 +0,0 @@
|
|
1
|
-
<div id="top_parent"></div>
|
2
|
-
|
3
|
-
<div id="jump">
|
4
|
-
<a href="#main-articles">Stories</a>
|
5
|
-
<br>
|
6
|
-
<a href="#blocks">Slash Boxes</a>
|
7
|
-
<br>
|
8
|
-
<a href="#comments">Comments</a>
|
9
|
-
</div>
|
10
|
-
<a name="topothepage"></a>
|
11
|
-
<div id="doc3" class="yui-t6 index2 mainpage ac ">
|
12
|
-
<div id="hd" >
|
13
|
-
<div id="logo" >
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
<h1><a href="//slashdot.org"><span>Slashdot</span></a></h1>
|
18
|
-
<div id="slogan"><h2>News for nerds, stuff that matters</h2></div>
|
19
|
-
</div>
|
20
|
-
<a href="#articles" class="hidden">Jump to articles</a>
|
21
|
-
<div class="nav">
|
22
|
-
<ul>
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
<li><a href="//slashdot.org/submit.pl" title="Submit a story to Slashdot">Submit Story</a></li>
|
27
|
-
<li><a href="//slashdot.org/help" title="Frequently asked questions on Slashdot">Help</a></li>
|
28
|
-
<li><a href="//slashdot.org/login.pl" onclick="show_login_box(); return false;">Log In</a></li>
|
29
|
-
|
30
|
-
</ul>
|
31
|
-
</div>
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
<div id="fh_picker_search" style="display: block;">
|
38
|
-
<form method="get" action="//slashdot.org/index2.pl">
|
39
|
-
<fieldset class="mode-filter mode-anon">
|
40
|
-
<legend>Search</legend>
|
41
|
-
|
42
|
-
|
43
|
-
<input class="query" type="text" name="fhfilter" value="" id="searchquery"> <input type="button" class="setfhfilter" value="Filter" id="viewsearch" style="display:none"> <input type="submit" class="setsearchfilter" value="Search" id="fhsearch" style="display:none">
|
44
|
-
<noscript><input type="submit" class="setsearchfilter" value="Search"></noscript>
|
45
|
-
|
46
|
-
<script type="text/javascript">
|
47
|
-
var slash_search;
|
48
|
-
$(function(){
|
49
|
-
if (has_hose()) {
|
50
|
-
var $search_text = $any('searchquery'),
|
51
|
-
$panel = $search_text.closest('fieldset');
|
52
|
-
$search_buttons = $('#viewsearch,#fhsearch'),
|
53
|
-
ws = /\s+/;
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
// The search buttons set the firehose option named by their class.
|
58
|
-
$search_buttons.
|
59
|
-
click(function(){
|
60
|
-
var which=this.className;
|
61
|
-
$search_text.each(function(){
|
62
|
-
firehose_set_options(which, this.value);
|
63
|
-
});
|
64
|
-
return false;
|
65
|
-
});
|
66
|
-
|
67
|
-
// Provide a globally available function that does whatever clicking the search button would do.
|
68
|
-
slash_search = function( query ){
|
69
|
-
query!==undefined && $search_text.val(query);
|
70
|
-
$search_buttons.filter(':visible:first').click();
|
71
|
-
};
|
72
|
-
|
73
|
-
$search_text.
|
74
|
-
keydown(function( e ){ // ESCAPE restores the filter in-effect.
|
75
|
-
if ( e.which == $.ui.keyCode.ESCAPE ) {
|
76
|
-
$search_text.val(firehose_settings.fhfilter||'');
|
77
|
-
return true;
|
78
|
-
}
|
79
|
-
if ( e.which == $.ui.keyCode.ENTER ) {
|
80
|
-
slash_search();
|
81
|
-
return false;
|
82
|
-
}
|
83
|
-
});
|
84
|
-
|
85
|
-
$(document).
|
86
|
-
bind('firehose-setting-setfhfilter firehose-setting-setsearchfilter', function( e, new_query ){
|
87
|
-
$('fieldset input[type=text]').each(function(){
|
88
|
-
$(this).blur().val(new_query);
|
89
|
-
});
|
90
|
-
}).
|
91
|
-
bind('set-options.firehose', function( e, data ){
|
92
|
-
data.select_section && $panel.toggleClass('mode-filter', data.id!=='unsaved');
|
93
|
-
});
|
94
|
-
}
|
95
|
-
});
|
96
|
-
</script>
|
data/benchmark/helper.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'open-uri'
|
3
|
-
require 'hpricot'
|
4
|
-
require File.expand_path(File.dirname(__FILE__) + "/../lib/loofah")
|
5
|
-
require 'benchmark'
|
6
|
-
require "action_view"
|
7
|
-
require "action_controller/vendor/html-scanner"
|
8
|
-
require "sanitize"
|
9
|
-
require 'hitimes'
|
10
|
-
require 'htmlfilter'
|
11
|
-
|
12
|
-
unless defined?(HTMLFilter)
|
13
|
-
HTMLFilter = HtmlFilter
|
14
|
-
end
|
15
|
-
|
16
|
-
class RailsSanitize
|
17
|
-
include ActionView::Helpers::SanitizeHelper
|
18
|
-
extend ActionView::Helpers::SanitizeHelper::ClassMethods
|
19
|
-
end
|
20
|
-
|
21
|
-
class HTML5libSanitize
|
22
|
-
require 'html5/html5parser'
|
23
|
-
require 'html5/liberalxmlparser'
|
24
|
-
require 'html5/treewalkers'
|
25
|
-
require 'html5/treebuilders'
|
26
|
-
require 'html5/serializer'
|
27
|
-
require 'html5/sanitizer'
|
28
|
-
|
29
|
-
include HTML5
|
30
|
-
|
31
|
-
def sanitize(html)
|
32
|
-
HTMLParser.parse_fragment(html, {
|
33
|
-
:tokenizer => HTMLSanitizer,
|
34
|
-
:encoding => 'utf-8',
|
35
|
-
:tree => TreeBuilders::REXML::TreeBuilder
|
36
|
-
}).to_s
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
BIG_FILE = File.read(File.join(File.dirname(__FILE__), "www.slashdot.com.html"))
|
41
|
-
FRAGMENT = File.read(File.join(File.dirname(__FILE__), "fragment.html"))
|
42
|
-
SNIPPET = "This is typical form field input in <b>length and content."
|
43
|
-
|
44
|
-
class Measure
|
45
|
-
def initialize
|
46
|
-
clear_measure
|
47
|
-
end
|
48
|
-
|
49
|
-
def clear_measure
|
50
|
-
@first_time = true
|
51
|
-
@baseline = nil
|
52
|
-
end
|
53
|
-
|
54
|
-
def measure(name, ntimes)
|
55
|
-
if @first_time
|
56
|
-
printf " %-30s %7s %8s %5s\n", "", "total", "single", "rel"
|
57
|
-
@first_time = false
|
58
|
-
end
|
59
|
-
timer = Hitimes::TimedMetric.new(name)
|
60
|
-
timer.start
|
61
|
-
ntimes.times do |j|
|
62
|
-
yield
|
63
|
-
end
|
64
|
-
timer.stop
|
65
|
-
if @baseline
|
66
|
-
printf " %30s %7.3f (%8.6f) %5.2fx\n", timer.name, timer.sum, timer.sum / ntimes, timer.sum / @baseline
|
67
|
-
else
|
68
|
-
@baseline = timer.sum
|
69
|
-
printf " %30s %7.3f (%8.6f) %5s\n", timer.name, timer.sum, timer.sum / ntimes, "-"
|
70
|
-
end
|
71
|
-
timer.sum
|
72
|
-
end
|
73
|
-
end
|