loofah 1.2.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/CHANGELOG.rdoc +23 -0
- data/Gemfile +5 -4
- data/MIT-LICENSE.txt +3 -1
- data/README.rdoc +4 -2
- data/Rakefile +8 -2
- data/lib/loofah.rb +1 -3
- data/lib/loofah/html/document_fragment.rb +6 -3
- data/lib/loofah/html5/scrub.rb +17 -11
- data/lib/loofah/html5/whitelist.rb +17 -9
- data/lib/loofah/scrubbers.rb +46 -2
- data/test/assets/testdata_sanitizer_tests1.dat +12 -12
- data/test/helper.rb +3 -2
- data/test/html5/test_sanitizer.rb +22 -2
- data/test/integration/test_html.rb +7 -0
- data/test/integration/test_scrubbers.rb +24 -1
- metadata +109 -74
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fd62f1a9a9c2c83eb28fb849b22375ed149287df
|
4
|
+
data.tar.gz: 6672bfebfdf0af96d2351c408a4bc81a362ed029
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3e956204c9e823f5a1fb548b3f2a8edd6431b1398607b76b4d869db8f849311c9f2e7fa52934d1c16aca5e9093b319a59b620485bffb11d02a62dcd70cd9c2f6
|
7
|
+
data.tar.gz: 48e7e8ee165346437f19a082c0828074e090b113d65e80e52679122523417749cf153585047ae8d291bc343550176e0d38bd20085e81b9b6672985f28e6d921c
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,5 +1,28 @@
|
|
1
1
|
= Changelog
|
2
2
|
|
3
|
+
== 2.0.0 / 2014-05-09
|
4
|
+
|
5
|
+
Compatibility notes:
|
6
|
+
|
7
|
+
* ActionView helpers now must be required explicitly: `require "loofah/helpers"`
|
8
|
+
* Support for Ruby 1.8.7 and prior has been dropped
|
9
|
+
|
10
|
+
Enhancements:
|
11
|
+
|
12
|
+
* HTML5 whitelist allows the following ...
|
13
|
+
* tags: `article`, `aside`, `bdi`, `bdo`, `canvas`, `command`, `datalist`, `details`, `figcaption`, `figure`, `footer`, `header`, `mark`, `meter`, `nav`, `output`, `section`, `summary`, `time`
|
14
|
+
* attributes: `data-*` (Thanks, Rafael Franca!)
|
15
|
+
* URI attributes: `poster` and `preload`
|
16
|
+
* Addition of the `:unprintable` scrubber to remove unprintable characters from text nodes. #65 (Thanks, Matt Swanson!)
|
17
|
+
* `Loofah.fragment` accepts an optional encoding argument, compatible with `Nokogiri::HTML::DocumentFragment.parse`. #62 (Thanks, Ben Atkins!)
|
18
|
+
* HTML5 sanitizers now remove attributes without values. (Thanks, Kasper Timm Hansen!)
|
19
|
+
|
20
|
+
Bug fixes:
|
21
|
+
|
22
|
+
* HTML5 sanitizers' CSS keyword check now actually works (broken in v2.0). Additional regression tests added. (Thanks, Kasper Timm Hansen!)
|
23
|
+
* HTML5 sanitizers now allow negative arguments to CSS. #64 (Thanks, Jon Calhoun!)
|
24
|
+
|
25
|
+
|
3
26
|
== 1.2.1 (2012-04-14)
|
4
27
|
|
5
28
|
* Declaring encoding in html5/scrub.rb. Without this, use of the ruby -KU option would cause havoc. (#32)
|
data/Gemfile
CHANGED
@@ -2,18 +2,19 @@
|
|
2
2
|
|
3
3
|
# DO NOT EDIT THIS FILE. Instead, edit Rakefile, and run `rake bundler:gemfile`.
|
4
4
|
|
5
|
-
source
|
5
|
+
source "https://rubygems.org/"
|
6
6
|
|
7
|
-
gem "nokogiri", ">=1.
|
7
|
+
gem "nokogiri", ">=1.5.9"
|
8
8
|
|
9
|
+
gem "rdoc", "~>4.0", :group => [:development, :test]
|
9
10
|
gem "rake", ">=0.8", :group => [:development, :test]
|
10
11
|
gem "minitest", "~>2.2", :group => [:development, :test]
|
11
|
-
gem "rr", "~>1.0", :group => [:development, :test]
|
12
|
+
gem "rr", "~>1.1.0", :group => [:development, :test]
|
12
13
|
gem "json", ">=0", :group => [:development, :test]
|
13
14
|
gem "hoe-gemspec", ">=0", :group => [:development, :test]
|
14
15
|
gem "hoe-debugging", ">=0", :group => [:development, :test]
|
15
16
|
gem "hoe-bundler", ">=0", :group => [:development, :test]
|
16
17
|
gem "hoe-git", ">=0", :group => [:development, :test]
|
17
|
-
gem "hoe", "
|
18
|
+
gem "hoe", "~>3.6", :group => [:development, :test]
|
18
19
|
|
19
20
|
# vim: syntax=ruby
|
data/MIT-LICENSE.txt
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
|
3
|
+
The MIT License
|
4
|
+
|
5
|
+
Copyright (c) 2009 -- 2014 by Mike Dalessio, Bryan Helmkamp
|
4
6
|
|
5
7
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
8
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= Loofah
|
1
|
+
= Loofah {<img src="https://travis-ci.org/flavorjones/loofah.png?branch=master" alt="Build Status" />}[https://travis-ci.org/flavorjones/loofah]
|
2
2
|
|
3
3
|
* http://github.com/flavorjones/loofah
|
4
4
|
* http://rubydoc.info/github/flavorjones/loofah/master/frames
|
@@ -198,6 +198,7 @@ whitelist algorithm:
|
|
198
198
|
Loofah also comes with some common transformation tasks:
|
199
199
|
|
200
200
|
doc.scrub!(:nofollow) # adds rel="nofollow" attribute to links
|
201
|
+
doc.scrub!(:unprintable) # removes unprintable characters from text nodes
|
201
202
|
|
202
203
|
See Loofah::Scrubbers for more details and example usage.
|
203
204
|
|
@@ -232,6 +233,7 @@ are the same thing as (and arguably semantically clearer than):
|
|
232
233
|
Loofah has two "view helpers": Loofah::Helpers.sanitize and
|
233
234
|
Loofah::Helpers.strip_tags, both of which are drop-in replacements for
|
234
235
|
the Rails ActionView helpers of the same name.
|
236
|
+
These are no longer required automatically. You must require `loofah/helpers`.
|
235
237
|
|
236
238
|
== Requirements
|
237
239
|
|
@@ -291,7 +293,7 @@ name that nobody could spell properly.
|
|
291
293
|
|
292
294
|
The MIT License
|
293
295
|
|
294
|
-
Copyright (c) 2009
|
296
|
+
Copyright (c) 2009 -- 2014 by Mike Dalessio, Bryan Helmkamp
|
295
297
|
|
296
298
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
297
299
|
of this software and associated documentation files (the "Software"), to deal
|
data/Rakefile
CHANGED
@@ -14,12 +14,13 @@ Hoe.spec "loofah" do
|
|
14
14
|
self.extra_rdoc_files = FileList["*.rdoc"]
|
15
15
|
self.history_file = "CHANGELOG.rdoc"
|
16
16
|
self.readme_file = "README.rdoc"
|
17
|
+
self.license "MIT"
|
17
18
|
|
18
|
-
extra_deps << ["nokogiri", ">=1.
|
19
|
+
extra_deps << ["nokogiri", ">=1.5.9"]
|
19
20
|
|
20
21
|
extra_dev_deps << ["rake", ">=0.8"]
|
21
22
|
extra_dev_deps << ["minitest", "~>2.2"]
|
22
|
-
extra_dev_deps << ["rr", "~>1.0"]
|
23
|
+
extra_dev_deps << ["rr", "~>1.1.0"]
|
23
24
|
extra_dev_deps << ["json", ">=0"]
|
24
25
|
extra_dev_deps << ["hoe-gemspec", ">=0"]
|
25
26
|
extra_dev_deps << ["hoe-debugging", ">=0"]
|
@@ -66,3 +67,8 @@ task :doc_upload_to_rubyforge => :docs do
|
|
66
67
|
system "rsync -avz --delete * rubyforge.org:/var/www/gforge-projects/loofah/loofah"
|
67
68
|
end
|
68
69
|
end
|
70
|
+
|
71
|
+
desc "generate whitelists from W3C specifications"
|
72
|
+
task :generate_whitelists do
|
73
|
+
load "tasks/generate-whitelists"
|
74
|
+
end
|
data/lib/loofah.rb
CHANGED
@@ -17,8 +17,6 @@ require 'loofah/xml/document_fragment'
|
|
17
17
|
require 'loofah/html/document'
|
18
18
|
require 'loofah/html/document_fragment'
|
19
19
|
|
20
|
-
require 'loofah/helpers'
|
21
|
-
|
22
20
|
# == Strings and IO Objects as Input
|
23
21
|
#
|
24
22
|
# Loofah.document and Loofah.fragment accept any IO object in addition
|
@@ -29,7 +27,7 @@ require 'loofah/helpers'
|
|
29
27
|
#
|
30
28
|
module Loofah
|
31
29
|
# The version of Loofah you are using
|
32
|
-
VERSION = '
|
30
|
+
VERSION = '2.0.0'
|
33
31
|
|
34
32
|
class << self
|
35
33
|
# Shortcut for Loofah::HTML::Document.parse
|
@@ -14,10 +14,13 @@ module Loofah
|
|
14
14
|
# constructor. Applications should use Loofah.fragment to
|
15
15
|
# parse a fragment.
|
16
16
|
#
|
17
|
-
def parse tags
|
17
|
+
def parse tags, encoding = nil
|
18
18
|
doc = Loofah::HTML::Document.new
|
19
|
-
|
20
|
-
|
19
|
+
|
20
|
+
encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
|
21
|
+
doc.encoding = encoding
|
22
|
+
|
23
|
+
new(doc, tags)
|
21
24
|
end
|
22
25
|
end
|
23
26
|
|
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -6,11 +6,7 @@ module Loofah
|
|
6
6
|
module HTML5 # :nodoc:
|
7
7
|
module Scrub
|
8
8
|
|
9
|
-
CONTROL_CHARACTERS =
|
10
|
-
/`|[\000-\040\177\s]+|\302[\200-\240]/
|
11
|
-
else
|
12
|
-
/[`\u0000-\u0020\u007F\s\u0080-\u0101]/
|
13
|
-
end
|
9
|
+
CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
|
14
10
|
|
15
11
|
class << self
|
16
12
|
|
@@ -26,14 +22,20 @@ module Loofah
|
|
26
22
|
else
|
27
23
|
attr_node.node_name
|
28
24
|
end
|
25
|
+
|
26
|
+
if attr_name =~ /\Adata-\w+\z/
|
27
|
+
next
|
28
|
+
end
|
29
|
+
|
29
30
|
unless WhiteList::ALLOWED_ATTRIBUTES.include?(attr_name)
|
30
31
|
attr_node.remove
|
31
32
|
next
|
32
33
|
end
|
34
|
+
|
33
35
|
if WhiteList::ATTR_VAL_IS_URI.include?(attr_name)
|
34
36
|
# this block lifted nearly verbatim from HTML5 sanitization
|
35
37
|
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
|
36
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(
|
38
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0])
|
37
39
|
attr_node.remove
|
38
40
|
next
|
39
41
|
end
|
@@ -49,6 +51,10 @@ module Loofah
|
|
49
51
|
if node.attributes['style']
|
50
52
|
node['style'] = scrub_css node.attributes['style']
|
51
53
|
end
|
54
|
+
|
55
|
+
node.attribute_nodes.each do |attr_node|
|
56
|
+
node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/
|
57
|
+
end
|
52
58
|
end
|
53
59
|
|
54
60
|
# lifted nearly verbatim from html5lib
|
@@ -57,8 +63,8 @@ module Loofah
|
|
57
63
|
style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
|
58
64
|
|
59
65
|
# gauntlet
|
60
|
-
return '' unless style =~
|
61
|
-
return '' unless style =~
|
66
|
+
return '' unless style =~ /\A([-:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
|
67
|
+
return '' unless style =~ /\A\s*([-\w]+\s*:[^:;]*(;\s*|$))*\z/
|
62
68
|
|
63
69
|
clean = []
|
64
70
|
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
|
@@ -66,10 +72,10 @@ module Loofah
|
|
66
72
|
prop.downcase!
|
67
73
|
if WhiteList::ALLOWED_CSS_PROPERTIES.include?(prop)
|
68
74
|
clean << "#{prop}: #{val};"
|
69
|
-
elsif
|
75
|
+
elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(prop.split('-')[0])
|
70
76
|
clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
|
71
|
-
WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) &&
|
72
|
-
keyword !~
|
77
|
+
!WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) &&
|
78
|
+
keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
|
73
79
|
end
|
74
80
|
elsif WhiteList::ALLOWED_SVG_PROPERTIES.include?(prop)
|
75
81
|
clean << "#{prop}: #{val};"
|
@@ -45,12 +45,16 @@ module Loofah
|
|
45
45
|
#
|
46
46
|
# </html5_license>
|
47
47
|
module WhiteList
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
48
|
+
|
49
|
+
ACCEPTABLE_ELEMENTS = Set.new %w[a abbr acronym address area
|
50
|
+
article aside audio b bdi bdo big blockquote br button canvas
|
51
|
+
caption center cite code col colgroup command datalist dd del
|
52
|
+
details dfn dir div dl dt em fieldset figcaption figure footer
|
53
|
+
font form h1 h2 h3 h4 h5 h6 header hr i img input ins kbd label
|
54
|
+
legend li map mark menu meter nav ol output optgroup option p
|
55
|
+
pre q s samp section select small span strike strong sub summary
|
56
|
+
sup table tbody td textarea tfoot th thead time tr tt u ul var
|
57
|
+
video]
|
54
58
|
|
55
59
|
MATHML_ELEMENTS = Set.new %w[annotation annotation-xml maction math merror mfrac
|
56
60
|
mfenced mi mmultiscripts mn mo mover mpadded mphantom mprescripts mroot mrow
|
@@ -70,7 +74,7 @@ module Loofah
|
|
70
74
|
dir disabled enctype for frame headers height href hreflang hspace id
|
71
75
|
ismap label lang longdesc loop loopcount loopend loopstart
|
72
76
|
maxlength media method multiple name nohref
|
73
|
-
noshade nowrap poster prompt readonly rel rev rows rowspan rules scope
|
77
|
+
noshade nowrap poster preload prompt readonly rel rev rows rowspan rules scope
|
74
78
|
selected shape size span src start style summary tabindex target title
|
75
79
|
type usemap valign value vspace width xml:lang]
|
76
80
|
|
@@ -108,7 +112,7 @@ module Loofah
|
|
108
112
|
xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
|
109
113
|
xmlns:xlink y y1 y2 zoomAndPan]
|
110
114
|
|
111
|
-
ATTR_VAL_IS_URI = Set.new %w[href src cite action longdesc xlink:href xml:base]
|
115
|
+
ATTR_VAL_IS_URI = Set.new %w[href src cite action longdesc xlink:href xml:base poster preload]
|
112
116
|
|
113
117
|
SVG_ATTR_VAL_ALLOWS_REF = Set.new %w[clip-path color-profile cursor fill
|
114
118
|
filter marker marker-start marker-mid marker-end mask stroke]
|
@@ -133,9 +137,13 @@ module Loofah
|
|
133
137
|
purple red right solid silver teal top transparent underline white
|
134
138
|
yellow]
|
135
139
|
|
140
|
+
SHORTHAND_CSS_PROPERTIES = Set.new %w[background border margin padding]
|
141
|
+
|
136
142
|
ACCEPTABLE_SVG_PROPERTIES = Set.new %w[fill fill-opacity fill-rule stroke
|
137
143
|
stroke-width stroke-linecap stroke-linejoin stroke-opacity]
|
138
144
|
|
145
|
+
PROTOCOL_SEPARATOR = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i
|
146
|
+
|
139
147
|
ACCEPTABLE_PROTOCOLS = Set.new %w[ed2k ftp http https irc mailto news gopher nntp
|
140
148
|
telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
|
141
149
|
|
@@ -164,7 +172,7 @@ module Loofah
|
|
164
172
|
# additional tags we should consider safe since we have libxml2 fixing up our documents.
|
165
173
|
TAGS_SAFE_WITH_LIBXML2 = Set.new %w[html head body]
|
166
174
|
ALLOWED_ELEMENTS_WITH_LIBXML2 = ALLOWED_ELEMENTS + TAGS_SAFE_WITH_LIBXML2
|
167
|
-
end
|
175
|
+
end
|
168
176
|
|
169
177
|
::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::HTML5::WhiteList
|
170
178
|
end
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -58,6 +58,21 @@ module Loofah
|
|
58
58
|
# Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
|
59
59
|
# => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
|
60
60
|
#
|
61
|
+
#
|
62
|
+
# === Loofah::Scrubbers::Unprintable / scrub!(:unprintable)
|
63
|
+
#
|
64
|
+
# +:unprintable+ removes unprintable Unicode characters.
|
65
|
+
#
|
66
|
+
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
67
|
+
# Loofah.fragment(markup).scrub!(:unprintable)
|
68
|
+
# => "<p>Some text with an unprintable character at the end</p>"
|
69
|
+
#
|
70
|
+
# You may not be able to see the unprintable character in the above example, but there is a
|
71
|
+
# U+2028 character right before the closing </p> tag. These characters can cause issues if
|
72
|
+
# the content is ever parsed by JavaScript - more information here:
|
73
|
+
#
|
74
|
+
# http://timelessrepo.com/json-isnt-a-javascript-subset
|
75
|
+
#
|
61
76
|
module Scrubbers
|
62
77
|
#
|
63
78
|
# === scrub!(:strip)
|
@@ -178,7 +193,7 @@ module Loofah
|
|
178
193
|
def scrub(node)
|
179
194
|
return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
|
180
195
|
node.set_attribute('rel', 'nofollow')
|
181
|
-
return STOP
|
196
|
+
return STOP
|
182
197
|
end
|
183
198
|
end
|
184
199
|
|
@@ -195,6 +210,34 @@ module Loofah
|
|
195
210
|
end
|
196
211
|
end
|
197
212
|
|
213
|
+
#
|
214
|
+
# === scrub!(:unprintable)
|
215
|
+
#
|
216
|
+
# +:unprintable+ removes unprintable Unicode characters.
|
217
|
+
#
|
218
|
+
# markup = "<p>Some text with an unprintable character at the end\u2028</p>"
|
219
|
+
# Loofah.fragment(markup).scrub!(:unprintable)
|
220
|
+
# => "<p>Some text with an unprintable character at the end</p>"
|
221
|
+
#
|
222
|
+
# You may not be able to see the unprintable character in the above example, but there is a
|
223
|
+
# U+2028 character right before the closing </p> tag. These characters can cause issues if
|
224
|
+
# the content is ever parsed by JavaScript - more information here:
|
225
|
+
#
|
226
|
+
# http://timelessrepo.com/json-isnt-a-javascript-subset
|
227
|
+
#
|
228
|
+
class Unprintable < Scrubber
|
229
|
+
def initialize
|
230
|
+
@direction = :top_down
|
231
|
+
end
|
232
|
+
|
233
|
+
def scrub(node)
|
234
|
+
if node.type == Nokogiri::XML::Node::TEXT_NODE
|
235
|
+
node.content = node.content.gsub(/\u2028|\u2029/, '')
|
236
|
+
end
|
237
|
+
CONTINUE
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
198
241
|
#
|
199
242
|
# A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
|
200
243
|
#
|
@@ -204,7 +247,8 @@ module Loofah
|
|
204
247
|
:whitewash => Whitewash,
|
205
248
|
:strip => Strip,
|
206
249
|
:nofollow => NoFollow,
|
207
|
-
:newline_block_elements => NewlineBlockElements
|
250
|
+
:newline_block_elements => NewlineBlockElements,
|
251
|
+
:unprintable => Unprintable
|
208
252
|
}
|
209
253
|
|
210
254
|
#
|
@@ -36,13 +36,13 @@
|
|
36
36
|
{
|
37
37
|
"name": "div_background_image_unicode_encoded",
|
38
38
|
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
|
39
|
-
"output": "<div
|
39
|
+
"output": "<div>foo</div>"
|
40
40
|
},
|
41
41
|
|
42
42
|
{
|
43
43
|
"name": "div_expression",
|
44
44
|
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
|
45
|
-
"output": "<div
|
45
|
+
"output": "<div>foo</div>"
|
46
46
|
},
|
47
47
|
|
48
48
|
{
|
@@ -104,7 +104,7 @@
|
|
104
104
|
{
|
105
105
|
"name": "list_style_image",
|
106
106
|
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
|
107
|
-
"output": "<li
|
107
|
+
"output": "<li>foo</li>"
|
108
108
|
},
|
109
109
|
|
110
110
|
{
|
@@ -138,7 +138,7 @@
|
|
138
138
|
{
|
139
139
|
"name": "non_alpha_non_digit_II",
|
140
140
|
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
|
141
|
-
"output": "<a
|
141
|
+
"output": "<a>foo</a>",
|
142
142
|
"rexml": "Ill-formed XHTML!"
|
143
143
|
},
|
144
144
|
|
@@ -152,7 +152,7 @@
|
|
152
152
|
{
|
153
153
|
"name": "platypus",
|
154
154
|
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
|
155
|
-
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-
|
155
|
+
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
|
156
156
|
},
|
157
157
|
|
158
158
|
{
|
@@ -253,7 +253,7 @@
|
|
253
253
|
{
|
254
254
|
"name": "should_not_fall_for_xss_image_hack_12",
|
255
255
|
"input": "<img src=\"  javascript:alert('XSS');\" />",
|
256
|
-
"output": "<img
|
256
|
+
"output": "<img>",
|
257
257
|
"rexml": "<img />"
|
258
258
|
},
|
259
259
|
|
@@ -404,7 +404,7 @@
|
|
404
404
|
{
|
405
405
|
"name": "xul",
|
406
406
|
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
|
407
|
-
"output": "<p
|
407
|
+
"output": "<p>fubar</p>"
|
408
408
|
},
|
409
409
|
|
410
410
|
{
|
@@ -441,9 +441,9 @@
|
|
441
441
|
{
|
442
442
|
"name": "absolute_uri_ref_with_space_in svg_attribute",
|
443
443
|
"input": "<rect fill=\"url(\nhttp://bad.com/)\" />",
|
444
|
-
"rexml": "<rect
|
445
|
-
"xhtml": "<rect
|
446
|
-
"output": "<rect
|
444
|
+
"rexml": "<rect></rect>",
|
445
|
+
"xhtml": "<rect></rect>",
|
446
|
+
"output": "<rect/>"
|
447
447
|
},
|
448
448
|
|
449
449
|
{
|
@@ -484,14 +484,14 @@
|
|
484
484
|
"xhtml": "<div style='color: blue;'></div>",
|
485
485
|
"rexml": "<div style='color: blue;'></div>"
|
486
486
|
},
|
487
|
-
|
487
|
+
|
488
488
|
{
|
489
489
|
"name": "attributes_with_embedded_quotes",
|
490
490
|
"input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
|
491
491
|
"output": "<img src='doesntexist.jpg%22'onerror=%22alert(1)'>",
|
492
492
|
"rexml": "Ill-formed XHTML!"
|
493
493
|
},
|
494
|
-
|
494
|
+
|
495
495
|
{
|
496
496
|
"name": "attributes_with_embedded_quotes_II",
|
497
497
|
"input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
|
data/test/helper.rb
CHANGED
@@ -6,11 +6,12 @@ require 'minitest/autorun'
|
|
6
6
|
|
7
7
|
require File.expand_path(File.join(File.dirname(__FILE__), "..", "lib", "loofah"))
|
8
8
|
|
9
|
+
# require the ActionView helpers here, since they are no longer required automatically
|
10
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "..", "lib", "loofah", "helpers"))
|
11
|
+
|
9
12
|
puts "=> testing with Nokogiri #{Nokogiri::VERSION_INFO.inspect}"
|
10
13
|
|
11
14
|
class Loofah::TestCase < MiniTest::Spec
|
12
|
-
include RR::Adapters::TestUnit
|
13
|
-
|
14
15
|
class << self
|
15
16
|
alias_method :context, :describe
|
16
17
|
end
|
@@ -88,6 +88,15 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
|
+
def test_should_allow_data_attributes
|
92
|
+
input = "<p data-foo='foo'>foo <bad>bar</bad> baz</p>"
|
93
|
+
|
94
|
+
output = "<p data-foo='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
|
95
|
+
htmloutput = "<p data-foo='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
|
96
|
+
|
97
|
+
check_sanitization(input, htmloutput, output, output)
|
98
|
+
end
|
99
|
+
|
91
100
|
##
|
92
101
|
## libxml2 downcases attributes, so this is moot.
|
93
102
|
##
|
@@ -146,6 +155,11 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
146
155
|
end
|
147
156
|
end
|
148
157
|
|
158
|
+
def test_figure_element_is_valid
|
159
|
+
fragment = Loofah.scrub_fragment("<span>hello</span> <figure>asd</figure>", :prune)
|
160
|
+
assert fragment.at_css("figure"), "<figure> tag was scrubbed"
|
161
|
+
end
|
162
|
+
|
149
163
|
##
|
150
164
|
## as tenderlove says, "care < 0"
|
151
165
|
##
|
@@ -162,7 +176,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
162
176
|
# This affects only NS4. Is it worth fixing?
|
163
177
|
# def test_javascript_includes
|
164
178
|
# input = %(<div size="&{alert('XSS')}">foo</div>)
|
165
|
-
# output = "<div>foo</div>"
|
179
|
+
# output = "<div>foo</div>"
|
166
180
|
# check_sanitization(input, output, output, output)
|
167
181
|
# end
|
168
182
|
|
@@ -187,7 +201,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
187
201
|
end
|
188
202
|
|
189
203
|
## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
|
190
|
-
HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
|
204
|
+
HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
|
191
205
|
define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
|
192
206
|
input = "<rect fill='url(#foo)' />"
|
193
207
|
output = "<rect fill='url(#foo)'></rect>"
|
@@ -200,6 +214,12 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
200
214
|
check_sanitization(input, output, output, output)
|
201
215
|
end
|
202
216
|
end
|
217
|
+
|
218
|
+
def test_css_negative_value_sanitization
|
219
|
+
html = "<span style=\"letter-spacing:-0.03em;\">"
|
220
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
221
|
+
assert_match %r/-0.03em/, sane.inner_html
|
222
|
+
end
|
203
223
|
end
|
204
224
|
|
205
225
|
# <html5_license>
|
@@ -29,6 +29,13 @@ class IntegrationTestHtml < Loofah::TestCase
|
|
29
29
|
assert_equal "\ntweedle\n\nbeetle\n", html.to_text
|
30
30
|
end
|
31
31
|
end
|
32
|
+
|
33
|
+
context 'with an `encoding` arg' do
|
34
|
+
it "sets the parent document's encoding to accordingly" do
|
35
|
+
html = Loofah.fragment "<style>foo</style><div>bar</div>", 'US-ASCII'
|
36
|
+
assert_equal 'US-ASCII', html.document.encoding
|
37
|
+
end
|
38
|
+
end
|
32
39
|
end
|
33
40
|
|
34
41
|
context "html document" do
|
@@ -13,6 +13,9 @@ class IntegrationTestScrubbers < Loofah::TestCase
|
|
13
13
|
NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
|
14
14
|
NOFOLLOW_RESULT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
|
15
15
|
|
16
|
+
UNPRINTABLE_FRAGMENT = "<b>Lo\u2029ofah ro\u2028cks!</b>"
|
17
|
+
UNPRINTABLE_RESULT = "<b>Loofah rocks!</b>"
|
18
|
+
|
16
19
|
ENTITY_FRAGMENT = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
|
17
20
|
ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t)
|
18
21
|
|
@@ -71,6 +74,16 @@ class IntegrationTestScrubbers < Loofah::TestCase
|
|
71
74
|
assert_equal doc, result
|
72
75
|
end
|
73
76
|
end
|
77
|
+
|
78
|
+
context ":unprintable" do
|
79
|
+
it "removes unprintable unicode characters" do
|
80
|
+
doc = Loofah::HTML::Document.parse "<html><body>#{UNPRINTABLE_FRAGMENT}</body></html>"
|
81
|
+
result = doc.scrub! :unprintable
|
82
|
+
|
83
|
+
assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
|
84
|
+
assert_equal doc, result
|
85
|
+
end
|
86
|
+
end
|
74
87
|
end
|
75
88
|
|
76
89
|
context "#scrub_document" do
|
@@ -187,7 +200,7 @@ class IntegrationTestScrubbers < Loofah::TestCase
|
|
187
200
|
end
|
188
201
|
end
|
189
202
|
end
|
190
|
-
|
203
|
+
|
191
204
|
context "DocumentFragment" do
|
192
205
|
context "#scrub!" do
|
193
206
|
context ":escape" do
|
@@ -239,6 +252,16 @@ class IntegrationTestScrubbers < Loofah::TestCase
|
|
239
252
|
assert_equal doc, result
|
240
253
|
end
|
241
254
|
end
|
255
|
+
|
256
|
+
context ":unprintable" do
|
257
|
+
it "removes unprintable unicode characters" do
|
258
|
+
doc = Loofah::HTML::DocumentFragment.parse "<div>#{UNPRINTABLE_FRAGMENT}</div>"
|
259
|
+
result = doc.scrub! :unprintable
|
260
|
+
|
261
|
+
assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
|
262
|
+
assert_equal doc, result
|
263
|
+
end
|
264
|
+
end
|
242
265
|
end
|
243
266
|
|
244
267
|
context "#scrub_fragment" do
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
5
|
-
prerelease:
|
4
|
+
version: 2.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Mike Dalessio
|
@@ -10,149 +9,185 @@ authors:
|
|
10
9
|
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2014-05-09 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: nokogiri
|
17
|
-
requirement:
|
18
|
-
none: false
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
19
17
|
requirements:
|
20
|
-
- -
|
18
|
+
- - '>='
|
21
19
|
- !ruby/object:Gem::Version
|
22
|
-
version: 1.
|
20
|
+
version: 1.5.9
|
23
21
|
type: :runtime
|
24
22
|
prerelease: false
|
25
|
-
version_requirements:
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - '>='
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 1.5.9
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rdoc
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ~>
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '4.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ~>
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '4.0'
|
26
42
|
- !ruby/object:Gem::Dependency
|
27
43
|
name: rake
|
28
|
-
requirement:
|
29
|
-
none: false
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
30
45
|
requirements:
|
31
|
-
- -
|
46
|
+
- - '>='
|
32
47
|
- !ruby/object:Gem::Version
|
33
48
|
version: '0.8'
|
34
49
|
type: :development
|
35
50
|
prerelease: false
|
36
|
-
version_requirements:
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0.8'
|
37
56
|
- !ruby/object:Gem::Dependency
|
38
57
|
name: minitest
|
39
|
-
requirement:
|
40
|
-
none: false
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
41
59
|
requirements:
|
42
60
|
- - ~>
|
43
61
|
- !ruby/object:Gem::Version
|
44
62
|
version: '2.2'
|
45
63
|
type: :development
|
46
64
|
prerelease: false
|
47
|
-
version_requirements:
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '2.2'
|
48
70
|
- !ruby/object:Gem::Dependency
|
49
71
|
name: rr
|
50
|
-
requirement:
|
51
|
-
none: false
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
52
73
|
requirements:
|
53
74
|
- - ~>
|
54
75
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
76
|
+
version: 1.1.0
|
56
77
|
type: :development
|
57
78
|
prerelease: false
|
58
|
-
version_requirements:
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ~>
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: 1.1.0
|
59
84
|
- !ruby/object:Gem::Dependency
|
60
85
|
name: json
|
61
|
-
requirement:
|
62
|
-
none: false
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
63
87
|
requirements:
|
64
|
-
- -
|
88
|
+
- - '>='
|
65
89
|
- !ruby/object:Gem::Version
|
66
90
|
version: '0'
|
67
91
|
type: :development
|
68
92
|
prerelease: false
|
69
|
-
version_requirements:
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
70
98
|
- !ruby/object:Gem::Dependency
|
71
99
|
name: hoe-gemspec
|
72
|
-
requirement:
|
73
|
-
none: false
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
74
101
|
requirements:
|
75
|
-
- -
|
102
|
+
- - '>='
|
76
103
|
- !ruby/object:Gem::Version
|
77
104
|
version: '0'
|
78
105
|
type: :development
|
79
106
|
prerelease: false
|
80
|
-
version_requirements:
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
81
112
|
- !ruby/object:Gem::Dependency
|
82
113
|
name: hoe-debugging
|
83
|
-
requirement:
|
84
|
-
none: false
|
114
|
+
requirement: !ruby/object:Gem::Requirement
|
85
115
|
requirements:
|
86
|
-
- -
|
116
|
+
- - '>='
|
87
117
|
- !ruby/object:Gem::Version
|
88
118
|
version: '0'
|
89
119
|
type: :development
|
90
120
|
prerelease: false
|
91
|
-
version_requirements:
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
92
126
|
- !ruby/object:Gem::Dependency
|
93
127
|
name: hoe-bundler
|
94
|
-
requirement:
|
95
|
-
none: false
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
96
129
|
requirements:
|
97
|
-
- -
|
130
|
+
- - '>='
|
98
131
|
- !ruby/object:Gem::Version
|
99
132
|
version: '0'
|
100
133
|
type: :development
|
101
134
|
prerelease: false
|
102
|
-
version_requirements:
|
135
|
+
version_requirements: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - '>='
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
103
140
|
- !ruby/object:Gem::Dependency
|
104
141
|
name: hoe-git
|
105
|
-
requirement:
|
106
|
-
none: false
|
142
|
+
requirement: !ruby/object:Gem::Requirement
|
107
143
|
requirements:
|
108
|
-
- -
|
144
|
+
- - '>='
|
109
145
|
- !ruby/object:Gem::Version
|
110
146
|
version: '0'
|
111
147
|
type: :development
|
112
148
|
prerelease: false
|
113
|
-
version_requirements:
|
149
|
+
version_requirements: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - '>='
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
114
154
|
- !ruby/object:Gem::Dependency
|
115
155
|
name: hoe
|
116
|
-
requirement:
|
117
|
-
none: false
|
156
|
+
requirement: !ruby/object:Gem::Requirement
|
118
157
|
requirements:
|
119
158
|
- - ~>
|
120
159
|
- !ruby/object:Gem::Version
|
121
|
-
version: '
|
160
|
+
version: '3.11'
|
122
161
|
type: :development
|
123
162
|
prerelease: false
|
124
|
-
version_requirements:
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
163
|
+
version_requirements: !ruby/object:Gem::Requirement
|
164
|
+
requirements:
|
165
|
+
- - ~>
|
166
|
+
- !ruby/object:Gem::Version
|
167
|
+
version: '3.11'
|
168
|
+
description: |-
|
169
|
+
Loofah is a general library for manipulating and transforming HTML/XML
|
170
|
+
documents and fragments. It's built on top of Nokogiri and libxml2, so
|
171
|
+
it's fast and has a nice API.
|
131
172
|
|
132
173
|
Loofah excels at HTML sanitization (XSS prevention). It includes some
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
most likely won''t make your codes less secure. (These statements have
|
137
|
-
|
174
|
+
nice HTML sanitizers, which are based on HTML5lib's whitelist, so it
|
175
|
+
most likely won't make your codes less secure. (These statements have
|
138
176
|
not been evaluated by Netexperts.)
|
139
177
|
|
140
|
-
|
141
178
|
ActiveRecord extensions for sanitization are available in the
|
142
|
-
|
143
179
|
`loofah-activerecord` gem (see
|
144
|
-
|
145
|
-
http://github.com/flavorjones/loofah-activerecord).'
|
180
|
+
http://github.com/flavorjones/loofah-activerecord).
|
146
181
|
email:
|
147
182
|
- mike.dalessio@gmail.com
|
148
183
|
- bryan@brynary.com
|
149
184
|
executables: []
|
150
185
|
extensions: []
|
151
186
|
extra_rdoc_files:
|
187
|
+
- CHANGELOG.rdoc
|
152
188
|
- MIT-LICENSE.txt
|
153
189
|
- Manifest.txt
|
154
190
|
- README.rdoc
|
155
|
-
- CHANGELOG.rdoc
|
156
191
|
files:
|
157
192
|
- CHANGELOG.rdoc
|
158
193
|
- Gemfile
|
@@ -192,7 +227,9 @@ files:
|
|
192
227
|
- test/unit/test_scrubbers.rb
|
193
228
|
- .gemtest
|
194
229
|
homepage: http://github.com/flavorjones/loofah
|
195
|
-
licenses:
|
230
|
+
licenses:
|
231
|
+
- MIT
|
232
|
+
metadata: {}
|
196
233
|
post_install_message:
|
197
234
|
rdoc_options:
|
198
235
|
- --main
|
@@ -200,33 +237,31 @@ rdoc_options:
|
|
200
237
|
require_paths:
|
201
238
|
- lib
|
202
239
|
required_ruby_version: !ruby/object:Gem::Requirement
|
203
|
-
none: false
|
204
240
|
requirements:
|
205
|
-
- -
|
241
|
+
- - '>='
|
206
242
|
- !ruby/object:Gem::Version
|
207
243
|
version: '0'
|
208
244
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
209
|
-
none: false
|
210
245
|
requirements:
|
211
|
-
- -
|
246
|
+
- - '>='
|
212
247
|
- !ruby/object:Gem::Version
|
213
248
|
version: '0'
|
214
249
|
requirements: []
|
215
|
-
rubyforge_project:
|
216
|
-
rubygems_version:
|
250
|
+
rubyforge_project:
|
251
|
+
rubygems_version: 2.0.3
|
217
252
|
signing_key:
|
218
|
-
specification_version:
|
253
|
+
specification_version: 4
|
219
254
|
summary: Loofah is a general library for manipulating and transforming HTML/XML documents
|
220
255
|
and fragments
|
221
256
|
test_files:
|
222
257
|
- test/html5/test_sanitizer.rb
|
223
|
-
- test/integration/test_html.rb
|
224
|
-
- test/integration/test_helpers.rb
|
225
|
-
- test/integration/test_ad_hoc.rb
|
226
|
-
- test/integration/test_scrubbers.rb
|
227
|
-
- test/integration/test_xml.rb
|
228
|
-
- test/unit/test_scrubber.rb
|
229
258
|
- test/unit/test_helpers.rb
|
230
259
|
- test/unit/test_scrubbers.rb
|
231
|
-
- test/unit/
|
260
|
+
- test/unit/test_scrubber.rb
|
232
261
|
- test/unit/test_encoding.rb
|
262
|
+
- test/unit/test_api.rb
|
263
|
+
- test/integration/test_helpers.rb
|
264
|
+
- test/integration/test_xml.rb
|
265
|
+
- test/integration/test_ad_hoc.rb
|
266
|
+
- test/integration/test_scrubbers.rb
|
267
|
+
- test/integration/test_html.rb
|