sanitizer 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +20 -0
- data/README +19 -0
- data/README.rdoc +19 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/sanitizer.rb +100 -0
- data/lib/whitelist.rb +180 -0
- data/spec/sanitizer_spec.rb +47 -0
- data/spec/spec_helper.rb +12 -0
- data/tags +0 -0
- metadata +142 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.3.0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.0"
  gem "rcov", ">= 0"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Marcelo Eden
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= sanitizer
|
2
|
+
|
3
|
+
Sanitizer is a very simple and fast string cleaner for Ruby; it uses only simple regular
|
4
|
+
expressions.
|
5
|
+
|
6
|
+
== Contributing to sanitizer
|
7
|
+
|
8
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
9
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
10
|
+
* Fork the project
|
11
|
+
* Start a feature/bugfix branch
|
12
|
+
* Commit and push until you are happy with your contribution
|
13
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
14
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
15
|
+
|
16
|
+
== Copyright
|
17
|
+
|
18
|
+
Copyright (c) 2011 Marcelo Eden. See LICENSE.txt for
|
19
|
+
further details.
|
data/README.rdoc
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= sanitizer
|
2
|
+
|
3
|
+
Sanitizer is a very simple and fast string cleaner for Ruby; it uses only simple regular expressions.
|
4
|
+
|
5
|
+
== Contributing to sanitizer
|
6
|
+
|
7
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
8
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
9
|
+
* Fork the project
|
10
|
+
* Start a feature/bugfix branch
|
11
|
+
* Commit and push until you are happy with your contribution
|
12
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
13
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2011 Marcelo Eden. See LICENSE.txt for
|
18
|
+
further details.
|
19
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# encoding: utf-8

# Build script: sets up Bundler, then defines the Jeweler packaging tasks,
# the RSpec tasks (:spec, :rcov) and the RDoc task.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Report the missing gems and exit with Bundler's own status code.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "sanitizer"
  gem.homepage = "http://github.com/3den/sanitizer"
  gem.license = "MIT"
  gem.summary = %Q{The simplest string cleaner ever made}
  # The library's entry point is Sanitizer.sanitize; it defines no `clean`.
  gem.description = %Q{Sanitizer.sanitize(text)}
  gem.email = "edendroid@gmail.com"
  gem.authors = ["Marcelo Eden"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# Prefer the task class shipped with RDoc; fall back to the legacy
# rake/rdoctask file for old toolchains where rdoc/task is unavailable.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that should not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "sanitizer #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.1
|
data/lib/sanitizer.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'whitelist'
|
2
|
+
|
3
|
+
# Regular-expression based string cleaner.
#
# Every helper except +sanitize+ and +sanitize_css+ modifies the string it is
# given in place and returns that same string.
module Sanitizer

  # Patterns removed by +strip_disallowed_tags+: literal markup first, then
  # the same markup written with &lt; / &gt; entities. All quantifiers are
  # bounded ([^>]* or non-greedy) so one tag can never swallow the rest of
  # the document.
  DISALLOWED_TAG_PATTERNS = [
    /<script\b[^>]*>.*?<\/script\s*>/im,
    /<style\b[^>]*>.*?<\/style\s*>/im,
    /<script\b[^>]*>/im,
    /<link\b[^>]*>/im,
    /&lt;script\b.*?&gt;.*?&lt;\/script\s*&gt;/im,
    /&lt;style\b.*?&gt;.*?&lt;\/style\s*&gt;/im,
    /&lt;script\b.*?&gt;/im,
    /&lt;link\b.*?&gt;/im
  ].freeze

  class << self
    # Returns a sanitized copy of +text+ (tags stripped, whitespace collapsed,
    # dangling ampersands normalized). The argument is not modified, so
    # frozen strings are accepted.
    def sanitize(text)
      sanitize!(text.dup)
    end

    # In-place variant of +sanitize+: modifies +text+ and returns it.
    def sanitize!(text)
      strip_tags(text)
      clean_spaces(text)
      clean_ampersand(text)
      text
    end

    # Collapses every run of whitespace (including newlines) into one space.
    def clean_spaces(text)
      text.gsub!(/\s+/, " ")
      text
    end

    # Rewrites an "&" followed by characters that are neither word characters
    # nor ";" as "& ".
    def clean_ampersand(text)
      text.gsub!(/&[^\w;]+/, "& ")
      text
    end

    # Removes HTML comments, including multi-line ones. The match is
    # non-greedy so text between two separate comments is preserved.
    def strip_comments(text)
      text.gsub!(/<\s?!--.*?-->/m, "")
      text
    end

    # Removes <script>...</script> and <style>...</style> blocks together with
    # their content, plus stray <script ...> and <link ...> tags. The
    # entity-escaped spellings (&lt;script&gt; ...) are removed as well.
    def strip_disallowed_tags(text)
      DISALLOWED_TAG_PATTERNS.each { |pattern| text.gsub!(pattern, "") }
      text
    end

    # Removes tags from +text+, keeping their inner content.
    #
    # With no +tags+ every tag is removed. Otherwise only the named tags are
    # removed; each name is interpolated into the pattern as-is (so a regex
    # fragment such as 'h[1-6]' works) and must match the whole tag name:
    # 'b' removes <b> and </b> but leaves <br> and <blockquote> alone.
    def strip_tags(text, *tags)
      if tags.empty? # clear all tags by default
        text.gsub!(/<\/?[^>]*>/, "")
      else # clean only selected tags
        names = tags.flatten.map { |tag| "(?:#{tag})" }.join('|')
        # The negative lookahead stops a name from matching a longer tag name.
        text.gsub!(/<\/?(?:#{names})(?![\w:\-])[^>]*>/im, "")
      end
      text
    end

    # Some feeds return "escaped" tags inside the content (e.g. &lt;br/&gt;).
    # This converts the &lt; and &gt; entities back into "<" and ">". Per the
    # original author's note it is meant to be used after stripping and
    # sanitizing, so that those tags are not displayed as content.
    # NOTE: the result can contain live markup again.
    def entities_to_chars(text)
      text.gsub!(/&lt;/i, "<")
      text.gsub!(/&gt;/i, ">")
      text
    end

    # Returns a cleaned copy of the CSS declaration list +style+, or '' when
    # the input fails one of the gauntlet checks.
    # This is lifted nearly verbatim from html5lib.
    def sanitize_css(style)
      # disallow urls
      style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/i, ' ')

      # gauntlet -- anchored with \A/\z so the WHOLE value must match; ^/$
      # would accept the input as soon as any single line looked clean.
      return '' unless style =~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*\z/
      return '' unless style =~ /\A\s*([-\w]+\s*:[^:;]*(;\s*|\z))*\z/

      clean = []
      style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
        next if val.empty?
        prop.downcase!
        if HashedWhiteList::ALLOWED_CSS_PROPERTIES[prop]
          clean << "#{prop}: #{val};"
        elsif %w[background border margin padding].include?(prop.split('-')[0])
          # Shorthand properties: every token must be a whitelisted keyword,
          # a colour, or a plain length.
          rejected = val.split.any? do |keyword|
            HashedWhiteList::ALLOWED_CSS_KEYWORDS[keyword].nil? &&
              keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/i
          end
          clean << "#{prop}: #{val};" unless rejected
        elsif HashedWhiteList::ALLOWED_SVG_PROPERTIES[prop]
          clean << "#{prop}: #{val};"
        end
      end

      clean.join(' ')
    end
  end # self
end
|
data/lib/whitelist.rb
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
#
|
2
|
+
# HTML whitelist lifted from HTML5 sanitizer code
|
3
|
+
# http://code.google.com/p/html5lib/
|
4
|
+
#
|
5
|
+
module Sanitizer
|
6
|
+
module WhiteList
|
7
|
+
# <html5_license>
|
8
|
+
#
|
9
|
+
# Copyright (c) 2006-2008 The Authors
|
10
|
+
#
|
11
|
+
# Contributors:
|
12
|
+
# James Graham - jg307@cam.ac.uk
|
13
|
+
# Anne van Kesteren - annevankesteren@gmail.com
|
14
|
+
# Lachlan Hunt - lachlan.hunt@lachy.id.au
|
15
|
+
# Matt McDonald - kanashii@kanashii.ca
|
16
|
+
# Sam Ruby - rubys@intertwingly.net
|
17
|
+
# Ian Hickson (Google) - ian@hixie.ch
|
18
|
+
# Thomas Broyer - t.broyer@ltgt.net
|
19
|
+
# Jacques Distler - distler@golem.ph.utexas.edu
|
20
|
+
# Henri Sivonen - hsivonen@iki.fi
|
21
|
+
# The Mozilla Foundation (contributions from Henri Sivonen since 2008)
|
22
|
+
#
|
23
|
+
# Permission is hereby granted, free of charge, to any person
|
24
|
+
# obtaining a copy of this software and associated documentation
|
25
|
+
# files (the "Software"), to deal in the Software without
|
26
|
+
# restriction, including without limitation the rights to use, copy,
|
27
|
+
# modify, merge, publish, distribute, sublicense, and/or sell copies
|
28
|
+
# of the Software, and to permit persons to whom the Software is
|
29
|
+
# furnished to do so, subject to the following conditions:
|
30
|
+
#
|
31
|
+
# The above copyright notice and this permission notice shall be
|
32
|
+
# included in all copies or substantial portions of the Software.
|
33
|
+
#
|
34
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
35
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
36
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
37
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
38
|
+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
39
|
+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
40
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
41
|
+
# DEALINGS IN THE SOFTWARE.
|
42
|
+
#
|
43
|
+
# </html5_license>
|
44
|
+
|
45
|
+
# ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
|
46
|
+
# button caption center cite code col colgroup dd del dfn dir div dl dt
|
47
|
+
# em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
|
48
|
+
# legend li map menu ol optgroup option p pre q s samp select small span
|
49
|
+
# strike strong sub sup table tbody td textarea tfoot th thead tr tt u
|
50
|
+
# ul var]
|
51
|
+
#
|
52
|
+
# MATHML_ELEMENTS = %w[maction math merror mfrac mi mmultiscripts mn mo
|
53
|
+
# mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub
|
54
|
+
# msubsup msup mtable mtd mtext mtr munder munderover none]
|
55
|
+
#
|
56
|
+
# SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
|
57
|
+
# circle defs desc ellipse font-face font-face-name font-face-src g
|
58
|
+
# glyph hkern image linearGradient line marker metadata missing-glyph
|
59
|
+
# mpath path polygon polyline radialGradient rect set stop svg switch
|
60
|
+
# text title tspan use]
|
61
|
+
#
|
62
|
+
# ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
|
63
|
+
# align alt axis border cellpadding cellspacing char charoff charset
|
64
|
+
# checked cite class clear cols colspan color compact coords datetime
|
65
|
+
# dir disabled enctype for frame headers height href hreflang hspace id
|
66
|
+
# ismap label lang longdesc maxlength media method multiple name nohref
|
67
|
+
# noshade nowrap prompt readonly rel rev rows rowspan rules scope
|
68
|
+
# selected shape size span src start style summary tabindex target title
|
69
|
+
# type usemap valign value vspace width xml:lang]
|
70
|
+
#
|
71
|
+
# MATHML_ATTRIBUTES = %w[actiontype align columnalign columnalign
|
72
|
+
# columnalign columnlines columnspacing columnspan depth display
|
73
|
+
# displaystyle equalcolumns equalrows fence fontstyle fontweight frame
|
74
|
+
# height linethickness lspace mathbackground mathcolor mathvariant
|
75
|
+
# mathvariant maxsize minsize other rowalign rowalign rowalign rowlines
|
76
|
+
# rowspacing rowspan rspace scriptlevel selection separator stretchy
|
77
|
+
# width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
|
78
|
+
#
|
79
|
+
# SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
|
80
|
+
# arabic-form ascent attributeName attributeType baseProfile bbox begin
|
81
|
+
# by calcMode cap-height class color color-rendering content cx cy d dx
|
82
|
+
# dy descent display dur end fill fill-rule font-family font-size
|
83
|
+
# font-stretch font-style font-variant font-weight from fx fy g1 g2
|
84
|
+
# glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
|
85
|
+
# ideographic k keyPoints keySplines keyTimes lang marker-end
|
86
|
+
# marker-mid marker-start markerHeight markerUnits markerWidth
|
87
|
+
# mathematical max min name offset opacity orient origin
|
88
|
+
# overline-position overline-thickness panose-1 path pathLength points
|
89
|
+
# preserveAspectRatio r refX refY repeatCount repeatDur
|
90
|
+
# requiredExtensions requiredFeatures restart rotate rx ry slope stemh
|
91
|
+
# stemv stop-color stop-opacity strikethrough-position
|
92
|
+
# strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
|
93
|
+
# stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
|
94
|
+
# stroke-width systemLanguage target text-anchor to transform type u1
|
95
|
+
# u2 underline-position underline-thickness unicode unicode-range
|
96
|
+
# units-per-em values version viewBox visibility width widths x
|
97
|
+
# x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
|
98
|
+
# xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
|
99
|
+
# xmlns:xlink y y1 y2 zoomAndPan]
|
100
|
+
|
101
|
+
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
|
102
|
+
|
103
|
+
ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
|
104
|
+
border-bottom-color border-collapse border-color border-left-color
|
105
|
+
border-right-color border-top-color clear color cursor direction
|
106
|
+
display elevation float font font-family font-size font-style
|
107
|
+
font-variant font-weight height letter-spacing line-height overflow
|
108
|
+
pause pause-after pause-before pitch pitch-range richness speak
|
109
|
+
speak-header speak-numeral speak-punctuation speech-rate stress
|
110
|
+
text-align text-decoration text-indent unicode-bidi vertical-align
|
111
|
+
voice-family volume white-space width]
|
112
|
+
|
113
|
+
ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
|
114
|
+
brown center collapse dashed dotted fuchsia gray green !important
|
115
|
+
italic left lime maroon medium none navy normal nowrap olive pointer
|
116
|
+
purple red right solid silver teal top transparent underline white
|
117
|
+
yellow]
|
118
|
+
|
119
|
+
ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
|
120
|
+
stroke-width stroke-linecap stroke-linejoin stroke-opacity]
|
121
|
+
|
122
|
+
ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
|
123
|
+
telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
|
124
|
+
|
125
|
+
# subclasses may define their own versions of these constants
|
126
|
+
#ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
|
127
|
+
#ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
|
128
|
+
ALLOWED_ELEMENTS = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
|
129
|
+
'big', 'blockquote', 'br', 'caption', 'center', 'cite',
|
130
|
+
'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
|
131
|
+
'em', 'embed', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
|
132
|
+
'img', 'ins', 'kbd', 'li', 'map', 'menu', 'object', 'ol', 'p', 'param', 'pre', 'q',
|
133
|
+
's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
|
134
|
+
'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
|
135
|
+
'ul', 'var'
|
136
|
+
]
|
137
|
+
|
138
|
+
ALLOWED_ATTRIBUTES = ['abbr', 'accept', 'accept-charset', 'accesskey',
|
139
|
+
'align', 'alt', 'axis', 'border', 'cellpadding',
|
140
|
+
'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'clear',
|
141
|
+
'cols', 'colspan', 'color', 'compact', 'coords', 'data', 'datetime', 'dir',
|
142
|
+
'disabled', 'enctype', 'flashvars', 'for', 'frame', 'headers', 'href', 'hreflang',
|
143
|
+
'hspace', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media',
|
144
|
+
'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt',
|
145
|
+
'readonly', 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected',
|
146
|
+
'shape', 'span', 'src', 'start', 'summary', 'tabindex', 'target',
|
147
|
+
'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'xml:lang', 'width'
|
148
|
+
]
|
149
|
+
|
150
|
+
ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
|
151
|
+
ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
|
152
|
+
ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
|
153
|
+
ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
|
154
|
+
|
155
|
+
VOID_ELEMENTS = %w[
|
156
|
+
base
|
157
|
+
link
|
158
|
+
meta
|
159
|
+
hr
|
160
|
+
br
|
161
|
+
img
|
162
|
+
embed
|
163
|
+
param
|
164
|
+
area
|
165
|
+
col
|
166
|
+
input
|
167
|
+
]
|
168
|
+
end
|
169
|
+
|
170
|
+
module HashedWhiteList
|
171
|
+
# turn each of the whitelist arrays into a hash for faster lookup
|
172
|
+
WhiteList.constants.each do |constant|
|
173
|
+
next unless WhiteList.module_eval("#{constant}").is_a?(Array)
|
174
|
+
module_eval <<-CODE
|
175
|
+
#{constant} = {}
|
176
|
+
WhiteList::#{constant}.each { |c| #{constant}[c] = true ; #{constant}[c.downcase] = true }
|
177
|
+
CODE
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Behaviour examples for Sanitizer.sanitize and Sanitizer.strip_tags.
describe Sanitizer do

  describe "sanitize" do

    # Tags and comments are removed, only the text content is kept.
    it "should strip all tags" do
      markup = "<div><p>Oi <b>como</b> <a href='/xxx/'>Vai</a></p><!-- s --></div>"
      Sanitizer.sanitize(markup).should == 'Oi como Vai'
    end

    # A line break inside the markup is collapsed into a single space.
    it "should clean spaces and tags" do
      markup = "<p>Oi <b>como</b>\nVai</p>"
      Sanitizer.sanitize(markup).should == 'Oi como Vai'
    end

    it "should clean '&' entries" do
      phrase = "Eu & você"
      Sanitizer.sanitize(phrase).should == 'Eu & você'
    end

    it "should not remove valid entries" do
      phrase = "Eu & você"
      Sanitizer.sanitize(phrase).should == 'Eu & você'
    end
  end

  describe "strip_tags" do

    it "should remove only <b> tags" do
      markup = "<p>Oi <b>como</b> <a href='/xxx/'>Vai</a></p><!-- s -->"
      stripped = Sanitizer.strip_tags(markup, 'b')
      stripped.should == "<p>Oi como <a href='/xxx/'>Vai</a></p><!-- s -->"
    end

    it "should remove only <b> and <a> tags" do
      markup = "<p>Oi <b>como</b> <a href='/xxx/'>Vai</a></p><!-- s -->"
      stripped = Sanitizer.strip_tags(markup, 'a', 'b')
      stripped.should == "<p>Oi como Vai</p><!-- s -->"
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Spec bootstrap: puts lib/ and spec/ on the load path, loads RSpec and the
# library, then requires every helper found under spec/support.
spec_dir = File.dirname(__FILE__)

$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
require 'rspec'
require 'sanitizer'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

# No custom RSpec configuration yet.
RSpec.configure do |config|

end
|
data/tags
ADDED
File without changes
|
metadata
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sanitizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 1
|
10
|
+
version: 0.1.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Marcelo Eden
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-05-06 00:00:00 -03:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
prerelease: false
|
23
|
+
type: :development
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 3
|
33
|
+
- 0
|
34
|
+
version: 2.3.0
|
35
|
+
name: rspec
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
prerelease: false
|
39
|
+
type: :development
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 23
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 0
|
49
|
+
- 0
|
50
|
+
version: 1.0.0
|
51
|
+
name: bundler
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
prerelease: false
|
55
|
+
type: :development
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
hash: 15
|
62
|
+
segments:
|
63
|
+
- 1
|
64
|
+
- 6
|
65
|
+
- 0
|
66
|
+
version: 1.6.0
|
67
|
+
name: jeweler
|
68
|
+
version_requirements: *id003
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
prerelease: false
|
71
|
+
type: :development
|
72
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 3
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
name: rcov
|
82
|
+
version_requirements: *id004
|
83
|
+
description: Sanitizer.clean(text)
|
84
|
+
email: edendroid@gmail.com
|
85
|
+
executables: []
|
86
|
+
|
87
|
+
extensions: []
|
88
|
+
|
89
|
+
extra_rdoc_files:
|
90
|
+
- LICENSE.txt
|
91
|
+
- README
|
92
|
+
- README.rdoc
|
93
|
+
files:
|
94
|
+
- .document
|
95
|
+
- .rspec
|
96
|
+
- Gemfile
|
97
|
+
- LICENSE.txt
|
98
|
+
- README
|
99
|
+
- README.rdoc
|
100
|
+
- Rakefile
|
101
|
+
- VERSION
|
102
|
+
- lib/sanitizer.rb
|
103
|
+
- lib/whitelist.rb
|
104
|
+
- spec/sanitizer_spec.rb
|
105
|
+
- spec/spec_helper.rb
|
106
|
+
- tags
|
107
|
+
has_rdoc: true
|
108
|
+
homepage: http://github.com/3den/sanitizer
|
109
|
+
licenses:
|
110
|
+
- MIT
|
111
|
+
post_install_message:
|
112
|
+
rdoc_options: []
|
113
|
+
|
114
|
+
require_paths:
|
115
|
+
- lib
|
116
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
hash: 3
|
122
|
+
segments:
|
123
|
+
- 0
|
124
|
+
version: "0"
|
125
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
+
none: false
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
hash: 3
|
131
|
+
segments:
|
132
|
+
- 0
|
133
|
+
version: "0"
|
134
|
+
requirements: []
|
135
|
+
|
136
|
+
rubyforge_project:
|
137
|
+
rubygems_version: 1.6.2
|
138
|
+
signing_key:
|
139
|
+
specification_version: 3
|
140
|
+
summary: The simplest string cleaner ever made
|
141
|
+
test_files: []
|
142
|
+
|