htmlfilter 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +6 -0
- data/Manifest.txt +19 -0
- data/README.rdoc +53 -0
- data/Rakefile +15 -0
- data/TODO +7 -0
- data/lib/cssfilter.rb +226 -0
- data/lib/htmlfilter/multiton.rb +386 -0
- data/lib/htmlfilter.rb +516 -0
- data/meta/package +1 -0
- data/meta/project +1 -0
- data/meta/title +1 -0
- data/meta/version +1 -0
- data/test/test_cssfilter.rb +35 -0
- data/test/test_htmlfilter.rb +70 -0
- metadata +75 -0
data/History.rdoc
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#!mast bin lib meta test [A-Z]*
|
2
|
+
lib
|
3
|
+
lib/cssfilter.rb
|
4
|
+
lib/htmlfilter
|
5
|
+
lib/htmlfilter/multiton.rb
|
6
|
+
lib/htmlfilter.rb
|
7
|
+
meta
|
8
|
+
meta/package
|
9
|
+
meta/project
|
10
|
+
meta/title
|
11
|
+
meta/version
|
12
|
+
test
|
13
|
+
test/test_cssfilter.rb
|
14
|
+
test/test_htmlfilter.rb
|
15
|
+
Rakefile
|
16
|
+
Manifest.txt
|
17
|
+
TODO
|
18
|
+
README.rdoc
|
19
|
+
History.rdoc
|
data/README.rdoc
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
= HtmlFilter
|
2
|
+
|
3
|
+
* http://rubyworks.github.com/htmlfilter
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
HTML Filter library can be used to sanitize and sterilize
|
8
|
+
HTML. A good idea if you let users submit HTML in comments,
|
9
|
+
for instance.
|
10
|
+
|
11
|
+
This library also includes CssFilter. The CssFilter class will
|
12
|
+
clean-up a cascading style sheet. It can be used to remove
|
13
|
+
whitespace and most importantly remove urls.
|
14
|
+
|
15
|
+
== FEATURES:
|
16
|
+
|
17
|
+
* Sanitize HTML
|
18
|
+
* Compress CSS
|
19
|
+
|
20
|
+
== SYNOPSIS:
|
21
|
+
|
22
|
+
Via the class.
|
23
|
+
|
24
|
+
html = "<<b>hello</b>"
|
25
|
+
|
26
|
+
HtmlFilter.new(options).filter(html)
|
27
|
+
|
28
|
+
Or using the String extension.
|
29
|
+
|
30
|
+
html.html_filter #=> "<b>hello</b>"
|
31
|
+
|
32
|
+
See RDocs for more information.
|
33
|
+
|
34
|
+
== REQUIREMENTS:
|
35
|
+
|
36
|
+
* Uses a copy of multiton.rb (included)
|
37
|
+
|
38
|
+
== INSTALL:
|
39
|
+
|
40
|
+
* sudo gem install htmlfilter
|
41
|
+
|
42
|
+
== LICENSE:
|
43
|
+
|
44
|
+
(Creative Commons Attribution-ShareAlike License)
|
45
|
+
|
46
|
+
Copyright (c) 2009 Thomas Sawyer
|
47
|
+
|
48
|
+
See http://creativecommons.org/licenses/by-sa/3.0/deed.en
|
49
|
+
|
50
|
+
HtmlFilter is a port of lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>.
|
51
|
+
This code is licensed under a Creative Commons Attribution-ShareAlike 2.5 License.
|
52
|
+
See http://creativecommons.org/licenses/by-sa/2.5/.
|
53
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
#$: << './lib'
|
4
|
+
#require 'rubygems'
|
5
|
+
#require 'hoe'
|
6
|
+
#require 'htmlfilter'
|
7
|
+
#Hoe.new('htmlfilter', HtmlFilter::VERSION) do |p|
|
8
|
+
# p.rubyforge_name = 'death' # if different than lowercase project name
|
9
|
+
# p.developer('Thomas Sawyer', 'transfire@gmail.com')
|
10
|
+
#end
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
# vim: syntax=Ruby
|
15
|
+
|
data/TODO
ADDED
data/lib/cssfilter.rb
ADDED
@@ -0,0 +1,226 @@
|
|
1
|
+
# = CSS Filter
|
2
|
+
#
|
3
|
+
# The CssFilter class will clean up a cascading style sheet.
|
4
|
+
# It can be used to remove whitespace and most importantly
|
5
|
+
# remove urls.
|
6
|
+
#
|
7
|
+
# == Authors
|
8
|
+
#
|
9
|
+
# * Trans
|
10
|
+
#
|
11
|
+
# == Todo
|
12
|
+
#
|
13
|
+
# * Allow urls to be specified per attribute type.
|
14
|
+
#
|
15
|
+
# == Copying
|
16
|
+
#
|
17
|
+
# Copyright (c) 2007 7rans
|
18
|
+
|
19
|
+
#require 'htmlfilter/uri'
|
20
|
+
require 'uri'
|
21
|
+
|
22
|
+
# = CSS Filter
|
23
|
+
#
|
24
|
+
# The CssFilter class will clean up a cascading style sheet.
|
25
|
+
# It can be used to remove whitespace and most importantly
|
26
|
+
# remove urls.
|
27
|
+
#
|
28
|
+
class CssFilter
|
29
|
+
VERSION="1.0.0"
|
30
|
+
|
31
|
+
# should we remove comments? (true, false)
|
32
|
+
attr_accessor :strip_comments
|
33
|
+
|
34
|
+
# should we remove urls? (true, false)
|
35
|
+
attr_accessor :strip_urls
|
36
|
+
|
37
|
+
# url schemes which will be allowed (http, ftp, mailto)
|
38
|
+
attr_accessor :allowed_scheme
|
39
|
+
|
40
|
+
# alias for allowed_scheme
|
41
|
+
alias_method :allowed_protocols, :allowed_scheme
|
42
|
+
alias_method :allowed_protocols=, :allowed_scheme=
|
43
|
+
|
44
|
+
# url hosts which will be allowed.
|
45
|
+
attr_accessor :allowed_hosts
|
46
|
+
|
47
|
+
# urls which will be allowed. (NOT YET USED)
|
48
|
+
attr_accessor :allowed_urls
|
49
|
+
|
50
|
+
# substitute urls (NOT YET USED)
|
51
|
+
attr_accessor :substitute_urls
|
52
|
+
|
53
|
+
# remove leading and trailing whitespace.
|
54
|
+
attr_accessor :strip_whitespace
|
55
|
+
|
56
|
+
# remove blank lines.
|
57
|
+
attr_accessor :strip_blanklines
|
58
|
+
|
59
|
+
# Complete parse and rewrite of CSS document.
|
60
|
+
# This does a complete "cleaning" but note that
|
61
|
+
# is not yet a perfect parser.
|
62
|
+
attr_accessor :rewrite
|
63
|
+
|
64
|
+
# CssFilter option defaults.
|
65
|
+
|
66
|
+
DEFAULT = {
|
67
|
+
'strip_comments' => true,
|
68
|
+
'strip_urls' => true,
|
69
|
+
'allowed_urls' => [],
|
70
|
+
'allowed_hosts' => [],
|
71
|
+
'allowed_scheme' => [],
|
72
|
+
'strip_whitespace' => false,
|
73
|
+
'strip_blanklines' => true,
|
74
|
+
'rewrite' => false,
|
75
|
+
'substitute_urls' => {}
|
76
|
+
}
|
77
|
+
|
78
|
+
#
|
79
|
+
|
80
|
+
def initialize(options=nil)
|
81
|
+
if options
|
82
|
+
h = DEFAULT.dup
|
83
|
+
options.each do |k,v|
|
84
|
+
h[k.to_s] = v
|
85
|
+
end
|
86
|
+
options = h
|
87
|
+
else
|
88
|
+
options = DEFAULT.dup
|
89
|
+
end
|
90
|
+
|
91
|
+
options.each{ |k,v| send("#{k}=",v) }
|
92
|
+
end
|
93
|
+
|
94
|
+
#
|
95
|
+
|
96
|
+
# Adds +host+ to the hosts whose urls are left in place by the filter.
#
# The previous body appended to @hosts, an instance variable that nothing
# in this class assigns, so every call failed with NoMethodError on nil.
# The list consulted by remove_urls is allowed_hosts, so that is what is
# updated here.
#
# A new array is assigned instead of appending in place: initialize copies
# DEFAULT with a shallow dup, so the array currently held in allowed_hosts
# may be the very object stored in the DEFAULT constant.
#
# host - host name to accept.
#
# Returns the updated allowed_hosts array.
def accept_host(host)
  self.allowed_hosts = Array(allowed_hosts) + [host]
end
|
99
|
+
|
100
|
+
#
|
101
|
+
|
102
|
+
def filter(css)
|
103
|
+
css = remove_comments(css) if strip_comments
|
104
|
+
css = remove_urls(css) if strip_urls
|
105
|
+
|
106
|
+
css = remove_nullvalues(css)
|
107
|
+
|
108
|
+
css = remove_whitespace(css) if strip_whitespace
|
109
|
+
css = remove_blanklines(css) if strip_blanklines
|
110
|
+
|
111
|
+
css = parse(css).to_css if rewrite
|
112
|
+
css
|
113
|
+
end
|
114
|
+
|
115
|
+
#
|
116
|
+
|
117
|
+
# Strips every /* ... */ comment from the stylesheet text.
#
# The previous pattern read `.8?` where `.*?` was intended, so it only
# matched comments whose body was a single character (optionally followed
# by an "8"). The /m option lets `.` cross newlines so that comments
# spanning several lines are removed as well.
#
# data - css source String.
#
# Returns a new String; +data+ itself is not modified.
def remove_comments(data)
  data.gsub(%r{/\*.*?\*/}m, '')
end
|
120
|
+
|
121
|
+
# TODO: allowed_urls
|
122
|
+
|
123
|
+
# Removes urls found inside url(...) unless their host is listed in
# allowed_hosts or their scheme is listed in allowed_scheme, then drops
# any url() left empty.
#
# Changes from the previous version:
# * works on a copy -- the old body called sub! on the argument, which
#   modified the caller's string and raised on a frozen one;
# * a candidate that URI.parse rejects is treated as not allowed and
#   removed, instead of letting URI::InvalidURIError escape the filter.
#
# data - css source String.
#
# Returns a new String.
def remove_urls(data)
  cleaned = data.dup

  candidates = cleaned.scan(/url\((.*?)\)/).flatten
  candidates.flat_map { |text| URI.extract(text) }.each do |found|
    uri = begin
      URI.parse(found)
    rescue URI::InvalidURIError
      nil
    end

    permitted = uri &&
                (allowed_hosts.include?(uri.host) || allowed_scheme.include?(uri.scheme))

    # String argument, so the url text is matched literally (first occurrence).
    cleaned.sub!(found, '') unless permitted
  end

  cleaned.gsub(/url\(\s*\)/, '')
end
|
135
|
+
|
136
|
+
#
|
137
|
+
|
138
|
+
def remove_whitespace(data)
|
139
|
+
data = data.gsub(/^\s*/,'')
|
140
|
+
data = data.gsub(/\s*$/,'')
|
141
|
+
end
|
142
|
+
|
143
|
+
#
|
144
|
+
|
145
|
+
def remove_blanklines(data)
|
146
|
+
data = data.gsub(/^\s*\n/,'')
|
147
|
+
end
|
148
|
+
|
149
|
+
#
|
150
|
+
|
151
|
+
# Removes declarations that have no value, e.g. "color: ;".
#
# Two gaps in the previous pattern are closed:
# * it demanded at least one whitespace character after the colon, so
#   "background:;" -- exactly what remove_urls leaves behind once a url()
#   has been emptied -- was never removed;
# * the property name was matched with \w+, which stops at a hyphen, so
#   "background-color: ;" was cut down to a dangling "background-".
#
# data - css source String.
#
# Returns a new String.
def remove_nullvalues(data)
  data.gsub(/[\w-]+:\s*;/, '')
end
|
154
|
+
|
155
|
+
# Breaks a css document up into a hash. This can be used
|
156
|
+
# to completely rewrite the css.
|
157
|
+
#
|
158
|
+
# TODO: Not complete, does not work with "@xxx foo;" for example.
|
159
|
+
|
160
|
+
def parse(css)
|
161
|
+
tree = CssTree.new
|
162
|
+
entries = css.scan(/^(.*?)\{(.*?)\}/m)
|
163
|
+
entries.each do |ref, props|
|
164
|
+
tree[ref.strip] ||= {}
|
165
|
+
props = clean_properties(props)
|
166
|
+
props = props.scan(/(.*?)[:](.*?)([;]|\s*\Z)/)
|
167
|
+
props.each do |(key,val)|
|
168
|
+
tree[ref.strip][key.strip] = clean_value(val)
|
169
|
+
end
|
170
|
+
end
|
171
|
+
return tree
|
172
|
+
end
|
173
|
+
|
174
|
+
# Takes a css entry and ensures it is valid (as best it can).
|
175
|
+
# It will fix trivial mistakes, and raise an error when it is
|
176
|
+
# beyond repair.
|
177
|
+
#
|
178
|
+
# TODO: So far this does absolutely nothing!
|
179
|
+
|
180
|
+
def clean_properties(atts)
|
181
|
+
atts
|
182
|
+
end
|
183
|
+
|
184
|
+
#
|
185
|
+
|
186
|
+
# Normalises a single property value: surrounding whitespace is stripped
# and any text listed in substitute_urls is swapped for its replacement.
#
# The previous body tested and interpolated `urls`, a name that is not
# defined anywhere in this class, so every call raised NameError (which
# made the `rewrite` option unusable).
#
# NOTE(review): treating substitute_urls as a { original => replacement }
# map is an assumption based on the option's name and its Hash default;
# confirm the intended semantics before relying on it.
#
# val - raw property value String.
#
# Returns a new String.
def clean_value(val)
  cleaned = val.strip

  replacements = substitute_urls
  return cleaned if replacements.nil? || replacements.empty?

  replacements.reduce(cleaned) do |text, (from, to)|
    text.gsub(from.to_s, to.to_s)
  end
end
|
198
|
+
|
199
|
+
end
|
200
|
+
|
201
|
+
|
202
|
+
# CSS parse tree. This is for a "deep filtering".
|
203
|
+
|
204
|
+
class CssTree < Hash
|
205
|
+
|
206
|
+
def initialize(options=nil)
|
207
|
+
@options = options || {}
|
208
|
+
super()
|
209
|
+
end
|
210
|
+
|
211
|
+
# Re-output the CSS, all tidy ;)
|
212
|
+
|
213
|
+
def to_css
|
214
|
+
css = ""
|
215
|
+
each do |selector, entries|
|
216
|
+
css << "#{selector}{"
|
217
|
+
entries.each do |key, value|
|
218
|
+
css << "#{key}:#{value};"
|
219
|
+
end
|
220
|
+
css << "}\n"
|
221
|
+
end
|
222
|
+
return css
|
223
|
+
end
|
224
|
+
|
225
|
+
end
|
226
|
+
|
@@ -0,0 +1,386 @@
|
|
1
|
+
# = Multiton
|
2
|
+
#
|
3
|
+
# == Synopsis
|
4
|
+
#
|
5
|
+
# Multiton design pattern ensures only one object is allocated for a given state.
|
6
|
+
#
|
7
|
+
# The 'multiton' pattern is similar to a singleton, but instead of only one
|
8
|
+
# instance, there are several similar instances. It is useful when you want to
|
9
|
+
# avoid constructing objects many times because of some huge expense (connecting
|
10
|
+
# to a database for example), require a set of similar but not identical
|
11
|
+
# objects, and cannot easily control how many times a constructor may be called.
|
12
|
+
#
|
13
|
+
# class SomeMultitonClass
|
14
|
+
# include Multiton
|
15
|
+
# attr :arg
|
16
|
+
# def initialize(arg)
|
17
|
+
# @arg = arg
|
18
|
+
# end
|
19
|
+
# end
|
20
|
+
#
|
21
|
+
# a = SomeMultitonClass.new(4)
|
22
|
+
# b = SomeMultitonClass.new(4) # a and b are same object
|
23
|
+
# c = SomeMultitonClass.new(2) # c is a different object
|
24
|
+
#
|
25
|
+
# == Previous Behavior
|
26
|
+
#
|
27
|
+
# In previous versions of Multiton the #new method was made
|
28
|
+
# private and #instance had to be used in its stead --just like Singleton.
|
29
|
+
# But this is less desirable for Multiton since Multitons can
|
30
|
+
# have multiple instances, not just one.
|
31
|
+
#
|
32
|
+
# So instead Multiton now defines #create as a private alias of
|
33
|
+
# the original #new method (just in case it is needed) and then
|
34
|
+
# defines #new to handle the multiton; #instance is provided
|
35
|
+
# as an alias for it.
|
36
|
+
#
|
37
|
+
#--
|
38
|
+
# So if you must have the old behavior, all you need do is re-alias
|
39
|
+
# #new to #create and privatize it.
|
40
|
+
#
|
41
|
+
# class SomeMultitonClass
|
42
|
+
# include Multiton
|
43
|
+
# alias_method :new, :create
|
44
|
+
# private :new
|
45
|
+
# ...
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
# Then only #instance will be available for creating the Multiton.
|
49
|
+
#++
|
50
|
+
#
|
51
|
+
# == How It Works
|
52
|
+
#
|
53
|
+
# A pool of objects is searched for a previously cached object,
|
54
|
+
# if one is not found we construct one and cache it in the pool
|
55
|
+
# based on class and the args given to the contructor.
|
56
|
+
#
|
57
|
+
# A limitation of this approach is that it is impossible to
|
58
|
+
# detect if different blocks were given to a contructor (if it takes a
|
59
|
+
# block). So it is the constructor arguments _only_ which determine
|
60
|
+
# the uniqueness of an object. To workaround this, define the _class_
|
61
|
+
# method ::multiton_id.
|
62
|
+
#
|
63
|
+
# def Klass.multiton_id(*args, &block)
|
64
|
+
# # ...
|
65
|
+
# end
|
66
|
+
#
|
67
|
+
# Which should return a hash key used to identify the object being
|
68
|
+
# constructed as (not) unique.
|
69
|
+
#
|
70
|
+
# == Authors
|
71
|
+
#
|
72
|
+
# * Christoph Rippel
|
73
|
+
# * Thomas Sawyer
|
74
|
+
#
|
75
|
+
# = Copying
|
76
|
+
#
|
77
|
+
# Copyright (c) 2007 Christoph Rippel, Thomas Sawyer
|
78
|
+
#
|
79
|
+
# Ruby License
|
80
|
+
#
|
81
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
82
|
+
# software under the same terms as Ruby.
|
83
|
+
#
|
84
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
85
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
86
|
+
# FOR A PARTICULAR PURPOSE.
|
87
|
+
|
88
|
+
require 'thread'
|
89
|
+
|
90
|
+
# = Multiton
|
91
|
+
#
|
92
|
+
# Multiton design pattern ensures only one object is allocated for a given state.
|
93
|
+
#
|
94
|
+
# The 'multiton' pattern is similar to a singleton, but instead of only one
|
95
|
+
# instance, there are several similar instances. It is useful when you want to
|
96
|
+
# avoid constructing objects many times because of some huge expense (connecting
|
97
|
+
# to a database for example), require a set of similar but not identical
|
98
|
+
# objects, and cannot easily control how many times a contructor may be called.
|
99
|
+
#
|
100
|
+
# class SomeMultitonClass
|
101
|
+
# include Multiton
|
102
|
+
# attr :arg
|
103
|
+
# def initialize(arg)
|
104
|
+
# @arg = arg
|
105
|
+
# end
|
106
|
+
# end
|
107
|
+
#
|
108
|
+
# a = SomeMultitonClass.new(4)
|
109
|
+
# b = SomeMultitonClass.new(4) # a and b are same object
|
110
|
+
# c = SomeMultitonClass.new(2) # c is a different object
|
111
|
+
#
|
112
|
+
# == How It Works
|
113
|
+
#
|
114
|
+
# A pool of objects is searched for a previously cached object,
|
115
|
+
# if one is not found we construct one and cache it in the pool
|
116
|
+
# based on class and the args given to the contructor.
|
117
|
+
#
|
118
|
+
# A limitation of this approach is that it is impossible to
|
119
|
+
# detect if different blocks were given to a contructor (if it takes a
|
120
|
+
# block). So it is the constructor arguments _only_ which determine
|
121
|
+
# the uniqueness of an object. To workaround this, define the _class_
|
122
|
+
# method ::multiton_id.
|
123
|
+
#
|
124
|
+
# def Klass.multiton_id(*args, &block)
|
125
|
+
# # ...
|
126
|
+
# end
|
127
|
+
#
|
128
|
+
# Which should return a hash key used to identify the object being
|
129
|
+
# constructed as (not) unique.
|
130
|
+
|
131
|
+
module Multiton
|
132
|
+
|
133
|
+
# disable built-in copying methods
|
134
|
+
|
135
|
+
def clone
|
136
|
+
raise TypeError, "can't clone Multiton #{self}"
|
137
|
+
#self
|
138
|
+
end
|
139
|
+
|
140
|
+
def dup
|
141
|
+
raise TypeError, "can't dup Multiton #{self}"
|
142
|
+
#self
|
143
|
+
end
|
144
|
+
|
145
|
+
# default marshalling strategy
|
146
|
+
|
147
|
+
protected
|
148
|
+
|
149
|
+
def _dump(depth=-1)
|
150
|
+
Marshal.dump(@multiton_initializer)
|
151
|
+
end
|
152
|
+
|
153
|
+
# Mutex to safely store multiton instances.
|
154
|
+
|
155
|
+
class InstanceMutex < Hash #:nodoc:
|
156
|
+
def initialize
|
157
|
+
@global = Mutex.new
|
158
|
+
end
|
159
|
+
|
160
|
+
def initialized(arg)
|
161
|
+
store(arg, DummyMutex)
|
162
|
+
end
|
163
|
+
|
164
|
+
def (DummyMutex = Object.new).synchronize
|
165
|
+
yield
|
166
|
+
end
|
167
|
+
|
168
|
+
def default(arg)
|
169
|
+
@global.synchronize{ fetch(arg){ store(arg, Mutex.new) } }
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
# Multiton can be included in another module, in which case that module effectively becomes
|
174
|
+
# a multiton behavior distributor too. This is why we propagate #included to the base module
|
175
|
+
# by putting it in another module.
|
176
|
+
#
|
177
|
+
#--
|
178
|
+
# def append_features(mod)
|
179
|
+
# # help out people counting on transitive mixins
|
180
|
+
# unless mod.instance_of?(Class)
|
181
|
+
# raise TypeError, "Inclusion of Multiton in module #{mod}"
|
182
|
+
# end
|
183
|
+
# super
|
184
|
+
# end
|
185
|
+
#++
|
186
|
+
|
187
|
+
module Inclusive
|
188
|
+
private
|
189
|
+
def included(base)
|
190
|
+
class << base
|
191
|
+
#alias_method(:new!, :new) unless method_defined?(:new!)
|
192
|
+
# gracefully handle multiple inclusions of Multiton
|
193
|
+
unless include?(Multiton::MetaMethods)
|
194
|
+
alias_method :new!, :new
|
195
|
+
private :allocate #, :new
|
196
|
+
include Multiton::MetaMethods
|
197
|
+
|
198
|
+
if method_defined?(:marshal_dump)
|
199
|
+
undef_method :marshal_dump
|
200
|
+
warn "warning: marshal_dump was undefined since it is incompatible with the Multiton pattern"
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
extend Inclusive
|
208
|
+
|
209
|
+
#
|
210
|
+
|
211
|
+
module MetaMethods
|
212
|
+
|
213
|
+
include Inclusive
|
214
|
+
|
215
|
+
def instance(*e, &b)
|
216
|
+
arg = multiton_id(*e, &b)
|
217
|
+
multiton_instance.fetch(arg) do
|
218
|
+
multiton_mutex[arg].synchronize do
|
219
|
+
multiton_instance.fetch(arg) do
|
220
|
+
val = multiton_instance[arg] = new!(*e, &b) #new(*e, &b)
|
221
|
+
val.instance_variable_set(:@multiton_initializer, e, &b)
|
222
|
+
multiton_mutex.initialized(arg)
|
223
|
+
val
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
end
|
228
|
+
alias_method :new, :instance
|
229
|
+
|
230
|
+
def initialized?(*e, &b)
|
231
|
+
multiton_instance.key?(multiton_id(*e, &b))
|
232
|
+
end
|
233
|
+
|
234
|
+
protected
|
235
|
+
|
236
|
+
def multiton_instance
|
237
|
+
@multiton_instance ||= Hash.new
|
238
|
+
end
|
239
|
+
|
240
|
+
def multiton_mutex
|
241
|
+
@multiton_mutex ||= InstanceMutex.new
|
242
|
+
end
|
243
|
+
|
244
|
+
def reinitialize
|
245
|
+
multiton_instance.clear
|
246
|
+
multiton_mutex.clear
|
247
|
+
end
|
248
|
+
|
249
|
+
def _load(str)
|
250
|
+
instance(*Marshal.load(str))
|
251
|
+
end
|
252
|
+
|
253
|
+
private
|
254
|
+
|
255
|
+
# Default method to create a key to cache already constructed
|
256
|
+
# instances. In the use case MultitonClass.new(e), MultiClass.new(f)
|
257
|
+
# must be semantically equal if multiton_id(e).eql?(multiton_id(f))
|
258
|
+
# evaluates to true.
|
259
|
+
def multiton_id(*e, &b)
|
260
|
+
e
|
261
|
+
end
|
262
|
+
|
263
|
+
# Hook that refuses a singleton marshal_dump; _dump and _load are the
# marshalling methods this module expects to be used.
#
# The previous check looked for the String 'marshal_dump' in
# singleton_methods. On Ruby 1.9 and later that list holds Symbols, so the
# condition was always false and the guard never fired. Both sides are
# normalised with to_sym so the comparison holds whichever form is given.
#
# sym - name of the singleton method that was just added.
#
# Raises TypeError when the added method is marshal_dump.
def singleton_method_added(sym)
  super
  return unless sym.to_sym == :marshal_dump
  return unless singleton_methods.map(&:to_sym).include?(:marshal_dump)

  raise TypeError, "Don't use marshal_dump - rely on _dump and _load instead"
end
|
269
|
+
|
270
|
+
end
|
271
|
+
|
272
|
+
end
|
273
|
+
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
=begin
|
278
|
+
# TODO Convert this into a real test and/or benchmark.
|
279
|
+
|
280
|
+
if $0 == __FILE__
|
281
|
+
|
282
|
+
### Simple marshalling test #######
|
283
|
+
class A
|
284
|
+
def initialize(a,*e)
|
285
|
+
@e = a
|
286
|
+
end
|
287
|
+
|
288
|
+
include Multiton
|
289
|
+
begin
|
290
|
+
def self.marshal_dump(depth = -1)
|
291
|
+
end
|
292
|
+
rescue => mes
|
293
|
+
p mes
|
294
|
+
class << self; undef marshal_dump end
|
295
|
+
end
|
296
|
+
end
|
297
|
+
|
298
|
+
C = Class.new(A.clone)
|
299
|
+
s = C.instance('a','b')
|
300
|
+
|
301
|
+
raise unless Marshal.load(Marshal.dump(s)) == s
|
302
|
+
|
303
|
+
|
304
|
+
### Interdependent initialization example and threading benchmark ###
|
305
|
+
|
306
|
+
class Regular_SymPlane
|
307
|
+
def self.multiton_id(*e)
|
308
|
+
a,b = e
|
309
|
+
(a+b - 1)*(a+b )/2 + (a > b ? a : b)
|
310
|
+
end
|
311
|
+
|
312
|
+
def initialize(a,b)
|
313
|
+
klass = self.class
|
314
|
+
if a < b
|
315
|
+
@l = b > 0 ? klass.instance(a,b-1) : nil
|
316
|
+
@r = a > 0 ? klass.instance(a-1,b) : nil
|
317
|
+
else
|
318
|
+
@l = a > 0 ? klass.instance(a-1,b) : nil
|
319
|
+
@r = b > 0 ? klass.instance(a,b-1) : nil
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
include Multiton
|
324
|
+
end
|
325
|
+
|
326
|
+
|
327
|
+
|
328
|
+
def nap
|
329
|
+
# Thread.pass
|
330
|
+
sleep(rand(0.01))
|
331
|
+
end
|
332
|
+
|
333
|
+
class SymPlane < Regular_SymPlane
|
334
|
+
@m = Mutex.new
|
335
|
+
@count = 0
|
336
|
+
end
|
337
|
+
|
338
|
+
class << SymPlane
|
339
|
+
attr_reader :count
|
340
|
+
def reinitialize
|
341
|
+
super
|
342
|
+
@m = Mutex.new
|
343
|
+
@count = 0
|
344
|
+
end
|
345
|
+
def inherited(sub_class)
|
346
|
+
super
|
347
|
+
sub_class.instance_eval { @m = Mutex.new; @count = 0 }
|
348
|
+
end
|
349
|
+
|
350
|
+
def multiton_id(*e)
|
351
|
+
nap()
|
352
|
+
super
|
353
|
+
end
|
354
|
+
|
355
|
+
def new!(*e) # NOTICE!!!
|
356
|
+
super
|
357
|
+
ensure
|
358
|
+
nap()
|
359
|
+
@m.synchronize { p @count if (@count += 1) % 15 == 0 }
|
360
|
+
end
|
361
|
+
|
362
|
+
def run(k)
|
363
|
+
threads = 0
|
364
|
+
max = k * (k+1) / 2
|
365
|
+
puts ""
|
366
|
+
while count() < max
|
367
|
+
Thread.new { threads+= 1; instance(rand(30),rand(30)) }
|
368
|
+
end
|
369
|
+
puts "\nThe simulation created #{threads} threads"
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
|
374
|
+
require 'benchmark'
|
375
|
+
include Benchmark
|
376
|
+
|
377
|
+
bmbm do |x|
|
378
|
+
x.report('Initialize 465 SymPlane instances') { SymPlane.run(30) }
|
379
|
+
x.report('Reinitialize ') do
|
380
|
+
sleep 3
|
381
|
+
SymPlane.reinitialize
|
382
|
+
end
|
383
|
+
end
|
384
|
+
|
385
|
+
end
|
386
|
+
=end
|
data/lib/htmlfilter.rb
ADDED
@@ -0,0 +1,516 @@
|
|
1
|
+
# = HTML Filter
|
2
|
+
#
|
3
|
+
# HTML Filter library can be used to sanitize and sterilize
|
4
|
+
# HTML. A good idea if you let users submit HTML in comments,
|
5
|
+
# for instance.
|
6
|
+
#
|
7
|
+
# HtmlFilter is a port of lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>
|
8
|
+
#
|
9
|
+
# This code is licensed under a Creative Commons Attribution-ShareAlike 2.5 License
|
10
|
+
# http://creativecommons.org/licenses/by-sa/2.5/
|
11
|
+
#
|
12
|
+
# Thanks to Jang Kim for adding support for single quoted attributes.
|
13
|
+
#
|
14
|
+
# == Reference
|
15
|
+
#
|
16
|
+
# * http://iamcal.com/publish/articles/php/processing_html/
|
17
|
+
# * http://iamcal.com/publish/articles/php/processing_html_part_2/
|
18
|
+
#
|
19
|
+
# == Author(s)
|
20
|
+
#
|
21
|
+
# * Trans
|
22
|
+
# * George Moschovitis
|
23
|
+
# * James Britt
|
24
|
+
# * Cal Henderson
|
25
|
+
# * Jang Kim
|
26
|
+
#
|
27
|
+
# == Copying
|
28
|
+
#
|
29
|
+
# Copyright (c) 2007 Trans
|
30
|
+
|
31
|
+
require 'htmlfilter/multiton.rb'
|
32
|
+
|
33
|
+
# = HtmlFilter
|
34
|
+
#
|
35
|
+
# HTML Filter library can be used to sanitize and sterilize
|
36
|
+
# HTML. A good idea if you let users submit HTML in comments,
|
37
|
+
# for instance.
|
38
|
+
#
|
39
|
+
# lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>
|
40
|
+
#
|
41
|
+
# This code is licensed under a Creative Commons Attribution-ShareAlike 2.5 License
|
42
|
+
# http://creativecommons.org/licenses/by-sa/2.5/
|
43
|
+
#
|
44
|
+
# Thanks to Jang Kim for adding support for single quoted attributes.
|
45
|
+
#
|
46
|
+
# == Reference
|
47
|
+
#
|
48
|
+
# * http://iamcal.com/publish/articles/php/processing_html/
|
49
|
+
# * http://iamcal.com/publish/articles/php/processing_html_part_2/
|
50
|
+
|
51
|
+
class HtmlFilter
|
52
|
+
VERSION = "1.0.0"
|
53
|
+
|
54
|
+
include Multiton
|
55
|
+
|
56
|
+
# tags and attributes that are allowed
|
57
|
+
#
|
58
|
+
# Eg.
|
59
|
+
#
|
60
|
+
# {
|
61
|
+
# 'a' => ['href', 'target'],
|
62
|
+
# 'b' => [],
|
63
|
+
# 'img' => ['src', 'width', 'height', 'alt']
|
64
|
+
# }
|
65
|
+
attr_accessor :allowed
|
66
|
+
|
67
|
+
# tags which should always be self-closing (e.g. "<img />")
|
68
|
+
attr_accessor :no_close
|
69
|
+
|
70
|
+
# tags which must always have separate opening and closing
|
71
|
+
# tags (e.g. "<b></b>")
|
72
|
+
attr_accessor :always_close
|
73
|
+
|
74
|
+
# attributes which should be checked for valid protocols
|
75
|
+
# (src,href)
|
76
|
+
attr_accessor :protocol_attributes
|
77
|
+
|
78
|
+
# protocols which are allowed (http, ftp, mailto)
|
79
|
+
attr_accessor :allowed_protocols
|
80
|
+
|
81
|
+
# tags which should be removed if they contain no content
|
82
|
+
# (e.g. "<b></b>" or "<b />")
|
83
|
+
attr_accessor :remove_blanks
|
84
|
+
|
85
|
+
# should we remove comments? (true, false)
|
86
|
+
attr_accessor :strip_comments
|
87
|
+
|
88
|
+
# should we try and make a b tag out of "b>" (true, false)
|
89
|
+
attr_accessor :always_make_tags
|
90
|
+
|
91
|
+
# entity control option (true, false)
|
92
|
+
attr_accessor :allow_numbered_entities
|
93
|
+
|
94
|
+
# entity control option (amp, gt, lt, quot, etc.)
|
95
|
+
attr_accessor :allowed_entities
|
96
|
+
|
97
|
+
# default settings
|
98
|
+
|
99
|
+
DEFAULT = {
|
100
|
+
'allowed' => {
|
101
|
+
'a' => ['href', 'target'],
|
102
|
+
'b' => [],
|
103
|
+
'i' => [],
|
104
|
+
'img' => ['src', 'width', 'height', 'alt']
|
105
|
+
},
|
106
|
+
'no_close' => ['img', 'br', 'hr'],
|
107
|
+
'always_close' => ['a', 'b'],
|
108
|
+
'protocol_attributes' => ['src', 'href'],
|
109
|
+
'allowed_protocols' => ['http', 'ftp', 'mailto'],
|
110
|
+
'remove_blanks' => ['a', 'b'],
|
111
|
+
'strip_comments' => true,
|
112
|
+
'always_make_tags' => true,
|
113
|
+
'allow_numbered_entities' => true,
|
114
|
+
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
115
|
+
}
|
116
|
+
|
117
|
+
# New html filter.
|
118
|
+
|
119
|
+
def initialize( options=nil )
|
120
|
+
if options
|
121
|
+
h = DEFAULT.dup
|
122
|
+
options.each do |k,v|
|
123
|
+
h[k.to_s] = v
|
124
|
+
end
|
125
|
+
options = h
|
126
|
+
else
|
127
|
+
options = DEFAULT.dup
|
128
|
+
end
|
129
|
+
|
130
|
+
options.each{ |k,v| send("#{k}=",v) }
|
131
|
+
end
|
132
|
+
|
133
|
+
# Filter html string.
|
134
|
+
|
135
|
+
def filter(data)
|
136
|
+
@tag_counts = {}
|
137
|
+
|
138
|
+
data = escape_comments(data)
|
139
|
+
data = balance_html(data)
|
140
|
+
data = check_tags(data)
|
141
|
+
data = process_remove_blanks(data)
|
142
|
+
data = validate_entities(data)
|
143
|
+
|
144
|
+
return data
|
145
|
+
end
|
146
|
+
|
147
|
+
private
|
148
|
+
|
149
|
+
#
|
150
|
+
# internal tag counter
|
151
|
+
#
|
152
|
+
|
153
|
+
def tag_counts ; @tag_counts; end
|
154
|
+
|
155
|
+
#
|
156
|
+
#
|
157
|
+
#
|
158
|
+
|
159
|
+
# Rewrites the body of every <!-- ... --> comment by passing it through
# strip_single and escape_special_chars (both defined elsewhere in this
# class), leaving the comment delimiters in place.
#
# The pattern previously carried the /s option. In Ruby /s selects the
# Windows-31J encoding rather than "dot matches newline", so comments
# spanning lines were skipped and matching a UTF-8 string containing
# non-ASCII characters could raise Encoding::CompatibilityError. /m is the
# Ruby option that lets `.` match newlines.
#
# data - html source String.
#
# Returns a new String.
def escape_comments(data)
  data.gsub(/<!--(.*?)-->/m) do
    '<!--' + escape_special_chars(strip_single($1)) + '-->'
  end
end
|
166
|
+
|
167
|
+
#
|
168
|
+
#
|
169
|
+
#
|
170
|
+
|
171
|
+
# Repairs or neutralises unbalanced angle brackets.
#
# With always_make_tags set, stray brackets are completed into tags
# (e.g. "b>" becomes "<b>"). Otherwise stray brackets are escaped as
# entities so they can no longer open or close a tag.
#
# In the escaping branch the replacement strings previously contained raw
# "<" and ">" characters, so nothing was actually escaped (the first
# substitution was a no-op and the second inserted a live "><"). They now
# emit &lt; and &gt;, which is what the "escape stray brackets" comment
# describes.
#
# data - html source String.
#
# Returns a new String; +data+ is not modified.
def balance_html(data)
  data = data.dup

  if always_make_tags
    # try and form html
    data.gsub!(/>>+/, '>')
    data.gsub!(/<<+/, '<')
    data.gsub!(/^>/, '')
    data.gsub!(/<([^>]*?)(?=<|$)/, '<\1>')
    data.gsub!(/(^|>)([^<]*?)(?=>)/, '\1<\2')
  else
    # escape stray brackets
    data.gsub!(/<([^>]*?)(?=<|$)/, '&lt;\1')
    data.gsub!(/(^|>)([^<]*?)(?=>)/, '\1\2&gt;<')
    # The previous substitution leaves a "<" in front of the bracket it
    # could not consume (it is only looked ahead at, so that it can start
    # the next match); together they form "<>", which is dropped here.
    data.gsub!('<>', '')
  end

  data
end
|
193
|
+
|
194
|
+
#
|
195
|
+
#
|
196
|
+
#
|
197
|
+
|
198
|
+
# Runs every <...> span through process_tag and then appends a closing tag
# for each tag that tag_counts still records as open.
#
# The pattern previously used the /s option, which in Ruby selects the
# Windows-31J encoding instead of making `.` match newlines: tags split
# across lines were not recognised, and UTF-8 input with non-ASCII
# characters could raise Encoding::CompatibilityError. /m is used instead.
#
# data - html source String.
#
# Returns a new String; +data+ is not modified.
def check_tags(data)
  data = data.dup

  data.gsub!(/<(.*?)>/m) { process_tag(strip_single($1)) }

  # Integer#times does nothing for zero or negative counts.
  tag_counts.each do |tag, cnt|
    cnt.times { data << "</#{tag}>" }
  end

  data
end
|
211
|
+
|
212
|
+
#
|
213
|
+
#
|
214
|
+
#
|
215
|
+
|
216
|
+
# Turns the text found between "<" and ">" into a sanitised tag, or into
# an empty string when it is not allowed.
#
# data - tag content without the surrounding angle brackets.
#
# Handles, in order:
# * closing tags  -- emitted only for allowed, closable tags that are
#   currently open according to tag_counts;
# * opening tags  -- rebuilt from the tag name plus the attributes listed
#   for it in +allowed+; attributes named in protocol_attributes go through
#   process_param_protocol;
# * comments      -- dropped when strip_comments is set, kept otherwise;
# * anything else -- dropped.
#
# Changes from the previous version:
# * the regexps used the /s option, which in Ruby is an encoding flag
#   (Windows-31J), not dot-all; /m is used and \A / \z replace ^ / $ so
#   that a tag spread over several lines is parsed as one unit;
# * a closing tag was emitted whenever tag_counts held any entry for the
#   name, including 0 (0 is truthy in Ruby), so unmatched closing tags
#   leaked through and the count went negative;
# * double quotes inside an attribute value (possible with single-quoted
#   attributes) are written as &quot; so a value cannot end the attribute
#   early and introduce further attributes.
#
# Returns the String to put in place of the original tag.
def process_tag(data)
  # ending tags
  if (closing = %r{\A/([a-z0-9]+)}i.match(data))
    name = closing[1].downcase
    return '' unless allowed.key?(name)

    if !no_close.include?(name) && tag_counts[name].to_i > 0
      tag_counts[name] -= 1
      return "</#{name}>"
    end

    # Allowed, but self-closing or not currently open: drop it.
    return ''
  end

  # starting tags
  if (opening = %r{\A([a-z0-9]+)(.*?)(/?)\z}mi.match(data))
    name   = opening[1].downcase
    body   = opening[2]
    ending = opening[3]
    return '' unless allowed.key?(name)

    # Every scan yields [attribute, delimiter, value] triples.
    pairs = body.scan(/([a-z0-9]+)(=)([^"\s']+)/i) +           # <foo a=b />
            body.scan(/([a-z0-9]+)=(["'])(.*?)\2/mi) +          # <foo a="b" />
            body.scan(/([a-z0-9]+)=(["'])([^"']*?)\s*\z/i)      # <foo a="b />

    params = String.new
    pairs.each do |match|
      pname = match[0].downcase
      next unless allowed[name].include?(pname)

      value = match[2]
      value = process_param_protocol(value) if protocol_attributes.include?(pname)
      params << %{ #{pname}="#{value.to_s.gsub('"', '&quot;')}"}
    end

    ending = ' /' if no_close.include?(name)
    ending = ''   if always_close.include?(name)

    if ending.empty?
      # An open tag: remember it so it can be closed later.
      tag_counts[name] = tag_counts[name].to_i + 1
    else
      ending = ' /'
    end

    return '<' + name + params + ending + '>'
  end

  # comments
  if /\A!--(.*)--\z/mi.match(data)
    return strip_comments ? '' : '<' + data + '>'
  end

  # garbage, ignore it
  ''
end
|
298
|
+
|
299
|
+
#
|
300
|
+
#
|
301
|
+
#
|
302
|
+
|
303
|
+
# Neutralises an attribute value whose protocol is not in
# allowed_protocols by replacing the "scheme:" prefix with "#".
#
# The previous slice, data[0..size+1], kept the *first* size+2 characters
# -- i.e. the disallowed scheme itself plus one more character -- and threw
# the rest away ("javascript:alert(1)" became "#javascript:a"). The
# commented-out PHP beside it, substr(data, strlen(scheme)+1), shows the
# intent: keep everything *after* the colon.
#
# The pattern also used /s (an encoding flag in Ruby) and ^, which matches
# at every line start; it is now anchored to the start of the string.
#
# data - raw attribute value String.
#
# Returns the (possibly rewritten) value after decode_entities.
def process_param_protocol(data)
  data = decode_entities(data)

  if (found = /\A([^:]+):/.match(data))
    scheme = found[1]
    unless allowed_protocols.include?(scheme)
      # Drop "scheme:" and keep the remainder.
      data = '#' + data[(scheme.size + 1)..-1]
    end
  end

  data
end
|
317
|
+
|
318
|
+
#
|
319
|
+
#
|
320
|
+
#
|
321
|
+
|
322
|
+
def process_remove_blanks(data)
|
323
|
+
data = data.dup
|
324
|
+
|
325
|
+
remove_blanks.each do |tag|
|
326
|
+
data.gsub!(/<#{tag}(\s[^>]*)?><\/#{tag}>/, '')
|
327
|
+
data.gsub!(/<#{tag}(\s[^>]*)?\/>/, '')
|
328
|
+
end
|
329
|
+
|
330
|
+
return data
|
331
|
+
end
|
332
|
+
|
333
|
+
#
|
334
|
+
#
|
335
|
+
#
|
336
|
+
|
337
|
+
def fix_case(data)
|
338
|
+
data_notags = strip_tags(data)
|
339
|
+
data_notags = data_notags.gsub(/[^a-zA-Z]/, '')
|
340
|
+
|
341
|
+
if data_notags.size < 5
|
342
|
+
return data
|
343
|
+
end
|
344
|
+
|
345
|
+
if /[a-z]/ =~ data_notags
|
346
|
+
return data
|
347
|
+
end
|
348
|
+
|
349
|
+
data = data.gsub(/(>|^)([^<]+?)(<|$)/s){
|
350
|
+
strip_single($1) +
|
351
|
+
fix_case_inner(strip_single($2)) +
|
352
|
+
strip_single($3)
|
353
|
+
}
|
354
|
+
|
355
|
+
return data
|
356
|
+
end
|
357
|
+
|
358
|
+
#
|
359
|
+
#
|
360
|
+
#
|
361
|
+
|
362
|
+
def fix_case_inner(data)
|
363
|
+
data = data.dup
|
364
|
+
|
365
|
+
data.downcase!
|
366
|
+
|
367
|
+
data.gsub!(/(^|[^\w\s\';,\\-])(\s*)([a-z])/){
|
368
|
+
strip_single("#{$1}#{$2}") + strip_single($3).upcase
|
369
|
+
}
|
370
|
+
|
371
|
+
return data
|
372
|
+
end
|
373
|
+
|
374
|
+
#
|
375
|
+
#
|
376
|
+
#
|
377
|
+
|
378
|
+
# Checks every "&..." sequence with check_entity and escapes double quotes
# that appear in text outside of tags.
#
# Changes from the previous version:
# * the quote pass replaced the two-character sequence backslash + quote
#   with a plain quote, so ordinary double quotes were never touched; they
#   are now written as &quot;, which is what "validate quotes" calls for;
# * the quote pattern used /s, which in Ruby is the Windows-31J encoding
#   flag and could raise Encoding::CompatibilityError on UTF-8 text with
#   non-ASCII characters; the pattern contains no `.`, so no option is
#   needed;
# * ^ and $ match at every line break in Ruby; \A and \z are used so that
#   a line break inside a tag does not make the rest of that tag look like
#   text.
#
# data - html source String.
#
# Returns a new String; +data+ is not modified.
def validate_entities(data)
  data = data.dup

  # validate entities throughout the string
  data.gsub!(/&([^&;]*)(?=(;|&|$))/) do
    check_entity(strip_single($1), strip_single($2))
  end

  # validate quotes outside of tags
  data.gsub!(/(>|\A)([^<]+?)(<|\z)/) do
    lead, text, trail = $1, $2, $3
    strip_single(lead) + strip_single(text).gsub('"', '&quot;') + strip_single(trail)
  end

  data
end
|
396
|
+
|
397
|
+
#
|
398
|
+
#
|
399
|
+
#
|
400
|
+
|
401
|
+
# Decides how the ampersand in front of +preamble+ is written back.
#
# preamble - the text that followed "&" (without the terminator)
# term     - the character that terminated it (";" for a complete entity)
#
# A sequence that is not terminated by ";" or that +is_valid_entity+ rejects
# gets its ampersand escaped as "&amp;"; a valid entity keeps the bare "&".
# The terminator itself is never included in the return value.
#
# Previously all three branches returned '&' + preamble, so the validity
# check had no effect on the output.
def check_entity(preamble, term)
  return '&amp;' + preamble if term != ';'
  return '&' + preamble if is_valid_entity(preamble)

  '&amp;' + preamble
end
|
412
|
+
|
413
|
+
#
|
414
|
+
#
|
415
|
+
#
|
416
|
+
|
417
|
+
# Reports whether +entity+ (the text between "&" and ";") may be kept.
#
# * A decimal numeric reference ("#123") above 127 is always accepted.
# * A decimal numeric reference of 127 or below yields the value of
#   +allow_numbered_entities+.
# * Any other name is accepted only when listed in +allowed_entities+.
#
# Returns true, the +allow_numbered_entities+ setting, or nil.
#
# The pattern is anchored with \A and \z so the whole string must be a
# numeric reference; with ^ and $ a multi-line value such as "x\n#200" was
# accepted because one of its lines matched.
def is_valid_entity(entity)
  if (numeric = /\A#([0-9]+)\z/.match(entity))
    return true if numeric[1].to_i > 127

    return allow_numbered_entities
  end

  return true if allowed_entities.include?(entity)

  nil
end
|
433
|
+
|
434
|
+
# within attributes, we want to convert all hex/dec/url
|
435
|
+
# escape sequences into their raw characters so that we can
|
436
|
+
# check we don't get stray quotes/brackets inside strings.
|
437
|
+
|
438
|
+
# Decodes numeric escapes in three passes - decimal references ("&#65;"),
# hexadecimal references ("&#x41;") and percent escapes ("%41"), each with
# an optional trailing ";" - and then hands the result to
# +validate_entities+. The argument is not modified.
def decode_entities(data)
  decoded = data.gsub(/(&)#(\d+);?/) { decode_dec_entity($1, $2) }
  decoded = decoded.gsub(/(&)#x([0-9a-f]+);?/i) { decode_hex_entity($1, $2) }
  decoded = decoded.gsub(/(%)([0-9a-f]{2});?/i) { decode_hex_entity($1, $2) }

  validate_entities(decoded)
end
|
449
|
+
|
450
|
+
#
|
451
|
+
#
|
452
|
+
#
|
453
|
+
|
454
|
+
# Decodes one hexadecimal escape.
#
# The last two arguments are taken as the escape type ("&" or "%") and the
# hex digits, so both the two-argument call used by +decode_entities+ and a
# call with the full match prepended are accepted. The digits are converted
# from hex to an integer and passed on to +decode_num_entity+.
#
# Previously the method read m[1]/m[2] (off by one for a two-argument call)
# and converted decimal to hex instead of hex to decimal.
def decode_hex_entity(*m)
  orig_type, digits = m.last(2)
  decode_num_entity(orig_type, digits.to_s.hex)
end
|
457
|
+
|
458
|
+
#
|
459
|
+
#
|
460
|
+
#
|
461
|
+
|
462
|
+
# Decodes one decimal numeric reference.
#
# The last two arguments are taken as the escape type ("&") and the decimal
# digits, so both the two-argument call used by +decode_entities+ and a call
# with the full match prepended are accepted. They are passed on to
# +decode_num_entity+.
#
# Previously the method read m[1]/m[2], which for a two-argument call passed
# the digits as the type and nil as the number.
def decode_dec_entity(*m)
  orig_type, digits = m.last(2)
  decode_num_entity(orig_type, digits)
end
|
465
|
+
|
466
|
+
#
|
467
|
+
#
|
468
|
+
#
|
469
|
+
|
470
|
+
# Turns a numeric character code back into text.
#
# orig_type - "%" for a percent escape, "&" for a numeric reference
# d         - the character code (anything responding to to_i)
#
# Negative codes are replaced by 32 (space). Codes above 127 are not decoded
# but re-emitted in their original notation ("%hh" or "&#N;"). Everything
# else is converted to its character and run through +escape_special_chars+.
#
# The numeric reference is built by concatenation on purpose: the former
# "&#{d};" literal was string interpolation and dropped the "#".
def decode_num_entity(orig_type, d)
  d = d.to_i
  d = 32 if d < 0 # space

  # don't mess with high chars
  if d > 127
    return '%' + d.to_s(16) if orig_type == '%'
    return '&#' + d.to_s + ';' if orig_type == '&'
  end

  escape_special_chars(d.chr)
end
|
482
|
+
|
483
|
+
#
|
484
|
+
#
|
485
|
+
#
|
486
|
+
|
487
|
+
# Undoes two backslash escapes: a backslash followed by a double quote
# becomes a plain double quote, and a backslash followed by the digit zero
# becomes a NUL character. Returns a new string.
def strip_single(data)
  unquoted = data.gsub('\"', '"')
  unquoted.gsub('\0', 0.chr)
end
|
490
|
+
|
491
|
+
# Certain characters have special significance in HTML, and
|
492
|
+
# should be represented by HTML entities if they are to
|
493
|
+
# preserve their meanings. This function returns a string
|
494
|
+
# with some of these conversions made; the translations made
|
495
|
+
# are those most useful for everyday web programming.
|
496
|
+
|
497
|
+
# Replaces the five characters that are significant in HTML with entities:
# & => &amp;   " => &quot;   > => &gt;   < => &lt;   ' => &#039;
#
# The ampersand is handled first so the entities produced by the later
# replacements are not escaped a second time. The argument is not modified;
# a new string is returned.
#
# The replacement literals are written out as entities here; in the previous
# text each one was the bare character itself, so nothing was escaped and
# the apostrophe line did not parse.
# NOTE(review): "&#039;" for the apostrophe is an assumption - confirm
# against upstream.
def escape_special_chars(data)
  data.gsub('&', '&amp;')
      .gsub('"', '&quot;')
      .gsub('>', '&gt;')
      .gsub('<', '&lt;')
      .gsub("'", '&#039;')
end
|
506
|
+
|
507
|
+
end
|
508
|
+
|
509
|
+
# Extend the standard String class for extra convenience.
|
510
|
+
|
511
|
+
class String
  # Filters this string as HTML: builds an HtmlFilter from the given
  # arguments and returns the result of its +filter+ method applied to self.
  def html_filter(*opts)
    sanitizer = HtmlFilter.new(*opts)
    sanitizer.filter(self)
  end
end
|
516
|
+
|
data/meta/package
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
htmlfilter
|
data/meta/project
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rubyworks
|
data/meta/title
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
HTMLFilter
|
data/meta/version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "cssfilter"
|
3
|
+
#require 'yaml'
|
4
|
+
|
5
|
+
# Checks that CssFilter strips whitespace and drops url() values whose host
# is not in the allowed list.
class TestCssFilter < Test::Unit::TestCase

  # Builds the input stylesheet and the output expected from the filter.
  def setup
    @css = <<-END
* {
margin: 0;
height: 0;
}

body {
margin: 0;
height: 0;
background: url(http://xzy.org);
}

h1 {
trythis: url(http://here.org/fun.js);
font-size: 12pt;
}
END
    @result = "* {\nmargin: 0;\nheight: 0;\n}\nbody {\nmargin: 0;\nheight: 0;\n}\nh1 {\ntrythis: url(http://here.org/fun.js);\nfont-size: 12pt;\n}"
  end

  # Only the here.org url is expected to survive filtering.
  def test_filter
    options = { :allowed_hosts => ["here.org"], :strip_whitespace => true }
    tree = CssFilter.new(options).filter(@css)
    assert_equal(@result, tree.to_s)
  end

end
|
35
|
+
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "htmlfilter"
|
3
|
+
|
4
|
+
# Unit and functional tests for HtmlFilter and String#html_filter.
class TestHtmlFilter < Test::Unit::TestCase

  # core tests

  # Two filters created without options are the same object; a filter
  # created with options is a different one.
  def test_multiton_without_options
    plain_a = HtmlFilter.new
    plain_b = HtmlFilter.new
    custom  = HtmlFilter.new( :strip_comments => false )
    assert_equal( plain_a.object_id, plain_b.object_id )
    assert_not_equal( plain_a.object_id, custom.object_id )
  end

  # Two filters created with equal options are the same object; a filter
  # created without options is a different one.
  def test_multiton_with_options
    custom_a = HtmlFilter.new( :strip_comments => false )
    custom_b = HtmlFilter.new( :strip_comments => false )
    plain    = HtmlFilter.new
    assert_equal( custom_a.object_id, custom_b.object_id )
    assert_not_equal( custom_a.object_id, plain.object_id )
  end

  # strip_single turns backslash-quote into a quote and backslash-zero
  # into a NUL character.
  def test_strip_single
    filter = HtmlFilter.new
    assert_equal( '"', filter.send(:strip_single,'\"') )
    assert_equal( "\000", filter.send(:strip_single,'\0') )
  end

  # functional tests

  # Asserts that filtering +original+ with default options gives +filtered+.
  def assert_filter(filtered, original)
    assert_equal(filtered, original.html_filter)
  end

  # Asserts every [expected, input] pair in order.
  def assert_filters(pairs)
    pairs.each { |filtered, original| assert_filter(filtered, original) }
  end

  def test_fix_quotes
    assert_filter '<img src="foo.jpg" />', "<img src=\"foo.jpg />"
  end

  def test_basics
    assert_filters [
      ['', ''],
      ['hello', 'hello']
    ]
  end

  def test_balancing_tags
    assert_filters [
      ["<b>hello</b>", "<<b>hello</b>"],
      ["<b>hello</b>", "<b>>hello</b>"],
      ["<b>hello</b>", "<b>hello<</b>"],
      ["<b>hello</b>", "<b>hello</b>>"],
      ["", "<>"]
    ]
  end

  def test_tag_completion
    assert_filters [
      ["hello", "hello<b>"],
      ["<b>hello</b>", "<b>hello"],
      ["hello<b>world</b>", "hello<b>world"],
      ["hello", "hello</b>"],
      ["hello", "hello<b/>"],
      ["hello<b>world</b>", "hello<b/>world"],
      ["<b><b><b>hello</b></b></b>", "<b><b><b>hello"],
      ["", "</b><b>"]
    ]
  end

  def test_end_slashes
    assert_filters [
      ['<img />', '<img>'],
      ['<img />', '<img/>'],
      ['', '<b/></b>']
    ]
  end

end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: htmlfilter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors: []
|
7
|
+
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-09-22 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: HTML Filter library can be used to sanitize and sterilize HTML. A good idea if you let users submit HTML in comments, for instance. This library also include CssFilter. The CssFilter class will clean-up a cascading style sheet. It can be used to remove whitespace and most importantly remove urls.
|
17
|
+
email:
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- Rakefile
|
24
|
+
- Manifest.txt
|
25
|
+
- TODO
|
26
|
+
- README.rdoc
|
27
|
+
- History.rdoc
|
28
|
+
files:
|
29
|
+
- lib/cssfilter.rb
|
30
|
+
- lib/htmlfilter/multiton.rb
|
31
|
+
- lib/htmlfilter.rb
|
32
|
+
- meta/package
|
33
|
+
- meta/project
|
34
|
+
- meta/title
|
35
|
+
- meta/version
|
36
|
+
- test/test_cssfilter.rb
|
37
|
+
- test/test_htmlfilter.rb
|
38
|
+
- Rakefile
|
39
|
+
- Manifest.txt
|
40
|
+
- TODO
|
41
|
+
- README.rdoc
|
42
|
+
- History.rdoc
|
43
|
+
has_rdoc: true
|
44
|
+
homepage:
|
45
|
+
licenses: []
|
46
|
+
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options:
|
49
|
+
- --inline-source
|
50
|
+
- --title
|
51
|
+
- htmlfilter api
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: "0"
|
59
|
+
version:
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
requirements: []
|
67
|
+
|
68
|
+
rubyforge_project: htmlfilter
|
69
|
+
rubygems_version: 1.3.5
|
70
|
+
signing_key:
|
71
|
+
specification_version: 3
|
72
|
+
summary: HTML Filter library can be used to sanitize and sterilize HTML.
|
73
|
+
test_files:
|
74
|
+
- test/test_cssfilter.rb
|
75
|
+
- test/test_htmlfilter.rb
|