htmlfilter 1.0.0 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +25 -0
- data/{Manifest.txt → MANIFEST} +7 -9
- data/README.rdoc +7 -7
- data/TODO +0 -2
- data/lib/cssfilter.rb +32 -28
- data/lib/htmlfilter.rb +136 -32
- data/meta/{project → collection} +0 -0
- data/meta/contact +1 -0
- data/meta/description +1 -0
- data/meta/homepage +1 -0
- data/meta/{package → name} +0 -0
- data/meta/repository +1 -0
- data/meta/version +1 -1
- data/test/test_cssfilter.rb +2 -2
- data/test/test_htmlfilter.rb +2 -18
- metadata +17 -15
- data/History.rdoc +0 -6
- data/lib/htmlfilter/multiton.rb +0 -386
data/HISTORY
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= RELEASE HISTORY
|
2
|
+
|
3
|
+
== 1.1 / 2009-11-24
|
4
|
+
|
5
|
+
This release adjusts the names of the classes to
|
6
|
+
be capitalized according to the actual use of the
|
7
|
+
terms. Some alternate options presets have been added
|
8
|
+
as well, and this release sheds the Multiton, which
|
9
|
+
was basically a YAGNI.
|
10
|
+
|
11
|
+
Changes:
|
12
|
+
|
13
|
+
* Renamed HtmlFilter to HTMLFilter.
|
14
|
+
* Renamed CssFilter to CSSFilter
|
15
|
+
* HTMLFilter is no longer a Multiton.
|
16
|
+
* Old names are still available temporarily.
|
17
|
+
* Added built-in option constants.
|
18
|
+
* CssTree is now CSSFilter::Tree.
|
19
|
+
|
20
|
+
== 1.0.0 / 2009-06-25
|
21
|
+
|
22
|
+
Changes:
|
23
|
+
|
24
|
+
* Birthday! (Spun-off from Ruby Facets)
|
25
|
+
|
data/{Manifest.txt → MANIFEST}
RENAMED
@@ -1,19 +1,17 @@
|
|
1
1
|
#!mast bin lib meta test [A-Z]*
|
2
|
-
lib
|
3
2
|
lib/cssfilter.rb
|
4
|
-
lib/htmlfilter
|
5
|
-
lib/htmlfilter/multiton.rb
|
6
3
|
lib/htmlfilter.rb
|
7
|
-
meta
|
8
|
-
meta/
|
9
|
-
meta/
|
4
|
+
meta/collection
|
5
|
+
meta/contact
|
6
|
+
meta/description
|
7
|
+
meta/homepage
|
8
|
+
meta/name
|
9
|
+
meta/repository
|
10
10
|
meta/title
|
11
11
|
meta/version
|
12
|
-
test
|
13
12
|
test/test_cssfilter.rb
|
14
13
|
test/test_htmlfilter.rb
|
15
14
|
Rakefile
|
16
|
-
Manifest.txt
|
17
15
|
TODO
|
18
16
|
README.rdoc
|
19
|
-
|
17
|
+
HISTORY
|
data/README.rdoc
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
= HtmlFilter
|
2
2
|
|
3
3
|
* http://rubyworks.github.com/htmlfilter
|
4
|
+
* http://github.com/rubyworks/htmlfilter
|
4
5
|
|
5
6
|
== DESCRIPTION:
|
6
7
|
|
@@ -14,8 +15,11 @@ whitespace and most importantly remove urls.
|
|
14
15
|
|
15
16
|
== FEATURES:
|
16
17
|
|
17
|
-
*
|
18
|
-
*
|
18
|
+
* Based on well-worn PHP library.
|
19
|
+
* Regular expression based filtering.
|
20
|
+
* Very efficient for small snippets, like blog comments.
|
21
|
+
* Pure-Ruby and no dependencies.
|
22
|
+
* Also has library to clean and compact cascading stylesheets.
|
19
23
|
|
20
24
|
== SYNOPSIS:
|
21
25
|
|
@@ -27,14 +31,10 @@ Via the class.
|
|
27
31
|
|
28
32
|
Or using the String extension.
|
29
33
|
|
30
|
-
html.html_filter #=> "<b>hello</b>"
|
34
|
+
html.html_filter(options) #=> "<b>hello</b>"
|
31
35
|
|
32
36
|
See RDocs for more information.
|
33
37
|
|
34
|
-
== REQUIREMENTS:
|
35
|
-
|
36
|
-
* Uses a copy of multiton.rb (included)
|
37
|
-
|
38
38
|
== INSTALL:
|
39
39
|
|
40
40
|
* sudo gem install htmlfilter
|
data/TODO
CHANGED
data/lib/cssfilter.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# = CSS Filter
|
2
2
|
#
|
3
|
-
# The
|
3
|
+
# The CSSFilter class will clean up a cascading stylesheet.
|
4
4
|
# It can be used to remove whitespace and most importantly
|
5
5
|
# remove urls.
|
6
6
|
#
|
@@ -8,25 +8,27 @@
|
|
8
8
|
#
|
9
9
|
# * Trans
|
10
10
|
#
|
11
|
-
# ==
|
11
|
+
# == Copying
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# Copyright (c) 2007 Thomas Sawyer
|
14
14
|
#
|
15
|
-
#
|
15
|
+
# Creative Commons Attribution-ShareAlike 3.0 License
|
16
16
|
#
|
17
|
-
#
|
17
|
+
# Ref. http://creativecommons.org/licenses/by-sa/3.0/
|
18
|
+
|
19
|
+
|
20
|
+
# TODO: Allow urls to be specified per attribute type.
|
18
21
|
|
19
22
|
#require 'htmlfilter/uri'
|
20
23
|
require 'uri'
|
21
24
|
|
22
25
|
# = CSS Filter
|
23
26
|
#
|
24
|
-
# The
|
27
|
+
# The CSSFilter class will clean up a cascading style sheet.
|
25
28
|
# It can be used to remove whitespace and most importantly
|
26
29
|
# remove urls.
|
27
|
-
|
28
|
-
class
|
29
|
-
VERSION="1.0.0"
|
30
|
+
|
31
|
+
class CSSFilter
|
30
32
|
|
31
33
|
# should we remove comments? (true, false)
|
32
34
|
attr_accessor :strip_comments
|
@@ -158,7 +160,7 @@ class CssFilter
|
|
158
160
|
# TODO: Not complete, does not work with "@xxx foo;" for example.
|
159
161
|
|
160
162
|
def parse(css)
|
161
|
-
tree =
|
163
|
+
tree = Tree.new
|
162
164
|
entries = css.scan(/^(.*?)\{(.*?)\}/m)
|
163
165
|
entries.each do |ref, props|
|
164
166
|
tree[ref.strip] ||= {}
|
@@ -196,31 +198,33 @@ class CssFilter
|
|
196
198
|
return val
|
197
199
|
end
|
198
200
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
# CSS parse tree. This is for a "deep filtering".
|
201
|
+
# CSS parse tree. This is for a "deep filtering".
|
203
202
|
|
204
|
-
class
|
203
|
+
class Tree < Hash
|
205
204
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
205
|
+
def initialize(options=nil)
|
206
|
+
@options = options || {}
|
207
|
+
super()
|
208
|
+
end
|
210
209
|
|
211
|
-
|
210
|
+
# Re-output the CSS, all tidy ;)
|
212
211
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
212
|
+
def to_css
|
213
|
+
css = ""
|
214
|
+
each do |selector, entries|
|
215
|
+
css << "#{selector}{"
|
216
|
+
entries.each do |key, value|
|
217
|
+
css << "#{key}:#{value};"
|
218
|
+
end
|
219
|
+
css << "}\n"
|
219
220
|
end
|
220
|
-
css
|
221
|
+
return css
|
221
222
|
end
|
222
|
-
|
223
|
+
|
223
224
|
end
|
224
225
|
|
225
226
|
end
|
226
227
|
|
228
|
+
# For backward compatibility. Eventually this will be deprecated.
|
229
|
+
CssFilter = CSSFilter
|
230
|
+
|
data/lib/htmlfilter.rb
CHANGED
@@ -5,9 +5,8 @@
|
|
5
5
|
# for instance.
|
6
6
|
#
|
7
7
|
# HtmlFilter is a port of lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# http://creativecommons.org/licenses/by-sa/2.5/
|
8
|
+
# licensed under a Creative Commons Attribution-ShareAlike 2.5 License
|
9
|
+
# http://creativecommons.org/licenses/by-sa/2.5/.
|
11
10
|
#
|
12
11
|
# Thanks to Jang Kim for adding support for single quoted attributes.
|
13
12
|
#
|
@@ -26,32 +25,35 @@
|
|
26
25
|
#
|
27
26
|
# == Copying
|
28
27
|
#
|
29
|
-
# Copyright (c) 2007
|
28
|
+
# Copyright (c) 2007 Thomas Sawyer
|
29
|
+
#
|
30
|
+
# Creative Commons Attribution-ShareAlike 3.0 License
|
31
|
+
#
|
32
|
+
# Ref. http://creativecommons.org/licenses/by-sa/3.0/
|
30
33
|
|
31
|
-
require 'htmlfilter/multiton.rb'
|
32
34
|
|
33
|
-
# =
|
35
|
+
# = HTMLFilter
|
34
36
|
#
|
35
37
|
# HTML Filter library can be used to sanitize and sterilize
|
36
38
|
# HTML. A good idea if you let users submit HTML in comments,
|
37
39
|
# for instance.
|
38
40
|
#
|
39
|
-
#
|
41
|
+
# == Usage
|
40
42
|
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
# Thanks to Jang Kim for adding support for single quoted attributes.
|
43
|
+
# hf = HTMLFilter.new
|
44
|
+
# hf.filter("<b>Bold Action") #=> "<b>Bold Action</b>"
|
45
45
|
#
|
46
46
|
# == Reference
|
47
47
|
#
|
48
48
|
# * http://iamcal.com/publish/articles/php/processing_html/
|
49
49
|
# * http://iamcal.com/publish/articles/php/processing_html_part_2/
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
50
|
+
#
|
51
|
+
# == Issues
|
52
|
+
#
|
53
|
+
# * The built in option constants could use a fair bit of refinement.
|
54
|
+
# * Eventually the old HtmlFilter name needs to be deprecated.
|
55
|
+
#
|
56
|
+
class HTMLFilter
|
55
57
|
|
56
58
|
# tags and attributes that are allowed
|
57
59
|
#
|
@@ -85,7 +87,7 @@ class HtmlFilter
|
|
85
87
|
# should we remove comments? (true, false)
|
86
88
|
attr_accessor :strip_comments
|
87
89
|
|
88
|
-
# should we try and make a b tag out of "b>" (true, false)
|
90
|
+
# should we try and make a <b> tag out of "b>" (true, false)
|
89
91
|
attr_accessor :always_make_tags
|
90
92
|
|
91
93
|
# entity control option (true, false)
|
@@ -94,14 +96,18 @@ class HtmlFilter
|
|
94
96
|
# entity control option (amp, gt, lt, quot, etc.)
|
95
97
|
attr_accessor :allowed_entities
|
96
98
|
|
97
|
-
|
99
|
+
## max number of text characters at which to truncate (leave as +nil+ for no truncation)
|
100
|
+
#attr_accessor :truncate
|
98
101
|
|
102
|
+
# Default settings
|
99
103
|
DEFAULT = {
|
100
104
|
'allowed' => {
|
101
105
|
'a' => ['href', 'target'],
|
106
|
+
'img' => ['src', 'width', 'height', 'alt'],
|
102
107
|
'b' => [],
|
103
108
|
'i' => [],
|
104
|
-
'
|
109
|
+
'em' => [],
|
110
|
+
'tt' => [],
|
105
111
|
},
|
106
112
|
'no_close' => ['img', 'br', 'hr'],
|
107
113
|
'always_close' => ['a', 'b'],
|
@@ -114,9 +120,75 @@ class HtmlFilter
|
|
114
120
|
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
115
121
|
}
|
116
122
|
|
117
|
-
#
|
123
|
+
# Basic settings are similar to DEFAULT but do not allow any type
|
124
|
+
# of links, neither <tt>a href</tt> nor <tt>img</tt>.
|
125
|
+
BASIC = {
|
126
|
+
'allowed' => {
|
127
|
+
'b' => [],
|
128
|
+
'i' => [],
|
129
|
+
'em' => [],
|
130
|
+
'tt' => [],
|
131
|
+
},
|
132
|
+
'no_close' => ['img', 'br', 'hr'],
|
133
|
+
'always_close' => ['a', 'b'],
|
134
|
+
'protocol_attributes' => ['src', 'href'],
|
135
|
+
'allowed_protocols' => ['http', 'ftp', 'mailto'],
|
136
|
+
'remove_blanks' => ['a', 'b'],
|
137
|
+
'strip_comments' => true,
|
138
|
+
'always_make_tags' => true,
|
139
|
+
'allow_numbered_entities' => true,
|
140
|
+
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
141
|
+
}
|
142
|
+
|
143
|
+
# Strict settings do not allow any tags.
|
144
|
+
STRICT = {
|
145
|
+
'allowed' => {},
|
146
|
+
'no_close' => ['img', 'br', 'hr'],
|
147
|
+
'always_close' => ['a', 'b'],
|
148
|
+
'protocol_attributes' => ['src', 'href'],
|
149
|
+
'allowed_protocols' => ['http', 'ftp', 'mailto'],
|
150
|
+
'remove_blanks' => ['a', 'b'],
|
151
|
+
'strip_comments' => true,
|
152
|
+
'always_make_tags' => true,
|
153
|
+
'allow_numbered_entities' => true,
|
154
|
+
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
155
|
+
}
|
118
156
|
|
119
|
-
|
157
|
+
# Relaxed settings allow a great deal of the HTML spec.
|
158
|
+
#
|
159
|
+
# TODO: Need to expand upon RELAXED options.
|
160
|
+
#
|
161
|
+
RELAXED = {
|
162
|
+
'allowed' => {
|
163
|
+
'a' => ['class', 'href', 'target'],
|
164
|
+
'b' => ['class'],
|
165
|
+
'i' => ['class'],
|
166
|
+
'img' => ['class', 'src', 'width', 'height', 'alt'],
|
167
|
+
'div' => ['class'],
|
168
|
+
'pre' => ['class'],
|
169
|
+
'code' => ['class'],
|
170
|
+
'ul' => ['class'], 'ol' => ['class'], 'li' => ['class']
|
171
|
+
},
|
172
|
+
'no_close' => ['img', 'br', 'hr'],
|
173
|
+
'always_close' => ['a', 'b'],
|
174
|
+
'protocol_attributes' => ['src', 'href'],
|
175
|
+
'allowed_protocols' => ['http', 'ftp', 'mailto'],
|
176
|
+
'remove_blanks' => ['a', 'b'],
|
177
|
+
'strip_comments' => true,
|
178
|
+
'always_make_tags' => true,
|
179
|
+
'allow_numbered_entities' => true,
|
180
|
+
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
181
|
+
}
|
182
|
+
|
183
|
+
# New html filter.
|
184
|
+
#
|
185
|
+
# Provide custom +options+, or use one of the built-in options
|
186
|
+
# constants.
|
187
|
+
#
|
188
|
+
# hf = HTMLFilter.new(HTMLFilter::RELAXED)
|
189
|
+
# hf.filter(htmlstr)
|
190
|
+
#
|
191
|
+
def initialize(options=nil)
|
120
192
|
if options
|
121
193
|
h = DEFAULT.dup
|
122
194
|
options.each do |k,v|
|
@@ -126,22 +198,20 @@ class HtmlFilter
|
|
126
198
|
else
|
127
199
|
options = DEFAULT.dup
|
128
200
|
end
|
129
|
-
|
130
201
|
options.each{ |k,v| send("#{k}=",v) }
|
131
202
|
end
|
132
203
|
|
133
204
|
# Filter html string.
|
134
205
|
|
135
|
-
def filter(
|
206
|
+
def filter(html)
|
136
207
|
@tag_counts = {}
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
return data
|
208
|
+
html = escape_comments(html)
|
209
|
+
html = balance_html(html)
|
210
|
+
html = check_tags(html)
|
211
|
+
html = process_remove_blanks(html)
|
212
|
+
html = validate_entities(html)
|
213
|
+
#html = truncate_html(html)
|
214
|
+
html
|
145
215
|
end
|
146
216
|
|
147
217
|
private
|
@@ -504,13 +574,47 @@ class HtmlFilter
|
|
504
574
|
return data
|
505
575
|
end
|
506
576
|
|
577
|
+
## HTML comment regular expression
|
578
|
+
#REM_RE = %r{<\!--(.*?)-->}
|
579
|
+
#
|
580
|
+
## HTML tag regular expression
|
581
|
+
#TAG_RE = %r{</?\w+((\s+\w+(\s*=\s*(?:"(.|\n)*?"|'(.|\n)*?'|[^'">\s]+))?)+\s*|\s*)/?>} #'
|
582
|
+
#
|
583
|
+
##
|
584
|
+
#def truncate_html(html)
|
585
|
+
# return html unless truncate
|
586
|
+
# # default settings
|
587
|
+
# limit = truncate
|
588
|
+
#
|
589
|
+
# mask = html.gsub(REM_RE){ |m| "\0" * m.size }
|
590
|
+
# mask = mask.gsub(TAG_RE){ |m| "\0" * m.size }
|
591
|
+
#
|
592
|
+
# i, x = 0, 0
|
593
|
+
#
|
594
|
+
# while i < mask.size && x < limit
|
595
|
+
# x += 1 if mask[i] != "\0"
|
596
|
+
# i += 1
|
597
|
+
# end
|
598
|
+
#
|
599
|
+
# while x > 0 && mask[x,1] == "\0"
|
600
|
+
# x -= 1
|
601
|
+
# end
|
602
|
+
#
|
603
|
+
# return html[0..x]
|
604
|
+
#end
|
605
|
+
|
507
606
|
end
|
508
607
|
|
509
608
|
# Overload the standard String class for extra convenience.
|
510
609
|
|
511
610
|
class String
|
512
611
|
def html_filter(*opts)
|
513
|
-
|
612
|
+
HTMLFilter.new(*opts).filter(self)
|
514
613
|
end
|
515
614
|
end
|
516
615
|
|
616
|
+
# For backward compatibility. Eventually this will be deprecated.
|
617
|
+
HtmlFilter = HTMLFilter
|
618
|
+
|
619
|
+
|
620
|
+
|
data/meta/{project → collection}
RENAMED
File without changes
|
data/meta/contact
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rubyworks-mailinglist@googlegroups.com
|
data/meta/description
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Sanitize and sterilize HTML, also includes a CSS filter.
|
data/meta/homepage
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
http://rubyworks.github.com/htmlfilter
|
data/meta/{package → name}
RENAMED
File without changes
|
data/meta/repository
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
git://github.com/rubyworks/htmlfilter.git
|
data/meta/version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.1
|
data/test/test_cssfilter.rb
CHANGED
@@ -2,7 +2,7 @@ require "test/unit"
|
|
2
2
|
require "cssfilter"
|
3
3
|
#require 'yaml'
|
4
4
|
|
5
|
-
class
|
5
|
+
class TestCSSFilter < Test::Unit::TestCase
|
6
6
|
|
7
7
|
def setup
|
8
8
|
@css = <<-END
|
@@ -26,7 +26,7 @@ class TestCssFilter < Test::Unit::TestCase
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_filter
|
29
|
-
cssfilter =
|
29
|
+
cssfilter = CSSFilter.new(:allowed_hosts=>["here.org"], :strip_whitespace => true)
|
30
30
|
csstree = cssfilter.filter(@css)
|
31
31
|
assert_equal(@result, csstree.to_s)
|
32
32
|
end
|
data/test/test_htmlfilter.rb
CHANGED
@@ -1,28 +1,12 @@
|
|
1
1
|
require "test/unit"
|
2
2
|
require "htmlfilter"
|
3
3
|
|
4
|
-
class
|
4
|
+
class TestHTMLFilter < Test::Unit::TestCase
|
5
5
|
|
6
6
|
# core tests
|
7
7
|
|
8
|
-
def test_multiton_without_options
|
9
|
-
h1 = HtmlFilter.new
|
10
|
-
h2 = HtmlFilter.new
|
11
|
-
h3 = HtmlFilter.new( :strip_comments => false )
|
12
|
-
assert_equal( h1.object_id, h2.object_id )
|
13
|
-
assert_not_equal( h1.object_id, h3.object_id )
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_multiton_with_options
|
17
|
-
h1 = HtmlFilter.new( :strip_comments => false )
|
18
|
-
h2 = HtmlFilter.new( :strip_comments => false )
|
19
|
-
h3 = HtmlFilter.new
|
20
|
-
assert_equal( h1.object_id, h2.object_id )
|
21
|
-
assert_not_equal( h1.object_id, h3.object_id )
|
22
|
-
end
|
23
|
-
|
24
8
|
def test_strip_single
|
25
|
-
hf =
|
9
|
+
hf = HTMLFilter.new
|
26
10
|
assert_equal( '"', hf.send(:strip_single,'\"') )
|
27
11
|
assert_equal( "\000", hf.send(:strip_single,'\0') )
|
28
12
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: "1.1"
|
5
5
|
platform: ruby
|
6
6
|
authors: []
|
7
7
|
|
@@ -9,46 +9,48 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-11-24 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description:
|
17
|
-
email:
|
16
|
+
description: Sanitize and sterilize HTML, also includes a CSS filter.
|
17
|
+
email: rubyworks-mailinglist@googlegroups.com
|
18
18
|
executables: []
|
19
19
|
|
20
20
|
extensions: []
|
21
21
|
|
22
22
|
extra_rdoc_files:
|
23
23
|
- Rakefile
|
24
|
-
-
|
24
|
+
- MANIFEST
|
25
25
|
- TODO
|
26
26
|
- README.rdoc
|
27
|
-
-
|
27
|
+
- HISTORY
|
28
28
|
files:
|
29
29
|
- lib/cssfilter.rb
|
30
|
-
- lib/htmlfilter/multiton.rb
|
31
30
|
- lib/htmlfilter.rb
|
32
|
-
- meta/
|
33
|
-
- meta/
|
31
|
+
- meta/collection
|
32
|
+
- meta/contact
|
33
|
+
- meta/description
|
34
|
+
- meta/homepage
|
35
|
+
- meta/name
|
36
|
+
- meta/repository
|
34
37
|
- meta/title
|
35
38
|
- meta/version
|
36
39
|
- test/test_cssfilter.rb
|
37
40
|
- test/test_htmlfilter.rb
|
38
41
|
- Rakefile
|
39
|
-
- Manifest.txt
|
40
42
|
- TODO
|
41
43
|
- README.rdoc
|
42
|
-
-
|
44
|
+
- HISTORY
|
45
|
+
- MANIFEST
|
43
46
|
has_rdoc: true
|
44
|
-
homepage:
|
47
|
+
homepage: http://rubyworks.github.com/htmlfilter
|
45
48
|
licenses: []
|
46
49
|
|
47
50
|
post_install_message:
|
48
51
|
rdoc_options:
|
49
|
-
- --inline-source
|
50
52
|
- --title
|
51
|
-
-
|
53
|
+
- HTMLFilter API
|
52
54
|
require_paths:
|
53
55
|
- lib
|
54
56
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -69,7 +71,7 @@ rubyforge_project: htmlfilter
|
|
69
71
|
rubygems_version: 1.3.5
|
70
72
|
signing_key:
|
71
73
|
specification_version: 3
|
72
|
-
summary:
|
74
|
+
summary: Sanitize and sterilize HTML, also includes a CSS filter.
|
73
75
|
test_files:
|
74
76
|
- test/test_cssfilter.rb
|
75
77
|
- test/test_htmlfilter.rb
|
data/History.rdoc
DELETED
data/lib/htmlfilter/multiton.rb
DELETED
@@ -1,386 +0,0 @@
|
|
1
|
-
# = Multiton
|
2
|
-
#
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# Multiton design pattern ensures only one object is allocated for a given state.
|
6
|
-
#
|
7
|
-
# The 'multiton' pattern is similar to a singleton, but instead of only one
|
8
|
-
# instance, there are several similar instances. It is useful when you want to
|
9
|
-
# avoid constructing objects many times because of some huge expense (connecting
|
10
|
-
# to a database for example), require a set of similar but not identical
|
11
|
-
# objects, and cannot easily control how many times a contructor may be called.
|
12
|
-
#
|
13
|
-
# class SomeMultitonClass
|
14
|
-
# include Multiton
|
15
|
-
# attr :arg
|
16
|
-
# def initialize(arg)
|
17
|
-
# @arg = arg
|
18
|
-
# end
|
19
|
-
# end
|
20
|
-
#
|
21
|
-
# a = SomeMultitonClass.new(4)
|
22
|
-
# b = SomeMultitonClass.new(4) # a and b are same object
|
23
|
-
# c = SomeMultitonClass.new(2) # c is a different object
|
24
|
-
#
|
25
|
-
# == Previous Behavior
|
26
|
-
#
|
27
|
-
# In previous versions of Multiton the #new method was made
|
28
|
-
# private and #instance had to be used in its stay --just like Singleton.
|
29
|
-
# But this is less desirable for Multiton since Multitions can
|
30
|
-
# have multiple instances, not just one.
|
31
|
-
#
|
32
|
-
# So instead Multiton now defines #create as a private alias of
|
33
|
-
# the original #new method (just in case it is needed) and then
|
34
|
-
# defines #new to handle the multiton; #instance is provided
|
35
|
-
# as an alias for it.
|
36
|
-
#
|
37
|
-
#--
|
38
|
-
# So if you must have the old behavior, all you need do is re-alias
|
39
|
-
# #new to #create and privatize it.
|
40
|
-
#
|
41
|
-
# class SomeMultitonClass
|
42
|
-
# include Multiton
|
43
|
-
# alias_method :new, :create
|
44
|
-
# private :new
|
45
|
-
# ...
|
46
|
-
# end
|
47
|
-
#
|
48
|
-
# Then only #instance will be available for creating the Multiton.
|
49
|
-
#++
|
50
|
-
#
|
51
|
-
# == How It Works
|
52
|
-
#
|
53
|
-
# A pool of objects is searched for a previously cached object,
|
54
|
-
# if one is not found we construct one and cache it in the pool
|
55
|
-
# based on class and the args given to the contructor.
|
56
|
-
#
|
57
|
-
# A limitation of this approach is that it is impossible to
|
58
|
-
# detect if different blocks were given to a contructor (if it takes a
|
59
|
-
# block). So it is the constructor arguments _only_ which determine
|
60
|
-
# the uniqueness of an object. To workaround this, define the _class_
|
61
|
-
# method ::multiton_id.
|
62
|
-
#
|
63
|
-
# def Klass.multiton_id(*args, &block)
|
64
|
-
# # ...
|
65
|
-
# end
|
66
|
-
#
|
67
|
-
# Which should return a hash key used to identify the object being
|
68
|
-
# constructed as (not) unique.
|
69
|
-
#
|
70
|
-
# == Authors
|
71
|
-
#
|
72
|
-
# * Christoph Rippel
|
73
|
-
# * Thomas Sawyer
|
74
|
-
#
|
75
|
-
# = Copying
|
76
|
-
#
|
77
|
-
# Copyright (c) 2007 Christoph Rippel, Thomas Sawyer
|
78
|
-
#
|
79
|
-
# Ruby License
|
80
|
-
#
|
81
|
-
# This module is free software. You may use, modify, and/or redistribute this
|
82
|
-
# software under the same terms as Ruby.
|
83
|
-
#
|
84
|
-
# This program is distributed in the hope that it will be useful, but WITHOUT
|
85
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
86
|
-
# FOR A PARTICULAR PURPOSE.
|
87
|
-
|
88
|
-
require 'thread'
|
89
|
-
|
90
|
-
# = Multiton
|
91
|
-
#
|
92
|
-
# Multiton design pattern ensures only one object is allocated for a given state.
|
93
|
-
#
|
94
|
-
# The 'multiton' pattern is similar to a singleton, but instead of only one
|
95
|
-
# instance, there are several similar instances. It is useful when you want to
|
96
|
-
# avoid constructing objects many times because of some huge expense (connecting
|
97
|
-
# to a database for example), require a set of similar but not identical
|
98
|
-
# objects, and cannot easily control how many times a contructor may be called.
|
99
|
-
#
|
100
|
-
# class SomeMultitonClass
|
101
|
-
# include Multiton
|
102
|
-
# attr :arg
|
103
|
-
# def initialize(arg)
|
104
|
-
# @arg = arg
|
105
|
-
# end
|
106
|
-
# end
|
107
|
-
#
|
108
|
-
# a = SomeMultitonClass.new(4)
|
109
|
-
# b = SomeMultitonClass.new(4) # a and b are same object
|
110
|
-
# c = SomeMultitonClass.new(2) # c is a different object
|
111
|
-
#
|
112
|
-
# == How It Works
|
113
|
-
#
|
114
|
-
# A pool of objects is searched for a previously cached object,
|
115
|
-
# if one is not found we construct one and cache it in the pool
|
116
|
-
# based on class and the args given to the contructor.
|
117
|
-
#
|
118
|
-
# A limitation of this approach is that it is impossible to
|
119
|
-
# detect if different blocks were given to a contructor (if it takes a
|
120
|
-
# block). So it is the constructor arguments _only_ which determine
|
121
|
-
# the uniqueness of an object. To workaround this, define the _class_
|
122
|
-
# method ::multiton_id.
|
123
|
-
#
|
124
|
-
# def Klass.multiton_id(*args, &block)
|
125
|
-
# # ...
|
126
|
-
# end
|
127
|
-
#
|
128
|
-
# Which should return a hash key used to identify the object being
|
129
|
-
# constructed as (not) unique.
|
130
|
-
|
131
|
-
module Multiton
|
132
|
-
|
133
|
-
# disable build-in copying methods
|
134
|
-
|
135
|
-
def clone
|
136
|
-
raise TypeError, "can't clone Multiton #{self}"
|
137
|
-
#self
|
138
|
-
end
|
139
|
-
|
140
|
-
def dup
|
141
|
-
raise TypeError, "can't dup Multiton #{self}"
|
142
|
-
#self
|
143
|
-
end
|
144
|
-
|
145
|
-
# default marshalling strategy
|
146
|
-
|
147
|
-
protected
|
148
|
-
|
149
|
-
def _dump(depth=-1)
|
150
|
-
Marshal.dump(@multiton_initializer)
|
151
|
-
end
|
152
|
-
|
153
|
-
# Mutex to safely store multiton instances.
|
154
|
-
|
155
|
-
class InstanceMutex < Hash #:nodoc:
|
156
|
-
def initialize
|
157
|
-
@global = Mutex.new
|
158
|
-
end
|
159
|
-
|
160
|
-
def initialized(arg)
|
161
|
-
store(arg, DummyMutex)
|
162
|
-
end
|
163
|
-
|
164
|
-
def (DummyMutex = Object.new).synchronize
|
165
|
-
yield
|
166
|
-
end
|
167
|
-
|
168
|
-
def default(arg)
|
169
|
-
@global.synchronize{ fetch(arg){ store(arg, Mutex.new) } }
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
# Multiton can be included in another module, in which case that module effectively becomes
|
174
|
-
# a multiton behavior distributor too. This is why we propogate #included to the base module.
|
175
|
-
# by putting it in another module.
|
176
|
-
#
|
177
|
-
#--
|
178
|
-
# def append_features(mod)
|
179
|
-
# # help out people counting on transitive mixins
|
180
|
-
# unless mod.instance_of?(Class)
|
181
|
-
# raise TypeError, "Inclusion of Multiton in module #{mod}"
|
182
|
-
# end
|
183
|
-
# super
|
184
|
-
# end
|
185
|
-
#++
|
186
|
-
|
187
|
-
module Inclusive
|
188
|
-
private
|
189
|
-
def included(base)
|
190
|
-
class << base
|
191
|
-
#alias_method(:new!, :new) unless method_defined?(:new!)
|
192
|
-
# gracefully handle multiple inclusions of Multiton
|
193
|
-
unless include?(Multiton::MetaMethods)
|
194
|
-
alias_method :new!, :new
|
195
|
-
private :allocate #, :new
|
196
|
-
include Multiton::MetaMethods
|
197
|
-
|
198
|
-
if method_defined?(:marshal_dump)
|
199
|
-
undef_method :marshal_dump
|
200
|
-
warn "warning: marshal_dump was undefined since it is incompatible with the Multiton pattern"
|
201
|
-
end
|
202
|
-
end
|
203
|
-
end
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
extend Inclusive
|
208
|
-
|
209
|
-
#
|
210
|
-
|
211
|
-
module MetaMethods
|
212
|
-
|
213
|
-
include Inclusive
|
214
|
-
|
215
|
-
def instance(*e, &b)
|
216
|
-
arg = multiton_id(*e, &b)
|
217
|
-
multiton_instance.fetch(arg) do
|
218
|
-
multiton_mutex[arg].synchronize do
|
219
|
-
multiton_instance.fetch(arg) do
|
220
|
-
val = multiton_instance[arg] = new!(*e, &b) #new(*e, &b)
|
221
|
-
val.instance_variable_set(:@multiton_initializer, e, &b)
|
222
|
-
multiton_mutex.initialized(arg)
|
223
|
-
val
|
224
|
-
end
|
225
|
-
end
|
226
|
-
end
|
227
|
-
end
|
228
|
-
alias_method :new, :instance
|
229
|
-
|
230
|
-
def initialized?(*e, &b)
|
231
|
-
multiton_instance.key?(multiton_id(*e, &b))
|
232
|
-
end
|
233
|
-
|
234
|
-
protected
|
235
|
-
|
236
|
-
def multiton_instance
|
237
|
-
@multiton_instance ||= Hash.new
|
238
|
-
end
|
239
|
-
|
240
|
-
def multiton_mutex
|
241
|
-
@multiton_mutex ||= InstanceMutex.new
|
242
|
-
end
|
243
|
-
|
244
|
-
def reinitialize
|
245
|
-
multiton_instance.clear
|
246
|
-
multiton_mutex.clear
|
247
|
-
end
|
248
|
-
|
249
|
-
def _load(str)
|
250
|
-
instance(*Marshal.load(str))
|
251
|
-
end
|
252
|
-
|
253
|
-
private
|
254
|
-
|
255
|
-
# Default method to to create a key to cache already constructed
|
256
|
-
# instances. In the use case MultitonClass.new(e), MultiClass.new(f)
|
257
|
-
# must be semantically equal if multiton_id(e).eql?(multiton_id(f))
|
258
|
-
# evaluates to true.
|
259
|
-
def multiton_id(*e, &b)
|
260
|
-
e
|
261
|
-
end
|
262
|
-
|
263
|
-
def singleton_method_added(sym)
|
264
|
-
super
|
265
|
-
if (sym == :marshal_dump) & singleton_methods.include?('marshal_dump')
|
266
|
-
raise TypeError, "Don't use marshal_dump - rely on _dump and _load instead"
|
267
|
-
end
|
268
|
-
end
|
269
|
-
|
270
|
-
end
|
271
|
-
|
272
|
-
end
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
=begin
|
278
|
-
# TODO Convert this into a real test and/or benchmark.
|
279
|
-
|
280
|
-
if $0 == __FILE__
|
281
|
-
|
282
|
-
### Simple marshalling test #######
|
283
|
-
class A
|
284
|
-
def initialize(a,*e)
|
285
|
-
@e = a
|
286
|
-
end
|
287
|
-
|
288
|
-
include Multiton
|
289
|
-
begin
|
290
|
-
def self.marshal_dump(depth = -1)
|
291
|
-
end
|
292
|
-
rescue => mes
|
293
|
-
p mes
|
294
|
-
class << self; undef marshal_dump end
|
295
|
-
end
|
296
|
-
end
|
297
|
-
|
298
|
-
C = Class.new(A.clone)
|
299
|
-
s = C.instance('a','b')
|
300
|
-
|
301
|
-
raise unless Marshal.load(Marshal.dump(s)) == s
|
302
|
-
|
303
|
-
|
304
|
-
### Interdependent initialization example and threading benchmark ###
|
305
|
-
|
306
|
-
class Regular_SymPlane
|
307
|
-
def self.multiton_id(*e)
|
308
|
-
a,b = e
|
309
|
-
(a+b - 1)*(a+b )/2 + (a > b ? a : b)
|
310
|
-
end
|
311
|
-
|
312
|
-
def initialize(a,b)
|
313
|
-
klass = self.class
|
314
|
-
if a < b
|
315
|
-
@l = b > 0 ? klass.instance(a,b-1) : nil
|
316
|
-
@r = a > 0 ? klass.instance(a-1,b) : nil
|
317
|
-
else
|
318
|
-
@l = a > 0 ? klass.instance(a-1,b) : nil
|
319
|
-
@r = b > 0 ? klass.instance(a,b-1) : nil
|
320
|
-
end
|
321
|
-
end
|
322
|
-
|
323
|
-
include Multiton
|
324
|
-
end
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
def nap
|
329
|
-
# Thread.pass
|
330
|
-
sleep(rand(0.01))
|
331
|
-
end
|
332
|
-
|
333
|
-
class SymPlane < Regular_SymPlane
|
334
|
-
@m = Mutex.new
|
335
|
-
@count = 0
|
336
|
-
end
|
337
|
-
|
338
|
-
class << SymPlane
|
339
|
-
attr_reader :count
|
340
|
-
def reinitialize
|
341
|
-
super
|
342
|
-
@m = Mutex.new
|
343
|
-
@count = 0
|
344
|
-
end
|
345
|
-
def inherited(sub_class)
|
346
|
-
super
|
347
|
-
sub_class.instance_eval { @m = Mutex.new; @count = 0 }
|
348
|
-
end
|
349
|
-
|
350
|
-
def multiton_id(*e)
|
351
|
-
nap()
|
352
|
-
super
|
353
|
-
end
|
354
|
-
|
355
|
-
def new!(*e) # NOTICE!!!
|
356
|
-
super
|
357
|
-
ensure
|
358
|
-
nap()
|
359
|
-
@m.synchronize { p @count if (@count += 1) % 15 == 0 }
|
360
|
-
end
|
361
|
-
|
362
|
-
def run(k)
|
363
|
-
threads = 0
|
364
|
-
max = k * (k+1) / 2
|
365
|
-
puts ""
|
366
|
-
while count() < max
|
367
|
-
Thread.new { threads+= 1; instance(rand(30),rand(30)) }
|
368
|
-
end
|
369
|
-
puts "\nThe simulation created #{threads} threads"
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
|
-
|
374
|
-
require 'benchmark'
|
375
|
-
include Benchmark
|
376
|
-
|
377
|
-
bmbm do |x|
|
378
|
-
x.report('Initialize 465 SymPlane instances') { SymPlane.run(30) }
|
379
|
-
x.report('Reinitialize ') do
|
380
|
-
sleep 3
|
381
|
-
SymPlane.reinitialize
|
382
|
-
end
|
383
|
-
end
|
384
|
-
|
385
|
-
end
|
386
|
-
=end
|