htmlfilter 1.0.0 → 1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +25 -0
- data/{Manifest.txt → MANIFEST} +7 -9
- data/README.rdoc +7 -7
- data/TODO +0 -2
- data/lib/cssfilter.rb +32 -28
- data/lib/htmlfilter.rb +136 -32
- data/meta/{project → collection} +0 -0
- data/meta/contact +1 -0
- data/meta/description +1 -0
- data/meta/homepage +1 -0
- data/meta/{package → name} +0 -0
- data/meta/repository +1 -0
- data/meta/version +1 -1
- data/test/test_cssfilter.rb +2 -2
- data/test/test_htmlfilter.rb +2 -18
- metadata +17 -15
- data/History.rdoc +0 -6
- data/lib/htmlfilter/multiton.rb +0 -386
data/HISTORY
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= RELEASE HISTORY
|
2
|
+
|
3
|
+
== 1.1 / 2009-11-24
|
4
|
+
|
5
|
+
This release adjusts the names of the classes to
|
6
|
+
be capitalized according to the actual use of the
|
7
|
+
terms. Some alternate options presets have been added
|
8
|
+
as well, and this release sheds the Multiton, which
|
9
|
+
was basically a YAGNI.
|
10
|
+
|
11
|
+
Changes:
|
12
|
+
|
13
|
+
* Renamed HtmlFilter to HTMLFilter.
|
14
|
+
* Renamed CssFilter to CSSFilter
|
15
|
+
* HTMLFilter is no longer a Multiton.
|
16
|
+
* Old names are still available temporarily.
|
17
|
+
* Added built-in option constants.
|
18
|
+
* CssTree is now CSSFilter::Tree.
|
19
|
+
|
20
|
+
== 1.0.0 / 2009-06-25
|
21
|
+
|
22
|
+
Changes:
|
23
|
+
|
24
|
+
* Birthday! (Spun-off from Ruby Facets)
|
25
|
+
|
data/{Manifest.txt → MANIFEST}
RENAMED
@@ -1,19 +1,17 @@
|
|
1
1
|
#!mast bin lib meta test [A-Z]*
|
2
|
-
lib
|
3
2
|
lib/cssfilter.rb
|
4
|
-
lib/htmlfilter
|
5
|
-
lib/htmlfilter/multiton.rb
|
6
3
|
lib/htmlfilter.rb
|
7
|
-
meta
|
8
|
-
meta/
|
9
|
-
meta/
|
4
|
+
meta/collection
|
5
|
+
meta/contact
|
6
|
+
meta/description
|
7
|
+
meta/homepage
|
8
|
+
meta/name
|
9
|
+
meta/repository
|
10
10
|
meta/title
|
11
11
|
meta/version
|
12
|
-
test
|
13
12
|
test/test_cssfilter.rb
|
14
13
|
test/test_htmlfilter.rb
|
15
14
|
Rakefile
|
16
|
-
Manifest.txt
|
17
15
|
TODO
|
18
16
|
README.rdoc
|
19
|
-
|
17
|
+
HISTORY
|
data/README.rdoc
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
= HtmlFilter
|
2
2
|
|
3
3
|
* http://rubyworks.github.com/htmlfilter
|
4
|
+
* http://github.com/rubyworks/htmlfilter
|
4
5
|
|
5
6
|
== DESCRIPTION:
|
6
7
|
|
@@ -14,8 +15,11 @@ whitespace and most importantly remove urls.
|
|
14
15
|
|
15
16
|
== FEATURES:
|
16
17
|
|
17
|
-
*
|
18
|
-
*
|
18
|
+
* Based on well-worn PHP library.
|
19
|
+
* Regular expression based filtering.
|
20
|
+
* Very efficient for small snippets, like blog comments.
|
21
|
+
* Pure-Ruby and no dependencies.
|
22
|
+
* Also has library to clean and compact cascading stylesheets.
|
19
23
|
|
20
24
|
== SYNOPSIS:
|
21
25
|
|
@@ -27,14 +31,10 @@ Via the class.
|
|
27
31
|
|
28
32
|
Or using the String extension.
|
29
33
|
|
30
|
-
html.html_filter #=> "<b>hello</b>"
|
34
|
+
html.html_filter(options) #=> "<b>hello</b>"
|
31
35
|
|
32
36
|
See RDocs for more information.
|
33
37
|
|
34
|
-
== REQUIREMENTS:
|
35
|
-
|
36
|
-
* Uses a copy of multiton.rb (included)
|
37
|
-
|
38
38
|
== INSTALL:
|
39
39
|
|
40
40
|
* sudo gem install htmlfilter
|
data/TODO
CHANGED
data/lib/cssfilter.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# = CSS Filter
|
2
2
|
#
|
3
|
-
# The
|
3
|
+
# The CSSFilter class will clean up a cascading stylesheet.
|
4
4
|
# It can be used to remove whitespace and most importantly
|
5
5
|
# remove urls.
|
6
6
|
#
|
@@ -8,25 +8,27 @@
|
|
8
8
|
#
|
9
9
|
# * Trans
|
10
10
|
#
|
11
|
-
# ==
|
11
|
+
# == Copying
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# Copyright (c) 2007 Thomas Sawyer
|
14
14
|
#
|
15
|
-
#
|
15
|
+
# Creative Commons Attribution-ShareAlike 3.0 License
|
16
16
|
#
|
17
|
-
#
|
17
|
+
# Ref. http://creativecommons.org/licenses/by-sa/3.0/
|
18
|
+
|
19
|
+
|
20
|
+
# TODO: Allow urls to be specified per attribute type.
|
18
21
|
|
19
22
|
#require 'htmlfilter/uri'
|
20
23
|
require 'uri'
|
21
24
|
|
22
25
|
# = CSS Filter
|
23
26
|
#
|
24
|
-
# The
|
27
|
+
# The CSSFilter class will clean up a cascading style sheet.
|
25
28
|
# It can be used to remove whitespace and most importantly
|
26
29
|
# remove urls.
|
27
|
-
|
28
|
-
class
|
29
|
-
VERSION="1.0.0"
|
30
|
+
|
31
|
+
class CSSFilter
|
30
32
|
|
31
33
|
# should we remove comments? (true, false)
|
32
34
|
attr_accessor :strip_comments
|
@@ -158,7 +160,7 @@ class CssFilter
|
|
158
160
|
# TODO: Not complete, does not work with "@xxx foo;" for example.
|
159
161
|
|
160
162
|
def parse(css)
|
161
|
-
tree =
|
163
|
+
tree = Tree.new
|
162
164
|
entries = css.scan(/^(.*?)\{(.*?)\}/m)
|
163
165
|
entries.each do |ref, props|
|
164
166
|
tree[ref.strip] ||= {}
|
@@ -196,31 +198,33 @@ class CssFilter
|
|
196
198
|
return val
|
197
199
|
end
|
198
200
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
# CSS parse tree. This is for a "deep filtering".
|
201
|
+
# CSS parse tree. This is for a "deep filtering".
|
203
202
|
|
204
|
-
class
|
203
|
+
class Tree < Hash
|
205
204
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
205
|
+
def initialize(options=nil)
|
206
|
+
@options = options || {}
|
207
|
+
super()
|
208
|
+
end
|
210
209
|
|
211
|
-
|
210
|
+
# Re-output the CSS, all tidy ;)
|
212
211
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
212
|
+
def to_css
|
213
|
+
css = ""
|
214
|
+
each do |selector, entries|
|
215
|
+
css << "#{selector}{"
|
216
|
+
entries.each do |key, value|
|
217
|
+
css << "#{key}:#{value};"
|
218
|
+
end
|
219
|
+
css << "}\n"
|
219
220
|
end
|
220
|
-
css
|
221
|
+
return css
|
221
222
|
end
|
222
|
-
|
223
|
+
|
223
224
|
end
|
224
225
|
|
225
226
|
end
|
226
227
|
|
228
|
+
# For backward compatibility. Eventually this will be deprecated.
|
229
|
+
CssFilter = CSSFilter
|
230
|
+
|
data/lib/htmlfilter.rb
CHANGED
@@ -5,9 +5,8 @@
|
|
5
5
|
# for instance.
|
6
6
|
#
|
7
7
|
# HtmlFilter is a port of lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# http://creativecommons.org/licenses/by-sa/2.5/
|
8
|
+
# licensed under a Creative Commons Attribution-ShareAlike 2.5 License
|
9
|
+
# http://creativecommons.org/licenses/by-sa/2.5/.
|
11
10
|
#
|
12
11
|
# Thanks to Jang Kim for adding support for single quoted attributes.
|
13
12
|
#
|
@@ -26,32 +25,35 @@
|
|
26
25
|
#
|
27
26
|
# == Copying
|
28
27
|
#
|
29
|
-
# Copyright (c) 2007
|
28
|
+
# Copyright (c) 2007 Thomas Sawyer
|
29
|
+
#
|
30
|
+
# Creative Commons Attribution-ShareAlike 3.0 License
|
31
|
+
#
|
32
|
+
# Ref. http://creativecommons.org/licenses/by-sa/3.0/
|
30
33
|
|
31
|
-
require 'htmlfilter/multiton.rb'
|
32
34
|
|
33
|
-
# =
|
35
|
+
# = HTMLFilter
|
34
36
|
#
|
35
37
|
# HTML Filter library can be used to sanitize and sterilize
|
36
38
|
# HTML. A good idea if you let users submit HTML in comments,
|
37
39
|
# for instance.
|
38
40
|
#
|
39
|
-
#
|
41
|
+
# == Usage
|
40
42
|
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
# Thanks to Jang Kim for adding support for single quoted attributes.
|
43
|
+
# hf = HTMLFilter.new
|
44
|
+
# hf.filter("<b>Bold Action") #=> "<b>Bold Action</b>"
|
45
45
|
#
|
46
46
|
# == Reference
|
47
47
|
#
|
48
48
|
# * http://iamcal.com/publish/articles/php/processing_html/
|
49
49
|
# * http://iamcal.com/publish/articles/php/processing_html_part_2/
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
50
|
+
#
|
51
|
+
# == Issues
|
52
|
+
#
|
53
|
+
# * The built in option constants could use a fair bit of refinement.
|
54
|
+
# * Eventually the old HtmlFilter name needs to be deprecated.
|
55
|
+
#
|
56
|
+
class HTMLFilter
|
55
57
|
|
56
58
|
# tags and attributes that are allowed
|
57
59
|
#
|
@@ -85,7 +87,7 @@ class HtmlFilter
|
|
85
87
|
# should we remove comments? (true, false)
|
86
88
|
attr_accessor :strip_comments
|
87
89
|
|
88
|
-
# should we try and make a b tag out of "b>" (true, false)
|
90
|
+
# should we try and make a <b> tag out of "b>" (true, false)
|
89
91
|
attr_accessor :always_make_tags
|
90
92
|
|
91
93
|
# entity control option (true, false)
|
@@ -94,14 +96,18 @@ class HtmlFilter
|
|
94
96
|
# entity control option (amp, gt, lt, quot, etc.)
|
95
97
|
attr_accessor :allowed_entities
|
96
98
|
|
97
|
-
|
99
|
+
## max number of text characters at which to truncate (leave as +nil+ for no truncation)
|
100
|
+
#attr_accessor :truncate
|
98
101
|
|
102
|
+
# Default settings
|
99
103
|
DEFAULT = {
|
100
104
|
'allowed' => {
|
101
105
|
'a' => ['href', 'target'],
|
106
|
+
'img' => ['src', 'width', 'height', 'alt'],
|
102
107
|
'b' => [],
|
103
108
|
'i' => [],
|
104
|
-
'
|
109
|
+
'em' => [],
|
110
|
+
'tt' => [],
|
105
111
|
},
|
106
112
|
'no_close' => ['img', 'br', 'hr'],
|
107
113
|
'always_close' => ['a', 'b'],
|
@@ -114,9 +120,75 @@ class HtmlFilter
|
|
114
120
|
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
115
121
|
}
|
116
122
|
|
117
|
-
#
|
123
|
+
# Basic settings are similar to DEFAULT but do not allow any type
|
124
|
+
# of links, neither <tt>a href</tt> nor <tt>img</tt>.
|
125
|
+
BASIC = {
|
126
|
+
'allowed' => {
|
127
|
+
'b' => [],
|
128
|
+
'i' => [],
|
129
|
+
'em' => [],
|
130
|
+
'tt' => [],
|
131
|
+
},
|
132
|
+
'no_close' => ['img', 'br', 'hr'],
|
133
|
+
'always_close' => ['a', 'b'],
|
134
|
+
'protocol_attributes' => ['src', 'href'],
|
135
|
+
'allowed_protocols' => ['http', 'ftp', 'mailto'],
|
136
|
+
'remove_blanks' => ['a', 'b'],
|
137
|
+
'strip_comments' => true,
|
138
|
+
'always_make_tags' => true,
|
139
|
+
'allow_numbered_entities' => true,
|
140
|
+
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
141
|
+
}
|
142
|
+
|
143
|
+
# Strict settings do not allow any tags.
|
144
|
+
STRICT = {
|
145
|
+
'allowed' => {},
|
146
|
+
'no_close' => ['img', 'br', 'hr'],
|
147
|
+
'always_close' => ['a', 'b'],
|
148
|
+
'protocol_attributes' => ['src', 'href'],
|
149
|
+
'allowed_protocols' => ['http', 'ftp', 'mailto'],
|
150
|
+
'remove_blanks' => ['a', 'b'],
|
151
|
+
'strip_comments' => true,
|
152
|
+
'always_make_tags' => true,
|
153
|
+
'allow_numbered_entities' => true,
|
154
|
+
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
155
|
+
}
|
118
156
|
|
119
|
-
|
157
|
+
# Relaxed settings allow a great deal of the HTML spec.
|
158
|
+
#
|
159
|
+
# TODO: Need to expand upon RELAXED options.
|
160
|
+
#
|
161
|
+
RELAXED = {
|
162
|
+
'allowed' => {
|
163
|
+
'a' => ['class', 'href', 'target'],
|
164
|
+
'b' => ['class'],
|
165
|
+
'i' => ['class'],
|
166
|
+
'img' => ['class', 'src', 'width', 'height', 'alt'],
|
167
|
+
'div' => ['class'],
|
168
|
+
'pre' => ['class'],
|
169
|
+
'code' => ['class'],
|
170
|
+
'ul' => ['class'], 'ol' => ['class'], 'li' => ['class']
|
171
|
+
},
|
172
|
+
'no_close' => ['img', 'br', 'hr'],
|
173
|
+
'always_close' => ['a', 'b'],
|
174
|
+
'protocol_attributes' => ['src', 'href'],
|
175
|
+
'allowed_protocols' => ['http', 'ftp', 'mailto'],
|
176
|
+
'remove_blanks' => ['a', 'b'],
|
177
|
+
'strip_comments' => true,
|
178
|
+
'always_make_tags' => true,
|
179
|
+
'allow_numbered_entities' => true,
|
180
|
+
'allowed_entities' => ['amp', 'gt', 'lt', 'quot']
|
181
|
+
}
|
182
|
+
|
183
|
+
# New html filter.
|
184
|
+
#
|
185
|
+
# Provide custom +options+, or use one of the built-in options
|
186
|
+
# constants.
|
187
|
+
#
|
188
|
+
# hf = HTMLFilter.new(HTMLFilter::RELAXED)
|
189
|
+
# hf.filter(htmlstr)
|
190
|
+
#
|
191
|
+
def initialize(options=nil)
|
120
192
|
if options
|
121
193
|
h = DEFAULT.dup
|
122
194
|
options.each do |k,v|
|
@@ -126,22 +198,20 @@ class HtmlFilter
|
|
126
198
|
else
|
127
199
|
options = DEFAULT.dup
|
128
200
|
end
|
129
|
-
|
130
201
|
options.each{ |k,v| send("#{k}=",v) }
|
131
202
|
end
|
132
203
|
|
133
204
|
# Filter html string.
|
134
205
|
|
135
|
-
def filter(
|
206
|
+
def filter(html)
|
136
207
|
@tag_counts = {}
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
return data
|
208
|
+
html = escape_comments(html)
|
209
|
+
html = balance_html(html)
|
210
|
+
html = check_tags(html)
|
211
|
+
html = process_remove_blanks(html)
|
212
|
+
html = validate_entities(html)
|
213
|
+
#html = truncate_html(html)
|
214
|
+
html
|
145
215
|
end
|
146
216
|
|
147
217
|
private
|
@@ -504,13 +574,47 @@ class HtmlFilter
|
|
504
574
|
return data
|
505
575
|
end
|
506
576
|
|
577
|
+
## HTML comment regular expression
|
578
|
+
#REM_RE = %r{<\!--(.*?)-->}
|
579
|
+
#
|
580
|
+
## HTML tag regular expression
|
581
|
+
#TAG_RE = %r{</?\w+((\s+\w+(\s*=\s*(?:"(.|\n)*?"|'(.|\n)*?'|[^'">\s]+))?)+\s*|\s*)/?>} #'
|
582
|
+
#
|
583
|
+
##
|
584
|
+
#def truncate_html(html)
|
585
|
+
# return html unless truncate
|
586
|
+
# # default settings
|
587
|
+
# limit = truncate
|
588
|
+
#
|
589
|
+
# mask = html.gsub(REM_RE){ |m| "\0" * m.size }
|
590
|
+
# mask = mask.gsub(TAG_RE){ |m| "\0" * m.size }
|
591
|
+
#
|
592
|
+
# i, x = 0, 0
|
593
|
+
#
|
594
|
+
# while i < mask.size && x < limit
|
595
|
+
# x += 1 if mask[i] != "\0"
|
596
|
+
# i += 1
|
597
|
+
# end
|
598
|
+
#
|
599
|
+
# while x > 0 && mask[x,1] == "\0"
|
600
|
+
# x -= 1
|
601
|
+
# end
|
602
|
+
#
|
603
|
+
# return html[0..x]
|
604
|
+
#end
|
605
|
+
|
507
606
|
end
|
508
607
|
|
509
608
|
# Overload the standard String class for extra convenience.
|
510
609
|
|
511
610
|
class String
|
512
611
|
def html_filter(*opts)
|
513
|
-
|
612
|
+
HTMLFilter.new(*opts).filter(self)
|
514
613
|
end
|
515
614
|
end
|
516
615
|
|
616
|
+
# For backward compatibility. Eventually this will be deprecated.
|
617
|
+
HtmlFilter = HTMLFilter
|
618
|
+
|
619
|
+
|
620
|
+
|
data/meta/{project → collection}
RENAMED
File without changes
|
data/meta/contact
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rubyworks-mailinglist@googlegroups.com
|
data/meta/description
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Sanitize and sterilize HTML, also includes a CSS filter.
|
data/meta/homepage
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
http://rubyworks.github.com/htmlfilter
|
data/meta/{package → name}
RENAMED
File without changes
|
data/meta/repository
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
git://github.com/rubyworks/htmlfilter.git
|
data/meta/version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.1
|
data/test/test_cssfilter.rb
CHANGED
@@ -2,7 +2,7 @@ require "test/unit"
|
|
2
2
|
require "cssfilter"
|
3
3
|
#require 'yaml'
|
4
4
|
|
5
|
-
class
|
5
|
+
class TestCSSFilter < Test::Unit::TestCase
|
6
6
|
|
7
7
|
def setup
|
8
8
|
@css = <<-END
|
@@ -26,7 +26,7 @@ class TestCssFilter < Test::Unit::TestCase
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_filter
|
29
|
-
cssfilter =
|
29
|
+
cssfilter = CSSFilter.new(:allowed_hosts=>["here.org"], :strip_whitespace => true)
|
30
30
|
csstree = cssfilter.filter(@css)
|
31
31
|
assert_equal(@result, csstree.to_s)
|
32
32
|
end
|
data/test/test_htmlfilter.rb
CHANGED
@@ -1,28 +1,12 @@
|
|
1
1
|
require "test/unit"
|
2
2
|
require "htmlfilter"
|
3
3
|
|
4
|
-
class
|
4
|
+
class TestHTMLFilter < Test::Unit::TestCase
|
5
5
|
|
6
6
|
# core tests
|
7
7
|
|
8
|
-
def test_multiton_without_options
|
9
|
-
h1 = HtmlFilter.new
|
10
|
-
h2 = HtmlFilter.new
|
11
|
-
h3 = HtmlFilter.new( :strip_comments => false )
|
12
|
-
assert_equal( h1.object_id, h2.object_id )
|
13
|
-
assert_not_equal( h1.object_id, h3.object_id )
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_multiton_with_options
|
17
|
-
h1 = HtmlFilter.new( :strip_comments => false )
|
18
|
-
h2 = HtmlFilter.new( :strip_comments => false )
|
19
|
-
h3 = HtmlFilter.new
|
20
|
-
assert_equal( h1.object_id, h2.object_id )
|
21
|
-
assert_not_equal( h1.object_id, h3.object_id )
|
22
|
-
end
|
23
|
-
|
24
8
|
def test_strip_single
|
25
|
-
hf =
|
9
|
+
hf = HTMLFilter.new
|
26
10
|
assert_equal( '"', hf.send(:strip_single,'\"') )
|
27
11
|
assert_equal( "\000", hf.send(:strip_single,'\0') )
|
28
12
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: "1.1"
|
5
5
|
platform: ruby
|
6
6
|
authors: []
|
7
7
|
|
@@ -9,46 +9,48 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-11-24 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description:
|
17
|
-
email:
|
16
|
+
description: Sanitize and sterilize HTML, also includes a CSS filter.
|
17
|
+
email: rubyworks-mailinglist@googlegroups.com
|
18
18
|
executables: []
|
19
19
|
|
20
20
|
extensions: []
|
21
21
|
|
22
22
|
extra_rdoc_files:
|
23
23
|
- Rakefile
|
24
|
-
-
|
24
|
+
- MANIFEST
|
25
25
|
- TODO
|
26
26
|
- README.rdoc
|
27
|
-
-
|
27
|
+
- HISTORY
|
28
28
|
files:
|
29
29
|
- lib/cssfilter.rb
|
30
|
-
- lib/htmlfilter/multiton.rb
|
31
30
|
- lib/htmlfilter.rb
|
32
|
-
- meta/
|
33
|
-
- meta/
|
31
|
+
- meta/collection
|
32
|
+
- meta/contact
|
33
|
+
- meta/description
|
34
|
+
- meta/homepage
|
35
|
+
- meta/name
|
36
|
+
- meta/repository
|
34
37
|
- meta/title
|
35
38
|
- meta/version
|
36
39
|
- test/test_cssfilter.rb
|
37
40
|
- test/test_htmlfilter.rb
|
38
41
|
- Rakefile
|
39
|
-
- Manifest.txt
|
40
42
|
- TODO
|
41
43
|
- README.rdoc
|
42
|
-
-
|
44
|
+
- HISTORY
|
45
|
+
- MANIFEST
|
43
46
|
has_rdoc: true
|
44
|
-
homepage:
|
47
|
+
homepage: http://rubyworks.github.com/htmlfilter
|
45
48
|
licenses: []
|
46
49
|
|
47
50
|
post_install_message:
|
48
51
|
rdoc_options:
|
49
|
-
- --inline-source
|
50
52
|
- --title
|
51
|
-
-
|
53
|
+
- HTMLFilter API
|
52
54
|
require_paths:
|
53
55
|
- lib
|
54
56
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -69,7 +71,7 @@ rubyforge_project: htmlfilter
|
|
69
71
|
rubygems_version: 1.3.5
|
70
72
|
signing_key:
|
71
73
|
specification_version: 3
|
72
|
-
summary:
|
74
|
+
summary: Sanitize and sterilize HTML, also includes a CSS filter.
|
73
75
|
test_files:
|
74
76
|
- test/test_cssfilter.rb
|
75
77
|
- test/test_htmlfilter.rb
|
data/History.rdoc
DELETED
data/lib/htmlfilter/multiton.rb
DELETED
@@ -1,386 +0,0 @@
|
|
1
|
-
# = Multiton
|
2
|
-
#
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# Multiton design pattern ensures only one object is allocated for a given state.
|
6
|
-
#
|
7
|
-
# The 'multiton' pattern is similar to a singleton, but instead of only one
|
8
|
-
# instance, there are several similar instances. It is useful when you want to
|
9
|
-
# avoid constructing objects many times because of some huge expense (connecting
|
10
|
-
# to a database for example), require a set of similar but not identical
|
11
|
-
# objects, and cannot easily control how many times a contructor may be called.
|
12
|
-
#
|
13
|
-
# class SomeMultitonClass
|
14
|
-
# include Multiton
|
15
|
-
# attr :arg
|
16
|
-
# def initialize(arg)
|
17
|
-
# @arg = arg
|
18
|
-
# end
|
19
|
-
# end
|
20
|
-
#
|
21
|
-
# a = SomeMultitonClass.new(4)
|
22
|
-
# b = SomeMultitonClass.new(4) # a and b are same object
|
23
|
-
# c = SomeMultitonClass.new(2) # c is a different object
|
24
|
-
#
|
25
|
-
# == Previous Behavior
|
26
|
-
#
|
27
|
-
# In previous versions of Multiton the #new method was made
|
28
|
-
# private and #instance had to be used in its stay --just like Singleton.
|
29
|
-
# But this is less desirable for Multiton since Multitions can
|
30
|
-
# have multiple instances, not just one.
|
31
|
-
#
|
32
|
-
# So instead Multiton now defines #create as a private alias of
|
33
|
-
# the original #new method (just in case it is needed) and then
|
34
|
-
# defines #new to handle the multiton; #instance is provided
|
35
|
-
# as an alias for it.
|
36
|
-
#
|
37
|
-
#--
|
38
|
-
# So if you must have the old behavior, all you need do is re-alias
|
39
|
-
# #new to #create and privatize it.
|
40
|
-
#
|
41
|
-
# class SomeMultitonClass
|
42
|
-
# include Multiton
|
43
|
-
# alias_method :new, :create
|
44
|
-
# private :new
|
45
|
-
# ...
|
46
|
-
# end
|
47
|
-
#
|
48
|
-
# Then only #instance will be available for creating the Multiton.
|
49
|
-
#++
|
50
|
-
#
|
51
|
-
# == How It Works
|
52
|
-
#
|
53
|
-
# A pool of objects is searched for a previously cached object,
|
54
|
-
# if one is not found we construct one and cache it in the pool
|
55
|
-
# based on class and the args given to the contructor.
|
56
|
-
#
|
57
|
-
# A limitation of this approach is that it is impossible to
|
58
|
-
# detect if different blocks were given to a contructor (if it takes a
|
59
|
-
# block). So it is the constructor arguments _only_ which determine
|
60
|
-
# the uniqueness of an object. To workaround this, define the _class_
|
61
|
-
# method ::multiton_id.
|
62
|
-
#
|
63
|
-
# def Klass.multiton_id(*args, &block)
|
64
|
-
# # ...
|
65
|
-
# end
|
66
|
-
#
|
67
|
-
# Which should return a hash key used to identify the object being
|
68
|
-
# constructed as (not) unique.
|
69
|
-
#
|
70
|
-
# == Authors
|
71
|
-
#
|
72
|
-
# * Christoph Rippel
|
73
|
-
# * Thomas Sawyer
|
74
|
-
#
|
75
|
-
# = Copying
|
76
|
-
#
|
77
|
-
# Copyright (c) 2007 Christoph Rippel, Thomas Sawyer
|
78
|
-
#
|
79
|
-
# Ruby License
|
80
|
-
#
|
81
|
-
# This module is free software. You may use, modify, and/or redistribute this
|
82
|
-
# software under the same terms as Ruby.
|
83
|
-
#
|
84
|
-
# This program is distributed in the hope that it will be useful, but WITHOUT
|
85
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
86
|
-
# FOR A PARTICULAR PURPOSE.
|
87
|
-
|
88
|
-
require 'thread'
|
89
|
-
|
90
|
-
# = Multiton
|
91
|
-
#
|
92
|
-
# Multiton design pattern ensures only one object is allocated for a given state.
|
93
|
-
#
|
94
|
-
# The 'multiton' pattern is similar to a singleton, but instead of only one
|
95
|
-
# instance, there are several similar instances. It is useful when you want to
|
96
|
-
# avoid constructing objects many times because of some huge expense (connecting
|
97
|
-
# to a database for example), require a set of similar but not identical
|
98
|
-
# objects, and cannot easily control how many times a contructor may be called.
|
99
|
-
#
|
100
|
-
# class SomeMultitonClass
|
101
|
-
# include Multiton
|
102
|
-
# attr :arg
|
103
|
-
# def initialize(arg)
|
104
|
-
# @arg = arg
|
105
|
-
# end
|
106
|
-
# end
|
107
|
-
#
|
108
|
-
# a = SomeMultitonClass.new(4)
|
109
|
-
# b = SomeMultitonClass.new(4) # a and b are same object
|
110
|
-
# c = SomeMultitonClass.new(2) # c is a different object
|
111
|
-
#
|
112
|
-
# == How It Works
|
113
|
-
#
|
114
|
-
# A pool of objects is searched for a previously cached object,
|
115
|
-
# if one is not found we construct one and cache it in the pool
|
116
|
-
# based on class and the args given to the contructor.
|
117
|
-
#
|
118
|
-
# A limitation of this approach is that it is impossible to
|
119
|
-
# detect if different blocks were given to a contructor (if it takes a
|
120
|
-
# block). So it is the constructor arguments _only_ which determine
|
121
|
-
# the uniqueness of an object. To workaround this, define the _class_
|
122
|
-
# method ::multiton_id.
|
123
|
-
#
|
124
|
-
# def Klass.multiton_id(*args, &block)
|
125
|
-
# # ...
|
126
|
-
# end
|
127
|
-
#
|
128
|
-
# Which should return a hash key used to identify the object being
|
129
|
-
# constructed as (not) unique.
|
130
|
-
|
131
|
-
module Multiton
|
132
|
-
|
133
|
-
# disable build-in copying methods
|
134
|
-
|
135
|
-
def clone
|
136
|
-
raise TypeError, "can't clone Multiton #{self}"
|
137
|
-
#self
|
138
|
-
end
|
139
|
-
|
140
|
-
def dup
|
141
|
-
raise TypeError, "can't dup Multiton #{self}"
|
142
|
-
#self
|
143
|
-
end
|
144
|
-
|
145
|
-
# default marshalling strategy
|
146
|
-
|
147
|
-
protected
|
148
|
-
|
149
|
-
def _dump(depth=-1)
|
150
|
-
Marshal.dump(@multiton_initializer)
|
151
|
-
end
|
152
|
-
|
153
|
-
# Mutex to safely store multiton instances.
|
154
|
-
|
155
|
-
class InstanceMutex < Hash #:nodoc:
|
156
|
-
def initialize
|
157
|
-
@global = Mutex.new
|
158
|
-
end
|
159
|
-
|
160
|
-
def initialized(arg)
|
161
|
-
store(arg, DummyMutex)
|
162
|
-
end
|
163
|
-
|
164
|
-
def (DummyMutex = Object.new).synchronize
|
165
|
-
yield
|
166
|
-
end
|
167
|
-
|
168
|
-
def default(arg)
|
169
|
-
@global.synchronize{ fetch(arg){ store(arg, Mutex.new) } }
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
# Multiton can be included in another module, in which case that module effectively becomes
|
174
|
-
# a multiton behavior distributor too. This is why we propogate #included to the base module.
|
175
|
-
# by putting it in another module.
|
176
|
-
#
|
177
|
-
#--
|
178
|
-
# def append_features(mod)
|
179
|
-
# # help out people counting on transitive mixins
|
180
|
-
# unless mod.instance_of?(Class)
|
181
|
-
# raise TypeError, "Inclusion of Multiton in module #{mod}"
|
182
|
-
# end
|
183
|
-
# super
|
184
|
-
# end
|
185
|
-
#++
|
186
|
-
|
187
|
-
module Inclusive
|
188
|
-
private
|
189
|
-
def included(base)
|
190
|
-
class << base
|
191
|
-
#alias_method(:new!, :new) unless method_defined?(:new!)
|
192
|
-
# gracefully handle multiple inclusions of Multiton
|
193
|
-
unless include?(Multiton::MetaMethods)
|
194
|
-
alias_method :new!, :new
|
195
|
-
private :allocate #, :new
|
196
|
-
include Multiton::MetaMethods
|
197
|
-
|
198
|
-
if method_defined?(:marshal_dump)
|
199
|
-
undef_method :marshal_dump
|
200
|
-
warn "warning: marshal_dump was undefined since it is incompatible with the Multiton pattern"
|
201
|
-
end
|
202
|
-
end
|
203
|
-
end
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
extend Inclusive
|
208
|
-
|
209
|
-
#
|
210
|
-
|
211
|
-
module MetaMethods
|
212
|
-
|
213
|
-
include Inclusive
|
214
|
-
|
215
|
-
def instance(*e, &b)
|
216
|
-
arg = multiton_id(*e, &b)
|
217
|
-
multiton_instance.fetch(arg) do
|
218
|
-
multiton_mutex[arg].synchronize do
|
219
|
-
multiton_instance.fetch(arg) do
|
220
|
-
val = multiton_instance[arg] = new!(*e, &b) #new(*e, &b)
|
221
|
-
val.instance_variable_set(:@multiton_initializer, e, &b)
|
222
|
-
multiton_mutex.initialized(arg)
|
223
|
-
val
|
224
|
-
end
|
225
|
-
end
|
226
|
-
end
|
227
|
-
end
|
228
|
-
alias_method :new, :instance
|
229
|
-
|
230
|
-
def initialized?(*e, &b)
|
231
|
-
multiton_instance.key?(multiton_id(*e, &b))
|
232
|
-
end
|
233
|
-
|
234
|
-
protected
|
235
|
-
|
236
|
-
def multiton_instance
|
237
|
-
@multiton_instance ||= Hash.new
|
238
|
-
end
|
239
|
-
|
240
|
-
def multiton_mutex
|
241
|
-
@multiton_mutex ||= InstanceMutex.new
|
242
|
-
end
|
243
|
-
|
244
|
-
def reinitialize
|
245
|
-
multiton_instance.clear
|
246
|
-
multiton_mutex.clear
|
247
|
-
end
|
248
|
-
|
249
|
-
def _load(str)
|
250
|
-
instance(*Marshal.load(str))
|
251
|
-
end
|
252
|
-
|
253
|
-
private
|
254
|
-
|
255
|
-
# Default method to to create a key to cache already constructed
|
256
|
-
# instances. In the use case MultitonClass.new(e), MultiClass.new(f)
|
257
|
-
# must be semantically equal if multiton_id(e).eql?(multiton_id(f))
|
258
|
-
# evaluates to true.
|
259
|
-
def multiton_id(*e, &b)
|
260
|
-
e
|
261
|
-
end
|
262
|
-
|
263
|
-
def singleton_method_added(sym)
|
264
|
-
super
|
265
|
-
if (sym == :marshal_dump) & singleton_methods.include?('marshal_dump')
|
266
|
-
raise TypeError, "Don't use marshal_dump - rely on _dump and _load instead"
|
267
|
-
end
|
268
|
-
end
|
269
|
-
|
270
|
-
end
|
271
|
-
|
272
|
-
end
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
=begin
|
278
|
-
# TODO Convert this into a real test and/or benchmark.
|
279
|
-
|
280
|
-
if $0 == __FILE__
|
281
|
-
|
282
|
-
### Simple marshalling test #######
|
283
|
-
class A
|
284
|
-
def initialize(a,*e)
|
285
|
-
@e = a
|
286
|
-
end
|
287
|
-
|
288
|
-
include Multiton
|
289
|
-
begin
|
290
|
-
def self.marshal_dump(depth = -1)
|
291
|
-
end
|
292
|
-
rescue => mes
|
293
|
-
p mes
|
294
|
-
class << self; undef marshal_dump end
|
295
|
-
end
|
296
|
-
end
|
297
|
-
|
298
|
-
C = Class.new(A.clone)
|
299
|
-
s = C.instance('a','b')
|
300
|
-
|
301
|
-
raise unless Marshal.load(Marshal.dump(s)) == s
|
302
|
-
|
303
|
-
|
304
|
-
### Interdependent initialization example and threading benchmark ###
|
305
|
-
|
306
|
-
class Regular_SymPlane
|
307
|
-
def self.multiton_id(*e)
|
308
|
-
a,b = e
|
309
|
-
(a+b - 1)*(a+b )/2 + (a > b ? a : b)
|
310
|
-
end
|
311
|
-
|
312
|
-
def initialize(a,b)
|
313
|
-
klass = self.class
|
314
|
-
if a < b
|
315
|
-
@l = b > 0 ? klass.instance(a,b-1) : nil
|
316
|
-
@r = a > 0 ? klass.instance(a-1,b) : nil
|
317
|
-
else
|
318
|
-
@l = a > 0 ? klass.instance(a-1,b) : nil
|
319
|
-
@r = b > 0 ? klass.instance(a,b-1) : nil
|
320
|
-
end
|
321
|
-
end
|
322
|
-
|
323
|
-
include Multiton
|
324
|
-
end
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
def nap
|
329
|
-
# Thread.pass
|
330
|
-
sleep(rand(0.01))
|
331
|
-
end
|
332
|
-
|
333
|
-
class SymPlane < Regular_SymPlane
|
334
|
-
@m = Mutex.new
|
335
|
-
@count = 0
|
336
|
-
end
|
337
|
-
|
338
|
-
class << SymPlane
|
339
|
-
attr_reader :count
|
340
|
-
def reinitialize
|
341
|
-
super
|
342
|
-
@m = Mutex.new
|
343
|
-
@count = 0
|
344
|
-
end
|
345
|
-
def inherited(sub_class)
|
346
|
-
super
|
347
|
-
sub_class.instance_eval { @m = Mutex.new; @count = 0 }
|
348
|
-
end
|
349
|
-
|
350
|
-
def multiton_id(*e)
|
351
|
-
nap()
|
352
|
-
super
|
353
|
-
end
|
354
|
-
|
355
|
-
def new!(*e) # NOTICE!!!
|
356
|
-
super
|
357
|
-
ensure
|
358
|
-
nap()
|
359
|
-
@m.synchronize { p @count if (@count += 1) % 15 == 0 }
|
360
|
-
end
|
361
|
-
|
362
|
-
def run(k)
|
363
|
-
threads = 0
|
364
|
-
max = k * (k+1) / 2
|
365
|
-
puts ""
|
366
|
-
while count() < max
|
367
|
-
Thread.new { threads+= 1; instance(rand(30),rand(30)) }
|
368
|
-
end
|
369
|
-
puts "\nThe simulation created #{threads} threads"
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
|
-
|
374
|
-
require 'benchmark'
|
375
|
-
include Benchmark
|
376
|
-
|
377
|
-
bmbm do |x|
|
378
|
-
x.report('Initialize 465 SymPlane instances') { SymPlane.run(30) }
|
379
|
-
x.report('Reinitialize ') do
|
380
|
-
sleep 3
|
381
|
-
SymPlane.reinitialize
|
382
|
-
end
|
383
|
-
end
|
384
|
-
|
385
|
-
end
|
386
|
-
=end
|