htmlfilter 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.index +55 -0
- data/.ruby +1 -47
- data/{HISTORY.rdoc → HISTORY.md} +20 -5
- data/LICENSE.txt +22 -0
- data/NOTICE.md +11 -0
- data/{README.rdoc → README.md} +21 -22
- data/bin/cssfilter +3 -0
- data/bin/htmlfilter +3 -0
- data/lib/cssfilter.rb +52 -1
- data/lib/htmlfilter.rb +139 -22
- metadata +42 -17
- data/COPYING.rdoc +0 -41
data/.index
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
---
|
2
|
+
type: ruby
|
3
|
+
revision: 2013
|
4
|
+
sources:
|
5
|
+
- var
|
6
|
+
authors:
|
7
|
+
- name: Thomas Sawyer
|
8
|
+
email: transfire@gmail.com
|
9
|
+
organizations: []
|
10
|
+
requirements:
|
11
|
+
- groups:
|
12
|
+
- build
|
13
|
+
development: true
|
14
|
+
name: detroit
|
15
|
+
- groups:
|
16
|
+
- test
|
17
|
+
development: true
|
18
|
+
name: microtest
|
19
|
+
- groups:
|
20
|
+
- test
|
21
|
+
development: true
|
22
|
+
name: ae
|
23
|
+
conflicts: []
|
24
|
+
alternatives: []
|
25
|
+
resources:
|
26
|
+
- type: home
|
27
|
+
uri: http://rubyworks.github.com/htmlfilter
|
28
|
+
label: Homepage
|
29
|
+
- type: docs
|
30
|
+
uri: http://rubydoc.info/gems/htmlfilter
|
31
|
+
label: Documentation
|
32
|
+
- type: code
|
33
|
+
uri: http://github.com/rubyworks/htmlfilter
|
34
|
+
label: Source Code
|
35
|
+
- type: mail
|
36
|
+
uri: http://groups.google.com/group/rubyworks-mailinglist
|
37
|
+
label: Mailing List
|
38
|
+
repositories:
|
39
|
+
- name: upstream
|
40
|
+
scm: git
|
41
|
+
uri: git://github.com/rubyworks/htmlfilter.git
|
42
|
+
categories: []
|
43
|
+
load_path:
|
44
|
+
- lib
|
45
|
+
copyrights:
|
46
|
+
- holder: Thomas Sawyer, Rubyworks
|
47
|
+
year: '2009'
|
48
|
+
license: BSD-2-Clause
|
49
|
+
created: '2009-06-25'
|
50
|
+
summary: HTML/CSS Sanity
|
51
|
+
title: HTMLFilter
|
52
|
+
version: 1.3.0
|
53
|
+
name: htmlfilter
|
54
|
+
description: Sanitize and sterilize HTML, also includes a CSS filter.
|
55
|
+
date: '2012-12-13'
|
data/.ruby
CHANGED
@@ -1,47 +1 @@
|
|
1
|
-
|
2
|
-
source:
|
3
|
-
- meta
|
4
|
-
authors:
|
5
|
-
- name: Thomas Sawyer
|
6
|
-
email: transfire@gmail.com
|
7
|
-
copyrights:
|
8
|
-
- holder: Thomas Sawyer, Rubyworks
|
9
|
-
year: '2009'
|
10
|
-
license: BSD-2-Clause
|
11
|
-
replacements: []
|
12
|
-
alternatives: []
|
13
|
-
requirements:
|
14
|
-
- name: detroit
|
15
|
-
groups:
|
16
|
-
- build
|
17
|
-
development: true
|
18
|
-
- name: microtest
|
19
|
-
groups:
|
20
|
-
- test
|
21
|
-
development: true
|
22
|
-
- name: ae
|
23
|
-
groups:
|
24
|
-
- test
|
25
|
-
development: true
|
26
|
-
dependencies: []
|
27
|
-
conflicts: []
|
28
|
-
repositories:
|
29
|
-
- uri: git://github.com/rubyworks/htmlfilter.git
|
30
|
-
scm: git
|
31
|
-
name: upstream
|
32
|
-
resources:
|
33
|
-
home: http://rubyworks.github.com/htmlfilter
|
34
|
-
docs: http://rubydoc.info/gems/htmlfilter
|
35
|
-
code: http://github.com/rubyworks/htmlfilter
|
36
|
-
mail: http://groups.google.com/group/rubyworks-mailinglist
|
37
|
-
extra: {}
|
38
|
-
load_path:
|
39
|
-
- lib
|
40
|
-
revision: 0
|
41
|
-
created: '2009-06-25'
|
42
|
-
summary: HTML/CSS Sanity
|
43
|
-
title: HTMLFilter
|
44
|
-
version: 1.2.1
|
45
|
-
name: htmlfilter
|
46
|
-
description: Sanitize and sterilize HTML, also includes a CSS filter.
|
47
|
-
date: '2011-10-26'
|
1
|
+
htmlfilter 1.3.0 2012-12-14
|
data/{HISTORY.rdoc → HISTORY.md}
RENAMED
@@ -1,6 +1,18 @@
|
|
1
|
-
|
1
|
+
# RELEASE HISTORY
|
2
2
|
|
3
|
-
|
3
|
+
## 1.3.0 | 2012-12-14
|
4
|
+
|
5
|
+
This release provides a fairly comprehensive set of RELAXED html tags/attributes
|
6
|
+
and allowed html entities. All thanks to David Wright. The release also adds
|
7
|
+
a basic command line interface.
|
8
|
+
|
9
|
+
Changes:
|
10
|
+
|
11
|
+
* Make RELAXED set fairly comprehensive.
|
12
|
+
* Add basic command line executable.
|
13
|
+
|
14
|
+
|
15
|
+
## 1.2.1 | 2011-10-26
|
4
16
|
|
5
17
|
This release is simply a maintenance release to bring the
|
6
18
|
project's build configuration up to date.
|
@@ -10,7 +22,7 @@ Changes:
|
|
10
22
|
* Modernize build configuration.
|
11
23
|
|
12
24
|
|
13
|
-
|
25
|
+
## 1.2.0 | 2010-10-13
|
14
26
|
|
15
27
|
Finally removed the lowercase variations on the class names.
|
16
28
|
You must use HTMLFilter now and not HtmlFilter.
|
@@ -21,7 +33,7 @@ Changes:
|
|
21
33
|
* No longer Multiton.
|
22
34
|
|
23
35
|
|
24
|
-
|
36
|
+
## 1.1.0 | 2009-11-24
|
25
37
|
|
26
38
|
This release adjusts the names of the classes to
|
27
39
|
be capitalized according to the actual use of the
|
@@ -38,7 +50,10 @@ Changes:
|
|
38
50
|
* Added built-in option constants.
|
39
51
|
* CssTree is now CSSFilter::Tree.
|
40
52
|
|
41
|
-
|
53
|
+
|
54
|
+
## 1.0.0 | 2009-06-25
|
55
|
+
|
56
|
+
First stand-alone release.
|
42
57
|
|
43
58
|
Changes:
|
44
59
|
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
BSD-2-Clause License
|
2
|
+
|
3
|
+
Redistribution and use in source and binary forms, with or without modification, are
|
4
|
+
permitted provided that the following conditions are met:
|
5
|
+
|
6
|
+
1. Redistributions of source code must retain the above copyright notice, this list of
|
7
|
+
conditions and the following disclaimer.
|
8
|
+
|
9
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list
|
10
|
+
of conditions and the following disclaimer in the documentation and/or other materials
|
11
|
+
provided with the distribution.
|
12
|
+
|
13
|
+
THIS SOFTWARE IS PROVIDED BY Thomas Sawyer ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
14
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Thomas Sawyer OR
|
16
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
18
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
19
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
20
|
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
21
|
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
data/NOTICE.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# COPYRIGHT NOTICES
|
2
|
+
|
3
|
+
## [lib_filter.php](http://code.iamcal.com/php/lib_filter/)
|
4
|
+
|
5
|
+
HtmlFilter is a port of lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>
|
6
|
+
licensed under a Creative Commons Attribution-ShareAlike 2.5 License
|
7
|
+
|
8
|
+
Copyright (c) 2007 Cal Henderson
|
9
|
+
|
10
|
+
[CC-BY-SA](http://creativecommons.org/licenses/by-sa/3.0/) Attribution-ShareAlike 3.0 License
|
11
|
+
|
data/{README.rdoc → README.md}
RENAMED
@@ -1,13 +1,10 @@
|
|
1
|
-
|
1
|
+
# HTMLFilter
|
2
2
|
|
3
|
+
[Website](http://rubyworks.github.com/htmlfilter) /
|
4
|
+
[Source Code](http://github.com/rubyworks/htmlfilter)
|
3
5
|
|
4
|
-
== RESOURCES
|
5
6
|
|
6
|
-
|
7
|
-
* {Source Code}[http://github.com/rubyworks/htmlfilter]
|
8
|
-
|
9
|
-
|
10
|
-
== DESCRIPTION
|
7
|
+
## Description
|
11
8
|
|
12
9
|
HTML Filter library can be used to sanitize and sterilize
|
13
10
|
HTML. A good idea if you let users submit HTML in comments,
|
@@ -18,7 +15,7 @@ clean-up a cascading style sheet. It can be used to remove
|
|
18
15
|
whitespace and most importantly remove URLs.
|
19
16
|
|
20
17
|
|
21
|
-
|
18
|
+
## Features
|
22
19
|
|
23
20
|
* Based on well-worn PHP library.
|
24
21
|
* Regular expression based filtering.
|
@@ -27,34 +24,36 @@ whitespace and most importantly remove URLs.
|
|
27
24
|
* Also has library to clean and compact cascading stylesheets.
|
28
25
|
|
29
26
|
|
30
|
-
|
27
|
+
## Synopsis
|
31
28
|
|
32
29
|
Via the class.
|
33
30
|
|
34
|
-
|
31
|
+
html = "<b>hello</b>"
|
35
32
|
|
36
|
-
|
33
|
+
HTMLFilter.new(options).filter(html)
|
37
34
|
|
38
35
|
Or using the String extension.
|
39
36
|
|
40
|
-
|
37
|
+
html.html_filter(options) #=> "<b>hello</b>"
|
41
38
|
|
42
39
|
See API documentation for more information.
|
43
40
|
|
44
41
|
|
45
|
-
|
42
|
+
## Installation
|
43
|
+
|
44
|
+
Of course, RubyGems is the answer:
|
46
45
|
|
47
|
-
|
46
|
+
$ gem install htmlfilter
|
48
47
|
|
49
48
|
|
50
|
-
|
49
|
+
## Development
|
51
50
|
|
52
|
-
HTMLFilter is hosted on GitHub
|
51
|
+
HTMLFilter is hosted on [GitHub](http://github.com/rubyworks/htmlfilter).
|
53
52
|
|
54
|
-
HTMLFilter is a Rubyworks
|
53
|
+
HTMLFilter is a [Rubyworks](http://rubyworks.github.com) project.
|
55
54
|
|
56
55
|
|
57
|
-
|
56
|
+
## Acknowledgements
|
58
57
|
|
59
58
|
Thanks to Jang Kim for adding support for single quoted attributes.
|
60
59
|
|
@@ -63,10 +62,10 @@ This code is licensed under a Creative Commons Attribution-ShareAlike 2.5 Licens
|
|
63
62
|
See http://creativecommons.org/licenses/by-sa/2.5/.
|
64
63
|
|
65
64
|
|
66
|
-
|
65
|
+
## Copyrights
|
67
66
|
|
68
|
-
Copyright (c) 2009
|
67
|
+
* Copyright (c) 2009 Rubyworks (BSD-2-Clause)
|
68
|
+
* Copyright (c) 2007 Cal Henderson (CC-BY-SA)
|
69
69
|
|
70
|
-
|
70
|
+
See LICENSE.txt and NOTICE.md for details.
|
71
71
|
|
72
|
-
See COPYING.rdoc for details.
|
data/bin/cssfilter
ADDED
data/bin/htmlfilter
ADDED
data/lib/cssfilter.rb
CHANGED
@@ -22,7 +22,7 @@ require 'uri'
|
|
22
22
|
class CSSFilter
|
23
23
|
|
24
24
|
# Library version.
|
25
|
-
VERSION = "1.
|
25
|
+
VERSION = "1.3.0"
|
26
26
|
|
27
27
|
# should we remove comments? (true, false)
|
28
28
|
attr_accessor :strip_comments
|
@@ -217,5 +217,56 @@ class CSSFilter
|
|
217
217
|
|
218
218
|
end
|
219
219
|
|
220
|
+
# Simple Command line interface for CSSFilter.
|
221
|
+
#
|
222
|
+
# It can be configured via a YAML file.
|
223
|
+
#
|
224
|
+
class CLI
|
225
|
+
def self.run
|
226
|
+
new.run
|
227
|
+
end
|
228
|
+
|
229
|
+
attr_reader :config_file
|
230
|
+
|
231
|
+
attr_reader :options
|
232
|
+
|
233
|
+
def initialize
|
234
|
+
require 'optparse'
|
235
|
+
@config_file = nil
|
236
|
+
@options = {}
|
237
|
+
end
|
238
|
+
|
239
|
+
def parser
|
240
|
+
OptionParser.new do |opt|
|
241
|
+
opt.on('--config <YAML_FILE>', 'filter with custom configuration'){ |file| @config_file = file }
|
242
|
+
opt.on('--debug', 'run in debug mode to see error details'){ $DEBUG = true }
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
def options
|
247
|
+
if config_file
|
248
|
+
raise "configuration file not found" unless File.exist?(config_file)
|
249
|
+
@options = YAML.load_file(config_file)
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
def run
|
254
|
+
parser.parse!
|
255
|
+
begin
|
256
|
+
files = ARGV
|
257
|
+
files.each do |f|
|
258
|
+
raise "cssfilter: file not found -- #{f}" unless File.exist?(f)
|
259
|
+
end
|
260
|
+
files.each do |file|
|
261
|
+
css = File.read(file)
|
262
|
+
puts CSSFilter.new(options).filter(css)
|
263
|
+
end
|
264
|
+
rescue => error
|
265
|
+
raise error if $DEBUG
|
266
|
+
$stderr.puts error
|
267
|
+
end
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
220
271
|
end
|
221
272
|
|
data/lib/htmlfilter.rb
CHANGED
@@ -31,7 +31,7 @@
|
|
31
31
|
class HTMLFilter
|
32
32
|
|
33
33
|
# Library version.
|
34
|
-
VERSION = "1.
|
34
|
+
VERSION = "1.3.0"
|
35
35
|
|
36
36
|
# tags and attributes that are allowed
|
37
37
|
#
|
@@ -134,29 +134,95 @@ class HTMLFilter
|
|
134
134
|
|
135
135
|
# Relaxed settings allows a great deal of HTML spec.
|
136
136
|
#
|
137
|
-
#
|
137
|
+
# Here is a very comprehensive set of tags with attributes.
|
138
138
|
#
|
139
139
|
RELAXED = {
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
140
|
+
'allowed' => {
|
141
|
+
'a' => ['class', 'href', 'target', 'name', 'id', 'style', 'title'],
|
142
|
+
'abbr' => ['class', 'dir', 'lang', 'id', 'style', 'title'],
|
143
|
+
'acronym' => ['class', 'dir', 'lang', 'id', 'style', 'title'],
|
144
|
+
'address' => ['class', 'dir', 'lang', 'id', 'style', 'title'],
|
145
|
+
#'applet' => ['class', 'dir', 'lang', 'id', 'style', 'title'],
|
146
|
+
'area' => ['shape', 'cords', 'type', 'nohref', 'href', 'class', 'id', 'style', 'title'],
|
147
|
+
'b' => ['class', 'id', 'style', 'title'],
|
148
|
+
'base' => ['target', 'type', 'href'], # NO class, id, style, title
|
149
|
+
'basefont' => ['color', 'face', 'size'], # NO class, id, style, title
|
150
|
+
'bdo' => ['class', 'dir', 'lang', 'id', 'style', 'title'],
|
151
|
+
'bgsound' => ['loop', 'src'],
|
152
|
+
'big' => ['class', 'dir', 'lang', 'id', 'style', 'title'],
|
153
|
+
'blockquote' => ['class', 'id', 'style', 'title'],
|
154
|
+
'body' => ['background', 'bgcolor', 'text', 'link', 'vlink', 'class', 'id', 'style', 'title'],
|
155
|
+
'button' => ['disabled', 'name', 'type', 'value', 'accesskey', 'class', 'id', 'style', 'title'],
|
156
|
+
'br' => ['clear', 'class', 'id', 'style', 'title'], # </br> or <br />
|
157
|
+
'caption' => ['class', 'align', 'valign', 'id', 'style', 'title'],
|
158
|
+
'center' => ['class', 'id', 'style', 'title'],
|
159
|
+
'cite' => ['class', 'id', 'style', 'title'],
|
160
|
+
'code'=> ['class', 'id', 'style', 'title'],
|
161
|
+
'col' => ['char', 'charoff', 'span', 'class', 'width', 'align', 'valign', 'id', 'style', 'title'],
|
162
|
+
'colgroup' => ['char', 'charoff', 'span', 'class', 'width', 'align', 'valign', 'id', 'style', 'title'],
|
163
|
+
'div' => ['class', 'align', 'style', 'id', 'style', 'title'],
|
164
|
+
'dl' => ['class', 'id', 'style', 'title'],
|
165
|
+
'dt' => ['class', 'id', 'style', 'title'],
|
166
|
+
'dd' => ['class', 'id', 'style', 'title'],
|
167
|
+
'em' => ['class', 'id', 'style', 'title'],
|
168
|
+
'frameset' => ['cols', 'rows', 'class', 'id', 'style', 'title'],
|
169
|
+
'frame' => ['src', 'name', 'noresize', 'scroll', 'marginwidth', 'marginheight', 'class', 'id', 'style', 'title'],
|
170
|
+
'form' => ['method', 'action', 'class', 'id', 'style', 'title'],
|
171
|
+
'font' => ['face', 'size', 'color', 'class', 'id', 'style', 'title'],
|
172
|
+
'head' => [], # NO class, id, style, title
|
173
|
+
'html' => [], # NO class, id, style, title
|
174
|
+
'h1' => ['align', 'class', 'id', 'style', 'title'],
|
175
|
+
'h2' => ['align', 'class', 'id', 'style', 'title'],
|
176
|
+
'h3' => ['align', 'class', 'id', 'style', 'title'],
|
177
|
+
'h4' => ['align', 'class', 'id', 'style', 'title'],
|
178
|
+
'h5' => ['align', 'class', 'id', 'style', 'title'],
|
179
|
+
'h6' => ['align', 'class', 'id', 'style', 'title'],
|
180
|
+
'hr' => ['width', 'size', 'noshade', 'class', 'id', 'style', 'title'], #</hr> or <hr />
|
181
|
+
'i' => ['class', 'id', 'style', 'title'],
|
182
|
+
'iframe' => ['src', 'name', 'noresize', 'scroll', 'marginwidth', 'marginheight', 'class', 'id', 'style', 'title'],
|
183
|
+
'img' => ['src', 'align', 'width', 'height', 'alt', 'border', 'ISMAP', 'class', 'USEMAP', 'id', 'style', 'title'],
|
184
|
+
'input' => ['name', 'type', 'class', 'id', 'style', 'title'],
|
185
|
+
'li' => ['type', 'start', 'class', 'id', 'style', 'title'],
|
186
|
+
'link' => ['rel', 'type', 'href', 'class', 'id', 'style', 'title'],
|
187
|
+
'map' => ['name', 'class', 'id', 'style', 'title'],
|
188
|
+
'meta' => ['http-equiv', 'content', 'name', 'content'], # NO class, id, style, title
|
189
|
+
'noframes' => [],
|
190
|
+
'option' => ['class', 'id', 'style', 'title'],
|
191
|
+
'ol' => ['type', 'start', 'class', 'id', 'style', 'title'],
|
192
|
+
'p' => ['align', 'class', 'id', 'style', 'title'],
|
193
|
+
'param' => [], # NO class, id, style, title
|
194
|
+
'pre' => ['class', 'id', 'style', 'title'],
|
195
|
+
's' => ['class', 'id', 'style', 'title'],
|
196
|
+
'select' => ['name', 'size', 'class', 'id', 'style', 'title'],
|
197
|
+
#'script' => '', # not this for sure
|
198
|
+
'span' => ['class', 'id', 'style', 'title'],
|
199
|
+
'strong' => ['class', 'id', 'style', 'title'],
|
200
|
+
'style' => ['type'], # NO class, id, style, title
|
201
|
+
'table' => ['class', 'border', 'width', 'height', 'cellpadding', 'cellspacing', 'bgcolor', 'background', 'id', 'style', 'title'],
|
202
|
+
'tbody' => ['class', 'align', 'valign', 'id', 'style', 'title'],
|
203
|
+
'td' => ['class', 'nowrap', 'width', 'align', 'valign', 'colspan', 'rowspan', 'bgcolor', 'id', 'style', 'title'],
|
204
|
+
'textarea' => ['name', 'rows', 'cols', 'class', 'id', 'style', 'title'],
|
205
|
+
'tfoot' => ['class', 'align', 'valign', 'id', 'style', 'title'],
|
206
|
+
'th' => ['class', 'nowrap', 'width', 'align', 'valign', 'colspan', 'rowspan', 'bgcolor', 'id', 'style', 'title'],
|
207
|
+
'thead' => ['class', 'align', 'valign', 'id', 'style', 'title'],
|
208
|
+
'title' => [], # NO class, id, style, title
|
209
|
+
'tr' => ['class', 'align', 'valign', 'bgcolor', 'id', 'style', 'title'],
|
210
|
+
'tt' => ['class', 'id', 'style', 'title'],
|
211
|
+
'u' => ['class', 'id', 'style', 'title'],
|
212
|
+
'ul' => ['type', 'class', 'id', 'style', 'title'],
|
213
|
+
},
|
214
|
+
#'body', 'div', 'span', 'br', 'hr', 'p', 'b', 'i', 'tt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'font', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'a', 'img', 'map', 'area', 'table', 'tr', 'td', 'th', 'thead', 'tfoot', 'tbody', 'caption', 'frameset', 'frame', 'noframes', 'form', 'input', 'select', 'option', 'textarea', 'link', 'col', 'colgroup', 'u', 's', 'strong', 'em', 'base', 'html', 'head', 'title', 'param', 'script', 'meta', 'style'
|
215
|
+
'no_close' => ['img', 'br', 'hr'],
|
216
|
+
'always_close' => ['a', 'b'],
|
217
|
+
'protocol_attributes' => ['src', 'href'],
|
218
|
+
'allowed_protocols' => ['http', 'ftp', 'mailto', 'https', 'sftp'],
|
219
|
+
'remove_blanks' => ['a', 'b'],
|
220
|
+
'strip_comments' => false, # comments? <!-- -->
|
221
|
+
'always_make_tags' => false,
|
222
|
+
'allow_numbered_entities' => true,
|
223
|
+
'allowed_entities' => ['amp', 'cent', 'copy', 'deg', 'gt', 'lt', 'nbsp', '#174', '#153', 'pound', 'ndash', '#8211', 'mdash', '#8212', 'iexcl', '#161', 'iquest', '#191', 'quot', '#34', 'ldquo', '#8220', 'rdquo', '#8221', '#39', 'lsquo', '#8216', 'rsquo', '#8217', 'laquo', 'raquo', '#171', '#187', 'nbsp', '#160', 'amp', '#38', 'cent', '#162', 'copy', '#169', 'divide', '#247', 'gt', '#62', 'lt', '#60', 'micro', '#181', 'middot', 'para', '#182', 'plusmn', 'euro', '#8364', 'pound', '#163', 'reg', '#174', 'sect', '#167', 'trade', '#153', 'yen', '#165', 'aacute', 'Aacute', '#225', '#193', 'agrave', 'Agrave', '#224', '#192', 'acirc', 'Acirc', '#226', '#194', 'aring', 'Aring', '#229', '#197', 'atilde', 'Atilde', '#227', '#195', 'auml', 'Auml', '#228', '#196', 'aelig', 'AElig', '#230', '#198', 'ccedil', 'Ccedil', '#231', '#199', 'eacute', 'Eacute', '#233', '#201', 'egrave', 'Egrave', '#232', '#200', 'ecirc', 'Ecirc', '#234', '#202', 'euml', 'Euml', '#235', '#203', 'iacute', 'Iacute', '#237', '#205', 'igrave', 'Igrave', '#236', '#204', 'icirc', 'Icirc', '#238', '#206', 'iuml', 'Iuml', '#239', '#207', 'ntilde', 'Ntilde', '#241', '#209', 'oacute', 'Oacute', '#243', '#211', 'ograve', 'Ograve', '#242', '#210', 'ocirc', 'Ocirc', '#244', '#212', 'oslash', 'Oslash', '#248', '#216', 'otilde', 'Otilde', '#245', '#213', 'ouml', 'Ouml', '#246', '#214', 'szlig', '#223', 'uacute', 'Uacute', '#250', '#218', 'ugrave', 'Ugrave', '#249', '#217', 'ucirc', 'Ucirc', '#251', '#219', 'uuml', 'Uuml', '#252', '#220', 'yuml', '#255', '#180', '#96']
|
224
|
+
}
|
225
|
+
|
160
226
|
|
161
227
|
# New html filter.
|
162
228
|
#
|
@@ -581,6 +647,57 @@ class HTMLFilter
|
|
581
647
|
# return html[0..x]
|
582
648
|
#end
|
583
649
|
|
650
|
+
# Simple Command line interface for HTMLFilter.
|
651
|
+
#
|
652
|
+
# It can be configured via a YAML file.
|
653
|
+
#
|
654
|
+
class CLI
|
655
|
+
def self.run
|
656
|
+
new.run
|
657
|
+
end
|
658
|
+
|
659
|
+
attr_reader :config_file
|
660
|
+
|
661
|
+
attr_reader :options
|
662
|
+
|
663
|
+
def initialize
|
664
|
+
require 'optparse'
|
665
|
+
@config_file = nil
|
666
|
+
@options = {}
|
667
|
+
end
|
668
|
+
|
669
|
+
def parser
|
670
|
+
OptionParser.new do |opt|
|
671
|
+
opt.on('--config <YAML_FILE>', 'filter with custom configuration'){ |file| @config_file = file }
|
672
|
+
opt.on('--debug', 'run in debug mode to see error details'){ $DEBUG = true }
|
673
|
+
end
|
674
|
+
end
|
675
|
+
|
676
|
+
def options
|
677
|
+
if config_file
|
678
|
+
raise "configuration file not found" unless File.exist?(config_file)
|
679
|
+
@options = YAML.load_file(config_file)
|
680
|
+
end
|
681
|
+
end
|
682
|
+
|
683
|
+
def run
|
684
|
+
parser.parse!
|
685
|
+
begin
|
686
|
+
files = ARGV
|
687
|
+
files.each do |f|
|
688
|
+
raise "htmlfilter: file not found -- #{f}" unless File.exist?(f)
|
689
|
+
end
|
690
|
+
files.each do |file|
|
691
|
+
html = File.read(file)
|
692
|
+
puts HTMLFilter.new(options).filter(html)
|
693
|
+
end
|
694
|
+
rescue => error
|
695
|
+
raise error if $DEBUG
|
696
|
+
$stderr.puts error
|
697
|
+
end
|
698
|
+
end
|
699
|
+
end
|
700
|
+
|
584
701
|
end
|
585
702
|
|
586
703
|
# Overload the standard String class for extra convenience.
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-12-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: detroit
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: microtest
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ! '>='
|
@@ -32,10 +37,15 @@ dependencies:
|
|
32
37
|
version: '0'
|
33
38
|
type: :development
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: ae
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
39
49
|
none: false
|
40
50
|
requirements:
|
41
51
|
- - ! '>='
|
@@ -43,27 +53,39 @@ dependencies:
|
|
43
53
|
version: '0'
|
44
54
|
type: :development
|
45
55
|
prerelease: false
|
46
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
47
62
|
description: Sanitize and sterilize HTML, also includes a CSS filter.
|
48
63
|
email:
|
49
64
|
- transfire@gmail.com
|
50
|
-
executables:
|
65
|
+
executables:
|
66
|
+
- cssfilter
|
67
|
+
- htmlfilter
|
51
68
|
extensions: []
|
52
69
|
extra_rdoc_files:
|
53
|
-
-
|
54
|
-
-
|
55
|
-
-
|
70
|
+
- LICENSE.txt
|
71
|
+
- HISTORY.md
|
72
|
+
- README.md
|
73
|
+
- NOTICE.md
|
56
74
|
files:
|
75
|
+
- .index
|
57
76
|
- .ruby
|
58
77
|
- .yardopts
|
78
|
+
- bin/cssfilter
|
79
|
+
- bin/htmlfilter
|
59
80
|
- lib/cssfilter.rb
|
60
81
|
- lib/htmlfilter.rb
|
61
82
|
- test/helper.rb
|
62
83
|
- test/test_cssfilter.rb
|
63
84
|
- test/test_htmlfilter.rb
|
64
|
-
-
|
65
|
-
-
|
66
|
-
-
|
85
|
+
- LICENSE.txt
|
86
|
+
- HISTORY.md
|
87
|
+
- README.md
|
88
|
+
- NOTICE.md
|
67
89
|
homepage: http://rubyworks.github.com/htmlfilter
|
68
90
|
licenses:
|
69
91
|
- BSD-2-Clause
|
@@ -85,8 +107,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
107
|
version: '0'
|
86
108
|
requirements: []
|
87
109
|
rubyforge_project:
|
88
|
-
rubygems_version: 1.8.
|
110
|
+
rubygems_version: 1.8.23
|
89
111
|
signing_key:
|
90
112
|
specification_version: 3
|
91
113
|
summary: HTML/CSS Sanity
|
92
|
-
test_files:
|
114
|
+
test_files:
|
115
|
+
- test/helper.rb
|
116
|
+
- test/test_cssfilter.rb
|
117
|
+
- test/test_htmlfilter.rb
|
data/COPYING.rdoc
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
= COPYRIGHT NOTICES
|
2
|
-
|
3
|
-
== HTMLFilter
|
4
|
-
|
5
|
-
Copyright:: (c) 2009 Thomas Sawyer, Rubyworks
|
6
|
-
License: BSD-2-Clause
|
7
|
-
Website:: http://rubyworks.github.com/htmlfilter
|
8
|
-
|
9
|
-
Copyright (c) 2009 Thomas Sawyer, Rubyworks
|
10
|
-
|
11
|
-
Redistribution and use in source and binary forms, with or without modification, are
|
12
|
-
permitted provided that the following conditions are met:
|
13
|
-
|
14
|
-
1. Redistributions of source code must retain the above copyright notice, this list of
|
15
|
-
conditions and the following disclaimer.
|
16
|
-
|
17
|
-
2. Redistributions in binary form must reproduce the above copyright notice, this list
|
18
|
-
of conditions and the following disclaimer in the documentation and/or other materials
|
19
|
-
provided with the distribution.
|
20
|
-
|
21
|
-
THIS SOFTWARE IS PROVIDED BY Thomas Sawyer ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
22
|
-
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
23
|
-
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Thomas Sawyer OR
|
24
|
-
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
25
|
-
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
26
|
-
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
27
|
-
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
28
|
-
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
29
|
-
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30
|
-
|
31
|
-
|
32
|
-
== lib_filter.php
|
33
|
-
|
34
|
-
Copyright:: (c) 2007 Cal Henderson
|
35
|
-
License: CC-BY-SA
|
36
|
-
Website:: http://code.iamcal.com/php/lib_filter/
|
37
|
-
|
38
|
-
HtmlFilter is a port of lib_filter.php, v1.15 by Cal Henderson <cal@iamcal.com>
|
39
|
-
licensed under a Creative Commons Attribution-ShareAlike 2.5 License
|
40
|
-
|
41
|
-
http://creativecommons.org/licenses/by-sa/3.0/.Attribution-ShareAlike 3.0
|