trust_html 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +26 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/assets/html-sanitizer.js +554 -0
- data/assets/html4-defs.js +391 -0
- data/lib/trust_html/sanitizer.rb +31 -0
- data/lib/trust_html/string_ext.rb +10 -0
- data/lib/trust_html.rb +17 -0
- data/test/helper.rb +18 -0
- data/test/test_trust_html.rb +7 -0
- data/trust_html.gemspec +69 -0
- metadata +135 -0
data/.document
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Fetch gems over TLS; a plain-http source can be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"
gem "better" # Better::Tempfile
gem "therubyracer"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
better (1.0.0)
|
5
|
+
git (1.2.5)
|
6
|
+
jeweler (1.6.4)
|
7
|
+
bundler (~> 1.0)
|
8
|
+
git (>= 1.2.5)
|
9
|
+
rake
|
10
|
+
libv8 (3.3.10.2)
|
11
|
+
rake (0.9.2)
|
12
|
+
rcov (0.9.9)
|
13
|
+
shoulda (2.11.3)
|
14
|
+
therubyracer (0.9.2)
|
15
|
+
libv8 (~> 3.3.10)
|
16
|
+
|
17
|
+
PLATFORMS
|
18
|
+
ruby
|
19
|
+
|
20
|
+
DEPENDENCIES
|
21
|
+
better
|
22
|
+
bundler (~> 1.0.0)
|
23
|
+
jeweler (~> 1.6.4)
|
24
|
+
rcov
|
25
|
+
shoulda
|
26
|
+
therubyracer
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Cary Dunn
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= trust_html
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
== Contributing to trust_html
|
6
|
+
|
7
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
8
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
9
|
+
* Fork the project
|
10
|
+
* Start a feature/bugfix branch
|
11
|
+
* Commit and push until you are happy with your contribution
|
12
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
13
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if a change is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2011 Cary Dunn. See LICENSE.txt for
|
18
|
+
further details.
|
19
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "trust_html"
|
18
|
+
gem.homepage = "http://github.com/cdunn/trust_html"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Make HTML trustworthy for rendering within your web app via Google HTML sanitizers. This is _not_ about stripping HTML but rather about cleaning it of javascript (onclicks, etc.) as well as CSS.}
|
21
|
+
gem.description = %Q{Make HTML trustworthy for rendering within your web app via Google HTML sanitizers. This is _not_ about stripping HTML but rather about cleaning it of javascript (onclicks, etc.) as well as CSS.}
|
22
|
+
gem.email = "cary.dunn@gmail.com"
|
23
|
+
gem.authors = ["Cary Dunn"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/test_*.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
require 'rcov/rcovtask'
|
36
|
+
Rcov::RcovTask.new do |test|
|
37
|
+
test.libs << 'test'
|
38
|
+
test.pattern = 'test/**/test_*.rb'
|
39
|
+
test.verbose = true
|
40
|
+
test.rcov_opts << '--exclude "gems/*"'
|
41
|
+
end
|
42
|
+
|
43
|
+
task :default => :test
|
44
|
+
|
45
|
+
require 'rake/rdoctask'
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
47
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
48
|
+
|
49
|
+
rdoc.rdoc_dir = 'rdoc'
|
50
|
+
rdoc.title = "trust_html #{version}"
|
51
|
+
rdoc.rdoc_files.include('README*')
|
52
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
53
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,554 @@
|
|
1
|
+
// Copyright (C) 2006 Google Inc.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
|
15
|
+
/**
|
16
|
+
* @fileoverview
|
17
|
+
* An HTML sanitizer that can satisfy a variety of security policies.
|
18
|
+
*
|
19
|
+
* <p>
|
20
|
+
* The HTML sanitizer is built around a SAX parser and HTML element and
|
21
|
+
* attributes schemas.
|
22
|
+
*
|
23
|
+
* @author mikesamuel@gmail.com
|
24
|
+
* @requires html4
|
25
|
+
* @provides html, html_sanitize
|
26
|
+
*/
|
27
|
+
|
28
|
+
/**
|
29
|
+
* @namespace
|
30
|
+
*/
|
31
|
+
var html = (function () {
|
32
|
+
var lcase;
|
33
|
+
// The below may not be true on browsers in the Turkish locale.
|
34
|
+
if ('script' === 'SCRIPT'.toLowerCase()) {
|
35
|
+
lcase = function (s) { return s.toLowerCase(); };
|
36
|
+
} else {
|
37
|
+
/**
|
38
|
+
* {@updoc
|
39
|
+
* $ lcase('SCRIPT')
|
40
|
+
* # 'script'
|
41
|
+
* $ lcase('script')
|
42
|
+
* # 'script'
|
43
|
+
* }
|
44
|
+
*/
|
45
|
+
lcase = function (s) {
|
46
|
+
return s.replace(
|
47
|
+
/[A-Z]/g,
|
48
|
+
function (ch) {
|
49
|
+
return String.fromCharCode(ch.charCodeAt(0) | 32);
|
50
|
+
});
|
51
|
+
};
|
52
|
+
}
|
53
|
+
|
54
|
+
var ENTITIES = {
|
55
|
+
lt : '<',
|
56
|
+
gt : '>',
|
57
|
+
amp : '&',
|
58
|
+
nbsp : '\240',
|
59
|
+
quot : '"',
|
60
|
+
apos : '\''
|
61
|
+
};
|
62
|
+
|
63
|
+
var decimalEscapeRe = /^#(\d+)$/;
|
64
|
+
var hexEscapeRe = /^#x([0-9A-Fa-f]+)$/;
|
65
|
+
/**
|
66
|
+
* Decodes an HTML entity.
|
67
|
+
*
|
68
|
+
* {@updoc
|
69
|
+
* $ lookupEntity('lt')
|
70
|
+
* # '<'
|
71
|
+
* $ lookupEntity('GT')
|
72
|
+
* # '>'
|
73
|
+
* $ lookupEntity('amp')
|
74
|
+
* # '&'
|
75
|
+
* $ lookupEntity('nbsp')
|
76
|
+
* # '\xA0'
|
77
|
+
* $ lookupEntity('apos')
|
78
|
+
* # "'"
|
79
|
+
* $ lookupEntity('quot')
|
80
|
+
* # '"'
|
81
|
+
* $ lookupEntity('#xa')
|
82
|
+
* # '\n'
|
83
|
+
* $ lookupEntity('#10')
|
84
|
+
* # '\n'
|
85
|
+
* $ lookupEntity('#x0a')
|
86
|
+
* # '\n'
|
87
|
+
* $ lookupEntity('#010')
|
88
|
+
* # '\n'
|
89
|
+
* $ lookupEntity('#x00A')
|
90
|
+
* # '\n'
|
91
|
+
* $ lookupEntity('Pi') // Known failure
|
92
|
+
* # '\u03A0'
|
93
|
+
* $ lookupEntity('pi') // Known failure
|
94
|
+
* # '\u03C0'
|
95
|
+
* }
|
96
|
+
*
|
97
|
+
* @param name the content between the '&' and the ';'.
|
98
|
+
* @return a single unicode code-point as a string.
|
99
|
+
*/
|
100
|
+
function lookupEntity(name) {
|
101
|
+
name = lcase(name); // TODO: π is different from Π
|
102
|
+
if (ENTITIES.hasOwnProperty(name)) { return ENTITIES[name]; }
|
103
|
+
var m = name.match(decimalEscapeRe);
|
104
|
+
if (m) {
|
105
|
+
return String.fromCharCode(parseInt(m[1], 10));
|
106
|
+
} else if (!!(m = name.match(hexEscapeRe))) {
|
107
|
+
return String.fromCharCode(parseInt(m[1], 16));
|
108
|
+
}
|
109
|
+
return '';
|
110
|
+
}
|
111
|
+
|
112
|
+
function decodeOneEntity(_, name) {
|
113
|
+
return lookupEntity(name);
|
114
|
+
}
|
115
|
+
|
116
|
+
var nulRe = /\0/g;
|
117
|
+
function stripNULs(s) {
|
118
|
+
return s.replace(nulRe, '');
|
119
|
+
}
|
120
|
+
|
121
|
+
var entityRe = /&(#\d+|#x[0-9A-Fa-f]+|\w+);/g;
|
122
|
+
/**
|
123
|
+
* Returns the plain text of a chunk of HTML CDATA which possibly contains entities.
|
124
|
+
*
|
125
|
+
* {@updoc
|
126
|
+
* $ unescapeEntities('')
|
127
|
+
* # ''
|
128
|
+
* $ unescapeEntities('hello World!')
|
129
|
+
* # 'hello World!'
|
130
|
+
* $ unescapeEntities('1 &lt; 2 &amp;&amp; 4 &gt; 3&#10;')
|
131
|
+
* # '1 < 2 && 4 > 3\n'
|
132
|
+
* $ unescapeEntities('&lt;&lt <- unfinished entity&gt;')
|
133
|
+
* # '<&lt <- unfinished entity>'
|
134
|
+
* $ unescapeEntities('/foo?bar=baz&copy=true') // & often unescaped in URLS
|
135
|
+
* # '/foo?bar=baz&copy=true'
|
136
|
+
* $ unescapeEntities('pi=&pi;&#x3c0;, Pi=&Pi;\u03A0') // FIXME: known failure
|
137
|
+
* # 'pi=\u03C0\u03c0, Pi=\u03A0\u03A0'
|
138
|
+
* }
|
139
|
+
*
|
140
|
+
* @param s a chunk of HTML CDATA. It must not start or end inside an HTML
|
141
|
+
* entity.
|
142
|
+
*/
|
143
|
+
function unescapeEntities(s) {
|
144
|
+
return s.replace(entityRe, decodeOneEntity);
|
145
|
+
}
|
146
|
+
|
147
|
+
var ampRe = /&/g;
|
148
|
+
var looseAmpRe = /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi;
|
149
|
+
var ltRe = /</g;
|
150
|
+
var gtRe = />/g;
|
151
|
+
var quotRe = /\"/g;
|
152
|
+
var eqRe = /\=/g; // Backslash required on JScript.net
|
153
|
+
|
154
|
+
/**
 * Escapes HTML special characters in attribute values as HTML entities.
 *
 * Every occurrence (not just the first) of '&', '<', '>', '"' and '=' is
 * replaced by '&amp;', '&lt;', '&gt;', '&#34;' and '&#61;' respectively.
 *
 * {@updoc
 * $ escapeAttrib('')
 * # ''
 * $ escapeAttrib('"<<&==&>>"')  // Do not just escape the first occurrence.
 * # '&#34;&lt;&lt;&amp;&#61;&#61;&amp;&gt;&gt;&#34;'
 * $ escapeAttrib('Hello <World>!')
 * # 'Hello &lt;World&gt;!'
 * }
 *
 * @param {string} s the raw (already entity-decoded) attribute value.
 * @return {string} the value with the characters above entity-encoded.
 */
function escapeAttrib(s) {
  // '&' is replaced first so that the ampersands introduced by the later
  // replacements are not escaped a second time.
  // Escaping '=' defangs many UTF-7 and SGML short-tag attacks.
  return s.replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/\"/g, '&#34;')
      .replace(/\=/g, '&#61;');  // Backslash required on JScript.net
}
|
171
|
+
|
172
|
+
/**
 * Escape entities in RCDATA that can be escaped without changing the meaning.
 *
 * '<' and '>' are always encoded. An '&' is encoded only when it is "loose",
 * i.e. not the start of something that looks like a named, decimal or hex
 * entity; well-formed entities such as '&amp;' or '&#10;' are left intact.
 *
 * {@updoc
 * $ normalizeRCData('1 < 2 & 3 > 4 &amp; 5 &lt; 7&8')
 * # '1 &lt; 2 &amp; 3 &gt; 4 &amp; 5 &lt; 7&amp;8'
 * }
 *
 * @param {string} rcdata the raw RCDATA text.
 * @return {string} the normalized text.
 */
function normalizeRCData(rcdata) {
  return rcdata
      // Group 1 holds the character(s) that showed the '&' to be loose; they
      // are put back after the encoded ampersand.
      .replace(/&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi, '&amp;$1')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
}
|
185
|
+
|
186
|
+
|
187
|
+
// TODO(mikesamuel): validate sanitizer regexs against the HTML5 grammar at
|
188
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
|
189
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
|
190
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
|
191
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html
|
192
|
+
|
193
|
+
/** token definitions. */
|
194
|
+
var INSIDE_TAG_TOKEN = new RegExp(
|
195
|
+
// Don't capture space.
|
196
|
+
'^\\s*(?:'
|
197
|
+
// Capture an attribute name in group 1, and value in group 3.
|
198
|
+
// We capture the fact that there was an attribute in group 2, since
|
199
|
+
// interpreters are inconsistent in whether a group that matches nothing
|
200
|
+
// is null, undefined, or the empty string.
|
201
|
+
+ ('(?:'
|
202
|
+
+ '([a-z][a-z-]*)' // attribute name
|
203
|
+
+ ('(' // optionally followed
|
204
|
+
+ '\\s*=\\s*'
|
205
|
+
+ ('('
|
206
|
+
// A double quoted string.
|
207
|
+
+ '\"[^\"]*\"'
|
208
|
+
// A single quoted string.
|
209
|
+
+ '|\'[^\']*\''
|
210
|
+
// The positive lookahead is used to make sure that in
|
211
|
+
// <foo bar= baz=boo>, the value for bar is blank, not "baz=boo".
|
212
|
+
+ '|(?=[a-z][a-z-]*\\s*=)'
|
213
|
+
// An unquoted value that is not an attribute name.
|
214
|
+
// We know it is not an attribute name because the previous
|
215
|
+
// zero-width match would've eliminated that possibility.
|
216
|
+
+ '|[^>\"\'\\s]*'
|
217
|
+
+ ')'
|
218
|
+
)
|
219
|
+
+ ')'
|
220
|
+
) + '?'
|
221
|
+
+ ')'
|
222
|
+
)
|
223
|
+
// End of tag captured in group 4.
|
224
|
+
+ '|(/?>)'
|
225
|
+
// Don't capture cruft
|
226
|
+
+ '|.[^a-z\\s>]*)',
|
227
|
+
'i');
|
228
|
+
|
229
|
+
var OUTSIDE_TAG_TOKEN = new RegExp(
|
230
|
+
'^(?:'
|
231
|
+
// Entity captured in group 1.
|
232
|
+
+ '&(\\#[0-9]+|\\#[x][0-9a-f]+|\\w+);'
|
233
|
+
// Comment, doctypes, and processing instructions not captured.
|
234
|
+
+ '|<\!--[\\s\\S]*?--\>|<!\\w[^>]*>|<\\?[^>*]*>'
|
235
|
+
// '/' captured in group 2 for close tags, and name captured in group 3.
|
236
|
+
+ '|<(/)?([a-z][a-z0-9]*)'
|
237
|
+
// Text captured in group 4.
|
238
|
+
+ '|([^<&>]+)'
|
239
|
+
// Cruft captured in group 5.
|
240
|
+
+ '|([<&>]))',
|
241
|
+
'i');
|
242
|
+
|
243
|
+
/**
|
244
|
+
* Given a SAX-like event handler, produce a function that feeds those
|
245
|
+
* events and a parameter to the event handler.
|
246
|
+
*
|
247
|
+
* The event handler has the form:{@code
|
248
|
+
* {
|
249
|
+
* // Name is a lower-case HTML tag name. Attribs is an array of
|
250
|
+
* // alternating lower-case attribute names, and attribute values. The
|
251
|
+
* // attribs array is reused by the parser. Param is the value passed to
|
252
|
+
* // the saxParser.
|
253
|
+
* startTag: function (name, attribs, param) { ... },
|
254
|
+
* endTag: function (name, param) { ... },
|
255
|
+
* pcdata: function (text, param) { ... },
|
256
|
+
* rcdata: function (text, param) { ... },
|
257
|
+
* cdata: function (text, param) { ... },
|
258
|
+
* startDoc: function (param) { ... },
|
259
|
+
* endDoc: function (param) { ... }
|
260
|
+
* }}
|
261
|
+
*
|
262
|
+
* @param {Object} handler a record containing event handlers.
|
263
|
+
* @return {Function} that takes a chunk of html and a parameter.
|
264
|
+
* The parameter is passed on to the handler methods.
|
265
|
+
*/
|
266
|
+
function makeSaxParser(handler) {
  // Returns parse(htmlText, param): tokenizes htmlText and reports
  // startDoc/startTag/endTag/pcdata/rcdata/cdata/endDoc events to whichever
  // of those callbacks the handler defines, passing param through unchanged.
  // Tag and attribute names are reported lower-cased (see lcase below).
  return function parse(htmlText, param) {
    htmlText = String(htmlText);
    var htmlLower = null;  // Lower-cased copy of the input, computed lazily.

    var inTag = false;  // True iff we're currently processing a tag.
    var attribs = [];  // Accumulates attribute names and values.
    var tagName = void 0;  // The name of the tag currently being processed.
    var eflags = void 0;  // The element flags for the current tag.
    var openTag = void 0;  // True if the current tag is an open tag.

    if (handler.startDoc) { handler.startDoc(param); }

    while (htmlText) {
      var m = htmlText.match(inTag ? INSIDE_TAG_TOKEN : OUTSIDE_TAG_TOKEN);
      if (!m) {
        // INSIDE_TAG_TOKEN cannot match when all that is left of an
        // unterminated tag is line terminators, because '.' does not match
        // them (e.g. the input '<b\n').  Previously this threw a TypeError
        // on m[0]; now the unfinished tag is dropped, as it already is when
        // the trailing text is spaces.
        break;
      }
      htmlText = htmlText.substring(m[0].length);

      if (inTag) {
        if (m[1]) { // attribute
          // setAttribute with uppercase names doesn't work on IE6.
          var attribName = lcase(m[1]);
          var decodedValue;
          if (m[2]) {
            var encodedValue = m[3];
            switch (encodedValue.charCodeAt(0)) {  // Strip quotes
              case 34: case 39:
                encodedValue = encodedValue.substring(
                    1, encodedValue.length - 1);
                break;
            }
            decodedValue = unescapeEntities(stripNULs(encodedValue));
          } else {
            // Use name as value for valueless attribs, so
            //   <input type=checkbox checked>
            // gets attributes ['type', 'checkbox', 'checked', 'checked']
            decodedValue = attribName;
          }
          attribs.push(attribName, decodedValue);
        } else if (m[4]) {  // End of tag: '>' or '/>'
          if (eflags !== void 0) {  // False if not in whitelist.
            if (openTag) {
              if (handler.startTag) {
                handler.startTag(tagName, attribs, param);
              }
            } else {
              if (handler.endTag) {
                handler.endTag(tagName, param);
              }
            }
          }

          if (openTag
              && (eflags & (html4.eflags.CDATA | html4.eflags.RCDATA))) {
            // The body of a CDATA/RCDATA element runs up to the next
            // case-insensitive '</tagName', or to the end of the input.
            if (htmlLower === null) {
              htmlLower = lcase(htmlText);
            } else {
              // Keep htmlLower aligned with what remains of htmlText.
              htmlLower = htmlLower.substring(
                  htmlLower.length - htmlText.length);
            }
            var dataEnd = htmlLower.indexOf('</' + tagName);
            if (dataEnd < 0) { dataEnd = htmlText.length; }
            if (eflags & html4.eflags.CDATA) {
              if (handler.cdata) {
                handler.cdata(htmlText.substring(0, dataEnd), param);
              }
            } else if (handler.rcdata) {
              handler.rcdata(
                  normalizeRCData(htmlText.substring(0, dataEnd)), param);
            }
            htmlText = htmlText.substring(dataEnd);
          }

          // Reset per-tag state.  The attribs array is reused, not replaced.
          tagName = eflags = openTag = void 0;
          attribs.length = 0;
          inTag = false;
        }
        // Anything else inside a tag is cruft and is skipped.
      } else {
        if (m[1]) {  // Entity
          if (handler.pcdata) { handler.pcdata(m[0], param); }
        } else if (m[3]) {  // Tag
          openTag = !m[2];
          inTag = true;
          tagName = lcase(m[3]);
          eflags = html4.ELEMENTS.hasOwnProperty(tagName)
              ? html4.ELEMENTS[tagName] : void 0;
        } else if (m[4]) {  // Text
          if (handler.pcdata) { handler.pcdata(m[4], param); }
        } else if (m[5]) {  // Cruft
          if (handler.pcdata) {
            // A stray '<', '>' or '&' is reported entity-encoded so that it
            // cannot be read back as markup by whoever consumes the events.
            switch (m[5]) {
              case '<': handler.pcdata('&lt;', param); break;
              case '>': handler.pcdata('&gt;', param); break;
              default: handler.pcdata('&amp;', param); break;
            }
          }
        }
      }
    }

    if (handler.endDoc) { handler.endDoc(param); }
  };
}
|
368
|
+
|
369
|
+
return {
|
370
|
+
normalizeRCData: normalizeRCData,
|
371
|
+
escapeAttrib: escapeAttrib,
|
372
|
+
unescapeEntities: unescapeEntities,
|
373
|
+
makeSaxParser: makeSaxParser
|
374
|
+
};
|
375
|
+
})();
|
376
|
+
|
377
|
+
/**
|
378
|
+
* Returns a function that strips unsafe tags and attributes from html.
|
379
|
+
* @param {Function} sanitizeAttributes
|
380
|
+
* maps from (tagName, attribs[]) to null or a sanitized attribute array.
|
381
|
+
* The attribs array can be arbitrarily modified, but the same array
|
382
|
+
* instance is reused, so should not be held.
|
383
|
+
* @return {Function} from html to sanitized html
|
384
|
+
*/
|
385
|
+
html.makeHtmlSanitizer = function (sanitizeAttributes) {
|
386
|
+
var stack;
|
387
|
+
var ignoring;
|
388
|
+
return html.makeSaxParser({
|
389
|
+
startDoc: function (_) {
|
390
|
+
stack = [];
|
391
|
+
ignoring = false;
|
392
|
+
},
|
393
|
+
startTag: function (tagName, attribs, out) {
|
394
|
+
if (ignoring) { return; }
|
395
|
+
if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; }
|
396
|
+
var eflags = html4.ELEMENTS[tagName];
|
397
|
+
if (eflags & html4.eflags.FOLDABLE) {
|
398
|
+
return;
|
399
|
+
} else if (eflags & html4.eflags.UNSAFE) {
|
400
|
+
ignoring = !(eflags & html4.eflags.EMPTY);
|
401
|
+
return;
|
402
|
+
}
|
403
|
+
attribs = sanitizeAttributes(tagName, attribs);
|
404
|
+
// TODO(mikesamuel): relying on sanitizeAttributes not to
|
405
|
+
// insert unsafe attribute names.
|
406
|
+
if (attribs) {
|
407
|
+
if (!(eflags & html4.eflags.EMPTY)) {
|
408
|
+
stack.push(tagName);
|
409
|
+
}
|
410
|
+
|
411
|
+
out.push('<', tagName);
|
412
|
+
for (var i = 0, n = attribs.length; i < n; i += 2) {
|
413
|
+
var attribName = attribs[i],
|
414
|
+
value = attribs[i + 1];
|
415
|
+
if (value !== null && value !== void 0) {
|
416
|
+
out.push(' ', attribName, '="', html.escapeAttrib(value), '"');
|
417
|
+
}
|
418
|
+
}
|
419
|
+
out.push('>');
|
420
|
+
}
|
421
|
+
},
|
422
|
+
endTag: function (tagName, out) {
|
423
|
+
if (ignoring) {
|
424
|
+
ignoring = false;
|
425
|
+
return;
|
426
|
+
}
|
427
|
+
if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; }
|
428
|
+
var eflags = html4.ELEMENTS[tagName];
|
429
|
+
if (!(eflags & (html4.eflags.UNSAFE | html4.eflags.EMPTY
|
430
|
+
| html4.eflags.FOLDABLE))) {
|
431
|
+
var index;
|
432
|
+
if (eflags & html4.eflags.OPTIONAL_ENDTAG) {
|
433
|
+
for (index = stack.length; --index >= 0;) {
|
434
|
+
var stackEl = stack[index];
|
435
|
+
if (stackEl === tagName) { break; }
|
436
|
+
if (!(html4.ELEMENTS[stackEl] & html4.eflags.OPTIONAL_ENDTAG)) {
|
437
|
+
// Don't pop non optional end tags looking for a match.
|
438
|
+
return;
|
439
|
+
}
|
440
|
+
}
|
441
|
+
} else {
|
442
|
+
for (index = stack.length; --index >= 0;) {
|
443
|
+
if (stack[index] === tagName) { break; }
|
444
|
+
}
|
445
|
+
}
|
446
|
+
if (index < 0) { return; } // Not opened.
|
447
|
+
for (var i = stack.length; --i > index;) {
|
448
|
+
var stackEl = stack[i];
|
449
|
+
if (!(html4.ELEMENTS[stackEl] & html4.eflags.OPTIONAL_ENDTAG)) {
|
450
|
+
out.push('</', stackEl, '>');
|
451
|
+
}
|
452
|
+
}
|
453
|
+
stack.length = index;
|
454
|
+
out.push('</', tagName, '>');
|
455
|
+
}
|
456
|
+
},
|
457
|
+
pcdata: function (text, out) {
|
458
|
+
if (!ignoring) { out.push(text); }
|
459
|
+
},
|
460
|
+
rcdata: function (text, out) {
|
461
|
+
if (!ignoring) { out.push(text); }
|
462
|
+
},
|
463
|
+
cdata: function (text, out) {
|
464
|
+
if (!ignoring) { out.push(text); }
|
465
|
+
},
|
466
|
+
endDoc: function (out) {
|
467
|
+
for (var i = stack.length; --i >= 0;) {
|
468
|
+
out.push('</', stack[i], '>');
|
469
|
+
}
|
470
|
+
stack.length = 0;
|
471
|
+
}
|
472
|
+
});
|
473
|
+
};
|
474
|
+
|
475
|
+
|
476
|
+
/**
|
477
|
+
* Strips unsafe tags and attributes from html.
|
478
|
+
* @param {string} htmlText to sanitize
|
479
|
+
* @param {Function} opt_uriPolicy -- a transform to apply to uri/url attribute
|
480
|
+
* values.
|
481
|
+
* @param {Function} opt_nmTokenPolicy : string -> string? -- a transform to
|
482
|
+
* apply to names, ids, and classes.
|
483
|
+
* @return {string} html
|
484
|
+
*/
|
485
|
+
function html_sanitize(htmlText, opt_uriPolicy, opt_nmTokenPolicy) {
  // Sanitized output is accumulated as string fragments and joined at the end.
  var out = [];
  html.makeHtmlSanitizer(
      // Rewrites attribs (alternating name, value) in place.  A value set to
      // null marks the attribute as rejected.
      function sanitizeAttribs(tagName, attribs) {
        var i;
        for (i = 0; i < attribs.length; i += 2) {
          var attribName = attribs[i];
          var value = attribs[i + 1];
          var atype = null, attribKey;
          // Prefer the tag-specific rule, then fall back to the '*' rule.
          if ((attribKey = tagName + '::' + attribName,
               html4.ATTRIBS.hasOwnProperty(attribKey))
              || (attribKey = '*::' + attribName,
                  html4.ATTRIBS.hasOwnProperty(attribKey))) {
            atype = html4.ATTRIBS[attribKey];
          }
          if (atype !== null) {
            switch (atype) {
              case html4.atype.NONE: break;
              case html4.atype.SCRIPT:
                value = null;
                break;
              case html4.atype.STYLE:
                // Nulling the value was commented out to support the
                // 'style' attribute (CYD 2011-04-03).  The CSS text is
                // passed through unchanged here.
                break;
              case html4.atype.ID:
              case html4.atype.IDREF:
              case html4.atype.IDREFS:
              case html4.atype.GLOBAL_NAME:
              case html4.atype.LOCAL_NAME:
              case html4.atype.CLASSES:
                value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
                break;
              case html4.atype.URI:
                // Without a URI policy every URI attribute is dropped.
                value = opt_uriPolicy && opt_uriPolicy(value);
                break;
              case html4.atype.URI_FRAGMENT:
                if (value && '#' === value.charAt(0)) {
                  // Strip the leading '#' before applying the policy and
                  // restore it afterwards; otherwise '#name' came back as
                  // '##name'.
                  value = value.substring(1);
                  value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
                  if (value !== null && value !== void 0) {
                    value = '#' + value;
                  }
                } else {
                  value = null;
                }
                break;
              default:
                value = null;
                break;
            }
          } else {
            // Attribute is not in the whitelist.
            value = null;
          }
          attribs[i + 1] = value;
        }
        if (tagName === "a") {
          // Force every link to open in a new window.
          // NOTE(review): target="_blank" without rel="noopener" exposes
          // window.opener to the linked page -- consider adding it.
          var set_target = false;
          for (i = 0; i < attribs.length; i += 2) {
            if (attribs[i] === "target") {
              set_target = true;
              attribs[i + 1] = "_blank";
            }
          }
          if (!set_target) {
            attribs.push("target", "_blank");
          }
        }
        return attribs;
      })(htmlText, out);
  return out.join('');
}
|
@@ -0,0 +1,391 @@
|
|
1
|
+
/* Copyright Google Inc.
|
2
|
+
* Licensed under the Apache Licence Version 2.0
|
3
|
+
* Autogenerated at Wed Mar 23 23:46:49 PDT 2011
|
4
|
+
* @provides html4
|
5
|
+
*/
|
6
|
+
var html4 = {};
|
7
|
+
html4 .atype = {
|
8
|
+
'NONE': 0,
|
9
|
+
'URI': 1,
|
10
|
+
'URI_FRAGMENT': 11,
|
11
|
+
'SCRIPT': 2,
|
12
|
+
'STYLE': 3,
|
13
|
+
'ID': 4,
|
14
|
+
'IDREF': 5,
|
15
|
+
'IDREFS': 6,
|
16
|
+
'GLOBAL_NAME': 7,
|
17
|
+
'LOCAL_NAME': 8,
|
18
|
+
'CLASSES': 9,
|
19
|
+
'FRAME_TARGET': 10
|
20
|
+
};
|
21
|
+
html4 .ATTRIBS = {
|
22
|
+
'*::class': 9,
|
23
|
+
'*::dir': 0,
|
24
|
+
'*::id': 4,
|
25
|
+
'*::lang': 0,
|
26
|
+
'*::onclick': 2,
|
27
|
+
'*::ondblclick': 2,
|
28
|
+
'*::onkeydown': 2,
|
29
|
+
'*::onkeypress': 2,
|
30
|
+
'*::onkeyup': 2,
|
31
|
+
'*::onload': 2,
|
32
|
+
'*::onmousedown': 2,
|
33
|
+
'*::onmousemove': 2,
|
34
|
+
'*::onmouseout': 2,
|
35
|
+
'*::onmouseover': 2,
|
36
|
+
'*::onmouseup': 2,
|
37
|
+
'*::style': 3,
|
38
|
+
'*::title': 0,
|
39
|
+
'a::accesskey': 0,
|
40
|
+
'a::coords': 0,
|
41
|
+
'a::href': 1,
|
42
|
+
'a::hreflang': 0,
|
43
|
+
'a::name': 7,
|
44
|
+
'a::onblur': 2,
|
45
|
+
'a::onfocus': 2,
|
46
|
+
'a::rel': 0,
|
47
|
+
'a::rev': 0,
|
48
|
+
'a::shape': 0,
|
49
|
+
'a::tabindex': 0,
|
50
|
+
'a::target': 10,
|
51
|
+
'a::type': 0,
|
52
|
+
'area::accesskey': 0,
|
53
|
+
'area::alt': 0,
|
54
|
+
'area::coords': 0,
|
55
|
+
'area::href': 1,
|
56
|
+
'area::nohref': 0,
|
57
|
+
'area::onblur': 2,
|
58
|
+
'area::onfocus': 2,
|
59
|
+
'area::shape': 0,
|
60
|
+
'area::tabindex': 0,
|
61
|
+
'area::target': 10,
|
62
|
+
'bdo::dir': 0,
|
63
|
+
'blockquote::cite': 1,
|
64
|
+
'br::clear': 0,
|
65
|
+
'button::accesskey': 0,
|
66
|
+
'button::disabled': 0,
|
67
|
+
'button::name': 8,
|
68
|
+
'button::onblur': 2,
|
69
|
+
'button::onfocus': 2,
|
70
|
+
'button::tabindex': 0,
|
71
|
+
'button::type': 0,
|
72
|
+
'button::value': 0,
|
73
|
+
'canvas::height': 0,
|
74
|
+
'canvas::width': 0,
|
75
|
+
'caption::align': 0,
|
76
|
+
'col::align': 0,
|
77
|
+
'col::char': 0,
|
78
|
+
'col::charoff': 0,
|
79
|
+
'col::span': 0,
|
80
|
+
'col::valign': 0,
|
81
|
+
'col::width': 0,
|
82
|
+
'colgroup::align': 0,
|
83
|
+
'colgroup::char': 0,
|
84
|
+
'colgroup::charoff': 0,
|
85
|
+
'colgroup::span': 0,
|
86
|
+
'colgroup::valign': 0,
|
87
|
+
'colgroup::width': 0,
|
88
|
+
'del::cite': 1,
|
89
|
+
'del::datetime': 0,
|
90
|
+
'dir::compact': 0,
|
91
|
+
'div::align': 0,
|
92
|
+
'dl::compact': 0,
|
93
|
+
'font::color': 0,
|
94
|
+
'font::face': 0,
|
95
|
+
'font::size': 0,
|
96
|
+
'form::accept': 0,
|
97
|
+
'form::action': 1,
|
98
|
+
'form::autocomplete': 0,
|
99
|
+
'form::enctype': 0,
|
100
|
+
'form::method': 0,
|
101
|
+
'form::name': 7,
|
102
|
+
'form::onreset': 2,
|
103
|
+
'form::onsubmit': 2,
|
104
|
+
'form::target': 10,
|
105
|
+
'h1::align': 0,
|
106
|
+
'h2::align': 0,
|
107
|
+
'h3::align': 0,
|
108
|
+
'h4::align': 0,
|
109
|
+
'h5::align': 0,
|
110
|
+
'h6::align': 0,
|
111
|
+
'hr::align': 0,
|
112
|
+
'hr::noshade': 0,
|
113
|
+
'hr::size': 0,
|
114
|
+
'hr::width': 0,
|
115
|
+
'iframe::align': 0,
|
116
|
+
'iframe::frameborder': 0,
|
117
|
+
'iframe::height': 0,
|
118
|
+
'iframe::marginheight': 0,
|
119
|
+
'iframe::marginwidth': 0,
|
120
|
+
'iframe::width': 0,
|
121
|
+
'img::align': 0,
|
122
|
+
'img::alt': 0,
|
123
|
+
'img::border': 0,
|
124
|
+
'img::height': 0,
|
125
|
+
'img::hspace': 0,
|
126
|
+
'img::ismap': 0,
|
127
|
+
'img::name': 7,
|
128
|
+
'img::src': 1,
|
129
|
+
'img::usemap': 11,
|
130
|
+
'img::vspace': 0,
|
131
|
+
'img::width': 0,
|
132
|
+
'input::accept': 0,
|
133
|
+
'input::accesskey': 0,
|
134
|
+
'input::align': 0,
|
135
|
+
'input::alt': 0,
|
136
|
+
'input::autocomplete': 0,
|
137
|
+
'input::checked': 0,
|
138
|
+
'input::disabled': 0,
|
139
|
+
'input::ismap': 0,
|
140
|
+
'input::maxlength': 0,
|
141
|
+
'input::name': 8,
|
142
|
+
'input::onblur': 2,
|
143
|
+
'input::onchange': 2,
|
144
|
+
'input::onfocus': 2,
|
145
|
+
'input::onselect': 2,
|
146
|
+
'input::readonly': 0,
|
147
|
+
'input::size': 0,
|
148
|
+
'input::src': 1,
|
149
|
+
'input::tabindex': 0,
|
150
|
+
'input::type': 0,
|
151
|
+
'input::usemap': 11,
|
152
|
+
'input::value': 0,
|
153
|
+
'ins::cite': 1,
|
154
|
+
'ins::datetime': 0,
|
155
|
+
'label::accesskey': 0,
|
156
|
+
'label::for': 5,
|
157
|
+
'label::onblur': 2,
|
158
|
+
'label::onfocus': 2,
|
159
|
+
'legend::accesskey': 0,
|
160
|
+
'legend::align': 0,
|
161
|
+
'li::type': 0,
|
162
|
+
'li::value': 0,
|
163
|
+
'map::name': 7,
|
164
|
+
'menu::compact': 0,
|
165
|
+
'ol::compact': 0,
|
166
|
+
'ol::start': 0,
|
167
|
+
'ol::type': 0,
|
168
|
+
'optgroup::disabled': 0,
|
169
|
+
'optgroup::label': 0,
|
170
|
+
'option::disabled': 0,
|
171
|
+
'option::label': 0,
|
172
|
+
'option::selected': 0,
|
173
|
+
'option::value': 0,
|
174
|
+
'p::align': 0,
|
175
|
+
'pre::width': 0,
|
176
|
+
'q::cite': 1,
|
177
|
+
'select::disabled': 0,
|
178
|
+
'select::multiple': 0,
|
179
|
+
'select::name': 8,
|
180
|
+
'select::onblur': 2,
|
181
|
+
'select::onchange': 2,
|
182
|
+
'select::onfocus': 2,
|
183
|
+
'select::size': 0,
|
184
|
+
'select::tabindex': 0,
|
185
|
+
'table::align': 0,
|
186
|
+
'table::bgcolor': 0,
|
187
|
+
'table::border': 0,
|
188
|
+
'table::cellpadding': 0,
|
189
|
+
'table::cellspacing': 0,
|
190
|
+
'table::frame': 0,
|
191
|
+
'table::rules': 0,
|
192
|
+
'table::summary': 0,
|
193
|
+
'table::width': 0,
|
194
|
+
'tbody::align': 0,
|
195
|
+
'tbody::char': 0,
|
196
|
+
'tbody::charoff': 0,
|
197
|
+
'tbody::valign': 0,
|
198
|
+
'td::abbr': 0,
|
199
|
+
'td::align': 0,
|
200
|
+
'td::axis': 0,
|
201
|
+
'td::bgcolor': 0,
|
202
|
+
'td::char': 0,
|
203
|
+
'td::charoff': 0,
|
204
|
+
'td::colspan': 0,
|
205
|
+
'td::headers': 6,
|
206
|
+
'td::height': 0,
|
207
|
+
'td::nowrap': 0,
|
208
|
+
'td::rowspan': 0,
|
209
|
+
'td::scope': 0,
|
210
|
+
'td::valign': 0,
|
211
|
+
'td::width': 0,
|
212
|
+
'textarea::accesskey': 0,
|
213
|
+
'textarea::cols': 0,
|
214
|
+
'textarea::disabled': 0,
|
215
|
+
'textarea::name': 8,
|
216
|
+
'textarea::onblur': 2,
|
217
|
+
'textarea::onchange': 2,
|
218
|
+
'textarea::onfocus': 2,
|
219
|
+
'textarea::onselect': 2,
|
220
|
+
'textarea::readonly': 0,
|
221
|
+
'textarea::rows': 0,
|
222
|
+
'textarea::tabindex': 0,
|
223
|
+
'tfoot::align': 0,
|
224
|
+
'tfoot::char': 0,
|
225
|
+
'tfoot::charoff': 0,
|
226
|
+
'tfoot::valign': 0,
|
227
|
+
'th::abbr': 0,
|
228
|
+
'th::align': 0,
|
229
|
+
'th::axis': 0,
|
230
|
+
'th::bgcolor': 0,
|
231
|
+
'th::char': 0,
|
232
|
+
'th::charoff': 0,
|
233
|
+
'th::colspan': 0,
|
234
|
+
'th::headers': 6,
|
235
|
+
'th::height': 0,
|
236
|
+
'th::nowrap': 0,
|
237
|
+
'th::rowspan': 0,
|
238
|
+
'th::scope': 0,
|
239
|
+
'th::valign': 0,
|
240
|
+
'th::width': 0,
|
241
|
+
'thead::align': 0,
|
242
|
+
'thead::char': 0,
|
243
|
+
'thead::charoff': 0,
|
244
|
+
'thead::valign': 0,
|
245
|
+
'tr::align': 0,
|
246
|
+
'tr::bgcolor': 0,
|
247
|
+
'tr::char': 0,
|
248
|
+
'tr::charoff': 0,
|
249
|
+
'tr::valign': 0,
|
250
|
+
'ul::compact': 0,
|
251
|
+
'ul::type': 0
|
252
|
+
};
|
253
|
+
// Element flags. Every flag occupies its own bit; the numbers stored in
// html4.ELEMENTS appear to be OR-ed combinations of these bits
// (for example 18 == UNSAFE | EMPTY).
html4.eflags = {
  'OPTIONAL_ENDTAG': 1 << 0,  //   1
  'EMPTY':           1 << 1,  //   2
  'CDATA':           1 << 2,  //   4
  'RCDATA':          1 << 3,  //   8
  'UNSAFE':          1 << 4,  //  16
  'FOLDABLE':        1 << 5,  //  32
  'SCRIPT':          1 << 6,  //  64
  'STYLE':           1 << 7   // 128
};
|
263
|
+
// Known element names mapped to a numeric flag word. The values look like
// bitmasks built from html4.eflags: 0 = no flags, 1 = OPTIONAL_ENDTAG,
// 2 = EMPTY, 16 = UNSAFE, 18 = UNSAFE | EMPTY, 20 = UNSAFE | CDATA,
// 24 = UNSAFE | RCDATA, 49 = FOLDABLE | UNSAFE | OPTIONAL_ENDTAG,
// 84 = SCRIPT | UNSAFE | CDATA, 148 = STYLE | UNSAFE | CDATA.
// Entries are kept in their original (alphabetical) order.
html4.ELEMENTS = {
  'a': 0, 'abbr': 0, 'acronym': 0, 'address': 0, 'applet': 16, 'area': 2,
  'b': 0, 'base': 18, 'basefont': 18, 'bdo': 0, 'big': 0, 'blockquote': 0,
  'body': 49, 'br': 2, 'button': 0,
  'canvas': 0, 'caption': 0, 'center': 0, 'cite': 0, 'code': 0, 'col': 2,
  'colgroup': 1,
  'dd': 1, 'del': 0, 'dfn': 0, 'dir': 0, 'div': 0, 'dl': 0, 'dt': 1,
  'em': 0,
  'fieldset': 0, 'font': 0, 'form': 0, 'frame': 18, 'frameset': 16,
  'h1': 0, 'h2': 0, 'h3': 0, 'h4': 0, 'h5': 0, 'h6': 0, 'head': 49, 'hr': 2,
  'html': 49,
  'i': 0, 'iframe': 4, 'img': 2, 'input': 2, 'ins': 0, 'isindex': 18,
  'kbd': 0,
  'label': 0, 'legend': 0, 'li': 1, 'link': 18,
  'map': 0, 'menu': 0, 'meta': 18,
  'nobr': 0, 'noframes': 20, 'noscript': 20,
  'object': 16, 'ol': 0, 'optgroup': 0, 'option': 1,
  'p': 1, 'param': 18, 'pre': 0,
  'q': 0,
  's': 0, 'samp': 0, 'script': 84, 'select': 0, 'small': 0, 'span': 0,
  'strike': 0, 'strong': 0, 'style': 148, 'sub': 0, 'sup': 0,
  'table': 0, 'tbody': 1, 'td': 1, 'textarea': 8, 'tfoot': 1, 'th': 1,
  'thead': 1, 'title': 24, 'tr': 1, 'tt': 0,
  'u': 0, 'ul': 0,
  'var': 0
};
|
358
|
+
// Symbolic names for the numeric codes 0-2; html4.URIEFFECTS stores values
// in the same range, so these presumably label its entries.
html4.ueffects = {
  'NOT_LOADED':    0,
  'SAME_DOCUMENT': 1,
  'NEW_DOCUMENT':  2
};
|
363
|
+
// URI-carrying attributes, keyed as 'element::attribute', mapped to a code in
// the 0-2 range (presumably one of the html4.ueffects values).
html4.URIEFFECTS = {
  'a::href': 2, 'area::href': 2,
  'blockquote::cite': 0, 'body::background': 1,
  'del::cite': 0,
  'form::action': 2,
  'img::src': 1, 'input::src': 1, 'ins::cite': 0,
  'q::cite': 0
};
|
375
|
+
// Symbolic names for the numeric codes 0-2; html4.LOADERTYPES stores values
// in the same range, so these presumably label its entries.
html4.ltypes = {
  'UNSANDBOXED': 2,
  'SANDBOXED':   1,
  'DATA':        0
};
|
380
|
+
// URI-carrying attributes, keyed as 'element::attribute', mapped to a code in
// the 0-2 range (presumably one of the html4.ltypes values).
html4.LOADERTYPES = {
  'a::href': 2, 'area::href': 2,
  'blockquote::cite': 2, 'body::background': 1,
  'del::cite': 2,
  'form::action': 2,
  'img::src': 1, 'input::src': 1, 'ins::cite': 2,
  'q::cite': 2
};
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'pty'
|
4
|
+
require 'v8'
|
5
|
+
require 'better/tempfile'
|
6
|
+
|
7
|
+
module TrustHtml
  # JavaScript source used as the body of the `urlX(url)` function that is
  # passed to html_sanitize. Use it to test every URL in the HTML against a
  # required structure (all on your domain, all HTTPS, etc.).
  # `url` is local to the generated function.
  # Example that returns the URL only when it is HTTPS (any other URL falls
  # through and the function returns undefined):
  #   "if (/^https:\\/\\//i.test(url)) { return url; }"
  URL_SANITIZER_METHOD_BODY = "return url;"

  # JavaScript source used as the body of the `idX(id)` function that is
  # passed to html_sanitize. Use it to test every ID so it does not conflict
  # (or to drop them all). `id` is local to the generated function.
  ID_SANITIZER_METHOD_BODY = "return id;"

  # Runs +html_to_sanitize+ through the bundled JavaScript `html_sanitize`
  # function inside a fresh V8 context and returns whatever that call
  # evaluates to.
  #
  # The HTML is handed to V8 as a bound value instead of being spliced into
  # the script text. Building a quoted JavaScript literal from the input let
  # sequences such as a backslash followed by a quote terminate the literal,
  # so attacker-supplied markup could run as code in the context.
  #
  # @param html_to_sanitize [String] markup to clean; must respond to
  #   #remove_nonprintable (added to String by trust_html/string_ext)
  # @return [Object] the result of evaluating html_sanitize(...) in V8
  def self.sanitize_html(html_to_sanitize)
    # Global under which the input is exposed to the script below.
    input_binding = "__trust_html_input"

    sanitizer_js = "function urlX(url) {#{URL_SANITIZER_METHOD_BODY}};" +
                   "function idX(id) {#{ID_SANITIZER_METHOD_BODY}};" +
                   "html_sanitize(#{input_binding}, urlX, idX);"

    cxt = V8::Context.new
    cxt.load(File.expand_path("../../../assets/html4-defs.js", __FILE__))
    cxt.load(File.expand_path("../../../assets/html-sanitizer.js", __FILE__))
    # Data, not source: nothing in the HTML is ever parsed as JavaScript.
    cxt[input_binding] = html_to_sanitize.remove_nonprintable
    cxt.eval(sanitizer_js)
  end
end
|
data/lib/trust_html.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
# Author: Cary Dunn <cary.dunn@gmail.com>
|
5
|
+
# Copyright 2011 All rights reserved
|
6
|
+
|
7
|
+
#require 'rubygems'
|
8
|
+
|
9
|
+
# Include subclasses...
|
10
|
+
# Put this file's directory on the load path (only once) so the
# 'trust_html/...' requires that follow can be resolved.
lib_root = File.dirname(__FILE__)
$LOAD_PATH.include?(lib_root) || $LOAD_PATH.unshift(lib_root)

# Top-level namespace of the gem, declared empty here.
module TrustHtml; end
|
15
|
+
|
16
|
+
require 'trust_html/sanitizer'
|
17
|
+
require 'trust_html/string_ext'
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Test bootstrap: activates the bundle, loads the test libraries, puts lib/
# and test/ on the load path, then loads the gem under test.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Report what is missing and exit with Bundler's own status code.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

# Unshift lib/ first and test/ second, so test/ ends up ahead of lib/.
test_dir = File.dirname(__FILE__)
[File.join(test_dir, '..', 'lib'), test_dir].each { |path| $LOAD_PATH.unshift(path) }
require 'trust_html'

# Reopened so shared test helpers can be added for every test case.
class Test::Unit::TestCase
end
|
data/trust_html.gemspec
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Gem specification for trust_html 0.1.0.
Gem::Specification.new do |s|
  blurb = "Make HTML trustworthy for rendering within your web app via Google HTML sanitizers. This is _not_ about stripping HTML but rather about cleaning it of javascript (onclicks, etc.) as well as CSS."

  s.name = "trust_html"
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Cary Dunn"]
  s.date = "2011-07-15"
  s.description = blurb
  s.email = "cary.dunn@gmail.com"
  s.extra_rdoc_files = %w[LICENSE.txt README.rdoc]
  s.files = %w[
    .document
    Gemfile
    Gemfile.lock
    LICENSE.txt
    README.rdoc
    Rakefile
    VERSION
    assets/html-sanitizer.js
    assets/html4-defs.js
    lib/trust_html.rb
    lib/trust_html/sanitizer.rb
    lib/trust_html/string_ext.rb
    test/helper.rb
    test/test_trust_html.rb
    trust_html.gemspec
  ]
  s.homepage = "http://github.com/cdunn/trust_html"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.5"
  s.summary = blurb.dup

  # Dependencies as [name, requirements] pairs, in declaration order.
  runtime_deps = [
    ["better", [">= 0"]],
    ["therubyracer", [">= 0"]]
  ]
  development_deps = [
    ["shoulda", [">= 0"]],
    ["bundler", ["~> 1.0.0"]],
    ["jeweler", ["~> 1.6.4"]],
    ["rcov", [">= 0"]]
  ]

  versioned_spec = s.respond_to?(:specification_version)
  s.specification_version = 3 if versioned_spec

  if versioned_spec && Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
    # Only this branch distinguishes runtime from development dependencies.
    runtime_deps.each { |gem_name, reqs| s.add_runtime_dependency(gem_name, reqs) }
    development_deps.each { |gem_name, reqs| s.add_development_dependency(gem_name, reqs) }
  else
    # Fallback: every dependency is registered through add_dependency.
    (runtime_deps + development_deps).each { |gem_name, reqs| s.add_dependency(gem_name, reqs) }
  end
end
|
69
|
+
|
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: trust_html
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Cary Dunn
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-07-15 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: better
|
16
|
+
requirement: &2152453300 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2152453300
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: therubyracer
|
27
|
+
requirement: &2152451820 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *2152451820
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: shoulda
|
38
|
+
requirement: &2152449900 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *2152449900
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: bundler
|
49
|
+
requirement: &2152447400 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.0
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *2152447400
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: jeweler
|
60
|
+
requirement: &2152444300 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ~>
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: 1.6.4
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *2152444300
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rcov
|
71
|
+
requirement: &2152443180 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *2152443180
|
80
|
+
description: Make HTML trustworthy for rendering within your web app via Google HTML
|
81
|
+
sanitizers. This is _not_ about stripping HTML but rather about cleaning it of javascript
|
82
|
+
(onclicks, etc.) as well as CSS.
|
83
|
+
email: cary.dunn@gmail.com
|
84
|
+
executables: []
|
85
|
+
extensions: []
|
86
|
+
extra_rdoc_files:
|
87
|
+
- LICENSE.txt
|
88
|
+
- README.rdoc
|
89
|
+
files:
|
90
|
+
- .document
|
91
|
+
- Gemfile
|
92
|
+
- Gemfile.lock
|
93
|
+
- LICENSE.txt
|
94
|
+
- README.rdoc
|
95
|
+
- Rakefile
|
96
|
+
- VERSION
|
97
|
+
- assets/html-sanitizer.js
|
98
|
+
- assets/html4-defs.js
|
99
|
+
- lib/trust_html.rb
|
100
|
+
- lib/trust_html/sanitizer.rb
|
101
|
+
- lib/trust_html/string_ext.rb
|
102
|
+
- test/helper.rb
|
103
|
+
- test/test_trust_html.rb
|
104
|
+
- trust_html.gemspec
|
105
|
+
homepage: http://github.com/cdunn/trust_html
|
106
|
+
licenses:
|
107
|
+
- MIT
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
segments:
|
119
|
+
- 0
|
120
|
+
hash: 4343116667519934222
|
121
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
|
+
none: false
|
123
|
+
requirements:
|
124
|
+
- - ! '>='
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
requirements: []
|
128
|
+
rubyforge_project:
|
129
|
+
rubygems_version: 1.8.5
|
130
|
+
signing_key:
|
131
|
+
specification_version: 3
|
132
|
+
summary: Make HTML trustworthy for rendering within your web app via Google HTML sanitizers.
|
133
|
+
This is _not_ about stripping HTML but rather about cleaning it of javascript (onclicks,
|
134
|
+
etc.) as well as CSS.
|
135
|
+
test_files: []
|