escape_escape_escape 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f8e9caa2b123a6e7258a68b096860d2d81f6ad48
4
+ data.tar.gz: 04cb5b7fb058a88dbc3be52cd933f10b34970c3f
5
+ SHA512:
6
+ metadata.gz: 6181a9de7665728466ac4bbd72375dad6ad632938af5b2c53faaf300717cde58a0cd5ddec57780974608cbdb2a2feb4094fe519cb8e65f7682e9714726e926dc
7
+ data.tar.gz: 87e14c445c08f606959fa353b7d373d093c9658de519890cdfadcebdc6424a54da442a87ecb2a253c91ea099268f71f6143576cd5cef61b3be4ee06de9013b24
data/.gitignore ADDED
@@ -0,0 +1,25 @@
1
+ /pids/
2
+ /logs/
3
+ /results/
4
+ /npm-debug.log/
5
+
6
+ /node_modules/
7
+ /npm-debug.log
8
+
9
+ *.gem
10
+ *.rbc
11
+ .bundle
12
+ .config
13
+ .yardoc
14
+ Gemfile.lock
15
+ InstalledFiles
16
+ _yardoc
17
+ coverage
18
+ doc/
19
+ lib/bundler/man
20
+ pkg
21
+ rdoc
22
+ spec/reports
23
+ test/tmp
24
+ test/version_tmp
25
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,23 @@
1
+
2
+ Copyright (c) 2013 da99
3
+
4
+ Permission is hereby granted, free of charge, to any person
5
+ obtaining a copy of this software and associated documentation
6
+ files (the "Software"), to deal in the Software without
7
+ restriction, including without limitation the rights to use,
8
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the
10
+ Software is furnished to do so, subject to the following
11
+ conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23
+ OTHER DEALINGS IN THE SOFTWARE.
data/LICENSE.txt ADDED
@@ -0,0 +1,23 @@
1
+
2
+ Copyright (c) 2014 da99
3
+
4
+ MIT License
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,14 @@
1
+ What is it?
2
+ ====================
3
+
4
+ My way of escaping and sanitizing HTML.
5
+
6
+
7
+ Use:
8
+ =====================
9
+
10
+
11
+ // npm install escape_escape_escape
12
+
13
+ var E = require("escape_escape_escape").Sanitize.html;
14
+ E("The <strong>brave</strong> and the <b>bold</b>.");
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "escape_escape_escape"
7
+ spec.version = `cat VERSION`
8
+ spec.authors = ["da99"]
9
+ spec.email = ["i-hate-spam-1234567@mailinator.com"]
10
+ spec.summary = %q{My way of escaping/encoding HTML.}
11
+ spec.description = %q{
12
+ My way of escaping/encoding HTML with the proper entities.
13
+ }
14
+ spec.homepage = "https://github.com/da99/escape_escape_escape"
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |file|
18
+ file.index('bin/') == 0 && file != "bin/#{File.basename Dir.pwd}"
19
+ }
20
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
21
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
22
+ spec.require_paths = ["lib"]
23
+
24
+ spec.add_dependency "sanitize" , "~> 3.0"
25
+
26
+ spec.add_development_dependency "pry" , "~> 0.9"
27
+ spec.add_development_dependency "rake" , "~> 10.3"
28
+ spec.add_development_dependency "bundler" , "~> 1.5"
29
+ spec.add_development_dependency "bacon" , "~> 1.0"
30
+ spec.add_development_dependency "Bacon_Colored" , "~> 0.1"
31
+ spec.add_development_dependency "multi_json" , "~> 1.10"
32
+ end
data/lib/beta.rb ADDED
@@ -0,0 +1,270 @@
1
+
2
+
3
+ # === Important Gems ===
4
+ require 'cgi' # Don't use URI.escape because it does not escape all invalid characters.
5
+ require 'htmlentities'
6
+ require 'loofah'
7
+ require "addressable/uri"
8
+ require "escape_utils"
9
+ require "htmlentities"
10
+ require "uri"
11
+
12
+ def Escape_Escape_Escape s
13
+ Escape_Escape_Escape.escape(s)
14
+ end
15
+
16
+ class Escape_Escape_Escape
17
+
18
+ Coder = HTMLEntities.new(:xhtml1)
19
+
20
+ ENCODING_OPTIONS_CLEAN_UTF8 = {
21
+ :invalid => :replace, # Replace invalid byte sequences
22
+ :undef => :replace, # Replace characters undefined in the target encoding (UTF-8)
23
+ :replace => '' # Use a blank for those replacements
24
+ # :newline => :universal
25
+ # :universal_newline => true # Always break lines with \n, not \r\n
26
+ }
27
+
28
+ opts = Regexp::FIXEDENCODING | Regexp::IGNORECASE
29
+
30
+ # tabs, etc.
31
+ Control = Regexp.new("[[:cntrl:]]".force_encoding('utf-8'), opts) # unicode whitespaces, like 160 codepoint
32
+ # From:
33
+ # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
34
+ White_Space = Regexp.new("[[:space:]]".force_encoding('utf-8'), opts)
35
+
36
+ REPEATING_DOTS = /\.{1,}/
37
+ INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
38
+
39
+
40
+ # HTML_ESCAPE_TABLE is used after text is escaped to
41
+ # escape the text further. This is why the semi-colon (&#59;) was left out
42
+ # from HTML_ESCAPE_TABLE. It would conflict with already escaped text.
43
+ # For more entities: http://www.w3.org/MarkUp/html3/latin1.html
44
+ # or go to: http://www.mountaindragon.com/html/iso.htm
45
+ HTML_ESCAPE_TABLE = {
46
+
47
+ '&laquo;' => "&lt;",
48
+ '&raquo;' => "&gt;",
49
+
50
+ "&lsquo;" => "&apos;",
51
+ "&rsquo;" => "&apos;",
52
+ "&sbquo;" => "&apos;",
53
+
54
+ "&lsquo;" => "&apos;",
55
+ "&rsquo;" => "&apos;",
56
+
57
+ "&ldquo;" => "&quot;",
58
+ "&rdquo;" => "&quot;",
59
+ "&bdquo;" => "&quot;",
60
+
61
+ "&lsaquo;" => "&lt;",
62
+ "&rsaquo;" => "&gt;",
63
+
64
+ "&acute;" => "&apos;",
65
+ "&uml;" => "&quot;",
66
+
67
+ '\\' => "&#92;",
68
+ # '/' => "&#47;",
69
+ # '%' => "&#37;",
70
+ # ':' => '&#58;',
71
+ # '=' => '&#61;',
72
+ # '?' => '&#63;',
73
+ # '@' => '&#64;',
74
+ "\`" => '&apos;',
75
+ '‘' => "&apos;",
76
+ '’' => "&apos;",
77
+ '“' => '&quot;',
78
+ '”' => '&quot;',
79
+ # "$" => "&#36;",
80
+ # '#' => '&#35;', # Don't use this or else it will ruin all other entities.
81
+ # '&' => # Don't use this " " " " " "
82
+ # ';' => # Don't use this " " " " " "
83
+ '|' => '&brvbar;',
84
+ '~' => '&sim;'
85
+ # '!' => '&#33;',
86
+ # '*' => '&lowast;', # Don't use this. '*' is used by text formatting, ie RedCloth, etc.
87
+ # '{' => '&#123;',
88
+ # '}' => '&#125;',
89
+ # '(' => '&#40;',
90
+ # ')' => '&#41;',
91
+ # "\n" => '<br />'
92
+ }
93
+
94
+ def new_regexp str
95
+ Regexp.new(clean_utf8(str), Regexp::FIXEDENCODING | Regexp::IGNORECASE)
96
+ end
97
+
98
+ class << self # ======================================================
99
+
100
+ # From:
101
+ # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
102
+ #
103
+ # Test:
104
+ # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
105
+ # inject('', :<<)
106
+ #
107
+ def clean_utf8 s
108
+ s.
109
+ encode(Encoding.find('utf-8'), ENCODING_OPTIONS_CLEAN_UTF8).
110
+ gsub(self::Control, "\n").
111
+ gsub(self::White_Space, " ")
112
+ end
113
+
114
+ def un_escape raw
115
+ EscapeUtils.unescape_html clean_utf8(raw)
116
+ end
117
+
118
+ def uri str
119
+ uri = Addressable::URI.parse(str)
120
+ if ["http","https","ftp"].include?(uri.scheme) || uri.path.index('/') == 0
121
+ str
122
+ else
123
+ nil
124
+ end
125
+ rescue Addressable::URI::InvalidURIError
126
+ fail "Invalid: address: #{str.inspect}"
127
+ end
128
+
129
+ def escape o
130
+ case o
131
+ when String
132
+ Coder.encode(un_escape(o), :named, :hexadecimal)
133
+ else
134
+ fail "Unknown type: #{o.inspect}"
135
+ end
136
+ end # === def
137
+
138
+
139
+ # ===============================================
140
+ # Raises: TZInfo::InvalidTimezoneIdentifier.
141
+ # ===============================================
142
+ def validate_timezone(timezone)
143
+ TZInfo::Timezone.get( timezone.to_s.strip ).identifier
144
+ end
145
+
146
+ # =========================================================
147
+ # Takes out any periods and back slashes in a String.
148
+ # Single periods surrounded by text are allowed on the last substring
149
+ # past the last slash because they are assumed to be filenames
150
+ # with extensions.
151
+ # =========================================================
152
+ def path( raw_path )
153
+ clean_crumbs = raw_path.split('/').map { |crumb| filename(crumb) }
154
+ File.join( *clean_crumbs )
155
+ end
156
+
157
+ # ====================================================================
158
+ # Returns a String where all characters except:
159
+ # letters numbers underscores dashes
160
+ # are replaced with a dash.
161
+ # It also deletes any non-alphanumeric characters at the end
162
+ # of the String.
163
+ # ====================================================================
164
+ def filename( raw_filename )
165
+ plaintext( raw_filename ).
166
+ downcase.
167
+ gsub(REPEATING_DOTS, '.').
168
+ gsub(INVALID_FILE_NAME_CHARS, '-').
169
+ to_s
170
+ end
171
+
172
+ # ===============================================
173
+ # This method is not meant to be called directly. Instead, call
174
+ # <Wash.parse_tags>.
175
+ # Returns: String with
176
+ # * all spaces and underscores turned into dashes.
177
+ # * all non-alphanumeric characters, underscores, dashes, and periods
178
+ # turned into dashes.
179
+ # * non-alphanumeric characters at the beginning and end stripped out.
180
+ # ===============================================
181
+ def tag( raw_tag )
182
+ # raw_tag.strip.downcase.gsub( /[^a-z0-9\.]{1,}/,'-').gsub(/^[^a-z0-9]{1,}|[^a-z0-9]{1,}$/i, '').gsub(/\.{1,}/, '.')
183
+ raw_tag.strip.downcase.gsub(/^[\,\.]{1,}|[\"]{1,}|[\,\.]{1,}$/, '').gsub(/\ /, '-')
184
+ end
185
+
186
+
187
+ # ===============================================
188
+ # A better alternative than "Rack::Utils.escape_html". Escapes
189
+ # various characters (including '&', '<', '>', and both quotation mark types)
190
+ # to HTML decimal entities. Also escapes the characters from
191
+ # SWISS::HTML_ESCAPE_TABLE.
192
+ #
193
+ # Text has to be UTF-8 before encoding, according to HTMLEntities gem.
194
+ # Therefore, all text is run through <Wash.plaintext> before encoding.
195
+ # ===============================================
196
+ def html( raw_text )
197
+
198
+ # Turn string into UTF8. (This also takes out control characters
199
+ # which is good, or else they would be escaped into HTML too.)
200
+ # Strip it after conversion.
201
+ # return Dryopteris.sanitize(utf8_text)
202
+ # Now encode it.
203
+ normalized_encoded_text = escape( plaintext(raw_text).strip, :named )
204
+
205
+ sanitized_text = Loofah.scrub_fragment( normalized_encoded_text, :prune ).to_s
206
+ end # === def html
207
+
208
+
209
+ # ===============================================
210
+ # Returns: A string that is:
211
+ # * normalized to :KC
212
+ # * "\r\n" changed to "\n"
213
+ # * all control characters stripped except for "\n"
214
+ # and end.
215
+ # Options:
216
+ # :tabs
217
+ # :spaces
218
+ #
219
+ # ===============================================
220
+ def plaintext( raw_str, *opts)
221
+
222
+ # Check options.
223
+ @plaintext_allowed_options ||= [ :spaces, :tabs ]
224
+ invalid_opts = opts - @plaintext_allowed_options
225
+ raise(ArgumentError, "INVALID OPTION: #{invalid_opts.inspect}" ) if !invalid_opts.empty?
226
+
227
+ # Save tabs if requested.
228
+ raw_str = raw_str.gsub("\t", "&#09;") if opts.include?(:tabs)
229
+
230
+ # First: Normalize characters.
231
+ # Second: Strip out control characters.
232
+ # Note: Must be normalized first, then strip.
233
+ # See: http://msdn.microsoft.com/en-us/library/ms776393(VS.85).aspx
234
+ final_str = raw_str.
235
+ split("\n").
236
+ map { |line|
237
+ # Don't use "\x20" because that is the space character.
238
+ line.chars.normalize.gsub( /[[:cntrl:]\x00-\x1f]*/, '' )
239
+ }.
240
+ join("\n")
241
+
242
+ # Save whitespace or strip.
243
+ if !opts.include?(:spaces)
244
+ final_str = final_str.strip
245
+ end
246
+
247
+ # Normalize quotations and other characters through HTML entity encoding/decoding.
248
+ final_str = coder.decode( normalised_str Coder.encode(final_str, :named) )
249
+
250
+ # Put back tabs by request.
251
+ if opts.include?(:tabs)
252
+ final_str = final_str.gsub("&#09;", "\t")
253
+ end
254
+
255
+ final_str
256
+ end # self.plaintext
257
+
258
+ # Encode a few other symbols.
259
+ # This also normalizes certain quotation and apostrophe HTML entities.
260
+ def normalize_encoded_string s
261
+ HTML_ESCAPE_TABLE.inject(s) do |m, kv|
262
+ m.gsub( kv.first, kv.last)
263
+ end
264
+ end
265
+
266
+ end # === class self ===
267
+
268
+ end # === class Escape_Escape_Escape ===
269
+
270
+
data/lib/e_e_e.js ADDED
@@ -0,0 +1,258 @@
1
+
2
+ var _ = require('underscore')
3
+ , _s = require('underscore.string')
4
+ , special = require('special-html')
5
+ , HTML_E = require('entities')
6
+ , URI_js = require('uri-js')
7
+ ;
8
+
9
+ var NL = "\n";
10
+ var SPACES = /\ +/g;
11
+ var VALID_HTML_ID = /^[0-9a-zA-Z_]+$/;
12
+ var VALID_HTML_TAG = /^[0-9a-zA-Z_]+$/;
13
+ var IS_ERROR = function (o) { return (_.isObject(o) && o.constructor == Error); };
14
+ var funcs_scope = this;
15
+ var INSPECT = function (v) { return JSON.stringify(v); };
16
+
17
+ var E = exports.Sanitize = {};
18
+
19
+ // ****************************************************************
20
+ // ****************** Sanitize Tag Attributes and content *********
21
+ // ****************************************************************
22
+
23
+
24
+ E.name = function (v) { return E.id(v , "name"); };
25
+ E.href = function (v) { return E.uri(v , "href"); }
26
+ E.action = function (v) { return E.uri(v , 'action'); };
27
+
28
+ E.string = function (raw, name) {
29
+ name = (name) ? (name + ': ') : '';
30
+
31
+ if (_.isString(raw))
32
+ return (raw.trim());
33
+
34
+ return new Error(name + "String expected, instead got: " + JSON.stringify(raw));
35
+ };
36
+
37
+ E.uri = function (raw, name) {
38
+ name = (name) ? name : 'uri';
39
+
40
+ var val = E.string(raw, name);
41
+ if (E.is_error(val))
42
+ return val;
43
+
44
+ var url = HTML_E.decode(val, 2);
45
+ var parse = URI_js.parse(url);
46
+ if (parse.errors.length)
47
+ return new Error(name + ": " + parse.errors[0] + ': ' + val);
48
+
49
+ return URI_js.normalize(url);
50
+ };
51
+
52
+ E.tag = function (raw, name) {
53
+ name = (name) ? name : "tag";
54
+
55
+ var val = E.string(raw, name);
56
+ if (E.is_error(val))
57
+ return val;
58
+
59
+ if (!val.match(VALID_HTML_TAG))
60
+ return new Error(name + ": invalid characters: " + JSON.stringify(val));
61
+
62
+ return val;
63
+ };
64
+
65
+ E.id = function (raw_val, name) {
66
+ name = (name) ? name : "id";
67
+
68
+ var val = E.string(raw_val, name);
69
+ if (val.message)
70
+ return val;
71
+
72
+ if (!val.match(VALID_HTML_ID))
73
+ return new Error(name + ": invalid characters: " + JSON.stringify(val));
74
+
75
+ return val;
76
+ };
77
+
78
+ E.num_of_lines = function (raw_val, name) {
79
+ name = (name) ? name : 'num_of_lines';
80
+
81
+ if (!_.isNumber(raw_val) || _.isNaN(raw_val))
82
+ return new Error(name + ": Must be a number: " + JSON.stringify(raw_val));
83
+
84
+ if (raw_val < 1 || raw_val > 250)
85
+ return new Error(name + ": Number out of range: " + JSON.stringify(raw_val));
86
+
87
+ return raw_val;
88
+ };
89
+
90
+ E.string_in_array = function (unk, name) {
91
+ name = (name) ? name : 'string_in_array';
92
+ if (_.isArray(unk) && unk.length === 1 && _.isString(unk[0]))
93
+ return unk;
94
+ return new Error(name + ": Must be a string within an array: " + JSON.stringify(unk));
95
+ };
96
+
97
+ var temp = null;
98
+ E.attr_funcs = [];
99
+ for (temp in E) {
100
+ if (_.isFunction(E[temp])) {
101
+ E.attr_funcs.push(temp);
102
+ }
103
+ }
104
+
105
+ E.opt = function (func, name) {
106
+ return function (v) {
107
+ if (v === undefined || v === null)
108
+ return null;
109
+ return func(v, name);
110
+ };
111
+ };
112
+
113
+ E.is = function (func) {
114
+ return function (v) {
115
+ var result = func(v);
116
+ if (result && result.message)
117
+ return false;
118
+ return !!result;
119
+ };
120
+ };
121
+
122
+ _.each(E.attr_funcs, function (name, i) {
123
+ E["opt_" + name] = E.opt(E[name], name);
124
+ E["is_" + name] = E.is(E[name]);
125
+ });
126
+
127
+ // ****************************************************************
128
+ // ****************** End of Sanitize Attr Checkers ***************
129
+ // ****************************************************************
130
+
131
+ E.is_error = function (obj) {
132
+ if (!_.isObject(obj))
133
+ return false;
134
+ return obj.constructor === Error;
135
+ };
136
+
137
+ E.html = function (str) {
138
+ if (_.isArray(str)) {
139
+ return _.map(str, function (v, i) {
140
+ return E.html(v);
141
+ });
142
+ }
143
+
144
+ if (_.isObject(str)) {
145
+ var new_o = {};
146
+ _.each(str, function (v, k) {
147
+ new_o[E.html(k)] = E.html(v);
148
+ });
149
+ return new_o;
150
+ }
151
+
152
+ if (!_.isString(str))
153
+ return str;
154
+
155
+ return special( _s.escapeHTML( E.un_escape(str) ) );
156
+ };
157
+
158
+ E.un_escape = function (str) {
159
+ return _s.unescapeHTML( HTML_E.decode( str , 2) )
160
+ };
161
+
162
+ E.attr = function (k, v, tag) {
163
+ if (_.contains(['id', 'name', 'type'], k) && !v.match(VALID_HTML_ID))
164
+ return new Error("Invalid chars in " + tag + " " + k + ": " + v);
165
+
166
+ if (!k.match(VALID_HTML_ID))
167
+ return new Error("Invalid chars in " + tag + " attribute name: " + k);
168
+
169
+ var safe_name = Ok.escape(k).trim();
170
+
171
+ if (_.contains(['href', 'action'], k)) {
172
+ var safe_val = Ok.escape_uri(v);
173
+ if (!safe_val)
174
+ return new Error('Invalid link address: ' + v);
175
+ } else {
176
+ var safe_val = Ok.escape(v);
177
+ }
178
+
179
+ return [safe_name, safe_val];
180
+ };
181
+
182
+ E.attrs = function (raw_attrs, tag) {
183
+ var sanitized = {};
184
+ var err = null;
185
+
186
+ _.find(raw_attrs, function (v, k) {
187
+
188
+ var pair = Ok.escape_attr(k, v, tag);
189
+
190
+ if (IS_ERROR(pair)) {
191
+ err = pair;
192
+ return pair;
193
+ }
194
+
195
+ sanitized[pair[0]] = pair[1];
196
+
197
+ });
198
+
199
+ if (err)
200
+ return err;
201
+
202
+ return sanitized;
203
+ };
204
+
205
+
206
+ E.to_func_calls = function (arr) {
207
+ var next = null;
208
+ var final = [];
209
+ var line = null;
210
+
211
+ while (arr.length) {
212
+ line = [arr.shift()];
213
+ while(arr.length && !_.isString(arr[0])) {
214
+ line.push(arr.shift());
215
+ }
216
+ final.push(line);
217
+ }
218
+
219
+ return final;
220
+ };
221
+
222
+ E.to_applet_func_calls = function (arr) {
223
+ var next = null;
224
+ var final = [];
225
+
226
+ while (arr.length) {
227
+ var name = arr.shift();
228
+ if (!_.isString(name))
229
+ return new Error("No function specfied for arg: " + JSON.stringify(name));
230
+ name = name.trim();
231
+ var attrs = null;
232
+ var body = null;
233
+
234
+ next = arr[0];
235
+
236
+ if (!_.isString(next)) {
237
+ if (_.isObject(next) && !_.isArray(next))
238
+ attrs = arr.shift();
239
+ next = arr[0];
240
+ if (_.isArray(next))
241
+ body = arr.shift();
242
+ next = arr[0];
243
+ if (next && _.isArray(next))
244
+ return new Error(name + ": extra array argument: " + JSON.stringify(next));
245
+ if (next && _.isObject(next))
246
+ return new Error(name + ": extra attr object argument: " + JSON.stringify(next));
247
+ if (next && !_.isString(next))
248
+ return new Error(name + ": invalid argument: " + JSON.stringify(next));
249
+ }
250
+
251
+ final.push([name, attrs, body]);
252
+ }
253
+
254
+ return final;
255
+ };
256
+
257
+
258
+
@@ -0,0 +1,60 @@
1
+
2
+
3
+ require "sanitize"
4
+
5
+ def Escape_Escape_Escape s
6
+ Escape_Escape_Escape.html(s)
7
+ end
8
+
9
+ class Escape_Escape_Escape
10
+
11
+ REPEATING_DOTS = /\.{1,}\//
12
+ INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
13
+ UN_PRINT_ABLE = /[^[:print:]\n]/
14
+ CR = "\r"
15
+ TABS = "\t"
16
+ CONTROL_CHARS = /[[:cntrl:]\x00-\x1f]/ # Don't use "\x20" because that is the space character.
17
+ WHITE_SPACE = /[[:space:]]&&[^\n]/ # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
18
+
19
+ ENCODING_OPTIONS_CLEAN_UTF8 = {
20
+ :invalid => :replace, # Replace invalid byte sequences
21
+ :undef => :replace, # Replace characters undefined in the target encoding (UTF-8)
22
+ :replace => '' # Use a blank for those replacements
23
+ # :newline => :universal
24
+ # :universal_newline => true # Always break lines with \n, not \r\n
25
+ }
26
+
27
+
28
+
29
+ class << self # ======================================================
30
+
31
+ # From:
32
+ # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
33
+ #
34
+ # Test:
35
+ # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
36
+ # inject('', :<<)
37
+ #
38
+ def clean_utf8 s
39
+ s.
40
+ encode(Encoding.find('utf-8') , ENCODING_OPTIONS_CLEAN_UTF8).
41
+ gsub(TABS , " ").
42
+ gsub(CR , "").
43
+ gsub(UN_PRINT_ABLE , '').
44
+ gsub(CONTROL_CHARS , "\n" ).
45
+ gsub(WHITE_SPACE , " ")
46
+ end
47
+
48
+ def text s
49
+ clean_utf8 s
50
+ end
51
+
52
+ def html s
53
+ Sanitize.fragment( clean_utf8(s), Sanitize::Config::RELAXED )
54
+ end
55
+
56
+ end # === class self ===
57
+
58
+ end # === class Escape_Escape_Escape ===
59
+
60
+
data/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "escape_escape_escape",
3
+ "version": "0.0.5",
4
+ "description": "My way of escaping HTML.",
5
+ "main": "lib/e_e_e.js",
6
+ "directories": {
7
+ "test": "test"
8
+ },
9
+ "scripts": {
10
+ "test": "mocha"
11
+ },
12
+ "repository": {
13
+ "type": "git",
14
+ "url": "git://github.com/da99/escape_escape_escape.git"
15
+ },
16
+ "keywords": [
17
+ "da99"
18
+ ],
19
+ "dependencies": {
20
+ "underscore": "1.x.x",
21
+ "unhtml": "x.x.x",
22
+ "special-html": "x.x.x",
23
+ "underscore.string": "x.x.x",
24
+ "entities": "x.x.x",
25
+ "uri-js": "x.x.x"
26
+ },
27
+ "author": "da99",
28
+ "license": "MIT",
29
+ "readmeFilename": "README.md",
30
+ "gitHead": "d7addccc1aea361d29d060720a54e34ec6dac499"
31
+ }
@@ -0,0 +1,23 @@
1
+
2
+ [
3
+
4
+ {
5
+ "it" : "does not re-escape already escaped text",
6
+ "input" : "<p>Hello &amp; GoodBye</p>",
7
+ "output" : "<p>Hello &amp; GoodBye</p>"
8
+ },
9
+
10
+ {
11
+ "it" : "removes invalid attributes",
12
+ "input" : "<a ignoreme=\"blah\">Hello GoodBye</a>",
13
+ "output" : "<a>Hello GoodBye</a>"
14
+ },
15
+
16
+ {
17
+ "it" : "removes \"javascript:\" protocol in \"href\" attributes",
18
+ "input" : "<a href=\"javascript:alert()\">hello</a>",
19
+ "output" : "<a>hello</a>"
20
+ }
21
+
22
+
23
+ ]
@@ -0,0 +1,29 @@
1
+
2
+ [
3
+
4
+ {
5
+ "it" : "replaces tabs with 2 spaces",
6
+ "input" : "<p>hello\tagain</p>",
7
+ "output" : "<p>hello again</p>"
8
+ },
9
+
10
+ {
11
+ "it" : "removes \\r",
12
+ "input" : "hi \r\r again",
13
+ "output" : "hi again"
14
+ },
15
+
16
+ {
17
+ "it" : "does not remove \\n",
18
+ "input" : "<p>hello\nagain</p>",
19
+ "output" : "<p>hello\nagain</p>"
20
+ },
21
+
22
+ {
23
+ "it" : "does not remove multiple \\n",
24
+ "input" : "<p>hello\n \nagain</p>",
25
+ "output" : "<p>hello\n \nagain</p>"
26
+ }
27
+
28
+
29
+ ]
@@ -0,0 +1,35 @@
1
+
2
+ require "multi_json"
3
+ require "escape_escape_escape"
4
+
5
+ Dir.glob("specs/as_json/*.json").sort.each { |f|
6
+ contents = MultiJson.load(File.read f)
7
+ method_name = File.basename(f).gsub(/\A\d+-|\.json\Z/, '')
8
+ describe ":#{method_name}" do
9
+ contents.each { |t|
10
+ it t["it"] do
11
+ i = t["input"]
12
+ o = t["output"]
13
+ actual = Escape_Escape_Escape.send(method_name, i)
14
+
15
+ case o
16
+ when String
17
+ actual.should == o
18
+ when Array
19
+ target = o.pop
20
+ begin
21
+ if o[1].is_a?(Array)
22
+ meth = o.shift
23
+ args = o.shift
24
+ actual = actual.send(meth, *args)
25
+ else
26
+ fail "Unknown method: #{o[0].inspect}"
27
+ end
28
+ end while !o.empty?
29
+
30
+ actual.should == target
31
+ end # === case
32
+ end # === it
33
+ }
34
+ end
35
+ }
data/specs/helpers.rb ADDED
@@ -0,0 +1,4 @@
1
+
2
+ require 'Bacon_Colored'
3
+ require 'escape_escape_escape'
4
+ require 'pry'
@@ -0,0 +1,132 @@
1
+
2
+ var _ = require('underscore')
3
+ , assert = require('assert')
4
+ , E = require('../lib/e_e_e').Sanitize
5
+ ;
6
+
7
+
8
+ describe( 'Sanitize attrs:', function () {
9
+
10
+ // What if the value is null? undefined?
11
+ _.each(E.attr_funcs, function (name) {
12
+
13
+ describe( name, function () {
14
+
15
+ it( 'returns error if value is null', function () {
16
+ assert.equal(E[name](null).constructor, Error);
17
+ });
18
+
19
+ it( 'returns error if value is undefined', function () {
20
+ assert.equal(E[name](undefined).constructor, Error);
21
+ });
22
+
23
+ if ( !_.contains("name href action".split(' '), name ) )
24
+ it( 'adds specified name to error', function () {
25
+ var result = E[name](null, 'my_name').message;
26
+ if ( result.indexOf('my_name: ') !== 0)
27
+ assert.fail(result, 'my_name', 'E.' + name + ' is not adding name to error message.');
28
+ });
29
+
30
+ }); // === end desc
31
+
32
+ }); // end _.each
33
+
34
+ describe( 'string', function () {
35
+ it( 'returns value if string', function () {
36
+ assert.equal(E.string("s"), "s");
37
+ });
38
+
39
+ it( 'returns error if value is number', function () {
40
+ assert.equal(E.string(1).constructor, Error);
41
+ });
42
+ }); // === end desc
43
+
44
+ describe( 'string_in_array', function () {
45
+ it( 'returns value if string in array: [ my_string ]', function () {
46
+ var val = ["This is a string."];
47
+ assert.equal(E.string_in_array(val), val);
48
+ });
49
+ }); // === end desc
50
+
51
+ describe( 'tag', function () {
52
+ it( 'returns value if valid string', function () {
53
+ assert.equal(E.tag("button"), "button");
54
+ });
55
+
56
+ it( 'returns error if string contains invalid chars', function () {
57
+ assert.equal(E.tag("my-tag").message, "tag: invalid characters: \"my-tag\"");
58
+ });
59
+ }); // === end desc
60
+
61
+ describe( 'name', function () {
62
+ it( 'returns value if valid string', function () {
63
+ assert.equal(E.name("some_name"), "some_name");
64
+ });
65
+ }); // === end desc
66
+
67
+ _.each( ['href', 'action', 'uri'] , function (name) {
68
+ describe( 'url: ' + name, function () {
69
+ it( 'returns error if url is not valid', function () {
70
+ assert.equal(E[name]("http://wwwtome<").message, name + ": URI is not strictly valid.: http://wwwtome<");
71
+ });
72
+ }); // === end desc
73
+ });
74
+
75
+ describe( 'uri', function () {
76
+
77
+ it( 'normalizes address', function () {
78
+ var s = "hTTp://wWw.test.com/";
79
+ assert.equal(E.uri(s), s.toLowerCase());
80
+ });
81
+
82
+ it( 'returns an Error if path contains: <', function () {
83
+ var s = "http://www.test.com/<something/";
84
+ assert.equal(E.uri(s).constructor, Error);
85
+ });
86
+
87
+ it( 'returns an Error if path contains HTML entities', function () {
88
+ var s = "http://6&#9;6.000146.0x7.147/";
89
+ assert.equal(E.uri(s).constructor, Error);
90
+ });
91
+
92
+ it( 'returns an Error if path contains HTML entities', function () {
93
+ var s = "http://www.test.com/&nbsp;s/";
94
+ assert.equal(E.uri(s).constructor, Error);
95
+ });
96
+
97
+ it( 'returns an Error if query string contains HTML entities', function () {
98
+ var s = "http://www.test.com/s/test?t&nbsp;test";
99
+ assert.equal(E.uri(s).constructor, Error);
100
+ });
101
+
102
+ }); // === end desc
103
+
104
+ // ****************************************************************
105
+ // ****************** END of Sanitize Attrs ***********************
106
+ // ****************************************************************
107
+
108
+ describe( '.opt(func)', function () {
109
+ it( 'returns a function where null returns null', function () {
110
+ assert.equal(E.opt(E.string)(null), null);
111
+ });
112
+
113
+ it( 'returns a function where undefined returns null', function () {
114
+ assert.equal(E.opt(E.string)(undefined), null);
115
+ });
116
+
117
+ it( 'returns a function that passes false to underlying function', function () {
118
+ assert.equal(E.opt(E.string)(false).constructor, Error);
119
+ });
120
+
121
+ it( 'returns a function that passes any Number to underlying function', function () {
122
+ assert.equal(E.opt(E.string)(1).constructor, Error);
123
+ });
124
+
125
+ it( 'returns a function that passes any String to underlying function', function () {
126
+ assert.equal(E.opt(E.string)("a"), "a");
127
+ });
128
+ }); // === end desc
129
+
130
+ }); // === end desc
131
+
132
+
@@ -0,0 +1,57 @@
1
+
2
+ var _ = require('underscore')
3
+ , _s = require('underscore.string')
4
+ , unhtml = require('unhtml')
5
+ , special = require('special-html')
6
+ , assert = require('assert')
7
+ , Sanitize = require('../lib/e_e_e').Sanitize
8
+ , E = Sanitize.html
9
+ ;
10
+ var BRACKET = " < %3C &lt &lt; &LT &LT; &#60 &#060 &#0060 \
11
+ &#00060 &#000060 &#0000060 &#60; &#060; &#0060; &#00060; \
12
+ &#000060; &#0000060; &#x3c &#x03c &#x003c &#x0003c &#x00003c \
13
+ &#x000003c &#x3c; &#x03c; &#x003c; &#x0003c; &#x00003c; \
14
+ &#x000003c; &#X3c &#X03c &#X003c &#X0003c &#X00003c &#X000003c \
15
+ &#X3c; &#X03c; &#X003c; &#X0003c; &#X00003c; &#X000003c; \
16
+ &#x3C &#x03C &#x003C &#x0003C &#x00003C &#x000003C &#x3C; &#x03C; \
17
+ &#x003C; &#x0003C; &#x00003C; &#x000003C; &#X3C &#X03C \
18
+ &#X003C &#X0003C &#X00003C &#X000003C &#X3C; &#X03C; &#X003C; &#X0003C; \
19
+ &#X00003C; &#X000003C; \x3c \x3C \u003c \u003C ";
20
+
21
+
22
+ describe( 'Sanitize', function () {
23
+
24
+ it( 'does not re-escape already escaped text mixed with HTML', function () {
25
+ var h = "<p>Hi</p>";
26
+ var e = _s.escapeHTML(h);
27
+ var o = e + h;
28
+ assert.equal(E(o), _s.escapeHTML(h + h));
29
+ });
30
+
31
+ it( 'escapes special chars: "Hello ©®∆"', function () {
32
+ var s = "Hello & World ©®∆";
33
+ var t = "Hello &amp; World &#169;&#174;&#8710;";
34
+ assert.equal(E(s), t);
35
+ });
36
+
37
+ it( 'escapes all 70 different combos of "<"', function () {
38
+ assert.equal(_.uniq(E(BRACKET.trim()).split(/\s+/)).join(' '), "&lt; %3C");
39
+ });
40
+
41
+ it( 'escapes all keys in nested objects', function () {
42
+ var HTML = "<b>test</b>";
43
+ assert.deepEqual(E({" a >":{" a >": HTML}}), {" a &gt;": {" a &gt;": _s.escapeHTML(HTML)}});
44
+ });
45
+
46
+ it( 'escapes all values in nested objects', function () {
47
+ var HTML = "<b>test</b>";
48
+ assert.deepEqual(E({name:{name: HTML}}), {name: {name: _s.escapeHTML(HTML)}});
49
+ });
50
+
51
+ it( 'escapes all values in nested arrays', function () {
52
+ var HTML = "<b>test</b>";
53
+ assert.deepEqual(E([{name:{name: HTML}}]), [{name: {name: _s.escapeHTML(HTML)}}]);
54
+ });
55
+
56
+ }); // === end desc
57
+
@@ -0,0 +1,41 @@
1
+
2
+ var _ = require('underscore')
3
+ , _s = require('underscore.string')
4
+ , unhtml = require('unhtml')
5
+ , special = require('special-html')
6
+ , assert = require('assert')
7
+ , Sanitize = require('../lib/e_e_e').Sanitize
8
+ , E = Sanitize.html
9
+ , U = Sanitize.un_escape
10
+ ;
11
+ var BRACKET = " < %3C &lt &lt; &LT &LT; &#60 &#060 &#0060 \
12
+ &#00060 &#000060 &#0000060 &#60; &#060; &#0060; &#00060; \
13
+ &#000060; &#0000060; &#x3c &#x03c &#x003c &#x0003c &#x00003c \
14
+ &#x000003c &#x3c; &#x03c; &#x003c; &#x0003c; &#x00003c; \
15
+ &#x000003c; &#X3c &#X03c &#X003c &#X0003c &#X00003c &#X000003c \
16
+ &#X3c; &#X03c; &#X003c; &#X0003c; &#X00003c; &#X000003c; \
17
+ &#x3C &#x03C &#x003C &#x0003C &#x00003C &#x000003C &#x3C; &#x03C; \
18
+ &#x003C; &#x0003C; &#x00003C; &#x000003C; &#X3C &#X03C \
19
+ &#X003C &#X0003C &#X00003C &#X000003C &#X3C; &#X03C; &#X003C; &#X0003C; \
20
+ &#X00003C; &#X000003C; \x3c \x3C \u003c \u003C ";
21
+
22
+
23
+ describe( 'Sanitize', function () {
24
+
25
+ it( 'un-escapes escaped text mixed with HTML', function () {
26
+ var s = "<p>Hi&amp;</p>";
27
+ assert.equal(U(s), "<p>Hi&</p>");
28
+ });
29
+
30
+ it( 'un-escapes special chars: "Hello ©®∆"', function () {
31
+ var s = "Hello &amp; World &#169;&#174;&#8710;";
32
+ var t = "Hello & World ©®∆";
33
+ assert.equal(U(s), t);
34
+ });
35
+
36
+ it( 'un-escapes all 70 different combos of "<"', function () {
37
+ assert.equal(_.uniq(U(BRACKET.trim()).split(/\s+/)).join(' '), "< %3C");
38
+ });
39
+
40
+ }); // === end desc
41
+
metadata ADDED
@@ -0,0 +1,163 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: escape_escape_escape
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - da99
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sanitize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bacon
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: Bacon_Colored
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: multi_json
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.10'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.10'
111
+ description: "\n My way of escaping/encoding HTML with the proper entities.\n "
112
+ email:
113
+ - i-hate-spam-1234567@mailinator.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - Gemfile
120
+ - LICENSE
121
+ - LICENSE.txt
122
+ - README.md
123
+ - VERSION
124
+ - escape_escape_escape.gemspec
125
+ - lib/beta.rb
126
+ - lib/e_e_e.js
127
+ - lib/escape_escape_escape.rb
128
+ - package.json
129
+ - specs/as_json/0001-html.json
130
+ - specs/as_json/0010-text.json
131
+ - specs/escape_escape_escape.rb
132
+ - specs/helpers.rb
133
+ - test/sanitize_attrs.js
134
+ - test/sanitize_html.js
135
+ - test/sanitize_un_escape.js
136
+ homepage: https://github.com/da99/escape_escape_escape
137
+ licenses:
138
+ - MIT
139
+ metadata: {}
140
+ post_install_message:
141
+ rdoc_options: []
142
+ require_paths:
143
+ - lib
144
+ required_ruby_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: '0'
149
+ required_rubygems_version: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubyforge_project:
156
+ rubygems_version: 2.3.0
157
+ signing_key:
158
+ specification_version: 4
159
+ summary: My way of escaping/encoding HTML.
160
+ test_files:
161
+ - test/sanitize_attrs.js
162
+ - test/sanitize_html.js
163
+ - test/sanitize_un_escape.js