escape_escape_escape 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f8e9caa2b123a6e7258a68b096860d2d81f6ad48
4
+ data.tar.gz: 04cb5b7fb058a88dbc3be52cd933f10b34970c3f
5
+ SHA512:
6
+ metadata.gz: 6181a9de7665728466ac4bbd72375dad6ad632938af5b2c53faaf300717cde58a0cd5ddec57780974608cbdb2a2feb4094fe519cb8e65f7682e9714726e926dc
7
+ data.tar.gz: 87e14c445c08f606959fa353b7d373d093c9658de519890cdfadcebdc6424a54da442a87ecb2a253c91ea099268f71f6143576cd5cef61b3be4ee06de9013b24
data/.gitignore ADDED
@@ -0,0 +1,25 @@
1
+ /pids/
2
+ /logs/
3
+ /results/
4
+ /npm-debug.log/
5
+
6
+ /node_modules/
7
+ /npm-debug.log
8
+
9
+ *.gem
10
+ *.rbc
11
+ .bundle
12
+ .config
13
+ .yardoc
14
+ Gemfile.lock
15
+ InstalledFiles
16
+ _yardoc
17
+ coverage
18
+ doc/
19
+ lib/bundler/man
20
+ pkg
21
+ rdoc
22
+ spec/reports
23
+ test/tmp
24
+ test/version_tmp
25
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,23 @@
1
+
2
+ Copyright (c) 2013 da99
3
+
4
+ Permission is hereby granted, free of charge, to any person
5
+ obtaining a copy of this software and associated documentation
6
+ files (the "Software"), to deal in the Software without
7
+ restriction, including without limitation the rights to use,
8
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the
10
+ Software is furnished to do so, subject to the following
11
+ conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23
+ OTHER DEALINGS IN THE SOFTWARE.
data/LICENSE.txt ADDED
@@ -0,0 +1,23 @@
1
+
2
+ Copyright (c) 2014 da99
3
+
4
+ MIT License
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,14 @@
1
+ What is it?
2
+ ====================
3
+
4
+ My way of escaping and sanitizing HTML.
5
+
6
+
7
+ Use:
8
+ =====================
9
+
10
+
11
+ // npm install escape_escape_escape
12
+
13
+ var E = require("escape_escape_escape").Sanitize.html;
14
+ E("The <strong>brave</strong> and the <b>bold</b>.");
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "escape_escape_escape"
7
+ spec.version = `cat VERSION`
8
+ spec.authors = ["da99"]
9
+ spec.email = ["i-hate-spam-1234567@mailinator.com"]
10
+ spec.summary = %q{My way of escaping/encoding HTML.}
11
+ spec.description = %q{
12
+ My way of escaping/encoding HTML with the proper entities.
13
+ }
14
+ spec.homepage = "https://github.com/da99/escape_escape_escape"
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |file|
18
+ file.index('bin/') == 0 && file != "bin/#{File.basename Dir.pwd}"
19
+ }
20
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
21
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
22
+ spec.require_paths = ["lib"]
23
+
24
+ spec.add_dependency "sanitize" , "~> 3.0"
25
+
26
+ spec.add_development_dependency "pry" , "~> 0.9"
27
+ spec.add_development_dependency "rake" , "~> 10.3"
28
+ spec.add_development_dependency "bundler" , "~> 1.5"
29
+ spec.add_development_dependency "bacon" , "~> 1.0"
30
+ spec.add_development_dependency "Bacon_Colored" , "~> 0.1"
31
+ spec.add_development_dependency "multi_json" , "~> 1.10"
32
+ end
data/lib/beta.rb ADDED
@@ -0,0 +1,270 @@
1
+
2
+
3
+ # === Important Gems ===
4
+ require 'cgi' # Don't use URI.escape because it does not escape all invalid characters.
5
+ require 'htmlentities'
6
+ require 'loofah'
7
+ require "addressable/uri"
8
+ require "escape_utils"
9
+ require "htmlentities"
10
+ require "uri"
11
+
12
+ def Escape_Escape_Escape s
13
+ Escape_Escape_Escape.escape(s)
14
+ end
15
+
16
+ class Escape_Escape_Escape
17
+
18
+ Coder = HTMLEntities.new(:xhtml1)
19
+
20
+ ENCODING_OPTIONS_CLEAN_UTF8 = {
21
+ :invalid => :replace, # Replace invalid byte sequences
22
+ :undef => :replace, # Replace characters undefined in the target encoding
23
+ :replace => '' # Use a blank for those replacements
24
+ # :newline => :universal
25
+ # :universal_newline => true # Always break lines with \n, not \r\n
26
+ }
27
+
28
+ opts = Regexp::FIXEDENCODING | Regexp::IGNORECASE
29
+
30
+ # tabs, etc.
31
+ Control = Regexp.new("[[:cntrl:]]".force_encoding('utf-8'), opts) # unicode whitespaces, like 160 codepoint
32
+ # From:
33
+ # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
34
+ White_Space = Regexp.new("[[:space:]]".force_encoding('utf-8'), opts)
35
+
36
+ REPEATING_DOTS = /\.{1,}/
37
+ INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
38
+
39
+
40
+ # HTML_ESCAPE_TABLE is used after text is escaped to
41
+ # further escape text more. This is why the semi-colon (&#59;) was left out
42
+ # from HTML_ESCAPE_TABLE. It would conflict with already escaped text.
43
+ # For more entities: http://www.w3.org/MarkUp/html3/latin1.html
44
+ # or go to: http://www.mountaindragon.com/html/iso.htm
45
+ HTML_ESCAPE_TABLE = {
46
+
47
+ '&laquo;' => "&lt;",
48
+ '&raquo;' => "&gt;",
49
+
50
+ "&lsquo;" => "&apos;",
51
+ "&rsquo;" => "&apos;",
52
+ "&sbquo;" => "&apos;",
53
+
54
+ "&lsquo;" => "&apos;",
55
+ "&rsquo;" => "&apos;",
56
+
57
+ "&ldquo;" => "&quot;",
58
+ "&rdquo;" => "&quot;",
59
+ "&bdquo;" => "&quot;",
60
+
61
+ "&lsaquo;" => "&lt;",
62
+ "&rsaquo;" => "&gt;",
63
+
64
+ "&acute;" => "&apos;",
65
+ "&uml;" => "&quot;",
66
+
67
+ '\\' => "&#92;",
68
+ # '/' => "&#47;",
69
+ # '%' => "&#37;",
70
+ # ':' => '&#58;',
71
+ # '=' => '&#61;',
72
+ # '?' => '&#63;',
73
+ # '@' => '&#64;',
74
+ "\`" => '&apos;',
75
+ '‘' => "&apos;",
76
+ '’' => "&apos;",
77
+ '“' => '&quot;',
78
+ '”' => '&quot;',
79
+ # "$" => "&#36;",
80
+ # '#' => '&#35;', # Don't use this or else it will ruin all other entities.
81
+ # '&' => # Don't use this " " " " " "
82
+ # ';' => # Don't use this " " " " " "
83
+ '|' => '&brvbar;',
84
+ '~' => '&sim;'
85
+ # '!' => '&#33;',
86
+ # '*' => '&lowast;', # Don't use this. '*' is used by text formatting, ie RedCloth, etc.
87
+ # '{' => '&#123;',
88
+ # '}' => '&#125;',
89
+ # '(' => '&#40;',
90
+ # ')' => '&#41;',
91
+ # "\n" => '<br />'
92
+ }
93
+
94
+ def new_regexp str
95
+ Regexp.new(clean_utf8(str), Regexp::FIXEDENCODING | Regexp::IGNORECASE)
96
+ end
97
+
98
+ class << self # ======================================================
99
+
100
+ # From:
101
+ # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
102
+ #
103
+ # Test:
104
+ # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
105
+ # inject('', :<<)
106
+ #
107
+ def clean_utf8 s
108
+ s.
109
+ encode(Encoding.find('utf-8'), ENCODING_OPTIONS_CLEAN_UTF8).
110
+ gsub(self::Control, "\n").
111
+ gsub(self::White_Space, " ")
112
+ end
113
+
114
+ def un_escape raw
115
+ EscapeUtils.unescape_html clean_utf8(raw)
116
+ end
117
+
118
+ def uri str
119
+ uri = Addressable::URI.parse(str)
120
+ if ["http","https","ftp"].include?(uri.scheme) || uri.path.index('/') == 0
121
+ str
122
+ else
123
+ nil
124
+ end
125
+ rescue Addressable::URI::InvalidURIError
126
+ fail "Invalid: address: #{str.inspect}"
127
+ end
128
+
129
+ def escape o
130
+ case o
131
+ when String
132
+ Coder.encode(un_escape(o), :named, :hexadecimal)
133
+ else
134
+ fail "Unknown type: #{o.inspect}"
135
+ end
136
+ end # === def
137
+
138
+
139
+ # ===============================================
140
+ # Raises: TZInfo::InvalidTimezoneIdentifier.
141
+ # ===============================================
142
+ def validate_timezone(timezone)
143
+ TZInfo::Timezone.get( timezone.to_s.strip ).identifier
144
+ end
145
+
146
+ # =========================================================
147
+ # Takes out any periods and back slashes in a String.
148
+ # Single periods surrounding text are allowed on the last substring
149
+ # past the last slash because they are assumed to be filenames
150
+ # with extensions.
151
+ # =========================================================
152
+ def path( raw_path )
153
+ clean_crumbs = raw_path.split('/').map { |crumb| filename(crumb) }
154
+ File.join( *clean_crumbs )
155
+ end
156
+
157
+ # ====================================================================
158
+ # Returns a String where all characters except:
159
+ # letters numbers underscores dashes
160
+ # are replaced with a dash.
161
+ # It also deletes any non-alphanumeric characters at the end
162
+ # of the String.
163
+ # ====================================================================
164
+ def filename( raw_filename )
165
+ plaintext( raw_filename ).
166
+ downcase.
167
+ gsub(REPEATING_DOTS, '.').
168
+ gsub(INVALID_FILE_NAME_CHARS, '-').
169
+ to_s
170
+ end
171
+
172
+ # ===============================================
173
+ # This method is not meant to be called directly. Instead, call
174
+ # <Wash.parse_tags>.
175
+ # Returns: String with
176
+ # * all spaces and underscores turned into dashes.
177
+ # * all non-alphanumeric characters, underscores, dashes, and periods
178
+ # turned into dashes.
179
+ # * non-alphanumeric characters at the beginning and end stripped out.
180
+ # ===============================================
181
+ def tag( raw_tag )
182
+ # raw_tag.strip.downcase.gsub( /[^a-z0-9\.]{1,}/,'-').gsub(/^[^a-z0-9]{1,}|[^a-z0-9]{1,}$/i, '').gsub(/\.{1,}/, '.')
183
+ raw_tag.strip.downcase.gsub(/^[\,\.]{1,}|[\"]{1,}|[\,\.]{1,}$/, '').gsub(/\ /, '-')
184
+ end
185
+
186
+
187
+ # ===============================================
188
+ # A better alternative than "Rack::Utils.escape_html". Escapes
189
+ # various characters (including '&', '<', '>', and both quotation mark types)
190
+ # to HTML decimal entities. Also escapes the characters from
191
+ # SWISS::HTML_ESCAPE_TABLE.
192
+ #
193
+ # Text has to be UTF-8 before encoding, according to HTMLEntities gem.
194
+ # Therefore, all text is run through <Wash.plaintext> before encoding.
195
+ # ===============================================
196
+ def html( raw_text )
197
+
198
+ # Turn string into UTF8. (This also takes out control characters
199
+ # which is good or else they will be escaped into HTML too.)
200
+ # Strip it after conversion.
201
+ # return Dryopteris.sanitize(utf8_text)
202
+ # Now encode it.
203
+ normalized_encoded_text = escape( plaintext(raw_text).strip, :named )
204
+
205
+ sanitized_text = Loofah.scrub_fragment( normalized_encoded_text, :prune ).to_s
206
+ end # === def html
207
+
208
+
209
+ # ===============================================
210
+ # Returns: A string that is:
211
+ # * normalized to :KC
212
+ # * "\r\n" changed to "\n"
213
+ # * all control characters stripped except for "\n"
214
+ # and end.
215
+ # Options:
216
+ # :tabs
217
+ # :spaces
218
+ #
219
+ # ===============================================
220
+ def plaintext( raw_str, *opts)
221
+
222
+ # Check options.
223
+ @plaintext_allowed_options ||= [ :spaces, :tabs ]
224
+ invalid_opts = opts - @plaintext_allowed_options
225
+ raise(ArgumentError, "INVALID OPTION: #{invalid_opts.inspect}" ) if !invalid_opts.empty?
226
+
227
+ # Save tabs if requested.
228
+ raw_str = raw_str.gsub("\t", "&#09;") if opts.include?(:tabs)
229
+
230
+ # First: Normalize characters.
231
+ # Second: Strip out control characters.
232
+ # Note: Must be normalized first, then strip.
233
+ # See: http://msdn.microsoft.com/en-us/library/ms776393(VS.85).aspx
234
+ final_str = raw_str.
235
+ split("\n").
236
+ map { |line|
237
+ # Don't use "\x20" because that is the space character.
238
+ line.chars.normalize.gsub( /[[:cntrl:]\x00-\x1f]*/, '' )
239
+ }.
240
+ join("\n")
241
+
242
+ # Save whitespace or strip.
243
+ if !opts.include?(:spaces)
244
+ final_str = final_str.strip
245
+ end
246
+
247
+ # Normalize quotations and other characters through HTML entity encoding/decoding.
248
+ final_str = coder.decode( normalised_str Coder.encode(final_str, :named) )
249
+
250
+ # Put back tabs by request.
251
+ if opts.include?(:tabs)
252
+ final_str = final_str.gsub("&#09;", "\t")
253
+ end
254
+
255
+ final_str
256
+ end # self.plaintext
257
+
258
+ # Encode a few other symbols.
259
+ # This also normalizes certain quotation and apostrophe HTML entities.
260
+ def normalize_encoded_string s
261
+ HTML_ESCAPE_TABLE.inject(s) do |m, kv|
262
+ m.gsub( kv.first, kv.last)
263
+ end
264
+ end
265
+
266
+ end # === class self ===
267
+
268
+ end # === class Escape_Escape_Escape ===
269
+
270
+
data/lib/e_e_e.js ADDED
@@ -0,0 +1,258 @@
1
+
2
+ var _ = require('underscore')
3
+ , _s = require('underscore.string')
4
+ , special = require('special-html')
5
+ , HTML_E = require('entities')
6
+ , URI_js = require('uri-js')
7
+ ;
8
+
9
+ var NL = "\n";
10
+ var SPACES = /\ +/g;
11
+ var VALID_HTML_ID = /^[0-9a-zA-Z_]+$/;
12
+ var VALID_HTML_TAG = /^[0-9a-zA-Z_]+$/;
13
+ var IS_ERROR = function (o) { return (_.isObject(o) && o.constructor == Error); };
14
+ var funcs_scope = this;
15
+ var INSPECT = function (v) { return JSON.stringify(v); };
16
+
17
+ var E = exports.Sanitize = {};
18
+
19
+ // ****************************************************************
20
+ // ****************** Sanitize Tag Attributes and content *********
21
+ // ****************************************************************
22
+
23
+
24
+ E.name = function (v) { return E.id(v , "name"); };
25
+ E.href = function (v) { return E.uri(v , "href"); }
26
+ E.action = function (v) { return E.uri(v , 'action'); };
27
+
28
+ E.string = function (raw, name) {
29
+ name = (name) ? (name + ': ') : '';
30
+
31
+ if (_.isString(raw))
32
+ return (raw.trim());
33
+
34
+ return new Error(name + "String expected, instead got: " + JSON.stringify(raw));
35
+ };
36
+
37
+ E.uri = function (raw, name) {
38
+ name = (name) ? name : 'uri';
39
+
40
+ var val = E.string(raw, name);
41
+ if (E.is_error(val))
42
+ return val;
43
+
44
+ var url = HTML_E.decode(val, 2);
45
+ var parse = URI_js.parse(url);
46
+ if (parse.errors.length)
47
+ return new Error(name + ": " + parse.errors[0] + ': ' + val);
48
+
49
+ return URI_js.normalize(url);
50
+ };
51
+
52
+ E.tag = function (raw, name) {
53
+ name = (name) ? name : "tag";
54
+
55
+ var val = E.string(raw, name);
56
+ if (E.is_error(val))
57
+ return val;
58
+
59
+ if (!val.match(VALID_HTML_TAG))
60
+ return new Error(name + ": invalid characters: " + JSON.stringify(val));
61
+
62
+ return val;
63
+ };
64
+
65
+ E.id = function (raw_val, name) {
66
+ name = (name) ? name : "id";
67
+
68
+ var val = E.string(raw_val, name);
69
+ if (val.message)
70
+ return val;
71
+
72
+ if (!val.match(VALID_HTML_ID))
73
+ return new Error(name + ": invalid characters: " + JSON.stringify(val));
74
+
75
+ return val;
76
+ };
77
+
78
+ E.num_of_lines = function (raw_val, name) {
79
+ name = (name) ? name : 'num_of_lines';
80
+
81
+ if (!_.isNumber(raw_val) || _.isNaN(raw_val))
82
+ return new Error(name + ": Must be a number: " + JSON.stringify(raw_val));
83
+
84
+ if (raw_val < 1 || raw_val > 250)
85
+ return new Error(name + ": Number out of range: " + JSON.stringify(raw_val));
86
+
87
+ return raw_val;
88
+ };
89
+
90
+ E.string_in_array = function (unk, name) {
91
+ name = (name) ? name : 'string_in_array';
92
+ if (_.isArray(unk) && unk.length === 1 && _.isString(unk[0]))
93
+ return unk;
94
+ return new Error(name + ": Must be a string within an array: " + JSON.stringify(unk));
95
+ };
96
+
97
+ var temp = null;
98
+ E.attr_funcs = [];
99
+ for (temp in E) {
100
+ if (_.isFunction(E[temp])) {
101
+ E.attr_funcs.push(temp);
102
+ }
103
+ }
104
+
105
+ E.opt = function (func, name) {
106
+ return function (v) {
107
+ if (v === undefined || v === null)
108
+ return null;
109
+ return func(v, name);
110
+ };
111
+ };
112
+
113
+ E.is = function (func) {
114
+ return function (v) {
115
+ var result = func(v);
116
+ if (result && result.message)
117
+ return false;
118
+ return !!result;
119
+ };
120
+ };
121
+
122
+ _.each(E.attr_funcs, function (name, i) {
123
+ E["opt_" + name] = E.opt(E[name], name);
124
+ E["is_" + name] = E.is(E[name]);
125
+ });
126
+
127
+ // ****************************************************************
128
+ // ****************** End of Sanitize Attr Checkers ***************
129
+ // ****************************************************************
130
+
131
+ E.is_error = function (obj) {
132
+ if (!_.isObject(obj))
133
+ return false;
134
+ return obj.constructor === Error;
135
+ };
136
+
137
+ E.html = function (str) {
138
+ if (_.isArray(str)) {
139
+ return _.map(str, function (v, i) {
140
+ return E.html(v);
141
+ });
142
+ }
143
+
144
+ if (_.isObject(str)) {
145
+ var new_o = {};
146
+ _.each(str, function (v, k) {
147
+ new_o[E.html(k)] = E.html(v);
148
+ });
149
+ return new_o;
150
+ }
151
+
152
+ if (!_.isString(str))
153
+ return str;
154
+
155
+ return special( _s.escapeHTML( E.un_escape(str) ) );
156
+ };
157
+
158
+ E.un_escape = function (str) {
159
+ return _s.unescapeHTML( HTML_E.decode( str , 2) )
160
+ };
161
+
162
+ E.attr = function (k, v, tag) {
163
+ if (_.contains(['id', 'name', 'type'], k) && !v.match(VALID_HTML_ID))
164
+ return new Error("Invalid chars in " + tag + " " + k + ": " + v);
165
+
166
+ if (!k.match(VALID_HTML_ID))
167
+ return new Error("Invalid chars in " + tag + " attribute name: " + k);
168
+
169
+ var safe_name = Ok.escape(k).trim();
170
+
171
+ if (_.contains(['href', 'action'], k)) {
172
+ var safe_val = Ok.escape_uri(v);
173
+ if (!safe_val)
174
+ return new Error('Invalid link address: ' + v);
175
+ } else {
176
+ var safe_val = Ok.escape(v);
177
+ }
178
+
179
+ return [safe_name, safe_val];
180
+ };
181
+
182
+ E.attrs = function (raw_attrs, tag) {
183
+ var sanitized = {};
184
+ var err = null;
185
+
186
+ _.find(raw_attrs, function (v, k) {
187
+
188
+ var pair = Ok.escape_attr(k, v, tag);
189
+
190
+ if (IS_ERROR(pair)) {
191
+ err = pair;
192
+ return pair;
193
+ }
194
+
195
+ sanitized[pair[0]] = pair[1];
196
+
197
+ });
198
+
199
+ if (err)
200
+ return err;
201
+
202
+ return sanitized;
203
+ };
204
+
205
+
206
+ E.to_func_calls = function (arr) {
207
+ var next = null;
208
+ var final = [];
209
+ var line = null;
210
+
211
+ while (arr.length) {
212
+ line = [arr.shift()];
213
+ while(arr.length && !_.isString(arr[0])) {
214
+ line.push(arr.shift());
215
+ }
216
+ final.push(line);
217
+ }
218
+
219
+ return final;
220
+ };
221
+
222
+ E.to_applet_func_calls = function (arr) {
223
+ var next = null;
224
+ var final = [];
225
+
226
+ while (arr.length) {
227
+ var name = arr.shift();
228
+ if (!_.isString(name))
229
+ return new Error("No function specfied for arg: " + JSON.stringify(name));
230
+ name = name.trim();
231
+ var attrs = null;
232
+ var body = null;
233
+
234
+ next = arr[0];
235
+
236
+ if (!_.isString(next)) {
237
+ if (_.isObject(next) && !_.isArray(next))
238
+ attrs = arr.shift();
239
+ next = arr[0];
240
+ if (_.isArray(next))
241
+ body = arr.shift();
242
+ next = arr[0];
243
+ if (next && _.isArray(next))
244
+ return new Error(name + ": extra array argument: " + JSON.stringify(next));
245
+ if (next && _.isObject(next))
246
+ return new Error(name + ": extra attr object argument: " + JSON.stringify(next));
247
+ if (next && !_.isString(next))
248
+ return new Error(name + ": invalid argument: " + JSON.stringify(next));
249
+ }
250
+
251
+ final.push([name, attrs, body]);
252
+ }
253
+
254
+ return final;
255
+ };
256
+
257
+
258
+
@@ -0,0 +1,60 @@
1
+
2
+
3
+ require "sanitize"
4
+
5
+ def Escape_Escape_Escape s
6
+ Escape_Escape_Escape.html(s)
7
+ end
8
+
9
+ class Escape_Escape_Escape
10
+
11
+ REPEATING_DOTS = /\.{1,}\//
12
+ INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
13
+ UN_PRINT_ABLE = /[^[:print:]\n]/
14
+ CR = "\r"
15
+ TABS = "\t"
16
+ CONTROL_CHARS = /[[:cntrl:]\x00-\x1f]/ # Don't use "\x20" because that is the space character.
17
+ WHITE_SPACE = /[[:space:]]&&[^\n]/ # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
18
+
19
+ ENCODING_OPTIONS_CLEAN_UTF8 = {
20
+ :invalid => :replace, # Replace invalid byte sequences
21
+ :undef => :replace, # Replace characters undefined in the target encoding
22
+ :replace => '' # Use a blank for those replacements
23
+ # :newline => :universal
24
+ # :universal_newline => true # Always break lines with \n, not \r\n
25
+ }
26
+
27
+
28
+
29
+ class << self # ======================================================
30
+
31
+ # From:
32
+ # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
33
+ #
34
+ # Test:
35
+ # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
36
+ # inject('', :<<)
37
+ #
38
+ def clean_utf8 s
39
+ s.
40
+ encode(Encoding.find('utf-8') , ENCODING_OPTIONS_CLEAN_UTF8).
41
+ gsub(TABS , " ").
42
+ gsub(CR , "").
43
+ gsub(UN_PRINT_ABLE , '').
44
+ gsub(CONTROL_CHARS , "\n" ).
45
+ gsub(WHITE_SPACE , " ")
46
+ end
47
+
48
+ def text s
49
+ clean_utf8 s
50
+ end
51
+
52
+ def html s
53
+ Sanitize.fragment( clean_utf8(s), Sanitize::Config::RELAXED )
54
+ end
55
+
56
+ end # === class self ===
57
+
58
+ end # === class Escape_Escape_Escape ===
59
+
60
+
data/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "escape_escape_escape",
3
+ "version": "0.0.5",
4
+ "description": "My way of escaping HTML.",
5
+ "main": "lib/e_e_e.js",
6
+ "directories": {
7
+ "test": "test"
8
+ },
9
+ "scripts": {
10
+ "test": "mocha"
11
+ },
12
+ "repository": {
13
+ "type": "git",
14
+ "url": "git://github.com/da99/escape_escape_escape.git"
15
+ },
16
+ "keywords": [
17
+ "da99"
18
+ ],
19
+ "dependencies": {
20
+ "underscore": "1.x.x",
21
+ "unhtml": "x.x.x",
22
+ "special-html": "x.x.x",
23
+ "underscore.string": "x.x.x",
24
+ "entities": "x.x.x",
25
+ "uri-js": "x.x.x"
26
+ },
27
+ "author": "da99",
28
+ "license": "MIT",
29
+ "readmeFilename": "README.md",
30
+ "gitHead": "d7addccc1aea361d29d060720a54e34ec6dac499"
31
+ }
@@ -0,0 +1,23 @@
1
+
2
+ [
3
+
4
+ {
5
+ "it" : "does not re-escape already escaped text",
6
+ "input" : "<p>Hello &amp; GoodBye</p>",
7
+ "output" : "<p>Hello &amp; GoodBye</p>"
8
+ },
9
+
10
+ {
11
+ "it" : "removes invalid attributes",
12
+ "input" : "<a ignoreme=\"blah\">Hello GoodBye</a>",
13
+ "output" : "<a>Hello GoodBye</a>"
14
+ },
15
+
16
+ {
17
+ "it" : "removes \"javascript:\" protocol in \"href\" attributes",
18
+ "input" : "<a href=\"javascript:alert()\">hello</a>",
19
+ "output" : "<a>hello</a>"
20
+ }
21
+
22
+
23
+ ]
@@ -0,0 +1,29 @@
1
+
2
+ [
3
+
4
+ {
5
+ "it" : "replaces tabs with 2 spaces",
6
+ "input" : "<p>hello\tagain</p>",
7
+ "output" : "<p>hello again</p>"
8
+ },
9
+
10
+ {
11
+ "it" : "removes \\r",
12
+ "input" : "hi \r\r again",
13
+ "output" : "hi again"
14
+ },
15
+
16
+ {
17
+ "it" : "does not remove \\n",
18
+ "input" : "<p>hello\nagain</p>",
19
+ "output" : "<p>hello\nagain</p>"
20
+ },
21
+
22
+ {
23
+ "it" : "does not remove multiple \\n",
24
+ "input" : "<p>hello\n \nagain</p>",
25
+ "output" : "<p>hello\n \nagain</p>"
26
+ }
27
+
28
+
29
+ ]
@@ -0,0 +1,35 @@
1
+
2
+ require "multi_json"
3
+ require "escape_escape_escape"
4
+
5
+ Dir.glob("specs/as_json/*.json").sort.each { |f|
6
+ contents = MultiJson.load(File.read f)
7
+ method_name = File.basename(f).gsub(/\A\d+-|\.json\Z/, '')
8
+ describe ":#{method_name}" do
9
+ contents.each { |t|
10
+ it t["it"] do
11
+ i = t["input"]
12
+ o = t["output"]
13
+ actual = Escape_Escape_Escape.send(method_name, i)
14
+
15
+ case o
16
+ when String
17
+ actual.should == o
18
+ when Array
19
+ target = o.pop
20
+ begin
21
+ if o[1].is_a?(Array)
22
+ meth = o.shift
23
+ args = o.shift
24
+ actual = actual.send(meth, *args)
25
+ else
26
+ fail "Unknown method: #{o[0].inspect}"
27
+ end
28
+ end while !o.empty?
29
+
30
+ actual.should == target
31
+ end # === case
32
+ end # === it
33
+ }
34
+ end
35
+ }
data/specs/helpers.rb ADDED
@@ -0,0 +1,4 @@
1
+
2
+ require 'Bacon_Colored'
3
+ require 'escape_escape_escape'
4
+ require 'pry'
@@ -0,0 +1,132 @@
1
+
2
+ var _ = require('underscore')
3
+ , assert = require('assert')
4
+ , E = require('../lib/e_e_e').Sanitize
5
+ ;
6
+
7
+
8
+ describe( 'Sanitize attrs:', function () {
9
+
10
+ // What if the value is null? undefined?
11
+ _.each(E.attr_funcs, function (name) {
12
+
13
+ describe( name, function () {
14
+
15
+ it( 'returns error if value is null', function () {
16
+ assert.equal(E[name](null).constructor, Error);
17
+ });
18
+
19
+ it( 'returns error if value is undefined', function () {
20
+ assert.equal(E[name](undefined).constructor, Error);
21
+ });
22
+
23
+ if ( !_.contains("name href action".split(' '), name ) )
24
+ it( 'adds specified name to error', function () {
25
+ var result = E[name](null, 'my_name').message;
26
+ if ( result.indexOf('my_name: ') !== 0)
27
+ assert.fail(result, 'my_name', 'E.' + name + ' is not adding name to error message.');
28
+ });
29
+
30
+ }); // === end desc
31
+
32
+ }); // end _.each
33
+
34
+ describe( 'string', function () {
35
+ it( 'returns value if string', function () {
36
+ assert.equal(E.string("s"), "s");
37
+ });
38
+
39
+ it( 'returns error if value is number', function () {
40
+ assert.equal(E.string(1).constructor, Error);
41
+ });
42
+ }); // === end desc
43
+
44
+ describe( 'string_in_array', function () {
45
+ it( 'returns value if string in array: [ my_string ]', function () {
46
+ var val = ["This is a string."];
47
+ assert.equal(E.string_in_array(val), val);
48
+ });
49
+ }); // === end desc
50
+
51
+ describe( 'tag', function () {
52
+ it( 'returns value if valid string', function () {
53
+ assert.equal(E.tag("button"), "button");
54
+ });
55
+
56
+ it( 'returns error if string contains invalid chars', function () {
57
+ assert.equal(E.tag("my-tag").message, "tag: invalid characters: \"my-tag\"");
58
+ });
59
+ }); // === end desc
60
+
61
+ describe( 'name', function () {
62
+ it( 'returns value if valid string', function () {
63
+ assert.equal(E.name("some_name"), "some_name");
64
+ });
65
+ }); // === end desc
66
+
67
+ _.each( ['href', 'action', 'uri'] , function (name) {
68
+ describe( 'url: ' + name, function () {
69
+ it( 'returns error if url is not valid', function () {
70
+ assert.equal(E[name]("http://wwwtome<").message, name + ": URI is not strictly valid.: http://wwwtome<");
71
+ });
72
+ }); // === end desc
73
+ });
74
+
75
+ describe( 'uri', function () {
76
+
77
+ it( 'normalizes address', function () {
78
+ var s = "hTTp://wWw.test.com/";
79
+ assert.equal(E.uri(s), s.toLowerCase());
80
+ });
81
+
82
+ it( 'returns an Error if path contains: <', function () {
83
+ var s = "http://www.test.com/<something/";
84
+ assert.equal(E.uri(s).constructor, Error);
85
+ });
86
+
87
+ it( 'returns an Error if path contains HTML entities', function () {
88
+ var s = "http://6&#9;6.000146.0x7.147/";
89
+ assert.equal(E.uri(s).constructor, Error);
90
+ });
91
+
92
+ it( 'returns an Error if path contains HTML entities', function () {
93
+ var s = "http://www.test.com/&nbsp;s/";
94
+ assert.equal(E.uri(s).constructor, Error);
95
+ });
96
+
97
+ it( 'returns an Error if query string contains HTML entities', function () {
98
+ var s = "http://www.test.com/s/test?t&nbsp;test";
99
+ assert.equal(E.uri(s).constructor, Error);
100
+ });
101
+
102
+ }); // === end desc
103
+
104
+ // ****************************************************************
105
+ // ****************** END of Sanitize Attrs ***********************
106
+ // ****************************************************************
107
+
108
+ describe( '.opt(func)', function () {
109
+ it( 'returns a function where null returns null', function () {
110
+ assert.equal(E.opt(E.string)(null), null);
111
+ });
112
+
113
+ it( 'returns a function where undefined returns null', function () {
114
+ assert.equal(E.opt(E.string)(undefined), null);
115
+ });
116
+
117
+ it( 'returns a function that passes false to underlying function', function () {
118
+ assert.equal(E.opt(E.string)(false).constructor, Error);
119
+ });
120
+
121
+ it( 'returns a function that passes any Number to underlying function', function () {
122
+ assert.equal(E.opt(E.string)(1).constructor, Error);
123
+ });
124
+
125
+ it( 'returns a function that passes any String to underlying function', function () {
126
+ assert.equal(E.opt(E.string)("a"), "a");
127
+ });
128
+ }); // === end desc
129
+
130
+ }); // === end desc
131
+
132
+
@@ -0,0 +1,57 @@
1
+
2
+ var _ = require('underscore')
3
+ , _s = require('underscore.string')
4
+ , unhtml = require('unhtml')
5
+ , special = require('special-html')
6
+ , assert = require('assert')
7
+ , Sanitize = require('../lib/e_e_e').Sanitize
8
+ , E = Sanitize.html
9
+ ;
10
+ var BRACKET = " < %3C &lt &lt; &LT &LT; &#60 &#060 &#0060 \
11
+ &#00060 &#000060 &#0000060 &#60; &#060; &#0060; &#00060; \
12
+ &#000060; &#0000060; &#x3c &#x03c &#x003c &#x0003c &#x00003c \
13
+ &#x000003c &#x3c; &#x03c; &#x003c; &#x0003c; &#x00003c; \
14
+ &#x000003c; &#X3c &#X03c &#X003c &#X0003c &#X00003c &#X000003c \
15
+ &#X3c; &#X03c; &#X003c; &#X0003c; &#X00003c; &#X000003c; \
16
+ &#x3C &#x03C &#x003C &#x0003C &#x00003C &#x000003C &#x3C; &#x03C; \
17
+ &#x003C; &#x0003C; &#x00003C; &#x000003C; &#X3C &#X03C \
18
+ &#X003C &#X0003C &#X00003C &#X000003C &#X3C; &#X03C; &#X003C; &#X0003C; \
19
+ &#X00003C; &#X000003C; \x3c \x3C \u003c \u003C ";
20
+
21
+
22
+ describe( 'Sanitize', function () {
23
+
24
+ it( 'does not re-escape already escaped text mixed with HTML', function () {
25
+ var h = "<p>Hi</p>";
26
+ var e = _s.escapeHTML(h);
27
+ var o = e + h;
28
+ assert.equal(E(o), _s.escapeHTML(h + h));
29
+ });
30
+
31
+ it( 'escapes special chars: "Hello ©®∆"', function () {
32
+ var s = "Hello & World ©®∆";
33
+ var t = "Hello &amp; World &#169;&#174;&#8710;";
34
+ assert.equal(E(s), t);
35
+ });
36
+
37
+ it( 'escapes all 70 different combos of "<"', function () {
38
+ assert.equal(_.uniq(E(BRACKET.trim()).split(/\s+/)).join(' '), "&lt; %3C");
39
+ });
40
+
41
+ it( 'escapes all keys in nested objects', function () {
42
+ var HTML = "<b>test</b>";
43
+ assert.deepEqual(E({" a >":{" a >": HTML}}), {" a &gt;": {" a &gt;": _s.escapeHTML(HTML)}});
44
+ });
45
+
46
+ it( 'escapes all values in nested objects', function () {
47
+ var HTML = "<b>test</b>";
48
+ assert.deepEqual(E({name:{name: HTML}}), {name: {name: _s.escapeHTML(HTML)}});
49
+ });
50
+
51
+ it( 'escapes all values in nested arrays', function () {
52
+ var HTML = "<b>test</b>";
53
+ assert.deepEqual(E([{name:{name: HTML}}]), [{name: {name: _s.escapeHTML(HTML)}}]);
54
+ });
55
+
56
+ }); // === end desc
57
+
@@ -0,0 +1,41 @@
1
+
2
+ var _ = require('underscore')
3
+ , _s = require('underscore.string')
4
+ , unhtml = require('unhtml')
5
+ , special = require('special-html')
6
+ , assert = require('assert')
7
+ , Sanitize = require('../lib/e_e_e').Sanitize
8
+ , E = Sanitize.html
9
+ , U = Sanitize.un_escape
10
+ ;
11
+ var BRACKET = " < %3C &lt &lt; &LT &LT; &#60 &#060 &#0060 \
12
+ &#00060 &#000060 &#0000060 &#60; &#060; &#0060; &#00060; \
13
+ &#000060; &#0000060; &#x3c &#x03c &#x003c &#x0003c &#x00003c \
14
+ &#x000003c &#x3c; &#x03c; &#x003c; &#x0003c; &#x00003c; \
15
+ &#x000003c; &#X3c &#X03c &#X003c &#X0003c &#X00003c &#X000003c \
16
+ &#X3c; &#X03c; &#X003c; &#X0003c; &#X00003c; &#X000003c; \
17
+ &#x3C &#x03C &#x003C &#x0003C &#x00003C &#x000003C &#x3C; &#x03C; \
18
+ &#x003C; &#x0003C; &#x00003C; &#x000003C; &#X3C &#X03C \
19
+ &#X003C &#X0003C &#X00003C &#X000003C &#X3C; &#X03C; &#X003C; &#X0003C; \
20
+ &#X00003C; &#X000003C; \x3c \x3C \u003c \u003C "; // Fixture: 70 whitespace-separated spellings of "<" (raw, %3C, named, decimal and hex entities, JS escapes).
21
+ // NOTE: the trailing "\" on each line is a string line continuation, and \x3c / \u003c are JS escapes, so those four tokens are a literal "<" at runtime.
22
+
23
+ describe( 'Sanitize', function () {
24
+ // Specs for U (Sanitize.un_escape): turning entities back into the characters they stand for.
25
+ it( 'un-escapes escaped text mixed with HTML', function () {
26
+ var s = "<p>Hi&amp;</p>";
27
+ assert.equal(U(s), "<p>Hi&</p>"); // the raw tags are expected unchanged; only the entity is decoded
28
+ });
29
+ // The named entity and the decimal numeric references are all expected to decode to their characters.
30
+ it( 'un-escapes special chars: "Hello ©®∆"', function () {
31
+ var s = "Hello &amp; World &#169;&#174;&#8710;";
32
+ var t = "Hello & World ©®∆";
33
+ assert.equal(U(s), t);
34
+ });
35
+ // After de-duplication only two distinct outputs may remain: "<" for every entity spelling, and "%3C" unchanged.
36
+ it( 'un-escapes all 70 different combos of "<"', function () {
37
+ assert.equal(_.uniq(U(BRACKET.trim()).split(/\s+/)).join(' '), "< %3C");
38
+ });
39
+
40
+ }); // === end desc
41
+
metadata ADDED
@@ -0,0 +1,163 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: escape_escape_escape
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - da99
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sanitize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bacon
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: Bacon_Colored
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: multi_json
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.10'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.10'
111
+ description: "\n My way of escaping/encoding HTML with the proper entities.\n "
112
+ email:
113
+ - i-hate-spam-1234567@mailinator.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - Gemfile
120
+ - LICENSE
121
+ - LICENSE.txt
122
+ - README.md
123
+ - VERSION
124
+ - escape_escape_escape.gemspec
125
+ - lib/beta.rb
126
+ - lib/e_e_e.js
127
+ - lib/escape_escape_escape.rb
128
+ - package.json
129
+ - specs/as_json/0001-html.json
130
+ - specs/as_json/0010-text.json
131
+ - specs/escape_escape_escape.rb
132
+ - specs/helpers.rb
133
+ - test/sanitize_attrs.js
134
+ - test/sanitize_html.js
135
+ - test/sanitize_un_escape.js
136
+ homepage: https://github.com/da99/escape_escape_escape
137
+ licenses:
138
+ - MIT
139
+ metadata: {}
140
+ post_install_message:
141
+ rdoc_options: []
142
+ require_paths:
143
+ - lib
144
+ required_ruby_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: '0'
149
+ required_rubygems_version: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubyforge_project:
156
+ rubygems_version: 2.3.0
157
+ signing_key:
158
+ specification_version: 4
159
+ summary: My way of escaping/encoding HTML.
160
+ test_files:
161
+ - test/sanitize_attrs.js
162
+ - test/sanitize_html.js
163
+ - test/sanitize_un_escape.js