escape_escape_escape 0.3.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,23 +0,0 @@
1
-
2
- Copyright (c) 2014 da99
3
-
4
- MIT License
5
-
6
- Permission is hereby granted, free of charge, to any person obtaining
7
- a copy of this software and associated documentation files (the
8
- "Software"), to deal in the Software without restriction, including
9
- without limitation the rights to use, copy, modify, merge, publish,
10
- distribute, sublicense, and/or sell copies of the Software, and to
11
- permit persons to whom the Software is furnished to do so, subject to
12
- the following conditions:
13
-
14
- The above copyright notice and this permission notice shall be
15
- included in all copies or substantial portions of the Software.
16
-
17
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,270 +0,0 @@
1
-
2
-
3
- # === Important Gems ===
4
- require 'cgi' # Don't use URI.escape because it does not escape all invalid characters.
5
- require 'htmlentities'
6
- require 'loofah'
7
- require "addressable/uri"
8
- require "escape_utils"
9
- require "htmlentities"
10
- require "uri"
11
-
12
- def Escape_Escape_Escape s
13
- Escape_Escape_Escape.escape(s)
14
- end
15
-
16
- class Escape_Escape_Escape
17
-
18
- Coder = HTMLEntities.new(:xhtml1)
19
-
20
- ENCODING_OPTIONS_CLEAN_UTF8 = {
21
- :invalid => :replace, # Replace invalid byte sequences
22
- :undef => :replace, # Replace anything not defined in ASCII
23
- :replace => '' # Use a blank for those replacements
24
- # :newline => :universal
25
- # :universal_newline => true # Always break lines with \n, not \r\n
26
- }
27
-
28
- opts = Regexp::FIXEDENCODING | Regexp::IGNORECASE
29
-
30
- # tabs, etc.
31
- Control = Regexp.new("[[:cntrl:]]".force_encoding('utf-8'), opts) # unicode whitespaces, like 160 codepoint
32
- # From:
33
- # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
34
- White_Space = Regexp.new("[[:space:]]".force_encoding('utf-8'), opts)
35
-
36
- REPEATING_DOTS = /\.{1,}/
37
- INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
38
-
39
-
40
- # HTML_ESCAPE_TABLE is used after text is escaped to
41
- # further escape the text. This is why the semi-colon (&#59;) was left out
42
- # from HTML_ESCAPE_TABLE. It would conflict with already escaped text.
43
- # For more entities: http://www.w3.org/MarkUp/html3/latin1.html
44
- # or go to: http://www.mountaindragon.com/html/iso.htm
45
- HTML_ESCAPE_TABLE = {
46
-
47
- '«' => "<",
48
- '»' => ">",
49
-
50
- "‘" => "'",
51
- "’" => "'",
52
- "‚" => "'",
53
-
54
- "‘" => "'",
55
- "’" => "'",
56
-
57
- "“" => """,
58
- "”" => """,
59
- "„" => """,
60
-
61
- "‹" => "<",
62
- "›" => ">",
63
-
64
- "´" => "'",
65
- "¨" => """,
66
-
67
- '\\' => "\",
68
- # '/' => "/",
69
- # '%' => "%",
70
- # ':' => ':',
71
- # '=' => '=',
72
- # '?' => '?',
73
- # '@' => '@',
74
- "\`" => ''',
75
- '‘' => "'",
76
- '’' => "'",
77
- '“' => '"',
78
- '”' => '"',
79
- # "$" => "$",
80
- # '#' => '#', # Don't use this or else it will ruin all other entities.
81
- # '&' => # Don't use this " " " " " "
82
- # ';' => # Don't use this " " " " " "
83
- '|' => '¦',
84
- '~' => '∼'
85
- # '!' => '!',
86
- # '*' => '∗', # Don't use this. '*' is used by text formatting, e.g. RedCloth, etc.
87
- # '{' => '{',
88
- # '}' => '}',
89
- # '(' => '(',
90
- # ')' => ')',
91
- # "\n" => '<br />'
92
- }
93
-
94
- def new_regexp str
95
- Regexp.new(clean_utf8(str), Regexp::FIXEDENCODING | Regexp::IGNORECASE)
96
- end
97
-
98
- class << self # ======================================================
99
-
100
- # From:
101
- # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
102
- #
103
- # Test:
104
- # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
105
- # inject('', :<<)
106
- #
107
- def clean_utf8 s
108
- s.
109
- encode(Encoding.find('utf-8'), ENCODING_OPTIONS_CLEAN_UTF8).
110
- gsub(self::Control, "\n").
111
- gsub(self::White_Space, " ")
112
- end
113
-
114
- def un_escape raw
115
- EscapeUtils.unescape_html clean_utf8(raw)
116
- end
117
-
118
- def uri str
119
- uri = Addressable::URI.parse(str)
120
- if ["http","https","ftp"].include?(uri.scheme) || uri.path.index('/') == 0
121
- str
122
- else
123
- nil
124
- end
125
- rescue Addressable::URI::InvalidURIError
126
- fail "Invalid: address: #{str.inspect}"
127
- end
128
-
129
- def escape o
130
- case o
131
- when String
132
- Coder.encode(un_escape(o), :named, :hexadecimal)
133
- else
134
- fail "Unknown type: #{o.inspect}"
135
- end
136
- end # === def
137
-
138
-
139
- # ===============================================
140
- # Raises: TZInfo::InvalidTimezoneIdentifier.
141
- # ===============================================
142
- def validate_timezone(timezone)
143
- TZInfo::Timezone.get( timezone.to_s.strip ).identifier
144
- end
145
-
146
- # =========================================================
147
- # Takes out any periods and back slashes in a String.
148
- # Single periods surrounded by text are allowed on the last substring
149
- # past the last slash because they are assumed to be filenames
150
- # with extensions.
151
- # =========================================================
152
- def path( raw_path )
153
- clean_crumbs = raw_path.split('/').map { |crumb| filename(crumb) }
154
- File.join( *clean_crumbs )
155
- end
156
-
157
- # ====================================================================
158
- # Returns a String where all characters except:
159
- # letters numbers underscores dashes
160
- # are replaced with a dash.
161
- # It also deletes any non-alphanumeric characters at the end
162
- # of the String.
163
- # ====================================================================
164
- def filename( raw_filename )
165
- plaintext( raw_filename ).
166
- downcase.
167
- gsub(REPEATING_DOTS, '.').
168
- gsub(INVALID_FILE_NAME_CHARS, '-').
169
- to_s
170
- end
171
-
172
- # ===============================================
173
- # This method is not meant to be called directly. Instead, call
174
- # <Wash.parse_tags>.
175
- # Returns: String with
176
- # * all spaces and underscores turned into dashes.
177
- # * all non-alphanumeric characters, underscores, dashes, and periods
178
- # turned into dashes.
179
- # * non-alphanumeric characters at the beginning and end stripped out.
180
- # ===============================================
181
- def tag( raw_tag )
182
- # raw_tag.strip.downcase.gsub( /[^a-z0-9\.]{1,}/,'-').gsub(/^[^a-z0-9]{1,}|[^a-z0-9]{1,}$/i, '').gsub(/\.{1,}/, '.')
183
- raw_tag.strip.downcase.gsub(/^[\,\.]{1,}|[\"]{1,}|[\,\.]{1,}$/, '').gsub(/\ /, '-')
184
- end
185
-
186
-
187
- # ===============================================
188
- # A better alternative than "Rack::Utils.escape_html". Escapes
189
- # various characters (including '&', '<', '>', and both quotation mark types)
190
- # to HTML decimal entities. Also escapes the characters from
191
- # SWISS::HTML_ESCAPE_TABLE.
192
- #
193
- # Text has to be UTF-8 before encoding, according to HTMLEntities gem.
194
- # Therefore, all text is run through <Wash.plaintext> before encoding.
195
- # ===============================================
196
- def html( raw_text )
197
-
198
- # Turn string into UTF8. (This also takes out control characters
199
- # which is good, or else they would be escaped into HTML too.)
200
- # Strip it after conversion.
201
- # return Dryopteris.sanitize(utf8_text)
202
- # Now encode it.
203
- normalized_encoded_text = escape( plaintext(raw_text).strip, :named )
204
-
205
- sanitized_text = Loofah.scrub_fragment( normalized_encoded_text, :prune ).to_s
206
- end # === def html
207
-
208
-
209
- # ===============================================
210
- # Returns: A string that is:
211
- # * normalized to :KC
212
- # * "\r\n" changed to "\n"
213
- # * all control characters stripped except for "\n"
214
- # and end.
215
- # Options:
216
- # :tabs
217
- # :spaces
218
- #
219
- # ===============================================
220
- def plaintext( raw_str, *opts)
221
-
222
- # Check options.
223
- @plaintext_allowed_options ||= [ :spaces, :tabs ]
224
- invalid_opts = opts - @plaintext_allowed_options
225
- raise(ArgumentError, "INVALID OPTION: #{invalid_opts.inspect}" ) if !invalid_opts.empty?
226
-
227
- # Save tabs if requested.
228
- raw_str = raw_str.gsub("\t", "&#09;") if opts.include?(:tabs)
229
-
230
- # First: Normalize characters.
231
- # Second: Strip out control characters.
232
- # Note: Must be normalized first, then strip.
233
- # See: http://msdn.microsoft.com/en-us/library/ms776393(VS.85).aspx
234
- final_str = raw_str.
235
- split("\n").
236
- map { |line|
237
- # Don't use "\x20" because that is the space character.
238
- line.chars.normalize.gsub( /[[:cntrl:]\x00-\x1f]*/, '' )
239
- }.
240
- join("\n")
241
-
242
- # Save whitespace or strip.
243
- if !opts.include?(:spaces)
244
- final_str = final_str.strip
245
- end
246
-
247
- # Normalize quotations and other characters through HTML entity encoding/decoding.
248
- final_str = coder.decode( normalised_str Coder.encode(final_str, :named) )
249
-
250
- # Put back tabs by request.
251
- if opts.include?(:tabs)
252
- final_str = final_str.gsub("&#09;", "\t")
253
- end
254
-
255
- final_str
256
- end # self.plaintext
257
-
258
- # Encode a few other symbols.
259
- # This also normalizes certain quotation and apostrophe HTML entities.
260
- def normalize_encoded_string s
261
- HTML_ESCAPE_TABLE.inject(s) do |m, kv|
262
- m.gsub( kv.first, kv.last)
263
- end
264
- end
265
-
266
- end # === class self ===
267
-
268
- end # === class Escape_Escape_Escape ===
269
-
270
-
@@ -1,258 +0,0 @@
1
-
2
- var _ = require('underscore')
3
- , _s = require('underscore.string')
4
- , special = require('special-html')
5
- , HTML_E = require('entities')
6
- , URI_js = require('uri-js')
7
- ;
8
-
9
- var NL = "\n";
10
- var SPACES = /\ +/g;
11
- var VALID_HTML_ID = /^[0-9a-zA-Z_]+$/;
12
- var VALID_HTML_TAG = /^[0-9a-zA-Z_]+$/;
13
- var IS_ERROR = function (o) { return (_.isObject(o) && o.constructor == Error); };
14
- var funcs_scope = this;
15
- var INSPECT = function (v) { return JSON.stringify(v); };
16
-
17
- var E = exports.Sanitize = {};
18
-
19
- // ****************************************************************
20
- // ****************** Sanitize Tag Attributes and content *********
21
- // ****************************************************************
22
-
23
-
24
- E.name = function (v) { return E.id(v , "name"); };
25
- E.href = function (v) { return E.uri(v , "href"); }
26
- E.action = function (v) { return E.uri(v , 'action'); };
27
-
28
- E.string = function (raw, name) {
29
- name = (name) ? (name + ': ') : '';
30
-
31
- if (_.isString(raw))
32
- return (raw.trim());
33
-
34
- return new Error(name + "String expected, instead got: " + JSON.stringify(raw));
35
- };
36
-
37
- E.uri = function (raw, name) {
38
- name = (name) ? name : 'uri';
39
-
40
- var val = E.string(raw, name);
41
- if (E.is_error(val))
42
- return val;
43
-
44
- var url = HTML_E.decode(val, 2);
45
- var parse = URI_js.parse(url);
46
- if (parse.errors.length)
47
- return new Error(name + ": " + parse.errors[0] + ': ' + val);
48
-
49
- return URI_js.normalize(url);
50
- };
51
-
52
- E.tag = function (raw, name) {
53
- name = (name) ? name : "tag";
54
-
55
- var val = E.string(raw, name);
56
- if (E.is_error(val))
57
- return val;
58
-
59
- if (!val.match(VALID_HTML_TAG))
60
- return new Error(name + ": invalid characters: " + JSON.stringify(val));
61
-
62
- return val;
63
- };
64
-
65
- E.id = function (raw_val, name) {
66
- name = (name) ? name : "id";
67
-
68
- var val = E.string(raw_val, name);
69
- if (val.message)
70
- return val;
71
-
72
- if (!val.match(VALID_HTML_ID))
73
- return new Error(name + ": invalid characters: " + JSON.stringify(val));
74
-
75
- return val;
76
- };
77
-
78
- E.num_of_lines = function (raw_val, name) {
79
- name = (name) ? name : 'num_of_lines';
80
-
81
- if (!_.isNumber(raw_val) || _.isNaN(raw_val))
82
- return new Error(name + ": Must be a number: " + JSON.stringify(raw_val));
83
-
84
- if (raw_val < 1 || raw_val > 250)
85
- return new Error(name + ": Number out of range: " + JSON.stringify(raw_val));
86
-
87
- return raw_val;
88
- };
89
-
90
- E.string_in_array = function (unk, name) {
91
- name = (name) ? name : 'string_in_array';
92
- if (_.isArray(unk) && unk.length === 1 && _.isString(unk[0]))
93
- return unk;
94
- return new Error(name + ": Must be a string within an array: " + JSON.stringify(unk));
95
- };
96
-
97
- var temp = null;
98
- E.attr_funcs = [];
99
- for (temp in E) {
100
- if (_.isFunction(E[temp])) {
101
- E.attr_funcs.push(temp);
102
- }
103
- }
104
-
105
- E.opt = function (func, name) {
106
- return function (v) {
107
- if (v === undefined || v === null)
108
- return null;
109
- return func(v, name);
110
- };
111
- };
112
-
113
- E.is = function (func) {
114
- return function (v) {
115
- var result = func(v);
116
- if (result && result.message)
117
- return false;
118
- return !!result;
119
- };
120
- };
121
-
122
- _.each(E.attr_funcs, function (name, i) {
123
- E["opt_" + name] = E.opt(E[name], name);
124
- E["is_" + name] = E.is(E[name]);
125
- });
126
-
127
- // ****************************************************************
128
- // ****************** End of Sanitize Attr Checkers ***************
129
- // ****************************************************************
130
-
131
- E.is_error = function (obj) {
132
- if (!_.isObject(obj))
133
- return false;
134
- return obj.constructor === Error;
135
- };
136
-
137
- E.html = function (str) {
138
- if (_.isArray(str)) {
139
- return _.map(str, function (v, i) {
140
- return E.html(v);
141
- });
142
- }
143
-
144
- if (_.isObject(str)) {
145
- var new_o = {};
146
- _.each(str, function (v, k) {
147
- new_o[E.html(k)] = E.html(v);
148
- });
149
- return new_o;
150
- }
151
-
152
- if (!_.isString(str))
153
- return str;
154
-
155
- return special( _s.escapeHTML( E.un_escape(str) ) );
156
- };
157
-
158
- E.un_escape = function (str) {
159
- return _s.unescapeHTML( HTML_E.decode( str , 2) )
160
- };
161
-
162
- E.attr = function (k, v, tag) {
163
- if (_.contains(['id', 'name', 'type'], k) && !v.match(VALID_HTML_ID))
164
- return new Error("Invalid chars in " + tag + " " + k + ": " + v);
165
-
166
- if (!k.match(VALID_HTML_ID))
167
- return new Error("Invalid chars in " + tag + " attribute name: " + k);
168
-
169
- var safe_name = Ok.escape(k).trim();
170
-
171
- if (_.contains(['href', 'action'], k)) {
172
- var safe_val = Ok.escape_uri(v);
173
- if (!safe_val)
174
- return new Error('Invalid link address: ' + v);
175
- } else {
176
- var safe_val = Ok.escape(v);
177
- }
178
-
179
- return [safe_name, safe_val];
180
- };
181
-
182
- E.attrs = function (raw_attrs, tag) {
183
- var sanitized = {};
184
- var err = null;
185
-
186
- _.find(raw_attrs, function (v, k) {
187
-
188
- var pair = Ok.escape_attr(k, v, tag);
189
-
190
- if (IS_ERROR(pair)) {
191
- err = pair;
192
- return pair;
193
- }
194
-
195
- sanitized[pair[0]] = pair[1];
196
-
197
- });
198
-
199
- if (err)
200
- return err;
201
-
202
- return sanitized;
203
- };
204
-
205
-
206
- E.to_func_calls = function (arr) {
207
- var next = null;
208
- var final = [];
209
- var line = null;
210
-
211
- while (arr.length) {
212
- line = [arr.shift()];
213
- while(arr.length && !_.isString(arr[0])) {
214
- line.push(arr.shift());
215
- }
216
- final.push(line);
217
- }
218
-
219
- return final;
220
- };
221
-
222
- E.to_applet_func_calls = function (arr) {
223
- var next = null;
224
- var final = [];
225
-
226
- while (arr.length) {
227
- var name = arr.shift();
228
- if (!_.isString(name))
229
- return new Error("No function specfied for arg: " + JSON.stringify(name));
230
- name = name.trim();
231
- var attrs = null;
232
- var body = null;
233
-
234
- next = arr[0];
235
-
236
- if (!_.isString(next)) {
237
- if (_.isObject(next) && !_.isArray(next))
238
- attrs = arr.shift();
239
- next = arr[0];
240
- if (_.isArray(next))
241
- body = arr.shift();
242
- next = arr[0];
243
- if (next && _.isArray(next))
244
- return new Error(name + ": extra array argument: " + JSON.stringify(next));
245
- if (next && _.isObject(next))
246
- return new Error(name + ": extra attr object argument: " + JSON.stringify(next));
247
- if (next && !_.isString(next))
248
- return new Error(name + ": invalid argument: " + JSON.stringify(next));
249
- }
250
-
251
- final.push([name, attrs, body]);
252
- }
253
-
254
- return final;
255
- };
256
-
257
-
258
-