escape_escape_escape 0.3.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +0,0 @@
1
-
2
- Copyright (c) 2014 da99
3
-
4
- MIT License
5
-
6
- Permission is hereby granted, free of charge, to any person obtaining
7
- a copy of this software and associated documentation files (the
8
- "Software"), to deal in the Software without restriction, including
9
- without limitation the rights to use, copy, modify, merge, publish,
10
- distribute, sublicense, and/or sell copies of the Software, and to
11
- permit persons to whom the Software is furnished to do so, subject to
12
- the following conditions:
13
-
14
- The above copyright notice and this permission notice shall be
15
- included in all copies or substantial portions of the Software.
16
-
17
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,270 +0,0 @@
1
-
2
-
3
- # === Important Gems ===
4
- require 'cgi' # Don't use URI.escape because it does not escape all invalid characters.
5
- require 'htmlentities'
6
- require 'loofah'
7
- require "addressable/uri"
8
- require "escape_utils"
9
- require "htmlentities"
10
- require "uri"
11
-
12
- def Escape_Escape_Escape s
13
- Escape_Escape_Escape.escape(s)
14
- end
15
-
16
- class Escape_Escape_Escape
17
-
18
- Coder = HTMLEntities.new(:xhtml1)
19
-
20
- ENCODING_OPTIONS_CLEAN_UTF8 = {
21
- :invalid => :replace, # Replace invalid byte sequences
22
- :undef => :replace, # Replace anything not defined in ASCII
23
- :replace => '' # Use a blank for those replacements
24
- # :newline => :universal
25
- # :universal_newline => true # Always break lines with \n, not \r\n
26
- }
27
-
28
- opts = Regexp::FIXEDENCODING | Regexp::IGNORECASE
29
-
30
- # tabs, etc.
31
- Control = Regexp.new("[[:cntrl:]]".force_encoding('utf-8'), opts) # unicode whitespaces, like 160 codepoint
32
- # From:
33
- # http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
34
- White_Space = Regexp.new("[[:space:]]".force_encoding('utf-8'), opts)
35
-
36
- REPEATING_DOTS = /\.{1,}/
37
- INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
38
-
39
-
40
- # HTML_ESCAPE_TABLE is used after text is escaped to
41
- # further escape text more. This is why the semi-colon (&#59;) was left out
42
- # from HTML_ESCAPE_TABLE. It would conflict with already escaped text.
43
- # For more entities: http://www.w3.org/MarkUp/html3/latin1.html
44
- # or go to: http://www.mountaindragon.com/html/iso.htm
45
- HTML_ESCAPE_TABLE = {
46
-
47
- '«' => "<",
48
- '»' => ">",
49
-
50
- "‘" => "'",
51
- "’" => "'",
52
- "‚" => "'",
53
-
54
- "‘" => "'",
55
- "’" => "'",
56
-
57
- "“" => """,
58
- "”" => """,
59
- "„" => """,
60
-
61
- "‹" => "<",
62
- "›" => ">",
63
-
64
- "´" => "'",
65
- "¨" => """,
66
-
67
- '\\' => "\",
68
- # '/' => "/",
69
- # '%' => "%",
70
- # ':' => ':',
71
- # '=' => '=',
72
- # '?' => '?',
73
- # '@' => '@',
74
- "\`" => ''',
75
- '‘' => "'",
76
- '’' => "'",
77
- '“' => '"',
78
- '”' => '"',
79
- # "$" => "$",
80
- # '#' => '#', # Don't use this or else it will ruin all other entities.
81
- # '&' => # Don't use this " " " " " "
82
- # ';' => # Don't use this " " " " " "
83
- '|' => '¦',
84
- '~' => '∼'
85
- # '!' => '!',
86
- # '*' => '∗', # Don't use this. '*' is used by text formatting, e.g., RedCloth, etc.
87
- # '{' => '{',
88
- # '}' => '}',
89
- # '(' => '(',
90
- # ')' => ')',
91
- # "\n" => '<br />'
92
- }
93
-
94
- def new_regexp str
95
- Regexp.new(clean_utf8(str), Regexp::FIXEDENCODING | Regexp::IGNORECASE)
96
- end
97
-
98
- class << self # ======================================================
99
-
100
- # From:
101
- # http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
102
- #
103
- # Test:
104
- # [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
105
- # inject('', :<<)
106
- #
107
- def clean_utf8 s
108
- s.
109
- encode(Encoding.find('utf-8'), ENCODING_OPTIONS_CLEAN_UTF8).
110
- gsub(self::Control, "\n").
111
- gsub(self::White_Space, " ")
112
- end
113
-
114
- def un_escape raw
115
- EscapeUtils.unescape_html clean_utf8(raw)
116
- end
117
-
118
- def uri str
119
- uri = Addressable::URI.parse(str)
120
- if ["http","https","ftp"].include?(uri.scheme) || uri.path.index('/') == 0
121
- str
122
- else
123
- nil
124
- end
125
- rescue Addressable::URI::InvalidURIError
126
- fail "Invalid: address: #{str.inspect}"
127
- end
128
-
129
- def escape o
130
- case o
131
- when String
132
- Coder.encode(un_escape(o), :named, :hexadecimal)
133
- else
134
- fail "Unknown type: #{o.inspect}"
135
- end
136
- end # === def
137
-
138
-
139
- # ===============================================
140
- # Raises: TZInfo::InvalidTimezoneIdentifier.
141
- # ===============================================
142
- def validate_timezone(timezone)
143
- TZInfo::Timezone.get( timezone.to_s.strip ).identifier
144
- end
145
-
146
- # =========================================================
147
- # Takes out any periods and back slashes in a String.
148
- # Single periods surrounding text are allowed on the last substring
149
- # past the last slash because they are assumed to be filenames
150
- # with extensions.
151
- # =========================================================
152
- def path( raw_path )
153
- clean_crumbs = raw_path.split('/').map { |crumb| filename(crumb) }
154
- File.join( *clean_crumbs )
155
- end
156
-
157
- # ====================================================================
158
- # Returns a String where all characters except:
159
- # letters numbers underscores dashes
160
- # are replaced with a dash.
161
- # It also deletes any non-alphanumeric characters at the end
162
- # of the String.
163
- # ====================================================================
164
- def filename( raw_filename )
165
- plaintext( raw_filename ).
166
- downcase.
167
- gsub(REPEATING_DOTS, '.').
168
- gsub(INVALID_FILE_NAME_CHARS, '-').
169
- to_s
170
- end
171
-
172
- # ===============================================
173
- # This method is not meant to be called directly. Instead, call
174
- # <Wash.parse_tags>.
175
- # Returns: String with
176
- # * all spaces and underscores turned into dashes.
177
- # * all non-alphanumeric characters, underscores, dashes, and periods
178
- # turned into dashes.
179
- # * non-alphanumeric characters at the beginning and end stripped out.
180
- # ===============================================
181
- def tag( raw_tag )
182
- # raw_tag.strip.downcase.gsub( /[^a-z0-9\.]{1,}/,'-').gsub(/^[^a-z0-9]{1,}|[^a-z0-9]{1,}$/i, '').gsub(/\.{1,}/, '.')
183
- raw_tag.strip.downcase.gsub(/^[\,\.]{1,}|[\"]{1,}|[\,\.]{1,}$/, '').gsub(/\ /, '-')
184
- end
185
-
186
-
187
- # ===============================================
188
- # A better alternative than "Rack::Utils.escape_html". Escapes
189
- # various characters (including '&', '<', '>', and both quotation mark types)
190
- # to HTML decimal entities. Also escapes the characters from
191
- # SWISS::HTML_ESCAPE_TABLE.
192
- #
193
- # Text has to be UTF-8 before encoding, according to HTMLEntities gem.
194
- # Therefore, all text is run through <Wash.plaintext> before encoding.
195
- # ===============================================
196
- def html( raw_text )
197
-
198
- # Turn string into UTF8. (This also takes out control characters
199
- # which is good, or else they would be escaped into HTML too.)
200
- # Strip it after conversion.
201
- # return Dryopteris.sanitize(utf8_text)
202
- # Now encode it.
203
- normalized_encoded_text = escape( plaintext(raw_text).strip, :named )
204
-
205
- sanitized_text = Loofah.scrub_fragment( normalized_encoded_text, :prune ).to_s
206
- end # === def html
207
-
208
-
209
- # ===============================================
210
- # Returns: A string that is:
211
- # * normalized to :KC
212
- # * "\r\n" changed to "\n"
213
- # * all control characters stripped except for "\n"
214
- # and end.
215
- # Options:
216
- # :tabs
217
- # :spaces
218
- #
219
- # ===============================================
220
- def plaintext( raw_str, *opts)
221
-
222
- # Check options.
223
- @plaintext_allowed_options ||= [ :spaces, :tabs ]
224
- invalid_opts = opts - @plaintext_allowed_options
225
- raise(ArgumentError, "INVALID OPTION: #{invalid_opts.inspect}" ) if !invalid_opts.empty?
226
-
227
- # Save tabs if requested.
228
- raw_str = raw_str.gsub("\t", "&#09;") if opts.include?(:tabs)
229
-
230
- # First: Normalize characters.
231
- # Second: Strip out control characters.
232
- # Note: Must be normalized first, then strip.
233
- # See: http://msdn.microsoft.com/en-us/library/ms776393(VS.85).aspx
234
- final_str = raw_str.
235
- split("\n").
236
- map { |line|
237
- # Don't use "\x20" because that is the space character.
238
- line.chars.normalize.gsub( /[[:cntrl:]\x00-\x1f]*/, '' )
239
- }.
240
- join("\n")
241
-
242
- # Save whitespace or strip.
243
- if !opts.include?(:spaces)
244
- final_str = final_str.strip
245
- end
246
-
247
- # Normalize quotations and other characters through HTML entity encoding/decoding.
248
- final_str = coder.decode( normalised_str Coder.encode(final_str, :named) )
249
-
250
- # Put back tabs by request.
251
- if opts.include?(:tabs)
252
- final_str = final_str.gsub("&#09;", "\t")
253
- end
254
-
255
- final_str
256
- end # self.plaintext
257
-
258
- # Encode a few other symbols.
259
- # This also normalizes certain quotation and apostrophe HTML entities.
260
- def normalize_encoded_string s
261
- HTML_ESCAPE_TABLE.inject(s) do |m, kv|
262
- m.gsub( kv.first, kv.last)
263
- end
264
- end
265
-
266
- end # === class self ===
267
-
268
- end # === class Escape_Escape_Escape ===
269
-
270
-
@@ -1,258 +0,0 @@
1
-
2
- var _ = require('underscore')
3
- , _s = require('underscore.string')
4
- , special = require('special-html')
5
- , HTML_E = require('entities')
6
- , URI_js = require('uri-js')
7
- ;
8
-
9
- var NL = "\n";
10
- var SPACES = /\ +/g;
11
- var VALID_HTML_ID = /^[0-9a-zA-Z_]+$/;
12
- var VALID_HTML_TAG = /^[0-9a-zA-Z_]+$/;
13
- var IS_ERROR = function (o) { return (_.isObject(o) && o.constructor == Error); };
14
- var funcs_scope = this;
15
- var INSPECT = function (v) { return JSON.stringify(v); };
16
-
17
- var E = exports.Sanitize = {};
18
-
19
- // ****************************************************************
20
- // ****************** Sanitize Tag Attributes and content *********
21
- // ****************************************************************
22
-
23
-
24
- E.name = function (v) { return E.id(v , "name"); };
25
- E.href = function (v) { return E.uri(v , "href"); }
26
- E.action = function (v) { return E.uri(v , 'action'); };
27
-
28
- E.string = function (raw, name) {
29
- name = (name) ? (name + ': ') : '';
30
-
31
- if (_.isString(raw))
32
- return (raw.trim());
33
-
34
- return new Error(name + "String expected, instead got: " + JSON.stringify(raw));
35
- };
36
-
37
- E.uri = function (raw, name) {
38
- name = (name) ? name : 'uri';
39
-
40
- var val = E.string(raw, name);
41
- if (E.is_error(val))
42
- return val;
43
-
44
- var url = HTML_E.decode(val, 2);
45
- var parse = URI_js.parse(url);
46
- if (parse.errors.length)
47
- return new Error(name + ": " + parse.errors[0] + ': ' + val);
48
-
49
- return URI_js.normalize(url);
50
- };
51
-
52
- E.tag = function (raw, name) {
53
- name = (name) ? name : "tag";
54
-
55
- var val = E.string(raw, name);
56
- if (E.is_error(val))
57
- return val;
58
-
59
- if (!val.match(VALID_HTML_TAG))
60
- return new Error(name + ": invalid characters: " + JSON.stringify(val));
61
-
62
- return val;
63
- };
64
-
65
- E.id = function (raw_val, name) {
66
- name = (name) ? name : "id";
67
-
68
- var val = E.string(raw_val, name);
69
- if (val.message)
70
- return val;
71
-
72
- if (!val.match(VALID_HTML_ID))
73
- return new Error(name + ": invalid characters: " + JSON.stringify(val));
74
-
75
- return val;
76
- };
77
-
78
- E.num_of_lines = function (raw_val, name) {
79
- name = (name) ? name : 'num_of_lines';
80
-
81
- if (!_.isNumber(raw_val) || _.isNaN(raw_val))
82
- return new Error(name + ": Must be a number: " + JSON.stringify(raw_val));
83
-
84
- if (raw_val < 1 || raw_val > 250)
85
- return new Error(name + ": Number out of range: " + JSON.stringify(raw_val));
86
-
87
- return raw_val;
88
- };
89
-
90
- E.string_in_array = function (unk, name) {
91
- name = (name) ? name : 'string_in_array';
92
- if (_.isArray(unk) && unk.length === 1 && _.isString(unk[0]))
93
- return unk;
94
- return new Error(name + ": Must be a string within an array: " + JSON.stringify(unk));
95
- };
96
-
97
- var temp = null;
98
- E.attr_funcs = [];
99
- for (temp in E) {
100
- if (_.isFunction(E[temp])) {
101
- E.attr_funcs.push(temp);
102
- }
103
- }
104
-
105
- E.opt = function (func, name) {
106
- return function (v) {
107
- if (v === undefined || v === null)
108
- return null;
109
- return func(v, name);
110
- };
111
- };
112
-
113
- E.is = function (func) {
114
- return function (v) {
115
- var result = func(v);
116
- if (result && result.message)
117
- return false;
118
- return !!result;
119
- };
120
- };
121
-
122
- _.each(E.attr_funcs, function (name, i) {
123
- E["opt_" + name] = E.opt(E[name], name);
124
- E["is_" + name] = E.is(E[name]);
125
- });
126
-
127
- // ****************************************************************
128
- // ****************** End of Sanitize Attr Checkers ***************
129
- // ****************************************************************
130
-
131
- E.is_error = function (obj) {
132
- if (!_.isObject(obj))
133
- return false;
134
- return obj.constructor === Error;
135
- };
136
-
137
- E.html = function (str) {
138
- if (_.isArray(str)) {
139
- return _.map(str, function (v, i) {
140
- return E.html(v);
141
- });
142
- }
143
-
144
- if (_.isObject(str)) {
145
- var new_o = {};
146
- _.each(str, function (v, k) {
147
- new_o[E.html(k)] = E.html(v);
148
- });
149
- return new_o;
150
- }
151
-
152
- if (!_.isString(str))
153
- return str;
154
-
155
- return special( _s.escapeHTML( E.un_escape(str) ) );
156
- };
157
-
158
- E.un_escape = function (str) {
159
- return _s.unescapeHTML( HTML_E.decode( str , 2) )
160
- };
161
-
162
- E.attr = function (k, v, tag) {
163
- if (_.contains(['id', 'name', 'type'], k) && !v.match(VALID_HTML_ID))
164
- return new Error("Invalid chars in " + tag + " " + k + ": " + v);
165
-
166
- if (!k.match(VALID_HTML_ID))
167
- return new Error("Invalid chars in " + tag + " attribute name: " + k);
168
-
169
- var safe_name = Ok.escape(k).trim();
170
-
171
- if (_.contains(['href', 'action'], k)) {
172
- var safe_val = Ok.escape_uri(v);
173
- if (!safe_val)
174
- return new Error('Invalid link address: ' + v);
175
- } else {
176
- var safe_val = Ok.escape(v);
177
- }
178
-
179
- return [safe_name, safe_val];
180
- };
181
-
182
- E.attrs = function (raw_attrs, tag) {
183
- var sanitized = {};
184
- var err = null;
185
-
186
- _.find(raw_attrs, function (v, k) {
187
-
188
- var pair = Ok.escape_attr(k, v, tag);
189
-
190
- if (IS_ERROR(pair)) {
191
- err = pair;
192
- return pair;
193
- }
194
-
195
- sanitized[pair[0]] = pair[1];
196
-
197
- });
198
-
199
- if (err)
200
- return err;
201
-
202
- return sanitized;
203
- };
204
-
205
-
206
- E.to_func_calls = function (arr) {
207
- var next = null;
208
- var final = [];
209
- var line = null;
210
-
211
- while (arr.length) {
212
- line = [arr.shift()];
213
- while(arr.length && !_.isString(arr[0])) {
214
- line.push(arr.shift());
215
- }
216
- final.push(line);
217
- }
218
-
219
- return final;
220
- };
221
-
222
- E.to_applet_func_calls = function (arr) {
223
- var next = null;
224
- var final = [];
225
-
226
- while (arr.length) {
227
- var name = arr.shift();
228
- if (!_.isString(name))
229
- return new Error("No function specfied for arg: " + JSON.stringify(name));
230
- name = name.trim();
231
- var attrs = null;
232
- var body = null;
233
-
234
- next = arr[0];
235
-
236
- if (!_.isString(next)) {
237
- if (_.isObject(next) && !_.isArray(next))
238
- attrs = arr.shift();
239
- next = arr[0];
240
- if (_.isArray(next))
241
- body = arr.shift();
242
- next = arr[0];
243
- if (next && _.isArray(next))
244
- return new Error(name + ": extra array argument: " + JSON.stringify(next));
245
- if (next && _.isObject(next))
246
- return new Error(name + ": extra attr object argument: " + JSON.stringify(next));
247
- if (next && !_.isString(next))
248
- return new Error(name + ": invalid argument: " + JSON.stringify(next));
249
- }
250
-
251
- final.push([name, attrs, body]);
252
- }
253
-
254
- return final;
255
- };
256
-
257
-
258
-