escape_escape_escape 0.3.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -11
- data/VERSION +1 -1
- data/escape_escape_escape.gemspec +5 -2
- data/lib/escape_escape_escape.rb +219 -47
- data/specs/as_ruby/0001-html.rb +60 -0
- data/specs/as_ruby/0002-decode_html.rb +13 -0
- data/specs/as_ruby/0003-css_attr.rb +10 -0
- data/specs/as_ruby/0003-css_selector.rb +12 -0
- data/specs/as_ruby/0003-css_value.rb +53 -0
- data/specs/as_ruby/0004-==.rb +5 -0
- data/specs/as_ruby/0020-href.rb +118 -0
- data/specs/as_ruby/0030-clean_utf8.rb +34 -0
- data/specs/as_ruby/0040-escape.rb +41 -0
- data/specs/escape_escape_escape.rb +133 -21
- data/specs/lib/helpers.rb +1 -0
- metadata +61 -23
- data/LICENSE.txt +0 -23
- data/lib/beta.rb +0 -270
- data/lib/e_e_e.js +0 -258
- data/package.json +0 -31
- data/specs/as_json/0001-html.json +0 -23
- data/specs/as_json/0002-inner_html.json +0 -16
- data/specs/as_json/0010-text.json +0 -29
- data/specs/helpers.rb +0 -4
- data/test/sanitize_attrs.js +0 -132
- data/test/sanitize_html.js +0 -57
- data/test/sanitize_un_escape.js +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86100684d36a9aff31d78415463e2a3c357fe646
|
4
|
+
data.tar.gz: 77b8c43cc053204953f747630c7fdd5938034ec5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7705788caaf5f6c4996b5381c1b3d2e09d390a7ab5a795a0aa8d32ccbcdec772942caf3c433d06aa3145d36244b617f545193657ee8493582011d94b48ffeec5
|
7
|
+
data.tar.gz: 5a4785d7e96190194040cf9d9c7e766b1095ae742327d0e9b3486a1fcdd89e85c19cfb576486b3afad59e6cb7b3350e61ae7ebeb77d35a4568296a9bb0ef02e9
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
Escape_Escape_Escape
|
2
2
|
====================
|
3
3
|
|
4
4
|
My way of escaping and sanitizing HTML.
|
@@ -6,20 +6,15 @@ This is very personal to me, so you won't
|
|
6
6
|
find it useful or flexible to meet your needs.
|
7
7
|
|
8
8
|
|
9
|
-
NPM Use:
|
10
|
-
=====================
|
11
|
-
|
12
|
-
|
13
|
-
// npm install escape_escape_escape
|
14
|
-
|
15
|
-
var E = require("escape_escape_escape").Sanitize.html;
|
16
|
-
E("The <strong>brave</strong> and the <b>bold</b>.");
|
17
|
-
|
18
9
|
Rubygems Use:
|
19
10
|
=====================
|
20
11
|
|
21
|
-
|
22
12
|
# gem install escape_escape_escape
|
23
13
|
|
24
14
|
Escape_Escape_Escape.html my_html_string
|
25
15
|
Escape_Escape_Escape.text my_text_string
|
16
|
+
|
17
|
+
NOTE: Node and NPM Use:
|
18
|
+
=====================
|
19
|
+
|
20
|
+
This is no longer an npm module.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
1.1.0
|
@@ -21,8 +21,10 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
|
-
spec.
|
25
|
-
spec.
|
24
|
+
spec.add_runtime_dependency "addressable" , "> 2.3.5"
|
25
|
+
spec.add_runtime_dependency "escape_utils" , "> 1.0.0"
|
26
|
+
spec.add_runtime_dependency "unf" , "> 0.1.3"
|
27
|
+
spec.add_runtime_dependency "htmlentities" , ">= 4.3.2"
|
26
28
|
|
27
29
|
spec.add_development_dependency "pry" , ">= 0.9"
|
28
30
|
spec.add_development_dependency "rake" , ">= 10.3"
|
@@ -30,4 +32,5 @@ Gem::Specification.new do |spec|
|
|
30
32
|
spec.add_development_dependency "bacon" , ">= 1.0"
|
31
33
|
spec.add_development_dependency "Bacon_Colored" , ">= 0.1"
|
32
34
|
spec.add_development_dependency "multi_json" , ">= 1.10"
|
35
|
+
spec.add_development_dependency "sanitize" , ">= 3.0.1"
|
33
36
|
end
|
data/lib/escape_escape_escape.rb
CHANGED
@@ -1,31 +1,95 @@
|
|
1
1
|
|
2
|
+
require 'unf'
|
2
3
|
|
3
|
-
require "
|
4
|
+
require "escape_utils"
|
5
|
+
|
6
|
+
require 'escape_utils/html/rack' # to patch Rack::Utils
|
7
|
+
require 'escape_utils/html/erb' # to patch ERB::Util
|
8
|
+
require 'escape_utils/html/cgi' # to patch CGI
|
9
|
+
require 'escape_utils/html/haml' # to patch Haml::Helpers
|
10
|
+
|
11
|
+
require 'escape_utils/url/cgi' # to patch CGI
|
12
|
+
require 'escape_utils/url/erb' # to patch ERB::Util
|
13
|
+
require 'escape_utils/url/rack' # to patch Rack::Utils
|
14
|
+
require 'escape_utils/url/uri' # to patch URI
|
15
|
+
|
16
|
+
# ======================
|
4
17
|
require "htmlentities"
|
18
|
+
# ======================
|
19
|
+
#
|
20
|
+
require "uri"
|
21
|
+
require 'cgi' # Don't use URI.escape because it does not escape all invalid characters.
|
22
|
+
require "addressable/uri"
|
23
|
+
# ======================
|
24
|
+
|
25
|
+
def Escape_Escape_Escape s
|
26
|
+
Escape_Escape_Escape.escape(s)
|
27
|
+
end
|
5
28
|
|
6
29
|
class Escape_Escape_Escape
|
7
30
|
|
8
|
-
|
31
|
+
# === From sanitize gem:
|
32
|
+
# https://raw.githubusercontent.com/rgrove/sanitize/master/lib/sanitize.rb
|
33
|
+
REGEX_UNSUITABLE_CHARS = /[\u0340\u0341\u17a3\u17d3\u2028\u2029\u202a-\u202e\u206a-\u206f\ufff9-\ufffb\ufeff\ufffc\u{1d173}-\u{1d17a}\u{e0000}-\u{e007f}]/u
|
34
|
+
# ==================================================================================
|
35
|
+
|
36
|
+
CODER = HTMLEntities.new(:xhtml1)
|
37
|
+
|
38
|
+
Invalid = Class.new(RuntimeError)
|
39
|
+
Invalid_HREF = Class.new(RuntimeError)
|
40
|
+
|
41
|
+
Invalid_Type = Class.new(RuntimeError)
|
42
|
+
|
43
|
+
TAG_PATTERN = /\A[a-z]([a-z0-9\_]{0,}[a-z]{1,})?\z/i
|
44
|
+
|
45
|
+
VALID_CSS_VALUE = /\A[a-z0-9\;\-\_\#\ ]+\z/i
|
46
|
+
VALID_CSS_SELECTOR = /\A[a-z0-9\#\:\_\-\.\ ]+\z/i
|
47
|
+
VALID_CSS_ATTR = /\A[a-z0-9-]+\z/i
|
9
48
|
|
10
|
-
REPEATING_DOTS = /\.{1,}\//
|
11
49
|
INVALID_FILE_NAME_CHARS = /[^a-z0-9\_\.]{1,}/i
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
50
|
+
|
51
|
+
TABS = /\t*/
|
52
|
+
TAB = "\t"
|
53
|
+
HTML_TAB = "&#09;"
|
54
|
+
TWO_SPACES = '  '
|
55
|
+
BLANK = ''
|
56
|
+
SPACE = ' '
|
57
|
+
|
58
|
+
NL = "\n";
|
59
|
+
SPACES = /\ +/;
|
60
|
+
|
61
|
+
VALID_HTML_ID = /\A[0-9a-z_]+\z/i;
|
62
|
+
VALID_HTML_TAG = /\A[0-9a-z_]+\z/i;
|
63
|
+
|
64
|
+
REPEATING_DOTS = /\.{1,}/
|
65
|
+
|
66
|
+
# === MULTI_CONTROL_AND_UNPRINTABLE: ========================
|
67
|
+
#
|
68
|
+
# Unicode whitespaces, like 160 codepoint, tabs, etc.
|
69
|
+
# Excludes newline.
|
70
|
+
#
|
71
|
+
# Examples:
|
72
|
+
# \r\n \r\n -> \n \n
|
73
|
+
#
|
74
|
+
# NOTE: Don't use "\x20" because that is the space character.
|
75
|
+
#
|
76
|
+
# Whitespace regex ([:space:]) from:
|
77
|
+
# http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
|
78
|
+
#
|
79
|
+
# =====================================================
|
80
|
+
MULTI_CONTROL_AND_UNPRINTABLE = /[[:space:][:cntrl:]\x00-\x1f&&[^\n\ [:print:]]]+/i
|
81
|
+
# =====================================================
|
82
|
+
|
83
|
+
ENCODING_OPTIONS_CLEAN_UTF8 = {
|
84
|
+
:invalid => :replace, # Replace invalid byte sequences
|
85
|
+
:undef => :replace, # Replace anything not defined in ASCII
|
86
|
+
:replace => '' # Use a blank for those replacements
|
87
|
+
# :universal_newline => true # Always break lines with \n, not \r\n
|
88
|
+
# -- this is not working with :replace, so it has to be done manually
|
89
|
+
# with .gsub
|
90
|
+
}
|
91
|
+
|
92
|
+
CONFIG = {
|
29
93
|
:protocols => {
|
30
94
|
"a"=>{
|
31
95
|
"href"=>["ftp", "http", "https", "mailto", :relative]
|
@@ -36,51 +100,159 @@ class Escape_Escape_Escape
|
|
36
100
|
}
|
37
101
|
}
|
38
102
|
|
39
|
-
|
40
|
-
:invalid => :replace, # Replace invalid byte sequences
|
41
|
-
:undef => :replace, # Replace anything not defined in ASCII
|
42
|
-
:replace => '' # Use a blank for those replacements
|
43
|
-
# :newline => :universal
|
44
|
-
# :universal_newline => true # Always break lines with \n, not \r\n
|
45
|
-
}
|
46
|
-
|
103
|
+
class << self # ======================================================
|
47
104
|
|
105
|
+
def regexp str
|
106
|
+
@regexp_opts ||= Regexp::FIXEDENCODING | Regexp::IGNORECASE
|
107
|
+
Regexp.new(clean_utf8(str), @regexp_opts)
|
108
|
+
end
|
48
109
|
|
49
|
-
|
110
|
+
# ===============================================
|
111
|
+
# Raises: TZInfo::InvalidTimezoneIdentifier.
|
112
|
+
# ===============================================
|
113
|
+
def validate_timezone(timezone)
|
114
|
+
TZInfo::Timezone.get( timezone.to_s.strip ).identifier
|
115
|
+
end
|
50
116
|
|
51
|
-
#
|
117
|
+
# ==================================================================
|
118
|
+
# * normalized to :KC
|
119
|
+
# * "\r\n" changed to "\n"
|
120
|
+
# * all control characters stripped except for "\n"
|
121
|
+
# * whitespace stripped from beginning and end (unless :spaces is given).
|
122
|
+
# Normalization, then strip:
|
123
|
+
# http://msdn.microsoft.com/en-us/library/dd374126(v=vs.85).aspx
|
124
|
+
# http://www.unicode.org/faq/normalization.html
|
125
|
+
#
|
126
|
+
# Getting rid of non-ascii characters in ruby:
|
52
127
|
# http://stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
|
53
128
|
#
|
54
129
|
# Test:
|
55
130
|
# [160, 160,64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].
|
56
131
|
# inject('', :<<)
|
57
132
|
#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
133
|
+
# Options:
|
134
|
+
#
|
135
|
+
# :tabs   - keep tab characters (otherwise each tab becomes two spaces)
|
136
|
+
# :spaces - keep leading and trailing whitespace (skip the final strip)
|
137
|
+
#
|
138
|
+
def clean_utf8 raw_s, *opts
|
139
|
+
|
140
|
+
fail("Not a string: #{raw_s.inspect}") unless raw_s.is_a?(String)
|
141
|
+
|
142
|
+
# === Check options. ==================================================================
|
143
|
+
@plaintext_allowed_options ||= [ :spaces, :tabs ]
|
144
|
+
invalid_opts = opts - @plaintext_allowed_options
|
145
|
+
fail(ArgumentError, "INVALID OPTION: #{invalid_opts.inspect}" ) if !invalid_opts.empty?
|
146
|
+
# =====================================================================================
|
147
|
+
|
148
|
+
raw_s = raw_s.dup
|
149
|
+
|
150
|
+
# === Save tabs if requested.
|
151
|
+
raw_s.gsub!(TAB, HTML_TAB) if opts.include?(:tabs)
|
152
|
+
|
153
|
+
raw_s.encode!(Encoding.find('utf-8') , ENCODING_OPTIONS_CLEAN_UTF8)
|
154
|
+
raw_s.scrub!
|
155
|
+
raw_s.gsub!(TAB , TWO_SPACES)
|
156
|
+
raw_s.gsub!(MULTI_CONTROL_AND_UNPRINTABLE , BLANK)
|
157
|
+
raw_s.gsub!(REGEX_UNSUITABLE_CHARS , ' ')
|
158
|
+
|
159
|
+
clean = raw_s.to_nfkc
|
160
|
+
|
161
|
+
# Save whitespace or strip.
|
162
|
+
if !opts.include?(:spaces)
|
163
|
+
clean.strip!
|
164
|
+
end
|
165
|
+
|
166
|
+
# Put back tabs by request.
|
167
|
+
if opts.include?(:tabs)
|
168
|
+
clean.gsub!(HTML_TAB, TAB)
|
169
|
+
end
|
67
170
|
|
68
|
-
|
69
|
-
clean_utf8 s
|
171
|
+
clean
|
70
172
|
end
|
71
173
|
|
72
|
-
|
73
|
-
|
174
|
+
# ===============================================
|
175
|
+
#
|
176
|
+
# Handles urls and relative paths.
|
177
|
+
#
|
178
|
+
# Inspired from:
|
179
|
+
# http://stackoverflow.com/a/13041565
|
180
|
+
#
|
181
|
+
# ===============================================
|
182
|
+
alias_method :path, def href raw_str
|
183
|
+
fail("Not a string: #{raw_str.inspect}") unless raw_str.is_a?(String)
|
184
|
+
|
185
|
+
begin
|
186
|
+
uri = URI.parse(decode_html(raw_str))
|
187
|
+
if uri.scheme
|
188
|
+
uri.scheme = uri.scheme.to_s.strip.downcase
|
189
|
+
end
|
190
|
+
|
191
|
+
fail( Invalid_HREF, "javascript:// is not allowed" ) if (uri.scheme || ''.freeze)['javascript'.freeze]
|
192
|
+
fail( Invalid_HREF, "address is invalid") if !uri.host && !uri.relative?
|
193
|
+
|
194
|
+
html(EscapeUtils.escape_uri uri.to_s)
|
195
|
+
rescue URI::InvalidURIError => e
|
196
|
+
raise Invalid_HREF, e.message
|
197
|
+
end
|
74
198
|
end
|
75
199
|
|
76
|
-
|
77
|
-
|
200
|
+
# ===============================================
|
201
|
+
# HTML
|
202
|
+
# ===============================================
|
203
|
+
|
204
|
+
def tag( raw_tag )
|
205
|
+
return nil unless raw_tag[TAG_PATTERN]
|
206
|
+
raw_tag
|
78
207
|
end
|
79
208
|
|
80
|
-
def
|
81
|
-
|
209
|
+
def decode_html raw
|
210
|
+
fail("Not a string: #{raw.inspect}") unless raw.is_a?(String)
|
211
|
+
CODER.decode clean_utf8(raw)
|
82
212
|
end
|
83
213
|
|
214
|
+
%w{attr selector value}.each { |name|
|
215
|
+
eval <<-EOF, nil, __FILE__, __LINE__ + 1
|
216
|
+
def css_#{name} raw
|
217
|
+
fail(Invalid_Type, "Not a string: \#{raw.inspect}") unless raw.is_a?(String)
|
218
|
+
clean = html(raw)
|
219
|
+
return clean if clean[VALID_CSS_#{name.upcase}]
|
220
|
+
fail Invalid, "contains invalid chars: \#{raw.inspect}"
|
221
|
+
end
|
222
|
+
EOF
|
223
|
+
}
|
224
|
+
|
225
|
+
# ===============================================
|
226
|
+
# A better alternative than "Rack::Utils.escape_html". Escapes
|
227
|
+
# various characters (including '&', '<', '>', and both quotation mark types)
|
228
|
+
# to HTML decimal entities. Also escapes the characters from
|
229
|
+
# <HTML_ESCAPE_TABLE>.
|
230
|
+
#
|
231
|
+
# Text has to be UTF-8 before encoding, according to HTMLEntities gem.
|
232
|
+
# Therefore, all text is run through clean_utf8 (via decode_html) before encoding.
|
233
|
+
# ===============================================
|
234
|
+
def html( raw_text )
|
235
|
+
EscapeUtils.escape_html(decode_html(raw_text))
|
236
|
+
end # === def html
|
237
|
+
|
238
|
+
def escape o, method_name = :html
|
239
|
+
if o.kind_of? Hash
|
240
|
+
return(
|
241
|
+
o.inject({}) { |memo, (k, v)|
|
242
|
+
memo[escape(k,method_name)] = escape(v, method_name)
|
243
|
+
memo
|
244
|
+
}
|
245
|
+
)
|
246
|
+
end
|
247
|
+
|
248
|
+
return(send(method_name, o.to_s).to_sym) if o.is_a?(Symbol)
|
249
|
+
return(o.map { |v| escape(v, method_name) }) if o.kind_of? Array
|
250
|
+
return send(method_name, o) if o.is_a?(String)
|
251
|
+
return send(method_name, o.to_s) if o == true || o == false || o.kind_of?(Numeric)
|
252
|
+
|
253
|
+
fail Invalid, "Not a String, Number, Array, or Hash"
|
254
|
+
end # === def
|
255
|
+
|
84
256
|
end # === class self ===
|
85
257
|
|
86
258
|
end # === class Escape_Escape_Escape ===
|
@@ -0,0 +1,60 @@
|
|
1
|
+
it "does not re-escape already escaped html"
|
2
|
+
input "<p>Hello & GoodBye</p>"
|
3
|
+
output "<p>Hello & GoodBye</p>"
|
4
|
+
|
5
|
+
it "normalizes UNICODE: Ⅷ => VIII"
|
6
|
+
input "<p> Ⅷ </p>"
|
7
|
+
output "<p> VIII </p>"
|
8
|
+
|
9
|
+
it "normalizes UNICODE: \u2167 => VIII"
|
10
|
+
input "<p> \u2167 </p>"
|
11
|
+
output "<p> VIII </p>"
|
12
|
+
|
13
|
+
it "encodes apostrophe: ' -> '"
|
14
|
+
input "Chars: ' '"
|
15
|
+
output "Chars: ' '"
|
16
|
+
|
17
|
+
it 'does not re-escape already escaped text mixed with HTML'
|
18
|
+
input "<p>Hi</p><p>Hi</p>"
|
19
|
+
output "<p>Hi</p><p>Hi</p>"
|
20
|
+
|
21
|
+
it 'does not escape special chars: "Hello ©®∆"'
|
22
|
+
input "Hello & World ©®∆"
|
23
|
+
output "Hello & World ©®∆"
|
24
|
+
|
25
|
+
it 'escapes all 70 different combos of "<"'
|
26
|
+
input BRACKETS
|
27
|
+
stack [:split, :uniq, :join, [' '], "< %3C &lt &LT &LT; &#60 &#060 &#0060 &#00060 &#000060 &#0000060 &#x3c &#x03c &#x003c &#x0003c &#x00003c &#x000003c &#x000003c; &#X3c &#X03c &#X003c &#X0003c &#X00003c &#X000003c &#X000003c; &#x3C &#x03C &#x003C &#x0003C &#x00003C &#x000003C &#x000003C; &#X3C &#X03C &#X003C &#X0003C &#X00003C &#X000003C &#X000003C;"]
|
28
|
+
|
29
|
+
|
30
|
+
it "fails with RuntimeError if: true"
|
31
|
+
input true
|
32
|
+
raises RuntimeError, /Not a string: true/
|
33
|
+
|
34
|
+
|
35
|
+
it "fails with RuntimeError if: false"
|
36
|
+
input false
|
37
|
+
raises RuntimeError, /Not a string: false/
|
38
|
+
|
39
|
+
|
40
|
+
it "fails with RuntimeError if numeric"
|
41
|
+
input 1
|
42
|
+
raises RuntimeError, /Not a string: 1/
|
43
|
+
|
44
|
+
it 'removes Unicode characters that do not belong in html'
|
45
|
+
input "b \u0340 \u0341 \u17a3 \u17d3 \u2028 \u2029 \u202a"
|
46
|
+
output "b"
|
47
|
+
|
48
|
+
it "removes unprintable characters"
|
49
|
+
input "end-\u2028-\u2029-"
|
50
|
+
output "end---"
|
51
|
+
|
52
|
+
it "escapes /:"
|
53
|
+
input "/"
|
54
|
+
output "&sol;"
|
55
|
+
|
56
|
+
it "escapes / regardless of case:"
|
57
|
+
input "&soL; &SoL; &SOL;"
|
58
|
+
output "&soL; &SoL; &SOL;"
|
59
|
+
|
60
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
it "un-escapes special chars: \"Hello ©®∆\""
|
2
|
+
input "Hello & World ©®∆"
|
3
|
+
output "Hello & World ©®∆"
|
4
|
+
|
5
|
+
it 'un-escapes escaped text mixed with HTML'
|
6
|
+
input "<p>Hi&</p>"
|
7
|
+
output "<p>Hi&</p>"
|
8
|
+
|
9
|
+
|
10
|
+
it 'un-escapes all 70 different combos of "<"'
|
11
|
+
input BRACKETS
|
12
|
+
stack [:split, :uniq, :join, [' '], '< %3C < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < <']
|
13
|
+
|