escape_utils 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/CHANGELOG.md +5 -0
- data/README.rdoc +51 -13
- data/VERSION +1 -1
- data/benchmark/html_escape.rb +20 -4
- data/benchmark/html_unescape.rb +2 -2
- data/benchmark/javascript_escape.rb +2 -4
- data/benchmark/javascript_unescape.rb +23 -0
- data/benchmark/url_escape.rb +61 -0
- data/benchmark/url_unescape.rb +54 -0
- data/escape_utils.gemspec +21 -6
- data/ext/escape_utils.c +328 -72
- data/lib/escape_utils.rb +3 -1
- data/lib/escape_utils/html/cgi.rb +8 -2
- data/lib/escape_utils/html/erb.rb +2 -3
- data/lib/escape_utils/html/haml.rb +3 -3
- data/lib/escape_utils/html/rack.rb +4 -6
- data/lib/escape_utils/html_safety.rb +19 -0
- data/lib/escape_utils/url/cgi.rb +10 -0
- data/lib/escape_utils/url/erb.rb +12 -0
- data/lib/escape_utils/url/rack.rb +14 -0
- data/spec/html/escape_spec.rb +4 -4
- data/spec/html/unescape_spec.rb +4 -4
- data/spec/html_safety_spec.rb +49 -0
- data/spec/javascript/escape_spec.rb +1 -1
- data/spec/javascript/unescape_spec.rb +39 -0
- data/spec/url/escape_spec.rb +52 -0
- data/spec/url/unescape_spec.rb +52 -0
- metadata +24 -4
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.1.5 (July 13th, 2010)
|
4
|
+
* add URL escaping and unescaping
|
5
|
+
* major refactor of HTML and Javascript escaping and unescaping logic for a decent speed up
|
6
|
+
* HTML escaping now takes html_safe? into account (for Rails/ActiveSupport users) - thanks yury!
|
7
|
+
|
3
8
|
## 0.1.4 (June 9th, 2010)
|
4
9
|
* ensure strings are passed in from monkey-patches
|
5
10
|
|
data/README.rdoc
CHANGED
@@ -47,8 +47,8 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
|
|
47
47
|
|
48
48
|
== Benchmarks
|
49
49
|
|
50
|
-
In my testing, escaping html is around 10-
|
51
|
-
While unescaping html is around
|
50
|
+
In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
|
51
|
+
While unescaping html is around 40-100x faster than CGI.unescapeHTML which is also pure ruby.
|
52
52
|
Escaping Javascript is around 16-30x faster.
|
53
53
|
|
54
54
|
This output is from my laptop using the benchmark scripts in the benchmarks folder.
|
@@ -58,28 +58,66 @@ This output is from my laptop using the benchmark scripts in the benchmarks fold
|
|
58
58
|
==== Escaping
|
59
59
|
|
60
60
|
Rack::Utils.escape_html
|
61
|
-
|
61
|
+
9.650000 0.090000 9.740000 ( 9.750756)
|
62
|
+
Haml::Helpers.html_escape
|
63
|
+
9.310000 0.110000 9.420000 ( 9.417317)
|
62
64
|
ERB::Util.html_escape
|
63
|
-
|
65
|
+
5.330000 0.390000 5.720000 ( 5.748394)
|
64
66
|
CGI.escapeHTML
|
65
|
-
|
66
|
-
|
67
|
-
0.
|
67
|
+
5.370000 0.380000 5.750000 ( 5.791344)
|
68
|
+
FasterHTMLEscape.html_escape
|
69
|
+
0.520000 0.010000 0.530000 ( 0.539485)
|
70
|
+
fast_xs_extra#fast_xs_html
|
71
|
+
0.310000 0.030000 0.340000 ( 0.336734)
|
68
72
|
EscapeUtils.escape_html
|
69
|
-
0.
|
73
|
+
0.200000 0.050000 0.250000 ( 0.258839)
|
70
74
|
|
71
|
-
|
75
|
+
==== Unescaping
|
72
76
|
|
73
77
|
CGI.unescapeHTML
|
74
|
-
|
78
|
+
16.520000 0.080000 16.600000 ( 16.853888)
|
75
79
|
EscapeUtils.unescape_html
|
76
|
-
0.
|
80
|
+
0.120000 0.040000 0.160000 ( 0.162696)
|
77
81
|
|
78
82
|
=== Javascript
|
79
83
|
|
80
84
|
==== Escaping
|
81
85
|
|
82
86
|
ActionView::Helpers::JavaScriptHelper#escape_javascript
|
83
|
-
|
87
|
+
3.810000 0.100000 3.910000 ( 3.925557)
|
84
88
|
EscapeUtils.escape_javascript
|
85
|
-
0.
|
89
|
+
0.200000 0.040000 0.240000 ( 0.236692)
|
90
|
+
|
91
|
+
==== Unescaping
|
92
|
+
|
93
|
+
I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
|
94
|
+
|
95
|
+
=== URL
|
96
|
+
|
97
|
+
==== Escaping
|
98
|
+
|
99
|
+
ERB::Util.url_encode
|
100
|
+
0.520000 0.010000 0.530000 ( 0.529277)
|
101
|
+
Rack::Utils.escape
|
102
|
+
0.460000 0.010000 0.470000 ( 0.466962)
|
103
|
+
CGI.escape
|
104
|
+
0.440000 0.000000 0.440000 ( 0.443017)
|
105
|
+
URLEscape#escape
|
106
|
+
0.040000 0.000000 0.040000 ( 0.045661)
|
107
|
+
fast_xs_extra#fast_xs_url
|
108
|
+
0.010000 0.000000 0.010000 ( 0.015429)
|
109
|
+
EscapeUtils.escape_url
|
110
|
+
0.010000 0.000000 0.010000 ( 0.010843)
|
111
|
+
|
112
|
+
==== Unescaping
|
113
|
+
|
114
|
+
Rack::Utils.unescape
|
115
|
+
0.250000 0.010000 0.260000 ( 0.257558)
|
116
|
+
CGI.unescape
|
117
|
+
0.250000 0.000000 0.250000 ( 0.257837)
|
118
|
+
URLEscape#unescape
|
119
|
+
0.040000 0.000000 0.040000 ( 0.031548)
|
120
|
+
fast_xs_extra#fast_uxs_cgi
|
121
|
+
0.010000 0.000000 0.010000 ( 0.006062)
|
122
|
+
EscapeUtils.unescape_url
|
123
|
+
0.000000 0.000000 0.000000 ( 0.005679)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.5
|
data/benchmark/html_escape.rb
CHANGED
@@ -9,6 +9,8 @@ require 'rack'
|
|
9
9
|
require 'erb'
|
10
10
|
require 'cgi'
|
11
11
|
require 'haml'
|
12
|
+
require 'fast_xs_extra'
|
13
|
+
require 'faster_html_escape'
|
12
14
|
require 'escape_utils'
|
13
15
|
|
14
16
|
module HamlBench
|
@@ -16,9 +18,9 @@ module HamlBench
|
|
16
18
|
end
|
17
19
|
|
18
20
|
times = 100
|
19
|
-
url = "http://
|
21
|
+
url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
|
20
22
|
html = `curl -s #{url}`
|
21
|
-
puts "Escaping #{html.bytesize} bytes of html from #{url}"
|
23
|
+
puts "Escaping #{html.bytesize} bytes of html #{times} times, from #{url}"
|
22
24
|
|
23
25
|
Benchmark.bmbm do |x|
|
24
26
|
x.report do
|
@@ -28,6 +30,13 @@ Benchmark.bmbm do |x|
|
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
33
|
+
x.report do
|
34
|
+
puts "Haml::Helpers.html_escape"
|
35
|
+
times.times do
|
36
|
+
HamlBench.html_escape(html)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
31
40
|
x.report do
|
32
41
|
puts "ERB::Util.html_escape"
|
33
42
|
times.times do
|
@@ -43,9 +52,16 @@ Benchmark.bmbm do |x|
|
|
43
52
|
end
|
44
53
|
|
45
54
|
x.report do
|
46
|
-
puts "
|
55
|
+
puts "FasterHTMLEscape.html_escape"
|
47
56
|
times.times do
|
48
|
-
|
57
|
+
FasterHTMLEscape.html_escape(html)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
x.report do
|
62
|
+
puts "fast_xs_extra#fast_xs_html"
|
63
|
+
times.times do
|
64
|
+
html.fast_xs_html
|
49
65
|
end
|
50
66
|
end
|
51
67
|
|
data/benchmark/html_unescape.rb
CHANGED
@@ -14,10 +14,10 @@ module HamlBench
|
|
14
14
|
end
|
15
15
|
|
16
16
|
times = 100
|
17
|
-
url = "http://
|
17
|
+
url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
|
18
18
|
html = `curl -s #{url}`
|
19
19
|
escaped_html = EscapeUtils.escape_html(html)
|
20
|
-
puts "Unescaping #{escaped_html.bytesize} bytes of escaped html from #{url}"
|
20
|
+
puts "Unescaping #{escaped_html.bytesize} bytes of escaped html #{times} times, from #{url}"
|
21
21
|
|
22
22
|
Benchmark.bmbm do |x|
|
23
23
|
x.report do
|
@@ -13,11 +13,9 @@ class ActionPackBench
|
|
13
13
|
end
|
14
14
|
|
15
15
|
times = 100
|
16
|
-
url = "http://
|
16
|
+
url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
|
17
17
|
javascript = `curl -s #{url}`
|
18
|
-
puts "Escaping #{javascript.bytesize} bytes of javascript from #{url}"
|
19
|
-
|
20
|
-
puts ActionPackBench.escape_javascript(javascript).eql?(EscapeUtils.escape_javascript(javascript))
|
18
|
+
puts "Escaping #{javascript.bytesize} bytes of javascript #{times} times, from #{url}"
|
21
19
|
|
22
20
|
Benchmark.bmbm do |x|
|
23
21
|
x.report do
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
# Benchmark for EscapeUtils.unescape_javascript: downloads a JavaScript file,
# escapes it once, then times unescaping the escaped copy `times` times.
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')

require 'rubygems'
require 'benchmark'

require 'escape_utils'

times = 100
url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
javascript = `curl -s #{url}`
# The input under test is the escaped form, so that is the size reported.
escaped_javascript = EscapeUtils.escape_javascript(javascript)
puts "Unescaping #{escaped_javascript.bytesize} bytes of escaped javascript #{times} times, from #{url}"

Benchmark.bmbm do |x|
  x.report do
    puts "EscapeUtils.unescape_javascript"
    times.times do
      EscapeUtils.unescape_javascript(escaped_javascript)
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: utf-8
# Benchmark comparing URL-escaping implementations against
# EscapeUtils.escape_url on one fixed URL, each run `times` times.
base_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift File.expand_path('..', base_dir)
$LOAD_PATH.unshift File.expand_path('../lib', base_dir)

require 'rubygems'
require 'benchmark'

require 'rack'
require 'erb'
require 'cgi'
require 'url_escape'
require 'fast_xs_extra'
require 'escape_utils'

times = 10_000
url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
puts "Escaping a #{url.bytesize} byte URL #{times} times"

# Each report prints its own label so the output lines up with the timings.
Benchmark.bmbm do |bench|
  bench.report do
    puts "ERB::Util.url_encode"
    times.times { ERB::Util.url_encode(url) }
  end

  bench.report do
    puts "Rack::Utils.escape"
    times.times { Rack::Utils.escape(url) }
  end

  bench.report do
    puts "CGI.escape"
    times.times { CGI.escape(url) }
  end

  bench.report do
    puts "URLEscape#escape"
    times.times { URLEscape.escape(url) }
  end

  bench.report do
    puts "fast_xs_extra#fast_xs_url"
    times.times { url.fast_xs_url }
  end

  bench.report do
    puts "EscapeUtils.escape_url"
    times.times { EscapeUtils.escape_url(url) }
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# encoding: utf-8
# Benchmark comparing URL-unescaping implementations against
# EscapeUtils.unescape_url. Every candidate decodes the same escaped URL,
# `times` times.
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')

require 'rubygems'
require 'benchmark'

require 'rack'
require 'cgi'
require 'url_escape'
require 'fast_xs_extra'
require 'escape_utils'

times = 10_000
url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
escaped_url = EscapeUtils.escape_url(url)
# The input under test is the escaped form, so that is the size reported.
puts "Unescaping a #{escaped_url.bytesize} byte escaped URL #{times} times"

Benchmark.bmbm do |x|
  x.report do
    puts "Rack::Utils.unescape"
    times.times do
      Rack::Utils.unescape(escaped_url)
    end
  end

  x.report do
    puts "CGI.unescape"
    times.times do
      CGI.unescape(escaped_url)
    end
  end

  x.report do
    puts "URLEscape#unescape"
    times.times do
      URLEscape.unescape(escaped_url)
    end
  end

  x.report do
    puts "fast_xs_extra#fast_uxs_cgi"
    times.times do
      # decode the escaped URL like every other candidate, not the raw one
      escaped_url.fast_uxs_cgi
    end
  end

  x.report do
    puts "EscapeUtils.unescape_url"
    times.times do
      EscapeUtils.unescape_url(escaped_url)
    end
  end
end
|
data/escape_utils.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{escape_utils}
|
8
|
-
s.version = "0.1.
|
8
|
+
s.version = "0.1.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Brian Lopez"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-07-13}
|
13
13
|
s.email = %q{seniorlopez@gmail.com}
|
14
14
|
s.extensions = ["ext/extconf.rb"]
|
15
15
|
s.extra_rdoc_files = [
|
@@ -25,6 +25,9 @@ Gem::Specification.new do |s|
|
|
25
25
|
"benchmark/html_escape.rb",
|
26
26
|
"benchmark/html_unescape.rb",
|
27
27
|
"benchmark/javascript_escape.rb",
|
28
|
+
"benchmark/javascript_unescape.rb",
|
29
|
+
"benchmark/url_escape.rb",
|
30
|
+
"benchmark/url_unescape.rb",
|
28
31
|
"escape_utils.gemspec",
|
29
32
|
"ext/escape_utils.c",
|
30
33
|
"ext/extconf.rb",
|
@@ -33,31 +36,43 @@ Gem::Specification.new do |s|
|
|
33
36
|
"lib/escape_utils/html/erb.rb",
|
34
37
|
"lib/escape_utils/html/haml.rb",
|
35
38
|
"lib/escape_utils/html/rack.rb",
|
39
|
+
"lib/escape_utils/html_safety.rb",
|
36
40
|
"lib/escape_utils/javascript/action_view.rb",
|
41
|
+
"lib/escape_utils/url/cgi.rb",
|
42
|
+
"lib/escape_utils/url/erb.rb",
|
43
|
+
"lib/escape_utils/url/rack.rb",
|
37
44
|
"spec/html/escape_spec.rb",
|
38
45
|
"spec/html/unescape_spec.rb",
|
46
|
+
"spec/html_safety_spec.rb",
|
39
47
|
"spec/javascript/escape_spec.rb",
|
48
|
+
"spec/javascript/unescape_spec.rb",
|
40
49
|
"spec/rcov.opts",
|
41
50
|
"spec/spec.opts",
|
42
|
-
"spec/spec_helper.rb"
|
51
|
+
"spec/spec_helper.rb",
|
52
|
+
"spec/url/escape_spec.rb",
|
53
|
+
"spec/url/unescape_spec.rb"
|
43
54
|
]
|
44
55
|
s.homepage = %q{http://github.com/brianmario/escape_utils}
|
45
56
|
s.rdoc_options = ["--charset=UTF-8"]
|
46
57
|
s.require_paths = ["lib", "ext"]
|
47
|
-
s.rubygems_version = %q{1.3.
|
58
|
+
s.rubygems_version = %q{1.3.7}
|
48
59
|
s.summary = %q{Faster string escaping routines for your web apps}
|
49
60
|
s.test_files = [
|
50
61
|
"spec/html/escape_spec.rb",
|
51
62
|
"spec/html/unescape_spec.rb",
|
63
|
+
"spec/html_safety_spec.rb",
|
52
64
|
"spec/javascript/escape_spec.rb",
|
53
|
-
"spec/
|
65
|
+
"spec/javascript/unescape_spec.rb",
|
66
|
+
"spec/spec_helper.rb",
|
67
|
+
"spec/url/escape_spec.rb",
|
68
|
+
"spec/url/unescape_spec.rb"
|
54
69
|
]
|
55
70
|
|
56
71
|
if s.respond_to? :specification_version then
|
57
72
|
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
58
73
|
s.specification_version = 3
|
59
74
|
|
60
|
-
if Gem::Version.new(Gem::
|
75
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
61
76
|
else
|
62
77
|
end
|
63
78
|
else
|
data/ext/escape_utils.c
CHANGED
@@ -4,93 +4,234 @@
|
|
4
4
|
static rb_encoding *utf8Encoding;
|
5
5
|
#endif
|
6
6
|
|
7
|
-
#define
|
8
|
-
|
9
|
-
|
10
|
-
offset = i+scoot_by; \
|
11
|
-
memcpy(&out[total], escape, len); \
|
12
|
-
total += len; \
|
7
|
+
/*
 * Character-class helpers used by the URL escaping/unescaping routines.
 * Every argument and every expansion is parenthesized so the macros compose
 * safely inside larger expressions. Arguments are evaluated more than once,
 * so do not pass expressions with side effects.
 */

/* Non-zero when c is an ASCII hexadecimal digit (0-9, A-F, a-f). */
#define IS_HEX(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))

/*
 * Non-zero when c is NOT an ASCII letter or digit. Despite the name this
 * tests the full A-Z / a-z ranges, i.e. "not alphanumeric".
 */
#define NOT_HEX(c) (((c) < '0' || (c) > '9') && ((c) < 'A' || (c) > 'Z') && ((c) < 'a' || (c) > 'z'))

/* Numeric value (0-15) of the hex digit c; only meaningful when IS_HEX(c) holds. */
#define UNHEX(c) ((c) >= '0' && (c) <= '9' ? (c) - '0' : (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - 'a' + 10)
|
13
10
|
|
14
11
|
/*
 * Copies the in_len bytes at `in` to `out`, replacing the HTML-significant
 * characters < > & ' " / with their entity forms, and returns the number of
 * bytes written. No NUL terminator is appended.
 *
 * The caller owns `out`; the longest replacement is 6 bytes ("&quot;"), so a
 * buffer of 6 * in_len bytes is always large enough.
 */
static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
  size_t written = 0;
  size_t i, k;

  for (i = 0; i < in_len; i++) {
    const unsigned char ch = in[i];
    const char *entity;
    size_t entity_len;

    switch (ch) {
      case '<':  entity = "&lt;";   entity_len = 4; break;
      case '>':  entity = "&gt;";   entity_len = 4; break;
      case '&':  entity = "&amp;";  entity_len = 5; break;
      case '\'': entity = "&#39;";  entity_len = 5; break;
      case '\"': entity = "&quot;"; entity_len = 6; break;
      case '/':  entity = "&#47;";  entity_len = 5; break;
      default:   entity = "";       entity_len = 0; break;
    }

    if (entity_len == 0) {
      // ordinary byte: pass it through untouched
      out[written++] = ch;
      continue;
    }

    for (k = 0; k < entity_len; k++) {
      out[written++] = (unsigned char)entity[k];
    }
  }

  return written;
}
|
32
52
|
|
33
53
|
/*
 * Copies the in_len bytes at `in` to `out`, turning the entities
 * &lt; &gt; &amp; &#39; &#47; &quot; back into the characters they stand
 * for, and returns the number of bytes written. No NUL terminator is
 * appended.
 *
 * An '&' that does not start one of those entities (including an entity cut
 * off by the end of the input) is copied through unchanged. The output is
 * never longer than the input, so `out` needs in_len bytes.
 */
static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
  // text that follows '&', its length, and the byte it decodes to
  static const struct {
    const char *tail;
    size_t len;
    unsigned char decoded;
  } entities[] = {
    { "lt;",   3, '<'  },
    { "gt;",   3, '>'  },
    { "amp;",  4, '&'  },
    { "#39;",  4, '\'' },
    { "#47;",  4, '/'  },
    { "quot;", 5, '\"' }
  };
  const size_t entity_count = sizeof(entities) / sizeof(entities[0]);
  size_t pos = 0, total = 0;

  while (pos < in_len) {
    unsigned char cur = in[pos++];

    if (cur == '&') {
      const size_t remaining = in_len - pos;
      size_t e, k;

      for (e = 0; e < entity_count; e++) {
        // never compare past the end of the input
        if (entities[e].len > remaining) {
          continue;
        }
        for (k = 0; k < entities[e].len; k++) {
          if (in[pos + k] != (unsigned char)entities[e].tail[k]) {
            break;
          }
        }
        if (k == entities[e].len) {
          cur = entities[e].decoded;
          pos += k;
          break;
        }
      }
      // no match: cur is still '&' and is emitted literally below
    }

    out[total++] = cur;
  }

  return total;
}
|
101
|
+
|
102
|
+
/*
 * Copies the in_len bytes at `in` to `out`, escaping them for embedding in a
 * JavaScript string literal, and returns the number of bytes written. No NUL
 * terminator is appended.
 *
 *   \        ->  \\
 *   </       ->  <\/
 *   \r\n, \r, \n  ->  \n  (backslash + 'n')
 *   ' and "  ->  \' and \"
 *
 * Every other byte, including a '<' that is not followed by '/', is copied
 * unchanged. The output is at most twice the input, so `out` needs
 * 2 * in_len bytes.
 */
static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
  size_t pos = 0, total = 0;

  while (pos < in_len) {
    const unsigned char cur = in[pos++];
    // look-ahead is only legal while another input byte exists
    const int has_next = pos < in_len;

    switch (cur) {
      case '\\':
        out[total++] = '\\';
        out[total++] = '\\';
        break;
      case '<':
        out[total++] = '<';
        if (has_next && in[pos] == '/') {
          out[total++] = '\\';
          out[total++] = '/';
          pos++;
        }
        break;
      case '\r':
        // a CRLF pair collapses into a single escaped newline
        if (has_next && in[pos] == '\n') {
          pos++;
        }
        out[total++] = '\\';
        out[total++] = 'n';
        break;
      case '\n':
        out[total++] = '\\';
        out[total++] = 'n';
        break;
      case '\'':
      case '\"':
        out[total++] = '\\';
        out[total++] = cur;
        break;
      default:
        out[total++] = cur;
        break;
    }
  }

  return total;
}
|
62
151
|
|
63
|
-
|
152
|
+
/*
 * Copies the in_len bytes at `in` to `out`, undoing the escapes produced by
 * escape_javascript, and returns the number of bytes written. No NUL
 * terminator is appended.
 *
 *   \n -> newline,  \\ -> \,  \' -> ',  \" -> ",  \/ -> /
 *
 * A backslash followed by any other byte, or a backslash at the very end of
 * the input, is copied through unchanged together with whatever follows it.
 * The output is never longer than the input, so `out` needs in_len bytes.
 */
static size_t unescape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
  size_t pos = 0, total = 0;

  while (pos < in_len) {
    unsigned char cur = in[pos++];

    // only decode when the backslash actually has a byte after it
    if (cur == '\\' && pos < in_len) {
      switch (in[pos]) {
        case 'n':
          cur = '\n';
          pos++;
          break;
        case '\\':
        case '\'':
        case '\"':
        case '/':
          cur = in[pos];
          pos++;
          break;
        default:
          // unknown escape: keep the backslash; the next byte is copied
          // by the following iteration
          break;
      }
    }

    out[total++] = cur;
  }

  return total;
}
|
65
187
|
|
66
|
-
static size_t
|
67
|
-
size_t
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
break;
|
84
|
-
case '\n': APPEND_BUFFER("\\n", 2, 1); break;
|
85
|
-
case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
|
86
|
-
case '\'': APPEND_BUFFER("\\'", 2, 1); break;
|
188
|
+
/*
 * Copies the in_len bytes at `in` to `out` in URL-escaped form and returns
 * the number of bytes written. No NUL terminator is appended.
 *
 * A space becomes '+'. ASCII letters, digits, '_', '.' and '-' are copied
 * unchanged. Every other byte becomes '%' followed by two uppercase hex
 * digits. The output is at most three times the input, so `out` needs
 * 3 * in_len bytes.
 */
static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
  static const char digits[] = "0123456789ABCDEF";
  size_t written = 0;
  size_t i;

  for (i = 0; i < in_len; i++) {
    const unsigned char ch = in[i];
    // NOT_HEX is really "not alphanumeric"; the punctuation is allowed on top
    const int passthrough = ch == '_' || ch == '.' || ch == '-' || !(NOT_HEX(ch));

    if (ch == ' ') {
      out[written++] = '+';
    } else if (passthrough) {
      out[written++] = ch;
    } else {
      out[written++] = '%';
      out[written++] = (unsigned char)digits[(ch >> 4) & 0x0f];
      out[written++] = (unsigned char)digits[ch & 0x0f];
    }
  }

  return written;
}
|
211
|
+
|
212
|
+
/* Returns the value 0-15 of an ASCII hex digit, or -1 when c is not one. */
static int url_hex_digit_value(unsigned char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  }
  if (c >= 'a' && c <= 'f') {
    return c - 'a' + 10;
  }
  return -1;
}

/*
 * Copies the in_len bytes at `in` to `out`, decoding URL escapes, and
 * returns the number of bytes written. No NUL terminator is appended.
 *
 * '+' becomes a space and "%XX" (two hex digits, either case) becomes the
 * byte with that value. A '%' that is not followed by two hex digits within
 * the input is copied through unchanged. The output is never longer than
 * the input, so `out` needs in_len bytes.
 */
static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
  size_t pos = 0, total = 0;

  while (pos < in_len) {
    unsigned char cur = in[pos++];

    if (cur == '+') {
      cur = ' ';
    } else if (cur == '%' && in_len - pos >= 2) {
      const int hi = url_hex_digit_value(in[pos]);
      const int lo = url_hex_digit_value(in[pos + 1]);

      if (hi >= 0 && lo >= 0) {
        cur = (unsigned char)((hi << 4) | lo);
        // both hex digits are consumed along with the '%'
        pos += 2;
      }
    }

    out[total++] = cur;
  }

  return total;
}
|
95
236
|
|
96
237
|
static VALUE rb_escape_html(VALUE self, VALUE str) {
|
@@ -202,15 +343,130 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
|
|
202
343
|
return rb_output_buf;
|
203
344
|
}
|
204
345
|
|
346
|
+
/*
 * Ruby binding: EscapeUtils.unescape_javascript(str).
 *
 * Returns a new String holding `str` with its JavaScript escapes undone by
 * unescape_javascript(). nil yields an empty String; any other non-String
 * argument is rejected by Check_Type.
 *
 * When encoding support is compiled in, the result is tagged with the
 * input's encoding and then exported to the default internal encoding, or to
 * utf8Encoding when no default internal encoding is set.
 */
static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
  if (str == Qnil) {
    return rb_str_new2("");
  }

  Check_Type(str, T_STRING);

  VALUE rb_output_buf;
#ifdef HAVE_RUBY_ENCODING_H
  rb_encoding *default_internal_enc = rb_default_internal_encoding();
  rb_encoding *original_encoding = rb_enc_get(str);
#endif
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
  size_t len = RSTRING_LEN(str), new_len = 0;

  // unescaping never grows the string, so len bytes is the worst case;
  // request at least one byte so a NULL result always means failure
  unsigned char *outBuf = malloc(sizeof *outBuf * (len ? len : 1));
  if (outBuf == NULL) {
    rb_memerror();
  }

  // perform our unescape, returning the new string's length
  new_len = unescape_javascript(outBuf, inBuf, len);

  // create our new ruby string
  rb_output_buf = rb_str_new((char *)outBuf, new_len);

  // free the temporary C string
  free(outBuf);

#ifdef HAVE_RUBY_ENCODING_H
  rb_enc_associate(rb_output_buf, original_encoding);
  if (default_internal_enc) {
    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
  } else {
    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
  }
#endif
  return rb_output_buf;
}
|
384
|
+
|
385
|
+
/*
 * Ruby binding: EscapeUtils.escape_url(str).
 *
 * Returns a new String holding `str` URL-escaped by escape_url(). A
 * non-String argument (nil included) is rejected by Check_Type.
 *
 * When encoding support is compiled in, the result is tagged with the
 * input's encoding and then exported to the default internal encoding, or to
 * utf8Encoding when no default internal encoding is set.
 */
static VALUE rb_escape_url(VALUE self, VALUE str) {
  Check_Type(str, T_STRING);

  VALUE rb_output_buf;
#ifdef HAVE_RUBY_ENCODING_H
  rb_encoding *default_internal_enc = rb_default_internal_encoding();
  rb_encoding *original_encoding = rb_enc_get(str);
#endif
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
  size_t len = RSTRING_LEN(str), new_len = 0;

  // worst case every byte becomes "%XX", i.e. three output bytes per input
  // byte; request at least one byte so a NULL result always means failure
  unsigned char *outBuf = malloc(sizeof *outBuf * (len ? len * 3 : 1));
  if (outBuf == NULL) {
    rb_memerror();
  }

  // perform our escape, returning the new string's length
  new_len = escape_url(outBuf, inBuf, len);

  // create our new ruby string
  rb_output_buf = rb_str_new((char *)outBuf, new_len);

  // free the temporary C string
  free(outBuf);

#ifdef HAVE_RUBY_ENCODING_H
  rb_enc_associate(rb_output_buf, original_encoding);
  if (default_internal_enc) {
    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
  } else {
    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
  }
#endif
  return rb_output_buf;
}
|
419
|
+
|
420
|
+
/*
 * Ruby binding: EscapeUtils.unescape_url(str).
 *
 * Returns a new String holding `str` with its URL escapes decoded by
 * unescape_url(). A non-String argument (nil included) is rejected by
 * Check_Type.
 *
 * When encoding support is compiled in, the result is tagged with the
 * input's encoding and then exported to the default internal encoding, or to
 * utf8Encoding when no default internal encoding is set.
 */
static VALUE rb_unescape_url(VALUE self, VALUE str) {
  Check_Type(str, T_STRING);

  VALUE rb_output_buf;
#ifdef HAVE_RUBY_ENCODING_H
  rb_encoding *default_internal_enc = rb_default_internal_encoding();
  rb_encoding *original_encoding = rb_enc_get(str);
#endif
  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
  size_t len = RSTRING_LEN(str), new_len = 0;

  // unescaping never grows the string, so len bytes is the worst case;
  // request at least one byte so a NULL result always means failure
  unsigned char *outBuf = malloc(sizeof *outBuf * (len ? len : 1));
  if (outBuf == NULL) {
    rb_memerror();
  }

  // perform our unescape, returning the new string's length
  new_len = unescape_url(outBuf, inBuf, len);

  // create our new ruby string
  rb_output_buf = rb_str_new((char *)outBuf, new_len);

  // free the temporary C string
  free(outBuf);

#ifdef HAVE_RUBY_ENCODING_H
  rb_enc_associate(rb_output_buf, original_encoding);
  if (default_internal_enc) {
    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
  } else {
    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
  }
#endif
  return rb_output_buf;
}
|
454
|
+
|
205
455
|
/* Ruby Extension initializer */
|
206
456
|
void Init_escape_utils_ext() {
|
207
457
|
VALUE mEscape = rb_define_module("EscapeUtils");
|
208
|
-
rb_define_method(mEscape, "escape_html",
|
209
|
-
rb_define_module_function(mEscape, "escape_html",
|
210
|
-
rb_define_method(mEscape, "unescape_html",
|
211
|
-
rb_define_module_function(mEscape, "unescape_html",
|
212
|
-
rb_define_method(mEscape, "escape_javascript",
|
213
|
-
rb_define_module_function(mEscape, "escape_javascript",
|
458
|
+
rb_define_method(mEscape, "escape_html", rb_escape_html, 1);
|
459
|
+
rb_define_module_function(mEscape, "escape_html", rb_escape_html, 1);
|
460
|
+
rb_define_method(mEscape, "unescape_html", rb_unescape_html, 1);
|
461
|
+
rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
|
462
|
+
rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
|
463
|
+
rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
|
464
|
+
rb_define_method(mEscape, "unescape_javascript", rb_unescape_javascript, 1);
|
465
|
+
rb_define_module_function(mEscape, "unescape_javascript", rb_unescape_javascript, 1);
|
466
|
+
rb_define_method(mEscape, "escape_url", rb_escape_url, 1);
|
467
|
+
rb_define_module_function(mEscape, "escape_url", rb_escape_url, 1);
|
468
|
+
rb_define_method(mEscape, "unescape_url", rb_unescape_url, 1);
|
469
|
+
rb_define_module_function(mEscape, "unescape_url", rb_unescape_url, 1);
|
214
470
|
|
215
471
|
#ifdef HAVE_RUBY_ENCODING_H
|
216
472
|
utf8Encoding = rb_utf8_encoding();
|