escape_utils 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/CHANGELOG.md +5 -0
- data/README.rdoc +51 -13
- data/VERSION +1 -1
- data/benchmark/html_escape.rb +20 -4
- data/benchmark/html_unescape.rb +2 -2
- data/benchmark/javascript_escape.rb +2 -4
- data/benchmark/javascript_unescape.rb +23 -0
- data/benchmark/url_escape.rb +61 -0
- data/benchmark/url_unescape.rb +54 -0
- data/escape_utils.gemspec +21 -6
- data/ext/escape_utils.c +328 -72
- data/lib/escape_utils.rb +3 -1
- data/lib/escape_utils/html/cgi.rb +8 -2
- data/lib/escape_utils/html/erb.rb +2 -3
- data/lib/escape_utils/html/haml.rb +3 -3
- data/lib/escape_utils/html/rack.rb +4 -6
- data/lib/escape_utils/html_safety.rb +19 -0
- data/lib/escape_utils/url/cgi.rb +10 -0
- data/lib/escape_utils/url/erb.rb +12 -0
- data/lib/escape_utils/url/rack.rb +14 -0
- data/spec/html/escape_spec.rb +4 -4
- data/spec/html/unescape_spec.rb +4 -4
- data/spec/html_safety_spec.rb +49 -0
- data/spec/javascript/escape_spec.rb +1 -1
- data/spec/javascript/unescape_spec.rb +39 -0
- data/spec/url/escape_spec.rb +52 -0
- data/spec/url/unescape_spec.rb +52 -0
- metadata +24 -4
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.1.5 (July 13th, 2010)
|
4
|
+
* add URL escaping and unescaping
|
5
|
+
* major refactor of HTML and Javascript escaping and unescaping logic for a decent speed up
|
6
|
+
* HTML escaping now takes html_safe? into account (for Rails/ActiveSupport users) - thanks yury!
|
7
|
+
|
3
8
|
## 0.1.4 (June 9th, 2010)
|
4
9
|
* ensure strings are passed in from monkey-patches
|
5
10
|
|
data/README.rdoc
CHANGED
@@ -47,8 +47,8 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
|
|
47
47
|
|
48
48
|
== Benchmarks
|
49
49
|
|
50
|
-
In my testing, escaping html is around 10-
|
51
|
-
While unescaping html is around
|
50
|
+
In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
|
51
|
+
While unescaping html is around 40-100x faster than CGI.unescapeHTML which is also pure ruby.
|
52
52
|
Escaping Javascript is around 16-30x faster.
|
53
53
|
|
54
54
|
This output is from my laptop using the benchmark scripts in the benchmarks folder.
|
@@ -58,28 +58,66 @@ This output is from my laptop using the benchmark scripts in the benchmarks fold
|
|
58
58
|
==== Escaping
|
59
59
|
|
60
60
|
Rack::Utils.escape_html
|
61
|
-
|
61
|
+
9.650000 0.090000 9.740000 ( 9.750756)
|
62
|
+
Haml::Helpers.html_escape
|
63
|
+
9.310000 0.110000 9.420000 ( 9.417317)
|
62
64
|
ERB::Util.html_escape
|
63
|
-
|
65
|
+
5.330000 0.390000 5.720000 ( 5.748394)
|
64
66
|
CGI.escapeHTML
|
65
|
-
|
66
|
-
|
67
|
-
0.
|
67
|
+
5.370000 0.380000 5.750000 ( 5.791344)
|
68
|
+
FasterHTMLEscape.html_escape
|
69
|
+
0.520000 0.010000 0.530000 ( 0.539485)
|
70
|
+
fast_xs_extra#fast_xs_html
|
71
|
+
0.310000 0.030000 0.340000 ( 0.336734)
|
68
72
|
EscapeUtils.escape_html
|
69
|
-
0.
|
73
|
+
0.200000 0.050000 0.250000 ( 0.258839)
|
70
74
|
|
71
|
-
|
75
|
+
==== Unescaping
|
72
76
|
|
73
77
|
CGI.unescapeHTML
|
74
|
-
|
78
|
+
16.520000 0.080000 16.600000 ( 16.853888)
|
75
79
|
EscapeUtils.unescape_html
|
76
|
-
0.
|
80
|
+
0.120000 0.040000 0.160000 ( 0.162696)
|
77
81
|
|
78
82
|
=== Javascript
|
79
83
|
|
80
84
|
==== Escaping
|
81
85
|
|
82
86
|
ActionView::Helpers::JavaScriptHelper#escape_javascript
|
83
|
-
|
87
|
+
3.810000 0.100000 3.910000 ( 3.925557)
|
84
88
|
EscapeUtils.escape_javascript
|
85
|
-
0.
|
89
|
+
0.200000 0.040000 0.240000 ( 0.236692)
|
90
|
+
|
91
|
+
==== Unescaping
|
92
|
+
|
93
|
+
I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
|
94
|
+
|
95
|
+
=== URL
|
96
|
+
|
97
|
+
==== Escaping
|
98
|
+
|
99
|
+
ERB::Util.url_encode
|
100
|
+
0.520000 0.010000 0.530000 ( 0.529277)
|
101
|
+
Rack::Utils.escape
|
102
|
+
0.460000 0.010000 0.470000 ( 0.466962)
|
103
|
+
CGI.escape
|
104
|
+
0.440000 0.000000 0.440000 ( 0.443017)
|
105
|
+
URLEscape#escape
|
106
|
+
0.040000 0.000000 0.040000 ( 0.045661)
|
107
|
+
fast_xs_extra#fast_xs_url
|
108
|
+
0.010000 0.000000 0.010000 ( 0.015429)
|
109
|
+
EscapeUtils.escape_url
|
110
|
+
0.010000 0.000000 0.010000 ( 0.010843)
|
111
|
+
|
112
|
+
==== Unescaping
|
113
|
+
|
114
|
+
Rack::Utils.unescape
|
115
|
+
0.250000 0.010000 0.260000 ( 0.257558)
|
116
|
+
CGI.unescape
|
117
|
+
0.250000 0.000000 0.250000 ( 0.257837)
|
118
|
+
URLEscape#unescape
|
119
|
+
0.040000 0.000000 0.040000 ( 0.031548)
|
120
|
+
fast_xs_extra#fast_uxs_cgi
|
121
|
+
0.010000 0.000000 0.010000 ( 0.006062)
|
122
|
+
EscapeUtils.unescape_url
|
123
|
+
0.000000 0.000000 0.000000 ( 0.005679)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.5
|
data/benchmark/html_escape.rb
CHANGED
@@ -9,6 +9,8 @@ require 'rack'
|
|
9
9
|
require 'erb'
|
10
10
|
require 'cgi'
|
11
11
|
require 'haml'
|
12
|
+
require 'fast_xs_extra'
|
13
|
+
require 'faster_html_escape'
|
12
14
|
require 'escape_utils'
|
13
15
|
|
14
16
|
module HamlBench
|
@@ -16,9 +18,9 @@ module HamlBench
|
|
16
18
|
end
|
17
19
|
|
18
20
|
times = 100
|
19
|
-
url = "http://
|
21
|
+
url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
|
20
22
|
html = `curl -s #{url}`
|
21
|
-
puts "Escaping #{html.bytesize} bytes of html from #{url}"
|
23
|
+
puts "Escaping #{html.bytesize} bytes of html #{times} times, from #{url}"
|
22
24
|
|
23
25
|
Benchmark.bmbm do |x|
|
24
26
|
x.report do
|
@@ -28,6 +30,13 @@ Benchmark.bmbm do |x|
|
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
33
|
+
x.report do
|
34
|
+
puts "Haml::Helpers.html_escape"
|
35
|
+
times.times do
|
36
|
+
HamlBench.html_escape(html)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
31
40
|
x.report do
|
32
41
|
puts "ERB::Util.html_escape"
|
33
42
|
times.times do
|
@@ -43,9 +52,16 @@ Benchmark.bmbm do |x|
|
|
43
52
|
end
|
44
53
|
|
45
54
|
x.report do
|
46
|
-
puts "
|
55
|
+
puts "FasterHTMLEscape.html_escape"
|
47
56
|
times.times do
|
48
|
-
|
57
|
+
FasterHTMLEscape.html_escape(html)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
x.report do
|
62
|
+
puts "fast_xs_extra#fast_xs_html"
|
63
|
+
times.times do
|
64
|
+
html.fast_xs_html
|
49
65
|
end
|
50
66
|
end
|
51
67
|
|
data/benchmark/html_unescape.rb
CHANGED
@@ -14,10 +14,10 @@ module HamlBench
|
|
14
14
|
end
|
15
15
|
|
16
16
|
times = 100
|
17
|
-
url = "http://
|
17
|
+
url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
|
18
18
|
html = `curl -s #{url}`
|
19
19
|
escaped_html = EscapeUtils.escape_html(html)
|
20
|
-
puts "Unescaping #{escaped_html.bytesize} bytes of escaped html from #{url}"
|
20
|
+
puts "Unescaping #{escaped_html.bytesize} bytes of escaped html #{times} times, from #{url}"
|
21
21
|
|
22
22
|
Benchmark.bmbm do |x|
|
23
23
|
x.report do
|
@@ -13,11 +13,9 @@ class ActionPackBench
|
|
13
13
|
end
|
14
14
|
|
15
15
|
times = 100
|
16
|
-
url = "http://
|
16
|
+
url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
|
17
17
|
javascript = `curl -s #{url}`
|
18
|
-
puts "Escaping #{javascript.bytesize} bytes of javascript from #{url}"
|
19
|
-
|
20
|
-
puts ActionPackBench.escape_javascript(javascript).eql?(EscapeUtils.escape_javascript(javascript))
|
18
|
+
puts "Escaping #{javascript.bytesize} bytes of javascript #{times} times, from #{url}"
|
21
19
|
|
22
20
|
Benchmark.bmbm do |x|
|
23
21
|
x.report do
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
|
3
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'benchmark'
|
7
|
+
|
8
|
+
require 'escape_utils'
|
9
|
+
|
10
|
+
times = 100
|
11
|
+
url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
|
12
|
+
javascript = `curl -s #{url}`
|
13
|
+
escaped_javascript = EscapeUtils.escape_javascript(javascript)
|
14
|
+
puts "Escaping #{escaped_javascript.bytesize} bytes of javascript #{times} times, from #{url}"
|
15
|
+
|
16
|
+
Benchmark.bmbm do |x|
|
17
|
+
x.report do
|
18
|
+
puts "EscapeUtils.escape_javascript"
|
19
|
+
times.times do
|
20
|
+
EscapeUtils.unescape_javascript(escaped_javascript)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
|
3
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'benchmark'
|
7
|
+
|
8
|
+
require 'rack'
|
9
|
+
require 'erb'
|
10
|
+
require 'cgi'
|
11
|
+
require 'url_escape'
|
12
|
+
require 'fast_xs_extra'
|
13
|
+
require 'escape_utils'
|
14
|
+
|
15
|
+
times = 10_000
|
16
|
+
url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
|
17
|
+
puts "Escaping a #{url.bytesize} byte URL #{times} times"
|
18
|
+
|
19
|
+
Benchmark.bmbm do |x|
|
20
|
+
x.report do
|
21
|
+
puts "ERB::Util.url_encode"
|
22
|
+
times.times do
|
23
|
+
ERB::Util.url_encode(url)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
x.report do
|
28
|
+
puts "Rack::Utils.escape"
|
29
|
+
times.times do
|
30
|
+
Rack::Utils.escape(url)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
x.report do
|
35
|
+
puts "CGI.escape"
|
36
|
+
times.times do
|
37
|
+
CGI.escape(url)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
x.report do
|
42
|
+
puts "URLEscape#escape"
|
43
|
+
times.times do
|
44
|
+
URLEscape.escape(url)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
x.report do
|
49
|
+
puts "fast_xs_extra#fast_xs_url"
|
50
|
+
times.times do
|
51
|
+
url.fast_xs_url
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
x.report do
|
56
|
+
puts "EscapeUtils.escape_url"
|
57
|
+
times.times do
|
58
|
+
EscapeUtils.escape_url(url)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
|
3
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'benchmark'
|
7
|
+
|
8
|
+
require 'rack'
|
9
|
+
require 'cgi'
|
10
|
+
require 'url_escape'
|
11
|
+
require 'fast_xs_extra'
|
12
|
+
require 'escape_utils'
|
13
|
+
|
14
|
+
times = 10_000
|
15
|
+
url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
|
16
|
+
escaped_url = EscapeUtils.escape_url(url)
|
17
|
+
puts "Escaping a #{url.bytesize} byte URL #{times} times"
|
18
|
+
|
19
|
+
Benchmark.bmbm do |x|
|
20
|
+
x.report do
|
21
|
+
puts "Rack::Utils.unescape"
|
22
|
+
times.times do
|
23
|
+
Rack::Utils.unescape(escaped_url)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
x.report do
|
28
|
+
puts "CGI.unescape"
|
29
|
+
times.times do
|
30
|
+
CGI.unescape(escaped_url)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
x.report do
|
35
|
+
puts "URLEscape#unescape"
|
36
|
+
times.times do
|
37
|
+
URLEscape.unescape(escaped_url)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
x.report do
|
42
|
+
puts "fast_xs_extra#fast_uxs_cgi"
|
43
|
+
times.times do
|
44
|
+
url.fast_uxs_cgi
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
x.report do
|
49
|
+
puts "EscapeUtils.unescape_url"
|
50
|
+
times.times do
|
51
|
+
EscapeUtils.unescape_url(escaped_url)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/escape_utils.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{escape_utils}
|
8
|
-
s.version = "0.1.
|
8
|
+
s.version = "0.1.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Brian Lopez"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-07-13}
|
13
13
|
s.email = %q{seniorlopez@gmail.com}
|
14
14
|
s.extensions = ["ext/extconf.rb"]
|
15
15
|
s.extra_rdoc_files = [
|
@@ -25,6 +25,9 @@ Gem::Specification.new do |s|
|
|
25
25
|
"benchmark/html_escape.rb",
|
26
26
|
"benchmark/html_unescape.rb",
|
27
27
|
"benchmark/javascript_escape.rb",
|
28
|
+
"benchmark/javascript_unescape.rb",
|
29
|
+
"benchmark/url_escape.rb",
|
30
|
+
"benchmark/url_unescape.rb",
|
28
31
|
"escape_utils.gemspec",
|
29
32
|
"ext/escape_utils.c",
|
30
33
|
"ext/extconf.rb",
|
@@ -33,31 +36,43 @@ Gem::Specification.new do |s|
|
|
33
36
|
"lib/escape_utils/html/erb.rb",
|
34
37
|
"lib/escape_utils/html/haml.rb",
|
35
38
|
"lib/escape_utils/html/rack.rb",
|
39
|
+
"lib/escape_utils/html_safety.rb",
|
36
40
|
"lib/escape_utils/javascript/action_view.rb",
|
41
|
+
"lib/escape_utils/url/cgi.rb",
|
42
|
+
"lib/escape_utils/url/erb.rb",
|
43
|
+
"lib/escape_utils/url/rack.rb",
|
37
44
|
"spec/html/escape_spec.rb",
|
38
45
|
"spec/html/unescape_spec.rb",
|
46
|
+
"spec/html_safety_spec.rb",
|
39
47
|
"spec/javascript/escape_spec.rb",
|
48
|
+
"spec/javascript/unescape_spec.rb",
|
40
49
|
"spec/rcov.opts",
|
41
50
|
"spec/spec.opts",
|
42
|
-
"spec/spec_helper.rb"
|
51
|
+
"spec/spec_helper.rb",
|
52
|
+
"spec/url/escape_spec.rb",
|
53
|
+
"spec/url/unescape_spec.rb"
|
43
54
|
]
|
44
55
|
s.homepage = %q{http://github.com/brianmario/escape_utils}
|
45
56
|
s.rdoc_options = ["--charset=UTF-8"]
|
46
57
|
s.require_paths = ["lib", "ext"]
|
47
|
-
s.rubygems_version = %q{1.3.
|
58
|
+
s.rubygems_version = %q{1.3.7}
|
48
59
|
s.summary = %q{Faster string escaping routines for your web apps}
|
49
60
|
s.test_files = [
|
50
61
|
"spec/html/escape_spec.rb",
|
51
62
|
"spec/html/unescape_spec.rb",
|
63
|
+
"spec/html_safety_spec.rb",
|
52
64
|
"spec/javascript/escape_spec.rb",
|
53
|
-
"spec/
|
65
|
+
"spec/javascript/unescape_spec.rb",
|
66
|
+
"spec/spec_helper.rb",
|
67
|
+
"spec/url/escape_spec.rb",
|
68
|
+
"spec/url/unescape_spec.rb"
|
54
69
|
]
|
55
70
|
|
56
71
|
if s.respond_to? :specification_version then
|
57
72
|
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
58
73
|
s.specification_version = 3
|
59
74
|
|
60
|
-
if Gem::Version.new(Gem::
|
75
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
61
76
|
else
|
62
77
|
end
|
63
78
|
else
|
data/ext/escape_utils.c
CHANGED
@@ -4,93 +4,234 @@
|
|
4
4
|
static rb_encoding *utf8Encoding;
|
5
5
|
#endif
|
6
6
|
|
7
|
-
#define
|
8
|
-
|
9
|
-
|
10
|
-
offset = i+scoot_by; \
|
11
|
-
memcpy(&out[total], escape, len); \
|
12
|
-
total += len; \
|
7
|
+
#define IS_HEX(c) (c >= 48 || c <= 57) && (c >= 65 || c <= 70) && (c >= 97 || c <= 102)
|
8
|
+
#define NOT_HEX(c) (c < 48 || c > 57) && (c < 65 || c > 90) && (c < 97 || c > 122)
|
9
|
+
#define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
|
13
10
|
|
14
11
|
static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
|
15
|
-
size_t
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
12
|
+
size_t total = 0;
|
13
|
+
unsigned char curChar;
|
14
|
+
|
15
|
+
total = in_len;
|
16
|
+
while (in_len) {
|
17
|
+
curChar = *in++;
|
18
|
+
switch (curChar) {
|
19
|
+
case '<':
|
20
|
+
*out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';';
|
21
|
+
total += 3;
|
22
|
+
break;
|
23
|
+
case '>':
|
24
|
+
*out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';';
|
25
|
+
total += 3;
|
26
|
+
break;
|
27
|
+
case '&':
|
28
|
+
*out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';';
|
29
|
+
total += 4;
|
30
|
+
break;
|
31
|
+
case '\'':
|
32
|
+
*out++ = '&'; *out++ = '#'; *out++ = '3'; *out++ = '9'; *out++ = ';';
|
33
|
+
total += 4;
|
34
|
+
break;
|
35
|
+
case '\"':
|
36
|
+
*out++ = '&'; *out++ = 'q'; *out++ = 'u'; *out++ = 'o'; *out++ = 't'; *out++ = ';';
|
37
|
+
total += 5;
|
38
|
+
break;
|
39
|
+
case '/':
|
40
|
+
*out++ = '&'; *out++ = '#'; *out++ = '4'; *out++ = '7'; *out++ = ';';
|
41
|
+
total += 4;
|
42
|
+
break;
|
43
|
+
default:
|
44
|
+
*out++ = curChar;
|
45
|
+
break;
|
24
46
|
}
|
47
|
+
in_len--;
|
25
48
|
}
|
26
49
|
|
27
|
-
|
28
|
-
memcpy(&out[total], &in[offset], i-offset);
|
29
|
-
|
30
|
-
return total + (i-offset);
|
50
|
+
return total;
|
31
51
|
}
|
32
52
|
|
33
53
|
static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
|
34
|
-
size_t
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
54
|
+
size_t total = 0, len = in_len;
|
55
|
+
unsigned char curChar, *start;
|
56
|
+
|
57
|
+
start = (unsigned char *)&in[0];
|
58
|
+
total = in_len;
|
59
|
+
while (len) {
|
60
|
+
curChar = *in++;
|
61
|
+
if (curChar == '&') {
|
62
|
+
if ((in-start)+2 <= in_len && *in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
|
63
|
+
*out++ = '<';
|
64
|
+
total-=3;
|
65
|
+
in+=3;
|
66
|
+
len-=3;
|
67
|
+
} else if ((in-start)+2 <= in_len && *in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
|
68
|
+
*out++ = '>';
|
69
|
+
total-=3;
|
70
|
+
in+=3;
|
71
|
+
len-=3;
|
72
|
+
} else if ((in-start)+3 <= in_len && *in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
|
73
|
+
*out++ = '&';
|
74
|
+
total-=4;
|
75
|
+
in+=4;
|
76
|
+
len-=4;
|
77
|
+
} else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
|
78
|
+
*out++ = '\'';
|
79
|
+
total-=4;
|
80
|
+
in+=4;
|
81
|
+
len-=4;
|
82
|
+
} else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
|
83
|
+
*out++ = '/';
|
84
|
+
total-=4;
|
85
|
+
in+=4;
|
86
|
+
len-=4;
|
87
|
+
} else if ((in-start)+4 <= in_len && *in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
|
88
|
+
*out++ = '\"';
|
89
|
+
total-=5;
|
90
|
+
in+=5;
|
91
|
+
len-=5;
|
44
92
|
}
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
93
|
+
} else {
|
94
|
+
*out++ = curChar;
|
95
|
+
}
|
96
|
+
len--;
|
97
|
+
}
|
98
|
+
|
99
|
+
return total;
|
100
|
+
}
|
101
|
+
|
102
|
+
static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
|
103
|
+
size_t total = 0;
|
104
|
+
unsigned char curChar;
|
105
|
+
|
106
|
+
total = in_len;
|
107
|
+
while (in_len) {
|
108
|
+
curChar = *in++;
|
109
|
+
switch (curChar) {
|
110
|
+
case '\\':
|
111
|
+
*out++ = '\\'; *out++ = '\\';
|
112
|
+
total++;
|
113
|
+
break;
|
114
|
+
case '<':
|
115
|
+
if (*in == '/') {
|
116
|
+
*out++ = '<'; *out++ = '\\'; *out++ = '/';
|
117
|
+
in++; in_len--;
|
118
|
+
total++;
|
51
119
|
}
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
120
|
+
break;
|
121
|
+
case '\r':
|
122
|
+
if (*in == '\n') {
|
123
|
+
*out++ = '\\'; *out++ = 'n';
|
124
|
+
in++; in_len--;
|
125
|
+
} else {
|
126
|
+
*out++ = '\\'; *out++ = 'n';
|
127
|
+
total++;
|
56
128
|
}
|
129
|
+
break;
|
130
|
+
case '\n':
|
131
|
+
*out++ = '\\'; *out++ = 'n';
|
132
|
+
total++;
|
133
|
+
break;
|
134
|
+
case '\'':
|
135
|
+
*out++ = '\\'; *out++ = '\'';
|
136
|
+
total++;
|
137
|
+
break;
|
138
|
+
case '\"':
|
139
|
+
*out++ = '\\'; *out++ = '\"';
|
140
|
+
total++;
|
141
|
+
break;
|
142
|
+
default:
|
143
|
+
*out++ = curChar;
|
144
|
+
break;
|
57
145
|
}
|
146
|
+
in_len--;
|
58
147
|
}
|
59
148
|
|
60
|
-
|
61
|
-
|
149
|
+
return total;
|
150
|
+
}
|
62
151
|
|
63
|
-
|
152
|
+
static size_t unescape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
|
153
|
+
size_t total = 0;
|
154
|
+
unsigned char curChar;
|
155
|
+
|
156
|
+
total = in_len;
|
157
|
+
while (in_len) {
|
158
|
+
curChar = *in++;
|
159
|
+
if (curChar == '\\') {
|
160
|
+
if (*in == 'n') {
|
161
|
+
*out++ = '\n';
|
162
|
+
total--;
|
163
|
+
} else if (*in == '\\') {
|
164
|
+
*out++ = '\\';
|
165
|
+
total--;
|
166
|
+
} else if (*in == '\'') {
|
167
|
+
*out++ = '\'';
|
168
|
+
total--;
|
169
|
+
} else if (*in == '\"') {
|
170
|
+
*out++ = '\"';
|
171
|
+
total--;
|
172
|
+
} else if (*in == '/') {
|
173
|
+
*out++ = '/';
|
174
|
+
total--;
|
175
|
+
} else {
|
176
|
+
*out++ = curChar;
|
177
|
+
}
|
178
|
+
in++; in_len--;
|
179
|
+
} else {
|
180
|
+
*out++ = curChar;
|
181
|
+
}
|
182
|
+
in_len--;
|
183
|
+
}
|
184
|
+
|
185
|
+
return total;
|
64
186
|
}
|
65
187
|
|
66
|
-
static size_t
|
67
|
-
size_t
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
break;
|
84
|
-
case '\n': APPEND_BUFFER("\\n", 2, 1); break;
|
85
|
-
case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
|
86
|
-
case '\'': APPEND_BUFFER("\\'", 2, 1); break;
|
188
|
+
static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
|
189
|
+
size_t total = 0;
|
190
|
+
unsigned char curChar, hex[2];
|
191
|
+
const unsigned char hexChars[16] = "0123456789ABCDEF";
|
192
|
+
|
193
|
+
total = in_len;
|
194
|
+
while (in_len) {
|
195
|
+
curChar = *in++;
|
196
|
+
if (curChar == ' ') {
|
197
|
+
*out++ = '+';
|
198
|
+
} else if ((curChar != '_' && curChar != '.' && curChar != '-') && NOT_HEX(curChar)) {
|
199
|
+
hex[1] = hexChars[curChar & 0x0f];
|
200
|
+
hex[0] = hexChars[(curChar >> 4) & 0x0f];
|
201
|
+
*out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
|
202
|
+
total += 2;
|
203
|
+
} else {
|
204
|
+
*out++ = curChar;
|
87
205
|
}
|
206
|
+
in_len--;
|
88
207
|
}
|
89
208
|
|
90
|
-
|
91
|
-
|
209
|
+
return total;
|
210
|
+
}
|
211
|
+
|
212
|
+
static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
|
213
|
+
size_t total = 0, len = in_len;
|
214
|
+
unsigned char curChar, *start;
|
215
|
+
|
216
|
+
start = (unsigned char *)&in[0];
|
217
|
+
total = in_len;
|
218
|
+
while (len) {
|
219
|
+
curChar = *in++;
|
220
|
+
if (curChar == '%') {
|
221
|
+
if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
|
222
|
+
*out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
|
223
|
+
in+=2;
|
224
|
+
total-=2;
|
225
|
+
}
|
226
|
+
} else if (curChar == '+') {
|
227
|
+
*out++ = ' ';
|
228
|
+
} else {
|
229
|
+
*out++ = curChar;
|
230
|
+
}
|
231
|
+
len--;
|
232
|
+
}
|
92
233
|
|
93
|
-
return total
|
234
|
+
return total;
|
94
235
|
}
|
95
236
|
|
96
237
|
static VALUE rb_escape_html(VALUE self, VALUE str) {
|
@@ -202,15 +343,130 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
|
|
202
343
|
return rb_output_buf;
|
203
344
|
}
|
204
345
|
|
346
|
+
static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
|
347
|
+
if (str == Qnil) {
|
348
|
+
return rb_str_new2("");
|
349
|
+
}
|
350
|
+
|
351
|
+
Check_Type(str, T_STRING);
|
352
|
+
|
353
|
+
VALUE rb_output_buf;
|
354
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
355
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
356
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
357
|
+
#endif
|
358
|
+
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
359
|
+
size_t len = RSTRING_LEN(str), new_len = 0;
|
360
|
+
|
361
|
+
// this is the max size the string could be
|
362
|
+
// TODO: we should try to be more intelligent about this
|
363
|
+
unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
|
364
|
+
|
365
|
+
// perform our escape, returning the new string's length
|
366
|
+
new_len = unescape_javascript(outBuf, inBuf, len);
|
367
|
+
|
368
|
+
// create our new ruby string
|
369
|
+
rb_output_buf = rb_str_new((char *)outBuf, new_len);
|
370
|
+
|
371
|
+
// free the temporary C string
|
372
|
+
free(outBuf);
|
373
|
+
|
374
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
375
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
376
|
+
if (default_internal_enc) {
|
377
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
378
|
+
} else {
|
379
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
380
|
+
}
|
381
|
+
#endif
|
382
|
+
return rb_output_buf;
|
383
|
+
}
|
384
|
+
|
385
|
+
static VALUE rb_escape_url(VALUE self, VALUE str) {
|
386
|
+
Check_Type(str, T_STRING);
|
387
|
+
|
388
|
+
VALUE rb_output_buf;
|
389
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
390
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
391
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
392
|
+
#endif
|
393
|
+
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
394
|
+
size_t len = RSTRING_LEN(str), new_len = 0;
|
395
|
+
|
396
|
+
// this is the max size the string could be
|
397
|
+
// TODO: we should try to be more intelligent about this
|
398
|
+
unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
|
399
|
+
|
400
|
+
// perform our escape, returning the new string's length
|
401
|
+
new_len = escape_url(outBuf, inBuf, len);
|
402
|
+
|
403
|
+
// create our new ruby string
|
404
|
+
rb_output_buf = rb_str_new((char *)outBuf, new_len);
|
405
|
+
|
406
|
+
// free the temporary C string
|
407
|
+
free(outBuf);
|
408
|
+
|
409
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
410
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
411
|
+
if (default_internal_enc) {
|
412
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
413
|
+
} else {
|
414
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
415
|
+
}
|
416
|
+
#endif
|
417
|
+
return rb_output_buf;
|
418
|
+
}
|
419
|
+
|
420
|
+
static VALUE rb_unescape_url(VALUE self, VALUE str) {
|
421
|
+
Check_Type(str, T_STRING);
|
422
|
+
|
423
|
+
VALUE rb_output_buf;
|
424
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
425
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
426
|
+
rb_encoding *original_encoding = rb_enc_get(str);
|
427
|
+
#endif
|
428
|
+
unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
|
429
|
+
size_t len = RSTRING_LEN(str), new_len = 0;
|
430
|
+
|
431
|
+
// this is the max size the string could be
|
432
|
+
// TODO: we should try to be more intelligent about this
|
433
|
+
unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
|
434
|
+
|
435
|
+
// perform our escape, returning the new string's length
|
436
|
+
new_len = unescape_url(outBuf, inBuf, len);
|
437
|
+
|
438
|
+
// create our new ruby string
|
439
|
+
rb_output_buf = rb_str_new((char *)outBuf, new_len);
|
440
|
+
|
441
|
+
// free the temporary C string
|
442
|
+
free(outBuf);
|
443
|
+
|
444
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
445
|
+
rb_enc_associate(rb_output_buf, original_encoding);
|
446
|
+
if (default_internal_enc) {
|
447
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
|
448
|
+
} else {
|
449
|
+
rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
|
450
|
+
}
|
451
|
+
#endif
|
452
|
+
return rb_output_buf;
|
453
|
+
}
|
454
|
+
|
205
455
|
/* Ruby Extension initializer */
|
206
456
|
void Init_escape_utils_ext() {
|
207
457
|
VALUE mEscape = rb_define_module("EscapeUtils");
|
208
|
-
rb_define_method(mEscape, "escape_html",
|
209
|
-
rb_define_module_function(mEscape, "escape_html",
|
210
|
-
rb_define_method(mEscape, "unescape_html",
|
211
|
-
rb_define_module_function(mEscape, "unescape_html",
|
212
|
-
rb_define_method(mEscape, "escape_javascript",
|
213
|
-
rb_define_module_function(mEscape, "escape_javascript",
|
458
|
+
rb_define_method(mEscape, "escape_html", rb_escape_html, 1);
|
459
|
+
rb_define_module_function(mEscape, "escape_html", rb_escape_html, 1);
|
460
|
+
rb_define_method(mEscape, "unescape_html", rb_unescape_html, 1);
|
461
|
+
rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
|
462
|
+
rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
|
463
|
+
rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
|
464
|
+
rb_define_method(mEscape, "unescape_javascript", rb_unescape_javascript, 1);
|
465
|
+
rb_define_module_function(mEscape, "unescape_javascript", rb_unescape_javascript, 1);
|
466
|
+
rb_define_method(mEscape, "escape_url", rb_escape_url, 1);
|
467
|
+
rb_define_module_function(mEscape, "escape_url", rb_escape_url, 1);
|
468
|
+
rb_define_method(mEscape, "unescape_url", rb_unescape_url, 1);
|
469
|
+
rb_define_module_function(mEscape, "unescape_url", rb_unescape_url, 1);
|
214
470
|
|
215
471
|
#ifdef HAVE_RUBY_ENCODING_H
|
216
472
|
utf8Encoding = rb_utf8_encoding();
|