escape_utils 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -2,4 +2,5 @@ Makefile
2
2
  *.o
3
3
  *.bundle
4
4
  pkg/*
5
- doc/*
5
+ doc/*
6
+ *.rbc
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.5 (July 13th, 2010)
4
+ * add URL escaping and unescaping
5
+ * major refactor of HTML and Javascript escaping and unescaping logic for a decent speedup
6
+ * HTML escaping now takes html_safe? into account (for Rails/ActiveSupport users) - thanks yury!
7
+
3
8
  ## 0.1.4 (June 9th, 2010)
4
9
  * ensure strings are passed in from monkey-patches
5
10
 
data/README.rdoc CHANGED
@@ -47,8 +47,8 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
47
47
 
48
48
  == Benchmarks
49
49
 
50
- In my testing, escaping html is around 10-20x faster than the pure ruby implementations in wide use today.
51
- While unescaping html is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
50
+ In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
51
+ Unescaping html is around 40-100x faster than CGI.unescapeHTML, which is also pure ruby.
52
52
  Escaping Javascript is around 16-30x faster.
53
53
 
54
54
  This output is from my laptop using the benchmark scripts in the benchmarks folder.
@@ -58,28 +58,66 @@ This output is from my laptop using the benchmark scripts in the benchmarks fold
58
58
  ==== Escaping
59
59
 
60
60
  Rack::Utils.escape_html
61
- 0.560000 0.040000 0.600000 ( 0.589475)
61
+ 9.650000 0.090000 9.740000 ( 9.750756)
62
+ Haml::Helpers.html_escape
63
+ 9.310000 0.110000 9.420000 ( 9.417317)
62
64
  ERB::Util.html_escape
63
- 0.450000 0.040000 0.490000 ( 0.492893)
65
+ 5.330000 0.390000 5.720000 ( 5.748394)
64
66
  CGI.escapeHTML
65
- 0.460000 0.030000 0.490000 ( 0.490171)
66
- Haml::Helpers.html_escape
67
- 0.430000 0.010000 0.440000 ( 0.444694)
67
+ 5.370000 0.380000 5.750000 ( 5.791344)
68
+ FasterHTMLEscape.html_escape
69
+ 0.520000 0.010000 0.530000 ( 0.539485)
70
+ fast_xs_extra#fast_xs_html
71
+ 0.310000 0.030000 0.340000 ( 0.336734)
68
72
  EscapeUtils.escape_html
69
- 0.050000 0.010000 0.060000 ( 0.054799)
73
+ 0.200000 0.050000 0.250000 ( 0.258839)
70
74
 
71
- === Unescaping
75
+ ==== Unescaping
72
76
 
73
77
  CGI.unescapeHTML
74
- 1.140000 0.010000 1.150000 ( 1.148470)
78
+ 16.520000 0.080000 16.600000 ( 16.853888)
75
79
  EscapeUtils.unescape_html
76
- 0.040000 0.000000 0.040000 ( 0.046166)
80
+ 0.120000 0.040000 0.160000 ( 0.162696)
77
81
 
78
82
  === Javascript
79
83
 
80
84
  ==== Escaping
81
85
 
82
86
  ActionView::Helpers::JavaScriptHelper#escape_javascript
83
- 2.000000 0.020000 2.020000 ( 2.023047)
87
+ 3.810000 0.100000 3.910000 ( 3.925557)
84
88
  EscapeUtils.escape_javascript
85
- 0.110000 0.010000 0.120000 ( 0.121761)
89
+ 0.200000 0.040000 0.240000 ( 0.236692)
90
+
91
+ ==== Unescaping
92
+
93
+ I didn't look that hard, but I'm not aware of another Ruby library that does Javascript unescaping to benchmark against. Does anyone know of one?
94
+
95
+ === URL
96
+
97
+ ==== Escaping
98
+
99
+ ERB::Util.url_encode
100
+ 0.520000 0.010000 0.530000 ( 0.529277)
101
+ Rack::Utils.escape
102
+ 0.460000 0.010000 0.470000 ( 0.466962)
103
+ CGI.escape
104
+ 0.440000 0.000000 0.440000 ( 0.443017)
105
+ URLEscape#escape
106
+ 0.040000 0.000000 0.040000 ( 0.045661)
107
+ fast_xs_extra#fast_xs_url
108
+ 0.010000 0.000000 0.010000 ( 0.015429)
109
+ EscapeUtils.escape_url
110
+ 0.010000 0.000000 0.010000 ( 0.010843)
111
+
112
+ ==== Unescaping
113
+
114
+ Rack::Utils.unescape
115
+ 0.250000 0.010000 0.260000 ( 0.257558)
116
+ CGI.unescape
117
+ 0.250000 0.000000 0.250000 ( 0.257837)
118
+ URLEscape#unescape
119
+ 0.040000 0.000000 0.040000 ( 0.031548)
120
+ fast_xs_extra#fast_uxs_cgi
121
+ 0.010000 0.000000 0.010000 ( 0.006062)
122
+ EscapeUtils.unescape_url
123
+ 0.000000 0.000000 0.000000 ( 0.005679)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.4
1
+ 0.1.5
@@ -9,6 +9,8 @@ require 'rack'
9
9
  require 'erb'
10
10
  require 'cgi'
11
11
  require 'haml'
12
+ require 'fast_xs_extra'
13
+ require 'faster_html_escape'
12
14
  require 'escape_utils'
13
15
 
14
16
  module HamlBench
@@ -16,9 +18,9 @@ module HamlBench
16
18
  end
17
19
 
18
20
  times = 100
19
- url = "http://maps.google.com"
21
+ url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
20
22
  html = `curl -s #{url}`
21
- puts "Escaping #{html.bytesize} bytes of html from #{url}"
23
+ puts "Escaping #{html.bytesize} bytes of html #{times} times, from #{url}"
22
24
 
23
25
  Benchmark.bmbm do |x|
24
26
  x.report do
@@ -28,6 +30,13 @@ Benchmark.bmbm do |x|
28
30
  end
29
31
  end
30
32
 
33
+ x.report do
34
+ puts "Haml::Helpers.html_escape"
35
+ times.times do
36
+ HamlBench.html_escape(html)
37
+ end
38
+ end
39
+
31
40
  x.report do
32
41
  puts "ERB::Util.html_escape"
33
42
  times.times do
@@ -43,9 +52,16 @@ Benchmark.bmbm do |x|
43
52
  end
44
53
 
45
54
  x.report do
46
- puts "Haml::Helpers.html_escape"
55
+ puts "FasterHTMLEscape.html_escape"
47
56
  times.times do
48
- HamlBench.html_escape(html)
57
+ FasterHTMLEscape.html_escape(html)
58
+ end
59
+ end
60
+
61
+ x.report do
62
+ puts "fast_xs_extra#fast_xs_html"
63
+ times.times do
64
+ html.fast_xs_html
49
65
  end
50
66
  end
51
67
 
@@ -14,10 +14,10 @@ module HamlBench
14
14
  end
15
15
 
16
16
  times = 100
17
- url = "http://maps.google.com"
17
+ url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
18
18
  html = `curl -s #{url}`
19
19
  escaped_html = EscapeUtils.escape_html(html)
20
- puts "Unescaping #{escaped_html.bytesize} bytes of escaped html from #{url}"
20
+ puts "Unescaping #{escaped_html.bytesize} bytes of escaped html #{times} times, from #{url}"
21
21
 
22
22
  Benchmark.bmbm do |x|
23
23
  x.report do
@@ -13,11 +13,9 @@ class ActionPackBench
13
13
  end
14
14
 
15
15
  times = 100
16
- url = "http://code.jquery.com/jquery-1.4.2.js"
16
+ url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
17
17
  javascript = `curl -s #{url}`
18
- puts "Escaping #{javascript.bytesize} bytes of javascript from #{url}"
19
-
20
- puts ActionPackBench.escape_javascript(javascript).eql?(EscapeUtils.escape_javascript(javascript))
18
+ puts "Escaping #{javascript.bytesize} bytes of javascript #{times} times, from #{url}"
21
19
 
22
20
  Benchmark.bmbm do |x|
23
21
  x.report do
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+
5
+ require 'rubygems'
6
+ require 'benchmark'
7
+
8
+ require 'escape_utils'
9
+
10
+ times = 100
11
+ url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
12
+ javascript = `curl -s #{url}`
13
+ escaped_javascript = EscapeUtils.escape_javascript(javascript)
14
+ puts "Escaping #{escaped_javascript.bytesize} bytes of javascript #{times} times, from #{url}"
15
+
16
+ Benchmark.bmbm do |x|
17
+ x.report do
18
+ puts "EscapeUtils.escape_javascript"
19
+ times.times do
20
+ EscapeUtils.unescape_javascript(escaped_javascript)
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,61 @@
1
+ # encoding: utf-8
2
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+
5
+ require 'rubygems'
6
+ require 'benchmark'
7
+
8
+ require 'rack'
9
+ require 'erb'
10
+ require 'cgi'
11
+ require 'url_escape'
12
+ require 'fast_xs_extra'
13
+ require 'escape_utils'
14
+
15
+ times = 10_000
16
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
17
+ puts "Escaping a #{url.bytesize} byte URL #{times} times"
18
+
19
+ Benchmark.bmbm do |x|
20
+ x.report do
21
+ puts "ERB::Util.url_encode"
22
+ times.times do
23
+ ERB::Util.url_encode(url)
24
+ end
25
+ end
26
+
27
+ x.report do
28
+ puts "Rack::Utils.escape"
29
+ times.times do
30
+ Rack::Utils.escape(url)
31
+ end
32
+ end
33
+
34
+ x.report do
35
+ puts "CGI.escape"
36
+ times.times do
37
+ CGI.escape(url)
38
+ end
39
+ end
40
+
41
+ x.report do
42
+ puts "URLEscape#escape"
43
+ times.times do
44
+ URLEscape.escape(url)
45
+ end
46
+ end
47
+
48
+ x.report do
49
+ puts "fast_xs_extra#fast_xs_url"
50
+ times.times do
51
+ url.fast_xs_url
52
+ end
53
+ end
54
+
55
+ x.report do
56
+ puts "EscapeUtils.escape_url"
57
+ times.times do
58
+ EscapeUtils.escape_url(url)
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,54 @@
1
+ # encoding: utf-8
2
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+
5
+ require 'rubygems'
6
+ require 'benchmark'
7
+
8
+ require 'rack'
9
+ require 'cgi'
10
+ require 'url_escape'
11
+ require 'fast_xs_extra'
12
+ require 'escape_utils'
13
+
14
+ times = 10_000
15
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
16
+ escaped_url = EscapeUtils.escape_url(url)
17
+ puts "Escaping a #{url.bytesize} byte URL #{times} times"
18
+
19
+ Benchmark.bmbm do |x|
20
+ x.report do
21
+ puts "Rack::Utils.unescape"
22
+ times.times do
23
+ Rack::Utils.unescape(escaped_url)
24
+ end
25
+ end
26
+
27
+ x.report do
28
+ puts "CGI.unescape"
29
+ times.times do
30
+ CGI.unescape(escaped_url)
31
+ end
32
+ end
33
+
34
+ x.report do
35
+ puts "URLEscape#unescape"
36
+ times.times do
37
+ URLEscape.unescape(escaped_url)
38
+ end
39
+ end
40
+
41
+ x.report do
42
+ puts "fast_xs_extra#fast_uxs_cgi"
43
+ times.times do
44
+ url.fast_uxs_cgi
45
+ end
46
+ end
47
+
48
+ x.report do
49
+ puts "EscapeUtils.unescape_url"
50
+ times.times do
51
+ EscapeUtils.unescape_url(escaped_url)
52
+ end
53
+ end
54
+ end
data/escape_utils.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{escape_utils}
8
- s.version = "0.1.4"
8
+ s.version = "0.1.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brian Lopez"]
12
- s.date = %q{2010-06-09}
12
+ s.date = %q{2010-07-13}
13
13
  s.email = %q{seniorlopez@gmail.com}
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.extra_rdoc_files = [
@@ -25,6 +25,9 @@ Gem::Specification.new do |s|
25
25
  "benchmark/html_escape.rb",
26
26
  "benchmark/html_unescape.rb",
27
27
  "benchmark/javascript_escape.rb",
28
+ "benchmark/javascript_unescape.rb",
29
+ "benchmark/url_escape.rb",
30
+ "benchmark/url_unescape.rb",
28
31
  "escape_utils.gemspec",
29
32
  "ext/escape_utils.c",
30
33
  "ext/extconf.rb",
@@ -33,31 +36,43 @@ Gem::Specification.new do |s|
33
36
  "lib/escape_utils/html/erb.rb",
34
37
  "lib/escape_utils/html/haml.rb",
35
38
  "lib/escape_utils/html/rack.rb",
39
+ "lib/escape_utils/html_safety.rb",
36
40
  "lib/escape_utils/javascript/action_view.rb",
41
+ "lib/escape_utils/url/cgi.rb",
42
+ "lib/escape_utils/url/erb.rb",
43
+ "lib/escape_utils/url/rack.rb",
37
44
  "spec/html/escape_spec.rb",
38
45
  "spec/html/unescape_spec.rb",
46
+ "spec/html_safety_spec.rb",
39
47
  "spec/javascript/escape_spec.rb",
48
+ "spec/javascript/unescape_spec.rb",
40
49
  "spec/rcov.opts",
41
50
  "spec/spec.opts",
42
- "spec/spec_helper.rb"
51
+ "spec/spec_helper.rb",
52
+ "spec/url/escape_spec.rb",
53
+ "spec/url/unescape_spec.rb"
43
54
  ]
44
55
  s.homepage = %q{http://github.com/brianmario/escape_utils}
45
56
  s.rdoc_options = ["--charset=UTF-8"]
46
57
  s.require_paths = ["lib", "ext"]
47
- s.rubygems_version = %q{1.3.6}
58
+ s.rubygems_version = %q{1.3.7}
48
59
  s.summary = %q{Faster string escaping routines for your web apps}
49
60
  s.test_files = [
50
61
  "spec/html/escape_spec.rb",
51
62
  "spec/html/unescape_spec.rb",
63
+ "spec/html_safety_spec.rb",
52
64
  "spec/javascript/escape_spec.rb",
53
- "spec/spec_helper.rb"
65
+ "spec/javascript/unescape_spec.rb",
66
+ "spec/spec_helper.rb",
67
+ "spec/url/escape_spec.rb",
68
+ "spec/url/unescape_spec.rb"
54
69
  ]
55
70
 
56
71
  if s.respond_to? :specification_version then
57
72
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
58
73
  s.specification_version = 3
59
74
 
60
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
75
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
61
76
  else
62
77
  end
63
78
  else
data/ext/escape_utils.c CHANGED
@@ -4,93 +4,234 @@
4
4
  static rb_encoding *utf8Encoding;
5
5
  #endif
6
6
 
7
- #define APPEND_BUFFER(escape, len, scoot_by) \
8
- memcpy(&out[total], &in[offset], i-offset); \
9
- total += i-offset; \
10
- offset = i+scoot_by; \
11
- memcpy(&out[total], escape, len); \
12
- total += len; \
7
+ #define IS_HEX(c) (c >= 48 || c <= 57) && (c >= 65 || c <= 70) && (c >= 97 || c <= 102)
8
+ #define NOT_HEX(c) (c < 48 || c > 57) && (c < 65 || c > 90) && (c < 97 || c > 122)
9
+ #define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
13
10
 
14
11
  static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
15
- size_t i = 0, offset = 0, total = 0;
16
-
17
- for(;i<in_len;i++) {
18
- switch(in[i]) {
19
- case '&': APPEND_BUFFER("&amp;", 5, 1); break;
20
- case '<': APPEND_BUFFER("&lt;", 4, 1); break;
21
- case '>': APPEND_BUFFER("&gt;", 4, 1); break;
22
- case '\'': APPEND_BUFFER("&#39;", 5, 1); break;
23
- case '\"': APPEND_BUFFER("&quot;", 6, 1); break;
12
+ size_t total = 0;
13
+ unsigned char curChar;
14
+
15
+ total = in_len;
16
+ while (in_len) {
17
+ curChar = *in++;
18
+ switch (curChar) {
19
+ case '<':
20
+ *out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';';
21
+ total += 3;
22
+ break;
23
+ case '>':
24
+ *out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';';
25
+ total += 3;
26
+ break;
27
+ case '&':
28
+ *out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';';
29
+ total += 4;
30
+ break;
31
+ case '\'':
32
+ *out++ = '&'; *out++ = '#'; *out++ = '3'; *out++ = '9'; *out++ = ';';
33
+ total += 4;
34
+ break;
35
+ case '\"':
36
+ *out++ = '&'; *out++ = 'q'; *out++ = 'u'; *out++ = 'o'; *out++ = 't'; *out++ = ';';
37
+ total += 5;
38
+ break;
39
+ case '/':
40
+ *out++ = '&'; *out++ = '#'; *out++ = '4'; *out++ = '7'; *out++ = ';';
41
+ total += 4;
42
+ break;
43
+ default:
44
+ *out++ = curChar;
45
+ break;
24
46
  }
47
+ in_len--;
25
48
  }
26
49
 
27
- // append the rest of the buffer
28
- memcpy(&out[total], &in[offset], i-offset);
29
-
30
- return total + (i-offset);
50
+ return total;
31
51
  }
32
52
 
33
53
  static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
34
- size_t i = 0, offset = 0, total = 0;
35
-
36
- for(;i<in_len;i++) {
37
- if(in[i] == '&') {
38
- if (i+3 <= in_len) {
39
- if (memcmp(&in[i], "&lt;", 4) == 0) {
40
- APPEND_BUFFER("<", 1, 4);
41
- } else if (memcmp(&in[i], "&gt;", 4) == 0) {
42
- APPEND_BUFFER(">", 1, 4);
43
- }
54
+ size_t total = 0, len = in_len;
55
+ unsigned char curChar, *start;
56
+
57
+ start = (unsigned char *)&in[0];
58
+ total = in_len;
59
+ while (len) {
60
+ curChar = *in++;
61
+ if (curChar == '&') {
62
+ if ((in-start)+2 <= in_len && *in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
63
+ *out++ = '<';
64
+ total-=3;
65
+ in+=3;
66
+ len-=3;
67
+ } else if ((in-start)+2 <= in_len && *in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
68
+ *out++ = '>';
69
+ total-=3;
70
+ in+=3;
71
+ len-=3;
72
+ } else if ((in-start)+3 <= in_len && *in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
73
+ *out++ = '&';
74
+ total-=4;
75
+ in+=4;
76
+ len-=4;
77
+ } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
78
+ *out++ = '\'';
79
+ total-=4;
80
+ in+=4;
81
+ len-=4;
82
+ } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
83
+ *out++ = '/';
84
+ total-=4;
85
+ in+=4;
86
+ len-=4;
87
+ } else if ((in-start)+4 <= in_len && *in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
88
+ *out++ = '\"';
89
+ total-=5;
90
+ in+=5;
91
+ len-=5;
44
92
  }
45
- if (i+4 <= in_len) {
46
- if (memcmp(&in[i], "&amp;", 5) == 0) {
47
- APPEND_BUFFER("&", 1, 5);
48
- } else if (memcmp(&in[i], "&#39;", 5) == 0) {
49
- APPEND_BUFFER("\'", 1, 5);
50
- }
93
+ } else {
94
+ *out++ = curChar;
95
+ }
96
+ len--;
97
+ }
98
+
99
+ return total;
100
+ }
101
+
102
+ static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
103
+ size_t total = 0;
104
+ unsigned char curChar;
105
+
106
+ total = in_len;
107
+ while (in_len) {
108
+ curChar = *in++;
109
+ switch (curChar) {
110
+ case '\\':
111
+ *out++ = '\\'; *out++ = '\\';
112
+ total++;
113
+ break;
114
+ case '<':
115
+ if (*in == '/') {
116
+ *out++ = '<'; *out++ = '\\'; *out++ = '/';
117
+ in++; in_len--;
118
+ total++;
51
119
  }
52
- if (i+5 <= in_len) {
53
- if (memcmp(&in[i], "&quot;", 6) == 0) {
54
- APPEND_BUFFER("\"", 1, 6);
55
- }
120
+ break;
121
+ case '\r':
122
+ if (*in == '\n') {
123
+ *out++ = '\\'; *out++ = 'n';
124
+ in++; in_len--;
125
+ } else {
126
+ *out++ = '\\'; *out++ = 'n';
127
+ total++;
56
128
  }
129
+ break;
130
+ case '\n':
131
+ *out++ = '\\'; *out++ = 'n';
132
+ total++;
133
+ break;
134
+ case '\'':
135
+ *out++ = '\\'; *out++ = '\'';
136
+ total++;
137
+ break;
138
+ case '\"':
139
+ *out++ = '\\'; *out++ = '\"';
140
+ total++;
141
+ break;
142
+ default:
143
+ *out++ = curChar;
144
+ break;
57
145
  }
146
+ in_len--;
58
147
  }
59
148
 
60
- // append the rest of the buffer
61
- memcpy(&out[total], &in[offset], i-offset);
149
+ return total;
150
+ }
62
151
 
63
- return total + (i-offset);
152
+ static size_t unescape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
153
+ size_t total = 0;
154
+ unsigned char curChar;
155
+
156
+ total = in_len;
157
+ while (in_len) {
158
+ curChar = *in++;
159
+ if (curChar == '\\') {
160
+ if (*in == 'n') {
161
+ *out++ = '\n';
162
+ total--;
163
+ } else if (*in == '\\') {
164
+ *out++ = '\\';
165
+ total--;
166
+ } else if (*in == '\'') {
167
+ *out++ = '\'';
168
+ total--;
169
+ } else if (*in == '\"') {
170
+ *out++ = '\"';
171
+ total--;
172
+ } else if (*in == '/') {
173
+ *out++ = '/';
174
+ total--;
175
+ } else {
176
+ *out++ = curChar;
177
+ }
178
+ in++; in_len--;
179
+ } else {
180
+ *out++ = curChar;
181
+ }
182
+ in_len--;
183
+ }
184
+
185
+ return total;
64
186
  }
65
187
 
66
- static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
67
- size_t i = 0, offset = 0, total = 0;
68
-
69
- for(;i<in_len;i++) {
70
- switch(in[i]) {
71
- case '\\': APPEND_BUFFER("\\\\", 2, 1); break;
72
- case '<':
73
- if (i+1 <= in_len && in[i+1] == '/') {
74
- APPEND_BUFFER("<\\/", 3, 2);
75
- }
76
- break;
77
- case '\r':
78
- if (i+1 <= in_len && in[i+1] == '\n') {
79
- APPEND_BUFFER("\\n", 2, 1);
80
- } else {
81
- APPEND_BUFFER("\\n", 2, 1);
82
- }
83
- break;
84
- case '\n': APPEND_BUFFER("\\n", 2, 1); break;
85
- case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
86
- case '\'': APPEND_BUFFER("\\'", 2, 1); break;
188
+ static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
189
+ size_t total = 0;
190
+ unsigned char curChar, hex[2];
191
+ const unsigned char hexChars[16] = "0123456789ABCDEF";
192
+
193
+ total = in_len;
194
+ while (in_len) {
195
+ curChar = *in++;
196
+ if (curChar == ' ') {
197
+ *out++ = '+';
198
+ } else if ((curChar != '_' && curChar != '.' && curChar != '-') && NOT_HEX(curChar)) {
199
+ hex[1] = hexChars[curChar & 0x0f];
200
+ hex[0] = hexChars[(curChar >> 4) & 0x0f];
201
+ *out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
202
+ total += 2;
203
+ } else {
204
+ *out++ = curChar;
87
205
  }
206
+ in_len--;
88
207
  }
89
208
 
90
- // append the rest of the buffer
91
- memcpy(&out[total], &in[offset], i-offset);
209
+ return total;
210
+ }
211
+
212
+ static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
213
+ size_t total = 0, len = in_len;
214
+ unsigned char curChar, *start;
215
+
216
+ start = (unsigned char *)&in[0];
217
+ total = in_len;
218
+ while (len) {
219
+ curChar = *in++;
220
+ if (curChar == '%') {
221
+ if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
222
+ *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
223
+ in+=2;
224
+ total-=2;
225
+ }
226
+ } else if (curChar == '+') {
227
+ *out++ = ' ';
228
+ } else {
229
+ *out++ = curChar;
230
+ }
231
+ len--;
232
+ }
92
233
 
93
- return total + (i-offset);
234
+ return total;
94
235
  }
95
236
 
96
237
  static VALUE rb_escape_html(VALUE self, VALUE str) {
@@ -202,15 +343,130 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
202
343
  return rb_output_buf;
203
344
  }
204
345
 
346
+ static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
347
+ if (str == Qnil) {
348
+ return rb_str_new2("");
349
+ }
350
+
351
+ Check_Type(str, T_STRING);
352
+
353
+ VALUE rb_output_buf;
354
+ #ifdef HAVE_RUBY_ENCODING_H
355
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
356
+ rb_encoding *original_encoding = rb_enc_get(str);
357
+ #endif
358
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
359
+ size_t len = RSTRING_LEN(str), new_len = 0;
360
+
361
+ // this is the max size the string could be
362
+ // TODO: we should try to be more intelligent about this
363
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
364
+
365
+ // perform our unescape, returning the new string's length
366
+ new_len = unescape_javascript(outBuf, inBuf, len);
367
+
368
+ // create our new ruby string
369
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
370
+
371
+ // free the temporary C string
372
+ free(outBuf);
373
+
374
+ #ifdef HAVE_RUBY_ENCODING_H
375
+ rb_enc_associate(rb_output_buf, original_encoding);
376
+ if (default_internal_enc) {
377
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
378
+ } else {
379
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
380
+ }
381
+ #endif
382
+ return rb_output_buf;
383
+ }
384
+
385
+ static VALUE rb_escape_url(VALUE self, VALUE str) {
386
+ Check_Type(str, T_STRING);
387
+
388
+ VALUE rb_output_buf;
389
+ #ifdef HAVE_RUBY_ENCODING_H
390
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
391
+ rb_encoding *original_encoding = rb_enc_get(str);
392
+ #endif
393
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
394
+ size_t len = RSTRING_LEN(str), new_len = 0;
395
+
396
+ // this is the max size the string could be
397
+ // TODO: we should try to be more intelligent about this
398
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
399
+
400
+ // perform our escape, returning the new string's length
401
+ new_len = escape_url(outBuf, inBuf, len);
402
+
403
+ // create our new ruby string
404
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
405
+
406
+ // free the temporary C string
407
+ free(outBuf);
408
+
409
+ #ifdef HAVE_RUBY_ENCODING_H
410
+ rb_enc_associate(rb_output_buf, original_encoding);
411
+ if (default_internal_enc) {
412
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
413
+ } else {
414
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
415
+ }
416
+ #endif
417
+ return rb_output_buf;
418
+ }
419
+
420
+ static VALUE rb_unescape_url(VALUE self, VALUE str) {
421
+ Check_Type(str, T_STRING);
422
+
423
+ VALUE rb_output_buf;
424
+ #ifdef HAVE_RUBY_ENCODING_H
425
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
426
+ rb_encoding *original_encoding = rb_enc_get(str);
427
+ #endif
428
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
429
+ size_t len = RSTRING_LEN(str), new_len = 0;
430
+
431
+ // this is the max size the string could be
432
+ // TODO: we should try to be more intelligent about this
433
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
434
+
435
+ // perform our unescape, returning the new string's length
436
+ new_len = unescape_url(outBuf, inBuf, len);
437
+
438
+ // create our new ruby string
439
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
440
+
441
+ // free the temporary C string
442
+ free(outBuf);
443
+
444
+ #ifdef HAVE_RUBY_ENCODING_H
445
+ rb_enc_associate(rb_output_buf, original_encoding);
446
+ if (default_internal_enc) {
447
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
448
+ } else {
449
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
450
+ }
451
+ #endif
452
+ return rb_output_buf;
453
+ }
454
+
205
455
  /* Ruby Extension initializer */
206
456
  void Init_escape_utils_ext() {
207
457
  VALUE mEscape = rb_define_module("EscapeUtils");
208
- rb_define_method(mEscape, "escape_html", rb_escape_html, 1);
209
- rb_define_module_function(mEscape, "escape_html", rb_escape_html, 1);
210
- rb_define_method(mEscape, "unescape_html", rb_unescape_html, 1);
211
- rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
212
- rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
213
- rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
458
+ rb_define_method(mEscape, "escape_html", rb_escape_html, 1);
459
+ rb_define_module_function(mEscape, "escape_html", rb_escape_html, 1);
460
+ rb_define_method(mEscape, "unescape_html", rb_unescape_html, 1);
461
+ rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
462
+ rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
463
+ rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
464
+ rb_define_method(mEscape, "unescape_javascript", rb_unescape_javascript, 1);
465
+ rb_define_module_function(mEscape, "unescape_javascript", rb_unescape_javascript, 1);
466
+ rb_define_method(mEscape, "escape_url", rb_escape_url, 1);
467
+ rb_define_module_function(mEscape, "escape_url", rb_escape_url, 1);
468
+ rb_define_method(mEscape, "unescape_url", rb_unescape_url, 1);
469
+ rb_define_module_function(mEscape, "unescape_url", rb_unescape_url, 1);
214
470
 
215
471
  #ifdef HAVE_RUBY_ENCODING_H
216
472
  utf8Encoding = rb_utf8_encoding();