escape_utils 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -2,4 +2,5 @@ Makefile
2
2
  *.o
3
3
  *.bundle
4
4
  pkg/*
5
- doc/*
5
+ doc/*
6
+ *.rbc
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.5 (July 13th, 2010)
4
+ * add URL escaping and unescaping
5
+ * major refactor of HTML and Javascript escaping and unescaping logic for a decent speed up
6
+ * HTML escaping now takes html_safe? into account (for Rails/ActiveSupport users) - thanks yury!
7
+
3
8
  ## 0.1.4 (June 9th, 2010)
4
9
  * ensure strings are passed in from monkey-patches
5
10
 
data/README.rdoc CHANGED
@@ -47,8 +47,8 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
47
47
 
48
48
  == Benchmarks
49
49
 
50
- In my testing, escaping html is around 10-20x faster than the pure ruby implementations in wide use today.
51
- While unescaping html is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
50
+ In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
51
+ While unescaping html is around 40-100x faster than CGI.unescapeHTML which is also pure ruby.
52
52
  Escaping Javascript is around 16-30x faster.
53
53
 
54
54
  This output is from my laptop using the benchmark scripts in the benchmarks folder.
@@ -58,28 +58,66 @@ This output is from my laptop using the benchmark scripts in the benchmarks fold
58
58
  ==== Escaping
59
59
 
60
60
  Rack::Utils.escape_html
61
- 0.560000 0.040000 0.600000 ( 0.589475)
61
+ 9.650000 0.090000 9.740000 ( 9.750756)
62
+ Haml::Helpers.html_escape
63
+ 9.310000 0.110000 9.420000 ( 9.417317)
62
64
  ERB::Util.html_escape
63
- 0.450000 0.040000 0.490000 ( 0.492893)
65
+ 5.330000 0.390000 5.720000 ( 5.748394)
64
66
  CGI.escapeHTML
65
- 0.460000 0.030000 0.490000 ( 0.490171)
66
- Haml::Helpers.html_escape
67
- 0.430000 0.010000 0.440000 ( 0.444694)
67
+ 5.370000 0.380000 5.750000 ( 5.791344)
68
+ FasterHTMLEscape.html_escape
69
+ 0.520000 0.010000 0.530000 ( 0.539485)
70
+ fast_xs_extra#fast_xs_html
71
+ 0.310000 0.030000 0.340000 ( 0.336734)
68
72
  EscapeUtils.escape_html
69
- 0.050000 0.010000 0.060000 ( 0.054799)
73
+ 0.200000 0.050000 0.250000 ( 0.258839)
70
74
 
71
- === Unescaping
75
+ ==== Unescaping
72
76
 
73
77
  CGI.unescapeHTML
74
- 1.140000 0.010000 1.150000 ( 1.148470)
78
+ 16.520000 0.080000 16.600000 ( 16.853888)
75
79
  EscapeUtils.unescape_html
76
- 0.040000 0.000000 0.040000 ( 0.046166)
80
+ 0.120000 0.040000 0.160000 ( 0.162696)
77
81
 
78
82
  === Javascript
79
83
 
80
84
  ==== Escaping
81
85
 
82
86
  ActionView::Helpers::JavaScriptHelper#escape_javascript
83
- 2.000000 0.020000 2.020000 ( 2.023047)
87
+ 3.810000 0.100000 3.910000 ( 3.925557)
84
88
  EscapeUtils.escape_javascript
85
- 0.110000 0.010000 0.120000 ( 0.121761)
89
+ 0.200000 0.040000 0.240000 ( 0.236692)
90
+
91
+ ==== Unescaping
92
+
93
+ I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
94
+
95
+ === URL
96
+
97
+ ==== Escaping
98
+
99
+ ERB::Util.url_encode
100
+ 0.520000 0.010000 0.530000 ( 0.529277)
101
+ Rack::Utils.escape
102
+ 0.460000 0.010000 0.470000 ( 0.466962)
103
+ CGI.escape
104
+ 0.440000 0.000000 0.440000 ( 0.443017)
105
+ URLEscape#escape
106
+ 0.040000 0.000000 0.040000 ( 0.045661)
107
+ fast_xs_extra#fast_xs_url
108
+ 0.010000 0.000000 0.010000 ( 0.015429)
109
+ EscapeUtils.escape_url
110
+ 0.010000 0.000000 0.010000 ( 0.010843)
111
+
112
+ ==== Unescaping
113
+
114
+ Rack::Utils.unescape
115
+ 0.250000 0.010000 0.260000 ( 0.257558)
116
+ CGI.unescape
117
+ 0.250000 0.000000 0.250000 ( 0.257837)
118
+ URLEscape#unescape
119
+ 0.040000 0.000000 0.040000 ( 0.031548)
120
+ fast_xs_extra#fast_uxs_cgi
121
+ 0.010000 0.000000 0.010000 ( 0.006062)
122
+ EscapeUtils.unescape_url
123
+ 0.000000 0.000000 0.000000 ( 0.005679)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.4
1
+ 0.1.5
@@ -9,6 +9,8 @@ require 'rack'
9
9
  require 'erb'
10
10
  require 'cgi'
11
11
  require 'haml'
12
+ require 'fast_xs_extra'
13
+ require 'faster_html_escape'
12
14
  require 'escape_utils'
13
15
 
14
16
  module HamlBench
@@ -16,9 +18,9 @@ module HamlBench
16
18
  end
17
19
 
18
20
  times = 100
19
- url = "http://maps.google.com"
21
+ url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
20
22
  html = `curl -s #{url}`
21
- puts "Escaping #{html.bytesize} bytes of html from #{url}"
23
+ puts "Escaping #{html.bytesize} bytes of html #{times} times, from #{url}"
22
24
 
23
25
  Benchmark.bmbm do |x|
24
26
  x.report do
@@ -28,6 +30,13 @@ Benchmark.bmbm do |x|
28
30
  end
29
31
  end
30
32
 
33
+ x.report do
34
+ puts "Haml::Helpers.html_escape"
35
+ times.times do
36
+ HamlBench.html_escape(html)
37
+ end
38
+ end
39
+
31
40
  x.report do
32
41
  puts "ERB::Util.html_escape"
33
42
  times.times do
@@ -43,9 +52,16 @@ Benchmark.bmbm do |x|
43
52
  end
44
53
 
45
54
  x.report do
46
- puts "Haml::Helpers.html_escape"
55
+ puts "FasterHTMLEscape.html_escape"
47
56
  times.times do
48
- HamlBench.html_escape(html)
57
+ FasterHTMLEscape.html_escape(html)
58
+ end
59
+ end
60
+
61
+ x.report do
62
+ puts "fast_xs_extra#fast_xs_html"
63
+ times.times do
64
+ html.fast_xs_html
49
65
  end
50
66
  end
51
67
 
@@ -14,10 +14,10 @@ module HamlBench
14
14
  end
15
15
 
16
16
  times = 100
17
- url = "http://maps.google.com"
17
+ url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
18
18
  html = `curl -s #{url}`
19
19
  escaped_html = EscapeUtils.escape_html(html)
20
- puts "Unescaping #{escaped_html.bytesize} bytes of escaped html from #{url}"
20
+ puts "Unescaping #{escaped_html.bytesize} bytes of escaped html #{times} times, from #{url}"
21
21
 
22
22
  Benchmark.bmbm do |x|
23
23
  x.report do
@@ -13,11 +13,9 @@ class ActionPackBench
13
13
  end
14
14
 
15
15
  times = 100
16
- url = "http://code.jquery.com/jquery-1.4.2.js"
16
+ url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
17
17
  javascript = `curl -s #{url}`
18
- puts "Escaping #{javascript.bytesize} bytes of javascript from #{url}"
19
-
20
- puts ActionPackBench.escape_javascript(javascript).eql?(EscapeUtils.escape_javascript(javascript))
18
+ puts "Escaping #{javascript.bytesize} bytes of javascript #{times} times, from #{url}"
21
19
 
22
20
  Benchmark.bmbm do |x|
23
21
  x.report do
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+
5
+ require 'rubygems'
6
+ require 'benchmark'
7
+
8
+ require 'escape_utils'
9
+
10
+ times = 100
11
+ url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
12
+ javascript = `curl -s #{url}`
13
+ escaped_javascript = EscapeUtils.escape_javascript(javascript)
14
+ puts "Escaping #{escaped_javascript.bytesize} bytes of javascript #{times} times, from #{url}"
15
+
16
+ Benchmark.bmbm do |x|
17
+ x.report do
18
+ puts "EscapeUtils.escape_javascript"
19
+ times.times do
20
+ EscapeUtils.unescape_javascript(escaped_javascript)
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,61 @@
1
+ # encoding: utf-8
2
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+
5
+ require 'rubygems'
6
+ require 'benchmark'
7
+
8
+ require 'rack'
9
+ require 'erb'
10
+ require 'cgi'
11
+ require 'url_escape'
12
+ require 'fast_xs_extra'
13
+ require 'escape_utils'
14
+
15
+ times = 10_000
16
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
17
+ puts "Escaping a #{url.bytesize} byte URL #{times} times"
18
+
19
+ Benchmark.bmbm do |x|
20
+ x.report do
21
+ puts "ERB::Util.url_encode"
22
+ times.times do
23
+ ERB::Util.url_encode(url)
24
+ end
25
+ end
26
+
27
+ x.report do
28
+ puts "Rack::Utils.escape"
29
+ times.times do
30
+ Rack::Utils.escape(url)
31
+ end
32
+ end
33
+
34
+ x.report do
35
+ puts "CGI.escape"
36
+ times.times do
37
+ CGI.escape(url)
38
+ end
39
+ end
40
+
41
+ x.report do
42
+ puts "URLEscape#escape"
43
+ times.times do
44
+ URLEscape.escape(url)
45
+ end
46
+ end
47
+
48
+ x.report do
49
+ puts "fast_xs_extra#fast_xs_url"
50
+ times.times do
51
+ url.fast_xs_url
52
+ end
53
+ end
54
+
55
+ x.report do
56
+ puts "EscapeUtils.escape_url"
57
+ times.times do
58
+ EscapeUtils.escape_url(url)
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,54 @@
1
+ # encoding: utf-8
2
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+
5
+ require 'rubygems'
6
+ require 'benchmark'
7
+
8
+ require 'rack'
9
+ require 'cgi'
10
+ require 'url_escape'
11
+ require 'fast_xs_extra'
12
+ require 'escape_utils'
13
+
14
+ times = 10_000
15
+ url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
16
+ escaped_url = EscapeUtils.escape_url(url)
17
+ puts "Escaping a #{url.bytesize} byte URL #{times} times"
18
+
19
+ Benchmark.bmbm do |x|
20
+ x.report do
21
+ puts "Rack::Utils.unescape"
22
+ times.times do
23
+ Rack::Utils.unescape(escaped_url)
24
+ end
25
+ end
26
+
27
+ x.report do
28
+ puts "CGI.unescape"
29
+ times.times do
30
+ CGI.unescape(escaped_url)
31
+ end
32
+ end
33
+
34
+ x.report do
35
+ puts "URLEscape#unescape"
36
+ times.times do
37
+ URLEscape.unescape(escaped_url)
38
+ end
39
+ end
40
+
41
+ x.report do
42
+ puts "fast_xs_extra#fast_uxs_cgi"
43
+ times.times do
44
+ url.fast_uxs_cgi
45
+ end
46
+ end
47
+
48
+ x.report do
49
+ puts "EscapeUtils.unescape_url"
50
+ times.times do
51
+ EscapeUtils.unescape_url(escaped_url)
52
+ end
53
+ end
54
+ end
data/escape_utils.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{escape_utils}
8
- s.version = "0.1.4"
8
+ s.version = "0.1.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brian Lopez"]
12
- s.date = %q{2010-06-09}
12
+ s.date = %q{2010-07-13}
13
13
  s.email = %q{seniorlopez@gmail.com}
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.extra_rdoc_files = [
@@ -25,6 +25,9 @@ Gem::Specification.new do |s|
25
25
  "benchmark/html_escape.rb",
26
26
  "benchmark/html_unescape.rb",
27
27
  "benchmark/javascript_escape.rb",
28
+ "benchmark/javascript_unescape.rb",
29
+ "benchmark/url_escape.rb",
30
+ "benchmark/url_unescape.rb",
28
31
  "escape_utils.gemspec",
29
32
  "ext/escape_utils.c",
30
33
  "ext/extconf.rb",
@@ -33,31 +36,43 @@ Gem::Specification.new do |s|
33
36
  "lib/escape_utils/html/erb.rb",
34
37
  "lib/escape_utils/html/haml.rb",
35
38
  "lib/escape_utils/html/rack.rb",
39
+ "lib/escape_utils/html_safety.rb",
36
40
  "lib/escape_utils/javascript/action_view.rb",
41
+ "lib/escape_utils/url/cgi.rb",
42
+ "lib/escape_utils/url/erb.rb",
43
+ "lib/escape_utils/url/rack.rb",
37
44
  "spec/html/escape_spec.rb",
38
45
  "spec/html/unescape_spec.rb",
46
+ "spec/html_safety_spec.rb",
39
47
  "spec/javascript/escape_spec.rb",
48
+ "spec/javascript/unescape_spec.rb",
40
49
  "spec/rcov.opts",
41
50
  "spec/spec.opts",
42
- "spec/spec_helper.rb"
51
+ "spec/spec_helper.rb",
52
+ "spec/url/escape_spec.rb",
53
+ "spec/url/unescape_spec.rb"
43
54
  ]
44
55
  s.homepage = %q{http://github.com/brianmario/escape_utils}
45
56
  s.rdoc_options = ["--charset=UTF-8"]
46
57
  s.require_paths = ["lib", "ext"]
47
- s.rubygems_version = %q{1.3.6}
58
+ s.rubygems_version = %q{1.3.7}
48
59
  s.summary = %q{Faster string escaping routines for your web apps}
49
60
  s.test_files = [
50
61
  "spec/html/escape_spec.rb",
51
62
  "spec/html/unescape_spec.rb",
63
+ "spec/html_safety_spec.rb",
52
64
  "spec/javascript/escape_spec.rb",
53
- "spec/spec_helper.rb"
65
+ "spec/javascript/unescape_spec.rb",
66
+ "spec/spec_helper.rb",
67
+ "spec/url/escape_spec.rb",
68
+ "spec/url/unescape_spec.rb"
54
69
  ]
55
70
 
56
71
  if s.respond_to? :specification_version then
57
72
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
58
73
  s.specification_version = 3
59
74
 
60
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
75
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
61
76
  else
62
77
  end
63
78
  else
data/ext/escape_utils.c CHANGED
@@ -4,93 +4,234 @@
4
4
  static rb_encoding *utf8Encoding;
5
5
  #endif
6
6
 
7
- #define APPEND_BUFFER(escape, len, scoot_by) \
8
- memcpy(&out[total], &in[offset], i-offset); \
9
- total += i-offset; \
10
- offset = i+scoot_by; \
11
- memcpy(&out[total], escape, len); \
12
- total += len; \
7
+ #define IS_HEX(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) /* true only for an ASCII hex digit [0-9A-Fa-f] */
8
+ #define NOT_HEX(c) ((c < 48 || c > 57) && (c < 65 || c > 90) && (c < 97 || c > 122)) /* despite the name: true when c is NOT an ASCII letter or digit */
9
+ #define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10) /* numeric value of a hex digit; only meaningful when IS_HEX(c) holds */
13
10
 
14
11
  static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
15
- size_t i = 0, offset = 0, total = 0;
16
-
17
- for(;i<in_len;i++) {
18
- switch(in[i]) {
19
- case '&': APPEND_BUFFER("&amp;", 5, 1); break;
20
- case '<': APPEND_BUFFER("&lt;", 4, 1); break;
21
- case '>': APPEND_BUFFER("&gt;", 4, 1); break;
22
- case '\'': APPEND_BUFFER("&#39;", 5, 1); break;
23
- case '\"': APPEND_BUFFER("&quot;", 6, 1); break;
12
+ size_t total = 0;
13
+ unsigned char curChar;
14
+
15
+ total = in_len;
16
+ while (in_len) {
17
+ curChar = *in++;
18
+ switch (curChar) {
19
+ case '<':
20
+ *out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';';
21
+ total += 3;
22
+ break;
23
+ case '>':
24
+ *out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';';
25
+ total += 3;
26
+ break;
27
+ case '&':
28
+ *out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';';
29
+ total += 4;
30
+ break;
31
+ case '\'':
32
+ *out++ = '&'; *out++ = '#'; *out++ = '3'; *out++ = '9'; *out++ = ';';
33
+ total += 4;
34
+ break;
35
+ case '\"':
36
+ *out++ = '&'; *out++ = 'q'; *out++ = 'u'; *out++ = 'o'; *out++ = 't'; *out++ = ';';
37
+ total += 5;
38
+ break;
39
+ case '/':
40
+ *out++ = '&'; *out++ = '#'; *out++ = '4'; *out++ = '7'; *out++ = ';';
41
+ total += 4;
42
+ break;
43
+ default:
44
+ *out++ = curChar;
45
+ break;
24
46
  }
47
+ in_len--;
25
48
  }
26
49
 
27
- // append the rest of the buffer
28
- memcpy(&out[total], &in[offset], i-offset);
29
-
30
- return total + (i-offset);
50
+ return total;
31
51
  }
32
52
 
33
53
  static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
34
- size_t i = 0, offset = 0, total = 0;
35
-
36
- for(;i<in_len;i++) {
37
- if(in[i] == '&') {
38
- if (i+3 <= in_len) {
39
- if (memcmp(&in[i], "&lt;", 4) == 0) {
40
- APPEND_BUFFER("<", 1, 4);
41
- } else if (memcmp(&in[i], "&gt;", 4) == 0) {
42
- APPEND_BUFFER(">", 1, 4);
43
- }
54
+ size_t total = 0, len = in_len;
55
+ unsigned char curChar, *start;
56
+
57
+ start = (unsigned char *)&in[0];
58
+ total = in_len;
59
+ while (len) {
60
+ curChar = *in++;
61
+ if (curChar == '&') {
62
+ if ((in-start)+2 <= in_len && *in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
63
+ *out++ = '<';
64
+ total-=3;
65
+ in+=3;
66
+ len-=3;
67
+ } else if ((in-start)+2 <= in_len && *in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
68
+ *out++ = '>';
69
+ total-=3;
70
+ in+=3;
71
+ len-=3;
72
+ } else if ((in-start)+3 <= in_len && *in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
73
+ *out++ = '&';
74
+ total-=4;
75
+ in+=4;
76
+ len-=4;
77
+ } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
78
+ *out++ = '\'';
79
+ total-=4;
80
+ in+=4;
81
+ len-=4;
82
+ } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
83
+ *out++ = '/';
84
+ total-=4;
85
+ in+=4;
86
+ len-=4;
87
+ } else if ((in-start)+4 <= in_len && *in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
88
+ *out++ = '\"';
89
+ total-=5;
90
+ in+=5;
91
+ len-=5;
44
92
  }
45
- if (i+4 <= in_len) {
46
- if (memcmp(&in[i], "&amp;", 5) == 0) {
47
- APPEND_BUFFER("&", 1, 5);
48
- } else if (memcmp(&in[i], "&#39;", 5) == 0) {
49
- APPEND_BUFFER("\'", 1, 5);
50
- }
93
+ } else {
94
+ *out++ = curChar;
95
+ }
96
+ len--;
97
+ }
98
+
99
+ return total;
100
+ }
101
+
102
+ static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
103
+ size_t total = 0;
104
+ unsigned char curChar;
105
+ /* Backslash-escapes \, ', ", newlines and "</" from in[0..in_len) into out; returns the number of bytes written. */
106
+ total = in_len;
107
+ while (in_len) {
108
+ curChar = *in++;
109
+ switch (curChar) {
110
+ case '\\':
111
+ *out++ = '\\'; *out++ = '\\';
112
+ total++;
113
+ break;
114
+ case '<':
115
+ *out++ = '<'; if (in_len > 1 && *in == '/') { /* always emit '<'; only "</" gets the extra backslash */
116
+ *out++ = '\\'; *out++ = '/';
117
+ in++; in_len--;
118
+ total++;
51
119
  }
52
- if (i+5 <= in_len) {
53
- if (memcmp(&in[i], "&quot;", 6) == 0) {
54
- APPEND_BUFFER("\"", 1, 6);
55
- }
120
+ break;
121
+ case '\r':
122
+ if (in_len > 1 && *in == '\n') { /* CRLF collapses to a single \n; look ahead only while input remains */
123
+ *out++ = '\\'; *out++ = 'n';
124
+ in++; in_len--;
125
+ } else {
126
+ *out++ = '\\'; *out++ = 'n';
127
+ total++;
56
128
  }
129
+ break;
130
+ case '\n':
131
+ *out++ = '\\'; *out++ = 'n';
132
+ total++;
133
+ break;
134
+ case '\'':
135
+ *out++ = '\\'; *out++ = '\'';
136
+ total++;
137
+ break;
138
+ case '\"':
139
+ *out++ = '\\'; *out++ = '\"';
140
+ total++;
141
+ break;
142
+ default:
143
+ *out++ = curChar;
144
+ break;
57
145
  }
146
+ in_len--;
58
147
  }
59
148
 
60
- // append the rest of the buffer
61
- memcpy(&out[total], &in[offset], i-offset);
149
+ return total;
150
+ }
62
151
 
63
- return total + (i-offset);
152
+ static size_t unescape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
153
+ size_t total = 0;
154
+ unsigned char curChar;
155
+ /* Reverses \n, \\, \', \" and \/ escapes from in[0..in_len) into out; returns the number of bytes written. */
156
+ total = in_len;
157
+ while (in_len) {
158
+ curChar = *in++;
159
+ if (curChar == '\\' && in_len > 1) { /* a trailing lone backslash is copied as-is; consuming two bytes there would wrap in_len */
160
+ if (*in == 'n') {
161
+ *out++ = '\n';
162
+ total--;
163
+ } else if (*in == '\\') {
164
+ *out++ = '\\';
165
+ total--;
166
+ } else if (*in == '\'') {
167
+ *out++ = '\'';
168
+ total--;
169
+ } else if (*in == '\"') {
170
+ *out++ = '\"';
171
+ total--;
172
+ } else if (*in == '/') {
173
+ *out++ = '/';
174
+ total--;
175
+ } else {
176
+ *out++ = curChar; *out++ = *in; /* unknown escape: keep both bytes so the output length matches total */
177
+ }
178
+ in++; in_len--;
179
+ } else {
180
+ *out++ = curChar;
181
+ }
182
+ in_len--;
183
+ }
184
+
185
+ return total;
64
186
  }
65
187
 
66
- static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
67
- size_t i = 0, offset = 0, total = 0;
68
-
69
- for(;i<in_len;i++) {
70
- switch(in[i]) {
71
- case '\\': APPEND_BUFFER("\\\\", 2, 1); break;
72
- case '<':
73
- if (i+1 <= in_len && in[i+1] == '/') {
74
- APPEND_BUFFER("<\\/", 3, 2);
75
- }
76
- break;
77
- case '\r':
78
- if (i+1 <= in_len && in[i+1] == '\n') {
79
- APPEND_BUFFER("\\n", 2, 1);
80
- } else {
81
- APPEND_BUFFER("\\n", 2, 1);
82
- }
83
- break;
84
- case '\n': APPEND_BUFFER("\\n", 2, 1); break;
85
- case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
86
- case '\'': APPEND_BUFFER("\\'", 2, 1); break;
188
+ static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
189
+ size_t total = 0;
190
+ unsigned char curChar, hex[2];
191
+ const unsigned char hexChars[16] = "0123456789ABCDEF";
192
+
193
+ total = in_len;
194
+ while (in_len) {
195
+ curChar = *in++;
196
+ if (curChar == ' ') {
197
+ *out++ = '+';
198
+ } else if ((curChar != '_' && curChar != '.' && curChar != '-') && NOT_HEX(curChar)) {
199
+ hex[1] = hexChars[curChar & 0x0f];
200
+ hex[0] = hexChars[(curChar >> 4) & 0x0f];
201
+ *out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
202
+ total += 2;
203
+ } else {
204
+ *out++ = curChar;
87
205
  }
206
+ in_len--;
88
207
  }
89
208
 
90
- // append the rest of the buffer
91
- memcpy(&out[total], &in[offset], i-offset);
209
+ return total;
210
+ }
211
+
212
+ static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
213
+ size_t total = 0, len = in_len;
214
+ unsigned char curChar, *start;
215
+ /* Decodes %XX sequences and '+' (as a space) from in[0..in_len) into out; returns the number of bytes written. */
216
+ start = (unsigned char *)&in[0];
217
+ total = in_len;
218
+ while (len) {
219
+ curChar = *in++;
220
+ if (curChar == '%') {
221
+ if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
222
+ *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
223
+ in+=2; len-=2; /* the two hex digits are consumed: keep the remaining-length counter in step with in */
224
+ total-=2;
225
+ } else { *out++ = curChar; } /* '%' without two following hex digits is copied literally */
226
+ } else if (curChar == '+') {
227
+ *out++ = ' ';
228
+ } else {
229
+ *out++ = curChar;
230
+ }
231
+ len--;
232
+ }
92
233
 
93
- return total + (i-offset);
234
+ return total;
94
235
  }
95
236
 
96
237
  static VALUE rb_escape_html(VALUE self, VALUE str) {
@@ -202,15 +343,130 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
202
343
  return rb_output_buf;
203
344
  }
204
345
 
346
+ static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
347
+ if (str == Qnil) {
348
+ return rb_str_new2("");
349
+ }
350
+
351
+ Check_Type(str, T_STRING);
352
+
353
+ VALUE rb_output_buf;
354
+ #ifdef HAVE_RUBY_ENCODING_H
355
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
356
+ rb_encoding *original_encoding = rb_enc_get(str);
357
+ #endif
358
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
359
+ size_t len = RSTRING_LEN(str), new_len = 0;
360
+
361
+ // this is the max size the string could be
362
+ // TODO: we should try to be more intelligent about this
363
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
364
+
365
+ // perform our escape, returning the new string's length
366
+ new_len = unescape_javascript(outBuf, inBuf, len);
367
+
368
+ // create our new ruby string
369
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
370
+
371
+ // free the temporary C string
372
+ free(outBuf);
373
+
374
+ #ifdef HAVE_RUBY_ENCODING_H
375
+ rb_enc_associate(rb_output_buf, original_encoding);
376
+ if (default_internal_enc) {
377
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
378
+ } else {
379
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
380
+ }
381
+ #endif
382
+ return rb_output_buf;
383
+ }
384
+
385
+ static VALUE rb_escape_url(VALUE self, VALUE str) {
386
+ Check_Type(str, T_STRING);
387
+
388
+ VALUE rb_output_buf;
389
+ #ifdef HAVE_RUBY_ENCODING_H
390
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
391
+ rb_encoding *original_encoding = rb_enc_get(str);
392
+ #endif
393
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
394
+ size_t len = RSTRING_LEN(str), new_len = 0;
395
+
396
+ // this is the max size the string could be
397
+ // TODO: we should try to be more intelligent about this
398
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
399
+
400
+ // perform our escape, returning the new string's length
401
+ new_len = escape_url(outBuf, inBuf, len);
402
+
403
+ // create our new ruby string
404
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
405
+
406
+ // free the temporary C string
407
+ free(outBuf);
408
+
409
+ #ifdef HAVE_RUBY_ENCODING_H
410
+ rb_enc_associate(rb_output_buf, original_encoding);
411
+ if (default_internal_enc) {
412
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
413
+ } else {
414
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
415
+ }
416
+ #endif
417
+ return rb_output_buf;
418
+ }
419
+
420
+ static VALUE rb_unescape_url(VALUE self, VALUE str) {
421
+ Check_Type(str, T_STRING);
422
+
423
+ VALUE rb_output_buf;
424
+ #ifdef HAVE_RUBY_ENCODING_H
425
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
426
+ rb_encoding *original_encoding = rb_enc_get(str);
427
+ #endif
428
+ unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
429
+ size_t len = RSTRING_LEN(str), new_len = 0;
430
+
431
+ // this is the max size the string could be
432
+ // TODO: we should try to be more intelligent about this
433
+ unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
434
+
435
+ // perform our escape, returning the new string's length
436
+ new_len = unescape_url(outBuf, inBuf, len);
437
+
438
+ // create our new ruby string
439
+ rb_output_buf = rb_str_new((char *)outBuf, new_len);
440
+
441
+ // free the temporary C string
442
+ free(outBuf);
443
+
444
+ #ifdef HAVE_RUBY_ENCODING_H
445
+ rb_enc_associate(rb_output_buf, original_encoding);
446
+ if (default_internal_enc) {
447
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
448
+ } else {
449
+ rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
450
+ }
451
+ #endif
452
+ return rb_output_buf;
453
+ }
454
+
205
455
  /* Ruby Extension initializer */
206
456
  void Init_escape_utils_ext() {
207
457
  VALUE mEscape = rb_define_module("EscapeUtils");
208
- rb_define_method(mEscape, "escape_html", rb_escape_html, 1);
209
- rb_define_module_function(mEscape, "escape_html", rb_escape_html, 1);
210
- rb_define_method(mEscape, "unescape_html", rb_unescape_html, 1);
211
- rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
212
- rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
213
- rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
458
+ rb_define_method(mEscape, "escape_html", rb_escape_html, 1);
459
+ rb_define_module_function(mEscape, "escape_html", rb_escape_html, 1);
460
+ rb_define_method(mEscape, "unescape_html", rb_unescape_html, 1);
461
+ rb_define_module_function(mEscape, "unescape_html", rb_unescape_html, 1);
462
+ rb_define_method(mEscape, "escape_javascript", rb_escape_javascript, 1);
463
+ rb_define_module_function(mEscape, "escape_javascript", rb_escape_javascript, 1);
464
+ rb_define_method(mEscape, "unescape_javascript", rb_unescape_javascript, 1);
465
+ rb_define_module_function(mEscape, "unescape_javascript", rb_unescape_javascript, 1);
466
+ rb_define_method(mEscape, "escape_url", rb_escape_url, 1);
467
+ rb_define_module_function(mEscape, "escape_url", rb_escape_url, 1);
468
+ rb_define_method(mEscape, "unescape_url", rb_unescape_url, 1);
469
+ rb_define_module_function(mEscape, "unescape_url", rb_unescape_url, 1);
214
470
 
215
471
  #ifdef HAVE_RUBY_ENCODING_H
216
472
  utf8Encoding = rb_utf8_encoding();