RubyGems - escape_utils - Versions diffs - 0.1.4 → 0.1.5 - Mend

escape_utils 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

data/.gitignore +2 -1
data/CHANGELOG.md +5 -0
data/README.rdoc +51 -13
data/VERSION +1 -1
data/benchmark/html_escape.rb +20 -4
data/benchmark/html_unescape.rb +2 -2
data/benchmark/javascript_escape.rb +2 -4
data/benchmark/javascript_unescape.rb +23 -0
data/benchmark/url_escape.rb +61 -0
data/benchmark/url_unescape.rb +54 -0
data/escape_utils.gemspec +21 -6
data/ext/escape_utils.c +328 -72
data/lib/escape_utils.rb +3 -1
data/lib/escape_utils/html/cgi.rb +8 -2
data/lib/escape_utils/html/erb.rb +2 -3
data/lib/escape_utils/html/haml.rb +3 -3
data/lib/escape_utils/html/rack.rb +4 -6
data/lib/escape_utils/html_safety.rb +19 -0
data/lib/escape_utils/url/cgi.rb +10 -0
data/lib/escape_utils/url/erb.rb +12 -0
data/lib/escape_utils/url/rack.rb +14 -0
data/spec/html/escape_spec.rb +4 -4
data/spec/html/unescape_spec.rb +4 -4
data/spec/html_safety_spec.rb +49 -0
data/spec/javascript/escape_spec.rb +1 -1
data/spec/javascript/unescape_spec.rb +39 -0
data/spec/url/escape_spec.rb +52 -0
data/spec/url/unescape_spec.rb +52 -0
metadata +24 -4

data/.gitignore CHANGED Viewed

@@ -2,4 +2,5 @@ Makefile
 *.o
 *.bundle
 pkg/*
-doc/*
+doc/*
+*.rbc

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,10 @@
 # Changelog
+## 0.1.5 (July 13th, 2010)
+* add URL escaping and unescaping
+* major refactor of HTML and Javascript escaping and unescaping logic for a decent speed up
+* HTML escaping now takes html_safe? into account (for Rails/ActiveSupport users) - thanks yury!
 ## 0.1.4 (June 9th, 2010)
 * ensure strings are passed in from monkey-patches

data/README.rdoc CHANGED Viewed

@@ -47,8 +47,8 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
 == Benchmarks
-In my testing, escaping html is around 10-20x faster than the pure ruby implementations in wide use today.
-While unescaping html is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
+In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
+While unescaping html is around 40-100x faster than CGI.unescapeHTML which is also pure ruby.
 Escaping Javascript is around 16-30x faster.
 This output is from my laptop using the benchmark scripts in the benchmarks folder.
@@ -58,28 +58,66 @@ This output is from my laptop using the benchmark scripts in the benchmarks fold
 ==== Escaping
  Rack::Utils.escape_html
-  0.560000   0.040000   0.600000 (  0.589475)
+  9.650000   0.090000   9.740000 (  9.750756)
+ Haml::Helpers.html_escape
+  9.310000   0.110000   9.420000 (  9.417317)
  ERB::Util.html_escape
-  0.450000   0.040000   0.490000 (  0.492893)
+  5.330000   0.390000   5.720000 (  5.748394)
  CGI.escapeHTML
-  0.460000   0.030000   0.490000 (  0.490171)
- Haml::Helpers.html_escape
-  0.430000   0.010000   0.440000 (  0.444694)
+  5.370000   0.380000   5.750000 (  5.791344)
+ FasterHTMLEscape.html_escape
+  0.520000   0.010000   0.530000 (  0.539485)
+ fast_xs_extra#fast_xs_html
+  0.310000   0.030000   0.340000 (  0.336734)
  EscapeUtils.escape_html
-  0.050000   0.010000   0.060000 (  0.054799)
+  0.200000   0.050000   0.250000 (  0.258839)
-=== Unescaping
+==== Unescaping
  CGI.unescapeHTML
-  1.140000   0.010000   1.150000 (  1.148470)
+  16.520000   0.080000  16.600000 ( 16.853888)
  EscapeUtils.unescape_html
-  0.040000   0.000000   0.040000 (  0.046166)
+  0.120000   0.040000   0.160000  (  0.162696)
 === Javascript
 ==== Escaping
  ActionView::Helpers::JavaScriptHelper#escape_javascript
-  2.000000   0.020000   2.020000 (  2.023047)
+  3.810000   0.100000   3.910000 (  3.925557)
  EscapeUtils.escape_javascript
-  0.110000   0.010000   0.120000 (  0.121761)
+  0.200000   0.040000   0.240000 (  0.236692)
+==== Unescaping
+I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
+=== URL
+==== Escaping
+ ERB::Util.url_encode
+  0.520000   0.010000   0.530000 (  0.529277)
+ Rack::Utils.escape
+  0.460000   0.010000   0.470000 (  0.466962)
+ CGI.escape
+  0.440000   0.000000   0.440000 (  0.443017)
+ URLEscape#escape
+  0.040000   0.000000   0.040000 (  0.045661)
+ fast_xs_extra#fast_xs_url
+  0.010000   0.000000   0.010000 (  0.015429)
+ EscapeUtils.escape_url
+  0.010000   0.000000   0.010000 (  0.010843)
+==== Unescaping
+ Rack::Utils.unescape
+  0.250000   0.010000   0.260000 (  0.257558)
+ CGI.unescape
+  0.250000   0.000000   0.250000 (  0.257837)
+ URLEscape#unescape
+  0.040000   0.000000   0.040000 (  0.031548)
+ fast_xs_extra#fast_uxs_cgi
+  0.010000   0.000000   0.010000 (  0.006062)
+ EscapeUtils.unescape_url
+  0.000000   0.000000   0.000000 (  0.005679)

data/VERSION CHANGED Viewed

@@ -1 +1 @@
-0.1.4
+0.1.5

data/benchmark/html_escape.rb CHANGED Viewed

@@ -9,6 +9,8 @@ require 'rack'
 require 'erb'
 require 'cgi'
 require 'haml'
+require 'fast_xs_extra'
+require 'faster_html_escape'
 require 'escape_utils'
 module HamlBench
@@ -16,9 +18,9 @@ module HamlBench
 end
 times = 100
-url = "http://maps.google.com"
+url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
 html = `curl -s #{url}`
-puts "Escaping #{html.bytesize} bytes of html from #{url}"
+puts "Escaping #{html.bytesize} bytes of html #{times} times, from #{url}"
 Benchmark.bmbm do |x|
   x.report do
@@ -28,6 +30,13 @@ Benchmark.bmbm do |x|
     end
   end
+  x.report do
+    puts "Haml::Helpers.html_escape"
+    times.times do
+      HamlBench.html_escape(html)
+    end
+  end
   x.report do
     puts "ERB::Util.html_escape"
     times.times do
@@ -43,9 +52,16 @@ Benchmark.bmbm do |x|
   end
   x.report do
-    puts "Haml::Helpers.html_escape"
+    puts "FasterHTMLEscape.html_escape"
     times.times do
-      HamlBench.html_escape(html)
+      FasterHTMLEscape.html_escape(html)
+    end
+  end
+  x.report do
+    puts "fast_xs_extra#fast_xs_html"
+    times.times do
+      html.fast_xs_html
     end
   end

data/benchmark/html_unescape.rb CHANGED Viewed

@@ -14,10 +14,10 @@ module HamlBench
 end
 times = 100
-url = "http://maps.google.com"
+url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
 html = `curl -s #{url}`
 escaped_html = EscapeUtils.escape_html(html)
-puts "Unescaping #{escaped_html.bytesize} bytes of escaped html from #{url}"
+puts "Unescaping #{escaped_html.bytesize} bytes of escaped html #{times} times, from #{url}"
 Benchmark.bmbm do |x|
   x.report do

data/benchmark/javascript_escape.rb CHANGED Viewed

@@ -13,11 +13,9 @@ class ActionPackBench
 end
 times = 100
-url = "http://code.jquery.com/jquery-1.4.2.js"
+url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
 javascript = `curl -s #{url}`
-puts "Escaping #{javascript.bytesize} bytes of javascript from #{url}"
-puts ActionPackBench.escape_javascript(javascript).eql?(EscapeUtils.escape_javascript(javascript))
+puts "Escaping #{javascript.bytesize} bytes of javascript #{times} times, from #{url}"
 Benchmark.bmbm do |x|
   x.report do

data/benchmark/javascript_unescape.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# encoding: utf-8
+$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
+$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
+require 'rubygems'
+require 'benchmark'
+require 'escape_utils'
+times = 100
+url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
+javascript = `curl -s #{url}`
+escaped_javascript = EscapeUtils.escape_javascript(javascript)
+puts "Escaping #{escaped_javascript.bytesize} bytes of javascript #{times} times, from #{url}"
+Benchmark.bmbm do |x|
+  x.report do
+    puts "EscapeUtils.escape_javascript"
+    times.times do
+      EscapeUtils.unescape_javascript(escaped_javascript)
+    end
+  end
+end

data/benchmark/url_escape.rb ADDED Viewed

@@ -0,0 +1,61 @@
+# encoding: utf-8
+$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
+$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
+require 'rubygems'
+require 'benchmark'
+require 'rack'
+require 'erb'
+require 'cgi'
+require 'url_escape'
+require 'fast_xs_extra'
+require 'escape_utils'
+times = 10_000
+url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA  dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
+puts "Escaping a #{url.bytesize} byte URL #{times} times"
+Benchmark.bmbm do |x|
+  x.report do
+    puts "ERB::Util.url_encode"
+    times.times do
+      ERB::Util.url_encode(url)
+    end
+  end
+  x.report do
+    puts "Rack::Utils.escape"
+    times.times do
+      Rack::Utils.escape(url)
+    end
+  end
+  x.report do
+    puts "CGI.escape"
+    times.times do
+      CGI.escape(url)
+    end
+  end
+  x.report do
+    puts "URLEscape#escape"
+    times.times do
+      URLEscape.escape(url)
+    end
+  end
+  x.report do
+    puts "fast_xs_extra#fast_xs_url"
+    times.times do
+      url.fast_xs_url
+    end
+  end
+  x.report do
+    puts "EscapeUtils.escape_url"
+    times.times do
+      EscapeUtils.escape_url(url)
+    end
+  end
+end

data/benchmark/url_unescape.rb ADDED Viewed

@@ -0,0 +1,54 @@
+# encoding: utf-8
+$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/..')
+$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
+require 'rubygems'
+require 'benchmark'
+require 'rack'
+require 'cgi'
+require 'url_escape'
+require 'fast_xs_extra'
+require 'escape_utils'
+times = 10_000
+url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA  dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
+escaped_url = EscapeUtils.escape_url(url)
+puts "Escaping a #{url.bytesize} byte URL #{times} times"
+Benchmark.bmbm do |x|
+  x.report do
+    puts "Rack::Utils.unescape"
+    times.times do
+      Rack::Utils.unescape(escaped_url)
+    end
+  end
+  x.report do
+    puts "CGI.unescape"
+    times.times do
+      CGI.unescape(escaped_url)
+    end
+  end
+  x.report do
+    puts "URLEscape#unescape"
+    times.times do
+      URLEscape.unescape(escaped_url)
+    end
+  end
+  x.report do
+    puts "fast_xs_extra#fast_uxs_cgi"
+    times.times do
+      url.fast_uxs_cgi
+    end
+  end
+  x.report do
+    puts "EscapeUtils.unescape_url"
+    times.times do
+      EscapeUtils.unescape_url(escaped_url)
+    end
+  end
+end

data/escape_utils.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{escape_utils}
-  s.version = "0.1.4"
+  s.version = "0.1.5"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Brian Lopez"]
-  s.date = %q{2010-06-09}
+  s.date = %q{2010-07-13}
   s.email = %q{seniorlopez@gmail.com}
   s.extensions = ["ext/extconf.rb"]
   s.extra_rdoc_files = [
@@ -25,6 +25,9 @@ Gem::Specification.new do |s|
      "benchmark/html_escape.rb",
      "benchmark/html_unescape.rb",
      "benchmark/javascript_escape.rb",
+     "benchmark/javascript_unescape.rb",
+     "benchmark/url_escape.rb",
+     "benchmark/url_unescape.rb",
      "escape_utils.gemspec",
      "ext/escape_utils.c",
      "ext/extconf.rb",
@@ -33,31 +36,43 @@ Gem::Specification.new do |s|
      "lib/escape_utils/html/erb.rb",
      "lib/escape_utils/html/haml.rb",
      "lib/escape_utils/html/rack.rb",
+     "lib/escape_utils/html_safety.rb",
      "lib/escape_utils/javascript/action_view.rb",
+     "lib/escape_utils/url/cgi.rb",
+     "lib/escape_utils/url/erb.rb",
+     "lib/escape_utils/url/rack.rb",
      "spec/html/escape_spec.rb",
      "spec/html/unescape_spec.rb",
+     "spec/html_safety_spec.rb",
      "spec/javascript/escape_spec.rb",
+     "spec/javascript/unescape_spec.rb",
      "spec/rcov.opts",
      "spec/spec.opts",
-     "spec/spec_helper.rb"
+     "spec/spec_helper.rb",
+     "spec/url/escape_spec.rb",
+     "spec/url/unescape_spec.rb"
   ]
   s.homepage = %q{http://github.com/brianmario/escape_utils}
   s.rdoc_options = ["--charset=UTF-8"]
   s.require_paths = ["lib", "ext"]
-  s.rubygems_version = %q{1.3.6}
+  s.rubygems_version = %q{1.3.7}
   s.summary = %q{Faster string escaping routines for your web apps}
   s.test_files = [
     "spec/html/escape_spec.rb",
      "spec/html/unescape_spec.rb",
+     "spec/html_safety_spec.rb",
      "spec/javascript/escape_spec.rb",
-     "spec/spec_helper.rb"
+     "spec/javascript/unescape_spec.rb",
+     "spec/spec_helper.rb",
+     "spec/url/escape_spec.rb",
+     "spec/url/unescape_spec.rb"
   ]
   if s.respond_to? :specification_version then
     current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
     s.specification_version = 3
-    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
     else
     end
   else

data/ext/escape_utils.c CHANGED Viewed

@@ -4,93 +4,234 @@
 static rb_encoding *utf8Encoding;
 #endif
-#define APPEND_BUFFER(escape, len, scoot_by)  \
-  memcpy(&out[total], &in[offset], i-offset); \
-  total += i-offset;                          \
-  offset = i+scoot_by;                        \
-  memcpy(&out[total], escape, len);           \
-  total += len;                               \
+#define IS_HEX(c) (c >= 48 || c <= 57) && (c >= 65 || c <= 70) && (c >= 97 || c <= 102)
+#define NOT_HEX(c) (c < 48 || c > 57) && (c < 65 || c > 90) && (c < 97 || c > 122)
+#define UNHEX(c) (c >= '0' && c <= '9' ? c - '0' : c >= 'A' && c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)
 static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
-  size_t i = 0, offset = 0, total = 0;
-  for(;i<in_len;i++) {
-    switch(in[i]) {
-      case '&':  APPEND_BUFFER("&amp;",  5, 1); break;
-      case '<':  APPEND_BUFFER("&lt;",   4, 1); break;
-      case '>':  APPEND_BUFFER("&gt;",   4, 1); break;
-      case '\'': APPEND_BUFFER("&#39;",  5, 1); break;
-      case '\"': APPEND_BUFFER("&quot;", 6, 1); break;
+  size_t total = 0;
+  unsigned char curChar;
+  total = in_len;
+  while (in_len) {
+    curChar = *in++;
+    switch (curChar) {
+    case '<':
+      *out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';';
+      total += 3;
+      break;
+    case '>':
+      *out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';';
+      total += 3;
+      break;
+    case '&':
+      *out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';';
+      total += 4;
+      break;
+    case '\'':
+      *out++ = '&'; *out++ = '#'; *out++ = '3'; *out++ = '9'; *out++ = ';';
+      total += 4;
+      break;
+    case '\"':
+      *out++ = '&'; *out++ = 'q'; *out++ = 'u'; *out++ = 'o'; *out++ = 't'; *out++ = ';';
+      total += 5;
+      break;
+    case '/':
+      *out++ = '&'; *out++ = '#'; *out++ = '4'; *out++ = '7'; *out++ = ';';
+      total += 4;
+      break;
+    default:
+      *out++ = curChar;
+      break;
     }
+    in_len--;
   }
-  // append the rest of the buffer
-  memcpy(&out[total], &in[offset], i-offset);
-  return total + (i-offset);
+  return total;
 }
 static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
-  size_t i = 0, offset = 0, total = 0;
-  for(;i<in_len;i++) {
-    if(in[i] == '&') {
-      if (i+3 <= in_len) {
-        if (memcmp(&in[i], "&lt;", 4) == 0) {
-          APPEND_BUFFER("<", 1, 4);
-        } else if (memcmp(&in[i], "&gt;", 4) == 0) {
-          APPEND_BUFFER(">", 1, 4);
-        }
+  size_t total = 0, len = in_len;
+  unsigned char curChar, *start;
+  start = (unsigned char *)&in[0];
+  total = in_len;
+  while (len) {
+    curChar = *in++;
+    if (curChar == '&') {
+      if ((in-start)+2 <= in_len && *in == 'l' && *(in+1) == 't' && *(in+2) == ';') {
+        *out++ = '<';
+        total-=3;
+        in+=3;
+        len-=3;
+      } else if ((in-start)+2 <= in_len && *in == 'g' && *(in+1) == 't' && *(in+2) == ';') {
+        *out++ = '>';
+        total-=3;
+        in+=3;
+        len-=3;
+      } else if ((in-start)+3 <= in_len && *in == 'a' && *(in+1) == 'm' && *(in+2) == 'p' && *(in+3) == ';') {
+        *out++ = '&';
+        total-=4;
+        in+=4;
+        len-=4;
+      } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '3' && *(in+2) == '9' && *(in+3) == ';') {
+        *out++ = '\'';
+        total-=4;
+        in+=4;
+        len-=4;
+      } else if ((in-start)+3 <= in_len && *in == '#' && *(in+1) == '4' && *(in+2) == '7' && *(in+3) == ';') {
+        *out++ = '/';
+        total-=4;
+        in+=4;
+        len-=4;
+      } else if ((in-start)+4 <= in_len && *in == 'q' && *(in+1) == 'u' && *(in+2) == 'o' && *(in+3) == 't' && *(in+4) == ';') {
+        *out++ = '\"';
+        total-=5;
+        in+=5;
+        len-=5;
       }
-      if (i+4 <= in_len) {
-        if (memcmp(&in[i], "&amp;", 5) == 0) {
-          APPEND_BUFFER("&", 1, 5);
-        } else if (memcmp(&in[i], "&#39;", 5) == 0) {
-          APPEND_BUFFER("\'", 1, 5);
-        }
+    } else {
+      *out++ = curChar;
+    }
+    len--;
+  }
+  return total;
+}
+static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
+  size_t total = 0;
+  unsigned char curChar;
+  total = in_len;
+  while (in_len) {
+    curChar = *in++;
+    switch (curChar) {
+    case '\\':
+      *out++ = '\\'; *out++ = '\\';
+      total++;
+      break;
+    case '<':
+      if (*in == '/') {
+        *out++ = '<'; *out++ = '\\'; *out++ = '/';
+        in++; in_len--;
+        total++;
       }
-      if (i+5 <= in_len) {
-        if (memcmp(&in[i], "&quot;", 6) == 0) {
-          APPEND_BUFFER("\"", 1, 6);
-        }
+      break;
+    case '\r':
+      if (*in == '\n') {
+        *out++ = '\\'; *out++ = 'n';
+        in++; in_len--;
+      } else {
+        *out++ = '\\'; *out++ = 'n';
+        total++;
       }
+      break;
+    case '\n':
+      *out++ = '\\'; *out++ = 'n';
+      total++;
+      break;
+    case '\'':
+      *out++ = '\\'; *out++ = '\'';
+      total++;
+      break;
+    case '\"':
+      *out++ = '\\'; *out++ = '\"';
+      total++;
+      break;
+    default:
+      *out++ = curChar;
+      break;
     }
+    in_len--;
   }
-  // append the rest of the buffer
-  memcpy(&out[total], &in[offset], i-offset);
+  return total;
+}
-  return total + (i-offset);
+static size_t unescape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
+  size_t total = 0;
+  unsigned char curChar;
+  total = in_len;
+  while (in_len) {
+    curChar = *in++;
+    if (curChar == '\\') {
+      if (*in == 'n') {
+        *out++ = '\n';
+        total--;
+      } else if (*in == '\\') {
+        *out++ = '\\';
+        total--;
+      } else if (*in == '\'') {
+        *out++ = '\'';
+        total--;
+      } else if (*in == '\"') {
+        *out++ = '\"';
+        total--;
+      } else if (*in == '/') {
+        *out++ = '/';
+        total--;
+      } else {
+        *out++ = curChar;
+      }
+      in++; in_len--;
+    } else {
+      *out++ = curChar;
+    }
+    in_len--;
+  }
+  return total;
 }
-static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
-  size_t i = 0, offset = 0, total = 0;
-  for(;i<in_len;i++) {
-    switch(in[i]) {
-      case '\\':  APPEND_BUFFER("\\\\", 2, 1); break;
-      case '<':
-        if (i+1 <= in_len && in[i+1] == '/') {
-          APPEND_BUFFER("<\\/", 3, 2);
-        }
-        break;
-      case '\r':
-        if (i+1 <= in_len && in[i+1] == '\n') {
-          APPEND_BUFFER("\\n", 2, 1);
-        } else {
-          APPEND_BUFFER("\\n", 2, 1);
-        }
-        break;
-      case '\n': APPEND_BUFFER("\\n", 2, 1); break;
-      case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
-      case '\'': APPEND_BUFFER("\\'", 2, 1); break;
+static size_t escape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
+  size_t total = 0;
+  unsigned char curChar, hex[2];
+  const unsigned char hexChars[16] = "0123456789ABCDEF";
+  total = in_len;
+  while (in_len) {
+    curChar = *in++;
+    if (curChar == ' ') {
+      *out++ = '+';
+    } else if ((curChar != '_' && curChar != '.' && curChar != '-') && NOT_HEX(curChar)) {
+      hex[1] = hexChars[curChar & 0x0f];
+      hex[0] = hexChars[(curChar >> 4) & 0x0f];
+      *out++ = '%'; *out++ = hex[0]; *out++ = hex[1];
+      total += 2;
+    } else {
+      *out++ = curChar;
     }
+    in_len--;
   }
-  // append the rest of the buffer
-  memcpy(&out[total], &in[offset], i-offset);
+  return total;
+}
+static size_t unescape_url(unsigned char *out, const unsigned char *in, size_t in_len) {
+  size_t total = 0, len = in_len;
+  unsigned char curChar, *start;
+  start = (unsigned char *)&in[0];
+  total = in_len;
+  while (len) {
+    curChar = *in++;
+    if (curChar == '%') {
+      if ((in-start)+2 <= in_len && IS_HEX(*in) && IS_HEX(*(in+1))) {
+        *out++ = (UNHEX(*in) << 4) + UNHEX(*(in+1));
+        in+=2;
+        total-=2;
+      }
+    } else if (curChar == '+') {
+      *out++ = ' ';
+    } else {
+      *out++ = curChar;
+    }
+    len--;
+  }
-  return total + (i-offset);
+  return total;
 }
 static VALUE rb_escape_html(VALUE self, VALUE str) {
@@ -202,15 +343,130 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
   return rb_output_buf;
 }
+static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
+  if (str == Qnil) {
+    return rb_str_new2("");
+  }
+  Check_Type(str, T_STRING);
+  VALUE rb_output_buf;
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_encoding *default_internal_enc = rb_default_internal_encoding();
+  rb_encoding *original_encoding = rb_enc_get(str);
+#endif
+  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
+  size_t len = RSTRING_LEN(str), new_len = 0;
+  // this is the max size the string could be
+  // TODO: we should try to be more intelligent about this
+  unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
+  // perform our escape, returning the new string's length
+  new_len = unescape_javascript(outBuf, inBuf, len);
+  // create our new ruby string
+  rb_output_buf = rb_str_new((char *)outBuf, new_len);
+  // free the temporary C string
+  free(outBuf);
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_enc_associate(rb_output_buf, original_encoding);
+  if (default_internal_enc) {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
+  } else {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
+  }
+#endif
+  return rb_output_buf;
+}
+static VALUE rb_escape_url(VALUE self, VALUE str) {
+  Check_Type(str, T_STRING);
+  VALUE rb_output_buf;
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_encoding *default_internal_enc = rb_default_internal_encoding();
+  rb_encoding *original_encoding = rb_enc_get(str);
+#endif
+  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
+  size_t len = RSTRING_LEN(str), new_len = 0;
+  // this is the max size the string could be
+  // TODO: we should try to be more intelligent about this
+  unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*(len*3));
+  // perform our escape, returning the new string's length
+  new_len = escape_url(outBuf, inBuf, len);
+  // create our new ruby string
+  rb_output_buf = rb_str_new((char *)outBuf, new_len);
+  // free the temporary C string
+  free(outBuf);
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_enc_associate(rb_output_buf, original_encoding);
+  if (default_internal_enc) {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
+  } else {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
+  }
+#endif
+  return rb_output_buf;
+}
+static VALUE rb_unescape_url(VALUE self, VALUE str) {
+  Check_Type(str, T_STRING);
+  VALUE rb_output_buf;
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_encoding *default_internal_enc = rb_default_internal_encoding();
+  rb_encoding *original_encoding = rb_enc_get(str);
+#endif
+  unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
+  size_t len = RSTRING_LEN(str), new_len = 0;
+  // this is the max size the string could be
+  // TODO: we should try to be more intelligent about this
+  unsigned char *outBuf = (unsigned char *)malloc(sizeof(unsigned char *)*len);
+  // perform our escape, returning the new string's length
+  new_len = unescape_url(outBuf, inBuf, len);
+  // create our new ruby string
+  rb_output_buf = rb_str_new((char *)outBuf, new_len);
+  // free the temporary C string
+  free(outBuf);
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_enc_associate(rb_output_buf, original_encoding);
+  if (default_internal_enc) {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
+  } else {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
+  }
+#endif
+  return rb_output_buf;
+}
 /* Ruby Extension initializer */
 void Init_escape_utils_ext() {
   VALUE mEscape = rb_define_module("EscapeUtils");
-  rb_define_method(mEscape,           "escape_html",    rb_escape_html,   1);
-  rb_define_module_function(mEscape,  "escape_html",    rb_escape_html,   1);
-  rb_define_method(mEscape,           "unescape_html",  rb_unescape_html, 1);
-  rb_define_module_function(mEscape,  "unescape_html",  rb_unescape_html, 1);
-  rb_define_method(mEscape,           "escape_javascript",  rb_escape_javascript, 1);
-  rb_define_module_function(mEscape,  "escape_javascript",  rb_escape_javascript, 1);
+  rb_define_method(mEscape,           "escape_html",          rb_escape_html,   1);
+  rb_define_module_function(mEscape,  "escape_html",          rb_escape_html,   1);
+  rb_define_method(mEscape,           "unescape_html",        rb_unescape_html, 1);
+  rb_define_module_function(mEscape,  "unescape_html",        rb_unescape_html, 1);
+  rb_define_method(mEscape,           "escape_javascript",    rb_escape_javascript, 1);
+  rb_define_module_function(mEscape,  "escape_javascript",    rb_escape_javascript, 1);
+  rb_define_method(mEscape,           "unescape_javascript",  rb_unescape_javascript, 1);
+  rb_define_module_function(mEscape,  "unescape_javascript",  rb_unescape_javascript, 1);
+  rb_define_method(mEscape,           "escape_url",           rb_escape_url, 1);
+  rb_define_module_function(mEscape,  "escape_url",           rb_escape_url, 1);
+  rb_define_method(mEscape,           "unescape_url",         rb_unescape_url, 1);
+  rb_define_module_function(mEscape,  "unescape_url",         rb_unescape_url, 1);
 #ifdef HAVE_RUBY_ENCODING_H
   utf8Encoding = rb_utf8_encoding();