RubyGems - escape_utils - Versions diffs - 0.1.2 → 0.1.3 - Mend

escape_utils 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

data/CHANGELOG.md +4 -0
data/README.rdoc +6 -3
data/VERSION +1 -1
data/escape_utils.gemspec +2 -2
data/ext/escape_utils.c +72 -41
data/lib/escape_utils.rb +1 -1
data/spec/html/escape_spec.rb +14 -0
data/spec/html/unescape_spec.rb +14 -0
data/spec/javascript/escape_spec.rb +15 -1
metadata +3 -3

data/CHANGELOG.md CHANGED

@@ -1,5 +1,9 @@
 # Changelog
+## 0.1.3 (June 9th, 2010)
+* cleaned some code up, removing duplication
+* moved to a more flexible character encoding scheme using Encoding.default_internal for 1.9 users
 ## 0.1.2 (June 8th, 2010)
 * forgot to add the ActionView monkey patch for JS escaping ;)

data/README.rdoc CHANGED

@@ -2,7 +2,9 @@
 Being as though we're all html escaping everything these days, why not make it faster?
-At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon
+At the moment escape_utils supports escaping and unescaping of HTML, and Javascript but I wanna add URL encoding soon.
+For character encoding in 1.9, we'll return strings in whatever encoding Encoding.default_internal is set to, or in utf-8 if it isn't set.
 It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so you can drop this in and have your app start escaping fast as balls in no time
@@ -45,8 +47,9 @@ It has monkey-patches for Rack::Utils, CGI, ERB::Util and Haml and ActionView so
 == Benchmarks
-In my testing, escaping is around 10-20x faster than the pure ruby implementations in wide use today.
-While unescaping is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
+In my testing, escaping html is around 10-20x faster than the pure ruby implementations in wide use today.
+While unescaping html is around 20-40x faster than CGI.unescapeHTML - also pure ruby.
+Escaping Javascript is around 16-30x faster.
 This output is from my laptop using the benchmark scripts in the benchmarks folder.

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.1.2
1	+ 0.1.3

data/escape_utils.gemspec CHANGED

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{escape_utils}
-  s.version = "0.1.2"
+  s.version = "0.1.3"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Brian Lopez"]
-  s.date = %q{2010-06-08}
+  s.date = %q{2010-06-09}
   s.email = %q{seniorlopez@gmail.com}
   s.extensions = ["ext/extconf.rb"]
   s.extra_rdoc_files = [

data/ext/escape_utils.c CHANGED

@@ -1,4 +1,8 @@
 #include <ruby.h>
+#ifdef HAVE_RUBY_ENCODING_H
+#include <ruby/encoding.h>
+static rb_encoding *utf8Encoding;
+#endif
 #define APPEND_BUFFER(escape, len, scoot_by)  \
   memcpy(&out[total], &in[offset], i-offset); \
@@ -6,18 +10,17 @@
   offset = i+scoot_by;                        \
   memcpy(&out[total], escape, len);           \
   total += len;                               \
-  break;                                      \
 static size_t escape_html(unsigned char *out, const unsigned char *in, size_t in_len) {
   size_t i = 0, offset = 0, total = 0;
   for(;i<in_len;i++) {
     switch(in[i]) {
-      case '&':  APPEND_BUFFER("&amp;",  5, 1);
-      case '<':  APPEND_BUFFER("&lt;",   4, 1);
-      case '>':  APPEND_BUFFER("&gt;",   4, 1);
-      case '\'': APPEND_BUFFER("&#39;",  5, 1);
-      case '\"': APPEND_BUFFER("&quot;", 6, 1);
+      case '&':  APPEND_BUFFER("&amp;",  5, 1); break;
+      case '<':  APPEND_BUFFER("&lt;",   4, 1); break;
+      case '>':  APPEND_BUFFER("&gt;",   4, 1); break;
+      case '\'': APPEND_BUFFER("&#39;",  5, 1); break;
+      case '\"': APPEND_BUFFER("&quot;", 6, 1); break;
     }
   }
@@ -31,38 +34,26 @@ static size_t unescape_html(unsigned char *out, const unsigned char *in, size_t
   size_t i = 0, offset = 0, total = 0;
   for(;i<in_len;i++) {
-    switch(in[i]) {
-      case '&':
-        if (i+5 <= in_len) {
-          if (memcmp(&in[i], "&amp;", 5) == 0) {
-            APPEND_BUFFER("&", 1, 5);
-          } else if (memcmp(&in[i], "&lt;", 4) == 0) {
-            APPEND_BUFFER("<", 1, 4);
-          } else if (memcmp(&in[i], "&gt;", 4) == 0) {
-            APPEND_BUFFER(">", 1, 4);
-          } else if (memcmp(&in[i], "&#39;", 5) == 0) {
-            APPEND_BUFFER("\'", 1, 5);
-          } else if (memcmp(&in[i], "&quot;", 6) == 0) {
-            APPEND_BUFFER("\"", 1, 6);
-          }
-        } else if (i+4 <= in_len) {
-          if (memcmp(&in[i], "&amp;", 5) == 0) {
-            APPEND_BUFFER("&", 1, 5);
-          } else if (memcmp(&in[i], "&lt;", 4) == 0) {
-            APPEND_BUFFER("<", 1, 4);
-          } else if (memcmp(&in[i], "&gt;", 4) == 0) {
-            APPEND_BUFFER(">", 1, 4);
-          } else if (memcmp(&in[i], "&#39;", 5) == 0) {
-            APPEND_BUFFER("\'", 1, 5);
-          }
-        } else if (i+3 <= in_len) {
-          if (memcmp(&in[i], "&lt;", 4) == 0) {
-            APPEND_BUFFER("<", 1, 4);
-          } else if (memcmp(&in[i], "&gt;", 4) == 0) {
-            APPEND_BUFFER(">", 1, 4);
-          }
+    if(in[i] == '&') {
+      if (i+3 <= in_len) {
+        if (memcmp(&in[i], "&lt;", 4) == 0) {
+          APPEND_BUFFER("<", 1, 4);
+        } else if (memcmp(&in[i], "&gt;", 4) == 0) {
+          APPEND_BUFFER(">", 1, 4);
+        }
+      }
+      if (i+4 <= in_len) {
+        if (memcmp(&in[i], "&amp;", 5) == 0) {
+          APPEND_BUFFER("&", 1, 5);
+        } else if (memcmp(&in[i], "&#39;", 5) == 0) {
+          APPEND_BUFFER("\'", 1, 5);
         }
-      break;
+      }
+      if (i+5 <= in_len) {
+        if (memcmp(&in[i], "&quot;", 6) == 0) {
+          APPEND_BUFFER("\"", 1, 6);
+        }
+      }
     }
   }
@@ -77,7 +68,7 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
   for(;i<in_len;i++) {
     switch(in[i]) {
-      case '\\':  APPEND_BUFFER("\\\\", 2, 1);
+      case '\\':  APPEND_BUFFER("\\\\", 2, 1); break;
       case '<':
         if (i+1 <= in_len && in[i+1] == '/') {
           APPEND_BUFFER("<\\/", 3, 2);
@@ -90,9 +81,9 @@ static size_t escape_javascript(unsigned char *out, const unsigned char *in, siz
           APPEND_BUFFER("\\n", 2, 1);
         }
         break;
-      case '\n': APPEND_BUFFER("\\n", 2, 1);
-      case '\"': APPEND_BUFFER("\\\"", 2, 1);
-      case '\'': APPEND_BUFFER("\\'", 2, 1);
+      case '\n': APPEND_BUFFER("\\n", 2, 1); break;
+      case '\"': APPEND_BUFFER("\\\"", 2, 1); break;
+      case '\'': APPEND_BUFFER("\\'", 2, 1); break;
     }
   }
@@ -106,6 +97,10 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
   Check_Type(str, T_STRING);
   VALUE rb_output_buf;
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_encoding *default_internal_enc = rb_default_internal_encoding();
+  rb_encoding *original_encoding = rb_enc_get(str);
+#endif
   unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
   size_t len = RSTRING_LEN(str), new_len = 0;
@@ -122,6 +117,14 @@ static VALUE rb_escape_html(VALUE self, VALUE str) {
   // free the temporary C string
   free(outBuf);
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_enc_associate(rb_output_buf, original_encoding);
+  if (default_internal_enc) {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
+  } else {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
+  }
+#endif
   return rb_output_buf;
 }
@@ -129,6 +132,10 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
   Check_Type(str, T_STRING);
   VALUE rb_output_buf;
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_encoding *default_internal_enc = rb_default_internal_encoding();
+  rb_encoding *original_encoding = rb_enc_get(str);
+#endif
   unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
   size_t len = RSTRING_LEN(str), new_len = 0;
@@ -145,6 +152,14 @@ static VALUE rb_unescape_html(VALUE self, VALUE str) {
   // free the temporary C string
   free(outBuf);
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_enc_associate(rb_output_buf, original_encoding);
+  if (default_internal_enc) {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
+  } else {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
+  }
+#endif
   return rb_output_buf;
 }
@@ -156,6 +171,10 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
   Check_Type(str, T_STRING);
   VALUE rb_output_buf;
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_encoding *default_internal_enc = rb_default_internal_encoding();
+  rb_encoding *original_encoding = rb_enc_get(str);
+#endif
   unsigned char *inBuf = (unsigned char*)RSTRING_PTR(str);
   size_t len = RSTRING_LEN(str), new_len = 0;
@@ -172,6 +191,14 @@ static VALUE rb_escape_javascript(VALUE self, VALUE str) {
   // free the temporary C string
   free(outBuf);
+#ifdef HAVE_RUBY_ENCODING_H
+  rb_enc_associate(rb_output_buf, original_encoding);
+  if (default_internal_enc) {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, default_internal_enc);
+  } else {
+    rb_output_buf = rb_str_export_to_enc(rb_output_buf, utf8Encoding);
+  }
+#endif
   return rb_output_buf;
 }
@@ -184,4 +211,8 @@ void Init_escape_utils_ext() {
   rb_define_module_function(mEscape,  "unescape_html",  rb_unescape_html, 1);
   rb_define_method(mEscape,           "escape_javascript",  rb_escape_javascript, 1);
   rb_define_module_function(mEscape,  "escape_javascript",  rb_escape_javascript, 1);
+#ifdef HAVE_RUBY_ENCODING_H
+  utf8Encoding = rb_utf8_encoding();
+#endif
 }

data/lib/escape_utils.rb CHANGED

@@ -4,5 +4,5 @@ require 'escape_utils_ext'
 EscapeUtils.send(:extend, EscapeUtils)
 module EscapeUtils
-  VERSION = "0.1.2"
+  VERSION = "0.1.3"
 end

data/spec/html/escape_spec.rb CHANGED

@@ -21,4 +21,18 @@ describe EscapeUtils, "escape_html" do
   it "should escape the & character" do
     EscapeUtils.escape_html("<b>Bourbon & Branch</b>").should eql("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;")
   end
+  if RUBY_VERSION =~ /^1.9/
+    it "should default to utf-8 if Encoding.default_internal is nil" do
+      Encoding.default_internal = nil
+      EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.find('utf-8'))
+    end
+    it "should use Encoding.default_internal" do
+      Encoding.default_internal = Encoding.find('utf-8')
+      EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
+      Encoding.default_internal = Encoding.find('us-ascii')
+      EscapeUtils.escape_html("<b>Bourbon & Branch</b>").encoding.should eql(Encoding.default_internal)
+    end
+  end
 end

data/spec/html/unescape_spec.rb CHANGED

@@ -21,4 +21,18 @@ describe EscapeUtils, "unescape_html" do
   it "should unescape the & character" do
     EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").should eql("<b>Bourbon & Branch</b>")
   end
+  if RUBY_VERSION =~ /^1.9/
+    it "should default to utf-8 if Encoding.default_internal is nil" do
+      Encoding.default_internal = nil
+      EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.find('utf-8'))
+    end
+    it "should use Encoding.default_internal" do
+      Encoding.default_internal = Encoding.find('utf-8')
+      EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.default_internal)
+      Encoding.default_internal = Encoding.find('us-ascii')
+      EscapeUtils.unescape_html("&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;").encoding.should eql(Encoding.default_internal)
+    end
+  end
 end

data/spec/javascript/escape_spec.rb CHANGED

@@ -10,7 +10,7 @@ describe EscapeUtils, "escape_javascript" do
   it "should return an empty string if passed nil" do
     EscapeUtils.escape_javascript(nil).should eql("")
   end
   it "should escape quotes and newlines" do
     EscapeUtils.escape_javascript(%(This "thing" is really\n netos')).should eql(%(This \\"thing\\" is really\\n netos\\'))
   end
@@ -22,4 +22,18 @@ describe EscapeUtils, "escape_javascript" do
   it "should escape closed html tags" do
     EscapeUtils.escape_javascript(%(dont </close> tags)).should eql(%(dont <\\/close> tags))
   end
+  if RUBY_VERSION =~ /^1.9/
+    it "should default to utf-8 if Encoding.default_internal is nil" do
+      Encoding.default_internal = nil
+      EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.find('utf-8'))
+    end
+    it "should use Encoding.default_internal" do
+      Encoding.default_internal = Encoding.find('utf-8')
+      EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
+      Encoding.default_internal = Encoding.find('us-ascii')
+      EscapeUtils.escape_javascript(%(dont </close> tags)).encoding.should eql(Encoding.default_internal)
+    end
+  end
 end

metadata CHANGED

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 1
-  - 2
-  version: 0.1.2
+  - 3
+  version: 0.1.3
 platform: ruby
 authors:
 - Brian Lopez
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-06-08 00:00:00 -07:00
+date: 2010-06-09 00:00:00 -07:00
 default_executable:
 dependencies: []