RubyGems - escape_utils - Versions diffs - 0.2.3 → 0.2.4 - Mend

escape_utils 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

data/.gitignore +2 -1
data/CHANGELOG.md +4 -0
data/README.md +206 -0
data/benchmark/html_escape.rb +1 -0
data/benchmark/html_unescape.rb +1 -0
data/benchmark/javascript_escape.rb +1 -0
data/benchmark/javascript_unescape.rb +1 -0
data/benchmark/url_escape.rb +1 -0
data/benchmark/url_unescape.rb +1 -0
data/escape_utils.gemspec +0 -3
data/ext/escape_utils/buffer.c +228 -0
data/ext/escape_utils/buffer.h +91 -0
data/ext/escape_utils/escape_utils.c +111 -531
data/ext/escape_utils/houdini.h +15 -0
data/ext/escape_utils/houdini_html.c +214 -0
data/ext/escape_utils/houdini_js.c +148 -0
data/ext/escape_utils/houdini_uri.c +130 -0
data/ext/escape_utils/html_unescape.h +754 -0
data/ext/escape_utils/uri_escape.h +35 -0
data/lib/escape_utils.rb +2 -2
data/lib/escape_utils/html/cgi.rb +0 -2
data/lib/escape_utils/html/erb.rb +0 -2
data/lib/escape_utils/html/haml.rb +0 -2
data/lib/escape_utils/html/rack.rb +0 -2
data/lib/escape_utils/html_safety.rb +0 -2
data/lib/escape_utils/javascript/action_view.rb +0 -2
data/lib/escape_utils/url/cgi.rb +0 -2
data/lib/escape_utils/url/erb.rb +0 -2
data/lib/escape_utils/url/rack.rb +0 -2
data/lib/escape_utils/url/uri.rb +0 -2
data/lib/escape_utils/version.rb +1 -1
data/spec/html/escape_spec.rb +0 -1
data/spec/html/unescape_spec.rb +0 -1
data/spec/html_safety_spec.rb +0 -1
data/spec/javascript/escape_spec.rb +0 -1
data/spec/javascript/unescape_spec.rb +0 -1
data/spec/query/escape_spec.rb +0 -1
data/spec/query/unescape_spec.rb +1 -0
data/spec/spec_helper.rb +0 -1
data/spec/uri/escape_spec.rb +0 -1
data/spec/uri/unescape_spec.rb +1 -0
data/spec/url/escape_spec.rb +0 -1
data/spec/url/unescape_spec.rb +1 -0
metadata +16 -8
data/README.rdoc +0 -146

data/.gitignore CHANGED

@@ -5,4 +5,5 @@ pkg/*
 doc/*
 *.rbc
 tmp/
-Gemfile.lock
+Gemfile.lock
+vendor/*

data/CHANGELOG.md CHANGED

@@ -1,5 +1,9 @@
 # Changelog
+## 0.2.4 (September 7th, 2011)
+* swap out custom escaping routines for houdini - https://github.com/tanoku/houdini
+* add RSTRING_NOT_MODIFIED define for a Rubinius speedup
 ## 0.2.3 (March 9th, 2011)
 * change encoding strategy to simply return strings in the encoding the input string was in, not taking into account Encoding.default_internal

data/README.md ADDED

@@ -0,0 +1,206 @@
+# escape_utils
+Since we're all HTML-escaping everything these days, why not make it faster?
+For character encoding in 1.9, the output string's encoding is copied from the input string.
+It has monkey-patches for Rack::Utils, CGI, URI, ERB::Util, Haml and ActionView, so you can drop this in and have your app start escaping blazingly fast in no time.
+It supports HTML, URL, URI and Javascript escaping/unescaping.
+## Installing
+``` sh
+gem install escape_utils
+```
+## Warning: UTF-8 only
+escape_utils assumes all input is encoded as valid UTF-8. If you are dealing with other encodings do your best to transcode the string into a UTF-8 byte stream before handing it to escape_utils.
+On Ruby 1.9 this is as easy as:
+``` ruby
+utf8_string = non_utf8_string.encode('UTF-8')
+```
+If you're on Ruby 1.8 you can use [charlock_holmes](https://github.com/brianmario/charlock_holmes) to transcode like so:
+``` ruby
+# NOTE: we're assuming you know the encoding of `non_utf8_string` here.
+# if you don't, you can use the detection API of charlock_holmes
+utf8_string = CharlockHolmes::Converter.convert(non_utf8_string, other_encoding, 'UTF-8')
+```
+## Usage
+### HTML
+#### Escaping
+``` ruby
+html = `curl -s http://maps.google.com`
+escaped_html = EscapeUtils.escape_html(html)
+```
+By default escape_utils will escape `/` characters with `&#47;`, but you can disable that by setting `EscapeUtils.html_secure = false`
+or per-call by passing `false` as the second parameter to `escape_html` like `EscapeUtils.escape_html(html, false)`
+For more information check out: http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content
+#### Unescaping
+``` ruby
+html = `curl -s http://maps.google.com`
+escaped_html = EscapeUtils.escape_html(html)
+html = EscapeUtils.unescape_html(escaped_html)
+```
+#### Monkey Patches
+``` ruby
+require 'escape_utils/html/rack' # to patch Rack::Utils
+require 'escape_utils/html/erb' # to patch ERB::Util
+require 'escape_utils/html/cgi' # to patch CGI
+require 'escape_utils/html/haml' # to patch Haml::Helpers
+```
+### URL
+Use (un)escape_uri to get RFC-compliant escaping (like PHP rawurlencode).
+Use (un)escape_url to get CGI escaping (where space is +).
+#### Escaping
+``` ruby
+url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
+escaped_url = EscapeUtils.escape_url(url)
+```
+#### Unescaping
+``` ruby
+url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mcEA~!!#*YH*>@!U"
+escaped_url = EscapeUtils.escape_url(url)
+EscapeUtils.unescape_url(escaped_url) == url # => true
+```
+#### Monkey Patches
+``` ruby
+require 'escape_utils/url/cgi' # to patch CGI
+require 'escape_utils/url/erb' # to patch ERB::Util
+require 'escape_utils/url/rack' # to patch Rack::Utils
+require 'escape_utils/url/uri' # to patch URI
+```
+### Javascript
+#### Escaping
+``` ruby
+javascript = `curl -s http://code.jquery.com/jquery-1.4.2.js`
+escaped_javascript = EscapeUtils.escape_javascript(javascript)
+```
+#### Unescaping
+``` ruby
+javascript = `curl -s http://code.jquery.com/jquery-1.4.2.js`
+escaped_javascript = EscapeUtils.escape_javascript(javascript)
+EscapeUtils.unescape_javascript(escaped_javascript) == javascript # => true
+```
+#### Monkey Patches
+``` ruby
+require 'escape_utils/javascript/action_view' # to patch ActionView::Helpers::JavaScriptHelper
+```
+## Benchmarks
+In my testing, escaping html is around 10-30x faster than the pure ruby implementations in wide use today.
+Unescaping html is around 40-100x faster than CGI.unescapeHTML, which is also pure ruby.
+Escaping Javascript is around 16-30x faster.
+This output is from my laptop using the benchmark scripts in the benchmark folder.
+### HTML
+#### Escaping
+```
+Rack::Utils.escape_html
+ 9.650000   0.090000   9.740000 (  9.750756)
+Haml::Helpers.html_escape
+ 9.310000   0.110000   9.420000 (  9.417317)
+ERB::Util.html_escape
+ 5.330000   0.390000   5.720000 (  5.748394)
+CGI.escapeHTML
+ 5.370000   0.380000   5.750000 (  5.791344)
+FasterHTMLEscape.html_escape
+ 0.520000   0.010000   0.530000 (  0.539485)
+fast_xs_extra#fast_xs_html
+ 0.310000   0.030000   0.340000 (  0.336734)
+EscapeUtils.escape_html
+ 0.200000   0.050000   0.250000 (  0.258839)
+```
+#### Unescaping
+```
+CGI.unescapeHTML
+ 16.520000   0.080000  16.600000 ( 16.853888)
+EscapeUtils.unescape_html
+ 0.120000   0.040000   0.160000  (  0.162696)
+```
+### Javascript
+#### Escaping
+```
+ActionView::Helpers::JavaScriptHelper#escape_javascript
+ 3.810000   0.100000   3.910000 (  3.925557)
+EscapeUtils.escape_javascript
+ 0.200000   0.040000   0.240000 (  0.236692)
+```
+#### Unescaping
+I didn't look that hard, but I'm not aware of another ruby library that does Javascript unescaping to benchmark against. Anyone know of any?
+### URL
+#### Escaping
+```
+ERB::Util.url_encode
+ 0.520000   0.010000   0.530000 (  0.529277)
+Rack::Utils.escape
+ 0.460000   0.010000   0.470000 (  0.466962)
+CGI.escape
+ 0.440000   0.000000   0.440000 (  0.443017)
+URLEscape#escape
+ 0.040000   0.000000   0.040000 (  0.045661)
+fast_xs_extra#fast_xs_url
+ 0.010000   0.000000   0.010000 (  0.015429)
+EscapeUtils.escape_url
+ 0.010000   0.000000   0.010000 (  0.010843)
+```
+#### Unescaping
+```
+Rack::Utils.unescape
+ 0.250000   0.010000   0.260000 (  0.257558)
+CGI.unescape
+ 0.250000   0.000000   0.250000 (  0.257837)
+URLEscape#unescape
+ 0.040000   0.000000   0.040000 (  0.031548)
+fast_xs_extra#fast_uxs_cgi
+ 0.010000   0.000000   0.010000 (  0.006062)
+EscapeUtils.unescape_url
+ 0.000000   0.000000   0.000000 (  0.005679)
+```

data/benchmark/html_escape.rb CHANGED

@@ -19,6 +19,7 @@ end
 times = 100
 url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
 html = `curl -s #{url}`
+html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
 puts "Escaping #{html.bytesize} bytes of html #{times} times, from #{url}"
 Benchmark.bmbm do |x|

data/benchmark/html_unescape.rb CHANGED

@@ -16,6 +16,7 @@ end
 times = 100
 url = "http://en.wikipedia.org/wiki/Line_of_succession_to_the_British_throne"
 html = `curl -s #{url}`
+html = html.force_encoding('binary') if html.respond_to?(:force_encoding)
 escaped_html = EscapeUtils.escape_html(html)
 puts "Unescaping #{escaped_html.bytesize} bytes of escaped html #{times} times, from #{url}"

data/benchmark/javascript_escape.rb CHANGED

@@ -15,6 +15,7 @@ end
 times = 100
 url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
 javascript = `curl -s #{url}`
+javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding)
 puts "Escaping #{javascript.bytesize} bytes of javascript #{times} times, from #{url}"
 Benchmark.bmbm do |x|

data/benchmark/javascript_unescape.rb CHANGED

@@ -10,6 +10,7 @@ require 'escape_utils'
 times = 100
 url = "http://ajax.googleapis.com/ajax/libs/dojo/1.4.3/dojo/dojo.xd.js.uncompressed.js"
 javascript = `curl -s #{url}`
+javascript = javascript.force_encoding('utf-8') if javascript.respond_to?(:force_encoding)
 escaped_javascript = EscapeUtils.escape_javascript(javascript)
 puts "Escaping #{escaped_javascript.bytesize} bytes of javascript #{times} times, from #{url}"

data/benchmark/url_escape.rb CHANGED

@@ -14,6 +14,7 @@ require 'escape_utils'
 times = 10_000
 url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA  dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
+url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
 puts "Escaping a #{url.bytesize} byte URL #{times} times"
 Benchmark.bmbm do |x|

data/benchmark/url_unescape.rb CHANGED

@@ -13,6 +13,7 @@ require 'escape_utils'
 times = 10_000
 url = "https://www.yourmom.com/cgi-bin/session.cgi?sess_args=mYHcEA  dh435dqUs0moGHeeAJTSLLbdbcbd9ef----,574b95600e9ab7d27eb0bf524ac68c27----"
+url = url.force_encoding('us-ascii') if url.respond_to?(:force_encoding)
 escaped_url = EscapeUtils.escape_url(url)
 puts "Escaping a #{url.bytesize} byte URL #{times} times"

data/escape_utils.gemspec CHANGED

@@ -7,9 +7,6 @@ Gem::Specification.new do |s|
   s.date = Time.now.utc.strftime("%Y-%m-%d")
   s.email = %q{seniorlopez@gmail.com}
   s.extensions = ["ext/escape_utils/extconf.rb"]
-  s.extra_rdoc_files = [
-    "README.rdoc"
-  ]
   s.files = `git ls-files`.split("\n")
   s.homepage = %q{http://github.com/brianmario/escape_utils}
   s.rdoc_options = ["--charset=UTF-8"]

data/ext/escape_utils/buffer.c ADDED

@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2008, Natacha Porté
+ * Copyright (c) 2011, Vicent Martí
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
+#include "buffer.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* MSVC compat */
+#if defined(_MSC_VER)
+#	define _buf_vsnprintf _vsnprintf
+#else
+#	define _buf_vsnprintf vsnprintf
+#endif
+/* bufprefix: compares the start of a buffer against a NUL-terminated string;
+ * returns 0 when no differing byte is found before the prefix or the buffer
+ * contents run out, otherwise the difference of the first mismatching pair */
+int
+bufprefix(const struct buf *buf, const char *prefix)
+{
+	size_t pos = 0;
+	/* walk both sequences in lockstep until either one is exhausted */
+	while (pos < buf->size && prefix[pos] != 0) {
+		if (buf->data[pos] != prefix[pos])
+			return buf->data[pos] - prefix[pos];
+		pos++;
+	}
+	return 0;
+}
+/* bufgrow: makes sure at least neosz bytes are allocated, enlarging the
+ * allocation in whole multiples of buf->unit; returns BUF_OK or BUF_ENOMEM */
+int
+bufgrow(struct buf *buf, size_t neosz)
+{
+	size_t target;
+	void *grown;
+	/* reject a missing buffer, a zero growth unit and oversized requests */
+	if (buf == NULL || buf->unit == 0 || neosz > BUFFER_MAX_ALLOC_SIZE)
+		return BUF_ENOMEM;
+	/* nothing to do when the current allocation already covers the request */
+	if (neosz <= buf->asize)
+		return BUF_OK;
+	/* step the allocated size up one unit at a time until it reaches neosz */
+	target = buf->asize;
+	do {
+		target += buf->unit;
+	} while (target < neosz);
+	/* keep the old pointer untouched unless realloc succeeds */
+	grown = realloc(buf->data, target);
+	if (grown == NULL)
+		return BUF_ENOMEM;
+	buf->asize = target;
+	buf->data = grown;
+	return BUF_OK;
+}
+/* bufnew: allocates an empty buffer that will grow in steps of `unit` bytes;
+ * returns NULL when the allocation fails */
+struct buf *
+bufnew(size_t unit)
+{
+	struct buf *fresh = malloc(sizeof *fresh);
+	if (fresh == NULL)
+		return NULL;
+	/* no storage is reserved yet; it is obtained on the first growth */
+	fresh->data = NULL;
+	fresh->size = 0;
+	fresh->asize = 0;
+	fresh->unit = unit;
+	return fresh;
+}
+/* bufcstr: NUL-terminates the buffer contents and returns them as a C string;
+ * returns NULL for a missing buffer, a zero growth unit or a failed growth */
+const char *
+bufcstr(struct buf *buf)
+{
+	if (buf == NULL || buf->unit == 0)
+		return NULL;
+	/* a terminator already sits right after the contents: nothing to write */
+	if (buf->size < buf->asize && buf->data[buf->size] == 0)
+		return (char *)buf->data;
+	/* make room for the terminator when the allocation is exactly full */
+	if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) != 0)
+		return NULL;
+	/* the terminator is stored past the contents and is not counted in size */
+	buf->data[buf->size] = 0;
+	return (char *)buf->data;
+}
+/* bufprintf: printf-style formatted append; forwards its variadic arguments
+ * to vbufprintf */
+void
+bufprintf(struct buf *buf, const char *fmt, ...)
+{
+	va_list args;
+	/* only a buffer that exists and has a non-zero growth unit is written to */
+	if (buf != NULL && buf->unit != 0) {
+		va_start(args, fmt);
+		vbufprintf(buf, fmt, args);
+		va_end(args);
+	}
+}
+/* bufput: appends raw data to a buffer
+ * buf  - destination; a NULL buffer is ignored
+ * data - source bytes, not required to be NUL-terminated
+ * len  - number of bytes to copy from data
+ * The append is silently dropped when the buffer cannot be grown. */
+void
+bufput(struct buf *buf, const void *data, size_t len)
+{
+	/* an empty append must not reach memcpy: a fresh or reset buffer has
+	 * data == NULL, and handing memcpy a null pointer is undefined even
+	 * when zero bytes are requested */
+	if (!buf || len == 0)
+		return;
+	if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
+		return;
+	memcpy(buf->data + buf->size, data, len);
+	buf->size += len;
+}
+/* bufputs: appends the bytes of a NUL-terminated string to a buffer; the
+ * terminator itself is not copied */
+void
+bufputs(struct buf *buf, const char *str)
+{
+	size_t length = strlen(str);
+	bufput(buf, str, length);
+}
+/* bufputc: appends a single byte to a buffer; a NULL buffer is ignored and
+ * the byte is dropped when the buffer cannot be grown */
+void
+bufputc(struct buf *buf, int c)
+{
+	size_t needed;
+	if (buf == NULL)
+		return;
+	/* room for one more byte is required before storing */
+	needed = buf->size + 1;
+	if (needed > buf->asize && bufgrow(buf, needed) < 0)
+		return;
+	buf->data[buf->size++] = c;
+}
+/* bufrelease: frees the buffer's storage and the buffer structure itself;
+ * a NULL buffer is ignored */
+void
+bufrelease(struct buf *buf)
+{
+	if (buf != NULL) {
+		free(buf->data);
+		free(buf);
+	}
+}
+/* bufreset: frees the buffer's storage and marks it empty; the structure and
+ * its growth unit are kept */
+void
+bufreset(struct buf *buf)
+{
+	if (buf != NULL) {
+		free(buf->data);
+		buf->data = NULL;
+		buf->asize = 0;
+		buf->size = 0;
+	}
+}
+/* bufslurp: drops `len` bytes from the head of the buffer, shifting the
+ * remaining contents to the front; the allocation is left as it is */
+void
+bufslurp(struct buf *buf, size_t len)
+{
+	if (buf == NULL || buf->unit == 0 || len == 0)
+		return;
+	if (len < buf->size) {
+		/* source and destination overlap, hence memmove */
+		buf->size -= len;
+		memmove(buf->data, buf->data + len, buf->size);
+	} else {
+		/* removing at least everything simply empties the buffer */
+		buf->size = 0;
+	}
+}
+/* BUF_VA_COPY: duplicates a va_list; falls back to plain assignment only on
+ * toolchains that provide neither va_copy nor __va_copy */
+#if defined(va_copy)
+#	define BUF_VA_COPY(dst, src) va_copy(dst, src)
+#elif defined(__va_copy)
+#	define BUF_VA_COPY(dst, src) __va_copy(dst, src)
+#else
+#	define BUF_VA_COPY(dst, src) ((dst) = (src))
+#endif
+/* vbufprintf: stdarg variant of formatted printing into a buffer
+ * buf - destination; a NULL buffer is ignored
+ * fmt - printf-style format string
+ * ap  - arguments for fmt; never consumed directly, so the caller's list
+ *       stays intact for its own va_end
+ * Nothing is appended when formatting fails or the buffer cannot be grown. */
+void
+vbufprintf(struct buf *buf, const char *fmt, va_list ap)
+{
+	va_list args;
+	int n;
+	/* the `< 0` test has to apply to bufgrow's result, not to the 0/1 value
+	 * of the whole `&&` expression */
+	if (buf == NULL || (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0))
+		return;
+	/* each formatting pass consumes its va_list, so every pass gets a copy */
+	BUF_VA_COPY(args, ap);
+	n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, args);
+	va_end(args);
+	if (n < 0) {
+#ifdef _MSC_VER
+		/* the first pass gave no usable length; query the required one */
+		BUF_VA_COPY(args, ap);
+		n = _vscprintf(fmt, args);
+		va_end(args);
+#else
+		return;
+#endif
+	}
+	/* output did not fit: grow to the reported length and format again */
+	if ((size_t)n >= buf->asize - buf->size) {
+		if (bufgrow(buf, buf->size + n + 1) < 0)
+			return;
+		BUF_VA_COPY(args, ap);
+		n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, args);
+		va_end(args);
+	}
+	if (n < 0)
+		return;
+	buf->size += n;
+}