RubyGems - fast_xs - Versions diffs - 0.2 - Mend

fast_xs 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

data/CHANGELOG +8 -0
data/COPYING +22 -0
data/README +18 -0
data/Rakefile +119 -0
data/ext/fast_xs/extconf.rb +4 -0
data/ext/fast_xs/fast_xs.c +279 -0
data/lib/fast_xs_monkey_patcher.rb +32 -0
data/test/test_cgi_class_overrides.rb +35 -0
data/test/test_erb_util_module_overrides.rb +29 -0
data/test/test_xml_escaping.rb +39 -0
metadata +70 -0

data/CHANGELOG ADDED Viewed

@@ -0,0 +1,8 @@
+= 0.2
+=== 6th December, 2007
+* First real release
+* added fast_xs_html and fast_xs_cgi methods
+= 0.1
+=== 3rd October, 2007
+* initial implementation, not officially released

data/COPYING ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2007 Eric Wong <normalperson@yhbt.net>
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.

data/README ADDED Viewed

@@ -0,0 +1,18 @@
+= fast_xs, a fast C extension for cleaning XML
+== Overview
+  The original fast_xs method is based on the xchar code by Sam Ruby:
+    http://intertwingly.net/stories/2005/09/28/xchar.rb
+    http://intertwingly.net/blog/2005/09/28/XML-Cleansing
+  _why also packages a slightly modified version in the latest version
+  of Hpricot (not-yet-released) that also escapes '"' to "&quot;" (which
+  is optional).
+  This is an exact translation (to the best of my knowledge :) of Sam's
+  original implementation, so it does not escape "&quot;".  The version
+  of XML::Builder packaged with Rails also uses Sam's version, and
+  XML::Builder as packaged in Rails 2.0 will automatically use
+  fast_xs if available.

data/Rakefile ADDED Viewed

@@ -0,0 +1,119 @@
+require 'rake'
+require 'rake/clean'
+require 'rake/gempackagetask'
+require 'rake/rdoctask'
+require 'rake/testtask'
+require 'hoe'
+require 'fileutils'
+include FileUtils
+# Gem name; reused for the gemspec, the Hoe project and the install tasks.
+name = "fast_xs"
+# Output of `git describe` (stderr discarded); nil if running the command raises.
+rev = `git describe 2>/dev/null`.chomp rescue nil
+# ENV['VERSION'] wins as-is; otherwise "0.2" plus a "-<rev>" suffix when rev is
+# non-empty (`+` binds tighter than `||`, so the suffix never applies to ENV).
+version = ENV['VERSION'] || "0.2" + (rev && rev.length > 0 ? "-#{rev}" : "")
+pkg = "#{name}-#{version}"
+# Glob for compiled artifacts (shared objects and object files).
+bin = "*.{so,o}"
+# Architecture-specific directory the compiled extension is copied into.
+archlib = "lib/#{::Config::CONFIG['arch']}"
+# Files removed by the `clean` task.
+CLEAN.include ["ext/fast_xs/#{bin}", "lib/**/#{bin}",
+               'ext/fast_xs/Makefile', '**/.*.sw?', '*.gem', '.config']
+# RDoc options shared by the gemspec and the rdoc task.
+rdoc_opts = ['--quiet', '--title', 'fast_xs notes', '--main', 'README',
+             '--inline-source']
+# Files shipped in the gem: top-level docs, Ruby sources/tests, extension sources.
+pkg_files = %w(CHANGELOG COPYING README Rakefile) +
+            Dir.glob("{test,lib}/**/*.rb") +
+            Dir.glob("ext/**/*.{c,rb}")
+# Gem specification: a source gem (platform RUBY) whose extension is built at
+# install time from every extconf.rb found under ext/.
+spec = Gem::Specification.new do |s|
+  s.name = name
+  s.version = version
+  s.platform = Gem::Platform::RUBY
+  s.has_rdoc = true
+  s.rdoc_options += rdoc_opts
+  s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
+  s.summary = "escape faster!"
+  s.description = s.summary
+  s.author = "Eric Wong"
+  s.email = 'normalperson@yhbt.net'
+  s.homepage = 'http://bogonips.org/fast_xs/'
+  s.files = pkg_files
+  # The arch-specific directory comes first so the compiled extension is found.
+  s.require_paths = [archlib, "lib"]
+  s.extensions = FileList["ext/**/extconf.rb"].to_a
+end
+# Hoe project definition; its metadata is copied from the gemspec above.
+hoe = Hoe.new(name, version) do |p|
+  p.author = spec.author
+  p.description = spec.description
+  p.email = spec.email
+  p.summary = spec.summary
+  p.url = spec.homepage
+  p.rubyforge_name = 'fast-xs'
+end
+# Default task: build the extension, then run the test suite.
+desc "Does a full compile, test run"
+task :default => [:compile, :test]
+# Test task: runs test/test_*.rb with test/ and the arch-specific lib
+# directory added to the load path.
+desc "Run all the tests"
+Rake::TestTask.new do |t|
+  t.libs << "test" << archlib
+  t.test_files = FileList['test/test_*.rb']
+  t.verbose = true
+end
+# RDoc task: generates doc/rdoc from README, CHANGELOG and COPYING.
+Rake::RDocTask.new do |rdoc|
+  rdoc.rdoc_dir = 'doc/rdoc'
+  rdoc.options += rdoc_opts
+  rdoc.main = "README"
+  rdoc.rdoc_files.add ['README', 'CHANGELOG', 'COPYING' ]
+end
+# Package task: builds the gem described by `spec`, plus a tarball.
+Rake::GemPackageTask.new(spec) do |p|
+  p.need_tar = true
+  p.gem_spec = spec
+end
+# Build rules for each C extension under ext/<name>.
+['fast_xs'].each do |extension|
+  ext = "ext/#{extension}"
+  # Path of the compiled shared object, using the platform's DLEXT suffix.
+  ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
+  # Prerequisites of the shared object: sources, headers, the generated
+  # Makefile and the "lib" task.
+  ext_files = FileList[
+    "#{ext}/*.c",
+    "#{ext}/*.h",
+    "#{ext}/*.rl",
+    "#{ext}/extconf.rb",
+    "#{ext}/Makefile",
+    "lib"
+  ]
+  desc "Builds just the #{extension} extension"
+  task extension.to_sym => ["#{ext}/Makefile", ext_so ]
+  # Generate the Makefile by running extconf.rb inside the extension directory.
+  file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
+    Dir.chdir(ext) do ruby "extconf.rb" end
+  end
+  # Run make in the extension directory, then copy the resulting shared object
+  # (with mode 0644) into the arch-specific lib directory.
+  file ext_so => ext_files do
+    Dir.chdir(ext) do sh('make') end
+    mkdir_p archlib
+    chmod 0644, ext_so
+    cp ext_so, archlib
+  end
+end
+task "lib" do
+  directory "lib"
+end
+desc "Compiles the Ruby extension"
+task :compile => [:fast_xs] do
+  if Dir.glob(File.join(archlib,"fast_xs.*")).empty?
+    STDERR.puts 'Gem failed to build'
+    exit(1)
+  end
+end
+task :install do
+  sh %{rake package}
+  sh %{sudo gem install pkg/#{name}-#{version}}
+end
+task :uninstall => [:clean] do
+  sh %{sudo gem uninstall #{name}}
+end

data/ext/fast_xs/extconf.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require 'mkmf'
+have_header('assert.h') or exit
+dir_config('fast_xs')
+create_makefile('fast_xs')

data/ext/fast_xs/fast_xs.c ADDED Viewed

@@ -0,0 +1,279 @@
+#define VERSION	"0.1"
+#include <ruby.h>
+#include <assert.h>
+static ID unpack_id;
+static VALUE U_fmt, C_fmt;
+/*
+ * Branch-prediction hints: with GCC 3 or later, likely(x)/unlikely(x) tell
+ * the compiler which outcome of a condition to expect (branches are laid
+ * out so that ASCII characters are handled faster).  With any other
+ * compiler both macros expand to plain (x). */
+#if defined(__GNUC__) && (__GNUC__ >= 3)
+#  define likely(x)		__builtin_expect (!!(x), 1)
+#  define unlikely(x)		__builtin_expect (!!(x), 0)
+#else
+#  define unlikely(x)		(x)
+#  define likely(x)		(x)
+#endif
+/*
+ * CP-1252 => Unicode translation table for bytes 128..159, indexed by
+ * (byte - 128).
+ *
+ * p(x) marks the bytes that have no CP-1252 mapping and must pass through
+ * unchanged.  It has to evaluate to x itself: the previous definition,
+ * (x-128), stored 1, 13, 15, 16 and 29 in the table, so these bytes were
+ * turned into invalid control characters (and emitted as '*') instead of
+ * being passed through as the per-entry comments describe.
+ */
+#define p(x) (x)
+static const int cp_1252[] = {
+	8364,		/* 128 => 8364, euro sign */
+	p(129),		/* 129 => 129,  pass-through */
+	8218,		/* 130 => 8218, single low-9 quotation mark */
+	402,		/* 131 =>  402, latin small letter f with hook */
+	8222,		/* 132 => 8222, double low-9 quotation mark */
+	8230,		/* 133 => 8230, horizontal ellipsis */
+	8224,		/* 134 => 8224, dagger */
+	8225,		/* 135 => 8225, double dagger */
+	710,		/* 136 =>  710, modifier letter circumflex accent */
+	8240,		/* 137 => 8240, per mille sign */
+	352,		/* 138 =>  352, latin capital letter s with caron */
+	8249,		/* 139 => 8249, single left-pointing angle quotation mark */
+	338,		/* 140 =>  338, latin capital ligature oe */
+	p(141),		/* 141 =>  141, pass-through */
+	381,		/* 142 =>  381, latin capital letter z with caron */
+	p(143),		/* 143 =>  143, pass-through */
+	p(144),		/* 144 =>  144, pass-through */
+	8216,		/* 145 => 8216, left single quotation mark */
+	8217,		/* 146 => 8217, right single quotation mark */
+	8220,		/* 147 => 8220, left double quotation mark */
+	8221,		/* 148 => 8221, right double quotation mark */
+	8226,		/* 149 => 8226, bullet */
+	8211,		/* 150 => 8211, en dash */
+	8212,		/* 151 => 8212, em dash */
+	732,		/* 152 =>  732, small tilde */
+	8482,		/* 153 => 8482, trade mark sign */
+	353,		/* 154 =>  353, latin small letter s with caron */
+	8250,		/* 155 => 8250, single right-pointing angle quotation mark */
+	339,		/* 156 =>  339, latin small ligature oe */
+	p(157),		/* 157 =>  157, pass-through */
+	382,		/* 158 =>  382, latin small letter z with caron */
+	376		/* 159 =>  376, latin capital letter y with diaeresis */
+};
+/*
+ * True when n lies in one of the code point ranges that are emitted as-is
+ * or as a numeric character reference.  The whole expansion and every use
+ * of the argument are parenthesized so the macro composes safely with
+ * surrounding operators (e.g. !VALID_VALUE(x)) and expression arguments.
+ */
+#define VALID_VALUE(n) \
+	(((n) >= 0x20 && (n) <= 0xD7FF) || \
+	    ((n) >= 0xE000 && (n) <= 0xFFFD) || \
+	    ((n) >= 0x10000 && (n) <= 0x10FFFF))
+/*
+ * Replaces n, in place, by its cp_1252[] entry when n is in 128..159.
+ * n must be a modifiable lvalue.
+ */
+#define CP_1252_ESCAPE(n) do { \
+	if ((n) >= 128 && (n) <= 159) \
+		(n) = cp_1252[(n) - 128]; \
+	} while(0)
+/*
+ * Length in bytes of the numeric character reference "&#<n>;" as sized by
+ * this module: 6 bytes for anything below 1000, plus one byte per further
+ * decimal digit, capped at 10 bytes (seven digits), since values above
+ * 0x10FFFF are never escaped.
+ */
+static inline size_t bytes_for(int n)
+{
+	size_t len = sizeof("&#999;") - 1;
+	int limit;
+	for (limit = 1000; limit <= 1000000 && n >= limit; limit *= 10)
+		++len;
+	return len;
+}
+/*
+ * Writes the escaped form of code point n to buf and returns the number of
+ * bytes written.  buf must have room for what escaped_len(n) reports.
+ *
+ *   - '&', '<' and '>' become "&amp;", "&lt;" and "&gt;"
+ *   - other ASCII >= 0x20, plus tab, LF and CR, are copied unchanged
+ *   - remaining ASCII control characters become '*'
+ *   - values >= 128 are first translated through the CP-1252 table, then
+ *     written as a decimal reference "&#<n>;" if valid, or '*' otherwise
+ */
+static size_t escape(char *buf, int n)
+{
+#define return_const_len(x) do { \
+	memcpy(buf, x, sizeof(x) - 1); \
+	return (sizeof(x) - 1); \
+} while (0)
+	/* handle ASCII first */
+	if (likely(n < 128)) {
+		if (likely(n >= 0x20 || n == '\t' || n == '\n' || n == '\r')) {
+			if (unlikely(n == '&'))
+				return_const_len("&amp;");
+			if (unlikely(n == '<'))
+				return_const_len("&lt;");
+			if (unlikely(n == '>'))
+				return_const_len("&gt;");
+			buf[0] = (char)n;
+			return 1;
+		}
+		buf[0] = '*';
+		return 1;
+	}
+#undef return_const_len
+	CP_1252_ESCAPE(n);
+	if (VALID_VALUE(n)) {
+		/*
+		 * Same output as snprintf(buf, ..., "&#%i;", n), built from the
+		 * right.  Digits are computed arithmetically rather than read
+		 * from the interpreter's internal ruby_digitmap[] table, which
+		 * is not part of the public extension API.
+		 */
+		size_t rv = sizeof("&#;") - 1;
+		buf += bytes_for(n);
+		*--buf = ';';
+		do {
+			*--buf = (char)('0' + (n % 10));
+			++rv;
+		} while (n /= 10);
+		*--buf = '#';
+		*--buf = '&';
+		return rv;
+	}
+	buf[0] = '*';
+	return 1;
+}
+/*
+ * Number of bytes escape() emits for code point n: the entity length for
+ * '&', '<' and '>', one byte for every other value below 128, and for
+ * larger values either the numeric-reference length (after CP-1252
+ * translation) or one byte when the value is not valid.
+ */
+static long escaped_len(int n)
+{
+	if (unlikely(n >= 128)) {
+		CP_1252_ESCAPE(n);
+		return VALID_VALUE(n) ? bytes_for(n) : 1;
+	}
+	switch (n) {
+	case '&':
+		return (sizeof("&amp;") - 1);
+	case '<':
+	case '>':
+		return (sizeof("&gt;") - 1);
+	default:
+		return 1;
+	}
+}
+/* Calls self.unpack(U_fmt); U_fmt holds the "U*" format string. */
+static VALUE unpack_utf8(VALUE self)
+{
+	return rb_funcall(self, unpack_id, 1, U_fmt);
+}
+/* Calls self.unpack(C_fmt); C_fmt holds the "C*" format string.  Used as
+ * the rescue handler when unpack_utf8() raises. */
+static VALUE unpack_uchar(VALUE self)
+{
+	return rb_funcall(self, unpack_id, 1, C_fmt);
+}
+/*
+ * String#fast_xs: returns a new String with the receiver escaped for XML.
+ *
+ * The receiver is unpacked into integers with "U*", falling back to "C*"
+ * if that raises.  One pass sums the escaped length of every value, then a
+ * second pass writes the escaped bytes.
+ *
+ * The output is written directly into the buffer of the result String
+ * instead of an alloca() scratch area: the size is proportional to the
+ * input, so a large string could overflow the C stack, and the extra copy
+ * made by rb_str_new() is avoided as well.
+ */
+static VALUE fast_xs(VALUE self)
+{
+	long i;
+	struct RArray *array;
+	char *c;
+	size_t s_len = 0;
+	VALUE *tmp;
+	VALUE rv;
+	/* volatile: keep the unpacked array visible to the GC while the
+	 * result string is being allocated */
+	volatile VALUE unpacked;
+	unpacked = rb_rescue(unpack_utf8, self, unpack_uchar, self);
+	array = RARRAY(unpacked);
+	for (tmp = array->ptr, i = array->len; --i >= 0; tmp++)
+		s_len += escaped_len(NUM2INT(*tmp));
+	rv = rb_str_new(NULL, s_len);
+	c = RSTRING(rv)->ptr;
+	for (tmp = array->ptr, i = array->len; --i >= 0; tmp++)
+		c += escape(c, NUM2INT(*tmp));
+	return rv;
+}
+/*
+ * String#fast_xs_html: returns a new String with '&', '<', '>' and '"'
+ * replaced by "&amp;", "&lt;", "&gt;" and "&quot;".
+ *
+ * This is coding agnostic, and works on each byte, so some multibyte
+ * character sets may not be fully supported (but UTF-8 should be).
+ * This is meant to be 100% compatible with the
+ * ERB::Util::escape_html and CGI::escapeHTML methods
+ *
+ * The first pass computes the output length, the second writes straight
+ * into the result String's buffer.  No alloca() is used, because the
+ * output size is proportional to the input and could overflow the stack.
+ */
+static VALUE fast_xs_html(VALUE self)
+{
+	struct RString *string = RSTRING(self);
+	long i;
+	char *s;
+	size_t new_len = 0;
+	char *new_str;
+	VALUE rv;
+	for (s = string->ptr, i = string->len; --i >= 0; ++s) {
+		if (unlikely(*s == '&'))
+			new_len += (sizeof("&amp;") - 1);
+		else if (unlikely(*s == '<' || *s == '>'))
+			new_len += (sizeof("&gt;") - 1);
+		else if (unlikely(*s == '"'))
+			new_len += (sizeof("&quot;") - 1);
+		else
+			new_len += 1;
+	}
+	rv = rb_str_new(NULL, new_len);
+	new_str = RSTRING(rv)->ptr;
+/* copy the literal x (without its NUL) and advance buf past it; avoids
+ * arithmetic on the void pointer returned by memcpy() */
+#define append_const(buf, x) do { \
+	memcpy(buf, x, sizeof(x) - 1); \
+	buf += sizeof(x) - 1; \
+} while (0)
+	for (s = string->ptr, i = string->len; --i >= 0; ++s) {
+		if (unlikely(*s == '&'))
+			append_const(new_str, "&amp;");
+		else if (unlikely(*s == '<'))
+			append_const(new_str, "&lt;");
+		else if (unlikely(*s == '>'))
+			append_const(new_str, "&gt;");
+		else if (unlikely(*s == '"'))
+			append_const(new_str, "&quot;");
+		else
+			*new_str++ = *s;
+	}
+#undef append_const
+	return rv;
+}
+/* True for the bytes that are copied unescaped: [a-zA-Z0-9._-] */
+#define CGI_URI_OK(x) \
+	(((x) >= 'a' && (x) <= 'z') || \
+	 ((x) >= 'A' && (x) <= 'Z') || \
+	 ((x) >= '0' && (x) <= '9') || \
+	 ((x) == '.' || (x) == '-' || (x) == '_'))
+/*
+ * String#fast_xs_cgi: returns a new, URL-encoded String.
+ *
+ * Compatible with CGI::escape(), this iterates through each byte, so
+ * multibyte character sets may not be supported (but UTF-8 should be).
+ * Unreserved bytes are copied, ' ' becomes '+', and every other byte
+ * becomes "%XX" with uppercase hexadecimal digits.
+ *
+ * The output is written directly into the result String's buffer; no
+ * alloca() is used, because the output size is proportional to the input
+ * and could overflow the stack.
+ */
+static VALUE fast_xs_cgi(VALUE self)
+{
+	struct RString *string = RSTRING(self);
+	long i;
+	char *s;
+	size_t new_len = 0;
+	char *new_str;
+	VALUE rv;
+	for (s = string->ptr, i = string->len; --i >= 0; ++s) {
+		if (likely(CGI_URI_OK(*s) || *s == ' '))
+			++new_len;
+		else /* we'll only get <= "%FF" here */
+			new_len += 3;
+	}
+	rv = rb_str_new(NULL, new_len);
+	new_str = RSTRING(rv)->ptr;
+	for (s = string->ptr, i = string->len; --i >= 0; ++s) {
+		if (likely(CGI_URI_OK(*s)))
+			*new_str++ = *s;
+		else if (*s == ' ')
+			*new_str++ = '+';
+		else {
+			static const char cgi_digitmap[] = "0123456789ABCDEF";
+			/* plain char may be signed: go through unsigned char so
+			 * bytes >= 0x80 cannot yield a negative table index */
+			unsigned char byte = (unsigned char)*s;
+			new_str[0] = '%';
+			new_str[1] = cgi_digitmap[byte >> 4];
+			new_str[2] = cgi_digitmap[byte & 0x0F];
+			new_str += 3;
+		}
+	}
+	return rv;
+}
+/*
+ * Extension entry point: caches the "unpack" method ID and the "U*" / "C*"
+ * format strings, then defines String#fast_xs, String#fast_xs_html and
+ * String#fast_xs_cgi.
+ */
+void Init_fast_xs(void)
+{
+	assert(cp_1252[159 - 128] == 376); /* just in case I skipped a line */
+	unpack_id = rb_intern("unpack");
+	/*
+	 * Register the static VALUE slots with the GC before storing objects
+	 * in them.  Otherwise the first format string is referenced only from
+	 * an unregistered static while the second one is allocated, and could
+	 * be collected.
+	 */
+	rb_global_variable(&U_fmt);
+	rb_global_variable(&C_fmt);
+	U_fmt = rb_str_new("U*", 2);
+	C_fmt = rb_str_new("C*", 2);
+	rb_define_method(rb_cString, "fast_xs", fast_xs, 0);
+	rb_define_method(rb_cString, "fast_xs_html", fast_xs_html, 0);
+	rb_define_method(rb_cString, "fast_xs_cgi", fast_xs_cgi, 0);
+}

data/lib/fast_xs_monkey_patcher.rb ADDED Viewed

@@ -0,0 +1,32 @@
+require 'fast_xs'
+if defined?(CGI)
+  class CGI
+    def CGI::escapeHTML(value)
+      value.to_s.fast_xs_html
+    end
+    def CGI::escape(value)
+      value.to_s.fast_xs_cgi
+    end
+  end
+end
+if defined?(ERB::Util)
+  module ERB::Util
+    def html_escape(value)
+      value.to_s.fast_xs_html
+    end
+    alias h html_escape
+    module_function :h
+    module_function :html_escape
+  end
+end

data/test/test_cgi_class_overrides.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require 'test/unit'
+require 'cgi'
+load 'fast_xs_monkey_patcher.rb'
+# Exercises CGI::escapeHTML and CGI::escape after fast_xs_monkey_patcher.rb
+# has been loaded.
+class TestCgiClassOverrides < Test::Unit::TestCase
+  # The four characters that escapeHTML turns into entities.
+  def test_escape_html_predefined
+    assert_equal '&amp;', CGI::escapeHTML('&')
+    assert_equal '&quot;', CGI::escapeHTML('"')
+    assert_equal '&lt;', CGI::escapeHTML('<')
+    assert_equal '&gt;', CGI::escapeHTML('>')
+  end
+  # Plain text and the empty string come back unchanged.
+  def test_escape_html_normal
+    assert_equal 'hello world', CGI::escapeHTML('hello world')
+    assert_equal '', CGI::escapeHTML('')
+  end
+  # Control characters and the bytes of U+FFFF are left untouched by
+  # escapeHTML.
+  def test_escape_html_ignore
+    assert_equal "\x00", CGI::escapeHTML("\x00")
+    assert_equal "\x0C", CGI::escapeHTML("\x0C")
+    assert_equal "\xEF\xBF\xBF", CGI::escapeHTML("\xEF\xBF\xBF")
+  end
+  # URL encoding: space becomes '+', other reserved bytes become %XX.
+  def test_escape_cgi
+    assert_equal 'hello%3Dworld', CGI::escape('hello=world')
+    assert_equal '+', CGI::escape(' ')
+    assert_equal '%2B', CGI::escape('+')
+    assert_equal '%2C', CGI::escape(',')
+    assert_equal 'hello-world', CGI::escape('hello-world')
+    assert_equal 'H3LL0+W0RLD', CGI::escape('H3LL0 W0RLD')
+  end
+end

data/test/test_erb_util_module_overrides.rb ADDED Viewed

@@ -0,0 +1,29 @@
+require 'erb'
+require 'test/unit'
+load 'fast_xs_monkey_patcher.rb'
+# Exercises ERB::Util#html_escape (mixed into the test case) after
+# fast_xs_monkey_patcher.rb has been loaded.
+class TestErbUtilModuleOverrides < Test::Unit::TestCase
+  include ERB::Util
+  # The four characters that html_escape turns into entities.
+  def test_escape_html_predefined
+    assert_equal '&amp;', html_escape('&')
+    assert_equal '&quot;', html_escape('"')
+    assert_equal '&lt;', html_escape('<')
+    assert_equal '&gt;', html_escape('>')
+  end
+  # Plain text and the empty string come back unchanged.
+  def test_escape_html_normal
+    assert_equal 'hello world', html_escape('hello world')
+    assert_equal '', html_escape('')
+  end
+  # Control characters and the bytes of U+FFFF are left untouched by
+  # html_escape.
+  def test_escape_html_ignore
+    assert_equal "\x00", html_escape("\x00")
+    assert_equal "\x0C", html_escape("\x0C")
+    assert_equal "\xEF\xBF\xBF", html_escape("\xEF\xBF\xBF")
+  end
+end

data/test/test_xml_escaping.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require 'fast_xs'
+require 'test/unit'
+# Tests for String#fast_xs.
+# Based on Sam's original xchar.rb tests:
+class TestXmlEscaping < Test::Unit::TestCase
+  # Plain ASCII is returned unchanged.
+  def test_ascii
+    assert_equal 'abc', 'abc'.fast_xs
+  end
+  # '&', '<' and '>' become entities.
+  def test_predefined
+    assert_equal '&amp;', '&'.fast_xs              # ampersand
+    assert_equal '&lt;',  '<'.fast_xs              # left angle bracket
+    assert_equal '&gt;',  '>'.fast_xs              # right angle bracket
+  end
+  # Values that are not valid are replaced by '*'.
+  def test_invalid
+    assert_equal '*', "\x00".fast_xs               # null
+    assert_equal '*', "\x0C".fast_xs               # form feed
+    assert_equal '*', "\xEF\xBF\xBF".fast_xs       # U+FFFF
+  end
+  # Single high bytes outside 128..159 become numeric references to the
+  # same value.
+  def test_iso_8859_1
+    assert_equal '&#231;', "\xE7".fast_xs          # small c cedilla
+    assert_equal '&#169;', "\xA9".fast_xs          # copyright symbol
+  end
+  # Bytes 128..159 are translated through the CP-1252 table first.
+  def test_win_1252
+    assert_equal '&#8217;', "\x92".fast_xs         # smart quote
+    assert_equal '&#8364;', "\x80".fast_xs         # euro
+  end
+  # Well-formed UTF-8 sequences become references to their code point.
+  def test_utf8
+    assert_equal '&#8217;', "\xE2\x80\x99".fast_xs # right single quote
+    assert_equal '&#169;',  "\xC2\xA9".fast_xs     # copy
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,70 @@
+--- !ruby/object:Gem::Specification
+rubygems_version: 0.9.4.7
+specification_version: 2
+name: fast_xs
+version: !ruby/object:Gem::Version
+  version: "0.2"
+date: 2007-12-07 00:00:00 -08:00
+summary: escape faster!
+require_paths:
+- lib/i486-linux
+- lib
+email: normalperson@yhbt.net
+homepage: http://bogonips.org/fast_xs/
+rubyforge_project:
+description: escape faster!
+autorequire:
+default_executable:
+bindir: bin
+has_rdoc: true
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+platform: ruby
+signing_key:
+cert_chain: []
+post_install_message:
+authors:
+- Eric Wong
+files:
+- CHANGELOG
+- COPYING
+- README
+- Rakefile
+- test/test_cgi_class_overrides.rb
+- test/test_xml_escaping.rb
+- test/test_erb_util_module_overrides.rb
+- lib/fast_xs_monkey_patcher.rb
+- ext/fast_xs/fast_xs.c
+- ext/fast_xs/extconf.rb
+test_files: []
+rdoc_options:
+- --quiet
+- --title
+- fast_xs notes
+- --main
+- README
+- --inline-source
+extra_rdoc_files:
+- README
+- CHANGELOG
+- COPYING
+executables: []
+extensions:
+- ext/fast_xs/extconf.rb
+requirements: []
+dependencies: []