fast_xs 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -3,18 +3,23 @@ require 'rake/clean'
3
3
  require 'rake/gempackagetask'
4
4
  require 'rake/rdoctask'
5
5
  require 'rake/testtask'
6
- require 'hoe'
6
+ require 'hoe' if ENV['VERSION']
7
7
  require 'fileutils'
8
8
  include FileUtils
9
9
 
10
10
  name = "fast_xs"
11
- rev = `git describe 2>/dev/null`.chomp rescue nil
12
- version = ENV['VERSION'] || "0.2" + (rev && rev.length > 0 ? "-#{rev}" : "")
11
+ begin
12
+ rev = `git rev-list -1 HEAD --pretty=format:%ct`
13
+ rev = Time.at(rev.split("\n")[1].to_i).strftime('%Y%m%d.%H%M%S')
14
+ rescue
15
+ end
16
+ version ||= ENV['VERSION'] || '0.4' + (rev && rev.length > 0 ? ".#{rev}" : '')
13
17
  pkg = "#{name}-#{version}"
14
18
  bin = "*.{so,o}"
15
19
  archlib = "lib/#{::Config::CONFIG['arch']}"
16
20
  CLEAN.include ["ext/fast_xs/#{bin}", "lib/**/#{bin}",
17
- 'ext/fast_xs/Makefile', '**/.*.sw?', '*.gem', '.config']
21
+ 'ext/fast_xs/Makefile', '**/.*.sw?', '*.gem', '.config',
22
+ 'pkg']
18
23
  rdoc_opts = ['--quiet', '--title', 'fast_xs notes', '--main', 'README',
19
24
  '--inline-source']
20
25
  pkg_files = %w(CHANGELOG COPYING README Rakefile) +
@@ -29,7 +34,7 @@ spec = Gem::Specification.new do |s|
29
34
  s.has_rdoc = true
30
35
  s.rdoc_options += rdoc_opts
31
36
  s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
32
- s.summary = "escape faster!"
37
+ s.summary = "excessively fast escaping"
33
38
  s.description = s.summary
34
39
  s.author = "Eric Wong"
35
40
  s.email = 'normalperson@yhbt.net'
@@ -39,13 +44,16 @@ spec = Gem::Specification.new do |s|
39
44
  s.extensions = FileList["ext/**/extconf.rb"].to_a
40
45
  end
41
46
 
42
- hoe = Hoe.new(name, version) do |p|
43
- p.author = spec.author
44
- p.description = spec.description
45
- p.email = spec.email
46
- p.summary = spec.summary
47
- p.url = spec.homepage
48
- p.rubyforge_name = 'fast-xs'
47
+ if ENV['VERSION']
48
+ sh('git ls-files > Manifest.txt')
49
+ hoe = Hoe.new(name, version) do |p|
50
+ p.author = spec.author
51
+ p.description = spec.description
52
+ p.email = spec.email
53
+ p.summary = spec.summary
54
+ p.url = spec.homepage
55
+ p.rubyforge_name = 'fast-xs'
56
+ end
49
57
  end
50
58
 
51
59
  desc "Does a full compile, test run"
@@ -2,9 +2,12 @@
2
2
 
3
3
  #include <ruby.h>
4
4
  #include <assert.h>
5
+ #include <sys/time.h>
6
+ #include <sys/resource.h>
5
7
 
6
8
  static ID unpack_id;
7
9
  static VALUE U_fmt, C_fmt;
10
+ static rlim_t alloca_limit = 4096; /* very small default */
8
11
 
9
12
  /* give GCC hints for better branch prediction
10
13
  * (we layout branches so that ASCII characters are handled faster) */
@@ -157,18 +160,24 @@ static VALUE fast_xs(VALUE self)
157
160
  char *s, *c;
158
161
  size_t s_len = 0;
159
162
  VALUE *tmp;
163
+ VALUE rv;
160
164
 
161
165
  array = RARRAY(rb_rescue(unpack_utf8, self, unpack_uchar, self));
162
166
 
163
167
  for (tmp = array->ptr, i = array->len; --i >= 0; tmp++)
164
168
  s_len += escaped_len(NUM2INT(*tmp));
165
169
 
166
- c = s = alloca(s_len);
170
+ c = s = unlikely(s_len > alloca_limit) ? malloc(s_len) : alloca(s_len);
167
171
 
168
172
  for (tmp = array->ptr, i = array->len; --i >= 0; tmp++)
169
173
  c += escape(c, NUM2INT(*tmp));
170
174
 
171
- return rb_str_new(s, s_len);
175
+ rv = rb_str_new(s, s_len);
176
+
177
+ if (unlikely(s_len > alloca_limit))
178
+ free(s);
179
+
180
+ return rv;
172
181
  }
173
182
 
174
183
  /*
@@ -184,6 +193,7 @@ static VALUE fast_xs_html(VALUE self)
184
193
  char *s;
185
194
  size_t new_len = 0;
186
195
  char *new_str;
196
+ VALUE rv;
187
197
 
188
198
  for (s = string->ptr, i = string->len; --i >= 0; ++s) {
189
199
  if (unlikely(*s == '&'))
@@ -196,7 +206,8 @@ static VALUE fast_xs_html(VALUE self)
196
206
  new_len += 1;
197
207
  }
198
208
 
199
- new_str = alloca(new_len);
209
+ new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
210
+ : alloca(new_len);
200
211
 
201
212
  #define append_const(buf, x) do { \
202
213
  buf = memcpy(buf, x, sizeof(x) - 1) + sizeof(x) - 1; \
@@ -217,7 +228,12 @@ static VALUE fast_xs_html(VALUE self)
217
228
 
218
229
  #undef append_const
219
230
 
220
- return rb_str_new(new_str - new_len, new_len);
231
+ rv = rb_str_new(new_str - new_len, new_len);
232
+
233
+ if (unlikely(new_len > alloca_limit))
234
+ free(new_str - new_len);
235
+
236
+ return rv;
221
237
  }
222
238
 
223
239
  #define CGI_URI_OK(x) \
@@ -226,31 +242,29 @@ static VALUE fast_xs_html(VALUE self)
226
242
  (x >= '0' && x <= '9') || \
227
243
  (x == '.' || x == '-' || x == '_'))
228
244
 
229
- /*
230
- * Compatible with CGI::escape(), this iterates through each byte, so
231
- * multibyte character sets may not supported (but UTF-8 should be).
232
- */
233
- static VALUE fast_xs_cgi(VALUE self)
245
+ static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
234
246
  {
235
247
  struct RString *string = RSTRING(self);
236
248
  long i;
237
249
  char *s;
238
250
  size_t new_len = 0;
239
251
  char *new_str;
252
+ VALUE rv;
240
253
 
241
254
  for (s = string->ptr, i = string->len; --i >= 0; ++s) {
242
- if (likely(CGI_URI_OK(*s) || *s == ' '))
255
+ if (likely(CGI_URI_OK(*s) || (space_to_plus && *s == ' ')))
243
256
  ++new_len;
244
257
  else /* we'll only get <= "%FF" here */
245
258
  new_len += 3;
246
259
  }
247
260
 
248
- new_str = alloca(new_len);
261
+ new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
262
+ : alloca(new_len);
249
263
 
250
264
  for (s = string->ptr, i = string->len; --i >= 0; ++s) {
251
265
  if (likely(CGI_URI_OK(*s)))
252
266
  *new_str++ = *s;
253
- else if (*s == ' ')
267
+ else if (space_to_plus && *s == ' ')
254
268
  *new_str++ = '+';
255
269
  else {
256
270
  static const char cgi_digitmap[] = "0123456789ABCDEF";
@@ -260,13 +274,44 @@ static VALUE fast_xs_cgi(VALUE self)
260
274
  new_str += 3;
261
275
  }
262
276
  }
263
- return rb_str_new(new_str - new_len, new_len);
277
+
278
+ rv = rb_str_new(new_str - new_len, new_len);
279
+
280
+ if (unlikely(new_len > alloca_limit))
281
+ free(new_str - new_len);
282
+
283
+ return rv;
284
+ }
285
+
286
+ /*
287
+ * Compatible with ERB::Util::url_encode / ERB::Util::u, this iterates
288
+ * through each byte, so multibyte character sets may not be supported (but
289
+ * UTF-8 should be).
290
+ */
291
+ static VALUE fast_xs_url(VALUE self)
292
+ {
293
+ return _xs_uri_encode(self, 0);
294
+ }
295
+
296
+ /*
297
+ * Compatible with CGI::escape(), this iterates through each byte, so
298
+ * multibyte character sets may not be supported (but UTF-8 should be).
299
+ */
300
+ static VALUE fast_xs_cgi(VALUE self)
301
+ {
302
+ return _xs_uri_encode(self, 1);
264
303
  }
265
304
 
266
305
  void Init_fast_xs(void)
267
306
  {
307
+ struct rlimit rlim;
308
+
268
309
  assert(cp_1252[159 - 128] == 376); /* just in case I skipped a line */
269
310
 
311
+ /* fairly conservative stack estimation IMHO... */
312
+ if (!getrlimit(RLIMIT_STACK, &rlim) && (rlim.rlim_cur > 0x80000))
313
+ alloca_limit = rlim.rlim_cur - (rlim.rlim_cur / 16);
314
+
270
315
  unpack_id = rb_intern("unpack");
271
316
  U_fmt = rb_str_new("U*", 2);
272
317
  C_fmt = rb_str_new("C*", 2);
@@ -276,4 +321,5 @@ void Init_fast_xs(void)
276
321
  rb_define_method(rb_cString, "fast_xs", fast_xs, 0);
277
322
  rb_define_method(rb_cString, "fast_xs_html", fast_xs_html, 0);
278
323
  rb_define_method(rb_cString, "fast_xs_cgi", fast_xs_cgi, 0);
324
+ rb_define_method(rb_cString, "fast_xs_url", fast_xs_url, 0);
279
325
  }
@@ -5,11 +5,11 @@ if defined?(CGI)
5
5
  class CGI
6
6
 
7
7
  def CGI::escapeHTML(value)
8
- value.to_s.fast_xs_html
8
+ value.fast_xs_html
9
9
  end
10
10
 
11
11
  def CGI::escape(value)
12
- value.to_s.fast_xs_cgi
12
+ value.fast_xs_cgi
13
13
  end
14
14
 
15
15
  end
@@ -27,6 +27,14 @@ if defined?(ERB::Util)
27
27
  module_function :h
28
28
  module_function :html_escape
29
29
 
30
+ def url_encode(value)
31
+ value.to_s.fast_xs_url
32
+ end
33
+ alias u url_encode
34
+ module_function :u
35
+ module_function :url_encode
36
+
37
+
30
38
  end
31
39
 
32
40
  end
@@ -31,5 +31,13 @@ class TestCgiClassOverrides < Test::Unit::TestCase
31
31
  assert_equal 'H3LL0+W0RLD', CGI::escape('H3LL0 W0RLD')
32
32
  end
33
33
 
34
+ def test_large_strings
35
+ if ENV['LARGE_STRING_TEST']
36
+ assert CGI::escape('&' * (8192 * 1024))
37
+ assert CGI::escapeHTML('&' * (8192 * 1024))
38
+ end
39
+ end
40
+
41
+
34
42
  end
35
43
 
@@ -24,6 +24,22 @@ class TestErbUtilModuleOverrides < Test::Unit::TestCase
24
24
  assert_equal "\xEF\xBF\xBF", html_escape("\xEF\xBF\xBF")
25
25
  end
26
26
 
27
+ def test_escape_url
28
+ assert_equal 'hello%3Dworld', url_encode('hello=world')
29
+ assert_equal '%20', url_encode(' ')
30
+ assert_equal '%2B', url_encode('+')
31
+ assert_equal '%2C', url_encode(',')
32
+ assert_equal 'hello-world', url_encode('hello-world')
33
+ assert_equal 'H3LL0%20W0RLD', url_encode('H3LL0 W0RLD')
34
+ end
35
+
36
+ def test_large_strings
37
+ if ENV['LARGE_STRING_TEST']
38
+ assert u('&' * (8192 * 1024))
39
+ assert h('&' * (8192 * 1024))
40
+ end
41
+ end
42
+
27
43
  end
28
44
 
29
45
 
@@ -36,4 +36,11 @@ class TestXmlEscaping < Test::Unit::TestCase
36
36
  assert_equal '&#169;', "\xC2\xA9".fast_xs # copy
37
37
  end
38
38
 
39
+ def test_large_document
40
+ if ENV['LARGE_STRING_TEST']
41
+ assert ('&' * (8192 * 1024)).fast_xs
42
+ assert ('a' * (8192 * 1024)).fast_xs
43
+ end
44
+ end
45
+
39
46
  end
metadata CHANGED
@@ -3,16 +3,16 @@ rubygems_version: 0.9.4.7
3
3
  specification_version: 2
4
4
  name: fast_xs
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.3"
7
- date: 2007-12-07 00:00:00 -08:00
8
- summary: escape faster!
6
+ version: "0.4"
7
+ date: 2007-12-12 00:00:00 -08:00
8
+ summary: excessively fast escaping
9
9
  require_paths:
10
10
  - lib/i486-linux
11
11
  - lib
12
12
  email: normalperson@yhbt.net
13
13
  homepage: http://bogonips.org/fast_xs/
14
14
  rubyforge_project:
15
- description: escape faster!
15
+ description: excessively fast escaping
16
16
  autorequire:
17
17
  default_executable:
18
18
  bindir: bin