fast_xs 0.3 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +20 -12
- data/ext/fast_xs/fast_xs.c +59 -13
- data/lib/fast_xs_monkey_patcher.rb +10 -2
- data/test/test_cgi_class_overrides.rb +8 -0
- data/test/test_erb_util_module_overrides.rb +16 -0
- data/test/test_xml_escaping.rb +7 -0
- metadata +4 -4
data/Rakefile
CHANGED
@@ -3,18 +3,23 @@ require 'rake/clean'
|
|
3
3
|
require 'rake/gempackagetask'
|
4
4
|
require 'rake/rdoctask'
|
5
5
|
require 'rake/testtask'
|
6
|
-
require 'hoe'
|
6
|
+
require 'hoe' if ENV['VERSION']
|
7
7
|
require 'fileutils'
|
8
8
|
include FileUtils
|
9
9
|
|
10
10
|
name = "fast_xs"
|
11
|
-
|
12
|
-
|
11
|
+
begin
|
12
|
+
rev = `git rev-list -1 HEAD --pretty=format:%ct`
|
13
|
+
rev = Time.at(rev.split("\n")[1].to_i).strftime('%Y%m%d.%H%M%S')
|
14
|
+
rescue
|
15
|
+
end
|
16
|
+
version ||= ENV['VERSION'] || '0.4' + (rev && rev.length > 0 ? ".#{rev}" : '')
|
13
17
|
pkg = "#{name}-#{version}"
|
14
18
|
bin = "*.{so,o}"
|
15
19
|
archlib = "lib/#{::Config::CONFIG['arch']}"
|
16
20
|
CLEAN.include ["ext/fast_xs/#{bin}", "lib/**/#{bin}",
|
17
|
-
'ext/fast_xs/Makefile', '**/.*.sw?', '*.gem', '.config'
|
21
|
+
'ext/fast_xs/Makefile', '**/.*.sw?', '*.gem', '.config',
|
22
|
+
'pkg']
|
18
23
|
rdoc_opts = ['--quiet', '--title', 'fast_xs notes', '--main', 'README',
|
19
24
|
'--inline-source']
|
20
25
|
pkg_files = %w(CHANGELOG COPYING README Rakefile) +
|
@@ -29,7 +34,7 @@ spec = Gem::Specification.new do |s|
|
|
29
34
|
s.has_rdoc = true
|
30
35
|
s.rdoc_options += rdoc_opts
|
31
36
|
s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
|
32
|
-
s.summary = "
|
37
|
+
s.summary = "excessively fast escaping"
|
33
38
|
s.description = s.summary
|
34
39
|
s.author = "Eric Wong"
|
35
40
|
s.email = 'normalperson@yhbt.net'
|
@@ -39,13 +44,16 @@ spec = Gem::Specification.new do |s|
|
|
39
44
|
s.extensions = FileList["ext/**/extconf.rb"].to_a
|
40
45
|
end
|
41
46
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
47
|
+
if ENV['VERSION']
|
48
|
+
sh('git ls-files > Manifest.txt')
|
49
|
+
hoe = Hoe.new(name, version) do |p|
|
50
|
+
p.author = spec.author
|
51
|
+
p.description = spec.description
|
52
|
+
p.email = spec.email
|
53
|
+
p.summary = spec.summary
|
54
|
+
p.url = spec.homepage
|
55
|
+
p.rubyforge_name = 'fast-xs'
|
56
|
+
end
|
49
57
|
end
|
50
58
|
|
51
59
|
desc "Does a full compile, test run"
|
data/ext/fast_xs/fast_xs.c
CHANGED
@@ -2,9 +2,12 @@
|
|
2
2
|
|
3
3
|
#include <ruby.h>
|
4
4
|
#include <assert.h>
|
5
|
+
#include <sys/time.h>
|
6
|
+
#include <sys/resource.h>
|
5
7
|
|
6
8
|
static ID unpack_id;
|
7
9
|
static VALUE U_fmt, C_fmt;
|
10
|
+
static rlim_t alloca_limit = 4096; /* very small default */
|
8
11
|
|
9
12
|
/* give GCC hints for better branch prediction
|
10
13
|
* (we layout branches so that ASCII characters are handled faster) */
|
@@ -157,18 +160,24 @@ static VALUE fast_xs(VALUE self)
|
|
157
160
|
char *s, *c;
|
158
161
|
size_t s_len = 0;
|
159
162
|
VALUE *tmp;
|
163
|
+
VALUE rv;
|
160
164
|
|
161
165
|
array = RARRAY(rb_rescue(unpack_utf8, self, unpack_uchar, self));
|
162
166
|
|
163
167
|
for (tmp = array->ptr, i = array->len; --i >= 0; tmp++)
|
164
168
|
s_len += escaped_len(NUM2INT(*tmp));
|
165
169
|
|
166
|
-
c = s = alloca(s_len);
|
170
|
+
c = s = unlikely(s_len > alloca_limit) ? malloc(s_len) : alloca(s_len);
|
167
171
|
|
168
172
|
for (tmp = array->ptr, i = array->len; --i >= 0; tmp++)
|
169
173
|
c += escape(c, NUM2INT(*tmp));
|
170
174
|
|
171
|
-
|
175
|
+
rv = rb_str_new(s, s_len);
|
176
|
+
|
177
|
+
if (unlikely(s_len > alloca_limit))
|
178
|
+
free(s);
|
179
|
+
|
180
|
+
return rv;
|
172
181
|
}
|
173
182
|
|
174
183
|
/*
|
@@ -184,6 +193,7 @@ static VALUE fast_xs_html(VALUE self)
|
|
184
193
|
char *s;
|
185
194
|
size_t new_len = 0;
|
186
195
|
char *new_str;
|
196
|
+
VALUE rv;
|
187
197
|
|
188
198
|
for (s = string->ptr, i = string->len; --i >= 0; ++s) {
|
189
199
|
if (unlikely(*s == '&'))
|
@@ -196,7 +206,8 @@ static VALUE fast_xs_html(VALUE self)
|
|
196
206
|
new_len += 1;
|
197
207
|
}
|
198
208
|
|
199
|
-
new_str =
|
209
|
+
new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
|
210
|
+
: alloca(new_len);
|
200
211
|
|
201
212
|
#define append_const(buf, x) do { \
|
202
213
|
buf = memcpy(buf, x, sizeof(x) - 1) + sizeof(x) - 1; \
|
@@ -217,7 +228,12 @@ static VALUE fast_xs_html(VALUE self)
|
|
217
228
|
|
218
229
|
#undef append_const
|
219
230
|
|
220
|
-
|
231
|
+
rv = rb_str_new(new_str - new_len, new_len);
|
232
|
+
|
233
|
+
if (unlikely(new_len > alloca_limit))
|
234
|
+
free(new_str - new_len);
|
235
|
+
|
236
|
+
return rv;
|
221
237
|
}
|
222
238
|
|
223
239
|
#define CGI_URI_OK(x) \
|
@@ -226,31 +242,29 @@ static VALUE fast_xs_html(VALUE self)
|
|
226
242
|
(x >= '0' && x <= '9') || \
|
227
243
|
(x == '.' || x == '-' || x == '_'))
|
228
244
|
|
229
|
-
|
230
|
-
* Compatible with CGI::escape(), this iterates through each byte, so
|
231
|
-
* multibyte character sets may not supported (but UTF-8 should be).
|
232
|
-
*/
|
233
|
-
static VALUE fast_xs_cgi(VALUE self)
|
245
|
+
static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
|
234
246
|
{
|
235
247
|
struct RString *string = RSTRING(self);
|
236
248
|
long i;
|
237
249
|
char *s;
|
238
250
|
size_t new_len = 0;
|
239
251
|
char *new_str;
|
252
|
+
VALUE rv;
|
240
253
|
|
241
254
|
for (s = string->ptr, i = string->len; --i >= 0; ++s) {
|
242
|
-
if (likely(CGI_URI_OK(*s) || *s == ' '))
|
255
|
+
if (likely(CGI_URI_OK(*s) || (space_to_plus && *s == ' ')))
|
243
256
|
++new_len;
|
244
257
|
else /* we'll only get <= "%FF" here */
|
245
258
|
new_len += 3;
|
246
259
|
}
|
247
260
|
|
248
|
-
new_str =
|
261
|
+
new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
|
262
|
+
: alloca(new_len);
|
249
263
|
|
250
264
|
for (s = string->ptr, i = string->len; --i >= 0; ++s) {
|
251
265
|
if (likely(CGI_URI_OK(*s)))
|
252
266
|
*new_str++ = *s;
|
253
|
-
else if (*s == ' ')
|
267
|
+
else if (space_to_plus && *s == ' ')
|
254
268
|
*new_str++ = '+';
|
255
269
|
else {
|
256
270
|
static const char cgi_digitmap[] = "0123456789ABCDEF";
|
@@ -260,13 +274,44 @@ static VALUE fast_xs_cgi(VALUE self)
|
|
260
274
|
new_str += 3;
|
261
275
|
}
|
262
276
|
}
|
263
|
-
|
277
|
+
|
278
|
+
rv = rb_str_new(new_str - new_len, new_len);
|
279
|
+
|
280
|
+
if (unlikely(new_len > alloca_limit))
|
281
|
+
free(new_str - new_len);
|
282
|
+
|
283
|
+
return rv;
|
284
|
+
}
|
285
|
+
|
286
|
+
/*
|
287
|
+
* Compatible with ERB::Util::url_encode / ERB::Util::u, this iterates
|
288
|
+
* through each byte, so multibyte character sets may not supported (but
|
289
|
+
* UTF-8 should be).
|
290
|
+
*/
|
291
|
+
static VALUE fast_xs_url(VALUE self)
|
292
|
+
{
|
293
|
+
return _xs_uri_encode(self, 0);
|
294
|
+
}
|
295
|
+
|
296
|
+
/*
|
297
|
+
* Compatible with CGI::escape(), this iterates through each byte, so
|
298
|
+
* multibyte character sets may not supported (but UTF-8 should be).
|
299
|
+
*/
|
300
|
+
static VALUE fast_xs_cgi(VALUE self)
|
301
|
+
{
|
302
|
+
return _xs_uri_encode(self, 1);
|
264
303
|
}
|
265
304
|
|
266
305
|
void Init_fast_xs(void)
|
267
306
|
{
|
307
|
+
struct rlimit rlim;
|
308
|
+
|
268
309
|
assert(cp_1252[159 - 128] == 376); /* just in case I skipped a line */
|
269
310
|
|
311
|
+
/* fairly conservative stack estimation IMHO... */
|
312
|
+
if (!getrlimit(RLIMIT_STACK, &rlim) && (rlim.rlim_cur > 0x80000))
|
313
|
+
alloca_limit = rlim.rlim_cur - (rlim.rlim_cur / 16);
|
314
|
+
|
270
315
|
unpack_id = rb_intern("unpack");
|
271
316
|
U_fmt = rb_str_new("U*", 2);
|
272
317
|
C_fmt = rb_str_new("C*", 2);
|
@@ -276,4 +321,5 @@ void Init_fast_xs(void)
|
|
276
321
|
rb_define_method(rb_cString, "fast_xs", fast_xs, 0);
|
277
322
|
rb_define_method(rb_cString, "fast_xs_html", fast_xs_html, 0);
|
278
323
|
rb_define_method(rb_cString, "fast_xs_cgi", fast_xs_cgi, 0);
|
324
|
+
rb_define_method(rb_cString, "fast_xs_url", fast_xs_url, 0);
|
279
325
|
}
|
@@ -5,11 +5,11 @@ if defined?(CGI)
|
|
5
5
|
class CGI
|
6
6
|
|
7
7
|
def CGI::escapeHTML(value)
|
8
|
-
value.
|
8
|
+
value.fast_xs_html
|
9
9
|
end
|
10
10
|
|
11
11
|
def CGI::escape(value)
|
12
|
-
value.
|
12
|
+
value.fast_xs_cgi
|
13
13
|
end
|
14
14
|
|
15
15
|
end
|
@@ -27,6 +27,14 @@ if defined?(ERB::Util)
|
|
27
27
|
module_function :h
|
28
28
|
module_function :html_escape
|
29
29
|
|
30
|
+
def url_encode(value)
|
31
|
+
value.to_s.fast_xs_url
|
32
|
+
end
|
33
|
+
alias u url_encode
|
34
|
+
module_function :u
|
35
|
+
module_function :url_encode
|
36
|
+
|
37
|
+
|
30
38
|
end
|
31
39
|
|
32
40
|
end
|
@@ -31,5 +31,13 @@ class TestCgiClassOverrides < Test::Unit::TestCase
|
|
31
31
|
assert_equal 'H3LL0+W0RLD', CGI::escape('H3LL0 W0RLD')
|
32
32
|
end
|
33
33
|
|
34
|
+
def test_large_strings
|
35
|
+
if ENV['LARGE_STRING_TEST']
|
36
|
+
assert CGI::escape('&' * (8192 * 1024))
|
37
|
+
assert CGI::escapeHTML('&' * (8192 * 1024))
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
|
34
42
|
end
|
35
43
|
|
@@ -24,6 +24,22 @@ class TestErbUtilModuleOverrides < Test::Unit::TestCase
|
|
24
24
|
assert_equal "\xEF\xBF\xBF", html_escape("\xEF\xBF\xBF")
|
25
25
|
end
|
26
26
|
|
27
|
+
def test_escape_url
|
28
|
+
assert_equal 'hello%3Dworld', url_encode('hello=world')
|
29
|
+
assert_equal '%20', url_encode(' ')
|
30
|
+
assert_equal '%2B', url_encode('+')
|
31
|
+
assert_equal '%2C', url_encode(',')
|
32
|
+
assert_equal 'hello-world', url_encode('hello-world')
|
33
|
+
assert_equal 'H3LL0%20W0RLD', url_encode('H3LL0 W0RLD')
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_large_strings
|
37
|
+
if ENV['LARGE_STRING_TEST']
|
38
|
+
assert u('&' * (8192 * 1024))
|
39
|
+
assert h('&' * (8192 * 1024))
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
27
43
|
end
|
28
44
|
|
29
45
|
|
data/test/test_xml_escaping.rb
CHANGED
@@ -36,4 +36,11 @@ class TestXmlEscaping < Test::Unit::TestCase
|
|
36
36
|
assert_equal '©', "\xC2\xA9".fast_xs # copy
|
37
37
|
end
|
38
38
|
|
39
|
+
def test_large_document
|
40
|
+
if ENV['LARGE_STRING_TEST']
|
41
|
+
assert ('&' * (8192 * 1024)).fast_xs
|
42
|
+
assert ('a' * (8192 * 1024)).fast_xs
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
39
46
|
end
|
metadata
CHANGED
@@ -3,16 +3,16 @@ rubygems_version: 0.9.4.7
|
|
3
3
|
specification_version: 2
|
4
4
|
name: fast_xs
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "0.
|
7
|
-
date: 2007-12-
|
8
|
-
summary:
|
6
|
+
version: "0.4"
|
7
|
+
date: 2007-12-12 00:00:00 -08:00
|
8
|
+
summary: excessively fast escaping
|
9
9
|
require_paths:
|
10
10
|
- lib/i486-linux
|
11
11
|
- lib
|
12
12
|
email: normalperson@yhbt.net
|
13
13
|
homepage: http://bogonips.org/fast_xs/
|
14
14
|
rubyforge_project:
|
15
|
-
description:
|
15
|
+
description: excessively fast escaping
|
16
16
|
autorequire:
|
17
17
|
default_executable:
|
18
18
|
bindir: bin
|