fast_xs 0.3 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +20 -12
- data/ext/fast_xs/fast_xs.c +59 -13
- data/lib/fast_xs_monkey_patcher.rb +10 -2
- data/test/test_cgi_class_overrides.rb +8 -0
- data/test/test_erb_util_module_overrides.rb +16 -0
- data/test/test_xml_escaping.rb +7 -0
- metadata +4 -4
data/Rakefile
CHANGED
|
@@ -3,18 +3,23 @@ require 'rake/clean'
|
|
|
3
3
|
require 'rake/gempackagetask'
|
|
4
4
|
require 'rake/rdoctask'
|
|
5
5
|
require 'rake/testtask'
|
|
6
|
-
require 'hoe'
|
|
6
|
+
require 'hoe' if ENV['VERSION']
|
|
7
7
|
require 'fileutils'
|
|
8
8
|
include FileUtils
|
|
9
9
|
|
|
10
10
|
name = "fast_xs"
|
|
11
|
-
|
|
12
|
-
|
|
11
|
+
begin
|
|
12
|
+
rev = `git rev-list -1 HEAD --pretty=format:%ct`
|
|
13
|
+
rev = Time.at(rev.split("\n")[1].to_i).strftime('%Y%m%d.%H%M%S')
|
|
14
|
+
rescue
|
|
15
|
+
end
|
|
16
|
+
version ||= ENV['VERSION'] || '0.4' + (rev && rev.length > 0 ? ".#{rev}" : '')
|
|
13
17
|
pkg = "#{name}-#{version}"
|
|
14
18
|
bin = "*.{so,o}"
|
|
15
19
|
archlib = "lib/#{::Config::CONFIG['arch']}"
|
|
16
20
|
CLEAN.include ["ext/fast_xs/#{bin}", "lib/**/#{bin}",
|
|
17
|
-
'ext/fast_xs/Makefile', '**/.*.sw?', '*.gem', '.config'
|
|
21
|
+
'ext/fast_xs/Makefile', '**/.*.sw?', '*.gem', '.config',
|
|
22
|
+
'pkg']
|
|
18
23
|
rdoc_opts = ['--quiet', '--title', 'fast_xs notes', '--main', 'README',
|
|
19
24
|
'--inline-source']
|
|
20
25
|
pkg_files = %w(CHANGELOG COPYING README Rakefile) +
|
|
@@ -29,7 +34,7 @@ spec = Gem::Specification.new do |s|
|
|
|
29
34
|
s.has_rdoc = true
|
|
30
35
|
s.rdoc_options += rdoc_opts
|
|
31
36
|
s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
|
|
32
|
-
s.summary = "
|
|
37
|
+
s.summary = "excessively fast escaping"
|
|
33
38
|
s.description = s.summary
|
|
34
39
|
s.author = "Eric Wong"
|
|
35
40
|
s.email = 'normalperson@yhbt.net'
|
|
@@ -39,13 +44,16 @@ spec = Gem::Specification.new do |s|
|
|
|
39
44
|
s.extensions = FileList["ext/**/extconf.rb"].to_a
|
|
40
45
|
end
|
|
41
46
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
if ENV['VERSION']
|
|
48
|
+
sh('git ls-files > Manifest.txt')
|
|
49
|
+
hoe = Hoe.new(name, version) do |p|
|
|
50
|
+
p.author = spec.author
|
|
51
|
+
p.description = spec.description
|
|
52
|
+
p.email = spec.email
|
|
53
|
+
p.summary = spec.summary
|
|
54
|
+
p.url = spec.homepage
|
|
55
|
+
p.rubyforge_name = 'fast-xs'
|
|
56
|
+
end
|
|
49
57
|
end
|
|
50
58
|
|
|
51
59
|
desc "Does a full compile, test run"
|
data/ext/fast_xs/fast_xs.c
CHANGED
|
@@ -2,9 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
#include <ruby.h>
|
|
4
4
|
#include <assert.h>
|
|
5
|
+
#include <sys/time.h>
|
|
6
|
+
#include <sys/resource.h>
|
|
5
7
|
|
|
6
8
|
static ID unpack_id;
|
|
7
9
|
static VALUE U_fmt, C_fmt;
|
|
10
|
+
static rlim_t alloca_limit = 4096; /* very small default */
|
|
8
11
|
|
|
9
12
|
/* give GCC hints for better branch prediction
|
|
10
13
|
* (we layout branches so that ASCII characters are handled faster) */
|
|
@@ -157,18 +160,24 @@ static VALUE fast_xs(VALUE self)
|
|
|
157
160
|
char *s, *c;
|
|
158
161
|
size_t s_len = 0;
|
|
159
162
|
VALUE *tmp;
|
|
163
|
+
VALUE rv;
|
|
160
164
|
|
|
161
165
|
array = RARRAY(rb_rescue(unpack_utf8, self, unpack_uchar, self));
|
|
162
166
|
|
|
163
167
|
for (tmp = array->ptr, i = array->len; --i >= 0; tmp++)
|
|
164
168
|
s_len += escaped_len(NUM2INT(*tmp));
|
|
165
169
|
|
|
166
|
-
c = s = alloca(s_len);
|
|
170
|
+
c = s = unlikely(s_len > alloca_limit) ? malloc(s_len) : alloca(s_len);
|
|
167
171
|
|
|
168
172
|
for (tmp = array->ptr, i = array->len; --i >= 0; tmp++)
|
|
169
173
|
c += escape(c, NUM2INT(*tmp));
|
|
170
174
|
|
|
171
|
-
|
|
175
|
+
rv = rb_str_new(s, s_len);
|
|
176
|
+
|
|
177
|
+
if (unlikely(s_len > alloca_limit))
|
|
178
|
+
free(s);
|
|
179
|
+
|
|
180
|
+
return rv;
|
|
172
181
|
}
|
|
173
182
|
|
|
174
183
|
/*
|
|
@@ -184,6 +193,7 @@ static VALUE fast_xs_html(VALUE self)
|
|
|
184
193
|
char *s;
|
|
185
194
|
size_t new_len = 0;
|
|
186
195
|
char *new_str;
|
|
196
|
+
VALUE rv;
|
|
187
197
|
|
|
188
198
|
for (s = string->ptr, i = string->len; --i >= 0; ++s) {
|
|
189
199
|
if (unlikely(*s == '&'))
|
|
@@ -196,7 +206,8 @@ static VALUE fast_xs_html(VALUE self)
|
|
|
196
206
|
new_len += 1;
|
|
197
207
|
}
|
|
198
208
|
|
|
199
|
-
new_str =
|
|
209
|
+
new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
|
|
210
|
+
: alloca(new_len);
|
|
200
211
|
|
|
201
212
|
#define append_const(buf, x) do { \
|
|
202
213
|
buf = memcpy(buf, x, sizeof(x) - 1) + sizeof(x) - 1; \
|
|
@@ -217,7 +228,12 @@ static VALUE fast_xs_html(VALUE self)
|
|
|
217
228
|
|
|
218
229
|
#undef append_const
|
|
219
230
|
|
|
220
|
-
|
|
231
|
+
rv = rb_str_new(new_str - new_len, new_len);
|
|
232
|
+
|
|
233
|
+
if (unlikely(new_len > alloca_limit))
|
|
234
|
+
free(new_str - new_len);
|
|
235
|
+
|
|
236
|
+
return rv;
|
|
221
237
|
}
|
|
222
238
|
|
|
223
239
|
#define CGI_URI_OK(x) \
|
|
@@ -226,31 +242,29 @@ static VALUE fast_xs_html(VALUE self)
|
|
|
226
242
|
(x >= '0' && x <= '9') || \
|
|
227
243
|
(x == '.' || x == '-' || x == '_'))
|
|
228
244
|
|
|
229
|
-
|
|
230
|
-
* Compatible with CGI::escape(), this iterates through each byte, so
|
|
231
|
-
* multibyte character sets may not supported (but UTF-8 should be).
|
|
232
|
-
*/
|
|
233
|
-
static VALUE fast_xs_cgi(VALUE self)
|
|
245
|
+
static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
|
|
234
246
|
{
|
|
235
247
|
struct RString *string = RSTRING(self);
|
|
236
248
|
long i;
|
|
237
249
|
char *s;
|
|
238
250
|
size_t new_len = 0;
|
|
239
251
|
char *new_str;
|
|
252
|
+
VALUE rv;
|
|
240
253
|
|
|
241
254
|
for (s = string->ptr, i = string->len; --i >= 0; ++s) {
|
|
242
|
-
if (likely(CGI_URI_OK(*s) || *s == ' '))
|
|
255
|
+
if (likely(CGI_URI_OK(*s) || (space_to_plus && *s == ' ')))
|
|
243
256
|
++new_len;
|
|
244
257
|
else /* we'll only get <= "%FF" here */
|
|
245
258
|
new_len += 3;
|
|
246
259
|
}
|
|
247
260
|
|
|
248
|
-
new_str =
|
|
261
|
+
new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
|
|
262
|
+
: alloca(new_len);
|
|
249
263
|
|
|
250
264
|
for (s = string->ptr, i = string->len; --i >= 0; ++s) {
|
|
251
265
|
if (likely(CGI_URI_OK(*s)))
|
|
252
266
|
*new_str++ = *s;
|
|
253
|
-
else if (*s == ' ')
|
|
267
|
+
else if (space_to_plus && *s == ' ')
|
|
254
268
|
*new_str++ = '+';
|
|
255
269
|
else {
|
|
256
270
|
static const char cgi_digitmap[] = "0123456789ABCDEF";
|
|
@@ -260,13 +274,44 @@ static VALUE fast_xs_cgi(VALUE self)
|
|
|
260
274
|
new_str += 3;
|
|
261
275
|
}
|
|
262
276
|
}
|
|
263
|
-
|
|
277
|
+
|
|
278
|
+
rv = rb_str_new(new_str - new_len, new_len);
|
|
279
|
+
|
|
280
|
+
if (unlikely(new_len > alloca_limit))
|
|
281
|
+
free(new_str - new_len);
|
|
282
|
+
|
|
283
|
+
return rv;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
/*
|
|
287
|
+
* Compatible with ERB::Util::url_encode / ERB::Util::u, this iterates
|
|
288
|
+
* through each byte, so multibyte character sets may not supported (but
|
|
289
|
+
* UTF-8 should be).
|
|
290
|
+
*/
|
|
291
|
+
static VALUE fast_xs_url(VALUE self)
|
|
292
|
+
{
|
|
293
|
+
return _xs_uri_encode(self, 0);
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
/*
|
|
297
|
+
* Compatible with CGI::escape(), this iterates through each byte, so
|
|
298
|
+
* multibyte character sets may not supported (but UTF-8 should be).
|
|
299
|
+
*/
|
|
300
|
+
static VALUE fast_xs_cgi(VALUE self)
|
|
301
|
+
{
|
|
302
|
+
return _xs_uri_encode(self, 1);
|
|
264
303
|
}
|
|
265
304
|
|
|
266
305
|
void Init_fast_xs(void)
|
|
267
306
|
{
|
|
307
|
+
struct rlimit rlim;
|
|
308
|
+
|
|
268
309
|
assert(cp_1252[159 - 128] == 376); /* just in case I skipped a line */
|
|
269
310
|
|
|
311
|
+
/* fairly conservative stack estimation IMHO... */
|
|
312
|
+
if (!getrlimit(RLIMIT_STACK, &rlim) && (rlim.rlim_cur > 0x80000))
|
|
313
|
+
alloca_limit = rlim.rlim_cur - (rlim.rlim_cur / 16);
|
|
314
|
+
|
|
270
315
|
unpack_id = rb_intern("unpack");
|
|
271
316
|
U_fmt = rb_str_new("U*", 2);
|
|
272
317
|
C_fmt = rb_str_new("C*", 2);
|
|
@@ -276,4 +321,5 @@ void Init_fast_xs(void)
|
|
|
276
321
|
rb_define_method(rb_cString, "fast_xs", fast_xs, 0);
|
|
277
322
|
rb_define_method(rb_cString, "fast_xs_html", fast_xs_html, 0);
|
|
278
323
|
rb_define_method(rb_cString, "fast_xs_cgi", fast_xs_cgi, 0);
|
|
324
|
+
rb_define_method(rb_cString, "fast_xs_url", fast_xs_url, 0);
|
|
279
325
|
}
|
data/lib/fast_xs_monkey_patcher.rb
CHANGED
|
@@ -5,11 +5,11 @@ if defined?(CGI)
|
|
|
5
5
|
class CGI
|
|
6
6
|
|
|
7
7
|
def CGI::escapeHTML(value)
|
|
8
|
-
value.
|
|
8
|
+
value.fast_xs_html
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
def CGI::escape(value)
|
|
12
|
-
value.
|
|
12
|
+
value.fast_xs_cgi
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
end
|
|
@@ -27,6 +27,14 @@ if defined?(ERB::Util)
|
|
|
27
27
|
module_function :h
|
|
28
28
|
module_function :html_escape
|
|
29
29
|
|
|
30
|
+
def url_encode(value)
|
|
31
|
+
value.to_s.fast_xs_url
|
|
32
|
+
end
|
|
33
|
+
alias u url_encode
|
|
34
|
+
module_function :u
|
|
35
|
+
module_function :url_encode
|
|
36
|
+
|
|
37
|
+
|
|
30
38
|
end
|
|
31
39
|
|
|
32
40
|
end
|
data/test/test_cgi_class_overrides.rb
CHANGED
|
@@ -31,5 +31,13 @@ class TestCgiClassOverrides < Test::Unit::TestCase
|
|
|
31
31
|
assert_equal 'H3LL0+W0RLD', CGI::escape('H3LL0 W0RLD')
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
+
def test_large_strings
|
|
35
|
+
if ENV['LARGE_STRING_TEST']
|
|
36
|
+
assert CGI::escape('&' * (8192 * 1024))
|
|
37
|
+
assert CGI::escapeHTML('&' * (8192 * 1024))
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
|
|
34
42
|
end
|
|
35
43
|
|
data/test/test_erb_util_module_overrides.rb
CHANGED
|
@@ -24,6 +24,22 @@ class TestErbUtilModuleOverrides < Test::Unit::TestCase
|
|
|
24
24
|
assert_equal "\xEF\xBF\xBF", html_escape("\xEF\xBF\xBF")
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
def test_escape_url
|
|
28
|
+
assert_equal 'hello%3Dworld', url_encode('hello=world')
|
|
29
|
+
assert_equal '%20', url_encode(' ')
|
|
30
|
+
assert_equal '%2B', url_encode('+')
|
|
31
|
+
assert_equal '%2C', url_encode(',')
|
|
32
|
+
assert_equal 'hello-world', url_encode('hello-world')
|
|
33
|
+
assert_equal 'H3LL0%20W0RLD', url_encode('H3LL0 W0RLD')
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def test_large_strings
|
|
37
|
+
if ENV['LARGE_STRING_TEST']
|
|
38
|
+
assert u('&' * (8192 * 1024))
|
|
39
|
+
assert h('&' * (8192 * 1024))
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
27
43
|
end
|
|
28
44
|
|
|
29
45
|
|
data/test/test_xml_escaping.rb
CHANGED
|
@@ -36,4 +36,11 @@ class TestXmlEscaping < Test::Unit::TestCase
|
|
|
36
36
|
assert_equal '©', "\xC2\xA9".fast_xs # copy
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
+
def test_large_document
|
|
40
|
+
if ENV['LARGE_STRING_TEST']
|
|
41
|
+
assert ('&' * (8192 * 1024)).fast_xs
|
|
42
|
+
assert ('a' * (8192 * 1024)).fast_xs
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
39
46
|
end
|
metadata
CHANGED
|
@@ -3,16 +3,16 @@ rubygems_version: 0.9.4.7
|
|
|
3
3
|
specification_version: 2
|
|
4
4
|
name: fast_xs
|
|
5
5
|
version: !ruby/object:Gem::Version
|
|
6
|
-
version: "0.
|
|
7
|
-
date: 2007-12-
|
|
8
|
-
summary:
|
|
6
|
+
version: "0.4"
|
|
7
|
+
date: 2007-12-12 00:00:00 -08:00
|
|
8
|
+
summary: excessively fast escaping
|
|
9
9
|
require_paths:
|
|
10
10
|
- lib/i486-linux
|
|
11
11
|
- lib
|
|
12
12
|
email: normalperson@yhbt.net
|
|
13
13
|
homepage: http://bogonips.org/fast_xs/
|
|
14
14
|
rubyforge_project:
|
|
15
|
-
description:
|
|
15
|
+
description: excessively fast escaping
|
|
16
16
|
autorequire:
|
|
17
17
|
default_executable:
|
|
18
18
|
bindir: bin
|