fast_xs 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -2
- data/ext/fast_xs/fast_xs.c +116 -48
- data/ext/fast_xs/ruby_1_9_compat.h +14 -0
- data/lib/fast_xs_monkey_patcher.rb +30 -38
- data/test/test_cgi_class_overrides.rb +9 -0
- metadata +3 -2
data/Rakefile
CHANGED
@@ -13,7 +13,7 @@ begin
|
|
13
13
|
rev = Time.at(rev.split("\n")[1].to_i).strftime('%Y%m%d.%H%M%S')
|
14
14
|
rescue
|
15
15
|
end
|
16
|
-
version ||= ENV['VERSION'] || '0.4' + (rev && rev.length > 0 ? ".#{rev}" : '')
|
16
|
+
version ||= ENV['VERSION'] || '0.5' + (rev && rev.length > 0 ? ".#{rev}" : '')
|
17
17
|
pkg = "#{name}-#{version}"
|
18
18
|
bin = "*.{so,o}"
|
19
19
|
archlib = "lib/#{::Config::CONFIG['arch']}"
|
@@ -24,7 +24,7 @@ rdoc_opts = ['--quiet', '--title', 'fast_xs notes', '--main', 'README',
|
|
24
24
|
'--inline-source']
|
25
25
|
pkg_files = %w(CHANGELOG COPYING README Rakefile) +
|
26
26
|
Dir.glob("{test,lib}/**/*.rb") +
|
27
|
-
Dir.glob("ext/**/*.{c,rb}")
|
27
|
+
Dir.glob("ext/**/*.{c,rb,h}")
|
28
28
|
|
29
29
|
|
30
30
|
spec = Gem::Specification.new do |s|
|
data/ext/fast_xs/fast_xs.c
CHANGED
@@ -4,11 +4,41 @@
|
|
4
4
|
#include <assert.h>
|
5
5
|
#include <sys/time.h>
|
6
6
|
#include <sys/resource.h>
|
7
|
+
#include "ruby_1_9_compat.h"
|
8
|
+
|
9
|
+
/* I don't trust ctype.h when it comes to locale-independence: */
|
10
|
+
static __inline__ int is_hex(const int x)
|
11
|
+
{
|
12
|
+
return (((x) >= '0' && (x) <= '9') ||
|
13
|
+
((x) >= 'a' && (x) <= 'f') ||
|
14
|
+
((x) >= 'A' && (x) <= 'F'));
|
15
|
+
}
|
16
|
+
|
17
|
+
static __inline__ int xtoupper(const int x)
|
18
|
+
{
|
19
|
+
return (x >= 'a' && x <= 'f') ? (x & ~0x20) : x;
|
20
|
+
}
|
21
|
+
|
22
|
+
static __inline__ int hexchar_to_int(const int x)
|
23
|
+
{
|
24
|
+
return (x < 'A') ? (x - '0') : (xtoupper(x) - 'A' + 10);
|
25
|
+
}
|
26
|
+
|
27
|
+
static __inline__ int hexpair_to_int(const int x1, const int x2)
|
28
|
+
{
|
29
|
+
return ((hexchar_to_int(x1) << 4) | hexchar_to_int(x2));
|
30
|
+
}
|
7
31
|
|
8
32
|
static ID unpack_id;
|
9
33
|
static VALUE U_fmt, C_fmt;
|
10
34
|
static rlim_t alloca_limit = 4096; /* very small default */
|
11
35
|
|
36
|
+
#define xs_alloc(size) \
|
37
|
+
unlikely((size) > alloca_limit) ? malloc(size) : alloca(size)
|
38
|
+
|
39
|
+
#define xs_free(ptr, size) \
|
40
|
+
do { if (unlikely((size) > alloca_limit)) free(ptr); } while (0)
|
41
|
+
|
12
42
|
/* give GCC hints for better branch prediction
|
13
43
|
* (we layout branches so that ASCII characters are handled faster) */
|
14
44
|
#if defined(__GNUC__) && (__GNUC__ >= 3)
|
@@ -128,21 +158,6 @@ static size_t escape(char *buf, int n)
|
|
128
158
|
return 1;
|
129
159
|
}
|
130
160
|
|
131
|
-
static long escaped_len(int n)
|
132
|
-
{
|
133
|
-
if (likely(n < 128)) {
|
134
|
-
if (unlikely(n == '&'))
|
135
|
-
return (sizeof("&amp;") - 1);
|
136
|
-
if (unlikely(n == '>' || n == '<'))
|
137
|
-
return (sizeof("&gt;") - 1);
|
138
|
-
return 1;
|
139
|
-
}
|
140
|
-
|
141
|
-
CP_1252_ESCAPE(n);
|
142
|
-
|
143
|
-
return VALID_VALUE(n) ? bytes_for(n) : 1;
|
144
|
-
}
|
145
|
-
|
146
161
|
static VALUE unpack_utf8(VALUE self)
|
147
162
|
{
|
148
163
|
return rb_funcall(self, unpack_id, 1, U_fmt);
|
@@ -156,26 +171,40 @@ static VALUE unpack_uchar(VALUE self)
|
|
156
171
|
static VALUE fast_xs(VALUE self)
|
157
172
|
{
|
158
173
|
long i;
|
159
|
-
|
174
|
+
VALUE array;
|
160
175
|
char *s, *c;
|
161
|
-
size_t s_len
|
176
|
+
size_t s_len;
|
162
177
|
VALUE *tmp;
|
163
178
|
VALUE rv;
|
164
179
|
|
165
|
-
array =
|
180
|
+
array = rb_rescue(unpack_utf8, self, unpack_uchar, self);
|
166
181
|
|
167
|
-
for (tmp = array
|
168
|
-
|
182
|
+
for (tmp = RARRAY_PTR(array), s_len = i = RARRAY_LEN(array);
|
183
|
+
--i >= 0;
|
184
|
+
tmp++) {
|
185
|
+
int n = NUM2INT(*tmp);
|
186
|
+
if (likely(n < 128)) {
|
187
|
+
if (unlikely(n == '&'))
|
188
|
+
s_len += (sizeof("&amp;") - 2);
|
189
|
+
if (unlikely(n == '>' || n == '<'))
|
190
|
+
s_len += (sizeof("&gt;") - 2);
|
191
|
+
continue;
|
192
|
+
}
|
193
|
+
|
194
|
+
CP_1252_ESCAPE(n);
|
195
|
+
|
196
|
+
if (VALID_VALUE(n))
|
197
|
+
s_len += bytes_for(n) - 1;
|
198
|
+
}
|
169
199
|
|
170
200
|
c = s = unlikely(s_len > alloca_limit) ? malloc(s_len) : alloca(s_len);
|
171
201
|
|
172
|
-
for (tmp = array
|
202
|
+
for (tmp = RARRAY_PTR(array), i = RARRAY_LEN(array); --i >= 0; tmp++)
|
173
203
|
c += escape(c, NUM2INT(*tmp));
|
174
204
|
|
175
205
|
rv = rb_str_new(s, s_len);
|
176
206
|
|
177
|
-
|
178
|
-
free(s);
|
207
|
+
xs_free(s, s_len);
|
179
208
|
|
180
209
|
return rv;
|
181
210
|
}
|
@@ -188,32 +217,28 @@ static VALUE fast_xs(VALUE self)
|
|
188
217
|
*/
|
189
218
|
static VALUE fast_xs_html(VALUE self)
|
190
219
|
{
|
191
|
-
struct RString *string = RSTRING(self);
|
192
220
|
long i;
|
193
221
|
char *s;
|
194
|
-
size_t new_len =
|
222
|
+
size_t new_len = RSTRING_LEN(self);
|
195
223
|
char *new_str;
|
196
224
|
VALUE rv;
|
197
225
|
|
198
|
-
for (s =
|
226
|
+
for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
|
199
227
|
if (unlikely(*s == '&'))
|
200
|
-
new_len += (sizeof("&amp;") -
|
228
|
+
new_len += (sizeof("&amp;") - 2);
|
201
229
|
else if (unlikely(*s == '<' || *s == '>'))
|
202
|
-
new_len += (sizeof("&gt;") -
|
230
|
+
new_len += (sizeof("&gt;") - 2);
|
203
231
|
else if (unlikely(*s == '"'))
|
204
|
-
new_len += (sizeof("&quot;") -
|
205
|
-
else
|
206
|
-
new_len += 1;
|
232
|
+
new_len += (sizeof("&quot;") - 2);
|
207
233
|
}
|
208
234
|
|
209
|
-
new_str =
|
210
|
-
: alloca(new_len);
|
235
|
+
new_str = xs_alloc(new_len);
|
211
236
|
|
212
237
|
#define append_const(buf, x) do { \
|
213
238
|
buf = memcpy(buf, x, sizeof(x) - 1) + sizeof(x) - 1; \
|
214
239
|
} while (0)
|
215
240
|
|
216
|
-
for (s =
|
241
|
+
for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
|
217
242
|
if (unlikely(*s == '&'))
|
218
243
|
append_const(new_str, "&amp;");
|
219
244
|
else if (unlikely(*s == '<'))
|
@@ -230,8 +255,7 @@ static VALUE fast_xs_html(VALUE self)
|
|
230
255
|
|
231
256
|
rv = rb_str_new(new_str - new_len, new_len);
|
232
257
|
|
233
|
-
|
234
|
-
free(new_str - new_len);
|
258
|
+
xs_free(new_str - new_len, new_len);
|
235
259
|
|
236
260
|
return rv;
|
237
261
|
}
|
@@ -244,24 +268,21 @@ static VALUE fast_xs_html(VALUE self)
|
|
244
268
|
|
245
269
|
static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
|
246
270
|
{
|
247
|
-
struct RString *string = RSTRING(self);
|
248
271
|
long i;
|
249
272
|
char *s;
|
250
|
-
size_t new_len =
|
273
|
+
size_t new_len = RSTRING_LEN(self);
|
251
274
|
char *new_str;
|
252
275
|
VALUE rv;
|
253
276
|
|
254
|
-
for (s =
|
255
|
-
if (likely(CGI_URI_OK(*s) || (space_to_plus && *s == ' '))
|
256
|
-
|
257
|
-
|
258
|
-
new_len += 3;
|
277
|
+
for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
|
278
|
+
if (likely(CGI_URI_OK(*s)) || (space_to_plus && *s == ' '))
|
279
|
+
continue;
|
280
|
+
new_len += 2;
|
259
281
|
}
|
260
282
|
|
261
|
-
new_str =
|
262
|
-
: alloca(new_len);
|
283
|
+
new_str = xs_alloc(new_len);
|
263
284
|
|
264
|
-
for (s =
|
285
|
+
for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
|
265
286
|
if (likely(CGI_URI_OK(*s)))
|
266
287
|
*new_str++ = *s;
|
267
288
|
else if (space_to_plus && *s == ' ')
|
@@ -277,8 +298,7 @@ static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
|
|
277
298
|
|
278
299
|
rv = rb_str_new(new_str - new_len, new_len);
|
279
300
|
|
280
|
-
|
281
|
-
free(new_str - new_len);
|
301
|
+
xs_free(new_str - new_len, new_len);
|
282
302
|
|
283
303
|
return rv;
|
284
304
|
}
|
@@ -302,6 +322,53 @@ static VALUE fast_xs_cgi(VALUE self)
|
|
302
322
|
return _xs_uri_encode(self, 1);
|
303
323
|
}
|
304
324
|
|
325
|
+
static VALUE _uxs_uri(VALUE self, const unsigned int plus_to_space)
|
326
|
+
{
|
327
|
+
char *s, *new_str;
|
328
|
+
long i;
|
329
|
+
size_t new_len = RSTRING_LEN(self);
|
330
|
+
VALUE rv;
|
331
|
+
|
332
|
+
for (s = RSTRING_PTR(self), i = RSTRING_LEN(self);
|
333
|
+
--i >= 0;
|
334
|
+
++s) {
|
335
|
+
if (unlikely(*s == '%') &&
|
336
|
+
likely(is_hex(s[1])) &&
|
337
|
+
likely(is_hex(s[2]))) {
|
338
|
+
new_len -= 2;
|
339
|
+
s += 2;
|
340
|
+
i -= 2;
|
341
|
+
}
|
342
|
+
}
|
343
|
+
|
344
|
+
new_str = xs_alloc(new_len);
|
345
|
+
for (s = RSTRING_PTR(self), i = RSTRING_LEN(self);
|
346
|
+
--i >= 0;
|
347
|
+
++s, ++new_str) {
|
348
|
+
if (plus_to_space && unlikely(*s == '+'))
|
349
|
+
*new_str = ' ';
|
350
|
+
else if (unlikely(*s == '%') &&
|
351
|
+
likely(is_hex(s[1])) &&
|
352
|
+
likely(is_hex(s[2]))) {
|
353
|
+
*new_str = hexpair_to_int(s[1], s[2]);
|
354
|
+
s += 2;
|
355
|
+
i -= 2;
|
356
|
+
} else
|
357
|
+
*new_str = *s;
|
358
|
+
}
|
359
|
+
|
360
|
+
rv = rb_str_new(new_str - new_len, new_len);
|
361
|
+
|
362
|
+
xs_free(s, new_len);
|
363
|
+
|
364
|
+
return rv;
|
365
|
+
}
|
366
|
+
|
367
|
+
static VALUE fast_uxs_cgi(VALUE self)
|
368
|
+
{
|
369
|
+
return _uxs_uri(self, 1);
|
370
|
+
}
|
371
|
+
|
305
372
|
void Init_fast_xs(void)
|
306
373
|
{
|
307
374
|
struct rlimit rlim;
|
@@ -321,5 +388,6 @@ void Init_fast_xs(void)
|
|
321
388
|
rb_define_method(rb_cString, "fast_xs", fast_xs, 0);
|
322
389
|
rb_define_method(rb_cString, "fast_xs_html", fast_xs_html, 0);
|
323
390
|
rb_define_method(rb_cString, "fast_xs_cgi", fast_xs_cgi, 0);
|
391
|
+
rb_define_method(rb_cString, "fast_uxs_cgi", fast_uxs_cgi, 0);
|
324
392
|
rb_define_method(rb_cString, "fast_xs_url", fast_xs_url, 0);
|
325
393
|
}
|
data/ext/fast_xs/ruby_1_9_compat.h
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
/* Ruby 1.8.6+ macros (for compatibility with Ruby 1.9) */
|
2
|
+
#ifndef RSTRING_PTR
|
3
|
+
# define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
4
|
+
#endif
|
5
|
+
#ifndef RSTRING_LEN
|
6
|
+
# define RSTRING_LEN(s) (RSTRING(s)->len)
|
7
|
+
#endif
|
8
|
+
#ifndef RARRAY_PTR
|
9
|
+
# define RARRAY_PTR(s) (RARRAY(s)->ptr)
|
10
|
+
#endif
|
11
|
+
#ifndef RARRAY_LEN
|
12
|
+
# define RARRAY_LEN(s) (RARRAY(s)->len)
|
13
|
+
#endif
|
14
|
+
|
data/lib/fast_xs_monkey_patcher.rb
CHANGED
@@ -1,40 +1,32 @@
|
|
1
1
|
require 'fast_xs'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
if defined?(ERB::Util)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
alias u url_encode
|
34
|
-
module_function :u
|
35
|
-
module_function :url_encode
|
36
|
-
|
37
|
-
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
3
|
+
class CGI
|
4
|
+
def CGI::escapeHTML(value); value.fast_xs_html; end
|
5
|
+
def CGI::escape(value); value.fast_xs_cgi; end
|
6
|
+
def CGI::unescape(value); value.fast_uxs_cgi; end
|
7
|
+
end if defined?(CGI)
|
8
|
+
|
9
|
+
module ERB::Util
|
10
|
+
def html_escape(value); value.to_s.fast_xs_html; end
|
11
|
+
alias h html_escape
|
12
|
+
module_function :h
|
13
|
+
module_function :html_escape
|
14
|
+
|
15
|
+
def url_encode(value); value.to_s.fast_xs_url; end
|
16
|
+
alias u url_encode
|
17
|
+
module_function :u
|
18
|
+
module_function :url_encode
|
19
|
+
end if defined?(ERB::Util)
|
20
|
+
|
21
|
+
class Mongrel::HttpRequest
|
22
|
+
def self.unescape(s); s.fast_uxs_cgi; end
|
23
|
+
def self.escape(s); s.to_s.fast_xs_cgi; end
|
24
|
+
end if defined?(Mongrel::HttpRequest)
|
25
|
+
|
26
|
+
module Rack::Utils
|
27
|
+
def unescape(s); s.fast_uxs_cgi; end
|
28
|
+
module_function :unescape
|
29
|
+
|
30
|
+
def escape(s); s.to_s.fast_xs_cgi; end
|
31
|
+
module_function :escape
|
32
|
+
end if defined?(Rack::Utils)
|
data/test/test_cgi_class_overrides.rb
CHANGED
@@ -31,6 +31,15 @@ class TestCgiClassOverrides < Test::Unit::TestCase
|
|
31
31
|
assert_equal 'H3LL0+W0RLD', CGI::escape('H3LL0 W0RLD')
|
32
32
|
end
|
33
33
|
|
34
|
+
def test_unescape_cgi
|
35
|
+
assert_equal 'hello=world', CGI::unescape('hello%3Dworld')
|
36
|
+
assert_equal ' ', CGI::unescape('+')
|
37
|
+
assert_equal '+', CGI::unescape('%2B')
|
38
|
+
assert_equal ',', CGI::unescape('%2C')
|
39
|
+
assert_equal 'hello-world', CGI::unescape('hello-world')
|
40
|
+
assert_equal 'H3LL0 W0RLD', CGI::unescape('H3LL0+W0RLD')
|
41
|
+
end
|
42
|
+
|
34
43
|
def test_large_strings
|
35
44
|
if ENV['LARGE_STRING_TEST']
|
36
45
|
assert CGI::escape('&' * (8192 * 1024))
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4.7
|
|
3
3
|
specification_version: 2
|
4
4
|
name: fast_xs
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "0.4"
|
7
|
-
date:
|
6
|
+
version: "0.5"
|
7
|
+
date: 2008-01-07 00:00:00 -08:00
|
8
8
|
summary: excessively fast escaping
|
9
9
|
require_paths:
|
10
10
|
- lib/i486-linux
|
@@ -47,6 +47,7 @@ files:
|
|
47
47
|
- lib/fast_xs_monkey_patcher.rb
|
48
48
|
- ext/fast_xs/fast_xs.c
|
49
49
|
- ext/fast_xs/extconf.rb
|
50
|
+
- ext/fast_xs/ruby_1_9_compat.h
|
50
51
|
test_files: []
|
51
52
|
|
52
53
|
rdoc_options:
|