fast_xs 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ begin
13
13
  rev = Time.at(rev.split("\n")[1].to_i).strftime('%Y%m%d.%H%M%S')
14
14
  rescue
15
15
  end
16
- version ||= ENV['VERSION'] || '0.4' + (rev && rev.length > 0 ? ".#{rev}" : '')
16
+ version ||= ENV['VERSION'] || '0.5' + (rev && rev.length > 0 ? ".#{rev}" : '')
17
17
  pkg = "#{name}-#{version}"
18
18
  bin = "*.{so,o}"
19
19
  archlib = "lib/#{::Config::CONFIG['arch']}"
@@ -24,7 +24,7 @@ rdoc_opts = ['--quiet', '--title', 'fast_xs notes', '--main', 'README',
24
24
  '--inline-source']
25
25
  pkg_files = %w(CHANGELOG COPYING README Rakefile) +
26
26
  Dir.glob("{test,lib}/**/*.rb") +
27
- Dir.glob("ext/**/*.{c,rb}")
27
+ Dir.glob("ext/**/*.{c,rb,h}")
28
28
 
29
29
 
30
30
  spec = Gem::Specification.new do |s|
@@ -4,11 +4,41 @@
4
4
  #include <assert.h>
5
5
  #include <sys/time.h>
6
6
  #include <sys/resource.h>
7
+ #include "ruby_1_9_compat.h"
8
+
9
/*
 * Hand-rolled because ctype.h is not trusted to be locale-independent.
 *
 * Returns non-zero when x is an ASCII hexadecimal digit
 * ('0'-'9', 'a'-'f' or 'A'-'F') and zero for every other value,
 * including negative ones.
 */
static inline int is_hex(const int x)
{
	if (x >= '0' && x <= '9')
		return 1;
	if (x >= 'a' && x <= 'f')
		return 1;
	return (x >= 'A' && x <= 'F');
}
16
+
17
/*
 * Upper-case a hexadecimal letter: 'a'-'f' become 'A'-'F' by clearing
 * bit 0x20.  Every other value (digits, upper-case letters, letters
 * past 'f', non-ASCII) is returned unchanged.
 */
static inline int xtoupper(const int x)
{
	if (x >= 'a' && x <= 'f')
		return x & ~0x20;
	return x;
}
21
+
22
/*
 * Convert one hexadecimal digit character to its value (0-15).
 *
 * The caller must already have checked the character with is_hex():
 * anything below 'A' is treated as a decimal digit and anything else
 * as a letter, so other inputs produce a meaningless result.
 */
static inline int hexchar_to_int(const int x)
{
	if (x < 'A')
		return x - '0';
	return xtoupper(x) - 'A' + 10;
}
26
+
27
/*
 * Combine two hexadecimal digit characters into one byte value:
 * x1 supplies the high nibble, x2 the low nibble.
 * Both characters are expected to be valid hex digits.
 */
static inline int hexpair_to_int(const int x1, const int x2)
{
	const int high = hexchar_to_int(x1);
	const int low = hexchar_to_int(x2);

	return (high << 4) | low;
}
7
31
 
8
32
  static ID unpack_id;
9
33
  static VALUE U_fmt, C_fmt;
10
34
  static rlim_t alloca_limit = 4096; /* very small default */
11
35
 
36
/*
 * Scratch-buffer helpers.
 *
 * xs_alloc(size) takes the buffer from alloca() when size is at most
 * alloca_limit and from malloc() when it is larger.  The malloc() result
 * is not checked here, so callers must handle NULL themselves.
 * The whole expansion is parenthesized so the macro can be used inside
 * a larger expression.
 *
 * xs_free(ptr, size) must be given the pointer returned by xs_alloc()
 * and the same size: it calls free() only for sizes that were malloc()ed.
 *
 * Both macros evaluate `size' more than once; do not pass expressions
 * with side effects.
 */
#define xs_alloc(size) \
	(unlikely((size) > alloca_limit) ? malloc(size) : alloca(size))

#define xs_free(ptr, size) \
	do { if (unlikely((size) > alloca_limit)) free(ptr); } while (0)
41
+
12
42
  /* give GCC hints for better branch prediction
13
43
  * (we layout branches so that ASCII characters are handled faster) */
14
44
  #if defined(__GNUC__) && (__GNUC__ >= 3)
@@ -128,21 +158,6 @@ static size_t escape(char *buf, int n)
128
158
  return 1;
129
159
  }
130
160
 
131
- static long escaped_len(int n)
132
- {
133
- if (likely(n < 128)) {
134
- if (unlikely(n == '&'))
135
- return (sizeof("&amp;") - 1);
136
- if (unlikely(n == '>' || n == '<'))
137
- return (sizeof("&gt;") - 1);
138
- return 1;
139
- }
140
-
141
- CP_1252_ESCAPE(n);
142
-
143
- return VALID_VALUE(n) ? bytes_for(n) : 1;
144
- }
145
-
146
161
  static VALUE unpack_utf8(VALUE self)
147
162
  {
148
163
  return rb_funcall(self, unpack_id, 1, U_fmt);
@@ -156,26 +171,40 @@ static VALUE unpack_uchar(VALUE self)
156
171
/*
 * Escape the receiver and return the result as a new String.
 *
 * The receiver is first turned into an Array of integers by
 * unpack_utf8(), with unpack_uchar() installed as the rb_rescue()
 * fallback.  Two passes over that Array follow:
 *
 *   1. compute the output length: start from one byte per element and
 *      add the extra bytes each element needs once escaped;
 *   2. let escape() write every element into a scratch buffer.
 *
 * NOTE(review): pass 1 is assumed to mirror exactly what escape()
 * emits -- escape() is defined elsewhere in this file; confirm when
 * changing either side.
 */
static VALUE fast_xs(VALUE self)
{
	long i;
	VALUE array;
	char *s, *c;
	size_t s_len;
	VALUE *tmp;
	VALUE rv;

	array = rb_rescue(unpack_utf8, self, unpack_uchar, self);

	for (tmp = RARRAY_PTR(array), s_len = i = RARRAY_LEN(array);
	     --i >= 0;
	     tmp++) {
		int n = NUM2INT(*tmp);

		if (likely(n < 128)) {
			/* "- 2": drop the NUL and the byte already counted */
			if (unlikely(n == '&'))
				s_len += (sizeof("&amp;") - 2);
			else if (unlikely(n == '>' || n == '<'))
				s_len += (sizeof("&gt;") - 2);
			continue;
		}

		CP_1252_ESCAPE(n);

		if (VALID_VALUE(n))
			s_len += bytes_for(n) - 1;
	}

	/* same allocation policy that xs_free() below undoes */
	c = s = xs_alloc(s_len);
	if (unlikely(!s))
		rb_memerror(); /* malloc() failed; raises NoMemoryError */

	for (tmp = RARRAY_PTR(array), i = RARRAY_LEN(array); --i >= 0; tmp++)
		c += escape(c, NUM2INT(*tmp));

	rv = rb_str_new(s, s_len);

	xs_free(s, s_len);

	return rv;
}
@@ -188,32 +217,28 @@ static VALUE fast_xs(VALUE self)
188
217
  */
189
218
  static VALUE fast_xs_html(VALUE self)
190
219
  {
191
- struct RString *string = RSTRING(self);
192
220
  long i;
193
221
  char *s;
194
- size_t new_len = 0;
222
+ size_t new_len = RSTRING_LEN(self);
195
223
  char *new_str;
196
224
  VALUE rv;
197
225
 
198
- for (s = string->ptr, i = string->len; --i >= 0; ++s) {
226
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
199
227
  if (unlikely(*s == '&'))
200
- new_len += (sizeof("&amp;") - 1);
228
+ new_len += (sizeof("&amp;") - 2);
201
229
  else if (unlikely(*s == '<' || *s == '>'))
202
- new_len += (sizeof("&gt;") - 1);
230
+ new_len += (sizeof("&gt;") - 2);
203
231
  else if (unlikely(*s == '"'))
204
- new_len += (sizeof("&quot;") - 1);
205
- else
206
- new_len += 1;
232
+ new_len += (sizeof("&quot;") - 2);
207
233
  }
208
234
 
209
- new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
210
- : alloca(new_len);
235
+ new_str = xs_alloc(new_len);
211
236
 
212
237
  #define append_const(buf, x) do { \
213
238
  buf = memcpy(buf, x, sizeof(x) - 1) + sizeof(x) - 1; \
214
239
  } while (0)
215
240
 
216
- for (s = string->ptr, i = string->len; --i >= 0; ++s) {
241
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
217
242
  if (unlikely(*s == '&'))
218
243
  append_const(new_str, "&amp;");
219
244
  else if (unlikely(*s == '<'))
@@ -230,8 +255,7 @@ static VALUE fast_xs_html(VALUE self)
230
255
 
231
256
  rv = rb_str_new(new_str - new_len, new_len);
232
257
 
233
- if (unlikely(new_len > alloca_limit))
234
- free(new_str - new_len);
258
+ xs_free(new_str - new_len, new_len);
235
259
 
236
260
  return rv;
237
261
  }
@@ -244,24 +268,21 @@ static VALUE fast_xs_html(VALUE self)
244
268
 
245
269
  static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
246
270
  {
247
- struct RString *string = RSTRING(self);
248
271
  long i;
249
272
  char *s;
250
- size_t new_len = 0;
273
+ size_t new_len = RSTRING_LEN(self);
251
274
  char *new_str;
252
275
  VALUE rv;
253
276
 
254
- for (s = string->ptr, i = string->len; --i >= 0; ++s) {
255
- if (likely(CGI_URI_OK(*s) || (space_to_plus && *s == ' ')))
256
- ++new_len;
257
- else /* we'll only get <= "%FF" here */
258
- new_len += 3;
277
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
278
+ if (likely(CGI_URI_OK(*s)) || (space_to_plus && *s == ' '))
279
+ continue;
280
+ new_len += 2;
259
281
  }
260
282
 
261
- new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
262
- : alloca(new_len);
283
+ new_str = xs_alloc(new_len);
263
284
 
264
- for (s = string->ptr, i = string->len; --i >= 0; ++s) {
285
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
265
286
  if (likely(CGI_URI_OK(*s)))
266
287
  *new_str++ = *s;
267
288
  else if (space_to_plus && *s == ' ')
@@ -277,8 +298,7 @@ static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
277
298
 
278
299
  rv = rb_str_new(new_str - new_len, new_len);
279
300
 
280
- if (unlikely(new_len > alloca_limit))
281
- free(new_str - new_len);
301
+ xs_free(new_str - new_len, new_len);
282
302
 
283
303
  return rv;
284
304
  }
@@ -302,6 +322,53 @@ static VALUE fast_xs_cgi(VALUE self)
302
322
  return _xs_uri_encode(self, 1);
303
323
  }
304
324
 
325
+ static VALUE _uxs_uri(VALUE self, const unsigned int plus_to_space)
326
+ {
327
+ char *s, *new_str;
328
+ long i;
329
+ size_t new_len = RSTRING_LEN(self);
330
+ VALUE rv;
331
+
332
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self);
333
+ --i >= 0;
334
+ ++s) {
335
+ if (unlikely(*s == '%') &&
336
+ likely(is_hex(s[1])) &&
337
+ likely(is_hex(s[2]))) {
338
+ new_len -= 2;
339
+ s += 2;
340
+ i -= 2;
341
+ }
342
+ }
343
+
344
+ new_str = xs_alloc(new_len);
345
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self);
346
+ --i >= 0;
347
+ ++s, ++new_str) {
348
+ if (plus_to_space && unlikely(*s == '+'))
349
+ *new_str = ' ';
350
+ else if (unlikely(*s == '%') &&
351
+ likely(is_hex(s[1])) &&
352
+ likely(is_hex(s[2]))) {
353
+ *new_str = hexpair_to_int(s[1], s[2]);
354
+ s += 2;
355
+ i -= 2;
356
+ } else
357
+ *new_str = *s;
358
+ }
359
+
360
+ rv = rb_str_new(new_str - new_len, new_len);
361
+
362
+ xs_free(s, new_len);
363
+
364
+ return rv;
365
+ }
366
+
367
+ static VALUE fast_uxs_cgi(VALUE self)
368
+ {
369
+ return _uxs_uri(self, 1);
370
+ }
371
+
305
372
  void Init_fast_xs(void)
306
373
  {
307
374
  struct rlimit rlim;
@@ -321,5 +388,6 @@ void Init_fast_xs(void)
321
388
  rb_define_method(rb_cString, "fast_xs", fast_xs, 0);
322
389
  rb_define_method(rb_cString, "fast_xs_html", fast_xs_html, 0);
323
390
  rb_define_method(rb_cString, "fast_xs_cgi", fast_xs_cgi, 0);
391
+ rb_define_method(rb_cString, "fast_uxs_cgi", fast_uxs_cgi, 0);
324
392
  rb_define_method(rb_cString, "fast_xs_url", fast_xs_url, 0);
325
393
  }
@@ -0,0 +1,14 @@
1
/*
 * Ruby 1.8.6+ macros (for compatibility with Ruby 1.9).
 *
 * Each accessor is defined only when the Ruby headers included before
 * this file have not already provided it; the fallbacks read the `ptr'
 * and `len' members through RSTRING() / RARRAY().
 */
#ifndef RUBY_1_9_COMPAT_H
#define RUBY_1_9_COMPAT_H

#ifndef RSTRING_PTR
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

#ifndef RSTRING_LEN
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif

#ifndef RARRAY_PTR
#  define RARRAY_PTR(s) (RARRAY(s)->ptr)
#endif

#ifndef RARRAY_LEN
#  define RARRAY_LEN(s) (RARRAY(s)->len)
#endif

#endif /* RUBY_1_9_COMPAT_H */
14
+
@@ -1,40 +1,32 @@
1
1
  require 'fast_xs'
2
2
 
3
- if defined?(CGI)
4
-
5
- class CGI
6
-
7
- def CGI::escapeHTML(value)
8
- value.fast_xs_html
9
- end
10
-
11
- def CGI::escape(value)
12
- value.fast_xs_cgi
13
- end
14
-
15
- end
16
-
17
- end
18
-
19
- if defined?(ERB::Util)
20
-
21
- module ERB::Util
22
-
23
- def html_escape(value)
24
- value.to_s.fast_xs_html
25
- end
26
- alias h html_escape
27
- module_function :h
28
- module_function :html_escape
29
-
30
- def url_encode(value)
31
- value.to_s.fast_xs_url
32
- end
33
- alias u url_encode
34
- module_function :u
35
- module_function :url_encode
36
-
37
-
38
- end
39
-
40
- end
3
# Replace CGI's class-level escaping helpers with the fast_xs String
# methods.  Nothing is patched unless CGI is already defined when this
# file is loaded.
if defined?(CGI)
  class CGI
    class << self
      def escapeHTML(value)
        value.fast_xs_html
      end

      def escape(value)
        value.fast_xs_cgi
      end

      def unescape(value)
        value.fast_uxs_cgi
      end
    end
  end
end
8
+
9
# Replace ERB::Util's HTML and URL escaping (and their one-letter
# aliases) with the fast_xs String methods.  The argument is converted
# with to_s first.  Skipped when ERB::Util is not defined at load time.
if defined?(ERB::Util)
  module ERB::Util
    def html_escape(value)
      value.to_s.fast_xs_html
    end
    alias h html_escape
    module_function :h, :html_escape

    def url_encode(value)
      value.to_s.fast_xs_url
    end
    alias u url_encode
    module_function :u, :url_encode
  end
end
20
+
21
# Replace Mongrel::HttpRequest's class-level escape/unescape with the
# fast_xs String methods.  Skipped when Mongrel::HttpRequest is not
# defined at load time.
if defined?(Mongrel::HttpRequest)
  class Mongrel::HttpRequest
    class << self
      def unescape(s)
        s.fast_uxs_cgi
      end

      # the argument is converted with to_s before escaping
      def escape(s)
        s.to_s.fast_xs_cgi
      end
    end
  end
end
25
+
26
# Replace Rack::Utils' escape/unescape with the fast_xs String methods
# and re-export both as module functions.  Skipped when Rack::Utils is
# not defined at load time.
if defined?(Rack::Utils)
  module Rack::Utils
    def unescape(s)
      s.fast_uxs_cgi
    end

    # the argument is converted with to_s before escaping
    def escape(s)
      s.to_s.fast_xs_cgi
    end

    module_function :unescape, :escape
  end
end
@@ -31,6 +31,15 @@ class TestCgiClassOverrides < Test::Unit::TestCase
31
31
  assert_equal 'H3LL0+W0RLD', CGI::escape('H3LL0 W0RLD')
32
32
  end
33
33
 
34
# CGI::unescape must decode %XX escapes, turn '+' into a space and
# leave unescaped text alone.
def test_unescape_cgi
  cases = [
    ['hello=world', 'hello%3Dworld'],
    [' ',           '+'],
    ['+',           '%2B'],
    [',',           '%2C'],
    ['hello-world', 'hello-world'],
    ['H3LL0 W0RLD', 'H3LL0+W0RLD'],
  ]

  cases.each do |expected, escaped|
    assert_equal expected, CGI::unescape(escaped)
  end
end
42
+
34
43
  def test_large_strings
35
44
  if ENV['LARGE_STRING_TEST']
36
45
  assert CGI::escape('&' * (8192 * 1024))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4.7
3
3
  specification_version: 2
4
4
  name: fast_xs
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.4"
7
- date: 2007-12-12 00:00:00 -08:00
6
+ version: "0.5"
7
+ date: 2008-01-07 00:00:00 -08:00
8
8
  summary: excessively fast escaping
9
9
  require_paths:
10
10
  - lib/i486-linux
@@ -47,6 +47,7 @@ files:
47
47
  - lib/fast_xs_monkey_patcher.rb
48
48
  - ext/fast_xs/fast_xs.c
49
49
  - ext/fast_xs/extconf.rb
50
+ - ext/fast_xs/ruby_1_9_compat.h
50
51
  test_files: []
51
52
 
52
53
  rdoc_options: