fast_xs 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ begin
13
13
  rev = Time.at(rev.split("\n")[1].to_i).strftime('%Y%m%d.%H%M%S')
14
14
  rescue
15
15
  end
16
- version ||= ENV['VERSION'] || '0.4' + (rev && rev.length > 0 ? ".#{rev}" : '')
16
+ version ||= ENV['VERSION'] || '0.5' + (rev && rev.length > 0 ? ".#{rev}" : '')
17
17
  pkg = "#{name}-#{version}"
18
18
  bin = "*.{so,o}"
19
19
  archlib = "lib/#{::Config::CONFIG['arch']}"
@@ -24,7 +24,7 @@ rdoc_opts = ['--quiet', '--title', 'fast_xs notes', '--main', 'README',
24
24
  '--inline-source']
25
25
  pkg_files = %w(CHANGELOG COPYING README Rakefile) +
26
26
  Dir.glob("{test,lib}/**/*.rb") +
27
- Dir.glob("ext/**/*.{c,rb}")
27
+ Dir.glob("ext/**/*.{c,rb,h}")
28
28
 
29
29
 
30
30
  spec = Gem::Specification.new do |s|
@@ -4,11 +4,41 @@
4
4
  #include <assert.h>
5
5
  #include <sys/time.h>
6
6
  #include <sys/resource.h>
7
+ #include "ruby_1_9_compat.h"
8
+
9
+ /* I don't trust ctype.h when it comes to locale-independence: */
10
/*
 * Locale-independent check for an ASCII hexadecimal digit.
 * Returns 1 for '0'-'9', 'a'-'f' and 'A'-'F', and 0 for any other value.
 */
static __inline__ int is_hex(const int x)
{
	if (x >= '0' && x <= '9')
		return 1;
	if (x >= 'a' && x <= 'f')
		return 1;
	return (x >= 'A' && x <= 'F');
}
16
+
17
/*
 * Upper-case a lowercase hex letter ('a'-'f') by clearing the ASCII
 * case bit (0x20); every other value is returned unchanged.
 */
static __inline__ int xtoupper(const int x)
{
	if (x < 'a' || x > 'f')
		return x;
	return x & ~0x20;
}
21
+
22
/*
 * Convert one hexadecimal digit character to its value (0-15).
 * Values below 'A' are treated as decimal digits; everything else is
 * folded to upper case and measured from 'A'.  No validation is done
 * here, so a non-hex argument yields a meaningless result.
 */
static __inline__ int hexchar_to_int(const int x)
{
	if (x < 'A')
		return x - '0';
	return xtoupper(x) - 'A' + 10;
}
26
+
27
/*
 * Combine two hexadecimal digit characters into one value:
 * x1 supplies the high nibble, x2 the low nibble.
 */
static __inline__ int hexpair_to_int(const int x1, const int x2)
{
	const int hi = hexchar_to_int(x1);
	const int lo = hexchar_to_int(x2);

	return (hi << 4) | lo;
}
7
31
 
8
32
  static ID unpack_id;
9
33
  static VALUE U_fmt, C_fmt;
10
34
  static rlim_t alloca_limit = 4096; /* very small default */
11
35
 
36
/*
 * Scratch-buffer helpers: requests larger than alloca_limit go to
 * malloc(), everything else to alloca().
 *
 * - xs_alloc() must be expanded directly in the function that uses the
 *   buffer, since alloca() memory is released when that function returns.
 * - xs_free() must be given the pointer returned by xs_alloc() and the
 *   same size, so that both macros take the same branch.
 * - `size` is evaluated more than once; do not pass an expression with
 *   side effects.
 * - NOTE(review): a NULL return from malloc() is not checked here.
 *
 * The whole xs_alloc() expansion is parenthesized so it stays a single
 * operand when used inside a larger expression.
 */
#define xs_alloc(size) \
	(unlikely((size) > alloca_limit) ? malloc(size) : alloca(size))

#define xs_free(ptr, size) \
	do { if (unlikely((size) > alloca_limit)) free(ptr); } while (0)
41
+
12
42
  /* give GCC hints for better branch prediction
13
43
  * (we layout branches so that ASCII characters are handled faster) */
14
44
  #if defined(__GNUC__) && (__GNUC__ >= 3)
@@ -128,21 +158,6 @@ static size_t escape(char *buf, int n)
128
158
  return 1;
129
159
  }
130
160
 
131
- static long escaped_len(int n)
132
- {
133
- if (likely(n < 128)) {
134
- if (unlikely(n == '&'))
135
- return (sizeof("&amp;") - 1);
136
- if (unlikely(n == '>' || n == '<'))
137
- return (sizeof("&gt;") - 1);
138
- return 1;
139
- }
140
-
141
- CP_1252_ESCAPE(n);
142
-
143
- return VALID_VALUE(n) ? bytes_for(n) : 1;
144
- }
145
-
146
161
  static VALUE unpack_utf8(VALUE self)
147
162
  {
148
163
  return rb_funcall(self, unpack_id, 1, U_fmt);
@@ -156,26 +171,40 @@ static VALUE unpack_uchar(VALUE self)
156
171
/*
 * String#fast_xs: returns a new String holding the escaped form of the
 * receiver.
 *
 * The receiver is unpacked into an array of integers with unpack_utf8,
 * falling back to unpack_uchar if that raises.  A first pass over the
 * array computes the exact output size, a second pass writes the
 * escaped bytes with escape().
 *
 * NOTE(review): the size pass must agree byte-for-byte with what
 * escape() writes; escape() is defined elsewhere in this file, so
 * confirm the two stay in sync whenever either changes.
 * NOTE(review): a failed malloc() inside xs_alloc() is not detected.
 */
static VALUE fast_xs(VALUE self)
{
	long i;
	VALUE array;
	char *s, *c;
	size_t s_len;
	VALUE *tmp;
	VALUE rv;

	array = rb_rescue(unpack_utf8, self, unpack_uchar, self);

	/*
	 * Start from one output byte per element and add only the extra
	 * bytes that an escaped element needs on top of that.
	 */
	for (tmp = RARRAY_PTR(array), s_len = i = RARRAY_LEN(array);
	     --i >= 0;
	     tmp++) {
		int n = NUM2INT(*tmp);

		if (likely(n < 128)) {
			if (unlikely(n == '&'))
				s_len += (sizeof("&amp;") - 2);
			if (unlikely(n == '>' || n == '<'))
				s_len += (sizeof("&gt;") - 2);
			continue;
		}

		CP_1252_ESCAPE(n);

		if (VALID_VALUE(n))
			s_len += bytes_for(n) - 1;
	}

	/* allocate through xs_alloc() so the xs_free() below is its exact pair */
	c = s = xs_alloc(s_len);

	for (tmp = RARRAY_PTR(array), i = RARRAY_LEN(array); --i >= 0; tmp++)
		c += escape(c, NUM2INT(*tmp));

	rv = rb_str_new(s, s_len);

	xs_free(s, s_len);

	return rv;
}
@@ -188,32 +217,28 @@ static VALUE fast_xs(VALUE self)
188
217
  */
189
218
  static VALUE fast_xs_html(VALUE self)
190
219
  {
191
- struct RString *string = RSTRING(self);
192
220
  long i;
193
221
  char *s;
194
- size_t new_len = 0;
222
+ size_t new_len = RSTRING_LEN(self);
195
223
  char *new_str;
196
224
  VALUE rv;
197
225
 
198
- for (s = string->ptr, i = string->len; --i >= 0; ++s) {
226
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
199
227
  if (unlikely(*s == '&'))
200
- new_len += (sizeof("&amp;") - 1);
228
+ new_len += (sizeof("&amp;") - 2);
201
229
  else if (unlikely(*s == '<' || *s == '>'))
202
- new_len += (sizeof("&gt;") - 1);
230
+ new_len += (sizeof("&gt;") - 2);
203
231
  else if (unlikely(*s == '"'))
204
- new_len += (sizeof("&quot;") - 1);
205
- else
206
- new_len += 1;
232
+ new_len += (sizeof("&quot;") - 2);
207
233
  }
208
234
 
209
- new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
210
- : alloca(new_len);
235
+ new_str = xs_alloc(new_len);
211
236
 
212
237
  #define append_const(buf, x) do { \
213
238
  buf = memcpy(buf, x, sizeof(x) - 1) + sizeof(x) - 1; \
214
239
  } while (0)
215
240
 
216
- for (s = string->ptr, i = string->len; --i >= 0; ++s) {
241
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
217
242
  if (unlikely(*s == '&'))
218
243
  append_const(new_str, "&amp;");
219
244
  else if (unlikely(*s == '<'))
@@ -230,8 +255,7 @@ static VALUE fast_xs_html(VALUE self)
230
255
 
231
256
  rv = rb_str_new(new_str - new_len, new_len);
232
257
 
233
- if (unlikely(new_len > alloca_limit))
234
- free(new_str - new_len);
258
+ xs_free(new_str - new_len, new_len);
235
259
 
236
260
  return rv;
237
261
  }
@@ -244,24 +268,21 @@ static VALUE fast_xs_html(VALUE self)
244
268
 
245
269
  static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
246
270
  {
247
- struct RString *string = RSTRING(self);
248
271
  long i;
249
272
  char *s;
250
- size_t new_len = 0;
273
+ size_t new_len = RSTRING_LEN(self);
251
274
  char *new_str;
252
275
  VALUE rv;
253
276
 
254
- for (s = string->ptr, i = string->len; --i >= 0; ++s) {
255
- if (likely(CGI_URI_OK(*s) || (space_to_plus && *s == ' ')))
256
- ++new_len;
257
- else /* we'll only get <= "%FF" here */
258
- new_len += 3;
277
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
278
+ if (likely(CGI_URI_OK(*s)) || (space_to_plus && *s == ' '))
279
+ continue;
280
+ new_len += 2;
259
281
  }
260
282
 
261
- new_str = unlikely(new_len > alloca_limit) ? malloc(new_len)
262
- : alloca(new_len);
283
+ new_str = xs_alloc(new_len);
263
284
 
264
- for (s = string->ptr, i = string->len; --i >= 0; ++s) {
285
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self); --i >= 0; ++s) {
265
286
  if (likely(CGI_URI_OK(*s)))
266
287
  *new_str++ = *s;
267
288
  else if (space_to_plus && *s == ' ')
@@ -277,8 +298,7 @@ static inline VALUE _xs_uri_encode(VALUE self, const unsigned int space_to_plus)
277
298
 
278
299
  rv = rb_str_new(new_str - new_len, new_len);
279
300
 
280
- if (unlikely(new_len > alloca_limit))
281
- free(new_str - new_len);
301
+ xs_free(new_str - new_len, new_len);
282
302
 
283
303
  return rv;
284
304
  }
@@ -302,6 +322,53 @@ static VALUE fast_xs_cgi(VALUE self)
302
322
  return _xs_uri_encode(self, 1);
303
323
  }
304
324
 
325
+ static VALUE _uxs_uri(VALUE self, const unsigned int plus_to_space)
326
+ {
327
+ char *s, *new_str;
328
+ long i;
329
+ size_t new_len = RSTRING_LEN(self);
330
+ VALUE rv;
331
+
332
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self);
333
+ --i >= 0;
334
+ ++s) {
335
+ if (unlikely(*s == '%') &&
336
+ likely(is_hex(s[1])) &&
337
+ likely(is_hex(s[2]))) {
338
+ new_len -= 2;
339
+ s += 2;
340
+ i -= 2;
341
+ }
342
+ }
343
+
344
+ new_str = xs_alloc(new_len);
345
+ for (s = RSTRING_PTR(self), i = RSTRING_LEN(self);
346
+ --i >= 0;
347
+ ++s, ++new_str) {
348
+ if (plus_to_space && unlikely(*s == '+'))
349
+ *new_str = ' ';
350
+ else if (unlikely(*s == '%') &&
351
+ likely(is_hex(s[1])) &&
352
+ likely(is_hex(s[2]))) {
353
+ *new_str = hexpair_to_int(s[1], s[2]);
354
+ s += 2;
355
+ i -= 2;
356
+ } else
357
+ *new_str = *s;
358
+ }
359
+
360
+ rv = rb_str_new(new_str - new_len, new_len);
361
+
362
+ xs_free(s, new_len);
363
+
364
+ return rv;
365
+ }
366
+
367
+ static VALUE fast_uxs_cgi(VALUE self)
368
+ {
369
+ return _uxs_uri(self, 1);
370
+ }
371
+
305
372
  void Init_fast_xs(void)
306
373
  {
307
374
  struct rlimit rlim;
@@ -321,5 +388,6 @@ void Init_fast_xs(void)
321
388
  rb_define_method(rb_cString, "fast_xs", fast_xs, 0);
322
389
  rb_define_method(rb_cString, "fast_xs_html", fast_xs_html, 0);
323
390
  rb_define_method(rb_cString, "fast_xs_cgi", fast_xs_cgi, 0);
391
+ rb_define_method(rb_cString, "fast_uxs_cgi", fast_uxs_cgi, 0);
324
392
  rb_define_method(rb_cString, "fast_xs_url", fast_xs_url, 0);
325
393
  }
@@ -0,0 +1,14 @@
1
/*
 * Ruby 1.8.6+ accessor macros (for compatibility with Ruby 1.9).
 *
 * Each macro is defined only when it is not already defined, so this
 * header must be included after the Ruby headers for the checks to see
 * any definitions they provide.  The fallbacks read the `ptr` and `len`
 * members through RSTRING() / RARRAY().
 */
#ifndef RUBY_1_9_COMPAT_H
#define RUBY_1_9_COMPAT_H

#ifndef RSTRING_PTR
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#ifndef RSTRING_LEN
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#ifndef RARRAY_PTR
#  define RARRAY_PTR(s) (RARRAY(s)->ptr)
#endif
#ifndef RARRAY_LEN
#  define RARRAY_LEN(s) (RARRAY(s)->len)
#endif

#endif /* RUBY_1_9_COMPAT_H */
14
+
@@ -1,40 +1,32 @@
1
1
  require 'fast_xs'
2
2
 
3
- if defined?(CGI)
4
-
5
- class CGI
6
-
7
- def CGI::escapeHTML(value)
8
- value.fast_xs_html
9
- end
10
-
11
- def CGI::escape(value)
12
- value.fast_xs_cgi
13
- end
14
-
15
- end
16
-
17
- end
18
-
19
- if defined?(ERB::Util)
20
-
21
- module ERB::Util
22
-
23
- def html_escape(value)
24
- value.to_s.fast_xs_html
25
- end
26
- alias h html_escape
27
- module_function :h
28
- module_function :html_escape
29
-
30
- def url_encode(value)
31
- value.to_s.fast_xs_url
32
- end
33
- alias u url_encode
34
- module_function :u
35
- module_function :url_encode
36
-
37
-
38
- end
39
-
40
- end
3
# Route CGI's escaping class methods through the fast_xs String
# extensions.  Nothing is patched unless CGI is already defined.
if defined?(CGI)
  class CGI
    class << self
      def escapeHTML(value)
        value.fast_xs_html
      end

      def escape(value)
        value.fast_xs_cgi
      end

      def unescape(value)
        value.fast_uxs_cgi
      end
    end
  end
end
8
+
9
# Route ERB::Util's html_escape/h and url_encode/u through the fast_xs
# String extensions.  Nothing is patched unless ERB::Util is already
# defined.  Arguments are converted with to_s first.
if defined?(ERB::Util)
  module ERB::Util
    def html_escape(value)
      value.to_s.fast_xs_html
    end
    alias_method :h, :html_escape
    module_function :h, :html_escape

    def url_encode(value)
      value.to_s.fast_xs_url
    end
    alias_method :u, :url_encode
    module_function :u, :url_encode
  end
end
20
+
21
# Route Mongrel::HttpRequest.unescape/.escape through the fast_xs String
# extensions.  Nothing is patched unless Mongrel::HttpRequest is already
# defined.
if defined?(Mongrel::HttpRequest)
  class Mongrel::HttpRequest
    class << self
      def unescape(s)
        s.fast_uxs_cgi
      end

      # the argument is converted with to_s before escaping
      def escape(s)
        s.to_s.fast_xs_cgi
      end
    end
  end
end
25
+
26
# Route Rack::Utils.unescape/.escape through the fast_xs String
# extensions.  Nothing is patched unless Rack::Utils is already defined.
if defined?(Rack::Utils)
  module Rack::Utils
    def unescape(s)
      s.fast_uxs_cgi
    end

    # the argument is converted with to_s before escaping
    def escape(s)
      s.to_s.fast_xs_cgi
    end

    module_function :unescape
    module_function :escape
  end
end
@@ -31,6 +31,15 @@ class TestCgiClassOverrides < Test::Unit::TestCase
31
31
  assert_equal 'H3LL0+W0RLD', CGI::escape('H3LL0 W0RLD')
32
32
  end
33
33
 
34
# CGI::unescape (backed by String#fast_uxs_cgi) must decode %XX escapes
# and '+', and must pass malformed or truncated escapes through untouched.
def test_unescape_cgi
  assert_equal 'hello=world', CGI::unescape('hello%3Dworld')
  assert_equal ' ', CGI::unescape('+')
  assert_equal '+', CGI::unescape('%2B')
  assert_equal ',', CGI::unescape('%2C')
  assert_equal 'hello-world', CGI::unescape('hello-world')
  assert_equal 'H3LL0 W0RLD', CGI::unescape('H3LL0+W0RLD')

  # hex digits are accepted in either case
  assert_equal '+', CGI::unescape('%2b')
  assert_equal 'AB', CGI::unescape('%41%42')

  # malformed or truncated escapes are left as they are
  assert_equal '%', CGI::unescape('%')
  assert_equal '%2', CGI::unescape('%2')
  assert_equal '%zz', CGI::unescape('%zz')
  assert_equal 'abc%', CGI::unescape('abc%')

  assert_equal '', CGI::unescape('')
end
42
+
34
43
  def test_large_strings
35
44
  if ENV['LARGE_STRING_TEST']
36
45
  assert CGI::escape('&' * (8192 * 1024))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4.7
3
3
  specification_version: 2
4
4
  name: fast_xs
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.4"
7
- date: 2007-12-12 00:00:00 -08:00
6
+ version: "0.5"
7
+ date: 2008-01-07 00:00:00 -08:00
8
8
  summary: excessively fast escaping
9
9
  require_paths:
10
10
  - lib/i486-linux
@@ -47,6 +47,7 @@ files:
47
47
  - lib/fast_xs_monkey_patcher.rb
48
48
  - ext/fast_xs/fast_xs.c
49
49
  - ext/fast_xs/extconf.rb
50
+ - ext/fast_xs/ruby_1_9_compat.h
50
51
  test_files: []
51
52
 
52
53
  rdoc_options: