zscan 1.0.1 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8baa1419230fe03d1ec41cfa65202dce10d0512c
4
- data.tar.gz: 0cf1335b2d7221ca4c6329dd6e8e2b5b0f3ac59d
3
+ metadata.gz: 998b97db8e9341f3920caa27bf11558954a777ba
4
+ data.tar.gz: 033986f8e4a4086985bca23c84f8acfabd2f5e29
5
5
  SHA512:
6
- metadata.gz: a9ccf8455843673336365dd446f808cb52f1538a53c86cc69d102640e4a6afb40332e3c65f6f7a5a4178895a1095571e4328a501e64797a01a6af1cdc4256549
7
- data.tar.gz: 8c3d109803e0e9d8e91b8787f98190fd7e400613772f57347ff2d0a99725202c8eb3444ba11efc7a606897d78cec5d9932638292c5ba2583fb697f9e7c8c2540
6
+ metadata.gz: a8c23d9f29b57e113a55e46fb024bce0337aa8ee5b8317744ed9ce9c13ddf35e4b1b7af9bbf25d67c6b14e6c30bb7de00c214692cb5fea05c2e0f4fa6116478b
7
+ data.tar.gz: 06ccbc8c793f873a630c4774b746ad8ca1cfa4c2895414871c0b435812bd902e3f75618a6134551b5ab3f35b2838e12c9072e0bb15ff8f6010a7ba2ca4e8a83a
@@ -1,5 +1,5 @@
1
1
  require "strscan"
2
- require_relative "lib/zscan"
2
+ require_relative "../lib/zscan"
3
3
  require "benchmark"
4
4
 
5
5
  s = "word\n"
@@ -0,0 +1,21 @@
1
# Benchmark: decode the same 21-byte record (one int8, two little-endian
# doubles, one big-endian single) with ZScan#unpack, ZScan#scan_binary and
# String#unpack, 100000 times each.
require_relative "../lib/zscan"
require "benchmark"

spec = ZScan.binary_spec do
  int8
  double_le 2
  single_be
end

arr = [1, 1.1, 1.2, 1.3]
str = arr.pack 'cE2g'
# Use the canonical class name, the same constant that provides
# ZScan.binary_spec above.
z = ZScan.new str.b

# Baseline: loop overhead plus the position reset that every run below repeats.
puts 'reference nop group'
puts Benchmark.measure{ 100000.times{ z.pos = 0 } }
puts 'ZScan#unpack'
puts Benchmark.measure{ 100000.times{ z.pos = 0; z.unpack 'cE2g' } }
puts 'ZScan#scan_binary'
puts Benchmark.measure{ 100000.times{ z.pos = 0; z.scan_binary spec } }
puts 'String#unpack'
puts Benchmark.measure{ 100000.times{ z.pos = 0; str.unpack 'cE2g' } }
@@ -0,0 +1,156 @@
1
+ // GENERATED WITH: rake gen
2
+ #line 2 "ext/bspec_exec.inc"
3
+ __attribute__((__noinline__))
4
+ static VALUE bspec_exec(void** ip, char* s, VALUE a) {
5
+ static void* opcodes[] = { &&BS_RET, &&BS_INT8, &&BS_INT16, &&BS_INT16_SWAP, &&BS_INT32, &&BS_INT32_SWAP, &&BS_INT64, &&BS_INT64_SWAP, &&BS_UINT8, &&BS_UINT16, &&BS_UINT16_SWAP, &&BS_UINT32, &&BS_UINT32_SWAP, &&BS_UINT64, &&BS_UINT64_SWAP, &&BS_SINGLE, &&BS_SINGLE_SWAP, &&BS_DOUBLE, &&BS_DOUBLE_SWAP };
6
+ if (ip == NULL) {
7
+ return (VALUE)opcodes;
8
+ }
9
+ goto **(ip++);
10
+ BS_RET:
11
+ return a;
12
+ BS_INT8:
13
+ {
14
+ uint8_t r = ((uint8_t*)s)[0];
15
+ rb_ary_push(a, INT2FIX(CAST(r, int8_t)));
16
+ s += 1;
17
+ goto **(ip++);
18
+ }
19
+
20
+ BS_INT16:
21
+ {
22
+ uint16_t r = ((uint16_t*)s)[0];
23
+ rb_ary_push(a, INT2FIX(CAST(r, int16_t)));
24
+ s += 2;
25
+ goto **(ip++);
26
+ }
27
+
28
+ BS_INT16_SWAP:
29
+ {
30
+ uint16_t r = swap16(((uint16_t*)s)[0]);
31
+ rb_ary_push(a, INT2FIX(CAST(r, int16_t)));
32
+ s += 2;
33
+ goto **(ip++);
34
+ }
35
+
36
+ BS_INT32:
37
+ {
38
+ uint32_t r = ((uint32_t*)s)[0];
39
+ rb_ary_push(a, INT2NUM(CAST(r, int32_t)));
40
+ s += 4;
41
+ goto **(ip++);
42
+ }
43
+
44
+ BS_INT32_SWAP:
45
+ {
46
+ uint32_t r = swap32(((uint32_t*)s)[0]);
47
+ rb_ary_push(a, INT2NUM(CAST(r, int32_t)));
48
+ s += 4;
49
+ goto **(ip++);
50
+ }
51
+
52
+ BS_INT64:
53
+ {
54
+ uint64_t r = ((uint64_t*)s)[0];
55
+ rb_ary_push(a, INT64toNUM(CAST(r, int64_t)));
56
+ s += 8;
57
+ goto **(ip++);
58
+ }
59
+
60
+ BS_INT64_SWAP:
61
+ {
62
+ uint64_t r = swap64(((uint64_t*)s)[0]);
63
+ rb_ary_push(a, INT64toNUM(CAST(r, int64_t)));
64
+ s += 8;
65
+ goto **(ip++);
66
+ }
67
+
68
+ BS_UINT8:
69
+ {
70
+ uint8_t r = ((uint8_t*)s)[0];
71
+ rb_ary_push(a, INT2FIX(CAST(r, uint8_t)));
72
+ s += 1;
73
+ goto **(ip++);
74
+ }
75
+
76
+ BS_UINT16:
77
+ {
78
+ uint16_t r = ((uint16_t*)s)[0];
79
+ rb_ary_push(a, INT2FIX(CAST(r, uint16_t)));
80
+ s += 2;
81
+ goto **(ip++);
82
+ }
83
+
84
+ BS_UINT16_SWAP:
85
+ {
86
+ uint16_t r = swap16(((uint16_t*)s)[0]);
87
+ rb_ary_push(a, INT2FIX(CAST(r, uint16_t)));
88
+ s += 2;
89
+ goto **(ip++);
90
+ }
91
+
92
+ BS_UINT32:
93
+ {
94
+ uint32_t r = ((uint32_t*)s)[0];
95
+ rb_ary_push(a, UINT64toNUM(r));
96
+ s += 4;
97
+ goto **(ip++);
98
+ }
99
+
100
+ BS_UINT32_SWAP:
101
+ {
102
+ uint32_t r = swap32(((uint32_t*)s)[0]);
103
+ rb_ary_push(a, UINT64toNUM(r));
104
+ s += 4;
105
+ goto **(ip++);
106
+ }
107
+
108
+ BS_UINT64:
109
+ {
110
+ uint64_t r = ((uint64_t*)s)[0];
111
+ rb_ary_push(a, UINT64toNUM(r));
112
+ s += 8;
113
+ goto **(ip++);
114
+ }
115
+
116
+ BS_UINT64_SWAP:
117
+ {
118
+ uint64_t r = swap64(((uint64_t*)s)[0]);
119
+ rb_ary_push(a, UINT64toNUM(r));
120
+ s += 8;
121
+ goto **(ip++);
122
+ }
123
+
124
+ BS_SINGLE:
125
+ {
126
+ uint32_t r = ((uint32_t*)s)[0];
127
+ rb_ary_push(a, DBL2NUM((double)CAST(r, float)));
128
+ s += 4;
129
+ goto **(ip++);
130
+ }
131
+
132
+ BS_SINGLE_SWAP:
133
+ {
134
+ uint32_t r = swap32(((uint32_t*)s)[0]);
135
+ rb_ary_push(a, DBL2NUM((double)CAST(r, float)));
136
+ s += 4;
137
+ goto **(ip++);
138
+ }
139
+
140
+ BS_DOUBLE:
141
+ {
142
+ uint64_t r = ((uint64_t*)s)[0];
143
+ rb_ary_push(a, DBL2NUM(CAST(r, double)));
144
+ s += 8;
145
+ goto **(ip++);
146
+ }
147
+
148
+ BS_DOUBLE_SWAP:
149
+ {
150
+ uint64_t r = swap64(((uint64_t*)s)[0]);
151
+ rb_ary_push(a, DBL2NUM(CAST(r, double)));
152
+ s += 8;
153
+ goto **(ip++);
154
+ }
155
+
156
+ }
@@ -0,0 +1,3 @@
1
+ // GENERATED WITH: rake gen
2
+ const char* bspec_opcode_names[] = {"RET", "INT8", "INT16", "INT16_SWAP", "INT32", "INT32_SWAP", "INT64", "INT64_SWAP", "UINT8", "UINT16", "UINT16_SWAP", "UINT32", "UINT32_SWAP", "UINT64", "UINT64_SWAP", "SINGLE", "SINGLE_SWAP", "DOUBLE", "DOUBLE_SWAP"};
3
+ long bspec_opcode_size = 19;
data/ext/zscan.c CHANGED
@@ -1,23 +1,24 @@
1
1
  #include <ruby/ruby.h>
2
2
  #include <ruby/re.h>
3
3
  #include <ruby/encoding.h>
4
+ #include <ctype.h>
5
+
6
+ // todo infect check
4
7
 
5
8
  typedef struct {
6
- size_t pos;
7
- size_t bytepos;
9
+ long pos;
10
+ long bytepos;
8
11
  } Pos;
9
12
 
10
13
  typedef struct {
11
- size_t pos;
12
- size_t bytepos;
14
+ long pos;
15
+ long bytepos;
13
16
  VALUE s;
14
- size_t stack_i;
15
- size_t stack_cap;
17
+ long stack_i;
18
+ long stack_cap;
16
19
  Pos* stack;
17
20
  } ZScan;
18
21
 
19
- #define P ZScan* p = rb_check_typeddata(self, &zscan_type)
20
-
21
22
  static void zscan_mark(void* pp) {
22
23
  ZScan* p = pp;
23
24
  rb_gc_mark(p->s);
@@ -39,6 +40,8 @@ static const rb_data_type_t zscan_type = {
39
40
  {zscan_mark, zscan_free, zscan_memsize}
40
41
  };
41
42
 
43
+ #define P ZScan* p = rb_check_typeddata(self, &zscan_type)
44
+
42
45
  static VALUE zscan_alloc(VALUE klass) {
43
46
  ZScan* p = ALLOC(ZScan);
44
47
  MEMZERO(p, ZScan, 1);
@@ -66,13 +69,12 @@ static VALUE zscan_pos(VALUE self) {
66
69
 
67
70
  static VALUE zscan_advance(VALUE self, VALUE v_diff) {
68
71
  P;
69
- long signed_n = p->pos + NUM2LONG(v_diff);
70
- if (signed_n < 0) {
72
+ long n = p->pos + NUM2LONG(v_diff);
73
+ if (n < 0) {
71
74
  p->pos = 0;
72
75
  p->bytepos = 0;
73
76
  return self;
74
77
  }
75
- size_t n = signed_n;
76
78
 
77
79
  // because there's no "reverse scan" API, we have a O(n) routine :(
78
80
  if (n < p->pos) {
@@ -82,7 +84,7 @@ static VALUE zscan_advance(VALUE self, VALUE v_diff) {
82
84
 
83
85
  if (n > p->pos) {
84
86
  rb_encoding* enc = rb_enc_get(p->s);
85
- size_t byteend = RSTRING_LEN(p->s);
87
+ long byteend = RSTRING_LEN(p->s);
86
88
  char* ptr = RSTRING_PTR(p->s);
87
89
  for (; p->pos < n && p->bytepos < byteend;) {
88
90
  int n = rb_enc_mbclen(ptr + p->bytepos, ptr + byteend, enc);
@@ -105,7 +107,7 @@ static VALUE zscan_bytepos(VALUE self) {
105
107
  static VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos) {
106
108
  P;
107
109
  long signed_bytepos = NUM2LONG(v_bytepos);
108
- size_t from, to, bytepos;
110
+ long from, to, bytepos;
109
111
 
110
112
  if (signed_bytepos > RSTRING_LEN(p->s)) {
111
113
  bytepos = RSTRING_LEN(p->s);
@@ -127,7 +129,7 @@ static VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos) {
127
129
 
128
130
  rb_encoding* enc = rb_enc_get(p->s);
129
131
  char* ptr = RSTRING_PTR(p->s);
130
- size_t diff = 0;
132
+ long diff = 0;
131
133
  for (; from < to;) {
132
134
  int n = rb_enc_mbclen(ptr + from, ptr + to, enc);
133
135
  if (n) {
@@ -154,7 +156,7 @@ static VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos) {
154
156
 
155
157
  static VALUE zscan_eos_p(VALUE self) {
156
158
  P;
157
- return (p->bytepos == (size_t)RSTRING_LEN(p->s) ? Qtrue : Qfalse);
159
+ return (p->bytepos == RSTRING_LEN(p->s) ? Qtrue : Qfalse);
158
160
  }
159
161
 
160
162
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
@@ -262,10 +264,13 @@ static VALUE zscan_clear_pos_stack(VALUE self) {
262
264
  return self;
263
265
  }
264
266
 
265
- static VALUE zscan_try(VALUE self) {
266
- if (!rb_block_given_p()) {
267
- rb_raise(rb_eRuntimeError, "need a block");
267
+ #define REQUIRE_BLOCK \
268
+ if (!rb_block_given_p()) {\
269
+ rb_raise(rb_eRuntimeError, "need a block");\
268
270
  }
271
+
272
+ static VALUE zscan_try(VALUE self) {
273
+ REQUIRE_BLOCK;
269
274
  VALUE r;
270
275
  zscan_push(self);
271
276
  r = rb_yield(Qnil);
@@ -277,6 +282,175 @@ static VALUE zscan_try(VALUE self) {
277
282
  return r;
278
283
  }
279
284
 
285
+ static VALUE zscan_zero_or_one(int argc, VALUE* argv, VALUE self) {
286
+ REQUIRE_BLOCK;
287
+ volatile VALUE a = Qnil;
288
+ volatile VALUE r;
289
+ rb_scan_args(argc, argv, "01", &a);
290
+ if (a == Qnil) {
291
+ a = rb_ary_new();
292
+ }
293
+ zscan_push(self);
294
+ r = rb_yield(Qnil);
295
+ if (RTEST(r)) {
296
+ rb_funcall(a, rb_intern("<<"), 1, r);
297
+ zscan_drop(self);
298
+ } else {
299
+ zscan_pop(self);
300
+ }
301
+ return a;
302
+ }
303
+
304
+ static VALUE zscan_zero_or_more(int argc, VALUE* argv, VALUE self) {
305
+ REQUIRE_BLOCK;
306
+ volatile VALUE a = Qnil;
307
+ volatile VALUE r;
308
+ long backpos;
309
+ P;
310
+ rb_scan_args(argc, argv, "01", &a);
311
+ if (a == Qnil) {
312
+ a = rb_ary_new();
313
+ }
314
+ for (;;) {
315
+ zscan_push(self);
316
+ backpos = p->bytepos;
317
+ r = rb_yield(Qnil);
318
+ if (RTEST(r) && backpos != p->bytepos) {
319
+ rb_funcall(a, rb_intern("<<"), 1, r);
320
+ zscan_drop(self);
321
+ } else {
322
+ zscan_pop(self);
323
+ break;
324
+ }
325
+ }
326
+ return a;
327
+ }
328
+
329
+ static VALUE zscan_one_or_more(int argc, VALUE* argv, VALUE self) {
330
+ REQUIRE_BLOCK;
331
+ volatile VALUE a = Qnil;
332
+ volatile VALUE r;
333
+
334
+ r = rb_yield(Qnil);
335
+ if (RTEST(r)) {
336
+ long backpos;
337
+ P;
338
+ rb_scan_args(argc, argv, "01", &a);
339
+ if (a == Qnil) {
340
+ a = rb_ary_new();
341
+ }
342
+
343
+ rb_funcall(a, rb_intern("<<"), 1, r);
344
+ for (;;) {
345
+ zscan_push(self);
346
+ backpos = p->bytepos;
347
+ r = rb_yield(Qnil);
348
+ if (RTEST(r) && backpos != p->bytepos) {
349
+ rb_funcall(a, rb_intern("<<"), 1, r);
350
+ zscan_drop(self);
351
+ } else {
352
+ zscan_pop(self);
353
+ break;
354
+ }
355
+ }
356
+ return a;
357
+ } else {
358
+ return Qnil;
359
+ }
360
+ }
361
+
362
+ VALUE zscan_scan_float(VALUE self) {
363
+ P;
364
+ if (RSTRING_LEN(p->s) == p->bytepos) {
365
+ return Qnil;
366
+ }
367
+
368
+ char* s = RSTRING_PTR(p->s) + p->bytepos;
369
+ if (isspace(s[0])) {
370
+ return Qnil;
371
+ }
372
+ char* e;
373
+ double d = strtod(s, &e);
374
+ if (e == s || e - s > RSTRING_LEN(p->s) - p->bytepos) {
375
+ return Qnil;
376
+ } else {
377
+ // it ok to use advance because the source is ascii compatible
378
+ zscan_advance(self, LONG2NUM(e - s));
379
+ return DBL2NUM(d);
380
+ }
381
+ }
382
+
383
+ static VALUE bspec_big_endian_p(VALUE self) {
384
+ # ifdef DYNAMIC_ENDIAN
385
+ /* for universal binary of NEXTSTEP and MacOS X */
386
+ int init = 1;
387
+ char* p = (char*)&init;
388
+ return p[0] ? Qfalse : Qtrue;
389
+ # elif defined(WORDS_BIGENDIAN)
390
+ return Qtrue;
391
+ #else
392
+ return Qfalse;
393
+ #endif
394
+ }
395
+
396
// GCC_VERSION_SINCE(major, minor, patchlevel) is non-zero when compiling with
// GCC (and not the Intel compiler) at version major.minor.patchlevel or newer.
//
// The compiler test selects between two definitions instead of living inside
// the macro body: a `defined` operator produced by macro expansion within an
// #if expression is undefined behavior in C.
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
# define GCC_VERSION_SINCE(major, minor, patchlevel) \
  ((__GNUC__ > (major)) || \
   (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
   (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel)))
#else
# define GCC_VERSION_SINCE(major, minor, patchlevel) 0
#endif

// Prefer the compiler's byte-swap builtins when they are available.
#if GCC_VERSION_SINCE(4,3,0) || defined(__clang__)
# define swap32(x) __builtin_bswap32(x)
# define swap64(x) __builtin_bswap64(x)
#endif
406
+
407
+ #ifndef swap16
408
+ # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
409
+ #endif
410
+
411
+ #ifndef swap32
412
+ # define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
413
+ |(((x)>>24)&0xFF) \
414
+ |(((x)&0x0000FF00)<<8) \
415
+ |(((x)&0x00FF0000)>>8) ))
416
+ #endif
417
+
418
+ #ifndef swap64
419
+ # ifdef HAVE_INT64_T
420
+ # define byte_in_64bit(n) ((uint64_t)0xff << (n))
421
+ # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
422
+ |(((x)>>56)&0xFF) \
423
+ |(((x)&byte_in_64bit(8))<<40) \
424
+ |(((x)&byte_in_64bit(48))>>40) \
425
+ |(((x)&byte_in_64bit(16))<<24) \
426
+ |(((x)&byte_in_64bit(40))>>24) \
427
+ |(((x)&byte_in_64bit(24))<<8) \
428
+ |(((x)&byte_in_64bit(32))>>8)))
429
+ # endif
430
+ #endif
431
+
432
+ // NOTE can not use sizeof in preprocessor
433
+ #define INT64toNUM(x) (sizeof(long) == 8 ? LONG2NUM(x) : LL2NUM(x))
434
+ #define UINT64toNUM(x) (sizeof(long) == 8 ? ULONG2NUM(x) : ULL2NUM(x))
435
+
436
+ #define CAST(var, ty) *((ty*)(&(var)))
437
+
438
+ #include "bspec_exec.inc"
439
+
440
+ static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
441
+ P;
442
+ long s_size = NUM2LONG(rb_iv_get(spec, "@s_size"));
443
+ if (p->bytepos + s_size > RSTRING_LEN(p->s)) {
444
+ return Qnil;
445
+ }
446
+ VALUE code = rb_iv_get(spec, "@code");
447
+ long a_size = RSTRING_LEN(code) / sizeof(void*);
448
+ volatile VALUE a = rb_ary_new2(a_size);
449
+ bspec_exec((void**)RSTRING_PTR(code), RSTRING_PTR(p->s) + p->bytepos, a);
450
+ zscan_bytepos_eq(self, LONG2NUM(p->bytepos + s_size));
451
+ return a;
452
+ }
453
+
280
454
  void Init_zscan() {
281
455
  VALUE zscan = rb_define_class("ZScan", rb_cObject);
282
456
  rb_define_alloc_func(zscan, zscan_alloc);
@@ -295,5 +469,23 @@ void Init_zscan() {
295
469
  rb_define_method(zscan, "drop", zscan_drop, 0);
296
470
  rb_define_method(zscan, "restore", zscan_restore, 0);
297
471
  rb_define_method(zscan, "clear_pos_stack", zscan_clear_pos_stack, 0);
472
+
298
473
  rb_define_method(zscan, "try", zscan_try, 0);
474
+ rb_define_method(zscan, "zero_or_one", zscan_zero_or_one, -1);
475
+ rb_define_method(zscan, "zero_or_more", zscan_zero_or_more, -1);
476
+ rb_define_method(zscan, "one_or_more", zscan_one_or_more, -1);
477
+
478
+ rb_define_method(zscan, "scan_float", zscan_scan_float, 0);
479
+ rb_define_method(zscan, "scan_binary", zscan_scan_binary, 1);
480
+
481
+ VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
482
+ rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
483
+
484
+ # include "bspec_opcode_names.inc"
485
+ void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
486
+ for (long i = 0; i < bspec_opcode_size; i++) {
487
+ VALUE bytecode = rb_str_new((char*)&opcodes[i], sizeof(void*));
488
+ OBJ_FREEZE(bytecode);
489
+ rb_define_const(bs, bspec_opcode_names[i], bytecode);
490
+ }
299
491
  }
data/lib/zscan.rb CHANGED
@@ -1,10 +1,17 @@
1
1
  require_relative "../ext/zscan"
2
+ require_relative "zscan/instructions"
3
+ require "date"
2
4
 
3
5
  class ZScan
4
- VERSION = '1.0.1'
6
+ VERSION = '1.1'
5
7
 
6
8
  def initialize s, dup=false
7
- _internal_init dup ? s.dup : s
9
+ if s.encoding.ascii_compatible?
10
+ s = dup ? s.dup : s
11
+ else
12
+ s = s.encode 'utf-8'
13
+ end
14
+ _internal_init s
8
15
  end
9
16
 
10
17
  def string
@@ -17,6 +24,76 @@ class ZScan
17
24
  end
18
25
  end
19
26
 
27
# Scan an optionally signed integer at the current position.
#
# radix - base in 2..36, or nil to choose the base from the prefix:
#         `0b` is 2, `0x` is 16, `0` followed by an octal digit is 8,
#         anything else is 10.
#
# Returns the Integer and moves past it, or returns nil and leaves the
# position where it was when no integer can be scanned.
# Raises ArgumentError when an explicit radix is outside 2..36.
def scan_int radix=nil
  # Reject a bad radix up front, so nothing is raised from inside `try`.
  if radix and !(2..36).include?(radix)
    raise ArgumentError, "invalid radix #{radix}"
  end

  negative = false
  r = try do
    negative = (scan(/[+\-]/) == '-')
    if radix.nil?
      radix =
        if scan(/0b/i)
          2
        elsif scan(/0x/i)
          16
        elsif scan(/0(?=[0-7])/)
          # Only treat `0` as the octal prefix when octal digits follow;
          # a lone "0" is decimal zero and must still scan.
          8
        else
          10
        end
    end
    scan \
      case radix
      when 2;  /[01]+/
      when 8;  /[0-7]+/
      when 10; /\d+/
      when 16; /\h+/i
      else
        if radix < 10
          # digits of base N are 0..N-1
          /[0-#{radix - 1}]+/
        else
          end_char = ('a'.ord + (radix - 11)).chr
          /[\da-#{end_char}]+/i
        end
      end
  end
  if r
    r = r.to_i radix
    negative ? -r : r
  end
end
65
+
66
# Scan a DateTime described by the strptime `format` from the current position.
#
# Returns the DateTime and advances the byte position past the parsed text,
# or returns nil without moving when the text does not parse.
def scan_date format, start=Date::ITALY
  input = rest
  parsed = DateTime._strptime input, format
  return nil unless parsed

  # XXX need 2 parses because the handling is very complex ...
  # _strptime reports the unparsed :leftover, strptime builds the object.
  begin
    date_time = DateTime.strptime input, format, start
  rescue
    return nil
  end

  leftover = parsed[:leftover]
  consumed = input.bytesize
  consumed -= leftover.bytesize if leftover
  self.bytepos += consumed

  date_time
end
81
+ end
82
+
83
# Unpack `format` (String#unpack syntax) from the rest of the string.
#
# Returns the unpacked values and advances the byte position past them, or
# returns nil without moving when any value came back nil.
# Raises ArgumentError when the format contains the `@` position instruction.
def unpack format
  raise ArgumentError, 'position instruction @ not supported' if format.index('@')

  values = rest.unpack format
  return if values.index(nil)

  # XXX pack to get parsed length because no related API is exposed ...
  consumed = values.pack(format).bytesize
  self.bytepos += consumed
  values
end
96
+
20
97
  def pos= new_pos
21
98
  advance new_pos - pos
22
99
  end
@@ -57,6 +134,26 @@ class ZScan
57
134
  _internal_string.bytesize
58
135
  end
59
136
 
137
+ def line_index
138
+ _internal_string.byteslice(0, bytepos).count "\n"
139
+ end
140
+
141
+ def self.binary_spec &p
142
+ bs = BinarySpec.new
143
+ bs.instance_eval &p
144
+ bs.instance_variable_get(:@code) << BinarySpec::RET
145
+ bs
146
+ end
147
+
148
+ class BinarySpec
149
+ BLANK = ''.force_encoding 'binary'
150
+
151
+ def initialize
152
+ @code = BLANK.dup
153
+ @s_size = 0
154
+ end
155
+ end
156
+
60
157
  private :_internal_init, :_internal_string
61
158
  end
62
159
 
@@ -0,0 +1,165 @@
1
+ # GENERATED WITH: rake gen
2
+ class ZScan::BinarySpec
3
+ def int8 n=1
4
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
5
+ n.times do
6
+ @code << INT8
7
+ @s_size += 1
8
+ end
9
+ end
10
+ def int16 n=1
11
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
12
+ n.times do
13
+ @code << INT16
14
+ @s_size += 2
15
+ end
16
+ end
17
+ def int16_swap n=1
18
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
19
+ n.times do
20
+ @code << INT16_SWAP
21
+ @s_size += 2
22
+ end
23
+ end
24
+ def int32 n=1
25
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
26
+ n.times do
27
+ @code << INT32
28
+ @s_size += 4
29
+ end
30
+ end
31
+ def int32_swap n=1
32
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
33
+ n.times do
34
+ @code << INT32_SWAP
35
+ @s_size += 4
36
+ end
37
+ end
38
+ def int64 n=1
39
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
40
+ n.times do
41
+ @code << INT64
42
+ @s_size += 8
43
+ end
44
+ end
45
+ def int64_swap n=1
46
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
47
+ n.times do
48
+ @code << INT64_SWAP
49
+ @s_size += 8
50
+ end
51
+ end
52
+ def uint8 n=1
53
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
54
+ n.times do
55
+ @code << UINT8
56
+ @s_size += 1
57
+ end
58
+ end
59
+ def uint16 n=1
60
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
61
+ n.times do
62
+ @code << UINT16
63
+ @s_size += 2
64
+ end
65
+ end
66
+ def uint16_swap n=1
67
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
68
+ n.times do
69
+ @code << UINT16_SWAP
70
+ @s_size += 2
71
+ end
72
+ end
73
+ def uint32 n=1
74
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
75
+ n.times do
76
+ @code << UINT32
77
+ @s_size += 4
78
+ end
79
+ end
80
+ def uint32_swap n=1
81
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
82
+ n.times do
83
+ @code << UINT32_SWAP
84
+ @s_size += 4
85
+ end
86
+ end
87
+ def uint64 n=1
88
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
89
+ n.times do
90
+ @code << UINT64
91
+ @s_size += 8
92
+ end
93
+ end
94
+ def uint64_swap n=1
95
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
96
+ n.times do
97
+ @code << UINT64_SWAP
98
+ @s_size += 8
99
+ end
100
+ end
101
+ def single n=1
102
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
103
+ n.times do
104
+ @code << SINGLE
105
+ @s_size += 4
106
+ end
107
+ end
108
+ def single_swap n=1
109
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
110
+ n.times do
111
+ @code << SINGLE_SWAP
112
+ @s_size += 4
113
+ end
114
+ end
115
+ def double n=1
116
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
117
+ n.times do
118
+ @code << DOUBLE
119
+ @s_size += 8
120
+ end
121
+ end
122
+ def double_swap n=1
123
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
124
+ n.times do
125
+ @code << DOUBLE_SWAP
126
+ @s_size += 8
127
+ end
128
+ end
129
+ if ZScan::BinarySpec.big_endian?
130
+ alias int16_be int16
131
+ alias int16_le int16_swap
132
+ alias int32_be int32
133
+ alias int32_le int32_swap
134
+ alias int64_be int64
135
+ alias int64_le int64_swap
136
+ alias uint16_be uint16
137
+ alias uint16_le uint16_swap
138
+ alias uint32_be uint32
139
+ alias uint32_le uint32_swap
140
+ alias uint64_be uint64
141
+ alias uint64_le uint64_swap
142
+ alias single_be single
143
+ alias single_le single_swap
144
+ alias double_be double
145
+ alias double_le double_swap
146
+ else
147
+ alias int16_le int16
148
+ alias int16_be int16_swap
149
+ alias int32_le int32
150
+ alias int32_be int32_swap
151
+ alias int64_le int64
152
+ alias int64_be int64_swap
153
+ alias uint16_le uint16
154
+ alias uint16_be uint16_swap
155
+ alias uint32_le uint32
156
+ alias uint32_be uint32_swap
157
+ alias uint64_le uint64
158
+ alias uint64_be uint64_swap
159
+ alias single_le single
160
+ alias single_be single_swap
161
+ alias double_le double
162
+ alias double_be double_swap
163
+ end
164
+ undef int16_swap, int32_swap, int64_swap, uint16_swap, uint32_swap, uint64_swap, single_swap, double_swap
165
+ end
data/rakefile CHANGED
@@ -4,6 +4,60 @@ version = `command grep 'VERSION =' lib/zscan.rb`[version_re]
4
4
  gem_files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
5
5
  gem_package = "zscan-#{version}.gem"
6
6
 
7
+ bspec_types = %w[INT8 INT16 INT32 INT64 UINT8 UINT16 UINT32 UINT64 SINGLE DOUBLE]
8
+ bspec_insns = bspec_types.flat_map{|ty|
9
+ if ty =~ /INT8/
10
+ ty
11
+ else
12
+ [ty, "#{ty}_SWAP"]
13
+ end
14
+ }
15
# Number of input bytes consumed by instruction `ins`
# (e.g. "UINT32_SWAP" => 4). Raises 'bad' for an unknown instruction.
def bspec_incr ins
  if (int_match = /INT(\d+)/.match(ins))
    int_match[1].to_i / 8
  elsif /SINGLE/ =~ ins
    4
  elsif /DOUBLE/ =~ ins
    8
  else
    raise 'bad'
  end
end
23
# C type the value of instruction `ins` is converted to
# (e.g. "UINT16_SWAP" => "uint16_t"). Raises 'bad' for an unknown instruction.
def bspec_c_type ins
  if (int_match = /(U?INT\d+)/.match(ins))
    "#{int_match[1].downcase}_t"
  elsif /SINGLE/ =~ ins
    'float'
  elsif /DOUBLE/ =~ ins
    'double'
  else
    raise 'bad'
  end
end
31
# C statements (without the final `;`, which the opcode template appends)
# that load the raw bits of instruction `ins` from `s` into an unsigned
# local `r`, byte-swapped for *_SWAP instructions.
#
# The load goes through memcpy rather than `((uintN_t*)s)[0]`: `s` points at
# an arbitrary byte offset of a string, so a direct wide load may be
# misaligned and reads char data through an incompatible pointer type.
# NOTE(review): memcpy needs <string.h> in the including C file - presumably
# already pulled in by the Ruby headers; confirm.
def bspec_extract ins
  len = bspec_incr(ins) * 8
  load = "uint#{len}_t r; memcpy(&r, s, sizeof(r))"
  if ins.end_with?('SWAP')
    load += "; r = swap#{len}(r)"
  end
  load
end
40
+ def bspec_convert ins
41
+ case ins
42
+ when /(U)?INT64|UINT32/
43
+ if ins.start_with?('U')
44
+ "UINT64toNUM(r)"
45
+ else
46
+ "INT64toNUM(CAST(r, int64_t))"
47
+ end
48
+ when /INT32/
49
+ "INT2NUM(CAST(r, int32_t))"
50
+ when /INT(16|8)/
51
+ "INT2FIX(CAST(r, #{bspec_c_type ins}))"
52
+ when /SINGLE/
53
+ "DBL2NUM((double)CAST(r, float))"
54
+ when /DOUBLE/
55
+ "DBL2NUM(CAST(r, double))"
56
+ else
57
+ raise 'bad'
58
+ end
59
+ end
60
+
7
61
  desc "build and test"
8
62
  task :default => [:test, gem_package]
9
63
 
@@ -30,3 +84,86 @@ file gem_package => gem_files do
30
84
  end
31
85
  sh "gem build zscan.gemspec"
32
86
  end
87
+
88
+ desc "generate files"
89
+ task :gen => %w[ext/bspec_exec.inc ext/bspec_opcode_names.inc lib/zscan/instructions.rb]
90
+
91
+ file 'ext/bspec_exec.inc' => __FILE__ do
92
+ puts "generating ext/bspec_exec.inc"
93
+ opcode_list = bspec_insns.map do |ins|
94
+ "&&BS_#{ins}"
95
+ end.join ', '
96
+
97
+ opcode_segs = bspec_insns.map do |ins|
98
+ %Q{BS_#{ins}:
99
+ {
100
+ #{bspec_extract ins};
101
+ rb_ary_push(a, #{bspec_convert ins});
102
+ s += #{bspec_incr ins};
103
+ goto **(ip++);
104
+ }
105
+ }
106
+ end.join "\n"
107
+
108
+ File.open 'ext/bspec_exec.inc', 'w' do |f|
109
+ f.puts %Q|// GENERATED WITH: rake gen
110
+ #line 2 "ext/bspec_exec.inc"
111
+ __attribute__((__noinline__))
112
+ static VALUE bspec_exec(void** ip, char* s, VALUE a) {
113
+ static void* opcodes[] = { &&BS_RET, #{opcode_list} };
114
+ if (ip == NULL) {
115
+ return (VALUE)opcodes;
116
+ }
117
+ goto **(ip++);
118
+ BS_RET:
119
+ return a;
120
+ #{opcode_segs}
121
+ }|
122
+ end
123
+ end
124
+
125
+ file 'ext/bspec_opcode_names.inc' => __FILE__ do
126
+ puts 'generating ext/bspec_opcode_names.inc'
127
+ opcode_names = bspec_insns.map(&:inspect).join ', '
128
+ File.open 'ext/bspec_opcode_names.inc', 'w' do |f|
129
+ f.puts "// GENERATED WITH: rake gen"
130
+ f.puts %Q|const char* bspec_opcode_names[] = {"RET", #{opcode_names}};|
131
+ f.puts %Q|long bspec_opcode_size = #{bspec_insns.size + 1};|
132
+ end
133
+ end
134
+
135
+ file 'lib/zscan/instructions.rb' => __FILE__ do
136
+ puts 'generating lib/zscan/instructions.rb'
137
+ File.open 'lib/zscan/instructions.rb', 'w' do |f|
138
+ f.puts "# GENERATED WITH: rake gen"
139
+ f.puts "class ZScan::BinarySpec"
140
+
141
+ bspec_insns.each do |ins|
142
+ f.puts <<-RUBY
143
+ def #{ins.downcase} n=1
144
+ raise ArgumentError, "repeat count should be >= 1, but got \#{n}" if n < 1
145
+ n.times do
146
+ @code << #{ins}
147
+ @s_size += #{bspec_incr ins}
148
+ end
149
+ end
150
+ RUBY
151
+ end
152
+
153
+ alias_ins = (bspec_types - ['INT8', 'UINT8']).map &:downcase
154
+ f.puts " if ZScan::BinarySpec.big_endian?"
155
+ alias_ins.each do |ins|
156
+ f.puts " alias #{ins}_be #{ins}"
157
+ f.puts " alias #{ins}_le #{ins}_swap"
158
+ end
159
+ f.puts " else"
160
+ alias_ins.each do |ins|
161
+ f.puts " alias #{ins}_le #{ins}"
162
+ f.puts " alias #{ins}_be #{ins}_swap"
163
+ end
164
+ f.puts " end"
165
+ swap_ins = alias_ins.map{|ins| "#{ins}_swap"}
166
+ f.puts " undef #{swap_ins.join ', '}"
167
+ f.puts "end"
168
+ end
169
+ end
data/readme.md CHANGED
@@ -4,6 +4,7 @@
4
4
  - `ZScan#pos` is the codepoint position, and `ZScan#bytepos` is byte position.
5
5
  - Correctly scans anchors and look behind predicates.
6
6
  - Pos stack manipulation.
7
+ - Typed scanning methods: `#scan_float`, `#scan_int radix=nil`, `#scan_date format`, `#scan_binary binary_spec`.
7
8
 
8
9
  ## Install
9
10
 
@@ -22,7 +23,7 @@ z.scan /\w+/ #=> 'world'
22
23
  z.eos? #=> true
23
24
  ```
24
25
 
25
- ## Motivation
26
+ ## Motivation - `StringScanner`
26
27
 
27
28
  Ruby's stdlib `StringScanner` treats the scanning position as beginning of string:
28
29
 
@@ -46,41 +47,112 @@ z.scan /^/ #=> nil
46
47
 
47
48
  See also https://bugs.ruby-lang.org/issues/7092
48
49
 
50
+ ## Other motivations - `scanf` / `strptime` / `unpack`
51
+
52
+ - For scanning and converting, Ruby's stdlib `Scanf` is slow (it creates a regexp array every time it is called) and cannot cooperate with a scanner.
53
+ - For date parsing, `strptime` doesn't tell the parsed length.
54
+ - For binary parsing, `unpack` is a slow interpreter, and the instructions are quite irregular.
55
+
49
56
  ## Essential methods
50
57
 
51
58
  - `ZScan.new string, dup=false`
52
59
  - `#scan regexp_or_string`
53
60
  - `#skip regexp_or_string`
54
61
  - `#match_bytesize regexp_or_string` return length of matched bytes or `nil`.
62
+ - `#scan_float` scan a float number that does not start with whitespace. It deals with multibyte encodings for you.
63
+ - `#scan_int radix=nil` if radix is nil, decide base by prefix: `0x` is 16, `0` is 8, `0b` is 2, otherwise 10. `radix` should be in range `2..36`.
64
+ - `#scan_date format_string, start=Date::ITALY` scan a `DateTime` object, see also [strptime](http://rubydoc.info/stdlib/date/DateTime.strptime).
65
+ - `#scan_binary binary_spec` optimized and readable binary scan, see below for how to create a `ZScan::BinarySpec`.
66
+ - `#unpack format_string`
55
67
  - `#eos?`
56
68
  - `#string` note: returns a dup. Don't worry about the performance, because it is a copy-on-write string.
57
69
  - `#rest`
58
70
 
71
+ ## String delegates
72
+
73
+ For convenience
74
+
75
+ - `#<< append_string`
76
+ - `#[]= range, replace_string` note: if `range` starts before pos, moves pos left, also clears the stack.
77
+ - `#size`
78
+ - `#bytesize`
79
+
80
+ ## Parsing combinators
81
+
82
+ Combinators that manage scanner pos and stack state for you. In the combinators, iteration stops if the given block returns `nil` or `false`. They can be nested, which is useful for building parsers.
83
+
84
+ - `#try &block` returns `block`'s return.
85
+ - `#zero_or_one result=[], &block` try to execute 0 or 1 time, returns `result`.
86
+ - `#zero_or_more result=[], &block` try to execute 0 or more times, also stops iteration if the scanner does not advance, returns `result`.
87
+ - `#one_or_more result=[], &block` try to execute 1 or more times, also stops iteration if the scanner does not advance, returns `nil` or `result`.
88
+
59
89
  ## Pos management
60
90
 
61
91
  - `#pos`
62
92
  - `#pos= new_pos` note: complexity ~ `new_pos > pos ? new_pos - pos : new_pos`.
63
93
  - `#bytepos`
64
94
  - `#bytepos= new_bytepos` note: complexity ~ `abs(new_bytepos - bytepos)`.
95
+ - `#line_index` line index of current position, start from `0`.
65
96
  - `#advance n` move forward `n` codepoints, if `n < 0`, move backward. Stops at beginning or end.
66
97
  - `#reset` go to beginning.
67
98
  - `#terminate` go to end of string.
68
99
 
69
- ## Efficient pos stack manipulation
100
+ ## (Low level) Efficient pos stack manipulation
70
101
 
71
102
  - `#push` push current pos into the stack.
72
103
  - `#pop` set current pos to top of the stack, and pop it.
73
104
  - `#drop` drop top of pos stack without changing current pos.
74
105
  - `#restore` set current pos to top of the stack.
75
106
  - `#clear_pos_stack` clear pos stack.
76
- - `#try` try to do several scans in the given block, fall back to init pos if block returns `nil` or `false`. Returns block's return, can be nested.
77
107
 
78
- ## String delegates
108
+ ## `ZScan::BinarySpec`
79
109
 
80
- - `#<< append_string`
81
- - `#[]= range, replace_string` note: if `range` starts before pos, moves pos left, also clears the stack.
82
- - `#size`
83
- - `#bytesize`
110
+ Specify a sequence of binary data. Designed for binary protocol parsing. Example:
111
+
112
+ ```ruby
113
+ # create a ZScan::BinarySpec
114
+ s = ZScan.binary_spec do
115
+ int8 # once
116
+ uint32_le 2 # little endian, twice
117
+ double_be 1 # big endian, once
118
+ end
119
+ z = ZScan.new [-1, 2, 3, 4.0].pack('cI<2G') + "rest"
120
+ z.scan_binary s #=> [-1, 2, 3, 4.0]
121
+ z.rest #=> 'rest'
122
+ ```
123
+
124
+ Integer instructions:
125
+
126
+ ```ruby
127
+ int8 uint8
128
+ int16 uint16 int16_le uint16_le int16_be uint16_be
129
+ int32 uint32 int32_le uint32_le int32_be uint32_be
130
+ int64 uint64 int64_le uint64_le int64_be uint64_be
131
+ ```
132
+
133
+ Single precision float instructions:
134
+
135
+ ```ruby
136
+ single single_le single_be
137
+ ```
138
+
139
+ Double precision float instructions:
140
+
141
+ ```ruby
142
+ double double_le double_be
143
+ ```
144
+
145
+ Endians:
146
+
147
+ - (without endian suffix) native endian
148
+ - `*_le` little endian (VAX, x86, Windows string code unit)
149
+ - `*_be` big endian, network endian (SPARC, Java string code unit)
150
+
151
+ Repeat count must be integer `>= 1`, default is `1`.
152
+
153
+ It is implemented as a direct-threaded bytecode interpreter. Performance vs `String#unpack`:
154
+
155
+ todo
84
156
 
85
157
  ## License
86
158
 
@@ -0,0 +1,28 @@
1
+ require_relative "spec_helper"
2
+
3
+ describe 'ZScan binary scanning methods' do
4
+ it "#unpack" do
5
+ z = ZScan.new "\x01\x02\x03"
6
+ assert_raise ArgumentError do
7
+ z.unpack '@1C'
8
+ end
9
+ assert_equal [1, 2], (z.unpack 'CC')
10
+ assert_equal 2, z.pos
11
+ assert_equal nil, (z.unpack 'I')
12
+ assert_equal 2, z.pos
13
+ end
14
+
15
+ it "#scan_binary" do
16
+ s = ZScan.binary_spec do
17
+ int8 # once
18
+ uint32_le 2 # little endian, twice
19
+ double_be 1 # big endian, once
20
+ single 1
21
+ end
22
+ a = [-1, 2, 3, 4.0, 3.0]
23
+ z = ZScan.new(a.pack('cI<2Gf') + 'rest')
24
+ b = z.scan_binary s
25
+ assert_equal 'rest', z.rest
26
+ assert_equal a, b
27
+ end
28
+ end
@@ -0,0 +1,52 @@
1
+ require_relative "spec_helper"
2
+
3
+ describe 'ZScan combinators' do
4
+ it "#try restores pos" do
5
+ z = ZScan.new "hello"
6
+ return1 = z.try do
7
+ z.scan 'h'
8
+ z.scan 'e'
9
+ end
10
+ assert_equal 'e', return1
11
+ assert_equal 2, z.pos
12
+
13
+ return2 = z.try do
14
+ z.scan 'l'
15
+ z.scan 'l'
16
+ z.scan 'p' # fails
17
+ end
18
+ assert_equal nil, return2
19
+ assert_equal 2, z.pos
20
+ end
21
+
22
+ it "#zero_or_one" do
23
+ z = Zscan.new "aab"
24
+ assert_equal ['a'], z.zero_or_one{z.scan 'a'}
25
+ assert_equal 1, z.pos
26
+
27
+ z = Zscan.new 'aab'
28
+ assert_equal [], z.zero_or_one{z.scan 'b'}
29
+ assert_equal 0, z.pos
30
+ end
31
+
32
+ it "#zero_or_more" do
33
+ z = Zscan.new "aab"
34
+ assert_equal ['a', 'a'], z.zero_or_more{z.scan 'a'}
35
+ assert_equal 2, z.pos
36
+
37
+ assert_equal 'aab', z.zero_or_more('aa'){z.scan 'c'; z.scan 'b'}
38
+
39
+ z = Zscan.new 'aab'
40
+ assert_equal [], z.zero_or_more{z.scan 'b'}
41
+ assert_equal 0, z.pos
42
+ end
43
+
44
+ it "#one_or_more" do
45
+ z = Zscan.new 'aab'
46
+ assert_equal ['a', 'a'], z.one_or_more{z.scan 'a'}
47
+ assert_equal 2, z.pos
48
+
49
+ z = Zscan.new 'aab'
50
+ assert_equal nil, z.one_or_more([]){z.scan 'b'}
51
+ end
52
+ end
@@ -0,0 +1,8 @@
1
+ require_relative "../lib/zscan"
2
+ require 'rspec/autorun'
3
+ RSpec.configure do |config|
4
+ config.expect_with :stdlib
5
+ config.before :all do
6
+ # GC.stress = true
7
+ end
8
+ end
@@ -0,0 +1,48 @@
1
+ require_relative "spec_helper"
2
+
3
+ describe "typed scan" do
4
+ it "#scan_int" do
5
+ z = Zscan.new " 1 0b10F5 10 030"
6
+ assert_equal nil, z.scan_int
7
+ z.advance 1
8
+ assert_equal 1, z.scan_int(10)
9
+
10
+ z.advance 1
11
+ assert_equal 0b10, z.scan_int
12
+ assert_equal 0xF5, z.scan_int(16)
13
+
14
+ z.advance 1
15
+ assert_equal 12, z.scan_int(12)
16
+
17
+ z.advance 1
18
+ assert_equal 030, z.scan_int
19
+ end
20
+
21
+ it "#scan_float" do
22
+ z = Zscan.new " -3.5e23"
23
+ assert_equal nil, z.scan_float
24
+ z.advance 1
25
+ assert_equal -3.5e23, z.scan_float
26
+ end
27
+
28
+ it "won't overflow in #scan_float" do
29
+ s = '1.23E15'.byteslice 0, 4
30
+ z = Zscan.new s
31
+ assert_equal 1.23, z.scan_float
32
+ assert_equal 4, z.pos
33
+ end
34
+
35
+ it "#scan_date" do
36
+ z = Zscan.new " 2001 04 6 04 05 06 +7 231rest"
37
+ assert_equal nil, z.scan_date('%Y %U %w %H %M %S %z %N')
38
+ z.advance 1
39
+
40
+ d = z.scan_date '%Y %U %w %H %M %S %z %N'
41
+ assert_equal 0.231, d.sec_fraction
42
+ assert_equal 'rest', z.rest
43
+
44
+ z.pos = 1
45
+ z.scan_date '%Y %U %w ahoy %H %M %S %z' # bad format
46
+ assert_equal 1, z.pos
47
+ end
48
+ end
data/spec/zscan_spec.rb CHANGED
@@ -1,18 +1,10 @@
1
- require_relative "../lib/zscan"
2
- require 'rspec/autorun'
3
- RSpec.configure do |config|
4
- config.expect_with :stdlib
5
- end
1
+ require_relative "spec_helper"
6
2
 
7
3
  describe ZScan do
8
4
  before :each do
9
5
  @z = ZScan.new 'ab你好'
10
6
  end
11
7
 
12
- before :all do
13
- GC.stress = true
14
- end
15
-
16
8
  it "random workflow" do
17
9
  assert_equal 2, @z.match_bytesize('ab')
18
10
  @z.pos = 4
@@ -76,21 +68,23 @@ describe ZScan do
76
68
  assert_equal 3, @z.pos
77
69
  end
78
70
 
79
- it "#try restores pos" do
80
- z = ZScan.new "hello"
81
- return1 = z.try do
82
- z.scan 'h'
83
- z.scan 'e'
84
- end
85
- assert_equal 'e', return1
86
- assert_equal 2, z.pos
71
+ it "#reset, #terminate and #line_index" do
72
+ z = ZScan.new ''
73
+ assert_equal 0, z.line_index
74
+ z.terminate
75
+ assert_equal 0, z.line_index
76
+ z.reset
77
+ assert_equal 0, z.line_index
87
78
 
88
- return2 = z.try do
89
- z.scan 'l'
90
- z.scan 'l'
91
- z.scan 'p' # fails
92
- end
93
- assert_equal nil, return2
94
- assert_equal 2, z.pos
79
+ z = ZScan.new "a\nb\nc"
80
+ assert_equal 0, z.line_index
81
+ z.terminate
82
+ assert_equal 2, z.line_index
83
+ z.reset
84
+ assert_equal 0, z.line_index
85
+ z.pos = 1
86
+ assert_equal 0, z.line_index
87
+ z.pos = 2
88
+ assert_equal 1, z.line_index
95
89
  end
96
90
  end
data/zscan.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "zscan"
3
- s.version = "1.0.1" # version mapped from zscan.rb, don't change here
3
+ s.version = "1.1" # version mapped from zscan.rb, don't change here
4
4
  s.author = "Zete Lui"
5
5
  s.homepage = "https://github.com/luikore/zscan"
6
6
  s.platform = Gem::Platform::RUBY
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
9
9
  s.required_ruby_version = ">=1.9.2"
10
10
  s.licenses = ['BSD']
11
11
 
12
- s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
12
+ s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c,inc}}')
13
13
  s.require_paths = ["lib"]
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.rubygems_version = '1.8.24'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zscan
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: '1.1'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-07 00:00:00.000000000 Z
11
+ date: 2013-05-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: improved string scanner, respects anchors and lookbehinds, supports codepoint
14
14
  positioning
@@ -21,11 +21,19 @@ files:
21
21
  - rakefile
22
22
  - zscan.gemspec
23
23
  - readme.md
24
- - bench.rb
24
+ - benchmark/vs-strscan.rb
25
+ - benchmark/vs-unpack.rb
25
26
  - ext/extconf.rb
27
+ - lib/zscan/instructions.rb
26
28
  - lib/zscan.rb
29
+ - spec/binary_scan_spec.rb
30
+ - spec/combinator_spec.rb
31
+ - spec/spec_helper.rb
32
+ - spec/typed_scan_spec.rb
27
33
  - spec/zscan_spec.rb
28
34
  - ext/zscan.c
35
+ - ext/bspec_exec.inc
36
+ - ext/bspec_opcode_names.inc
29
37
  homepage: https://github.com/luikore/zscan
30
38
  licenses:
31
39
  - BSD