zscan 1.0.1 → 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8baa1419230fe03d1ec41cfa65202dce10d0512c
4
- data.tar.gz: 0cf1335b2d7221ca4c6329dd6e8e2b5b0f3ac59d
3
+ metadata.gz: 998b97db8e9341f3920caa27bf11558954a777ba
4
+ data.tar.gz: 033986f8e4a4086985bca23c84f8acfabd2f5e29
5
5
  SHA512:
6
- metadata.gz: a9ccf8455843673336365dd446f808cb52f1538a53c86cc69d102640e4a6afb40332e3c65f6f7a5a4178895a1095571e4328a501e64797a01a6af1cdc4256549
7
- data.tar.gz: 8c3d109803e0e9d8e91b8787f98190fd7e400613772f57347ff2d0a99725202c8eb3444ba11efc7a606897d78cec5d9932638292c5ba2583fb697f9e7c8c2540
6
+ metadata.gz: a8c23d9f29b57e113a55e46fb024bce0337aa8ee5b8317744ed9ce9c13ddf35e4b1b7af9bbf25d67c6b14e6c30bb7de00c214692cb5fea05c2e0f4fa6116478b
7
+ data.tar.gz: 06ccbc8c793f873a630c4774b746ad8ca1cfa4c2895414871c0b435812bd902e3f75618a6134551b5ab3f35b2838e12c9072e0bb15ff8f6010a7ba2ca4e8a83a
@@ -1,5 +1,5 @@
1
1
  require "strscan"
2
- require_relative "lib/zscan"
2
+ require_relative "../lib/zscan"
3
3
  require "benchmark"
4
4
 
5
5
  s = "word\n"
@@ -0,0 +1,21 @@
1
+ require_relative "../lib/zscan"
2
+ require "benchmark"
3
+
4
# Benchmark: compares ZScan#unpack, ZScan#scan_binary and String#unpack on the
# same 21-byte record (int8, two little-endian doubles, one big-endian single).
spec = ZScan.binary_spec do
  int8
  double_le 2
  single_be
end

arr = [1, 1.1, 1.2, 1.3]
str = arr.pack 'cE2g'
# The class is ZScan (capital S); `Zscan` would raise NameError.
z = ZScan.new str.b

# Every measured loop resets the position, so the first measurement is the
# baseline cost of the reset alone.
puts 'reference nop group'
puts Benchmark.measure{ 100000.times{ z.pos = 0 } }
puts 'ZScan#unpack'
puts Benchmark.measure{ 100000.times{ z.pos = 0; z.unpack 'cE2g' } }
puts 'ZScan#scan_binary'
puts Benchmark.measure{ 100000.times{ z.pos = 0; z.scan_binary spec } }
puts 'String#unpack'
puts Benchmark.measure{ 100000.times{ z.pos = 0; str.unpack 'cE2g' } }
@@ -0,0 +1,156 @@
1
+ // GENERATED WITH: rake gen
2
+ #line 2 "ext/bspec_exec.inc"
3
+ __attribute__((__noinline__))
4
+ static VALUE bspec_exec(void** ip, char* s, VALUE a) {
5
+ static void* opcodes[] = { &&BS_RET, &&BS_INT8, &&BS_INT16, &&BS_INT16_SWAP, &&BS_INT32, &&BS_INT32_SWAP, &&BS_INT64, &&BS_INT64_SWAP, &&BS_UINT8, &&BS_UINT16, &&BS_UINT16_SWAP, &&BS_UINT32, &&BS_UINT32_SWAP, &&BS_UINT64, &&BS_UINT64_SWAP, &&BS_SINGLE, &&BS_SINGLE_SWAP, &&BS_DOUBLE, &&BS_DOUBLE_SWAP };
6
+ if (ip == NULL) {
7
+ return (VALUE)opcodes;
8
+ }
9
+ goto **(ip++);
10
+ BS_RET:
11
+ return a;
12
+ BS_INT8:
13
+ {
14
+ uint8_t r = ((uint8_t*)s)[0];
15
+ rb_ary_push(a, INT2FIX(CAST(r, int8_t)));
16
+ s += 1;
17
+ goto **(ip++);
18
+ }
19
+
20
+ BS_INT16:
21
+ {
22
+ uint16_t r = ((uint16_t*)s)[0];
23
+ rb_ary_push(a, INT2FIX(CAST(r, int16_t)));
24
+ s += 2;
25
+ goto **(ip++);
26
+ }
27
+
28
+ BS_INT16_SWAP:
29
+ {
30
+ uint16_t r = swap16(((uint16_t*)s)[0]);
31
+ rb_ary_push(a, INT2FIX(CAST(r, int16_t)));
32
+ s += 2;
33
+ goto **(ip++);
34
+ }
35
+
36
+ BS_INT32:
37
+ {
38
+ uint32_t r = ((uint32_t*)s)[0];
39
+ rb_ary_push(a, INT2NUM(CAST(r, int32_t)));
40
+ s += 4;
41
+ goto **(ip++);
42
+ }
43
+
44
+ BS_INT32_SWAP:
45
+ {
46
+ uint32_t r = swap32(((uint32_t*)s)[0]);
47
+ rb_ary_push(a, INT2NUM(CAST(r, int32_t)));
48
+ s += 4;
49
+ goto **(ip++);
50
+ }
51
+
52
+ BS_INT64:
53
+ {
54
+ uint64_t r = ((uint64_t*)s)[0];
55
+ rb_ary_push(a, INT64toNUM(CAST(r, int64_t)));
56
+ s += 8;
57
+ goto **(ip++);
58
+ }
59
+
60
+ BS_INT64_SWAP:
61
+ {
62
+ uint64_t r = swap64(((uint64_t*)s)[0]);
63
+ rb_ary_push(a, INT64toNUM(CAST(r, int64_t)));
64
+ s += 8;
65
+ goto **(ip++);
66
+ }
67
+
68
+ BS_UINT8:
69
+ {
70
+ uint8_t r = ((uint8_t*)s)[0];
71
+ rb_ary_push(a, INT2FIX(CAST(r, uint8_t)));
72
+ s += 1;
73
+ goto **(ip++);
74
+ }
75
+
76
+ BS_UINT16:
77
+ {
78
+ uint16_t r = ((uint16_t*)s)[0];
79
+ rb_ary_push(a, INT2FIX(CAST(r, uint16_t)));
80
+ s += 2;
81
+ goto **(ip++);
82
+ }
83
+
84
+ BS_UINT16_SWAP:
85
+ {
86
+ uint16_t r = swap16(((uint16_t*)s)[0]);
87
+ rb_ary_push(a, INT2FIX(CAST(r, uint16_t)));
88
+ s += 2;
89
+ goto **(ip++);
90
+ }
91
+
92
+ BS_UINT32:
93
+ {
94
+ uint32_t r = ((uint32_t*)s)[0];
95
+ rb_ary_push(a, UINT64toNUM(r));
96
+ s += 4;
97
+ goto **(ip++);
98
+ }
99
+
100
+ BS_UINT32_SWAP:
101
+ {
102
+ uint32_t r = swap32(((uint32_t*)s)[0]);
103
+ rb_ary_push(a, UINT64toNUM(r));
104
+ s += 4;
105
+ goto **(ip++);
106
+ }
107
+
108
+ BS_UINT64:
109
+ {
110
+ uint64_t r = ((uint64_t*)s)[0];
111
+ rb_ary_push(a, UINT64toNUM(r));
112
+ s += 8;
113
+ goto **(ip++);
114
+ }
115
+
116
+ BS_UINT64_SWAP:
117
+ {
118
+ uint64_t r = swap64(((uint64_t*)s)[0]);
119
+ rb_ary_push(a, UINT64toNUM(r));
120
+ s += 8;
121
+ goto **(ip++);
122
+ }
123
+
124
+ BS_SINGLE:
125
+ {
126
+ uint32_t r = ((uint32_t*)s)[0];
127
+ rb_ary_push(a, DBL2NUM((double)CAST(r, float)));
128
+ s += 4;
129
+ goto **(ip++);
130
+ }
131
+
132
+ BS_SINGLE_SWAP:
133
+ {
134
+ uint32_t r = swap32(((uint32_t*)s)[0]);
135
+ rb_ary_push(a, DBL2NUM((double)CAST(r, float)));
136
+ s += 4;
137
+ goto **(ip++);
138
+ }
139
+
140
+ BS_DOUBLE:
141
+ {
142
+ uint64_t r = ((uint64_t*)s)[0];
143
+ rb_ary_push(a, DBL2NUM(CAST(r, double)));
144
+ s += 8;
145
+ goto **(ip++);
146
+ }
147
+
148
+ BS_DOUBLE_SWAP:
149
+ {
150
+ uint64_t r = swap64(((uint64_t*)s)[0]);
151
+ rb_ary_push(a, DBL2NUM(CAST(r, double)));
152
+ s += 8;
153
+ goto **(ip++);
154
+ }
155
+
156
+ }
@@ -0,0 +1,3 @@
1
+ // GENERATED WITH: rake gen
2
+ const char* bspec_opcode_names[] = {"RET", "INT8", "INT16", "INT16_SWAP", "INT32", "INT32_SWAP", "INT64", "INT64_SWAP", "UINT8", "UINT16", "UINT16_SWAP", "UINT32", "UINT32_SWAP", "UINT64", "UINT64_SWAP", "SINGLE", "SINGLE_SWAP", "DOUBLE", "DOUBLE_SWAP"};
3
+ long bspec_opcode_size = 19;
data/ext/zscan.c CHANGED
@@ -1,23 +1,24 @@
1
1
  #include <ruby/ruby.h>
2
2
  #include <ruby/re.h>
3
3
  #include <ruby/encoding.h>
4
+ #include <ctype.h>
5
+
6
+ // todo infect check
4
7
 
5
8
  typedef struct {
6
- size_t pos;
7
- size_t bytepos;
9
+ long pos;
10
+ long bytepos;
8
11
  } Pos;
9
12
 
10
13
  typedef struct {
11
- size_t pos;
12
- size_t bytepos;
14
+ long pos;
15
+ long bytepos;
13
16
  VALUE s;
14
- size_t stack_i;
15
- size_t stack_cap;
17
+ long stack_i;
18
+ long stack_cap;
16
19
  Pos* stack;
17
20
  } ZScan;
18
21
 
19
- #define P ZScan* p = rb_check_typeddata(self, &zscan_type)
20
-
21
22
  static void zscan_mark(void* pp) {
22
23
  ZScan* p = pp;
23
24
  rb_gc_mark(p->s);
@@ -39,6 +40,8 @@ static const rb_data_type_t zscan_type = {
39
40
  {zscan_mark, zscan_free, zscan_memsize}
40
41
  };
41
42
 
43
+ #define P ZScan* p = rb_check_typeddata(self, &zscan_type)
44
+
42
45
  static VALUE zscan_alloc(VALUE klass) {
43
46
  ZScan* p = ALLOC(ZScan);
44
47
  MEMZERO(p, ZScan, 1);
@@ -66,13 +69,12 @@ static VALUE zscan_pos(VALUE self) {
66
69
 
67
70
  static VALUE zscan_advance(VALUE self, VALUE v_diff) {
68
71
  P;
69
- long signed_n = p->pos + NUM2LONG(v_diff);
70
- if (signed_n < 0) {
72
+ long n = p->pos + NUM2LONG(v_diff);
73
+ if (n < 0) {
71
74
  p->pos = 0;
72
75
  p->bytepos = 0;
73
76
  return self;
74
77
  }
75
- size_t n = signed_n;
76
78
 
77
79
  // because there's no "reverse scan" API, we have a O(n) routine :(
78
80
  if (n < p->pos) {
@@ -82,7 +84,7 @@ static VALUE zscan_advance(VALUE self, VALUE v_diff) {
82
84
 
83
85
  if (n > p->pos) {
84
86
  rb_encoding* enc = rb_enc_get(p->s);
85
- size_t byteend = RSTRING_LEN(p->s);
87
+ long byteend = RSTRING_LEN(p->s);
86
88
  char* ptr = RSTRING_PTR(p->s);
87
89
  for (; p->pos < n && p->bytepos < byteend;) {
88
90
  int n = rb_enc_mbclen(ptr + p->bytepos, ptr + byteend, enc);
@@ -105,7 +107,7 @@ static VALUE zscan_bytepos(VALUE self) {
105
107
  static VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos) {
106
108
  P;
107
109
  long signed_bytepos = NUM2LONG(v_bytepos);
108
- size_t from, to, bytepos;
110
+ long from, to, bytepos;
109
111
 
110
112
  if (signed_bytepos > RSTRING_LEN(p->s)) {
111
113
  bytepos = RSTRING_LEN(p->s);
@@ -127,7 +129,7 @@ static VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos) {
127
129
 
128
130
  rb_encoding* enc = rb_enc_get(p->s);
129
131
  char* ptr = RSTRING_PTR(p->s);
130
- size_t diff = 0;
132
+ long diff = 0;
131
133
  for (; from < to;) {
132
134
  int n = rb_enc_mbclen(ptr + from, ptr + to, enc);
133
135
  if (n) {
@@ -154,7 +156,7 @@ static VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos) {
154
156
 
155
157
  static VALUE zscan_eos_p(VALUE self) {
156
158
  P;
157
- return (p->bytepos == (size_t)RSTRING_LEN(p->s) ? Qtrue : Qfalse);
159
+ return (p->bytepos == RSTRING_LEN(p->s) ? Qtrue : Qfalse);
158
160
  }
159
161
 
160
162
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
@@ -262,10 +264,13 @@ static VALUE zscan_clear_pos_stack(VALUE self) {
262
264
  return self;
263
265
  }
264
266
 
265
- static VALUE zscan_try(VALUE self) {
266
- if (!rb_block_given_p()) {
267
- rb_raise(rb_eRuntimeError, "need a block");
267
+ #define REQUIRE_BLOCK \
268
+ if (!rb_block_given_p()) {\
269
+ rb_raise(rb_eRuntimeError, "need a block");\
268
270
  }
271
+
272
+ static VALUE zscan_try(VALUE self) {
273
+ REQUIRE_BLOCK;
269
274
  VALUE r;
270
275
  zscan_push(self);
271
276
  r = rb_yield(Qnil);
@@ -277,6 +282,175 @@ static VALUE zscan_try(VALUE self) {
277
282
  return r;
278
283
  }
279
284
 
285
+ static VALUE zscan_zero_or_one(int argc, VALUE* argv, VALUE self) {
286
+ REQUIRE_BLOCK;
287
+ volatile VALUE a = Qnil;
288
+ volatile VALUE r;
289
+ rb_scan_args(argc, argv, "01", &a);
290
+ if (a == Qnil) {
291
+ a = rb_ary_new();
292
+ }
293
+ zscan_push(self);
294
+ r = rb_yield(Qnil);
295
+ if (RTEST(r)) {
296
+ rb_funcall(a, rb_intern("<<"), 1, r);
297
+ zscan_drop(self);
298
+ } else {
299
+ zscan_pop(self);
300
+ }
301
+ return a;
302
+ }
303
+
304
+ static VALUE zscan_zero_or_more(int argc, VALUE* argv, VALUE self) {
305
+ REQUIRE_BLOCK;
306
+ volatile VALUE a = Qnil;
307
+ volatile VALUE r;
308
+ long backpos;
309
+ P;
310
+ rb_scan_args(argc, argv, "01", &a);
311
+ if (a == Qnil) {
312
+ a = rb_ary_new();
313
+ }
314
+ for (;;) {
315
+ zscan_push(self);
316
+ backpos = p->bytepos;
317
+ r = rb_yield(Qnil);
318
+ if (RTEST(r) && backpos != p->bytepos) {
319
+ rb_funcall(a, rb_intern("<<"), 1, r);
320
+ zscan_drop(self);
321
+ } else {
322
+ zscan_pop(self);
323
+ break;
324
+ }
325
+ }
326
+ return a;
327
+ }
328
+
329
+ static VALUE zscan_one_or_more(int argc, VALUE* argv, VALUE self) {
330
+ REQUIRE_BLOCK;
331
+ volatile VALUE a = Qnil;
332
+ volatile VALUE r;
333
+
334
+ r = rb_yield(Qnil);
335
+ if (RTEST(r)) {
336
+ long backpos;
337
+ P;
338
+ rb_scan_args(argc, argv, "01", &a);
339
+ if (a == Qnil) {
340
+ a = rb_ary_new();
341
+ }
342
+
343
+ rb_funcall(a, rb_intern("<<"), 1, r);
344
+ for (;;) {
345
+ zscan_push(self);
346
+ backpos = p->bytepos;
347
+ r = rb_yield(Qnil);
348
+ if (RTEST(r) && backpos != p->bytepos) {
349
+ rb_funcall(a, rb_intern("<<"), 1, r);
350
+ zscan_drop(self);
351
+ } else {
352
+ zscan_pop(self);
353
+ break;
354
+ }
355
+ }
356
+ return a;
357
+ } else {
358
+ return Qnil;
359
+ }
360
+ }
361
+
362
+ VALUE zscan_scan_float(VALUE self) {
363
+ P;
364
+ if (RSTRING_LEN(p->s) == p->bytepos) {
365
+ return Qnil;
366
+ }
367
+
368
+ char* s = RSTRING_PTR(p->s) + p->bytepos;
369
+ if (isspace(s[0])) {
370
+ return Qnil;
371
+ }
372
+ char* e;
373
+ double d = strtod(s, &e);
374
+ if (e == s || e - s > RSTRING_LEN(p->s) - p->bytepos) {
375
+ return Qnil;
376
+ } else {
377
+ // it ok to use advance because the source is ascii compatible
378
+ zscan_advance(self, LONG2NUM(e - s));
379
+ return DBL2NUM(d);
380
+ }
381
+ }
382
+
383
+ static VALUE bspec_big_endian_p(VALUE self) {
384
+ # ifdef DYNAMIC_ENDIAN
385
+ /* for universal binary of NEXTSTEP and MacOS X */
386
+ int init = 1;
387
+ char* p = (char*)&init;
388
+ return p[0] ? Qfalse : Qtrue;
389
+ # elif defined(WORDS_BIGENDIAN)
390
+ return Qtrue;
391
+ #else
392
+ return Qfalse;
393
+ #endif
394
+ }
395
+
396
+ #define GCC_VERSION_SINCE(major, minor, patchlevel) \
397
+ (defined(__GNUC__) && !defined(__INTEL_COMPILER) && \
398
+ ((__GNUC__ > (major)) || \
399
+ (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
400
+ (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel))))
401
+
402
+ #if GCC_VERSION_SINCE(4,3,0) || defined(__clang__)
403
+ # define swap32(x) __builtin_bswap32(x)
404
+ # define swap64(x) __builtin_bswap64(x)
405
+ #endif
406
+
407
+ #ifndef swap16
408
+ # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
409
+ #endif
410
+
411
+ #ifndef swap32
412
+ # define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
413
+ |(((x)>>24)&0xFF) \
414
+ |(((x)&0x0000FF00)<<8) \
415
+ |(((x)&0x00FF0000)>>8) ))
416
+ #endif
417
+
418
+ #ifndef swap64
419
+ # ifdef HAVE_INT64_T
420
+ # define byte_in_64bit(n) ((uint64_t)0xff << (n))
421
+ # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
422
+ |(((x)>>56)&0xFF) \
423
+ |(((x)&byte_in_64bit(8))<<40) \
424
+ |(((x)&byte_in_64bit(48))>>40) \
425
+ |(((x)&byte_in_64bit(16))<<24) \
426
+ |(((x)&byte_in_64bit(40))>>24) \
427
+ |(((x)&byte_in_64bit(24))<<8) \
428
+ |(((x)&byte_in_64bit(32))>>8)))
429
+ # endif
430
+ #endif
431
+
432
+ // NOTE can not use sizeof in preprocessor
433
+ #define INT64toNUM(x) (sizeof(long) == 8 ? LONG2NUM(x) : LL2NUM(x))
434
+ #define UINT64toNUM(x) (sizeof(long) == 8 ? ULONG2NUM(x) : ULL2NUM(x))
435
+
436
+ #define CAST(var, ty) *((ty*)(&(var)))
437
+
438
+ #include "bspec_exec.inc"
439
+
440
+ static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
441
+ P;
442
+ long s_size = NUM2LONG(rb_iv_get(spec, "@s_size"));
443
+ if (p->bytepos + s_size > RSTRING_LEN(p->s)) {
444
+ return Qnil;
445
+ }
446
+ VALUE code = rb_iv_get(spec, "@code");
447
+ long a_size = RSTRING_LEN(code) / sizeof(void*);
448
+ volatile VALUE a = rb_ary_new2(a_size);
449
+ bspec_exec((void**)RSTRING_PTR(code), RSTRING_PTR(p->s) + p->bytepos, a);
450
+ zscan_bytepos_eq(self, LONG2NUM(p->bytepos + s_size));
451
+ return a;
452
+ }
453
+
280
454
  void Init_zscan() {
281
455
  VALUE zscan = rb_define_class("ZScan", rb_cObject);
282
456
  rb_define_alloc_func(zscan, zscan_alloc);
@@ -295,5 +469,23 @@ void Init_zscan() {
295
469
  rb_define_method(zscan, "drop", zscan_drop, 0);
296
470
  rb_define_method(zscan, "restore", zscan_restore, 0);
297
471
  rb_define_method(zscan, "clear_pos_stack", zscan_clear_pos_stack, 0);
472
+
298
473
  rb_define_method(zscan, "try", zscan_try, 0);
474
+ rb_define_method(zscan, "zero_or_one", zscan_zero_or_one, -1);
475
+ rb_define_method(zscan, "zero_or_more", zscan_zero_or_more, -1);
476
+ rb_define_method(zscan, "one_or_more", zscan_one_or_more, -1);
477
+
478
+ rb_define_method(zscan, "scan_float", zscan_scan_float, 0);
479
+ rb_define_method(zscan, "scan_binary", zscan_scan_binary, 1);
480
+
481
+ VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
482
+ rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
483
+
484
+ # include "bspec_opcode_names.inc"
485
+ void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
486
+ for (long i = 0; i < bspec_opcode_size; i++) {
487
+ VALUE bytecode = rb_str_new((char*)&opcodes[i], sizeof(void*));
488
+ OBJ_FREEZE(bytecode);
489
+ rb_define_const(bs, bspec_opcode_names[i], bytecode);
490
+ }
299
491
  }
data/lib/zscan.rb CHANGED
@@ -1,10 +1,17 @@
1
1
  require_relative "../ext/zscan"
2
+ require_relative "zscan/instructions"
3
+ require "date"
2
4
 
3
5
  class ZScan
4
- VERSION = '1.0.1'
6
+ VERSION = '1.1'
5
7
 
6
8
  def initialize s, dup=false
7
- _internal_init dup ? s.dup : s
9
+ if s.encoding.ascii_compatible?
10
+ s = dup ? s.dup : s
11
+ else
12
+ s = s.encode 'utf-8'
13
+ end
14
+ _internal_init s
8
15
  end
9
16
 
10
17
  def string
@@ -17,6 +24,76 @@ class ZScan
17
24
  end
18
25
  end
19
26
 
27
# Scans an optionally signed integer at the current position.
#
# radix - nil to detect the base from the prefix (`0b` => 2, `0x` => 16,
#         leading `0` => 8, otherwise 10), or an Integer in 2..36.
#
# Returns the Integer and advances past it, or returns nil and leaves the
# position unchanged when no integer is found.
# Raises ArgumentError when radix is not nil and not an Integer in 2..36.
def scan_int radix=nil
  # Validate before entering `try`: raising inside its block would leave a
  # pushed position on the stack.
  unless radix.nil? || (radix.is_a?(Integer) && radix.between?(2, 36))
    raise ArgumentError, "invalid radix #{radix}"
  end

  base = radix
  negative = false
  digits = try do
    negative = (scan(/[+\-]/) == '-')
    bare_zero = false
    if base.nil?
      base =
        if scan(/0b/i)
          2
        elsif scan(/0x/i)
          16
        elsif scan('0')
          # the consumed "0" is itself a valid number when no octal digit follows
          bare_zero = true
          8
        else
          10
        end
    end
    # highest digit is base - 1; letters start at base 11 ('a' == 10)
    pattern =
      if base <= 10
        /[0-#{base - 1}]+/
      else
        /[\da-#{('a'.ord + base - 11).chr}]+/i
      end
    scan(pattern) || (bare_zero ? '0' : nil)
  end
  return unless digits

  value = digits.to_i base
  negative ? -value : value
end
65
+
66
+ def scan_date format, start=Date::ITALY
67
+ s = rest
68
+ d = DateTime._strptime s, format
69
+ if d
70
+ # XXX need 2 parses because the handling is very complex ...
71
+ dt = DateTime.strptime s, format, start rescue return nil
72
+
73
+ len = s.bytesize
74
+ if leftover = d[:leftover]
75
+ len -= leftover.bytesize
76
+ end
77
+ self.bytepos += len
78
+
79
+ dt
80
+ end
81
+ end
82
+
83
+ def unpack format
84
+ if format.index('@')
85
+ raise ArgumentError, 'position instruction @ not supported'
86
+ end
87
+ r = rest.unpack format
88
+ if r.index(nil)
89
+ return
90
+ end
91
+ # XXX pack to get parsed length because no related API is exposed ...
92
+ len = r.pack(format).bytesize
93
+ self.bytepos += len
94
+ r
95
+ end
96
+
20
97
  def pos= new_pos
21
98
  advance new_pos - pos
22
99
  end
@@ -57,6 +134,26 @@ class ZScan
57
134
  _internal_string.bytesize
58
135
  end
59
136
 
137
+ def line_index
138
+ _internal_string.byteslice(0, bytepos).count "\n"
139
+ end
140
+
141
+ def self.binary_spec &p
142
+ bs = BinarySpec.new
143
+ bs.instance_eval &p
144
+ bs.instance_variable_get(:@code) << BinarySpec::RET
145
+ bs
146
+ end
147
+
148
+ class BinarySpec
149
+ BLANK = ''.force_encoding 'binary'
150
+
151
+ def initialize
152
+ @code = BLANK.dup
153
+ @s_size = 0
154
+ end
155
+ end
156
+
60
157
  private :_internal_init, :_internal_string
61
158
  end
62
159
 
@@ -0,0 +1,165 @@
1
+ # GENERATED WITH: rake gen
2
+ class ZScan::BinarySpec
3
+ def int8 n=1
4
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
5
+ n.times do
6
+ @code << INT8
7
+ @s_size += 1
8
+ end
9
+ end
10
+ def int16 n=1
11
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
12
+ n.times do
13
+ @code << INT16
14
+ @s_size += 2
15
+ end
16
+ end
17
+ def int16_swap n=1
18
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
19
+ n.times do
20
+ @code << INT16_SWAP
21
+ @s_size += 2
22
+ end
23
+ end
24
+ def int32 n=1
25
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
26
+ n.times do
27
+ @code << INT32
28
+ @s_size += 4
29
+ end
30
+ end
31
+ def int32_swap n=1
32
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
33
+ n.times do
34
+ @code << INT32_SWAP
35
+ @s_size += 4
36
+ end
37
+ end
38
+ def int64 n=1
39
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
40
+ n.times do
41
+ @code << INT64
42
+ @s_size += 8
43
+ end
44
+ end
45
+ def int64_swap n=1
46
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
47
+ n.times do
48
+ @code << INT64_SWAP
49
+ @s_size += 8
50
+ end
51
+ end
52
+ def uint8 n=1
53
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
54
+ n.times do
55
+ @code << UINT8
56
+ @s_size += 1
57
+ end
58
+ end
59
+ def uint16 n=1
60
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
61
+ n.times do
62
+ @code << UINT16
63
+ @s_size += 2
64
+ end
65
+ end
66
+ def uint16_swap n=1
67
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
68
+ n.times do
69
+ @code << UINT16_SWAP
70
+ @s_size += 2
71
+ end
72
+ end
73
+ def uint32 n=1
74
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
75
+ n.times do
76
+ @code << UINT32
77
+ @s_size += 4
78
+ end
79
+ end
80
+ def uint32_swap n=1
81
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
82
+ n.times do
83
+ @code << UINT32_SWAP
84
+ @s_size += 4
85
+ end
86
+ end
87
+ def uint64 n=1
88
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
89
+ n.times do
90
+ @code << UINT64
91
+ @s_size += 8
92
+ end
93
+ end
94
+ def uint64_swap n=1
95
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
96
+ n.times do
97
+ @code << UINT64_SWAP
98
+ @s_size += 8
99
+ end
100
+ end
101
+ def single n=1
102
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
103
+ n.times do
104
+ @code << SINGLE
105
+ @s_size += 4
106
+ end
107
+ end
108
+ def single_swap n=1
109
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
110
+ n.times do
111
+ @code << SINGLE_SWAP
112
+ @s_size += 4
113
+ end
114
+ end
115
+ def double n=1
116
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
117
+ n.times do
118
+ @code << DOUBLE
119
+ @s_size += 8
120
+ end
121
+ end
122
+ def double_swap n=1
123
+ raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
124
+ n.times do
125
+ @code << DOUBLE_SWAP
126
+ @s_size += 8
127
+ end
128
+ end
129
+ if ZScan::BinarySpec.big_endian?
130
+ alias int16_be int16
131
+ alias int16_le int16_swap
132
+ alias int32_be int32
133
+ alias int32_le int32_swap
134
+ alias int64_be int64
135
+ alias int64_le int64_swap
136
+ alias uint16_be uint16
137
+ alias uint16_le uint16_swap
138
+ alias uint32_be uint32
139
+ alias uint32_le uint32_swap
140
+ alias uint64_be uint64
141
+ alias uint64_le uint64_swap
142
+ alias single_be single
143
+ alias single_le single_swap
144
+ alias double_be double
145
+ alias double_le double_swap
146
+ else
147
+ alias int16_le int16
148
+ alias int16_be int16_swap
149
+ alias int32_le int32
150
+ alias int32_be int32_swap
151
+ alias int64_le int64
152
+ alias int64_be int64_swap
153
+ alias uint16_le uint16
154
+ alias uint16_be uint16_swap
155
+ alias uint32_le uint32
156
+ alias uint32_be uint32_swap
157
+ alias uint64_le uint64
158
+ alias uint64_be uint64_swap
159
+ alias single_le single
160
+ alias single_be single_swap
161
+ alias double_le double
162
+ alias double_be double_swap
163
+ end
164
+ undef int16_swap, int32_swap, int64_swap, uint16_swap, uint32_swap, uint64_swap, single_swap, double_swap
165
+ end
data/rakefile CHANGED
@@ -4,6 +4,60 @@ version = `command grep 'VERSION =' lib/zscan.rb`[version_re]
4
4
  gem_files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
5
5
  gem_package = "zscan-#{version}.gem"
6
6
 
7
+ bspec_types = %w[INT8 INT16 INT32 INT64 UINT8 UINT16 UINT32 UINT64 SINGLE DOUBLE]
8
+ bspec_insns = bspec_types.flat_map{|ty|
9
+ if ty =~ /INT8/
10
+ ty
11
+ else
12
+ [ty, "#{ty}_SWAP"]
13
+ end
14
+ }
15
+ def bspec_incr ins
16
+ case ins
17
+ when /INT(\d+)/; $1.to_i / 8
18
+ when /SINGLE/; 4
19
+ when /DOUBLE/; 8
20
+ else; raise 'bad'
21
+ end
22
+ end
23
+ def bspec_c_type ins
24
+ case ins
25
+ when /(U?INT\d+)/; "#{$1.downcase}_t"
26
+ when /SINGLE/; 'float'
27
+ when /DOUBLE/; 'double'
28
+ else; raise 'bad'
29
+ end
30
+ end
31
# Builds the C statement that loads the raw bits of instruction `ins` from the
# input pointer `s` into an unsigned local `r` of the same width, byte-swapping
# when the instruction name ends with SWAP.
#
# ins - instruction name such as "INT16" or "DOUBLE_SWAP".
#
# Returns a String such as "uint16_t r = swap16(((uint16_t*)s)[0])".
def bspec_extract ins
  bits = bspec_incr(ins) * 8
  load = "((uint#{bits}_t*)s)[0]"
  load = "swap#{bits}(#{load})" if ins.end_with?('SWAP')
  "uint#{bits}_t r = #{load}"
end
40
+ def bspec_convert ins
41
+ case ins
42
+ when /(U)?INT64|UINT32/
43
+ if ins.start_with?('U')
44
+ "UINT64toNUM(r)"
45
+ else
46
+ "INT64toNUM(CAST(r, int64_t))"
47
+ end
48
+ when /INT32/
49
+ "INT2NUM(CAST(r, int32_t))"
50
+ when /INT(16|8)/
51
+ "INT2FIX(CAST(r, #{bspec_c_type ins}))"
52
+ when /SINGLE/
53
+ "DBL2NUM((double)CAST(r, float))"
54
+ when /DOUBLE/
55
+ "DBL2NUM(CAST(r, double))"
56
+ else
57
+ raise 'bad'
58
+ end
59
+ end
60
+
7
61
  desc "build and test"
8
62
  task :default => [:test, gem_package]
9
63
 
@@ -30,3 +84,86 @@ file gem_package => gem_files do
30
84
  end
31
85
  sh "gem build zscan.gemspec"
32
86
  end
87
+
88
+ desc "generate files"
89
+ task :gen => %w[ext/bspec_exec.inc ext/bspec_opcode_names.inc lib/zscan/instructions.rb]
90
+
91
+ file 'ext/bspec_exec.inc' => __FILE__ do
92
+ puts "generating ext/bspec_exec.inc"
93
+ opcode_list = bspec_insns.map do |ins|
94
+ "&&BS_#{ins}"
95
+ end.join ', '
96
+
97
+ opcode_segs = bspec_insns.map do |ins|
98
+ %Q{BS_#{ins}:
99
+ {
100
+ #{bspec_extract ins};
101
+ rb_ary_push(a, #{bspec_convert ins});
102
+ s += #{bspec_incr ins};
103
+ goto **(ip++);
104
+ }
105
+ }
106
+ end.join "\n"
107
+
108
+ File.open 'ext/bspec_exec.inc', 'w' do |f|
109
+ f.puts %Q|// GENERATED WITH: rake gen
110
+ #line 2 "ext/bspec_exec.inc"
111
+ __attribute__((__noinline__))
112
+ static VALUE bspec_exec(void** ip, char* s, VALUE a) {
113
+ static void* opcodes[] = { &&BS_RET, #{opcode_list} };
114
+ if (ip == NULL) {
115
+ return (VALUE)opcodes;
116
+ }
117
+ goto **(ip++);
118
+ BS_RET:
119
+ return a;
120
+ #{opcode_segs}
121
+ }|
122
+ end
123
+ end
124
+
125
+ file 'ext/bspec_opcode_names.inc' => __FILE__ do
126
+ puts 'generating ext/bspec_opcode_names.inc'
127
+ opcode_names = bspec_insns.map(&:inspect).join ', '
128
+ File.open 'ext/bspec_opcode_names.inc', 'w' do |f|
129
+ f.puts "// GENERATED WITH: rake gen"
130
+ f.puts %Q|const char* bspec_opcode_names[] = {"RET", #{opcode_names}};|
131
+ f.puts %Q|long bspec_opcode_size = #{bspec_insns.size + 1};|
132
+ end
133
+ end
134
+
135
+ file 'lib/zscan/instructions.rb' => __FILE__ do
136
+ puts 'generating lib/zscan/instructions.rb'
137
+ File.open 'lib/zscan/instructions.rb', 'w' do |f|
138
+ f.puts "# GENERATED WITH: rake gen"
139
+ f.puts "class ZScan::BinarySpec"
140
+
141
+ bspec_insns.each do |ins|
142
+ f.puts <<-RUBY
143
+ def #{ins.downcase} n=1
144
+ raise ArgumentError, "repeat count should be >= 1, but got \#{n}" if n < 1
145
+ n.times do
146
+ @code << #{ins}
147
+ @s_size += #{bspec_incr ins}
148
+ end
149
+ end
150
+ RUBY
151
+ end
152
+
153
+ alias_ins = (bspec_types - ['INT8', 'UINT8']).map &:downcase
154
+ f.puts " if ZScan::BinarySpec.big_endian?"
155
+ alias_ins.each do |ins|
156
+ f.puts " alias #{ins}_be #{ins}"
157
+ f.puts " alias #{ins}_le #{ins}_swap"
158
+ end
159
+ f.puts " else"
160
+ alias_ins.each do |ins|
161
+ f.puts " alias #{ins}_le #{ins}"
162
+ f.puts " alias #{ins}_be #{ins}_swap"
163
+ end
164
+ f.puts " end"
165
+ swap_ins = alias_ins.map{|ins| "#{ins}_swap"}
166
+ f.puts " undef #{swap_ins.join ', '}"
167
+ f.puts "end"
168
+ end
169
+ end
data/readme.md CHANGED
@@ -4,6 +4,7 @@
4
4
  - `ZScan#pos` is the codepoint position, and `ZScan#bytepos` is byte position.
5
5
  - Correctly scans anchors and look behind predicates.
6
6
  - Pos stack manipulation.
7
+ - Typed scanning methods: `#scan_float`, `#scan_int radix=nil`, `#scan_date format`, `#scan_binary format`.
7
8
 
8
9
  ## Install
9
10
 
@@ -22,7 +23,7 @@ z.scan /\w+/ #=> 'world'
22
23
  z.eos? #=> true
23
24
  ```
24
25
 
25
- ## Motivation
26
+ ## Motivation - `StringScanner`
26
27
 
27
28
  Ruby's stdlib `StringScanner` treats the scanning position as beginning of string:
28
29
 
@@ -46,41 +47,112 @@ z.scan /^/ #=> nil
46
47
 
47
48
  See also https://bugs.ruby-lang.org/issues/7092
48
49
 
50
+ ## Other motivations - `scanf` / `strptime` / `unpack`
51
+
52
+ - For scan and convert, ruby's stdlib `Scanf` is slow (it creates a regexp array every time it is called) and cannot cooperate with a scanner.
53
+ - For date parsing, `strptime` doesn't tell the parsed length.
54
+ - For binary parsing, `unpack` is a slow interpreter, and the instructions are quite irregular.
55
+
49
56
  ## Essential methods
50
57
 
51
58
  - `ZScan.new string, dup=false`
52
59
  - `#scan regexp_or_string`
53
60
  - `#skip regexp_or_string`
54
61
  - `#match_bytesize regexp_or_string` return length of matched bytes or `nil`.
62
+ - `#scan_float` scans a float number that does not start with whitespace. It deals with multibyte encodings for you.
63
+ - `#scan_int radix=nil` if radix is nil, decide base by prefix: `0x` is 16, `0` is 8, `0b` is 2, otherwise 10. `radix` should be in range `2..36`.
64
+ - `#scan_date format_string, start=Date::ITALY` scan a `DateTime` object, see also [strptime](http://rubydoc.info/stdlib/date/DateTime.strptime).
65
+ - `#scan_binary binary_spec` optimized and readable binary scan, see below for how to create a `ZScan::BinarySpec`.
66
+ - `#unpack format_string`
55
67
  - `#eos?`
56
68
  - `#string` note: return a dup. Don't worry the performance because it is a copy-on-write string.
57
69
  - `#rest`
58
70
 
71
+ ## String delegates
72
+
73
+ For convenience
74
+
75
+ - `#<< append_string`
76
+ - `#[]= range, replace_string` note: if `range` starts before pos, moves pos left, also clears the stack.
77
+ - `#size`
78
+ - `#bytesize`
79
+
80
+ ## Parsing combinators
81
+
82
+ Combinators that manage scanner pos and stack state for you. In the combinators, if the returned value of the given block is `nil` or `false`, stops iteration. Can be nested, useful for building parsers.
83
+
84
+ - `#try &block` returns `block`'s return.
85
+ - `#zero_or_one result=[], &block` try to execute 0 or 1 time, returns `result`.
86
+ - `#zero_or_more result=[], &block` tries to execute the block 0 or more times, also stops iterating if the scanner does not advance, returns `result`.
87
+ - `#one_or_more result=[], &block` tries to execute the block 1 or more times, also stops iterating if the scanner does not advance, returns `nil` or `result`.
88
+
59
89
  ## Pos management
60
90
 
61
91
  - `#pos`
62
92
  - `#pos= new_pos` note: complexity ~ `new_pos > pos ? new_pos - pos : new_pos`.
63
93
  - `#bytepos`
64
94
  - `#bytepos= new_bytepos` note: complexity ~ `abs(new_bytepos - bytepos)`.
95
+ - `#line_index` line index of current position, start from `0`.
65
96
  - `#advance n` move forward `n` codepoints, if `n < 0`, move backward. Stops at beginning or end.
66
97
  - `#reset` go to beginning.
67
98
  - `#terminate` go to end of string.
68
99
 
69
- ## Efficient pos stack manipulation
100
+ ## (Low level) Efficient pos stack manipulation
70
101
 
71
102
  - `#push` push current pos into the stack.
72
103
  - `#pop` set current pos to top of the stack, and pop it.
73
104
  - `#drop` drop top of pos stack without changing current pos.
74
105
  - `#restore` set current pos to top of the stack.
75
106
  - `#clear_pos_stack` clear pos stack.
76
- - `#try` try to do several scans in the given block, fall back to init pos if block returns `nil` or `false`. Returns block's return, can be nested.
77
107
 
78
- ## String delegates
108
+ ## `ZScan::BinarySpec`
79
109
 
80
- - `#<< append_string`
81
- - `#[]= range, replace_string` note: if `range` starts before pos, moves pos left, also clears the stack.
82
- - `#size`
83
- - `#bytesize`
110
+ Specify a sequence of binary data. Designed for binary protocol parsing. Example:
111
+
112
+ ```ruby
113
+ # create a ZScan::BinarySpec
114
+ s = ZScan.binary_spec do
115
+ int8 # once
116
+ uint32_le 2 # little endian, twice
117
+ double_be 1 # big endian, once
118
+ end
119
+ z = ZScan.new [-1, 2, 3, 4.0].pack('cI<2G') + "rest"
120
+ z.scan_binary s #=> [-1, 2, 3, 4.0]
121
+ z.rest #=> 'rest'
122
+ ```
123
+
124
+ Integer instructions:
125
+
126
+ ```ruby
127
+ int8 uint8
128
+ int16 uint16 int16_le uint16_le int16_be uint16_be
129
+ int32 uint32 int32_le uint32_le int32_be uint32_be
130
+ int64 uint64 int64_le uint64_le int64_be uint64_be
131
+ ```
132
+
133
+ Single precision float instructions:
134
+
135
+ ```ruby
136
+ single single_le single_be
137
+ ```
138
+
139
+ Double precision float instructions:
140
+
141
+ ```ruby
142
+ double double_le double_be
143
+ ```
144
+
145
+ Endians:
146
+
147
+ - (without endian suffix) native endian
148
+ - `*_le` little endian (VAX, x86, Windows string code unit)
149
+ - `*_be` big endian, network endian (SPARC, Java string code unit)
150
+
151
+ Repeat count must be integer `>= 1`, default is `1`.
152
+
153
+ It is implemented as a direct-threaded bytecode interpreter. Performance vs `String#unpack`:
154
+
155
+ todo
84
156
 
85
157
  ## License
86
158
 
@@ -0,0 +1,28 @@
1
+ require_relative "spec_helper"
2
+
3
+ describe 'ZScan binary scanning methods' do
4
+ it "#unpack" do
5
+ z = ZScan.new "\x01\x02\x03"
6
+ assert_raise ArgumentError do
7
+ z.unpack '@1C'
8
+ end
9
+ assert_equal [1, 2], (z.unpack 'CC')
10
+ assert_equal 2, z.pos
11
+ assert_equal nil, (z.unpack 'I')
12
+ assert_equal 2, z.pos
13
+ end
14
+
15
+ it "#scan_binary" do
16
+ s = ZScan.binary_spec do
17
+ int8 # once
18
+ uint32_le 2 # little endian, twice
19
+ double_be 1 # big endian, once
20
+ single 1
21
+ end
22
+ a = [-1, 2, 3, 4.0, 3.0]
23
+ z = ZScan.new(a.pack('cI<2Gf') + 'rest')
24
+ b = z.scan_binary s
25
+ assert_equal 'rest', z.rest
26
+ assert_equal a, b
27
+ end
28
+ end
@@ -0,0 +1,52 @@
1
+ require_relative "spec_helper"
2
+
3
+ describe 'ZScan combinators' do
4
+ it "#try restores pos" do
5
+ z = ZScan.new "hello"
6
+ return1 = z.try do
7
+ z.scan 'h'
8
+ z.scan 'e'
9
+ end
10
+ assert_equal 'e', return1
11
+ assert_equal 2, z.pos
12
+
13
+ return2 = z.try do
14
+ z.scan 'l'
15
+ z.scan 'l'
16
+ z.scan 'p' # fails
17
+ end
18
+ assert_equal nil, return2
19
+ assert_equal 2, z.pos
20
+ end
21
+
22
+ it "#zero_or_one" do
23
+ z = Zscan.new "aab"
24
+ assert_equal ['a'], z.zero_or_one{z.scan 'a'}
25
+ assert_equal 1, z.pos
26
+
27
+ z = Zscan.new 'aab'
28
+ assert_equal [], z.zero_or_one{z.scan 'b'}
29
+ assert_equal 0, z.pos
30
+ end
31
+
32
+ it "#zero_or_more" do
33
+ z = Zscan.new "aab"
34
+ assert_equal ['a', 'a'], z.zero_or_more{z.scan 'a'}
35
+ assert_equal 2, z.pos
36
+
37
+ assert_equal 'aab', z.zero_or_more('aa'){z.scan 'c'; z.scan 'b'}
38
+
39
+ z = Zscan.new 'aab'
40
+ assert_equal [], z.zero_or_more{z.scan 'b'}
41
+ assert_equal 0, z.pos
42
+ end
43
+
44
+ it "#one_or_more" do
45
+ z = Zscan.new 'aab'
46
+ assert_equal ['a', 'a'], z.one_or_more{z.scan 'a'}
47
+ assert_equal 2, z.pos
48
+
49
+ z = Zscan.new 'aab'
50
+ assert_equal nil, z.one_or_more([]){z.scan 'b'}
51
+ end
52
+ end
@@ -0,0 +1,8 @@
1
+ require_relative "../lib/zscan"
2
+ require 'rspec/autorun'
3
+ RSpec.configure do |config|
4
+ config.expect_with :stdlib
5
+ config.before :all do
6
+ # GC.stress = true
7
+ end
8
+ end
@@ -0,0 +1,48 @@
1
+ require_relative "spec_helper"
2
+
3
+ describe "typed scan" do
4
+ it "#scan_int" do
5
+ z = Zscan.new " 1 0b10F5 10 030"
6
+ assert_equal nil, z.scan_int
7
+ z.advance 1
8
+ assert_equal 1, z.scan_int(10)
9
+
10
+ z.advance 1
11
+ assert_equal 0b10, z.scan_int
12
+ assert_equal 0xF5, z.scan_int(16)
13
+
14
+ z.advance 1
15
+ assert_equal 12, z.scan_int(12)
16
+
17
+ z.advance 1
18
+ assert_equal 030, z.scan_int
19
+ end
20
+
21
+ it "#scan_float" do
22
+ z = Zscan.new " -3.5e23"
23
+ assert_equal nil, z.scan_float
24
+ z.advance 1
25
+ assert_equal -3.5e23, z.scan_float
26
+ end
27
+
28
+ it "won't overflow in #scan_float" do
29
+ s = '1.23E15'.byteslice 0, 4
30
+ z = Zscan.new s
31
+ assert_equal 1.23, z.scan_float
32
+ assert_equal 4, z.pos
33
+ end
34
+
35
+ it "#scan_date" do
36
+ z = Zscan.new " 2001 04 6 04 05 06 +7 231rest"
37
+ assert_equal nil, z.scan_date('%Y %U %w %H %M %S %z %N')
38
+ z.advance 1
39
+
40
+ d = z.scan_date '%Y %U %w %H %M %S %z %N'
41
+ assert_equal 0.231, d.sec_fraction
42
+ assert_equal 'rest', z.rest
43
+
44
+ z.pos = 1
45
+ z.scan_date '%Y %U %w ahoy %H %M %S %z' # bad format
46
+ assert_equal 1, z.pos
47
+ end
48
+ end
data/spec/zscan_spec.rb CHANGED
@@ -1,18 +1,10 @@
1
- require_relative "../lib/zscan"
2
- require 'rspec/autorun'
3
- RSpec.configure do |config|
4
- config.expect_with :stdlib
5
- end
1
+ require_relative "spec_helper"
6
2
 
7
3
  describe ZScan do
8
4
  before :each do
9
5
  @z = ZScan.new 'ab你好'
10
6
  end
11
7
 
12
- before :all do
13
- GC.stress = true
14
- end
15
-
16
8
  it "random workflow" do
17
9
  assert_equal 2, @z.match_bytesize('ab')
18
10
  @z.pos = 4
@@ -76,21 +68,23 @@ describe ZScan do
76
68
  assert_equal 3, @z.pos
77
69
  end
78
70
 
79
- it "#try restores pos" do
80
- z = ZScan.new "hello"
81
- return1 = z.try do
82
- z.scan 'h'
83
- z.scan 'e'
84
- end
85
- assert_equal 'e', return1
86
- assert_equal 2, z.pos
71
+ it "#reset, #terminate and #line_index" do
72
+ z = ZScan.new ''
73
+ assert_equal 0, z.line_index
74
+ z.terminate
75
+ assert_equal 0, z.line_index
76
+ z.reset
77
+ assert_equal 0, z.line_index
87
78
 
88
- return2 = z.try do
89
- z.scan 'l'
90
- z.scan 'l'
91
- z.scan 'p' # fails
92
- end
93
- assert_equal nil, return2
94
- assert_equal 2, z.pos
79
+ z = ZScan.new "a\nb\nc"
80
+ assert_equal 0, z.line_index
81
+ z.terminate
82
+ assert_equal 2, z.line_index
83
+ z.reset
84
+ assert_equal 0, z.line_index
85
+ z.pos = 1
86
+ assert_equal 0, z.line_index
87
+ z.pos = 2
88
+ assert_equal 1, z.line_index
95
89
  end
96
90
  end
data/zscan.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "zscan"
3
- s.version = "1.0.1" # version mapped from zscan.rb, don't change here
3
+ s.version = "1.1" # version mapped from zscan.rb, don't change here
4
4
  s.author = "Zete Lui"
5
5
  s.homepage = "https://github.com/luikore/zscan"
6
6
  s.platform = Gem::Platform::RUBY
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
9
9
  s.required_ruby_version = ">=1.9.2"
10
10
  s.licenses = ['BSD']
11
11
 
12
- s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
12
+ s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c,inc}}')
13
13
  s.require_paths = ["lib"]
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.rubygems_version = '1.8.24'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zscan
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: '1.1'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-07 00:00:00.000000000 Z
11
+ date: 2013-05-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: improved string scanner, respects anchors and lookbehinds, supports codepoint
14
14
  positioning
@@ -21,11 +21,19 @@ files:
21
21
  - rakefile
22
22
  - zscan.gemspec
23
23
  - readme.md
24
- - bench.rb
24
+ - benchmark/vs-strscan.rb
25
+ - benchmark/vs-unpack.rb
25
26
  - ext/extconf.rb
27
+ - lib/zscan/instructions.rb
26
28
  - lib/zscan.rb
29
+ - spec/binary_scan_spec.rb
30
+ - spec/combinator_spec.rb
31
+ - spec/spec_helper.rb
32
+ - spec/typed_scan_spec.rb
27
33
  - spec/zscan_spec.rb
28
34
  - ext/zscan.c
35
+ - ext/bspec_exec.inc
36
+ - ext/bspec_opcode_names.inc
29
37
  homepage: https://github.com/luikore/zscan
30
38
  licenses:
31
39
  - BSD