zscan 1.1 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 998b97db8e9341f3920caa27bf11558954a777ba
4
- data.tar.gz: 033986f8e4a4086985bca23c84f8acfabd2f5e29
3
+ metadata.gz: 71174140986194386fd74477637f21f95810d704
4
+ data.tar.gz: 72706410ead67f6cd9af96a7bf7024c72d7ef8c8
5
5
  SHA512:
6
- metadata.gz: a8c23d9f29b57e113a55e46fb024bce0337aa8ee5b8317744ed9ce9c13ddf35e4b1b7af9bbf25d67c6b14e6c30bb7de00c214692cb5fea05c2e0f4fa6116478b
7
- data.tar.gz: 06ccbc8c793f873a630c4774b746ad8ca1cfa4c2895414871c0b435812bd902e3f75618a6134551b5ab3f35b2838e12c9072e0bb15ff8f6010a7ba2ca4e8a83a
6
+ metadata.gz: 7c9b9a9516dfa9b5ced99820f66976868d2c0a521df1eb35a517d262d3519601bb28b2a5e95797de411398b22bea3d9aa504d4af96f1634c1f9353d873d134d0
7
+ data.tar.gz: d3c3bcc5255f1909acafdf4bb1977d0ac75d73a6e837af0382cc5dd57d4a1e98443898bd49252e9d2d0b9ee3c7bc3eb0561c423bea4a91feb44bccf6138ac86f
@@ -0,0 +1,19 @@
1
+ require_relative "../lib/zscan"
2
+ require "benchmark"
3
+
4
+ z = ZScan.new 'a' * 100
5
+ puts Benchmark.measure{
6
+ 1000.times{
7
+ z.pos = 0
8
+ z.one_or_more{
9
+ z.scan 'a'
10
+ }
11
+ }
12
+ }
13
+
14
+ puts Benchmark.measure{
15
+ 1000.times{
16
+ z.pos = 0
17
+ z.scan /a+/
18
+ }
19
+ }
@@ -0,0 +1,141 @@
1
+ #include "zscan.h"
2
+
3
+ static const rb_data_type_t* zscan_type;
4
+
5
+ typedef struct {
6
+ long s_size;
7
+ long a_size;
8
+ long a_cap;
9
+ void** code;
10
+ } BSpec;
11
+
12
+ static void bspec_free(void* pp) {
13
+ BSpec* p = pp;
14
+ free(p->code);
15
+ free(p);
16
+ }
17
+
18
+ static size_t bspec_memsize(const void* pp) {
19
+ const BSpec* p = pp;
20
+ return p ? sizeof(*p) : 0;
21
+ }
22
+
23
+ static const rb_data_type_t bspec_type = {
24
+ "ZScan::BinarySpec",
25
+ {NULL, bspec_free, bspec_memsize}
26
+ };
27
+
28
+ static VALUE bspec_alloc(VALUE klass) {
29
+ BSpec* bs = (BSpec*)malloc(sizeof(BSpec));
30
+ bs->s_size = 0;
31
+ bs->a_cap = 4;
32
+ bs->a_size = 0;
33
+ bs->code = (void**)malloc(bs->a_cap * sizeof(void*));
34
+ return TypedData_Wrap_Struct(klass, &bspec_type, bs);
35
+ }
36
+
37
+ static VALUE bspec_append(VALUE self, VALUE v_code, VALUE v_s_size) {
38
+ BSpec* bs = rb_check_typeddata(self, &bspec_type);
39
+ if (bs->a_size == bs->a_cap) {
40
+ bs->a_cap *= 2;
41
+ bs->code = (void**)realloc(bs->code, bs->a_cap * sizeof(void*));
42
+ }
43
+ long s_size = NUM2LONG(v_s_size);
44
+ bs->code[bs->a_size++] = ((void**)RSTRING_PTR(v_code))[0];
45
+ bs->s_size += s_size;
46
+ return self;
47
+ }
48
+
49
+ static VALUE bspec_big_endian_p(VALUE self) {
50
+ # ifdef DYNAMIC_ENDIAN
51
+ /* for universal binary of NEXTSTEP and MacOS X */
52
+ int init = 1;
53
+ char* p = (char*)&init;
54
+ return p[0] ? Qfalse : Qtrue;
55
+ # elif defined(WORDS_BIGENDIAN)
56
+ return Qtrue;
57
+ #else
58
+ return Qfalse;
59
+ #endif
60
+ }
61
+
62
+ #define GCC_VERSION_SINCE(major, minor, patchlevel) \
63
+ (defined(__GNUC__) && !defined(__INTEL_COMPILER) && \
64
+ ((__GNUC__ > (major)) || \
65
+ (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
66
+ (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel))))
67
+
68
+ #if GCC_VERSION_SINCE(4,3,0) || defined(__clang__)
69
+ # define swap32(x) __builtin_bswap32(x)
70
+ # define swap64(x) __builtin_bswap64(x)
71
+ #endif
72
+
73
+ #ifndef swap16
74
+ # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
75
+ #endif
76
+
77
+ #ifndef swap32
78
+ # define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
79
+ |(((x)>>24)&0xFF) \
80
+ |(((x)&0x0000FF00)<<8) \
81
+ |(((x)&0x00FF0000)>>8)))
82
+ #endif
83
+
84
+ #ifndef swap64
85
+ # define byte_in_64bit(n) ((uint64_t)0xff << (n))
86
+ # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
87
+ |(((x)>>56)&0xFF) \
88
+ |(((x)&byte_in_64bit(8))<<40) \
89
+ |(((x)&byte_in_64bit(48))>>40) \
90
+ |(((x)&byte_in_64bit(16))<<24) \
91
+ |(((x)&byte_in_64bit(40))>>24) \
92
+ |(((x)&byte_in_64bit(24))<<8) \
93
+ |(((x)&byte_in_64bit(32))>>8)))
94
+ #endif
95
+
96
+ // NOTE can not use sizeof in preprocessor
97
+ #define INT64toNUM(x) (sizeof(long) == 8 ? LONG2NUM(x) : LL2NUM(x))
98
+ #define UINT64toNUM(x) (sizeof(long) == 8 ? ULONG2NUM(x) : ULL2NUM(x))
99
+
100
+ #define CAST(var, ty) *((ty*)(&(var)))
101
+
102
+ #include "bspec_exec.inc"
103
+
104
+ static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
105
+ ZScan* p = rb_check_typeddata(self, zscan_type);
106
+ if (!rb_enc_str_asciicompat_p(p->s)) {
107
+ rb_raise(rb_eRuntimeError, "encoding of source string should be ascii-compatible");
108
+ return Qnil;
109
+ }
110
+ BSpec* bs = rb_check_typeddata(spec, &bspec_type);
111
+ if (bs->a_size == 0) {
112
+ return rb_ary_new();
113
+ }
114
+ long s_size = bs->s_size;
115
+ if (p->bytepos + s_size > RSTRING_LEN(p->s)) {
116
+ return Qnil;
117
+ }
118
+ volatile VALUE a = rb_ary_new2(bs->a_size - 1);
119
+ bspec_exec(bs->code, RSTRING_PTR(p->s) + p->bytepos, a);
120
+ p->bytepos += s_size;
121
+ p->pos += s_size;
122
+ return a;
123
+ }
124
+
125
+ void Init_zscan_bspec(VALUE zscan, const rb_data_type_t* _zscan_type) {
126
+ zscan_type = _zscan_type;
127
+ rb_define_method(zscan, "scan_binary", zscan_scan_binary, 1);
128
+
129
+ VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
130
+ rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
131
+ rb_define_alloc_func(bs, bspec_alloc);
132
+ rb_define_method(bs, "append", bspec_append, 2);
133
+
134
+ # include "bspec_opcode_names.inc"
135
+ void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
136
+ for (long i = 0; i < bspec_opcode_size; i++) {
137
+ VALUE bytecode = rb_str_new((char*)&opcodes[i], sizeof(void*));
138
+ OBJ_FREEZE(bytecode);
139
+ rb_define_const(bs, bspec_opcode_names[i], bytecode);
140
+ }
141
+ }
@@ -1,3 +1,8 @@
1
1
  require "mkmf"
2
2
 
3
3
  create_makefile 'zscan'
4
+
5
+ headers = Dir.glob('*.h').join ' '
6
+ File.open 'Makefile', 'a' do |f|
7
+ f.puts "\n$(OBJS): #{headers}"
8
+ end
@@ -1,24 +1,6 @@
1
- #include <ruby/ruby.h>
2
- #include <ruby/re.h>
3
- #include <ruby/encoding.h>
4
- #include <ctype.h>
5
-
1
+ #include "zscan.h"
6
2
  // todo infect check
7
3
 
8
- typedef struct {
9
- long pos;
10
- long bytepos;
11
- } Pos;
12
-
13
- typedef struct {
14
- long pos;
15
- long bytepos;
16
- VALUE s;
17
- long stack_i;
18
- long stack_cap;
19
- Pos* stack;
20
- } ZScan;
21
-
22
4
  static void zscan_mark(void* pp) {
23
5
  ZScan* p = pp;
24
6
  rb_gc_mark(p->s);
@@ -282,6 +264,16 @@ static VALUE zscan_try(VALUE self) {
282
264
  return r;
283
265
  }
284
266
 
267
+ // optimized version without pushing and block
268
+ static VALUE zscan__try(VALUE self, VALUE r) {
269
+ if (RTEST(r)) {
270
+ zscan_drop(self);
271
+ } else {
272
+ zscan_pop(self);
273
+ }
274
+ return r;
275
+ }
276
+
285
277
  static VALUE zscan_zero_or_one(int argc, VALUE* argv, VALUE self) {
286
278
  REQUIRE_BLOCK;
287
279
  volatile VALUE a = Qnil;
@@ -380,76 +372,7 @@ VALUE zscan_scan_float(VALUE self) {
380
372
  }
381
373
  }
382
374
 
383
- static VALUE bspec_big_endian_p(VALUE self) {
384
- # ifdef DYNAMIC_ENDIAN
385
- /* for universal binary of NEXTSTEP and MacOS X */
386
- int init = 1;
387
- char* p = (char*)&init;
388
- return p[0] ? Qfalse : Qtrue;
389
- # elif defined(WORDS_BIGENDIAN)
390
- return Qtrue;
391
- #else
392
- return Qfalse;
393
- #endif
394
- }
395
-
396
- #define GCC_VERSION_SINCE(major, minor, patchlevel) \
397
- (defined(__GNUC__) && !defined(__INTEL_COMPILER) && \
398
- ((__GNUC__ > (major)) || \
399
- (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
400
- (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel))))
401
-
402
- #if GCC_VERSION_SINCE(4,3,0) || defined(__clang__)
403
- # define swap32(x) __builtin_bswap32(x)
404
- # define swap64(x) __builtin_bswap64(x)
405
- #endif
406
-
407
- #ifndef swap16
408
- # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
409
- #endif
410
-
411
- #ifndef swap32
412
- # define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
413
- |(((x)>>24)&0xFF) \
414
- |(((x)&0x0000FF00)<<8) \
415
- |(((x)&0x00FF0000)>>8) ))
416
- #endif
417
-
418
- #ifndef swap64
419
- # ifdef HAVE_INT64_T
420
- # define byte_in_64bit(n) ((uint64_t)0xff << (n))
421
- # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
422
- |(((x)>>56)&0xFF) \
423
- |(((x)&byte_in_64bit(8))<<40) \
424
- |(((x)&byte_in_64bit(48))>>40) \
425
- |(((x)&byte_in_64bit(16))<<24) \
426
- |(((x)&byte_in_64bit(40))>>24) \
427
- |(((x)&byte_in_64bit(24))<<8) \
428
- |(((x)&byte_in_64bit(32))>>8)))
429
- # endif
430
- #endif
431
-
432
- // NOTE can not use sizeof in preprocessor
433
- #define INT64toNUM(x) (sizeof(long) == 8 ? LONG2NUM(x) : LL2NUM(x))
434
- #define UINT64toNUM(x) (sizeof(long) == 8 ? ULONG2NUM(x) : ULL2NUM(x))
435
-
436
- #define CAST(var, ty) *((ty*)(&(var)))
437
-
438
- #include "bspec_exec.inc"
439
-
440
- static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
441
- P;
442
- long s_size = NUM2LONG(rb_iv_get(spec, "@s_size"));
443
- if (p->bytepos + s_size > RSTRING_LEN(p->s)) {
444
- return Qnil;
445
- }
446
- VALUE code = rb_iv_get(spec, "@code");
447
- long a_size = RSTRING_LEN(code) / sizeof(void*);
448
- volatile VALUE a = rb_ary_new2(a_size);
449
- bspec_exec((void**)RSTRING_PTR(code), RSTRING_PTR(p->s) + p->bytepos, a);
450
- zscan_bytepos_eq(self, LONG2NUM(p->bytepos + s_size));
451
- return a;
452
- }
375
+ extern void Init_zscan_bspec(VALUE, const rb_data_type_t*);
453
376
 
454
377
  void Init_zscan() {
455
378
  VALUE zscan = rb_define_class("ZScan", rb_cObject);
@@ -471,21 +394,11 @@ void Init_zscan() {
471
394
  rb_define_method(zscan, "clear_pos_stack", zscan_clear_pos_stack, 0);
472
395
 
473
396
  rb_define_method(zscan, "try", zscan_try, 0);
397
+ rb_define_method(zscan, "_try", zscan__try, 1);
474
398
  rb_define_method(zscan, "zero_or_one", zscan_zero_or_one, -1);
475
399
  rb_define_method(zscan, "zero_or_more", zscan_zero_or_more, -1);
476
400
  rb_define_method(zscan, "one_or_more", zscan_one_or_more, -1);
477
401
 
478
402
  rb_define_method(zscan, "scan_float", zscan_scan_float, 0);
479
- rb_define_method(zscan, "scan_binary", zscan_scan_binary, 1);
480
-
481
- VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
482
- rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
483
-
484
- # include "bspec_opcode_names.inc"
485
- void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
486
- for (long i = 0; i < bspec_opcode_size; i++) {
487
- VALUE bytecode = rb_str_new((char*)&opcodes[i], sizeof(void*));
488
- OBJ_FREEZE(bytecode);
489
- rb_define_const(bs, bspec_opcode_names[i], bytecode);
490
- }
403
+ Init_zscan_bspec(zscan, &zscan_type);
491
404
  }
@@ -0,0 +1,19 @@
1
+ #pragma once
2
+ #include <ruby/ruby.h>
3
+ #include <ruby/re.h>
4
+ #include <ruby/encoding.h>
5
+ #include <ctype.h>
6
+
7
+ typedef struct {
8
+ long pos;
9
+ long bytepos;
10
+ } Pos;
11
+
12
+ typedef struct {
13
+ long pos;
14
+ long bytepos;
15
+ VALUE s;
16
+ long stack_i;
17
+ long stack_cap;
18
+ Pos* stack;
19
+ } ZScan;
@@ -3,7 +3,7 @@ require_relative "zscan/instructions"
3
3
  require "date"
4
4
 
5
5
  class ZScan
6
- VERSION = '1.1'
6
+ VERSION = '1.2'
7
7
 
8
8
  def initialize s, dup=false
9
9
  if s.encoding.ascii_compatible?
@@ -139,18 +139,16 @@ class ZScan
139
139
  end
140
140
 
141
141
  def self.binary_spec &p
142
- bs = BinarySpec.new
142
+ bs = BinarySpec.send :new
143
143
  bs.instance_eval &p
144
- bs.instance_variable_get(:@code) << BinarySpec::RET
144
+ bs.send :append, BinarySpec::RET, 0
145
145
  bs
146
146
  end
147
147
 
148
148
  class BinarySpec
149
- BLANK = ''.force_encoding 'binary'
150
-
151
- def initialize
152
- @code = BLANK.dup
153
- @s_size = 0
149
+ private :append
150
+ class << self
151
+ private :new
154
152
  end
155
153
  end
156
154
 
@@ -3,127 +3,109 @@ class ZScan::BinarySpec
3
3
  def int8 n=1
4
4
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
5
5
  n.times do
6
- @code << INT8
7
- @s_size += 1
6
+ append INT8, 1
8
7
  end
9
8
  end
10
9
  def int16 n=1
11
10
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
12
11
  n.times do
13
- @code << INT16
14
- @s_size += 2
12
+ append INT16, 2
15
13
  end
16
14
  end
17
15
  def int16_swap n=1
18
16
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
19
17
  n.times do
20
- @code << INT16_SWAP
21
- @s_size += 2
18
+ append INT16_SWAP, 2
22
19
  end
23
20
  end
24
21
  def int32 n=1
25
22
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
26
23
  n.times do
27
- @code << INT32
28
- @s_size += 4
24
+ append INT32, 4
29
25
  end
30
26
  end
31
27
  def int32_swap n=1
32
28
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
33
29
  n.times do
34
- @code << INT32_SWAP
35
- @s_size += 4
30
+ append INT32_SWAP, 4
36
31
  end
37
32
  end
38
33
  def int64 n=1
39
34
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
40
35
  n.times do
41
- @code << INT64
42
- @s_size += 8
36
+ append INT64, 8
43
37
  end
44
38
  end
45
39
  def int64_swap n=1
46
40
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
47
41
  n.times do
48
- @code << INT64_SWAP
49
- @s_size += 8
42
+ append INT64_SWAP, 8
50
43
  end
51
44
  end
52
45
  def uint8 n=1
53
46
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
54
47
  n.times do
55
- @code << UINT8
56
- @s_size += 1
48
+ append UINT8, 1
57
49
  end
58
50
  end
59
51
  def uint16 n=1
60
52
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
61
53
  n.times do
62
- @code << UINT16
63
- @s_size += 2
54
+ append UINT16, 2
64
55
  end
65
56
  end
66
57
  def uint16_swap n=1
67
58
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
68
59
  n.times do
69
- @code << UINT16_SWAP
70
- @s_size += 2
60
+ append UINT16_SWAP, 2
71
61
  end
72
62
  end
73
63
  def uint32 n=1
74
64
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
75
65
  n.times do
76
- @code << UINT32
77
- @s_size += 4
66
+ append UINT32, 4
78
67
  end
79
68
  end
80
69
  def uint32_swap n=1
81
70
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
82
71
  n.times do
83
- @code << UINT32_SWAP
84
- @s_size += 4
72
+ append UINT32_SWAP, 4
85
73
  end
86
74
  end
87
75
  def uint64 n=1
88
76
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
89
77
  n.times do
90
- @code << UINT64
91
- @s_size += 8
78
+ append UINT64, 8
92
79
  end
93
80
  end
94
81
  def uint64_swap n=1
95
82
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
96
83
  n.times do
97
- @code << UINT64_SWAP
98
- @s_size += 8
84
+ append UINT64_SWAP, 8
99
85
  end
100
86
  end
101
87
  def single n=1
102
88
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
103
89
  n.times do
104
- @code << SINGLE
105
- @s_size += 4
90
+ append SINGLE, 4
106
91
  end
107
92
  end
108
93
  def single_swap n=1
109
94
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
110
95
  n.times do
111
- @code << SINGLE_SWAP
112
- @s_size += 4
96
+ append SINGLE_SWAP, 4
113
97
  end
114
98
  end
115
99
  def double n=1
116
100
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
117
101
  n.times do
118
- @code << DOUBLE
119
- @s_size += 8
102
+ append DOUBLE, 8
120
103
  end
121
104
  end
122
105
  def double_swap n=1
123
106
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
124
107
  n.times do
125
- @code << DOUBLE_SWAP
126
- @s_size += 8
108
+ append DOUBLE_SWAP, 8
127
109
  end
128
110
  end
129
111
  if ZScan::BinarySpec.big_endian?
data/rakefile CHANGED
@@ -143,8 +143,7 @@ file 'lib/zscan/instructions.rb' => __FILE__ do
143
143
  def #{ins.downcase} n=1
144
144
  raise ArgumentError, "repeat count should be >= 1, but got \#{n}" if n < 1
145
145
  n.times do
146
- @code << #{ins}
147
- @s_size += #{bspec_incr ins}
146
+ append #{ins}, #{bspec_incr ins}
148
147
  end
149
148
  end
150
149
  RUBY
data/readme.md CHANGED
@@ -23,7 +23,7 @@ z.scan /\w+/ #=> 'world'
23
23
  z.eos? #=> true
24
24
  ```
25
25
 
26
- ## Motivation - `StringScanner`
26
+ ## Motivation: string scanner
27
27
 
28
28
  Ruby's stdlib `StringScanner` treats the scanning position as the beginning of the string:
29
29
 
@@ -47,7 +47,7 @@ z.scan /^/ #=> nil
47
47
 
48
48
  See also https://bugs.ruby-lang.org/issues/7092
49
49
 
50
- ## Other motivations - `scanf` / `strptime` / `unpack`
50
+ ## Other motivations
51
51
 
52
52
  - For scan and convert, Ruby's stdlib `Scanf` is slow (it creates a regexp array every time it is called) and cannot cooperate with a scanner.
53
53
  - For date parsing, `strptime` doesn't tell the parsed length.
@@ -77,15 +77,6 @@ For convienience
77
77
  - `#size`
78
78
  - `#bytesize`
79
79
 
80
- ## Parsing combinators
81
-
82
- Combinators that manage scanner pos and stack state for you. In the combinators, if the returned value of the given block is `nil` or `false`, stops iteration. Can be nested, useful for building parsers.
83
-
84
- - `#try &block` returns `block`'s return.
85
- - `#zero_or_one result=[], &block` try to execute 0 or 1 time, returns `result`.
86
- - `#zero_or_more result=[], &block` try to execute 0 or more times, also stops iteration if scanner no advance, returns `result`.
87
- - `#one_or_more result=[], &block` try to execute 1 or more times, also stops iteration if scanner no advance, returns `nil` or `result`.
88
-
89
80
  ## Pos management
90
81
 
91
82
  - `#pos`
@@ -97,15 +88,7 @@ Combinators that manage scanner pos and stack state for you. In the combinators,
97
88
  - `#reset` go to beginning.
98
89
  - `#terminate` go to end of string.
99
90
 
100
- ## (Low level) Efficient pos stack manipulation
101
-
102
- - `#push` push current pos into the stack.
103
- - `#pop` set current pos to top of the stack, and pop it.
104
- - `#drop` drop top of pos stack without changing current pos.
105
- - `#restore` set current pos to top of the stack.
106
- - `#clear_pos_stack` clear pos stack.
107
-
108
- ## `ZScan::BinarySpec`
91
+ ## Binary parsing
109
92
 
110
93
  Specify a sequence of binary data. Designed for binary protocol parsing. Example:
111
94
 
@@ -150,9 +133,25 @@ Endians:
150
133
 
151
134
  Repeat count must be integer `>= 1`, default is `1`.
152
135
 
153
- It is implemented as a direct-threaded bytecode interpreter. Performance vs `String#unpack`:
136
+ It is implemented as a direct-threaded bytecode interpreter. It is a bit faster than `String#unpack`.
154
137
 
155
- todo
138
+ ## Parsing combinators
139
+
140
+ Combinators that manage the scanner pos and stack state for you. In a combinator, if the given block returns `nil` or `false`, iteration stops and the scanner location is restored. Combinators can be nested, which is useful for building parsers.
141
+
142
+ - `#try &block` returns the block's return value.
143
+ - `#zero_or_one acc=[], &block` tries to execute the block 0 or 1 time, returns `acc`.
144
+ - `#zero_or_more acc=[], &block` tries to execute the block 0 or more times; also stops iterating if the scanner does not advance. Returns `acc`.
145
+ - `#one_or_more acc=[], &block` tries to execute the block 1 or more times; also stops iterating if the scanner does not advance. Returns `nil` or `acc`.
146
+
147
+ ## (Low level) Efficient pos stack manipulation
148
+
149
+ - `#push` push current pos into the stack.
150
+ - `#pop` set current pos to top of the stack, and pop it.
151
+ - `#drop` drop top of pos stack without changing current pos.
152
+ - `#restore` set current pos to top of the stack.
153
+ - `#clear_pos_stack` clear pos stack.
154
+ - `z.push._try expr` is equivalent to `z.try{ expr }`, but faster because no block is required.
156
155
 
157
156
  ## License
158
157
 
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  require_relative "spec_helper"
2
3
 
3
4
  describe 'ZScan binary scanning methods' do
@@ -11,7 +12,7 @@ describe 'ZScan binary scanning methods' do
11
12
  assert_equal nil, (z.unpack 'I')
12
13
  assert_equal 2, z.pos
13
14
  end
14
-
15
+
15
16
  it "#scan_binary" do
16
17
  s = ZScan.binary_spec do
17
18
  int8 # once
@@ -19,6 +20,7 @@ describe 'ZScan binary scanning methods' do
19
20
  double_be 1 # big endian, once
20
21
  single 1
21
22
  end
23
+
22
24
  a = [-1, 2, 3, 4.0, 3.0]
23
25
  z = ZScan.new(a.pack('cI<2Gf') + 'rest')
24
26
  b = z.scan_binary s
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "zscan"
3
- s.version = "1.1" # version mapped from zscan.rb, don't change here
3
+ s.version = "1.2" # version mapped from zscan.rb, don't change here
4
4
  s.author = "Zete Lui"
5
5
  s.homepage = "https://github.com/luikore/zscan"
6
6
  s.platform = Gem::Platform::RUBY
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
9
9
  s.required_ruby_version = ">=1.9.2"
10
10
  s.licenses = ['BSD']
11
11
 
12
- s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c,inc}}')
12
+ s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,h,c,inc}}')
13
13
  s.require_paths = ["lib"]
14
14
  s.extensions = ["ext/extconf.rb"]
15
15
  s.rubygems_version = '1.8.24'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zscan
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.1'
4
+ version: '1.2'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-14 00:00:00.000000000 Z
11
+ date: 2013-05-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: improved string scanner, respects anchors and lookbehinds, supports codepoint
14
14
  positioning
@@ -21,6 +21,7 @@ files:
21
21
  - rakefile
22
22
  - zscan.gemspec
23
23
  - readme.md
24
+ - benchmark/one-or-more.rb
24
25
  - benchmark/vs-strscan.rb
25
26
  - benchmark/vs-unpack.rb
26
27
  - ext/extconf.rb
@@ -31,6 +32,8 @@ files:
31
32
  - spec/spec_helper.rb
32
33
  - spec/typed_scan_spec.rb
33
34
  - spec/zscan_spec.rb
35
+ - ext/zscan.h
36
+ - ext/bspec.c
34
37
  - ext/zscan.c
35
38
  - ext/bspec_exec.inc
36
39
  - ext/bspec_opcode_names.inc
@@ -59,3 +62,4 @@ signing_key:
59
62
  specification_version: 4
60
63
  summary: improved string scanner
61
64
  test_files: []
65
+ has_rdoc: false