zscan 1.1 → 1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/benchmark/one-or-more.rb +19 -0
- data/ext/bspec.c +141 -0
- data/ext/extconf.rb +5 -0
- data/ext/zscan.c +14 -101
- data/ext/zscan.h +19 -0
- data/lib/zscan.rb +6 -8
- data/lib/zscan/instructions.rb +18 -36
- data/rakefile +1 -2
- data/readme.md +21 -22
- data/spec/binary_scan_spec.rb +3 -1
- data/zscan.gemspec +2 -2
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71174140986194386fd74477637f21f95810d704
|
4
|
+
data.tar.gz: 72706410ead67f6cd9af96a7bf7024c72d7ef8c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c9b9a9516dfa9b5ced99820f66976868d2c0a521df1eb35a517d262d3519601bb28b2a5e95797de411398b22bea3d9aa504d4af96f1634c1f9353d873d134d0
|
7
|
+
data.tar.gz: d3c3bcc5255f1909acafdf4bb1977d0ac75d73a6e837af0382cc5dd57d4a1e98443898bd49252e9d2d0b9ee3c7bc3eb0561c423bea4a91feb44bccf6138ac86f
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Benchmark: consuming a run of 'a' characters with the block-based
# #one_or_more combinator versus a single /a+/ regexp scan.
require_relative "../lib/zscan"
require "benchmark"

z = ZScan.new('a' * 100)

# 1000 rounds of the combinator, rewinding the scanner before each round.
combinator_time = Benchmark.measure do
  1000.times do
    z.pos = 0
    z.one_or_more { z.scan 'a' }
  end
end
puts combinator_time

# 1000 rounds of the plain regexp scan over the same input.
regexp_time = Benchmark.measure do
  1000.times do
    z.pos = 0
    z.scan(/a+/)
  end
end
puts regexp_time
|
data/ext/bspec.c
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
#include "zscan.h"
|
2
|
+
|
3
|
+
static const rb_data_type_t* zscan_type;
|
4
|
+
|
5
|
+
/* Native state behind a ZScan::BinarySpec object. */
typedef struct {
  long s_size;  /* running total of the byte sizes passed to bspec_append */
  long a_size;  /* number of entries currently stored in code */
  long a_cap;   /* number of entries code has room for */
  void** code;  /* malloc'ed array of opcode pointers, handed to bspec_exec */
} BSpec;
|
11
|
+
|
12
|
+
static void bspec_free(void* pp) {
|
13
|
+
BSpec* p = pp;
|
14
|
+
free(p->code);
|
15
|
+
free(p);
|
16
|
+
}
|
17
|
+
|
18
|
+
static size_t bspec_memsize(const void* pp) {
|
19
|
+
const BSpec* p = pp;
|
20
|
+
return p ? sizeof(*p) : 0;
|
21
|
+
}
|
22
|
+
|
23
|
+
static const rb_data_type_t bspec_type = {
|
24
|
+
"ZScan::BinarySpec",
|
25
|
+
{NULL, bspec_free, bspec_memsize}
|
26
|
+
};
|
27
|
+
|
28
|
+
/*
 * Allocator for ZScan::BinarySpec.
 *
 * Creates an empty BSpec (no opcodes, total size 0) with room for 4 opcode
 * pointers and wraps it in a typed-data object of class klass.
 * Raises NoMemoryError (via rb_memerror) if either allocation fails.
 */
static VALUE bspec_alloc(VALUE klass) {
  BSpec* bs = malloc(sizeof(*bs));
  if (!bs) {
    rb_memerror();
  }
  bs->s_size = 0;
  bs->a_size = 0;
  bs->a_cap = 4;
  bs->code = malloc((size_t)bs->a_cap * sizeof(*bs->code));
  if (!bs->code) {
    /* nothing wraps bs yet, so release it here before raising */
    free(bs);
    rb_memerror();
  }
  return TypedData_Wrap_Struct(klass, &bspec_type, bs);
}
|
36
|
+
|
37
|
+
/*
 * BinarySpec#append(code, s_size)
 *
 * Appends one opcode to the spec and adds s_size to the spec's total byte
 * size.
 *
 *   v_code   - String whose first sizeof(void*) bytes hold the opcode pointer
 *              (the opcode constants defined in Init_zscan_bspec have this
 *              shape).
 *   v_s_size - Integer number of source bytes the opcode consumes; must be
 *              >= 0.
 *
 * Returns self. Raises TypeError / ArgumentError on malformed arguments and
 * NoMemoryError if the opcode array cannot grow. All validation happens
 * before the spec is modified, so a failed call leaves it unchanged.
 */
static VALUE bspec_append(VALUE self, VALUE v_code, VALUE v_s_size) {
  BSpec* bs = rb_check_typeddata(self, &bspec_type);

  Check_Type(v_code, T_STRING);
  if (RSTRING_LEN(v_code) < (long)sizeof(void*)) {
    rb_raise(rb_eArgError, "opcode string should hold at least one pointer");
  }
  long s_size = NUM2LONG(v_s_size);
  if (s_size < 0) {
    rb_raise(rb_eArgError, "size should be >= 0");
  }

  if (bs->a_size == bs->a_cap) {
    /* grow through a temporary so the old buffer survives a failed realloc */
    long new_cap = bs->a_cap * 2;
    void** grown = realloc(bs->code, (size_t)new_cap * sizeof(void*));
    if (!grown) {
      rb_memerror();
    }
    bs->code = grown;
    bs->a_cap = new_cap;
  }

  bs->code[bs->a_size++] = ((void**)RSTRING_PTR(v_code))[0];
  bs->s_size += s_size;
  return self;
}
|
48
|
+
|
49
|
+
/*
 * BinarySpec.big_endian?
 *
 * Returns Qtrue when the extension runs on a big-endian host, Qfalse
 * otherwise. The answer comes from a runtime probe when DYNAMIC_ENDIAN is
 * defined, and from WORDS_BIGENDIAN at compile time in the other cases.
 */
static VALUE bspec_big_endian_p(VALUE self) {
#if defined(DYNAMIC_ENDIAN)
  /* for universal binary of NEXTSTEP and MacOS X */
  int probe = 1;
  const char* first_byte = (const char*)&probe;
  if (first_byte[0]) {
    return Qfalse;
  }
  return Qtrue;
#elif defined(WORDS_BIGENDIAN)
  return Qtrue;
#else
  return Qfalse;
#endif
}
|
61
|
+
|
62
|
+
/*
 * GCC_VERSION_SINCE(major, minor, patchlevel): non-zero when compiling with
 * GCC (not ICC) at or above the given version.
 *
 * The compiler test lives in the surrounding #if rather than inside the
 * macro body: a `defined` operator produced by macro expansion in an #if
 * expression has undefined behaviour. The #ifndef guard avoids redefining
 * the macro when an included header already provides it.
 */
#ifndef GCC_VERSION_SINCE
# if defined(__GNUC__) && !defined(__INTEL_COMPILER)
#  define GCC_VERSION_SINCE(major, minor, patchlevel) \
    ((__GNUC__ > (major)) || \
     (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
     (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel)))
# else
#  define GCC_VERSION_SINCE(major, minor, patchlevel) 0
# endif
#endif

/* Prefer the compiler byte-swap builtins when they are available. */
#if GCC_VERSION_SINCE(4,3,0) || defined(__clang__)
# define swap32(x) __builtin_bswap32(x)
# define swap64(x) __builtin_bswap64(x)
#endif

/*
 * Portable fallbacks. Note that they evaluate x more than once. The operand
 * is converted to the unsigned type of the target width before shifting so
 * that no shift can move a bit into the sign bit of a signed int.
 */
#ifndef swap16
# define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
#endif

#ifndef swap32
# define swap32(x) ((uint32_t)((((uint32_t)(x)&0xFF)<<24) \
                   |(((uint32_t)(x)>>24)&0xFF) \
                   |(((uint32_t)(x)&0x0000FF00)<<8) \
                   |(((uint32_t)(x)&0x00FF0000)>>8)))
#endif

#ifndef swap64
# define byte_in_64bit(n) ((uint64_t)0xff << (n))
# define swap64(x) ((uint64_t)((((uint64_t)(x)&byte_in_64bit(0))<<56) \
                   |(((uint64_t)(x)>>56)&0xFF) \
                   |(((uint64_t)(x)&byte_in_64bit(8))<<40) \
                   |(((uint64_t)(x)&byte_in_64bit(48))>>40) \
                   |(((uint64_t)(x)&byte_in_64bit(16))<<24) \
                   |(((uint64_t)(x)&byte_in_64bit(40))>>24) \
                   |(((uint64_t)(x)&byte_in_64bit(24))<<8) \
                   |(((uint64_t)(x)&byte_in_64bit(32))>>8)))
#endif

/*
 * 64-bit integer -> Ruby Integer.
 * NOTE: sizeof cannot be evaluated by the preprocessor, so the choice between
 * the long and long long converters is an ordinary conditional expression.
 */
#define INT64toNUM(x) (sizeof(long) == 8 ? LONG2NUM(x) : LL2NUM(x))
#define UINT64toNUM(x) (sizeof(long) == 8 ? ULONG2NUM(x) : ULL2NUM(x))

/*
 * Reinterpret the storage of var as type ty. The result is an lvalue; the
 * outer parentheses keep it safe inside larger expressions.
 * NOTE(review): reading through an incompatible ty is type punning by pointer
 * cast - confirm the uses in bspec_exec.inc are acceptable for the build flags.
 */
#define CAST(var, ty) (*((ty*)(&(var))))
|
101
|
+
|
102
|
+
#include "bspec_exec.inc"
|
103
|
+
|
104
|
+
/*
 * ZScan#scan_binary(spec)
 *
 * Runs the opcodes of a ZScan::BinarySpec against the source string,
 * starting at the current byte position.
 *
 * Returns:
 *   - an empty Array when the spec holds no opcodes (scanner not advanced);
 *   - nil when fewer than spec->s_size bytes remain (scanner not advanced);
 *   - otherwise the Array filled by bspec_exec, after advancing the scanner
 *     by spec->s_size.
 *
 * Raises RuntimeError when the source string's encoding is not
 * ASCII-compatible, and TypeError (from rb_check_typeddata) when self or spec
 * is not of the expected typed-data type.
 */
static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
  ZScan* p = rb_check_typeddata(self, zscan_type);
  if (!rb_enc_str_asciicompat_p(p->s)) {
    /* rb_raise does not return, so nothing follows it */
    rb_raise(rb_eRuntimeError, "encoding of source string should be ascii-compatible");
  }

  BSpec* bs = rb_check_typeddata(spec, &bspec_type);
  if (bs->a_size == 0) {
    return rb_ary_new();
  }

  long s_size = bs->s_size;
  /* compare against the remaining length; bytepos + s_size could overflow */
  if (s_size > RSTRING_LEN(p->s) - p->bytepos) {
    return Qnil;
  }

  /* one slot fewer than a_size: presumably the last opcode is the RET
   * terminator that ZScan.binary_spec appends, which yields no value */
  volatile VALUE a = rb_ary_new2(bs->a_size - 1);
  bspec_exec(bs->code, RSTRING_PTR(p->s) + p->bytepos, a);

  p->bytepos += s_size;
  /* NOTE(review): pos is advanced by the byte count; confirm this is intended
   * when the consumed bytes are not one-byte characters */
  p->pos += s_size;
  return a;
}
|
124
|
+
|
125
|
+
/*
 * Registers the binary-spec part of the extension.
 *
 *   zscan       - the ZScan class object
 *   _zscan_type - typed-data descriptor of ZScan, stored in this file's
 *                 zscan_type so zscan_scan_binary can unwrap scanners
 *
 * Defines ZScan#scan_binary, the ZScan::BinarySpec class with its allocator,
 * BinarySpec.big_endian? and BinarySpec#append, and one frozen String
 * constant per opcode name.
 */
void Init_zscan_bspec(VALUE zscan, const rb_data_type_t* _zscan_type) {
  zscan_type = _zscan_type;
  rb_define_method(zscan, "scan_binary", zscan_scan_binary, 1);

  VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
  rb_define_alloc_func(bs, bspec_alloc);
  rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
  rb_define_method(bs, "append", bspec_append, 2);

  /* supplies bspec_opcode_names and bspec_opcode_size used below */
# include "bspec_opcode_names.inc"
  /* called with NULL code, bspec_exec's result is used as the opcode table */
  void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
  for (long i = 0; i < bspec_opcode_size; i++) {
    /* each constant is a frozen string holding the raw bytes of one table entry */
    const char* raw = (const char*)(opcodes + i);
    VALUE bytecode = rb_str_new(raw, sizeof(void*));
    OBJ_FREEZE(bytecode);
    rb_define_const(bs, bspec_opcode_names[i], bytecode);
  }
}
|
data/ext/extconf.rb
CHANGED
data/ext/zscan.c
CHANGED
@@ -1,24 +1,6 @@
|
|
1
|
-
#include
|
2
|
-
#include <ruby/re.h>
|
3
|
-
#include <ruby/encoding.h>
|
4
|
-
#include <ctype.h>
|
5
|
-
|
1
|
+
#include "zscan.h"
|
6
2
|
// todo infect check
|
7
3
|
|
8
|
-
typedef struct {
|
9
|
-
long pos;
|
10
|
-
long bytepos;
|
11
|
-
} Pos;
|
12
|
-
|
13
|
-
typedef struct {
|
14
|
-
long pos;
|
15
|
-
long bytepos;
|
16
|
-
VALUE s;
|
17
|
-
long stack_i;
|
18
|
-
long stack_cap;
|
19
|
-
Pos* stack;
|
20
|
-
} ZScan;
|
21
|
-
|
22
4
|
static void zscan_mark(void* pp) {
|
23
5
|
ZScan* p = pp;
|
24
6
|
rb_gc_mark(p->s);
|
@@ -282,6 +264,16 @@ static VALUE zscan_try(VALUE self) {
|
|
282
264
|
return r;
|
283
265
|
}
|
284
266
|
|
267
|
+
// optimized version without pushing and block
|
268
|
+
/*
 * ZScan#_try(r)
 *
 * Second half of `z.push._try expr`: the caller has already pushed the
 * position and evaluated expr into r. A falsy r calls zscan_pop, a truthy r
 * calls zscan_drop; r is returned unchanged either way.
 */
static VALUE zscan__try(VALUE self, VALUE r) {
  if (!RTEST(r)) {
    zscan_pop(self);
    return r;
  }
  zscan_drop(self);
  return r;
}
|
276
|
+
|
285
277
|
static VALUE zscan_zero_or_one(int argc, VALUE* argv, VALUE self) {
|
286
278
|
REQUIRE_BLOCK;
|
287
279
|
volatile VALUE a = Qnil;
|
@@ -380,76 +372,7 @@ VALUE zscan_scan_float(VALUE self) {
|
|
380
372
|
}
|
381
373
|
}
|
382
374
|
|
383
|
-
|
384
|
-
# ifdef DYNAMIC_ENDIAN
|
385
|
-
/* for universal binary of NEXTSTEP and MacOS X */
|
386
|
-
int init = 1;
|
387
|
-
char* p = (char*)&init;
|
388
|
-
return p[0] ? Qfalse : Qtrue;
|
389
|
-
# elif defined(WORDS_BIGENDIAN)
|
390
|
-
return Qtrue;
|
391
|
-
#else
|
392
|
-
return Qfalse;
|
393
|
-
#endif
|
394
|
-
}
|
395
|
-
|
396
|
-
#define GCC_VERSION_SINCE(major, minor, patchlevel) \
|
397
|
-
(defined(__GNUC__) && !defined(__INTEL_COMPILER) && \
|
398
|
-
((__GNUC__ > (major)) || \
|
399
|
-
(__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
|
400
|
-
(__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel))))
|
401
|
-
|
402
|
-
#if GCC_VERSION_SINCE(4,3,0) || defined(__clang__)
|
403
|
-
# define swap32(x) __builtin_bswap32(x)
|
404
|
-
# define swap64(x) __builtin_bswap64(x)
|
405
|
-
#endif
|
406
|
-
|
407
|
-
#ifndef swap16
|
408
|
-
# define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
|
409
|
-
#endif
|
410
|
-
|
411
|
-
#ifndef swap32
|
412
|
-
# define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
|
413
|
-
|(((x)>>24)&0xFF) \
|
414
|
-
|(((x)&0x0000FF00)<<8) \
|
415
|
-
|(((x)&0x00FF0000)>>8) ))
|
416
|
-
#endif
|
417
|
-
|
418
|
-
#ifndef swap64
|
419
|
-
# ifdef HAVE_INT64_T
|
420
|
-
# define byte_in_64bit(n) ((uint64_t)0xff << (n))
|
421
|
-
# define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
|
422
|
-
|(((x)>>56)&0xFF) \
|
423
|
-
|(((x)&byte_in_64bit(8))<<40) \
|
424
|
-
|(((x)&byte_in_64bit(48))>>40) \
|
425
|
-
|(((x)&byte_in_64bit(16))<<24) \
|
426
|
-
|(((x)&byte_in_64bit(40))>>24) \
|
427
|
-
|(((x)&byte_in_64bit(24))<<8) \
|
428
|
-
|(((x)&byte_in_64bit(32))>>8)))
|
429
|
-
# endif
|
430
|
-
#endif
|
431
|
-
|
432
|
-
// NOTE can not use sizeof in preprocessor
|
433
|
-
#define INT64toNUM(x) (sizeof(long) == 8 ? LONG2NUM(x) : LL2NUM(x))
|
434
|
-
#define UINT64toNUM(x) (sizeof(long) == 8 ? ULONG2NUM(x) : ULL2NUM(x))
|
435
|
-
|
436
|
-
#define CAST(var, ty) *((ty*)(&(var)))
|
437
|
-
|
438
|
-
#include "bspec_exec.inc"
|
439
|
-
|
440
|
-
static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
|
441
|
-
P;
|
442
|
-
long s_size = NUM2LONG(rb_iv_get(spec, "@s_size"));
|
443
|
-
if (p->bytepos + s_size > RSTRING_LEN(p->s)) {
|
444
|
-
return Qnil;
|
445
|
-
}
|
446
|
-
VALUE code = rb_iv_get(spec, "@code");
|
447
|
-
long a_size = RSTRING_LEN(code) / sizeof(void*);
|
448
|
-
volatile VALUE a = rb_ary_new2(a_size);
|
449
|
-
bspec_exec((void**)RSTRING_PTR(code), RSTRING_PTR(p->s) + p->bytepos, a);
|
450
|
-
zscan_bytepos_eq(self, LONG2NUM(p->bytepos + s_size));
|
451
|
-
return a;
|
452
|
-
}
|
375
|
+
extern void Init_zscan_bspec(VALUE, const rb_data_type_t*);
|
453
376
|
|
454
377
|
void Init_zscan() {
|
455
378
|
VALUE zscan = rb_define_class("ZScan", rb_cObject);
|
@@ -471,21 +394,11 @@ void Init_zscan() {
|
|
471
394
|
rb_define_method(zscan, "clear_pos_stack", zscan_clear_pos_stack, 0);
|
472
395
|
|
473
396
|
rb_define_method(zscan, "try", zscan_try, 0);
|
397
|
+
rb_define_method(zscan, "_try", zscan__try, 1);
|
474
398
|
rb_define_method(zscan, "zero_or_one", zscan_zero_or_one, -1);
|
475
399
|
rb_define_method(zscan, "zero_or_more", zscan_zero_or_more, -1);
|
476
400
|
rb_define_method(zscan, "one_or_more", zscan_one_or_more, -1);
|
477
401
|
|
478
402
|
rb_define_method(zscan, "scan_float", zscan_scan_float, 0);
|
479
|
-
|
480
|
-
|
481
|
-
VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
|
482
|
-
rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
|
483
|
-
|
484
|
-
# include "bspec_opcode_names.inc"
|
485
|
-
void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
|
486
|
-
for (long i = 0; i < bspec_opcode_size; i++) {
|
487
|
-
VALUE bytecode = rb_str_new((char*)&opcodes[i], sizeof(void*));
|
488
|
-
OBJ_FREEZE(bytecode);
|
489
|
-
rb_define_const(bs, bspec_opcode_names[i], bytecode);
|
490
|
-
}
|
403
|
+
Init_zscan_bspec(zscan, &zscan_type);
|
491
404
|
}
|
data/ext/zscan.h
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#pragma once
|
2
|
+
#include <ruby/ruby.h>
|
3
|
+
#include <ruby/re.h>
|
4
|
+
#include <ruby/encoding.h>
|
5
|
+
#include <ctype.h>
|
6
|
+
|
7
|
+
/* A saved scanner position; the element type of ZScan's stack array. */
typedef struct {
  long pos;      /* position counted in characters - presumably codepoints */
  long bytepos;  /* the same position counted in bytes */
} Pos;
|
11
|
+
|
12
|
+
typedef struct {
|
13
|
+
long pos;
|
14
|
+
long bytepos;
|
15
|
+
VALUE s;
|
16
|
+
long stack_i;
|
17
|
+
long stack_cap;
|
18
|
+
Pos* stack;
|
19
|
+
} ZScan;
|
data/lib/zscan.rb
CHANGED
@@ -3,7 +3,7 @@ require_relative "zscan/instructions"
|
|
3
3
|
require "date"
|
4
4
|
|
5
5
|
class ZScan
|
6
|
-
VERSION = '1.
|
6
|
+
VERSION = '1.2'
|
7
7
|
|
8
8
|
def initialize s, dup=false
|
9
9
|
if s.encoding.ascii_compatible?
|
@@ -139,18 +139,16 @@ class ZScan
|
|
139
139
|
end
|
140
140
|
|
141
141
|
def self.binary_spec &p
|
142
|
-
bs = BinarySpec.new
|
142
|
+
bs = BinarySpec.send :new
|
143
143
|
bs.instance_eval &p
|
144
|
-
bs.
|
144
|
+
bs.send :append, BinarySpec::RET, 0
|
145
145
|
bs
|
146
146
|
end
|
147
147
|
|
148
148
|
class BinarySpec
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
@code = BLANK.dup
|
153
|
-
@s_size = 0
|
149
|
+
private :append
|
150
|
+
class << self
|
151
|
+
private :new
|
154
152
|
end
|
155
153
|
end
|
156
154
|
|
data/lib/zscan/instructions.rb
CHANGED
@@ -3,127 +3,109 @@ class ZScan::BinarySpec
|
|
3
3
|
def int8 n=1
|
4
4
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
5
5
|
n.times do
|
6
|
-
|
7
|
-
@s_size += 1
|
6
|
+
append INT8, 1
|
8
7
|
end
|
9
8
|
end
|
10
9
|
def int16 n=1
|
11
10
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
12
11
|
n.times do
|
13
|
-
|
14
|
-
@s_size += 2
|
12
|
+
append INT16, 2
|
15
13
|
end
|
16
14
|
end
|
17
15
|
def int16_swap n=1
|
18
16
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
19
17
|
n.times do
|
20
|
-
|
21
|
-
@s_size += 2
|
18
|
+
append INT16_SWAP, 2
|
22
19
|
end
|
23
20
|
end
|
24
21
|
def int32 n=1
|
25
22
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
26
23
|
n.times do
|
27
|
-
|
28
|
-
@s_size += 4
|
24
|
+
append INT32, 4
|
29
25
|
end
|
30
26
|
end
|
31
27
|
def int32_swap n=1
|
32
28
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
33
29
|
n.times do
|
34
|
-
|
35
|
-
@s_size += 4
|
30
|
+
append INT32_SWAP, 4
|
36
31
|
end
|
37
32
|
end
|
38
33
|
def int64 n=1
|
39
34
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
40
35
|
n.times do
|
41
|
-
|
42
|
-
@s_size += 8
|
36
|
+
append INT64, 8
|
43
37
|
end
|
44
38
|
end
|
45
39
|
def int64_swap n=1
|
46
40
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
47
41
|
n.times do
|
48
|
-
|
49
|
-
@s_size += 8
|
42
|
+
append INT64_SWAP, 8
|
50
43
|
end
|
51
44
|
end
|
52
45
|
def uint8 n=1
|
53
46
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
54
47
|
n.times do
|
55
|
-
|
56
|
-
@s_size += 1
|
48
|
+
append UINT8, 1
|
57
49
|
end
|
58
50
|
end
|
59
51
|
def uint16 n=1
|
60
52
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
61
53
|
n.times do
|
62
|
-
|
63
|
-
@s_size += 2
|
54
|
+
append UINT16, 2
|
64
55
|
end
|
65
56
|
end
|
66
57
|
def uint16_swap n=1
|
67
58
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
68
59
|
n.times do
|
69
|
-
|
70
|
-
@s_size += 2
|
60
|
+
append UINT16_SWAP, 2
|
71
61
|
end
|
72
62
|
end
|
73
63
|
def uint32 n=1
|
74
64
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
75
65
|
n.times do
|
76
|
-
|
77
|
-
@s_size += 4
|
66
|
+
append UINT32, 4
|
78
67
|
end
|
79
68
|
end
|
80
69
|
def uint32_swap n=1
|
81
70
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
82
71
|
n.times do
|
83
|
-
|
84
|
-
@s_size += 4
|
72
|
+
append UINT32_SWAP, 4
|
85
73
|
end
|
86
74
|
end
|
87
75
|
def uint64 n=1
|
88
76
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
89
77
|
n.times do
|
90
|
-
|
91
|
-
@s_size += 8
|
78
|
+
append UINT64, 8
|
92
79
|
end
|
93
80
|
end
|
94
81
|
def uint64_swap n=1
|
95
82
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
96
83
|
n.times do
|
97
|
-
|
98
|
-
@s_size += 8
|
84
|
+
append UINT64_SWAP, 8
|
99
85
|
end
|
100
86
|
end
|
101
87
|
def single n=1
|
102
88
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
103
89
|
n.times do
|
104
|
-
|
105
|
-
@s_size += 4
|
90
|
+
append SINGLE, 4
|
106
91
|
end
|
107
92
|
end
|
108
93
|
def single_swap n=1
|
109
94
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
110
95
|
n.times do
|
111
|
-
|
112
|
-
@s_size += 4
|
96
|
+
append SINGLE_SWAP, 4
|
113
97
|
end
|
114
98
|
end
|
115
99
|
def double n=1
|
116
100
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
117
101
|
n.times do
|
118
|
-
|
119
|
-
@s_size += 8
|
102
|
+
append DOUBLE, 8
|
120
103
|
end
|
121
104
|
end
|
122
105
|
def double_swap n=1
|
123
106
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
124
107
|
n.times do
|
125
|
-
|
126
|
-
@s_size += 8
|
108
|
+
append DOUBLE_SWAP, 8
|
127
109
|
end
|
128
110
|
end
|
129
111
|
if ZScan::BinarySpec.big_endian?
|
data/rakefile
CHANGED
@@ -143,8 +143,7 @@ file 'lib/zscan/instructions.rb' => __FILE__ do
|
|
143
143
|
def #{ins.downcase} n=1
|
144
144
|
raise ArgumentError, "repeat count should be >= 1, but got \#{n}" if n < 1
|
145
145
|
n.times do
|
146
|
-
|
147
|
-
@s_size += #{bspec_incr ins}
|
146
|
+
append #{ins}, #{bspec_incr ins}
|
148
147
|
end
|
149
148
|
end
|
150
149
|
RUBY
|
data/readme.md
CHANGED
@@ -23,7 +23,7 @@ z.scan /\w+/ #=> 'world'
|
|
23
23
|
z.eos? #=> true
|
24
24
|
```
|
25
25
|
|
26
|
-
## Motivation
|
26
|
+
## Motivation: string scanner
|
27
27
|
|
28
28
|
Ruby's stdlib `StringScanner` treats the scanning position as beginning of string:
|
29
29
|
|
@@ -47,7 +47,7 @@ z.scan /^/ #=> nil
|
|
47
47
|
|
48
48
|
See also https://bugs.ruby-lang.org/issues/7092
|
49
49
|
|
50
|
-
## Other motivations
|
50
|
+
## Other motivations
|
51
51
|
|
52
52
|
- For scan and convert, Ruby's stdlib `Scanf` is slow (it creates a regexp array every time it is called) and cannot cooperate with a scanner.
|
53
53
|
- For date parsing, `strptime` doesn't tell the parsed length.
|
@@ -77,15 +77,6 @@ For convienience
|
|
77
77
|
- `#size`
|
78
78
|
- `#bytesize`
|
79
79
|
|
80
|
-
## Parsing combinators
|
81
|
-
|
82
|
-
Combinators that manage scanner pos and stack state for you. In the combinators, if the returned value of the given block is `nil` or `false`, stops iteration. Can be nested, useful for building parsers.
|
83
|
-
|
84
|
-
- `#try &block` returns `block`'s return.
|
85
|
-
- `#zero_or_one result=[], &block` try to execute 0 or 1 time, returns `result`.
|
86
|
-
- `#zero_or_more result=[], &block` try to execute 0 or more times, also stops iteration if scanner no advance, returns `result`.
|
87
|
-
- `#one_or_more result=[], &block` try to execute 1 or more times, also stops iteration if scanner no advance, returns `nil` or `result`.
|
88
|
-
|
89
80
|
## Pos management
|
90
81
|
|
91
82
|
- `#pos`
|
@@ -97,15 +88,7 @@ Combinators that manage scanner pos and stack state for you. In the combinators,
|
|
97
88
|
- `#reset` go to beginning.
|
98
89
|
- `#terminate` go to end of string.
|
99
90
|
|
100
|
-
##
|
101
|
-
|
102
|
-
- `#push` push current pos into the stack.
|
103
|
-
- `#pop` set current pos to top of the stack, and pop it.
|
104
|
-
- `#drop` drop top of pos stack without changing current pos.
|
105
|
-
- `#restore` set current pos to top of the stack.
|
106
|
-
- `#clear_pos_stack` clear pos stack.
|
107
|
-
|
108
|
-
## `ZScan::BinarySpec`
|
91
|
+
## Binary parsing
|
109
92
|
|
110
93
|
Specify a sequence of binary data. Designed for binary protocol parsing. Example:
|
111
94
|
|
@@ -150,9 +133,25 @@ Endians:
|
|
150
133
|
|
151
134
|
Repeat count must be integer `>= 1`, default is `1`.
|
152
135
|
|
153
|
-
It is implemented as a direct-threaded bytecode interpreter.
|
136
|
+
It is implemented as a direct-threaded bytecode interpreter. A bit faster than `String#unpack`.
|
154
137
|
|
155
|
-
|
138
|
+
## Parsing combinators
|
139
|
+
|
140
|
+
Combinators that manage the scanner pos and stack state for you. In a combinator, if the given block returns `nil` or `false`, iteration stops and the scanner location is restored. Combinators can be nested, which is useful for building parsers.
|
141
|
+
|
142
|
+
- `#try &block` returns `block`'s return.
|
143
|
+
- `#zero_or_one acc=[], &block` tries to execute the block 0 or 1 time, and returns `acc`.
|
144
|
+
- `#zero_or_more acc=[], &block` tries to execute the block 0 or more times, also stops iterating if the scanner does not advance, and returns `acc`.
|
145
|
+
- `#one_or_more acc=[], &block` tries to execute the block 1 or more times, also stops iterating if the scanner does not advance, and returns `nil` or `acc`.
|
146
|
+
|
147
|
+
## (Low level) Efficient pos stack manipulation
|
148
|
+
|
149
|
+
- `#push` push current pos into the stack.
|
150
|
+
- `#pop` set current pos to top of the stack, and pop it.
|
151
|
+
- `#drop` drop top of pos stack without changing current pos.
|
152
|
+
- `#restore` set current pos to top of the stack.
|
153
|
+
- `#clear_pos_stack` clear pos stack.
|
154
|
+
- `z.push._try expr` equivalent to `z.try{ expr }`, but faster because no block is required
|
156
155
|
|
157
156
|
## License
|
158
157
|
|
data/spec/binary_scan_spec.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require_relative "spec_helper"
|
2
3
|
|
3
4
|
describe 'ZScan binary scanning methods' do
|
@@ -11,7 +12,7 @@ describe 'ZScan binary scanning methods' do
|
|
11
12
|
assert_equal nil, (z.unpack 'I')
|
12
13
|
assert_equal 2, z.pos
|
13
14
|
end
|
14
|
-
|
15
|
+
|
15
16
|
it "#scan_binary" do
|
16
17
|
s = ZScan.binary_spec do
|
17
18
|
int8 # once
|
@@ -19,6 +20,7 @@ describe 'ZScan binary scanning methods' do
|
|
19
20
|
double_be 1 # big endian, once
|
20
21
|
single 1
|
21
22
|
end
|
23
|
+
|
22
24
|
a = [-1, 2, 3, 4.0, 3.0]
|
23
25
|
z = ZScan.new(a.pack('cI<2Gf') + 'rest')
|
24
26
|
b = z.scan_binary s
|
data/zscan.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "zscan"
|
3
|
-
s.version = "1.
|
3
|
+
s.version = "1.2" # version mapped from zscan.rb, don't change here
|
4
4
|
s.author = "Zete Lui"
|
5
5
|
s.homepage = "https://github.com/luikore/zscan"
|
6
6
|
s.platform = Gem::Platform::RUBY
|
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.required_ruby_version = ">=1.9.2"
|
10
10
|
s.licenses = ['BSD']
|
11
11
|
|
12
|
-
s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c,inc}}')
|
12
|
+
s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,h,c,inc}}')
|
13
13
|
s.require_paths = ["lib"]
|
14
14
|
s.extensions = ["ext/extconf.rb"]
|
15
15
|
s.rubygems_version = '1.8.24'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.
|
4
|
+
version: '1.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zete Lui
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-05-
|
11
|
+
date: 2013-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: improved string scanner, respects anchors and lookbehinds, supports codepoint
|
14
14
|
positioning
|
@@ -21,6 +21,7 @@ files:
|
|
21
21
|
- rakefile
|
22
22
|
- zscan.gemspec
|
23
23
|
- readme.md
|
24
|
+
- benchmark/one-or-more.rb
|
24
25
|
- benchmark/vs-strscan.rb
|
25
26
|
- benchmark/vs-unpack.rb
|
26
27
|
- ext/extconf.rb
|
@@ -31,6 +32,8 @@ files:
|
|
31
32
|
- spec/spec_helper.rb
|
32
33
|
- spec/typed_scan_spec.rb
|
33
34
|
- spec/zscan_spec.rb
|
35
|
+
- ext/zscan.h
|
36
|
+
- ext/bspec.c
|
34
37
|
- ext/zscan.c
|
35
38
|
- ext/bspec_exec.inc
|
36
39
|
- ext/bspec_opcode_names.inc
|
@@ -59,3 +62,4 @@ signing_key:
|
|
59
62
|
specification_version: 4
|
60
63
|
summary: improved string scanner
|
61
64
|
test_files: []
|
65
|
+
has_rdoc: false
|