zscan 1.1 → 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/benchmark/one-or-more.rb +19 -0
- data/ext/bspec.c +141 -0
- data/ext/extconf.rb +5 -0
- data/ext/zscan.c +14 -101
- data/ext/zscan.h +19 -0
- data/lib/zscan.rb +6 -8
- data/lib/zscan/instructions.rb +18 -36
- data/rakefile +1 -2
- data/readme.md +21 -22
- data/spec/binary_scan_spec.rb +3 -1
- data/zscan.gemspec +2 -2
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71174140986194386fd74477637f21f95810d704
|
4
|
+
data.tar.gz: 72706410ead67f6cd9af96a7bf7024c72d7ef8c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c9b9a9516dfa9b5ced99820f66976868d2c0a521df1eb35a517d262d3519601bb28b2a5e95797de411398b22bea3d9aa504d4af96f1634c1f9353d873d134d0
|
7
|
+
data.tar.gz: d3c3bcc5255f1909acafdf4bb1977d0ac75d73a6e837af0382cc5dd57d4a1e98443898bd49252e9d2d0b9ee3c7bc3eb0561c423bea4a91feb44bccf6138ac86f
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative "../lib/zscan"
require "benchmark"

# Compares two ways of consuming a run of 100 'a' characters, 1000 times each:
# the one_or_more combinator wrapping a single-character scan, and a single
# regexp scan with /a+/. Each timing is printed on its own line.
z = ZScan.new('a' * 100)

# one_or_more combinator: the block is invoked repeatedly by the scanner
combinator_timing = Benchmark.measure do
  1000.times do
    z.pos = 0
    z.one_or_more { z.scan 'a' }
  end
end
puts combinator_timing

# plain regexp scan covering the same input
regexp_timing = Benchmark.measure do
  1000.times do
    z.pos = 0
    z.scan(/a+/)
  end
end
puts regexp_timing
|
data/ext/bspec.c
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
#include "zscan.h"
|
2
|
+
|
3
|
+
static const rb_data_type_t* zscan_type;
|
4
|
+
|
5
|
+
/*
 * Compiled binary spec: a growable array of opcode pointers plus the total
 * number of input bytes the whole sequence consumes.
 */
typedef struct BSpec {
  long s_size;  /* running sum of the byte sizes passed to append */
  long a_size;  /* number of entries currently stored in code */
  long a_cap;   /* number of entries code has room for */
  void** code;  /* heap array of opcode pointers, released in bspec_free */
} BSpec;
|
11
|
+
|
12
|
+
static void bspec_free(void* pp) {
|
13
|
+
BSpec* p = pp;
|
14
|
+
free(p->code);
|
15
|
+
free(p);
|
16
|
+
}
|
17
|
+
|
18
|
+
static size_t bspec_memsize(const void* pp) {
|
19
|
+
const BSpec* p = pp;
|
20
|
+
return p ? sizeof(*p) : 0;
|
21
|
+
}
|
22
|
+
|
23
|
+
static const rb_data_type_t bspec_type = {
|
24
|
+
"ZScan::BinarySpec",
|
25
|
+
{NULL, bspec_free, bspec_memsize}
|
26
|
+
};
|
27
|
+
|
28
|
+
/*
 * Allocator for ZScan::BinarySpec.
 *
 * Creates an empty spec with room for 4 opcodes and wraps it in a typed-data
 * object of class klass. Raises NoMemoryError (via rb_memerror) when either
 * allocation fails instead of dereferencing a NULL pointer.
 *
 * Memory is obtained with malloc so that it pairs with the plain free() calls
 * in bspec_free.
 */
static VALUE bspec_alloc(VALUE klass) {
  BSpec* bs = malloc(sizeof *bs);
  if (!bs) {
    rb_memerror();
  }
  bs->s_size = 0;
  bs->a_size = 0;
  bs->a_cap = 4;
  bs->code = malloc(bs->a_cap * sizeof *bs->code);
  if (!bs->code) {
    /* not yet owned by a Ruby object, so release the struct by hand */
    free(bs);
    rb_memerror();
  }
  return TypedData_Wrap_Struct(klass, &bspec_type, bs);
}
|
36
|
+
|
37
|
+
/*
 * BinarySpec#append(code, s_size)
 *
 * Appends one opcode to the spec and adds s_size to the total number of bytes
 * the spec consumes.
 *
 *   v_code   - String whose first sizeof(void*) bytes hold the opcode pointer
 *              (the opcode constants are built that way in Init_zscan_bspec).
 *   v_s_size - Integer, number of input bytes this opcode consumes (>= 0).
 *
 * Returns self.
 * Raises TypeError when v_code is not a String, ArgumentError when it is too
 * short to hold a pointer or when the size is negative or would overflow,
 * and NoMemoryError when the opcode array cannot grow.
 *
 * All arguments are validated before the spec is modified, so a raised
 * exception leaves the spec unchanged.
 */
static VALUE bspec_append(VALUE self, VALUE v_code, VALUE v_s_size) {
  BSpec* bs = rb_check_typeddata(self, &bspec_type);

  long s_size = NUM2LONG(v_s_size);
  if (s_size < 0) {
    rb_raise(rb_eArgError, "size should be >= 0");
  }
  if (s_size > LONG_MAX - bs->s_size) {
    rb_raise(rb_eArgError, "binary spec is too large");
  }
  Check_Type(v_code, T_STRING);
  if (RSTRING_LEN(v_code) < (long)sizeof(void*)) {
    rb_raise(rb_eArgError, "opcode string is too short");
  }

  if (bs->a_size == bs->a_cap) {
    /* grow into a temporary so the old array stays valid if realloc fails */
    long new_cap = bs->a_cap * 2;
    void** grown = realloc(bs->code, (size_t)new_cap * sizeof *grown);
    if (!grown) {
      rb_memerror();
    }
    bs->code = grown;
    bs->a_cap = new_cap;
  }

  /* copy the pointer bytes out instead of dereferencing a cast char pointer */
  void* op;
  memcpy(&op, RSTRING_PTR(v_code), sizeof op);
  bs->code[bs->a_size++] = op;
  bs->s_size += s_size;
  return self;
}
|
48
|
+
|
49
|
+
/*
 * BinarySpec.big_endian?
 *
 * Returns Qtrue on a big-endian build, Qfalse otherwise. With DYNAMIC_ENDIAN
 * the answer is probed at run time from the first byte of an int holding 1;
 * otherwise it is fixed at compile time by WORDS_BIGENDIAN.
 */
static VALUE bspec_big_endian_p(VALUE self) {
#if defined(DYNAMIC_ENDIAN)
  /* for universal binary of NEXTSTEP and MacOS X */
  int probe = 1;
  const char* first_byte = (const char*)&probe;
  if (first_byte[0]) {
    return Qfalse;
  }
  return Qtrue;
#elif defined(WORDS_BIGENDIAN)
  return Qtrue;
#else
  return Qfalse;
#endif
}
|
61
|
+
|
62
|
+
/*
 * GCC_VERSION_SINCE(major, minor, patchlevel) evaluates to non-zero when the
 * compiler is GCC (or reports itself as GCC, but is not the Intel compiler)
 * at or above the given version, and to 0 otherwise.
 *
 * The compiler test is done outside the macro body: a `defined` operator that
 * appears through macro expansion inside #if has undefined behaviour, so the
 * macro itself must expand to a plain integer expression.
 */
#ifndef GCC_VERSION_SINCE
# if defined(__GNUC__) && !defined(__INTEL_COMPILER)
#  define GCC_VERSION_SINCE(major, minor, patchlevel) \
    ((__GNUC__ > (major)) || \
     (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
     (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel)))
# else
#  define GCC_VERSION_SINCE(major, minor, patchlevel) 0
# endif
#endif

/* use the byte-swap builtins where the compiler provides them */
#if GCC_VERSION_SINCE(4,3,0) || defined(__clang__)
# define swap32(x) __builtin_bswap32(x)
# define swap64(x) __builtin_bswap64(x)
#endif
|
72
|
+
|
73
|
+
#ifndef swap16
|
74
|
+
# define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
|
75
|
+
#endif
|
76
|
+
|
77
|
+
#ifndef swap32
|
78
|
+
# define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
|
79
|
+
|(((x)>>24)&0xFF) \
|
80
|
+
|(((x)&0x0000FF00)<<8) \
|
81
|
+
|(((x)&0x00FF0000)>>8)))
|
82
|
+
#endif
|
83
|
+
|
84
|
+
#ifndef swap64
|
85
|
+
# define byte_in_64bit(n) ((uint64_t)0xff << (n))
|
86
|
+
# define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
|
87
|
+
|(((x)>>56)&0xFF) \
|
88
|
+
|(((x)&byte_in_64bit(8))<<40) \
|
89
|
+
|(((x)&byte_in_64bit(48))>>40) \
|
90
|
+
|(((x)&byte_in_64bit(16))<<24) \
|
91
|
+
|(((x)&byte_in_64bit(40))>>24) \
|
92
|
+
|(((x)&byte_in_64bit(24))<<8) \
|
93
|
+
|(((x)&byte_in_64bit(32))>>8)))
|
94
|
+
#endif
|
95
|
+
|
96
|
+
// NOTE can not use sizeof in preprocessor
|
97
|
+
#define INT64toNUM(x) (sizeof(long) == 8 ? LONG2NUM(x) : LL2NUM(x))
|
98
|
+
#define UINT64toNUM(x) (sizeof(long) == 8 ? ULONG2NUM(x) : ULL2NUM(x))
|
99
|
+
|
100
|
+
#define CAST(var, ty) *((ty*)(&(var)))
|
101
|
+
|
102
|
+
#include "bspec_exec.inc"
|
103
|
+
|
104
|
+
/*
 * ZScan#scan_binary(spec)
 *
 * Runs the opcodes of a ZScan::BinarySpec against the source string at the
 * current byte position and collects the decoded values into an array.
 *
 * Returns:
 *   - an empty array when the spec holds no opcodes,
 *   - nil (without moving the scanner) when fewer than spec.s_size bytes remain,
 *   - otherwise the array filled in by bspec_exec, after advancing both
 *     bytepos and pos by s_size.
 *
 * Raises RuntimeError when the source string's encoding is not
 * ASCII-compatible, and TypeError (from rb_check_typeddata) when self or spec
 * is not of the expected wrapped type.
 */
static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
  ZScan* p = rb_check_typeddata(self, zscan_type);
  if (!rb_enc_str_asciicompat_p(p->s)) {
    /* rb_raise does not return, so no fallback return value is needed */
    rb_raise(rb_eRuntimeError, "encoding of source string should be ascii-compatible");
  }

  BSpec* bs = rb_check_typeddata(spec, &bspec_type);
  if (bs->a_size == 0) {
    return rb_ary_new();
  }

  long s_size = bs->s_size;
  /* compare against the remaining length; the sum bytepos + s_size could overflow */
  if (s_size > RSTRING_LEN(p->s) - p->bytepos) {
    return Qnil;
  }

  /* one slot per opcode except the last; presumably the last one is the terminating RET */
  volatile VALUE a = rb_ary_new2(bs->a_size - 1);
  bspec_exec(bs->code, RSTRING_PTR(p->s) + p->bytepos, a);
  p->bytepos += s_size;
  /* NOTE(review): pos advances by the byte count; confirm this is right when the consumed bytes are not single-byte characters */
  p->pos += s_size;
  return a;
}
|
124
|
+
|
125
|
+
/*
 * Registers the binary-spec part of the extension.
 *
 *   zscan       - the ZScan class object
 *   _zscan_type - typed-data descriptor of ZScan, kept so that
 *                 zscan_scan_binary can unwrap its receiver
 *
 * Defines ZScan#scan_binary, the ZScan::BinarySpec class (allocator,
 * .big_endian?, #append) and one frozen String constant per opcode name.
 */
void Init_zscan_bspec(VALUE zscan, const rb_data_type_t* _zscan_type) {
  zscan_type = _zscan_type;

  VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
  rb_define_alloc_func(bs, bspec_alloc);
  rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
  rb_define_method(bs, "append", bspec_append, 2);
  rb_define_method(zscan, "scan_binary", zscan_scan_binary, 1);

  /* supplies bspec_opcode_names and bspec_opcode_size used below */
# include "bspec_opcode_names.inc"

  /* called with a NULL program, bspec_exec hands back its opcode table */
  void** opcode_table = (void**)bspec_exec(NULL, NULL, Qnil);
  long k = 0;
  while (k < bspec_opcode_size) {
    /* each constant is the raw bytes of one opcode pointer */
    VALUE packed = rb_str_new((char*)(opcode_table + k), sizeof(void*));
    OBJ_FREEZE(packed);
    rb_define_const(bs, bspec_opcode_names[k], packed);
    k++;
  }
}
|
data/ext/extconf.rb
CHANGED
data/ext/zscan.c
CHANGED
@@ -1,24 +1,6 @@
|
|
1
|
-
#include
|
2
|
-
#include <ruby/re.h>
|
3
|
-
#include <ruby/encoding.h>
|
4
|
-
#include <ctype.h>
|
5
|
-
|
1
|
+
#include "zscan.h"
|
6
2
|
// todo infect check
|
7
3
|
|
8
|
-
typedef struct {
|
9
|
-
long pos;
|
10
|
-
long bytepos;
|
11
|
-
} Pos;
|
12
|
-
|
13
|
-
typedef struct {
|
14
|
-
long pos;
|
15
|
-
long bytepos;
|
16
|
-
VALUE s;
|
17
|
-
long stack_i;
|
18
|
-
long stack_cap;
|
19
|
-
Pos* stack;
|
20
|
-
} ZScan;
|
21
|
-
|
22
4
|
static void zscan_mark(void* pp) {
|
23
5
|
ZScan* p = pp;
|
24
6
|
rb_gc_mark(p->s);
|
@@ -282,6 +264,16 @@ static VALUE zscan_try(VALUE self) {
|
|
282
264
|
return r;
|
283
265
|
}
|
284
266
|
|
267
|
+
// optimized version without pushing and block
|
268
|
+
static VALUE zscan__try(VALUE self, VALUE r) {
|
269
|
+
if (RTEST(r)) {
|
270
|
+
zscan_drop(self);
|
271
|
+
} else {
|
272
|
+
zscan_pop(self);
|
273
|
+
}
|
274
|
+
return r;
|
275
|
+
}
|
276
|
+
|
285
277
|
static VALUE zscan_zero_or_one(int argc, VALUE* argv, VALUE self) {
|
286
278
|
REQUIRE_BLOCK;
|
287
279
|
volatile VALUE a = Qnil;
|
@@ -380,76 +372,7 @@ VALUE zscan_scan_float(VALUE self) {
|
|
380
372
|
}
|
381
373
|
}
|
382
374
|
|
383
|
-
|
384
|
-
# ifdef DYNAMIC_ENDIAN
|
385
|
-
/* for universal binary of NEXTSTEP and MacOS X */
|
386
|
-
int init = 1;
|
387
|
-
char* p = (char*)&init;
|
388
|
-
return p[0] ? Qfalse : Qtrue;
|
389
|
-
# elif defined(WORDS_BIGENDIAN)
|
390
|
-
return Qtrue;
|
391
|
-
#else
|
392
|
-
return Qfalse;
|
393
|
-
#endif
|
394
|
-
}
|
395
|
-
|
396
|
-
#define GCC_VERSION_SINCE(major, minor, patchlevel) \
|
397
|
-
(defined(__GNUC__) && !defined(__INTEL_COMPILER) && \
|
398
|
-
((__GNUC__ > (major)) || \
|
399
|
-
(__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
|
400
|
-
(__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel))))
|
401
|
-
|
402
|
-
#if GCC_VERSION_SINCE(4,3,0) || defined(__clang__)
|
403
|
-
# define swap32(x) __builtin_bswap32(x)
|
404
|
-
# define swap64(x) __builtin_bswap64(x)
|
405
|
-
#endif
|
406
|
-
|
407
|
-
#ifndef swap16
|
408
|
-
# define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
|
409
|
-
#endif
|
410
|
-
|
411
|
-
#ifndef swap32
|
412
|
-
# define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
|
413
|
-
|(((x)>>24)&0xFF) \
|
414
|
-
|(((x)&0x0000FF00)<<8) \
|
415
|
-
|(((x)&0x00FF0000)>>8) ))
|
416
|
-
#endif
|
417
|
-
|
418
|
-
#ifndef swap64
|
419
|
-
# ifdef HAVE_INT64_T
|
420
|
-
# define byte_in_64bit(n) ((uint64_t)0xff << (n))
|
421
|
-
# define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
|
422
|
-
|(((x)>>56)&0xFF) \
|
423
|
-
|(((x)&byte_in_64bit(8))<<40) \
|
424
|
-
|(((x)&byte_in_64bit(48))>>40) \
|
425
|
-
|(((x)&byte_in_64bit(16))<<24) \
|
426
|
-
|(((x)&byte_in_64bit(40))>>24) \
|
427
|
-
|(((x)&byte_in_64bit(24))<<8) \
|
428
|
-
|(((x)&byte_in_64bit(32))>>8)))
|
429
|
-
# endif
|
430
|
-
#endif
|
431
|
-
|
432
|
-
// NOTE can not use sizeof in preprocessor
|
433
|
-
#define INT64toNUM(x) (sizeof(long) == 8 ? LONG2NUM(x) : LL2NUM(x))
|
434
|
-
#define UINT64toNUM(x) (sizeof(long) == 8 ? ULONG2NUM(x) : ULL2NUM(x))
|
435
|
-
|
436
|
-
#define CAST(var, ty) *((ty*)(&(var)))
|
437
|
-
|
438
|
-
#include "bspec_exec.inc"
|
439
|
-
|
440
|
-
static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
|
441
|
-
P;
|
442
|
-
long s_size = NUM2LONG(rb_iv_get(spec, "@s_size"));
|
443
|
-
if (p->bytepos + s_size > RSTRING_LEN(p->s)) {
|
444
|
-
return Qnil;
|
445
|
-
}
|
446
|
-
VALUE code = rb_iv_get(spec, "@code");
|
447
|
-
long a_size = RSTRING_LEN(code) / sizeof(void*);
|
448
|
-
volatile VALUE a = rb_ary_new2(a_size);
|
449
|
-
bspec_exec((void**)RSTRING_PTR(code), RSTRING_PTR(p->s) + p->bytepos, a);
|
450
|
-
zscan_bytepos_eq(self, LONG2NUM(p->bytepos + s_size));
|
451
|
-
return a;
|
452
|
-
}
|
375
|
+
extern void Init_zscan_bspec(VALUE, const rb_data_type_t*);
|
453
376
|
|
454
377
|
void Init_zscan() {
|
455
378
|
VALUE zscan = rb_define_class("ZScan", rb_cObject);
|
@@ -471,21 +394,11 @@ void Init_zscan() {
|
|
471
394
|
rb_define_method(zscan, "clear_pos_stack", zscan_clear_pos_stack, 0);
|
472
395
|
|
473
396
|
rb_define_method(zscan, "try", zscan_try, 0);
|
397
|
+
rb_define_method(zscan, "_try", zscan__try, 1);
|
474
398
|
rb_define_method(zscan, "zero_or_one", zscan_zero_or_one, -1);
|
475
399
|
rb_define_method(zscan, "zero_or_more", zscan_zero_or_more, -1);
|
476
400
|
rb_define_method(zscan, "one_or_more", zscan_one_or_more, -1);
|
477
401
|
|
478
402
|
rb_define_method(zscan, "scan_float", zscan_scan_float, 0);
|
479
|
-
|
480
|
-
|
481
|
-
VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
|
482
|
-
rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
|
483
|
-
|
484
|
-
# include "bspec_opcode_names.inc"
|
485
|
-
void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
|
486
|
-
for (long i = 0; i < bspec_opcode_size; i++) {
|
487
|
-
VALUE bytecode = rb_str_new((char*)&opcodes[i], sizeof(void*));
|
488
|
-
OBJ_FREEZE(bytecode);
|
489
|
-
rb_define_const(bs, bspec_opcode_names[i], bytecode);
|
490
|
-
}
|
403
|
+
Init_zscan_bspec(zscan, &zscan_type);
|
491
404
|
}
|
data/ext/zscan.h
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#pragma once
|
2
|
+
#include <ruby/ruby.h>
|
3
|
+
#include <ruby/re.h>
|
4
|
+
#include <ruby/encoding.h>
|
5
|
+
#include <ctype.h>
|
6
|
+
|
7
|
+
/* A saved scanner location, as stored on the ZScan position stack. */
typedef struct Pos {
  long pos;      /* presumably the character (codepoint) index; confirm against zscan.c */
  long bytepos;  /* byte offset into the source string */
} Pos;
|
11
|
+
|
12
|
+
typedef struct {
|
13
|
+
long pos;
|
14
|
+
long bytepos;
|
15
|
+
VALUE s;
|
16
|
+
long stack_i;
|
17
|
+
long stack_cap;
|
18
|
+
Pos* stack;
|
19
|
+
} ZScan;
|
data/lib/zscan.rb
CHANGED
@@ -3,7 +3,7 @@ require_relative "zscan/instructions"
|
|
3
3
|
require "date"
|
4
4
|
|
5
5
|
class ZScan
|
6
|
-
VERSION = '1.
|
6
|
+
VERSION = '1.2'
|
7
7
|
|
8
8
|
def initialize s, dup=false
|
9
9
|
if s.encoding.ascii_compatible?
|
@@ -139,18 +139,16 @@ class ZScan
|
|
139
139
|
end
|
140
140
|
|
141
141
|
def self.binary_spec &p
|
142
|
-
bs = BinarySpec.new
|
142
|
+
bs = BinarySpec.send :new
|
143
143
|
bs.instance_eval &p
|
144
|
-
bs.
|
144
|
+
bs.send :append, BinarySpec::RET, 0
|
145
145
|
bs
|
146
146
|
end
|
147
147
|
|
148
148
|
class BinarySpec
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
@code = BLANK.dup
|
153
|
-
@s_size = 0
|
149
|
+
private :append
|
150
|
+
class << self
|
151
|
+
private :new
|
154
152
|
end
|
155
153
|
end
|
156
154
|
|
data/lib/zscan/instructions.rb
CHANGED
@@ -3,127 +3,109 @@ class ZScan::BinarySpec
|
|
3
3
|
def int8 n=1
|
4
4
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
5
5
|
n.times do
|
6
|
-
|
7
|
-
@s_size += 1
|
6
|
+
append INT8, 1
|
8
7
|
end
|
9
8
|
end
|
10
9
|
def int16 n=1
|
11
10
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
12
11
|
n.times do
|
13
|
-
|
14
|
-
@s_size += 2
|
12
|
+
append INT16, 2
|
15
13
|
end
|
16
14
|
end
|
17
15
|
def int16_swap n=1
|
18
16
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
19
17
|
n.times do
|
20
|
-
|
21
|
-
@s_size += 2
|
18
|
+
append INT16_SWAP, 2
|
22
19
|
end
|
23
20
|
end
|
24
21
|
def int32 n=1
|
25
22
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
26
23
|
n.times do
|
27
|
-
|
28
|
-
@s_size += 4
|
24
|
+
append INT32, 4
|
29
25
|
end
|
30
26
|
end
|
31
27
|
def int32_swap n=1
|
32
28
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
33
29
|
n.times do
|
34
|
-
|
35
|
-
@s_size += 4
|
30
|
+
append INT32_SWAP, 4
|
36
31
|
end
|
37
32
|
end
|
38
33
|
def int64 n=1
|
39
34
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
40
35
|
n.times do
|
41
|
-
|
42
|
-
@s_size += 8
|
36
|
+
append INT64, 8
|
43
37
|
end
|
44
38
|
end
|
45
39
|
def int64_swap n=1
|
46
40
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
47
41
|
n.times do
|
48
|
-
|
49
|
-
@s_size += 8
|
42
|
+
append INT64_SWAP, 8
|
50
43
|
end
|
51
44
|
end
|
52
45
|
def uint8 n=1
|
53
46
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
54
47
|
n.times do
|
55
|
-
|
56
|
-
@s_size += 1
|
48
|
+
append UINT8, 1
|
57
49
|
end
|
58
50
|
end
|
59
51
|
def uint16 n=1
|
60
52
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
61
53
|
n.times do
|
62
|
-
|
63
|
-
@s_size += 2
|
54
|
+
append UINT16, 2
|
64
55
|
end
|
65
56
|
end
|
66
57
|
def uint16_swap n=1
|
67
58
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
68
59
|
n.times do
|
69
|
-
|
70
|
-
@s_size += 2
|
60
|
+
append UINT16_SWAP, 2
|
71
61
|
end
|
72
62
|
end
|
73
63
|
def uint32 n=1
|
74
64
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
75
65
|
n.times do
|
76
|
-
|
77
|
-
@s_size += 4
|
66
|
+
append UINT32, 4
|
78
67
|
end
|
79
68
|
end
|
80
69
|
def uint32_swap n=1
|
81
70
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
82
71
|
n.times do
|
83
|
-
|
84
|
-
@s_size += 4
|
72
|
+
append UINT32_SWAP, 4
|
85
73
|
end
|
86
74
|
end
|
87
75
|
def uint64 n=1
|
88
76
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
89
77
|
n.times do
|
90
|
-
|
91
|
-
@s_size += 8
|
78
|
+
append UINT64, 8
|
92
79
|
end
|
93
80
|
end
|
94
81
|
def uint64_swap n=1
|
95
82
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
96
83
|
n.times do
|
97
|
-
|
98
|
-
@s_size += 8
|
84
|
+
append UINT64_SWAP, 8
|
99
85
|
end
|
100
86
|
end
|
101
87
|
def single n=1
|
102
88
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
103
89
|
n.times do
|
104
|
-
|
105
|
-
@s_size += 4
|
90
|
+
append SINGLE, 4
|
106
91
|
end
|
107
92
|
end
|
108
93
|
def single_swap n=1
|
109
94
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
110
95
|
n.times do
|
111
|
-
|
112
|
-
@s_size += 4
|
96
|
+
append SINGLE_SWAP, 4
|
113
97
|
end
|
114
98
|
end
|
115
99
|
def double n=1
|
116
100
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
117
101
|
n.times do
|
118
|
-
|
119
|
-
@s_size += 8
|
102
|
+
append DOUBLE, 8
|
120
103
|
end
|
121
104
|
end
|
122
105
|
def double_swap n=1
|
123
106
|
raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
|
124
107
|
n.times do
|
125
|
-
|
126
|
-
@s_size += 8
|
108
|
+
append DOUBLE_SWAP, 8
|
127
109
|
end
|
128
110
|
end
|
129
111
|
if ZScan::BinarySpec.big_endian?
|
data/rakefile
CHANGED
@@ -143,8 +143,7 @@ file 'lib/zscan/instructions.rb' => __FILE__ do
|
|
143
143
|
def #{ins.downcase} n=1
|
144
144
|
raise ArgumentError, "repeat count should be >= 1, but got \#{n}" if n < 1
|
145
145
|
n.times do
|
146
|
-
|
147
|
-
@s_size += #{bspec_incr ins}
|
146
|
+
append #{ins}, #{bspec_incr ins}
|
148
147
|
end
|
149
148
|
end
|
150
149
|
RUBY
|
data/readme.md
CHANGED
@@ -23,7 +23,7 @@ z.scan /\w+/ #=> 'world'
|
|
23
23
|
z.eos? #=> true
|
24
24
|
```
|
25
25
|
|
26
|
-
## Motivation
|
26
|
+
## Motivation: string scanner
|
27
27
|
|
28
28
|
Ruby's stdlib `StringScanner` treats the scanning position as beginning of string:
|
29
29
|
|
@@ -47,7 +47,7 @@ z.scan /^/ #=> nil
|
|
47
47
|
|
48
48
|
See also https://bugs.ruby-lang.org/issues/7092
|
49
49
|
|
50
|
-
## Other motivations
|
50
|
+
## Other motivations
|
51
51
|
|
52
52
|
- For scan and convert, ruby's stdlib `Scanf` is slow (it creates a regexp array every time it is called) and cannot cooperate with a scanner.
|
53
53
|
- For date parsing, `strptime` doesn't tell the parsed length.
|
@@ -77,15 +77,6 @@ For convienience
|
|
77
77
|
- `#size`
|
78
78
|
- `#bytesize`
|
79
79
|
|
80
|
-
## Parsing combinators
|
81
|
-
|
82
|
-
Combinators that manage scanner pos and stack state for you. In the combinators, if the returned value of the given block is `nil` or `false`, stops iteration. Can be nested, useful for building parsers.
|
83
|
-
|
84
|
-
- `#try &block` returns `block`'s return.
|
85
|
-
- `#zero_or_one result=[], &block` try to execute 0 or 1 time, returns `result`.
|
86
|
-
- `#zero_or_more result=[], &block` try to execute 0 or more times, also stops iteration if scanner no advance, returns `result`.
|
87
|
-
- `#one_or_more result=[], &block` try to execute 1 or more times, also stops iteration if scanner no advance, returns `nil` or `result`.
|
88
|
-
|
89
80
|
## Pos management
|
90
81
|
|
91
82
|
- `#pos`
|
@@ -97,15 +88,7 @@ Combinators that manage scanner pos and stack state for you. In the combinators,
|
|
97
88
|
- `#reset` go to beginning.
|
98
89
|
- `#terminate` go to end of string.
|
99
90
|
|
100
|
-
##
|
101
|
-
|
102
|
-
- `#push` push current pos into the stack.
|
103
|
-
- `#pop` set current pos to top of the stack, and pop it.
|
104
|
-
- `#drop` drop top of pos stack without changing current pos.
|
105
|
-
- `#restore` set current pos to top of the stack.
|
106
|
-
- `#clear_pos_stack` clear pos stack.
|
107
|
-
|
108
|
-
## `ZScan::BinarySpec`
|
91
|
+
## Binary parsing
|
109
92
|
|
110
93
|
Specify a sequence of binary data. Designed for binary protocol parsing. Example:
|
111
94
|
|
@@ -150,9 +133,25 @@ Endians:
|
|
150
133
|
|
151
134
|
Repeat count must be integer `>= 1`, default is `1`.
|
152
135
|
|
153
|
-
It is implemented as a direct-threaded bytecode interpreter.
|
136
|
+
It is implemented as a direct-threaded bytecode interpreter. A bit faster than `String#unpack`.
|
154
137
|
|
155
|
-
|
138
|
+
## Parsing combinators
|
139
|
+
|
140
|
+
Combinators that manage scanner pos and stack state for you. In the combinators, if the returned value of the given block is `nil` or `false`, stops iteration and restores scanner location. Can be nested, useful for building parsers.
|
141
|
+
|
142
|
+
- `#try &block` returns `block`'s return.
|
143
|
+
- `#zero_or_one acc=[], &block` try to execute 0 or 1 time, returns `acc`.
|
144
|
+
- `#zero_or_more acc=[], &block` try to execute 0 or more times, also stops iteration if the scanner does not advance, returns `acc`.
|
145
|
+
- `#one_or_more acc=[], &block` try to execute 1 or more times, also stops iteration if the scanner does not advance, returns `nil` or `acc`.
|
146
|
+
|
147
|
+
## (Low level) Efficient pos stack manipulation
|
148
|
+
|
149
|
+
- `#push` push current pos into the stack.
|
150
|
+
- `#pop` set current pos to top of the stack, and pop it.
|
151
|
+
- `#drop` drop top of pos stack without changing current pos.
|
152
|
+
- `#restore` set current pos to top of the stack.
|
153
|
+
- `#clear_pos_stack` clear pos stack.
|
154
|
+
- `z.push._try expr` equivalent to `z.try{ expr }`, but faster because no block is required
|
156
155
|
|
157
156
|
## License
|
158
157
|
|
data/spec/binary_scan_spec.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require_relative "spec_helper"
|
2
3
|
|
3
4
|
describe 'ZScan binary scanning methods' do
|
@@ -11,7 +12,7 @@ describe 'ZScan binary scanning methods' do
|
|
11
12
|
assert_equal nil, (z.unpack 'I')
|
12
13
|
assert_equal 2, z.pos
|
13
14
|
end
|
14
|
-
|
15
|
+
|
15
16
|
it "#scan_binary" do
|
16
17
|
s = ZScan.binary_spec do
|
17
18
|
int8 # once
|
@@ -19,6 +20,7 @@ describe 'ZScan binary scanning methods' do
|
|
19
20
|
double_be 1 # big endian, once
|
20
21
|
single 1
|
21
22
|
end
|
23
|
+
|
22
24
|
a = [-1, 2, 3, 4.0, 3.0]
|
23
25
|
z = ZScan.new(a.pack('cI<2Gf') + 'rest')
|
24
26
|
b = z.scan_binary s
|
data/zscan.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "zscan"
|
3
|
-
s.version = "1.
|
3
|
+
s.version = "1.2" # version mapped from zscan.rb, don't change here
|
4
4
|
s.author = "Zete Lui"
|
5
5
|
s.homepage = "https://github.com/luikore/zscan"
|
6
6
|
s.platform = Gem::Platform::RUBY
|
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.required_ruby_version = ">=1.9.2"
|
10
10
|
s.licenses = ['BSD']
|
11
11
|
|
12
|
-
s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c,inc}}')
|
12
|
+
s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,h,c,inc}}')
|
13
13
|
s.require_paths = ["lib"]
|
14
14
|
s.extensions = ["ext/extconf.rb"]
|
15
15
|
s.rubygems_version = '1.8.24'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.
|
4
|
+
version: '1.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zete Lui
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-05-
|
11
|
+
date: 2013-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: improved string scanner, respects anchors and lookbehinds, supports codepoint
|
14
14
|
positioning
|
@@ -21,6 +21,7 @@ files:
|
|
21
21
|
- rakefile
|
22
22
|
- zscan.gemspec
|
23
23
|
- readme.md
|
24
|
+
- benchmark/one-or-more.rb
|
24
25
|
- benchmark/vs-strscan.rb
|
25
26
|
- benchmark/vs-unpack.rb
|
26
27
|
- ext/extconf.rb
|
@@ -31,6 +32,8 @@ files:
|
|
31
32
|
- spec/spec_helper.rb
|
32
33
|
- spec/typed_scan_spec.rb
|
33
34
|
- spec/zscan_spec.rb
|
35
|
+
- ext/zscan.h
|
36
|
+
- ext/bspec.c
|
34
37
|
- ext/zscan.c
|
35
38
|
- ext/bspec_exec.inc
|
36
39
|
- ext/bspec_opcode_names.inc
|
@@ -59,3 +62,4 @@ signing_key:
|
|
59
62
|
specification_version: 4
|
60
63
|
summary: improved string scanner
|
61
64
|
test_files: []
|
65
|
+
has_rdoc: false
|