zscan 1.3 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/zscan.c CHANGED
@@ -46,7 +46,7 @@ static VALUE zscan_internal_string(VALUE self) {
46
46
 
47
47
  static VALUE zscan_pos(VALUE self) { /* Ruby-visible reader: returns the scanner's p->pos as a Ruby Integer. */
48
48
  P;
49
- return ULONG2NUM(p->pos);
49
+ return LONG2NUM(p->pos); /* signed conversion; presumably p->pos is declared as a signed long -- TODO confirm in zscan.h */
50
50
  }
51
51
 
52
52
  static VALUE zscan_advance(VALUE self, VALUE v_diff) {
@@ -83,10 +83,10 @@ static VALUE zscan_advance(VALUE self, VALUE v_diff) {
83
83
 
84
84
  static VALUE zscan_bytepos(VALUE self) { /* Ruby-visible reader: returns the scanner's p->bytepos as a Ruby Integer. */
85
85
  P;
86
- return ULONG2NUM(p->bytepos);
86
+ return LONG2NUM(p->bytepos); /* signed conversion, matching the long arithmetic used on p->bytepos elsewhere in this file */
87
87
  }
88
88
 
89
- static VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos) {
89
+ VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos) {
90
90
  P;
91
91
  long signed_bytepos = NUM2LONG(v_bytepos);
92
92
  long from, to, bytepos;
@@ -170,13 +170,22 @@ static VALUE zscan_rest_bytesize(VALUE self) {
170
170
  return LONG2NUM(RSTRING_LEN(p->s) - p->bytepos);
171
171
  }
172
172
 
173
+ static VALUE zscan_cleanup(VALUE self) { /* Replaces the scanned string with its unread tail and resets both cursors to 0; returns self. */
174
+ P; /* presumably binds `p` to this object's ZScan struct (macro defined outside this view) */
175
+ long rest_len = RSTRING_LEN(p->s) - p->bytepos; /* number of bytes after the byte cursor */
176
+ p->s = rb_funcall(p->s, rb_intern("byteslice"), 2, LONG2NUM(p->bytepos), LONG2NUM(rest_len)); /* String#byteslice(start, length) */
177
+ p->bytepos = 0; /* NOTE(review): nothing here adjusts positions saved on p->stack -- confirm that is intended */
178
+ p->pos = 0;
179
+ return self;
180
+ }
181
+
173
182
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
174
183
  static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
175
184
  P;
176
185
  if (TYPE(pattern) == T_STRING) {
177
- volatile VALUE ss = rb_funcall(p->s, rb_intern("byteslice"), 2, ULONG2NUM(p->bytepos), ULONG2NUM(RSTRING_LEN(p->s)));
186
+ volatile VALUE ss = rb_funcall(p->s, rb_intern("byteslice"), 2, LONG2NUM(p->bytepos), LONG2NUM(RSTRING_LEN(p->s) - p->bytepos));
178
187
  if (RTEST(rb_funcall(ss, rb_intern("start_with?"), 1, pattern))) {
179
- return ULONG2NUM(RSTRING_LEN(pattern));
188
+ return LONG2NUM(RSTRING_LEN(pattern));
180
189
  }
181
190
  } else if (TYPE(pattern) == T_REGEXP) {
182
191
  regex_t *re = rb_reg_prepare_re(pattern, p->s); // prepare with compatible encoding
@@ -228,6 +237,18 @@ static VALUE zscan_scan(VALUE self, VALUE pattern) {
228
237
  }
229
238
  }
230
239
 
240
+ static VALUE zscan_skip(VALUE self, VALUE pattern) { /* Advances past `pattern` if it matches at the cursor; returns the new byte position, or nil when zscan_match_bytesize returns nil. */
241
+ volatile VALUE v_bytelen = zscan_match_bytesize(self, pattern); /* matched length in bytes, or Qnil */
242
+ if (v_bytelen == Qnil) {
243
+ return Qnil;
244
+ } else {
245
+ P;
246
+ long bytepos = p->bytepos + NUM2LONG(v_bytelen); /* target byte offset after the match */
247
+ zscan_bytepos_eq(self, LONG2NUM(bytepos)); /* moves the cursor; presumably the C implementation of bytepos= -- TODO confirm */
248
+ return LONG2NUM(bytepos);
249
+ }
250
+ }
251
+
231
252
  static VALUE zscan_push(VALUE self) {
232
253
  P;
233
254
  if (p->stack_i + 1 == p->stack_cap) {
@@ -401,7 +422,14 @@ VALUE zscan_scan_float(VALUE self) {
401
422
  }
402
423
  }
403
424
 
404
- extern void Init_zscan_bspec(VALUE, const rb_data_type_t*);
425
+ VALUE zscan_unpack(VALUE self, VALUE fmt) { /* Unpacks the rest of the string with `fmt`, advances the byte cursor by the reported length, and returns the unpack result. */
426
+ P;
427
+ long parsed_len = 0; /* out-parameter filled by zscan_internal_unpack; stays 0 if the callee never writes it */
428
+ volatile VALUE str = zscan_rest(self); /* volatile: presumably keeps the temporary visible to the GC across the call below */
429
+ VALUE r = zscan_internal_unpack(str, fmt, &parsed_len); /* declared in zscan.h, defined outside this view */
430
+ zscan_bytepos_eq(self, LONG2NUM(p->bytepos + parsed_len)); /* NOTE(review): the cursor is moved whatever `r` is -- confirm behaviour on a failed unpack */
431
+ return r;
432
+ }
405
433
 
406
434
  void Init_zscan() {
407
435
  VALUE zscan = rb_define_class("ZScan", rb_cObject);
@@ -417,9 +445,12 @@ void Init_zscan() {
417
445
  rb_define_method(zscan, "rest", zscan_rest, 0);
418
446
  rb_define_method(zscan, "rest_size", zscan_rest_size, 0);
419
447
  rb_define_method(zscan, "rest_bytesize", zscan_rest_bytesize, 0);
448
+ rb_define_method(zscan, "cleanup", zscan_cleanup, 0);
420
449
 
421
450
  rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
422
451
  rb_define_method(zscan, "scan", zscan_scan, 1);
452
+ rb_define_method(zscan, "skip", zscan_skip, 1);
453
+ rb_define_method(zscan, "unpack", zscan_unpack, 1);
423
454
  rb_define_method(zscan, "push", zscan_push, 0);
424
455
  rb_define_method(zscan, "pop", zscan_pop, 0);
425
456
  rb_define_method(zscan, "drop", zscan_drop, 0);
data/ext/zscan.h CHANGED
@@ -17,3 +17,7 @@ typedef struct {
17
17
  long stack_cap;
18
18
  Pos* stack;
19
19
  } ZScan;
20
+
21
+ VALUE zscan_bytepos_eq(VALUE self, VALUE v_bytepos); /* sets the scanner's byte position from a Ruby Integer; defined in zscan.c */
22
+ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len); /* unpacks `str` by `fmt`; writes a byte count to *parsed_len (defined outside this view) */
23
+ void Init_zscan_bspec(VALUE, const rb_data_type_t*); /* initialiser for the bspec part of the extension; parameter meanings are not visible here */
data/generate/bspec.rb ADDED
@@ -0,0 +1,48 @@
1
+ class ZScan <%# ERB template rendered by the Generate class into lib/zscan/bspec.rb %>
2
+ class BSpec
3
+ def _append_expect expect_i, pack_format, opts <%# When opts is {expect: Integer}, appends opcode expect_i followed by the packed expected bytes; a nil or non-Hash opts appends nothing %>
4
+ if opts.is_a?(Hash)
5
+ expect = opts[:expect]
6
+ if expect.is_a?(Integer) and opts.size == 1
7
+ ZScan::BSpec._append self, expect_i
8
+ ZScan::BSpec._append_expect self, [expect].pack(pack_format)
9
+ else
10
+ raise ArgumentError, "only :expect option allowed, but got #{opts.inspect}"
11
+ end
12
+ end
13
+ end
14
+
15
+ def initialize &p <%# Evaluates the block with the new spec as self, so the generated instruction methods can be called bare %>
16
+ instance_eval &p
17
+ end
18
+
19
+ <% @int_ins.each do |ins, i| %><%# one method per integer instruction; i is the index of ins in Generate#insns %>
20
+ def <%= ins.downcase %> opts=nil
21
+ _append_expect <%= to_expect_i ins %>, <%= to_pack_format ins %>, opts
22
+ ZScan::BSpec._append self, <%= i %>
23
+ end
24
+ <% end %>
25
+
26
+ <% @float_ins.each do |ins, i| %><%# float instructions take no :expect option %>
27
+ def <%= ins.downcase %>
28
+ ZScan::BSpec._append self, <%= i %>
29
+ end
30
+ <% end %>
31
+
32
+ if ZScan::BSpec.big_endian? <%# on a big-endian host the plain instruction is the _be alias and the _swap one is _le; reversed in the else branch %>
33
+ <% @alias_ins.map(&:downcase).each do |ins| %>
34
+ alias <%= ins %>_be <%= ins %>
35
+ alias <%= ins %>_le <%= ins %>_swap
36
+ <% end %>
37
+ else
38
+ <% @alias_ins.map(&:downcase).each do |ins| %>
39
+ alias <%= ins %>_le <%= ins %>
40
+ alias <%= ins %>_be <%= ins %>_swap
41
+ <% end %>
42
+ end
43
+ alias byte uint8
44
+ alias float single
45
+ alias float_le single_le
46
+ alias float_be single_be
47
+ end
48
+ end
@@ -0,0 +1,29 @@
1
+ __attribute__((__noinline__)) /* ERB template: interpreter for a compiled binary spec, dispatching through label addresses (GCC labels-as-values). */
2
+ static VALUE bspec_exec(void** ip, char* s, VALUE a) { /* ip: instruction stream of label addresses and inline operands; s: input bytes; a: Ruby array receiving decoded values. Returns a, or Qnil when an EXPECT check fails. */
3
+ static void* opcodes[] = { <%= @opcode_list.join ', ' %> }; /* label address of every handler, in opcode order */
4
+ if (ip == NULL) {
5
+ return (VALUE)opcodes; /* NULL ip is a query for the opcode table; presumably used at init to map opcode indices to label addresses */
6
+ }
7
+ goto **(ip++); /* jump to the first instruction's handler */
8
+ BS_RET:
9
+ return a;
10
+ <% [8, 16, 32, 64].each do |len| %>
11
+ BS_EXPECT<%= len %>:
12
+ {
13
+ char* expect = (char*)(ip++); /* the expected bytes are stored inline in the slot after the opcode; NOTE(review): the 64-bit case assumes an 8-byte slot -- confirm on 32-bit targets */
14
+ if (memcmp(s, expect, <%= len / 8 %>)) { /* memcmp, not strncmp: the operand is raw packed integer bytes and may contain 0x00, where strncmp would stop comparing */
15
+ return Qnil;
16
+ }
17
+ goto **(ip++); /* s is not advanced here; the following conversion instruction reads the same bytes */
18
+ }
19
+ <% end %>
20
+ <% @conv_insns.each do |ins| %>
21
+ BS_<%= ins %>:
22
+ {
23
+ <%= extract ins %>; /* declares r, the raw (optionally byte-swapped) value read at s */
24
+ rb_ary_push(a, <%= convert ins %>);
25
+ s += <%= incr ins %>; /* advance past the bytes just decoded */
26
+ goto **(ip++);
27
+ }
28
+ <% end %>
29
+ }
@@ -0,0 +1,2 @@
1
+ static const long bspec_s_sizes[] = {<%= @incrs %>}; /* bytes consumed from the input by each opcode, in opcode order (0 for RET and EXPECT*) */
2
+ static const long bspec_opcodes_size = <%= @insns_size %>; /* number of opcodes in the instruction list */
@@ -0,0 +1,147 @@
1
+ require "erb"
2
+
3
+ class Generate
4
+ def self.files
5
+ %w[ext/bspec_exec.inc ext/bspec_init.inc lib/zscan/bspec.rb]
6
+ end
7
+
8
+ def self.generate file
9
+ new(file).render
10
+ end
11
+
12
+ def initialize file
13
+ case file
14
+ when 'ext/bspec_exec.inc'
15
+ generate_exec
16
+ when 'ext/bspec_init.inc'
17
+ generate_init
18
+ when 'lib/zscan/bspec.rb'
19
+ generate_rb
20
+ else
21
+ raise "unkown file: #{file}"
22
+ end
23
+ @file = file
24
+ end
25
+
26
+ def render
27
+ ERB.new(File.read File.join(__dir__, File.basename(@file))).result binding
28
+ end
29
+
30
+ def generate_init
31
+ @incrs = insns.map{|ins| incr ins}.join ', '
32
+ @insns_size = insns.size
33
+ end
34
+
35
+ def generate_exec
36
+ @opcode_list = insns.map{|ins| "&&BS_#{ins}" }
37
+ @conv_insns = insns.select{|ins| ins !~ /RET|EXPECT/ }
38
+ end
39
+
40
+ def generate_rb
41
+ groups = insns.each_with_index.to_a.group_by{|x|
42
+ case x.first
43
+ when /INT/
44
+ :int
45
+ when /RET|EXPECT/
46
+ :misc
47
+ else
48
+ :float
49
+ end
50
+ }
51
+ @int_ins = groups[:int]
52
+ @float_ins = groups[:float]
53
+ @alias_ins = swap_types.map &:downcase
54
+ end
55
+
56
+ def swap_types
57
+ %w[INT16 INT32 INT64 UINT16 UINT32 UINT64 SINGLE DOUBLE]
58
+ end
59
+
60
+ def insns
61
+ ['RET', 'EXPECT8', 'EXPECT16', 'EXPECT32', 'EXPECT64', 'INT8', 'UINT8', *swap_types.flat_map{|ty| [ty, "#{ty}_SWAP"] }]
62
+ end
63
+
64
+ def to_pack_format ins
65
+ raise "bad" if ins !~ /INT/
66
+ base = {
67
+ '8' => 'c',
68
+ '16' => 's',
69
+ '32' => 'l',
70
+ '64' => 'q'
71
+ }[ins[/\d+/]]
72
+ if ins.start_with?('U')
73
+ base.upcase!
74
+ end
75
+
76
+ if ins[/\d+/] == '8'
77
+ return "'#{base}'"
78
+ end
79
+
80
+ if ins.end_with?('SWAP')
81
+ format = base + '<'
82
+ swap_format = base + '>'
83
+ else
84
+ format = base + '>'
85
+ swap_format = base + '<'
86
+ end
87
+ "(BSpec.big_endian? ? '#{format}' : '#{swap_format}')"
88
+ end
89
+
90
+ def to_expect_i ins
91
+ bits = ins[/\d+/]
92
+ insns.index "EXPECT#{bits}"
93
+ end
94
+
95
+ # following methods used in C-code gen
96
+
97
+ def incr ins
98
+ case ins
99
+ when /INT(\d+)/; $1.to_i / 8
100
+ when /SINGLE/; 4
101
+ when /DOUBLE/; 8
102
+ when /RET|EXPECT/; 0
103
+ else; raise 'bad'
104
+ end
105
+ end
106
+
107
+ def c_type ins
108
+ case ins
109
+ when /(U?INT\d+)/; "#{$1.downcase}_t"
110
+ when /SINGLE/; 'float'
111
+ when /DOUBLE/; 'double'
112
+ else; raise 'bad'
113
+ end
114
+ end
115
+
116
+ def extract ins
117
+ type = c_type ins
118
+ len = incr(ins) * 8
119
+ r = "((uint#{len}_t*)s)[0]"
120
+ if ins.end_with?('SWAP')
121
+ r = "swap#{len}(#{r})"
122
+ end
123
+ "uint#{len}_t r = #{r}"
124
+ end
125
+
126
+ def convert ins
127
+ case ins
128
+ when /(U)?INT64|UINT32/
129
+ if ins.start_with?('U')
130
+ "UINT64toNUM(r)"
131
+ else
132
+ "INT64toNUM(CAST(r, int64_t))"
133
+ end
134
+ when /INT32/
135
+ "INT2NUM(CAST(r, int32_t))"
136
+ when /INT(16|8)/
137
+ "INT2FIX(CAST(r, #{c_type ins}))"
138
+ when /SINGLE/
139
+ "DBL2NUM((double)CAST(r, float))"
140
+ when /DOUBLE/
141
+ "DBL2NUM(CAST(r, double))"
142
+ else
143
+ raise 'bad'
144
+ end
145
+ end
146
+
147
+ end
data/lib/zscan.rb CHANGED
@@ -1,9 +1,9 @@
1
1
  require_relative "../ext/zscan"
2
- require_relative "zscan/instructions"
2
+ require_relative "zscan/bspec"
3
3
  require "date"
4
4
 
5
5
  class ZScan
6
- VERSION = '1.3'
6
+ VERSION = '2.0'
7
7
 
8
8
  def initialize s, dup=false
9
9
  if s.encoding.ascii_compatible?
@@ -18,12 +18,6 @@ class ZScan
18
18
  _internal_string.dup
19
19
  end
20
20
 
21
- def skip re_or_str
22
- if sz = match_bytesize(re_or_str)
23
- self.bytepos += sz
24
- end
25
- end
26
-
27
21
  def scan_int radix=nil
28
22
  negative = false
29
23
  r = try do
@@ -44,7 +38,7 @@ class ZScan
44
38
  case radix
45
39
  when 2; /[01]+/
46
40
  when 8; /[0-7]+/
47
- when 10; /\d+/
41
+ when 10; /[0-9]+/ # don't use \d because it matches unicode numbers
48
42
  when 16; /\h+/i
49
43
  else
50
44
  if radix < 10
@@ -53,7 +47,7 @@ class ZScan
53
47
  raise ArgumentError, "invalid radix #{radix}"
54
48
  else
55
49
  end_char = ('a'.ord + (radix - 11)).chr
56
- /[\da-#{end_char}]+/i
50
+ /[0-9a-#{end_char}]+/i
57
51
  end
58
52
  end
59
53
  end
@@ -80,20 +74,6 @@ class ZScan
80
74
  end
81
75
  end
82
76
 
83
- def unpack format
84
- if format.index('@')
85
- raise ArgumentError, 'position instruction @ not supported'
86
- end
87
- r = rest.unpack format
88
- if r.index(nil)
89
- return
90
- end
91
- # XXX pack to get parsed length because no related API is exposed ...
92
- len = r.pack(format).bytesize
93
- self.bytepos += len
94
- r
95
- end
96
-
97
77
  def pos= new_pos
98
78
  advance new_pos - pos
99
79
  end
@@ -146,19 +126,6 @@ class ZScan
146
126
  r
147
127
  end
148
128
 
149
- def self.binary_spec &p
150
- bs = BinarySpec.send :new
151
- bs.instance_eval &p
152
- bs
153
- end
154
-
155
- class BinarySpec
156
- private :append
157
- class << self
158
- private :new
159
- end
160
- end
161
-
162
129
  private :_internal_init, :_internal_string
163
130
  end
164
131
 
@@ -0,0 +1,168 @@
1
+ # generated by rake gen
2
+ class ZScan
3
+ class BSpec # Builder DSL for binary specs; this file is generated, so lasting changes belong in the template.
4
+ def _append_expect expect_i, pack_format, opts # When opts is {expect: Integer}, appends opcode expect_i followed by the packed expected bytes; nil or non-Hash opts appends nothing.
5
+ if opts.is_a?(Hash)
6
+ expect = opts[:expect]
7
+ if expect.is_a?(Integer) and opts.size == 1
8
+ ZScan::BSpec._append self, expect_i
9
+ ZScan::BSpec._append_expect self, [expect].pack(pack_format)
10
+ else
11
+ raise ArgumentError, "only :expect option allowed, but got #{opts.inspect}"
12
+ end
13
+ end
14
+ end
15
+
16
+ def initialize &p # Evaluates the block with the new spec as self.
17
+ instance_eval &p
18
+ end
19
+
20
+
21
+ def int8 opts=nil # Integer instructions: optional expectation first, then the decode opcode.
22
+ _append_expect 1, 'c', opts
23
+ ZScan::BSpec._append self, 5
24
+ end
25
+
26
+ def uint8 opts=nil
27
+ _append_expect 1, 'C', opts
28
+ ZScan::BSpec._append self, 6
29
+ end
30
+
31
+ def int16 opts=nil # Un-suffixed multi-byte instructions pack the expectation in host byte order.
32
+ _append_expect 2, (BSpec.big_endian? ? 's>' : 's<'), opts
33
+ ZScan::BSpec._append self, 7
34
+ end
35
+
36
+ def int16_swap opts=nil # _swap variants pack the expectation in the opposite of host byte order.
37
+ _append_expect 2, (BSpec.big_endian? ? 's<' : 's>'), opts
38
+ ZScan::BSpec._append self, 8
39
+ end
40
+
41
+ def int32 opts=nil
42
+ _append_expect 3, (BSpec.big_endian? ? 'l>' : 'l<'), opts
43
+ ZScan::BSpec._append self, 9
44
+ end
45
+
46
+ def int32_swap opts=nil
47
+ _append_expect 3, (BSpec.big_endian? ? 'l<' : 'l>'), opts
48
+ ZScan::BSpec._append self, 10
49
+ end
50
+
51
+ def int64 opts=nil
52
+ _append_expect 4, (BSpec.big_endian? ? 'q>' : 'q<'), opts
53
+ ZScan::BSpec._append self, 11
54
+ end
55
+
56
+ def int64_swap opts=nil
57
+ _append_expect 4, (BSpec.big_endian? ? 'q<' : 'q>'), opts
58
+ ZScan::BSpec._append self, 12
59
+ end
60
+
61
+ def uint16 opts=nil
62
+ _append_expect 2, (BSpec.big_endian? ? 'S>' : 'S<'), opts
63
+ ZScan::BSpec._append self, 13
64
+ end
65
+
66
+ def uint16_swap opts=nil
67
+ _append_expect 2, (BSpec.big_endian? ? 'S<' : 'S>'), opts
68
+ ZScan::BSpec._append self, 14
69
+ end
70
+
71
+ def uint32 opts=nil
72
+ _append_expect 3, (BSpec.big_endian? ? 'L>' : 'L<'), opts
73
+ ZScan::BSpec._append self, 15
74
+ end
75
+
76
+ def uint32_swap opts=nil
77
+ _append_expect 3, (BSpec.big_endian? ? 'L<' : 'L>'), opts
78
+ ZScan::BSpec._append self, 16
79
+ end
80
+
81
+ def uint64 opts=nil
82
+ _append_expect 4, (BSpec.big_endian? ? 'Q>' : 'Q<'), opts
83
+ ZScan::BSpec._append self, 17
84
+ end
85
+
86
+ def uint64_swap opts=nil
87
+ _append_expect 4, (BSpec.big_endian? ? 'Q<' : 'Q>'), opts
88
+ ZScan::BSpec._append self, 18
89
+ end
90
+
91
+
92
+
93
+ def single # Float instructions take no :expect option.
94
+ ZScan::BSpec._append self, 19
95
+ end
96
+
97
+ def single_swap
98
+ ZScan::BSpec._append self, 20
99
+ end
100
+
101
+ def double
102
+ ZScan::BSpec._append self, 21
103
+ end
104
+
105
+ def double_swap
106
+ ZScan::BSpec._append self, 22
107
+ end
108
+
109
+
110
+ if ZScan::BSpec.big_endian? # Big-endian host: plain instruction is _be, swapped one is _le.
111
+
112
+ alias int16_be int16
113
+ alias int16_le int16_swap
114
+
115
+ alias int32_be int32
116
+ alias int32_le int32_swap
117
+
118
+ alias int64_be int64
119
+ alias int64_le int64_swap
120
+
121
+ alias uint16_be uint16
122
+ alias uint16_le uint16_swap
123
+
124
+ alias uint32_be uint32
125
+ alias uint32_le uint32_swap
126
+
127
+ alias uint64_be uint64
128
+ alias uint64_le uint64_swap
129
+
130
+ alias single_be single
131
+ alias single_le single_swap
132
+
133
+ alias double_be double
134
+ alias double_le double_swap
135
+
136
+ else # Little-endian host: plain instruction is _le, swapped one is _be.
137
+
138
+ alias int16_le int16
139
+ alias int16_be int16_swap
140
+
141
+ alias int32_le int32
142
+ alias int32_be int32_swap
143
+
144
+ alias int64_le int64
145
+ alias int64_be int64_swap
146
+
147
+ alias uint16_le uint16
148
+ alias uint16_be uint16_swap
149
+
150
+ alias uint32_le uint32
151
+ alias uint32_be uint32_swap
152
+
153
+ alias uint64_le uint64
154
+ alias uint64_be uint64_swap
155
+
156
+ alias single_le single
157
+ alias single_be single_swap
158
+
159
+ alias double_le double
160
+ alias double_be double_swap
161
+
162
+ end
163
+ alias byte uint8
164
+ alias float single
165
+ alias float_le single_le
166
+ alias float_be single_be
167
+ end
168
+ end