zscan 1.2 → 1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 71174140986194386fd74477637f21f95810d704
4
- data.tar.gz: 72706410ead67f6cd9af96a7bf7024c72d7ef8c8
3
+ metadata.gz: 656c885fe2dcee21e852866ae02bf9fa960a3c9a
4
+ data.tar.gz: 1f5642a27ad99afc41494e556095fc46c98af22b
5
5
  SHA512:
6
- metadata.gz: 7c9b9a9516dfa9b5ced99820f66976868d2c0a521df1eb35a517d262d3519601bb28b2a5e95797de411398b22bea3d9aa504d4af96f1634c1f9353d873d134d0
7
- data.tar.gz: d3c3bcc5255f1909acafdf4bb1977d0ac75d73a6e837af0382cc5dd57d4a1e98443898bd49252e9d2d0b9ee3c7bc3eb0561c423bea4a91feb44bccf6138ac86f
6
+ metadata.gz: 844ac1574f62522bfa329e091f1a55558e17a2db7b0c39e5599fe4ad4038c66e563f1bd36d9ddf55132746c9265a2678f2abb2f0904c1fd5e3f4fec810d3197a
7
+ data.tar.gz: b156b693586831fcf094984583442ab146946fded1564b5d70f24d62db1cf72c3df5a91286a6e6a7822186f0655347e825a1d4f391c735157a2bf6121808d313
@@ -1,6 +1,8 @@
1
1
  #include "zscan.h"
2
2
 
3
3
  static const rb_data_type_t* zscan_type;
4
+ static void** bspec_opcodes;
5
+ # include "bspec_init.inc"
4
6
 
5
7
  typedef struct {
6
8
  long s_size;
@@ -31,18 +33,31 @@ static VALUE bspec_alloc(VALUE klass) {
31
33
  bs->a_cap = 4;
32
34
  bs->a_size = 0;
33
35
  bs->code = (void**)malloc(bs->a_cap * sizeof(void*));
36
+ for (long i = 0; i < bs->a_cap; i++) {
37
+ bs->code[i] = bspec_opcodes[0];
38
+ }
34
39
  return TypedData_Wrap_Struct(klass, &bspec_type, bs);
35
40
  }
36
41
 
37
- static VALUE bspec_append(VALUE self, VALUE v_code, VALUE v_s_size) {
42
+ static VALUE bspec_append(VALUE self, VALUE v_i) {
38
43
  BSpec* bs = rb_check_typeddata(self, &bspec_type);
39
- if (bs->a_size == bs->a_cap) {
44
+ long i = NUM2LONG(v_i);
45
+ if (i < 0 || i >= bspec_opcodes_size) {
46
+ rb_raise(rb_eArgError, "bad opcode index");
47
+ }
48
+
49
+ // ensure size
50
+ if (bs->a_size == bs->a_cap - 1) { // end with 0:RET, always terminate
51
+ bs->code = (void**)realloc(bs->code, bs->a_cap * 2 * sizeof(void*));
52
+ long j = bs->a_cap;
40
53
  bs->a_cap *= 2;
41
- bs->code = (void**)realloc(bs->code, bs->a_cap * sizeof(void*));
54
+ for (; j < bs->a_cap; j++) {
55
+ bs->code[j] = bspec_opcodes[0];
56
+ }
42
57
  }
43
- long s_size = NUM2LONG(v_s_size);
44
- bs->code[bs->a_size++] = ((void**)RSTRING_PTR(v_code))[0];
45
- bs->s_size += s_size;
58
+
59
+ bs->code[bs->a_size++] = bspec_opcodes[i];
60
+ bs->s_size += bspec_s_sizes[i];
46
61
  return self;
47
62
  }
48
63
 
@@ -101,6 +116,23 @@ static VALUE bspec_big_endian_p(VALUE self) {
101
116
 
102
117
  #include "bspec_exec.inc"
103
118
 
119
+ static VALUE bspec_opcodes_to_a(VALUE bspec) {
120
+ volatile VALUE a = rb_ary_new();
121
+ for (long i = 0; i < bspec_opcodes_size; i++) {
122
+ rb_ary_push(a, UINT64toNUM((uint64_t)(bspec_opcodes[i])));
123
+ }
124
+ return a;
125
+ }
126
+
127
+ static VALUE bspec_inspect_opcodes(VALUE bspec, VALUE self) {
128
+ BSpec* bs = rb_check_typeddata(self, &bspec_type);
129
+ volatile VALUE a = rb_ary_new();
130
+ for (long i = 0; i <= bs->a_size; i++) {
131
+ rb_ary_push(a, UINT64toNUM((uint64_t)(bs->code[i])));
132
+ }
133
+ return a;
134
+ }
135
+
104
136
  static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
105
137
  ZScan* p = rb_check_typeddata(self, zscan_type);
106
138
  if (!rb_enc_str_asciicompat_p(p->s)) {
@@ -126,16 +158,11 @@ void Init_zscan_bspec(VALUE zscan, const rb_data_type_t* _zscan_type) {
126
158
  zscan_type = _zscan_type;
127
159
  rb_define_method(zscan, "scan_binary", zscan_scan_binary, 1);
128
160
 
161
+ bspec_opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
129
162
  VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
130
163
  rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
164
+ rb_define_singleton_method(bs, "_opcodes_to_a", bspec_opcodes_to_a, 0);
165
+ rb_define_singleton_method(bs, "_inspect_opcodes", bspec_inspect_opcodes, 1);
131
166
  rb_define_alloc_func(bs, bspec_alloc);
132
- rb_define_method(bs, "append", bspec_append, 2);
133
-
134
- # include "bspec_opcode_names.inc"
135
- void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
136
- for (long i = 0; i < bspec_opcode_size; i++) {
137
- VALUE bytecode = rb_str_new((char*)&opcodes[i], sizeof(void*));
138
- OBJ_FREEZE(bytecode);
139
- rb_define_const(bs, bspec_opcode_names[i], bytecode);
140
- }
167
+ rb_define_method(bs, "append", bspec_append, 1);
141
168
  }
@@ -0,0 +1,3 @@
1
+ // GENERATED WITH: rake gen
2
+ static const long bspec_s_sizes[] = {0, 1, 2, 2, 4, 4, 8, 8, 1, 2, 2, 4, 4, 8, 8, 4, 4, 8, 8};
3
+ static const long bspec_opcodes_size = 19;
@@ -69,10 +69,10 @@ static VALUE zscan_advance(VALUE self, VALUE v_diff) {
69
69
  long byteend = RSTRING_LEN(p->s);
70
70
  char* ptr = RSTRING_PTR(p->s);
71
71
  for (; p->pos < n && p->bytepos < byteend;) {
72
- int n = rb_enc_mbclen(ptr + p->bytepos, ptr + byteend, enc);
73
- if (n) {
72
+ int m = rb_enc_mbclen(ptr + p->bytepos, ptr + byteend, enc);
73
+ if (m) {
74
74
  p->pos++;
75
- p->bytepos += n;
75
+ p->bytepos += m;
76
76
  } else {
77
77
  break;
78
78
  }
@@ -141,6 +141,35 @@ static VALUE zscan_eos_p(VALUE self) {
141
141
  return (p->bytepos == RSTRING_LEN(p->s) ? Qtrue : Qfalse);
142
142
  }
143
143
 
144
+ static VALUE zscan_rest(VALUE self) {
145
+ P;
146
+ return rb_funcall(p->s, rb_intern("byteslice"), 2, LONG2NUM(p->bytepos), LONG2NUM(RSTRING_LEN(p->s)));
147
+ }
148
+
149
+ static VALUE zscan_rest_size(VALUE self) {
150
+ P;
151
+ rb_encoding* enc = rb_enc_get(p->s);
152
+ char* ptr = RSTRING_PTR(p->s) + p->bytepos;
153
+ long len = RSTRING_LEN(p->s) - p->bytepos;
154
+
155
+ long sz = 0;
156
+ for (long i = 0; i < len;) {
157
+ long n = rb_enc_mbclen(ptr + i, ptr + len, enc);
158
+ if (n) {
159
+ sz++;
160
+ i += n;
161
+ } else {
162
+ rb_raise(rb_eRuntimeError, "failed to scan char");
163
+ }
164
+ }
165
+ return LONG2NUM(sz);
166
+ }
167
+
168
+ static VALUE zscan_rest_bytesize(VALUE self) {
169
+ P;
170
+ return LONG2NUM(RSTRING_LEN(p->s) - p->bytepos);
171
+ }
172
+
144
173
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
145
174
  static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
146
175
  P;
@@ -385,6 +414,10 @@ void Init_zscan() {
385
414
  rb_define_method(zscan, "bytepos=", zscan_bytepos_eq, 1);
386
415
  rb_define_method(zscan, "advance", zscan_advance, 1);
387
416
  rb_define_method(zscan, "eos?", zscan_eos_p, 0);
417
+ rb_define_method(zscan, "rest", zscan_rest, 0);
418
+ rb_define_method(zscan, "rest_size", zscan_rest_size, 0);
419
+ rb_define_method(zscan, "rest_bytesize", zscan_rest_bytesize, 0);
420
+
388
421
  rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
389
422
  rb_define_method(zscan, "scan", zscan_scan, 1);
390
423
  rb_define_method(zscan, "push", zscan_push, 0);
@@ -3,7 +3,7 @@ require_relative "zscan/instructions"
3
3
  require "date"
4
4
 
5
5
  class ZScan
6
- VERSION = '1.2'
6
+ VERSION = '1.3'
7
7
 
8
8
  def initialize s, dup=false
9
9
  if s.encoding.ascii_compatible?
@@ -98,10 +98,6 @@ class ZScan
98
98
  advance new_pos - pos
99
99
  end
100
100
 
101
- def rest
102
- _internal_string.byteslice bytepos, _internal_string.bytesize
103
- end
104
-
105
101
  def reset
106
102
  self.pos = 0
107
103
  self
@@ -138,10 +134,21 @@ class ZScan
138
134
  _internal_string.byteslice(0, bytepos).count "\n"
139
135
  end
140
136
 
137
+ def slice size
138
+ r = _internal_string.slice pos, size
139
+ advance size
140
+ r
141
+ end
142
+
143
+ def byteslice bytesize
144
+ r = _internal_string.byteslice bytepos, bytesize
145
+ self.bytepos += bytesize
146
+ r
147
+ end
148
+
141
149
  def self.binary_spec &p
142
150
  bs = BinarySpec.send :new
143
151
  bs.instance_eval &p
144
- bs.send :append, BinarySpec::RET, 0
145
152
  bs
146
153
  end
147
154
 
@@ -3,109 +3,109 @@ class ZScan::BinarySpec
3
3
  def int8 n=1
4
4
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
5
5
  n.times do
6
- append INT8, 1
6
+ append 1
7
7
  end
8
8
  end
9
9
  def int16 n=1
10
10
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
11
11
  n.times do
12
- append INT16, 2
12
+ append 2
13
13
  end
14
14
  end
15
15
  def int16_swap n=1
16
16
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
17
17
  n.times do
18
- append INT16_SWAP, 2
18
+ append 3
19
19
  end
20
20
  end
21
21
  def int32 n=1
22
22
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
23
23
  n.times do
24
- append INT32, 4
24
+ append 4
25
25
  end
26
26
  end
27
27
  def int32_swap n=1
28
28
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
29
29
  n.times do
30
- append INT32_SWAP, 4
30
+ append 5
31
31
  end
32
32
  end
33
33
  def int64 n=1
34
34
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
35
35
  n.times do
36
- append INT64, 8
36
+ append 6
37
37
  end
38
38
  end
39
39
  def int64_swap n=1
40
40
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
41
41
  n.times do
42
- append INT64_SWAP, 8
42
+ append 7
43
43
  end
44
44
  end
45
45
  def uint8 n=1
46
46
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
47
47
  n.times do
48
- append UINT8, 1
48
+ append 8
49
49
  end
50
50
  end
51
51
  def uint16 n=1
52
52
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
53
53
  n.times do
54
- append UINT16, 2
54
+ append 9
55
55
  end
56
56
  end
57
57
  def uint16_swap n=1
58
58
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
59
59
  n.times do
60
- append UINT16_SWAP, 2
60
+ append 10
61
61
  end
62
62
  end
63
63
  def uint32 n=1
64
64
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
65
65
  n.times do
66
- append UINT32, 4
66
+ append 11
67
67
  end
68
68
  end
69
69
  def uint32_swap n=1
70
70
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
71
71
  n.times do
72
- append UINT32_SWAP, 4
72
+ append 12
73
73
  end
74
74
  end
75
75
  def uint64 n=1
76
76
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
77
77
  n.times do
78
- append UINT64, 8
78
+ append 13
79
79
  end
80
80
  end
81
81
  def uint64_swap n=1
82
82
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
83
83
  n.times do
84
- append UINT64_SWAP, 8
84
+ append 14
85
85
  end
86
86
  end
87
87
  def single n=1
88
88
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
89
89
  n.times do
90
- append SINGLE, 4
90
+ append 15
91
91
  end
92
92
  end
93
93
  def single_swap n=1
94
94
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
95
95
  n.times do
96
- append SINGLE_SWAP, 4
96
+ append 16
97
97
  end
98
98
  end
99
99
  def double n=1
100
100
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
101
101
  n.times do
102
- append DOUBLE, 8
102
+ append 17
103
103
  end
104
104
  end
105
105
  def double_swap n=1
106
106
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
107
107
  n.times do
108
- append DOUBLE_SWAP, 8
108
+ append 18
109
109
  end
110
110
  end
111
111
  if ZScan::BinarySpec.big_endian?
data/rakefile CHANGED
@@ -86,7 +86,7 @@ file gem_package => gem_files do
86
86
  end
87
87
 
88
88
  desc "generate files"
89
- task :gen => %w[ext/bspec_exec.inc ext/bspec_opcode_names.inc lib/zscan/instructions.rb]
89
+ task :gen => %w[ext/bspec_exec.inc ext/bspec_init.inc lib/zscan/instructions.rb]
90
90
 
91
91
  file 'ext/bspec_exec.inc' => __FILE__ do
92
92
  puts "generating ext/bspec_exec.inc"
@@ -122,13 +122,13 @@ BS_RET:
122
122
  end
123
123
  end
124
124
 
125
- file 'ext/bspec_opcode_names.inc' => __FILE__ do
126
- puts 'generating ext/bspec_opcode_names.inc'
127
- opcode_names = bspec_insns.map(&:inspect).join ', '
128
- File.open 'ext/bspec_opcode_names.inc', 'w' do |f|
125
+ file 'ext/bspec_init.inc' => __FILE__ do
126
+ puts 'generating ext/bspec_init.inc'
127
+ opcode_incrs = bspec_insns.map{|ins| bspec_incr ins}.join ', '
128
+ File.open 'ext/bspec_init.inc', 'w' do |f|
129
129
  f.puts "// GENERATED WITH: rake gen"
130
- f.puts %Q|const char* bspec_opcode_names[] = {"RET", #{opcode_names}};|
131
- f.puts %Q|long bspec_opcode_size = #{bspec_insns.size + 1};|
130
+ f.puts %Q|static const long bspec_s_sizes[] = {0, #{opcode_incrs}};|
131
+ f.puts %Q|static const long bspec_opcodes_size = #{bspec_insns.size + 1};|
132
132
  end
133
133
  end
134
134
 
@@ -138,12 +138,12 @@ file 'lib/zscan/instructions.rb' => __FILE__ do
138
138
  f.puts "# GENERATED WITH: rake gen"
139
139
  f.puts "class ZScan::BinarySpec"
140
140
 
141
- bspec_insns.each do |ins|
141
+ bspec_insns.each_with_index do |ins, i|
142
142
  f.puts <<-RUBY
143
143
  def #{ins.downcase} n=1
144
144
  raise ArgumentError, "repeat count should be >= 1, but got \#{n}" if n < 1
145
145
  n.times do
146
- append #{ins}, #{bspec_incr ins}
146
+ append #{i + 1}
147
147
  end
148
148
  end
149
149
  RUBY
data/readme.md CHANGED
@@ -59,6 +59,8 @@ See also https://bugs.ruby-lang.org/issues/7092
59
59
  - `#scan regexp_or_string`
60
60
  - `#skip regexp_or_string`
61
61
  - `#match_bytesize regexp_or_string` return length of matched bytes or `nil`.
62
+ - `#slice n` slices a substring of n chars from the current pos and advances the cursor.
63
+ - `#byteslice n` slices a substring of n bytes from the current byte position and advances the cursor.
62
64
  - `#scan_float` scan a float number which is not starting with space. It deals with multibyte encodings for you.
63
65
  - `#scan_int radix=nil` if radix is nil, decide base by prefix: `0x` is 16, `0` is 8, `0b` is 2, otherwise 10. `radix` should be in range `2..36`.
64
66
  - `#scan_date format_string, start=Date::ITALY` scan a `DateTime` object, see also [strptime](http://rubydoc.info/stdlib/date/DateTime.strptime).
@@ -66,7 +68,9 @@ See also https://bugs.ruby-lang.org/issues/7092
66
68
  - `#unpack format_string`
67
69
  - `#eos?`
68
70
  - `#string` note: returns a dup. Don't worry about the performance, because it is a copy-on-write string.
69
- - `#rest`
71
+ - `#rest` returns the rest (unscanned part) of the string.
72
+ - `#rest_size` returns the number of characters in the unscanned part of the string.
73
+ - `#rest_bytesize` returns the number of bytes in the unscanned part of the string.
70
74
 
71
75
  ## String delegates
72
76
 
@@ -21,10 +21,19 @@ describe ZScan do
21
21
  assert_equal 8, @z.bytepos
22
22
  end
23
23
 
24
+ it "slices a certain bytes or chars" do
25
+ assert_equal 'ab', (@z.slice 2)
26
+ assert_equal '你', (@z.slice 1)
27
+ assert_equal '好', (@z.byteslice 3)
28
+ assert_equal true, @z.eos?
29
+ end
30
+
24
31
  it "scans from middle" do
25
32
  @z.bytepos = 2
26
33
  assert_equal '你', @z.scan('你')
27
34
  assert_equal '好', @z.rest
35
+ assert_equal '好'.size, @z.rest_size
36
+ assert_equal '好'.bytesize, @z.rest_bytesize
28
37
  end
29
38
 
30
39
  it "won't overflow pos" do
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "zscan"
3
- s.version = "1.2" # version mapped from zscan.rb, don't change here
3
+ s.version = "1.3" # version mapped from zscan.rb, don't change here
4
4
  s.author = "Zete Lui"
5
5
  s.homepage = "https://github.com/luikore/zscan"
6
6
  s.platform = Gem::Platform::RUBY
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zscan
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.2'
4
+ version: '1.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-17 00:00:00.000000000 Z
11
+ date: 2013-05-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: improved string scanner, respects anchors and lookbehinds, supports codepoint
14
14
  positioning
@@ -36,7 +36,7 @@ files:
36
36
  - ext/bspec.c
37
37
  - ext/zscan.c
38
38
  - ext/bspec_exec.inc
39
- - ext/bspec_opcode_names.inc
39
+ - ext/bspec_init.inc
40
40
  homepage: https://github.com/luikore/zscan
41
41
  licenses:
42
42
  - BSD
@@ -1,3 +0,0 @@
1
- // GENERATED WITH: rake gen
2
- const char* bspec_opcode_names[] = {"RET", "INT8", "INT16", "INT16_SWAP", "INT32", "INT32_SWAP", "INT64", "INT64_SWAP", "UINT8", "UINT16", "UINT16_SWAP", "UINT32", "UINT32_SWAP", "UINT64", "UINT64_SWAP", "SINGLE", "SINGLE_SWAP", "DOUBLE", "DOUBLE_SWAP"};
3
- long bspec_opcode_size = 19;