zscan 1.2 → 1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 71174140986194386fd74477637f21f95810d704
4
- data.tar.gz: 72706410ead67f6cd9af96a7bf7024c72d7ef8c8
3
+ metadata.gz: 656c885fe2dcee21e852866ae02bf9fa960a3c9a
4
+ data.tar.gz: 1f5642a27ad99afc41494e556095fc46c98af22b
5
5
  SHA512:
6
- metadata.gz: 7c9b9a9516dfa9b5ced99820f66976868d2c0a521df1eb35a517d262d3519601bb28b2a5e95797de411398b22bea3d9aa504d4af96f1634c1f9353d873d134d0
7
- data.tar.gz: d3c3bcc5255f1909acafdf4bb1977d0ac75d73a6e837af0382cc5dd57d4a1e98443898bd49252e9d2d0b9ee3c7bc3eb0561c423bea4a91feb44bccf6138ac86f
6
+ metadata.gz: 844ac1574f62522bfa329e091f1a55558e17a2db7b0c39e5599fe4ad4038c66e563f1bd36d9ddf55132746c9265a2678f2abb2f0904c1fd5e3f4fec810d3197a
7
+ data.tar.gz: b156b693586831fcf094984583442ab146946fded1564b5d70f24d62db1cf72c3df5a91286a6e6a7822186f0655347e825a1d4f391c735157a2bf6121808d313
@@ -1,6 +1,8 @@
1
1
  #include "zscan.h"
2
2
 
3
3
  static const rb_data_type_t* zscan_type;
4
+ static void** bspec_opcodes;
5
+ # include "bspec_init.inc"
4
6
 
5
7
  typedef struct {
6
8
  long s_size;
@@ -31,18 +33,31 @@ static VALUE bspec_alloc(VALUE klass) {
31
33
  bs->a_cap = 4;
32
34
  bs->a_size = 0;
33
35
  bs->code = (void**)malloc(bs->a_cap * sizeof(void*));
36
+ for (long i = 0; i < bs->a_cap; i++) {
37
+ bs->code[i] = bspec_opcodes[0];
38
+ }
34
39
  return TypedData_Wrap_Struct(klass, &bspec_type, bs);
35
40
  }
36
41
 
37
- static VALUE bspec_append(VALUE self, VALUE v_code, VALUE v_s_size) {
42
+ static VALUE bspec_append(VALUE self, VALUE v_i) {
38
43
  BSpec* bs = rb_check_typeddata(self, &bspec_type);
39
- if (bs->a_size == bs->a_cap) {
44
+ long i = NUM2LONG(v_i);
45
+ if (i < 0 || i >= bspec_opcodes_size) {
46
+ rb_raise(rb_eArgError, "bad opcode index");
47
+ }
48
+
49
+ // ensure size
50
+ if (bs->a_size == bs->a_cap - 1) { // end with 0:RET, always terminate
51
+ bs->code = (void**)realloc(bs->code, bs->a_cap * 2 * sizeof(void*));
52
+ long j = bs->a_cap;
40
53
  bs->a_cap *= 2;
41
- bs->code = (void**)realloc(bs->code, bs->a_cap * sizeof(void*));
54
+ for (; j < bs->a_cap; j++) {
55
+ bs->code[j] = bspec_opcodes[0];
56
+ }
42
57
  }
43
- long s_size = NUM2LONG(v_s_size);
44
- bs->code[bs->a_size++] = ((void**)RSTRING_PTR(v_code))[0];
45
- bs->s_size += s_size;
58
+
59
+ bs->code[bs->a_size++] = bspec_opcodes[i];
60
+ bs->s_size += bspec_s_sizes[i];
46
61
  return self;
47
62
  }
48
63
 
@@ -101,6 +116,23 @@ static VALUE bspec_big_endian_p(VALUE self) {
101
116
 
102
117
  #include "bspec_exec.inc"
103
118
 
119
+ static VALUE bspec_opcodes_to_a(VALUE bspec) {
120
+ volatile VALUE a = rb_ary_new();
121
+ for (long i = 0; i < bspec_opcodes_size; i++) {
122
+ rb_ary_push(a, UINT64toNUM((uint64_t)(bspec_opcodes[i])));
123
+ }
124
+ return a;
125
+ }
126
+
127
+ static VALUE bspec_inspect_opcodes(VALUE bspec, VALUE self) {
128
+ BSpec* bs = rb_check_typeddata(self, &bspec_type);
129
+ volatile VALUE a = rb_ary_new();
130
+ for (long i = 0; i <= bs->a_size; i++) {
131
+ rb_ary_push(a, UINT64toNUM((uint64_t)(bs->code[i])));
132
+ }
133
+ return a;
134
+ }
135
+
104
136
  static VALUE zscan_scan_binary(VALUE self, VALUE spec) {
105
137
  ZScan* p = rb_check_typeddata(self, zscan_type);
106
138
  if (!rb_enc_str_asciicompat_p(p->s)) {
@@ -126,16 +158,11 @@ void Init_zscan_bspec(VALUE zscan, const rb_data_type_t* _zscan_type) {
126
158
  zscan_type = _zscan_type;
127
159
  rb_define_method(zscan, "scan_binary", zscan_scan_binary, 1);
128
160
 
161
+ bspec_opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
129
162
  VALUE bs = rb_define_class_under(zscan, "BinarySpec", rb_cObject);
130
163
  rb_define_singleton_method(bs, "big_endian?", bspec_big_endian_p, 0);
164
+ rb_define_singleton_method(bs, "_opcodes_to_a", bspec_opcodes_to_a, 0);
165
+ rb_define_singleton_method(bs, "_inspect_opcodes", bspec_inspect_opcodes, 1);
131
166
  rb_define_alloc_func(bs, bspec_alloc);
132
- rb_define_method(bs, "append", bspec_append, 2);
133
-
134
- # include "bspec_opcode_names.inc"
135
- void** opcodes = (void**)bspec_exec(NULL, NULL, Qnil);
136
- for (long i = 0; i < bspec_opcode_size; i++) {
137
- VALUE bytecode = rb_str_new((char*)&opcodes[i], sizeof(void*));
138
- OBJ_FREEZE(bytecode);
139
- rb_define_const(bs, bspec_opcode_names[i], bytecode);
140
- }
167
+ rb_define_method(bs, "append", bspec_append, 1);
141
168
  }
@@ -0,0 +1,3 @@
1
+ // GENERATED WITH: rake gen
2
+ static const long bspec_s_sizes[] = {0, 1, 2, 2, 4, 4, 8, 8, 1, 2, 2, 4, 4, 8, 8, 4, 4, 8, 8};
3
+ static const long bspec_opcodes_size = 19;
@@ -69,10 +69,10 @@ static VALUE zscan_advance(VALUE self, VALUE v_diff) {
69
69
  long byteend = RSTRING_LEN(p->s);
70
70
  char* ptr = RSTRING_PTR(p->s);
71
71
  for (; p->pos < n && p->bytepos < byteend;) {
72
- int n = rb_enc_mbclen(ptr + p->bytepos, ptr + byteend, enc);
73
- if (n) {
72
+ int m = rb_enc_mbclen(ptr + p->bytepos, ptr + byteend, enc);
73
+ if (m) {
74
74
  p->pos++;
75
- p->bytepos += n;
75
+ p->bytepos += m;
76
76
  } else {
77
77
  break;
78
78
  }
@@ -141,6 +141,35 @@ static VALUE zscan_eos_p(VALUE self) {
141
141
  return (p->bytepos == RSTRING_LEN(p->s) ? Qtrue : Qfalse);
142
142
  }
143
143
 
144
+ static VALUE zscan_rest(VALUE self) {
145
+ P;
146
+ return rb_funcall(p->s, rb_intern("byteslice"), 2, LONG2NUM(p->bytepos), LONG2NUM(RSTRING_LEN(p->s)));
147
+ }
148
+
149
+ static VALUE zscan_rest_size(VALUE self) {
150
+ P;
151
+ rb_encoding* enc = rb_enc_get(p->s);
152
+ char* ptr = RSTRING_PTR(p->s) + p->bytepos;
153
+ long len = RSTRING_LEN(p->s) - p->bytepos;
154
+
155
+ long sz = 0;
156
+ for (long i = 0; i < len;) {
157
+ long n = rb_enc_mbclen(ptr + i, ptr + len, enc);
158
+ if (n) {
159
+ sz++;
160
+ i += n;
161
+ } else {
162
+ rb_raise(rb_eRuntimeError, "failed to scan char");
163
+ }
164
+ }
165
+ return LONG2NUM(sz);
166
+ }
167
+
168
+ static VALUE zscan_rest_bytesize(VALUE self) {
169
+ P;
170
+ return LONG2NUM(RSTRING_LEN(p->s) - p->bytepos);
171
+ }
172
+
144
173
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
145
174
  static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
146
175
  P;
@@ -385,6 +414,10 @@ void Init_zscan() {
385
414
  rb_define_method(zscan, "bytepos=", zscan_bytepos_eq, 1);
386
415
  rb_define_method(zscan, "advance", zscan_advance, 1);
387
416
  rb_define_method(zscan, "eos?", zscan_eos_p, 0);
417
+ rb_define_method(zscan, "rest", zscan_rest, 0);
418
+ rb_define_method(zscan, "rest_size", zscan_rest_size, 0);
419
+ rb_define_method(zscan, "rest_bytesize", zscan_rest_bytesize, 0);
420
+
388
421
  rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
389
422
  rb_define_method(zscan, "scan", zscan_scan, 1);
390
423
  rb_define_method(zscan, "push", zscan_push, 0);
@@ -3,7 +3,7 @@ require_relative "zscan/instructions"
3
3
  require "date"
4
4
 
5
5
  class ZScan
6
- VERSION = '1.2'
6
+ VERSION = '1.3'
7
7
 
8
8
  def initialize s, dup=false
9
9
  if s.encoding.ascii_compatible?
@@ -98,10 +98,6 @@ class ZScan
98
98
  advance new_pos - pos
99
99
  end
100
100
 
101
- def rest
102
- _internal_string.byteslice bytepos, _internal_string.bytesize
103
- end
104
-
105
101
  def reset
106
102
  self.pos = 0
107
103
  self
@@ -138,10 +134,21 @@ class ZScan
138
134
  _internal_string.byteslice(0, bytepos).count "\n"
139
135
  end
140
136
 
137
+ def slice size
138
+ r = _internal_string.slice pos, size
139
+ advance size
140
+ r
141
+ end
142
+
143
+ def byteslice bytesize
144
+ r = _internal_string.byteslice bytepos, bytesize
145
+ self.bytepos += bytesize
146
+ r
147
+ end
148
+
141
149
  def self.binary_spec &p
142
150
  bs = BinarySpec.send :new
143
151
  bs.instance_eval &p
144
- bs.send :append, BinarySpec::RET, 0
145
152
  bs
146
153
  end
147
154
 
@@ -3,109 +3,109 @@ class ZScan::BinarySpec
3
3
  def int8 n=1
4
4
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
5
5
  n.times do
6
- append INT8, 1
6
+ append 1
7
7
  end
8
8
  end
9
9
  def int16 n=1
10
10
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
11
11
  n.times do
12
- append INT16, 2
12
+ append 2
13
13
  end
14
14
  end
15
15
  def int16_swap n=1
16
16
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
17
17
  n.times do
18
- append INT16_SWAP, 2
18
+ append 3
19
19
  end
20
20
  end
21
21
  def int32 n=1
22
22
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
23
23
  n.times do
24
- append INT32, 4
24
+ append 4
25
25
  end
26
26
  end
27
27
  def int32_swap n=1
28
28
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
29
29
  n.times do
30
- append INT32_SWAP, 4
30
+ append 5
31
31
  end
32
32
  end
33
33
  def int64 n=1
34
34
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
35
35
  n.times do
36
- append INT64, 8
36
+ append 6
37
37
  end
38
38
  end
39
39
  def int64_swap n=1
40
40
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
41
41
  n.times do
42
- append INT64_SWAP, 8
42
+ append 7
43
43
  end
44
44
  end
45
45
  def uint8 n=1
46
46
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
47
47
  n.times do
48
- append UINT8, 1
48
+ append 8
49
49
  end
50
50
  end
51
51
  def uint16 n=1
52
52
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
53
53
  n.times do
54
- append UINT16, 2
54
+ append 9
55
55
  end
56
56
  end
57
57
  def uint16_swap n=1
58
58
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
59
59
  n.times do
60
- append UINT16_SWAP, 2
60
+ append 10
61
61
  end
62
62
  end
63
63
  def uint32 n=1
64
64
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
65
65
  n.times do
66
- append UINT32, 4
66
+ append 11
67
67
  end
68
68
  end
69
69
  def uint32_swap n=1
70
70
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
71
71
  n.times do
72
- append UINT32_SWAP, 4
72
+ append 12
73
73
  end
74
74
  end
75
75
  def uint64 n=1
76
76
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
77
77
  n.times do
78
- append UINT64, 8
78
+ append 13
79
79
  end
80
80
  end
81
81
  def uint64_swap n=1
82
82
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
83
83
  n.times do
84
- append UINT64_SWAP, 8
84
+ append 14
85
85
  end
86
86
  end
87
87
  def single n=1
88
88
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
89
89
  n.times do
90
- append SINGLE, 4
90
+ append 15
91
91
  end
92
92
  end
93
93
  def single_swap n=1
94
94
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
95
95
  n.times do
96
- append SINGLE_SWAP, 4
96
+ append 16
97
97
  end
98
98
  end
99
99
  def double n=1
100
100
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
101
101
  n.times do
102
- append DOUBLE, 8
102
+ append 17
103
103
  end
104
104
  end
105
105
  def double_swap n=1
106
106
  raise ArgumentError, "repeat count should be >= 1, but got #{n}" if n < 1
107
107
  n.times do
108
- append DOUBLE_SWAP, 8
108
+ append 18
109
109
  end
110
110
  end
111
111
  if ZScan::BinarySpec.big_endian?
data/rakefile CHANGED
@@ -86,7 +86,7 @@ file gem_package => gem_files do
86
86
  end
87
87
 
88
88
  desc "generate files"
89
- task :gen => %w[ext/bspec_exec.inc ext/bspec_opcode_names.inc lib/zscan/instructions.rb]
89
+ task :gen => %w[ext/bspec_exec.inc ext/bspec_init.inc lib/zscan/instructions.rb]
90
90
 
91
91
  file 'ext/bspec_exec.inc' => __FILE__ do
92
92
  puts "generating ext/bspec_exec.inc"
@@ -122,13 +122,13 @@ BS_RET:
122
122
  end
123
123
  end
124
124
 
125
- file 'ext/bspec_opcode_names.inc' => __FILE__ do
126
- puts 'generating ext/bspec_opcode_names.inc'
127
- opcode_names = bspec_insns.map(&:inspect).join ', '
128
- File.open 'ext/bspec_opcode_names.inc', 'w' do |f|
125
+ file 'ext/bspec_init.inc' => __FILE__ do
126
+ puts 'generating ext/bspec_init.inc'
127
+ opcode_incrs = bspec_insns.map{|ins| bspec_incr ins}.join ', '
128
+ File.open 'ext/bspec_init.inc', 'w' do |f|
129
129
  f.puts "// GENERATED WITH: rake gen"
130
- f.puts %Q|const char* bspec_opcode_names[] = {"RET", #{opcode_names}};|
131
- f.puts %Q|long bspec_opcode_size = #{bspec_insns.size + 1};|
130
+ f.puts %Q|static const long bspec_s_sizes[] = {0, #{opcode_incrs}};|
131
+ f.puts %Q|static const long bspec_opcodes_size = #{bspec_insns.size + 1};|
132
132
  end
133
133
  end
134
134
 
@@ -138,12 +138,12 @@ file 'lib/zscan/instructions.rb' => __FILE__ do
138
138
  f.puts "# GENERATED WITH: rake gen"
139
139
  f.puts "class ZScan::BinarySpec"
140
140
 
141
- bspec_insns.each do |ins|
141
+ bspec_insns.each_with_index do |ins, i|
142
142
  f.puts <<-RUBY
143
143
  def #{ins.downcase} n=1
144
144
  raise ArgumentError, "repeat count should be >= 1, but got \#{n}" if n < 1
145
145
  n.times do
146
- append #{ins}, #{bspec_incr ins}
146
+ append #{i + 1}
147
147
  end
148
148
  end
149
149
  RUBY
data/readme.md CHANGED
@@ -59,6 +59,8 @@ See also https://bugs.ruby-lang.org/issues/7092
59
59
  - `#scan regexp_or_string`
60
60
  - `#skip regexp_or_string`
61
61
  - `#match_bytesize regexp_or_string` return length of matched bytes or `nil`.
62
+ - `#slice n` slice sub string of n chars from current pos, advances the cursor.
63
+ - `#byteslice n` slice sub string of n bytes from cursor pos, advances the cursor.
62
64
  - `#scan_float` scan a float number which is not starting with space. It deals with multibyte encodings for you.
63
65
  - `#scan_int radix=nil` if radix is nil, decide base by prefix: `0x` is 16, `0` is 8, `0b` is 2, otherwise 10. `radix` should be in range `2..36`.
64
66
  - `#scan_date format_string, start=Date::ITALY` scan a `DateTime` object, see also [strptime](http://rubydoc.info/stdlib/date/DateTime.strptime).
@@ -66,7 +68,9 @@ See also https://bugs.ruby-lang.org/issues/7092
66
68
  - `#unpack format_string`
67
69
  - `#eos?`
68
70
  - `#string` note: return a dup. Don't worry the performance because it is a copy-on-write string.
69
- - `#rest`
71
+ - `#rest` rest unscanned sub string.
72
+ - `#rest_size` count characters of unscanned sub string.
73
+ - `#rest_bytesize` count bytes of unscanned sub string.
70
74
 
71
75
  ## String delegates
72
76
 
@@ -21,10 +21,19 @@ describe ZScan do
21
21
  assert_equal 8, @z.bytepos
22
22
  end
23
23
 
24
+ it "slices a certain bytes or chars" do
25
+ assert_equal 'ab', (@z.slice 2)
26
+ assert_equal '你', (@z.slice 1)
27
+ assert_equal '好', (@z.byteslice 3)
28
+ assert_equal true, @z.eos?
29
+ end
30
+
24
31
  it "scans from middle" do
25
32
  @z.bytepos = 2
26
33
  assert_equal '你', @z.scan('你')
27
34
  assert_equal '好', @z.rest
35
+ assert_equal '好'.size, @z.rest_size
36
+ assert_equal '好'.bytesize, @z.rest_bytesize
28
37
  end
29
38
 
30
39
  it "won't overflow pos" do
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "zscan"
3
- s.version = "1.2" # version mapped from zscan.rb, don't change here
3
+ s.version = "1.3" # version mapped from zscan.rb, don't change here
4
4
  s.author = "Zete Lui"
5
5
  s.homepage = "https://github.com/luikore/zscan"
6
6
  s.platform = Gem::Platform::RUBY
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zscan
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.2'
4
+ version: '1.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-17 00:00:00.000000000 Z
11
+ date: 2013-05-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: improved string scanner, respects anchors and lookbehinds, supports codepoint
14
14
  positioning
@@ -36,7 +36,7 @@ files:
36
36
  - ext/bspec.c
37
37
  - ext/zscan.c
38
38
  - ext/bspec_exec.inc
39
- - ext/bspec_opcode_names.inc
39
+ - ext/bspec_init.inc
40
40
  homepage: https://github.com/luikore/zscan
41
41
  licenses:
42
42
  - BSD
@@ -1,3 +0,0 @@
1
- // GENERATED WITH: rake gen
2
- const char* bspec_opcode_names[] = {"RET", "INT8", "INT16", "INT16_SWAP", "INT32", "INT32_SWAP", "INT64", "INT64_SWAP", "UINT8", "UINT16", "UINT16_SWAP", "UINT32", "UINT32_SWAP", "UINT64", "UINT64_SWAP", "SINGLE", "SINGLE_SWAP", "DOUBLE", "DOUBLE_SWAP"};
3
- long bspec_opcode_size = 19;