zscan 1.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c343abbf9b043bcb7643ac193bb5f9a0fd76245e
4
- data.tar.gz: fe57d1e6e352cad9d9ac2f8095afdbbd4affbebe
3
+ metadata.gz: 8baa1419230fe03d1ec41cfa65202dce10d0512c
4
+ data.tar.gz: 0cf1335b2d7221ca4c6329dd6e8e2b5b0f3ac59d
5
5
  SHA512:
6
- metadata.gz: 687a22c21f5da837039b557365807a89b5afd33cea5301baa1d5321c3a5b324f63f7b96524166f388da1c296e2e775ce8007e97281ad084c1583aa20c247ff6f
7
- data.tar.gz: d6fde3485b2bb8e4db9bb80513c370dec17e331c1e20c5eb087742d86fdb06c403136b98be3e14dc65747b897182f4eaa0132ebc454b427e1e5b1cc622a1c8e0
6
+ metadata.gz: a9ccf8455843673336365dd446f808cb52f1538a53c86cc69d102640e4a6afb40332e3c65f6f7a5a4178895a1095571e4328a501e64797a01a6af1cdc4256549
7
+ data.tar.gz: 8c3d109803e0e9d8e91b8787f98190fd7e400613772f57347ff2d0a99725202c8eb3444ba11efc7a606897d78cec5d9932638292c5ba2583fb697f9e7c8c2540
data/bench.rb ADDED
@@ -0,0 +1,23 @@
1
+ require "strscan"
2
+ require_relative "lib/zscan"
3
+ require "benchmark"
4
+
5
+ s = "word\n"
6
+ s *= 3_000_000
7
+
8
+ puts "StringScanner:"
9
+ sc = StringScanner.new s
10
+ puts Benchmark.measure{
11
+ until sc.eos?
12
+ sc.scan(/\w+/) and (sc.pos += 1)
13
+ end
14
+ }
15
+
16
+ puts
17
+ puts "ZScan should be nearly as fast as StringScanner"
18
+ zc = ZScan.new s
19
+ puts Benchmark.measure{
20
+ until zc.eos?
21
+ zc.scan(/\w+/) and zc.advance(1)
22
+ end
23
+ }
data/ext/zscan.c CHANGED
@@ -166,7 +166,7 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
166
166
  return ULONG2NUM(RSTRING_LEN(pattern));
167
167
  }
168
168
  } else if (TYPE(pattern) == T_REGEXP) {
169
- regex_t *re = rb_reg_prepare_re(pattern, p->s);
169
+ regex_t *re = rb_reg_prepare_re(pattern, p->s); // prepare with compatible encoding
170
170
  int tmpreg = re != RREGEXP(pattern)->ptr;
171
171
  if (!tmpreg) {
172
172
  RREGEXP(pattern)->usecnt++;
@@ -177,9 +177,6 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
177
177
  UChar* ptr_match_from = (UChar*)(ptr + p->bytepos);
178
178
  long ret = onig_match(re, (UChar*)ptr, ptr_end, ptr_match_from, NULL, ONIG_OPTION_NONE);
179
179
 
180
- if (!tmpreg) {
181
- RREGEXP(pattern)->usecnt--;
182
- }
183
180
  if (tmpreg) {
184
181
  if (RREGEXP(pattern)->usecnt) {
185
182
  onig_free(re);
@@ -187,6 +184,8 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
187
184
  onig_free(RREGEXP(pattern)->ptr);
188
185
  RREGEXP(pattern)->ptr = re;
189
186
  }
187
+ } else {
188
+ RREGEXP(pattern)->usecnt--;
190
189
  }
191
190
 
192
191
  if (ret == -2) {
@@ -201,6 +200,21 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
201
200
  return Qnil;
202
201
  }
203
202
 
203
+ static VALUE zscan_scan(VALUE self, VALUE pattern) {
204
+ VALUE v_bytelen = zscan_match_bytesize(self, pattern);
205
+ if (v_bytelen == Qnil) {
206
+ return Qnil;
207
+ } else {
208
+ P;
209
+ long bytelen = NUM2LONG(v_bytelen);
210
+ volatile VALUE ret = rb_funcall(p->s, rb_intern("byteslice"), 2, LONG2NUM(p->bytepos), v_bytelen);
211
+ VALUE v_len = rb_str_length(ret);
212
+ p->bytepos += bytelen;
213
+ p->pos += NUM2LONG(v_len);
214
+ return ret;
215
+ }
216
+ }
217
+
204
218
  static VALUE zscan_push(VALUE self) {
205
219
  P;
206
220
  if (p->stack_i + 1 == p->stack_cap) {
@@ -242,6 +256,27 @@ static VALUE zscan_restore(VALUE self) {
242
256
  return self;
243
257
  }
244
258
 
259
+ static VALUE zscan_clear_pos_stack(VALUE self) {
260
+ P;
261
+ p->stack_i = 0;
262
+ return self;
263
+ }
264
+
265
+ static VALUE zscan_try(VALUE self) {
266
+ if (!rb_block_given_p()) {
267
+ rb_raise(rb_eRuntimeError, "need a block");
268
+ }
269
+ VALUE r;
270
+ zscan_push(self);
271
+ r = rb_yield(Qnil);
272
+ if (RTEST(r)) {
273
+ zscan_drop(self);
274
+ } else {
275
+ zscan_pop(self);
276
+ }
277
+ return r;
278
+ }
279
+
245
280
  void Init_zscan() {
246
281
  VALUE zscan = rb_define_class("ZScan", rb_cObject);
247
282
  rb_define_alloc_func(zscan, zscan_alloc);
@@ -254,8 +289,11 @@ void Init_zscan() {
254
289
  rb_define_method(zscan, "advance", zscan_advance, 1);
255
290
  rb_define_method(zscan, "eos?", zscan_eos_p, 0);
256
291
  rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
292
+ rb_define_method(zscan, "scan", zscan_scan, 1);
257
293
  rb_define_method(zscan, "push", zscan_push, 0);
258
294
  rb_define_method(zscan, "pop", zscan_pop, 0);
259
295
  rb_define_method(zscan, "drop", zscan_drop, 0);
260
296
  rb_define_method(zscan, "restore", zscan_restore, 0);
297
+ rb_define_method(zscan, "clear_pos_stack", zscan_clear_pos_stack, 0);
298
+ rb_define_method(zscan, "try", zscan_try, 0);
261
299
  }
data/lib/zscan.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require_relative "../ext/zscan"
2
2
 
3
3
  class ZScan
4
- VERSION = '1.0'
4
+ VERSION = '1.0.1'
5
5
 
6
6
  def initialize s, dup=false
7
7
  _internal_init dup ? s.dup : s
@@ -11,14 +11,6 @@ class ZScan
11
11
  _internal_string.dup
12
12
  end
13
13
 
14
- def scan re_or_str
15
- if sz = match_bytesize(re_or_str)
16
- r = _internal_string.byteslice bytepos, sz
17
- self.bytepos += sz
18
- r
19
- end
20
- end
21
-
22
14
  def skip re_or_str
23
15
  if sz = match_bytesize(re_or_str)
24
16
  self.bytepos += sz
@@ -42,5 +34,31 @@ class ZScan
42
34
  self.pos = _internal_string.size
43
35
  end
44
36
 
37
+ def << substring
38
+ _internal_string << substring
39
+ end
40
+
41
+ def []= range, substring
42
+ start = range.start
43
+ if start < 0
44
+ start = _internal_string.size + start
45
+ end
46
+ if start < pos
47
+ self.pos = start
48
+ end
49
+ _internal_string[range] = substring
50
+ end
51
+
52
+ def size
53
+ _internal_string.size
54
+ end
55
+
56
+ def bytesize
57
+ _internal_string.bytesize
58
+ end
59
+
45
60
  private :_internal_init, :_internal_string
46
61
  end
62
+
63
+ # rooooobust!
64
+ Zscan = ZScan
data/rakefile CHANGED
@@ -9,9 +9,7 @@ task :default => [:test, gem_package]
9
9
 
10
10
  desc "build and run test"
11
11
  task :test do
12
- Dir.chdir "ext"
13
- sh "make"
14
- Dir.chdir ".."
12
+ sh "make -C ext"
15
13
  sh "rspec"
16
14
  end
17
15
 
data/readme.md CHANGED
@@ -46,14 +46,14 @@ z.scan /^/ #=> nil
46
46
 
47
47
  See also https://bugs.ruby-lang.org/issues/7092
48
48
 
49
- ## Methods
49
+ ## Essential methods
50
50
 
51
51
  - `ZScan.new string, dup=false`
52
52
  - `#scan regexp_or_string`
53
53
  - `#skip regexp_or_string`
54
- - `#match_bytesize regexp_or_string` returns length of matched bytes or nil
54
+ - `#match_bytesize regexp_or_string` return length of matched bytes or `nil`.
55
55
  - `#eos?`
56
- - `#string` note: returns a COW dup
56
+ - `#string` note: returns a dup. Don't worry about performance, because the dup is a copy-on-write string.
57
57
  - `#rest`
58
58
 
59
59
  ## Pos management
@@ -68,13 +68,23 @@ See also https://bugs.ruby-lang.org/issues/7092
68
68
 
69
69
  ## Efficient pos stack manipulation
70
70
 
71
- - `#push` pushes current pos into the stack.
72
- - `#pop` sets current pos to top of the stack, and pops it.
73
- - `#drop` drops top of pos stack without changing current pos.
74
- - `#restore` sets current pos to top of the stack.
71
+ - `#push` push current pos into the stack.
72
+ - `#pop` set current pos to top of the stack, and pop it.
73
+ - `#drop` drop top of pos stack without changing current pos.
74
+ - `#restore` set current pos to top of the stack.
75
+ - `#clear_pos_stack` clear pos stack.
76
+ - `#try` try several scans in the given block; fall back to the initial pos if the block returns `nil` or `false`. Returns the block's return value. Can be nested.
77
+
78
+ ## String delegates
79
+
80
+ - `#<< append_string`
81
+ - `#[]= range, replace_string` note: if `range` starts before pos, moves pos left, also clears the stack.
82
+ - `#size`
83
+ - `#bytesize`
75
84
 
76
85
  ## License
77
86
 
87
+ ```
78
88
  Copyright (C) 2013 by Zete Lui (BSD)
79
89
 
80
90
  Permission is hereby granted, free of charge, to any person obtaining a copy of
@@ -93,3 +103,4 @@ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
93
103
  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
94
104
  IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
95
105
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
106
+ ```
data/spec/zscan_spec.rb CHANGED
@@ -64,4 +64,33 @@ describe ZScan do
64
64
  assert_equal '', z.scan(/(?<=a)/)
65
65
  assert_equal nil, z.scan(/^/)
66
66
  end
67
+
68
+ it "stack doesn't underflow" do
69
+ @z.push
70
+ @z.pop
71
+ @z.pop
72
+ @z.pos = 3
73
+ @z.push
74
+ @z.pos = 4
75
+ @z.pop
76
+ assert_equal 3, @z.pos
77
+ end
78
+
79
+ it "#try restores pos" do
80
+ z = ZScan.new "hello"
81
+ return1 = z.try do
82
+ z.scan 'h'
83
+ z.scan 'e'
84
+ end
85
+ assert_equal 'e', return1
86
+ assert_equal 2, z.pos
87
+
88
+ return2 = z.try do
89
+ z.scan 'l'
90
+ z.scan 'l'
91
+ z.scan 'p' # fails
92
+ end
93
+ assert_equal nil, return2
94
+ assert_equal 2, z.pos
95
+ end
67
96
  end
data/zscan.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "zscan"
3
- s.version = "1.0" # version mapped from zscan.rb, don't change here
3
+ s.version = "1.0.1" # version mapped from zscan.rb, don't change here
4
4
  s.author = "Zete Lui"
5
5
  s.homepage = "https://github.com/luikore/zscan"
6
6
  s.platform = Gem::Platform::RUBY
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zscan
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.0'
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-06 00:00:00.000000000 Z
11
+ date: 2013-05-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: improved string scanner, respects anchors and lookbehinds, supports codepoint
14
14
  positioning
@@ -21,6 +21,7 @@ files:
21
21
  - rakefile
22
22
  - zscan.gemspec
23
23
  - readme.md
24
+ - bench.rb
24
25
  - ext/extconf.rb
25
26
  - lib/zscan.rb
26
27
  - spec/zscan_spec.rb