zscan 1.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bench.rb +23 -0
- data/ext/zscan.c +42 -4
- data/lib/zscan.rb +27 -9
- data/rakefile +1 -3
- data/readme.md +18 -7
- data/spec/zscan_spec.rb +29 -0
- data/zscan.gemspec +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8baa1419230fe03d1ec41cfa65202dce10d0512c
|
4
|
+
data.tar.gz: 0cf1335b2d7221ca4c6329dd6e8e2b5b0f3ac59d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9ccf8455843673336365dd446f808cb52f1538a53c86cc69d102640e4a6afb40332e3c65f6f7a5a4178895a1095571e4328a501e64797a01a6af1cdc4256549
|
7
|
+
data.tar.gz: 8c3d109803e0e9d8e91b8787f98190fd7e400613772f57347ff2d0a99725202c8eb3444ba11efc7a606897d78cec5d9932638292c5ba2583fb697f9e7c8c2540
|
data/bench.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Micro-benchmark: scans the same large input once with the standard
# library's StringScanner and once with ZScan, printing the Benchmark
# timing of each run.
require "strscan"
require_relative "lib/zscan"
require "benchmark"

input = "word\n" * 3_000_000

puts "StringScanner:"
std_scanner = StringScanner.new(input)
std_timing = Benchmark.measure do
  until std_scanner.eos?
    # consume one word, then step over the newline that follows it
    std_scanner.pos += 1 if std_scanner.scan(/\w+/)
  end
end
puts std_timing

puts
puts "ZScan should be nearly as fast as StringScanner"
z_scanner = ZScan.new(input)
z_timing = Benchmark.measure do
  until z_scanner.eos?
    # same walk as above, using ZScan#advance to skip the newline
    z_scanner.advance(1) if z_scanner.scan(/\w+/)
  end
end
puts z_timing
|
data/ext/zscan.c
CHANGED
@@ -166,7 +166,7 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
|
166
166
|
return ULONG2NUM(RSTRING_LEN(pattern));
|
167
167
|
}
|
168
168
|
} else if (TYPE(pattern) == T_REGEXP) {
|
169
|
-
regex_t *re = rb_reg_prepare_re(pattern, p->s);
|
169
|
+
regex_t *re = rb_reg_prepare_re(pattern, p->s); // prepare with compatible encoding
|
170
170
|
int tmpreg = re != RREGEXP(pattern)->ptr;
|
171
171
|
if (!tmpreg) {
|
172
172
|
RREGEXP(pattern)->usecnt++;
|
@@ -177,9 +177,6 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
|
177
177
|
UChar* ptr_match_from = (UChar*)(ptr + p->bytepos);
|
178
178
|
long ret = onig_match(re, (UChar*)ptr, ptr_end, ptr_match_from, NULL, ONIG_OPTION_NONE);
|
179
179
|
|
180
|
-
if (!tmpreg) {
|
181
|
-
RREGEXP(pattern)->usecnt--;
|
182
|
-
}
|
183
180
|
if (tmpreg) {
|
184
181
|
if (RREGEXP(pattern)->usecnt) {
|
185
182
|
onig_free(re);
|
@@ -187,6 +184,8 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
|
187
184
|
onig_free(RREGEXP(pattern)->ptr);
|
188
185
|
RREGEXP(pattern)->ptr = re;
|
189
186
|
}
|
187
|
+
} else {
|
188
|
+
RREGEXP(pattern)->usecnt--;
|
190
189
|
}
|
191
190
|
|
192
191
|
if (ret == -2) {
|
@@ -201,6 +200,21 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
|
201
200
|
return Qnil;
|
202
201
|
}
|
203
202
|
|
203
|
+
// ZScan#scan(pattern)
// Asks zscan_match_bytesize for the byte length of a match of pattern.
// Returns nil when that call returns nil; otherwise returns the matched
// slice of the string and advances both bytepos and pos past it.
static VALUE zscan_scan(VALUE self, VALUE pattern) {
  VALUE v_matched_bytes = zscan_match_bytesize(self, pattern);
  if (!NIL_P(v_matched_bytes)) {
    P;
    long matched_bytes = NUM2LONG(v_matched_bytes);
    // slice before moving bytepos: byteslice starts at the current byte position
    volatile VALUE matched = rb_funcall(p->s, rb_intern("byteslice"), 2, LONG2NUM(p->bytepos), v_matched_bytes);
    VALUE v_matched_len = rb_str_length(matched);
    p->bytepos += matched_bytes;
    // pos advances by the slice's length as reported by rb_str_length
    p->pos += NUM2LONG(v_matched_len);
    return matched;
  }
  return Qnil;
}
|
217
|
+
|
204
218
|
static VALUE zscan_push(VALUE self) {
|
205
219
|
P;
|
206
220
|
if (p->stack_i + 1 == p->stack_cap) {
|
@@ -242,6 +256,27 @@ static VALUE zscan_restore(VALUE self) {
|
|
242
256
|
return self;
|
243
257
|
}
|
244
258
|
|
259
|
+
// ZScan#clear_pos_stack
// Resets the pos stack index to 0 and returns the scanner itself.
static VALUE zscan_clear_pos_stack(VALUE self) {
  P;
  p->stack_i = 0; // only the index is reset here
  return self;
}
|
264
|
+
|
265
|
+
// ZScan#try { ... }
// Pushes the current pos, yields to the block, then calls zscan_pop when
// the block's result is nil/false or zscan_drop when it is truthy.
// Returns the block's result. Raises RuntimeError when no block is given.
// NOTE(review): if the block exits non-locally (e.g. raises), the code after
// rb_yield does not run, so the pushed entry presumably stays on the stack -- confirm.
static VALUE zscan_try(VALUE self) {
  VALUE block_result;
  if (!rb_block_given_p()) {
    rb_raise(rb_eRuntimeError, "need a block");
  }
  zscan_push(self);
  block_result = rb_yield(Qnil);
  if (!RTEST(block_result)) {
    zscan_pop(self);
  } else {
    zscan_drop(self);
  }
  return block_result;
}
|
279
|
+
|
245
280
|
void Init_zscan() {
|
246
281
|
VALUE zscan = rb_define_class("ZScan", rb_cObject);
|
247
282
|
rb_define_alloc_func(zscan, zscan_alloc);
|
@@ -254,8 +289,11 @@ void Init_zscan() {
|
|
254
289
|
rb_define_method(zscan, "advance", zscan_advance, 1);
|
255
290
|
rb_define_method(zscan, "eos?", zscan_eos_p, 0);
|
256
291
|
rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
|
292
|
+
rb_define_method(zscan, "scan", zscan_scan, 1);
|
257
293
|
rb_define_method(zscan, "push", zscan_push, 0);
|
258
294
|
rb_define_method(zscan, "pop", zscan_pop, 0);
|
259
295
|
rb_define_method(zscan, "drop", zscan_drop, 0);
|
260
296
|
rb_define_method(zscan, "restore", zscan_restore, 0);
|
297
|
+
rb_define_method(zscan, "clear_pos_stack", zscan_clear_pos_stack, 0);
|
298
|
+
rb_define_method(zscan, "try", zscan_try, 0);
|
261
299
|
}
|
data/lib/zscan.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative "../ext/zscan"
|
2
2
|
|
3
3
|
class ZScan
|
4
|
-
VERSION = '1.0'
|
4
|
+
VERSION = '1.0.1'
|
5
5
|
|
6
6
|
def initialize s, dup=false
|
7
7
|
_internal_init dup ? s.dup : s
|
@@ -11,14 +11,6 @@ class ZScan
|
|
11
11
|
_internal_string.dup
|
12
12
|
end
|
13
13
|
|
14
|
-
def scan re_or_str
|
15
|
-
if sz = match_bytesize(re_or_str)
|
16
|
-
r = _internal_string.byteslice bytepos, sz
|
17
|
-
self.bytepos += sz
|
18
|
-
r
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
14
|
def skip re_or_str
|
23
15
|
if sz = match_bytesize(re_or_str)
|
24
16
|
self.bytepos += sz
|
@@ -42,5 +34,31 @@ class ZScan
|
|
42
34
|
self.pos = _internal_string.size
|
43
35
|
end
|
44
36
|
|
37
|
+
# Appends +substring+ to the underlying string.
# Returns the result of String#<< on that string.
def <<(substring)
  buffer = _internal_string
  buffer << substring
end
|
40
|
+
|
41
|
+
# Replaces the part of the underlying string selected by +range+ with
# +substring+.
#
# When the range starts before the current pos, pos is first moved left to
# the start of the range. A negative start counts back from the end of the
# string.
#
# range     - Range of character indexes to replace.
# substring - the replacement String.
#
# NOTE(review): the readme says the pos stack is also cleared in that case;
# nothing here clears it -- confirm whether `pos=` takes care of it.
def []=(range, substring)
  # Range exposes its lower bound as #begin; it has no #start method,
  # so the previous `range.start` raised NoMethodError on every call.
  start = range.begin
  start += _internal_string.size if start < 0
  self.pos = start if start < pos
  _internal_string[range] = substring
end
|
51
|
+
|
52
|
+
# Length of the underlying string in characters.
def size
  buffer = _internal_string
  buffer.length
end
|
55
|
+
|
56
|
+
# Length of the underlying string in bytes.
def bytesize
  buffer = _internal_string
  buffer.bytesize
end
|
59
|
+
|
45
60
|
private :_internal_init, :_internal_string
|
46
61
|
end
|
62
|
+
|
63
|
+
# Alias so the class can also be referenced as Zscan.
|
64
|
+
Zscan = ZScan
|
data/rakefile
CHANGED
data/readme.md
CHANGED
@@ -46,14 +46,14 @@ z.scan /^/ #=> nil
|
|
46
46
|
|
47
47
|
See also https://bugs.ruby-lang.org/issues/7092
|
48
48
|
|
49
|
-
##
|
49
|
+
## Essential methods
|
50
50
|
|
51
51
|
- `ZScan.new string, dup=false`
|
52
52
|
- `#scan regexp_or_string`
|
53
53
|
- `#skip regexp_or_string`
|
54
|
-
- `#match_bytesize regexp_or_string`
|
54
|
+
- `#match_bytesize regexp_or_string` returns the byte length of the match, or `nil` if there is no match.
|
55
55
|
- `#eos?`
|
56
|
-
- `#string` note:
|
56
|
+
- `#string` note: returns a dup. Don't worry about performance, because the dup is a copy-on-write string.
|
57
57
|
- `#rest`
|
58
58
|
|
59
59
|
## Pos management
|
@@ -68,13 +68,23 @@ See also https://bugs.ruby-lang.org/issues/7092
|
|
68
68
|
|
69
69
|
## Efficient pos stack manipulation
|
70
70
|
|
71
|
-
- `#push`
|
72
|
-
- `#pop`
|
73
|
-
- `#drop`
|
74
|
-
- `#restore`
|
71
|
+
- `#push` push current pos into the stack.
|
72
|
+
- `#pop` set current pos to top of the stack, and pop it.
|
73
|
+
- `#drop` drop top of pos stack without changing current pos.
|
74
|
+
- `#restore` set current pos to top of the stack.
|
75
|
+
- `#clear_pos_stack` clear pos stack.
|
76
|
+
- `#try` tries the scans in the given block and falls back to the initial pos if the block returns `nil` or `false`. Returns the block's return value; calls can be nested.
|
77
|
+
|
78
|
+
## String delegates
|
79
|
+
|
80
|
+
- `#<< append_string`
|
81
|
+
- `#[]= range, replace_string` note: if `range` starts before pos, moves pos left, also clears the stack.
|
82
|
+
- `#size`
|
83
|
+
- `#bytesize`
|
75
84
|
|
76
85
|
## License
|
77
86
|
|
87
|
+
```
|
78
88
|
Copyright (C) 2013 by Zete Lui (BSD)
|
79
89
|
|
80
90
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
@@ -93,3 +103,4 @@ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
93
103
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
94
104
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
95
105
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
106
|
+
```
|
data/spec/zscan_spec.rb
CHANGED
@@ -64,4 +64,33 @@ describe ZScan do
|
|
64
64
|
assert_equal '', z.scan(/(?<=a)/)
|
65
65
|
assert_equal nil, z.scan(/^/)
|
66
66
|
end
|
67
|
+
|
68
|
+
it "stack doesn't underflow" do
  # one push followed by two pops: presumably the second pop meets an empty stack
  @z.push
  2.times { @z.pop }

  # the stack must still behave normally afterwards
  @z.pos = 3
  @z.push
  @z.pos = 4
  @z.pop
  assert_equal(3, @z.pos)
end
|
78
|
+
|
79
|
+
it "#try restores pos" do
  scanner = ZScan.new "hello"

  # both scans match, so the block's value is the last match and pos stays advanced
  matched = scanner.try do
    scanner.scan 'h'
    scanner.scan 'e'
  end
  assert_equal 'e', matched
  assert_equal 2, scanner.pos

  # the final scan fails, so the block yields nil and pos returns to 2
  unmatched = scanner.try do
    scanner.scan 'l'
    scanner.scan 'l'
    scanner.scan 'p' # fails
  end
  assert_equal nil, unmatched
  assert_equal 2, scanner.pos
end
|
67
96
|
end
|
data/zscan.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "zscan"
|
3
|
-
s.version = "1.0" # version mapped from zscan.rb, don't change here
|
3
|
+
s.version = "1.0.1" # version mapped from zscan.rb, don't change here
|
4
4
|
s.author = "Zete Lui"
|
5
5
|
s.homepage = "https://github.com/luikore/zscan"
|
6
6
|
s.platform = Gem::Platform::RUBY
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zete Lui
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-05-
|
11
|
+
date: 2013-05-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: improved string scanner, respects anchors and lookbehinds, supports codepoint
|
14
14
|
positioning
|
@@ -21,6 +21,7 @@ files:
|
|
21
21
|
- rakefile
|
22
22
|
- zscan.gemspec
|
23
23
|
- readme.md
|
24
|
+
- bench.rb
|
24
25
|
- ext/extconf.rb
|
25
26
|
- lib/zscan.rb
|
26
27
|
- spec/zscan_spec.rb
|