zscan 1.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bench.rb +23 -0
- data/ext/zscan.c +42 -4
- data/lib/zscan.rb +27 -9
- data/rakefile +1 -3
- data/readme.md +18 -7
- data/spec/zscan_spec.rb +29 -0
- data/zscan.gemspec +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8baa1419230fe03d1ec41cfa65202dce10d0512c
|
4
|
+
data.tar.gz: 0cf1335b2d7221ca4c6329dd6e8e2b5b0f3ac59d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9ccf8455843673336365dd446f808cb52f1538a53c86cc69d102640e4a6afb40332e3c65f6f7a5a4178895a1095571e4328a501e64797a01a6af1cdc4256549
|
7
|
+
data.tar.gz: 8c3d109803e0e9d8e91b8787f98190fd7e400613772f57347ff2d0a99725202c8eb3444ba11efc7a606897d78cec5d9932638292c5ba2583fb697f9e7c8c2540
|
data/bench.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "strscan"
|
2
|
+
require_relative "lib/zscan"
|
3
|
+
require "benchmark"
|
4
|
+
|
5
|
+
s = "word\n"
|
6
|
+
s *= 3_000_000
|
7
|
+
|
8
|
+
puts "StringScanner:"
|
9
|
+
sc = StringScanner.new s
|
10
|
+
puts Benchmark.measure{
|
11
|
+
until sc.eos?
|
12
|
+
sc.scan(/\w+/) and (sc.pos += 1)
|
13
|
+
end
|
14
|
+
}
|
15
|
+
|
16
|
+
puts
|
17
|
+
puts "ZScan should be nearly as fast as StringScanner"
|
18
|
+
zc = ZScan.new s
|
19
|
+
puts Benchmark.measure{
|
20
|
+
until zc.eos?
|
21
|
+
zc.scan(/\w+/) and zc.advance(1)
|
22
|
+
end
|
23
|
+
}
|
data/ext/zscan.c
CHANGED
@@ -166,7 +166,7 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
|
166
166
|
return ULONG2NUM(RSTRING_LEN(pattern));
|
167
167
|
}
|
168
168
|
} else if (TYPE(pattern) == T_REGEXP) {
|
169
|
-
regex_t *re = rb_reg_prepare_re(pattern, p->s);
|
169
|
+
regex_t *re = rb_reg_prepare_re(pattern, p->s); // prepare with compatible encoding
|
170
170
|
int tmpreg = re != RREGEXP(pattern)->ptr;
|
171
171
|
if (!tmpreg) {
|
172
172
|
RREGEXP(pattern)->usecnt++;
|
@@ -177,9 +177,6 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
|
177
177
|
UChar* ptr_match_from = (UChar*)(ptr + p->bytepos);
|
178
178
|
long ret = onig_match(re, (UChar*)ptr, ptr_end, ptr_match_from, NULL, ONIG_OPTION_NONE);
|
179
179
|
|
180
|
-
if (!tmpreg) {
|
181
|
-
RREGEXP(pattern)->usecnt--;
|
182
|
-
}
|
183
180
|
if (tmpreg) {
|
184
181
|
if (RREGEXP(pattern)->usecnt) {
|
185
182
|
onig_free(re);
|
@@ -187,6 +184,8 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
|
187
184
|
onig_free(RREGEXP(pattern)->ptr);
|
188
185
|
RREGEXP(pattern)->ptr = re;
|
189
186
|
}
|
187
|
+
} else {
|
188
|
+
RREGEXP(pattern)->usecnt--;
|
190
189
|
}
|
191
190
|
|
192
191
|
if (ret == -2) {
|
@@ -201,6 +200,21 @@ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
|
201
200
|
return Qnil;
|
202
201
|
}
|
203
202
|
|
203
|
+
static VALUE zscan_scan(VALUE self, VALUE pattern) {
|
204
|
+
VALUE v_bytelen = zscan_match_bytesize(self, pattern);
|
205
|
+
if (v_bytelen == Qnil) {
|
206
|
+
return Qnil;
|
207
|
+
} else {
|
208
|
+
P;
|
209
|
+
long bytelen = NUM2LONG(v_bytelen);
|
210
|
+
volatile VALUE ret = rb_funcall(p->s, rb_intern("byteslice"), 2, LONG2NUM(p->bytepos), v_bytelen);
|
211
|
+
VALUE v_len = rb_str_length(ret);
|
212
|
+
p->bytepos += bytelen;
|
213
|
+
p->pos += NUM2LONG(v_len);
|
214
|
+
return ret;
|
215
|
+
}
|
216
|
+
}
|
217
|
+
|
204
218
|
static VALUE zscan_push(VALUE self) {
|
205
219
|
P;
|
206
220
|
if (p->stack_i + 1 == p->stack_cap) {
|
@@ -242,6 +256,27 @@ static VALUE zscan_restore(VALUE self) {
|
|
242
256
|
return self;
|
243
257
|
}
|
244
258
|
|
259
|
+
static VALUE zscan_clear_pos_stack(VALUE self) {
|
260
|
+
P;
|
261
|
+
p->stack_i = 0;
|
262
|
+
return self;
|
263
|
+
}
|
264
|
+
|
265
|
+
static VALUE zscan_try(VALUE self) {
|
266
|
+
if (!rb_block_given_p()) {
|
267
|
+
rb_raise(rb_eRuntimeError, "need a block");
|
268
|
+
}
|
269
|
+
VALUE r;
|
270
|
+
zscan_push(self);
|
271
|
+
r = rb_yield(Qnil);
|
272
|
+
if (RTEST(r)) {
|
273
|
+
zscan_drop(self);
|
274
|
+
} else {
|
275
|
+
zscan_pop(self);
|
276
|
+
}
|
277
|
+
return r;
|
278
|
+
}
|
279
|
+
|
245
280
|
void Init_zscan() {
|
246
281
|
VALUE zscan = rb_define_class("ZScan", rb_cObject);
|
247
282
|
rb_define_alloc_func(zscan, zscan_alloc);
|
@@ -254,8 +289,11 @@ void Init_zscan() {
|
|
254
289
|
rb_define_method(zscan, "advance", zscan_advance, 1);
|
255
290
|
rb_define_method(zscan, "eos?", zscan_eos_p, 0);
|
256
291
|
rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
|
292
|
+
rb_define_method(zscan, "scan", zscan_scan, 1);
|
257
293
|
rb_define_method(zscan, "push", zscan_push, 0);
|
258
294
|
rb_define_method(zscan, "pop", zscan_pop, 0);
|
259
295
|
rb_define_method(zscan, "drop", zscan_drop, 0);
|
260
296
|
rb_define_method(zscan, "restore", zscan_restore, 0);
|
297
|
+
rb_define_method(zscan, "clear_pos_stack", zscan_clear_pos_stack, 0);
|
298
|
+
rb_define_method(zscan, "try", zscan_try, 0);
|
261
299
|
}
|
data/lib/zscan.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative "../ext/zscan"
|
2
2
|
|
3
3
|
class ZScan
|
4
|
-
VERSION = '1.0'
|
4
|
+
VERSION = '1.0.1'
|
5
5
|
|
6
6
|
def initialize s, dup=false
|
7
7
|
_internal_init dup ? s.dup : s
|
@@ -11,14 +11,6 @@ class ZScan
|
|
11
11
|
_internal_string.dup
|
12
12
|
end
|
13
13
|
|
14
|
-
def scan re_or_str
|
15
|
-
if sz = match_bytesize(re_or_str)
|
16
|
-
r = _internal_string.byteslice bytepos, sz
|
17
|
-
self.bytepos += sz
|
18
|
-
r
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
14
|
def skip re_or_str
|
23
15
|
if sz = match_bytesize(re_or_str)
|
24
16
|
self.bytepos += sz
|
@@ -42,5 +34,31 @@ class ZScan
|
|
42
34
|
self.pos = _internal_string.size
|
43
35
|
end
|
44
36
|
|
37
|
+
def << substring
|
38
|
+
_internal_string << substring
|
39
|
+
end
|
40
|
+
|
41
|
+
def []= range, substring
|
42
|
+
start = range.start
|
43
|
+
if start < 0
|
44
|
+
start = _internal_string.size + start
|
45
|
+
end
|
46
|
+
if start < pos
|
47
|
+
self.pos = start
|
48
|
+
end
|
49
|
+
_internal_string[range] = substring
|
50
|
+
end
|
51
|
+
|
52
|
+
def size
|
53
|
+
_internal_string.size
|
54
|
+
end
|
55
|
+
|
56
|
+
def bytesize
|
57
|
+
_internal_string.bytesize
|
58
|
+
end
|
59
|
+
|
45
60
|
private :_internal_init, :_internal_string
|
46
61
|
end
|
62
|
+
|
63
|
+
# rooooobust!
|
64
|
+
Zscan = ZScan
|
data/rakefile
CHANGED
data/readme.md
CHANGED
@@ -46,14 +46,14 @@ z.scan /^/ #=> nil
|
|
46
46
|
|
47
47
|
See also https://bugs.ruby-lang.org/issues/7092
|
48
48
|
|
49
|
-
##
|
49
|
+
## Essential methods
|
50
50
|
|
51
51
|
- `ZScan.new string, dup=false`
|
52
52
|
- `#scan regexp_or_string`
|
53
53
|
- `#skip regexp_or_string`
|
54
|
-
- `#match_bytesize regexp_or_string`
|
54
|
+
- `#match_bytesize regexp_or_string` return length of matched bytes or `nil`.
|
55
55
|
- `#eos?`
|
56
|
-
- `#string` note:
|
56
|
+
- `#string` note: return a dup. Don't worry the performance because it is a copy-on-write string.
|
57
57
|
- `#rest`
|
58
58
|
|
59
59
|
## Pos management
|
@@ -68,13 +68,23 @@ See also https://bugs.ruby-lang.org/issues/7092
|
|
68
68
|
|
69
69
|
## Efficient pos stack manipulation
|
70
70
|
|
71
|
-
- `#push`
|
72
|
-
- `#pop`
|
73
|
-
- `#drop`
|
74
|
-
- `#restore`
|
71
|
+
- `#push` push current pos into the stack.
|
72
|
+
- `#pop` set current pos to top of the stack, and pop it.
|
73
|
+
- `#drop` drop top of pos stack without changing current pos.
|
74
|
+
- `#restore` set current pos to top of the stack.
|
75
|
+
- `#clear_pos_stack` clear pos stack.
|
76
|
+
- `#try` try to do several scans in the given block, fall back to init pos if block returns `nil` or `false`. Returns block's return, can be nested.
|
77
|
+
|
78
|
+
## String delegates
|
79
|
+
|
80
|
+
- `#<< append_string`
|
81
|
+
- `#[]= range, replace_string` note: if `range` starts before pos, moves pos left, also clears the stack.
|
82
|
+
- `#size`
|
83
|
+
- `#bytesize`
|
75
84
|
|
76
85
|
## License
|
77
86
|
|
87
|
+
```
|
78
88
|
Copyright (C) 2013 by Zete Lui (BSD)
|
79
89
|
|
80
90
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
@@ -93,3 +103,4 @@ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
93
103
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
94
104
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
95
105
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
106
|
+
```
|
data/spec/zscan_spec.rb
CHANGED
@@ -64,4 +64,33 @@ describe ZScan do
|
|
64
64
|
assert_equal '', z.scan(/(?<=a)/)
|
65
65
|
assert_equal nil, z.scan(/^/)
|
66
66
|
end
|
67
|
+
|
68
|
+
it "stack doesn't underflow" do
|
69
|
+
@z.push
|
70
|
+
@z.pop
|
71
|
+
@z.pop
|
72
|
+
@z.pos = 3
|
73
|
+
@z.push
|
74
|
+
@z.pos = 4
|
75
|
+
@z.pop
|
76
|
+
assert_equal 3, @z.pos
|
77
|
+
end
|
78
|
+
|
79
|
+
it "#try restores pos" do
|
80
|
+
z = ZScan.new "hello"
|
81
|
+
return1 = z.try do
|
82
|
+
z.scan 'h'
|
83
|
+
z.scan 'e'
|
84
|
+
end
|
85
|
+
assert_equal 'e', return1
|
86
|
+
assert_equal 2, z.pos
|
87
|
+
|
88
|
+
return2 = z.try do
|
89
|
+
z.scan 'l'
|
90
|
+
z.scan 'l'
|
91
|
+
z.scan 'p' # fails
|
92
|
+
end
|
93
|
+
assert_equal nil, return2
|
94
|
+
assert_equal 2, z.pos
|
95
|
+
end
|
67
96
|
end
|
data/zscan.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "zscan"
|
3
|
-
s.version = "1.0" # version mapped from zscan.rb, don't change here
|
3
|
+
s.version = "1.0.1" # version mapped from zscan.rb, don't change here
|
4
4
|
s.author = "Zete Lui"
|
5
5
|
s.homepage = "https://github.com/luikore/zscan"
|
6
6
|
s.platform = Gem::Platform::RUBY
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zete Lui
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-05-
|
11
|
+
date: 2013-05-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: improved string scanner, respects anchors and lookbehinds, supports codepoint
|
14
14
|
positioning
|
@@ -21,6 +21,7 @@ files:
|
|
21
21
|
- rakefile
|
22
22
|
- zscan.gemspec
|
23
23
|
- readme.md
|
24
|
+
- bench.rb
|
24
25
|
- ext/extconf.rb
|
25
26
|
- lib/zscan.rb
|
26
27
|
- spec/zscan_spec.rb
|