zscan 0.5 → 1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/zscan.c +14 -14
- data/lib/zscan.rb +12 -3
- data/rakefile +34 -0
- data/readme.md +68 -29
- data/spec/zscan_spec.rb +67 -0
- data/zscan.gemspec +4 -3
- metadata +9 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c343abbf9b043bcb7643ac193bb5f9a0fd76245e
|
4
|
+
data.tar.gz: fe57d1e6e352cad9d9ac2f8095afdbbd4affbebe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 687a22c21f5da837039b557365807a89b5afd33cea5301baa1d5321c3a5b324f63f7b96524166f388da1c296e2e775ce8007e97281ad084c1583aa20c247ff6f
|
7
|
+
data.tar.gz: d6fde3485b2bb8e4db9bb80513c370dec17e331c1e20c5eb087742d86fdb06c403136b98be3e14dc65747b897182f4eaa0132ebc454b427e1e5b1cc622a1c8e0
|
data/ext/zscan.c
CHANGED
@@ -43,8 +43,8 @@ static VALUE zscan_alloc(VALUE klass) {
|
|
43
43
|
ZScan* p = ALLOC(ZScan);
|
44
44
|
MEMZERO(p, ZScan, 1);
|
45
45
|
p->s = Qnil;
|
46
|
-
p->stack_cap =
|
47
|
-
p->stack = (Pos*)malloc(sizeof(Pos) *
|
46
|
+
p->stack_cap = 8;
|
47
|
+
p->stack = (Pos*)malloc(sizeof(Pos) * p->stack_cap);
|
48
48
|
return TypedData_Wrap_Struct(klass, &zscan_type, p);
|
49
49
|
}
|
50
50
|
|
@@ -158,10 +158,10 @@ static VALUE zscan_eos_p(VALUE self) {
|
|
158
158
|
}
|
159
159
|
|
160
160
|
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
161
|
-
static VALUE
|
161
|
+
static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
162
162
|
P;
|
163
163
|
if (TYPE(pattern) == T_STRING) {
|
164
|
-
volatile VALUE ss = rb_funcall(
|
164
|
+
volatile VALUE ss = rb_funcall(p->s, rb_intern("byteslice"), 2, ULONG2NUM(p->bytepos), ULONG2NUM(RSTRING_LEN(p->s)));
|
165
165
|
if (RTEST(rb_funcall(ss, rb_intern("start_with?"), 1, pattern))) {
|
166
166
|
return ULONG2NUM(RSTRING_LEN(pattern));
|
167
167
|
}
|
@@ -201,10 +201,10 @@ static VALUE zscan_bmatch_p(VALUE self, VALUE pattern) {
|
|
201
201
|
return Qnil;
|
202
202
|
}
|
203
203
|
|
204
|
-
static VALUE
|
204
|
+
static VALUE zscan_push(VALUE self) {
|
205
205
|
P;
|
206
206
|
if (p->stack_i + 1 == p->stack_cap) {
|
207
|
-
p->stack_cap
|
207
|
+
p->stack_cap = p->stack_cap * 1.4 + 3;
|
208
208
|
p->stack = (Pos*)realloc(p->stack, sizeof(Pos) * p->stack_cap);
|
209
209
|
}
|
210
210
|
Pos e = {p->pos, p->bytepos};
|
@@ -212,7 +212,7 @@ static VALUE zscan_push_pos(VALUE self) {
|
|
212
212
|
return self;
|
213
213
|
}
|
214
214
|
|
215
|
-
static VALUE
|
215
|
+
static VALUE zscan_pop(VALUE self) {
|
216
216
|
P;
|
217
217
|
if (p->stack_i) {
|
218
218
|
p->pos = p->stack[p->stack_i].pos;
|
@@ -225,7 +225,7 @@ static VALUE zscan_pop_pos(VALUE self) {
|
|
225
225
|
return self;
|
226
226
|
}
|
227
227
|
|
228
|
-
static VALUE
|
228
|
+
static VALUE zscan_drop(VALUE self) {
|
229
229
|
P;
|
230
230
|
if (p->stack_i) {
|
231
231
|
p->stack_i--;
|
@@ -233,7 +233,7 @@ static VALUE zscan_drop_top(VALUE self) {
|
|
233
233
|
return self;
|
234
234
|
}
|
235
235
|
|
236
|
-
static VALUE
|
236
|
+
static VALUE zscan_restore(VALUE self) {
|
237
237
|
P;
|
238
238
|
if (p->stack_i) {
|
239
239
|
p->pos = p->stack[p->stack_i].pos;
|
@@ -253,9 +253,9 @@ void Init_zscan() {
|
|
253
253
|
rb_define_method(zscan, "bytepos=", zscan_bytepos_eq, 1);
|
254
254
|
rb_define_method(zscan, "advance", zscan_advance, 1);
|
255
255
|
rb_define_method(zscan, "eos?", zscan_eos_p, 0);
|
256
|
-
rb_define_method(zscan, "
|
257
|
-
rb_define_method(zscan, "
|
258
|
-
rb_define_method(zscan, "
|
259
|
-
rb_define_method(zscan, "
|
260
|
-
rb_define_method(zscan, "
|
256
|
+
rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
|
257
|
+
rb_define_method(zscan, "push", zscan_push, 0);
|
258
|
+
rb_define_method(zscan, "pop", zscan_pop, 0);
|
259
|
+
rb_define_method(zscan, "drop", zscan_drop, 0);
|
260
|
+
rb_define_method(zscan, "restore", zscan_restore, 0);
|
261
261
|
}
|
data/lib/zscan.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative "../ext/zscan"
|
2
2
|
|
3
3
|
class ZScan
|
4
|
-
VERSION = '0
|
4
|
+
VERSION = '1.0'
|
5
5
|
|
6
6
|
def initialize s, dup=false
|
7
7
|
_internal_init dup ? s.dup : s
|
@@ -12,7 +12,7 @@ class ZScan
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def scan re_or_str
|
15
|
-
if sz =
|
15
|
+
if sz = match_bytesize(re_or_str)
|
16
16
|
r = _internal_string.byteslice bytepos, sz
|
17
17
|
self.bytepos += sz
|
18
18
|
r
|
@@ -20,7 +20,7 @@ class ZScan
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def skip re_or_str
|
23
|
-
if sz =
|
23
|
+
if sz = match_bytesize(re_or_str)
|
24
24
|
self.bytepos += sz
|
25
25
|
end
|
26
26
|
end
|
@@ -33,5 +33,14 @@ class ZScan
|
|
33
33
|
_internal_string.byteslice bytepos, _internal_string.bytesize
|
34
34
|
end
|
35
35
|
|
36
|
+
def reset
|
37
|
+
self.pos = 0
|
38
|
+
self
|
39
|
+
end
|
40
|
+
|
41
|
+
def terminate
|
42
|
+
self.pos = _internal_string.size
|
43
|
+
end
|
44
|
+
|
36
45
|
private :_internal_init, :_internal_string
|
37
46
|
end
|
data/rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
Dir.chdir __dir__
|
2
|
+
version_re = /\d+(\.\d+)*/
|
3
|
+
version = `command grep 'VERSION =' lib/zscan.rb`[version_re]
|
4
|
+
gem_files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
|
5
|
+
gem_package = "zscan-#{version}.gem"
|
6
|
+
|
7
|
+
desc "build and test"
|
8
|
+
task :default => [:test, gem_package]
|
9
|
+
|
10
|
+
desc "build and run test"
|
11
|
+
task :test do
|
12
|
+
Dir.chdir "ext"
|
13
|
+
sh "make"
|
14
|
+
Dir.chdir ".."
|
15
|
+
sh "rspec"
|
16
|
+
end
|
17
|
+
|
18
|
+
desc "pack gem"
|
19
|
+
file gem_package => gem_files do
|
20
|
+
sh "rm zscan-*.gem"
|
21
|
+
|
22
|
+
new_version = false
|
23
|
+
lines = File.readlines('zscan.gemspec')
|
24
|
+
lines.each do |line|
|
25
|
+
if line =~ /s\.version =/ and (line.sub! version_re, version)
|
26
|
+
new_version = true
|
27
|
+
break
|
28
|
+
end
|
29
|
+
end
|
30
|
+
if new_version
|
31
|
+
File.open('zscan.gemspec', 'w'){|f| f << lines.join }
|
32
|
+
end
|
33
|
+
sh "gem build zscan.gemspec"
|
34
|
+
end
|
data/readme.md
CHANGED
@@ -1,24 +1,30 @@
|
|
1
|
-
##
|
2
|
-
|
3
|
-
A simple string scanner. Provides... much less methods than `StringScanner`.
|
1
|
+
## Features
|
4
2
|
|
5
|
-
|
3
|
+
- `ZScan#scan`/`ZScan#skip`/`ZScan#match_bytesize` accept either string or regexp as param.
|
4
|
+
- `ZScan#pos` is the codepoint position, and `ZScan#bytepos` is byte position.
|
5
|
+
- Correctly scans anchors and lookbehind predicates.
|
6
|
+
- Pos stack manipulation.
|
6
7
|
|
7
|
-
|
8
|
+
## Install
|
8
9
|
|
9
|
-
|
10
|
+
```bash
|
11
|
+
gem ins zscan
|
12
|
+
```
|
10
13
|
|
11
|
-
|
14
|
+
## Typical use
|
12
15
|
|
13
|
-
```ruby
|
16
|
+
``` ruby
|
14
17
|
require 'zscan'
|
15
|
-
z = ZScan.new '
|
16
|
-
z.
|
17
|
-
z.
|
18
|
-
z.scan
|
18
|
+
z = ZScan.new 'hello world'
|
19
|
+
z.scan 'hello' #=> 'hello'
|
20
|
+
z.skip ' '
|
21
|
+
z.scan /\w+/ #=> 'world'
|
22
|
+
z.eos? #=> true
|
19
23
|
```
|
20
24
|
|
21
|
-
|
25
|
+
## Motivation
|
26
|
+
|
27
|
+
Ruby's stdlib `StringScanner` treats the scanning position as beginning of string:
|
22
28
|
|
23
29
|
```ruby
|
24
30
|
require 'strscan'
|
@@ -28,29 +34,62 @@ s.scan /(?<=a)/ #=> nil
|
|
28
34
|
s.scan /^/ #=> ''
|
29
35
|
```
|
30
36
|
|
37
|
+
But for building parser generators, I need the scanner to check the whole string for anchors and lookbehinds:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
require 'zscan'
|
41
|
+
z = ZScan.new 'ab'
|
42
|
+
z.pos = 1
|
43
|
+
z.scan /(?<=a)/ #=> ''
|
44
|
+
z.scan /^/ #=> nil
|
45
|
+
```
|
46
|
+
|
31
47
|
See also https://bugs.ruby-lang.org/issues/7092
|
32
48
|
|
33
49
|
## Methods
|
34
50
|
|
35
51
|
- `ZScan.new string, dup=false`
|
36
|
-
-
|
37
|
-
-
|
38
|
-
-
|
39
|
-
-
|
40
|
-
-
|
41
|
-
-
|
52
|
+
- `#scan regexp_or_string`
|
53
|
+
- `#skip regexp_or_string`
|
54
|
+
- `#match_bytesize regexp_or_string` returns length of matched bytes or nil
|
55
|
+
- `#eos?`
|
56
|
+
- `#string` note: returns a COW dup
|
57
|
+
- `#rest`
|
42
58
|
|
43
|
-
##
|
59
|
+
## Pos management
|
44
60
|
|
45
|
-
-
|
46
|
-
-
|
47
|
-
-
|
48
|
-
-
|
49
|
-
-
|
61
|
+
- `#pos`
|
62
|
+
- `#pos= new_pos` note: complexity ~ `new_pos > pos ? new_pos - pos : new_pos`.
|
63
|
+
- `#bytepos`
|
64
|
+
- `#bytepos= new_bytepos` note: complexity ~ `abs(new_bytepos - bytepos)`.
|
65
|
+
- `#advance n` move forward `n` codepoints, if `n < 0`, move backward. Stops at beginning or end.
|
66
|
+
- `#reset` go to beginning.
|
67
|
+
- `#terminate` go to end of string.
|
50
68
|
|
51
69
|
## Efficient pos stack manipulation
|
52
70
|
|
53
|
-
- `
|
54
|
-
- `
|
55
|
-
- `
|
56
|
-
- `
|
71
|
+
- `#push` pushes current pos into the stack.
|
72
|
+
- `#pop` sets current pos to top of the stack, and pops it.
|
73
|
+
- `#drop` drops top of pos stack without changing current pos.
|
74
|
+
- `#restore` sets current pos to top of the stack.
|
75
|
+
|
76
|
+
## License
|
77
|
+
|
78
|
+
Copyright (C) 2013 by Zete Lui (BSD)
|
79
|
+
|
80
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
81
|
+
this software and associated documentation files (the "Software"), to deal in
|
82
|
+
the Software without restriction, including without limitation the rights to
|
83
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
84
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
85
|
+
subject to the following conditions:
|
86
|
+
|
87
|
+
The above copyright notice and this permission notice shall be included in all
|
88
|
+
copies or substantial portions of the Software.
|
89
|
+
|
90
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
91
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
92
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
93
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
94
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
95
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/spec/zscan_spec.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
require_relative "../lib/zscan"
|
2
|
+
require 'rspec/autorun'
|
3
|
+
RSpec.configure do |config|
|
4
|
+
config.expect_with :stdlib
|
5
|
+
end
|
6
|
+
|
7
|
+
describe ZScan do
|
8
|
+
before :each do
|
9
|
+
@z = ZScan.new 'ab你好'
|
10
|
+
end
|
11
|
+
|
12
|
+
before :all do
|
13
|
+
GC.stress = true
|
14
|
+
end
|
15
|
+
|
16
|
+
it "random workflow" do
|
17
|
+
assert_equal 2, @z.match_bytesize('ab')
|
18
|
+
@z.pos = 4
|
19
|
+
assert_equal 8, @z.bytepos
|
20
|
+
@z.push
|
21
|
+
assert_equal nil, @z.scan(/ab你/)
|
22
|
+
@z.pos = 0
|
23
|
+
assert_equal 'ab你', @z.scan(/ab你/)
|
24
|
+
|
25
|
+
@z.restore
|
26
|
+
assert_equal 8, @z.bytepos
|
27
|
+
@z.pos = 3
|
28
|
+
@z.restore
|
29
|
+
assert_equal 8, @z.bytepos
|
30
|
+
end
|
31
|
+
|
32
|
+
it "scans from middle" do
|
33
|
+
@z.bytepos = 2
|
34
|
+
assert_equal '你', @z.scan('你')
|
35
|
+
assert_equal '好', @z.rest
|
36
|
+
end
|
37
|
+
|
38
|
+
it "won't overflow pos" do
|
39
|
+
@z.pos = 20
|
40
|
+
assert_equal 8, @z.bytepos
|
41
|
+
assert_equal 4, @z.pos
|
42
|
+
|
43
|
+
@z.skip('ab')
|
44
|
+
assert_equal 8, @z.bytepos
|
45
|
+
|
46
|
+
@z.pos = -1
|
47
|
+
assert_equal 0, @z.bytepos
|
48
|
+
assert_equal 0, @z.pos
|
49
|
+
|
50
|
+
@z.bytepos = 20
|
51
|
+
assert_equal 8, @z.bytepos
|
52
|
+
assert_equal 4, @z.pos
|
53
|
+
|
54
|
+
@z.bytepos = -1
|
55
|
+
assert_equal 0, @z.bytepos
|
56
|
+
assert_equal 0, @z.pos
|
57
|
+
end
|
58
|
+
|
59
|
+
it "recognizes anchors" do
|
60
|
+
z = ZScan.new "a x:b+ $ \\k<x>"
|
61
|
+
z.pos = 1
|
62
|
+
assert_equal ' ', z.scan(/\s*(\#.*$\s*)*/)
|
63
|
+
z.pos = 1
|
64
|
+
assert_equal '', z.scan(/(?<=a)/)
|
65
|
+
assert_equal nil, z.scan(/^/)
|
66
|
+
end
|
67
|
+
end
|
data/zscan.gemspec
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "zscan"
|
3
|
-
s.version = "0.
|
3
|
+
s.version = "1.0" # version mapped from zscan.rb, don't change here
|
4
4
|
s.author = "Zete Lui"
|
5
5
|
s.homepage = "https://github.com/luikore/zscan"
|
6
6
|
s.platform = Gem::Platform::RUBY
|
7
7
|
s.summary = "improved string scanner"
|
8
|
-
s.description = "improved string scanner"
|
8
|
+
s.description = "improved string scanner, respects anchors and lookbehinds, supports codepoint positioning"
|
9
9
|
s.required_ruby_version = ">=1.9.2"
|
10
|
+
s.licenses = ['BSD']
|
10
11
|
|
11
|
-
s.files =
|
12
|
+
s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
|
12
13
|
s.require_paths = ["lib"]
|
13
14
|
s.extensions = ["ext/extconf.rb"]
|
14
15
|
s.rubygems_version = '1.8.24'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0
|
4
|
+
version: '1.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zete Lui
|
@@ -10,20 +10,24 @@ bindir: bin
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2013-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: improved string scanner
|
13
|
+
description: improved string scanner, respects anchors and lookbehinds, supports codepoint
|
14
|
+
positioning
|
14
15
|
email:
|
15
16
|
executables: []
|
16
17
|
extensions:
|
17
18
|
- ext/extconf.rb
|
18
19
|
extra_rdoc_files: []
|
19
20
|
files:
|
21
|
+
- rakefile
|
22
|
+
- zscan.gemspec
|
20
23
|
- readme.md
|
24
|
+
- ext/extconf.rb
|
21
25
|
- lib/zscan.rb
|
26
|
+
- spec/zscan_spec.rb
|
22
27
|
- ext/zscan.c
|
23
|
-
- ext/extconf.rb
|
24
|
-
- zscan.gemspec
|
25
28
|
homepage: https://github.com/luikore/zscan
|
26
|
-
licenses:
|
29
|
+
licenses:
|
30
|
+
- BSD
|
27
31
|
metadata: {}
|
28
32
|
post_install_message:
|
29
33
|
rdoc_options: []
|