zscan 0.5 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/zscan.c +14 -14
- data/lib/zscan.rb +12 -3
- data/rakefile +34 -0
- data/readme.md +68 -29
- data/spec/zscan_spec.rb +67 -0
- data/zscan.gemspec +4 -3
- metadata +9 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c343abbf9b043bcb7643ac193bb5f9a0fd76245e
|
4
|
+
data.tar.gz: fe57d1e6e352cad9d9ac2f8095afdbbd4affbebe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 687a22c21f5da837039b557365807a89b5afd33cea5301baa1d5321c3a5b324f63f7b96524166f388da1c296e2e775ce8007e97281ad084c1583aa20c247ff6f
|
7
|
+
data.tar.gz: d6fde3485b2bb8e4db9bb80513c370dec17e331c1e20c5eb087742d86fdb06c403136b98be3e14dc65747b897182f4eaa0132ebc454b427e1e5b1cc622a1c8e0
|
data/ext/zscan.c
CHANGED
@@ -43,8 +43,8 @@ static VALUE zscan_alloc(VALUE klass) {
|
|
43
43
|
ZScan* p = ALLOC(ZScan);
|
44
44
|
MEMZERO(p, ZScan, 1);
|
45
45
|
p->s = Qnil;
|
46
|
-
p->stack_cap =
|
47
|
-
p->stack = (Pos*)malloc(sizeof(Pos) *
|
46
|
+
p->stack_cap = 8;
|
47
|
+
p->stack = (Pos*)malloc(sizeof(Pos) * p->stack_cap);
|
48
48
|
return TypedData_Wrap_Struct(klass, &zscan_type, p);
|
49
49
|
}
|
50
50
|
|
@@ -158,10 +158,10 @@ static VALUE zscan_eos_p(VALUE self) {
|
|
158
158
|
}
|
159
159
|
|
160
160
|
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
|
161
|
-
static VALUE
|
161
|
+
static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
|
162
162
|
P;
|
163
163
|
if (TYPE(pattern) == T_STRING) {
|
164
|
-
volatile VALUE ss = rb_funcall(
|
164
|
+
volatile VALUE ss = rb_funcall(p->s, rb_intern("byteslice"), 2, ULONG2NUM(p->bytepos), ULONG2NUM(RSTRING_LEN(p->s)));
|
165
165
|
if (RTEST(rb_funcall(ss, rb_intern("start_with?"), 1, pattern))) {
|
166
166
|
return ULONG2NUM(RSTRING_LEN(pattern));
|
167
167
|
}
|
@@ -201,10 +201,10 @@ static VALUE zscan_bmatch_p(VALUE self, VALUE pattern) {
|
|
201
201
|
return Qnil;
|
202
202
|
}
|
203
203
|
|
204
|
-
static VALUE
|
204
|
+
static VALUE zscan_push(VALUE self) {
|
205
205
|
P;
|
206
206
|
if (p->stack_i + 1 == p->stack_cap) {
|
207
|
-
p->stack_cap
|
207
|
+
p->stack_cap = p->stack_cap * 1.4 + 3;
|
208
208
|
p->stack = (Pos*)realloc(p->stack, sizeof(Pos) * p->stack_cap);
|
209
209
|
}
|
210
210
|
Pos e = {p->pos, p->bytepos};
|
@@ -212,7 +212,7 @@ static VALUE zscan_push_pos(VALUE self) {
|
|
212
212
|
return self;
|
213
213
|
}
|
214
214
|
|
215
|
-
static VALUE
|
215
|
+
static VALUE zscan_pop(VALUE self) {
|
216
216
|
P;
|
217
217
|
if (p->stack_i) {
|
218
218
|
p->pos = p->stack[p->stack_i].pos;
|
@@ -225,7 +225,7 @@ static VALUE zscan_pop_pos(VALUE self) {
|
|
225
225
|
return self;
|
226
226
|
}
|
227
227
|
|
228
|
-
static VALUE
|
228
|
+
static VALUE zscan_drop(VALUE self) {
|
229
229
|
P;
|
230
230
|
if (p->stack_i) {
|
231
231
|
p->stack_i--;
|
@@ -233,7 +233,7 @@ static VALUE zscan_drop_top(VALUE self) {
|
|
233
233
|
return self;
|
234
234
|
}
|
235
235
|
|
236
|
-
static VALUE
|
236
|
+
static VALUE zscan_restore(VALUE self) {
|
237
237
|
P;
|
238
238
|
if (p->stack_i) {
|
239
239
|
p->pos = p->stack[p->stack_i].pos;
|
@@ -253,9 +253,9 @@ void Init_zscan() {
|
|
253
253
|
rb_define_method(zscan, "bytepos=", zscan_bytepos_eq, 1);
|
254
254
|
rb_define_method(zscan, "advance", zscan_advance, 1);
|
255
255
|
rb_define_method(zscan, "eos?", zscan_eos_p, 0);
|
256
|
-
rb_define_method(zscan, "
|
257
|
-
rb_define_method(zscan, "
|
258
|
-
rb_define_method(zscan, "
|
259
|
-
rb_define_method(zscan, "
|
260
|
-
rb_define_method(zscan, "
|
256
|
+
rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
|
257
|
+
rb_define_method(zscan, "push", zscan_push, 0);
|
258
|
+
rb_define_method(zscan, "pop", zscan_pop, 0);
|
259
|
+
rb_define_method(zscan, "drop", zscan_drop, 0);
|
260
|
+
rb_define_method(zscan, "restore", zscan_restore, 0);
|
261
261
|
}
|
data/lib/zscan.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative "../ext/zscan"
|
2
2
|
|
3
3
|
class ZScan
|
4
|
-
VERSION = '0
|
4
|
+
VERSION = '1.0'
|
5
5
|
|
6
6
|
def initialize s, dup=false
|
7
7
|
_internal_init dup ? s.dup : s
|
@@ -12,7 +12,7 @@ class ZScan
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def scan re_or_str
|
15
|
-
if sz =
|
15
|
+
if sz = match_bytesize(re_or_str)
|
16
16
|
r = _internal_string.byteslice bytepos, sz
|
17
17
|
self.bytepos += sz
|
18
18
|
r
|
@@ -20,7 +20,7 @@ class ZScan
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def skip re_or_str
|
23
|
-
if sz =
|
23
|
+
if sz = match_bytesize(re_or_str)
|
24
24
|
self.bytepos += sz
|
25
25
|
end
|
26
26
|
end
|
@@ -33,5 +33,14 @@ class ZScan
|
|
33
33
|
_internal_string.byteslice bytepos, _internal_string.bytesize
|
34
34
|
end
|
35
35
|
|
36
|
+
def reset
|
37
|
+
self.pos = 0
|
38
|
+
self
|
39
|
+
end
|
40
|
+
|
41
|
+
def terminate
|
42
|
+
self.pos = _internal_string.size
|
43
|
+
end
|
44
|
+
|
36
45
|
private :_internal_init, :_internal_string
|
37
46
|
end
|
data/rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
Dir.chdir __dir__
|
2
|
+
version_re = /\d+(\.\d+)*/
|
3
|
+
version = `command grep 'VERSION =' lib/zscan.rb`[version_re]
|
4
|
+
gem_files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
|
5
|
+
gem_package = "zscan-#{version}.gem"
|
6
|
+
|
7
|
+
desc "build and test"
|
8
|
+
task :default => [:test, gem_package]
|
9
|
+
|
10
|
+
desc "build and run test"
|
11
|
+
task :test do
|
12
|
+
Dir.chdir "ext"
|
13
|
+
sh "make"
|
14
|
+
Dir.chdir ".."
|
15
|
+
sh "rspec"
|
16
|
+
end
|
17
|
+
|
18
|
+
desc "pack gem"
|
19
|
+
file gem_package => gem_files do
|
20
|
+
sh "rm zscan-*.gem"
|
21
|
+
|
22
|
+
new_version = false
|
23
|
+
lines = File.readlines('zscan.gemspec')
|
24
|
+
lines.each do |line|
|
25
|
+
if line =~ /s\.version =/ and (line.sub! version_re, version)
|
26
|
+
new_version = true
|
27
|
+
break
|
28
|
+
end
|
29
|
+
end
|
30
|
+
if new_version
|
31
|
+
File.open('zscan.gemspec', 'w'){|f| f << lines.join }
|
32
|
+
end
|
33
|
+
sh "gem build zscan.gemspec"
|
34
|
+
end
|
data/readme.md
CHANGED
@@ -1,24 +1,30 @@
|
|
1
|
-
##
|
2
|
-
|
3
|
-
A simple string scanner. Provides... much less methods than `StringScanner`.
|
1
|
+
## Features
|
4
2
|
|
5
|
-
|
3
|
+
- `ZScan#scan`/`ZScan#skip`/`ZScan#match_bytesize` accept either string or regexp as param.
|
4
|
+
- `ZScan#pos` is the codepoint position, and `ZScan#bytepos` is byte position.
|
5
|
+
- Correctly scans anchors and look behind predicates.
|
6
|
+
- Pos stack manipulation.
|
6
7
|
|
7
|
-
|
8
|
+
## Install
|
8
9
|
|
9
|
-
|
10
|
+
```bash
|
11
|
+
gem install zscan
|
12
|
+
```
|
10
13
|
|
11
|
-
|
14
|
+
## Typical use
|
12
15
|
|
13
|
-
```ruby
|
16
|
+
``` ruby
|
14
17
|
require 'zscan'
|
15
|
-
z = ZScan.new '
|
16
|
-
z.
|
17
|
-
z.
|
18
|
-
z.scan
|
18
|
+
z = ZScan.new 'hello world'
|
19
|
+
z.scan 'hello' #=> 'hello'
|
20
|
+
z.skip ' '
|
21
|
+
z.scan /\w+/ #=> 'world'
|
22
|
+
z.eos? #=> true
|
19
23
|
```
|
20
24
|
|
21
|
-
|
25
|
+
## Motivation
|
26
|
+
|
27
|
+
Ruby's stdlib `StringScanner` treats the scanning position as the beginning of the string:
|
22
28
|
|
23
29
|
```ruby
|
24
30
|
require 'strscan'
|
@@ -28,29 +34,62 @@ s.scan /(?<a)/ #=> nil
|
|
28
34
|
s.scan /^/ #=> ''
|
29
35
|
```
|
30
36
|
|
37
|
+
But for building parser generators, I need the scanner to check the whole string for anchors and lookbehinds:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
require 'zscan'
|
41
|
+
z = ZScan.new 'ab'
|
42
|
+
z.pos = 1
|
43
|
+
z.scan /(?<=a)/ #=> ''
|
44
|
+
z.scan /^/ #=> nil
|
45
|
+
```
|
46
|
+
|
31
47
|
See also https://bugs.ruby-lang.org/issues/7092
|
32
48
|
|
33
49
|
## Methods
|
34
50
|
|
35
51
|
- `ZScan.new string, dup=false`
|
36
|
-
-
|
37
|
-
-
|
38
|
-
-
|
39
|
-
-
|
40
|
-
-
|
41
|
-
-
|
52
|
+
- `#scan regexp_or_string`
|
53
|
+
- `#skip regexp_or_string`
|
54
|
+
- `#match_bytesize regexp_or_string` returns length of matched bytes or nil
|
55
|
+
- `#eos?`
|
56
|
+
- `#string` note: returns a COW dup
|
57
|
+
- `#rest`
|
42
58
|
|
43
|
-
##
|
59
|
+
## Pos management
|
44
60
|
|
45
|
-
-
|
46
|
-
-
|
47
|
-
-
|
48
|
-
-
|
49
|
-
-
|
61
|
+
- `#pos`
|
62
|
+
- `#pos= new_pos` note: complexity ~ `new_pos > pos ? new_pos - pos : new_pos`.
|
63
|
+
- `#bytepos`
|
64
|
+
- `#bytepos= new_bytepos` note: complexity ~ `abs(new_bytepos - bytepos)`.
|
65
|
+
- `#advance n` moves forward `n` codepoints; if `n < 0`, it moves backward. Stops at the beginning or end of the string.
|
66
|
+
- `#reset` go to beginning.
|
67
|
+
- `#terminate` go to end of string.
|
50
68
|
|
51
69
|
## Efficient pos stack manipulation
|
52
70
|
|
53
|
-
- `
|
54
|
-
- `
|
55
|
-
- `
|
56
|
-
- `
|
71
|
+
- `#push` pushes current pos into the stack.
|
72
|
+
- `#pop` sets current pos to top of the stack, and pops it.
|
73
|
+
- `#drop` drops top of pos stack without changing current pos.
|
74
|
+
- `#restore` sets current pos to top of the stack.
|
75
|
+
|
76
|
+
## License
|
77
|
+
|
78
|
+
Copyright (C) 2013 by Zete Lui (BSD)
|
79
|
+
|
80
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
81
|
+
this software and associated documentation files (the "Software"), to deal in
|
82
|
+
the Software without restriction, including without limitation the rights to
|
83
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
84
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
85
|
+
subject to the following conditions:
|
86
|
+
|
87
|
+
The above copyright notice and this permission notice shall be included in all
|
88
|
+
copies or substantial portions of the Software.
|
89
|
+
|
90
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
91
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
92
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
93
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
94
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
95
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/spec/zscan_spec.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
require_relative "../lib/zscan"
|
2
|
+
require 'rspec/autorun'
|
3
|
+
RSpec.configure do |config|
|
4
|
+
config.expect_with :stdlib
|
5
|
+
end
|
6
|
+
|
7
|
+
describe ZScan do
|
8
|
+
before :each do
|
9
|
+
@z = ZScan.new 'ab你好'
|
10
|
+
end
|
11
|
+
|
12
|
+
before :all do
|
13
|
+
GC.stress = true
|
14
|
+
end
|
15
|
+
|
16
|
+
it "random workflow" do
|
17
|
+
assert_equal 2, @z.match_bytesize('ab')
|
18
|
+
@z.pos = 4
|
19
|
+
assert_equal 8, @z.bytepos
|
20
|
+
@z.push
|
21
|
+
assert_equal nil, @z.scan(/ab你/)
|
22
|
+
@z.pos = 0
|
23
|
+
assert_equal 'ab你', @z.scan(/ab你/)
|
24
|
+
|
25
|
+
@z.restore
|
26
|
+
assert_equal 8, @z.bytepos
|
27
|
+
@z.pos = 3
|
28
|
+
@z.restore
|
29
|
+
assert_equal 8, @z.bytepos
|
30
|
+
end
|
31
|
+
|
32
|
+
it "scans from middle" do
|
33
|
+
@z.bytepos = 2
|
34
|
+
assert_equal '你', @z.scan('你')
|
35
|
+
assert_equal '好', @z.rest
|
36
|
+
end
|
37
|
+
|
38
|
+
it "won't overflow pos" do
|
39
|
+
@z.pos = 20
|
40
|
+
assert_equal 8, @z.bytepos
|
41
|
+
assert_equal 4, @z.pos
|
42
|
+
|
43
|
+
@z.skip('ab')
|
44
|
+
assert_equal 8, @z.bytepos
|
45
|
+
|
46
|
+
@z.pos = -1
|
47
|
+
assert_equal 0, @z.bytepos
|
48
|
+
assert_equal 0, @z.pos
|
49
|
+
|
50
|
+
@z.bytepos = 20
|
51
|
+
assert_equal 8, @z.bytepos
|
52
|
+
assert_equal 4, @z.pos
|
53
|
+
|
54
|
+
@z.bytepos = -1
|
55
|
+
assert_equal 0, @z.bytepos
|
56
|
+
assert_equal 0, @z.pos
|
57
|
+
end
|
58
|
+
|
59
|
+
it "recognizes anchors" do
|
60
|
+
z = ZScan.new "a x:b+ $ \\k<x>"
|
61
|
+
z.pos = 1
|
62
|
+
assert_equal ' ', z.scan(/\s*(\#.*$\s*)*/)
|
63
|
+
z.pos = 1
|
64
|
+
assert_equal '', z.scan(/(?<=a)/)
|
65
|
+
assert_equal nil, z.scan(/^/)
|
66
|
+
end
|
67
|
+
end
|
data/zscan.gemspec
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "zscan"
|
3
|
-
s.version = "0.
|
3
|
+
s.version = "1.0" # version mapped from zscan.rb, don't change here
|
4
4
|
s.author = "Zete Lui"
|
5
5
|
s.homepage = "https://github.com/luikore/zscan"
|
6
6
|
s.platform = Gem::Platform::RUBY
|
7
7
|
s.summary = "improved string scanner"
|
8
|
-
s.description = "improved string scanner"
|
8
|
+
s.description = "improved string scanner, respects anchors and lookbehinds, supports codepoint positioning"
|
9
9
|
s.required_ruby_version = ">=1.9.2"
|
10
|
+
s.licenses = ['BSD']
|
10
11
|
|
11
|
-
s.files =
|
12
|
+
s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
|
12
13
|
s.require_paths = ["lib"]
|
13
14
|
s.extensions = ["ext/extconf.rb"]
|
14
15
|
s.rubygems_version = '1.8.24'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0
|
4
|
+
version: '1.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zete Lui
|
@@ -10,20 +10,24 @@ bindir: bin
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2013-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: improved string scanner
|
13
|
+
description: improved string scanner, respects anchors and lookbehinds, supports codepoint
|
14
|
+
positioning
|
14
15
|
email:
|
15
16
|
executables: []
|
16
17
|
extensions:
|
17
18
|
- ext/extconf.rb
|
18
19
|
extra_rdoc_files: []
|
19
20
|
files:
|
21
|
+
- rakefile
|
22
|
+
- zscan.gemspec
|
20
23
|
- readme.md
|
24
|
+
- ext/extconf.rb
|
21
25
|
- lib/zscan.rb
|
26
|
+
- spec/zscan_spec.rb
|
22
27
|
- ext/zscan.c
|
23
|
-
- ext/extconf.rb
|
24
|
-
- zscan.gemspec
|
25
28
|
homepage: https://github.com/luikore/zscan
|
26
|
-
licenses:
|
29
|
+
licenses:
|
30
|
+
- BSD
|
27
31
|
metadata: {}
|
28
32
|
post_install_message:
|
29
33
|
rdoc_options: []
|