zscan 0.5 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 743c9b854996ff1714f83f805a8d975b261993b5
4
- data.tar.gz: 65254b0e114cd41b727e5481da48704b28ee2bf1
3
+ metadata.gz: c343abbf9b043bcb7643ac193bb5f9a0fd76245e
4
+ data.tar.gz: fe57d1e6e352cad9d9ac2f8095afdbbd4affbebe
5
5
  SHA512:
6
- metadata.gz: 028d305bcee7917a29caadbe8a4ff1254e682de582c5316ba0267d95ab0dd5fe01a47198fc5fda3aa5ba0e6c66ccd813d17fa464986c25bf528c3fc460a1e55d
7
- data.tar.gz: eb2f9bff39078f3a835868147f3e36e388544ea6a939a053b0db3298cfef8d56945b06a78fba102b9b04345fef5ac5464e70d16f2999a25cd72f16c6489268ad
6
+ metadata.gz: 687a22c21f5da837039b557365807a89b5afd33cea5301baa1d5321c3a5b324f63f7b96524166f388da1c296e2e775ce8007e97281ad084c1583aa20c247ff6f
7
+ data.tar.gz: d6fde3485b2bb8e4db9bb80513c370dec17e331c1e20c5eb087742d86fdb06c403136b98be3e14dc65747b897182f4eaa0132ebc454b427e1e5b1cc622a1c8e0
data/ext/zscan.c CHANGED
@@ -43,8 +43,8 @@ static VALUE zscan_alloc(VALUE klass) {
43
43
  ZScan* p = ALLOC(ZScan);
44
44
  MEMZERO(p, ZScan, 1);
45
45
  p->s = Qnil;
46
- p->stack_cap = 5;
47
- p->stack = (Pos*)malloc(sizeof(Pos) * 5);
46
+ p->stack_cap = 8;
47
+ p->stack = (Pos*)malloc(sizeof(Pos) * p->stack_cap);
48
48
  return TypedData_Wrap_Struct(klass, &zscan_type, p);
49
49
  }
50
50
 
@@ -158,10 +158,10 @@ static VALUE zscan_eos_p(VALUE self) {
158
158
  }
159
159
 
160
160
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
161
- static VALUE zscan_bmatch_p(VALUE self, VALUE pattern) {
161
+ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
162
162
  P;
163
163
  if (TYPE(pattern) == T_STRING) {
164
- volatile VALUE ss = rb_funcall(self, rb_intern("rest"), 0);
164
+ volatile VALUE ss = rb_funcall(p->s, rb_intern("byteslice"), 2, ULONG2NUM(p->bytepos), ULONG2NUM(RSTRING_LEN(p->s)));
165
165
  if (RTEST(rb_funcall(ss, rb_intern("start_with?"), 1, pattern))) {
166
166
  return ULONG2NUM(RSTRING_LEN(pattern));
167
167
  }
@@ -201,10 +201,10 @@ static VALUE zscan_bmatch_p(VALUE self, VALUE pattern) {
201
201
  return Qnil;
202
202
  }
203
203
 
204
- static VALUE zscan_push_pos(VALUE self) {
204
+ static VALUE zscan_push(VALUE self) {
205
205
  P;
206
206
  if (p->stack_i + 1 == p->stack_cap) {
207
- p->stack_cap *= 2;
207
+ p->stack_cap = p->stack_cap * 1.4 + 3;
208
208
  p->stack = (Pos*)realloc(p->stack, sizeof(Pos) * p->stack_cap);
209
209
  }
210
210
  Pos e = {p->pos, p->bytepos};
@@ -212,7 +212,7 @@ static VALUE zscan_push_pos(VALUE self) {
212
212
  return self;
213
213
  }
214
214
 
215
- static VALUE zscan_pop_pos(VALUE self) {
215
+ static VALUE zscan_pop(VALUE self) {
216
216
  P;
217
217
  if (p->stack_i) {
218
218
  p->pos = p->stack[p->stack_i].pos;
@@ -225,7 +225,7 @@ static VALUE zscan_pop_pos(VALUE self) {
225
225
  return self;
226
226
  }
227
227
 
228
- static VALUE zscan_drop_top(VALUE self) {
228
+ static VALUE zscan_drop(VALUE self) {
229
229
  P;
230
230
  if (p->stack_i) {
231
231
  p->stack_i--;
@@ -233,7 +233,7 @@ static VALUE zscan_drop_top(VALUE self) {
233
233
  return self;
234
234
  }
235
235
 
236
- static VALUE zscan_resume_top(VALUE self) {
236
+ static VALUE zscan_restore(VALUE self) {
237
237
  P;
238
238
  if (p->stack_i) {
239
239
  p->pos = p->stack[p->stack_i].pos;
@@ -253,9 +253,9 @@ void Init_zscan() {
253
253
  rb_define_method(zscan, "bytepos=", zscan_bytepos_eq, 1);
254
254
  rb_define_method(zscan, "advance", zscan_advance, 1);
255
255
  rb_define_method(zscan, "eos?", zscan_eos_p, 0);
256
- rb_define_method(zscan, "bmatch?", zscan_bmatch_p, 1);
257
- rb_define_method(zscan, "push_pos", zscan_push_pos, 0);
258
- rb_define_method(zscan, "pop_pos", zscan_pop_pos, 0);
259
- rb_define_method(zscan, "drop_top", zscan_drop_top, 0);
260
- rb_define_method(zscan, "resume_top", zscan_resume_top, 0);
256
+ rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
257
+ rb_define_method(zscan, "push", zscan_push, 0);
258
+ rb_define_method(zscan, "pop", zscan_pop, 0);
259
+ rb_define_method(zscan, "drop", zscan_drop, 0);
260
+ rb_define_method(zscan, "restore", zscan_restore, 0);
261
261
  }
data/lib/zscan.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require_relative "../ext/zscan"
2
2
 
3
3
  class ZScan
4
- VERSION = '0.5'
4
+ VERSION = '1.0'
5
5
 
6
6
  def initialize s, dup=false
7
7
  _internal_init dup ? s.dup : s
@@ -12,7 +12,7 @@ class ZScan
12
12
  end
13
13
 
14
14
  def scan re_or_str
15
- if sz = bmatch?(re_or_str)
15
+ if sz = match_bytesize(re_or_str)
16
16
  r = _internal_string.byteslice bytepos, sz
17
17
  self.bytepos += sz
18
18
  r
@@ -20,7 +20,7 @@ class ZScan
20
20
  end
21
21
 
22
22
  def skip re_or_str
23
- if sz = bmatch?(re_or_str)
23
+ if sz = match_bytesize(re_or_str)
24
24
  self.bytepos += sz
25
25
  end
26
26
  end
@@ -33,5 +33,14 @@ class ZScan
33
33
  _internal_string.byteslice bytepos, _internal_string.bytesize
34
34
  end
35
35
 
36
+ def reset
37
+ self.pos = 0
38
+ self
39
+ end
40
+
41
+ def terminate
42
+ self.pos = _internal_string.size
43
+ end
44
+
36
45
  private :_internal_init, :_internal_string
37
46
  end
data/rakefile ADDED
@@ -0,0 +1,34 @@
1
+ Dir.chdir __dir__
2
+ version_re = /\d+(\.\d+)*/
3
+ version = `command grep 'VERSION =' lib/zscan.rb`[version_re]
4
+ gem_files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
5
+ gem_package = "zscan-#{version}.gem"
6
+
7
+ desc "build and test"
8
+ task :default => [:test, gem_package]
9
+
10
+ desc "build and run test"
11
+ task :test do
12
+ Dir.chdir "ext"
13
+ sh "make"
14
+ Dir.chdir ".."
15
+ sh "rspec"
16
+ end
17
+
18
+ desc "pack gem"
19
+ file gem_package => gem_files do
20
+ sh "rm zscan-*.gem"
21
+
22
+ new_version = false
23
+ lines = File.readlines('zscan.gemspec')
24
+ lines.each do |line|
25
+ if line =~ /s\.version =/ and (line.sub! version_re, version)
26
+ new_version = true
27
+ break
28
+ end
29
+ end
30
+ if new_version
31
+ File.open('zscan.gemspec', 'w'){|f| f << lines.join }
32
+ end
33
+ sh "gem build zscan.gemspec"
34
+ end
data/readme.md CHANGED
@@ -1,24 +1,30 @@
1
- ## Motivation
2
-
3
- A simple string scanner. Provides... much less methods than `StringScanner`.
1
+ ## Features
4
2
 
5
- It supports either string or regexp as scan param.
3
+ - `ZScan#scan`/`ZScan#skip`/`ZScan#match_bytesize` accept either string or regexp as param.
4
+ - `ZScan#pos` is the codepoint position, and `ZScan#bytepos` is byte position.
5
+ - Correctly scans anchors and lookbehind predicates.
6
+ - Pos stack manipulation.
6
7
 
7
- `pos` is by codepoints instead of bytes, use `bytepos` to locate byte position.
8
+ ## Install
8
9
 
9
- It provides a position stack for you to efficiently manage scanning locations.
10
+ ```bash
11
+ gem install zscan
12
+ ```
10
13
 
11
- It correctly scans anchors. The following codes demonstrate the behavior:
14
+ ## Typical use
12
15
 
13
- ```ruby
16
+ ``` ruby
14
17
  require 'zscan'
15
- z = ZScan.new 'ab'
16
- z.pos = 1
17
- z.scan /(?<a)/ #=> ''
18
- z.scan /^/ #=> nil
18
+ z = ZScan.new 'hello world'
19
+ z.scan 'hello' #=> 'hello'
20
+ z.skip ' '
21
+ z.scan /\w+/ #=> 'world'
22
+ z.eos? #=> true
19
23
  ```
20
24
 
21
- While with `StringScanner`:
25
+ ## Motivation
26
+
27
+ Ruby's stdlib `StringScanner` treats the scanning position as beginning of string:
22
28
 
23
29
  ```ruby
24
30
  require 'strscan'
@@ -28,29 +34,62 @@ s.scan /(?<a)/ #=> nil
28
34
  s.scan /^/ #=> ''
29
35
  ```
30
36
 
37
+ But for building parser generators, I need the scanner to check the whole string for anchors and lookbehinds:
38
+
39
+ ```ruby
40
+ require 'zscan'
41
+ z = ZScan.new 'ab'
42
+ z.pos = 1
43
+ z.scan /(?<=a)/ #=> ''
44
+ z.scan /^/ #=> nil
45
+ ```
46
+
31
47
  See also https://bugs.ruby-lang.org/issues/7092
32
48
 
33
49
  ## Methods
34
50
 
35
51
  - `ZScan.new string, dup=false`
36
- - `scan regexp_or_string`
37
- - `skip regexp_or_string`
38
- - `bmatch? regexp_or_string` returns length of matched bytes or nil
39
- - `eos?`
40
- - `string` note: returns a COW dup
41
- - `rest`
52
+ - `#scan regexp_or_string`
53
+ - `#skip regexp_or_string`
54
+ - `#match_bytesize regexp_or_string` returns length of matched bytes or nil
55
+ - `#eos?`
56
+ - `#string` note: returns a COW dup
57
+ - `#rest`
42
58
 
43
- ## Position management
59
+ ## Pos management
44
60
 
45
- - `pos`
46
- - `pos= new_pos` note: complexity ~ `new_pos > pos ? new_pos - pos : new_pos`.
47
- - `bytepos`
48
- - `bytepos= new_bytepos` note: complexity ~ `abs(new_bytepos - bytepos)`.
49
- - `advance n` move forward `n` codepoints, if `n < 0`, move backward. Stops at beginning or end.
61
+ - `#pos`
62
+ - `#pos= new_pos` note: complexity ~ `new_pos > pos ? new_pos - pos : new_pos`.
63
+ - `#bytepos`
64
+ - `#bytepos= new_bytepos` note: complexity ~ `abs(new_bytepos - bytepos)`.
65
+ - `#advance n` move forward `n` codepoints, if `n < 0`, move backward. Stops at beginning or end.
66
+ - `#reset` go to beginning.
67
+ - `#terminate` go to end of string.
50
68
 
51
69
  ## Efficient pos stack manipulation
52
70
 
53
- - `push_pos` pushes current pos into the stack.
54
- - `pop_pos` sets current pos to top of the stack, and pops it.
55
- - `drop_top` drops top of pos stack without changing current pos.
56
- - `resume_top` sets current pos to top of the stack.
71
+ - `#push` pushes current pos into the stack.
72
+ - `#pop` sets current pos to top of the stack, and pops it.
73
+ - `#drop` drops top of pos stack without changing current pos.
74
+ - `#restore` sets current pos to top of the stack.
75
+
76
+ ## License
77
+
78
+ Copyright (C) 2013 by Zete Lui (BSD)
79
+
80
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
81
+ this software and associated documentation files (the "Software"), to deal in
82
+ the Software without restriction, including without limitation the rights to
83
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
84
+ the Software, and to permit persons to whom the Software is furnished to do so,
85
+ subject to the following conditions:
86
+
87
+ The above copyright notice and this permission notice shall be included in all
88
+ copies or substantial portions of the Software.
89
+
90
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
91
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
92
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
93
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
94
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
95
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/spec/zscan_spec.rb ADDED
@@ -0,0 +1,67 @@
1
+ require_relative "../lib/zscan"
2
+ require 'rspec/autorun'
3
+ RSpec.configure do |config|
4
+ config.expect_with :stdlib
5
+ end
6
+
7
+ describe ZScan do
8
+ before :each do
9
+ @z = ZScan.new 'ab你好'
10
+ end
11
+
12
+ before :all do
13
+ GC.stress = true
14
+ end
15
+
16
+ it "random workflow" do
17
+ assert_equal 2, @z.match_bytesize('ab')
18
+ @z.pos = 4
19
+ assert_equal 8, @z.bytepos
20
+ @z.push
21
+ assert_equal nil, @z.scan(/ab你/)
22
+ @z.pos = 0
23
+ assert_equal 'ab你', @z.scan(/ab你/)
24
+
25
+ @z.restore
26
+ assert_equal 8, @z.bytepos
27
+ @z.pos = 3
28
+ @z.restore
29
+ assert_equal 8, @z.bytepos
30
+ end
31
+
32
+ it "scans from middle" do
33
+ @z.bytepos = 2
34
+ assert_equal '你', @z.scan('你')
35
+ assert_equal '好', @z.rest
36
+ end
37
+
38
+ it "won't overflow pos" do
39
+ @z.pos = 20
40
+ assert_equal 8, @z.bytepos
41
+ assert_equal 4, @z.pos
42
+
43
+ @z.skip('ab')
44
+ assert_equal 8, @z.bytepos
45
+
46
+ @z.pos = -1
47
+ assert_equal 0, @z.bytepos
48
+ assert_equal 0, @z.pos
49
+
50
+ @z.bytepos = 20
51
+ assert_equal 8, @z.bytepos
52
+ assert_equal 4, @z.pos
53
+
54
+ @z.bytepos = -1
55
+ assert_equal 0, @z.bytepos
56
+ assert_equal 0, @z.pos
57
+ end
58
+
59
+ it "recognizes anchors" do
60
+ z = ZScan.new "a x:b+ $ \\k<x>"
61
+ z.pos = 1
62
+ assert_equal ' ', z.scan(/\s*(\#.*$\s*)*/)
63
+ z.pos = 1
64
+ assert_equal '', z.scan(/(?<=a)/)
65
+ assert_equal nil, z.scan(/^/)
66
+ end
67
+ end
data/zscan.gemspec CHANGED
@@ -1,14 +1,15 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "zscan"
3
- s.version = "0.5"
3
+ s.version = "1.0" # version mapped from zscan.rb, don't change here
4
4
  s.author = "Zete Lui"
5
5
  s.homepage = "https://github.com/luikore/zscan"
6
6
  s.platform = Gem::Platform::RUBY
7
7
  s.summary = "improved string scanner"
8
- s.description = "improved string scanner"
8
+ s.description = "improved string scanner, respects anchors and lookbehinds, supports codepoint positioning"
9
9
  s.required_ruby_version = ">=1.9.2"
10
+ s.licenses = ['BSD']
10
11
 
11
- s.files = %w"readme.md lib/zscan.rb ext/zscan.c ext/extconf.rb zscan.gemspec"
12
+ s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
12
13
  s.require_paths = ["lib"]
13
14
  s.extensions = ["ext/extconf.rb"]
14
15
  s.rubygems_version = '1.8.24'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zscan
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '1.0'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
@@ -10,20 +10,24 @@ bindir: bin
10
10
  cert_chain: []
11
11
  date: 2013-05-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: improved string scanner
13
+ description: improved string scanner, respects anchors and lookbehinds, supports codepoint
14
+ positioning
14
15
  email:
15
16
  executables: []
16
17
  extensions:
17
18
  - ext/extconf.rb
18
19
  extra_rdoc_files: []
19
20
  files:
21
+ - rakefile
22
+ - zscan.gemspec
20
23
  - readme.md
24
+ - ext/extconf.rb
21
25
  - lib/zscan.rb
26
+ - spec/zscan_spec.rb
22
27
  - ext/zscan.c
23
- - ext/extconf.rb
24
- - zscan.gemspec
25
28
  homepage: https://github.com/luikore/zscan
26
- licenses: []
29
+ licenses:
30
+ - BSD
27
31
  metadata: {}
28
32
  post_install_message:
29
33
  rdoc_options: []