zscan 0.5 → 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 743c9b854996ff1714f83f805a8d975b261993b5
4
- data.tar.gz: 65254b0e114cd41b727e5481da48704b28ee2bf1
3
+ metadata.gz: c343abbf9b043bcb7643ac193bb5f9a0fd76245e
4
+ data.tar.gz: fe57d1e6e352cad9d9ac2f8095afdbbd4affbebe
5
5
  SHA512:
6
- metadata.gz: 028d305bcee7917a29caadbe8a4ff1254e682de582c5316ba0267d95ab0dd5fe01a47198fc5fda3aa5ba0e6c66ccd813d17fa464986c25bf528c3fc460a1e55d
7
- data.tar.gz: eb2f9bff39078f3a835868147f3e36e388544ea6a939a053b0db3298cfef8d56945b06a78fba102b9b04345fef5ac5464e70d16f2999a25cd72f16c6489268ad
6
+ metadata.gz: 687a22c21f5da837039b557365807a89b5afd33cea5301baa1d5321c3a5b324f63f7b96524166f388da1c296e2e775ce8007e97281ad084c1583aa20c247ff6f
7
+ data.tar.gz: d6fde3485b2bb8e4db9bb80513c370dec17e331c1e20c5eb087742d86fdb06c403136b98be3e14dc65747b897182f4eaa0132ebc454b427e1e5b1cc622a1c8e0
data/ext/zscan.c CHANGED
@@ -43,8 +43,8 @@ static VALUE zscan_alloc(VALUE klass) {
43
43
  ZScan* p = ALLOC(ZScan);
44
44
  MEMZERO(p, ZScan, 1);
45
45
  p->s = Qnil;
46
- p->stack_cap = 5;
47
- p->stack = (Pos*)malloc(sizeof(Pos) * 5);
46
+ p->stack_cap = 8;
47
+ p->stack = (Pos*)malloc(sizeof(Pos) * p->stack_cap);
48
48
  return TypedData_Wrap_Struct(klass, &zscan_type, p);
49
49
  }
50
50
 
@@ -158,10 +158,10 @@ static VALUE zscan_eos_p(VALUE self) {
158
158
  }
159
159
 
160
160
  regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
161
- static VALUE zscan_bmatch_p(VALUE self, VALUE pattern) {
161
+ static VALUE zscan_match_bytesize(VALUE self, VALUE pattern) {
162
162
  P;
163
163
  if (TYPE(pattern) == T_STRING) {
164
- volatile VALUE ss = rb_funcall(self, rb_intern("rest"), 0);
164
+ volatile VALUE ss = rb_funcall(p->s, rb_intern("byteslice"), 2, ULONG2NUM(p->bytepos), ULONG2NUM(RSTRING_LEN(p->s)));
165
165
  if (RTEST(rb_funcall(ss, rb_intern("start_with?"), 1, pattern))) {
166
166
  return ULONG2NUM(RSTRING_LEN(pattern));
167
167
  }
@@ -201,10 +201,10 @@ static VALUE zscan_bmatch_p(VALUE self, VALUE pattern) {
201
201
  return Qnil;
202
202
  }
203
203
 
204
- static VALUE zscan_push_pos(VALUE self) {
204
+ static VALUE zscan_push(VALUE self) {
205
205
  P;
206
206
  if (p->stack_i + 1 == p->stack_cap) {
207
- p->stack_cap *= 2;
207
+ p->stack_cap = p->stack_cap * 1.4 + 3;
208
208
  p->stack = (Pos*)realloc(p->stack, sizeof(Pos) * p->stack_cap);
209
209
  }
210
210
  Pos e = {p->pos, p->bytepos};
@@ -212,7 +212,7 @@ static VALUE zscan_push_pos(VALUE self) {
212
212
  return self;
213
213
  }
214
214
 
215
- static VALUE zscan_pop_pos(VALUE self) {
215
+ static VALUE zscan_pop(VALUE self) {
216
216
  P;
217
217
  if (p->stack_i) {
218
218
  p->pos = p->stack[p->stack_i].pos;
@@ -225,7 +225,7 @@ static VALUE zscan_pop_pos(VALUE self) {
225
225
  return self;
226
226
  }
227
227
 
228
- static VALUE zscan_drop_top(VALUE self) {
228
+ static VALUE zscan_drop(VALUE self) {
229
229
  P;
230
230
  if (p->stack_i) {
231
231
  p->stack_i--;
@@ -233,7 +233,7 @@ static VALUE zscan_drop_top(VALUE self) {
233
233
  return self;
234
234
  }
235
235
 
236
- static VALUE zscan_resume_top(VALUE self) {
236
+ static VALUE zscan_restore(VALUE self) {
237
237
  P;
238
238
  if (p->stack_i) {
239
239
  p->pos = p->stack[p->stack_i].pos;
@@ -253,9 +253,9 @@ void Init_zscan() {
253
253
  rb_define_method(zscan, "bytepos=", zscan_bytepos_eq, 1);
254
254
  rb_define_method(zscan, "advance", zscan_advance, 1);
255
255
  rb_define_method(zscan, "eos?", zscan_eos_p, 0);
256
- rb_define_method(zscan, "bmatch?", zscan_bmatch_p, 1);
257
- rb_define_method(zscan, "push_pos", zscan_push_pos, 0);
258
- rb_define_method(zscan, "pop_pos", zscan_pop_pos, 0);
259
- rb_define_method(zscan, "drop_top", zscan_drop_top, 0);
260
- rb_define_method(zscan, "resume_top", zscan_resume_top, 0);
256
+ rb_define_method(zscan, "match_bytesize", zscan_match_bytesize, 1);
257
+ rb_define_method(zscan, "push", zscan_push, 0);
258
+ rb_define_method(zscan, "pop", zscan_pop, 0);
259
+ rb_define_method(zscan, "drop", zscan_drop, 0);
260
+ rb_define_method(zscan, "restore", zscan_restore, 0);
261
261
  }
data/lib/zscan.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require_relative "../ext/zscan"
2
2
 
3
3
  class ZScan
4
- VERSION = '0.5'
4
+ VERSION = '1.0'
5
5
 
6
6
  def initialize s, dup=false
7
7
  _internal_init dup ? s.dup : s
@@ -12,7 +12,7 @@ class ZScan
12
12
  end
13
13
 
14
14
  def scan re_or_str
15
- if sz = bmatch?(re_or_str)
15
+ if sz = match_bytesize(re_or_str)
16
16
  r = _internal_string.byteslice bytepos, sz
17
17
  self.bytepos += sz
18
18
  r
@@ -20,7 +20,7 @@ class ZScan
20
20
  end
21
21
 
22
22
  def skip re_or_str
23
- if sz = bmatch?(re_or_str)
23
+ if sz = match_bytesize(re_or_str)
24
24
  self.bytepos += sz
25
25
  end
26
26
  end
@@ -33,5 +33,14 @@ class ZScan
33
33
  _internal_string.byteslice bytepos, _internal_string.bytesize
34
34
  end
35
35
 
36
+ def reset
37
+ self.pos = 0
38
+ self
39
+ end
40
+
41
+ def terminate
42
+ self.pos = _internal_string.size
43
+ end
44
+
36
45
  private :_internal_init, :_internal_string
37
46
  end
data/rakefile ADDED
@@ -0,0 +1,34 @@
1
+ Dir.chdir __dir__
2
+ version_re = /\d+(\.\d+)*/
3
+ version = `command grep 'VERSION =' lib/zscan.rb`[version_re]
4
+ gem_files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
5
+ gem_package = "zscan-#{version}.gem"
6
+
7
+ desc "build and test"
8
+ task :default => [:test, gem_package]
9
+
10
+ desc "build and run test"
11
+ task :test do
12
+ Dir.chdir "ext"
13
+ sh "make"
14
+ Dir.chdir ".."
15
+ sh "rspec"
16
+ end
17
+
18
+ desc "pack gem"
19
+ file gem_package => gem_files do
20
+ sh "rm zscan-*.gem"
21
+
22
+ new_version = false
23
+ lines = File.readlines('zscan.gemspec')
24
+ lines.each do |line|
25
+ if line =~ /s\.version =/ and (line.sub! version_re, version)
26
+ new_version = true
27
+ break
28
+ end
29
+ end
30
+ if new_version
31
+ File.open('zscan.gemspec', 'w'){|f| f << lines.join }
32
+ end
33
+ sh "gem build zscan.gemspec"
34
+ end
data/readme.md CHANGED
@@ -1,24 +1,30 @@
1
- ## Motivation
2
-
3
- A simple string scanner. Provides... much less methods than `StringScanner`.
1
+ ## Features
4
2
 
5
- It supports either string or regexp as scan param.
3
+ - `ZScan#scan`/`ZScan#skip`/`ZScan#match_bytesize` accept either string or regexp as param.
4
+ - `ZScan#pos` is the codepoint position, and `ZScan#bytepos` is the byte position.
5
+ - Correctly scans anchors and lookbehind predicates.
6
+ - Pos stack manipulation.
6
7
 
7
- `pos` is by codepoints instead of bytes, use `bytepos` to locate byte position.
8
+ ## Install
8
9
 
9
- It provides a position stack for you to efficiently manage scanning locations.
10
+ ```bash
11
+ gem ins zscan
12
+ ```
10
13
 
11
- It correctly scans anchors. The following codes demonstrate the behavior:
14
+ ## Typical use
12
15
 
13
- ```ruby
16
+ ``` ruby
14
17
  require 'zscan'
15
- z = ZScan.new 'ab'
16
- z.pos = 1
17
- z.scan /(?<a)/ #=> ''
18
- z.scan /^/ #=> nil
18
+ z = ZScan.new 'hello world'
19
+ z.scan 'hello' #=> 'hello'
20
+ z.skip ' '
21
+ z.scan /\w+/ #=> 'world'
22
+ z.eos? #=> true
19
23
  ```
20
24
 
21
- While with `StringScanner`:
25
+ ## Motivation
26
+
27
+ Ruby's stdlib `StringScanner` treats the scanning position as the beginning of the string:
22
28
 
23
29
  ```ruby
24
30
  require 'strscan'
@@ -28,29 +34,62 @@ s.scan /(?<a)/ #=> nil
28
34
  s.scan /^/ #=> ''
29
35
  ```
30
36
 
37
+ But for building parser generators, I need the scanner to check the whole string for anchors and lookbehinds:
38
+
39
+ ```ruby
40
+ require 'zscan'
41
+ z = ZScan.new 'ab'
42
+ z.pos = 1
43
+ z.scan /(?<=a)/ #=> ''
44
+ z.scan /^/ #=> nil
45
+ ```
46
+
31
47
  See also https://bugs.ruby-lang.org/issues/7092
32
48
 
33
49
  ## Methods
34
50
 
35
51
  - `ZScan.new string, dup=false`
36
- - `scan regexp_or_string`
37
- - `skip regexp_or_string`
38
- - `bmatch? regexp_or_string` returns length of matched bytes or nil
39
- - `eos?`
40
- - `string` note: returns a COW dup
41
- - `rest`
52
+ - `#scan regexp_or_string`
53
+ - `#skip regexp_or_string`
54
+ - `#match_bytesize regexp_or_string` returns length of matched bytes or nil
55
+ - `#eos?`
56
+ - `#string` note: returns a COW dup
57
+ - `#rest`
42
58
 
43
- ## Position management
59
+ ## Pos management
44
60
 
45
- - `pos`
46
- - `pos= new_pos` note: complexity ~ `new_pos > pos ? new_pos - pos : new_pos`.
47
- - `bytepos`
48
- - `bytepos= new_bytepos` note: complexity ~ `abs(new_bytepos - bytepos)`.
49
- - `advance n` move forward `n` codepoints, if `n < 0`, move backward. Stops at beginning or end.
61
+ - `#pos`
62
+ - `#pos= new_pos` note: complexity ~ `new_pos > pos ? new_pos - pos : new_pos`.
63
+ - `#bytepos`
64
+ - `#bytepos= new_bytepos` note: complexity ~ `abs(new_bytepos - bytepos)`.
65
+ - `#advance n` moves forward `n` codepoints; if `n < 0`, moves backward. Stops at the beginning or end of the string.
66
+ - `#reset` goes to the beginning of the string.
67
+ - `#terminate` goes to the end of the string.
50
68
 
51
69
  ## Efficient pos stack manipulation
52
70
 
53
- - `push_pos` pushes current pos into the stack.
54
- - `pop_pos` sets current pos to top of the stack, and pops it.
55
- - `drop_top` drops top of pos stack without changing current pos.
56
- - `resume_top` sets current pos to top of the stack.
71
+ - `#push` pushes current pos into the stack.
72
+ - `#pop` sets current pos to top of the stack, and pops it.
73
+ - `#drop` drops top of pos stack without changing current pos.
74
+ - `#restore` sets current pos to top of the stack.
75
+
76
+ ## License
77
+
78
+ Copyright (C) 2013 by Zete Lui (BSD)
79
+
80
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
81
+ this software and associated documentation files (the "Software"), to deal in
82
+ the Software without restriction, including without limitation the rights to
83
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
84
+ the Software, and to permit persons to whom the Software is furnished to do so,
85
+ subject to the following conditions:
86
+
87
+ The above copyright notice and this permission notice shall be included in all
88
+ copies or substantial portions of the Software.
89
+
90
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
91
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
92
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
93
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
94
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
95
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
@@ -0,0 +1,67 @@
1
+ require_relative "../lib/zscan"
2
+ require 'rspec/autorun'
3
+ RSpec.configure do |config|
4
+ config.expect_with :stdlib
5
+ end
6
+
7
+ describe ZScan do
8
+ before :each do
9
+ @z = ZScan.new 'ab你好'
10
+ end
11
+
12
+ before :all do
13
+ GC.stress = true
14
+ end
15
+
16
+ it "random workflow" do
17
+ assert_equal 2, @z.match_bytesize('ab')
18
+ @z.pos = 4
19
+ assert_equal 8, @z.bytepos
20
+ @z.push
21
+ assert_equal nil, @z.scan(/ab你/)
22
+ @z.pos = 0
23
+ assert_equal 'ab你', @z.scan(/ab你/)
24
+
25
+ @z.restore
26
+ assert_equal 8, @z.bytepos
27
+ @z.pos = 3
28
+ @z.restore
29
+ assert_equal 8, @z.bytepos
30
+ end
31
+
32
+ it "scans from middle" do
33
+ @z.bytepos = 2
34
+ assert_equal '你', @z.scan('你')
35
+ assert_equal '好', @z.rest
36
+ end
37
+
38
+ it "won't overflow pos" do
39
+ @z.pos = 20
40
+ assert_equal 8, @z.bytepos
41
+ assert_equal 4, @z.pos
42
+
43
+ @z.skip('ab')
44
+ assert_equal 8, @z.bytepos
45
+
46
+ @z.pos = -1
47
+ assert_equal 0, @z.bytepos
48
+ assert_equal 0, @z.pos
49
+
50
+ @z.bytepos = 20
51
+ assert_equal 8, @z.bytepos
52
+ assert_equal 4, @z.pos
53
+
54
+ @z.bytepos = -1
55
+ assert_equal 0, @z.bytepos
56
+ assert_equal 0, @z.pos
57
+ end
58
+
59
+ it "recognizes anchors" do
60
+ z = ZScan.new "a x:b+ $ \\k<x>"
61
+ z.pos = 1
62
+ assert_equal ' ', z.scan(/\s*(\#.*$\s*)*/)
63
+ z.pos = 1
64
+ assert_equal '', z.scan(/(?<=a)/)
65
+ assert_equal nil, z.scan(/^/)
66
+ end
67
+ end
data/zscan.gemspec CHANGED
@@ -1,14 +1,15 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "zscan"
3
- s.version = "0.5"
3
+ s.version = "1.0" # version mapped from zscan.rb, don't change here
4
4
  s.author = "Zete Lui"
5
5
  s.homepage = "https://github.com/luikore/zscan"
6
6
  s.platform = Gem::Platform::RUBY
7
7
  s.summary = "improved string scanner"
8
- s.description = "improved string scanner"
8
+ s.description = "improved string scanner, respects anchors and lookbehinds, supports codepoint positioning"
9
9
  s.required_ruby_version = ">=1.9.2"
10
+ s.licenses = ['BSD']
10
11
 
11
- s.files = %w"readme.md lib/zscan.rb ext/zscan.c ext/extconf.rb zscan.gemspec"
12
+ s.files = Dir.glob('{rakefile,zscan.gemspec,readme.md,**/*.{rb,c}}')
12
13
  s.require_paths = ["lib"]
13
14
  s.extensions = ["ext/extconf.rb"]
14
15
  s.rubygems_version = '1.8.24'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zscan
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '1.0'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
@@ -10,20 +10,24 @@ bindir: bin
10
10
  cert_chain: []
11
11
  date: 2013-05-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: improved string scanner
13
+ description: improved string scanner, respects anchors and lookbehinds, supports codepoint
14
+ positioning
14
15
  email:
15
16
  executables: []
16
17
  extensions:
17
18
  - ext/extconf.rb
18
19
  extra_rdoc_files: []
19
20
  files:
21
+ - rakefile
22
+ - zscan.gemspec
20
23
  - readme.md
24
+ - ext/extconf.rb
21
25
  - lib/zscan.rb
26
+ - spec/zscan_spec.rb
22
27
  - ext/zscan.c
23
- - ext/extconf.rb
24
- - zscan.gemspec
25
28
  homepage: https://github.com/luikore/zscan
26
- licenses: []
29
+ licenses:
30
+ - BSD
27
31
  metadata: {}
28
32
  post_install_message:
29
33
  rdoc_options: []