fupeg 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7fb3eb5f497d0c4294b507b5c8681b39635b4a7d1df96644f2afd73170e552b2
4
- data.tar.gz: 682bfdc3c694abeddd1dc31591ef5bd39429ec4c9b2511e71269939eedd26549
3
+ metadata.gz: c08b3d48fd87bd8fc04611663270f967afdf9bab0e0e5ce9c63b38c9f00332b6
4
+ data.tar.gz: 93308247a0fb582957862a4c0e274a3c646c1a7cba7ffc9acade0af1f5d6d250
5
5
  SHA512:
6
- metadata.gz: 00d7f7fc3d440d968df79df59dab77c87918437dfee730682f31728fe0e1f43c650d88a1ef8d44d92c18f107d83a14fb47db0758516cf97e520e47690e118af7
7
- data.tar.gz: 1653a4e8f7a3302b42682b29fac7b18e28e61dc99f1e63b392058282d73f3f2e54963e081431507b5e9fbb52bf5da7341de72d34e2fdaf7497db25215621f8b2
6
+ metadata.gz: 42973a5d22dd77f267a94d6ec65823ab891a2d63983dc548a4818b8b77ca24f5138754e08ae1a9b9b3e42a8791215f0c9b0f546e2397729a80dcb5fbb8929447
7
+ data.tar.gz: 5b99feff0f26acdac764194409f2b2d137a5dc6e16a6122e78cefda3647c680944b847112e135df32b9a27cd0c50652e3c6f32387713906c0fcb4938a8712c85
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## [0.3.0] - 2023-08-28
2
+
3
+ - Grammar.use_gram - to simplify grammar inclusion
4
+ - Grammar.proxy - to proxy rules to included grammar
5
+ - `_(pat)` doesn't return matched text, use `txt(pat)` instead
6
+ - "\`" is specialized for tokens
7
+ -- a token is either an ident (as specified by the `ident_only` method) or symbols,
8
+ -- `token_sp?` is skipped after token
9
+ - fixes for position calculation
10
+
1
11
  ## [0.2.0] - 2023-08-15
2
12
 
3
13
  - Split Parser and Grammar
data/lib/fupeg/grammar.rb CHANGED
@@ -15,20 +15,43 @@ module FuPeg
15
15
  gr.__send__(root)
16
16
  end
17
17
 
18
+ def self.use_gram(gram, *, as: nil)
19
+ if as.nil?
20
+ name = gram.name[/\w+$/]
21
+ name = name.gsub(/(?<!^)(?=[A-Z](?![A-Z\d_]))/, "_").downcase
22
+ as = :"@#{name}"
23
+ elsif !as.start_with?("@")
24
+ as = :"@#{as}"
25
+ end
26
+ @used_grams ||= {}
27
+ @used_grams[as] = gram
28
+ end
29
+
30
+ def self.proxy(*meths, to:)
31
+ meths.each do |meth|
32
+ define_method(meth) { |*args, &block|
33
+ instance_variable_get(to).__send__(meth, *args, &block)
34
+ }
35
+ end
36
+ end
37
+
38
+ def self.used_grams
39
+ @used_grams&.dup || {}
40
+ end
41
+
18
42
  def initialize(parser)
19
43
  @p = parser
44
+ self.class.used_grams.each do |iv, v|
45
+ instance_variable_set(iv, v.new(parser))
46
+ end
20
47
  end
21
48
 
22
- def fail!
23
- @p.fail!(skip: 3)
49
+ def fail!(pat: nil)
50
+ @p.fail!(pat: pat, skip: 3)
24
51
  end
25
52
 
26
53
  def dot
27
- @p.match(/./m)
28
- end
29
-
30
- def `(str)
31
- @p.match(str)
54
+ @p.dot
32
55
  end
33
56
 
34
57
  def _(lit = nil, &block)
@@ -40,21 +63,17 @@ module FuPeg
40
63
  end
41
64
 
42
65
  def will?(lit = nil, &block)
43
- @p.preserve(pos: true) { @p.match(lit, &block) }
66
+ @p.look_ahead(true, lit, &block)
44
67
  end
45
68
 
46
69
  def wont!(lit = nil, &block)
47
- @p.preserve(pos: true, failed: true) { !@p.match(lit, &block) } || @p.fail!
70
+ @p.look_ahead(false, lit, &block)
48
71
  end
49
72
 
50
- def text(lit = nil, &block)
73
+ def txt(lit = nil, &block)
51
74
  @p.text(lit, &block)
52
75
  end
53
76
 
54
- def bounds(lit = nil, &block)
55
- @p.bounds(lit, &block)
56
- end
57
-
58
77
  def cut(&block)
59
78
  @p.with_cut_point(&block)
60
79
  end
@@ -68,21 +87,70 @@ module FuPeg
68
87
  end
69
88
 
70
89
  def rep(range = 0.., lit = nil, &block)
71
- range = range..range if Integer === range
72
- range = 0..range.max if range.begin.nil?
73
- unless Integer === range.min && (range.end.nil? || Integer === range.max)
74
- raise "Range malformed #{range}"
75
- end
76
- @p.backtrack do
77
- max = range.end && range.max
78
- ar = []
79
- (1..max).each do |i|
80
- res = @p.backtrack { yield i == 1 }
81
- break unless res
82
- ar << res
83
- end
84
- (ar.size >= range.min) ? ar : @p.fail!
85
- end
90
+ @p.repetition(range, lit, &block)
91
+ end
92
+
93
+ # specialized matchers
94
+
95
+ def eof
96
+ @p.eof? && :eof
97
+ end
98
+
99
+ def nl
100
+ _(/\r\n|\r|\n/)
101
+ end
102
+
103
+ def eol
104
+ _ { lnsp? && nl && :eol }
105
+ end
106
+
107
+ def lnsp?
108
+ _(/[ \t]*/)
109
+ end
110
+
111
+ def lnsp!
112
+ _(/[ \t]+/)
113
+ end
114
+
115
+ def sp!
116
+ _(/\s+/)
117
+ end
118
+
119
+ def sp?
120
+ _(/\s*/)
121
+ end
122
+
123
+ def ident
124
+ (w = ident_only) && token_sp? && w
125
+ end
126
+
127
+ # raw token match
128
+ # if token is ident, then exact match performed with whole next ident
129
+ # else only string match
130
+ # and then whitespace is consumed
131
+ def `(token)
132
+ @p.match {
133
+ if _is_ident?(token)
134
+ _{ ident_only == token } || fail!(pat: token)
135
+ else
136
+ @p.match(token)
137
+ end && token_sp? && token
138
+ }
139
+ end
140
+
141
+ def _is_ident?(tok)
142
+ @_is_ident ||= Hash.new { |h, k|
143
+ h[k] = self.class.parse(:ident_only, k) == k
144
+ }
145
+ @_is_ident[tok]
146
+ end
147
+
148
+ def ident_only
149
+ txt(/[a-zA-Z_]\w*/)
150
+ end
151
+
152
+ def token_sp?
153
+ _(/\s*/)
86
154
  end
87
155
  end
88
156
  end
data/lib/fupeg/parser.rb CHANGED
@@ -4,6 +4,11 @@ require "strscan"
4
4
 
5
5
  module FuPeg
6
6
  class Parser
7
+ attr_accessor :debug
8
+ attr_accessor :file
9
+ attr_reader :failed
10
+ attr_reader :str
11
+
7
12
  def initialize(str, pos = 0)
8
13
  reset!(str, pos)
9
14
  end
@@ -19,11 +24,10 @@ module FuPeg
19
24
  @scan.pos = pos
20
25
  end
21
26
  @failed = nil
27
+ @debug = false
22
28
  @cut = CutPoint.new
23
29
  end
24
30
 
25
- attr_reader :failed
26
-
27
31
  def bytepos
28
32
  @scan.pos
29
33
  end
@@ -32,30 +36,44 @@ module FuPeg
32
36
  @str_size - @str.byteslice(pos..).size
33
37
  end
34
38
 
35
- Fail = Struct.new(:stack, :bytepos)
39
+ Fail = Struct.new(:stack, :bytepos, :pattern)
36
40
 
37
- def fail!(*, skip: 2)
38
- if !@failed || bytepos > @failed.bytepos
41
+ def fail!(*, pat: nil, skip: 2)
42
+ if debug || !@failed || bytepos > @failed.bytepos
39
43
  stack = caller_locations(skip)
40
44
  stack.delete_if do |loc|
41
- if loc.path.start_with?(__dir__)
45
+ path = loc.path
46
+ if path == __FILE__
47
+ true
48
+ elsif path.start_with?(__dir__)
42
49
  loc.label =~ /\b(backtrack|each|block)\b/
43
50
  end
44
51
  end
45
- @failed = Fail.new(stack, bytepos)
52
+ @failed = Fail.new(stack, bytepos, pat)
53
+ report_failed($stderr) if debug
46
54
  end
47
55
  nil
48
56
  end
49
57
 
50
58
  def failed_position
51
- position_for_bytepos(@failed.bytepos)
59
+ position(bytepos: @failed.bytepos)
52
60
  end
53
61
 
54
62
  def report_failed(out)
55
- pos = position_for_bytepos(@failed.bytepos)
56
- out << "Failed at #{pos.lineno}:#{pos.colno} :\n"
57
- out << pos.line + "\n"
58
- out << (" " * (pos.colno - 1) + "^\n")
63
+ pos = position(bytepos: @failed.bytepos)
64
+ out << if @failed.pattern
65
+ "Failed #{failed.pattern.inspect} at #{pos.lineno}:#{pos.colno}"
66
+ else
67
+ "Failed at #{pos.lineno}:#{pos.colno}"
68
+ end
69
+ if @file
70
+ out << " of #{@file}"
71
+ end
72
+ out << ":\n"
73
+ out << pos.line.chomp + "\n"
74
+ curpos = pos.line[...pos.colno].gsub("\t", " " * 8).size
75
+ curpos = 1 if curpos == 0 && @failed.bytepos == @str.bytesize
76
+ out << (" " * (curpos - 1) + "^\n")
59
77
  out << "Call stack:\n"
60
78
  @failed.stack.each do |loc|
61
79
  out << "#{loc.path}:#{loc.lineno} in #{loc.label}\n"
@@ -63,31 +81,6 @@ module FuPeg
63
81
  out
64
82
  end
65
83
 
66
- begin
67
- StringScanner.new("x").skip("x")
68
- def match(lit = //, &block)
69
- block ? backtrack(&block) : (@scan.scan(lit) || fail!)
70
- end
71
- rescue
72
- def match(lit = //, &block)
73
- if String === lit
74
- @_lit_cache ||= {}
75
- lit = @_lit_cache[lit] ||= Regexp.new(Regexp.escape(lit))
76
- end
77
- block ? backtrack(&block) : (@scan.scan(lit) || fail!)
78
- end
79
- end
80
-
81
- def text(lit = nil, &block)
82
- pos = @scan.pos
83
- match(lit, &block) && @str.byteslice(pos, @scan.pos - pos)
84
- end
85
-
86
- def bounds(lit = nil, &block)
87
- pos = @scan.pos
88
- match(lit, &block) && pos...@scan.pos
89
- end
90
-
91
84
  class CutPoint
92
85
  attr_accessor :next
93
86
 
@@ -134,22 +127,80 @@ module FuPeg
134
127
  @line_ends << @str.bytesize
135
128
  end
136
129
 
137
- def position_for_bytepos(pos)
138
- lineno = @line_ends.bsearch_index { |x| x >= pos }
130
+ def position(bytepos: @scan.pos)
131
+ lineno = @line_ends.bsearch_index { |x| x >= bytepos }
139
132
  case lineno
140
133
  when nil
141
- raise "Position #{pos} is larger than string byte size #{@str.bytesize}"
134
+ raise "Position #{bytepos} is larger than string byte size #{@str.bytesize}"
142
135
  else
143
136
  prev_end = @line_ends[lineno - 1]
144
137
  line_start = prev_end + 1
145
- column = @str.byteslice(line_start, pos - prev_end).size
138
+ column = @str.byteslice(line_start, bytepos - prev_end).size
139
+ end
140
+ if bytepos == @str.bytesize
141
+ if @str[-1] == "\n"
142
+ lineno, column = lineno + 1, 1
143
+ else
144
+ column += 1
145
+ end
146
146
  end
147
147
  line = @str.byteslice(line_start..@line_ends[lineno])
148
- Position.new(lineno, column, line, charpos(pos))
148
+ Position.new(lineno, column, line, charpos(bytepos))
149
149
  end
150
150
 
151
151
  # helper methods
152
152
 
153
+ begin
154
+ StringScanner.new("x").skip("x")
155
+ def match(lit = nil, &block)
156
+ block ? backtrack(&block) : (!lit || @scan.skip(lit) && true || fail!(pat: lit))
157
+ end
158
+ rescue
159
+ def match(lit = nil, &block)
160
+ if String === lit
161
+ @_lit_cache ||= {}
162
+ lit = @_lit_cache[lit] ||= Regexp.new(Regexp.escape(lit))
163
+ end
164
+ block ? backtrack(&block) : (!lit || @scan.skip(lit) && true || fail!(pat: lit))
165
+ end
166
+ end
167
+
168
+ def text(lit = nil, &block)
169
+ pos = @scan.pos
170
+ match(lit, &block) && @str.byteslice(pos, @scan.pos - pos)
171
+ end
172
+
173
+ def bounds(lit = nil, &block)
174
+ pos = @scan.pos
175
+ match(lit, &block) && pos...@scan.pos
176
+ end
177
+
178
+ def repetition(range = 0.., lit = nil, &block)
179
+ range = range..range if Integer === range
180
+ range = 0..range.max if range.begin.nil?
181
+ unless Integer === range.min && (range.end.nil? || Integer === range.max)
182
+ raise "Range malformed #{range}"
183
+ end
184
+ backtrack do
185
+ max = range.end && range.max
186
+ ar = []
187
+ (1..max).each do |i|
188
+ res = backtrack { yield i == 1 }
189
+ break unless res
190
+ ar << res
191
+ end
192
+ (ar.size >= range.min) ? ar : fail!
193
+ end
194
+ end
195
+
196
+ def dot
197
+ match(/./m)
198
+ end
199
+
200
+ def eof?
201
+ @scan.eos?
202
+ end
203
+
153
204
  def backtrack
154
205
  pos = @scan.pos
155
206
  res = yield
@@ -165,12 +216,25 @@ module FuPeg
165
216
  raise
166
217
  end
167
218
 
168
- def preserve(pos = false, failed = false, &block)
169
- p, f = @scan.pos, @failed
170
- r = yield
171
- @scan.pos = p if pos
172
- @failed = f if failed
173
- r
219
+ def look_ahead(positive, lit = nil, &block)
220
+ if block
221
+ p, f = @scan.pos, @failed
222
+ r = yield
223
+ @scan.pos = p
224
+ if positive ? r : !r
225
+ @failed = f
226
+ true
227
+ else
228
+ fail!
229
+ end
230
+ else
231
+ m = @scan.match?(lit)
232
+ if positive ? m : !m
233
+ true
234
+ else
235
+ fail!(pat: lit)
236
+ end
237
+ end
174
238
  end
175
239
  end
176
240
  end
data/lib/fupeg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FuPeg
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fupeg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yura Sokolov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-15 00:00:00.000000000 Z
11
+ date: 2023-08-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: "\n Simple backtracing parser, using ruby logical operators for primitive
14
14
  sequence/choice\n and slim wrappers for other PEG style operators and backtrace.\n