fupeg 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/fupeg/grammar.rb +97 -29
- data/lib/fupeg/parser.rb +112 -48
- data/lib/fupeg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c08b3d48fd87bd8fc04611663270f967afdf9bab0e0e5ce9c63b38c9f00332b6
|
4
|
+
data.tar.gz: 93308247a0fb582957862a4c0e274a3c646c1a7cba7ffc9acade0af1f5d6d250
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42973a5d22dd77f267a94d6ec65823ab891a2d63983dc548a4818b8b77ca24f5138754e08ae1a9b9b3e42a8791215f0c9b0f546e2397729a80dcb5fbb8929447
|
7
|
+
data.tar.gz: 5b99feff0f26acdac764194409f2b2d137a5dc6e16a6122e78cefda3647c680944b847112e135df32b9a27cd0c50652e3c6f32387713906c0fcb4938a8712c85
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## [0.3.0] - 2023-08-28
|
2
|
+
|
3
|
+
- Grammar.use_gram - to simplify grammar inclusion
|
4
|
+
- Grammar.proxy - to proxy rules to included grammar
|
5
|
+
- `_(pat)` doesn't return matched text, use `txt(pat)` instead
|
6
|
+
- "\`" is specialized for tokens
|
7
|
+
-- a token is either an ident (as specified by the `ident_only` method) or symbols
|
8
|
+
-- `token_sp?` is skipped after token
|
9
|
+
- fixes for position calculation
|
10
|
+
|
1
11
|
## [0.2.0] - 2023-08-15
|
2
12
|
|
3
13
|
- Split Parser and Grammar
|
data/lib/fupeg/grammar.rb
CHANGED
@@ -15,20 +15,43 @@ module FuPeg
|
|
15
15
|
gr.__send__(root)
|
16
16
|
end
|
17
17
|
|
18
|
+
def self.use_gram(gram, *, as: nil)
|
19
|
+
if as.nil?
|
20
|
+
name = gram.name[/\w+$/]
|
21
|
+
name = name.gsub(/(?<!^)(?=[A-Z](?![A-Z\d_]))/, "_").downcase
|
22
|
+
as = :"@#{name}"
|
23
|
+
elsif !as.start_with?("@")
|
24
|
+
as = :"@#{as}"
|
25
|
+
end
|
26
|
+
@used_grams ||= {}
|
27
|
+
@used_grams[as] = gram
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.proxy(*meths, to:)
|
31
|
+
meths.each do |meth|
|
32
|
+
define_method(meth) { |*args, &block|
|
33
|
+
instance_variable_get(to).__send__(meth, *args, &block)
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.used_grams
|
39
|
+
@used_grams&.dup || {}
|
40
|
+
end
|
41
|
+
|
18
42
|
def initialize(parser)
|
19
43
|
@p = parser
|
44
|
+
self.class.used_grams.each do |iv, v|
|
45
|
+
instance_variable_set(iv, v.new(parser))
|
46
|
+
end
|
20
47
|
end
|
21
48
|
|
22
|
-
def fail!
|
23
|
-
@p.fail!(skip: 3)
|
49
|
+
def fail!(pat: nil)
|
50
|
+
@p.fail!(pat: pat, skip: 3)
|
24
51
|
end
|
25
52
|
|
26
53
|
def dot
|
27
|
-
@p.
|
28
|
-
end
|
29
|
-
|
30
|
-
def `(str)
|
31
|
-
@p.match(str)
|
54
|
+
@p.dot
|
32
55
|
end
|
33
56
|
|
34
57
|
def _(lit = nil, &block)
|
@@ -40,21 +63,17 @@ module FuPeg
|
|
40
63
|
end
|
41
64
|
|
42
65
|
def will?(lit = nil, &block)
|
43
|
-
@p.
|
66
|
+
@p.look_ahead(true, lit, &block)
|
44
67
|
end
|
45
68
|
|
46
69
|
def wont!(lit = nil, &block)
|
47
|
-
@p.
|
70
|
+
@p.look_ahead(false, lit, &block)
|
48
71
|
end
|
49
72
|
|
50
|
-
def
|
73
|
+
def txt(lit = nil, &block)
|
51
74
|
@p.text(lit, &block)
|
52
75
|
end
|
53
76
|
|
54
|
-
def bounds(lit = nil, &block)
|
55
|
-
@p.bounds(lit, &block)
|
56
|
-
end
|
57
|
-
|
58
77
|
def cut(&block)
|
59
78
|
@p.with_cut_point(&block)
|
60
79
|
end
|
@@ -68,21 +87,70 @@ module FuPeg
|
|
68
87
|
end
|
69
88
|
|
70
89
|
def rep(range = 0.., lit = nil, &block)
|
71
|
-
range
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
90
|
+
@p.repetition(range, lit, &block)
|
91
|
+
end
|
92
|
+
|
93
|
+
# specialized matchers
|
94
|
+
|
95
|
+
def eof
|
96
|
+
@p.eof? && :eof
|
97
|
+
end
|
98
|
+
|
99
|
+
def nl
|
100
|
+
_(/\r\n|\r|\n/)
|
101
|
+
end
|
102
|
+
|
103
|
+
def eol
|
104
|
+
_ { lnsp? && nl && :eol }
|
105
|
+
end
|
106
|
+
|
107
|
+
def lnsp?
|
108
|
+
_(/[ \t]*/)
|
109
|
+
end
|
110
|
+
|
111
|
+
def lnsp!
|
112
|
+
_(/[ \t]+/)
|
113
|
+
end
|
114
|
+
|
115
|
+
def sp!
|
116
|
+
_(/\s+/)
|
117
|
+
end
|
118
|
+
|
119
|
+
def sp?
|
120
|
+
_(/\s*/)
|
121
|
+
end
|
122
|
+
|
123
|
+
def ident
|
124
|
+
(w = ident_only) && token_sp? && w
|
125
|
+
end
|
126
|
+
|
127
|
+
# raw token match
|
128
|
+
# if the token is an ident, an exact match is performed against the whole next ident
|
129
|
+
# else only string match
|
130
|
+
# and then whitespace is consumed
|
131
|
+
def `(token)
|
132
|
+
@p.match {
|
133
|
+
if _is_ident?(token)
|
134
|
+
_{ ident_only == token } || fail!(pat: token)
|
135
|
+
else
|
136
|
+
@p.match(token)
|
137
|
+
end && token_sp? && token
|
138
|
+
}
|
139
|
+
end
|
140
|
+
|
141
|
+
def _is_ident?(tok)
|
142
|
+
@_is_ident ||= Hash.new { |h, k|
|
143
|
+
h[k] = self.class.parse(:ident_only, k) == k
|
144
|
+
}
|
145
|
+
@_is_ident[tok]
|
146
|
+
end
|
147
|
+
|
148
|
+
def ident_only
|
149
|
+
txt(/[a-zA-Z_]\w*/)
|
150
|
+
end
|
151
|
+
|
152
|
+
def token_sp?
|
153
|
+
_(/\s*/)
|
86
154
|
end
|
87
155
|
end
|
88
156
|
end
|
data/lib/fupeg/parser.rb
CHANGED
@@ -4,6 +4,11 @@ require "strscan"
|
|
4
4
|
|
5
5
|
module FuPeg
|
6
6
|
class Parser
|
7
|
+
attr_accessor :debug
|
8
|
+
attr_accessor :file
|
9
|
+
attr_reader :failed
|
10
|
+
attr_reader :str
|
11
|
+
|
7
12
|
def initialize(str, pos = 0)
|
8
13
|
reset!(str, pos)
|
9
14
|
end
|
@@ -19,11 +24,10 @@ module FuPeg
|
|
19
24
|
@scan.pos = pos
|
20
25
|
end
|
21
26
|
@failed = nil
|
27
|
+
@debug = false
|
22
28
|
@cut = CutPoint.new
|
23
29
|
end
|
24
30
|
|
25
|
-
attr_reader :failed
|
26
|
-
|
27
31
|
def bytepos
|
28
32
|
@scan.pos
|
29
33
|
end
|
@@ -32,30 +36,44 @@ module FuPeg
|
|
32
36
|
@str_size - @str.byteslice(pos..).size
|
33
37
|
end
|
34
38
|
|
35
|
-
Fail = Struct.new(:stack, :bytepos)
|
39
|
+
Fail = Struct.new(:stack, :bytepos, :pattern)
|
36
40
|
|
37
|
-
def fail!(*, skip: 2)
|
38
|
-
if !@failed || bytepos > @failed.bytepos
|
41
|
+
def fail!(*, pat: nil, skip: 2)
|
42
|
+
if debug || !@failed || bytepos > @failed.bytepos
|
39
43
|
stack = caller_locations(skip)
|
40
44
|
stack.delete_if do |loc|
|
41
|
-
|
45
|
+
path = loc.path
|
46
|
+
if path == __FILE__
|
47
|
+
true
|
48
|
+
elsif path.start_with?(__dir__)
|
42
49
|
loc.label =~ /\b(backtrack|each|block)\b/
|
43
50
|
end
|
44
51
|
end
|
45
|
-
@failed = Fail.new(stack, bytepos)
|
52
|
+
@failed = Fail.new(stack, bytepos, pat)
|
53
|
+
report_failed($stderr) if debug
|
46
54
|
end
|
47
55
|
nil
|
48
56
|
end
|
49
57
|
|
50
58
|
def failed_position
|
51
|
-
|
59
|
+
position(bytepos: @failed.bytepos)
|
52
60
|
end
|
53
61
|
|
54
62
|
def report_failed(out)
|
55
|
-
pos =
|
56
|
-
out <<
|
57
|
-
|
58
|
-
|
63
|
+
pos = position(bytepos: @failed.bytepos)
|
64
|
+
out << if @failed.pattern
|
65
|
+
"Failed #{failed.pattern.inspect} at #{pos.lineno}:#{pos.colno}"
|
66
|
+
else
|
67
|
+
"Failed at #{pos.lineno}:#{pos.colno}"
|
68
|
+
end
|
69
|
+
if @file
|
70
|
+
out << " of #{@file}"
|
71
|
+
end
|
72
|
+
out << ":\n"
|
73
|
+
out << pos.line.chomp + "\n"
|
74
|
+
curpos = pos.line[...pos.colno].gsub("\t", " " * 8).size
|
75
|
+
curpos = 1 if curpos == 0 && @failed.bytepos == @str.bytesize
|
76
|
+
out << (" " * (curpos - 1) + "^\n")
|
59
77
|
out << "Call stack:\n"
|
60
78
|
@failed.stack.each do |loc|
|
61
79
|
out << "#{loc.path}:#{loc.lineno} in #{loc.label}\n"
|
@@ -63,31 +81,6 @@ module FuPeg
|
|
63
81
|
out
|
64
82
|
end
|
65
83
|
|
66
|
-
begin
|
67
|
-
StringScanner.new("x").skip("x")
|
68
|
-
def match(lit = //, &block)
|
69
|
-
block ? backtrack(&block) : (@scan.scan(lit) || fail!)
|
70
|
-
end
|
71
|
-
rescue
|
72
|
-
def match(lit = //, &block)
|
73
|
-
if String === lit
|
74
|
-
@_lit_cache ||= {}
|
75
|
-
lit = @_lit_cache[lit] ||= Regexp.new(Regexp.escape(lit))
|
76
|
-
end
|
77
|
-
block ? backtrack(&block) : (@scan.scan(lit) || fail!)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def text(lit = nil, &block)
|
82
|
-
pos = @scan.pos
|
83
|
-
match(lit, &block) && @str.byteslice(pos, @scan.pos - pos)
|
84
|
-
end
|
85
|
-
|
86
|
-
def bounds(lit = nil, &block)
|
87
|
-
pos = @scan.pos
|
88
|
-
match(lit, &block) && pos...@scan.pos
|
89
|
-
end
|
90
|
-
|
91
84
|
class CutPoint
|
92
85
|
attr_accessor :next
|
93
86
|
|
@@ -134,22 +127,80 @@ module FuPeg
|
|
134
127
|
@line_ends << @str.bytesize
|
135
128
|
end
|
136
129
|
|
137
|
-
def
|
138
|
-
lineno = @line_ends.bsearch_index { |x| x >=
|
130
|
+
def position(bytepos: @scan.pos)
|
131
|
+
lineno = @line_ends.bsearch_index { |x| x >= bytepos }
|
139
132
|
case lineno
|
140
133
|
when nil
|
141
|
-
raise "Position #{
|
134
|
+
raise "Position #{bytepos} is larger than string byte size #{@str.bytesize}"
|
142
135
|
else
|
143
136
|
prev_end = @line_ends[lineno - 1]
|
144
137
|
line_start = prev_end + 1
|
145
|
-
column = @str.byteslice(line_start,
|
138
|
+
column = @str.byteslice(line_start, bytepos - prev_end).size
|
139
|
+
end
|
140
|
+
if bytepos == @str.bytesize
|
141
|
+
if @str[-1] == "\n"
|
142
|
+
lineno, column = lineno + 1, 1
|
143
|
+
else
|
144
|
+
column += 1
|
145
|
+
end
|
146
146
|
end
|
147
147
|
line = @str.byteslice(line_start..@line_ends[lineno])
|
148
|
-
Position.new(lineno, column, line, charpos(
|
148
|
+
Position.new(lineno, column, line, charpos(bytepos))
|
149
149
|
end
|
150
150
|
|
151
151
|
# helper methods
|
152
152
|
|
153
|
+
begin
|
154
|
+
StringScanner.new("x").skip("x")
|
155
|
+
def match(lit = nil, &block)
|
156
|
+
block ? backtrack(&block) : (!lit || @scan.skip(lit) && true || fail!(pat: lit))
|
157
|
+
end
|
158
|
+
rescue
|
159
|
+
def match(lit = nil, &block)
|
160
|
+
if String === lit
|
161
|
+
@_lit_cache ||= {}
|
162
|
+
lit = @_lit_cache[lit] ||= Regexp.new(Regexp.escape(lit))
|
163
|
+
end
|
164
|
+
block ? backtrack(&block) : (!lit || @scan.skip(lit) && true || fail!(pat: lit))
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
def text(lit = nil, &block)
|
169
|
+
pos = @scan.pos
|
170
|
+
match(lit, &block) && @str.byteslice(pos, @scan.pos - pos)
|
171
|
+
end
|
172
|
+
|
173
|
+
def bounds(lit = nil, &block)
|
174
|
+
pos = @scan.pos
|
175
|
+
match(lit, &block) && pos...@scan.pos
|
176
|
+
end
|
177
|
+
|
178
|
+
def repetition(range = 0.., lit = nil, &block)
|
179
|
+
range = range..range if Integer === range
|
180
|
+
range = 0..range.max if range.begin.nil?
|
181
|
+
unless Integer === range.min && (range.end.nil? || Integer === range.max)
|
182
|
+
raise "Range malformed #{range}"
|
183
|
+
end
|
184
|
+
backtrack do
|
185
|
+
max = range.end && range.max
|
186
|
+
ar = []
|
187
|
+
(1..max).each do |i|
|
188
|
+
res = backtrack { yield i == 1 }
|
189
|
+
break unless res
|
190
|
+
ar << res
|
191
|
+
end
|
192
|
+
(ar.size >= range.min) ? ar : fail!
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def dot
|
197
|
+
match(/./m)
|
198
|
+
end
|
199
|
+
|
200
|
+
def eof?
|
201
|
+
@scan.eos?
|
202
|
+
end
|
203
|
+
|
153
204
|
def backtrack
|
154
205
|
pos = @scan.pos
|
155
206
|
res = yield
|
@@ -165,12 +216,25 @@ module FuPeg
|
|
165
216
|
raise
|
166
217
|
end
|
167
218
|
|
168
|
-
def
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
219
|
+
def look_ahead(positive, lit = nil, &block)
|
220
|
+
if block
|
221
|
+
p, f = @scan.pos, @failed
|
222
|
+
r = yield
|
223
|
+
@scan.pos = p
|
224
|
+
if positive ? r : !r
|
225
|
+
@failed = f
|
226
|
+
true
|
227
|
+
else
|
228
|
+
fail!
|
229
|
+
end
|
230
|
+
else
|
231
|
+
m = @scan.match?(lit)
|
232
|
+
if positive ? m : !m
|
233
|
+
true
|
234
|
+
else
|
235
|
+
fail!(pat: lit)
|
236
|
+
end
|
237
|
+
end
|
174
238
|
end
|
175
239
|
end
|
176
240
|
end
|
data/lib/fupeg/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fupeg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yura Sokolov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "\n Simple backtracing parser, using ruby logical operators for primitive
|
14
14
|
sequence/choice\n and slim wrappers for other PEG style operators and backtrace.\n
|