fupeg 0.1.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -1
- data/README.md +93 -5
- data/examples/calc.rb +69 -0
- data/lib/fupeg/grammar.rb +156 -0
- data/lib/fupeg/parser.rb +130 -111
- data/lib/fupeg/version.rb +1 -1
- data/lib/fupeg.rb +1 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c08b3d48fd87bd8fc04611663270f967afdf9bab0e0e5ce9c63b38c9f00332b6
|
4
|
+
data.tar.gz: 93308247a0fb582957862a4c0e274a3c646c1a7cba7ffc9acade0af1f5d6d250
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42973a5d22dd77f267a94d6ec65823ab891a2d63983dc548a4818b8b77ca24f5138754e08ae1a9b9b3e42a8791215f0c9b0f546e2397729a80dcb5fbb8929447
|
7
|
+
data.tar.gz: 5b99feff0f26acdac764194409f2b2d137a5dc6e16a6122e78cefda3647c680944b847112e135df32b9a27cd0c50652e3c6f32387713906c0fcb4938a8712c85
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,36 @@
|
|
1
|
-
## [0.
|
1
|
+
## [0.3.0] - 2023-08-28
|
2
|
+
|
3
|
+
- Grammar.use_gram - for simpler grammar inclusion
|
4
|
+
- Grammar.proxy - to proxy rules to included grammar
|
5
|
+
- `_(pat)` doesn't return matched text, use `txt(pat)` instead
|
6
|
+
- "\`" is specialized for tokens
|
7
|
+
-- token is either ident (which is specified with `ident_only` method), or symbols,
|
8
|
+
-- `token_sp?` is skipped after token
|
9
|
+
- fixes for position calculation
|
10
|
+
|
11
|
+
## [0.2.0] - 2023-08-15
|
12
|
+
|
13
|
+
- Split Parser and Grammar
|
14
|
+
- Use `_` for both literals and sequence:
|
15
|
+
`_("x")` , `_(/x/)`, `_{ _("x") }`
|
16
|
+
- Use backtick "\`" for string literals
|
17
|
+
`x`
|
18
|
+
- `cont?` used with a block to detect an uncut alternative
|
19
|
+
```ruby
|
20
|
+
cut {
|
21
|
+
# condition
|
22
|
+
_ { `if` && cut! && ... } ||
|
23
|
+
# loop
|
24
|
+
cont? { `while` && cut! && ...} ||
|
25
|
+
# assignment
|
26
|
+
cont? { (i = ident) && sp? && `=` && cut! && ... } ||
|
27
|
+
# function call
|
28
|
+
cont? { (i = ident) && sp? && `(` && cut! && ... } ||
|
29
|
+
...
|
30
|
+
}
|
31
|
+
```
|
32
|
+
|
33
|
+
## [0.1.0] - 2023-08-14
|
2
34
|
|
3
35
|
- Initial release
|
4
36
|
- Simplest rule definition in Ruby code without magic
|
data/README.md
CHANGED
@@ -1,8 +1,16 @@
|
|
1
|
-
# Fupeg
|
1
|
+
# Fupeg - simplest parser combinator
|
2
2
|
|
3
|
-
|
3
|
+
PEG like parser combinator as simple as possible, but still useful.
|
4
|
+
- backtracking, manually specified by user.
|
5
|
+
- no memoization (yet).
|
6
|
+
- no left recursion (yet).
|
7
|
+
- built with StringScanner.
|
8
|
+
- pattern sequences and alternation are implemented with logical operators.
|
4
9
|
|
5
|
-
|
10
|
+
Grammar code is pure-ruby and is executed as it is written.
|
11
|
+
No grammar tree is built and evaluated.
|
12
|
+
|
13
|
+
As a bonus, a "cut" operator is implemented.
|
6
14
|
|
7
15
|
## Installation
|
8
16
|
|
@@ -16,7 +24,87 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
16
24
|
|
17
25
|
## Usage
|
18
26
|
|
19
|
-
|
27
|
+
First, define a grammar:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require "fupeg"
|
31
|
+
|
32
|
+
class Calc < FuPeg::Grammar
|
33
|
+
def eof
|
34
|
+
wont! { dot } && :eof
|
35
|
+
end
|
36
|
+
|
37
|
+
def lnsp?
|
38
|
+
# match regular expression
|
39
|
+
_(/[ \t]*/)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Ruby 3.0 flavour
|
43
|
+
def sp? = _(/\s*/)
|
44
|
+
|
45
|
+
def number = (n = _(/\d+/)) && [:num, n]
|
46
|
+
|
47
|
+
def atom
|
48
|
+
# match raw string: _("(") is aliased to `(`
|
49
|
+
#
|
50
|
+
# match sequence of patterns with backtracking:
|
51
|
+
# `_{ x && y && z }` will rewind position, if block returns `nil` or `false`
|
52
|
+
#
|
53
|
+
# store value returned by a subpattern: just store it in a variable
|
54
|
+
#
|
55
|
+
# use `||` for alternatives
|
56
|
+
number || _ { _("(") && sp? && (sub = sum) && sp? && `)` && [:sub, sub] }
|
57
|
+
end
|
58
|
+
|
59
|
+
def fact
|
60
|
+
# repetition returns array of block results
|
61
|
+
# it stops if block returns falsey (`nil` or `false`)
|
62
|
+
rep { |fst| # fst == true for first element
|
63
|
+
op = nil
|
64
|
+
# don't expect operator before first term
|
65
|
+
(fst || (op = `*` || _("/") || _(/%/)) && sp?) &&
|
66
|
+
(a = atom) && lnsp? &&
|
67
|
+
[op, a].compact
|
68
|
+
# flat AST tree, returns [:fact, at, op, at, op, at, op] if matched
|
69
|
+
}&.flatten(1)&.unshift(:fact)
|
70
|
+
end
|
71
|
+
|
72
|
+
def sum
|
73
|
+
_ {
|
74
|
+
op = rest = nil
|
75
|
+
(f = fact) &&
|
76
|
+
# an optional pattern match always succeeds
|
77
|
+
opt { lnsp? && (op = `+` || `-`) && sp? && (rest = sum) } &&
|
78
|
+
# recursive AST tree
|
79
|
+
(rest ? [:sum, f, op, rest] : f)
|
80
|
+
}
|
81
|
+
end
|
82
|
+
|
83
|
+
def root
|
84
|
+
_ { sum || eof }
|
85
|
+
end
|
86
|
+
end
|
87
|
+
```
|
88
|
+
|
89
|
+
Then either parse a string directly, or create a parser and a grammar:
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
# Direct parsing
|
93
|
+
pp Calc.parse(:root, "1")
|
94
|
+
pp Calc.parse(:root, "1 + 2")
|
95
|
+
|
96
|
+
# separate parser and grammar initialization
|
97
|
+
parser = FuPeg::Parser.new("1 - 2*4/7 + 5")
|
98
|
+
grammar = Calc.new(parser)
|
99
|
+
pp grammar.root
|
100
|
+
|
101
|
+
# combined parser and grammar initialization
|
102
|
+
_parser, grammar = Calc.create("(1 -
|
103
|
+
2)*
|
104
|
+
(4 -10) +
|
105
|
+
11")
|
106
|
+
pp grammar.root
|
107
|
+
```
|
20
108
|
|
21
109
|
## Development
|
22
110
|
|
@@ -26,7 +114,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
26
114
|
|
27
115
|
## Contributing
|
28
116
|
|
29
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
117
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/funny-falcon/fupeg .
|
30
118
|
|
31
119
|
## License
|
32
120
|
|
data/examples/calc.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require "fupeg"
|
2
|
+
|
3
|
+
class Calc < FuPeg::Grammar
|
4
|
+
def eof
|
5
|
+
wont! { dot } && :eof
|
6
|
+
end
|
7
|
+
|
8
|
+
def lnsp?
|
9
|
+
# match regular expression
|
10
|
+
_(/[ \t]*/)
|
11
|
+
end
|
12
|
+
|
13
|
+
# Ruby 3.0 flavour
|
14
|
+
def sp? = _(/\s*/)
|
15
|
+
|
16
|
+
def number = (n = _(/\d+/)) && [:num, n]
|
17
|
+
|
18
|
+
def atom
|
19
|
+
# match raw string: _("(") is aliased to `(`
|
20
|
+
#
|
21
|
+
# match sequence of patterns with backtracking:
|
22
|
+
# `_{ x && y && z }` will rewind position, if block returns `nil` or `false`
|
23
|
+
#
|
24
|
+
# store value returned by a subpattern: just store it in a variable
|
25
|
+
number || _ { _("(") && sp? && (sub = sum) && sp? && `)` && [:sub, sub] }
|
26
|
+
end
|
27
|
+
|
28
|
+
def fact
|
29
|
+
# repetition returns array of block results
|
30
|
+
# it stops if block returns falsey (`nil` or `false`)
|
31
|
+
rep { |fst| # fst == true for first element
|
32
|
+
op = nil
|
33
|
+
(fst || (op = `*` || `/` || "%") && sp?) &&
|
34
|
+
(a = atom) && lnsp? &&
|
35
|
+
[op, a].compact
|
36
|
+
# flat AST tree, returns [:fact, at, op, at, op, at, op] if matched
|
37
|
+
}&.flatten(1)&.unshift(:fact)
|
38
|
+
end
|
39
|
+
|
40
|
+
def sum
|
41
|
+
_ {
|
42
|
+
op = rest = nil
|
43
|
+
(f = fact) &&
|
44
|
+
# an optional pattern match always succeeds
|
45
|
+
opt { lnsp? && (op = `+` || `-`) && sp? && (rest = sum) } &&
|
46
|
+
# recursive AST tree
|
47
|
+
(rest ? [:sum, f, op, rest] : f)
|
48
|
+
}
|
49
|
+
end
|
50
|
+
|
51
|
+
def root
|
52
|
+
_ { sum || eof }
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
pp Calc.parse(:root, "1")
|
57
|
+
pp Calc.parse(:root, "1 + 2")
|
58
|
+
|
59
|
+
# separate parser and grammar initialization
|
60
|
+
parser = FuPeg::Parser.new("1 - 2*4/7 + 5")
|
61
|
+
grammar = Calc.new(parser)
|
62
|
+
pp grammar.root
|
63
|
+
|
64
|
+
# combined parser and grammar initialization
|
65
|
+
_parser, grammar = Calc.create("(1 -
|
66
|
+
2)*
|
67
|
+
(4 -10) +
|
68
|
+
11")
|
69
|
+
pp grammar.root
|
@@ -0,0 +1,156 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "parser"
|
4
|
+
|
5
|
+
module FuPeg
|
6
|
+
class Grammar
|
7
|
+
def self.create(str, pos = 0)
|
8
|
+
parser = Parser.new(str, pos)
|
9
|
+
grammar = new(parser)
|
10
|
+
[parser, grammar]
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.parse(root, str)
|
14
|
+
_, gr = create(str)
|
15
|
+
gr.__send__(root)
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.use_gram(gram, *, as: nil)
|
19
|
+
if as.nil?
|
20
|
+
name = gram.name[/\w+$/]
|
21
|
+
name = name.gsub(/(?<!^)(?=[A-Z](?![A-Z\d_]))/, "_").downcase
|
22
|
+
as = :"@#{name}"
|
23
|
+
elsif !as.start_with?("@")
|
24
|
+
as = :"@#{as}"
|
25
|
+
end
|
26
|
+
@used_grams ||= {}
|
27
|
+
@used_grams[as] = gram
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.proxy(*meths, to:)
|
31
|
+
meths.each do |meth|
|
32
|
+
define_method(meth) { |*args, &block|
|
33
|
+
instance_variable_get(to).__send__(meth, *args, &block)
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.used_grams
|
39
|
+
@used_grams&.dup || {}
|
40
|
+
end
|
41
|
+
|
42
|
+
def initialize(parser)
|
43
|
+
@p = parser
|
44
|
+
self.class.used_grams.each do |iv, v|
|
45
|
+
instance_variable_set(iv, v.new(parser))
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def fail!(pat: nil)
|
50
|
+
@p.fail!(pat: pat, skip: 3)
|
51
|
+
end
|
52
|
+
|
53
|
+
def dot
|
54
|
+
@p.dot
|
55
|
+
end
|
56
|
+
|
57
|
+
def _(lit = nil, &block)
|
58
|
+
@p.match(lit, &block)
|
59
|
+
end
|
60
|
+
|
61
|
+
def opt(arg = nil, &block)
|
62
|
+
@p.match(arg, &block) || true
|
63
|
+
end
|
64
|
+
|
65
|
+
def will?(lit = nil, &block)
|
66
|
+
@p.look_ahead(true, lit, &block)
|
67
|
+
end
|
68
|
+
|
69
|
+
def wont!(lit = nil, &block)
|
70
|
+
@p.look_ahead(false, lit, &block)
|
71
|
+
end
|
72
|
+
|
73
|
+
def txt(lit = nil, &block)
|
74
|
+
@p.text(lit, &block)
|
75
|
+
end
|
76
|
+
|
77
|
+
def cut(&block)
|
78
|
+
@p.with_cut_point(&block)
|
79
|
+
end
|
80
|
+
|
81
|
+
def cut!
|
82
|
+
@p.current_cutpoint.cut!
|
83
|
+
end
|
84
|
+
|
85
|
+
def cont?(&block)
|
86
|
+
@p.current_cutpoint.can_continue? && (block ? @p.backtrack(&block) : true)
|
87
|
+
end
|
88
|
+
|
89
|
+
def rep(range = 0.., lit = nil, &block)
|
90
|
+
@p.repetition(range, lit, &block)
|
91
|
+
end
|
92
|
+
|
93
|
+
# specialized matchers
|
94
|
+
|
95
|
+
def eof
|
96
|
+
@p.eof? && :eof
|
97
|
+
end
|
98
|
+
|
99
|
+
def nl
|
100
|
+
_(/\r\n|\r|\n/)
|
101
|
+
end
|
102
|
+
|
103
|
+
def eol
|
104
|
+
_ { lnsp? && nl && :eol }
|
105
|
+
end
|
106
|
+
|
107
|
+
def lnsp?
|
108
|
+
_(/[ \t]*/)
|
109
|
+
end
|
110
|
+
|
111
|
+
def lnsp!
|
112
|
+
_(/[ \t]+/)
|
113
|
+
end
|
114
|
+
|
115
|
+
def sp!
|
116
|
+
_(/\s+/)
|
117
|
+
end
|
118
|
+
|
119
|
+
def sp?
|
120
|
+
_(/\s*/)
|
121
|
+
end
|
122
|
+
|
123
|
+
def ident
|
124
|
+
(w = ident_only) && token_sp? && w
|
125
|
+
end
|
126
|
+
|
127
|
+
# raw token match
|
128
|
+
# if token is ident, then exact match performed with whole next ident
|
129
|
+
# else only string match
|
130
|
+
# and then whitespace is consumed
|
131
|
+
def `(token)
|
132
|
+
@p.match {
|
133
|
+
if _is_ident?(token)
|
134
|
+
_{ ident_only == token } || fail!(pat: token)
|
135
|
+
else
|
136
|
+
@p.match(token)
|
137
|
+
end && token_sp? && token
|
138
|
+
}
|
139
|
+
end
|
140
|
+
|
141
|
+
def _is_ident?(tok)
|
142
|
+
@_is_ident ||= Hash.new { |h, k|
|
143
|
+
h[k] = self.class.parse(:ident_only, k) == k
|
144
|
+
}
|
145
|
+
@_is_ident[tok]
|
146
|
+
end
|
147
|
+
|
148
|
+
def ident_only
|
149
|
+
txt(/[a-zA-Z_]\w*/)
|
150
|
+
end
|
151
|
+
|
152
|
+
def token_sp?
|
153
|
+
_(/\s*/)
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
data/lib/fupeg/parser.rb
CHANGED
@@ -4,6 +4,11 @@ require "strscan"
|
|
4
4
|
|
5
5
|
module FuPeg
|
6
6
|
class Parser
|
7
|
+
attr_accessor :debug
|
8
|
+
attr_accessor :file
|
9
|
+
attr_reader :failed
|
10
|
+
attr_reader :str
|
11
|
+
|
7
12
|
def initialize(str, pos = 0)
|
8
13
|
reset!(str, pos)
|
9
14
|
end
|
@@ -19,40 +24,56 @@ module FuPeg
|
|
19
24
|
@scan.pos = pos
|
20
25
|
end
|
21
26
|
@failed = nil
|
27
|
+
@debug = false
|
22
28
|
@cut = CutPoint.new
|
23
29
|
end
|
24
30
|
|
25
|
-
attr_reader :failed
|
26
|
-
|
27
31
|
def bytepos
|
28
32
|
@scan.pos
|
29
33
|
end
|
30
34
|
|
31
|
-
def charpos
|
32
|
-
@str_size - @str.byteslice(
|
35
|
+
def charpos(pos = @scan.pos)
|
36
|
+
@str_size - @str.byteslice(pos..).size
|
33
37
|
end
|
34
38
|
|
35
|
-
Fail = Struct.new(:stack, :
|
39
|
+
Fail = Struct.new(:stack, :bytepos, :pattern)
|
36
40
|
|
37
|
-
def fail!(skip
|
38
|
-
if !@failed || bytepos > @failed.bytepos
|
41
|
+
def fail!(*, pat: nil, skip: 2)
|
42
|
+
if debug || !@failed || bytepos > @failed.bytepos
|
39
43
|
stack = caller_locations(skip)
|
40
44
|
stack.delete_if do |loc|
|
41
|
-
|
42
|
-
|
45
|
+
path = loc.path
|
46
|
+
if path == __FILE__
|
47
|
+
true
|
48
|
+
elsif path.start_with?(__dir__)
|
49
|
+
loc.label =~ /\b(backtrack|each|block)\b/
|
43
50
|
end
|
44
51
|
end
|
45
|
-
|
46
|
-
|
52
|
+
@failed = Fail.new(stack, bytepos, pat)
|
53
|
+
report_failed($stderr) if debug
|
47
54
|
end
|
48
55
|
nil
|
49
56
|
end
|
50
57
|
|
58
|
+
def failed_position
|
59
|
+
position(bytepos: @failed.bytepos)
|
60
|
+
end
|
61
|
+
|
51
62
|
def report_failed(out)
|
52
|
-
pos = @failed.
|
53
|
-
out <<
|
54
|
-
|
55
|
-
|
63
|
+
pos = position(bytepos: @failed.bytepos)
|
64
|
+
out << if @failed.pattern
|
65
|
+
"Failed #{failed.pattern.inspect} at #{pos.lineno}:#{pos.colno}"
|
66
|
+
else
|
67
|
+
"Failed at #{pos.lineno}:#{pos.colno}"
|
68
|
+
end
|
69
|
+
if @file
|
70
|
+
out << " of #{@file}"
|
71
|
+
end
|
72
|
+
out << ":\n"
|
73
|
+
out << pos.line.chomp + "\n"
|
74
|
+
curpos = pos.line[...pos.colno].gsub("\t", " " * 8).size
|
75
|
+
curpos = 1 if curpos == 0 && @failed.bytepos == @str.bytesize
|
76
|
+
out << (" " * (curpos - 1) + "^\n")
|
56
77
|
out << "Call stack:\n"
|
57
78
|
@failed.stack.each do |loc|
|
58
79
|
out << "#{loc.path}:#{loc.lineno} in #{loc.label}\n"
|
@@ -60,75 +81,11 @@ module FuPeg
|
|
60
81
|
out
|
61
82
|
end
|
62
83
|
|
63
|
-
def dot
|
64
|
-
@scan.scan(/./m) || fail!
|
65
|
-
end
|
66
|
-
|
67
|
-
begin
|
68
|
-
StringScanner.new("x").skip("x")
|
69
|
-
def lit(reg_or_str)
|
70
|
-
@scan.scan(reg_or_str) || fail!
|
71
|
-
end
|
72
|
-
rescue
|
73
|
-
def lit(reg_or_str)
|
74
|
-
if String === reg_or_str
|
75
|
-
@__match_lit_cache ||= Hash.new { |h, s| h[s] = Regexp.new(Regexp.escape(s)) }
|
76
|
-
reg_or_str = @__match_lit_cache[reg_or_str]
|
77
|
-
end
|
78
|
-
@scan.scan(reg_or_str) || fail!
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def seq(*args, &block)
|
83
|
-
_bt(&block)
|
84
|
-
end
|
85
|
-
|
86
|
-
def opt(&block)
|
87
|
-
_rewind(nil, @failed, _bt(&block) || true)
|
88
|
-
end
|
89
|
-
|
90
|
-
def rep(range = 0.., &block)
|
91
|
-
range = range..range if Integer === range
|
92
|
-
range = 0..range.max if range.begin.nil?
|
93
|
-
unless Integer === range.min && (range.end.nil? || Integer === range.max)
|
94
|
-
raise "Range malformed #{range}"
|
95
|
-
end
|
96
|
-
_bt do
|
97
|
-
max = range.end && range.max
|
98
|
-
ar = []
|
99
|
-
(1..max).each do
|
100
|
-
res = _bt(&block)
|
101
|
-
break unless res
|
102
|
-
ar << res
|
103
|
-
end
|
104
|
-
(ar.size >= range.min) ? ar : fail!
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def text(&block)
|
109
|
-
pos = @scan.pos
|
110
|
-
_bt(&block) && @str.byteslice(pos, @scan.pos - pos)
|
111
|
-
end
|
112
|
-
|
113
|
-
def will?(&block)
|
114
|
-
_rewind(@scan.pos, false, _bt(&block))
|
115
|
-
end
|
116
|
-
|
117
|
-
def wont!(&block)
|
118
|
-
_rewind(@scan.pos, @failed, !_bt(&block)) || fail!
|
119
|
-
end
|
120
|
-
|
121
|
-
# cut point handling
|
122
|
-
# cut do
|
123
|
-
# seq { lit("{") && cut! && lit("}") } ||
|
124
|
-
# !cut? && seq { lit("[") && cut! && lit("]") } ||
|
125
|
-
# !cut? && dot
|
126
|
-
# end
|
127
84
|
class CutPoint
|
128
85
|
attr_accessor :next
|
129
86
|
|
130
87
|
def initialize
|
131
|
-
@cut =
|
88
|
+
@cut = nil
|
132
89
|
@next = nil
|
133
90
|
end
|
134
91
|
|
@@ -137,13 +94,13 @@ module FuPeg
|
|
137
94
|
@cut = true
|
138
95
|
end
|
139
96
|
|
140
|
-
def
|
141
|
-
@cut
|
97
|
+
def can_continue?
|
98
|
+
@cut ? nil : true
|
142
99
|
end
|
143
100
|
end
|
144
101
|
|
145
|
-
# for use with cut! and
|
146
|
-
def
|
102
|
+
# for use with cut! and cont?
|
103
|
+
def with_cut_point
|
147
104
|
prev_cut = @cut
|
148
105
|
@cut = CutPoint.new
|
149
106
|
prev_cut.next = @cut
|
@@ -153,51 +110,98 @@ module FuPeg
|
|
153
110
|
@cut = prev_cut
|
154
111
|
end
|
155
112
|
|
156
|
-
def
|
157
|
-
@cut
|
158
|
-
end
|
159
|
-
|
160
|
-
def cut?
|
161
|
-
@cut.cut?
|
113
|
+
def current_cutpoint
|
114
|
+
@cut
|
162
115
|
end
|
163
116
|
|
164
117
|
# Position handling for failures
|
165
118
|
|
166
119
|
Position = Struct.new(:lineno, :colno, :line, :charpos)
|
167
120
|
|
168
|
-
private
|
169
|
-
|
170
121
|
def init_line_ends
|
171
122
|
@line_ends = [-1]
|
172
|
-
|
173
|
-
while
|
174
|
-
@line_ends <<
|
175
|
-
pos += 1
|
123
|
+
scan = StringScanner.new(@str)
|
124
|
+
while scan.skip_until(/\n|\r\n?/)
|
125
|
+
@line_ends << scan.pos - 1
|
176
126
|
end
|
177
|
-
@line_ends << @str.
|
127
|
+
@line_ends << @str.bytesize
|
178
128
|
end
|
179
129
|
|
180
|
-
|
181
|
-
|
182
|
-
def position_for_charpos(charpos)
|
183
|
-
lineno = @line_ends.bsearch_index { |x| x >= charpos }
|
130
|
+
def position(bytepos: @scan.pos)
|
131
|
+
lineno = @line_ends.bsearch_index { |x| x >= bytepos }
|
184
132
|
case lineno
|
185
133
|
when nil
|
186
|
-
raise "Position #{
|
134
|
+
raise "Position #{bytepos} is larger than string byte size #{@str.bytesize}"
|
187
135
|
else
|
188
136
|
prev_end = @line_ends[lineno - 1]
|
189
137
|
line_start = prev_end + 1
|
190
|
-
column =
|
138
|
+
column = @str.byteslice(line_start, bytepos - prev_end).size
|
191
139
|
end
|
192
|
-
|
193
|
-
|
140
|
+
if bytepos == @str.bytesize
|
141
|
+
if @str[-1] == "\n"
|
142
|
+
lineno, column = lineno + 1, 1
|
143
|
+
else
|
144
|
+
column += 1
|
145
|
+
end
|
146
|
+
end
|
147
|
+
line = @str.byteslice(line_start..@line_ends[lineno])
|
148
|
+
Position.new(lineno, column, line, charpos(bytepos))
|
194
149
|
end
|
195
150
|
|
196
151
|
# helper methods
|
197
152
|
|
198
|
-
|
153
|
+
begin
|
154
|
+
StringScanner.new("x").skip("x")
|
155
|
+
def match(lit = nil, &block)
|
156
|
+
block ? backtrack(&block) : (!lit || @scan.skip(lit) && true || fail!(pat: lit))
|
157
|
+
end
|
158
|
+
rescue
|
159
|
+
def match(lit = nil, &block)
|
160
|
+
if String === lit
|
161
|
+
@_lit_cache ||= {}
|
162
|
+
lit = @_lit_cache[lit] ||= Regexp.new(Regexp.escape(lit))
|
163
|
+
end
|
164
|
+
block ? backtrack(&block) : (!lit || @scan.skip(lit) && true || fail!(pat: lit))
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
def text(lit = nil, &block)
|
169
|
+
pos = @scan.pos
|
170
|
+
match(lit, &block) && @str.byteslice(pos, @scan.pos - pos)
|
171
|
+
end
|
199
172
|
|
200
|
-
def
|
173
|
+
def bounds(lit = nil, &block)
|
174
|
+
pos = @scan.pos
|
175
|
+
match(lit, &block) && pos...@scan.pos
|
176
|
+
end
|
177
|
+
|
178
|
+
def repetition(range = 0.., lit = nil, &block)
|
179
|
+
range = range..range if Integer === range
|
180
|
+
range = 0..range.max if range.begin.nil?
|
181
|
+
unless Integer === range.min && (range.end.nil? || Integer === range.max)
|
182
|
+
raise "Range malformed #{range}"
|
183
|
+
end
|
184
|
+
backtrack do
|
185
|
+
max = range.end && range.max
|
186
|
+
ar = []
|
187
|
+
(1..max).each do |i|
|
188
|
+
res = backtrack { yield i == 1 }
|
189
|
+
break unless res
|
190
|
+
ar << res
|
191
|
+
end
|
192
|
+
(ar.size >= range.min) ? ar : fail!
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def dot
|
197
|
+
match(/./m)
|
198
|
+
end
|
199
|
+
|
200
|
+
def eof?
|
201
|
+
@scan.eos?
|
202
|
+
end
|
203
|
+
|
204
|
+
def backtrack
|
201
205
|
pos = @scan.pos
|
202
206
|
res = yield
|
203
207
|
if res
|
@@ -212,10 +216,25 @@ module FuPeg
|
|
212
216
|
raise
|
213
217
|
end
|
214
218
|
|
215
|
-
def
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
+
def look_ahead(positive, lit = nil, &block)
|
220
|
+
if block
|
221
|
+
p, f = @scan.pos, @failed
|
222
|
+
r = yield
|
223
|
+
@scan.pos = p
|
224
|
+
if positive ? r : !r
|
225
|
+
@failed = f
|
226
|
+
true
|
227
|
+
else
|
228
|
+
fail!
|
229
|
+
end
|
230
|
+
else
|
231
|
+
m = @scan.match?(lit)
|
232
|
+
if positive ? m : !m
|
233
|
+
true
|
234
|
+
else
|
235
|
+
fail!(pat: lit)
|
236
|
+
end
|
237
|
+
end
|
219
238
|
end
|
220
239
|
end
|
221
240
|
end
|
data/lib/fupeg/version.rb
CHANGED
data/lib/fupeg.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fupeg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yura Sokolov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "\n Simple backtracing parser, using ruby logical operators for primitive
|
14
14
|
sequence/choice\n and slim wrappers for other PEG style operators and backtrace.\n
|
@@ -26,7 +26,9 @@ files:
|
|
26
26
|
- LICENSE.txt
|
27
27
|
- README.md
|
28
28
|
- Rakefile
|
29
|
+
- examples/calc.rb
|
29
30
|
- lib/fupeg.rb
|
31
|
+
- lib/fupeg/grammar.rb
|
30
32
|
- lib/fupeg/parser.rb
|
31
33
|
- lib/fupeg/version.rb
|
32
34
|
- sig/fupeg.rbs
|