fupeg 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -1
- data/README.md +93 -5
- data/examples/calc.rb +69 -0
- data/lib/fupeg/grammar.rb +156 -0
- data/lib/fupeg/parser.rb +130 -111
- data/lib/fupeg/version.rb +1 -1
- data/lib/fupeg.rb +1 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c08b3d48fd87bd8fc04611663270f967afdf9bab0e0e5ce9c63b38c9f00332b6
|
4
|
+
data.tar.gz: 93308247a0fb582957862a4c0e274a3c646c1a7cba7ffc9acade0af1f5d6d250
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42973a5d22dd77f267a94d6ec65823ab891a2d63983dc548a4818b8b77ca24f5138754e08ae1a9b9b3e42a8791215f0c9b0f546e2397729a80dcb5fbb8929447
|
7
|
+
data.tar.gz: 5b99feff0f26acdac764194409f2b2d137a5dc6e16a6122e78cefda3647c680944b847112e135df32b9a27cd0c50652e3c6f32387713906c0fcb4938a8712c85
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,36 @@
|
|
1
|
-
## [0.
|
1
|
+
## [0.3.0] - 2023-08-28
|
2
|
+
|
3
|
+
- Grammar.use_gram - to simplify grammar inclusion
|
4
|
+
- Grammar.proxy - to proxy rules to included grammar
|
5
|
+
- `_(pat)` doesn't return matched text, use `txt(pat)` instead
|
6
|
+
- "\`" is specialized for tokens
|
7
|
+
-- token is either ident (which is specified with `ident_only` method), or symbols,
|
8
|
+
-- `token_sp?` is skipped after token
|
9
|
+
- fixes for position calculation
|
10
|
+
|
11
|
+
## [0.2.0] - 2023-08-15
|
12
|
+
|
13
|
+
- Split Parser and Grammar
|
14
|
+
- Use `_` for both literals and sequence:
|
15
|
+
`_("x")` , `_(/x/)`, `_{ _("x") }`
|
16
|
+
- Use backtick "\`" for string literals
|
17
|
+
`x`
|
18
|
+
- `cont?` used with a block to detect an uncut alternative
|
19
|
+
```ruby
|
20
|
+
cut {
|
21
|
+
# condition
|
22
|
+
_ { `if` && cut! && ... } ||
|
23
|
+
# loop
|
24
|
+
cont? { `while` && cut! && ...} ||
|
25
|
+
# assignment
|
26
|
+
cont? { (i = ident) && sp? && `=` && cut! && ... } ||
|
27
|
+
# function call
|
28
|
+
cont? { (i = ident) && sp? && `(` && cut! && ... } ||
|
29
|
+
...
|
30
|
+
}
|
31
|
+
```
|
32
|
+
|
33
|
+
## [0.1.0] - 2023-08-14
|
2
34
|
|
3
35
|
- Initial release
|
4
36
|
- Simplest rule definition in Ruby code without magic
|
data/README.md
CHANGED
@@ -1,8 +1,16 @@
|
|
1
|
-
# Fupeg
|
1
|
+
# Fupeg - simplest parser combinator
|
2
2
|
|
3
|
-
|
3
|
+
PEG like parser combinator as simple as possible, but still useful.
|
4
|
+
- backtracking, manually specified by user.
|
5
|
+
- no memoization (yet).
|
6
|
+
- no left recursion (yet).
|
7
|
+
- built with StringScanner.
|
8
|
+
- pattern sequences and alternation are implemented with logical operators.
|
4
9
|
|
5
|
-
|
10
|
+
Grammar code is pure-ruby and is executed as it is written.
|
11
|
+
No grammar tree is built and evaluated.
|
12
|
+
|
13
|
+
As a bonus, the "cut" operator is implemented.
|
6
14
|
|
7
15
|
## Installation
|
8
16
|
|
@@ -16,7 +24,87 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
16
24
|
|
17
25
|
## Usage
|
18
26
|
|
19
|
-
|
27
|
+
First you should define grammar:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require "fupeg"
|
31
|
+
|
32
|
+
class Calc < FuPeg::Grammar
|
33
|
+
def eof
|
34
|
+
wont! { dot } && :eof
|
35
|
+
end
|
36
|
+
|
37
|
+
def lnsp?
|
38
|
+
# match regular expression
|
39
|
+
_(/[ \t]*/)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Ruby 3.0 flavour
|
43
|
+
def sp? = _(/\s*/)
|
44
|
+
|
45
|
+
def number = (n = _(/\d+/)) && [:num, n]
|
46
|
+
|
47
|
+
def atom
|
48
|
+
# match raw string: _("(") is aliased to `(`
|
49
|
+
#
|
50
|
+
# match sequence of patterns with backtracking:
|
51
|
+
# `_{ x && y && z }` will rewind position, if block returns `nil` or `false`
|
52
|
+
#
|
53
|
+
# store value returned by subpattern: just store it in a variable
|
54
|
+
#
|
55
|
+
# use `||` for alternatives
|
56
|
+
number || _ { _("(") && sp? && (sub = sum) && sp? && `)` && [:sub, sub] }
|
57
|
+
end
|
58
|
+
|
59
|
+
def fact
|
60
|
+
# repetition returns array of block results
|
61
|
+
# it stops if block returns falsey (`nil` or `false`)
|
62
|
+
rep { |fst| # fst == true for first element
|
63
|
+
op = nil
|
64
|
+
# don't expect operator before first term
|
65
|
+
(fst || (op = `*` || _("/") || _(/%/)) && sp?) &&
|
66
|
+
(a = atom) && lnsp? &&
|
67
|
+
[op, a].compact
|
68
|
+
# flat AST tree, returns [:fact, at, op, at, op, at, op] if matched
|
69
|
+
}&.flatten(1)&.unshift(:fact)
|
70
|
+
end
|
71
|
+
|
72
|
+
def sum
|
73
|
+
_ {
|
74
|
+
op = rest = nil
|
75
|
+
(f = fact) &&
|
76
|
+
# an optional pattern always succeeds
|
77
|
+
opt { lnsp? && (op = `+` || `-`) && sp? && (rest = sum) } &&
|
78
|
+
# recursive AST tree
|
79
|
+
(rest ? [:sum, f, op, rest] : f)
|
80
|
+
}
|
81
|
+
end
|
82
|
+
|
83
|
+
def root
|
84
|
+
_ { sum || eof }
|
85
|
+
end
|
86
|
+
end
|
87
|
+
```
|
88
|
+
|
89
|
+
Then either parse string directly, or create parser and grammar:
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
# Direct parsing
|
93
|
+
pp Calc.parse(:root, "1")
|
94
|
+
pp Calc.parse(:root, "1 + 2")
|
95
|
+
|
96
|
+
# separate parser and grammar initialization
|
97
|
+
parser = FuPeg::Parser.new("1 - 2*4/7 + 5")
|
98
|
+
grammar = Calc.new(parser)
|
99
|
+
pp grammar.root
|
100
|
+
|
101
|
+
# combined parser and grammar initialization
|
102
|
+
_parser, grammar = Calc.create("(1 -
|
103
|
+
2)*
|
104
|
+
(4 -10) +
|
105
|
+
11")
|
106
|
+
pp grammar.root
|
107
|
+
```
|
20
108
|
|
21
109
|
## Development
|
22
110
|
|
@@ -26,7 +114,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
26
114
|
|
27
115
|
## Contributing
|
28
116
|
|
29
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
117
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/funny-falcon/fupeg .
|
30
118
|
|
31
119
|
## License
|
32
120
|
|
data/examples/calc.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require "fupeg"
|
2
|
+
|
3
|
+
class Calc < FuPeg::Grammar
|
4
|
+
def eof
|
5
|
+
wont! { dot } && :eof
|
6
|
+
end
|
7
|
+
|
8
|
+
def lnsp?
|
9
|
+
# match regular expression
|
10
|
+
_(/[ \t]*/)
|
11
|
+
end
|
12
|
+
|
13
|
+
# Ruby 3.0 flavour
|
14
|
+
def sp? = _(/\s*/)
|
15
|
+
|
16
|
+
def number = (n = _(/\d+/)) && [:num, n]
|
17
|
+
|
18
|
+
def atom
|
19
|
+
# match raw string: _("(") is aliased to `(`
|
20
|
+
#
|
21
|
+
# match sequence of patterns with backtracking:
|
22
|
+
# `_{ x && y && z }` will rewind position, if block returns `nil` or `false`
|
23
|
+
#
|
24
|
+
# store value returned by subpattern: just store it in a variable
|
25
|
+
number || _ { _("(") && sp? && (sub = sum) && sp? && `)` && [:sub, sub] }
|
26
|
+
end
|
27
|
+
|
28
|
+
def fact
|
29
|
+
# repetition returns array of block results
|
30
|
+
# it stops if block returns falsey (`nil` or `false`)
|
31
|
+
rep { |fst| # fst == true for first element
|
32
|
+
op = nil
|
33
|
+
(fst || (op = `*` || `/` || "%") && sp?) &&
|
34
|
+
(a = atom) && lnsp? &&
|
35
|
+
[op, a].compact
|
36
|
+
# flat AST tree, returns [:fact, at, op, at, op, at, op] if matched
|
37
|
+
}&.flatten(1)&.unshift(:fact)
|
38
|
+
end
|
39
|
+
|
40
|
+
def sum
|
41
|
+
_ {
|
42
|
+
op = rest = nil
|
43
|
+
(f = fact) &&
|
44
|
+
# an optional pattern always succeeds
|
45
|
+
opt { lnsp? && (op = `+` || `-`) && sp? && (rest = sum) } &&
|
46
|
+
# recursive AST tree
|
47
|
+
(rest ? [:sum, f, op, rest] : f)
|
48
|
+
}
|
49
|
+
end
|
50
|
+
|
51
|
+
def root
|
52
|
+
_ { sum || eof }
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
pp Calc.parse(:root, "1")
|
57
|
+
pp Calc.parse(:root, "1 + 2")
|
58
|
+
|
59
|
+
# separate parser and grammar initialization
|
60
|
+
parser = FuPeg::Parser.new("1 - 2*4/7 + 5")
|
61
|
+
grammar = Calc.new(parser)
|
62
|
+
pp grammar.root
|
63
|
+
|
64
|
+
# combined parser and grammar initialization
|
65
|
+
_parser, grammar = Calc.create("(1 -
|
66
|
+
2)*
|
67
|
+
(4 -10) +
|
68
|
+
11")
|
69
|
+
pp grammar.root
|
@@ -0,0 +1,156 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "parser"
|
4
|
+
|
5
|
+
module FuPeg
|
6
|
+
class Grammar
|
7
|
+
def self.create(str, pos = 0)
|
8
|
+
parser = Parser.new(str, pos)
|
9
|
+
grammar = new(parser)
|
10
|
+
[parser, grammar]
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.parse(root, str)
|
14
|
+
_, gr = create(str)
|
15
|
+
gr.__send__(root)
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.use_gram(gram, *, as: nil)
|
19
|
+
if as.nil?
|
20
|
+
name = gram.name[/\w+$/]
|
21
|
+
name = name.gsub(/(?<!^)(?=[A-Z](?![A-Z\d_]))/, "_").downcase
|
22
|
+
as = :"@#{name}"
|
23
|
+
elsif !as.start_with?("@")
|
24
|
+
as = :"@#{as}"
|
25
|
+
end
|
26
|
+
@used_grams ||= {}
|
27
|
+
@used_grams[as] = gram
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.proxy(*meths, to:)
|
31
|
+
meths.each do |meth|
|
32
|
+
define_method(meth) { |*args, &block|
|
33
|
+
instance_variable_get(to).__send__(meth, *args, &block)
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.used_grams
|
39
|
+
@used_grams&.dup || {}
|
40
|
+
end
|
41
|
+
|
42
|
+
def initialize(parser)
|
43
|
+
@p = parser
|
44
|
+
self.class.used_grams.each do |iv, v|
|
45
|
+
instance_variable_set(iv, v.new(parser))
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def fail!(pat: nil)
|
50
|
+
@p.fail!(pat: pat, skip: 3)
|
51
|
+
end
|
52
|
+
|
53
|
+
def dot
|
54
|
+
@p.dot
|
55
|
+
end
|
56
|
+
|
57
|
+
def _(lit = nil, &block)
|
58
|
+
@p.match(lit, &block)
|
59
|
+
end
|
60
|
+
|
61
|
+
def opt(arg = nil, &block)
|
62
|
+
@p.match(arg, &block) || true
|
63
|
+
end
|
64
|
+
|
65
|
+
def will?(lit = nil, &block)
|
66
|
+
@p.look_ahead(true, lit, &block)
|
67
|
+
end
|
68
|
+
|
69
|
+
def wont!(lit = nil, &block)
|
70
|
+
@p.look_ahead(false, lit, &block)
|
71
|
+
end
|
72
|
+
|
73
|
+
def txt(lit = nil, &block)
|
74
|
+
@p.text(lit, &block)
|
75
|
+
end
|
76
|
+
|
77
|
+
def cut(&block)
|
78
|
+
@p.with_cut_point(&block)
|
79
|
+
end
|
80
|
+
|
81
|
+
def cut!
|
82
|
+
@p.current_cutpoint.cut!
|
83
|
+
end
|
84
|
+
|
85
|
+
def cont?(&block)
|
86
|
+
@p.current_cutpoint.can_continue? && (block ? @p.backtrack(&block) : true)
|
87
|
+
end
|
88
|
+
|
89
|
+
def rep(range = 0.., lit = nil, &block)
|
90
|
+
@p.repetition(range, lit, &block)
|
91
|
+
end
|
92
|
+
|
93
|
+
# specialized matchers
|
94
|
+
|
95
|
+
def eof
|
96
|
+
@p.eof? && :eof
|
97
|
+
end
|
98
|
+
|
99
|
+
def nl
|
100
|
+
_(/\r\n|\r|\n/)
|
101
|
+
end
|
102
|
+
|
103
|
+
def eol
|
104
|
+
_ { lnsp? && nl && :eol }
|
105
|
+
end
|
106
|
+
|
107
|
+
def lnsp?
|
108
|
+
_(/[ \t]*/)
|
109
|
+
end
|
110
|
+
|
111
|
+
def lnsp!
|
112
|
+
_(/[ \t]+/)
|
113
|
+
end
|
114
|
+
|
115
|
+
def sp!
|
116
|
+
_(/\s+/)
|
117
|
+
end
|
118
|
+
|
119
|
+
def sp?
|
120
|
+
_(/\s*/)
|
121
|
+
end
|
122
|
+
|
123
|
+
def ident
|
124
|
+
(w = ident_only) && token_sp? && w
|
125
|
+
end
|
126
|
+
|
127
|
+
# raw token match
|
128
|
+
# if token is ident, then exact match performed with whole next ident
|
129
|
+
# else only string match
|
130
|
+
# and then whitespace is consumed
|
131
|
+
def `(token)
|
132
|
+
@p.match {
|
133
|
+
if _is_ident?(token)
|
134
|
+
_{ ident_only == token } || fail!(pat: token)
|
135
|
+
else
|
136
|
+
@p.match(token)
|
137
|
+
end && token_sp? && token
|
138
|
+
}
|
139
|
+
end
|
140
|
+
|
141
|
+
def _is_ident?(tok)
|
142
|
+
@_is_ident ||= Hash.new { |h, k|
|
143
|
+
h[k] = self.class.parse(:ident_only, k) == k
|
144
|
+
}
|
145
|
+
@_is_ident[tok]
|
146
|
+
end
|
147
|
+
|
148
|
+
def ident_only
|
149
|
+
txt(/[a-zA-Z_]\w*/)
|
150
|
+
end
|
151
|
+
|
152
|
+
def token_sp?
|
153
|
+
_(/\s*/)
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
data/lib/fupeg/parser.rb
CHANGED
@@ -4,6 +4,11 @@ require "strscan"
|
|
4
4
|
|
5
5
|
module FuPeg
|
6
6
|
class Parser
|
7
|
+
attr_accessor :debug
|
8
|
+
attr_accessor :file
|
9
|
+
attr_reader :failed
|
10
|
+
attr_reader :str
|
11
|
+
|
7
12
|
def initialize(str, pos = 0)
|
8
13
|
reset!(str, pos)
|
9
14
|
end
|
@@ -19,40 +24,56 @@ module FuPeg
|
|
19
24
|
@scan.pos = pos
|
20
25
|
end
|
21
26
|
@failed = nil
|
27
|
+
@debug = false
|
22
28
|
@cut = CutPoint.new
|
23
29
|
end
|
24
30
|
|
25
|
-
attr_reader :failed
|
26
|
-
|
27
31
|
def bytepos
|
28
32
|
@scan.pos
|
29
33
|
end
|
30
34
|
|
31
|
-
def charpos
|
32
|
-
@str_size - @str.byteslice(
|
35
|
+
def charpos(pos = @scan.pos)
|
36
|
+
@str_size - @str.byteslice(pos..).size
|
33
37
|
end
|
34
38
|
|
35
|
-
Fail = Struct.new(:stack, :
|
39
|
+
Fail = Struct.new(:stack, :bytepos, :pattern)
|
36
40
|
|
37
|
-
def fail!(skip
|
38
|
-
if !@failed || bytepos > @failed.bytepos
|
41
|
+
def fail!(*, pat: nil, skip: 2)
|
42
|
+
if debug || !@failed || bytepos > @failed.bytepos
|
39
43
|
stack = caller_locations(skip)
|
40
44
|
stack.delete_if do |loc|
|
41
|
-
|
42
|
-
|
45
|
+
path = loc.path
|
46
|
+
if path == __FILE__
|
47
|
+
true
|
48
|
+
elsif path.start_with?(__dir__)
|
49
|
+
loc.label =~ /\b(backtrack|each|block)\b/
|
43
50
|
end
|
44
51
|
end
|
45
|
-
|
46
|
-
|
52
|
+
@failed = Fail.new(stack, bytepos, pat)
|
53
|
+
report_failed($stderr) if debug
|
47
54
|
end
|
48
55
|
nil
|
49
56
|
end
|
50
57
|
|
58
|
+
def failed_position
|
59
|
+
position(bytepos: @failed.bytepos)
|
60
|
+
end
|
61
|
+
|
51
62
|
def report_failed(out)
|
52
|
-
pos = @failed.
|
53
|
-
out <<
|
54
|
-
|
55
|
-
|
63
|
+
pos = position(bytepos: @failed.bytepos)
|
64
|
+
out << if @failed.pattern
|
65
|
+
"Failed #{failed.pattern.inspect} at #{pos.lineno}:#{pos.colno}"
|
66
|
+
else
|
67
|
+
"Failed at #{pos.lineno}:#{pos.colno}"
|
68
|
+
end
|
69
|
+
if @file
|
70
|
+
out << " of #{@file}"
|
71
|
+
end
|
72
|
+
out << ":\n"
|
73
|
+
out << pos.line.chomp + "\n"
|
74
|
+
curpos = pos.line[...pos.colno].gsub("\t", " " * 8).size
|
75
|
+
curpos = 1 if curpos == 0 && @failed.bytepos == @str.bytesize
|
76
|
+
out << (" " * (curpos - 1) + "^\n")
|
56
77
|
out << "Call stack:\n"
|
57
78
|
@failed.stack.each do |loc|
|
58
79
|
out << "#{loc.path}:#{loc.lineno} in #{loc.label}\n"
|
@@ -60,75 +81,11 @@ module FuPeg
|
|
60
81
|
out
|
61
82
|
end
|
62
83
|
|
63
|
-
def dot
|
64
|
-
@scan.scan(/./m) || fail!
|
65
|
-
end
|
66
|
-
|
67
|
-
begin
|
68
|
-
StringScanner.new("x").skip("x")
|
69
|
-
def lit(reg_or_str)
|
70
|
-
@scan.scan(reg_or_str) || fail!
|
71
|
-
end
|
72
|
-
rescue
|
73
|
-
def lit(reg_or_str)
|
74
|
-
if String === reg_or_str
|
75
|
-
@__match_lit_cache ||= Hash.new { |h, s| h[s] = Regexp.new(Regexp.escape(s)) }
|
76
|
-
reg_or_str = @__match_lit_cache[reg_or_str]
|
77
|
-
end
|
78
|
-
@scan.scan(reg_or_str) || fail!
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def seq(*args, &block)
|
83
|
-
_bt(&block)
|
84
|
-
end
|
85
|
-
|
86
|
-
def opt(&block)
|
87
|
-
_rewind(nil, @failed, _bt(&block) || true)
|
88
|
-
end
|
89
|
-
|
90
|
-
def rep(range = 0.., &block)
|
91
|
-
range = range..range if Integer === range
|
92
|
-
range = 0..range.max if range.begin.nil?
|
93
|
-
unless Integer === range.min && (range.end.nil? || Integer === range.max)
|
94
|
-
raise "Range malformed #{range}"
|
95
|
-
end
|
96
|
-
_bt do
|
97
|
-
max = range.end && range.max
|
98
|
-
ar = []
|
99
|
-
(1..max).each do
|
100
|
-
res = _bt(&block)
|
101
|
-
break unless res
|
102
|
-
ar << res
|
103
|
-
end
|
104
|
-
(ar.size >= range.min) ? ar : fail!
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def text(&block)
|
109
|
-
pos = @scan.pos
|
110
|
-
_bt(&block) && @str.byteslice(pos, @scan.pos - pos)
|
111
|
-
end
|
112
|
-
|
113
|
-
def will?(&block)
|
114
|
-
_rewind(@scan.pos, false, _bt(&block))
|
115
|
-
end
|
116
|
-
|
117
|
-
def wont!(&block)
|
118
|
-
_rewind(@scan.pos, @failed, !_bt(&block)) || fail!
|
119
|
-
end
|
120
|
-
|
121
|
-
# cut point handling
|
122
|
-
# cut do
|
123
|
-
# seq { lit("{") && cut! && lit("}") } ||
|
124
|
-
# !cut? && seq { lit("[") && cut! && lit("]") } ||
|
125
|
-
# !cut? && dot
|
126
|
-
# end
|
127
84
|
class CutPoint
|
128
85
|
attr_accessor :next
|
129
86
|
|
130
87
|
def initialize
|
131
|
-
@cut =
|
88
|
+
@cut = nil
|
132
89
|
@next = nil
|
133
90
|
end
|
134
91
|
|
@@ -137,13 +94,13 @@ module FuPeg
|
|
137
94
|
@cut = true
|
138
95
|
end
|
139
96
|
|
140
|
-
def
|
141
|
-
@cut
|
97
|
+
def can_continue?
|
98
|
+
@cut ? nil : true
|
142
99
|
end
|
143
100
|
end
|
144
101
|
|
145
|
-
# for use with cut! and
|
146
|
-
def
|
102
|
+
# for use with cut! and cont?
|
103
|
+
def with_cut_point
|
147
104
|
prev_cut = @cut
|
148
105
|
@cut = CutPoint.new
|
149
106
|
prev_cut.next = @cut
|
@@ -153,51 +110,98 @@ module FuPeg
|
|
153
110
|
@cut = prev_cut
|
154
111
|
end
|
155
112
|
|
156
|
-
def
|
157
|
-
@cut
|
158
|
-
end
|
159
|
-
|
160
|
-
def cut?
|
161
|
-
@cut.cut?
|
113
|
+
def current_cutpoint
|
114
|
+
@cut
|
162
115
|
end
|
163
116
|
|
164
117
|
# Position handling for failures
|
165
118
|
|
166
119
|
Position = Struct.new(:lineno, :colno, :line, :charpos)
|
167
120
|
|
168
|
-
private
|
169
|
-
|
170
121
|
def init_line_ends
|
171
122
|
@line_ends = [-1]
|
172
|
-
|
173
|
-
while
|
174
|
-
@line_ends <<
|
175
|
-
pos += 1
|
123
|
+
scan = StringScanner.new(@str)
|
124
|
+
while scan.skip_until(/\n|\r\n?/)
|
125
|
+
@line_ends << scan.pos - 1
|
176
126
|
end
|
177
|
-
@line_ends << @str.
|
127
|
+
@line_ends << @str.bytesize
|
178
128
|
end
|
179
129
|
|
180
|
-
|
181
|
-
|
182
|
-
def position_for_charpos(charpos)
|
183
|
-
lineno = @line_ends.bsearch_index { |x| x >= charpos }
|
130
|
+
def position(bytepos: @scan.pos)
|
131
|
+
lineno = @line_ends.bsearch_index { |x| x >= bytepos }
|
184
132
|
case lineno
|
185
133
|
when nil
|
186
|
-
raise "Position #{
|
134
|
+
raise "Position #{bytepos} is larger than string byte size #{@str.bytesize}"
|
187
135
|
else
|
188
136
|
prev_end = @line_ends[lineno - 1]
|
189
137
|
line_start = prev_end + 1
|
190
|
-
column =
|
138
|
+
column = @str.byteslice(line_start, bytepos - prev_end).size
|
191
139
|
end
|
192
|
-
|
193
|
-
|
140
|
+
if bytepos == @str.bytesize
|
141
|
+
if @str[-1] == "\n"
|
142
|
+
lineno, column = lineno + 1, 1
|
143
|
+
else
|
144
|
+
column += 1
|
145
|
+
end
|
146
|
+
end
|
147
|
+
line = @str.byteslice(line_start..@line_ends[lineno])
|
148
|
+
Position.new(lineno, column, line, charpos(bytepos))
|
194
149
|
end
|
195
150
|
|
196
151
|
# helper methods
|
197
152
|
|
198
|
-
|
153
|
+
begin
|
154
|
+
StringScanner.new("x").skip("x")
|
155
|
+
def match(lit = nil, &block)
|
156
|
+
block ? backtrack(&block) : (!lit || @scan.skip(lit) && true || fail!(pat: lit))
|
157
|
+
end
|
158
|
+
rescue
|
159
|
+
def match(lit = nil, &block)
|
160
|
+
if String === lit
|
161
|
+
@_lit_cache ||= {}
|
162
|
+
lit = @_lit_cache[lit] ||= Regexp.new(Regexp.escape(lit))
|
163
|
+
end
|
164
|
+
block ? backtrack(&block) : (!lit || @scan.skip(lit) && true || fail!(pat: lit))
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
def text(lit = nil, &block)
|
169
|
+
pos = @scan.pos
|
170
|
+
match(lit, &block) && @str.byteslice(pos, @scan.pos - pos)
|
171
|
+
end
|
199
172
|
|
200
|
-
def
|
173
|
+
def bounds(lit = nil, &block)
|
174
|
+
pos = @scan.pos
|
175
|
+
match(lit, &block) && pos...@scan.pos
|
176
|
+
end
|
177
|
+
|
178
|
+
def repetition(range = 0.., lit = nil, &block)
|
179
|
+
range = range..range if Integer === range
|
180
|
+
range = 0..range.max if range.begin.nil?
|
181
|
+
unless Integer === range.min && (range.end.nil? || Integer === range.max)
|
182
|
+
raise "Range malformed #{range}"
|
183
|
+
end
|
184
|
+
backtrack do
|
185
|
+
max = range.end && range.max
|
186
|
+
ar = []
|
187
|
+
(1..max).each do |i|
|
188
|
+
res = backtrack { yield i == 1 }
|
189
|
+
break unless res
|
190
|
+
ar << res
|
191
|
+
end
|
192
|
+
(ar.size >= range.min) ? ar : fail!
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def dot
|
197
|
+
match(/./m)
|
198
|
+
end
|
199
|
+
|
200
|
+
def eof?
|
201
|
+
@scan.eos?
|
202
|
+
end
|
203
|
+
|
204
|
+
def backtrack
|
201
205
|
pos = @scan.pos
|
202
206
|
res = yield
|
203
207
|
if res
|
@@ -212,10 +216,25 @@ module FuPeg
|
|
212
216
|
raise
|
213
217
|
end
|
214
218
|
|
215
|
-
def
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
+
def look_ahead(positive, lit = nil, &block)
|
220
|
+
if block
|
221
|
+
p, f = @scan.pos, @failed
|
222
|
+
r = yield
|
223
|
+
@scan.pos = p
|
224
|
+
if positive ? r : !r
|
225
|
+
@failed = f
|
226
|
+
true
|
227
|
+
else
|
228
|
+
fail!
|
229
|
+
end
|
230
|
+
else
|
231
|
+
m = @scan.match?(lit)
|
232
|
+
if positive ? m : !m
|
233
|
+
true
|
234
|
+
else
|
235
|
+
fail!(pat: lit)
|
236
|
+
end
|
237
|
+
end
|
219
238
|
end
|
220
239
|
end
|
221
240
|
end
|
data/lib/fupeg/version.rb
CHANGED
data/lib/fupeg.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fupeg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yura Sokolov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "\n Simple backtracing parser, using ruby logical operators for primitive
|
14
14
|
sequence/choice\n and slim wrappers for other PEG style operators and backtrace.\n
|
@@ -26,7 +26,9 @@ files:
|
|
26
26
|
- LICENSE.txt
|
27
27
|
- README.md
|
28
28
|
- Rakefile
|
29
|
+
- examples/calc.rb
|
29
30
|
- lib/fupeg.rb
|
31
|
+
- lib/fupeg/grammar.rb
|
30
32
|
- lib/fupeg/parser.rb
|
31
33
|
- lib/fupeg/version.rb
|
32
34
|
- sig/fupeg.rbs
|