l43_peg 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +223 -63
- data/lib/l43_peg/combinators/debug_parser.rb +79 -0
- data/lib/l43_peg/combinators/sel.rb +24 -0
- data/lib/l43_peg/combinators.rb +30 -1
- data/lib/l43_peg/failure.rb +2 -0
- data/lib/l43_peg/input.rb +16 -0
- data/lib/l43_peg/parser.rb +3 -1
- data/lib/l43_peg/parsers/rgx_parser.rb +4 -0
- data/lib/l43_peg/parsers/rgx_tokenizer.rb +24 -0
- data/lib/l43_peg/parsers/sel_rgx_parser.rb +25 -0
- data/lib/l43_peg/parsers.rb +6 -5
- data/lib/l43_peg/success.rb +8 -1
- data/lib/l43_peg/tokens.rb +15 -0
- data/lib/l43_peg.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d386b9373423da69f7697e81bd02a011df1c36cc20ff6c861b3215a2081dea9
|
4
|
+
data.tar.gz: b82ba81e4372717f6603b6b41311d1ff2f608ce4a9cc7376c3242a65e3842432
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 265f550e6e956e85c5b0a39a5817ab13424e57d0da7d44ff9b946c318392421955292d7462b24a4a744c2f8361af10b627204800408fa500500c6779d359314b
|
7
|
+
data.tar.gz: 54eb3d55de2704d91c31d9fd31db3931fab1c5f614a005e11ca3efcafb854cf964f8b6a3e26bc43d93005065bc42dcdd517b6ef3d3284c3d205f93f7c9ccf466
|
data/README.md
CHANGED
@@ -6,91 +6,91 @@
|
|
6
6
|
|
7
7
|
### This Version (v0.1.x) is Alpha Quality (many PEG features are missing, like recursion and even alternatives).
|
8
8
|
|
9
|
-
It is however released because it offers quite some nice parsing of ARGV which shall be demonstrated by the following
|
10
|
-
[speculations](https://rubygems.org/gems/speculate_about)
|
9
|
+
It is however released because it offers quite some nice parsing of ARGV which shall be demonstrated by the following
|
10
|
+
[speculations](https://rubygems.org/gems/speculate_about)
|
11
11
|
|
12
|
-
See [README_spec.rb](spec/speculations/README_spec.rb) for the generated code for details
|
12
|
+
See [README_spec.rb](spec/speculations/README_spec.rb) for the generated code for details
|
13
13
|
|
14
14
|
### Context: `arg_parser`
|
15
15
|
|
16
16
|
Given the following argument specification
|
17
17
|
|
18
18
|
```ruby
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
19
|
+
include L43Peg::Combinators
|
20
|
+
let :args_spec do
|
21
|
+
{
|
22
|
+
start: "--start=(.*)",
|
23
|
+
end: "(?:--end|-e)=(.*)",
|
24
|
+
kwd: "--(alpha|beta|gamma)"
|
25
|
+
}
|
26
|
+
end
|
27
27
|
```
|
28
28
|
|
29
29
|
And the associated parser
|
30
30
|
|
31
31
|
```ruby
|
32
|
-
|
32
|
+
let(:parser) { args_parser(args_spec) }
|
33
33
|
```
|
34
34
|
|
35
35
|
Then we can parse some input
|
36
36
|
|
37
37
|
```ruby
|
38
|
-
|
38
|
+
assert_parse_success(parser, %w[--start=42 --beta -e=44], ast: {start: "42", kwd: "beta", end: "44"}, rest: [])
|
39
39
|
```
|
40
40
|
|
41
41
|
And we can get the rest in a list of tokens
|
42
42
|
|
43
43
|
```ruby
|
44
|
-
|
44
|
+
assert_parse_success(parser, %w[--start=42 --beta -e=44 -s=not_an_arg --end=too_late], ast: {start: "42", kwd: "beta", end: "44"}, rest: %w[-s=not_an_arg --end=too_late])
|
45
45
|
```
|
46
46
|
|
47
47
|
Also note that multiple values are passed into an array
|
48
48
|
|
49
49
|
```ruby
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
```
|
50
|
+
input = %w[--end=42 --beta -e=44 --beta --end=not_too_late --gamma]
|
51
|
+
ast = {end: %w[42 44 not_too_late], kwd: %w[beta beta gamma]}
|
52
|
+
assert_parse_success(parser, input, ast:, rest: [])
|
53
|
+
```
|
54
54
|
|
55
55
|
#### Context: Postprocessing
|
56
56
|
|
57
|
-
When we map the parser
|
58
|
-
|
59
|
-
```ruby
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
57
|
+
When we map the parser
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
let :int_args do
|
61
|
+
{
|
62
|
+
start: "--start=(.*)",
|
63
|
+
end: "--end=(.*)",
|
64
|
+
inc: "--inc=(.*)"
|
65
|
+
}
|
66
|
+
end
|
67
|
+
let(:int_arg_parser) {args_parser(int_args, name: "int parser", &:to_i)}
|
68
68
|
```
|
69
69
|
|
70
70
|
Then we can convert the string values
|
71
71
|
|
72
|
-
```ruby
|
73
|
-
|
74
|
-
```
|
72
|
+
```ruby
|
73
|
+
assert_parse_success(int_arg_parser, %w[--start=42 --end=44 --inc=2], ast: {start: 42, end: 44, inc: 2}, rest: [])
|
74
|
+
```
|
75
75
|
|
76
76
|
#### Context: Knowing When To Stop
|
77
77
|
|
78
|
-
An argument parser that respects itself provides a means to _end_ argument parsing even if more matches follow.
|
79
|
-
An example of that is the POSIX argument `--`
|
78
|
+
An argument parser that respects itself provides a means to _end_ argument parsing even if more matches follow.
|
79
|
+
An example of that is the POSIX argument `--`
|
80
80
|
|
81
|
-
We can use whatever we want in `args_parser`, here is a variation:
|
81
|
+
We can use whatever we want in `args_parser`, here is a variation:
|
82
82
|
|
83
|
-
Given the specification
|
83
|
+
Given the specification
|
84
84
|
|
85
|
-
```ruby
|
86
|
-
|
87
|
-
|
85
|
+
```ruby
|
86
|
+
let :args do
|
87
|
+
{
|
88
88
|
width: "w:(\\d+)",
|
89
89
|
height: "h:(\\d+)",
|
90
|
-
|
90
|
+
__stop: "(::)"
|
91
91
|
}
|
92
|
-
|
93
|
-
|
92
|
+
end
|
93
|
+
let(:wh_parser) {args_parser(args, stop: :__stop, &:to_i)}
|
94
94
|
```
|
95
95
|
|
96
96
|
Then parsing the following input
|
@@ -99,7 +99,7 @@ Then parsing the following input
|
|
99
99
|
input = %w[h:42 w:73 :: w:74]
|
100
100
|
ast = {height: 42, width: 73}
|
101
101
|
assert_parse_success(wh_parser, input, ast:, rest: %w[w:74])
|
102
|
-
```
|
102
|
+
```
|
103
103
|
|
104
104
|
### Context: User Interface
|
105
105
|
|
@@ -111,45 +111,205 @@ as module methods
|
|
111
111
|
|
112
112
|
Given an _exposed_ `args_parser`
|
113
113
|
```ruby
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
114
|
+
let :parser do
|
115
|
+
L43Peg::Parsers.args_parser(
|
116
|
+
{
|
117
|
+
negative: "(-\\d+)",
|
118
|
+
positive: "\\+?(\\d+)"
|
119
|
+
},
|
120
|
+
&:to_i
|
121
|
+
)
|
122
|
+
end
|
123
123
|
```
|
124
124
|
|
125
125
|
But we are also not interested in the internal representation of success and failure of parsing which was
|
126
126
|
used in the speculations above. Nor do we want to transform our input into the internal representations
|
127
127
|
as was done above by the helpers. (If you need to see the details of this you can inspect the
|
128
|
-
file [`parser_test.rb` in `spec/support`](spec/support/parser_test.rb))
|
129
|
-
|
128
|
+
file [`parser_test.rb` in `spec/support`](spec/support/parser_test.rb))
|
129
|
+
|
130
130
|
Then we can use the interface of `L43Peg`
|
131
131
|
|
132
132
|
```ruby
|
133
|
-
|
134
|
-
|
133
|
+
L43Peg.parse_tokens(parser, %w[43 -44 +45]) => :ok, result
|
134
|
+
expect(result).to eq(positive: [43, 45], negative: -44)
|
135
135
|
```
|
136
136
|
|
137
137
|
And if we get an error the result is as follows
|
138
138
|
|
139
139
|
```ruby
|
140
|
-
|
141
|
-
|
142
|
-
|
140
|
+
parser = L43Peg::Parsers.char_parser('a')
|
141
|
+
L43Peg.parse_string(parser, 'b') => :error, message
|
142
|
+
expect(message).to eq("char \"b\"")
|
143
|
+
```
|
144
|
+
|
145
|
+
## Context: Regexp Parser
|
146
|
+
|
147
|
+
The basic concept is the `rgx_parser`
|
148
|
+
|
149
|
+
Given a `rgx_parser` for an identifier
|
150
|
+
```ruby
|
151
|
+
include L43Peg::Parsers
|
152
|
+
let(:id_parser) { rgx_parser("[[:alpha:]][_[:alnum:]]*") }
|
153
|
+
```
|
154
|
+
|
155
|
+
Then we can parse strings that start as such
|
156
|
+
```ruby
|
157
|
+
assert_parse_success(id_parser, "l43_peg", ast: "l43_peg")
|
158
|
+
```
|
159
|
+
|
160
|
+
And we can discard some input from the ast with the aid of captures
|
161
|
+
```ruby
|
162
|
+
sym_parser = rgx_parser(":([[:alpha:]][_[:alnum:]]*)")
|
163
|
+
assert_parse_success(sym_parser, ":no_colon", ast: "no_colon")
|
164
|
+
```
|
165
|
+
|
166
|
+
But it can also fail
|
167
|
+
```ruby
|
168
|
+
reason = "input does not match /\\A[[:alpha:]][_[:alnum:]]*/ (in rgx_parser(\"[[:alpha:]][_[:alnum:]]*\"))"
|
169
|
+
assert_parse_failure(id_parser, "42", reason:)
|
170
|
+
```
|
171
|
+
|
172
|
+
#### Context: Warnings on empty matches
|
173
|
+
|
174
|
+
Oftentimes bugs in PEG parsing are caused by zero width matches, while this is quite obvious with the `many` and
|
175
|
+
`opt` or `maybe` combinators (**N.B.** they are not yet implemented, use `many(max: 1)` instead)
|
176
|
+
and the common use patterns with these combinators are safe.
|
177
|
+
|
178
|
+
However, regular expression parsing might hide zero width matches, and that's why they will trigger a warning by default
|
179
|
+
|
180
|
+
Given an empty match regex parser
|
181
|
+
```ruby
|
182
|
+
let(:empty_parser) { rgx_parser("a*") }
|
183
|
+
```
|
184
|
+
|
185
|
+
Then we get a warning when matching an empty string
|
186
|
+
```ruby
|
187
|
+
expect { assert_parse_success(empty_parser, "", ast: "") }
|
188
|
+
.to output("Warning, parser rgx_parser(\"a*\") succeeds with empty match\n").to_stderr
|
189
|
+
```
|
190
|
+
|
191
|
+
However this behavior can also be disabled
|
192
|
+
|
193
|
+
And therefore
|
194
|
+
```ruby
|
195
|
+
parser = rgx_parser("a*", warn_on_empty: false)
|
196
|
+
expect { assert_parse_success(parser, "", ast: "") }
|
197
|
+
.not_to output.to_stderr
|
198
|
+
```
|
199
|
+
|
200
|
+
|
201
|
+
### Context: Tokenize Strings with Regexen
|
202
|
+
|
203
|
+
Now we can use a list of `rgx_parsers` to _tokenize_ a string (in the same way we can use `tokens_parser` to
|
204
|
+
quantify elements of an array, but with dynamic bounds)
|
205
|
+
|
206
|
+
Given some regexen
|
207
|
+
```ruby
|
208
|
+
let :regexen do
|
209
|
+
[
|
210
|
+
[:verb, "<<", nil, ->(*){ [:verb, "<"] }],
|
211
|
+
[:verb, "\\$(\\$)"],
|
212
|
+
[:color_and_style, "<(.+?),(.+?)>", :all],
|
213
|
+
[:color, "<(.+?)>", 1],
|
214
|
+
[:reset, "\\$"],
|
215
|
+
[:verb, "[^<$]+"],
|
216
|
+
]
|
217
|
+
end
|
218
|
+
let(:tokenizer) { L43Peg::Combinators.rgx_tokenize(regexen) }
|
219
|
+
```
|
220
|
+
|
221
|
+
Then we can tokenize some inputs
|
222
|
+
```ruby
|
223
|
+
input = "<red,bold>HELLO$and<<<green>$$<reset>"
|
224
|
+
ast = [
|
225
|
+
[:color_and_style, ["<red,bold>", "red", "bold"]],
|
226
|
+
[:verb, "HELLO"],
|
227
|
+
[:reset, "$"],
|
228
|
+
[:verb, "and"],
|
229
|
+
[:verb, "<"],
|
230
|
+
[:color, "green"],
|
231
|
+
[:verb, "$"],
|
232
|
+
[:color, "reset"]
|
233
|
+
]
|
234
|
+
assert_parse_success(tokenizer, input, ast:)
|
235
|
+
```
|
236
|
+
|
237
|
+
### Context: Debugging
|
238
|
+
|
239
|
+
As parsers are by design imbricated (nested) functions, debugging is not always simple.
|
240
|
+
Enter the `debug_parser`, a parser that _debugs_ parsers without changing their behavior,
|
241
|
+
by displaying more or less detailed information
|
242
|
+
|
243
|
+
Given a parser
|
244
|
+
```ruby
|
245
|
+
include L43Peg::Combinators
|
246
|
+
let :args do
|
247
|
+
{
|
248
|
+
lat: "lat:(\\d+)",
|
249
|
+
long: "long:(\\d+)",
|
250
|
+
}
|
251
|
+
end
|
252
|
+
let(:geo_parser) {args_parser(args, &:to_i)}
|
253
|
+
```
|
254
|
+
|
255
|
+
#### Context: Minimum level of information
|
256
|
+
|
257
|
+
Given a minimum debug parser
|
258
|
+
```ruby
|
259
|
+
let(:debugger) {debug_parser(geo_parser, level: :min)}
|
260
|
+
```
|
261
|
+
|
262
|
+
Then we will get some output
|
263
|
+
```ruby
|
264
|
+
expected =
|
265
|
+
"Tokens<[\"lat:43\", \"long:2\"]>\nSuccess: @1\n"
|
266
|
+
expect { parsed_success(debugger, ["lat:43", "long:2"]) }
|
267
|
+
.to output(expected).to_stderr
|
268
|
+
```
|
269
|
+
|
270
|
+
#### Context: Default level of information
|
271
|
+
Given a default debug parser
|
272
|
+
```ruby
|
273
|
+
let(:debugger) {debug_parser(char_parser("a"))}
|
274
|
+
```
|
275
|
+
|
276
|
+
Then we will get some output on errors
|
277
|
+
```ruby
|
278
|
+
expected ="Input<\"b\"@1:1>\nFailure: char \"b\" @[1, 1]\n"
|
279
|
+
expect { parsed_failure(debugger, "b") }
|
280
|
+
.to output(expected).to_stderr
|
281
|
+
```
|
282
|
+
|
283
|
+
#### Context: Maximum level of information
|
284
|
+
Given a maximum level parser
|
285
|
+
```ruby
|
286
|
+
let(:max_debugger) { debug_parser(char_parser("b"), level: :max) }
|
287
|
+
```
|
288
|
+
|
289
|
+
Then we will get this output on success
|
290
|
+
```ruby
|
291
|
+
expected =
|
292
|
+
[
|
293
|
+
"================================================================================",
|
294
|
+
'Input<col:1 input:"bc" lnb:1 context:{}>',
|
295
|
+
"================================================================================",
|
296
|
+
'Success<ast:"b" cache:{} rest:"c">',
|
297
|
+
"================================================================================",
|
298
|
+
""
|
299
|
+
].join("\n")
|
300
|
+
|
301
|
+
expect { parsed_success(max_debugger,"bc") }
|
302
|
+
.to output(expected).to_stderr
|
143
303
|
```
|
144
304
|
|
145
305
|
|
146
306
|
# Author
|
147
307
|
|
148
|
-
Copyright © 2024 Robert Dober
|
149
|
-
robert.dober@gmail.com
|
308
|
+
Copyright © 2024 Robert Dober
|
309
|
+
robert.dober@gmail.com
|
150
310
|
|
151
311
|
# LICENSE
|
152
312
|
|
153
|
-
GNU AFFERO GENERAL PUBLIC LICENSE, Version 3, 19 November 2007. Please refer to [LICENSE](LICENSE) for details.
|
313
|
+
GNU AFFERO GENERAL PUBLIC LICENSE, Version 3, 19 November 2007. Please refer to [LICENSE](LICENSE) for details.
|
154
314
|
|
155
|
-
<!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
|
315
|
+
<!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module L43Peg
|
4
|
+
module Combinators
|
5
|
+
module DebugParser extend self
|
6
|
+
MIN_LEVEL = 1
|
7
|
+
DEFAULT_LEVEL = 5
|
8
|
+
MAX_LEVEL = 10
|
9
|
+
|
10
|
+
def parse_with_debug(parser:, name:, level:)
|
11
|
+
level = _normalize_level(level)
|
12
|
+
-> (input, cache, name1=nil) do
|
13
|
+
_report_input(input, cache:, level:, name: name1||name)
|
14
|
+
case parser.(input, cache:)
|
15
|
+
in Success => success
|
16
|
+
_report_success(success, name: name1||name, level:)
|
17
|
+
in Failure => failure
|
18
|
+
_report_failure(failure, name: name1||name, level:)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def _hl(level:)
|
26
|
+
_prt("="*80, level:, needed: 10)
|
27
|
+
end
|
28
|
+
|
29
|
+
def _normalize_level(level)
|
30
|
+
case level
|
31
|
+
when :min
|
32
|
+
MIN_LEVEL
|
33
|
+
when :max
|
34
|
+
MAX_LEVEL
|
35
|
+
when :default
|
36
|
+
DEFAULT_LEVEL
|
37
|
+
when MIN_LEVEL..MAX_LEVEL
|
38
|
+
level
|
39
|
+
else
|
40
|
+
raise ArgumentError, "bad level: #{level}, use a number between #{MIN_LEVEL} and #{MAX_LEVEL} or :min, :max, :default"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def _report_failure(failure, name:, level:)
|
45
|
+
_hl(level:)
|
46
|
+
_prt("Failure: #{failure.inspect}", level:, needed: MAX_LEVEL) ||
|
47
|
+
_prt("Failure: #{failure.reason} #{failure.position_hr}", level:, needed: DEFAULT_LEVEL) ||
|
48
|
+
_prt("Failure: #{failure.reason}")
|
49
|
+
failure
|
50
|
+
end
|
51
|
+
|
52
|
+
def _report_input(input, cache:, name:, level:)
|
53
|
+
_hl(level:)
|
54
|
+
_prt(input.debug, level:, needed: MAX_LEVEL) ||
|
55
|
+
_prt(input.head_hr(20, position: true), level:, needed: DEFAULT_LEVEL) ||
|
56
|
+
_prt(input.head_hr(5))
|
57
|
+
end
|
58
|
+
|
59
|
+
def _report_success(success, name:, level:)
|
60
|
+
_hl(level:)
|
61
|
+
_prt("#{success.debug}", level:, needed: MAX_LEVEL) ||
|
62
|
+
_prt("Success: #{success.ast.inspect} @#{success.position}", level:, needed: DEFAULT_LEVEL) ||
|
63
|
+
_prt("Success: @#{success.position}")
|
64
|
+
_hl(level:)
|
65
|
+
|
66
|
+
success
|
67
|
+
end
|
68
|
+
|
69
|
+
def _prt(message, level: MIN_LEVEL, needed: MIN_LEVEL, nl: true)
|
70
|
+
return unless level >= needed
|
71
|
+
|
72
|
+
$stderr.print(message)
|
73
|
+
$stderr.puts if nl
|
74
|
+
true
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../helper"
|
4
|
+
module L43Peg
|
5
|
+
module Combinators
|
6
|
+
module Sel extend self
|
7
|
+
include L43Peg::Helper
|
8
|
+
|
9
|
+
def sel(input:, cache:, name:, parsers:)
|
10
|
+
parsers.each do |parser|
|
11
|
+
case parser.(input, cache:)
|
12
|
+
in L43Peg::Success => success
|
13
|
+
return success
|
14
|
+
else
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
fail_parser("sel #{name} could not match any option", input:)
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
data/lib/l43_peg/combinators.rb
CHANGED
@@ -20,10 +20,15 @@ module L43Peg
|
|
20
20
|
map(inner, name:, fn: join_maps)
|
21
21
|
end
|
22
22
|
|
23
|
+
def debug_parser(parser, name: nil, level: :default)
|
24
|
+
name ||= "debug_parser(#{parser.name})"
|
25
|
+
Parser.new(name, &DebugParser.parse_with_debug(parser:, level:, name:))
|
26
|
+
end
|
27
|
+
|
23
28
|
def many(parser, name: nil, min: 0, max: nil)
|
24
29
|
Parser.new(name || "many(#{parser.name})") {|input, cache, name1=nil| Many.many(input:, cache:, name: name1 || name, parser:, min:, max:)}
|
25
30
|
end
|
26
|
-
|
31
|
+
|
27
32
|
def map(parser, name: nil, fn: nil, &mapper)
|
28
33
|
raise ArgumentError, "must not provide keyword parameyer fn and a block" if fn && mapper
|
29
34
|
mapper = fn || mapper
|
@@ -31,6 +36,22 @@ module L43Peg
|
|
31
36
|
Parser.new(name || "map(#{parser.name})") {|input, cache, name=nil| _map(input:, cache:, name:, parser:, mapper:)}
|
32
37
|
end
|
33
38
|
|
39
|
+
def rgx_tokenize(rgx_specs, name: nil)
|
40
|
+
name = name || "rgx_tokenize(#{rgx_specs.inspect})"
|
41
|
+
many(sel_rgx_parser(rgx_specs, name:))
|
42
|
+
end
|
43
|
+
|
44
|
+
def sel(parsers, name: nil)
|
45
|
+
name ||= "sel(#{parsers.map(&:name).join(", ")})"
|
46
|
+
Parser.new(name) { |input, cache, name1=nil| Sel.sel(input:, cache:, name: name1 || name, parsers:) }
|
47
|
+
end
|
48
|
+
|
49
|
+
def sel_rgx_parser(rgx_specs, name: nil)
|
50
|
+
name = name || "sel_rgx_parser(#{rgx_specs.inspect})"
|
51
|
+
parsers = rgx_specs.map(&_mk_rgx_parser)
|
52
|
+
sel(parsers, name:)
|
53
|
+
end
|
54
|
+
|
34
55
|
def seq(*parsers, name: nil)
|
35
56
|
name ||= "seq(#{parsers.map(&:name).join(", ")})"
|
36
57
|
Parser.new(name) {|input, cache, _name=nil| Seq.seq(input:, cache:, name:, parsers:)}
|
@@ -46,6 +67,14 @@ module L43Peg
|
|
46
67
|
success.map(&mapper)
|
47
68
|
end
|
48
69
|
end
|
70
|
+
|
71
|
+
def _mk_rgx_parser
|
72
|
+
-> rgxspec do
|
73
|
+
name, rgx, capture, fn = rgxspec
|
74
|
+
fn ||= -> (captures) { [name, captures] }
|
75
|
+
map(Parsers.rgx_parser(rgx, capture:), &fn)
|
76
|
+
end
|
77
|
+
end
|
49
78
|
end
|
50
79
|
end
|
51
80
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
data/lib/l43_peg/failure.rb
CHANGED
data/lib/l43_peg/input.rb
CHANGED
@@ -6,6 +6,12 @@ module L43Peg
|
|
6
6
|
|
7
7
|
attributes col: 1, input: "", lnb: 1, context: {}
|
8
8
|
|
9
|
+
def debug
|
10
|
+
"Input<" +
|
11
|
+
["col:#{col}", "input:#{input.inspect}", "lnb:#{lnb}", "context:#{context.inspect}"].join(" ") +
|
12
|
+
">"
|
13
|
+
end
|
14
|
+
|
9
15
|
def drop(by=nil)
|
10
16
|
case by
|
11
17
|
when nil
|
@@ -19,8 +25,18 @@ module L43Peg
|
|
19
25
|
|
20
26
|
def empty? = input.empty?
|
21
27
|
|
28
|
+
def head_hr(n, position: false)
|
29
|
+
if position
|
30
|
+
"Input<#{input[0...n].inspect}@#{position_hr}>"
|
31
|
+
else
|
32
|
+
"Input<#{input[0...n].inspect}>"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
22
36
|
def position = [@col, @lnb]
|
23
37
|
|
38
|
+
def position_hr = [@col, @lnb].join(":")
|
39
|
+
|
24
40
|
private
|
25
41
|
|
26
42
|
def _drop_by_n(n)
|
data/lib/l43_peg/parser.rb
CHANGED
@@ -9,10 +9,14 @@ module L43Peg
|
|
9
9
|
def initialize(rgx, name: nil, **options)
|
10
10
|
name = name || "rgx_parser(#{rgx.inspect})"
|
11
11
|
rgx = _mk_rgx(rgx)
|
12
|
+
warn_on_empty = options.fetch(:warn_on_empty, true)
|
12
13
|
super(name) do |input, cache, _name|
|
13
14
|
case rgx.match(input.input)
|
14
15
|
in MatchData => md
|
15
16
|
ast = _from_match(md, options)
|
17
|
+
if ast.empty? && warn_on_empty
|
18
|
+
$stderr.puts("Warning, parser #{name} succeeds with empty match")
|
19
|
+
end
|
16
20
|
L43Peg::Success.new(ast:, cache:, rest: input.drop(md[0]), position: input.position)
|
17
21
|
else
|
18
22
|
L43Peg::Failure.new(cache:, input:, parsed_by: self, reason: "input does not match #{rgx.inspect} (in #{name})")
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# module L43Peg
|
4
|
+
# module Parsers
|
5
|
+
# class RgxTokenizer < L43Peg::Parser
|
6
|
+
|
7
|
+
# private
|
8
|
+
|
9
|
+
# def initialize(rgxen, name: nil, **options)
|
10
|
+
# name = name || "rgx_tokenizer(#{rgxen.inspect})"
|
11
|
+
# parsers = rgxen.map(&_mk_rgx_parser)
|
12
|
+
# Combinators.sel(parsers)
|
13
|
+
# end
|
14
|
+
|
15
|
+
# def _mk_rgx_parser
|
16
|
+
# -> rgxspec do
|
17
|
+
# name, rgx, capture = rgxspec
|
18
|
+
# Combinators.map(Parsers.rgx_parser(rgx, capture:)) { |captures| [name, captures] }
|
19
|
+
# end
|
20
|
+
# end
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# module L43Peg
|
4
|
+
# module Parsers
|
5
|
+
# class SelRgxParser < L43Peg::Parser
|
6
|
+
|
7
|
+
# private
|
8
|
+
|
9
|
+
# def initialize(rgxen, name: nil)
|
10
|
+
# name = name || "sel_rgx_parser(#{rgxen.inspect})"
|
11
|
+
# parsers = rgxen.map(&_mk_rgx_parser)
|
12
|
+
# Combinators.sel(parsers)
|
13
|
+
# end
|
14
|
+
|
15
|
+
# def _mk_rgx_parser
|
16
|
+
# -> rgxspec do
|
17
|
+
# name, rgx, capture, fn = rgxspec
|
18
|
+
# fn ||= (-> (captures) { [name, captures] })
|
19
|
+
# Combinators.map(Parsers.rgx_parser(rgx, capture:), &fn)
|
20
|
+
# end
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
data/lib/l43_peg/parsers.rb
CHANGED
@@ -7,11 +7,12 @@ module L43Peg
|
|
7
7
|
module Parsers extend self
|
8
8
|
def args_parser(args, name: nil, stop: nil, &blk) =
|
9
9
|
L43Peg::Combinators.args_parser(args, name:, stop:, &blk)
|
10
|
-
def char_parser(charset = nil)
|
11
|
-
def end_parser
|
12
|
-
def failure_parser
|
13
|
-
def int_parser
|
14
|
-
def rgx_parser(rgx, name: nil, **o)
|
10
|
+
def char_parser(charset = nil) = L43Peg::Parsers::CharParser.new(charset)
|
11
|
+
def end_parser = L43Peg::Parsers::EndParser.instance
|
12
|
+
def failure_parser = L43Peg::Parsers::FailureParser.instance
|
13
|
+
def int_parser = L43Peg::Parsers::IntParser.instance
|
14
|
+
def rgx_parser(rgx, name: nil, **o) = L43Peg::Parsers::RgxParser.new(rgx, name:, **o)
|
15
|
+
# def rgx_tokenizer(rgxn, name: nil, **o) = L43Peg::Parsers::RgxTokenizer.new(rgxn, name:, **o)
|
15
16
|
def token_parser(spc, name: nil, **o) = L43Peg::Parsers::TokenParser.new(spc, name:, **o)
|
16
17
|
def tokens_parser(map, name: nil, stop: nil, &b) =
|
17
18
|
L43Peg::Parsers::TokensParser.new(map, name:, stop:, &b)
|
data/lib/l43_peg/success.rb
CHANGED
@@ -8,8 +8,15 @@ module L43Peg
|
|
8
8
|
@position ||= _position
|
9
9
|
end
|
10
10
|
|
11
|
+
def debug
|
12
|
+
"Success<" +
|
13
|
+
[
|
14
|
+
"ast:#{ast.inspect}", "cache:#{cache.cache.inspect}", "rest:#{rest.input.inspect}"
|
15
|
+
].join(" ") + ">"
|
16
|
+
end
|
11
17
|
def map(&mapper)
|
12
|
-
|
18
|
+
update_attribute(:ast, &mapper)
|
19
|
+
# self.class.new(ast: mapper.(ast), cache:, position:, rest:)
|
13
20
|
end
|
14
21
|
|
15
22
|
private
|
data/lib/l43_peg/tokens.rb
CHANGED
@@ -11,9 +11,23 @@ module L43Peg
|
|
11
11
|
return self if empty?
|
12
12
|
self.class.new(tokens: input.drop(by), context:, tnb: tnb + by)
|
13
13
|
end
|
14
|
+
|
15
|
+
def debug
|
16
|
+
"Tokens<" +
|
17
|
+
["tnb:#{tnb}", "tokens:#{tokens.inspect}", "context:#{context.inspect}"].join +
|
18
|
+
">"
|
19
|
+
end
|
14
20
|
|
15
21
|
def empty? = tokens.empty?
|
16
22
|
def head = tokens.first
|
23
|
+
def head_hr(n, position: false)
|
24
|
+
if position
|
25
|
+
"Tokens<[#{tokens[0..n].map(&:inspect).join(", ")}]@#{position_hr}>"
|
26
|
+
else
|
27
|
+
"Tokens<[#{tokens[0..n].map(&:inspect).join(", ")}]>"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
17
31
|
def input = tokens
|
18
32
|
|
19
33
|
def match(str_or_rgx, option)
|
@@ -28,6 +42,7 @@ module L43Peg
|
|
28
42
|
end
|
29
43
|
|
30
44
|
def position = tnb
|
45
|
+
def position_hr = tnb.to_s
|
31
46
|
|
32
47
|
private
|
33
48
|
|
data/lib/l43_peg.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: l43_peg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Dober
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: l43_open_object
|
@@ -40,7 +40,9 @@ files:
|
|
40
40
|
- lib/l43_peg.rb
|
41
41
|
- lib/l43_peg/cache.rb
|
42
42
|
- lib/l43_peg/combinators.rb
|
43
|
+
- lib/l43_peg/combinators/debug_parser.rb
|
43
44
|
- lib/l43_peg/combinators/many.rb
|
45
|
+
- lib/l43_peg/combinators/sel.rb
|
44
46
|
- lib/l43_peg/combinators/seq.rb
|
45
47
|
- lib/l43_peg/failure.rb
|
46
48
|
- lib/l43_peg/helper.rb
|
@@ -53,6 +55,8 @@ files:
|
|
53
55
|
- lib/l43_peg/parsers/failure_parser.rb
|
54
56
|
- lib/l43_peg/parsers/int_parser.rb
|
55
57
|
- lib/l43_peg/parsers/rgx_parser.rb
|
58
|
+
- lib/l43_peg/parsers/rgx_tokenizer.rb
|
59
|
+
- lib/l43_peg/parsers/sel_rgx_parser.rb
|
56
60
|
- lib/l43_peg/parsers/token_parser.rb
|
57
61
|
- lib/l43_peg/parsers/tokens_parser.rb
|
58
62
|
- lib/l43_peg/parsers/verb_parser.rb
|