l43_peg 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +223 -63
- data/lib/l43_peg/combinators/debug_parser.rb +79 -0
- data/lib/l43_peg/combinators/sel.rb +24 -0
- data/lib/l43_peg/combinators.rb +30 -1
- data/lib/l43_peg/failure.rb +2 -0
- data/lib/l43_peg/input.rb +16 -0
- data/lib/l43_peg/parser.rb +3 -1
- data/lib/l43_peg/parsers/rgx_parser.rb +4 -0
- data/lib/l43_peg/parsers/rgx_tokenizer.rb +24 -0
- data/lib/l43_peg/parsers/sel_rgx_parser.rb +25 -0
- data/lib/l43_peg/parsers.rb +6 -5
- data/lib/l43_peg/success.rb +8 -1
- data/lib/l43_peg/tokens.rb +15 -0
- data/lib/l43_peg.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d386b9373423da69f7697e81bd02a011df1c36cc20ff6c861b3215a2081dea9
|
4
|
+
data.tar.gz: b82ba81e4372717f6603b6b41311d1ff2f608ce4a9cc7376c3242a65e3842432
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 265f550e6e956e85c5b0a39a5817ab13424e57d0da7d44ff9b946c318392421955292d7462b24a4a744c2f8361af10b627204800408fa500500c6779d359314b
|
7
|
+
data.tar.gz: 54eb3d55de2704d91c31d9fd31db3931fab1c5f614a005e11ca3efcafb854cf964f8b6a3e26bc43d93005065bc42dcdd517b6ef3d3284c3d205f93f7c9ccf466
|
data/README.md
CHANGED
@@ -6,91 +6,91 @@
|
|
6
6
|
|
7
7
|
### This Version (v0.1.x) is Alpha Quality (many PEG features are missing, like recursion and even alternatives).
|
8
8
|
|
9
|
-
It is however released because it offers quite some nice parsing of ARGV which shall be demonstrated by the following
|
10
|
-
[speculations](https://rubygems.org/gems/speculate_about)
|
9
|
+
It is however released because it offers quite some nice parsing of ARGV which shall be demonstrated by the following
|
10
|
+
[speculations](https://rubygems.org/gems/speculate_about)
|
11
11
|
|
12
|
-
See [README_spec.rb](spec/speculations/README_spec.rb) for the generated code for details
|
12
|
+
See [README_spec.rb](spec/speculations/README_spec.rb) for the generated code for details
|
13
13
|
|
14
14
|
### Context: `arg_parser`
|
15
15
|
|
16
16
|
Given the following argument specification
|
17
17
|
|
18
18
|
```ruby
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
19
|
+
include L43Peg::Combinators
|
20
|
+
let :args_spec do
|
21
|
+
{
|
22
|
+
start: "--start=(.*)",
|
23
|
+
end: "(?:--end|-e)=(.*)",
|
24
|
+
kwd: "--(alpha|beta|gamma)"
|
25
|
+
}
|
26
|
+
end
|
27
27
|
```
|
28
28
|
|
29
29
|
And the associated parser
|
30
30
|
|
31
31
|
```ruby
|
32
|
-
|
32
|
+
let(:parser) { args_parser(args_spec) }
|
33
33
|
```
|
34
34
|
|
35
35
|
Then we can parse some input
|
36
36
|
|
37
37
|
```ruby
|
38
|
-
|
38
|
+
assert_parse_success(parser, %w[--start=42 --beta -e=44], ast: {start: "42", kwd: "beta", end: "44"}, rest: [])
|
39
39
|
```
|
40
40
|
|
41
41
|
And we can get the rest in a list of tokens
|
42
42
|
|
43
43
|
```ruby
|
44
|
-
|
44
|
+
assert_parse_success(parser, %w[--start=42 --beta -e=44 -s=not_an_arg --end=too_late], ast: {start: "42", kwd: "beta", end: "44"}, rest: %w[-s=not_an_arg --end=too_late])
|
45
45
|
```
|
46
46
|
|
47
47
|
Also note that multiple values are passed into an array
|
48
48
|
|
49
49
|
```ruby
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
```
|
50
|
+
input = %w[--end=42 --beta -e=44 --beta --end=not_too_late --gamma]
|
51
|
+
ast = {end: %w[42 44 not_too_late], kwd: %w[beta beta gamma]}
|
52
|
+
assert_parse_success(parser, input, ast:, rest: [])
|
53
|
+
```
|
54
54
|
|
55
55
|
#### Context: Postprocessing
|
56
56
|
|
57
|
-
When we map the parser
|
58
|
-
|
59
|
-
```ruby
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
57
|
+
When we map the parser
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
let :int_args do
|
61
|
+
{
|
62
|
+
start: "--start=(.*)",
|
63
|
+
end: "--end=(.*)",
|
64
|
+
inc: "--inc=(.*)"
|
65
|
+
}
|
66
|
+
end
|
67
|
+
let(:int_arg_parser) {args_parser(int_args, name: "int parser", &:to_i)}
|
68
68
|
```
|
69
69
|
|
70
70
|
Then we can convert the string values
|
71
71
|
|
72
|
-
```ruby
|
73
|
-
|
74
|
-
```
|
72
|
+
```ruby
|
73
|
+
assert_parse_success(int_arg_parser, %w[--start=42 --end=44 --inc=2], ast: {start: 42, end: 44, inc: 2}, rest: [])
|
74
|
+
```
|
75
75
|
|
76
76
|
#### Context: Knowing When To Stop
|
77
77
|
|
78
|
-
An argument parser that respects itself provides a means to _end_ argument parsing even if more matches follow.
|
79
|
-
An exmaple for that is the posix argument `--`
|
78
|
+
An argument parser that respects itself provides a means to _end_ argument parsing even if more matches follow.
|
79
|
+
An example of that is the POSIX argument `--`
|
80
80
|
|
81
|
-
We can use whatever we want in `args_parser`, here is a variation:
|
81
|
+
We can use whatever we want in `args_parser`, here is a variation:
|
82
82
|
|
83
|
-
Given the specification
|
83
|
+
Given the specification
|
84
84
|
|
85
|
-
```ruby
|
86
|
-
|
87
|
-
|
85
|
+
```ruby
|
86
|
+
let :args do
|
87
|
+
{
|
88
88
|
width: "w:(\\d+)",
|
89
89
|
height: "h:(\\d+)",
|
90
|
-
|
90
|
+
__stop: "(::)"
|
91
91
|
}
|
92
|
-
|
93
|
-
|
92
|
+
end
|
93
|
+
let(:wh_parser) {args_parser(args, stop: :__stop, &:to_i)}
|
94
94
|
```
|
95
95
|
|
96
96
|
Then parsing the following input
|
@@ -99,7 +99,7 @@ Then parsing the following input
|
|
99
99
|
input = %w[h:42 w:73 :: w:74]
|
100
100
|
ast = {height: 42, width: 73}
|
101
101
|
assert_parse_success(wh_parser, input, ast:, rest: %w[w:74])
|
102
|
-
```
|
102
|
+
```
|
103
103
|
|
104
104
|
### Context: User Interface
|
105
105
|
|
@@ -111,45 +111,205 @@ as module methods
|
|
111
111
|
|
112
112
|
Given an _exposed_ `args_parser`
|
113
113
|
```ruby
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
114
|
+
let :parser do
|
115
|
+
L43Peg::Parsers.args_parser(
|
116
|
+
{
|
117
|
+
negative: "(-\\d+)",
|
118
|
+
positive: "\\+?(\\d+)"
|
119
|
+
},
|
120
|
+
&:to_i
|
121
|
+
)
|
122
|
+
end
|
123
123
|
```
|
124
124
|
|
125
125
|
But we are also not interested in the internal representation of success and failure of parsing which was
|
126
126
|
used in the speculations above. Nor do we want to transform our input into the internal representations
|
127
127
|
as was done above by the helpers. (If you need to see the details of this you can inspect the
|
128
|
-
file [`parser_test.rb` in `spec/support`](spec/support/parser_test.rb))
|
129
|
-
|
128
|
+
file [`parser_test.rb` in `spec/support`](spec/support/parser_test.rb))
|
129
|
+
|
130
130
|
Then we can use the interface of `L43Peg`
|
131
131
|
|
132
132
|
```ruby
|
133
|
-
|
134
|
-
|
133
|
+
L43Peg.parse_tokens(parser, %w[43 -44 +45]) => :ok, result
|
134
|
+
expect(result).to eq(positive: [43, 45], negative: -44)
|
135
135
|
```
|
136
136
|
|
137
137
|
And if we get an error the result is as follows
|
138
138
|
|
139
139
|
```ruby
|
140
|
-
|
141
|
-
|
142
|
-
|
140
|
+
parser = L43Peg::Parsers.char_parser('a')
|
141
|
+
L43Peg.parse_string(parser, 'b') => :error, message
|
142
|
+
expect(message).to eq("char \"b\"")
|
143
|
+
```
|
144
|
+
|
145
|
+
## Context: Regexp Parser
|
146
|
+
|
147
|
+
The basic concept is the `rgx_parser`
|
148
|
+
|
149
|
+
Given a `rgx_parser` for an identifier
|
150
|
+
```ruby
|
151
|
+
include L43Peg::Parsers
|
152
|
+
let(:id_parser) { rgx_parser("[[:alpha:]][_[:alnum:]]*") }
|
153
|
+
```
|
154
|
+
|
155
|
+
Then we can parse strings that start as such
|
156
|
+
```ruby
|
157
|
+
assert_parse_success(id_parser, "l43_peg", ast: "l43_peg")
|
158
|
+
```
|
159
|
+
|
160
|
+
And we can discard some input from the ast with the aid of captures
|
161
|
+
```ruby
|
162
|
+
sym_parser = rgx_parser(":([[:alpha:]][_[:alnum:]]*)")
|
163
|
+
assert_parse_success(sym_parser, ":no_colon", ast: "no_colon")
|
164
|
+
```
|
165
|
+
|
166
|
+
But it can also fail
|
167
|
+
```ruby
|
168
|
+
reason = "input does not match /\\A[[:alpha:]][_[:alnum:]]*/ (in rgx_parser(\"[[:alpha:]][_[:alnum:]]*\"))"
|
169
|
+
assert_parse_failure(id_parser, "42", reason:)
|
170
|
+
```
|
171
|
+
|
172
|
+
#### Context: Warnings on empty matches
|
173
|
+
|
174
|
+
Oftentimes bugs in PEG parsing are caused by zero width matches, while this is quite obvious with the `many` and
|
175
|
+
`opt` or `maybe` combinators (**N.B.** they are not yet implemented, use `many(max: 1)` instead)
|
176
|
+
and the common usage patterns with these combinators are safe.
|
177
|
+
|
178
|
+
However regular expression parsing might hide zero width matches, and that's why they will trigger a warning by default.
|
179
|
+
|
180
|
+
Given an empty match regex parser
|
181
|
+
```ruby
|
182
|
+
let(:empty_parser) { rgx_parser("a*") }
|
183
|
+
```
|
184
|
+
|
185
|
+
Then we get a warning when matching an empty string
|
186
|
+
```ruby
|
187
|
+
expect { assert_parse_success(empty_parser, "", ast: "") }
|
188
|
+
.to output("Warning, parser rgx_parser(\"a*\") succeeds with empty match\n").to_stderr
|
189
|
+
```
|
190
|
+
|
191
|
+
However this behavior can also be disabled
|
192
|
+
|
193
|
+
And therefore
|
194
|
+
```ruby
|
195
|
+
parser = rgx_parser("a*", warn_on_empty: false)
|
196
|
+
expect { assert_parse_success(parser, "", ast: "") }
|
197
|
+
.not_to output.to_stderr
|
198
|
+
```
|
199
|
+
|
200
|
+
|
201
|
+
### Context: Tokenize Strings with Regexen
|
202
|
+
|
203
|
+
Now we can use a list of `rgx_parsers` to _tokenize_ a string (in the same way we can use `tokens_parser` to
|
204
|
+
quantify elements of an array, but with dynamic bounds)
|
205
|
+
|
206
|
+
Given some regexen
|
207
|
+
```ruby
|
208
|
+
let :regexen do
|
209
|
+
[
|
210
|
+
[:verb, "<<", nil, ->(*){ [:verb, "<"] }],
|
211
|
+
[:verb, "\\$(\\$)"],
|
212
|
+
[:color_and_style, "<(.+?),(.+?)>", :all],
|
213
|
+
[:color, "<(.+?)>", 1],
|
214
|
+
[:reset, "\\$"],
|
215
|
+
[:verb, "[^<$]+"],
|
216
|
+
]
|
217
|
+
end
|
218
|
+
let(:tokenizer) { L43Peg::Combinators.rgx_tokenize(regexen) }
|
219
|
+
```
|
220
|
+
|
221
|
+
Then we can tokenize some inputs
|
222
|
+
```ruby
|
223
|
+
input = "<red,bold>HELLO$and<<<green>$$<reset>"
|
224
|
+
ast = [
|
225
|
+
[:color_and_style, ["<red,bold>", "red", "bold"]],
|
226
|
+
[:verb, "HELLO"],
|
227
|
+
[:reset, "$"],
|
228
|
+
[:verb, "and"],
|
229
|
+
[:verb, "<"],
|
230
|
+
[:color, "green"],
|
231
|
+
[:verb, "$"],
|
232
|
+
[:color, "reset"]
|
233
|
+
]
|
234
|
+
assert_parse_success(tokenizer, input, ast:)
|
235
|
+
```
|
236
|
+
|
237
|
+
### Context: Debugging
|
238
|
+
|
239
|
+
As parsers are by design imbricated functions debugging is not always simple.
|
240
|
+
Enter the `debug_parser`, a parser that _debugs_ parsers by not changing their behavior
|
241
|
+
but displaying more or less detailed information
|
242
|
+
|
243
|
+
Given a parser
|
244
|
+
```ruby
|
245
|
+
include L43Peg::Combinators
|
246
|
+
let :args do
|
247
|
+
{
|
248
|
+
lat: "lat:(\\d+)",
|
249
|
+
long: "long:(\\d+)",
|
250
|
+
}
|
251
|
+
end
|
252
|
+
let(:geo_parser) {args_parser(args, &:to_i)}
|
253
|
+
```
|
254
|
+
|
255
|
+
#### Context: Minimum level of information
|
256
|
+
|
257
|
+
Given a minimum debug parser
|
258
|
+
```ruby
|
259
|
+
let(:debugger) {debug_parser(geo_parser, level: :min)}
|
260
|
+
```
|
261
|
+
|
262
|
+
Then we will get some output
|
263
|
+
```ruby
|
264
|
+
expected =
|
265
|
+
"Tokens<[\"lat:43\", \"long:2\"]>\nSuccess: @1\n"
|
266
|
+
expect { parsed_success(debugger, ["lat:43", "long:2"]) }
|
267
|
+
.to output(expected).to_stderr
|
268
|
+
```
|
269
|
+
|
270
|
+
#### Context: Default level of information
|
271
|
+
Given a default debug parser
|
272
|
+
```ruby
|
273
|
+
let(:debugger) {debug_parser(char_parser("a"))}
|
274
|
+
```
|
275
|
+
|
276
|
+
Then we will get some output on errors
|
277
|
+
```ruby
|
278
|
+
expected ="Input<\"b\"@1:1>\nFailure: char \"b\" @[1, 1]\n"
|
279
|
+
expect { parsed_failure(debugger, "b") }
|
280
|
+
.to output(expected).to_stderr
|
281
|
+
```
|
282
|
+
|
283
|
+
#### Context: Maximum level of information
|
284
|
+
Given a maximum level parser
|
285
|
+
```ruby
|
286
|
+
let(:max_debugger) { debug_parser(char_parser("b"), level: :max) }
|
287
|
+
```
|
288
|
+
|
289
|
+
Then we will get this output on success
|
290
|
+
```ruby
|
291
|
+
expected =
|
292
|
+
[
|
293
|
+
"================================================================================",
|
294
|
+
'Input<col:1 input:"bc" lnb:1 context:{}>',
|
295
|
+
"================================================================================",
|
296
|
+
'Success<ast:"b" cache:{} rest:"c">',
|
297
|
+
"================================================================================",
|
298
|
+
""
|
299
|
+
].join("\n")
|
300
|
+
|
301
|
+
expect { parsed_success(max_debugger,"bc") }
|
302
|
+
.to output(expected).to_stderr
|
143
303
|
```
|
144
304
|
|
145
305
|
|
146
306
|
# Author
|
147
307
|
|
148
|
-
Copyright © 2024 Robert Dober
|
149
|
-
robert.dober@gmail.com
|
308
|
+
Copyright © 2024 Robert Dober
|
309
|
+
robert.dober@gmail.com
|
150
310
|
|
151
311
|
# LICENSE
|
152
312
|
|
153
|
-
GNU AFFERO GENERAL PUBLIC LICENSE, Version 3, 19 November 2007. Please refer to [LICENSE](LICENSE) for details.
|
313
|
+
GNU AFFERO GENERAL PUBLIC LICENSE, Version 3, 19 November 2007. Please refer to [LICENSE](LICENSE) for details.
|
154
314
|
|
155
|
-
<!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
|
315
|
+
<!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module L43Peg
|
4
|
+
module Combinators
|
5
|
+
module DebugParser extend self
|
6
|
+
MIN_LEVEL = 1
|
7
|
+
DEFAULT_LEVEL = 5
|
8
|
+
MAX_LEVEL = 10
|
9
|
+
|
10
|
+
def parse_with_debug(parser:, name:, level:)
|
11
|
+
level = _normalize_level(level)
|
12
|
+
-> (input, cache, name1=nil) do
|
13
|
+
_report_input(input, cache:, level:, name: name1||name)
|
14
|
+
case parser.(input, cache:)
|
15
|
+
in Success => success
|
16
|
+
_report_success(success, name: name1||name, level:)
|
17
|
+
in Failure => failure
|
18
|
+
_report_failure(failure, name: name1||name, level:)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def _hl(level:)
|
26
|
+
_prt("="*80, level:, needed: 10)
|
27
|
+
end
|
28
|
+
|
29
|
+
def _normalize_level(level)
|
30
|
+
case level
|
31
|
+
when :min
|
32
|
+
MIN_LEVEL
|
33
|
+
when :max
|
34
|
+
MAX_LEVEL
|
35
|
+
when :default
|
36
|
+
DEFAULT_LEVEL
|
37
|
+
when MIN_LEVEL..MAX_LEVEL
|
38
|
+
level
|
39
|
+
else
|
40
|
+
raise ArgumentError, "bad level: #{level}, use a number between #{MIN_LEVEL} and #{MAX_LEVEL} or :min, :max, :default"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def _report_failure(failure, name:, level:)
|
45
|
+
_hl(level:)
|
46
|
+
_prt("Failure: #{failure.inspect}", level:, needed: MAX_LEVEL) ||
|
47
|
+
_prt("Failure: #{failure.reason} #{failure.position_hr}", level:, needed: DEFAULT_LEVEL) ||
|
48
|
+
_prt("Failure: #{failure.reason}")
|
49
|
+
failure
|
50
|
+
end
|
51
|
+
|
52
|
+
def _report_input(input, cache:, name:, level:)
|
53
|
+
_hl(level:)
|
54
|
+
_prt(input.debug, level:, needed: MAX_LEVEL) ||
|
55
|
+
_prt(input.head_hr(20, position: true), level:, needed: DEFAULT_LEVEL) ||
|
56
|
+
_prt(input.head_hr(5))
|
57
|
+
end
|
58
|
+
|
59
|
+
def _report_success(success, name:, level:)
|
60
|
+
_hl(level:)
|
61
|
+
_prt("#{success.debug}", level:, needed: MAX_LEVEL) ||
|
62
|
+
_prt("Success: #{success.ast.inspect} @#{success.position}", level:, needed: DEFAULT_LEVEL) ||
|
63
|
+
_prt("Success: @#{success.position}")
|
64
|
+
_hl(level:)
|
65
|
+
|
66
|
+
success
|
67
|
+
end
|
68
|
+
|
69
|
+
def _prt(message, level: MIN_LEVEL, needed: MIN_LEVEL, nl: true)
|
70
|
+
return unless level >= needed
|
71
|
+
|
72
|
+
$stderr.print(message)
|
73
|
+
$stderr.puts if nl
|
74
|
+
true
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../helper"
|
4
|
+
module L43Peg
|
5
|
+
module Combinators
|
6
|
+
module Sel extend self
|
7
|
+
include L43Peg::Helper
|
8
|
+
|
9
|
+
def sel(input:, cache:, name:, parsers:)
|
10
|
+
parsers.each do |parser|
|
11
|
+
case parser.(input, cache:)
|
12
|
+
in L43Peg::Success => success
|
13
|
+
return success
|
14
|
+
else
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
fail_parser("sel #{name} could not match any option", input:)
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
data/lib/l43_peg/combinators.rb
CHANGED
@@ -20,10 +20,15 @@ module L43Peg
|
|
20
20
|
map(inner, name:, fn: join_maps)
|
21
21
|
end
|
22
22
|
|
23
|
+
def debug_parser(parser, name: nil, level: :default)
|
24
|
+
name ||= "debug_parser(#{parser.name})"
|
25
|
+
Parser.new(name, &DebugParser.parse_with_debug(parser:, level:, name:))
|
26
|
+
end
|
27
|
+
|
23
28
|
def many(parser, name: nil, min: 0, max: nil)
|
24
29
|
Parser.new(name || "many(#{parser.name})") {|input, cache, name1=nil| Many.many(input:, cache:, name: name1 || name, parser:, min:, max:)}
|
25
30
|
end
|
26
|
-
|
31
|
+
|
27
32
|
def map(parser, name: nil, fn: nil, &mapper)
|
28
33
|
raise ArgumentError, "must not provide keyword parameyer fn and a block" if fn && mapper
|
29
34
|
mapper = fn || mapper
|
@@ -31,6 +36,22 @@ module L43Peg
|
|
31
36
|
Parser.new(name || "map(#{parser.name})") {|input, cache, name=nil| _map(input:, cache:, name:, parser:, mapper:)}
|
32
37
|
end
|
33
38
|
|
39
|
+
def rgx_tokenize(rgx_specs, name: nil)
|
40
|
+
name = name || "rgx_tokenize(#{rgx_specs.inspect})"
|
41
|
+
many(sel_rgx_parser(rgx_specs, name:))
|
42
|
+
end
|
43
|
+
|
44
|
+
def sel(parsers, name: nil)
|
45
|
+
name ||= "sel(#{parsers.map(&:name).join(", ")})"
|
46
|
+
Parser.new(name) { |input, cache, name1=nil| Sel.sel(input:, cache:, name: name1 || name, parsers:) }
|
47
|
+
end
|
48
|
+
|
49
|
+
def sel_rgx_parser(rgx_specs, name: nil)
|
50
|
+
name = name || "sel_rgx_parser(#{rgx_specs.inspect})"
|
51
|
+
parsers = rgx_specs.map(&_mk_rgx_parser)
|
52
|
+
sel(parsers, name:)
|
53
|
+
end
|
54
|
+
|
34
55
|
def seq(*parsers, name: nil)
|
35
56
|
name ||= "seq(#{parsers.map(&:name).join(", ")})"
|
36
57
|
Parser.new(name) {|input, cache, _name=nil| Seq.seq(input:, cache:, name:, parsers:)}
|
@@ -46,6 +67,14 @@ module L43Peg
|
|
46
67
|
success.map(&mapper)
|
47
68
|
end
|
48
69
|
end
|
70
|
+
|
71
|
+
def _mk_rgx_parser
|
72
|
+
-> rgxspec do
|
73
|
+
name, rgx, capture, fn = rgxspec
|
74
|
+
fn ||= -> (captures) { [name, captures] }
|
75
|
+
map(Parsers.rgx_parser(rgx, capture:), &fn)
|
76
|
+
end
|
77
|
+
end
|
49
78
|
end
|
50
79
|
end
|
51
80
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
data/lib/l43_peg/failure.rb
CHANGED
data/lib/l43_peg/input.rb
CHANGED
@@ -6,6 +6,12 @@ module L43Peg
|
|
6
6
|
|
7
7
|
attributes col: 1, input: "", lnb: 1, context: {}
|
8
8
|
|
9
|
+
def debug
|
10
|
+
"Input<" +
|
11
|
+
["col:#{col}", "input:#{input.inspect}", "lnb:#{lnb}", "context:#{context.inspect}"].join(" ") +
|
12
|
+
">"
|
13
|
+
end
|
14
|
+
|
9
15
|
def drop(by=nil)
|
10
16
|
case by
|
11
17
|
when nil
|
@@ -19,8 +25,18 @@ module L43Peg
|
|
19
25
|
|
20
26
|
def empty? = input.empty?
|
21
27
|
|
28
|
+
def head_hr(n, position: false)
|
29
|
+
if position
|
30
|
+
"Input<#{input[0...n].inspect}@#{position_hr}>"
|
31
|
+
else
|
32
|
+
"Input<#{input[0...n].inspect}>"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
22
36
|
def position = [@col, @lnb]
|
23
37
|
|
38
|
+
def position_hr = [@col, @lnb].join(":")
|
39
|
+
|
24
40
|
private
|
25
41
|
|
26
42
|
def _drop_by_n(n)
|
data/lib/l43_peg/parser.rb
CHANGED
@@ -9,10 +9,14 @@ module L43Peg
|
|
9
9
|
def initialize(rgx, name: nil, **options)
|
10
10
|
name = name || "rgx_parser(#{rgx.inspect})"
|
11
11
|
rgx = _mk_rgx(rgx)
|
12
|
+
warn_on_empty = options.fetch(:warn_on_empty, true)
|
12
13
|
super(name) do |input, cache, _name|
|
13
14
|
case rgx.match(input.input)
|
14
15
|
in MatchData => md
|
15
16
|
ast = _from_match(md, options)
|
17
|
+
if ast.empty? && warn_on_empty
|
18
|
+
$stderr.puts("Warning, parser #{name} succeeds with empty match")
|
19
|
+
end
|
16
20
|
L43Peg::Success.new(ast:, cache:, rest: input.drop(md[0]), position: input.position)
|
17
21
|
else
|
18
22
|
L43Peg::Failure.new(cache:, input:, parsed_by: self, reason: "input does not match #{rgx.inspect} (in #{name})")
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# module L43Peg
|
4
|
+
# module Parsers
|
5
|
+
# class RgxTokenizer < L43Peg::Parser
|
6
|
+
|
7
|
+
# private
|
8
|
+
|
9
|
+
# def initialize(rgxen, name: nil, **options)
|
10
|
+
# name = name || "rgx_tokenizer(#{rgxen.inspect})"
|
11
|
+
# parsers = rgxen.map(&_mk_rgx_parser)
|
12
|
+
# Combinators.sel(parsers)
|
13
|
+
# end
|
14
|
+
|
15
|
+
# def _mk_rgx_parser
|
16
|
+
# -> rgxspec do
|
17
|
+
# name, rgx, capture = rgxspec
|
18
|
+
# Combinators.map(Parsers.rgx_parser(rgx, capture:)) { |captures| [name, captures] }
|
19
|
+
# end
|
20
|
+
# end
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# module L43Peg
|
4
|
+
# module Parsers
|
5
|
+
# class SelRgxParser < L43Peg::Parser
|
6
|
+
|
7
|
+
# private
|
8
|
+
|
9
|
+
# def initialize(rgxen, name: nil)
|
10
|
+
# name = name || "sel_rgx_parser(#{rgxen.inspect})"
|
11
|
+
# parsers = rgxen.map(&_mk_rgx_parser)
|
12
|
+
# Combinators.sel(parsers)
|
13
|
+
# end
|
14
|
+
|
15
|
+
# def _mk_rgx_parser
|
16
|
+
# -> rgxspec do
|
17
|
+
# name, rgx, capture, fn = rgxspec
|
18
|
+
# fn ||= (-> (captures) { [name, captures] })
|
19
|
+
# Combinators.map(Parsers.rgx_parser(rgx, capture:), &fn)
|
20
|
+
# end
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
data/lib/l43_peg/parsers.rb
CHANGED
@@ -7,11 +7,12 @@ module L43Peg
|
|
7
7
|
module Parsers extend self
|
8
8
|
def args_parser(args, name: nil, stop: nil, &blk) =
|
9
9
|
L43Peg::Combinators.args_parser(args, name:, stop:, &blk)
|
10
|
-
def char_parser(charset = nil)
|
11
|
-
def end_parser
|
12
|
-
def failure_parser
|
13
|
-
def int_parser
|
14
|
-
def rgx_parser(rgx, name: nil, **o)
|
10
|
+
def char_parser(charset = nil) = L43Peg::Parsers::CharParser.new(charset)
|
11
|
+
def end_parser = L43Peg::Parsers::EndParser.instance
|
12
|
+
def failure_parser = L43Peg::Parsers::FailureParser.instance
|
13
|
+
def int_parser = L43Peg::Parsers::IntParser.instance
|
14
|
+
def rgx_parser(rgx, name: nil, **o) = L43Peg::Parsers::RgxParser.new(rgx, name:, **o)
|
15
|
+
# def rgx_tokenizer(rgxn, name: nil, **o) = L43Peg::Parsers::RgxTokenizer.new(rgxn, name:, **o)
|
15
16
|
def token_parser(spc, name: nil, **o) = L43Peg::Parsers::TokenParser.new(spc, name:, **o)
|
16
17
|
def tokens_parser(map, name: nil, stop: nil, &b) =
|
17
18
|
L43Peg::Parsers::TokensParser.new(map, name:, stop:, &b)
|
data/lib/l43_peg/success.rb
CHANGED
@@ -8,8 +8,15 @@ module L43Peg
|
|
8
8
|
@position ||= _position
|
9
9
|
end
|
10
10
|
|
11
|
+
def debug
|
12
|
+
"Success<" +
|
13
|
+
[
|
14
|
+
"ast:#{ast.inspect}", "cache:#{cache.cache.inspect}", "rest:#{rest.input.inspect}"
|
15
|
+
].join(" ") + ">"
|
16
|
+
end
|
11
17
|
def map(&mapper)
|
12
|
-
|
18
|
+
update_attribute(:ast, &mapper)
|
19
|
+
# self.class.new(ast: mapper.(ast), cache:, position:, rest:)
|
13
20
|
end
|
14
21
|
|
15
22
|
private
|
data/lib/l43_peg/tokens.rb
CHANGED
@@ -11,9 +11,23 @@ module L43Peg
|
|
11
11
|
return self if empty?
|
12
12
|
self.class.new(tokens: input.drop(by), context:, tnb: tnb + by)
|
13
13
|
end
|
14
|
+
|
15
|
+
def debug
|
16
|
+
"Tokens<" +
|
17
|
+
["tnb:#{tnb}", "tokens:#{tokens.inspect}", "context:#{context.inspect}"].join +
|
18
|
+
">"
|
19
|
+
end
|
14
20
|
|
15
21
|
def empty? = tokens.empty?
|
16
22
|
def head = tokens.first
|
23
|
+
def head_hr(n, position: false)
|
24
|
+
if position
|
25
|
+
"Tokens<[#{tokens[0..n].map(&:inspect).join(", ")}]@#{position_hr}>"
|
26
|
+
else
|
27
|
+
"Tokens<[#{tokens[0..n].map(&:inspect).join(", ")}]>"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
17
31
|
def input = tokens
|
18
32
|
|
19
33
|
def match(str_or_rgx, option)
|
@@ -28,6 +42,7 @@ module L43Peg
|
|
28
42
|
end
|
29
43
|
|
30
44
|
def position = tnb
|
45
|
+
def position_hr = tnb.to_s
|
31
46
|
|
32
47
|
private
|
33
48
|
|
data/lib/l43_peg.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: l43_peg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Dober
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: l43_open_object
|
@@ -40,7 +40,9 @@ files:
|
|
40
40
|
- lib/l43_peg.rb
|
41
41
|
- lib/l43_peg/cache.rb
|
42
42
|
- lib/l43_peg/combinators.rb
|
43
|
+
- lib/l43_peg/combinators/debug_parser.rb
|
43
44
|
- lib/l43_peg/combinators/many.rb
|
45
|
+
- lib/l43_peg/combinators/sel.rb
|
44
46
|
- lib/l43_peg/combinators/seq.rb
|
45
47
|
- lib/l43_peg/failure.rb
|
46
48
|
- lib/l43_peg/helper.rb
|
@@ -53,6 +55,8 @@ files:
|
|
53
55
|
- lib/l43_peg/parsers/failure_parser.rb
|
54
56
|
- lib/l43_peg/parsers/int_parser.rb
|
55
57
|
- lib/l43_peg/parsers/rgx_parser.rb
|
58
|
+
- lib/l43_peg/parsers/rgx_tokenizer.rb
|
59
|
+
- lib/l43_peg/parsers/sel_rgx_parser.rb
|
56
60
|
- lib/l43_peg/parsers/token_parser.rb
|
57
61
|
- lib/l43_peg/parsers/tokens_parser.rb
|
58
62
|
- lib/l43_peg/parsers/verb_parser.rb
|