l43_peg 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 373ca9885275389a284235de2dc5f95eb2f470aeece99bf47e9b54ed6e3fdf60
4
- data.tar.gz: 62ac1897c26b540da5205d7b0fcb6aa1ff472f036e36ca5389c3ac4767fbf926
3
+ metadata.gz: 0d386b9373423da69f7697e81bd02a011df1c36cc20ff6c861b3215a2081dea9
4
+ data.tar.gz: b82ba81e4372717f6603b6b41311d1ff2f608ce4a9cc7376c3242a65e3842432
5
5
  SHA512:
6
- metadata.gz: b51cd93714348ba6be2394cd63543603af953820394a6cc201a39c4721d1a10a4ec4bf224ad84c3a9eae49f426126da35dcc0b8cd751f9d6e6265f0341172daf
7
- data.tar.gz: edbcd0d28cedcb395a6721a28f5f2284fefdaa28c1d7eb6449d8f53a90a00219177aace6e15208f87bd12488ab01712aa015b75179d0195decc34b1d78bba1a7
6
+ metadata.gz: 265f550e6e956e85c5b0a39a5817ab13424e57d0da7d44ff9b946c318392421955292d7462b24a4a744c2f8361af10b627204800408fa500500c6779d359314b
7
+ data.tar.gz: 54eb3d55de2704d91c31d9fd31db3931fab1c5f614a005e11ca3efcafb854cf964f8b6a3e26bc43d93005065bc42dcdd517b6ef3d3284c3d205f93f7c9ccf466
data/README.md CHANGED
@@ -6,91 +6,91 @@
6
6
 
7
7
  ### This Version (v0.1.x) is Alpha Quality (many PEG features are missing, like recursion and even alternatives).
8
8
 
9
- It is however released because it offers quite some nice parsing of ARGV which shall be demonstrated by the following
10
- [speculations](https://rubygems.org/gems/speculate_about)
9
+ It is however released because it offers quite some nice parsing of ARGV which shall be demonstrated by the following
10
+ [speculations](https://rubygems.org/gems/speculate_about)
11
11
 
12
- See [README_spec.rb](spec/speculations/README_spec.rb) for the generated code for details
12
+ See [README_spec.rb](spec/speculations/README_spec.rb) for the generated code for details
13
13
 
14
14
  ### Context: `arg_parser`
15
15
 
16
16
  Given the following argument specification
17
17
 
18
18
  ```ruby
19
- include L43Peg::Combinators
20
- let :args_spec do
21
- {
22
- start: "--start=(.*)",
23
- end: "(?:--end|-e)=(.*)",
24
- kwd: "--(alpha|beta|gamma)"
25
- }
26
- end
19
+ include L43Peg::Combinators
20
+ let :args_spec do
21
+ {
22
+ start: "--start=(.*)",
23
+ end: "(?:--end|-e)=(.*)",
24
+ kwd: "--(alpha|beta|gamma)"
25
+ }
26
+ end
27
27
  ```
28
28
 
29
29
  And the associated parser
30
30
 
31
31
  ```ruby
32
- let(:parser) { args_parser(args_spec) }
32
+ let(:parser) { args_parser(args_spec) }
33
33
  ```
34
34
 
35
35
  Then we can parse some input
36
36
 
37
37
  ```ruby
38
- assert_parse_success(parser, %w[--start=42 --beta -e=44], ast: {start: "42", kwd: "beta", end: "44"}, rest: [])
38
+ assert_parse_success(parser, %w[--start=42 --beta -e=44], ast: {start: "42", kwd: "beta", end: "44"}, rest: [])
39
39
  ```
40
40
 
41
41
  And we can get the rest in a list of tokens
42
42
 
43
43
  ```ruby
44
- assert_parse_success(parser, %w[--start=42 --beta -e=44 -s=not_an_arg --end=too_late], ast: {start: "42", kwd: "beta", end: "44"}, rest: %w[-s=not_an_arg --end=too_late])
44
+ assert_parse_success(parser, %w[--start=42 --beta -e=44 -s=not_an_arg --end=too_late], ast: {start: "42", kwd: "beta", end: "44"}, rest: %w[-s=not_an_arg --end=too_late])
45
45
  ```
46
46
 
47
47
  Also note that multiple values are passed into an array
48
48
 
49
49
  ```ruby
50
- input = %w[--end=42 --beta -e=44 --beta --end=not_too_late --gamma]
51
- ast = {end: %w[42 44 not_too_late], kwd: %w[beta beta gamma]}
52
- assert_parse_success(parser, input, ast:, rest: [])
53
- ```
50
+ input = %w[--end=42 --beta -e=44 --beta --end=not_too_late --gamma]
51
+ ast = {end: %w[42 44 not_too_late], kwd: %w[beta beta gamma]}
52
+ assert_parse_success(parser, input, ast:, rest: [])
53
+ ```
54
54
 
55
55
  #### Context: Postprocessing
56
56
 
57
- When we map the parser
58
-
59
- ```ruby
60
- let :int_args do
61
- {
62
- start: "--start=(.*)",
63
- end: "--end=(.*)",
64
- inc: "--inc=(.*)"
65
- }
66
- end
67
- let(:int_arg_parser) {args_parser(int_args, name: "int parser", &:to_i)}
57
+ When we map the parser
58
+
59
+ ```ruby
60
+ let :int_args do
61
+ {
62
+ start: "--start=(.*)",
63
+ end: "--end=(.*)",
64
+ inc: "--inc=(.*)"
65
+ }
66
+ end
67
+ let(:int_arg_parser) {args_parser(int_args, name: "int parser", &:to_i)}
68
68
  ```
69
69
 
70
70
  Then we can convert the string values
71
71
 
72
- ```ruby
73
- assert_parse_success(int_arg_parser, %w[--start=42 --end=44 --inc=2], ast: {start: 42, end: 44, inc: 2}, rest: [])
74
- ```
72
+ ```ruby
73
+ assert_parse_success(int_arg_parser, %w[--start=42 --end=44 --inc=2], ast: {start: 42, end: 44, inc: 2}, rest: [])
74
+ ```
75
75
 
76
76
  #### Context: Knowing When To Stop
77
77
 
78
- An argument parser that respects itself provides a means to _end_ argument parsing even if more matches follow.
79
- An exmaple for that is the posix argument `--`
78
+ An argument parser that respects itself provides a means to _end_ argument parsing even if more matches follow.
79
+ An example for that is the POSIX argument `--`
80
80
 
81
- We can use whatever we want in `args_parser`, here is a variation:
81
+ We can use whatever we want in `args_parser`, here is a variation:
82
82
 
83
- Given the specification
83
+ Given the specification
84
84
 
85
- ```ruby
86
- let :args do
87
- {
85
+ ```ruby
86
+ let :args do
87
+ {
88
88
  width: "w:(\\d+)",
89
89
  height: "h:(\\d+)",
90
- __stop: "(::)"
90
+ __stop: "(::)"
91
91
  }
92
- end
93
- let(:wh_parser) {args_parser(args, stop: :__stop, &:to_i)}
92
+ end
93
+ let(:wh_parser) {args_parser(args, stop: :__stop, &:to_i)}
94
94
  ```
95
95
 
96
96
  Then parsing the following input
@@ -99,7 +99,7 @@ Then parsing the following input
99
99
  input = %w[h:42 w:73 :: w:74]
100
100
  ast = {height: 42, width: 73}
101
101
  assert_parse_success(wh_parser, input, ast:, rest: %w[w:74])
102
- ```
102
+ ```
103
103
 
104
104
  ### Context: User Interface
105
105
 
@@ -111,45 +111,205 @@ as module methods
111
111
 
112
112
  Given an _exposed_ `args_parser`
113
113
  ```ruby
114
- let :parser do
115
- L43Peg::Parsers.args_parser(
116
- {
117
- negative: "(-\\d+)",
118
- positive: "\\+?(\\d+)"
119
- },
120
- &:to_i
121
- )
122
- end
114
+ let :parser do
115
+ L43Peg::Parsers.args_parser(
116
+ {
117
+ negative: "(-\\d+)",
118
+ positive: "\\+?(\\d+)"
119
+ },
120
+ &:to_i
121
+ )
122
+ end
123
123
  ```
124
124
 
125
125
  But we are also not interested in the internal representation of success and failure of parsing which was
126
126
  used in the speculations above. Nor do we want to transform our input into the internal representations
127
127
  as was done above by the helpers. (If you need to see the details of this you can inspect the
128
- file [`parser_test.rb` in `spec/support`](spec/support/parser_test.rb))
129
-
128
+ file [`parser_test.rb` in `spec/support`](spec/support/parser_test.rb))
129
+
130
130
  Then we can use the interface of `L43Peg`
131
131
 
132
132
  ```ruby
133
- L43Peg.parse_tokens(parser, %w[43 -44 +45]) => :ok, result
134
- expect(result).to eq(positive: [43, 45], negative: -44)
133
+ L43Peg.parse_tokens(parser, %w[43 -44 +45]) => :ok, result
134
+ expect(result).to eq(positive: [43, 45], negative: -44)
135
135
  ```
136
136
 
137
137
  And if we get an error the result is as follows
138
138
 
139
139
  ```ruby
140
- parser = L43Peg::Parsers.char_parser('a')
141
- L43Peg.parse_string(parser, 'b') => :error, message
142
- expect(message).to eq("char \"b\"")
140
+ parser = L43Peg::Parsers.char_parser('a')
141
+ L43Peg.parse_string(parser, 'b') => :error, message
142
+ expect(message).to eq("char \"b\"")
143
+ ```
144
+
145
+ ## Context: Regexp Parser
146
+
147
+ The basic concept is the `rgx_parser`
148
+
149
+ Given a `rgx_parser` for an identifier
150
+ ```ruby
151
+ include L43Peg::Parsers
152
+ let(:id_parser) { rgx_parser("[[:alpha:]][_[:alnum:]]*") }
153
+ ```
154
+
155
+ Then we can parse strings that start as such
156
+ ```ruby
157
+ assert_parse_success(id_parser, "l43_peg", ast: "l43_peg")
158
+ ```
159
+
160
+ And we can discard some input from the ast with the aid of captures
161
+ ```ruby
162
+ sym_parser = rgx_parser(":([[:alpha:]][_[:alnum:]]*)")
163
+ assert_parse_success(sym_parser, ":no_colon", ast: "no_colon")
164
+ ```
165
+
166
+ But it can also fail
167
+ ```ruby
168
+ reason = "input does not match /\\A[[:alpha:]][_[:alnum:]]*/ (in rgx_parser(\"[[:alpha:]][_[:alnum:]]*\"))"
169
+ assert_parse_failure(id_parser, "42", reason:)
170
+ ```
171
+
172
+ #### Context: Warnings on empty matches
173
+
174
+ Oftentimes bugs in PEG parsing are caused by zero-width matches. While this is quite obvious with the `many` and
175
+ `opt` or `maybe` combinators (**N.B.** they are not yet implemented, use `many(max: 1)` instead),
176
+ the common usage patterns with these combinators are safe.
177
+
178
+ However regular expression parsing might hide zero-width matches, and that's why they will trigger a warning by default
179
+
180
+ Given an empty match regex parser
181
+ ```ruby
182
+ let(:empty_parser) { rgx_parser("a*") }
183
+ ```
184
+
185
+ Then we get a warning when matching an empty string
186
+ ```ruby
187
+ expect { assert_parse_success(empty_parser, "", ast: "") }
188
+ .to output("Warning, parser rgx_parser(\"a*\") succeeds with empty match\n").to_stderr
189
+ ```
190
+
191
+ However this behavior can also be disabled
192
+
193
+ And therefore
194
+ ```ruby
195
+ parser = rgx_parser("a*", warn_on_empty: false)
196
+ expect { assert_parse_success(parser, "", ast: "") }
197
+ .not_to output.to_stderr
198
+ ```
199
+
200
+
201
+ ### Context: Tokenize Strings with Regexen
202
+
203
+ Now we can use a list of `rgx_parsers` to _tokenize_ a string (in the same way we can use `tokens_parser` to
204
+ quantify elements of an array, but with dynamic bounds)
205
+
206
+ Given some regexen
207
+ ```ruby
208
+ let :regexen do
209
+ [
210
+ [:verb, "<<", nil, ->(*){ [:verb, "<"] }],
211
+ [:verb, "\\$(\\$)"],
212
+ [:color_and_style, "<(.+?),(.+?)>", :all],
213
+ [:color, "<(.+?)>", 1],
214
+ [:reset, "\\$"],
215
+ [:verb, "[^<$]+"],
216
+ ]
217
+ end
218
+ let(:tokenizer) { L43Peg::Combinators.rgx_tokenize(regexen) }
219
+ ```
220
+
221
+ Then we can tokenize some inputs
222
+ ```ruby
223
+ input = "<red,bold>HELLO$and<<<green>$$<reset>"
224
+ ast = [
225
+ [:color_and_style, ["<red,bold>", "red", "bold"]],
226
+ [:verb, "HELLO"],
227
+ [:reset, "$"],
228
+ [:verb, "and"],
229
+ [:verb, "<"],
230
+ [:color, "green"],
231
+ [:verb, "$"],
232
+ [:color, "reset"]
233
+ ]
234
+ assert_parse_success(tokenizer, input, ast:)
235
+ ```
236
+
237
+ ### Context: Debugging
238
+
239
+ As parsers are by design imbricated functions debugging is not always simple.
240
+ Enter the `debug_parser`, a parser that _debugs_ parsers without changing their behavior,
241
+ merely displaying more or less detailed information
242
+
243
+ Given a parser
244
+ ```ruby
245
+ include L43Peg::Combinators
246
+ let :args do
247
+ {
248
+ lat: "lat:(\\d+)",
249
+ long: "long:(\\d+)",
250
+ }
251
+ end
252
+ let(:geo_parser) {args_parser(args, &:to_i)}
253
+ ```
254
+
255
+ #### Context: Minimum level of information
256
+
257
+ Given a minimum debug parser
258
+ ```ruby
259
+ let(:debugger) {debug_parser(geo_parser, level: :min)}
260
+ ```
261
+
262
+ Then we will get some output
263
+ ```ruby
264
+ expected =
265
+ "Tokens<[\"lat:43\", \"long:2\"]>\nSuccess: @1\n"
266
+ expect { parsed_success(debugger, ["lat:43", "long:2"]) }
267
+ .to output(expected).to_stderr
268
+ ```
269
+
270
+ #### Context: Default level of information
271
+ Given a default debug parser
272
+ ```ruby
273
+ let(:debugger) {debug_parser(char_parser("a"))}
274
+ ```
275
+
276
+ Then we will get some output on errors
277
+ ```ruby
278
+ expected ="Input<\"b\"@1:1>\nFailure: char \"b\" @[1, 1]\n"
279
+ expect { parsed_failure(debugger, "b") }
280
+ .to output(expected).to_stderr
281
+ ```
282
+
283
+ #### Context: Maximum level of information
284
+ Given a maximum level parser
285
+ ```ruby
286
+ let(:max_debugger) { debug_parser(char_parser("b"), level: :max) }
287
+ ```
288
+
289
+ Then we will get this output on success
290
+ ```ruby
291
+ expected =
292
+ [
293
+ "================================================================================",
294
+ 'Input<col:1 input:"bc" lnb:1 context:{}>',
295
+ "================================================================================",
296
+ 'Success<ast:"b" cache:{} rest:"c">',
297
+ "================================================================================",
298
+ ""
299
+ ].join("\n")
300
+
301
+ expect { parsed_success(max_debugger,"bc") }
302
+ .to output(expected).to_stderr
143
303
  ```
144
304
 
145
305
 
146
306
  # Author
147
307
 
148
- Copyright © 2024 Robert Dober
149
- robert.dober@gmail.com
308
+ Copyright © 2024 Robert Dober
309
+ robert.dober@gmail.com
150
310
 
151
311
  # LICENSE
152
312
 
153
- GNU AFFERO GENERAL PUBLIC LICENSE, Version 3, 19 November 2007. Please refer to [LICENSE](LICENSE) for details.
313
+ GNU AFFERO GENERAL PUBLIC LICENSE, Version 3, 19 November 2007. Please refer to [LICENSE](LICENSE) for details.
154
314
 
155
- <!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
315
+ <!-- SPDX-License-Identifier: AGPL-3.0-or-later -->
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ module L43Peg
4
+ module Combinators
5
+ module DebugParser extend self
6
+ MIN_LEVEL = 1
7
+ DEFAULT_LEVEL = 5
8
+ MAX_LEVEL = 10
9
+
10
+ def parse_with_debug(parser:, name:, level:)
11
+ level = _normalize_level(level)
12
+ -> (input, cache, name1=nil) do
13
+ _report_input(input, cache:, level:, name: name1||name)
14
+ case parser.(input, cache:)
15
+ in Success => success
16
+ _report_success(success, name: name1||name, level:)
17
+ in Failure => failure
18
+ _report_failure(failure, name: name1||name, level:)
19
+ end
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ def _hl(level:)
26
+ _prt("="*80, level:, needed: 10)
27
+ end
28
+
29
+ def _normalize_level(level)
30
+ case level
31
+ when :min
32
+ MIN_LEVEL
33
+ when :max
34
+ MAX_LEVEL
35
+ when :default
36
+ DEFAULT_LEVEL
37
+ when MIN_LEVEL..MAX_LEVEL
38
+ level
39
+ else
40
+ raise ArgumentError, "bad level: #{level}, use a number between #{MIN_LEVEL} and #{MAX_LEVEL} or :min, :max, :default"
41
+ end
42
+ end
43
+
44
+ def _report_failure(failure, name:, level:)
45
+ _hl(level:)
46
+ _prt("Failure: #{failure.inspect}", level:, needed: MAX_LEVEL) ||
47
+ _prt("Failure: #{failure.reason} #{failure.position_hr}", level:, needed: DEFAULT_LEVEL) ||
48
+ _prt("Failure: #{failure.reason}")
49
+ failure
50
+ end
51
+
52
+ def _report_input(input, cache:, name:, level:)
53
+ _hl(level:)
54
+ _prt(input.debug, level:, needed: MAX_LEVEL) ||
55
+ _prt(input.head_hr(20, position: true), level:, needed: DEFAULT_LEVEL) ||
56
+ _prt(input.head_hr(5))
57
+ end
58
+
59
+ def _report_success(success, name:, level:)
60
+ _hl(level:)
61
+ _prt("#{success.debug}", level:, needed: MAX_LEVEL) ||
62
+ _prt("Success: #{success.ast.inspect} @#{success.position}", level:, needed: DEFAULT_LEVEL) ||
63
+ _prt("Success: @#{success.position}")
64
+ _hl(level:)
65
+
66
+ success
67
+ end
68
+
69
+ def _prt(message, level: MIN_LEVEL, needed: MIN_LEVEL, nl: true)
70
+ return unless level >= needed
71
+
72
+ $stderr.print(message)
73
+ $stderr.puts if nl
74
+ true
75
+ end
76
+ end
77
+ end
78
+ end
79
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../helper"
4
+ module L43Peg
5
+ module Combinators
6
+ module Sel extend self
7
+ include L43Peg::Helper
8
+
9
+ def sel(input:, cache:, name:, parsers:)
10
+ parsers.each do |parser|
11
+ case parser.(input, cache:)
12
+ in L43Peg::Success => success
13
+ return success
14
+ else
15
+ end
16
+ end
17
+
18
+ fail_parser("sel #{name} could not match any option", input:)
19
+ end
20
+
21
+ end
22
+ end
23
+ end
24
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -20,10 +20,15 @@ module L43Peg
20
20
  map(inner, name:, fn: join_maps)
21
21
  end
22
22
 
23
+ def debug_parser(parser, name: nil, level: :default)
24
+ name ||= "debug_parser(#{parser.name})"
25
+ Parser.new(name, &DebugParser.parse_with_debug(parser:, level:, name:))
26
+ end
27
+
23
28
  def many(parser, name: nil, min: 0, max: nil)
24
29
  Parser.new(name || "many(#{parser.name})") {|input, cache, name1=nil| Many.many(input:, cache:, name: name1 || name, parser:, min:, max:)}
25
30
  end
26
-
31
+
27
32
  def map(parser, name: nil, fn: nil, &mapper)
28
33
  raise ArgumentError, "must not provide keyword parameyer fn and a block" if fn && mapper
29
34
  mapper = fn || mapper
@@ -31,6 +36,22 @@ module L43Peg
31
36
  Parser.new(name || "map(#{parser.name})") {|input, cache, name=nil| _map(input:, cache:, name:, parser:, mapper:)}
32
37
  end
33
38
 
39
+ def rgx_tokenize(rgx_specs, name: nil)
40
+ name = name || "rgx_tokenize(#{rgx_specs.inspect})"
41
+ many(sel_rgx_parser(rgx_specs, name:))
42
+ end
43
+
44
+ def sel(parsers, name: nil)
45
+ name ||= "sel(#{parsers.map(&:name).join(", ")})"
46
+ Parser.new(name) { |input, cache, name1=nil| Sel.sel(input:, cache:, name: name1 || name, parsers:) }
47
+ end
48
+
49
+ def sel_rgx_parser(rgx_specs, name: nil)
50
+ name = name || "sel_rgx_parser(#{rgx_specs.inspect})"
51
+ parsers = rgx_specs.map(&_mk_rgx_parser)
52
+ sel(parsers, name:)
53
+ end
54
+
34
55
  def seq(*parsers, name: nil)
35
56
  name ||= "seq(#{parsers.map(&:name).join(", ")})"
36
57
  Parser.new(name) {|input, cache, _name=nil| Seq.seq(input:, cache:, name:, parsers:)}
@@ -46,6 +67,14 @@ module L43Peg
46
67
  success.map(&mapper)
47
68
  end
48
69
  end
70
+
71
+ def _mk_rgx_parser
72
+ -> rgxspec do
73
+ name, rgx, capture, fn = rgxspec
74
+ fn ||= -> (captures) { [name, captures] }
75
+ map(Parsers.rgx_parser(rgx, capture:), &fn)
76
+ end
77
+ end
49
78
  end
50
79
  end
51
80
  # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -4,6 +4,8 @@ module L43Peg
4
4
  class Failure
5
5
  extend L43::OpenObject
6
6
  attributes cache: nil, input: nil, parsed_by: nil, position: [1, 1], reason: ""
7
+
8
+ def position_hr = "@#{position}"
7
9
  end
8
10
  end
9
11
  # SPDX-License-Identifier: Apache-2.0
data/lib/l43_peg/input.rb CHANGED
@@ -6,6 +6,12 @@ module L43Peg
6
6
 
7
7
  attributes col: 1, input: "", lnb: 1, context: {}
8
8
 
9
+ def debug
10
+ "Input<" +
11
+ ["col:#{col}", "input:#{input.inspect}", "lnb:#{lnb}", "context:#{context.inspect}"].join(" ") +
12
+ ">"
13
+ end
14
+
9
15
  def drop(by=nil)
10
16
  case by
11
17
  when nil
@@ -19,8 +25,18 @@ module L43Peg
19
25
 
20
26
  def empty? = input.empty?
21
27
 
28
+ def head_hr(n, position: false)
29
+ if position
30
+ "Input<#{input[0...n].inspect}@#{position_hr}>"
31
+ else
32
+ "Input<#{input[0...n].inspect}>"
33
+ end
34
+ end
35
+
22
36
  def position = [@col, @lnb]
23
37
 
38
+ def position_hr = [@col, @lnb].join(":")
39
+
24
40
  private
25
41
 
26
42
  def _drop_by_n(n)
@@ -5,7 +5,9 @@ module L43Peg
5
5
 
6
6
  attr_reader :fn, :name
7
7
 
8
- def call(input, cache: L43Peg::Cache.new) = fn.(input, cache, name)
8
+ def call(input, cache: L43Peg::Cache.new)
9
+ fn.(input, cache, name)
10
+ end
9
11
 
10
12
  private
11
13
 
@@ -9,10 +9,14 @@ module L43Peg
9
9
  def initialize(rgx, name: nil, **options)
10
10
  name = name || "rgx_parser(#{rgx.inspect})"
11
11
  rgx = _mk_rgx(rgx)
12
+ warn_on_empty = options.fetch(:warn_on_empty, true)
12
13
  super(name) do |input, cache, _name|
13
14
  case rgx.match(input.input)
14
15
  in MatchData => md
15
16
  ast = _from_match(md, options)
17
+ if ast.empty? && warn_on_empty
18
+ $stderr.puts("Warning, parser #{name} succeeds with empty match")
19
+ end
16
20
  L43Peg::Success.new(ast:, cache:, rest: input.drop(md[0]), position: input.position)
17
21
  else
18
22
  L43Peg::Failure.new(cache:, input:, parsed_by: self, reason: "input does not match #{rgx.inspect} (in #{name})")
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ # module L43Peg
4
+ # module Parsers
5
+ # class RgxTokenizer < L43Peg::Parser
6
+
7
+ # private
8
+
9
+ # def initialize(rgxen, name: nil, **options)
10
+ # name = name || "rgx_tokenizer(#{rgxen.inspect})"
11
+ # parsers = rgxen.map(&_mk_rgx_parser)
12
+ # Combinators.sel(parsers)
13
+ # end
14
+
15
+ # def _mk_rgx_parser
16
+ # -> rgxspec do
17
+ # name, rgx, capture = rgxspec
18
+ # Combinators.map(Parsers.rgx_parser(rgx, capture:)) { |captures| [name, captures] }
19
+ # end
20
+ # end
21
+ # end
22
+ # end
23
+ # end
24
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ # module L43Peg
4
+ # module Parsers
5
+ # class SelRgxParser < L43Peg::Parser
6
+
7
+ # private
8
+
9
+ # def initialize(rgxen, name: nil)
10
+ # name = name || "sel_rgx_parser(#{rgxen.inspect})"
11
+ # parsers = rgxen.map(&_mk_rgx_parser)
12
+ # Combinators.sel(parsers)
13
+ # end
14
+
15
+ # def _mk_rgx_parser
16
+ # -> rgxspec do
17
+ # name, rgx, capture, fn = rgxspec
18
+ # fn ||= (-> (captures) { [name, captures] })
19
+ # Combinators.map(Parsers.rgx_parser(rgx, capture:), &fn)
20
+ # end
21
+ # end
22
+ # end
23
+ # end
24
+ # end
25
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -7,11 +7,12 @@ module L43Peg
7
7
  module Parsers extend self
8
8
  def args_parser(args, name: nil, stop: nil, &blk) =
9
9
  L43Peg::Combinators.args_parser(args, name:, stop:, &blk)
10
- def char_parser(charset = nil) = L43Peg::Parsers::CharParser.new(charset)
11
- def end_parser = L43Peg::Parsers::EndParser.instance
12
- def failure_parser = L43Peg::Parsers::FailureParser.instance
13
- def int_parser = L43Peg::Parsers::IntParser.instance
14
- def rgx_parser(rgx, name: nil, **o) = L43Peg::Parsers::RgxParser.new(rgx, name:, **o)
10
+ def char_parser(charset = nil) = L43Peg::Parsers::CharParser.new(charset)
11
+ def end_parser = L43Peg::Parsers::EndParser.instance
12
+ def failure_parser = L43Peg::Parsers::FailureParser.instance
13
+ def int_parser = L43Peg::Parsers::IntParser.instance
14
+ def rgx_parser(rgx, name: nil, **o) = L43Peg::Parsers::RgxParser.new(rgx, name:, **o)
15
+ # def rgx_tokenizer(rgxn, name: nil, **o) = L43Peg::Parsers::RgxTokenizer.new(rgxn, name:, **o)
15
16
  def token_parser(spc, name: nil, **o) = L43Peg::Parsers::TokenParser.new(spc, name:, **o)
16
17
  def tokens_parser(map, name: nil, stop: nil, &b) =
17
18
  L43Peg::Parsers::TokensParser.new(map, name:, stop:, &b)
@@ -8,8 +8,15 @@ module L43Peg
8
8
  @position ||= _position
9
9
  end
10
10
 
11
+ def debug
12
+ "Success<" +
13
+ [
14
+ "ast:#{ast.inspect}", "cache:#{cache.cache.inspect}", "rest:#{rest.input.inspect}"
15
+ ].join(" ") + ">"
16
+ end
11
17
  def map(&mapper)
12
- self.class.new(ast: mapper.(ast), cache:, position:, rest:)
18
+ update_attribute(:ast, &mapper)
19
+ # self.class.new(ast: mapper.(ast), cache:, position:, rest:)
13
20
  end
14
21
 
15
22
  private
@@ -11,9 +11,23 @@ module L43Peg
11
11
  return self if empty?
12
12
  self.class.new(tokens: input.drop(by), context:, tnb: tnb + by)
13
13
  end
14
+
15
+ def debug
16
+ "Tokens<" +
17
+ ["tnb:#{tnb}", "tokens:#{tokens.inspect}", "context:#{context.inspect}"].join +
18
+ ">"
19
+ end
14
20
 
15
21
  def empty? = tokens.empty?
16
22
  def head = tokens.first
23
+ def head_hr(n, position: false)
24
+ if position
25
+ "Tokens<[#{tokens[0..n].map(&:inspect).join(", ")}]@#{position_hr}>"
26
+ else
27
+ "Tokens<[#{tokens[0..n].map(&:inspect).join(", ")}]>"
28
+ end
29
+ end
30
+
17
31
  def input = tokens
18
32
 
19
33
  def match(str_or_rgx, option)
@@ -28,6 +42,7 @@ module L43Peg
28
42
  end
29
43
 
30
44
  def position = tnb
45
+ def position_hr = tnb.to_s
31
46
 
32
47
  private
33
48
 
data/lib/l43_peg.rb CHANGED
@@ -6,7 +6,7 @@ require_relative 'l43/require_helper'
6
6
  require_subdir {}
7
7
 
8
8
  module L43Peg extend self
9
- VERSION = "0.1.7"
9
+ VERSION = "0.1.8"
10
10
 
11
11
  def parse_string(parser, input, lnb: 1, col: 1, context: {})
12
12
  input = Input.new(input:, col:, lnb:, context:)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: l43_peg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Dober
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-04-27 00:00:00.000000000 Z
11
+ date: 2024-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: l43_open_object
@@ -40,7 +40,9 @@ files:
40
40
  - lib/l43_peg.rb
41
41
  - lib/l43_peg/cache.rb
42
42
  - lib/l43_peg/combinators.rb
43
+ - lib/l43_peg/combinators/debug_parser.rb
43
44
  - lib/l43_peg/combinators/many.rb
45
+ - lib/l43_peg/combinators/sel.rb
44
46
  - lib/l43_peg/combinators/seq.rb
45
47
  - lib/l43_peg/failure.rb
46
48
  - lib/l43_peg/helper.rb
@@ -53,6 +55,8 @@ files:
53
55
  - lib/l43_peg/parsers/failure_parser.rb
54
56
  - lib/l43_peg/parsers/int_parser.rb
55
57
  - lib/l43_peg/parsers/rgx_parser.rb
58
+ - lib/l43_peg/parsers/rgx_tokenizer.rb
59
+ - lib/l43_peg/parsers/sel_rgx_parser.rb
56
60
  - lib/l43_peg/parsers/token_parser.rb
57
61
  - lib/l43_peg/parsers/tokens_parser.rb
58
62
  - lib/l43_peg/parsers/verb_parser.rb