@sap/cds-compiler 5.1.2 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/CHANGELOG.md +58 -0
  2. package/bin/cdsc.js +7 -2
  3. package/bin/cdshi.js +24 -17
  4. package/bin/cdsse.js +17 -18
  5. package/doc/CHANGELOG_BETA.md +9 -4
  6. package/lib/api/main.js +19 -2
  7. package/lib/api/options.js +4 -1
  8. package/lib/api/validate.js +5 -0
  9. package/lib/base/builtins.js +1 -0
  10. package/lib/base/message-registry.js +40 -3
  11. package/lib/base/messages.js +1 -1
  12. package/lib/base/model.js +0 -11
  13. package/lib/checks/actionsFunctions.js +0 -12
  14. package/lib/checks/structuredAnnoExpressions.js +10 -14
  15. package/lib/compiler/assert-consistency.js +21 -13
  16. package/lib/compiler/builtins.js +2 -2
  17. package/lib/compiler/checks.js +25 -6
  18. package/lib/compiler/define.js +27 -31
  19. package/lib/compiler/extend.js +16 -18
  20. package/lib/compiler/generate.js +3 -3
  21. package/lib/compiler/populate.js +22 -16
  22. package/lib/compiler/propagator.js +3 -2
  23. package/lib/compiler/resolve.js +87 -94
  24. package/lib/compiler/shared.js +12 -13
  25. package/lib/compiler/tweak-assocs.js +390 -86
  26. package/lib/compiler/utils.js +41 -33
  27. package/lib/compiler/xpr-rewrite.js +45 -58
  28. package/lib/edm/annotations/genericTranslation.js +17 -13
  29. package/lib/edm/csn2edm.js +28 -4
  30. package/lib/edm/edm.js +68 -28
  31. package/lib/edm/edmInboundChecks.js +5 -8
  32. package/lib/edm/edmPreprocessor.js +66 -40
  33. package/lib/edm/edmUtils.js +1 -1
  34. package/lib/gen/BaseParser.js +778 -0
  35. package/lib/gen/CdlParser.js +4477 -0
  36. package/lib/gen/language.checksum +1 -1
  37. package/lib/gen/language.interp +1 -1
  38. package/lib/gen/languageParser.js +4072 -4024
  39. package/lib/inspect/inspectPropagation.js +1 -1
  40. package/lib/json/from-csn.js +5 -3
  41. package/lib/json/to-csn.js +7 -10
  42. package/lib/language/antlrParser.js +96 -0
  43. package/lib/language/errorStrategy.js +1 -1
  44. package/lib/language/genericAntlrParser.js +32 -4
  45. package/lib/language/multiLineStringParser.js +1 -1
  46. package/lib/main.d.ts +23 -0
  47. package/lib/model/cloneCsn.js +22 -13
  48. package/lib/model/csnUtils.js +2 -0
  49. package/lib/model/revealInternalProperties.js +2 -0
  50. package/lib/modelCompare/utils/filter.js +70 -42
  51. package/lib/optionProcessor.js +16 -10
  52. package/lib/parsers/AstBuildingParser.js +1290 -0
  53. package/lib/parsers/CdlGrammar.g4 +2013 -0
  54. package/lib/parsers/Lexer.js +249 -0
  55. package/lib/render/toCdl.js +46 -45
  56. package/lib/render/toSql.js +5 -5
  57. package/lib/transform/addTenantFields.js +4 -4
  58. package/lib/transform/db/applyTransformations.js +54 -16
  59. package/lib/transform/draft/odata.js +10 -11
  60. package/lib/transform/effective/flattening.js +10 -14
  61. package/lib/transform/forRelationalDB.js +7 -6
  62. package/lib/transform/odata/flattening.js +42 -31
  63. package/lib/transform/odata/toFinalBaseType.js +7 -6
  64. package/lib/transform/universalCsn/universalCsnEnricher.js +1 -0
  65. package/lib/utils/moduleResolve.js +1 -1
  66. package/package.json +2 -2
  67. package/share/messages/redirected-to-ambiguous.md +5 -4
  68. package/share/messages/redirected-to-complex.md +6 -3
@@ -0,0 +1,778 @@
1
+ // Base class for generated parser, for redepage v0.1.12
2
+
3
+ 'use strict';
4
+
5
// Runtime base class for the table-driven, generated parser.
//
// State kept on the instance:
//  - `tokens`/`tokenIdx`/`eofIndex`: token array, index of the lookahead token
//    and index of the last token (set by `init()`).
//  - `s`: current parser state (index into `table`); `errorState`: state used
//    as the starting point for calculating the expected-token set.
//  - `stack`: one entry `{ ruleState, followState, tokenIdx, prec }` per active rule.
//  - `keywords`: dictionary keyword → flag; the value `false` marks a reserved
//    word, a truthy value an unreserved keyword (see `ci()` and `lP()`).
//  - `conditionTokenIdx`/`conditionStackLength`: where the last condition
//    (`gc()`/`gp()`) was evaluated; used to ignore it during error recovery.
//  - `fixKeywordTokenIdx`: token index at which a keyword must not be
//    re-interpreted as identifier (set after a failed condition).
//  - `nextTokenAsId`: the next `lk()` call returns the token type instead of
//    the keyword.
//
// Method naming: names ending with `_` are hooks or helpers for the generated
// parser or a subclass; names starting with `_` are internal helpers.
class BaseParser {
  // `lexer` must provide `tokenize( parser )`, `keywords` is the keyword
  // dictionary described above and `table` the generated state table (it is
  // compiled in place by `compileTable()`).
  constructor( lexer, keywords, table ) {
    this.keywords = keywords;
    this.table = compileTable( table );
    this.lexer = lexer;
    this.tokens = undefined;
    this.eofIndex = undefined;
    this.tokenIdx = 0;
    this.conditionTokenIdx = -1;
    this.fixKeywordTokenIdx = -1;
    this.conditionStackLength = -1;
    this.nextTokenAsId = false;

    this.s = null;
    this.errorState = null;
    this.stack = [];            // [{ ruleState, followState, tokenIdx, prec }]
    this.dynamic_ = {};
    this.prec_ = null;
    this.$hasErrors = null;
    // trace:
    this.trace = [ -1 ];
  }

  // Run the lexer and remember the index of the last token. The lexer is
  // expected to have filled `this.tokens` (presumably ending with an EOF
  // token - confirm against the lexer). Returns the parser for chaining.
  init() {
    this.lexer.tokenize( this );
    this.eofIndex = this.tokens.length - 1;
    return this;
  }

  // methods for actions --------------------------------------------------------

  la() {                        // lookahead: complete token
    return this.tokens[this.tokenIdx];
  }
  lb() {                        // look back: complete token
    return this.tokens[this.tokenIdx - 1];
  }
  lr() {                        // return the first token matched by current rule
    return this.tokens[this.stack[this.stack.length - 1].tokenIdx];
  }

  // lookahead, error: ----------------------------------------------------------

  l() {                         // lookahead: token type
    return this.tokens[this.tokenIdx].type;
  }

  // instead of l() if keyword (reserved and/or unreserved) is in one of the cases
  // Returns the keyword of the lookahead token if it has one, its type
  // otherwise. If `nextTokenAsId` is set, the flag is reset and the type is
  // returned even for keywords.
  lk() {                        // keyword lookahead
    const la = this.tokens[this.tokenIdx];
    if (!this.nextTokenAsId)
      return la.keyword || la.type;
    this.nextTokenAsId = false;
    return la.type;
  }

  // Report an unexpected token and recover. Always returns false, i.e. an
  // action after the call is not executed.
  e() {                         // error: report and recover
    const la = this.tokens[this.tokenIdx];
    const expecting = this._expecting();
    if (this.trace.length > 1)
      this._trace( 'detected parsing error,' );
    this.reportUnexpectedToken_( la );
    la.parsedAs = 0;

    if (this.conditionTokenIdx === this.tokenIdx &&
        this.conditionStackLength === this.stack.length &&
        (la.keyword && expecting[la.keyword] || expecting[la.type])) {
      // called with/after gc()/gp(), and the token would actually match
      const { tokenIdx, ruleState } = this.stack.at( -1 );
      this.s = (this.tokenIdx > tokenIdx) ? this.errorState : ruleState;
      return false;             // error recovery: ignore condition/precedence
    }

    if (this.tokenIdx >= this.eofIndex)
      return this._stopParsing( this.stack.length );
    // TODO: also sync to what comes next in current rule, at least after rule call,
    // this way we do not have to do the check of g(0) in re() as we did before 2023-12-07
    // (not sure yet whether to make it part of recoverInline or recoverPanicMode),
    if (!this._recoverInline( expecting ))
      this._recoverPanicMode();
    return false;
  }

  // instead of e() in default if lk() had been used and 'Id' is in a non-default case
  ei() {                        // error (after trying to test again as identifier)
    if (!this.tokens[this.tokenIdx].keyword) // lk() had directly returned the type
      return this.e();
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // goto state: ----------------------------------------------------------------

  // go to end of the rule, in tracing parser: g(0)
  // Returns true if the rule can be left with the current lookahead token;
  // reports an error (and returns false) otherwise.
  gr( follow ) {                // intersection follow set for fast exit
    if (this.stack[this.stack.length - 1].tokenIdx === this.tokenIdx)
      return this.e();          // match at least one token
    this.s = 0;
    // TODO: also have recursive flag in stack: was the rule called recursively?
    // extra val 'gr' when rule was called when it could reach the rule end
    const { type: lt, keyword: lk } = this.tokens[this.tokenIdx];
    if (lk &&                   // Id also for unreserved, except after condition failure
        follow?.[0] === 'Id' && this.keywords[lk] !== false &&
        this.fixKeywordTokenIdx !== this.tokenIdx ||
        follow?.includes( lk || lt )) {
      this._tracePush( [ 'E', true ] );
      return true;
    }
    this._tracePush( [ 'E', 0 ] );
    // TODO: caching
    // Walk the rule stack outwards (the start rule at index 0 is not tested)
    // until a follow state decides whether the token matches; `dynamic_` is
    // unwound in parallel and restored afterwards.
    const { dynamic_ } = this;
    let match;
    let depth = this.stack.length;
    while (match == null && --depth) {
      this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
      const { followState } = this.stack[depth];
      match = this._pred_next( followState, lt, lk, 'E' );
      this._traceSubPush( match ?? 0 );
    }
    this.dynamic_ = dynamic_;
    // If the parser reaches this point with match = null, even the top-level rule
    // does not have a required token (typically `EOF`) at the end → the parser
    // must accept any token → rule exit possible (but no output '✔' in trace).
    return (match ?? true) || this.e();
  }

  // go to state; non-tracing parser: `this.s=‹state›` or `this.gr()`
  // `state == null` means error, `state === 0` (falsy) means rule end.
  g( state, follow ) {
    if (!(state == null ? this.e() : state || this.gr( follow )))
      return false;
    this.s = state;             // is just `this.s=‹state›` in non-trace parser
    this._tracePush( this.s );
    return true;
  }

  // instead of gi() for `Id_all`
  giA( state, follow ) { // go to state (after trying to test again as identifier)
    if (!this.tokens[this.tokenIdx].keyword) // lk() had directly returned the type
      return this.g( state, follow );
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // instead of g() in default if lk() had been used and 'Id' is in a non-default case
  gi( state, follow ) {  // go to state (after trying to test again as identifier)
    const lk = this.tokens[this.tokenIdx].keyword;
    // As opposed to ei(), we also check for reserved keywords here; this way, we
    // do not have to add reserved keywords from the follow-set to the `switch`.
    if (!lk || this.keywords[lk] === false) // TODO: consider fixKeywordTokenIdx ?
      return this.g( state, follow );
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // instead of gi() at rule end (RuleEnd_ in follow-set) for `Id_restricted`
  giR( state, follow ) { // go to state (after trying to test again as identifier)
    const lk = this.tokens[this.tokenIdx].keyword;
    if (!lk || this.keywords[lk] === false || this._keyword_after_rule( lk ))
      return this.g( state, follow );
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // instead of g() in a non-default case if there is a LL1 conflict
  gP( state ) {          // goto state with standard weak-conflict prediction
    return this.lP() && this.g( state );
  }

  // match and consume token: ---------------------------------------------------

  m( state, token ) {           // match token = compare and consume
    return (this.tokens[this.tokenIdx].type === token)
      ? this.c( state )
      : this.e();
  }

  // instead of m() for identifiers via `Id` or `Id_restricted`
  mi( state, ident = true ) {   // match identifier token
    return (this.tokens[this.tokenIdx].type === 'Id')
      ? this.ci( state, ident )
      : this.e();
  }

  // instead of mi() for `Id_all`
  miA( state, ident = true ) {  // match identifier token
    return (this.tokens[this.tokenIdx].type === 'Id')
      ? this.ciA( state, ident )
      : this.e();
  }

  // instead of m() for reserved keywords or unreserved without conflict:
  mk( state, token ) {          // match keyword token
    return (this.tokens[this.tokenIdx].keyword === token)
      ? this.ck( state )
      : this.e();
  }

  // Mark the lookahead token with `parsedAs`, advance (never beyond the last
  // token), and continue at `state`. Always returns true.
  c( state, parsedAs = 'token' ) { // consume token
    const la = this.tokens[this.tokenIdx];
    la.parsedAs = parsedAs;
    if (this.tokenIdx < this.eofIndex)
      ++this.tokenIdx;
    // TODO: handle identifier-including-reserved-words later (e.g. for id after a `.`)
    this.s = state;
    this.errorState = state;
    if (this.constructor.tracingParser)
      this._trace( `consume ${ tokenFullName( la, ' as ' ) },`, la );
    return true;
  }

  // instead of c() for identifiers, used both with l() and lk()
  ci( state, ident = 'ident' ) { // consume identifier token
    const la = this.tokens[this.tokenIdx];
    if (this.keywords[la.keyword] === false)
      this.reportReservedWord_( la );
    // with error recovery: use that (consider this having a good score)
    return this.c( state, ident );
  }

  // instead of ci() for `Id_all`, used both with l() and lk()
  ciA( state, ident = 'ident' ) { // consume identifier token, the "All" variant
    return this.c( state, ident );
  }

  // instead of c() for reserved or unreserved without conflict, requires lk()
  ck( state ) {                 // consume keyword token
    return this.c( state, 'keyword' );
  }

  // instead of ck() if there is a LL1 conflict
  ckP( state, first2 ) {  // consume unreserved keyword with weak conflict
    return this.lP( first2 ) && this.ck( state );
  }

  // for parser token or token set via `/`
  ckA( state ) {
    // if it really should be considered an Id, set `this.la().parsedAs` yourself
    return this.c( state, (this.l() === 'Id' ? 'keyword' : 'token') );
  }

  // Skip the lookahead token without marking it; used by error recovery.
  skipToken_() {
    ++this.tokenIdx;
  }

  // condition and precedence handling ------------------------------------------

  // state must match the goto-state of the default (there must be no default
  // action), or null for error, lP() must have been used before.  There is no
  // “or Id” behavior other than via gpP()

  // “go if user condition fails”
  // `cond` is the name of a parser method which is called with argument `true`.
  // Returns true if the condition holds (or is ignored in error recovery);
  // otherwise goes to `state` and returns false.
  gc( state, cond ) {
    if (this.conditionTokenIdx === this.tokenIdx &&
        this.conditionStackLength === this.stack.length) {
      this._tracePush( [ 'C' ] );
      return true;              // error recovery: ignore condition
    }
    this.conditionTokenIdx = this.tokenIdx;
    this.conditionStackLength = this.stack.length;
    // TODO: let this[cond]( true ) return recovery badness in error case
    const fail = !this[cond]( true );
    if (this.constructor.tracingParser)
      this._tracePush( [ 'C', cond, !fail ] );
    // TODO TOOL: in this case, the default case must not have actions (tool must
    // add state if it does)
    if (fail) {             // TODO: extra gcK() method instead of check below
      // TODO: extra method necessary for academic case
      // ( 'unreserved' 'foo' | <cond> Id 'bar' )` with input `unreserved bar`
      const { keyword } = this.la();
      // Look up the keyword in the decision of the current state, as gp()
      // does; indexing the state table directly by keyword never matches.
      if (keyword && this.table[this.s]?.[keyword])
        this.fixKeywordTokenIdx = this.tokenIdx;
    }
    return !fail || this.g( state ) && false;
  }

  ec( cond ) {                  // error if user condition fails
    return this.gc( null, cond );
  }

  // “go if precedence condition fails”
  // `prec` is the precedence of the operator to be parsed; `mode` is one of
  // 'post', 'none', 'right' or anything else for the default behavior.
  gp( state, prec, mode ) {
    if (this.conditionTokenIdx === this.tokenIdx &&
        this.conditionStackLength === this.stack.length) {
      this._tracePush( [ 'C' ] );
      return true;              // error recovery: ignore condition
    }
    this.conditionTokenIdx = this.tokenIdx;
    this.conditionStackLength = this.stack.length;
    const parentPrec = this.stack.at( -1 ).prec ?? -Infinity;
    const fail = prec <= parentPrec ||
      this.prec_ != null &&     // previous op parsed by current rule
      // <…,postfix> || <…,assoc=none>, <…,postfix=once>:
      (mode === 'post' && prec > this.prec_ || mode === 'none' && prec >= this.prec_);
    if (this.constructor.tracingParser) {
      const pp = (parentPrec === -Infinity) ? '-∞' : parentPrec;
      const tp = (this.prec_ == null) ? '∞' : this.prec_;
      const suffix = mode === 'post' && `≤${ tp }` || mode === 'none' && `<${ tp }`;
      this._tracePush( [ 'C', `${ pp }<${ prec }${ suffix || '' }`, !fail ] );
    }
    if (fail) {             // TODO: extra gcK() method instead of check below
      // TODO: extra method necessary for academic case
      // ( 'unreserved' 'foo' | <cond> Id 'bar' )` with input `unreserved bar`
      const { keyword } = this.la();
      if (keyword && this.table[this.s]?.[keyword])
        this.fixKeywordTokenIdx = this.tokenIdx;
      return this.g( state ) && false; // TODO: reset this.prec_ ?
    }
    this.prec_ = (mode === 'right') ? prec - 1 : prec; // -1: <…,assoc=right>, <…,prefix>
    return true;
  }

  ep( prec, mode ) {            // error if precedence condition fails
    return this.gp( null, prec, mode );
  }

  // rule start, end and call: --------------------------------------------------

  // Push a stack entry for the called rule (remembering where to continue in
  // the caller and the caller's precedence) and start at `state`.
  rule_( state, followState = -1 ) { // start rule
    this.stack.push( {
      ruleState: state,
      followState,
      tokenIdx: this.tokenIdx,
      prec: this.prec_,
    } );
    this.dynamic_ = Object.create( this.dynamic_ );
    this.s = state;
    this.prec_ = null;
    this.conditionTokenIdx = -1;
    this.errorState ??= state;
    this._trace( [ state, 'call rule', '', ' at alt start', -1 ] );
  }

  // Pop the stack entry of the current rule and continue at the caller's
  // follow state. `rulePrecMethod`, if provided, is the name of a method
  // calculating the new `prec_` from the popped stack entry.
  // Returns whether the rule's final action is to be executed.
  // Throws if the current state is not a rule-end state (0 or null).
  exit_( rulePrecMethod ) {     // exit rule
    if (this.s)
      throw new Error( `this.s === ${ this.s } // illegally set by action, or runtime/generator bug` );
    this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
    const caller = this.stack.pop();
    this.s = caller.followState;
    this.prec_ = (rulePrecMethod) ? this[rulePrecMethod]( caller ) : caller.prec;
    this._trace( [ caller.ruleState, 'exit rule', '', '', 1 ] );
    this.errorState = this.s;
    // execute actions if not in error recovery (pass-through) and at least one
    // token has been matched in rule:
    return this.s != null && this.tokenIdx > caller.tokenIdx;
  }

  // predicate used before rule call if with LL(1) conflict, 'Id' in other case
  // Returns false (and lets the next lk() return the token type) if the
  // unreserved keyword at the lookahead position is to be parsed as identifier.
  // Throws if the table has no command with prediction flag for the keyword.
  lP( first2 ) {         // only start rule if this predicate returns true
    // nothing to check if not a non-reserved keyword:
    const { keyword: lk1 } = this.tokens[this.tokenIdx];
    if (!lk1 || !this.keywords[lk1])
      return true;

    const { type: lt2, keyword: lk2 } = this.tokens[this.tokenIdx + 1];
    // Argument first2 is just a performance hint with ckP():
    if (lk2 && first2?.[0] === 'Id' && this.keywords[lk2] !== false ||
        first2?.includes( lk2 || lt2 )) {
      this._tracePush( [ 'P', true ] );
      return true;
    }
    this._tracePush( [ 'P' ] );
    // now check it dynamically:
    const cmd = this.table[this.s]?.[lk1];
    // `?.`: a missing command must end up in the error below, not in a TypeError
    if (cmd?.[2] !== 1)
      throw new Error( `Unexpected command '${ cmd?.[0] }' without prediction at state ${ this.s } for ‘${ lk1 }’` );

    // if not the keyword match, the command is “goto” or “rule call”
    const nextState = (cmd[0] === 'ck') ? cmd[1] : this._pred_keyword( cmd[1], lk1 );

    ++this.tokenIdx;            // for user lookahead fns and conditions
    const match = this._pred_next( nextState, lt2, lk2, 'P' );
    --this.tokenIdx;

    const r = match ?? true;
    if (match == null)
      this._traceSubPush( 0 );
    if (lt2 === 'IllegalToken')
      return true;
    // TODO: instead of this IllegalToken test, implement a “confirm unreserved
    // keyword as Id” prediction which tests whether the token after the then-Id
    // matches.
    this._traceSubPush( r );
    if (!r)
      this.nextTokenAsId = true;
    return r;
  }

  // Now the helper methods =====================================================

  // Standard weak-conflict predicate -------------------------------------------

  _pred_keyword( state, keyword ) {
    // returns state after matching the first token as keyword, for lP()
    while (state) {
      this._traceSubPush( state );
      let cmd = this.table[state];
      if (!Array.isArray( cmd ))
        cmd = cmd[keyword] || cmd.Id || cmd[''];
      switch (cmd[0]) {
        case 'ck': case 'mk':
          return cmd[1];        // state after token consumption
        case 'g':               // TODO: another rule call?
          break;
        default:
          if (typeof cmd[0] !== 'number')
            throw new Error( `Unexpected command ${ cmd[0] } at state ${ this.s }` );
      }
      state = cmd[1];
    }
    // reached end of rule without having consumed a token
    throw new Error( 'Not supported: option for unreserved keywords in follow set' );
  }

  // Test whether a token with the given `type`/`keyword` can be matched when
  // starting at `state`, without changing the parser state.
  // Returns true/false for match/no match, and null if the rule end was reached
  // without having entered another rule (the caller decides what that means).
  _pred_next( state, type, keyword, mode ) {
    let hasEnteredRule = false;
    while (state) {
      this._traceSubPush( state );
      let cmd = this.table[state];
      if (!Array.isArray( cmd )) {
        const lookahead = cmd[' lookahead'];
        cmd = lookahead
          ? cmd[this[lookahead]( mode )] || cmd['']
          : keyword && cmd[keyword] || cmd[type] || cmd[''];
      }
      switch (cmd[0]) {
        case 'c': case 'ck': case 'ciA': case 'ckA': // TODO: re-check ckA
          return true;
        case 'ci':
          if (!keyword ||
              this.keywords[keyword] !== false && this.fixKeywordTokenIdx !== this.tokenIdx)
            return true;
          cmd = this.table[state]['']; // is currently always 'g' or 'e'
          break;
        case 'm':
          return type === cmd[2];
        case 'mi':
          return type === 'Id' &&
            (!keyword ||
             this.keywords[keyword] !== false && this.fixKeywordTokenIdx !== this.tokenIdx);
        case 'miA':
          return type === 'Id';
        case 'mk':
          return keyword === cmd[2];
        case 'g': case 'e':
          break;
        default:
          if (typeof cmd[0] !== 'number')
            throw new Error( `Unexpected command ${ cmd[0] } at state ${ this.s }` );
          // If the parser enters a rule, reaching the rule end (can happen with
          // option `minTokensMatched`) means "no match".
          hasEnteredRule = true;
          // If we want to support conditions before matching the first token in a
          // rule, we would have to handle `this.stack` and `this.dynamic_`.
      }
      // We could optimize with rule call - only 'Id' must be further investigated
      state = cmd[1];
    }
    // If invalid state, the second token does not match, e.g. for `VIRTUAL +`
    // or `VIRTUAL §` (with IllegalToken):
    if (state == null)
      return false;

    // Otherwise, the parser could end the rule after having matched the keyword
    // with prediction.  TODO: as we do not look behind the current rule for the
    // prediction, the tool can normally omit the prediction (and output a
    // message), not so with `ruleStartingWithUnreserved`.  We will rather look
    // behind the current rule _after_ having decided that the token is to be
    // matched as identifier.
    return !hasEnteredRule && null; // let caller decide how to interpret this
  }

  _keyword_after_rule( keyword ) {
    // TODO: this is a slow implementation - do dedicated traversal later
    // It is used in giR() only and this is currently used just once.
    // TODO: using mode = 'R' and tracing R(…)
    // TODO: investigate why this was not written before adding
    // `<default=fallback>` in rule `fromRefWithOptAlias`.
    return this._expecting()[keyword];
  }

  // Set of expected tokens: for error reporting and recovery -------------------

  // Calculate dictionary of expected tokens (token name → true), starting at
  // `errorState` (default: `this.errorState`) and considering the first
  // `length` stack entries (default: all).
  _expecting( errorState, length ) {
    // Remark: rules must not have been exited too early, see _expecting call in re()
    const stack = this.stack.slice( 0, length || this.stack.length );
    // Immediately exit rules when no tokens have yet been consumed:
    let caller = stack.at( -1 );
    while (stack.length && this.tokenIdx === caller.tokenIdx) {
      --stack.length;
      caller = stack.at( -1 );
    }
    // Now calculate dictionary of expected tokens:
    const expecting = Object.create( null );
    let state = errorState ?? this.errorState;
    // At potential rule end, we must add follow sets of outer rules
    // TODO: we also need to unravel this.dynamic_ for translateParserToken_()
    while ((!state || this._exp_collect( expecting, this.table[state] )) && stack.length)
      state = stack.pop().followState;
    return expecting;
  }

  // TODO: use iterative alg, no recursive call, return state instead -----------
  // Add expected tokens to dictionary `expecting` starting at command `cmd`.
  // Return true if the rule end is reached, i.e. we also need to add the expected
  // tokens at the follow state of the current rule.  Argument `prop` is the token
  // name for `cmd` in a decision.
  _exp_collect( expecting, cmd, prop ) {
    if (prop != null)
      cmd = cmd[prop];
    if (!Array.isArray( cmd )) {
      // decision: collect for all cases; properties starting with a space
      // (like ' lookahead') are no token names
      let reachedRuleEnd = false;
      for (const tok in cmd) {
        if (Object.hasOwn( cmd, tok ) && tok.charAt( 0 ) !== ' ' &&
            this._exp_collect( expecting, cmd, tok ))
          reachedRuleEnd = true;
      }
      return reachedRuleEnd;
    }
    switch (cmd[0]) {
      case 'c': case 'ck':
        expecting[prop] = true;
        return false;
      case 'ckA':
        for (const tok of this.translateParserToken_( prop ))
          expecting[tok] = true;
        return false;
      case 'm': case 'mk':
        expecting[cmd[2]] = true;
        return false;
      case 'ci': case 'ciA': case 'mi': case 'miA':
        expecting['Id'] = true;
        return false;
      case 'g': case 'gi':
        if (!cmd[1])
          return cmd[1] === 0;  // state 0 = rule end, null/undefined = error
        // TODO: add some assertion in generation that a decision has no two `g`s
        // to the same state (both in cases and default)
        // UPDATE: no, there will be at least gP()s
        // TODO: do properly for (...)+ - currently, the token for directly
        // exiting the rule is also collected
        return this._exp_collect( expecting, this.table[cmd[1]] );
      default:
        // a called rule must match at least one token → after having called a
        // rule, do not collect expecting tokens after exiting the rule
        if (typeof cmd[0] === 'number')
          this._exp_collect( expecting, this.table[cmd[1]] );
        return false;
    }
  }

  // Hook: names to be reported as expected for a parser token (`ckA`).
  translateParserToken_( token ) {
    return [ token ];
  }

  // Error recovery -------------------------------------------------------------

  // Single-token deletion: if the token after the erroneous one is expected,
  // skip the erroneous token and continue. Returns false if not applicable.
  _recoverInline( expecting ) {
    const { type: lt2, keyword: lk2 } = this.tokens[this.tokenIdx + 1];
    if (!(lk2 && expecting[lk2] || expecting[lt2]))
      return false;

    // Immediately exit rules (except start) when no tokens have yet been consumed:
    let { length } = this.stack;
    while (--length > 0) {
      const caller = this.stack[length];
      // matched tokens in rule: found rule
      if (this.tokenIdx > caller.tokenIdx)
        break;
      caller.followState = null;
    }

    if (++length < this.stack.length) {
      this.s = null;
      this.stack[length].followState = this.errorState;
      // assume the erroneous token to be skipped before having called the rule:
      ++this.stack[length].tokenIdx;
      this.errorState = null;
    }
    else {                      // no rule to leave immediately
      this.s = this.errorState;
    }

    this.skipToken_();
    if (this.constructor.tracingParser)
      this._trace( [ this.stack[length - 1].ruleState, 'recover inside rule' ] );
    return true;                // to be re-checked with actions
  }

  // Skip tokens until one is found which is expected after one of the active
  // rules, then mark the rules in between for exit.
  _recoverPanicMode() {
    const { length } = this.stack;
    // Panic mode: resume at token in then-expecting set:
    const followSets = { EOF: 0 }; // token name → index of rule in stack
    for (let idx = 0; idx < length; ++idx) {
      const caller = this.stack[idx];
      const exp = this._expecting( caller.followState, length );
      for (const t of Object.keys( exp )) {
        // no sync to 'Id' - TODO: provide grammar and rule options
        if (t !== 'Id')         // TODO: see below
          followSets[t] = idx;
      }
    }
    const tokenIdx = this.tokenIdx;
    while (this.tokenIdx <= this.eofIndex) {
      // TODO: exclude reserved words for test with this.l()
      // `??`, not `||`: depth 0 (the start rule) is a valid sync depth
      const depth = followSets[this.lk()] ?? followSets[this.l()];
      // TODO: handle Id here
      if (depth != null)
        return this._error_panic( depth, length, tokenIdx );
      this.skipToken_();
    }
    throw new Error( 'EOF was added...' );
  }

  // Mark the rules with stack index > `low` and < `high` for exit; `tokenIdx`
  // is the index of the erroneous token.
  _error_panic( low, high, tokenIdx ) {
    this.s = null;              // mark current rule for exit
    if (this.constructor.tracingParser) {
      this._trace( this.stack.length - 1 > low
        ? `recover by exiting ${ this.stack.length - low } rules prematurely,`
        : 'recover by exiting current rule prematurely,' );
    }
    // eventually mark outer rules for exit:
    // TODO: re-check for rule calls which are at the optional rule end:
    //   x: 'x not'; b: 'b'? x {console.log('x→b')} 'b'?; a: b {console.log('b→a')} 'a'
    // with start rule `a` and input `x a`: output should be x→b + b→a
    // with start rule `a` and input `b a`: output should be b→a
    //
    // → the rule is: if a rule can continue at the specified state and has
    // matched at least one token, then its action is executed, otherwise not
    for (let idx = low + 1; idx < high; ++idx)
      this.stack[idx].followState = null;
    const resume = this.stack[low];
    if (tokenIdx === resume.tokenIdx) // no tokens matched other than those by skipping
      resume.tokenIdx = this.tokenIdx; // make exit_() return false
    this.errorState = null;
  }

  // Mark all active rules for exit. Always returns false.
  _stopParsing( idx ) {
    if (this.constructor.tracingParser) {
      this.log( this.la().location.toString() + ':', 'Info:',
                `leave all active ${ idx } rules prematurely, stop parsing` );
    }
    // TODO: run this.skipToken_() on all remaining tokens?  Does ANTLR consume
    // those in error recovery mode?  Probably not.
    for (const c of this.stack)
      c.followState = null;
    this.errorState = null;
    this.s = null;
    return false;
  }

  // small methods --------------------------------------------------------------

  log( ...args ) {
    console.log( ...args );
  }

  // Sorted display names of the currently expected tokens, joined by `sep`.
  expectingForMessage_( sep = ',' ) {
    return Object.keys( this._expecting() ).map( tokenName ).sort().join( sep );
  }

  reportError_( location, text ) {
    this.$hasErrors = true;
    this.log( `${ location }: Error:`, text );
  }

  reportUnexpectedToken_( token ) {
    this.reportError_( token.location,
                       `unexpected token ${ tokenFullName( token, ': ' ) } - expecting: ` +
                       this.expectingForMessage_() );
  }

  reportReservedWord_( token ) {
    this.reportError_( token.location,
                       `unexpected reserved word ‘${ token.text }’ - expecting: ` +
                       this.expectingForMessage_() );
  }

  // Report error `text` at `token`, skip the current token and recover in
  // panic mode.
  errorAndRecoverOutside( token, text ) { // TODO: re-check
    this.reportError_( token.location, text );
    ++this.tokenIdx;
    return this._recoverPanicMode( this.stack.length );
  }

  // The trace methods only do something if the static property
  // `tracingParser` of the parser class is truthy.
  _tracePush( state ) {
    if (this.constructor.tracingParser)
      this.trace.push( state ?? '⚠' );
  }
  _traceSubPush( state ) {
    if (this.constructor.tracingParser)
      this.trace.at( -1 ).push( state );
  }
  traceAction( location ) {     // will be put into tracing parser
    this._trace( 'execute action,', { location } );
  }

  // Log `msg` (string, or argument array for `_rule()`) together with the
  // states collected since the last call, then restart the state collection.
  _trace( msg, la ) {
    if (!this.constructor.tracingParser)
      return;
    if (Array.isArray( msg ))
      msg = this._rule( ...msg );
    this.trace.push( this.s ?? '⚠' );
    this.log( (la || this.la()).location.toString() + ':',
              'Info:', msg, 'states:', this.trace.map( traceStep ).join( ' → ' ) );
    this.trace = [ this.s ?? '⚠' ];
  }

  // TODO: rename to ruleName_, leaving out the msg stuff
  // Trace message with the name of the rule containing `state`: the rule name
  // is the nearest string entry in the table below index `state`.
  _rule( state, msg, post = '', postOther = post, depthDiff ) {
    const start = --state;
    while (typeof this.table[state] !== 'string')
      --state;
    const { length } = this.stack;
    const depth = depthDiff ? `, depth ${ length + depthDiff } → ${ length }` : '';
    return `${ msg } “${ this.table[state] }”${ state < start ? postOther : post }${ depth },`;
  }

  // Returns whether both states belong to the same rule, i.e. whether there is
  // no rule name (string entry) in the table after the lower state up to and
  // including the higher state.
  inSameRule_( lowState, highState ) {
    if (lowState > highState)
      [ lowState, highState ] = [ highState, lowState ];
    while (lowState < highState) {
      if (typeof this.table[++lowState] === 'string') // rule boundary
        return false;
    }
    return true;
  }
}
736
+
737
// Format one entry of the parser trace. Plain entries (states) are returned
// unchanged. An array entry `[ name, ...args ]` is rendered as `name(args)`,
// with the arguments joined by '→'; a trailing boolean is not listed as
// argument but rendered as '✔' or '✖' after the closing parenthesis. If the
// first argument is a number (a state), the argument list starts with '→'.
function traceStep( step ) {
  if (!Array.isArray( step ))
    return step;
  const [ name, first ] = step;
  const outcome = { true: '✔', false: '✖' }[step.at( -1 )] ?? '';
  const end = outcome ? -1 : undefined; // do not list the outcome as argument
  const args = step.slice( 1, end ).join( '→' );
  const arrow = (typeof first === 'number') ? '→' : '';
  return `${ name }(${ arrow }${ args })${ outcome }`;
}
745
+
746
// Display name for a token type, given either as string or as token object.
// For a token object, its keyword is used if the token has not been parsed
// yet or has been parsed as keyword (and it has a keyword), its type
// otherwise. Names starting with an upper-case ASCII letter are rendered
// as ‹Name›, all others as ‘name’.
function tokenName( type ) {
  let name = type;
  if (typeof name !== 'string') {
    const token = name;
    const asKeyword = !token.parsedAs || token.parsedAs === 'keyword';
    name = asKeyword && token.keyword || token.type;
  }
  if (/^[A-Z]+/.test( name ))
    return `‹${ name }›`;
  return `‘${ name }’`;
}
751
+
752
// Display name for a token in messages: the token name, prefixed by the
// quoted token text and `sep` if the text is worth showing. That is the case
// if the token has been parsed as something other than 'keyword' or 'token',
// or if it is not an `Id` and its (non-empty) text differs from its type.
function tokenFullName( token, sep ) {
  const { parsedAs, type, text } = token;
  const parsedSpecially = parsedAs && parsedAs !== 'keyword' && parsedAs !== 'token';
  const hasOwnText = type !== 'Id' && type !== text && text;
  if (parsedSpecially || hasOwnText)
    return `‘${ text }’${ sep }${ tokenName( token ) }`;
  return tokenName( token );
}
758
+
759
// Expand the shorthands in the decisions of the generated parser `table`, in
// place; the table is marked with `$compiled` so this is done only once.
//
// A decision is a non-array object in the table, mapping token names to
// commands. In a decision (properties whose name starts with a space, like
// ' lookahead', are left untouched):
//  - a value which is neither an array nor a string (a state number) is
//    replaced by the command `[ 'g', ‹state› ]`; the same array is shared by
//    all cases of the decision going to the same state,
//  - a string value is an alias: it is replaced by the command of the case
//    with that name,
//  - a missing default case `''` is set to the error command `[ 'e' ]`.
//
// The goto shorthands are expanded before the aliases are resolved, so an
// alias also gets the compiled command if it is listed before its target.
// Returns the table.
function compileTable( table ) {
  if (table.$compiled)
    return table;
  for (const line of table) {
    // rule names (strings), plain commands (arrays) and holes need no work
    if (line == null || typeof line !== 'object' || Array.isArray( line ))
      continue;
    const cases = Object.keys( line ).filter( prop => prop.charAt( 0 ) !== ' ' );
    const gotos = Object.create( null ); // very sparse array
    for (const prop of cases) {
      const alt = line[prop];
      if (!Array.isArray( alt ) && typeof alt !== 'string')
        line[prop] = (gotos[alt] ??= [ 'g', alt ]);
    }
    for (const prop of cases) {
      const alt = line[prop];
      if (typeof alt === 'string')
        line[prop] = line[alt];
    }
    if (!line[''])
      line[''] = [ 'e' ];
  }
  table.$compiled = true;
  return table;
}
777
+
778
+ module.exports = BaseParser;