@sap/cds-compiler 5.4.2 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/CHANGELOG.md +24 -1
  2. package/bin/cds_remove_invalid_whitespace.js +4 -4
  3. package/bin/cds_update_annotations.js +3 -3
  4. package/bin/cds_update_identifiers.js +3 -3
  5. package/lib/api/main.js +18 -30
  6. package/lib/api/validate.js +6 -1
  7. package/lib/base/lazyload.js +28 -0
  8. package/lib/base/location.js +1 -0
  9. package/lib/base/message-registry.js +53 -11
  10. package/lib/base/messages.js +17 -3
  11. package/lib/checks/{dbFeatureFlags.js → featureFlags.js} +1 -1
  12. package/lib/checks/parameters.js +61 -4
  13. package/lib/checks/validator.js +14 -6
  14. package/lib/compiler/index.js +7 -7
  15. package/lib/compiler/shared.js +29 -13
  16. package/lib/gen/BaseParser.js +345 -235
  17. package/lib/gen/CdlParser.js +4434 -4492
  18. package/lib/gen/Dictionary.json +2 -2
  19. package/lib/json/to-csn.js +3 -1
  20. package/lib/language/antlrParser.js +2 -111
  21. package/lib/main.js +16 -37
  22. package/lib/modelCompare/utils/filter.js +47 -21
  23. package/lib/parsers/AstBuildingParser.js +59 -49
  24. package/lib/parsers/CdlGrammar.g4 +91 -130
  25. package/lib/parsers/index.js +123 -0
  26. package/lib/render/toSql.js +8 -2
  27. package/lib/render/utils/delta.js +33 -1
  28. package/lib/transform/db/{transformExists.js → assocsToQueries/transformExists.js} +12 -407
  29. package/lib/transform/db/assocsToQueries/utils.js +440 -0
  30. package/lib/transform/db/expansion.js +2 -2
  31. package/lib/transform/draft/db.js +14 -3
  32. package/lib/transform/effective/annotations.js +3 -3
  33. package/lib/transform/effective/main.js +5 -7
  34. package/lib/transform/featureFlags.js +5 -0
  35. package/lib/transform/forRelationalDB.js +125 -192
  36. package/lib/transform/odata/createForeignKeys.js +1 -1
  37. package/lib/transform/odata/flattening.js +1 -1
  38. package/lib/transform/transformUtils.js +0 -51
  39. package/package.json +2 -2
  40. package/lib/transform/db/featureFlags.js +0 -5
@@ -1,4 +1,4 @@
1
- // Base class for generated parser, for redepage v0.1.16
1
+ // Base class for generated parser, for redepage v0.1.18
2
2
 
3
3
  'use strict';
4
4
 
@@ -12,28 +12,33 @@
12
12
  // and this.conditionStackLength and returns true?
13
13
 
14
14
  class BaseParser {
15
+ keywords;
16
+ table;
17
+ lexer;
18
+
19
+ tokens = undefined;
20
+ eofIndex = undefined;
21
+ tokenIdx = 0;
22
+ recoverTokenIdx = -1;
23
+ conditionTokenIdx = -1; // TODO: can we use recoverTokenIdx ?
24
+ errorTokenIdx = -1;
25
+ fixKeywordTokenIdx = -1;
26
+ conditionStackLength = -1;
27
+ nextTokenAsId = false;
28
+
29
+ s = null;
30
+ errorState = null;
31
+ stack = [];
32
+ dynamic_ = {}; // TODO: extra class
33
+ prec_ = null;
34
+ $hasErrors = null;
35
+ // trace:
36
+ trace = [];
37
+
15
38
  constructor( lexer, keywords, table ) {
16
- this.keywords = keywords;
39
+ this.keywords = { __proto__: null, ...keywords };
17
40
  this.table = compileTable( table );
18
41
  this.lexer = lexer;
19
- this.tokens = undefined;
20
- this.eofIndex = undefined;
21
- this.tokenIdx = 0;
22
- this.recoverTokenIdx = -1;
23
- this.conditionTokenIdx = -1; // TODO: can we use recoverTokenIdx ?
24
- this.errorTokenIdx = -1;
25
- this.fixKeywordTokenIdx = -1;
26
- this.conditionStackLength = -1;
27
- this.nextTokenAsId = false;
28
-
29
- this.s = null;
30
- this.errorState = null;
31
- this.stack = []; // [{ ruleState, followState, tokenIdx }]
32
- this.dynamic_ = {};
33
- this.prec_ = null;
34
- this.$hasErrors = null;
35
- // trace:
36
- this.trace = [];
37
42
  }
38
43
 
39
44
  init() {
@@ -42,6 +47,38 @@ class BaseParser {
42
47
  return this;
43
48
  }
44
49
 
50
+ _saveForWalk() {
51
+ return {
52
+ s: this.s,
53
+ stack: this.stack,
54
+ dynamic_: this.dynamic_,
55
+ prec_: this.prec_
56
+ };
57
+ }
58
+
59
+ _cloneFromSaved( saved ) { // non-deep: Object.assign
60
+ this.s = saved.s;
61
+ this.stack = saved.stack.map( obj => ({ ...obj }) );
62
+ this.dynamic_ = this._cloneDynamic( saved.dynamic_ );
63
+ this.prec_ = saved.prec_;
64
+ }
65
+
66
+ _cloneDynamic( dynamic_ ) {
67
+ let chain = [];
68
+ while (dynamic_ !== Object.prototype) {
69
+ const obj = {};
70
+ for (const [ prop, val ] of Object.entries( dynamic_ ))
71
+ obj[prop] = Array.isArray( val ) ? [ ...val ] : val;
72
+ chain.push( obj );
73
+ dynamic_ = Object.getPrototypeOf( dynamic_ );
74
+ }
75
+ let copy = Object.prototype;
76
+ let { length } = chain;
77
+ while (--length >= 0)
78
+ copy = { __proto__: copy, ...chain[length] };
79
+ return copy;
80
+ }
81
+
45
82
  // methods for actions --------------------------------------------------------
46
83
 
47
84
  la() { // lookahead: complete token
@@ -65,29 +102,39 @@ class BaseParser {
65
102
  const la = this.tokens[this.tokenIdx];
66
103
  if (!this.nextTokenAsId)
67
104
  return la.keyword || la.type;
105
+ // return la.keyword && this.table[this.s][la.keyword] && la.keyword || la.type;
68
106
  this.nextTokenAsId = false;
69
107
  return la.type;
70
108
  }
71
109
 
72
110
  e() { // error: report and recover
73
111
  const la = this.tokens[this.tokenIdx];
74
- if (this.trace.length > 1)
75
- this._trace( 'detected parsing error,', la );
76
- this.reportUnexpectedToken_( la );
77
- la.parsedAs = ''; // current token is erroneous
112
+ this._trace( 'detect parsing error,' );
113
+ if (this.errorTokenIdx === this.tokenIdx)
114
+ throw Error( `Already reported error for ${ tokenFullName( la ) } at ${ la.location }`);
78
115
 
79
- if (this.errorTokenIdx === this.tokenIdx) {
80
- // TODO: investigate why this is not handled otherwise
81
- this.reportInternalError_( la );
82
- this.skipToken_();
83
- return false;
84
- }
116
+ la.parsedAs = ''; // current token is erroneous
85
117
  this.errorTokenIdx = this.tokenIdx;
86
118
  this.conditionStackLength = null;
87
119
 
88
- const { rewindDepth, syncSet } = this._calculateSyncSet();
89
- const recoverDepth = this._findSyncToken( syncSet, rewindDepth );
90
- this._recoverFromError( rewindDepth, recoverDepth );
120
+ let { length } = this.stack;
121
+ while (--length && this.tokenIdx === this.stack[length].tokenIdx)
122
+ this.stack[length].followState = null;
123
+ if (++length === this.stack.length) // last good state in current rule
124
+ return this._reportAndRecover();
125
+
126
+ this.stack[length].followState = this.errorState;
127
+ this.s = null;
128
+ return false;
129
+ }
130
+
131
+ _reportAndRecover() {
132
+ this.s = this.errorState;
133
+ this.reportUnexpectedToken_();
134
+ const syncSet = this._calculateTokenSet( 'Y' );
135
+ const recoverDepth = this._findSyncToken( syncSet );
136
+ this._trace( 'recover from error,' );
137
+ this._recoverFromError( recoverDepth );
91
138
  return false;
92
139
  }
93
140
 
@@ -108,26 +155,15 @@ class BaseParser {
108
155
  this.s = 0;
109
156
  // TODO: also have recursive flag in stack: was the rule called recursively?
110
157
  // extra val 'gr' when rule was called when it could reach the rule end
111
- const { type: lt, keyword: lk } = this.tokens[this.tokenIdx];
112
- if (lk && // Id also for unreserved, except after condition failure
113
- follow?.[0] === 'Id' && this.keywords[lk] !== false &&
158
+ const { type, keyword } = this.tokens[this.tokenIdx];
159
+ if (keyword && // Id also for unreserved, except after condition failure
160
+ follow?.[0] === 'Id' && !this.keywords[keyword] &&
114
161
  this.fixKeywordTokenIdx !== this.tokenIdx ||
115
- follow?.includes( lk || lt )) {
162
+ follow?.includes( keyword || type )) {
116
163
  this._tracePush( [ 'E', true ] );
117
164
  return true;
118
165
  }
119
- this._tracePush( [ 'E', 0 ] );
120
- // TODO: caching
121
- const { dynamic_ } = this;
122
- let match;
123
- let depth = this.stack.length;
124
- while (match == null && --depth) {
125
- this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
126
- const { followState } = this.stack[depth];
127
- match = this._pred_next( followState, lt, lk, 'E' );
128
- this._traceSubPush( match ?? 0 );
129
- }
130
- this.dynamic_ = dynamic_;
166
+ const match = this._matchesInFollow( type, keyword, 'E' );
131
167
  // If the parser reaches this point with match = null, even the top-level rule
132
168
  // does not have a required token (typically `EOF`) at the end → the parser
133
169
  // must accept any token → rule exit possible (but no output '✔' in trace).
@@ -156,16 +192,19 @@ class BaseParser {
156
192
  const lk = this.tokens[this.tokenIdx].keyword;
157
193
  // As opposed to ei(), we also check for reserved keywords here; this way, we
158
194
  // do not have to add reserved keywords from the follow-set to the `switch`.
159
- if (!lk || this.keywords[lk] === false) // TODO: consider fixKeywordTokenIdx ?
195
+ if (!lk || this.keywords[lk]) // TODO: consider fixKeywordTokenIdx ?
160
196
  return this.g( state, follow );
161
197
  this.nextTokenAsId = true;
162
198
  return false; // do not execute action after it
163
199
  }
164
200
 
165
201
  // instead of gi() at rule end (RuleEnd_ in follow-set) for `Id_restricted`
202
+ // TODO: investigate why this is just used once - should be for all no-`as`
203
+ // cases
166
204
  giR( state, follow ) { // go to state (after trying to test again as identifier)
167
- const lk = this.tokens[this.tokenIdx].keyword;
168
- if (!lk || this.keywords[lk] === false || this._keyword_after_rule( lk ))
205
+ const { keyword } = this.tokens[this.tokenIdx];
206
+ if (!keyword || this.keywords[keyword] ||
207
+ this._matchesInFollow( 'Id', keyword, 'R' ))
169
208
  return this.g( state, follow );
170
209
  this.nextTokenAsId = true;
171
210
  return false; // do not execute action after it
@@ -206,10 +245,8 @@ class BaseParser {
206
245
  }
207
246
 
208
247
  c( state, parsedAs = 'token' ) { // consume token
209
- const la = this.tokens[this.tokenIdx];
248
+ const la = this.tokens[this.tokenIdx++]; // ++ now also for EOF
210
249
  la.parsedAs = parsedAs;
211
- if (this.tokenIdx < this.eofIndex) ++this.tokenIdx;
212
- // TODO: handle identifier-including-reserved-words later (e.g. for id after a `.`)
213
250
  this.s = state;
214
251
  this.errorState = state;
215
252
  if (this.constructor.tracingParser)
@@ -220,8 +257,8 @@ class BaseParser {
220
257
  // instead of c() for identifiers, used both with l() and lk()
221
258
  ci( state, ident = 'ident' ) { // consume identifier token
222
259
  const la = this.tokens[this.tokenIdx];
223
- if (this.keywords[la.keyword] === false)
224
- this.reportReservedWord_( la );
260
+ if (this.keywords[la.keyword])
261
+ this.reportReservedWord_();
225
262
  // with error recovery: use that (consider this having a good score)
226
263
  return this.c( state, ident )
227
264
  }
@@ -303,7 +340,7 @@ class BaseParser {
303
340
 
304
341
  rule_( state, followState = -1 ) { // start rule
305
342
  this.s = state;
306
- this._trace( [ 'call rule', state, ' at alt start' ], this.la() );
343
+ this._trace( [ 'call rule', state, ' at alt start' ] );
307
344
  this.stack.push( {
308
345
  ruleState: state,
309
346
  followState,
@@ -330,21 +367,20 @@ class BaseParser {
330
367
  : ' prematurely');
331
368
  const text = immediately ? '⚠ exit rule' : '⏎ exit rule';
332
369
  this.s = caller.followState; // for trace
333
- this._trace( [ text, caller.ruleState, post, this.stack.length + 1 ],
334
- this.la() );
335
- if (this.tokenIdx === caller.tokenIdx &&
336
- this.stack.at(-1)?.followState != null)
370
+ this._trace( [ text, caller.ruleState, post, this.stack.length + 1 ] )
371
+ if (immediately && this.stack.at(-1)?.followState != null)
337
372
  this.trace = [ this.errorState ]; // show last good state in trace
338
373
  }
339
374
  this.s = caller.followState;
375
+ if (immediately)
376
+ return this.s != null && this._reportAndRecover();
377
+
340
378
  this.prec_ = caller.prec;
341
379
  if (this.s)
342
- this._skipErrorTokens();
380
+ this._skipErrorTokens(); // TODO: re-think - directly with _reportAndRecover() ?
343
381
  else if (this.s == null)
344
- return !immediately; // attached actions are executed even with "unsuccessful exit"
382
+ return true; // attached actions are executed even with "unsuccessful exit"
345
383
 
346
- if (immediately)
347
- return false;
348
384
  this.errorState = this.s;
349
385
  return true;
350
386
  }
@@ -353,12 +389,12 @@ class BaseParser {
353
389
  lP( first2 ) { // only start rule if this predicate returns true
354
390
  // nothing to check if not a non-reserved keyword:
355
391
  const { keyword: lk1 } = this.tokens[this.tokenIdx];
356
- if (!lk1 || !this.keywords[lk1])
392
+ if (!lk1 || this.keywords[lk1] !== 0)
357
393
  return true;
358
394
 
359
395
  const { type: lt2, keyword: lk2 } = this.tokens[this.tokenIdx + 1];
360
396
  // Argument first2 is just a performance hint with ckP():
361
- if (lk2 && first2?.[0] === 'Id' && this.keywords[lk2] !== false ||
397
+ if (lk2 && first2?.[0] === 'Id' && !this.keywords[lk2] ||
362
398
  first2?.includes( lk2 || lt2 )) {
363
399
  this._tracePush( [ 'K', true ] );
364
400
  return true;
@@ -370,11 +406,13 @@ class BaseParser {
370
406
  throw Error( `Unexpected command '${ cmd?.[0] }' without prediction at state ${ this.s } for ‘${ lk1 }’` );
371
407
 
372
408
  // if not the keyword match, the command is “goto” or “rule call”
373
- const nextState = (cmd[0] === 'ck') ? cmd[1] : this._pred_keyword( cmd[1], lk1 );
409
+ const savedState = this.s;
410
+ this.s = (cmd[0] === 'ck') ? cmd[1] : this._pred_keyword( cmd[1], lk1 );
374
411
 
375
412
  ++this.tokenIdx; // for user lookahead fns and conditions
376
- const match = this._pred_next( nextState, lt2, lk2, 'K' );
413
+ const match = this._pred_next( lt2, lk2, 'K' );
377
414
  --this.tokenIdx;
415
+ this.s = savedState;
378
416
 
379
417
  const r = match ?? true;
380
418
  if (match == null)
@@ -416,34 +454,43 @@ class BaseParser {
416
454
  throw Error( 'Not supported: option for unreserved keywords in follow set' );
417
455
  }
418
456
 
419
- _pred_next( state, type, keyword, mode ) {
457
+ _pred_next( type, keyword, mode ) { // mode = K | E | R | M
458
+ const useConditions = (mode === 'M'); // TODO: extra method with conditions ?
420
459
  let hasEnteredRule = false;
421
- while (state) {
422
- this._traceSubPush( state );
423
- let cmd = this.table[state];
460
+ while (this.s) {
461
+ if (useConditions)
462
+ this._tracePush( this.s );
463
+ else
464
+ this._traceSubPush( this.s );
465
+ let cmd = this.table[this.s];
424
466
  if (!Array.isArray( cmd )) {
425
467
  const lookahead = cmd[' lookahead'];
426
- cmd = lookahead
427
- ? cmd[this[lookahead]( mode )] || cmd['']
428
- : keyword && cmd[keyword] || cmd[type] || cmd[''];
468
+ const c = lookahead // TODO: call with { keyword, type } ?
469
+ ? cmd[this[lookahead]( mode )]
470
+ : keyword && cmd[keyword] || cmd[type];
471
+ cmd = !(c && useConditions && this._rejectCondition( c, mode )) && c || cmd[''];
429
472
  }
430
473
  switch (cmd[0]) {
431
- case 'c': case 'ck': case 'ciA': case 'ckA': // TODO: re-check ckA
474
+ case 'c': case 'ck': case 'ckA': // TODO: re-check ckA
432
475
  return true;
476
+ case 'ciA':
477
+ return mode !== 'R';
478
+ // in the R prediction for optional `Id<reserved>` at rule end, only
479
+ // alternative keyword matches are preferred, not identifier matches
433
480
  case 'ci':
434
481
  if (!keyword ||
435
- this.keywords[keyword] !== false && this.fixKeywordTokenIdx !== this.tokenIdx)
436
- return true;
437
- cmd = this.table[state]['']; // is currently always 'g' or 'e'
482
+ !this.keywords[keyword] && this.fixKeywordTokenIdx !== this.tokenIdx)
483
+ return mode !== 'R';
484
+ cmd = this.table[this.s]['']; // is currently always 'g' or 'e'
438
485
  break;
439
486
  case 'm':
440
487
  return type === cmd[2];
441
488
  case 'mi':
442
- return type === 'Id' &&
489
+ return type === 'Id' && mode !== 'R' &&
443
490
  (!keyword ||
444
- this.keywords[keyword] !== false && this.fixKeywordTokenIdx !== this.tokenIdx);
491
+ !this.keywords[keyword] && this.fixKeywordTokenIdx !== this.tokenIdx);
445
492
  case 'miA':
446
- return type === 'Id';
493
+ return type === 'Id' && mode !== 'R';
447
494
  case 'mk':
448
495
  return keyword === cmd[2];
449
496
  case 'g': case 'e':
@@ -458,11 +505,16 @@ class BaseParser {
458
505
  // rule, we would have to handle `this.stack` and `this.dynamically_`.
459
506
  }
460
507
  // We could optimize with rule call - only 'Id' must be further investigated
461
- state = cmd[1];
508
+ // TODO: actually also with `g`
509
+ // in both cases if no condition is evaluated
510
+ this.s = cmd[1];
511
+ // TODO <prepare=…, arg=…> for real trial run also before all returns
512
+ // if (cmd[5])
513
+ // this.cmd[5]( cmd[4], mode );
462
514
  }
463
515
  // If invalid state, the second token does not match, e.g. for `VIRTUAL +`
464
- // or `VIRTUAL §` (with IllegalToken):
465
- if (state == null)
516
+ // or `VIRTUAL ⎀` (with IllegalToken):
517
+ if (this.s == null)
466
518
  return false;
467
519
 
468
520
  // Otherwise, the parser could end the rule after having matched the keyword
@@ -474,143 +526,207 @@ class BaseParser {
474
526
  return !hasEnteredRule && null; // let caller decide how to interpret this
475
527
  }
476
528
 
477
- _keyword_after_rule( keyword ) {
478
- // TODO: this is a slow implementation - do dedicated traversal later
479
- // It is used in giR() only and this is currently used just once.
480
- // TODO: using mode = 'R' and tracing R(…)
481
- // TODO: investigate why this was not written before adding
482
- // `<default=fallback>` in rule `fromRefWithOptAlias`.
483
- return this._expecting()[keyword];
529
+ _rejectCondition( cmd, mode ) {
530
+ const cond = cmd[3];
531
+ if (!cond)
532
+ return false;
533
+ if (!this.constructor.tracingParser)
534
+ return !this[cond]( mode, cmd[4] );
535
+ // TODO: let this[cond]( true ) return recovery badness in error case
536
+ const { traceName } = this[cond];
537
+ this._tracePush( [ 'C', traceName?.call( this, cmd[4] ) ?? cond ] );
538
+ // calling the condition might have side effects (precedence conditions have)
539
+ // → call tracing “name” before
540
+ const fail = !this[cond]( mode, cmd[4] );
541
+ this._traceSubPush( !fail );
542
+ return fail;
484
543
  }
485
544
 
486
- // Set of expected tokens: for error reporting and recovery -------------------
487
-
488
- // method like _exp_collect - conditions in called rules are evaluated with
489
- // unchanged stack and dynamic & no site-effects (are run with extra mode)
490
-
491
- // Calculate array of expected tokens
492
- _expecting( token ) {
493
- // Remark: rules must not have been exited too early, see _expecting call in re()
494
- const stack = this.stack.slice( 0, this.stack.length );
495
- // Immediately exit rules when no tokens have yet been consumed:
496
- let caller = stack.at( -1 );
497
- while (stack.length && this.tokenIdx === caller.tokenIdx) {
498
- --stack.length;
499
- caller = stack.at( -1 );
500
- }
501
- // Now calculate dictionary of expected tokens:
502
- const expecting = Object.create(null);
503
- let state = this.errorState;
504
- // At potential rule end, we must add follow sets of outer rules
505
- // TODO: we also need to unravel this.dynamic_ for translateParserToken_()
506
- while ((!state || this._exp_collect( expecting, this.table[state] )) && stack.length)
507
- state = stack.pop().followState;
508
-
509
- // Remove token (TODO later: instead, use conditions when collecting tokens):
510
- if (token) {
511
- const { keyword, type } = token;
512
- if (keyword && expecting[keyword] === true)
513
- delete expecting[keyword];
514
- else if (expecting[type] === true)
515
- delete expecting[type];
545
+ _matchesInFollow( type, keyword, mode ) { // mode = E | R
546
+ this._tracePush( [ mode, 0 ] );
547
+ const savedState = this.s;
548
+ // TODO: caching
549
+ const { dynamic_ } = this;
550
+ let match;
551
+ let depth = this.stack.length;
552
+ // TODO: currently assumes that lookahead does not use stack.at()
553
+ while (match == null && --depth) {
554
+ this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
555
+ this.s = this.stack[depth].followState;
556
+ match = this._pred_next( type, keyword, mode );
557
+ this._traceSubPush( match == null ? 0 : match === (mode !== 'R') );
558
+ // successfully matching a keyword in giR() means unsuccessful match as
559
+ // reserved identifier
516
560
  }
517
- return expecting;
561
+ this.dynamic_ = dynamic_;
562
+ this.s = savedState;
563
+ return match;
518
564
  }
519
565
 
520
- // TODO: use iterative alg, no recursive call, return state instead -----------
521
- // Add expected tokens to dictionary `expecting` starting at command `cmd`.
522
- // Return true if the rule end is reached, i.e. we also need to add the expected
523
- // tokens at the follow state of the current rule. Argument `prop` is the token
524
- // name for `cmd` in a decision.
525
-
526
- // translateParserToken must work, i.e. this.stack and this.dynamic_ must be
527
- // according to stack level
528
- _exp_collect( expecting, cmd, prop, val = true ) {
529
- if (prop != null)
530
- cmd = cmd[prop];
531
- else if (!cmd) // called on follow state of start rule
532
- return false;
533
-
534
- if (!Array.isArray( cmd )) {
535
- let reachedRuleEnd = false;
536
- for (const tok in cmd) {
537
- // TODO: except for `Id`, we can directly continue if `tok` is already in
538
- // `expecting`
539
- if (Object.hasOwn( cmd, tok ) && tok.charAt(0) !== ' ' &&
540
- this._exp_collect( expecting, cmd, tok, val ))
541
- reachedRuleEnd = true;
566
+ _confirmExpected( token, saved ) { // mode = M
567
+ const [ type, keyword ] = (/^[_a-z]/.test( token )) ? [ 'Id', token ] : [ token ];
568
+ Object.assign( this.la(), { type, keyword } );
569
+ this._cloneFromSaved( saved );
570
+ this.trace = [];
571
+ let match;
572
+ while (this.stack.length) {
573
+ match = this._pred_next( type, keyword, 'M' );
574
+ if (match != null) {
575
+ this._tracePush( { true: '✔', false: '✖' }[match] );
576
+ break;
542
577
  }
543
- return reachedRuleEnd;
578
+ this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
579
+ this.s = this.stack.pop().followState;
544
580
  }
545
- switch (cmd[0]) {
546
- case 'c': case 'ck':
547
- expecting[prop] ??= val;
548
- return false;
549
- case 'ckA':
550
- for (const tok of this.translateParserToken_( prop ))
551
- expecting[tok] ??= val;
552
- return false;
553
- case 'm': case 'mk':
554
- expecting[cmd[2]] ??= val;
555
- return false;
556
- case 'ci': case 'ciA': case 'mi': case 'miA':
557
- expecting['Id'] ??= val;
558
- // TODO: should we do s/th special, such that a reserved word is a sync
559
- // token for Id<all>? Probably not, see also comment in _findSyncToken()
560
- return false;
561
- case 'g': case 'gi':
562
- if (!cmd[1])
563
- return cmd[1] === 0;
564
- //(this.stack[this.stack.length - 1].tokenIdx === this.tokenIdx);
565
- // TODO: add some assertion in generation that a decision has no two `g`s
566
- // to the same state (both in cases and default)
567
- // UPDATE: no, there will be at least gP()s
568
- // TOOD: do properly for (...)+ - currently, the token for directly
569
- // exiting the rule is also collected
570
- return this._exp_collect( expecting, this.table[cmd[1]], undefined, val );
571
- default:
572
- // a called rule must match at least one token → after having called a
573
- // rule, do not collect expecting tokens after exiting the rule
574
- if (typeof cmd[0] === 'number')
575
- this._exp_collect( expecting, this.table[cmd[1]], undefined, val );
576
- return false;
581
+ if (this.constructor.tracingParser) {
582
+ this.stack = saved.stack; // influences indentation
583
+ this._trace( tokenName( token ), 2 );
584
+ }
585
+ return match ?? true;
586
+ }
587
+
588
+ // Set of expected and sync tokens: for error reporting and recovery ----------
589
+
590
+ // Calculate array of expected tokens / error sync set
591
+ _calculateTokenSet( mode ) { // mode = M | Y
592
+ this._tracePush( [ mode ] );
593
+ // TODO later (after trying different synchronization tokens), we could use
594
+ // one set for both M and Y, the latter just adds more tokens to it
595
+ const savedState = this.s;
596
+ const savedDynamic = this.dynamic_;
597
+ const savedStack = this.stack;
598
+ this.stack = [ ...savedStack ];
599
+ this.s = this.errorState;
600
+
601
+ const set = Object.create(null);
602
+ // Add follow sets of outer rules if at potential rule end
603
+ if (mode === 'M') { // for messages
604
+ while (this.stack.length && this._tokenSetInRule( set, true )) {
605
+ this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
606
+ this.s = this.stack.pop().followState;
607
+ }
577
608
  }
609
+ else { // or always when calculating the sync-set
610
+ let val = this.stack.length + 1;
611
+ while (this.stack.length) {
612
+ if (!this._tokenSetInRule( set, val ))
613
+ val = this.stack.length;
614
+ // TODO: use new _tracePush if `val` changes, probably also use Y‹val›(…)
615
+ this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
616
+ this.s = this.stack.pop().followState;
617
+ }
618
+ set.EOF ??= 0;
619
+ }
620
+ this.stack = savedStack;
621
+ this.s = savedState; // should be the errorState anyway - TODO: confirm
622
+ this.dynamic_ = savedDynamic;
623
+ return set;
624
+ }
625
+
626
+ // Filter after this fn for conditions via interpreter call after: consider
627
+ // ( <prefer, guard=fail> 'foo' | rule ) with
628
+ // rule : 'foo' | Id ;
629
+ // doing it already here would list `foo` as expected token
630
+ _tokenSetInRule( expecting, val, cmd ) {
631
+ const savedDynamic = this.dynamic_;
632
+ const savedState = this.s;
633
+ let enteredRules = 0;
634
+ loop: while (this.s) {
635
+ cmd ??= this.table[this.s];
636
+ if (!Array.isArray( cmd )) {
637
+ const lookahead = cmd[' lookahead'];
638
+ const dict = cmd;
639
+ for (const prop in dict) {
640
+ if (prop && Object.hasOwn( dict, prop ) && prop !== 'Id' &&
641
+ !Object.hasOwn( expecting, prop ) && prop.charAt(0) !== ' ') {
642
+ if (lookahead) { // yes, independently from ckA()
643
+ for (const p of this.translateParserToken_( prop, lookahead ))
644
+ expecting[p] = val;
645
+ }
646
+ else {
647
+ expecting[prop] = val;
648
+ }
649
+ }
650
+ }
651
+ cmd = dict[''];
652
+ if (dict.Id) {
653
+ // recursive call only if Id branch with non-error default branch
654
+ if (cmd[0] === 'e') {
655
+ cmd = dict.Id;
656
+ }
657
+ else { // Id branch never leads to rule exit:
658
+ this._tracePush( [ '[' ] );
659
+ this._tokenSetInRule( expecting, val, dict.Id );
660
+ this._tracePush( [ ']' ] );
661
+ }
662
+ }
663
+ }
664
+ this._traceSubPush( this.s );
665
+ switch (cmd[0]) {
666
+ case 'm': case 'mk':
667
+ expecting[cmd[2]] ??= val;
668
+ break loop;
669
+ case 'ci': case 'ciA': case 'mi': case 'miA':
670
+ expecting['Id'] ??= val;
671
+ // TODO: should we do s/th special, such that a reserved word is a sync
672
+ // token for Id<all>? Probably not, see also comment in
673
+ // _findSyncToken()
674
+ break loop;
675
+ case 'g': case 'gi': case 'e':
676
+ break;
677
+ default:
678
+ if (typeof cmd[0] !== 'number')
679
+ throw Error( `Unexpected command ${ cmd[0] } at state ${ this.s }` );
680
+ ++enteredRules; // conditions might use stack/dynamic_
681
+ // core rule_():
682
+ this.stack.push( {
683
+ ruleState: cmd[1],
684
+ followState: cmd[0],
685
+ tokenIdx: this.tokenIdx,
686
+ prec: this.prec_,
687
+ } );
688
+ this.dynamic_ = Object.create( this.dynamic_ );
689
+ this.prec_ = null;
690
+ }
691
+ this.s = cmd[1];
692
+ cmd = null;
693
+ }
694
+ const inspectOuterRules = (this.s === 0 && !enteredRules);
695
+ this.s = savedState;
696
+ this.dynamic_ = savedDynamic;
697
+ this.stack.length -= enteredRules;
698
+ return inspectOuterRules;
578
699
  }
579
700
 
580
701
  translateParserToken_( token ) {
581
702
  return [ token ];
582
703
  }
583
704
 
584
- // Error recovery -------------------------------------------------------------
585
-
586
- _calculateSyncSet() {
587
- const { stack, dynamic_ } = this;
588
- let { length } = stack;
589
- while (stack[--length].tokenIdx === this.tokenIdx && length)
590
- this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
591
- this.stack = stack.slice( 0, ++length );
592
-
593
- // needs (copy of) "real stack"
594
- const syncSet = {};
595
- let depth = length + 1;
596
- if (!this._exp_collect( syncSet, this.table[this.errorState], undefined, depth ))
597
- --depth;
598
- while (this.stack.length) {
599
- this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
600
- const caller = this.stack.pop();
601
- // this.stack and this.dynamic_ must be changed for parser token
602
- // translation:
603
- if (caller.followState > 0 &&
604
- !this._exp_collect( syncSet, this.table[caller.followState], undefined, depth ))
605
- depth = this.stack.length;
606
- }
607
- syncSet.EOF ??= 0;
608
- this.stack = stack;
609
- this.dynamic_ = dynamic_;
610
- return { rewindDepth: length, syncSet };
705
+ // Error reporting and recovery -----------------------------------------------
706
+
707
+ expectingArray_() {
708
+ const token = this.la();
709
+ const set = this._calculateTokenSet( 'M' );
710
+ // Speed-up: delete current token
711
+ const { keyword, type } = token;
712
+ if (keyword && set[keyword] === true)
713
+ delete set[keyword];
714
+ else if (set[type] === true && !(keyword && this.keywords[keyword]))
715
+ delete set[type]; // delete Id if Id token or non-reserved keyword
716
+
717
+ this._trace( 'collect tokens for message,' );
718
+ const saved = this._saveForWalk();
719
+ const expecting = Object.keys( set )
720
+ .filter( tok => this._confirmExpected( tok, saved ) );
721
+ token.type = type; // overwritten by _confirmExpected
722
+ token.keyword = keyword;
723
+ Object.assign( this, saved );
724
+ // TODO: also trace M(…) collection, extra line for each token, with condition
725
+ return expecting;
611
726
  }
612
727
 
613
- _findSyncToken( syncSet, rewindDepth ) {
728
+ _findSyncToken( syncSet ) {
729
+ const rewindDepth = this.stack.length
614
730
  this.recoverTokenIdx = this.tokenIdx;
615
731
  while (this.recoverTokenIdx <= this.eofIndex) {
616
732
  const { keyword, type } = this.tokens[this.recoverTokenIdx];
@@ -620,8 +736,8 @@ class BaseParser {
620
736
  const tryType = syncSet[type];
621
737
  // sync to Id only if in expected set of last good state or if after ';'
622
738
  if (tryType != null &&
623
- (type !== 'Id' || (!keyword || this.keywords[keyword] !== false) &&
624
- // reserved words do not match Id in expected-set, see _exp_collect()
739
+ (type !== 'Id' || (!keyword || !this.keywords[keyword]) &&
740
+ // reserved words do not match Id in expected-set
625
741
  (tryType > rewindDepth || this.tokens[this.recoverTokenIdx - 1].type === ';')))
626
742
  return tryType;
627
743
  ++this.recoverTokenIdx;
@@ -629,7 +745,7 @@ class BaseParser {
629
745
  throw Error( 'EOF must be last in `tokens`' );
630
746
  }
631
747
 
632
- _recoverFromError( rewindDepth, recoverDepth ) {
748
+ _recoverFromError( recoverDepth ) {
633
749
  this.s = null;
634
750
  let depth = this.stack.length;
635
751
  if (recoverDepth > depth) { // no rewind, no rule exit
@@ -638,9 +754,6 @@ class BaseParser {
638
754
  if (this.s)
639
755
  this._skipErrorTokens();
640
756
  }
641
- else if (recoverDepth > rewindDepth) { // rewind, no rule exit
642
- this.stack[rewindDepth].followState = this.errorState;
643
- }
644
757
  while (depth > recoverDepth)
645
758
  this.stack[--depth].followState = null;
646
759
  // TODO: when the error is due to failed rule exit prediction, try to keep
@@ -673,30 +786,21 @@ class BaseParser {
673
786
  console.log( ...args );
674
787
  }
675
788
 
676
- expectingForMessage_( token ) {
677
- return Object.keys( this._expecting( token ) ).map( tokenName ).sort().join( ',' );
678
- }
679
-
680
789
  reportError_( location, text ) {
681
790
  this.$hasErrors = true;
682
791
  this.log( `${ location }:`, text );
683
792
  }
684
793
 
685
- reportUnexpectedToken_( token ) {
686
- this.reportError_( token.location,
687
- `Unexpected token ${ tokenFullName( token, ': ' ) } - expecting: ` +
688
- this.expectingForMessage_( token ) );
689
- }
690
-
691
- reportInternalError_( token ) {
692
- this.reportError_( token.location,
693
- `Unexpected token at ${ tokenFullName( token, ': ' ) } - skipped one token` );
794
+ reportUnexpectedToken_( msg ) {
795
+ const token = this.la();
796
+ msg ??= `Unexpected token ${ tokenFullName( token, ': ' ) }`;
797
+ this.reportError_(
798
+ token.location, msg + ' - expecting: ' +
799
+ this.expectingArray_().map( tokenName ).sort().join( ',' ) );
694
800
  }
695
801
 
696
- reportReservedWord_( token ) {
697
- this.reportError_( token.location,
698
- `Unexpected reserved word ‘${ token.text }’ - expecting: ` +
699
- this.expectingForMessage_() );
802
+ reportReservedWord_() {
803
+ this.reportUnexpectedToken_( `Unexpected reserved word ‘${ this.la().text }’` );
700
804
  }
701
805
 
702
806
  errorAndRecoverOutside( token, text ) { // TODO: re-check
@@ -717,22 +821,28 @@ class BaseParser {
717
821
  this.trace.at(-1).push( state );
718
822
  }
719
823
  traceAction( location ) { // TODO: remove
720
- this._trace( location );
824
+ this._trace( 1, location );
721
825
  }
722
826
 
723
- _trace( msg, la ) {
827
+ _trace( msg, la = this.la() ?? this.lb() ) {
724
828
  if (!this.constructor.tracingParser)
725
829
  return;
726
830
  // indentation according to rule call depth is nice, but only if without
727
831
  // excessive spaces → truncate:
728
832
  const indent = ' '.repeat( this.stack.length % 32 );
729
- if (!la) {
833
+ if (msg === 1) {
730
834
  let line = ' execute action'; // align with non-action messages
731
835
  if (this.trace.length > 1) { // i.e. with some 'g' command
732
836
  line += ', states: ' + this.trace.map( traceStep ).join( ' → ' );
733
837
  this.trace = [ this.s ?? '⚠' ];
734
838
  }
735
- this.log( indent, line, `(${ msg })` );
839
+ this.log( indent, line, `(${ la })` );
840
+ return;
841
+ }
842
+ else if (la === 2) {
843
+ this.log( indent, ' ', msg + ':',
844
+ this.trace.map( traceStep ).join( ' → ' ) );
845
+ this.trace = [ this.s ?? '⚠' ];
736
846
  return;
737
847
  }
738
848
  const { location } = la;