@sap/cds-compiler 5.1.0 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/bin/cdsc.js +2 -2
- package/bin/cdshi.js +24 -17
- package/bin/cdsse.js +17 -18
- package/lib/api/main.js +19 -2
- package/lib/api/options.js +4 -1
- package/lib/base/builtins.js +1 -0
- package/lib/base/message-registry.js +16 -3
- package/lib/base/model.js +0 -10
- package/lib/checks/actionsFunctions.js +0 -12
- package/lib/checks/structuredAnnoExpressions.js +10 -14
- package/lib/compiler/assert-consistency.js +19 -11
- package/lib/compiler/builtins.js +1 -1
- package/lib/compiler/define.js +6 -4
- package/lib/compiler/extend.js +5 -5
- package/lib/compiler/populate.js +9 -9
- package/lib/compiler/propagator.js +1 -0
- package/lib/compiler/resolve.js +29 -34
- package/lib/compiler/shared.js +7 -8
- package/lib/compiler/tweak-assocs.js +155 -64
- package/lib/compiler/utils.js +1 -1
- package/lib/compiler/xpr-rewrite.js +4 -3
- package/lib/edm/annotations/genericTranslation.js +13 -9
- package/lib/edm/csn2edm.js +26 -2
- package/lib/edm/edm.js +23 -8
- package/lib/edm/edmInboundChecks.js +5 -7
- package/lib/edm/edmPreprocessor.js +43 -30
- package/lib/gen/BaseParser.js +720 -0
- package/lib/gen/CdlParser.js +4421 -0
- package/lib/gen/language.checksum +1 -1
- package/lib/gen/language.interp +1 -1
- package/lib/gen/languageParser.js +4006 -4001
- package/lib/language/antlrParser.js +62 -0
- package/lib/language/genericAntlrParser.js +28 -0
- package/lib/model/csnUtils.js +2 -0
- package/lib/model/revealInternalProperties.js +2 -0
- package/lib/modelCompare/utils/filter.js +70 -42
- package/lib/optionProcessor.js +9 -3
- package/lib/parsers/AstBuildingParser.js +1172 -0
- package/lib/parsers/CdlGrammar.g4 +1940 -0
- package/lib/parsers/Lexer.js +239 -0
- package/lib/render/toCdl.js +23 -27
- package/lib/render/toSql.js +5 -5
- package/lib/transform/db/applyTransformations.js +54 -16
- package/lib/transform/draft/odata.js +10 -11
- package/lib/transform/effective/flattening.js +10 -14
- package/lib/transform/odata/flattening.js +42 -31
- package/lib/transform/odata/toFinalBaseType.js +7 -6
- package/lib/transform/universalCsn/universalCsnEnricher.js +1 -0
- package/package.json +2 -2
- package/share/messages/redirected-to-ambiguous.md +5 -4
|
@@ -0,0 +1,720 @@
|
|
|
1
|
+
// Base class for generated parser, for redepage v0.1.7
|
|
2
|
+
|
|
3
|
+
'use strict';
|
|
4
|
+
|
|
5
|
+
// Runtime base class of the generated, table-driven parser.
//
// A generated subclass drives the parse by calling the short-named methods
// below (`l`, `m`, `c`, `g`, … and their variants).  The instance keeps:
//  - `tokens` / `tokenIdx`: the token array and the index of the lookahead token,
//  - `s`: the state to continue with; `0` is the regular rule end, `null` marks
//    the current rule for premature exit (error recovery),
//  - `errorState`: the state from which the set of expected tokens is computed,
//  - `stack`: one entry `{ ruleState, followState, tokenIdx, prec }` per active
//    rule call,
//  - `keywords`: dictionary keyword → `false` for reserved keywords, a truthy
//    value for unreserved ones,
//  - `table`: the parser table; entries of type string are rule names and act
//    as rule boundaries, other entries are commands or decision dictionaries.
// Tracing output is only produced if the static property `tracingParser` of the
// (sub)class is truthy.
class BaseParser {
  // `lexer` must provide `tokenize( parser )`, see init().
  constructor( lexer, keywords, table ) {
    this.keywords = keywords;
    this.table = table;
    this.lexer = lexer;
    this.tokens = undefined;
    this.eofIndex = undefined;
    this.tokenIdx = 0;
    // Token index and stack depth of the last condition/precedence check, used
    // to ignore the same condition when it is re-tested during error recovery:
    this.conditionTokenIdx = -1;
    this.conditionStackLength = -1;
    this.nextTokenAsId = false; // next lk() returns the token type, not the keyword

    this.s = null;
    this.errorState = null;
    this.stack = [];            // [{ ruleState, followState, tokenIdx, prec }]
    this.dynamic_ = {};         // one prototype-chained object per active rule
    this.prec_ = null;          // precedence of previous operator in current rule
    this.$hasErrors = null;
    // trace:
    this.trace = [ -1 ];
  }

  // Run the lexer and remember the index of the last token.  The lexer is
  // expected to set `this.tokens` (presumably ending with an EOF token).
  // Returns the parser itself for call chaining.
  init() {
    this.lexer.tokenize( this );
    this.eofIndex = this.tokens.length - 1;
    return this;
  }

  // methods for actions --------------------------------------------------------

  la() {                        // lookahead: complete token
    return this.tokens[this.tokenIdx];
  }
  lb() {                        // look back: complete token
    return this.tokens[this.tokenIdx - 1];
  }
  lr() {                        // return the first token matched by current rule
    return this.tokens[this.stack[this.stack.length - 1].tokenIdx];
  }

  // lookahead, error: ----------------------------------------------------------

  l() {                         // lookahead: token type
    return this.tokens[this.tokenIdx].type;
  }

  // instead of l() if keyword (reserved and/or unreserved) is in one of the cases
  // Returns the keyword of the lookahead token if it has one, otherwise its
  // type.  If `nextTokenAsId` had been set (by ei(), gi(), giA(), giR(), lP()),
  // the flag is reset and the token type is returned.
  lk() {                        // keyword lookahead
    const la = this.tokens[this.tokenIdx];
    if (!this.nextTokenAsId)
      return la.keyword || la.type;
    this.nextTokenAsId = false;
    return la.type;
  }

  // Report the lookahead token as unexpected and set up the parser state for
  // error recovery.  Always returns false such that no action is executed.
  e() {                         // error: report and recover
    const la = this.tokens[this.tokenIdx];
    const expecting = this._expecting();
    if (this.trace.length > 1)
      this._trace( 'detected parsing error,' );
    this.reportUnexpectedToken_( la );
    la.parsed = 0;

    if (this.conditionTokenIdx === this.tokenIdx &&
        this.conditionStackLength === this.stack.length &&
        (la.keyword && expecting[la.keyword] || expecting[la.type])) {
      // called with/after gc()/gp(), and the token would actually match
      const { tokenIdx, ruleState } = this.stack.at( -1 );
      this.s = (this.tokenIdx > tokenIdx) ? this.errorState : ruleState;
      return false;             // error recovery: ignore condition/precedence
    }
    // The erroneous token is skipped; the recover methods see the next one:
    if (++this.tokenIdx > this.eofIndex)
      return this._stopParsing( this.stack.length );
    // TODO: also sync to what comes next in current rule, at least after rule call,
    // this way we do not have to do the check of g(0) in re() as we did before 2023-12-07
    // (not sure yet whether to make it part of recoverInline or recoverPanicMode),
    if (!this._recoverInline( expecting ))
      this._recoverPanicMode();
    return false;
  }

  // instead of e() in default if lk() had been used and 'Id' is in a non-default case
  ei() {                // error (after trying to test again as identifier)
    if (!this.tokens[this.tokenIdx].keyword) // lk() had directly returned the type
      return this.e();
    this._traceIdOrPred( '-Id' );
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // goto state: ----------------------------------------------------------------

  // go to end of the rule, in tracing parser: g(0)
  // Returns true if the rule can be left at the current token, otherwise
  // reports an error via e() and returns false.  `follow` is an optional array
  // of token names used as fast positive check before the dynamic follow-set
  // is calculated.
  gr( follow ) {                // intersection follow set for fast exit
    if (this.stack[this.stack.length - 1].tokenIdx === this.tokenIdx)
      return this.e();          // match at least one token
    this.s = 0;
    // TODO: also have recursive flag in stack: was the rule called recursively?
    // extra val 'gr' when rule was called when it could reach the rule end
    const { type: lt, keyword: lk } = this.tokens[this.tokenIdx];
    if (lk &&      // Id also for unreserved, except after condition failure
        follow?.[0] === 'Id' && this.keywords[lk] !== false &&
        this.conditionTokenIdx !== this.tokenIdx ||
        follow?.includes( lk || lt ))
      return true;

    // Do we have possibilities to stay in rule with error recovery?
    const expecting = this._expecting( 0 ); // dynamic follow-set
    // TODO: improve performance: no check needed for a rule-end directly after
    // a rule end: the second is definitely successful if the first was.
    // TODO: do not calculate the complete dynamic follow-set, provide dedicated
    // function to test whether the next token is valid
    // we might also cache the result in the stack
    // ok: lk or lt -> lk=e or (lt=e && (not cond || not keyw)
    if (expecting[lk] ||
        // if at failed condition, do not make Id in follow end the rule
        // (assuming that there is no condition for `Id` at optional rule end):
        expecting[lt] && !(lk && this.conditionTokenIdx === this.tokenIdx))
      return true;

    return this.e();
  }

  // go to state; non-tracing parser: `this.s=‹state›` or `this.gr()`
  // `state == null` reports an error, state `0` goes to the rule end via gr().
  g( state, follow ) {
    if (!(state == null ? this.e() : state || this.gr( follow )))
      return false;
    this.s = state;   // is just `this.s=‹state›` in non-trace parser
    this._tracePush( this.s );
    return true;
  }

  // instead of gi() for `Id_all`
  giA( state, follow ) { // go to state (after trying to test again as identifier)
    if (!this.tokens[this.tokenIdx].keyword) // lk() had directly returned the type
      return this.g( state, follow );
    this._traceIdOrPred( '-Id' );
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // instead of g() in default if lk() had been used and 'Id' is in a non-default case
  gi( state, follow ) { // go to state (after trying to test again as identifier)
    const lk = this.tokens[this.tokenIdx].keyword;
    // As opposed to ei(), we also check for reserved keywords here; this way, we
    // do not have to add reserved keywords from the follow-set to the `switch`.
    if (!lk || this.keywords[lk] === false)
      return this.g( state, follow );
    this._traceIdOrPred( '-Id' );
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // instead of gi() at rule end (RuleEnd_ in follow-set) for `Id_restricted`
  giR( state, follow ) { // go to state (after trying to test again as identifier)
    const lk = this.tokens[this.tokenIdx].keyword;
    if (!lk || this.keywords[lk] === false || this._keyword_after_rule( lk ))
      return this.g( state, follow );
    this._traceIdOrPred( '-Id' );
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // instead of g() in a non-default case if there is a LL1 conflict
  gP( state ) {         // goto state with standard weak-conflict prediction
    return this.lP() && this.g( state );
  }

  // match and consume token: ---------------------------------------------------

  m( state, token ) {           // match token = compare and consume
    return (this.tokens[this.tokenIdx].type === token)
      ? this.c( state )
      : this.e();
  }

  // instead of m() for identifiers via `Id` or `Id_restricted`
  // Remark: the default for `ident` is `true` here, but 'ident' in ci().
  mi( state, ident = true ) {   // match identifier token
    return (this.tokens[this.tokenIdx].type === 'Id')
      ? this.ci( state, ident )
      : this.e();
  }

  // instead of mi() for `Id_all`
  miA( state, ident = true ) {  // match identifier token
    return (this.tokens[this.tokenIdx].type === 'Id')
      ? this.ciA( state, ident )
      : this.e();
  }

  // instead of m() for reserved keywords or unreserved without conflict:
  mk( state, token ) {          // match keyword token
    return (this.tokens[this.tokenIdx].keyword === token)
      ? this.ck( state )
      : this.e();
  }

  // Mark the lookahead token as parsed (property `parsed` is set to argument
  // `parsed`), advance to the next token unless already at the last one, and
  // continue with `state`.  Always returns true.
  c( state, parsed = 'token' ) { // consume token
    const la = this.tokens[this.tokenIdx];
    la.parsed = parsed;
    if (this.tokenIdx < this.eofIndex)
      ++this.tokenIdx;
    // TODO: handle identifier-including-reserved-words later (e.g. for id after a `.`)
    this.s = state;
    this.errorState = state;
    if (this.constructor.tracingParser)
      this._trace( `consume ${ tokenFullName( la, ' as ' ) },`, la );
    return true;
  }

  // instead of c() for identifiers, used both with l() and lk()
  ci( state, ident = 'ident' ) { // consume identifier token
    const la = this.tokens[this.tokenIdx];
    if (this.keywords[la.keyword] === false)
      this.reportReservedWord_( la );
    // with error recovery: use that (consider this having a good score)
    return this.c( state, ident );
  }

  // instead of ci() for `Id_all`, used both with l() and lk()
  ciA( state, ident = 'ident' ) { // consume identifier token, the "All" variant
    return this.c( state, ident );
  }

  // instead of c() for reserved or unreserved without conflict, requires lk()
  ck( state ) {                 // consume keyword token
    return this.c( state, 'keyword' );
  }

  // instead of ck() if there is a LL1 conflict
  ckP( state, first2 ) { // consume unreserved keyword with weak conflict
    return this.lP( first2 ) && this.ck( state );
  }

  // for parser token
  ckA( state ) {
    // if it really should be considered an Id, `set this.la().parsed` yourself
    return this.c( state, (this.l() === 'Id' ? 'keyword' : 'token') );
  }

  // condition and precedence handling ------------------------------------------

  // state must match the goto-state of the default (there must be no default
  // action), or null for error, lP() must have been used before.  There is no
  // “or Id” behavior other than via gpP()

  // “go if user condition fails”
  // `cond` is the name of a parser method which is called with argument `true`.
  // Returns true if the condition holds (or is to be ignored in error
  // recovery); otherwise goes to `state` via g() and returns false.
  gc( state, cond ) {
    if (this.conditionTokenIdx === this.tokenIdx &&
        this.conditionStackLength === this.stack.length)
      return true;              // error recovery: ignore condition
    this.conditionTokenIdx = this.tokenIdx;
    this.conditionStackLength = this.stack.length;
    // TODO: let this[cond]( true ) return recovery badness in error case
    const fail = !this[cond]( true );
    if (this.constructor.tracingParser)
      this._tracePush( `${ fail ? '¬' : '✓' } ${ cond }` );
    return !fail || this.g( state ) && false;
  }

  // “error if user condition fails”
  ec( cond ) {
    return this.gc( null, cond );
  }

  // “go if precedence condition fails”
  // `prec` is the precedence of the operator to be parsed, `mode` is one of
  // 'post', 'none', 'right' or something else for the default handling.
  // Returns true if the precedence condition holds (or is to be ignored in
  // error recovery); otherwise goes to `state` via g() and returns false.
  gp( state, prec, mode ) {
    if (this.conditionTokenIdx === this.tokenIdx &&
        this.conditionStackLength === this.stack.length) {
      // the precedence of the previous operator is stored in `prec_`:
      this._tracePush( `(${ this.prec_ })!` );
      return true;              // error recovery: ignore condition
    }
    this.conditionTokenIdx = this.tokenIdx;
    this.conditionStackLength = this.stack.length;
    const parentPrec = this.stack.at( -1 ).prec ?? -Infinity;
    const fail = prec <= parentPrec ||
          this.prec_ != null && // previous op parsed by current rule
          // <…,postfix> || <…,assoc=none>, <…,postfix=once>:
          (mode === 'post' && prec > this.prec_ || mode === 'none' && prec >= this.prec_);
    if (this.constructor.tracingParser) {
      const pp = (parentPrec === -Infinity) ? '-∞' : parentPrec;
      const tp = (this.prec_ == null) ? '∞' : this.prec_;
      const suffix = mode === 'post' && ` ≤ ${ tp }` || mode === 'none' && ` < ${ tp }`;
      this._tracePush( `${ fail ? '¬' : '✓' }(${ pp } < ${ prec }${ suffix || '' })` );
    }
    if (fail)
      return this.g( state ) && false; // TODO: reset this.prec_ ?
    this.prec_ = (mode === 'right') ? prec - 1 : prec; // -1: <…,assoc=right>, <…,prefix>
    return true;
  }

  // “error if precedence condition fails”
  ep( prec, mode ) {
    return this.gp( null, prec, mode );
  }

  // rule start, end and call: --------------------------------------------------

  // Push a stack entry for the rule starting at `state`; after the rule has been
  // exited, the parser continues at `followState`.
  rule_( state, followState = -1 ) { // start rule
    this.stack.push( {
      ruleState: state,
      followState,
      tokenIdx: this.tokenIdx,
      prec: this.prec_,
    } );
    this.dynamic_ = Object.create( this.dynamic_ );
    this.s = state;
    this.prec_ = null;
    this.conditionTokenIdx = -1;
    this.errorState ??= state;
    this._trace( [ state, 'call rule', '', ' at alt start', -1 ] );
  }

  // Pop the stack entry of the current rule and continue at its follow state.
  // If `rulePrecMethod` is provided, it is the name of a parser method which is
  // called with the popped stack entry to calculate the new value for `prec_`.
  // Returns whether the action after the rule call is to be executed.
  // Throws if `this.s` is truthy, i.e. if the rule end had not been reached.
  exit_( rulePrecMethod ) {     // exit rule
    if (this.s)
      throw Error( `this.s === ${ this.s } // illegally set by action, or runtime/generator bug` );
    this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
    const caller = this.stack.pop();
    this.s = caller.followState;
    this.prec_ = (rulePrecMethod) ? this[rulePrecMethod]( caller ) : caller.prec;
    this._trace( [ caller.ruleState, 'exit rule', '', '', 1 ] );
    //if (this.errorState == 0 || this.s != null)
    this.errorState = this.s;
    // execute actions if not in error recovery (pass-through) and at least one
    // token has been matched in rule:
    return this.s != null && this.tokenIdx > caller.tokenIdx;
  }

  // predicate used before rule call if with LL(1) conflict, 'Id' in other case
  // Returns true if the lookahead token is to be handled as keyword.  Otherwise
  // sets `nextTokenAsId` and returns false.
  lP( first2 ) {     // only start rule if this predicate returns true
    const { type: lt2, keyword: lk2 } = this.tokens[this.tokenIdx + 1];
    // Argument first2 is just a performance hint with ckP():
    if (lk2 && first2?.[0] === 'Id' && this.keywords[lk2] !== false ||
        first2?.includes( lk2 || lt2 ))
      return true;

    // nothing to check if not a non-reserved keyword:
    const { keyword: lk1 } = this.tokens[this.tokenIdx];
    if (!lk1 || !this.keywords[lk1])
      return true;

    // now check it dynamically:
    let cmd = this.table[this.s][lk1];
    if (typeof cmd === 'string')
      cmd = this.table[this.s][cmd];
    if (!Array.isArray( cmd ) || cmd[2] !== 1)
      throw Error( `Unexpected command '${ cmd?.[0] }' without prediction at state ${ this.s } for ‘${ lk1 }’` );

    this._traceIdOrPred( '-P1' );
    const nextState = (cmd[0] === 'ck') ? cmd[1] : this._pred_keyword( cmd[1], lk1 );

    if (this._pred( nextState, lt2, lk2 ))
      return true;
    if (lt2 === 'IllegalToken') // TODO: keep?
      return true;
    // TODO: instead of this IllegalToken test, set tokenIndex+nextState for extra
    // expected calculation if parser fails after Id - we would then also add the
    // expected tokens after keyword-interpretation
    this._traceIdOrPred( '-Id' );
    this.nextTokenAsId = true;
    return false;               // do not execute action after it
  }

  // Now the helper methods =====================================================

  // Standard weak-conflict predicate -------------------------------------------
  // Weak (and fast) single-step walk and test (no rule exit, start is fine): for
  // pg(), pr().  The main point is that we do not (again) consider predicates.
  // Currently just tests against the token _type_ of the next token, not its
  // specific keyword; see comments below for details.

  // Returns whether the second lookahead token (type `lt2`, keyword `lk2`) can
  // be matched starting at state `nextState`.
  _pred( nextState, lt2, lk2 ) {
    if (nextState) {
      // return this._pred_test( nextState, lt2 );
      const r = this._pred_next( nextState, lt2, lk2 );
      this._tracePush( this.s );
      return r;
    }
    // dubious weak conflict at end of rule:
    this._traceIdOrPred( '-P0' );
    this._tracePush( this.s );
    return true;                // dubious
  }

  // Walk the table from `state` without consuming input.
  // Throws on commands other than keyword consumption, goto and rule call, and
  // if the rule end is reached.
  _pred_keyword( state, keyword ) {
    // returns next state for first token as keyword, for lP()
    while (state) {
      this._tracePush( `${ state }-P1` );
      let cmd = this.table[state];
      if (!Array.isArray( cmd )) {
        const alt = cmd[keyword] || cmd.Id; // Id to cover optimized rule call
        cmd = (typeof alt === 'string')
          ? cmd[alt]
          : typeof alt === 'number' && [ 'g', alt ] || alt || [ 'g', cmd[''] ];
      }
      switch (cmd[0]) {
        case 'ck': case 'mk':
          return cmd[1];        // state after token consumption
        case 'g':
          break;
        default:
          if (typeof cmd[0] !== 'number')
            throw Error( `Unexpected command ${ cmd[0] } at state ${ this.s }` );
      }
      state = cmd[1];
    }
    // reached end of rule without having consumed a token
    throw Error( 'Not supported: option for unreserved keywords in follow set' );
  }

  // Walk the table from `state` up to the first command which matches or
  // consumes a token, and return whether that command accepts a token with the
  // provided `type` and `keyword`.  Returns false if the rule end is reached.
  _pred_next( state, type, keyword ) {
    while (state) {
      this._tracePush( `${ state }-P2` );
      let cmd = this.table[state];
      if (!Array.isArray( cmd )) {
        const alt = keyword && cmd[keyword] || cmd[type];
        cmd = (typeof alt === 'string')
          ? cmd[alt]
          : typeof alt === 'number' && [ 'g', alt ] || alt || [ 'default', cmd[''] ];
      }
      switch (cmd[0]) {
        case 'c': case 'ck': case 'ciA':
          return true;
        case 'm':
          return type === cmd[2];
        case 'mi': case 'ci':
          return type === 'Id' && (!keyword || this.keywords[keyword] !== false);
        case 'miA':
          return type === 'Id';
        case 'mk':
          return keyword === cmd[2];
      }
      // We could optimize with rule call - only 'Id' must be further investigated
      state = cmd[1];
    }
    this._traceIdOrPred( 'f' );
    this._tracePush( this.s );
    // TODO: really false, not true?
    // `false` means that la1 is not considered an unreserved keyword.  This is
    // correct (consider `e: Association @Anno`), but probably not optimal for
    // error reporting (consider `e: Association +`).  Improving that is more
    // costly, as we really need to consider rule exits → stack.
    return false;
  }

  // Returns a truthy value if `keyword` is in the current set of expected tokens.
  _keyword_after_rule( keyword ) {
    // TODO: this is a slow implementation - do dedicated traversal later
    return this._expecting()[keyword];
  }

  // Set of expected tokens: for error reporting and recovery -------------------

  // Calculate dictionary of expected tokens (token name → true), starting at
  // state `errorState` (default: `this.errorState`), using the first `length`
  // entries (default: all) of the rule stack.
  _expecting( errorState, length ) {
    // Remark: rules must not have been exited too early, see _expecting call in re()
    const stack = this.stack.slice( 0, length || this.stack.length );
    // Immediately exit rules when no tokens have yet been consumed:
    let caller = stack.at( -1 );
    while (stack.length && this.tokenIdx === caller.tokenIdx) {
      --stack.length;
      caller = stack.at( -1 );
    }
    // Now calculate dictionary of expected tokens:
    const expecting = Object.create(null);
    let state = errorState ?? this.errorState;
    // At potential rule end, we must add follow sets of outer rules
    // TODO: we also need to unravel this.dynamic_ for translateParserToken_()
    while ((!state || this._exp_collect( expecting, this.table[state] )) && stack.length)
      state = stack.pop().followState;
    return expecting;
  }

  // TODO: use iterative alg, no recursive call, return state instead -----------
  // Add expected tokens to dictionary `expecting` starting at command `cmd`.
  // Return true if the rule end is reached, i.e. we also need to add the expected
  // tokens at the follow state of the current rule.  Argument `prop` is the token
  // name for `cmd` in a decision.
  _exp_collect( expecting, cmd, prop ) {
    if (prop != null) {
      // a string value in a decision refers to another entry of that decision:
      cmd = cmd[(typeof cmd[prop] === 'string') ? cmd[prop] : prop];
    }
    if (typeof cmd === 'number') // ‹followState› = short form for this.g(‹followState›)
      cmd = [ 'g', cmd ];

    if (!Array.isArray( cmd )) { // decision: collect for all alternatives
      let reachedRuleEnd = false;
      for (const tok in cmd) {
        if (Object.hasOwn( cmd, tok ) && this._exp_collect( expecting, cmd, tok ))
          reachedRuleEnd = true;
      }
      return reachedRuleEnd;
    }
    switch (cmd[0]) {
      case 'c': case 'ck':
        expecting[prop] = true;
        return false;
      case 'ckA':
        for (const tok of this.translateParserToken_( prop ) || [ prop ])
          expecting[tok] = true;
        return false;
      case 'm': case 'mk':
        expecting[cmd[2]] = true;
        return false;
      case 'ci': case 'ciA': case 'mi': case 'miA':
        expecting['Id'] = true;
        return false;
      case 'g': case 'gi':
        if (!cmd[1])
          return cmd[1] === 0;
        //(this.stack[this.stack.length - 1].tokenIdx === this.tokenIdx);
        // TODO: add some assertion in generation that a decision has no two `g`s
        // to the same state (both in cases and default)
        // UPDATE: no, there will be at least gP()s
        // TODO: do properly for (...)+ - currently, the token for directly
        // exiting the rule is also collected
        return this._exp_collect( expecting, this.table[cmd[1]] );
      default:
        // a called rule must match at least one token → after having called a
        // rule, do not collect expecting tokens after exiting the rule
        if (typeof cmd[0] === 'number')
          this._exp_collect( expecting, this.table[cmd[1]] );
        return false;
    }
  }

  // To be overwritten: return an array of token names to be reported as
  // expected instead of the parser token `_token`, or a falsy value to report
  // the parser token itself.
  translateParserToken_( _token ) {
    return null;
  }

  // Error recovery -------------------------------------------------------------

  // Returns true if the parser can continue with the token after the erroneous
  // one, false if panic mode is required.
  _recoverInline( expecting ) {
    // Inline error recovery - single token deletion (TODO later: also try more !)
    // token position has been advanced before calling this function
    if (!expecting[this.lk()] && !expecting[this.l()])
      return false;

    // Immediately exit rules (except start) when no tokens have yet been consumed:
    let { length } = this.stack;
    while (--length > 0) {
      const caller = this.stack[length];
      // matched tokens (other than the skipped one) in rule: found rule
      if (this.tokenIdx - 1 > caller.tokenIdx)
        break;
      caller.followState = null;
    }

    if (++length < this.stack.length) {
      this.s = null;
      this.stack[length].followState = this.errorState;
      // assume the erroneous token to be skipped before having called the rule:
      ++this.stack[length].tokenIdx;
      this.errorState = null;
    }
    else {                      // no rule to leave immediately
      this.s = this.errorState;
    }

    if (this.constructor.tracingParser)
      this._trace( [ this.stack[length - 1].ruleState, 'recover inside rule' ] );
    return true;                // to be re-checked with actions
  }

  // Skip tokens until one is found which is expected at the follow state of an
  // active rule, then mark the rules above that one for exit.
  _recoverPanicMode() {
    --this.tokenIdx;            // undo the token skip of the caller
    const { length } = this.stack;
    // Panic mode: resume at token in then-expecting set:
    const followSets = { EOF: 0 }; // token name → stack index of rule to resume
    for (let idx = 0; idx < length; ++idx) {
      const caller = this.stack[idx];
      const exp = this._expecting( caller.followState, length );
      for (const t of Object.keys( exp )) {
        // no sync to 'Id' - TODO: provide grammar and rule options
        if (t !== 'Id')         // TODO: see below
          followSets[t] = idx;
      }
    }
    const tokenIdx = this.tokenIdx;
    // console.log( this.la().location.toString(), followSets )
    while (this.tokenIdx <= this.eofIndex) {
      // TODO: exclude reserved words for test with this.l()
      // `??` instead of `||`: stack index 0 is a valid value which must not be
      // replaced by the (usually non-existing) entry for the token type
      const depth = followSets[this.lk()] ?? followSets[this.l()];
      // TODO: handle Id here
      if (depth != null)
        return this._error_panic( depth, length, tokenIdx );
      ++this.tokenIdx;
    }
    throw Error( 'EOF was added...' );
  }

  // Mark the current rule and all rules with stack index between `low`
  // (exclusive) and `high` (exclusive) for premature exit; `tokenIdx` is the
  // index of the erroneous token.
  _error_panic( low, high, tokenIdx ) {
    this.s = null;              // mark current rule for exit
    if (this.constructor.tracingParser) {
      this._trace( this.stack.length - 1 > low
        ? `recover by exiting ${ this.stack.length - low} rules prematurely,`
        : 'recover by exiting current rule prematurely,' );
    }
    // eventually mark outer rules for exit:
    // TODO: re-check for rule calls which are at the optional rule end:
    //   x: 'x not'; b: 'b'? x {console.log('x→b')} 'b'?; a: b {console.log('b→a')} 'a'
    // with start rule `a` and input `x a`: output should be x→b + b→a
    // with start rule `a` and input `b a`: output should be b→a
    //
    // → the rule is: if a rule can continue at the specified state and has
    // matched at least one token, then its action is executed, otherwise not
    for (let idx = low + 1; idx < high; ++idx) {
      this.stack[idx].followState = null;
    }
    const resume = this.stack[low];
    if (tokenIdx === resume.tokenIdx) // no tokens matched other than those by skipping
      resume.tokenIdx = this.tokenIdx; // make exit_() return false
    this.errorState = null;
  }

  // Mark all active rules for premature exit; `idx` is just used for the trace
  // output.  Returns false.
  _stopParsing( idx ) {
    --this.tokenIdx;            // undo the token skip of the caller
    if (this.constructor.tracingParser) {
      this.log( this.la().location.toString() + ':', 'Info:',
                `leave all active ${ idx } rules prematurely, stop parsing` );
    }
    for (const c of this.stack)
      c.followState = null;
    this.errorState = null;
    this.s = null;
    return false;
  }

  // small methods --------------------------------------------------------------

  // Output function for errors and trace messages; can be overwritten.
  log( ...args ) {
    console.log( ...args );
  }

  // Returns the sorted display names of the currently expected tokens, joined
  // by `sep`.
  expectingForMessage_( sep = ',' ) {
    return Object.keys( this._expecting() ).map( tokenName ).sort().join( sep );
  }

  reportError_( location, text ) {
    this.$hasErrors = true;
    this.log( `${ location }: Error:`, text );
  }

  reportUnexpectedToken_( token ) {
    this.reportError_( token.location,
                       `unexpected token ${ tokenFullName( token, ': ' ) } - expecting: ` +
                       this.expectingForMessage_() );
  }

  reportReservedWord_( token ) {
    this.reportError_( token.location,
                       `unexpected reserved word ‘${ token.text }’ - expecting: ` +
                       this.expectingForMessage_() );
  }

  // Report error `text` at `token` and recover in panic mode.
  errorAndRecoverOutside( token, text ) { // TODO: re-check
    this.reportError_( token.location, text );
    ++this.tokenIdx;            // _recoverPanicMode() starts with `--this.tokenIdx`
    return this._recoverPanicMode();
  }

  // Append `state` to the list of traced states (tracing parser only).
  _tracePush( state ) {
    if (this.constructor.tracingParser)
      this.trace.push( state ?? '⚠' );
  }
  // Append `suffix` to the last traced state (tracing parser only).
  _traceIdOrPred( suffix ) {
    if (this.constructor.tracingParser)
      this.trace[this.trace.length - 1] += suffix;
  }
  traceAction( location ) {     // will be put into tracing parser
    this._trace( 'execute action,', { location } );
  }

  // Output trace message `msg` together with the traced states, then restart
  // the list of traced states with the current state (tracing parser only).
  // An array as `msg` is the list of arguments for _rule().  The location is
  // taken from `la`, or from the lookahead token if `la` is not provided.
  _trace( msg, la ) {
    if (!this.constructor.tracingParser)
      return;
    if (Array.isArray( msg ))
      msg = this._rule( ...msg );
    this.trace.push( this.s ?? '⚠' );
    this.log( (la || this.la()).location.toString() + ':',
              'Info:', msg, 'states:', this.trace.join( ' → ' ) );
    this.trace = [ this.s ?? '⚠' ];
  }

  // TODO: rename to ruleName_, leaving out the msg stuff
  // Returns trace message `msg` with the name of the rule containing `state`.
  // The rule name is the nearest string entry in the table below `state`.
  // `post` is appended if that entry is directly before `state`, `postOther`
  // otherwise.
  _rule( state, msg, post = '', postOther = post, depthDiff ) {
    const start = --state;
    while (typeof this.table[state] !== 'string')
      --state;
    const { length } = this.stack;
    const depth = depthDiff ? `, depth ${ length + depthDiff } → ${ length }` : '';
    return `${ msg } “${ this.table[state] }”${ state < start ? postOther : post }${ depth },`;
  }

  // Returns whether both states belong to the same rule, i.e. whether there is
  // no string entry (rule name) in the table between them.
  inSameRule_( lowState, highState ) {
    if (lowState > highState)
      [ lowState, highState ] = [ highState, lowState ];
    while (lowState < highState) {
      if (typeof this.table[++lowState] === 'string') // rule boundary
        return false;
    }
    return true;
  }
}
|
|
706
|
+
|
|
707
|
+
// Returns the display name for a token type or keyword, or for a token object:
// names starting with an uppercase ASCII letter are put into ‹…›, all others
// into ‘…’.  For a token object, its keyword is used if it has one and the
// token has not been parsed or has been parsed as keyword; otherwise its type.
function tokenName( type ) {
  let name = type;
  if (typeof name !== 'string') {
    const { parsed, keyword } = type;
    const useKeyword = !parsed || parsed === 'keyword';
    name = (useKeyword && keyword) || type.type;
  }
  if (/^[A-Z]/.test( name ))
    return `‹${ name }›`;
  return `‘${ name }’`;
}
|
|
712
|
+
|
|
713
|
+
// Returns the display name of `token` for messages.  The token text followed
// by `sep` is put in front of the name returned by tokenName() if the token
// has been parsed as something other than keyword or plain token, or if it is
// a non-`Id` token with a (non-empty) text different from its type.
function tokenFullName( token, sep ) {
  const { parsed, type, text } = token;
  const parsedSpecially = parsed && parsed !== 'keyword' && parsed !== 'token';
  const hasOwnText = type !== 'Id' && type !== text && text;
  const name = tokenName( token );
  if (parsedSpecially || hasOwnText)
    return `‘${ text }’${ sep }${ name }`;
  return name;
}
|
|
719
|
+
|
|
720
|
+
module.exports = BaseParser;
|