@sap/cds-compiler 5.1.2 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -0
- package/bin/cdsc.js +7 -2
- package/bin/cdshi.js +24 -17
- package/bin/cdsse.js +17 -18
- package/doc/CHANGELOG_BETA.md +9 -4
- package/lib/api/main.js +19 -2
- package/lib/api/options.js +4 -1
- package/lib/api/validate.js +5 -0
- package/lib/base/builtins.js +1 -0
- package/lib/base/message-registry.js +40 -3
- package/lib/base/messages.js +1 -1
- package/lib/base/model.js +0 -11
- package/lib/checks/actionsFunctions.js +0 -12
- package/lib/checks/structuredAnnoExpressions.js +10 -14
- package/lib/compiler/assert-consistency.js +21 -13
- package/lib/compiler/builtins.js +2 -2
- package/lib/compiler/checks.js +25 -6
- package/lib/compiler/define.js +27 -31
- package/lib/compiler/extend.js +16 -18
- package/lib/compiler/generate.js +3 -3
- package/lib/compiler/populate.js +22 -16
- package/lib/compiler/propagator.js +3 -2
- package/lib/compiler/resolve.js +87 -94
- package/lib/compiler/shared.js +12 -13
- package/lib/compiler/tweak-assocs.js +390 -86
- package/lib/compiler/utils.js +41 -33
- package/lib/compiler/xpr-rewrite.js +45 -58
- package/lib/edm/annotations/genericTranslation.js +17 -13
- package/lib/edm/csn2edm.js +28 -4
- package/lib/edm/edm.js +68 -28
- package/lib/edm/edmInboundChecks.js +5 -8
- package/lib/edm/edmPreprocessor.js +66 -40
- package/lib/edm/edmUtils.js +1 -1
- package/lib/gen/BaseParser.js +778 -0
- package/lib/gen/CdlParser.js +4477 -0
- package/lib/gen/language.checksum +1 -1
- package/lib/gen/language.interp +1 -1
- package/lib/gen/languageParser.js +4072 -4024
- package/lib/inspect/inspectPropagation.js +1 -1
- package/lib/json/from-csn.js +5 -3
- package/lib/json/to-csn.js +7 -10
- package/lib/language/antlrParser.js +96 -0
- package/lib/language/errorStrategy.js +1 -1
- package/lib/language/genericAntlrParser.js +32 -4
- package/lib/language/multiLineStringParser.js +1 -1
- package/lib/main.d.ts +23 -0
- package/lib/model/cloneCsn.js +22 -13
- package/lib/model/csnUtils.js +2 -0
- package/lib/model/revealInternalProperties.js +2 -0
- package/lib/modelCompare/utils/filter.js +70 -42
- package/lib/optionProcessor.js +16 -10
- package/lib/parsers/AstBuildingParser.js +1290 -0
- package/lib/parsers/CdlGrammar.g4 +2013 -0
- package/lib/parsers/Lexer.js +249 -0
- package/lib/render/toCdl.js +46 -45
- package/lib/render/toSql.js +5 -5
- package/lib/transform/addTenantFields.js +4 -4
- package/lib/transform/db/applyTransformations.js +54 -16
- package/lib/transform/draft/odata.js +10 -11
- package/lib/transform/effective/flattening.js +10 -14
- package/lib/transform/forRelationalDB.js +7 -6
- package/lib/transform/odata/flattening.js +42 -31
- package/lib/transform/odata/toFinalBaseType.js +7 -6
- package/lib/transform/universalCsn/universalCsnEnricher.js +1 -0
- package/lib/utils/moduleResolve.js +1 -1
- package/package.json +2 -2
- package/share/messages/redirected-to-ambiguous.md +5 -4
- package/share/messages/redirected-to-complex.md +6 -3
|
@@ -0,0 +1,778 @@
|
|
|
1
|
+
// Base class for generated parser, for redepage v0.1.12
|
|
2
|
+
|
|
3
|
+
'use strict';
|
|
4
|
+
|
|
5
|
+
class BaseParser {
|
|
6
|
+
constructor( lexer, keywords, table ) {
|
|
7
|
+
this.keywords = keywords;
|
|
8
|
+
this.table = compileTable( table );
|
|
9
|
+
this.lexer = lexer;
|
|
10
|
+
this.tokens = undefined;
|
|
11
|
+
this.eofIndex = undefined;
|
|
12
|
+
this.tokenIdx = 0;
|
|
13
|
+
this.conditionTokenIdx = -1;
|
|
14
|
+
this.fixKeywordTokenIdx = -1;
|
|
15
|
+
this.conditionStackLength = -1;
|
|
16
|
+
this.nextTokenAsId = false;
|
|
17
|
+
|
|
18
|
+
this.s = null;
|
|
19
|
+
this.errorState = null;
|
|
20
|
+
this.stack = []; // [{ ruleState, followState, tokenIdx }]
|
|
21
|
+
this.dynamic_ = {};
|
|
22
|
+
this.prec_ = null;
|
|
23
|
+
this.$hasErrors = null;
|
|
24
|
+
// trace:
|
|
25
|
+
this.trace = [ -1 ];
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
init() {
|
|
29
|
+
this.lexer.tokenize( this );
|
|
30
|
+
this.eofIndex = this.tokens.length - 1;
|
|
31
|
+
return this;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// methods for actions --------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
la() { // lookahead: complete token
|
|
37
|
+
return this.tokens[this.tokenIdx];
|
|
38
|
+
}
|
|
39
|
+
lb() { // look back: complete token
|
|
40
|
+
return this.tokens[this.tokenIdx - 1];
|
|
41
|
+
}
|
|
42
|
+
lr() { // return the first token matched by current rule
|
|
43
|
+
return this.tokens[this.stack[this.stack.length - 1].tokenIdx];
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// lookahead, error: ----------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
l() { // lookahead: token type
|
|
49
|
+
return this.tokens[this.tokenIdx].type;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// instead of l() if keyword (reserved and/or unreserved) is in one of the cases
|
|
53
|
+
lk() { // keyword lookahead
|
|
54
|
+
const la = this.tokens[this.tokenIdx];
|
|
55
|
+
if (!this.nextTokenAsId)
|
|
56
|
+
return la.keyword || la.type;
|
|
57
|
+
this.nextTokenAsId = false;
|
|
58
|
+
return la.type;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
e() { // error: report and recover
|
|
62
|
+
const la = this.tokens[this.tokenIdx];
|
|
63
|
+
const expecting = this._expecting();
|
|
64
|
+
if (this.trace.length > 1)
|
|
65
|
+
this._trace( 'detected parsing error,' );
|
|
66
|
+
this.reportUnexpectedToken_( la );
|
|
67
|
+
la.parsedAs = 0;
|
|
68
|
+
|
|
69
|
+
if (this.conditionTokenIdx === this.tokenIdx &&
|
|
70
|
+
this.conditionStackLength === this.stack.length &&
|
|
71
|
+
(la.keyword && expecting[la.keyword] || expecting[la.type])) {
|
|
72
|
+
// called with/after gc()/gp(), and the token would actually match
|
|
73
|
+
const { tokenIdx, ruleState } = this.stack.at( -1 );
|
|
74
|
+
this.s = (this.tokenIdx > tokenIdx) ? this.errorState : ruleState;
|
|
75
|
+
return false; // error recovery: ignore condition/precedence
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
if (this.tokenIdx >= this.eofIndex)
|
|
79
|
+
return this._stopParsing( this.stack.length );
|
|
80
|
+
// TODO: also sync to what comes next in current rule, at least after rule call,
|
|
81
|
+
// this way we do not have to do the check of g(0) in re() as we did before 2023-12-07
|
|
82
|
+
// (not sure yet whether to make it part of recoverInline or recoverPanicMode),
|
|
83
|
+
if (!this._recoverInline( expecting ))
|
|
84
|
+
this._recoverPanicMode();
|
|
85
|
+
return false;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// instead of e() in default if lk() had been used and 'Id' is in a non-default case
|
|
89
|
+
ei() { // error (after trying to test again as identifier)
|
|
90
|
+
if (!this.tokens[this.tokenIdx].keyword) // lk() had directly returned the type
|
|
91
|
+
return this.e();
|
|
92
|
+
this.nextTokenAsId = true;
|
|
93
|
+
return false; // do not execute action after it
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// goto state: ----------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
// go to end of the rule, in tracing parser: g(0)
|
|
99
|
+
gr( follow ) { // intersection follow set for fast exit
|
|
100
|
+
if (this.stack[this.stack.length - 1].tokenIdx === this.tokenIdx)
|
|
101
|
+
return this.e(); // match at least one token
|
|
102
|
+
this.s = 0;
|
|
103
|
+
// TODO: also have recursive flag in stack: was rule was called recursively?
|
|
104
|
+
// extra val 'gr' when rule was called when it could reach the rule end
|
|
105
|
+
const { type: lt, keyword: lk } = this.tokens[this.tokenIdx];
|
|
106
|
+
if (lk && // Id also for unreserved, except after condition failure
|
|
107
|
+
follow?.[0] === 'Id' && this.keywords[lk] !== false &&
|
|
108
|
+
this.fixKeywordTokenIdx !== this.tokenIdx ||
|
|
109
|
+
follow?.includes( lk || lt )) {
|
|
110
|
+
this._tracePush( [ 'E', true ] );
|
|
111
|
+
return true;
|
|
112
|
+
}
|
|
113
|
+
this._tracePush( [ 'E', 0 ] );
|
|
114
|
+
// TODO: caching
|
|
115
|
+
const { dynamic_ } = this;
|
|
116
|
+
let match;
|
|
117
|
+
let depth = this.stack.length;
|
|
118
|
+
while (match == null && --depth) {
|
|
119
|
+
this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
|
|
120
|
+
const { followState } = this.stack[depth];
|
|
121
|
+
match = this._pred_next( followState, lt, lk, 'E' );
|
|
122
|
+
this._traceSubPush( match ?? 0 );
|
|
123
|
+
}
|
|
124
|
+
this.dynamic_ = dynamic_;
|
|
125
|
+
// If the parser reaches this point with match = null, even the top-level rule
|
|
126
|
+
// does not have a required token (typically `EOF`) at the end → the parser
|
|
127
|
+
// must accept any token → rule exit possible (but no output '✔' in trace).
|
|
128
|
+
return (match ?? true) || this.e();
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// go to state; non-tracing parser: `this.s=‹state›` or `this.gr()`
|
|
132
|
+
g( state, follow ) {
|
|
133
|
+
if (!(state == null ? this.e() : state || this.gr( follow )))
|
|
134
|
+
return false;
|
|
135
|
+
this.s = state; // is just `this.s=‹state›` in non-trace parser
|
|
136
|
+
this._tracePush( this.s );
|
|
137
|
+
return true;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// instead of gi() for `Id_all`
|
|
141
|
+
giA( state, follow ) { // go to state (after trying to test again as identifier)
|
|
142
|
+
if (!this.tokens[this.tokenIdx].keyword) // lk() had directly returned the type
|
|
143
|
+
return this.g( state, follow );
|
|
144
|
+
this.nextTokenAsId = true;
|
|
145
|
+
return false; // do not execute action after it
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// instead of g() in default if lk() had been used and 'Id' is in a non-default case
|
|
149
|
+
gi( state, follow ) { // go to state (after trying to test again as identifier)
|
|
150
|
+
const lk = this.tokens[this.tokenIdx].keyword;
|
|
151
|
+
// As opposed to ei(), we also check for reserved keywords here; this way, we
|
|
152
|
+
// do not have to add reserved keywords from the follow-set to the `switch`.
|
|
153
|
+
if (!lk || this.keywords[lk] === false) // TODO: consider fixKeywordTokenIdx ?
|
|
154
|
+
return this.g( state, follow );
|
|
155
|
+
this.nextTokenAsId = true;
|
|
156
|
+
return false; // do not execute action after it
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// instead of gi() at rule end (RuleEnd_ in follow-set) for `Id_restricted`
|
|
160
|
+
giR( state, follow ) { // go to state (after trying to test again as identifier)
|
|
161
|
+
const lk = this.tokens[this.tokenIdx].keyword;
|
|
162
|
+
if (!lk || this.keywords[lk] === false || this._keyword_after_rule( lk ))
|
|
163
|
+
return this.g( state, follow );
|
|
164
|
+
this.nextTokenAsId = true;
|
|
165
|
+
return false; // do not execute action after it
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
// instead of g() in a non-default case if there is a LL1 conflict
|
|
169
|
+
gP( state ) { // goto state with standard weak-conflict prediction
|
|
170
|
+
return this.lP() && this.g( state );
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// match and consume token: ---------------------------------------------------
|
|
174
|
+
|
|
175
|
+
m( state, token ) { // match token = compare and consume
|
|
176
|
+
return (this.tokens[this.tokenIdx].type === token)
|
|
177
|
+
? this.c( state )
|
|
178
|
+
: this.e();
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
// instead of m() for identifiers via `Id` or `Id_restricted`
|
|
182
|
+
mi( state, ident = true ) { // match identifier token
|
|
183
|
+
return (this.tokens[this.tokenIdx].type === 'Id')
|
|
184
|
+
? this.ci( state, ident )
|
|
185
|
+
: this.e();
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// instead of mi() for `Id_all`
|
|
189
|
+
miA( state, ident = true ) { // match identifier token
|
|
190
|
+
return (this.tokens[this.tokenIdx].type === 'Id')
|
|
191
|
+
? this.ciA( state, ident )
|
|
192
|
+
: this.e();
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// instead of m() for reserved keywords or unreserved without conflict:
|
|
196
|
+
mk( state, token ) { // match keyword token
|
|
197
|
+
return (this.tokens[this.tokenIdx].keyword === token)
|
|
198
|
+
? this.ck( state )
|
|
199
|
+
: this.e();
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
c( state, parsedAs = 'token' ) { // consume token
|
|
203
|
+
const la = this.tokens[this.tokenIdx];
|
|
204
|
+
la.parsedAs = parsedAs;
|
|
205
|
+
if (this.tokenIdx < this.eofIndex) ++this.tokenIdx;
|
|
206
|
+
// TODO: handle identifier-including-reserved-words later (e.g. for id after a `.`)
|
|
207
|
+
this.s = state;
|
|
208
|
+
this.errorState = state;
|
|
209
|
+
if (this.constructor.tracingParser)
|
|
210
|
+
this._trace( `consume ${ tokenFullName( la, ' as ' ) },`, la );
|
|
211
|
+
return true;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// instead of c() for identifiers, used both with l() and lk()
|
|
215
|
+
ci( state, ident = 'ident' ) { // consume identifier token
|
|
216
|
+
const la = this.tokens[this.tokenIdx];
|
|
217
|
+
if (this.keywords[la.keyword] === false)
|
|
218
|
+
this.reportReservedWord_( la );
|
|
219
|
+
// with error recovery: use that (consider this having a good score)
|
|
220
|
+
return this.c( state, ident )
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// instead of ci() for `Id_all`, used both with l() and lk()
|
|
224
|
+
ciA( state, ident = 'ident' ) { // consume identifier token, the "All" variant
|
|
225
|
+
return this.c( state, ident )
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// instead of c() for reserved or unreserved without conflict, requires lk()
|
|
229
|
+
ck( state ) { // consume keyword token
|
|
230
|
+
return this.c( state, 'keyword' )
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// instead of ck() if there is a LL1 conflict
|
|
234
|
+
ckP( state, first2 ) { // consume unreserved keyword with weak conflict
|
|
235
|
+
return this.lP( first2 ) && this.ck( state );
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
// for parser token or token set via `/`
|
|
239
|
+
ckA( state ) {
|
|
240
|
+
// if it really should be considered an Id, `set this.la().parsedAs` yourself
|
|
241
|
+
return this.c( state, (this.l() === 'Id' ? 'keyword' : 'token') );
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
skipToken_() {
|
|
245
|
+
++this.tokenIdx;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// condition and precedence handling ------------------------------------------
|
|
249
|
+
|
|
250
|
+
// state must match the goto-state of the default (there must be no default
|
|
251
|
+
// action), or null for error, lP() must have been used before. There is no
|
|
252
|
+
// “or Id” behavior other than via gpP()
|
|
253
|
+
|
|
254
|
+
// “go if user condition fails”
|
|
255
|
+
gc( state, cond ) {
|
|
256
|
+
if (this.conditionTokenIdx === this.tokenIdx &&
|
|
257
|
+
this.conditionStackLength === this.stack.length) {
|
|
258
|
+
this._tracePush( [ 'C' ] );
|
|
259
|
+
return true; // error recovery: ignore condition
|
|
260
|
+
}
|
|
261
|
+
this.conditionTokenIdx = this.tokenIdx;
|
|
262
|
+
this.conditionStackLength = this.stack.length;
|
|
263
|
+
// TODO: let this[cond]( true ) return recovery badness in error case
|
|
264
|
+
const fail = !this[cond]( true );
|
|
265
|
+
if (this.constructor.tracingParser)
|
|
266
|
+
this._tracePush( [ 'C', cond, !fail ] );
|
|
267
|
+
// TODO TOOL: in this case, the default case must not have actions (tool must
|
|
268
|
+
// add state if it does)
|
|
269
|
+
if (fail) { // TODO: extra gcK() method instead of check below
|
|
270
|
+
// TODO: extra method necessary for academic case
|
|
271
|
+
// ( 'unreserved' 'foo' | <cond> Id 'bar' )` with input `unreserved bar`
|
|
272
|
+
const { keyword } = this.la();
|
|
273
|
+
if (keyword && this.table[keyword])
|
|
274
|
+
this.fixKeywordTokenIdx = this.tokenIdx;
|
|
275
|
+
}
|
|
276
|
+
return !fail || this.g( state ) && false;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
ec( cond ) {
|
|
280
|
+
return this.gc( null, cond );
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// “go if precedence condition fails”
|
|
284
|
+
gp( state, prec, mode ) {
|
|
285
|
+
if (this.conditionTokenIdx === this.tokenIdx &&
|
|
286
|
+
this.conditionStackLength === this.stack.length) {
|
|
287
|
+
this._tracePush( [ 'C' ] );
|
|
288
|
+
return true; // error recovery: ignore condition
|
|
289
|
+
}
|
|
290
|
+
this.conditionTokenIdx = this.tokenIdx;
|
|
291
|
+
this.conditionStackLength = this.stack.length;
|
|
292
|
+
const parentPrec = this.stack.at( -1 ).prec ?? -Infinity;
|
|
293
|
+
const fail = prec <= parentPrec ||
|
|
294
|
+
this.prec_ != null && // previous op parsed by current rule
|
|
295
|
+
// <…,postfix> || <…,assoc=none>, <…,postfix=once>:
|
|
296
|
+
(mode === 'post' && prec > this.prec_ || mode === 'none' && prec >= this.prec_);
|
|
297
|
+
if (this.constructor.tracingParser) {
|
|
298
|
+
const pp = (parentPrec === -Infinity) ? '-∞' : parentPrec;
|
|
299
|
+
const tp = (this.prec_ == null) ? '∞' : this.prec_;
|
|
300
|
+
const suffix = mode === 'post' && `≤${ tp }` || mode === 'none' && `<${ tp }`;
|
|
301
|
+
this._tracePush( [ 'C', `${ pp }<${ prec }${ suffix || '' }`, !fail ] );
|
|
302
|
+
}
|
|
303
|
+
if (fail) { // TODO: extra gcK() method instead of check below
|
|
304
|
+
// TODO: extra method necessary for academic case
|
|
305
|
+
// ( 'unreserved' 'foo' | <cond> Id 'bar' )` with input `unreserved bar`
|
|
306
|
+
const { keyword } = this.la();
|
|
307
|
+
if (keyword && this.table[this.s][keyword])
|
|
308
|
+
this.fixKeywordTokenIdx = this.tokenIdx;
|
|
309
|
+
return this.g( state ) && false; // TODO: reset this.prec_ ?
|
|
310
|
+
}
|
|
311
|
+
this.prec_ = (mode === 'right') ? prec - 1 : prec; // -1: <…,assoc=right>, <…,prefix>
|
|
312
|
+
return true;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
ep( prec, mode ) {
|
|
316
|
+
return this.gp( null, prec, mode );
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
// rule start, end and call: --------------------------------------------------
|
|
320
|
+
|
|
321
|
+
rule_( state, followState = -1 ) { // start rule
|
|
322
|
+
this.stack.push( {
|
|
323
|
+
ruleState: state,
|
|
324
|
+
followState,
|
|
325
|
+
tokenIdx: this.tokenIdx,
|
|
326
|
+
prec: this.prec_,
|
|
327
|
+
} );
|
|
328
|
+
this.dynamic_ = Object.create( this.dynamic_ );
|
|
329
|
+
this.s = state;
|
|
330
|
+
this.prec_ = null;
|
|
331
|
+
this.conditionTokenIdx = -1;
|
|
332
|
+
this.errorState ??= state;
|
|
333
|
+
this._trace( [ state, 'call rule', '', ' at alt start', -1 ] );
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
exit_( rulePrecMethod ) { // exit rule
|
|
337
|
+
if (this.s)
|
|
338
|
+
throw Error( `this.s === ${ this.s } // illegally set by action, or runtime/generator bug` );
|
|
339
|
+
this.dynamic_ = Object.getPrototypeOf( this.dynamic_ );
|
|
340
|
+
const caller = this.stack.pop();
|
|
341
|
+
this.s = caller.followState;
|
|
342
|
+
this.prec_ = (rulePrecMethod) ? this[rulePrecMethod]( caller ) : caller.prec;
|
|
343
|
+
this._trace( [ caller.ruleState, 'exit rule', '', '', 1 ] );
|
|
344
|
+
//if (this.errorState == 0 || this.s != null)
|
|
345
|
+
this.errorState = this.s;
|
|
346
|
+
// execute actions if not in error recovery (pass-through) and at least one
|
|
347
|
+
// token has been matched in rule:
|
|
348
|
+
return this.s != null && this.tokenIdx > caller.tokenIdx;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// predicate used before rule call if with LL(1) conflict, 'Id' in other case
|
|
352
|
+
lP( first2 ) { // only start rule if this predicate returns true
|
|
353
|
+
// nothing to check if not a non-reserved keyword:
|
|
354
|
+
const { keyword: lk1 } = this.tokens[this.tokenIdx];
|
|
355
|
+
if (!lk1 || !this.keywords[lk1])
|
|
356
|
+
return true;
|
|
357
|
+
|
|
358
|
+
const { type: lt2, keyword: lk2 } = this.tokens[this.tokenIdx + 1];
|
|
359
|
+
// Argument first2 is just a performance hint with ckP():
|
|
360
|
+
if (lk2 && first2?.[0] === 'Id' && this.keywords[lk2] !== false ||
|
|
361
|
+
first2?.includes( lk2 || lt2 )) {
|
|
362
|
+
this._tracePush( [ 'P', true ] );
|
|
363
|
+
return true;
|
|
364
|
+
}
|
|
365
|
+
this._tracePush( [ 'P' ] );
|
|
366
|
+
// now check it dynamically:
|
|
367
|
+
let cmd = this.table[this.s][lk1];
|
|
368
|
+
if (cmd[2] !== 1)
|
|
369
|
+
throw Error( `Unexpected command '${ cmd?.[0] }' without prediction at state ${ this.s } for ‘${ lk1 }’` );
|
|
370
|
+
|
|
371
|
+
// if not the keyword match, the command is “goto” or “rule call”
|
|
372
|
+
const nextState = (cmd[0] === 'ck') ? cmd[1] : this._pred_keyword( cmd[1], lk1 );
|
|
373
|
+
|
|
374
|
+
++this.tokenIdx; // for user lookahead fns and conditions
|
|
375
|
+
const match = this._pred_next( nextState, lt2, lk2, 'P' );
|
|
376
|
+
--this.tokenIdx;
|
|
377
|
+
|
|
378
|
+
const r = match ?? true;
|
|
379
|
+
if (match == null)
|
|
380
|
+
this._traceSubPush( 0 );
|
|
381
|
+
if (lt2 === 'IllegalToken')
|
|
382
|
+
return true
|
|
383
|
+
// TODO: instead of this IllegalToken test, implement a “confirm unreserved
|
|
384
|
+
// keyword as Id” prediction which tests whether the token after the then-Id
|
|
385
|
+
// matches.
|
|
386
|
+
this._traceSubPush( r );
|
|
387
|
+
if (!r)
|
|
388
|
+
this.nextTokenAsId = true;
|
|
389
|
+
return r;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
// Now the helper methods =====================================================
|
|
393
|
+
|
|
394
|
+
// Standard weak-conflict predicate -------------------------------------------
|
|
395
|
+
|
|
396
|
+
_pred_keyword( state, keyword ) {
|
|
397
|
+
// returns state after matching the first token as keyword, for lP()
|
|
398
|
+
while (state) {
|
|
399
|
+
this._traceSubPush( state );
|
|
400
|
+
let cmd = this.table[state];
|
|
401
|
+
if (!Array.isArray( cmd ))
|
|
402
|
+
cmd = cmd[keyword] || cmd.Id || cmd[''];
|
|
403
|
+
switch (cmd[0]) {
|
|
404
|
+
case 'ck': case 'mk':
|
|
405
|
+
return cmd[1]; // state after token consumption
|
|
406
|
+
case 'g': // TODO: another rule call?
|
|
407
|
+
break;
|
|
408
|
+
default:
|
|
409
|
+
if (typeof cmd[0] !== 'number')
|
|
410
|
+
throw Error( `Unexpected command ${ cmd[0] } at state ${ this.s }` );
|
|
411
|
+
}
|
|
412
|
+
state = cmd[1];
|
|
413
|
+
}
|
|
414
|
+
// reached end of rule without having consumed a token
|
|
415
|
+
throw Error( 'Not supported: option for unreserved keywords in follow set' );
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
_pred_next( state, type, keyword, mode ) {
|
|
419
|
+
let hasEnteredRule = false;
|
|
420
|
+
while (state) {
|
|
421
|
+
this._traceSubPush( state );
|
|
422
|
+
let cmd = this.table[state];
|
|
423
|
+
if (!Array.isArray( cmd )) {
|
|
424
|
+
const lookahead = cmd[' lookahead'];
|
|
425
|
+
cmd = lookahead
|
|
426
|
+
? cmd[this[lookahead]( mode )] || cmd['']
|
|
427
|
+
: keyword && cmd[keyword] || cmd[type] || cmd[''];
|
|
428
|
+
}
|
|
429
|
+
switch (cmd[0]) {
|
|
430
|
+
case 'c': case 'ck': case 'ciA': case 'ckA': // TODO: re-check ckA
|
|
431
|
+
return true;
|
|
432
|
+
case 'ci':
|
|
433
|
+
if (!keyword ||
|
|
434
|
+
this.keywords[keyword] !== false && this.fixKeywordTokenIdx !== this.tokenIdx)
|
|
435
|
+
return true;
|
|
436
|
+
cmd = this.table[state]['']; // is currently always 'g' or 'e'
|
|
437
|
+
break;
|
|
438
|
+
case 'm':
|
|
439
|
+
return type === cmd[2];
|
|
440
|
+
case 'mi':
|
|
441
|
+
return type === 'Id' &&
|
|
442
|
+
(!keyword ||
|
|
443
|
+
this.keywords[keyword] !== false && this.fixKeywordTokenIdx !== this.tokenIdx);
|
|
444
|
+
case 'miA':
|
|
445
|
+
return type === 'Id';
|
|
446
|
+
case 'mk':
|
|
447
|
+
return keyword === cmd[2];
|
|
448
|
+
case 'g': case 'e':
|
|
449
|
+
break;
|
|
450
|
+
default:
|
|
451
|
+
if (typeof cmd[0] !== 'number')
|
|
452
|
+
throw Error( `Unexpected command ${ cmd[0] } at state ${ this.s }` );
|
|
453
|
+
// If the parser enters a rule, reaching the rule end (can happen with
|
|
454
|
+
// option `minTokensMatched`) means "no match".
|
|
455
|
+
hasEnteredRule = true;
|
|
456
|
+
// If we want to support conditions before matching the first token in a
|
|
457
|
+
// rule, we would have to handle `this.stack` and `this.dynamically_`.
|
|
458
|
+
}
|
|
459
|
+
// We could optimize with rule call - only 'Id' must be further investigated
|
|
460
|
+
state = cmd[1];
|
|
461
|
+
}
|
|
462
|
+
// If invalid state, the second token does not match, e.g. for `VIRTUAL +`
|
|
463
|
+
// or `VIRTUAL §` (with IllegalToken):
|
|
464
|
+
if (state == null)
|
|
465
|
+
return false;
|
|
466
|
+
|
|
467
|
+
// Otherwise, the parser could end the rule after having matched the keyword
|
|
468
|
+
// with prediction. TODO: as we do not look behind the current rule for the
|
|
469
|
+
// prediction, the tool can normally omit the prediction (and output a
|
|
470
|
+
// message), no so with `ruleStartingWithUnreserved`. We will rather look
|
|
471
|
+
// behind the current rule _after_ having decided that the token is to be
|
|
472
|
+
// matched as identifier.
|
|
473
|
+
return !hasEnteredRule && null; // let caller decide how to interpret this
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
_keyword_after_rule( keyword ) {
|
|
477
|
+
// TODO: this is a slow implementation - do dedicated traversal later
|
|
478
|
+
// It is used in giR() only and this is currently used just once.
|
|
479
|
+
// TODO: using mode = 'R' and tracing R(…)
|
|
480
|
+
// TODO: investigate why this was not written before adding
|
|
481
|
+
// `<default=fallback>` in rule `fromRefWithOptAlias`.
|
|
482
|
+
return this._expecting()[keyword];
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
// Set of expected tokens: for error reporting and recovery -------------------
|
|
486
|
+
|
|
487
|
+
// Calculate array of expected tokens
|
|
488
|
+
_expecting( errorState, length ) {
|
|
489
|
+
// Remark: rules must not have been exited too early, see _expecting call in re()
|
|
490
|
+
const stack = this.stack.slice( 0, length || this.stack.length );
|
|
491
|
+
// Immediately exit rules when no tokens have yet been consumed:
|
|
492
|
+
let caller = stack.at( -1 );
|
|
493
|
+
while (stack.length && this.tokenIdx === caller.tokenIdx) {
|
|
494
|
+
--stack.length;
|
|
495
|
+
caller = stack.at( -1 );
|
|
496
|
+
}
|
|
497
|
+
// Now calculate dictionary of expected tokens:
|
|
498
|
+
const expecting = Object.create(null);
|
|
499
|
+
let state = errorState ?? this.errorState;
|
|
500
|
+
// At potential rule end, we must add follow sets of outer rules
|
|
501
|
+
// TODO: we also need to unravel this.dynamic_ for translateParserToken_()
|
|
502
|
+
while ((!state || this._exp_collect( expecting, this.table[state] )) && stack.length)
|
|
503
|
+
state = stack.pop().followState;
|
|
504
|
+
return expecting;
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
// TODO: use iterative alg, no recursive call, return state instead -----------
|
|
508
|
+
// Add expected tokens to dictionary `expecting` starting at command `cmd`.
|
|
509
|
+
// Return true if the rule end is reached, i.e. we also need to add the expected
|
|
510
|
+
// tokens at the follow state of the current rule. Argument `prop` is the token
|
|
511
|
+
// name for `cmd` in a decision.
|
|
512
|
+
_exp_collect( expecting, cmd, prop ) {
|
|
513
|
+
if (prop != null)
|
|
514
|
+
cmd = cmd[prop];
|
|
515
|
+
if (!Array.isArray( cmd )) {
|
|
516
|
+
let reachedRuleEnd = false;
|
|
517
|
+
for (const tok in cmd) {
|
|
518
|
+
if (Object.hasOwn( cmd, tok ) && tok.charAt(0) !== ' ' &&
|
|
519
|
+
this._exp_collect( expecting, cmd, tok ))
|
|
520
|
+
reachedRuleEnd = true;
|
|
521
|
+
}
|
|
522
|
+
return reachedRuleEnd;
|
|
523
|
+
}
|
|
524
|
+
switch (cmd[0]) {
|
|
525
|
+
case 'c': case 'ck':
|
|
526
|
+
expecting[prop] = true;
|
|
527
|
+
return false;
|
|
528
|
+
case 'ckA':
|
|
529
|
+
for (const tok of this.translateParserToken_( prop ))
|
|
530
|
+
expecting[tok] = true;
|
|
531
|
+
return false;
|
|
532
|
+
case 'm': case 'mk':
|
|
533
|
+
expecting[cmd[2]] = true;
|
|
534
|
+
return false;
|
|
535
|
+
case 'ci': case 'ciA': case 'mi': case 'miA':
|
|
536
|
+
expecting['Id'] = true;
|
|
537
|
+
return false;
|
|
538
|
+
case 'g': case 'gi':
|
|
539
|
+
if (!cmd[1])
|
|
540
|
+
return cmd[1] === 0;
|
|
541
|
+
//(this.stack[this.stack.length - 1].tokenIdx === this.tokenIdx);
|
|
542
|
+
// TODO: add some assertion in generation that a decision has no two `g`s
|
|
543
|
+
// to the same state (both in cases and default)
|
|
544
|
+
// UPDATE: no, there will be at least gP()s
|
|
545
|
+
// TOOD: do properly for (...)+ - currently, the token for directly
|
|
546
|
+
// exiting the rule is also collected
|
|
547
|
+
return this._exp_collect( expecting, this.table[cmd[1]] );
|
|
548
|
+
default:
|
|
549
|
+
// a called rule must match at least one token → after having called a
|
|
550
|
+
// rule, do not collect expecting tokens after exiting the rule
|
|
551
|
+
if (typeof cmd[0] === 'number')
|
|
552
|
+
this._exp_collect( expecting, this.table[cmd[1]] );
|
|
553
|
+
return false;
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
translateParserToken_( token ) {
|
|
558
|
+
return [ token ];
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
// Error recovery -------------------------------------------------------------
|
|
562
|
+
|
|
563
|
+
_recoverInline( expecting ) {
|
|
564
|
+
const { type: lt2, keyword: lk2 } = this.tokens[this.tokenIdx + 1];
|
|
565
|
+
if (!(lk2 && expecting[lk2] || expecting[lt2]))
|
|
566
|
+
return false;
|
|
567
|
+
|
|
568
|
+
// Immediately exit rules (except start) when no tokens have yet been consumed:
|
|
569
|
+
let { length } = this.stack;
|
|
570
|
+
while (--length > 0) {
|
|
571
|
+
const caller = this.stack[length];
|
|
572
|
+
// matched tokens in rule: found rule
|
|
573
|
+
if (this.tokenIdx > caller.tokenIdx)
|
|
574
|
+
break;
|
|
575
|
+
caller.followState = null;
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
if (++length < this.stack.length) {
|
|
579
|
+
this.s = null;
|
|
580
|
+
this.stack[length].followState = this.errorState;
|
|
581
|
+
// assume the erroneous token to be skipped before having called the rule:
|
|
582
|
+
++this.stack[length].tokenIdx
|
|
583
|
+
this.errorState = null;
|
|
584
|
+
}
|
|
585
|
+
else { // no rule to leave immediately
|
|
586
|
+
this.s = this.errorState;
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
this.skipToken_();
|
|
590
|
+
if (this.constructor.tracingParser)
|
|
591
|
+
this._trace( [ this.stack[length - 1].ruleState, 'recover inside rule' ] );
|
|
592
|
+
return true; // to be re-checked with actions
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
_recoverPanicMode() {
|
|
596
|
+
const { length } = this.stack;
|
|
597
|
+
// Panic mode: resume at token in then-expecting set:
|
|
598
|
+
const followSets = { EOF: 0 };
|
|
599
|
+
for (let idx = 0; idx < length; ++idx) {
|
|
600
|
+
const caller = this.stack[idx];
|
|
601
|
+
const exp = this._expecting( caller.followState, length );
|
|
602
|
+
for (const t of Object.keys( exp )) {
|
|
603
|
+
// no sync to 'Id' - TODO: provide grammar and rule options
|
|
604
|
+
if (t !== 'Id') // TODO: see below
|
|
605
|
+
followSets[t] = idx;
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
const tokenIdx = this.tokenIdx;
|
|
609
|
+
// console.log( this.la().location.toString(), followSets )
|
|
610
|
+
while (this.tokenIdx <= this.eofIndex) {
|
|
611
|
+
// TODO: exclude reserved words for test with this.l()
|
|
612
|
+
const depth = followSets[this.lk()] || followSets[this.l()];
|
|
613
|
+
// TODO: handle Id here
|
|
614
|
+
if (depth != null)
|
|
615
|
+
return this._error_panic( depth, length, tokenIdx );
|
|
616
|
+
this.skipToken_();
|
|
617
|
+
}
|
|
618
|
+
throw Error( 'EOF was added...' );
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
_error_panic( low, high, tokenIdx ) {
|
|
622
|
+
this.s = null; // mark current rule for exit
|
|
623
|
+
if (this.constructor.tracingParser) {
|
|
624
|
+
this._trace( this.stack.length - 1 > low
|
|
625
|
+
? `recover by exiting ${ this.stack.length - low} rules prematurely,`
|
|
626
|
+
: 'recover by exiting current rule prematurely,' );
|
|
627
|
+
}
|
|
628
|
+
// eventually mark outer rules for exit:
|
|
629
|
+
// TODO: re-check for rule calls which are at the optional rule end:
|
|
630
|
+
// x: 'x not'; b: 'b'? x {console.log('x→b')} 'b'?; a: b {console.log('b→a')} 'a'
|
|
631
|
+
// with start rule `a` and input `x a`: output should be x→b + b→a
|
|
632
|
+
// with start rule `a` and input `b a`: output should be b→a
|
|
633
|
+
//
|
|
634
|
+
// → the rule is: if a rule can continue at the specified state and has
|
|
635
|
+
// matched at least one token, then its action is executed, otherwise not
|
|
636
|
+
for (let idx = low + 1; idx < high; ++idx) {
|
|
637
|
+
this.stack[idx].followState = null;
|
|
638
|
+
}
|
|
639
|
+
const resume = this.stack[low];
|
|
640
|
+
if (tokenIdx === resume.tokenIdx) // no tokens matched other than those by skipping
|
|
641
|
+
resume.tokenIdx = this.tokenIdx; // make exit_() return false
|
|
642
|
+
this.errorState = null;
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
_stopParsing( idx ) {
|
|
646
|
+
if (this.constructor.tracingParser) {
|
|
647
|
+
this.log( this.la().location.toString() + ':', 'Info:',
|
|
648
|
+
`leave all active ${ idx } rules prematurely, stop parsing` );
|
|
649
|
+
}
|
|
650
|
+
// TODO: run this.skipToken_() on all remaining tokens? Does ANTLR consumes
|
|
651
|
+
// those in error recovery mode? Probably not.
|
|
652
|
+
for (const c of this.stack)
|
|
653
|
+
c.followState = null;
|
|
654
|
+
this.errorState = null;
|
|
655
|
+
this.s = null;
|
|
656
|
+
return false;
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
// small methods --------------------------------------------------------------
|
|
660
|
+
|
|
661
|
+
log( ...args ) {
|
|
662
|
+
console.log( ...args );
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
expectingForMessage_( sep = ',' ) {
|
|
666
|
+
return Object.keys( this._expecting() ).map( tokenName ).sort().join( sep );
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
reportError_( location, text ) {
|
|
670
|
+
this.$hasErrors = true;
|
|
671
|
+
this.log( `${ location }: Error:`, text );
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
reportUnexpectedToken_( token ) {
|
|
675
|
+
this.reportError_( token.location,
|
|
676
|
+
`unexpected token ${ tokenFullName( token, ': ' ) } - expecting: ` +
|
|
677
|
+
this.expectingForMessage_() );
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
reportReservedWord_( token ) {
|
|
681
|
+
this.reportError_( token.location,
|
|
682
|
+
`unexpected reserved word ‘${ token.text }’ - expecting: ` +
|
|
683
|
+
this.expectingForMessage_() );
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
errorAndRecoverOutside( token, text ) { // TODO: re-check
|
|
687
|
+
this.reportError_( token.location, text );
|
|
688
|
+
++this.tokenIdx;
|
|
689
|
+
return this._recoverPanicMode( this.stack.length );
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
_tracePush( state ) {
|
|
693
|
+
if (this.constructor.tracingParser)
|
|
694
|
+
this.trace.push( state ?? '⚠' );
|
|
695
|
+
}
|
|
696
|
+
_traceSubPush( state ) {
|
|
697
|
+
if (this.constructor.tracingParser)
|
|
698
|
+
this.trace.at(-1).push( state );
|
|
699
|
+
}
|
|
700
|
+
traceAction( location ) { // will be put into tracing parser
|
|
701
|
+
this._trace( 'execute action,', { location } );
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
_trace( msg, la ) {
|
|
705
|
+
if (!this.constructor.tracingParser)
|
|
706
|
+
return;
|
|
707
|
+
if (Array.isArray( msg ))
|
|
708
|
+
msg = this._rule( ...msg );
|
|
709
|
+
this.trace.push( this.s ?? '⚠' );
|
|
710
|
+
this.log( (la || this.la()).location.toString() + ':',
|
|
711
|
+
'Info:', msg, 'states:', this.trace.map( traceStep ).join( ' → ' ) );
|
|
712
|
+
this.trace = [ this.s ?? '⚠' ];
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
// TODO: rename to ruleName_, leaving out the msg stuff
|
|
716
|
+
_rule( state, msg, post = '', postOther = post, depthDiff ) {
|
|
717
|
+
const start = --state;
|
|
718
|
+
while (typeof this.table[state] !== 'string')
|
|
719
|
+
--state;
|
|
720
|
+
const { length } = this.stack;
|
|
721
|
+
const depth = depthDiff ? `, depth ${ length + depthDiff } → ${ length }` : '';
|
|
722
|
+
return `${ msg } “${ this.table[state] }”${ state < start ? postOther : post }${ depth },`;
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
inSameRule_( lowState, highState ) {
|
|
726
|
+
if (lowState > highState)
|
|
727
|
+
[ lowState, highState ] = [ highState, lowState ];
|
|
728
|
+
while (lowState < highState) {
|
|
729
|
+
if (typeof this.table[++lowState] === 'string') // rule boundary
|
|
730
|
+
return false;
|
|
731
|
+
}
|
|
732
|
+
return true;
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
}
|
|
736
|
+
|
|
737
|
+
// Formats one entry of the parser trace.  Non-array entries are returned
// unchanged.  An array entry `[ name, ...args ]` becomes `name(args)`, with the
// args joined by '→' and a leading '→' if the first arg is a number.  A final
// element `true`/`false` is not listed as arg, but appended as '✔'/'✖'.
function traceStep( step ) {
  if (!Array.isArray( step ))
    return step;
  // Dictionary without prototype: a final element like 'constructor' or
  // 'toString' must not pick up an inherited Object.prototype member as mark.
  const marks = { __proto__: null, true: '✔', false: '✖' };
  const result = marks[step.at( -1 )] ?? '';
  const intro = (typeof step[1] === 'number') ? '→' : '';
  const arg = step.slice( 1, result ? -1 : undefined ).join( '→' );
  return `${ step[0] }(${ intro }${ arg })${ result }`;
}
|
|
745
|
+
|
|
746
|
+
// Returns the display name for a token type.  `type` is either the type name
// itself or a token object; for the latter, its `keyword` is used if the
// token has no `parsedAs` or was parsed as 'keyword' (and `keyword` is
// truthy), otherwise its `type`.  Names starting with an ASCII uppercase
// letter are shown as ‹Name›, all others as ‘name’.
function tokenName( type ) {
  let name = type;
  if (typeof type !== 'string') {
    const asKeyword = !type.parsedAs || type.parsedAs === 'keyword';
    name = (asKeyword && type.keyword) || type.type;
  }
  if (/^[A-Z]/.test( name ))
    return `‹${ name }›`;
  return `‘${ name }’`;
}
|
|
751
|
+
|
|
752
|
+
// Returns the display name of `token` for messages.  The quoted token text,
// followed by `sep`, is put in front of `tokenName( token )` if the token was
// parsed as something other than 'keyword' or 'token', or if it is a non-'Id'
// token with a non-empty text which differs from its type.
function tokenFullName( token, sep ) {
  const parsedSpecially = token.parsedAs &&
    token.parsedAs !== 'keyword' && token.parsedAs !== 'token';
  const showText = parsedSpecially ||
    (token.type !== 'Id' && token.type !== token.text && token.text);
  if (!showText)
    return tokenName( token );
  return `‘${ token.text }’${ sep }${ tokenName( token ) }`;
}
|
|
758
|
+
|
|
759
|
+
// Prepares the parser `table` in place (once; marked via `table.$compiled`)
// and returns it.  Only entries which are non-array objects are touched:
//
// - a property whose value is not an array and whose name does not start
//   with a blank is replaced: a string value by the current value of the
//   property it names, any other value `alt` by an array `[ 'g', alt ]`,
//   which is shared between all properties of that entry with the same value;
// - an entry without truthy property '' gets `[ 'e' ]` for it.
function compileTable( table ) {
  if (table.$compiled)
    return table;
  for (const line of table) {
    // `typeof null` is 'object', too: skip it explicitly instead of failing
    // in Object.keys() below
    if (line === null || typeof line !== 'object' || Array.isArray( line ))
      continue;
    const cache = Object.create( null ); // very sparse array
    for (const prop of Object.keys( line )) {
      const alt = line[prop];
      if (!Array.isArray( alt ) && prop.charAt(0) !== ' ') // string or number
        line[prop] = (typeof alt === 'string') ? line[alt] : (cache[alt] ??= [ 'g', alt ]);
    }
    if (!line[''])
      line[''] = [ 'e' ];
  }
  table.$compiled = true;
  return table;
}
|
|
777
|
+
|
|
778
|
+
module.exports = BaseParser;
|