wikipeg 4.0.2 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HISTORY.md +556 -0
- package/README.md +230 -12
- package/VERSION +1 -1
- package/bin/wikipeg +8 -4
- package/examples/css.pegphp +9 -8
- package/lib/compiler/asts.js +30 -10
- package/lib/compiler/charsets.js +306 -0
- package/lib/compiler/language/javascript.js +107 -33
- package/lib/compiler/language/php.js +193 -55
- package/lib/compiler/passes/analyze-always-match.js +141 -0
- package/lib/compiler/passes/analyze-first.js +245 -0
- package/lib/compiler/passes/ast-to-code.js +316 -100
- package/lib/compiler/passes/inline-simple-rules.js +96 -0
- package/lib/compiler/passes/optimize-character-class.js +147 -0
- package/lib/compiler/passes/optimize-failure-reporting.js +65 -0
- package/lib/compiler/passes/remove-proxy-rules.js +7 -5
- package/lib/compiler/passes/report-infinite-loops.js +4 -1
- package/lib/compiler/passes/report-left-recursion.js +3 -4
- package/lib/compiler/passes/report-unknown-attributes.js +39 -0
- package/lib/compiler/passes/transform-common-lang.js +1 -1
- package/lib/compiler/traverser.js +1 -2
- package/lib/compiler/visitor.js +5 -7
- package/lib/compiler.js +24 -10
- package/lib/parser.js +2784 -3088
- package/lib/peg.js +7 -15
- package/lib/runtime/template.js +9 -1
- package/lib/utils/CaseFolding.txt +1654 -0
- package/lib/utils/arrays.js +0 -72
- package/lib/utils/casefold.js +697 -0
- package/lib/utils/objects.js +9 -39
- package/lib/utils/unicode.js +34 -0
- package/package.json +6 -4
- package/src/DefaultTracer.php +18 -18
- package/src/PEGParserBase.php +53 -28
- package/src/SyntaxError.php +4 -4
- package/src/Tracer.php +1 -1
- package/lib/compiler/opcodes.js +0 -54
|
@@ -43,6 +43,7 @@ let php = {
|
|
|
43
43
|
maxFailPos: '$this->maxFailPos',
|
|
44
44
|
assertionSuccess: 'false',
|
|
45
45
|
inputLength: '$this->inputLength',
|
|
46
|
+
advanceInputChar: 'self::advanceChar($this->input, $this->currPos);',
|
|
46
47
|
consumeInputChar: 'self::consumeChar($this->input, $this->currPos);',
|
|
47
48
|
result: '$result',
|
|
48
49
|
actionArgPrefix: '$',
|
|
@@ -233,7 +234,20 @@ let php = {
|
|
|
233
234
|
return escapedChars.join('');
|
|
234
235
|
},
|
|
235
236
|
|
|
236
|
-
|
|
237
|
+
classToRegexp(node) {
|
|
238
|
+
return '['
|
|
239
|
+
+ (node.inverted ? '^' : '')
|
|
240
|
+
+ node.parts.map(function(part) {
|
|
241
|
+
return part instanceof Array
|
|
242
|
+
? php.regexpClassEscape(part[0])
|
|
243
|
+
+ '-'
|
|
244
|
+
+ php.regexpClassEscape(part[1])
|
|
245
|
+
: php.regexpClassEscape(part);
|
|
246
|
+
}).join('')
|
|
247
|
+
+ ']';
|
|
248
|
+
},
|
|
249
|
+
|
|
250
|
+
matchLiteral(node, reg, result, discard, discardPos) {
|
|
237
251
|
let literalLength = getUtf8Length(node.value);
|
|
238
252
|
let escapedValue = php.stringify(node.value);
|
|
239
253
|
|
|
@@ -241,9 +255,11 @@ let php = {
|
|
|
241
255
|
if (literalLength === 1 && !node.ignoreCase) {
|
|
242
256
|
result.condition = `($this->input[$this->currPos] ?? null) === ${escapedValue}`;
|
|
243
257
|
result.onSuccess([
|
|
244
|
-
`$
|
|
245
|
-
`${reg} = ${php.stringify(node.value)};`
|
|
258
|
+
`${reg} = ${discard ? 'true' : php.stringify(node.value)};`
|
|
246
259
|
]);
|
|
260
|
+
if (!discardPos) {
|
|
261
|
+
result.onSuccess([`$this->currPos++;`]);
|
|
262
|
+
}
|
|
247
263
|
return;
|
|
248
264
|
}
|
|
249
265
|
|
|
@@ -266,15 +282,18 @@ let php = {
|
|
|
266
282
|
].join(', ') + ') === 0';
|
|
267
283
|
if (node.ignoreCase) {
|
|
268
284
|
result.onSuccess([
|
|
269
|
-
|
|
270
|
-
|
|
285
|
+
discard ?
|
|
286
|
+
`${reg} = true;` :
|
|
287
|
+
`${reg} = substr($this->input, $this->currPos, ${literalLength});`
|
|
271
288
|
]);
|
|
272
289
|
} else {
|
|
273
290
|
result.onSuccess([
|
|
274
|
-
`${reg} = ${php.stringify(node.value)};`,
|
|
275
|
-
`$this->currPos += ${literalLength};`
|
|
291
|
+
`${reg} = ${discard ? 'true' : php.stringify(node.value)};`,
|
|
276
292
|
]);
|
|
277
293
|
}
|
|
294
|
+
if (!discardPos) {
|
|
295
|
+
result.onSuccess([`$this->currPos += ${literalLength};`]);
|
|
296
|
+
}
|
|
278
297
|
return;
|
|
279
298
|
}
|
|
280
299
|
|
|
@@ -290,25 +309,13 @@ let php = {
|
|
|
290
309
|
result.block.push(`${reg} = self::charAt($this->input, $this->currPos);`);
|
|
291
310
|
}
|
|
292
311
|
result.condition = `mb_strtolower(${reg}) === ${php.stringify(node.value.toLowerCase())}`;
|
|
293
|
-
|
|
312
|
+
if (!discardPos) {
|
|
313
|
+
result.onSuccess([`$this->currPos += strlen(${reg});`]);
|
|
314
|
+
}
|
|
294
315
|
},
|
|
295
316
|
|
|
296
|
-
|
|
317
|
+
analyzeClass(node) {
|
|
297
318
|
let parts = node.parts;
|
|
298
|
-
|
|
299
|
-
// Empty class
|
|
300
|
-
if (node.parts.length === 0) {
|
|
301
|
-
if (node.inverted) {
|
|
302
|
-
// Same as .
|
|
303
|
-
result.condition = '$this->currPos < $this->inputLength';
|
|
304
|
-
result.onSuccess([`${reg} = self::consumeChar($this->input, $this->currPos);`]);
|
|
305
|
-
} else {
|
|
306
|
-
// Always fail
|
|
307
|
-
result.condition = 'false';
|
|
308
|
-
}
|
|
309
|
-
return;
|
|
310
|
-
}
|
|
311
|
-
|
|
312
319
|
// Analyze for the potential special case of a class composed of individual
|
|
313
320
|
// characters
|
|
314
321
|
let hasRanges = false;
|
|
@@ -338,10 +345,119 @@ let php = {
|
|
|
338
345
|
}
|
|
339
346
|
}
|
|
340
347
|
}
|
|
348
|
+
return { hasRanges: hasRanges, hasNonAscii: hasNonAscii, chars: chars };
|
|
349
|
+
},
|
|
350
|
+
|
|
351
|
+
matchRepeatedClass(node, reg, result, atLeastOne, discard, discardPos) {
|
|
352
|
+
if (node.parts.length === 0) {
|
|
353
|
+
if (node.inverted) {
|
|
354
|
+
// Same as .* / .+
|
|
355
|
+
result.condition = atLeastOne ? '$this->currPos < $this->inputLength' : 'true';
|
|
356
|
+
if (!discard) {
|
|
357
|
+
result.onSuccess([`${reg} = mb_str_split(substr($this->input, $this->currPos), 1, 'utf-8');`]);
|
|
358
|
+
}
|
|
359
|
+
if (!discardPos) {
|
|
360
|
+
result.onSuccess([`$this->currPos = $this->inputLength;`]);
|
|
361
|
+
}
|
|
362
|
+
} else if (atLeastOne) {
|
|
363
|
+
// Always fail
|
|
364
|
+
result.condition = 'false';
|
|
365
|
+
} else {
|
|
366
|
+
// Zero length match
|
|
367
|
+
result.condition = 'true';
|
|
368
|
+
result.onSuccess([`${reg} = [];`]);
|
|
369
|
+
}
|
|
370
|
+
return;
|
|
371
|
+
}
|
|
372
|
+
let {hasRanges,hasNonAscii,chars} = php.analyzeClass(node);
|
|
373
|
+
|
|
374
|
+
// ASCII character lists can be done with strspn/strcspn
|
|
375
|
+
if (!hasRanges && !hasNonAscii) {
|
|
376
|
+
if (node.inverted) {
|
|
377
|
+
result.block.push(`${reg} = strcspn($this->input, ${php.stringify(chars.join(''))}, $this->currPos);`);
|
|
378
|
+
} else {
|
|
379
|
+
result.block.push(`${reg} = strspn($this->input, ${php.stringify(chars.join(''))}, $this->currPos);`);
|
|
380
|
+
}
|
|
381
|
+
result.condition = atLeastOne ? `${reg} > 0` : "true";
|
|
382
|
+
if (discard) {
|
|
383
|
+
if (!discardPos) {
|
|
384
|
+
result.onSuccess([`$this->currPos += ${reg};`]);
|
|
385
|
+
}
|
|
386
|
+
} else {
|
|
387
|
+
// Note that on PHP <= 8.1, str_split('') returns [''] not [], so only
|
|
388
|
+
// use it if we're guaranteed at least one match.
|
|
389
|
+
if (!discardPos) {
|
|
390
|
+
result.onSuccess([
|
|
391
|
+
`$this->currPos += ${reg};`,
|
|
392
|
+
`${reg} = substr($this->input, $this->currPos - ${reg}, ${reg});`,
|
|
393
|
+
]);
|
|
394
|
+
} else {
|
|
395
|
+
result.onSuccess([
|
|
396
|
+
`${reg} = substr($this->input, $this->currPos, ${reg});`
|
|
397
|
+
]);
|
|
398
|
+
}
|
|
399
|
+
result.onSuccess([
|
|
400
|
+
hasNonAscii || node.inverted || (!atLeastOne) ?
|
|
401
|
+
`${reg} = mb_str_split(${reg}, 1, "utf-8");` :
|
|
402
|
+
`${reg} = str_split(${reg});`,
|
|
403
|
+
]);
|
|
404
|
+
}
|
|
405
|
+
return;
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// Otherwise we shall construct a regex
|
|
409
|
+
let regexp = '/'
|
|
410
|
+
+ php.classToRegexp(node)
|
|
411
|
+
+ (atLeastOne ? '+' : '*')+'/A'
|
|
412
|
+
+ (node.ignoreCase ? 'i' : '')
|
|
413
|
+
+ (hasNonAscii ? 'u' : '');
|
|
414
|
+
result.block.push(`${reg} = null;`);
|
|
415
|
+
result.condition = `preg_match(${php.stringify(regexp)}, $this->input, ${reg}, 0, $this->currPos)`;
|
|
416
|
+
if (!discardPos) {
|
|
417
|
+
result.onSuccess([`$this->currPos += strlen(${reg}[0]);`]);
|
|
418
|
+
}
|
|
419
|
+
if (discard) {
|
|
420
|
+
// free the match result array
|
|
421
|
+
result.onSuccess([`${reg} = true;`]);
|
|
422
|
+
} else {
|
|
423
|
+
// See above: str_split() is only safe to use if at least one match.
|
|
424
|
+
if (hasNonAscii || node.inverted || (!atLeastOne)) {
|
|
425
|
+
result.onSuccess([`${reg} = mb_str_split(${reg}[0], 1, "utf-8");`]);
|
|
426
|
+
} else {
|
|
427
|
+
result.onSuccess([`${reg} = str_split(${reg}[0]);`]);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
},
|
|
431
|
+
|
|
432
|
+
matchClass(node, reg, result, discard, discardPos) {
|
|
433
|
+
|
|
434
|
+
// Empty class
|
|
435
|
+
if (node.parts.length === 0) {
|
|
436
|
+
if (node.inverted) {
|
|
437
|
+
// Same as .
|
|
438
|
+
result.condition = '$this->currPos < $this->inputLength';
|
|
439
|
+
if (discard) {
|
|
440
|
+
result.onSuccess([`${reg} = true;`]);
|
|
441
|
+
if (!discardPos) {
|
|
442
|
+
result.onSuccess([
|
|
443
|
+
`self::advanceChar($this->input, $this->currPos);`,
|
|
444
|
+
]);
|
|
445
|
+
}
|
|
446
|
+
} else {
|
|
447
|
+
result.onSuccess([`${reg} = self::consumeChar($this->input, $this->currPos);`]);
|
|
448
|
+
}
|
|
449
|
+
} else {
|
|
450
|
+
// Always fail
|
|
451
|
+
result.condition = 'false';
|
|
452
|
+
}
|
|
453
|
+
return;
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
let {hasRanges,hasNonAscii,chars} = php.analyzeClass(node);
|
|
341
457
|
|
|
342
458
|
// Character lists can be done by getting the next character and comparing
|
|
343
459
|
// it sequentially or looking up in a hashtable
|
|
344
|
-
if (!hasRanges && (
|
|
460
|
+
if (!hasRanges && (node.parts.length <= 2 || php.config.preferClassHashtable)) {
|
|
345
461
|
if (hasNonAscii || node.inverted) {
|
|
346
462
|
result.block = [`${reg} = self::charAt($this->input, $this->currPos);`];
|
|
347
463
|
} else {
|
|
@@ -363,52 +479,69 @@ let php = {
|
|
|
363
479
|
if (node.inverted) {
|
|
364
480
|
result.condition = `${reg} !== '' && !(${result.condition})`;
|
|
365
481
|
}
|
|
366
|
-
if (
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
482
|
+
if (!discardPos) {
|
|
483
|
+
if (hasNonAscii || node.inverted) {
|
|
484
|
+
result.onSuccess([`$this->currPos += strlen(${reg});`]);
|
|
485
|
+
} else {
|
|
486
|
+
result.onSuccess([`$this->currPos++;`]);
|
|
487
|
+
}
|
|
370
488
|
}
|
|
371
489
|
return;
|
|
372
490
|
}
|
|
373
491
|
|
|
374
492
|
// ASCII character lists can be done with strspn/strcspn
|
|
375
|
-
if (!hasRanges) {
|
|
493
|
+
if (!(hasRanges || hasNonAscii)) {
|
|
376
494
|
if (node.inverted) {
|
|
377
495
|
result.condition = `strcspn($this->input, ${php.stringify(chars.join(''))}, `
|
|
378
496
|
+ '$this->currPos, 1) !== 0';
|
|
379
|
-
|
|
497
|
+
if (discard) {
|
|
498
|
+
result.onSuccess([`${reg} = true;`]);
|
|
499
|
+
if (!discardPos) {
|
|
500
|
+
result.onSuccess([`self::advanceChar($this->input, $this->currPos);`]);
|
|
501
|
+
}
|
|
502
|
+
} else {
|
|
503
|
+
result.onSuccess([`${reg} = self::consumeChar($this->input, $this->currPos);`]);
|
|
504
|
+
}
|
|
380
505
|
} else {
|
|
381
506
|
result.condition = `strspn($this->input, ${php.stringify(chars.join(''))}, `
|
|
382
507
|
+ '$this->currPos, 1) !== 0';
|
|
383
|
-
|
|
508
|
+
if (discard) {
|
|
509
|
+
result.onSuccess([ `${reg} = true;` ]);
|
|
510
|
+
} else {
|
|
511
|
+
result.onSuccess([`${reg} = $this->input[$this->currPos];`]);
|
|
512
|
+
}
|
|
513
|
+
if (!discardPos) {
|
|
514
|
+
result.onSuccess([`$this->currPos++;`]);
|
|
515
|
+
}
|
|
384
516
|
}
|
|
385
517
|
return;
|
|
386
518
|
}
|
|
387
519
|
|
|
388
520
|
// Otherwise we shall construct a regex
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
result.block = [`${reg} = $this->input[$this->currPos] ?? '';`];
|
|
393
|
-
}
|
|
394
|
-
let regexp = '/^['
|
|
395
|
-
+ (node.inverted ? '^' : '')
|
|
396
|
-
+ node.parts.map(function(part) {
|
|
397
|
-
return part instanceof Array
|
|
398
|
-
? php.regexpClassEscape(part[0])
|
|
399
|
-
+ '-'
|
|
400
|
-
+ php.regexpClassEscape(part[1])
|
|
401
|
-
: php.regexpClassEscape(part);
|
|
402
|
-
}).join('')
|
|
403
|
-
+ ']/'
|
|
521
|
+
let regexp = '/'
|
|
522
|
+
+ php.classToRegexp(node)
|
|
523
|
+
+ '/A'
|
|
404
524
|
+ (node.ignoreCase ? 'i' : '')
|
|
405
525
|
+ (hasNonAscii ? 'u' : '');
|
|
406
526
|
|
|
407
|
-
result.condition = `preg_match(${php.stringify(regexp)}, ${reg})`;
|
|
408
527
|
if (node.inverted || hasNonAscii) {
|
|
409
|
-
result
|
|
528
|
+
// A multibyte result is possible, and the exact length isn't known
|
|
529
|
+
// unless/until the match succeeds. By using preg_match with an offset,
|
|
530
|
+
// we can avoid creating the substring in the case where the match fails.
|
|
531
|
+
result.condition = `preg_match(${php.stringify(regexp)}, $this->input, ${reg}, 0, $this->currPos)`;
|
|
532
|
+
result.onSuccess([`${reg} = ${reg}[0];`]);
|
|
533
|
+
if (!discardPos) {
|
|
534
|
+
result.onSuccess([`$this->currPos += strlen(${reg});`]);
|
|
535
|
+
}
|
|
410
536
|
} else {
|
|
411
|
-
|
|
537
|
+
// Creating the matches array is expensive, and it's always done if we
|
|
538
|
+
// pass an offset to preg_match. So it's cheaper to do a substring
|
|
539
|
+
// first, even if we're in 'discard' mode.
|
|
540
|
+
result.block = [`${reg} = $this->input[$this->currPos] ?? '';`];
|
|
541
|
+
result.condition = `preg_match(${php.stringify(regexp)}, ${reg})`;
|
|
542
|
+
if (!discardPos) {
|
|
543
|
+
result.onSuccess(['$this->currPos++;']);
|
|
544
|
+
}
|
|
412
545
|
}
|
|
413
546
|
},
|
|
414
547
|
|
|
@@ -452,18 +585,23 @@ let php = {
|
|
|
452
585
|
return `if ($cached->${name} !== self::$UNDEFINED) { $param_${name} = $cached->${name}; }`;
|
|
453
586
|
},
|
|
454
587
|
|
|
455
|
-
cacheStoreRef(
|
|
456
|
-
return
|
|
457
|
-
`$
|
|
588
|
+
cacheStoreRef(reg, name) {
|
|
589
|
+
return reg ?
|
|
590
|
+
`${reg} !== $param_${name} ? $param_${name} : self::$UNDEFINED` :
|
|
458
591
|
'self::$UNDEFINED';
|
|
459
592
|
},
|
|
460
593
|
|
|
594
|
+
|
|
595
|
+
cacheRestoreRef(reg, name) {
|
|
596
|
+
return `$param_${name} = ${reg};`;
|
|
597
|
+
},
|
|
598
|
+
|
|
461
599
|
/**
|
|
462
600
|
* Get a block which saves ref values to a temporary variable for later
|
|
463
|
-
* comparison in getCacheStoreRefs().
|
|
601
|
+
* comparison in getCacheStoreRefs() / getCacheRestoreRefs().
|
|
464
602
|
*/
|
|
465
|
-
cacheSaveRef(name) {
|
|
466
|
-
return `$
|
|
603
|
+
cacheSaveRef(reg, name) {
|
|
604
|
+
return `${reg} = $param_${name};`;
|
|
467
605
|
}
|
|
468
606
|
};
|
|
469
607
|
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var GrammarError = require("../../grammar-error"),
|
|
4
|
+
visitor = require("../visitor"),
|
|
5
|
+
asts = require("../asts");
|
|
6
|
+
|
|
7
|
+
// Find rules that always match/succeed:
|
|
8
|
+
// A rule always matches/succeeds if it only contains expressions that always match/succeed, either:
|
|
9
|
+
// * an optional (?) expression, or
|
|
10
|
+
// * a zero_or_more (*) expression, or
|
|
11
|
+
// * a rule reference to a rule that always matches/succeeds, or
|
|
12
|
+
// * a sequence containing only the aforementioned expressions, or
|
|
13
|
+
// * a choice containing at least one expression that always matches
|
|
14
|
+
|
|
15
|
+
function analyzeAlwaysMatch(ast, options) {
|
|
16
|
+
options = options || {};
|
|
17
|
+
if (options.noAlwaysMatch) {
|
|
18
|
+
return;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
// Look for rules which always match/succeed
|
|
22
|
+
const alwaysMatch = function(node, result) {
|
|
23
|
+
result.alwaysMatch = true;
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
const maybeMatch = function(node, result) {
|
|
27
|
+
result.alwaysMatch = false;
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
const childMatch = function(node, result) {
|
|
31
|
+
checkAlwaysMatch(node.expression, result);
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
const ruleMatch = function(node, result) {
|
|
35
|
+
// To break cycles, mark this rule (conservatively) as *not*
|
|
36
|
+
// always matching, before recursing.
|
|
37
|
+
if (node.hasOwnProperty('alwaysMatch')) {
|
|
38
|
+
result.alwaysMatch = node.alwaysMatch;
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
node.alwaysMatch = false;
|
|
42
|
+
checkAlwaysMatch(node.expression, result);
|
|
43
|
+
node.alwaysMatch = result.alwaysMatch;
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
const checkAlwaysMatch = visitor.build ({
|
|
47
|
+
rule: ruleMatch,
|
|
48
|
+
|
|
49
|
+
rule_ref: function(node, result) {
|
|
50
|
+
const rule = asts.findRule( ast, node.name );
|
|
51
|
+
checkAlwaysMatch(rule, result);
|
|
52
|
+
},
|
|
53
|
+
|
|
54
|
+
choice: function(node, result) {
|
|
55
|
+
let alwaysMatch = false;
|
|
56
|
+
node.alternatives.forEach( (child) => {
|
|
57
|
+
// Don't recurse if we've already found an alternative which always matches
|
|
58
|
+
if (alwaysMatch) {
|
|
59
|
+
if (child.type === 'rule_ref' &&
|
|
60
|
+
asts.getRuleAttributeValue(asts.findRule(ast, child.name), "unreachable", false)) {
|
|
61
|
+
// This is okay, the rule is flagged as known unreachable
|
|
62
|
+
} else if (!options.allowUselessChoice) {
|
|
63
|
+
throw new GrammarError(
|
|
64
|
+
"Unreachable alternative.", child.location
|
|
65
|
+
);
|
|
66
|
+
}
|
|
67
|
+
} else {
|
|
68
|
+
let subresult = {};
|
|
69
|
+
checkAlwaysMatch(child, subresult);
|
|
70
|
+
alwaysMatch = subresult.alwaysMatch;
|
|
71
|
+
}
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
result.alwaysMatch = alwaysMatch;
|
|
75
|
+
},
|
|
76
|
+
|
|
77
|
+
sequence: function(node, result) {
|
|
78
|
+
if (node.hasOwnProperty('alwaysMatch')) {
|
|
79
|
+
result.alwaysMatch = node.alwaysMatch;
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
let alwaysMatch = true;
|
|
83
|
+
node.elements.forEach( (child) => {
|
|
84
|
+
let subresult = {};
|
|
85
|
+
checkAlwaysMatch(child, subresult);
|
|
86
|
+
child.alwaysMatch = subresult.alwaysMatch;
|
|
87
|
+
alwaysMatch = alwaysMatch && child.alwaysMatch;
|
|
88
|
+
});
|
|
89
|
+
result.alwaysMatch = alwaysMatch;
|
|
90
|
+
node.alwaysMatch = alwaysMatch;
|
|
91
|
+
},
|
|
92
|
+
|
|
93
|
+
labeled: childMatch,
|
|
94
|
+
text: childMatch,
|
|
95
|
+
simple_and: childMatch,
|
|
96
|
+
simple_not: maybeMatch,
|
|
97
|
+
action: function(node, result) {
|
|
98
|
+
if (node.hasOwnProperty('alwaysMatch')) {
|
|
99
|
+
result.alwaysMatch = node.alwaysMatch;
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
checkAlwaysMatch(node.expression, result);
|
|
103
|
+
node.alwaysMatch = result.alwaysMatch;
|
|
104
|
+
},
|
|
105
|
+
|
|
106
|
+
optional: alwaysMatch,
|
|
107
|
+
zero_or_more: alwaysMatch,
|
|
108
|
+
// "any" can fail to match if we're at the end of file
|
|
109
|
+
any: maybeMatch,
|
|
110
|
+
// Same for 'class': even [^] will fail to match at end of file
|
|
111
|
+
class: maybeMatch,
|
|
112
|
+
|
|
113
|
+
one_or_more: maybeMatch,
|
|
114
|
+
literal: function(node, result) {
|
|
115
|
+
// An empty literal always matches on any input
|
|
116
|
+
result.alwaysMatch = node.value.length === 0 ? true : false;
|
|
117
|
+
},
|
|
118
|
+
|
|
119
|
+
semantic_and: maybeMatch,
|
|
120
|
+
semantic_not: maybeMatch,
|
|
121
|
+
parameter_and: maybeMatch,
|
|
122
|
+
parameter_not: maybeMatch,
|
|
123
|
+
labeled_param: maybeMatch,
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
checkAlwaysMatch(ast, {});
|
|
127
|
+
// Specifically label sequence and action nodes
|
|
128
|
+
const checkSequencesAndActions = visitor.build ({
|
|
129
|
+
sequence: function(node) {
|
|
130
|
+
node.elements.forEach( (child) => checkSequencesAndActions(child, {}) );
|
|
131
|
+
checkAlwaysMatch(node, {});
|
|
132
|
+
},
|
|
133
|
+
action: function(node) {
|
|
134
|
+
checkSequencesAndActions(node.expression, {});
|
|
135
|
+
checkAlwaysMatch(node, {});
|
|
136
|
+
},
|
|
137
|
+
});
|
|
138
|
+
checkSequencesAndActions(ast, {});
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
module.exports = analyzeAlwaysMatch;
|