wikipeg 4.0.2 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HISTORY.md +556 -0
- package/README.md +230 -12
- package/VERSION +1 -1
- package/bin/wikipeg +8 -4
- package/examples/css.pegphp +9 -8
- package/lib/compiler/asts.js +30 -10
- package/lib/compiler/charsets.js +306 -0
- package/lib/compiler/language/javascript.js +107 -33
- package/lib/compiler/language/php.js +193 -55
- package/lib/compiler/passes/analyze-always-match.js +141 -0
- package/lib/compiler/passes/analyze-first.js +245 -0
- package/lib/compiler/passes/ast-to-code.js +316 -100
- package/lib/compiler/passes/inline-simple-rules.js +96 -0
- package/lib/compiler/passes/optimize-character-class.js +147 -0
- package/lib/compiler/passes/optimize-failure-reporting.js +65 -0
- package/lib/compiler/passes/remove-proxy-rules.js +7 -5
- package/lib/compiler/passes/report-infinite-loops.js +4 -1
- package/lib/compiler/passes/report-left-recursion.js +3 -4
- package/lib/compiler/passes/report-unknown-attributes.js +39 -0
- package/lib/compiler/passes/transform-common-lang.js +1 -1
- package/lib/compiler/traverser.js +1 -2
- package/lib/compiler/visitor.js +5 -7
- package/lib/compiler.js +24 -10
- package/lib/parser.js +2784 -3088
- package/lib/peg.js +7 -15
- package/lib/runtime/template.js +9 -1
- package/lib/utils/CaseFolding.txt +1654 -0
- package/lib/utils/arrays.js +0 -72
- package/lib/utils/casefold.js +697 -0
- package/lib/utils/objects.js +9 -39
- package/lib/utils/unicode.js +34 -0
- package/package.json +6 -4
- package/src/DefaultTracer.php +18 -18
- package/src/PEGParserBase.php +53 -28
- package/src/SyntaxError.php +4 -4
- package/src/Tracer.php +1 -1
- package/lib/compiler/opcodes.js +0 -54
|
@@ -4,10 +4,16 @@ var js = require("../language/javascript"),
|
|
|
4
4
|
php = require("../language/php"),
|
|
5
5
|
visitor = require("../visitor"),
|
|
6
6
|
objects = require('../../utils/objects'),
|
|
7
|
+
classNode = require("../charsets").classNode,
|
|
7
8
|
asts = require("../asts"),
|
|
8
9
|
fs = require("fs");
|
|
9
10
|
|
|
10
11
|
function generateJavascript(ast, options) {
|
|
12
|
+
/**
|
|
13
|
+
* How much to expand ranges in character classes before giving up.
|
|
14
|
+
*/
|
|
15
|
+
const CHARSET_EXPAND = 16;
|
|
16
|
+
|
|
11
17
|
var rulesToGenerate = [];
|
|
12
18
|
var generatedRuleNames = {};
|
|
13
19
|
|
|
@@ -88,6 +94,22 @@ function generateJavascript(ast, options) {
|
|
|
88
94
|
namespace = `namespace ${matches[1]};`;
|
|
89
95
|
}
|
|
90
96
|
|
|
97
|
+
// Look for 'cache' attributes on rules. If some rule has [cache] on it,
|
|
98
|
+
// but options.cache was false, then set options.cache but default all
|
|
99
|
+
// rules to [cache=false], so only the explicitly [cache=true] rules
|
|
100
|
+
// will be cached.
|
|
101
|
+
if (ast.rules.some(
|
|
102
|
+
(rule) => asts.getRuleAttributeValue(rule, "cache") === true
|
|
103
|
+
) && !options.cache) {
|
|
104
|
+
options.cache = true;
|
|
105
|
+
ast.rules.forEach((rule) => {
|
|
106
|
+
if (asts.findRuleAttribute(rule, "cache") === undefined) {
|
|
107
|
+
rule.attributes = rule.attributes || [];
|
|
108
|
+
rule.attributes.push({ name: "cache", type: "boolean", value: false });
|
|
109
|
+
}
|
|
110
|
+
});
|
|
111
|
+
}
|
|
112
|
+
|
|
91
113
|
var refsSet = {};
|
|
92
114
|
var getRefs = visitor.build({
|
|
93
115
|
rule_ref: function(node) {
|
|
@@ -136,6 +158,9 @@ function generateJavascript(ast, options) {
|
|
|
136
158
|
this.resultReg_ = false;
|
|
137
159
|
this.silence_ = language.silence;
|
|
138
160
|
this.discard_ = false;
|
|
161
|
+
this.discardPos_ = false;
|
|
162
|
+
this.savedPosRefState_ = null;
|
|
163
|
+
this.shouldFreePosRefState_ = false;
|
|
139
164
|
}
|
|
140
165
|
Context.prototype = {
|
|
141
166
|
clone: function() {
|
|
@@ -221,10 +246,11 @@ function generateJavascript(ast, options) {
|
|
|
221
246
|
},
|
|
222
247
|
|
|
223
248
|
/**
|
|
224
|
-
* Clone the object, and set the discard flag in the
|
|
225
|
-
* indicates that the caller is only
|
|
226
|
-
*
|
|
227
|
-
* the match
|
|
249
|
+
* Clone the object, and set the "discard result" flag in the
|
|
250
|
+
* cloned object. When true this indicates that the caller is only
|
|
251
|
+
* interested in success or failure, and some subexpressions will
|
|
252
|
+
* use this information to return true instead of the match
|
|
253
|
+
* result.
|
|
228
254
|
*/
|
|
229
255
|
discard: function(value) {
|
|
230
256
|
var obj = this.clone();
|
|
@@ -236,11 +262,117 @@ function generateJavascript(ast, options) {
|
|
|
236
262
|
},
|
|
237
263
|
|
|
238
264
|
/**
|
|
239
|
-
*
|
|
265
|
+
* Clone the object, and set the "discard position" flag in the
|
|
266
|
+
* cloned object. When true this indicates that the caller will take care
|
|
267
|
+
* of setting/restoring the current parse position and the callee
|
|
268
|
+
* needn't bother to ensure it is set to the end of its match.
|
|
269
|
+
* (Typically used during lookahead matches.)
|
|
270
|
+
*/
|
|
271
|
+
discardPos: function(value) {
|
|
272
|
+
var obj = this.clone();
|
|
273
|
+
if (typeof value === 'undefined') {
|
|
274
|
+
value = true;
|
|
275
|
+
}
|
|
276
|
+
obj.discardPos_ = value;
|
|
277
|
+
return obj;
|
|
278
|
+
},
|
|
279
|
+
|
|
280
|
+
/**
|
|
281
|
+
* Get the "discard result" flag.
|
|
240
282
|
*/
|
|
241
283
|
getDiscard: function() {
|
|
242
284
|
return this.discard_;
|
|
243
|
-
}
|
|
285
|
+
},
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* Get the "discard position" flag.
|
|
289
|
+
*/
|
|
290
|
+
getDiscardPos: function() {
|
|
291
|
+
return this.discardPos_;
|
|
292
|
+
},
|
|
293
|
+
|
|
294
|
+
/**
|
|
295
|
+
* Return a saved pos/ref state, or null if none can be reused.
|
|
296
|
+
*/
|
|
297
|
+
getPosRefState() {
|
|
298
|
+
return this.savedPosRefState_;
|
|
299
|
+
},
|
|
300
|
+
|
|
301
|
+
/**
|
|
302
|
+
* Indicate that the pos/ref state of this context can't be reused by
|
|
303
|
+
* children.
|
|
304
|
+
*/
|
|
305
|
+
resetPosRefState() {
|
|
306
|
+
if (this.savedPosRefState_ === null) {
|
|
307
|
+
return this;
|
|
308
|
+
}
|
|
309
|
+
var obj = this.clone();
|
|
310
|
+
obj.savedPosRefState_ = null;
|
|
311
|
+
return obj;
|
|
312
|
+
},
|
|
313
|
+
|
|
314
|
+
/**
|
|
315
|
+
* Ensure we have a saved pos/ref state, reusing the saved parent state
|
|
316
|
+
* where that can be reused.
|
|
317
|
+
*/
|
|
318
|
+
savePosRefState(node, result) {
|
|
319
|
+
let obj;
|
|
320
|
+
if (this.savedPosRefState_ !== null) {
|
|
321
|
+
if (this.shouldFreePosRefState_) {
|
|
322
|
+
// We can reuse the parent state, but don't free it in the child!
|
|
323
|
+
obj = this.clone();
|
|
324
|
+
obj.shouldFreePosRefState_ = false;
|
|
325
|
+
return obj;
|
|
326
|
+
}
|
|
327
|
+
return this;
|
|
328
|
+
}
|
|
329
|
+
// Save current position
|
|
330
|
+
// (should be later freed with a matching call to freePosRefState)
|
|
331
|
+
let saved = {
|
|
332
|
+
dst: allocPosReg(),
|
|
333
|
+
src: language.currPos,
|
|
334
|
+
refs: [],
|
|
335
|
+
};
|
|
336
|
+
let rule = currentRule;
|
|
337
|
+
// Add reference variables
|
|
338
|
+
for (let name in rule.passedParams) {
|
|
339
|
+
if (rule.passedParams[name].type === 'reference') {
|
|
340
|
+
saved.refs.push({reg: allocReg([]), name: name});
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
result.block.push(`${saved.dst} = ${saved.src};`);
|
|
344
|
+
for(let ref of saved.refs) {
|
|
345
|
+
result.block.push(language.cacheSaveRef(ref.reg, ref.name));
|
|
346
|
+
}
|
|
347
|
+
obj = this.clone();
|
|
348
|
+
obj.savedPosRefState_ = saved;
|
|
349
|
+
obj.shouldFreePosRefState_ = true;
|
|
350
|
+
return obj;
|
|
351
|
+
},
|
|
352
|
+
|
|
353
|
+
/**
|
|
354
|
+
* Restore the saved pos/ref state.
|
|
355
|
+
*/
|
|
356
|
+
restorePosRefState() {
|
|
357
|
+
let saved = this.savedPosRefState_;
|
|
358
|
+
let block = [];
|
|
359
|
+
block.push(`${saved.src} = ${saved.dst};`);
|
|
360
|
+
for(let ref of saved.refs) {
|
|
361
|
+
block.push(language.cacheRestoreRef(ref.reg, ref.name));
|
|
362
|
+
}
|
|
363
|
+
return block;
|
|
364
|
+
},
|
|
365
|
+
|
|
366
|
+
/**
|
|
367
|
+
* Free this context's pos/ref state, if necessary.
|
|
368
|
+
*/
|
|
369
|
+
freePosRefState(result) {
|
|
370
|
+
if (this.savedPosRefState_ !== null && this.shouldFreePosRefState_) {
|
|
371
|
+
let saved = this.savedPosRefState_;
|
|
372
|
+
let regList = [ saved.dst ].concat(saved.refs.map((ref)=>ref.reg));
|
|
373
|
+
freeReg(regList, result);
|
|
374
|
+
}
|
|
375
|
+
},
|
|
244
376
|
|
|
245
377
|
};
|
|
246
378
|
|
|
@@ -461,7 +593,7 @@ function generateJavascript(ast, options) {
|
|
|
461
593
|
*/
|
|
462
594
|
function makeActionFunc(code, context) {
|
|
463
595
|
var argNames = [];
|
|
464
|
-
|
|
596
|
+
Object.keys(context.env).forEach(function(argName) {
|
|
465
597
|
if (context.envTypes[argName] === 'reference') {
|
|
466
598
|
argNames.push(language.refArgActionDeclarator(argName));
|
|
467
599
|
} else {
|
|
@@ -484,7 +616,7 @@ function generateJavascript(ast, options) {
|
|
|
484
616
|
* funcId is the index of the function returned by makeActionFunc().
|
|
485
617
|
*/
|
|
486
618
|
function makeActionCall(funcId, context) {
|
|
487
|
-
return language.actionCall(funcId,
|
|
619
|
+
return language.actionCall(funcId, Object.values(context.env));
|
|
488
620
|
}
|
|
489
621
|
|
|
490
622
|
/**
|
|
@@ -493,22 +625,14 @@ function generateJavascript(ast, options) {
|
|
|
493
625
|
* compile time, this returns an empty string.
|
|
494
626
|
*/
|
|
495
627
|
function makeFailCall(value, context) {
|
|
496
|
-
var silence;
|
|
497
|
-
if (context) {
|
|
498
|
-
silence = context.getSilence();
|
|
499
|
-
} else {
|
|
500
|
-
silence = 'false';
|
|
501
|
-
}
|
|
628
|
+
var silence = context.getSilence();
|
|
502
629
|
if (silence === 'true') {
|
|
630
|
+
// This should not be reached in practice.
|
|
503
631
|
return '';
|
|
504
632
|
}
|
|
505
633
|
var expectation = addExpectation(value);
|
|
506
634
|
var call = language.libraryCall('fail', [expectation]);
|
|
507
|
-
if (silence
|
|
508
|
-
return call + ';';
|
|
509
|
-
} else {
|
|
510
|
-
return ['if (!', silence, ') {', call, ';}'].join('');
|
|
511
|
-
}
|
|
635
|
+
return `if (!${silence}) { ${call}; }`;
|
|
512
636
|
}
|
|
513
637
|
|
|
514
638
|
/**
|
|
@@ -580,10 +704,9 @@ function generateJavascript(ast, options) {
|
|
|
580
704
|
function buildSimplePredicate(node, context) {
|
|
581
705
|
var result = new Result();
|
|
582
706
|
var negate = node.type === 'simple_not';
|
|
583
|
-
var posReg = allocPosReg();
|
|
584
707
|
var reg = context.getResultReg(result);
|
|
585
|
-
|
|
586
|
-
|
|
708
|
+
var newContext = context.silence().cloneEnv().discard().discardPos()
|
|
709
|
+
.savePosRefState(node, result);
|
|
587
710
|
result.append(recurse(node.expression, newContext));
|
|
588
711
|
if (negate) {
|
|
589
712
|
result.resolveBlock();
|
|
@@ -591,12 +714,18 @@ function generateJavascript(ast, options) {
|
|
|
591
714
|
result.onFailure([`${reg} = ${language.failed};`]);
|
|
592
715
|
}
|
|
593
716
|
result.onSuccess([`${reg} = ${language.assertionSuccess};`]);
|
|
717
|
+
if (node.isFirstSetTest && context.getSilence() !== 'true') {
|
|
718
|
+
result.onFailure([makeFailCall({
|
|
719
|
+
type: 'other',
|
|
720
|
+
description: node.isFirstSetTest,
|
|
721
|
+
}, context)]);
|
|
722
|
+
}
|
|
594
723
|
if (negate) {
|
|
595
|
-
result.onFailure(
|
|
724
|
+
result.onFailure(newContext.restorePosRefState());
|
|
596
725
|
} else {
|
|
597
|
-
result.onSuccess(
|
|
726
|
+
result.onSuccess(newContext.restorePosRefState());
|
|
598
727
|
}
|
|
599
|
-
|
|
728
|
+
newContext.freePosRefState(result);
|
|
600
729
|
return result;
|
|
601
730
|
}
|
|
602
731
|
|
|
@@ -644,6 +773,25 @@ function generateJavascript(ast, options) {
|
|
|
644
773
|
return result;
|
|
645
774
|
}
|
|
646
775
|
|
|
776
|
+
/**
|
|
777
|
+
* Handler for one_or_more of a character class
|
|
778
|
+
*/
|
|
779
|
+
function buildRepeatedCharacterClass(node, context, atLeastOne) {
|
|
780
|
+
var result = new Result();
|
|
781
|
+
var reg = context.getResultReg(result);
|
|
782
|
+
var classExpr = classNode.expand(classNode.caseSensitive(node.expression), CHARSET_EXPAND);
|
|
783
|
+
language.matchRepeatedClass(classExpr, reg, result, atLeastOne, context.getDiscard(), context.getDiscardPos());
|
|
784
|
+
result.onFailure([`${reg} = ${language.failed};`]);
|
|
785
|
+
if (context.getSilence() !== 'true') {
|
|
786
|
+
result.onFailure([makeFailCall({
|
|
787
|
+
type: "class",
|
|
788
|
+
value: node.expression.rawText,
|
|
789
|
+
description: node.expression.rawText
|
|
790
|
+
}, context)]);
|
|
791
|
+
}
|
|
792
|
+
return result;
|
|
793
|
+
}
|
|
794
|
+
|
|
647
795
|
function makeGenerator(node, context) {
|
|
648
796
|
if (node.type !== 'zero_or_more') {
|
|
649
797
|
throw new Error('Iterable rules must be a single starred subexpression');
|
|
@@ -655,7 +803,7 @@ function generateJavascript(ast, options) {
|
|
|
655
803
|
subresult.onSuccess([`yield ${subresult.expression};`]);
|
|
656
804
|
subresult.onFailure([
|
|
657
805
|
`if (${language.currPos} < ${language.inputLength}) {`,
|
|
658
|
-
indent2( makeFailCall({ type: "end", description: 'end of input' }) ),
|
|
806
|
+
indent2( makeFailCall({ type: "end", description: 'end of input' }, newContext) ),
|
|
659
807
|
indent2(`throw ${language.libraryCall('buildParseException')};`),
|
|
660
808
|
'}',
|
|
661
809
|
'break;'
|
|
@@ -673,8 +821,14 @@ function generateJavascript(ast, options) {
|
|
|
673
821
|
/**
|
|
674
822
|
* Get the names of the arguments to the given rule function
|
|
675
823
|
*/
|
|
676
|
-
function getRuleArgNames(rule) {
|
|
677
|
-
var args
|
|
824
|
+
function getRuleArgNames(rule, discard) {
|
|
825
|
+
var args;
|
|
826
|
+
if (discard || !rule.reportsFailure) {
|
|
827
|
+
// these rules never report failure, so silence argument isn't needed
|
|
828
|
+
args = [];
|
|
829
|
+
} else {
|
|
830
|
+
args = [language.silence];
|
|
831
|
+
}
|
|
678
832
|
|
|
679
833
|
if (rule.hasBoolParams) {
|
|
680
834
|
args.push(language.boolParams);
|
|
@@ -699,7 +853,7 @@ function generateJavascript(ast, options) {
|
|
|
699
853
|
*/
|
|
700
854
|
function getStartArgs(ruleName) {
|
|
701
855
|
var rule = asts.findRule(ast, ruleName);
|
|
702
|
-
var argNames = getRuleArgNames(rule);
|
|
856
|
+
var argNames = getRuleArgNames(rule, false);
|
|
703
857
|
var args = [];
|
|
704
858
|
|
|
705
859
|
for (let i = 0; i < argNames.length; i++) {
|
|
@@ -777,32 +931,16 @@ function generateJavascript(ast, options) {
|
|
|
777
931
|
* Get the list of expressions or statements returned by cacheStoreRef() for
|
|
778
932
|
* refs which may have changed.
|
|
779
933
|
*/
|
|
780
|
-
function getCacheStoreRefs(rule) {
|
|
781
|
-
var
|
|
782
|
-
for (let
|
|
783
|
-
|
|
784
|
-
store[name] = true;
|
|
785
|
-
}
|
|
934
|
+
function getCacheStoreRefs(rule, saved) {
|
|
935
|
+
var regMap = {};
|
|
936
|
+
for (let ref of saved.refs) {
|
|
937
|
+
regMap[ref.name] = ref.reg;
|
|
786
938
|
}
|
|
787
939
|
return references.map(function(name) {
|
|
788
|
-
return language.cacheStoreRef(name,
|
|
940
|
+
return language.cacheStoreRef(regMap[name], name);
|
|
789
941
|
});
|
|
790
942
|
}
|
|
791
943
|
|
|
792
|
-
/**
|
|
793
|
-
* Get a block which saves ref values to a temporary variable for later
|
|
794
|
-
* comparison in getCacheStoreRefs().
|
|
795
|
-
*/
|
|
796
|
-
function getCacheSaveRefs(rule) {
|
|
797
|
-
var parts = [];
|
|
798
|
-
for (let name in rule.passedParams) {
|
|
799
|
-
if (rule.passedParams[name].type === 'reference') {
|
|
800
|
-
parts.push(language.cacheSaveRef(name));
|
|
801
|
-
}
|
|
802
|
-
}
|
|
803
|
-
return parts.join('\n');
|
|
804
|
-
}
|
|
805
|
-
|
|
806
944
|
function expandTemplate(template, vars) {
|
|
807
945
|
for (let name in vars) {
|
|
808
946
|
let value = vars[name];
|
|
@@ -836,14 +974,35 @@ function generateJavascript(ast, options) {
|
|
|
836
974
|
choiceIndex = 0;
|
|
837
975
|
currentRule = node;
|
|
838
976
|
|
|
839
|
-
//
|
|
977
|
+
// Set up the Context and save the position and reference state
|
|
840
978
|
var context = (new Context()).discard(discard);
|
|
979
|
+
if (discard || !node.reportsFailure) {
|
|
980
|
+
context = context.silence();
|
|
981
|
+
}
|
|
982
|
+
var outerContext = context;
|
|
983
|
+
let nodeName = asts.getRuleAttributeValue(node, "name");
|
|
984
|
+
if (nodeName !== undefined) {
|
|
985
|
+
// Named rules suppress failure reporting in their subexpression.
|
|
986
|
+
context = context.silence();
|
|
987
|
+
}
|
|
988
|
+
|
|
989
|
+
var topSaveState = new Result();
|
|
990
|
+
let cacheThisRule = asts.getRuleAttributeValue(node, "cache", options.cache);
|
|
991
|
+
if (!iterable && cacheThisRule) {
|
|
992
|
+
context = context.savePosRefState(node, topSaveState);
|
|
993
|
+
}
|
|
994
|
+
|
|
995
|
+
// Generate the Result
|
|
841
996
|
var result;
|
|
842
997
|
if (iterable) {
|
|
843
998
|
result = makeGenerator(node.expression, context);
|
|
844
999
|
} else {
|
|
845
1000
|
result = recurse(node.expression, context);
|
|
846
1001
|
}
|
|
1002
|
+
if (nodeName !== undefined && outerContext.getSilence() !== 'true') {
|
|
1003
|
+
// Failure in a named rule is reported at this node.
|
|
1004
|
+
result.onFailure([makeFailCall({type: 'other', description: nodeName}, outerContext)]);
|
|
1005
|
+
}
|
|
847
1006
|
result.resolveBlock();
|
|
848
1007
|
|
|
849
1008
|
// Make the function body
|
|
@@ -856,7 +1015,7 @@ function generateJavascript(ast, options) {
|
|
|
856
1015
|
} else {
|
|
857
1016
|
var ruleIndexCode = asts.indexOfRule(ast, node.name);
|
|
858
1017
|
var cacheBits;
|
|
859
|
-
if (
|
|
1018
|
+
if (cacheThisRule) {
|
|
860
1019
|
var cacheFunc = options.cacheRuleHook || generateCacheRule;
|
|
861
1020
|
cacheBits = cacheFunc({
|
|
862
1021
|
startPos: language.currPos,
|
|
@@ -869,22 +1028,22 @@ function generateJavascript(ast, options) {
|
|
|
869
1028
|
result: result.expression,
|
|
870
1029
|
params: getParamsForCacheKey(node),
|
|
871
1030
|
loadRefs: getCacheLoadRefs(node),
|
|
872
|
-
|
|
873
|
-
|
|
1031
|
+
storeRefs: getCacheStoreRefs(node, context.getPosRefState()),
|
|
1032
|
+
saveRefs: topSaveState.block.join('\n'),
|
|
874
1033
|
className: className,
|
|
875
1034
|
});
|
|
876
1035
|
body.push(cacheBits.start);
|
|
877
1036
|
}
|
|
878
1037
|
body.push(result.block.join('\n'));
|
|
879
1038
|
|
|
880
|
-
if (
|
|
1039
|
+
if (cacheThisRule) {
|
|
881
1040
|
body.push(cacheBits.store);
|
|
882
1041
|
}
|
|
883
1042
|
body.push(`return ${result.expression};`);
|
|
884
1043
|
}
|
|
885
1044
|
body = indent2(body.join('\n'));
|
|
886
1045
|
|
|
887
|
-
let argNames = getRuleArgNames(node);
|
|
1046
|
+
let argNames = getRuleArgNames(node, discard);
|
|
888
1047
|
let args = argNames.join(', ');
|
|
889
1048
|
|
|
890
1049
|
// Wrap the function body in a trace decorator if requested.
|
|
@@ -907,7 +1066,6 @@ function generateJavascript(ast, options) {
|
|
|
907
1066
|
|
|
908
1067
|
rule_ref: function(node, context) {
|
|
909
1068
|
var result = new Result();
|
|
910
|
-
var reg = context.getResultReg(result);
|
|
911
1069
|
|
|
912
1070
|
var newParamValues = {};
|
|
913
1071
|
var boolSetMask = 0;
|
|
@@ -948,7 +1106,7 @@ function generateJavascript(ast, options) {
|
|
|
948
1106
|
}
|
|
949
1107
|
|
|
950
1108
|
let rule = asts.findRule(ast, node.name);
|
|
951
|
-
let argNameList = getRuleArgNames(rule);
|
|
1109
|
+
let argNameList = getRuleArgNames(rule, context.getDiscard());
|
|
952
1110
|
let args = [];
|
|
953
1111
|
for (let i = 0; i < argNameList.length; i++) {
|
|
954
1112
|
let argName = argNameList[i];
|
|
@@ -986,15 +1144,12 @@ function generateJavascript(ast, options) {
|
|
|
986
1144
|
}
|
|
987
1145
|
|
|
988
1146
|
let funcName = addRule(node.name, context.getDiscard());
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
result.append(recurse(node.expression, context.silence()));
|
|
996
|
-
if (context.getSilence() !== 'true') {
|
|
997
|
-
result.onFailure([makeFailCall({type: 'other', description: node.name}, context)]);
|
|
1147
|
+
if (context.getDiscard() && node.alwaysMatch) {
|
|
1148
|
+
result.block = [`${language.ruleFuncCall(funcName, args)};`];
|
|
1149
|
+
result.expression = 'true';
|
|
1150
|
+
} else {
|
|
1151
|
+
const reg = context.getResultReg(result);
|
|
1152
|
+
result.block = [`${reg} = ${language.ruleFuncCall(funcName, args)};`];
|
|
998
1153
|
}
|
|
999
1154
|
return result;
|
|
1000
1155
|
},
|
|
@@ -1024,16 +1179,27 @@ function generateJavascript(ast, options) {
|
|
|
1024
1179
|
action: function(node, context) {
|
|
1025
1180
|
var result = new Result();
|
|
1026
1181
|
var reg = context.getResultReg(result);
|
|
1027
|
-
var newContext = context.cloneEnv().discard();
|
|
1028
|
-
var
|
|
1182
|
+
var newContext = context.cloneEnv().discard().discardPos(false);
|
|
1183
|
+
var saved = context.getPosRefState();
|
|
1184
|
+
var savedPos, freePos = [];
|
|
1185
|
+
if (saved !== null) {
|
|
1186
|
+
savedPos = saved.dst;
|
|
1187
|
+
} else {
|
|
1188
|
+
savedPos = allocPosReg();
|
|
1189
|
+
freePos = [ savedPos ];
|
|
1190
|
+
result.block = [`${savedPos} = ${language.currPos};`];
|
|
1191
|
+
}
|
|
1029
1192
|
var subresult = recurse(node.expression, newContext);
|
|
1030
1193
|
var funcId = makeActionFunc(node.code, newContext);
|
|
1031
|
-
result.block = [`${savedPos} = ${language.currPos};`];
|
|
1032
1194
|
result.append(subresult);
|
|
1195
|
+
if (node.alwaysMatch) {
|
|
1196
|
+
result.condition = 'true';
|
|
1197
|
+
}
|
|
1033
1198
|
result.onSuccess([
|
|
1034
1199
|
`${language.savedPos} = ${savedPos};`,
|
|
1035
1200
|
`${reg} = ${makeActionCall(funcId, newContext)};`
|
|
1036
1201
|
]);
|
|
1202
|
+
freeReg(freePos, result);
|
|
1037
1203
|
return result;
|
|
1038
1204
|
},
|
|
1039
1205
|
|
|
@@ -1041,43 +1207,72 @@ function generateJavascript(ast, options) {
|
|
|
1041
1207
|
if (node.elements.length === 1) {
|
|
1042
1208
|
return recurse(node.elements[0], context);
|
|
1043
1209
|
} else {
|
|
1044
|
-
var posReg = allocPosReg();
|
|
1045
1210
|
var result = new Result();
|
|
1046
1211
|
var resultReg = context.getResultReg(result);
|
|
1047
1212
|
var label = `seq_${++seqIndex}`;
|
|
1048
1213
|
result.block = [
|
|
1049
1214
|
language.blockStart(label),
|
|
1050
|
-
|
|
1051
|
-
var parts = [], i;
|
|
1215
|
+
];
|
|
1216
|
+
var parts = [], partFree = [], i;
|
|
1217
|
+
var subcontext;
|
|
1052
1218
|
|
|
1219
|
+
if (!node.alwaysMatch) {
|
|
1220
|
+
context = context.savePosRefState(node, result);
|
|
1221
|
+
}
|
|
1222
|
+
subcontext = context;
|
|
1053
1223
|
for (i = 0; i < node.elements.length; i++) {
|
|
1054
|
-
var
|
|
1055
|
-
|
|
1224
|
+
var isPicked = node.elements[i].picked || node.numPicked === undefined;
|
|
1225
|
+
var ctxt = isPicked ?
|
|
1226
|
+
// if there's only one node picked, use this result register for
|
|
1227
|
+
// the subcontext, otherwise allocate a new result register.
|
|
1228
|
+
(node.numPicked === 1 ? subcontext : subcontext.noPassThru()):
|
|
1229
|
+
// set 'discard' if this subexpression is not picked.
|
|
1230
|
+
subcontext.noPassThru().discard();
|
|
1231
|
+
// Unless this is the last element, we need the position
|
|
1232
|
+
// (last element inherits the parent's need)
|
|
1233
|
+
if (i < node.elements.length - 1) {
|
|
1234
|
+
ctxt = ctxt.discardPos(false);
|
|
1235
|
+
}
|
|
1236
|
+
var subresult = recurse(node.elements[i], ctxt);
|
|
1237
|
+
if (isPicked) {
|
|
1238
|
+
partFree.push(...subresult.free);
|
|
1239
|
+
subresult.free = [];
|
|
1240
|
+
parts.push(subresult.expression);
|
|
1241
|
+
}
|
|
1056
1242
|
result.append(subresult);
|
|
1057
|
-
parts.push(subresult.expression);
|
|
1058
1243
|
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1244
|
+
if (i === 0) {
|
|
1245
|
+
// After the first element, it's not safe to reuse parent pos/ref
|
|
1246
|
+
// state any more.
|
|
1247
|
+
subcontext = subcontext.resetPosRefState();
|
|
1248
|
+
}
|
|
1249
|
+
if (!(node.alwaysMatch || node.elements[i].alwaysMatch)) {
|
|
1250
|
+
if (i > 0) {
|
|
1251
|
+
// On failure, backtrack to the start of the sequence. If this is
|
|
1252
|
+
// the first element of the sequence, it's not necessary to backtrack
|
|
1253
|
+
// since failing subexpressions do not increment the position
|
|
1254
|
+
// nor affect reference parameters
|
|
1255
|
+
result.onFailure(context.restorePosRefState());
|
|
1256
|
+
}
|
|
1257
|
+
// On failure, set the result register and exit the sequence
|
|
1258
|
+
result.onFailure([
|
|
1259
|
+
`${resultReg} = ${language.failed};`,
|
|
1260
|
+
language.gotoBlockEnd(label)
|
|
1261
|
+
]);
|
|
1064
1262
|
}
|
|
1065
|
-
// On failure, set the result register and exit the sequence
|
|
1066
|
-
result.onFailure([
|
|
1067
|
-
`${resultReg} = ${language.failed};`,
|
|
1068
|
-
language.gotoBlockEnd(label)
|
|
1069
|
-
]);
|
|
1070
1263
|
result.resolveBlock();
|
|
1071
1264
|
}
|
|
1072
|
-
if (
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1265
|
+
if (node.numPicked !== 1) {
|
|
1266
|
+
if (context.getDiscard()) {
|
|
1267
|
+
result.block.push(`${resultReg} = true;`);
|
|
1268
|
+
} else {
|
|
1269
|
+
result.block.push(`${resultReg} = [${parts.join(',')}];`);
|
|
1270
|
+
}
|
|
1076
1271
|
}
|
|
1077
1272
|
result.block.push(language.blockEnd(label));
|
|
1078
1273
|
result.expression = resultReg;
|
|
1079
|
-
freeReg(
|
|
1080
|
-
result
|
|
1274
|
+
freeReg(partFree, result);
|
|
1275
|
+
context.freePosRefState(result);
|
|
1081
1276
|
return result;
|
|
1082
1277
|
}
|
|
1083
1278
|
},
|
|
@@ -1106,11 +1301,16 @@ function generateJavascript(ast, options) {
|
|
|
1106
1301
|
},
|
|
1107
1302
|
|
|
1108
1303
|
text: function(node, context) {
|
|
1304
|
+
if (context.getDiscard()) {
|
|
1305
|
+
// text does nothing if we're already discarding
|
|
1306
|
+
return recurse(node.expression, context.cloneEnv());
|
|
1307
|
+
}
|
|
1109
1308
|
var startPos = allocPosReg();
|
|
1110
1309
|
var result = new Result();
|
|
1111
1310
|
var reg = context.getResultReg(result);
|
|
1112
1311
|
result.block = [`${startPos} = ${language.currPos};`];
|
|
1113
|
-
|
|
1312
|
+
// Note that here we are 'discarding' but not 'silenced'
|
|
1313
|
+
result.append(recurse(node.expression, context.cloneEnv().discard().discardPos(false)));
|
|
1114
1314
|
result.onSuccess([
|
|
1115
1315
|
`${reg} = ${language.inputSubstring(startPos, language.currPos)};`
|
|
1116
1316
|
]);
|
|
@@ -1136,6 +1336,10 @@ function generateJavascript(ast, options) {
|
|
|
1136
1336
|
},
|
|
1137
1337
|
|
|
1138
1338
|
zero_or_more: function(node, context) {
|
|
1339
|
+
// Special case zero_or_more of a character class
|
|
1340
|
+
if (node.expression.type === 'class') {
|
|
1341
|
+
return buildRepeatedCharacterClass(node, context, false);
|
|
1342
|
+
}
|
|
1139
1343
|
// Pseudocode for the non-discard case:
|
|
1140
1344
|
//
|
|
1141
1345
|
// let r1 = [];
|
|
@@ -1161,7 +1365,7 @@ function generateJavascript(ast, options) {
|
|
|
1161
1365
|
var result = new Result();
|
|
1162
1366
|
var resultReg = context.getResultReg(result);
|
|
1163
1367
|
var partReg = allocReg([]);
|
|
1164
|
-
var newContext = context.resultReg(partReg).cloneEnv();
|
|
1368
|
+
var newContext = context.resultReg(partReg).cloneEnv().discardPos(false).resetPosRefState();
|
|
1165
1369
|
var subresult = recurse(node.expression, newContext);
|
|
1166
1370
|
if (!context.getDiscard()) {
|
|
1167
1371
|
result.block.push(`${resultReg} = [];`);
|
|
@@ -1185,6 +1389,10 @@ function generateJavascript(ast, options) {
|
|
|
1185
1389
|
},
|
|
1186
1390
|
|
|
1187
1391
|
one_or_more: function(node, context) {
|
|
1392
|
+
// Special case one_or_more of a character class
|
|
1393
|
+
if (node.expression.type === 'class') {
|
|
1394
|
+
return buildRepeatedCharacterClass(node, context, true);
|
|
1395
|
+
}
|
|
1188
1396
|
// Pseudocode for the non-discard case:
|
|
1189
1397
|
//
|
|
1190
1398
|
// let r1 = [];
|
|
@@ -1217,7 +1425,7 @@ function generateJavascript(ast, options) {
|
|
|
1217
1425
|
var initialFree = result.free;
|
|
1218
1426
|
result.free = [];
|
|
1219
1427
|
var partReg = allocReg([]);
|
|
1220
|
-
var newContext = context.resultReg(partReg).cloneEnv();
|
|
1428
|
+
var newContext = context.resultReg(partReg).cloneEnv().discardPos(false).resetPosRefState();
|
|
1221
1429
|
var subresult = recurse(node.expression, newContext);
|
|
1222
1430
|
if (!context.getDiscard()) {
|
|
1223
1431
|
result.block.push(`${resultReg} = [];`);
|
|
@@ -1258,13 +1466,13 @@ function generateJavascript(ast, options) {
|
|
|
1258
1466
|
var result = new Result();
|
|
1259
1467
|
// Special case: empty string always matches
|
|
1260
1468
|
if (node.value.length === 0) {
|
|
1261
|
-
result.expression = "''";
|
|
1469
|
+
result.expression = context.getDiscard() ? "true" : "''";
|
|
1262
1470
|
result.condition = 'true';
|
|
1263
1471
|
return result;
|
|
1264
1472
|
}
|
|
1265
1473
|
|
|
1266
1474
|
var reg = context.getResultReg(result);
|
|
1267
|
-
language.matchLiteral(node, reg, result);
|
|
1475
|
+
language.matchLiteral(node, reg, result, context.getDiscard(), context.getDiscardPos());
|
|
1268
1476
|
if (context.getSilence() !== 'true') {
|
|
1269
1477
|
result.onFailure([
|
|
1270
1478
|
makeFailCall({
|
|
@@ -1277,10 +1485,11 @@ function generateJavascript(ast, options) {
|
|
|
1277
1485
|
return result;
|
|
1278
1486
|
},
|
|
1279
1487
|
|
|
1280
|
-
|
|
1488
|
+
class: function(node, context) {
|
|
1281
1489
|
var result = new Result();
|
|
1282
1490
|
var reg = context.getResultReg(result);
|
|
1283
|
-
|
|
1491
|
+
var classExpr = classNode.expand(classNode.caseSensitive(node), CHARSET_EXPAND);
|
|
1492
|
+
language.matchClass(classExpr, reg, result, context.getDiscard(), context.getDiscardPos());
|
|
1284
1493
|
result.onFailure([`${reg} = ${language.failed};`]);
|
|
1285
1494
|
if (context.getSilence() !== 'true') {
|
|
1286
1495
|
result.onFailure([makeFailCall({
|
|
@@ -1296,7 +1505,14 @@ function generateJavascript(ast, options) {
|
|
|
1296
1505
|
var result = new Result();
|
|
1297
1506
|
var reg = context.getResultReg(result);
|
|
1298
1507
|
result.condition = `${language.currPos} < ${language.inputLength}`;
|
|
1299
|
-
|
|
1508
|
+
if (context.getDiscard()) {
|
|
1509
|
+
if (!context.getDiscardPos()) {
|
|
1510
|
+
result.onSuccess([`${language.advanceInputChar};`]);
|
|
1511
|
+
}
|
|
1512
|
+
result.onSuccess([`${reg} = true;`]);
|
|
1513
|
+
} else {
|
|
1514
|
+
result.onSuccess([`${reg} = ${language.consumeInputChar};`]);
|
|
1515
|
+
}
|
|
1300
1516
|
result.onFailure([`${reg} = ${language.failed};`]);
|
|
1301
1517
|
if (context.getSilence() !== 'true') {
|
|
1302
1518
|
result.onFailure([makeFailCall({
|