wikipeg 4.0.2 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/HISTORY.md +556 -0
  2. package/README.md +230 -12
  3. package/VERSION +1 -1
  4. package/bin/wikipeg +8 -4
  5. package/examples/css.pegphp +9 -8
  6. package/lib/compiler/asts.js +30 -10
  7. package/lib/compiler/charsets.js +306 -0
  8. package/lib/compiler/language/javascript.js +107 -33
  9. package/lib/compiler/language/php.js +193 -55
  10. package/lib/compiler/passes/analyze-always-match.js +141 -0
  11. package/lib/compiler/passes/analyze-first.js +245 -0
  12. package/lib/compiler/passes/ast-to-code.js +316 -100
  13. package/lib/compiler/passes/inline-simple-rules.js +96 -0
  14. package/lib/compiler/passes/optimize-character-class.js +147 -0
  15. package/lib/compiler/passes/optimize-failure-reporting.js +65 -0
  16. package/lib/compiler/passes/remove-proxy-rules.js +7 -5
  17. package/lib/compiler/passes/report-infinite-loops.js +4 -1
  18. package/lib/compiler/passes/report-left-recursion.js +3 -4
  19. package/lib/compiler/passes/report-unknown-attributes.js +39 -0
  20. package/lib/compiler/passes/transform-common-lang.js +1 -1
  21. package/lib/compiler/traverser.js +1 -2
  22. package/lib/compiler/visitor.js +5 -7
  23. package/lib/compiler.js +24 -10
  24. package/lib/parser.js +2784 -3088
  25. package/lib/peg.js +7 -15
  26. package/lib/runtime/template.js +9 -1
  27. package/lib/utils/CaseFolding.txt +1654 -0
  28. package/lib/utils/arrays.js +0 -72
  29. package/lib/utils/casefold.js +697 -0
  30. package/lib/utils/objects.js +9 -39
  31. package/lib/utils/unicode.js +34 -0
  32. package/package.json +6 -4
  33. package/src/DefaultTracer.php +18 -18
  34. package/src/PEGParserBase.php +53 -28
  35. package/src/SyntaxError.php +4 -4
  36. package/src/Tracer.php +1 -1
  37. package/lib/compiler/opcodes.js +0 -54
@@ -4,10 +4,16 @@ var js = require("../language/javascript"),
4
4
  php = require("../language/php"),
5
5
  visitor = require("../visitor"),
6
6
  objects = require('../../utils/objects'),
7
+ classNode = require("../charsets").classNode,
7
8
  asts = require("../asts"),
8
9
  fs = require("fs");
9
10
 
10
11
  function generateJavascript(ast, options) {
12
+ /**
13
+ * How much to expand ranges in character classes before giving up.
14
+ */
15
+ const CHARSET_EXPAND = 16;
16
+
11
17
  var rulesToGenerate = [];
12
18
  var generatedRuleNames = {};
13
19
 
@@ -88,6 +94,22 @@ function generateJavascript(ast, options) {
88
94
  namespace = `namespace ${matches[1]};`;
89
95
  }
90
96
 
97
+ // Look for 'cache' attributes on rules. If some rule has [cache] on it,
98
+ // but options.cache was false, then set options.cache but default all
99
+ // rules to [cache=false], so only the explicitly [cache=true] rules
100
+ // will be cached.
101
+ if (ast.rules.some(
102
+ (rule) => asts.getRuleAttributeValue(rule, "cache") === true
103
+ ) && !options.cache) {
104
+ options.cache = true;
105
+ ast.rules.forEach((rule) => {
106
+ if (asts.findRuleAttribute(rule, "cache") === undefined) {
107
+ rule.attributes = rule.attributes || [];
108
+ rule.attributes.push({ name: "cache", type: "boolean", value: false });
109
+ }
110
+ });
111
+ }
112
+
91
113
  var refsSet = {};
92
114
  var getRefs = visitor.build({
93
115
  rule_ref: function(node) {
@@ -136,6 +158,9 @@ function generateJavascript(ast, options) {
136
158
  this.resultReg_ = false;
137
159
  this.silence_ = language.silence;
138
160
  this.discard_ = false;
161
+ this.discardPos_ = false;
162
+ this.savedPosRefState_ = null;
163
+ this.shouldFreePosRefState_ = false;
139
164
  }
140
165
  Context.prototype = {
141
166
  clone: function() {
@@ -221,10 +246,11 @@ function generateJavascript(ast, options) {
221
246
  },
222
247
 
223
248
  /**
224
- * Clone the object, and set the discard flag in the cloned object. This
225
- * indicates that the caller is only interested in success or failure, and
226
- * some subexpressions will use this information to return true instead of
227
- * the match result.
249
+ * Clone the object, and set the "discard result" flag in the
250
+ * cloned object. When true this indicates that the caller is only
251
+ * interested in success or failure, and some subexpressions will
252
+ * use this information to return true instead of the match
253
+ * result.
228
254
  */
229
255
  discard: function(value) {
230
256
  var obj = this.clone();
@@ -236,11 +262,117 @@ function generateJavascript(ast, options) {
236
262
  },
237
263
 
238
264
  /**
239
- * Get the discard flag.
265
+ * Clone the object, and set the "discard position" flag in the
266
+ * cloned object. When true this indicates that the caller will take care
267
+ * of setting/restoring the current parse position and the callee
268
+ * needn't bother to ensure it is set to the end of its match.
269
+ * (Typically used during lookahead matches.)
270
+ */
271
+ discardPos: function(value) {
272
+ var obj = this.clone();
273
+ if (typeof value === 'undefined') {
274
+ value = true;
275
+ }
276
+ obj.discardPos_ = value;
277
+ return obj;
278
+ },
279
+
280
+ /**
281
+ * Get the "discard result" flag.
240
282
  */
241
283
  getDiscard: function() {
242
284
  return this.discard_;
243
- }
285
+ },
286
+
287
+ /**
288
+ * Get the "discard position" flag.
289
+ */
290
+ getDiscardPos: function() {
291
+ return this.discardPos_;
292
+ },
293
+
294
+ /**
295
+ * Return a saved pos/ref state, or null if none can be reused.
296
+ */
297
+ getPosRefState() {
298
+ return this.savedPosRefState_;
299
+ },
300
+
301
+ /**
302
+ * Indicate that the pos/ref state of this context can't be reused by
303
+ * children.
304
+ */
305
+ resetPosRefState() {
306
+ if (this.savedPosRefState_ === null) {
307
+ return this;
308
+ }
309
+ var obj = this.clone();
310
+ obj.savedPosRefState_ = null;
311
+ return obj;
312
+ },
313
+
314
+ /**
315
+ * Ensure we have a saved pos/ref state, reusing the saved parent state
316
+ * where that can be reused.
317
+ */
318
+ savePosRefState(node, result) {
319
+ let obj;
320
+ if (this.savedPosRefState_ !== null) {
321
+ if (this.shouldFreePosRefState_) {
322
+ // We can reuse the parent state, but don't free it in the child!
323
+ obj = this.clone();
324
+ obj.shouldFreePosRefState_ = false;
325
+ return obj;
326
+ }
327
+ return this;
328
+ }
329
+ // Save current position
330
+ // (should be later freed with a matching call to freePosRefState)
331
+ let saved = {
332
+ dst: allocPosReg(),
333
+ src: language.currPos,
334
+ refs: [],
335
+ };
336
+ let rule = currentRule;
337
+ // Add reference variables
338
+ for (let name in rule.passedParams) {
339
+ if (rule.passedParams[name].type === 'reference') {
340
+ saved.refs.push({reg: allocReg([]), name: name});
341
+ }
342
+ }
343
+ result.block.push(`${saved.dst} = ${saved.src};`);
344
+ for(let ref of saved.refs) {
345
+ result.block.push(language.cacheSaveRef(ref.reg, ref.name));
346
+ }
347
+ obj = this.clone();
348
+ obj.savedPosRefState_ = saved;
349
+ obj.shouldFreePosRefState_ = true;
350
+ return obj;
351
+ },
352
+
353
+ /**
354
+ * Restore the saved pos/ref state.
355
+ */
356
+ restorePosRefState() {
357
+ let saved = this.savedPosRefState_;
358
+ let block = [];
359
+ block.push(`${saved.src} = ${saved.dst};`);
360
+ for(let ref of saved.refs) {
361
+ block.push(language.cacheRestoreRef(ref.reg, ref.name));
362
+ }
363
+ return block;
364
+ },
365
+
366
+ /**
367
+ * Free this context's pos/ref state, if necessary.
368
+ */
369
+ freePosRefState(result) {
370
+ if (this.savedPosRefState_ !== null && this.shouldFreePosRefState_) {
371
+ let saved = this.savedPosRefState_;
372
+ let regList = [ saved.dst ].concat(saved.refs.map((ref)=>ref.reg));
373
+ freeReg(regList, result);
374
+ }
375
+ },
244
376
 
245
377
  };
246
378
 
@@ -461,7 +593,7 @@ function generateJavascript(ast, options) {
461
593
  */
462
594
  function makeActionFunc(code, context) {
463
595
  var argNames = [];
464
- objects.keys(context.env).forEach(function(argName) {
596
+ Object.keys(context.env).forEach(function(argName) {
465
597
  if (context.envTypes[argName] === 'reference') {
466
598
  argNames.push(language.refArgActionDeclarator(argName));
467
599
  } else {
@@ -484,7 +616,7 @@ function generateJavascript(ast, options) {
484
616
  * funcId is the index of the function returned by makeActionFunc().
485
617
  */
486
618
  function makeActionCall(funcId, context) {
487
- return language.actionCall(funcId, objects.values(context.env));
619
+ return language.actionCall(funcId, Object.values(context.env));
488
620
  }
489
621
 
490
622
  /**
@@ -493,22 +625,14 @@ function generateJavascript(ast, options) {
493
625
  * compile time, this returns an empty string.
494
626
  */
495
627
  function makeFailCall(value, context) {
496
- var silence;
497
- if (context) {
498
- silence = context.getSilence();
499
- } else {
500
- silence = 'false';
501
- }
628
+ var silence = context.getSilence();
502
629
  if (silence === 'true') {
630
+ // This should not be reached in practice.
503
631
  return '';
504
632
  }
505
633
  var expectation = addExpectation(value);
506
634
  var call = language.libraryCall('fail', [expectation]);
507
- if (silence === 'false') {
508
- return call + ';';
509
- } else {
510
- return ['if (!', silence, ') {', call, ';}'].join('');
511
- }
635
+ return `if (!${silence}) { ${call}; }`;
512
636
  }
513
637
 
514
638
  /**
@@ -580,10 +704,9 @@ function generateJavascript(ast, options) {
580
704
  function buildSimplePredicate(node, context) {
581
705
  var result = new Result();
582
706
  var negate = node.type === 'simple_not';
583
- var posReg = allocPosReg();
584
707
  var reg = context.getResultReg(result);
585
- result.block = [`${posReg} = ${language.currPos};`];
586
- var newContext = context.silence().cloneEnv().discard();
708
+ var newContext = context.silence().cloneEnv().discard().discardPos()
709
+ .savePosRefState(node, result);
587
710
  result.append(recurse(node.expression, newContext));
588
711
  if (negate) {
589
712
  result.resolveBlock();
@@ -591,12 +714,18 @@ function generateJavascript(ast, options) {
591
714
  result.onFailure([`${reg} = ${language.failed};`]);
592
715
  }
593
716
  result.onSuccess([`${reg} = ${language.assertionSuccess};`]);
717
+ if (node.isFirstSetTest && context.getSilence() !== 'true') {
718
+ result.onFailure([makeFailCall({
719
+ type: 'other',
720
+ description: node.isFirstSetTest,
721
+ }, context)]);
722
+ }
594
723
  if (negate) {
595
- result.onFailure([`${language.currPos} = ${posReg};`]);
724
+ result.onFailure(newContext.restorePosRefState());
596
725
  } else {
597
- result.onSuccess([`${language.currPos} = ${posReg};`]);
726
+ result.onSuccess(newContext.restorePosRefState());
598
727
  }
599
- freeReg(posReg, result);
728
+ newContext.freePosRefState(result);
600
729
  return result;
601
730
  }
602
731
 
@@ -644,6 +773,25 @@ function generateJavascript(ast, options) {
644
773
  return result;
645
774
  }
646
775
 
776
+ /**
777
+ * Handler for one_or_more of a character class
778
+ */
779
+ function buildRepeatedCharacterClass(node, context, atLeastOne) {
780
+ var result = new Result();
781
+ var reg = context.getResultReg(result);
782
+ var classExpr = classNode.expand(classNode.caseSensitive(node.expression), CHARSET_EXPAND);
783
+ language.matchRepeatedClass(classExpr, reg, result, atLeastOne, context.getDiscard(), context.getDiscardPos());
784
+ result.onFailure([`${reg} = ${language.failed};`]);
785
+ if (context.getSilence() !== 'true') {
786
+ result.onFailure([makeFailCall({
787
+ type: "class",
788
+ value: node.expression.rawText,
789
+ description: node.expression.rawText
790
+ }, context)]);
791
+ }
792
+ return result;
793
+ }
794
+
647
795
  function makeGenerator(node, context) {
648
796
  if (node.type !== 'zero_or_more') {
649
797
  throw new Error('Iterable rules must be a single starred subexpression');
@@ -655,7 +803,7 @@ function generateJavascript(ast, options) {
655
803
  subresult.onSuccess([`yield ${subresult.expression};`]);
656
804
  subresult.onFailure([
657
805
  `if (${language.currPos} < ${language.inputLength}) {`,
658
- indent2( makeFailCall({ type: "end", description: 'end of input' }) ),
806
+ indent2( makeFailCall({ type: "end", description: 'end of input' }, newContext) ),
659
807
  indent2(`throw ${language.libraryCall('buildParseException')};`),
660
808
  '}',
661
809
  'break;'
@@ -673,8 +821,14 @@ function generateJavascript(ast, options) {
673
821
  /**
674
822
  * Get the names of the arguments to the given rule function
675
823
  */
676
- function getRuleArgNames(rule) {
677
- var args = [language.silence];
824
+ function getRuleArgNames(rule, discard) {
825
+ var args;
826
+ if (discard || !rule.reportsFailure) {
827
+ // these rules never report failure, so silence argument isn't needed
828
+ args = [];
829
+ } else {
830
+ args = [language.silence];
831
+ }
678
832
 
679
833
  if (rule.hasBoolParams) {
680
834
  args.push(language.boolParams);
@@ -699,7 +853,7 @@ function generateJavascript(ast, options) {
699
853
  */
700
854
  function getStartArgs(ruleName) {
701
855
  var rule = asts.findRule(ast, ruleName);
702
- var argNames = getRuleArgNames(rule);
856
+ var argNames = getRuleArgNames(rule, false);
703
857
  var args = [];
704
858
 
705
859
  for (let i = 0; i < argNames.length; i++) {
@@ -777,32 +931,16 @@ function generateJavascript(ast, options) {
777
931
  * Get the list of expressions or statements returned by cacheStoreRef() for
778
932
  * refs which may have changed.
779
933
  */
780
- function getCacheStoreRefs(rule) {
781
- var store = {};
782
- for (let name in rule.passedParams) {
783
- if (rule.passedParams[name].type === 'reference') {
784
- store[name] = true;
785
- }
934
+ function getCacheStoreRefs(rule, saved) {
935
+ var regMap = {};
936
+ for (let ref of saved.refs) {
937
+ regMap[ref.name] = ref.reg;
786
938
  }
787
939
  return references.map(function(name) {
788
- return language.cacheStoreRef(name, store[name]);
940
+ return language.cacheStoreRef(regMap[name], name);
789
941
  });
790
942
  }
791
943
 
792
- /**
793
- * Get a block which saves ref values to a temporary variable for later
794
- * comparison in getCacheStoreRefs().
795
- */
796
- function getCacheSaveRefs(rule) {
797
- var parts = [];
798
- for (let name in rule.passedParams) {
799
- if (rule.passedParams[name].type === 'reference') {
800
- parts.push(language.cacheSaveRef(name));
801
- }
802
- }
803
- return parts.join('\n');
804
- }
805
-
806
944
  function expandTemplate(template, vars) {
807
945
  for (let name in vars) {
808
946
  let value = vars[name];
@@ -836,14 +974,35 @@ function generateJavascript(ast, options) {
836
974
  choiceIndex = 0;
837
975
  currentRule = node;
838
976
 
839
- // Generate the Result
977
+ // Set up the Context and save the position and reference state
840
978
  var context = (new Context()).discard(discard);
979
+ if (discard || !node.reportsFailure) {
980
+ context = context.silence();
981
+ }
982
+ var outerContext = context;
983
+ let nodeName = asts.getRuleAttributeValue(node, "name");
984
+ if (nodeName !== undefined) {
985
+ // Named rules suppress failure reporting in their subexpression.
986
+ context = context.silence();
987
+ }
988
+
989
+ var topSaveState = new Result();
990
+ let cacheThisRule = asts.getRuleAttributeValue(node, "cache", options.cache);
991
+ if (!iterable && cacheThisRule) {
992
+ context = context.savePosRefState(node, topSaveState);
993
+ }
994
+
995
+ // Generate the Result
841
996
  var result;
842
997
  if (iterable) {
843
998
  result = makeGenerator(node.expression, context);
844
999
  } else {
845
1000
  result = recurse(node.expression, context);
846
1001
  }
1002
+ if (nodeName !== undefined && outerContext.getSilence() !== 'true') {
1003
+ // Failure in a named rule is reported at this node.
1004
+ result.onFailure([makeFailCall({type: 'other', description: nodeName}, outerContext)]);
1005
+ }
847
1006
  result.resolveBlock();
848
1007
 
849
1008
  // Make the function body
@@ -856,7 +1015,7 @@ function generateJavascript(ast, options) {
856
1015
  } else {
857
1016
  var ruleIndexCode = asts.indexOfRule(ast, node.name);
858
1017
  var cacheBits;
859
- if (options.cache) {
1018
+ if (cacheThisRule) {
860
1019
  var cacheFunc = options.cacheRuleHook || generateCacheRule;
861
1020
  cacheBits = cacheFunc({
862
1021
  startPos: language.currPos,
@@ -869,22 +1028,22 @@ function generateJavascript(ast, options) {
869
1028
  result: result.expression,
870
1029
  params: getParamsForCacheKey(node),
871
1030
  loadRefs: getCacheLoadRefs(node),
872
- saveRefs: getCacheSaveRefs(node),
873
- storeRefs: getCacheStoreRefs(node),
1031
+ storeRefs: getCacheStoreRefs(node, context.getPosRefState()),
1032
+ saveRefs: topSaveState.block.join('\n'),
874
1033
  className: className,
875
1034
  });
876
1035
  body.push(cacheBits.start);
877
1036
  }
878
1037
  body.push(result.block.join('\n'));
879
1038
 
880
- if (options.cache) {
1039
+ if (cacheThisRule) {
881
1040
  body.push(cacheBits.store);
882
1041
  }
883
1042
  body.push(`return ${result.expression};`);
884
1043
  }
885
1044
  body = indent2(body.join('\n'));
886
1045
 
887
- let argNames = getRuleArgNames(node);
1046
+ let argNames = getRuleArgNames(node, discard);
888
1047
  let args = argNames.join(', ');
889
1048
 
890
1049
  // Wrap the function body in a trace decorator if requested.
@@ -907,7 +1066,6 @@ function generateJavascript(ast, options) {
907
1066
 
908
1067
  rule_ref: function(node, context) {
909
1068
  var result = new Result();
910
- var reg = context.getResultReg(result);
911
1069
 
912
1070
  var newParamValues = {};
913
1071
  var boolSetMask = 0;
@@ -948,7 +1106,7 @@ function generateJavascript(ast, options) {
948
1106
  }
949
1107
 
950
1108
  let rule = asts.findRule(ast, node.name);
951
- let argNameList = getRuleArgNames(rule);
1109
+ let argNameList = getRuleArgNames(rule, context.getDiscard());
952
1110
  let args = [];
953
1111
  for (let i = 0; i < argNameList.length; i++) {
954
1112
  let argName = argNameList[i];
@@ -986,15 +1144,12 @@ function generateJavascript(ast, options) {
986
1144
  }
987
1145
 
988
1146
  let funcName = addRule(node.name, context.getDiscard());
989
- result.block = [`${reg} = ${language.ruleFuncCall(funcName, args)};`];
990
- return result;
991
- },
992
-
993
- named: function(node, context) {
994
- var result = new Result();
995
- result.append(recurse(node.expression, context.silence()));
996
- if (context.getSilence() !== 'true') {
997
- result.onFailure([makeFailCall({type: 'other', description: node.name}, context)]);
1147
+ if (context.getDiscard() && node.alwaysMatch) {
1148
+ result.block = [`${language.ruleFuncCall(funcName, args)};`];
1149
+ result.expression = 'true';
1150
+ } else {
1151
+ const reg = context.getResultReg(result);
1152
+ result.block = [`${reg} = ${language.ruleFuncCall(funcName, args)};`];
998
1153
  }
999
1154
  return result;
1000
1155
  },
@@ -1024,16 +1179,27 @@ function generateJavascript(ast, options) {
1024
1179
  action: function(node, context) {
1025
1180
  var result = new Result();
1026
1181
  var reg = context.getResultReg(result);
1027
- var newContext = context.cloneEnv().discard();
1028
- var savedPos = allocPosReg();
1182
+ var newContext = context.cloneEnv().discard().discardPos(false);
1183
+ var saved = context.getPosRefState();
1184
+ var savedPos, freePos = [];
1185
+ if (saved !== null) {
1186
+ savedPos = saved.dst;
1187
+ } else {
1188
+ savedPos = allocPosReg();
1189
+ freePos = [ savedPos ];
1190
+ result.block = [`${savedPos} = ${language.currPos};`];
1191
+ }
1029
1192
  var subresult = recurse(node.expression, newContext);
1030
1193
  var funcId = makeActionFunc(node.code, newContext);
1031
- result.block = [`${savedPos} = ${language.currPos};`];
1032
1194
  result.append(subresult);
1195
+ if (node.alwaysMatch) {
1196
+ result.condition = 'true';
1197
+ }
1033
1198
  result.onSuccess([
1034
1199
  `${language.savedPos} = ${savedPos};`,
1035
1200
  `${reg} = ${makeActionCall(funcId, newContext)};`
1036
1201
  ]);
1202
+ freeReg(freePos, result);
1037
1203
  return result;
1038
1204
  },
1039
1205
 
@@ -1041,43 +1207,72 @@ function generateJavascript(ast, options) {
1041
1207
  if (node.elements.length === 1) {
1042
1208
  return recurse(node.elements[0], context);
1043
1209
  } else {
1044
- var posReg = allocPosReg();
1045
1210
  var result = new Result();
1046
1211
  var resultReg = context.getResultReg(result);
1047
1212
  var label = `seq_${++seqIndex}`;
1048
1213
  result.block = [
1049
1214
  language.blockStart(label),
1050
- `${posReg} = ${language.currPos};`];
1051
- var parts = [], i;
1215
+ ];
1216
+ var parts = [], partFree = [], i;
1217
+ var subcontext;
1052
1218
 
1219
+ if (!node.alwaysMatch) {
1220
+ context = context.savePosRefState(node, result);
1221
+ }
1222
+ subcontext = context;
1053
1223
  for (i = 0; i < node.elements.length; i++) {
1054
- var subresult = recurse(node.elements[i], context.noPassThru());
1055
- subresult.free = [];
1224
+ var isPicked = node.elements[i].picked || node.numPicked === undefined;
1225
+ var ctxt = isPicked ?
1226
+ // if there's only one node picked, use this result register for
1227
+ // the subcontext, otherwise allocate a new result register.
1228
+ (node.numPicked === 1 ? subcontext : subcontext.noPassThru()):
1229
+ // set 'discard' if this subexpression is not picked.
1230
+ subcontext.noPassThru().discard();
1231
+ // Unless this is the last element, we need the position
1232
+ // (last element inherits the parent's need)
1233
+ if (i < node.elements.length - 1) {
1234
+ ctxt = ctxt.discardPos(false);
1235
+ }
1236
+ var subresult = recurse(node.elements[i], ctxt);
1237
+ if (isPicked) {
1238
+ partFree.push(...subresult.free);
1239
+ subresult.free = [];
1240
+ parts.push(subresult.expression);
1241
+ }
1056
1242
  result.append(subresult);
1057
- parts.push(subresult.expression);
1058
1243
 
1059
- // On failure, backtrack to the start of the sequence. If this is
1060
- // the first element of the sequence, it's not necessary to backtrack
1061
- // since failing subexpressions do not increment the position.
1062
- if (i > 0) {
1063
- result.onFailure([`${language.currPos} = ${posReg};`]);
1244
+ if (i === 0) {
1245
+ // After the first element, it's not safe to reuse parent pos/ref
1246
+ // state any more.
1247
+ subcontext = subcontext.resetPosRefState();
1248
+ }
1249
+ if (!(node.alwaysMatch || node.elements[i].alwaysMatch)) {
1250
+ if (i > 0) {
1251
+ // On failure, backtrack to the start of the sequence. If this is
1252
+ // the first element of the sequence, it's not necessary to backtrack
1253
+ // since failing subexpressions do not increment the position
1254
+ // nor affect reference parameters
1255
+ result.onFailure(context.restorePosRefState());
1256
+ }
1257
+ // On failure, set the result register and exit the sequence
1258
+ result.onFailure([
1259
+ `${resultReg} = ${language.failed};`,
1260
+ language.gotoBlockEnd(label)
1261
+ ]);
1064
1262
  }
1065
- // On failure, set the result register and exit the sequence
1066
- result.onFailure([
1067
- `${resultReg} = ${language.failed};`,
1068
- language.gotoBlockEnd(label)
1069
- ]);
1070
1263
  result.resolveBlock();
1071
1264
  }
1072
- if (context.getDiscard()) {
1073
- result.block.push(`${resultReg} = true;`);
1074
- } else {
1075
- result.block.push(`${resultReg} = [${parts.join(',')}];`);
1265
+ if (node.numPicked !== 1) {
1266
+ if (context.getDiscard()) {
1267
+ result.block.push(`${resultReg} = true;`);
1268
+ } else {
1269
+ result.block.push(`${resultReg} = [${parts.join(',')}];`);
1270
+ }
1076
1271
  }
1077
1272
  result.block.push(language.blockEnd(label));
1078
1273
  result.expression = resultReg;
1079
- freeReg(result.free.concat([posReg]), result);
1080
- result.free = [];
1274
+ freeReg(partFree, result);
1275
+ context.freePosRefState(result);
1081
1276
  return result;
1082
1277
  }
1083
1278
  },
@@ -1106,11 +1301,16 @@ function generateJavascript(ast, options) {
1106
1301
  },
1107
1302
 
1108
1303
  text: function(node, context) {
1304
+ if (context.getDiscard()) {
1305
+ // text does nothing if we're already discarding
1306
+ return recurse(node.expression, context.cloneEnv());
1307
+ }
1109
1308
  var startPos = allocPosReg();
1110
1309
  var result = new Result();
1111
1310
  var reg = context.getResultReg(result);
1112
1311
  result.block = [`${startPos} = ${language.currPos};`];
1113
- result.append(recurse(node.expression, context.cloneEnv().discard()));
1312
+ // Note that here we are 'discarding' but not 'silenced'
1313
+ result.append(recurse(node.expression, context.cloneEnv().discard().discardPos(false)));
1114
1314
  result.onSuccess([
1115
1315
  `${reg} = ${language.inputSubstring(startPos, language.currPos)};`
1116
1316
  ]);
@@ -1136,6 +1336,10 @@ function generateJavascript(ast, options) {
1136
1336
  },
1137
1337
 
1138
1338
  zero_or_more: function(node, context) {
1339
+ // Special case zero_or_more of a character class
1340
+ if (node.expression.type === 'class') {
1341
+ return buildRepeatedCharacterClass(node, context, false);
1342
+ }
1139
1343
  // Pseudocode for the non-discard case:
1140
1344
  //
1141
1345
  // let r1 = [];
@@ -1161,7 +1365,7 @@ function generateJavascript(ast, options) {
1161
1365
  var result = new Result();
1162
1366
  var resultReg = context.getResultReg(result);
1163
1367
  var partReg = allocReg([]);
1164
- var newContext = context.resultReg(partReg).cloneEnv();
1368
+ var newContext = context.resultReg(partReg).cloneEnv().discardPos(false).resetPosRefState();
1165
1369
  var subresult = recurse(node.expression, newContext);
1166
1370
  if (!context.getDiscard()) {
1167
1371
  result.block.push(`${resultReg} = [];`);
@@ -1185,6 +1389,10 @@ function generateJavascript(ast, options) {
1185
1389
  },
1186
1390
 
1187
1391
  one_or_more: function(node, context) {
1392
+ // Special case one_or_more of a character class
1393
+ if (node.expression.type === 'class') {
1394
+ return buildRepeatedCharacterClass(node, context, true);
1395
+ }
1188
1396
  // Pseudocode for the non-discard case:
1189
1397
  //
1190
1398
  // let r1 = [];
@@ -1217,7 +1425,7 @@ function generateJavascript(ast, options) {
1217
1425
  var initialFree = result.free;
1218
1426
  result.free = [];
1219
1427
  var partReg = allocReg([]);
1220
- var newContext = context.resultReg(partReg).cloneEnv();
1428
+ var newContext = context.resultReg(partReg).cloneEnv().discardPos(false).resetPosRefState();
1221
1429
  var subresult = recurse(node.expression, newContext);
1222
1430
  if (!context.getDiscard()) {
1223
1431
  result.block.push(`${resultReg} = [];`);
@@ -1258,13 +1466,13 @@ function generateJavascript(ast, options) {
1258
1466
  var result = new Result();
1259
1467
  // Special case: empty string always matches
1260
1468
  if (node.value.length === 0) {
1261
- result.expression = "''";
1469
+ result.expression = context.getDiscard() ? "true" : "''";
1262
1470
  result.condition = 'true';
1263
1471
  return result;
1264
1472
  }
1265
1473
 
1266
1474
  var reg = context.getResultReg(result);
1267
- language.matchLiteral(node, reg, result);
1475
+ language.matchLiteral(node, reg, result, context.getDiscard(), context.getDiscardPos());
1268
1476
  if (context.getSilence() !== 'true') {
1269
1477
  result.onFailure([
1270
1478
  makeFailCall({
@@ -1277,10 +1485,11 @@ function generateJavascript(ast, options) {
1277
1485
  return result;
1278
1486
  },
1279
1487
 
1280
- "class": function(node, context) {
1488
+ class: function(node, context) {
1281
1489
  var result = new Result();
1282
1490
  var reg = context.getResultReg(result);
1283
- language.matchClass(node, reg, result);
1491
+ var classExpr = classNode.expand(classNode.caseSensitive(node), CHARSET_EXPAND);
1492
+ language.matchClass(classExpr, reg, result, context.getDiscard(), context.getDiscardPos());
1284
1493
  result.onFailure([`${reg} = ${language.failed};`]);
1285
1494
  if (context.getSilence() !== 'true') {
1286
1495
  result.onFailure([makeFailCall({
@@ -1296,7 +1505,14 @@ function generateJavascript(ast, options) {
1296
1505
  var result = new Result();
1297
1506
  var reg = context.getResultReg(result);
1298
1507
  result.condition = `${language.currPos} < ${language.inputLength}`;
1299
- result.onSuccess([`${reg} = ${language.consumeInputChar};`]);
1508
+ if (context.getDiscard()) {
1509
+ if (!context.getDiscardPos()) {
1510
+ result.onSuccess([`${language.advanceInputChar};`]);
1511
+ }
1512
+ result.onSuccess([`${reg} = true;`]);
1513
+ } else {
1514
+ result.onSuccess([`${reg} = ${language.consumeInputChar};`]);
1515
+ }
1300
1516
  result.onFailure([`${reg} = ${language.failed};`]);
1301
1517
  if (context.getSilence() !== 'true') {
1302
1518
  result.onFailure([makeFailCall({