wikipeg 4.0.2 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/HISTORY.md +556 -0
  2. package/README.md +230 -12
  3. package/VERSION +1 -1
  4. package/bin/wikipeg +8 -4
  5. package/examples/css.pegphp +9 -8
  6. package/lib/compiler/asts.js +30 -10
  7. package/lib/compiler/charsets.js +306 -0
  8. package/lib/compiler/language/javascript.js +107 -33
  9. package/lib/compiler/language/php.js +193 -55
  10. package/lib/compiler/passes/analyze-always-match.js +141 -0
  11. package/lib/compiler/passes/analyze-first.js +245 -0
  12. package/lib/compiler/passes/ast-to-code.js +316 -100
  13. package/lib/compiler/passes/inline-simple-rules.js +96 -0
  14. package/lib/compiler/passes/optimize-character-class.js +147 -0
  15. package/lib/compiler/passes/optimize-failure-reporting.js +65 -0
  16. package/lib/compiler/passes/remove-proxy-rules.js +7 -5
  17. package/lib/compiler/passes/report-infinite-loops.js +4 -1
  18. package/lib/compiler/passes/report-left-recursion.js +3 -4
  19. package/lib/compiler/passes/report-unknown-attributes.js +39 -0
  20. package/lib/compiler/passes/transform-common-lang.js +1 -1
  21. package/lib/compiler/traverser.js +1 -2
  22. package/lib/compiler/visitor.js +5 -7
  23. package/lib/compiler.js +24 -10
  24. package/lib/parser.js +2784 -3088
  25. package/lib/peg.js +7 -15
  26. package/lib/runtime/template.js +9 -1
  27. package/lib/utils/CaseFolding.txt +1654 -0
  28. package/lib/utils/arrays.js +0 -72
  29. package/lib/utils/casefold.js +697 -0
  30. package/lib/utils/objects.js +9 -39
  31. package/lib/utils/unicode.js +34 -0
  32. package/package.json +6 -4
  33. package/src/DefaultTracer.php +18 -18
  34. package/src/PEGParserBase.php +53 -28
  35. package/src/SyntaxError.php +4 -4
  36. package/src/Tracer.php +1 -1
  37. package/lib/compiler/opcodes.js +0 -54
@@ -0,0 +1,96 @@
1
+ "use strict";
2
+
3
+ var visitor = require("../visitor"),
4
+ asts = require("../asts");
5
+
6
/**
 * Inline simple rules, like character classes and string constants.
 *
 * Every parameterless reference to an inlinable rule is overwritten in place
 * with a deep copy of that rule's expression. A rule whose references were all
 * replaced is then removed from the grammar, unless it is a start rule.
 *
 * A rule is inlinable when its `inline` attribute says so; without that
 * attribute it is inlinable when `options.noInlining` is falsy and the rule is
 * "simple" (a class or literal, optionally repeated and/or wrapped in `$`).
 *
 * @param {Object} ast - Grammar AST; mutated in place.
 * @param {Object} options - Compiler options. Reads `allowedStartRules`,
 *   `allowedStreamRules` and `noInlining`.
 */
function inlineSimpleRules(ast, options) {
  const startRules = new Set(
    options.allowedStartRules.concat(options.allowedStreamRules || [])
  );

  function isClassOrLiteral(node) {
    return node.type === 'class' || node.type === 'literal';
  }

  function isRepeatedClassOrLiteral(node) {
    return (node.type === 'zero_or_more' || node.type === 'one_or_more') &&
      isClassOrLiteral(node.expression);
  }

  function isTextRepeatedClassOrLiteral(node) {
    return node.type === 'text' && (
      isClassOrLiteral(node.expression) ||
      isRepeatedClassOrLiteral(node.expression)
    );
  }

  function isSimpleRule(rule) {
    // By default don't inline rules which have attributes
    // (other than 'inline')
    if ((rule.attributes || []).some((attr) => attr.name !== 'inline')) {
      return false;
    }
    const expr = rule.expression;
    return isClassOrLiteral(expr) ||
      isRepeatedClassOrLiteral(expr) ||
      isTextRepeatedClassOrLiteral(expr);
  }

  function isInlineRule(rule) {
    // An explicit `inline` attribute always wins over the heuristics.
    const inline = asts.getRuleAttributeValue(rule, "inline", undefined);
    if (inline !== undefined) {
      return inline;
    }
    if (options.noInlining) {
      return false;
    }
    return isSimpleRule(rule);
  }

  // True when `expression` contains a parameterless reference to `name`,
  // i.e. a reference that replaceRuleRefs() would try to overwrite.
  function hasInlinableRefTo(expression, name) {
    let found = false;
    const scan = visitor.build({
      rule_ref: function(node) {
        if (node.name === name && node.assignments.length === 0) {
          found = true;
        }
      }
    });
    scan(expression);
    return found;
  }

  function replaceRuleRefs(ast, from, toNode) {
    let removedAll = true;
    // A poor man's clone: serialize once, parse a fresh copy per reference.
    const serialized = JSON.stringify(toNode);
    const replace = visitor.build({
      rule_ref: function(node) {
        if (node.name !== from) {
          return;
        }
        // skip inlining if this reference has parameters
        if (node.assignments.length > 0) {
          removedAll = false;
          return;
        }
        const wasPicked = node.picked;
        // Remove all properties of node
        Object.keys(node).forEach((name) => delete node[name]);
        // Deep-copy all properties from toNode
        Object.assign(node, JSON.parse(serialized));
        if (wasPicked) {
          node.picked = wasPicked;
        }
      }
    });

    replace(ast);
    return removedAll;
  }

  const indices = [];

  ast.rules.forEach((rule, i) => {
    if (!isInlineRule(rule)) {
      return;
    }
    // A rule that refers to itself cannot be inlined: overwriting the
    // self-reference would read from the very expression being rewritten
    // and leave a half-built node behind. Leave such rules untouched.
    if (hasInlinableRefTo(rule.expression, rule.name)) {
      return;
    }
    const removedAll = replaceRuleRefs(ast, rule.name, rule.expression);
    if (removedAll && !startRules.has(rule.name)) {
      indices.push(i);
    }
  });

  // Splice from the back so earlier indices stay valid.
  for (let k = indices.length - 1; k >= 0; k--) {
    ast.rules.splice(indices[k], 1);
  }
}
95
+
96
+ module.exports = inlineSimpleRules;
@@ -0,0 +1,147 @@
1
+ "use strict";
2
+ /*eslint no-unused-vars: ["error", { "argsIgnorePattern": "^_" }] */
3
+
4
+ var visitor = require("../visitor"),
5
+ classNode = require("../charsets").classNode,
6
+ objects = require("../../utils/objects");
7
+
8
// Optimize character classes:
//   [...] / [...]  => union
//   ![...] [...]   => subtraction

/**
 * Merges adjacent single-character matchers into one character class.
 *
 * - A choice whose alternatives are all single-character matchers (class,
 *   `.`, or one-character literal) becomes the union of those matchers.
 * - Inside a sequence, a negative lookahead on a single-character matcher
 *   followed by a single-character matcher becomes one class (the second
 *   minus the first). The lookahead slot is filled with a trivial synthetic
 *   `&""` node so the number of sequence elements does not change.
 * - A sequence that is left with only synthetic nodes plus a picked final
 *   element is replaced by that final element.
 *
 * Does nothing when `options.noOptimizeCharacterClass` is truthy.
 *
 * @param {Object} ast - Grammar AST; mutated in place.
 * @param {Object} [options] - Compiler options.
 */
function optimizeCharacterClass(ast, options) {
  options = options || {};

  if (options.noOptimizeCharacterClass) {
    return;
  }

  function isSingleChar(node) {
    return node.type === 'class' ||
      node.type === 'any' ||
      (node.type === 'literal' && node.value.length === 1);
  }

  // Human-readable description used as the merged class's `rawText`.
  function toDesc(node) {
    if (node.type === 'class') {
      return node.rawText;
    } else if (node.type === 'any') {
      return ".";
    } else if (node.type === 'literal') {
      return "\"" + node.value + "\"";
    } else {
      throw new Error("unreachable");
    }
  }

  function toList(arr) {
    return (arr.length > 1) ?
      (arr.slice(0, -1).join(", ") + " or " + arr[arr.length - 1]) :
      arr[0];
  }

  function toCharset(node) {
    if (node.type === 'class') {
      return classNode.sort(objects.clone(node));
    } else if (node.type === 'any') {
      return classNode.any();
    } else if (node.type === 'literal') {
      return classNode.sort({
        type: "class",
        parts: [node.value],
        inverted: false,
        ignoreCase: node.ignoreCase,
      });
    }
    throw new Error('unreachable');
  }

  // Strip every property of `node` except `location` and `picked`, so the
  // node object can be refilled as a different node type while keeping its
  // identity in the parent.
  function resetNode(node) {
    Object.keys(node).forEach((name) => {
      if (name !== 'location' && name !== 'picked') {
        delete node[name];
      }
    });
  }

  const checkCharacterClass = visitor.build({
    choice: function(node) {
      // First optimize children.
      node.alternatives.forEach((alt) => checkCharacterClass(alt));
      // Check for [...] / [...]
      if (!node.alternatives.every((alt) => isSingleChar(alt))) {
        return;
      }
      const newClass = node.alternatives.reduce(
        (acc, alt) => classNode.union(acc, toCharset(alt)),
        classNode.empty()
      );
      const newDesc = toList(node.alternatives.map((alt) => toDesc(alt)));
      resetNode(node);
      // Create a new class
      Object.assign(node, newClass);
      node.rawText = newDesc;
    },
    sequence: function(node) {
      const elements = node.elements;
      // First optimize children.
      elements.forEach((el) => checkCharacterClass(el));
      // Check for ![...] [...]
      let removed = 0;
      for (let i = elements.length - 2; i >= 0; i--) {
        let next = i + 1;
        // Skip over synthetic nodes previously added.
        while (next < elements.length && elements[next].optimizeCharClass) {
          next++;
        }
        const guard = elements[i];
        const target = elements[next];
        if (target === undefined) {
          // Nothing but synthetic nodes to the right; nothing to merge into.
          continue;
        }
        if (guard.type === 'simple_not' &&
            !guard.picked &&
            isSingleChar(guard.expression) &&
            isSingleChar(target)) {
          const newClass = classNode.subtract(
            toCharset(target),
            toCharset(guard.expression)
          );
          const newDesc =
            toDesc(target) + ' but not ' + toDesc(guard.expression);
          // create a trivial node just to ensure the same # of elements
          // in the sequence.
          const newTest = {
            type: 'simple_and',
            location: guard.location,
            expression: {
              type: 'literal',
              value: '',
              location: guard.location,
            },
            optimizeCharClass: true, // mark synthetic node
          };
          const newChar = Object.assign({
            location: target.location,
            rawText: newDesc,
            picked: target.picked,
          }, newClass);
          elements[i] = newTest;
          elements[next] = newChar;
          removed++;
        }
      }
      // optimize sequence if picked to allow repeated char class
      const last = elements[elements.length - 1];
      if (elements.length === (removed + 1) && last.picked) {
        resetNode(node);
        // The sequence node takes over the last element's content but keeps
        // its own `location` and `picked`.
        delete last.picked;
        delete last.location;
        Object.assign(node, last);
      }
    },
  });

  checkCharacterClass(ast);
}
146
+
147
+ module.exports = optimizeCharacterClass;
@@ -0,0 +1,65 @@
1
+ "use strict";
2
+
3
+ var visitor = require("../visitor"),
4
+ asts = require("../asts");
5
+
6
// Find rules that never report failures and silence them.
// 1. A start rule always reports failure.
// 2. A non-start rule does not report failure if every caller is either:
//    A) a named rule
//       (this is because named rules manually report failure, attributing
//       the failure to the named rule instead of its children), or
//    B) inside an assertion (and (&) or not (!) expressions), or
//    C) (transitively) a rule which does not report failure

/**
 * Sets `reportsFailure` on every rule of the grammar.
 *
 * All rules start out as `false`; the start rules, and every rule reachable
 * from them without passing through a named rule or an assertion, are then
 * switched to `true`.
 *
 * @param {Object} ast - Grammar AST; each rule gets a `reportsFailure` flag.
 * @param {Object} options - Compiler options. Reads `allowedStartRules` and
 *   `allowedStreamRules`.
 */
function optimizeFailureReporting(ast, options) {
  // A missing `allowedStreamRules` must not add an `undefined` entry to the
  // list of start-rule names.
  const startRules =
    options.allowedStartRules.concat(options.allowedStreamRules || []);

  // Disable failure reporting for rules by default, and find rules for which
  // conditions 2A, 2B, or 2C are false.
  ast.rules.forEach((node) => {
    node.reportsFailure = false;
  });

  // Work list, seeded with the start rules (condition 1).
  const pending = startRules.map((name) => asts.findRule(ast, name));

  // Selectively enable failure reporting for rules in start rules' call graph
  const skipChildren = () => {};
  const check = visitor.build({
    rule: function(node) {
      node.reportsFailure = true;
      const named = asts.getRuleAttributeValue(node, "name");
      if (named !== undefined) {
        // Break AST traversing because failure reporting is disabled in a
        // named rule (2A)
        return;
      }
      // We are visiting this rule because we've found that it may report
      // failures. All rules referenced may also report failures.
      check(node.expression);
    },

    // Never reports failure, so break AST traversing (2B)
    simple_and: skipChildren,
    simple_not: skipChildren,

    rule_ref: function(node) {
      const rule = asts.findRule(ast, node.name);
      // This function is only reached when the parent rule reports failures.
      // Recheck all rules called by the referenced rule (2C)
      if (!rule.reportsFailure) {
        pending.push(rule);
      }
    },
  });

  // A rule can be queued several times before its first visit (once per
  // reference seen while it was still unmarked). Visiting it again would
  // re-queue all of its callees, which multiplies the work at every level of
  // the call graph; so each rule is traversed at most once.
  for (let i = 0; i < pending.length; i++) {
    const rule = pending[i];
    if (rule.reportsFailure) {
      continue;
    }
    check(rule);
  }
}
64
+
65
+ module.exports = optimizeFailureReporting;
@@ -1,12 +1,14 @@
1
1
  "use strict";
2
2
 
3
- var arrays = require("../../utils/arrays"),
4
- visitor = require("../visitor");
3
+ var visitor = require("../visitor");
5
4
 
6
5
  /*
7
6
  * Removes proxy rules -- that is, rules that only delegate to other rule.
8
7
  */
9
8
  function removeProxyRules(ast, options) {
9
+
10
+ const startRules = options.allowedStartRules.concat(options.allowedStreamRules);
11
+
10
12
  function isProxyRule(node) {
11
13
  return node.type === "rule"
12
14
  && node.expression.type === "rule_ref"
@@ -27,10 +29,10 @@ function removeProxyRules(ast, options) {
27
29
 
28
30
  var indices = [];
29
31
 
30
- arrays.each(ast.rules, function(rule, i) {
32
+ ast.rules.forEach( function(rule, i) {
31
33
  if (isProxyRule(rule)) {
32
34
  replaceRuleRefs(ast, rule.name, rule.expression.name);
33
- if (!arrays.contains(options.allowedStartRules, rule.name)) {
35
+ if (!startRules.includes(rule.name)) {
34
36
  indices.push(i);
35
37
  }
36
38
  }
@@ -38,7 +40,7 @@ function removeProxyRules(ast, options) {
38
40
 
39
41
  indices.reverse();
40
42
 
41
- arrays.each(indices, function(i) { ast.rules.splice(i, 1); });
43
+ indices.forEach( (i) => { ast.rules.splice(i, 1); });
42
44
  }
43
45
 
44
46
  module.exports = removeProxyRules;
@@ -8,7 +8,10 @@ var GrammarError = require("../../grammar-error"),
8
8
  * Reports expressions that don't consume any input inside |*| or |+| in the
9
9
  * grammar, which prevents infinite loops in the generated parser.
10
10
  */
11
- function reportInfiniteLoops(ast) {
11
+ function reportInfiniteLoops(ast, options) {
12
+ if (options && options.allowLoops) {
13
+ return;
14
+ }
12
15
  var check = visitor.build({
13
16
  zero_or_more: function(node) {
14
17
  if (asts.matchesEmpty(ast, node.expression)) {
@@ -1,7 +1,6 @@
1
1
  "use strict";
2
2
 
3
- var arrays = require("../../utils/arrays"),
4
- GrammarError = require("../../grammar-error"),
3
+ var GrammarError = require("../../grammar-error"),
5
4
  asts = require("../asts"),
6
5
  visitor = require("../visitor");
7
6
 
@@ -24,7 +23,7 @@ function reportLeftRecursion(ast) {
24
23
  },
25
24
 
26
25
  sequence: function(node, visitedRules) {
27
- arrays.every(node.elements, function(element) {
26
+ node.elements.every( (element) => {
28
27
  if (element.type === "rule_ref") {
29
28
  check(element, visitedRules);
30
29
  }
@@ -34,7 +33,7 @@ function reportLeftRecursion(ast) {
34
33
  },
35
34
 
36
35
  rule_ref: function(node, visitedRules) {
37
- if (arrays.contains(visitedRules, node.name)) {
36
+ if (visitedRules.includes(node.name)) {
38
37
  throw new GrammarError(
39
38
  "Left recursion detected for rule \"" + node.name + "\".",
40
39
  node.location
@@ -0,0 +1,39 @@
1
+ "use strict";
2
+
3
+ var GrammarError = require("../../grammar-error"),
4
+ visitor = require("../visitor");
5
+
6
/**
 * Checks that all rule attributes are known. (Catches typos.)
 *
 * @param {Object} ast - Grammar AST.
 * @throws {GrammarError} If a rule carries an attribute whose name is not in
 *   the known list, or whose value is not of the type expected for that name.
 */
function reportUnknownAttributes(ast) {
  // Attribute name -> expected `typeof` of its value. A Map is used so that
  // names inherited from Object.prototype ("toString", "constructor",
  // "__proto__", ...) are not mistaken for known attributes.
  const KNOWN_ATTRIBUTES = new Map([
    ['name', 'string'],
    ['inline', 'boolean'],
    ['cache', 'boolean'],
    ['empty', 'boolean'],
    ['unreachable', 'boolean'],
  ]);

  const check = visitor.build({
    rule: function(rule) {
      for (const attr of rule.attributes || []) {
        const expectedType = KNOWN_ATTRIBUTES.get(attr.name);
        if (expectedType === undefined) {
          throw new GrammarError(
            `Rule "${rule.name}" contains unknown attribute "${attr.name}".`,
            attr.location
          );
        }
        const actualType = typeof attr.value;
        if (actualType !== expectedType) {
          throw new GrammarError(
            `Rule "${rule.name}" attribute "${attr.name}" has ` +
              `${actualType} value but expected ${expectedType}.`,
            attr.location
          );
        }
      }
    }
  });

  check(ast);
}
38
+
39
+ module.exports = reportUnknownAttributes;
@@ -8,7 +8,7 @@ var visitor = require("../visitor");
8
8
  * Used for testing.
9
9
  */
10
10
  function transformCommonLang(ast, options) {
11
- if (!options.commonLang) {
11
+ if (!(options && options.commonLang)) {
12
12
  return;
13
13
  }
14
14
 
@@ -61,7 +61,6 @@ Traverser.prototype = {
61
61
  defaultHandlers: {
62
62
  initializer: traverseNop,
63
63
  rule: traverseExpression,
64
- named: traverseExpression,
65
64
  choice: traverseChildren("alternatives"),
66
65
  action: traverseExpression,
67
66
  sequence: traverseChildren("elements"),
@@ -79,7 +78,7 @@ Traverser.prototype = {
79
78
  labeled_param: traverseNop,
80
79
  rule_ref: traverseRuleRef,
81
80
  literal: traverseNop,
82
- "class": traverseNop,
81
+ class: traverseNop,
83
82
  any: traverseNop
84
83
  }
85
84
  };
@@ -1,7 +1,6 @@
1
1
  "use strict";
2
2
 
3
- var objects = require("../utils/objects"),
4
- arrays = require("../utils/arrays");
3
+ var objects = require("../utils/objects");
5
4
 
6
5
  /* Simple AST node visitor builder. */
7
6
  var visitor = {
@@ -22,7 +21,7 @@ var visitor = {
22
21
  return function(node) {
23
22
  var extraArgs = Array.prototype.slice.call(arguments, 1);
24
23
 
25
- arrays.each(node[property], function(child) {
24
+ node[property].forEach( (child) => {
26
25
  visit.apply(null, [child].concat(extraArgs));
27
26
  });
28
27
  };
@@ -34,7 +33,7 @@ var visitor = {
34
33
 
35
34
  if (node.initializer) {
36
35
  if (Array.isArray(node.initializer)) {
37
- arrays.each(node.initializer, function(initializer) {
36
+ node.initializer.forEach( (initializer) => {
38
37
  visit.apply(null, [initializer].concat(extraArgs));
39
38
  });
40
39
  } else {
@@ -42,14 +41,13 @@ var visitor = {
42
41
  }
43
42
  }
44
43
 
45
- arrays.each(node.rules, function(rule) {
44
+ node.rules.forEach( (rule) => {
46
45
  visit.apply(null, [rule].concat(extraArgs));
47
46
  });
48
47
  },
49
48
 
50
49
  initializer: visitNop,
51
50
  rule: visitExpression,
52
- named: visitExpression,
53
51
  choice: visitChildren("alternatives"),
54
52
  action: visitExpression,
55
53
  sequence: visitChildren("elements"),
@@ -67,7 +65,7 @@ var visitor = {
67
65
  labeled_param: visitNop,
68
66
  rule_ref: visitNop,
69
67
  literal: visitNop,
70
- "class": visitNop,
68
+ class: visitNop,
71
69
  any: visitNop
72
70
  };
73
71
 
package/lib/compiler.js CHANGED
@@ -1,7 +1,6 @@
1
1
  "use strict";
2
2
 
3
- var arrays = require("./utils/arrays"),
4
- objects = require("./utils/objects");
3
+ const objects = require("./utils/objects");
5
4
 
6
5
  var compiler = {
7
6
  /*
@@ -15,11 +14,20 @@ var compiler = {
15
14
  check: {
16
15
  reportMissingRules: require("./compiler/passes/report-missing-rules"),
17
16
  reportLeftRecursion: require("./compiler/passes/report-left-recursion"),
18
- reportInfiniteLoops: require("./compiler/passes/report-infinite-loops")
17
+ reportInfiniteLoops: require("./compiler/passes/report-infinite-loops"),
18
+ reportUnknownAttributes:
19
+ require("./compiler/passes/report-unknown-attributes")
19
20
  },
20
21
  transform: {
21
22
  removeProxyRules: require("./compiler/passes/remove-proxy-rules"),
23
+ inlineSimpleRules: require("./compiler/passes/inline-simple-rules"),
24
+ optimizeCharacterClass:
25
+ require("./compiler/passes/optimize-character-class"),
22
26
  analyzeParams: require("./compiler/passes/analyze-params"),
27
+ analyzeAlwaysMatch: require("./compiler/passes/analyze-always-match"),
28
+ analyzeFirst: require("./compiler/passes/analyze-first"),
29
+ optimizeFailureReporting:
30
+ require("./compiler/passes/optimize-failure-reporting"),
23
31
  transformCommonLang: require("./compiler/passes/transform-common-lang"),
24
32
  },
25
33
  generate: {
@@ -34,8 +42,7 @@ var compiler = {
34
42
  * cause its malfunction.
35
43
  */
36
44
  compile: function(ast, passes) {
37
- var options = arguments.length > 2 ? objects.clone(arguments[2]) : {},
38
- stage;
45
+ let options = arguments.length > 2 ? objects.clone(arguments[2]) : {};
39
46
 
40
47
  objects.defaults(options, {
41
48
  allowedStartRules: [ast.rules[0].name],
@@ -43,14 +50,21 @@ var compiler = {
43
50
  cache: false,
44
51
  trace: false,
45
52
  optimize: "speed",
46
- output: "parser"
53
+ output: "parser",
47
54
  });
48
55
 
49
- for (stage in passes) {
50
- if (passes.hasOwnProperty(stage)) {
51
- arrays.each(passes[stage], function(p) { p(ast, options); });
56
+ // Ensure the 'caseless restrict' setting matches the compiler options
57
+ require("./compiler/charsets").classNode.setCaselessRestrict(
58
+ options.caselessRestrict || false
59
+ );
60
+
61
+ Object.getOwnPropertyNames(passes).forEach((stage) => {
62
+ let stagePasses = passes[stage];
63
+ if (typeof(stagePasses) === 'object') {
64
+ stagePasses = Object.values(stagePasses);
52
65
  }
53
- }
66
+ stagePasses.forEach( (p) => { p(ast, options); });
67
+ });
54
68
 
55
69
  switch (options.output) {
56
70
  case "parser": return eval(ast.code);