wikipeg 4.0.2 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/HISTORY.md +556 -0
  2. package/README.md +230 -12
  3. package/VERSION +1 -1
  4. package/bin/wikipeg +8 -4
  5. package/examples/css.pegphp +9 -8
  6. package/lib/compiler/asts.js +30 -10
  7. package/lib/compiler/charsets.js +306 -0
  8. package/lib/compiler/language/javascript.js +107 -33
  9. package/lib/compiler/language/php.js +193 -55
  10. package/lib/compiler/passes/analyze-always-match.js +141 -0
  11. package/lib/compiler/passes/analyze-first.js +245 -0
  12. package/lib/compiler/passes/ast-to-code.js +316 -100
  13. package/lib/compiler/passes/inline-simple-rules.js +96 -0
  14. package/lib/compiler/passes/optimize-character-class.js +147 -0
  15. package/lib/compiler/passes/optimize-failure-reporting.js +65 -0
  16. package/lib/compiler/passes/remove-proxy-rules.js +7 -5
  17. package/lib/compiler/passes/report-infinite-loops.js +4 -1
  18. package/lib/compiler/passes/report-left-recursion.js +3 -4
  19. package/lib/compiler/passes/report-unknown-attributes.js +39 -0
  20. package/lib/compiler/passes/transform-common-lang.js +1 -1
  21. package/lib/compiler/traverser.js +1 -2
  22. package/lib/compiler/visitor.js +5 -7
  23. package/lib/compiler.js +24 -10
  24. package/lib/parser.js +2784 -3088
  25. package/lib/peg.js +7 -15
  26. package/lib/runtime/template.js +9 -1
  27. package/lib/utils/CaseFolding.txt +1654 -0
  28. package/lib/utils/arrays.js +0 -72
  29. package/lib/utils/casefold.js +697 -0
  30. package/lib/utils/objects.js +9 -39
  31. package/lib/utils/unicode.js +34 -0
  32. package/package.json +6 -4
  33. package/src/DefaultTracer.php +18 -18
  34. package/src/PEGParserBase.php +53 -28
  35. package/src/SyntaxError.php +4 -4
  36. package/src/Tracer.php +1 -1
  37. package/lib/compiler/opcodes.js +0 -54
@@ -0,0 +1,245 @@
1
+ "use strict";
2
+ /*eslint no-unused-vars: ["error", { "argsIgnorePattern": "^_" }] */
3
+
4
+ var GrammarError = require("../../grammar-error"),
5
+ visitor = require("../visitor"),
6
+ asts = require("../asts"),
7
+ classNode = require("../charsets").classNode,
8
+ objects = require("../../utils/objects");
9
+
10
+ // analyzeFirst pass, step 1: find nullable rules; that is, rules which can match the empty string.
11
+ // A nullable rule contains only nullable expressions, each of which is either:
12
+ // * an optional (?) expression, or
13
+ // * a zero_or_more (*) expression, or
14
+ // * a rule reference to a nullable rule, or
15
+ // * a sequence containing only nullable expressions, or
16
+ // * a choice containing at least one nullable expression. (The test itself is delegated to asts.matchesEmpty; steps 2 and 3 — FIRST sets and the optional lookahead rewrite — are commented inside the function.)
17
+
18
+ function analyzeFirst(ast, options) { // ast: grammar AST; its ast.rules are annotated (nullable, firstSet) and possibly rewritten in place. options: optional settings object, read below. Returns undefined.
19
+ options = options || {}; // tolerate a missing options argument
20
+ if (options.noOptimizeFirstSet || options.optimizeFirstSet === 'none') { // pass disabled: skip the analysis as well as the rewrite
21
+ return;
22
+ }
23
+ const dumpAnalysis = options.dumpAnalyzeFirst; // when truthy, each analyzed rule is logged with console.error
24
+ const optimizeFirstSet = options.optimizeFirstSet || 'call'; // 'def' and 'call' select a rewrite at the bottom; any other value analyzes only
25
+
26
+ // Look for nullable rules (can match the empty string)
27
+
28
+ // Cache nullability at every node: wrap f so that its result is stored in node.nullable and reused on later calls.
29
+ function cacheNullable(f) {
30
+ return function(node) {
31
+ if (!node.hasOwnProperty('nullable')) {
32
+ node.nullable = f(node);
33
+ }
34
+ return node.nullable;
35
+ };
36
+ }
37
+
38
+ const checkNullable =
39
+ cacheNullable((node) => asts.matchesEmpty(ast, node, cacheNullable)); // cacheNullable is also handed to asts.matchesEmpty — presumably so its own recursive checks are cached too; confirm in asts.js
40
+
41
+ ast.rules.forEach((rule) => checkNullable(rule)); // afterwards every rule carries a nullable property
42
+
43
+ // Now compute the FIRST set for each node: the set of characters
44
+ // which can begin a valid match. Perhaps more usefully: if the
45
+ // next character in the input is *not* in FIRST, and the expression is
46
+ // not nullable, the expression is guaranteed to FAIL.
47
+
48
+ function firstIsChild(node) { // FIRST of a wrapper node is the FIRST of its single child expression
49
+ return checkFirst(node.expression);
50
+ }
51
+
52
+ function firstRule(node) { // FIRST of a rule, memoized in node.firstSet
53
+ if (node.hasOwnProperty('firstSet')) {
54
+ return node.firstSet;
55
+ }
56
+ // To break cycles, mark this rule (conservatively) as having firstSet
57
+ // of 'any' before recursing.
58
+ node.firstSet = classNode.any();
59
+ if (asts.getRuleAttributeValue(node, 'empty') === false) {
60
+ // If we have a manual override that says this rule is not empty,
61
+ // then conservatively assume it can match any character.
62
+ return node.firstSet;
63
+ }
64
+ const first = checkFirst(node.expression);
65
+ node.firstSet = first; // replace the provisional 'any' with the computed set
66
+ if (dumpAnalysis) {
67
+ console.error(node.name, `nullable=${node.nullable}`, "first", node.firstSet); // debugging aid, only when options.dumpAnalyzeFirst is set
68
+ }
69
+ return first;
70
+ }
71
+
72
+ const checkFirst = visitor.build ({ // one handler per node type; each returns the FIRST set of the node it is given
73
+ rule: firstRule,
74
+
75
+ rule_ref: function(node) { // a reference has the FIRST set of the rule it names
76
+ const rule = asts.findRule(ast, node.name); // NOTE(review): the lookup result is passed on unchecked — confirm an earlier pass rejects references to missing rules
77
+ return checkFirst(rule);
78
+ },
79
+
80
+ choice: function(node) { // union of the alternatives' FIRST sets
81
+ if (node.alternatives.length === 0) {
82
+ throw new GrammarError( "No choices.", node.location );
83
+ }
84
+ let first = checkFirst(node.alternatives[0]);
85
+ for (let i = 1; i < node.alternatives.length; i++) {
86
+ if (classNode.isAny(first)) {
87
+ break; // first set can't grow more than this
88
+ }
89
+ first = classNode.union(first, checkFirst(node.alternatives[i]));
90
+ }
91
+ return first;
92
+ },
93
+
94
+ sequence: function(node) { // union of the FIRST sets of the leading elements, continuing only while every element so far is nullable
95
+ if (node.elements.length === 0) {
96
+ throw new GrammarError( "No sequence.", node.location );
97
+ }
98
+ let first = checkFirst(node.elements[0]);
99
+ let nullable = checkNullable(node.elements[0]);
100
+ let fixups = []; // deferred adjustments for embedded & lookaheads, applied last-to-first after the loop
101
+ for (let i = 1; nullable && i < node.elements.length; i++) { // stops after the first element that is not nullable
102
+ if (classNode.isAny(first)) {
103
+ break; // first set can't grow more than this
104
+ }
105
+ if (
106
+ node.elements[i].type === 'simple_and' &&
107
+ !checkNullable(node.elements[i].expression) &&
108
+ i < node.elements.length - 1
109
+ ) {
110
+ // get a bit more clever with simple_and when expression is
111
+ // not nullable: the elements after the lookahead (the "tail") are restricted to the lookahead's own FIRST set, so the two are intersected once the tail is known.
112
+ let currentFirst = first; // FIRST accumulated before the lookahead, captured for the closure
113
+ fixups.push(function(tail) {
114
+ return classNode.union(
115
+ currentFirst,
116
+ classNode.intersection(
117
+ checkFirst(node.elements[i].expression),
118
+ tail
119
+ )
120
+ );
121
+ });
122
+ first = classNode.empty(); // restart accumulation; what is collected from here on becomes the fixup's tail
123
+ continue;
124
+ }
125
+ first = classNode.union(first, checkFirst(node.elements[i]));
126
+ nullable = checkNullable(node.elements[i]);
127
+ }
128
+ while (fixups.length) { // unwind innermost lookahead first
129
+ let f = fixups.pop();
130
+ first = f(first);
131
+ }
132
+ return first;
133
+ },
134
+
135
+ labeled: firstIsChild,
136
+ text: firstIsChild,
137
+ simple_and: classNode.empty, // handlers mapped to classNode.empty contribute no characters to FIRST
138
+ simple_not: classNode.empty,
139
+ action: firstIsChild,
140
+
141
+ optional: firstIsChild,
142
+ zero_or_more: firstIsChild,
143
+ one_or_more: firstIsChild,
144
+
145
+ any: classNode.any,
146
+ class: function(node) {
147
+ // preprocess the node into a sorted firstSet (a clone is sorted, so the grammar's own class node is not the object handed to classNode.sort)
148
+ return classNode.sort(objects.clone(node));
149
+ },
150
+ literal: function(node) { // FIRST of a literal is a one-part class built from the start of its value
151
+ if (node.value.length === 0) {
152
+ return classNode.empty();
153
+ }
154
+ return classNode.sort({
155
+ type: "class",
156
+ parts: [node.value[0]], // only the start of the literal matters for FIRST
157
+ inverted: false,
158
+ ignoreCase: node.ignoreCase,
159
+ });
160
+ },
161
+
162
+ semantic_and: classNode.empty,
163
+ semantic_not: classNode.empty,
164
+ parameter_and: classNode.empty,
165
+ parameter_not: classNode.empty,
166
+ labeled_param: classNode.empty,
167
+ });
168
+
169
+ ast.rules.forEach((rule) => checkFirst(rule)); // computes and stores firstSet for every rule
170
+
171
+ // Now we transform every rule_ref whose target rule is non-nullable to:
172
+ // (&FIRST rule)
173
+ // if first is not ANY
174
+
175
+ const addFirstPrefixesAtCall = visitor.build({ // 'call' mode: guard each reference site
176
+ rule_ref: function(node) {
177
+ const rule = asts.findRule(ast, node.name);
178
+ if (rule.nullable || classNode.isAny(rule.firstSet)) {
179
+ return;
180
+ }
181
+ const location = node.location;
182
+ const newRuleRef = objects.clone(node); // copy taken before node is emptied and rebuilt below
183
+ const wasPicked = node.picked;
184
+ newRuleRef.picked = true; // the reference is the one picked element of the new sequence (numPicked: 1 below); presumably this makes the sequence yield the rule's result — confirm against the code generator
185
+ // Remove all properties of node, then refill the same object so that it becomes the sequence in place
186
+ Object.keys(node).forEach((name) => delete node[name]);
187
+ Object.assign(node, {
188
+ type: "sequence",
189
+ location: location,
190
+ elements: [
191
+ {
192
+ type: "simple_and",
193
+ location: location,
194
+ expression: Object.assign({ // NOTE(review): rule.firstSet's properties are copied last, so a location carried by the FIRST set would override this one
195
+ location: location,
196
+ }, rule.firstSet),
197
+ isFirstSetTest: rule.name, // tags the generated lookahead with the guarded rule's name
198
+ },
199
+ newRuleRef,
200
+ ],
201
+ picked: wasPicked,
202
+ numPicked: 1,
203
+ });
204
+ },
205
+ });
206
+
207
+ const addFirstPrefixesAtDef = visitor.build({ // 'def' mode: guard each rule body once, at its definition
208
+ rule: function(rule) {
209
+ if (rule.nullable || classNode.isAny(rule.firstSet)) {
210
+ return;
211
+ }
212
+ const location = rule.location;
213
+ const oldExpr = rule.expression;
214
+ const wasPicked = oldExpr.picked;
215
+ oldExpr.picked = true; // mutates the original expression; its previous picked value moves to the wrapping sequence
216
+ rule.expression = {
217
+ type: "sequence",
218
+ location: location,
219
+ elements: [
220
+ {
221
+ type: "simple_and",
222
+ location: location,
223
+ expression: Object.assign({ // NOTE(review): as in 'call' mode, a location carried by rule.firstSet would override this one
224
+ location: location,
225
+ }, rule.firstSet),
226
+ isFirstSetTest: rule.name,
227
+ },
228
+ oldExpr,
229
+ ],
230
+ picked: wasPicked,
231
+ numPicked: 1,
232
+ };
233
+ },
234
+ });
235
+
236
+ if (optimizeFirstSet === 'def') {
237
+ addFirstPrefixesAtDef(ast);
238
+ } else if (optimizeFirstSet === 'call') {
239
+ addFirstPrefixesAtCall(ast);
240
+ } else {
241
+ // don't optimize at all, just analyze (nullable and firstSet stay on the rules)
242
+ }
243
+ }
244
+
245
+ module.exports = analyzeFirst;