wikipeg 4.0.2 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HISTORY.md +556 -0
- package/README.md +230 -12
- package/VERSION +1 -1
- package/bin/wikipeg +8 -4
- package/examples/css.pegphp +9 -8
- package/lib/compiler/asts.js +30 -10
- package/lib/compiler/charsets.js +306 -0
- package/lib/compiler/language/javascript.js +107 -33
- package/lib/compiler/language/php.js +193 -55
- package/lib/compiler/passes/analyze-always-match.js +141 -0
- package/lib/compiler/passes/analyze-first.js +245 -0
- package/lib/compiler/passes/ast-to-code.js +316 -100
- package/lib/compiler/passes/inline-simple-rules.js +96 -0
- package/lib/compiler/passes/optimize-character-class.js +147 -0
- package/lib/compiler/passes/optimize-failure-reporting.js +65 -0
- package/lib/compiler/passes/remove-proxy-rules.js +7 -5
- package/lib/compiler/passes/report-infinite-loops.js +4 -1
- package/lib/compiler/passes/report-left-recursion.js +3 -4
- package/lib/compiler/passes/report-unknown-attributes.js +39 -0
- package/lib/compiler/passes/transform-common-lang.js +1 -1
- package/lib/compiler/traverser.js +1 -2
- package/lib/compiler/visitor.js +5 -7
- package/lib/compiler.js +24 -10
- package/lib/parser.js +2784 -3088
- package/lib/peg.js +7 -15
- package/lib/runtime/template.js +9 -1
- package/lib/utils/CaseFolding.txt +1654 -0
- package/lib/utils/arrays.js +0 -72
- package/lib/utils/casefold.js +697 -0
- package/lib/utils/objects.js +9 -39
- package/lib/utils/unicode.js +34 -0
- package/package.json +6 -4
- package/src/DefaultTracer.php +18 -18
- package/src/PEGParserBase.php +53 -28
- package/src/SyntaxError.php +4 -4
- package/src/Tracer.php +1 -1
- package/lib/compiler/opcodes.js +0 -54
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*eslint no-unused-vars: ["error", { "argsIgnorePattern": "^_" }] */
|
|
3
|
+
|
|
4
|
+
var GrammarError = require("../../grammar-error"),
|
|
5
|
+
visitor = require("../visitor"),
|
|
6
|
+
asts = require("../asts"),
|
|
7
|
+
classNode = require("../charsets").classNode,
|
|
8
|
+
objects = require("../../utils/objects");
|
|
9
|
+
|
|
10
|
+
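// Compute nullability and FIRST sets for every rule, then optionally guard
// rules (or references to them) with a FIRST-set lookahead.
// Nullable rules are rules which can match the empty string.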
|
|
11
|
+
// Nullable rules only contain nullable expressions, either:
|
|
12
|
+
// * an optional (?) expression, or
|
|
13
|
+
// * a zero_or_more (*) expression, or
|
|
14
|
+
// * a rule reference to a nullable rule, or
|
|
15
|
+
// * a sequence containing only nullable expressions, or
|
|
16
|
+
// * a choice containing at least one nullable expression
|
|
17
|
+
|
|
18
|
+
/**
 * Compiler pass: annotate rules with `nullable` and `firstSet`, and
 * optionally guard them with a `&FIRST` lookahead.
 *
 * The pass works in three steps:
 *   1. cache `nullable` on every rule (delegating to `asts.matchesEmpty`);
 *   2. compute `firstSet` for every rule: the set of characters which can
 *      begin a match, stored as a character-class node built by `classNode`;
 *   3. depending on `options.optimizeFirstSet`, rewrite the AST in place so
 *      that a non-nullable rule whose FIRST set is not "any" is preceded by
 *      a `simple_and` test of that FIRST set.
 *
 * @param {Object} ast - Grammar AST. `ast.rules` is iterated and the rule
 *   (and rule_ref) nodes are mutated in place.
 * @param {Object} [options]
 * @param {boolean} [options.noOptimizeFirstSet] - When truthy the pass
 *   returns immediately without analysing anything.
 * @param {string} [options.optimizeFirstSet='call'] - `'none'` returns
 *   immediately; `'call'` rewrites each eligible `rule_ref`; `'def'` wraps
 *   the expression of each eligible rule; any other value analyses only.
 * @param {boolean} [options.dumpAnalyzeFirst] - When truthy, each rule's
 *   name, nullability and FIRST set are written with `console.error`.
 * @returns {undefined}
 * @throws {GrammarError} If a `choice` has no alternatives or a `sequence`
 *   has no elements.
 */
function analyzeFirst(ast, options) {
  options = options || {};
  if (options.noOptimizeFirstSet || options.optimizeFirstSet === 'none') {
    return;
  }
  const dumpAnalysis = options.dumpAnalyzeFirst;
  const optimizeFirstSet = options.optimizeFirstSet || 'call';

  // Own-property test that does not rely on the node inheriting (or not
  // shadowing) `hasOwnProperty`.
  function hasOwn(obj, key) {
    return Object.prototype.hasOwnProperty.call(obj, key);
  }

  // Look for nullable rules (can match the empty string)

  // Wrap `f` so that its result is computed at most once per node and
  // stored on the node as `nullable`.
  function cacheNullable(f) {
    return function(node) {
      if (!hasOwn(node, 'nullable')) {
        node.nullable = f(node);
      }
      return node.nullable;
    };
  }

  // `cacheNullable` is also handed to `asts.matchesEmpty`.
  // NOTE(review): presumably so the helper can cache nullability on the
  // sub-nodes it visits; confirm against asts.js.
  const checkNullable =
    cacheNullable((node) => asts.matchesEmpty(ast, node, cacheNullable));

  ast.rules.forEach((rule) => checkNullable(rule));

  // Now compute the FIRST set for each node: the set of characters
  // which can begin a valid match.  Perhaps more usefully: if the
  // next character in the input is *not* in FIRST, and the expression is
  // not nullable, the expression is guaranteed to FAIL.

  // FIRST of a wrapper node is FIRST of the expression it wraps.
  function firstIsChild(node) {
    return checkFirst(node.expression);
  }

  // FIRST of a rule, cached on the rule node as `firstSet`.
  function firstRule(node) {
    if (hasOwn(node, 'firstSet')) {
      return node.firstSet;
    }
    // To break cycles, mark this rule (conservatively) as having firstSet
    // of 'any' before recursing.
    node.firstSet = classNode.any();
    if (asts.getRuleAttributeValue(node, 'empty') === false) {
      // If we have a manual override that says this rule is not empty,
      // then conservatively assume it can match any character.
      return node.firstSet;
    }
    const first = checkFirst(node.expression);
    node.firstSet = first;
    if (dumpAnalysis) {
      console.error(node.name, `nullable=${node.nullable}`, "first", node.firstSet);
    }
    return first;
  }

  const checkFirst = visitor.build({
    rule: firstRule,

    rule_ref: function(node) {
      const rule = asts.findRule(ast, node.name);
      return checkFirst(rule);
    },

    // FIRST of a choice is the union of FIRST over its alternatives.
    choice: function(node) {
      if (node.alternatives.length === 0) {
        throw new GrammarError( "No choices.", node.location );
      }
      let first = checkFirst(node.alternatives[0]);
      for (let i = 1; i < node.alternatives.length; i++) {
        if (classNode.isAny(first)) {
          break; // first set can't grow more than this
        }
        first = classNode.union(first, checkFirst(node.alternatives[i]));
      }
      return first;
    },

    // FIRST of a sequence is FIRST of its first element, extended with
    // FIRST of each following element for as long as every element seen
    // so far is nullable.
    sequence: function(node) {
      if (node.elements.length === 0) {
        throw new GrammarError( "No sequence.", node.location );
      }
      let first = checkFirst(node.elements[0]);
      let nullable = checkNullable(node.elements[0]);
      // Deferred adjustments for `&expr` lookaheads; each one needs the
      // FIRST set of everything that follows it, which is only known once
      // the loop below has finished.
      const fixups = [];
      for (let i = 1; nullable && i < node.elements.length; i++) {
        if (classNode.isAny(first)) {
          break; // first set can't grow more than this
        }
        if (
          node.elements[i].type === 'simple_and' &&
          !checkNullable(node.elements[i].expression) &&
          i < node.elements.length - 1
        ) {
          // get a bit more clever with simple_and when expression is
          // not nullable: whatever follows the lookahead can only start
          // with a character that is also in FIRST of the lookahead.
          const currentFirst = first;
          fixups.push(function(tail) {
            return classNode.union(
              currentFirst,
              classNode.intersection(
                checkFirst(node.elements[i].expression),
                tail
              )
            );
          });
          // Start accumulating the FIRST set of the tail from scratch.
          first = classNode.empty();
          continue;
        }
        first = classNode.union(first, checkFirst(node.elements[i]));
        nullable = checkNullable(node.elements[i]);
      }
      // Apply the adjustments innermost-first (last pushed, first applied).
      while (fixups.length) {
        const f = fixups.pop();
        first = f(first);
      }
      return first;
    },

    labeled: firstIsChild,
    text: firstIsChild,
    simple_and: classNode.empty,
    simple_not: classNode.empty,
    action: firstIsChild,

    optional: firstIsChild,
    zero_or_more: firstIsChild,
    one_or_more: firstIsChild,

    any: classNode.any,
    class: function(node) {
      // preprocess the node into a sorted firstSet
      return classNode.sort(objects.clone(node));
    },
    literal: function(node) {
      if (node.value.length === 0) {
        return classNode.empty();
      }
      // Only the first character of the literal matters for FIRST.
      return classNode.sort({
        type: "class",
        parts: [node.value[0]],
        inverted: false,
        ignoreCase: node.ignoreCase,
      });
    },

    semantic_and: classNode.empty,
    semantic_not: classNode.empty,
    parameter_and: classNode.empty,
    parameter_not: classNode.empty,
    labeled_param: classNode.empty,
  });

  ast.rules.forEach((rule) => checkFirst(rule));

  // Build the properties of a `(&FIRST guarded)` sequence for `rule`.
  // The properties of `rule.firstSet` are copied over a default `location`,
  // so a `location` carried by the FIRST set itself takes precedence.
  // NOTE(review): `picked` / `numPicked` presumably make the sequence yield
  // the value of `guarded` alone; confirm against ast-to-code.js.
  function guardedSequence(rule, location, guarded, wasPicked) {
    return {
      type: "sequence",
      location: location,
      elements: [
        {
          type: "simple_and",
          location: location,
          expression: Object.assign({
            location: location,
          }, rule.firstSet),
          isFirstSetTest: rule.name,
        },
        guarded,
      ],
      picked: wasPicked,
      numPicked: 1,
    };
  }

  // Now we transform every non-nullable rule_ref to:
  //   (&FIRST rule)
  // if first is not ANY

  const addFirstPrefixesAtCall = visitor.build({
    rule_ref: function(node) {
      const rule = asts.findRule(ast, node.name);
      if (rule.nullable || classNode.isAny(rule.firstSet)) {
        return;
      }
      const location = node.location;
      const newRuleRef = objects.clone(node);
      const wasPicked = node.picked;
      newRuleRef.picked = true;
      // The node is rewritten in place (rather than replaced) so that
      // whatever holds a reference to it sees the new sequence.
      // Remove all properties of node
      Object.keys(node).forEach((name) => delete node[name]);
      Object.assign(
        node,
        guardedSequence(rule, location, newRuleRef, wasPicked)
      );
    },
  });

  // Same guard, but applied once at the rule definition instead of at
  // every reference.
  const addFirstPrefixesAtDef = visitor.build({
    rule: function(rule) {
      if (rule.nullable || classNode.isAny(rule.firstSet)) {
        return;
      }
      const location = rule.location;
      const oldExpr = rule.expression;
      const wasPicked = oldExpr.picked;
      oldExpr.picked = true;
      rule.expression = guardedSequence(rule, location, oldExpr, wasPicked);
    },
  });

  if (optimizeFirstSet === 'def') {
    addFirstPrefixesAtDef(ast);
  } else if (optimizeFirstSet === 'call') {
    addFirstPrefixesAtCall(ast);
  } else {
    // don't optimize at all, just analyze
  }
}
|
|
244
|
+
|
|
245
|
+
module.exports = analyzeFirst;
|