meta-parser-generator 1.0.4 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -31
- package/dist/metaParserGenerator.d.ts +22 -0
- package/dist/metaParserGenerator.js +377 -0
- package/dist/metaParserGenerator.js.map +1 -0
- package/dist/types.d.ts +47 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +29 -0
- package/dist/utils.js +185 -0
- package/dist/utils.js.map +1 -0
- package/package.json +12 -3
- package/.editorconfig +0 -8
- package/.eslintrc.js +0 -26
- package/error.png +0 -0
- package/metaParserGenerator.js +0 -312
- package/tests/generateParser.js +0 -9
- package/tests/grammar.js +0 -23
- package/tests/parser.js +0 -445
- package/tests/test.js +0 -31
- package/tests/tokensDefinition.js +0 -50
- package/utils.js +0 -153
package/dist/utils.js
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.printTree = exports.displayError = exports.checkGrammarAndTokens = exports.preprocessGrammar = exports.streamContext = void 0;
|
|
4
|
+
const RED = '\x1B[0;31m';
|
|
5
|
+
const YELLOW = '\x1B[1;33m';
|
|
6
|
+
const NC = '\x1B[0m';
|
|
7
|
+
function replaceInvisibleChars(v) {
|
|
8
|
+
v = v.replace(/\r/g, '⏎\r');
|
|
9
|
+
v = v.replace(/\n/g, '⏎\n');
|
|
10
|
+
v = v.replace(/\t/g, '⇥');
|
|
11
|
+
v = v.replace('\xa0', 'nbsp');
|
|
12
|
+
return v.replace(/[ ]/g, '␣');
|
|
13
|
+
}
|
|
14
|
+
function tokenPosition(token) {
|
|
15
|
+
const lineNumber = token.line_start;
|
|
16
|
+
const charNumber = token.column_start;
|
|
17
|
+
const end = charNumber + token.len;
|
|
18
|
+
return { lineNumber, charNumber, end };
|
|
19
|
+
}
|
|
20
|
+
function streamContext(token, firstToken, stream) {
|
|
21
|
+
const index = token.stream_index;
|
|
22
|
+
const firstTokenIndex = firstToken.stream_index;
|
|
23
|
+
const { lineNumber } = tokenPosition(token);
|
|
24
|
+
let lineNb = 1;
|
|
25
|
+
let streamIndex = 0;
|
|
26
|
+
let str = NC;
|
|
27
|
+
function char(v) {
|
|
28
|
+
if (streamIndex === index) {
|
|
29
|
+
return RED + replaceInvisibleChars(v) + NC;
|
|
30
|
+
}
|
|
31
|
+
if (streamIndex >= firstTokenIndex && streamIndex < index) {
|
|
32
|
+
return YELLOW + replaceInvisibleChars(v) + NC;
|
|
33
|
+
}
|
|
34
|
+
return v;
|
|
35
|
+
}
|
|
36
|
+
while (lineNb < (lineNumber + 4) && stream[streamIndex]) {
|
|
37
|
+
const v = stream[streamIndex].value;
|
|
38
|
+
if (v.match(/\n/)) {
|
|
39
|
+
lineNb++;
|
|
40
|
+
if (lineNb > (lineNumber + 3)) {
|
|
41
|
+
return str;
|
|
42
|
+
}
|
|
43
|
+
if (lineNb >= (lineNumber - 1)) {
|
|
44
|
+
str += `${char(v)}${String(` ${lineNb}`).slice(-5)}: `;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
else if (lineNb >= (lineNumber - 1)) {
|
|
48
|
+
if (streamIndex === 0) {
|
|
49
|
+
str += `\n${String(` ${lineNb}`).slice(-5)}: `;
|
|
50
|
+
}
|
|
51
|
+
str += char(v);
|
|
52
|
+
}
|
|
53
|
+
streamIndex++;
|
|
54
|
+
}
|
|
55
|
+
return str;
|
|
56
|
+
}
|
|
57
|
+
exports.streamContext = streamContext;
|
|
58
|
+
/**
|
|
59
|
+
* Displays a formatted error message for parsing failures
|
|
60
|
+
* @param stream - Token stream
|
|
61
|
+
* @param tokensDefinition - Token definitions
|
|
62
|
+
* @param grammar - Grammar rules
|
|
63
|
+
* @param bestFailure - The parse failure with details
|
|
64
|
+
*/
|
|
65
|
+
function displayError(stream, tokensDefinition, grammar, bestFailure) {
|
|
66
|
+
const sub_rules = grammar[bestFailure.type][bestFailure.sub_rule_index];
|
|
67
|
+
let rule = '';
|
|
68
|
+
const { token } = bestFailure;
|
|
69
|
+
const firstToken = bestFailure.first_token;
|
|
70
|
+
const positions = tokenPosition(token);
|
|
71
|
+
let failingToken = '';
|
|
72
|
+
for (let i = 0; i < sub_rules.length; i++) {
|
|
73
|
+
let sr = sub_rules[i];
|
|
74
|
+
if (tokensDefinition[sr] && tokensDefinition[sr].verbose) {
|
|
75
|
+
sr = tokensDefinition[sr].verbose.replace(/\s/g, '-');
|
|
76
|
+
}
|
|
77
|
+
if (i === bestFailure.sub_rule_token_index) {
|
|
78
|
+
rule += `${RED}${sr}${NC} `;
|
|
79
|
+
failingToken = `${sr}`;
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
rule += `${YELLOW}${sr}${NC} `;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
// If multiple alternatives failed at same position, show them
|
|
86
|
+
let expectedTokens = '';
|
|
87
|
+
if (bestFailure.best_failure_array && bestFailure.best_failure_array.length > 1) {
|
|
88
|
+
const uniqueExpected = new Set();
|
|
89
|
+
bestFailure.best_failure_array.forEach(f => {
|
|
90
|
+
var _a;
|
|
91
|
+
const expectedRule = grammar[f.type][f.sub_rule_index][f.sub_rule_token_index];
|
|
92
|
+
if (expectedRule) {
|
|
93
|
+
const verbose = ((_a = tokensDefinition[expectedRule]) === null || _a === void 0 ? void 0 : _a.verbose) || expectedRule;
|
|
94
|
+
uniqueExpected.add(verbose);
|
|
95
|
+
}
|
|
96
|
+
});
|
|
97
|
+
if (uniqueExpected.size > 1) {
|
|
98
|
+
expectedTokens = `\n Expected one of: ${YELLOW}${Array.from(uniqueExpected).join(', ')}${NC}`;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
throw new Error(`
|
|
102
|
+
${RED}Parser error at line ${positions.lineNumber + 1} char ${positions.charNumber} to ${positions.end} ${NC}
|
|
103
|
+
Unexpected ${YELLOW}${replaceInvisibleChars(token.value)}${NC}
|
|
104
|
+
Best match was at rule ${bestFailure.type}[${bestFailure.sub_rule_index}][${bestFailure.sub_rule_token_index}] ${rule}
|
|
105
|
+
token "${YELLOW}${replaceInvisibleChars(token.value)}${NC}" (type:${token.type}) doesn't match rule item ${YELLOW}${failingToken}${NC}${expectedTokens}
|
|
106
|
+
Context:
|
|
107
|
+
${streamContext(token, firstToken, stream)}
|
|
108
|
+
`);
|
|
109
|
+
}
|
|
110
|
+
exports.displayError = displayError;
|
|
111
|
+
function isRule(node) {
|
|
112
|
+
return typeof node.type === 'string';
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Prints the AST tree structure to console
|
|
116
|
+
* @param node - Root AST node to print
|
|
117
|
+
* @param sp - Spacing/indentation string
|
|
118
|
+
*/
|
|
119
|
+
function printTree(node, sp) {
|
|
120
|
+
if (isRule(node)) {
|
|
121
|
+
console.log(`${sp}r ${node.type}(${node.sub_rule_index})`);
|
|
122
|
+
if (node.children) {
|
|
123
|
+
for (let i = 0; i < node.children.length; i++) {
|
|
124
|
+
printTree(node.children[i], `${sp} `);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
else {
|
|
129
|
+
console.log(`${sp}t ${node.type} ${node.value}`);
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
exports.printTree = printTree;
|
|
133
|
+
/**
|
|
134
|
+
* Validates that grammar and token definitions don't have overlapping keys
|
|
135
|
+
* @param grammar - Grammar rules
|
|
136
|
+
* @param tokensDefinition - Token definitions
|
|
137
|
+
*/
|
|
138
|
+
function checkGrammarAndTokens(grammar, tokensDefinition) {
|
|
139
|
+
const gkeys = Object.keys(grammar);
|
|
140
|
+
const tkeys = Object.keys(tokensDefinition);
|
|
141
|
+
const intersection = gkeys.filter(n => tkeys.indexOf(n) > -1);
|
|
142
|
+
if (intersection.length > 0) {
|
|
143
|
+
throw new Error(`Grammar and token have keys in common: ${intersection}`);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
exports.checkGrammarAndTokens = checkGrammarAndTokens;
|
|
147
|
+
/**
|
|
148
|
+
* Preprocesses grammar rules to extract modifiers and aliases
|
|
149
|
+
* @param rules - Raw grammar rules
|
|
150
|
+
* @returns Processed grammar with parsed modifiers and metadata
|
|
151
|
+
*/
|
|
152
|
+
function preprocessGrammar(rules) {
|
|
153
|
+
return Object.keys(rules).reduce((accu, key) => {
|
|
154
|
+
accu[key] = rules[key].map((ruleItems) => ruleItems.map((ruleItem, index) => {
|
|
155
|
+
if (typeof ruleItem === 'function') {
|
|
156
|
+
return { function: true, value: ruleItem, optional: false, repeatable: false, leftRecursion: false };
|
|
157
|
+
}
|
|
158
|
+
const values = ruleItem.split(':');
|
|
159
|
+
let optional = false;
|
|
160
|
+
let repeatable = false;
|
|
161
|
+
let leftRecursion = false;
|
|
162
|
+
if (values[0].endsWith('?')) {
|
|
163
|
+
values[0] = values[0].substring(0, values[0].length - 1);
|
|
164
|
+
optional = true;
|
|
165
|
+
}
|
|
166
|
+
if (values[0].endsWith('*')) {
|
|
167
|
+
values[0] = values[0].substring(0, values[0].length - 1);
|
|
168
|
+
repeatable = true;
|
|
169
|
+
}
|
|
170
|
+
if (index === 0 && values[0] === key) {
|
|
171
|
+
leftRecursion = true;
|
|
172
|
+
}
|
|
173
|
+
return {
|
|
174
|
+
value: values[0],
|
|
175
|
+
alias: values[1],
|
|
176
|
+
optional,
|
|
177
|
+
repeatable,
|
|
178
|
+
leftRecursion,
|
|
179
|
+
};
|
|
180
|
+
}));
|
|
181
|
+
return accu;
|
|
182
|
+
}, {});
|
|
183
|
+
}
|
|
184
|
+
exports.preprocessGrammar = preprocessGrammar;
|
|
185
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../utils.ts"],"names":[],"mappings":";;;AAEA,MAAM,GAAG,GAAG,YAAY,CAAC;AACzB,MAAM,MAAM,GAAG,YAAY,CAAC;AAC5B,MAAM,EAAE,GAAG,SAAS,CAAC;AAErB,SAAS,qBAAqB,CAAC,CAAS;IACtC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IAC5B,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IAC5B,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAC1B,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,OAAO,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,aAAa,CAAC,KAAY;IACjC,MAAM,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;IACpC,MAAM,UAAU,GAAG,KAAK,CAAC,YAAY,CAAC;IACtC,MAAM,GAAG,GAAG,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC;IACnC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;AACzC,CAAC;AAED,SAAS,aAAa,CAAC,KAAY,EAAE,UAAiB,EAAE,MAAe;IACrE,MAAM,KAAK,GAAG,KAAK,CAAC,YAAY,CAAC;IACjC,MAAM,eAAe,GAAG,UAAU,CAAC,YAAY,CAAC;IAChD,MAAM,EAAE,UAAU,EAAE,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAE5C,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,GAAG,GAAG,EAAE,CAAC;IAEb,SAAS,IAAI,CAAC,CAAS;QACrB,IAAI,WAAW,KAAK,KAAK,EAAE;YACzB,OAAO,GAAG,GAAG,qBAAqB,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;SAC5C;QACD,IAAI,WAAW,IAAI,eAAe,IAAI,WAAW,GAAG,KAAK,EAAE;YACzD,OAAO,MAAM,GAAG,qBAAqB,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;SAC/C;QACD,OAAO,CAAC,CAAC;IACX,CAAC;IAED,OAAO,MAAM,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,IAAI,MAAM,CAAC,WAAW,CAAC,EAAE;QACvD,MAAM,CAAC,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,KAAK,CAAC;QACpC,IAAI,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE;YACjB,MAAM,EAAE,CAAC;YACT,IAAI,MAAM,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,EAAE;gBAC7B,OAAO,GAAG,CAAC;aACZ;YACD,IAAI,MAAM,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,EAAE;gBAC9B,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,MAAM,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;aAC5D;SACF;aAAM,IAAI,MAAM,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,EAAE;YACrC,IAAI,WAAW,KAAK,CAAC,EAAE;gBACrB,GAAG,IAAI,KAAK,MAAM,CAAC,QAAQ,MAAM,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;aACpD;YACD,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;SAChB;QACD,WAAW,EAAE,CAAC;KACf;IACD,OA
AO,GAAG,CAAC;AACb,CAAC;AAoIC,sCAAa;AAlIf;;;;;;GAMG;AACH,SAAS,YAAY,CAAC,MAAe,EAAE,gBAAkC,EAAE,OAAgB,EAAE,WAAyB;IACpH,MAAM,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;IACxE,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,MAAM,EAAE,KAAK,EAAE,GAAG,WAAW,CAAC;IAC9B,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW,CAAC;IAC3C,MAAM,SAAS,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IACvC,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QACzC,IAAI,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QACtB,IAAI,gBAAgB,CAAC,EAAE,CAAC,IAAI,gBAAgB,CAAC,EAAE,CAAC,CAAC,OAAO,EAAE;YACxD,EAAE,GAAG,gBAAgB,CAAC,EAAE,CAAC,CAAC,OAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;SACxD;QACD,IAAI,CAAC,KAAK,WAAW,CAAC,oBAAoB,EAAE;YAC1C,IAAI,IAAI,GAAG,GAAG,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;YAC5B,YAAY,GAAG,GAAG,EAAE,EAAE,CAAC;SACxB;aAAM;YACL,IAAI,IAAI,GAAG,MAAM,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;SAChC;KACF;IAED,8DAA8D;IAC9D,IAAI,cAAc,GAAG,EAAE,CAAC;IACxB,IAAI,WAAW,CAAC,kBAAkB,IAAI,WAAW,CAAC,kBAAkB,CAAC,MAAM,GAAG,CAAC,EAAE;QAC/E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAU,CAAC;QACzC,WAAW,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;;YACzC,MAAM,YAAY,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC;YAC/E,IAAI,YAAY,EAAE;gBAChB,MAAM,OAAO,GAAG,CAAA,MAAA,gBAAgB,CAAC,YAAY,CAAC,0CAAE,OAAO,KAAI,YAAY,CAAC;gBACxE,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;aAC7B;QACH,CAAC,CAAC,CAAC;QACH,IAAI,cAAc,CAAC,IAAI,GAAG,CAAC,EAAE;YAC3B,cAAc,GAAG,wBAAwB,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;SAChG;KACF;IAED,MAAM,IAAI,KAAK,CAAC;IACd,GAAG,wBAAwB,SAAS,CAAC,UAAU,GAAG,CAAC,SAAS,SAAS,CAAC,UAAU,OAAO,SAAS,CAAC,GAAG,IAAI,EAAE;eAC/F,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE;2BACpC,WAAW,CAAC,IAAI,IAAI,WAAW,CAAC,cAAc,KAAK,WAAW,CAAC,oBAAoB,KAAK,IAAI;WAC5G,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC,IAAI,6BAA6B,MAAM,GAAG,YAAY,GAAG,EAAE,GAAG,cAAc;;EAEtJ,aAAa,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,CAAC;CACzC,CAAC,CAAC;AACH,CAAC;AAkFC,oCAAY;AAhFd,SAAS
,MAAM,CAAC,IAAS;IACvB,OAAO,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,CAAA;AACtC,CAAC;AAED;;;;GAIG;AACH,SAAS,SAAS,CAAC,IAAa,EAAE,EAAU;IAC1C,IAAG,MAAM,CAAC,IAAI,CAAC,EAAE;QACf,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,cAAc,GAAG,CAAC,CAAC;QAC3D,IAAI,IAAI,CAAC,QAAQ,EAAE;YACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC7C,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;aACxC;SACF;KACF;SAAM;QACL,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;KAClD;AACH,CAAC;AA6DC,8BAAS;AA3DX;;;;GAIG;AACH,SAAS,qBAAqB,CAAC,OAAgB,EAAE,gBAAkC;IACjF,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACnC,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC5C,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9D,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE;QAC3B,MAAM,IAAI,KAAK,CAAC,0CAA0C,YAAY,EAAE,CAAC,CAAC;KAC3E;AACH,CAAC;AA6CC,sDAAqB;AA3CvB;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,KAAc;IACvC,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,IAAsB,EAAE,GAAG,EAAE,EAAE;QAC/D,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CACxB,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,KAAK,EAAE,EAAE;YAC/C,IAAI,OAAO,QAAQ,KAAK,UAAU,EAAE;gBAClC,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,QAAe,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,CAAC;aAC7G;YACD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACnC,IAAI,QAAQ,GAAG,KAAK,CAAC;YACrB,IAAI,UAAU,GAAG,KAAK,CAAC;YACvB,IAAI,aAAa,GAAG,KAAK,CAAC;YAC1B,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;gBAC3B,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBACzD,QAAQ,GAAG,IAAI,CAAC;aACjB;YACD,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;gBAC3B,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBACzD,UAAU,GAAG,
IAAI,CAAC;aACnB;YACD,IAAI,KAAK,KAAK,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE;gBACpC,aAAa,GAAG,IAAI,CAAC;aACtB;YACD,OAAO;gBACL,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;gBAChB,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;gBAChB,QAAQ;gBACR,UAAU;gBACV,aAAa;aACd,CAAC;QACJ,CAAC,CAAC,CACH,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC,EAAE,EAAE,CAAC,CAAC;AACT,CAAC;AAIC,8CAAiB"}
|
package/package.json
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "meta-parser-generator",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "A
|
|
5
|
-
"main": "metaParserGenerator.js",
|
|
3
|
+
"version": "1.1.1",
|
|
4
|
+
"description": "A PEG parser generator with packrat parsing written in JavaScript for JavaScript",
|
|
5
|
+
"main": "dist/metaParserGenerator.js",
|
|
6
|
+
"types": "dist/metaParserGenerator.d.ts",
|
|
7
|
+
"files": [
|
|
8
|
+
"dist/",
|
|
9
|
+
"README.md",
|
|
10
|
+
"LICENSE"
|
|
11
|
+
],
|
|
6
12
|
"scripts": {
|
|
13
|
+
"build": "tsc",
|
|
14
|
+
"prepublish": "npm run build",
|
|
7
15
|
"test": "jest --no-cache",
|
|
8
16
|
"gentest": "node ./tests/generateParser.js && jest --no-cache"
|
|
9
17
|
},
|
|
@@ -24,6 +32,7 @@
|
|
|
24
32
|
},
|
|
25
33
|
"homepage": "https://github.com/batiste/meta-parser-generator#readme",
|
|
26
34
|
"devDependencies": {
|
|
35
|
+
"@types/node": "^18.13.0",
|
|
27
36
|
"jest": "^28.1.3"
|
|
28
37
|
}
|
|
29
38
|
}
|
package/.editorconfig
DELETED
package/.eslintrc.js
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
module.exports = {
|
|
2
|
-
"extends": "airbnb-base",
|
|
3
|
-
"globals": {
|
|
4
|
-
"window": true,
|
|
5
|
-
"test": true,
|
|
6
|
-
"expect": true,
|
|
7
|
-
},
|
|
8
|
-
"rules": {
|
|
9
|
-
"no-plusplus": "off",
|
|
10
|
-
"no-unused-expressions": "off",
|
|
11
|
-
"quote-props": "off",
|
|
12
|
-
"camelcase": "off",
|
|
13
|
-
"no-underscore-dangle": "off",
|
|
14
|
-
"no-param-reassign": "off",
|
|
15
|
-
"no-shadow": "off",
|
|
16
|
-
// for the generated parser
|
|
17
|
-
"consistent-return": "off",
|
|
18
|
-
"object-property-newline": "off",
|
|
19
|
-
"dot-notation": "off",
|
|
20
|
-
"no-use-before-define": "off",
|
|
21
|
-
"no-console": ["error", { allow: ["log", "error", "warn"] }]
|
|
22
|
-
},
|
|
23
|
-
"plugins": [
|
|
24
|
-
"ie11"
|
|
25
|
-
]
|
|
26
|
-
};
|
package/error.png
DELETED
|
Binary file
|
package/metaParserGenerator.js
DELETED
|
@@ -1,312 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
const fs = require('fs');
|
|
3
|
-
// const path = require('path');
|
|
4
|
-
const { preprocessGrammar, checkGrammarAndTokens } = require('./utils');
|
|
5
|
-
|
|
6
|
-
const recordFailure = `
|
|
7
|
-
let best_failure;
|
|
8
|
-
let best_failure_array = [];
|
|
9
|
-
let best_failure_index = 0;
|
|
10
|
-
|
|
11
|
-
function record_failure(failure, i) {
|
|
12
|
-
if (i > best_failure_index) {
|
|
13
|
-
best_failure_array = [];
|
|
14
|
-
}
|
|
15
|
-
if (best_failure_array.length === 0) {
|
|
16
|
-
best_failure = failure;
|
|
17
|
-
}
|
|
18
|
-
best_failure_array.push(failure);
|
|
19
|
-
best_failure_index = i;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
let cache = {};
|
|
23
|
-
|
|
24
|
-
function memoize(name, func) {
|
|
25
|
-
return function memoize_inner(stream, index) {
|
|
26
|
-
const key = \`\${name}-\${index}\`;
|
|
27
|
-
let value = cache[key];
|
|
28
|
-
if (value !== undefined) {
|
|
29
|
-
return value;
|
|
30
|
-
}
|
|
31
|
-
value = func(stream, index);
|
|
32
|
-
cache[key] = value;
|
|
33
|
-
return value;
|
|
34
|
-
};
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
let cacheR = {};
|
|
38
|
-
|
|
39
|
-
// based on https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1
|
|
40
|
-
function memoize_left_recur(name, func) {
|
|
41
|
-
return function memoize_inner(stream, index) {
|
|
42
|
-
const key = \`\${name}-\${index}\`;
|
|
43
|
-
let value = cacheR[key];
|
|
44
|
-
if (value !== undefined) {
|
|
45
|
-
return value;
|
|
46
|
-
}
|
|
47
|
-
// prime this rule with a failure
|
|
48
|
-
cacheR[key] = false;
|
|
49
|
-
let lastpos;
|
|
50
|
-
let lastvalue = value;
|
|
51
|
-
while (true) {
|
|
52
|
-
value = func(stream, index);
|
|
53
|
-
if (!value) break;
|
|
54
|
-
if (value.last_index <= lastpos) break;
|
|
55
|
-
lastpos = value.last_index;
|
|
56
|
-
lastvalue = value;
|
|
57
|
-
cacheR[key] = value;
|
|
58
|
-
}
|
|
59
|
-
return lastvalue;
|
|
60
|
-
};
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
`;
|
|
64
|
-
|
|
65
|
-
function generateTokenizer(tokenDef) {
|
|
66
|
-
const output = [];
|
|
67
|
-
const keys = Object.keys(tokenDef);
|
|
68
|
-
for (let i = 0; i < keys.length; i++) {
|
|
69
|
-
const key = keys[i];
|
|
70
|
-
if ((/:|\?/g).test(key)) {
|
|
71
|
-
throw new Error('Reserved word in token name');
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
output.push('function _tokenize(tokenDef, input, stream) {');
|
|
76
|
-
output.push(' let match;');
|
|
77
|
-
let key;
|
|
78
|
-
for (let i = 0; i < keys.length; i++) {
|
|
79
|
-
key = keys[i];
|
|
80
|
-
const token = tokenDef[key];
|
|
81
|
-
if (token.str) {
|
|
82
|
-
if (token.str.indexOf("'") > -1 || token.str.indexOf('\n') > -1) {
|
|
83
|
-
output.push(` if (input.startsWith(\`${token.str}\`)) {`);
|
|
84
|
-
output.push(` return [\`${token.str}\`, '${key}'];`);
|
|
85
|
-
} else {
|
|
86
|
-
output.push(` if (input.startsWith('${token.str}')) {`);
|
|
87
|
-
output.push(` return ['${token.str}', '${key}'];`);
|
|
88
|
-
}
|
|
89
|
-
output.push(' }');
|
|
90
|
-
} else if (token.reg) {
|
|
91
|
-
output.push(` match = input.match(tokenDef.${key}.reg);`);
|
|
92
|
-
output.push(' if (match !== null) {');
|
|
93
|
-
output.push(` return [match[0], '${key}'];`);
|
|
94
|
-
output.push(' }');
|
|
95
|
-
} else if (token.func) {
|
|
96
|
-
output.push(` match = tokenDef.${key}.func(input, stream);`);
|
|
97
|
-
output.push(' if (match !== undefined) {');
|
|
98
|
-
output.push(` return [match, '${key}'];`);
|
|
99
|
-
output.push(' }');
|
|
100
|
-
} else {
|
|
101
|
-
throw new Error(`Tokenizer error: Invalid token ${key} without a reg, str or func property`);
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
output.push(` return [null, '${key}'];`);
|
|
105
|
-
output.push('}');
|
|
106
|
-
|
|
107
|
-
output.push('function tokenize(tokenDef, input) {');
|
|
108
|
-
output.push(` const stream = [];
|
|
109
|
-
let lastToken;
|
|
110
|
-
let key;
|
|
111
|
-
let candidate = null;
|
|
112
|
-
const len = input.length;
|
|
113
|
-
let char = 0;
|
|
114
|
-
let index = 0;
|
|
115
|
-
let line = 0;
|
|
116
|
-
let column = 0;
|
|
117
|
-
while (char < len) {
|
|
118
|
-
[candidate, key] = _tokenize(tokenDef, input, stream);
|
|
119
|
-
if (candidate !== null) {
|
|
120
|
-
lastToken = {
|
|
121
|
-
type: key,
|
|
122
|
-
value: candidate,
|
|
123
|
-
start: char,
|
|
124
|
-
stream_index: index,
|
|
125
|
-
len: candidate.length,
|
|
126
|
-
lineStart: line,
|
|
127
|
-
columnStart: column,
|
|
128
|
-
};
|
|
129
|
-
const lines = candidate.split('\\n');
|
|
130
|
-
if (lines.length > 1) {
|
|
131
|
-
line += lines.length - 1;
|
|
132
|
-
column = lines[lines.length - 1].length;
|
|
133
|
-
} else {
|
|
134
|
-
column += candidate.length;
|
|
135
|
-
}
|
|
136
|
-
lastToken.lineEnd = line;
|
|
137
|
-
lastToken.columnEnd = column;
|
|
138
|
-
stream.push(lastToken);
|
|
139
|
-
index++;
|
|
140
|
-
char += candidate.length;
|
|
141
|
-
input = input.substr(candidate.length);
|
|
142
|
-
} else {
|
|
143
|
-
if (stream.length === 0) {
|
|
144
|
-
throw new Error('Tokenizer error: total match failure');
|
|
145
|
-
}
|
|
146
|
-
if (lastToken) {
|
|
147
|
-
lastToken.pointer += lastToken.value.length;
|
|
148
|
-
}
|
|
149
|
-
let msg = \`Tokenizer error, no matching token found for \${input.slice(0, 26)}\`;
|
|
150
|
-
if (lastToken) {
|
|
151
|
-
msg += \`Before token of type \${lastToken.type}: \${lastToken.value}\`;
|
|
152
|
-
}
|
|
153
|
-
const error = new Error(msg);
|
|
154
|
-
error.token = lastToken;
|
|
155
|
-
throw error;
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
stream.push({
|
|
159
|
-
type: 'EOS', value: '<End Of Stream>', char, index,
|
|
160
|
-
});
|
|
161
|
-
return stream;
|
|
162
|
-
}
|
|
163
|
-
`);
|
|
164
|
-
return output;
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
function generateSubRule(name, index, subRule, tokensDef, debug) {
|
|
168
|
-
const output = [];
|
|
169
|
-
output.push(`let ${name}_${index} = (stream, index) => {`);
|
|
170
|
-
let i = 0;
|
|
171
|
-
output.push(' let i = index;');
|
|
172
|
-
output.push(' const children = [];');
|
|
173
|
-
output.push(' const named = {};');
|
|
174
|
-
output.push(` const node = {
|
|
175
|
-
children, stream_index: index, name: '${name}',
|
|
176
|
-
subRule: ${index}, type: '${name}', named,
|
|
177
|
-
};`);
|
|
178
|
-
subRule.forEach((rule) => {
|
|
179
|
-
// terminal rule
|
|
180
|
-
if (tokensDef[rule.value] || rule.value === 'EOS') {
|
|
181
|
-
debug ? output.push(' console.log(i, stream[i])') : null;
|
|
182
|
-
if (rule.repeatable) {
|
|
183
|
-
output.push(` while(stream[i].type === '${rule.value}') {`);
|
|
184
|
-
if (rule.alias) {
|
|
185
|
-
output.push(` named['${rule.alias}'] ? null : named['${rule.alias}'] = []`);
|
|
186
|
-
output.push(` named['${rule.alias}'].push(stream[i])`);
|
|
187
|
-
}
|
|
188
|
-
output.push(' children.push(stream[i]); i++;');
|
|
189
|
-
output.push(' }');
|
|
190
|
-
} else if (rule.optional) {
|
|
191
|
-
output.push(` if (stream[i].type === '${rule.value}') {`);
|
|
192
|
-
rule.alias ? output.push(` named['${rule.alias}'] = stream[i];`) : null;
|
|
193
|
-
output.push(' children.push(stream[i]); i++;');
|
|
194
|
-
output.push(' }');
|
|
195
|
-
} else {
|
|
196
|
-
output.push(`
|
|
197
|
-
if (stream[i].type !== '${rule.value}') {
|
|
198
|
-
if (i >= best_failure_index) {
|
|
199
|
-
const failure = {
|
|
200
|
-
rule_name: '${name}', sub_rule_index: ${index},
|
|
201
|
-
sub_rule_stream_index: i - index, sub_rule_token_index: ${i},
|
|
202
|
-
stream_index: i, token: stream[i], first_token: stream[index], success: false,
|
|
203
|
-
};
|
|
204
|
-
record_failure(failure, i);
|
|
205
|
-
}
|
|
206
|
-
return false;
|
|
207
|
-
}
|
|
208
|
-
`);
|
|
209
|
-
rule.alias ? output.push(` named['${rule.alias}'] = stream[i];`) : null;
|
|
210
|
-
output.push(' children.push(stream[i]); i++;');
|
|
211
|
-
}
|
|
212
|
-
i++;
|
|
213
|
-
// calling another rule in the grammar
|
|
214
|
-
} else {
|
|
215
|
-
if (rule.function) {
|
|
216
|
-
output.push(` if (!(${rule.value})(node)) { return false; }`);
|
|
217
|
-
} else if (rule.repeatable) {
|
|
218
|
-
output.push(` let _rule_${i} = ${rule.value}(stream, i);`); // doing the call
|
|
219
|
-
output.push(` while (_rule_${i}) {`);
|
|
220
|
-
if (rule.alias) {
|
|
221
|
-
output.push(` named['${rule.alias}'] ? null : named['${rule.alias}'] = [];`);
|
|
222
|
-
output.push(` named['${rule.alias}'].push(_rule_${i});`);
|
|
223
|
-
}
|
|
224
|
-
output.push(` children.push(_rule_${i});`);
|
|
225
|
-
output.push(` i = _rule_${i}.last_index;`);
|
|
226
|
-
output.push(` _rule_${i} = ${rule.value}(stream, i);`);
|
|
227
|
-
output.push(' }');
|
|
228
|
-
} else if (!rule.optional) {
|
|
229
|
-
output.push(` const _rule_${i} = ${rule.value}(stream, i);`); // doing the call
|
|
230
|
-
output.push(` if (!_rule_${i}) return false;`);
|
|
231
|
-
rule.alias ? output.push(` named['${rule.alias}'] = _rule_${i};`) : null;
|
|
232
|
-
output.push(` children.push(_rule_${i});`);
|
|
233
|
-
output.push(` i = _rule_${i}.last_index;`);
|
|
234
|
-
} else {
|
|
235
|
-
output.push(` const _rule_${i} = ${rule.value}(stream, i);`); // doing the call
|
|
236
|
-
output.push(` if (_rule_${i}) {`);
|
|
237
|
-
output.push(` children.push(_rule_${i});`);
|
|
238
|
-
rule.alias ? output.push(` named['${rule.alias}'] = _rule_${i};`) : null;
|
|
239
|
-
output.push(` i = _rule_${i}.last_index;`);
|
|
240
|
-
output.push(' }');
|
|
241
|
-
}
|
|
242
|
-
i++;
|
|
243
|
-
}
|
|
244
|
-
});
|
|
245
|
-
output.push(' node.success = i === stream.length; node.last_index = i;');
|
|
246
|
-
output.push(' return node;');
|
|
247
|
-
output.push('};');
|
|
248
|
-
if (subRule[0].leftRecursion) {
|
|
249
|
-
output.push(`${name}_${index} = memoize_left_recur('${name}_${index}', ${name}_${index});`);
|
|
250
|
-
} else {
|
|
251
|
-
output.push(`${name}_${index} = memoize('${name}_${index}', ${name}_${index});`);
|
|
252
|
-
}
|
|
253
|
-
output.push('\n');
|
|
254
|
-
return output;
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
function generate(grammar, tokensDef, debug) {
|
|
258
|
-
let output = [];
|
|
259
|
-
checkGrammarAndTokens(grammar, tokensDef);
|
|
260
|
-
const newGrammar = preprocessGrammar(grammar);
|
|
261
|
-
const entries = Object.keys(newGrammar);
|
|
262
|
-
output.push('// This code is automatically generated by the meta parser, do not modify');
|
|
263
|
-
output.push('// produced with metaParserGenerator.js');
|
|
264
|
-
output.push(recordFailure);
|
|
265
|
-
entries.forEach((key) => {
|
|
266
|
-
let i = 0;
|
|
267
|
-
const metaSub = [];
|
|
268
|
-
newGrammar[key].forEach((subRule) => {
|
|
269
|
-
output = output.concat(generateSubRule(key, i, subRule, tokensDef, debug));
|
|
270
|
-
metaSub.push(`${key}_${i}`);
|
|
271
|
-
i++;
|
|
272
|
-
});
|
|
273
|
-
output.push(`function ${key}(stream, index) {`);
|
|
274
|
-
const st = metaSub.map(sub => `${sub}(stream, index)`).join('\n || ');
|
|
275
|
-
output.push(` return ${st};`);
|
|
276
|
-
output.push('}');
|
|
277
|
-
});
|
|
278
|
-
output = output.concat(generateTokenizer(tokensDef));
|
|
279
|
-
output.push(`module.exports = {
|
|
280
|
-
parse: (stream) => {
|
|
281
|
-
best_failure = null;
|
|
282
|
-
best_failure_index = 0;
|
|
283
|
-
best_failure_array = [];
|
|
284
|
-
cache = {};
|
|
285
|
-
cacheR = {};
|
|
286
|
-
const result = START(stream, 0);
|
|
287
|
-
if (!result) {
|
|
288
|
-
return best_failure;
|
|
289
|
-
}
|
|
290
|
-
return result;
|
|
291
|
-
},
|
|
292
|
-
tokenize,
|
|
293
|
-
};
|
|
294
|
-
`);
|
|
295
|
-
return output;
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
function generateParser(grammar, tokensDefinition, filename) {
|
|
299
|
-
fs.writeFileSync(filename,
|
|
300
|
-
generate(grammar, tokensDefinition, false).join('\n'), (err) => {
|
|
301
|
-
if (err) {
|
|
302
|
-
// eslint-disable-next-line no-console
|
|
303
|
-
console.log(err);
|
|
304
|
-
}
|
|
305
|
-
});
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
module.exports = {
|
|
309
|
-
generateParser,
|
|
310
|
-
generate,
|
|
311
|
-
generateTokenizer,
|
|
312
|
-
};
|
package/tests/generateParser.js
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
const path = require('path');
|
|
3
|
-
const { grammar } = require('./grammar');
|
|
4
|
-
const { tokensDefinition } = require('./tokensDefinition');
|
|
5
|
-
const { generateParser } = require('../metaParserGenerator');
|
|
6
|
-
|
|
7
|
-
generateParser(grammar, tokensDefinition, path.resolve(__dirname, './parser.js'));
|
|
8
|
-
|
|
9
|
-
console.log('parser generated');
|
package/tests/grammar.js
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
const grammar = {
|
|
3
|
-
'START': [
|
|
4
|
-
// necessary to accept the first line to not be a newline
|
|
5
|
-
['GLOBAL_STATEMENT', 'GLOBAL_STATEMENTS*', 'EOS'],
|
|
6
|
-
['GLOBAL_STATEMENTS*', 'EOS'],
|
|
7
|
-
],
|
|
8
|
-
'GLOBAL_STATEMENTS': [
|
|
9
|
-
['newline', 'GLOBAL_STATEMENT'],
|
|
10
|
-
['newline'],
|
|
11
|
-
],
|
|
12
|
-
'GLOBAL_STATEMENT': [
|
|
13
|
-
['math_operation'],
|
|
14
|
-
],
|
|
15
|
-
'math_operation': [
|
|
16
|
-
['math_operation', 'math_operator', 'number'],
|
|
17
|
-
['number'],
|
|
18
|
-
],
|
|
19
|
-
};
|
|
20
|
-
|
|
21
|
-
module.exports = {
|
|
22
|
-
grammar,
|
|
23
|
-
};
|