meta-parser-generator 1.0.4 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tests/parser.js DELETED
@@ -1,445 +0,0 @@
1
- // This code is automatically generated by the meta parser, do not modify
2
- // produced with metaParserGenerator.js
3
-
4
// Furthest-failure bookkeeping shared by the generated rule functions.
let best_failure;
let best_failure_array = [];
let best_failure_index = 0;

/**
 * Store a token-match failure observed at stream position `i`.
 *
 * A position beyond the current best discards the failures collected so far.
 * The first failure stored in an empty list becomes `best_failure`.
 *
 * @param {object} failure - failure record built by a rule function
 * @param {number} i - stream index at which the failure happened
 */
function record_failure(failure, i) {
  const movedFurther = i > best_failure_index;
  if (movedFurther) {
    best_failure_array = [];
  }
  const listIsEmpty = best_failure_array.length === 0;
  if (listIsEmpty) {
    best_failure = failure;
  }
  best_failure_array.push(failure);
  best_failure_index = i;
}
18
-
19
// Results of non-left-recursive sub-rules, keyed by "<name>-<index>".
let cache = {};

/**
 * Wrap a sub-rule function so each (name, index) pair is computed once.
 *
 * The stream is not part of the key. A result of `undefined` is never
 * treated as cached.
 *
 * @param {string} name - unique name used as the key prefix
 * @param {function(Array, number)} func - sub-rule function to wrap
 * @returns {function(Array, number)} caching wrapper around `func`
 */
function memoize(name, func) {
  return function memoize_inner(stream, index) {
    const key = `${name}-${index}`;
    const cached = cache[key];
    if (cached !== undefined) {
      return cached;
    }
    const computed = func(stream, index);
    cache[key] = computed;
    return computed;
  };
}
33
-
34
// Results of left-recursive sub-rules, keyed by "<name>-<index>".
let cacheR = {};

// based on https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1
/**
 * Wrap a left-recursive sub-rule function.
 *
 * The cache entry is first primed with a failure so the recursive call made
 * by `func` terminates. `func` is then re-run, each time seeing the previous
 * result through the cache, for as long as the match keeps getting longer.
 *
 * @param {string} name - unique name used as the key prefix
 * @param {function(Array, number)} func - sub-rule function to wrap
 * @returns {function(Array, number)} wrapper returning the longest match,
 *   or `false` when `func` never matched
 */
function memoize_left_recur(name, func) {
  return function memoize_inner(stream, index) {
    const key = `${name}-${index}`;
    const cached = cacheR[key];
    if (cached !== undefined) {
      return cached;
    }
    // prime this rule with a failure
    cacheR[key] = false;
    // Explicit starting values: a failed parse reports `false` like every
    // other rule, and the growth check never compares against `undefined`.
    let lastpos = Number.NEGATIVE_INFINITY;
    let lastvalue = false;
    while (true) {
      const value = func(stream, index);
      if (!value) break;
      // Stop as soon as another pass no longer consumes more of the stream.
      if (value.last_index <= lastpos) break;
      lastpos = value.last_index;
      lastvalue = value;
      cacheR[key] = value;
    }
    return lastvalue;
  };
}
59
-
60
-
61
// START, sub-rule 0: GLOBAL_STATEMENT, then any number of GLOBAL_STATEMENTS,
// then an 'EOS' token. Returns the node on success, false otherwise.
let START_0 = (stream, index) => {
  const children = [];
  const named = {};
  const node = {
    children,
    stream_index: index,
    name: 'START',
    subRule: 0,
    type: 'START',
    named,
  };
  let cursor = index;

  // Mandatory leading statement.
  const firstStatement = GLOBAL_STATEMENT(stream, cursor);
  if (!firstStatement) {
    return false;
  }
  children.push(firstStatement);
  cursor = firstStatement.last_index;

  // Repeat GLOBAL_STATEMENTS for as long as it matches.
  for (
    let more = GLOBAL_STATEMENTS(stream, cursor);
    more;
    more = GLOBAL_STATEMENTS(stream, cursor)
  ) {
    children.push(more);
    cursor = more.last_index;
  }

  const endToken = stream[cursor];
  if (endToken.type !== 'EOS') {
    if (cursor >= best_failure_index) {
      record_failure({
        rule_name: 'START',
        sub_rule_index: 0,
        sub_rule_stream_index: cursor - index,
        sub_rule_token_index: 2,
        stream_index: cursor,
        token: endToken,
        first_token: stream[index],
        success: false,
      }, cursor);
    }
    return false;
  }

  children.push(endToken);
  cursor += 1;
  node.success = cursor === stream.length;
  node.last_index = cursor;
  return node;
};
START_0 = memoize('START_0', START_0);
97
-
98
-
99
// START, sub-rule 1: any number of GLOBAL_STATEMENTS, then an 'EOS' token.
// Returns the node on success, false otherwise.
let START_1 = (stream, index) => {
  const children = [];
  const named = {};
  const node = {
    children,
    stream_index: index,
    name: 'START',
    subRule: 1,
    type: 'START',
    named,
  };
  let cursor = index;

  // Repeat GLOBAL_STATEMENTS for as long as it matches.
  for (
    let more = GLOBAL_STATEMENTS(stream, cursor);
    more;
    more = GLOBAL_STATEMENTS(stream, cursor)
  ) {
    children.push(more);
    cursor = more.last_index;
  }

  const endToken = stream[cursor];
  if (endToken.type !== 'EOS') {
    if (cursor >= best_failure_index) {
      record_failure({
        rule_name: 'START',
        sub_rule_index: 1,
        sub_rule_stream_index: cursor - index,
        sub_rule_token_index: 1,
        stream_index: cursor,
        token: endToken,
        first_token: stream[index],
        success: false,
      }, cursor);
    }
    return false;
  }

  children.push(endToken);
  cursor += 1;
  node.success = cursor === stream.length;
  node.last_index = cursor;
  return node;
};
START_1 = memoize('START_1', START_1);
131
-
132
-
133
// START: try sub-rule 0 first and fall back to sub-rule 1.
function START(stream, index) {
  const firstAlternative = START_0(stream, index);
  if (firstAlternative) {
    return firstAlternative;
  }
  return START_1(stream, index);
}
137
// GLOBAL_STATEMENTS, sub-rule 0: a 'newline' token followed by a
// GLOBAL_STATEMENT. Returns the node on success, false otherwise.
let GLOBAL_STATEMENTS_0 = (stream, index) => {
  const children = [];
  const named = {};
  const node = {
    children,
    stream_index: index,
    name: 'GLOBAL_STATEMENTS',
    subRule: 0,
    type: 'GLOBAL_STATEMENTS',
    named,
  };
  let cursor = index;

  const separator = stream[cursor];
  if (separator.type !== 'newline') {
    if (cursor >= best_failure_index) {
      record_failure({
        rule_name: 'GLOBAL_STATEMENTS',
        sub_rule_index: 0,
        sub_rule_stream_index: cursor - index,
        sub_rule_token_index: 0,
        stream_index: cursor,
        token: separator,
        first_token: stream[index],
        success: false,
      }, cursor);
    }
    return false;
  }
  children.push(separator);
  cursor += 1;

  const statement = GLOBAL_STATEMENT(stream, cursor);
  if (!statement) {
    return false;
  }
  children.push(statement);
  cursor = statement.last_index;

  node.success = cursor === stream.length;
  node.last_index = cursor;
  return node;
};
GLOBAL_STATEMENTS_0 = memoize('GLOBAL_STATEMENTS_0', GLOBAL_STATEMENTS_0);
167
-
168
-
169
// GLOBAL_STATEMENTS, sub-rule 1: a single 'newline' token.
// Returns the node on success, false otherwise.
let GLOBAL_STATEMENTS_1 = (stream, index) => {
  const children = [];
  const named = {};
  const node = {
    children,
    stream_index: index,
    name: 'GLOBAL_STATEMENTS',
    subRule: 1,
    type: 'GLOBAL_STATEMENTS',
    named,
  };
  let cursor = index;

  const separator = stream[cursor];
  if (separator.type !== 'newline') {
    if (cursor >= best_failure_index) {
      record_failure({
        rule_name: 'GLOBAL_STATEMENTS',
        sub_rule_index: 1,
        sub_rule_stream_index: cursor - index,
        sub_rule_token_index: 0,
        stream_index: cursor,
        token: separator,
        first_token: stream[index],
        success: false,
      }, cursor);
    }
    return false;
  }
  children.push(separator);
  cursor += 1;

  node.success = cursor === stream.length;
  node.last_index = cursor;
  return node;
};
GLOBAL_STATEMENTS_1 = memoize('GLOBAL_STATEMENTS_1', GLOBAL_STATEMENTS_1);
195
-
196
-
197
// GLOBAL_STATEMENTS: try sub-rule 0 first and fall back to sub-rule 1.
function GLOBAL_STATEMENTS(stream, index) {
  const firstAlternative = GLOBAL_STATEMENTS_0(stream, index);
  if (firstAlternative) {
    return firstAlternative;
  }
  return GLOBAL_STATEMENTS_1(stream, index);
}
201
// GLOBAL_STATEMENT, sub-rule 0: a single math_operation.
// Returns the node on success, false otherwise.
let GLOBAL_STATEMENT_0 = (stream, index) => {
  const children = [];
  const named = {};
  const node = {
    children,
    stream_index: index,
    name: 'GLOBAL_STATEMENT',
    subRule: 0,
    type: 'GLOBAL_STATEMENT',
    named,
  };

  const operation = math_operation(stream, index);
  if (!operation) {
    return false;
  }
  children.push(operation);
  const cursor = operation.last_index;

  node.success = cursor === stream.length;
  node.last_index = cursor;
  return node;
};
GLOBAL_STATEMENT_0 = memoize('GLOBAL_STATEMENT_0', GLOBAL_STATEMENT_0);
217
-
218
-
219
// GLOBAL_STATEMENT has a single alternative, sub-rule 0.
function GLOBAL_STATEMENT(stream, index) {
  const onlyAlternative = GLOBAL_STATEMENT_0(stream, index);
  return onlyAlternative;
}
222
// math_operation, sub-rule 0 (left-recursive): a math_operation, then a
// 'math_operator' token, then a 'number' token.
// Returns the node on success, false otherwise.
let math_operation_0 = (stream, index) => {
  const children = [];
  const named = {};
  const node = {
    children,
    stream_index: index,
    name: 'math_operation',
    subRule: 0,
    type: 'math_operation',
    named,
  };
  let cursor = index;

  const leftOperand = math_operation(stream, cursor);
  if (!leftOperand) {
    return false;
  }
  children.push(leftOperand);
  cursor = leftOperand.last_index;

  const operatorToken = stream[cursor];
  if (operatorToken.type !== 'math_operator') {
    if (cursor >= best_failure_index) {
      record_failure({
        rule_name: 'math_operation',
        sub_rule_index: 0,
        sub_rule_stream_index: cursor - index,
        sub_rule_token_index: 1,
        stream_index: cursor,
        token: operatorToken,
        first_token: stream[index],
        success: false,
      }, cursor);
    }
    return false;
  }
  children.push(operatorToken);
  cursor += 1;

  const rightOperand = stream[cursor];
  if (rightOperand.type !== 'number') {
    if (cursor >= best_failure_index) {
      record_failure({
        rule_name: 'math_operation',
        sub_rule_index: 0,
        sub_rule_stream_index: cursor - index,
        sub_rule_token_index: 2,
        stream_index: cursor,
        token: rightOperand,
        first_token: stream[index],
        success: false,
      }, cursor);
    }
    return false;
  }
  children.push(rightOperand);
  cursor += 1;

  node.success = cursor === stream.length;
  node.last_index = cursor;
  return node;
};
math_operation_0 = memoize_left_recur('math_operation_0', math_operation_0);
266
-
267
-
268
// math_operation, sub-rule 1: a single 'number' token.
// Returns the node on success, false otherwise.
let math_operation_1 = (stream, index) => {
  const children = [];
  const named = {};
  const node = {
    children,
    stream_index: index,
    name: 'math_operation',
    subRule: 1,
    type: 'math_operation',
    named,
  };
  let cursor = index;

  const numberToken = stream[cursor];
  if (numberToken.type !== 'number') {
    if (cursor >= best_failure_index) {
      record_failure({
        rule_name: 'math_operation',
        sub_rule_index: 1,
        sub_rule_stream_index: cursor - index,
        sub_rule_token_index: 0,
        stream_index: cursor,
        token: numberToken,
        first_token: stream[index],
        success: false,
      }, cursor);
    }
    return false;
  }
  children.push(numberToken);
  cursor += 1;

  node.success = cursor === stream.length;
  node.last_index = cursor;
  return node;
};
math_operation_1 = memoize('math_operation_1', math_operation_1);
294
-
295
-
296
// math_operation: try the left-recursive sub-rule 0 first and fall back to
// sub-rule 1.
function math_operation(stream, index) {
  const firstAlternative = math_operation_0(stream, index);
  if (firstAlternative) {
    return firstAlternative;
  }
  return math_operation_1(stream, index);
}
300
/**
 * Match one token at the very start of `input`.
 *
 * Matchers are tried in a fixed order and the first hit wins.
 *
 * @param {object} tokenDef - token definitions; `reg` or `func` is read per type
 * @param {string} input - remaining, not yet tokenized text
 * @param {Array} stream - tokens produced so far, passed to `func` matchers
 * @returns {Array} `[matchedText, tokenType]`, or `[null, 'W']` when nothing matches
 */
function _tokenize(tokenDef, input, stream) {
  // Each factory returns a thunk, so a definition is only read when its
  // turn comes.
  const viaRegex = (type) => () => {
    const found = input.match(tokenDef[type].reg);
    return found !== null ? [found[0], type] : undefined;
  };
  const viaLiteral = (text, type) => () => (
    input.startsWith(text) ? [text, type] : undefined
  );
  const viaFunction = (type) => () => {
    const found = tokenDef[type].func(input, stream);
    return found !== undefined ? [found, type] : undefined;
  };

  const attempts = [
    viaRegex('number'),
    viaRegex('comment'),
    viaRegex('multiline_comment'),
    viaLiteral(',', ','),
    viaLiteral('.', '.'),
    viaLiteral('(', '('),
    viaLiteral(')', ')'),
    viaLiteral('{', '{'),
    viaLiteral('}', '}'),
    viaLiteral('>', '>'),
    viaLiteral('<', '<'),
    viaRegex('name'),
    viaRegex('math_operator'),
    viaLiteral('!', 'unary'),
    viaLiteral('=', '='),
    viaLiteral(':', 'colon'),
    viaLiteral('\n', 'newline'),
    viaFunction('str'),
    viaFunction('w'),
    viaRegex('W'),
  ];

  for (const attempt of attempts) {
    const hit = attempt();
    if (hit !== undefined) {
      return hit;
    }
  }
  return [null, 'W'];
}
374
/**
 * Split `input` into a token stream terminated by an 'EOS' token.
 *
 * Each token records its type, value, character offset (`start`), position
 * in the stream (`stream_index`), length (`len`) and zero-based start/end
 * line and column.
 *
 * @param {object} tokenDef - token definitions forwarded to `_tokenize`
 * @param {string} input - text to tokenize
 * @returns {Array<object>} tokens followed by an 'EOS' token
 * @throws {Error} when no token matches; if a token was already produced it
 *   is attached as `error.token`, with `pointer` set to the failing offset
 * @throws {Error} when a token definition matches an empty string
 */
function tokenize(tokenDef, input) {
  const stream = [];
  const len = input.length;
  // Work on a local copy instead of reassigning the parameter.
  let remaining = input;
  let lastToken;
  let char = 0;
  let index = 0;
  let line = 0;
  let column = 0;

  while (char < len) {
    const [candidate, key] = _tokenize(tokenDef, remaining, stream);

    if (candidate === null) {
      if (stream.length === 0) {
        throw new Error('Tokenizer error: total match failure');
      }
      let msg = `Tokenizer error, no matching token found for ${remaining.slice(0, 26)}`;
      if (lastToken) {
        // `pointer` used to be `undefined += length`, i.e. NaN; expose the
        // offset at which tokenizing stopped instead.
        lastToken.pointer = char;
        // The separating newline keeps the two parts of the message apart.
        msg += `\nBefore token of type ${lastToken.type}: ${lastToken.value}`;
      }
      const error = new Error(msg);
      error.token = lastToken;
      throw error;
    }

    if (candidate.length === 0) {
      // A zero-length match would never advance `char` and loop forever.
      throw new Error(`Tokenizer error: token type ${key} matched an empty string at offset ${char}`);
    }

    lastToken = {
      type: key,
      value: candidate,
      start: char,
      stream_index: index,
      len: candidate.length,
      lineStart: line,
      columnStart: column,
    };
    const lines = candidate.split('\n');
    if (lines.length > 1) {
      // The token spans lines: the column restarts after its last newline.
      line += lines.length - 1;
      column = lines[lines.length - 1].length;
    } else {
      column += candidate.length;
    }
    lastToken.lineEnd = line;
    lastToken.columnEnd = column;
    stream.push(lastToken);
    index++;
    char += candidate.length;
    remaining = remaining.slice(candidate.length);
  }

  stream.push({
    type: 'EOS', value: '<End Of Stream>', char, index,
  });
  return stream;
}
430
-
431
- module.exports = {
432
- parse: (stream) => {
433
- best_failure = null;
434
- best_failure_index = 0;
435
- best_failure_array = [];
436
- cache = {};
437
- cacheR = {};
438
- const result = START(stream, 0);
439
- if (!result) {
440
- return best_failure;
441
- }
442
- return result;
443
- },
444
- tokenize,
445
- };
package/tests/test.js DELETED
@@ -1,31 +0,0 @@
1
- const parser = require('./parser');
2
- const { tokensDefinition } = require('./tokensDefinition');
3
- const { grammar } = require('./grammar');
4
- const { displayError } = require('../utils');
5
-
6
/**
 * Tokenize and parse `code` with the generated parser.
 *
 * When parsing fails and the DEBUG environment variable is set, the failure
 * is handed to `displayError`.
 *
 * @param {string} code - source text to parse
 * @returns {object} whatever `parser.parse` returned
 */
function parse(code) {
  const tokens = parser.tokenize(tokensDefinition, code);
  const ast = parser.parse(tokens);
  const shouldReport = !ast.success && process.env.DEBUG;
  if (shouldReport) {
    displayError(tokens, tokensDefinition, grammar, ast);
  }
  return ast;
}
14
-
15
// Two newline-separated operations followed by a trailing newline parse.
test('correct grammar', () => {
  const source = ['9+9', '1+1+2', ''].join('\n');
  expect(parse(source).success).toBe(true);
});

// The last line contains spaces around the operator and must be rejected.
test('incorrect', () => {
  const source = ['9+9', '1+1', '1+2', '1 + 4', ''].join('\n');
  expect(parse(source).success).toBe(false);
});
@@ -1,50 +0,0 @@
1
-
2
/**
 * Match a quoted string at the start of `input`.
 *
 * The opening character must be ", ' or `. The match ends at the next
 * occurrence of that same character that is not preceded by a backslash.
 *
 * @param {string} input - remaining text
 * @returns {string|undefined} the literal including both quotes, or
 *   undefined when `input` does not start with a terminated string
 */
function strDef(input) {
  const quote = input.charAt(0);
  const opensString = quote === '"' || quote === "'" || quote === '`';
  if (!opensString) {
    return undefined;
  }
  for (let pos = 1; input.charAt(pos); pos += 1) {
    const current = input.charAt(pos);
    if (current === '\\') {
      // Skip the escaped character.
      pos += 1;
    } else if (current === quote) {
      return input.slice(0, pos + 1);
    }
  }
  return undefined;
}
18
-
19
/**
 * Match exactly one space at the start of `input`.
 *
 * @param {string} input - remaining text
 * @returns {string|undefined} ' ' when the first character is a space and
 *   the second one is not; undefined otherwise
 */
function singleSpace(input) {
  const startsWithSpace = input[0] === ' ';
  const followedBySpace = input[1] === ' ';
  return startsWithSpace && !followedBySpace ? ' ' : undefined;
}
24
-
25
// Token definitions for the test grammar. Each entry carries one matcher:
// `reg` (regular expression), `str` (literal text) or `func` (custom
// function). `verbose` is an optional human-readable name.
const tokensDefinition = {
  'number': { reg: /^[0-9]+(\.[0-9]*)?/ },
  'comment': { reg: /^\/\/[^\n]*/, verbose: 'comment' },
  'multiline_comment': { reg: /^\/\*+[^*]*\*+(?:[^/*][^*]*\*+)*\//, verbose: 'comment' },
  ',': { str: ',' },
  '.': { str: '.' },
  '(': { str: '(' },
  ')': { str: ')' },
  '{': { str: '{' },
  '}': { str: '}' },
  '>': { str: '>' },
  '<': { str: '<' },
  // Inside a character class `|` is a literal, so the former `[\w|$|_]`
  // also accepted '|' as part of a name. `\w` already covers '_'.
  'name': { reg: /^[\w$]+/ },
  'math_operator': { reg: /^(\+|\/|-|\*|\^|~|%)/ },
  'unary': { str: '!' },
  '=': { str: '=' },
  'colon': { str: ':' },
  'newline': { str: '\n' },
  'str': { func: strDef, verbose: 'string' },
  'w': { func: singleSpace, verbose: 'single white space' },
  'W': { reg: /^[\s]+/, verbose: 'multiple white spaces' },
};
47
-
48
- module.exports = {
49
- tokensDefinition,
50
- };
package/utils.js DELETED
@@ -1,153 +0,0 @@
1
// Terminal escape sequences used to colour error output; NC resets the colour.
const RED = '\x1B[0;31m';
const YELLOW = '\x1B[1;33m';
const NC = '\x1B[0m';

/**
 * Make whitespace visible in a piece of text.
 *
 * Carriage returns and newlines are prefixed with '⏎', tabs become '⇥',
 * non-breaking spaces become 'nbsp' and plain spaces become '␣'.
 *
 * @param {string} v - text to decorate
 * @returns {string} text with every such character replaced
 */
function replaceInvisibleChars(v) {
  return v
    .replace(/\r/g, '⏎\r')
    .replace(/\n/g, '⏎\n')
    .replace(/\t/g, '⇥')
    // A string pattern only replaces the first occurrence; use a global
    // regex like the other replacements.
    .replace(/\xa0/g, 'nbsp')
    .replace(/[ ]/g, '␣');
}
12
-
13
/**
 * Extract the position of a token.
 *
 * @param {object} token - token with `lineStart`, `columnStart` and `len`
 * @returns {{lineNumber: number, charNumber: number, end: number}} start
 *   line, start column, and start column plus token length
 */
function tokenPosition(token) {
  const { lineStart: lineNumber, columnStart: charNumber, len } = token;
  return { lineNumber, charNumber, end: charNumber + len };
}
19
-
20
/**
 * Render the tokens around a failing token as coloured text.
 *
 * The failing token is shown in RED and the tokens from `firstToken` up to
 * (not including) it in YELLOW; both are passed through
 * `replaceInvisibleChars`. Every token containing a newline starts a new
 * numbered line.
 *
 * @param {object} token - failing token
 * @param {object} firstToken - first token of the rule that failed
 * @param {Array<object>} stream - full token stream
 * @returns {string} the rendered context, starting with NC
 */
function streamContext(token, firstToken, stream) {
  const failingIndex = token.stream_index;
  const ruleStartIndex = firstToken.stream_index;
  const failingLine = tokenPosition(token).lineNumber;
  const firstShownLine = failingLine - 1;

  // Line-number prefix appended after a newline or before the first token.
  const lineLabel = (lineNb) => `${String(` ${lineNb}`).slice(-5)}: `;

  // Colour a token value according to its position in the stream.
  const paint = (value, position) => {
    if (position === failingIndex) {
      return RED + replaceInvisibleChars(value) + NC;
    }
    if (position >= ruleStartIndex && position < failingIndex) {
      return YELLOW + replaceInvisibleChars(value) + NC;
    }
    return value;
  };

  let output = NC;
  let currentLine = 1;
  for (
    let position = 0;
    currentLine < (failingLine + 4) && stream[position];
    position += 1
  ) {
    const { value } = stream[position];
    if (value.match(/\n/)) {
      currentLine += 1;
      if (currentLine > (failingLine + 3)) {
        return output;
      }
      if (currentLine >= firstShownLine) {
        output += `${paint(value, position)}${lineLabel(currentLine)}`;
      }
    } else if (currentLine >= firstShownLine) {
      if (position === 0) {
        output += `\n${lineLabel(currentLine)}`;
      }
      output += paint(value, position);
    }
  }
  return output;
}
59
-
60
/**
 * Build a human-readable description of a parse failure and throw it.
 *
 * The failed sub-rule is listed item by item, with the item that did not
 * match in RED and the others in YELLOW. Items that are token types with a
 * `verbose` name are shown under that name, whitespace replaced by '-'.
 *
 * @param {Array<object>} stream - full token stream
 * @param {object} tokensDefinition - token definitions
 * @param {object} grammar - grammar indexed by rule name, then sub-rule index
 * @param {object} bestFailure - failure record returned by the parser
 * @throws {Error} always, carrying the formatted message
 */
function displayError(stream, tokensDefinition, grammar, bestFailure) {
  const { token, first_token: firstToken } = bestFailure;
  const subRuleItems = grammar[bestFailure.rule_name][bestFailure.sub_rule_index];
  const positions = tokenPosition(token);

  let rule = '';
  let failingToken = '';
  for (const [position, item] of subRuleItems.entries()) {
    const definition = tokensDefinition[item];
    const label = definition && definition.verbose
      ? definition.verbose.replace(/\s/g, '-')
      : item;
    if (position === bestFailure.sub_rule_token_index) {
      rule += `${RED}${label}${NC} `;
      failingToken = `${label}`;
    } else {
      rule += `${YELLOW}${label}${NC} `;
    }
  }

  const message = [
    '',
    `${RED}Parser error at line ${positions.lineNumber + 1} char ${positions.charNumber} to ${positions.end} ${NC}`,
    `Unexpected ${YELLOW}${replaceInvisibleChars(token.value)}${NC}`,
    `Best match was at rule ${bestFailure.rule_name}[${bestFailure.sub_rule_index}][${bestFailure.sub_rule_token_index}] ${rule}`,
    `token "${YELLOW}${replaceInvisibleChars(token.value)}${NC}" (type:${token.type}) doesn't match rule item ${YELLOW}${failingToken}${NC}`,
    'Context:',
    `${streamContext(token, firstToken, stream)}`,
    '',
  ].join('\n');
  throw new Error(message);
}
88
-
89
/**
 * Print a parse tree to the console, one node per line.
 *
 * Rule nodes are printed as `r <name>(<sub-rule index>)`, tokens as
 * `t <type> <value>`. Each nesting level adds one space to the prefix.
 *
 * Failure records carry `rule_name`/`sub_rule_index`, while nodes produced
 * by the generated parser carry `name`/`subRule` plus a `children` array.
 * Both shapes are recognised; checking `rule_name` alone printed every AST
 * rule node as a token with an undefined value.
 *
 * @param {object} node - AST node, token or failure record
 * @param {string} [sp=''] - indentation prefix for this node
 */
function printTree(node, sp = '') {
  if (node.rule_name) {
    console.log(`${sp}r ${node.rule_name}(${node.sub_rule_index})`);
  } else if (Array.isArray(node.children)) {
    console.log(`${sp}r ${node.name}(${node.subRule})`);
  } else {
    console.log(`${sp}t ${node.type} ${node.value}`);
  }

  if (node.children) {
    for (const child of node.children) {
      printTree(child, `${sp} `);
    }
  }
}
102
-
103
/**
 * Ensure that no grammar rule shares its name with a token type.
 *
 * @param {object} grammar - grammar indexed by rule name
 * @param {object} tokensDefinition - token definitions indexed by token type
 * @throws {Error} listing every key present in both objects
 */
function checkGrammarAndTokens(grammar, tokensDefinition) {
  const ruleNames = Object.keys(grammar);
  const tokenNames = Object.keys(tokensDefinition);
  const shared = [];
  for (const ruleName of ruleNames) {
    if (tokenNames.includes(ruleName)) {
      shared.push(ruleName);
    }
  }
  if (shared.length === 0) {
    return;
  }
  throw new Error(`Grammar and token have keys in common: ${shared}`);
}
111
-
112
/**
 * Expand the string items of a grammar into descriptor objects.
 *
 * A string item has the form `value[:alias]`. A trailing '?' on the value
 * marks it optional and a trailing '*' (checked after the '?' was removed)
 * marks it repeatable. The first item of a sub-rule is flagged as left
 * recursion when its value equals the rule name. Function items are wrapped
 * as `{ function: true, value }`.
 *
 * @param {object} rules - grammar: rule name -> array of sub-rules
 * @returns {object} same shape, every item replaced by its descriptor
 */
function preprocessGrammar(rules) {
  const describeItem = (ruleName, item, position) => {
    if (item instanceof Function) {
      return { function: true, value: item };
    }
    const parts = item.split(':');
    let value = parts[0];
    const alias = parts[1];

    const optional = value.endsWith('?');
    if (optional) {
      value = value.slice(0, -1);
    }
    const repeatable = value.endsWith('*');
    if (repeatable) {
      value = value.slice(0, -1);
    }
    const leftRecursion = position === 0 && value === ruleName;

    return {
      value,
      alias,
      optional,
      repeatable,
      leftRecursion,
    };
  };

  const processed = {};
  for (const ruleName of Object.keys(rules)) {
    processed[ruleName] = rules[ruleName].map(
      (subRule) => subRule.map((item, position) => describeItem(ruleName, item, position)),
    );
  }
  return processed;
}
146
-
147
- module.exports = {
148
- streamContext,
149
- preprocessGrammar,
150
- checkGrammarAndTokens,
151
- displayError,
152
- printTree,
153
- };