meta-parser-generator 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +199 -12
- package/dist/metaParserGenerator.d.ts +22 -0
- package/dist/metaParserGenerator.js +377 -0
- package/dist/metaParserGenerator.js.map +1 -0
- package/dist/types.d.ts +47 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +29 -0
- package/dist/utils.js +185 -0
- package/dist/utils.js.map +1 -0
- package/package.json +14 -5
- package/.editorconfig +0 -8
- package/.eslintrc.js +0 -26
- package/error.png +0 -0
- package/metaParserGenerator.js +0 -312
- package/tests/generateParser.js +0 -9
- package/tests/grammar.js +0 -23
- package/tests/parser.js +0 -445
- package/tests/test.js +0 -31
- package/tests/tokensDefinition.js +0 -50
- package/utils.js +0 -153
package/README.md
CHANGED
@@ -1,19 +1,206 @@

# Meta Parser Generator

```bash
npm install meta-parser-generator
```

Meta Parser Generator will help you generate an efficient parser from a grammar and a token definition.
Meta programming is used to generate a single self-contained parser file.

## Characteristics

* PEG parser (Parsing Expression Grammar) with ordered choice
* Packrat parsing with memoization for linear time complexity
* Direct left recursion support using Guido van Rossum's algorithm
* Parser code is generated from a grammar
* Good parsing performance (O(n) with memoization)
* Excellent error reporting with context
* Small source code (~600 lines of code), no dependencies
### Important: Grammar Order Matters

Unlike LL or LR parsers, PEG parsers use **ordered choice**. The first matching alternative is selected, and no backtracking occurs across alternatives. This means:

```javascript
// This grammar will NEVER match 'number' because 'name' matches first!
'VALUE': [
  ['name'],   // matches ANY identifier including '123abc'
  ['number'], // NEVER reached if name is defined as /^[\w]+/
]

// Correct order: more specific rules first
'VALUE': [
  ['number'], // try number first
  ['name'],   // then try name
]
```
## How to generate and use a parser

This will generate a mathematical operation parser.

```javascript
// generator.js

// only 3 possible tokens
const tokensDefinition = {
  'number': { reg: /^[0-9]+(\.[0-9]*)?/ },
  'math_operator': { reg: /^(\+|-|\*|%)/ },
  'newline': { str: '\n' }
};

const grammar = {
  // START is the convention keyword for the entry point of the grammar
  'START': [
    // necessary to accept the first line being a MATH expression
    ['MATH', 'LINE*', 'EOS'], // EOS is the End Of Stream token, added automatically by the tokenizer
    // * is the repeating modifier {0,∞}. Better than recursion as it does not use the call stack
    ['LINE*', 'EOS'],
  ],
  'LINE': [
    // we define a line as always starting with a newline
    ['newline', 'MATH'],
    ['newline'],
  ],
  'MATH': [
    // direct left recursion here
    ['MATH', 'math_operator', 'number'],
    ['number'],
  ],
};

// export the definitions so generate.js and the parser consumers can require them
module.exports = { tokensDefinition, grammar };
```
Then execute this script with `node generate.js`:

```javascript
// generate.js
const { generateParser } = require('meta-parser-generator');
const path = require('path');
const { tokensDefinition, grammar } = require('./generator');

// this generates the executable parser file
generateParser(grammar, tokensDefinition, path.resolve(__dirname, './parser.js'));
console.log('parser generated');
```
Then you can use the generated parser this way:

```javascript
const parser = require('./parser');
const { tokensDefinition, grammar } = require('./generator');
const { displayError } = require('meta-parser-generator');

function parse(input) {
  const tokens = parser.tokenize(tokensDefinition, input);
  const ast = parser.parse(tokens);
  if (!ast.success) {
    displayError(tokens, tokensDefinition, grammar, ast);
  }
  return ast;
}

let ast = parse('9+10-190.3');
console.log(ast);
```
### How does the generated parser work?

Each grammar rule you write is transformed into a function, and those grammar functions call each other until the input is successfully parsed. The parser uses:

1. **PEG Ordered Choice**: For each rule with multiple alternatives, tries them in order and returns the first match
2. **Packrat Parsing**: Memoization prevents re-parsing the same position, guaranteeing O(n) time complexity
3. **Left Recursion Handling**: Uses a special memoization strategy based on Guido van Rossum's algorithm

A simplified sketch of the shape of the generated functions is shown below.
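To make this concrete, here is a hand-written, simplified sketch of the code generated for the `MATH` rule above. It is illustrative only: the real generated file additionally records failures for error reporting and handles the `*`, `+`, `?` modifiers and named aliases.

```javascript
// Simplified sketch of generated code (not the actual output).
// One function per alternative: returns an AST node on success, false on failure.
let MATH_0 = (stream, index) => {
  let i = index;
  const children = [];
  const node = { type: 'MATH', sub_rule_index: 0, stream_index: index, children, named: {} };
  const left = MATH(stream, i); // the left-recursive 'MATH' reference
  if (!left) return false;
  children.push(left); i = left.last_index;
  if (stream[i].type !== 'math_operator') return false; // 'math_operator' token
  children.push(stream[i]); i++;
  if (stream[i].type !== 'number') return false; // 'number' token
  children.push(stream[i]); i++;
  node.last_index = i;
  return node;
};
// Packrat memoization; the left-recursive variant implements Guido van Rossum's algorithm
MATH_0 = memoize_left_recur('MATH_0', MATH_0);

// MATH_1, for the ['number'] alternative, is built the same way (omitted here).
// The rule function is PEG ordered choice: the first matching alternative wins.
function MATH(stream, index) {
  return MATH_0(stream, index) || MATH_1(stream, index);
}
```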
The JavaScript call stack is used by the generated parser. So, if you design a very recursive grammar, you might trigger a "Maximum call stack size exceeded" error on a large input.

In our example, the `MATH` grammar rule has left recursion, allowing you to parse expressions like 1+2+3+4+5+...X, where X is limited by V8's stack size.

To find out the default maximum stack size of V8, run `node --v8-options | grep stack-size`. If the default size is not enough, you can extend it (via the `--stack-size` V8 option) or rewrite your grammar.

**Best practice**: Use modifiers (`*`, `+`, `?`) instead of recursion when possible - they don't use the call stack and handle large inputs better. A sketch of such a rewrite follows.
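For example, the left-recursive `MATH` rule from the tutorial could be flattened with the `*` modifier. This is a sketch (the rule name `MATH_TAIL` is made up for illustration), and note that it produces a flat list of operator/number pairs instead of a nested left-recursive tree, so code that walks the AST must change accordingly:

```javascript
// Sketch: same language as the left-recursive MATH rule, without using the call stack.
'MATH': [
  ['number', 'MATH_TAIL*'], // a number followed by any count of operator/number pairs
],
'MATH_TAIL': [
  ['math_operator', 'number'],
],
```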
**Note**: For very large files, the memoization cache can grow significantly. The parser clears the cache between parse calls, but memory usage during parsing is proportional to input size × grammar complexity.

### AST interface

```typescript
type ASTNode = RuleNode | Token

export interface RuleNode {
  stream_index: number   // position of the first token for this rule in the token stream
  type: string           // name of the rule
  sub_rule_index: number // index of the matched alternative within this rule
  children: [ASTNode]    // list of children
  named: { [key: string]: ASTNode; } // named elements within this rule, see named aliases
}
```

At the leaves of the AST you will find the final tokens. They have a slightly different interface:

```typescript
export interface Token {
  stream_index: number // position of the token in the token stream
  type: string         // name of the token
  value: string        // the value of the token
  len: number          // shortcut for value.length
  line_start: number   // line start position in the input
  column_start: number // column start position in the input
  start: number        // character start position in the input
}
```

### Modifiers

There are 3 modifiers you can add at the end of a rule or token:

```
* is the {0,∞} repeating modifier
+ is the {1,∞} repeating modifier
? is the {0,1} conditional modifier
```

#### Example

```typescript
['PREPOSITION', 'ADJECTIVE*', 'NAME']
```

### Named alias

To facilitate your work with the AST, you can name a rule or a token using a colon:

```typescript
'MATH': [
  ['MATH', 'math_operator:operator', 'number:num'], // tokens math_operator and number
                                                    // are named operator and num
  ['number:num'], // here only number is named num
]
```

Then in the corresponding `RuleNode` you will find the `math_operator` token in the children, but also in the named object.
This is useful to more easily handle and differentiate your grammar rules:

```typescript
// a function that handles both MATH grammar rules defined above
function handle_MATH_node(node: RuleNode): string {
  const named = node.named
  // if there is an operator, we are dealing with sub rule 0
  if (named['operator']) {
    const left_recursion = handle_MATH_node(node.children[0] as RuleNode)
    return `${left_recursion} ${(named['operator'] as Token).value} ${(named['num'] as Token).value}`
  }
  return (named['num'] as Token).value
}
```
### Errors

The util function `displayError` will display detailed information about a tokenizer or parsing error. The hint given is based on the first grammar rule found that consumes the most tokens from the stream.

<img src="/error.png" width="800">
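If you prefer custom reporting over `displayError`, the object returned by a failed `parse` call is the deepest recorded failure and follows the `ParseFailure` interface from `dist/types.d.ts`. A sketch (note the synthetic `EOS` token carries no line/column information):

```javascript
// Sketch: hand-rolled error reporting from a failed parse() result.
const tokens = parser.tokenize(tokensDefinition, input);
const ast = parser.parse(tokens);
if (!ast.success) {
  const token = ast.token; // the token where the deepest failure happened
  console.error(`Parse error near '${token.value}' (line ${token.line_start + 1}, column ${token.column_start + 1})`);
  // every alternative that failed at that stream position
  for (const failure of ast.best_failure_array) {
    console.error(`  while matching ${failure.type}, alternative ${failure.sub_rule_index}`);
  }
}
```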
## Projects using this parser

* The Blop language: https://github.com/batiste/blop-language
package/dist/metaParserGenerator.d.ts
ADDED

@@ -0,0 +1,22 @@

import { TokensDefinition, Grammar } from './types';
/**
 * Generates the tokenizer function code as an array of strings
 * @param tokenDef - Token definitions mapping token names to patterns
 * @returns Array of code lines for the tokenizer
 */
export declare function generateTokenizer(tokenDef: TokensDefinition): string[];
/**
 * Generates the complete parser code from grammar and token definitions
 * @param grammar - Grammar rules defining the language structure
 * @param tokensDef - Token definitions mapping token names to patterns
 * @param debug - Whether to include debug logging in generated code
 * @returns Array of code lines for the complete parser
 */
export declare function generate(grammar: Grammar, tokensDef: TokensDefinition, debug: boolean): string[];
/**
 * Generates a parser file from grammar and token definitions
 * @param grammar - Grammar rules defining the language structure
 * @param tokensDefinition - Token definitions mapping token names to patterns
 * @param filename - Output path for the generated parser file
 */
export declare function generateParser(grammar: Grammar, tokensDefinition: TokensDefinition, filename: string): void;
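Note the split between `generate`, which returns the parser source as an array of code lines, and `generateParser`, which writes it to a file. A sketch of using `generate` directly, assuming it is re-exported from the package root the same way `generateParser` is:

```javascript
// Sketch: obtaining the generated parser source without writing a file.
// Assumes 'generate' is exposed from the package root (it is declared in this .d.ts).
const { generate } = require('meta-parser-generator');
const { tokensDefinition, grammar } = require('./generator');

const lines = generate(grammar, tokensDefinition, false); // false = no debug logging
const source = lines.join('\n');
console.log(`generated parser: ${lines.length} lines`);
```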
package/dist/metaParserGenerator.js
ADDED

@@ -0,0 +1,377 @@

"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
    __setModuleDefault(result, mod);
    return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateParser = exports.generate = exports.generateTokenizer = void 0;
const fs = __importStar(require("fs"));
const utils_1 = require("./utils");
const recordFailure = `
let best_failure;
let best_failure_array = [];
let best_failure_index = 0;

// Records parsing failures at the deepest position reached
// Collects all failures at the same position to potentially show "expected: X, Y, or Z"
function record_failure(failure, i) {
  // New deepest position reached - reset tracking
  if (i > best_failure_index) {
    best_failure_array = [];
    best_failure = null;
    best_failure_index = i;
  }
  // Record this failure
  best_failure_array.push(failure);
  // Keep first failure as primary for error messages
  if (!best_failure) {
    best_failure = failure;
  }
}


// Memoization cache for regular rules
// Note: For very large inputs, this cache can grow to O(n * m) where:
// n = input size, m = number of grammar rules
// Cache is cleared between parse() calls to prevent memory leaks
let cache = {};

function memoize(name, func) {
  return function memoize_inner(stream, index) {
    const key = \`\${name}-\${index}\`;
    let value = cache[key];
    if (value !== undefined) {
      return value;
    }
    value = func(stream, index);
    cache[key] = value;
    return value;
  };
}

// Separate cache for left-recursive rules
let cacheR = {};

// based on https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1
function memoize_left_recur(name, func) {
  return function memoize_inner(stream, index) {
    const key = \`\${name}-\${index}\`;
    let value = cacheR[key];
    if (value !== undefined) {
      return value;
    }
    // prime this rule with a failure
    cacheR[key] = false;
    let lastpos;
    let lastvalue = value;
    while (true) {
      value = func(stream, index);
      if (!value) break;
      if (value.last_index <= lastpos) break;
      lastpos = value.last_index;
      lastvalue = value;
      cacheR[key] = value;
    }
    return lastvalue;
  };
}

`;
/**
 * Generates the tokenizer function code as an array of strings
 * @param tokenDef - Token definitions mapping token names to patterns
 * @returns Array of code lines for the tokenizer
 */
function generateTokenizer(tokenDef) {
    const output = [];
    const keys = Object.keys(tokenDef);
    for (let i = 0; i < keys.length; i++) {
        const key = keys[i];
        if ((/:|\?/g).test(key)) {
            throw new Error('Reserved word in token name');
        }
    }
    output.push('function _tokenize(tokenDef, input, char, stream) {');
    output.push('  let match;');
    let key;
    for (let i = 0; i < keys.length; i++) {
        key = keys[i];
        const token = tokenDef[key];
        if (token.str) {
            const strLen = token.str.length;
            if (token.str.indexOf("'") > -1 || token.str.indexOf('\n') > -1) {
                output.push(`  if (input.substr(char, ${strLen}) === \`${token.str}\`) {`);
                output.push(`    return [\`${token.str}\`, '${key}'];`);
            }
            else {
                output.push(`  if (input.substr(char, ${strLen}) === '${token.str}') {`);
                output.push(`    return ['${token.str}', '${key}'];`);
            }
            output.push('  }');
        }
        else if (token.reg) {
            output.push(`  match = input.substring(char).match(tokenDef.${key}.reg);`);
            output.push('  if (match !== null) {');
            output.push(`    return [match[0], '${key}'];`);
            output.push('  }');
        }
        else if (token.func) {
            output.push(`  match = tokenDef.${key}.func(input.substring(char), stream);`);
            output.push('  if (match !== undefined) {');
            output.push(`    return [match, '${key}'];`);
            output.push('  }');
        }
        else {
            throw new Error(`Tokenizer error: Invalid token ${key} without a reg, str or func property`);
        }
    }
    output.push(`  return [null, '${key}'];`);
    output.push('}');
    output.push('function tokenize(tokenDef, input) {');
    output.push(`  const stream = [];
  const originalInput = input;
  let lastToken;
  let key;
  let candidate = null;
  const len = input.length;
  let char = 0;
  let index = 0;
  let line = 0;
  let column = 0;
  while (char < len) {
    [candidate, key] = _tokenize(tokenDef, originalInput, char, stream);
    if (candidate !== null) {
      const candidateLen = candidate.length;
      lastToken = {
        type: key,
        value: candidate,
        start: char,
        stream_index: index,
        len: candidateLen,
        line_start: line,
        column_start: column,
      };
      // Only split if there might be newlines (optimization)
      if (candidate.indexOf('\\n') !== -1) {
        const lines = candidate.split('\\n');
        line += lines.length - 1;
        column = lines[lines.length - 1].length;
      } else {
        column += candidateLen;
      }
      lastToken.lineEnd = line;
      lastToken.columnEnd = column;
      stream.push(lastToken);
      index++;
      char += candidateLen;
    } else {
      if (stream.length === 0) {
        throw new Error('Tokenizer error: total match failure');
      }
      if (lastToken) {
        lastToken.pointer += lastToken.len;
      }
      let msg = \`Tokenizer error, no matching token found for \${originalInput.slice(char, char + 26)}\`;
      if (lastToken) {
        msg += \` After token of type \${lastToken.type}: \${lastToken.value}\`;
      }
      const error = new Error(msg);
      error.token = lastToken;
      throw error;
    }
  }
  stream.push({
    type: 'EOS', value: '<End Of Stream>', char, index,
  });
  return stream;
}
`);
    return output;
}
exports.generateTokenizer = generateTokenizer;
/**
 * Generates code for a specific grammar rule alternative
 * @param name - Name of the grammar rule
 * @param index - Index of this alternative in the rule
 * @param ruleItems - Array of rule items (tokens and sub-rules) in this alternative
 * @param tokensDef - Token definitions
 * @param debug - Whether to include debug logging
 * @returns Array of code lines for this rule function
 */
function generatesub_rule_index(name, index, ruleItems, tokensDef, debug) {
    const output = [];
    output.push(`let ${name}_${index} = (stream, index) => {`);
    let i = 0;
    output.push('  let i = index;');
    output.push('  const children = [];');
    output.push('  const named = {};');
    output.push(`  const node = {
    children, stream_index: index, name: '${name}',
    sub_rule_index: ${index}, type: '${name}', named,
  };`);
    ruleItems.forEach((rule) => {
        // terminal rule
        if (tokensDef[rule.value] || rule.value === 'EOS') {
            debug ? output.push('  console.log(i, stream[i])') : null;
            if (rule.repeatable) {
                output.push(`  while(stream[i].type === '${rule.value}') {`);
                if (rule.alias) {
                    output.push(`    named['${rule.alias}'] ? null : named['${rule.alias}'] = []`);
                    output.push(`    named['${rule.alias}'].push(stream[i])`);
                }
                output.push('    children.push(stream[i]); i++;');
                output.push('  }');
            }
            else if (rule.optional) {
                output.push(`  if (stream[i].type === '${rule.value}') {`);
                rule.alias ? output.push(`    named['${rule.alias}'] = stream[i];`) : null;
                output.push('    children.push(stream[i]); i++;');
                output.push('  }');
            }
            else {
                output.push(`
  if (stream[i].type !== '${rule.value}') {
    if (i >= best_failure_index) {
      const failure = {
        type: '${name}', sub_rule_index: ${index},
        sub_rule_stream_index: i - index, sub_rule_token_index: ${i},
        stream_index: i, token: stream[i], first_token: stream[index], success: false,
      };
      record_failure(failure, i);
    }
    return false;
  }
`);
                rule.alias ? output.push(`  named['${rule.alias}'] = stream[i];`) : null;
                output.push('  children.push(stream[i]); i++;');
            }
            i++;
        // calling another rule in the grammar
        }
        else {
            if (rule.function) {
                output.push(`  if (!(${rule.value})(node)) { return false; }`);
            }
            else if (rule.repeatable) {
                output.push(`  let _rule_${i} = ${rule.value}(stream, i);`); // doing the call
                output.push(`  while (_rule_${i}) {`);
                if (rule.alias) {
                    output.push(`    named['${rule.alias}'] ? null : named['${rule.alias}'] = [];`);
                    output.push(`    named['${rule.alias}'].push(_rule_${i});`);
                }
                output.push(`    children.push(_rule_${i});`);
                output.push(`    i = _rule_${i}.last_index;`);
                output.push(`    _rule_${i} = ${rule.value}(stream, i);`);
                output.push('  }');
            }
            else if (!rule.optional) {
                output.push(`  const _rule_${i} = ${rule.value}(stream, i);`); // doing the call
                output.push(`  if (!_rule_${i}) return false;`);
                rule.alias ? output.push(`  named['${rule.alias}'] = _rule_${i};`) : null;
                output.push(`  children.push(_rule_${i});`);
                output.push(`  i = _rule_${i}.last_index;`);
            }
            else {
                output.push(`  const _rule_${i} = ${rule.value}(stream, i);`); // doing the call
                output.push(`  if (_rule_${i}) {`);
                output.push(`    children.push(_rule_${i});`);
                rule.alias ? output.push(`    named['${rule.alias}'] = _rule_${i};`) : null;
                output.push(`    i = _rule_${i}.last_index;`);
                output.push('  }');
            }
            i++;
        }
    });
    output.push('  node.success = i === stream.length; node.last_index = i;');
    output.push('  return node;');
    output.push('};');
    if (ruleItems[0].leftRecursion) {
        output.push(`${name}_${index} = memoize_left_recur('${name}_${index}', ${name}_${index});`);
    }
    else {
        output.push(`${name}_${index} = memoize('${name}_${index}', ${name}_${index});`);
    }
    output.push('\n');
    return output;
}
/**
 * Generates the complete parser code from grammar and token definitions
 * @param grammar - Grammar rules defining the language structure
 * @param tokensDef - Token definitions mapping token names to patterns
 * @param debug - Whether to include debug logging in generated code
 * @returns Array of code lines for the complete parser
 */
function generate(grammar, tokensDef, debug) {
    let output = [];
    (0, utils_1.checkGrammarAndTokens)(grammar, tokensDef);
    const newGrammar = (0, utils_1.preprocessGrammar)(grammar);
    const entries = Object.keys(newGrammar);
    output.push('// This code is automatically generated by the meta parser, do not modify');
    output.push('// produced with metaParserGenerator.js');
    output.push(recordFailure);
    entries.forEach((key) => {
        let i = 0;
        const metaSub = [];
        newGrammar[key].forEach((ruleItems) => {
            output = output.concat(generatesub_rule_index(key, i, ruleItems, tokensDef, debug));
            metaSub.push(`${key}_${i}`);
            i++;
        });
        output.push(`function ${key}(stream, index) {`);
        const st = metaSub.map(sub => `${sub}(stream, index)`).join('\n || ');
        output.push(`  return ${st};`);
        output.push('}');
    });
    output = output.concat(generateTokenizer(tokensDef));
    output.push(`module.exports = {
  parse: (stream) => {
    best_failure = null;
    best_failure_index = 0;
    best_failure_array = [];
    cache = {};
    cacheR = {};
    const result = START(stream, 0);
    if (!result) {
      return {
        ...best_failure,
        best_failure_array,
      }
    }
    return result;
  },
  tokenize,
};
`);
    return output;
}
exports.generate = generate;
/**
 * Generates a parser file from grammar and token definitions
 * @param grammar - Grammar rules defining the language structure
 * @param tokensDefinition - Token definitions mapping token names to patterns
 * @param filename - Output path for the generated parser file
 */
function generateParser(grammar, tokensDefinition, filename) {
    fs.writeFileSync(filename, generate(grammar, tokensDefinition, false).join('\n'));
}
exports.generateParser = generateParser;
//# sourceMappingURL=metaParserGenerator.js.map
package/dist/metaParserGenerator.js.map
ADDED

@@ -0,0 +1 @@

{"version":3,"file":"metaParserGenerator.js","sourceRoot":"","sources":["../metaParserGenerator.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AACA,uCAAyB;AACzB,mCAAmE;AAInE,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAqErB,CAAC;AAEF;;;;GAIG;AACH,SAAgB,iBAAiB,CAAC,QAA0B;IAC1D,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QACpC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE;YACvB,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;SAChD;KACF;IAED,MAAM,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;IACnE,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC5B,IAAI,GAAG,CAAC;IACR,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QACpC,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACd,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,KAAK,CAAC,GAAG,EAAE;YACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC;YAChC,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE;gBAC/D,MAAM,CAAC,IAAI,CAAC,4BAA4B,MAAM,WAAW,KAAK,CAAC,GAAG,OAAO,CAAC,CAAC;gBAC3E,MAAM,CAAC,IAAI,CAAC,iBAAiB,KAAK,CAAC,GAAG,QAAQ,GAAG,KAAK,CAAC,CAAC;aACzD;iBAAM;gBACL,MAAM,CAAC,IAAI,CAAC,4BAA4B,MAAM,UAAU,KAAK,CAAC,GAAG,MAAM,CAAC,CAAC;gBACzE,MAAM,CAAC,IAAI,CAAC,gBAAgB,KAAK,CAAC,GAAG,OAAO,GAAG,KAAK,CAAC,CAAC;aACvD;YACD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACpB;aAAM,IAAI,KAAK,CAAC,GAAG,EAAE;YACpB,MAAM,CAAC,IAAI,CAAC,kDAAkD,GAAG,QAAQ,CAAC,CAAC;YAC3E,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;YACvC,MAAM,CAAC,IAAI,CAAC,0BAA0B,GAAG,KAAK,CAAC,CAAC;YAChD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACpB;aAAM,IAAI,KAAK,CAAC,IAAI,EAAE;YACrB,MAAM,CAAC,IAAI,CAAC,sBAAsB,GAAG,uCAAuC,CAAC,CAAC;YAC9E,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;YAC5C,MAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,KAAK,CAAC,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACpB;aAAM;YACL,MAAM,IAAI,KAAK,CAAC,kCAAkC,GAAG,sCAAsC,CAAC,CAAC;SAC9F;KACF;IACD,MAAM,CAAC,IAAI,CAAC,oBAAoB,GAAG,KAAK,CAAC,CAAC;IAC1C,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEjB,MAAM,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;IACpD,MAAM,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAyDb,CAAC,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAvGD,8CAuGC;AAED;;;;;;;;GAQG;AACH,SAAS,sBAAsB,CAC7B,IAAY,EACZ,KAAa,EACb,SAA0B,EAC1B,SAA2B,EAC3B,KAAc;IAEd,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,KAAK,yBAAyB,CAAC,CAAC;IAC3D,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,MAAM,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IAChC,MAAM,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACtC,MAAM,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IACnC,MAAM,CAAC,IAAI,CAAC;4CAC8B,IAAI;sBAC1B,KAAK,YAAY,IAAI;KACtC,CAAC,CAAC;IACL,SAAS,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;QACzB,gBAAgB;QAChB,IAAI,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,KAAK,KAAK,EAAE;YACjD,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1D,IAAI,IAAI,CAAC,UAAU,EAAE;gBACnB,MAAM,CAAC,IAAI,CAAC,+BAA+B,IAAI,CAAC,KAAK,MAAM,CAAC,CAAC;gBAC7D,IAAI,IAAI,CAAC,KAAK,EAAE;oBACd,MAAM,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,KAAK,sBAAsB,IAAI,CAAC,KAAK,SAAS,CAAC,CAAC;oBAC/E,MAAM,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,KAAK,oBAAoB,CAAC,CAAC;iBAC3D;gBACD,MAAM,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;gBAClD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACpB;iBAAM,IAAI,IAAI,CAAC,QAAQ,EAAE;gBACxB,MAAM,CAAC,IAAI,CAAC,6BAA6B,IAAI,CAAC,KAAK,MAAM,CAAC,CAAC;gBAC3D,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,KAAK,iBAAiB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBAC3E,MAAM,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;gBAClD,MAAM
,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACpB;iBAAM;gBACL,MAAM,CAAC,IAAI,CAAC;4BACQ,IAAI,CAAC,KAAK;;;iBAGrB,IAAI,sBAAsB,KAAK;kEACkB,CAAC;;;;;;;CAOlE,CAAC,CAAC;gBACK,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,KAAK,iBAAiB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBACzE,MAAM,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;aACjD;YACD,CAAC,EAAE,CAAC;YACN,sCAAsC;SACrC;aAAM;YACL,IAAI,IAAI,CAAC,QAAQ,EAAE;gBACjB,MAAM,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,KAAK,4BAA4B,CAAC,CAAC;aAChE;iBAAM,IAAI,IAAI,CAAC,UAAU,EAAE;gBAC1B,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,MAAM,IAAI,CAAC,KAAK,cAAc,CAAC,CAAC,CAAC,iBAAiB;gBAC9E,MAAM,CAAC,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;gBACtC,IAAI,IAAI,CAAC,KAAK,EAAE;oBACd,MAAM,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,KAAK,sBAAsB,IAAI,CAAC,KAAK,UAAU,CAAC,CAAC;oBAChF,MAAM,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,KAAK,iBAAiB,CAAC,IAAI,CAAC,CAAC;iBAC7D;gBACD,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,IAAI,CAAC,CAAC;gBAC9C,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,cAAc,CAAC,CAAC;gBAC9C,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,IAAI,CAAC,KAAK,cAAc,CAAC,CAAC;gBAC1D,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACpB;iBAAM,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;gBACzB,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,IAAI,CAAC,KAAK,cAAc,CAAC,CAAC,CAAC,iBAAiB;gBAChF,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,iBAAiB,CAAC,CAAC;gBAChD,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,KAAK,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBAC1E,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,IAAI,CAAC,CAAC;gBAC5C,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;aAC7C;iBAAM;gBACL,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,IAAI,CAAC,KAAK,cAAc,CAAC,CAAC,CAAC,iBAAiB;gBAChF,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;gBACnC,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,IAAI,CAAC,CAAC;gBAC9C,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,KAAK,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBAC5E,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,cAAc,CAAC,CAAC;gBAC9C,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACpB;YACD,CAAC,EAAE,CAAC;SACL;IACH,CAAC,CAAC,CAAC;IACH,MAAM,CAAC,IAAI,CAAC,4DAA4D,CAAC,CAAC;IAC1E,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC9B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClB,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,aAAa,EAAE;QAC9B,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,KAAK,0BAA0B,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,IAAI,CAAC,CAAC;KAC7F;SAAM;QACL,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,KAAK,eAAe,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,IAAI,CAAC,CAAC;KAClF;IACD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClB,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,QAAQ,CAAC,OAAgB,EAAE,SAA2B,EAAE,KAAc;IACpF,IAAI,MAAM,GAAa,EAAE,CAAC;IAC1B,IAAA,6BAAqB,EAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,IAAA,yBAAiB,EAAC,OAAO,CAAC,CAAC;IAC9C,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACxC,MAAM,CAAC,IAAI,CAAC,2EAA2E,CAAC,CAAC;IACzF,MAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IACvD,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC3B,OAAO,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;QACtB,IAAI,CAAC,GAAG,CAAC,CAAC;QACV,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,UAAU,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,EAAE;YACpC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,sBAAsB,CAAC,GAAG,EAAE,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC;YACpF,OAAO,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC;YAC5B,CAAC,EAAE,CAAC;QACN,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,YAAY,GAAG,mBAAmB,CAAC,CAAC;QAChD,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,iBAAiB,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzE,MAAM,CAAC,IAAI,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC,CAAC,CAAC;IACH,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,SAAS,CAAC,CAAC,CAAC;IACrD,MAAM,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;CAkBb,CAAC,CAAC;IACD,
OAAO,MAAM,CAAC;AAChB,CAAC;AA1CD,4BA0CC;AAED;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,OAAgB,EAAE,gBAAkC,EAAE,QAAgB;IACnG,EAAE,CAAC,aAAa,CAAC,QAAQ,EACvB,QAAQ,CAAC,OAAO,EAAE,gBAAgB,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3D,CAAC;AAHD,wCAGC"}

package/dist/types.d.ts
ADDED

@@ -0,0 +1,47 @@

export type ASTNode = RuleNode | Token;
export interface RuleNode {
    stream_index: number;
    type: string;
    sub_rule_index: number;
    children: [ASTNode];
    named: {
        [key: string]: ASTNode;
    };
}
export interface Token {
    stream_index: number;
    type: string;
    value: string;
    len: number;
    line_start: number;
    column_start: number;
    start: number;
}
export interface TokenDefinition {
    str?: string;
    reg?: RegExp;
    func?: (input: string, stream: Token[]) => string | undefined;
    verbose?: string;
}
export type TokensDefinition = Record<string, TokenDefinition>;
export type Grammar = Record<string, string[][]>;
export interface ProcessedRule {
    value: string;
    alias?: string;
    optional: boolean;
    repeatable: boolean;
    leftRecursion: boolean;
    function?: boolean;
}
export type ProcessedGrammar = Record<string, ProcessedRule[][]>;
export interface ParseFailure {
    type: string;
    sub_rule_index: number;
    sub_rule_stream_index: number;
    sub_rule_token_index: number;
    stream_index: number;
    token: Token;
    first_token: Token;
    success: false;
    best_failure_array?: ParseFailure[];
}

package/dist/types.js.map
ADDED

@@ -0,0 +1 @@

{"version":3,"file":"types.js","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":""}