@mattwca/little-parser-lib 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +311 -0
- package/package.json +47 -0
- package/src/index.ts +2 -0
- package/src/parser/ParsingError.ts +14 -0
- package/src/parser/index.ts +3 -0
- package/src/parser/parser.ts +255 -0
- package/src/parser/types.ts +28 -0
- package/src/tokenizer/TokenStream.ts +80 -0
- package/src/tokenizer/Tokenizer.ts +54 -0
- package/src/tokenizer/index.ts +3 -0
- package/src/tokenizer/types.ts +12 -0
package/README.md
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
# @mattwca/little-parser-lib
|
|
2
|
+
|
|
3
|
+
A lightweight, flexible TypeScript library for building parsers using parser combinators. Create powerful parsers by combining simple, reusable parsing functions.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🚀 **Parser Combinators**: Build complex parsers from simple building blocks
|
|
8
|
+
- 🔍 **Built-in Tokenizer**: Flexible tokenization with regex and string matching
|
|
9
|
+
- 📝 **TypeScript First**: Full type safety and IntelliSense support
|
|
10
|
+
- 🎯 **Backtracking Support**: Automatic position restoration on parse failures
|
|
11
|
+
- 📦 **Zero Dependencies**: Lightweight with no external runtime dependencies
|
|
12
|
+
- ✨ Packaged with [tsdown](https://tsdown.dev)
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install @mattwca/little-parser-lib
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
```typescript
|
|
23
|
+
import { Tokenizer, TokenStream, anyOf, and, many, runParser } from '@mattwca/little-parser-lib';
|
|
24
|
+
|
|
25
|
+
// 1. Define your tokenizer
|
|
26
|
+
const tokenizer = new Tokenizer()
|
|
27
|
+
.withTokenType('letter', /[a-zA-Z]/)
|
|
28
|
+
.withTokenType('digit', /[0-9]/)
|
|
29
|
+
.withTokenType('whitespace', /\s/);
|
|
30
|
+
|
|
31
|
+
// 2. Tokenize your input
|
|
32
|
+
const tokens = tokenizer.tokenize('hello123');
|
|
33
|
+
const stream = new TokenStream(tokens);
|
|
34
|
+
|
|
35
|
+
// 3. Create a parser using combinators
|
|
36
|
+
const parser = and(
|
|
37
|
+
many(anyOf('letter')),
|
|
38
|
+
many(anyOf('digit'))
|
|
39
|
+
);
|
|
40
|
+
|
|
41
|
+
// 4. Run the parser
|
|
42
|
+
const result = runParser(parser, stream);
|
|
43
|
+
console.log(result); // { result: [[...letters], [...digits]] }
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Core Concepts
|
|
47
|
+
|
|
48
|
+
### Tokenizer
|
|
49
|
+
|
|
50
|
+
The `Tokenizer` class converts raw input strings into tokens. Each token has a type, value, and position.
|
|
51
|
+
|
|
52
|
+
```typescript
|
|
53
|
+
const tokenizer = new Tokenizer()
|
|
54
|
+
.withTokenType('number', /[0-9]/)
|
|
55
|
+
.withTokenType('operator', /[+\-*/]/)
|
|
56
|
+
.withTokenType('whitespace', /\s/);
|
|
57
|
+
|
|
58
|
+
const tokens = tokenizer.tokenize('1 + 2');
|
|
59
|
+
// [
|
|
60
|
+
// { type: 'number', value: '1', position: { line: 1, column: 1 } },
|
|
61
|
+
// { type: 'whitespace', value: ' ', position: { line: 1, column: 2 } },
|
|
62
|
+
// { type: 'operator', value: '+', position: { line: 1, column: 3 } },
|
|
63
|
+
// ...
|
|
64
|
+
// ]
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Parser Functions
|
|
68
|
+
|
|
69
|
+
A parser function (`ParseFn<T>`) takes a `TokenStream` and returns a `ParserResult<T>`, which can be either:
|
|
70
|
+
- `SuccessfulParserResult<T>`: Contains the parsed result
|
|
71
|
+
- `FailedParserResult`: Contains error message and position
|
|
72
|
+
|
|
73
|
+
## Parser Combinators
|
|
74
|
+
|
|
75
|
+
### `and(...parsers)`
|
|
76
|
+
|
|
77
|
+
Combines multiple parsers in sequence. All parsers must succeed.
|
|
78
|
+
|
|
79
|
+
```typescript
|
|
80
|
+
const parser = and(
|
|
81
|
+
anyOf('keyword'),
|
|
82
|
+
anyOf('identifier'),
|
|
83
|
+
anyOf('semicolon')
|
|
84
|
+
);
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### `or(...parsers)`
|
|
88
|
+
|
|
89
|
+
Tries parsers in order, returns the first successful result. If all fail, returns the deepest error.
|
|
90
|
+
|
|
91
|
+
```typescript
|
|
92
|
+
const parser = or(
|
|
93
|
+
anyOf('keyword'),
|
|
94
|
+
anyOf('identifier'),
|
|
95
|
+
anyOf('operator')
|
|
96
|
+
);
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### `many(parser)`
|
|
100
|
+
|
|
101
|
+
Applies a parser repeatedly until it fails (requires at least one success).
|
|
102
|
+
|
|
103
|
+
```typescript
|
|
104
|
+
const parser = many(anyOf('digit')); // Parses one or more digits
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### `optional(parser, shouldBacktrack?)`
|
|
108
|
+
|
|
109
|
+
Makes a parser optional. Returns `null` if it fails.
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
const parser = optional(anyOf('sign')); // Sign is optional
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### `attempt(parser)`
|
|
116
|
+
|
|
117
|
+
Wraps a parser with automatic backtracking on failure.
|
|
118
|
+
|
|
119
|
+
```typescript
|
|
120
|
+
const parser = attempt(
|
|
121
|
+
and(anyOf('keyword'), anyOf('identifier'))
|
|
122
|
+
);
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### `map(parser, mapFn)`
|
|
126
|
+
|
|
127
|
+
Transforms the result of a parser using a mapping function.
|
|
128
|
+
|
|
129
|
+
```typescript
|
|
130
|
+
const digitParser = anyOf('digit');
|
|
131
|
+
const numberParser = map(
|
|
132
|
+
many(digitParser),
|
|
133
|
+
(tokens) => parseInt(tokens.map(t => t.value).join(''))
|
|
134
|
+
);
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### `label(label, parser)`
|
|
138
|
+
|
|
139
|
+
Adds a custom label to parser errors for better debugging.
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
const parser = label(
|
|
143
|
+
'function declaration',
|
|
144
|
+
and(anyOf('function'), anyOf('identifier'))
|
|
145
|
+
);
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Built-in Parsers
|
|
149
|
+
|
|
150
|
+
### `anyOf(...types)`
|
|
151
|
+
|
|
152
|
+
Parses any token matching the specified type(s).
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
const parser = anyOf('letter', 'digit', 'underscore');
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### `anyExcept(...types)`
|
|
159
|
+
|
|
160
|
+
Parses any token NOT matching the specified type(s).
|
|
161
|
+
|
|
162
|
+
```typescript
|
|
163
|
+
const parser = anyExcept('whitespace', 'newline');
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### `endOfInput()`
|
|
167
|
+
|
|
168
|
+
Ensures the end of input has been reached.
|
|
169
|
+
|
|
170
|
+
```typescript
|
|
171
|
+
const parser = and(
|
|
172
|
+
myMainParser,
|
|
173
|
+
endOfInput() // Ensure nothing left to parse
|
|
174
|
+
);
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Running Parsers
|
|
178
|
+
|
|
179
|
+
### `runParser(parser, tokenStream)`
|
|
180
|
+
|
|
181
|
+
Runs a parser on a token stream. Throws `ParsingError` on failure.
|
|
182
|
+
|
|
183
|
+
```typescript
|
|
184
|
+
try {
|
|
185
|
+
const result = runParser(myParser, tokenStream);
|
|
186
|
+
console.log(result.result);
|
|
187
|
+
} catch (error) {
|
|
188
|
+
if (error instanceof ParsingError) {
|
|
189
|
+
console.error(`Parse error at ${error.position.line}:${error.position.column}`);
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### `runParserOnString(parser, input, tokenizer)`
|
|
195
|
+
|
|
196
|
+
Convenience method to tokenize and parse in one step.
|
|
197
|
+
|
|
198
|
+
```typescript
|
|
199
|
+
const result = runParserOnString(myParser, 'input string', tokenizer);
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Example: Simple Expression Parser
|
|
203
|
+
|
|
204
|
+
```typescript
|
|
205
|
+
import {
|
|
206
|
+
Tokenizer,
|
|
207
|
+
TokenStream,
|
|
208
|
+
anyOf,
|
|
209
|
+
and,
|
|
210
|
+
or,
|
|
211
|
+
many,
|
|
212
|
+
map,
|
|
213
|
+
runParserOnString
|
|
214
|
+
} from '@mattwca/little-parser-lib';
|
|
215
|
+
|
|
216
|
+
// Define tokenizer
|
|
217
|
+
const tokenizer = new Tokenizer()
|
|
218
|
+
.withTokenType('digit', /[0-9]/)
|
|
219
|
+
.withTokenType('plus', '+')
|
|
220
|
+
.withTokenType('minus', '-')
|
|
221
|
+
.withTokenType('whitespace', /\s/);
|
|
222
|
+
|
|
223
|
+
// Define parsers
|
|
224
|
+
const digit = anyOf('digit');
|
|
225
|
+
const number = map(
|
|
226
|
+
many(digit),
|
|
227
|
+
(tokens) => parseInt(tokens.map(t => t.value).join(''))
|
|
228
|
+
);
|
|
229
|
+
|
|
230
|
+
const operator = or(
|
|
231
|
+
anyOf('plus'),
|
|
232
|
+
anyOf('minus')
|
|
233
|
+
);
|
|
234
|
+
|
|
235
|
+
const expression = and(
|
|
236
|
+
number,
|
|
237
|
+
optional(anyOf('whitespace')),
|
|
238
|
+
operator,
|
|
239
|
+
optional(anyOf('whitespace')),
|
|
240
|
+
number
|
|
241
|
+
);
|
|
242
|
+
|
|
243
|
+
// Parse
|
|
244
|
+
const result = runParserOnString(expression, '10 + 5', tokenizer);
|
|
245
|
+
console.log(result.result); // [10, {whitespace token}, {operator token}, {whitespace token}, 5]
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Error Handling
|
|
249
|
+
|
|
250
|
+
The library provides detailed error messages with position information:
|
|
251
|
+
|
|
252
|
+
```typescript
|
|
253
|
+
try {
|
|
254
|
+
const result = runParser(myParser, stream);
|
|
255
|
+
} catch (error) {
|
|
256
|
+
if (error instanceof ParsingError) {
|
|
257
|
+
console.error(`
|
|
258
|
+
Error: ${error.message}
|
|
259
|
+
Line: ${error.position.line}
|
|
260
|
+
Column: ${error.position.column}
|
|
261
|
+
Position: ${error.position.position}
|
|
262
|
+
`);
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## API Reference
|
|
268
|
+
|
|
269
|
+
### Classes
|
|
270
|
+
|
|
271
|
+
- `Tokenizer`: Converts input strings into tokens
|
|
272
|
+
- `TokenStream`: Manages token consumption and backtracking
|
|
273
|
+
- `ParsingError`: Error thrown when parsing fails
|
|
274
|
+
|
|
275
|
+
### Types
|
|
276
|
+
|
|
277
|
+
- `Token`: Represents a single token with type, value, and position
|
|
278
|
+
- `TokenType`: String identifier for token types
|
|
279
|
+
- `ParseFn<T>`: Function that takes a TokenStream and returns ParserResult<T>
|
|
280
|
+
- `ParserResult<T>`: Union of SuccessfulParserResult<T> and FailedParserResult
|
|
281
|
+
|
|
282
|
+
### Combinators
|
|
283
|
+
|
|
284
|
+
- `and(...parsers)`: Sequential combination
|
|
285
|
+
- `or(...parsers)`: Alternative combination
|
|
286
|
+
- `many(parser)`: One or more repetitions
|
|
287
|
+
- `optional(parser)`: Optional parser
|
|
288
|
+
- `attempt(parser)`: Parser with backtracking
|
|
289
|
+
- `map(parser, fn)`: Transform parser result
|
|
290
|
+
- `label(label, parser)`: Add error label
|
|
291
|
+
|
|
292
|
+
### Parsers
|
|
293
|
+
|
|
294
|
+
- `anyOf(...types)`: Match any of specified token types
|
|
295
|
+
- `anyExcept(...types)`: Match any token except specified types
|
|
296
|
+
- `endOfInput()`: Match end of input
|
|
297
|
+
|
|
298
|
+
### Utilities
|
|
299
|
+
|
|
300
|
+
- `runParser(parser, stream)`: Execute parser on token stream
|
|
301
|
+
- `runParserOnString(parser, input, tokenizer)`: Execute parser on string
|
|
302
|
+
- `isSuccessfulResult(result)`: Type guard for successful results
|
|
303
|
+
- `isFailedResult(result)`: Type guard for failed results
|
|
304
|
+
|
|
305
|
+
## License
|
|
306
|
+
|
|
307
|
+
MIT
|
|
308
|
+
|
|
309
|
+
## Author
|
|
310
|
+
|
|
311
|
+
@mattwca
|
package/package.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mattwca/little-parser-lib",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "A lightweight, flexible TypeScript library for building parsers using parser combinators.",
|
|
5
|
+
"main": "./dist/index.cjs",
|
|
6
|
+
"module": "./dist/index.mjs",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.mjs",
|
|
12
|
+
"require": "./dist/index.cjs"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"dist",
|
|
17
|
+
"src",
|
|
18
|
+
"README.md"
|
|
19
|
+
],
|
|
20
|
+
"scripts": {
|
|
21
|
+
"clean": "rm -rf dist",
|
|
22
|
+
"build": "npm run clean && tsdown",
|
|
23
|
+
"build:dev": "npm run clean && tsdown --dev"
|
|
24
|
+
},
|
|
25
|
+
"keywords": [
|
|
26
|
+
"parser",
|
|
27
|
+
"combinators",
|
|
28
|
+
"parser-combinators",
|
|
29
|
+
"parsing"
|
|
30
|
+
],
|
|
31
|
+
"author": "@mattwca",
|
|
32
|
+
"license": "MIT",
|
|
33
|
+
"devDependencies": {
|
|
34
|
+
"@types/jest": "^30.0.0",
|
|
35
|
+
"@types/node": "^25.0.3",
|
|
36
|
+
"jest": "^30.2.0",
|
|
37
|
+
"ts-jest": "^29.4.6",
|
|
38
|
+
"tsdown": "^0.18.3",
|
|
39
|
+
"typescript": "^5.9.3"
|
|
40
|
+
},
|
|
41
|
+
"engines": {
|
|
42
|
+
"node": ">=14"
|
|
43
|
+
},
|
|
44
|
+
"repository": {
|
|
45
|
+
"url": "https://github.com/mattwca/little-parser-lib"
|
|
46
|
+
}
|
|
47
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { ParsingErrorPosition } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Represents a parsing error with a specific message.
|
|
5
|
+
*/
|
|
6
|
+
export class ParsingError extends Error {
|
|
7
|
+
public location: ParsingErrorPosition;
|
|
8
|
+
|
|
9
|
+
constructor(message: string, location: ParsingErrorPosition) {
|
|
10
|
+
super(`Parsing Error [${location.line}:${location.column}]: ${message}`);
|
|
11
|
+
|
|
12
|
+
this.location = location;
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import { Tokenizer, TokenStream } from "../tokenizer";
|
|
2
|
+
import { Token, TokenType } from "../tokenizer/types";
|
|
3
|
+
import { ParsingError } from "./ParsingError";
|
|
4
|
+
import { FailedParserResult, isFailedResult, isSuccessfulResult, ParseFn, ParserResult, SuccessfulParserResult } from "./types";
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Combines multiple parsers in sequence, returning an array of their results.
|
|
8
|
+
* If one of the parsers fails, the entire sequence fails.
|
|
9
|
+
*/
|
|
10
|
+
export function and(...parsers: ParseFn<any>[]): ParseFn<any[]> {
|
|
11
|
+
return (tokenStream: TokenStream) => {
|
|
12
|
+
const results: any[] = [];
|
|
13
|
+
|
|
14
|
+
for (const parser of parsers) {
|
|
15
|
+
const parseResult = parser(tokenStream);
|
|
16
|
+
|
|
17
|
+
if (isFailedResult(parseResult)) {
|
|
18
|
+
return parseResult;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
results.push(parseResult.result);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
return { result: results };
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Attempts to run a parser, restoring the token position (backtracking) if it fails.
|
|
30
|
+
*/
|
|
31
|
+
export function attempt<T>(parser: ParseFn<T>): ParseFn<T> {
|
|
32
|
+
return (tokenStream: TokenStream) => {
|
|
33
|
+
tokenStream.storePosition();
|
|
34
|
+
|
|
35
|
+
const result = parser(tokenStream);
|
|
36
|
+
|
|
37
|
+
if (isSuccessfulResult(result)) {
|
|
38
|
+
tokenStream.clearPosition();
|
|
39
|
+
} else {
|
|
40
|
+
tokenStream.restorePosition();
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
return result;
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Makes a given parser optional, returns `null` if it fails.
|
|
49
|
+
*/
|
|
50
|
+
export function optional<T>(parser: ParseFn<T>, shouldBacktrack: boolean = true): ParseFn<T | null> {
|
|
51
|
+
return (tokenStream: TokenStream) => {
|
|
52
|
+
const parseFn = shouldBacktrack ? attempt(parser) : parser;
|
|
53
|
+
const result = parseFn(tokenStream);
|
|
54
|
+
|
|
55
|
+
if (isSuccessfulResult(result)) {
|
|
56
|
+
return result;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
return { result: null };
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Tries multiple parsers in order, returning the result of the first successful parse.
|
|
65
|
+
* If all parsers fail, returns the error from the parser that got the furthest.
|
|
66
|
+
*/
|
|
67
|
+
export function or<T>(...parsers: ParseFn<T>[]): ParseFn<T> {
|
|
68
|
+
return (tokenStream: TokenStream) => {
|
|
69
|
+
let deepestError = null;
|
|
70
|
+
let deepestErrorPosition = -1;
|
|
71
|
+
|
|
72
|
+
for (const parser of parsers) {
|
|
73
|
+
const tryParse = attempt(parser);
|
|
74
|
+
const result = tryParse(tokenStream);
|
|
75
|
+
|
|
76
|
+
if (isSuccessfulResult(result)) {
|
|
77
|
+
return result as SuccessfulParserResult<T>;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if (isFailedResult(result)) {
|
|
81
|
+
const { position } = result;
|
|
82
|
+
|
|
83
|
+
if (position.position > deepestErrorPosition) {
|
|
84
|
+
deepestError = result;
|
|
85
|
+
deepestErrorPosition = position.position;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return deepestError as FailedParserResult;
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Applies a parser repeatedly until it fails, collecting all successful results into an array.
|
|
96
|
+
* If the parser fails on the first attempt, returns a failure.
|
|
97
|
+
*/
|
|
98
|
+
export function many(parser: ParseFn<any>): ParseFn<any> {
|
|
99
|
+
return (tokenStream: TokenStream) => {
|
|
100
|
+
const results: any[] = [];
|
|
101
|
+
|
|
102
|
+
let parseFailure = null;
|
|
103
|
+
|
|
104
|
+
while (true) {
|
|
105
|
+
tokenStream.storePosition();
|
|
106
|
+
|
|
107
|
+
const result = parser(tokenStream);
|
|
108
|
+
if (isSuccessfulResult(result)) {
|
|
109
|
+
results.push(result.result);
|
|
110
|
+
tokenStream.clearPosition();
|
|
111
|
+
} else {
|
|
112
|
+
parseFailure = result;
|
|
113
|
+
tokenStream.restorePosition();
|
|
114
|
+
break;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if (parseFailure && results.length === 0) {
|
|
119
|
+
return parseFailure;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return { result: results };
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* Labels a parser with a custom error message for better context if it fails.
|
|
128
|
+
*/
|
|
129
|
+
export function label<T>(label: string, parser: ParseFn<T>): ParseFn<T> {
|
|
130
|
+
return (tokenStream: TokenStream) => {
|
|
131
|
+
const result = parser(tokenStream);
|
|
132
|
+
|
|
133
|
+
console.log('label parse result:', result);
|
|
134
|
+
|
|
135
|
+
if (isSuccessfulResult(result)) {
|
|
136
|
+
return result;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
const errorMessage = `${label}: ${result.errorMessage}`;
|
|
140
|
+
|
|
141
|
+
console.log('labeled error message:', errorMessage);
|
|
142
|
+
|
|
143
|
+
return {
|
|
144
|
+
errorMessage,
|
|
145
|
+
position: result.position,
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* In-built utility parser that parses any token except those of the specified type(s).
|
|
152
|
+
*/
|
|
153
|
+
export function anyExcept(...types: TokenType[]): ParseFn<Token> {
|
|
154
|
+
return (tokenStream: TokenStream) => {
|
|
155
|
+
const token = tokenStream.consume();
|
|
156
|
+
|
|
157
|
+
if (!token || types.includes(token.type)) {
|
|
158
|
+
return {
|
|
159
|
+
errorMessage: `Expected any token not of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`,
|
|
160
|
+
position: tokenStream.getPositionForError(),
|
|
161
|
+
};
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
return { result: token };
|
|
165
|
+
};
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/**
|
|
169
|
+
* In-built utility parser that parses any token of the specified type(s).
|
|
170
|
+
*/
|
|
171
|
+
export function anyOf(...types: TokenType[]): ParseFn<Token> {
|
|
172
|
+
return (tokenStream: TokenStream) => {
|
|
173
|
+
const token = tokenStream.consume();
|
|
174
|
+
|
|
175
|
+
if (!token || !types.includes(token.type)) {
|
|
176
|
+
return {
|
|
177
|
+
errorMessage: `Expected token of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`,
|
|
178
|
+
position: tokenStream.getPositionForError(),
|
|
179
|
+
};
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
return { result: token }
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* In-built utility parser that ensures the end of input has been reached.
|
|
188
|
+
*/
|
|
189
|
+
export function endOfInput(): ParseFn {
|
|
190
|
+
return (tokenStream: TokenStream) => {
|
|
191
|
+
const token = tokenStream.consume();
|
|
192
|
+
|
|
193
|
+
if (token && token.type !== 'end_of_input') {
|
|
194
|
+
return {
|
|
195
|
+
errorMessage: `Expected end of input, but got token of type ${token.type}`,
|
|
196
|
+
position: tokenStream.getPositionForError(),
|
|
197
|
+
};
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
return { result: null };
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
export function parseName(): ParseFn<any> {
|
|
205
|
+
return (tokenStream: TokenStream) => {
|
|
206
|
+
const validTokenTypes: TokenType[] = ['letter', 'digit', 'minus', 'underscore'];
|
|
207
|
+
const parseValidToken = anyOf(...validTokenTypes);
|
|
208
|
+
|
|
209
|
+
const parser = and(anyOf('letter'), many(parseValidToken));
|
|
210
|
+
const result = parser(tokenStream);
|
|
211
|
+
|
|
212
|
+
if (isSuccessfulResult(result)) {
|
|
213
|
+
console.log(result);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
return result;
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
/**
|
|
221
|
+
* Transforms the result of a parser using a given mapping function.
|
|
222
|
+
*/
|
|
223
|
+
export function map<T, U>(parser: ParseFn<T>, mapFn: (value: T) => U): ParseFn<U> {
|
|
224
|
+
return (tokenStream: TokenStream) => {
|
|
225
|
+
const result = parser(tokenStream);
|
|
226
|
+
|
|
227
|
+
if (isSuccessfulResult(result)) {
|
|
228
|
+
return { result: mapFn(result.result) };
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
return result;
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Runs a parser on a given TokenStream, throwing an error if parsing fails.
|
|
237
|
+
*/
|
|
238
|
+
export function runParser<T>(parser: ParseFn<T>, tokenStream: TokenStream): ParserResult<T> {
|
|
239
|
+
const test = parser(tokenStream);
|
|
240
|
+
if (isFailedResult(test)) {
|
|
241
|
+
throw new ParsingError(test.errorMessage, test.position);
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
return test;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Runs a parser on a given input string, using the provided tokenizer to generate tokens.
|
|
249
|
+
*/
|
|
250
|
+
export function runParserOnString<T>(parser: ParseFn<T>, input: string, tokenizer: Tokenizer): ParserResult<T> {
|
|
251
|
+
const tokens = tokenizer.tokenize(input);
|
|
252
|
+
const stream = new TokenStream(tokens);
|
|
253
|
+
|
|
254
|
+
return runParser<T>(parser, stream);
|
|
255
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { TokenStream } from "../tokenizer";
|
|
2
|
+
|
|
3
|
+
export type ParsingErrorPosition = {
|
|
4
|
+
line: number;
|
|
5
|
+
column: number;
|
|
6
|
+
position: number;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export type SuccessfulParserResult<T> = {
|
|
10
|
+
result: T;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export type FailedParserResult = {
|
|
14
|
+
errorMessage: string;
|
|
15
|
+
position: ParsingErrorPosition;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export type ParserResult<T> = SuccessfulParserResult<T> | FailedParserResult;
|
|
19
|
+
|
|
20
|
+
export type ParseFn<T = null> = (tokenStream: TokenStream) => ParserResult<T>;
|
|
21
|
+
|
|
22
|
+
export const isSuccessfulResult = <T>(result: ParserResult<T>): result is SuccessfulParserResult<T> => {
|
|
23
|
+
return (result as SuccessfulParserResult<T>).result !== undefined;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export const isFailedResult = <T>(result: ParserResult<T>): result is FailedParserResult => {
|
|
27
|
+
return (result as FailedParserResult).errorMessage !== undefined;
|
|
28
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { Token, TokenType } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Represents a stream of tokens for parsing, including methods to consume and peek tokens,
|
|
5
|
+
* as well as state and method for managing the parsing position.
|
|
6
|
+
*/
|
|
7
|
+
export class TokenStream {
|
|
8
|
+
public position: number;
|
|
9
|
+
public positionStack: number[];
|
|
10
|
+
|
|
11
|
+
constructor (private tokens: Token[]) {
|
|
12
|
+
this.position = 0;
|
|
13
|
+
this.positionStack = [];
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Peeks at the next token in the stream, without consuming it.
|
|
18
|
+
* @returns The next token, or null if we're at the end of the stream.
|
|
19
|
+
*/
|
|
20
|
+
public peek(): Token | null {
|
|
21
|
+
return this.tokens[this.position] || null;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Consumes and returns the next token in the stream.
|
|
26
|
+
* @returns The consumed token, or null if we're at the end of the stream.
|
|
27
|
+
*/
|
|
28
|
+
public consume(): Token | null {
|
|
29
|
+
return this.tokens[this.position++] || null;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Consumes a token if it matches the expected type.
|
|
34
|
+
* @returns The consumed token, or null if the next token does not match the expected type.
|
|
35
|
+
*/
|
|
36
|
+
public consumeIf(...types: TokenType[]): Token | null {
|
|
37
|
+
const token = this.peek();
|
|
38
|
+
if (token && types.includes(token.type)) {
|
|
39
|
+
return this.consume();
|
|
40
|
+
}
|
|
41
|
+
return null;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Stores the current position in the position stack.
|
|
46
|
+
*/
|
|
47
|
+
public storePosition() {
|
|
48
|
+
this.positionStack.push(this.position);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Clears the last stored position without restoring it.
|
|
53
|
+
*/
|
|
54
|
+
public clearPosition() {
|
|
55
|
+
this.positionStack.pop();
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Restores the last stored position from the position stack.
|
|
60
|
+
*/
|
|
61
|
+
public restorePosition() {
|
|
62
|
+
const pos = this.positionStack.pop();
|
|
63
|
+
if (pos !== undefined) {
|
|
64
|
+
this.position = pos;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
public peekRemainder(): string {
|
|
69
|
+
return this.tokens.slice(this.position).map(t => t.value).join('');
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
public getPositionForError(): { line: number; column: number, position: number } {
|
|
73
|
+
const tokenToUse = this.peek() || this.tokens[this.tokens.length - 1];
|
|
74
|
+
|
|
75
|
+
return {
|
|
76
|
+
position: this.position,
|
|
77
|
+
...tokenToUse.position,
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { Token, TokenType } from "./types";
|
|
2
|
+
|
|
3
|
+
export const LetterRegex = /[a-zA-Z]/;
|
|
4
|
+
export const DigitRegex = /[0-9]/;
|
|
5
|
+
export const WhitespaceRegex = /\s+/;
|
|
6
|
+
export const NewLineRegex = /\n/;
|
|
7
|
+
|
|
8
|
+
export class Tokenizer {
|
|
9
|
+
constructor(
|
|
10
|
+
private tokenTypeMatchers: { matcher: string | RegExp, type: TokenType }[] = [],
|
|
11
|
+
private newLineTokenType: TokenType | null = null
|
|
12
|
+
) {}
|
|
13
|
+
|
|
14
|
+
public withTokenType(type: TokenType, matcher: string | RegExp): Tokenizer {
|
|
15
|
+
this.tokenTypeMatchers.push({ matcher, type });
|
|
16
|
+
return this;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
public tokenize(input: string): Token[] {
|
|
20
|
+
const position = {
|
|
21
|
+
line: 1,
|
|
22
|
+
column: 0,
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
return input.split('').reduce<Token[]>((tokens, char) => {
|
|
26
|
+
position.column += 1;
|
|
27
|
+
|
|
28
|
+
let matched = false;
|
|
29
|
+
for (const { matcher, type } of this.tokenTypeMatchers.values()) {
|
|
30
|
+
if (typeof matcher === 'string' && char === matcher) {
|
|
31
|
+
tokens.push({ type, value: char, position: { ...position }});
|
|
32
|
+
matched = true;
|
|
33
|
+
break;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
if (matcher instanceof RegExp && matcher.test(char)) {
|
|
37
|
+
tokens.push({ type, value: char, position: { ...position }});
|
|
38
|
+
matched = true;
|
|
39
|
+
break;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
if (matched && type === this.newLineTokenType) {
|
|
43
|
+
position.line += 1;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if (!matched) {
|
|
48
|
+
throw new Error(`No token type matched for character: ${char}`);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
return tokens;
|
|
52
|
+
}, []);
|
|
53
|
+
}
|
|
54
|
+
}
|