@mattwca/little-parser-lib 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +122 -60
- package/package.json +1 -1
- package/src/index.ts +1 -2
- package/src/parsers/ParserError.ts +19 -0
- package/src/parsers/combinators.ts +195 -0
- package/src/parsers/index.ts +4 -2
- package/src/parsers/parsers.ts +5 -193
- package/src/parsers/runners.ts +34 -0
- package/src/parsers/types.ts +2 -7
- package/src/parsers/ParsingError.ts +0 -14
- package/src/utils.ts +0 -17
package/README.md
CHANGED
|
@@ -8,8 +8,9 @@ A lightweight, flexible TypeScript library for building parsers using parser com
|
|
|
8
8
|
- 🔍 **Built-in Tokenizer**: Flexible tokenization with regex and string matching
|
|
9
9
|
- 📝 **TypeScript First**: Full type safety and IntelliSense support
|
|
10
10
|
- 🎯 **Backtracking Support**: Automatic position restoration on parse failures
|
|
11
|
+
- 🔒 **Infinite Loop Protection**: `many` combinator detects non-progressing parsers
|
|
11
12
|
- 📦 **Zero Dependencies**: Lightweight with no external runtime dependencies
|
|
12
|
-
- ✨ **Widely Compatible**: Packaged with [tsdown](https://tsdown.dev)
|
|
13
|
+
- ✨ **Widely Compatible**: Packaged with [tsdown](https://tsdown.dev) for ESM and CJS support
|
|
13
14
|
|
|
14
15
|
## Installation
|
|
15
16
|
|
|
@@ -70,11 +71,29 @@ A parser function (`ParseFn<T>`) takes a `TokenStream` and returns a `ParserResu
|
|
|
70
71
|
- `SuccessfulParserResult<T>`: Contains the parsed result
|
|
71
72
|
- `FailedParserResult`: Contains error message and position
|
|
72
73
|
|
|
74
|
+
### TokenStream
|
|
75
|
+
|
|
76
|
+
The `TokenStream` class manages token consumption and position tracking during parsing:
|
|
77
|
+
|
|
78
|
+
**Core Methods:**
|
|
79
|
+
- `peek()`: Look at the next token without consuming it
|
|
80
|
+
- `consume()`: Consume and return the next token
|
|
81
|
+
- `consumeIf(...types)`: Conditionally consume a token if it matches the specified types
|
|
82
|
+
|
|
83
|
+
**Position Management:**
|
|
84
|
+
- `storePosition()`: Save current position to the stack (for backtracking)
|
|
85
|
+
- `clearPosition()`: Remove the most recent saved position
|
|
86
|
+
- `restorePosition()`: Restore to the most recent saved position
|
|
87
|
+
|
|
88
|
+
**Utility Methods:**
|
|
89
|
+
- `peekRemainder()`: Get all remaining tokens as a string
|
|
90
|
+
- `getPositionForError()`: Get current position info for error reporting
|
|
91
|
+
|
|
73
92
|
## Parser Combinators
|
|
74
93
|
|
|
75
94
|
### `and(...parsers)`
|
|
76
95
|
|
|
77
|
-
Combines multiple parsers in sequence. All parsers must succeed.
|
|
96
|
+
Combines multiple parsers in sequence. All parsers must succeed. Returns a tuple array preserving the types of each parser's result.
|
|
78
97
|
|
|
79
98
|
```typescript
|
|
80
99
|
const parser = and(
|
|
@@ -82,6 +101,7 @@ const parser = and(
|
|
|
82
101
|
anyOf('identifier'),
|
|
83
102
|
anyOf('semicolon')
|
|
84
103
|
);
|
|
104
|
+
// Result type is [Token, Token, Token]
|
|
85
105
|
```
|
|
86
106
|
|
|
87
107
|
### `or(...parsers)`
|
|
@@ -98,7 +118,7 @@ const parser = or(
|
|
|
98
118
|
|
|
99
119
|
### `many(parser)`
|
|
100
120
|
|
|
101
|
-
Applies a parser repeatedly until it fails
|
|
121
|
+
Applies a parser repeatedly until it fails or stops making progress. Requires at least one successful match. Includes infinite loop protection by detecting when the parser doesn't advance the token position.
|
|
102
122
|
|
|
103
123
|
```typescript
|
|
104
124
|
const parser = many(anyOf('digit')); // Parses one or more digits
|
|
@@ -178,15 +198,15 @@ const parser = and(
|
|
|
178
198
|
|
|
179
199
|
### `runParser(parser, tokenStream)`
|
|
180
200
|
|
|
181
|
-
Runs a parser on a token stream. Throws `ParsingError` on failure.
|
|
201
|
+
Runs a parser on a token stream. Throws `ParserError` on failure.
|
|
182
202
|
|
|
183
203
|
```typescript
|
|
184
204
|
try {
|
|
185
205
|
const result = runParser(myParser, tokenStream);
|
|
186
206
|
console.log(result.result);
|
|
187
207
|
} catch (error) {
|
|
188
|
-
if (error instanceof
|
|
189
|
-
console.error(`Parse error at ${error.
|
|
208
|
+
if (error instanceof ParserError) {
|
|
209
|
+
console.error(`Parse error at ${error.location.line}:${error.location.column}`);
|
|
190
210
|
}
|
|
191
211
|
}
|
|
192
212
|
```
|
|
@@ -199,57 +219,18 @@ Convenience method to tokenize and parse in one step.
|
|
|
199
219
|
const result = runParserOnString(myParser, 'input string', tokenizer);
|
|
200
220
|
```
|
|
201
221
|
|
|
202
|
-
## Utilities
|
|
203
|
-
|
|
204
|
-
The library provides utility functions to help with common parser result manipulation tasks.
|
|
205
|
-
|
|
206
|
-
### `unwrapResult(items)`
|
|
207
|
-
|
|
208
|
-
Flattens nested arrays that result from combining parsers like `and` and `many`. This is particularly useful when you have deeply nested parser structures and need a flat array of results.
|
|
209
|
-
|
|
210
|
-
```typescript
|
|
211
|
-
import { unwrapResult } from '@mattwca/little-parser-lib';
|
|
212
|
-
|
|
213
|
-
// Parser results can be nested
|
|
214
|
-
const parser = and(
|
|
215
|
-
many(anyOf('letter')),
|
|
216
|
-
many(anyOf('digit'))
|
|
217
|
-
);
|
|
218
|
-
|
|
219
|
-
const result = runParser(parser, stream);
|
|
220
|
-
// result.result might be: [[token1, token2], [token3, token4]]
|
|
221
|
-
|
|
222
|
-
const flattened = unwrapResult(result.result);
|
|
223
|
-
// flattened is: [token1, token2, token3, token4]
|
|
224
|
-
```
|
|
225
|
-
|
|
226
|
-
**Parameters:**
|
|
227
|
-
- `items: (T | T[])[]` - An array that may contain nested arrays
|
|
228
|
-
|
|
229
|
-
**Returns:**
|
|
230
|
-
- `T[]` - A flattened array with all nested items extracted
|
|
231
|
-
|
|
232
|
-
**Example Use Cases:**
|
|
233
|
-
|
|
234
|
-
```typescript
|
|
235
|
-
// Use with map to process flattened results
|
|
236
|
-
const tokenParser = map(
|
|
237
|
-
and(many(anyOf('letter')), many(anyOf('digit'))),
|
|
238
|
-
(results) => unwrapResult(results).map(t => t.value).join('')
|
|
239
|
-
);
|
|
240
|
-
```
|
|
241
|
-
|
|
242
222
|
## Example: Simple Expression Parser
|
|
243
223
|
|
|
244
224
|
```typescript
|
|
245
225
|
import {
|
|
246
226
|
Tokenizer,
|
|
247
|
-
|
|
227
|
+
isSuccessfulResult,
|
|
248
228
|
anyOf,
|
|
249
229
|
and,
|
|
250
230
|
or,
|
|
251
231
|
many,
|
|
252
232
|
map,
|
|
233
|
+
optional,
|
|
253
234
|
runParserOnString
|
|
254
235
|
} from '@mattwca/little-parser-lib';
|
|
255
236
|
|
|
@@ -258,31 +239,43 @@ const tokenizer = new Tokenizer()
|
|
|
258
239
|
.withTokenType('digit', /[0-9]/)
|
|
259
240
|
.withTokenType('plus', '+')
|
|
260
241
|
.withTokenType('minus', '-')
|
|
242
|
+
.withTokenType('multiply', '*')
|
|
243
|
+
.withTokenType('divide', '/')
|
|
261
244
|
.withTokenType('whitespace', /\s/);
|
|
262
245
|
|
|
263
246
|
// Define parsers
|
|
264
247
|
const digit = anyOf('digit');
|
|
248
|
+
const ws = optional(anyOf('whitespace'));
|
|
249
|
+
|
|
250
|
+
// Parse a number (one or more digits)
|
|
265
251
|
const number = map(
|
|
266
252
|
many(digit),
|
|
267
253
|
(tokens) => parseInt(tokens.map(t => t.value).join(''))
|
|
268
254
|
);
|
|
269
255
|
|
|
256
|
+
// Parse an operator
|
|
270
257
|
const operator = or(
|
|
271
258
|
anyOf('plus'),
|
|
272
|
-
anyOf('minus')
|
|
259
|
+
anyOf('minus'),
|
|
260
|
+
anyOf('multiply'),
|
|
261
|
+
anyOf('divide')
|
|
273
262
|
);
|
|
274
263
|
|
|
264
|
+
// Parse a complete expression: number operator number
|
|
275
265
|
const expression = and(
|
|
276
266
|
number,
|
|
277
|
-
|
|
267
|
+
ws,
|
|
278
268
|
operator,
|
|
279
|
-
|
|
269
|
+
ws,
|
|
280
270
|
number
|
|
281
271
|
);
|
|
282
272
|
|
|
283
|
-
// Parse
|
|
284
|
-
const result = runParserOnString(expression, '
|
|
285
|
-
|
|
273
|
+
// Parse and extract values
|
|
274
|
+
const result = runParserOnString(expression, '42 + 8', tokenizer);
|
|
275
|
+
if (isSuccessfulResult(result)) {
|
|
276
|
+
const [leftNum, , op, , rightNum] = result.result;
|
|
277
|
+
console.log(`${leftNum} ${op.value} ${rightNum}`); // "42 + 8"
|
|
278
|
+
}
|
|
286
279
|
```
|
|
287
280
|
|
|
288
281
|
## Error Handling
|
|
@@ -293,31 +286,101 @@ The library provides detailed error messages with position information:
|
|
|
293
286
|
try {
|
|
294
287
|
const result = runParser(myParser, stream);
|
|
295
288
|
} catch (error) {
|
|
296
|
-
if (error instanceof
|
|
289
|
+
if (error instanceof ParserError) {
|
|
297
290
|
console.error(`
|
|
298
291
|
Error: ${error.message}
|
|
299
|
-
Line: ${error.
|
|
300
|
-
Column: ${error.
|
|
301
|
-
Position: ${error.
|
|
292
|
+
Line: ${error.location.line}
|
|
293
|
+
Column: ${error.location.column}
|
|
294
|
+
Position: ${error.location.position}
|
|
302
295
|
`);
|
|
303
296
|
}
|
|
304
297
|
}
|
|
305
298
|
```
|
|
306
299
|
|
|
300
|
+
## Self-referencing ("recursive") parsing
|
|
301
|
+
|
|
302
|
+
Parsing complex expressions sometimes requires a parser to refer to itself, either by calling itself directly or by calling another parser which in turn calls it.
|
|
303
|
+
|
|
304
|
+
Taking the expression parser example above, we can modify it to support parsing of nested algebraic expressions:
|
|
305
|
+
|
|
306
|
+
```typescript
|
|
307
|
+
type Expression = {
|
|
308
|
+
left: Expression | number;
|
|
309
|
+
operator: string;
|
|
310
|
+
right: Expression | number;
|
|
311
|
+
};
|
|
312
|
+
|
|
313
|
+
type AlgebraicExpression = {
|
|
314
|
+
symbol: string;
|
|
315
|
+
expression: Expression;
|
|
316
|
+
};
|
|
317
|
+
|
|
318
|
+
const tokenizer = new Tokenizer()
|
|
319
|
+
...
|
|
320
|
+
.withTokenType('left_parenthesis', '(')
|
|
321
|
+
.withTokenType('right_parenthesis', ')')
|
|
322
|
+
.withTokenType('letter', /[a-zA-Z]/);
|
|
323
|
+
|
|
324
|
+
let expression: ParseFn<number | AlgebraicExpression | Expression> | null = null;
|
|
325
|
+
|
|
326
|
+
const letter = anyOf('letter');
|
|
327
|
+
const leftParen = anyOf('left_parenthesis');
|
|
328
|
+
const rightParen = anyOf('right_parenthesis');
|
|
329
|
+
|
|
330
|
+
const algebraicExpression: ParseFn<AlgebraicExpression> = (ts) => {
|
|
331
|
+
return map(
|
|
332
|
+
and(letter, leftParen, expression!, rightParen),
|
|
333
|
+
([{ value: symbol },, expression]) => {
|
|
334
|
+
return {
|
|
335
|
+
symbol,
|
|
336
|
+
expression
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
)(ts);
|
|
340
|
+
};
|
|
341
|
+
|
|
342
|
+
expression = map(
|
|
343
|
+
and(
|
|
344
|
+
or<number | AlgebraicExpression>(number, algebraicExpression!),
|
|
345
|
+
ws,
|
|
346
|
+
operator,
|
|
347
|
+
ws,
|
|
348
|
+
or<number | AlgebraicExpression>(number, algebraicExpression!),
|
|
349
|
+
),
|
|
350
|
+
([leftExpr,, { value: operator },, rightExpr]) => ({
|
|
351
|
+
left: leftExpr,
|
|
352
|
+
operator,
|
|
353
|
+
right: rightExpr,
|
|
354
|
+
})
|
|
355
|
+
);
|
|
356
|
+
|
|
357
|
+
const result = runParserOnString(expression, 'a(3 + b(7 + 8)) + c(1 + 2)', tokenizer);
|
|
358
|
+
if (isSuccessfulResult(result)) {
|
|
359
|
+
console.log(JSON.stringify(result.result, null, 2));
|
|
360
|
+
}
|
|
361
|
+
```
|
|
362
|
+
|
|
307
363
|
## API Reference
|
|
308
364
|
|
|
309
365
|
### Classes
|
|
310
366
|
|
|
311
367
|
- `Tokenizer`: Converts input strings into tokens
|
|
312
368
|
- `TokenStream`: Manages token consumption and backtracking
|
|
313
|
-
- `
|
|
369
|
+
- `peek()`: Look at the next token without consuming it
|
|
370
|
+
- `consume()`: Get and advance to the next token
|
|
371
|
+
- `consumeIf(...types)`: Conditionally consume token if it matches given types
|
|
372
|
+
- `peekRemainder()`: Get remaining unparsed tokens as a string
|
|
373
|
+
- `storePosition()`, `clearPosition()`, `restorePosition()`: Manual backtracking control
|
|
374
|
+
- `ParserError`: Error thrown when parsing fails
|
|
314
375
|
|
|
315
376
|
### Types
|
|
316
377
|
|
|
317
378
|
- `Token`: Represents a single token with type, value, and position
|
|
318
|
-
- `TokenType`: String identifier for token types
|
|
379
|
+
- `TokenType`: String identifier for token types (or 'end_of_input')
|
|
380
|
+
- `TokenPosition`: Position info with line and column numbers
|
|
319
381
|
- `ParseFn<T>`: Function that takes a TokenStream and returns ParserResult<T>
|
|
320
382
|
- `ParserResult<T>`: Union of SuccessfulParserResult<T> and FailedParserResult
|
|
383
|
+
- `ParserErrorPosition`: Extended position info including token stream position
|
|
321
384
|
|
|
322
385
|
### Combinators
|
|
323
386
|
|
|
@@ -341,7 +404,6 @@ try {
|
|
|
341
404
|
- `runParserOnString(parser, input, tokenizer)`: Execute parser on string
|
|
342
405
|
- `isSuccessfulResult(result)`: Type guard for successful results
|
|
343
406
|
- `isFailedResult(result)`: Type guard for failed results
|
|
344
|
-
- `unwrapResult(results)`: Unwrap nested parser results
|
|
345
407
|
|
|
346
408
|
## License
|
|
347
409
|
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * The position of a parsing error, including position in the token stream,
 * source line number and column number.
 */
export type ParserErrorPosition = {
  /** Source line number at which the error was reported. */
  line: number;
  /** Source column number at which the error was reported. */
  column: number;
  /** Position in the token stream at which the error was reported. */
  position: number;
};

/**
 * Error raised when parsing fails. The message is prefixed with the line and column of the failure, and the
 * full position is kept on {@link ParserError.location} for programmatic access.
 */
export class ParserError extends Error {
  /**
   * @param message Description of what went wrong, without any position prefix.
   * @param location Where the failure occurred; stored on the instance by the `public` parameter property.
   */
  constructor(message: string, public location: ParserErrorPosition) {
    super(`Parser Error [${location.line}:${location.column}]: ${message}`);

    // Identify the subclass in stack traces and logs instead of the generic "Error".
    this.name = "ParserError";

    // Subclassing built-ins can lose the prototype chain when compiled to older targets;
    // restoring it keeps `error instanceof ParserError` reliable for callers.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import { TokenStream } from "../tokenizer";
|
|
2
|
+
import { FailedParserResult, isFailedResult, isSuccessfulResult, ParseFn, SuccessfulParserResult } from "./types";
|
|
3
|
+
|
|
4
|
+
/**
 * Sequencing combinator: runs the given parsers one after another against the same token stream and gathers
 * their results, in order, into a tuple. The first failure stops the sequence and is returned unchanged;
 * the parsers after it are not run.
 * @typeParam Parsers A tuple of the ParseFn types being sequenced. Used to infer the result tuple type.
 * @param parsers The parsers to run, in order.
 * @returns A parser yielding the tuple of results, or the first failure encountered.
 */
export function and<Parsers extends ParseFn<any>[]>(
  ...parsers: Parsers
): ParseFn<{ [K in keyof Parsers]: Parsers[K] extends ParseFn<infer R> ? R : never }> {
  return (tokenStream: TokenStream) => {
    const collected: any[] = [];

    for (const parse of parsers) {
      const outcome = parse(tokenStream);

      if (!isFailedResult(outcome)) {
        collected.push(outcome.result);
        continue;
      }

      // No position is stored or restored here: the failure is handed back exactly as produced.
      return outcome;
    }

    // The mapped tuple type cannot be derived from a plain array, hence the cast.
    return { result: collected } as any;
  };
}
|
|
29
|
+
|
|
30
|
+
/**
 * Backtracking combinator: saves the token stream position, runs the given parser, and rewinds to the saved
 * position if the parser does not succeed. On success the saved position is discarded. The parser's own
 * result is returned either way.
 * @typeParam T The type of the parse result.
 * @param parser The parser to attempt.
 * @returns A new parser that runs the given parser and backtracks when it fails.
 */
export function attempt<T>(parser: ParseFn<T>): ParseFn<T> {
  return (tokenStream: TokenStream) => {
    tokenStream.storePosition();

    const outcome = parser(tokenStream);

    if (!isSuccessfulResult(outcome)) {
      // Rewind to where we started so the caller can try something else.
      tokenStream.restorePosition();
      return outcome;
    }

    // Success: the saved position is no longer needed.
    tokenStream.clearPosition();
    return outcome;
  };
}
|
|
52
|
+
|
|
53
|
+
/**
 * Turns a parser into one that cannot fail: when the wrapped parser does not succeed, a successful result
 * carrying `null` is returned instead. Unless disabled, the wrapped parser is run through `attempt`, so the
 * token stream position is restored when it fails.
 * @typeParam T The type of the parse result.
 * @param parser The parser to make optional.
 * @param shouldBacktrack Whether to restore the token stream position when the parser fails. Defaults to `true`.
 * @returns A new parser yielding the wrapped parser's result, or `null` when it fails.
 */
export function optional<T>(parser: ParseFn<T>, shouldBacktrack: boolean = true): ParseFn<T | null> {
  return (tokenStream: TokenStream) => {
    const outcome = (shouldBacktrack ? attempt(parser) : parser)(tokenStream);

    return isSuccessfulResult(outcome) ? outcome : { result: null };
  };
}
|
|
73
|
+
|
|
74
|
+
/**
 * A combinator which tries multiple parsers in order, returning the result of the first one that succeeds.
 * Each alternative is run through `attempt`, so a failed alternative leaves the token stream where it started.
 * If every alternative fails, the failure whose reported position is furthest into the token stream is
 * returned; on a tie the earlier alternative's failure is kept.
 *
 * If there is no failure to report (for example, `or()` was called with no parsers), a failed result is
 * returned rather than `null`, so callers always receive a well-formed result.
 * @typeParam T The type of the parse result.
 * @param parsers The parsers to try.
 * @returns A new parser that tries each of the given parsers in sequence.
 */
export function or<T>(...parsers: ParseFn<T>[]): ParseFn<T> {
  return (tokenStream: TokenStream) => {
    let deepestError: FailedParserResult | null = null;
    let deepestErrorPosition = -1;

    for (const parser of parsers) {
      const result = attempt(parser)(tokenStream);

      if (isSuccessfulResult(result)) {
        return result as SuccessfulParserResult<T>;
      }

      // Strict comparison: an equally deep failure from a later alternative does not replace the earlier one.
      if (isFailedResult(result) && result.position.position > deepestErrorPosition) {
        deepestError = result;
        deepestErrorPosition = result.position.position;
      }
    }

    if (deepestError === null) {
      // Nothing was recorded (no alternatives, or none produced a usable failure). Previously this path
      // returned `null` typed as a failure.
      // NOTE(review): assumes TokenStream.getPositionForError() returns a ParserErrorPosition - confirm.
      return {
        errorMessage: "Expected one of the alternatives to match, but none did",
        position: tokenStream.getPositionForError(),
      };
    }

    return deepestError;
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Repetition combinator: applies the given parser over and over, collecting each successful result into an
 * array. Repetition stops when the parser fails or when a successful run leaves the token stream position
 * unchanged (which would otherwise loop forever). In both cases the position stored for that last run is
 * restored. A failure before anything has been collected is returned as the failure of the whole combinator.
 * @typeParam T The type of the parse result.
 * @param parser The parser to apply repeatedly.
 * @returns A new parser yielding the array of collected results, or the initial failure.
 */
export function many<T>(parser: ParseFn<T>): ParseFn<T[]> {
  return (tokenStream: TokenStream) => {
    const collected: T[] = [];

    for (;;) {
      const startPosition = tokenStream.position;
      tokenStream.storePosition();

      const outcome = parser(tokenStream);

      if (!isSuccessfulResult(outcome)) {
        tokenStream.restorePosition();

        // Only a failure with nothing collected so far counts as a failure of `many` itself.
        if (collected.length === 0) {
          return outcome;
        }

        return { result: collected };
      }

      // A success that consumed nothing would repeat forever, so stop here without recording it.
      if (tokenStream.position === startPosition) {
        tokenStream.restorePosition();
        return { result: collected };
      }

      collected.push(outcome.result);
      tokenStream.clearPosition();
    }
  };
}
|
|
152
|
+
|
|
153
|
+
/**
 * Transforms the successful result of a parser with a mapping function. A result that is not successful is
 * handed back untouched and the mapping function is not called.
 * @typeParam T The type of the original parser's result.
 * @typeParam U The type of the mapped result.
 * @param parser The parser whose result is to be mapped.
 * @param mapper A function that takes the parser's result and returns a new value.
 * @returns A new parser that applies the mapping function to the result of the original parser.
 */
export function map<T, U>(parser: ParseFn<T>, mapper: (value: T) => U): ParseFn<U> {
  return (tokenStream: TokenStream) => {
    const outcome = parser(tokenStream);

    if (!isSuccessfulResult(outcome)) {
      return outcome;
    }

    return { result: mapper(outcome.result) };
  };
}
|
|
172
|
+
|
|
173
|
+
/**
 * Adds context to a parser's failures: when the wrapped parser fails, its error message is prefixed with
 * the given label followed by `": "`. The failure position is carried over unchanged, and successful
 * results pass through as-is.
 * @typeParam T The type of the parse result.
 * @param label The label to prefix the error message with.
 * @param parser The parser to label.
 * @returns A new parser whose failures carry the labelled error message.
 */
export function label<T>(label: string, parser: ParseFn<T>): ParseFn<T> {
  return (tokenStream: TokenStream) => {
    const outcome = parser(tokenStream);

    if (isSuccessfulResult(outcome)) {
      return outcome;
    }

    const { errorMessage: originalMessage, position } = outcome;

    return {
      errorMessage: `${label}: ${originalMessage}`,
      position,
    };
  };
}
|
package/src/parsers/index.ts
CHANGED
package/src/parsers/parsers.ts
CHANGED
|
@@ -1,160 +1,10 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { TokenStream } from "../tokenizer";
|
|
2
2
|
import { Token, TokenType } from "../tokenizer/types";
|
|
3
|
-
import {
|
|
4
|
-
import { FailedParserResult, isFailedResult, isSuccessfulResult, ParseFn, ParserResult, SuccessfulParserResult } from "./types";
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Combines multiple parsers in sequence, returning an array of their results.
|
|
8
|
-
* If one of the parsers fails, the entire sequence fails.
|
|
9
|
-
*/
|
|
10
|
-
export function and(...parsers: ParseFn<any>[]): ParseFn<any[]> {
|
|
11
|
-
return (tokenStream: TokenStream) => {
|
|
12
|
-
const results: any[] = [];
|
|
13
|
-
|
|
14
|
-
for (const parser of parsers) {
|
|
15
|
-
const parseResult = parser(tokenStream);
|
|
16
|
-
|
|
17
|
-
if (isFailedResult(parseResult)) {
|
|
18
|
-
return parseResult;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
results.push(parseResult.result);
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
return { result: results };
|
|
25
|
-
};
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
/**
|
|
29
|
-
* Attempts to run a parser, restoring the token position (backtracking) if it fails.
|
|
30
|
-
*/
|
|
31
|
-
export function attempt<T>(parser: ParseFn<T>): ParseFn<T> {
|
|
32
|
-
return (tokenStream: TokenStream) => {
|
|
33
|
-
tokenStream.storePosition();
|
|
34
|
-
|
|
35
|
-
const result = parser(tokenStream);
|
|
36
|
-
|
|
37
|
-
if (isSuccessfulResult(result)) {
|
|
38
|
-
tokenStream.clearPosition();
|
|
39
|
-
} else {
|
|
40
|
-
tokenStream.restorePosition();
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
return result;
|
|
44
|
-
};
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
/**
|
|
48
|
-
* Makes a given parser optional, returns `null` if it fails.
|
|
49
|
-
*/
|
|
50
|
-
export function optional<T>(parser: ParseFn<T>, shouldBacktrack: boolean = true): ParseFn<T | null> {
|
|
51
|
-
return (tokenStream: TokenStream) => {
|
|
52
|
-
const parseFn = shouldBacktrack ? attempt(parser) : parser;
|
|
53
|
-
const result = parseFn(tokenStream);
|
|
54
|
-
|
|
55
|
-
if (isSuccessfulResult(result)) {
|
|
56
|
-
return result;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
return { result: null };
|
|
60
|
-
};
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
/**
|
|
64
|
-
* Tries multiple parsers in order, returning the result of the first successful parse.
|
|
65
|
-
* If all parsers fail, returns the error from the parser that got the furthest.
|
|
66
|
-
*/
|
|
67
|
-
export function or<T>(...parsers: ParseFn<T>[]): ParseFn<T> {
|
|
68
|
-
return (tokenStream: TokenStream) => {
|
|
69
|
-
let deepestError = null;
|
|
70
|
-
let deepestErrorPosition = -1;
|
|
71
|
-
|
|
72
|
-
for (const parser of parsers) {
|
|
73
|
-
const tryParse = attempt(parser);
|
|
74
|
-
const result = tryParse(tokenStream);
|
|
75
|
-
|
|
76
|
-
if (isSuccessfulResult(result)) {
|
|
77
|
-
return result as SuccessfulParserResult<T>;
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
if (isFailedResult(result)) {
|
|
81
|
-
const { position } = result;
|
|
82
|
-
|
|
83
|
-
if (position.position > deepestErrorPosition) {
|
|
84
|
-
deepestError = result;
|
|
85
|
-
deepestErrorPosition = position.position;
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
return deepestError as FailedParserResult;
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
/**
|
|
95
|
-
* Applies a parser repeatedly until it fails or doesn't make progress (move the position), collecting
|
|
96
|
-
* all successful results into an array. If the parser fails on the first attempt, returns a failure.
|
|
97
|
-
*/
|
|
98
|
-
export function many(parser: ParseFn<any>): ParseFn<any> {
|
|
99
|
-
return (tokenStream: TokenStream) => {
|
|
100
|
-
const results: any[] = [];
|
|
101
|
-
|
|
102
|
-
let parseFailure = null;
|
|
103
|
-
|
|
104
|
-
while (true) {
|
|
105
|
-
const positionBefore = tokenStream.position;
|
|
106
|
-
tokenStream.storePosition();
|
|
107
|
-
|
|
108
|
-
const result = parser(tokenStream);
|
|
109
|
-
|
|
110
|
-
if (isSuccessfulResult(result)) {
|
|
111
|
-
const positionAfter = tokenStream.position;
|
|
112
|
-
|
|
113
|
-
// Check if the parser made any progress - if it didn't, we break out to avoid infinite loops.
|
|
114
|
-
if (positionAfter === positionBefore) {
|
|
115
|
-
tokenStream.restorePosition();
|
|
116
|
-
break;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
results.push(result.result);
|
|
120
|
-
tokenStream.clearPosition();
|
|
121
|
-
} else {
|
|
122
|
-
parseFailure = result;
|
|
123
|
-
tokenStream.restorePosition();
|
|
124
|
-
break;
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
if (parseFailure && results.length === 0) {
|
|
129
|
-
return parseFailure;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
return { result: results };
|
|
133
|
-
};
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
/**
|
|
137
|
-
* Labels a parser with a custom error message for better context if it fails.
|
|
138
|
-
*/
|
|
139
|
-
export function label<T>(label: string, parser: ParseFn<T>): ParseFn<T> {
|
|
140
|
-
return (tokenStream: TokenStream) => {
|
|
141
|
-
const result = parser(tokenStream);
|
|
142
|
-
|
|
143
|
-
if (isSuccessfulResult(result)) {
|
|
144
|
-
return result;
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
const errorMessage = `${label}: ${result.errorMessage}`;
|
|
148
|
-
|
|
149
|
-
return {
|
|
150
|
-
errorMessage,
|
|
151
|
-
position: result.position,
|
|
152
|
-
};
|
|
153
|
-
}
|
|
154
|
-
}
|
|
3
|
+
import { ParseFn } from "./types";
|
|
155
4
|
|
|
156
5
|
/**
|
|
157
6
|
* In-built utility parser that parses any token except those of the specified type(s).
|
|
7
|
+
* @param types The token types to exclude.
|
|
158
8
|
*/
|
|
159
9
|
export function anyExcept(...types: TokenType[]): ParseFn<Token> {
|
|
160
10
|
return (tokenStream: TokenStream) => {
|
|
@@ -173,6 +23,7 @@ export function anyExcept(...types: TokenType[]): ParseFn<Token> {
|
|
|
173
23
|
|
|
174
24
|
/**
|
|
175
25
|
* In-built utility parser that parses any token of the specified type(s).
|
|
26
|
+
* @param types The token types to match.
|
|
176
27
|
*/
|
|
177
28
|
export function anyOf(...types: TokenType[]): ParseFn<Token> {
|
|
178
29
|
return (tokenStream: TokenStream) => {
|
|
@@ -191,6 +42,7 @@ export function anyOf(...types: TokenType[]): ParseFn<Token> {
|
|
|
191
42
|
|
|
192
43
|
/**
|
|
193
44
|
* In-built utility parser that ensures the end of input has been reached.
|
|
45
|
+
* Returns a failed result if there are remaining tokens.
|
|
194
46
|
*/
|
|
195
47
|
export function endOfInput(): ParseFn {
|
|
196
48
|
return (tokenStream: TokenStream) => {
|
|
@@ -205,44 +57,4 @@ export function endOfInput(): ParseFn {
|
|
|
205
57
|
|
|
206
58
|
return { result: null };
|
|
207
59
|
};
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
/**
|
|
211
|
-
* Transforms the result of a parser using a given mapping function.
|
|
212
|
-
* @param parser The parser whose result is to be transformed.
|
|
213
|
-
* @param mapFn A function that takes the parser's result and returns a new value.
|
|
214
|
-
* @returns A new parser that applies the mapping function to the result of the original parser.
|
|
215
|
-
*/
|
|
216
|
-
export function map<T, U>(parser: ParseFn<T>, mapFn: (value: T) => U): ParseFn<U> {
|
|
217
|
-
return (tokenStream: TokenStream) => {
|
|
218
|
-
const result = parser(tokenStream);
|
|
219
|
-
|
|
220
|
-
if (isSuccessfulResult(result)) {
|
|
221
|
-
return { result: mapFn(result.result) };
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
return result;
|
|
225
|
-
};
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
/**
|
|
229
|
-
* Runs a parser on a given TokenStream, throwing an error if parsing fails.
|
|
230
|
-
*/
|
|
231
|
-
export function runParser<T>(parser: ParseFn<T>, tokenStream: TokenStream): ParserResult<T> {
|
|
232
|
-
const test = parser(tokenStream);
|
|
233
|
-
if (isFailedResult(test)) {
|
|
234
|
-
throw new ParsingError(test.errorMessage, test.position);
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
return test;
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
/**
|
|
241
|
-
* Runs a parser on a given input string, using the provided tokenizer to generate tokens.
|
|
242
|
-
*/
|
|
243
|
-
export function runParserOnString<T>(parser: ParseFn<T>, input: string, tokenizer: Tokenizer): ParserResult<T> {
|
|
244
|
-
const tokens = tokenizer.tokenize(input);
|
|
245
|
-
const stream = new TokenStream(tokens);
|
|
246
|
-
|
|
247
|
-
return runParser<T>(parser, stream);
|
|
248
60
|
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { Tokenizer, TokenStream } from "../tokenizer";
|
|
2
|
+
import { ParserError } from "./ParserError";
|
|
3
|
+
import { isFailedResult, ParseFn, ParserResult } from "./types";
|
|
4
|
+
|
|
5
|
+
/**
 * Executes a parser against a TokenStream and converts a failed result into a thrown exception.
 * @param parser The parse function to be run.
 * @param tokenStream The TokenStream to parse.
 * @returns The parser's result when it did not fail.
 * @throws {ParserError} If the parser fails. Built from the failure's error message and position.
 */
export function runParser<T>(parser: ParseFn<T>, tokenStream: TokenStream): ParserResult<T> {
  const outcome = parser(tokenStream);

  if (!isFailedResult(outcome)) {
    return outcome;
  }

  const { errorMessage, position } = outcome;
  throw new ParserError(errorMessage, position);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Convenience wrapper that tokenizes an input string with the supplied tokenizer, wraps the tokens in a
 * new TokenStream, and runs the parser on it via `runParser`.
 * @param parser The parse function to be run.
 * @param input The input string to parse.
 * @param tokenizer The tokenizer to use for tokenizing the input string.
 * @returns The ParserResult of the parsing operation.
 * @throws {ParserError} If the parser fails. Includes error and position information.
 */
export function runParserOnString<T>(parser: ParseFn<T>, input: string, tokenizer: Tokenizer): ParserResult<T> {
  return runParser<T>(parser, new TokenStream(tokenizer.tokenize(input)));
}
|
package/src/parsers/types.ts
CHANGED
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
import { TokenStream } from "../tokenizer";
|
|
2
|
-
|
|
3
|
-
export type ParsingErrorPosition = {
|
|
4
|
-
line: number;
|
|
5
|
-
column: number;
|
|
6
|
-
position: number;
|
|
7
|
-
}
|
|
2
|
+
import { ParserErrorPosition } from "./ParserError";
|
|
8
3
|
|
|
9
4
|
export type SuccessfulParserResult<T> = {
|
|
10
5
|
result: T;
|
|
@@ -12,7 +7,7 @@ export type SuccessfulParserResult<T> = {
|
|
|
12
7
|
|
|
13
8
|
export type FailedParserResult = {
|
|
14
9
|
errorMessage: string;
|
|
15
|
-
position:
|
|
10
|
+
position: ParserErrorPosition;
|
|
16
11
|
}
|
|
17
12
|
|
|
18
13
|
export type ParserResult<T> = SuccessfulParserResult<T> | FailedParserResult;
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import { ParsingErrorPosition } from "./types";
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Represents a parsing error with a specific message.
|
|
5
|
-
*/
|
|
6
|
-
export class ParsingError extends Error {
|
|
7
|
-
public location: ParsingErrorPosition;
|
|
8
|
-
|
|
9
|
-
constructor(message: string, location: ParsingErrorPosition) {
|
|
10
|
-
super(`Parsing Error [${location.line}:${location.column}]: ${message}`);
|
|
11
|
-
|
|
12
|
-
this.location = location;
|
|
13
|
-
}
|
|
14
|
-
}
|
package/src/utils.ts
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
/**
 * Flattens an array whose elements are either individual items or (possibly nested) arrays of items into a
 * single flat array, preserving order. Useful for flattening parser results built with the `and` and `many`
 * combinators, which can produce nested arrays.
 *
 * Items are appended one at a time rather than spread into `push`, so very large nested arrays do not
 * exceed the engine's limit on call arguments.
 * @param items - An array of items or nested arrays of items to be unwrapped.
 * @returns A new flattened array containing all individual items; the input is not modified.
 */
export const unwrapResult = <T>(items: (T | T[])[]): T[] => {
  const flattened: T[] = [];

  // Walks one level of nesting, descending into arrays and appending everything else to the shared output.
  const collect = (entries: readonly (T | T[])[]): void => {
    for (const entry of entries) {
      if (Array.isArray(entry)) {
        collect(entry);
      } else {
        flattened.push(entry);
      }
    }
  };

  collect(items);
  return flattened;
};
|