@mattwca/little-parser-lib 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,311 @@
1
+ # @mattwca/little-parser-lib
2
+
3
+ A lightweight, flexible TypeScript library for building parsers using parser combinators. Create powerful parsers by combining simple, reusable parsing functions.
4
+
5
+ ## Features
6
+
7
+ - 🚀 **Parser Combinators**: Build complex parsers from simple building blocks
8
+ - 🔍 **Built-in Tokenizer**: Flexible tokenization with regex and string matching
9
+ - 📝 **TypeScript First**: Full type safety and IntelliSense support
10
+ - 🎯 **Backtracking Support**: Automatic position restoration on parse failures
11
+ - 📦 **Zero Dependencies**: Lightweight with no external runtime dependencies
12
+ - ✨ Packaged with [tsdown](https://tsdown.dev)
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ npm install @mattwca/little-parser-lib
18
+ ```
19
+
20
+ ## Quick Start
21
+
22
+ ```typescript
23
+ import { Tokenizer, TokenStream, anyOf, and, many, runParser } from '@mattwca/little-parser-lib';
24
+
25
+ // 1. Define your tokenizer
26
+ const tokenizer = new Tokenizer()
27
+ .withTokenType('letter', /[a-zA-Z]/)
28
+ .withTokenType('digit', /[0-9]/)
29
+ .withTokenType('whitespace', /\s/);
30
+
31
+ // 2. Tokenize your input
32
+ const tokens = tokenizer.tokenize('hello123');
33
+ const stream = new TokenStream(tokens);
34
+
35
+ // 3. Create a parser using combinators
36
+ const parser = and(
37
+ many(anyOf('letter')),
38
+ many(anyOf('digit'))
39
+ );
40
+
41
+ // 4. Run the parser
42
+ const result = runParser(parser, stream);
43
+ console.log(result); // { result: [[...letters], [...digits]] }
44
+ ```
45
+
46
+ ## Core Concepts
47
+
48
+ ### Tokenizer
49
+
50
+ The `Tokenizer` class converts raw input strings into tokens. Each token has a type, value, and position.
51
+
52
+ ```typescript
53
+ const tokenizer = new Tokenizer()
54
+ .withTokenType('number', /[0-9]/)
55
+ .withTokenType('operator', /[+\-*/]/)
56
+ .withTokenType('whitespace', /\s/);
57
+
58
+ const tokens = tokenizer.tokenize('1 + 2');
59
+ // [
60
+ // { type: 'number', value: '1', position: { line: 1, column: 1 } },
61
+ // { type: 'whitespace', value: ' ', position: { line: 1, column: 2 } },
62
+ // { type: 'operator', value: '+', position: { line: 1, column: 3 } },
63
+ // ...
64
+ // ]
65
+ ```
66
+
67
+ ### Parser Functions
68
+
69
+ A parser function (`ParseFn<T>`) takes a `TokenStream` and returns a `ParserResult<T>`, which can be either:
70
+ - `SuccessfulParserResult<T>`: Contains the parsed result
71
+ - `FailedParserResult`: Contains error message and position
72
+
73
+ ## Parser Combinators
74
+
75
+ ### `and(...parsers)`
76
+
77
+ Combines multiple parsers in sequence. All parsers must succeed.
78
+
79
+ ```typescript
80
+ const parser = and(
81
+ anyOf('keyword'),
82
+ anyOf('identifier'),
83
+ anyOf('semicolon')
84
+ );
85
+ ```
86
+
87
+ ### `or(...parsers)`
88
+
89
+ Tries parsers in order, returns the first successful result. If all fail, returns the deepest error.
90
+
91
+ ```typescript
92
+ const parser = or(
93
+ anyOf('keyword'),
94
+ anyOf('identifier'),
95
+ anyOf('operator')
96
+ );
97
+ ```
98
+
99
+ ### `many(parser)`
100
+
101
+ Applies a parser repeatedly until it fails (requires at least one success).
102
+
103
+ ```typescript
104
+ const parser = many(anyOf('digit')); // Parses one or more digits
105
+ ```
106
+
107
+ ### `optional(parser, shouldBacktrack?)`
108
+
109
+ Makes a parser optional. Returns `null` if it fails.
110
+
111
+ ```typescript
112
+ const parser = optional(anyOf('sign')); // Sign is optional
113
+ ```
114
+
115
+ ### `attempt(parser)`
116
+
117
+ Wraps a parser with automatic backtracking on failure.
118
+
119
+ ```typescript
120
+ const parser = attempt(
121
+ and(anyOf('keyword'), anyOf('identifier'))
122
+ );
123
+ ```
124
+
125
+ ### `map(parser, mapFn)`
126
+
127
+ Transforms the result of a parser using a mapping function.
128
+
129
+ ```typescript
130
+ const digitParser = anyOf('digit');
131
+ const numberParser = map(
132
+ many(digitParser),
133
+ (tokens) => parseInt(tokens.map(t => t.value).join(''))
134
+ );
135
+ ```
136
+
137
+ ### `label(label, parser)`
138
+
139
+ Adds a custom label to parser errors for better debugging.
140
+
141
+ ```typescript
142
+ const parser = label(
143
+ 'function declaration',
144
+ and(anyOf('function'), anyOf('identifier'))
145
+ );
146
+ ```
147
+
148
+ ## Built-in Parsers
149
+
150
+ ### `anyOf(...types)`
151
+
152
+ Parses any token matching the specified type(s).
153
+
154
+ ```typescript
155
+ const parser = anyOf('letter', 'digit', 'underscore');
156
+ ```
157
+
158
+ ### `anyExcept(...types)`
159
+
160
+ Parses any token NOT matching the specified type(s).
161
+
162
+ ```typescript
163
+ const parser = anyExcept('whitespace', 'newline');
164
+ ```
165
+
166
+ ### `endOfInput()`
167
+
168
+ Ensures the end of input has been reached.
169
+
170
+ ```typescript
171
+ const parser = and(
172
+ myMainParser,
173
+ endOfInput() // Ensure nothing left to parse
174
+ );
175
+ ```
176
+
177
+ ## Running Parsers
178
+
179
+ ### `runParser(parser, tokenStream)`
180
+
181
+ Runs a parser on a token stream. Throws `ParsingError` on failure.
182
+
183
+ ```typescript
184
+ try {
185
+ const result = runParser(myParser, tokenStream);
186
+ console.log(result.result);
187
+ } catch (error) {
188
+ if (error instanceof ParsingError) {
189
+ console.error(`Parse error at ${error.position.line}:${error.position.column}`);
190
+ }
191
+ }
192
+ ```
193
+
194
+ ### `runParserOnString(parser, input, tokenizer)`
195
+
196
+ Convenience method to tokenize and parse in one step.
197
+
198
+ ```typescript
199
+ const result = runParserOnString(myParser, 'input string', tokenizer);
200
+ ```
201
+
202
+ ## Example: Simple Expression Parser
203
+
204
+ ```typescript
205
+ import {
206
+ Tokenizer,
207
+ TokenStream,
208
+ anyOf,
209
+ and,
210
+ or,
211
+ many,
212
+ map,
+ optional,
213
+ runParserOnString
214
+ } from '@mattwca/little-parser-lib';
215
+
216
+ // Define tokenizer
217
+ const tokenizer = new Tokenizer()
218
+ .withTokenType('digit', /[0-9]/)
219
+ .withTokenType('plus', '+')
220
+ .withTokenType('minus', '-')
221
+ .withTokenType('whitespace', /\s/);
222
+
223
+ // Define parsers
224
+ const digit = anyOf('digit');
225
+ const number = map(
226
+ many(digit),
227
+ (tokens) => parseInt(tokens.map(t => t.value).join(''))
228
+ );
229
+
230
+ const operator = or(
231
+ anyOf('plus'),
232
+ anyOf('minus')
233
+ );
234
+
235
+ const expression = and(
236
+ number,
237
+ optional(anyOf('whitespace')),
238
+ operator,
239
+ optional(anyOf('whitespace')),
240
+ number
241
+ );
242
+
243
+ // Parse
244
+ const result = runParserOnString(expression, '10 + 5', tokenizer);
245
+ console.log(result.result); // [10, {whitespace token}, {operator token}, {whitespace token}, 5]
246
+ ```
247
+
248
+ ## Error Handling
249
+
250
+ The library provides detailed error messages with position information:
251
+
252
+ ```typescript
253
+ try {
254
+ const result = runParser(myParser, stream);
255
+ } catch (error) {
256
+ if (error instanceof ParsingError) {
257
+ console.error(`
258
+ Error: ${error.message}
259
+ Line: ${error.position.line}
260
+ Column: ${error.position.column}
261
+ Position: ${error.position.position}
262
+ `);
263
+ }
264
+ }
265
+ ```
266
+
267
+ ## API Reference
268
+
269
+ ### Classes
270
+
271
+ - `Tokenizer`: Converts input strings into tokens
272
+ - `TokenStream`: Manages token consumption and backtracking
273
+ - `ParsingError`: Error thrown when parsing fails
274
+
275
+ ### Types
276
+
277
+ - `Token`: Represents a single token with type, value, and position
278
+ - `TokenType`: String identifier for token types
279
+ - `ParseFn<T>`: Function that takes a TokenStream and returns ParserResult<T>
280
+ - `ParserResult<T>`: Union of SuccessfulParserResult<T> and FailedParserResult
281
+
282
+ ### Combinators
283
+
284
+ - `and(...parsers)`: Sequential combination
285
+ - `or(...parsers)`: Alternative combination
286
+ - `many(parser)`: One or more repetitions
287
+ - `optional(parser)`: Optional parser
288
+ - `attempt(parser)`: Parser with backtracking
289
+ - `map(parser, fn)`: Transform parser result
290
+ - `label(label, parser)`: Add error label
291
+
292
+ ### Parsers
293
+
294
+ - `anyOf(...types)`: Match any of specified token types
295
+ - `anyExcept(...types)`: Match any token except specified types
296
+ - `endOfInput()`: Match end of input
297
+
298
+ ### Utilities
299
+
300
+ - `runParser(parser, stream)`: Execute parser on token stream
301
+ - `runParserOnString(parser, input, tokenizer)`: Execute parser on string
302
+ - `isSuccessfulResult(result)`: Type guard for successful results
303
+ - `isFailedResult(result)`: Type guard for failed results
304
+
305
+ ## License
306
+
307
+ MIT
308
+
309
+ ## Author
310
+
311
+ @mattwca
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@mattwca/little-parser-lib",
3
+ "version": "1.0.0",
4
+ "description": "A lightweight TypeScript parser combinator library with a built-in tokenizer.",
5
+ "main": "./dist/index.cjs",
6
+ "module": "./dist/index.mjs",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.mjs",
12
+ "require": "./dist/index.cjs"
13
+ }
14
+ },
15
+ "files": [
16
+ "dist",
17
+ "src",
18
+ "README.md"
19
+ ],
20
+ "scripts": {
21
+ "clean": "rm -rf dist",
22
+ "build": "npm run clean && tsdown",
23
+ "build:dev": "npm run clean && tsdown --dev"
24
+ },
25
+ "keywords": [
26
+ "parser",
27
+ "combinators",
28
+ "parser-combinators",
29
+ "parsing"
30
+ ],
31
+ "author": "@mattwca",
32
+ "license": "MIT",
33
+ "devDependencies": {
34
+ "@types/jest": "^30.0.0",
35
+ "@types/node": "^25.0.3",
36
+ "jest": "^30.2.0",
37
+ "ts-jest": "^29.4.6",
38
+ "tsdown": "^0.18.3",
39
+ "typescript": "^5.9.3"
40
+ },
41
+ "engines": {
42
+ "node": ">=14"
43
+ },
44
+ "repository": {
45
+ "url": "https://github.com/mattwca/little-parser-lib"
46
+ }
47
+ }
package/src/index.ts ADDED
@@ -0,0 +1,2 @@
1
// Package entry point: re-exports the tokenizer and parser public APIs so
// consumers can import everything from the package root.
export * from './tokenizer';
export * from './parser';
@@ -0,0 +1,14 @@
1
+ import { ParsingErrorPosition } from "./types";
2
+
3
+ /**
4
+ * Represents a parsing error with a specific message.
5
+ */
6
+ export class ParsingError extends Error {
7
+ public location: ParsingErrorPosition;
8
+
9
+ constructor(message: string, location: ParsingErrorPosition) {
10
+ super(`Parsing Error [${location.line}:${location.column}]: ${message}`);
11
+
12
+ this.location = location;
13
+ }
14
+ }
@@ -0,0 +1,3 @@
1
// Parser module barrel: exposes the combinators, the ParsingError class, and
// the parser result types.
export * from './parser';
export * from './ParsingError';
export * from './types';
@@ -0,0 +1,255 @@
1
+ import { Tokenizer, TokenStream } from "../tokenizer";
2
+ import { Token, TokenType } from "../tokenizer/types";
3
+ import { ParsingError } from "./ParsingError";
4
+ import { FailedParserResult, isFailedResult, isSuccessfulResult, ParseFn, ParserResult, SuccessfulParserResult } from "./types";
5
+
6
+ /**
7
+ * Combines multiple parsers in sequence, returning an array of their results.
8
+ * If one of the parsers fails, the entire sequence fails.
9
+ */
10
+ export function and(...parsers: ParseFn<any>[]): ParseFn<any[]> {
11
+ return (tokenStream: TokenStream) => {
12
+ const results: any[] = [];
13
+
14
+ for (const parser of parsers) {
15
+ const parseResult = parser(tokenStream);
16
+
17
+ if (isFailedResult(parseResult)) {
18
+ return parseResult;
19
+ }
20
+
21
+ results.push(parseResult.result);
22
+ }
23
+
24
+ return { result: results };
25
+ };
26
+ }
27
+
28
+ /**
29
+ * Attempts to run a parser, restoring the token position (backtracking) if it fails.
30
+ */
31
+ export function attempt<T>(parser: ParseFn<T>): ParseFn<T> {
32
+ return (tokenStream: TokenStream) => {
33
+ tokenStream.storePosition();
34
+
35
+ const result = parser(tokenStream);
36
+
37
+ if (isSuccessfulResult(result)) {
38
+ tokenStream.clearPosition();
39
+ } else {
40
+ tokenStream.restorePosition();
41
+ }
42
+
43
+ return result;
44
+ };
45
+ }
46
+
47
+ /**
48
+ * Makes a given parser optional, returns `null` if it fails.
49
+ */
50
+ export function optional<T>(parser: ParseFn<T>, shouldBacktrack: boolean = true): ParseFn<T | null> {
51
+ return (tokenStream: TokenStream) => {
52
+ const parseFn = shouldBacktrack ? attempt(parser) : parser;
53
+ const result = parseFn(tokenStream);
54
+
55
+ if (isSuccessfulResult(result)) {
56
+ return result;
57
+ }
58
+
59
+ return { result: null };
60
+ };
61
+ }
62
+
63
+ /**
64
+ * Tries multiple parsers in order, returning the result of the first successful parse.
65
+ * If all parsers fail, returns the error from the parser that got the furthest.
66
+ */
67
+ export function or<T>(...parsers: ParseFn<T>[]): ParseFn<T> {
68
+ return (tokenStream: TokenStream) => {
69
+ let deepestError = null;
70
+ let deepestErrorPosition = -1;
71
+
72
+ for (const parser of parsers) {
73
+ const tryParse = attempt(parser);
74
+ const result = tryParse(tokenStream);
75
+
76
+ if (isSuccessfulResult(result)) {
77
+ return result as SuccessfulParserResult<T>;
78
+ }
79
+
80
+ if (isFailedResult(result)) {
81
+ const { position } = result;
82
+
83
+ if (position.position > deepestErrorPosition) {
84
+ deepestError = result;
85
+ deepestErrorPosition = position.position;
86
+ }
87
+ }
88
+ }
89
+
90
+ return deepestError as FailedParserResult;
91
+ };
92
+ }
93
+
94
+ /**
95
+ * Applies a parser repeatedly until it fails, collecting all successful results into an array.
96
+ * If the parser fails on the first attempt, returns a failure.
97
+ */
98
+ export function many(parser: ParseFn<any>): ParseFn<any> {
99
+ return (tokenStream: TokenStream) => {
100
+ const results: any[] = [];
101
+
102
+ let parseFailure = null;
103
+
104
+ while (true) {
105
+ tokenStream.storePosition();
106
+
107
+ const result = parser(tokenStream);
108
+ if (isSuccessfulResult(result)) {
109
+ results.push(result.result);
110
+ tokenStream.clearPosition();
111
+ } else {
112
+ parseFailure = result;
113
+ tokenStream.restorePosition();
114
+ break;
115
+ }
116
+ }
117
+
118
+ if (parseFailure && results.length === 0) {
119
+ return parseFailure;
120
+ }
121
+
122
+ return { result: results };
123
+ };
124
+ }
125
+
126
+ /**
127
+ * Labels a parser with a custom error message for better context if it fails.
128
+ */
129
+ export function label<T>(label: string, parser: ParseFn<T>): ParseFn<T> {
130
+ return (tokenStream: TokenStream) => {
131
+ const result = parser(tokenStream);
132
+
133
+ console.log('label parse result:', result);
134
+
135
+ if (isSuccessfulResult(result)) {
136
+ return result;
137
+ }
138
+
139
+ const errorMessage = `${label}: ${result.errorMessage}`;
140
+
141
+ console.log('labeled error message:', errorMessage);
142
+
143
+ return {
144
+ errorMessage,
145
+ position: result.position,
146
+ };
147
+ }
148
+ }
149
+
150
+ /**
151
+ * In-built utility parser that parses any token except those of the specified type(s).
152
+ */
153
+ export function anyExcept(...types: TokenType[]): ParseFn<Token> {
154
+ return (tokenStream: TokenStream) => {
155
+ const token = tokenStream.consume();
156
+
157
+ if (!token || types.includes(token.type)) {
158
+ return {
159
+ errorMessage: `Expected any token not of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`,
160
+ position: tokenStream.getPositionForError(),
161
+ };
162
+ }
163
+
164
+ return { result: token };
165
+ };
166
+ }
167
+
168
+ /**
169
+ * In-built utility parser that parses any token of the specified type(s).
170
+ */
171
+ export function anyOf(...types: TokenType[]): ParseFn<Token> {
172
+ return (tokenStream: TokenStream) => {
173
+ const token = tokenStream.consume();
174
+
175
+ if (!token || !types.includes(token.type)) {
176
+ return {
177
+ errorMessage: `Expected token of type ${types.join(', ')}, but got ${token?.type || 'end of input'}`,
178
+ position: tokenStream.getPositionForError(),
179
+ };
180
+ }
181
+
182
+ return { result: token }
183
+ }
184
+ }
185
+
186
+ /**
187
+ * In-built utility parser that ensures the end of input has been reached.
188
+ */
189
+ export function endOfInput(): ParseFn {
190
+ return (tokenStream: TokenStream) => {
191
+ const token = tokenStream.consume();
192
+
193
+ if (token && token.type !== 'end_of_input') {
194
+ return {
195
+ errorMessage: `Expected end of input, but got token of type ${token.type}`,
196
+ position: tokenStream.getPositionForError(),
197
+ };
198
+ }
199
+
200
+ return { result: null };
201
+ };
202
+ }
203
+
204
+ export function parseName(): ParseFn<any> {
205
+ return (tokenStream: TokenStream) => {
206
+ const validTokenTypes: TokenType[] = ['letter', 'digit', 'minus', 'underscore'];
207
+ const parseValidToken = anyOf(...validTokenTypes);
208
+
209
+ const parser = and(anyOf('letter'), many(parseValidToken));
210
+ const result = parser(tokenStream);
211
+
212
+ if (isSuccessfulResult(result)) {
213
+ console.log(result);
214
+ }
215
+
216
+ return result;
217
+ }
218
+ }
219
+
220
+ /**
221
+ * Transforms the result of a parser using a given mapping function.
222
+ */
223
+ export function map<T, U>(parser: ParseFn<T>, mapFn: (value: T) => U): ParseFn<U> {
224
+ return (tokenStream: TokenStream) => {
225
+ const result = parser(tokenStream);
226
+
227
+ if (isSuccessfulResult(result)) {
228
+ return { result: mapFn(result.result) };
229
+ }
230
+
231
+ return result;
232
+ };
233
+ }
234
+
235
+ /**
236
+ * Runs a parser on a given TokenStream, throwing an error if parsing fails.
237
+ */
238
+ export function runParser<T>(parser: ParseFn<T>, tokenStream: TokenStream): ParserResult<T> {
239
+ const test = parser(tokenStream);
240
+ if (isFailedResult(test)) {
241
+ throw new ParsingError(test.errorMessage, test.position);
242
+ }
243
+
244
+ return test;
245
+ }
246
+
247
+ /**
248
+ * Runs a parser on a given input string, using the provided tokenizer to generate tokens.
249
+ */
250
+ export function runParserOnString<T>(parser: ParseFn<T>, input: string, tokenizer: Tokenizer): ParserResult<T> {
251
+ const tokens = tokenizer.tokenize(input);
252
+ const stream = new TokenStream(tokens);
253
+
254
+ return runParser<T>(parser, stream);
255
+ }
@@ -0,0 +1,28 @@
1
+ import { TokenStream } from "../tokenizer";
2
+
3
+ export type ParsingErrorPosition = {
4
+ line: number;
5
+ column: number;
6
+ position: number;
7
+ }
8
+
9
+ export type SuccessfulParserResult<T> = {
10
+ result: T;
11
+ }
12
+
13
+ export type FailedParserResult = {
14
+ errorMessage: string;
15
+ position: ParsingErrorPosition;
16
+ }
17
+
18
+ export type ParserResult<T> = SuccessfulParserResult<T> | FailedParserResult;
19
+
20
+ export type ParseFn<T = null> = (tokenStream: TokenStream) => ParserResult<T>;
21
+
22
+ export const isSuccessfulResult = <T>(result: ParserResult<T>): result is SuccessfulParserResult<T> => {
23
+ return (result as SuccessfulParserResult<T>).result !== undefined;
24
+ }
25
+
26
+ export const isFailedResult = <T>(result: ParserResult<T>): result is FailedParserResult => {
27
+ return (result as FailedParserResult).errorMessage !== undefined;
28
+ }
@@ -0,0 +1,80 @@
1
+ import { Token, TokenType } from "./types";
2
+
3
+ /**
4
+ * Represents a stream of tokens for parsing, including methods to consume and peek tokens,
5
+ * as well as state and method for managing the parsing position.
6
+ */
7
+ export class TokenStream {
8
+ public position: number;
9
+ public positionStack: number[];
10
+
11
+ constructor (private tokens: Token[]) {
12
+ this.position = 0;
13
+ this.positionStack = [];
14
+ }
15
+
16
+ /**
17
+ * Peeks at the next token in the stream, without consuming it.
18
+ * @returns The next token, or null if we're at the end of the stream.
19
+ */
20
+ public peek(): Token | null {
21
+ return this.tokens[this.position] || null;
22
+ }
23
+
24
+ /**
25
+ * Consumes and returns the next token in the stream.
26
+ * @returns The consumed token, or null if we're at the end of the stream.
27
+ */
28
+ public consume(): Token | null {
29
+ return this.tokens[this.position++] || null;
30
+ }
31
+
32
+ /**
33
+ * Consumes a token if it matches the expected type.
34
+ * @returns The consumed token, or null if the next token does not match the expected type.
35
+ */
36
+ public consumeIf(...types: TokenType[]): Token | null {
37
+ const token = this.peek();
38
+ if (token && types.includes(token.type)) {
39
+ return this.consume();
40
+ }
41
+ return null;
42
+ }
43
+
44
+ /**
45
+ * Stores the current position in the position stack.
46
+ */
47
+ public storePosition() {
48
+ this.positionStack.push(this.position);
49
+ }
50
+
51
+ /**
52
+ * Clears the last stored position without restoring it.
53
+ */
54
+ public clearPosition() {
55
+ this.positionStack.pop();
56
+ }
57
+
58
+ /**
59
+ * Restores the last stored position from the position stack.
60
+ */
61
+ public restorePosition() {
62
+ const pos = this.positionStack.pop();
63
+ if (pos !== undefined) {
64
+ this.position = pos;
65
+ }
66
+ }
67
+
68
+ public peekRemainder(): string {
69
+ return this.tokens.slice(this.position).map(t => t.value).join('');
70
+ }
71
+
72
+ public getPositionForError(): { line: number; column: number, position: number } {
73
+ const tokenToUse = this.peek() || this.tokens[this.tokens.length - 1];
74
+
75
+ return {
76
+ position: this.position,
77
+ ...tokenToUse.position,
78
+ };
79
+ }
80
+ }
@@ -0,0 +1,54 @@
1
+ import { Token, TokenType } from "./types";
2
+
3
// Reusable single-character matchers for Tokenizer.withTokenType().
export const LetterRegex = /[a-zA-Z]/;
export const DigitRegex = /[0-9]/;
// NOTE(review): the tokenizer matches one character at a time, so the `+`
// quantifier never sees a run — /\s/ would behave identically here.
export const WhitespaceRegex = /\s+/;
export const NewLineRegex = /\n/;
7
+
8
+ export class Tokenizer {
9
+ constructor(
10
+ private tokenTypeMatchers: { matcher: string | RegExp, type: TokenType }[] = [],
11
+ private newLineTokenType: TokenType | null = null
12
+ ) {}
13
+
14
+ public withTokenType(type: TokenType, matcher: string | RegExp): Tokenizer {
15
+ this.tokenTypeMatchers.push({ matcher, type });
16
+ return this;
17
+ }
18
+
19
+ public tokenize(input: string): Token[] {
20
+ const position = {
21
+ line: 1,
22
+ column: 0,
23
+ }
24
+
25
+ return input.split('').reduce<Token[]>((tokens, char) => {
26
+ position.column += 1;
27
+
28
+ let matched = false;
29
+ for (const { matcher, type } of this.tokenTypeMatchers.values()) {
30
+ if (typeof matcher === 'string' && char === matcher) {
31
+ tokens.push({ type, value: char, position: { ...position }});
32
+ matched = true;
33
+ break;
34
+ }
35
+
36
+ if (matcher instanceof RegExp && matcher.test(char)) {
37
+ tokens.push({ type, value: char, position: { ...position }});
38
+ matched = true;
39
+ break;
40
+ }
41
+
42
+ if (matched && type === this.newLineTokenType) {
43
+ position.line += 1;
44
+ }
45
+ }
46
+
47
+ if (!matched) {
48
+ throw new Error(`No token type matched for character: ${char}`);
49
+ }
50
+
51
+ return tokens;
52
+ }, []);
53
+ }
54
+ }
@@ -0,0 +1,3 @@
1
// Tokenizer module barrel: exposes the Tokenizer, the TokenStream, and the
// token-related types.
export * from "./Tokenizer";
export * from "./TokenStream";
export * from "./types";
@@ -0,0 +1,12 @@
1
+ export type TokenType = string | 'end_of_input';
2
+
3
+ export type TokenPosition = {
4
+ line: number;
5
+ column: number;
6
+ }
7
+
8
+ export type Token = {
9
+ value: string;
10
+ type: TokenType;
11
+ position: TokenPosition;
12
+ }