bluera-knowledge 0.10.1 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,239 @@
1
/**
 * ZIL Lexer
 *
 * Tokenizes ZIL (Zork Implementation Language) source code.
 * ZIL is a Lisp-like language with angle brackets for forms instead of parentheses.
 *
 * Key syntax:
 * - Forms: <FORM arg1 arg2 ...>
 * - Strings: "text"
 * - Numbers: 42, -10
 * - Atoms: ROUTINE, V-LOOK, EQUAL?
 * - Comments: ; line comment, or ;"string comment" (the string may span lines)
 * - Global refs: ,FOO
 * - Local refs: .BAR
 */

/** Kinds of token the lexer emits. */
export enum TokenType {
  LANGLE = 'LANGLE', // <
  RANGLE = 'RANGLE', // >
  LPAREN = 'LPAREN', // (
  RPAREN = 'RPAREN', // )
  ATOM = 'ATOM', // Symbols/identifiers
  STRING = 'STRING', // "text"
  NUMBER = 'NUMBER', // 42, -10
}

/** A single lexed token together with the 1-based position of its first character. */
export interface Token {
  type: TokenType;
  /** Token text; for STRING tokens this is the unescaped content without the quotes. */
  value: string;
  line: number;
  column: number;
}

/**
 * Lexer for ZIL source code
 */
export class ZilLexer {
  private input = '';
  private pos = 0;
  private line = 1;
  private column = 1;
  private tokens: Token[] = [];

  /**
   * Tokenize ZIL source code
   *
   * All scanning state is reset on every call, so one instance can be reused.
   *
   * @param input - Source code string
   * @returns Array of tokens
   * @throws On unterminated strings
   */
  tokenize(input: string): Token[] {
    this.input = input;
    this.pos = 0;
    this.line = 1;
    this.column = 1;
    this.tokens = [];

    while (!this.isAtEnd()) {
      this.scanToken();
    }

    return this.tokens;
  }

  /** True once every input character has been consumed. */
  private isAtEnd(): boolean {
    return this.pos >= this.input.length;
  }

  /** Returns the next unconsumed character without consuming it ('\0' at end of input). */
  private peek(): string {
    if (this.isAtEnd()) return '\0';
    return this.input[this.pos] ?? '\0';
  }

  /** Consumes and returns one character, keeping the line/column counters in sync. */
  private advance(): string {
    const char = this.input[this.pos] ?? '\0';
    this.pos++;

    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }

    return char;
  }

  /** Appends a token positioned at the given start line/column. */
  private addToken(type: TokenType, value: string, startLine: number, startColumn: number): void {
    this.tokens.push({
      type,
      value,
      line: startLine,
      column: startColumn,
    });
  }

  /**
   * Consumes one lexical element starting at the current position: a bracket,
   * a string, a comment, whitespace, a number or an atom.
   */
  private scanToken(): void {
    const startLine = this.line;
    const startColumn = this.column;
    const char = this.advance();

    switch (char) {
      case '<':
        this.addToken(TokenType.LANGLE, '<', startLine, startColumn);
        break;
      case '>':
        this.addToken(TokenType.RANGLE, '>', startLine, startColumn);
        break;
      case '(':
        this.addToken(TokenType.LPAREN, '(', startLine, startColumn);
        break;
      case ')':
        this.addToken(TokenType.RPAREN, ')', startLine, startColumn);
        break;
      case '"':
        this.scanString(startLine, startColumn);
        break;
      case ';':
        this.skipComment();
        break;
      case ' ':
      case '\t':
      case '\r':
      case '\n':
        // Skip whitespace
        break;
      default:
        // A leading '-' only starts a number when a digit follows it.
        if (this.isDigit(char) || (char === '-' && this.isDigit(this.peek()))) {
          this.scanNumber(char, startLine, startColumn);
        } else if (this.isAtomStart(char)) {
          this.scanAtom(char, startLine, startColumn);
        }
        // Ignore other characters
        break;
    }
  }

  /**
   * Scans a string literal whose opening quote has already been consumed and
   * emits a STRING token holding the unescaped content.
   *
   * @throws If the input ends before the closing quote
   */
  private scanString(startLine: number, startColumn: number): void {
    let value = '';

    while (!this.isAtEnd() && this.peek() !== '"') {
      const char = this.peek();

      if (char === '\\') {
        this.advance(); // consume backslash
        const escaped = this.advance();
        switch (escaped) {
          case '"':
            value += '"';
            break;
          case '\\':
            value += '\\';
            break;
          case 'n':
            value += '\n';
            break;
          case 't':
            value += '\t';
            break;
          default:
            // Unknown escape: keep the escaped character itself
            value += escaped;
            break;
        }
      } else {
        value += this.advance();
      }
    }

    if (this.isAtEnd()) {
      throw new Error(
        `Unterminated string at line ${String(startLine)}, column ${String(startColumn)}`
      );
    }

    // Consume closing quote
    this.advance();

    this.addToken(TokenType.STRING, value, startLine, startColumn);
  }

  /** Scans the remaining digits of a number whose first character (digit or '-') was consumed. */
  private scanNumber(firstChar: string, startLine: number, startColumn: number): void {
    let value = firstChar;

    while (this.isDigit(this.peek())) {
      value += this.advance();
    }

    this.addToken(TokenType.NUMBER, value, startLine, startColumn);
  }

  /** Scans the remaining characters of an atom whose first character was consumed. */
  private scanAtom(firstChar: string, startLine: number, startColumn: number): void {
    let value = firstChar;

    while (this.isAtomChar(this.peek())) {
      value += this.advance();
    }

    this.addToken(TokenType.ATOM, value, startLine, startColumn);
  }

  /**
   * Skips a comment whose leading ';' has already been consumed.
   *
   * A ';' immediately followed by a terminated string (`;"..."`) is a string
   * comment: the whole string is skipped even when it spans several lines, so
   * its continuation lines are not lexed as code and its closing quote cannot
   * open a bogus string literal. In every case the rest of the line on which
   * the comment ends is skipped too; the newline itself is left in the input.
   */
  private skipComment(): void {
    if (this.peek() === '"') {
      const closingQuote = this.findClosingQuote(this.pos + 1);
      // With no closing quote, fall back to plain line-comment handling
      // instead of discarding the remainder of the input.
      if (closingQuote !== -1) {
        // Consume via advance() so line/column tracking stays correct.
        while (this.pos <= closingQuote) {
          this.advance();
        }
      }
    }

    // Skip until end of line
    while (!this.isAtEnd() && this.peek() !== '\n') {
      this.advance();
    }
  }

  /**
   * Finds the index of the first unescaped '"' at or after `from`, using the
   * same backslash rule as string literals (a backslash hides the next character).
   *
   * @returns Index of the closing quote, or -1 when there is none
   */
  private findClosingQuote(from: number): number {
    let index = from;

    while (index < this.input.length) {
      const char = this.input[index];
      if (char === '"') {
        return index;
      }
      index += char === '\\' ? 2 : 1;
    }

    return -1;
  }

  /** True for the ASCII digits 0-9. */
  private isDigit(char: string): boolean {
    return char >= '0' && char <= '9';
  }

  /** True for characters that may begin an atom. */
  private isAtomStart(char: string): boolean {
    return (
      (char >= 'A' && char <= 'Z') ||
      (char >= 'a' && char <= 'z') ||
      char === '_' ||
      char === ',' || // Global reference prefix
      char === '.' || // Local reference prefix
      char === '%' || // Sometimes used in ZIL
      char === '#' // Hash prefix
    );
  }

  /** True for characters that may continue an atom after its first character. */
  private isAtomChar(char: string): boolean {
    return (
      (char >= 'A' && char <= 'Z') ||
      (char >= 'a' && char <= 'z') ||
      (char >= '0' && char <= '9') ||
      char === '_' ||
      char === '-' ||
      char === '?' ||
      char === '!' ||
      char === ',' ||
      char === '.' ||
      char === '%' ||
      char === '#'
    );
  }
}
@@ -0,0 +1,210 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { ZilParser, type ZilForm, type ZilNode } from './zil-parser.js';
3
+
4
describe('ZilParser', () => {
  // One parser instance is shared by every test in this suite.
  const parser = new ZilParser();

  /** Asserts that parsing `source` yields exactly one symbol matching `expected`. */
  const expectSingleSymbol = (source: string, expected: { name: string; kind: string }): void => {
    const { symbols } = parser.parse(source);
    expect(symbols).toHaveLength(1);
    expect(symbols[0]).toMatchObject(expected);
  };

  /** Parses `source` and returns the callee of every extracted call. */
  const calleesIn = (source: string) => parser.parse(source).calls.map((call) => call.callee);

  describe('basic parsing', () => {
    it('should parse empty input', () => {
      const { forms } = parser.parse('');
      expect(forms).toEqual([]);
    });

    it('should parse a simple form', () => {
      const { forms } = parser.parse('<ROUTINE V-LOOK>');
      expect(forms).toHaveLength(1);
      expect(forms[0]?.head).toBe('ROUTINE');
    });

    it('should parse form with arguments', () => {
      const { forms } = parser.parse('<ROUTINE V-LOOK ()>');
      expect(forms).toHaveLength(1);

      const [first] = forms;
      expect(first?.head).toBe('ROUTINE');
      // Two children: the V-LOOK atom and the () list
      expect(first?.children).toHaveLength(2);
    });
  });

  describe('symbol extraction', () => {
    it('should extract ROUTINE as function symbol', () => {
      expectSingleSymbol('<ROUTINE V-LOOK () <TELL "You see nothing.">>', {
        name: 'V-LOOK',
        kind: 'routine',
      });
    });

    it('should extract OBJECT as object symbol', () => {
      expectSingleSymbol('<OBJECT BRASS-LANTERN (DESC "brass lantern")>', {
        name: 'BRASS-LANTERN',
        kind: 'object',
      });
    });

    it('should extract ROOM as room symbol', () => {
      expectSingleSymbol('<ROOM WEST-OF-HOUSE (DESC "West of House")>', {
        name: 'WEST-OF-HOUSE',
        kind: 'room',
      });
    });

    it('should extract GLOBAL as global symbol', () => {
      expectSingleSymbol('<GLOBAL SCORE 0>', {
        name: 'SCORE',
        kind: 'global',
      });
    });

    it('should extract CONSTANT as constant symbol', () => {
      expectSingleSymbol('<CONSTANT M-BEG 1>', {
        name: 'M-BEG',
        kind: 'constant',
      });
    });

    it('should extract SYNTAX as verb/syntax symbol', () => {
      expectSingleSymbol('<SYNTAX LOOK = V-LOOK>', {
        name: 'LOOK',
        kind: 'syntax',
      });
    });

    it('should extract multiple symbols from file', () => {
      const source = [
        '',
        '<CONSTANT M-BEG 1>',
        '<GLOBAL SCORE 0>',
        '<ROUTINE V-LOOK ()>',
        '<OBJECT LAMP>',
        '',
      ].join('\n');
      const { symbols } = parser.parse(source);
      expect(symbols).toHaveLength(4);

      const kinds = symbols.map((symbol) => symbol.kind);
      expect(kinds).toContain('constant');
      expect(kinds).toContain('global');
      expect(kinds).toContain('routine');
      expect(kinds).toContain('object');
    });
  });

  describe('import extraction', () => {
    it('should extract INSERT-FILE as import', () => {
      const { imports } = parser.parse('<INSERT-FILE "GMACROS" T>');
      expect(imports).toHaveLength(1);
      expect(imports[0]).toMatchObject({
        source: 'GMACROS',
        specifiers: [],
        isType: false,
      });
    });

    it('should extract multiple imports', () => {
      const source = ['', '<INSERT-FILE "GMACROS" T>', '<INSERT-FILE "PARSER" T>', ''].join('\n');
      const { imports } = parser.parse(source);
      expect(imports).toHaveLength(2);

      const sources = imports.map((entry) => entry.source);
      expect(sources).toEqual(['GMACROS', 'PARSER']);
    });
  });

  describe('call extraction', () => {
    it('should extract calls from routine body', () => {
      const { calls } = parser.parse('<ROUTINE V-LOOK () <TELL "text"> <DESCRIBE-ROOM>>');

      expect(calls).toBeDefined();
      expect(calls.length).toBeGreaterThan(0);

      // DESCRIBE-ROOM is expected to be reported as a call (not a special form)
      const callees = calls.map((call) => call.callee);
      expect(callees).toContain('DESCRIBE-ROOM');
    });

    it('should filter out special forms from calls', () => {
      const callees = calleesIn('<ROUTINE TEST () <COND (<EQUAL? 1 1> <TELL "yes">)>>');

      // COND, EQUAL? and TELL are expected to be filtered out as special forms
      for (const special of ['COND', 'EQUAL?', 'TELL']) {
        expect(callees).not.toContain(special);
      }
    });

    it('should include routine calls but not builtins', () => {
      const callees = calleesIn('<ROUTINE V-TAKE () <V-LOOK> <MOVE ,OBJ ,HERE>>');

      expect(callees).toContain('V-LOOK');
      // MOVE is a builtin and is expected to be filtered
      expect(callees).not.toContain('MOVE');
    });
  });

  describe('line tracking', () => {
    it('should track start and end lines for symbols', () => {
      const source = ['', '<ROUTINE V-LOOK ()', '<TELL "You see nothing special.">>', ''].join(
        '\n'
      );
      const { symbols } = parser.parse(source);
      expect(symbols).toHaveLength(1);

      const [routine] = symbols;
      // The ROUTINE form opens on line 2 (line 1 is the leading blank line)
      expect(routine?.startLine).toBe(2);
      expect(routine?.endLine).toBeGreaterThanOrEqual(3);
    });
  });

  describe('nested forms', () => {
    it('should parse deeply nested forms', () => {
      const { forms } = parser.parse(
        '<COND (<AND (<EQUAL? ,X 1> <FSET? ,OBJ ,LIGHTBIT>) <RTRUE>>)>'
      );
      expect(forms).toHaveLength(1);

      // Parsing must not throw, and the outermost form must be the COND
      const [outer] = forms;
      expect(outer?.head).toBe('COND');
    });
  });

  describe('signature extraction', () => {
    it('should extract routine signature with args', () => {
      const { symbols } = parser.parse('<ROUTINE V-TAKE (OBJ "AUX" FLAG) <TELL "Taking...">>');

      expect(symbols).toHaveLength(1);
      expect(symbols[0]?.signature).toContain('V-TAKE');
    });
  });

  describe('edge cases', () => {
    it('should handle malformed input gracefully', () => {
      // The closing angle bracket is missing; parse() must return a result rather than throw
      const outcome = parser.parse('<ROUTINE V-LOOK');
      expect(outcome).toBeDefined();
    });

    it('should handle empty forms', () => {
      const { forms } = parser.parse('<>');
      expect(forms).toHaveLength(1);
      expect(forms[0]?.head).toBe('');
    });
  });
});