@words-lang/parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyser/analyser.d.ts +106 -0
- package/dist/analyser/analyser.d.ts.map +1 -0
- package/dist/analyser/analyser.js +291 -0
- package/dist/analyser/analyser.js.map +1 -0
- package/dist/analyser/diagnostics.d.ts +166 -0
- package/dist/analyser/diagnostics.d.ts.map +1 -0
- package/dist/analyser/diagnostics.js +139 -0
- package/dist/analyser/diagnostics.js.map +1 -0
- package/dist/analyser/workspace.d.ts +198 -0
- package/dist/analyser/workspace.d.ts.map +1 -0
- package/dist/analyser/workspace.js +403 -0
- package/dist/analyser/workspace.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/index.js.map +1 -0
- package/dist/lexer/lexer.d.ts +120 -0
- package/dist/lexer/lexer.d.ts.map +1 -0
- package/dist/lexer/lexer.js +365 -0
- package/dist/lexer/lexer.js.map +1 -0
- package/dist/lexer/token.d.ts +247 -0
- package/dist/lexer/token.d.ts.map +1 -0
- package/dist/lexer/token.js +250 -0
- package/dist/lexer/token.js.map +1 -0
- package/dist/parser/ast.d.ts +685 -0
- package/dist/parser/ast.d.ts.map +1 -0
- package/dist/parser/ast.js +3 -0
- package/dist/parser/ast.js.map +1 -0
- package/dist/parser/parser.d.ts +411 -0
- package/dist/parser/parser.d.ts.map +1 -0
- package/dist/parser/parser.js +1600 -0
- package/dist/parser/parser.js.map +1 -0
- package/package.json +23 -0
- package/src/analyser/analyser.ts +403 -0
- package/src/analyser/diagnostics.ts +232 -0
- package/src/analyser/workspace.ts +457 -0
- package/src/index.ts +7 -0
- package/src/lexer/lexer.ts +379 -0
- package/src/lexer/token.ts +331 -0
- package/src/parser/ast.ts +798 -0
- package/src/parser/parser.ts +1815 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lexer.ts
|
|
3
|
+
*
|
|
4
|
+
* The WORDS lexer. Converts a raw `.wds` source string into a flat array
|
|
5
|
+
* of tokens that the parser consumes.
|
|
6
|
+
*
|
|
7
|
+
* Design principles:
|
|
8
|
+
*
|
|
9
|
+
* - Single-pass, character-by-character. No regular expressions at runtime —
|
|
10
|
+
* all character classification is done with simple comparisons.
|
|
11
|
+
*
|
|
12
|
+
* - Never throws. Unrecognised characters are emitted as `Unknown` tokens so
|
|
13
|
+
* the parser can continue and collect all errors in one pass rather than
|
|
14
|
+
* stopping at the first problem.
|
|
15
|
+
*
|
|
16
|
+
* - `is not` is normalised into a single `IsNot` token during lexing.
|
|
17
|
+
* This simplifies the parser — it never has to handle a two-token sequence
|
|
18
|
+
* in conditional expressions.
|
|
19
|
+
*
|
|
20
|
+
* - Newlines are emitted as `Newline` tokens. The parser uses them to
|
|
21
|
+
* distinguish a bare ownership declaration (`module AuthModule` on its own
|
|
22
|
+
* line) from a construct body opening (`module AuthModule "..." (`).
|
|
23
|
+
*
|
|
24
|
+
* - Comments are included in the token stream (not silently discarded) so the
|
|
25
|
+
* parser can attach them to adjacent nodes for hover documentation.
|
|
26
|
+
*
|
|
27
|
+
* - Method names, callback prop names, and handler method names are all plain
|
|
28
|
+
* camelCase identifiers. Names like `switch`, `onLoad`, `onSubmit` carry no
|
|
29
|
+
* special meaning to the lexer — they are emitted as CamelIdent tokens.
|
|
30
|
+
*
|
|
31
|
+
* - Position tracking (line, column, offset) is maintained for every token
|
|
32
|
+
* so the LSP can report diagnostics and resolve go-to-definition requests
|
|
33
|
+
* at the exact source location.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import { Token, TokenType, token } from './token'
|
|
37
|
+
|
|
38
|
+
// ── Keyword table ─────────────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
/**
 * Maps every reserved word in WORDS to its token type.
 * Identifiers not found in this table are classified as PascalIdent or
 * CamelIdent based on their first character.
 *
 * The table is built on a null-prototype object so that a lookup such as
 * `KEYWORDS['constructor']` or `KEYWORDS['toString']` yields `undefined`
 * instead of a member inherited from `Object.prototype`. Without this,
 * designer-chosen names that collide with `Object.prototype` members would be
 * mistaken for keywords.
 *
 * Note: `true` and `false` are listed here so they are never accidentally
 * emitted as plain identifiers.
 *
 * Intentionally NOT in this table — these are all plain camelCase names
 * chosen by the designer with no special language meaning:
 *   - Method names        (e.g. `switch`, `login`, `getProducts`, `clear`)
 *   - Callback prop names (e.g. `onSubmit`, `onConfirm`, `onLoad`, `onDismiss`)
 *   - Handler method names (e.g. `switch` on a handler interface)
 *   - Iteration variables (e.g. `notification`, `category`)
 */
const KEYWORDS: Record<string, TokenType> = Object.assign(
  Object.create(null) as Record<string, TokenType>,
  {
    // Construct keywords
    system: TokenType.System,
    module: TokenType.Module,
    process: TokenType.Process,
    state: TokenType.State,
    context: TokenType.Context,
    screen: TokenType.Screen,
    view: TokenType.View,
    provider: TokenType.Provider,
    adapter: TokenType.Adapter,
    interface: TokenType.Interface,

    // Clause and control keywords
    modules: TokenType.Modules,
    props: TokenType.Props,
    uses: TokenType.Uses,
    returns: TokenType.Returns,
    receives: TokenType.Receives,
    start: TokenType.Start,
    implements: TokenType.Implements,
    when: TokenType.When,
    enter: TokenType.Enter,
    if: TokenType.If,
    for: TokenType.For,
    as: TokenType.As,
    is: TokenType.Is,

    // Boolean literals
    true: TokenType.BooleanLit,
    false: TokenType.BooleanLit,

    // Built-in type names
    string: TokenType.TString,
    integer: TokenType.TInteger,
    float: TokenType.TFloat,
    boolean: TokenType.TBoolean,
    list: TokenType.TList,
    map: TokenType.TMap,
  },
)
|
|
88
|
+
|
|
89
|
+
// ── Lexer class ───────────────────────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
export class Lexer {
  /** The full source text being tokenized. */
  private readonly source: string

  /**
   * Current index into `source`. This is a JavaScript string index, so it
   * counts UTF-16 code units rather than bytes.
   */
  private pos = 0

  /** Current 1-based line number. Incremented each time a `\n` is consumed. */
  private line = 1

  /**
   * Current 1-based column number.
   * Reset to 1 after each newline; incremented after each other character.
   */
  private column = 1

  /** Token stream produced by the most recent `tokenize()` call. */
  private tokens: Token[] = []

  /**
   * @param source The raw source text to tokenize.
   */
  constructor(source: string) {
    this.source = source
  }

  // ── Public API ─────────────────────────────────────────────────────────────

  /**
   * Tokenizes the entire source string and returns the token stream.
   * The last token in the stream is always an `EOF` token.
   *
   * Scanning state is reset at the start of every call, so calling
   * `tokenize()` repeatedly on the same instance yields an equal, freshly
   * allocated stream each time. Arrays returned by earlier calls are never
   * mutated afterwards.
   *
   * This method never throws: characters that do not start any known token
   * are emitted as `Unknown` tokens.
   *
   * @returns The tokens in source order, terminated by `EOF`.
   */
  tokenize(): Token[] {
    // Start from a clean slate so repeated calls do not append to (or resume
    // after) the result of a previous run.
    this.pos = 0
    this.line = 1
    this.column = 1
    this.tokens = []

    while (!this.isAtEnd()) {
      // Skip horizontal whitespace between tokens.
      // Newlines are NOT skipped here — they are emitted as Newline tokens.
      this.skipWhitespace()
      if (this.isAtEnd()) break

      // Every token reports the position of its first character.
      const start = this.pos
      const startLine = this.line
      const startCol = this.column
      const ch = this.current()

      // ── Line comment ─────────────────────────────────────────────────────
      if (ch === '/' && this.peek(1) === '/') {
        const comment = this.readLineComment()
        this.tokens.push(token(TokenType.Comment, comment, startLine, startCol, start))
        continue
      }

      // ── String literal ───────────────────────────────────────────────────
      if (ch === '"') {
        const str = this.readString()
        this.tokens.push(token(TokenType.StringLit, str, startLine, startCol, start))
        continue
      }

      // ── Number literal ───────────────────────────────────────────────────
      // Integers and floats are distinguished by the presence of a decimal point.
      if (this.isDigit(ch)) {
        const num = this.readNumber()
        const type = num.includes('.') ? TokenType.FloatLit : TokenType.IntegerLit
        this.tokens.push(token(type, num, startLine, startCol, start))
        continue
      }

      // ── Identifier or keyword ────────────────────────────────────────────
      // `isAlpha` already accepts `_`, so no separate underscore check is needed.
      if (this.isAlpha(ch)) {
        const ident = this.readIdent()

        // `is not` is folded into a single IsNot token so the parser never
        // has to handle a two-token sequence in conditions.
        if (ident === 'is' && this.tryConsumeNot()) {
          this.tokens.push(token(TokenType.IsNot, 'is not', startLine, startCol, start))
          continue
        }

        this.tokens.push(token(this.classifyIdent(ident), ident, startLine, startCol, start))
        continue
      }

      // ── Optional marker and punctuation ──────────────────────────────────
      const punct = this.punctuationType(ch)
      if (punct !== undefined) {
        this.advance()
        this.tokens.push(token(punct, ch, startLine, startCol, start))
        continue
      }

      // ── Newline ──────────────────────────────────────────────────────────
      // Emitted as a token so the parser can detect line boundaries.
      // The line counter is incremented inside `advance()`.
      if (ch === '\n') {
        this.tokens.push(token(TokenType.Newline, '\n', startLine, startCol, start))
        this.advance()
        continue
      }

      // ── Unknown ──────────────────────────────────────────────────────────
      // Emit and continue rather than throwing, so all errors can be collected.
      this.tokens.push(token(TokenType.Unknown, ch, startLine, startCol, start))
      this.advance()
    }

    // EOF sentinel — always the last token.
    this.tokens.push(token(TokenType.EOF, '', this.line, this.column, this.pos))
    return this.tokens
  }

  // ── Private helpers ────────────────────────────────────────────────────────

  /**
   * Returns the character at the current position without consuming it.
   * Returns an empty string if at end of input.
   */
  private current(): string {
    return this.source.charAt(this.pos)
  }

  /**
   * Returns the character at `pos + offset` without consuming it.
   * Used for short lookahead (e.g. distinguishing `//` from `/`).
   * Returns an empty string if the offset is out of bounds.
   */
  private peek(offset: number): string {
    return this.source.charAt(this.pos + offset)
  }

  /**
   * Consumes the current character, advances the position, and updates
   * line/column tracking. Returns the consumed character.
   * Line is incremented and column reset to 1 when a `\n` is consumed.
   *
   * At end of input this is a no-op that returns an empty string, so `pos`
   * and `column` can never run past the end of the source (which would
   * corrupt the position reported on the EOF token).
   */
  private advance(): string {
    if (this.isAtEnd()) return ''

    const ch = this.source.charAt(this.pos)
    if (ch === '\n') {
      this.line++
      this.column = 1
    } else {
      this.column++
    }
    this.pos++
    return ch
  }

  /** Returns true when all characters have been consumed. */
  private isAtEnd(): boolean {
    return this.pos >= this.source.length
  }

  /** Returns true for ASCII decimal digit characters. */
  private isDigit(ch: string): boolean {
    return ch >= '0' && ch <= '9'
  }

  /** Returns true for ASCII letters and underscore. */
  private isAlpha(ch: string): boolean {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '_'
  }

  /** Returns true for characters valid inside an identifier (letters, digits, underscore). */
  private isAlphaNumeric(ch: string): boolean {
    return this.isAlpha(ch) || this.isDigit(ch)
  }

  /**
   * Maps a single punctuation character (including the `?` optional marker)
   * to its token type, or returns `undefined` when the character is not
   * punctuation.
   */
  private punctuationType(ch: string): TokenType | undefined {
    switch (ch) {
      case '?':
        return TokenType.Question
      case '(':
        return TokenType.LParen
      case ')':
        return TokenType.RParen
      case ',':
        return TokenType.Comma
      case '.':
        return TokenType.Dot
      default:
        return undefined
    }
  }

  /**
   * Classifies an identifier that has already been read: a keyword when it is
   * an own entry of the keyword table, otherwise PascalIdent when it starts
   * with an ASCII uppercase letter, otherwise CamelIdent (this includes names
   * starting with `_`).
   *
   * The own-property check matters: names such as `constructor` or `toString`
   * exist on `Object.prototype` and must not be mistaken for keywords.
   */
  private classifyIdent(ident: string): TokenType {
    const keyword = Object.prototype.hasOwnProperty.call(KEYWORDS, ident)
      ? KEYWORDS[ident]
      : undefined
    if (keyword !== undefined) return keyword

    // Plain character comparison — no regular expression at runtime.
    const first = ident.charAt(0)
    return first >= 'A' && first <= 'Z' ? TokenType.PascalIdent : TokenType.CamelIdent
  }

  /**
   * Called right after the identifier `is` has been read. Looks past any
   * horizontal whitespace for the whole word `not`.
   *
   * @returns `true` if `not` was found and consumed; `false` otherwise, in
   *          which case the scanning position is restored to just after `is`.
   */
  private tryConsumeNot(): boolean {
    const savedPos = this.pos
    const savedLine = this.line
    const savedCol = this.column

    this.skipWhitespace()
    // `not` must be a complete word: `nothing` or `not_x` do not qualify.
    if (this.source.startsWith('not', this.pos) && !this.isAlphaNumeric(this.peek(3))) {
      this.pos += 3
      this.column += 3
      return true
    }

    this.pos = savedPos
    this.line = savedLine
    this.column = savedCol
    return false
  }

  /**
   * Advances past spaces, tabs, and carriage returns.
   * Newlines are NOT skipped — they are significant and emitted as tokens.
   */
  private skipWhitespace(): void {
    while (!this.isAtEnd()) {
      const ch = this.current()
      if (ch !== ' ' && ch !== '\t' && ch !== '\r') break
      this.advance()
    }
  }

  /**
   * Reads a `//` line comment from the current position to the end of the line.
   * The returned value includes the `//` prefix.
   * The line terminator is NOT consumed and is not part of the returned text:
   * scanning stops at `\n`, and also at the `\r` of a `\r\n` pair so that
   * comments in CRLF files do not carry a trailing carriage return. The `\n`
   * is emitted as a Newline token on the next iteration.
   */
  private readLineComment(): string {
    let text = ''
    while (!this.isAtEnd()) {
      const ch = this.current()
      if (ch === '\n') break
      if (ch === '\r' && this.peek(1) === '\n') break
      text += this.advance()
    }
    return text
  }

  /**
   * Reads a double-quoted string literal from the current position.
   * Handles backslash escape sequences by consuming both the `\` and the
   * following character as a unit.
   * The returned value includes the surrounding quotes.
   * Unclosed strings (EOF before closing `"`) are returned as-is — the
   * parser will report the error from context.
   */
  private readString(): string {
    let text = '"'
    this.advance() // consume opening quote
    while (!this.isAtEnd() && this.current() !== '"') {
      if (this.current() === '\\') {
        text += this.advance() // backslash
        // If the backslash was the last character of the input there is
        // nothing to escape; `advance()` then returns '' without moving.
        text += this.advance() // escaped character
      } else {
        text += this.advance()
      }
    }
    if (!this.isAtEnd()) {
      text += this.advance() // consume closing quote
    }
    return text
  }

  /**
   * Reads an integer or float literal from the current position.
   * A decimal point is consumed only when it is directly followed by a digit,
   * which switches the literal to float form (so `2.` reads as `2` and leaves
   * the `.` for the next token).
   * The returned string is the raw source text.
   */
  private readNumber(): string {
    let text = ''
    while (!this.isAtEnd() && this.isDigit(this.current())) {
      text += this.advance()
    }
    // Check for decimal point followed by a digit — if so, continue as float.
    if (this.current() === '.' && this.isDigit(this.peek(1))) {
      text += this.advance() // consume '.'
      while (!this.isAtEnd() && this.isDigit(this.current())) {
        text += this.advance()
      }
    }
    return text
  }

  /**
   * Reads an identifier (keyword or user-defined name) from the current position.
   * Identifiers consist of letters, digits, and underscores.
   * The caller is responsible for classifying the result via the keyword table.
   */
  private readIdent(): string {
    let text = ''
    while (!this.isAtEnd() && this.isAlphaNumeric(this.current())) {
      text += this.advance()
    }
    return text
  }
}
|