@words-lang/parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/dist/analyser/analyser.d.ts +106 -0
  2. package/dist/analyser/analyser.d.ts.map +1 -0
  3. package/dist/analyser/analyser.js +291 -0
  4. package/dist/analyser/analyser.js.map +1 -0
  5. package/dist/analyser/diagnostics.d.ts +166 -0
  6. package/dist/analyser/diagnostics.d.ts.map +1 -0
  7. package/dist/analyser/diagnostics.js +139 -0
  8. package/dist/analyser/diagnostics.js.map +1 -0
  9. package/dist/analyser/workspace.d.ts +198 -0
  10. package/dist/analyser/workspace.d.ts.map +1 -0
  11. package/dist/analyser/workspace.js +403 -0
  12. package/dist/analyser/workspace.js.map +1 -0
  13. package/dist/index.d.ts +8 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +31 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/lexer/lexer.d.ts +120 -0
  18. package/dist/lexer/lexer.d.ts.map +1 -0
  19. package/dist/lexer/lexer.js +365 -0
  20. package/dist/lexer/lexer.js.map +1 -0
  21. package/dist/lexer/token.d.ts +247 -0
  22. package/dist/lexer/token.d.ts.map +1 -0
  23. package/dist/lexer/token.js +250 -0
  24. package/dist/lexer/token.js.map +1 -0
  25. package/dist/parser/ast.d.ts +685 -0
  26. package/dist/parser/ast.d.ts.map +1 -0
  27. package/dist/parser/ast.js +3 -0
  28. package/dist/parser/ast.js.map +1 -0
  29. package/dist/parser/parser.d.ts +411 -0
  30. package/dist/parser/parser.d.ts.map +1 -0
  31. package/dist/parser/parser.js +1600 -0
  32. package/dist/parser/parser.js.map +1 -0
  33. package/package.json +23 -0
  34. package/src/analyser/analyser.ts +403 -0
  35. package/src/analyser/diagnostics.ts +232 -0
  36. package/src/analyser/workspace.ts +457 -0
  37. package/src/index.ts +7 -0
  38. package/src/lexer/lexer.ts +379 -0
  39. package/src/lexer/token.ts +331 -0
  40. package/src/parser/ast.ts +798 -0
  41. package/src/parser/parser.ts +1815 -0
@@ -0,0 +1,379 @@
1
+ /**
2
+ * lexer.ts
3
+ *
4
+ * The WORDS lexer. Converts a raw `.wds` source string into a flat array
5
+ * of tokens that the parser consumes.
6
+ *
7
+ * Design principles:
8
+ *
9
+ * - Single-pass, character-by-character. No regular expressions at runtime —
10
+ * all character classification is done with simple comparisons.
11
+ *
12
+ * - Never throws. Unrecognised characters are emitted as `Unknown` tokens so
13
+ * the parser can continue and collect all errors in one pass rather than
14
+ * stopping at the first problem.
15
+ *
16
+ * - `is not` is normalised into a single `IsNot` token during lexing.
17
+ * This simplifies the parser — it never has to handle a two-token sequence
18
+ * in conditional expressions.
19
+ *
20
+ * - Newlines are emitted as `Newline` tokens. The parser uses them to
21
+ * distinguish a bare ownership declaration (`module AuthModule` on its own
22
+ * line) from a construct body opening (`module AuthModule "..." (`).
23
+ *
24
+ * - Comments are included in the token stream (not silently discarded) so the
25
+ * parser can attach them to adjacent nodes for hover documentation.
26
+ *
27
+ * - Method names, callback prop names, and handler method names are all plain
28
+ * camelCase identifiers. Names like `switch`, `onLoad`, `onSubmit` carry no
29
+ * special meaning to the lexer — they are emitted as CamelIdent tokens.
30
+ *
31
+ * - Position tracking (line, column, offset) is maintained for every token
32
+ * so the LSP can report diagnostics and resolve go-to-definition requests
33
+ * at the exact source location.
34
+ */
35
+
36
+ import { Token, TokenType, token } from './token'
37
+
38
+ // ── Keyword table ─────────────────────────────────────────────────────────────
39
+
40
/**
 * Maps every reserved word in WORDS to its token type.
 * Identifiers not found in this table are classified as PascalIdent or
 * CamelIdent based on their first character.
 *
 * The table is built on a null-prototype object. A plain object literal
 * inherits keys such as `constructor`, `toString` and `valueOf` from
 * `Object.prototype`, so an index lookup like `KEYWORDS['constructor']` would
 * return a function instead of `undefined` and the identifier would be
 * mistaken for a keyword. With no prototype, only the entries listed below
 * can ever be found.
 *
 * Note: `true` and `false` are listed here so they are never accidentally
 * emitted as plain identifiers.
 *
 * Intentionally NOT in this table — these are all plain camelCase names
 * chosen by the designer with no special language meaning:
 * - Method names (e.g. `switch`, `login`, `getProducts`, `clear`)
 * - Callback prop names (e.g. `onSubmit`, `onConfirm`, `onLoad`, `onDismiss`)
 * - Handler method names (e.g. `switch` on a handler interface)
 * - Iteration variables (e.g. `notification`, `category`)
 */
const KEYWORDS: Record<string, TokenType> = Object.assign(
  Object.create(null) as Record<string, TokenType>,
  {
    // Construct keywords
    system: TokenType.System,
    module: TokenType.Module,
    process: TokenType.Process,
    state: TokenType.State,
    context: TokenType.Context,
    screen: TokenType.Screen,
    view: TokenType.View,
    provider: TokenType.Provider,
    adapter: TokenType.Adapter,
    interface: TokenType.Interface,

    // Body / clause keywords
    modules: TokenType.Modules,
    props: TokenType.Props,
    uses: TokenType.Uses,
    returns: TokenType.Returns,
    receives: TokenType.Receives,
    start: TokenType.Start,
    implements: TokenType.Implements,

    // Control-flow keywords
    when: TokenType.When,
    enter: TokenType.Enter,
    if: TokenType.If,
    for: TokenType.For,
    as: TokenType.As,
    is: TokenType.Is,

    // Boolean literals
    true: TokenType.BooleanLit,
    false: TokenType.BooleanLit,

    // Built-in type names
    string: TokenType.TString,
    integer: TokenType.TInteger,
    float: TokenType.TFloat,
    boolean: TokenType.TBoolean,
    list: TokenType.TList,
    map: TokenType.TMap,
  },
)
88
+
89
+ // ── Lexer class ───────────────────────────────────────────────────────────────
90
+
91
/**
 * The WORDS lexer: turns a source string into a flat `Token[]`.
 *
 * The lexer never throws. Characters it does not recognise are emitted as
 * `Unknown` tokens, newlines are emitted as `Newline` tokens, and the stream
 * always ends with a single `EOF` token. Every token records the line,
 * column and offset at which it starts.
 */
export class Lexer {
  /** The full source text being tokenized. */
  private readonly source: string

  /** Current index into `source` (a string index, i.e. UTF-16 code units). */
  private pos = 0

  /** Current 1-based line number. Incremented each time a `\n` is consumed. */
  private line = 1

  /**
   * Current 1-based column number.
   * Reset to 1 after each newline; incremented after each other character.
   */
  private column = 1

  /** Accumulated token stream. Populated by `tokenize()`. */
  private tokens: Token[] = []

  constructor(source: string) {
    this.source = source
  }

  // ── Public API ─────────────────────────────────────────────────────────────

  /**
   * Tokenizes the entire source string and returns the token stream.
   * The last token in the stream is always an `EOF` token.
   *
   * Position state and the token buffer are reset on entry, so calling
   * `tokenize()` again on the same instance re-lexes from the start and
   * returns a new array; arrays returned by earlier calls are not modified.
   *
   * @returns The tokens of `source`, in source order, terminated by `EOF`.
   */
  tokenize(): Token[] {
    this.pos = 0
    this.line = 1
    this.column = 1
    this.tokens = []

    for (;;) {
      // Skip horizontal whitespace between tokens.
      // Newlines are NOT skipped here — they are emitted as Newline tokens.
      this.skipWhitespace()
      if (this.isAtEnd()) break
      this.scanToken()
    }

    // EOF sentinel — always the last token.
    this.tokens.push(token(TokenType.EOF, '', this.line, this.column, this.pos))
    return this.tokens
  }

  // ── Private helpers ────────────────────────────────────────────────────────

  /**
   * Lexes exactly one token starting at the current position and appends it
   * to `tokens`. Must only be called when not at end of input; it always
   * consumes at least one character.
   */
  private scanToken(): void {
    // Capture the start position before anything is consumed, so the token
    // is stamped with where it begins regardless of how far we read.
    const start = this.pos
    const startLine = this.line
    const startCol = this.column
    const ch = this.current()
    const emit = (type: TokenType, value: string): void => {
      this.tokens.push(token(type, value, startLine, startCol, start))
    }

    // Line comment — kept in the stream rather than discarded.
    if (ch === '/' && this.peek(1) === '/') {
      emit(TokenType.Comment, this.readLineComment())
      return
    }

    // String literal.
    if (ch === '"') {
      emit(TokenType.StringLit, this.readString())
      return
    }

    // Number literal — a float is recognised by the decimal point in its text.
    if (this.isDigit(ch)) {
      const num = this.readNumber()
      emit(num.includes('.') ? TokenType.FloatLit : TokenType.IntegerLit, num)
      return
    }

    // Identifier or keyword (`isAlpha` already accepts `_`).
    if (this.isAlpha(ch)) {
      const ident = this.readIdent()
      // `is not` is folded into a single IsNot token spanning both words.
      if (ident === 'is' && this.tryConsumeNot()) {
        emit(TokenType.IsNot, 'is not')
        return
      }
      emit(this.classifyIdent(ident), ident)
      return
    }

    // Everything else is a single character: punctuation, a newline, or an
    // unrecognised character (emitted as Unknown instead of throwing).
    // The line counter is bumped inside `advance()` when `ch` is `\n`.
    this.advance()
    emit(this.singleCharType(ch), ch)
  }

  /**
   * Maps a single non-identifier, non-literal character to its token type.
   * Characters with no dedicated type yield `Unknown`.
   */
  private singleCharType(ch: string): TokenType {
    switch (ch) {
      case '?':
        return TokenType.Question
      case '(':
        return TokenType.LParen
      case ')':
        return TokenType.RParen
      case ',':
        return TokenType.Comma
      case '.':
        return TokenType.Dot
      case '\n':
        return TokenType.Newline
      default:
        return TokenType.Unknown
    }
  }

  /**
   * Classifies an identifier: a keyword type if it is in the keyword table,
   * otherwise PascalIdent when it starts with `A`-`Z`, otherwise CamelIdent
   * (this includes names that start with a lowercase letter or `_`).
   */
  private classifyIdent(ident: string): TokenType {
    // Own-property check: names that exist on Object.prototype (`constructor`,
    // `toString`, `valueOf`, …) must not be mistaken for keywords.
    const keyword = Object.prototype.hasOwnProperty.call(KEYWORDS, ident)
      ? KEYWORDS[ident]
      : undefined
    if (keyword !== undefined) return keyword

    const first = ident.charAt(0)
    return first >= 'A' && first <= 'Z' ? TokenType.PascalIdent : TokenType.CamelIdent
  }

  /**
   * Called right after the identifier `is` has been read. Looks ahead past
   * spaces, tabs and carriage returns for the whole word `not`. If found,
   * consumes the whitespace and the word and returns true; otherwise leaves
   * the lexer state untouched and returns false.
   */
  private tryConsumeNot(): boolean {
    let i = this.pos
    while (i < this.source.length) {
      const c = this.source.charAt(i)
      if (c !== ' ' && c !== '\t' && c !== '\r') break
      i++
    }

    // `not` must be a complete word — `is nothing` stays `is` + `nothing`.
    if (!this.source.startsWith('not', i)) return false
    if (this.isAlphaNumeric(this.source.charAt(i + 3))) return false

    // No `\n` lies in the consumed span, so only the column moves.
    const end = i + 3
    this.column += end - this.pos
    this.pos = end
    return true
  }

  /**
   * Returns the character at the current position without consuming it.
   * Returns an empty string if at end of input.
   */
  private current(): string {
    return this.source.charAt(this.pos)
  }

  /**
   * Returns the character at `pos + offset` without consuming it.
   * Used for lookahead (e.g. distinguishing `//` from `/`).
   * Returns an empty string if the offset is out of bounds.
   */
  private peek(offset: number): string {
    return this.source.charAt(this.pos + offset)
  }

  /**
   * Consumes the current character, advances the position, and updates
   * line/column tracking. Returns the consumed character.
   * Line is incremented and column reset to 1 when a `\n` is consumed.
   * At end of input nothing is consumed and an empty string is returned, so
   * `pos` can never move past `source.length`.
   */
  private advance(): string {
    if (this.isAtEnd()) return ''

    const ch = this.source.charAt(this.pos)
    this.pos++
    if (ch === '\n') {
      this.line++
      this.column = 1
    } else {
      this.column++
    }
    return ch
  }

  /** Returns true when all characters have been consumed. */
  private isAtEnd(): boolean {
    return this.pos >= this.source.length
  }

  /** Returns true for ASCII decimal digit characters. */
  private isDigit(ch: string): boolean {
    return ch >= '0' && ch <= '9'
  }

  /** Returns true for ASCII letters and underscore. */
  private isAlpha(ch: string): boolean {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '_'
  }

  /** Returns true for characters valid inside an identifier (letters, digits, underscore). */
  private isAlphaNumeric(ch: string): boolean {
    return this.isAlpha(ch) || this.isDigit(ch)
  }

  /**
   * Advances past spaces, tabs, and carriage returns.
   * Newlines are NOT skipped — they are significant and emitted as tokens.
   */
  private skipWhitespace(): void {
    while (!this.isAtEnd()) {
      const ch = this.current()
      if (ch !== ' ' && ch !== '\t' && ch !== '\r') return
      this.advance()
    }
  }

  /**
   * Reads a `//` line comment from the current position to the end of the line.
   * The returned value includes the `//` prefix.
   * The terminating `\n` is NOT consumed — it will be emitted as a Newline token
   * on the next iteration.
   */
  private readLineComment(): string {
    let result = ''
    while (!this.isAtEnd() && this.current() !== '\n') {
      result += this.advance()
    }
    return result
  }

  /**
   * Reads a double-quoted string literal from the current position.
   * A backslash and the character after it are consumed as a unit, so an
   * escaped quote does not end the literal.
   * The returned value is the raw source text, including the surrounding
   * quotes. An unclosed string (end of input before the closing `"`) is
   * returned as-is, without a closing quote.
   */
  private readString(): string {
    let result = '"'
    this.advance() // consume opening quote

    while (!this.isAtEnd() && this.current() !== '"') {
      if (this.current() === '\\') {
        result += this.advance() // backslash
        // A backslash may be the very last character of the input; only
        // consume an escaped character when one actually exists.
        if (!this.isAtEnd()) {
          result += this.advance() // escaped character
        }
      } else {
        result += this.advance()
      }
    }

    if (!this.isAtEnd()) {
      result += this.advance() // consume closing quote
    }
    return result
  }

  /**
   * Reads an integer or float literal from the current position.
   * A decimal point followed by at least one digit triggers float mode; a
   * trailing `.` with no digit after it is left for the next token.
   * The returned string is the raw source text.
   */
  private readNumber(): string {
    let result = ''
    while (!this.isAtEnd() && this.isDigit(this.current())) {
      result += this.advance()
    }

    if (this.current() === '.' && this.isDigit(this.peek(1))) {
      result += this.advance() // consume '.'
      while (!this.isAtEnd() && this.isDigit(this.current())) {
        result += this.advance()
      }
    }
    return result
  }

  /**
   * Reads an identifier (keyword or user-defined name) from the current position.
   * Identifiers consist of letters, digits, and underscores.
   * The caller is responsible for classifying the result.
   */
  private readIdent(): string {
    let result = ''
    while (!this.isAtEnd() && this.isAlphaNumeric(this.current())) {
      result += this.advance()
    }
    return result
  }
}