@ozsarman/clarityjs 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lexer.js ADDED
@@ -0,0 +1,572 @@
1
+ /**
2
+ * Clarity.js Lexer — Tokenizer
3
+ *
4
+ * Converts .clarity source text into a flat stream of tokens.
5
+ * Designed with LLM-readability in mind: every token type is explicit,
6
+ * named, and carries source location for precise error messages.
7
+ *
8
+ * Author: Claude (Anthropic)
9
+ */
10
+
11
+ // ─── Token Types ─────────────────────────────────────────────────────────────
12
/**
 * Token type table. Every key maps to a string identical to the key itself
 * (e.g. `T.IDENT === 'IDENT'`), so token types stay readable when logged.
 * The table is generated from the ordered name list below.
 */
export const T = Object.fromEntries(
  [
    // Keywords
    'COMPONENT',
    'STATE',
    'EFFECT',
    'ON',
    'RENDER',
    'SERVER',
    'IMPORT',
    'FROM',
    'ROUTE',
    'WHEN',
    'AI',
    'COMPUTED',
    'RETURN',
    'BEFORE_MOUNT', // beforeMount
    'ON_MOUNT', // onMount
    'ON_CLEANUP', // onCleanup
    'ACTION', // action — declares an AI-callable function
    'DATA', // data — server-block async data binding

    // Literals
    'IDENT', // myVariable, Component, etc.
    'NUMBER', // 42, 3.14
    'STRING', // "hello", 'world'
    'TEMPLATE', // `hello ${name}`
    'BOOL', // true, false
    'NULL', // null
    'UNDEFINED', // undefined

    // JSX
    'JSX_OPEN', // <div (or <> for fragment — empty string tag)
    'JSX_CLOSE', // </div> (or </> for fragment)
    'JSX_SELF_CLOSE', // />
    'JSX_TEXT', // text content
    'JSX_EXPR_OPEN', // {
    'JSX_EXPR_CLOSE', // }
    'ON_EVENT', // on:click, on:input

    // Special blocks
    'STYLE_BLOCK', // style { raw css content }

    // Operators
    'ASSIGN', // =
    'PLUS_PLUS', // ++
    'MINUS_MINUS', // --
    'PLUS_EQ', // +=
    'MINUS_EQ', // -=
    'ARROW', // =>
    'EQ_EQ', // ==
    'NOT_EQ', // !=
    'EQ_EQ_EQ', // ===
    'NOT_EQ_EQ', // !==
    'LT', // <
    'GT', // >
    'LT_EQ', // <=
    'GT_EQ', // >=
    'AND', // &&
    'OR', // ||
    'NOT', // !
    'PLUS', // +
    'MINUS', // -
    'STAR', // *
    'SLASH', // /
    'PERCENT', // %
    'QUESTION', // ?

    // Delimiters
    'LBRACE', // {
    'RBRACE', // }
    'LPAREN', // (
    'RPAREN', // )
    'LBRACKET', // [
    'RBRACKET', // ]

    // Punctuation
    'COLON', // :
    'SEMICOLON', // ;
    'COMMA', // ,
    'DOT', // .
    'SPREAD', // ...

    // Special
    'NEWLINE',
    'EOF',
  ].map((name) => [name, name]),
);
98
+
99
+ // Keywords map — for fast lookup
100
// Reserved words mapped to their token types. `true`/`false` share T.BOOL;
// `null` and `undefined` are lexed as dedicated literal tokens.
// Lookup is case-sensitive (e.g. `onMount`, not `onmount`).
const KEYWORDS = new Map(
  Object.entries({
    component: T.COMPONENT,
    state: T.STATE,
    effect: T.EFFECT,
    on: T.ON,
    render: T.RENDER,
    server: T.SERVER,
    import: T.IMPORT,
    from: T.FROM,
    route: T.ROUTE,
    when: T.WHEN,
    ai: T.AI,
    computed: T.COMPUTED,
    return: T.RETURN,
    beforeMount: T.BEFORE_MOUNT,
    onMount: T.ON_MOUNT,
    onCleanup: T.ON_CLEANUP,
    action: T.ACTION,
    data: T.DATA,
    true: T.BOOL,
    false: T.BOOL,
    null: T.NULL,
    undefined: T.UNDEFINED,
  }),
);
124
+
125
+ // ─── Token ───────────────────────────────────────────────────────────────────
126
/**
 * A single lexical unit produced by the lexer.
 *
 * Fields: `type` (one of the `T` token type strings), `value` (the token's
 * payload), and the 1-based `line` / `col` where the token starts.
 */
export class Token {
  /**
   * @param {string} type - Token type.
   * @param {*} value - Token payload (string, number, or null).
   * @param {number} line - Line on which the token starts.
   * @param {number} col - Column on which the token starts.
   */
  constructor(type, value, line, col) {
    Object.assign(this, { type, value, line, col });
  }

  /** @returns {string} Debug form, e.g. `Token(IDENT, "foo", 3:7)`. */
  toString() {
    const payload = JSON.stringify(this.value);
    const where = `${this.line}:${this.col}`;
    return `Token(${this.type}, ${payload}, ${where})`;
  }
}
138
+
139
+ // ─── LexerError ───────────────────────────────────────────────────────────────
140
/**
 * Error raised when the lexer cannot tokenize the input.
 *
 * The message embeds the offending source line with a caret under the
 * reported column. `line` and `col` (both 1-based) are also exposed as
 * properties for programmatic use.
 */
export class LexerError extends Error {
  /**
   * @param {string} message - What went wrong.
   * @param {number} line - 1-based line of the error.
   * @param {number} col - 1-based column of the error.
   * @param {string} [source] - Full source text, used only to render the snippet.
   */
  constructor(message, line, col, source) {
    // Building the diagnostic must never throw itself: tolerate a missing
    // source and out-of-range positions instead of masking the real error.
    const lines = String(source ?? '').split(/\r?\n/); // drop '\r' of CRLF files
    const snippet = lines[line - 1] ?? '';
    const pointer = `${' '.repeat(Math.max(0, col - 1))}^`;
    super(
      `[Clarity Lexer] ${message}\n` +
        ` → Line ${line}, Col ${col}\n` +
        ` ${snippet}\n` +
        ` ${pointer}\n` +
        ` LLM-hint: Check for unclosed strings, invalid characters, or typos near this position.`
    );
    this.name = 'LexerError';
    this.line = line;
    this.col = col;
  }
}
156
+
157
+ // ─── Lexer ───────────────────────────────────────────────────────────────────
158
/** HTML entities decoded inside JSX text, keyed by the characters between '&' and ';'. */
const JSX_ENTITIES = new Map([
  ['lt', '<'],
  ['gt', '>'],
  ['amp', '&'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00a0'],
  ['#39', "'"],
]);

/** Escape letters recognised in quoted strings; any other escaped character stands for itself. */
const STRING_ESCAPES = new Map([
  ['n', '\n'],
  ['t', '\t'],
  ['r', '\r'],
  ['\\', '\\'],
  ["'", "'"],
  ['"', '"'],
]);

/**
 * Hand-written scanner that converts Clarity source text into a flat array of
 * `Token` objects. Each token records the 1-based line/column where it starts
 * and, once emitted by `tokenize()`, the absolute `start`/`end` source offsets.
 *
 * Newlines are emitted as NEWLINE tokens; spaces, tabs, carriage returns and
 * comments are skipped.
 */
export class Lexer {
  /**
   * @param {string} source - Source text to tokenize.
   * @param {string} [filename] - Stored on the instance; not otherwise read by this class.
   */
  constructor(source, filename = '<anonymous>') {
    this.source = source;
    this.filename = filename;
    this.pos = 0; // absolute offset into `source`
    this.line = 1; // 1-based current line
    this.col = 1; // 1-based current column
    this.tokens = [];
    this._inJSX = 0; // JSX nesting depth (never updated anywhere in this class)
  }

  // ── Public API ──

  /**
   * Scans the remaining input and returns the token list, terminated by an
   * EOF token.
   *
   * @returns {Token[]} The accumulated tokens (same array as `this.tokens`).
   * @throws {LexerError} On an unexpected character or an unterminated
   *   string, template literal, or style block.
   */
  tokenize() {
    while (!this._atEnd()) {
      this._skipWhitespaceAndComments();
      if (this._atEnd()) break;

      const startPos = this.pos;
      const token = this._nextToken();
      if (token) {
        // Stamp absolute source offsets so the parser can reconstruct JSX text
        // runs verbatim (preserving exact punctuation/spacing).
        token.start = startPos;
        token.end = this.pos;
        this.tokens.push(token);
      }
    }

    // EOF carries offsets too, so consumers can treat every token uniformly.
    const eof = new Token(T.EOF, null, this.line, this.col);
    eof.start = this.pos;
    eof.end = this.pos;
    this.tokens.push(eof);
    return this.tokens;
  }

  // ── Core Scanner ──

  /**
   * Reads exactly one token starting at the current position. The caller must
   * already have skipped whitespace and comments.
   *
   * @returns {Token}
   * @throws {LexerError} If the current character cannot start any token.
   */
  _nextToken() {
    const line = this.line;
    const col = this.col;
    const ch = this._peek();
    const next = this._peekAt(1);

    // Consume `text` and build a token positioned at the token's first character.
    const emit = (type, text) => {
      this._advance(text.length);
      return new Token(type, text, line, col);
    };

    if (this._isDigit(ch)) return this._readNumber(line, col);
    if (ch === '"' || ch === "'") return this._readString(ch, line, col);
    // Template literals (for JSX expressions and multiline strings)
    if (ch === '`') return this._readTemplateLiteral(line, col);
    // Identifiers, keywords, and Unicode text (including Turkish characters)
    if (this._isAlpha(ch) || ch === '_') return this._readIdent(line, col);

    switch (ch) {
      case '.':
        return next === '.' && this._peekAt(2) === '.' ? emit(T.SPREAD, '...') : emit(T.DOT, '.');

      case '=':
        if (next === '=') {
          return this._peekAt(2) === '=' ? emit(T.EQ_EQ_EQ, '===') : emit(T.EQ_EQ, '==');
        }
        if (next === '>') return emit(T.ARROW, '=>');
        return emit(T.ASSIGN, '=');

      case '!':
        if (next === '=') {
          return this._peekAt(2) === '=' ? emit(T.NOT_EQ_EQ, '!==') : emit(T.NOT_EQ, '!=');
        }
        return emit(T.NOT, '!');

      case '<':
        // '<' directly followed by a letter or '_' is always read as a JSX open
        // tag; a less-than comparison needs something else (e.g. a space) after '<'.
        if (this._isAlpha(next) || next === '_') return this._readJSXOpen(line, col);
        if (next === '/') return this._readJSXClose(line, col);
        if (next === '>') {
          // Fragment open: <> — emit JSX_OPEN with an empty tag name.
          // Only '<' is consumed; '>' is left to be lexed as a separate GT token
          // so the parser's attribute-loop termination on GT still works.
          this._advance();
          return new Token(T.JSX_OPEN, '', line, col);
        }
        if (next === '=') return emit(T.LT_EQ, '<=');
        return emit(T.LT, '<');

      case '>':
        return next === '=' ? emit(T.GT_EQ, '>=') : emit(T.GT, '>');

      case '+':
        if (next === '+') return emit(T.PLUS_PLUS, '++');
        if (next === '=') return emit(T.PLUS_EQ, '+=');
        return emit(T.PLUS, '+');

      case '-':
        if (next === '-') return emit(T.MINUS_MINUS, '--');
        if (next === '=') return emit(T.MINUS_EQ, '-=');
        return emit(T.MINUS, '-');

      case '&': {
        if (next === '&') return emit(T.AND, '&&');
        // HTML entity inside JSX text (e.g. &lt; &gt; &amp; &quot; &#39;)
        const entity = this._readEntity(line, col);
        if (entity) return entity;
        break; // a lone '&' is reported as an unexpected character below
      }

      case '|':
        if (next === '|') return emit(T.OR, '||');
        break; // a lone '|' is reported as an unexpected character below

      case '\n':
        // Newline — significant in Clarity (marks end of statement)
        return emit(T.NEWLINE, '\n');

      default:
        break;
    }

    const singleType = this._singleCharType(ch);
    if (singleType !== undefined) return emit(singleType, ch);

    throw new LexerError(
      `Unexpected character: ${JSON.stringify(ch)}`,
      line, col, this.source
    );
  }

  /**
   * Maps a one-character delimiter/punctuation/operator to its token type.
   * A '/' reaching this point is a plain slash: comment openers were already
   * consumed by `_skipWhitespaceAndComments`.
   *
   * @param {string} ch
   * @returns {string|undefined} Token type, or undefined when `ch` is not one of them.
   */
  _singleCharType(ch) {
    switch (ch) {
      case '{': return T.LBRACE;
      case '}': return T.RBRACE;
      case '(': return T.LPAREN;
      case ')': return T.RPAREN;
      case '[': return T.LBRACKET;
      case ']': return T.RBRACKET;
      case ':': return T.COLON;
      case ';': return T.SEMICOLON;
      case ',': return T.COMMA;
      case '*': return T.STAR;
      case '%': return T.PERCENT;
      case '?': return T.QUESTION;
      case '/': return T.SLASH;
      default: return undefined;
    }
  }

  /**
   * Tries to read a known HTML entity (`&name;`) at the current '&'.
   * Looks ahead at most 11 characters for the terminating ';'.
   *
   * @returns {Token|null} A JSX_TEXT token holding the decoded character, or
   *   null (nothing consumed) when the text is not a recognised entity.
   */
  _readEntity(line, col) {
    let name = '';
    let offset = 1;
    while (offset < 12 && this.pos + offset < this.source.length) {
      const c = this.source[this.pos + offset];
      if (c === ';') break;
      name += c;
      offset++;
    }
    // A Map lookup (not a plain-object index) keeps names such as
    // "constructor" or "toString" from matching inherited properties.
    if (this.source[this.pos + offset] !== ';' || !JSX_ENTITIES.has(name)) return null;
    this._advance(offset + 1); // consume &name;
    return new Token(T.JSX_TEXT, JSX_ENTITIES.get(name), line, col);
  }

  // ── Readers ──

  /**
   * Reads a decimal number: digits with at most one '.'.
   * A second '.' ends the literal (so `1.2.3` lexes as 1.2, '.', 3), and a '.'
   * that begins '..' is left alone so spread syntax still lexes after a number.
   *
   * @returns {Token} NUMBER token whose value is a JavaScript number.
   */
  _readNumber(line, col) {
    let text = '';
    let seenDot = false;
    while (!this._atEnd()) {
      const ch = this._peek();
      if (ch === '.') {
        if (seenDot || this._peekAt(1) === '.') break;
        seenDot = true;
      } else if (!this._isDigit(ch)) {
        break;
      }
      text += this._advance();
    }
    return new Token(T.NUMBER, Number.parseFloat(text), line, col);
  }

  /**
   * Reads a single- or double-quoted string, resolving backslash escapes.
   *
   * @param {string} quote - The opening quote character; the same character closes the string.
   * @returns {Token} STRING token holding the unescaped contents.
   * @throws {LexerError} If a raw newline or end of input occurs before the closing quote.
   */
  _readString(quote, line, col) {
    this._advance(); // opening quote
    let str = '';
    while (!this._atEnd() && this._peek() !== quote) {
      const ch = this._peek();
      if (ch === '\\') {
        this._advance();
        const esc = this._advance();
        str += STRING_ESCAPES.get(esc) ?? esc;
      } else if (ch === '\n') {
        throw new LexerError('Unterminated string literal', line, col, this.source);
      } else {
        str += this._advance();
      }
    }
    if (this._atEnd()) throw new LexerError('Unterminated string literal', line, col, this.source);
    this._advance(); // closing quote
    return new Token(T.STRING, str, line, col);
  }

  /**
   * Reads a backtick template literal and returns its raw body, including
   * `${...}` segments and escape sequences verbatim. Brace depth is tracked so
   * that a backtick inside an interpolation does not end the literal.
   *
   * @returns {Token} TEMPLATE token holding the raw text between the backticks.
   * @throws {LexerError} If end of input is reached before the closing backtick.
   */
  _readTemplateLiteral(line, col) {
    this._advance(); // opening backtick
    let raw = '';
    let depth = 0; // nesting inside ${ ... }

    while (!this._atEnd()) {
      const ch = this._peek();

      // Closing backtick — only valid when not inside an expression
      if (ch === '`' && depth === 0) break;

      // Escape sequence: keep the backslash and the escaped character as-is
      if (ch === '\\') {
        raw += this._advance(2);
        continue;
      }

      // Start of interpolation: ${
      if (ch === '$' && this._peekAt(1) === '{') {
        raw += this._advance(2);
        depth++;
        continue;
      }

      // Braces only affect nesting while inside an interpolation
      if (depth > 0) {
        if (ch === '{') depth++;
        else if (ch === '}') depth--;
      }

      raw += this._advance(); // _advance keeps line/col correct across newlines
    }

    if (this._atEnd()) throw new LexerError('Unterminated template literal', line, col, this.source);
    this._advance(); // closing backtick
    // Raw content is returned; splitting on ${...} is left to the consumer.
    return new Token(T.TEMPLATE, raw, line, col);
  }

  /**
   * Reads an identifier or keyword. The identifier `style` followed
   * (optionally after whitespace/newlines) by '{' is special-cased: the whole
   * brace-balanced body is returned as one STYLE_BLOCK token so CSS is kept
   * verbatim instead of being tokenized.
   *
   * @returns {Token} STYLE_BLOCK, a keyword token, or IDENT.
   * @throws {LexerError} If a style block is not closed before end of input.
   */
  _readIdent(line, col) {
    let ident = '';
    while (!this._atEnd() && (this._isAlphaNum(this._peek()) || this._peek() === '_')) {
      ident += this._advance();
    }

    if (ident === 'style') {
      let lookahead = this.pos;
      while (lookahead < this.source.length && ' \t\r\n'.includes(this.source[lookahead])) {
        lookahead++;
      }
      if (this.source[lookahead] === '{') {
        this._advance(lookahead - this.pos + 1); // skipped whitespace plus the '{'
        let css = '';
        let depth = 1;
        while (!this._atEnd()) {
          const c = this._advance();
          if (c === '{') {
            depth++;
          } else if (c === '}') {
            depth--;
            if (depth === 0) break; // final '}' is not part of the CSS text
          }
          css += c;
        }
        if (depth > 0) {
          // Without this check the block would silently swallow the rest of the file.
          throw new LexerError('Unterminated style block', line, col, this.source);
        }
        return new Token(T.STYLE_BLOCK, css.trim(), line, col);
      }
    }

    // `on:event` attributes are not special-cased here: `on` is a keyword, so
    // `on:click` comes out as ON, COLON, IDENT.
    return new Token(KEYWORDS.get(ident) ?? T.IDENT, ident, line, col);
  }

  /**
   * Reads a tag name at the current position: letters, digits, '_', '-' and
   * '.' (for member-style tags such as `Foo.Bar`).
   *
   * @returns {string} The tag name, possibly empty.
   */
  _readTagName() {
    let tagName = '';
    while (!this._atEnd()) {
      const ch = this._peek();
      if (!this._isAlphaNum(ch) && ch !== '_' && ch !== '-' && ch !== '.') break;
      tagName += this._advance();
    }
    return tagName;
  }

  /**
   * Reads `<tagName`. Attributes and the closing '>' are left in the input.
   *
   * @returns {Token} JSX_OPEN token whose value is the tag name.
   */
  _readJSXOpen(line, col) {
    this._advance(); // <
    return new Token(T.JSX_OPEN, this._readTagName(), line, col);
  }

  /**
   * Reads `</tagName>`, accepting the same tag-name characters as an opening
   * tag so `<Foo.Bar>` and `</Foo.Bar>` produce matching names. The trailing
   * '>' is consumed when present (spaces/tabs before it are skipped).
   *
   * @returns {Token} JSX_CLOSE token whose value is the tag name ('' for `</>`).
   */
  _readJSXClose(line, col) {
    this._advance(2); // </
    const tagName = this._readTagName();
    this._skipWS();
    if (this._peek() === '>') this._advance();
    return new Token(T.JSX_CLOSE, tagName, line, col);
  }

  // ── Whitespace & Comments ──

  /**
   * Skips spaces, tabs, carriage returns, `//` line comments and block
   * comments. Newlines outside block comments are NOT skipped — they are
   * significant and become NEWLINE tokens.
   */
  _skipWhitespaceAndComments() {
    while (!this._atEnd()) {
      const ch = this._peek();

      if (ch === ' ' || ch === '\t' || ch === '\r') {
        this._advance();
        continue;
      }

      // Single-line comment: stop before the newline so it is still tokenized
      if (ch === '/' && this._peekAt(1) === '/') {
        while (!this._atEnd() && this._peek() !== '\n') this._advance();
        continue;
      }

      // Multi-line comment. Line counting is left entirely to _advance();
      // bumping this.line here as well would count each newline twice.
      if (ch === '/' && this._peekAt(1) === '*') {
        this._advance(2);
        while (!this._atEnd() && !(this._peek() === '*' && this._peekAt(1) === '/')) {
          this._advance();
        }
        this._advance(2); // closing */ (no-op if the comment runs to end of input)
        continue;
      }

      break;
    }
  }

  /** Skips spaces and tabs only. */
  _skipWS() {
    while (!this._atEnd() && (this._peek() === ' ' || this._peek() === '\t')) {
      this._advance();
    }
  }

  // ── Helpers ──

  /** Character at the current position plus `offset`, or '' past the end. */
  _peek(offset = 0) { return this.source[this.pos + offset] ?? ''; }

  /** Same as `_peek`, with a required offset. */
  _peekAt(offset) { return this.source[this.pos + offset] ?? ''; }

  /** True once every character of the source has been consumed. */
  _atEnd() { return this.pos >= this.source.length; }

  /** True for ASCII digits 0-9. */
  _isDigit(ch) { return ch >= '0' && ch <= '9'; }

  /**
   * True for ASCII letters, '_', and ANY UTF-16 code unit >= 128. The last
   * rule is what admits Turkish, Greek, Cyrillic, etc.; it is a code-unit
   * range check, not a Unicode letter-category test.
   */
  _isAlpha(ch) {
    if (!ch) return false;
    const cc = ch.charCodeAt(0);
    return (cc >= 65 && cc <= 90) || // A-Z
      (cc >= 97 && cc <= 122) || // a-z
      cc === 95 || // _
      cc >= 128; // any non-ASCII code unit
  }

  /** True for anything `_isAlpha` or `_isDigit` accepts. */
  _isAlphaNum(ch) {
    if (!ch) return false;
    return this._isAlpha(ch) || this._isDigit(ch);
  }

  /**
   * Consumes up to `n` characters, keeping `line`/`col` in sync: a newline
   * moves to column 1 of the next line, anything else advances the column.
   *
   * @param {number} [n=1] - Number of characters to consume.
   * @returns {string} The consumed text (shorter than `n` at end of input).
   */
  _advance(n = 1) {
    let result = '';
    for (let i = 0; i < n; i++) {
      if (this._atEnd()) break;
      const ch = this.source[this.pos];
      if (ch === '\n') {
        this.line++;
        this.col = 1;
      } else {
        this.col++;
      }
      this.pos++;
      result += ch;
    }
    return result;
  }
}
568
+
569
+ // ─── Convenience export ───────────────────────────────────────────────────────
570
/**
 * One-shot helper: builds a `Lexer` for `source` and runs it.
 *
 * @param {string} source - Source text to tokenize.
 * @param {string} [filename] - Forwarded to the `Lexer` constructor.
 * @returns {Token[]} Whatever `Lexer#tokenize()` returns.
 */
export function tokenize(source, filename) {
  const lexer = new Lexer(source, filename);
  return lexer.tokenize();
}