npm - @ozsarman/clarityjs - Versions diffs - 0.6.0 - Mend

@ozsarman/clarityjs 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/src/lexer.js ADDED Viewed

@@ -0,0 +1,572 @@
+/**
+ * Clarity.js Lexer — Tokenizer
+ *
+ * Converts .clarity source text into a flat stream of tokens.
+ * Designed with LLM-readability in mind: every token type is explicit,
+ * named, and carries source location for precise error messages.
+ *
+ * Author: Claude (Anthropic)
+ */
+// ─── Token Types ─────────────────────────────────────────────────────────────
+export const T = {
+  // Keywords
+  COMPONENT:  'COMPONENT',
+  STATE:      'STATE',
+  EFFECT:     'EFFECT',
+  ON:         'ON',
+  RENDER:     'RENDER',
+  SERVER:     'SERVER',
+  IMPORT:     'IMPORT',
+  FROM:       'FROM',
+  ROUTE:      'ROUTE',
+  WHEN:       'WHEN',
+  AI:         'AI',
+  COMPUTED:   'COMPUTED',
+  RETURN:     'RETURN',
+  BEFORE_MOUNT: 'BEFORE_MOUNT', // beforeMount
+  ON_MOUNT:   'ON_MOUNT',    // onMount
+  ON_CLEANUP: 'ON_CLEANUP',  // onCleanup
+  ACTION:     'ACTION',      // action — declares an AI-callable function
+  DATA:       'DATA',        // data — server-block async data binding
+  // Literals
+  IDENT:      'IDENT',      // myVariable, Component, etc.
+  NUMBER:     'NUMBER',     // 42, 3.14
+  STRING:     'STRING',     // "hello", 'world'
+  TEMPLATE:   'TEMPLATE',   // `hello ${name}`
+  BOOL:       'BOOL',       // true, false
+  NULL:       'NULL',       // null
+  UNDEFINED:  'UNDEFINED',  // undefined
+  // JSX
+  JSX_OPEN:       'JSX_OPEN',       // <div  (or <> for fragment — empty string tag)
+  JSX_CLOSE:      'JSX_CLOSE',      // </div>  (or </> for fragment)
+  JSX_SELF_CLOSE: 'JSX_SELF_CLOSE', // />
+  JSX_TEXT:       'JSX_TEXT',       // text content
+  JSX_EXPR_OPEN:  'JSX_EXPR_OPEN',  // {
+  JSX_EXPR_CLOSE: 'JSX_EXPR_CLOSE', // }
+  ON_EVENT:       'ON_EVENT',       // on:click, on:input
+  // Special blocks
+  STYLE_BLOCK:    'STYLE_BLOCK',    // style { raw css content }
+  // Operators
+  ASSIGN:     'ASSIGN',     // =
+  PLUS_PLUS:  'PLUS_PLUS',  // ++
+  MINUS_MINUS:'MINUS_MINUS',// --
+  PLUS_EQ:    'PLUS_EQ',    // +=
+  MINUS_EQ:   'MINUS_EQ',   // -=
+  ARROW:      'ARROW',      // =>
+  EQ_EQ:      'EQ_EQ',      // ==
+  NOT_EQ:     'NOT_EQ',     // !=
+  EQ_EQ_EQ:   'EQ_EQ_EQ',  // ===
+  NOT_EQ_EQ:  'NOT_EQ_EQ',  // !==
+  LT:         'LT',         // <
+  GT:         'GT',         // >
+  LT_EQ:      'LT_EQ',      // <=
+  GT_EQ:      'GT_EQ',      // >=
+  AND:        'AND',        // &&
+  OR:         'OR',         // ||
+  NOT:        'NOT',        // !
+  PLUS:       'PLUS',       // +
+  MINUS:      'MINUS',      // -
+  STAR:       'STAR',       // *
+  SLASH:      'SLASH',      // /
+  PERCENT:    'PERCENT',    // %
+  QUESTION:   'QUESTION',   // ?
+  // Delimiters
+  LBRACE:     'LBRACE',     // {
+  RBRACE:     'RBRACE',     // }
+  LPAREN:     'LPAREN',     // (
+  RPAREN:     'RPAREN',     // )
+  LBRACKET:   'LBRACKET',   // [
+  RBRACKET:   'RBRACKET',   // ]
+  // Punctuation
+  COLON:      'COLON',      // :
+  SEMICOLON:  'SEMICOLON',  // ;
+  COMMA:      'COMMA',      // ,
+  DOT:        'DOT',        // .
+  SPREAD:     'SPREAD',     // ...
+  // Special
+  NEWLINE:    'NEWLINE',
+  EOF:        'EOF',
+};
+// Keywords map — for fast lookup
+const KEYWORDS = new Map([
+  ['component', T.COMPONENT],
+  ['state',     T.STATE],
+  ['effect',    T.EFFECT],
+  ['on',        T.ON],
+  ['render',    T.RENDER],
+  ['server',    T.SERVER],
+  ['import',    T.IMPORT],
+  ['from',      T.FROM],
+  ['route',     T.ROUTE],
+  ['when',      T.WHEN],
+  ['ai',        T.AI],
+  ['computed',  T.COMPUTED],
+  ['return',    T.RETURN],
+  ['beforeMount', T.BEFORE_MOUNT],
+  ['onMount',   T.ON_MOUNT],
+  ['onCleanup', T.ON_CLEANUP],
+  ['action',    T.ACTION],
+  ['data',      T.DATA],
+  ['true',      T.BOOL],
+  ['false',     T.BOOL],
+  ['null',      T.NULL],
+  ['undefined', T.UNDEFINED],
+]);
+// ─── Token ───────────────────────────────────────────────────────────────────
+export class Token {
+  constructor(type, value, line, col) {
+    this.type  = type;
+    this.value = value;
+    this.line  = line;
+    this.col   = col;
+  }
+  toString() {
+    return `Token(${this.type}, ${JSON.stringify(this.value)}, ${this.line}:${this.col})`;
+  }
+}
+// ─── LexerError ───────────────────────────────────────────────────────────────
+/**
+ * Error raised for input the lexer cannot tokenize. The message embeds the
+ * offending source line and a caret under the reported column.
+ */
+export class LexerError extends Error {
+  /**
+   * @param {string} message - What went wrong.
+   * @param {number} line    - 1-based line of the problem.
+   * @param {number} col     - 1-based column of the problem.
+   * @param {string} source  - Full source text, used only to extract the
+   *   snippet line. A missing source yields an empty snippet rather than a
+   *   secondary TypeError that would hide the real problem.
+   */
+  constructor(message, line, col, source) {
+    const lines = String(source ?? '').split('\n');
+    // Drop a trailing '\r' (CRLF input) so it cannot garble terminal output.
+    const snippet = (lines[line - 1] || '').replace(/\r$/, '');
+    // String.prototype.repeat throws RangeError on a negative count, so clamp.
+    const pointer = ' '.repeat(Math.max(0, col - 1)) + '^';
+    super(
+      `[Clarity Lexer] ${message}\n` +
+      `  → Line ${line}, Col ${col}\n` +
+      `  ${snippet}\n` +
+      `  ${pointer}\n` +
+      `  LLM-hint: Check for unclosed strings, invalid characters, or typos near this position.`
+    );
+    this.name = 'LexerError';
+    this.line = line;
+    this.col = col;
+  }
+}
+// ─── Lexer ───────────────────────────────────────────────────────────────────
+export class Lexer {
+  constructor(source, filename = '<anonymous>') {
+    this.source   = source;
+    this.filename = filename;
+    this.pos      = 0;
+    this.line     = 1;
+    this.col      = 1;
+    this.tokens   = [];
+    this._inJSX   = 0;  // JSX nesting depth
+  }
+  // ── Public API ──
+  tokenize() {
+    while (!this._atEnd()) {
+      this._skipWhitespaceAndComments();
+      if (this._atEnd()) break;
+      const startPos = this.pos;
+      const token = this._nextToken();
+      if (token) {
+        // Stamp absolute source offsets so the parser can reconstruct JSX text
+        // runs verbatim (preserving exact punctuation/spacing).
+        token.start = startPos;
+        token.end   = this.pos;
+        this.tokens.push(token);
+      }
+    }
+    this.tokens.push(new Token(T.EOF, null, this.line, this.col));
+    return this.tokens;
+  }
+  // ── Core Scanner ──
+  _nextToken() {
+    const start = this.pos;
+    const line  = this.line;
+    const col   = this.col;
+    const ch    = this._peek();
+    // Numbers
+    if (this._isDigit(ch)) return this._readNumber(line, col);
+    // Strings
+    if (ch === '"' || ch === "'") return this._readString(ch, line, col);
+    // Template literals (for JSX expressions and multiline strings)
+    if (ch === '`') return this._readTemplateLiteral(line, col);
+    // Identifiers, keywords, and Unicode text (including Turkish characters)
+    if (this._isAlpha(ch) || ch === '_') return this._readIdent(line, col);
+    // Multi-char operators
+    if (ch === '.') {
+      if (this._peekAt(1) === '.' && this._peekAt(2) === '.') {
+        this._advance(3);
+        return new Token(T.SPREAD, '...', line, col);
+      }
+      this._advance();
+      return new Token(T.DOT, '.', line, col);
+    }
+    if (ch === '=') {
+      if (this._peekAt(1) === '=') {
+        if (this._peekAt(2) === '=') {
+          this._advance(3);
+          return new Token(T.EQ_EQ_EQ, '===', line, col);
+        }
+        this._advance(2);
+        return new Token(T.EQ_EQ, '==', line, col);
+      }
+      if (this._peekAt(1) === '>') {
+        this._advance(2);
+        return new Token(T.ARROW, '=>', line, col);
+      }
+      this._advance();
+      return new Token(T.ASSIGN, '=', line, col);
+    }
+    if (ch === '!') {
+      if (this._peekAt(1) === '=') {
+        if (this._peekAt(2) === '=') {
+          this._advance(3);
+          return new Token(T.NOT_EQ_EQ, '!==', line, col);
+        }
+        this._advance(2);
+        return new Token(T.NOT_EQ, '!=', line, col);
+      }
+      this._advance();
+      return new Token(T.NOT, '!', line, col);
+    }
+    if (ch === '<') {
+      // Could be JSX open tag or less-than
+      if (this._isAlpha(this._peekAt(1)) || this._peekAt(1) === '_') {
+        return this._readJSXOpen(line, col);
+      }
+      if (this._peekAt(1) === '/') {
+        return this._readJSXClose(line, col);
+      }
+      // Fragment open: <> — emit JSX_OPEN with empty tag name.
+      // We consume ONLY the '<', leaving '>' as a separate GT token so the
+      // normal attr-loop-termination logic (_check(T.GT)) still works correctly.
+      if (this._peekAt(1) === '>') {
+        this._advance(); // consume only '<'
+        return new Token(T.JSX_OPEN, '', line, col);
+      }
+      if (this._peekAt(1) === '=') {
+        this._advance(2);
+        return new Token(T.LT_EQ, '<=', line, col);
+      }
+      this._advance();
+      return new Token(T.LT, '<', line, col);
+    }
+    if (ch === '>') {
+      if (this._peekAt(1) === '=') {
+        this._advance(2);
+        return new Token(T.GT_EQ, '>=', line, col);
+      }
+      this._advance();
+      return new Token(T.GT, '>', line, col);
+    }
+    if (ch === '+') {
+      if (this._peekAt(1) === '+') { this._advance(2); return new Token(T.PLUS_PLUS, '++', line, col); }
+      if (this._peekAt(1) === '=') { this._advance(2); return new Token(T.PLUS_EQ, '+=', line, col); }
+      this._advance();
+      return new Token(T.PLUS, '+', line, col);
+    }
+    if (ch === '-') {
+      if (this._peekAt(1) === '-') { this._advance(2); return new Token(T.MINUS_MINUS, '--', line, col); }
+      if (this._peekAt(1) === '=') { this._advance(2); return new Token(T.MINUS_EQ, '-=', line, col); }
+      this._advance();
+      return new Token(T.MINUS, '-', line, col);
+    }
+    if (ch === '&' && this._peekAt(1) === '&') { this._advance(2); return new Token(T.AND, '&&', line, col); }
+    if (ch === '|' && this._peekAt(1) === '|') { this._advance(2); return new Token(T.OR, '||', line, col); }
+    // HTML entity inside JSX text (e.g. &lt; &gt; &amp; &quot; &#39;)
+    if (ch === '&') {
+      const ENTITIES = { lt: '<', gt: '>', amp: '&', quot: '"', apos: "'", nbsp: '\u00a0', '#39': "'" };
+      let name = '';
+      let i = 1;
+      while (i < 12 && this.pos + i < this.source.length) {
+        const c = this.source[this.pos + i];
+        if (c === ';') break;
+        name += c;
+        i++;
+      }
+      if (this.source[this.pos + i] === ';' && ENTITIES[name] !== undefined) {
+        this._advance(i + 1); // consume &name;
+        return new Token(T.JSX_TEXT, ENTITIES[name], line, col);
+      }
+    }
+    // Single-char tokens
+    const single = {
+      '{': T.LBRACE, '}': T.RBRACE,
+      '(': T.LPAREN, ')': T.RPAREN,
+      '[': T.LBRACKET, ']': T.RBRACKET,
+      ':': T.COLON, ';': T.SEMICOLON,
+      ',': T.COMMA,
+      '*': T.STAR, '%': T.PERCENT, '?': T.QUESTION,
+    };
+    if (ch in single) {
+      this._advance();
+      return new Token(single[ch], ch, line, col);
+    }
+    if (ch === '/') {
+      // Already handled comments in _skipWhitespaceAndComments
+      this._advance();
+      return new Token(T.SLASH, '/', line, col);
+    }
+    // Newline — significant in Clarity (marks end of statement)
+    if (ch === '\n') {
+      this._advance();
+      return new Token(T.NEWLINE, '\n', line, col);
+    }
+    throw new LexerError(
+      `Unexpected character: ${JSON.stringify(ch)}`,
+      line, col, this.source
+    );
+  }
+  // ── Readers ──
+  _readNumber(line, col) {
+    let num = '';
+    while (!this._atEnd() && (this._isDigit(this._peek()) || this._peek() === '.')) {
+      if (this._peek() === '.' && this._peekAt(1) === '.') break; // spread
+      num += this._advance();
+    }
+    return new Token(T.NUMBER, parseFloat(num), line, col);
+  }
+  _readString(quote, line, col) {
+    this._advance(); // opening quote
+    let str = '';
+    while (!this._atEnd() && this._peek() !== quote) {
+      if (this._peek() === '\\') {
+        this._advance();
+        const esc = this._advance();
+        const escMap = { n: '\n', t: '\t', r: '\r', '\\': '\\', "'": "'", '"': '"' };
+        str += escMap[esc] ?? esc;
+      } else if (this._peek() === '\n') {
+        throw new LexerError('Unterminated string literal', line, col, this.source);
+      } else {
+        str += this._advance();
+      }
+    }
+    if (this._atEnd()) throw new LexerError('Unterminated string literal', line, col, this.source);
+    this._advance(); // closing quote
+    return new Token(T.STRING, str, line, col);
+  }
+  _readTemplateLiteral(line, col) {
+    this._advance(); // opening backtick
+    let raw = '';
+    let depth = 0; // tracks nesting inside ${ ... }
+    while (!this._atEnd()) {
+      const ch = this._peek();
+      // Closing backtick — only valid when not inside an expression
+      if (ch === '`' && depth === 0) break;
+      // Escape sequence
+      if (ch === '\\') {
+        raw += this._advance(); // '\'
+        raw += this._advance(); // escaped char
+        continue;
+      }
+      // Start of interpolation: ${
+      if (ch === '$' && this._peekAt(1) === '{') {
+        raw += this._advance(); // '$'
+        raw += this._advance(); // '{'
+        depth++;
+        continue;
+      }
+      // Opening brace inside expression (nested)
+      if (ch === '{' && depth > 0) {
+        depth++;
+        raw += this._advance();
+        continue;
+      }
+      // Closing brace — decrements depth
+      if (ch === '}' && depth > 0) {
+        depth--;
+        raw += this._advance();
+        continue;
+      }
+      // Track newlines for source positions
+      if (ch === '\n') { this.line++; this.col = 1; this.pos++; raw += '\n'; continue; }
+      raw += this._advance();
+    }
+    if (this._atEnd()) throw new LexerError('Unterminated template literal', line, col, this.source);
+    this._advance(); // closing backtick
+    // Return raw content — the parser will split it on ${...} and parse expressions
+    return new Token(T.TEMPLATE, raw, line, col);
+  }
+  _readIdent(line, col) {
+    let ident = '';
+    while (!this._atEnd() && (this._isAlphaNum(this._peek()) || this._peek() === '_')) {
+      ident += this._advance();
+    }
+    // Special: `style { ... }` block — scan raw CSS content into a single token.
+    // We look ahead for the opening '{' (possibly separated by whitespace/newlines).
+    // This preserves CSS syntax verbatim instead of mangling it through Clarity's tokenizer.
+    if (ident === 'style') {
+      let lookaheadPos = this.pos;
+      while (lookaheadPos < this.source.length &&
+             (this.source[lookaheadPos] === ' ' || this.source[lookaheadPos] === '\t' ||
+              this.source[lookaheadPos] === '\r' || this.source[lookaheadPos] === '\n')) {
+        lookaheadPos++;
+      }
+      if (this.source[lookaheadPos] === '{') {
+        // Advance cursor to and past the opening '{'
+        while (this.pos < lookaheadPos) this._advance();
+        this._advance(); // consume '{'
+        // Read raw CSS content until the matching closing '}'
+        let css = '';
+        let depth = 1;
+        while (!this._atEnd() && depth > 0) {
+          const c = this.source[this.pos];
+          if (c === '\n') { this.line++; this.col = 1; this.pos++; }
+          else            { this.col++; this.pos++; }
+          if      (c === '{') { depth++; css += c; }
+          else if (c === '}') { depth--; if (depth > 0) css += c; } // don't include final '}'
+          else                { css += c; }
+        }
+        return new Token(T.STYLE_BLOCK, css.trim(), line, col);
+      }
+    }
+    // Check for on:event syntax (e.g. on:click written as identifier chain)
+    // We handle this at attribute level in parser, but tokenize as IDENT + COLON + IDENT
+    const kwType = KEYWORDS.get(ident);
+    const type = kwType ?? T.IDENT;
+    return new Token(type, ident, line, col);
+  }
+  _readJSXOpen(line, col) {
+    this._advance(); // <
+    let tagName = '';
+    while (!this._atEnd() && (this._isAlphaNum(this._peek()) || this._peek() === '_' || this._peek() === '-' || this._peek() === '.')) {
+      tagName += this._advance();
+    }
+    return new Token(T.JSX_OPEN, tagName, line, col);
+  }
+  _readJSXClose(line, col) {
+    this._advance(); // <
+    this._advance(); // /
+    let tagName = '';
+    while (!this._atEnd() && (this._isAlphaNum(this._peek()) || this._peek() === '_' || this._peek() === '-')) {
+      tagName += this._advance();
+    }
+    // consume >
+    this._skipWS();
+    if (this._peek() === '>') this._advance();
+    return new Token(T.JSX_CLOSE, tagName, line, col);
+  }
+  // ── Whitespace & Comments ──
+  _skipWhitespaceAndComments() {
+    while (!this._atEnd()) {
+      const ch = this._peek();
+      // Spaces and tabs (not newlines — those are significant)
+      if (ch === ' ' || ch === '\t' || ch === '\r') {
+        this._advance();
+        continue;
+      }
+      // Single-line comment
+      if (ch === '/' && this._peekAt(1) === '/') {
+        while (!this._atEnd() && this._peek() !== '\n') this._advance();
+        continue;
+      }
+      // Multi-line comment
+      if (ch === '/' && this._peekAt(1) === '*') {
+        this._advance(2);
+        while (!this._atEnd()) {
+          if (this._peek() === '*' && this._peekAt(1) === '/') {
+            this._advance(2);
+            break;
+          }
+          if (this._peek() === '\n') { this.line++; this.col = 1; }
+          this._advance();
+        }
+        continue;
+      }
+      break;
+    }
+  }
+  _skipWS() {
+    while (!this._atEnd() && (this._peek() === ' ' || this._peek() === '\t')) {
+      this._advance();
+    }
+  }
+  // ── Helpers ──
+  _peek(offset = 0)    { return this.source[this.pos + offset] ?? ''; }
+  _peekAt(offset)      { return this.source[this.pos + offset] ?? ''; }
+  _atEnd()             { return this.pos >= this.source.length; }
+  _isDigit(ch)         { return ch >= '0' && ch <= '9'; }
+  // Unicode letter support using charCodeAt — fast, no regex overhead
+  // Covers Basic Latin, Latin Extended (Turkish ı ğ ş ç ö ü), Greek, Cyrillic, etc.
+  _isAlpha(ch) {
+    if (!ch) return false;
+    const cc = ch.charCodeAt(0);
+    return (cc >= 65 && cc <= 90)   ||  // A-Z
+           (cc >= 97 && cc <= 122)  ||  // a-z
+           cc === 95                ||  // _
+           cc >= 128;                   // any non-ASCII (Unicode letters, Turkish, etc.)
+  }
+  _isAlphaNum(ch) {
+    if (!ch) return false;
+    return this._isAlpha(ch) || this._isDigit(ch);
+  }
+  _advance(n = 1) {
+    let result = '';
+    for (let i = 0; i < n; i++) {
+      if (this._atEnd()) break;
+      const ch = this.source[this.pos];
+      if (ch === '\n') { this.line++; this.col = 1; }
+      else { this.col++; }
+      this.pos++;
+      result += ch;
+    }
+    return result;
+  }
+}
+// ─── Convenience export ───────────────────────────────────────────────────────
+export function tokenize(source, filename) {
+  return new Lexer(source, filename).tokenize();
+}