npm - novac - Versions diffs - 1.0.0 - Mend

novac 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/src/core/lexer.js ADDED Viewed

@@ -0,0 +1,506 @@
+/**
+ * Nova Lexer
+ * ------------------------------
+ * Converts source code into structured tokens:
+ *   every token:     { type, value, line, column }
+ *   OPERATOR tokens: additionally { precedence, isUnary, isPostfix }
+ */
+// Literal words recognised by Lexer#identifier. When an identifier matches an
+// own key of this table, a LITERAL token is emitted whose value is the entry
+// below. This table is not part of module.exports.
+const LITERALS = {
+  // Use unique sentinel values for literals the Executor will wrap
+  true: Symbol("NOVA_TRUE"),
+  false: Symbol("NOVA_FALSE"),
+  null: Symbol("NOVA_NULL"),
+  // Literal word `nstr` maps to the empty string.
+  nstr: "",
+  // Literal word `nfunc` maps to a function. Each call creates a NEW symbol
+  // described "NOVA_NULL"; it is not identical (===) to LITERALS.null.
+  nfunc: () => Symbol("NOVA_NULL"),
+};
+// Reserved words. Lexer#identifier emits a KEYWORD token for any identifier
+// found in this set (after the literal and word-operator checks) and an
+// IDENTIFIER token otherwise. Exported for use by other modules.
+const KEYWORDS = new Set([
+  "var",
+  "let",
+  "const",
+  "class",
+  "if",
+  "else",
+  "for",
+  "repeat",
+  "unless",
+  "until",
+  "throw",
+  "try",
+  "catch",
+  "finally",
+  "func",
+  "function",
+  "return",
+  "give",
+  "async",
+  "await",
+]);
+const { CustomError, formatError } = require("./error");
+// Operator info for parser.
+// Maps operator text to { precedence, isUnary?, isPostfix? }. The lexer copies
+// these fields onto every OPERATOR token (missing precedence becomes 0 and
+// missing flags become false).
+// - Symbolic operators are matched by Lexer#symbol, which tries the
+//   two-character form before the single-character form.
+// - Word operators such as "delete" are matched by Lexer#identifier.
+// - This map is module-level state: the `#register operator` directive adds
+//   entries to it at lex time, so additions are visible to every Lexer
+//   instance created afterwards in the same process.
+const OPERATORS = new Map([
+  ["+", { precedence: 5, isUnary: true }],
+  ["-", { precedence: 5, isUnary: true }],
+  ["*", { precedence: 6, isUnary: true }],
+  ["/", { precedence: 6 }],
+  ["%", { precedence: 6 }],
+  ["==", { precedence: 3 }],
+  ["!=", { precedence: 3 }],
+  ["<", { precedence: 4 }],
+  ["<=", { precedence: 4 }],
+  [">", { precedence: 4 }],
+  [">=", { precedence: 4 }],
+  ["&&", { precedence: 2 }],
+  ["||", { precedence: 1 }],
+  ["delete", { precedence: 8, isUnary: true }],
+  ["!", { precedence: 7, isUnary: true }],
+  ["++", { precedence: 7, isPostfix: true }],
+  ["--", { precedence: 7, isPostfix: true }],
+  ["=>", { precedence: 0 }],
+  ["=", { precedence: 0 }],
+]);
+// Single-character punctuation. Lexer#symbol emits a PUNCTUATION token for
+// these characters, but only after the operator lookups have failed.
+const PUNCTUATION = new Set(["(", ")", "{", "}", "[", "]", ",", ".", ";", ":"]);
+class Lexer {
+  constructor(source) {
+    this.source = source;
+    this.position = 0;
+    this.line = 1;
+    this.column = 1;
+    this.tokens = [];
+    // 🔥 NEW: Preprocessor State
+    this.definitions = new Map(); // Stores defined symbols (e.g., #define DEBUG)
+    this.isSkipping = false; // True if inside an inactive #ifdef block
+    // ──────────────────────────
+  }
+  // 🔥 NEW: Helper to read an identifier (directive name or argument)
+  readIdentifier() {
+    let value = "";
+    while (!this.isAtEnd() && this.isAlphaNumeric(this.peek())) {
+      value += this.advance();
+    }
+    return value;
+  }
+  readRestOfLine() {
+    let value = "";
+    while (!this.isAtEnd() && this.peek() !== "\n") {
+      value += this.advance();
+    }
+    return value.trim();
+  }
+  // Helper to move the cursor to the next line boundary
+  skipLine() {
+    while (!this.isAtEnd() && this.peek() !== "\n") {
+      this.advance();
+    }
+    if (this.peek() === "\n") {
+      this.line++;
+      this.column = 1;
+      this.position++; // Consume the newline
+    }
+  }
+  // Main logic to parse and execute a directive line
+  handleDirective() {
+    // 1. Consume '#' (already checked in tokenize loop)
+    this.advance();
+    // 2. Consume whitespace before the directive name
+    while (this.isWhitespace(this.peek())) {
+      this.advance();
+    }
+    let shouldSkip = true;
+    // 3. Read the directive name
+    const dirName = this.readIdentifier();
+    // 4. Consume internal whitespace and read the argument (if any)
+    while (this.isWhitespace(this.peek())) {
+      this.advance();
+    }
+    const dirArg = this.readIdentifier();
+    // 5. Execute the directive logic
+    switch (dirName) {
+      case "define":
+        if (dirArg) {
+          const value = this.readRestOfLine(); // new helper to grab everything after the name
+          this.definitions.set(dirArg, value ?? true);
+        }
+        break;
+      case "undef":
+        if (dirArg) this.definitions.delete(dirArg);
+        break;
+      case "register":
+        let type = dirArg;
+        switch (type) {
+	  case 'operator':
+            let name = this.readIdentifier();
+	    OPERATORS.set(name, JSON.parse(this.readRestOfLine()));
+            OPERATORS.get(name).custom = true;
+        }
+        break;
+      case "inject":
+        let value = this.readRestOfLine();
+        this.tokens.push(JSON.parse(value));
+        break;
+      case "ifdef":
+        // Only start skipping if we aren't already skipping from an outer block
+        if (!this.isSkipping) {
+          this.isSkipping = !this.definitions.has(dirArg);
+        }
+        break;
+      case "ifndef":
+        // Only start skipping if we aren't already skipping from an outer block
+        if (!this.isSkipping) {
+          this.isSkipping = this.definitions.has(dirArg);
+        }
+        break;
+      case "endif":
+        // Stop skipping; this assumes directives are simple and don't nest complexly.
+        this.isSkipping = false;
+        break;
+      default:
+        this.addToken("LITERAL", dirArg);
+        shouldSkip = false;
+        break;
+    }
+    // 6. Move past the rest of the directive line
+    if (shouldSkip) this.skipLine();
+  }
+  // ===== lexer.js (Updated tokenize method) =====
+  tokenize() {
+    while (!this.isAtEnd()) {
+      const char = this.peek();
+      // 1. Check for Preprocessor Directive
+      if (char === "#") {
+        this.handleDirective();
+        continue;
+      }
+      // 2. Handle Skipping State
+      if (this.isSkipping) {
+        // Manually advance and update line/column, but skip tokenization
+        if (char === "\n") {
+          this.line++;
+          this.column = 1;
+        } else if (char === "\t") {
+          this.column += 3;
+        } else {
+          this.column++;
+        }
+        this.position++;
+        continue;
+      }
+      // 3. Normal Lexing (Original code structure)
+      // Resetting position/column to match your original advance() logic flow:
+      this.advance();
+      if (this.isWhitespace(char)) {
+        if (char === "\t") this.column += 3;
+        continue;
+      }
+      if (char === "\n") {
+        this.line++;
+        this.column = 1;
+        continue;
+      }
+      // ─────────────── Comments ───────────────
+      if (char === "/") {
+        const next = this.peek();
+        // Nova-style single-line comment: /!/
+        if (next === "!") {
+          this.advance();
+          this.skipComment();
+          continue;
+        }
+        // Normal single-line comment: //
+        if (next === "/") {
+          this.advance();
+          this.skipComment();
+          continue;
+        }
+        // Block comment: /* ... */
+        if (next === "*") {
+          this.advance(); // consume '*'
+          this.skipBlockComment();
+          continue;
+        }
+      }
+      if (char === '"' || char === "'") {
+        this.string(char);
+        continue;
+      }
+      if (char === "`") {
+        this.templateLiteral();
+        continue;
+      }
+      if (this.isDigit(char)) {
+        this.number(char);
+        continue;
+      }
+      if (this.isAlpha(char)) {
+        // Read the full identifier starting from the current position
+        // and including the char that was just advanced.
+        let ident = char;
+        while (this.isAlphaNumeric(this.peek())) ident += this.advance();
+        // Preprocessor replacement
+        if (this.definitions.has(ident)) {
+          const replacement = this.definitions.get(ident);
+          if (replacement !== true) {
+            // Inject replacement into source stream
+            this.injectReplacement(replacement);
+            continue; // restart lexing with new injected chars
+          }
+        }
+        // Pass the full identifier (e.g., "std") to the identifier method.
+        this.identifier(ident); // <-- MODIFIED LINE (was: this.identifier(ident[0]);)
+        continue;
+      }
+      this.symbol(char);
+    }
+    this.addToken("EOF", null, this.column);
+    return this.tokens;
+  }
+  identifier(firstChar) {
+    const startColumn = this.column - 1;
+    let value = firstChar;
+    while (this.isAlphaNumeric(this.peek())) value += this.advance();
+    if (LITERALS.hasOwnProperty(value)) {
+      this.addToken("LITERAL", LITERALS[value], startColumn);
+    } else {
+      if (OPERATORS.has(value)) {
+        this.addOperatorToken(value, startColumn);
+        return;
+      }
+      const type = KEYWORDS.has(value) ? "KEYWORD" : "IDENTIFIER";
+      this.addToken(type, value, startColumn);
+    }
+  }
+  injectReplacement(str) {
+    // Prepend the replacement string into the remaining source
+    this.source = str + this.source.slice(this.position);
+    this.position = 0;
+    this.column = 1; // optionally reset column
+  }
+  number(firstChar) {
+    const startColumn = this.column - 1;
+    let value = firstChar;
+    while (this.isDigit(this.peek())) value += this.advance();
+    if (this.peek() === "." && this.isDigit(this.peekNext())) {
+      value += this.advance();
+      while (this.isDigit(this.peek())) value += this.advance();
+    }
+    this.addToken("NUMBER", parseFloat(value), startColumn);
+  }
+  skipBlockComment() {
+    while (!this.isAtEnd()) {
+      const ch = this.advance();
+      if (ch === "*" && this.peek() === "/") {
+        this.advance(); // consume '/'
+        return;
+      }
+      if (ch === "\n") {
+        this.line++;
+        this.column = 1;
+      }
+    }
+    throw this.error("Unterminated block comment");
+  }
+  string(quote) {
+    const startColumn = this.column;
+    let value = "";
+    while (!this.isAtEnd() && this.peek() !== quote) {
+      if (this.peek() === "\n") this.line++;
+      value += this.advance();
+    }
+    if (this.isAtEnd()) throw this.error("Unterminated string literal");
+    this.advance();
+    this.addToken("STRING", value, startColumn);
+  }
+templateLiteral() {
+  const startColumn = this.column - 1;
+  this.addToken("TEMPLATE_START", "`", startColumn);
+  let value = "";
+  const flushStringPart = () => {
+    if (value.length > 0) {
+      this.addToken("STRING_PART", value, this.column - value.length - 1);
+      value = "";
+    }
+  };
+  while (!this.isAtEnd()) {
+    const ch = this.advance();
+    // Handle interpolation start
+    if (ch === "&" && this.peek() === "{") {
+      this.advance(); // consume '{'
+      flushStringPart();
+      this.addToken("INTERPOLATION_START", "&{", this.column - 2);
+      // Parse until matching '}' (nested-safe)
+      let depth = 1;
+      let expr = "";
+      while (!this.isAtEnd() && depth > 0) {
+        const inner = this.advance();
+        if (inner === "{") depth++;
+        else if (inner === "}") depth--;
+        if (depth > 0) expr += inner;
+      }
+      // Recursively lex the expression inside the interpolation
+      const innerLexer = new Lexer(expr);
+      const innerTokens = innerLexer.tokenize();
+      // Drop EOF from innerTokens
+      innerTokens.pop();
+      this.tokens.push(...innerTokens);
+      this.addToken("INTERPOLATION_END", "}", this.column - 1);
+      continue;
+    }
+    // End of template
+    if (ch === "`") {
+      flushStringPart();
+      this.addToken("TEMPLATE_END", "`", this.column - 1);
+      return;
+    }
+    // Normal character inside template
+    if (ch === "\n") this.line++;
+    value += ch;
+  }
+  throw this.error("Unterminated template literal");
+}
+  symbol(ch) {
+    const startColumn = this.column - 1;
+    const twoChar = ch + this.peek();
+    // Two-char operator first
+    if (OPERATORS.has(twoChar)) {
+      this.advance();
+      this.addOperatorToken(twoChar, startColumn);
+      return;
+    }
+    if (OPERATORS.has(ch)) {
+      this.addOperatorToken(ch, startColumn);
+      return;
+    }
+    if (PUNCTUATION.has(ch)) {
+      this.addToken("PUNCTUATION", ch, startColumn);
+      return;
+    }
+    throw this.error(`Unexpected character '${ch}'`);
+  }
+  addOperatorToken(op, startColumn) {
+    const info = OPERATORS.get(op) || {};
+    this.tokens.push({
+      type: "OPERATOR",
+      value: op,
+      line: this.line,
+      column: startColumn,
+      precedence: info.precedence ?? 0,
+      isUnary: !!info.isUnary,
+      isPostfix: !!info.isPostfix,
+    });
+  }
+  skipComment() {
+    while (!this.isAtEnd() && this.peek() !== "\n") this.advance();
+  }
+  peek() {
+    return this.source[this.position] ?? "\0";
+  }
+  peekNext() {
+    return this.source[this.position + 1] ?? "\0";
+  }
+  advance() {
+    const ch = this.source[this.position++] ?? "\0";
+    this.column++;
+    return ch;
+  }
+  isAtEnd() {
+    return this.position >= this.source.length;
+  }
+  isWhitespace(ch) {
+    return ch === " " || ch === "\t" || ch === "\r";
+  }
+  isDigit(ch) {
+    return ch >= "0" && ch <= "9";
+  }
+  isAlpha(ch) {
+    return /[A-Za-z_]/.test(ch);
+  }
+  isAlphaNumeric(ch) {
+    return /[A-Za-z0-9_]/.test(ch);
+  }
+  addToken(type, value, startColumn = null) {
+    this.tokens.push({
+      type,
+      value,
+      line: this.line,
+      column: startColumn ?? this.column,
+    });
+  }
+  error(msg) {
+    const src = this.source;
+    const formatted = formatError("Lexer", msg, this.line, this.column, src);
+    throw new (CustomError("LexError"))(formatted);
+  }
+}
+// Public surface of the lexer module. LITERALS is intentionally absent from
+// this list; OPERATORS is the same Map instance the lexer mutates on
+// `#register operator`.
+module.exports = { Lexer, KEYWORDS, OPERATORS, PUNCTUATION };