@sprig-and-prose/prose-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/biome.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
3
+ "organizeImports": {
4
+ "enabled": true
5
+ },
6
+ "linter": {
7
+ "enabled": true,
8
+ "rules": {
9
+ "recommended": true
10
+ }
11
+ },
12
+ "formatter": {
13
+ "enabled": true,
14
+ "indentStyle": "space",
15
+ "indentWidth": 2
16
+ },
17
+ "javascript": {
18
+ "formatter": {
19
+ "quoteStyle": "single",
20
+ "semicolons": "always"
21
+ }
22
+ }
23
+ }
package/package.json ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ "name": "@sprig-and-prose/prose-parser",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Generic prose scanner and parser core for sprig",
6
+ "main": "src/index.js",
7
+ "scripts": {
8
+ "format": "biome format . --write",
9
+ "lint": "biome lint .",
10
+ "typecheck": "tsc -p tsconfig.json",
11
+ "test": "node --test"
12
+ },
13
+ "keywords": [],
14
+ "author": "",
15
+ "license": "ISC",
16
+ "dependencies": {},
17
+ "devDependencies": {
18
+ "@biomejs/biome": "^1.9.4",
19
+ "typescript": "^5.7.2"
20
+ }
21
+ }
package/src/index.js ADDED
@@ -0,0 +1,8 @@
/**
 * @fileoverview Public API for prose-parser
 */

// Tokenizer: turns raw text into Token[] with source spans.
export { scan } from './scanner.js';
// Parser base class: token navigation, diagnostics, raw brace-block parsing.
export { ParserCore } from './parser-core.js';
// Span helpers for building/merging source locations.
export { createSpan, createPointSpan, mergeSpans } from './util/span.js';
// Prose text normalization (dedent of indented blocks).
export { normalizeProseBlock, dedentPreserve } from './util/text.js';
@@ -0,0 +1,304 @@
/**
 * @fileoverview Generic parser core with token navigation and raw content block parsing
 */

/**
 * @typedef {import('./scanner.js').Token} Token
 */

/**
 * @typedef {Object} SourceSpan
 * @property {string} file - File path
 * @property {{ line: number, col: number, offset: number }} start - Start position
 * @property {{ line: number, col: number, offset: number }} end - End position
 */

/**
 * @typedef {Object} Diagnostic
 * @property {'error' | 'warning'} severity - Diagnostic severity
 * @property {string} message - Diagnostic message
 * @property {SourceSpan} [source] - Optional source span
 */

/**
 * Core parser class with low-level token navigation utilities.
 *
 * Errors are collected in `this.diagnostics` instead of being thrown, so
 * subclasses can keep parsing after a failure (tolerant parsing).
 */
export class ParserCore {
  /**
   * @param {Token[]} tokens - Token stream (normally ends with an EOF token)
   * @param {string} filePath - Path used when building spans
   * @param {string} sourceText - Original source text (for offset-based slicing by callers)
   */
  constructor(tokens, filePath, sourceText) {
    this.tokens = tokens;
    this.filePath = filePath;
    this.sourceText = sourceText;
    this.index = 0;
    /** @type {Diagnostic[]} */
    this.diagnostics = [];
  }

  /**
   * Returns the current token without consuming it.
   * @returns {Token | null}
   */
  peek() {
    if (this.index >= this.tokens.length) {
      return null;
    }
    return this.tokens[this.index];
  }

  /**
   * Returns the most recently consumed token.
   * @returns {Token | null}
   */
  previous() {
    if (this.index === 0) {
      return null;
    }
    return this.tokens[this.index - 1];
  }

  /**
   * Consumes and returns the current token (null when exhausted).
   * @returns {Token | null}
   */
  advance() {
    if (this.index >= this.tokens.length) {
      return null;
    }
    return this.tokens[this.index++];
  }

  /**
   * True when no meaningful tokens remain (the EOF token counts as the end).
   * @returns {boolean}
   */
  isAtEnd() {
    const token = this.peek();
    return token === null || token.type === 'EOF';
  }

  /**
   * Tests the current token against a type (and optionally an exact value)
   * without consuming it.
   * @param {string} type
   * @param {string} [value]
   * @returns {boolean}
   */
  match(type, value) {
    const token = this.peek();
    if (!token || token.type !== type) {
      return false;
    }
    if (value !== undefined && token.value !== value) {
      return false;
    }
    return true;
  }

  /**
   * Consumes the current token if it matches type (and optional value);
   * otherwise records an error diagnostic and leaves the position unchanged.
   * @param {string} type
   * @param {string} [value]
   * @returns {{ token: Token | null, diagnostic: Diagnostic | null }}
   */
  expect(type, value) {
    const token = this.peek();
    if (!token || token.type !== type) {
      const diagnostic = {
        severity: 'error',
        message: `Expected ${type}${value !== undefined ? ` with value "${value}"` : ''}, got ${token ? token.type : 'EOF'}`,
        source: token ? token.span : undefined,
      };
      this.diagnostics.push(diagnostic);
      return { token: null, diagnostic };
    }
    if (value !== undefined && token.value !== value) {
      const diagnostic = {
        severity: 'error',
        message: `Expected ${type} with value "${value}", got "${token.value}"`,
        source: token.span,
      };
      this.diagnostics.push(diagnostic);
      return { token: null, diagnostic };
    }
    // Success: advance and return token
    this.advance();
    return { token, diagnostic: null };
  }

  /**
   * Like expect(), but accepts either KEYWORD or IDENTIFIER. Useful because
   * the scanner only promotes identifiers to KEYWORD when a keyword set is
   * supplied.
   * @param {string} [value]
   * @returns {{ token: Token | null, diagnostic: Diagnostic | null }}
   */
  expectIdentifierOrKeyword(value) {
    const token = this.peek();
    if (!token || (token.type !== 'KEYWORD' && token.type !== 'IDENTIFIER')) {
      const diagnostic = {
        severity: 'error',
        message: `Expected identifier or keyword${value !== undefined ? ` "${value}"` : ''}, got ${token ? token.type : 'EOF'}`,
        source: token ? token.span : undefined,
      };
      this.diagnostics.push(diagnostic);
      return { token: null, diagnostic };
    }
    if (value !== undefined && token.value !== value) {
      const diagnostic = {
        severity: 'error',
        message: `Expected identifier or keyword "${value}", got "${token.value}"`,
        source: token.span,
      };
      this.diagnostics.push(diagnostic);
      return { token: null, diagnostic };
    }
    // Success: advance and return token
    this.advance();
    return { token, diagnostic: null };
  }

  /**
   * Convenience alias: expects a specific "kind" word (identifier or keyword).
   * @param {string} value
   * @returns {{ token: Token | null, diagnostic: Diagnostic | null }}
   */
  expectKindToken(value) {
    return this.expectIdentifierOrKeyword(value);
  }

  /**
   * Reads an identifier name (IDENTIFIER or KEYWORD) without reporting a
   * diagnostic on failure.
   * @returns {string | null}
   */
  readIdent() {
    const token = this.peek();
    if (!token || (token.type !== 'IDENTIFIER' && token.type !== 'KEYWORD')) {
      return null;
    }
    this.advance();
    return token.value;
  }

  /**
   * Consumes an identifier, reporting a diagnostic if missing.
   * @returns {string | null}
   */
  consumeIdentifier() {
    // Note: the diagnostic (if any) is already recorded in this.diagnostics
    // by expectIdentifierOrKeyword, so only the token is needed here.
    const { token } = this.expectIdentifierOrKeyword();
    return token ? token.value : null;
  }

  /**
   * Parses an identifier path (IDENTIFIER (DOT IDENTIFIER)*), e.g. "a.b.c".
   * Stops (without error) at the first non-identifier after a DOT.
   * @returns {string | null} Dot-joined path, or null if no identifier starts here
   */
  parseIdentifierPath() {
    if (!this.match('IDENTIFIER') && !this.match('KEYWORD')) {
      return null;
    }

    const parts = [];
    const firstToken = this.advance();
    if (firstToken) {
      parts.push(firstToken.value);
    }

    while (this.match('DOT')) {
      this.advance(); // consume DOT
      if (this.match('IDENTIFIER') || this.match('KEYWORD')) {
        const partToken = this.advance();
        if (partToken) {
          parts.push(partToken.value);
        }
      } else {
        break;
      }
    }

    return parts.join('.');
  }

  /**
   * Reports a diagnostic.
   * @param {'error' | 'warning'} severity
   * @param {string} message
   * @param {SourceSpan} [span]
   */
  reportDiagnostic(severity, message, span) {
    this.diagnostics.push({
      severity,
      message,
      source: span,
    });
  }

  /**
   * Creates a span from start and end tokens.
   * @param {Token} startToken
   * @param {Token} endToken
   * @returns {SourceSpan}
   */
  createSpan(startToken, endToken) {
    return {
      file: this.filePath,
      start: startToken.span.start,
      end: endToken.span.end,
    };
  }

  /**
   * Creates a span from a single token.
   * @param {Token} token
   * @returns {SourceSpan}
   */
  spanFromToken(token) {
    return token.span;
  }

  /**
   * Parses a raw content block by brace matching (returns spans only, no raw string).
   * Expects the cursor to sit on the LBRACE following the keyword; consumes
   * through the matching RBRACE. Nested braces are tracked by depth.
   * @param {string} kind - Block kind ('describe', 'title', 'note')
   * @param {Token} keywordToken - The keyword token
   * @returns {{ kind: string, contentSpan: { startOffset: number, endOffset: number }, span: SourceSpan } | null}
   */
  parseRawContentBlock(kind, keywordToken) {
    // Missing LBRACE is already reported by expect(); just bail out.
    const { token: lbrace } = this.expect('LBRACE');
    if (!lbrace) {
      return null;
    }

    // Find matching closing brace by tracking depth
    let depth = 1;
    const startOffset = lbrace.span.end.offset;
    let endOffset = startOffset;
    let endToken = null;

    while (depth > 0 && this.index < this.tokens.length) {
      const token = this.tokens[this.index];
      if (token.type === 'EOF') break;

      if (token.type === 'LBRACE') {
        depth++;
        this.index++;
      } else if (token.type === 'RBRACE') {
        depth--;
        if (depth === 0) {
          endToken = token;
          endOffset = token.span.start.offset;
          this.index++;
          break;
        } else {
          this.index++;
        }
      } else {
        this.index++;
      }
    }

    if (depth > 0) {
      this.reportDiagnostic('error', `Unclosed ${kind} block`, keywordToken.span);
      return null;
    }

    return {
      kind,
      contentSpan: {
        startOffset,
        endOffset,
      },
      span: this.createSpan(keywordToken, endToken || lbrace),
    };
  }
}
package/src/scanner.js ADDED
@@ -0,0 +1,309 @@
/**
 * @fileoverview Generic tokenizer/scanner for prose-like syntax
 */

/**
 * @typedef {Object} Token
 * @property {string} type - Token type
 * @property {string} value - Token value
 * @property {SourceSpan} span - Source span
 */

/**
 * @typedef {Object} SourceSpan
 * @property {string} file - File path
 * @property {{ line: number, col: number, offset: number }} start - Start position
 * @property {{ line: number, col: number, offset: number }} end - End position
 */

/**
 * Single-character punctuation tokens, keyed by character.
 * Replaces six formerly duplicated emit branches with one table lookup.
 */
const PUNCT_TYPES = {
  '{': 'LBRACE',
  '}': 'RBRACE',
  '[': 'LBRACKET',
  ']': 'RBRACKET',
  '.': 'DOT',
  ',': 'COMMA',
};

/**
 * Scans input text and returns tokens with source spans.
 * If options.keywords (Set<string>) is provided, identifiers matching a keyword get type 'KEYWORD'; otherwise they stay 'IDENTIFIER'.
 * Unknown characters are skipped (tolerant parsing); a trailing EOF token is always appended.
 *
 * @param {string} text - Input text
 * @param {string} file - File path
 * @param {{ keywords?: Set<string> }} [options] - Optional: keywords set for KEYWORD vs IDENTIFIER
 * @returns {Token[]}
 * @throws {Error} On an unterminated block comment or a stray `*}`.
 */
export function scan(text, file, options) {
  const keywords = options?.keywords;
  const tokens = [];
  let offset = 0;
  let line = 1;
  let col = 1;

  while (offset < text.length) {
    const startOffset = offset;
    const startLine = line;
    const startCol = col;

    const ch = text[offset];

    // Skip whitespace (but track newlines for span calculations)
    if (/\s/.test(ch)) {
      if (ch === '\n') {
        line++;
        col = 1;
      } else {
        col++;
      }
      offset++;
      continue;
    }

    // Block comment {* ... *} (no nesting; {* inside comment is plain text)
    if (ch === '{' && offset + 1 < text.length && text[offset + 1] === '*') {
      offset += 2;
      col += 2;
      let foundEnd = false;
      while (offset < text.length) {
        if (text[offset] === '*' && offset + 1 < text.length && text[offset + 1] === '}') {
          offset += 2;
          col += 2;
          foundEnd = true;
          break;
        }
        if (text[offset] === '\n') {
          line++;
          col = 1;
        } else {
          col++;
        }
        offset++;
      }
      if (!foundEnd) {
        throw new Error(
          `Unterminated block comment: expected \`*}\` before end of file at ${file}:${startLine}:${startCol}`,
        );
      }
      continue;
    }

    // Stray *} (no matching {*)
    if (ch === '*' && offset + 1 < text.length && text[offset + 1] === '}') {
      throw new Error(`Stray \`*}\`: no matching \`{*\` at ${file}:${line}:${col}`);
    }

    // Single-quoted strings.
    // Only treat as a string delimiter if it's clearly a string (not a
    // contraction): a quote between two letters/digits is a contraction
    // and falls through to the branches below (identifier scanning already
    // absorbs apostrophes inside words).
    if (ch === "'") {
      const prevCh = startOffset > 0 ? text[startOffset - 1] : null;
      const nextCh = offset + 1 < text.length ? text[offset + 1] : null;
      const isContraction =
        prevCh !== null &&
        /[A-Za-z0-9]/.test(prevCh) &&
        nextCh !== null &&
        /[A-Za-z0-9]/.test(nextCh);

      if (!isContraction) {
        // This looks like a string delimiter
        offset++;
        col++;
        let value = '';
        let escaped = false;

        while (offset < text.length) {
          const c = text[offset];
          if (escaped) {
            // \' and \\ unescape; any other escape keeps its backslash verbatim.
            if (c === "'" || c === '\\') {
              value += c;
            } else {
              value += '\\' + c;
            }
            escaped = false;
          } else if (c === '\\') {
            escaped = true;
          } else if (c === "'") {
            // Closing quote: consume and stop.
            offset++;
            col++;
            break;
          } else {
            value += c;
          }
          // Fix: keep line/col in sync when a string spans multiple lines;
          // previously every character (including '\n') only bumped col,
          // corrupting the spans of all subsequent tokens.
          if (c === '\n') {
            line++;
            col = 1;
          } else {
            col++;
          }
          offset++;
        }

        // Note: an unterminated string still produces a token (tolerant parsing).
        tokens.push({
          type: 'STRING',
          value,
          span: {
            file,
            start: { line: startLine, col: startCol, offset: startOffset },
            end: { line, col, offset },
          },
        });
        continue;
      }
    }

    // Single-character punctuation ({ } [ ] . ,) via the lookup table
    const punctType = PUNCT_TYPES[ch];
    if (punctType !== undefined) {
      tokens.push({
        type: punctType,
        value: ch,
        span: {
          file,
          start: { line: startLine, col: startCol, offset: startOffset },
          end: { line, col: col + 1, offset: offset + 1 },
        },
      });
      offset++;
      col++;
      continue;
    }

    // Numbers (integer literals, optional leading minus)
    if (/[0-9]/.test(ch) || (ch === '-' && /[0-9]/.test(text[offset + 1] || ''))) {
      let value = '';
      if (ch === '-') {
        value += ch;
        offset++;
        col++;
      }
      while (offset < text.length && /[0-9]/.test(text[offset])) {
        value += text[offset];
        offset++;
        col++;
      }
      tokens.push({
        type: 'NUMBER',
        value,
        span: {
          file,
          start: { line: startLine, col: startCol, offset: startOffset },
          end: { line, col, offset },
        },
      });
      continue;
    }

    // Identifiers and keywords (including contractions with apostrophes:
    // an apostrophe is absorbed only when followed by a letter/digit)
    if (/[A-Za-z_]/.test(ch)) {
      let value = '';
      while (
        offset < text.length &&
        (/[A-Za-z0-9_]/.test(text[offset]) ||
          (text[offset] === "'" &&
            offset + 1 < text.length &&
            /[A-Za-z0-9]/.test(text[offset + 1])))
      ) {
        value += text[offset];
        offset++;
        col++;
      }

      const type = keywords && keywords.has(value) ? 'KEYWORD' : 'IDENTIFIER';

      tokens.push({
        type,
        value,
        span: {
          file,
          start: { line: startLine, col: startCol, offset: startOffset },
          end: { line, col, offset },
        },
      });
      continue;
    }

    // Unknown character - skip (tolerant parsing)
    offset++;
    col++;
  }

  // EOF token
  tokens.push({
    type: 'EOF',
    value: '',
    span: {
      file,
      start: { line, col, offset },
      end: { line, col, offset },
    },
  });

  return tokens;
}
@@ -0,0 +1,45 @@
/**
 * @fileoverview Source span utilities for tracking source locations
 */

/**
 * @typedef {Object} SourceSpan
 * @property {string} file - File path
 * @property {{ line: number, col: number, offset: number }} start - Start position
 * @property {{ line: number, col: number, offset: number }} end - End position
 */

/**
 * Creates a source span from start and end positions.
 * @param {string} file - File path
 * @param {{ line: number, col: number, offset: number }} start - Start position
 * @param {{ line: number, col: number, offset: number }} end - End position
 * @returns {SourceSpan}
 */
export const createSpan = (file, start, end) => ({ file, start, end });

/**
 * Creates a zero-length span at a position (start and end coincide).
 * @param {string} file - File path
 * @param {{ line: number, col: number, offset: number }} pos - Position
 * @returns {SourceSpan}
 */
export const createPointSpan = (file, pos) => createSpan(file, pos, pos);

/**
 * Merges two spans: the result runs from the start of the first to the end
 * of the second, keeping the first span's file.
 * @param {SourceSpan} span1 - First span
 * @param {SourceSpan} span2 - Second span
 * @returns {SourceSpan}
 */
export const mergeSpans = (span1, span2) => ({
  file: span1.file,
  start: span1.start,
  end: span2.end,
});
@@ -0,0 +1,125 @@
/**
 * @fileoverview Text normalization utilities
 */

/**
 * Counts leading whitespace, treating tabs as 2 spaces.
 * @param {string} line - Line to count indentation for
 * @returns {number} - Indentation count (spaces + tabs*2)
 */
function countIndent(line) {
  let width = 0;
  for (const ch of line) {
    if (ch === ' ') {
      width += 1;
    } else if (ch === '\t') {
      width += 2; // a tab counts as two spaces
    } else {
      break;
    }
  }
  return width;
}

/**
 * Removes exactly `amount` worth of leading whitespace (spaces/tabs).
 * Treats tabs as 2 spaces when removing; a tab that overshoots the budget
 * is still dropped in full.
 * @param {string} line - Line to remove indentation from
 * @param {number} amount - Amount of indentation to remove (in space units)
 * @returns {string} - Line with indentation removed
 */
function removeIndent(line, amount) {
  let taken = 0;
  let cut = 0;
  while (cut < line.length && taken < amount) {
    const ch = line[cut];
    if (ch === ' ') {
      taken += 1;
    } else if (ch === '\t') {
      taken += 2;
    } else {
      break;
    }
    cut += 1;
  }
  return line.slice(cut);
}

/**
 * Normalizes prose blocks by removing common indentation from lines after the
 * first non-empty line. This handles the common case where the first line is
 * flush-left (indent 0) and subsequent lines are indented due to code
 * formatting.
 *
 * Behavior: lines up to and including the first non-empty line are kept
 * verbatim; later whitespace-only lines become empty strings; every other
 * later line loses the minimum indentation found among those later lines.
 * Input without any non-empty line, or without any indented continuation,
 * is returned unchanged.
 *
 * @param {string} raw - Raw text to normalize
 * @returns {string} - Normalized text
 */
export function normalizeProseBlock(raw) {
  const lines = raw.split('\n');

  // Locate the first line that carries content.
  const firstContent = lines.findIndex((l) => l.trim().length > 0);
  if (firstContent === -1) {
    return raw;
  }

  // Minimum indent among non-empty continuation lines — the common prefix
  // to strip while preserving relative indentation.
  let common = null;
  for (const l of lines.slice(firstContent + 1)) {
    if (l.trim().length === 0) {
      continue;
    }
    const width = countIndent(l);
    common = common === null ? width : Math.min(common, width);
  }
  if (common === null || common === 0) {
    return raw;
  }

  return lines
    .map((l, i) => {
      if (i <= firstContent) {
        return l; // head lines stay verbatim
      }
      if (l.trim().length === 0) {
        return ''; // blank continuation lines collapse to empty
      }
      return removeIndent(l, common);
    })
    .join('\n');
}

/**
 * @deprecated Use normalizeProseBlock instead
 * @param {string} raw - Raw text to dedent
 * @returns {string} - Dedented text
 */
export function dedentPreserve(raw) {
  return normalizeProseBlock(raw);
}
@@ -0,0 +1,47 @@
/**
 * @fileoverview Tests for prose-parser ParserCore
 */

import { test } from 'node:test';
import { scan, ParserCore } from '../src/index.js';

/** Minimal assertion helper: throws on a falsy condition. */
function assert(condition, message) {
  if (!condition) {
    throw new Error(message);
  }
}

/** Builds a parser (and its token stream) over the given source text. */
function makeParser(source) {
  const tokens = scan(source, 'test.prose');
  return { tokens, parser: new ParserCore(tokens, 'test.prose', source) };
}

test('ParserCore parseIdentifierPath parses single identifier', () => {
  const { parser } = makeParser('foo');
  const path = parser.parseIdentifierPath();
  assert(path === 'foo', 'path should be foo');
});

test('ParserCore parseIdentifierPath parses dotted path', () => {
  const { parser } = makeParser('a.b.c');
  const path = parser.parseIdentifierPath();
  assert(path === 'a.b.c', 'path should be a.b.c');
});

test('ParserCore parseRawContentBlock parses simple brace block', () => {
  const { tokens, parser } = makeParser("describe { inner }");
  const keywordToken = tokens[0];
  assert(
    keywordToken.type === 'IDENTIFIER' && keywordToken.value === 'describe',
    'first token should be describe',
  );
  parser.advance(); // consume describe
  const block = parser.parseRawContentBlock('describe', keywordToken);
  assert(block !== null, 'block should be parsed');
  assert(block.kind === 'describe', 'kind should be describe');
  assert(block.contentSpan.startOffset < block.contentSpan.endOffset, 'contentSpan should have range');
});

test('ParserCore expect reports diagnostic on mismatch', () => {
  const { parser } = makeParser('foo');
  const { token, diagnostic } = parser.expect('LBRACE');
  assert(token === null, 'token should be null');
  assert(diagnostic !== null, 'diagnostic should be set');
  assert(parser.diagnostics.length === 1, 'diagnostics should have one entry');
});
@@ -0,0 +1,123 @@
/**
 * @fileoverview Tests for prose-parser scanner
 */

import { test } from 'node:test';
import { deepStrictEqual, throws } from 'node:assert/strict';
import { scan } from '../src/scanner.js';

/** Minimal assertion helper: throws on a falsy condition. */
function assert(condition, message) {
  if (!condition) {
    throw new Error(message);
  }
}

/** Token types of a scan result, EOF excluded. */
const nonEofTypes = (tokens) => tokens.filter((t) => t.type !== 'EOF').map((t) => t.type);

test('scan returns EOF for empty input', () => {
  const tokens = scan('', 'test.prose');
  assert(tokens.length === 1, 'should have one token');
  assert(tokens[0].type === 'EOF', 'should be EOF');
});

test('scan tokenizes braces and comma without keywords', () => {
  const tokens = scan(' { } , ', 'test.prose');
  const types = tokens.map((t) => t.type);
  assert(types.includes('LBRACE'), 'should have LBRACE');
  assert(types.includes('RBRACE'), 'should have RBRACE');
  assert(types.includes('COMMA'), 'should have COMMA');
  assert(types.at(-1) === 'EOF', 'should end with EOF');
});

test('scan treats identifiers as IDENTIFIER when no keywords option', () => {
  const tokens = scan('universe book', 'test.prose');
  const idents = tokens.filter((t) => t.type === 'IDENTIFIER' || t.type === 'KEYWORD');
  assert(idents.length === 2, 'should have two identifier-like tokens');
  assert(idents[0].value === 'universe', 'first should be universe');
  assert(idents[1].value === 'book', 'second should be book');
  assert(!tokens.some((t) => t.type === 'KEYWORD'), 'without keywords option, none should be KEYWORD');
});

test('scan treats keyword as KEYWORD when keywords option provided', () => {
  const tokens = scan('universe book foo', 'test.prose', {
    keywords: new Set(['universe', 'book']),
  });
  const byValue = (v) => tokens.find((t) => t.value === v);
  const universeTok = byValue('universe');
  const bookTok = byValue('book');
  const fooTok = byValue('foo');
  assert(universeTok && universeTok.type === 'KEYWORD', 'universe should be KEYWORD');
  assert(bookTok && bookTok.type === 'KEYWORD', 'book should be KEYWORD');
  assert(fooTok && fooTok.type === 'IDENTIFIER', 'foo should be IDENTIFIER');
});

test('scan tokenizes single-quoted string', () => {
  const str = scan("'hello'", 'test.prose').find((t) => t.type === 'STRING');
  assert(str !== undefined, 'should have STRING token');
  assert(str.value === 'hello', 'string value should be hello');
});

test('scan tokenizes dot for paths', () => {
  const types = scan('a.b', 'test.prose').map((t) => t.type);
  assert(types.includes('IDENTIFIER') || types.includes('KEYWORD'), 'should have identifier');
  assert(types.includes('DOT'), 'should have DOT');
});

test('scan skips block comment: comment-only produces EOF only', () => {
  const tokens = scan('{* hello *}', 'test.prose');
  assert(tokens.length === 1, 'should have one token (EOF)');
  assert(tokens[0].type === 'EOF', 'should be EOF');
});

test('scan skips block comment: comments between tokens', () => {
  const withComment = scan('scene X { {* c *} actors { } }', 'test.prose');
  const withoutComment = scan('scene X { actors { } }', 'test.prose');
  deepStrictEqual(nonEofTypes(withComment), nonEofTypes(withoutComment), 'token types should match');
});

test('scan skips block comment: comment after tokens', () => {
  const tokens = scan('kind { [ PlayerSkill ] } {* array of PlayerSkill *}', 'test.prose');
  const nonEof = tokens.filter((t) => t.type !== 'EOF');
  assert(nonEof.some((t) => t.value === 'kind'), 'should have kind');
  assert(nonEof.some((t) => t.value === 'PlayerSkill'), 'should have PlayerSkill');
  assert(tokens.at(-1).type === 'EOF', 'should end with EOF');
});

test('scan skips block comment: comments inside nested blocks', () => {
  const withComment = scan(
    'many { from { Skills[] } {* source *} by { id } matches { row.skillId } }',
    'test.prose',
  );
  const withoutComment = scan(
    'many { from { Skills[] } by { id } matches { row.skillId } }',
    'test.prose',
  );
  deepStrictEqual(nonEofTypes(withComment), nonEofTypes(withoutComment), 'token types should match');
});

test('scan throws on unterminated block comment', () => {
  throws(
    () => scan('{* never ends', 'test.prose'),
    (err) => {
      assert(err instanceof Error);
      assert(err.message.includes('Unterminated block comment'));
      assert(err.message.includes('test.prose'));
      return true;
    },
  );
});

test('scan throws on stray *}', () => {
  throws(
    () => scan('foo *} bar', 'test.prose'),
    (err) => {
      assert(err instanceof Error);
      assert(err.message.includes('Stray'));
      assert(err.message.includes('test.prose'));
      return true;
    },
  );
});
@@ -0,0 +1,29 @@
/**
 * @fileoverview Tests for prose-parser text utils
 */

import { test } from 'node:test';
import { normalizeProseBlock } from '../src/index.js';

/** Minimal assertion helper: throws on a falsy condition. */
function assert(condition, message) {
  if (!condition) {
    throw new Error(message);
  }
}

test('normalizeProseBlock returns empty string unchanged', () => {
  assert(normalizeProseBlock('') === '', 'empty string');
});

test('normalizeProseBlock removes common indent', () => {
  const out = normalizeProseBlock('line0\n  line1\n  line2');
  assert(out === 'line0\nline1\nline2', 'should dedent second and third lines');
});

test('normalizeProseBlock preserves first line and empty lines', () => {
  const lines = normalizeProseBlock('first\n\n  indented').split('\n');
  assert(lines[0] === 'first', 'first line unchanged');
  assert(lines[1] === '', 'empty line preserved');
});
package/tsconfig.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "compilerOptions": {
3
+ "checkJs": true,
4
+ "allowJs": true,
5
+ "noEmit": true,
6
+ "strict": true,
7
+ "target": "ES2022",
8
+ "module": "ES2022",
9
+ "moduleResolution": "node",
10
+ "esModuleInterop": true,
11
+ "skipLibCheck": true
12
+ },
13
+ "include": ["src/**/*", "test/**/*"]
14
+ }