agent-working-memory 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +311 -0
- package/dist/api/index.d.ts +2 -0
- package/dist/api/index.d.ts.map +1 -0
- package/dist/api/index.js +2 -0
- package/dist/api/index.js.map +1 -0
- package/dist/api/routes.d.ts +53 -0
- package/dist/api/routes.d.ts.map +1 -0
- package/dist/api/routes.js +388 -0
- package/dist/api/routes.js.map +1 -0
- package/dist/cli.d.ts +12 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +245 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/decay.d.ts +36 -0
- package/dist/core/decay.d.ts.map +1 -0
- package/dist/core/decay.js +38 -0
- package/dist/core/decay.js.map +1 -0
- package/dist/core/embeddings.d.ts +33 -0
- package/dist/core/embeddings.d.ts.map +1 -0
- package/dist/core/embeddings.js +76 -0
- package/dist/core/embeddings.js.map +1 -0
- package/dist/core/hebbian.d.ts +38 -0
- package/dist/core/hebbian.d.ts.map +1 -0
- package/dist/core/hebbian.js +74 -0
- package/dist/core/hebbian.js.map +1 -0
- package/dist/core/index.d.ts +4 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +4 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/query-expander.d.ts +24 -0
- package/dist/core/query-expander.d.ts.map +1 -0
- package/dist/core/query-expander.js +58 -0
- package/dist/core/query-expander.js.map +1 -0
- package/dist/core/reranker.d.ts +25 -0
- package/dist/core/reranker.d.ts.map +1 -0
- package/dist/core/reranker.js +75 -0
- package/dist/core/reranker.js.map +1 -0
- package/dist/core/salience.d.ts +30 -0
- package/dist/core/salience.d.ts.map +1 -0
- package/dist/core/salience.js +81 -0
- package/dist/core/salience.js.map +1 -0
- package/dist/engine/activation.d.ts +38 -0
- package/dist/engine/activation.d.ts.map +1 -0
- package/dist/engine/activation.js +516 -0
- package/dist/engine/activation.js.map +1 -0
- package/dist/engine/connections.d.ts +31 -0
- package/dist/engine/connections.d.ts.map +1 -0
- package/dist/engine/connections.js +74 -0
- package/dist/engine/connections.js.map +1 -0
- package/dist/engine/consolidation-scheduler.d.ts +31 -0
- package/dist/engine/consolidation-scheduler.d.ts.map +1 -0
- package/dist/engine/consolidation-scheduler.js +115 -0
- package/dist/engine/consolidation-scheduler.js.map +1 -0
- package/dist/engine/consolidation.d.ts +62 -0
- package/dist/engine/consolidation.d.ts.map +1 -0
- package/dist/engine/consolidation.js +368 -0
- package/dist/engine/consolidation.js.map +1 -0
- package/dist/engine/eval.d.ts +22 -0
- package/dist/engine/eval.d.ts.map +1 -0
- package/dist/engine/eval.js +79 -0
- package/dist/engine/eval.js.map +1 -0
- package/dist/engine/eviction.d.ts +29 -0
- package/dist/engine/eviction.d.ts.map +1 -0
- package/dist/engine/eviction.js +86 -0
- package/dist/engine/eviction.js.map +1 -0
- package/dist/engine/index.d.ts +7 -0
- package/dist/engine/index.d.ts.map +1 -0
- package/dist/engine/index.js +7 -0
- package/dist/engine/index.js.map +1 -0
- package/dist/engine/retraction.d.ts +32 -0
- package/dist/engine/retraction.d.ts.map +1 -0
- package/dist/engine/retraction.js +77 -0
- package/dist/engine/retraction.js.map +1 -0
- package/dist/engine/staging.d.ts +33 -0
- package/dist/engine/staging.d.ts.map +1 -0
- package/dist/engine/staging.js +63 -0
- package/dist/engine/staging.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +95 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp.d.ts +24 -0
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +532 -0
- package/dist/mcp.js.map +1 -0
- package/dist/storage/index.d.ts +2 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/dist/storage/index.js +2 -0
- package/dist/storage/index.js.map +1 -0
- package/dist/storage/sqlite.d.ts +116 -0
- package/dist/storage/sqlite.d.ts.map +1 -0
- package/dist/storage/sqlite.js +750 -0
- package/dist/storage/sqlite.js.map +1 -0
- package/dist/types/agent.d.ts +30 -0
- package/dist/types/agent.d.ts.map +1 -0
- package/dist/types/agent.js +23 -0
- package/dist/types/agent.js.map +1 -0
- package/dist/types/checkpoint.d.ts +50 -0
- package/dist/types/checkpoint.d.ts.map +1 -0
- package/dist/types/checkpoint.js +8 -0
- package/dist/types/checkpoint.js.map +1 -0
- package/dist/types/engram.d.ts +165 -0
- package/dist/types/engram.d.ts.map +1 -0
- package/dist/types/engram.js +8 -0
- package/dist/types/engram.js.map +1 -0
- package/dist/types/eval.d.ts +84 -0
- package/dist/types/eval.d.ts.map +1 -0
- package/dist/types/eval.js +11 -0
- package/dist/types/eval.js.map +1 -0
- package/dist/types/index.d.ts +5 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/package.json +55 -0
- package/src/api/index.ts +1 -0
- package/src/api/routes.ts +528 -0
- package/src/cli.ts +260 -0
- package/src/core/decay.ts +61 -0
- package/src/core/embeddings.ts +82 -0
- package/src/core/hebbian.ts +91 -0
- package/src/core/index.ts +3 -0
- package/src/core/query-expander.ts +64 -0
- package/src/core/reranker.ts +99 -0
- package/src/core/salience.ts +95 -0
- package/src/engine/activation.ts +577 -0
- package/src/engine/connections.ts +101 -0
- package/src/engine/consolidation-scheduler.ts +123 -0
- package/src/engine/consolidation.ts +443 -0
- package/src/engine/eval.ts +100 -0
- package/src/engine/eviction.ts +99 -0
- package/src/engine/index.ts +6 -0
- package/src/engine/retraction.ts +98 -0
- package/src/engine/staging.ts +72 -0
- package/src/index.ts +100 -0
- package/src/mcp.ts +635 -0
- package/src/storage/index.ts +1 -0
- package/src/storage/sqlite.ts +893 -0
- package/src/types/agent.ts +65 -0
- package/src/types/checkpoint.ts +44 -0
- package/src/types/engram.ts +194 -0
- package/src/types/eval.ts +98 -0
- package/src/types/index.ts +4 -0
package/src/cli.ts
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* CLI entrypoint for AgentWorkingMemory.
|
|
5
|
+
*
|
|
6
|
+
* Commands:
|
|
7
|
+
* awm setup — configure MCP for the current project
|
|
8
|
+
* awm mcp — start the MCP server (called by Claude Code)
|
|
9
|
+
* awm serve — start the HTTP API server
|
|
10
|
+
* awm health — check if a running server is healthy
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
|
|
14
|
+
import { resolve, basename, join, dirname } from 'node:path';
|
|
15
|
+
import { execSync } from 'node:child_process';
|
|
16
|
+
import { fileURLToPath } from 'node:url';
|
|
17
|
+
|
|
18
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
19
|
+
const __dirname = dirname(__filename);
|
|
20
|
+
|
|
21
|
+
// Load .env if present
|
|
22
|
+
try {
|
|
23
|
+
const envPath = resolve(process.cwd(), '.env');
|
|
24
|
+
const envContent = readFileSync(envPath, 'utf-8');
|
|
25
|
+
for (const line of envContent.split('\n')) {
|
|
26
|
+
const trimmed = line.trim();
|
|
27
|
+
if (!trimmed || trimmed.startsWith('#')) continue;
|
|
28
|
+
const eqIdx = trimmed.indexOf('=');
|
|
29
|
+
if (eqIdx === -1) continue;
|
|
30
|
+
const key = trimmed.slice(0, eqIdx).trim();
|
|
31
|
+
const val = trimmed.slice(eqIdx + 1).trim().replace(/^["']|["']$/g, '');
|
|
32
|
+
if (!process.env[key]) process.env[key] = val;
|
|
33
|
+
}
|
|
34
|
+
} catch { /* No .env file */ }
|
|
35
|
+
|
|
36
|
+
// CLI arguments (everything after `node cli.js`); args[0] is the subcommand.
const args = process.argv.slice(2);
const command = args[0];
|
|
38
|
+
|
|
39
|
+
/** Print CLI usage/help text to stdout. */
function printUsage() {
  console.log(`
AgentWorkingMemory — Cognitive memory for AI agents

Usage:
  awm setup [--agent-id <id>] [--db-path <path>] [--no-claude-md]
                             Configure MCP for current project
  awm mcp                    Start MCP server (used by Claude Code)
  awm serve [--port <port>]  Start HTTP API server
  awm health [--port <port>] Check server health

Setup:
  Run 'awm setup' in your project directory. This creates .mcp.json
  and appends workflow instructions to CLAUDE.md so Claude Code
  automatically connects to your memory layer.

  Use --no-claude-md to skip CLAUDE.md modification.
  Restart Claude Code after setup to pick up the new MCP server.
`.trim());
}
|
|
59
|
+
|
|
60
|
+
// ─── SETUP ──────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
 * `awm setup` — wire AWM into the current project.
 *
 * Writes/updates .mcp.json with an 'agent-working-memory' server entry and
 * (unless --no-claude-md) creates or appends a workflow section to CLAUDE.md.
 *
 * Flags:
 *   --agent-id <id>   Memory namespace (default: sanitized directory name)
 *   --db-path <path>  SQLite DB location (default: <awm-root>/data/memory.db)
 *   --no-claude-md    Skip CLAUDE.md creation/modification
 */
function setup() {
  const cwd = process.cwd();
  // Default agent id: directory name, lowercased, non-[a-z0-9-] replaced by '-'.
  const projectName = basename(cwd).toLowerCase().replace(/[^a-z0-9-]/g, '-');

  // Parse flags
  let agentId = projectName;
  let dbPath: string | null = null;
  let skipClaudeMd = false;

  for (let i = 1; i < args.length; i++) {
    if (args[i] === '--agent-id' && args[i + 1]) {
      agentId = args[++i];
    } else if (args[i] === '--db-path' && args[i + 1]) {
      dbPath = args[++i];
    } else if (args[i] === '--no-claude-md') {
      skipClaudeMd = true;
    }
  }

  // Find the package root (where src/mcp.ts lives)
  const packageRoot = resolve(__dirname, '..');
  const mcpScript = join(packageRoot, 'src', 'mcp.ts');
  const mcpDist = join(packageRoot, 'dist', 'mcp.js');

  // Determine DB path — default to <awm-root>/data/memory.db (shared across projects)
  if (!dbPath) {
    dbPath = join(packageRoot, 'data', 'memory.db');
  }
  const dbDir = dirname(dbPath);

  // Ensure data directory exists
  if (!existsSync(dbDir)) {
    mkdirSync(dbDir, { recursive: true });
    console.log(`Created data directory: ${dbDir}`);
  }

  // Determine command based on platform and whether dist exists
  const isWindows = process.platform === 'win32';
  const hasDist = existsSync(mcpDist);

  let mcpConfig: { command: string; args: string[]; env: Record<string, string> };

  if (hasDist) {
    // Use compiled JS (faster startup, no tsx needed)
    mcpConfig = {
      command: 'node',
      args: [mcpDist.replace(/\\/g, '/')],
      env: {
        AWM_DB_PATH: dbPath.replace(/\\/g, '/'),
        AWM_AGENT_ID: agentId,
      },
    };
  } else if (isWindows) {
    // No build output: run the TS source via tsx; Windows needs cmd /c for npx.
    mcpConfig = {
      command: 'cmd',
      args: ['/c', 'npx', 'tsx', mcpScript.replace(/\\/g, '/')],
      env: {
        AWM_DB_PATH: dbPath.replace(/\\/g, '/'),
        AWM_AGENT_ID: agentId,
      },
    };
  } else {
    // POSIX fallback: run the TS source directly via npx tsx.
    mcpConfig = {
      command: 'npx',
      args: ['tsx', mcpScript],
      env: {
        AWM_DB_PATH: dbPath,
        AWM_AGENT_ID: agentId,
      },
    };
  }

  // Read or create .mcp.json (malformed JSON is replaced with a fresh config)
  const mcpJsonPath = join(cwd, '.mcp.json');
  let existing: any = { mcpServers: {} };
  if (existsSync(mcpJsonPath)) {
    try {
      existing = JSON.parse(readFileSync(mcpJsonPath, 'utf-8'));
      if (!existing.mcpServers) existing.mcpServers = {};
    } catch {
      existing = { mcpServers: {} };
    }
  }

  // Register (or overwrite) our server entry, preserving other servers.
  existing.mcpServers['agent-working-memory'] = mcpConfig;
  writeFileSync(mcpJsonPath, JSON.stringify(existing, null, 2) + '\n');

  // Auto-append CLAUDE.md snippet unless --no-claude-md
  let claudeMdAction = '';
  const claudeMdSnippet = `

## Memory (AWM)
You have persistent memory via the agent-working-memory MCP server.
- At conversation start: call memory_restore to recover previous context
- When you learn something important: call memory_write
- When you need past context: call memory_recall
- Before long operations: call memory_checkpoint to save your state
- After using a recalled memory: call memory_feedback (useful/not-useful)
- To retract incorrect info: call memory_retract
- To manage tasks: call memory_task_add, memory_task_update, memory_task_list, memory_task_next
`;

  const claudeMdPath = join(cwd, 'CLAUDE.md');
  if (skipClaudeMd) {
    claudeMdAction = ' CLAUDE.md: skipped (--no-claude-md)';
  } else if (existsSync(claudeMdPath)) {
    // Idempotent: only append when the marker heading is not already present.
    const content = readFileSync(claudeMdPath, 'utf-8');
    if (content.includes('## Memory (AWM)')) {
      claudeMdAction = ' CLAUDE.md: already has AWM section (skipped)';
    } else {
      writeFileSync(claudeMdPath, content.trimEnd() + '\n' + claudeMdSnippet);
      claudeMdAction = ' CLAUDE.md: appended AWM workflow section';
    }
  } else {
    writeFileSync(claudeMdPath, `# ${basename(cwd)}\n${claudeMdSnippet}`);
    claudeMdAction = ' CLAUDE.md: created with AWM workflow section';
  }

  console.log(`
AWM configured for: ${cwd}

  Agent ID: ${agentId}
  DB path: ${dbPath}
  MCP config: ${mcpJsonPath}
${claudeMdAction}

Next steps:
  1. Restart Claude Code to pick up the MCP server
  2. The memory tools will appear automatically
`.trim());
}
|
|
193
|
+
|
|
194
|
+
// ─── MCP ──────────────────────────────────────
|
|
195
|
+
|
|
196
|
+
/** `awm mcp` — start the MCP stdio server (invoked by Claude Code). */
async function mcp() {
  // Dynamic import to avoid loading heavy deps for setup/health commands
  await import('./mcp.js');
}
|
|
200
|
+
|
|
201
|
+
// ─── SERVE ──────────────────────────────────────
|
|
202
|
+
|
|
203
|
+
async function serve() {
|
|
204
|
+
// Parse --port flag
|
|
205
|
+
for (let i = 1; i < args.length; i++) {
|
|
206
|
+
if (args[i] === '--port' && args[i + 1]) {
|
|
207
|
+
process.env.AWM_PORT = args[++i];
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
await import('./index.js');
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// ─── HEALTH ──────────────────────────────────────
|
|
214
|
+
|
|
215
|
+
function health() {
|
|
216
|
+
let port = '8400';
|
|
217
|
+
for (let i = 1; i < args.length; i++) {
|
|
218
|
+
if (args[i] === '--port' && args[i + 1]) {
|
|
219
|
+
port = args[++i];
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
try {
|
|
224
|
+
const result = execSync(`curl -sf http://localhost:${port}/health`, {
|
|
225
|
+
encoding: 'utf8',
|
|
226
|
+
timeout: 5000,
|
|
227
|
+
});
|
|
228
|
+
const data = JSON.parse(result);
|
|
229
|
+
console.log(`OK — v${data.version} (${data.timestamp})`);
|
|
230
|
+
} catch {
|
|
231
|
+
console.error(`Cannot reach AWM server on port ${port}`);
|
|
232
|
+
process.exit(1);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// ─── Dispatch ──────────────────────────────────────
|
|
237
|
+
|
|
238
|
+
switch (command) {
|
|
239
|
+
case 'setup':
|
|
240
|
+
setup();
|
|
241
|
+
break;
|
|
242
|
+
case 'mcp':
|
|
243
|
+
mcp();
|
|
244
|
+
break;
|
|
245
|
+
case 'serve':
|
|
246
|
+
serve();
|
|
247
|
+
break;
|
|
248
|
+
case 'health':
|
|
249
|
+
health();
|
|
250
|
+
break;
|
|
251
|
+
case '--help':
|
|
252
|
+
case '-h':
|
|
253
|
+
case undefined:
|
|
254
|
+
printUsage();
|
|
255
|
+
break;
|
|
256
|
+
default:
|
|
257
|
+
console.error(`Unknown command: ${command}`);
|
|
258
|
+
printUsage();
|
|
259
|
+
process.exit(1);
|
|
260
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ACT-R Base-Level Activation
|
|
3
|
+
*
|
|
4
|
+
* Based on Anderson's ACT-R cognitive architecture (1993).
|
|
5
|
+
* Memories that are accessed more recently and more frequently
|
|
6
|
+
* have higher activation — a well-established model of human memory.
|
|
7
|
+
*
|
|
8
|
+
* Formula: B(M) = ln(n + 1) - d * ln(ageDays / (n + 1))
|
|
9
|
+
*
|
|
10
|
+
* Where:
|
|
11
|
+
* n = access count
|
|
12
|
+
* d = decay exponent (default 0.5)
|
|
13
|
+
* ageDays = age of memory in days
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
export function baseLevelActivation(
|
|
17
|
+
accessCount: number,
|
|
18
|
+
ageDays: number,
|
|
19
|
+
decayExponent: number = 0.5
|
|
20
|
+
): number {
|
|
21
|
+
const n = Math.max(accessCount, 0);
|
|
22
|
+
const age = Math.max(ageDays, 0.001); // Avoid log(0)
|
|
23
|
+
return Math.log(n + 1) - decayExponent * Math.log(age / (n + 1));
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Softplus — smooth approximation of ReLU.
|
|
28
|
+
* Used to keep activation scores positive without hard clipping.
|
|
29
|
+
*/
|
|
30
|
+
export function softplus(x: number): number {
|
|
31
|
+
return Math.log(1 + Math.exp(x));
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Composite activation score combining content match, temporal decay,
|
|
36
|
+
* Hebbian boost, and confidence.
|
|
37
|
+
*
|
|
38
|
+
* Score = contentMatch * softplus(B(M) + scale * hebbianBoost) * confidence
|
|
39
|
+
*/
|
|
40
|
+
export function compositeScore(params: {
|
|
41
|
+
contentMatch: number;
|
|
42
|
+
accessCount: number;
|
|
43
|
+
ageDays: number;
|
|
44
|
+
hebbianBoost: number;
|
|
45
|
+
confidence: number;
|
|
46
|
+
decayExponent?: number;
|
|
47
|
+
hebbianScale?: number;
|
|
48
|
+
}): number {
|
|
49
|
+
const {
|
|
50
|
+
contentMatch,
|
|
51
|
+
accessCount,
|
|
52
|
+
ageDays,
|
|
53
|
+
hebbianBoost,
|
|
54
|
+
confidence,
|
|
55
|
+
decayExponent = 0.5,
|
|
56
|
+
hebbianScale = 1.0,
|
|
57
|
+
} = params;
|
|
58
|
+
|
|
59
|
+
const bm = baseLevelActivation(accessCount, ageDays, decayExponent);
|
|
60
|
+
return contentMatch * softplus(bm + hebbianScale * hebbianBoost) * confidence;
|
|
61
|
+
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Engine — local vector embeddings via transformers.js
|
|
3
|
+
*
|
|
4
|
+
 * Default: Xenova/all-MiniLM-L6-v2 (384 dimensions) for semantic similarity.
|
|
5
|
+
* Configurable via AWM_EMBED_MODEL env var.
|
|
6
|
+
* Model is downloaded once on first use and cached locally.
|
|
7
|
+
*
|
|
8
|
+
* Singleton pattern — call getEmbedder() to get the shared instance.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
|
|
12
|
+
|
|
13
|
+
// Embedding model configuration, overridable via environment variables.
const MODEL_ID = process.env.AWM_EMBED_MODEL ?? 'Xenova/all-MiniLM-L6-v2';
// Vector length kept from each embedding — must match the model's output dim.
const DIMENSIONS = parseInt(process.env.AWM_EMBED_DIMS ?? '384', 10);
// Pooling strategy applied to token embeddings ('mean' or 'cls').
const POOLING = (process.env.AWM_EMBED_POOLING ?? 'mean') as 'cls' | 'mean';

// Singleton pipeline state: `instance` once loaded, `initPromise` while loading.
let instance: FeatureExtractionPipeline | null = null;
let initPromise: Promise<FeatureExtractionPipeline> | null = null;
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Get or initialize the embedding pipeline (singleton).
|
|
22
|
+
* First call downloads the model (~22MB), subsequent calls are instant.
|
|
23
|
+
*/
|
|
24
|
+
export async function getEmbedder(): Promise<FeatureExtractionPipeline> {
|
|
25
|
+
if (instance) return instance;
|
|
26
|
+
if (initPromise) return initPromise;
|
|
27
|
+
|
|
28
|
+
initPromise = pipeline('feature-extraction', MODEL_ID, {
|
|
29
|
+
dtype: 'fp32',
|
|
30
|
+
}).then(pipe => {
|
|
31
|
+
instance = pipe;
|
|
32
|
+
console.log(`Embedding model loaded: ${MODEL_ID} (${DIMENSIONS}d)`);
|
|
33
|
+
return pipe;
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
return initPromise;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Generate an embedding vector for a text string.
|
|
41
|
+
* Returns a normalized float32 array of length DIMENSIONS.
|
|
42
|
+
*/
|
|
43
|
+
export async function embed(text: string): Promise<number[]> {
|
|
44
|
+
const embedder = await getEmbedder();
|
|
45
|
+
const result = await embedder(text, { pooling: POOLING, normalize: true });
|
|
46
|
+
// result is a Tensor — extract the data
|
|
47
|
+
return Array.from(result.data as Float32Array).slice(0, DIMENSIONS);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Generate embeddings for multiple texts in a batch.
|
|
52
|
+
* More efficient than calling embed() in a loop.
|
|
53
|
+
*/
|
|
54
|
+
export async function embedBatch(texts: string[]): Promise<number[][]> {
|
|
55
|
+
if (texts.length === 0) return [];
|
|
56
|
+
const embedder = await getEmbedder();
|
|
57
|
+
const result = await embedder(texts, { pooling: POOLING, normalize: true });
|
|
58
|
+
const data = result.data as Float32Array;
|
|
59
|
+
|
|
60
|
+
const vectors: number[][] = [];
|
|
61
|
+
for (let i = 0; i < texts.length; i++) {
|
|
62
|
+
vectors.push(Array.from(data.slice(i * DIMENSIONS, (i + 1) * DIMENSIONS)));
|
|
63
|
+
}
|
|
64
|
+
return vectors;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Cosine similarity between two normalized vectors.
|
|
69
|
+
* Since vectors are pre-normalized, this is just the dot product.
|
|
70
|
+
*/
|
|
71
|
+
export function cosineSimilarity(a: number[], b: number[]): number {
|
|
72
|
+
if (a.length !== b.length || a.length === 0) return 0;
|
|
73
|
+
let dot = 0;
|
|
74
|
+
for (let i = 0; i < a.length; i++) {
|
|
75
|
+
dot += a[i] * b[i];
|
|
76
|
+
}
|
|
77
|
+
// Clamp to [-1, 1] to handle floating point drift
|
|
78
|
+
return Math.max(-1, Math.min(1, dot));
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/** Vector dimensions for this model (mirrors AWM_EMBED_DIMS, default 384). */
export const EMBEDDING_DIMENSIONS = DIMENSIONS;
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hebbian Learning — "neurons that fire together wire together"
|
|
3
|
+
*
|
|
4
|
+
* When two engrams are co-activated (retrieved together in the same
|
|
5
|
+
* activation query), their association weight increases.
|
|
6
|
+
*
|
|
7
|
+
* Log-space weight update prevents runaway growth:
|
|
8
|
+
* logNew = log(w) + signal * log(1 + rate)
|
|
9
|
+
*
|
|
10
|
+
* Associations decay symmetrically when unused.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const MIN_WEIGHT = 0.001;
|
|
14
|
+
const MAX_WEIGHT = 5.0; // Cap at 5 to prevent graph walk explosion
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Strengthen an association weight after co-activation.
|
|
18
|
+
*/
|
|
19
|
+
export function strengthenAssociation(
|
|
20
|
+
currentWeight: number,
|
|
21
|
+
signal: number = 1.0,
|
|
22
|
+
rate: number = 0.25
|
|
23
|
+
): number {
|
|
24
|
+
const logW = Math.log(Math.max(currentWeight, MIN_WEIGHT));
|
|
25
|
+
const logNew = logW + signal * Math.log(1 + rate);
|
|
26
|
+
return Math.min(Math.exp(logNew), MAX_WEIGHT);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Weaken an association weight due to lack of co-activation.
|
|
31
|
+
* Called periodically by the connection engine.
|
|
32
|
+
*/
|
|
33
|
+
export function decayAssociation(
|
|
34
|
+
currentWeight: number,
|
|
35
|
+
daysSinceActivation: number,
|
|
36
|
+
halfLife: number = 7.0 // days
|
|
37
|
+
): number {
|
|
38
|
+
const decayFactor = Math.pow(0.5, daysSinceActivation / halfLife);
|
|
39
|
+
return Math.max(currentWeight * decayFactor, MIN_WEIGHT);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Ring buffer for tracking recent co-activations.
|
|
44
|
+
* Feeds the Hebbian worker — when two engrams appear in the buffer
|
|
45
|
+
* within a window, their association is strengthened.
|
|
46
|
+
*/
|
|
47
|
+
export class CoActivationBuffer {
|
|
48
|
+
private buffer: { engramId: string; timestamp: number }[] = [];
|
|
49
|
+
private maxSize: number;
|
|
50
|
+
|
|
51
|
+
constructor(maxSize: number = 50) {
|
|
52
|
+
this.maxSize = maxSize;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
push(engramId: string): void {
|
|
56
|
+
this.buffer.push({ engramId, timestamp: Date.now() });
|
|
57
|
+
if (this.buffer.length > this.maxSize) {
|
|
58
|
+
this.buffer.shift();
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
pushBatch(engramIds: string[]): void {
|
|
63
|
+
for (const id of engramIds) {
|
|
64
|
+
this.push(id);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Get all pairs of engrams that were co-activated within windowMs.
|
|
70
|
+
*/
|
|
71
|
+
getCoActivatedPairs(windowMs: number = 5000): [string, string][] {
|
|
72
|
+
const pairs: [string, string][] = [];
|
|
73
|
+
for (let i = 0; i < this.buffer.length; i++) {
|
|
74
|
+
for (let j = i + 1; j < this.buffer.length; j++) {
|
|
75
|
+
const a = this.buffer[i];
|
|
76
|
+
const b = this.buffer[j];
|
|
77
|
+
if (
|
|
78
|
+
a.engramId !== b.engramId &&
|
|
79
|
+
Math.abs(a.timestamp - b.timestamp) <= windowMs
|
|
80
|
+
) {
|
|
81
|
+
pairs.push([a.engramId, b.engramId]);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
return pairs;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
clear(): void {
|
|
89
|
+
this.buffer = [];
|
|
90
|
+
}
|
|
91
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query Expander — rewrites queries with synonyms and related terms.
|
|
3
|
+
*
|
|
4
|
+
* Uses Xenova/flan-t5-small (~80MB ONNX) to expand search queries
|
|
5
|
+
* with related terms that improve BM25 recall.
|
|
6
|
+
*
|
|
7
|
+
* Example: "What is Caroline's identity?" →
|
|
8
|
+
* "What is Caroline's identity? Caroline personal gender transgender self"
|
|
9
|
+
*
|
|
10
|
+
* Singleton pattern — call getExpander() to get the shared instance.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { pipeline, type Text2TextGenerationPipeline } from '@huggingface/transformers';
|
|
14
|
+
|
|
15
|
+
// Seq2seq model used for query expansion (~80MB ONNX, downloaded on first use).
const MODEL_ID = 'Xenova/flan-t5-small';

// Singleton pipeline state: `instance` once loaded, `initPromise` while loading.
let instance: Text2TextGenerationPipeline | null = null;
let initPromise: Promise<Text2TextGenerationPipeline> | null = null;
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Get or initialize the text generation pipeline (singleton).
|
|
22
|
+
* First call downloads the model (~80MB), subsequent calls are instant.
|
|
23
|
+
*/
|
|
24
|
+
export async function getExpander(): Promise<Text2TextGenerationPipeline> {
|
|
25
|
+
if (instance) return instance;
|
|
26
|
+
if (initPromise) return initPromise;
|
|
27
|
+
|
|
28
|
+
initPromise = pipeline('text2text-generation', MODEL_ID, {
|
|
29
|
+
dtype: 'fp32',
|
|
30
|
+
}).then(pipe => {
|
|
31
|
+
instance = pipe as Text2TextGenerationPipeline;
|
|
32
|
+
console.log(`Query expander loaded: ${MODEL_ID}`);
|
|
33
|
+
return instance;
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
return initPromise;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Expand a query with related terms and synonyms.
|
|
41
|
+
* Returns the original query + generated expansion terms.
|
|
42
|
+
* Falls back to the original query on any error.
|
|
43
|
+
*/
|
|
44
|
+
export async function expandQuery(originalQuery: string): Promise<string> {
|
|
45
|
+
try {
|
|
46
|
+
const expander = await getExpander();
|
|
47
|
+
const prompt = `Expand this search query with synonyms and related terms. Only output the additional terms, not the original query. Query: ${originalQuery}. Additional terms:`;
|
|
48
|
+
|
|
49
|
+
const result = await expander(prompt, {
|
|
50
|
+
max_new_tokens: 25,
|
|
51
|
+
no_repeat_ngram_size: 2,
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
const expanded = Array.isArray(result) ? (result[0] as any)?.generated_text ?? '' : '';
|
|
55
|
+
const cleanExpanded = expanded.trim();
|
|
56
|
+
|
|
57
|
+
if (cleanExpanded && cleanExpanded.length > 2) {
|
|
58
|
+
return `${originalQuery} ${cleanExpanded}`;
|
|
59
|
+
}
|
|
60
|
+
return originalQuery;
|
|
61
|
+
} catch {
|
|
62
|
+
return originalQuery;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-Encoder Re-Ranker — scores (query, passage) pairs for relevance.
|
|
3
|
+
*
|
|
4
|
+
* Uses Xenova/ms-marco-MiniLM-L-6-v2 (~22MB ONNX) which is trained on
|
|
5
|
+
* MS-MARCO passage ranking. Unlike bi-encoders, cross-encoders see both
|
|
6
|
+
* query and passage together via full attention — much better at judging
|
|
7
|
+
* if a passage actually answers a question.
|
|
8
|
+
*
|
|
9
|
+
* Uses direct tokenizer + model inference (NOT the text-classification
|
|
10
|
+
* pipeline, which doesn't support text_pair and returns identical scores).
|
|
11
|
+
*
|
|
12
|
+
* Singleton pattern — call getReranker() to get the shared instance.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import {
|
|
16
|
+
AutoTokenizer,
|
|
17
|
+
AutoModelForSequenceClassification,
|
|
18
|
+
type PreTrainedTokenizer,
|
|
19
|
+
type PreTrainedModel,
|
|
20
|
+
} from '@huggingface/transformers';
|
|
21
|
+
|
|
22
|
+
// Cross-encoder checkpoint (~22MB ONNX); override via AWM_RERANKER_MODEL.
const DEFAULT_MODEL = 'Xenova/ms-marco-MiniLM-L-6-v2';
const MODEL_ID = process.env.AWM_RERANKER_MODEL || DEFAULT_MODEL;

// Singleton tokenizer/model state; `initPromise` guards concurrent loads.
let tokenizer: PreTrainedTokenizer | null = null;
let model: PreTrainedModel | null = null;
let initPromise: Promise<void> | null = null;
|
|
28
|
+
|
|
29
|
+
async function ensureLoaded(): Promise<void> {
|
|
30
|
+
if (tokenizer && model) return;
|
|
31
|
+
if (initPromise) return initPromise;
|
|
32
|
+
|
|
33
|
+
initPromise = (async () => {
|
|
34
|
+
tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);
|
|
35
|
+
model = await AutoModelForSequenceClassification.from_pretrained(MODEL_ID, {
|
|
36
|
+
dtype: 'fp32',
|
|
37
|
+
});
|
|
38
|
+
console.log(`Re-ranker model loaded: ${MODEL_ID}`);
|
|
39
|
+
})();
|
|
40
|
+
|
|
41
|
+
return initPromise;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
 * Kept for backwards compat — returns the model (unused externally).
 * Calling it fully loads the tokenizer + model as a side effect.
 */
export async function getReranker(): Promise<any> {
  await ensureLoaded();
  return model;
}
|
|
49
|
+
|
|
50
|
+
/** One re-ranked candidate: its position in the input array plus its score. */
export interface RerankResult {
  index: number;
  score: number; // sigmoid-normalized relevance (0-1)
}
|
|
54
|
+
|
|
55
|
+
function sigmoid(x: number): number {
|
|
56
|
+
return 1 / (1 + Math.exp(-x));
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Re-rank candidate passages against a query using the cross-encoder.
|
|
61
|
+
* Returns results sorted by relevance score (descending).
|
|
62
|
+
*/
|
|
63
|
+
export async function rerank(
|
|
64
|
+
query: string,
|
|
65
|
+
passages: string[],
|
|
66
|
+
): Promise<RerankResult[]> {
|
|
67
|
+
if (passages.length === 0) return [];
|
|
68
|
+
|
|
69
|
+
await ensureLoaded();
|
|
70
|
+
|
|
71
|
+
const results: RerankResult[] = [];
|
|
72
|
+
|
|
73
|
+
for (let i = 0; i < passages.length; i++) {
|
|
74
|
+
try {
|
|
75
|
+
// Tokenize as a query-passage PAIR using text_pair
|
|
76
|
+
const inputs = tokenizer!(query, {
|
|
77
|
+
text_pair: passages[i],
|
|
78
|
+
padding: true,
|
|
79
|
+
truncation: true,
|
|
80
|
+
return_tensors: 'pt',
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
const output = await model!(inputs);
|
|
84
|
+
|
|
85
|
+
// Model outputs raw logits — extract the single relevance logit
|
|
86
|
+
const logits = output.logits ?? output.last_hidden_state;
|
|
87
|
+
const rawLogit = logits.data[0] as number;
|
|
88
|
+
|
|
89
|
+
// Apply sigmoid to map to 0-1 probability
|
|
90
|
+
results.push({ index: i, score: sigmoid(rawLogit) });
|
|
91
|
+
} catch {
|
|
92
|
+
results.push({ index: i, score: 0 });
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Sort by score descending
|
|
97
|
+
results.sort((a, b) => b.score - a.score);
|
|
98
|
+
return results;
|
|
99
|
+
}
|