npm - agent-working-memory - Versions diffs - 0.5.4 → 0.5.6 - Mend

agent-working-memory 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

package/README.md +87 -46
package/dist/api/routes.d.ts.map +1 -1
package/dist/api/routes.js +21 -5
package/dist/api/routes.js.map +1 -1
package/dist/cli.js +67 -67
package/dist/coordination/index.d.ts +11 -0
package/dist/coordination/index.d.ts.map +1 -0
package/dist/coordination/index.js +39 -0
package/dist/coordination/index.js.map +1 -0
package/dist/coordination/mcp-tools.d.ts +8 -0
package/dist/coordination/mcp-tools.d.ts.map +1 -0
package/dist/coordination/mcp-tools.js +216 -0
package/dist/coordination/mcp-tools.js.map +1 -0
package/dist/coordination/routes.d.ts +9 -0
package/dist/coordination/routes.d.ts.map +1 -0
package/dist/coordination/routes.js +434 -0
package/dist/coordination/routes.js.map +1 -0
package/dist/coordination/schema.d.ts +12 -0
package/dist/coordination/schema.d.ts.map +1 -0
package/dist/coordination/schema.js +91 -0
package/dist/coordination/schema.js.map +1 -0
package/dist/coordination/schemas.d.ts +208 -0
package/dist/coordination/schemas.d.ts.map +1 -0
package/dist/coordination/schemas.js +109 -0
package/dist/coordination/schemas.js.map +1 -0
package/dist/coordination/stale.d.ts +25 -0
package/dist/coordination/stale.d.ts.map +1 -0
package/dist/coordination/stale.js +53 -0
package/dist/coordination/stale.js.map +1 -0
package/dist/index.js +21 -3
package/dist/index.js.map +1 -1
package/dist/mcp.js +90 -79
package/dist/mcp.js.map +1 -1
package/dist/storage/sqlite.d.ts +3 -0
package/dist/storage/sqlite.d.ts.map +1 -1
package/dist/storage/sqlite.js +285 -281
package/dist/storage/sqlite.js.map +1 -1
package/package.json +55 -55
package/src/api/index.ts +3 -3
package/src/api/routes.ts +551 -536
package/src/cli.ts +397 -397
package/src/coordination/index.ts +47 -0
package/src/coordination/mcp-tools.ts +313 -0
package/src/coordination/routes.ts +656 -0
package/src/coordination/schema.ts +94 -0
package/src/coordination/schemas.ts +136 -0
package/src/coordination/stale.ts +89 -0
package/src/core/decay.ts +63 -63
package/src/core/embeddings.ts +88 -88
package/src/core/hebbian.ts +93 -93
package/src/core/index.ts +5 -5
package/src/core/logger.ts +36 -36
package/src/core/query-expander.ts +66 -66
package/src/core/reranker.ts +101 -101
package/src/engine/activation.ts +656 -656
package/src/engine/connections.ts +103 -103
package/src/engine/consolidation-scheduler.ts +125 -125
package/src/engine/eval.ts +102 -102
package/src/engine/eviction.ts +101 -101
package/src/engine/index.ts +8 -8
package/src/engine/retraction.ts +100 -100
package/src/engine/staging.ts +74 -74
package/src/index.ts +137 -121
package/src/mcp.ts +1024 -1013
package/src/storage/index.ts +3 -3
package/src/storage/sqlite.ts +968 -963
package/src/types/agent.ts +67 -67
package/src/types/checkpoint.ts +46 -46
package/src/types/engram.ts +217 -217
package/src/types/eval.ts +100 -100
package/src/types/index.ts +6 -6

package/src/coordination/schema.ts ADDED Viewed

@@ -0,0 +1,94 @@
+// Copyright 2026 Robert Winter / Complete Ideas
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * SQL table definitions for the coordination module.
+ * Tables are created conditionally when AWM_COORDINATION=true.
+ * Uses the same memory.db — coordination events feed the activation engine.
+ */
+import type Database from 'better-sqlite3';
+/**
+ * DDL script for the coordination module, kept as one string so that
+ * initCoordinationTables can run it with a single db.exec() call.
+ *
+ * Defines six tables: coord_agents, coord_assignments, coord_locks,
+ * coord_commands, coord_findings and coord_events. Every statement is
+ * CREATE TABLE IF NOT EXISTS, so tables that already exist are left as-is
+ * (which also means later column changes here will NOT alter existing tables).
+ *
+ * Timestamp columns are TEXT; the NOT NULL ones default to datetime('now').
+ * coord_assignments, coord_locks and coord_findings declare a foreign key to
+ * coord_agents(id).
+ * NOTE(review): SQLite enforces foreign keys only when `PRAGMA foreign_keys = ON`
+ * is set on the connection — confirm whether the code that opens memory.db does so.
+ */
+const COORDINATION_TABLES = `
+-- Coordination: agents in the hive
+CREATE TABLE IF NOT EXISTS coord_agents (
+  id           TEXT PRIMARY KEY,
+  name         TEXT NOT NULL,
+  role         TEXT NOT NULL DEFAULT 'worker',
+  status       TEXT NOT NULL DEFAULT 'idle',
+  pid          INTEGER,
+  started_at   TEXT NOT NULL DEFAULT (datetime('now')),
+  last_seen    TEXT NOT NULL DEFAULT (datetime('now')),
+  current_task TEXT,
+  metadata     TEXT,
+  capabilities TEXT,
+  workspace    TEXT
+);
+-- Coordination: assignments
+CREATE TABLE IF NOT EXISTS coord_assignments (
+  id           TEXT PRIMARY KEY,
+  agent_id     TEXT,
+  task         TEXT NOT NULL,
+  description  TEXT,
+  status       TEXT NOT NULL DEFAULT 'pending',
+  created_at   TEXT NOT NULL DEFAULT (datetime('now')),
+  started_at   TEXT,
+  completed_at TEXT,
+  result       TEXT,
+  workspace    TEXT,
+  FOREIGN KEY (agent_id) REFERENCES coord_agents(id)
+);
+-- Coordination: file locks
+CREATE TABLE IF NOT EXISTS coord_locks (
+  file_path    TEXT PRIMARY KEY,
+  agent_id     TEXT NOT NULL,
+  locked_at    TEXT NOT NULL DEFAULT (datetime('now')),
+  reason       TEXT,
+  FOREIGN KEY (agent_id) REFERENCES coord_agents(id)
+);
+-- Coordination: orchestrator broadcast commands
+CREATE TABLE IF NOT EXISTS coord_commands (
+  id           INTEGER PRIMARY KEY AUTOINCREMENT,
+  command      TEXT NOT NULL,
+  reason       TEXT,
+  issued_by    TEXT,
+  issued_at    TEXT NOT NULL DEFAULT (datetime('now')),
+  cleared_at   TEXT,
+  workspace    TEXT
+);
+-- Coordination: findings reported by agents
+CREATE TABLE IF NOT EXISTS coord_findings (
+  id           INTEGER PRIMARY KEY AUTOINCREMENT,
+  agent_id     TEXT NOT NULL,
+  category     TEXT NOT NULL,
+  severity     TEXT NOT NULL DEFAULT 'info',
+  file_path    TEXT,
+  line_number  INTEGER,
+  description  TEXT NOT NULL,
+  suggestion   TEXT,
+  status       TEXT NOT NULL DEFAULT 'open',
+  created_at   TEXT NOT NULL DEFAULT (datetime('now')),
+  resolved_at  TEXT,
+  FOREIGN KEY (agent_id) REFERENCES coord_agents(id)
+);
+-- Coordination: event audit trail
+CREATE TABLE IF NOT EXISTS coord_events (
+  id           INTEGER PRIMARY KEY AUTOINCREMENT,
+  agent_id     TEXT,
+  event_type   TEXT NOT NULL,
+  detail       TEXT,
+  created_at   TEXT NOT NULL DEFAULT (datetime('now'))
+);
+`;
+/**
+ * Ensure every coordination table exists in `db`.
+ *
+ * Idempotent: the script consists solely of CREATE TABLE IF NOT EXISTS
+ * statements, so repeated calls leave existing tables untouched.
+ *
+ * @param db Open better-sqlite3 connection to create the tables in.
+ */
+export function initCoordinationTables(db: Database.Database): void {
+  const ddlScript = COORDINATION_TABLES;
+  db.exec(ddlScript);
+}

package/src/coordination/schemas.ts ADDED Viewed

@@ -0,0 +1,136 @@
+// Copyright 2026 Robert Winter / Complete Ideas
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Zod validation schemas for the coordination module.
+ * Ported from AgentSynapse packages/coordinator/src/schemas.ts.
+ */
+import { z } from 'zod';
+// ─── Enums ──────────────────────────────────────────────────────
+/** Roles an agent may declare (used by checkinSchema). */
+export const agentRoleEnum = z.enum(['worker', 'orchestrator', 'dev-lead']);
+/** Agent states accepted as a filter (used by workersQuerySchema). */
+export const agentStatusEnum = z.enum(['idle', 'working', 'dead']);
+/**
+ * Statuses accepted by assignmentUpdateSchema.
+ * NOTE(review): coord_assignments.status defaults to 'pending' and the stale
+ * cleanup matches 'assigned'; neither value is part of this enum — confirm
+ * that those states are only ever written server-side.
+ */
+export const assignmentStatusEnum = z.enum(['in_progress', 'completed', 'failed', 'blocked']);
+/** Commands accepted by commandCreateSchema. */
+export const commandEnum = z.enum(['BUILD_FREEZE', 'PAUSE', 'RESUME', 'SHUTDOWN']);
+/** Severity levels for a finding. */
+export const findingSeverityEnum = z.enum(['critical', 'error', 'warn', 'info']);
+/** Categories a finding can be filed under. */
+export const findingCategoryEnum = z.enum([
+  'typecheck', 'lint', 'test-failure', 'security', 'performance',
+  'dead-code', 'todo', 'bug', 'ux', 'a11y', 'sql', 'convention',
+  'freshdesk', 'data-quality', 'other',
+]);
+/** Finding states accepted as a filter (used by findingsQuerySchema). */
+export const findingStatusEnum = z.enum(['open', 'resolved']);
+// ─── Checkin ────────────────────────────────────────────────────
+/**
+ * Check-in input: name of 1–50 chars; role defaults to 'worker'; optional
+ * positive-integer pid, string-keyed metadata record, up to 20 capability
+ * strings of at most 50 chars each, and a workspace label of at most 50 chars.
+ */
+export const checkinSchema = z.object({
+  name: z.string().min(1).max(50),
+  role: agentRoleEnum.default('worker'),
+  pid: z.number().int().positive().optional(),
+  metadata: z.record(z.string(), z.unknown()).optional(),
+  capabilities: z.array(z.string().max(50)).max(20).optional(),
+  workspace: z.string().max(50).optional(),
+});
+/** Check-out input: the agent's UUID. */
+export const checkoutSchema = z.object({
+  agentId: z.string().uuid(),
+});
+// ─── Assignments ────────────────────────────────────────────────
+/** New assignment: required task (1–1000 chars); agentId, description and workspace are optional. */
+export const assignCreateSchema = z.object({
+  agentId: z.string().uuid().optional(),
+  task: z.string().min(1).max(1000),
+  description: z.string().max(5000).optional(),
+  workspace: z.string().max(50).optional(),
+});
+/** Assignment lookup filters; both fields are optional. */
+export const assignmentQuerySchema = z.object({
+  agentId: z.string().uuid().optional(),
+  workspace: z.string().max(50).optional(),
+});
+/** Claim input: UUID of the claiming agent. */
+export const assignmentClaimSchema = z.object({
+  agentId: z.string().uuid(),
+});
+/** Assignment update: new status plus an optional result text of at most 10000 chars. */
+export const assignmentUpdateSchema = z.object({
+  status: assignmentStatusEnum,
+  result: z.string().max(10000).optional(),
+});
+// ─── Locks ──────────────────────────────────────────────────────
+/** Lock acquisition: agent UUID, file path (1–500 chars) and an optional reason. */
+export const lockAcquireSchema = z.object({
+  agentId: z.string().uuid(),
+  filePath: z.string().min(1).max(500),
+  reason: z.string().max(500).optional(),
+});
+/** Lock release: agent UUID and the file path to release. */
+export const lockReleaseSchema = z.object({
+  agentId: z.string().uuid(),
+  filePath: z.string().min(1).max(500),
+});
+// ─── Commands ───────────────────────────────────────────────────
+/** New command: one of commandEnum, with optional reason, issuer and workspace. */
+export const commandCreateSchema = z.object({
+  command: commandEnum,
+  reason: z.string().max(1000).optional(),
+  issuedBy: z.string().max(50).optional(),
+  workspace: z.string().max(50).optional(),
+});
+/**
+ * Command-wait parameters. `status` is a free-form string (default 'idle') and
+ * `timeout` is coerced to an integer in 0–30 (unit not visible here —
+ * presumably seconds; confirm against the route handler).
+ * NOTE(review): unlike the other schemas in this file, `agentId` is NOT
+ * UUID-validated and `status` does not use agentStatusEnum — confirm intended.
+ */
+export const commandWaitQuerySchema = z.object({
+  status: z.string().max(20).default('idle'),
+  timeout: z.coerce.number().int().min(0).max(30).optional(),
+  agentId: z.string().optional(),
+  workspace: z.string().max(50).optional(),
+});
+// ─── Findings ───────────────────────────────────────────────────
+/** New finding: agent UUID, category and description are required; severity defaults to 'info'. */
+export const findingCreateSchema = z.object({
+  agentId: z.string().uuid(),
+  category: findingCategoryEnum,
+  severity: findingSeverityEnum.default('info'),
+  filePath: z.string().max(500).optional(),
+  lineNumber: z.number().int().positive().optional(),
+  description: z.string().min(1).max(5000),
+  suggestion: z.string().max(5000).optional(),
+});
+/** Finding filters; `limit` is coerced to an integer in 1–200 and defaults to 50. */
+export const findingsQuerySchema = z.object({
+  category: findingCategoryEnum.optional(),
+  severity: findingSeverityEnum.optional(),
+  status: findingStatusEnum.optional(),
+  limit: z.coerce.number().int().min(1).max(200).default(50),
+});
+// ─── Param Schemas ─────────────────────────────────────────────
+/** Assignment id parameter: a UUID string. */
+export const assignmentIdParamSchema = z.object({ id: z.string().uuid() });
+/** Finding id parameter: coerced to a positive integer. */
+export const findingIdParamSchema = z.object({ id: z.coerce.number().int().positive() });
+// ─── Pulse ─────────────────────────────────────────────────────
+/** Pulse input: the agent's UUID. */
+export const pulseSchema = z.object({
+  agentId: z.string().uuid(),
+});
+// ─── Status / Events ────────────────────────────────────────────
+/** Event listing parameters; `limit` is coerced to an integer in 1–200 and defaults to 50. */
+export const eventsQuerySchema = z.object({
+  limit: z.coerce.number().int().min(1).max(200).default(50),
+});
+/**
+ * Stale-agent query: `seconds` is coerced to an integer in 1–86400 (default 120).
+ * `cleanup` is validated as one of the literal strings '0', '1', 'true', 'false'
+ * and is left as a string — the consumer must interpret it.
+ */
+export const staleQuerySchema = z.object({
+  seconds: z.coerce.number().int().min(1).max(86400).default(120),
+  cleanup: z.enum(['0', '1', 'true', 'false']).optional(),
+});
+/** Worker listing filters; all fields are optional. */
+export const workersQuerySchema = z.object({
+  capability: z.string().max(50).optional(),
+  status: agentStatusEnum.optional(),
+  workspace: z.string().max(50).optional(),
+});

package/src/coordination/stale.ts ADDED Viewed

@@ -0,0 +1,89 @@
+// Copyright 2026 Robert Winter / Complete Ideas
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Stale agent detection and cleanup for the coordination module.
+ */
+import type Database from 'better-sqlite3';
+/** One row of the stale-agent query issued by detectStale. */
+interface StaleAgent {
+  /** coord_agents.id */
+  id: string;
+  /** coord_agents.name */
+  name: string;
+  /** coord_agents.role */
+  role: string;
+  /** coord_agents.status at query time (never 'dead'). */
+  status: string;
+  /** coord_agents.last_seen exactly as stored. */
+  last_seen: string;
+  /** Rounded number of seconds between last_seen and the moment of the query. */
+  seconds_since_seen: number;
+}
+/**
+ * List non-dead agents whose last_seen is more than `thresholdSeconds` old.
+ *
+ * Read-only: runs a single SELECT against coord_agents.
+ *
+ * @param db Open better-sqlite3 connection.
+ * @param thresholdSeconds Age in seconds above which an agent counts as stale.
+ * @returns The matching agent rows (empty when none are stale).
+ */
+export function detectStale(db: Database.Database, thresholdSeconds: number): StaleAgent[] {
+  const staleQuery = db.prepare(
+    `SELECT id, name, role, status, last_seen,
+            ROUND((julianday('now') - julianday(last_seen)) * 86400) AS seconds_since_seen
+     FROM coord_agents
+     WHERE status NOT IN ('dead')
+       AND (julianday('now') - julianday(last_seen)) * 86400 > ?`
+  );
+  const rows = staleQuery.all(thresholdSeconds) as StaleAgent[];
+  return rows;
+}
+/**
+ * Clean up stale agents: fail their active assignments, release their locks
+ * and mark them dead.
+ *
+ * Detection and all writes run inside ONE transaction, so a failure part-way
+ * through rolls everything back instead of leaving an agent with failed
+ * assignments but still-held locks (or still marked alive).
+ *
+ * For each stale agent:
+ *  - assignments in status 'assigned' or 'in_progress' are set to 'failed'
+ *    (an 'assignment_failed' event is logged when any were changed);
+ *  - every lock held by the agent is deleted;
+ *  - the agent is marked 'dead' and its current_task cleared;
+ *  - a 'stale_cleanup' event is logged when any assignment or lock was affected.
+ *
+ * NOTE(review): coord_assignments.status defaults to 'pending'; pending rows
+ * owned by a stale agent are not touched here — confirm that is intended.
+ *
+ * @param db Open better-sqlite3 connection.
+ * @param thresholdSeconds Age in seconds above which an agent counts as stale.
+ * @returns The stale agents found and `cleaned`, the number of failed
+ *          assignments plus released locks (agents themselves are not counted).
+ */
+export function cleanupStale(db: Database.Database, thresholdSeconds: number): { stale: StaleAgent[]; cleaned: number } {
+  // Prepare each statement once and reuse it for every stale agent.
+  const failAssignments = db.prepare(
+    `UPDATE coord_assignments SET status = 'failed', result = 'agent disconnected (stale)', completed_at = datetime('now')
+     WHERE agent_id = ? AND status IN ('assigned', 'in_progress')`
+  );
+  const releaseLocks = db.prepare(`DELETE FROM coord_locks WHERE agent_id = ?`);
+  const markDead = db.prepare(`UPDATE coord_agents SET status = 'dead', current_task = NULL WHERE id = ?`);
+  const logEvent = db.prepare(
+    `INSERT INTO coord_events (agent_id, event_type, detail) VALUES (?, ?, ?)`
+  );
+  // db.transaction() commits when the callback returns and rolls back if it throws.
+  const runCleanup = db.transaction(() => {
+    const stale = detectStale(db, thresholdSeconds);
+    let cleaned = 0;
+    for (const agent of stale) {
+      const failed = failAssignments.run(agent.id).changes;
+      if (failed > 0) {
+        logEvent.run(agent.id, 'assignment_failed', `auto-failed ${failed} orphaned assignment(s) — agent stale`);
+      }
+      const released = releaseLocks.run(agent.id).changes;
+      markDead.run(agent.id);
+      cleaned += failed + released;
+      if (failed > 0 || released > 0) {
+        logEvent.run(agent.id, 'stale_cleanup', `failed ${failed} assignment(s), released ${released} lock(s)`);
+      }
+    }
+    return { stale, cleaned };
+  });
+  return runCleanup();
+}
+/**
+ * Delete 'heartbeat' rows from coord_events that are more than one hour old.
+ * Rows with any other event_type are never removed by this function.
+ *
+ * @param db Open better-sqlite3 connection.
+ * @returns Number of rows deleted.
+ */
+export function pruneOldHeartbeats(db: Database.Database): number {
+  const purge = db.prepare(
+    `DELETE FROM coord_events WHERE event_type = 'heartbeat' AND created_at < datetime('now', '-1 hour')`
+  );
+  const { changes } = purge.run();
+  return changes;
+}
+/**
+ * Clean slate on startup: mark every non-dead agent dead (clearing its
+ * current_task), delete the locks those agents hold, and stamp cleared_at on
+ * every command that is still uncleared.
+ *
+ * All writes run in one transaction, so an interrupted reset cannot leave
+ * some agents dead while their locks or pending commands survive.
+ *
+ * NOTE(review): when no agent is alive the function returns before touching
+ * locks or commands, so uncleared commands and locks owned by already-dead
+ * agents persist across restarts; assignments are never failed here either.
+ * Behaviour kept as-is — confirm it is intended.
+ *
+ * @param db Open better-sqlite3 connection.
+ */
+export function cleanSlate(db: Database.Database): void {
+  const alive = db.prepare(
+    `SELECT id, name FROM coord_agents WHERE status != 'dead'`
+  ).all() as Array<{ id: string; name: string }>;
+  if (alive.length === 0) return;
+  // Prepare once; the loop below reuses the same statements for every agent.
+  const markDead = db.prepare(`UPDATE coord_agents SET status = 'dead', current_task = NULL WHERE id = ?`);
+  const releaseLocks = db.prepare(`DELETE FROM coord_locks WHERE agent_id = ?`);
+  const clearCommands = db.prepare(`UPDATE coord_commands SET cleared_at = datetime('now') WHERE cleared_at IS NULL`);
+  // db.transaction() commits when the callback returns and rolls back if it throws.
+  const reset = db.transaction(() => {
+    for (const agent of alive) {
+      markDead.run(agent.id);
+      releaseLocks.run(agent.id);
+    }
+    clearCommands.run();
+  });
+  reset();
+  console.log(`  Coordination clean slate: marked ${alive.length} agent(s) from previous session as dead`);
+}

package/src/core/decay.ts CHANGED Viewed

@@ -1,63 +1,63 @@
-// Copyright 2026 Robert Winter / Complete Ideas
-// SPDX-License-Identifier: Apache-2.0
-/**
- * ACT-R Base-Level Activation
- *
- * Based on Anderson's ACT-R cognitive architecture (1993).
- * Memories that are accessed more recently and more frequently
- * have higher activation — a well-established model of human memory.
- *
- * Formula: B(M) = ln(n + 1) - d * ln(ageDays / (n + 1))
- *
- * Where:
- *   n = access count
- *   d = decay exponent (default 0.5)
- *   ageDays = age of memory in days
- */
-export function baseLevelActivation(
-  accessCount: number,
-  ageDays: number,
-  decayExponent: number = 0.5
-): number {
-  const n = Math.max(accessCount, 0);
-  const age = Math.max(ageDays, 0.001); // Avoid log(0)
-  return Math.log(n + 1) - decayExponent * Math.log(age / (n + 1));
-}
-/**
- * Softplus — smooth approximation of ReLU.
- * Used to keep activation scores positive without hard clipping.
- */
-export function softplus(x: number): number {
-  return Math.log(1 + Math.exp(x));
-}
-/**
- * Composite activation score combining content match, temporal decay,
- * Hebbian boost, and confidence.
- *
- * Score = contentMatch * softplus(B(M) + scale * hebbianBoost) * confidence
- */
-export function compositeScore(params: {
-  contentMatch: number;
-  accessCount: number;
-  ageDays: number;
-  hebbianBoost: number;
-  confidence: number;
-  decayExponent?: number;
-  hebbianScale?: number;
-}): number {
-  const {
-    contentMatch,
-    accessCount,
-    ageDays,
-    hebbianBoost,
-    confidence,
-    decayExponent = 0.5,
-    hebbianScale = 1.0,
-  } = params;
-  const bm = baseLevelActivation(accessCount, ageDays, decayExponent);
-  return contentMatch * softplus(bm + hebbianScale * hebbianBoost) * confidence;
-}
+// Copyright 2026 Robert Winter / Complete Ideas
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * ACT-R base-level activation.
+ *
+ * Follows Anderson's ACT-R cognitive architecture (1993): memories that were
+ * accessed more often and more recently receive a higher activation.
+ *
+ * Formula: B(M) = ln(n + 1) - d * ln(ageDays / (n + 1))
+ *
+ * @param accessCount   n — number of accesses; negative values are treated as 0.
+ * @param ageDays       Age of the memory in days; values below 0.001 are raised
+ *                      to 0.001 so the logarithm stays finite.
+ * @param decayExponent d — decay exponent (default 0.5).
+ * @returns The activation B(M); may be negative for old, rarely used memories.
+ */
+export function baseLevelActivation(
+  accessCount: number,
+  ageDays: number,
+  decayExponent: number = 0.5
+): number {
+  // n + 1, shared by both logarithm terms.
+  const usesPlusOne = Math.max(accessCount, 0) + 1;
+  // Clamp the age away from zero to avoid log(0).
+  const clampedAge = Math.max(ageDays, 0.001);
+  const frequencyTerm = Math.log(usesPlusOne);
+  const decayTerm = decayExponent * Math.log(clampedAge / usesPlusOne);
+  return frequencyTerm - decayTerm;
+}
+/**
+ * Softplus — smooth approximation of ReLU: ln(1 + e^x).
+ * Used to keep activation scores positive without hard clipping.
+ *
+ * Evaluated in the numerically stable form max(x, 0) + log1p(e^-|x|), which is
+ * algebraically identical to ln(1 + e^x) but
+ *  - does not overflow to Infinity for large x (Math.exp(x) is Infinity above ~709.8), and
+ *  - stays strictly positive for moderately negative x, where 1 + e^x would
+ *    round to exactly 1 and the naive form would return 0.
+ *
+ * @param x Any real number. NaN propagates to the result.
+ * @returns ln(1 + e^x); approximately x for large positive x, approaching 0 for large negative x.
+ */
+export function softplus(x: number): number {
+  return Math.max(x, 0) + Math.log1p(Math.exp(-Math.abs(x)));
+}
+/**
+ * Composite activation score: content match weighted by the (softplus-smoothed)
+ * temporal activation plus Hebbian boost, then scaled by confidence.
+ *
+ * Score = contentMatch * softplus(B(M) + scale * hebbianBoost) * confidence
+ *
+ * @param params.contentMatch  Content-match factor.
+ * @param params.accessCount   Access count fed to baseLevelActivation.
+ * @param params.ageDays       Memory age in days fed to baseLevelActivation.
+ * @param params.hebbianBoost  Hebbian boost, added after scaling.
+ * @param params.confidence    Confidence factor.
+ * @param params.decayExponent Decay exponent; 0.5 when omitted.
+ * @param params.hebbianScale  Multiplier for hebbianBoost; 1.0 when omitted.
+ * @returns The composite score.
+ */
+export function compositeScore(params: {
+  contentMatch: number;
+  accessCount: number;
+  ageDays: number;
+  hebbianBoost: number;
+  confidence: number;
+  decayExponent?: number;
+  hebbianScale?: number;
+}): number {
+  // Defaults apply only when the field is undefined, mirroring destructuring defaults.
+  const decay = params.decayExponent === undefined ? 0.5 : params.decayExponent;
+  const scale = params.hebbianScale === undefined ? 1.0 : params.hebbianScale;
+  const baseLevel = baseLevelActivation(params.accessCount, params.ageDays, decay);
+  const activation = softplus(baseLevel + scale * params.hebbianBoost);
+  return params.contentMatch * activation * params.confidence;
+}

package/src/core/embeddings.ts CHANGED Viewed

@@ -1,88 +1,88 @@
-// Copyright 2026 Robert Winter / Complete Ideas
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Embedding Engine — local vector embeddings via transformers.js
- *
- * Default: bge-small-en-v1.5 (384 dimensions, ~90MB, MTEB retrieval-optimized).
- * Better short-text similarity than MiniLM for agent memory concepts.
- * Configurable via AWM_EMBED_MODEL env var.
- * Model is downloaded once on first use and cached locally.
- *
- * Singleton pattern — call getEmbedder() to get the shared instance.
- *
- * NOTE: Changing the model invalidates existing embeddings.
- * Set AWM_EMBED_MODEL=Xenova/all-MiniLM-L6-v2 for backward compatibility.
- */
-import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
-const MODEL_ID = process.env.AWM_EMBED_MODEL ?? 'Xenova/bge-small-en-v1.5';
-const DIMENSIONS = parseInt(process.env.AWM_EMBED_DIMS ?? '384', 10);
-const POOLING = (process.env.AWM_EMBED_POOLING ?? 'mean') as 'cls' | 'mean';
-let instance: FeatureExtractionPipeline | null = null;
-let initPromise: Promise<FeatureExtractionPipeline> | null = null;
-/**
- * Get or initialize the embedding pipeline (singleton).
- * First call downloads the model (~22MB), subsequent calls are instant.
- */
-export async function getEmbedder(): Promise<FeatureExtractionPipeline> {
-  if (instance) return instance;
-  if (initPromise) return initPromise;
-  initPromise = pipeline('feature-extraction', MODEL_ID, {
-    dtype: 'fp32',
-  }).then(pipe => {
-    instance = pipe;
-    console.log(`Embedding model loaded: ${MODEL_ID} (${DIMENSIONS}d)`);
-    return pipe;
-  });
-  return initPromise;
-}
-/**
- * Generate an embedding vector for a text string.
- * Returns a normalized float32 array of length DIMENSIONS.
- */
-export async function embed(text: string): Promise<number[]> {
-  const embedder = await getEmbedder();
-  const result = await embedder(text, { pooling: POOLING, normalize: true });
-  // result is a Tensor — extract the data
-  return Array.from(result.data as Float32Array).slice(0, DIMENSIONS);
-}
-/**
- * Generate embeddings for multiple texts in a batch.
- * More efficient than calling embed() in a loop.
- */
-export async function embedBatch(texts: string[]): Promise<number[][]> {
-  if (texts.length === 0) return [];
-  const embedder = await getEmbedder();
-  const result = await embedder(texts, { pooling: POOLING, normalize: true });
-  const data = result.data as Float32Array;
-  const vectors: number[][] = [];
-  for (let i = 0; i < texts.length; i++) {
-    vectors.push(Array.from(data.slice(i * DIMENSIONS, (i + 1) * DIMENSIONS)));
-  }
-  return vectors;
-}
-/**
- * Cosine similarity between two normalized vectors.
- * Since vectors are pre-normalized, this is just the dot product.
- */
-export function cosineSimilarity(a: number[], b: number[]): number {
-  if (a.length !== b.length || a.length === 0) return 0;
-  let dot = 0;
-  for (let i = 0; i < a.length; i++) {
-    dot += a[i] * b[i];
-  }
-  // Clamp to [-1, 1] to handle floating point drift
-  return Math.max(-1, Math.min(1, dot));
-}
-/** Vector dimensions for this model */
-export const EMBEDDING_DIMENSIONS = DIMENSIONS;
+// Copyright 2026 Robert Winter / Complete Ideas
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Embedding Engine — local vector embeddings via transformers.js
+ *
+ * Default: bge-small-en-v1.5 (384 dimensions, ~90MB, MTEB retrieval-optimized).
+ * Better short-text similarity than MiniLM for agent memory concepts.
+ * Configurable via AWM_EMBED_MODEL env var.
+ * Model is downloaded once on first use and cached locally.
+ *
+ * Singleton pattern — call getEmbedder() to get the shared instance.
+ *
+ * NOTE: Changing the model invalidates existing embeddings.
+ * Set AWM_EMBED_MODEL=Xenova/all-MiniLM-L6-v2 for backward compatibility.
+ */
+import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
+/** Model id handed to the transformers.js pipeline; override with AWM_EMBED_MODEL. */
+const MODEL_ID = process.env.AWM_EMBED_MODEL ?? 'Xenova/bge-small-en-v1.5';
+/** Vector length used when AWM_EMBED_DIMS is unset or unusable. */
+const DEFAULT_DIMENSIONS = 384;
+/**
+ * Parse the AWM_EMBED_DIMS setting.
+ *
+ * parseInt() yields NaN for non-numeric input, and a NaN, zero or negative
+ * length would silently make every slice(0, DIMENSIONS) in this module return
+ * a wrong-sized (typically empty) vector. Such values are rejected with a
+ * warning and the default is used instead.
+ */
+function resolveDimensions(raw: string | undefined): number {
+  if (raw === undefined) return DEFAULT_DIMENSIONS;
+  const parsed = parseInt(raw, 10);
+  if (Number.isInteger(parsed) && parsed > 0) return parsed;
+  console.warn(`Ignoring invalid AWM_EMBED_DIMS=${JSON.stringify(raw)}; using ${DEFAULT_DIMENSIONS}`);
+  return DEFAULT_DIMENSIONS;
+}
+/** Length of the vectors returned by this module; override with AWM_EMBED_DIMS. */
+const DIMENSIONS = resolveDimensions(process.env.AWM_EMBED_DIMS);
+/** Pooling mode passed to the pipeline; taken verbatim from AWM_EMBED_POOLING (not validated). */
+const POOLING = (process.env.AWM_EMBED_POOLING ?? 'mean') as 'cls' | 'mean';
+/** The loaded pipeline, once initialization has succeeded. */
+let instance: FeatureExtractionPipeline | null = null;
+/** The in-flight (or completed) load; shared so concurrent callers trigger only one load. */
+let initPromise: Promise<FeatureExtractionPipeline> | null = null;
+/**
+ * Get or initialize the embedding pipeline (singleton).
+ *
+ * The first call loads the model configured by MODEL_ID; callers that arrive
+ * while the load is in flight share the same promise, and later calls return
+ * the cached instance.
+ *
+ * If loading fails, the cached promise is discarded so that the next call
+ * retries instead of returning the same rejection for the rest of the process
+ * lifetime.
+ *
+ * @returns The shared feature-extraction pipeline.
+ * @throws Whatever the pipeline loader rejects with.
+ */
+export async function getEmbedder(): Promise<FeatureExtractionPipeline> {
+  if (instance) return instance;
+  if (initPromise) return initPromise;
+  initPromise = pipeline('feature-extraction', MODEL_ID, {
+    dtype: 'fp32',
+  }).then(pipe => {
+    instance = pipe;
+    console.log(`Embedding model loaded: ${MODEL_ID} (${DIMENSIONS}d)`);
+    return pipe;
+  }).catch((err: unknown) => {
+    // Forget the failed attempt so a later call can try again.
+    initPromise = null;
+    throw err;
+  });
+  return initPromise;
+}
+/**
+ * Embed a single string.
+ *
+ * Runs the shared pipeline with the configured pooling and normalization
+ * enabled, then keeps at most the first DIMENSIONS values of the output.
+ *
+ * @param text Text to embed.
+ * @returns The embedding as a plain number array.
+ */
+export async function embed(text: string): Promise<number[]> {
+  const extractor = await getEmbedder();
+  const output = await extractor(text, { pooling: POOLING, normalize: true });
+  // The pipeline returns a Tensor; copy its flat buffer into a plain array, then truncate.
+  const values = Array.from(output.data as Float32Array);
+  return values.slice(0, DIMENSIONS);
+}
+/**
+ * Split a flat, row-major buffer holding `rowCount` equal-length rows into
+ * separate arrays, keeping at most the first `dims` values of each row.
+ * The row width is derived from the buffer itself, not from `dims`.
+ */
+function splitEmbeddingRows(data: Float32Array, rowCount: number, dims: number): number[][] {
+  if (rowCount <= 0) return [];
+  const stride = Math.floor(data.length / rowCount);
+  const keep = Math.min(dims, stride);
+  const rows: number[][] = [];
+  for (let row = 0; row < rowCount; row++) {
+    const start = row * stride;
+    rows.push(Array.from(data.subarray(start, start + keep)));
+  }
+  return rows;
+}
+/**
+ * Generate embeddings for multiple texts in one pipeline call.
+ * More efficient than calling embed() in a loop.
+ *
+ * Each returned vector holds the first DIMENSIONS values of that text's
+ * embedding, matching what embed() returns. Rows are located using the
+ * model's actual output width, so the result stays correct when
+ * AWM_EMBED_DIMS differs from the model's native size (stepping through the
+ * buffer in DIMENSIONS-sized strides would cut rows at the wrong offsets).
+ *
+ * NOTE(review): assumes the pooled output is a flat [texts.length, width]
+ * buffer — confirm against the transformers.js Tensor layout.
+ *
+ * @param texts Texts to embed; an empty array returns [] without loading the model.
+ * @returns One vector per input text, in input order.
+ */
+export async function embedBatch(texts: string[]): Promise<number[][]> {
+  if (texts.length === 0) return [];
+  const embedder = await getEmbedder();
+  const result = await embedder(texts, { pooling: POOLING, normalize: true });
+  return splitEmbeddingRows(result.data as Float32Array, texts.length, DIMENSIONS);
+}
+/**
+ * Cosine similarity of two vectors that are ALREADY unit-length.
+ *
+ * No normalization is performed: the function returns the dot product,
+ * clamped to [-1, 1] to absorb floating-point drift. Inputs that are not
+ * normalized therefore do not yield a true cosine.
+ *
+ * @param a First vector.
+ * @param b Second vector.
+ * @returns The clamped dot product, or 0 when the lengths differ or the vectors are empty.
+ */
+export function cosineSimilarity(a: number[], b: number[]): number {
+  const comparable = a.length === b.length && a.length > 0;
+  if (!comparable) return 0;
+  // Left-to-right accumulation, same order as a plain index loop.
+  const dotProduct = a.reduce((sum, value, index) => sum + value * b[index], 0);
+  if (dotProduct > 1) return 1;
+  if (dotProduct < -1) return -1;
+  return dotProduct;
+}
+/**
+ * Length of the vectors produced by embed(): the value of AWM_EMBED_DIMS,
+ * or 384 when that variable is unset.
+ */
+export const EMBEDDING_DIMENSIONS = DIMENSIONS;