npm - @icex-labs/openclaw-memory-engine - Versions diffs - 4.2.2 → 5.0.1 - Mend

@icex-labs/openclaw-memory-engine 4.2.2 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/lib/auto-capture.js CHANGED Viewed

@@ -1,71 +1,36 @@
 /**
- * Auto-capture: hook into message:received and message:sent events
- * to automatically extract and store facts in archival memory.
- *
- * No reliance on the agent calling tools — memory happens passively.
+ * Auto-capture: hook into message events, passively store facts.
+ * Uses embedding-based classification (v5.0) — no hardcoded keywords.
  */
 import { loadArchival, appendRecord } from "./archival.js";
-import { indexEmbedding } from "./embedding.js";
+import { getEmbedding, indexEmbedding } from "./embedding.js";
 import { extractTriples, addTriple } from "./graph.js";
-import { resolveWorkspace } from "./paths.js";
+import { classify } from "./classifier.js";
-/** Recent capture cache to prevent duplicates within short windows. */
-const recentCaptures = new Map(); // content_hash → timestamp
-const DEDUP_WINDOW_MS = 60_000; // 60 seconds
-// Minimum message length to consider for fact extraction
 const MIN_LENGTH = 20;
+const MAX_LENGTH = 500;
-// Skip patterns — don't store these as facts
-const SKIP_PATTERNS = [
-  /^(hi|hello|hey|ok|thanks|good morning|good night|早|晚安|你好|嗯|好的|谢谢)/i,
-  /^HEARTBEAT_OK$/,
-  /^\//,  // slash commands
-  /^(yes|no|yeah|nah|sure|maybe)$/i,
-];
-// High-value content patterns — always store these
-const HIGH_VALUE_PATTERNS = [
-  /\b(decided|decision|plan|scheduled|booked|bought|sold|paid|签|买|卖|预约|决定)\b/i,
-  /\b(doctor|lawyer|immigration|IRCC|IBKR|account|password|address|phone|email)\b/i,
-  /\b(remember|don't forget|提醒|记住|别忘)\b/i,
-  /\$\d{2,}/,  // dollar amounts
-  /\b\d{4}-\d{2}-\d{2}\b/,  // dates
-];
+// Skip very short / obvious non-fact messages (language-agnostic via length)
+const SKIP_EXACT = new Set(["heartbeat_ok", "ok", "yes", "no", "y", "n"]);
-// Entity inference (same as quality.js but lightweight)
-const ENTITY_PATTERNS = [
-  [/\b(IBKR|invest|portfolio|HELOC|mortgage|bank|\$\d{3,})/i, "finance"],
-  [/\b(immigration|PR|IRCC|CBSA|visa|lawyer|律师)/i, "immigration"],
-  [/\b(doctor|医生|hospital|health|medication|药)/i, "health"],
-  [/\b(car|vehicle|Escalade|GX550|ES350|Tesla|tire|车)/i, "vehicles"],
-  [/\b(school|homework|exam|swimming|lesson|学校|课)/i, "education"],
-  [/\b(deploy|k3d|ArgoCD|kubectl|CI|cluster)/i, "infrastructure"],
-  [/\b(quant|trading|backtest|signal|strategy)/i, "quant"],
-];
-function inferEntity(text) {
-  for (const [pat, name] of ENTITY_PATTERNS) {
-    if (pat.test(text)) return name;
-  }
-  return "conversation";
-}
+/** Recent capture cache to prevent duplicates. */
+const recentCaptures = new Map();
+const DEDUP_WINDOW_MS = 60_000;
 function shouldCapture(content) {
   if (!content || content.length < MIN_LENGTH) return false;
-  if (SKIP_PATTERNS.some((p) => p.test(content.trim()))) return false;
+  const lower = content.trim().toLowerCase();
+  if (SKIP_EXACT.has(lower)) return false;
+  if (lower.startsWith("/")) return false; // slash commands
   return true;
 }
-function isHighValue(content) {
-  return HIGH_VALUE_PATTERNS.some((p) => p.test(content));
-}
 /**
- * Process an incoming or outgoing message and auto-store if valuable.
+ * Process a message and auto-store if valuable.
+ * Classification is embedding-based — works with any language.
  */
-export function captureMessage(ws, content, source = "auto-capture") {
+export async function captureMessage(ws, content, source = "auto-capture") {
   if (!shouldCapture(content)) return null;
   // Dedup: skip if same content captured in last 60s
@@ -76,14 +41,14 @@ export function captureMessage(ws, content, source = "auto-capture") {
   }
   recentCaptures.set(contentHash, now);
-  // Clean old entries from dedup cache
+  // Clean old dedup entries
   if (recentCaptures.size > 200) {
     for (const [key, ts] of recentCaptures) {
       if (now - ts > DEDUP_WINDOW_MS) recentCaptures.delete(key);
     }
   }
-  // Also check against existing archival (keyword overlap)
+  // Check against recent archival records (keyword overlap)
   const existing = loadArchival(ws);
   const contentLower = content.toLowerCase();
   const contentWords = new Set(contentLower.split(/\s+/).filter((w) => w.length > 2));
@@ -93,15 +58,15 @@ export function captureMessage(ws, content, source = "auto-capture") {
       const exWords = new Set(ex.split(/\s+/).filter((w) => w.length > 2));
       let overlap = 0;
       for (const w of contentWords) { if (exWords.has(w)) overlap++; }
-      if (overlap / contentWords.size > 0.7) return null; // too similar to recent record
+      if (overlap / contentWords.size > 0.7) return null;
     }
   }
-  const importance = isHighValue(content) ? 7 : 4;
-  const entity = inferEntity(content);
+  // Trim long messages
+  const trimmed = content.length > MAX_LENGTH ? content.slice(0, MAX_LENGTH - 3) + "..." : content;
-  // Trim very long messages
-  const trimmed = content.length > 500 ? content.slice(0, 497) + "..." : content;
+  // Classify using embeddings (language-agnostic)
+  const { entity, importance, embedding } = await classify(trimmed, ws);
   const record = appendRecord(ws, {
     content: trimmed,
@@ -110,9 +75,15 @@ export function captureMessage(ws, content, source = "auto-capture") {
     importance,
   });
-  // Background: index embedding + extract graph triples
-  indexEmbedding(ws, record).catch(() => {});
+  // Reuse embedding for search indexing (no duplicate API call)
+  if (embedding) {
+    const { loadEmbeddingCache, saveEmbeddingCache } = await import("./embedding.js");
+    const cache = loadEmbeddingCache(ws);
+    cache[record.id] = embedding;
+    saveEmbeddingCache(ws);
+  }
+  // Extract graph triples
   const triples = extractTriples(trimmed);
   for (const t of triples) {
     addTriple(ws, t.s, t.r, t.o, record.id);

package/lib/classifier.js ADDED Viewed

@@ -0,0 +1,253 @@
+/**
+ * Embedding-based classifier — replaces all hardcoded regex patterns.
+ * Language-agnostic: works with any language the embedding model supports.
+ *
+ * Uses "anchor embeddings" — short descriptions of each category.
+ * Classifies by cosine similarity against anchors.
+ * Anchors are computed once and cached to disk.
+ */
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
+import { join } from "node:path";
+import { getEmbedding, cosineSimilarity } from "./embedding.js";
+// ═══════════════════════════════════════════════════════════════════
+// Category anchors — short descriptions, NOT keywords
+// The embedding model understands semantics, so these work in ANY language
+// ═══════════════════════════════════════════════════════════════════
+// Entity categories. Each key is the entity name stored on a record; each value is the
+// text that loadAnchors() embeds once and that classifyEntity() compares against the
+// content embedding with cosineSimilarity().
+// NOTE: the on-disk cache is validated by key name only, so editing the text of an
+// existing anchor does not refresh a cache that is already on disk; delete
+// memory/classifier-anchors.json in the workspace after changing a description.
+const ENTITY_ANCHORS = {
+  health: "medical health doctor hospital clinic medication prescription treatment diagnosis symptom illness disease checkup appointment therapy",
+  finance: "money investment portfolio bank account mortgage loan interest rate tax income salary budget expense stock trading IBKR brokerage dividend",
+  immigration: "immigration visa permanent resident citizenship passport border agency lawyer legal court petition complaint refugee asylum",
+  legal: "lawyer attorney lawsuit court tribunal legal complaint case hearing judgment ruling contract",
+  vehicles: "car vehicle automobile SUV sedan truck van tire wheel maintenance repair insurance collision driving license",
+  property: "house home apartment condo real estate mortgage rent property landlord tenant renovation",
+  education: "school student class homework exam test grade teacher professor university college kindergarten lesson tutorial",
+  family: "wife husband spouse child son daughter parent mother father sibling family relative wedding anniversary",
+  career: "job work career company employer employee salary promotion interview resume hiring office meeting boss manager",
+  infrastructure: "server cluster kubernetes docker container deployment pipeline CI CD devops cloud hosting database",
+  technology: "code programming software AI machine learning LLM model API plugin framework library",
+  shopping: "buy purchase order shop store online delivery coupon discount price sale",
+  travel: "flight airline airport hotel trip vacation travel booking passport luggage destination",
+  food: "restaurant meal dinner lunch breakfast cooking recipe food grocery kitchen chef",
+  entertainment: "movie music game sport hobby concert show streaming video book reading",
+};
+// Importance levels. The key of the best-matching anchor is translated to a number
+// through IMPORTANCE_SCORES by classifyImportance().
+const IMPORTANCE_ANCHORS = {
+  critical: "lawsuit court immigration visa legal case medical emergency surgery hospital critical urgent deadline account number password credential secret key",
+  high: "investment portfolio large amount financial planning doctor appointment medical treatment insurance policy contract agreement major decision career change",
+  medium: "project task deployment code fix feature technical work meeting schedule plan discussion regular maintenance",
+  low: "casual chat greeting small talk weather joke daily routine trivial minor note acknowledgment ok thanks yes no",
+};
+// Numeric importance stored on a record for each level name above.
+const IMPORTANCE_SCORES = { critical: 9, high: 7, medium: 5, low: 3 };
+// Threshold: if no anchor scores above this, keep default
+// (the comparison is strict: a similarity exactly equal to the threshold does not win;
+// the defaults are "general" for entity and "medium" for importance).
+const ENTITY_THRESHOLD = 0.3;
+const IMPORTANCE_THRESHOLD = 0.25;
+// ═══════════════════════════════════════════════════════════════════
+// Anchor cache — compute once, reuse forever
+// ═══════════════════════════════════════════════════════════════════
+// Anchors memoised for the lifetime of the process once a load has succeeded.
+let anchorCache = null;
+// Path of the cache file used by the most recent load.
+let anchorCachePath = null;
+// In-flight load shared by concurrent callers. classify() asks for the entity and the
+// importance anchors in parallel; without this, a cold cache would embed every anchor twice.
+let anchorLoadPromise = null;
+/** Location of the on-disk anchor cache inside workspace `ws`. */
+function getAnchorCachePath(ws) {
+  return join(ws, "memory", "classifier-anchors.json");
+}
+/** True when `cached` is an object holding a non-null entry for every key of `expected`. */
+function hasAllAnchors(cached, expected) {
+  if (!cached || typeof cached !== "object") return false;
+  return Object.keys(expected).every((key) => cached[key] != null);
+}
+/** Embed every description in order; anchors for which no embedding is returned are omitted. */
+async function embedAnchors(descriptions) {
+  const embedded = {};
+  for (const [name, desc] of Object.entries(descriptions)) {
+    const emb = await getEmbedding(desc);
+    if (emb) embedded[name] = emb;
+  }
+  return embedded;
+}
+/** Read a complete anchor cache from disk, or compute the anchors and try to persist them. */
+async function readOrComputeAnchors(ws) {
+  const cachePath = getAnchorCachePath(ws);
+  anchorCachePath = cachePath;
+  if (existsSync(cachePath)) {
+    try {
+      // Parse into a local: an incomplete cache must never become the in-memory memo.
+      const onDisk = JSON.parse(readFileSync(cachePath, "utf-8"));
+      const complete =
+        hasAllAnchors(onDisk?.entities, ENTITY_ANCHORS) &&
+        hasAllAnchors(onDisk?.importance, IMPORTANCE_ANCHORS);
+      if (complete) return onDisk;
+      // Cache incomplete (e.g. a category was added since it was written): recompute.
+    } catch { /* unreadable or corrupt cache file: recompute */ }
+  }
+  console.error("[memory-engine] Computing classifier anchor embeddings...");
+  const entities = await embedAnchors(ENTITY_ANCHORS);
+  const importance = await embedAnchors(IMPORTANCE_ANCHORS);
+  const computed = { entities, importance, version: 2 };
+  // Persisting is best-effort: a read-only workspace must not break classification,
+  // because the anchors are already available in memory at this point.
+  try {
+    mkdirSync(join(ws, "memory"), { recursive: true });
+    writeFileSync(cachePath, JSON.stringify(computed), "utf-8");
+    console.error(`[memory-engine] Anchor embeddings cached (${Object.keys(entities).length} entities, ${Object.keys(importance).length} importance levels)`);
+  } catch (err) {
+    console.error(`[memory-engine] Could not write anchor cache to ${cachePath}: ${err?.message ?? err}`);
+  }
+  return computed;
+}
+/**
+ * Return the anchor embeddings, loading them at most once per process.
+ * Order of preference: in-memory memo, complete on-disk cache, fresh computation.
+ * Concurrent callers share a single in-flight load. A failed load is not memoised,
+ * so the next call retries.
+ * @param {string} ws - workspace path (the cache lives under `<ws>/memory`)
+ * @returns {Promise<{ entities: object, importance: object, version?: number }>}
+ */
+async function loadAnchors(ws) {
+  if (anchorCache) return anchorCache;
+  if (!anchorLoadPromise) {
+    anchorLoadPromise = readOrComputeAnchors(ws)
+      .then((anchors) => {
+        anchorCache = anchors;
+        return anchors;
+      })
+      .finally(() => {
+        anchorLoadPromise = null;
+      });
+  }
+  return anchorLoadPromise;
+}
+// ═══════════════════════════════════════════════════════════════════
+// Classification functions
+// ═══════════════════════════════════════════════════════════════════
+/**
+ * Pick the entity category whose anchor embedding is most similar to the content.
+ * A category is chosen only when its cosine similarity is strictly greater than
+ * ENTITY_THRESHOLD; otherwise the result is "general".
+ * @param {float[]} contentEmbedding - pre-computed embedding of the content
+ * @param {string} ws - workspace path (used to locate the anchor cache)
+ * @returns {Promise<string>} entity category or "general"
+ */
+export async function classifyEntity(contentEmbedding, ws) {
+  if (!contentEmbedding) return "general";
+  const anchors = await loadAnchors(ws);
+  // A missing or empty anchor table leaves nothing to compare against.
+  const candidates = Object.entries(anchors?.entities || {});
+  const winner = candidates.reduce(
+    (best, [category, anchorEmb]) => {
+      const score = cosineSimilarity(contentEmbedding, anchorEmb);
+      return score > best.score ? { category, score } : best;
+    },
+    { category: "general", score: ENTITY_THRESHOLD },
+  );
+  return winner.category;
+}
+/**
+ * Rate importance by finding the importance anchor most similar to the content.
+ * A level is chosen only when its cosine similarity is strictly greater than
+ * IMPORTANCE_THRESHOLD; otherwise the level stays "medium". The level name is
+ * mapped to a number through IMPORTANCE_SCORES, with 5 as the fallback.
+ * @param {float[]} contentEmbedding - pre-computed embedding
+ * @param {string} ws - workspace path
+ * @returns {Promise<number>} importance score (5 when nothing can be compared)
+ */
+export async function classifyImportance(contentEmbedding, ws) {
+  if (!contentEmbedding) return 5;
+  const anchors = await loadAnchors(ws);
+  // A missing or empty anchor table leaves the level at "medium", which scores 5.
+  const candidates = Object.entries(anchors?.importance || {});
+  const winner = candidates.reduce(
+    (best, [level, anchorEmb]) => {
+      const score = cosineSimilarity(contentEmbedding, anchorEmb);
+      return score > best.score ? { level, score } : best;
+    },
+    { level: "medium", score: IMPORTANCE_THRESHOLD },
+  );
+  return IMPORTANCE_SCORES[winner.level] || 5;
+}
+/**
+ * Lightweight fallback classifier — no embedding API needed.
+ * Uses format/symbol signals that work across languages:
+ *   - currency amounts or other numbers of 4+ digits → finance (importance 7)
+ *   - URLs, code fences, file paths, source-file extensions → technology
+ *   - ISO dates (YYYY-MM-DD) or clock times → general, importance 6
+ *   - messages under 30 characters → importance capped at 3
+ *   - messages over 200 characters → importance at least 6
+ * @param {string} content - text to classify
+ * @returns {{ entity: string, importance: number }}
+ */
+function fallbackClassify(content) {
+  let entity = "general";
+  let importance = 5;
+  // The year of an ISO date is itself a 4-digit number, so ISO dates are blanked out
+  // before the bare-number test. Otherwise every dated message is labelled finance
+  // and the date branch below can never be reached for YYYY-MM-DD input.
+  const withoutIsoDates = content.replace(/\b\d{4}-\d{2}-\d{2}\b/g, " ");
+  const hasCurrencyAmount = /[\$€£¥₹]\s*[\d,.]+/.test(content);
+  const hasLargeNumber = /\b\d{4,}[\d,.]*\b/.test(withoutIsoDates);
+  // Finance: currency symbols, large numbers
+  if (hasCurrencyAmount || hasLargeNumber) {
+    entity = "finance";
+    importance = 7;
+  }
+  // Technology: URLs, code patterns, file paths
+  else if (/https?:\/\/|```|\/\w+\/\w+|\.(js|py|ts|json|yaml|md)\b/i.test(content)) {
+    entity = "technology";
+  }
+  // Dates with context → likely scheduling/planning
+  else if (/\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}:\d{2}\b/.test(content)) {
+    importance = 6;
+  }
+  // Short messages are less important
+  if (content.length < 30) importance = Math.min(importance, 3);
+  // Long detailed messages are more important
+  if (content.length > 200) importance = Math.max(importance, 6);
+  return { entity, importance };
+}
+/**
+ * Full classification: entity + importance in one call.
+ * Uses embedding similarity when available and falls back to format-based heuristics
+ * when no embedding is returned or when the embedding/anchor step throws, so that
+ * a classification failure never prevents the caller from storing the record.
+ * @param {string} content - text to classify
+ * @param {string} ws - workspace path
+ * @param {float[]} [existingEmbedding] - reuse if already computed
+ * @returns {Promise<{ entity: string, importance: number, embedding: float[]|null }>}
+ *   `embedding` is the content embedding when one was obtained (even if anchor
+ *   comparison later failed), otherwise null.
+ */
+export async function classify(content, ws, existingEmbedding = null) {
+  let emb = existingEmbedding;
+  try {
+    emb = emb || await getEmbedding(content);
+    if (emb) {
+      const [entity, importance] = await Promise.all([
+        classifyEntity(emb, ws),
+        classifyImportance(emb, ws),
+      ]);
+      return { entity, importance, embedding: emb };
+    }
+  } catch (err) {
+    // NOTE(review): assumes getEmbedding/loadAnchors can reject (e.g. network or disk
+    // errors); the failure is reported rather than swallowed, then heuristics take over.
+    console.error(`[memory-engine] Embedding classification failed, using fallback heuristics: ${err?.message ?? err}`);
+  }
+  // No embedding available (e.g. no API key) or the embedding path failed.
+  const fb = fallbackClassify(content);
+  return { entity: fb.entity, importance: fb.importance, embedding: emb || null };
+}
+/**
+ * Batch re-classify existing records in place.
+ * Only records that still carry a default value are touched:
+ *   - entity is re-classified when it is missing, empty or "general"
+ *     (4.2.2 wrote an empty entity when no pattern matched, so empty counts as unclassified);
+ *   - importance is re-rated when it is missing or exactly 5.
+ * Records without an entry in `embeddingCache` are skipped.
+ * @param {string} ws - workspace path
+ * @param {object[]} records - archival records; mutated in place
+ * @param {object} embeddingCache - { id: float[] }
+ * @returns {Promise<{ reclassified: number, rerated: number }>}
+ */
+export async function batchReclassify(ws, records, embeddingCache) {
+  await loadAnchors(ws); // ensure anchors are cached
+  let reclassified = 0;
+  let rerated = 0;
+  for (const record of records) {
+    const emb = embeddingCache[record.id];
+    if (!emb) continue;
+    // Skip the similarity pass entirely for records that already have a real entity.
+    if (!record.entity || record.entity === "general") {
+      const newEntity = await classifyEntity(emb, ws);
+      if (newEntity !== "general") {
+        record.entity = newEntity;
+        reclassified++;
+      }
+    }
+    const currentImp = record.importance ?? 5;
+    if (currentImp === 5) { // only re-rate flat defaults
+      const newImp = await classifyImportance(emb, ws);
+      if (newImp !== 5) {
+        record.importance = newImp;
+        rerated++;
+      }
+    }
+  }
+  return { reclassified, rerated };
+}

package/lib/consolidate.js CHANGED Viewed

@@ -1,30 +1,11 @@
 /**
- * Auto-extract structured facts from text blocks.
- * Splits by sentence boundaries (Chinese + English), infers entity, deduplicates.
+ * Extract structured facts from text blocks.
+ * v5.0: embedding-based classification — no hardcoded keywords.
  */
 import { loadArchival, appendRecord } from "./archival.js";
-import { indexEmbedding } from "./embedding.js";
-/** Generic entity inference patterns (no personal data). */
-const ENTITY_PATTERNS = [
-  [/\b(IBKR|Interactive Brokers)\b/i, "IBKR"],
-  [/\b(immigration|PR|IRCC|CBSA|visa)\b/i, "immigration"],
-  [/\b(quant|trading|backtest|portfolio)\b/i, "trading"],
-  [/\b(doctor|医生|hospital|医院|clinic)\b/i, "health"],
-  [/\b(car|vehicle|SUV|sedan|truck|Tesla|Toyota|Lexus|BMW)\b/i, "vehicles"],
-  [/\b(house|home|mortgage|rent|property)\b/i, "property"],
-  [/\b(school|university|college|学校)\b/i, "education"],
-  [/\b(insurance|保险)\b/i, "insurance"],
-  [/\b(lawyer|律师|attorney|legal)\b/i, "legal"],
-];
-function inferEntity(text, fallback) {
-  for (const [pat, name] of ENTITY_PATTERNS) {
-    if (pat.test(text)) return name;
-  }
-  return fallback;
-}
+import { loadEmbeddingCache, saveEmbeddingCache } from "./embedding.js";
+import { classify } from "./classifier.js";
 /** Split text into sentence-level fact candidates. */
 function extractCandidates(text) {
@@ -42,32 +23,27 @@ function extractCandidates(text) {
   return segments
     .filter((seg) => {
       if (seg.startsWith("#") || seg.length < 10) return false;
-      if (/^(##|===|---|\*\*\*)/.test(seg)) return false;
+      if (/^(##|===|---|\*\*\*|```|>|\|)/.test(seg)) return false;
       return true;
     })
     .map((seg) => seg.replace(/^[-*•]\s*/, "").replace(/^\d+\.\s*/, "").trim())
     .filter((s) => s.length >= 10);
 }
-/** Check if a fact is a near-duplicate of existing content (keyword overlap >70%). */
 function isDuplicate(factLower, existingTexts) {
   const factWords = new Set(factLower.split(/\s+/).filter((w) => w.length > 2));
   if (factWords.size === 0) return false;
   for (const ex of existingTexts) {
     const exWords = new Set(ex.split(/\s+/).filter((w) => w.length > 2));
     let overlap = 0;
-    for (const w of factWords) {
-      if (exWords.has(w)) overlap++;
-    }
+    for (const w of factWords) { if (exWords.has(w)) overlap++; }
     if (overlap / factWords.size > 0.7) return true;
   }
   return false;
 }
 /**
- * Extract facts from text, deduplicate, and insert into archival.
- * @returns {{ inserted: string[], skipped: string[], total: number }}
+ * Extract facts from text, classify via embeddings, deduplicate, and insert.
  */
 export async function consolidateText(ws, text, defaultEntity = "", defaultTags = []) {
   const candidates = extractCandidates(text);
@@ -80,20 +56,28 @@ export async function consolidateText(ws, text, defaultEntity = "", defaultTags
   for (const fact of candidates) {
     const factLower = fact.toLowerCase();
     if (isDuplicate(factLower, existingTexts)) {
       skipped.push(fact.slice(0, 60));
       continue;
     }
-    const entity = inferEntity(fact, defaultEntity);
+    const { entity, importance, embedding } = await classify(fact, ws);
+    const finalEntity = (entity !== "general") ? entity : defaultEntity || "general";
     const record = appendRecord(ws, {
       content: fact,
-      entity,
+      entity: finalEntity,
       tags: defaultTags,
+      importance,
       source: "consolidate",
     });
-    indexEmbedding(ws, record).catch(() => {});
+    if (embedding) {
+      const cache = loadEmbeddingCache(ws);
+      cache[record.id] = embedding;
+      saveEmbeddingCache(ws);
+    }
     inserted.push(record.id);
     existingTexts.push(factLower);
   }

package/lib/quality.js CHANGED Viewed

@@ -1,180 +1,43 @@
 /**
- * Data quality engine: re-classify entities, re-rate importance,
- * extract missing graph triples, generate episodes from summaries.
+ * Data quality engine v5.0: embedding-based classification.
+ * Replaces hardcoded regex patterns with semantic similarity.
+ * Language-agnostic — works with any language.
  */
 import { loadArchival, rewriteArchival } from "./archival.js";
 import { addTriple, extractTriples, loadGraph } from "./graph.js";
 import { saveEpisode, loadEpisodes } from "./episodes.js";
-// ═══════════════════════════════════════════════════════════════════
-// Extended entity patterns (much richer than consolidate.js)
-// ═══════════════════════════════════════════════════════════════════
-const ENTITY_PATTERNS = [
-  // People / family
-  [/\b(wife|husband|spouse|老婆|老公|太太|丈夫|妻子)\b/i, "family"],
-  [/\b(son|daughter|child|kid|儿子|女儿|孩子)\b/i, "family"],
-  [/\b(mom|dad|mother|father|parent|妈|爸|父母)\b/i, "family"],
-  // Finance
-  [/\b(IBKR|Interactive Brokers|broker|brokerage)\b/i, "finance"],
-  [/\b(TFSA|RRSP|RESP|401k|IRA|pension)\b/i, "finance"],
-  [/\b(invest|portfolio|NAV|stock|ETF|QQQ|VOO|dividend)\b/i, "finance"],
-  [/\b(HELOC|mortgage|loan|credit|debt|利率|rate)\b/i, "finance"],
-  [/\b(bank|RBC|TD|BMO|Scotiabank|CIBC)\b/i, "finance"],
-  [/\b(budget|expense|income|salary|payment|pay|报税|tax)\b/i, "finance"],
-  [/\b(accountant|bookkeep|会计)\b/i, "finance"],
-  [/\b(\$\d|CAD|USD|万|千)\b/i, "finance"],
-  // Immigration / legal
-  [/\b(immigration|immigrant|PR|permanent resident|移民)\b/i, "immigration"],
-  [/\b(IRCC|CBSA|ATIP|NSIRA|Mandamus|IMM-)\b/i, "immigration"],
-  [/\b(visa|work permit|签证|工签)\b/i, "immigration"],
-  [/\b(lawyer|attorney|paralegal|律师|法律)\b/i, "legal"],
-  [/\b(petition|complaint|CHRC|tribunal|court|案)\b/i, "legal"],
-  // Health
-  [/\b(doctor|physician|GP|医生|主治|Dr\.)\b/i, "health"],
-  [/\b(hospital|clinic|medical|诊所|医院)\b/i, "health"],
-  [/\b(medication|medicine|drug|pill|tablet|药|处方)\b/i, "health"],
-  [/\b(cetirizine|urticaria|荨麻疹|allergy|过敏)\b/i, "health"],
-  [/\b(health|symptom|diagnosis|体检|检查|screening)\b/i, "health"],
-  [/\b(dental|dentist|vision|eye|牙|眼)\b/i, "health"],
-  // Vehicles
-  [/\b(car|vehicle|SUV|sedan|truck|van|minivan|车)\b/i, "vehicles"],
-  [/\b(Tesla|Toyota|Lexus|BMW|Mercedes|Cadillac|Honda|Audi)\b/i, "vehicles"],
-  [/\b(Escalade|GX550|ES350|Sienna|Model [3SXY])\b/i, "vehicles"],
-  [/\b(tire|tyre|PPF|wrap|oil change|maintenance|保养|轮胎)\b/i, "vehicles"],
-  [/\b(insurance|保险|Desjardins|policy)\b/i, "vehicles"],
-  // Infrastructure / DevOps
-  [/\b(k3d|k3s|k8s|kubernetes|cluster|pod|deploy)\b/i, "infrastructure"],
-  [/\b(ArgoCD|Helm|kubectl|GitOps|CI|CD|pipeline)\b/i, "infrastructure"],
-  [/\b(Docker|container|image|registry|GHCR)\b/i, "infrastructure"],
-  [/\b(U9|prod|production|staging|dev cluster)\b/i, "infrastructure"],
-  [/\b(SOPS|secret|encrypt|cert|SSL|TLS)\b/i, "infrastructure"],
-  // OpenClaw / AI
-  [/\b(OpenClaw|openclaw|gateway|plugin|hook)\b/i, "openclaw"],
-  [/\b(agent|session|compaction|memory|embedding)\b/i, "openclaw"],
-  [/\b(LLM|Claude|Anthropic|GPT|OpenAI|AI|token)\b/i, "ai"],
-  [/\b(prompt|context window|model|inference)\b/i, "ai"],
-  // Quant / trading
-  [/\b(quant|quantitative|backtest|backtesting)\b/i, "quant"],
-  [/\b(trading|trade|signal|strategy|turtle|海龟)\b/i, "quant"],
-  [/\b(Sharpe|drawdown|回撤|年化|annualized)\b/i, "quant"],
-  [/\b(paper trading|live trading|order|position)\b/i, "quant"],
-  // Messaging
-  [/\b(Telegram|Discord|WhatsApp|Slack|bot|channel)\b/i, "messaging"],
-  // Property / home
-  [/\b(house|home|condo|apartment|property|房|租)\b/i, "property"],
-  [/\b(NAS|Synology|backup|Time Machine)\b/i, "property"],
-  [/\b(lawn|garden|yard|snow|草坪|铲雪)\b/i, "property"],
-  // Education / kids
-  [/\b(school|class|homework|exam|test|学校|作业)\b/i, "education"],
-  [/\b(kindergarten|grade|teacher|老师)\b/i, "education"],
-  [/\b(swimming|skating|skiing|hockey|lesson|课)\b/i, "education"],
-  [/\b(Science Fair|concert|recital|表演)\b/i, "education"],
-  // Projects / SaaS
-  [/\b(icex|SaaS|MVP|startup|product|launch)\b/i, "project"],
-  [/\b(ESP32|Arduino|IoT|hardware|sensor)\b/i, "project"],
-  // Shopping / daily
-  [/\b(Costco|Amazon|Walmart|shopping|购物|买)\b/i, "shopping"],
-  [/\b(flight|airline|Air Canada|travel|trip|机票|飞)\b/i, "travel"],
-  [/\b(restaurant|food|meal|dinner|lunch|吃|饭)\b/i, "daily"],
-];
-// ═══════════════════════════════════════════════════════════════════
-// Importance rules
-// ═══════════════════════════════════════════════════════════════════
-const IMPORTANCE_RULES = [
-  // High (8-9): critical life matters
-  { match: /\b(immigration|PR|IRCC|CBSA|Mandamus|visa|NSIRA|CHRC|petition|lawsuit|court)\b/i, importance: 9 },
-  { match: /\b(IBKR|NAV|portfolio|invest|\$\d{4,}|万|HELOC|mortgage)\b/i, importance: 8 },
-  { match: /\b(doctor|hospital|medication|diagnosis|surgery|health insurance|AHCIP)\b/i, importance: 8 },
-  { match: /\b(lawyer|attorney|legal|律师)\b/i, importance: 8 },
-  { match: /\b(永远不要|NEVER|CRITICAL|严禁|必须|MUST)\b/i, importance: 9 },
-  { match: /\b(VIN|policy number|case number|account number|IMM-)\b/i, importance: 8 },
-  // Medium-high (7): important but not critical
-  { match: /\b(ArgoCD|GitOps|k3d|U9|prod|deploy|CI)\b/i, importance: 6 },
-  { match: /\b(quant|backtest|trading|signal|Sharpe)\b/i, importance: 7 },
-  { match: /\b(GX550|Escalade|ES350|car insurance)\b/i, importance: 6 },
-  { match: /\b(OpenClaw|gateway|plugin|config)\b/i, importance: 6 },
-  { match: /\b(icex|SaaS|MVP|ESP32)\b/i, importance: 6 },
-  // Low (3): ephemeral
-  { match: /\b(swimming lesson|concert|recital|playdate)\b/i, importance: 3 },
-  { match: /\b(weather|天气)\b/i, importance: 2 },
-  { match: /\b(heartbeat|HEARTBEAT_OK|session start|daily log)\b/i, importance: 2 },
-  { match: /\b(good morning|good night|早上好|晚安)\b/i, importance: 2 },
-];
-function inferImportance(content, currentImportance) {
-  // Only re-rate if currently at default (5)
-  if (currentImportance !== 5) return currentImportance;
-  for (const rule of IMPORTANCE_RULES) {
-    if (rule.match.test(content)) return rule.importance;
-  }
-  return 5; // keep default if no rule matches
-}
-function inferEntity(content, currentEntity) {
-  // Only re-classify if currently "general" or empty
-  if (currentEntity && currentEntity !== "general") return currentEntity;
-  for (const [pattern, entity] of ENTITY_PATTERNS) {
-    if (pattern.test(content)) return entity;
-  }
-  return currentEntity || "general";
-}
-// ═══════════════════════════════════════════════════════════════════
-// Quality pass
-// ═══════════════════════════════════════════════════════════════════
+import { loadEmbeddingCache } from "./embedding.js";
+import { batchReclassify } from "./classifier.js";
 /**
  * Run a full quality pass over archival records.
+ * Uses embedding-based classification (no regex).
  * @returns {{ reclassified, rerated, triplesAdded, episodesGenerated }}
  */
-export function runQualityPass(ws, options = {}) {
+export async function runQualityPass(ws, options = {}) {
   const records = loadArchival(ws);
-  const existingGraph = loadGraph(ws);
-  const existingTripleSet = new Set(
-    existingGraph.map((t) => `${t.s}|${t.r}|${t.o}`.toLowerCase()),
-  );
+  const embCache = loadEmbeddingCache(ws);
+  // 1. Embedding-based re-classification (entity + importance)
   let reclassified = 0;
   let rerated = 0;
-  let triplesAdded = 0;
-  for (const record of records) {
-    // 1. Re-classify entity
-    const newEntity = inferEntity(record.content, record.entity);
-    if (newEntity !== record.entity) {
-      record.entity = newEntity;
-      reclassified++;
-    }
-    // 2. Re-rate importance
-    const newImportance = inferImportance(record.content, record.importance ?? 5);
-    if (newImportance !== (record.importance ?? 5)) {
-      record.importance = newImportance;
-      rerated++;
-    }
+  const embeddedRecords = records.filter((r) => embCache[r.id]);
+  if (embeddedRecords.length > 0) {
+    const result = await batchReclassify(ws, embeddedRecords, embCache);
+    reclassified = result.reclassified;
+    rerated = result.rerated;
+  }
-    // 3. Extract graph triples
-    if (!options.skipGraph) {
+  // 2. Extract graph triples
+  let triplesAdded = 0;
+  if (!options.skipGraph) {
+    const existingGraph = loadGraph(ws);
+    const existingTripleSet = new Set(
+      existingGraph.map((t) => `${t.s}|${t.r}|${t.o}`.toLowerCase()),
+    );
+    for (const record of records) {
       const triples = extractTriples(record.content);
       for (const t of triples) {
         const key = `${t.s}|${t.r}|${t.o}`.toLowerCase();
@@ -189,12 +52,12 @@ export function runQualityPass(ws, options = {}) {
     }
   }
-  // Save updated records
+  // 3. Save updated records
   if (reclassified > 0 || rerated > 0) {
     rewriteArchival(ws, records);
   }
-  // 4. Generate episodes from weekly summaries (if episodes are sparse)
+  // 4. Generate episodes from daily record clusters
   let episodesGenerated = 0;
   if (!options.skipEpisodes) {
     episodesGenerated = generateEpisodesFromRecords(ws, records);
@@ -210,7 +73,6 @@ function generateEpisodesFromRecords(ws, records) {
   const episodes = loadEpisodes(ws);
   const existingDates = new Set(episodes.map((e) => e.ts?.slice(0, 10)));
-  // Group records by date
   const byDate = {};
   for (const r of records) {
     if (!r.ts) continue;
@@ -221,10 +83,8 @@ function generateEpisodesFromRecords(ws, records) {
   let generated = 0;
   for (const [date, dayRecords] of Object.entries(byDate)) {
-    // Skip if episode already exists for this date, or too few records
     if (existingDates.has(date) || dayRecords.length < 3) continue;
-    // Aggregate topics and entities
     const topics = [...new Set(dayRecords.map((r) => r.entity).filter((e) => e && e !== "general"))];
     const topContent = dayRecords
       .sort((a, b) => (b.importance || 5) - (a.importance || 5))
@@ -251,11 +111,11 @@ function generateEpisodesFromRecords(ws, records) {
  */
 export function formatQualityReport(result) {
   const lines = [
-    `📊 Memory Quality Pass Complete`,
+    `📊 Memory Quality Pass Complete (embedding-based v5.0)`,
     ``,
     `  Records scanned: ${result.total}`,
-    `  Entities re-classified: ${result.reclassified}`,
-    `  Importance re-rated: ${result.rerated}`,
+    `  Entities re-classified: ${result.reclassified} (via semantic similarity)`,
+    `  Importance re-rated: ${result.rerated} (via semantic similarity)`,
     `  Graph triples extracted: ${result.triplesAdded}`,
     `  Episodes generated: ${result.episodesGenerated}`,
   ];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@icex-labs/openclaw-memory-engine",
-  "version": "4.2.2",
+  "version": "5.0.1",
   "description": "MemGPT-style hierarchical memory plugin for OpenClaw — core memory block + archival storage with semantic search",
   "type": "module",
   "main": "index.js",