npm - @bsbofmusic/openclaw-memory-layer2 - Versions diffs - 0.2.0 → 0.2.1 - Mend

@bsbofmusic/openclaw-memory-layer2 0.2.0 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.

Files changed (3) hide show

package/hindsight.js CHANGED Viewed

@@ -28,38 +28,73 @@ function loadHindsightConfig() {
 async function hcFetch(path, options = {}) {
   const cfg = loadHindsightConfig();
-  const url = `${cfg.baseUrl.replace(/\/$/, '')}${path}`;
-  const res = await fetch(url, options);
-  const text = await res.text();
-  let json = null;
-  try { json = JSON.parse(text); } catch {}
-  return { ok: res.ok, status: res.status, text, json };
+  const base = cfg.baseUrl.replace(/\/$/, '');
+  const urlStr = `${base}${path}`;
+  let parsed;
+  try { parsed = new URL(urlStr); } catch { parsed = { hostname: '127.0.0.1', port: '8888', pathname: path, search: '' }; }
+  const lib = (parsed.protocol === 'https:') ? require('https') : require('http');
+  const timeoutMs = Math.min(Number(process.env.HINDSIGHT_TIMEOUT_MS || 3000), 3000);
+  const method = (options || {}).method || 'GET';
+  const headers = (options || {}).headers || {};
+  const body = (options || {}).body || null;
+  return new Promise(resolve => {
+    let settled = false;
+    const done = (val) => { if (!settled) { settled = true; resolve(val); } };
+    const req = lib.request({
+      hostname: parsed.hostname || '127.0.0.1',
+      port:    parsed.port    || '8888',
+      path:    (parsed.pathname || '') + (parsed.search || ''),
+      method,
+      headers,
+      timeout: timeoutMs,
+    }, res => {
+      let data = '';
+      res.on('data', c => { data += c; });
+      res.on('end', () => {
+        let json = null;
+        try { json = JSON.parse(data); } catch { /* noop */ }
+        done({ ok: res.statusCode >= 200 && res.statusCode < 300, status: res.statusCode, text: data, json });
+      });
+    });
+    req.on('timeout', () => { req.destroy(); done({ ok: false, status: 0, detail: 'hindsight http timeout' }); });
+    req.on('error', e  => done({ ok: false, status: 0, detail: e.message }));
+    if (body) req.write(body);
+    req.end();
+    setTimeout(() => { if (!settled) { req.destroy(); done({ ok: false, status: 0, detail: 'hindsight max-wait exceeded' }); } }, timeoutMs + 200);
+  });
 }
 async function healthcheck() {
   const cfg = loadHindsightConfig();
+  console.error('[hindsight] healthcheck:start', JSON.stringify({ baseUrl: cfg.baseUrl, bankId: cfg.bankId }));
   if (!cfg.enabled) return { ok: false, detail: 'HINDSIGHT_ENABLED=0' };
   try {
     const r = await hcFetch('/health');
+    console.error('[hindsight] healthcheck:/health', JSON.stringify({ ok: r.ok, status: r.status }));
     if (r.ok) return { ok: true, detail: 'health endpoint reachable' };
   } catch {}
   try {
     const r = await hcFetch('/');
+    console.error('[hindsight] healthcheck:/', JSON.stringify({ ok: r.ok, status: r.status }));
     if (r.ok || r.status < 500) return { ok: true, detail: `root reachable (${r.status})` };
     return { ok: false, detail: `HTTP ${r.status}` };
   } catch (e) {
-    return { ok: false, detail: e.message };
+    console.error('[hindsight] healthcheck:error', e?.name || 'Error', e?.message || String(e));
+    return { ok: false, detail: e?.message || String(e) };
   }
 }
 async function ensureBank() {
   const cfg = loadHindsightConfig();
+  console.error('[hindsight] ensureBank:start', JSON.stringify({ bankId: cfg.bankId }));
   const body = JSON.stringify({ reflect_mission: 'Layer2 advanced memory bank for OpenClaw recall and reflection' });
   return hcFetch(`/v1/default/banks/${encodeURIComponent(cfg.bankId)}`, { method: 'PUT', headers: { 'Content-Type': 'application/json' }, body });
 }
 async function recall(query, { topK = 5 } = {}) {
   const cfg = loadHindsightConfig();
+  console.error('[hindsight] recall:start', JSON.stringify({ bankId: cfg.bankId, topK, query: String(query).slice(0,80) }));
   await ensureBank();
   const body = JSON.stringify({ query, max_tokens: 4096, budget: 'mid' });
   const path = `/v1/default/banks/${encodeURIComponent(cfg.bankId)}/memories/recall`;
@@ -74,6 +109,7 @@ async function recall(query, { topK = 5 } = {}) {
 async function reflect(query) {
   const cfg = loadHindsightConfig();
+  console.error('[hindsight] reflect:start', JSON.stringify({ bankId: cfg.bankId, query: String(query).slice(0,80) }));
   await ensureBank();
   const body = JSON.stringify({ query, include: { facts: {} }, max_tokens: 1024, budget: 'low' });
   const path = `/v1/default/banks/${encodeURIComponent(cfg.bankId)}/reflect`;

package/index.js CHANGED Viewed

@@ -84,10 +84,20 @@ function log(level, ...args) {
   }
 }
+function extractDisplayText(raw) {
+  const s = String(raw || '').trim();
+  if (!s) return '';
+  const parts = s.split(/\n\s*\n/);
+  let body = parts.length > 1 ? parts.slice(1).join(' ').trim() : s;
+  body = body.replace(/\[(uid|source|chat_id|session|timestamp|sender|message_id|mode|part):[^\]]*\]/g, ' ');
+  body = body.replace(/\s+/g, ' ').trim();
+  return body;
+}
 // ─── PostgreSQL Pool ──────────────────────────────────────────────────────────
 let pool = null;
 function getPool() {
-  if (!pool) {
+  if (!pool || pool.ended) {
     pool = new Pool(PG);
     pool.on('error', err => log('error', 'PG pool error', err.message));
   }
@@ -156,35 +166,94 @@ async function pgQuery(sql, params = [], timeoutMs = 8000) {
 }
 // ─── Semantic search over memos ──────────────────────────────────────────────
-async function semanticSearch(query, { topK = 10, minScore = 0.5 } = {}) {
-  const embed = await getEmbedding(query);
-  // memos stores content in `content` column; we search raw text similarity
-  // For v0.1 we do a lightweight approximate: pull recent memos and rank by
-  // keyword overlap + trust that the shared OpenClaw embedding model handles semantics.
-  // A full vector index (pgvector) can be added in v0.2.
-  const res = await pgQuery(
+async function semanticSearch(query, { topK = 10, minScore = 0.2 } = {}) {
+  // 混合检索模式：关键词召回 + 向量召回，双路融合提升准确率
+  const q = String(query || '').trim();
+  if (!q) return { ok: true, results: [] };
+  // 1. 关键词召回分支
+  const chars = Array.from(new Set(q.toLowerCase().split('').filter(c => c.trim().length > 0 && /[\u4e00-\u9fa5a-z0-9]/i.test(c)))).slice(0, 16);
+  const terms = Array.from(new Set(q.toLowerCase().split(/[^\p{L}\p{N}]+/u).filter(Boolean))).slice(0, 8);
+  const allTerms = Array.from(new Set([...chars, ...terms]));
+  const likeParams = [];
+  const clauses = [];
+  for (const t of terms) {
+    likeParams.push(`%${t}%`);
+    clauses.push(`LOWER(content) LIKE $${likeParams.length}`);
+  }
+  for (const c of chars) {
+    likeParams.push(`%${c}%`);
+    clauses.push(`LOWER(content) LIKE $${likeParams.length}`);
+  }
+  const whereLike = clauses.length ? `AND (${clauses.join(' OR ')})` : '';
+  const keywordRes = await pgQuery(
     `SELECT id, creator_id, content, payload, created_ts, updated_ts
      FROM memo
      WHERE visibility = 'PRIVATE' AND LENGTH(content) > 20
+       ${whereLike}
      ORDER BY updated_ts DESC
-     LIMIT 40`
+     LIMIT 30`,
+    likeParams
   );
-  if (!res.ok) return { ok: false, error: res.error };
-  // Score each memo by cosine similarity of query embedding vs memo text embedding
-  const scored = [];
-  for (const row of res.rows) {
-    try {
-      const rowEmbed = await getEmbedding(row.content.slice(0, 2000));
-      const score = cosineSim(embed, rowEmbed);
-      if (score >= minScore) {
-        scored.push({ ...row, score: parseFloat(score.toFixed(4)) });
-      }
-    } catch {
-      // skip on embed failure
+  // 2. 向量召回分支（如果有embedding字段存在）
+  const embedRes = { rows: [] };
+  try {
+    const queryEmbedding = await getEmbedding(q);
+    const vecRes = await pgQuery(
+      `SELECT id, creator_id, content, payload, created_ts, updated_ts, 1 - (embedding <=> $1::vector) as score
+       FROM memo
+       WHERE visibility = 'PRIVATE' AND LENGTH(content) > 20
+         AND embedding IS NOT NULL
+       ORDER BY embedding <=> $1::vector
+       LIMIT 30`,
+      [JSON.stringify(queryEmbedding)]
+    );
+    if (vecRes.ok) embedRes.rows = vecRes.rows;
+  } catch {}
+  // 3. 结果去重合并
+  const merged = new Map();
+  // 关键词结果加权
+  keywordRes.rows?.forEach(row => {
+    if (!merged.has(row.id)) {
+      let score = 0;
+      const content = String(row.content || '').toLowerCase();
+      let hits = 0;
+      for (const t of terms) if (content.includes(t)) hits += 2;
+      for (const c of chars) if (content.includes(c)) hits += 0.5;
+      const totalPossible = terms.length * 2 + chars.length * 0.5;
+      const ratio = totalPossible > 0 ? hits / totalPossible : 0;
+      const recencyBoost = row.updated_ts ? 0.1 : 0;
+      score = parseFloat((ratio + recencyBoost).toFixed(4));
+      merged.set(row.id, { ...row, score, source: 'keyword' });
     }
-  }
-  scored.sort((a, b) => b.score - a.score);
+  });
+  // 向量结果加权
+  embedRes.rows?.forEach(row => {
+    if (!merged.has(row.id)) {
+      merged.set(row.id, { ...row, score: parseFloat((row.score || 0).toFixed(4)), source: 'vector' });
+    } else {
+      // 双命中加权
+      const existing = merged.get(row.id);
+      existing.score = parseFloat((Math.max(existing.score, row.score) * 1.2).toFixed(4));
+      existing.source = 'hybrid';
+      merged.set(row.id, existing);
+    }
+  });
+  // 4. 排序取topK
+  const noisyContent = (content) => {
+    const s = String(content || '');
+    return /\{\"jsonrpc\":\"2\.0\"|layer2_answer:start|STDOUT\+STDERR|Internal task completion event|source: subagent|Stats: runtime|Action:/i.test(s);
+  };
+  const scored = Array.from(merged.values())
+    .filter(row => row.score >= Math.min(minScore, 0.1))
+    .filter(row => !noisyContent(row.content))
+    .sort((a, b) => b.score - a.score || (b.updated_ts || 0) - (a.updated_ts || 0));
   return { ok: true, results: scored.slice(0, topK) };
 }
@@ -484,42 +553,103 @@ const TOOLS = {
       }
       case 'layer2_answer': {
+        log('info', 'layer2_answer:start', JSON.stringify(args || {}));
         const { query, topK = 5 } = args || {};
         if (!query) return '❌ query is required';
-        const sem = await semanticSearch(query, { topK, minScore: 0.45 });
-        const evidence = sem.ok ? sem.results.slice(0, topK) : [];
-        const recall = await hindsight.recall(query, { topK: 6 });
-        const h = await hindsight.healthcheck();
-        let reflect = null;
-        const fastPath = /为什么喜欢|偏好|原因/.test(query) && /gotti|leah/i.test(query);
-        if (h.ok && !fastPath) {
-          reflect = await hindsight.reflect(query);
-        }
-        const recallMemories = Array.isArray(recall?.data?.results) ? recall.data.results : [];
+        // Stage 1: semantic search over memos (candidates)
+        log('info', 'layer2_answer:semantic_search');
+        const sem = await semanticSearch(query, { topK: 15, minScore: 0.35 });
+        const rawMemos = sem.ok ? sem.results : [];
+        const qTerms = String(query || '').toLowerCase().split(/[^\p{L}\p{N}]+/u).filter(Boolean);
+        // Stage 2: Hindsight recall (candidates)
+        log('info', 'layer2_answer:hindsight_recall');
+        let recallMemories = [];
+        let hindsightUsed = false;
+        try {
+          const h = await new Promise(resolve => {
+            const _lib = require('http');
+            const _req = _lib.request({
+              hostname: '127.0.0.1', port: 8888,
+              path: '/health', method: 'GET'
+            }, _res => {
+              let _d = ''; _res.on('data', c => _d += c);
+              _res.on('end', () => resolve({ ok: _res.statusCode >= 200 && _res.statusCode < 300 }));
+            });
+            _req.on('timeout', () => { _req.destroy(); resolve({ ok: false }); });
+            _req.on('error', () => resolve({ ok: false }));
+            _req.setTimeout(2000);
+            _req.end();
+            setTimeout(() => resolve({ ok: false }), 2500);
+          });
+          if (h?.ok) {
+            hindsightUsed = true;
+            const recall = await Promise.race([
+              hindsight.recall(query, { topK: 6 }),
+              new Promise(resolve => setTimeout(() => resolve({ ok: false }), 8000))
+            ]);
+            recallMemories = Array.isArray(recall?.data?.results) ? recall.data.results : [];
+          }
+        } catch (_) {}
+        // Stage 3: The Judge (memos-as-judge)
+        // Combine and filter based on strict term matching if score is low
         const facts = [];
-        for (const item of evidence.slice(0, 3)) {
-          facts.push(`- [score=${item.score}] ${String(item.content).slice(0, 180)}`);
+        const seenTexts = new Set();
+        // 1. Prioritize Hindsight results but verify them against query terms
+        const filteredRecall = recallMemories.filter(item => {
+          const t = String(item.text || '').toLowerCase();
+          if (!t.trim() || /pg 版 memos API 写入测试|发送了一条消息|没有完成|^这是一条 /.test(t)) return false;
+          // If we have query terms, check if the recall matches at least one (looser judge for hindsight)
+          return qTerms.length === 0 || qTerms.some(term => t.includes(term));
+        });
+        for (const item of filteredRecall.slice(0, 3)) {
+          const text = extractDisplayText(item.text || '').slice(0, 250);
+          if (text && !seenTexts.has(text)) {
+            facts.push(`- [recall] ${text}`);
+            seenTexts.add(text);
+          }
         }
-        for (const item of recallMemories.slice(0, 3)) {
-          facts.push(`- [recall] ${String(item.text || '').slice(0, 180)}`);
+        // 2. Add high-quality semantic hits as supporting evidence
+        const verifiedMemos = rawMemos.filter(item => {
+          const content = extractDisplayText(item.content || '').toLowerCase();
+          if (!content || seenTexts.has(content)) return false;
+          // High confidence threshold
+          if (item.score >= 0.85) return true;
+          // Medium confidence + strict term match
+          return item.score >= 0.45 && qTerms.every(term => content.includes(term));
+        });
+        for (const item of verifiedMemos.slice(0, Math.max(0, 5 - facts.length))) {
+          const content = extractDisplayText(item.content || '').slice(0, 250);
+          facts.push(`- [evidence] ${content}`);
+          seenTexts.add(content);
+        }
+        // Stage 4: Synthesis
+        log('info', 'layer2_answer:hindsight_reflect');
+        let reflect = null;
+        if (hindsightUsed && facts.length > 0) {
+          reflect = await Promise.race([
+            hindsight.reflect(query),
+            new Promise(resolve => setTimeout(() => resolve(null), 12000))
+          ]).catch(() => null);
         }
-        const joined = evidence.map(x => String(x.content || '')).join('\n');
-        const recallJoined = recallMemories.map(x => String(x.text || '')).join('\n');
-        const combined = `${joined}\n${recallJoined}\n${query}`;
-        const reasonLocked = /gotti|leah/i.test(combined) && /会摇|很会摇|摇起来|摇得/.test(combined);
-        let judgment = '未形成稳定归纳';
-        if (reasonLocked) {
-          judgment = '从已记录证据看，你喜欢 Gotti 的核心原因就是：她会摇。这是当前证据里最明确、最稳定的偏好线索。';
-        } else if (reflect?.ok) {
+        let judgment = facts.length > 0 ? '已找到相关证据，先按证据回答。' : '未查到足够证据';
+        if (reflect?.ok) {
           judgment = typeof reflect.data === 'string'
-            ? reflect.data.slice(0, 600)
-            : String(reflect.data?.text || JSON.stringify(reflect.data)).slice(0, 600);
-        } else if (evidence.length || recallMemories.length) {
-          judgment = '已找到相关证据，但当前 Hindsight 未稳定收口；先按证据做保守归纳。';
-        } else {
-          judgment = '未查到足够证据';
+            ? reflect.data.slice(0, 800)
+            : String(reflect.data?.text || JSON.stringify(reflect.data)).slice(0, 800);
         }
-        return `已确认事实：\n${facts.length ? facts.join('\n') : '- 无'}\n\n归纳判断：\n- ${judgment}\n\n不确定点：\n- ${reasonLocked ? '当前答案已被证据优先规则锁定；若底层记忆变更需重新验证' : (reflect?.ok ? 'Hindsight 已参与归纳，但仍应以证据为准' : 'Hindsight 未接通，当前仅基于 memos semantic evidence')}`;
+        log('info', 'layer2_answer:return', `facts=${facts.length}`);
+        return `已确认事实：\n${facts.length ? facts.join('\n') : '- 无'}\n\n归纳判断：\n- ${judgment}\n\n不确定点：\n- ${hindsightUsed ? 'Hindsight 已作为增强层参与' : 'Hindsight 离线，仅使用本地证据'}\n\n[PRO-TIP] 证据召回由 memos + Hindsight 双路裁决：memos 负责硬核实锤（实体对齐），Hindsight 负责语义联想。`;
       }
       case 'layer2_version': {
@@ -587,15 +717,17 @@ async function handleLine(line) {
 async function main() {
   process.stdin.setEncoding('utf8');
   let buffer = '';
-  process.stdin.on('data', chunk => {
+  process.stdin.on('data', async chunk => {
     buffer += chunk;
     let idx;
     while ((idx = buffer.indexOf('\n')) >= 0) {
       const line = buffer.slice(0, idx);
       buffer = buffer.slice(idx + 1);
-      handleLine(line).catch(e => {
+      try {
+        await handleLine(line);
+      } catch (e) {
         process.stderr.write(JSON.stringify({ jsonrpc: '2.0', id: null, error: { message: e.message } }) + '\n');
-      });
+      }
     }
   });

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@bsbofmusic/openclaw-memory-layer2",
-  "version": "0.2.0",
-  "description": "Layer2 Memory MCP Server — Hindsight + memos unified validation over memos PostgreSQL, reusing OpenClaw memorySearch embedding config",
+  "version": "0.2.1",
+  "description": "Layer2 Memory MCP Server — Hindsight + memos unified validation over memos PostgreSQL, reusing OpenClaw memorySearch embedding config",
   "main": "index.js",
   "bin": {
     "openclaw-memory-layer2": "./index.js"