npm - @yeaft/webchat-agent - Versions diffs - 0.1.408 → 0.1.410 - Mend

@yeaft/webchat-agent 0.1.408 → 0.1.410

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/package.json +1 -1
package/unify/cli.js +214 -16
package/unify/config.js +13 -0
package/unify/conversation/persist.js +436 -0
package/unify/conversation/search.js +65 -0
package/unify/engine.js +210 -18
package/unify/index.js +18 -0
package/unify/mcp.js +433 -0
package/unify/memory/consolidate.js +187 -0
package/unify/memory/dream-prompt.js +272 -0
package/unify/memory/dream.js +468 -0
package/unify/memory/extract.js +97 -0
package/unify/memory/recall.js +243 -0
package/unify/memory/scan.js +273 -0
package/unify/memory/store.js +507 -0
package/unify/memory/types.js +139 -0
package/unify/prompts.js +51 -3
package/unify/skills.js +315 -0
package/unify/stop-hooks.js +146 -0
package/unify/tools/enter-worktree.js +97 -0
package/unify/tools/exit-worktree.js +131 -0
package/unify/tools/mcp-tools.js +133 -0
package/unify/tools/registry.js +146 -0
package/unify/tools/skill.js +107 -0
package/unify/tools/types.js +71 -0

package/unify/memory/recall.js ADDED Viewed

@@ -0,0 +1,243 @@
+/**
+ * recall.js — 3-step memory recall with fingerprint cache
+ *
+ * Recall flow (per design doc):
+ *   Step 1: Keyword extraction (pure rules, <1ms)
+ *   Step 2: Scope + Tags filter (read scopes.md, <5ms) → top 15 candidates
+ *   Step 3: LLM select (side-query via adapter.call) → ≤7 most relevant
+ *
+ * Fingerprint cache:
+ *   fingerprint = hash(scope, top 5 keywords, task_id)
+ *   Same fingerprint → skip recall, reuse last result
+ *
+ * Reference: yeaft-unify-core-systems.md §3.2, yeaft-unify-design.md §5.1
+ */
+import { createHash } from 'crypto';
+// ─── Constants ──────────────────────────────────────────────────
/**
 * Upper bound on the number of candidates that Step 2 (scope + tags filter)
 * hands over to Step 3 (LLM select).
 */
const MAX_CANDIDATES = 15;

/**
 * Upper bound on the number of entries a recall returns.
 */
const MAX_RECALL_RESULTS = 7;
+// ─── Step 1: Keyword Extraction (pure rules, <1ms) ──────────────
/**
 * Stop words dropped during keyword extraction (English and Chinese).
 * Tokens are lower-cased before the lookup, so every entry is lower-case.
 */
const STOP_WORDS = new Set([
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
  'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
  'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
  'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during',
  'before', 'after', 'above', 'below', 'between', 'out', 'off', 'over',
  'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when',
  'where', 'why', 'how', 'all', 'both', 'each', 'few', 'more', 'most',
  'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same',
  'so', 'than', 'too', 'very', 'just', 'because', 'but', 'and', 'or',
  'if', 'while', 'about', 'up', 'it', 'its', 'my', 'me', 'i', 'you',
  'your', 'we', 'our', 'they', 'them', 'their', 'this', 'that', 'what',
  'which', 'who', 'whom', 'these', 'those',
  // Chinese stop words
  '的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都',
  '一', '一个', '上', '也', '很', '到', '说', '要', '去', '你', '会',
  '着', '没有', '看', '好', '自己', '这', '他', '她', '吗', '呢', '吧',
  '把', '被', '那', '它', '让', '给', '可以', '什么', '怎么', '帮',
  '帮我', '请', '能', '想',
]);

/**
 * Matches a run of characters that cannot be part of a keyword.
 *
 * Kept characters: ASCII word characters (`\w`), any Unicode letter, mark or
 * digit, plus the full Hiragana/Katakana blocks that were already accepted.
 * Using `\p{L}` instead of a hand-picked CJK range keeps accented Latin,
 * Cyrillic, Hangul, etc. intact instead of treating them as punctuation
 * (previously "café" was tokenized as "caf").
 */
const KEYWORD_SEPARATOR = /[^\w\p{L}\p{M}\p{N}\u3040-\u309f\u30a0-\u30ff]+/gu;

/**
 * Extract keywords from a prompt (pure rules, no LLM).
 *
 * The prompt is lower-cased, split on separators/whitespace, and tokens that
 * are a single UTF-16 code unit long or listed in STOP_WORDS are dropped.
 * Text written without spaces (e.g. a Chinese sentence) stays one token.
 *
 * @param {string} prompt
 * @returns {string[]} — unique keywords, most frequent first; ties are ordered
 *   with String.prototype.localeCompare. Empty for a blank or non-string prompt.
 */
export function extractKeywords(prompt) {
  // A non-string prompt has no keywords; returning [] avoids a TypeError on .trim().
  if (typeof prompt !== 'string' || !prompt.trim()) return [];

  const tokens = prompt
    .toLowerCase()
    .replace(KEYWORD_SEPARATOR, ' ')
    .split(/\s+/)
    .filter((token) => token.length > 1 && !STOP_WORDS.has(token));

  // Count frequencies
  const freq = new Map();
  for (const token of tokens) {
    freq.set(token, (freq.get(token) || 0) + 1);
  }

  // Sort by frequency descending, then alphabetically
  return [...freq.entries()]
    .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
    .map(([word]) => word);
}
+// ─── Fingerprint Cache ──────────────────────────────────────────
/**
 * Compute a recall fingerprint for cache checking.
 *
 * The fingerprint is the first 16 hex characters of the SHA-256 digest of
 * `scope|kw1,kw2,...|taskId`, using at most the first five keywords.
 * A `null` scope/taskId is treated like an omitted one, and a missing or
 * non-array `keywords` is treated as an empty list, so equivalent calls
 * produce the same fingerprint instead of throwing or hashing "null".
 *
 * @param {{ scope?: string, keywords: string[], taskId?: string }} params
 * @returns {string} — 16-character hex hash
 */
export function computeFingerprint({ scope = '', keywords = [], taskId = '' } = {}) {
  const keywordList = Array.isArray(keywords) ? keywords : [];
  const top5 = keywordList.slice(0, 5).join(',');
  // Defaults only cover `undefined`; `??` also folds an explicit null into ''.
  const input = `${scope ?? ''}|${top5}|${taskId ?? ''}`;
  return createHash('sha256').update(input).digest('hex').slice(0, 16);
}
+// ─── Step 2: Scope + Tags Filter ────────────────────────────────
/**
 * Step 2 of recall: narrow the store down to candidate entries without an LLM.
 *
 * Builds a filter query from the scope and the extracted keywords (used as
 * tags), capped at MAX_CANDIDATES, and delegates to `memoryStore.findByFilter`.
 *
 * @param {import('./store.js').MemoryStore} memoryStore
 * @param {{ scope?: string, keywords: string[] }} params
 * @returns {object[]} — whatever `findByFilter` returns for that query
 */
function filterCandidates(memoryStore, { scope, keywords }) {
  const query = {
    scope,
    tags: keywords,
    limit: MAX_CANDIDATES,
  };
  return memoryStore.findByFilter(query);
}
+// ─── Step 3: LLM Select ────────────────────────────────────────
/**
 * Use an LLM side-query to select the most relevant entries.
 *
 * When there are at most MAX_RECALL_RESULTS candidates, their names are
 * returned directly and the adapter is not called. Otherwise the adapter is
 * asked for a JSON array of entry names. The answer is sanitized before use:
 *   - only names that identify one of the candidates are kept (matched by
 *     exact name, or by slug form so minor formatting drift still matches);
 *   - duplicates are removed;
 *   - the result is capped at MAX_RECALL_RESULTS.
 * An explicit empty array from the model is respected. If the call fails, the
 * answer cannot be parsed, or none of the returned names is a candidate, the
 * first MAX_RECALL_RESULTS candidates are returned as a best-effort fallback.
 *
 * @param {object} adapter — LLM adapter with .call() method
 * @param {object} config — { model }
 * @param {string} prompt — user's prompt
 * @param {object[]} candidates — entries with frontmatter
 * @returns {Promise<string[]>} — selected candidate names (≤ MAX_RECALL_RESULTS
 *   whenever the LLM path is taken)
 */
async function llmSelect(adapter, config, prompt, candidates) {
  if (candidates.length <= MAX_RECALL_RESULTS) {
    // No need to filter if already under limit
    return candidates.map(c => c.name);
  }

  const candidateList = candidates.map((c, i) =>
    `${i + 1}. [${c.name}] kind=${c.kind}, scope=${c.scope}, tags=[${(c.tags || []).join(', ')}]`
  ).join('\n');
  const system = `You are a memory retrieval assistant. Given a user's prompt and a list of memory entries, select the most relevant ones (up to ${MAX_RECALL_RESULTS}).
Return ONLY a JSON array of entry names, like: ["entry-name-1", "entry-name-2"]
No explanation, just the JSON array.`;
  const messages = [{
    role: 'user',
    content: `User prompt: "${prompt}"
Memory entries:
${candidateList}
Select the ${MAX_RECALL_RESULTS} most relevant entries. Return a JSON array of entry names.`,
  }];

  // Slug form of a name: lower-case, runs of other characters become '-'.
  const toSlug = (value) => value
    .toLowerCase()
    .replace(/[^a-z0-9\u4e00-\u9fff-]+/g, '-')
    .replace(/^-+|-+$/g, '');

  // Lookup table from acceptable spellings to the candidate's own name.
  // Exact names are registered first so they always win over slug aliases.
  const canonicalByKey = new Map();
  for (const { name } of candidates) {
    if (typeof name === 'string' && !canonicalByKey.has(name)) {
      canonicalByKey.set(name, name);
    }
  }
  for (const { name } of candidates) {
    if (typeof name !== 'string') continue;
    const slug = toSlug(name);
    if (slug && !canonicalByKey.has(slug)) canonicalByKey.set(slug, name);
  }

  try {
    const result = await adapter.call({
      model: config.model,
      system,
      messages,
      maxTokens: 512,
    });
    // Parse the JSON array from the response (a missing text throws → fallback)
    const text = result.text.trim();
    const jsonMatch = text.match(/\[[\s\S]*\]/);
    if (jsonMatch) {
      const parsed = JSON.parse(jsonMatch[0]);
      if (Array.isArray(parsed)) {
        const picked = [];
        for (const raw of parsed) {
          if (typeof raw !== 'string') continue;
          const canonical = canonicalByKey.get(raw) ?? canonicalByKey.get(toSlug(raw));
          if (canonical === undefined || picked.includes(canonical)) continue;
          picked.push(canonical);
          if (picked.length === MAX_RECALL_RESULTS) break;
        }
        // [] from the model means "nothing relevant"; a non-empty answer with
        // no usable name is treated as a failed selection and falls through.
        if (parsed.length === 0 || picked.length > 0) return picked;
      }
    }
  } catch {
    // Deliberate best-effort: any adapter/parse failure uses the fallback below.
  }
  return candidates.slice(0, MAX_RECALL_RESULTS).map(c => c.name);
}
+// ─── Main Recall Function ───────────────────────────────────────
/**
 * Fingerprint → last recall result.
 * @type {Map<string, { entries: object[], timestamp: number }>}
 */
const _cache = new Map();

/** Cache TTL — 5 minutes. */
const CACHE_TTL = 5 * 60 * 1000;

/**
 * Drop every cache record whose TTL has elapsed, so the module-level cache
 * does not keep growing with fingerprints that can never be served again.
 *
 * @param {number} now — current time in ms since the epoch
 */
function pruneExpiredCache(now) {
  for (const [key, record] of _cache) {
    if (now - record.timestamp >= CACHE_TTL) _cache.delete(key);
  }
}

/**
 * Recall relevant memory entries for a given prompt.
 *
 * 3-step process:
 *   1. Extract keywords (rules)
 *   2. Scope + Tags filter → candidates
 *   3. LLM select (skipped if there are at most MAX_RECALL_RESULTS candidates)
 *
 * Results are cached per fingerprint for CACHE_TTL; a cache hit returns the
 * stored entries without touching the store or the adapter. Each selected
 * name is looked up by its slug first and by the raw name second, and the
 * frequency is bumped under the key that actually resolved.
 *
 * NOTE(review): the cache is module-level and keyed only by fingerprint, so it
 * is shared by every memoryStore passed in and is not invalidated when the
 * store changes — confirm this is acceptable for callers.
 *
 * @param {{ prompt: string, adapter: object, config: object, memoryStore: import('./store.js').MemoryStore, scope?: string, taskId?: string }} params
 * @returns {Promise<{ entries: object[], keywords: string[], fingerprint: string, cached: boolean }>}
 */
export async function recall({ prompt, adapter, config, memoryStore, scope, taskId }) {
  // Step 1: Extract keywords
  const keywords = extractKeywords(prompt);
  if (keywords.length === 0) {
    return { entries: [], keywords: [], fingerprint: '', cached: false };
  }

  // Check fingerprint cache
  const fingerprint = computeFingerprint({ scope, keywords, taskId });
  const now = Date.now();
  const cached = _cache.get(fingerprint);
  if (cached && now - cached.timestamp < CACHE_TTL) {
    return { entries: cached.entries, keywords, fingerprint, cached: true };
  }
  // Cache miss: evict stale records (including this fingerprint's, if any).
  pruneExpiredCache(now);

  // Step 2: Scope + Tags filter
  const candidates = filterCandidates(memoryStore, { scope, keywords });
  if (candidates.length === 0) {
    _cache.set(fingerprint, { entries: [], timestamp: Date.now() });
    return { entries: [], keywords, fingerprint, cached: false };
  }

  // Step 3: LLM select (only if > MAX_RECALL_RESULTS candidates)
  const selectedNames = candidates.length <= MAX_RECALL_RESULTS
    ? candidates.map(c => c.name)
    : await llmSelect(adapter, config, prompt, candidates);

  // Load full entries for selected names
  const entries = [];
  for (const name of selectedNames) {
    // A candidate without a usable name cannot be looked up.
    if (typeof name !== 'string' || name === '') continue;
    const slug = name.toLowerCase().replace(/[^a-z0-9\u4e00-\u9fff-]+/g, '-').replace(/^-+|-+$/g, '');

    // Try the slug first (skipping an empty slug), then the raw name.
    let key = slug;
    let entry = slug ? memoryStore.readEntry(slug) : null;
    if (!entry && name !== slug) {
      key = name;
      entry = memoryStore.readEntry(name);
    }
    if (entry) {
      entries.push(entry);
      // Bump frequency under the key that resolved, not unconditionally the slug.
      memoryStore.bumpFrequency(key);
    }
  }

  // Update cache
  _cache.set(fingerprint, { entries, timestamp: Date.now() });
  return { entries, keywords, fingerprint, cached: false };
}

/**
 * Clear the recall cache. Useful for testing.
 */
export function clearRecallCache() {
  _cache.clear();
}

package/unify/memory/scan.js ADDED Viewed

@@ -0,0 +1,273 @@
+/**
+ * scan.js — Memory header scanning and scope/tag matching
+ *
+ * Fast in-memory scanning of entry frontmatter for:
+ * - Scope tree traversal
+ * - Tag overlap scoring
+ * - Kind-based filtering
+ * - Stale entry detection (for Dream)
+ *
+ * Reference: yeaft-unify-core-systems.md §3.3, yeaft-unify-design.md §5.1
+ */
+import { KINDS, KIND_PRIORITY, IMPORTANCE_WEIGHT, getAncestorScopes } from './types.js';
+// ─── Scan Results ──────────────────────────────────────────
+/**
+ * @typedef {Object} ScanResult
+ * @property {object[]} entries — all parsed entries
+ * @property {Map<string, number>} scopeCount — scope → entry count
+ * @property {Map<string, number>} kindCount — kind → entry count
+ * @property {Map<string, Set<string>>} tagIndex — tag → set of entry names
+ * @property {number} totalEntries — total count
+ */
/**
 * Walk every entry returned by `memoryStore.listEntries()` and build the
 * lookup tables used by the rest of the scan module.
 *
 * - `scopeCount` counts entries per scope (a missing scope counts as 'global').
 * - `kindCount` counts entries per kind (a missing kind counts as 'fact').
 * - `tagIndex` maps each lower-cased tag to the set of entry names carrying it.
 *
 * @param {import('./store.js').MemoryStore} memoryStore
 * @returns {ScanResult}
 */
export function scanEntries(memoryStore) {
  const entries = memoryStore.listEntries();
  const scopeCount = new Map();
  const kindCount = new Map();
  const tagIndex = new Map();

  // Add one to the counter stored under `key`.
  const tally = (counter, key) => {
    counter.set(key, (counter.get(key) || 0) + 1);
  };

  for (const entry of entries) {
    tally(scopeCount, entry.scope || 'global');
    tally(kindCount, entry.kind || 'fact');

    for (const tag of entry.tags || []) {
      const key = tag.toLowerCase();
      let names = tagIndex.get(key);
      if (names === undefined) {
        names = new Set();
        tagIndex.set(key, names);
      }
      names.add(entry.name);
    }
  }

  const totalEntries = entries.length;
  return { entries, scopeCount, kindCount, tagIndex, totalEntries };
}
+// ─── Scoring Functions ─────────────────────────────────────
/**
 * Score an entry for relevance to a query context.
 *
 * Scoring factors (all additive):
 *   - Scope (only when context.scope is given): exact match = 5; entry scope
 *     listed by getAncestorScopes(context.scope) = 3; entry scope nested under
 *     context.scope = 3; otherwise 'global' = 1. An entry without a scope is
 *     scored as 'global', matching how scanEntries counts it.
 *   - Tag overlap: 2 per context tag found (case-insensitively) in entry.tags
 *   - Kind priority: KIND_PRIORITY[kind] × 0.5 (0 for unknown kinds)
 *   - Preferred kind: +2 when entry.kind is in context.preferKinds
 *   - Importance: IMPORTANCE_WEIGHT[importance] × 0.5, falling back to the
 *     'normal' weight only when the importance has no weight defined
 *   - Frequency bonus: log2(frequency), never negative
 *   - Recency bonus: updated within 7 days = +2, within 30 days = +1
 *
 * @param {object} entry — memory entry
 * @param {{ scope?: string, tags?: string[], preferKinds?: string[] }} context
 * @returns {number} — relevance score
 */
export function scoreEntry(entry, context = {}) {
  let score = 0;

  // Scope match — a scope-less entry is treated as 'global' (see scanEntries).
  if (context.scope) {
    const entryScope = entry.scope || 'global';
    if (entryScope === context.scope) {
      score += 5; // exact match
    } else if (getAncestorScopes(context.scope).includes(entryScope)) {
      score += 3; // ancestor match
    } else if (entryScope.startsWith(`${context.scope}/`)) {
      score += 3; // descendant match
    } else if (entryScope === 'global') {
      score += 1; // global fallback
    }
  }

  // Tag overlap
  if (context.tags && context.tags.length > 0 && entry.tags) {
    const entryTags = new Set(entry.tags.map(t => t.toLowerCase()));
    for (const tag of context.tags) {
      if (entryTags.has(tag.toLowerCase())) {
        score += 2;
      }
    }
  }

  // Kind priority
  const kindPriority = KIND_PRIORITY[entry.kind] ?? 0;
  score += kindPriority * 0.5;

  // Preferred kinds bonus
  if (context.preferKinds && context.preferKinds.includes(entry.kind)) {
    score += 2;
  }

  // Importance weight — `??` keeps a defined weight of 0 instead of
  // silently promoting it to the 'normal' weight.
  const impWeight = IMPORTANCE_WEIGHT[entry.importance] ?? IMPORTANCE_WEIGHT.normal;
  score += impWeight * 0.5;

  // Frequency bonus (logarithmic)
  const freq = entry.frequency || 1;
  score += Math.log2(Math.max(freq, 1));

  // Recency bonus (an unparsable date yields NaN and therefore no bonus)
  if (entry.updated_at) {
    const daysSince = (Date.now() - new Date(entry.updated_at).getTime()) / (1000 * 60 * 60 * 24);
    if (daysSince <= 7) score += 2;
    else if (daysSince <= 30) score += 1;
  }
  return score;
}
+// ─── Stale Detection (for Dream) ────────────────────────────
/**
 * Collect the entries that look stale, based on the time since `updated_at`.
 *
 * An entry is reported when at least one rule applies:
 * - kind 'context' and last updated more than 30 days ago
 * - frequency of at most 1 (or unset) and last updated more than 60 days ago
 * - kind 'relation' and last updated more than 90 days ago
 *
 * An entry without `updated_at` is aged from the Unix epoch. Input entries are
 * not modified; each reported entry is a shallow copy carrying an extra
 * `_daysSinceUpdate` field (age rounded to whole days).
 *
 * @param {object[]} entries
 * @returns {object[]} — stale entries, in input order
 */
export function findStaleEntries(entries) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const now = Date.now();
  const stale = [];

  for (const entry of entries) {
    const updatedMs = entry.updated_at ? new Date(entry.updated_at).getTime() : 0;
    const ageDays = (now - updatedMs) / MS_PER_DAY;

    const expiredContext = entry.kind === 'context' && ageDays > 30;
    const neverRecalled = (entry.frequency || 1) <= 1 && ageDays > 60;
    const expiredRelation = entry.kind === 'relation' && ageDays > 90;

    if (expiredContext || neverRecalled || expiredRelation) {
      stale.push({ ...entry, _daysSinceUpdate: Math.round(ageDays) });
    }
  }

  return stale;
}
+// ─── Duplicate Detection (for Dream Merge) ──────────────────
/**
 * Find groups of entries that are potentially duplicates.
 *
 * Entries are scanned in order. Each entry that is not yet part of a group
 * acts as an anchor and collects every later ungrouped entry that has the same
 * kind AND shares at least two tags with it (case-insensitive). Grouping is
 * relative to the anchor only — it is not transitive between members.
 *
 * The lower-cased tag set of every entry is built once up front instead of
 * being rebuilt for each compared pair.
 *
 * @param {object[]} entries
 * @returns {object[][]} — groups (size ≥ 2) of potentially duplicate entries,
 *   anchor first
 */
export function findDuplicateGroups(entries) {
  // One normalized tag set per entry, indexed like `entries`.
  const tagSets = entries.map(
    (entry) => new Set((entry.tags || []).map(t => t.toLowerCase())),
  );

  const groups = [];
  const visited = new Set();
  for (let i = 0; i < entries.length; i++) {
    if (visited.has(i)) continue;
    const anchorTags = tagSets[i];
    // Fewer than two distinct tags can never reach an overlap of two.
    if (anchorTags.size < 2) continue;

    const group = [entries[i]];
    for (let j = i + 1; j < entries.length; j++) {
      if (visited.has(j)) continue;
      if (entries[i].kind !== entries[j].kind) continue;

      const otherTags = tagSets[j];
      let overlap = 0;
      for (const tag of anchorTags) {
        if (otherTags.has(tag)) {
          overlap++;
          if (overlap >= 2) break; // threshold reached, no need to keep counting
        }
      }
      if (overlap >= 2) {
        group.push(entries[j]);
        visited.add(j);
      }
    }
    if (group.length > 1) {
      visited.add(i);
      groups.push(group);
    }
  }
  return groups;
}
+// ─── Stats Summary ──────────────────────────────────────────
/**
 * Render a scan result as a short multi-line text report (for Dream prompts).
 *
 * Lines, in order:
 *   - `Total entries: N`
 *   - `Kinds: ...` — one `kind: count` pair per kind in KINDS with a non-zero
 *     count (omitted when there is none)
 *   - `Top scopes:` followed by up to 10 indented `scope: count` lines,
 *     highest count first (omitted when there are no scopes)
 *   - `Top tags: ...` — up to 20 `tag(count)` items, highest count first
 *     (omitted when there are no tags)
 *
 * @param {ScanResult} scan
 * @returns {string}
 */
export function summarizeScan(scan) {
  // Comparator for [label, count] pairs: larger counts first.
  const byCountDesc = (left, right) => right[1] - left[1];

  const report = [`Total entries: ${scan.totalEntries}`];

  // Kind breakdown, in KINDS order
  const kindParts = [...KINDS]
    .map((kind) => [kind, scan.kindCount.get(kind) || 0])
    .filter(([, count]) => count > 0)
    .map(([kind, count]) => `${kind}: ${count}`);
  if (kindParts.length > 0) {
    report.push(`Kinds: ${kindParts.join(', ')}`);
  }

  // Scope breakdown (top 10)
  const topScopes = Array.from(scan.scopeCount.entries())
    .sort(byCountDesc)
    .slice(0, 10);
  if (topScopes.length > 0) {
    report.push('Top scopes:');
    topScopes.forEach(([scope, count]) => {
      report.push(`  ${scope}: ${count}`);
    });
  }

  // Tag cloud (top 20)
  const topTags = Array.from(scan.tagIndex.entries(), ([tag, names]) => [tag, names.size])
    .sort(byCountDesc)
    .slice(0, 20);
  if (topTags.length > 0) {
    const cloud = topTags.map(([tag, count]) => `${tag}(${count})`).join(', ');
    report.push(`Top tags: ${cloud}`);
  }

  return report.join('\n');
}