npm - @stitchdb/cli - Versions diffs - 0.10.0 → 0.11.0 - Mend

@stitchdb/cli 0.10.0 → 0.11.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/cli.js +118 -14
package/package.json +1 -1

package/dist/cli.js CHANGED Viewed

@@ -629,13 +629,28 @@ async function cmdHook(args) {
             const stitch = client(cfg);
             const projectTag = (threadName.split('/')[0] || threadName).toLowerCase();
             const baseUrl = cfg.baseUrl || 'https://db.stitchdb.com';
-            const [thread, memHits, workspaces, fileSummaries, aboutMems] = await Promise.all([
+            // Adaptive sizing — scale what we inject by how much the project
+            // actually has. A 5k-memory pool surfacing 8 random memories is
+            // noise; better to inject 0 and tell Claude to `recall` on demand.
+            const [thread, allProjectMems, workspaces, fileSummaries, aboutMems] = await Promise.all([
                 stitch.thread(threadName).recall({ last: 5 }).catch(() => ({ thread_id: '', recent: [], semantic: [] })),
-                stitch.recall(projectTag, { k: 8 }).catch(() => []),
+                stitch.list({ limit: 1 }).catch(() => []), // probe the pool size cheaply
                 stitch.workspaces.list().catch(() => []),
                 stitch.list({ limit: 12 }).then((all) => all.filter((m) => m.tags.some((t) => t.startsWith('file:')))).catch(() => []),
                 stitch.list({ tag: 'workspace:about', limit: 1 }).catch(() => []),
             ]);
+            // Estimate the workspace's total memory count from a single page
+            // header. We just need to know "small / medium / huge" — exact
+            // count is overkill. (D1 doesn't expose count in list — approximate
+            // by listing 200 once and capping.)
+            const sample = await stitch.list({ limit: 200 }).catch(() => []);
+            const poolSize = Array.isArray(sample) ? sample.length : 0;
+            // Tier the injection: tiny / small / medium / huge.
+            const memSliceK = poolSize <= 50 ? 8 : poolSize < 500 ? 8 : 0;
+            const turnSliceK = (thread.recent?.length ?? 0) < 5 ? 5 : (thread.recent?.length ?? 0) < 100 ? 5 : 2;
+            const memHits = memSliceK > 0
+                ? await stitch.recall(projectTag, { k: memSliceK }).catch(() => [])
+                : [];
             // Look up the current workspace using the client's resolved id; fall
             // back to first if resolveWorkspace failed (shouldn't normally).
             const currentWsId = await stitch.resolveWorkspace().catch(() => null);
@@ -655,8 +670,15 @@ async function cmdHook(args) {
             catch { /* ignore */ }
             const lines = [];
             lines.push('<stitch-context>');
-            lines.push(`Project: ${threadName} · Workspace: ${currentWs?.name || '(unknown)'} · Stitch MCP tools: recall, remember, recall_global, remember_global, thread_recall, thread_append, workspace_setup, file_summary, file_summary_save.`);
+            lines.push(`Project: ${threadName} · Workspace: ${currentWs?.name || '(unknown)'} · ${poolSize} memories indexed · Stitch MCP tools: recall, remember, recall_global, remember_global, thread_recall, thread_append, workspace_setup, file_summary, file_summary_save.`);
             lines.push('');
+            // Huge-pool mode: don't pre-load any memories. Tell Claude what's
+            // available and that it should pull what's relevant on demand.
+            if (poolSize >= 200 && memHits.length === 0) {
+                lines.push('### Memory pool');
+                lines.push(`This project has ${poolSize}+ memories. Don't try to load them all — call \`recall(query)\` whenever the user references prior decisions, code patterns, or preferences. Cheaper and more relevant than reading them upfront.`);
+                lines.push('');
+            }
             // Nudge the AI to set a meaningful workspace name once.
             if (currentWs?.name === 'default') {
                 lines.push('### ⚠ Workspace is still named "default"');
@@ -705,13 +727,17 @@ async function cmdHook(args) {
             }
             if (thread.recent && thread.recent.length > 0) {
                 lines.push('### Most recent turns (continue from here)');
-                for (const t of thread.recent.slice(-5)) {
+                for (const t of thread.recent.slice(-turnSliceK)) {
                     const txt = String(t.content || '').replace(/\n+/g, ' ').slice(0, 300);
                     lines.push(`- **${t.role}**: ${txt}`);
                 }
                 lines.push('');
             }
-            lines.push('Call `recall` for project memory, `recall_global` for cross-project user prefs, `thread_recall` for older turns, `file_summary` BEFORE reading any non-trivial file. Save user-level habits/preferences with `remember_global`; project facts with `remember`.');
+            lines.push('## How to use this memory layer');
+            lines.push('- The user mentions prior work, decisions, or "what we discussed" → call `recall(query)` (or `thread_recall(thread, semantic: ...)` for verbatim turns) BEFORE answering. Don\'t guess from the snippets above; they\'re a teaser, not the truth.');
+            lines.push('- About to open a non-trivial file → call `file_summary(path)` first. If hash matches the cached summary, you can skip the full read entirely.');
+            lines.push('- Learn a durable user-level habit ("I always use Postgres", "commit before reverting") → save with `remember_global` so it surfaces across every project. Project-specific facts → `remember`.');
+            lines.push('- A surfaced memory has `_(from thread, N turns)_` next to it → those turns are queryable via `thread_recall`. Reach for them when challenged on a fact.');
             lines.push('</stitch-context>');
             process.stdout.write(lines.join('\n'));
         }
@@ -735,19 +761,97 @@ async function cmdHook(args) {
     content = content.trim();
     if (!content)
         return;
+    // For UserPromptSubmit: log the turn AND fetch task-relevant memory in
+    // parallel, so the model sees only what's actually relevant to *this*
+    // prompt — not 8 random preloaded memories from session start. Task
+    // changes self-surface because the recall hits change with the prompt.
+    if (eventName === 'UserPromptSubmit') {
+        await Promise.all([
+            (async () => {
+                try {
+                    const stitch = client(cfg);
+                    await stitch.thread(threadName).append({ role: role, content });
+                }
+                catch { /* silent */ }
+            })(),
+            handleUserPromptInjection(cfg, content),
+        ]);
+        return;
+    }
+    // Stop: log the assistant turn, then maybe-distill.
     try {
         const stitch = client(cfg);
-        await stitch.thread(threadName).append({ role, content });
-    }
-    catch {
-        /* silent */
+        await stitch.thread(threadName).append({ role: role, content });
     }
-    // After Stop, opportunistically kick off a distillation pass in the
-    // background (fire-and-forget). Won't block the session; debouncing
-    // (cooldown + min-new-turns) is enforced inside maybeAutoDistill.
-    if (eventName === 'Stop') {
-        maybeAutoDistill(threadName).catch(() => { });
+    catch { /* silent */ }
+    maybeAutoDistill(threadName).catch(() => { });
+}
+/**
+ * Per-prompt smart context injection. Runs `recall(prompt)` against the
+ * project workspace + the user's `_global` workspace, score-gates the hits,
+ * and writes a Claude-Code-flavoured `additionalContext` JSON object to
+ * stdout. Time-bounded so a slow proxy doesn't delay the user's prompt.
+ *
+ * Why per-prompt: a static SessionStart block must guess what'll matter
+ * across the whole session. A per-prompt recall sees the actual question
+ * and pulls memory tailored to *that* — much higher signal per token.
+ * A task switch ("now let's do the dashboard") naturally surfaces the
+ * dashboard memories; an unrelated follow-up ("ok run the tests") returns
+ * low scores and we inject nothing — no noise.
+ */
+const PROMPT_INJECTION_TIMEOUT_MS = 2000;
+const PROMPT_INJECTION_MIN_SCORE = 0.4;
+async function handleUserPromptInjection(cfg, prompt) {
+    if (!prompt || prompt.length < 10)
+        return;
+    try {
+        const baseUrl = cfg.baseUrl || 'https://db.stitchdb.com';
+        const stitch = client(cfg);
+        const tOut = new Promise((_, rej) => setTimeout(() => rej(new Error('timeout')), PROMPT_INJECTION_TIMEOUT_MS));
+        // Project recall + global recall in parallel; tolerate either failing.
+        const projectHitsP = stitch.recall(prompt, { k: 3 }).catch(() => []);
+        const globalHitsP = (async () => {
+            try {
+                const ws = await stitch.workspaces.list();
+                const g = ws.find((w) => w.name === '_global');
+                if (!g)
+                    return [];
+                const gc = new Stitch({ apiKey: cfg.apiKey, baseUrl, workspace: g.id });
+                return gc.recall(prompt, { k: 2 }).catch(() => []);
+            }
+            catch {
+                return [];
+            }
+        })();
+        const [projectHits, globalHits] = await Promise.race([
+            Promise.all([projectHitsP, globalHitsP]),
+            tOut,
+        ]);
+        const project = projectHits.filter((h) => h.score >= PROMPT_INJECTION_MIN_SCORE).slice(0, 2);
+        const global = globalHits.filter((h) => h.score >= PROMPT_INJECTION_MIN_SCORE).slice(0, 1);
+        if (project.length === 0 && global.length === 0)
+            return;
+        const lines = ['<stitch-recall>'];
+        if (global.length > 0) {
+            lines.push('User-level rules relevant here:');
+            for (const h of global) {
+                const txt = String(h.content || '').replace(/\n+/g, ' ').slice(0, 250);
+                lines.push(`- [${h.kind}] ${txt}`);
+            }
+        }
+        if (project.length > 0) {
+            lines.push('Project memory relevant to this prompt:');
+            for (const h of project) {
+                const txt = String(h.content || '').replace(/\n+/g, ' ').slice(0, 300);
+                const src = h.source_thread_id ? ' _(thread receipt available — call thread_recall to dig)_' : '';
+                lines.push(`- [${h.kind}] (score ${Number(h.score).toFixed(2)}) ${txt}${src}`);
+            }
+        }
+        lines.push('</stitch-recall>');
+        const payload = { hookSpecificOutput: { hookEventName: 'UserPromptSubmit', additionalContext: lines.join('\n') } };
+        process.stdout.write(JSON.stringify(payload));
     }
+    catch { /* silent — never break a prompt */ }
 }
 /**
  * Derive a thread name for the project at `cwd`. Strategy:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@stitchdb/cli",
-  "version": "0.10.0",
+  "version": "0.11.0",
   "description": "Stitch CLI — manage memory + run agents from your terminal",
   "type": "module",
   "bin": {