@stitchdb/cli 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/cli.js +118 -14
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -629,13 +629,28 @@ async function cmdHook(args) {
629
629
  const stitch = client(cfg);
630
630
  const projectTag = (threadName.split('/')[0] || threadName).toLowerCase();
631
631
  const baseUrl = cfg.baseUrl || 'https://db.stitchdb.com';
632
- const [thread, memHits, workspaces, fileSummaries, aboutMems] = await Promise.all([
632
+ // Adaptive sizing — scale what we inject by how much the project
633
+ // actually has. A 5k-memory pool surfacing 8 random memories is
634
+ // noise; better to inject 0 and tell Claude to `recall` on demand.
635
+ const [thread, allProjectMems, workspaces, fileSummaries, aboutMems] = await Promise.all([
633
636
  stitch.thread(threadName).recall({ last: 5 }).catch(() => ({ thread_id: '', recent: [], semantic: [] })),
634
- stitch.recall(projectTag, { k: 8 }).catch(() => []),
637
+ stitch.list({ limit: 1 }).catch(() => []), // probe the pool size cheaply
635
638
  stitch.workspaces.list().catch(() => []),
636
639
  stitch.list({ limit: 12 }).then((all) => all.filter((m) => m.tags.some((t) => t.startsWith('file:')))).catch(() => []),
637
640
  stitch.list({ tag: 'workspace:about', limit: 1 }).catch(() => []),
638
641
  ]);
642
+ // Estimate the workspace's total memory count from a single page
643
+ // header. We just need to know "small / medium / huge" — exact
644
+ // count is overkill. (D1 doesn't expose count in list — approximate
645
+ // by listing 200 once and capping.)
646
+ const sample = await stitch.list({ limit: 200 }).catch(() => []);
647
+ const poolSize = Array.isArray(sample) ? sample.length : 0;
648
+ // Tier the injection: tiny / small / medium / huge.
649
+ const memSliceK = poolSize <= 50 ? 8 : poolSize < 500 ? 8 : 0;
650
+ const turnSliceK = (thread.recent?.length ?? 0) < 5 ? 5 : (thread.recent?.length ?? 0) < 100 ? 5 : 2;
651
+ const memHits = memSliceK > 0
652
+ ? await stitch.recall(projectTag, { k: memSliceK }).catch(() => [])
653
+ : [];
639
654
  // Look up the current workspace using the client's resolved id; fall
640
655
  // back to first if resolveWorkspace failed (shouldn't normally).
641
656
  const currentWsId = await stitch.resolveWorkspace().catch(() => null);
@@ -655,8 +670,15 @@ async function cmdHook(args) {
655
670
  catch { /* ignore */ }
656
671
  const lines = [];
657
672
  lines.push('<stitch-context>');
658
- lines.push(`Project: ${threadName} · Workspace: ${currentWs?.name || '(unknown)'} · Stitch MCP tools: recall, remember, recall_global, remember_global, thread_recall, thread_append, workspace_setup, file_summary, file_summary_save.`);
673
+ lines.push(`Project: ${threadName} · Workspace: ${currentWs?.name || '(unknown)'} · ${poolSize} memories indexed · Stitch MCP tools: recall, remember, recall_global, remember_global, thread_recall, thread_append, workspace_setup, file_summary, file_summary_save.`);
659
674
  lines.push('');
675
+ // Huge-pool mode: don't pre-load any memories. Tell Claude what's
676
+ // available and that it should pull what's relevant on demand.
677
+ if (poolSize >= 200 && memHits.length === 0) {
678
+ lines.push('### Memory pool');
679
+ lines.push(`This project has ${poolSize}+ memories. Don't try to load them all — call \`recall(query)\` whenever the user references prior decisions, code patterns, or preferences. Cheaper and more relevant than reading them upfront.`);
680
+ lines.push('');
681
+ }
660
682
  // Nudge the AI to set a meaningful workspace name once.
661
683
  if (currentWs?.name === 'default') {
662
684
  lines.push('### ⚠ Workspace is still named "default"');
@@ -705,13 +727,17 @@ async function cmdHook(args) {
705
727
  }
706
728
  if (thread.recent && thread.recent.length > 0) {
707
729
  lines.push('### Most recent turns (continue from here)');
708
- for (const t of thread.recent.slice(-5)) {
730
+ for (const t of thread.recent.slice(-turnSliceK)) {
709
731
  const txt = String(t.content || '').replace(/\n+/g, ' ').slice(0, 300);
710
732
  lines.push(`- **${t.role}**: ${txt}`);
711
733
  }
712
734
  lines.push('');
713
735
  }
714
- lines.push('Call `recall` for project memory, `recall_global` for cross-project user prefs, `thread_recall` for older turns, `file_summary` BEFORE reading any non-trivial file. Save user-level habits/preferences with `remember_global`; project facts with `remember`.');
736
+ lines.push('## How to use this memory layer');
737
+ lines.push('- The user mentions prior work, decisions, or "what we discussed" → call `recall(query)` (or `thread_recall(thread, semantic: ...)` for verbatim turns) BEFORE answering. Don\'t guess from the snippets above; they\'re a teaser, not the truth.');
738
+ lines.push('- About to open a non-trivial file → call `file_summary(path)` first. If hash matches the cached summary, you can skip the full read entirely.');
739
+ lines.push('- Learn a durable user-level habit ("I always use Postgres", "commit before reverting") → save with `remember_global` so it surfaces across every project. Project-specific facts → `remember`.');
740
+ lines.push('- A surfaced memory has `_(from thread, N turns)_` next to it → those turns are queryable via `thread_recall`. Reach for them when challenged on a fact.');
715
741
  lines.push('</stitch-context>');
716
742
  process.stdout.write(lines.join('\n'));
717
743
  }
@@ -735,19 +761,97 @@ async function cmdHook(args) {
735
761
  content = content.trim();
736
762
  if (!content)
737
763
  return;
764
+ // For UserPromptSubmit: log the turn AND fetch task-relevant memory in
765
+ // parallel, so the model sees only what's actually relevant to *this*
766
+ // prompt — not 8 random preloaded memories from session start. Task
767
+ // changes self-surface because the recall hits change with the prompt.
768
+ if (eventName === 'UserPromptSubmit') {
769
+ await Promise.all([
770
+ (async () => {
771
+ try {
772
+ const stitch = client(cfg);
773
+ await stitch.thread(threadName).append({ role: role, content });
774
+ }
775
+ catch { /* silent */ }
776
+ })(),
777
+ handleUserPromptInjection(cfg, content),
778
+ ]);
779
+ return;
780
+ }
781
+ // Stop: log the assistant turn, then maybe-distill.
738
782
  try {
739
783
  const stitch = client(cfg);
740
- await stitch.thread(threadName).append({ role, content });
741
- }
742
- catch {
743
- /* silent */
784
+ await stitch.thread(threadName).append({ role: role, content });
744
785
  }
745
- // After Stop, opportunistically kick off a distillation pass in the
746
- // background (fire-and-forget). Won't block the session; debouncing
747
- // (cooldown + min-new-turns) is enforced inside maybeAutoDistill.
748
- if (eventName === 'Stop') {
749
- maybeAutoDistill(threadName).catch(() => { });
786
+ catch { /* silent */ }
787
+ maybeAutoDistill(threadName).catch(() => { });
788
+ }
789
+ /**
790
+ * Per-prompt smart context injection. Runs `recall(prompt)` against the
791
+ * project workspace + the user's `_global` workspace, score-gates the hits,
792
+ * and writes a Claude-Code-flavoured `additionalContext` JSON object to
793
+ * stdout. Time-bounded so a slow proxy doesn't delay the user's prompt.
794
+ *
795
+ * Why per-prompt: a static SessionStart block must guess what'll matter
796
+ * across the whole session. A per-prompt recall sees the actual question
797
+ * and pulls memory tailored to *that* — much higher signal per token.
798
+ * A task switch ("now let's do the dashboard") naturally surfaces the
799
+ * dashboard memories; an unrelated follow-up ("ok run the tests") returns
800
+ * low scores and we inject nothing — no noise.
801
+ */
802
+ const PROMPT_INJECTION_TIMEOUT_MS = 2000;
803
+ const PROMPT_INJECTION_MIN_SCORE = 0.4;
804
+ async function handleUserPromptInjection(cfg, prompt) {
805
+ if (!prompt || prompt.length < 10)
806
+ return;
807
+ try {
808
+ const baseUrl = cfg.baseUrl || 'https://db.stitchdb.com';
809
+ const stitch = client(cfg);
810
+ const tOut = new Promise((_, rej) => setTimeout(() => rej(new Error('timeout')), PROMPT_INJECTION_TIMEOUT_MS));
811
+ // Project recall + global recall in parallel; tolerate either failing.
812
+ const projectHitsP = stitch.recall(prompt, { k: 3 }).catch(() => []);
813
+ const globalHitsP = (async () => {
814
+ try {
815
+ const ws = await stitch.workspaces.list();
816
+ const g = ws.find((w) => w.name === '_global');
817
+ if (!g)
818
+ return [];
819
+ const gc = new Stitch({ apiKey: cfg.apiKey, baseUrl, workspace: g.id });
820
+ return gc.recall(prompt, { k: 2 }).catch(() => []);
821
+ }
822
+ catch {
823
+ return [];
824
+ }
825
+ })();
826
+ const [projectHits, globalHits] = await Promise.race([
827
+ Promise.all([projectHitsP, globalHitsP]),
828
+ tOut,
829
+ ]);
830
+ const project = projectHits.filter((h) => h.score >= PROMPT_INJECTION_MIN_SCORE).slice(0, 2);
831
+ const global = globalHits.filter((h) => h.score >= PROMPT_INJECTION_MIN_SCORE).slice(0, 1);
832
+ if (project.length === 0 && global.length === 0)
833
+ return;
834
+ const lines = ['<stitch-recall>'];
835
+ if (global.length > 0) {
836
+ lines.push('User-level rules relevant here:');
837
+ for (const h of global) {
838
+ const txt = String(h.content || '').replace(/\n+/g, ' ').slice(0, 250);
839
+ lines.push(`- [${h.kind}] ${txt}`);
840
+ }
841
+ }
842
+ if (project.length > 0) {
843
+ lines.push('Project memory relevant to this prompt:');
844
+ for (const h of project) {
845
+ const txt = String(h.content || '').replace(/\n+/g, ' ').slice(0, 300);
846
+ const src = h.source_thread_id ? ' _(thread receipt available — call thread_recall to dig)_' : '';
847
+ lines.push(`- [${h.kind}] (score ${Number(h.score).toFixed(2)}) ${txt}${src}`);
848
+ }
849
+ }
850
+ lines.push('</stitch-recall>');
851
+ const payload = { hookSpecificOutput: { hookEventName: 'UserPromptSubmit', additionalContext: lines.join('\n') } };
852
+ process.stdout.write(JSON.stringify(payload));
750
853
  }
854
+ catch { /* silent — never break a prompt */ }
751
855
  }
752
856
  /**
753
857
  * Derive a thread name for the project at `cwd`. Strategy:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stitchdb/cli",
3
- "version": "0.10.0",
3
+ "version": "0.11.0",
4
4
  "description": "Stitch CLI — manage memory + run agents from your terminal",
5
5
  "type": "module",
6
6
  "bin": {