npm - @aion0/forge - Versions diffs - 0.10.32 → 0.10.34 - Mend

@aion0/forge 0.10.32 → 0.10.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/RELEASE_NOTES.md +8 -5
package/app/api/activity/summary/route.ts +135 -0
package/components/ActivityPanel.tsx +288 -0
package/components/Dashboard.tsx +48 -29
package/components/PipelineView.tsx +74 -3
package/components/SkillsPanel.tsx +64 -29
package/lib/chat/agent-loop.ts +68 -11
package/lib/chat/build-memory-context.ts +36 -4
package/lib/chat/llm/anthropic.ts +30 -1
package/lib/chat/llm/openai.ts +12 -1
package/lib/chat/llm/types.ts +11 -0
package/lib/chat/session-store.ts +52 -1
package/lib/help-docs/00-overview.md +14 -0
package/lib/help-docs/06-skills.md +11 -1
package/lib/help-docs/12-usage.md +1 -1
package/lib/help-docs/24-watch.md +36 -11
package/lib/watch/watch-runner.ts +76 -1
package/package.json +1 -1

package/components/PipelineView.tsx CHANGED Viewed

@@ -7,6 +7,42 @@ import type { TaskLogEntry } from '@/src/types';
 const PipelineEditor = lazy(() => import('./PipelineEditor'));
 const ConversationEditor = lazy(() => import('./ConversationEditor'));
+// ─── Module-level SWR cache ──────────────────────────────
+// Survives tab switches (but not page refresh). On mount we hydrate
+// state from cache instantly if a fresh entry is present (metadata
+// <30s old, per-workflow runs <15s old) and revalidate in the
+// background. Eliminates the "blank then slow load" feeling when
+// users switch away and come back to this tab. The cache lives at
+// module scope — entries are flushed when the page reloads.
+const _metaCache: { ts: number; workflows: any[]; projects: any[]; agents: any[] } | null = null as any;
+const _runsCache = new Map<string, { ts: number; runs: any[] }>();
+const META_TTL_MS = 30_000;
+const RUNS_TTL_MS = 15_000;
+let _metaCacheRef: typeof _metaCache = _metaCache;
+function readMetaCache() {
+  if (_metaCacheRef && Date.now() - _metaCacheRef.ts < META_TTL_MS) return _metaCacheRef;
+  return null;
+}
+function writeMetaCache(workflows: any[], projects: any[], agents: any[]) {
+  _metaCacheRef = { ts: Date.now(), workflows, projects, agents };
+}
+function readRunsCache(workflow: string) {
+  const c = _runsCache.get(workflow);
+  if (c && Date.now() - c.ts < RUNS_TTL_MS) return c;
+  return null;
+}
+function writeRunsCache(workflow: string, runs: any[]) {
+  _runsCache.set(workflow, { ts: Date.now(), runs });
+}
+// Invalidate after user mutations (delete/import/reinstall) so the next
+// fetchMeta/fetchWorkflowRuns doesn't hydrate stale UI from cache while
+// the live refetch is in flight.
+function clearMetaCache() { _metaCacheRef = null; }
+function clearRunsCache(workflow?: string) {
+  if (workflow) _runsCache.delete(workflow);
+  else _runsCache.clear();
+}
 // ─── Live Task Log Hook ──────────────────────────────────
 // Subscribes to SSE stream for a running task, returns live log entries
 function useTaskStream(taskId: string | undefined, isRunning: boolean) {
@@ -698,6 +734,7 @@ export default function PipelineView({ onViewTask, focusPipelineId, onFocusHandl
       });
       const data = await res.json();
       if (!data.ok) { setMarketErr(data.error || 'install failed'); return; }
+      clearMetaCache();
       await Promise.all([fetchMarketplace(), fetchData()]);
     } catch (e) {
       setMarketErr(e instanceof Error ? e.message : String(e));
@@ -743,25 +780,52 @@ export default function PipelineView({ onViewTask, focusPipelineId, onFocusHandl
   // Lightweight metadata only — no pipeline runs. Runs are lazy-loaded
   // per-workflow on click. Reduces initial page to a single workflow
   // table fetch + projects + agents, even with hundreds of runs on disk.
+  //
+  // SWR pattern: if module-level cache is fresh, hydrate UI instantly
+  // and revalidate in background. First visit per page-load still pays
+  // the full fetch cost; tab switches feel instant.
   const fetchMeta = useCallback(async () => {
+    const cached = readMetaCache();
+    if (cached) {
+      setWorkflows(cached.workflows);
+      setProjects(cached.projects.map((p: any) => ({ name: p.name, path: p.path })));
+      setAgents(cached.agents);
+      // Fall through to the background refetch; add an early `return` here to make the TTL strict.
+    }
     const [wRes, projRes, agentRes] = await Promise.all([
       fetch('/api/pipelines?type=workflows'),
       fetch('/api/projects'),
       fetch('/api/agents'),
     ]);
     const [wData, projData, agentData] = await Promise.all([wRes.json(), projRes.json(), agentRes.json()]);
-    if (Array.isArray(wData)) setWorkflows(wData);
-    if (Array.isArray(projData)) setProjects(projData.map((p: any) => ({ name: p.name, path: p.path })));
-    if (Array.isArray(agentData?.agents)) setAgents(agentData.agents);
+    const ws = Array.isArray(wData) ? wData : [];
+    const ps = Array.isArray(projData) ? projData : [];
+    const ags = Array.isArray(agentData?.agents) ? agentData.agents : [];
+    if (ws.length) setWorkflows(ws);
+    if (ps.length) setProjects(ps.map((p: any) => ({ name: p.name, path: p.path })));
+    if (ags.length) setAgents(ags);
+    writeMetaCache(ws, ps, ags);
   }, []);
   // Fetch the run history for ONE workflow. Used on first expand + on
   // the 5s polling tick (only for the currently active workflow).
   const fetchWorkflowRuns = useCallback(async (workflowName: string, opts: { append?: boolean } = {}) => {
+    // SWR for every non-append call — the first expand of a workflow tab
+    // and each polling refresh (append=true "load more" calls skip the
+    // cache; those want fresh server pagination).
+    if (!opts.append) {
+      const cached = readRunsCache(workflowName);
+      if (cached) {
+        setPipelines((prev) => {
+          const others = prev.filter((p) => p.workflowName !== workflowName);
+          return [...others, ...cached.runs];
+        });
+      }
+    }
     try {
       const res = await fetch(`/api/pipelines?workflow=${encodeURIComponent(workflowName)}&limit=100`);
       const data: Pipeline[] = await res.json();
       if (!Array.isArray(data)) return;
+      if (!opts.append) writeRunsCache(workflowName, data);
       setPipelines(prev => {
         if (!opts.append) {
           // Replace this workflow's runs (covers status changes during polling),
@@ -890,6 +954,8 @@ export default function PipelineView({ onViewTask, focusPipelineId, onFocusHandl
       body: JSON.stringify({ action: 'delete' }),
     });
     if (selectedPipeline?.id === id) setSelectedPipeline(null);
+    const wf = pipelines.find((p) => p.id === id)?.workflowName;
+    clearRunsCache(wf);
     fetchData();
   };
@@ -1015,6 +1081,8 @@ initial_prompt: "{{input.task}}"
                               });
                               const data = await res.json();
                               if (!data.ok) { setMarketErr(data.error || 'reinstall failed'); return; }
+                              clearMetaCache();
+                              clearRunsCache(r.name);
                               await Promise.all([fetchMarketplace(), fetchData()]);
                               alert(`"${r.name}" reinstalled from registry (v${r.version}).`);
                             } catch (e) {
@@ -1052,6 +1120,7 @@ initial_prompt: "{{input.task}}"
                             });
                             const data = await res.json();
                             if (!data.ok) { setMarketErr(data.error || 'import failed'); return; }
+                            clearMetaCache();
                             await Promise.all([fetchMarketplace(), fetchData()]);
                             alert(`Imported as "${data.installed_as}". Open it from the Workflows list.`);
                           } catch (e) {
@@ -1287,6 +1356,8 @@ initial_prompt: "{{input.task}}"
                           const data = await res.json();
                           if (!res.ok || data.error) { alert(`Delete failed: ${data.error || res.status}`); return; }
                           if (activeWorkflow === w.name) setActiveWorkflow(null);
+                          clearMetaCache();
+                          clearRunsCache(w.name);
                           fetchData();
                         } catch { alert('Delete failed'); }
                       }}

package/components/SkillsPanel.tsx CHANGED Viewed

@@ -142,7 +142,7 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
   const [syncing, setSyncing] = useState(false);
   const [loading, setLoading] = useState(true);
   const [installTarget, setInstallTarget] = useState<{ skill: string; show: boolean }>({ skill: '', show: false });
-  const [typeFilter, setTypeFilter] = useState<'all' | 'skill' | 'command' | 'local' | 'rules' | 'plugins' | 'connectors' | 'crafts' | 'recipes' | 'pipelines'>('all');
+  const [typeFilter, setTypeFilter] = useState<'all' | 'skill' | 'command' | 'local' | 'rules' | 'plugins' | 'connectors' | 'crafts' | 'recipes' | 'pipelines'>('pipelines');
   const [localItems, setLocalItems] = useState<{ name: string; type: string; scope: string; fileCount: number; projectPath?: string }[]>([]);
   // Rules (CLAUDE.md templates)
   const [rulesTemplates, setRulesTemplates] = useState<{ id: string; name: string; description: string; tags: string[]; builtin: boolean; isDefault: boolean; content: string }[]>([]);
@@ -414,30 +414,65 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
       <div className="flex items-center justify-between px-4 py-2 border-b border-[var(--border)] shrink-0">
         <div className="flex items-center gap-2">
           <span className="text-xs font-semibold text-[var(--text-primary)]">Marketplace</span>
-          {/* Grouped category dropdown — replaces the long inline tab bar.
-              Native <optgroup> gives free keyboard nav + a coherent layout
-              regardless of category count. */}
-          <select
-            value={typeFilter}
-            onChange={(e) => setTypeFilter(e.target.value as typeof typeFilter)}
-            className="text-[10px] px-2 py-1 rounded bg-[var(--bg-tertiary)] border border-[var(--border)] text-[var(--text-primary)] focus:outline-none focus:border-[var(--accent)]"
-          >
-            <optgroup label="Catalog">
-              <option value="all">All ({skills.length})</option>
-              <option value="skill">Skills ({skillCount})</option>
-              <option value="command">Commands ({commandCount})</option>
-              <option value="local">Local ({localCount})</option>
-              <option value="rules">Rules</option>
-            </optgroup>
-            <optgroup label="Extensions">
-              <option value="plugins">Plugins</option>
-              <option value="connectors">Connectors</option>
-              <option value="crafts">Crafts</option>
-            </optgroup>
-            <optgroup label="Templates">
-              <option value="pipelines">Pipelines</option>
-            </optgroup>
-          </select>
+          {/* Three group-scoped controls instead of one big <select>.
+              Order (intended to track usage frequency) matches the
+              `groups` array: Templates → Extensions (Connectors lives
+              here) → Catalog. A group with a single option renders as
+              a plain button; the others render as dropdowns. Each
+              control highlights when its current value is active;
+              inactive dropdowns show the group label as a placeholder.
+              Picking from any sets the single global typeFilter. */}
+          {(() => {
+            const groups: Array<{ label: string; opts: Array<{ value: typeof typeFilter; label: string }> }> = [
+              { label: 'Templates', opts: [
+                { value: 'pipelines',  label: 'Pipelines' },
+              ]},
+              { label: 'Extensions', opts: [
+                { value: 'connectors', label: 'Connectors' },
+                { value: 'plugins',    label: 'Plugins' },
+                { value: 'crafts',     label: 'Crafts' },
+              ]},
+              { label: 'Catalog', opts: [
+                { value: 'all',     label: `All (${skills.length})` },
+                { value: 'skill',   label: `Skills (${skillCount})` },
+                { value: 'command', label: `Commands (${commandCount})` },
+                { value: 'local',   label: `Local (${localCount})` },
+                { value: 'rules',   label: 'Rules' },
+              ]},
+            ];
+            return groups.map((g) => {
+              const isActive = g.opts.some((o) => o.value === typeFilter);
+              if (g.opts.length === 1) {
+                const only = g.opts[0];
+                return (
+                  <button
+                    key={g.label}
+                    onClick={() => setTypeFilter(only.value)}
+                    className={`text-[10px] px-2 py-1 rounded border ${
+                      isActive
+                        ? 'border-[var(--accent)] text-[var(--accent)] bg-[var(--accent)]/10'
+                        : 'border-[var(--border)] text-[var(--text-primary)] bg-[var(--bg-tertiary)] hover:border-[var(--text-secondary)]'
+                    }`}
+                  >{only.label}</button>
+                );
+              }
+              return (
+                <select
+                  key={g.label}
+                  value={isActive ? (typeFilter as string) : ''}
+                  onChange={(e) => { if (e.target.value) setTypeFilter(e.target.value as typeof typeFilter); }}
+                  className={`text-[10px] px-2 py-1 rounded bg-[var(--bg-tertiary)] border focus:outline-none ${
+                    isActive
+                      ? 'border-[var(--accent)] text-[var(--accent)]'
+                      : 'border-[var(--border)] text-[var(--text-primary)] focus:border-[var(--accent)]'
+                  }`}
+                >
+                  <option value="" disabled hidden>{g.label}</option>
+                  {g.opts.map((o) => (
+                    <option key={o.value} value={o.value as string}>{o.label}</option>
+                  ))}
+                </select>
+              );
+            });
+          })()}
         </div>
         <span className="text-[8px] px-1.5 py-0.5 rounded bg-blue-500/15 text-blue-400">Claude Code</span>
         <input
@@ -697,7 +732,7 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
                             {installTarget.skill === itemName && installTarget.show && (
                               <>
                                 <div className="fixed inset-0 z-40" onClick={() => setInstallTarget({ skill: '', show: false })} />
-                                <div className="absolute right-0 top-7 w-[200px] bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
+                                <div className="absolute right-0 top-7 w-[200px] max-h-[60vh] overflow-y-auto bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
                                   <button
                                     onClick={async () => {
                                       const res = await fetch('/api/skills/local', { method: 'POST', headers: { 'Content-Type': 'application/json' },
@@ -708,7 +743,7 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
                                       setInstallTarget({ skill: '', show: false });
                                       fetchSkills();
                                     }}
-                                    className="w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] text-[var(--text-primary)]"
+                                    className="w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] text-[var(--text-primary)] sticky top-0 bg-[var(--bg-secondary)]"
                                   >Global (~/.claude)</button>
                                   <div className="border-t border-[var(--border)] my-0.5" />
                                   {projects.map(p => (
@@ -757,10 +792,10 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
                         {installTarget.skill === skill.name && installTarget.show && (
                           <>
                             <div className="fixed inset-0 z-40" onClick={() => setInstallTarget({ skill: '', show: false })} />
-                            <div className="absolute right-0 top-7 w-[180px] bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
+                            <div className="absolute right-0 top-7 w-[180px] max-h-[60vh] overflow-y-auto bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
                               <button
                                 onClick={() => install(skill.name, 'global')}
-                                className={`w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] ${
+                                className={`w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] sticky top-0 bg-[var(--bg-secondary)] ${
                                   skill.installedGlobal ? 'text-[var(--green)]' : 'text-[var(--text-primary)]'
                                 }`}
                               >

package/lib/chat/agent-loop.ts CHANGED Viewed

@@ -431,7 +431,7 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
       memStore.listBlocks({ pinned: true, scope: 'both' }),
       memStore.listBlocks({ scope: 'both' }),
       memStore.search(args.userText, 8),
-      buildMemoryContext({ store: memStore, currentUserMessage: args.userText }),
+      buildMemoryContext({ store: memStore, currentUserMessage: args.userText, currentSessionId: args.sessionId }),
     ]);
     const pinnedBlocks = bp.status === 'fulfilled' ? bp.value : [];
     const allBlocks = ba.status === 'fulfilled' ? ba.value : [];
@@ -497,17 +497,38 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
     ...memTools.map((m) => m.def),
     watchTool.def,
   ];
-  const allTools: LlmTool[] = [
-    ...builtinDefsAll.map((t) => ({
-      name: t.name,
-      description: t.description,
-      input_schema: t.input_schema,
-    })),
-    ...connectorTools,
-  ];
+  const builtinToolDefs: LlmTool[] = builtinDefsAll.map((t) => ({
+    name: t.name,
+    description: t.description,
+    input_schema: t.input_schema,
+  }));
+  // ── Sticky narrow helper ─────────────────────────────────────────
+  // After a turn that called connector tools, on the NEXT turn we
+  // restrict tool list to ONLY the connectors that were used. This
+  // shrinks tools from 99 → ~10 in a typical mantis or nac flow,
+  // saving ~18K tokens per turn AND letting the model focus its
+  // attention (helps local models avoid hallucination).
+  function pickConnectorNamespacesUsed(blocks: ContentBlock[]): Set<string> {
+    const ns = new Set<string>();
+    for (const b of blocks) {
+      if (b.type === 'tool_use' && b.name.includes('.')) {
+        ns.add(b.name.split('.')[0]!);
+      }
+    }
+    return ns;
+  }
+  const sessionSystemPrompt = session.system_prompt;
+  function buildSystem(tools: LlmTool[]): string {
+    let s = buildSystemPrompt(tools, builtinDefsAll, sessionSystemPrompt);
+    if (narrowDirective) s += narrowDirective;
+    return s;
+  }
+  const baseConnectorTools = connectorTools;  // post-initial-narrow snapshot
+  let allTools: LlmTool[] = [...builtinToolDefs, ...baseConnectorTools];
-  let system = buildSystemPrompt(connectorTools, builtinDefsAll, session.system_prompt);
-  if (narrowDirective) system += narrowDirective;
+  let system = buildSystem(baseConnectorTools);
   if (memContext) system += '\n\n─── Memory context (auto-loaded) ───\n' + memContext;
   if (memStore.enabled) {
     const searchHint = memStore.kind === 'local'
@@ -538,9 +559,39 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
         return { ok: false, error: 'empty history' };
       }
+      // ── Sticky narrow: shrink connector tools to only what the last turn used.
+      // First iteration: keep the user-mention-narrowed list. Iter 2+:
+      // if the previous assistant turn called e.g. mantis.get_bug, restrict
+      // connector tools to mantis.* only (built-in tools are always kept) —
+      // local models behave much better with a focused tool set, and we
+      // save ~18K tokens per turn.
+      if (iter > 1 && assistantBlocksAccum.length > 0) {
+        const usedNs = pickConnectorNamespacesUsed(assistantBlocksAccum);
+        if (usedNs.size > 0) {
+          const narrowedConn = baseConnectorTools.filter((t) =>
+            usedNs.has(t.name.split('.')[0]!));
+          if (narrowedConn.length > 0 && narrowedConn.length < baseConnectorTools.length) {
+            allTools = [...builtinToolDefs, ...narrowedConn];
+            system = buildSystem(narrowedConn);
+            if (memContext) system += '\n\n─── Memory context (auto-loaded) ───\n' + memContext;
+            console.log(`[chat] sticky narrow → ${[...usedNs].join(',')} (${narrowedConn.length}/${baseConnectorTools.length} connector tools)`);
+          }
+        }
+      }
       assistantBlocksAccum = [];
       let currentTextBuf = '';
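+      // ── Token composition log (input side, BEFORE the call) ──
+      // Heuristic char/4. Lets you correlate later with the provider's
+      // real usage.input_tokens — if the gap widens turn-over-turn, the
+      // memory/tools blob is silently growing. `memory` is already part
+      // of `system` (memContext is appended to it), so it is reported on
+      // its own but not added again to est_in.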
+      const _systemTok = Math.ceil(system.length / 4);
+      const _memCtxTok = Math.ceil(memContext.length / 4);
+      const _toolsTok  = Math.ceil(JSON.stringify(allTools).length / 4);
+      const _historyTok = history.reduce((s, m) => s + estimateTokens(m), 0);
+      const _historyMsgs = history.length;
+      console.log(`[chat-tokens] session=${args.sessionId} turn=${iter} est_in=${_systemTok + _historyTok + _toolsTok} system=${_systemTok} history=${_historyTok}(${_historyMsgs}msgs) memory=${_memCtxTok} tools=${_toolsTok}`);
       const result = await streamLlm(
         {
           provider: provider.type,
@@ -563,6 +614,12 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
         },
       );
+      // ── Real usage from the provider (when reported) ──
+      if (result.usage) {
+        const u = result.usage;
+        console.log(`[chat-tokens] session=${args.sessionId} turn=${iter} REAL in=${u.inputTokens ?? '?'} out=${u.outputTokens ?? '?'} cache_read=${u.cacheReadTokens ?? 0} cache_create=${u.cacheCreationTokens ?? 0} stop=${result.stopReason}`);
+      }
       lastStop = result.stopReason;
       assistantBlocksAccum = result.content;

package/lib/chat/build-memory-context.ts CHANGED Viewed

@@ -31,6 +31,10 @@ export interface BuildMemoryContextOpts {
   /** Prefixes that mark internal-only blocks (cursor / health / etc).
    *  Defaults to lib/memory/keys.INTERNAL_KEY_PREFIXES. */
   excludeKeyPrefixes?: readonly string[];
+  /** Current chat session id. When set, blocks keyed `chat:<otherId>:*`
+   *  are dropped — other sessions' summaries are noise in this chat and
+   *  were the primary source of "old data bleeding into new chat". */
+  currentSessionId?: string;
 }
 export interface BuildMemoryContextResult {
@@ -46,18 +50,22 @@ export async function buildMemoryContext(opts: BuildMemoryContextOpts): Promise<
     topK = 6,
     maxBlocks = 50,
     excludeKeyPrefixes = INTERNAL_KEY_PREFIXES,
+    currentSessionId,
   } = opts;
-  const blocks = filterInternal(
-    await safe(() => store.listBlocks({ pinned: true }), [] as MemoryBlock[]),
-    excludeKeyPrefixes,
+  const blocks = dropForeignChat(
+    filterInternal(
+      await safe(() => store.listBlocks({ pinned: true }), [] as MemoryBlock[]),
+      excludeKeyPrefixes,
+    ),
+    currentSessionId,
   ).slice(0, maxBlocks);
   const q = (currentUserMessage || '').trim();
   let hits: SearchHit[] = [];
   if (q) {
     const rawHits = await safe(() => store.search(q, topK), [] as SearchHit[]);
-    hits = filterInternalHits(rawHits, excludeKeyPrefixes);
+    hits = dropForeignChatHits(filterInternalHits(rawHits, excludeKeyPrefixes), currentSessionId);
   }
   return { text: renderMemoryContext(blocks, hits), blocks, hits };
@@ -81,6 +89,30 @@ function filterInternalHits(hits: SearchHit[], prefixes: readonly string[]): Sea
   });
 }
+/** Strip `chat:<otherSessionId>:*` blocks. Summary blocks contain raw
+ *  past-conversation excerpts; surfacing them in a different chat is
+ *  what made "new empty chat" leak old session content. Facts
+ *  (`fact:*`) and any non-chat-prefixed pinned blocks stay — they're
+ *  the intentional cross-session signal. No-op if no sessionId given. */
+function dropForeignChat(blocks: MemoryBlock[], sessionId?: string): MemoryBlock[] {
+  if (!sessionId) return blocks;
+  return blocks.filter((b) => isOwnChatOrNotChat(b.key, sessionId));
+}
+function dropForeignChatHits(hits: SearchHit[], sessionId?: string): SearchHit[] {
+  if (!sessionId) return hits;
+  return hits.filter((h) => {
+    if (!h.id?.startsWith('block:')) return true; // Graphiti hit, no key to inspect — keep
+    return isOwnChatOrNotChat(h.id.slice('block:'.length), sessionId);
+  });
+}
+function isOwnChatOrNotChat(key: string, sessionId: string): boolean {
+  if (!key.startsWith('chat:')) return true;
+  // key shape: chat:<sessionId>:summary:<ts> → split[1] === sessionId
+  return key.split(':', 2)[1] === sessionId;
+}
 async function safe<T>(fn: () => Promise<T>, fallback: T): Promise<T> {
   try {
     return await fn();

package/lib/chat/llm/anthropic.ts CHANGED Viewed

@@ -132,14 +132,30 @@ export const anthropicAdapter: LlmAdapter = {
     // execute — chat owns dispatch (destructive confirm, browser bridge,
     // memory tools etc all live in agent-loop). Setting stopWhen with
     // stepCountIs(1) prevents the SDK from auto-rolling a second step.
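+    // Build tool record. Mark the LAST tool with cache_control so
+    // Anthropic-family backends (or LiteLLM proxies that forward it)
+    // cache the prompt prefix up to and including the tool definitions.
+    // NOTE(review): Anthropic orders the cacheable prefix as tools →
+    // system → messages, so this breakpoint likely covers the tools
+    // only, not the system prompt — confirm. Subsequent turns within
+    // the 5-min TTL pay 0.1× input price for the cached portion instead
+    // of 1×. Backends that don't honor cache_control silently ignore
+    // it, costing nothing.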
+    const toolNames = req.tools.map((t) => t.name);
+    const lastName = toolNames[toolNames.length - 1];
     const tools: Record<string, any> = {};
     for (const t of req.tools) {
       tools[encodeToolName(t.name)] = {
         description: t.description,
         inputSchema: jsonSchema(t.input_schema),
+        ...(t.name === lastName ? {
+          providerOptions: {
+            anthropic: { cacheControl: { type: 'ephemeral' } },
+          },
+        } : {}),
       };
     }
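+    // Single cache breakpoint at end-of-tools. NOTE(review): Anthropic
+    // builds the cache prefix in the order tools → system → messages,
+    // so a breakpoint on the last tool caches the tool definitions
+    // only; caching the system prompt as well needs its own marker on
+    // system — confirm against the prompt-caching docs.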
     const result = streamText({
       model: client(req.model),
       system: req.system,
@@ -169,6 +185,19 @@ export const anthropicAdapter: LlmAdapter = {
     if (textBuf.length > 0) content.push({ type: 'text', text: textBuf });
     const finishReason = await result.finishReason;
-    return { stopReason: mapStop(finishReason), content };
+    let usage;
+    try {
+      const u: any = await result.usage;
+      if (u) {
+        usage = {
+          inputTokens: u.inputTokens ?? u.promptTokens,
+          outputTokens: u.outputTokens ?? u.completionTokens,
+          cacheReadTokens: u.cachedInputTokens ?? u.cacheReadInputTokens,
+          cacheCreationTokens: u.cacheCreationInputTokens,
+          totalTokens: u.totalTokens,
+        };
+      }
+    } catch {}
+    return { stopReason: mapStop(finishReason), content, usage };
   },
 };

package/lib/chat/llm/openai.ts CHANGED Viewed

@@ -108,6 +108,17 @@ export const openaiAdapter: LlmAdapter = {
     if (textBuf.length > 0) content.push({ type: 'text', text: textBuf });
     const finishReason = await result.finishReason;
-    return { stopReason: mapStop(finishReason), content };
+    let usage;
+    try {
+      const u: any = await result.usage;
+      if (u) {
+        usage = {
+          inputTokens: u.inputTokens ?? u.promptTokens,
+          outputTokens: u.outputTokens ?? u.completionTokens,
+          totalTokens: u.totalTokens,
+        };
+      }
+    } catch {}
+    return { stopReason: mapStop(finishReason), content, usage };
   },
 };

package/lib/chat/llm/types.ts CHANGED Viewed

@@ -21,9 +21,20 @@ export interface LlmCallbacks {
 export type StopReason = 'end_turn' | 'tool_use' | 'max_tokens' | 'refusal' | 'error' | 'other';
+export interface LlmTurnUsage {
+  inputTokens?: number;
+  outputTokens?: number;
+  cacheReadTokens?: number;
+  cacheCreationTokens?: number;
+  totalTokens?: number;
+}
 export interface LlmTurnResult {
   stopReason: StopReason;
   content: ContentBlock[];
+  /** Token usage from the provider, if reported. May be partially-filled
+   *  or absent for proxies that don't expose it. */
+  usage?: LlmTurnUsage;
 }
 export interface LlmRequest {

package/lib/chat/session-store.ts CHANGED Viewed

@@ -327,16 +327,67 @@ export function listMessagesCapped(
   // loop (provider will see a single message — still valid).
   const keptGroups: Message[][] = [];
   let used = 0;
+  let evictedCount = 0;
   for (let i = groups.length - 1; i >= 0; i--) {
     const g = groups[i];
     const cost = g.reduce((s, m) => s + estimateTokens(m), 0);
-    if (keptGroups.length > 0 && used + cost > tokenBudget) break;
+    if (keptGroups.length > 0 && used + cost > tokenBudget) {
+      evictedCount = i + 1; // groups [0..i] would have been evicted
+      break;
+    }
     keptGroups.unshift(g);
     used += cost;
   }
+  // ── Pin the SESSION's first user message (task brief) ──────────
+  // Even if eviction would normally drop it, the user's opening prompt
+  // defines the task. Losing it causes the model to lose track of
+  // what was asked — symptom: model writes "summarize all X" and
+  // hallucinates instead of processing the specific list the user
+  // gave. Re-fetch the absolute first user message, prepend if not
+  // already in keptGroups. Cap its tokens so a truly enormous brief
+  // can't break the call — keep first ~2k tokens.
+  if (evictedCount > 0) {
+    const firstUserRow = db().prepare(`
+      SELECT * FROM chat_messages WHERE session_id = ? AND role = 'user'
+      ORDER BY ts ASC LIMIT 1
+    `).get(session_id) as MessageRow | undefined;
+    if (firstUserRow) {
+      const firstUserMsg = rowToMessage(firstUserRow);
+      const alreadyKept = keptGroups.some((g) => g.some((m) => m.id === firstUserMsg.id));
+      if (!alreadyKept) {
+        // Cap to ~2000 tokens of brief (≈8KB) — tasks longer than that
+        // should be split anyway; preserving the head is enough to
+        // anchor the model to the original ask.
+        const FIRST_BRIEF_TOKEN_CAP = 2000;
+        let pinned = firstUserMsg;
+        if (estimateTokens(firstUserMsg) > FIRST_BRIEF_TOKEN_CAP) {
+          pinned = clipMessageToTokens(firstUserMsg, FIRST_BRIEF_TOKEN_CAP);
+        }
+        keptGroups.unshift([pinned]);
+        console.log(`[session-cap] pinned first user message (id=${firstUserMsg.id}) — ${evictedCount} groups evicted, ${used} tokens used / ${tokenBudget} budget`);
+      }
+    } else {
+      console.log(`[session-cap] ${evictedCount} groups evicted, no first user message found to pin`);
+    }
+  }
   return keptGroups.flat();
 }
+/** Clip a message's text content to a soft token cap. Tool blocks are
+ *  preserved verbatim (they're usually small structural data); only
+ *  long text blocks get a head-only truncation with a marker. */
+function clipMessageToTokens(m: Message, tokenCap: number): Message {
+  const charCap = tokenCap * 4; // matches estimateTokens char/4 heuristic
+  const blocks = m.blocks.map((b) => {
+    if (b.type === 'text' && b.text.length > charCap) {
+      return { ...b, text: b.text.slice(0, charCap) + '\n\n[…task brief truncated to keep in-context]' };
+    }
+    return b;
+  });
+  return { ...m, blocks };
+}
 export function deleteMessage(id: string): boolean {
   ensureSchema();
   const r = db().prepare(`DELETE FROM chat_messages WHERE id = ?`).run(id);

package/lib/help-docs/00-overview.md CHANGED Viewed

@@ -57,3 +57,17 @@ forge server start --port 4000  # custom port
 forge server start --dir ~/.forge-test  # custom data dir
 forge --reset-password          # reset admin password
 ```
+## Dashboard top bar
+The top toolbar is split between **at-a-glance signals** (left to right) and a **user menu** (right edge):
+- **? Help** — opens the in-app Help AI.
+- **Browser ▾** — open an embedded browser (float / right / left dock or external tab).
+- **Tunnel** — start/stop the Cloudflare tunnel + online-count badge.
+- **Alerts** — notifications (task done, pipeline failed, tunnel events).
+Next to the **Automation** tab in the left-side nav sits a small **Activity** sub-pill — running pipelines + upcoming schedules + recent runs (`▶<running>` `⏰<upcoming>`). Click for a 3-section dropdown with a "view" link that jumps to the run. It lives there because its content is the live read-side of Automation.
+- **User menu (▾)** — `⚙ Settings` + `💬 Chat (web) ↗` at the top (Chat opens in a new tab so the dashboard isn't replaced); then a divider, then the periodic-check screens `📊 Monitor` (background watches, processes, queues), `🔐 Login Status` (connector creds), `💰 Usage` (token/cost analytics), `📜 Logs`, `📱 Mobile View ↗`; then `⏻ Logout`.
+Periodic-check screens (Monitor / Login Status / Usage) live inside the user menu so the top bar only shows things worth glancing at.