@aion0/forge 0.10.32 → 0.10.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,42 @@ import type { TaskLogEntry } from '@/src/types';
7
7
  const PipelineEditor = lazy(() => import('./PipelineEditor'));
8
8
  const ConversationEditor = lazy(() => import('./ConversationEditor'));
9
9
 
10
// ─── Module-level SWR cache ──────────────────────────────
// Lives at module scope, so it survives component unmount/remount (tab
// switches) but not a page reload. On mount the component hydrates state
// from a fresh entry instantly and revalidates in the background, which
// removes the "blank then slow load" feeling when users switch away and
// come back to this tab.
const META_TTL_MS = 30_000;
const RUNS_TTL_MS = 15_000;

// Single slot holding the last workflows/projects/agents snapshot.
let _metaCacheRef: { ts: number; workflows: any[]; projects: any[]; agents: any[] } | null = null;
// Run history snapshots, keyed by workflow name.
const _runsCache = new Map<string, { ts: number; runs: any[] }>();

/** Return the metadata snapshot if it is younger than META_TTL_MS, else null.
 *  An expired snapshot is dropped so it cannot be handed out later. */
function readMetaCache() {
  if (!_metaCacheRef) return null;
  if (Date.now() - _metaCacheRef.ts < META_TTL_MS) return _metaCacheRef;
  _metaCacheRef = null;
  return null;
}

/** Store a fresh metadata snapshot stamped with the current time. */
function writeMetaCache(workflows: any[], projects: any[], agents: any[]) {
  _metaCacheRef = { ts: Date.now(), workflows, projects, agents };
}

/** Return the cached runs for `workflow` if younger than RUNS_TTL_MS, else
 *  null. Expired entries are evicted on read so the Map does not keep
 *  stale run lists around indefinitely. */
function readRunsCache(workflow: string) {
  const entry = _runsCache.get(workflow);
  if (!entry) return null;
  if (Date.now() - entry.ts < RUNS_TTL_MS) return entry;
  _runsCache.delete(workflow);
  return null;
}

/** Store the run list for `workflow` stamped with the current time. */
function writeRunsCache(workflow: string, runs: any[]) {
  _runsCache.set(workflow, { ts: Date.now(), runs });
}

// Invalidate after user mutations (delete/import/reinstall) so the next
// fetchMeta/fetchWorkflowRuns doesn't hydrate stale UI from cache while
// the live refetch is in flight.

/** Drop the metadata snapshot. */
function clearMetaCache() {
  _metaCacheRef = null;
}

/** Drop cached runs for one workflow, or for every workflow when no
 *  (or an empty) name is given. */
function clearRunsCache(workflow?: string) {
  if (workflow) _runsCache.delete(workflow);
  else _runsCache.clear();
}
45
+
10
46
  // ─── Live Task Log Hook ──────────────────────────────────
11
47
  // Subscribes to SSE stream for a running task, returns live log entries
12
48
  function useTaskStream(taskId: string | undefined, isRunning: boolean) {
@@ -698,6 +734,7 @@ export default function PipelineView({ onViewTask, focusPipelineId, onFocusHandl
698
734
  });
699
735
  const data = await res.json();
700
736
  if (!data.ok) { setMarketErr(data.error || 'install failed'); return; }
737
+ clearMetaCache();
701
738
  await Promise.all([fetchMarketplace(), fetchData()]);
702
739
  } catch (e) {
703
740
  setMarketErr(e instanceof Error ? e.message : String(e));
@@ -743,25 +780,52 @@ export default function PipelineView({ onViewTask, focusPipelineId, onFocusHandl
743
780
  // Lightweight metadata only — no pipeline runs. Runs are lazy-loaded
744
781
  // per-workflow on click. Reduces initial page to a single workflow
745
782
  // table fetch + projects + agents, even with hundreds of runs on disk.
783
+ //
784
+ // SWR pattern: if module-level cache is fresh, hydrate UI instantly
785
+ // and revalidate in background. First visit per page-load still pays
786
+ // the full fetch cost; tab switches feel instant.
746
787
  const fetchMeta = useCallback(async () => {
788
+ const cached = readMetaCache();
789
+ if (cached) {
790
+ setWorkflows(cached.workflows);
791
+ setProjects(cached.projects.map((p: any) => ({ name: p.name, path: p.path })));
792
+ setAgents(cached.agents);
793
+ // Fall through to background refetch — comment out to make TTL strict.
794
+ }
747
795
  const [wRes, projRes, agentRes] = await Promise.all([
748
796
  fetch('/api/pipelines?type=workflows'),
749
797
  fetch('/api/projects'),
750
798
  fetch('/api/agents'),
751
799
  ]);
752
800
  const [wData, projData, agentData] = await Promise.all([wRes.json(), projRes.json(), agentRes.json()]);
753
- if (Array.isArray(wData)) setWorkflows(wData);
754
- if (Array.isArray(projData)) setProjects(projData.map((p: any) => ({ name: p.name, path: p.path })));
755
- if (Array.isArray(agentData?.agents)) setAgents(agentData.agents);
801
+ const ws = Array.isArray(wData) ? wData : [];
802
+ const ps = Array.isArray(projData) ? projData : [];
803
+ const ags = Array.isArray(agentData?.agents) ? agentData.agents : [];
804
+ if (ws.length) setWorkflows(ws);
805
+ if (ps.length) setProjects(ps.map((p: any) => ({ name: p.name, path: p.path })));
806
+ if (ags.length) setAgents(ags);
807
+ writeMetaCache(ws, ps, ags);
756
808
  }, []);
757
809
 
758
810
  // Fetch the run history for ONE workflow. Used on first expand + on
759
811
  // the 5s polling tick (only for the currently active workflow).
760
812
  const fetchWorkflowRuns = useCallback(async (workflowName: string, opts: { append?: boolean } = {}) => {
813
+ // SWR for the first expand of a workflow tab (not for append=true
814
+ // "load more" calls, those want fresh server pagination).
815
+ if (!opts.append) {
816
+ const cached = readRunsCache(workflowName);
817
+ if (cached) {
818
+ setPipelines((prev) => {
819
+ const others = prev.filter((p) => p.workflowName !== workflowName);
820
+ return [...others, ...cached.runs];
821
+ });
822
+ }
823
+ }
761
824
  try {
762
825
  const res = await fetch(`/api/pipelines?workflow=${encodeURIComponent(workflowName)}&limit=100`);
763
826
  const data: Pipeline[] = await res.json();
764
827
  if (!Array.isArray(data)) return;
828
+ if (!opts.append) writeRunsCache(workflowName, data);
765
829
  setPipelines(prev => {
766
830
  if (!opts.append) {
767
831
  // Replace this workflow's runs (covers status changes during polling),
@@ -890,6 +954,8 @@ export default function PipelineView({ onViewTask, focusPipelineId, onFocusHandl
890
954
  body: JSON.stringify({ action: 'delete' }),
891
955
  });
892
956
  if (selectedPipeline?.id === id) setSelectedPipeline(null);
957
+ const wf = pipelines.find((p) => p.id === id)?.workflowName;
958
+ clearRunsCache(wf);
893
959
  fetchData();
894
960
  };
895
961
 
@@ -1015,6 +1081,8 @@ initial_prompt: "{{input.task}}"
1015
1081
  });
1016
1082
  const data = await res.json();
1017
1083
  if (!data.ok) { setMarketErr(data.error || 'reinstall failed'); return; }
1084
+ clearMetaCache();
1085
+ clearRunsCache(r.name);
1018
1086
  await Promise.all([fetchMarketplace(), fetchData()]);
1019
1087
  alert(`"${r.name}" reinstalled from registry (v${r.version}).`);
1020
1088
  } catch (e) {
@@ -1052,6 +1120,7 @@ initial_prompt: "{{input.task}}"
1052
1120
  });
1053
1121
  const data = await res.json();
1054
1122
  if (!data.ok) { setMarketErr(data.error || 'import failed'); return; }
1123
+ clearMetaCache();
1055
1124
  await Promise.all([fetchMarketplace(), fetchData()]);
1056
1125
  alert(`Imported as "${data.installed_as}". Open it from the Workflows list.`);
1057
1126
  } catch (e) {
@@ -1287,6 +1356,8 @@ initial_prompt: "{{input.task}}"
1287
1356
  const data = await res.json();
1288
1357
  if (!res.ok || data.error) { alert(`Delete failed: ${data.error || res.status}`); return; }
1289
1358
  if (activeWorkflow === w.name) setActiveWorkflow(null);
1359
+ clearMetaCache();
1360
+ clearRunsCache(w.name);
1290
1361
  fetchData();
1291
1362
  } catch { alert('Delete failed'); }
1292
1363
  }}
@@ -142,7 +142,7 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
142
142
  const [syncing, setSyncing] = useState(false);
143
143
  const [loading, setLoading] = useState(true);
144
144
  const [installTarget, setInstallTarget] = useState<{ skill: string; show: boolean }>({ skill: '', show: false });
145
- const [typeFilter, setTypeFilter] = useState<'all' | 'skill' | 'command' | 'local' | 'rules' | 'plugins' | 'connectors' | 'crafts' | 'recipes' | 'pipelines'>('all');
145
+ const [typeFilter, setTypeFilter] = useState<'all' | 'skill' | 'command' | 'local' | 'rules' | 'plugins' | 'connectors' | 'crafts' | 'recipes' | 'pipelines'>('pipelines');
146
146
  const [localItems, setLocalItems] = useState<{ name: string; type: string; scope: string; fileCount: number; projectPath?: string }[]>([]);
147
147
  // Rules (CLAUDE.md templates)
148
148
  const [rulesTemplates, setRulesTemplates] = useState<{ id: string; name: string; description: string; tags: string[]; builtin: boolean; isDefault: boolean; content: string }[]>([]);
@@ -414,30 +414,65 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
414
414
  <div className="flex items-center justify-between px-4 py-2 border-b border-[var(--border)] shrink-0">
415
415
  <div className="flex items-center gap-2">
416
416
  <span className="text-xs font-semibold text-[var(--text-primary)]">Marketplace</span>
417
- {/* Grouped category dropdown replaces the long inline tab bar.
418
- Native <optgroup> gives free keyboard nav + a coherent layout
419
- regardless of category count. */}
420
- <select
421
- value={typeFilter}
422
- onChange={(e) => setTypeFilter(e.target.value as typeof typeFilter)}
423
- className="text-[10px] px-2 py-1 rounded bg-[var(--bg-tertiary)] border border-[var(--border)] text-[var(--text-primary)] focus:outline-none focus:border-[var(--accent)]"
424
- >
425
- <optgroup label="Catalog">
426
- <option value="all">All ({skills.length})</option>
427
- <option value="skill">Skills ({skillCount})</option>
428
- <option value="command">Commands ({commandCount})</option>
429
- <option value="local">Local ({localCount})</option>
430
- <option value="rules">Rules</option>
431
- </optgroup>
432
- <optgroup label="Extensions">
433
- <option value="plugins">Plugins</option>
434
- <option value="connectors">Connectors</option>
435
- <option value="crafts">Crafts</option>
436
- </optgroup>
437
- <optgroup label="Templates">
438
- <option value="pipelines">Pipelines</option>
439
- </optgroup>
440
- </select>
417
+ {/* Three group-scoped dropdowns instead of one big <select>.
418
+ Order is by usage frequency and matches the `groups` array below:
419
+ Templates → Extensions (Connectors lives here) → Catalog. Each dropdown highlights when
420
+ its current value is active; the inactive ones show the
421
+ group label as a placeholder. Picking from any sets the
422
+ single global typeFilter. */}
423
+ {(() => {
424
+ const groups: Array<{ label: string; opts: Array<{ value: typeof typeFilter; label: string }> }> = [
425
+ { label: 'Templates', opts: [
426
+ { value: 'pipelines', label: 'Pipelines' },
427
+ ]},
428
+ { label: 'Extensions', opts: [
429
+ { value: 'connectors', label: 'Connectors' },
430
+ { value: 'plugins', label: 'Plugins' },
431
+ { value: 'crafts', label: 'Crafts' },
432
+ ]},
433
+ { label: 'Catalog', opts: [
434
+ { value: 'all', label: `All (${skills.length})` },
435
+ { value: 'skill', label: `Skills (${skillCount})` },
436
+ { value: 'command', label: `Commands (${commandCount})` },
437
+ { value: 'local', label: `Local (${localCount})` },
438
+ { value: 'rules', label: 'Rules' },
439
+ ]},
440
+ ];
441
+ return groups.map((g) => {
442
+ const isActive = g.opts.some((o) => o.value === typeFilter);
443
+ if (g.opts.length === 1) {
444
+ const only = g.opts[0];
445
+ return (
446
+ <button
447
+ key={g.label}
448
+ onClick={() => setTypeFilter(only.value)}
449
+ className={`text-[10px] px-2 py-1 rounded border ${
450
+ isActive
451
+ ? 'border-[var(--accent)] text-[var(--accent)] bg-[var(--accent)]/10'
452
+ : 'border-[var(--border)] text-[var(--text-primary)] bg-[var(--bg-tertiary)] hover:border-[var(--text-secondary)]'
453
+ }`}
454
+ >{only.label}</button>
455
+ );
456
+ }
457
+ return (
458
+ <select
459
+ key={g.label}
460
+ value={isActive ? (typeFilter as string) : ''}
461
+ onChange={(e) => { if (e.target.value) setTypeFilter(e.target.value as typeof typeFilter); }}
462
+ className={`text-[10px] px-2 py-1 rounded bg-[var(--bg-tertiary)] border focus:outline-none ${
463
+ isActive
464
+ ? 'border-[var(--accent)] text-[var(--accent)]'
465
+ : 'border-[var(--border)] text-[var(--text-primary)] focus:border-[var(--accent)]'
466
+ }`}
467
+ >
468
+ <option value="" disabled hidden>{g.label}</option>
469
+ {g.opts.map((o) => (
470
+ <option key={o.value} value={o.value as string}>{o.label}</option>
471
+ ))}
472
+ </select>
473
+ );
474
+ });
475
+ })()}
441
476
  </div>
442
477
  <span className="text-[8px] px-1.5 py-0.5 rounded bg-blue-500/15 text-blue-400">Claude Code</span>
443
478
  <input
@@ -697,7 +732,7 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
697
732
  {installTarget.skill === itemName && installTarget.show && (
698
733
  <>
699
734
  <div className="fixed inset-0 z-40" onClick={() => setInstallTarget({ skill: '', show: false })} />
700
- <div className="absolute right-0 top-7 w-[200px] bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
735
+ <div className="absolute right-0 top-7 w-[200px] max-h-[60vh] overflow-y-auto bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
701
736
  <button
702
737
  onClick={async () => {
703
738
  const res = await fetch('/api/skills/local', { method: 'POST', headers: { 'Content-Type': 'application/json' },
@@ -708,7 +743,7 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
708
743
  setInstallTarget({ skill: '', show: false });
709
744
  fetchSkills();
710
745
  }}
711
- className="w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] text-[var(--text-primary)]"
746
+ className="w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] text-[var(--text-primary)] sticky top-0 bg-[var(--bg-secondary)]"
712
747
  >Global (~/.claude)</button>
713
748
  <div className="border-t border-[var(--border)] my-0.5" />
714
749
  {projects.map(p => (
@@ -757,10 +792,10 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
757
792
  {installTarget.skill === skill.name && installTarget.show && (
758
793
  <>
759
794
  <div className="fixed inset-0 z-40" onClick={() => setInstallTarget({ skill: '', show: false })} />
760
- <div className="absolute right-0 top-7 w-[180px] bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
795
+ <div className="absolute right-0 top-7 w-[180px] max-h-[60vh] overflow-y-auto bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
761
796
  <button
762
797
  onClick={() => install(skill.name, 'global')}
763
- className={`w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] ${
798
+ className={`w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] sticky top-0 bg-[var(--bg-secondary)] ${
764
799
  skill.installedGlobal ? 'text-[var(--green)]' : 'text-[var(--text-primary)]'
765
800
  }`}
766
801
  >
@@ -431,7 +431,7 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
431
431
  memStore.listBlocks({ pinned: true, scope: 'both' }),
432
432
  memStore.listBlocks({ scope: 'both' }),
433
433
  memStore.search(args.userText, 8),
434
- buildMemoryContext({ store: memStore, currentUserMessage: args.userText }),
434
+ buildMemoryContext({ store: memStore, currentUserMessage: args.userText, currentSessionId: args.sessionId }),
435
435
  ]);
436
436
  const pinnedBlocks = bp.status === 'fulfilled' ? bp.value : [];
437
437
  const allBlocks = ba.status === 'fulfilled' ? ba.value : [];
@@ -497,17 +497,38 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
497
497
  ...memTools.map((m) => m.def),
498
498
  watchTool.def,
499
499
  ];
500
- const allTools: LlmTool[] = [
501
- ...builtinDefsAll.map((t) => ({
502
- name: t.name,
503
- description: t.description,
504
- input_schema: t.input_schema,
505
- })),
506
- ...connectorTools,
507
- ];
500
+ const builtinToolDefs: LlmTool[] = builtinDefsAll.map((t) => ({
501
+ name: t.name,
502
+ description: t.description,
503
+ input_schema: t.input_schema,
504
+ }));
505
+
506
+ // ── Sticky narrow helper ─────────────────────────────────────────
507
+ // After a turn that called connector tools, on the NEXT turn we
508
+ // restrict tool list to ONLY the connectors that were used. This
509
+ // shrinks tools from 99 → ~10 in a typical mantis or nac flow,
510
+ // saving ~18K tokens per turn AND letting the model focus its
511
+ // attention (helps local models avoid hallucination).
512
/** Collect the namespace (text before the first '.') of every `tool_use`
 *  block whose tool name contains a dot. Tool names without a dot and
 *  blocks of any other type contribute nothing. */
function pickConnectorNamespacesUsed(blocks: ContentBlock[]): Set<string> {
  const namespaces = new Set<string>();
  blocks.forEach((block) => {
    if (block.type !== 'tool_use') return;
    const dot = block.name.indexOf('.');
    if (dot === -1) return;
    namespaces.add(block.name.slice(0, dot));
  });
  return namespaces;
}
521
+ const sessionSystemPrompt = session.system_prompt;
522
/** Build the system prompt for the given tool list via buildSystemPrompt
 *  (with the builtin defs and the session's own system prompt), then
 *  append `narrowDirective` when it is set. */
function buildSystem(tools: LlmTool[]): string {
  const base = buildSystemPrompt(tools, builtinDefsAll, sessionSystemPrompt);
  return narrowDirective ? base + narrowDirective : base;
}
527
+
528
+ const baseConnectorTools = connectorTools; // post-initial-narrow snapshot
529
+ let allTools: LlmTool[] = [...builtinToolDefs, ...baseConnectorTools];
508
530
 
509
- let system = buildSystemPrompt(connectorTools, builtinDefsAll, session.system_prompt);
510
- if (narrowDirective) system += narrowDirective;
531
+ let system = buildSystem(baseConnectorTools);
511
532
  if (memContext) system += '\n\n─── Memory context (auto-loaded) ───\n' + memContext;
512
533
  if (memStore.enabled) {
513
534
  const searchHint = memStore.kind === 'local'
@@ -538,9 +559,39 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
538
559
  return { ok: false, error: 'empty history' };
539
560
  }
540
561
 
562
+ // ── Sticky narrow: shrink tools to only what last turn actually used.
563
+ // First iteration: keep the user-mention-narrowed list. Iter 2+:
564
+ // if previous assistant turn called e.g. mantis.get_bug, restrict
565
+ // to mantis.* only — local models behave much better with focused
566
+ // tool set, and we save ~18K tokens per turn.
567
+ if (iter > 1 && assistantBlocksAccum.length > 0) {
568
+ const usedNs = pickConnectorNamespacesUsed(assistantBlocksAccum);
569
+ if (usedNs.size > 0) {
570
+ const narrowedConn = baseConnectorTools.filter((t) =>
571
+ usedNs.has(t.name.split('.')[0]!));
572
+ if (narrowedConn.length > 0 && narrowedConn.length < baseConnectorTools.length) {
573
+ allTools = [...builtinToolDefs, ...narrowedConn];
574
+ system = buildSystem(narrowedConn);
575
+ if (memContext) system += '\n\n─── Memory context (auto-loaded) ───\n' + memContext;
576
+ console.log(`[chat] sticky narrow → ${[...usedNs].join(',')} (${narrowedConn.length}/${baseConnectorTools.length} connector tools)`);
577
+ }
578
+ }
579
+ }
580
+
541
581
  assistantBlocksAccum = [];
542
582
  let currentTextBuf = '';
543
583
 
584
+ // ── Token composition log (input side, BEFORE the call) ──
585
+ // Heuristic char/4. Lets you correlate later with the provider's
586
+ // real usage.input_tokens — if the gap widens turn-over-turn, the
587
+ // memory/tools blob is silently growing.
588
+ const _systemTok = Math.ceil(system.length / 4);
589
+ const _memCtxTok = Math.ceil(memContext.length / 4);
590
+ const _toolsTok = Math.ceil(JSON.stringify(allTools).length / 4);
591
+ const _historyTok = history.reduce((s, m) => s + estimateTokens(m), 0);
592
+ const _historyMsgs = history.length;
593
+ console.log(`[chat-tokens] session=${args.sessionId} turn=${iter} est_in=${_systemTok + _historyTok + _toolsTok} system=${_systemTok} history=${_historyTok}(${_historyMsgs}msgs) memory=${_memCtxTok} tools=${_toolsTok}`);
594
+
544
595
  const result = await streamLlm(
545
596
  {
546
597
  provider: provider.type,
@@ -563,6 +614,12 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
563
614
  },
564
615
  );
565
616
 
617
+ // ── Real usage from the provider (when reported) ──
618
+ if (result.usage) {
619
+ const u = result.usage;
620
+ console.log(`[chat-tokens] session=${args.sessionId} turn=${iter} REAL in=${u.inputTokens ?? '?'} out=${u.outputTokens ?? '?'} cache_read=${u.cacheReadTokens ?? 0} cache_create=${u.cacheCreationTokens ?? 0} stop=${result.stopReason}`);
621
+ }
622
+
566
623
  lastStop = result.stopReason;
567
624
  assistantBlocksAccum = result.content;
568
625
 
@@ -31,6 +31,10 @@ export interface BuildMemoryContextOpts {
31
31
  /** Prefixes that mark internal-only blocks (cursor / health / etc).
32
32
  * Defaults to lib/memory/keys.INTERNAL_KEY_PREFIXES. */
33
33
  excludeKeyPrefixes?: readonly string[];
34
+ /** Current chat session id. When set, blocks keyed `chat:<otherId>:*`
35
+ * are dropped — other sessions' summaries are noise in this chat and
36
+ * were the primary source of "old data bleeding into new chat". */
37
+ currentSessionId?: string;
34
38
  }
35
39
 
36
40
  export interface BuildMemoryContextResult {
@@ -46,18 +50,22 @@ export async function buildMemoryContext(opts: BuildMemoryContextOpts): Promise<
46
50
  topK = 6,
47
51
  maxBlocks = 50,
48
52
  excludeKeyPrefixes = INTERNAL_KEY_PREFIXES,
53
+ currentSessionId,
49
54
  } = opts;
50
55
 
51
- const blocks = filterInternal(
52
- await safe(() => store.listBlocks({ pinned: true }), [] as MemoryBlock[]),
53
- excludeKeyPrefixes,
56
+ const blocks = dropForeignChat(
57
+ filterInternal(
58
+ await safe(() => store.listBlocks({ pinned: true }), [] as MemoryBlock[]),
59
+ excludeKeyPrefixes,
60
+ ),
61
+ currentSessionId,
54
62
  ).slice(0, maxBlocks);
55
63
 
56
64
  const q = (currentUserMessage || '').trim();
57
65
  let hits: SearchHit[] = [];
58
66
  if (q) {
59
67
  const rawHits = await safe(() => store.search(q, topK), [] as SearchHit[]);
60
- hits = filterInternalHits(rawHits, excludeKeyPrefixes);
68
+ hits = dropForeignChatHits(filterInternalHits(rawHits, excludeKeyPrefixes), currentSessionId);
61
69
  }
62
70
 
63
71
  return { text: renderMemoryContext(blocks, hits), blocks, hits };
@@ -81,6 +89,30 @@ function filterInternalHits(hits: SearchHit[], prefixes: readonly string[]): Sea
81
89
  });
82
90
  }
83
91
 
92
/** Remove blocks keyed `chat:<otherSessionId>:*` from the list.
 *  Those summary blocks hold raw excerpts of past conversations, and
 *  showing them in a different chat is what let a "new empty chat" leak
 *  old session content. Facts (`fact:*`) and every other block whose key
 *  is not chat-prefixed are kept, since they are the intentional
 *  cross-session signal. Returns the input unchanged when no sessionId
 *  is supplied. */
function dropForeignChat(blocks: MemoryBlock[], sessionId?: string): MemoryBlock[] {
  return sessionId
    ? blocks.filter((block) => isOwnChatOrNotChat(block.key, sessionId))
    : blocks;
}
101
+
102
/** Search-hit counterpart of the foreign-chat filter. A hit whose id
 *  starts with `block:` carries a block key after that prefix and is
 *  kept only if the key passes isOwnChatOrNotChat; any other hit (e.g. a
 *  Graphiti hit, which has no key to inspect) is kept as-is. Returns the
 *  input unchanged when no sessionId is supplied. */
function dropForeignChatHits(hits: SearchHit[], sessionId?: string): SearchHit[] {
  if (!sessionId) return hits;
  const blockPrefix = 'block:';
  return hits.filter((hit) =>
    hit.id?.startsWith(blockPrefix)
      ? isOwnChatOrNotChat(hit.id.slice(blockPrefix.length), sessionId)
      : true,
  );
}
109
+
110
/** Decide whether a memory key may be surfaced in the session `sessionId`.
 *
 *  - Keys that do not start with `chat:` are always allowed.
 *  - `chat:` keys are allowed only when they belong to this session, i.e.
 *    the key is exactly `chat:<sessionId>` or starts with
 *    `chat:<sessionId>:` (key shape: `chat:<sessionId>:summary:<ts>`).
 *
 *  Matching on the full prefix instead of the second ':'-separated field
 *  keeps the check correct when the session id itself contains ':'; for
 *  colon-free ids the result is the same as comparing that field. */
function isOwnChatOrNotChat(key: string, sessionId: string): boolean {
  if (!key.startsWith('chat:')) return true;
  const ownPrefix = `chat:${sessionId}`;
  return key === ownPrefix || key.startsWith(`${ownPrefix}:`);
}
115
+
84
116
  async function safe<T>(fn: () => Promise<T>, fallback: T): Promise<T> {
85
117
  try {
86
118
  return await fn();
@@ -132,14 +132,30 @@ export const anthropicAdapter: LlmAdapter = {
132
132
  // execute — chat owns dispatch (destructive confirm, browser bridge,
133
133
  // memory tools etc all live in agent-loop). Setting stopWhen with
134
134
  // stepCountIs(1) prevents the SDK from auto-rolling a second step.
135
+ // Build tool record. Mark the LAST tool with cache_control so
136
+ // Anthropic-family backends (or LiteLLM proxies that forward it)
137
+ // cache the system+tools prefix. Subsequent turns within the 5-min
138
+ // TTL pay 0.1× input price for the cached portion instead of 1×.
139
+ // Backends that don't honor cache_control silently ignore it,
140
+ // costing nothing.
141
+ const toolNames = req.tools.map((t) => t.name);
142
+ const lastName = toolNames[toolNames.length - 1];
135
143
  const tools: Record<string, any> = {};
136
144
  for (const t of req.tools) {
137
145
  tools[encodeToolName(t.name)] = {
138
146
  description: t.description,
139
147
  inputSchema: jsonSchema(t.input_schema),
148
+ ...(t.name === lastName ? {
149
+ providerOptions: {
150
+ anthropic: { cacheControl: { type: 'ephemeral' } },
151
+ },
152
+ } : {}),
140
153
  };
141
154
  }
142
155
 
156
+ // Single cache breakpoint at end-of-tools — Anthropic caches the
157
+ // prefix (system + tools) since system comes first in the wire
158
+ // format. No need to add a separate marker on system.
143
159
  const result = streamText({
144
160
  model: client(req.model),
145
161
  system: req.system,
@@ -169,6 +185,19 @@ export const anthropicAdapter: LlmAdapter = {
169
185
  if (textBuf.length > 0) content.push({ type: 'text', text: textBuf });
170
186
 
171
187
  const finishReason = await result.finishReason;
172
- return { stopReason: mapStop(finishReason), content };
188
+ let usage;
189
+ try {
190
+ const u: any = await result.usage;
191
+ if (u) {
192
+ usage = {
193
+ inputTokens: u.inputTokens ?? u.promptTokens,
194
+ outputTokens: u.outputTokens ?? u.completionTokens,
195
+ cacheReadTokens: u.cachedInputTokens ?? u.cacheReadInputTokens,
196
+ cacheCreationTokens: u.cacheCreationInputTokens,
197
+ totalTokens: u.totalTokens,
198
+ };
199
+ }
200
+ } catch {}
201
+ return { stopReason: mapStop(finishReason), content, usage };
173
202
  },
174
203
  };
@@ -108,6 +108,17 @@ export const openaiAdapter: LlmAdapter = {
108
108
  if (textBuf.length > 0) content.push({ type: 'text', text: textBuf });
109
109
 
110
110
  const finishReason = await result.finishReason;
111
- return { stopReason: mapStop(finishReason), content };
111
+ let usage;
112
+ try {
113
+ const u: any = await result.usage;
114
+ if (u) {
115
+ usage = {
116
+ inputTokens: u.inputTokens ?? u.promptTokens,
117
+ outputTokens: u.outputTokens ?? u.completionTokens,
118
+ totalTokens: u.totalTokens,
119
+ };
120
+ }
121
+ } catch {}
122
+ return { stopReason: mapStop(finishReason), content, usage };
112
123
  },
113
124
  };
@@ -21,9 +21,20 @@ export interface LlmCallbacks {
21
21
 
22
22
  export type StopReason = 'end_turn' | 'tool_use' | 'max_tokens' | 'refusal' | 'error' | 'other';
23
23
 
24
/** Token accounting for a single LLM call. Every field is optional:
 *  a field is left undefined when the backend did not report it. */
export interface LlmTurnUsage {
  /** Tokens on the input (prompt) side of the call. */
  inputTokens?: number;
  /** Tokens on the output (completion) side of the call. */
  outputTokens?: number;
  /** NOTE(review): presumably input tokens served from the provider's
   *  prompt cache — confirm against the adapter that fills it. */
  cacheReadTokens?: number;
  /** NOTE(review): presumably input tokens written into the provider's
   *  prompt cache on this call — confirm against the adapter. */
  cacheCreationTokens?: number;
  /** Total token count, when the provider reports one. */
  totalTokens?: number;
}
31
+
24
32
  export interface LlmTurnResult {
25
33
  stopReason: StopReason;
26
34
  content: ContentBlock[];
35
+ /** Token usage from the provider, if reported. May be partially-filled
36
+ * or absent for proxies that don't expose it. */
37
+ usage?: LlmTurnUsage;
27
38
  }
28
39
 
29
40
  export interface LlmRequest {
@@ -327,16 +327,67 @@ export function listMessagesCapped(
327
327
  // loop (provider will see a single message — still valid).
328
328
  const keptGroups: Message[][] = [];
329
329
  let used = 0;
330
+ let evictedCount = 0;
330
331
  for (let i = groups.length - 1; i >= 0; i--) {
331
332
  const g = groups[i];
332
333
  const cost = g.reduce((s, m) => s + estimateTokens(m), 0);
333
- if (keptGroups.length > 0 && used + cost > tokenBudget) break;
334
+ if (keptGroups.length > 0 && used + cost > tokenBudget) {
335
+ evictedCount = i + 1; // groups [0..i] would have been evicted
336
+ break;
337
+ }
334
338
  keptGroups.unshift(g);
335
339
  used += cost;
336
340
  }
341
+
342
+ // ── Pin the SESSION's first user message (task brief) ──────────
343
+ // Even if eviction would normally drop it, the user's opening prompt
344
+ // defines the task. Losing it causes the model to lose track of
345
+ // what was asked — symptom: model writes "summarize all X" and
346
+ // hallucinates instead of processing the specific list the user
347
+ // gave. Re-fetch the absolute first user message, prepend if not
348
+ // already in keptGroups. Cap its tokens so a truly enormous brief
349
+ // can't break the call — keep first ~2k tokens.
350
+ if (evictedCount > 0) {
351
+ const firstUserRow = db().prepare(`
352
+ SELECT * FROM chat_messages WHERE session_id = ? AND role = 'user'
353
+ ORDER BY ts ASC LIMIT 1
354
+ `).get(session_id) as MessageRow | undefined;
355
+ if (firstUserRow) {
356
+ const firstUserMsg = rowToMessage(firstUserRow);
357
+ const alreadyKept = keptGroups.some((g) => g.some((m) => m.id === firstUserMsg.id));
358
+ if (!alreadyKept) {
359
+ // Cap to ~2000 tokens of brief (≈8KB) — tasks longer than that
360
+ // should be split anyway; preserving the head is enough to
361
+ // anchor the model to the original ask.
362
+ const FIRST_BRIEF_TOKEN_CAP = 2000;
363
+ let pinned = firstUserMsg;
364
+ if (estimateTokens(firstUserMsg) > FIRST_BRIEF_TOKEN_CAP) {
365
+ pinned = clipMessageToTokens(firstUserMsg, FIRST_BRIEF_TOKEN_CAP);
366
+ }
367
+ keptGroups.unshift([pinned]);
368
+ console.log(`[session-cap] pinned first user message (id=${firstUserMsg.id}) — ${evictedCount} groups evicted, ${used} tokens used / ${tokenBudget} budget`);
369
+ }
370
+ } else {
371
+ console.log(`[session-cap] ${evictedCount} groups evicted, no first user message found to pin`);
372
+ }
373
+ }
337
374
  return keptGroups.flat();
338
375
  }
339
376
 
377
/** Clip a message's text content to a soft token cap.
 *
 *  Each text block longer than `tokenCap * 4` characters (the char/4
 *  heuristic used by estimateTokens) is cut to its head and suffixed with
 *  a truncation marker. Non-text blocks (tool blocks etc.) are returned
 *  verbatim. The input message is not mutated; a shallow copy with a new
 *  `blocks` array is returned.
 *
 *  The cut never lands between the two halves of a UTF-16 surrogate pair:
 *  if the last kept code unit is a high surrogate it is dropped too, so
 *  the clipped text never ends in a lone surrogate (which is not valid
 *  Unicode and can be rejected when the request is serialized).
 *
 *  @param m        message to clip
 *  @param tokenCap soft cap in estimated tokens; negative values act as 0
 *  @returns a copy of `m` with over-long text blocks truncated */
function clipMessageToTokens(m: Message, tokenCap: number): Message {
  const charCap = Math.max(0, tokenCap) * 4; // matches estimateTokens char/4 heuristic
  const blocks = m.blocks.map((b) => {
    if (b.type !== 'text' || b.text.length <= charCap) return b;
    let end = charCap;
    const lastUnit = b.text.charCodeAt(end - 1); // NaN when end is 0
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1; // don't split a surrogate pair
    return { ...b, text: b.text.slice(0, end) + '\n\n[…task brief truncated to keep in-context]' };
  });
  return { ...m, blocks };
}
390
+
340
391
  export function deleteMessage(id: string): boolean {
341
392
  ensureSchema();
342
393
  const r = db().prepare(`DELETE FROM chat_messages WHERE id = ?`).run(id);
@@ -57,3 +57,17 @@ forge server start --port 4000 # custom port
57
57
  forge server start --dir ~/.forge-test # custom data dir
58
58
  forge --reset-password # reset admin password
59
59
  ```
60
+
61
+ ## Dashboard top bar
62
+
63
+ The top toolbar is split between **at-a-glance signals** (left to right) and a **user menu** (right edge):
64
+
65
+ - **? Help** — opens the in-app Help AI.
66
+ - **Browser ▾** — open an embedded browser (float / right / left dock or external tab).
67
+ - **Tunnel** — start/stop the Cloudflare tunnel + online-count badge.
68
+ - **Alerts** — notifications (task done, pipeline failed, tunnel events).
69
+
70
+ Next to the **Automation** tab in the left-side nav sits a small **Activity** sub-pill — running pipelines + upcoming schedules + recent runs (`▶<running>` `⏰<upcoming>`). Click for a 3-section dropdown with a "view" link that jumps to the run. It lives there because its content is the live read-side of Automation.
71
+ - **User menu (▾)** — `⚙ Settings` + `💬 Chat (web) ↗` at the top (Chat opens in a new tab so the dashboard isn't replaced); then a divider, then the periodic-check screens `📊 Monitor` (background watches, processes, queues), `🔐 Login Status` (connector creds), `💰 Usage` (token/cost analytics), `📜 Logs`, `📱 Mobile View ↗`; then `⏻ Logout`.
72
+
73
+ Periodic-check screens (Monitor / Login Status / Usage) live inside the user menu so the top bar only shows things worth glancing at.