@aion0/forge 0.10.33 → 0.10.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RELEASE_NOTES.md +8 -17
- package/components/ActivityPanel.tsx +34 -12
- package/components/SkillsPanel.tsx +4 -4
- package/lib/chat/agent-loop.ts +68 -11
- package/lib/chat/build-memory-context.ts +36 -4
- package/lib/chat/llm/anthropic.ts +30 -1
- package/lib/chat/llm/openai.ts +12 -1
- package/lib/chat/llm/types.ts +11 -0
- package/lib/chat/session-store.ts +52 -1
- package/lib/watch/watch-runner.ts +76 -1
- package/package.json +1 -1
package/RELEASE_NOTES.md
CHANGED
|
@@ -1,23 +1,14 @@
|
|
|
1
|
-
# Forge v0.10.33
|
|
1
|
+
# Forge v0.10.34
|
|
2
2
|
|
|
3
|
-
Released: 2026-06-
|
|
3
|
+
Released: 2026-06-03
|
|
4
4
|
|
|
5
|
-
## Changes since v0.10.
|
|
6
|
-
|
|
7
|
-
### Documentation
|
|
8
|
-
- docs: update help-docs for activity pill, marketplace, usage move, watch builtins
|
|
5
|
+
## Changes since v0.10.33
|
|
9
6
|
|
|
10
7
|
### Other
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
- refactor(dashboard): move Usage into user menu next to Monitor/Login Status
|
|
16
|
-
- refactor(marketplace): split category dropdown by group
|
|
17
|
-
- perf(pipeline-view): invalidate cache after mutations
|
|
18
|
-
- fix(activity): view link uses forge:navigate event
|
|
19
|
-
- perf(pipeline-view): module-level SWR cache for meta + per-workflow runs
|
|
20
|
-
- feat(activity): top-right Activity pill — running pipelines + upcoming schedules
|
|
8
|
+
- fix(watch): heuristic terminal detection for all connector pollers
|
|
9
|
+
- ui(activity): segmented pill — running/upcoming/failed each their own color
|
|
10
|
+
- fix(watch): honor poll result's terminal: true regardless of done_match
|
|
11
|
+
- fix(marketplace): scrollbar on long project list in install dropdown
|
|
21
12
|
|
|
22
13
|
|
|
23
|
-
**Full Changelog**: https://github.com/aiwatching/forge/compare/v0.10.
|
|
14
|
+
**Full Changelog**: https://github.com/aiwatching/forge/compare/v0.10.33...v0.10.34
|
|
package/components/ActivityPanel.tsx
CHANGED
|
@@ -123,25 +123,47 @@ export default function ActivityPanel() {
|
|
|
123
123
|
const runningCount = summary?.running.length ?? 0;
|
|
124
124
|
const upcomingCount = summary?.upcoming.length ?? 0;
|
|
125
125
|
const recentFailed = (summary?.recent ?? []).filter((r) => r.status === 'failed').length;
|
|
126
|
+
const hasAny = runningCount + upcomingCount + recentFailed > 0;
|
|
126
127
|
|
|
127
|
-
// Pill
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
128
|
+
// Pill border tint picks the most urgent state:
|
|
129
|
+
// failed (red) > running (blue) > else dim.
|
|
130
|
+
const borderTint = recentFailed > 0
|
|
131
|
+
? 'border-red-500/50'
|
|
132
|
+
: runningCount > 0
|
|
133
|
+
? 'border-blue-500/50'
|
|
134
|
+
: 'border-[var(--border)]';
|
|
132
135
|
|
|
133
136
|
return (
|
|
134
137
|
<div className="relative" ref={panelRef}>
|
|
135
138
|
<button
|
|
136
139
|
onClick={() => setOpen((o) => !o)}
|
|
137
|
-
className={`text-[10px] px-2 py-0.5 rounded border
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
title="Activity — running pipelines + upcoming schedules"
|
|
140
|
+
className={`text-[10px] px-2 py-0.5 rounded border ${borderTint} flex items-center gap-2.5
|
|
141
|
+
text-[var(--text-secondary)] hover:text-[var(--text-primary)]`}
|
|
142
|
+
title="Activity — running pipelines · upcoming schedules · recent failures"
|
|
141
143
|
>
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
144
|
+
{!hasAny ? (
|
|
145
|
+
<span className="text-[var(--text-secondary)]">✓</span>
|
|
146
|
+
) : (
|
|
147
|
+
<>
|
|
148
|
+
{runningCount > 0 && (
|
|
149
|
+
<span className="inline-flex items-baseline text-blue-400" title={`${runningCount} running`}>
|
|
150
|
+
<span className="text-[7px] mr-0.5">●</span>
|
|
151
|
+
<span className="font-semibold tabular-nums">{runningCount}</span>
|
|
152
|
+
</span>
|
|
153
|
+
)}
|
|
154
|
+
{upcomingCount > 0 && (
|
|
155
|
+
<span className="inline-flex items-baseline text-[var(--text-secondary)]" title={`${upcomingCount} upcoming`}>
|
|
156
|
+
<span className="text-[8px] mr-0.5">◷</span>
|
|
157
|
+
<span className="font-semibold tabular-nums">{upcomingCount}</span>
|
|
158
|
+
</span>
|
|
159
|
+
)}
|
|
160
|
+
{recentFailed > 0 && (
|
|
161
|
+
<span className="inline-flex items-baseline text-red-400" title={`${recentFailed} recently failed`}>
|
|
162
|
+
<span className="text-[8px] mr-0.5">✕</span>
|
|
163
|
+
<span className="font-semibold tabular-nums">{recentFailed}</span>
|
|
164
|
+
</span>
|
|
165
|
+
)}
|
|
166
|
+
</>
|
|
145
167
|
)}
|
|
146
168
|
</button>
|
|
147
169
|
|
|
package/components/SkillsPanel.tsx
CHANGED
|
@@ -732,7 +732,7 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
|
|
|
732
732
|
{installTarget.skill === itemName && installTarget.show && (
|
|
733
733
|
<>
|
|
734
734
|
<div className="fixed inset-0 z-40" onClick={() => setInstallTarget({ skill: '', show: false })} />
|
|
735
|
-
<div className="absolute right-0 top-7 w-[200px] bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
|
|
735
|
+
<div className="absolute right-0 top-7 w-[200px] max-h-[60vh] overflow-y-auto bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
|
|
736
736
|
<button
|
|
737
737
|
onClick={async () => {
|
|
738
738
|
const res = await fetch('/api/skills/local', { method: 'POST', headers: { 'Content-Type': 'application/json' },
|
|
@@ -743,7 +743,7 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
|
|
|
743
743
|
setInstallTarget({ skill: '', show: false });
|
|
744
744
|
fetchSkills();
|
|
745
745
|
}}
|
|
746
|
-
className="w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] text-[var(--text-primary)]"
|
|
746
|
+
className="w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] text-[var(--text-primary)] sticky top-0 bg-[var(--bg-secondary)]"
|
|
747
747
|
>Global (~/.claude)</button>
|
|
748
748
|
<div className="border-t border-[var(--border)] my-0.5" />
|
|
749
749
|
{projects.map(p => (
|
|
@@ -792,10 +792,10 @@ export default function SkillsPanel({ projectFilter }: { projectFilter?: string
|
|
|
792
792
|
{installTarget.skill === skill.name && installTarget.show && (
|
|
793
793
|
<>
|
|
794
794
|
<div className="fixed inset-0 z-40" onClick={() => setInstallTarget({ skill: '', show: false })} />
|
|
795
|
-
<div className="absolute right-0 top-7 w-[180px] bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
|
|
795
|
+
<div className="absolute right-0 top-7 w-[180px] max-h-[60vh] overflow-y-auto bg-[var(--bg-secondary)] border border-[var(--border)] rounded-lg shadow-xl z-50 py-1">
|
|
796
796
|
<button
|
|
797
797
|
onClick={() => install(skill.name, 'global')}
|
|
798
|
-
className={`w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] ${
|
|
798
|
+
className={`w-full text-left text-[10px] px-3 py-1.5 hover:bg-[var(--bg-tertiary)] sticky top-0 bg-[var(--bg-secondary)] ${
|
|
799
799
|
skill.installedGlobal ? 'text-[var(--green)]' : 'text-[var(--text-primary)]'
|
|
800
800
|
}`}
|
|
801
801
|
>
|
package/lib/chat/agent-loop.ts
CHANGED
|
@@ -431,7 +431,7 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
|
|
|
431
431
|
memStore.listBlocks({ pinned: true, scope: 'both' }),
|
|
432
432
|
memStore.listBlocks({ scope: 'both' }),
|
|
433
433
|
memStore.search(args.userText, 8),
|
|
434
|
-
buildMemoryContext({ store: memStore, currentUserMessage: args.userText }),
|
|
434
|
+
buildMemoryContext({ store: memStore, currentUserMessage: args.userText, currentSessionId: args.sessionId }),
|
|
435
435
|
]);
|
|
436
436
|
const pinnedBlocks = bp.status === 'fulfilled' ? bp.value : [];
|
|
437
437
|
const allBlocks = ba.status === 'fulfilled' ? ba.value : [];
|
|
@@ -497,17 +497,38 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
|
|
|
497
497
|
...memTools.map((m) => m.def),
|
|
498
498
|
watchTool.def,
|
|
499
499
|
];
|
|
500
|
-
const
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
500
|
+
const builtinToolDefs: LlmTool[] = builtinDefsAll.map((t) => ({
|
|
501
|
+
name: t.name,
|
|
502
|
+
description: t.description,
|
|
503
|
+
input_schema: t.input_schema,
|
|
504
|
+
}));
|
|
505
|
+
|
|
506
|
+
// ── Sticky narrow helper ─────────────────────────────────────────
|
|
507
|
+
// After a turn that called connector tools, on the NEXT turn we
|
|
508
|
+
// restrict tool list to ONLY the connectors that were used. This
|
|
509
|
+
// shrinks tools from 99 → ~10 in a typical mantis or nac flow,
|
|
510
|
+
// saving ~18K tokens per turn AND letting the model focus its
|
|
511
|
+
// attention (helps local models avoid hallucination).
|
|
512
|
+
/**
 * Collect the connector namespaces referenced by a list of content blocks.
 *
 * Only `tool_use` blocks whose name contains a '.' contribute; the namespace
 * is everything before the first '.' (e.g. `mantis.get_bug` → `mantis`).
 * Tool names without a '.' are ignored.
 *
 * @param blocks Content blocks to scan.
 * @returns The distinct namespaces, in first-seen order.
 */
function pickConnectorNamespacesUsed(blocks: ContentBlock[]): Set<string> {
  return blocks.reduce((namespaces, block) => {
    if (block.type !== 'tool_use') return namespaces;
    const dotAt = block.name.indexOf('.');
    if (dotAt !== -1) namespaces.add(block.name.slice(0, dotAt));
    return namespaces;
  }, new Set<string>());
}
|
|
521
|
+
const sessionSystemPrompt = session.system_prompt;
|
|
522
|
+
/**
 * Compose the system prompt for a given connector tool list.
 *
 * Delegates to `buildSystemPrompt` with the full builtin definitions and the
 * session's own system prompt, then appends `narrowDirective` when it is set.
 *
 * @param tools Connector tools to describe in the prompt.
 * @returns The assembled system prompt text.
 */
function buildSystem(tools: LlmTool[]): string {
  const base = buildSystemPrompt(tools, builtinDefsAll, sessionSystemPrompt);
  return narrowDirective ? base + narrowDirective : base;
}
|
|
527
|
+
|
|
528
|
+
const baseConnectorTools = connectorTools; // post-initial-narrow snapshot
|
|
529
|
+
let allTools: LlmTool[] = [...builtinToolDefs, ...baseConnectorTools];
|
|
508
530
|
|
|
509
|
-
let system =
|
|
510
|
-
if (narrowDirective) system += narrowDirective;
|
|
531
|
+
let system = buildSystem(baseConnectorTools);
|
|
511
532
|
if (memContext) system += '\n\n─── Memory context (auto-loaded) ───\n' + memContext;
|
|
512
533
|
if (memStore.enabled) {
|
|
513
534
|
const searchHint = memStore.kind === 'local'
|
|
@@ -538,9 +559,39 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
|
|
|
538
559
|
return { ok: false, error: 'empty history' };
|
|
539
560
|
}
|
|
540
561
|
|
|
562
|
+
// ── Sticky narrow: shrink tools to only what last turn actually used.
|
|
563
|
+
// First iteration: keep the user-mention-narrowed list. Iter 2+:
|
|
564
|
+
// if previous assistant turn called e.g. mantis.get_bug, restrict
|
|
565
|
+
// to mantis.* only — local models behave much better with focused
|
|
566
|
+
// tool set, and we save ~18K tokens per turn.
|
|
567
|
+
if (iter > 1 && assistantBlocksAccum.length > 0) {
|
|
568
|
+
const usedNs = pickConnectorNamespacesUsed(assistantBlocksAccum);
|
|
569
|
+
if (usedNs.size > 0) {
|
|
570
|
+
const narrowedConn = baseConnectorTools.filter((t) =>
|
|
571
|
+
usedNs.has(t.name.split('.')[0]!));
|
|
572
|
+
if (narrowedConn.length > 0 && narrowedConn.length < baseConnectorTools.length) {
|
|
573
|
+
allTools = [...builtinToolDefs, ...narrowedConn];
|
|
574
|
+
system = buildSystem(narrowedConn);
|
|
575
|
+
if (memContext) system += '\n\n─── Memory context (auto-loaded) ───\n' + memContext;
|
|
576
|
+
console.log(`[chat] sticky narrow → ${[...usedNs].join(',')} (${narrowedConn.length}/${baseConnectorTools.length} connector tools)`);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
|
|
541
581
|
assistantBlocksAccum = [];
|
|
542
582
|
let currentTextBuf = '';
|
|
543
583
|
|
|
584
|
+
// ── Token composition log (input side, BEFORE the call) ──
|
|
585
|
+
// Heuristic char/4. Lets you correlate later with the provider's
|
|
586
|
+
// real usage.input_tokens — if the gap widens turn-over-turn, the
|
|
587
|
+
// memory/tools blob is silently growing.
|
|
588
|
+
const _systemTok = Math.ceil(system.length / 4);
|
|
589
|
+
const _memCtxTok = Math.ceil(memContext.length / 4);
|
|
590
|
+
const _toolsTok = Math.ceil(JSON.stringify(allTools).length / 4);
|
|
591
|
+
const _historyTok = history.reduce((s, m) => s + estimateTokens(m), 0);
|
|
592
|
+
const _historyMsgs = history.length;
|
|
593
|
+
console.log(`[chat-tokens] session=${args.sessionId} turn=${iter} est_in=${_systemTok + _historyTok + _toolsTok} system=${_systemTok} history=${_historyTok}(${_historyMsgs}msgs) memory=${_memCtxTok} tools=${_toolsTok}`);
|
|
594
|
+
|
|
544
595
|
const result = await streamLlm(
|
|
545
596
|
{
|
|
546
597
|
provider: provider.type,
|
|
@@ -563,6 +614,12 @@ export async function runTurn(args: RunTurnArgs): Promise<{ ok: boolean; error?:
|
|
|
563
614
|
},
|
|
564
615
|
);
|
|
565
616
|
|
|
617
|
+
// ── Real usage from the provider (when reported) ──
|
|
618
|
+
if (result.usage) {
|
|
619
|
+
const u = result.usage;
|
|
620
|
+
console.log(`[chat-tokens] session=${args.sessionId} turn=${iter} REAL in=${u.inputTokens ?? '?'} out=${u.outputTokens ?? '?'} cache_read=${u.cacheReadTokens ?? 0} cache_create=${u.cacheCreationTokens ?? 0} stop=${result.stopReason}`);
|
|
621
|
+
}
|
|
622
|
+
|
|
566
623
|
lastStop = result.stopReason;
|
|
567
624
|
assistantBlocksAccum = result.content;
|
|
568
625
|
|
|
package/lib/chat/build-memory-context.ts
CHANGED
|
@@ -31,6 +31,10 @@ export interface BuildMemoryContextOpts {
|
|
|
31
31
|
/** Prefixes that mark internal-only blocks (cursor / health / etc).
|
|
32
32
|
* Defaults to lib/memory/keys.INTERNAL_KEY_PREFIXES. */
|
|
33
33
|
excludeKeyPrefixes?: readonly string[];
|
|
34
|
+
/** Current chat session id. When set, blocks keyed `chat:<otherId>:*`
|
|
35
|
+
* are dropped — other sessions' summaries are noise in this chat and
|
|
36
|
+
* were the primary source of "old data bleeding into new chat". */
|
|
37
|
+
currentSessionId?: string;
|
|
34
38
|
}
|
|
35
39
|
|
|
36
40
|
export interface BuildMemoryContextResult {
|
|
@@ -46,18 +50,22 @@ export async function buildMemoryContext(opts: BuildMemoryContextOpts): Promise<
|
|
|
46
50
|
topK = 6,
|
|
47
51
|
maxBlocks = 50,
|
|
48
52
|
excludeKeyPrefixes = INTERNAL_KEY_PREFIXES,
|
|
53
|
+
currentSessionId,
|
|
49
54
|
} = opts;
|
|
50
55
|
|
|
51
|
-
const blocks =
|
|
52
|
-
|
|
53
|
-
|
|
56
|
+
const blocks = dropForeignChat(
|
|
57
|
+
filterInternal(
|
|
58
|
+
await safe(() => store.listBlocks({ pinned: true }), [] as MemoryBlock[]),
|
|
59
|
+
excludeKeyPrefixes,
|
|
60
|
+
),
|
|
61
|
+
currentSessionId,
|
|
54
62
|
).slice(0, maxBlocks);
|
|
55
63
|
|
|
56
64
|
const q = (currentUserMessage || '').trim();
|
|
57
65
|
let hits: SearchHit[] = [];
|
|
58
66
|
if (q) {
|
|
59
67
|
const rawHits = await safe(() => store.search(q, topK), [] as SearchHit[]);
|
|
60
|
-
hits = filterInternalHits(rawHits, excludeKeyPrefixes);
|
|
68
|
+
hits = dropForeignChatHits(filterInternalHits(rawHits, excludeKeyPrefixes), currentSessionId);
|
|
61
69
|
}
|
|
62
70
|
|
|
63
71
|
return { text: renderMemoryContext(blocks, hits), blocks, hits };
|
|
@@ -81,6 +89,30 @@ function filterInternalHits(hits: SearchHit[], prefixes: readonly string[]): Sea
|
|
|
81
89
|
});
|
|
82
90
|
}
|
|
83
91
|
|
|
92
|
+
/**
 * Remove memory blocks that belong to a different chat session.
 *
 * A block is dropped when its key starts with `chat:` and the session id
 * segment that follows is not `sessionId`. Keys that do not start with
 * `chat:` (for example `fact:*`) are kept, as are the current session's own
 * `chat:` blocks.
 *
 * @param blocks Candidate memory blocks.
 * @param sessionId Current chat session id; when absent or empty the input
 *   array is returned unchanged.
 * @returns The filtered blocks (or the same array when no session id is given).
 */
function dropForeignChat(blocks: MemoryBlock[], sessionId?: string): MemoryBlock[] {
  return sessionId
    ? blocks.filter((block) => isOwnChatOrNotChat(block.key, sessionId))
    : blocks;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Search-hit counterpart of the foreign-chat filter.
 *
 * Hits whose id starts with `block:` carry a memory-block key after that
 * prefix; those are kept only when the key passes `isOwnChatOrNotChat`.
 * Hits without such an id expose no key to inspect and are always kept.
 *
 * @param hits Search hits to filter.
 * @param sessionId Current chat session id; when absent or empty the input
 *   array is returned unchanged.
 * @returns The filtered hits (or the same array when no session id is given).
 */
function dropForeignChatHits(hits: SearchHit[], sessionId?: string): SearchHit[] {
  if (!sessionId) return hits;
  const blockIdPrefix = 'block:';
  return hits.filter(
    (hit) =>
      !hit.id?.startsWith(blockIdPrefix) ||
      isOwnChatOrNotChat(hit.id.slice(blockIdPrefix.length), sessionId),
  );
}
|
|
109
|
+
|
|
110
|
+
/**
 * Decide whether a memory key may be surfaced in the given chat session.
 *
 * Keys that do not start with `chat:` always pass. For `chat:` keys the
 * segment between the first and second ':' is compared with `sessionId`
 * (key shape: `chat:<sessionId>:summary:<ts>`).
 *
 * @param key Memory block key.
 * @param sessionId Current chat session id.
 * @returns `true` when the key is not chat-scoped or belongs to `sessionId`.
 */
function isOwnChatOrNotChat(key: string, sessionId: string): boolean {
  const chatPrefix = 'chat:';
  if (!key.startsWith(chatPrefix)) return true;
  const afterPrefix = key.slice(chatPrefix.length);
  const segmentEnd = afterPrefix.indexOf(':');
  const owner = segmentEnd === -1 ? afterPrefix : afterPrefix.slice(0, segmentEnd);
  return owner === sessionId;
}
|
|
115
|
+
|
|
84
116
|
async function safe<T>(fn: () => Promise<T>, fallback: T): Promise<T> {
|
|
85
117
|
try {
|
|
86
118
|
return await fn();
|
|
package/lib/chat/llm/anthropic.ts
CHANGED
|
@@ -132,14 +132,30 @@ export const anthropicAdapter: LlmAdapter = {
|
|
|
132
132
|
// execute — chat owns dispatch (destructive confirm, browser bridge,
|
|
133
133
|
// memory tools etc all live in agent-loop). Setting stopWhen with
|
|
134
134
|
// stepCountIs(1) prevents the SDK from auto-rolling a second step.
|
|
135
|
+
// Build tool record. Mark the LAST tool with cache_control so
|
|
136
|
+
// Anthropic-family backends (or LiteLLM proxies that forward it)
|
|
137
|
+
// cache the system+tools prefix. Subsequent turns within the 5-min
|
|
138
|
+
// TTL pay 0.1× input price for the cached portion instead of 1×.
|
|
139
|
+
// Backends that don't honor cache_control silently ignore it,
|
|
140
|
+
// costing nothing.
|
|
141
|
+
const toolNames = req.tools.map((t) => t.name);
|
|
142
|
+
const lastName = toolNames[toolNames.length - 1];
|
|
135
143
|
const tools: Record<string, any> = {};
|
|
136
144
|
for (const t of req.tools) {
|
|
137
145
|
tools[encodeToolName(t.name)] = {
|
|
138
146
|
description: t.description,
|
|
139
147
|
inputSchema: jsonSchema(t.input_schema),
|
|
148
|
+
...(t.name === lastName ? {
|
|
149
|
+
providerOptions: {
|
|
150
|
+
anthropic: { cacheControl: { type: 'ephemeral' } },
|
|
151
|
+
},
|
|
152
|
+
} : {}),
|
|
140
153
|
};
|
|
141
154
|
}
|
|
142
155
|
|
|
156
|
+
// Single cache breakpoint at end-of-tools — Anthropic caches the
|
|
157
|
+
// prefix (system + tools) since system comes first in the wire
|
|
158
|
+
// format. No need to add a separate marker on system.
|
|
143
159
|
const result = streamText({
|
|
144
160
|
model: client(req.model),
|
|
145
161
|
system: req.system,
|
|
@@ -169,6 +185,19 @@ export const anthropicAdapter: LlmAdapter = {
|
|
|
169
185
|
if (textBuf.length > 0) content.push({ type: 'text', text: textBuf });
|
|
170
186
|
|
|
171
187
|
const finishReason = await result.finishReason;
|
|
172
|
-
|
|
188
|
+
let usage;
|
|
189
|
+
try {
|
|
190
|
+
const u: any = await result.usage;
|
|
191
|
+
if (u) {
|
|
192
|
+
usage = {
|
|
193
|
+
inputTokens: u.inputTokens ?? u.promptTokens,
|
|
194
|
+
outputTokens: u.outputTokens ?? u.completionTokens,
|
|
195
|
+
cacheReadTokens: u.cachedInputTokens ?? u.cacheReadInputTokens,
|
|
196
|
+
cacheCreationTokens: u.cacheCreationInputTokens,
|
|
197
|
+
totalTokens: u.totalTokens,
|
|
198
|
+
};
|
|
199
|
+
}
|
|
200
|
+
} catch {}
|
|
201
|
+
return { stopReason: mapStop(finishReason), content, usage };
|
|
173
202
|
},
|
|
174
203
|
};
|
package/lib/chat/llm/openai.ts
CHANGED
|
@@ -108,6 +108,17 @@ export const openaiAdapter: LlmAdapter = {
|
|
|
108
108
|
if (textBuf.length > 0) content.push({ type: 'text', text: textBuf });
|
|
109
109
|
|
|
110
110
|
const finishReason = await result.finishReason;
|
|
111
|
-
|
|
111
|
+
let usage;
|
|
112
|
+
try {
|
|
113
|
+
const u: any = await result.usage;
|
|
114
|
+
if (u) {
|
|
115
|
+
usage = {
|
|
116
|
+
inputTokens: u.inputTokens ?? u.promptTokens,
|
|
117
|
+
outputTokens: u.outputTokens ?? u.completionTokens,
|
|
118
|
+
totalTokens: u.totalTokens,
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
} catch {}
|
|
122
|
+
return { stopReason: mapStop(finishReason), content, usage };
|
|
112
123
|
},
|
|
113
124
|
};
|
package/lib/chat/llm/types.ts
CHANGED
|
@@ -21,9 +21,20 @@ export interface LlmCallbacks {
|
|
|
21
21
|
|
|
22
22
|
export type StopReason = 'end_turn' | 'tool_use' | 'max_tokens' | 'refusal' | 'error' | 'other';
|
|
23
23
|
|
|
24
|
+
/**
 * Token accounting for a single LLM turn, as reported by the provider.
 * Every field is optional: adapters copy whatever the provider exposes and
 * leave the rest undefined.
 */
export interface LlmTurnUsage {
  /** Input (prompt) tokens reported for the turn. */
  inputTokens?: number;
  /** Output (completion) tokens reported for the turn. */
  outputTokens?: number;
  /** Input tokens served from the provider's prompt cache, when reported. */
  cacheReadTokens?: number;
  /** Input tokens written to the provider's prompt cache, when reported. */
  cacheCreationTokens?: number;
  /** Total tokens as reported by the provider (not recomputed here). */
  totalTokens?: number;
}
|
|
31
|
+
|
|
24
32
|
export interface LlmTurnResult {
|
|
25
33
|
stopReason: StopReason;
|
|
26
34
|
content: ContentBlock[];
|
|
35
|
+
/** Token usage from the provider, if reported. May be partially-filled
|
|
36
|
+
* or absent for proxies that don't expose it. */
|
|
37
|
+
usage?: LlmTurnUsage;
|
|
27
38
|
}
|
|
28
39
|
|
|
29
40
|
export interface LlmRequest {
|
|
package/lib/chat/session-store.ts
CHANGED
|
@@ -327,16 +327,67 @@ export function listMessagesCapped(
|
|
|
327
327
|
// loop (provider will see a single message — still valid).
|
|
328
328
|
const keptGroups: Message[][] = [];
|
|
329
329
|
let used = 0;
|
|
330
|
+
let evictedCount = 0;
|
|
330
331
|
for (let i = groups.length - 1; i >= 0; i--) {
|
|
331
332
|
const g = groups[i];
|
|
332
333
|
const cost = g.reduce((s, m) => s + estimateTokens(m), 0);
|
|
333
|
-
if (keptGroups.length > 0 && used + cost > tokenBudget)
|
|
334
|
+
if (keptGroups.length > 0 && used + cost > tokenBudget) {
|
|
335
|
+
evictedCount = i + 1; // groups [0..i] would have been evicted
|
|
336
|
+
break;
|
|
337
|
+
}
|
|
334
338
|
keptGroups.unshift(g);
|
|
335
339
|
used += cost;
|
|
336
340
|
}
|
|
341
|
+
|
|
342
|
+
// ── Pin the SESSION's first user message (task brief) ──────────
|
|
343
|
+
// Even if eviction would normally drop it, the user's opening prompt
|
|
344
|
+
// defines the task. Losing it causes the model to lose track of
|
|
345
|
+
// what was asked — symptom: model writes "summarize all X" and
|
|
346
|
+
// hallucinates instead of processing the specific list the user
|
|
347
|
+
// gave. Re-fetch the absolute first user message, prepend if not
|
|
348
|
+
// already in keptGroups. Cap its tokens so a truly enormous brief
|
|
349
|
+
// can't break the call — keep first ~2k tokens.
|
|
350
|
+
if (evictedCount > 0) {
|
|
351
|
+
const firstUserRow = db().prepare(`
|
|
352
|
+
SELECT * FROM chat_messages WHERE session_id = ? AND role = 'user'
|
|
353
|
+
ORDER BY ts ASC LIMIT 1
|
|
354
|
+
`).get(session_id) as MessageRow | undefined;
|
|
355
|
+
if (firstUserRow) {
|
|
356
|
+
const firstUserMsg = rowToMessage(firstUserRow);
|
|
357
|
+
const alreadyKept = keptGroups.some((g) => g.some((m) => m.id === firstUserMsg.id));
|
|
358
|
+
if (!alreadyKept) {
|
|
359
|
+
// Cap to ~2000 tokens of brief (≈8KB) — tasks longer than that
|
|
360
|
+
// should be split anyway; preserving the head is enough to
|
|
361
|
+
// anchor the model to the original ask.
|
|
362
|
+
const FIRST_BRIEF_TOKEN_CAP = 2000;
|
|
363
|
+
let pinned = firstUserMsg;
|
|
364
|
+
if (estimateTokens(firstUserMsg) > FIRST_BRIEF_TOKEN_CAP) {
|
|
365
|
+
pinned = clipMessageToTokens(firstUserMsg, FIRST_BRIEF_TOKEN_CAP);
|
|
366
|
+
}
|
|
367
|
+
keptGroups.unshift([pinned]);
|
|
368
|
+
console.log(`[session-cap] pinned first user message (id=${firstUserMsg.id}) — ${evictedCount} groups evicted, ${used} tokens used / ${tokenBudget} budget`);
|
|
369
|
+
}
|
|
370
|
+
} else {
|
|
371
|
+
console.log(`[session-cap] ${evictedCount} groups evicted, no first user message found to pin`);
|
|
372
|
+
}
|
|
373
|
+
}
|
|
337
374
|
return keptGroups.flat();
|
|
338
375
|
}
|
|
339
376
|
|
|
377
|
+
/**
 * Clip a message's text content to a soft token cap.
 *
 * The cap is converted to characters with the same char/4 heuristic used by
 * `estimateTokens`, and is a budget for the message's text as a whole: text
 * blocks consume it in order, and the first block that does not fit is
 * head-truncated and suffixed with a marker. Any later non-empty text block
 * is reduced to the marker alone. (Previously the cap was applied to each
 * block separately, so a message made of several medium-sized text blocks
 * could exceed the cap and still be returned unclipped.)
 *
 * Non-text blocks (tool blocks etc.) are preserved verbatim and do not count
 * against the budget. The input message is not mutated.
 *
 * @param m Message to clip.
 * @param tokenCap Soft cap in estimated tokens; values below 0 are treated as 0.
 * @returns A shallow copy of `m` with its text blocks clipped as described.
 */
function clipMessageToTokens(m: Message, tokenCap: number): Message {
  const truncationMarker = '\n\n[…task brief truncated to keep in-context]';
  // Remaining character budget, shared by all text blocks of the message.
  let remainingChars = Math.max(0, Math.floor(tokenCap * 4)); // matches estimateTokens char/4 heuristic
  const blocks = m.blocks.map((b) => {
    if (b.type !== 'text') return b;
    if (b.text.length <= remainingChars) {
      remainingChars -= b.text.length;
      return b;
    }
    let cut = remainingChars;
    // Never cut between the two halves of a surrogate pair: a dangling high
    // surrogate is not valid text and can be mangled or rejected downstream.
    if (cut > 0) {
      const lastUnit = b.text.charCodeAt(cut - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
    }
    remainingChars = 0;
    return { ...b, text: b.text.slice(0, cut) + truncationMarker };
  });
  return { ...m, blocks };
}
|
|
390
|
+
|
|
340
391
|
export function deleteMessage(id: string): boolean {
|
|
341
392
|
ensureSchema();
|
|
342
393
|
const r = db().prepare(`DELETE FROM chat_messages WHERE id = ?`).run(id);
|
|
package/lib/watch/watch-runner.ts
CHANGED
|
@@ -40,6 +40,45 @@ function parseResult(content: string): any {
|
|
|
40
40
|
try { return JSON.parse(content); } catch { return { _raw: content }; }
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
+
/**
 * Heuristically recognise a "this work is finished" shape in a poll result,
 * independent of any `terminal` flag or user-declared done condition.
 *
 * Two passes, in this order:
 *  1. Done-ish flags `done`, `finished`, `complete`, `completed`: the first
 *     one for which `truthy(...)` holds yields a success hit with value 'true'.
 *  2. State-bearing fields `state`, `status`, `phase`, `result`, `conclusion`,
 *     `lifecycle_state`: the value is stringified, lower-cased and trimmed,
 *     then matched against a failure vocabulary first and a success
 *     vocabulary second. The first field that matches either one wins.
 *
 * @param obj Parsed poll result; anything that is not a non-null object
 *   yields `null`.
 * @returns `{ failure, source, value }` naming the field that matched and its
 *   normalised value, or `null` when nothing looks terminal.
 */
function detectTerminalState(obj: any): { failure: boolean; source: string; value: string } | null {
  if (!obj || typeof obj !== 'object') return null;

  // Pass 1: boolean-like completion flags.
  const doneFlag = ['done', 'finished', 'complete', 'completed'].find((flag) => truthy(obj[flag]));
  if (doneFlag !== undefined) {
    return { failure: false, source: doneFlag, value: 'true' };
  }

  // Pass 2: state-bearing fields checked against a terminal vocabulary.
  const failureWords = new Set([
    'failed', 'failure', 'error', 'errored', 'cancelled', 'canceled',
    'aborted', 'killed', 'terminated', 'timeout', 'timed_out', 'rejected',
    'unstable', 'broken',
  ]);
  const successWords = new Set([
    'done', 'success', 'succeeded', 'complete', 'completed', 'finished',
    'passed', 'ok', 'green', 'healthy',
  ]);
  const stateFields = ['state', 'status', 'phase', 'result', 'conclusion', 'lifecycle_state'];

  for (const field of stateFields) {
    const rawValue = obj[field];
    if (rawValue == null) continue;
    const word = String(rawValue).toLowerCase().trim();
    if (!word) continue;
    // Failure vocabulary is consulted before the success vocabulary.
    const failure = failureWords.has(word);
    if (failure || successWords.has(word)) {
      return { failure, source: field, value: word };
    }
  }
  return null;
}
|
|
81
|
+
|
|
43
82
|
const g = globalThis as any;
|
|
44
83
|
|
|
45
84
|
export function startWatchRunner(hooks: WatchRunnerHooks = {}): void {
|
|
@@ -120,7 +159,27 @@ export function startWatchRunner(hooks: WatchRunnerHooks = {}): void {
|
|
|
120
159
|
if (w.fail_path && truthy(getPath(obj, w.fail_path))) {
|
|
121
160
|
return finish(w, 'failed', obj, `${w.label}: failure condition met.`);
|
|
122
161
|
}
|
|
123
|
-
//
|
|
162
|
+
// Hard terminal check — if the poll tool itself says "this is a
|
|
163
|
+
// terminal state" (cancelled / failed / done / etc.), believe it
|
|
164
|
+
// regardless of the user-configured done condition. Without this,
|
|
165
|
+
// a watch on get_pipeline_status with done_match={status:"done"}
|
|
166
|
+
// would keep polling after the user cancels the pipeline, because
|
|
167
|
+
// status="cancelled" never matches "done" — wasting polls until
|
|
168
|
+
// max_polls / timeout. The builtin status tools (get_pipeline_status,
|
|
169
|
+
// get_task_status) all set obj.terminal = true on cancelled/failed
|
|
170
|
+
// too, so honoring it here drops the watch the moment the user
|
|
171
|
+
// intervenes.
|
|
172
|
+
if (truthy(getPath(obj, 'terminal'))) {
|
|
173
|
+
const statusVal = String(getPath(obj, 'status') || '').toLowerCase();
|
|
174
|
+
const isFailureLike = statusVal === 'failed' || statusVal === 'cancelled';
|
|
175
|
+
return finish(
|
|
176
|
+
w,
|
|
177
|
+
isFailureLike ? 'failed' : 'done',
|
|
178
|
+
obj,
|
|
179
|
+
`${w.label}: ${statusVal || 'reached a terminal state'}.`,
|
|
180
|
+
);
|
|
181
|
+
}
|
|
182
|
+
// done check (user-configured)
|
|
124
183
|
let done = false;
|
|
125
184
|
if (w.done_match) {
|
|
126
185
|
const v = getPath(obj, w.done_match.path);
|
|
@@ -132,6 +191,22 @@ export function startWatchRunner(hooks: WatchRunnerHooks = {}): void {
|
|
|
132
191
|
if (done) {
|
|
133
192
|
return finish(w, 'done', obj, `${w.label}: done.`);
|
|
134
193
|
}
|
|
194
|
+
// Heuristic terminal detection — fallback for connector pollers
|
|
195
|
+
// that don't set obj.terminal AND whose authors didn't anticipate
|
|
196
|
+
// a particular done condition. If the poll result has a common
|
|
197
|
+
// "I'm finished" shape (state/status/phase/result with a known
|
|
198
|
+
// terminal word, or done:true / finished:true), trust it. User's
|
|
199
|
+
// explicit done_match/done_path runs first (above), so a watch
|
|
200
|
+
// wanting "done when status==running" still works as intended.
|
|
201
|
+
const term = detectTerminalState(obj);
|
|
202
|
+
if (term) {
|
|
203
|
+
return finish(
|
|
204
|
+
w,
|
|
205
|
+
term.failure ? 'failed' : 'done',
|
|
206
|
+
obj,
|
|
207
|
+
`${w.label}: detected ${term.source}=${term.value} — closing watch.`,
|
|
208
|
+
);
|
|
209
|
+
}
|
|
135
210
|
// not done — bound by polls / timeout, else reschedule
|
|
136
211
|
if (polls >= w.max_polls || now - w.created_at > w.timeout_sec * 1000) {
|
|
137
212
|
return finish(w, 'timed_out', obj, `${w.label}: not done within ${w.max_polls} polls / ${w.timeout_sec}s — please verify manually.`);
|
package/package.json
CHANGED