@synergenius/flow-weaver-pack-weaver 0.9.193 → 0.9.196
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/ai-client.d.ts +5 -0
- package/dist/bot/ai-client.d.ts.map +1 -1
- package/dist/bot/ai-client.js +43 -0
- package/dist/bot/ai-client.js.map +1 -1
- package/dist/bot/assistant-core.js +2 -2
- package/dist/bot/assistant-core.js.map +1 -1
- package/dist/bot/behavior-defaults.d.ts +3 -1
- package/dist/bot/behavior-defaults.d.ts.map +1 -1
- package/dist/bot/behavior-defaults.js +7 -0
- package/dist/bot/behavior-defaults.js.map +1 -1
- package/dist/bot/capability-registry.js +3 -3
- package/dist/bot/capability-registry.js.map +1 -1
- package/dist/bot/context-compactor.d.ts +35 -0
- package/dist/bot/context-compactor.d.ts.map +1 -0
- package/dist/bot/context-compactor.js +130 -0
- package/dist/bot/context-compactor.js.map +1 -0
- package/dist/bot/dream-task.d.ts +45 -0
- package/dist/bot/dream-task.d.ts.map +1 -0
- package/dist/bot/dream-task.js +125 -0
- package/dist/bot/dream-task.js.map +1 -0
- package/dist/bot/knowledge-store.d.ts +9 -0
- package/dist/bot/knowledge-store.d.ts.map +1 -1
- package/dist/bot/knowledge-store.js +21 -0
- package/dist/bot/knowledge-store.js.map +1 -1
- package/dist/bot/memory-extraction-worker.d.ts +14 -0
- package/dist/bot/memory-extraction-worker.d.ts.map +1 -0
- package/dist/bot/memory-extraction-worker.js +42 -0
- package/dist/bot/memory-extraction-worker.js.map +1 -0
- package/dist/bot/memory-extractor.d.ts +27 -0
- package/dist/bot/memory-extractor.d.ts.map +1 -0
- package/dist/bot/memory-extractor.js +155 -0
- package/dist/bot/memory-extractor.js.map +1 -0
- package/dist/bot/operations.d.ts +3 -1
- package/dist/bot/operations.d.ts.map +1 -1
- package/dist/bot/operations.js +3 -1
- package/dist/bot/operations.js.map +1 -1
- package/dist/bot/post-turn-hooks.d.ts +57 -0
- package/dist/bot/post-turn-hooks.d.ts.map +1 -0
- package/dist/bot/post-turn-hooks.js +108 -0
- package/dist/bot/post-turn-hooks.js.map +1 -0
- package/dist/bot/profile-types.d.ts +16 -0
- package/dist/bot/profile-types.d.ts.map +1 -1
- package/dist/bot/swarm-controller.d.ts +7 -0
- package/dist/bot/swarm-controller.d.ts.map +1 -1
- package/dist/bot/swarm-controller.js +121 -1
- package/dist/bot/swarm-controller.js.map +1 -1
- package/dist/bot/task-prompt-builder.js +35 -21
- package/dist/bot/task-prompt-builder.js.map +1 -1
- package/dist/bot/task-types.d.ts +13 -0
- package/dist/bot/task-types.d.ts.map +1 -1
- package/dist/bot/tool-registry.d.ts +13 -0
- package/dist/bot/tool-registry.d.ts.map +1 -1
- package/dist/bot/tool-registry.js +80 -0
- package/dist/bot/tool-registry.js.map +1 -1
- package/dist/bot/types.d.ts +2 -0
- package/dist/bot/types.d.ts.map +1 -1
- package/dist/node-types/agent-execute.d.ts.map +1 -1
- package/dist/node-types/agent-execute.js +38 -17
- package/dist/node-types/agent-execute.js.map +1 -1
- package/dist/node-types/build-context.d.ts +4 -3
- package/dist/node-types/build-context.d.ts.map +1 -1
- package/dist/node-types/build-context.js +37 -6
- package/dist/node-types/build-context.js.map +1 -1
- package/dist/node-types/receive-task.d.ts +2 -1
- package/dist/node-types/receive-task.d.ts.map +1 -1
- package/dist/node-types/receive-task.js +4 -1
- package/dist/node-types/receive-task.js.map +1 -1
- package/dist/node-types/review-result.d.ts +9 -0
- package/dist/node-types/review-result.d.ts.map +1 -1
- package/dist/node-types/review-result.js +20 -5
- package/dist/node-types/review-result.js.map +1 -1
- package/dist/node-types/verify-task.d.ts +22 -0
- package/dist/node-types/verify-task.d.ts.map +1 -0
- package/dist/node-types/verify-task.js +143 -0
- package/dist/node-types/verify-task.js.map +1 -0
- package/dist/ui/capability-editor.js +3 -3
- package/dist/ui/profile-editor.js +3 -3
- package/dist/ui/swarm-dashboard.js +3 -3
- package/dist/workflows/weaver-agent.d.ts +3 -3
- package/dist/workflows/weaver-agent.d.ts.map +1 -1
- package/dist/workflows/weaver-agent.js +267 -18
- package/dist/workflows/weaver-agent.js.map +1 -1
- package/dist/workflows/weaver-bot-batch.d.ts +3 -3
- package/dist/workflows/weaver-bot-batch.d.ts.map +1 -1
- package/dist/workflows/weaver-bot-batch.js +280 -24
- package/dist/workflows/weaver-bot-batch.js.map +1 -1
- package/dist/workflows/weaver-bot.d.ts +2 -0
- package/dist/workflows/weaver-bot.d.ts.map +1 -1
- package/dist/workflows/weaver-bot.js +15 -10
- package/dist/workflows/weaver-bot.js.map +1 -1
- package/flowweaver.manifest.json +1 -1
- package/package.json +3 -3
- package/src/bot/ai-client.ts +54 -0
- package/src/bot/assistant-core.ts +2 -2
- package/src/bot/behavior-defaults.ts +9 -1
- package/src/bot/capability-registry.ts +3 -3
- package/src/bot/context-compactor.ts +147 -0
- package/src/bot/dream-task.ts +167 -0
- package/src/bot/knowledge-store.ts +27 -0
- package/src/bot/memory-extraction-worker.ts +58 -0
- package/src/bot/memory-extractor.ts +213 -0
- package/src/bot/operations.ts +3 -1
- package/src/bot/post-turn-hooks.ts +137 -0
- package/src/bot/profile-types.ts +17 -0
- package/src/bot/swarm-controller.ts +129 -2
- package/src/bot/task-prompt-builder.ts +37 -21
- package/src/bot/task-types.ts +21 -0
- package/src/bot/tool-registry.ts +89 -0
- package/src/bot/types.ts +2 -0
- package/src/node-types/agent-execute.ts +44 -17
- package/src/node-types/build-context.ts +45 -7
- package/src/node-types/receive-task.ts +3 -0
- package/src/node-types/review-result.ts +22 -5
- package/src/node-types/verify-task.ts +181 -0
- package/src/workflows/weaver-agent.ts +429 -18
- package/src/workflows/weaver-bot-batch.ts +443 -24
- package/src/workflows/weaver-bot.ts +16 -11
|
@@ -4,17 +4,21 @@ import {
|
|
|
4
4
|
createAnthropicProvider,
|
|
5
5
|
getOrCreateCliSession,
|
|
6
6
|
killAllCliSessions,
|
|
7
|
+
joinSplitPrompt,
|
|
7
8
|
type AgentProvider,
|
|
8
9
|
type AgentMessage,
|
|
9
10
|
type ToolDefinition,
|
|
10
11
|
type StreamEvent,
|
|
11
12
|
type StreamOptions,
|
|
12
13
|
type ToolEvent,
|
|
14
|
+
type SplitPrompt,
|
|
13
15
|
} from '@synergenius/flow-weaver/agent';
|
|
14
16
|
import { WEAVER_TOOLS, createWeaverExecutor } from '../bot/weaver-tools.js';
|
|
17
|
+
import { resolveToolsForTask } from '../bot/tool-registry.js';
|
|
15
18
|
import { auditEmit } from '../bot/audit-logger.js';
|
|
16
19
|
import { withRetry, getErrorGuidance } from '../bot/error-classifier.js';
|
|
17
20
|
import { CostTracker } from '../bot/cost-tracker.js';
|
|
21
|
+
import { PostTurnHookRunner, CostCheckpointHook, ProgressReportHook } from '../bot/post-turn-hooks.js';
|
|
18
22
|
|
|
19
23
|
// Clean up persistent sessions on process exit
|
|
20
24
|
let cleanupRegistered = false;
|
|
@@ -64,15 +68,16 @@ class CliSessionProvider implements AgentProvider {
|
|
|
64
68
|
|
|
65
69
|
if (!prompt) return;
|
|
66
70
|
|
|
67
|
-
// Only pass system prompt on the first call
|
|
68
|
-
const
|
|
71
|
+
// Only pass system prompt on the first call — CLI sessions accept a string
|
|
72
|
+
const splitPrompt = this.sentCount <= messages.length ? options?.systemPrompt : undefined;
|
|
73
|
+
const systemPromptStr = splitPrompt ? joinSplitPrompt(splitPrompt) : undefined;
|
|
69
74
|
|
|
70
75
|
// Forward usage events to the runner's CostTracker via the global callback.
|
|
71
76
|
// This bridges CLI session usage → runner cost tracking → swarm budget enforcement.
|
|
72
77
|
const usageCb = (globalThis as Record<string, unknown>).__fw_ai_usage_callback__ as
|
|
73
78
|
((model: string, usage: { inputTokens: number; outputTokens: number }) => void) | undefined;
|
|
74
79
|
|
|
75
|
-
for await (const event of this.session.send(prompt,
|
|
80
|
+
for await (const event of this.session.send(prompt, systemPromptStr)) {
|
|
76
81
|
if (event.type === 'usage' && usageCb) {
|
|
77
82
|
usageCb(this.model, {
|
|
78
83
|
inputTokens: event.promptTokens,
|
|
@@ -150,21 +155,24 @@ export async function weaverAgentExecute(
|
|
|
150
155
|
return { onSuccess: false, onFailure: true, ctx: JSON.stringify(context) };
|
|
151
156
|
}
|
|
152
157
|
|
|
153
|
-
// Build system prompt
|
|
154
|
-
|
|
158
|
+
// Build system prompt as SplitPrompt — prefix is stable (cacheable),
|
|
159
|
+
// suffix is per-task (contextBundle, project plan).
|
|
160
|
+
// If frozenPromptPrefix is available from the swarm controller, use it
|
|
161
|
+
// to ensure all bot slots share the same cached prefix bytes.
|
|
162
|
+
let systemPrompt: SplitPrompt;
|
|
155
163
|
try {
|
|
156
164
|
const mod = await import('../bot/system-prompt.js');
|
|
157
|
-
const
|
|
165
|
+
const prefix = context.frozenPromptPrefix ?? await mod.buildSystemPrompt();
|
|
158
166
|
let cliCommands: { name: string; description: string; botCompatible?: boolean; options?: { flags: string; arg?: string; description: string }[] }[] = [];
|
|
159
167
|
try {
|
|
160
168
|
const docMeta = await import('@synergenius/flow-weaver/doc-metadata');
|
|
161
169
|
cliCommands = docMeta.CLI_COMMANDS ?? [];
|
|
162
170
|
} catch (err) { if (process.env.WEAVER_VERBOSE) console.error('[agent-execute] doc-metadata unavailable (older fw):', err); }
|
|
163
|
-
const
|
|
164
|
-
systemPrompt =
|
|
171
|
+
const suffix = mod.buildBotSystemPrompt(context.contextBundle, cliCommands, projectDir);
|
|
172
|
+
systemPrompt = { prefix, suffix };
|
|
165
173
|
} catch (err) {
|
|
166
174
|
if (process.env.WEAVER_VERBOSE) console.error('[agent-execute] system prompt build failed, using fallback:', err);
|
|
167
|
-
systemPrompt = 'You are Weaver, an AI workflow bot. Use the provided tools to complete tasks.';
|
|
175
|
+
systemPrompt = { prefix: 'You are Weaver, an AI workflow bot. Use the provided tools to complete tasks.', suffix: '' };
|
|
168
176
|
}
|
|
169
177
|
|
|
170
178
|
const taskPrompt = task.instruction.startsWith('## Task:')
|
|
@@ -219,14 +227,34 @@ export async function weaverAgentExecute(
|
|
|
219
227
|
|
|
220
228
|
const onStreamEvent = (event: StreamEvent) => renderer.onStreamEvent(event);
|
|
221
229
|
|
|
222
|
-
// Filter tools by
|
|
223
|
-
//
|
|
230
|
+
// Filter tools by task mode and profile capabilities.
|
|
231
|
+
// Mode-based filtering removes tools the task doesn't need (e.g., modify mode
|
|
232
|
+
// excludes write_file). Capability intersection ensures profiles only get their
|
|
233
|
+
// granted tools (e.g., orchestrator gets task_create, developer does not).
|
|
224
234
|
const behavior = context.behaviorJson ? JSON.parse(context.behaviorJson) : undefined;
|
|
225
235
|
const caps: string[] = behavior?.capabilities ?? [];
|
|
226
|
-
const
|
|
227
|
-
|
|
228
|
-
?
|
|
229
|
-
|
|
236
|
+
const grantedToolNames = resolveToolsForTask(
|
|
237
|
+
{ mode: task.mode },
|
|
238
|
+
caps.length > 0 ? caps : undefined,
|
|
239
|
+
);
|
|
240
|
+
const tools = WEAVER_TOOLS.filter(t => grantedToolNames.has(t.name));
|
|
241
|
+
|
|
242
|
+
// Set up post-turn hooks — cost checkpoint + progress reporting.
|
|
243
|
+
// CostCheckpointHook aborts the loop when cumulative cost exceeds budget.
|
|
244
|
+
// ProgressReportHook emits turn-progress events for UI updates.
|
|
245
|
+
const hookRunner = new PostTurnHookRunner();
|
|
246
|
+
const model = pInfo.model ?? 'claude-sonnet-4-6';
|
|
247
|
+
const budget = behavior?.budget;
|
|
248
|
+
if (budget != null && budget > 0) {
|
|
249
|
+
hookRunner.register(new CostCheckpointHook(budget, model));
|
|
250
|
+
}
|
|
251
|
+
hookRunner.register(new ProgressReportHook((event) => {
|
|
252
|
+
renderer.onStreamEvent?.({ type: 'text_delta', text: '' }); // keep renderer alive
|
|
253
|
+
if (process.env.WEAVER_VERBOSE) {
|
|
254
|
+
console.log(`[post-turn] ${event.type}: iter=${event.data.iteration} tools=${event.data.toolCallCount}`);
|
|
255
|
+
}
|
|
256
|
+
}));
|
|
257
|
+
const onTurnEnd = hookRunner.createCallback();
|
|
230
258
|
|
|
231
259
|
const result = await withRetry(
|
|
232
260
|
() => runAgentLoop(
|
|
@@ -234,7 +262,7 @@ export async function weaverAgentExecute(
|
|
|
234
262
|
tools,
|
|
235
263
|
executor,
|
|
236
264
|
[{ role: 'user', content: taskPrompt }],
|
|
237
|
-
{ systemPrompt, maxIterations: 15, onToolEvent, onStreamEvent },
|
|
265
|
+
{ systemPrompt, maxIterations: 15, onToolEvent, onStreamEvent, onTurnEnd },
|
|
238
266
|
),
|
|
239
267
|
{
|
|
240
268
|
maxRetries: 3,
|
|
@@ -246,7 +274,6 @@ export async function weaverAgentExecute(
|
|
|
246
274
|
);
|
|
247
275
|
|
|
248
276
|
const usage = result.usage;
|
|
249
|
-
const model = pInfo.model ?? 'claude-sonnet-4-6';
|
|
250
277
|
const estimatedCost = CostTracker.estimateCost(model, {
|
|
251
278
|
inputTokens: usage.promptTokens,
|
|
252
279
|
outputTokens: usage.completionTokens,
|
|
@@ -4,6 +4,7 @@ import * as path from 'node:path';
|
|
|
4
4
|
import type { WeaverContext } from '../bot/types.js';
|
|
5
5
|
import { HierarchyEventLog } from '../bot/hierarchy-event-log.js';
|
|
6
6
|
import { KnowledgeStore } from '../bot/knowledge-store.js';
|
|
7
|
+
import { selectRelevantKnowledge } from '../bot/ai-client.js';
|
|
7
8
|
|
|
8
9
|
/** Profiles that work with Flow Weaver workflows and need FW context. */
|
|
9
10
|
const FW_PROFILES = new Set(['fw-developer']);
|
|
@@ -65,7 +66,6 @@ function resolveFwBin(projectDir: string): string | null {
|
|
|
65
66
|
* For create tasks, includes full authoring context + templates.
|
|
66
67
|
*
|
|
67
68
|
* @flowWeaver nodeType
|
|
68
|
-
* @expression
|
|
69
69
|
* @label Build Context
|
|
70
70
|
* @icon build
|
|
71
71
|
* @color cyan
|
|
@@ -73,10 +73,13 @@ function resolveFwBin(projectDir: string): string | null {
|
|
|
73
73
|
* @output ctx [order:0] - Weaver context with contextBundle (JSON)
|
|
74
74
|
* @output onFailure [hidden]
|
|
75
75
|
*/
|
|
76
|
-
export function weaverBuildContext(ctx: string): { ctx: string } {
|
|
76
|
+
export async function weaverBuildContext(execute: boolean, ctx: string): Promise<{ onSuccess: boolean; onFailure: boolean; ctx: string }> {
|
|
77
|
+
if (!execute) {
|
|
78
|
+
return { onSuccess: true, onFailure: false, ctx };
|
|
79
|
+
}
|
|
77
80
|
const context = JSON.parse(ctx) as WeaverContext;
|
|
78
81
|
const { projectDir } = context.env;
|
|
79
|
-
const task = JSON.parse(context.taskJson!) as { mode?: string; targets?: string[] };
|
|
82
|
+
const task = JSON.parse(context.taskJson!) as { mode?: string; targets?: string[]; instruction?: string; title?: string; description?: string };
|
|
80
83
|
const sections: string[] = [];
|
|
81
84
|
const needsFw = needsFwContext(context.taskJson);
|
|
82
85
|
const fwBin = needsFw ? resolveFwBin(projectDir) : null;
|
|
@@ -102,13 +105,48 @@ export function weaverBuildContext(ctx: string): { ctx: string } {
|
|
|
102
105
|
}
|
|
103
106
|
} catch { /* non-fatal — memory is best-effort */ }
|
|
104
107
|
|
|
105
|
-
// Auto-recall learned knowledge from previous bot runs
|
|
108
|
+
// Auto-recall learned knowledge from previous bot runs (with aging caveats + LLM relevance)
|
|
106
109
|
try {
|
|
107
110
|
const knowledge = new KnowledgeStore(projectDir);
|
|
108
111
|
const entries = knowledge.list();
|
|
109
112
|
if (entries.length > 0) {
|
|
110
|
-
const
|
|
111
|
-
|
|
113
|
+
const now = Date.now();
|
|
114
|
+
const NINETY_DAYS_MS = 90 * 24 * 60 * 60 * 1000;
|
|
115
|
+
|
|
116
|
+
// Auto-prune entries older than 90 days
|
|
117
|
+
const staleKeys = entries.filter(e => now - e.createdAt > NINETY_DAYS_MS).map(e => e.key);
|
|
118
|
+
for (const key of staleKeys) knowledge.forget(key);
|
|
119
|
+
|
|
120
|
+
let fresh = entries.filter(e => now - e.createdAt <= NINETY_DAYS_MS);
|
|
121
|
+
|
|
122
|
+
// LLM-based relevance selection when > 10 entries
|
|
123
|
+
if (fresh.length > 10) {
|
|
124
|
+
try {
|
|
125
|
+
const { manifest, entries: sorted } = KnowledgeStore.buildManifest(fresh);
|
|
126
|
+
const instruction = task.instruction ?? task.title ?? task.description ?? '';
|
|
127
|
+
const indices = await selectRelevantKnowledge(
|
|
128
|
+
{ type: context.env.providerInfo?.type ?? 'anthropic', apiKey: process.env.ANTHROPIC_API_KEY },
|
|
129
|
+
instruction,
|
|
130
|
+
task.mode,
|
|
131
|
+
manifest,
|
|
132
|
+
);
|
|
133
|
+
if (indices && indices.length > 0) {
|
|
134
|
+
fresh = indices.filter(i => i < sorted.length).map(i => sorted[i]);
|
|
135
|
+
}
|
|
136
|
+
// Fallback: if LLM returns null, use all fresh entries (current behavior)
|
|
137
|
+
} catch { /* LLM failure non-fatal — use all entries */ }
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
if (fresh.length > 0) {
|
|
141
|
+
const knowledgeLines = fresh.map((e: { key: string; value: string; createdAt: number }) => {
|
|
142
|
+
const ageDays = Math.floor((now - e.createdAt) / (24 * 60 * 60 * 1000));
|
|
143
|
+
const caveat = ageDays >= 1
|
|
144
|
+
? ` _(${ageDays}d ago — may be outdated, verify before asserting)_`
|
|
145
|
+
: '';
|
|
146
|
+
return `- **${e.key}**: ${e.value}${caveat}`;
|
|
147
|
+
});
|
|
148
|
+
sections.push(`## Learned Knowledge\n\nFacts discovered by previous runs — use these instead of re-discovering:\n${knowledgeLines.join('\n')}`);
|
|
149
|
+
}
|
|
112
150
|
}
|
|
113
151
|
} catch { /* non-fatal — knowledge recall is best-effort */ }
|
|
114
152
|
|
|
@@ -135,7 +173,7 @@ export function weaverBuildContext(ctx: string): { ctx: string } {
|
|
|
135
173
|
if (process.env.WEAVER_VERBOSE) process.stderr.write(`\x1b[2m Context: ${bundle.length} chars\x1b[0m\n`);
|
|
136
174
|
|
|
137
175
|
context.contextBundle = bundle;
|
|
138
|
-
return { ctx: JSON.stringify(context) };
|
|
176
|
+
return { onSuccess: true, onFailure: false, ctx: JSON.stringify(context) };
|
|
139
177
|
}
|
|
140
178
|
|
|
141
179
|
/** Minimal context for modify tasks: grammar + annotations + target sources + referenced node types. */
|
|
@@ -10,6 +10,7 @@ import type { WeaverEnv, WeaverContext } from '../bot/types.js';
|
|
|
10
10
|
* @color purple
|
|
11
11
|
* @input env [order:0] - Weaver environment bundle
|
|
12
12
|
* @input [taskJson] [order:1] - Pre-supplied task (JSON, optional)
|
|
13
|
+
* @input [frozenPromptPrefix] [order:2] [hidden] - Frozen system prompt prefix for cache sharing
|
|
13
14
|
* @output ctx [order:0] - Weaver context (JSON)
|
|
14
15
|
* @output onSuccess [order:-2] - On Success
|
|
15
16
|
* @output onFailure [order:-1] [hidden] - On Failure
|
|
@@ -18,11 +19,13 @@ export async function weaverReceiveTask(
|
|
|
18
19
|
execute: boolean,
|
|
19
20
|
env: WeaverEnv,
|
|
20
21
|
taskJson?: string,
|
|
22
|
+
frozenPromptPrefix?: string,
|
|
21
23
|
): Promise<{
|
|
22
24
|
onSuccess: boolean; onFailure: boolean;
|
|
23
25
|
ctx: string;
|
|
24
26
|
}> {
|
|
25
27
|
const context: WeaverContext = { env, taskJson: '{}', hasTask: false };
|
|
28
|
+
if (frozenPromptPrefix) context.frozenPromptPrefix = frozenPromptPrefix;
|
|
26
29
|
|
|
27
30
|
if (!execute) {
|
|
28
31
|
return { onSuccess: true, onFailure: false, ctx: JSON.stringify(context) };
|
|
@@ -11,6 +11,18 @@ import {
|
|
|
11
11
|
} from '@synergenius/flow-weaver/agent';
|
|
12
12
|
import { createWeaverExecutor } from '../bot/weaver-tools.js';
|
|
13
13
|
|
|
14
|
+
/**
 * Separate `<analysis>...</analysis>` scratchpad blocks from LLM response text.
 *
 * The analysis is a reasoning scaffold that improves verdict quality but
 * should not leak into the parsed JSON output.
 *
 * Every complete block is removed from `cleaned`. Every non-empty block is
 * preserved in `analysis`, in order of appearance and separated by a blank
 * line, so the returned scratchpad matches what was stripped. An opening tag
 * with no closing tag is left untouched in `cleaned`.
 *
 * @param text - Raw response text from the model.
 * @returns `cleaned` — the text with all analysis blocks removed and outer
 *   whitespace trimmed; `analysis` — the trimmed block contents, or
 *   `undefined` when there is no block or every block is empty.
 */
export function stripAnalysis(text: string): { cleaned: string; analysis: string | undefined } {
  const blockPattern = /<analysis>([\s\S]*?)<\/analysis>/g;

  const blocks: string[] = [];
  for (const match of text.matchAll(blockPattern)) {
    const body = (match[1] ?? '').trim();
    if (body) blocks.push(body);
  }

  const cleaned = text.replace(blockPattern, '').trim();
  const analysis = blocks.length > 0 ? blocks.join('\n\n') : undefined;
  return { cleaned, analysis };
}
|
|
25
|
+
|
|
14
26
|
/**
|
|
15
27
|
* LLM-powered task completion reviewer.
|
|
16
28
|
* Makes a single judgment call: did the bot accomplish the assigned task?
|
|
@@ -95,7 +107,9 @@ Rate each criterion as PASS or FAIL:
|
|
|
95
107
|
|
|
96
108
|
If you need to verify file contents to judge the RESULT criterion, use the read_file tool. Only read files if the evidence is ambiguous.
|
|
97
109
|
|
|
98
|
-
|
|
110
|
+
First, write your reasoning inside <analysis> tags. Work through each criterion step by step, examining the evidence.
|
|
111
|
+
|
|
112
|
+
After your <analysis> block, output only the following JSON — no other text outside the tags:
|
|
99
113
|
{"pass": true/false, "intent": "PASS/FAIL", "execution": "PASS/FAIL", "result": "PASS/FAIL", "completeness": "PASS/FAIL", "reason": "one sentence summary"}`;
|
|
100
114
|
|
|
101
115
|
try {
|
|
@@ -129,11 +143,14 @@ Respond with exactly:
|
|
|
129
143
|
{ maxIterations: 2 },
|
|
130
144
|
);
|
|
131
145
|
|
|
146
|
+
// Strip <analysis> scratchpad before parsing JSON verdict
|
|
147
|
+
const { cleaned: cleanedSummary } = stripAnalysis(result.summary);
|
|
148
|
+
|
|
132
149
|
// Parse the structured response
|
|
133
150
|
let pass = true;
|
|
134
151
|
let reason = 'Review completed';
|
|
135
152
|
let criteria: Record<string, string> = {};
|
|
136
|
-
const jsonMatch =
|
|
153
|
+
const jsonMatch = cleanedSummary.match(/\{[\s\S]*"pass"[\s\S]*\}/);
|
|
137
154
|
if (jsonMatch) {
|
|
138
155
|
try {
|
|
139
156
|
const parsed = JSON.parse(jsonMatch[0]);
|
|
@@ -146,14 +163,14 @@ Respond with exactly:
|
|
|
146
163
|
} catch {
|
|
147
164
|
if (jsonMatch[0].includes('"pass": false') || jsonMatch[0].includes('"pass":false')) {
|
|
148
165
|
pass = false;
|
|
149
|
-
reason =
|
|
166
|
+
reason = cleanedSummary.slice(0, 200);
|
|
150
167
|
}
|
|
151
168
|
}
|
|
152
169
|
} else {
|
|
153
|
-
const lower =
|
|
170
|
+
const lower = cleanedSummary.toLowerCase();
|
|
154
171
|
if (lower.includes('"pass": false') || lower.includes('"pass":false')) {
|
|
155
172
|
pass = false;
|
|
156
|
-
reason =
|
|
173
|
+
reason = cleanedSummary.slice(0, 200);
|
|
157
174
|
}
|
|
158
175
|
}
|
|
159
176
|
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verification agent — independent post-run review of completed work.
|
|
3
|
+
*
|
|
4
|
+
* Uses a fresh provider session with a different model tier to ensure
|
|
5
|
+
* structurally independent review. Only has read_file and run_shell
|
|
6
|
+
* (read-only) — cannot modify the workspace.
|
|
7
|
+
*
|
|
8
|
+
* Produces a structured VerificationResult: pass/fail/inconclusive.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { VerificationResult, VerificationVerdict } from '../bot/task-types.js';
|
|
12
|
+
import { callAI } from '../bot/ai-client.js';
|
|
13
|
+
import { resolveProviderConfig } from '../bot/agent-provider.js';
|
|
14
|
+
import { resolveModelTier } from '../bot/behavior-defaults.js';
|
|
15
|
+
import { stripAnalysis } from './review-result.js';
|
|
16
|
+
import { CostTracker } from '../bot/cost-tracker.js';
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Verification prompt
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
/**
 * System prompt sent with every verification request.
 *
 * It frames the model as an independent reviewer, asks for reasoning inside
 * `<analysis>` tags, and then a JSON object with `verdict`, `summary`,
 * `issues` and `filesReviewed`.
 *
 * NOTE(review): the text both tells the model to verify by reading files and
 * running read-only commands and forbids any tool calls — confirm which of
 * the two is intended.
 */
const VERIFICATION_SYSTEM_PROMPT = [
  'You are an independent verification agent. Your job is to review',
  'work completed by another AI agent and determine if it meets the task requirements.',
  '',
  'You have NO context about what happened during execution. You only see:',
  '- The task description',
  '- Files that were created or modified',
  '- Current state of those files',
  '',
  'You must verify by READING the actual files and optionally RUNNING read-only',
  'commands (tests, linting, type checking). You CANNOT and MUST NOT modify any files.',
  '',
  'CRITICAL: Respond with TEXT ONLY. Do NOT call any tools. Tool calls will be',
  'REJECTED and will waste your only turn.',
  '',
  '<analysis>',
  'Examine the task description, then check each modified/created file.',
  'Look for:',
  '1. Does the code actually implement what was requested?',
  '2. Are there obvious bugs, missing error handling, or incomplete implementations?',
  '3. Do tests pass? Does the code compile?',
  '4. Are there security issues or bad practices?',
  '</analysis>',
  '',
  'After your <analysis> block, output ONLY the following JSON:',
  '',
  '{',
  '"verdict": "pass" | "fail" | "inconclusive",',
  '"summary": "One sentence explaining the verdict",',
  '"issues": ["List of specific issues found (empty if pass)"],',
  '"filesReviewed": ["List of files you examined"]',
  '}',
].join('\n');
|
|
53
|
+
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// Build user prompt from task + run data
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
/** Everything the verification prompt is built from. */
export interface VerifyTaskInput {
  /** Rendered on the `Title:` line. */
  taskTitle: string;
  /** Rendered on the `Description:` line. */
  taskDescription: string;
  /** Entries listed one per bullet under `## Files Created` (section omitted when empty). */
  filesCreated: string[];
  /** Entries listed one per bullet under `## Files Modified` (section omitted when empty). */
  filesModified: string[];
  /** Rendered verbatim under `## Work Summary`. */
  summary: string;
  /**
   * Optional check results keyed by check name. The value `'pass'` is shown as
   * `PASS`; any other value is shown truncated to its first 200 characters.
   */
  checks?: Record<string, string>;
}

/**
 * Render the user prompt for the verification agent as markdown text.
 *
 * The task and work-summary sections are always present; the file lists and
 * the automated-check section appear only when they have at least one entry.
 * The prompt always ends with the fixed `## Instructions` section.
 *
 * @param input - Task metadata and run results to present to the verifier.
 * @returns The prompt, with lines joined by `\n`.
 */
export function buildVerificationPrompt(input: VerifyTaskInput): string {
  // A heading, one bullet per item and a trailing blank line — or nothing at all.
  const bulletSection = (heading: string, items: string[]): string[] =>
    items.length > 0 ? [heading, ...items.map((item) => `- ${item}`), ''] : [];

  const checkSummaries = Object.entries(input.checks ?? {}).map(
    ([name, result]) => `${name}: ${result === 'pass' ? 'PASS' : result.slice(0, 200)}`,
  );

  return [
    `## Task to Verify`,
    `Title: ${input.taskTitle}`,
    `Description: ${input.taskDescription}`,
    '',
    `## Work Summary`,
    input.summary,
    '',
    ...bulletSection(`## Files Created`, input.filesCreated),
    ...bulletSection(`## Files Modified`, input.filesModified),
    ...bulletSection(`## Automated Check Results`, checkSummaries),
    `## Instructions`,
    'Read the files listed above and verify the work meets the task requirements.',
    'Output your analysis in <analysis> tags, then the JSON verdict.',
  ].join('\n');
}
|
|
103
|
+
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
// Parse verification response
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
|
|
108
|
+
/**
 * Parse `candidate` as JSON and return it only if it is a plain object.
 * Returns `undefined` on a syntax error or any non-object value.
 */
function parseJsonObject(candidate: string): Record<string, unknown> | undefined {
  try {
    const value: unknown = JSON.parse(candidate);
    if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
      return value as Record<string, unknown>;
    }
    return undefined;
  } catch {
    return undefined;
  }
}

/**
 * Return the index of the `}` that closes the `{` at `openIndex`, skipping
 * braces inside double-quoted strings, or -1 when it is never closed.
 */
function findMatchingBrace(text: string, openIndex: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = openIndex; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Try every `{` in `text` as the start of a balanced JSON object. Prefers the
 * first parsed object that has a `verdict` key; otherwise returns the first
 * object that parses at all.
 */
function findVerdictObject(text: string): Record<string, unknown> | undefined {
  let firstParsed: Record<string, unknown> | undefined;
  for (let start = text.indexOf('{'); start !== -1; start = text.indexOf('{', start + 1)) {
    const end = findMatchingBrace(text, start);
    if (end === -1) continue;
    const candidate = parseJsonObject(text.slice(start, end + 1));
    if (!candidate) continue;
    if ('verdict' in candidate) return candidate;
    if (!firstParsed) firstParsed = candidate;
  }
  return firstParsed;
}

/**
 * Turn the verification agent's raw reply into a structured result.
 *
 * `<analysis>` blocks are stripped first. The span from the first `{` to the
 * last `}` is then parsed as JSON. If that span is not valid JSON — typically
 * because the model added prose containing braces before or after the verdict —
 * the text is rescanned for a balanced JSON object instead of giving up.
 *
 * Field handling:
 * - `verdict` is trimmed and lower-cased before matching; anything other than
 *   `pass`, `fail` or `inconclusive` becomes `inconclusive`.
 * - `summary` falls back to `'No summary provided'` when it is not a string.
 * - `issues` and `filesReviewed` keep only their string elements and default
 *   to an empty array.
 *
 * @param response - Raw text returned by the verification model.
 * @returns The parsed verdict fields. When no JSON object can be recovered the
 *   verdict is `inconclusive` and `issues` explains why. Never throws for a
 *   string input.
 */
export function parseVerificationResponse(response: string): Omit<VerificationResult, 'verifiedAt' | 'cost'> {
  const { cleaned } = stripAnalysis(response);

  // Extract JSON from response
  const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    return {
      verdict: 'inconclusive',
      summary: 'Failed to parse verification response',
      issues: ['Verification agent did not produce valid JSON'],
      filesReviewed: [],
    };
  }

  // Fast path: the whole brace span is the verdict. Fallback: find it inside the span.
  const parsed = parseJsonObject(jsonMatch[0]) ?? findVerdictObject(jsonMatch[0]);
  if (!parsed) {
    return {
      verdict: 'inconclusive',
      summary: 'Failed to parse verification JSON',
      issues: ['JSON parse error in verification response'],
      filesReviewed: [],
    };
  }

  const rawVerdict = typeof parsed.verdict === 'string' ? parsed.verdict.trim().toLowerCase() : undefined;
  const verdict: VerificationVerdict =
    rawVerdict === 'pass' || rawVerdict === 'fail' || rawVerdict === 'inconclusive'
      ? rawVerdict
      : 'inconclusive';

  const onlyStrings = (value: unknown): string[] =>
    Array.isArray(value) ? value.filter((item: unknown): item is string => typeof item === 'string') : [];

  return {
    verdict,
    summary: typeof parsed.summary === 'string' ? parsed.summary : 'No summary provided',
    issues: onlyStrings(parsed.issues),
    filesReviewed: onlyStrings(parsed.filesReviewed),
  };
}
|
|
144
|
+
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
// Run verification
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
/**
 * Run one verification pass over completed work and return a timestamped verdict.
 *
 * Builds the user prompt from `input`, sends it with the verification system
 * prompt through `callAI`, parses the reply, and attaches an estimated cost.
 *
 * @param input - Task metadata and run results to verify.
 * @param providerType - Provider identifier; cast without validation to
 *   `'anthropic' | 'claude-cli' | 'platform'`.
 * @param tier - Used directly as the model name for both the call and the
 *   cost estimate. NOTE(review): no tier-to-model resolution happens here —
 *   confirm callers pass a concrete model id.
 * @param apiKey - Optional API key forwarded to the provider info.
 * @returns The parsed verdict plus `verifiedAt` (ISO timestamp) and `cost`.
 *   Any rejection from `callAI` propagates to the caller.
 */
export async function runVerification(
  input: VerifyTaskInput,
  providerType: string,
  tier: string,
  apiKey?: string,
): Promise<VerificationResult> {
  const providerInfo = {
    type: providerType as 'anthropic' | 'claude-cli' | 'platform',
    apiKey,
    model: tier,
  };

  const userPrompt = buildVerificationPrompt(input);
  // 2048 is callAI's fourth argument — presumably the response token cap; confirm against callAI.
  const reply = await callAI(providerInfo, VERIFICATION_SYSTEM_PROMPT, userPrompt, 2048);
  const verdictFields = parseVerificationResponse(reply);

  // No usage data is read from the call, so approximate tokens as characters / 4.
  const approxTokens = (charCount: number): number => Math.round(charCount / 4);
  const cost = CostTracker.estimateCost(tier, {
    inputTokens: approxTokens(VERIFICATION_SYSTEM_PROMPT.length + userPrompt.length),
    outputTokens: approxTokens(reply.length),
  });

  return {
    ...verdictFields,
    verifiedAt: new Date().toISOString(),
    cost,
  };
}
|