npm - @synergenius/flow-weaver-pack-weaver - Versions diffs - 0.9.193 → 0.9.196 - Mend

@synergenius/flow-weaver-pack-weaver 0.9.193 → 0.9.196

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117) hide show

package/dist/bot/ai-client.d.ts +5 -0
package/dist/bot/ai-client.d.ts.map +1 -1
package/dist/bot/ai-client.js +43 -0
package/dist/bot/ai-client.js.map +1 -1
package/dist/bot/assistant-core.js +2 -2
package/dist/bot/assistant-core.js.map +1 -1
package/dist/bot/behavior-defaults.d.ts +3 -1
package/dist/bot/behavior-defaults.d.ts.map +1 -1
package/dist/bot/behavior-defaults.js +7 -0
package/dist/bot/behavior-defaults.js.map +1 -1
package/dist/bot/capability-registry.js +3 -3
package/dist/bot/capability-registry.js.map +1 -1
package/dist/bot/context-compactor.d.ts +35 -0
package/dist/bot/context-compactor.d.ts.map +1 -0
package/dist/bot/context-compactor.js +130 -0
package/dist/bot/context-compactor.js.map +1 -0
package/dist/bot/dream-task.d.ts +45 -0
package/dist/bot/dream-task.d.ts.map +1 -0
package/dist/bot/dream-task.js +125 -0
package/dist/bot/dream-task.js.map +1 -0
package/dist/bot/knowledge-store.d.ts +9 -0
package/dist/bot/knowledge-store.d.ts.map +1 -1
package/dist/bot/knowledge-store.js +21 -0
package/dist/bot/knowledge-store.js.map +1 -1
package/dist/bot/memory-extraction-worker.d.ts +14 -0
package/dist/bot/memory-extraction-worker.d.ts.map +1 -0
package/dist/bot/memory-extraction-worker.js +42 -0
package/dist/bot/memory-extraction-worker.js.map +1 -0
package/dist/bot/memory-extractor.d.ts +27 -0
package/dist/bot/memory-extractor.d.ts.map +1 -0
package/dist/bot/memory-extractor.js +155 -0
package/dist/bot/memory-extractor.js.map +1 -0
package/dist/bot/operations.d.ts +3 -1
package/dist/bot/operations.d.ts.map +1 -1
package/dist/bot/operations.js +3 -1
package/dist/bot/operations.js.map +1 -1
package/dist/bot/post-turn-hooks.d.ts +57 -0
package/dist/bot/post-turn-hooks.d.ts.map +1 -0
package/dist/bot/post-turn-hooks.js +108 -0
package/dist/bot/post-turn-hooks.js.map +1 -0
package/dist/bot/profile-types.d.ts +16 -0
package/dist/bot/profile-types.d.ts.map +1 -1
package/dist/bot/swarm-controller.d.ts +7 -0
package/dist/bot/swarm-controller.d.ts.map +1 -1
package/dist/bot/swarm-controller.js +121 -1
package/dist/bot/swarm-controller.js.map +1 -1
package/dist/bot/task-prompt-builder.js +35 -21
package/dist/bot/task-prompt-builder.js.map +1 -1
package/dist/bot/task-types.d.ts +13 -0
package/dist/bot/task-types.d.ts.map +1 -1
package/dist/bot/tool-registry.d.ts +13 -0
package/dist/bot/tool-registry.d.ts.map +1 -1
package/dist/bot/tool-registry.js +80 -0
package/dist/bot/tool-registry.js.map +1 -1
package/dist/bot/types.d.ts +2 -0
package/dist/bot/types.d.ts.map +1 -1
package/dist/node-types/agent-execute.d.ts.map +1 -1
package/dist/node-types/agent-execute.js +38 -17
package/dist/node-types/agent-execute.js.map +1 -1
package/dist/node-types/build-context.d.ts +4 -3
package/dist/node-types/build-context.d.ts.map +1 -1
package/dist/node-types/build-context.js +37 -6
package/dist/node-types/build-context.js.map +1 -1
package/dist/node-types/receive-task.d.ts +2 -1
package/dist/node-types/receive-task.d.ts.map +1 -1
package/dist/node-types/receive-task.js +4 -1
package/dist/node-types/receive-task.js.map +1 -1
package/dist/node-types/review-result.d.ts +9 -0
package/dist/node-types/review-result.d.ts.map +1 -1
package/dist/node-types/review-result.js +20 -5
package/dist/node-types/review-result.js.map +1 -1
package/dist/node-types/verify-task.d.ts +22 -0
package/dist/node-types/verify-task.d.ts.map +1 -0
package/dist/node-types/verify-task.js +143 -0
package/dist/node-types/verify-task.js.map +1 -0
package/dist/ui/capability-editor.js +3 -3
package/dist/ui/profile-editor.js +3 -3
package/dist/ui/swarm-dashboard.js +3 -3
package/dist/workflows/weaver-agent.d.ts +3 -3
package/dist/workflows/weaver-agent.d.ts.map +1 -1
package/dist/workflows/weaver-agent.js +267 -18
package/dist/workflows/weaver-agent.js.map +1 -1
package/dist/workflows/weaver-bot-batch.d.ts +3 -3
package/dist/workflows/weaver-bot-batch.d.ts.map +1 -1
package/dist/workflows/weaver-bot-batch.js +280 -24
package/dist/workflows/weaver-bot-batch.js.map +1 -1
package/dist/workflows/weaver-bot.d.ts +2 -0
package/dist/workflows/weaver-bot.d.ts.map +1 -1
package/dist/workflows/weaver-bot.js +15 -10
package/dist/workflows/weaver-bot.js.map +1 -1
package/flowweaver.manifest.json +1 -1
package/package.json +3 -3
package/src/bot/ai-client.ts +54 -0
package/src/bot/assistant-core.ts +2 -2
package/src/bot/behavior-defaults.ts +9 -1
package/src/bot/capability-registry.ts +3 -3
package/src/bot/context-compactor.ts +147 -0
package/src/bot/dream-task.ts +167 -0
package/src/bot/knowledge-store.ts +27 -0
package/src/bot/memory-extraction-worker.ts +58 -0
package/src/bot/memory-extractor.ts +213 -0
package/src/bot/operations.ts +3 -1
package/src/bot/post-turn-hooks.ts +137 -0
package/src/bot/profile-types.ts +17 -0
package/src/bot/swarm-controller.ts +129 -2
package/src/bot/task-prompt-builder.ts +37 -21
package/src/bot/task-types.ts +21 -0
package/src/bot/tool-registry.ts +89 -0
package/src/bot/types.ts +2 -0
package/src/node-types/agent-execute.ts +44 -17
package/src/node-types/build-context.ts +45 -7
package/src/node-types/receive-task.ts +3 -0
package/src/node-types/review-result.ts +22 -5
package/src/node-types/verify-task.ts +181 -0
package/src/workflows/weaver-agent.ts +429 -18
package/src/workflows/weaver-bot-batch.ts +443 -24
package/src/workflows/weaver-bot.ts +16 -11

package/src/node-types/agent-execute.ts CHANGED Viewed

@@ -4,17 +4,21 @@ import {
   createAnthropicProvider,
   getOrCreateCliSession,
   killAllCliSessions,
+  joinSplitPrompt,
   type AgentProvider,
   type AgentMessage,
   type ToolDefinition,
   type StreamEvent,
   type StreamOptions,
   type ToolEvent,
+  type SplitPrompt,
 } from '@synergenius/flow-weaver/agent';
 import { WEAVER_TOOLS, createWeaverExecutor } from '../bot/weaver-tools.js';
+import { resolveToolsForTask } from '../bot/tool-registry.js';
 import { auditEmit } from '../bot/audit-logger.js';
 import { withRetry, getErrorGuidance } from '../bot/error-classifier.js';
 import { CostTracker } from '../bot/cost-tracker.js';
+import { PostTurnHookRunner, CostCheckpointHook, ProgressReportHook } from '../bot/post-turn-hooks.js';
 // Clean up persistent sessions on process exit
 let cleanupRegistered = false;
@@ -64,15 +68,16 @@ class CliSessionProvider implements AgentProvider {
     if (!prompt) return;
-    // Only pass system prompt on the first call
-    const systemPrompt = this.sentCount <= messages.length ? options?.systemPrompt : undefined;
+    // Only pass system prompt on the first call — CLI sessions accept a string
+    const splitPrompt = this.sentCount <= messages.length ? options?.systemPrompt : undefined;
+    const systemPromptStr = splitPrompt ? joinSplitPrompt(splitPrompt) : undefined;
     // Forward usage events to the runner's CostTracker via the global callback.
     // This bridges CLI session usage → runner cost tracking → swarm budget enforcement.
     const usageCb = (globalThis as Record<string, unknown>).__fw_ai_usage_callback__ as
       ((model: string, usage: { inputTokens: number; outputTokens: number }) => void) | undefined;
-    for await (const event of this.session.send(prompt, systemPrompt)) {
+    for await (const event of this.session.send(prompt, systemPromptStr)) {
       if (event.type === 'usage' && usageCb) {
         usageCb(this.model, {
           inputTokens: event.promptTokens,
@@ -150,21 +155,24 @@ export async function weaverAgentExecute(
     return { onSuccess: false, onFailure: true, ctx: JSON.stringify(context) };
   }
-  // Build system prompt
-  let systemPrompt: string;
+  // Build system prompt as SplitPrompt — prefix is stable (cacheable),
+  // suffix is per-task (contextBundle, project plan).
+  // If frozenPromptPrefix is available from the swarm controller, use it
+  // to ensure all bot slots share the same cached prefix bytes.
+  let systemPrompt: SplitPrompt;
   try {
     const mod = await import('../bot/system-prompt.js');
-    const basePrompt = await mod.buildSystemPrompt();
+    const prefix = context.frozenPromptPrefix ?? await mod.buildSystemPrompt();
     let cliCommands: { name: string; description: string; botCompatible?: boolean; options?: { flags: string; arg?: string; description: string }[] }[] = [];
     try {
       const docMeta = await import('@synergenius/flow-weaver/doc-metadata');
       cliCommands = docMeta.CLI_COMMANDS ?? [];
     } catch (err) { if (process.env.WEAVER_VERBOSE) console.error('[agent-execute] doc-metadata unavailable (older fw):', err); }
-    const botPrompt = mod.buildBotSystemPrompt(context.contextBundle, cliCommands, projectDir);
-    systemPrompt = basePrompt + '\n\n' + botPrompt;
+    const suffix = mod.buildBotSystemPrompt(context.contextBundle, cliCommands, projectDir);
+    systemPrompt = { prefix, suffix };
   } catch (err) {
     if (process.env.WEAVER_VERBOSE) console.error('[agent-execute] system prompt build failed, using fallback:', err);
-    systemPrompt = 'You are Weaver, an AI workflow bot. Use the provided tools to complete tasks.';
+    systemPrompt = { prefix: 'You are Weaver, an AI workflow bot. Use the provided tools to complete tasks.', suffix: '' };
   }
   const taskPrompt = task.instruction.startsWith('## Task:')
@@ -219,14 +227,34 @@ export async function weaverAgentExecute(
     const onStreamEvent = (event: StreamEvent) => renderer.onStreamEvent(event);
-    // Filter tools by profile: only orchestrators get task_create.
-    // Without this, the AI sees task_create and delegates instead of doing work.
+    // Filter tools by task mode and profile capabilities.
+    // Mode-based filtering removes tools the task doesn't need (e.g., modify mode
+    // excludes write_file). Capability intersection ensures profiles only get their
+    // granted tools (e.g., orchestrator gets task_create, developer does not).
     const behavior = context.behaviorJson ? JSON.parse(context.behaviorJson) : undefined;
     const caps: string[] = behavior?.capabilities ?? [];
-    const isOrchestrator = caps.includes('role-orchestrator') || caps.includes('task-mgmt') || caps.includes('decomposition');
-    const tools = isOrchestrator
-      ? WEAVER_TOOLS
-      : WEAVER_TOOLS.filter(t => t.name !== 'task_create');
+    const grantedToolNames = resolveToolsForTask(
+      { mode: task.mode },
+      caps.length > 0 ? caps : undefined,
+    );
+    const tools = WEAVER_TOOLS.filter(t => grantedToolNames.has(t.name));
+    // Set up post-turn hooks — cost checkpoint + progress reporting.
+    // CostCheckpointHook aborts the loop when cumulative cost exceeds budget.
+    // ProgressReportHook emits turn-progress events for UI updates.
+    const hookRunner = new PostTurnHookRunner();
+    const model = pInfo.model ?? 'claude-sonnet-4-6';
+    const budget = behavior?.budget;
+    if (budget != null && budget > 0) {
+      hookRunner.register(new CostCheckpointHook(budget, model));
+    }
+    hookRunner.register(new ProgressReportHook((event) => {
+      renderer.onStreamEvent?.({ type: 'text_delta', text: '' }); // keep renderer alive
+      if (process.env.WEAVER_VERBOSE) {
+        console.log(`[post-turn] ${event.type}: iter=${event.data.iteration} tools=${event.data.toolCallCount}`);
+      }
+    }));
+    const onTurnEnd = hookRunner.createCallback();
     const result = await withRetry(
       () => runAgentLoop(
@@ -234,7 +262,7 @@ export async function weaverAgentExecute(
         tools,
         executor,
         [{ role: 'user', content: taskPrompt }],
-        { systemPrompt, maxIterations: 15, onToolEvent, onStreamEvent },
+        { systemPrompt, maxIterations: 15, onToolEvent, onStreamEvent, onTurnEnd },
       ),
       {
         maxRetries: 3,
@@ -246,7 +274,6 @@ export async function weaverAgentExecute(
     );
     const usage = result.usage;
-    const model = pInfo.model ?? 'claude-sonnet-4-6';
     const estimatedCost = CostTracker.estimateCost(model, {
       inputTokens: usage.promptTokens,
       outputTokens: usage.completionTokens,

package/src/node-types/build-context.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import * as path from 'node:path';
 import type { WeaverContext } from '../bot/types.js';
 import { HierarchyEventLog } from '../bot/hierarchy-event-log.js';
 import { KnowledgeStore } from '../bot/knowledge-store.js';
+import { selectRelevantKnowledge } from '../bot/ai-client.js';
 /** Profiles that work with Flow Weaver workflows and need FW context. */
 const FW_PROFILES = new Set(['fw-developer']);
@@ -65,7 +66,6 @@ function resolveFwBin(projectDir: string): string | null {
  * For create tasks, includes full authoring context + templates.
  *
  * @flowWeaver nodeType
- * @expression
  * @label Build Context
  * @icon build
  * @color cyan
@@ -73,10 +73,13 @@ function resolveFwBin(projectDir: string): string | null {
  * @output ctx [order:0] - Weaver context with contextBundle (JSON)
  * @output onFailure [hidden]
  */
-export function weaverBuildContext(ctx: string): { ctx: string } {
+export async function weaverBuildContext(execute: boolean, ctx: string): Promise<{ onSuccess: boolean; onFailure: boolean; ctx: string }> {
+  if (!execute) {
+    return { onSuccess: true, onFailure: false, ctx };
+  }
   const context = JSON.parse(ctx) as WeaverContext;
   const { projectDir } = context.env;
-  const task = JSON.parse(context.taskJson!) as { mode?: string; targets?: string[] };
+  const task = JSON.parse(context.taskJson!) as { mode?: string; targets?: string[]; instruction?: string; title?: string; description?: string };
   const sections: string[] = [];
   const needsFw = needsFwContext(context.taskJson);
   const fwBin = needsFw ? resolveFwBin(projectDir) : null;
@@ -102,13 +105,48 @@ export function weaverBuildContext(ctx: string): { ctx: string } {
     }
   } catch { /* non-fatal — memory is best-effort */ }
-  // Auto-recall learned knowledge from previous bot runs
+  // Auto-recall learned knowledge from previous bot runs (with aging caveats + LLM relevance)
   try {
     const knowledge = new KnowledgeStore(projectDir);
     const entries = knowledge.list();
     if (entries.length > 0) {
-      const knowledgeLines = entries.map((e: { key: string; value: string }) => `- **${e.key}**: ${e.value}`);
-      sections.push(`## Learned Knowledge\n\nFacts discovered by previous runs — use these instead of re-discovering:\n${knowledgeLines.join('\n')}`);
+      const now = Date.now();
+      const NINETY_DAYS_MS = 90 * 24 * 60 * 60 * 1000;
+      // Auto-prune entries older than 90 days
+      const staleKeys = entries.filter(e => now - e.createdAt > NINETY_DAYS_MS).map(e => e.key);
+      for (const key of staleKeys) knowledge.forget(key);
+      let fresh = entries.filter(e => now - e.createdAt <= NINETY_DAYS_MS);
+      // LLM-based relevance selection when > 10 entries
+      if (fresh.length > 10) {
+        try {
+          const { manifest, entries: sorted } = KnowledgeStore.buildManifest(fresh);
+          const instruction = task.instruction ?? task.title ?? task.description ?? '';
+          const indices = await selectRelevantKnowledge(
+            { type: context.env.providerInfo?.type ?? 'anthropic', apiKey: process.env.ANTHROPIC_API_KEY },
+            instruction,
+            task.mode,
+            manifest,
+          );
+          if (indices && indices.length > 0) {
+            fresh = indices.filter(i => i < sorted.length).map(i => sorted[i]);
+          }
+          // Fallback: if LLM returns null, use all fresh entries (current behavior)
+        } catch { /* LLM failure non-fatal — use all entries */ }
+      }
+      if (fresh.length > 0) {
+        const knowledgeLines = fresh.map((e: { key: string; value: string; createdAt: number }) => {
+          const ageDays = Math.floor((now - e.createdAt) / (24 * 60 * 60 * 1000));
+          const caveat = ageDays >= 1
+            ? ` _(${ageDays}d ago — may be outdated, verify before asserting)_`
+            : '';
+          return `- **${e.key}**: ${e.value}${caveat}`;
+        });
+        sections.push(`## Learned Knowledge\n\nFacts discovered by previous runs — use these instead of re-discovering:\n${knowledgeLines.join('\n')}`);
+      }
     }
   } catch { /* non-fatal — knowledge recall is best-effort */ }
@@ -135,7 +173,7 @@ export function weaverBuildContext(ctx: string): { ctx: string } {
   if (process.env.WEAVER_VERBOSE) process.stderr.write(`\x1b[2m  Context: ${bundle.length} chars\x1b[0m\n`);
   context.contextBundle = bundle;
-  return { ctx: JSON.stringify(context) };
+  return { onSuccess: true, onFailure: false, ctx: JSON.stringify(context) };
 }
 /** Minimal context for modify tasks: grammar + annotations + target sources + referenced node types. */

package/src/node-types/receive-task.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import type { WeaverEnv, WeaverContext } from '../bot/types.js';
  * @color purple
  * @input env [order:0] - Weaver environment bundle
  * @input [taskJson] [order:1] - Pre-supplied task (JSON, optional)
+ * @input [frozenPromptPrefix] [order:2] [hidden] - Frozen system prompt prefix for cache sharing
  * @output ctx [order:0] - Weaver context (JSON)
  * @output onSuccess [order:-2] - On Success
  * @output onFailure [order:-1] [hidden] - On Failure
@@ -18,11 +19,13 @@ export async function weaverReceiveTask(
   execute: boolean,
   env: WeaverEnv,
   taskJson?: string,
+  frozenPromptPrefix?: string,
 ): Promise<{
   onSuccess: boolean; onFailure: boolean;
   ctx: string;
 }> {
   const context: WeaverContext = { env, taskJson: '{}', hasTask: false };
+  if (frozenPromptPrefix) context.frozenPromptPrefix = frozenPromptPrefix;
   if (!execute) {
     return { onSuccess: true, onFailure: false, ctx: JSON.stringify(context) };

package/src/node-types/review-result.ts CHANGED Viewed

@@ -11,6 +11,18 @@ import {
 } from '@synergenius/flow-weaver/agent';
 import { createWeaverExecutor } from '../bot/weaver-tools.js';
+/**
+ * Separate the `<analysis>...</analysis>` scratchpad from an LLM response.
+ *
+ * Every closed analysis block is removed from the text so the remaining
+ * output can be parsed as the JSON verdict; only the body of the first block
+ * is reported back.
+ *
+ * @param text Raw LLM response.
+ * @returns `cleaned` — the response without analysis blocks, trimmed;
+ *   `analysis` — the trimmed body of the first block, or `undefined` when
+ *   there is no block or its body is blank.
+ */
+export function stripAnalysis(text: string): { cleaned: string; analysis: string | undefined } {
+  const firstBlock = /<analysis>([\s\S]*?)<\/analysis>/.exec(text);
+  const scratchpad = (firstBlock?.[1] ?? '').trim();
+  const withoutBlocks = text.replace(/<analysis>[\s\S]*?<\/analysis>/g, '');
+  return {
+    cleaned: withoutBlocks.trim(),
+    // A blank scratchpad is reported the same way as a missing one.
+    analysis: scratchpad === '' ? undefined : scratchpad,
+  };
+}
 /**
  * LLM-powered task completion reviewer.
  * Makes a single judgment call: did the bot accomplish the assigned task?
@@ -95,7 +107,9 @@ Rate each criterion as PASS or FAIL:
 If you need to verify file contents to judge the RESULT criterion, use the read_file tool. Only read files if the evidence is ambiguous.
-Respond with exactly:
+First, write your reasoning inside <analysis> tags. Work through each criterion step by step, examining the evidence.
+After your <analysis> block, output only the following JSON — no other text outside the tags:
 {"pass": true/false, "intent": "PASS/FAIL", "execution": "PASS/FAIL", "result": "PASS/FAIL", "completeness": "PASS/FAIL", "reason": "one sentence summary"}`;
   try {
@@ -129,11 +143,14 @@ Respond with exactly:
       { maxIterations: 2 },
     );
+    // Strip <analysis> scratchpad before parsing JSON verdict
+    const { cleaned: cleanedSummary } = stripAnalysis(result.summary);
     // Parse the structured response
     let pass = true;
     let reason = 'Review completed';
     let criteria: Record<string, string> = {};
-    const jsonMatch = result.summary.match(/\{[\s\S]*"pass"[\s\S]*\}/);
+    const jsonMatch = cleanedSummary.match(/\{[\s\S]*"pass"[\s\S]*\}/);
     if (jsonMatch) {
       try {
         const parsed = JSON.parse(jsonMatch[0]);
@@ -146,14 +163,14 @@ Respond with exactly:
       } catch {
         if (jsonMatch[0].includes('"pass": false') || jsonMatch[0].includes('"pass":false')) {
           pass = false;
-          reason = result.summary.slice(0, 200);
+          reason = cleanedSummary.slice(0, 200);
         }
       }
     } else {
-      const lower = result.summary.toLowerCase();
+      const lower = cleanedSummary.toLowerCase();
       if (lower.includes('"pass": false') || lower.includes('"pass":false')) {
         pass = false;
-        reason = result.summary.slice(0, 200);
+        reason = cleanedSummary.slice(0, 200);
       }
     }

package/src/node-types/verify-task.ts ADDED Viewed

@@ -0,0 +1,181 @@
+/**
+ * Verification agent — independent post-run review of completed work.
+ *
+ * Uses a fresh provider session with a different model tier to ensure
+ * structurally independent review. Only has read_file and run_shell
+ * (read-only) — cannot modify the workspace.
+ *
+ * Produces a structured VerificationResult: pass/fail/inconclusive.
+ */
+import type { VerificationResult, VerificationVerdict } from '../bot/task-types.js';
+import { callAI } from '../bot/ai-client.js';
+import { resolveProviderConfig } from '../bot/agent-provider.js';
+import { resolveModelTier } from '../bot/behavior-defaults.js';
+import { stripAnalysis } from './review-result.js';
+import { CostTracker } from '../bot/cost-tracker.js';
+// ---------------------------------------------------------------------------
+// Verification prompt
+// ---------------------------------------------------------------------------
+// System prompt that runVerification passes to callAI. It asks for reasoning
+// inside an <analysis> block followed by a JSON verdict; that block is
+// stripped and the JSON read by parseVerificationResponse.
+// NOTE(review): the text tells the model to READ files and RUN read-only
+// commands, yet also forbids tool calls, and buildVerificationPrompt only
+// lists file paths — confirm how the model is expected to see file contents.
+const VERIFICATION_SYSTEM_PROMPT = `You are an independent verification agent. Your job is to review
+work completed by another AI agent and determine if it meets the task requirements.
+You have NO context about what happened during execution. You only see:
+- The task description
+- Files that were created or modified
+- Current state of those files
+You must verify by READING the actual files and optionally RUNNING read-only
+commands (tests, linting, type checking). You CANNOT and MUST NOT modify any files.
+CRITICAL: Respond with TEXT ONLY. Do NOT call any tools. Tool calls will be
+REJECTED and will waste your only turn.
+<analysis>
+Examine the task description, then check each modified/created file.
+Look for:
+1. Does the code actually implement what was requested?
+2. Are there obvious bugs, missing error handling, or incomplete implementations?
+3. Do tests pass? Does the code compile?
+4. Are there security issues or bad practices?
+</analysis>
+After your <analysis> block, output ONLY the following JSON:
+{
+  "verdict": "pass" | "fail" | "inconclusive",
+  "summary": "One sentence explaining the verdict",
+  "issues": ["List of specific issues found (empty if pass)"],
+  "filesReviewed": ["List of files you examined"]
+}`;
+// ---------------------------------------------------------------------------
+// Build user prompt from task + run data
+// ---------------------------------------------------------------------------
+/** Task and run data that buildVerificationPrompt renders into the user prompt. */
+export interface VerifyTaskInput {
+  /** Rendered on the "Title:" line of the prompt. */
+  taskTitle: string;
+  /** Rendered on the "Description:" line of the prompt. */
+  taskDescription: string;
+  /** Paths listed under "## Files Created"; the section is omitted when empty. */
+  filesCreated: string[];
+  /** Paths listed under "## Files Modified"; the section is omitted when empty. */
+  filesModified: string[];
+  /** Text placed verbatim under "## Work Summary". */
+  summary: string;
+  /**
+   * Check name → result. A result of exactly 'pass' is shown as PASS; any
+   * other value is shown as-is, truncated to 200 characters.
+   */
+  checks?: Record<string, string>;
+}
+/**
+ * Render the user prompt for the verification agent as newline-joined
+ * markdown.
+ *
+ * Sections, in order: task title/description, work summary, created files,
+ * modified files, automated check results, instructions. The two file
+ * sections and the check section are left out when they have no entries.
+ *
+ * @param input Task and run data to describe.
+ * @returns The prompt text, lines joined with `\n`.
+ */
+export function buildVerificationPrompt(input: VerifyTaskInput): string {
+  // A heading, one bullet per item, and a trailing blank line — or nothing at all.
+  const bulletSection = (heading: string, items: string[]): string[] =>
+    items.length > 0 ? [heading, ...items.map((item) => `- ${item}`), ''] : [];
+
+  const checkEntries = input.checks ? Object.entries(input.checks) : [];
+  const checkLines = checkEntries.map(
+    // Anything other than an exact 'pass' is echoed back, capped at 200 chars.
+    ([name, result]) => `- ${name}: ${result === 'pass' ? 'PASS' : result.slice(0, 200)}`,
+  );
+
+  return [
+    `## Task to Verify`,
+    `Title: ${input.taskTitle}`,
+    `Description: ${input.taskDescription}`,
+    '',
+    `## Work Summary`,
+    input.summary,
+    '',
+    ...bulletSection(`## Files Created`, input.filesCreated),
+    ...bulletSection(`## Files Modified`, input.filesModified),
+    ...(checkLines.length > 0 ? [`## Automated Check Results`, ...checkLines, ''] : []),
+    `## Instructions`,
+    'Read the files listed above and verify the work meets the task requirements.',
+    'Output your analysis in <analysis> tags, then the JSON verdict.',
+  ].join('\n');
+}
+// ---------------------------------------------------------------------------
+// Parse verification response
+// ---------------------------------------------------------------------------
+/**
+ * Fallback scanner: return the first brace-balanced span of `text` that
+ * parses as a JSON object and carries a `verdict` key.
+ *
+ * Braces inside JSON string literals are ignored while balancing, so a
+ * summary such as "missing } in foo.ts" does not end the span early.
+ *
+ * @param text Response text with the analysis block already removed.
+ * @returns The parsed object, or `undefined` when no span qualifies.
+ */
+function findVerdictObject(text: string): Record<string, unknown> | undefined {
+  for (let start = text.indexOf('{'); start !== -1; start = text.indexOf('{', start + 1)) {
+    let depth = 0;
+    let inString = false;
+    let escaped = false;
+    for (let i = start; i < text.length; i++) {
+      const ch = text[i];
+      if (inString) {
+        if (escaped) escaped = false;
+        else if (ch === '\\') escaped = true;
+        else if (ch === '"') inString = false;
+        continue;
+      }
+      if (ch === '"') {
+        inString = true;
+      } else if (ch === '{') {
+        depth++;
+      } else if (ch === '}' && --depth === 0) {
+        try {
+          const value: unknown = JSON.parse(text.slice(start, i + 1));
+          if (typeof value === 'object' && value !== null && 'verdict' in value) {
+            return value as Record<string, unknown>;
+          }
+        } catch { /* not JSON — move on to the next opening brace */ }
+        break;
+      }
+    }
+  }
+  return undefined;
+}
+/**
+ * Turn the verification agent's raw response into a structured verdict.
+ *
+ * The analysis scratchpad is stripped first. The span from the first `{` to
+ * the last `}` is then parsed as JSON; if that span is not valid JSON (for
+ * example because the model added text containing braces around the
+ * verdict), the text is scanned for a balanced JSON object that has a
+ * `verdict` key instead of giving up.
+ *
+ * Never throws: every failure is reported as an `inconclusive` verdict.
+ * Unknown verdict values also map to `inconclusive`, and non-string entries
+ * in `issues` / `filesReviewed` are dropped.
+ *
+ * @param response Raw text returned by the verification model.
+ * @returns Verdict fields without `verifiedAt` and `cost`, which the caller adds.
+ */
+export function parseVerificationResponse(response: string): Omit<VerificationResult, 'verifiedAt' | 'cost'> {
+  const { cleaned } = stripAnalysis(response);
+  // Extract JSON from response
+  const greedySpan = cleaned.match(/\{[\s\S]*\}/);
+  if (!greedySpan) {
+    return {
+      verdict: 'inconclusive',
+      summary: 'Failed to parse verification response',
+      issues: ['Verification agent did not produce valid JSON'],
+      filesReviewed: [],
+    };
+  }
+  let parsed: unknown;
+  try {
+    // Same first attempt as before, so responses that used to parse are unaffected.
+    parsed = JSON.parse(greedySpan[0]);
+  } catch {
+    parsed = findVerdictObject(cleaned);
+  }
+  if (typeof parsed !== 'object' || parsed === null) {
+    return {
+      verdict: 'inconclusive',
+      summary: 'Failed to parse verification JSON',
+      issues: ['JSON parse error in verification response'],
+      filesReviewed: [],
+    };
+  }
+  const {
+    verdict: rawVerdict,
+    summary: rawSummary,
+    issues: rawIssues,
+    filesReviewed: rawFiles,
+  } = parsed as Record<string, unknown>;
+  const knownVerdicts: readonly VerificationVerdict[] = ['pass', 'fail', 'inconclusive'];
+  const isString = (item: unknown): item is string => typeof item === 'string';
+  return {
+    verdict: knownVerdicts.find((known) => known === rawVerdict) ?? 'inconclusive',
+    summary: isString(rawSummary) ? rawSummary : 'No summary provided',
+    issues: Array.isArray(rawIssues) ? rawIssues.filter(isString) : [],
+    filesReviewed: Array.isArray(rawFiles) ? rawFiles.filter(isString) : [],
+  };
+}
+// ---------------------------------------------------------------------------
+// Run verification
+// ---------------------------------------------------------------------------
+/**
+ * Ask the verification model for a verdict on a completed task.
+ *
+ * Makes a single callAI request (system prompt + rendered user prompt, with
+ * 2048 passed as the final argument), parses the reply, and attaches a
+ * timestamp and an estimated cost.
+ *
+ * @param input Task and run data to verify.
+ * @param providerType Provider kind, forwarded to callAI as the `type` field.
+ * @param tier Forwarded unchanged as the model name to both callAI and
+ *   CostTracker.estimateCost.
+ *   NOTE(review): no tier → model resolution happens here — confirm callers
+ *   pass a concrete model id.
+ * @param apiKey Optional API key forwarded to callAI.
+ * @returns The parsed verdict plus `verifiedAt` (ISO timestamp) and `cost`.
+ */
+export async function runVerification(
+  input: VerifyTaskInput,
+  providerType: string,
+  tier: string,
+  apiKey?: string,
+): Promise<VerificationResult> {
+  const userPrompt = buildVerificationPrompt(input);
+  const response = await callAI(
+    {
+      type: providerType as 'anthropic' | 'claude-cli' | 'platform',
+      apiKey,
+      model: tier,
+    },
+    VERIFICATION_SYSTEM_PROMPT,
+    userPrompt,
+    2048,
+  );
+  const verdictFields = parseVerificationResponse(response);
+  // No usage data is read from the response, so token counts are approximated
+  // from text length at four characters per token.
+  const charsPerToken = 4;
+  const promptChars = VERIFICATION_SYSTEM_PROMPT.length + userPrompt.length;
+  const cost = CostTracker.estimateCost(tier, {
+    inputTokens: Math.round(promptChars / charsPerToken),
+    outputTokens: Math.round(response.length / charsPerToken),
+  });
+  return { ...verdictFields, verifiedAt: new Date().toISOString(), cost };
+}