npm - @synergenius/flow-weaver-pack-weaver - Versions diffs - 0.9.193 → 0.9.195 - Mend

@synergenius/flow-weaver-pack-weaver 0.9.193 → 0.9.195

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

package/dist/bot/assistant-core.js +2 -2
package/dist/bot/assistant-core.js.map +1 -1
package/dist/bot/capability-registry.js +2 -2
package/dist/bot/capability-registry.js.map +1 -1
package/dist/bot/context-compactor.d.ts +35 -0
package/dist/bot/context-compactor.d.ts.map +1 -0
package/dist/bot/context-compactor.js +130 -0
package/dist/bot/context-compactor.js.map +1 -0
package/dist/bot/memory-extraction-worker.d.ts +14 -0
package/dist/bot/memory-extraction-worker.d.ts.map +1 -0
package/dist/bot/memory-extraction-worker.js +42 -0
package/dist/bot/memory-extraction-worker.js.map +1 -0
package/dist/bot/memory-extractor.d.ts +27 -0
package/dist/bot/memory-extractor.d.ts.map +1 -0
package/dist/bot/memory-extractor.js +155 -0
package/dist/bot/memory-extractor.js.map +1 -0
package/dist/bot/operations.d.ts +3 -1
package/dist/bot/operations.d.ts.map +1 -1
package/dist/bot/operations.js +3 -1
package/dist/bot/operations.js.map +1 -1
package/dist/bot/swarm-controller.d.ts +2 -0
package/dist/bot/swarm-controller.d.ts.map +1 -1
package/dist/bot/swarm-controller.js +42 -0
package/dist/bot/swarm-controller.js.map +1 -1
package/dist/bot/task-prompt-builder.js +35 -21
package/dist/bot/task-prompt-builder.js.map +1 -1
package/dist/bot/task-types.d.ts +2 -0
package/dist/bot/task-types.d.ts.map +1 -1
package/dist/bot/tool-registry.d.ts +13 -0
package/dist/bot/tool-registry.d.ts.map +1 -1
package/dist/bot/tool-registry.js +80 -0
package/dist/bot/tool-registry.js.map +1 -1
package/dist/bot/types.d.ts +2 -0
package/dist/bot/types.d.ts.map +1 -1
package/dist/node-types/agent-execute.d.ts.map +1 -1
package/dist/node-types/agent-execute.js +20 -15
package/dist/node-types/agent-execute.js.map +1 -1
package/dist/node-types/build-context.d.ts.map +1 -1
package/dist/node-types/build-context.js +18 -3
package/dist/node-types/build-context.js.map +1 -1
package/dist/node-types/receive-task.d.ts +2 -1
package/dist/node-types/receive-task.d.ts.map +1 -1
package/dist/node-types/receive-task.js +4 -1
package/dist/node-types/receive-task.js.map +1 -1
package/dist/node-types/review-result.d.ts +9 -0
package/dist/node-types/review-result.d.ts.map +1 -1
package/dist/node-types/review-result.js +20 -5
package/dist/node-types/review-result.js.map +1 -1
package/dist/ui/capability-editor.js +2 -2
package/dist/ui/profile-editor.js +2 -2
package/dist/ui/swarm-dashboard.js +2 -2
package/flowweaver.manifest.json +1 -1
package/package.json +2 -2
package/src/bot/assistant-core.ts +2 -2
package/src/bot/capability-registry.ts +2 -2
package/src/bot/context-compactor.ts +147 -0
package/src/bot/memory-extraction-worker.ts +58 -0
package/src/bot/memory-extractor.ts +213 -0
package/src/bot/operations.ts +3 -1
package/src/bot/swarm-controller.ts +43 -0
package/src/bot/task-prompt-builder.ts +37 -21
package/src/bot/task-types.ts +2 -0
package/src/bot/tool-registry.ts +89 -0
package/src/bot/types.ts +2 -0
package/src/node-types/agent-execute.ts +25 -15
package/src/node-types/build-context.ts +19 -3
package/src/node-types/receive-task.ts +3 -0
package/src/node-types/review-result.ts +22 -5

package/src/bot/swarm-controller.ts CHANGED Viewed

@@ -33,6 +33,9 @@ import type { BotProfile, BotInstance, OrchestratorInput, OrchestratorDecision,
 import { buildDefaultBehavior, adjustBehaviorForComplexity } from './behavior-defaults.js';
 import type { Task, RunProgress } from './task-types.js';
 import type { WorkflowResult } from './types.js';
+import { scheduleMemoryExtraction } from './memory-extraction-worker.js';
+import { shouldCompact, compactRunHistory } from './context-compactor.js';
+import { callAI } from './ai-client.js';
 // ---------------------------------------------------------------------------
 // Types
@@ -118,6 +121,9 @@ export class SwarmController {
   /** Last emitted dispatch-filter-summary JSON (for dedup / throttling). */
   private lastFilterSummaryJson: string | null = null;
+  /** Frozen system prompt prefix for cross-slot Anthropic cache sharing. */
+  private frozenPromptPrefix: string | null = null;
   // -----------------------------------------------------------------------
   // Singleton
   // -----------------------------------------------------------------------
@@ -210,6 +216,16 @@ export class SwarmController {
     this.state.startedAt = new Date().toISOString();
     this._persist();
+    // Freeze the stable system prompt prefix for cross-slot cache sharing.
+    // All bot slots will use this identical prefix; only the per-task suffix varies.
+    try {
+      const { buildSystemPrompt } = await import('./system-prompt.js');
+      this.frozenPromptPrefix = await buildSystemPrompt();
+    } catch (err) {
+      if (process.env.WEAVER_VERBOSE) console.warn('[swarm] failed to freeze system prompt prefix:', err);
+      this.frozenPromptPrefix = null;
+    }
     console.log(`\x1b[36m[swarm] started (pack-weaver v${PACK_VERSION})\x1b[0m`);
     this.eventLog.emit({ type: 'swarm-started', timestamp: Date.now(), data: { packVersion: PACK_VERSION } });
@@ -793,6 +809,28 @@ export class SwarmController {
       const task = await this.taskStore.get(taskId);
       if (!task) throw new Error(`Task not found: ${taskId}`);
+      // LLM-based context compaction — produces a structured summary of all runs
+      // when the task has enough history. The summary replaces verbose per-run
+      // sections in the prompt, preserving semantic signal.
+      if (shouldCompact(task, profile.preferences?.costStrategy)) {
+        try {
+          const { resolveModelTier } = await import('./behavior-defaults.js');
+          const compactModel = resolveModelTier('fast', 'anthropic');
+          const compactPInfo = {
+            type: 'anthropic' as const,
+            apiKey: process.env.ANTHROPIC_API_KEY,
+            model: compactModel,
+          };
+          const summary = await compactRunHistory(task, compactPInfo, callAI);
+          if (summary) {
+            task.context.compactedSummary = summary;
+            await this.taskStore.update(taskId, { context: task.context });
+          }
+        } catch {
+          // Compaction failure is non-fatal — prompt builder falls back to context decay
+        }
+      }
       // Build prompt from task context
       const parentTask = task.parentId ? await this.taskStore.get(task.parentId) : null;
       const siblingTasks = task.parentId ? await this.taskStore.getSubtasks(task.parentId) : [];
@@ -925,6 +963,11 @@ export class SwarmController {
     await this.taskStore.release(taskId, releaseStatus, runProgress);
+    // Fire-and-forget memory extraction — persists project facts for future runs
+    if (task) {
+      scheduleMemoryExtraction(this.projectDir, task, runProgress);
+    }
     // Record token usage
     this.recordTokenUsage(workerId, taskId, tokensUsed, costUsed);

package/src/bot/task-prompt-builder.ts CHANGED Viewed

@@ -65,10 +65,13 @@ function buildFull(
   }
   // --- Context decay: workspace is the source of truth, not history ---
-  // Workers see: last acceptance check, last run's remainingWork/blockers,
-  // stagnation count, and a directive to read the workspace.
+  // If a compacted summary exists (from LLM compaction after 3+ runs),
+  // use it instead of the per-run sections — it preserves semantic signal.
+  if (task.context.compactedSummary) {
+    sections.push(`### Execution History (Compacted)\n${task.context.compactedSummary}`);
+  }
-  // 2.3.2: Last acceptance check result
+  // 2.3.2: Last acceptance check result (always shown, even with compacted summary)
   if (task.lastAcceptanceCheck) {
     const ac = task.lastAcceptanceCheck;
     const checkLines = ac.results
@@ -78,6 +81,7 @@ function buildFull(
   }
   // 2.3.3: Continue from last run's remaining work
+  // (always shown — most recent actionable data, even with compacted summary)
   const lastRun = task.context.runHistory.length > 0
     ? task.context.runHistory[task.context.runHistory.length - 1]
     : undefined;
@@ -90,18 +94,21 @@ function buildFull(
     sections.push(`### Previous Run Blocked By\n${(lastRun.blockers as string[]).map((b: string) => `- ${b}`).join('\n')}`);
   }
-  // Last run summary (one run only, not full history)
-  if (lastRun && 'summary' in lastRun) {
-    sections.push(`### Last Run\nOutcome: ${lastRun.outcome} | ${lastRun.summary}`);
-  }
+  // Per-run sections — skipped when compacted summary exists (it covers this info)
+  if (!task.context.compactedSummary) {
+    // Last run summary (one run only, not full history)
+    if (lastRun && 'summary' in lastRun) {
+      sections.push(`### Last Run\nOutcome: ${lastRun.outcome} | ${lastRun.summary}`);
+    }
-  // Run count + stagnation
-  if (task.context.runHistory.length > 0) {
-    let meta = `Total runs: ${task.context.runHistory.length}`;
-    if (task.context.stagnationCount > 0) {
-      meta += ` | Stagnation: ${task.context.stagnationCount} run(s) with no new changes — try a different approach`;
+    // Run count + stagnation
+    if (task.context.runHistory.length > 0) {
+      let meta = `Total runs: ${task.context.runHistory.length}`;
+      if (task.context.stagnationCount > 0) {
+        meta += ` | Stagnation: ${task.context.stagnationCount} run(s) with no new changes — try a different approach`;
+      }
+      sections.push(`### Run History\n${meta}`);
     }
-    sections.push(`### Run History\n${meta}`);
   }
   // Directive: read the workspace, don't rely on stale context
@@ -226,6 +233,11 @@ function buildWithTruncation(
     sections.push(`### Relevant Files\n${task.context.files.join('\n')}`);
   }
+  // Compacted summary (same guard as buildFull)
+  if (task.context.compactedSummary) {
+    sections.push(`### Execution History (Compacted)\n${task.context.compactedSummary}`);
+  }
   // Context decay: last acceptance check + last run only
   if (task.lastAcceptanceCheck) {
     const ac = task.lastAcceptanceCheck;
@@ -244,16 +256,20 @@ function buildWithTruncation(
   if (lastRunT && 'blockers' in lastRunT && Array.isArray(lastRunT.blockers) && lastRunT.blockers.length > 0) {
     sections.push(`### Previous Run Blocked By\n${(lastRunT.blockers as string[]).map((b: string) => `- ${b}`).join('\n')}`);
   }
-  if (lastRunT && 'summary' in lastRunT) {
-    sections.push(`### Last Run\nOutcome: ${lastRunT.outcome} | ${lastRunT.summary}`);
-  }
-  if (task.context.runHistory.length > 0) {
-    let meta = `Total runs: ${task.context.runHistory.length}`;
-    if (task.context.stagnationCount > 0) {
-      meta += ` | Stagnation: ${task.context.stagnationCount} — try a different approach`;
+  // Per-run sections — skipped when compacted summary exists
+  if (!task.context.compactedSummary) {
+    if (lastRunT && 'summary' in lastRunT) {
+      sections.push(`### Last Run\nOutcome: ${lastRunT.outcome} | ${lastRunT.summary}`);
+    }
+    if (task.context.runHistory.length > 0) {
+      let meta = `Total runs: ${task.context.runHistory.length}`;
+      if (task.context.stagnationCount > 0) {
+        meta += ` | Stagnation: ${task.context.stagnationCount} — try a different approach`;
+      }
+      sections.push(`### Run History\n${meta}`);
     }
-    sections.push(`### Run History\n${meta}`);
   }
   // Directive: read the workspace, don't rely on stale context

package/src/bot/task-types.ts CHANGED Viewed

@@ -71,6 +71,8 @@ export interface TaskContext {
   stagnationCount: number;
   budgetExhausted?: boolean;
   projectBrief?: string;
+  /** LLM-generated summary of all runs, replacing verbose run history in prompts. */
+  compactedSummary?: string;
 }
 // ---------------------------------------------------------------------------

package/src/bot/tool-registry.ts CHANGED Viewed

@@ -5,6 +5,7 @@
  */
 import type { ToolDefinition } from '@synergenius/flow-weaver/agent';
+import { getCapability } from './capability-registry.js';
 export interface WeaverTool extends ToolDefinition {
   verboseOutput?: boolean;
@@ -575,6 +576,94 @@ export const BOT_TOOLS: ToolDefinition[] = ALL_TOOLS.filter(t => t.contexts.incl
 export const ASSISTANT_TOOLS: ToolDefinition[] = ALL_TOOLS.filter(t => t.contexts.includes('assistant'));
 export const VERBOSE_TOOL_NAMES = new Set(ALL_TOOLS.filter(t => t.verboseOutput).map(t => t.name));
// ── Mode-based tool filtering ───────────────────────────────────────

/**
 * Core tools that survive the capability intersection: a tool listed here is
 * kept whenever the task's mode pool contains it, even if no capability grants it.
 * Every pool in MODE_TOOLS currently lists all of these, so in practice they are
 * present in every result.
 */
const CORE_TOOLS = new Set([
  'read_file', 'list_files', 'run_shell', 'validate', 'learn', 'recall',
]);

/** Tools allowed per task mode. Keys match task.mode values. */
const MODE_TOOLS: Record<string, Set<string>> = {
  create: new Set([
    'read_file', 'list_files', 'write_file', 'patch_file',
    'run_shell', 'validate', 'tsc_check', 'run_tests',
    'learn', 'recall',
  ]),
  modify: new Set([
    'read_file', 'list_files', 'patch_file',
    'run_shell', 'validate', 'tsc_check', 'run_tests',
    'learn', 'recall',
  ]),
  read: new Set([
    'read_file', 'list_files', 'run_shell', 'validate',
    'learn', 'recall',
  ]),
  batch: new Set([
    'read_file', 'list_files', 'write_file', 'patch_file',
    'run_shell', 'validate', 'tsc_check', 'run_tests',
    'learn', 'recall',
  ]),
};

/** Mode used when a task has no mode, or a mode with no entry in MODE_TOOLS. */
const DEFAULT_TASK_MODE = 'create';

/**
 * Union of every mode pool, computed once at module load.
 * A capability-granted tool that is absent from this set is "role-specific":
 * no mode restricts it, so the capability alone decides whether it is granted.
 */
const ALL_MODE_TOOLS: ReadonlySet<string> = new Set(
  Object.values(MODE_TOOLS).flatMap(pool => [...pool]),
);

/**
 * Resolve which tools a bot should have for a given task and profile.
 *
 * Uses the task mode to select a base tool pool, then intersects it with the
 * tools granted by the profile's capabilities. Core tools in the pool are kept
 * regardless of capabilities, and capability tools that appear in no mode pool
 * (role-specific tools) are added on top.
 *
 * @param task - Task whose optional `mode` selects the base pool. A missing or
 *   unrecognised mode falls back to the 'create' pool.
 * @param capabilities - Profile capability names (e.g., ['role-developer', 'file-ops', 'shell']).
 *   When omitted or empty, no capability restriction is applied.
 * @returns A fresh Set of tool names the bot should receive; mutating it does
 *   not affect the module-level pools.
 */
export function resolveToolsForTask(
  task: { mode?: string },
  capabilities?: string[],
): Set<string> {
  const mode = task.mode ?? DEFAULT_TASK_MODE;

  // Own-property check: MODE_TOOLS is a plain object, so a bare index lookup
  // would resolve inherited keys ('constructor', 'toString', '__proto__', ...)
  // to non-Set values and the iteration below would throw.
  const modePool = Object.prototype.hasOwnProperty.call(MODE_TOOLS, mode)
    ? MODE_TOOLS[mode]
    : MODE_TOOLS[DEFAULT_TASK_MODE];

  // No capability restriction — use a copy of the mode pool as-is.
  if (!capabilities || capabilities.length === 0) {
    return new Set(modePool);
  }

  // Collect every tool granted by the named capabilities. Names for which
  // getCapability returns nothing (or no tools) contribute nothing.
  const capTools = new Set<string>();
  for (const capName of capabilities) {
    const cap = getCapability(capName);
    if (cap?.tools) {
      for (const tool of cap.tools) capTools.add(tool);
    }
  }

  const result = new Set<string>();

  // Step 1: mode-restricted tools must be in the mode pool AND be either
  // capability-granted or core. This is what keeps write_file out of
  // 'modify' mode even when a capability grants it.
  for (const tool of modePool) {
    if (capTools.has(tool) || CORE_TOOLS.has(tool)) {
      result.add(tool);
    }
  }

  // Step 2: role-specific tools (present in no mode pool) are purely additive.
  for (const tool of capTools) {
    if (!ALL_MODE_TOOLS.has(tool)) {
      result.add(tool);
    }
  }

  return result;
}
 /**
  * Generate a prompt section grouping assistant tools by category.
  */

package/src/bot/types.ts CHANGED Viewed

@@ -642,6 +642,8 @@ export interface WeaverContext {
   allValid?: boolean;
   gitResultJson?: string;
   reviewJson?: string;
+  /** Frozen system prompt prefix from swarm controller for cross-slot cache sharing. */
+  frozenPromptPrefix?: string;
 }
 export interface GenesisContext {

package/src/node-types/agent-execute.ts CHANGED Viewed

@@ -4,14 +4,17 @@ import {
   createAnthropicProvider,
   getOrCreateCliSession,
   killAllCliSessions,
+  joinSplitPrompt,
   type AgentProvider,
   type AgentMessage,
   type ToolDefinition,
   type StreamEvent,
   type StreamOptions,
   type ToolEvent,
+  type SplitPrompt,
 } from '@synergenius/flow-weaver/agent';
 import { WEAVER_TOOLS, createWeaverExecutor } from '../bot/weaver-tools.js';
+import { resolveToolsForTask } from '../bot/tool-registry.js';
 import { auditEmit } from '../bot/audit-logger.js';
 import { withRetry, getErrorGuidance } from '../bot/error-classifier.js';
 import { CostTracker } from '../bot/cost-tracker.js';
@@ -64,15 +67,16 @@ class CliSessionProvider implements AgentProvider {
     if (!prompt) return;
-    // Only pass system prompt on the first call
-    const systemPrompt = this.sentCount <= messages.length ? options?.systemPrompt : undefined;
+    // Only pass system prompt on the first call — CLI sessions accept a string
+    const splitPrompt = this.sentCount <= messages.length ? options?.systemPrompt : undefined;
+    const systemPromptStr = splitPrompt ? joinSplitPrompt(splitPrompt) : undefined;
     // Forward usage events to the runner's CostTracker via the global callback.
     // This bridges CLI session usage → runner cost tracking → swarm budget enforcement.
     const usageCb = (globalThis as Record<string, unknown>).__fw_ai_usage_callback__ as
       ((model: string, usage: { inputTokens: number; outputTokens: number }) => void) | undefined;
-    for await (const event of this.session.send(prompt, systemPrompt)) {
+    for await (const event of this.session.send(prompt, systemPromptStr)) {
       if (event.type === 'usage' && usageCb) {
         usageCb(this.model, {
           inputTokens: event.promptTokens,
@@ -150,21 +154,24 @@ export async function weaverAgentExecute(
     return { onSuccess: false, onFailure: true, ctx: JSON.stringify(context) };
   }
-  // Build system prompt
-  let systemPrompt: string;
+  // Build system prompt as SplitPrompt — prefix is stable (cacheable),
+  // suffix is per-task (contextBundle, project plan).
+  // If frozenPromptPrefix is available from the swarm controller, use it
+  // to ensure all bot slots share the same cached prefix bytes.
+  let systemPrompt: SplitPrompt;
   try {
     const mod = await import('../bot/system-prompt.js');
-    const basePrompt = await mod.buildSystemPrompt();
+    const prefix = context.frozenPromptPrefix ?? await mod.buildSystemPrompt();
     let cliCommands: { name: string; description: string; botCompatible?: boolean; options?: { flags: string; arg?: string; description: string }[] }[] = [];
     try {
       const docMeta = await import('@synergenius/flow-weaver/doc-metadata');
       cliCommands = docMeta.CLI_COMMANDS ?? [];
     } catch (err) { if (process.env.WEAVER_VERBOSE) console.error('[agent-execute] doc-metadata unavailable (older fw):', err); }
-    const botPrompt = mod.buildBotSystemPrompt(context.contextBundle, cliCommands, projectDir);
-    systemPrompt = basePrompt + '\n\n' + botPrompt;
+    const suffix = mod.buildBotSystemPrompt(context.contextBundle, cliCommands, projectDir);
+    systemPrompt = { prefix, suffix };
   } catch (err) {
     if (process.env.WEAVER_VERBOSE) console.error('[agent-execute] system prompt build failed, using fallback:', err);
-    systemPrompt = 'You are Weaver, an AI workflow bot. Use the provided tools to complete tasks.';
+    systemPrompt = { prefix: 'You are Weaver, an AI workflow bot. Use the provided tools to complete tasks.', suffix: '' };
   }
   const taskPrompt = task.instruction.startsWith('## Task:')
@@ -219,14 +226,17 @@ export async function weaverAgentExecute(
     const onStreamEvent = (event: StreamEvent) => renderer.onStreamEvent(event);
-    // Filter tools by profile: only orchestrators get task_create.
-    // Without this, the AI sees task_create and delegates instead of doing work.
+    // Filter tools by task mode and profile capabilities.
+    // Mode-based filtering removes tools the task doesn't need (e.g., modify mode
+    // excludes write_file). Capability intersection ensures profiles only get their
+    // granted tools (e.g., orchestrator gets task_create, developer does not).
     const behavior = context.behaviorJson ? JSON.parse(context.behaviorJson) : undefined;
     const caps: string[] = behavior?.capabilities ?? [];
-    const isOrchestrator = caps.includes('role-orchestrator') || caps.includes('task-mgmt') || caps.includes('decomposition');
-    const tools = isOrchestrator
-      ? WEAVER_TOOLS
-      : WEAVER_TOOLS.filter(t => t.name !== 'task_create');
+    const grantedToolNames = resolveToolsForTask(
+      { mode: task.mode },
+      caps.length > 0 ? caps : undefined,
+    );
+    const tools = WEAVER_TOOLS.filter(t => grantedToolNames.has(t.name));
     const result = await withRetry(
       () => runAgentLoop(

package/src/node-types/build-context.ts CHANGED Viewed

@@ -102,13 +102,29 @@ export function weaverBuildContext(ctx: string): { ctx: string } {
     }
   } catch { /* non-fatal — memory is best-effort */ }
-  // Auto-recall learned knowledge from previous bot runs
+  // Auto-recall learned knowledge from previous bot runs (with aging caveats)
   try {
     const knowledge = new KnowledgeStore(projectDir);
     const entries = knowledge.list();
     if (entries.length > 0) {
-      const knowledgeLines = entries.map((e: { key: string; value: string }) => `- **${e.key}**: ${e.value}`);
-      sections.push(`## Learned Knowledge\n\nFacts discovered by previous runs — use these instead of re-discovering:\n${knowledgeLines.join('\n')}`);
+      const now = Date.now();
+      const NINETY_DAYS_MS = 90 * 24 * 60 * 60 * 1000;
+      // Auto-prune entries older than 90 days
+      const staleKeys = entries.filter(e => now - e.createdAt > NINETY_DAYS_MS).map(e => e.key);
+      for (const key of staleKeys) knowledge.forget(key);
+      const fresh = entries.filter(e => now - e.createdAt <= NINETY_DAYS_MS);
+      if (fresh.length > 0) {
+        const knowledgeLines = fresh.map((e: { key: string; value: string; createdAt: number }) => {
+          const ageDays = Math.floor((now - e.createdAt) / (24 * 60 * 60 * 1000));
+          const caveat = ageDays >= 1
+            ? ` _(${ageDays}d ago — may be outdated, verify before asserting)_`
+            : '';
+          return `- **${e.key}**: ${e.value}${caveat}`;
+        });
+        sections.push(`## Learned Knowledge\n\nFacts discovered by previous runs — use these instead of re-discovering:\n${knowledgeLines.join('\n')}`);
+      }
     }
   } catch { /* non-fatal — knowledge recall is best-effort */ }

package/src/node-types/receive-task.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import type { WeaverEnv, WeaverContext } from '../bot/types.js';
  * @color purple
  * @input env [order:0] - Weaver environment bundle
  * @input [taskJson] [order:1] - Pre-supplied task (JSON, optional)
+ * @input [frozenPromptPrefix] [order:2] [hidden] - Frozen system prompt prefix for cache sharing
  * @output ctx [order:0] - Weaver context (JSON)
  * @output onSuccess [order:-2] - On Success
  * @output onFailure [order:-1] [hidden] - On Failure
@@ -18,11 +19,13 @@ export async function weaverReceiveTask(
   execute: boolean,
   env: WeaverEnv,
   taskJson?: string,
+  frozenPromptPrefix?: string,
 ): Promise<{
   onSuccess: boolean; onFailure: boolean;
   ctx: string;
 }> {
   const context: WeaverContext = { env, taskJson: '{}', hasTask: false };
+  if (frozenPromptPrefix) context.frozenPromptPrefix = frozenPromptPrefix;
   if (!execute) {
     return { onSuccess: true, onFailure: false, ctx: JSON.stringify(context) };

package/src/node-types/review-result.ts CHANGED Viewed

@@ -11,6 +11,18 @@ import {
 } from '@synergenius/flow-weaver/agent';
 import { createWeaverExecutor } from '../bot/weaver-tools.js';
/**
 * Strip `<analysis>...</analysis>` scratchpad blocks from LLM response text.
 * The analysis is a reasoning scaffold that improves verdict quality but
 * should not leak into the parsed JSON output.
 *
 * Every closed block is removed, and the tag match is case-insensitive so a
 * response that writes `<Analysis>` is handled the same way. An opening tag
 * with no matching closing tag is left untouched, because there is no reliable
 * way to tell where the scratchpad ends and the verdict begins.
 *
 * @param text - Raw response text that may contain zero or more analysis blocks.
 * @returns `cleaned` — the text with all analysis blocks removed and outer
 *   whitespace trimmed; `analysis` — the trimmed contents of all non-empty
 *   blocks joined by a blank line, or `undefined` when there is none.
 */
export function stripAnalysis(text: string): { cleaned: string; analysis: string | undefined } {
  const blocks: string[] = [];

  // Single pass: remove each block and capture its contents at the same time,
  // so what is reported as `analysis` always matches what was stripped.
  const cleaned = text
    .replace(/<analysis>([\s\S]*?)<\/analysis>/gi, (_block: string, inner: string) => {
      const trimmed = inner.trim();
      if (trimmed) blocks.push(trimmed);
      return '';
    })
    .trim();

  return { cleaned, analysis: blocks.length > 0 ? blocks.join('\n\n') : undefined };
}
 /**
  * LLM-powered task completion reviewer.
  * Makes a single judgment call: did the bot accomplish the assigned task?
@@ -95,7 +107,9 @@ Rate each criterion as PASS or FAIL:
 If you need to verify file contents to judge the RESULT criterion, use the read_file tool. Only read files if the evidence is ambiguous.
-Respond with exactly:
+First, write your reasoning inside <analysis> tags. Work through each criterion step by step, examining the evidence.
+After your <analysis> block, output only the following JSON — no other text outside the tags:
 {"pass": true/false, "intent": "PASS/FAIL", "execution": "PASS/FAIL", "result": "PASS/FAIL", "completeness": "PASS/FAIL", "reason": "one sentence summary"}`;
   try {
@@ -129,11 +143,14 @@ Respond with exactly:
       { maxIterations: 2 },
     );
+    // Strip <analysis> scratchpad before parsing JSON verdict
+    const { cleaned: cleanedSummary } = stripAnalysis(result.summary);
     // Parse the structured response
     let pass = true;
     let reason = 'Review completed';
     let criteria: Record<string, string> = {};
-    const jsonMatch = result.summary.match(/\{[\s\S]*"pass"[\s\S]*\}/);
+    const jsonMatch = cleanedSummary.match(/\{[\s\S]*"pass"[\s\S]*\}/);
     if (jsonMatch) {
       try {
         const parsed = JSON.parse(jsonMatch[0]);
@@ -146,14 +163,14 @@ Respond with exactly:
       } catch {
         if (jsonMatch[0].includes('"pass": false') || jsonMatch[0].includes('"pass":false')) {
           pass = false;
-          reason = result.summary.slice(0, 200);
+          reason = cleanedSummary.slice(0, 200);
         }
       }
     } else {
-      const lower = result.summary.toLowerCase();
+      const lower = cleanedSummary.toLowerCase();
       if (lower.includes('"pass": false') || lower.includes('"pass":false')) {
         pass = false;
-        reason = result.summary.slice(0, 200);
+        reason = cleanedSummary.slice(0, 200);
       }
     }