npm - titan-agent - Versions diffs - 6.0.0-beta.4 → 6.0.0 - Mend

titan-agent 6.0.0-beta.4 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131)

package/dist/agent/verifier.js CHANGED Viewed

@@ -338,23 +338,98 @@ function verifyVerify(input) {
     confidence: input.spawnResult.confidence ?? 0.7
   };
 }
+function llmJudgeEnabled() {
+  const env = (process.env.TITAN_LLM_JUDGE_VERIFY ?? "").toLowerCase().trim();
+  if (env === "0" || env === "false" || env === "no" || env === "off") return false;
+  return true;
+}
+async function llmJudgeVerify(input, kindResult) {
+  if (input.kind === "verify") return kindResult;
+  if (!llmJudgeEnabled()) return kindResult;
+  try {
+    const { spawnSubAgent } = await import("./subAgent.js");
+    const reasoning = (input.spawnResult.reasoning || input.spawnResult.rawResponse || "").slice(0, 1800);
+    const artifactNote = input.spawnResult.artifacts?.length ? `
+Artifacts produced: ${input.spawnResult.artifacts.map((a) => `${a.type}:${a.ref}`).join(", ")}` : "";
+    const judgePrompt = [
+      `You are a strict verification judge. Your ONE job is to decide whether the work below actually fulfilled the subtask intent.`,
+      ``,
+      `Subtask:`,
+      `  title: ${input.subtask.title}`,
+      `  description: ${input.subtask.description}`,
+      ``,
+      `Work produced (truncated to 1.8k chars):`,
+      reasoning,
+      artifactNote,
+      ``,
+      `Per-kind verifier ('${input.kind}') already passed with reason: ${kindResult.reason}`,
+      ``,
+      `Your job: does this work ACTUALLY address the subtask, or did it surface-pass without delivering?`,
+      ``,
+      `Common surface-pass failure modes to catch:`,
+      `  - Length OK but content is generic / vague / doesn't address the specific subtask`,
+      `  - Code compiles but doesn't do what was asked`,
+      `  - Research has citations but missed the actual question`,
+      `  - Report has the keywords but no real conclusion`,
+      ``,
+      `Return STRICT JSON on a single line: {"passed": true|false, "reason": "<\u2264140 chars why>"}.`,
+      `No markdown. No prose before or after the JSON.`
+    ].join("\n");
+    const judgeResult = await spawnSubAgent({
+      name: "llm-judge",
+      task: judgePrompt,
+      tier: "fast",
+      maxRounds: 1
+    });
+    const raw = (judgeResult.content || "").trim();
+    const jsonStart = raw.indexOf("{");
+    const jsonEnd = raw.lastIndexOf("}");
+    if (jsonStart < 0 || jsonEnd <= jsonStart) {
+      logger.info(COMPONENT, `LLM judge returned non-JSON, deferring to per-kind verdict`);
+      return kindResult;
+    }
+    const parsed = JSON.parse(raw.slice(jsonStart, jsonEnd + 1));
+    if (typeof parsed.passed !== "boolean") return kindResult;
+    if (parsed.passed) return kindResult;
+    const judgeReason = typeof parsed.reason === "string" && parsed.reason.trim().length > 0 ? parsed.reason.trim().slice(0, 200) : "LLM judge said no without a reason";
+    logger.info(COMPONENT, `LLM judge OVERRIDE: per-kind passed but judge said fail \u2014 ${judgeReason}`);
+    return {
+      passed: false,
+      reason: `LLM judge: ${judgeReason}`,
+      verifier: `${input.kind}+llm-judge`,
+      confidence: kindResult.confidence,
+      details: `per-kind '${input.kind}' passed (${kindResult.reason}) but judge disagreed`
+    };
+  } catch (err) {
+    logger.warn(COMPONENT, `LLM judge threw (deferring to per-kind): ${err.message}`);
+    return kindResult;
+  }
+}
 async function verifyByKind(input) {
+  let kindResult;
   try {
     switch (input.kind) {
       case "code":
-        return await verifyCode(input);
+        kindResult = await verifyCode(input);
+        break;
       case "research":
-        return verifyResearch(input);
+        kindResult = verifyResearch(input);
+        break;
       case "write":
-        return await verifyWrite(input);
+        kindResult = await verifyWrite(input);
+        break;
       case "analysis":
-        return verifyAnalysis(input);
+        kindResult = verifyAnalysis(input);
+        break;
       case "verify":
-        return verifyVerify(input);
+        kindResult = verifyVerify(input);
+        break;
       case "shell":
-        return await verifyShell(input);
+        kindResult = await verifyShell(input);
+        break;
       case "report":
-        return verifyReport(input);
+        kindResult = verifyReport(input);
+        break;
       default:
         return { passed: false, reason: `Unknown kind: ${input.kind}`, verifier: "dispatch" };
     }
@@ -366,6 +441,10 @@ async function verifyByKind(input) {
       verifier: `${input.kind}:error`
     };
   }
+  if (kindResult.passed) {
+    return await llmJudgeVerify(input, kindResult);
+  }
+  return kindResult;
 }
 function readArtifactContent(path, maxBytes = 5e4) {
   try {

package/dist/agent/verifier.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../../src/agent/verifier.ts"],"sourcesContent":["/*\n TITAN — Verifier (v4.10.0-local, Phase A)\n \n Per-kind verification that a subtask is actually done, not just\n * \"the LLM emitted 200 chars and called it a day.\" Returns a\n * VerificationResult the driver uses to decide: advance to the next\n * subtask (passed), retry with fallback (failed), or escalate to human\n * (blocked on clarification).\n \n Per-kind contracts:\n * code — run typecheck + build in workspace; all green\n * research — ≥200 chars, ≥2 source markers, no \"I don't know\"\n * write — spawn Analyst with rubric, require score ≥0.7\n * analysis — response contains structured output meeting schema\n * verify — nested verifier of the thing it claims to verify\n * shell — exit code 0 and (if pattern provided) stdout matches\n * report — ≥500 chars, keywords: \"goal\"/\"outcome\"/\"artifacts\"\n /\nimport { existsSync, readFileSync } from 'fs';\nimport { promisify } from 'util';\nimport { exec as execCb } from 'child_process';\nimport logger from '../utils/logger.js';\nimport type { SubtaskKind } from './subtaskTaxonomy.js';\nimport type { StructuredSpawnResult } from './structuredSpawnTypes.js';\nimport type { Subtask } from './goals.js';\n\nconst exec = promisify(execCb);\nconst COMPONENT = 'Verifier';\n\nexport interface VerificationInput {\n kind: SubtaskKind;\n subtask: Subtask;\n spawnResult: StructuredSpawnResult;\n /\n Workspace for code verifications — defaults to repo root.\n * For staged writes, this is the staging directory.\n /\n workspace?: string;\n /\n Optional expected-output regex for shell verifications.\n /\n expectedOutputPattern?: string;\n}\n\nexport interface VerificationResult {\n passed: boolean;\n reason: string;\n verifier: string;\n confidence?: number;\n /* Files/URLs/facts produced. /\n artifacts?: string[];\n /* Stderr/stdout snippets for code verifications — helpful in UI. 
/\n details?: string;\n}\n\n// ── Generic bail-out check (runs before per-kind) ────────────────\n\nfunction hasGiveUpPhrase(text: string): boolean {\n const lowered = text.toLowerCase();\n const giveups = [\n \"i don't have a specific task\",\n 'no specific task to act on',\n \"i don't know what to do\",\n 'not enough information',\n 'cannot complete without',\n 'unable to determine',\n \"i can't proceed\",\n ];\n return giveups.some(g => lowered.includes(g));\n}\n\n// v4.10.0-local fix: Detect \"thinking\" prose that indicates the specialist\n// is starting work but didn't follow JSON output instructions. These patterns\n// (\"Now let me check...\", \"Let me analyze...\") should trigger retry, not block.\nfunction hasThinkingPattern(text: string): boolean {\n const trimmed = text.trim();\n const patterns = [\n /^now let me /i,\n /^let me /i,\n /^i will /i,\n /^i'll /i,\n /^first,? let me /i,\n /^ok, let me /i,\n /^okay, let me /i,\n /^sure,? let me /i,\n /^alright,? let me /i,\n ];\n return patterns.some(p => p.test(trimmed));\n}\n\n// ── Per-kind verifiers ───────────────────────────────────────────\n\nasync function verifyCode(input: VerificationInput): Promise<VerificationResult> {\n const workspace = input.workspace \|\| process.cwd();\n // Quick fail: were artifacts actually produced?\n const fileArtifacts = input.spawnResult.artifacts.filter(a => a.type === 'file').map(a => a.ref);\n if (fileArtifacts.length === 0) {\n return {\n passed: false,\n reason: 'No file artifacts reported by specialist',\n verifier: 'verifyCode',\n };\n }\n // Files actually exist?\n const missing = fileArtifacts.filter(p => !existsSync(p));\n if (missing.length > 0) {\n return {\n passed: false,\n reason: `Claimed files don't exist: ${missing.join(', ')}`,\n verifier: 'verifyCode',\n details: `Specialist claimed ${fileArtifacts.length} files but ${missing.length} are missing on disk.`,\n };\n }\n // Typecheck\n try {\n // Short timeout — typecheck usually 5-20s\n const { stdout: 
tcOut, stderr: tcErr } = await exec('npm run typecheck', {\n cwd: workspace,\n timeout: 120_000,\n maxBuffer: 10 1024 * 1024,\n });\n const tcOutput = (tcOut \|\| '') + (tcErr \|\| '');\n if (/error TS\\d+:/i.test(tcOutput) \|\| /Found \\d+ error/i.test(tcOutput)) {\n return {\n passed: false,\n reason: 'TypeScript errors in workspace',\n verifier: 'verifyCode',\n details: tcOutput.slice(-2000),\n artifacts: fileArtifacts,\n };\n }\n } catch (err) {\n // typecheck failed non-zero — extract errors\n const msg = (err as { stdout?: string; stderr?: string; message: string }).stdout\n \|\| (err as { stderr?: string }).stderr\n \|\| (err as Error).message;\n return {\n passed: false,\n reason: 'npm run typecheck failed',\n verifier: 'verifyCode',\n details: String(msg).slice(-2000),\n artifacts: fileArtifacts,\n };\n }\n return {\n passed: true,\n reason: `Typecheck passed; ${fileArtifacts.length} file(s) exist`,\n verifier: 'verifyCode',\n confidence: 0.9,\n artifacts: fileArtifacts,\n };\n}\n\nfunction verifyResearch(input: VerificationInput): VerificationResult {\n const text = input.spawnResult.reasoning \|\| input.spawnResult.rawResponse;\n if (hasGiveUpPhrase(text)) {\n return {\n passed: false,\n reason: \"Specialist gave up (give-up phrase detected)\",\n verifier: 'verifyResearch',\n };\n }\n // v4.10.0-local fix: catch thinking patterns that indicate JSON parsing failed\n if (hasThinkingPattern(text)) {\n return {\n passed: false,\n reason: \"Specialist returned thinking prose instead of structured JSON — needs retry\",\n verifier: 'verifyResearch',\n details: `Raw (200 chars): ${text.slice(0, 200)}`,\n };\n }\n // v4.10.0-local (post-deploy, Fix D): confidence+artifact escape hatch.\n // High-confidence done responses with ≥1 concrete artifact pass even\n // without prose markers. Prevents terse-but-correct specialists (e.g.\n // \"Done. 
5 sources saved to memory.\") from looping on verification.\n // Gated on artifact count — pure confidence would let hallucinating\n // specialists self-certify.\n if (input.spawnResult.status === 'done'\n && input.spawnResult.confidence >= 0.85\n && (input.spawnResult.artifacts?.length ?? 0) >= 1) {\n return {\n passed: true,\n reason: `High confidence (${input.spawnResult.confidence.toFixed(2)}) + ${input.spawnResult.artifacts.length} artifact(s) — confidence-tier pass`,\n verifier: 'verifyResearch',\n confidence: input.spawnResult.confidence * 0.95,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n // v4.10.0-local polish: lenient short-form path. Internal research\n // goals (like \"check local tool output\") often produce 100-200 char\n // responses that are still valid — the specialist ran the right tool\n // and returned a terse finding. Require markers OR internal artifacts.\n if (text.length < 100) {\n return {\n passed: false,\n reason: `Response too short (${text.length} chars, need ≥100)`,\n verifier: 'verifyResearch',\n };\n }\n // Count source markers: URLs, [1]-style refs, \"source:\", \"according to\"\n const urlCount = (text.match(/https?:\\/\\/[^\\s)]+/g) \|\| []).length;\n const refCount = (text.match(/\\[\\d+\\]/g) \|\| []).length;\n const sourceWords = (text.match(/\\b(source\|according to\|per the\|reference\|from the\|based on):/gi) \|\| []).length;\n const toolFindings = (text.match(/\\b(found\|returned\|reports?\|shows?\|indicates?\|displays?)\\b/gi) \|\| []).length;\n const markers = urlCount + refCount + sourceWords;\n const artifactCount = input.spawnResult.artifacts.length;\n\n // Path A: short response with artifact + tool-finding language\n if (text.length < 200) {\n if (artifactCount >= 1 && toolFindings >= 1 && input.spawnResult.confidence >= 0.7) {\n return {\n passed: true,\n reason: `Concise research ${text.length} chars, ${artifactCount} artifact(s), confidence ${input.spawnResult.confidence.toFixed(2)} — 
lenient pass`,\n verifier: 'verifyResearch',\n confidence: input.spawnResult.confidence * 0.85,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n return {\n passed: false,\n reason: `Response too short (${text.length} chars, need ≥200 OR artifact+tool-finding+high-confidence)`,\n verifier: 'verifyResearch',\n };\n }\n // Path B: longer response needs source markers\n if (markers < 2 && artifactCount < 1) {\n return {\n passed: false,\n reason: `Insufficient source markers (${markers}, need ≥2 URLs/refs/source phrases, or ≥1 artifact)`,\n verifier: 'verifyResearch',\n details: `urls=${urlCount} refs=${refCount} sourcewords=${sourceWords}`,\n };\n }\n return {\n passed: true,\n reason: `${markers} source markers, ${artifactCount} artifacts, ${text.length} chars`,\n verifier: 'verifyResearch',\n confidence: 0.8,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n}\n\nasync function verifyWrite(input: VerificationInput): Promise<VerificationResult> {\n const text = input.spawnResult.reasoning \|\| input.spawnResult.rawResponse;\n if (hasGiveUpPhrase(text)) {\n return { passed: false, reason: 'Specialist gave up', verifier: 'verifyWrite' };\n }\n // v4.10.0-local fix: catch thinking patterns that indicate JSON parsing failed\n if (hasThinkingPattern(text)) {\n return {\n passed: false,\n reason: 'Specialist returned thinking prose instead of structured JSON — needs retry',\n verifier: 'verifyWrite',\n details: `Raw (200 chars): ${text.slice(0, 200)}`,\n };\n }\n // v4.10.0-local (post-deploy, Fix D): confidence+artifact escape hatch.\n // See verifyResearch for rationale. Gated on artifact count.\n if (input.spawnResult.status === 'done'\n && input.spawnResult.confidence >= 0.85\n && (input.spawnResult.artifacts?.length ?? 
0) >= 1) {\n return {\n passed: true,\n reason: `High confidence (${input.spawnResult.confidence.toFixed(2)}) + ${input.spawnResult.artifacts.length} artifact(s) — confidence-tier pass`,\n verifier: 'verifyWrite',\n confidence: input.spawnResult.confidence * 0.95,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n if (text.length < 100) {\n return {\n passed: false,\n reason: `Draft too short (${text.length} chars, need ≥100)`,\n verifier: 'verifyWrite',\n };\n }\n // Rubric-based check: use spawn confidence + basic heuristics\n // (Full LLM-rubric check deferred — driver can spawn Analyst to review\n // via the structured-spawn path; here we do a fast local sanity check.)\n const confidence = input.spawnResult.confidence ?? 0.5;\n if (confidence < 0.6) {\n return {\n passed: false,\n reason: `Self-reported confidence ${confidence.toFixed(2)} below 0.6`,\n verifier: 'verifyWrite',\n };\n }\n return {\n passed: true,\n reason: `Draft ${text.length} chars, confidence ${confidence.toFixed(2)}`,\n verifier: 'verifyWrite',\n confidence,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n}\n\nfunction verifyAnalysis(input: VerificationInput): VerificationResult {\n const text = input.spawnResult.reasoning \|\| input.spawnResult.rawResponse;\n if (hasGiveUpPhrase(text)) {\n return { passed: false, reason: 'Specialist gave up', verifier: 'verifyAnalysis' };\n }\n // v4.10.0-local fix: catch thinking patterns that indicate JSON parsing failed\n if (hasThinkingPattern(text)) {\n return {\n passed: false,\n reason: 'Specialist returned thinking prose instead of structured JSON — needs retry',\n verifier: 'verifyAnalysis',\n details: `Raw (200 chars): ${text.slice(0, 200)}`,\n };\n }\n // v4.10.0-local (post-deploy, Fix D): confidence+artifact escape hatch.\n // Parallel to verifyResearch/verifyWrite. Sits below the existing\n // ≥3-artifact tier but catches the ≥0.85-confidence + ≥1-artifact case\n // that the stricter tier misses (e.g. 
a single bundle summary file).\n if (input.spawnResult.status === 'done'\n && input.spawnResult.confidence >= 0.85\n && (input.spawnResult.artifacts?.length ?? 0) >= 1) {\n return {\n passed: true,\n reason: `High confidence (${input.spawnResult.confidence.toFixed(2)}) + ${input.spawnResult.artifacts.length} artifact(s) — confidence-tier pass`,\n verifier: 'verifyAnalysis',\n confidence: input.spawnResult.confidence * 0.95,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n // v4.10.0-local polish (post-deploy): analysis verification now has\n // three tiers. Added an ARTIFACT tier to catch the common case where\n // the subtask was misclassified as \"analysis\" but the specialist\n // actually produced concrete artifacts (files, URLs, memory entries).\n // Previously those runs would ping-pong on verification forever\n // because the reasoning field was terse but the work was real.\n //\n // ARTIFACT tier: ≥3 concrete artifacts + status=done + confidence ≥ 0.7.\n // STRICT tier: needs reasoning markers OR bulleted list OR ≥200 chars + structure.\n // LENIENT tier: ≥80 chars AND status=done AND confidence ≥ 0.7.\n const artifactCount = input.spawnResult.artifacts?.length ?? 
0;\n if (artifactCount >= 3 && input.spawnResult.status === 'done' && input.spawnResult.confidence >= 0.7) {\n return {\n passed: true,\n reason: `Analysis produced ${artifactCount} artifact(s), confidence ${input.spawnResult.confidence.toFixed(2)} — artifact-tier pass`,\n verifier: 'verifyAnalysis',\n confidence: input.spawnResult.confidence * 0.9,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n\n const hasReasoningMarker = /\\b(conclusion\|because\|therefore\|thus\|hence\|as a result\|this means\|indicates\|suggests\|implies)\\b/i.test(text);\n const bulletCount = (text.match(/^\\s[-+]\\s+/gm) \|\| []).length;\n const numericCount = (text.match(/\\b\\d+(?:\\.\\d+)?(?:%\|\\s(?:chars?\|ms\|s\|m\|ticks?\|patterns?))?\\b/g) \|\| []).length;\n const hasStructure = hasReasoningMarker \|\| bulletCount >= 2 \|\| numericCount >= 2;\n\n if (text.length < 80) {\n return {\n passed: false,\n reason: `Analysis too short (${text.length} chars, need ≥80)`,\n verifier: 'verifyAnalysis',\n };\n }\n\n // Lenient path: short-but-confident responses\n if (text.length < 200 && input.spawnResult.confidence >= 0.7 && input.spawnResult.status === 'done') {\n return {\n passed: true,\n reason: `Analysis ${text.length} chars, high confidence (${input.spawnResult.confidence.toFixed(2)}) — lenient pass`,\n verifier: 'verifyAnalysis',\n confidence: input.spawnResult.confidence 0.85,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n\n // Strict path: longer responses need structural markers\n if (!hasStructure) {\n return {\n passed: false,\n reason: 'No reasoning markers, structured list, or numeric evidence found',\n verifier: 'verifyAnalysis',\n };\n }\n return {\n passed: true,\n reason: `Analysis ${text.length} chars with reasoning structure (markers=${hasReasoningMarker} bullets=${bulletCount} metrics=${numericCount})`,\n verifier: 'verifyAnalysis',\n confidence: 0.8,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n}\n\nasync 
function verifyShell(input: VerificationInput): Promise<VerificationResult> {\n // Shell subtask's \"verification\" is: did the spawn_result indicate success?\n // Structured spawn already captures status. Here we add: if we have an\n // expectedOutputPattern, match it against the spawn's raw response.\n if (input.spawnResult.status !== 'done') {\n return {\n passed: false,\n reason: `Spawn status = ${input.spawnResult.status}`,\n verifier: 'verifyShell',\n };\n }\n if (input.expectedOutputPattern) {\n const re = new RegExp(input.expectedOutputPattern);\n if (!re.test(input.spawnResult.rawResponse)) {\n return {\n passed: false,\n reason: `Output didn't match expected pattern: ${input.expectedOutputPattern}`,\n verifier: 'verifyShell',\n details: input.spawnResult.rawResponse.slice(0, 500),\n };\n }\n }\n return {\n passed: true,\n reason: 'Shell command returned success',\n verifier: 'verifyShell',\n confidence: 0.85,\n };\n}\n\nfunction verifyReport(input: VerificationInput): VerificationResult {\n const text = input.spawnResult.reasoning \|\| input.spawnResult.rawResponse;\n if (text.length < 500) {\n return {\n passed: false,\n reason: `Report too short (${text.length} chars, need ≥500)`,\n verifier: 'verifyReport',\n };\n }\n const keywords = ['goal', 'outcome', 'artifact'];\n const missing = keywords.filter(k => !text.toLowerCase().includes(k));\n if (missing.length > 1) {\n return {\n passed: false,\n reason: `Report missing key sections: ${missing.join(', ')}`,\n verifier: 'verifyReport',\n };\n }\n return {\n passed: true,\n reason: `Report ${text.length} chars, all sections present`,\n verifier: 'verifyReport',\n confidence: 0.8,\n };\n}\n\n// verify-kind subtasks are meta — they recursively verify whatever the\n// spawn claims to verify. 
For now we trust the spawn's status.\nfunction verifyVerify(input: VerificationInput): VerificationResult {\n if (input.spawnResult.status !== 'done') {\n return { passed: false, reason: `verify spawn status=${input.spawnResult.status}`, verifier: 'verifyVerify' };\n }\n if (input.spawnResult.confidence !== undefined && input.spawnResult.confidence < 0.6) {\n return {\n passed: false,\n reason: `verify-of-verify confidence too low (${input.spawnResult.confidence.toFixed(2)})`,\n verifier: 'verifyVerify',\n };\n }\n return {\n passed: true,\n reason: 'verify subtask reported done with confidence ≥ 0.6',\n verifier: 'verifyVerify',\n confidence: input.spawnResult.confidence ?? 0.7,\n };\n}\n\n// ── Dispatch ─────────────────────────────────────────────────────\n\nexport async function verifyByKind(input: VerificationInput): Promise<VerificationResult> {\n try {\n switch (input.kind) {\n case 'code': return await verifyCode(input);\n case 'research': return verifyResearch(input);\n case 'write': return await verifyWrite(input);\n case 'analysis': return verifyAnalysis(input);\n case 'verify': return verifyVerify(input);\n case 'shell': return await verifyShell(input);\n case 'report': return verifyReport(input);\n default:\n return { passed: false, reason: `Unknown kind: ${input.kind}`, verifier: 'dispatch' };\n }\n } catch (err) {\n logger.warn(COMPONENT, `Verifier threw: ${(err as Error).message}`);\n return {\n passed: false,\n reason: `Verifier error: ${(err as Error).message}`,\n verifier: `${input.kind}:error`,\n };\n }\n}\n\n// ── Utility: read a file's content (used by higher-level UI for the driver panel) ──\nexport function readArtifactContent(path: string, maxBytes = 50_000): string \| null {\n try {\n if (!existsSync(path)) return null;\n const content = readFileSync(path, 'utf-8');\n return content.length > maxBytes ? content.slice(0, maxBytes) + '\\n... 
[truncated]' : content;\n } catch { return null; }\n}\n"],"mappings":";AAkBA,SAAS,YAAY,oBAAoB;AACzC,SAAS,iBAAiB;AAC1B,SAAS,QAAQ,cAAc;AAC/B,OAAO,YAAY;AAKnB,MAAM,OAAO,UAAU,MAAM;AAC7B,MAAM,YAAY;AA8BlB,SAAS,gBAAgB,MAAuB;AAC5C,QAAM,UAAU,KAAK,YAAY;AACjC,QAAM,UAAU;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACJ;AACA,SAAO,QAAQ,KAAK,OAAK,QAAQ,SAAS,CAAC,CAAC;AAChD;AAKA,SAAS,mBAAmB,MAAuB;AAC/C,QAAM,UAAU,KAAK,KAAK;AAC1B,QAAM,WAAW;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACJ;AACA,SAAO,SAAS,KAAK,OAAK,EAAE,KAAK,OAAO,CAAC;AAC7C;AAIA,eAAe,WAAW,OAAuD;AAC7E,QAAM,YAAY,MAAM,aAAa,QAAQ,IAAI;AAEjD,QAAM,gBAAgB,MAAM,YAAY,UAAU,OAAO,OAAK,EAAE,SAAS,MAAM,EAAE,IAAI,OAAK,EAAE,GAAG;AAC/F,MAAI,cAAc,WAAW,GAAG;AAC5B,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,IACd;AAAA,EACJ;AAEA,QAAM,UAAU,cAAc,OAAO,OAAK,CAAC,WAAW,CAAC,CAAC;AACxD,MAAI,QAAQ,SAAS,GAAG;AACpB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,8BAA8B,QAAQ,KAAK,IAAI,CAAC;AAAA,MACxD,UAAU;AAAA,MACV,SAAS,sBAAsB,cAAc,MAAM,cAAc,QAAQ,MAAM;AAAA,IACnF;AAAA,EACJ;AAEA,MAAI;AAEA,UAAM,EAAE,QAAQ,OAAO,QAAQ,MAAM,IAAI,MAAM,KAAK,qBAAqB;AAAA,MACrE,KAAK;AAAA,MACL,SAAS;AAAA,MACT,WAAW,KAAK,OAAO;AAAA,IAC3B,CAAC;AACD,UAAM,YAAY,SAAS,OAAO,SAAS;AAC3C,QAAI,gBAAgB,KAAK,QAAQ,KAAK,mBAAmB,KAAK,QAAQ,GAAG;AACrE,aAAO;AAAA,QACH,QAAQ;AAAA,QACR,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,SAAS,MAAM,IAAK;AAAA,QAC7B,WAAW;AAAA,MACf;AAAA,IACJ;AAAA,EACJ,SAAS,KAAK;AAEV,UAAM,MAAO,IAA8D,UACnE,IAA4B,UAC5B,IAAc;AACtB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,SAAS,OAAO,GAAG,EAAE,MAAM,IAAK;AAAA,MAChC,WAAW;AAAA,IACf;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,qBAAqB,cAAc,MAAM;AAAA,IACjD,UAAU;AAAA,IACV,YAAY;AAAA,IACZ,WAAW;AAAA,EACf;AACJ;AAEA,SAAS,eAAe,OAA8C;AAClE,QAAM,OAAO,MAAM,YAAY,aAAa,MAAM,YAAY;AAC9D,MAAI,gBAAgB,IAAI,GAAG;AACvB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,IACd;AAAA,EACJ;AAEA,MAAI,mBAAmB,IAAI,GAAG;AAC1B,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,SAAS,o
BAAoB,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,IACnD;AAAA,EACJ;AAOA,MAAI,MAAM,YAAY,WAAW,UAC1B,MAAM,YAAY,cAAc,SAC/B,MAAM,YAAY,WAAW,UAAU,MAAM,GAAG;AACpD,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,oBAAoB,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC,OAAO,MAAM,YAAY,UAAU,MAAM;AAAA,MAC5G,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AAKA,MAAI,KAAK,SAAS,KAAK;AACnB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,uBAAuB,KAAK,MAAM;AAAA,MAC1C,UAAU;AAAA,IACd;AAAA,EACJ;AAEA,QAAM,YAAY,KAAK,MAAM,qBAAqB,KAAK,CAAC,GAAG;AAC3D,QAAM,YAAY,KAAK,MAAM,UAAU,KAAK,CAAC,GAAG;AAChD,QAAM,eAAe,KAAK,MAAM,gEAAgE,KAAK,CAAC,GAAG;AACzG,QAAM,gBAAgB,KAAK,MAAM,6DAA6D,KAAK,CAAC,GAAG;AACvG,QAAM,UAAU,WAAW,WAAW;AACtC,QAAM,gBAAgB,MAAM,YAAY,UAAU;AAGlD,MAAI,KAAK,SAAS,KAAK;AACnB,QAAI,iBAAiB,KAAK,gBAAgB,KAAK,MAAM,YAAY,cAAc,KAAK;AAChF,aAAO;AAAA,QACH,QAAQ;AAAA,QACR,QAAQ,oBAAoB,KAAK,MAAM,WAAW,aAAa,4BAA4B,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC;AAAA,QAClI,UAAU;AAAA,QACV,YAAY,MAAM,YAAY,aAAa;AAAA,QAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,MACzD;AAAA,IACJ;AACA,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,uBAAuB,KAAK,MAAM;AAAA,MAC1C,UAAU;AAAA,IACd;AAAA,EACJ;AAEA,MAAI,UAAU,KAAK,gBAAgB,GAAG;AAClC,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,gCAAgC,OAAO;AAAA,MAC/C,UAAU;AAAA,MACV,SAAS,QAAQ,QAAQ,SAAS,QAAQ,gBAAgB,WAAW;AAAA,IACzE;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,GAAG,OAAO,oBAAoB,aAAa,eAAe,KAAK,MAAM;AAAA,IAC7E,UAAU;AAAA,IACV,YAAY;AAAA,IACZ,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,EACzD;AACJ;AAEA,eAAe,YAAY,OAAuD;AAC9E,QAAM,OAAO,MAAM,YAAY,aAAa,MAAM,YAAY;AAC9D,MAAI,gBAAgB,IAAI,GAAG;AACvB,WAAO,EAAE,QAAQ,OAAO,QAAQ,sBAAsB,UAAU,cAAc;AAAA,EAClF;AAEA,MAAI,mBAAmB,IAAI,GAAG;AAC1B,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,SAAS,oBAAoB,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,IACnD;AAAA,EACJ;AAGA,MAAI,MAAM,YAAY,WAAW,UAC1B,MAAM,YAAY,cAAc,SAC/B,MAAM,YAAY,WAAW,UAAU,MAAM,GAAG;AACpD,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,oBAAoB,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC,OAAO,MAAM,YAAY,UAAU,MAAM;AAAA,MAC5G,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,
MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AACA,MAAI,KAAK,SAAS,KAAK;AACnB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,oBAAoB,KAAK,MAAM;AAAA,MACvC,UAAU;AAAA,IACd;AAAA,EACJ;AAIA,QAAM,aAAa,MAAM,YAAY,cAAc;AACnD,MAAI,aAAa,KAAK;AAClB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,4BAA4B,WAAW,QAAQ,CAAC,CAAC;AAAA,MACzD,UAAU;AAAA,IACd;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,SAAS,KAAK,MAAM,sBAAsB,WAAW,QAAQ,CAAC,CAAC;AAAA,IACvE,UAAU;AAAA,IACV;AAAA,IACA,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,EACzD;AACJ;AAEA,SAAS,eAAe,OAA8C;AAClE,QAAM,OAAO,MAAM,YAAY,aAAa,MAAM,YAAY;AAC9D,MAAI,gBAAgB,IAAI,GAAG;AACvB,WAAO,EAAE,QAAQ,OAAO,QAAQ,sBAAsB,UAAU,iBAAiB;AAAA,EACrF;AAEA,MAAI,mBAAmB,IAAI,GAAG;AAC1B,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,SAAS,oBAAoB,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,IACnD;AAAA,EACJ;AAKA,MAAI,MAAM,YAAY,WAAW,UAC1B,MAAM,YAAY,cAAc,SAC/B,MAAM,YAAY,WAAW,UAAU,MAAM,GAAG;AACpD,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,oBAAoB,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC,OAAO,MAAM,YAAY,UAAU,MAAM;AAAA,MAC5G,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AAWA,QAAM,gBAAgB,MAAM,YAAY,WAAW,UAAU;AAC7D,MAAI,iBAAiB,KAAK,MAAM,YAAY,WAAW,UAAU,MAAM,YAAY,cAAc,KAAK;AAClG,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,qBAAqB,aAAa,4BAA4B,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC;AAAA,MAC7G,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AAEA,QAAM,qBAAqB,mGAAmG,KAAK,IAAI;AACvI,QAAM,eAAe,KAAK,MAAM,gBAAgB,KAAK,CAAC,GAAG;AACzD,QAAM,gBAAgB,KAAK,MAAM,gEAAgE,KAAK,CAAC,GAAG;AAC1G,QAAM,eAAe,sBAAsB,eAAe,KAAK,gBAAgB;AAE/E,MAAI,KAAK,SAAS,IAAI;AAClB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,uBAAuB,KAAK,MAAM;AAAA,MAC1C,UAAU;AAAA,IACd;AAAA,EACJ;AAGA,MAAI,KAAK,SAAS,OAAO,MAAM,YAAY,cAAc,OAAO,MAAM,YAAY,WAAW,QAAQ;AACjG,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,YAAY,KAAK,MAAM,4BAA4B,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC;AAAA,MAClG,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AAGA
,MAAI,CAAC,cAAc;AACf,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,IACd;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,YAAY,KAAK,MAAM,4CAA4C,kBAAkB,YAAY,WAAW,YAAY,YAAY;AAAA,IAC5I,UAAU;AAAA,IACV,YAAY;AAAA,IACZ,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,EACzD;AACJ;AAEA,eAAe,YAAY,OAAuD;AAI9E,MAAI,MAAM,YAAY,WAAW,QAAQ;AACrC,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,kBAAkB,MAAM,YAAY,MAAM;AAAA,MAClD,UAAU;AAAA,IACd;AAAA,EACJ;AACA,MAAI,MAAM,uBAAuB;AAC7B,UAAM,KAAK,IAAI,OAAO,MAAM,qBAAqB;AACjD,QAAI,CAAC,GAAG,KAAK,MAAM,YAAY,WAAW,GAAG;AACzC,aAAO;AAAA,QACH,QAAQ;AAAA,QACR,QAAQ,yCAAyC,MAAM,qBAAqB;AAAA,QAC5E,UAAU;AAAA,QACV,SAAS,MAAM,YAAY,YAAY,MAAM,GAAG,GAAG;AAAA,MACvD;AAAA,IACJ;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,YAAY;AAAA,EAChB;AACJ;AAEA,SAAS,aAAa,OAA8C;AAChE,QAAM,OAAO,MAAM,YAAY,aAAa,MAAM,YAAY;AAC9D,MAAI,KAAK,SAAS,KAAK;AACnB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,qBAAqB,KAAK,MAAM;AAAA,MACxC,UAAU;AAAA,IACd;AAAA,EACJ;AACA,QAAM,WAAW,CAAC,QAAQ,WAAW,UAAU;AAC/C,QAAM,UAAU,SAAS,OAAO,OAAK,CAAC,KAAK,YAAY,EAAE,SAAS,CAAC,CAAC;AACpE,MAAI,QAAQ,SAAS,GAAG;AACpB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,gCAAgC,QAAQ,KAAK,IAAI,CAAC;AAAA,MAC1D,UAAU;AAAA,IACd;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,UAAU,KAAK,MAAM;AAAA,IAC7B,UAAU;AAAA,IACV,YAAY;AAAA,EAChB;AACJ;AAIA,SAAS,aAAa,OAA8C;AAChE,MAAI,MAAM,YAAY,WAAW,QAAQ;AACrC,WAAO,EAAE,QAAQ,OAAO,QAAQ,uBAAuB,MAAM,YAAY,MAAM,IAAI,UAAU,eAAe;AAAA,EAChH;AACA,MAAI,MAAM,YAAY,eAAe,UAAa,MAAM,YAAY,aAAa,KAAK;AAClF,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,wCAAwC,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC;AAAA,MACvF,UAAU;AAAA,IACd;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,YAAY,MAAM,YAAY,cAAc;AAAA,EAChD;AACJ;AAIA,eAAsB,aAAa,OAAuD;AACtF,MAAI;AACA,YAAQ,MAAM,MAAM;AAAA,MAChB,KAAK;AAAY,eAAO,MAAM,WAAW,KAAK;AAAA,MAC9C,KAAK;AAAY,eAAO,eAAe,KAAK;AAAA,MAC5C,KAAK;AAAY,eAAO,MAAM,YAAY,KAAK;AAAA,MAC/C,KAAK;AAAY,eAAO,eAAe,KAAK;AAAA,MAC5C,KAAK;AAAY,eAAO,aAAa,KAAK;AAAA,MAC1C,KAAK;AAAY,eAAO,MAAM,YAAY,KAAK;AAAA,MAC/C,KAAK;AAAY,eAAO,aAAa,KA
AK;AAAA,MAC1C;AACI,eAAO,EAAE,QAAQ,OAAO,QAAQ,iBAAiB,MAAM,IAAI,IAAI,UAAU,WAAW;AAAA,IAC5F;AAAA,EACJ,SAAS,KAAK;AACV,WAAO,KAAK,WAAW,mBAAoB,IAAc,OAAO,EAAE;AAClE,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,mBAAoB,IAAc,OAAO;AAAA,MACjD,UAAU,GAAG,MAAM,IAAI;AAAA,IAC3B;AAAA,EACJ;AACJ;AAGO,SAAS,oBAAoB,MAAc,WAAW,KAAuB;AAChF,MAAI;AACA,QAAI,CAAC,WAAW,IAAI,EAAG,QAAO;AAC9B,UAAM,UAAU,aAAa,MAAM,OAAO;AAC1C,WAAO,QAAQ,SAAS,WAAW,QAAQ,MAAM,GAAG,QAAQ,IAAI,sBAAsB;AAAA,EAC1F,QAAQ;AAAE,WAAO;AAAA,EAAM;AAC3B;","names":[]}
1	+ {"version":3,"sources":["../../src/agent/verifier.ts"],"sourcesContent":["/*\n TITAN — Verifier (v4.10.0-local, Phase A)\n \n Per-kind verification that a subtask is actually done, not just\n * \"the LLM emitted 200 chars and called it a day.\" Returns a\n * VerificationResult the driver uses to decide: advance to the next\n * subtask (passed), retry with fallback (failed), or escalate to human\n * (blocked on clarification).\n \n Per-kind contracts:\n * code — run typecheck + build in workspace; all green\n * research — ≥200 chars, ≥2 source markers, no \"I don't know\"\n * write — spawn Analyst with rubric, require score ≥0.7\n * analysis — response contains structured output meeting schema\n * verify — nested verifier of the thing it claims to verify\n * shell — exit code 0 and (if pattern provided) stdout matches\n * report — ≥500 chars, keywords: \"goal\"/\"outcome\"/\"artifacts\"\n /\nimport { existsSync, readFileSync } from 'fs';\nimport { promisify } from 'util';\nimport { exec as execCb } from 'child_process';\nimport logger from '../utils/logger.js';\nimport type { SubtaskKind } from './subtaskTaxonomy.js';\nimport type { StructuredSpawnResult } from './structuredSpawnTypes.js';\nimport type { Subtask } from './goals.js';\n\nconst exec = promisify(execCb);\nconst COMPONENT = 'Verifier';\n\nexport interface VerificationInput {\n kind: SubtaskKind;\n subtask: Subtask;\n spawnResult: StructuredSpawnResult;\n /\n Workspace for code verifications — defaults to repo root.\n * For staged writes, this is the staging directory.\n /\n workspace?: string;\n /\n Optional expected-output regex for shell verifications.\n /\n expectedOutputPattern?: string;\n}\n\nexport interface VerificationResult {\n passed: boolean;\n reason: string;\n verifier: string;\n confidence?: number;\n /* Files/URLs/facts produced. /\n artifacts?: string[];\n /* Stderr/stdout snippets for code verifications — helpful in UI. 
/\n details?: string;\n}\n\n// ── Generic bail-out check (runs before per-kind) ────────────────\n\nfunction hasGiveUpPhrase(text: string): boolean {\n const lowered = text.toLowerCase();\n const giveups = [\n \"i don't have a specific task\",\n 'no specific task to act on',\n \"i don't know what to do\",\n 'not enough information',\n 'cannot complete without',\n 'unable to determine',\n \"i can't proceed\",\n ];\n return giveups.some(g => lowered.includes(g));\n}\n\n// v4.10.0-local fix: Detect \"thinking\" prose that indicates the specialist\n// is starting work but didn't follow JSON output instructions. These patterns\n// (\"Now let me check...\", \"Let me analyze...\") should trigger retry, not block.\nfunction hasThinkingPattern(text: string): boolean {\n const trimmed = text.trim();\n const patterns = [\n /^now let me /i,\n /^let me /i,\n /^i will /i,\n /^i'll /i,\n /^first,? let me /i,\n /^ok, let me /i,\n /^okay, let me /i,\n /^sure,? let me /i,\n /^alright,? let me /i,\n ];\n return patterns.some(p => p.test(trimmed));\n}\n\n// ── Per-kind verifiers ───────────────────────────────────────────\n\nasync function verifyCode(input: VerificationInput): Promise<VerificationResult> {\n const workspace = input.workspace \|\| process.cwd();\n // Quick fail: were artifacts actually produced?\n const fileArtifacts = input.spawnResult.artifacts.filter(a => a.type === 'file').map(a => a.ref);\n if (fileArtifacts.length === 0) {\n return {\n passed: false,\n reason: 'No file artifacts reported by specialist',\n verifier: 'verifyCode',\n };\n }\n // Files actually exist?\n const missing = fileArtifacts.filter(p => !existsSync(p));\n if (missing.length > 0) {\n return {\n passed: false,\n reason: `Claimed files don't exist: ${missing.join(', ')}`,\n verifier: 'verifyCode',\n details: `Specialist claimed ${fileArtifacts.length} files but ${missing.length} are missing on disk.`,\n };\n }\n // Typecheck\n try {\n // Short timeout — typecheck usually 5-20s\n const { stdout: 
tcOut, stderr: tcErr } = await exec('npm run typecheck', {\n cwd: workspace,\n timeout: 120_000,\n maxBuffer: 10 1024 * 1024,\n });\n const tcOutput = (tcOut \|\| '') + (tcErr \|\| '');\n if (/error TS\\d+:/i.test(tcOutput) \|\| /Found \\d+ error/i.test(tcOutput)) {\n return {\n passed: false,\n reason: 'TypeScript errors in workspace',\n verifier: 'verifyCode',\n details: tcOutput.slice(-2000),\n artifacts: fileArtifacts,\n };\n }\n } catch (err) {\n // typecheck failed non-zero — extract errors\n const msg = (err as { stdout?: string; stderr?: string; message: string }).stdout\n \|\| (err as { stderr?: string }).stderr\n \|\| (err as Error).message;\n return {\n passed: false,\n reason: 'npm run typecheck failed',\n verifier: 'verifyCode',\n details: String(msg).slice(-2000),\n artifacts: fileArtifacts,\n };\n }\n return {\n passed: true,\n reason: `Typecheck passed; ${fileArtifacts.length} file(s) exist`,\n verifier: 'verifyCode',\n confidence: 0.9,\n artifacts: fileArtifacts,\n };\n}\n\nfunction verifyResearch(input: VerificationInput): VerificationResult {\n const text = input.spawnResult.reasoning \|\| input.spawnResult.rawResponse;\n if (hasGiveUpPhrase(text)) {\n return {\n passed: false,\n reason: \"Specialist gave up (give-up phrase detected)\",\n verifier: 'verifyResearch',\n };\n }\n // v4.10.0-local fix: catch thinking patterns that indicate JSON parsing failed\n if (hasThinkingPattern(text)) {\n return {\n passed: false,\n reason: \"Specialist returned thinking prose instead of structured JSON — needs retry\",\n verifier: 'verifyResearch',\n details: `Raw (200 chars): ${text.slice(0, 200)}`,\n };\n }\n // v4.10.0-local (post-deploy, Fix D): confidence+artifact escape hatch.\n // High-confidence done responses with ≥1 concrete artifact pass even\n // without prose markers. Prevents terse-but-correct specialists (e.g.\n // \"Done. 
5 sources saved to memory.\") from looping on verification.\n // Gated on artifact count — pure confidence would let hallucinating\n // specialists self-certify.\n if (input.spawnResult.status === 'done'\n && input.spawnResult.confidence >= 0.85\n && (input.spawnResult.artifacts?.length ?? 0) >= 1) {\n return {\n passed: true,\n reason: `High confidence (${input.spawnResult.confidence.toFixed(2)}) + ${input.spawnResult.artifacts.length} artifact(s) — confidence-tier pass`,\n verifier: 'verifyResearch',\n confidence: input.spawnResult.confidence * 0.95,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n // v4.10.0-local polish: lenient short-form path. Internal research\n // goals (like \"check local tool output\") often produce 100-200 char\n // responses that are still valid — the specialist ran the right tool\n // and returned a terse finding. Require markers OR internal artifacts.\n if (text.length < 100) {\n return {\n passed: false,\n reason: `Response too short (${text.length} chars, need ≥100)`,\n verifier: 'verifyResearch',\n };\n }\n // Count source markers: URLs, [1]-style refs, \"source:\", \"according to\"\n const urlCount = (text.match(/https?:\\/\\/[^\\s)]+/g) \|\| []).length;\n const refCount = (text.match(/\\[\\d+\\]/g) \|\| []).length;\n const sourceWords = (text.match(/\\b(source\|according to\|per the\|reference\|from the\|based on):/gi) \|\| []).length;\n const toolFindings = (text.match(/\\b(found\|returned\|reports?\|shows?\|indicates?\|displays?)\\b/gi) \|\| []).length;\n const markers = urlCount + refCount + sourceWords;\n const artifactCount = input.spawnResult.artifacts.length;\n\n // Path A: short response with artifact + tool-finding language\n if (text.length < 200) {\n if (artifactCount >= 1 && toolFindings >= 1 && input.spawnResult.confidence >= 0.7) {\n return {\n passed: true,\n reason: `Concise research ${text.length} chars, ${artifactCount} artifact(s), confidence ${input.spawnResult.confidence.toFixed(2)} — 
lenient pass`,\n verifier: 'verifyResearch',\n confidence: input.spawnResult.confidence * 0.85,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n return {\n passed: false,\n reason: `Response too short (${text.length} chars, need ≥200 OR artifact+tool-finding+high-confidence)`,\n verifier: 'verifyResearch',\n };\n }\n // Path B: longer response needs source markers\n if (markers < 2 && artifactCount < 1) {\n return {\n passed: false,\n reason: `Insufficient source markers (${markers}, need ≥2 URLs/refs/source phrases, or ≥1 artifact)`,\n verifier: 'verifyResearch',\n details: `urls=${urlCount} refs=${refCount} sourcewords=${sourceWords}`,\n };\n }\n return {\n passed: true,\n reason: `${markers} source markers, ${artifactCount} artifacts, ${text.length} chars`,\n verifier: 'verifyResearch',\n confidence: 0.8,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n}\n\nasync function verifyWrite(input: VerificationInput): Promise<VerificationResult> {\n const text = input.spawnResult.reasoning \|\| input.spawnResult.rawResponse;\n if (hasGiveUpPhrase(text)) {\n return { passed: false, reason: 'Specialist gave up', verifier: 'verifyWrite' };\n }\n // v4.10.0-local fix: catch thinking patterns that indicate JSON parsing failed\n if (hasThinkingPattern(text)) {\n return {\n passed: false,\n reason: 'Specialist returned thinking prose instead of structured JSON — needs retry',\n verifier: 'verifyWrite',\n details: `Raw (200 chars): ${text.slice(0, 200)}`,\n };\n }\n // v4.10.0-local (post-deploy, Fix D): confidence+artifact escape hatch.\n // See verifyResearch for rationale. Gated on artifact count.\n if (input.spawnResult.status === 'done'\n && input.spawnResult.confidence >= 0.85\n && (input.spawnResult.artifacts?.length ?? 
0) >= 1) {\n return {\n passed: true,\n reason: `High confidence (${input.spawnResult.confidence.toFixed(2)}) + ${input.spawnResult.artifacts.length} artifact(s) — confidence-tier pass`,\n verifier: 'verifyWrite',\n confidence: input.spawnResult.confidence * 0.95,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n if (text.length < 100) {\n return {\n passed: false,\n reason: `Draft too short (${text.length} chars, need ≥100)`,\n verifier: 'verifyWrite',\n };\n }\n // Rubric-based check: use spawn confidence + basic heuristics\n // (Full LLM-rubric check deferred — driver can spawn Analyst to review\n // via the structured-spawn path; here we do a fast local sanity check.)\n const confidence = input.spawnResult.confidence ?? 0.5;\n if (confidence < 0.6) {\n return {\n passed: false,\n reason: `Self-reported confidence ${confidence.toFixed(2)} below 0.6`,\n verifier: 'verifyWrite',\n };\n }\n return {\n passed: true,\n reason: `Draft ${text.length} chars, confidence ${confidence.toFixed(2)}`,\n verifier: 'verifyWrite',\n confidence,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n}\n\nfunction verifyAnalysis(input: VerificationInput): VerificationResult {\n const text = input.spawnResult.reasoning \|\| input.spawnResult.rawResponse;\n if (hasGiveUpPhrase(text)) {\n return { passed: false, reason: 'Specialist gave up', verifier: 'verifyAnalysis' };\n }\n // v4.10.0-local fix: catch thinking patterns that indicate JSON parsing failed\n if (hasThinkingPattern(text)) {\n return {\n passed: false,\n reason: 'Specialist returned thinking prose instead of structured JSON — needs retry',\n verifier: 'verifyAnalysis',\n details: `Raw (200 chars): ${text.slice(0, 200)}`,\n };\n }\n // v4.10.0-local (post-deploy, Fix D): confidence+artifact escape hatch.\n // Parallel to verifyResearch/verifyWrite. Sits below the existing\n // ≥3-artifact tier but catches the ≥0.85-confidence + ≥1-artifact case\n // that the stricter tier misses (e.g. 
a single bundle summary file).\n if (input.spawnResult.status === 'done'\n && input.spawnResult.confidence >= 0.85\n && (input.spawnResult.artifacts?.length ?? 0) >= 1) {\n return {\n passed: true,\n reason: `High confidence (${input.spawnResult.confidence.toFixed(2)}) + ${input.spawnResult.artifacts.length} artifact(s) — confidence-tier pass`,\n verifier: 'verifyAnalysis',\n confidence: input.spawnResult.confidence * 0.95,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n // v4.10.0-local polish (post-deploy): analysis verification now has\n // three tiers. Added an ARTIFACT tier to catch the common case where\n // the subtask was misclassified as \"analysis\" but the specialist\n // actually produced concrete artifacts (files, URLs, memory entries).\n // Previously those runs would ping-pong on verification forever\n // because the reasoning field was terse but the work was real.\n //\n // ARTIFACT tier: ≥3 concrete artifacts + status=done + confidence ≥ 0.7.\n // STRICT tier: needs reasoning markers OR bulleted list OR ≥200 chars + structure.\n // LENIENT tier: ≥80 chars AND status=done AND confidence ≥ 0.7.\n const artifactCount = input.spawnResult.artifacts?.length ?? 
0;\n if (artifactCount >= 3 && input.spawnResult.status === 'done' && input.spawnResult.confidence >= 0.7) {\n return {\n passed: true,\n reason: `Analysis produced ${artifactCount} artifact(s), confidence ${input.spawnResult.confidence.toFixed(2)} — artifact-tier pass`,\n verifier: 'verifyAnalysis',\n confidence: input.spawnResult.confidence * 0.9,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n\n const hasReasoningMarker = /\\b(conclusion\|because\|therefore\|thus\|hence\|as a result\|this means\|indicates\|suggests\|implies)\\b/i.test(text);\n const bulletCount = (text.match(/^\\s[-+]\\s+/gm) \|\| []).length;\n const numericCount = (text.match(/\\b\\d+(?:\\.\\d+)?(?:%\|\\s(?:chars?\|ms\|s\|m\|ticks?\|patterns?))?\\b/g) \|\| []).length;\n const hasStructure = hasReasoningMarker \|\| bulletCount >= 2 \|\| numericCount >= 2;\n\n if (text.length < 80) {\n return {\n passed: false,\n reason: `Analysis too short (${text.length} chars, need ≥80)`,\n verifier: 'verifyAnalysis',\n };\n }\n\n // Lenient path: short-but-confident responses\n if (text.length < 200 && input.spawnResult.confidence >= 0.7 && input.spawnResult.status === 'done') {\n return {\n passed: true,\n reason: `Analysis ${text.length} chars, high confidence (${input.spawnResult.confidence.toFixed(2)}) — lenient pass`,\n verifier: 'verifyAnalysis',\n confidence: input.spawnResult.confidence 0.85,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n }\n\n // Strict path: longer responses need structural markers\n if (!hasStructure) {\n return {\n passed: false,\n reason: 'No reasoning markers, structured list, or numeric evidence found',\n verifier: 'verifyAnalysis',\n };\n }\n return {\n passed: true,\n reason: `Analysis ${text.length} chars with reasoning structure (markers=${hasReasoningMarker} bullets=${bulletCount} metrics=${numericCount})`,\n verifier: 'verifyAnalysis',\n confidence: 0.8,\n artifacts: input.spawnResult.artifacts.map(a => a.ref),\n };\n}\n\nasync 
function verifyShell(input: VerificationInput): Promise<VerificationResult> {\n // Shell subtask's \"verification\" is: did the spawn_result indicate success?\n // Structured spawn already captures status. Here we add: if we have an\n // expectedOutputPattern, match it against the spawn's raw response.\n if (input.spawnResult.status !== 'done') {\n return {\n passed: false,\n reason: `Spawn status = ${input.spawnResult.status}`,\n verifier: 'verifyShell',\n };\n }\n if (input.expectedOutputPattern) {\n const re = new RegExp(input.expectedOutputPattern);\n if (!re.test(input.spawnResult.rawResponse)) {\n return {\n passed: false,\n reason: `Output didn't match expected pattern: ${input.expectedOutputPattern}`,\n verifier: 'verifyShell',\n details: input.spawnResult.rawResponse.slice(0, 500),\n };\n }\n }\n return {\n passed: true,\n reason: 'Shell command returned success',\n verifier: 'verifyShell',\n confidence: 0.85,\n };\n}\n\nfunction verifyReport(input: VerificationInput): VerificationResult {\n const text = input.spawnResult.reasoning \|\| input.spawnResult.rawResponse;\n if (text.length < 500) {\n return {\n passed: false,\n reason: `Report too short (${text.length} chars, need ≥500)`,\n verifier: 'verifyReport',\n };\n }\n const keywords = ['goal', 'outcome', 'artifact'];\n const missing = keywords.filter(k => !text.toLowerCase().includes(k));\n if (missing.length > 1) {\n return {\n passed: false,\n reason: `Report missing key sections: ${missing.join(', ')}`,\n verifier: 'verifyReport',\n };\n }\n return {\n passed: true,\n reason: `Report ${text.length} chars, all sections present`,\n verifier: 'verifyReport',\n confidence: 0.8,\n };\n}\n\n// verify-kind subtasks are meta — they recursively verify whatever the\n// spawn claims to verify. 
For now we trust the spawn's status.\nfunction verifyVerify(input: VerificationInput): VerificationResult {\n if (input.spawnResult.status !== 'done') {\n return { passed: false, reason: `verify spawn status=${input.spawnResult.status}`, verifier: 'verifyVerify' };\n }\n if (input.spawnResult.confidence !== undefined && input.spawnResult.confidence < 0.6) {\n return {\n passed: false,\n reason: `verify-of-verify confidence too low (${input.spawnResult.confidence.toFixed(2)})`,\n verifier: 'verifyVerify',\n };\n }\n return {\n passed: true,\n reason: 'verify subtask reported done with confidence ≥ 0.6',\n verifier: 'verifyVerify',\n confidence: input.spawnResult.confidence ?? 0.7,\n };\n}\n\n// ── LLM-judge layer (v6.0) ───────────────────────────────────────\n//\n// Runs AFTER the per-kind verifier passes, as a final sanity check that\n// the spawn output actually fulfilled the subtask intent. Cuts the\n// false-positive rate of the per-kind checks (which test surface\n// properties like length / exit code / keywords, not intent).\n//\n// Design:\n// - Only runs when per-kind passed (no judge on already-failed)\n// - Skipped for kind='verify' (avoids verify-of-verify recursion)\n// - Calls spawnSubAgent with the FAST tier — one short call, low cost\n// - Parses JSON {passed, reason} from the judge reply\n// - On judge throw / parse error → defers to the per-kind verdict\n// (never makes verification stricter than the per-kind alone)\n//\n// Toggle via env: TITAN_LLM_JUDGE_VERIFY=0 disables. Default is on.\n\nfunction llmJudgeEnabled(): boolean {\n const env = (process.env.TITAN_LLM_JUDGE_VERIFY ?? 
'').toLowerCase().trim();\n if (env === '0' \|\| env === 'false' \|\| env === 'no' \|\| env === 'off') return false;\n return true;\n}\n\nasync function llmJudgeVerify(\n input: VerificationInput,\n kindResult: VerificationResult,\n): Promise<VerificationResult> {\n if (input.kind === 'verify') return kindResult;\n if (!llmJudgeEnabled()) return kindResult;\n\n try {\n const { spawnSubAgent } = await import('./subAgent.js');\n const reasoning = (input.spawnResult.reasoning \|\| input.spawnResult.rawResponse \|\| '').slice(0, 1800);\n const artifactNote = input.spawnResult.artifacts?.length\n ? `\\nArtifacts produced: ${input.spawnResult.artifacts.map(a => `${a.type}:${a.ref}`).join(', ')}`\n : '';\n const judgePrompt = [\n `You are a strict verification judge. Your ONE job is to decide whether the work below actually fulfilled the subtask intent.`,\n ``,\n `Subtask:`,\n ` title: ${input.subtask.title}`,\n ` description: ${input.subtask.description}`,\n ``,\n `Work produced (truncated to 1.8k chars):`,\n reasoning,\n artifactNote,\n ``,\n `Per-kind verifier ('${input.kind}') already passed with reason: ${kindResult.reason}`,\n ``,\n `Your job: does this work ACTUALLY address the subtask, or did it surface-pass without delivering?`,\n ``,\n `Common surface-pass failure modes to catch:`,\n ` - Length OK but content is generic / vague / doesn't address the specific subtask`,\n ` - Code compiles but doesn't do what was asked`,\n ` - Research has citations but missed the actual question`,\n ` - Report has the keywords but no real conclusion`,\n ``,\n `Return STRICT JSON on a single line: {\"passed\": true\|false, \"reason\": \"<≤140 chars why>\"}.`,\n `No markdown. 
No prose before or after the JSON.`,\n ].join('\\n');\n\n const judgeResult = await spawnSubAgent({\n name: 'llm-judge',\n task: judgePrompt,\n tier: 'fast',\n maxRounds: 1,\n });\n const raw = (judgeResult.content \|\| '').trim();\n const jsonStart = raw.indexOf('{');\n const jsonEnd = raw.lastIndexOf('}');\n if (jsonStart < 0 \|\| jsonEnd <= jsonStart) {\n logger.info(COMPONENT, `LLM judge returned non-JSON, deferring to per-kind verdict`);\n return kindResult;\n }\n const parsed = JSON.parse(raw.slice(jsonStart, jsonEnd + 1)) as { passed?: unknown; reason?: unknown };\n if (typeof parsed.passed !== 'boolean') return kindResult;\n if (parsed.passed) return kindResult; // judge agrees → keep the per-kind result\n const judgeReason = typeof parsed.reason === 'string' && parsed.reason.trim().length > 0\n ? parsed.reason.trim().slice(0, 200)\n : 'LLM judge said no without a reason';\n logger.info(COMPONENT, `LLM judge OVERRIDE: per-kind passed but judge said fail — ${judgeReason}`);\n return {\n passed: false,\n reason: `LLM judge: ${judgeReason}`,\n verifier: `${input.kind}+llm-judge`,\n confidence: kindResult.confidence,\n details: `per-kind '${input.kind}' passed (${kindResult.reason}) but judge disagreed`,\n };\n } catch (err) {\n logger.warn(COMPONENT, `LLM judge threw (deferring to per-kind): ${(err as Error).message}`);\n return kindResult;\n }\n}\n\n// ── Dispatch ─────────────────────────────────────────────────────\n\nexport async function verifyByKind(input: VerificationInput): Promise<VerificationResult> {\n let kindResult: VerificationResult;\n try {\n switch (input.kind) {\n case 'code': kindResult = await verifyCode(input); break;\n case 'research': kindResult = verifyResearch(input); break;\n case 'write': kindResult = await verifyWrite(input); break;\n case 'analysis': kindResult = verifyAnalysis(input); break;\n case 'verify': kindResult = verifyVerify(input); break;\n case 'shell': kindResult = await verifyShell(input); break;\n case 'report': 
kindResult = verifyReport(input); break;\n default:\n return { passed: false, reason: `Unknown kind: ${input.kind}`, verifier: 'dispatch' };\n }\n } catch (err) {\n logger.warn(COMPONENT, `Verifier threw: ${(err as Error).message}`);\n return {\n passed: false,\n reason: `Verifier error: ${(err as Error).message}`,\n verifier: `${input.kind}:error`,\n };\n }\n\n // v6.0 — LLM-judge layer. Runs only when per-kind passed.\n if (kindResult.passed) {\n return await llmJudgeVerify(input, kindResult);\n }\n return kindResult;\n}\n\n// ── Utility: read a file's content (used by higher-level UI for the driver panel) ──\nexport function readArtifactContent(path: string, maxBytes = 50_000): string \| null {\n try {\n if (!existsSync(path)) return null;\n const content = readFileSync(path, 'utf-8');\n return content.length > maxBytes ? content.slice(0, maxBytes) + '\\n... [truncated]' : content;\n } catch { return null; }\n}\n"],"mappings":";AAkBA,SAAS,YAAY,oBAAoB;AACzC,SAAS,iBAAiB;AAC1B,SAAS,QAAQ,cAAc;AAC/B,OAAO,YAAY;AAKnB,MAAM,OAAO,UAAU,MAAM;AAC7B,MAAM,YAAY;AA8BlB,SAAS,gBAAgB,MAAuB;AAC5C,QAAM,UAAU,KAAK,YAAY;AACjC,QAAM,UAAU;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACJ;AACA,SAAO,QAAQ,KAAK,OAAK,QAAQ,SAAS,CAAC,CAAC;AAChD;AAKA,SAAS,mBAAmB,MAAuB;AAC/C,QAAM,UAAU,KAAK,KAAK;AAC1B,QAAM,WAAW;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACJ;AACA,SAAO,SAAS,KAAK,OAAK,EAAE,KAAK,OAAO,CAAC;AAC7C;AAIA,eAAe,WAAW,OAAuD;AAC7E,QAAM,YAAY,MAAM,aAAa,QAAQ,IAAI;AAEjD,QAAM,gBAAgB,MAAM,YAAY,UAAU,OAAO,OAAK,EAAE,SAAS,MAAM,EAAE,IAAI,OAAK,EAAE,GAAG;AAC/F,MAAI,cAAc,WAAW,GAAG;AAC5B,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,IACd;AAAA,EACJ;AAEA,QAAM,UAAU,cAAc,OAAO,OAAK,CAAC,WAAW,CAAC,CAAC;AACxD,MAAI,QAAQ,SAAS,GAAG;AACpB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,8BAA8B,QAAQ,KAAK,IAAI,CAAC;AAAA,MACxD,UAAU;AAAA,MACV,SAAS,sBAAsB,cAAc,MAAM,cAAc,QAAQ,MAAM;AAAA,IACnF;AAAA,EACJ;AAEA,MAAI;AAEA,UAAM,EAAE,QAAQ,OAAO,QAAQ,MAAM,IAAI,M
AAM,KAAK,qBAAqB;AAAA,MACrE,KAAK;AAAA,MACL,SAAS;AAAA,MACT,WAAW,KAAK,OAAO;AAAA,IAC3B,CAAC;AACD,UAAM,YAAY,SAAS,OAAO,SAAS;AAC3C,QAAI,gBAAgB,KAAK,QAAQ,KAAK,mBAAmB,KAAK,QAAQ,GAAG;AACrE,aAAO;AAAA,QACH,QAAQ;AAAA,QACR,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,SAAS,MAAM,IAAK;AAAA,QAC7B,WAAW;AAAA,MACf;AAAA,IACJ;AAAA,EACJ,SAAS,KAAK;AAEV,UAAM,MAAO,IAA8D,UACnE,IAA4B,UAC5B,IAAc;AACtB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,SAAS,OAAO,GAAG,EAAE,MAAM,IAAK;AAAA,MAChC,WAAW;AAAA,IACf;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,qBAAqB,cAAc,MAAM;AAAA,IACjD,UAAU;AAAA,IACV,YAAY;AAAA,IACZ,WAAW;AAAA,EACf;AACJ;AAEA,SAAS,eAAe,OAA8C;AAClE,QAAM,OAAO,MAAM,YAAY,aAAa,MAAM,YAAY;AAC9D,MAAI,gBAAgB,IAAI,GAAG;AACvB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,IACd;AAAA,EACJ;AAEA,MAAI,mBAAmB,IAAI,GAAG;AAC1B,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,SAAS,oBAAoB,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,IACnD;AAAA,EACJ;AAOA,MAAI,MAAM,YAAY,WAAW,UAC1B,MAAM,YAAY,cAAc,SAC/B,MAAM,YAAY,WAAW,UAAU,MAAM,GAAG;AACpD,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,oBAAoB,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC,OAAO,MAAM,YAAY,UAAU,MAAM;AAAA,MAC5G,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AAKA,MAAI,KAAK,SAAS,KAAK;AACnB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,uBAAuB,KAAK,MAAM;AAAA,MAC1C,UAAU;AAAA,IACd;AAAA,EACJ;AAEA,QAAM,YAAY,KAAK,MAAM,qBAAqB,KAAK,CAAC,GAAG;AAC3D,QAAM,YAAY,KAAK,MAAM,UAAU,KAAK,CAAC,GAAG;AAChD,QAAM,eAAe,KAAK,MAAM,gEAAgE,KAAK,CAAC,GAAG;AACzG,QAAM,gBAAgB,KAAK,MAAM,6DAA6D,KAAK,CAAC,GAAG;AACvG,QAAM,UAAU,WAAW,WAAW;AACtC,QAAM,gBAAgB,MAAM,YAAY,UAAU;AAGlD,MAAI,KAAK,SAAS,KAAK;AACnB,QAAI,iBAAiB,KAAK,gBAAgB,KAAK,MAAM,YAAY,cAAc,KAAK;AAChF,aAAO;AAAA,QACH,QAAQ;AAAA,QACR,QAAQ,oBAAoB,KAAK,MAAM,WAAW,aAAa,4BAA4B,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC;AAAA,QAClI,UAAU;AAAA,QACV,YAAY,MAAM,YAAY,aAAa;AAAA,QAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,MACzD;AAAA,IACJ;AACA,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,uBAAuB,KAAK,MAAM;AAAA,MAC1C,UAAU;AAAA,IACd;AAAA,EACJ;AAEA,
MAAI,UAAU,KAAK,gBAAgB,GAAG;AAClC,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,gCAAgC,OAAO;AAAA,MAC/C,UAAU;AAAA,MACV,SAAS,QAAQ,QAAQ,SAAS,QAAQ,gBAAgB,WAAW;AAAA,IACzE;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,GAAG,OAAO,oBAAoB,aAAa,eAAe,KAAK,MAAM;AAAA,IAC7E,UAAU;AAAA,IACV,YAAY;AAAA,IACZ,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,EACzD;AACJ;AAEA,eAAe,YAAY,OAAuD;AAC9E,QAAM,OAAO,MAAM,YAAY,aAAa,MAAM,YAAY;AAC9D,MAAI,gBAAgB,IAAI,GAAG;AACvB,WAAO,EAAE,QAAQ,OAAO,QAAQ,sBAAsB,UAAU,cAAc;AAAA,EAClF;AAEA,MAAI,mBAAmB,IAAI,GAAG;AAC1B,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,SAAS,oBAAoB,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,IACnD;AAAA,EACJ;AAGA,MAAI,MAAM,YAAY,WAAW,UAC1B,MAAM,YAAY,cAAc,SAC/B,MAAM,YAAY,WAAW,UAAU,MAAM,GAAG;AACpD,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,oBAAoB,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC,OAAO,MAAM,YAAY,UAAU,MAAM;AAAA,MAC5G,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AACA,MAAI,KAAK,SAAS,KAAK;AACnB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,oBAAoB,KAAK,MAAM;AAAA,MACvC,UAAU;AAAA,IACd;AAAA,EACJ;AAIA,QAAM,aAAa,MAAM,YAAY,cAAc;AACnD,MAAI,aAAa,KAAK;AAClB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,4BAA4B,WAAW,QAAQ,CAAC,CAAC;AAAA,MACzD,UAAU;AAAA,IACd;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,SAAS,KAAK,MAAM,sBAAsB,WAAW,QAAQ,CAAC,CAAC;AAAA,IACvE,UAAU;AAAA,IACV;AAAA,IACA,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,EACzD;AACJ;AAEA,SAAS,eAAe,OAA8C;AAClE,QAAM,OAAO,MAAM,YAAY,aAAa,MAAM,YAAY;AAC9D,MAAI,gBAAgB,IAAI,GAAG;AACvB,WAAO,EAAE,QAAQ,OAAO,QAAQ,sBAAsB,UAAU,iBAAiB;AAAA,EACrF;AAEA,MAAI,mBAAmB,IAAI,GAAG;AAC1B,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,MACV,SAAS,oBAAoB,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,IACnD;AAAA,EACJ;AAKA,MAAI,MAAM,YAAY,WAAW,UAC1B,MAAM,YAAY,cAAc,SAC/B,MAAM,YAAY,WAAW,UAAU,MAAM,GAAG;AACpD,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,oBAAoB,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC,OAAO,MAAM,YAAY,UAAU,MAAM;AAAA,MAC5G,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AAWA,Q
AAM,gBAAgB,MAAM,YAAY,WAAW,UAAU;AAC7D,MAAI,iBAAiB,KAAK,MAAM,YAAY,WAAW,UAAU,MAAM,YAAY,cAAc,KAAK;AAClG,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,qBAAqB,aAAa,4BAA4B,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC;AAAA,MAC7G,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AAEA,QAAM,qBAAqB,mGAAmG,KAAK,IAAI;AACvI,QAAM,eAAe,KAAK,MAAM,gBAAgB,KAAK,CAAC,GAAG;AACzD,QAAM,gBAAgB,KAAK,MAAM,gEAAgE,KAAK,CAAC,GAAG;AAC1G,QAAM,eAAe,sBAAsB,eAAe,KAAK,gBAAgB;AAE/E,MAAI,KAAK,SAAS,IAAI;AAClB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,uBAAuB,KAAK,MAAM;AAAA,MAC1C,UAAU;AAAA,IACd;AAAA,EACJ;AAGA,MAAI,KAAK,SAAS,OAAO,MAAM,YAAY,cAAc,OAAO,MAAM,YAAY,WAAW,QAAQ;AACjG,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,YAAY,KAAK,MAAM,4BAA4B,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC;AAAA,MAClG,UAAU;AAAA,MACV,YAAY,MAAM,YAAY,aAAa;AAAA,MAC3C,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,IACzD;AAAA,EACJ;AAGA,MAAI,CAAC,cAAc;AACf,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,UAAU;AAAA,IACd;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,YAAY,KAAK,MAAM,4CAA4C,kBAAkB,YAAY,WAAW,YAAY,YAAY;AAAA,IAC5I,UAAU;AAAA,IACV,YAAY;AAAA,IACZ,WAAW,MAAM,YAAY,UAAU,IAAI,OAAK,EAAE,GAAG;AAAA,EACzD;AACJ;AAEA,eAAe,YAAY,OAAuD;AAI9E,MAAI,MAAM,YAAY,WAAW,QAAQ;AACrC,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,kBAAkB,MAAM,YAAY,MAAM;AAAA,MAClD,UAAU;AAAA,IACd;AAAA,EACJ;AACA,MAAI,MAAM,uBAAuB;AAC7B,UAAM,KAAK,IAAI,OAAO,MAAM,qBAAqB;AACjD,QAAI,CAAC,GAAG,KAAK,MAAM,YAAY,WAAW,GAAG;AACzC,aAAO;AAAA,QACH,QAAQ;AAAA,QACR,QAAQ,yCAAyC,MAAM,qBAAqB;AAAA,QAC5E,UAAU;AAAA,QACV,SAAS,MAAM,YAAY,YAAY,MAAM,GAAG,GAAG;AAAA,MACvD;AAAA,IACJ;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,YAAY;AAAA,EAChB;AACJ;AAEA,SAAS,aAAa,OAA8C;AAChE,QAAM,OAAO,MAAM,YAAY,aAAa,MAAM,YAAY;AAC9D,MAAI,KAAK,SAAS,KAAK;AACnB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,qBAAqB,KAAK,MAAM;AAAA,MACxC,UAAU;AAAA,IACd;AAAA,EACJ;AACA,QAAM,WAAW,CAAC,QAAQ,WAAW,UAAU;AAC/C,QAAM,UAAU,SAAS,OAAO,OAAK,CAAC,KAAK,YAAY,EAAE,SAAS,CAAC,CAAC;AACpE,MAAI,QAAQ,SAAS,GAAG;AACpB,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,gCAA
gC,QAAQ,KAAK,IAAI,CAAC;AAAA,MAC1D,UAAU;AAAA,IACd;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ,UAAU,KAAK,MAAM;AAAA,IAC7B,UAAU;AAAA,IACV,YAAY;AAAA,EAChB;AACJ;AAIA,SAAS,aAAa,OAA8C;AAChE,MAAI,MAAM,YAAY,WAAW,QAAQ;AACrC,WAAO,EAAE,QAAQ,OAAO,QAAQ,uBAAuB,MAAM,YAAY,MAAM,IAAI,UAAU,eAAe;AAAA,EAChH;AACA,MAAI,MAAM,YAAY,eAAe,UAAa,MAAM,YAAY,aAAa,KAAK;AAClF,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,wCAAwC,MAAM,YAAY,WAAW,QAAQ,CAAC,CAAC;AAAA,MACvF,UAAU;AAAA,IACd;AAAA,EACJ;AACA,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,YAAY,MAAM,YAAY,cAAc;AAAA,EAChD;AACJ;AAmBA,SAAS,kBAA2B;AAChC,QAAM,OAAO,QAAQ,IAAI,0BAA0B,IAAI,YAAY,EAAE,KAAK;AAC1E,MAAI,QAAQ,OAAO,QAAQ,WAAW,QAAQ,QAAQ,QAAQ,MAAO,QAAO;AAC5E,SAAO;AACX;AAEA,eAAe,eACX,OACA,YAC2B;AAC3B,MAAI,MAAM,SAAS,SAAU,QAAO;AACpC,MAAI,CAAC,gBAAgB,EAAG,QAAO;AAE/B,MAAI;AACA,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,eAAe;AACtD,UAAM,aAAa,MAAM,YAAY,aAAa,MAAM,YAAY,eAAe,IAAI,MAAM,GAAG,IAAI;AACpG,UAAM,eAAe,MAAM,YAAY,WAAW,SAC5C;AAAA,sBAAyB,MAAM,YAAY,UAAU,IAAI,OAAK,GAAG,EAAE,IAAI,IAAI,EAAE,GAAG,EAAE,EAAE,KAAK,IAAI,CAAC,KAC9F;AACN,UAAM,cAAc;AAAA,MAChB;AAAA,MACA;AAAA,MACA;AAAA,MACA,YAAY,MAAM,QAAQ,KAAK;AAAA,MAC/B,kBAAkB,MAAM,QAAQ,WAAW;AAAA,MAC3C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,uBAAuB,MAAM,IAAI,kCAAkC,WAAW,MAAM;AAAA,MACpF;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACJ,EAAE,KAAK,IAAI;AAEX,UAAM,cAAc,MAAM,cAAc;AAAA,MACpC,MAAM;AAAA,MACN,MAAM;AAAA,MACN,MAAM;AAAA,MACN,WAAW;AAAA,IACf,CAAC;AACD,UAAM,OAAO,YAAY,WAAW,IAAI,KAAK;AAC7C,UAAM,YAAY,IAAI,QAAQ,GAAG;AACjC,UAAM,UAAU,IAAI,YAAY,GAAG;AACnC,QAAI,YAAY,KAAK,WAAW,WAAW;AACvC,aAAO,KAAK,WAAW,4DAA4D;AACnF,aAAO;AAAA,IACX;AACA,UAAM,SAAS,KAAK,MAAM,IAAI,MAAM,WAAW,UAAU,CAAC,CAAC;AAC3D,QAAI,OAAO,OAAO,WAAW,UAAW,QAAO;AAC/C,QAAI,OAAO,OAAQ,QAAO;AAC1B,UAAM,cAAc,OAAO,OAAO,WAAW,YAAY,OAAO,OAAO,KAAK,EAAE,SAAS,IACjF,OAAO,OAAO,KAAK,EAAE,MAAM,GAAG,GAAG,IACjC;AACN,WAAO,KAAK,WAAW,kEAA6D,WAAW,EAAE;AACjG,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,cAAc,WAAW;AAAA,MACjC,UAAU,GAAG,MAAM,I
AAI;AAAA,MACvB,YAAY,WAAW;AAAA,MACvB,SAAS,aAAa,MAAM,IAAI,aAAa,WAAW,MAAM;AAAA,IAClE;AAAA,EACJ,SAAS,KAAK;AACV,WAAO,KAAK,WAAW,4CAA6C,IAAc,OAAO,EAAE;AAC3F,WAAO;AAAA,EACX;AACJ;AAIA,eAAsB,aAAa,OAAuD;AACtF,MAAI;AACJ,MAAI;AACA,YAAQ,MAAM,MAAM;AAAA,MAChB,KAAK;AAAY,qBAAa,MAAM,WAAW,KAAK;AAAO;AAAA,MAC3D,KAAK;AAAY,qBAAa,eAAe,KAAK;AAAS;AAAA,MAC3D,KAAK;AAAY,qBAAa,MAAM,YAAY,KAAK;AAAM;AAAA,MAC3D,KAAK;AAAY,qBAAa,eAAe,KAAK;AAAS;AAAA,MAC3D,KAAK;AAAY,qBAAa,aAAa,KAAK;AAAW;AAAA,MAC3D,KAAK;AAAY,qBAAa,MAAM,YAAY,KAAK;AAAM;AAAA,MAC3D,KAAK;AAAY,qBAAa,aAAa,KAAK;AAAW;AAAA,MAC3D;AACI,eAAO,EAAE,QAAQ,OAAO,QAAQ,iBAAiB,MAAM,IAAI,IAAI,UAAU,WAAW;AAAA,IAC5F;AAAA,EACJ,SAAS,KAAK;AACV,WAAO,KAAK,WAAW,mBAAoB,IAAc,OAAO,EAAE;AAClE,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,QAAQ,mBAAoB,IAAc,OAAO;AAAA,MACjD,UAAU,GAAG,MAAM,IAAI;AAAA,IAC3B;AAAA,EACJ;AAGA,MAAI,WAAW,QAAQ;AACnB,WAAO,MAAM,eAAe,OAAO,UAAU;AAAA,EACjD;AACA,SAAO;AACX;AAGO,SAAS,oBAAoB,MAAc,WAAW,KAAuB;AAChF,MAAI;AACA,QAAI,CAAC,WAAW,IAAI,EAAG,QAAO;AAC9B,UAAM,UAAU,aAAa,MAAM,OAAO;AAC1C,WAAO,QAAQ,SAAS,WAAW,QAAQ,MAAM,GAAG,QAAQ,IAAI,sBAAsB;AAAA,EAC1F,QAAQ;AAAE,WAAO;AAAA,EAAM;AAC3B;","names":[]}

package/dist/gateway/server.js CHANGED Viewed

@@ -1545,6 +1545,57 @@ async function startGateway(options) {
       res.status(500).json({ error: "mission_digest_failed", message: err.message });
     }
   });
+  app.get("/api/mission/:id", async (req, res) => {
+    try {
+      const goalId = String(req.params.id || "").trim();
+      if (!goalId) {
+        res.status(400).json({ error: "missing_goal_id" });
+        return;
+      }
+      const { getGoal } = await import("../agent/goals.js");
+      const { getDriverState } = await import("../agent/goalDriver.js");
+      const goal = getGoal(goalId);
+      if (!goal) {
+        res.status(404).json({ error: "goal_not_found" });
+        return;
+      }
+      const state = getDriverState(goalId);
+      res.json({
+        goal: {
+          id: goal.id,
+          title: goal.title,
+          description: goal.description,
+          status: goal.status,
+          priority: goal.priority,
+          progress: goal.progress,
+          totalCost: goal.totalCost,
+          createdAt: goal.createdAt,
+          completedAt: goal.completedAt,
+          tags: goal.tags ?? [],
+          subtasks: (goal.subtasks ?? []).map((s) => ({
+            id: s.id,
+            title: s.title,
+            description: s.description,
+            status: s.status,
+            dependsOn: s.dependsOn ?? [],
+            retries: s.retries,
+            completedAt: s.completedAt
+          }))
+        },
+        driver: state ? {
+          phase: state.phase,
+          startedAt: state.startedAt,
+          currentSubtaskId: state.currentSubtaskId ?? null,
+          budget: state.budget,
+          blockedReason: state.blockedReason ?? null,
+          historyTail: (state.history ?? []).slice(-10),
+          subtaskStates: state.subtaskStates
+        } : null
+      });
+    } catch (err) {
+      res.status(500).json({ error: "mission_get_failed", message: err.message });
+    }
+  });
   app.post("/api/mission/:id/cancel", async (req, res) => {
     try {
       const goalId = String(req.params.id || "").trim();