@tangle-network/agent-runtime 0.46.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +1 -1
- package/dist/agent.js +1 -1
- package/dist/analyst-loop.d.ts +1 -1
- package/dist/{chunk-I42NHLKX.js → chunk-5YDS7BLC.js} +11 -6
- package/dist/chunk-5YDS7BLC.js.map +1 -0
- package/dist/{chunk-65FQLI4V.js → chunk-72JQCHOZ.js} +232 -3
- package/dist/chunk-72JQCHOZ.js.map +1 -0
- package/dist/{chunk-GN75RGM6.js → chunk-MGFEUYOH.js} +3 -3
- package/dist/{chunk-KPN7OQ64.js → chunk-T4OQQEE3.js} +2 -2
- package/dist/{chunk-KPN7OQ64.js.map → chunk-T4OQQEE3.js.map} +1 -1
- package/dist/{coder-DCWFQpmJ.d.ts → coder-CVZNGbyg.d.ts} +1 -1
- package/dist/{driver-C-mtBo7h.d.ts → driver-DYU2sgHr.d.ts} +1 -1
- package/dist/index.d.ts +7 -7
- package/dist/index.js +3 -3
- package/dist/{kb-gate-2Gwpz_27.d.ts → kb-gate-51BlLlVM.d.ts} +8 -2
- package/dist/{loop-runner-bin-D-K6bRp3.d.ts → loop-runner-bin-DEm4roYF.d.ts} +4 -4
- package/dist/loop-runner-bin.d.ts +5 -5
- package/dist/loop-runner-bin.js +3 -3
- package/dist/loops.d.ts +5 -5
- package/dist/loops.js +9 -1
- package/dist/mcp/bin.js +3 -3
- package/dist/mcp/index.d.ts +71 -70
- package/dist/mcp/index.js +199 -27
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-nurzFwuJ.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
- package/dist/profiles.d.ts +2 -2
- package/dist/{run-loop-CU2Y00Si.d.ts → run-loop-DvD4aGiE.d.ts} +1 -1
- package/dist/runtime.d.ts +96 -13
- package/dist/runtime.js +9 -1
- package/dist/{types-BfoeiQRZ.d.ts → types-Cbx3dNK5.d.ts} +4 -4
- package/dist/{types-DnYoHvvZ.d.ts → types-nBMuollC.d.ts} +17 -0
- package/dist/workflow.d.ts +2 -2
- package/dist/workflow.js +1 -1
- package/package.json +24 -13
- package/skills/loop-writer/SKILL.md +163 -0
- package/dist/chunk-65FQLI4V.js.map +0 -1
- package/dist/chunk-I42NHLKX.js.map +0 -1
- /package/dist/{chunk-GN75RGM6.js.map → chunk-MGFEUYOH.js.map} +0 -0
package/dist/agent.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as _tangle_network_agent_eval from '@tangle-network/agent-eval';
|
|
2
2
|
import { TraceAnalystKindSpec, AnalystFinding } from '@tangle-network/agent-eval';
|
|
3
|
-
import { R as RuntimeStreamEvent, S as SandboxClient, O as OutputAdapter, A as AgentRunSpec } from './types-DnYoHvvZ.js';
|
|
3
|
+
import { R as RuntimeStreamEvent, S as SandboxClient, O as OutputAdapter, A as AgentRunSpec } from './types-nBMuollC.js';
|
|
4
4
|
import { A as AgentSurfaces } from './improvement-adapter-BC4HhuAR.js';
|
|
5
5
|
export { C as CreateSurfaceImprovementAdapterOpts, D as DraftPatchInput, a as DraftPatchOutput, R as ResolvedSurface, S as SurfaceImprovementEdit, b as SurfaceValidationIssue, c as createSurfaceImprovementAdapter, r as renderSurfaceIssues, d as resolveSubjectPath, v as validateSurfaces } from './improvement-adapter-BC4HhuAR.js';
|
|
6
6
|
import { K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
|
package/dist/agent.js
CHANGED
package/dist/analyst-loop.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { TraceAnalystByteBudgets, TraceAnalysisStore } from '@tangle-network/agent-eval';
|
|
2
|
-
import { I as Iteration } from './types-DnYoHvvZ.js';
|
|
2
|
+
import { I as Iteration } from './types-nBMuollC.js';
|
|
3
3
|
import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
|
|
4
4
|
export { A as AnalystLoopEvent, b as AnalystRegistryLike, c as AnalystRegistryStreamingLike, d as AutoApplyPolicy, F as FindingsStoreLike, I as ImprovementAdapter, e as ImprovementEditBatch, f as ImprovementReport, K as KnowledgeAdapter, g as KnowledgeProposalBatch, h as KnowledgeReport } from './types-p8dWBIXL.js';
|
|
5
5
|
import '@tangle-network/sandbox';
|
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
} from "./chunk-KADIJAD4.js";
|
|
5
5
|
import {
|
|
6
6
|
runLoop
|
|
7
|
-
} from "./chunk-65FQLI4V.js";
|
|
7
|
+
} from "./chunk-72JQCHOZ.js";
|
|
8
8
|
|
|
9
9
|
// src/mcp/executor.ts
|
|
10
10
|
function createSiblingSandboxExecutor(options) {
|
|
@@ -91,7 +91,11 @@ function createDefaultCoderDelegate(options) {
|
|
|
91
91
|
const variants = Math.max(1, Math.trunc(args.variants ?? 1));
|
|
92
92
|
ctx.report({ iteration: 0, phase: "starting" });
|
|
93
93
|
if (variants <= 1) {
|
|
94
|
-
const { agentRunSpec, output, validator } = coderProfile({
|
|
94
|
+
const { agentRunSpec, output, validator } = coderProfile({
|
|
95
|
+
task,
|
|
96
|
+
...options.harness ? { harness: options.harness } : {},
|
|
97
|
+
...options.model ? { model: options.model } : {}
|
|
98
|
+
});
|
|
95
99
|
const result2 = await runLoop({
|
|
96
100
|
driver: singleShotDriver,
|
|
97
101
|
agentRun: agentRunSpec,
|
|
@@ -113,9 +117,10 @@ function createDefaultCoderDelegate(options) {
|
|
|
113
117
|
ctx.report({ iteration: 1, phase: "completed" });
|
|
114
118
|
return chosen2;
|
|
115
119
|
}
|
|
116
|
-
const fanout = multiHarnessCoderFanout(
|
|
117
|
-
fanoutHarnesses && fanoutHarnesses.length > 0 ? { harnesses: fanoutHarnesses.slice(0, variants) } : {
|
|
118
|
-
|
|
120
|
+
const fanout = multiHarnessCoderFanout({
|
|
121
|
+
...fanoutHarnesses && fanoutHarnesses.length > 0 ? { harnesses: fanoutHarnesses.slice(0, variants) } : {},
|
|
122
|
+
...options.fanoutModels ? { models: options.fanoutModels.slice(0, variants) } : {}
|
|
123
|
+
});
|
|
119
124
|
const agentRuns = fanout.agentRuns.slice(0, variants);
|
|
120
125
|
const result = await runLoop({
|
|
121
126
|
driver: fanout.driver,
|
|
@@ -210,4 +215,4 @@ export {
|
|
|
210
215
|
createFleetWorkspaceExecutor,
|
|
211
216
|
createDefaultCoderDelegate
|
|
212
217
|
};
|
|
213
|
-
//# sourceMappingURL=chunk-I42NHLKX.js.map
|
|
218
|
+
//# sourceMappingURL=chunk-5YDS7BLC.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/mcp/executor.ts","../src/mcp/delegates.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Delegation executors — the layer between MCP delegates and the sandbox\n * substrate. Each executor exposes a {@link SandboxClient} the kernel\n * consumes plus a placement tag so the trace pipeline can correlate workers\n * with their physical placement.\n *\n * Two implementations ship in-box:\n *\n * - {@link createSiblingSandboxExecutor} — every delegation spawns a fresh\n * sandbox sibling to the caller. Default when the MCP server runs as a\n * standalone CLI mounted outside a fleet.\n *\n * - {@link createFleetWorkspaceExecutor} — delegations dispatch onto machines\n * in the caller's existing fleet so worker diffs land directly on the\n * caller's filesystem (the fleet's shared workspace). Selected when the\n * parent sandbox passes `TANGLE_FLEET_ID` into the MCP server's env.\n */\n\nimport type { CreateSandboxOptions, SandboxInstance } from '@tangle-network/sandbox'\nimport type { LoopSandboxPlacement, SandboxClient } from '../runtime'\n\n/** @experimental */\nexport interface DelegationExecutor {\n /** Sandbox client the kernel calls. Returned with `describePlacement` set. */\n readonly client: SandboxClient\n /** Best-effort one-liner used in stderr boot logs and diagnostics. 
*/\n describe(): string\n}\n\n/** @experimental */\nexport interface SiblingSandboxExecutorOptions {\n client: SandboxClient\n}\n\n/**\n * Wrap a raw sandbox SDK client so the kernel emits\n * `loop.iteration.dispatch` events with `{ placement: 'sibling', sandboxId }`.\n *\n * The returned client `.create()` delegates to the underlying client; the\n * only added behavior is a `describePlacement` tag the kernel reads.\n *\n * @experimental\n */\nexport function createSiblingSandboxExecutor(\n options: SiblingSandboxExecutorOptions,\n): DelegationExecutor {\n const underlying = options.client\n const client: SandboxClient = {\n create(opts?: CreateSandboxOptions): Promise<SandboxInstance> {\n return underlying.create(opts)\n },\n describePlacement(box: SandboxInstance): LoopSandboxPlacement {\n return { kind: 'sibling', sandboxId: readId(box) }\n },\n }\n return {\n client,\n describe(): string {\n return 'sibling-sandbox (each delegation = fresh sandbox via client.create)'\n },\n }\n}\n\n/**\n * Minimal `SandboxFleet` surface the fleet executor calls. Declared\n * structurally so tests can pass an in-memory stub without instantiating the\n * sandbox SDK.\n *\n * @experimental\n */\nexport interface FleetHandle {\n readonly fleetId: string\n /** Machine ids in dispatch-eligible order. The executor round-robins. */\n readonly ids: ReadonlyArray<string>\n /** Resolve a machine id to its `SandboxInstance` — that machine is mounted\n * on the fleet's shared workspace, so any diff the worker writes lands on\n * every other fleet machine's filesystem too. */\n sandbox(machineId: string): Promise<SandboxInstance>\n}\n\n/** @experimental */\nexport interface FleetWorkspaceExecutorOptions {\n fleet: FleetHandle\n /**\n * Override the machine-selection policy. 
Default = round-robin across\n * `fleet.ids`, skipping the optional `excludeMachineIds` set (typically the\n * coordinator machine the MCP server is running on).\n */\n selectMachine?: (call: { callIndex: number; ids: ReadonlyArray<string> }) => string\n /**\n * Machine ids to skip during default round-robin. Set to the caller's own\n * machineId so workers don't compete with the orchestrator on the same VM.\n */\n excludeMachineIds?: ReadonlyArray<string>\n}\n\n/**\n * Build an executor that resolves each delegated iteration to an existing\n * machine in `fleet`. The fleet's shared-workspace policy means the worker\n * machine sees the caller's filesystem — diffs land in-place with no\n * cross-sandbox copy step.\n *\n * @experimental\n */\nexport function createFleetWorkspaceExecutor(\n options: FleetWorkspaceExecutorOptions,\n): DelegationExecutor {\n const fleet = options.fleet\n const exclude = new Set(options.excludeMachineIds ?? [])\n let callIndex = 0\n // machineId-by-sandboxId, populated as we resolve machines so\n // `describePlacement` can recover the assignment from the SandboxInstance\n // the kernel hands back.\n const placementBySandboxId = new Map<string, { machineId: string }>()\n\n const client: SandboxClient = {\n async create(): Promise<SandboxInstance> {\n const ids = fleet.ids.filter((id) => !exclude.has(id))\n if (ids.length === 0) {\n throw new Error(\n `agent-runtime: fleet ${fleet.fleetId} has no eligible worker machines (ids=[${fleet.ids.join(',')}], excluded=[${[...exclude].join(',')}])`,\n )\n }\n const selector = options.selectMachine\n const machineId = selector ? 
selector({ callIndex, ids }) : ids[callIndex % ids.length]\n callIndex += 1\n if (typeof machineId !== 'string' || machineId.length === 0) {\n throw new Error('agent-runtime: fleet executor selectMachine returned an empty machine id')\n }\n const box = await fleet.sandbox(machineId)\n const sandboxId = readId(box)\n if (sandboxId) placementBySandboxId.set(sandboxId, { machineId })\n return box\n },\n describePlacement(box: SandboxInstance): LoopSandboxPlacement {\n const sandboxId = readId(box)\n const recorded = sandboxId ? placementBySandboxId.get(sandboxId) : undefined\n return {\n kind: 'fleet',\n sandboxId,\n fleetId: fleet.fleetId,\n machineId: recorded?.machineId,\n }\n },\n }\n\n return {\n client,\n describe(): string {\n const excluded = exclude.size > 0 ? ` (excluded=[${[...exclude].join(',')}])` : ''\n return `fleet-workspace (fleetId=${fleet.fleetId}, machines=[${fleet.ids.join(',')}]${excluded})`\n },\n }\n}\n\nfunction readId(box: SandboxInstance): string | undefined {\n const raw = (box as unknown as { id?: unknown }).id\n return typeof raw === 'string' && raw.length > 0 ? raw : undefined\n}\n","/**\n * @experimental\n *\n * Delegate factories — the layer between MCP tool handlers and the\n * underlying `runLoop` runners.\n *\n * The MCP server is profile-agnostic: it owns the task queue + feedback\n * store + transport. Each `*Delegate` is the closure that the queue\n * invokes when a task runs. Consumers can override either delegate to\n * inject custom drivers, mocks, fleet-aware dispatchers, etc.\n *\n * The default coder delegate is wired here because we own\n * `coderProfile` / `multiHarnessCoderFanout`. The default researcher\n * delegate is **not** wired in this file — `agent-knowledge` cannot be\n * imported from `agent-runtime` without inducing a cycle. 
Consumers\n * pass `researcherDelegate` explicitly when constructing the server.\n */\n\nimport { type CoderOutput, coderProfile, multiHarnessCoderFanout } from '../profiles/coder'\nimport type { Iteration, LoopTraceEmitter, SandboxClient } from '../runtime'\nimport { runLoop } from '../runtime'\nimport { createSiblingSandboxExecutor, type DelegationExecutor } from './executor'\nimport type {\n CoderTask,\n DelegateCodeArgs,\n DelegateResearchArgs,\n DelegateUiAuditArgs,\n DelegationProgress,\n ResearchOutputShape,\n UiAuditorDelegationOutput,\n} from './types'\n\n/** @experimental */\nexport interface DelegateRunCtx {\n signal: AbortSignal\n report(progress: DelegationProgress): void\n}\n\n/** @experimental */\nexport type CoderDelegate = (\n args: DelegateCodeArgs,\n ctx: DelegateRunCtx,\n) => Promise<import('../profiles/coder').CoderOutput>\n\n/** @experimental */\nexport type ResearcherDelegate = (\n args: DelegateResearchArgs,\n ctx: DelegateRunCtx,\n) => Promise<ResearchOutputShape>\n\n/**\n * UI-auditor delegate — fully consumer-injected. agent-runtime ships no\n * default factory because the inputs are workspace path + judge function\n * + (optionally) a `SandboxClient`, and the judge is the consumer's\n * model seam. See `createInProcessUiAuditClient` + `uiAuditorProfile` in\n * `@tangle-network/agent-runtime/profiles` for the canonical wiring.\n *\n * @experimental\n */\nexport type UiAuditorDelegate = (\n args: DelegateUiAuditArgs,\n ctx: DelegateRunCtx,\n) => Promise<UiAuditorDelegationOutput>\n\n/** @experimental Structured review verdict over a coder candidate. */\nexport interface CoderReview {\n /** Gate: only approved candidates are eligible to win. */\n approved: boolean\n /** Reviewer's recommendation — surfaced in traces. */\n recommendation: 'ship' | 'approve-with-nits' | 'changes-requested' | 'reject'\n /** Readiness 0..1, used by the `highest-readiness` winner-selection strategy. 
*/\n readiness: number\n notes?: string\n}\n\n/**\n * @experimental\n *\n * Optional adversarial reviewer over a coder candidate that already passed\n * mechanical validation (tests/typecheck/forbidden/diff/no-op/secrets). Folded\n * from the ai-trading-blueprint delegation MCP: a candidate is only eligible to\n * win if the reviewer approves it. The reviewer is the consumer's seam — an LLM\n * judge, a `pnpm review` command, anything returning a `CoderReview`.\n */\nexport type CoderReviewer = (\n output: import('../profiles/coder').CoderOutput,\n task: CoderTask,\n ctx: { signal: AbortSignal },\n) => Promise<CoderReview> | CoderReview\n\n/**\n * @experimental Winner-selection strategy among validated (+ reviewed)\n * candidates. `highest-readiness` requires a `reviewer`. Default `highest-score`\n * (the kernel's behavior — preserves backward compatibility).\n */\nexport type CoderWinnerSelection =\n | 'highest-score'\n | 'smallest-diff'\n | 'highest-readiness'\n | 'first-approved'\n\n/** @experimental */\nexport interface CreateDefaultCoderDelegateOptions {\n /**\n * Execution placement. Pass a {@link DelegationExecutor} (sibling or fleet)\n * to control where worker iterations land. `sandboxClient` is a\n * convenience shorthand that wraps the client in a sibling executor — pass\n * one or the other, not both.\n */\n executor?: DelegationExecutor\n /**\n * Convenience shorthand for sibling placement. Equivalent to\n * `executor: createSiblingSandboxExecutor({ client: sandboxClient })`.\n */\n sandboxClient?: SandboxClient\n /** Backend harness for the single-coder path. Default comes from `coderProfile`. */\n harness?: string\n /** Model override for the single-coder path. */\n model?: string\n /** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */\n fanoutHarnesses?: string[]\n /** Optional per-harness model override for `variants > 1`. 
*/\n fanoutModels?: (string | undefined)[]\n /** Hard cap on the kernel's per-batch concurrency. Default 4. */\n maxConcurrency?: number\n /**\n * Optional adversarial reviewer. When set, a candidate must pass mechanical\n * validation AND `reviewer.approved` to be eligible to win — empty/secret/\n * test-failing patches are already gone; this catches the \"compiles + passes\n * but wrong/unsafe\" class the deterministic validator can't see.\n */\n reviewer?: CoderReviewer\n /** Winner-selection strategy among eligible candidates. Default `highest-score`. */\n winnerSelection?: CoderWinnerSelection\n /**\n * Loop trace emitter forwarded into every delegated `runLoop`. Wire\n * `createPropagatingTraceEmitter(readTraceContextFromEnv())` here (the bin\n * does) so delegated build-loops export their topology spans to the OTLP /\n * Tangle Intelligence sink when `OTEL_EXPORTER_OTLP_ENDPOINT` is set — and\n * are a cheap no-op when it isn't. Configurable by construction.\n */\n traceEmitter?: LoopTraceEmitter\n}\n\n/**\n * Build a coder delegate that drives `runLoop` against the project's\n * sandbox client + coder profile. When `args.variants > 1` it switches\n * to the multi-harness fanout topology.\n *\n * @experimental\n */\nexport function createDefaultCoderDelegate(\n options: CreateDefaultCoderDelegateOptions,\n): CoderDelegate {\n const executor = resolveExecutor(options)\n const sandboxClient = executor.client\n const fanoutHarnesses = options.fanoutHarnesses\n const maxConcurrency = options.maxConcurrency ?? 4\n const traceEmitter = options.traceEmitter\n return async (args, ctx) => {\n const task: CoderTask = {\n goal: buildCoderGoal(args),\n repoRoot: args.repoRoot,\n testCmd: args.config?.testCmd,\n typecheckCmd: args.config?.typecheckCmd,\n forbiddenPaths: args.config?.forbiddenPaths,\n maxDiffLines: args.config?.maxDiffLines,\n }\n const variants = Math.max(1, Math.trunc(args.variants ?? 
1))\n ctx.report({ iteration: 0, phase: 'starting' })\n if (variants <= 1) {\n const { agentRunSpec, output, validator } = coderProfile({\n task,\n ...(options.harness ? { harness: options.harness } : {}),\n ...(options.model ? { model: options.model } : {}),\n })\n const result = await runLoop({\n driver: singleShotDriver,\n agentRun: agentRunSpec,\n output,\n validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal, ...(traceEmitter ? { traceEmitter } : {}) },\n maxIterations: 1,\n maxConcurrency,\n })\n const chosen = await pickCoderWinner({\n iterations: result.iterations,\n reviewer: options.reviewer,\n selection: options.winnerSelection ?? 'highest-score',\n task,\n signal: ctx.signal,\n })\n if (!chosen) throw new Error(noWinnerMessage(options.reviewer))\n ctx.report({ iteration: 1, phase: 'completed' })\n return chosen\n }\n const fanout = multiHarnessCoderFanout({\n ...(fanoutHarnesses && fanoutHarnesses.length > 0\n ? { harnesses: fanoutHarnesses.slice(0, variants) }\n : {}),\n ...(options.fanoutModels ? { models: options.fanoutModels.slice(0, variants) } : {}),\n })\n const agentRuns = fanout.agentRuns.slice(0, variants)\n const result = await runLoop({\n driver: fanout.driver,\n agentRuns,\n output: fanout.output,\n validator: fanout.validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal, ...(traceEmitter ? { traceEmitter } : {}) },\n maxIterations: variants,\n maxConcurrency: Math.min(maxConcurrency, variants),\n })\n const chosen = await pickCoderWinner({\n iterations: result.iterations,\n reviewer: options.reviewer,\n selection: options.winnerSelection ?? 
'highest-score',\n task,\n signal: ctx.signal,\n })\n if (!chosen) throw new Error(noWinnerMessage(options.reviewer))\n ctx.report({ iteration: agentRuns.length, phase: 'completed' })\n return chosen\n }\n}\n\ninterface PickCoderWinnerArgs {\n iterations: ReadonlyArray<Iteration<CoderTask, CoderOutput>>\n reviewer: CoderReviewer | undefined\n selection: CoderWinnerSelection\n task: CoderTask\n signal: AbortSignal\n}\n\ninterface CoderCandidate {\n index: number\n output: CoderOutput\n score: number\n readiness: number\n}\n\n/**\n * Pick the winning coder candidate from a finished loop's iterations:\n * 1. keep only mechanically-VALID candidates (the validator already gated\n * tests/typecheck/forbidden/diff/no-op/secrets),\n * 2. if a `reviewer` is wired, keep only those it APPROVES,\n * 3. select among survivors by the chosen strategy.\n * Returns `undefined` when nothing survives — the delegate fails loud.\n */\nasync function pickCoderWinner(args: PickCoderWinnerArgs): Promise<CoderOutput | undefined> {\n const valid: CoderCandidate[] = []\n for (const iter of args.iterations) {\n if (iter.output === undefined || iter.error || iter.verdict?.valid !== true) continue\n valid.push({\n index: iter.index,\n output: iter.output,\n score: iter.verdict.score ?? 0,\n readiness: iter.verdict.score ?? 0,\n })\n }\n if (valid.length === 0) return undefined\n\n let eligible = valid\n if (args.reviewer) {\n eligible = []\n for (const c of valid) {\n const review = await args.reviewer(c.output, args.task, { signal: args.signal })\n if (review.approved) eligible.push({ ...c, readiness: review.readiness })\n }\n if (eligible.length === 0) return undefined\n }\n\n return selectCoderCandidate(eligible, args.selection).output\n}\n\n/** Apply the winner-selection strategy; ties broken by earliest iteration. 
*/\nfunction selectCoderCandidate(\n candidates: CoderCandidate[],\n selection: CoderWinnerSelection,\n): CoderCandidate {\n const diffLines = (c: CoderCandidate) =>\n c.output.diffStats.insertions + c.output.diffStats.deletions\n const sorted = [...candidates].sort((a, b) => {\n switch (selection) {\n case 'smallest-diff':\n return diffLines(a) - diffLines(b) || a.index - b.index\n case 'highest-readiness':\n return b.readiness - a.readiness || a.index - b.index\n case 'first-approved':\n return a.index - b.index\n default:\n return b.score - a.score || a.index - b.index\n }\n })\n return sorted[0]!\n}\n\nfunction noWinnerMessage(reviewer: CoderReviewer | undefined): string {\n return reviewer\n ? 'coder delegate: no candidate passed validation + review'\n : 'coder delegate: no candidate passed validation'\n}\n\nfunction buildCoderGoal(args: DelegateCodeArgs): string {\n if (!args.contextHint) return args.goal\n return [args.goal, '', '## Context', args.contextHint].join('\\n')\n}\n\nfunction resolveExecutor(options: CreateDefaultCoderDelegateOptions): DelegationExecutor {\n if (options.executor && options.sandboxClient) {\n throw new Error('createDefaultCoderDelegate: pass exactly one of `executor` or `sandboxClient`')\n }\n if (options.executor) return options.executor\n if (options.sandboxClient) {\n return createSiblingSandboxExecutor({ client: options.sandboxClient })\n }\n throw new Error('createDefaultCoderDelegate: `executor` or `sandboxClient` is required')\n}\n\n/**\n * Single-shot driver — plan one task on iteration 0, stop after one\n * iteration. Used by the coder delegate when `variants <= 1`. Keeps the\n * runLoop kernel-level accounting (timing, cost, trace emission) while\n * skipping fanout/refine topology overhead.\n */\nconst singleShotDriver = {\n name: 'mcp-single-shot',\n async plan<Task>(task: Task, history: ReadonlyArray<unknown>): Promise<Task[]> {\n return history.length === 0 ? 
[task] : []\n },\n decide(history: ReadonlyArray<unknown>): 'pick-winner' | 'fail' {\n return history.length > 0 ? 'pick-winner' : 'fail'\n },\n}\n"],"mappings":";;;;;;;;;AA6CO,SAAS,6BACd,SACoB;AACpB,QAAM,aAAa,QAAQ;AAC3B,QAAM,SAAwB;AAAA,IAC5B,OAAO,MAAuD;AAC5D,aAAO,WAAW,OAAO,IAAI;AAAA,IAC/B;AAAA,IACA,kBAAkB,KAA4C;AAC5D,aAAO,EAAE,MAAM,WAAW,WAAW,OAAO,GAAG,EAAE;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AAAA,IACL;AAAA,IACA,WAAmB;AACjB,aAAO;AAAA,IACT;AAAA,EACF;AACF;AA2CO,SAAS,6BACd,SACoB;AACpB,QAAM,QAAQ,QAAQ;AACtB,QAAM,UAAU,IAAI,IAAI,QAAQ,qBAAqB,CAAC,CAAC;AACvD,MAAI,YAAY;AAIhB,QAAM,uBAAuB,oBAAI,IAAmC;AAEpE,QAAM,SAAwB;AAAA,IAC5B,MAAM,SAAmC;AACvC,YAAM,MAAM,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;AACrD,UAAI,IAAI,WAAW,GAAG;AACpB,cAAM,IAAI;AAAA,UACR,wBAAwB,MAAM,OAAO,0CAA0C,MAAM,IAAI,KAAK,GAAG,CAAC,gBAAgB,CAAC,GAAG,OAAO,EAAE,KAAK,GAAG,CAAC;AAAA,QAC1I;AAAA,MACF;AACA,YAAM,WAAW,QAAQ;AACzB,YAAM,YAAY,WAAW,SAAS,EAAE,WAAW,IAAI,CAAC,IAAI,IAAI,YAAY,IAAI,MAAM;AACtF,mBAAa;AACb,UAAI,OAAO,cAAc,YAAY,UAAU,WAAW,GAAG;AAC3D,cAAM,IAAI,MAAM,0EAA0E;AAAA,MAC5F;AACA,YAAM,MAAM,MAAM,MAAM,QAAQ,SAAS;AACzC,YAAM,YAAY,OAAO,GAAG;AAC5B,UAAI,UAAW,sBAAqB,IAAI,WAAW,EAAE,UAAU,CAAC;AAChE,aAAO;AAAA,IACT;AAAA,IACA,kBAAkB,KAA4C;AAC5D,YAAM,YAAY,OAAO,GAAG;AAC5B,YAAM,WAAW,YAAY,qBAAqB,IAAI,SAAS,IAAI;AACnE,aAAO;AAAA,QACL,MAAM;AAAA,QACN;AAAA,QACA,SAAS,MAAM;AAAA,QACf,WAAW,UAAU;AAAA,MACvB;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,WAAmB;AACjB,YAAM,WAAW,QAAQ,OAAO,IAAI,eAAe,CAAC,GAAG,OAAO,EAAE,KAAK,GAAG,CAAC,OAAO;AAChF,aAAO,4BAA4B,MAAM,OAAO,eAAe,MAAM,IAAI,KAAK,GAAG,CAAC,IAAI,QAAQ;AAAA,IAChG;AAAA,EACF;AACF;AAEA,SAAS,OAAO,KAA0C;AACxD,QAAM,MAAO,IAAoC;AACjD,SAAO,OAAO,QAAQ,YAAY,IAAI,SAAS,IAAI,MAAM;AAC3D;;;ACTO,SAAS,2BACd,SACe;AACf,QAAM,WAAW,gBAAgB,OAAO;AACxC,QAAM,gBAAgB,SAAS;AAC/B,QAAM,kBAAkB,QAAQ;AAChC,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,eAAe,QAAQ;AAC7B,SAAO,OAAO,MAAM,QAAQ;AAC1B,UAAM,OAAkB;AAAA,MACtB,MAAM,eAAe,IAAI;AAAA,MACzB,UAAU,KAAK;AAAA,MACf,SAAS,KAAK,QAAQ;AAAA,MACtB,cAAc,KAAK,QAAQ;AAAA,MAC3B,gBAAgB,KAAK,QAAQ;AAAA,MAC7B,c
AAc,KAAK,QAAQ;AAAA,IAC7B;AACA,UAAM,WAAW,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAC3D,QAAI,OAAO,EAAE,WAAW,GAAG,OAAO,WAAW,CAAC;AAC9C,QAAI,YAAY,GAAG;AACjB,YAAM,EAAE,cAAc,QAAQ,UAAU,IAAI,aAAa;AAAA,QACvD;AAAA,QACA,GAAI,QAAQ,UAAU,EAAE,SAAS,QAAQ,QAAQ,IAAI,CAAC;AAAA,QACtD,GAAI,QAAQ,QAAQ,EAAE,OAAO,QAAQ,MAAM,IAAI,CAAC;AAAA,MAClD,CAAC;AACD,YAAMA,UAAS,MAAM,QAAQ;AAAA,QAC3B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA,KAAK,EAAE,eAAe,QAAQ,IAAI,QAAQ,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC,EAAG;AAAA,QACpF,eAAe;AAAA,QACf;AAAA,MACF,CAAC;AACD,YAAMC,UAAS,MAAM,gBAAgB;AAAA,QACnC,YAAYD,QAAO;AAAA,QACnB,UAAU,QAAQ;AAAA,QAClB,WAAW,QAAQ,mBAAmB;AAAA,QACtC;AAAA,QACA,QAAQ,IAAI;AAAA,MACd,CAAC;AACD,UAAI,CAACC,QAAQ,OAAM,IAAI,MAAM,gBAAgB,QAAQ,QAAQ,CAAC;AAC9D,UAAI,OAAO,EAAE,WAAW,GAAG,OAAO,YAAY,CAAC;AAC/C,aAAOA;AAAA,IACT;AACA,UAAM,SAAS,wBAAwB;AAAA,MACrC,GAAI,mBAAmB,gBAAgB,SAAS,IAC5C,EAAE,WAAW,gBAAgB,MAAM,GAAG,QAAQ,EAAE,IAChD,CAAC;AAAA,MACL,GAAI,QAAQ,eAAe,EAAE,QAAQ,QAAQ,aAAa,MAAM,GAAG,QAAQ,EAAE,IAAI,CAAC;AAAA,IACpF,CAAC;AACD,UAAM,YAAY,OAAO,UAAU,MAAM,GAAG,QAAQ;AACpD,UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,QAAQ,OAAO;AAAA,MACf;AAAA,MACA,QAAQ,OAAO;AAAA,MACf,WAAW,OAAO;AAAA,MAClB;AAAA,MACA,KAAK,EAAE,eAAe,QAAQ,IAAI,QAAQ,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC,EAAG;AAAA,MACpF,eAAe;AAAA,MACf,gBAAgB,KAAK,IAAI,gBAAgB,QAAQ;AAAA,IACnD,CAAC;AACD,UAAM,SAAS,MAAM,gBAAgB;AAAA,MACnC,YAAY,OAAO;AAAA,MACnB,UAAU,QAAQ;AAAA,MAClB,WAAW,QAAQ,mBAAmB;AAAA,MACtC;AAAA,MACA,QAAQ,IAAI;AAAA,IACd,CAAC;AACD,QAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,gBAAgB,QAAQ,QAAQ,CAAC;AAC9D,QAAI,OAAO,EAAE,WAAW,UAAU,QAAQ,OAAO,YAAY,CAAC;AAC9D,WAAO;AAAA,EACT;AACF;AAyBA,eAAe,gBAAgB,MAA6D;AAC1F,QAAM,QAA0B,CAAC;AACjC,aAAW,QAAQ,KAAK,YAAY;AAClC,QAAI,KAAK,WAAW,UAAa,KAAK,SAAS,KAAK,SAAS,UAAU,KAAM;AAC7E,UAAM,KAAK;AAAA,MACT,OAAO,KAAK;AAAA,MACZ,QAAQ,KAAK;AAAA,MACb,OAAO,KAAK,QAAQ,SAAS;AAAA,MAC7B,WAAW,KAAK,QAAQ,SAAS;AAAA,IACnC,CAAC;AAAA,EACH;AACA,MAAI,MAAM,WAAW,EAAG,QAAO;AAE/B,MAAI,WAAW;AACf,MAAI,KAAK,UAAU;AACjB,eAAW,CAAC;AACZ,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,MAAM,KAAK,SAAS,EAAE,QAAQ,KAAK,MAA
M,EAAE,QAAQ,KAAK,OAAO,CAAC;AAC/E,UAAI,OAAO,SAAU,UAAS,KAAK,EAAE,GAAG,GAAG,WAAW,OAAO,UAAU,CAAC;AAAA,IAC1E;AACA,QAAI,SAAS,WAAW,EAAG,QAAO;AAAA,EACpC;AAEA,SAAO,qBAAqB,UAAU,KAAK,SAAS,EAAE;AACxD;AAGA,SAAS,qBACP,YACA,WACgB;AAChB,QAAM,YAAY,CAAC,MACjB,EAAE,OAAO,UAAU,aAAa,EAAE,OAAO,UAAU;AACrD,QAAM,SAAS,CAAC,GAAG,UAAU,EAAE,KAAK,CAAC,GAAG,MAAM;AAC5C,YAAQ,WAAW;AAAA,MACjB,KAAK;AACH,eAAO,UAAU,CAAC,IAAI,UAAU,CAAC,KAAK,EAAE,QAAQ,EAAE;AAAA,MACpD,KAAK;AACH,eAAO,EAAE,YAAY,EAAE,aAAa,EAAE,QAAQ,EAAE;AAAA,MAClD,KAAK;AACH,eAAO,EAAE,QAAQ,EAAE;AAAA,MACrB;AACE,eAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,IAC5C;AAAA,EACF,CAAC;AACD,SAAO,OAAO,CAAC;AACjB;AAEA,SAAS,gBAAgB,UAA6C;AACpE,SAAO,WACH,4DACA;AACN;AAEA,SAAS,eAAe,MAAgC;AACtD,MAAI,CAAC,KAAK,YAAa,QAAO,KAAK;AACnC,SAAO,CAAC,KAAK,MAAM,IAAI,cAAc,KAAK,WAAW,EAAE,KAAK,IAAI;AAClE;AAEA,SAAS,gBAAgB,SAAgE;AACvF,MAAI,QAAQ,YAAY,QAAQ,eAAe;AAC7C,UAAM,IAAI,MAAM,+EAA+E;AAAA,EACjG;AACA,MAAI,QAAQ,SAAU,QAAO,QAAQ;AACrC,MAAI,QAAQ,eAAe;AACzB,WAAO,6BAA6B,EAAE,QAAQ,QAAQ,cAAc,CAAC;AAAA,EACvE;AACA,QAAM,IAAI,MAAM,uEAAuE;AACzF;AAQA,IAAM,mBAAmB;AAAA,EACvB,MAAM;AAAA,EACN,MAAM,KAAW,MAAY,SAAkD;AAC7E,WAAO,QAAQ,WAAW,IAAI,CAAC,IAAI,IAAI,CAAC;AAAA,EAC1C;AAAA,EACA,OAAO,SAAyD;AAC9D,WAAO,QAAQ,SAAS,IAAI,gBAAgB;AAAA,EAC9C;AACF;","names":["result","chosen"]}
|
|
@@ -1024,6 +1024,7 @@ function createSandboxLineage(client, capabilities, options = {}) {
|
|
|
1024
1024
|
if (signal.aborted) throwAbort();
|
|
1025
1025
|
const opts = buildBackendOptions(spec.profile, spec.sandboxOverrides);
|
|
1026
1026
|
const box = await acquireSandbox(client, opts, { signal });
|
|
1027
|
+
await spec.prepareBox?.(box, { signal });
|
|
1027
1028
|
owned.push(box);
|
|
1028
1029
|
return box;
|
|
1029
1030
|
};
|
|
@@ -1052,6 +1053,7 @@ function createSandboxLineage(client, capabilities, options = {}) {
|
|
|
1052
1053
|
if (checkpointId !== void 0) {
|
|
1053
1054
|
const box2 = await forkFromCheckpoint(parent.box, checkpointId, signal);
|
|
1054
1055
|
owned.push(box2);
|
|
1056
|
+
await spec.prepareBox?.(box2, { signal });
|
|
1055
1057
|
const sessionId2 = mintSessionId();
|
|
1056
1058
|
return {
|
|
1057
1059
|
handle: { box: box2, sessionId: sessionId2 },
|
|
@@ -1475,6 +1477,7 @@ async function executeIteration(args) {
|
|
|
1475
1477
|
if (args.validator) {
|
|
1476
1478
|
slot.verdict = await args.validator.validate(slot.output, {
|
|
1477
1479
|
iteration: args.item.index,
|
|
1480
|
+
...box ? { box } : {},
|
|
1478
1481
|
signal: args.signal,
|
|
1479
1482
|
traceEmitter: args.ctx.traceEmitter
|
|
1480
1483
|
});
|
|
@@ -1571,7 +1574,9 @@ function readSandboxId(box) {
|
|
|
1571
1574
|
async function createSandboxForSpec(client, spec, signal) {
|
|
1572
1575
|
const opts = buildBackendOptions(spec.profile, spec.sandboxOverrides);
|
|
1573
1576
|
if (signal.aborted) throwAbort();
|
|
1574
|
-
|
|
1577
|
+
const box = await acquireSandbox(client, opts, { signal });
|
|
1578
|
+
await spec.prepareBox?.(box, { signal });
|
|
1579
|
+
return box;
|
|
1575
1580
|
}
|
|
1576
1581
|
function finalize(args) {
|
|
1577
1582
|
const winner = args.options.selectWinner ? args.options.selectWinner(args.iterations) : args.options.driver.selectWinner?.(args.iterations) ?? defaultSelectWinner(args.iterations);
|
|
@@ -1732,6 +1737,162 @@ function loopDispatch(opts) {
|
|
|
1732
1737
|
return (profile, scenario, ctx) => runLoopForCell(opts, scenario, profile, ctx);
|
|
1733
1738
|
}
|
|
1734
1739
|
|
|
1740
|
+
// src/runtime/observe.ts
|
|
1741
|
+
import { makeFinding } from "@tangle-network/agent-eval";
|
|
1742
|
+
var observerId = "observe/trace";
|
|
1743
|
+
/**
 * Condense an agent trace into a short, line-oriented summary.
 *
 * Each event contributes at most one line:
 *   - `data.part.type === "tool"`      -> `tool:<name>(<status>)`
 *   - event type containing "error"    -> `ERROR: <message|detail>` (first 200 chars)
 *   - `status` event with string data  -> `status:<status>`
 *   - any other type containing "tool" -> `tool-event:<type>`
 * Consecutive identical lines are collapsed into one line suffixed ` xN`.
 *
 * @param trace - iterable of trace events (`{ type, data }`); nullish is treated as empty.
 * @param maxLines - maximum number of collapsed lines to keep.
 * @returns the summary joined by newlines, or a placeholder when nothing matched.
 */
function summarizeTrace(trace, maxLines) {
  const lines = [];
  for (const ev of trace ?? []) {
    // Tolerate malformed events (null entries, non-string types) instead of throwing.
    const e = ev ?? {};
    const t = typeof e.type === "string" ? e.type.toLowerCase() : "";
    const d = e.data ?? {};
    const part = d.part ?? {};
    if (part.type === "tool") {
      lines.push(`tool:${part.tool}${part.state?.status ? `(${part.state.status})` : ""}`);
    } else if (t.includes("error")) {
      lines.push(`ERROR: ${String(d.message ?? d.detail ?? "").slice(0, 200)}`);
    } else if (t === "status" && typeof d.status === "string") {
      lines.push(`status:${d.status}`);
    } else if (t.includes("tool")) {
      lines.push(`tool-event:${t}`);
    }
  }
  // Run-length encode with explicit counters. Parsing the " xN" suffix back out
  // of the rendered text would confuse a line that naturally ends in " xN"
  // with an already-collapsed run.
  const runs = [];
  for (const text of lines) {
    const last = runs[runs.length - 1];
    if (last !== undefined && last.text === text) {
      last.count += 1;
    } else {
      runs.push({ text, count: 1 });
    }
  }
  const rendered = runs
    .slice(0, maxLines)
    .map((run) => (run.count > 1 ? `${run.text} x${run.count}` : run.text))
    .join("\n");
  return rendered || "(no tool/error events in trace)";
}
|
|
1766
|
+
// JSON schema handed to the chat call as `jsonSchema`: an object with a single
// required `findings` array whose items all carry the six fields below.
var findingsSchema = (() => {
  const severityLevels = ["critical", "high", "medium", "low", "info"];
  const audiences = ["agent", "operator"];

  // Per-finding fields; every one of them is required (see `required` below).
  const findingProperties = {
    area: {
      type: "string",
      description: "tool-use | cost | verification | process | failure | latency"
    },
    severity: { type: "string", enum: severityLevels },
    claim: {
      type: "string",
      description: "what you OBSERVED in the trace (a fact, with the evidence)"
    },
    recommended_action: {
      type: "string",
      description: "the concrete change for the agent or operator"
    },
    audience: {
      type: "string",
      enum: audiences,
      description: "who should act on this"
    },
    confidence: { type: "number" }
  };

  const findingItem = {
    type: "object",
    additionalProperties: false,
    properties: findingProperties,
    // Same order as the property declarations above.
    required: Object.keys(findingProperties)
  };

  return {
    name: "observer_findings",
    schema: {
      type: "object",
      additionalProperties: false,
      properties: {
        findings: { type: "array", items: findingItem }
      },
      required: ["findings"]
    }
  };
})();
|
|
1805
|
+
/**
 * Ask a chat model to review an agent run from its trace and turn the answer
 * into findings, optionally appending each one to a corpus.
 *
 * @param input - `{ task, trace, output, outcome?, runId? }` describing the run.
 *   A missing or non-string `output` is coerced to a string instead of throwing.
 * @param opts - `{ chat, model?, signal?, maxTraceLines?, corpus?, tags? }`.
 *   `chat.chat(request, options)` must resolve to an object with `content`;
 *   `corpus.append(record)` must resolve to an object with `succeeded`.
 * @returns `{ findings, learned, report }`: the findings built with
 *   `makeFinding`, the corpus records whose append reported `succeeded`, and
 *   the text produced by `renderReport(findings)`.
 */
async function observe(input, opts) {
  const traceSummary = summarizeTrace(input.trace, opts.maxTraceLines ?? 80);
  // Coerce before slicing so a run without a final output still gets observed.
  const finalOutput = String(input.output ?? "").slice(0, 1200);
  const userMessage = [
    `TASK: ${input.task}`,
    "",
    `OUTCOME: ${input.outcome ?? "unknown"}`,
    "",
    "FINAL OUTPUT (truncated):",
    finalOutput,
    "",
    'TRACE (in order; "xN" = repeated):',
    traceSummary
  ].join("\n");
  const res = await opts.chat.chat(
    {
      ...(opts.model ? { model: opts.model } : {}),
      jsonSchema: findingsSchema,
      messages: [
        {
          role: "system",
          content: "You are a third-person OBSERVER watching an AI agent work. You see its TRACE (what it did), not its grader. From the trace, name SPECIFIC, behavior-grounded findings: wasted/duplicated tool calls, thrash/retries, token/cost waste, missing verification, failure patterns. For each, a concrete recommended_action, and whether the AGENT (fix its skills/prompt/tools) or the OPERATOR (fix framing/decomposition/config) should act. Only claim what the trace shows. No findings if the run was clean."
        },
        {
          role: "user",
          content: userMessage
        }
      ]
    },
    { ...(opts.signal ? { signal: opts.signal } : {}) }
  );
  // The model's JSON is not validated against the schema here, so drop array
  // entries that are not objects rather than crashing on property access.
  const parsed = parseFindings(res.content).filter(
    (f) => f !== null && typeof f === "object"
  );
  // NOTE(review): the fallback for `producedAt` is the run id (or the observer
  // id), not a timestamp — confirm this is what the corpus record expects.
  const producedAt = input.runId ? `${input.runId}` : observerId;
  const findings = parsed.map(
    (f) => makeFinding({
      analyst_id: observerId,
      area: `${f.area}`,
      severity: f.severity,
      claim: f.claim,
      recommended_action: f.recommended_action,
      // Non-finite values (e.g. an overflowing literal) fall back to the default too.
      confidence: Number.isFinite(f.confidence) ? f.confidence : 0.5,
      evidence_refs: [],
      // The observer reads BEHAVIOR, never the judge verdict — firewall provenance.
      derived_from_judge: false,
      metadata: { audience: f.audience },
      ...(input.runId ? { subject: input.runId } : {})
    })
  );
  const learned = [];
  if (opts.corpus) {
    // Records are appended one at a time, in finding order.
    for (const f of findings) {
      const record = {
        schemaVersion: "1.0.0",
        id: f.finding_id,
        runId: input.runId ?? observerId,
        producedAt: f.produced_at ?? producedAt,
        area: f.area,
        // The stored claim is the action to take; the observation becomes the rationale.
        claim: f.recommended_action ?? f.claim,
        ...(f.claim ? { rationale: f.claim } : {}),
        tags: [...(opts.tags ?? []), `audience:${f.metadata?.audience ?? "agent"}`],
        confidence: f.confidence,
        evidence: [{ kind: "finding", uri: f.finding_id }]
      };
      const r = await opts.corpus.append(record);
      if (r.succeeded) learned.push(record);
    }
  }
  return { findings, learned, report: renderReport(findings) };
}
|
|
1870
|
+
/**
 * Extract the `findings` array from a model response.
 *
 * The content is parsed as JSON; if that fails, the span from the first `{` to
 * the last `}` is tried instead (covers JSON wrapped in prose or code fences).
 * Anything that cannot be parsed, or that has no array under `findings`,
 * yields an empty list — this function never throws.
 *
 * @param content - raw response text from the model.
 * @returns the `findings` array as parsed (entries are not validated), or `[]`.
 */
function parseFindings(content) {
  if (typeof content !== "string") return [];
  // JSON.parse never returns undefined, so undefined unambiguously means "failed".
  const tryParse = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return undefined;
    }
  };
  let obj = tryParse(content);
  if (obj === undefined) {
    const m = content.match(/\{[\s\S]*\}/);
    obj = m ? tryParse(m[0]) : undefined;
  }
  // Optional chaining covers a failed parse as well as a literal `null` payload.
  const arr = obj?.findings;
  return Array.isArray(arr) ? arr : [];
}
|
|
1881
|
+
/**
 * Render findings as a two-section text report: one section for the agent and
 * one for the operator. Empty sections are omitted.
 *
 * A finding goes to the operator section only when its `metadata.audience` is
 * exactly "operator". Every other finding — including a missing or
 * unrecognised audience — goes to the agent section, so no finding is ever
 * dropped from the report.
 *
 * @param findings - array of findings (`severity`, `claim`,
 *   `recommended_action?`, `metadata?.audience`).
 * @returns the report text, or a fixed "clean run" line for an empty array.
 */
function renderReport(findings) {
  if (findings.length === 0) return "\u2713 clean run \u2014 the observer found nothing to change.";
  const isForOperator = (f) => f.metadata?.audience === "operator";
  const forOperator = findings.filter(isForOperator);
  const forAgent = findings.filter((f) => !isForOperator(f));
  const renderItem = (f) => `- [${f.severity}] ${f.claim}\n\u2192 ${f.recommended_action ?? ""}`;
  // Each non-empty section is a bold title, its items, and a trailing newline.
  const block = (title, fs) =>
    fs.length === 0 ? "" : `**${title}**\n${fs.map(renderItem).join("\n")}\n`;
  return [
    block("For the agent (fix skills / prompt / tools)", forAgent),
    block("For you (the operator)", forOperator)
  ].filter(Boolean).join("\n");
}
|
|
1895
|
+
|
|
1735
1896
|
// src/runtime/supervise/scope.ts
|
|
1736
1897
|
function createScope(args) {
|
|
1737
1898
|
const children = /* @__PURE__ */ new Map();
|
|
@@ -4028,6 +4189,70 @@ function errorMessage(error) {
|
|
|
4028
4189
|
return error instanceof Error ? error.message : String(error);
|
|
4029
4190
|
}
|
|
4030
4191
|
|
|
4192
|
+
// src/runtime/workspace.ts
|
|
4193
|
+
/**
 * Build a shell function that runs a command on the local machine with
 * `child_process.execFile` (no shell interpolation).
 *
 * The returned function takes `(args, cwd)`, where `args[0]` is the binary and
 * the rest are its arguments, and resolves to `{ stdout, stderr, code }`.
 * Failures of the child are reported through `code` instead of a rejection:
 *   - a non-zero exit yields that numeric exit code;
 *   - a failure with no numeric exit code (binary not found, killed by a
 *     signal, ...) yields `code: 1`, and the error message is used as `stderr`
 *     when the process produced none, so the cause is not lost.
 *
 * @returns async `(args, cwd) => { stdout, stderr, code }`.
 */
function localShell() {
  return async (args, cwd) => {
    const { execFile } = await import("child_process");
    const [bin, ...rest] = args;
    return new Promise((resolve) => {
      execFile(
        bin ?? "",
        rest,
        { cwd, encoding: "utf-8", maxBuffer: 64 * 1024 * 1024 },
        (err, stdout, stderr) => {
          if (!err) {
            resolve({ stdout: stdout ?? "", stderr: stderr ?? "", code: 0 });
            return;
          }
          // `err.code` is the exit status when the process ran to completion, but a
          // string such as "ENOENT" (or null) when it could not be spawned or was
          // killed; only a number is a meaningful exit code.
          const exited = typeof err.code === "number";
          resolve({
            stdout: stdout ?? "",
            stderr: exited ? stderr ?? "" : stderr || err.message || "",
            code: exited ? err.code : 1
          });
        }
      );
    });
  };
}
|
|
4213
|
+
/**
 * Build a workspace backed by a git remote.
 *
 * @param opts - `{ ref, branch?, shell?, noHooks? }`.
 *   `ref` is the repository passed to `git clone` / `git ls-remote`;
 *   `branch` defaults to "main"; `shell` defaults to `localShell()`;
 *   hooks are disabled via `core.hooksPath=/dev/null` unless `noHooks === false`.
 * @returns `{ ref, materialize(dir), commit(dir, message), head() }`:
 *   - `materialize` clones `branch` of `ref` into `dir` and resolves to undefined;
 *   - `commit` stages everything, commits if there are changes, then rebases
 *     onto and pushes to `origin/<branch>`. Resolves to `{ ok: true, rev }`, or
 *     `{ ok: false, conflict }` when the pull or push fails;
 *   - `head` resolves to the first whitespace-delimited token of the
 *     `ls-remote` output for the branch ("" when there is none).
 * @throws Error from `materialize`, `head`, and the add/status/commit/rev-parse
 *   steps of `commit` when the underlying git command exits non-zero.
 */
function gitWorkspace(opts) {
  const shell = opts.shell ?? localShell();
  const branch = opts.branch ?? "main";
  const cfg = opts.noHooks === false ? [] : ["-c", "core.hooksPath=/dev/null"];
  // Fixed identity so commits work where no user identity is configured.
  const ident = ["-c", "user.email=workspace@tangle.local", "-c", "user.name=workspace"];
  // Run a git command and throw on a non-zero exit; resolves to stdout.
  const run = async (args, cwd) => {
    const res = await shell(["git", ...cfg, ...ident, ...args], cwd);
    if (res.code !== 0) {
      throw new Error(
        `git ${args.join(" ")} failed (${res.code}): ${tail(res.stderr || res.stdout)}`
      );
    }
    return res.stdout;
  };
  return {
    ref: opts.ref,
    // "--" ends option parsing, so a ref or dir that starts with "-" is taken
    // as a positional argument and can never be interpreted as a git option.
    materialize: (dir) => run(["clone", "--branch", branch, "--", opts.ref, dir]).then(() => {
    }),
    async commit(dir, message) {
      await run(["add", "-A"], dir);
      const status = await run(["status", "--porcelain"], dir);
      // Nothing staged: report the current HEAD without committing or pushing.
      if (!status.trim()) return { ok: true, rev: (await run(["rev-parse", "HEAD"], dir)).trim() };
      await run(["commit", "-m", message], dir);
      // Pull and push go through `shell` directly: a failure here is returned
      // as a conflict result rather than thrown.
      const pull = await shell(["git", ...cfg, ...ident, "pull", "--rebase", "origin", branch], dir);
      if (pull.code !== 0) {
        // Best effort: leave the checkout out of the half-finished rebase.
        await shell(["git", ...cfg, "rebase", "--abort"], dir).catch(() => {
        });
        return { ok: false, conflict: tail(pull.stderr || pull.stdout) };
      }
      const push = await shell(["git", ...cfg, ...ident, "push", "origin", branch], dir);
      if (push.code !== 0) return { ok: false, conflict: tail(push.stderr || push.stdout) };
      return { ok: true, rev: (await run(["rev-parse", "HEAD"], dir)).trim() };
    },
    async head() {
      const out = await run(["ls-remote", "--", opts.ref, `refs/heads/${branch}`]);
      return out.split(/\s+/)[0] ?? "";
    }
  };
}
|
|
4252
|
+
/**
 * Return the last `max` characters of a string (used to keep command output
 * in error messages short).
 *
 * @param s - text to truncate; nullish is treated as "", other non-strings are stringified.
 * @param max - maximum number of trailing characters to keep (default 400).
 * @returns the trailing slice, or "" when `max` is not a positive number.
 */
function tail(s, max = 400) {
  const text = typeof s === "string" ? s : String(s ?? "");
  // slice(-0) would return the whole string, so handle non-positive limits explicitly.
  if (!(max > 0)) return "";
  return text.slice(-max);
}
|
|
4255
|
+
|
|
4031
4256
|
export {
|
|
4032
4257
|
contentAddress,
|
|
4033
4258
|
InMemoryResultBlobStore,
|
|
@@ -4059,6 +4284,8 @@ export {
|
|
|
4059
4284
|
createSandboxForSpec,
|
|
4060
4285
|
defaultSelectWinner,
|
|
4061
4286
|
loopDispatch,
|
|
4287
|
+
observe,
|
|
4288
|
+
renderReport,
|
|
4062
4289
|
createScope,
|
|
4063
4290
|
settledToIteration,
|
|
4064
4291
|
pipeline,
|
|
@@ -4084,6 +4311,8 @@ export {
|
|
|
4084
4311
|
runPersonified,
|
|
4085
4312
|
trajectoryReport,
|
|
4086
4313
|
equalKOnCost,
|
|
4087
|
-
openSandboxRun
|
|
4314
|
+
openSandboxRun,
|
|
4315
|
+
localShell,
|
|
4316
|
+
gitWorkspace
|
|
4088
4317
|
};
|
|
4089
|
-
//# sourceMappingURL=chunk-
|
|
4318
|
+
//# sourceMappingURL=chunk-72JQCHOZ.js.map
|