@tangle-network/agent-runtime 0.37.0 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/agent.d.ts +3 -3
  2. package/dist/analyst-loop.d.ts +2 -2
  3. package/dist/analyst-loop.js +3 -257
  4. package/dist/analyst-loop.js.map +1 -1
  5. package/dist/{chunk-T3GJBKHA.js → chunk-7ZECSZ3C.js} +2 -2
  6. package/dist/chunk-AXWGLYSF.js +201 -0
  7. package/dist/chunk-AXWGLYSF.js.map +1 -0
  8. package/dist/chunk-FNMGYYSS.js +60 -0
  9. package/dist/chunk-FNMGYYSS.js.map +1 -0
  10. package/dist/{chunk-V6GURW4W.js → chunk-HSX6PFZR.js} +1 -209
  11. package/dist/chunk-HSX6PFZR.js.map +1 -0
  12. package/dist/{chunk-M65QJD35.js → chunk-PK5DYSNO.js} +5 -3
  13. package/dist/{chunk-M65QJD35.js.map → chunk-PK5DYSNO.js.map} +1 -1
  14. package/dist/chunk-VLXRXMTF.js +212 -0
  15. package/dist/chunk-VLXRXMTF.js.map +1 -0
  16. package/dist/chunk-VOX6Z3II.js +90 -0
  17. package/dist/chunk-VOX6Z3II.js.map +1 -0
  18. package/dist/chunk-XBUG326M.js +261 -0
  19. package/dist/chunk-XBUG326M.js.map +1 -0
  20. package/dist/dynamic-DcrwVGuV.d.ts +106 -0
  21. package/dist/{improvement-adapter-CaZxFxTd.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
  22. package/dist/improvement.d.ts +6 -130
  23. package/dist/improvement.js +4 -85
  24. package/dist/improvement.js.map +1 -1
  25. package/dist/index.d.ts +10 -85
  26. package/dist/index.js +27 -44
  27. package/dist/index.js.map +1 -1
  28. package/dist/{otel-export-DgFMwsVy.d.ts → kb-gate-YdPNEagq.d.ts} +62 -176
  29. package/dist/loop-runner-bin-DgZj0zfJ.d.ts +192 -0
  30. package/dist/loop-runner-bin.d.ts +12 -0
  31. package/dist/loop-runner-bin.js +19 -0
  32. package/dist/loop-runner-bin.js.map +1 -0
  33. package/dist/loops.d.ts +5 -106
  34. package/dist/mcp/bin.js +3 -2
  35. package/dist/mcp/bin.js.map +1 -1
  36. package/dist/mcp/index.d.ts +6 -79
  37. package/dist/mcp/index.js +11 -62
  38. package/dist/mcp/index.js.map +1 -1
  39. package/dist/optimize-prompt-D-urF2wW.d.ts +129 -0
  40. package/dist/otel-export-xgf4J6bo.d.ts +191 -0
  41. package/dist/profiles.d.ts +1 -1
  42. package/dist/{types-CmTjKLyB.d.ts → types-B9O7l-ij.d.ts} +2 -2
  43. package/dist/{types-D_MXrmJP.d.ts → types-p8dWBIXL.d.ts} +1 -1
  44. package/package.json +3 -2
  45. package/dist/chunk-V6GURW4W.js.map +0 -1
  46. /package/dist/{chunk-T3GJBKHA.js.map → chunk-7ZECSZ3C.js.map} +0 -0
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/mcp/bin.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * @experimental\n *\n * `agent-runtime-mcp` — stdio MCP server entry point.\n *\n * Spins up a server with the default coder delegate (wired against the\n * real `@tangle-network/sandbox` client) and, when the optional\n * `@tangle-network/agent-knowledge` peer is installed, a researcher\n * delegate against `multiHarnessResearcherFanout`.\n *\n * Environment variables:\n * TANGLE_API_KEY required — passed to `new Sandbox({ apiKey })`\n * SANDBOX_BASE_URL optional — sandbox-SDK base URL override\n * TANGLE_FLEET_ID optional — when set, delegations dispatch\n * INTO this fleet's shared workspace instead\n * of creating sibling sandboxes. Set by the\n * parent sandbox when launching this MCP\n * server so worker diffs land on the caller's\n * filesystem with no cross-sandbox boundary.\n * TANGLE_FLEET_EXCLUDE_MACHINES optional — comma-separated machine ids to\n * skip during fleet-mode round-robin\n * (typically the coordinator machine this\n * MCP server is running on).\n * MCP_MAX_CONCURRENT_SANDBOXES default 4 — kernel maxConcurrency cap\n * MCP_CODER_FANOUT_HARNESSES comma-separated harness ids to use for variants > 1\n * MCP_DISABLE_CODER set to `1` to omit `delegate_code`\n * MCP_DISABLE_RESEARCHER set to `1` to omit `delegate_research` even when peer is present\n */\n\nimport type { LoopSandboxClient } from '../loops'\nimport { runLoop } from '../loops'\nimport { detectExecutor } from './bin-helpers'\nimport { createDefaultCoderDelegate, type ResearcherDelegate } from './delegates'\nimport type { DelegationExecutor } from './executor'\nimport { createMcpServer } from './server'\nimport type { ResearchOutputShape } from './types'\n\nasync function main(): Promise<void> {\n const fanoutHarnesses = parseHarnesses(process.env.MCP_CODER_FANOUT_HARNESSES)\n const maxConcurrency = parseConcurrency(process.env.MCP_MAX_CONCURRENT_SANDBOXES)\n const wantCoder = 
!process.env.MCP_DISABLE_CODER\n const wantResearcher = !process.env.MCP_DISABLE_RESEARCHER\n const fleetId = parseFleetId(process.env.TANGLE_FLEET_ID)\n\n // Skip the sandbox client load entirely when no profile delegate needs it —\n // the feedback + status + history tools are queue-bound and require no\n // sandbox. Useful for tooling that mounts the MCP server purely for\n // self-introspection.\n const needsSandbox = wantCoder || wantResearcher\n let sandboxClient: LoopSandboxClient | undefined\n let executor: DelegationExecutor | undefined\n if (needsSandbox) {\n const apiKey = process.env.TANGLE_API_KEY\n if (!apiKey && !process.env.AGENT_RUNTIME_MCP_ALLOW_NO_KEY) {\n process.stderr.write(\n 'agent-runtime-mcp: TANGLE_API_KEY is required. Set AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 to run without it for diagnostics, or MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to run the queue-only subset.\\n',\n )\n process.exit(2)\n }\n // Fleet mode against a diagnostic stub is meaningless — the stub can't\n // resolve a real fleet handle. Refuse rather than silently degrading,\n // otherwise a fleet-mounted MCP would behave differently than configured.\n if (fleetId && !apiKey) {\n process.stderr.write(\n 'agent-runtime-mcp: TANGLE_FLEET_ID was set but TANGLE_API_KEY is missing; cannot resolve fleet handle. Provide an api key or unset TANGLE_FLEET_ID.\\n',\n )\n process.exit(2)\n }\n sandboxClient = await loadSandboxClient(apiKey)\n executor = await detectExecutor({ sandboxClient })\n if (fleetId) {\n process.stderr.write(`agent-runtime-mcp: fleet-aware delegation: fleetId=${fleetId}\\n`)\n }\n process.stderr.write(`agent-runtime-mcp: delegation placement → ${executor.describe()}\\n`)\n }\n\n const coderDelegate =\n wantCoder && executor\n ? createDefaultCoderDelegate({\n executor,\n fanoutHarnesses,\n maxConcurrency,\n })\n : undefined\n\n const researcherDelegate =\n wantResearcher && executor\n ? 
await loadResearcherDelegate(executor.client, maxConcurrency)\n : undefined\n\n const server = createMcpServer({ coderDelegate, researcherDelegate })\n\n process.on('SIGINT', () => {\n server.stop()\n process.exit(0)\n })\n process.on('SIGTERM', () => {\n server.stop()\n process.exit(0)\n })\n\n await server.serve()\n}\n\nasync function loadSandboxClient(apiKey: string | undefined): Promise<LoopSandboxClient> {\n // Diagnostic mode: AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 enables tools/list + the\n // queue-bound tools (status / history / feedback) without sandbox creds.\n // Coder + researcher delegations require a real client; the stub fails loud\n // at create() so the agent observes the cause instead of silent success.\n if (!apiKey) {\n return {\n async create() {\n throw new Error(\n 'agent-runtime-mcp: TANGLE_API_KEY is unset; coder/researcher delegations are disabled in diagnostic mode. Set TANGLE_API_KEY or use MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to remove the unsupported tools from the tool list.',\n )\n },\n } satisfies LoopSandboxClient\n }\n // Dynamic import keeps the bin importable in environments that haven't\n // installed `@tangle-network/sandbox` yet (the runtime package lists it\n // as a peer dep, not a hard dep).\n const mod = await import('@tangle-network/sandbox').catch((err) => {\n process.stderr.write(\n `agent-runtime-mcp: failed to load @tangle-network/sandbox (${err.message}); install the peer dependency\\n`,\n )\n process.exit(2)\n })\n const SandboxCtor = (mod as { Sandbox?: new (config: unknown) => LoopSandboxClient }).Sandbox\n if (!SandboxCtor) {\n process.stderr.write(\n 'agent-runtime-mcp: @tangle-network/sandbox does not export Sandbox; cannot construct client\\n',\n )\n process.exit(2)\n }\n const baseUrl = process.env.SANDBOX_BASE_URL\n return new SandboxCtor({\n apiKey,\n ...(baseUrl ? 
{ baseUrl } : {}),\n })\n}\n\ninterface ResearcherProfilePreset {\n agentRunSpec: Parameters<typeof runLoop>[0]['agentRun'] extends infer T ? NonNullable<T> : never\n output: Parameters<typeof runLoop>[0]['output']\n validator: Parameters<typeof runLoop>[0]['validator']\n}\n\ninterface ResearcherFanoutPreset {\n agentRuns: NonNullable<Parameters<typeof runLoop>[0]['agentRuns']>\n output: Parameters<typeof runLoop>[0]['output']\n validator: Parameters<typeof runLoop>[0]['validator']\n driver: Parameters<typeof runLoop>[0]['driver']\n}\n\nasync function loadResearcherDelegate(\n sandboxClient: LoopSandboxClient,\n maxConcurrency: number,\n): Promise<ResearcherDelegate | undefined> {\n // Optional peer — when `@tangle-network/agent-knowledge` isn't installed,\n // we silently omit the researcher tool from the advertisement. The\n // dynamic-import path is resolved at runtime; TypeScript cannot see the\n // peer, so we type the module structurally rather than via its own\n // declaration file.\n const profilesSpecifier = '@tangle-network/agent-knowledge/profiles'\n const mod = await import(profilesSpecifier).catch(() => undefined)\n if (!mod) return undefined\n type SingleFactory = (opts: { task: unknown }) => ResearcherProfilePreset\n type FanoutFactory = (opts: { task: unknown }) => ResearcherFanoutPreset\n const fanoutFactory = (mod as { multiHarnessResearcherFanout?: FanoutFactory })\n .multiHarnessResearcherFanout\n const singleFactory = (mod as { researcherProfile?: SingleFactory }).researcherProfile\n if (!fanoutFactory || !singleFactory) return undefined\n\n return async (args, ctx) => {\n const task = {\n question: args.question,\n knowledgeNamespace: args.namespace,\n scope: args.scope,\n sources: args.sources,\n recencyWindow: args.config?.recencyWindow\n ? {\n since: args.config.recencyWindow.since\n ? new Date(args.config.recencyWindow.since)\n : undefined,\n until: args.config.recencyWindow.until\n ? 
new Date(args.config.recencyWindow.until)\n : undefined,\n }\n : undefined,\n maxItems: args.config?.maxItems,\n minConfidence: args.config?.minConfidence,\n }\n const variants = Math.max(1, Math.trunc(args.variants ?? 1))\n ctx.report({ iteration: 0, phase: 'starting' })\n if (variants <= 1) {\n const preset = singleFactory({ task })\n const result = await runLoop({\n driver: {\n name: 'mcp-researcher-single',\n async plan(t, history) {\n return history.length === 0 ? [t] : []\n },\n decide(history) {\n return history.length > 0 ? 'pick-winner' : 'fail'\n },\n },\n agentRun: preset.agentRunSpec,\n output: preset.output,\n validator: preset.validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal },\n maxIterations: 1,\n maxConcurrency,\n })\n const output = result.winner?.output\n if (!output) throw new Error('researcher delegate produced no winner')\n ctx.report({ iteration: 1, phase: 'completed' })\n return output as ResearchOutputShape\n }\n const fanout = fanoutFactory({ task })\n const result = await runLoop({\n driver: fanout.driver,\n agentRuns: fanout.agentRuns.slice(0, variants),\n output: fanout.output,\n validator: fanout.validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal },\n maxIterations: variants,\n maxConcurrency: Math.min(maxConcurrency, variants),\n })\n const output = result.winner?.output\n if (!output) throw new Error('researcher delegate fanout produced no winner')\n ctx.report({ iteration: result.iterations.length, phase: 'completed' })\n return output as ResearchOutputShape\n }\n}\n\nfunction parseHarnesses(raw: string | undefined): string[] | undefined {\n if (!raw) return undefined\n const list = raw\n .split(',')\n .map((entry) => entry.trim())\n .filter(Boolean)\n return list.length > 0 ? list : undefined\n}\n\nfunction parseFleetId(raw: string | undefined): string | undefined {\n if (typeof raw !== 'string') return undefined\n const trimmed = raw.trim()\n return trimmed.length > 0 ? 
trimmed : undefined\n}\n\nfunction parseConcurrency(raw: string | undefined): number {\n if (!raw) return 4\n const n = Number(raw)\n if (!Number.isFinite(n) || n < 1) return 4\n return Math.min(Math.trunc(n), 32)\n}\n\nmain().catch((err) => {\n process.stderr.write(`agent-runtime-mcp: ${err instanceof Error ? err.stack : String(err)}\\n`)\n process.exit(1)\n})\n"],"mappings":";;;;;;;;;;;;;;;;;;AAuCA,eAAe,OAAsB;AACnC,QAAM,kBAAkB,eAAe,QAAQ,IAAI,0BAA0B;AAC7E,QAAM,iBAAiB,iBAAiB,QAAQ,IAAI,4BAA4B;AAChF,QAAM,YAAY,CAAC,QAAQ,IAAI;AAC/B,QAAM,iBAAiB,CAAC,QAAQ,IAAI;AACpC,QAAM,UAAU,aAAa,QAAQ,IAAI,eAAe;AAMxD,QAAM,eAAe,aAAa;AAClC,MAAI;AACJ,MAAI;AACJ,MAAI,cAAc;AAChB,UAAM,SAAS,QAAQ,IAAI;AAC3B,QAAI,CAAC,UAAU,CAAC,QAAQ,IAAI,gCAAgC;AAC1D,cAAQ,OAAO;AAAA,QACb;AAAA,MACF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AAIA,QAAI,WAAW,CAAC,QAAQ;AACtB,cAAQ,OAAO;AAAA,QACb;AAAA,MACF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AACA,oBAAgB,MAAM,kBAAkB,MAAM;AAC9C,eAAW,MAAM,eAAe,EAAE,cAAc,CAAC;AACjD,QAAI,SAAS;AACX,cAAQ,OAAO,MAAM,sDAAsD,OAAO;AAAA,CAAI;AAAA,IACxF;AACA,YAAQ,OAAO,MAAM,kDAA6C,SAAS,SAAS,CAAC;AAAA,CAAI;AAAA,EAC3F;AAEA,QAAM,gBACJ,aAAa,WACT,2BAA2B;AAAA,IACzB;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,IACD;AAEN,QAAM,qBACJ,kBAAkB,WACd,MAAM,uBAAuB,SAAS,QAAQ,cAAc,IAC5D;AAEN,QAAM,SAAS,gBAAgB,EAAE,eAAe,mBAAmB,CAAC;AAEpE,UAAQ,GAAG,UAAU,MAAM;AACzB,WAAO,KAAK;AACZ,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AACD,UAAQ,GAAG,WAAW,MAAM;AAC1B,WAAO,KAAK;AACZ,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AAED,QAAM,OAAO,MAAM;AACrB;AAEA,eAAe,kBAAkB,QAAwD;AAKvF,MAAI,CAAC,QAAQ;AACX,WAAO;AAAA,MACL,MAAM,SAAS;AACb,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAIA,QAAM,MAAM,MAAM,OAAO,yBAAyB,EAAE,MAAM,CAAC,QAAQ;AACjE,YAAQ,OAAO;AAAA,MACb,8DAA8D,IAAI,OAAO;AAAA;AAAA,IAC3E;AACA,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AACD,QAAM,cAAe,IAAiE;AACtF,MAAI,CAAC,aAAa;AAChB,YAAQ,OAAO;AAAA,MACb;AAAA,IACF;AACA,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,QAAM,UAAU,QAAQ,IAAI;AAC5B,SAAO,IAAI,YAAY;AAAA,IACrB;AAAA,IACA,GAAI,UAAU,EAAE,QAAQ,IAAI,CAAC;AAAA,EAC/B,CAAC;AACH;AAeA,eAAe,uBACb,eACA,gBACyC;AAMzC,QAAM
,oBAAoB;AAC1B,QAAM,MAAM,MAAM,OAAO,mBAAmB,MAAM,MAAM,MAAS;AACjE,MAAI,CAAC,IAAK,QAAO;AAGjB,QAAM,gBAAiB,IACpB;AACH,QAAM,gBAAiB,IAA8C;AACrE,MAAI,CAAC,iBAAiB,CAAC,cAAe,QAAO;AAE7C,SAAO,OAAO,MAAM,QAAQ;AAC1B,UAAM,OAAO;AAAA,MACX,UAAU,KAAK;AAAA,MACf,oBAAoB,KAAK;AAAA,MACzB,OAAO,KAAK;AAAA,MACZ,SAAS,KAAK;AAAA,MACd,eAAe,KAAK,QAAQ,gBACxB;AAAA,QACE,OAAO,KAAK,OAAO,cAAc,QAC7B,IAAI,KAAK,KAAK,OAAO,cAAc,KAAK,IACxC;AAAA,QACJ,OAAO,KAAK,OAAO,cAAc,QAC7B,IAAI,KAAK,KAAK,OAAO,cAAc,KAAK,IACxC;AAAA,MACN,IACA;AAAA,MACJ,UAAU,KAAK,QAAQ;AAAA,MACvB,eAAe,KAAK,QAAQ;AAAA,IAC9B;AACA,UAAM,WAAW,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAC3D,QAAI,OAAO,EAAE,WAAW,GAAG,OAAO,WAAW,CAAC;AAC9C,QAAI,YAAY,GAAG;AACjB,YAAM,SAAS,cAAc,EAAE,KAAK,CAAC;AACrC,YAAMA,UAAS,MAAM,QAAQ;AAAA,QAC3B,QAAQ;AAAA,UACN,MAAM;AAAA,UACN,MAAM,KAAK,GAAG,SAAS;AACrB,mBAAO,QAAQ,WAAW,IAAI,CAAC,CAAC,IAAI,CAAC;AAAA,UACvC;AAAA,UACA,OAAO,SAAS;AACd,mBAAO,QAAQ,SAAS,IAAI,gBAAgB;AAAA,UAC9C;AAAA,QACF;AAAA,QACA,UAAU,OAAO;AAAA,QACjB,QAAQ,OAAO;AAAA,QACf,WAAW,OAAO;AAAA,QAClB;AAAA,QACA,KAAK,EAAE,eAAe,QAAQ,IAAI,OAAO;AAAA,QACzC,eAAe;AAAA,QACf;AAAA,MACF,CAAC;AACD,YAAMC,UAASD,QAAO,QAAQ;AAC9B,UAAI,CAACC,QAAQ,OAAM,IAAI,MAAM,wCAAwC;AACrE,UAAI,OAAO,EAAE,WAAW,GAAG,OAAO,YAAY,CAAC;AAC/C,aAAOA;AAAA,IACT;AACA,UAAM,SAAS,cAAc,EAAE,KAAK,CAAC;AACrC,UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,QAAQ,OAAO;AAAA,MACf,WAAW,OAAO,UAAU,MAAM,GAAG,QAAQ;AAAA,MAC7C,QAAQ,OAAO;AAAA,MACf,WAAW,OAAO;AAAA,MAClB;AAAA,MACA,KAAK,EAAE,eAAe,QAAQ,IAAI,OAAO;AAAA,MACzC,eAAe;AAAA,MACf,gBAAgB,KAAK,IAAI,gBAAgB,QAAQ;AAAA,IACnD,CAAC;AACD,UAAM,SAAS,OAAO,QAAQ;AAC9B,QAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,+CAA+C;AAC5E,QAAI,OAAO,EAAE,WAAW,OAAO,WAAW,QAAQ,OAAO,YAAY,CAAC;AACtE,WAAO;AAAA,EACT;AACF;AAEA,SAAS,eAAe,KAA+C;AACrE,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,OAAO,IACV,MAAM,GAAG,EACT,IAAI,CAAC,UAAU,MAAM,KAAK,CAAC,EAC3B,OAAO,OAAO;AACjB,SAAO,KAAK,SAAS,IAAI,OAAO;AAClC;AAEA,SAAS,aAAa,KAA6C;AACjE,MAAI,OAAO,QAAQ,SAAU,QAAO;AACpC,QAAM,UAAU,IAAI,KAAK;AACzB,SAAO,QAAQ,SAAS,IAAI,UAAU;AACxC;AAEA,SAAS,iBAAiB,KAAiC;AACzD,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,IAAI
,OAAO,GAAG;AACpB,MAAI,CAAC,OAAO,SAAS,CAAC,KAAK,IAAI,EAAG,QAAO;AACzC,SAAO,KAAK,IAAI,KAAK,MAAM,CAAC,GAAG,EAAE;AACnC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AACpB,UAAQ,OAAO,MAAM,sBAAsB,eAAe,QAAQ,IAAI,QAAQ,OAAO,GAAG,CAAC;AAAA,CAAI;AAC7F,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":["result","output"]}
1
+ {"version":3,"sources":["../../src/mcp/bin.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * @experimental\n *\n * `agent-runtime-mcp` — stdio MCP server entry point.\n *\n * Spins up a server with the default coder delegate (wired against the\n * real `@tangle-network/sandbox` client) and, when the optional\n * `@tangle-network/agent-knowledge` peer is installed, a researcher\n * delegate against `multiHarnessResearcherFanout`.\n *\n * Environment variables:\n * TANGLE_API_KEY required — passed to `new Sandbox({ apiKey })`\n * SANDBOX_BASE_URL optional — sandbox-SDK base URL override\n * TANGLE_FLEET_ID optional — when set, delegations dispatch\n * INTO this fleet's shared workspace instead\n * of creating sibling sandboxes. Set by the\n * parent sandbox when launching this MCP\n * server so worker diffs land on the caller's\n * filesystem with no cross-sandbox boundary.\n * TANGLE_FLEET_EXCLUDE_MACHINES optional — comma-separated machine ids to\n * skip during fleet-mode round-robin\n * (typically the coordinator machine this\n * MCP server is running on).\n * MCP_MAX_CONCURRENT_SANDBOXES default 4 — kernel maxConcurrency cap\n * MCP_CODER_FANOUT_HARNESSES comma-separated harness ids to use for variants > 1\n * MCP_DISABLE_CODER set to `1` to omit `delegate_code`\n * MCP_DISABLE_RESEARCHER set to `1` to omit `delegate_research` even when peer is present\n */\n\nimport type { LoopSandboxClient } from '../loops'\nimport { runLoop } from '../loops'\nimport { detectExecutor } from './bin-helpers'\nimport { createDefaultCoderDelegate, type ResearcherDelegate } from './delegates'\nimport type { DelegationExecutor } from './executor'\nimport { createMcpServer } from './server'\nimport type { ResearchOutputShape } from './types'\n\nasync function main(): Promise<void> {\n const fanoutHarnesses = parseHarnesses(process.env.MCP_CODER_FANOUT_HARNESSES)\n const maxConcurrency = parseConcurrency(process.env.MCP_MAX_CONCURRENT_SANDBOXES)\n const wantCoder = 
!process.env.MCP_DISABLE_CODER\n const wantResearcher = !process.env.MCP_DISABLE_RESEARCHER\n const fleetId = parseFleetId(process.env.TANGLE_FLEET_ID)\n\n // Skip the sandbox client load entirely when no profile delegate needs it —\n // the feedback + status + history tools are queue-bound and require no\n // sandbox. Useful for tooling that mounts the MCP server purely for\n // self-introspection.\n const needsSandbox = wantCoder || wantResearcher\n let sandboxClient: LoopSandboxClient | undefined\n let executor: DelegationExecutor | undefined\n if (needsSandbox) {\n const apiKey = process.env.TANGLE_API_KEY\n if (!apiKey && !process.env.AGENT_RUNTIME_MCP_ALLOW_NO_KEY) {\n process.stderr.write(\n 'agent-runtime-mcp: TANGLE_API_KEY is required. Set AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 to run without it for diagnostics, or MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to run the queue-only subset.\\n',\n )\n process.exit(2)\n }\n // Fleet mode against a diagnostic stub is meaningless — the stub can't\n // resolve a real fleet handle. Refuse rather than silently degrading,\n // otherwise a fleet-mounted MCP would behave differently than configured.\n if (fleetId && !apiKey) {\n process.stderr.write(\n 'agent-runtime-mcp: TANGLE_FLEET_ID was set but TANGLE_API_KEY is missing; cannot resolve fleet handle. Provide an api key or unset TANGLE_FLEET_ID.\\n',\n )\n process.exit(2)\n }\n sandboxClient = await loadSandboxClient(apiKey)\n executor = await detectExecutor({ sandboxClient })\n if (fleetId) {\n process.stderr.write(`agent-runtime-mcp: fleet-aware delegation: fleetId=${fleetId}\\n`)\n }\n process.stderr.write(`agent-runtime-mcp: delegation placement → ${executor.describe()}\\n`)\n }\n\n const coderDelegate =\n wantCoder && executor\n ? createDefaultCoderDelegate({\n executor,\n fanoutHarnesses,\n maxConcurrency,\n })\n : undefined\n\n const researcherDelegate =\n wantResearcher && executor\n ? 
await loadResearcherDelegate(executor.client, maxConcurrency)\n : undefined\n\n const server = createMcpServer({ coderDelegate, researcherDelegate })\n\n process.on('SIGINT', () => {\n server.stop()\n process.exit(0)\n })\n process.on('SIGTERM', () => {\n server.stop()\n process.exit(0)\n })\n\n await server.serve()\n}\n\nasync function loadSandboxClient(apiKey: string | undefined): Promise<LoopSandboxClient> {\n // Diagnostic mode: AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 enables tools/list + the\n // queue-bound tools (status / history / feedback) without sandbox creds.\n // Coder + researcher delegations require a real client; the stub fails loud\n // at create() so the agent observes the cause instead of silent success.\n if (!apiKey) {\n return {\n async create() {\n throw new Error(\n 'agent-runtime-mcp: TANGLE_API_KEY is unset; coder/researcher delegations are disabled in diagnostic mode. Set TANGLE_API_KEY or use MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to remove the unsupported tools from the tool list.',\n )\n },\n } satisfies LoopSandboxClient\n }\n // Dynamic import keeps the bin importable in environments that haven't\n // installed `@tangle-network/sandbox` yet (the runtime package lists it\n // as a peer dep, not a hard dep).\n const mod = await import('@tangle-network/sandbox').catch((err) => {\n process.stderr.write(\n `agent-runtime-mcp: failed to load @tangle-network/sandbox (${err.message}); install the peer dependency\\n`,\n )\n process.exit(2)\n })\n const SandboxCtor = (mod as { Sandbox?: new (config: unknown) => LoopSandboxClient }).Sandbox\n if (!SandboxCtor) {\n process.stderr.write(\n 'agent-runtime-mcp: @tangle-network/sandbox does not export Sandbox; cannot construct client\\n',\n )\n process.exit(2)\n }\n const baseUrl = process.env.SANDBOX_BASE_URL\n return new SandboxCtor({\n apiKey,\n ...(baseUrl ? 
{ baseUrl } : {}),\n })\n}\n\ninterface ResearcherProfilePreset {\n agentRunSpec: Parameters<typeof runLoop>[0]['agentRun'] extends infer T ? NonNullable<T> : never\n output: Parameters<typeof runLoop>[0]['output']\n validator: Parameters<typeof runLoop>[0]['validator']\n}\n\ninterface ResearcherFanoutPreset {\n agentRuns: NonNullable<Parameters<typeof runLoop>[0]['agentRuns']>\n output: Parameters<typeof runLoop>[0]['output']\n validator: Parameters<typeof runLoop>[0]['validator']\n driver: Parameters<typeof runLoop>[0]['driver']\n}\n\nasync function loadResearcherDelegate(\n sandboxClient: LoopSandboxClient,\n maxConcurrency: number,\n): Promise<ResearcherDelegate | undefined> {\n // Optional peer — when `@tangle-network/agent-knowledge` isn't installed,\n // we silently omit the researcher tool from the advertisement. The\n // dynamic-import path is resolved at runtime; TypeScript cannot see the\n // peer, so we type the module structurally rather than via its own\n // declaration file.\n const profilesSpecifier = '@tangle-network/agent-knowledge/profiles'\n const mod = await import(profilesSpecifier).catch(() => undefined)\n if (!mod) return undefined\n type SingleFactory = (opts: { task: unknown }) => ResearcherProfilePreset\n type FanoutFactory = (opts: { task: unknown }) => ResearcherFanoutPreset\n const fanoutFactory = (mod as { multiHarnessResearcherFanout?: FanoutFactory })\n .multiHarnessResearcherFanout\n const singleFactory = (mod as { researcherProfile?: SingleFactory }).researcherProfile\n if (!fanoutFactory || !singleFactory) return undefined\n\n return async (args, ctx) => {\n const task = {\n question: args.question,\n knowledgeNamespace: args.namespace,\n scope: args.scope,\n sources: args.sources,\n recencyWindow: args.config?.recencyWindow\n ? {\n since: args.config.recencyWindow.since\n ? new Date(args.config.recencyWindow.since)\n : undefined,\n until: args.config.recencyWindow.until\n ? 
new Date(args.config.recencyWindow.until)\n : undefined,\n }\n : undefined,\n maxItems: args.config?.maxItems,\n minConfidence: args.config?.minConfidence,\n }\n const variants = Math.max(1, Math.trunc(args.variants ?? 1))\n ctx.report({ iteration: 0, phase: 'starting' })\n if (variants <= 1) {\n const preset = singleFactory({ task })\n const result = await runLoop({\n driver: {\n name: 'mcp-researcher-single',\n async plan(t, history) {\n return history.length === 0 ? [t] : []\n },\n decide(history) {\n return history.length > 0 ? 'pick-winner' : 'fail'\n },\n },\n agentRun: preset.agentRunSpec,\n output: preset.output,\n validator: preset.validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal },\n maxIterations: 1,\n maxConcurrency,\n })\n const output = result.winner?.output\n if (!output) throw new Error('researcher delegate produced no winner')\n ctx.report({ iteration: 1, phase: 'completed' })\n return output as ResearchOutputShape\n }\n const fanout = fanoutFactory({ task })\n const result = await runLoop({\n driver: fanout.driver,\n agentRuns: fanout.agentRuns.slice(0, variants),\n output: fanout.output,\n validator: fanout.validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal },\n maxIterations: variants,\n maxConcurrency: Math.min(maxConcurrency, variants),\n })\n const output = result.winner?.output\n if (!output) throw new Error('researcher delegate fanout produced no winner')\n ctx.report({ iteration: result.iterations.length, phase: 'completed' })\n return output as ResearchOutputShape\n }\n}\n\nfunction parseHarnesses(raw: string | undefined): string[] | undefined {\n if (!raw) return undefined\n const list = raw\n .split(',')\n .map((entry) => entry.trim())\n .filter(Boolean)\n return list.length > 0 ? list : undefined\n}\n\nfunction parseFleetId(raw: string | undefined): string | undefined {\n if (typeof raw !== 'string') return undefined\n const trimmed = raw.trim()\n return trimmed.length > 0 ? 
trimmed : undefined\n}\n\nfunction parseConcurrency(raw: string | undefined): number {\n if (!raw) return 4\n const n = Number(raw)\n if (!Number.isFinite(n) || n < 1) return 4\n return Math.min(Math.trunc(n), 32)\n}\n\nmain().catch((err) => {\n process.stderr.write(`agent-runtime-mcp: ${err instanceof Error ? err.stack : String(err)}\\n`)\n process.exit(1)\n})\n"],"mappings":";;;;;;;;;;;;;;;;;;;AAuCA,eAAe,OAAsB;AACnC,QAAM,kBAAkB,eAAe,QAAQ,IAAI,0BAA0B;AAC7E,QAAM,iBAAiB,iBAAiB,QAAQ,IAAI,4BAA4B;AAChF,QAAM,YAAY,CAAC,QAAQ,IAAI;AAC/B,QAAM,iBAAiB,CAAC,QAAQ,IAAI;AACpC,QAAM,UAAU,aAAa,QAAQ,IAAI,eAAe;AAMxD,QAAM,eAAe,aAAa;AAClC,MAAI;AACJ,MAAI;AACJ,MAAI,cAAc;AAChB,UAAM,SAAS,QAAQ,IAAI;AAC3B,QAAI,CAAC,UAAU,CAAC,QAAQ,IAAI,gCAAgC;AAC1D,cAAQ,OAAO;AAAA,QACb;AAAA,MACF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AAIA,QAAI,WAAW,CAAC,QAAQ;AACtB,cAAQ,OAAO;AAAA,QACb;AAAA,MACF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AACA,oBAAgB,MAAM,kBAAkB,MAAM;AAC9C,eAAW,MAAM,eAAe,EAAE,cAAc,CAAC;AACjD,QAAI,SAAS;AACX,cAAQ,OAAO,MAAM,sDAAsD,OAAO;AAAA,CAAI;AAAA,IACxF;AACA,YAAQ,OAAO,MAAM,kDAA6C,SAAS,SAAS,CAAC;AAAA,CAAI;AAAA,EAC3F;AAEA,QAAM,gBACJ,aAAa,WACT,2BAA2B;AAAA,IACzB;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,IACD;AAEN,QAAM,qBACJ,kBAAkB,WACd,MAAM,uBAAuB,SAAS,QAAQ,cAAc,IAC5D;AAEN,QAAM,SAAS,gBAAgB,EAAE,eAAe,mBAAmB,CAAC;AAEpE,UAAQ,GAAG,UAAU,MAAM;AACzB,WAAO,KAAK;AACZ,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AACD,UAAQ,GAAG,WAAW,MAAM;AAC1B,WAAO,KAAK;AACZ,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AAED,QAAM,OAAO,MAAM;AACrB;AAEA,eAAe,kBAAkB,QAAwD;AAKvF,MAAI,CAAC,QAAQ;AACX,WAAO;AAAA,MACL,MAAM,SAAS;AACb,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAIA,QAAM,MAAM,MAAM,OAAO,yBAAyB,EAAE,MAAM,CAAC,QAAQ;AACjE,YAAQ,OAAO;AAAA,MACb,8DAA8D,IAAI,OAAO;AAAA;AAAA,IAC3E;AACA,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AACD,QAAM,cAAe,IAAiE;AACtF,MAAI,CAAC,aAAa;AAChB,YAAQ,OAAO;AAAA,MACb;AAAA,IACF;AACA,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,QAAM,UAAU,QAAQ,IAAI;AAC5B,SAAO,IAAI,YAAY;AAAA,IACrB;AAAA,IACA,GAAI,UAAU,EAAE,QAAQ,IAAI,CAAC;AAAA,EAC/B,CAAC;AACH;AAeA,eAAe,uBACb,eACA,gBACyC;AAMzC,QAA
M,oBAAoB;AAC1B,QAAM,MAAM,MAAM,OAAO,mBAAmB,MAAM,MAAM,MAAS;AACjE,MAAI,CAAC,IAAK,QAAO;AAGjB,QAAM,gBAAiB,IACpB;AACH,QAAM,gBAAiB,IAA8C;AACrE,MAAI,CAAC,iBAAiB,CAAC,cAAe,QAAO;AAE7C,SAAO,OAAO,MAAM,QAAQ;AAC1B,UAAM,OAAO;AAAA,MACX,UAAU,KAAK;AAAA,MACf,oBAAoB,KAAK;AAAA,MACzB,OAAO,KAAK;AAAA,MACZ,SAAS,KAAK;AAAA,MACd,eAAe,KAAK,QAAQ,gBACxB;AAAA,QACE,OAAO,KAAK,OAAO,cAAc,QAC7B,IAAI,KAAK,KAAK,OAAO,cAAc,KAAK,IACxC;AAAA,QACJ,OAAO,KAAK,OAAO,cAAc,QAC7B,IAAI,KAAK,KAAK,OAAO,cAAc,KAAK,IACxC;AAAA,MACN,IACA;AAAA,MACJ,UAAU,KAAK,QAAQ;AAAA,MACvB,eAAe,KAAK,QAAQ;AAAA,IAC9B;AACA,UAAM,WAAW,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAC3D,QAAI,OAAO,EAAE,WAAW,GAAG,OAAO,WAAW,CAAC;AAC9C,QAAI,YAAY,GAAG;AACjB,YAAM,SAAS,cAAc,EAAE,KAAK,CAAC;AACrC,YAAMA,UAAS,MAAM,QAAQ;AAAA,QAC3B,QAAQ;AAAA,UACN,MAAM;AAAA,UACN,MAAM,KAAK,GAAG,SAAS;AACrB,mBAAO,QAAQ,WAAW,IAAI,CAAC,CAAC,IAAI,CAAC;AAAA,UACvC;AAAA,UACA,OAAO,SAAS;AACd,mBAAO,QAAQ,SAAS,IAAI,gBAAgB;AAAA,UAC9C;AAAA,QACF;AAAA,QACA,UAAU,OAAO;AAAA,QACjB,QAAQ,OAAO;AAAA,QACf,WAAW,OAAO;AAAA,QAClB;AAAA,QACA,KAAK,EAAE,eAAe,QAAQ,IAAI,OAAO;AAAA,QACzC,eAAe;AAAA,QACf;AAAA,MACF,CAAC;AACD,YAAMC,UAASD,QAAO,QAAQ;AAC9B,UAAI,CAACC,QAAQ,OAAM,IAAI,MAAM,wCAAwC;AACrE,UAAI,OAAO,EAAE,WAAW,GAAG,OAAO,YAAY,CAAC;AAC/C,aAAOA;AAAA,IACT;AACA,UAAM,SAAS,cAAc,EAAE,KAAK,CAAC;AACrC,UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,QAAQ,OAAO;AAAA,MACf,WAAW,OAAO,UAAU,MAAM,GAAG,QAAQ;AAAA,MAC7C,QAAQ,OAAO;AAAA,MACf,WAAW,OAAO;AAAA,MAClB;AAAA,MACA,KAAK,EAAE,eAAe,QAAQ,IAAI,OAAO;AAAA,MACzC,eAAe;AAAA,MACf,gBAAgB,KAAK,IAAI,gBAAgB,QAAQ;AAAA,IACnD,CAAC;AACD,UAAM,SAAS,OAAO,QAAQ;AAC9B,QAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,+CAA+C;AAC5E,QAAI,OAAO,EAAE,WAAW,OAAO,WAAW,QAAQ,OAAO,YAAY,CAAC;AACtE,WAAO;AAAA,EACT;AACF;AAEA,SAAS,eAAe,KAA+C;AACrE,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,OAAO,IACV,MAAM,GAAG,EACT,IAAI,CAAC,UAAU,MAAM,KAAK,CAAC,EAC3B,OAAO,OAAO;AACjB,SAAO,KAAK,SAAS,IAAI,OAAO;AAClC;AAEA,SAAS,aAAa,KAA6C;AACjE,MAAI,OAAO,QAAQ,SAAU,QAAO;AACpC,QAAM,UAAU,IAAI,KAAK;AACzB,SAAO,QAAQ,SAAS,IAAI,UAAU;AACxC;AAEA,SAAS,iBAAiB,KAAiC;AACzD,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,IAA
I,OAAO,GAAG;AACpB,MAAI,CAAC,OAAO,SAAS,CAAC,KAAK,IAAI,EAAG,QAAO;AACzC,SAAO,KAAK,IAAI,KAAK,MAAM,CAAC,GAAG,EAAE;AACnC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AACpB,UAAQ,OAAO,MAAM,sBAAsB,eAAe,QAAQ,IAAI,QAAQ,OAAO,GAAG,CAAC;AAAA,CAAI;AAC7F,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":["result","output"]}
@@ -1,8 +1,10 @@
1
- import { L as LoopSandboxClient, j as LoopSandboxPlacement, m as LoopTraceEmitter } from '../types-CmTjKLyB.js';
2
- import { F as FleetHandle, D as DelegationExecutor, a as DelegateFeedbackArgs, b as DelegationFeedbackSnapshot, c as DelegationProfile, d as DelegateCodeArgs, e as DelegateResearchArgs, f as DelegationStatus, g as DelegationProgress, h as DelegationResultPayload, i as DelegationError, j as DelegationStatusResult, k as DelegationHistoryArgs, l as DelegationHistoryEntry, C as CoderDelegate, R as ResearcherDelegate, m as DelegateCodeResult, n as DelegateFeedbackResult, o as ResearchSource, p as DelegateResearchResult, q as DelegationHistoryResult, r as DelegationStatusArgs, O as OtelExporter } from '../otel-export-DgFMwsVy.js';
3
- export { s as CoderReview, t as CoderReviewer, u as CoderWinnerSelection, v as CreateDefaultCoderDelegateOptions, w as DelegateCodeConfig, x as DelegateResearchConfig, y as DelegateRunCtx, z as FeedbackRating, A as FeedbackRefersTo, B as FleetWorkspaceExecutorOptions, E as ResearchOutputShape, S as SiblingSandboxExecutorOptions, G as createDefaultCoderDelegate, H as createFleetWorkspaceExecutor, I as createSiblingSandboxExecutor, J as mcpToolsForRuntimeMcp, K as mcpToolsForRuntimeMcpSubset } from '../otel-export-DgFMwsVy.js';
1
+ import { L as LoopSandboxClient, j as LoopSandboxPlacement, m as LoopTraceEmitter } from '../types-B9O7l-ij.js';
2
+ import { c as FleetHandle, d as DelegationExecutor, e as DelegateFeedbackArgs, f as DelegationFeedbackSnapshot, g as DelegationProfile, D as DelegateCodeArgs, h as DelegateResearchArgs, i as DelegationStatus, j as DelegationProgress, k as DelegationResultPayload, l as DelegationError, m as DelegationStatusResult, n as DelegationHistoryArgs, o as DelegationHistoryEntry, p as CoderDelegate, R as ResearcherDelegate, q as DelegateCodeResult, r as DelegateFeedbackResult, s as ResearchSource, t as DelegateResearchResult, u as DelegationHistoryResult, v as DelegationStatusArgs } from '../kb-gate-YdPNEagq.js';
3
+ export { w as CoderReview, C as CoderReviewer, a as CoderWinnerSelection, x as CreateDefaultCoderDelegateOptions, b as CreateKbGateOptions, y as DelegateCodeConfig, z as DelegateResearchConfig, A as DelegateRunCtx, F as FactCandidate, B as FactJudge, E as FactJudgeVerdict, G as FeedbackRating, H as FeedbackRefersTo, I as FleetWorkspaceExecutorOptions, K as KbGateResult, J as ResearchOutputShape, S as SiblingSandboxExecutorOptions, L as createDefaultCoderDelegate, M as createFleetWorkspaceExecutor, N as createKbGate, O as createSiblingSandboxExecutor } from '../kb-gate-YdPNEagq.js';
4
4
  import { L as LocalHarness, r as runLocalHarness } from '../local-harness-KrdFTY5R.js';
5
5
  export { a as LocalHarnessResult, R as RunLocalHarnessOptions } from '../local-harness-KrdFTY5R.js';
6
+ import { O as OtelExporter } from '../otel-export-xgf4J6bo.js';
7
+ export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-xgf4J6bo.js';
6
8
  import '@tangle-network/agent-eval';
7
9
  import '@tangle-network/sandbox';
8
10
  import '../types-CsCCryln.js';
@@ -285,81 +287,6 @@ interface InProcessExecutorDescribePlacement extends LoopSandboxPlacement {
285
287
  */
286
288
  declare function createInProcessExecutor(options: InProcessExecutorOptions): DelegationExecutor;
287
289
 
288
- /**
289
- * @experimental
290
- *
291
- * `createKbGate` — the valid-only knowledge-base growth gate, distilled from
292
- * physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
293
- * writer) runs candidate facts through this before persisting, so the KB grows
294
- * with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
295
- * vetoed at the gate.
296
- *
297
- * Fail-closed by construction: every judge must `accept`; the FIRST veto wins
298
- * and the fact is rejected. The non-negotiable floor (always on, can't be
299
- * disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
300
- * literally appear in its `sourceText`. That single check kills the dominant
301
- * failure mode (a confident claim decoupled from any real source).
302
- *
303
- * Pure + dependency-free: it operates on fact candidates, not on a store, so it
304
- * composes with `@tangle-network/agent-knowledge` or any persistence layer
305
- * without importing it. The remediation policy (correct-on-veto vs
306
- * escalate-as-unverified) is the caller's — this returns the verdict; it never
307
- * drops a fact silently.
308
- */
309
- /** @experimental A fact proposed for the KB, with its grounding. */
310
- interface FactCandidate {
311
- /** The atomic claim text. */
312
- claim: string;
313
- /** Optional extracted value (number or string) the claim asserts. */
314
- value?: string | number;
315
- /** Verbatim span lifted from the source that backs the claim. */
316
- verbatimPassage: string;
317
- /** The raw source text the passage must be grounded in. */
318
- sourceText: string;
319
- /** Where the fact claims to come from — checked for circular/self citations. */
320
- citation?: string;
321
- }
322
- /** @experimental */
323
- interface FactJudgeVerdict {
324
- accept: boolean;
325
- reason?: string;
326
- }
327
- /** @experimental A pluggable fact validator. Throw is NOT allowed — return a
328
- * verdict; a thrown judge is a programmer error, not a veto. */
329
- interface FactJudge {
330
- name: string;
331
- judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>;
332
- }
333
- /** @experimental */
334
- interface KbGateResult {
335
- accepted: boolean;
336
- /** Name of the judge that vetoed; undefined when accepted. */
337
- vetoedBy?: string;
338
- reason?: string;
339
- }
340
- /** @experimental */
341
- interface CreateKbGateOptions {
342
- /** Extra judges appended after the built-in floor (e.g. an LLM judge). */
343
- judges?: FactJudge[];
344
- /** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
345
- minPassageChars?: number;
346
- /**
347
- * Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
348
- * `'cad_params'`, `'requirements'`). A citation naming one is circular
349
- * (laundering) — the fact cites a derived artifact, not a real source.
350
- * Default `[]` (no circular check unless the consumer declares its kinds).
351
- */
352
- selfArtifactKinds?: string[];
353
- }
354
- /**
355
- * @experimental
356
- *
357
- * Build a fail-closed KB gate. The returned function runs the built-in floor
358
- * (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
359
- * then any consumer judges, returning on the first veto.
360
- */
361
- declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
362
-
363
290
  /**
364
291
  * @experimental
365
292
  *
@@ -947,4 +874,4 @@ declare function createPropagatingTraceEmitter(ctx: TraceContext): {
947
874
  */
948
875
  declare function traceContextToEnv(ctx: TraceContext): Record<string, string>;
949
876
 
950
- export { CoderDelegate, type CreateKbGateOptions, type CreateWorktreeOptions, DELEGATE_CODE_DESCRIPTION, DELEGATE_CODE_INPUT_SCHEMA, DELEGATE_CODE_TOOL_NAME, DELEGATE_FEEDBACK_DESCRIPTION, DELEGATE_FEEDBACK_INPUT_SCHEMA, DELEGATE_FEEDBACK_TOOL_NAME, DELEGATE_RESEARCH_DESCRIPTION, DELEGATE_RESEARCH_INPUT_SCHEMA, DELEGATE_RESEARCH_TOOL_NAME, DELEGATION_HISTORY_DESCRIPTION, DELEGATION_HISTORY_INPUT_SCHEMA, DELEGATION_HISTORY_TOOL_NAME, DELEGATION_STATUS_DESCRIPTION, DELEGATION_STATUS_INPUT_SCHEMA, DELEGATION_STATUS_TOOL_NAME, DelegateCodeArgs, DelegateCodeResult, DelegateFeedbackArgs, DelegateFeedbackResult, DelegateResearchArgs, DelegateResearchResult, DelegationError, DelegationExecutor, DelegationFeedbackSnapshot, DelegationHistoryArgs, DelegationHistoryEntry, DelegationHistoryResult, DelegationProfile, DelegationProgress, type DelegationRecord, DelegationResultPayload, DelegationStatus, DelegationStatusArgs, DelegationStatusResult, DelegationTaskQueue, type DelegationTaskQueueOptions, type DetectExecutorArgs, type DiffOptions, type DiffResult, type FactCandidate, type FactJudge, type FactJudgeVerdict, type FeedbackEvent, type FeedbackStore, FleetHandle, type GitRunner, InMemoryFeedbackStore, type InProcessExecutorDescribePlacement, type InProcessExecutorOptions, type JsonRpcMessage, type JsonRpcResponse, type KbGateResult, LocalHarness, type McpServer, type McpServerOptions, type McpToolDescriptor, type McpTransport, type RemoveWorktreeOptions, ResearchSource, ResearcherDelegate, type SubmitInput, type SubmitOutput, type TraceContext, type WorktreeHandle, captureWorktreeDiff, createDelegateCodeHandler, createDelegateFeedbackHandler, createDelegateResearchHandler, createDelegationHistoryHandler, createDelegationStatusHandler, createInProcessExecutor, createInProcessTransport, createKbGate, createMcpServer, createPropagatingTraceEmitter, createWorktree, detectExecutor, eventToSnapshot, hashIdempotencyInput, readTraceContextFromEnv, removeWorktree, runLocalHarness, 
traceContextToEnv, validateDelegateCodeArgs, validateDelegateFeedbackArgs, validateDelegateResearchArgs, validateDelegationHistoryArgs, validateDelegationStatusArgs };
877
+ export { CoderDelegate, type CreateWorktreeOptions, DELEGATE_CODE_DESCRIPTION, DELEGATE_CODE_INPUT_SCHEMA, DELEGATE_CODE_TOOL_NAME, DELEGATE_FEEDBACK_DESCRIPTION, DELEGATE_FEEDBACK_INPUT_SCHEMA, DELEGATE_FEEDBACK_TOOL_NAME, DELEGATE_RESEARCH_DESCRIPTION, DELEGATE_RESEARCH_INPUT_SCHEMA, DELEGATE_RESEARCH_TOOL_NAME, DELEGATION_HISTORY_DESCRIPTION, DELEGATION_HISTORY_INPUT_SCHEMA, DELEGATION_HISTORY_TOOL_NAME, DELEGATION_STATUS_DESCRIPTION, DELEGATION_STATUS_INPUT_SCHEMA, DELEGATION_STATUS_TOOL_NAME, DelegateCodeArgs, DelegateCodeResult, DelegateFeedbackArgs, DelegateFeedbackResult, DelegateResearchArgs, DelegateResearchResult, DelegationError, DelegationExecutor, DelegationFeedbackSnapshot, DelegationHistoryArgs, DelegationHistoryEntry, DelegationHistoryResult, DelegationProfile, DelegationProgress, type DelegationRecord, DelegationResultPayload, DelegationStatus, DelegationStatusArgs, DelegationStatusResult, DelegationTaskQueue, type DelegationTaskQueueOptions, type DetectExecutorArgs, type DiffOptions, type DiffResult, type FeedbackEvent, type FeedbackStore, FleetHandle, type GitRunner, InMemoryFeedbackStore, type InProcessExecutorDescribePlacement, type InProcessExecutorOptions, type JsonRpcMessage, type JsonRpcResponse, LocalHarness, type McpServer, type McpServerOptions, type McpToolDescriptor, type McpTransport, type RemoveWorktreeOptions, ResearchSource, ResearcherDelegate, type SubmitInput, type SubmitOutput, type TraceContext, type WorktreeHandle, captureWorktreeDiff, createDelegateCodeHandler, createDelegateFeedbackHandler, createDelegateResearchHandler, createDelegationHistoryHandler, createDelegationStatusHandler, createInProcessExecutor, createInProcessTransport, createMcpServer, createPropagatingTraceEmitter, createWorktree, detectExecutor, eventToSnapshot, hashIdempotencyInput, readTraceContextFromEnv, removeWorktree, runLocalHarness, traceContextToEnv, validateDelegateCodeArgs, validateDelegateFeedbackArgs, validateDelegateResearchArgs, 
validateDelegationHistoryArgs, validateDelegationStatusArgs };
package/dist/mcp/index.js CHANGED
@@ -6,13 +6,13 @@ import {
6
6
  createWorktree,
7
7
  detectExecutor,
8
8
  removeWorktree
9
- } from "../chunk-M65QJD35.js";
9
+ } from "../chunk-PK5DYSNO.js";
10
10
  import {
11
11
  buildLoopOtelSpans,
12
12
  createOtelExporter,
13
13
  mcpToolsForRuntimeMcp,
14
14
  mcpToolsForRuntimeMcpSubset
15
- } from "../chunk-T3GJBKHA.js";
15
+ } from "../chunk-7ZECSZ3C.js";
16
16
  import {
17
17
  DELEGATE_CODE_DESCRIPTION,
18
18
  DELEGATE_CODE_INPUT_SCHEMA,
@@ -31,14 +31,11 @@ import {
31
31
  DELEGATION_STATUS_TOOL_NAME,
32
32
  DelegationTaskQueue,
33
33
  InMemoryFeedbackStore,
34
- createDefaultCoderDelegate,
35
34
  createDelegateCodeHandler,
36
35
  createDelegateFeedbackHandler,
37
36
  createDelegateResearchHandler,
38
37
  createDelegationHistoryHandler,
39
38
  createDelegationStatusHandler,
40
- createFleetWorkspaceExecutor,
41
- createSiblingSandboxExecutor,
42
39
  eventToSnapshot,
43
40
  hashIdempotencyInput,
44
41
  validateDelegateCodeArgs,
@@ -46,7 +43,15 @@ import {
46
43
  validateDelegateResearchArgs,
47
44
  validateDelegationHistoryArgs,
48
45
  validateDelegationStatusArgs
49
- } from "../chunk-V6GURW4W.js";
46
+ } from "../chunk-HSX6PFZR.js";
47
+ import {
48
+ createKbGate
49
+ } from "../chunk-FNMGYYSS.js";
50
+ import {
51
+ createDefaultCoderDelegate,
52
+ createFleetWorkspaceExecutor,
53
+ createSiblingSandboxExecutor
54
+ } from "../chunk-VLXRXMTF.js";
50
55
  import {
51
56
  runLocalHarness
52
57
  } from "../chunk-GLR25NG7.js";
@@ -56,62 +61,6 @@ import "../chunk-PY6NMZYX.js";
56
61
  import "../chunk-SQSCRJ7U.js";
57
62
  import "../chunk-DGUM43GV.js";
58
63
 
59
- // src/mcp/kb-gate.ts
60
- var norm = (s) => s.toLowerCase().replace(/\s+/g, " ").trim();
61
- function valueAppears(value, passageNorm) {
62
- if (passageNorm.includes(norm(String(value)))) return true;
63
- if (typeof value !== "number" || !Number.isFinite(value)) return false;
64
- const forms = [value.toLocaleString("en-US")];
65
- if (Math.abs(value) >= 1e9) forms.push(`${trimZero(value / 1e9)} billion`);
66
- if (Math.abs(value) >= 1e6) forms.push(`${trimZero(value / 1e6)} million`);
67
- return forms.some((f) => passageNorm.includes(norm(f)));
68
- }
69
- function trimZero(n) {
70
- return Number.isInteger(n) ? String(n) : String(Number(n.toFixed(2)));
71
- }
72
- function builtinJudges(minPassageChars, selfArtifactKinds) {
73
- const kinds = selfArtifactKinds.map((k) => k.toLowerCase());
74
- return [
75
- {
76
- name: "passage-non-empty",
77
- judge: (c) => c.verbatimPassage.trim().length >= minPassageChars ? { accept: true } : { accept: false, reason: `passage shorter than ${minPassageChars} chars` }
78
- },
79
- {
80
- // THE anti-hallucination floor — the passage must literally be in the source.
81
- name: "passage-present",
82
- judge: (c) => norm(c.sourceText).includes(norm(c.verbatimPassage)) ? { accept: true } : { accept: false, reason: "verbatim passage not found in source (unbacked fact)" }
83
- },
84
- {
85
- name: "value-in-passage",
86
- judge: (c) => c.value === void 0 || valueAppears(c.value, norm(c.verbatimPassage)) ? { accept: true } : { accept: false, reason: `value ${JSON.stringify(c.value)} not present in passage` }
87
- },
88
- {
89
- name: "no-circular-citation",
90
- judge: (c) => {
91
- if (!c.citation || kinds.length === 0) return { accept: true };
92
- const cite = c.citation.toLowerCase();
93
- const hit = kinds.find((k) => cite.includes(k));
94
- return hit ? { accept: false, reason: `circular citation to self-generated artifact "${hit}"` } : { accept: true };
95
- }
96
- }
97
- ];
98
- }
99
- function createKbGate(options = {}) {
100
- const judges = [
101
- ...builtinJudges(options.minPassageChars ?? 12, options.selfArtifactKinds ?? []),
102
- ...options.judges ?? []
103
- ];
104
- return async (candidate) => {
105
- for (const j of judges) {
106
- const verdict = await j.judge(candidate);
107
- if (!verdict.accept) {
108
- return { accepted: false, vetoedBy: j.name, reason: verdict.reason };
109
- }
110
- }
111
- return { accepted: true };
112
- };
113
- }
114
-
115
64
  // src/mcp/trace-propagation.ts
116
65
  function readTraceContextFromEnv() {
117
66
  const traceId = process.env.TRACE_ID || generateTraceId();
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/mcp/kb-gate.ts","../../src/mcp/trace-propagation.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `createKbGate` — the valid-only knowledge-base growth gate, distilled from\n * physim's KB-research subsystem. A research-in-a-loop delegate (or any KB\n * writer) runs candidate facts through this before persisting, so the KB grows\n * with ONLY grounded facts — hallucinated, unsourced, or laundered claims are\n * vetoed at the gate.\n *\n * Fail-closed by construction: every judge must `accept`; the FIRST veto wins\n * and the fact is rejected. The non-negotiable floor (always on, can't be\n * disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST\n * literally appear in its `sourceText`. That single check kills the dominant\n * failure mode (a confident claim decoupled from any real source).\n *\n * Pure + dependency-free: it operates on fact candidates, not on a store, so it\n * composes with `@tangle-network/agent-knowledge` or any persistence layer\n * without importing it. The remediation policy (correct-on-veto vs\n * escalate-as-unverified) is the caller's — this returns the verdict; it never\n * drops a fact silently.\n */\n\n/** @experimental A fact proposed for the KB, with its grounding. */\nexport interface FactCandidate {\n /** The atomic claim text. */\n claim: string\n /** Optional extracted value (number or string) the claim asserts. */\n value?: string | number\n /** Verbatim span lifted from the source that backs the claim. */\n verbatimPassage: string\n /** The raw source text the passage must be grounded in. */\n sourceText: string\n /** Where the fact claims to come from — checked for circular/self citations. */\n citation?: string\n}\n\n/** @experimental */\nexport interface FactJudgeVerdict {\n accept: boolean\n reason?: string\n}\n\n/** @experimental A pluggable fact validator. Throw is NOT allowed — return a\n * verdict; a thrown judge is a programmer error, not a veto. 
*/\nexport interface FactJudge {\n name: string\n judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>\n}\n\n/** @experimental */\nexport interface KbGateResult {\n accepted: boolean\n /** Name of the judge that vetoed; undefined when accepted. */\n vetoedBy?: string\n reason?: string\n}\n\n/** @experimental */\nexport interface CreateKbGateOptions {\n /** Extra judges appended after the built-in floor (e.g. an LLM judge). */\n judges?: FactJudge[]\n /** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */\n minPassageChars?: number\n /**\n * Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,\n * `'cad_params'`, `'requirements'`). A citation naming one is circular\n * (laundering) — the fact cites a derived artifact, not a real source.\n * Default `[]` (no circular check unless the consumer declares its kinds).\n */\n selfArtifactKinds?: string[]\n}\n\nconst norm = (s: string): string => s.toLowerCase().replace(/\\s+/g, ' ').trim()\n\n/** Does `value` appear in the (normalized) passage — literally, comma-grouped,\n * or in billion/million shorthand (the forms a source actually writes). */\nfunction valueAppears(value: string | number, passageNorm: string): boolean {\n if (passageNorm.includes(norm(String(value)))) return true\n if (typeof value !== 'number' || !Number.isFinite(value)) return false\n const forms = [value.toLocaleString('en-US')]\n if (Math.abs(value) >= 1e9) forms.push(`${trimZero(value / 1e9)} billion`)\n if (Math.abs(value) >= 1e6) forms.push(`${trimZero(value / 1e6)} million`)\n return forms.some((f) => passageNorm.includes(norm(f)))\n}\n\nfunction trimZero(n: number): string {\n return Number.isInteger(n) ? String(n) : String(Number(n.toFixed(2)))\n}\n\n/** The always-on floor judges. Order matters: cheapest / most-fundamental first. 
*/\nfunction builtinJudges(minPassageChars: number, selfArtifactKinds: string[]): FactJudge[] {\n const kinds = selfArtifactKinds.map((k) => k.toLowerCase())\n return [\n {\n name: 'passage-non-empty',\n judge: (c) =>\n c.verbatimPassage.trim().length >= minPassageChars\n ? { accept: true }\n : { accept: false, reason: `passage shorter than ${minPassageChars} chars` },\n },\n {\n // THE anti-hallucination floor — the passage must literally be in the source.\n name: 'passage-present',\n judge: (c) =>\n norm(c.sourceText).includes(norm(c.verbatimPassage))\n ? { accept: true }\n : { accept: false, reason: 'verbatim passage not found in source (unbacked fact)' },\n },\n {\n name: 'value-in-passage',\n judge: (c) =>\n c.value === undefined || valueAppears(c.value, norm(c.verbatimPassage))\n ? { accept: true }\n : { accept: false, reason: `value ${JSON.stringify(c.value)} not present in passage` },\n },\n {\n name: 'no-circular-citation',\n judge: (c) => {\n if (!c.citation || kinds.length === 0) return { accept: true }\n const cite = c.citation.toLowerCase()\n const hit = kinds.find((k) => cite.includes(k))\n return hit\n ? { accept: false, reason: `circular citation to self-generated artifact \"${hit}\"` }\n : { accept: true }\n },\n },\n ]\n}\n\n/**\n * @experimental\n *\n * Build a fail-closed KB gate. The returned function runs the built-in floor\n * (passage-non-empty → passage-present → value-in-passage → no-circular-citation)\n * then any consumer judges, returning on the first veto.\n */\nexport function createKbGate(\n options: CreateKbGateOptions = {},\n): (candidate: FactCandidate) => Promise<KbGateResult> {\n const judges = [\n ...builtinJudges(options.minPassageChars ?? 12, options.selfArtifactKinds ?? []),\n ...(options.judges ?? 
[]),\n ]\n return async (candidate) => {\n for (const j of judges) {\n const verdict = await j.judge(candidate)\n if (!verdict.accept) {\n return { accepted: false, vetoedBy: j.name, reason: verdict.reason }\n }\n }\n return { accepted: true }\n }\n}\n","/**\n * @experimental\n *\n * Trace context propagation for MCP subprocess.\n *\n * When the MCP server is launched as a child process by a sandbox harness,\n * the parent passes trace context via environment variables:\n *\n * TRACE_ID=<current-run-trace-id>\n * PARENT_SPAN_ID=<span-that-dispatched-the-delegation>\n *\n * The MCP server reads these at startup and uses them as the root of its\n * internal trace tree. All spans emitted by `runLoop` invocations inside\n * the MCP are children of the parent's delegation span.\n *\n * When these env vars are absent, the MCP generates a fresh trace root —\n * the server operates standalone without trace joining.\n */\n\nimport type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types'\nimport type { OtelExporter } from '../otel-export'\nimport { buildLoopOtelSpans, createOtelExporter } from '../otel-export'\n\nexport interface TraceContext {\n /** Trace id inherited from the parent process, or a fresh one. */\n traceId: string\n /** Parent span id from the delegation that launched this MCP server. */\n parentSpanId?: string\n}\n\n/**\n * Read trace context from the process environment.\n * Returns a context with inherited ids or a freshly generated root.\n */\nexport function readTraceContextFromEnv(): TraceContext {\n const traceId = process.env.TRACE_ID || generateTraceId()\n const parentSpanId = process.env.PARENT_SPAN_ID || undefined\n return { traceId, parentSpanId }\n}\n\n/**\n * Create a LoopTraceEmitter that:\n * 1. Parents all spans under the inherited PARENT_SPAN_ID.\n * 2. 
Exports spans to OTEL when OTEL_EXPORTER_OTLP_ENDPOINT is set.\n *\n * Returns both the emitter and the optional exporter handle for shutdown.\n */\nexport function createPropagatingTraceEmitter(ctx: TraceContext): {\n emitter: LoopTraceEmitter\n exporter: OtelExporter | undefined\n context: TraceContext\n} {\n const exporter = createOtelExporter()\n\n // Buffer events per loop run, then emit the full nested span tree on\n // `loop.ended` so the topology hierarchy (loop → round → branch) reaches the\n // OTLP collector — not a flat list of zero-duration point spans. A run that\n // never reaches `loop.ended` (hard abort) drops its buffer; acceptable for\n // the short-lived MCP subprocess.\n const buffers = new Map<string, LoopTraceEvent[]>()\n\n const emitter: LoopTraceEmitter = {\n emit(event: LoopTraceEvent) {\n if (!exporter) return\n const buf = buffers.get(event.runId)\n if (buf) buf.push(event)\n else buffers.set(event.runId, [event])\n if (event.kind === 'loop.ended') {\n const events = buffers.get(event.runId) ?? 
[event]\n buffers.delete(event.runId)\n for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) {\n exporter.exportSpan(span)\n }\n }\n },\n }\n\n return { emitter, exporter, context: ctx }\n}\n\n/**\n * Build env vars to pass to a child MCP subprocess so it inherits the\n * current trace context.\n */\nexport function traceContextToEnv(ctx: TraceContext): Record<string, string> {\n const env: Record<string, string> = { TRACE_ID: ctx.traceId }\n if (ctx.parentSpanId) env.PARENT_SPAN_ID = ctx.parentSpanId\n return env\n}\n\nfunction generateTraceId(): string {\n const bytes = new Uint8Array(16)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 16; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAwEA,IAAM,OAAO,CAAC,MAAsB,EAAE,YAAY,EAAE,QAAQ,QAAQ,GAAG,EAAE,KAAK;AAI9E,SAAS,aAAa,OAAwB,aAA8B;AAC1E,MAAI,YAAY,SAAS,KAAK,OAAO,KAAK,CAAC,CAAC,EAAG,QAAO;AACtD,MAAI,OAAO,UAAU,YAAY,CAAC,OAAO,SAAS,KAAK,EAAG,QAAO;AACjE,QAAM,QAAQ,CAAC,MAAM,eAAe,OAAO,CAAC;AAC5C,MAAI,KAAK,IAAI,KAAK,KAAK,IAAK,OAAM,KAAK,GAAG,SAAS,QAAQ,GAAG,CAAC,UAAU;AACzE,MAAI,KAAK,IAAI,KAAK,KAAK,IAAK,OAAM,KAAK,GAAG,SAAS,QAAQ,GAAG,CAAC,UAAU;AACzE,SAAO,MAAM,KAAK,CAAC,MAAM,YAAY,SAAS,KAAK,CAAC,CAAC,CAAC;AACxD;AAEA,SAAS,SAAS,GAAmB;AACnC,SAAO,OAAO,UAAU,CAAC,IAAI,OAAO,CAAC,IAAI,OAAO,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC;AACtE;AAGA,SAAS,cAAc,iBAAyB,mBAA0C;AACxF,QAAM,QAAQ,kBAAkB,IAAI,CAAC,MAAM,EAAE,YAAY,CAAC;AAC1D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO,CAAC,MACN,EAAE,gBAAgB,KAAK,EAAE,UAAU,kBAC/B,EAAE,QAAQ,KAAK,IACf,EAAE,QAAQ,OAAO,QAAQ,wBAAwB,eAAe,SAAS;AAAA,IACjF;AAAA,IACA;AAAA;AAAA,MAEE,MAAM;AAAA,MACN,OAAO,CAAC,MACN,KAAK,EAAE,UAAU,EAAE,SAAS,KAAK,EAAE,eAAe,CAAC,IAC/C,EAAE,QAAQ,KAAK,IACf,EAAE,QAAQ,OAAO,QAAQ,uDAAuD;AAAA,IACxF;AAAA,IACA;AAAA,MACE,MAAM;AAAA,M
ACN,OAAO,CAAC,MACN,EAAE,UAAU,UAAa,aAAa,EAAE,OAAO,KAAK,EAAE,eAAe,CAAC,IAClE,EAAE,QAAQ,KAAK,IACf,EAAE,QAAQ,OAAO,QAAQ,SAAS,KAAK,UAAU,EAAE,KAAK,CAAC,0BAA0B;AAAA,IAC3F;AAAA,IACA;AAAA,MACE,MAAM;AAAA,MACN,OAAO,CAAC,MAAM;AACZ,YAAI,CAAC,EAAE,YAAY,MAAM,WAAW,EAAG,QAAO,EAAE,QAAQ,KAAK;AAC7D,cAAM,OAAO,EAAE,SAAS,YAAY;AACpC,cAAM,MAAM,MAAM,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;AAC9C,eAAO,MACH,EAAE,QAAQ,OAAO,QAAQ,iDAAiD,GAAG,IAAI,IACjF,EAAE,QAAQ,KAAK;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AACF;AASO,SAAS,aACd,UAA+B,CAAC,GACqB;AACrD,QAAM,SAAS;AAAA,IACb,GAAG,cAAc,QAAQ,mBAAmB,IAAI,QAAQ,qBAAqB,CAAC,CAAC;AAAA,IAC/E,GAAI,QAAQ,UAAU,CAAC;AAAA,EACzB;AACA,SAAO,OAAO,cAAc;AAC1B,eAAW,KAAK,QAAQ;AACtB,YAAM,UAAU,MAAM,EAAE,MAAM,SAAS;AACvC,UAAI,CAAC,QAAQ,QAAQ;AACnB,eAAO,EAAE,UAAU,OAAO,UAAU,EAAE,MAAM,QAAQ,QAAQ,OAAO;AAAA,MACrE;AAAA,IACF;AACA,WAAO,EAAE,UAAU,KAAK;AAAA,EAC1B;AACF;;;ACtHO,SAAS,0BAAwC;AACtD,QAAM,UAAU,QAAQ,IAAI,YAAY,gBAAgB;AACxD,QAAM,eAAe,QAAQ,IAAI,kBAAkB;AACnD,SAAO,EAAE,SAAS,aAAa;AACjC;AASO,SAAS,8BAA8B,KAI5C;AACA,QAAM,WAAW,mBAAmB;AAOpC,QAAM,UAAU,oBAAI,IAA8B;AAElD,QAAM,UAA4B;AAAA,IAChC,KAAK,OAAuB;AAC1B,UAAI,CAAC,SAAU;AACf,YAAM,MAAM,QAAQ,IAAI,MAAM,KAAK;AACnC,UAAI,IAAK,KAAI,KAAK,KAAK;AAAA,UAClB,SAAQ,IAAI,MAAM,OAAO,CAAC,KAAK,CAAC;AACrC,UAAI,MAAM,SAAS,cAAc;AAC/B,cAAM,SAAS,QAAQ,IAAI,MAAM,KAAK,KAAK,CAAC,KAAK;AACjD,gBAAQ,OAAO,MAAM,KAAK;AAC1B,mBAAW,QAAQ,mBAAmB,QAAQ,IAAI,SAAS,IAAI,YAAY,GAAG;AAC5E,mBAAS,WAAW,IAAI;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,UAAU,SAAS,IAAI;AAC3C;AAMO,SAAS,kBAAkB,KAA2C;AAC3E,QAAM,MAA8B,EAAE,UAAU,IAAI,QAAQ;AAC5D,MAAI,IAAI,aAAc,KAAI,iBAAiB,IAAI;AAC/C,SAAO;AACT;AAEA,SAAS,kBAA0B;AACjC,QAAM,QAAQ,IAAI,WAAW,EAAE;AAC/B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,IAAI,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACxE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;","names":[]}
1
+ {"version":3,"sources":["../../src/mcp/trace-propagation.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Trace context propagation for MCP subprocess.\n *\n * When the MCP server is launched as a child process by a sandbox harness,\n * the parent passes trace context via environment variables:\n *\n * TRACE_ID=<current-run-trace-id>\n * PARENT_SPAN_ID=<span-that-dispatched-the-delegation>\n *\n * The MCP server reads these at startup and uses them as the root of its\n * internal trace tree. All spans emitted by `runLoop` invocations inside\n * the MCP are children of the parent's delegation span.\n *\n * When these env vars are absent, the MCP generates a fresh trace root —\n * the server operates standalone without trace joining.\n */\n\nimport type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types'\nimport type { OtelExporter } from '../otel-export'\nimport { buildLoopOtelSpans, createOtelExporter } from '../otel-export'\n\nexport interface TraceContext {\n /** Trace id inherited from the parent process, or a fresh one. */\n traceId: string\n /** Parent span id from the delegation that launched this MCP server. */\n parentSpanId?: string\n}\n\n/**\n * Read trace context from the process environment.\n * Returns a context with inherited ids or a freshly generated root.\n */\nexport function readTraceContextFromEnv(): TraceContext {\n const traceId = process.env.TRACE_ID || generateTraceId()\n const parentSpanId = process.env.PARENT_SPAN_ID || undefined\n return { traceId, parentSpanId }\n}\n\n/**\n * Create a LoopTraceEmitter that:\n * 1. Parents all spans under the inherited PARENT_SPAN_ID.\n * 2. 
Exports spans to OTEL when OTEL_EXPORTER_OTLP_ENDPOINT is set.\n *\n * Returns both the emitter and the optional exporter handle for shutdown.\n */\nexport function createPropagatingTraceEmitter(ctx: TraceContext): {\n emitter: LoopTraceEmitter\n exporter: OtelExporter | undefined\n context: TraceContext\n} {\n const exporter = createOtelExporter()\n\n // Buffer events per loop run, then emit the full nested span tree on\n // `loop.ended` so the topology hierarchy (loop → round → branch) reaches the\n // OTLP collector — not a flat list of zero-duration point spans. A run that\n // never reaches `loop.ended` (hard abort) drops its buffer; acceptable for\n // the short-lived MCP subprocess.\n const buffers = new Map<string, LoopTraceEvent[]>()\n\n const emitter: LoopTraceEmitter = {\n emit(event: LoopTraceEvent) {\n if (!exporter) return\n const buf = buffers.get(event.runId)\n if (buf) buf.push(event)\n else buffers.set(event.runId, [event])\n if (event.kind === 'loop.ended') {\n const events = buffers.get(event.runId) ?? 
[event]\n buffers.delete(event.runId)\n for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) {\n exporter.exportSpan(span)\n }\n }\n },\n }\n\n return { emitter, exporter, context: ctx }\n}\n\n/**\n * Build env vars to pass to a child MCP subprocess so it inherits the\n * current trace context.\n */\nexport function traceContextToEnv(ctx: TraceContext): Record<string, string> {\n const env: Record<string, string> = { TRACE_ID: ctx.traceId }\n if (ctx.parentSpanId) env.PARENT_SPAN_ID = ctx.parentSpanId\n return env\n}\n\nfunction generateTraceId(): string {\n const bytes = new Uint8Array(16)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 16; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkCO,SAAS,0BAAwC;AACtD,QAAM,UAAU,QAAQ,IAAI,YAAY,gBAAgB;AACxD,QAAM,eAAe,QAAQ,IAAI,kBAAkB;AACnD,SAAO,EAAE,SAAS,aAAa;AACjC;AASO,SAAS,8BAA8B,KAI5C;AACA,QAAM,WAAW,mBAAmB;AAOpC,QAAM,UAAU,oBAAI,IAA8B;AAElD,QAAM,UAA4B;AAAA,IAChC,KAAK,OAAuB;AAC1B,UAAI,CAAC,SAAU;AACf,YAAM,MAAM,QAAQ,IAAI,MAAM,KAAK;AACnC,UAAI,IAAK,KAAI,KAAK,KAAK;AAAA,UAClB,SAAQ,IAAI,MAAM,OAAO,CAAC,KAAK,CAAC;AACrC,UAAI,MAAM,SAAS,cAAc;AAC/B,cAAM,SAAS,QAAQ,IAAI,MAAM,KAAK,KAAK,CAAC,KAAK;AACjD,gBAAQ,OAAO,MAAM,KAAK;AAC1B,mBAAW,QAAQ,mBAAmB,QAAQ,IAAI,SAAS,IAAI,YAAY,GAAG;AAC5E,mBAAS,WAAW,IAAI;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,UAAU,SAAS,IAAI;AAC3C;AAMO,SAAS,kBAAkB,KAA2C;AAC3E,QAAM,MAA8B,EAAE,UAAU,IAAI,QAAQ;AAC5D,MAAI,IAAI,aAAc,KAAI,iBAAiB,IAAI;AAC/C,SAAO;AACT;AAEA,SAAS,kBAA0B;AACjC,QAAM,QAAQ,IAAI,WAAW,EAAE;AAC/B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,IAAI,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACxE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,E
AAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;","names":[]}
@@ -0,0 +1,129 @@
1
+ import { LlmClientOptions } from '@tangle-network/agent-eval';
2
+ import { Scenario, DispatchContext, JudgeConfig, ImprovementDriver, Gate, CampaignStorage, GateResult, RunImprovementLoopResult } from '@tangle-network/agent-eval/campaign';
3
+
4
+ /**
5
+ * @experimental
6
+ *
7
+ * `optimizePrompt` — identity-gated optimization for any TEXT prompt surface
8
+ * (system prompt, planner prompt, judge rubric, skill doc).
9
+ *
10
+ * The text-surface sibling to this module's `improvementDriver` (the
11
+ * CODE-surface / worktree path). Both feed agent-eval's `runImprovementLoop`;
12
+ * this one defaults the driver to agent-eval's `gepaDriver` (reflective text
13
+ * mutator) and the gate to `heldOutGate`.
14
+ *
15
+ * IDENTITY-GATED BY CONSTRUCTION — the whole point. The loop runs evals,
16
+ * collects per-scenario signal, proposes candidates, and the gate compares
17
+ * candidate-vs-baseline ON THE HELDOUT. `result.prompt` is the baseline
18
+ * (identity) UNLESS the gate decided `'ship'`. So wiring a surface up is safe:
19
+ * a surface with no beneficial mutation simply keeps its baseline. You never
20
+ * regress by registering a prompt — you only ever improve when the held-out
21
+ * data earns it.
22
+ *
23
+ * Generic over the runtime: `runWithPrompt` is the only domain seam — given a
24
+ * candidate prompt + scenario, run it however the surface runs (sandbox
25
+ * `streamPrompt`, a `runLoop`, a direct model call) and return the artifact the
26
+ * judges score. The optimizer never assumes how a prompt is executed.
27
+ */
28
+
29
+ /** Reflection config for the default `gepaDriver`. Omit when passing a custom
30
+ * `driver`. Only `llm` and `model` are required; every other field is optional. */
31
+ interface OptimizePromptReflection {
32
+ /** Router transport for the reflection model. */
33
+ llm: LlmClientOptions;
34
+ /** Model that performs the reflective rewrite. */
35
+ model: string;
36
+ /** What is being optimized — orients the reflection prompt. Default
37
+ * `'system prompt'`. */
38
+ target?: string;
39
+ /** Surface-specific mutation levers offered to the reflector. */
40
+ mutationPrimitives?: string[];
41
+ /** H2 (`## Foo`) headings that MUST survive every candidate. gepaDriver's
42
+ * only structural guard — load-bearing sections of the prompt should be
43
+ * `##` headings so a rewrite cannot drop them. */
44
+ preserveSections?: string[];
45
+ /** Max sentence-level edits per candidate vs the parent (a textual learning
46
+ * rate). Stops a single rewrite from wiping prior rules in one generation. */
47
+ maxSentenceEdits?: number;
48
+ }
49
+ /** Inputs accepted by `optimizePrompt`. Type parameters are ordered `<TScenario, TArtifact>`. @experimental */
50
+ interface OptimizePromptOptions<TScenario extends Scenario, TArtifact> {
51
+ /** The prompt being optimized — the identity baseline the gate protects. */
52
+ baselinePrompt: string;
53
+ /** Domain seam: run a candidate prompt against a scenario → artifact the
54
+ * judges score. The optimizer is agnostic to HOW the prompt runs. */
55
+ runWithPrompt: (prompt: string, scenario: TScenario, ctx: DispatchContext) => Promise<TArtifact>;
56
+ /** Training pool — scored each generation to rank candidates. */
57
+ scenarios: TScenario[];
58
+ /** Held out of training — scored ONLY for the gate's baseline-vs-winner
59
+ * delta. Disjoint from `scenarios`; this is what makes promotion measure
60
+ * generalization, not memorization. */
61
+ holdoutScenarios: TScenario[];
62
+ /** Scorers — deterministic checks or LLM judges. */
63
+ judges: JudgeConfig<TArtifact, TScenario>[];
64
+ /** Where artifacts + traces land (opaque key under in-memory storage). */
65
+ runDir: string;
66
+ /** Default driver = `gepaDriver` built from this. Required UNLESS `driver`
67
+ * is supplied. */
68
+ reflection?: OptimizePromptReflection;
69
+ /** Override the improvement strategy (custom driver / deterministic tests). */
70
+ driver?: ImprovementDriver;
71
+ /** Override the promotion gate. Default `heldOutGate` over `holdoutScenarios`
72
+ * — zero extra LLM. Wrap `defaultProductionGate` for red-team/reward-hacking
73
+ * hardening on production wiring. */
74
+ gate?: Gate<TArtifact, TScenario>;
75
+ /** Minimum held-out composite lift to ship, forwarded to the default
76
+ * `heldOutGate`. When omitted the gate uses its own default. */
77
+ deltaThreshold?: number;
78
+ /** Candidates proposed per generation. Default 4. */
79
+ populationSize?: number;
80
+ /** Generations to run. Default 3. */
81
+ maxGenerations?: number;
82
+ /** Candidates carried to the next generation. Default 2. */
83
+ promoteTopK?: number;
84
+ /** Storage backend. Pass `inMemoryCampaignStorage()` for filesystem-less /
85
+ * test runs. Default: Node filesystem. */
86
+ storage?: CampaignStorage;
87
+ /** Reproducibility seed. Default 42. */
88
+ seed?: number;
89
+ /** Per-scenario replicates for CI bands. Default 1. */
90
+ reps?: number;
91
+ /** Max concurrent cells. Default 2. */
92
+ maxConcurrency?: number;
93
+ /** Test seam — override the wall clock. */
94
+ now?: () => Date;
95
+ /** On a shipped gate: `'pr'` opens a PR, `'none'` just reports. Default
96
+ * `'none'`. */
97
+ autoOnPromote?: 'pr' | 'none';
98
+ ghOwner?: string; // NOTE(review): undocumented; presumably the GitHub owner targeted when `autoOnPromote` is `'pr'` — confirm
99
+ ghRepo?: string; // NOTE(review): undocumented; presumably the GitHub repository targeted when `autoOnPromote` is `'pr'` — confirm
100
+ }
101
+ /** Outcome resolved by `optimizePrompt`. Note: type parameters are ordered `<TArtifact, TScenario>`, the reverse of `OptimizePromptOptions`. @experimental */
102
+ interface OptimizePromptResult<TArtifact, TScenario extends Scenario> {
103
+ /** The prompt to USE. Identity (the baseline) unless the gate shipped a
104
+ * winner — so a caller can always assign `result.prompt` unconditionally. */
105
+ prompt: string;
106
+ /** True only when the gate promoted a candidate over baseline on holdout. */
107
+ improved: boolean;
108
+ /** The gate's verdict (`'ship' | 'hold' | 'need_more_work' | ...`). */
109
+ decision: GateResult['decision'];
110
+ /** Human-readable reasons the gate gave. */
111
+ reasons: string[];
112
+ /** Mean held-out composite of the baseline. */
113
+ baselineComposite: number;
114
+ /** Mean held-out composite of the winner candidate. */
115
+ winnerComposite: number;
116
+ /** Held-out lift (winner − baseline); the gate's `delta` when it reported one. */
117
+ delta: number;
118
+ /** Why the winner was proposed — present when a shipped winner carried a
119
+ * driver rationale. */
120
+ rationale?: string;
121
+ /** Unified baseline→winner diff (empty when the winner is the baseline). */
122
+ diff: string;
123
+ /** The full loop result for callers that need generations / campaigns. */
124
+ raw: RunImprovementLoopResult<TArtifact, TScenario>;
125
+ }
126
+ /** Identity-gated optimization of a text prompt: resolves to a result whose `prompt` is the baseline unless the gate decided `'ship'`. @experimental */
127
+ declare function optimizePrompt<TScenario extends Scenario, TArtifact>(opts: OptimizePromptOptions<TScenario, TArtifact>): Promise<OptimizePromptResult<TArtifact, TScenario>>;
128
+
129
+ export { type OptimizePromptOptions as O, type OptimizePromptResult as a, type OptimizePromptReflection as b, optimizePrompt as o };