npm - @tangle-network/agent-eval - Versions diffs - 0.65.0 → 0.67.0 - Mend

@tangle-network/agent-eval 0.65.0 → 0.67.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/CHANGELOG.md +25 -0
package/dist/adapters/otel.d.ts +1 -1
package/dist/campaign/index.d.ts +110 -6
package/dist/campaign/index.js +26 -19
package/dist/campaign/index.js.map +1 -1
package/dist/{chunk-7TPYV2ER.js → chunk-6XQIEUQ2.js} +140 -7
package/dist/chunk-6XQIEUQ2.js.map +1 -0
package/dist/{chunk-HKINEDRZ.js → chunk-DFS3FEXO.js} +3 -2
package/dist/chunk-DFS3FEXO.js.map +1 -0
package/dist/chunk-MZ2IYGGN.js +592 -0
package/dist/chunk-MZ2IYGGN.js.map +1 -0
package/dist/{chunk-4ODZXQV2.js → chunk-NV2PF37Q.js} +645 -2
package/dist/chunk-NV2PF37Q.js.map +1 -0
package/dist/contract/index.d.ts +11 -9
package/dist/contract/index.js +11 -12
package/dist/contract/index.js.map +1 -1
package/dist/hosted/index.d.ts +1 -1
package/dist/hosted/index.js +1 -1
package/dist/{index-CzhtwYBT.d.ts → index-DSEHMwvS.d.ts} +4 -2
package/dist/index.d.ts +251 -7
package/dist/index.js +292 -2
package/dist/index.js.map +1 -1
package/dist/openapi.json +1 -1
package/dist/provenance-CChUqexv.d.ts +314 -0
package/dist/{registry-DPly4_hZ.d.ts → registry-BGKyX6bw.d.ts} +2 -2
package/dist/release-report-CN8hJlhk.d.ts +233 -0
package/dist/reporting.d.ts +4 -3
package/dist/{run-campaign-5J3ED2UJ.js → run-campaign-BVY3RGAZ.js} +2 -3
package/dist/{provenance-lqyLpOYR.d.ts → run-improvement-loop-BKpM5T4t.d.ts} +51 -329
package/dist/statistics-B7yCbi9i.d.ts +253 -0
package/dist/{types-DhqpAi_z.d.ts → types-Croy5h7V.d.ts} +1 -1
package/package.json +1 -1
package/dist/chunk-4ODZXQV2.js.map +0 -1
package/dist/chunk-7TPYV2ER.js.map +0 -1
package/dist/chunk-CZRKD2X2.js +0 -1104
package/dist/chunk-CZRKD2X2.js.map +0 -1
package/dist/chunk-E22YUOAL.js +0 -111
package/dist/chunk-E22YUOAL.js.map +0 -1
package/dist/chunk-HKINEDRZ.js.map +0 -1
package/dist/release-report-DGoeObZT.d.ts +0 -484
/package/dist/{run-campaign-5J3ED2UJ.js.map → run-campaign-BVY3RGAZ.js.map} +0 -0

package/dist/{chunk-7TPYV2ER.js → chunk-6XQIEUQ2.js} RENAMED Viewed

@@ -1,14 +1,115 @@
-import {
-  BackendIntegrityError
-} from "./chunk-E22YUOAL.js";
 import {
   confidenceInterval
 } from "./chunk-ITBRCT73.js";
+import {
+  AgentEvalError
+} from "./chunk-3BFEG2F6.js";
 // src/campaign/run-campaign.ts
 import { createHash } from "crypto";
 import { join } from "path";
+// src/integrity/backend-integrity.ts
+var BackendIntegrityError = class extends AgentEvalError {
+  constructor(message, report) {
+    super("backend_integrity", message);
+    this.report = report;
+  }
+  report;
+};
+function isStubRecord(rec) {
+  return rec.tokenUsage.input === 0 && rec.tokenUsage.output === 0;
+}
+function isUncostedRecord(rec) {
+  return rec.tokenUsage.output > 0 && rec.costUsd === 0;
+}
+function summarizeBackendIntegrity(records) {
+  const totalRecords = records.length;
+  let stubRecords = 0;
+  let realRecords = 0;
+  let uncostedRecords = 0;
+  let totalInputTokens = 0;
+  let totalOutputTokens = 0;
+  let totalCostUsd = 0;
+  for (const rec of records) {
+    totalInputTokens += rec.tokenUsage.input;
+    totalOutputTokens += rec.tokenUsage.output;
+    totalCostUsd += rec.costUsd;
+    if (isStubRecord(rec)) stubRecords++;
+    else realRecords++;
+    if (isUncostedRecord(rec)) uncostedRecords++;
+  }
+  const verdict = totalRecords === 0 ? "stub" : stubRecords === totalRecords ? "stub" : stubRecords === 0 ? "real" : "mixed";
+  const diagnosis = buildDiagnosis({
+    totalRecords,
+    stubRecords,
+    realRecords,
+    uncostedRecords,
+    totalInputTokens,
+    totalOutputTokens,
+    totalCostUsd,
+    verdict
+  });
+  return {
+    totalRecords,
+    stubRecords,
+    realRecords,
+    uncostedRecords,
+    totalInputTokens,
+    totalOutputTokens,
+    totalCostUsd,
+    verdict,
+    diagnosis
+  };
+}
+function buildDiagnosis(r) {
+  if (r.totalRecords === 0) {
+    return "no records \u2014 eval produced zero runs; backend likely failed before first turn";
+  }
+  if (r.verdict === "stub") {
+    return [
+      `all ${r.totalRecords} records have zero token usage \u2014 the LLM backend was never called.`,
+      "common causes: --backend sandbox without a sandbox bridge running; stub model returning hard-coded strings;",
+      "auth misconfigured so requests were silently dropped before the LLM. Re-run with --backend tcloud and TANGLE_API_KEY set,",
+      "or boot the cli-bridge / sandbox before invoking the eval."
+    ].join(" ");
+  }
+  if (r.verdict === "mixed") {
+    const pct = (r.stubRecords / r.totalRecords * 100).toFixed(0);
+    return [
+      `${r.stubRecords}/${r.totalRecords} records (${pct}%) have zero token usage \u2014 the backend partially failed.`,
+      "common causes: rate-limit cascade (429s after the first N personas);",
+      "transient auth expiry mid-run; provider outage. Treat the affected records as missing data, not agent failures."
+    ].join(" ");
+  }
+  if (r.uncostedRecords > 0) {
+    const pct = (r.uncostedRecords / r.totalRecords * 100).toFixed(0);
+    return [
+      `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens).`,
+      `${r.uncostedRecords} (${pct}%) have output tokens but costUsd=0 \u2014 cost ledger is mis-wired (no input-token`,
+      "propagation from the runtime stream into RunRecord)."
+    ].join(" ");
+  }
+  return `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens, $${r.totalCostUsd.toFixed(4)}).`;
+}
+function assertRealBackend(records, opts = {}) {
+  const report = summarizeBackendIntegrity(records);
+  const allowMixed = opts.allowMixed ?? true;
+  if (report.verdict === "stub") {
+    throw new BackendIntegrityError(
+      `backend-integrity: ran against a stub or unconfigured backend \u2014 ${report.diagnosis}`,
+      report
+    );
+  }
+  if (!allowMixed && report.verdict === "mixed") {
+    throw new BackendIntegrityError(
+      `backend-integrity: partial backend failure rejected \u2014 ${report.diagnosis}`,
+      report
+    );
+  }
+  return report;
+}
 // src/campaign/storage.ts
 import { createRequire } from "module";
 function fsCampaignStorage() {
@@ -111,7 +212,8 @@ async function runCampaign(opts) {
             now,
             storage,
             buildTraceWriter: opts.buildTraceWriter ?? defaultBuildTraceWriter(storage),
-            signal: abortController.signal
+            signal: abortController.signal,
+            dispatchTimeoutMs: opts.dispatchTimeoutMs
           });
           cellsRef.push(result.cell);
           enforceCellUsage(result.cell, opts.expectUsage ?? "warn");
@@ -213,11 +315,15 @@ async function executeCell(args) {
     scenario: args.slot.scenario,
     rep: args.slot.rep
   });
+  const cellAbort = new AbortController();
+  const onCampaignAbort = () => cellAbort.abort(args.signal.reason);
+  if (args.signal.aborted) cellAbort.abort(args.signal.reason);
+  else args.signal.addEventListener("abort", onCampaignAbort, { once: true });
   const ctx = {
     cellId: args.slot.cellId,
     rep: args.slot.rep,
     seed: args.slot.cellSeed,
-    signal: args.signal,
+    signal: cellAbort.signal,
     trace,
     artifacts,
     cost,
@@ -225,10 +331,34 @@ async function executeCell(args) {
   };
   let artifact;
   let errorMessage;
+  const timeoutMs = args.dispatchTimeoutMs;
+  let timeoutTimer;
   try {
-    artifact = await args.opts.dispatch(args.slot.scenario, ctx);
+    const dispatched = args.opts.dispatch(args.slot.scenario, ctx);
+    if (timeoutMs !== void 0 && timeoutMs > 0) {
+      artifact = await Promise.race([
+        dispatched,
+        new Promise((_, reject) => {
+          timeoutTimer = setTimeout(() => {
+            cellAbort.abort(new Error("dispatch timeout"));
+            reject(
+              new Error(
+                `dispatch exceeded ${timeoutMs}ms for cell '${args.slot.cellId}' \u2014 aborted and failed loud (no silent hang)`
+              )
+            );
+          }, timeoutMs);
+          if (typeof timeoutTimer.unref === "function")
+            timeoutTimer.unref();
+        })
+      ]);
+    } else {
+      artifact = await dispatched;
+    }
   } catch (err) {
     errorMessage = err instanceof Error ? err.message : String(err);
+  } finally {
+    if (timeoutTimer) clearTimeout(timeoutTimer);
+    args.signal.removeEventListener("abort", onCampaignAbort);
   }
   const judgeScores = {};
   if (artifact !== void 0) {
@@ -396,8 +526,11 @@ function aggregate(samples, seed) {
 }
 export {
+  BackendIntegrityError,
+  summarizeBackendIntegrity,
+  assertRealBackend,
   fsCampaignStorage,
   inMemoryCampaignStorage,
   runCampaign
 };
-//# sourceMappingURL=chunk-7TPYV2ER.js.map
+//# sourceMappingURL=chunk-6XQIEUQ2.js.map

package/dist/chunk-6XQIEUQ2.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/campaign/run-campaign.ts","../src/integrity/backend-integrity.ts","../src/campaign/storage.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `runCampaign` — Pass A substrate primitive. ONE function that orchestrates\n * scenarios → dispatch → artifacts → judges → aggregates, with full\n * reproducibility (seed + manifest hash), cell-level resumability, bootstrap\n * CIs, and the `LabeledScenarioStore` capture flywheel.\n *\n * Improvement loops (optimizer / gate / autoOnPromote) ride on top of this\n * primitive but live in `presets/run-improvement-loop.ts`. This file keeps\n * the core orchestrator minimal — Phase 1 of the Pass A track.\n */\n\nimport { createHash } from 'node:crypto'\nimport { join } from 'node:path'\nimport { BackendIntegrityError, type BackendIntegrityReport } from '../integrity/backend-integrity'\nimport { confidenceInterval } from '../statistics'\nimport { type CampaignStorage, fsCampaignStorage } from './storage'\nimport type {\n CampaignAggregates,\n CampaignArtifactWriter,\n CampaignCellResult,\n CampaignCostMeter,\n CampaignResult,\n CampaignTokenUsage,\n CampaignTraceWriter,\n DispatchContext,\n DispatchFn,\n JudgeAggregate,\n JudgeConfig,\n JudgeScore,\n LabeledScenarioStore,\n Scenario,\n ScenarioAggregate,\n TraceSpan,\n} from './types'\n\nexport interface RunCampaignOptions<TScenario extends Scenario, TArtifact> {\n scenarios: TScenario[]\n dispatch: DispatchFn<TScenario, TArtifact>\n judges?: JudgeConfig<TArtifact, TScenario>[]\n /** Required for reproducibility. Default 42. */\n seed?: number\n /** Per-scenario replicates for CI bands. Default 1; raise to 5+ for\n * bootstrap-tight intervals on critical eval. */\n reps?: number\n /** When true (default), completed cells are cached by\n * (manifestHash, scenarioId, rep, generation). Re-runs skip cached cells. */\n resumable?: boolean\n /** Optional store — when present, every artifact + judge score is captured\n * with the configured `captureSource`. Capture is default ON; pass `'off'`\n * to disable. */\n labeledStore?: LabeledScenarioStore | 'off'\n captureSource?: 'production-trace' | 'eval-run' | 'manual' | 'red-team' | 'synthetic'\n captureSourceVersionHash?: string\n /** Wall-clock cost cap across all cells. Cells beyond ceiling are skipped. */\n costCeiling?: number\n /** Max concurrent cells. Default 2. */\n maxConcurrency?: number\n /**\n * Per-cell dispatch deadline in ms. A `dispatch` that neither resolves nor\n * rejects within this window is a hang (a stalled model request, an\n * exhausted runtime resource, a backend that never closes its stream). When\n * set, the cell's `ctx.signal` is aborted and the cell is recorded as a LOUD\n * error (`dispatch exceeded <N>ms`) so the campaign proceeds and the failure\n * is visible — instead of one wedged cell silently hanging the whole run (and\n * every loop/CI job above it) forever. `undefined`/`0` = unbounded (legacy).\n */\n dispatchTimeoutMs?: number\n /** Required: where artifacts + traces land. */\n runDir: string\n /** Tracing posture. Default is the substrate's `FileSystemTraceStore` rooted\n * at `<runDir>/traces/`. `'off'` disables capture entirely — substrate\n * refuses this when the caller wires `autoOnPromote !== 'none'`. */\n tracing?: 'on' | 'off'\n /**\n * Per-cell usage expectation — the early, fine-grained sibling of the\n * batch `assertRealBackend` guard. A cell that produced an artifact (no\n * error) but reported `costUsd === 0` AND zero tokens is a stub: the\n * dispatch never reported LLM activity via `ctx.cost`. Modes:\n * - `'warn'` (default) — log the offending cell loudly, keep going.\n * - `'assert'` — throw `BackendIntegrityError` on the first such cell\n * (fail-fast; recommended for CI campaigns expecting real LLM calls).\n * - `'off'` — no check (replay / deterministic-only / offline analysis).\n */\n expectUsage?: 'assert' | 'warn' | 'off'\n /** Test seam — override the wall clock for deterministic tests. */\n now?: () => Date\n /** Test seam — override per-cell trace writer factory. */\n buildTraceWriter?: (cellId: string, dir: string) => CampaignTraceWriter\n /** Storage backend for run/cell dirs, the resumability cache, artifacts,\n * and trace spans. Default: the Node filesystem (`fsCampaignStorage`).\n * Pass `inMemoryCampaignStorage()` to run in a filesystem-less runtime\n * (Cloudflare Workers, Deno, edge) — the `CampaignResult` is still\n * produced; artifacts/traces just aren't persisted to disk. */\n storage?: CampaignStorage\n /**\n * Optional per-cell placement strategy. Returns an opaque string the\n * substrate forwards as `ctx.placement` to the Dispatch — placement-aware\n * Dispatches (e.g. `httpDispatch` from `/adapters/http`) use it to route\n * each cell to the right worker, region, or sandbox. When unset, every\n * cell receives `ctx.placement = undefined` and behaves identically to\n * the in-process case.\n *\n * @example\n * cellPlacement: ({ scenario }) => scenario.tags?.includes('eu') ? 'eu-west' : 'us-east'\n */\n cellPlacement?: (input: {\n scenario: TScenario\n rep: number\n generation?: number\n }) => string | undefined\n}\n\nexport async function runCampaign<TScenario extends Scenario, TArtifact>(\n opts: RunCampaignOptions<TScenario, TArtifact>,\n): Promise<CampaignResult<TArtifact, TScenario>> {\n const seed = opts.seed ?? 42\n const reps = opts.reps ?? 1\n const resumable = opts.resumable ?? true\n const maxConcurrency = opts.maxConcurrency ?? 2\n const now = opts.now ?? (() => new Date())\n const judges = opts.judges ?? []\n const storage = opts.storage ?? fsCampaignStorage()\n\n storage.ensureDir(opts.runDir)\n\n const manifestHash = computeManifestHash({\n scenarios: opts.scenarios,\n judges: judges as unknown as JudgeConfig<unknown>[],\n dispatchRef: opts.dispatch.name || 'anonymous',\n seed,\n reps,\n })\n\n const startedAt = now()\n const cells: CampaignCellResult<TArtifact>[] = []\n const artifactsByPath: Record<string, string> = {}\n\n // Build the cell schedule (scenario × rep).\n const schedule: Array<{ scenario: TScenario; rep: number; cellId: string; cellSeed: number }> = []\n let cellIndex = 0\n for (const scenario of opts.scenarios) {\n for (let rep = 0; rep < reps; rep++) {\n const cellId = `${scenario.id}:${rep}`\n const cellSeed = seed + cellIndex\n schedule.push({ scenario, rep, cellId, cellSeed })\n cellIndex += 1\n }\n }\n\n // Concurrency-limited execution.\n let totalCostUsd = 0\n let costCeilingReached = false\n const abortController = new AbortController()\n // Concurrency lanes that drain the cell schedule. Named \"lanes\" — not\n // \"workers\" — to avoid clashing with the taxonomy's worker (= the agent\n // harness in a sandbox, invoked behind `dispatch`). See loop-taxonomy.md.\n const lanes: Promise<void>[] = []\n let nextIdx = 0\n const cellsRef = cells\n\n for (let i = 0; i < maxConcurrency; i++) {\n lanes.push(\n (async () => {\n while (true) {\n const myIdx = nextIdx++\n if (myIdx >= schedule.length) return\n const slot = schedule[myIdx]!\n if (costCeilingReached) {\n cellsRef.push(skippedCell(slot, 'cost_ceiling_reached'))\n continue\n }\n const result = await executeCell({\n slot,\n opts,\n manifestHash,\n resumable,\n now,\n storage,\n buildTraceWriter: opts.buildTraceWriter ?? defaultBuildTraceWriter(storage),\n signal: abortController.signal,\n dispatchTimeoutMs: opts.dispatchTimeoutMs,\n })\n cellsRef.push(result.cell)\n enforceCellUsage(result.cell, opts.expectUsage ?? 'warn')\n totalCostUsd += result.cell.costUsd\n Object.assign(artifactsByPath, result.artifactsByPath)\n if (opts.costCeiling !== undefined && totalCostUsd >= opts.costCeiling) {\n costCeilingReached = true\n }\n // Capture into LabeledScenarioStore unless explicitly disabled.\n if (opts.labeledStore && opts.labeledStore !== 'off' && !result.cell.error) {\n await captureToStore({\n store: opts.labeledStore,\n cell: result.cell,\n scenario: slot.scenario,\n opts,\n now,\n }).catch((err) => {\n // Capture failures are non-fatal — log but don't crash the campaign.\n // (Trace would normally land here.)\n console.warn(\n `[runCampaign] capture failed for ${result.cell.cellId}: ${err instanceof Error ? err.message : String(err)}`,\n )\n })\n }\n }\n })(),\n )\n }\n await Promise.all(lanes)\n\n const endedAt = now()\n cellsRef.sort((a, b) => a.cellId.localeCompare(b.cellId))\n\n const aggregates = computeAggregates(\n cellsRef,\n judges as unknown as JudgeConfig<TArtifact>[],\n seed,\n )\n\n return {\n manifestHash,\n seed,\n startedAt: startedAt.toISOString(),\n endedAt: endedAt.toISOString(),\n durationMs: endedAt.getTime() - startedAt.getTime(),\n cells: cellsRef,\n aggregates,\n runDir: opts.runDir,\n artifactsByPath,\n scenarios: opts.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n }\n}\n\n// ── Internals ─────────────────────────────────────────────────────────\n\ninterface ExecuteCellArgs<TScenario extends Scenario, TArtifact> {\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number }\n opts: RunCampaignOptions<TScenario, TArtifact>\n manifestHash: string\n resumable: boolean\n now: () => Date\n storage: CampaignStorage\n buildTraceWriter: (cellId: string, dir: string) => CampaignTraceWriter\n signal: AbortSignal\n dispatchTimeoutMs?: number\n}\n\nasync function executeCell<TScenario extends Scenario, TArtifact>(\n args: ExecuteCellArgs<TScenario, TArtifact>,\n): Promise<{ cell: CampaignCellResult<TArtifact>; artifactsByPath: Record<string, string> }> {\n const storage = args.storage\n const cellDir = join(args.opts.runDir, args.slot.cellId.replace(/[^a-zA-Z0-9_-]/g, '_'))\n storage.ensureDir(cellDir)\n\n // Resumability: cache key = (manifestHash, scenarioId, rep)\n const cachePath = join(cellDir, 'cached-result.json')\n if (args.resumable) {\n const raw = storage.read(cachePath)\n if (raw !== undefined) {\n try {\n const cached = JSON.parse(raw) as CampaignCellResult<TArtifact>\n if (cached.cellId === args.slot.cellId) {\n return { cell: { ...cached, cached: true }, artifactsByPath: {} }\n }\n } catch {\n // Corrupt cache — fall through to re-run.\n }\n }\n }\n\n const startMs = Date.now()\n const trace = args.buildTraceWriter(args.slot.cellId, cellDir)\n const artifactsByPath: Record<string, string> = {}\n const artifacts: CampaignArtifactWriter = {\n async write(path, content) {\n const fullPath = join(cellDir, path)\n storage.ensureDir(join(fullPath, '..'))\n storage.write(fullPath, content)\n artifactsByPath[`${args.slot.cellId}/${path}`] = fullPath\n return fullPath\n },\n async writeJson(path, value) {\n return artifacts.write(path, JSON.stringify(value, null, 2))\n },\n }\n let costSoFar = 0\n const tokensSoFar: CampaignTokenUsage = { input: 0, output: 0 }\n const cost: CampaignCostMeter = {\n observe(amount, source) {\n costSoFar += amount\n trace.span(`cost.${source}`, { amountUsd: amount }).end()\n },\n observeTokens(usage) {\n tokensSoFar.input += usage.input\n tokensSoFar.output += usage.output\n if (usage.cached) tokensSoFar.cached = (tokensSoFar.cached ?? 0) + usage.cached\n },\n current() {\n return costSoFar\n },\n tokens() {\n return { ...tokensSoFar }\n },\n }\n\n const placement = args.opts.cellPlacement?.({\n scenario: args.slot.scenario,\n rep: args.slot.rep,\n })\n\n // Per-cell abort signal, chained to the campaign signal. The dispatch sees\n // THIS signal so a timeout (below) can abort just this cell's in-flight work\n // without tearing down sibling cells — and a signal-honoring dispatch\n // releases its open request instead of leaking it past the deadline.\n const cellAbort = new AbortController()\n const onCampaignAbort = () => cellAbort.abort((args.signal as { reason?: unknown }).reason)\n if (args.signal.aborted) cellAbort.abort((args.signal as { reason?: unknown }).reason)\n else args.signal.addEventListener('abort', onCampaignAbort, { once: true })\n\n const ctx: DispatchContext = {\n cellId: args.slot.cellId,\n rep: args.slot.rep,\n seed: args.slot.cellSeed,\n signal: cellAbort.signal,\n trace,\n artifacts,\n cost,\n placement,\n }\n\n let artifact: TArtifact | undefined\n let errorMessage: string | undefined\n const timeoutMs = args.dispatchTimeoutMs\n let timeoutTimer: ReturnType<typeof setTimeout> | undefined\n try {\n const dispatched = args.opts.dispatch(args.slot.scenario, ctx)\n if (timeoutMs !== undefined && timeoutMs > 0) {\n // A dispatch that never settles (stalled model request, exhausted runtime\n // resource, a stream that never closes) must NOT hang the cell — and with\n // it the lane, the campaign, the loop, the CI job — forever. Race it\n // against the deadline; on timeout, abort the cell and fail it LOUD.\n artifact = await Promise.race([\n dispatched,\n new Promise<never>((_, reject) => {\n timeoutTimer = setTimeout(() => {\n cellAbort.abort(new Error('dispatch timeout'))\n reject(\n new Error(\n `dispatch exceeded ${timeoutMs}ms for cell '${args.slot.cellId}' — aborted and failed loud (no silent hang)`,\n ),\n )\n }, timeoutMs)\n if (typeof (timeoutTimer as { unref?: () => void }).unref === 'function')\n (timeoutTimer as { unref: () => void }).unref()\n }),\n ])\n } else {\n artifact = await dispatched\n }\n } catch (err) {\n errorMessage = err instanceof Error ? err.message : String(err)\n } finally {\n if (timeoutTimer) clearTimeout(timeoutTimer)\n args.signal.removeEventListener('abort', onCampaignAbort)\n }\n\n // Run judges (only if we have an artifact). A judge that throws invalidates\n // the cell — recorded as `error`, NOT folded into a fake composite:0 (a fake\n // zero is indistinguishable from a real zero and poisons every aggregate).\n const judgeScores: Record<string, JudgeScore> = {}\n if (artifact !== undefined) {\n for (const judge of args.opts.judges ?? []) {\n if (judge.appliesTo && !judge.appliesTo(args.slot.scenario)) continue\n try {\n judgeScores[judge.name] = await runJudgeCell(judge, {\n artifact,\n scenario: args.slot.scenario,\n signal: args.signal,\n })\n } catch (err) {\n errorMessage = `judge '${judge.name}' failed: ${err instanceof Error ? err.message : String(err)}`\n break\n }\n }\n }\n\n await trace.flush()\n\n const cell: CampaignCellResult<TArtifact> = {\n cellId: args.slot.cellId,\n scenarioId: args.slot.scenario.id,\n rep: args.slot.rep,\n artifact: (artifact ?? null) as TArtifact,\n judgeScores,\n costUsd: costSoFar,\n tokenUsage: { ...tokensSoFar },\n durationMs: Date.now() - startMs,\n seed: args.slot.cellSeed,\n cached: false,\n error: errorMessage,\n }\n\n if (!errorMessage && args.resumable) {\n storage.write(cachePath, JSON.stringify(cell))\n }\n\n return { cell, artifactsByPath }\n}\n\n/**\n * Per-cell stub guard. A cell that produced an artifact (no error) but reported\n * `costUsd === 0` AND zero tokens means the dispatch never called `ctx.cost` —\n * i.e. it ran against a stub or silently dropped its usage. `'warn'` logs it,\n * `'assert'` throws (fail-fast), `'off'` skips. An errored/skipped cell or a\n * deterministic judge-only run that genuinely made no LLM call is not flagged.\n */\nfunction enforceCellUsage<TArtifact>(\n cell: CampaignCellResult<TArtifact>,\n mode: 'assert' | 'warn' | 'off',\n): void {\n if (mode === 'off' || cell.error) return\n if (cell.artifact === null || cell.artifact === undefined) return\n const zeroTokens = cell.tokenUsage.input === 0 && cell.tokenUsage.output === 0\n if (cell.costUsd !== 0 || !zeroTokens) return\n const msg = `cell '${cell.cellId}' produced an artifact but reported zero cost and zero tokens — the dispatch never reported LLM usage via ctx.cost.observe/observeTokens (a stub cell)`\n if (mode === 'assert') {\n const report: BackendIntegrityReport = {\n totalRecords: 1,\n stubRecords: 1,\n realRecords: 0,\n uncostedRecords: 0,\n totalInputTokens: 0,\n totalOutputTokens: 0,\n totalCostUsd: 0,\n verdict: 'stub',\n diagnosis: msg,\n }\n throw new BackendIntegrityError(`expectUsage: ${msg}`, report)\n }\n // eslint-disable-next-line no-console\n console.warn(`[runCampaign] expectUsage: ${msg}`)\n}\n\nasync function runJudgeCell<TArtifact, TScenario extends Scenario>(\n judge: JudgeConfig<TArtifact, TScenario>,\n input: { artifact: TArtifact; scenario: TScenario; signal: AbortSignal },\n): Promise<JudgeScore> {\n return judge.score(input)\n}\n\nfunction defaultBuildTraceWriter(\n storage: CampaignStorage,\n): (cellId: string, dir: string) => CampaignTraceWriter {\n return (cellId, dir) => {\n const spans: Array<Record<string, unknown>> = []\n return {\n span(name, attributes) {\n const startMs = Date.now()\n const record: Record<string, unknown> = { name, cellId, startMs, ...(attributes ?? {}) }\n const finish: TraceSpan = {\n end(endAttrs) {\n record.durationMs = Date.now() - startMs\n if (endAttrs) Object.assign(record, endAttrs)\n spans.push(record)\n },\n setAttribute(key, value) {\n record[key] = value\n },\n }\n return finish\n },\n async flush() {\n storage.write(join(dir, 'spans.jsonl'), spans.map((s) => JSON.stringify(s)).join('\\n'))\n },\n }\n }\n}\n\nfunction skippedCell<TScenario extends Scenario, TArtifact>(\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number },\n reason: string,\n): CampaignCellResult<TArtifact> {\n return {\n cellId: slot.cellId,\n scenarioId: slot.scenario.id,\n rep: slot.rep,\n artifact: null as unknown as TArtifact,\n judgeScores: {},\n costUsd: 0,\n tokenUsage: { input: 0, output: 0 },\n durationMs: 0,\n seed: slot.cellSeed,\n cached: false,\n error: `skipped: ${reason}`,\n }\n}\n\ninterface CaptureArgs<TScenario extends Scenario, TArtifact> {\n store: LabeledScenarioStore\n cell: CampaignCellResult<TArtifact>\n scenario: TScenario\n opts: RunCampaignOptions<TScenario, TArtifact>\n now: () => Date\n}\n\nasync function captureToStore<TScenario extends Scenario, TArtifact>(\n args: CaptureArgs<TScenario, TArtifact>,\n): Promise<void> {\n await args.store.observe({\n scenario: args.scenario,\n artifact: args.cell.artifact,\n judgeScores: args.cell.judgeScores,\n source: args.opts.captureSource ?? 'eval-run',\n sourceVersionHash: args.opts.captureSourceVersionHash ?? 'unknown',\n capturedAt: args.now().toISOString(),\n redactionStatus: 'raw',\n })\n}\n\n// ── Aggregates + manifest hash ────────────────────────────────────────\n\nfunction computeManifestHash(input: {\n scenarios: Scenario[]\n judges: JudgeConfig<unknown>[]\n dispatchRef: string\n seed: number\n reps: number\n}): string {\n const canonical = {\n scenarios: input.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n judges: input.judges.map((j) => ({ name: j.name, dims: j.dimensions.map((d) => d.key) })),\n dispatch: input.dispatchRef,\n seed: input.seed,\n reps: input.reps,\n }\n return createHash('sha256').update(JSON.stringify(canonical)).digest('hex')\n}\n\nfunction computeAggregates<TArtifact>(\n cells: CampaignCellResult<TArtifact>[],\n judges: JudgeConfig<TArtifact>[],\n seed: number,\n): CampaignAggregates {\n const byJudge: Record<string, JudgeAggregate> = {}\n for (const judge of judges) {\n const scores: number[] = []\n for (const cell of cells) {\n const s = cell.judgeScores[judge.name]\n if (s !== undefined) scores.push(s.composite)\n }\n byJudge[judge.name] = aggregate(scores, seed)\n }\n const byScenario: Record<string, ScenarioAggregate> = {}\n const scenarioGroups = new Map<string, number[]>()\n for (const cell of cells) {\n const composites = Object.values(cell.judgeScores).map((s) => s.composite)\n if (composites.length === 0) continue\n const mean = composites.reduce((a, b) => a + b, 0) / composites.length\n const arr = scenarioGroups.get(cell.scenarioId) ?? []\n arr.push(mean)\n scenarioGroups.set(cell.scenarioId, arr)\n }\n for (const [scenarioId, samples] of scenarioGroups) {\n const ag = aggregate(samples, seed)\n byScenario[scenarioId] = { meanComposite: ag.mean, ci95: ag.ci95, n: ag.n }\n }\n return {\n byJudge,\n byScenario,\n totalCostUsd: cells.reduce((a, c) => a + c.costUsd, 0),\n cellsExecuted: cells.filter((c) => !c.error).length,\n cellsSkipped: cells.filter((c) => c.error?.startsWith('skipped:')).length,\n cellsCached: cells.filter((c) => c.cached).length,\n cellsFailed: cells.filter((c) => c.error && !c.error.startsWith('skipped:')).length,\n }\n}\n\n// Percentile bootstrap CI95 via seeded resampling. Deterministic for a given\n// seed — same campaign re-run produces identical CI bands. Falls back to\n// degenerate intervals at n<=1 (the bootstrap is undefined there).\nfunction aggregate(samples: number[], seed: number): JudgeAggregate {\n const n = samples.length\n if (n === 0) return { mean: 0, stdev: 0, ci95: [0, 0], n: 0 }\n const mean = samples.reduce((a, b) => a + b, 0) / n\n const variance = samples.reduce((a, b) => a + (b - mean) ** 2, 0) / Math.max(1, n - 1)\n const stdev = Math.sqrt(variance)\n const ci = confidenceInterval(samples, 0.95, { seed, resamples: 1000 })\n return { mean, stdev, ci95: [ci.lower, ci.upper], n }\n}\n","/**\n * Backend-integrity guard: distinguish \"agent failed\" from \"eval ran against\n * a stub / unconfigured backend.\" Without this guard a canonical eval can\n * silently report `0/N passed` and look like an agent-quality problem when\n * the LLM was never actually called — the failure mode we just hit running\n * the 4-vertical parallel eval (legal-sandbox-stub returned hard-coded 33-104\n * char strings; gtm/creative defaulted to a cli-bridge that wasn't running).\n *\n * The shape:\n *\n * const report = summarizeBackendIntegrity(records)\n * assertRealBackend(records) // throws BackendIntegrityError if 100% stub\n *\n * A record is \"stub-mode\" if its `tokenUsage.input === 0 && tokenUsage.output === 0`.\n * (`costUsd` alone is unreliable — some backends successfully call LLMs but\n * don't propagate pricing, producing real tokens with $0 cost.)\n *\n * Verdicts:\n * - `real` — at least one record has nonzero token usage\n * - `stub` — every record is stub-mode (eval ran blind)\n * - `mixed` — some records real, some stub (partial backend failure;\n * often the 429-cascade or auth-half-failed case)\n */\n\nimport { AgentEvalError } from '../errors'\nimport type { RunRecord } from '../run-record'\n\nexport interface BackendIntegrityReport {\n /** Total records inspected. */\n totalRecords: number\n /** Records with input=0 AND output=0 (a stub fingerprint). */\n stubRecords: number\n /** Records with nonzero token usage (real LLM activity). */\n realRecords: number\n /** Records where output>0 but costUsd=0 (real LLM, broken cost ledger). */\n uncostedRecords: number\n /** Sum of input tokens across all records. */\n totalInputTokens: number\n /** Sum of output tokens across all records. */\n totalOutputTokens: number\n /** Sum of costUsd across all records. */\n totalCostUsd: number\n /** Worst-case integrity verdict. */\n verdict: 'real' | 'mixed' | 'stub'\n /** Human-readable diagnosis suitable for terminal output. */\n diagnosis: string\n}\n\n/**\n * Error thrown when an integrity assertion fails. Caller can pattern-match\n * by `code === 'AGENT_EVAL_BACKEND_STUB'` to differentiate from other\n * errors.\n */\nexport class BackendIntegrityError extends AgentEvalError {\n constructor(\n message: string,\n public readonly report: BackendIntegrityReport,\n ) {\n super('backend_integrity', message)\n }\n}\n\nfunction isStubRecord(rec: RunRecord): boolean {\n return rec.tokenUsage.input === 0 && rec.tokenUsage.output === 0\n}\n\nfunction isUncostedRecord(rec: RunRecord): boolean {\n return rec.tokenUsage.output > 0 && rec.costUsd === 0\n}\n\n/**\n * Inspect a batch of RunRecords and return an integrity report. Pure\n * function — no I/O, no logging. The caller decides what to do with the\n * verdict (print warning, throw, gate CI, etc.).\n */\nexport function summarizeBackendIntegrity(\n records: ReadonlyArray<RunRecord>,\n): BackendIntegrityReport {\n const totalRecords = records.length\n let stubRecords = 0\n let realRecords = 0\n let uncostedRecords = 0\n let totalInputTokens = 0\n let totalOutputTokens = 0\n let totalCostUsd = 0\n for (const rec of records) {\n totalInputTokens += rec.tokenUsage.input\n totalOutputTokens += rec.tokenUsage.output\n totalCostUsd += rec.costUsd\n if (isStubRecord(rec)) stubRecords++\n else realRecords++\n if (isUncostedRecord(rec)) uncostedRecords++\n }\n const verdict: BackendIntegrityReport['verdict'] =\n totalRecords === 0\n ? 'stub'\n : stubRecords === totalRecords\n ? 'stub'\n : stubRecords === 0\n ? 'real'\n : 'mixed'\n const diagnosis = buildDiagnosis({\n totalRecords,\n stubRecords,\n realRecords,\n uncostedRecords,\n totalInputTokens,\n totalOutputTokens,\n totalCostUsd,\n verdict,\n })\n return {\n totalRecords,\n stubRecords,\n realRecords,\n uncostedRecords,\n totalInputTokens,\n totalOutputTokens,\n totalCostUsd,\n verdict,\n diagnosis,\n }\n}\n\nfunction buildDiagnosis(r: Omit<BackendIntegrityReport, 'diagnosis'>): string {\n if (r.totalRecords === 0) {\n return 'no records — eval produced zero runs; backend likely failed before first turn'\n }\n if (r.verdict === 'stub') {\n return [\n `all ${r.totalRecords} records have zero token usage — the LLM backend was never called.`,\n 'common causes: --backend sandbox without a sandbox bridge running; stub model returning hard-coded strings;',\n 'auth misconfigured so requests were silently dropped before the LLM. Re-run with --backend tcloud and TANGLE_API_KEY set,',\n 'or boot the cli-bridge / sandbox before invoking the eval.',\n ].join(' ')\n }\n if (r.verdict === 'mixed') {\n const pct = ((r.stubRecords / r.totalRecords) * 100).toFixed(0)\n return [\n `${r.stubRecords}/${r.totalRecords} records (${pct}%) have zero token usage — the backend partially failed.`,\n 'common causes: rate-limit cascade (429s after the first N personas);',\n 'transient auth expiry mid-run; provider outage. Treat the affected records as missing data, not agent failures.',\n ].join(' ')\n }\n // verdict === 'real'\n if (r.uncostedRecords > 0) {\n const pct = ((r.uncostedRecords / r.totalRecords) * 100).toFixed(0)\n return [\n `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens).`,\n `${r.uncostedRecords} (${pct}%) have output tokens but costUsd=0 — cost ledger is mis-wired (no input-token`,\n 'propagation from the runtime stream into RunRecord).',\n ].join(' ')\n }\n return `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens, $${r.totalCostUsd.toFixed(4)}).`\n}\n\n/**\n * Throw BackendIntegrityError if the verdict is 'stub' — i.e. every record\n * shows zero LLM activity. Non-strict callers can pass `{ allowMixed: false }`\n * to also reject mixed verdicts (recommended for CI gates).\n *\n * Real backends pass through silently.\n */\nexport function assertRealBackend(\n records: ReadonlyArray<RunRecord>,\n opts: { allowMixed?: boolean } = {},\n): BackendIntegrityReport {\n const report = summarizeBackendIntegrity(records)\n const allowMixed = opts.allowMixed ?? true\n if (report.verdict === 'stub') {\n throw new BackendIntegrityError(\n `backend-integrity: ran against a stub or unconfigured backend — ${report.diagnosis}`,\n report,\n )\n }\n if (!allowMixed && report.verdict === 'mixed') {\n throw new BackendIntegrityError(\n `backend-integrity: partial backend failure rejected — ${report.diagnosis}`,\n report,\n )\n }\n return report\n}\n","import { createRequire } from 'node:module'\n\n/**\n * @experimental\n *\n * `CampaignStorage` — the filesystem seam `runCampaign` writes through\n * (run/cell dirs, the resumability cache, per-cell artifacts, trace spans).\n *\n * The default (`fsCampaignStorage`) is the Node filesystem — identical\n * behavior to the inline `node:fs` calls it replaces, so existing CLI\n * consumers are unaffected. `inMemoryCampaignStorage` keeps everything in a\n * `Map`, so the substrate runs in environments WITHOUT a filesystem\n * (Cloudflare Workers, Deno Deploy, other edge runtimes) — the campaign\n * still produces its `CampaignResult` (cells + aggregates) in memory;\n * artifacts/traces simply aren't persisted to disk.\n *\n * Paths are opaque keys to the in-memory adapter — it does not parse them,\n * so the same `join(...)`-built paths work unchanged across both adapters.\n */\nexport interface CampaignStorage {\n /** Ensure a directory exists (recursive). No-op for in-memory. */\n ensureDir(dir: string): void\n /** Does this path exist (as a written file or an ensured dir)? */\n exists(path: string): boolean\n /** Read a UTF-8 file; `undefined` when missing or unreadable. */\n read(path: string): string | undefined\n /** Write a file (string or bytes). Parent dir is assumed ensured. */\n write(path: string, content: string | Uint8Array): void\n}\n\n/** Node-filesystem storage — the default. Lazily requires `node:fs` so the\n * module imports cleanly in non-Node runtimes (where the caller passes\n * `inMemoryCampaignStorage` instead and never constructs this).\n *\n * `createRequire(import.meta.url)` is the ESM-native lazy require — a bare\n * `require` is a ReferenceError under `\"type\": \"module\"`, which is exactly\n * the shape this package publishes. */\nexport function fsCampaignStorage(): CampaignStorage {\n const nodeRequire = createRequire(import.meta.url)\n const { existsSync, mkdirSync, readFileSync, writeFileSync } = nodeRequire(\n 'node:fs',\n ) as typeof import('node:fs')\n return {\n ensureDir(dir) {\n if (!existsSync(dir)) mkdirSync(dir, { recursive: true })\n },\n exists(path) {\n return existsSync(path)\n },\n read(path) {\n try {\n return readFileSync(path, 'utf8')\n } catch {\n return undefined\n }\n },\n write(path, content) {\n writeFileSync(path, content as Uint8Array)\n },\n }\n}\n\n/** In-memory storage for filesystem-less runtimes. Artifacts + trace spans\n * live in a `Map` for the duration of the run; the `CampaignResult` is\n * fully populated, but nothing is persisted to disk. */\nexport function inMemoryCampaignStorage(): CampaignStorage {\n const files = new Map<string, string | Uint8Array>()\n const dirs = new Set<string>()\n return {\n ensureDir(dir) {\n dirs.add(dir)\n },\n exists(path) {\n return files.has(path) || dirs.has(path)\n },\n read(path) {\n const value = files.get(path)\n if (value === undefined) return undefined\n return typeof value === 'string' ? value : new TextDecoder().decode(value)\n },\n write(path, content) {\n files.set(path, content)\n },\n }\n}\n"],"mappings":";;;;;;;;AAaA,SAAS,kBAAkB;AAC3B,SAAS,YAAY;;;ACuCd,IAAM,wBAAN,cAAoC,eAAe;AAAA,EACxD,YACE,SACgB,QAChB;AACA,UAAM,qBAAqB,OAAO;AAFlB;AAAA,EAGlB;AAAA,EAHkB;AAIpB;AAEA,SAAS,aAAa,KAAyB;AAC7C,SAAO,IAAI,WAAW,UAAU,KAAK,IAAI,WAAW,WAAW;AACjE;AAEA,SAAS,iBAAiB,KAAyB;AACjD,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,YAAY;AACtD;AAOO,SAAS,0BACd,SACwB;AACxB,QAAM,eAAe,QAAQ;AAC7B,MAAI,cAAc;AAClB,MAAI,cAAc;AAClB,MAAI,kBAAkB;AACtB,MAAI,mBAAmB;AACvB,MAAI,oBAAoB;AACxB,MAAI,eAAe;AACnB,aAAW,OAAO,SAAS;AACzB,wBAAoB,IAAI,WAAW;AACnC,yBAAqB,IAAI,WAAW;AACpC,oBAAgB,IAAI;AACpB,QAAI,aAAa,GAAG,EAAG;AAAA,QAClB;AACL,QAAI,iBAAiB,GAAG,EAAG;AAAA,EAC7B;AACA,QAAM,UACJ,iBAAiB,IACb,SACA,gBAAgB,eACd,SACA,gBAAgB,IACd,SACA;AACV,QAAM,YAAY,eAAe;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAAS,eAAe,GAAsD;AAC5E,MAAI,EAAE,iBAAiB,GAAG;AACxB,WAAO;AAAA,EACT;AACA,MAAI,EAAE,YAAY,QAAQ;AACxB,WAAO;AAAA,MACL,OAAO,EAAE,YAAY;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AACA,MAAI,EAAE,YAAY,SAAS;AACzB,UAAM,OAAQ,EAAE,cAAc,EAAE,eAAgB,KAAK,QAAQ,CAAC;AAC9D,WAAO;AAAA,MACL,GAAG,EAAE,WAAW,IAAI,EAAE,YAAY,aAAa,GAAG;AAAA,MAClD;AAAA,MACA;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AAEA,MAAI,EAAE,kBAAkB,GAAG;AACzB,UAAM,OAAQ,EAAE,kBAAkB,EAAE,eAAgB,KAAK,QAAQ,CAAC;AAClE,WAAO;AAAA,MACL,GAAG,EAAE,YAAY,uCAAuC,EAAE,gBAAgB,SAAS,EAAE,iBAAiB;AAAA,MACtG,GAAG,EAAE,eAAe,KAAK,GAAG;AAAA,MAC5B;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AACA,SAAO,GAAG,EAAE,YAAY,uCAAuC,EAAE,gBAAgB,SAAS,EAAE,iBAAiB,aAAa,EAAE,aAAa,QAAQ,CAAC,CAAC;AACrJ;AASO,SAAS,kBACd,SACA,OAAiC,CAAC,GACV;AACxB,QAAM,SAAS,0BAA0B,OAAO;AAChD,QAAM,aAAa,KAAK,cAAc;AACtC,MAAI,OAAO,YAAY,QAAQ;AAC7B,UAAM,IAAI;AAAA,MACR,wEAAmE,OAAO,SAAS;AAAA,MACnF;AAAA,IACF;AAAA,EACF;AACA,MAAI,CAAC,cAAc,OAAO,YAAY,SAAS;AAC7C,UAAM,IAAI;AAAA,MACR,8DAAyD,OAAO,SAAS;AAAA,MACzE;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;;;ACtLA,SAAS,qBAAqB;AAqCvB,SAAS,oBAAqC;AACnD,QAAM,cAAc,cAAc,YAAY,GAAG;AACjD,QAAM,EAAE,YAAY,WAAW,cAAc,cAAc,IAAI;AAAA,IAC7D;AAAA,EACF;AACA,SAAO;AAAA,IACL,UAAU,KAAK;AACb,UAAI,CAAC,WAAW,GAAG,EAAG,WAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AAAA,IAC1D;AAAA,IACA,OAAO,MAAM;AACX,aAAO,WAAW,IAAI;AAAA,IACxB;AAAA,IACA,KAAK,MAAM;AACT,UAAI;AACF,eAAO,aAAa,MAAM,MAAM;AAAA,MAClC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,oBAAc,MAAM,OAAqB;AAAA,IAC3C;AAAA,EACF;AACF;AAKO,SAAS,0BAA2C;AACzD,QAAM,QAAQ,oBAAI,IAAiC;AACnD,QAAM,OAAO,oBAAI,IAAY;AAC7B,SAAO;AAAA,IACL,UAAU,KAAK;AACb,WAAK,IAAI,GAAG;AAAA,IACd;AAAA,IACA,OAAO,MAAM;AACX,aAAO,MAAM,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI;AAAA,IACzC;AAAA,IACA,KAAK,MAAM;AACT,YAAM,QAAQ,MAAM,IAAI,IAAI;AAC5B,UAAI,UAAU,OAAW,QAAO;AAChC,aAAO,OAAO,UAAU,WAAW,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK;AAAA,IAC3E;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,YAAM,IAAI,MAAM,OAAO;AAAA,IACzB;AAAA,EACF;AACF;;;AF8BA,eAAsB,YACpB,MAC+C;AAC/C,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,QAAM,MAAM,KAAK,QAAQ,MAAM,oBAAI,KAAK;AACxC,QAAM,SAAS,KAAK,UAAU,CAAC;AAC/B,QAAM,UAAU,KAAK,WAAW,kBAAkB;AAElD,UAAQ,UAAU,KAAK,MAAM;AAE7B,QAAM,eAAe,oBAAoB;AAAA,IACvC,WAAW,KAAK;AAAA,IAChB;AAAA,IACA,aAAa,KAAK,SAAS,QAAQ;AAAA,IACnC;AAAA,IACA;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI;AACtB,QAAM,QAAyC,CAAC;AAChD,QAAM,kBAA0C,CAAC;AAGjD,QAAM,WAA0F,CAAC;AACjG,MAAI,YAAY;AAChB,aAAW,YAAY,KAAK,WAAW;AACrC,aAAS,MAAM,GAAG,MAAM,MAAM,OAAO;AACnC,YAAM,SAAS,GAAG,SAAS,EAAE,IAAI,GAAG;AACpC,YAAM,WAAW,OAAO;AACxB,eAAS,KAAK,EAAE,UAAU,KAAK,QAAQ,SAAS,CAAC;AACjD,mBAAa;AAAA,IACf;AAAA,EACF;AAGA,MAAI,eAAe;AACnB,MAAI,qBAAqB;AACzB,QAAM,kBAAkB,IAAI,gBAAgB;AAI5C,QAAM,QAAyB,CAAC;AAChC,MAAI,UAAU;AACd,QAAM,WAAW;AAEjB,WAAS,IAAI,GAAG,IAAI,gBAAgB,KAAK;AACvC,UAAM;AAAA,OACH,YAAY;AACX,eAAO,MAAM;AACX,gBAAM,QAAQ;AACd,cAAI,SAAS,SAAS,OAAQ;AAC9B,gBAAM,OAAO,SAAS,KAAK;AAC3B,cAAI,oBAAoB;AACtB,qBAAS,KAAK,YAAY,MAAM,sBAAsB,CAAC;AACvD;AAAA,UACF;AACA,gBAAM,SAAS,MAAM,YAAY;AAAA,YAC/B;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA,kBAAkB,KAAK,oBAAoB,wBAAwB,OAAO;AAAA,YAC1E,QAAQ,gBAAgB;AAAA,YACxB,mBAAmB,KAAK;AAAA,UAC1B,CAAC;AACD,mBAAS,KAAK,OAAO,IAAI;AACzB,2BAAiB,OAAO,MAAM,KAAK,eAAe,MAAM;AACxD,0BAAgB,OAAO,KAAK;AAC5B,iBAAO,OAAO,iBAAiB,OAAO,eAAe;AACrD,cAAI,KAAK,gBAAgB,UAAa,gBAAgB,KAAK,aAAa;AACtE,iCAAqB;AAAA,UACvB;AAEA,cAAI,KAAK,gBAAgB,KAAK,iBAAiB,SAAS,CAAC,OAAO,KAAK,OAAO;AAC1E,kBAAM,eAAe;AAAA,cACnB,OAAO,KAAK;AAAA,cACZ,MAAM,OAAO;AAAA,cACb,UAAU,KAAK;AAAA,cACf;AAAA,cACA;AAAA,YACF,CAAC,EAAE,MAAM,CAAC,QAAQ;AAGhB,sBAAQ;AAAA,gBACN,oCAAoC,OAAO,KAAK,MAAM,KAAK,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,cAC7G;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF,GAAG;AAAA,IACL;AAAA,EACF;AACA,QAAM,QAAQ,IAAI,KAAK;AAEvB,QAAM,UAAU,IAAI;AACpB,WAAS,KAAK,CAAC,GAAG,MAAM,EAAE,OAAO,cAAc,EAAE,MAAM,CAAC;AAExD,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,WAAW,UAAU,YAAY;AAAA,IACjC,SAAS,QAAQ,YAAY;AAAA,IAC7B,YAAY,QAAQ,QAAQ,IAAI,UAAU,QAAQ;AAAA,IAClD,OAAO;AAAA,IACP;AAAA,IACA,QAAQ,KAAK;AAAA,IACb;AAAA,IACA,WAAW,KAAK,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,EACnE;AACF;AAgBA,eAAe,YACb,MAC2F;AAC3F,QAAM,UAAU,KAAK;AACrB,QAAM,UAAU,KAAK,KAAK,KAAK,QAAQ,KAAK,KAAK,OAAO,QAAQ,mBAAmB,GAAG,CAAC;AACvF,UAAQ,UAAU,OAAO;AAGzB,QAAM,YAAY,KAAK,SAAS,oBAAoB;AACpD,MAAI,KAAK,WAAW;AAClB,UAAM,MAAM,QAAQ,KAAK,SAAS;AAClC,QAAI,QAAQ,QAAW;AACrB,UAAI;AACF,cAAM,SAAS,KAAK,MAAM,GAAG;AAC7B,YAAI,OAAO,WAAW,KAAK,KAAK,QAAQ;AACtC,iBAAO,EAAE,MAAM,EAAE,GAAG,QAAQ,QAAQ,KAAK,GAAG,iBAAiB,CAAC,EAAE;AAAA,QAClE;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAEA,QAAM,UAAU,KAAK,IAAI;AACzB,QAAM,QAAQ,KAAK,iBAAiB,KAAK,KAAK,QAAQ,OAAO;AAC7D,QAAM,kBAA0C,CAAC;AACjD,QAAM,YAAoC;AAAA,IACxC,MAAM,MAAM,MAAM,SAAS;AACzB,YAAM,WAAW,KAAK,SAAS,IAAI;AACnC,cAAQ,UAAU,KAAK,UAAU,IAAI,CAAC;AACtC,cAAQ,MAAM,UAAU,OAAO;AAC/B,sBAAgB,GAAG,KAAK,KAAK,MAAM,IAAI,IAAI,EAAE,IAAI;AACjD,aAAO;AAAA,IACT;AAAA,IACA,MAAM,UAAU,MAAM,OAAO;AAC3B,aAAO,UAAU,MAAM,MAAM,KAAK,UAAU,OAAO,MAAM,CAAC,CAAC;AAAA,IAC7D;AAAA,EACF;AACA,MAAI,YAAY;AAChB,QAAM,cAAkC,EAAE,OAAO,GAAG,QAAQ,EAAE;AAC9D,QAAM,OAA0B;AAAA,IAC9B,QAAQ,QAAQ,QAAQ;AACtB,mBAAa;AACb,YAAM,KAAK,QAAQ,MAAM,IAAI,EAAE,WAAW,OAAO,CAAC,EAAE,IAAI;AAAA,IAC1D;AAAA,IACA,cAAc,OAAO;AACnB,kBAAY,SAAS,MAAM;AAC3B,kBAAY,UAAU,MAAM;AAC5B,UAAI,MAAM,OAAQ,aAAY,UAAU,YAAY,UAAU,KAAK,MAAM;AAAA,IAC3E;AAAA,IACA,UAAU;AACR,aAAO;AAAA,IACT;AAAA,IACA,SAAS;AACP,aAAO,EAAE,GAAG,YAAY;AAAA,IAC1B;AAAA,EACF;AAEA,QAAM,YAAY,KAAK,KAAK,gBAAgB;AAAA,IAC1C,UAAU,KAAK,KAAK;AAAA,IACpB,KAAK,KAAK,KAAK;AAAA,EACjB,CAAC;AAMD,QAAM,YAAY,IAAI,gBAAgB;AACtC,QAAM,kBAAkB,MAAM,UAAU,MAAO,KAAK,OAAgC,MAAM;AAC1F,MAAI,KAAK,OAAO,QAAS,WAAU,MAAO,KAAK,OAAgC,MAAM;AAAA,MAChF,MAAK,OAAO,iBAAiB,SAAS,iBAAiB,EAAE,MAAM,KAAK,CAAC;AAE1E,QAAM,MAAuB;AAAA,IAC3B,QAAQ,KAAK,KAAK;AAAA,IAClB,KAAK,KAAK,KAAK;AAAA,IACf,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ,UAAU;AAAA,IAClB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,MAAI;AACJ,MAAI;AACJ,QAAM,YAAY,KAAK;AACvB,MAAI;AACJ,MAAI;AACF,UAAM,aAAa,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,GAAG;AAC7D,QAAI,cAAc,UAAa,YAAY,GAAG;AAK5C,iBAAW,MAAM,QAAQ,KAAK;AAAA,QAC5B;AAAA,QACA,IAAI,QAAe,CAAC,GAAG,WAAW;AAChC,yBAAe,WAAW,MAAM;AAC9B,sBAAU,MAAM,IAAI,MAAM,kBAAkB,CAAC;AAC7C;AAAA,cACE,IAAI;AAAA,gBACF,qBAAqB,SAAS,gBAAgB,KAAK,KAAK,MAAM;AAAA,cAChE;AAAA,YACF;AAAA,UACF,GAAG,SAAS;AACZ,cAAI,OAAQ,aAAwC,UAAU;AAC5D,YAAC,aAAuC,MAAM;AAAA,QAClD,CAAC;AAAA,MACH,CAAC;AAAA,IACH,OAAO;AACL,iBAAW,MAAM;AAAA,IACnB;AAAA,EACF,SAAS,KAAK;AACZ,mBAAe,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,EAChE,UAAE;AACA,QAAI,aAAc,cAAa,YAAY;AAC3C,SAAK,OAAO,oBAAoB,SAAS,eAAe;AAAA,EAC1D;AAKA,QAAM,cAA0C,CAAC;AACjD,MAAI,aAAa,QAAW;AAC1B,eAAW,SAAS,KAAK,KAAK,UAAU,CAAC,GAAG;AAC1C,UAAI,MAAM,aAAa,CAAC,MAAM,UAAU,KAAK,KAAK,QAAQ,EAAG;AAC7D,UAAI;AACF,oBAAY,MAAM,IAAI,IAAI,MAAM,aAAa,OAAO;AAAA,UAClD;AAAA,UACA,UAAU,KAAK,KAAK;AAAA,UACpB,QAAQ,KAAK;AAAA,QACf,CAAC;AAAA,MACH,SAAS,KAAK;AACZ,uBAAe,UAAU,MAAM,IAAI,aAAa,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAChG;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,MAAM,MAAM;AAElB,QAAM,OAAsC;AAAA,IAC1C,QAAQ,KAAK,KAAK;AAAA,IAClB,YAAY,KAAK,KAAK,SAAS;AAAA,IAC/B,KAAK,KAAK,KAAK;AAAA,IACf,UAAW,YAAY;AAAA,IACvB;AAAA,IACA,SAAS;AAAA,IACT,YAAY,EAAE,GAAG,YAAY;AAAA,IAC7B,YAAY,KAAK,IAAI,IAAI;AAAA,IACzB,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ;AAAA,IACR,OAAO;AAAA,EACT;AAEA,MAAI,CAAC,gBAAgB,KAAK,WAAW;AACnC,YAAQ,MAAM,WAAW,KAAK,UAAU,IAAI,CAAC;AAAA,EAC/C;AAEA,SAAO,EAAE,MAAM,gBAAgB;AACjC;AASA,SAAS,iBACP,MACA,MACM;AACN,MAAI,SAAS,SAAS,KAAK,MAAO;AAClC,MAAI,KAAK,aAAa,QAAQ,KAAK,aAAa,OAAW;AAC3D,QAAM,aAAa,KAAK,WAAW,UAAU,KAAK,KAAK,WAAW,WAAW;AAC7E,MAAI,KAAK,YAAY,KAAK,CAAC,WAAY;AACvC,QAAM,MAAM,SAAS,KAAK,MAAM;AAChC,MAAI,SAAS,UAAU;AACrB,UAAM,SAAiC;AAAA,MACrC,cAAc;AAAA,MACd,aAAa;AAAA,MACb,aAAa;AAAA,MACb,iBAAiB;AAAA,MACjB,kBAAkB;AAAA,MAClB,mBAAmB;AAAA,MACnB,cAAc;AAAA,MACd,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AACA,UAAM,IAAI,sBAAsB,gBAAgB,GAAG,IAAI,MAAM;AAAA,EAC/D;AAEA,UAAQ,KAAK,8BAA8B,GAAG,EAAE;AAClD;AAEA,eAAe,aACb,OACA,OACqB;AACrB,SAAO,MAAM,MAAM,KAAK;AAC1B;AAEA,SAAS,wBACP,SACsD;AACtD,SAAO,CAAC,QAAQ,QAAQ;AACtB,UAAM,QAAwC,CAAC;AAC/C,WAAO;AAAA,MACL,KAAK,MAAM,YAAY;AACrB,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,SAAkC,EAAE,MAAM,QAAQ,SAAS,GAAI,cAAc,CAAC,EAAG;AACvF,cAAM,SAAoB;AAAA,UACxB,IAAI,UAAU;AACZ,mBAAO,aAAa,KAAK,IAAI,IAAI;AACjC,gBAAI,SAAU,QAAO,OAAO,QAAQ,QAAQ;AAC5C,kBAAM,KAAK,MAAM;AAAA,UACnB;AAAA,UACA,aAAa,KAAK,OAAO;AACvB,mBAAO,GAAG,IAAI;AAAA,UAChB;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,MACA,MAAM,QAAQ;AACZ,gBAAQ,MAAM,KAAK,KAAK,aAAa,GAAG,MAAM,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,YACP,MACA,QAC+B;AAC/B,SAAO;AAAA,IACL,QAAQ,KAAK;AAAA,IACb,YAAY,KAAK,SAAS;AAAA,IAC1B,KAAK,KAAK;AAAA,IACV,UAAU;AAAA,IACV,aAAa,CAAC;AAAA,IACd,SAAS;AAAA,IACT,YAAY,EAAE,OAAO,GAAG,QAAQ,EAAE;AAAA,IAClC,YAAY;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,QAAQ;AAAA,IACR,OAAO,YAAY,MAAM;AAAA,EAC3B;AACF;AAUA,eAAe,eACb,MACe;AACf,QAAM,KAAK,MAAM,QAAQ;AAAA,IACvB,UAAU,KAAK;AAAA,IACf,UAAU,KAAK,KAAK;AAAA,IACpB,aAAa,KAAK,KAAK;AAAA,IACvB,QAAQ,KAAK,KAAK,iBAAiB;AAAA,IACnC,mBAAmB,KAAK,KAAK,4BAA4B;AAAA,IACzD,YAAY,KAAK,IAAI,EAAE,YAAY;AAAA,IACnC,iBAAiB;AAAA,EACnB,CAAC;AACH;AAIA,SAAS,oBAAoB,OAMlB;AACT,QAAM,YAAY;AAAA,IAChB,WAAW,MAAM,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,IAClE,QAAQ,MAAM,OAAO,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,EAAE,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;AAAA,IACxF,UAAU,MAAM;AAAA,IAChB,MAAM,MAAM;AAAA,IACZ,MAAM,MAAM;AAAA,EACd;AACA,SAAO,WAAW,QAAQ,EAAE,OAAO,KAAK,UAAU,SAAS,CAAC,EAAE,OAAO,KAAK;AAC5E;AAEA,SAAS,kBACP,OACA,QACA,MACoB;AACpB,QAAM,UAA0C,CAAC;AACjD,aAAW,SAAS,QAAQ;AAC1B,UAAM,SAAmB,CAAC;AAC1B,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,YAAY,MAAM,IAAI;AACrC,UAAI,MAAM,OAAW,QAAO,KAAK,EAAE,SAAS;AAAA,IAC9C;AACA,YAAQ,MAAM,IAAI,IAAI,UAAU,QAAQ,IAAI;AAAA,EAC9C;AACA,QAAM,aAAgD,CAAC;AACvD,QAAM,iBAAiB,oBAAI,IAAsB;AACjD,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACzE,QAAI,WAAW,WAAW,EAAG;AAC7B,UAAM,OAAO,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAChE,UAAM,MAAM,eAAe,IAAI,KAAK,UAAU,KAAK,CAAC;AACpD,QAAI,KAAK,IAAI;AACb,mBAAe,IAAI,KAAK,YAAY,GAAG;AAAA,EACzC;AACA,aAAW,CAAC,YAAY,OAAO,KAAK,gBAAgB;AAClD,UAAM,KAAK,UAAU,SAAS,IAAI;AAClC,eAAW,UAAU,IAAI,EAAE,eAAe,GAAG,MAAM,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE;AAAA,EAC5E;AACA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,cAAc,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,SAAS,CAAC;AAAA,IACrD,eAAe,MAAM,OAAO,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE;AAAA,IAC7C,cAAc,MAAM,OAAO,CAAC,MAAM,EAAE,OAAO,WAAW,UAAU,CAAC,EAAE;AAAA,IACnE,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE;AAAA,IAC3C,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,WAAW,UAAU,CAAC,EAAE;AAAA,EAC/E;AACF;AAKA,SAAS,UAAU,SAAmB,MAA8B;AAClE,QAAM,IAAI,QAAQ;AAClB,MAAI,MAAM,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,EAAE;AAC5D,QAAM,OAAO,QAAQ,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAClD,QAAM,WAAW,QAAQ,OAAO,CAAC,GAAG,MAAM,KAAK,IAAI,SAAS,GAAG,CAAC,IAAI,KAAK,IAAI,GAAG,IAAI,CAAC;AACrF,QAAM,QAAQ,KAAK,KAAK,QAAQ;AAChC,QAAM,KAAK,mBAAmB,SAAS,MAAM,EAAE,MAAM,WAAW,IAAK,CAAC;AACtE,SAAO,EAAE,MAAM,OAAO,MAAM,CAAC,GAAG,OAAO,GAAG,KAAK,GAAG,EAAE;AACtD;","names":[]}

package/dist/{chunk-HKINEDRZ.js → chunk-DFS3FEXO.js} RENAMED Viewed

@@ -13,7 +13,8 @@ async function post(tenant, path, body, opts = {}) {
   const timeoutMs = tenant.timeoutMs ?? 3e4;
   const maxRetries = tenant.retries ?? 2;
   const f = tenant.fetchImpl ?? ((...args) => fetch(...args));
-  const url = `${tenant.endpoint.replace(/\/$/, "")}${path}`;
+  const base = tenant.endpoint.replace(/\/+$/, "").replace(/\/v1$/, "");
+  const url = `${base}${path}`;
   let lastError;
   for (let attempt = 0; attempt <= maxRetries; attempt++) {
     const ourTimeout = AbortSignal.timeout(timeoutMs);
@@ -90,4 +91,4 @@ export {
   createHostedClient,
   hostedClientFromEnv
 };
-//# sourceMappingURL=chunk-HKINEDRZ.js.map
+//# sourceMappingURL=chunk-DFS3FEXO.js.map

package/dist/chunk-DFS3FEXO.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/hosted/types.ts","../src/hosted/client.ts"],"sourcesContent":["/**\n * # Hosted-tier wire format — the schema that EVERY orchestrator (ours,\n * a partner's self-hosted one, a future open implementation) must accept.\n *\n * **Stability:** every type in this file is committed under semver. New\n * minors only ADD optional fields. Breaking changes mean a major bump\n * (`HostedWireVersion` literal increment).\n *\n * The wire format is two event streams in one transport:\n *\n * 1. **Eval-run events** (`POST /v1/ingest/eval-runs`). Posted when a\n * campaign / improvement-loop completes (or per-generation if\n * streaming). Carries the structured result + per-cell scores +\n * surface diffs the orchestrator stores for the dashboard.\n *\n * 2. **Trace spans** (`POST /v1/ingest/traces`). Standard OTLP-shaped\n * spans with a few additional attributes so the orchestrator can\n * pivot from eval-run → underlying execution. Compatible with any\n * OTel collector.\n *\n * Both endpoints are authenticated with a bearer token + a tenant id\n * header. Tenants isolate everything downstream of ingest; no tenant\n * ever sees another tenant's data.\n */\n\nimport type { GateDecision, MutableSurface } from '../campaign/types'\nimport type { InsightReport } from '../contract/insight-report'\n\n// re-export so wire-format consumers can import the optional payload type\n// from `@tangle-network/agent-eval/hosted` without reaching into /contract.\nexport type { InsightReport } from '../contract/insight-report'\n\nexport const HOSTED_WIRE_VERSION = '2026-05-26.v1' as const\nexport type HostedWireVersion = typeof HOSTED_WIRE_VERSION\n\n// ── Transport headers ───────────────────────────────────────────────\n\n/** Every ingest request carries these. */\nexport interface HostedIngestHeaders {\n /** Bearer token. The orchestrator validates against the tenant key. */\n authorization: `Bearer ${string}`\n /** Stable tenant id (the orchestrator-side primary key for the tenant). */\n 'x-tangle-tenant-id': string\n /** Wire-version pin so the server can reject incompatible payloads. */\n 'x-tangle-wire-version': HostedWireVersion\n /** Optional idempotency key for retry-safe ingest. */\n 'idempotency-key'?: string\n}\n\n// ── Eval-run event ──────────────────────────────────────────────────\n\n/** Lifecycle stages of an eval-run as the substrate reports them. */\nexport type EvalRunStatus =\n | 'started'\n | 'baseline-complete'\n | 'generation-complete'\n | 'gate-decided'\n | 'finished'\n | 'errored'\n\nexport interface EvalRunCellScore {\n /** Stable scenario id from the consumer's scenario set. */\n scenarioId: string\n /** Repetition index when reps > 1; 0 for the default. */\n rep: number\n /** Composite score across all judges + dimensions for this cell. */\n compositeMean: number\n /** Per-judge → per-dimension scores; null where the judge did not run. */\n dimensions: Record<string, Record<string, number>>\n /** Per-cell error message if the dispatch threw. Null on success. */\n errorMessage?: string\n}\n\nexport interface EvalRunGenerationSnapshot {\n /** Generation index. 0 is baseline. */\n index: number\n /** Candidate surface fingerprint (stable hash) — pivot key into the\n * trace stream to fetch the underlying execution. */\n surfaceHash: string\n /** The candidate surface itself. May be omitted to avoid PII when the\n * consumer prefers not to ship verbatim prompts. */\n surface?: MutableSurface\n /** Per-cell scores for this generation. */\n cells: EvalRunCellScore[]\n /** Aggregate composite mean across all cells in this generation. */\n compositeMean: number\n /** Total $ spent across this generation. */\n costUsd: number\n /** Wall-clock duration of this generation. */\n durationMs: number\n}\n\n/**\n * The top-level eval-run event. One ingest call per logical eval-run;\n * generations stream in incrementally via repeated calls with the same\n * `runId`. The orchestrator deduplicates by `(runId, generation.index)`.\n */\nexport interface EvalRunEvent {\n /** Stable run id (the substrate's `runId`). UUID or substrate-generated. */\n runId: string\n /** Where this run was happening — derived from `RunCampaignOptions.runDir`. */\n runDir: string\n /** ISO-8601 timestamp the substrate recorded the event. */\n timestamp: string\n /** Lifecycle stage this event represents. */\n status: EvalRunStatus\n /** Free-form consumer tags (env, branch, model id, etc.). Searchable. */\n labels: Record<string, string>\n /** Baseline campaign snapshot. Present when status >= baseline-complete. */\n baseline?: EvalRunGenerationSnapshot\n /** Per-generation snapshots. Streams in; orchestrator appends. */\n generations: EvalRunGenerationSnapshot[]\n /** Final gate decision. Present when status >= gate-decided. */\n gateDecision?: GateDecision\n /** Held-out lift = winner-on-holdout - baseline-on-holdout. */\n holdoutLift?: number\n /** Total $ spent across baseline + every generation. */\n totalCostUsd: number\n /** Total wall-clock duration. */\n totalDurationMs: number\n /** Error message if status === 'errored'. */\n errorMessage?: string\n /** Rigor packet emitted alongside the run — distributional summary,\n * paired-bootstrap lift CI, judge stats, inter-rater agreement,\n * contamination check, failure clusters (when an analyst is wired),\n * outcome correlation (when downstream signal is supplied), and the\n * recommendations the dashboard surfaces verbatim. Additive; older\n * clients that don't know about this field continue to work. */\n insightReport?: InsightReport\n}\n\n// ── Trace span event ────────────────────────────────────────────────\n\n/**\n * OTel-shape span with a few additional attributes for eval-run pivoting.\n * Compatible with any OTLP collector — `name`, `traceId`, `spanId`,\n * `startTimeUnixNano`, `endTimeUnixNano`, `attributes` are stock OTel.\n */\nexport interface TraceSpanEvent {\n traceId: string\n spanId: string\n parentSpanId?: string\n name: string\n startTimeUnixNano: number\n endTimeUnixNano: number\n attributes: Record<string, string | number | boolean>\n events?: Array<{\n timeUnixNano: number\n name: string\n attributes?: Record<string, string | number | boolean>\n }>\n status?: { code: 'OK' | 'ERROR' | 'UNSET'; message?: string }\n /** Pivot back into the eval-run stream. */\n 'tangle.runId'?: string\n /** Pivot to the specific generation. */\n 'tangle.generation'?: number\n /** Pivot to the specific cell. */\n 'tangle.cellId'?: string\n /** Pivot to the specific scenario. */\n 'tangle.scenarioId'?: string\n}\n\n// ── Ingest request bodies ───────────────────────────────────────────\n\nexport interface IngestEvalRunsRequest {\n wireVersion: HostedWireVersion\n events: EvalRunEvent[]\n}\n\nexport interface IngestTracesRequest {\n wireVersion: HostedWireVersion\n spans: TraceSpanEvent[]\n}\n\nexport interface IngestResponse {\n /** Accepted events / spans count. */\n accepted: number\n /** Rejected events with reasons (validation failures, dup idempotency key, etc.). */\n rejected: Array<{ index: number; reason: string }>\n}\n","/**\n * # Hosted-tier ingest client.\n *\n * Ships eval-run events + trace spans to any orchestrator (ours, a\n * partner's self-hosted one, or a future open implementation) that\n * speaks the wire format in `./types.ts`.\n *\n * Three modes:\n * - **Ours:** point at `https://orchestrator.tangle.tools` (the host root —\n * the client appends the versioned `/v1/ingest/...` path itself; a trailing\n * `/v1` on the endpoint is tolerated and normalized away). We handle ingest\n * + storage + dashboard.\n * - **Self-hosted:** point at whatever URL runs the reference receiver\n * from `examples/hosted-ingest-server/`.\n * - **Off (default):** when `hostedTenant` is unset, nothing is sent.\n * Everything stays local.\n */\n\nimport {\n type EvalRunEvent,\n HOSTED_WIRE_VERSION,\n type HostedWireVersion,\n type IngestEvalRunsRequest,\n type IngestResponse,\n type IngestTracesRequest,\n type TraceSpanEvent,\n} from './types'\n\nexport interface HostedTenant {\n /** Orchestrator endpoint base URL (no trailing slash). Required. */\n endpoint: string\n /** Bearer token issued by the orchestrator. Required. */\n apiKey: string\n /** Tenant id — the orchestrator's primary key for this consumer. Required. */\n tenantId: string\n /** Optional `fetch` override (auth wrappers, custom agent, test mocks). */\n fetchImpl?: typeof fetch\n /** Per-call timeout in ms. Default 30s. */\n timeoutMs?: number\n /** Retries on 5xx / network errors. Default 2. */\n retries?: number\n}\n\nexport interface HostedClient {\n ingestEvalRun(event: EvalRunEvent, idempotencyKey?: string): Promise<IngestResponse>\n ingestEvalRuns(events: EvalRunEvent[], idempotencyKey?: string): Promise<IngestResponse>\n ingestTraces(spans: TraceSpanEvent[], idempotencyKey?: string): Promise<IngestResponse>\n readonly tenant: HostedTenant\n readonly wireVersion: HostedWireVersion\n}\n\ninterface RequestOptions {\n idempotencyKey?: string\n signal?: AbortSignal\n}\n\nfunction sleep(ms: number): Promise<void> {\n return new Promise((resolve) => {\n const t = setTimeout(resolve, ms)\n if (typeof (t as { unref?: () => void }).unref === 'function')\n (t as { unref: () => void }).unref()\n })\n}\n\nasync function post<TReq, TRes>(\n tenant: HostedTenant,\n path: string,\n body: TReq,\n opts: RequestOptions = {},\n): Promise<TRes> {\n const timeoutMs = tenant.timeoutMs ?? 30_000\n const maxRetries = tenant.retries ?? 2\n const f: typeof fetch = tenant.fetchImpl ?? ((...args) => fetch(...args))\n // `path` already carries the `/v1` version prefix (e.g. `/v1/ingest/eval-runs`).\n // Strip a trailing slash AND a trailing `/v1` from the endpoint so a base of\n // either `https://host` or `https://host/v1` resolves to the same correct URL\n // — callers routinely pass the versioned base and would otherwise hit\n // `/v1/v1/ingest/...` (404).\n const base = tenant.endpoint.replace(/\\/+$/, '').replace(/\\/v1$/, '')\n const url = `${base}${path}`\n\n let lastError: unknown\n for (let attempt = 0; attempt <= maxRetries; attempt++) {\n const ourTimeout = AbortSignal.timeout(timeoutMs)\n const combinedSignal = opts.signal ? AbortSignal.any([opts.signal, ourTimeout]) : ourTimeout\n try {\n const headers: Record<string, string> = {\n 'content-type': 'application/json',\n authorization: `Bearer ${tenant.apiKey}`,\n 'x-tangle-tenant-id': tenant.tenantId,\n 'x-tangle-wire-version': HOSTED_WIRE_VERSION,\n }\n if (opts.idempotencyKey) headers['idempotency-key'] = opts.idempotencyKey\n\n const res = await f(url, {\n method: 'POST',\n headers,\n body: JSON.stringify(body),\n signal: combinedSignal,\n })\n if (!res.ok) {\n const retryable = res.status >= 500 || res.status === 408 || res.status === 429\n if (!retryable || attempt === maxRetries) {\n const text = await res.text().catch(() => '')\n throw new Error(`hosted ingest ${url} failed (${res.status}): ${text.slice(0, 500)}`)\n }\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n continue\n }\n return (await res.json()) as TRes\n } catch (err) {\n if (opts.signal?.aborted) throw err\n lastError = err\n if (attempt === maxRetries) throw err\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n }\n }\n throw lastError ?? new Error('hosted ingest exhausted retries')\n}\n\nexport function createHostedClient(tenant: HostedTenant): HostedClient {\n return {\n tenant,\n wireVersion: HOSTED_WIRE_VERSION,\n\n async ingestEvalRun(event, idempotencyKey) {\n return this.ingestEvalRuns([event], idempotencyKey)\n },\n\n async ingestEvalRuns(events, idempotencyKey) {\n const body: IngestEvalRunsRequest = { wireVersion: HOSTED_WIRE_VERSION, events }\n return post<IngestEvalRunsRequest, IngestResponse>(tenant, '/v1/ingest/eval-runs', body, {\n idempotencyKey,\n })\n },\n\n async ingestTraces(spans, idempotencyKey) {\n const body: IngestTracesRequest = { wireVersion: HOSTED_WIRE_VERSION, spans }\n return post<IngestTracesRequest, IngestResponse>(tenant, '/v1/ingest/traces', body, {\n idempotencyKey,\n })\n },\n }\n}\n\n/**\n * Build a `HostedClient` from environment, or `undefined` when ingest is not\n * configured — the canonical, fail-soft wiring every product uses so eval-run +\n * trace provenance lands in the Intelligence dashboard with ONE call:\n *\n * const hosted = hostedClientFromEnv()\n * // ...run the loop...\n * await emitLoopProvenance({ ..., hostedClient: hosted }) // no-op if undefined\n *\n * Returns `undefined` (NOT an error) when any of endpoint / apiKey / tenantId is\n * missing — so a product wires the ship call unconditionally and it stays a\n * no-op until the env is set. Env precedence:\n * - endpoint: `TANGLE_INGEST_URL` → `TANGLE_ORCHESTRATOR_URL`\n * - apiKey: `TANGLE_INGEST_API_KEY` → `TANGLE_API_KEY`\n * - tenantId: `TANGLE_TENANT_ID`\n * A trailing slash on the endpoint is stripped. Pass `overrides` to supply any\n * field directly (e.g. a fixed `tenantId` per product) — overrides win over env.\n */\nexport function hostedClientFromEnv(\n overrides: Partial<HostedTenant> & { env?: Record<string, string | undefined> } = {},\n): HostedClient | undefined {\n const env = overrides.env ?? process.env\n const endpoint = (\n overrides.endpoint ??\n env.TANGLE_INGEST_URL ??\n env.TANGLE_ORCHESTRATOR_URL\n )?.trim()\n const apiKey = (overrides.apiKey ?? env.TANGLE_INGEST_API_KEY ?? env.TANGLE_API_KEY)?.trim()\n const tenantId = (overrides.tenantId ?? env.TANGLE_TENANT_ID)?.trim()\n if (!endpoint || !apiKey || !tenantId) return undefined\n const tenant: HostedTenant = { endpoint: endpoint.replace(/\\/+$/, ''), apiKey, tenantId }\n if (overrides.fetchImpl) tenant.fetchImpl = overrides.fetchImpl\n if (overrides.timeoutMs !== undefined) tenant.timeoutMs = overrides.timeoutMs\n if (overrides.retries !== undefined) tenant.retries = overrides.retries\n return createHostedClient(tenant)\n}\n"],"mappings":";AAgCO,IAAM,sBAAsB;;;ACwBnC,SAAS,MAAM,IAA2B;AACxC,SAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,UAAM,IAAI,WAAW,SAAS,EAAE;AAChC,QAAI,OAAQ,EAA6B,UAAU;AACjD,MAAC,EAA4B,MAAM;AAAA,EACvC,CAAC;AACH;AAEA,eAAe,KACb,QACA,MACA,MACA,OAAuB,CAAC,GACT;AACf,QAAM,YAAY,OAAO,aAAa;AACtC,QAAM,aAAa,OAAO,WAAW;AACrC,QAAM,IAAkB,OAAO,cAAc,IAAI,SAAS,MAAM,GAAG,IAAI;AAMvE,QAAM,OAAO,OAAO,SAAS,QAAQ,QAAQ,EAAE,EAAE,QAAQ,SAAS,EAAE;AACpE,QAAM,MAAM,GAAG,IAAI,GAAG,IAAI;AAE1B,MAAI;AACJ,WAAS,UAAU,GAAG,WAAW,YAAY,WAAW;AACtD,UAAM,aAAa,YAAY,QAAQ,SAAS;AAChD,UAAM,iBAAiB,KAAK,SAAS,YAAY,IAAI,CAAC,KAAK,QAAQ,UAAU,CAAC,IAAI;AAClF,QAAI;AACF,YAAM,UAAkC;AAAA,QACtC,gBAAgB;AAAA,QAChB,eAAe,UAAU,OAAO,MAAM;AAAA,QACtC,sBAAsB,OAAO;AAAA,QAC7B,yBAAyB;AAAA,MAC3B;AACA,UAAI,KAAK,eAAgB,SAAQ,iBAAiB,IAAI,KAAK;AAE3D,YAAM,MAAM,MAAM,EAAE,KAAK;AAAA,QACvB,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU,IAAI;AAAA,QACzB,QAAQ;AAAA,MACV,CAAC;AACD,UAAI,CAAC,IAAI,IAAI;AACX,cAAM,YAAY,IAAI,UAAU,OAAO,IAAI,WAAW,OAAO,IAAI,WAAW;AAC5E,YAAI,CAAC,aAAa,YAAY,YAAY;AACxC,gBAAM,OAAO,MAAM,IAAI,KAAK,EAAE,MAAM,MAAM,EAAE;AAC5C,gBAAM,IAAI,MAAM,iBAAiB,GAAG,YAAY,IAAI,MAAM,MAAM,KAAK,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,QACtF;AACA,cAAM,MAAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AACpD;AAAA,MACF;AACA,aAAQ,MAAM,IAAI,KAAK;AAAA,IACzB,SAAS,KAAK;AACZ,UAAI,KAAK,QAAQ,QAAS,OAAM;AAChC,kBAAY;AACZ,UAAI,YAAY,WAAY,OAAM;AAClC,YAAM,MAAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,IACtD;AAAA,EACF;AACA,QAAM,aAAa,IAAI,MAAM,iCAAiC;AAChE;AAEO,SAAS,mBAAmB,QAAoC;AACrE,SAAO;AAAA,IACL;AAAA,IACA,aAAa;AAAA,IAEb,MAAM,cAAc,OAAO,gBAAgB;AACzC,aAAO,KAAK,eAAe,CAAC,KAAK,GAAG,cAAc;AAAA,IACpD;AAAA,IAEA,MAAM,eAAe,QAAQ,gBAAgB;AAC3C,YAAM,OAA8B,EAAE,aAAa,qBAAqB,OAAO;AAC/E,aAAO,KAA4C,QAAQ,wBAAwB,MAAM;AAAA,QACvF;AAAA,MACF,CAAC;AAAA,IACH;AAAA,IAEA,MAAM,aAAa,OAAO,gBAAgB;AACxC,YAAM,OAA4B,EAAE,aAAa,qBAAqB,MAAM;AAC5E,aAAO,KAA0C,QAAQ,qBAAqB,MAAM;AAAA,QAClF;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AACF;AAoBO,SAAS,oBACd,YAAkF,CAAC,GACzD;AAC1B,QAAM,MAAM,UAAU,OAAO,QAAQ;AACrC,QAAM,YACJ,UAAU,YACV,IAAI,qBACJ,IAAI,0BACH,KAAK;AACR,QAAM,UAAU,UAAU,UAAU,IAAI,yBAAyB,IAAI,iBAAiB,KAAK;AAC3F,QAAM,YAAY,UAAU,YAAY,IAAI,mBAAmB,KAAK;AACpE,MAAI,CAAC,YAAY,CAAC,UAAU,CAAC,SAAU,QAAO;AAC9C,QAAM,SAAuB,EAAE,UAAU,SAAS,QAAQ,QAAQ,EAAE,GAAG,QAAQ,SAAS;AACxF,MAAI,UAAU,UAAW,QAAO,YAAY,UAAU;AACtD,MAAI,UAAU,cAAc,OAAW,QAAO,YAAY,UAAU;AACpE,MAAI,UAAU,YAAY,OAAW,QAAO,UAAU,UAAU;AAChE,SAAO,mBAAmB,MAAM;AAClC;","names":[]}