@tangle-network/agent-eval 0.63.0 → 0.64.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,14 @@ All notable changes to `@tangle-network/agent-eval` and its sibling `agent-eval-
4
4
 
5
5
  ---
6
6
 
7
+ ## [0.64.0] — 2026-05-30 — `hostedClientFromEnv()` — one-call ingest wiring for the fleet
8
+
9
+ ### Added
10
+
11
+ - **`hostedClientFromEnv(overrides?)`** (`/hosted`) — the canonical, fail-soft way to wire a product's eval-run + trace provenance to the Intelligence dashboard. Reads `TANGLE_INGEST_URL` → `TANGLE_ORCHESTRATOR_URL` (endpoint), `TANGLE_INGEST_API_KEY` → `TANGLE_API_KEY` (key), `TANGLE_TENANT_ID` (tenant); returns a `HostedClient` or **`undefined`** when any is missing — so a product wires the ship call unconditionally (`emitLoopProvenance({ hostedClient })` / `selfImprove({ hostedTenant })`) and it stays a no-op until the env is set. Strips a trailing slash; `overrides` (e.g. a fixed per-product `tenantId` label) win over env. Replaces the per-product `resolveHostedClient()` copies with one substrate helper.
12
+
13
+ ---
14
+
7
15
  ## [0.63.0] — 2026-05-30 — the full optimizer drivers: GEPA Pareto + SkillOpt + a head-to-head lift benchmark
8
16
 
9
17
  Closes the optimizer-completeness gap (#101/#100). `gepaDriver` was reflection-only; the SOTA SkillOpt technique was roadmapped but unbuilt; and there was no head-to-head benchmark, so optimizer quality was measurement-invisible — a simplified driver could ship unnoticed. This release ships both drivers in full and the forcing function that keeps them honest.
@@ -1,4 +1,4 @@
1
- import { T as TraceSpanEvent, H as HostedClient } from '../index-GISRh500.js';
1
+ import { T as TraceSpanEvent, H as HostedClient } from '../index-CzhtwYBT.js';
2
2
  import '../types-c2R2kfmv.js';
3
3
  import '../run-record-BgTFzO2r.js';
4
4
  import '../errors-Dwqw-T_m.js';
@@ -1,5 +1,5 @@
1
- import { a as RunCampaignOptions, C as CampaignStorage } from '../provenance-cUnovpWV.js';
2
- export { B as BuildLoopProvenanceArgs, D as DefaultProductionGateOptions, m as EmitLoopProvenanceArgs, n as EmitLoopProvenanceResult, E as EvolutionaryDriverOptions, o as GepaDriverConstraints, G as GepaDriverOptions, H as HeldOutGateOptions, p as LoopProvenanceBackend, q as LoopProvenanceCandidate, L as LoopProvenanceRecord, O as OpenAutoPrOptions, s as OpenAutoPrResult, b as RunEvalOptions, c as RunImprovementLoopOptions, R as RunImprovementLoopResult, t as RunOptimizationOptions, u as RunOptimizationResult, v as buildLoopProvenanceRecord, d as composeGate, w as countSentenceEdits, e as defaultProductionGate, x as defaultRenderDiff, y as emitLoopProvenance, f as evolutionaryDriver, z as extractH2Sections, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, A as loopProvenanceSpans, F as openAutoPr, I as provenanceRecordPath, J as provenanceSpansPath, r as runCampaign, k as runEval, l as runImprovementLoop, K as runOptimization, M as surfaceContentHash, N as surfaceHash } from '../provenance-cUnovpWV.js';
1
+ import { a as RunCampaignOptions, C as CampaignStorage } from '../provenance-lqyLpOYR.js';
2
+ export { B as BuildLoopProvenanceArgs, D as DefaultProductionGateOptions, m as EmitLoopProvenanceArgs, n as EmitLoopProvenanceResult, E as EvolutionaryDriverOptions, o as GepaDriverConstraints, G as GepaDriverOptions, H as HeldOutGateOptions, p as LoopProvenanceBackend, q as LoopProvenanceCandidate, L as LoopProvenanceRecord, O as OpenAutoPrOptions, s as OpenAutoPrResult, b as RunEvalOptions, c as RunImprovementLoopOptions, R as RunImprovementLoopResult, t as RunOptimizationOptions, u as RunOptimizationResult, v as buildLoopProvenanceRecord, d as composeGate, w as countSentenceEdits, e as defaultProductionGate, x as defaultRenderDiff, y as emitLoopProvenance, f as evolutionaryDriver, z as extractH2Sections, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, A as loopProvenanceSpans, F as openAutoPr, I as provenanceRecordPath, J as provenanceSpansPath, r as runCampaign, k as runEval, l as runImprovementLoop, K as runOptimization, M as surfaceContentHash, N as surfaceHash } from '../provenance-lqyLpOYR.js';
3
3
  import { L as LlmClientOptions } from '../llm-client-DbjLfz-K.js';
4
4
  import { I as ImprovementDriver, L as LabeledScenarioStore, q as LabeledScenarioWrite, r as LabeledScenarioSampleArgs, s as LabeledScenarioRecord, t as LabelTrust, S as Scenario, M as MutableSurface, b as DispatchContext, a as JudgeConfig, u as LabeledScenarioSource, f as CampaignResult, h as CodeSurface } from '../types-c2R2kfmv.js';
5
5
  export { C as CampaignAggregates, c as CampaignArtifactWriter, d as CampaignCellResult, e as CampaignCostMeter, v as CampaignTokenUsage, g as CampaignTraceWriter, D as DispatchFn, G as Gate, i as GateContext, j as GateDecision, k as GateResult, l as GenerationCandidate, m as GenerationRecord, w as JudgeAggregate, n as JudgeDimension, J as JudgeScore, o as Mutator, O as OptimizerConfig, P as ParetoParent, x as ProposeContext, y as ProposedCandidate, R as RedactionStatus, z as ScenarioAggregate, p as SessionScript, T as TraceSpan, A as isProposedCandidate, B as labelTrustRank } from '../types-c2R2kfmv.js';
@@ -10,7 +10,7 @@ import '../red-team-DW9Ca_tj.js';
10
10
  import '../dataset-B2kL-fSM.js';
11
11
  import '../store-CKUAgsJz.js';
12
12
  import '../schema-m0gsnbt3.js';
13
- import '../index-GISRh500.js';
13
+ import '../index-CzhtwYBT.js';
14
14
  import '../summary-report-ByiOUrHj.js';
15
15
  import '../failure-cluster-CL7IVgkJ.js';
16
16
  import '../judge-calibration-DilmB3Ml.js';
@@ -72,9 +72,22 @@ function createHostedClient(tenant) {
72
72
  }
73
73
  };
74
74
  }
75
/**
 * Build a hosted ingest client from environment variables, or return
 * `undefined` when ingest is not fully configured (fail-soft: never throws
 * for missing configuration).
 *
 * Resolution order per field (first usable value wins):
 *   - endpoint: `overrides.endpoint` → `TANGLE_INGEST_URL` → `TANGLE_ORCHESTRATOR_URL`
 *   - apiKey:   `overrides.apiKey`   → `TANGLE_INGEST_API_KEY` → `TANGLE_API_KEY`
 *   - tenantId: `overrides.tenantId` → `TANGLE_TENANT_ID`
 *
 * An explicitly supplied override always wins over env, even when it is an
 * empty string (which yields `undefined`). Among env variables, blank or
 * whitespace-only values are skipped so that an empty primary variable does
 * not shadow a configured fallback variable.
 *
 * @param {object} [overrides] - Optional direct values.
 * @param {string} [overrides.endpoint] - Endpoint base URL; trailing slashes are stripped.
 * @param {string} [overrides.apiKey] - Bearer token.
 * @param {string} [overrides.tenantId] - Tenant id.
 * @param {Function} [overrides.fetchImpl] - Copied onto the tenant when truthy.
 * @param {number} [overrides.timeoutMs] - Copied onto the tenant when defined.
 * @param {number} [overrides.retries] - Copied onto the tenant when defined.
 * @param {Record<string, string | undefined>} [overrides.env] - Environment
 *   map to read instead of `process.env`.
 * @returns The result of `createHostedClient(tenant)`, or `undefined` when
 *   endpoint, apiKey or tenantId is missing/blank.
 */
function hostedClientFromEnv(overrides = {}) {
  // `process` does not exist on every runtime; fall back to an empty map so
  // the helper degrades to "not configured" instead of a ReferenceError.
  const env = overrides.env ?? globalThis.process?.env ?? {};

  // An explicit override wins as-is (trimmed); otherwise take the first env
  // variable whose trimmed value is non-empty.
  const resolveField = (override, ...envNames) => {
    if (override !== undefined && override !== null) return override.trim();
    for (const envName of envNames) {
      const candidate = env[envName]?.trim();
      if (candidate) return candidate;
    }
    return undefined;
  };

  // Strip trailing slashes before the emptiness check so an endpoint that is
  // only slashes counts as "not configured" rather than an empty base URL.
  const endpoint = resolveField(
    overrides.endpoint,
    "TANGLE_INGEST_URL",
    "TANGLE_ORCHESTRATOR_URL"
  )?.replace(/\/+$/, "");
  const apiKey = resolveField(overrides.apiKey, "TANGLE_INGEST_API_KEY", "TANGLE_API_KEY");
  const tenantId = resolveField(overrides.tenantId, "TANGLE_TENANT_ID");
  if (!endpoint || !apiKey || !tenantId) return undefined;

  const tenant = { endpoint, apiKey, tenantId };
  // Optional transport knobs are only set when supplied, so the client's own
  // defaults apply otherwise.
  if (overrides.fetchImpl) tenant.fetchImpl = overrides.fetchImpl;
  if (overrides.timeoutMs !== undefined) tenant.timeoutMs = overrides.timeoutMs;
  if (overrides.retries !== undefined) tenant.retries = overrides.retries;
  return createHostedClient(tenant);
}
75
87
 
76
88
  export {
77
89
  HOSTED_WIRE_VERSION,
78
- createHostedClient
90
+ createHostedClient,
91
+ hostedClientFromEnv
79
92
  };
80
- //# sourceMappingURL=chunk-FQK2CCIM.js.map
93
+ //# sourceMappingURL=chunk-HKINEDRZ.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/hosted/types.ts","../src/hosted/client.ts"],"sourcesContent":["/**\n * # Hosted-tier wire format — the schema that EVERY orchestrator (ours,\n * a partner's self-hosted one, a future open implementation) must accept.\n *\n * **Stability:** every type in this file is committed under semver. New\n * minors only ADD optional fields. Breaking changes mean a major bump\n * (`HostedWireVersion` literal increment).\n *\n * The wire format is two event streams in one transport:\n *\n * 1. **Eval-run events** (`POST /v1/ingest/eval-runs`). Posted when a\n * campaign / improvement-loop completes (or per-generation if\n * streaming). Carries the structured result + per-cell scores +\n * surface diffs the orchestrator stores for the dashboard.\n *\n * 2. **Trace spans** (`POST /v1/ingest/traces`). Standard OTLP-shaped\n * spans with a few additional attributes so the orchestrator can\n * pivot from eval-run → underlying execution. Compatible with any\n * OTel collector.\n *\n * Both endpoints are authenticated with a bearer token + a tenant id\n * header. Tenants isolate everything downstream of ingest; no tenant\n * ever sees another tenant's data.\n */\n\nimport type { GateDecision, MutableSurface } from '../campaign/types'\nimport type { InsightReport } from '../contract/insight-report'\n\n// re-export so wire-format consumers can import the optional payload type\n// from `@tangle-network/agent-eval/hosted` without reaching into /contract.\nexport type { InsightReport } from '../contract/insight-report'\n\nexport const HOSTED_WIRE_VERSION = '2026-05-26.v1' as const\nexport type HostedWireVersion = typeof HOSTED_WIRE_VERSION\n\n// ── Transport headers ───────────────────────────────────────────────\n\n/** Every ingest request carries these. */\nexport interface HostedIngestHeaders {\n /** Bearer token. The orchestrator validates against the tenant key. 
*/\n authorization: `Bearer ${string}`\n /** Stable tenant id (the orchestrator-side primary key for the tenant). */\n 'x-tangle-tenant-id': string\n /** Wire-version pin so the server can reject incompatible payloads. */\n 'x-tangle-wire-version': HostedWireVersion\n /** Optional idempotency key for retry-safe ingest. */\n 'idempotency-key'?: string\n}\n\n// ── Eval-run event ──────────────────────────────────────────────────\n\n/** Lifecycle stages of an eval-run as the substrate reports them. */\nexport type EvalRunStatus =\n | 'started'\n | 'baseline-complete'\n | 'generation-complete'\n | 'gate-decided'\n | 'finished'\n | 'errored'\n\nexport interface EvalRunCellScore {\n /** Stable scenario id from the consumer's scenario set. */\n scenarioId: string\n /** Repetition index when reps > 1; 0 for the default. */\n rep: number\n /** Composite score across all judges + dimensions for this cell. */\n compositeMean: number\n /** Per-judge → per-dimension scores; null where the judge did not run. */\n dimensions: Record<string, Record<string, number>>\n /** Per-cell error message if the dispatch threw. Null on success. */\n errorMessage?: string\n}\n\nexport interface EvalRunGenerationSnapshot {\n /** Generation index. 0 is baseline. */\n index: number\n /** Candidate surface fingerprint (stable hash) — pivot key into the\n * trace stream to fetch the underlying execution. */\n surfaceHash: string\n /** The candidate surface itself. May be omitted to avoid PII when the\n * consumer prefers not to ship verbatim prompts. */\n surface?: MutableSurface\n /** Per-cell scores for this generation. */\n cells: EvalRunCellScore[]\n /** Aggregate composite mean across all cells in this generation. */\n compositeMean: number\n /** Total $ spent across this generation. */\n costUsd: number\n /** Wall-clock duration of this generation. */\n durationMs: number\n}\n\n/**\n * The top-level eval-run event. 
One ingest call per logical eval-run;\n * generations stream in incrementally via repeated calls with the same\n * `runId`. The orchestrator deduplicates by `(runId, generation.index)`.\n */\nexport interface EvalRunEvent {\n /** Stable run id (the substrate's `runId`). UUID or substrate-generated. */\n runId: string\n /** Where this run was happening — derived from `RunCampaignOptions.runDir`. */\n runDir: string\n /** ISO-8601 timestamp the substrate recorded the event. */\n timestamp: string\n /** Lifecycle stage this event represents. */\n status: EvalRunStatus\n /** Free-form consumer tags (env, branch, model id, etc.). Searchable. */\n labels: Record<string, string>\n /** Baseline campaign snapshot. Present when status >= baseline-complete. */\n baseline?: EvalRunGenerationSnapshot\n /** Per-generation snapshots. Streams in; orchestrator appends. */\n generations: EvalRunGenerationSnapshot[]\n /** Final gate decision. Present when status >= gate-decided. */\n gateDecision?: GateDecision\n /** Held-out lift = winner-on-holdout - baseline-on-holdout. */\n holdoutLift?: number\n /** Total $ spent across baseline + every generation. */\n totalCostUsd: number\n /** Total wall-clock duration. */\n totalDurationMs: number\n /** Error message if status === 'errored'. */\n errorMessage?: string\n /** Rigor packet emitted alongside the run — distributional summary,\n * paired-bootstrap lift CI, judge stats, inter-rater agreement,\n * contamination check, failure clusters (when an analyst is wired),\n * outcome correlation (when downstream signal is supplied), and the\n * recommendations the dashboard surfaces verbatim. Additive; older\n * clients that don't know about this field continue to work. 
*/\n insightReport?: InsightReport\n}\n\n// ── Trace span event ────────────────────────────────────────────────\n\n/**\n * OTel-shape span with a few additional attributes for eval-run pivoting.\n * Compatible with any OTLP collector — `name`, `traceId`, `spanId`,\n * `startTimeUnixNano`, `endTimeUnixNano`, `attributes` are stock OTel.\n */\nexport interface TraceSpanEvent {\n traceId: string\n spanId: string\n parentSpanId?: string\n name: string\n startTimeUnixNano: number\n endTimeUnixNano: number\n attributes: Record<string, string | number | boolean>\n events?: Array<{\n timeUnixNano: number\n name: string\n attributes?: Record<string, string | number | boolean>\n }>\n status?: { code: 'OK' | 'ERROR' | 'UNSET'; message?: string }\n /** Pivot back into the eval-run stream. */\n 'tangle.runId'?: string\n /** Pivot to the specific generation. */\n 'tangle.generation'?: number\n /** Pivot to the specific cell. */\n 'tangle.cellId'?: string\n /** Pivot to the specific scenario. */\n 'tangle.scenarioId'?: string\n}\n\n// ── Ingest request bodies ───────────────────────────────────────────\n\nexport interface IngestEvalRunsRequest {\n wireVersion: HostedWireVersion\n events: EvalRunEvent[]\n}\n\nexport interface IngestTracesRequest {\n wireVersion: HostedWireVersion\n spans: TraceSpanEvent[]\n}\n\nexport interface IngestResponse {\n /** Accepted events / spans count. */\n accepted: number\n /** Rejected events with reasons (validation failures, dup idempotency key, etc.). */\n rejected: Array<{ index: number; reason: string }>\n}\n","/**\n * # Hosted-tier ingest client.\n *\n * Ships eval-run events + trace spans to any orchestrator (ours, a\n * partner's self-hosted one, or a future open implementation) that\n * speaks the wire format in `./types.ts`.\n *\n * Three modes:\n * - **Ours:** point at `https://orchestrator.tangle.tools/v1`. 
We\n * handle ingest + storage + dashboard.\n * - **Self-hosted:** point at whatever URL runs the reference receiver\n * from `examples/hosted-ingest-server/`.\n * - **Off (default):** when `hostedTenant` is unset, nothing is sent.\n * Everything stays local.\n */\n\nimport {\n type EvalRunEvent,\n HOSTED_WIRE_VERSION,\n type HostedWireVersion,\n type IngestEvalRunsRequest,\n type IngestResponse,\n type IngestTracesRequest,\n type TraceSpanEvent,\n} from './types'\n\nexport interface HostedTenant {\n /** Orchestrator endpoint base URL (no trailing slash). Required. */\n endpoint: string\n /** Bearer token issued by the orchestrator. Required. */\n apiKey: string\n /** Tenant id — the orchestrator's primary key for this consumer. Required. */\n tenantId: string\n /** Optional `fetch` override (auth wrappers, custom agent, test mocks). */\n fetchImpl?: typeof fetch\n /** Per-call timeout in ms. Default 30s. */\n timeoutMs?: number\n /** Retries on 5xx / network errors. Default 2. */\n retries?: number\n}\n\nexport interface HostedClient {\n ingestEvalRun(event: EvalRunEvent, idempotencyKey?: string): Promise<IngestResponse>\n ingestEvalRuns(events: EvalRunEvent[], idempotencyKey?: string): Promise<IngestResponse>\n ingestTraces(spans: TraceSpanEvent[], idempotencyKey?: string): Promise<IngestResponse>\n readonly tenant: HostedTenant\n readonly wireVersion: HostedWireVersion\n}\n\ninterface RequestOptions {\n idempotencyKey?: string\n signal?: AbortSignal\n}\n\nfunction sleep(ms: number): Promise<void> {\n return new Promise((resolve) => {\n const t = setTimeout(resolve, ms)\n if (typeof (t as { unref?: () => void }).unref === 'function')\n (t as { unref: () => void }).unref()\n })\n}\n\nasync function post<TReq, TRes>(\n tenant: HostedTenant,\n path: string,\n body: TReq,\n opts: RequestOptions = {},\n): Promise<TRes> {\n const timeoutMs = tenant.timeoutMs ?? 30_000\n const maxRetries = tenant.retries ?? 2\n const f: typeof fetch = tenant.fetchImpl ?? 
((...args) => fetch(...args))\n const url = `${tenant.endpoint.replace(/\\/$/, '')}${path}`\n\n let lastError: unknown\n for (let attempt = 0; attempt <= maxRetries; attempt++) {\n const ourTimeout = AbortSignal.timeout(timeoutMs)\n const combinedSignal = opts.signal ? AbortSignal.any([opts.signal, ourTimeout]) : ourTimeout\n try {\n const headers: Record<string, string> = {\n 'content-type': 'application/json',\n authorization: `Bearer ${tenant.apiKey}`,\n 'x-tangle-tenant-id': tenant.tenantId,\n 'x-tangle-wire-version': HOSTED_WIRE_VERSION,\n }\n if (opts.idempotencyKey) headers['idempotency-key'] = opts.idempotencyKey\n\n const res = await f(url, {\n method: 'POST',\n headers,\n body: JSON.stringify(body),\n signal: combinedSignal,\n })\n if (!res.ok) {\n const retryable = res.status >= 500 || res.status === 408 || res.status === 429\n if (!retryable || attempt === maxRetries) {\n const text = await res.text().catch(() => '')\n throw new Error(`hosted ingest ${url} failed (${res.status}): ${text.slice(0, 500)}`)\n }\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n continue\n }\n return (await res.json()) as TRes\n } catch (err) {\n if (opts.signal?.aborted) throw err\n lastError = err\n if (attempt === maxRetries) throw err\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n }\n }\n throw lastError ?? 
new Error('hosted ingest exhausted retries')\n}\n\nexport function createHostedClient(tenant: HostedTenant): HostedClient {\n return {\n tenant,\n wireVersion: HOSTED_WIRE_VERSION,\n\n async ingestEvalRun(event, idempotencyKey) {\n return this.ingestEvalRuns([event], idempotencyKey)\n },\n\n async ingestEvalRuns(events, idempotencyKey) {\n const body: IngestEvalRunsRequest = { wireVersion: HOSTED_WIRE_VERSION, events }\n return post<IngestEvalRunsRequest, IngestResponse>(tenant, '/v1/ingest/eval-runs', body, {\n idempotencyKey,\n })\n },\n\n async ingestTraces(spans, idempotencyKey) {\n const body: IngestTracesRequest = { wireVersion: HOSTED_WIRE_VERSION, spans }\n return post<IngestTracesRequest, IngestResponse>(tenant, '/v1/ingest/traces', body, {\n idempotencyKey,\n })\n },\n }\n}\n"],"mappings":";AAgCO,IAAM,sBAAsB;;;ACsBnC,SAAS,MAAM,IAA2B;AACxC,SAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,UAAM,IAAI,WAAW,SAAS,EAAE;AAChC,QAAI,OAAQ,EAA6B,UAAU;AACjD,MAAC,EAA4B,MAAM;AAAA,EACvC,CAAC;AACH;AAEA,eAAe,KACb,QACA,MACA,MACA,OAAuB,CAAC,GACT;AACf,QAAM,YAAY,OAAO,aAAa;AACtC,QAAM,aAAa,OAAO,WAAW;AACrC,QAAM,IAAkB,OAAO,cAAc,IAAI,SAAS,MAAM,GAAG,IAAI;AACvE,QAAM,MAAM,GAAG,OAAO,SAAS,QAAQ,OAAO,EAAE,CAAC,GAAG,IAAI;AAExD,MAAI;AACJ,WAAS,UAAU,GAAG,WAAW,YAAY,WAAW;AACtD,UAAM,aAAa,YAAY,QAAQ,SAAS;AAChD,UAAM,iBAAiB,KAAK,SAAS,YAAY,IAAI,CAAC,KAAK,QAAQ,UAAU,CAAC,IAAI;AAClF,QAAI;AACF,YAAM,UAAkC;AAAA,QACtC,gBAAgB;AAAA,QAChB,eAAe,UAAU,OAAO,MAAM;AAAA,QACtC,sBAAsB,OAAO;AAAA,QAC7B,yBAAyB;AAAA,MAC3B;AACA,UAAI,KAAK,eAAgB,SAAQ,iBAAiB,IAAI,KAAK;AAE3D,YAAM,MAAM,MAAM,EAAE,KAAK;AAAA,QACvB,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU,IAAI;AAAA,QACzB,QAAQ;AAAA,MACV,CAAC;AACD,UAAI,CAAC,IAAI,IAAI;AACX,cAAM,YAAY,IAAI,UAAU,OAAO,IAAI,WAAW,OAAO,IAAI,WAAW;AAC5E,YAAI,CAAC,aAAa,YAAY,YAAY;AACxC,gBAAM,OAAO,MAAM,IAAI,KAAK,EAAE,MAAM,MAAM,EAAE;AAC5C,gBAAM,IAAI,MAAM,iBAAiB,GAAG,YAAY,IAAI,MAAM,MAAM,KAAK,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,QACtF;AACA,cAAM,MAAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AACpD;AAAA,MACF;AACA,aAAQ,MAAM,IAAI,KAAK;AAAA,IACzB,SAAS,KAAK
;AACZ,UAAI,KAAK,QAAQ,QAAS,OAAM;AAChC,kBAAY;AACZ,UAAI,YAAY,WAAY,OAAM;AAClC,YAAM,MAAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,IACtD;AAAA,EACF;AACA,QAAM,aAAa,IAAI,MAAM,iCAAiC;AAChE;AAEO,SAAS,mBAAmB,QAAoC;AACrE,SAAO;AAAA,IACL;AAAA,IACA,aAAa;AAAA,IAEb,MAAM,cAAc,OAAO,gBAAgB;AACzC,aAAO,KAAK,eAAe,CAAC,KAAK,GAAG,cAAc;AAAA,IACpD;AAAA,IAEA,MAAM,eAAe,QAAQ,gBAAgB;AAC3C,YAAM,OAA8B,EAAE,aAAa,qBAAqB,OAAO;AAC/E,aAAO,KAA4C,QAAQ,wBAAwB,MAAM;AAAA,QACvF;AAAA,MACF,CAAC;AAAA,IACH;AAAA,IAEA,MAAM,aAAa,OAAO,gBAAgB;AACxC,YAAM,OAA4B,EAAE,aAAa,qBAAqB,MAAM;AAC5E,aAAO,KAA0C,QAAQ,qBAAqB,MAAM;AAAA,QAClF;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/hosted/types.ts","../src/hosted/client.ts"],"sourcesContent":["/**\n * # Hosted-tier wire format — the schema that EVERY orchestrator (ours,\n * a partner's self-hosted one, a future open implementation) must accept.\n *\n * **Stability:** every type in this file is committed under semver. New\n * minors only ADD optional fields. Breaking changes mean a major bump\n * (`HostedWireVersion` literal increment).\n *\n * The wire format is two event streams in one transport:\n *\n * 1. **Eval-run events** (`POST /v1/ingest/eval-runs`). Posted when a\n * campaign / improvement-loop completes (or per-generation if\n * streaming). Carries the structured result + per-cell scores +\n * surface diffs the orchestrator stores for the dashboard.\n *\n * 2. **Trace spans** (`POST /v1/ingest/traces`). Standard OTLP-shaped\n * spans with a few additional attributes so the orchestrator can\n * pivot from eval-run → underlying execution. Compatible with any\n * OTel collector.\n *\n * Both endpoints are authenticated with a bearer token + a tenant id\n * header. Tenants isolate everything downstream of ingest; no tenant\n * ever sees another tenant's data.\n */\n\nimport type { GateDecision, MutableSurface } from '../campaign/types'\nimport type { InsightReport } from '../contract/insight-report'\n\n// re-export so wire-format consumers can import the optional payload type\n// from `@tangle-network/agent-eval/hosted` without reaching into /contract.\nexport type { InsightReport } from '../contract/insight-report'\n\nexport const HOSTED_WIRE_VERSION = '2026-05-26.v1' as const\nexport type HostedWireVersion = typeof HOSTED_WIRE_VERSION\n\n// ── Transport headers ───────────────────────────────────────────────\n\n/** Every ingest request carries these. */\nexport interface HostedIngestHeaders {\n /** Bearer token. The orchestrator validates against the tenant key. 
*/\n authorization: `Bearer ${string}`\n /** Stable tenant id (the orchestrator-side primary key for the tenant). */\n 'x-tangle-tenant-id': string\n /** Wire-version pin so the server can reject incompatible payloads. */\n 'x-tangle-wire-version': HostedWireVersion\n /** Optional idempotency key for retry-safe ingest. */\n 'idempotency-key'?: string\n}\n\n// ── Eval-run event ──────────────────────────────────────────────────\n\n/** Lifecycle stages of an eval-run as the substrate reports them. */\nexport type EvalRunStatus =\n | 'started'\n | 'baseline-complete'\n | 'generation-complete'\n | 'gate-decided'\n | 'finished'\n | 'errored'\n\nexport interface EvalRunCellScore {\n /** Stable scenario id from the consumer's scenario set. */\n scenarioId: string\n /** Repetition index when reps > 1; 0 for the default. */\n rep: number\n /** Composite score across all judges + dimensions for this cell. */\n compositeMean: number\n /** Per-judge → per-dimension scores; null where the judge did not run. */\n dimensions: Record<string, Record<string, number>>\n /** Per-cell error message if the dispatch threw. Null on success. */\n errorMessage?: string\n}\n\nexport interface EvalRunGenerationSnapshot {\n /** Generation index. 0 is baseline. */\n index: number\n /** Candidate surface fingerprint (stable hash) — pivot key into the\n * trace stream to fetch the underlying execution. */\n surfaceHash: string\n /** The candidate surface itself. May be omitted to avoid PII when the\n * consumer prefers not to ship verbatim prompts. */\n surface?: MutableSurface\n /** Per-cell scores for this generation. */\n cells: EvalRunCellScore[]\n /** Aggregate composite mean across all cells in this generation. */\n compositeMean: number\n /** Total $ spent across this generation. */\n costUsd: number\n /** Wall-clock duration of this generation. */\n durationMs: number\n}\n\n/**\n * The top-level eval-run event. 
One ingest call per logical eval-run;\n * generations stream in incrementally via repeated calls with the same\n * `runId`. The orchestrator deduplicates by `(runId, generation.index)`.\n */\nexport interface EvalRunEvent {\n /** Stable run id (the substrate's `runId`). UUID or substrate-generated. */\n runId: string\n /** Where this run was happening — derived from `RunCampaignOptions.runDir`. */\n runDir: string\n /** ISO-8601 timestamp the substrate recorded the event. */\n timestamp: string\n /** Lifecycle stage this event represents. */\n status: EvalRunStatus\n /** Free-form consumer tags (env, branch, model id, etc.). Searchable. */\n labels: Record<string, string>\n /** Baseline campaign snapshot. Present when status >= baseline-complete. */\n baseline?: EvalRunGenerationSnapshot\n /** Per-generation snapshots. Streams in; orchestrator appends. */\n generations: EvalRunGenerationSnapshot[]\n /** Final gate decision. Present when status >= gate-decided. */\n gateDecision?: GateDecision\n /** Held-out lift = winner-on-holdout - baseline-on-holdout. */\n holdoutLift?: number\n /** Total $ spent across baseline + every generation. */\n totalCostUsd: number\n /** Total wall-clock duration. */\n totalDurationMs: number\n /** Error message if status === 'errored'. */\n errorMessage?: string\n /** Rigor packet emitted alongside the run — distributional summary,\n * paired-bootstrap lift CI, judge stats, inter-rater agreement,\n * contamination check, failure clusters (when an analyst is wired),\n * outcome correlation (when downstream signal is supplied), and the\n * recommendations the dashboard surfaces verbatim. Additive; older\n * clients that don't know about this field continue to work. 
*/\n insightReport?: InsightReport\n}\n\n// ── Trace span event ────────────────────────────────────────────────\n\n/**\n * OTel-shape span with a few additional attributes for eval-run pivoting.\n * Compatible with any OTLP collector — `name`, `traceId`, `spanId`,\n * `startTimeUnixNano`, `endTimeUnixNano`, `attributes` are stock OTel.\n */\nexport interface TraceSpanEvent {\n traceId: string\n spanId: string\n parentSpanId?: string\n name: string\n startTimeUnixNano: number\n endTimeUnixNano: number\n attributes: Record<string, string | number | boolean>\n events?: Array<{\n timeUnixNano: number\n name: string\n attributes?: Record<string, string | number | boolean>\n }>\n status?: { code: 'OK' | 'ERROR' | 'UNSET'; message?: string }\n /** Pivot back into the eval-run stream. */\n 'tangle.runId'?: string\n /** Pivot to the specific generation. */\n 'tangle.generation'?: number\n /** Pivot to the specific cell. */\n 'tangle.cellId'?: string\n /** Pivot to the specific scenario. */\n 'tangle.scenarioId'?: string\n}\n\n// ── Ingest request bodies ───────────────────────────────────────────\n\nexport interface IngestEvalRunsRequest {\n wireVersion: HostedWireVersion\n events: EvalRunEvent[]\n}\n\nexport interface IngestTracesRequest {\n wireVersion: HostedWireVersion\n spans: TraceSpanEvent[]\n}\n\nexport interface IngestResponse {\n /** Accepted events / spans count. */\n accepted: number\n /** Rejected events with reasons (validation failures, dup idempotency key, etc.). */\n rejected: Array<{ index: number; reason: string }>\n}\n","/**\n * # Hosted-tier ingest client.\n *\n * Ships eval-run events + trace spans to any orchestrator (ours, a\n * partner's self-hosted one, or a future open implementation) that\n * speaks the wire format in `./types.ts`.\n *\n * Three modes:\n * - **Ours:** point at `https://orchestrator.tangle.tools/v1`. 
We\n * handle ingest + storage + dashboard.\n * - **Self-hosted:** point at whatever URL runs the reference receiver\n * from `examples/hosted-ingest-server/`.\n * - **Off (default):** when `hostedTenant` is unset, nothing is sent.\n * Everything stays local.\n */\n\nimport {\n type EvalRunEvent,\n HOSTED_WIRE_VERSION,\n type HostedWireVersion,\n type IngestEvalRunsRequest,\n type IngestResponse,\n type IngestTracesRequest,\n type TraceSpanEvent,\n} from './types'\n\nexport interface HostedTenant {\n /** Orchestrator endpoint base URL (no trailing slash). Required. */\n endpoint: string\n /** Bearer token issued by the orchestrator. Required. */\n apiKey: string\n /** Tenant id — the orchestrator's primary key for this consumer. Required. */\n tenantId: string\n /** Optional `fetch` override (auth wrappers, custom agent, test mocks). */\n fetchImpl?: typeof fetch\n /** Per-call timeout in ms. Default 30s. */\n timeoutMs?: number\n /** Retries on 5xx / network errors. Default 2. */\n retries?: number\n}\n\nexport interface HostedClient {\n ingestEvalRun(event: EvalRunEvent, idempotencyKey?: string): Promise<IngestResponse>\n ingestEvalRuns(events: EvalRunEvent[], idempotencyKey?: string): Promise<IngestResponse>\n ingestTraces(spans: TraceSpanEvent[], idempotencyKey?: string): Promise<IngestResponse>\n readonly tenant: HostedTenant\n readonly wireVersion: HostedWireVersion\n}\n\ninterface RequestOptions {\n idempotencyKey?: string\n signal?: AbortSignal\n}\n\nfunction sleep(ms: number): Promise<void> {\n return new Promise((resolve) => {\n const t = setTimeout(resolve, ms)\n if (typeof (t as { unref?: () => void }).unref === 'function')\n (t as { unref: () => void }).unref()\n })\n}\n\nasync function post<TReq, TRes>(\n tenant: HostedTenant,\n path: string,\n body: TReq,\n opts: RequestOptions = {},\n): Promise<TRes> {\n const timeoutMs = tenant.timeoutMs ?? 30_000\n const maxRetries = tenant.retries ?? 2\n const f: typeof fetch = tenant.fetchImpl ?? 
((...args) => fetch(...args))\n const url = `${tenant.endpoint.replace(/\\/$/, '')}${path}`\n\n let lastError: unknown\n for (let attempt = 0; attempt <= maxRetries; attempt++) {\n const ourTimeout = AbortSignal.timeout(timeoutMs)\n const combinedSignal = opts.signal ? AbortSignal.any([opts.signal, ourTimeout]) : ourTimeout\n try {\n const headers: Record<string, string> = {\n 'content-type': 'application/json',\n authorization: `Bearer ${tenant.apiKey}`,\n 'x-tangle-tenant-id': tenant.tenantId,\n 'x-tangle-wire-version': HOSTED_WIRE_VERSION,\n }\n if (opts.idempotencyKey) headers['idempotency-key'] = opts.idempotencyKey\n\n const res = await f(url, {\n method: 'POST',\n headers,\n body: JSON.stringify(body),\n signal: combinedSignal,\n })\n if (!res.ok) {\n const retryable = res.status >= 500 || res.status === 408 || res.status === 429\n if (!retryable || attempt === maxRetries) {\n const text = await res.text().catch(() => '')\n throw new Error(`hosted ingest ${url} failed (${res.status}): ${text.slice(0, 500)}`)\n }\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n continue\n }\n return (await res.json()) as TRes\n } catch (err) {\n if (opts.signal?.aborted) throw err\n lastError = err\n if (attempt === maxRetries) throw err\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n }\n }\n throw lastError ?? 
new Error('hosted ingest exhausted retries')\n}\n\nexport function createHostedClient(tenant: HostedTenant): HostedClient {\n return {\n tenant,\n wireVersion: HOSTED_WIRE_VERSION,\n\n async ingestEvalRun(event, idempotencyKey) {\n return this.ingestEvalRuns([event], idempotencyKey)\n },\n\n async ingestEvalRuns(events, idempotencyKey) {\n const body: IngestEvalRunsRequest = { wireVersion: HOSTED_WIRE_VERSION, events }\n return post<IngestEvalRunsRequest, IngestResponse>(tenant, '/v1/ingest/eval-runs', body, {\n idempotencyKey,\n })\n },\n\n async ingestTraces(spans, idempotencyKey) {\n const body: IngestTracesRequest = { wireVersion: HOSTED_WIRE_VERSION, spans }\n return post<IngestTracesRequest, IngestResponse>(tenant, '/v1/ingest/traces', body, {\n idempotencyKey,\n })\n },\n }\n}\n\n/**\n * Build a `HostedClient` from environment, or `undefined` when ingest is not\n * configured — the canonical, fail-soft wiring every product uses so eval-run +\n * trace provenance lands in the Intelligence dashboard with ONE call:\n *\n * const hosted = hostedClientFromEnv()\n * // ...run the loop...\n * await emitLoopProvenance({ ..., hostedClient: hosted }) // no-op if undefined\n *\n * Returns `undefined` (NOT an error) when any of endpoint / apiKey / tenantId is\n * missing — so a product wires the ship call unconditionally and it stays a\n * no-op until the env is set. Env precedence:\n * - endpoint: `TANGLE_INGEST_URL` → `TANGLE_ORCHESTRATOR_URL`\n * - apiKey: `TANGLE_INGEST_API_KEY` → `TANGLE_API_KEY`\n * - tenantId: `TANGLE_TENANT_ID`\n * A trailing slash on the endpoint is stripped. Pass `overrides` to supply any\n * field directly (e.g. a fixed `tenantId` per product) — overrides win over env.\n */\nexport function hostedClientFromEnv(\n overrides: Partial<HostedTenant> & { env?: Record<string, string | undefined> } = {},\n): HostedClient | undefined {\n const env = overrides.env ?? 
process.env\n const endpoint = (\n overrides.endpoint ??\n env.TANGLE_INGEST_URL ??\n env.TANGLE_ORCHESTRATOR_URL\n )?.trim()\n const apiKey = (overrides.apiKey ?? env.TANGLE_INGEST_API_KEY ?? env.TANGLE_API_KEY)?.trim()\n const tenantId = (overrides.tenantId ?? env.TANGLE_TENANT_ID)?.trim()\n if (!endpoint || !apiKey || !tenantId) return undefined\n const tenant: HostedTenant = { endpoint: endpoint.replace(/\\/+$/, ''), apiKey, tenantId }\n if (overrides.fetchImpl) tenant.fetchImpl = overrides.fetchImpl\n if (overrides.timeoutMs !== undefined) tenant.timeoutMs = overrides.timeoutMs\n if (overrides.retries !== undefined) tenant.retries = overrides.retries\n return createHostedClient(tenant)\n}\n"],"mappings":";AAgCO,IAAM,sBAAsB;;;ACsBnC,SAAS,MAAM,IAA2B;AACxC,SAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,UAAM,IAAI,WAAW,SAAS,EAAE;AAChC,QAAI,OAAQ,EAA6B,UAAU;AACjD,MAAC,EAA4B,MAAM;AAAA,EACvC,CAAC;AACH;AAEA,eAAe,KACb,QACA,MACA,MACA,OAAuB,CAAC,GACT;AACf,QAAM,YAAY,OAAO,aAAa;AACtC,QAAM,aAAa,OAAO,WAAW;AACrC,QAAM,IAAkB,OAAO,cAAc,IAAI,SAAS,MAAM,GAAG,IAAI;AACvE,QAAM,MAAM,GAAG,OAAO,SAAS,QAAQ,OAAO,EAAE,CAAC,GAAG,IAAI;AAExD,MAAI;AACJ,WAAS,UAAU,GAAG,WAAW,YAAY,WAAW;AACtD,UAAM,aAAa,YAAY,QAAQ,SAAS;AAChD,UAAM,iBAAiB,KAAK,SAAS,YAAY,IAAI,CAAC,KAAK,QAAQ,UAAU,CAAC,IAAI;AAClF,QAAI;AACF,YAAM,UAAkC;AAAA,QACtC,gBAAgB;AAAA,QAChB,eAAe,UAAU,OAAO,MAAM;AAAA,QACtC,sBAAsB,OAAO;AAAA,QAC7B,yBAAyB;AAAA,MAC3B;AACA,UAAI,KAAK,eAAgB,SAAQ,iBAAiB,IAAI,KAAK;AAE3D,YAAM,MAAM,MAAM,EAAE,KAAK;AAAA,QACvB,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU,IAAI;AAAA,QACzB,QAAQ;AAAA,MACV,CAAC;AACD,UAAI,CAAC,IAAI,IAAI;AACX,cAAM,YAAY,IAAI,UAAU,OAAO,IAAI,WAAW,OAAO,IAAI,WAAW;AAC5E,YAAI,CAAC,aAAa,YAAY,YAAY;AACxC,gBAAM,OAAO,MAAM,IAAI,KAAK,EAAE,MAAM,MAAM,EAAE;AAC5C,gBAAM,IAAI,MAAM,iBAAiB,GAAG,YAAY,IAAI,MAAM,MAAM,KAAK,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,QACtF;AACA,cAAM,MAAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AACpD;AAAA,MACF;AACA,aAAQ,MAAM,IAAI,KAAK;AAAA,IACzB,SAAS,KAAK;AACZ,UAAI,KAAK,QAAQ,QAAS,OAAM;AAChC,kBAAY;AACZ,UAAI,YAAY,WAAY,OAAM;AAClC,YAAM,MAAM,KAAK,UAA
U,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,IACtD;AAAA,EACF;AACA,QAAM,aAAa,IAAI,MAAM,iCAAiC;AAChE;AAEO,SAAS,mBAAmB,QAAoC;AACrE,SAAO;AAAA,IACL;AAAA,IACA,aAAa;AAAA,IAEb,MAAM,cAAc,OAAO,gBAAgB;AACzC,aAAO,KAAK,eAAe,CAAC,KAAK,GAAG,cAAc;AAAA,IACpD;AAAA,IAEA,MAAM,eAAe,QAAQ,gBAAgB;AAC3C,YAAM,OAA8B,EAAE,aAAa,qBAAqB,OAAO;AAC/E,aAAO,KAA4C,QAAQ,wBAAwB,MAAM;AAAA,QACvF;AAAA,MACF,CAAC;AAAA,IACH;AAAA,IAEA,MAAM,aAAa,OAAO,gBAAgB;AACxC,YAAM,OAA4B,EAAE,aAAa,qBAAqB,MAAM;AAC5E,aAAO,KAA0C,QAAQ,qBAAqB,MAAM;AAAA,QAClF;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AACF;AAoBO,SAAS,oBACd,YAAkF,CAAC,GACzD;AAC1B,QAAM,MAAM,UAAU,OAAO,QAAQ;AACrC,QAAM,YACJ,UAAU,YACV,IAAI,qBACJ,IAAI,0BACH,KAAK;AACR,QAAM,UAAU,UAAU,UAAU,IAAI,yBAAyB,IAAI,iBAAiB,KAAK;AAC3F,QAAM,YAAY,UAAU,YAAY,IAAI,mBAAmB,KAAK;AACpE,MAAI,CAAC,YAAY,CAAC,UAAU,CAAC,SAAU,QAAO;AAC9C,QAAM,SAAuB,EAAE,UAAU,SAAS,QAAQ,QAAQ,EAAE,GAAG,QAAQ,SAAS;AACxF,MAAI,UAAU,UAAW,QAAO,YAAY,UAAU;AACtD,MAAI,UAAU,cAAc,OAAW,QAAO,YAAY,UAAU;AACpE,MAAI,UAAU,YAAY,OAAW,QAAO,UAAU,UAAU;AAChE,SAAO,mBAAmB,MAAM;AAClC;","names":[]}
@@ -1,10 +1,10 @@
1
1
  import { S as Scenario, M as MutableSurface, b as DispatchContext, a as JudgeConfig, I as ImprovementDriver, G as Gate } from '../types-c2R2kfmv.js';
2
2
  export { C as CampaignAggregates, c as CampaignArtifactWriter, d as CampaignCellResult, e as CampaignCostMeter, f as CampaignResult, g as CampaignTraceWriter, h as CodeSurface, D as Dispatch, i as GateContext, j as GateDecision, k as GateResult, l as GenerationCandidate, m as GenerationRecord, n as JudgeDimension, J as JudgeScore, o as Mutator, O as OptimizerConfig, p as SessionScript } from '../types-c2R2kfmv.js';
3
- import { C as CampaignStorage, L as LoopProvenanceRecord, R as RunImprovementLoopResult } from '../provenance-cUnovpWV.js';
4
- export { D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, a as RunCampaignOptions, b as RunEvalOptions, c as RunImprovementLoopOptions, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, r as runCampaign, k as runEval, l as runImprovementLoop } from '../provenance-cUnovpWV.js';
3
+ import { C as CampaignStorage, L as LoopProvenanceRecord, R as RunImprovementLoopResult } from '../provenance-lqyLpOYR.js';
4
+ export { D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, a as RunCampaignOptions, b as RunEvalOptions, c as RunImprovementLoopOptions, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, r as runCampaign, k as runEval, l as runImprovementLoop } from '../provenance-lqyLpOYR.js';
5
5
  export { D as DeploymentOutcome, F as FileSystemOutcomeStore, b as FileSystemOutcomeStoreOptions, I as InMemoryOutcomeStore, O as OutcomeStore } from '../outcome-store-D6KWmYvj.js';
6
- import { a as HostedTenant, I as InsightReport, T as TraceSpanEvent } from '../index-GISRh500.js';
7
- export { F as FailureClusterInsight, b as InterRaterInsight, J as JudgeInsight, L as LiftInsight, O as OutcomeCorrelationInsight, R as Recommendation, c as ReleaseSummary, S as ScalarDistribution } from '../index-GISRh500.js';
6
+ import { a as HostedTenant, I as InsightReport, T as TraceSpanEvent } from '../index-CzhtwYBT.js';
7
+ export { F as FailureClusterInsight, b as InterRaterInsight, J as JudgeInsight, L as LiftInsight, O as OutcomeCorrelationInsight, R as Recommendation, c as ReleaseSummary, S as ScalarDistribution } from '../index-CzhtwYBT.js';
8
8
  import { R as RunRecord, b as RunSplitTag } from '../run-record-BgTFzO2r.js';
9
9
  import { A as AnalystRegistry } from '../registry-DPly4_hZ.js';
10
10
  import { a as DatasetScenario } from '../dataset-B2kL-fSM.js';
@@ -16,7 +16,7 @@ import {
16
16
  } from "../chunk-7TPYV2ER.js";
17
17
  import {
18
18
  createHostedClient
19
- } from "../chunk-FQK2CCIM.js";
19
+ } from "../chunk-HKINEDRZ.js";
20
20
  import {
21
21
  checkCanaries
22
22
  } from "../chunk-SHTXZ4O2.js";
@@ -1,4 +1,4 @@
1
- export { E as EvalRunCellScore, d as EvalRunEvent, e as EvalRunGenerationSnapshot, f as EvalRunStatus, g as HOSTED_WIRE_VERSION, H as HostedClient, h as HostedIngestHeaders, a as HostedTenant, i as HostedWireVersion, j as IngestEvalRunsRequest, k as IngestResponse, l as IngestTracesRequest, T as TraceSpanEvent, m as createHostedClient } from '../index-GISRh500.js';
1
+ export { E as EvalRunCellScore, d as EvalRunEvent, e as EvalRunGenerationSnapshot, f as EvalRunStatus, g as HOSTED_WIRE_VERSION, H as HostedClient, h as HostedIngestHeaders, a as HostedTenant, i as HostedWireVersion, j as IngestEvalRunsRequest, k as IngestResponse, l as IngestTracesRequest, T as TraceSpanEvent, m as createHostedClient, n as hostedClientFromEnv } from '../index-CzhtwYBT.js';
2
2
  import '../types-c2R2kfmv.js';
3
3
  import '../run-record-BgTFzO2r.js';
4
4
  import '../errors-Dwqw-T_m.js';
@@ -1,10 +1,12 @@
1
1
  import {
2
2
  HOSTED_WIRE_VERSION,
3
- createHostedClient
4
- } from "../chunk-FQK2CCIM.js";
3
+ createHostedClient,
4
+ hostedClientFromEnv
5
+ } from "../chunk-HKINEDRZ.js";
5
6
  import "../chunk-PZ5AY32C.js";
6
7
  export {
7
8
  HOSTED_WIRE_VERSION,
8
- createHostedClient
9
+ createHostedClient,
10
+ hostedClientFromEnv
9
11
  };
10
12
  //# sourceMappingURL=index.js.map
@@ -485,5 +485,26 @@ interface HostedClient {
485
485
  readonly wireVersion: HostedWireVersion;
486
486
  }
487
487
  declare function createHostedClient(tenant: HostedTenant): HostedClient;
488
+ /**
489
+ * Build a `HostedClient` from environment, or `undefined` when ingest is not
490
+ * configured — the canonical, fail-soft wiring every product uses so eval-run +
491
+ * trace provenance lands in the Intelligence dashboard with ONE call:
492
+ *
493
+ * const hosted = hostedClientFromEnv()
494
+ * // ...run the loop...
495
+ * await emitLoopProvenance({ ..., hostedClient: hosted }) // no-op if undefined
496
+ *
497
+ * Returns `undefined` (NOT an error) when any of endpoint / apiKey / tenantId is
498
+ * missing — so a product wires the ship call unconditionally and it stays a
499
+ * no-op until the env is set. Env precedence:
500
+ * - endpoint: `TANGLE_INGEST_URL` → `TANGLE_ORCHESTRATOR_URL`
501
+ * - apiKey: `TANGLE_INGEST_API_KEY` → `TANGLE_API_KEY`
502
+ * - tenantId: `TANGLE_TENANT_ID`
503
+ * A trailing slash on the endpoint is stripped. Pass `overrides` to supply any
504
+ * field directly (e.g. a fixed `tenantId` per product) — overrides win over env.
505
+ */
506
+ declare function hostedClientFromEnv(overrides?: Partial<HostedTenant> & {
507
+ env?: Record<string, string | undefined>;
508
+ }): HostedClient | undefined;
488
509
 
489
- export { type EvalRunCellScore as E, type FailureClusterInsight as F, type HostedClient as H, type InsightReport as I, type JudgeInsight as J, type LiftInsight as L, type OutcomeCorrelationInsight as O, type Recommendation as R, type ScalarDistribution as S, type TraceSpanEvent as T, type HostedTenant as a, type InterRaterInsight as b, type ReleaseSummary as c, type EvalRunEvent as d, type EvalRunGenerationSnapshot as e, type EvalRunStatus as f, HOSTED_WIRE_VERSION as g, type HostedIngestHeaders as h, type HostedWireVersion as i, type IngestEvalRunsRequest as j, type IngestResponse as k, type IngestTracesRequest as l, createHostedClient as m };
510
+ export { type EvalRunCellScore as E, type FailureClusterInsight as F, type HostedClient as H, type InsightReport as I, type JudgeInsight as J, type LiftInsight as L, type OutcomeCorrelationInsight as O, type Recommendation as R, type ScalarDistribution as S, type TraceSpanEvent as T, type HostedTenant as a, type InterRaterInsight as b, type ReleaseSummary as c, type EvalRunEvent as d, type EvalRunGenerationSnapshot as e, type EvalRunStatus as f, HOSTED_WIRE_VERSION as g, type HostedIngestHeaders as h, type HostedWireVersion as i, type IngestEvalRunsRequest as j, type IngestResponse as k, type IngestTracesRequest as l, createHostedClient as m, hostedClientFromEnv as n };
package/dist/openapi.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "openapi": "3.1.0",
3
3
  "info": {
4
4
  "title": "@tangle-network/agent-eval — wire protocol",
5
- "version": "0.63.0",
5
+ "version": "0.64.0",
6
6
  "description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
7
7
  "contact": {
8
8
  "name": "Tangle Network",
@@ -2,7 +2,7 @@ import { S as Scenario, f as CampaignResult, k as GateResult, o as Mutator, I as
2
2
  import { L as LlmClientOptions } from './llm-client-DbjLfz-K.js';
3
3
  import { R as RedTeamCase } from './red-team-DW9Ca_tj.js';
4
4
  import { R as RunRecord } from './run-record-BgTFzO2r.js';
5
- import { H as HostedClient, T as TraceSpanEvent } from './index-GISRh500.js';
5
+ import { H as HostedClient, T as TraceSpanEvent } from './index-CzhtwYBT.js';
6
6
 
7
7
  /**
8
8
  * @experimental
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tangle-network/agent-eval",
3
- "version": "0.63.0",
3
+ "version": "0.64.0",
4
4
  "description": "Substrate for self-improving agents: traces, verifiable rewards, preferences, GEPA / reflective mutation, auto-research, replay, sequential anytime-valid stats, and release gates.",
5
5
  "homepage": "https://github.com/tangle-network/agent-eval#readme",
6
6
  "repository": {