@tangle-network/agent-eval 0.72.0 → 0.72.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/CHANGELOG.md +39 -0
  2. package/dist/adapters/http.d.ts +1 -1
  3. package/dist/adapters/langchain.d.ts +1 -1
  4. package/dist/adapters/otel.d.ts +3 -2
  5. package/dist/agent-profile-DYRboYWu.d.ts +364 -0
  6. package/dist/analyst/index.d.ts +221 -0
  7. package/dist/analyst/index.js +371 -0
  8. package/dist/analyst/index.js.map +1 -0
  9. package/dist/analyst-t7zZS3TV.d.ts +88 -0
  10. package/dist/campaign/index.d.ts +485 -9
  11. package/dist/campaign/index.js +597 -22
  12. package/dist/campaign/index.js.map +1 -1
  13. package/dist/chunk-7W4SM7FD.js +1075 -0
  14. package/dist/chunk-7W4SM7FD.js.map +1 -0
  15. package/dist/{chunk-AIWHLG7J.js → chunk-GJJNJVIR.js} +11 -11
  16. package/dist/chunk-JHA3ZGSO.js +1496 -0
  17. package/dist/chunk-JHA3ZGSO.js.map +1 -0
  18. package/dist/{chunk-4QJN7RDX.js → chunk-JYE3WOTE.js} +55 -7
  19. package/dist/{chunk-4QJN7RDX.js.map → chunk-JYE3WOTE.js.map} +1 -1
  20. package/dist/chunk-LB2UOI5F.js +412 -0
  21. package/dist/chunk-LB2UOI5F.js.map +1 -0
  22. package/dist/{chunk-ODGETRTM.js → chunk-VUINJM5M.js} +234 -1415
  23. package/dist/chunk-VUINJM5M.js.map +1 -0
  24. package/dist/chunk-WYIHD6EB.js +1044 -0
  25. package/dist/chunk-WYIHD6EB.js.map +1 -0
  26. package/dist/{chunk-UD6EF73X.js → chunk-XPILG2CA.js} +119 -2
  27. package/dist/chunk-XPILG2CA.js.map +1 -0
  28. package/dist/contract/index.d.ts +17 -13
  29. package/dist/contract/index.js +13 -7
  30. package/dist/contract/index.js.map +1 -1
  31. package/dist/{control-DxvZeV5X.d.ts → control-BgA6BYTm.d.ts} +1 -1
  32. package/dist/control.d.ts +2 -2
  33. package/dist/{feedback-trajectory-8hKC5EOb.d.ts → feedback-trajectory-B3rErRsh.d.ts} +1 -1
  34. package/dist/harness-optimizer-EnEnQPsr.d.ts +106 -0
  35. package/dist/hosted/index.d.ts +223 -2
  36. package/dist/index.d.ts +49 -1323
  37. package/dist/index.js +353 -2496
  38. package/dist/index.js.map +1 -1
  39. package/dist/{index-BGBrVS24.d.ts → insight-report-Df3lxYXM.d.ts} +1 -221
  40. package/dist/kind-factory-DW9XWPvM.d.ts +172 -0
  41. package/dist/multi-layer-verifier-DlWCXuxL.d.ts +141 -0
  42. package/dist/openapi.json +1 -1
  43. package/dist/pareto-E-pembql.d.ts +81 -0
  44. package/dist/{provenance-C69gLUXH.d.ts → provenance-B-TFszPW.d.ts} +131 -4
  45. package/dist/redact-B40YG2M_.d.ts +45 -0
  46. package/dist/registry-DuVYiTvw.d.ts +128 -0
  47. package/dist/{researcher-WJvIpX3L.d.ts → researcher-C_KJyIGg.d.ts} +1 -141
  48. package/dist/rl.d.ts +4 -3
  49. package/dist/rl.js +4 -4
  50. package/dist/run-critic-BAIjX99r.d.ts +56 -0
  51. package/dist/{run-improvement-loop-Bzamo6GB.d.ts → run-improvement-loop-BqYH2vCR.d.ts} +25 -1
  52. package/dist/semantic-concept-judge-CV9Wlx4t.d.ts +650 -0
  53. package/dist/{store-jzKpMl16.d.ts → store-GmBE2pZZ.d.ts} +1 -1
  54. package/dist/traces.d.ts +371 -308
  55. package/dist/traces.js +43 -18
  56. package/dist/{types-CnmZ2bkP.d.ts → types-Bba0vl1V.d.ts} +1 -1
  57. package/dist/{registry-BGKyX6bw.d.ts → types-CRD68aH7.d.ts} +3 -128
  58. package/dist/wire/index.d.ts +1 -1
  59. package/dist/workflow/index.d.ts +494 -0
  60. package/dist/workflow/index.js +2177 -0
  61. package/dist/workflow/index.js.map +1 -0
  62. package/docs/design/self-improvement-roadmap.md +106 -0
  63. package/package.json +36 -12
  64. package/dist/agent-profile-DzcPHR1Z.d.ts +0 -114
  65. package/dist/chunk-ODGETRTM.js.map +0 -1
  66. package/dist/chunk-SL55X4VN.js +0 -186
  67. package/dist/chunk-SL55X4VN.js.map +0 -1
  68. package/dist/chunk-UD6EF73X.js.map +0 -1
  69. package/dist/{chunk-AIWHLG7J.js.map → chunk-GJJNJVIR.js.map} +0 -0
@@ -1,4 +1,4 @@
1
- import { a as FeedbackLabel, p as ProposedSideEffect } from './feedback-trajectory-8hKC5EOb.js';
1
+ import { b as FeedbackLabel, p as ProposedSideEffect } from './feedback-trajectory-B3rErRsh.js';
2
2
  import { C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig } from './control-runtime-DuFBYg7A.js';
3
3
  import { T as TraceEmitter } from './emitter-DEZwY14K.js';
4
4
  import { F as FailureClass } from './schema-m0gsnbt3.js';
package/dist/control.d.ts CHANGED
@@ -1,6 +1,6 @@
1
- export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DxvZeV5X.js';
1
+ export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-BgA6BYTm.js';
2
2
  export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-DuFBYg7A.js';
3
- import './feedback-trajectory-8hKC5EOb.js';
3
+ import './feedback-trajectory-B3rErRsh.js';
4
4
  import './dataset-B2kL-fSM.js';
5
5
  import './errors-Dwqw-T_m.js';
6
6
  import './emitter-DEZwY14K.js';
@@ -166,4 +166,4 @@ declare function controlRunToFeedbackTrajectory<TState, TAction, TActionResult>(
166
166
  createdAt?: string;
167
167
  }): FeedbackTrajectory;
168
168
 
169
- export { replayFeedbackTrajectory as A, serializeFeedbackTrajectoriesJsonl as B, summarizePreferenceMemory as C, withAssignedFeedbackSplit as D, type FeedbackTrajectoryStore as F, InMemoryFeedbackTrajectoryStore as I, type PreferenceMemoryEntry as P, type FeedbackLabel as a, type FeedbackTrajectory as b, type FeedbackArtifactType as c, type FeedbackAttempt as d, type FeedbackLabelKind as e, type FeedbackLabelSource as f, type FeedbackOptimizerRow as g, type FeedbackOutcome as h, type FeedbackReplayAdapter as i, type FeedbackReplayResult as j, type FeedbackSeverity as k, type FeedbackSplitPolicy as l, type FeedbackTask as m, type FeedbackTrajectoryFilter as n, FileSystemFeedbackTrajectoryStore as o, type ProposedSideEffect as p, assignFeedbackSplit as q, controlRunToFeedbackTrajectory as r, createFeedbackTrajectory as s, feedbackTrajectoriesToDatasetScenarios as t, feedbackTrajectoriesToOptimizerRows as u, feedbackTrajectoryToDatasetScenario as v, feedbackTrajectoryToOptimizerRow as w, parseFeedbackTrajectoriesJsonl as x, renderPreferenceMemoryMarkdown as y, replayFeedbackTrajectories as z };
169
+ export { replayFeedbackTrajectory as A, serializeFeedbackTrajectoriesJsonl as B, summarizePreferenceMemory as C, withAssignedFeedbackSplit as D, type FeedbackTrajectoryStore as F, InMemoryFeedbackTrajectoryStore as I, type PreferenceMemoryEntry as P, type FeedbackTrajectory as a, type FeedbackLabel as b, type FeedbackArtifactType as c, type FeedbackAttempt as d, type FeedbackLabelKind as e, type FeedbackLabelSource as f, type FeedbackOptimizerRow as g, type FeedbackOutcome as h, type FeedbackReplayAdapter as i, type FeedbackReplayResult as j, type FeedbackSeverity as k, type FeedbackSplitPolicy as l, type FeedbackTask as m, type FeedbackTrajectoryFilter as n, FileSystemFeedbackTrajectoryStore as o, type ProposedSideEffect as p, assignFeedbackSplit as q, controlRunToFeedbackTrajectory as r, createFeedbackTrajectory as s, feedbackTrajectoriesToDatasetScenarios as t, feedbackTrajectoriesToOptimizerRows as u, feedbackTrajectoryToDatasetScenario as v, feedbackTrajectoryToOptimizerRow as w, parseFeedbackTrajectoriesJsonl as x, renderPreferenceMemoryMarkdown as y, replayFeedbackTrajectories as z };
@@ -0,0 +1,106 @@
1
+ import { O as Objective, P as ParetoResult } from './pareto-E-pembql.js';
2
+ import { R as RunScore, a as RunTrace, b as RunScoreWeights } from './run-critic-BAIjX99r.js';
3
+
4
+ interface SteeringRolePrompt {
5
+ system?: string;
6
+ append?: string;
7
+ }
8
+ interface SteeringBundle {
9
+ id: string;
10
+ coderPrompt?: string;
11
+ continuePrompt?: string;
12
+ reviewerPrompts?: Record<string, string>;
13
+ skills?: string[];
14
+ rolePrompts?: Record<string, SteeringRolePrompt>;
15
+ metadata?: Record<string, unknown>;
16
+ }
17
+ interface SteeringDelta {
18
+ coderPrompt?: string;
19
+ continuePrompt?: string;
20
+ reviewerPrompts?: Record<string, string>;
21
+ skills?: string[];
22
+ rolePrompts?: Record<string, SteeringRolePrompt>;
23
+ metadata?: Record<string, unknown>;
24
+ }
25
+ declare function mergeSteeringBundle(base: SteeringBundle, delta: SteeringDelta): SteeringBundle;
26
+ declare function renderSteeringText(bundle: SteeringBundle): string;
27
+
28
+ type HarnessIntervention = 'continue' | 'plan' | 'audit' | 'recover' | 'repair' | 'verify' | 'final_gate' | 'wait_for_measurement' | 'abort';
29
+ interface WorkflowTopology {
30
+ id: string;
31
+ interventions: HarnessIntervention[];
32
+ maxParallelBranches?: number;
33
+ metadata?: Record<string, unknown>;
34
+ }
35
+ interface MeasurementPolicy {
36
+ required: string[];
37
+ optional?: string[];
38
+ promoteOn?: Array<keyof RunScore | 'aggregate'>;
39
+ }
40
+ interface HarnessVariant {
41
+ id: string;
42
+ steering?: SteeringBundle;
43
+ topology?: WorkflowTopology;
44
+ measurement?: MeasurementPolicy;
45
+ budgets?: Record<string, number>;
46
+ models?: Record<string, string>;
47
+ reviewers?: Record<string, string>;
48
+ metadata?: Record<string, unknown>;
49
+ }
50
+ interface HarnessScenario {
51
+ id: string;
52
+ task: string;
53
+ split?: 'train' | 'validation' | 'test' | string;
54
+ metadata?: Record<string, unknown>;
55
+ }
56
+ interface HarnessRunRequest {
57
+ variant: HarnessVariant;
58
+ scenario: HarnessScenario;
59
+ trialIndex: number;
60
+ }
61
+ interface HarnessAdapter {
62
+ run(request: HarnessRunRequest): Promise<RunTrace>;
63
+ }
64
+ interface HarnessRunResult {
65
+ variant: HarnessVariant;
66
+ scenario: HarnessScenario;
67
+ trialIndex: number;
68
+ trace: RunTrace;
69
+ score: RunScore;
70
+ aggregate: number;
71
+ }
72
+ interface HarnessVariantReport {
73
+ variant: HarnessVariant;
74
+ runs: HarnessRunResult[];
75
+ aggregateMean: number;
76
+ passRate: number;
77
+ costUsdMean: number;
78
+ wallSecondsMean: number;
79
+ scoreMean: RunScore;
80
+ }
81
+ interface HarnessSelection {
82
+ winner: HarnessVariantReport;
83
+ frontier: ParetoResult<HarnessVariantReport>;
84
+ reports: HarnessVariantReport[];
85
+ }
86
+ interface HarnessExperimentResult {
87
+ results: HarnessRunResult[];
88
+ selection: HarnessSelection;
89
+ }
90
+ interface HarnessExperimentConfig {
91
+ adapter: HarnessAdapter;
92
+ variants: HarnessVariant[];
93
+ scenarios: HarnessScenario[];
94
+ trialsPerScenario?: number;
95
+ parallelism?: number;
96
+ weights?: Partial<RunScoreWeights>;
97
+ objectives?: Array<Objective<HarnessVariantReport>>;
98
+ score?: (trace: RunTrace, request: HarnessRunRequest) => RunScore | Promise<RunScore>;
99
+ onResult?: (result: HarnessRunResult) => void | Promise<void>;
100
+ }
101
+ declare const DEFAULT_HARNESS_OBJECTIVES: Array<Objective<HarnessVariantReport>>;
102
+ declare function runHarnessExperiment(config: HarnessExperimentConfig): Promise<HarnessExperimentResult>;
103
+ declare function selectHarnessVariant(results: HarnessRunResult[], objectives?: Array<Objective<HarnessVariantReport>>): HarnessSelection;
104
+ declare function summarizeHarnessResults(results: HarnessRunResult[]): HarnessVariantReport[];
105
+
106
+ export { DEFAULT_HARNESS_OBJECTIVES as D, type HarnessAdapter as H, type MeasurementPolicy as M, type SteeringBundle as S, type WorkflowTopology as W, type HarnessExperimentConfig as a, type HarnessExperimentResult as b, type HarnessIntervention as c, type HarnessRunRequest as d, type HarnessRunResult as e, type HarnessScenario as f, type HarnessSelection as g, type HarnessVariant as h, type HarnessVariantReport as i, type SteeringDelta as j, type SteeringRolePrompt as k, runHarnessExperiment as l, mergeSteeringBundle as m, summarizeHarnessResults as n, renderSteeringText as r, selectHarnessVariant as s };
@@ -1,5 +1,5 @@
1
- export { E as EvalRunCellScore, d as EvalRunEvent, e as EvalRunGenerationSnapshot, f as EvalRunStatus, g as HOSTED_WIRE_VERSION, H as HostedClient, h as HostedIngestHeaders, a as HostedTenant, i as HostedWireVersion, j as IngestEvalRunsRequest, k as IngestResponse, l as IngestTracesRequest, T as TraceSpanEvent, m as createHostedClient, n as hostedClientFromEnv } from '../index-BGBrVS24.js';
2
- import '../types-CnmZ2bkP.js';
1
+ import { M as MutableSurface, j as GateDecision } from '../types-Bba0vl1V.js';
2
+ import { I as InsightReport } from '../insight-report-Df3lxYXM.js';
3
3
  import '../run-record-BgTFzO2r.js';
4
4
  import '../errors-Dwqw-T_m.js';
5
5
  import '../schema-m0gsnbt3.js';
@@ -7,3 +7,224 @@ import '../summary-report-ByiOUrHj.js';
7
7
  import '../failure-cluster-CL7IVgkJ.js';
8
8
  import '../store-CKUAgsJz.js';
9
9
  import '../judge-calibration-DilmB3Ml.js';
10
+
11
+ /**
12
+ * # Hosted-tier wire format — the schema that EVERY orchestrator (ours,
13
+ * a partner's self-hosted one, a future open implementation) must accept.
14
+ *
15
+ * **Stability:** every type in this file is committed under semver. New
16
+ * minors only ADD optional fields. Breaking changes mean a major bump
17
+ * (`HostedWireVersion` literal increment).
18
+ *
19
+ * The wire format is two event streams in one transport:
20
+ *
21
+ * 1. **Eval-run events** (`POST /v1/ingest/eval-runs`). Posted when a
22
+ * campaign / improvement-loop completes (or per-generation if
23
+ * streaming). Carries the structured result + per-cell scores +
24
+ * surface diffs the orchestrator stores for the dashboard.
25
+ *
26
+ * 2. **Trace spans** (`POST /v1/ingest/traces`). Standard OTLP-shaped
27
+ * spans with a few additional attributes so the orchestrator can
28
+ * pivot from eval-run → underlying execution. Compatible with any
29
+ * OTel collector.
30
+ *
31
+ * Both endpoints are authenticated with a bearer token + a tenant id
32
+ * header. Tenants isolate everything downstream of ingest; no tenant
33
+ * ever sees another tenant's data.
34
+ */
35
+
36
+ declare const HOSTED_WIRE_VERSION: "2026-05-26.v1";
37
+ type HostedWireVersion = typeof HOSTED_WIRE_VERSION;
38
+ /** Every ingest request carries these. */
39
+ interface HostedIngestHeaders {
40
+ /** Bearer token. The orchestrator validates against the tenant key. */
41
+ authorization: `Bearer ${string}`;
42
+ /** Stable tenant id (the orchestrator-side primary key for the tenant). */
43
+ 'x-tangle-tenant-id': string;
44
+ /** Wire-version pin so the server can reject incompatible payloads. */
45
+ 'x-tangle-wire-version': HostedWireVersion;
46
+ /** Optional idempotency key for retry-safe ingest. */
47
+ 'idempotency-key'?: string;
48
+ }
49
+ /** Lifecycle stages of an eval-run as the substrate reports them. */
50
+ type EvalRunStatus = 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
51
+ interface EvalRunCellScore {
52
+ /** Stable scenario id from the consumer's scenario set. */
53
+ scenarioId: string;
54
+ /** Repetition index when reps > 1; 0 for the default. */
55
+ rep: number;
56
+ /** Composite score across all judges + dimensions for this cell. */
57
+ compositeMean: number;
58
+ /** Per-judge → per-dimension scores; null where the judge did not run. */
59
+ dimensions: Record<string, Record<string, number>>;
60
+ /** Per-cell error message if the dispatch threw. Null on success. */
61
+ errorMessage?: string;
62
+ }
63
+ interface EvalRunGenerationSnapshot {
64
+ /** Generation index. 0 is baseline. */
65
+ index: number;
66
+ /** Candidate surface fingerprint (stable hash) — pivot key into the
67
+ * trace stream to fetch the underlying execution. */
68
+ surfaceHash: string;
69
+ /** The candidate surface itself. May be omitted to avoid PII when the
70
+ * consumer prefers not to ship verbatim prompts. */
71
+ surface?: MutableSurface;
72
+ /** Per-cell scores for this generation. */
73
+ cells: EvalRunCellScore[];
74
+ /** Aggregate composite mean across all cells in this generation. */
75
+ compositeMean: number;
76
+ /** Total $ spent across this generation. */
77
+ costUsd: number;
78
+ /** Wall-clock duration of this generation. */
79
+ durationMs: number;
80
+ }
81
+ /**
82
+ * The top-level eval-run event. One ingest call per logical eval-run;
83
+ * generations stream in incrementally via repeated calls with the same
84
+ * `runId`. The orchestrator deduplicates by `(runId, generation.index)`.
85
+ */
86
+ interface EvalRunEvent {
87
+ /** Stable run id (the substrate's `runId`). UUID or substrate-generated. */
88
+ runId: string;
89
+ /** Where this run was happening — derived from `RunCampaignOptions.runDir`. */
90
+ runDir: string;
91
+ /** ISO-8601 timestamp the substrate recorded the event. */
92
+ timestamp: string;
93
+ /** Lifecycle stage this event represents. */
94
+ status: EvalRunStatus;
95
+ /** Free-form consumer tags (env, branch, model id, etc.). Searchable. */
96
+ labels: Record<string, string>;
97
+ /** Baseline campaign snapshot. Present when status >= baseline-complete. */
98
+ baseline?: EvalRunGenerationSnapshot;
99
+ /** Per-generation snapshots. Streams in; orchestrator appends. */
100
+ generations: EvalRunGenerationSnapshot[];
101
+ /** Final gate decision. Present when status >= gate-decided. */
102
+ gateDecision?: GateDecision;
103
+ /** Held-out lift = winner-on-holdout - baseline-on-holdout. */
104
+ holdoutLift?: number;
105
+ /** Total $ spent across baseline + every generation. */
106
+ totalCostUsd: number;
107
+ /** Total wall-clock duration. */
108
+ totalDurationMs: number;
109
+ /** Error message if status === 'errored'. */
110
+ errorMessage?: string;
111
+ /** Rigor packet emitted alongside the run — distributional summary,
112
+ * paired-bootstrap lift CI, judge stats, inter-rater agreement,
113
+ * contamination check, failure clusters (when an analyst is wired),
114
+ * outcome correlation (when downstream signal is supplied), and the
115
+ * recommendations the dashboard surfaces verbatim. Additive; older
116
+ * clients that don't know about this field continue to work. */
117
+ insightReport?: InsightReport;
118
+ }
119
+ /**
120
+ * OTel-shape span with a few additional attributes for eval-run pivoting.
121
+ * Compatible with any OTLP collector — `name`, `traceId`, `spanId`,
122
+ * `startTimeUnixNano`, `endTimeUnixNano`, `attributes` are stock OTel.
123
+ */
124
+ interface TraceSpanEvent {
125
+ traceId: string;
126
+ spanId: string;
127
+ parentSpanId?: string;
128
+ name: string;
129
+ startTimeUnixNano: number;
130
+ endTimeUnixNano: number;
131
+ attributes: Record<string, string | number | boolean>;
132
+ events?: Array<{
133
+ timeUnixNano: number;
134
+ name: string;
135
+ attributes?: Record<string, string | number | boolean>;
136
+ }>;
137
+ status?: {
138
+ code: 'OK' | 'ERROR' | 'UNSET';
139
+ message?: string;
140
+ };
141
+ /** Pivot back into the eval-run stream. */
142
+ 'tangle.runId'?: string;
143
+ /** Pivot to the specific generation. */
144
+ 'tangle.generation'?: number;
145
+ /** Pivot to the specific cell. */
146
+ 'tangle.cellId'?: string;
147
+ /** Pivot to the specific scenario. */
148
+ 'tangle.scenarioId'?: string;
149
+ }
150
+ interface IngestEvalRunsRequest {
151
+ wireVersion: HostedWireVersion;
152
+ events: EvalRunEvent[];
153
+ }
154
+ interface IngestTracesRequest {
155
+ wireVersion: HostedWireVersion;
156
+ spans: TraceSpanEvent[];
157
+ }
158
+ interface IngestResponse {
159
+ /** Accepted events / spans count. */
160
+ accepted: number;
161
+ /** Rejected events with reasons (validation failures, dup idempotency key, etc.). */
162
+ rejected: Array<{
163
+ index: number;
164
+ reason: string;
165
+ }>;
166
+ }
167
+
168
+ /**
169
+ * # Hosted-tier ingest client.
170
+ *
171
+ * Ships eval-run events + trace spans to any orchestrator (ours, a
172
+ * partner's self-hosted one, or a future open implementation) that
173
+ * speaks the wire format in `./types.ts`.
174
+ *
175
+ * Three modes:
176
+ * - **Ours:** point at `https://orchestrator.tangle.tools` (the host root —
177
+ * the client appends the versioned `/v1/ingest/...` path itself; a trailing
178
+ * `/v1` on the endpoint is tolerated and normalized away). We handle ingest
179
+ * + storage + dashboard.
180
+ * - **Self-hosted:** point at whatever URL runs the reference receiver
181
+ * from `examples/hosted-ingest-server/`.
182
+ * - **Off (default):** when `hostedTenant` is unset, nothing is sent.
183
+ * Everything stays local.
184
+ */
185
+
186
+ interface HostedTenant {
187
+ /** Orchestrator endpoint base URL (no trailing slash). Required. */
188
+ endpoint: string;
189
+ /** Bearer token issued by the orchestrator. Required. */
190
+ apiKey: string;
191
+ /** Tenant id — the orchestrator's primary key for this consumer. Required. */
192
+ tenantId: string;
193
+ /** Optional `fetch` override (auth wrappers, custom agent, test mocks). */
194
+ fetchImpl?: typeof fetch;
195
+ /** Per-call timeout in ms. Default 30s. */
196
+ timeoutMs?: number;
197
+ /** Retries on 5xx / network errors. Default 2. */
198
+ retries?: number;
199
+ }
200
+ interface HostedClient {
201
+ ingestEvalRun(event: EvalRunEvent, idempotencyKey?: string): Promise<IngestResponse>;
202
+ ingestEvalRuns(events: EvalRunEvent[], idempotencyKey?: string): Promise<IngestResponse>;
203
+ ingestTraces(spans: TraceSpanEvent[], idempotencyKey?: string): Promise<IngestResponse>;
204
+ readonly tenant: HostedTenant;
205
+ readonly wireVersion: HostedWireVersion;
206
+ }
207
+ declare function createHostedClient(tenant: HostedTenant): HostedClient;
208
+ /**
209
+ * Build a `HostedClient` from environment, or `undefined` when ingest is not
210
+ * configured — the canonical, fail-soft wiring every product uses so eval-run +
211
+ * trace provenance lands in the Intelligence dashboard with ONE call:
212
+ *
213
+ * const hosted = hostedClientFromEnv()
214
+ * // ...run the loop...
215
+ * await emitLoopProvenance({ ..., hostedClient: hosted }) // no-op if undefined
216
+ *
217
+ * Returns `undefined` (NOT an error) when any of endpoint / apiKey / tenantId is
218
+ * missing — so a product wires the ship call unconditionally and it stays a
219
+ * no-op until the env is set. Env precedence:
220
+ * - endpoint: `TANGLE_INGEST_URL` → `TANGLE_ORCHESTRATOR_URL`
221
+ * - apiKey: `TANGLE_INGEST_API_KEY` → `TANGLE_API_KEY`
222
+ * - tenantId: `TANGLE_TENANT_ID`
223
+ * A trailing slash on the endpoint is stripped. Pass `overrides` to supply any
224
+ * field directly (e.g. a fixed `tenantId` per product) — overrides win over env.
225
+ */
226
+ declare function hostedClientFromEnv(overrides?: Partial<HostedTenant> & {
227
+ env?: Record<string, string | undefined>;
228
+ }): HostedClient | undefined;
229
+
230
+ export { type EvalRunCellScore, type EvalRunEvent, type EvalRunGenerationSnapshot, type EvalRunStatus, HOSTED_WIRE_VERSION, type HostedClient, type HostedIngestHeaders, type HostedTenant, type HostedWireVersion, type IngestEvalRunsRequest, type IngestResponse, type IngestTracesRequest, type TraceSpanEvent, createHostedClient, hostedClientFromEnv };