@tangle-network/agent-runtime 0.48.0 → 0.50.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -15
- package/dist/agent.d.ts +1 -1
- package/dist/agent.js +1 -1
- package/dist/analyst-loop.d.ts +1 -1
- package/dist/{chunk-656G2XCL.js → chunk-BKAIVNFA.js} +3 -3
- package/dist/{chunk-IW2LMLK6.js → chunk-CM2IK7VS.js} +913 -152
- package/dist/chunk-CM2IK7VS.js.map +1 -0
- package/dist/{chunk-VR4JIC5H.js → chunk-ML4IXGTV.js} +2 -2
- package/dist/{chunk-TJS7S3HJ.js → chunk-NDM5VXZW.js} +19 -8
- package/dist/chunk-NDM5VXZW.js.map +1 -0
- package/dist/chunk-OM3YNZIW.js +978 -0
- package/dist/chunk-OM3YNZIW.js.map +1 -0
- package/dist/{chunk-JNPK46YH.js → chunk-RHW75JW5.js} +498 -350
- package/dist/chunk-RHW75JW5.js.map +1 -0
- package/dist/{coder-CVZNGbyg.d.ts → coder-_YCf3BAK.d.ts} +2 -2
- package/dist/{driver-DYU2sgHr.d.ts → driver-DLI1io57.d.ts} +1 -1
- package/dist/index.d.ts +34 -9
- package/dist/index.js +117 -27
- package/dist/index.js.map +1 -1
- package/dist/kb-gate-CHAyt4aI.d.ts +1571 -0
- package/dist/{loop-runner-bin-DEm4roYF.d.ts → loop-runner-bin-DFUNgpeK.d.ts} +4 -4
- package/dist/loop-runner-bin.d.ts +5 -5
- package/dist/loop-runner-bin.js +3 -3
- package/dist/loops.d.ts +6 -6
- package/dist/loops.js +17 -1
- package/dist/mcp/bin.js +206 -29
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +41 -177
- package/dist/mcp/index.js +40 -6
- package/dist/mcp/index.js.map +1 -1
- package/dist/openai-tools-D4HLDWgw.d.ts +45 -0
- package/dist/platform.js +2 -2
- package/dist/platform.js.map +1 -1
- package/dist/profiles.d.ts +2 -2
- package/dist/{run-loop-DvD4aGiE.d.ts → run-loop-BIineL1T.d.ts} +1 -1
- package/dist/runtime.d.ts +403 -24
- package/dist/runtime.js +17 -1
- package/dist/{types-BpDfCPUp.d.ts → types-5MGt5KTY.d.ts} +1 -1
- package/dist/{types-nBMuollC.d.ts → types-BEQsBhOE.d.ts} +1 -1
- package/dist/workflow.d.ts +2 -2
- package/dist/workflow.js +1 -1
- package/package.json +6 -5
- package/dist/chunk-IW2LMLK6.js.map +0 -1
- package/dist/chunk-JNPK46YH.js.map +0 -1
- package/dist/chunk-LX66I3SC.js +0 -218
- package/dist/chunk-LX66I3SC.js.map +0 -1
- package/dist/chunk-TJS7S3HJ.js.map +0 -1
- package/dist/kb-gate-51BlLlVM.d.ts +0 -529
- package/dist/otel-export-EzfsVUhh.d.ts +0 -191
- /package/dist/{chunk-656G2XCL.js.map → chunk-BKAIVNFA.js.map} +0 -0
- /package/dist/{chunk-VR4JIC5H.js.map → chunk-ML4IXGTV.js.map} +0 -0
|
@@ -0,0 +1,1571 @@
|
|
|
1
|
+
import { C as CoderOutput, b as CoderTask } from './coder-_YCf3BAK.js';
|
|
2
|
+
import { f as LoopTraceEmitter, g as LoopTraceEvent, S as SandboxClient, A as AgentRunSpec } from './types-BEQsBhOE.js';
|
|
3
|
+
import { SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
4
|
+
import { AgentEvalError } from '@tangle-network/agent-eval';
|
|
5
|
+
import { a as UiLens, U as UiFinding } from './substrate-CUgk7F7s.js';
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* @experimental
|
|
9
|
+
*
|
|
10
|
+
* Persistence port for the MCP delegation queue.
|
|
11
|
+
*
|
|
12
|
+
* `DelegationTaskQueue` keeps its working set in memory (status/history
|
|
13
|
+
* reads stay synchronous) and journals every record mutation through a
|
|
14
|
+
* `DelegationStore`. `DelegationTaskQueue.restore({ store })` is the load
|
|
15
|
+
* path: it reads the full record set once at construction and rehydrates
|
|
16
|
+
* the queue from it. After that the store only sees writes.
|
|
17
|
+
*
|
|
18
|
+
* Records MUST be JSON-safe — `FileDelegationStore` round-trips them
|
|
19
|
+
* through `JSON.stringify`/`JSON.parse`, so a `Date`, `Map`, or function
|
|
20
|
+
* smuggled into `args`/`result` would corrupt the journal.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
/** @experimental */
|
|
24
|
+
interface DelegationStore {
|
|
25
|
+
/**
|
|
26
|
+
* Read every persisted record. Called once, by
|
|
27
|
+
* `DelegationTaskQueue.restore`, before any write. A missing backing
|
|
28
|
+
* file is an empty store; an unparseable one throws
|
|
29
|
+
* `DelegationStateCorruptError`.
|
|
30
|
+
*/
|
|
31
|
+
loadAll(): Promise<DelegationRecord[]>;
|
|
32
|
+
/** Insert or replace the record keyed by `record.taskId`. */
|
|
33
|
+
upsert(record: DelegationRecord): Promise<void>;
|
|
34
|
+
/**
|
|
35
|
+
* Resolve an idempotency key to the taskId that claimed it, if any.
|
|
36
|
+
* The queue serves submit-time dedupe from its rehydrated in-memory
|
|
37
|
+
* index; this read exists for consumers that share a store across
|
|
38
|
+
* processes without holding the full record set.
|
|
39
|
+
*/
|
|
40
|
+
lookupIdempotencyKey(key: string): Promise<string | undefined>;
|
|
41
|
+
/** Delete the named records — the retention-cap eviction path. */
|
|
42
|
+
remove(taskIds: readonly string[]): Promise<void>;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* The persisted delegation state exists but cannot be parsed into
|
|
46
|
+
* records. Fail loud: silently starting empty over a corrupt journal
|
|
47
|
+
* would erase delegation history and re-run idempotent work. Opt into
|
|
48
|
+
* recovery explicitly via `FileDelegationStoreOptions.recoverCorrupt`
|
|
49
|
+
* (the bin maps `AGENT_RUNTIME_DELEGATION_STATE_RECOVER=1` onto it),
|
|
50
|
+
* which archives the corrupt file and starts fresh.
|
|
51
|
+
*
|
|
52
|
+
* @experimental
|
|
53
|
+
*/
|
|
54
|
+
declare class DelegationStateCorruptError extends AgentEvalError {
|
|
55
|
+
constructor(message: string, options?: {
|
|
56
|
+
cause?: unknown;
|
|
57
|
+
});
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* A delegation-store read or write failed (filesystem error, store
|
|
61
|
+
* called before `loadAll`, ...). Once the queue observes one, it stops
|
|
62
|
+
* accepting new submissions — accepting work it cannot journal would
|
|
63
|
+
* silently demote durable mode to in-memory mode.
|
|
64
|
+
*
|
|
65
|
+
* @experimental
|
|
66
|
+
*/
|
|
67
|
+
declare class DelegationPersistenceError extends AgentEvalError {
|
|
68
|
+
constructor(message: string, options?: {
|
|
69
|
+
cause?: unknown;
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
/** @experimental */
|
|
73
|
+
declare class InMemoryDelegationStore implements DelegationStore {
|
|
74
|
+
private readonly records;
|
|
75
|
+
loadAll(): Promise<DelegationRecord[]>;
|
|
76
|
+
upsert(record: DelegationRecord): Promise<void>;
|
|
77
|
+
lookupIdempotencyKey(key: string): Promise<string | undefined>;
|
|
78
|
+
remove(taskIds: readonly string[]): Promise<void>;
|
|
79
|
+
}
|
|
80
|
+
/** @experimental */
|
|
81
|
+
interface FileDelegationStoreOptions {
|
|
82
|
+
/** Absolute path of the JSON state file. Parent directories are created on first write. */
|
|
83
|
+
filePath: string;
|
|
84
|
+
/**
|
|
85
|
+
* When the state file exists but cannot be parsed, archive it to
|
|
86
|
+
* `<filePath>.corrupt-<timestamp>` and start empty instead of
|
|
87
|
+
* throwing `DelegationStateCorruptError`. Default false.
|
|
88
|
+
*/
|
|
89
|
+
recoverCorrupt?: boolean;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* JSON-file persistence for the delegation queue. Each write serializes
|
|
93
|
+
* the full record set and lands it atomically (write to a sibling tmp
|
|
94
|
+
* file, then `rename`), so readers never observe a torn file — a crash
|
|
95
|
+
* mid-write leaves the previous snapshot intact. Writes are serialized
|
|
96
|
+
* internally; concurrent `upsert`/`remove` calls cannot interleave.
|
|
97
|
+
*
|
|
98
|
+
* Built for the MCP server's scale (one stdio process, hundreds of
|
|
99
|
+
* records): full-snapshot writes keep the format trivially inspectable
|
|
100
|
+
* and corruption-detectable without a database dependency.
|
|
101
|
+
*
|
|
102
|
+
* @experimental
|
|
103
|
+
*/
|
|
104
|
+
declare class FileDelegationStore implements DelegationStore {
|
|
105
|
+
private readonly filePath;
|
|
106
|
+
private readonly recoverCorrupt;
|
|
107
|
+
private readonly records;
|
|
108
|
+
private loaded;
|
|
109
|
+
private writeTail;
|
|
110
|
+
private tmpSeq;
|
|
111
|
+
constructor(options: FileDelegationStoreOptions);
|
|
112
|
+
loadAll(): Promise<DelegationRecord[]>;
|
|
113
|
+
upsert(record: DelegationRecord): Promise<void>;
|
|
114
|
+
lookupIdempotencyKey(key: string): Promise<string | undefined>;
|
|
115
|
+
remove(taskIds: readonly string[]): Promise<void>;
|
|
116
|
+
private assertLoaded;
|
|
117
|
+
private enqueueWrite;
|
|
118
|
+
private writeSnapshot;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* @experimental
|
|
123
|
+
*
|
|
124
|
+
* Compact loop-trace tee for the delegation journal.
|
|
125
|
+
*
|
|
126
|
+
* The OTEL exporter ({@link createPropagatingTraceEmitter}) is a no-op
|
|
127
|
+
* without `OTEL_EXPORTER_OTLP_ENDPOINT`, which leaves delegated work streams
|
|
128
|
+
* dark in practice. This module derives the same loop → round → branch span
|
|
129
|
+
* tree (via the shared {@link buildLoopSpanNodes} builder) into a small,
|
|
130
|
+
* JSON-safe shape persisted directly on the `DelegationRecord` — observable
|
|
131
|
+
* through `delegation_status` with no collector infrastructure. Both sinks
|
|
132
|
+
* coexist: the OTEL export path is unchanged.
|
|
133
|
+
*
|
|
134
|
+
* Payload discipline: a record's trace is hard-capped (spans + serialized
|
|
135
|
+
* bytes). Past the cap the OLDEST spans are dropped and the record carries a
|
|
136
|
+
* `traceTruncated: true` marker — truncation is never silent.
|
|
137
|
+
*/
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* One span of a delegation's compact trace. Flat (parent linkage by id), all
|
|
141
|
+
* values JSON-safe scalars — `FileDelegationStore` round-trips records
|
|
142
|
+
* through `JSON.stringify`. `meta` carries the span's attributes (GenAI
|
|
143
|
+
* semconv keys + `tangle.loop.*` extensions) exactly as the OTEL sink emits
|
|
144
|
+
* them, so a consumer can re-export journal traces losslessly.
|
|
145
|
+
*
|
|
146
|
+
* @experimental
|
|
147
|
+
*/
|
|
148
|
+
interface DelegationTraceSpan {
|
|
149
|
+
spanId: string;
|
|
150
|
+
/** Absent on the tree root. */
|
|
151
|
+
parentSpanId?: string;
|
|
152
|
+
/** `'loop'` | `'loop.round'` | `'loop.iteration'` (or a sink-specific name). */
|
|
153
|
+
name: string;
|
|
154
|
+
/** Topology level: loop root, plan round, or iteration branch. */
|
|
155
|
+
kind: 'loop' | 'round' | 'branch';
|
|
156
|
+
startMs: number;
|
|
157
|
+
endMs: number;
|
|
158
|
+
meta?: Record<string, string | number | boolean>;
|
|
159
|
+
}
|
|
160
|
+
/** Default cap on spans retained per delegation record. @experimental */
|
|
161
|
+
declare const DELEGATION_TRACE_MAX_SPANS = 512;
|
|
162
|
+
/** Default cap on the serialized trace payload per record, in bytes. @experimental */
|
|
163
|
+
declare const DELEGATION_TRACE_MAX_BYTES: number;
|
|
164
|
+
/** @experimental */
|
|
165
|
+
interface DelegationTraceCaps {
|
|
166
|
+
/** Default {@link DELEGATION_TRACE_MAX_SPANS}. */
|
|
167
|
+
maxSpans?: number;
|
|
168
|
+
/** Default {@link DELEGATION_TRACE_MAX_BYTES}. Approximate — measured as the
|
|
169
|
+
* sum of per-span `JSON.stringify` lengths. */
|
|
170
|
+
maxBytes?: number;
|
|
171
|
+
}
|
|
172
|
+
/** @experimental */
|
|
173
|
+
interface CappedDelegationTrace {
|
|
174
|
+
trace: DelegationTraceSpan[];
|
|
175
|
+
/** True when oldest spans were dropped to honor the caps. */
|
|
176
|
+
truncated: boolean;
|
|
177
|
+
}
|
|
178
|
+
/**
|
|
179
|
+
* Derive the compact span tree for ONE loop run from its buffered
|
|
180
|
+
* `LoopTraceEvent` stream. Same reconstruction as the OTEL exporter
|
|
181
|
+
* ({@link buildLoopSpanNodes}); tolerates partial streams.
|
|
182
|
+
*
|
|
183
|
+
* @experimental
|
|
184
|
+
*/
|
|
185
|
+
declare function buildDelegationTraceSpans(events: ReadonlyArray<LoopTraceEvent>): DelegationTraceSpan[];
|
|
186
|
+
/**
|
|
187
|
+
* Enforce the trace caps over an ordered (oldest-first) span list. Drops the
|
|
188
|
+
* OLDEST spans first and reports `truncated: true` when anything was dropped;
|
|
189
|
+
* the newest span always survives, so a non-empty input never caps to empty.
|
|
190
|
+
* Dropping a parent may orphan surviving children's `parentSpanId` references
|
|
191
|
+
* — acceptable for the flat journal shape; consumers treat unresolved parents
|
|
192
|
+
* as roots.
|
|
193
|
+
*
|
|
194
|
+
* @experimental
|
|
195
|
+
*/
|
|
196
|
+
declare function capDelegationTrace(spans: ReadonlyArray<DelegationTraceSpan>, caps?: DelegationTraceCaps): CappedDelegationTrace;
|
|
197
|
+
/**
|
|
198
|
+
* Per-delegation trace collector. Buffers `LoopTraceEvent`s per runId
|
|
199
|
+
* (mirroring the OTEL emitter's buffering) and hands the derived compact
|
|
200
|
+
* spans to `onSpans` when a run reaches `loop.ended`. `settle()` drains runs
|
|
201
|
+
* that never ended — a hard-aborted loop still leaves its partial tree in the
|
|
202
|
+
* journal, unlike the OTEL path which drops it.
|
|
203
|
+
*
|
|
204
|
+
* @experimental
|
|
205
|
+
*/
|
|
206
|
+
interface DelegationTraceCollector {
|
|
207
|
+
emitter: LoopTraceEmitter;
|
|
208
|
+
/** Flush buffered events of runs that never reached `loop.ended`. */
|
|
209
|
+
settle(): void;
|
|
210
|
+
}
|
|
211
|
+
/** @experimental */
|
|
212
|
+
declare function createDelegationTraceCollector(onSpans: (spans: DelegationTraceSpan[]) => void): DelegationTraceCollector;
|
|
213
|
+
/**
|
|
214
|
+
* Fan one `LoopTraceEvent` stream into several emitters — e.g. the
|
|
215
|
+
* process-wide OTEL exporter AND the per-delegation journal collector.
|
|
216
|
+
* `undefined` entries are skipped; returns `undefined` when nothing is left
|
|
217
|
+
* so callers keep the kernel's "no emitter, no events" fast path.
|
|
218
|
+
*
|
|
219
|
+
* @experimental
|
|
220
|
+
*/
|
|
221
|
+
declare function composeLoopTraceEmitters(...emitters: ReadonlyArray<LoopTraceEmitter | undefined>): LoopTraceEmitter | undefined;
|
|
222
|
+
|
|
223
|
+
/**
|
|
224
|
+
* OTEL span exporter — streams LoopTraceEvents to an OTLP/HTTP collector.
|
|
225
|
+
*
|
|
226
|
+
* Reads OTEL_EXPORTER_OTLP_ENDPOINT + OTEL_EXPORTER_OTLP_HEADERS from env
|
|
227
|
+
* when no explicit config is given. Keeps the runtime free of a dependency on
|
|
228
|
+
* @opentelemetry/sdk-trace-base — minimal OTLP/JSON serializer.
|
|
229
|
+
*
|
|
230
|
+
* The exporter accepts both raw OtelSpan objects and LoopTraceEvents
|
|
231
|
+
* (which get converted to OTLP spans automatically).
|
|
232
|
+
*/
|
|
233
|
+
interface OtelExportConfig {
|
|
234
|
+
/** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */
|
|
235
|
+
endpoint?: string;
|
|
236
|
+
/** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */
|
|
237
|
+
headers?: Record<string, string>;
|
|
238
|
+
/** Batch size before flush. Default 64. */
|
|
239
|
+
batchSize?: number;
|
|
240
|
+
/** Flush interval ms. Default 5000. */
|
|
241
|
+
flushIntervalMs?: number;
|
|
242
|
+
/** Resource attributes stamped on every export. */
|
|
243
|
+
resourceAttributes?: Record<string, string | number | boolean>;
|
|
244
|
+
/** Service name. Default 'agent-runtime'. */
|
|
245
|
+
serviceName?: string;
|
|
246
|
+
}
|
|
247
|
+
interface OtelExporter {
|
|
248
|
+
/** Export a span. */
|
|
249
|
+
exportSpan(span: OtelSpan): void;
|
|
250
|
+
/** Force flush pending spans. */
|
|
251
|
+
flush(): Promise<void>;
|
|
252
|
+
/** Shutdown cleanly. */
|
|
253
|
+
shutdown(): Promise<void>;
|
|
254
|
+
}
|
|
255
|
+
interface OtelSpan {
|
|
256
|
+
traceId: string;
|
|
257
|
+
spanId: string;
|
|
258
|
+
parentSpanId?: string;
|
|
259
|
+
name: string;
|
|
260
|
+
kind?: number;
|
|
261
|
+
startTimeUnixNano: string;
|
|
262
|
+
endTimeUnixNano: string;
|
|
263
|
+
attributes?: OtelAttribute[];
|
|
264
|
+
status?: {
|
|
265
|
+
code: number;
|
|
266
|
+
message?: string;
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
interface OtelAttribute {
|
|
270
|
+
key: string;
|
|
271
|
+
value: {
|
|
272
|
+
stringValue?: string;
|
|
273
|
+
intValue?: string;
|
|
274
|
+
doubleValue?: number;
|
|
275
|
+
boolValue?: boolean;
|
|
276
|
+
};
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Create an OTEL exporter. Returns undefined when no endpoint is configured.
|
|
280
|
+
*/
|
|
281
|
+
declare function createOtelExporter(config?: OtelExportConfig): OtelExporter | undefined;
|
|
282
|
+
/**
|
|
283
|
+
* Convert a LoopTraceEvent into an OtelSpan for export.
|
|
284
|
+
*/
|
|
285
|
+
declare function loopEventToOtelSpan(event: {
|
|
286
|
+
kind: string;
|
|
287
|
+
runId: string;
|
|
288
|
+
timestamp: number;
|
|
289
|
+
payload: object;
|
|
290
|
+
}, traceId: string, parentSpanId?: string): OtelSpan;
|
|
291
|
+
/**
|
|
292
|
+
* Sink-neutral node in a reconstructed loop span tree. The root node's
|
|
293
|
+
* `parentSpanId` is `undefined` — sinks decide how to parent it (the OTEL
|
|
294
|
+
* mapper attaches the inherited delegation span; the delegation journal
|
|
295
|
+
* leaves it as the tree root).
|
|
296
|
+
*/
|
|
297
|
+
interface LoopSpanNode {
|
|
298
|
+
spanId: string;
|
|
299
|
+
parentSpanId?: string;
|
|
300
|
+
/** `'loop'` | `'loop.round'` | `'loop.iteration'`. */
|
|
301
|
+
name: string;
|
|
302
|
+
/** Topology level: loop root, plan round, or iteration branch. */
|
|
303
|
+
kind: 'loop' | 'round' | 'branch';
|
|
304
|
+
startMs: number;
|
|
305
|
+
endMs: number;
|
|
306
|
+
attrs: Record<string, string | number | boolean>;
|
|
307
|
+
/** True when the iteration carried an error — maps to OTEL status code 2. */
|
|
308
|
+
error: boolean;
|
|
309
|
+
}
|
|
310
|
+
/**
|
|
311
|
+
* Build a nested, real-duration OTLP span tree for ONE loop run from its full
|
|
312
|
+
* ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
|
|
313
|
+
* zero-duration span per event), this reconstructs the topology hierarchy a
|
|
314
|
+
* GenAI trace viewer renders natively:
|
|
315
|
+
*
|
|
316
|
+
* loop (invoke_workflow)
|
|
317
|
+
* └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
|
|
318
|
+
* ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
|
|
319
|
+
* └─ …
|
|
320
|
+
*
|
|
321
|
+
* Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
|
|
322
|
+
* a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
|
|
323
|
+
* verdict / placement / cost (not yet standardized). Pure: feed it a buffered
|
|
324
|
+
* per-runId event array (e.g. flushed on `loop.ended`) and export the result.
|
|
325
|
+
*/
|
|
326
|
+
declare function buildLoopOtelSpans(events: ReadonlyArray<{
|
|
327
|
+
kind: string;
|
|
328
|
+
runId: string;
|
|
329
|
+
timestamp: number;
|
|
330
|
+
payload: object;
|
|
331
|
+
}>, traceId: string, rootParentSpanId?: string): OtelSpan[];
|
|
332
|
+
/**
|
|
333
|
+
* Sink-neutral core behind {@link buildLoopOtelSpans}: reconstruct the
|
|
334
|
+
* loop → round → branch span tree from one run's ordered `LoopTraceEvent`
|
|
335
|
+
* stream. Consumed by the OTEL mapper above and by the MCP delegation
|
|
336
|
+
* journal's compact trace tee — one topology reconstruction, two sinks.
|
|
337
|
+
* Tolerates partial streams (a run that never reached `loop.ended` closes
|
|
338
|
+
* at the last observed event's timestamp).
|
|
339
|
+
*/
|
|
340
|
+
declare function buildLoopSpanNodes(events: ReadonlyArray<{
|
|
341
|
+
kind: string;
|
|
342
|
+
runId: string;
|
|
343
|
+
timestamp: number;
|
|
344
|
+
payload: object;
|
|
345
|
+
}>): LoopSpanNode[];
|
|
346
|
+
/** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
|
|
347
|
+
declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
|
|
348
|
+
interface EvalRunGeneration {
|
|
349
|
+
/** 0-based ordinal of this generation within the run (required by ingest). */
|
|
350
|
+
index: number;
|
|
351
|
+
/** Identity of the proposed surface change (content-addressed hash). */
|
|
352
|
+
surfaceHash: string;
|
|
353
|
+
/** Arbitrary provenance for this generation (rationale, evidence, source). */
|
|
354
|
+
surface?: unknown;
|
|
355
|
+
/** Per-scenario results; empty until the generation is measured. */
|
|
356
|
+
cells?: unknown[];
|
|
357
|
+
/** Mean composite score (0 when unmeasured — pair with labels.measured). */
|
|
358
|
+
compositeMean: number;
|
|
359
|
+
costUsd: number;
|
|
360
|
+
durationMs: number;
|
|
361
|
+
}
|
|
362
|
+
interface EvalRunEvent {
|
|
363
|
+
runId: string;
|
|
364
|
+
runDir: string;
|
|
365
|
+
/** ISO timestamp. */
|
|
366
|
+
timestamp: string;
|
|
367
|
+
status: 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
|
|
368
|
+
labels?: Record<string, string>;
|
|
369
|
+
baseline?: EvalRunGeneration;
|
|
370
|
+
generations?: EvalRunGeneration[];
|
|
371
|
+
gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling';
|
|
372
|
+
holdoutLift?: number;
|
|
373
|
+
totalCostUsd: number;
|
|
374
|
+
totalDurationMs: number;
|
|
375
|
+
errorMessage?: string;
|
|
376
|
+
}
|
|
377
|
+
interface EvalRunsExportConfig {
|
|
378
|
+
/** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */
|
|
379
|
+
apiKey?: string;
|
|
380
|
+
/** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */
|
|
381
|
+
base?: string;
|
|
382
|
+
/** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */
|
|
383
|
+
idempotencyKey?: string;
|
|
384
|
+
}
|
|
385
|
+
interface EvalRunsExportResult {
|
|
386
|
+
ok: boolean;
|
|
387
|
+
status: number;
|
|
388
|
+
accepted: number;
|
|
389
|
+
rejected: Array<{
|
|
390
|
+
index: number;
|
|
391
|
+
reason: string;
|
|
392
|
+
}>;
|
|
393
|
+
}
|
|
394
|
+
/**
|
|
395
|
+
* Ship self-improvement eval-run events to Tangle Intelligence. Unlike the
|
|
396
|
+
* best-effort span exporter, this RESOLVES with the ingest verdict (accepted /
|
|
397
|
+
* rejected per event) so a consumer's loop can assert its provenance landed.
|
|
398
|
+
* Throws only on a missing key or network failure.
|
|
399
|
+
*/
|
|
400
|
+
declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
|
|
401
|
+
|
|
402
|
+
/**
|
|
403
|
+
* @experimental
|
|
404
|
+
*
|
|
405
|
+
* Trace context propagation for MCP subprocess.
|
|
406
|
+
*
|
|
407
|
+
* When the MCP server is launched as a child process by a sandbox harness,
|
|
408
|
+
* the parent passes trace context via environment variables:
|
|
409
|
+
*
|
|
410
|
+
* TRACE_ID=<current-run-trace-id>
|
|
411
|
+
* PARENT_SPAN_ID=<span-that-dispatched-the-delegation>
|
|
412
|
+
*
|
|
413
|
+
* The MCP server reads these at startup and uses them as the root of its
|
|
414
|
+
* internal trace tree. All spans emitted by `runLoop` invocations inside
|
|
415
|
+
* the MCP are children of the parent's delegation span.
|
|
416
|
+
*
|
|
417
|
+
* When these env vars are absent, the MCP generates a fresh trace root —
|
|
418
|
+
* the server operates standalone without trace joining.
|
|
419
|
+
*/
|
|
420
|
+
|
|
421
|
+
interface TraceContext {
|
|
422
|
+
/** Trace id inherited from the parent process, or a fresh one. */
|
|
423
|
+
traceId: string;
|
|
424
|
+
/** Parent span id from the delegation that launched this MCP server. */
|
|
425
|
+
parentSpanId?: string;
|
|
426
|
+
}
|
|
427
|
+
/**
|
|
428
|
+
* Read trace context from the process environment.
|
|
429
|
+
* Returns a context with inherited ids or a freshly generated root.
|
|
430
|
+
*/
|
|
431
|
+
declare function readTraceContextFromEnv(): TraceContext;
|
|
432
|
+
/**
|
|
433
|
+
* Create a LoopTraceEmitter that:
|
|
434
|
+
* 1. Parents all spans under the inherited PARENT_SPAN_ID.
|
|
435
|
+
* 2. Exports spans to OTEL when OTEL_EXPORTER_OTLP_ENDPOINT is set.
|
|
436
|
+
*
|
|
437
|
+
* Returns the emitter, the optional exporter handle for shutdown, and the trace context.
|
|
438
|
+
*/
|
|
439
|
+
declare function createPropagatingTraceEmitter(ctx: TraceContext): {
|
|
440
|
+
emitter: LoopTraceEmitter;
|
|
441
|
+
exporter: OtelExporter | undefined;
|
|
442
|
+
context: TraceContext;
|
|
443
|
+
};
|
|
444
|
+
/**
|
|
445
|
+
* Build env vars to pass to a child MCP subprocess so it inherits the
|
|
446
|
+
* current trace context.
|
|
447
|
+
*/
|
|
448
|
+
declare function traceContextToEnv(ctx: TraceContext): Record<string, string>;
|
|
449
|
+
|
|
450
|
+
/**
|
|
451
|
+
* @experimental
|
|
452
|
+
*
|
|
453
|
+
* MCP delegation tool surface — the typed inputs/outputs the product agent
|
|
454
|
+
* sees over the wire. These types are the contract; the JSON schemas under
|
|
455
|
+
* `tools/*` mirror them for the MCP `tools/list` advertisement.
|
|
456
|
+
*
|
|
457
|
+
* Async semantics: `delegate_code` + `delegate_research` return a `taskId`
|
|
458
|
+
* immediately. The product agent polls `delegation_status` until the task
|
|
459
|
+
* transitions to `completed` | `failed` | `cancelled`. `delegate_feedback`
|
|
460
|
+
* + `delegation_history` are synchronous reads / writes against the local
|
|
461
|
+
* task queue + feedback store.
|
|
462
|
+
*/
|
|
463
|
+
|
|
464
|
+
/** @experimental */
|
|
465
|
+
type DelegationProfile = 'coder' | 'researcher' | 'ui-auditor';
|
|
466
|
+
/** @experimental */
|
|
467
|
+
type DelegationStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
|
|
468
|
+
/**
|
|
469
|
+
* Minimal `CoderTask` overrides exposed over the MCP wire. The full
|
|
470
|
+
* `CoderTask` carries fields the kernel synthesizes from `goal` +
|
|
471
|
+
* `repoRoot` — the agent only edits the few that materially gate
|
|
472
|
+
* validator behavior.
|
|
473
|
+
*
|
|
474
|
+
* @experimental
|
|
475
|
+
*/
|
|
476
|
+
interface DelegateCodeConfig {
|
|
477
|
+
testCmd?: string;
|
|
478
|
+
typecheckCmd?: string;
|
|
479
|
+
forbiddenPaths?: string[];
|
|
480
|
+
maxDiffLines?: number;
|
|
481
|
+
}
|
|
482
|
+
/** @experimental */
|
|
483
|
+
interface DelegateCodeArgs {
|
|
484
|
+
/** Natural-language description of what the coder must accomplish. */
|
|
485
|
+
goal: string;
|
|
486
|
+
/** Absolute path inside the sandbox where the repo lives. */
|
|
487
|
+
repoRoot: string;
|
|
488
|
+
/** Optional free-form context the agent surfaces in the prompt prelude. */
|
|
489
|
+
contextHint?: string;
|
|
490
|
+
/**
|
|
491
|
+
* When > 1, dispatches `multiHarnessCoderFanout` across N harnesses
|
|
492
|
+
* (claude-code, codex, opencode-glm) and picks the highest-scoring
|
|
493
|
+
* passing patch. Default 1.
|
|
494
|
+
*/
|
|
495
|
+
variants?: number;
|
|
496
|
+
/** Validator + prompt overrides the agent knows for this repo. */
|
|
497
|
+
config?: DelegateCodeConfig;
|
|
498
|
+
/** Multi-tenant scope (customer-id, workspace-id). */
|
|
499
|
+
namespace?: string;
|
|
500
|
+
}
|
|
501
|
+
/** @experimental */
|
|
502
|
+
interface DelegateCodeResult {
|
|
503
|
+
taskId: string;
|
|
504
|
+
/** Best-effort hint — coder loops can take minutes-to-hours. */
|
|
505
|
+
estimatedDurationMs?: number;
|
|
506
|
+
}
|
|
507
|
+
/** @experimental */
|
|
508
|
+
type ResearchSource = 'web' | 'corpus' | 'twitter' | 'github' | 'docs';
|
|
509
|
+
/** @experimental */
|
|
510
|
+
interface DelegateResearchConfig {
|
|
511
|
+
recencyWindow?: {
|
|
512
|
+
since?: string;
|
|
513
|
+
until?: string;
|
|
514
|
+
};
|
|
515
|
+
maxItems?: number;
|
|
516
|
+
minConfidence?: number;
|
|
517
|
+
}
|
|
518
|
+
/** @experimental */
|
|
519
|
+
interface DelegateResearchArgs {
|
|
520
|
+
question: string;
|
|
521
|
+
namespace: string;
|
|
522
|
+
scope?: string;
|
|
523
|
+
sources?: ResearchSource[];
|
|
524
|
+
variants?: number;
|
|
525
|
+
config?: DelegateResearchConfig;
|
|
526
|
+
}
|
|
527
|
+
/** @experimental */
|
|
528
|
+
interface DelegateResearchResult {
|
|
529
|
+
taskId: string;
|
|
530
|
+
estimatedDurationMs?: number;
|
|
531
|
+
}
|
|
532
|
+
/** @experimental */
|
|
533
|
+
interface FeedbackRefersTo {
|
|
534
|
+
kind: 'delegation' | 'artifact' | 'outcome';
|
|
535
|
+
/** For `'delegation'`, this is the taskId. */
|
|
536
|
+
ref: string;
|
|
537
|
+
}
|
|
538
|
+
/** @experimental */
|
|
539
|
+
interface FeedbackRating {
|
|
540
|
+
/** [0, 1]. */
|
|
541
|
+
score: number;
|
|
542
|
+
label?: 'good' | 'bad' | 'neutral' | 'mixed';
|
|
543
|
+
notes: string;
|
|
544
|
+
}
|
|
545
|
+
/** @experimental */
|
|
546
|
+
interface DelegateFeedbackArgs {
|
|
547
|
+
refersTo: FeedbackRefersTo;
|
|
548
|
+
rating: FeedbackRating;
|
|
549
|
+
by: 'agent' | 'user' | 'downstream-judge';
|
|
550
|
+
/** ISO timestamp; defaults to server clock when omitted. */
|
|
551
|
+
capturedAt?: string;
|
|
552
|
+
namespace?: string;
|
|
553
|
+
}
|
|
554
|
+
/** @experimental */
|
|
555
|
+
interface DelegateFeedbackResult {
|
|
556
|
+
recorded: true;
|
|
557
|
+
id: string;
|
|
558
|
+
}
|
|
559
|
+
/** @experimental */
|
|
560
|
+
interface DelegationStatusArgs {
|
|
561
|
+
taskId: string;
|
|
562
|
+
/**
|
|
563
|
+
* Return the delegation's compact loop-trace span tree alongside the
|
|
564
|
+
* status. Default false — status polls stay light; opt in when you need
|
|
565
|
+
* the topology (which iterations ran, where they were placed, what each
|
|
566
|
+
* cost) rather than just the state machine.
|
|
567
|
+
*/
|
|
568
|
+
includeTrace?: boolean;
|
|
569
|
+
}
|
|
570
|
+
/** @experimental */
|
|
571
|
+
interface DelegationProgress {
|
|
572
|
+
iteration: number;
|
|
573
|
+
phase: string;
|
|
574
|
+
}
|
|
575
|
+
/** @experimental */
|
|
576
|
+
interface DelegationError {
|
|
577
|
+
message: string;
|
|
578
|
+
kind: string;
|
|
579
|
+
}
|
|
580
|
+
/**
|
|
581
|
+
* Polymorphic `result` field: `CoderOutput` when the underlying profile
|
|
582
|
+
* is `'coder'`, a structurally-typed research output when `'researcher'`, or a `UiAuditorDelegationOutput` when `'ui-auditor'`.
|
|
583
|
+
* The MCP wire carries it as JSON either way.
|
|
584
|
+
*
|
|
585
|
+
* @experimental
|
|
586
|
+
*/
|
|
587
|
+
type DelegationResultPayload = {
|
|
588
|
+
profile: 'coder';
|
|
589
|
+
output: CoderOutput;
|
|
590
|
+
} | {
|
|
591
|
+
profile: 'researcher';
|
|
592
|
+
output: ResearchOutputShape;
|
|
593
|
+
} | {
|
|
594
|
+
profile: 'ui-auditor';
|
|
595
|
+
output: UiAuditorDelegationOutput;
|
|
596
|
+
};
|
|
597
|
+
/**
|
|
598
|
+
* Wire-shape of a completed UI-audit delegation. The `findings` array
|
|
599
|
+
* contains every finding persisted to the workspace during the run,
|
|
600
|
+
* already enriched with `id` and `createdAt` by the writer. `workspaceDir`
|
|
601
|
+
* is the absolute path to the workspace; `indexFile` is the workspace-
|
|
602
|
+
* relative path to the regenerated index.md.
|
|
603
|
+
*
|
|
604
|
+
* @experimental
|
|
605
|
+
*/
|
|
606
|
+
interface UiAuditorDelegationOutput {
|
|
607
|
+
workspaceDir: string;
|
|
608
|
+
indexFile: string;
|
|
609
|
+
findings: UiFinding[];
|
|
610
|
+
/** Total iterations the loop ran for this delegation. */
|
|
611
|
+
iterations: number;
|
|
612
|
+
}
|
|
613
|
+
/** @experimental */
|
|
614
|
+
type UiAuditLensFilter = readonly UiLens[];
|
|
615
|
+
/** Optional per-route capture spec the agent surfaces over the wire. */
|
|
616
|
+
interface DelegateUiAuditRoute {
|
|
617
|
+
/** Stable route name (used in screenshot filenames + finding metadata). */
|
|
618
|
+
name: string;
|
|
619
|
+
/** Fully-qualified URL. */
|
|
620
|
+
url: string;
|
|
621
|
+
/** Viewports to capture at. Defaults to `[{ width: 1280, height: 800 }]`. */
|
|
622
|
+
viewports?: readonly {
|
|
623
|
+
width: number;
|
|
624
|
+
height: number;
|
|
625
|
+
}[];
|
|
626
|
+
/** Default false. Full-page captures for the broad lenses. */
|
|
627
|
+
fullPage?: boolean;
|
|
628
|
+
/** Selector to wait for before capture. */
|
|
629
|
+
waitFor?: string;
|
|
630
|
+
}
|
|
631
|
+
/** @experimental */
|
|
632
|
+
interface DelegateUiAuditConfig {
|
|
633
|
+
/**
|
|
634
|
+
* Lenses to iterate. Default: every lens except `'other'`. Order is
|
|
635
|
+
* preserved — the driver iterates lens-by-lens.
|
|
636
|
+
*/
|
|
637
|
+
lenses?: UiAuditLensFilter;
|
|
638
|
+
/** Maximum total iterations across all (lens × route) pairs. Default 33 (11 lenses × 3 routes). */
|
|
639
|
+
maxIterations?: number;
|
|
640
|
+
/** Maximum concurrent iterations within a single plan() round. Default 2. */
|
|
641
|
+
maxConcurrency?: number;
|
|
642
|
+
/** Free-form product context surfaced to the judge. */
|
|
643
|
+
productContext?: string;
|
|
644
|
+
}
|
|
645
|
+
/** @experimental */
|
|
646
|
+
interface DelegateUiAuditArgs {
|
|
647
|
+
/** Workspace root for the audit (absolute path). */
|
|
648
|
+
workspaceDir: string;
|
|
649
|
+
/** Routes to audit. Must be non-empty. */
|
|
650
|
+
routes: readonly DelegateUiAuditRoute[];
|
|
651
|
+
/** Multi-tenant scope. */
|
|
652
|
+
namespace?: string;
|
|
653
|
+
config?: DelegateUiAuditConfig;
|
|
654
|
+
}
|
|
655
|
+
/** @experimental */
|
|
656
|
+
interface DelegateUiAuditResult {
|
|
657
|
+
taskId: string;
|
|
658
|
+
estimatedDurationMs?: number;
|
|
659
|
+
}
|
|
660
|
+
/**
|
|
661
|
+
* Loose shape of a research output over the wire — the substrate cannot
|
|
662
|
+
* import the `ResearchOutput` type from agent-knowledge without inducing
|
|
663
|
+
* a dependency cycle, so the MCP layer treats it structurally.
|
|
664
|
+
*
|
|
665
|
+
* @experimental
|
|
666
|
+
*/
|
|
667
|
+
interface ResearchOutputShape {
|
|
668
|
+
items: unknown[];
|
|
669
|
+
citations: unknown[];
|
|
670
|
+
proposedWrites: unknown[];
|
|
671
|
+
gaps?: string[];
|
|
672
|
+
notes?: string;
|
|
673
|
+
[key: string]: unknown;
|
|
674
|
+
}
|
|
675
|
+
/** @experimental */
|
|
676
|
+
interface DelegationStatusResult {
|
|
677
|
+
taskId: string;
|
|
678
|
+
profile: DelegationProfile;
|
|
679
|
+
status: DelegationStatus;
|
|
680
|
+
progress?: DelegationProgress;
|
|
681
|
+
result?: DelegationResultPayload;
|
|
682
|
+
error?: DelegationError;
|
|
683
|
+
costUsd?: number;
|
|
684
|
+
startedAt: string;
|
|
685
|
+
completedAt?: string;
|
|
686
|
+
/** Compact loop-trace span tree; present only when `includeTrace: true` was passed and spans were recorded. */
|
|
687
|
+
trace?: DelegationTraceSpan[];
|
|
688
|
+
/** Present when oldest trace spans were dropped to honor the trace caps. */
|
|
689
|
+
traceTruncated?: true;
|
|
690
|
+
/** Inherited trace identity recorded at submit — join key into the caller's trace. */
|
|
691
|
+
traceId?: string;
|
|
692
|
+
/** Caller span that dispatched the delegation, when one was inherited. */
|
|
693
|
+
parentSpanId?: string;
|
|
694
|
+
}
|
|
695
|
+
/** @experimental */
|
|
696
|
+
interface DelegationHistoryArgs {
|
|
697
|
+
namespace?: string;
|
|
698
|
+
profile?: DelegationProfile;
|
|
699
|
+
/** ISO date — only delegations started at-or-after `since` are returned. */
|
|
700
|
+
since?: string;
|
|
701
|
+
/** Default 50. Hard cap 500. */
|
|
702
|
+
limit?: number;
|
|
703
|
+
}
|
|
704
|
+
/** @experimental */
|
|
705
|
+
interface DelegationFeedbackSnapshot {
|
|
706
|
+
id: string;
|
|
707
|
+
score: number;
|
|
708
|
+
label?: FeedbackRating['label'];
|
|
709
|
+
by: DelegateFeedbackArgs['by'];
|
|
710
|
+
notes: string;
|
|
711
|
+
capturedAt: string;
|
|
712
|
+
}
|
|
713
|
+
/** @experimental */
|
|
714
|
+
interface DelegationHistoryEntry {
|
|
715
|
+
taskId: string;
|
|
716
|
+
profile: DelegationProfile;
|
|
717
|
+
namespace?: string;
|
|
718
|
+
args: DelegateCodeArgs | DelegateResearchArgs | DelegateUiAuditArgs;
|
|
719
|
+
status: DelegationStatus;
|
|
720
|
+
feedback?: DelegationFeedbackSnapshot[];
|
|
721
|
+
costUsd?: number;
|
|
722
|
+
startedAt: string;
|
|
723
|
+
completedAt?: string;
|
|
724
|
+
/**
|
|
725
|
+
* True when the record carries a journaled loop trace. History stays
|
|
726
|
+
* light by design — fetch the spans via
|
|
727
|
+
* `delegation_status { taskId, includeTrace: true }`.
|
|
728
|
+
*/
|
|
729
|
+
hasTrace: boolean;
|
|
730
|
+
/** Inherited trace identity recorded at submit — join key into the caller's trace. */
|
|
731
|
+
traceId?: string;
|
|
732
|
+
}
|
|
733
|
+
/** @experimental */
|
|
734
|
+
interface DelegationHistoryResult {
|
|
735
|
+
delegations: DelegationHistoryEntry[];
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
/**
|
|
739
|
+
* @experimental
|
|
740
|
+
*
|
|
741
|
+
* State machine for async MCP delegations:
|
|
742
|
+
*
|
|
743
|
+
* pending → running → completed | failed
|
|
744
|
+
* ↘ cancelled (from any non-terminal state via cancel())
|
|
745
|
+
*
|
|
746
|
+
* Each `submit` returns a `taskId` immediately and kicks the work off in the
|
|
747
|
+
* background. The work function receives an `AbortSignal` the queue fires
|
|
748
|
+
* when `cancel(taskId)` is called. The queue does NOT supervise runtime
|
|
749
|
+
* timeouts — the underlying `runLoop` driver / sandbox imposes those.
|
|
750
|
+
*
|
|
751
|
+
* Idempotency: callers may supply an `idempotencyKey` (hash of the input).
|
|
752
|
+
* A duplicate `submit` with a known key returns the existing task instead of
|
|
753
|
+
* starting a new one. Mutated input → different key → different task.
|
|
754
|
+
*
|
|
755
|
+
* Durability: the working set lives in memory (reads stay synchronous) and
|
|
756
|
+
* every record mutation is journaled through a `DelegationStore`. The default
|
|
757
|
+
* `InMemoryDelegationStore` keeps the non-durable semantics — a process restart drops
|
|
758
|
+
* all state. Construct via `DelegationTaskQueue.restore({ store })` with a
|
|
759
|
+
* `FileDelegationStore` to reload prior records on startup: terminal records
|
|
760
|
+
* stay queryable, in-flight records either re-attach through the
|
|
761
|
+
* `resumeDelegate` seam (when they carry a `detachedSessionRef`) or fail
|
|
762
|
+
* loud with a driver-restart error so `delegation_status` tells the truth.
|
|
763
|
+
*/
|
|
764
|
+
|
|
765
|
+
type AnyDelegateArgs = DelegateCodeArgs | DelegateResearchArgs | DelegateUiAuditArgs;
|
|
766
|
+
/**
|
|
767
|
+
* Must be JSON-safe end to end (`args`, `result`, `error`, `feedback`) —
|
|
768
|
+
* persistent stores round-trip records through `JSON.stringify`.
|
|
769
|
+
*
|
|
770
|
+
* @experimental
|
|
771
|
+
*/
|
|
772
|
+
interface DelegationRecord {
|
|
773
|
+
taskId: string;
|
|
774
|
+
profile: DelegationProfile;
|
|
775
|
+
namespace?: string;
|
|
776
|
+
args: AnyDelegateArgs;
|
|
777
|
+
status: DelegationStatus;
|
|
778
|
+
progress?: DelegationProgress;
|
|
779
|
+
result?: DelegationResultPayload;
|
|
780
|
+
error?: DelegationError;
|
|
781
|
+
costUsd?: number;
|
|
782
|
+
startedAt: string;
|
|
783
|
+
completedAt?: string;
|
|
784
|
+
/** Sha-prefix hash of the canonical input — used for idempotency lookup. */
|
|
785
|
+
idempotencyKey?: string;
|
|
786
|
+
/**
|
|
787
|
+
* Caller-generated deterministic id of a detached run (e.g. the sandbox
|
|
788
|
+
* session id a single-tick driver resumes by). Presence is what makes a
|
|
789
|
+
* restored in-flight record resumable via `resumeDelegate`; without it a
|
|
790
|
+
* restart settles the record as failed.
|
|
791
|
+
*/
|
|
792
|
+
detachedSessionRef?: string;
|
|
793
|
+
/** Feedback events keyed by this delegation's taskId. */
|
|
794
|
+
feedback: DelegationFeedbackSnapshot[];
|
|
795
|
+
/**
|
|
796
|
+
* Compact loop-trace span tree teed from the delegation's run, oldest
|
|
797
|
+
* spans first. Appended when a delegated loop reaches `loop.ended` and
|
|
798
|
+
* settled (partial buffers included) at the terminal transition. Capped
|
|
799
|
+
* via `capDelegationTrace` — see `traceTruncated`.
|
|
800
|
+
*/
|
|
801
|
+
trace?: DelegationTraceSpan[];
|
|
802
|
+
/** Present when oldest trace spans were dropped to honor the trace caps. */
|
|
803
|
+
traceTruncated?: true;
|
|
804
|
+
/**
|
|
805
|
+
* Inherited trace identity (the queue's `traceContext` at submit time —
|
|
806
|
+
* typically `readTraceContextFromEnv()`), distinct from the span payload:
|
|
807
|
+
* a journal consumer joins records into the parent trace by these ids
|
|
808
|
+
* without parsing spans. Restored records keep their persisted identity.
|
|
809
|
+
*/
|
|
810
|
+
traceId?: string;
|
|
811
|
+
/** Caller span that dispatched the delegation, when one was inherited. */
|
|
812
|
+
parentSpanId?: string;
|
|
813
|
+
}
|
|
814
|
+
/** @experimental */
|
|
815
|
+
interface SubmitInput<Args extends AnyDelegateArgs> {
|
|
816
|
+
profile: DelegationProfile;
|
|
817
|
+
args: Args;
|
|
818
|
+
namespace?: string;
|
|
819
|
+
idempotencyKey?: string;
|
|
820
|
+
/**
|
|
821
|
+
* Records the detached-run resume key on the new record. The submitted
|
|
822
|
+
* `run` function still executes in-process exactly as without it — the
|
|
823
|
+
* ref only matters after a restart, when `DelegationTaskQueue.restore`
|
|
824
|
+
* hands it to the `resumeDelegate` seam instead of failing the record.
|
|
825
|
+
*/
|
|
826
|
+
detachedSessionRef?: string;
|
|
827
|
+
/**
|
|
828
|
+
* Runs the underlying delegation. The queue passes a fresh `AbortSignal`
|
|
829
|
+
* and a `report` channel for incremental progress updates. The function
|
|
830
|
+
* MUST resolve with the typed `DelegationResultPayload['output']`; the
|
|
831
|
+
* queue wraps it with the profile tag.
|
|
832
|
+
*/
|
|
833
|
+
run: (ctx: DelegationRunContext) => Promise<DelegationResultPayload['output']>;
|
|
834
|
+
}
|
|
835
|
+
/** @experimental Context handed to a `SubmitInput.run` function. */
|
|
836
|
+
interface DelegationRunContext {
|
|
837
|
+
signal: AbortSignal;
|
|
838
|
+
report(progress: DelegationProgress): void;
|
|
839
|
+
/** The `detachedSessionRef` recorded at submit, when one was supplied. */
|
|
840
|
+
detachedSessionRef?: string;
|
|
841
|
+
/**
|
|
842
|
+
* Replace the record's detached-run resume key — the detached dispatch path
|
|
843
|
+
* calls this once the sandbox id is known so the persisted ref names a
|
|
844
|
+
* resolvable box. Ignored after the record settles (a cancel racing the
|
|
845
|
+
* rebind is legitimate; the ref no longer matters then). Throws on an empty
|
|
846
|
+
* ref — erasing the resume key would silently make the record unresumable.
|
|
847
|
+
*/
|
|
848
|
+
updateDetachedSessionRef(ref: string): void;
|
|
849
|
+
/**
|
|
850
|
+
* Per-delegation loop-trace sink, always provided by the queue. Events
|
|
851
|
+
* emitted here are journaled onto the record as a compact span tree
|
|
852
|
+
* (`record.trace`) when each loop run ends and at the delegation's
|
|
853
|
+
* terminal transition. Delegates forward it into their `runLoop` ctx,
|
|
854
|
+
* composed with any process-wide OTEL emitter
|
|
855
|
+
* (`composeLoopTraceEmitters`). Optional in the type so consumer-built
|
|
856
|
+
* contexts stay source-compatible.
|
|
857
|
+
*/
|
|
858
|
+
traceEmitter?: LoopTraceEmitter;
|
|
859
|
+
}
|
|
860
|
+
/** @experimental */
|
|
861
|
+
interface SubmitOutput {
|
|
862
|
+
taskId: string;
|
|
863
|
+
/** True when a prior matching `idempotencyKey` returned an existing record. */
|
|
864
|
+
reused: boolean;
|
|
865
|
+
}
|
|
866
|
+
/**
|
|
867
|
+
* One observation of a detached run, mapped 1:1 from a single-tick driver
|
|
868
|
+
* (e.g. the sandbox SDK's `driveTurn`, which reports
|
|
869
|
+
* `completed | running | failed` per pass). `running` schedules another tick
|
|
870
|
+
* after `intervalMs`; `completed` / `failed` settle the record.
|
|
871
|
+
*
|
|
872
|
+
* @experimental
|
|
873
|
+
*/
|
|
874
|
+
type DelegationResumeTick = {
|
|
875
|
+
state: 'running';
|
|
876
|
+
} | {
|
|
877
|
+
state: 'completed';
|
|
878
|
+
output: DelegationResultPayload['output'];
|
|
879
|
+
costUsd?: number;
|
|
880
|
+
} | {
|
|
881
|
+
state: 'failed';
|
|
882
|
+
error: DelegationError;
|
|
883
|
+
};
|
|
884
|
+
/** @experimental */
|
|
885
|
+
interface DelegationResumeContext {
|
|
886
|
+
/** Fired by `cancel(taskId)`; the driver should stop the remote run when it can. */
|
|
887
|
+
signal: AbortSignal;
|
|
888
|
+
report(progress: DelegationProgress): void;
|
|
889
|
+
}
|
|
890
|
+
/**
|
|
891
|
+
* Re-attaches restored in-flight records to their detached runs. The queue
|
|
892
|
+
* calls `tick` repeatedly — it never awaits a whole run — so the driver can
|
|
893
|
+
* be a thin wrapper over a one-pass primitive: resolve the run named by
|
|
894
|
+
* `detachedSessionRef`, advance/poll it once, report where it stands. A
|
|
895
|
+
* thrown error settles the record as failed; `failed` ticks are treated as
|
|
896
|
+
* terminal and are not retried.
|
|
897
|
+
*
|
|
898
|
+
* @experimental
|
|
899
|
+
*/
|
|
900
|
+
interface DelegationResumeDriver {
|
|
901
|
+
tick(task: {
|
|
902
|
+
record: DelegationRecord;
|
|
903
|
+
detachedSessionRef: string;
|
|
904
|
+
}, ctx: DelegationResumeContext): Promise<DelegationResumeTick>;
|
|
905
|
+
/** Delay between `running` ticks, in milliseconds. Default 5000. */
|
|
906
|
+
intervalMs?: number;
|
|
907
|
+
}
|
|
908
|
+
/** @experimental */
|
|
909
|
+
interface DelegationTaskQueueOptions {
|
|
910
|
+
/** ID generator override; default `randomTaskId`. */
|
|
911
|
+
generateId?: () => string;
|
|
912
|
+
/** Clock override; default `() => new Date().toISOString()`. */
|
|
913
|
+
now?: () => string;
|
|
914
|
+
/**
|
|
915
|
+
* Journal for record mutations and the `restore()` load source. Default
|
|
916
|
+
* `InMemoryDelegationStore` — observably identical to an unjournaled
|
|
917
|
+
* queue. Pass a `FileDelegationStore` through
|
|
918
|
+
* `DelegationTaskQueue.restore` for state that survives a restart;
|
|
919
|
+
* constructing with `new` never loads prior state.
|
|
920
|
+
*/
|
|
921
|
+
store?: DelegationStore;
|
|
922
|
+
/** Resume seam for restored in-flight records that carry a `detachedSessionRef`. */
|
|
923
|
+
resumeDelegate?: DelegationResumeDriver;
|
|
924
|
+
/**
|
|
925
|
+
* Maximum number of terminal (completed | failed | cancelled) records
|
|
926
|
+
* retained; the oldest (by `completedAt`) are evicted from memory and
|
|
927
|
+
* the store once the cap is exceeded. Default unbounded.
|
|
928
|
+
*/
|
|
929
|
+
maxTerminalRecords?: number;
|
|
930
|
+
/**
|
|
931
|
+
* Observes the first store failure. After it fires, the queue refuses
|
|
932
|
+
* new submissions and `flush()` rejects with the same error. Default:
|
|
933
|
+
* rethrow on a microtask — an unhandled crash — because silently
|
|
934
|
+
* degrading durable mode to memory-only would lie to the caller.
|
|
935
|
+
*/
|
|
936
|
+
onPersistError?: (error: DelegationPersistenceError) => void;
|
|
937
|
+
/**
|
|
938
|
+
* Inherited trace identity stamped on every submitted record
|
|
939
|
+
* (`traceId` / `parentSpanId`). The bin passes
|
|
940
|
+
* `readTraceContextFromEnv()` so journal consumers can join delegation
|
|
941
|
+
* records into the caller's trace. Restored records keep the identity
|
|
942
|
+
* they were persisted with.
|
|
943
|
+
*/
|
|
944
|
+
traceContext?: TraceContext;
|
|
945
|
+
}
|
|
946
|
+
/** @experimental */
|
|
947
|
+
declare class DelegationTaskQueue {
|
|
948
|
+
private readonly records;
|
|
949
|
+
private readonly controllers;
|
|
950
|
+
private readonly byIdempotencyKey;
|
|
951
|
+
private readonly generateId;
|
|
952
|
+
private readonly now;
|
|
953
|
+
private readonly store;
|
|
954
|
+
private readonly resumeDelegate?;
|
|
955
|
+
private readonly maxTerminalRecords;
|
|
956
|
+
private readonly onPersistError;
|
|
957
|
+
private readonly traceContext;
|
|
958
|
+
private persistTail;
|
|
959
|
+
private persistFailure;
|
|
960
|
+
constructor(options?: DelegationTaskQueueOptions);
|
|
961
|
+
/**
|
|
962
|
+
* Construct a queue from previously-persisted state. Loads every record
|
|
963
|
+
* from `options.store`, rebuilds the idempotency index (so a re-submitted
|
|
964
|
+
* identical task returns the prior taskId and its terminal state), then:
|
|
965
|
+
*
|
|
966
|
+
* - terminal records stay queryable via `status()` / `history()`
|
|
967
|
+
* - in-flight records with a `detachedSessionRef` re-attach through
|
|
968
|
+
* `options.resumeDelegate` and report `running`
|
|
969
|
+
* - other in-flight records settle as failed — their driver died with
|
|
970
|
+
* the previous process and the result is unrecoverable
|
|
971
|
+
*
|
|
972
|
+
* The retention cap applies to the loaded set as well.
|
|
973
|
+
*/
|
|
974
|
+
static restore(options?: DelegationTaskQueueOptions): Promise<DelegationTaskQueue>;
|
|
975
|
+
/**
|
|
976
|
+
* Kick off a delegation in the background. Returns immediately. The
|
|
977
|
+
* `taskId` is queryable via `status` once this method returns. Throws
|
|
978
|
+
* the recorded `DelegationPersistenceError` once the store has failed —
|
|
979
|
+
* the queue does not accept work it cannot journal.
|
|
980
|
+
*/
|
|
981
|
+
submit<Args extends AnyDelegateArgs>(input: SubmitInput<Args>): SubmitOutput;
|
|
982
|
+
/**
|
|
983
|
+
* Snapshot the current state of a delegation. Returns `undefined` for
|
|
984
|
+
* unknown ids so callers can distinguish missing from terminal.
|
|
985
|
+
* `includeTrace` attaches the journaled loop-trace span tree — off by
|
|
986
|
+
* default so status polls stay light.
|
|
987
|
+
*/
|
|
988
|
+
status(taskId: string, opts?: {
|
|
989
|
+
includeTrace?: boolean;
|
|
990
|
+
}): DelegationStatusResult | undefined;
|
|
991
|
+
/**
|
|
992
|
+
* Abort an in-flight delegation. Returns `false` if the task is unknown
|
|
993
|
+
* or already terminal. The underlying `run` function MUST honor the
|
|
994
|
+
* abort signal for the cancel to take effect; the queue marks the
|
|
995
|
+
* record `cancelled` regardless so a misbehaving runner cannot pin the
|
|
996
|
+
* UI on `running` forever.
|
|
997
|
+
*/
|
|
998
|
+
cancel(taskId: string): boolean;
|
|
999
|
+
/**
|
|
1000
|
+
* Append a feedback event to the matching delegation. Returns `false`
|
|
1001
|
+
* when `taskId` does not name a known delegation — the caller should still
|
|
1002
|
+
* record the feedback through a different surface (artifact/outcome
|
|
1003
|
+
* kinds are not queue-bound).
|
|
1004
|
+
*/
|
|
1005
|
+
attachFeedback(taskId: string, snapshot: DelegationFeedbackSnapshot): boolean;
|
|
1006
|
+
/**
|
|
1007
|
+
* Query the recorded delegations. Returns entries newest-first (by
|
|
1008
|
+
* `startedAt`), truncated to `limit`.
|
|
1009
|
+
*/
|
|
1010
|
+
history(args?: DelegationHistoryArgs): DelegationHistoryEntry[];
|
|
1011
|
+
/**
|
|
1012
|
+
* Await every journal write issued so far. Rejects with the recorded
|
|
1013
|
+
* `DelegationPersistenceError` when any of them failed. Call before
|
|
1014
|
+
* handing the store's backing file to another process.
|
|
1015
|
+
*/
|
|
1016
|
+
flush(): Promise<void>;
|
|
1017
|
+
/** Test-only — number of in-flight (non-terminal) records. */
|
|
1018
|
+
inflightCount(): number;
|
|
1019
|
+
private execute;
|
|
1020
|
+
private appendTrace;
|
|
1021
|
+
private rehydrate;
|
|
1022
|
+
private startResume;
|
|
1023
|
+
private driveResume;
|
|
1024
|
+
/**
|
|
1025
|
+
* Journal the resumed segment of a detached run as one compact span. The
|
|
1026
|
+
* resume driver re-attaches after a process restart, so the original
|
|
1027
|
+
* process's loop events are gone — this span records the post-restart
|
|
1028
|
+
* observation window (re-attach → terminal tick) under the
|
|
1029
|
+
* `'detached-resume'` driver tag, keeping restored delegations observable
|
|
1030
|
+
* in the journal alongside trace-carrying live runs.
|
|
1031
|
+
*/
|
|
1032
|
+
private appendResumeSpan;
|
|
1033
|
+
private persist;
|
|
1034
|
+
private persistRemoval;
|
|
1035
|
+
private failPersistence;
|
|
1036
|
+
private enforceRetention;
|
|
1037
|
+
}
|
|
1038
|
+
/**
|
|
1039
|
+
* Best-effort stable hash for use as `idempotencyKey`. Not cryptographic;
|
|
1040
|
+
* collisions only affect dedupe, never correctness.
|
|
1041
|
+
*
|
|
1042
|
+
* @experimental
|
|
1043
|
+
*/
|
|
1044
|
+
declare function hashIdempotencyInput(value: unknown): string;
|
|
1045
|
+
|
|
1046
|
+
/**
|
|
1047
|
+
* @experimental
|
|
1048
|
+
*
|
|
1049
|
+
* Detached delegation turns over the sandbox SDK's `driveTurn` primitive.
|
|
1050
|
+
*
|
|
1051
|
+
* Two halves of one story:
|
|
1052
|
+
*
|
|
1053
|
+
* - {@link runDetachedTurn} — the dispatch side. A single-session delegate
|
|
1054
|
+
* (single-variant coder / researcher) acquires a box, binds the sandbox id
|
|
1055
|
+
* into the record's `detachedSessionRef`, then advances the turn with
|
|
1056
|
+
* repeated `driveTurn` ticks instead of holding a live SSE stream. The
|
|
1057
|
+
* session id is deterministic and supplied at submit time, so a process
|
|
1058
|
+
* crash between ticks loses nothing — the turn keeps running in the box.
|
|
1059
|
+
*
|
|
1060
|
+
* - {@link createDriveTurnResumeDriver} — the resume side. A
|
|
1061
|
+
* `DelegationResumeDriver` that re-attaches restored in-flight records to
|
|
1062
|
+
* their detached runs: parse the record's ref, resolve the box, advance the
|
|
1063
|
+
* turn one `driveTurn` pass per `tick()`, and map the SDK's three states
|
|
1064
|
+
* (`completed | running | failed`) onto `DelegationResumeTick`.
|
|
1065
|
+
*
|
|
1066
|
+
* Both sides type the box structurally ({@link DriveTurnCapableBox}) so tests
|
|
1067
|
+
* inject fakes and the module never requires the sandbox SDK at runtime — the
|
|
1068
|
+
* SDK stays an optional peer, exactly like the executors' `SandboxClient` seam.
|
|
1069
|
+
*
|
|
1070
|
+
* Tradeoffs of detached mode (why it is opt-in, not the default): a detached
|
|
1071
|
+
* turn yields one terminal payload instead of a live event stream, so kernel
|
|
1072
|
+
* token/cost aggregation is not produced for that turn. The trace sinks still
|
|
1073
|
+
* observe detached work — `runDetachedTurn` synthesizes a single-iteration
|
|
1074
|
+
* loop event stream (see `RunDetachedTurnOptions.traceEmitter`) so the span
|
|
1075
|
+
* topology joins the inherited trace context, with cost/tokens reported as 0
|
|
1076
|
+
* under the `'detached-turn'` driver tag. Multi-variant fanout stays on the
|
|
1077
|
+
* streaming `runLoop` path — N concurrent sessions cannot be expressed as one
|
|
1078
|
+
* resume key, and winner selection needs every candidate.
|
|
1079
|
+
*/
|
|
1080
|
+
|
|
1081
|
+
/**
|
|
1082
|
+
* Structural mirror of the sandbox SDK's `TurnDriveResult` (>= 0.6).
|
|
1083
|
+
* Discriminated on `state`; `failed` is terminal and deterministic per the
|
|
1084
|
+
* SDK contract — re-invoking with the same ids returns the same outcome.
|
|
1085
|
+
*
|
|
1086
|
+
* @experimental
|
|
1087
|
+
*/
|
|
1088
|
+
type DriveTurnTick = {
|
|
1089
|
+
state: 'completed';
|
|
1090
|
+
text: string;
|
|
1091
|
+
result: Record<string, unknown>;
|
|
1092
|
+
} | {
|
|
1093
|
+
state: 'running';
|
|
1094
|
+
startedAt?: Date;
|
|
1095
|
+
elapsedMs?: number;
|
|
1096
|
+
} | {
|
|
1097
|
+
state: 'failed';
|
|
1098
|
+
error: string;
|
|
1099
|
+
};
|
|
1100
|
+
/**
|
|
1101
|
+
* The box surface detached turns need. `SandboxInstance`
|
|
1102
|
+
* (`@tangle-network/sandbox` >= 0.6) satisfies it structurally; tests pass
|
|
1103
|
+
* in-memory fakes. `_sessionCancel` is the SDK's remote-cancellation surface —
|
|
1104
|
+
* optional here because older SDKs / fakes may not expose it; when present it
|
|
1105
|
+
* is invoked on abort so the remote run actually stops.
|
|
1106
|
+
*
|
|
1107
|
+
* @experimental
|
|
1108
|
+
*/
|
|
1109
|
+
interface DriveTurnCapableBox {
|
|
1110
|
+
driveTurn(message: string, opts: {
|
|
1111
|
+
sessionId: string;
|
|
1112
|
+
turnId?: string;
|
|
1113
|
+
wallCapMs?: number;
|
|
1114
|
+
}): Promise<DriveTurnTick>;
|
|
1115
|
+
_sessionCancel?(id: string): Promise<void>;
|
|
1116
|
+
}
|
|
1117
|
+
/**
|
|
1118
|
+
* Decoded `DelegationRecord.detachedSessionRef`. `sandboxId` is absent between
|
|
1119
|
+
* submit and box acquisition — a record restored in that window is not
|
|
1120
|
+
* resumable (there is no box to resume on) and the resume driver fails it
|
|
1121
|
+
* loud rather than dispatching onto a guessed box.
|
|
1122
|
+
*
|
|
1123
|
+
* @experimental
|
|
1124
|
+
*/
|
|
1125
|
+
interface DetachedSessionRefParts {
|
|
1126
|
+
sessionId: string;
|
|
1127
|
+
sandboxId?: string;
|
|
1128
|
+
}
|
|
1129
|
+
/**
|
|
1130
|
+
* Encode ref parts into the JSON-safe string stored on the record:
|
|
1131
|
+
* `session=<id>` before the box exists, `sandbox=<id>;session=<id>` once
|
|
1132
|
+
* bound. Ids must not contain the `;`/`=` delimiters.
|
|
1133
|
+
*
|
|
1134
|
+
* @experimental
|
|
1135
|
+
*/
|
|
1136
|
+
declare function formatDetachedSessionRef(parts: DetachedSessionRefParts): string;
|
|
1137
|
+
/** @experimental Inverse of {@link formatDetachedSessionRef}; throws `ValidationError` on malformed input. */
|
|
1138
|
+
declare function parseDetachedSessionRef(raw: string): DetachedSessionRefParts;
|
|
1139
|
+
/** @experimental The terminal payload of a finished detached turn. */
|
|
1140
|
+
interface DetachedTurn {
|
|
1141
|
+
/** Final assistant text. */
|
|
1142
|
+
text: string;
|
|
1143
|
+
/** The SDK's cached AgentExecutionResult-shape record for the turn. */
|
|
1144
|
+
result: Record<string, unknown>;
|
|
1145
|
+
}
|
|
1146
|
+
/**
|
|
1147
|
+
* Synthesize the terminal event array a detached turn settles through. Shaped
|
|
1148
|
+
* so the existing event-stream output adapters (coder, researcher) parse it:
|
|
1149
|
+
* `data.result` for adapters that read a structured terminal record, `data.text`
|
|
1150
|
+
* for adapters that scan assistant text for the fenced result block.
|
|
1151
|
+
*
|
|
1152
|
+
* @experimental
|
|
1153
|
+
*/
|
|
1154
|
+
declare function detachedTurnEvents(sessionId: string, turn: DetachedTurn): SandboxEvent[];
|
|
1155
|
+
/** @experimental */
|
|
1156
|
+
interface RunDetachedTurnOptions {
|
|
1157
|
+
/** Sandbox client used to acquire the box (the delegate's executor client). */
|
|
1158
|
+
client: SandboxClient;
|
|
1159
|
+
/** Profile + overrides for box acquisition — same spec the streaming path uses. */
|
|
1160
|
+
spec: AgentRunSpec<unknown>;
|
|
1161
|
+
/** The full turn prompt; consumed by `driveTurn`'s dispatch leg. */
|
|
1162
|
+
prompt: string;
|
|
1163
|
+
/** Deterministic resume key, minted at submit time (`parseDetachedSessionRef(ref).sessionId`). */
|
|
1164
|
+
sessionId: string;
|
|
1165
|
+
/**
|
|
1166
|
+
* Called once the box exists, with its sandbox id. Callers persist
|
|
1167
|
+
* `formatDetachedSessionRef({ sandboxId, sessionId })` onto the record here so
|
|
1168
|
+
* a restart can resolve the box again.
|
|
1169
|
+
*/
|
|
1170
|
+
bindSandbox(sandboxId: string): void;
|
|
1171
|
+
signal: AbortSignal;
|
|
1172
|
+
report(progress: DelegationProgress): void;
|
|
1173
|
+
/** Delay between `running` ticks (ms). Default 5000. */
|
|
1174
|
+
tickIntervalMs?: number;
|
|
1175
|
+
/** Wall-clock cap forwarded to `driveTurn` — the SDK cancels and fails a session past it. */
|
|
1176
|
+
wallCapMs?: number;
|
|
1177
|
+
/**
|
|
1178
|
+
* Loop-trace sink. When set, the detached turn synthesizes a
|
|
1179
|
+
* single-iteration loop span tree (`runId` = `sessionId`, driver
|
|
1180
|
+
* `'detached-turn'`) so trace-context inheritance survives the detached
|
|
1181
|
+
* path — the same events the streaming `runLoop` path would emit, minus
|
|
1182
|
+
* per-token telemetry: `driveTurn` yields one terminal payload, so token
|
|
1183
|
+
* and cost figures are structurally unavailable and reported as 0 under
|
|
1184
|
+
* this driver tag.
|
|
1185
|
+
*/
|
|
1186
|
+
traceEmitter?: LoopTraceEmitter;
|
|
1187
|
+
/** Physical placement stamped on the synthesized dispatch event. Default `'sibling'`. */
|
|
1188
|
+
placement?: 'sibling' | 'fleet';
|
|
1189
|
+
}
|
|
1190
|
+
/**
|
|
1191
|
+
* Dispatch one detached turn and advance it to a terminal state with
|
|
1192
|
+
* `driveTurn` ticks. The first tick dispatches (idempotent on `sessionId`);
|
|
1193
|
+
* subsequent ticks poll. On abort the remote session is cancelled via
|
|
1194
|
+
* `_sessionCancel` when the box exposes it. The box is torn down on every
|
|
1195
|
+
* in-process exit path (success, failure, abort) — only a process death skips
|
|
1196
|
+
* teardown, which is exactly the case the resume driver re-attaches to.
|
|
1197
|
+
*
|
|
1198
|
+
* @experimental
|
|
1199
|
+
*/
|
|
1200
|
+
declare function runDetachedTurn(options: RunDetachedTurnOptions): Promise<DetachedTurn>;
|
|
1201
|
+
/** @experimental */
|
|
1202
|
+
interface DriveTurnResumeDriverOptions {
|
|
1203
|
+
/**
|
|
1204
|
+
* Resolve the live box owning a detached session. The bin wires this to the
|
|
1205
|
+
* sandbox client's `get(sandboxId)`; throw when the box no longer exists —
|
|
1206
|
+
* a thrown tick settles the record as failed, which is the truth.
|
|
1207
|
+
*/
|
|
1208
|
+
resolveSandbox(sandboxId: string): Promise<DriveTurnCapableBox>;
|
|
1209
|
+
/**
|
|
1210
|
+
* Rebuild the turn prompt from the persisted record. Only consumed by
|
|
1211
|
+
* `driveTurn`'s dispatch leg — i.e. when the previous process died after
|
|
1212
|
+
* binding the box but before the session was dispatched. Must reproduce the
|
|
1213
|
+
* prompt the delegate would have sent.
|
|
1214
|
+
*/
|
|
1215
|
+
buildMessage(record: DelegationRecord): string;
|
|
1216
|
+
/**
|
|
1217
|
+
* Map a completed turn onto the delegation's typed output payload (parse +
|
|
1218
|
+
* validate per profile). Throw when the resumed result does not pass the
|
|
1219
|
+
* profile's gate — the queue settles the record as failed with that error.
|
|
1220
|
+
*/
|
|
1221
|
+
settleOutput(turn: DetachedTurn, record: DelegationRecord, ctx: {
|
|
1222
|
+
signal: AbortSignal;
|
|
1223
|
+
}): Promise<DelegationResultPayload['output']> | DelegationResultPayload['output'];
|
|
1224
|
+
/** Delay between `running` ticks (ms). Default 5000. */
|
|
1225
|
+
intervalMs?: number;
|
|
1226
|
+
/** Wall-clock cap forwarded to `driveTurn` on every tick. */
|
|
1227
|
+
wallCapMs?: number;
|
|
1228
|
+
}
|
|
1229
|
+
/**
|
|
1230
|
+
* Build the `driveTurn`-backed {@link DelegationResumeDriver}. Each `tick()`
|
|
1231
|
+
* is one settle/poll/dispatch pass:
|
|
1232
|
+
*
|
|
1233
|
+
* - ref without a sandbox binding → `failed` (`DetachedSessionUnboundError`):
|
|
1234
|
+
* the previous process died before a box existed; there is nothing to resume.
|
|
1235
|
+
* - `driveTurn` `completed` → `settleOutput` → `completed` tick.
|
|
1236
|
+
* - `running` → progress via `ctx.report`, `running` tick (queue re-ticks
|
|
1237
|
+
* after `intervalMs`).
|
|
1238
|
+
* - `failed` → `failed` tick (`DetachedTurnFailedError`) — terminal per the
|
|
1239
|
+
* SDK's deterministic-failure contract.
|
|
1240
|
+
*
|
|
1241
|
+
* Abort: the queue stops ticking once `cancel()` flips the record, so remote
|
|
1242
|
+
* cancellation is hooked onto `ctx.signal` (once per task) and fires
|
|
1243
|
+
* `_sessionCancel` when the SDK surface exposes it. The driver never deletes
|
|
1244
|
+
* boxes — it cannot know whether `sandboxId` is a disposable sibling or a
|
|
1245
|
+
* fleet machine, and destroying a fleet machine would be unrecoverable.
|
|
1246
|
+
*
|
|
1247
|
+
* @experimental
|
|
1248
|
+
*/
|
|
1249
|
+
declare function createDriveTurnResumeDriver(options: DriveTurnResumeDriverOptions): DelegationResumeDriver;
|
|
1250
|
+
|
|
1251
|
+
/**
|
|
1252
|
+
* @experimental
|
|
1253
|
+
*
|
|
1254
|
+
* Delegation executors — the layer between MCP delegates and the sandbox
|
|
1255
|
+
* substrate. Each executor exposes a {@link SandboxClient} the kernel
|
|
1256
|
+
* consumes plus a placement tag so the trace pipeline can correlate workers
|
|
1257
|
+
* with their physical placement.
|
|
1258
|
+
*
|
|
1259
|
+
* Two implementations ship in-box:
|
|
1260
|
+
*
|
|
1261
|
+
* - {@link createSiblingSandboxExecutor} — every delegation spawns a fresh
|
|
1262
|
+
* sandbox sibling to the caller. Default when the MCP server runs as a
|
|
1263
|
+
* standalone CLI mounted outside a fleet.
|
|
1264
|
+
*
|
|
1265
|
+
* - {@link createFleetWorkspaceExecutor} — delegations dispatch onto machines
|
|
1266
|
+
* in the caller's existing fleet so worker diffs land directly on the
|
|
1267
|
+
* caller's filesystem (the fleet's shared workspace). Selected when the
|
|
1268
|
+
* parent sandbox passes `TANGLE_FLEET_ID` into the MCP server's env.
|
|
1269
|
+
*/
|
|
1270
|
+
|
|
1271
|
+
/** @experimental */
|
|
1272
|
+
interface DelegationExecutor {
|
|
1273
|
+
/** Sandbox client the kernel calls. Returned with `describePlacement` set. */
|
|
1274
|
+
readonly client: SandboxClient;
|
|
1275
|
+
/** Best-effort one-liner used in stderr boot logs and diagnostics. */
|
|
1276
|
+
describe(): string;
|
|
1277
|
+
/**
|
|
1278
|
+
* Where delegated work physically runs. `sibling` and `fleet` placements are
|
|
1279
|
+
* session-backed (boxes expose `driveTurn`, so detached dispatch + resume
|
|
1280
|
+
* apply); `in-process` spawns local harness CLIs with no sandbox session to
|
|
1281
|
+
* detach. Optional so consumer-implemented executors stay source-compatible;
|
|
1282
|
+
* absent means "unknown" and detached dispatch is not enabled for it.
|
|
1283
|
+
*/
|
|
1284
|
+
readonly placement?: 'sibling' | 'fleet' | 'in-process';
|
|
1285
|
+
}
|
|
1286
|
+
/** @experimental */
|
|
1287
|
+
interface SiblingSandboxExecutorOptions {
|
|
1288
|
+
client: SandboxClient;
|
|
1289
|
+
}
|
|
1290
|
+
/**
|
|
1291
|
+
* Wrap a raw sandbox SDK client so the kernel emits
|
|
1292
|
+
* `loop.iteration.dispatch` events with `{ placement: 'sibling', sandboxId }`.
|
|
1293
|
+
*
|
|
1294
|
+
* The returned client's `.create()` delegates to the underlying client; the
|
|
1295
|
+
* only added behavior is a `describePlacement` tag the kernel reads.
|
|
1296
|
+
*
|
|
1297
|
+
* @experimental
|
|
1298
|
+
*/
|
|
1299
|
+
declare function createSiblingSandboxExecutor(options: SiblingSandboxExecutorOptions): DelegationExecutor;
|
|
1300
|
+
/**
|
|
1301
|
+
* Minimal `SandboxFleet` surface the fleet executor calls. Declared
|
|
1302
|
+
* structurally so tests can pass an in-memory stub without instantiating the
|
|
1303
|
+
* sandbox SDK.
|
|
1304
|
+
*
|
|
1305
|
+
* @experimental
|
|
1306
|
+
*/
|
|
1307
|
+
interface FleetHandle {
|
|
1308
|
+
readonly fleetId: string;
|
|
1309
|
+
/** Machine ids in dispatch-eligible order. The executor round-robins. */
|
|
1310
|
+
readonly ids: ReadonlyArray<string>;
|
|
1311
|
+
/** Resolve a machine id to its `SandboxInstance` — that machine is mounted
|
|
1312
|
+
* on the fleet's shared workspace, so any diff the worker writes lands on
|
|
1313
|
+
* every other fleet machine's filesystem too. */
|
|
1314
|
+
sandbox(machineId: string): Promise<SandboxInstance>;
|
|
1315
|
+
}
|
|
1316
|
+
/** @experimental */
|
|
1317
|
+
interface FleetWorkspaceExecutorOptions {
|
|
1318
|
+
fleet: FleetHandle;
|
|
1319
|
+
/**
|
|
1320
|
+
* Override the machine-selection policy. Default = round-robin across
|
|
1321
|
+
* `fleet.ids`, skipping the optional `excludeMachineIds` set (typically the
|
|
1322
|
+
* coordinator machine the MCP server is running on).
|
|
1323
|
+
*/
|
|
1324
|
+
selectMachine?: (call: {
|
|
1325
|
+
callIndex: number;
|
|
1326
|
+
ids: ReadonlyArray<string>;
|
|
1327
|
+
}) => string;
|
|
1328
|
+
/**
|
|
1329
|
+
* Machine ids to skip during default round-robin. Set to the caller's own
|
|
1330
|
+
* machineId so workers don't compete with the orchestrator on the same VM.
|
|
1331
|
+
*/
|
|
1332
|
+
excludeMachineIds?: ReadonlyArray<string>;
|
|
1333
|
+
}
|
|
1334
|
+
/**
|
|
1335
|
+
* Build an executor that resolves each delegated iteration to an existing
|
|
1336
|
+
* machine in `fleet`. The fleet's shared-workspace policy means the worker
|
|
1337
|
+
* machine sees the caller's filesystem — diffs land in-place with no
|
|
1338
|
+
* cross-sandbox copy step.
|
|
1339
|
+
*
|
|
1340
|
+
* @experimental
|
|
1341
|
+
*/
|
|
1342
|
+
declare function createFleetWorkspaceExecutor(options: FleetWorkspaceExecutorOptions): DelegationExecutor;
|
|
1343
|
+
|
|
1344
|
+
/** @experimental */
|
|
1345
|
+
interface DelegateRunCtx {
|
|
1346
|
+
signal: AbortSignal;
|
|
1347
|
+
report(progress: DelegationProgress): void;
|
|
1348
|
+
/**
|
|
1349
|
+
* Detached-run resume key recorded on the queue record at submit time
|
|
1350
|
+
* (`formatDetachedSessionRef`). Present only when the submit path requested
|
|
1351
|
+
* detached dispatch — its presence is what routes a session-backed delegate
|
|
1352
|
+
* onto the `driveTurn` tick path instead of holding a stream.
|
|
1353
|
+
*/
|
|
1354
|
+
detachedSessionRef?: string;
|
|
1355
|
+
/** Rebind the record's resume key (e.g. once the sandbox id is known). */
|
|
1356
|
+
updateDetachedSessionRef?(ref: string): void;
|
|
1357
|
+
/**
|
|
1358
|
+
* Per-delegation trace sink supplied by the queue — loop events emitted
|
|
1359
|
+
* here land on the delegation record as a compact span tree. Delegates
|
|
1360
|
+
* compose it with their configured OTEL emitter so both sinks observe
|
|
1361
|
+
* the same stream.
|
|
1362
|
+
*/
|
|
1363
|
+
traceEmitter?: LoopTraceEmitter;
|
|
1364
|
+
}
|
|
1365
|
+
/** @experimental */
|
|
1366
|
+
type CoderDelegate = (args: DelegateCodeArgs, ctx: DelegateRunCtx) => Promise<CoderOutput>;
|
|
1367
|
+
/** @experimental */
|
|
1368
|
+
type ResearcherDelegate = (args: DelegateResearchArgs, ctx: DelegateRunCtx) => Promise<ResearchOutputShape>;
|
|
1369
|
+
/**
|
|
1370
|
+
* UI-auditor delegate — fully consumer-injected. agent-runtime ships no
|
|
1371
|
+
* default factory because the inputs are workspace path + judge function
|
|
1372
|
+
* + (optionally) a `SandboxClient`, and the judge is the consumer's
|
|
1373
|
+
* model seam. See `createInProcessUiAuditClient` + `uiAuditorProfile` in
|
|
1374
|
+
* `@tangle-network/agent-runtime/profiles` for the canonical wiring.
|
|
1375
|
+
*
|
|
1376
|
+
* @experimental
|
|
1377
|
+
*/
|
|
1378
|
+
type UiAuditorDelegate = (args: DelegateUiAuditArgs, ctx: DelegateRunCtx) => Promise<UiAuditorDelegationOutput>;
|
|
1379
|
+
/** @experimental Structured review verdict over a coder candidate. */
|
|
1380
|
+
interface CoderReview {
|
|
1381
|
+
/** Gate: only approved candidates are eligible to win. */
|
|
1382
|
+
approved: boolean;
|
|
1383
|
+
/** Reviewer's recommendation — surfaced in traces. */
|
|
1384
|
+
recommendation: 'ship' | 'approve-with-nits' | 'changes-requested' | 'reject';
|
|
1385
|
+
/** Readiness 0..1, used by the `highest-readiness` winner-selection strategy. */
|
|
1386
|
+
readiness: number;
|
|
1387
|
+
notes?: string;
|
|
1388
|
+
}
|
|
1389
|
+
/**
|
|
1390
|
+
* @experimental
|
|
1391
|
+
*
|
|
1392
|
+
* Optional adversarial reviewer over a coder candidate that already passed
|
|
1393
|
+
* mechanical validation (tests/typecheck/forbidden/diff/no-op/secrets). Folded
|
|
1394
|
+
* from the ai-trading-blueprint delegation MCP: a candidate is only eligible to
|
|
1395
|
+
* win if the reviewer approves it. The reviewer is the consumer's seam — an LLM
|
|
1396
|
+
* judge, a `pnpm review` command, anything returning a `CoderReview`.
|
|
1397
|
+
*/
|
|
1398
|
+
type CoderReviewer = (output: CoderOutput, task: CoderTask, ctx: {
|
|
1399
|
+
signal: AbortSignal;
|
|
1400
|
+
}) => Promise<CoderReview> | CoderReview;
|
|
1401
|
+
/**
|
|
1402
|
+
* @experimental Winner-selection strategy among validated (+ reviewed)
|
|
1403
|
+
* candidates. `highest-readiness` requires a `reviewer`. Default `highest-score`
|
|
1404
|
+
* (the kernel's behavior — preserves backward compatibility).
|
|
1405
|
+
*/
|
|
1406
|
+
type CoderWinnerSelection = 'highest-score' | 'smallest-diff' | 'highest-readiness' | 'first-approved';
|
|
1407
|
+
/** @experimental */
|
|
1408
|
+
interface CreateDefaultCoderDelegateOptions {
|
|
1409
|
+
/**
|
|
1410
|
+
* Execution placement. Pass a {@link DelegationExecutor} (sibling or fleet)
|
|
1411
|
+
* to control where worker iterations land. `sandboxClient` is a
|
|
1412
|
+
* convenience shorthand that wraps the client in a sibling executor — pass
|
|
1413
|
+
* one or the other, not both.
|
|
1414
|
+
*/
|
|
1415
|
+
executor?: DelegationExecutor;
|
|
1416
|
+
/**
|
|
1417
|
+
* Convenience shorthand for sibling placement. Equivalent to
|
|
1418
|
+
* `executor: createSiblingSandboxExecutor({ client: sandboxClient })`.
|
|
1419
|
+
*/
|
|
1420
|
+
sandboxClient?: SandboxClient;
|
|
1421
|
+
/** Backend harness for the single-coder path. Default comes from `coderProfile`. */
|
|
1422
|
+
harness?: string;
|
|
1423
|
+
/** Model override for the single-coder path. */
|
|
1424
|
+
model?: string;
|
|
1425
|
+
/** Harnesses used for the `variants > 1` fanout. Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`. */
|
|
1426
|
+
fanoutHarnesses?: string[];
|
|
1427
|
+
/** Optional per-harness model override for `variants > 1`. */
|
|
1428
|
+
fanoutModels?: (string | undefined)[];
|
|
1429
|
+
/** Hard cap on the kernel's per-batch concurrency. Default 4. */
|
|
1430
|
+
maxConcurrency?: number;
|
|
1431
|
+
/**
|
|
1432
|
+
* Optional adversarial reviewer. When set, a candidate must pass mechanical
|
|
1433
|
+
* validation AND `reviewer.approved` to be eligible to win — empty/secret/
|
|
1434
|
+
* test-failing patches are already gone; this catches the "compiles + passes
|
|
1435
|
+
* but wrong/unsafe" class the deterministic validator can't see.
|
|
1436
|
+
*/
|
|
1437
|
+
reviewer?: CoderReviewer;
|
|
1438
|
+
/** Winner-selection strategy among eligible candidates. Default `highest-score`. */
|
|
1439
|
+
winnerSelection?: CoderWinnerSelection;
|
|
1440
|
+
/**
|
|
1441
|
+
* Loop trace emitter forwarded into every delegated `runLoop`. Wire
|
|
1442
|
+
* `createPropagatingTraceEmitter(readTraceContextFromEnv())` here (the bin
|
|
1443
|
+
* does) so delegated build-loops export their topology spans to the OTLP /
|
|
1444
|
+
* Tangle Intelligence sink when `OTEL_EXPORTER_OTLP_ENDPOINT` is set — and
|
|
1445
|
+
* are a cheap no-op when it isn't. Configurable by construction.
|
|
1446
|
+
*
|
|
1447
|
+
* Detached single-variant turns (taken when `ctx.detachedSessionRef` is set)
|
|
1448
|
+
* bypass `runLoop`; `runDetachedTurn` synthesizes a single-iteration loop
|
|
1449
|
+
* event stream for them so this emitter observes detached work too.
|
|
1450
|
+
*/
|
|
1451
|
+
traceEmitter?: LoopTraceEmitter;
|
|
1452
|
+
/** Tick cadence (ms) for the detached single-variant path. Default 5000. */
|
|
1453
|
+
detachedTickIntervalMs?: number;
|
|
1454
|
+
/** Wall-clock cap (ms) forwarded to `driveTurn` for detached turns. */
|
|
1455
|
+
detachedWallCapMs?: number;
|
|
1456
|
+
}
|
|
1457
|
+
/**
|
|
1458
|
+
* Build a coder delegate that drives `runLoop` against the project's
|
|
1459
|
+
* sandbox client + coder profile. When `args.variants > 1` it switches
|
|
1460
|
+
* to the multi-harness fanout topology.
|
|
1461
|
+
*
|
|
1462
|
+
* @experimental
|
|
1463
|
+
*/
|
|
1464
|
+
declare function createDefaultCoderDelegate(options: CreateDefaultCoderDelegateOptions): CoderDelegate;
|
|
1465
|
+
/**
|
|
1466
|
+
* Canonical `DelegateCodeArgs` → `CoderTask` mapping — the single source for
|
|
1467
|
+
* the delegate's live dispatch AND the resume driver's settle/message
|
|
1468
|
+
* rebuilding, so a resumed record reproduces exactly the task the original
|
|
1469
|
+
* process dispatched.
|
|
1470
|
+
*
|
|
1471
|
+
* @experimental
|
|
1472
|
+
*/
|
|
1473
|
+
declare function coderTaskFromArgs(args: DelegateCodeArgs): CoderTask;
|
|
1474
|
+
/** @experimental */
|
|
1475
|
+
interface SettleDetachedCoderTurnOptions {
|
|
1476
|
+
task: CoderTask;
|
|
1477
|
+
/** Session id of the detached turn — used as the synthesized event id. */
|
|
1478
|
+
sessionId: string;
|
|
1479
|
+
signal: AbortSignal;
|
|
1480
|
+
harness?: string;
|
|
1481
|
+
model?: string;
|
|
1482
|
+
/** Same gate as the streaming path: an unapproved candidate cannot win. */
|
|
1483
|
+
reviewer?: CoderReviewer;
|
|
1484
|
+
}
|
|
1485
|
+
/**
|
|
1486
|
+
* Settle a completed detached coder turn through the same gate the streaming
|
|
1487
|
+
* path applies: parse the terminal payload with the coder output adapter,
|
|
1488
|
+
* run the mechanical validator (tests/typecheck/forbidden/diff/no-op/secrets),
|
|
1489
|
+
* then the optional reviewer. Throws when nothing survives — a resumed or
|
|
1490
|
+
* detached run must not return an unvalidated patch.
|
|
1491
|
+
*
|
|
1492
|
+
* @experimental
|
|
1493
|
+
*/
|
|
1494
|
+
declare function settleDetachedCoderTurn(turn: DetachedTurn, options: SettleDetachedCoderTurnOptions): Promise<CoderOutput>;
|
|
1495
|
+
|
|
1496
|
+
/**
|
|
1497
|
+
* @experimental
|
|
1498
|
+
*
|
|
1499
|
+
* `createKbGate` — the valid-only knowledge-base growth gate, distilled from
|
|
1500
|
+
* physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
|
|
1501
|
+
* writer) runs candidate facts through this before persisting, so the KB grows
|
|
1502
|
+
* with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
|
|
1503
|
+
* vetoed at the gate.
|
|
1504
|
+
*
|
|
1505
|
+
* Fail-closed by construction: every judge must `accept`; the FIRST veto wins
|
|
1506
|
+
* and the fact is rejected. The non-negotiable floor (always on, can't be
|
|
1507
|
+
* disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
|
|
1508
|
+
* literally appear in its `sourceText`. That single check kills the dominant
|
|
1509
|
+
* failure mode (a confident claim decoupled from any real source).
|
|
1510
|
+
*
|
|
1511
|
+
* Pure + dependency-free: it operates on fact candidates, not on a store, so it
|
|
1512
|
+
* composes with `@tangle-network/agent-knowledge` or any persistence layer
|
|
1513
|
+
* without importing it. The remediation policy (correct-on-veto vs
|
|
1514
|
+
* escalate-as-unverified) is the caller's — this returns the verdict; it never
|
|
1515
|
+
* drops a fact silently.
|
|
1516
|
+
*/
|
|
1517
|
+
/** @experimental A fact proposed for the KB, with its grounding. */
|
|
1518
|
+
interface FactCandidate {
|
|
1519
|
+
/** The atomic claim text. */
|
|
1520
|
+
claim: string;
|
|
1521
|
+
/** Optional extracted value (number or string) the claim asserts. */
|
|
1522
|
+
value?: string | number;
|
|
1523
|
+
/** Verbatim span lifted from the source that backs the claim. */
|
|
1524
|
+
verbatimPassage: string;
|
|
1525
|
+
/** The raw source text the passage must be grounded in. */
|
|
1526
|
+
sourceText: string;
|
|
1527
|
+
/** Where the fact claims to come from — checked for circular/self citations. */
|
|
1528
|
+
citation?: string;
|
|
1529
|
+
}
|
|
1530
|
+
/** @experimental */
|
|
1531
|
+
interface FactJudgeVerdict {
|
|
1532
|
+
accept: boolean;
|
|
1533
|
+
reason?: string;
|
|
1534
|
+
}
|
|
1535
|
+
/** @experimental A pluggable fact validator. Throwing is NOT allowed — return a
|
|
1536
|
+
* verdict; a judge that throws is a programmer error, not a veto. */
|
|
1537
|
+
interface FactJudge {
|
|
1538
|
+
name: string;
|
|
1539
|
+
judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>;
|
|
1540
|
+
}
|
|
1541
|
+
/** @experimental */
|
|
1542
|
+
interface KbGateResult {
|
|
1543
|
+
accepted: boolean;
|
|
1544
|
+
/** Name of the judge that vetoed; undefined when accepted. */
|
|
1545
|
+
vetoedBy?: string;
|
|
1546
|
+
reason?: string;
|
|
1547
|
+
}
|
|
1548
|
+
/** @experimental */
|
|
1549
|
+
interface CreateKbGateOptions {
|
|
1550
|
+
/** Extra judges appended after the built-in floor (e.g. an LLM judge). */
|
|
1551
|
+
judges?: FactJudge[];
|
|
1552
|
+
/** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
|
|
1553
|
+
minPassageChars?: number;
|
|
1554
|
+
/**
|
|
1555
|
+
* Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
|
|
1556
|
+
* `'cad_params'`, `'requirements'`). A citation naming one is circular
|
|
1557
|
+
* (laundering) — the fact cites a derived artifact, not a real source.
|
|
1558
|
+
* Default `[]` (no circular check unless the consumer declares its kinds).
|
|
1559
|
+
*/
|
|
1560
|
+
selfArtifactKinds?: string[];
|
|
1561
|
+
}
|
|
1562
|
+
/**
|
|
1563
|
+
* @experimental
|
|
1564
|
+
*
|
|
1565
|
+
* Build a fail-closed KB gate. The returned function runs the built-in floor
|
|
1566
|
+
* (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
|
|
1567
|
+
* then any consumer judges, returning on the first veto.
|
|
1568
|
+
*/
|
|
1569
|
+
declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
|
|
1570
|
+
|
|
1571
|
+
export { type DetachedSessionRefParts as $, type DelegateRunCtx as A, type DelegateUiAuditConfig as B, type CoderReviewer as C, type DelegateCodeArgs as D, type DelegateUiAuditRoute as E, type FactCandidate as F, type DelegationError as G, type DelegationHistoryEntry as H, DelegationPersistenceError as I, type DelegationProfile as J, type DelegationProgress as K, type DelegationRecord as L, type DelegationResultPayload as M, type DelegationResumeContext as N, type DelegationResumeDriver as O, type DelegationResumeTick as P, type DelegationRunContext as Q, type ResearcherDelegate as R, DelegationStateCorruptError as S, type TraceContext as T, type UiAuditorDelegate as U, type DelegationStatus as V, type DelegationStore as W, type DelegationTaskQueueOptions as X, type DelegationTraceCaps as Y, type DelegationTraceCollector as Z, type DelegationTraceSpan as _, type CoderWinnerSelection as a, type DetachedTurn as a0, type DriveTurnCapableBox as a1, type DriveTurnResumeDriverOptions as a2, type DriveTurnTick as a3, type FactJudge as a4, type FactJudgeVerdict as a5, type FeedbackRating as a6, type FeedbackRefersTo as a7, FileDelegationStore as a8, type FileDelegationStoreOptions as a9, runDetachedTurn as aA, settleDetachedCoderTurn as aB, traceContextToEnv as aC, type EvalRunEvent as aD, type EvalRunGeneration as aE, type EvalRunsExportConfig as aF, type EvalRunsExportResult as aG, INTELLIGENCE_WIRE_VERSION as aH, type LoopSpanNode as aI, type OtelAttribute as aJ, type OtelExportConfig as aK, type OtelExporter as aL, type OtelSpan as aM, buildLoopOtelSpans as aN, buildLoopSpanNodes as aO, createOtelExporter as aP, exportEvalRuns as aQ, loopEventToOtelSpan as aR, type FleetWorkspaceExecutorOptions as aa, InMemoryDelegationStore as ab, type KbGateResult as ac, type ResearchOutputShape as ad, type RunDetachedTurnOptions as ae, type SettleDetachedCoderTurnOptions as af, type SiblingSandboxExecutorOptions as ag, type SubmitInput as ah, type SubmitOutput as ai, type 
UiAuditorDelegationOutput as aj, buildDelegationTraceSpans as ak, capDelegationTrace as al, coderTaskFromArgs as am, composeLoopTraceEmitters as an, createDefaultCoderDelegate as ao, createDelegationTraceCollector as ap, createDriveTurnResumeDriver as aq, createFleetWorkspaceExecutor as ar, createKbGate as as, createPropagatingTraceEmitter as at, createSiblingSandboxExecutor as au, detachedTurnEvents as av, formatDetachedSessionRef as aw, hashIdempotencyInput as ax, parseDetachedSessionRef as ay, readTraceContextFromEnv as az, type CreateKbGateOptions as b, type FleetHandle as c, type DelegationExecutor as d, type DelegateFeedbackArgs as e, type DelegationFeedbackSnapshot as f, DelegationTaskQueue as g, type CoderDelegate as h, type DelegateCodeResult as i, type DelegateFeedbackResult as j, type ResearchSource as k, type DelegateResearchArgs as l, type DelegateResearchResult as m, type DelegateUiAuditArgs as n, type DelegateUiAuditResult as o, type DelegationHistoryResult as p, type DelegationHistoryArgs as q, type DelegationStatusResult as r, type DelegationStatusArgs as s, type CappedDelegationTrace as t, type CoderReview as u, type CreateDefaultCoderDelegateOptions as v, DELEGATION_TRACE_MAX_BYTES as w, DELEGATION_TRACE_MAX_SPANS as x, type DelegateCodeConfig as y, type DelegateResearchConfig as z };
|