@tangle-network/agent-runtime 0.36.0 → 0.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +3 -3
- package/dist/analyst-loop.d.ts +2 -2
- package/dist/analyst-loop.js +3 -257
- package/dist/analyst-loop.js.map +1 -1
- package/dist/{chunk-NYGEI3NV.js → chunk-M65QJD35.js} +5 -211
- package/dist/chunk-M65QJD35.js.map +1 -0
- package/dist/{chunk-HSX6PFZR.js → chunk-V6GURW4W.js} +209 -1
- package/dist/chunk-V6GURW4W.js.map +1 -0
- package/dist/chunk-VOX6Z3II.js +90 -0
- package/dist/chunk-VOX6Z3II.js.map +1 -0
- package/dist/chunk-XBUG326M.js +261 -0
- package/dist/chunk-XBUG326M.js.map +1 -0
- package/dist/{chunk-7ZECSZ3C.js → chunk-Z523NPJK.js} +59 -2
- package/dist/chunk-Z523NPJK.js.map +1 -0
- package/dist/dynamic-DeOPeeAw.d.ts +106 -0
- package/dist/{improvement-adapter-CaZxFxTd.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
- package/dist/improvement.d.ts +6 -130
- package/dist/improvement.js +4 -85
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +148 -3
- package/dist/index.js +109 -2
- package/dist/index.js.map +1 -1
- package/dist/loops.d.ts +5 -107
- package/dist/mcp/bin.js +4 -3
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +6 -440
- package/dist/mcp/index.js +7 -62
- package/dist/mcp/index.js.map +1 -1
- package/dist/optimize-prompt-cmH9wZdH.d.ts +129 -0
- package/dist/otel-export-CNmeg_7B.d.ts +627 -0
- package/dist/profiles.d.ts +1 -2
- package/dist/{types-DrXVR2Fu.d.ts → types-CmkQl8qE.d.ts} +137 -3
- package/dist/{types-D_MXrmJP.d.ts → types-p8dWBIXL.d.ts} +1 -1
- package/package.json +1 -1
- package/dist/chunk-7ZECSZ3C.js.map +0 -1
- package/dist/chunk-HSX6PFZR.js.map +0 -1
- package/dist/chunk-NYGEI3NV.js.map +0 -1
- package/dist/otel-export-xgf4J6bo.d.ts +0 -191
- package/dist/runtime-run-B8VIiOhI.d.ts +0 -137
|
@@ -0,0 +1,627 @@
|
|
|
1
|
+
import { CoderOutput, CoderTask } from './profiles.js';
|
|
2
|
+
import { L as LoopSandboxClient } from './types-CmkQl8qE.js';
|
|
3
|
+
import { SandboxInstance } from '@tangle-network/sandbox';
|
|
4
|
+
import { O as OpenAIChatTool } from './types-CsCCryln.js';
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* @experimental
|
|
8
|
+
*
|
|
9
|
+
* Delegation executors — the layer between MCP delegates and the sandbox
|
|
10
|
+
* substrate. Each executor exposes a {@link LoopSandboxClient} the kernel
|
|
11
|
+
* consumes plus a placement tag so the trace pipeline can correlate workers
|
|
12
|
+
* with their physical placement.
|
|
13
|
+
*
|
|
14
|
+
* Two implementations ship in-box:
|
|
15
|
+
*
|
|
16
|
+
* - {@link createSiblingSandboxExecutor} — every delegation spawns a fresh
|
|
17
|
+
* sandbox sibling to the caller. Default when the MCP server runs as a
|
|
18
|
+
* standalone CLI mounted outside a fleet.
|
|
19
|
+
*
|
|
20
|
+
* - {@link createFleetWorkspaceExecutor} — delegations dispatch onto machines
|
|
21
|
+
* in the caller's existing fleet so worker diffs land directly on the
|
|
22
|
+
* caller's filesystem (the fleet's shared workspace). Selected when the
|
|
23
|
+
* parent sandbox passes `TANGLE_FLEET_ID` into the MCP server's env.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/** @experimental */
|
|
27
|
+
interface DelegationExecutor {
|
|
28
|
+
/** Sandbox client the kernel calls. Returned with `describePlacement` set. */
|
|
29
|
+
readonly client: LoopSandboxClient;
|
|
30
|
+
/** Best-effort one-liner used in stderr boot logs and diagnostics. */
|
|
31
|
+
describe(): string;
|
|
32
|
+
}
|
|
33
|
+
/** @experimental */
|
|
34
|
+
interface SiblingSandboxExecutorOptions {
|
|
35
|
+
client: LoopSandboxClient;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Wrap a raw sandbox SDK client so the kernel emits
|
|
39
|
+
* `loop.iteration.dispatch` events with `{ placement: 'sibling', sandboxId }`.
|
|
40
|
+
*
|
|
41
|
+
* The returned client's `.create()` delegates to the underlying client; the
|
|
42
|
+
* only added behavior is a `describePlacement` tag the kernel reads.
|
|
43
|
+
*
|
|
44
|
+
* @experimental
|
|
45
|
+
*/
|
|
46
|
+
declare function createSiblingSandboxExecutor(options: SiblingSandboxExecutorOptions): DelegationExecutor;
|
|
47
|
+
/**
|
|
48
|
+
* Minimal `SandboxFleet` surface the fleet executor calls. Declared
|
|
49
|
+
* structurally so tests can pass an in-memory stub without instantiating the
|
|
50
|
+
* sandbox SDK.
|
|
51
|
+
*
|
|
52
|
+
* @experimental
|
|
53
|
+
*/
|
|
54
|
+
interface FleetHandle {
|
|
55
|
+
readonly fleetId: string;
|
|
56
|
+
/** Machine ids in dispatch-eligible order. The executor round-robins. */
|
|
57
|
+
readonly ids: ReadonlyArray<string>;
|
|
58
|
+
/** Resolve a machine id to its `SandboxInstance` — that machine is mounted
|
|
59
|
+
* on the fleet's shared workspace, so any diff the worker writes lands on
|
|
60
|
+
* every other fleet machine's filesystem too. */
|
|
61
|
+
sandbox(machineId: string): Promise<SandboxInstance>;
|
|
62
|
+
}
|
|
63
|
+
/** @experimental */
|
|
64
|
+
interface FleetWorkspaceExecutorOptions {
|
|
65
|
+
fleet: FleetHandle;
|
|
66
|
+
/**
|
|
67
|
+
* Override the machine-selection policy. Default = round-robin across
|
|
68
|
+
* `fleet.ids`, skipping the optional `excludeMachineIds` set (typically the
|
|
69
|
+
* coordinator machine the MCP server is running on).
|
|
70
|
+
*/
|
|
71
|
+
selectMachine?: (call: {
|
|
72
|
+
callIndex: number;
|
|
73
|
+
ids: ReadonlyArray<string>;
|
|
74
|
+
}) => string;
|
|
75
|
+
/**
|
|
76
|
+
* Machine ids to skip during default round-robin. Set to the caller's own
|
|
77
|
+
* machineId so workers don't compete with the orchestrator on the same VM.
|
|
78
|
+
*/
|
|
79
|
+
excludeMachineIds?: ReadonlyArray<string>;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Build an executor that resolves each delegated iteration to an existing
|
|
83
|
+
* machine in `fleet`. The fleet's shared-workspace policy means the worker
|
|
84
|
+
* machine sees the caller's filesystem — diffs land in-place with no
|
|
85
|
+
* cross-sandbox copy step.
|
|
86
|
+
*
|
|
87
|
+
* @experimental
|
|
88
|
+
*/
|
|
89
|
+
declare function createFleetWorkspaceExecutor(options: FleetWorkspaceExecutorOptions): DelegationExecutor;
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* @experimental
|
|
93
|
+
*
|
|
94
|
+
* MCP delegation tool surface — the typed inputs/outputs the product agent
|
|
95
|
+
* sees over the wire. These types are the contract; the JSON schemas under
|
|
96
|
+
* `tools/*` mirror them for the MCP `tools/list` advertisement.
|
|
97
|
+
*
|
|
98
|
+
* Async semantics: `delegate_code` + `delegate_research` return a `taskId`
|
|
99
|
+
* immediately. The product agent polls `delegation_status` until the task
|
|
100
|
+
* transitions to `completed` | `failed` | `cancelled`. `delegate_feedback`
|
|
101
|
+
* + `delegation_history` are synchronous reads / writes against the local
|
|
102
|
+
* task queue + feedback store.
|
|
103
|
+
*/
|
|
104
|
+
|
|
105
|
+
/** @experimental */
|
|
106
|
+
type DelegationProfile = 'coder' | 'researcher';
|
|
107
|
+
/** @experimental */
|
|
108
|
+
type DelegationStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
|
|
109
|
+
/**
|
|
110
|
+
* Minimal `CoderTask` overrides exposed over the MCP wire. The full
|
|
111
|
+
* `CoderTask` carries fields the kernel synthesizes from `goal` +
|
|
112
|
+
* `repoRoot` — the agent only edits the few that materially gate
|
|
113
|
+
* validator behavior.
|
|
114
|
+
*
|
|
115
|
+
* @experimental
|
|
116
|
+
*/
|
|
117
|
+
interface DelegateCodeConfig {
|
|
118
|
+
testCmd?: string;
|
|
119
|
+
typecheckCmd?: string;
|
|
120
|
+
forbiddenPaths?: string[];
|
|
121
|
+
maxDiffLines?: number;
|
|
122
|
+
}
|
|
123
|
+
/** @experimental */
|
|
124
|
+
interface DelegateCodeArgs {
|
|
125
|
+
/** Natural-language description of what the coder must accomplish. */
|
|
126
|
+
goal: string;
|
|
127
|
+
/** Absolute path inside the sandbox where the repo lives. */
|
|
128
|
+
repoRoot: string;
|
|
129
|
+
/** Optional free-form context the agent surfaces in the prompt prelude. */
|
|
130
|
+
contextHint?: string;
|
|
131
|
+
/**
|
|
132
|
+
* When > 1, dispatches `multiHarnessCoderFanout` across N harnesses
|
|
133
|
+
* (claude-code, codex, opencode-glm) and picks the highest-scoring
|
|
134
|
+
* passing patch. Default 1.
|
|
135
|
+
*/
|
|
136
|
+
variants?: number;
|
|
137
|
+
/** Validator + prompt overrides the agent knows for this repo. */
|
|
138
|
+
config?: DelegateCodeConfig;
|
|
139
|
+
/** Multi-tenant scope (customer-id, workspace-id). */
|
|
140
|
+
namespace?: string;
|
|
141
|
+
}
|
|
142
|
+
/** @experimental */
|
|
143
|
+
interface DelegateCodeResult {
|
|
144
|
+
taskId: string;
|
|
145
|
+
/** Best-effort hint — coder loops can take minutes-to-hours. */
|
|
146
|
+
estimatedDurationMs?: number;
|
|
147
|
+
}
|
|
148
|
+
/** @experimental */
|
|
149
|
+
type ResearchSource = 'web' | 'corpus' | 'twitter' | 'github' | 'docs';
|
|
150
|
+
/** @experimental */
|
|
151
|
+
interface DelegateResearchConfig {
|
|
152
|
+
recencyWindow?: {
|
|
153
|
+
since?: string;
|
|
154
|
+
until?: string;
|
|
155
|
+
};
|
|
156
|
+
maxItems?: number;
|
|
157
|
+
minConfidence?: number;
|
|
158
|
+
}
|
|
159
|
+
/** @experimental */
|
|
160
|
+
interface DelegateResearchArgs {
|
|
161
|
+
question: string;
|
|
162
|
+
namespace: string;
|
|
163
|
+
scope?: string;
|
|
164
|
+
sources?: ResearchSource[];
|
|
165
|
+
variants?: number;
|
|
166
|
+
config?: DelegateResearchConfig;
|
|
167
|
+
}
|
|
168
|
+
/** @experimental */
|
|
169
|
+
interface DelegateResearchResult {
|
|
170
|
+
taskId: string;
|
|
171
|
+
estimatedDurationMs?: number;
|
|
172
|
+
}
|
|
173
|
+
/** @experimental */
|
|
174
|
+
interface FeedbackRefersTo {
|
|
175
|
+
kind: 'delegation' | 'artifact' | 'outcome';
|
|
176
|
+
/** For `'delegation'`, this is the taskId. */
|
|
177
|
+
ref: string;
|
|
178
|
+
}
|
|
179
|
+
/** @experimental */
|
|
180
|
+
interface FeedbackRating {
|
|
181
|
+
/** Score in the closed range [0, 1]. */
|
|
182
|
+
score: number;
|
|
183
|
+
label?: 'good' | 'bad' | 'neutral' | 'mixed';
|
|
184
|
+
notes: string;
|
|
185
|
+
}
|
|
186
|
+
/** @experimental */
|
|
187
|
+
interface DelegateFeedbackArgs {
|
|
188
|
+
refersTo: FeedbackRefersTo;
|
|
189
|
+
rating: FeedbackRating;
|
|
190
|
+
by: 'agent' | 'user' | 'downstream-judge';
|
|
191
|
+
/** ISO timestamp; defaults to server clock when omitted. */
|
|
192
|
+
capturedAt?: string;
|
|
193
|
+
namespace?: string;
|
|
194
|
+
}
|
|
195
|
+
/** @experimental */
|
|
196
|
+
interface DelegateFeedbackResult {
|
|
197
|
+
recorded: true;
|
|
198
|
+
id: string;
|
|
199
|
+
}
|
|
200
|
+
/** @experimental */
|
|
201
|
+
interface DelegationStatusArgs {
|
|
202
|
+
taskId: string;
|
|
203
|
+
}
|
|
204
|
+
/** @experimental */
|
|
205
|
+
interface DelegationProgress {
|
|
206
|
+
iteration: number;
|
|
207
|
+
phase: string;
|
|
208
|
+
}
|
|
209
|
+
/** @experimental */
|
|
210
|
+
interface DelegationError {
|
|
211
|
+
message: string;
|
|
212
|
+
kind: string;
|
|
213
|
+
}
|
|
214
|
+
/**
|
|
215
|
+
* Polymorphic `result` field: `CoderOutput` when the underlying profile
|
|
216
|
+
* is `'coder'`, a structurally-typed research output when `'researcher'`.
|
|
217
|
+
* The MCP wire carries it as JSON either way.
|
|
218
|
+
*
|
|
219
|
+
* @experimental
|
|
220
|
+
*/
|
|
221
|
+
type DelegationResultPayload = {
|
|
222
|
+
profile: 'coder';
|
|
223
|
+
output: CoderOutput;
|
|
224
|
+
} | {
|
|
225
|
+
profile: 'researcher';
|
|
226
|
+
output: ResearchOutputShape;
|
|
227
|
+
};
|
|
228
|
+
/**
|
|
229
|
+
* Loose shape of a research output over the wire — the substrate cannot
|
|
230
|
+
* import the `ResearchOutput` type from agent-knowledge without inducing
|
|
231
|
+
* a dependency cycle, so the MCP layer treats it structurally.
|
|
232
|
+
*
|
|
233
|
+
* @experimental
|
|
234
|
+
*/
|
|
235
|
+
interface ResearchOutputShape {
|
|
236
|
+
items: unknown[];
|
|
237
|
+
citations: unknown[];
|
|
238
|
+
proposedWrites: unknown[];
|
|
239
|
+
gaps?: string[];
|
|
240
|
+
notes?: string;
|
|
241
|
+
[key: string]: unknown;
|
|
242
|
+
}
|
|
243
|
+
/** @experimental */
|
|
244
|
+
interface DelegationStatusResult {
|
|
245
|
+
taskId: string;
|
|
246
|
+
profile: DelegationProfile;
|
|
247
|
+
status: DelegationStatus;
|
|
248
|
+
progress?: DelegationProgress;
|
|
249
|
+
result?: DelegationResultPayload;
|
|
250
|
+
error?: DelegationError;
|
|
251
|
+
costUsd?: number;
|
|
252
|
+
startedAt: string;
|
|
253
|
+
completedAt?: string;
|
|
254
|
+
}
|
|
255
|
+
/** @experimental */
|
|
256
|
+
interface DelegationHistoryArgs {
|
|
257
|
+
namespace?: string;
|
|
258
|
+
profile?: DelegationProfile;
|
|
259
|
+
/** ISO date — only delegations started at-or-after `since` are returned. */
|
|
260
|
+
since?: string;
|
|
261
|
+
/** Default 50. Hard cap 500. */
|
|
262
|
+
limit?: number;
|
|
263
|
+
}
|
|
264
|
+
/** @experimental */
|
|
265
|
+
interface DelegationFeedbackSnapshot {
|
|
266
|
+
id: string;
|
|
267
|
+
score: number;
|
|
268
|
+
label?: FeedbackRating['label'];
|
|
269
|
+
by: DelegateFeedbackArgs['by'];
|
|
270
|
+
notes: string;
|
|
271
|
+
capturedAt: string;
|
|
272
|
+
}
|
|
273
|
+
/** @experimental */
|
|
274
|
+
interface DelegationHistoryEntry {
|
|
275
|
+
taskId: string;
|
|
276
|
+
profile: DelegationProfile;
|
|
277
|
+
namespace?: string;
|
|
278
|
+
args: DelegateCodeArgs | DelegateResearchArgs;
|
|
279
|
+
status: DelegationStatus;
|
|
280
|
+
feedback?: DelegationFeedbackSnapshot[];
|
|
281
|
+
costUsd?: number;
|
|
282
|
+
startedAt: string;
|
|
283
|
+
completedAt?: string;
|
|
284
|
+
}
|
|
285
|
+
/** @experimental */
|
|
286
|
+
interface DelegationHistoryResult {
|
|
287
|
+
delegations: DelegationHistoryEntry[];
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
/** @experimental */
|
|
291
|
+
interface DelegateRunCtx {
|
|
292
|
+
signal: AbortSignal;
|
|
293
|
+
report(progress: DelegationProgress): void;
|
|
294
|
+
}
|
|
295
|
+
/** @experimental */
|
|
296
|
+
type CoderDelegate = (args: DelegateCodeArgs, ctx: DelegateRunCtx) => Promise<CoderOutput>;
|
|
297
|
+
/** @experimental */
|
|
298
|
+
type ResearcherDelegate = (args: DelegateResearchArgs, ctx: DelegateRunCtx) => Promise<ResearchOutputShape>;
|
|
299
|
+
/** @experimental Structured review verdict over a coder candidate. */
|
|
300
|
+
interface CoderReview {
|
|
301
|
+
/** Gate: only approved candidates are eligible to win. */
|
|
302
|
+
approved: boolean;
|
|
303
|
+
/** Reviewer's recommendation — surfaced in traces. */
|
|
304
|
+
recommendation: 'ship' | 'approve-with-nits' | 'changes-requested' | 'reject';
|
|
305
|
+
/** Readiness 0..1, used by the `highest-readiness` winner-selection strategy. */
|
|
306
|
+
readiness: number;
|
|
307
|
+
notes?: string;
|
|
308
|
+
}
|
|
309
|
+
/**
|
|
310
|
+
* @experimental
|
|
311
|
+
*
|
|
312
|
+
* Optional adversarial reviewer over a coder candidate that already passed
|
|
313
|
+
* mechanical validation (tests/typecheck/forbidden/diff/no-op/secrets). Folded
|
|
314
|
+
* from the ai-trading-blueprint delegation MCP: a candidate is only eligible to
|
|
315
|
+
* win if the reviewer approves it. The reviewer is the consumer's seam — an LLM
|
|
316
|
+
* judge, a `pnpm review` command, anything returning a `CoderReview`.
|
|
317
|
+
*/
|
|
318
|
+
type CoderReviewer = (output: CoderOutput, task: CoderTask, ctx: {
|
|
319
|
+
signal: AbortSignal;
|
|
320
|
+
}) => Promise<CoderReview> | CoderReview;
|
|
321
|
+
/**
|
|
322
|
+
* @experimental Winner-selection strategy among validated (+ reviewed)
|
|
323
|
+
* candidates. `highest-readiness` requires a `reviewer`. Default `highest-score`
|
|
324
|
+
* (the kernel's behavior — preserves backward compatibility).
|
|
325
|
+
*/
|
|
326
|
+
type CoderWinnerSelection = 'highest-score' | 'smallest-diff' | 'highest-readiness' | 'first-approved';
|
|
327
|
+
/** @experimental */
|
|
328
|
+
interface CreateDefaultCoderDelegateOptions {
|
|
329
|
+
/**
|
|
330
|
+
* Execution placement. Pass a {@link DelegationExecutor} (sibling or fleet)
|
|
331
|
+
* to control where worker iterations land. `sandboxClient` is a
|
|
332
|
+
* convenience shorthand that wraps the client in a sibling executor — pass
|
|
333
|
+
* one or the other, not both.
|
|
334
|
+
*/
|
|
335
|
+
executor?: DelegationExecutor;
|
|
336
|
+
/**
|
|
337
|
+
* Convenience shorthand for sibling placement. Equivalent to
|
|
338
|
+
* `executor: createSiblingSandboxExecutor({ client: sandboxClient })`.
|
|
339
|
+
*/
|
|
340
|
+
sandboxClient?: LoopSandboxClient;
|
|
341
|
+
/** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */
|
|
342
|
+
fanoutHarnesses?: string[];
|
|
343
|
+
/** Hard cap on the kernel's per-batch concurrency. Default 4. */
|
|
344
|
+
maxConcurrency?: number;
|
|
345
|
+
/**
|
|
346
|
+
* Optional adversarial reviewer. When set, a candidate must pass mechanical
|
|
347
|
+
* validation AND receive `approved: true` from the reviewer to be eligible to win — empty/secret/
|
|
348
|
+
* test-failing patches are already gone; this catches the "compiles + passes
|
|
349
|
+
* but wrong/unsafe" class the deterministic validator can't see.
|
|
350
|
+
*/
|
|
351
|
+
reviewer?: CoderReviewer;
|
|
352
|
+
/** Winner-selection strategy among eligible candidates. Default `highest-score`. */
|
|
353
|
+
winnerSelection?: CoderWinnerSelection;
|
|
354
|
+
}
|
|
355
|
+
/**
|
|
356
|
+
* Build a coder delegate that drives `runLoop` against the project's
|
|
357
|
+
* sandbox client + coder profile. When `args.variants > 1` it switches
|
|
358
|
+
* to the multi-harness fanout topology.
|
|
359
|
+
*
|
|
360
|
+
* @experimental
|
|
361
|
+
*/
|
|
362
|
+
declare function createDefaultCoderDelegate(options: CreateDefaultCoderDelegateOptions): CoderDelegate;
|
|
363
|
+
|
|
364
|
+
/**
|
|
365
|
+
* @experimental
|
|
366
|
+
*
|
|
367
|
+
* `createKbGate` — the valid-only knowledge-base growth gate, distilled from
|
|
368
|
+
* physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
|
|
369
|
+
* writer) runs candidate facts through this before persisting, so the KB grows
|
|
370
|
+
* with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
|
|
371
|
+
* vetoed at the gate.
|
|
372
|
+
*
|
|
373
|
+
* Fail-closed by construction: every judge must `accept`; the FIRST veto wins
|
|
374
|
+
* and the fact is rejected. The non-negotiable floor (always on, can't be
|
|
375
|
+
* disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
|
|
376
|
+
* literally appear in its `sourceText`. That single check kills the dominant
|
|
377
|
+
* failure mode (a confident claim decoupled from any real source).
|
|
378
|
+
*
|
|
379
|
+
* Pure + dependency-free: it operates on fact candidates, not on a store, so it
|
|
380
|
+
* composes with `@tangle-network/agent-knowledge` or any persistence layer
|
|
381
|
+
* without importing it. The remediation policy (correct-on-veto vs
|
|
382
|
+
* escalate-as-unverified) is the caller's — this returns the verdict; it never
|
|
383
|
+
* drops a fact silently.
|
|
384
|
+
*/
|
|
385
|
+
/** @experimental A fact proposed for the KB, with its grounding. */
|
|
386
|
+
interface FactCandidate {
|
|
387
|
+
/** The atomic claim text. */
|
|
388
|
+
claim: string;
|
|
389
|
+
/** Optional extracted value (number or string) the claim asserts. */
|
|
390
|
+
value?: string | number;
|
|
391
|
+
/** Verbatim span lifted from the source that backs the claim. */
|
|
392
|
+
verbatimPassage: string;
|
|
393
|
+
/** The raw source text the passage must be grounded in. */
|
|
394
|
+
sourceText: string;
|
|
395
|
+
/** Where the fact claims to come from — checked for circular/self citations. */
|
|
396
|
+
citation?: string;
|
|
397
|
+
}
|
|
398
|
+
/** @experimental */
|
|
399
|
+
interface FactJudgeVerdict {
|
|
400
|
+
accept: boolean;
|
|
401
|
+
reason?: string;
|
|
402
|
+
}
|
|
403
|
+
/** @experimental A pluggable fact validator. Throwing is NOT allowed — return a
|
|
404
|
+
* verdict; a judge that throws is a programmer error, not a veto. */
|
|
405
|
+
interface FactJudge {
|
|
406
|
+
name: string;
|
|
407
|
+
judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>;
|
|
408
|
+
}
|
|
409
|
+
/** @experimental */
|
|
410
|
+
interface KbGateResult {
|
|
411
|
+
accepted: boolean;
|
|
412
|
+
/** Name of the judge that vetoed; undefined when accepted. */
|
|
413
|
+
vetoedBy?: string;
|
|
414
|
+
reason?: string;
|
|
415
|
+
}
|
|
416
|
+
/** @experimental */
|
|
417
|
+
interface CreateKbGateOptions {
|
|
418
|
+
/** Extra judges appended after the built-in floor (e.g. an LLM judge). */
|
|
419
|
+
judges?: FactJudge[];
|
|
420
|
+
/** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
|
|
421
|
+
minPassageChars?: number;
|
|
422
|
+
/**
|
|
423
|
+
* Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
|
|
424
|
+
* `'cad_params'`, `'requirements'`). A citation naming one is circular
|
|
425
|
+
* (laundering) — the fact cites a derived artifact, not a real source.
|
|
426
|
+
* Default `[]` (no circular check unless the consumer declares its kinds).
|
|
427
|
+
*/
|
|
428
|
+
selfArtifactKinds?: string[];
|
|
429
|
+
}
|
|
430
|
+
/**
|
|
431
|
+
* @experimental
|
|
432
|
+
*
|
|
433
|
+
* Build a fail-closed KB gate. The returned function runs the built-in floor
|
|
434
|
+
* (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
|
|
435
|
+
* then any consumer judges, returning on the first veto.
|
|
436
|
+
*/
|
|
437
|
+
declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
|
|
438
|
+
|
|
439
|
+
/**
|
|
440
|
+
* @experimental
|
|
441
|
+
*
|
|
442
|
+
* OpenAI Chat Completions `tools[]` projection of the 5 agent-runtime MCP
|
|
443
|
+
* delegation tools.
|
|
444
|
+
*
|
|
445
|
+
* Use when configuring `createOpenAICompatibleBackend({ tools: ... })` so the
|
|
446
|
+
* model can call `delegate_code`, `delegate_research`, `delegate_feedback`,
|
|
447
|
+
* `delegation_status`, and `delegation_history` through the OpenAI-compat
|
|
448
|
+
* transport (tcloud, OpenRouter, OpenAI direct, cli-bridge). The runtime
|
|
449
|
+
* surfaces tool calls as `tool_call` stream events — execution is the
|
|
450
|
+
* caller's responsibility (typically the parent sandbox runtime's MCP
|
|
451
|
+
* mount).
|
|
452
|
+
*
|
|
453
|
+
* Sandbox-SDK callers do NOT need this helper: the sandbox runtime mounts
|
|
454
|
+
* MCP servers natively and the in-sandbox harness discovers tools via the
|
|
455
|
+
* runtime, not via an OpenAI tools array.
|
|
456
|
+
*
|
|
457
|
+
* Tool name + description + JSON-schema are pulled from the canonical
|
|
458
|
+
* `DELEGATE_*` constants exported by `./tools/*` so the projection cannot
|
|
459
|
+
* drift from the server's own validators.
|
|
460
|
+
*/
|
|
461
|
+
|
|
462
|
+
/**
|
|
463
|
+
* @experimental
|
|
464
|
+
*
|
|
465
|
+
* Returns the 5 delegation tools projected into OpenAI Chat Completions
|
|
466
|
+
* `tools[]` shape. The order is stable: `delegate_code`,
|
|
467
|
+
* `delegate_research`, `delegate_feedback`, `delegation_status`,
|
|
468
|
+
* `delegation_history`.
|
|
469
|
+
*/
|
|
470
|
+
declare function mcpToolsForRuntimeMcp(): OpenAIChatTool[];
|
|
471
|
+
/**
|
|
472
|
+
* @experimental
|
|
473
|
+
*
|
|
474
|
+
* Subset filter — return only the projected tools whose `function.name`
|
|
475
|
+
* appears in `names`. Useful for curated mounts (e.g. only the queue-bound
|
|
476
|
+
* delegation tools, omitting `delegate_feedback`). Unknown names are
|
|
477
|
+
* silently ignored; pass an empty array to get an empty result.
|
|
478
|
+
*/
|
|
479
|
+
declare function mcpToolsForRuntimeMcpSubset(names: ReadonlyArray<string>): OpenAIChatTool[];
|
|
480
|
+
|
|
481
|
+
/**
|
|
482
|
+
* OTEL span exporter — streams LoopTraceEvents to an OTLP/HTTP collector.
|
|
483
|
+
*
|
|
484
|
+
* Reads OTEL_EXPORTER_OTLP_ENDPOINT + OTEL_EXPORTER_OTLP_HEADERS from env
|
|
485
|
+
* when no explicit config is given. Keeps the runtime free of a dependency on
|
|
486
|
+
* @opentelemetry/sdk-trace-base — minimal OTLP/JSON serializer.
|
|
487
|
+
*
|
|
488
|
+
* The exporter accepts both raw OtelSpan objects and LoopTraceEvents
|
|
489
|
+
* (which get converted to OTLP spans automatically).
|
|
490
|
+
*/
|
|
491
|
+
interface OtelExportConfig {
|
|
492
|
+
/** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */
|
|
493
|
+
endpoint?: string;
|
|
494
|
+
/** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */
|
|
495
|
+
headers?: Record<string, string>;
|
|
496
|
+
/** Batch size before flush. Default 64. */
|
|
497
|
+
batchSize?: number;
|
|
498
|
+
/** Flush interval ms. Default 5000. */
|
|
499
|
+
flushIntervalMs?: number;
|
|
500
|
+
/** Resource attributes stamped on every export. */
|
|
501
|
+
resourceAttributes?: Record<string, string | number | boolean>;
|
|
502
|
+
/** Service name. Default 'agent-runtime'. */
|
|
503
|
+
serviceName?: string;
|
|
504
|
+
}
|
|
505
|
+
interface OtelExporter {
|
|
506
|
+
/** Export a span. */
|
|
507
|
+
exportSpan(span: OtelSpan): void;
|
|
508
|
+
/** Force flush pending spans. */
|
|
509
|
+
flush(): Promise<void>;
|
|
510
|
+
/** Shutdown cleanly. */
|
|
511
|
+
shutdown(): Promise<void>;
|
|
512
|
+
}
|
|
513
|
+
interface OtelSpan {
|
|
514
|
+
traceId: string;
|
|
515
|
+
spanId: string;
|
|
516
|
+
parentSpanId?: string;
|
|
517
|
+
name: string;
|
|
518
|
+
kind?: number;
|
|
519
|
+
startTimeUnixNano: string;
|
|
520
|
+
endTimeUnixNano: string;
|
|
521
|
+
attributes?: OtelAttribute[];
|
|
522
|
+
status?: {
|
|
523
|
+
code: number;
|
|
524
|
+
message?: string;
|
|
525
|
+
};
|
|
526
|
+
}
|
|
527
|
+
interface OtelAttribute {
|
|
528
|
+
key: string;
|
|
529
|
+
value: {
|
|
530
|
+
stringValue?: string;
|
|
531
|
+
intValue?: string;
|
|
532
|
+
doubleValue?: number;
|
|
533
|
+
boolValue?: boolean;
|
|
534
|
+
};
|
|
535
|
+
}
|
|
536
|
+
/**
|
|
537
|
+
* Create an OTEL exporter. Returns undefined when no endpoint is configured.
|
|
538
|
+
*/
|
|
539
|
+
declare function createOtelExporter(config?: OtelExportConfig): OtelExporter | undefined;
|
|
540
|
+
/**
|
|
541
|
+
* Convert a LoopTraceEvent into an OtelSpan for export.
|
|
542
|
+
*/
|
|
543
|
+
declare function loopEventToOtelSpan(event: {
|
|
544
|
+
kind: string;
|
|
545
|
+
runId: string;
|
|
546
|
+
timestamp: number;
|
|
547
|
+
payload: object;
|
|
548
|
+
}, traceId: string, parentSpanId?: string): OtelSpan;
|
|
549
|
+
/**
|
|
550
|
+
* Build a nested, real-duration OTLP span tree for ONE loop run from its full
|
|
551
|
+
* ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
|
|
552
|
+
* zero-duration span per event), this reconstructs the topology hierarchy a
|
|
553
|
+
* GenAI trace viewer renders natively:
|
|
554
|
+
*
|
|
555
|
+
* loop (invoke_workflow)
|
|
556
|
+
* └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
|
|
557
|
+
* ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
|
|
558
|
+
* └─ …
|
|
559
|
+
*
|
|
560
|
+
* Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
|
|
561
|
+
* a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
|
|
562
|
+
* verdict / placement / cost (not yet standardized). Pure: feed it a buffered
|
|
563
|
+
* per-runId event array (e.g. flushed on `loop.ended`) and export the result.
|
|
564
|
+
*/
|
|
565
|
+
declare function buildLoopOtelSpans(events: ReadonlyArray<{
|
|
566
|
+
kind: string;
|
|
567
|
+
runId: string;
|
|
568
|
+
timestamp: number;
|
|
569
|
+
payload: object;
|
|
570
|
+
}>, traceId: string, rootParentSpanId?: string): OtelSpan[];
|
|
571
|
+
/** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
|
|
572
|
+
declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
|
|
573
|
+
interface EvalRunGeneration {
|
|
574
|
+
/** 0-based ordinal of this generation within the run (required by ingest). */
|
|
575
|
+
index: number;
|
|
576
|
+
/** Identity of the proposed surface change (content-addressed hash). */
|
|
577
|
+
surfaceHash: string;
|
|
578
|
+
/** Arbitrary provenance for this generation (rationale, evidence, source). */
|
|
579
|
+
surface?: unknown;
|
|
580
|
+
/** Per-scenario results; empty until the generation is measured. */
|
|
581
|
+
cells?: unknown[];
|
|
582
|
+
/** Mean composite score (0 when unmeasured — pair with `labels.measured` on the enclosing `EvalRunEvent`). */
|
|
583
|
+
compositeMean: number;
|
|
584
|
+
costUsd: number;
|
|
585
|
+
durationMs: number;
|
|
586
|
+
}
|
|
587
|
+
interface EvalRunEvent {
|
|
588
|
+
runId: string;
|
|
589
|
+
runDir: string;
|
|
590
|
+
/** ISO timestamp. */
|
|
591
|
+
timestamp: string;
|
|
592
|
+
status: 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
|
|
593
|
+
labels?: Record<string, string>;
|
|
594
|
+
baseline?: EvalRunGeneration;
|
|
595
|
+
generations?: EvalRunGeneration[];
|
|
596
|
+
gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling';
|
|
597
|
+
holdoutLift?: number;
|
|
598
|
+
totalCostUsd: number;
|
|
599
|
+
totalDurationMs: number;
|
|
600
|
+
errorMessage?: string;
|
|
601
|
+
}
|
|
602
|
+
interface EvalRunsExportConfig {
|
|
603
|
+
/** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */
|
|
604
|
+
apiKey?: string;
|
|
605
|
+
/** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */
|
|
606
|
+
base?: string;
|
|
607
|
+
/** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */
|
|
608
|
+
idempotencyKey?: string;
|
|
609
|
+
}
|
|
610
|
+
interface EvalRunsExportResult {
|
|
611
|
+
ok: boolean;
|
|
612
|
+
status: number;
|
|
613
|
+
accepted: number;
|
|
614
|
+
rejected: Array<{
|
|
615
|
+
index: number;
|
|
616
|
+
reason: string;
|
|
617
|
+
}>;
|
|
618
|
+
}
|
|
619
|
+
/**
|
|
620
|
+
* Ship self-improvement eval-run events to Tangle Intelligence. Unlike the
|
|
621
|
+
* best-effort span exporter, this RESOLVES with the ingest verdict (accepted /
|
|
622
|
+
* rejected per event) so a consumer's loop can assert its provenance landed.
|
|
623
|
+
* Throws only on a missing key or network failure.
|
|
624
|
+
*/
|
|
625
|
+
declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
|
|
626
|
+
|
|
627
|
+
export { type OtelSpan as $, type FactCandidate as A, type FactJudge as B, type CoderDelegate as C, type DelegationExecutor as D, type FactJudgeVerdict as E, type FleetHandle as F, type FeedbackRating as G, type FeedbackRefersTo as H, type FleetWorkspaceExecutorOptions as I, type ResearchOutputShape as J, type KbGateResult as K, createDefaultCoderDelegate as L, createFleetWorkspaceExecutor as M, createKbGate as N, type OtelExporter as O, createSiblingSandboxExecutor as P, mcpToolsForRuntimeMcp as Q, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, mcpToolsForRuntimeMcpSubset as T, type EvalRunEvent as U, type EvalRunGeneration as V, type EvalRunsExportConfig as W, type EvalRunsExportResult as X, INTELLIGENCE_WIRE_VERSION as Y, type OtelAttribute as Z, type OtelExportConfig as _, type DelegateFeedbackArgs as a, buildLoopOtelSpans as a0, createOtelExporter as a1, exportEvalRuns as a2, loopEventToOtelSpan as a3, type DelegationFeedbackSnapshot as b, type DelegationProfile as c, type DelegateCodeArgs as d, type DelegateResearchArgs as e, type DelegationStatus as f, type DelegationProgress as g, type DelegationResultPayload as h, type DelegationError as i, type DelegationStatusResult as j, type DelegationHistoryArgs as k, type DelegationHistoryEntry as l, type DelegateCodeResult as m, type DelegateFeedbackResult as n, type ResearchSource as o, type DelegateResearchResult as p, type DelegationHistoryResult as q, type DelegationStatusArgs as r, type CoderReview as s, type CoderReviewer as t, type CoderWinnerSelection as u, type CreateDefaultCoderDelegateOptions as v, type CreateKbGateOptions as w, type DelegateCodeConfig as x, type DelegateResearchConfig as y, type DelegateRunCtx as z };
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { AgentProfile } from '@tangle-network/sandbox';
|
|
2
|
-
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-DrXVR2Fu.js';
|
|
2
|
+
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-CmkQl8qE.js';
|
|
3
3
|
import '@tangle-network/agent-eval';
|
|
4
|
-
import './runtime-run-B8VIiOhI.js';
|
|
5
4
|
import './types-CsCCryln.js';
|
|
6
5
|
|
|
7
6
|
/**
|