@tangle-network/agent-runtime 0.40.0 → 0.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/dist/agent.d.ts +57 -2
  2. package/dist/agent.js +54 -0
  3. package/dist/agent.js.map +1 -1
  4. package/dist/chunk-7JITYN6T.js +72 -0
  5. package/dist/chunk-7JITYN6T.js.map +1 -0
  6. package/dist/{chunk-3WQJRSUJ.js → chunk-BDGYYO7K.js} +3 -3
  7. package/dist/{chunk-6HI3QUJD.js → chunk-HCL2ZG5L.js} +51 -5
  8. package/dist/chunk-HCL2ZG5L.js.map +1 -0
  9. package/dist/{chunk-HSX6PFZR.js → chunk-HVYOHJHK.js} +338 -2
  10. package/dist/chunk-HVYOHJHK.js.map +1 -0
  11. package/dist/{chunk-OISRXLWI.js → chunk-IFG6GX6A.js} +64 -40
  12. package/dist/chunk-IFG6GX6A.js.map +1 -0
  13. package/dist/chunk-NRZOXCJK.js +64 -0
  14. package/dist/chunk-NRZOXCJK.js.map +1 -0
  15. package/dist/{chunk-VFKBIZTY.js → chunk-WMBYQPYM.js} +5 -4
  16. package/dist/chunk-WMBYQPYM.js.map +1 -0
  17. package/dist/delegation-profile-1GbW5yA3.d.ts +73 -0
  18. package/dist/{dynamic-BT9Ji3jE.d.ts → dynamic-B_7GgCwu.d.ts} +1 -1
  19. package/dist/index.d.ts +7 -8
  20. package/dist/index.js +9 -8
  21. package/dist/index.js.map +1 -1
  22. package/dist/{kb-gate-C4tho31v.d.ts → kb-gate-DTBum3vH.d.ts} +9 -1
  23. package/dist/{loop-runner-bin-C1MuoT8c.d.ts → loop-runner-bin-CVoCBmYk.d.ts} +3 -3
  24. package/dist/loop-runner-bin.d.ts +4 -5
  25. package/dist/loop-runner-bin.js +3 -3
  26. package/dist/loops.d.ts +65 -7
  27. package/dist/loops.js +7 -1
  28. package/dist/mcp/bin.js +28 -17
  29. package/dist/mcp/bin.js.map +1 -1
  30. package/dist/mcp/index.d.ts +6 -6
  31. package/dist/mcp/index.js +17 -49
  32. package/dist/mcp/index.js.map +1 -1
  33. package/dist/{otel-export-xgf4J6bo.d.ts → otel-export-BzvF1Ela.d.ts} +1 -1
  34. package/dist/profiles.d.ts +1 -2
  35. package/dist/{types-CNs7_1R3.d.ts → types-Bcp071Jg.d.ts} +488 -3
  36. package/package.json +11 -22
  37. package/dist/chunk-6HI3QUJD.js.map +0 -1
  38. package/dist/chunk-7ZECSZ3C.js +0 -400
  39. package/dist/chunk-7ZECSZ3C.js.map +0 -1
  40. package/dist/chunk-HSX6PFZR.js.map +0 -1
  41. package/dist/chunk-OISRXLWI.js.map +0 -1
  42. package/dist/chunk-VFKBIZTY.js.map +0 -1
  43. package/dist/types-CsCCryln.d.ts +0 -489
  44. package/dist/{chunk-3WQJRSUJ.js.map → chunk-BDGYYO7K.js.map} +0 -0
@@ -1,489 +0,0 @@
1
- import { ControlEvalResult, KnowledgeRequirement, ControlBudget, KnowledgeReadinessReport, ControlStep, ControlDecision, UserQuestion, DataAcquisitionPlan, ControlRunResult, RunRecord, TraceStore } from '@tangle-network/agent-eval';
2
-
3
- /**
4
- * @stable
5
- *
6
- * Core task, session, adapter, and stream-event types for the runtime.
7
- *
8
- * This module owns the public shape of every cross-cutting record (`TaskSpec`,
9
- * `RuntimeSession`, `RuntimeStreamEvent`). Everything else in the runtime
10
- * imports from here so type-level changes ripple in one place.
11
- */
12
-
13
- /** @stable */
14
- interface AgentTaskSpec {
15
- id: string;
16
- intent: string;
17
- /** Domain is metadata, not an architectural boundary: tax, legal, gtm, creative, blueprint, redteam, etc. */
18
- domain?: string;
19
- inputs?: Record<string, unknown>;
20
- requiredKnowledge?: KnowledgeRequirement[];
21
- budget?: Partial<ControlBudget>;
22
- metadata?: Record<string, unknown>;
23
- }
24
- /** @stable */
25
- interface AgentKnowledgeProvider {
26
- buildReadiness?(task: AgentTaskSpec): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport;
27
- answerQuestions?(questions: UserQuestion[], task: AgentTaskSpec): Promise<Record<string, string>> | Record<string, string>;
28
- executeAcquisitionPlans?(plans: DataAcquisitionPlan[], task: AgentTaskSpec): Promise<string[]> | string[];
29
- refreshReadiness?(input: {
30
- task: AgentTaskSpec;
31
- previous: KnowledgeReadinessReport;
32
- userAnswers: Record<string, string>;
33
- acquiredEvidenceIds: string[];
34
- }): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport;
35
- }
36
- /** @stable */
37
- interface AgentTaskContext<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
38
- task: AgentTaskSpec;
39
- knowledge: KnowledgeReadinessReport;
40
- state: TState;
41
- evals: TEval[];
42
- history: ControlStep<TState, TAction, TActionResult, TEval>[];
43
- budget: ControlBudget;
44
- stepIndex: number;
45
- wallMs: number;
46
- spentCostUsd: number;
47
- remainingCostUsd?: number;
48
- abortSignal: AbortSignal;
49
- }
50
- /** @stable */
51
- interface AgentAdapter<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
52
- observe(ctx: {
53
- task: AgentTaskSpec;
54
- knowledge: KnowledgeReadinessReport;
55
- history: ControlStep<TState, TAction, TActionResult, TEval>[];
56
- abortSignal: AbortSignal;
57
- }): Promise<TState> | TState;
58
- validate(ctx: {
59
- task: AgentTaskSpec;
60
- knowledge: KnowledgeReadinessReport;
61
- state: TState;
62
- history: ControlStep<TState, TAction, TActionResult, TEval>[];
63
- abortSignal: AbortSignal;
64
- }): Promise<TEval[]> | TEval[];
65
- decide(ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<ControlDecision<TAction>> | ControlDecision<TAction>;
66
- act(action: TAction, ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<TActionResult> | TActionResult;
67
- shouldStop?(ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<{
68
- stop: boolean;
69
- pass: boolean;
70
- reason: string;
71
- score?: number;
72
- }> | {
73
- stop: boolean;
74
- pass: boolean;
75
- reason: string;
76
- score?: number;
77
- };
78
- onKnowledgeBlocked?(ctx: {
79
- task: AgentTaskSpec;
80
- knowledge: KnowledgeReadinessReport;
81
- questions: UserQuestion[];
82
- acquisitionPlans: DataAcquisitionPlan[];
83
- }): Promise<ControlDecision<TAction>> | ControlDecision<TAction>;
84
- getActionCostUsd?(ctx: {
85
- action: TAction;
86
- result: TActionResult;
87
- task: AgentTaskSpec;
88
- state: TState;
89
- evals: TEval[];
90
- history: ControlStep<TState, TAction, TActionResult, TEval>[];
91
- }): number | undefined;
92
- projectRunRecords?(result: ControlRunResult<TState, TAction, TActionResult, TEval>, task: AgentTaskSpec): RunRecord[];
93
- }
94
- /** @stable */
95
- type AgentTaskStatus = 'completed' | 'blocked' | 'failed' | 'aborted';
96
- /** @stable */
97
- type AgentRuntimeEvent<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = {
98
- type: 'task_start';
99
- task: AgentTaskSpec;
100
- } | {
101
- type: 'readiness_start';
102
- task: AgentTaskSpec;
103
- } | {
104
- type: 'readiness_end';
105
- task: AgentTaskSpec;
106
- knowledge: KnowledgeReadinessReport;
107
- } | {
108
- type: 'questions_start';
109
- task: AgentTaskSpec;
110
- questions: UserQuestion[];
111
- } | {
112
- type: 'questions_end';
113
- task: AgentTaskSpec;
114
- questions: UserQuestion[];
115
- userAnswers: Record<string, string>;
116
- } | {
117
- type: 'acquisition_start';
118
- task: AgentTaskSpec;
119
- acquisitionPlans: DataAcquisitionPlan[];
120
- } | {
121
- type: 'acquisition_end';
122
- task: AgentTaskSpec;
123
- acquisitionPlans: DataAcquisitionPlan[];
124
- acquiredEvidenceIds: string[];
125
- } | {
126
- type: 'control_start';
127
- task: AgentTaskSpec;
128
- knowledge: KnowledgeReadinessReport;
129
- } | {
130
- type: 'control_step';
131
- task: AgentTaskSpec;
132
- step: ControlStep<TState, TAction, TActionResult, TEval>;
133
- } | {
134
- type: 'control_end';
135
- task: AgentTaskSpec;
136
- control: ControlRunResult<TState, TAction, TActionResult, TEval>;
137
- } | {
138
- type: 'task_end';
139
- task: AgentTaskSpec;
140
- status: AgentTaskStatus;
141
- reason: string;
142
- };
143
- /** @stable */
144
- type AgentRuntimeEventSink<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>) => Promise<void> | void;
145
- /**
146
- * @stable
147
- *
148
- * Typed transport / backend failure detail. Carried on `backend_error` and
149
- * `final` events when the backend's stream throws or the upstream HTTP call
150
- * returns a non-success status. Lets consumers (a) distinguish "stream
151
- * completed with no text" from "stream never reached the model" and
152
- * (b) reconstruct the precise upstream signal (status + truncated body) when
153
- * building a `RunRecord.error`.
154
- *
155
- * `body` is truncated to 2 KiB by the backend so an HTML error page from a
156
- * misconfigured proxy never bloats event payloads or logs. Consumers needing
157
- * the full body should inspect the underlying `BackendTransportError.body`
158
- * via a custom `mapEvent` or backend wrapper.
159
- */
160
- interface BackendErrorDetail {
161
- /**
162
- * `'transport'` — upstream HTTP / network failure with optional status code.
163
- * `'backend'` — the backend's `stream()` generator threw for a non-transport
164
- * reason (e.g. a custom adapter error, sandbox crash).
165
- */
166
- kind: 'transport' | 'backend';
167
- message: string;
168
- /** Upstream HTTP status when known. `0` for connection / abort errors. */
169
- status?: number;
170
- /** Truncated response body (≤2 KiB). Diagnostic only — never machine-parsed. */
171
- body?: string;
172
- }
173
- /**
174
- * @stable
175
- *
176
- * OpenAI Chat Completions tool descriptor. The shape mirrors the
177
- * `/v1/chat/completions` `tools[]` parameter so callers can pass tool
178
- * definitions through `createOpenAICompatibleBackend({ tools })` without any
179
- * runtime translation. The router proxies this shape verbatim to Anthropic
180
- * (translated server-side), DeepSeek, Groq, OpenAI, and Gemini — every model
181
- * that the eval surface targets.
182
- *
183
- * Callers that build their tool list from MCP servers should run a one-shot
184
- * MCP `tools/list` at config time and project the result into this shape. The
185
- * runtime intentionally does NOT depend on `@modelcontextprotocol/sdk` —
186
- * keeping the backend transport thin lets domain repos own MCP plumbing.
187
- */
188
- interface OpenAIChatTool {
189
- type: 'function';
190
- function: {
191
- name: string;
192
- description?: string;
193
- parameters?: Record<string, unknown>;
194
- };
195
- }
196
- /**
197
- * @stable
198
- *
199
- * `tool_choice` parameter for OpenAI-compat chat. Same shape as the OpenAI
200
- * spec: `'auto'` (default — model decides), `'none'` (disable tool calling
201
- * for this turn), `'required'` (force a tool call), or a specific function
202
- * pin `{ type: 'function', function: { name } }`.
203
- */
204
- type OpenAIChatToolChoice = 'auto' | 'none' | 'required' | {
205
- type: 'function';
206
- function: {
207
- name: string;
208
- };
209
- };
210
- /** @stable */
211
- type RuntimeStreamEvent = {
212
- type: 'task_start';
213
- task: AgentTaskSpec;
214
- timestamp: string;
215
- } | {
216
- type: 'readiness_start';
217
- task: AgentTaskSpec;
218
- timestamp: string;
219
- } | {
220
- type: 'readiness_end';
221
- task: AgentTaskSpec;
222
- knowledge: KnowledgeReadinessReport;
223
- decision: KnowledgeReadinessDecision;
224
- timestamp: string;
225
- } | {
226
- type: 'questions_start';
227
- task: AgentTaskSpec;
228
- questions: UserQuestion[];
229
- timestamp: string;
230
- } | {
231
- type: 'questions_end';
232
- task: AgentTaskSpec;
233
- questions: UserQuestion[];
234
- userAnswers: Record<string, string>;
235
- timestamp: string;
236
- } | {
237
- type: 'acquisition_start';
238
- task: AgentTaskSpec;
239
- acquisitionPlans: DataAcquisitionPlan[];
240
- timestamp: string;
241
- } | {
242
- type: 'acquisition_end';
243
- task: AgentTaskSpec;
244
- acquisitionPlans: DataAcquisitionPlan[];
245
- acquiredEvidenceIds: string[];
246
- timestamp: string;
247
- } | {
248
- type: 'session_created';
249
- task: AgentTaskSpec;
250
- session: RuntimeSession;
251
- timestamp: string;
252
- } | {
253
- type: 'session_resumed';
254
- task: AgentTaskSpec;
255
- session: RuntimeSession;
256
- timestamp: string;
257
- } | {
258
- type: 'backend_start';
259
- task: AgentTaskSpec;
260
- session: RuntimeSession;
261
- backend: string;
262
- timestamp: string;
263
- } | {
264
- type: 'text_delta';
265
- task?: AgentTaskSpec;
266
- session?: RuntimeSession;
267
- text: string;
268
- timestamp?: string;
269
- } | {
270
- type: 'reasoning_delta';
271
- task?: AgentTaskSpec;
272
- session?: RuntimeSession;
273
- text: string;
274
- timestamp?: string;
275
- } | {
276
- type: 'tool_call';
277
- task?: AgentTaskSpec;
278
- session?: RuntimeSession;
279
- toolName: string;
280
- toolCallId?: string;
281
- args?: unknown;
282
- timestamp?: string;
283
- } | {
284
- type: 'tool_result';
285
- task?: AgentTaskSpec;
286
- session?: RuntimeSession;
287
- toolName: string;
288
- toolCallId?: string;
289
- result?: unknown;
290
- timestamp?: string;
291
- } | {
292
- type: 'llm_call';
293
- task?: AgentTaskSpec;
294
- session?: RuntimeSession;
295
- model: string;
296
- tokensIn?: number;
297
- tokensOut?: number;
298
- costUsd?: number;
299
- latencyMs?: number;
300
- finishReason?: string;
301
- timestamp?: string;
302
- } | {
303
- type: 'artifact';
304
- task?: AgentTaskSpec;
305
- session?: RuntimeSession;
306
- artifactId: string;
307
- name?: string;
308
- mimeType?: string;
309
- uri?: string;
310
- content?: string;
311
- metadata?: Record<string, unknown>;
312
- timestamp?: string;
313
- } | {
314
- type: 'proposal_created';
315
- task?: AgentTaskSpec;
316
- session?: RuntimeSession;
317
- proposalId: string;
318
- title: string;
319
- status?: 'pending' | 'approved' | 'rejected';
320
- timestamp?: string;
321
- } | {
322
- type: 'backend_error';
323
- task: AgentTaskSpec;
324
- session?: RuntimeSession;
325
- backend: string;
326
- message: string;
327
- recoverable: boolean;
328
- /**
329
- * Typed transport diagnostic. Present when the upstream returned a
330
- * non-success HTTP status or every retry attempt threw. Consumers MUST
331
- * surface this onto their `RunRecord.error` — silently treating a
332
- * `backend_error` as "no output" hides credit exhaustion, auth failure,
333
- * and upstream outages from operators.
334
- * - `kind: 'transport'` — HTTP / network failure with optional `status`
335
- * + truncated response `body`.
336
- * - `kind: 'backend'` — the backend's `stream()` generator threw for a
337
- * reason that isn't a recognized transport failure.
338
- */
339
- error?: BackendErrorDetail;
340
- timestamp: string;
341
- } | {
342
- type: 'backend_end';
343
- task: AgentTaskSpec;
344
- session: RuntimeSession;
345
- backend: string;
346
- timestamp: string;
347
- } | {
348
- type: 'task_end';
349
- task: AgentTaskSpec;
350
- status: AgentTaskStatus;
351
- reason: string;
352
- timestamp: string;
353
- } | {
354
- type: 'final';
355
- task: AgentTaskSpec;
356
- session?: RuntimeSession;
357
- status: AgentTaskStatus;
358
- reason: string;
359
- text?: string;
360
- metadata?: Record<string, unknown>;
361
- /**
362
- * Typed terminal-error diagnostic. Mirrors the `backend_error.error`
363
- * shape so a consumer that only listens for `final` still receives a
364
- * loud, structured failure when the backend never produced output. Only
365
- * set when `status !== 'completed'`. Consumers building a `RunRecord`
366
- * MUST map this to `RunRecord.error` rather than recording silent
367
- * `error: null` with empty `finalText`.
368
- */
369
- error?: BackendErrorDetail;
370
- timestamp: string;
371
- };
372
- /** @stable */
373
- interface RuntimeSession {
374
- id: string;
375
- backend: string;
376
- status: 'active' | 'completed' | 'failed' | 'aborted';
377
- resumeToken?: string;
378
- createdAt: string;
379
- updatedAt: string;
380
- metadata?: Record<string, unknown>;
381
- }
382
- /** @stable */
383
- interface RuntimeSessionStore {
384
- get(sessionId: string): Promise<RuntimeSession | undefined> | RuntimeSession | undefined;
385
- put(session: RuntimeSession): Promise<void> | void;
386
- appendEvent?(sessionId: string, event: RuntimeStreamEvent): Promise<void> | void;
387
- listEvents?(sessionId: string): Promise<RuntimeStreamEvent[]> | RuntimeStreamEvent[];
388
- }
389
- /** @stable */
390
- interface AgentBackendInput {
391
- task: AgentTaskSpec;
392
- message?: string;
393
- messages?: Array<{
394
- role: string;
395
- content: string;
396
- }>;
397
- inputs?: Record<string, unknown>;
398
- }
399
- /** @stable */
400
- interface AgentBackendContext {
401
- task: AgentTaskSpec;
402
- knowledge: KnowledgeReadinessReport;
403
- session: RuntimeSession;
404
- signal?: AbortSignal;
405
- /**
406
- * Conversation/run identifier when this call is part of a multi-agent run.
407
- * Backends should stamp it into any trace/log emission so cross-participant
408
- * events correlate. Absent when the call is a stand-alone `runAgentTask`.
409
- */
410
- runId?: string;
411
- /**
412
- * Deterministic turn id for this single call. Stable across retries of the
413
- * same logical turn so a caching gateway / idempotent backend can dedupe.
414
- */
415
- turnId?: string;
416
- /**
417
- * If this call is itself nested inside a higher-order conversation
418
- * (recursion via `createConversationBackend`), the enclosing turn's id.
419
- * Used for trace stitching across nested orchestration.
420
- */
421
- parentTurnId?: string;
422
- /**
423
- * Headers to forward verbatim to any outbound HTTP the backend issues:
424
- * `X-Tangle-Forwarded-Authorization`, `X-Tangle-Forwarded-Depth`,
425
- * run/turn correlation. Backends that issue HTTP MUST merge these into
426
- * the outbound request; backends that don't issue HTTP may ignore them.
427
- */
428
- propagatedHeaders?: Readonly<Record<string, string>>;
429
- }
430
- /** @stable */
431
- interface AgentExecutionBackend<TInput extends AgentBackendInput = AgentBackendInput> {
432
- kind: string;
433
- start?(input: TInput, context: Omit<AgentBackendContext, 'session'> & {
434
- requestedSessionId?: string;
435
- }): Promise<RuntimeSession> | RuntimeSession;
436
- resume?(session: RuntimeSession, input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<RuntimeSession> | RuntimeSession;
437
- stream(input: TInput, context: AgentBackendContext): AsyncIterable<RuntimeStreamEvent>;
438
- stop?(session: RuntimeSession, reason: string): Promise<void> | void;
439
- }
440
- /** @stable */
441
- interface RunAgentTaskStreamOptions<TInput extends AgentBackendInput = AgentBackendInput> {
442
- task: AgentTaskSpec;
443
- backend: AgentExecutionBackend<TInput>;
444
- input?: Omit<TInput, 'task'>;
445
- knowledge?: AgentKnowledgeProvider;
446
- sessionStore?: RuntimeSessionStore;
447
- sessionId?: string;
448
- resume?: boolean;
449
- signal?: AbortSignal;
450
- minimumReadinessScore?: number;
451
- }
452
- /** @stable */
453
- interface RunAgentTaskOptions<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
454
- task: AgentTaskSpec;
455
- adapter: AgentAdapter<TState, TAction, TActionResult, TEval>;
456
- knowledge?: AgentKnowledgeProvider;
457
- onEvent?: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval>;
458
- store?: TraceStore;
459
- signal?: AbortSignal;
460
- scenarioId?: string;
461
- projectId?: string;
462
- variantId?: string;
463
- minimumReadinessScore?: number;
464
- }
465
- /** @stable */
466
- interface AgentTaskRunResult<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
467
- task: AgentTaskSpec;
468
- status: AgentTaskStatus;
469
- knowledge: KnowledgeReadinessReport;
470
- questions: UserQuestion[];
471
- acquisitionPlans: DataAcquisitionPlan[];
472
- userAnswers: Record<string, string>;
473
- acquiredEvidenceIds: string[];
474
- control: ControlRunResult<TState, TAction, TActionResult, TEval>;
475
- runRecords: RunRecord[];
476
- }
477
- /** @stable */
478
- interface KnowledgeReadinessDecision {
479
- passed: boolean;
480
- status: 'ready' | 'blocked' | 'caveat';
481
- reason: string;
482
- readinessScore: number;
483
- recommendedAction: KnowledgeReadinessReport['recommendedAction'];
484
- severity: KnowledgeReadinessReport['severity'];
485
- blockingGapIds: string[];
486
- nonBlockingGapIds: string[];
487
- }
488
-
489
- export type { AgentTaskSpec as A, BackendErrorDetail as B, KnowledgeReadinessDecision as K, OpenAIChatTool as O, RuntimeStreamEvent as R, AgentBackendInput as a, AgentExecutionBackend as b, OpenAIChatToolChoice as c, AgentBackendContext as d, RunAgentTaskOptions as e, AgentTaskRunResult as f, RunAgentTaskStreamOptions as g, AgentRuntimeEvent as h, AgentTaskStatus as i, RuntimeSessionStore as j, RuntimeSession as k, AgentAdapter as l, AgentKnowledgeProvider as m, AgentRuntimeEventSink as n, AgentTaskContext as o };