@tangle-network/agent-runtime 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,390 @@
1
+ import { ControlEvalResult, KnowledgeRequirement, ControlBudget, KnowledgeReadinessReport, ControlStep, ControlDecision, UserQuestion, DataAcquisitionPlan, ControlRunResult, RunRecord, TraceStore } from '@tangle-network/agent-eval';
2
+
3
+ /**
4
+ * @stable
5
+ *
6
+ * Core task, session, adapter, and stream-event types for the runtime.
7
+ *
8
+ * This module owns the public shape of every cross-cutting record (`AgentTaskSpec`,
9
+ * `RuntimeSession`, `RuntimeStreamEvent`). Everything else in the runtime
10
+ * imports from here so type-level changes ripple in one place.
11
+ */
12
+
13
+ /** Describes one agent task: required `id` and `intent`, plus an optional domain label, inputs, knowledge requirements, partial control budget, and free-form metadata. @stable */
14
+ interface AgentTaskSpec {
15
+ id: string;
16
+ intent: string;
17
+ /** Domain is metadata, not an architectural boundary: tax, legal, gtm, creative, blueprint, redteam, etc. */
18
+ domain?: string;
19
+ inputs?: Record<string, unknown>;
20
+ requiredKnowledge?: KnowledgeRequirement[];
21
+ budget?: Partial<ControlBudget>;
22
+ metadata?: Record<string, unknown>;
23
+ }
24
+ /** Knowledge-readiness hooks: build a readiness report, answer user questions, execute acquisition plans, and refresh readiness. Every hook is optional and may return its result directly or as a Promise. @stable */
25
+ interface AgentKnowledgeProvider {
26
+ buildReadiness?(task: AgentTaskSpec): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport;
27
+ answerQuestions?(questions: UserQuestion[], task: AgentTaskSpec): Promise<Record<string, string>> | Record<string, string>;
28
+ executeAcquisitionPlans?(plans: DataAcquisitionPlan[], task: AgentTaskSpec): Promise<string[]> | string[];
29
+ refreshReadiness?(input: {
30
+ task: AgentTaskSpec;
31
+ previous: KnowledgeReadinessReport;
32
+ userAnswers: Record<string, string>;
33
+ acquiredEvidenceIds: string[];
34
+ }): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport;
35
+ }
36
+ /** Context object passed to `AgentAdapter.decide`, `act`, and `shouldStop`: the task, readiness report, state, evals, step history, budget, step/time/cost counters (`remainingCostUsd` is optional), and an abort signal. @stable */
37
+ interface AgentTaskContext<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
38
+ task: AgentTaskSpec;
39
+ knowledge: KnowledgeReadinessReport;
40
+ state: TState;
41
+ evals: TEval[];
42
+ history: ControlStep<TState, TAction, TActionResult, TEval>[];
43
+ budget: ControlBudget;
44
+ stepIndex: number;
45
+ wallMs: number;
46
+ spentCostUsd: number;
47
+ remainingCostUsd?: number;
48
+ abortSignal: AbortSignal;
49
+ }
50
+ /** Adapter contract with four required methods (`observe`, `validate`, `decide`, `act`) and four optional hooks (`shouldStop`, `onKnowledgeBlocked`, `getActionCostUsd`, `projectRunRecords`). Most members may return a value or a Promise; `getActionCostUsd` and `projectRunRecords` are typed as synchronous only. @stable */
51
+ interface AgentAdapter<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
52
+ observe(ctx: {
53
+ task: AgentTaskSpec;
54
+ knowledge: KnowledgeReadinessReport;
55
+ history: ControlStep<TState, TAction, TActionResult, TEval>[];
56
+ abortSignal: AbortSignal;
57
+ }): Promise<TState> | TState;
58
+ validate(ctx: {
59
+ task: AgentTaskSpec;
60
+ knowledge: KnowledgeReadinessReport;
61
+ state: TState;
62
+ history: ControlStep<TState, TAction, TActionResult, TEval>[];
63
+ abortSignal: AbortSignal;
64
+ }): Promise<TEval[]> | TEval[];
65
+ decide(ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<ControlDecision<TAction>> | ControlDecision<TAction>;
66
+ act(action: TAction, ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<TActionResult> | TActionResult;
67
+ shouldStop?(ctx: AgentTaskContext<TState, TAction, TActionResult, TEval>): Promise<{
68
+ stop: boolean;
69
+ pass: boolean;
70
+ reason: string;
71
+ score?: number;
72
+ }> | {
73
+ stop: boolean;
74
+ pass: boolean;
75
+ reason: string;
76
+ score?: number;
77
+ };
78
+ onKnowledgeBlocked?(ctx: {
79
+ task: AgentTaskSpec;
80
+ knowledge: KnowledgeReadinessReport;
81
+ questions: UserQuestion[];
82
+ acquisitionPlans: DataAcquisitionPlan[];
83
+ }): Promise<ControlDecision<TAction>> | ControlDecision<TAction>;
84
+ getActionCostUsd?(ctx: {
85
+ action: TAction;
86
+ result: TActionResult;
87
+ task: AgentTaskSpec;
88
+ state: TState;
89
+ evals: TEval[];
90
+ history: ControlStep<TState, TAction, TActionResult, TEval>[];
91
+ }): number | undefined;
92
+ projectRunRecords?(result: ControlRunResult<TState, TAction, TActionResult, TEval>, task: AgentTaskSpec): RunRecord[];
93
+ }
94
+ /** Status values used by the `task_end` and `final` events, `AgentTaskRunResult`, and `AgentTaskRunSummary`. @stable */
95
+ type AgentTaskStatus = 'completed' | 'blocked' | 'failed' | 'aborted';
96
+ /** Discriminated union (on `type`) of the events accepted by `AgentRuntimeEventSink`. Every variant carries `task`; variants cover task, readiness, questions, acquisition, and control start/end, plus a per-step `control_step`. No variant has a timestamp field. @stable */
97
+ type AgentRuntimeEvent<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = {
98
+ type: 'task_start';
99
+ task: AgentTaskSpec;
100
+ } | {
101
+ type: 'readiness_start';
102
+ task: AgentTaskSpec;
103
+ } | {
104
+ type: 'readiness_end';
105
+ task: AgentTaskSpec;
106
+ knowledge: KnowledgeReadinessReport;
107
+ } | {
108
+ type: 'questions_start';
109
+ task: AgentTaskSpec;
110
+ questions: UserQuestion[];
111
+ } | {
112
+ type: 'questions_end';
113
+ task: AgentTaskSpec;
114
+ questions: UserQuestion[];
115
+ userAnswers: Record<string, string>;
116
+ } | {
117
+ type: 'acquisition_start';
118
+ task: AgentTaskSpec;
119
+ acquisitionPlans: DataAcquisitionPlan[];
120
+ } | {
121
+ type: 'acquisition_end';
122
+ task: AgentTaskSpec;
123
+ acquisitionPlans: DataAcquisitionPlan[];
124
+ acquiredEvidenceIds: string[];
125
+ } | {
126
+ type: 'control_start';
127
+ task: AgentTaskSpec;
128
+ knowledge: KnowledgeReadinessReport;
129
+ } | {
130
+ type: 'control_step';
131
+ task: AgentTaskSpec;
132
+ step: ControlStep<TState, TAction, TActionResult, TEval>;
133
+ } | {
134
+ type: 'control_end';
135
+ task: AgentTaskSpec;
136
+ control: ControlRunResult<TState, TAction, TActionResult, TEval>;
137
+ } | {
138
+ type: 'task_end';
139
+ task: AgentTaskSpec;
140
+ status: AgentTaskStatus;
141
+ reason: string;
142
+ };
143
+ /** Callback that receives one `AgentRuntimeEvent`; it may return void or a Promise of void. @stable */
144
+ type AgentRuntimeEventSink<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>) => Promise<void> | void;
145
+ /** Discriminated union (on `type`) of the events yielded by `AgentExecutionBackend.stream` and accepted by `RuntimeSessionStore.appendEvent`. The `text_delta`, `reasoning_delta`, `tool_call`, `tool_result`, `llm_call`, and `artifact` variants make `task`, `session`, and `timestamp` optional; all other variants require `task` and `timestamp`. @stable */
146
+ type RuntimeStreamEvent = {
147
+ type: 'task_start';
148
+ task: AgentTaskSpec;
149
+ timestamp: string;
150
+ } | {
151
+ type: 'readiness_start';
152
+ task: AgentTaskSpec;
153
+ timestamp: string;
154
+ } | {
155
+ type: 'readiness_end';
156
+ task: AgentTaskSpec;
157
+ knowledge: KnowledgeReadinessReport;
158
+ decision: KnowledgeReadinessDecision;
159
+ timestamp: string;
160
+ } | {
161
+ type: 'questions_start';
162
+ task: AgentTaskSpec;
163
+ questions: UserQuestion[];
164
+ timestamp: string;
165
+ } | {
166
+ type: 'questions_end';
167
+ task: AgentTaskSpec;
168
+ questions: UserQuestion[];
169
+ userAnswers: Record<string, string>;
170
+ timestamp: string;
171
+ } | {
172
+ type: 'acquisition_start';
173
+ task: AgentTaskSpec;
174
+ acquisitionPlans: DataAcquisitionPlan[];
175
+ timestamp: string;
176
+ } | {
177
+ type: 'acquisition_end';
178
+ task: AgentTaskSpec;
179
+ acquisitionPlans: DataAcquisitionPlan[];
180
+ acquiredEvidenceIds: string[];
181
+ timestamp: string;
182
+ } | {
183
+ type: 'session_created';
184
+ task: AgentTaskSpec;
185
+ session: RuntimeSession;
186
+ timestamp: string;
187
+ } | {
188
+ type: 'session_resumed';
189
+ task: AgentTaskSpec;
190
+ session: RuntimeSession;
191
+ timestamp: string;
192
+ } | {
193
+ type: 'backend_start';
194
+ task: AgentTaskSpec;
195
+ session: RuntimeSession;
196
+ backend: string;
197
+ timestamp: string;
198
+ } | {
199
+ type: 'text_delta';
200
+ task?: AgentTaskSpec;
201
+ session?: RuntimeSession;
202
+ text: string;
203
+ timestamp?: string;
204
+ } | {
205
+ type: 'reasoning_delta';
206
+ task?: AgentTaskSpec;
207
+ session?: RuntimeSession;
208
+ text: string;
209
+ timestamp?: string;
210
+ } | {
211
+ type: 'tool_call';
212
+ task?: AgentTaskSpec;
213
+ session?: RuntimeSession;
214
+ toolName: string;
215
+ toolCallId?: string;
216
+ args?: unknown;
217
+ timestamp?: string;
218
+ } | {
219
+ type: 'tool_result';
220
+ task?: AgentTaskSpec;
221
+ session?: RuntimeSession;
222
+ toolName: string;
223
+ toolCallId?: string;
224
+ result?: unknown;
225
+ timestamp?: string;
226
+ } | {
227
+ type: 'llm_call';
228
+ task?: AgentTaskSpec;
229
+ session?: RuntimeSession;
230
+ model: string;
231
+ tokensIn?: number;
232
+ tokensOut?: number;
233
+ costUsd?: number;
234
+ latencyMs?: number;
235
+ finishReason?: string;
236
+ timestamp?: string;
237
+ } | {
238
+ type: 'artifact';
239
+ task?: AgentTaskSpec;
240
+ session?: RuntimeSession;
241
+ artifactId: string;
242
+ name?: string;
243
+ mimeType?: string;
244
+ uri?: string;
245
+ metadata?: Record<string, unknown>;
246
+ timestamp?: string;
247
+ } | {
248
+ type: 'backend_error';
249
+ task: AgentTaskSpec;
250
+ session?: RuntimeSession;
251
+ backend: string;
252
+ message: string;
253
+ recoverable: boolean;
254
+ timestamp: string;
255
+ } | {
256
+ type: 'backend_end';
257
+ task: AgentTaskSpec;
258
+ session: RuntimeSession;
259
+ backend: string;
260
+ timestamp: string;
261
+ } | {
262
+ type: 'task_end';
263
+ task: AgentTaskSpec;
264
+ status: AgentTaskStatus;
265
+ reason: string;
266
+ timestamp: string;
267
+ } | {
268
+ type: 'final';
269
+ task: AgentTaskSpec;
270
+ session?: RuntimeSession;
271
+ status: AgentTaskStatus;
272
+ reason: string;
273
+ text?: string;
274
+ metadata?: Record<string, unknown>;
275
+ timestamp: string;
276
+ };
277
+ /** Session record: `id`, `backend` string, a status of active/completed/failed/aborted, optional `resumeToken`, `createdAt`/`updatedAt` strings, and optional metadata. @stable */
278
+ interface RuntimeSession {
279
+ id: string;
280
+ backend: string;
281
+ status: 'active' | 'completed' | 'failed' | 'aborted';
282
+ resumeToken?: string;
283
+ createdAt: string;
284
+ updatedAt: string;
285
+ metadata?: Record<string, unknown>;
286
+ }
287
+ /** Storage contract for sessions: `get` and `put` are required, `appendEvent` and `listEvents` are optional. Each method may be synchronous or return a Promise; `get` may yield `undefined`. @stable */
288
+ interface RuntimeSessionStore {
289
+ get(sessionId: string): Promise<RuntimeSession | undefined> | RuntimeSession | undefined;
290
+ put(session: RuntimeSession): Promise<void> | void;
291
+ appendEvent?(sessionId: string, event: RuntimeStreamEvent): Promise<void> | void;
292
+ listEvents?(sessionId: string): Promise<RuntimeStreamEvent[]> | RuntimeStreamEvent[];
293
+ }
294
+ /** Base input shape for `AgentExecutionBackend`: the task plus an optional single `message`, an optional `messages` list of role/content pairs, and optional `inputs`. @stable */
295
+ interface AgentBackendInput {
296
+ task: AgentTaskSpec;
297
+ message?: string;
298
+ messages?: Array<{
299
+ role: string;
300
+ content: string;
301
+ }>;
302
+ inputs?: Record<string, unknown>;
303
+ }
304
+ /** Context passed to `AgentExecutionBackend.stream`: task, readiness report, session, and an optional abort signal. `start` and `resume` receive the same shape with `session` omitted. @stable */
305
+ interface AgentBackendContext {
306
+ task: AgentTaskSpec;
307
+ knowledge: KnowledgeReadinessReport;
308
+ session: RuntimeSession;
309
+ signal?: AbortSignal;
310
+ }
311
+ /** Execution backend contract: required `kind` and `stream` (returns an AsyncIterable of `RuntimeStreamEvent`), plus optional `start`, `resume`, and `stop` hooks that may be synchronous or return a Promise. `start` additionally accepts an optional `requestedSessionId`. @stable */
312
+ interface AgentExecutionBackend<TInput extends AgentBackendInput = AgentBackendInput> {
313
+ kind: string;
314
+ start?(input: TInput, context: Omit<AgentBackendContext, 'session'> & {
315
+ requestedSessionId?: string;
316
+ }): Promise<RuntimeSession> | RuntimeSession;
317
+ resume?(session: RuntimeSession, input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<RuntimeSession> | RuntimeSession;
318
+ stream(input: TInput, context: AgentBackendContext): AsyncIterable<RuntimeStreamEvent>;
319
+ stop?(session: RuntimeSession, reason: string): Promise<void> | void;
320
+ }
321
+ /** Options pairing a task with an execution backend. `input` is the backend input type with `task` omitted; the knowledge provider, session store, session id, resume flag, abort signal, and minimum readiness score are all optional. @stable */
322
+ interface RunAgentTaskStreamOptions<TInput extends AgentBackendInput = AgentBackendInput> {
323
+ task: AgentTaskSpec;
324
+ backend: AgentExecutionBackend<TInput>;
325
+ input?: Omit<TInput, 'task'>;
326
+ knowledge?: AgentKnowledgeProvider;
327
+ sessionStore?: RuntimeSessionStore;
328
+ sessionId?: string;
329
+ resume?: boolean;
330
+ signal?: AbortSignal;
331
+ minimumReadinessScore?: number;
332
+ }
333
+ /** Options pairing a task with an `AgentAdapter`. The knowledge provider, event sink, trace store, abort signal, scenario/project/variant ids, and minimum readiness score are all optional. @stable */
334
+ interface RunAgentTaskOptions<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
335
+ task: AgentTaskSpec;
336
+ adapter: AgentAdapter<TState, TAction, TActionResult, TEval>;
337
+ knowledge?: AgentKnowledgeProvider;
338
+ onEvent?: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval>;
339
+ store?: TraceStore;
340
+ signal?: AbortSignal;
341
+ scenarioId?: string;
342
+ projectId?: string;
343
+ variantId?: string;
344
+ minimumReadinessScore?: number;
345
+ }
346
+ /** Result record for a task run: the task, status, readiness report, questions and acquisition plans with the user answers and acquired evidence ids, the control run result, and run records. All fields are required. @stable */
347
+ interface AgentTaskRunResult<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
348
+ task: AgentTaskSpec;
349
+ status: AgentTaskStatus;
350
+ knowledge: KnowledgeReadinessReport;
351
+ questions: UserQuestion[];
352
+ acquisitionPlans: DataAcquisitionPlan[];
353
+ userAnswers: Record<string, string>;
354
+ acquiredEvidenceIds: string[];
355
+ control: ControlRunResult<TState, TAction, TActionResult, TEval>;
356
+ runRecords: RunRecord[];
357
+ }
358
+ /** Flat summary of a task run: task id and optional domain, status and reason, readiness status/score/recommended action, blocking and non-blocking gap ids, question/plan/evidence/step counts, pass flag, optional failure class, and `wallMs`/`costUsd` numbers. @stable */
359
+ interface AgentTaskRunSummary {
360
+ taskId: string;
361
+ domain?: string;
362
+ status: AgentTaskStatus;
363
+ reason: string;
364
+ readinessStatus: KnowledgeReadinessDecision['status'];
365
+ readinessScore: number;
366
+ recommendedAction: KnowledgeReadinessReport['recommendedAction'];
367
+ blockingGapIds: string[];
368
+ nonBlockingGapIds: string[];
369
+ questionCount: number;
370
+ acquisitionPlanCount: number;
371
+ acquiredEvidenceCount: number;
372
+ controlStepCount: number;
373
+ pass: boolean;
374
+ failureClass?: string;
375
+ wallMs: number;
376
+ costUsd: number;
377
+ }
378
+ /** Readiness decision: `passed` flag, a status of ready/blocked/caveat, reason, readiness score, recommended action and severity (both typed from `KnowledgeReadinessReport`), and blocking/non-blocking gap ids. Carried on the `readiness_end` variant of `RuntimeStreamEvent`. @stable */
379
+ interface KnowledgeReadinessDecision {
380
+ passed: boolean;
381
+ status: 'ready' | 'blocked' | 'caveat';
382
+ reason: string;
383
+ readinessScore: number;
384
+ recommendedAction: KnowledgeReadinessReport['recommendedAction'];
385
+ severity: KnowledgeReadinessReport['severity'];
386
+ blockingGapIds: string[];
387
+ nonBlockingGapIds: string[];
388
+ }
389
+
390
+ export type { AgentBackendInput as A, KnowledgeReadinessDecision as K, RuntimeStreamEvent as R, AgentExecutionBackend as a, AgentBackendContext as b, RunAgentTaskOptions as c, AgentTaskRunResult as d, RunAgentTaskStreamOptions as e, AgentTaskRunSummary as f, AgentTaskSpec as g, AgentRuntimeEvent as h, AgentTaskStatus as i, RuntimeSessionStore as j, RuntimeSession as k, AgentAdapter as l, AgentKnowledgeProvider as m, AgentRuntimeEventSink as n, AgentTaskContext as o };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tangle-network/agent-runtime",
3
- "version": "0.11.0",
3
+ "version": "0.12.0",
4
4
  "description": "Reusable runtime lifecycle for domain-specific agents.",
5
5
  "homepage": "https://github.com/tangle-network/agent-runtime#readme",
6
6
  "repository": {