@purista/harness 1.2.6 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +6 -0
  2. package/dist/agents/index.d.ts +7 -1
  3. package/dist/agents/index.js +126 -44
  4. package/dist/errors/catalog.d.ts +18 -2
  5. package/dist/errors/catalog.js +10 -0
  6. package/dist/eval/index.d.ts +3 -3
  7. package/dist/eval/index.js +15 -1
  8. package/dist/harness/defineHarness.d.ts +149 -3
  9. package/dist/harness/defineHarness.js +110 -1
  10. package/dist/index.d.ts +38 -18
  11. package/dist/index.js +30 -16
  12. package/dist/local/index.d.ts +36 -0
  13. package/dist/local/index.js +24 -0
  14. package/dist/local/local-sandbox.d.ts +25 -0
  15. package/dist/local/local-sandbox.js +368 -0
  16. package/dist/local/local-workspace.d.ts +56 -0
  17. package/dist/local/local-workspace.js +496 -0
  18. package/dist/local/ref-hash.d.ts +6 -0
  19. package/dist/local/ref-hash.js +9 -0
  20. package/dist/local/sqlite-storage.d.ts +106 -0
  21. package/dist/local/sqlite-storage.js +680 -0
  22. package/dist/models/adapter-utils.d.ts +52 -0
  23. package/dist/models/adapter-utils.js +81 -0
  24. package/dist/models/registry.js +28 -37
  25. package/dist/models/stream-pump.d.ts +16 -0
  26. package/dist/models/stream-pump.js +77 -0
  27. package/dist/ports/base-model-provider.d.ts +7 -1
  28. package/dist/ports/base-model-provider.js +384 -87
  29. package/dist/ports/capabilities.d.ts +16 -2
  30. package/dist/ports/context-checkpoints.d.ts +63 -0
  31. package/dist/ports/context-checkpoints.js +33 -0
  32. package/dist/ports/index.d.ts +1 -0
  33. package/dist/ports/index.js +1 -0
  34. package/dist/ports/model-provider.d.ts +94 -0
  35. package/dist/runtime/durable.d.ts +11 -0
  36. package/dist/runtime/durable.js +15 -2
  37. package/dist/runtime/sessionDurable.js +47 -21
  38. package/dist/runtime/steps.d.ts +22 -1
  39. package/dist/runtime/steps.js +53 -2
  40. package/dist/sessions/index.d.ts +17 -6
  41. package/dist/sessions/index.js +345 -84
  42. package/dist/skills/index.d.ts +0 -2
  43. package/dist/skills/index.js +0 -8
  44. package/dist/state/in-memory.js +6 -6
  45. package/dist/telemetry/shim.js +2 -6
  46. package/dist/telemetry/span-attrs.d.ts +9 -0
  47. package/dist/telemetry/span-attrs.js +27 -0
  48. package/dist/testing/durableWorkspaceStoreContract.js +69 -0
  49. package/dist/testing/fakeLogger.d.ts +29 -0
  50. package/dist/testing/fakeLogger.js +47 -0
  51. package/dist/testing/fakeSandbox.d.ts +27 -0
  52. package/dist/testing/fakeSandbox.js +153 -0
  53. package/dist/testing/fakeStateStore.d.ts +36 -0
  54. package/dist/testing/fakeStateStore.js +66 -0
  55. package/dist/testing/index.d.ts +10 -4
  56. package/dist/testing/index.js +14 -4
  57. package/dist/testing/loggerContract.d.ts +9 -0
  58. package/dist/testing/loggerContract.js +62 -0
  59. package/dist/testing/modelProviderContract.d.ts +12 -0
  60. package/dist/testing/modelProviderContract.js +222 -0
  61. package/dist/testing/recordEvents.d.ts +3 -0
  62. package/dist/testing/recordEvents.js +8 -0
  63. package/dist/testing/stateStoreContract.js +27 -0
  64. package/dist/tools/index.js +26 -1
  65. package/dist/tools/mcp/http.d.ts +2 -0
  66. package/dist/tools/mcp/http.js +34 -21
  67. package/dist/tools/mcp/runner.d.ts +4 -0
  68. package/dist/tools/mcp/runner.js +75 -21
  69. package/dist/tools/mcp/stdio.d.ts +7 -1
  70. package/dist/tools/mcp/stdio.js +102 -23
  71. package/dist/version.d.ts +1 -1
  72. package/dist/version.js +1 -1
  73. package/dist/workspace/in-memory.d.ts +1 -0
  74. package/dist/workspace/in-memory.js +47 -12
  75. package/package.json +5 -4
package/README.md CHANGED
@@ -15,6 +15,12 @@ Telemetry defaults to dual GenAI and OpenInference attributes with no content
15
15
  capture. `InvokeOptions.traceparent` and `tracestate` accept inbound W3C Trace
16
16
  Context so application traces can parent harness run spans.
17
17
 
18
+ Workflows can orchestrate typed child agents with `ctx.agents.<id>(input)`.
19
+ Child-agent calls are disabled until a workflow declares `delegation` or the
20
+ harness opts in with `defaults.delegation.enabled: true`. Opted-in workflows get
21
+ bounded fan-out, agent allowlists, per-agent model alias overrides, and
22
+ lineage-rich run events.
23
+
18
24
  See [Evaluating Prompts](https://github.com/puristajs/harness/blob/main/docs/guides/evaluating-prompts.md)
19
25
  for the execution model, scorer limits, and privacy behavior.
20
26
 
package/dist/agents/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import type { Logger } from '../logger/index.js';
2
2
  import type { JsonValue } from '../models/json.js';
3
3
  import type { Message } from '../models/state.js';
4
- import type { AgentDefinition, ResolvedSkill, RunEvent, ToolsConfig } from '../harness/defineHarness.js';
4
+ import type { AgentDefinition, ContextCheckpoints, ResolvedSkill, RunEvent, ToolsConfig } from '../harness/defineHarness.js';
5
5
  import type { MemoryFacade } from '../ports/memory.js';
6
6
  import type { SandboxSession } from '../sandbox/index.js';
7
7
  import { type TelemetryShim } from '../telemetry/index.js';
@@ -12,15 +12,19 @@ export declare function runDefaultAgent(args: {
12
12
  runId: string;
13
13
  sessionId: string;
14
14
  workflowId?: string;
15
+ delegationCallId?: string;
16
+ delegationDepth?: number;
15
17
  input: unknown;
16
18
  history: Message[];
17
19
  agent: AgentDefinition<any>;
20
+ modelAlias?: string;
18
21
  models: Record<string, any>;
19
22
  skills: Record<string, ResolvedSkill>;
20
23
  customTools: ToolsConfig;
21
24
  mcpRegistry?: McpRunnerRegistry;
22
25
  session: SandboxSession;
23
26
  memory: MemoryFacade;
27
+ checkpoints: ContextCheckpoints;
24
28
  mountedSkills: Set<string>;
25
29
  historyWindow?: number;
26
30
  maxSteps: number;
@@ -35,3 +39,5 @@ export declare function runDefaultAgent(args: {
35
39
  output: JsonValue;
36
40
  emitted: Message[];
37
41
  }>;
42
+ /** Runs `fn` over `items` with bounded concurrency, preserving input order. */
43
+ export declare function runLimited<T, R>(items: readonly T[], limit: number, fn: (item: T) => Promise<R>): Promise<R[]>;
package/dist/agents/index.js CHANGED
@@ -7,6 +7,7 @@ import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } fr
7
7
  import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
8
8
  import { ulid } from '../ulid/index.js';
9
9
  import { abortError, withAbortSignal } from '../runtime/abort.js';
10
+ import { metadataSpanAttrs } from '../telemetry/span-attrs.js';
10
11
  function stringifyInput(input) { return typeof input === 'string' ? input : JSON.stringify(input); }
11
12
  function isReadonlyBuiltin(name) { return ['read', 'list', 'glob', 'grep'].includes(name); }
12
13
  async function checkPermission(agentId, runId, sessionId, def, toolName, input) {
@@ -93,6 +94,8 @@ export async function runDefaultAgent(args) {
93
94
  'harness.session.id': args.sessionId,
94
95
  'harness.run.id': args.runId,
95
96
  ...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
97
+ ...(args.delegationCallId ? { 'harness.agent.delegation_call_id': args.delegationCallId } : {}),
98
+ ...(args.delegationDepth !== undefined ? { 'harness.agent.delegation_depth': args.delegationDepth } : {}),
96
99
  'harness.agent.id': args.agentId,
97
100
  'gen_ai.operation.name': 'invoke_agent',
98
101
  'openinference.span.kind': 'AGENT',
@@ -100,33 +103,24 @@ export async function runDefaultAgent(args) {
100
103
  'metadata.agent_id': args.agentId,
101
104
  [ATTR_GEN_AI_AGENT_NAME]: args.agentId,
102
105
  [ATTR_GEN_AI_AGENT_ID]: args.agentId,
103
- 'harness.agent.model': args.agent.model,
106
+ 'harness.agent.model': args.modelAlias ?? args.agent.model,
107
+ ...(args.modelAlias && args.modelAlias !== args.agent.model ? { 'harness.agent.default_model': args.agent.model } : {}),
104
108
  'harness.agent.has_handler': args.agent.handler !== undefined,
105
109
  ...metadataSpanAttrs(args.metadata)
106
110
  };
107
111
  const metrics = createMetrics(args.telemetry, agentAttrs);
108
- const execute = () => runDefaultAgentInner({ ...args, metrics });
109
- return args.telemetry.span(`invoke_agent ${args.agentId}`, agentAttrs, execute);
110
- }
111
- function metadataSpanAttrs(metadata) {
112
- const attrs = {};
113
- for (const [key, value] of Object.entries(metadata ?? {})) {
114
- if (!/^[a-zA-Z][a-zA-Z0-9_.-]{0,63}$/.test(key))
115
- continue;
116
- if (typeof value === 'string') {
117
- if (value.length <= 256)
118
- attrs[`harness.metadata.${key}`] = value;
119
- continue;
120
- }
121
- if (typeof value === 'number' && Number.isFinite(value)) {
122
- attrs[`harness.metadata.${key}`] = value;
123
- continue;
112
+ // Spec 08 §9: the harness tracks activated skill names per run when the
113
+ // `read` tool loads `/skills/<name>/SKILL.md`. Only the count is emitted —
114
+ // skill names stay out of telemetry.
115
+ const activatedSkills = new Set();
116
+ return args.telemetry.span(`invoke_agent ${args.agentId}`, agentAttrs, async (span) => {
117
+ try {
118
+ return await runDefaultAgentInner({ ...args, metrics, activatedSkills });
124
119
  }
125
- if (typeof value === 'boolean') {
126
- attrs[`harness.metadata.${key}`] = value;
120
+ finally {
121
+ span.setAttribute('harness.agent.skills_activated', activatedSkills.size);
127
122
  }
128
- }
129
- return attrs;
123
+ });
130
124
  }
131
125
  async function runDefaultAgentInner(args) {
132
126
  if (args.signal.aborted)
@@ -134,12 +128,11 @@ async function runDefaultAgentInner(args) {
134
128
  const inputSchema = args.agent.input ?? z.string();
135
129
  const outputSchema = args.agent.output ?? z.string();
136
130
  const parsedInput = parseAgentSchema(inputSchema, args.input, 'agent_input');
137
- const model = args.models[args.agent.model];
138
- if (!model)
139
- throw new ValidationError('Unknown model alias', { where: 'agent_input', issues: { model: args.agent.model } });
131
+ const selectedModelAlias = args.modelAlias ?? args.agent.model;
132
+ if (!args.models[selectedModelAlias])
133
+ throw new ValidationError('Unknown model alias', { where: 'agent_input', issues: { model: selectedModelAlias } });
140
134
  const skillIds = args.agent.skills ?? [];
141
135
  await mountSkillsOnce(args.session, args.mountedSkills, args.skills, skillIds);
142
- const activatedSkills = new Set();
143
136
  if (args.agent.handler) {
144
137
  const handler = args.agent.handler;
145
138
  const output = await withAbortSignal(args.signal, 'run', 'Run was cancelled.', () => handler({
@@ -150,6 +143,7 @@ async function runDefaultAgentInner(args) {
150
143
  sessionId: args.sessionId,
151
144
  history: { list: async () => args.history },
152
145
  memory: args.memory,
146
+ checkpoints: args.checkpoints,
153
147
  metadata: args.metadata ?? {},
154
148
  metrics: args.metrics
155
149
  }));
@@ -157,7 +151,7 @@ async function runDefaultAgentInner(args) {
157
151
  return { output: validated, emitted: [{ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
158
152
  }
159
153
  const baseInstructions = typeof args.agent.instructions === 'function'
160
- ? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, metadata: args.metadata ?? {}, metrics: args.metrics })
154
+ ? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, checkpoints: args.checkpoints, metadata: args.metadata ?? {}, metrics: args.metrics })
161
155
  : args.agent.instructions;
162
156
  const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
163
157
  const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
@@ -180,6 +174,7 @@ async function runDefaultAgentInner(args) {
180
174
  });
181
175
  const mcpSpecs = args.mcpRegistry ? await getMcpToolSpecs(args.customTools, enabledCustomTools, { registry: args.mcpRegistry, signal: args.signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: args.session, sandboxKey: args.sessionId }) : [];
182
176
  const customSpecs = [...tsCustomSpecs, ...mcpSpecs];
177
+ const allToolSpecs = [...builtinSpecs, ...customSpecs];
183
178
  const nonSystem = args.history.filter((m) => m.role !== 'system');
184
179
  const system = args.history.filter((m) => m.role === 'system');
185
180
  const cappedNonSystem = args.historyWindow === undefined ? nonSystem : args.historyWindow === 0 ? [] : nonSystem.slice(-args.historyWindow);
@@ -193,36 +188,79 @@ async function runDefaultAgentInner(args) {
193
188
  const emitted = [];
194
189
  const maxSteps = Math.min(args.agent.maxSteps ?? args.maxSteps, 64);
195
190
  let steps = 0;
196
- await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString() });
191
+ const agentEventMeta = {
192
+ ...(args.workflowId ? { workflowId: args.workflowId } : {}),
193
+ ...(args.delegationCallId ? { delegationCallId: args.delegationCallId } : {}),
194
+ ...(args.delegationDepth !== undefined ? { delegationDepth: args.delegationDepth } : {}),
195
+ modelAlias: selectedModelAlias
196
+ };
197
+ await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), ...agentEventMeta });
197
198
  try {
198
199
  while (true) {
199
200
  if (args.signal.aborted)
200
201
  throw abortError(args.signal, 'run', 'Run was cancelled.');
201
202
  if (steps >= maxSteps)
202
203
  throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
204
+ const prepared = await args.agent.prepareStep?.({
205
+ input: parsedInput,
206
+ runId: args.runId,
207
+ sessionId: args.sessionId,
208
+ history: { list: async () => args.history },
209
+ memory: args.memory,
210
+ checkpoints: args.checkpoints,
211
+ metadata: args.metadata ?? {},
212
+ metrics: args.metrics,
213
+ step: steps,
214
+ model: selectedModelAlias,
215
+ messages: modelMessages,
216
+ tools: allToolSpecs
217
+ });
218
+ const stepModelAlias = prepared?.model ?? selectedModelAlias;
219
+ const model = args.models[stepModelAlias];
220
+ if (!model)
221
+ throw new ValidationError('Unknown model alias', { where: 'agent_input', issues: { model: stepModelAlias } });
222
+ const stepTools = filterActiveTools(allToolSpecs, prepared?.activeTools, args.agentId);
223
+ const stepMessages = prepared?.messages ? [...prepared.messages] : modelMessages;
224
+ const stepInstructions = prepared?.instructions ?? instructions;
203
225
  const response = await model.object({
204
226
  messages: [
205
- { role: 'system', content: instructions },
206
- ...modelMessages
227
+ { role: 'system', content: stepInstructions },
228
+ ...stepMessages
207
229
  ],
208
- tools: [...builtinSpecs, ...customSpecs],
209
- schema: z.toJSONSchema(outputSchema)
230
+ tools: stepTools,
231
+ schema: z.toJSONSchema(outputSchema),
232
+ ...(prepared?.call ? { call: prepared.call } : {})
210
233
  }, args.signal, {
211
234
  harnessName: args.harnessName,
212
235
  sessionId: args.sessionId,
213
236
  runId: args.runId,
214
237
  ...(args.workflowId ? { workflowId: args.workflowId } : {}),
215
- agentId: args.agentId
238
+ agentId: args.agentId,
239
+ modelAlias: stepModelAlias
216
240
  });
217
241
  // Emit one usage-bearing model event per model round-trip (including
218
242
  // tool-call steps) so run-summary modelCalls and tokenTotals are accurate
219
243
  // for multi-step runs.
220
- await args.emitEvent?.({ type: 'model.object', runId: args.runId, agentId: args.agentId, object: (response.object ?? null), usage: response.usage });
244
+ await args.emitEvent?.({
245
+ type: 'model.object',
246
+ runId: args.runId,
247
+ agentId: args.agentId,
248
+ ...(args.workflowId ? { workflowId: args.workflowId } : {}),
249
+ modelAlias: stepModelAlias,
250
+ object: (response.object ?? null),
251
+ usage: response.usage
252
+ });
221
253
  const toolCalls = (response.toolCalls ?? []);
254
+ if (await shouldStopAgentLoop(args, parsedInput, stepModelAlias, steps, modelMessages, allToolSpecs, response, toolCalls)) {
255
+ const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
256
+ emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
257
+ await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated, ...agentEventMeta });
258
+ return { output: validated, emitted };
259
+ }
222
260
  if (toolCalls.length === 0) {
223
261
  const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
224
262
  emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
225
- await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
263
+ await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated, ...agentEventMeta });
226
264
  return { output: validated, emitted };
227
265
  }
228
266
  const assistantMsg = {
@@ -238,8 +276,7 @@ async function runDefaultAgentInner(args) {
238
276
  });
239
277
  const outcomes = await runLimited(toolCalls, args.maxParallelToolCalls, (call) => executeToolCall({
240
278
  ...args,
241
- enabledCustomTools,
242
- activatedSkills
279
+ enabledCustomTools
243
280
  }, call));
244
281
  for (const outcome of outcomes) {
245
282
  emitted.push(outcome.emitted);
@@ -250,11 +287,47 @@ async function runDefaultAgentInner(args) {
250
287
  }
251
288
  catch (error) {
252
289
  // Pair every agent.started with an agent.finished, even on error/cancel/budget.
253
- await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error) });
290
+ await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error), ...agentEventMeta });
254
291
  throw error;
255
292
  }
256
293
  }
257
- async function runLimited(items, limit, fn) {
294
+ function filterActiveTools(tools, activeTools, agentId) {
295
+ if (!activeTools)
296
+ return [...tools];
297
+ const requested = new Set(activeTools);
298
+ const filtered = tools.filter((tool) => requested.has(tool.name));
299
+ if (filtered.length !== requested.size) {
300
+ const available = new Set(tools.map((tool) => tool.name));
301
+ const unknown = [...requested].filter((name) => !available.has(name));
302
+ throw new ValidationError('prepareStep referenced an unknown active tool.', {
303
+ where: 'agent_input',
304
+ issues: { agentId, activeTools: unknown }
305
+ });
306
+ }
307
+ return filtered;
308
+ }
309
+ async function shouldStopAgentLoop(args, input, selectedModelAlias, step, messages, tools, response, toolCalls) {
310
+ if (!args.agent.stopWhen)
311
+ return false;
312
+ return args.agent.stopWhen({
313
+ input,
314
+ runId: args.runId,
315
+ sessionId: args.sessionId,
316
+ history: { list: async () => args.history },
317
+ memory: args.memory,
318
+ checkpoints: args.checkpoints,
319
+ metadata: args.metadata ?? {},
320
+ metrics: args.metrics,
321
+ step,
322
+ model: selectedModelAlias,
323
+ messages,
324
+ tools,
325
+ response,
326
+ toolCalls
327
+ });
328
+ }
329
+ /** Runs `fn` over `items` with bounded concurrency, preserving input order. */
330
+ export async function runLimited(items, limit, fn) {
258
331
  const concurrency = Math.max(1, Math.min(limit, items.length));
259
332
  const results = new Array(items.length);
260
333
  let next = 0;
@@ -262,10 +335,10 @@ async function runLimited(items, limit, fn) {
262
335
  while (true) {
263
336
  const index = next;
264
337
  next += 1;
265
- const item = items[index];
266
- if (item === undefined)
338
+ // Index-based termination: an `undefined` element must not truncate the batch.
339
+ if (index >= items.length)
267
340
  return;
268
- results[index] = await fn(item);
341
+ results[index] = await fn(items[index]);
269
342
  }
270
343
  }
271
344
  await Promise.all(Array.from({ length: concurrency }, () => worker()));
@@ -332,9 +405,18 @@ async function executeToolCall(args, call) {
332
405
  catch (error) {
333
406
  const failure = normalizeToolFailure(canonical, error, toolKind);
334
407
  if (failure instanceof OperationCancelledError) {
335
- if (args.signal.aborted)
336
- throw new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure);
337
- throw failure;
408
+ const cancellation = args.signal.aborted
409
+ ? new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure)
410
+ : failure;
411
+ // Pair tool.started with a best-effort tool.finished even on cancellation,
412
+ // matching the deliberate started/finished pairing policy above.
413
+ try {
414
+ await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, error: serializeError(cancellation) });
415
+ }
416
+ catch {
417
+ // Best-effort: never mask the cancellation with an emit failure.
418
+ }
419
+ throw cancellation;
338
420
  }
339
421
  result = { error: serializeError(failure) };
340
422
  }
package/dist/errors/catalog.d.ts CHANGED
@@ -62,7 +62,12 @@ export declare class ModelError extends HarnessError {
62
62
  model: string;
63
63
  method: string;
64
64
  status?: number;
65
- reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
65
+ reason?: 'http_error' | 'network' | 'rate_limited' | 'provider_unavailable' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
66
+ retryKind?: 'none' | 'active' | 'deferred';
67
+ retryAfterMs?: number;
68
+ retryAttempt?: number;
69
+ retryMaxAttempts?: number;
70
+ rateLimit?: unknown;
66
71
  providerCode?: string;
67
72
  providerType?: string;
68
73
  providerParam?: string;
@@ -98,6 +103,7 @@ export declare class ToolNotFoundError extends HarnessError {
98
103
  export declare class SkillNotFoundError extends HarnessError {
99
104
  constructor(message: string, meta: {
100
105
  skill_id: string;
106
+ agent_id?: string;
101
107
  }, cause?: unknown);
102
108
  }
103
109
  /** Skill manifest/frontmatter/config validation failure. */
@@ -124,6 +130,16 @@ export declare class AgentLoopBudgetError extends HarnessError {
124
130
  limit: number;
125
131
  }, cause?: unknown);
126
132
  }
133
+ /** Workflow child-agent delegation was denied or exceeded a configured budget. */
134
+ export declare class DelegationPolicyError extends HarnessError {
135
+ constructor(message: string, meta: {
136
+ workflow_id: string;
137
+ agent_id: string;
138
+ reason: 'delegation_disabled' | 'agent_not_allowed' | 'max_child_agent_calls_exceeded' | 'max_parallel_child_agent_calls_exceeded' | 'max_delegation_depth_exceeded' | 'model_alias_not_allowed';
139
+ limit?: number;
140
+ model_alias?: string;
141
+ }, cause?: unknown);
142
+ }
127
143
  /** Session attempted to invoke unknown workflow id. */
128
144
  export declare class WorkflowNotFoundError extends HarnessError {
129
145
  constructor(message: string, meta: {
@@ -146,7 +162,7 @@ export declare class SessionBusyError extends HarnessError {
146
162
  /** State backend operation failed. */
147
163
  export declare class StateError extends HarnessError {
148
164
  constructor(message: string, meta: {
149
- op: 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents' | 'memory.get' | 'memory.set' | 'memory.delete' | 'memory.list' | 'memory.search';
165
+ op: 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'replaceMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents' | 'contextCheckpointWrite' | 'contextCheckpointRead' | 'contextCheckpointList' | 'contextCheckpointDelete' | 'memory.get' | 'memory.set' | 'memory.delete' | 'memory.list' | 'memory.search';
150
166
  reason?: 'duplicate_message_id' | string;
151
167
  adapter?: 'memory' | string;
152
168
  memory_provider?: string;
package/dist/errors/catalog.js CHANGED
@@ -33,7 +33,11 @@ export class SandboxNoExecutorError extends HarnessError {
33
33
  export class ModelError extends HarnessError {
34
34
  constructor(message, meta, cause) {
35
35
  const retriable = meta.reason === 'network'
36
+ || meta.reason === 'rate_limited'
37
+ || meta.reason === 'provider_unavailable'
36
38
  || meta.status === 429
39
+ || meta.status === 408
40
+ || meta.status === 409
37
41
  || (typeof meta.status === 'number' && meta.status >= 500);
38
42
  super({ code: 'MODEL_ERROR', category: 'model', retriable, message, meta, cause });
39
43
  }
@@ -87,6 +91,12 @@ export class AgentLoopBudgetError extends HarnessError {
87
91
  super({ code: 'AGENT_LOOP_BUDGET_EXCEEDED', category: 'validation', retriable: false, message, meta, cause });
88
92
  }
89
93
  }
94
+ /** Workflow child-agent delegation was denied or exceeded a configured budget. */
95
+ export class DelegationPolicyError extends HarnessError {
96
+ constructor(message, meta, cause) {
97
+ super({ code: 'DELEGATION_POLICY_ERROR', category: 'validation', retriable: false, message, meta, cause });
98
+ }
99
+ }
90
100
  /** Session attempted to invoke unknown workflow id. */
91
101
  export class WorkflowNotFoundError extends HarnessError {
92
102
  constructor(message, meta, cause) {
package/dist/eval/index.d.ts CHANGED
@@ -28,7 +28,7 @@ export interface ScorerResult {
28
28
  passed: boolean;
29
29
  evidence?: JsonValue;
30
30
  }
31
- export interface PromptCandidate<I = unknown> {
31
+ export interface PromptCandidate {
32
32
  id: string;
33
33
  prompt: string;
34
34
  metadata?: Record<string, JsonValue>;
@@ -47,10 +47,10 @@ export interface CandidateScore {
47
47
  scorerCount: number;
48
48
  }
49
49
  export interface EvaluatePromptCandidatesInput<I = unknown> {
50
- candidates: PromptCandidate<I>[];
50
+ candidates: PromptCandidate[];
51
51
  items: EvaluationItem<I>[];
52
52
  scorer: (target: ScorerTarget, signal: AbortSignal) => Promise<ScorerResult>;
53
- runCandidate: (candidate: PromptCandidate<I>, item: EvaluationItem<I>, signal: AbortSignal) => Promise<unknown>;
53
+ runCandidate: (candidate: PromptCandidate, item: EvaluationItem<I>, signal: AbortSignal) => Promise<unknown>;
54
54
  signal: AbortSignal;
55
55
  }
56
56
  export declare function evaluateDeterministicScorer(definition: DeterministicScorerDefinition, target: ScorerTarget): ScorerResult;
package/dist/eval/index.js CHANGED
@@ -167,8 +167,22 @@ function matchesType(value, type) {
167
167
  function isRecord(value) {
168
168
  return value !== null && typeof value === 'object' && !Array.isArray(value);
169
169
  }
170
+ /** Structural deep equality, insensitive to object key order. */
170
171
  function deepEqual(a, b) {
171
- return JSON.stringify(a) === JSON.stringify(b);
172
+ if (Object.is(a, b))
173
+ return true;
174
+ if (Array.isArray(a) || Array.isArray(b)) {
175
+ if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length)
176
+ return false;
177
+ return a.every((entry, index) => deepEqual(entry, b[index]));
178
+ }
179
+ if (isRecord(a) && isRecord(b)) {
180
+ const keysA = Object.keys(a);
181
+ if (keysA.length !== Object.keys(b).length)
182
+ return false;
183
+ return keysA.every((key) => Object.hasOwn(b, key) && deepEqual(a[key], b[key]));
184
+ }
185
+ return false;
172
186
  }
173
187
  function toJsonValue(value) {
174
188
  if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean')