@purista/harness 1.2.6 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/agents/index.d.ts +7 -1
- package/dist/agents/index.js +126 -44
- package/dist/errors/catalog.d.ts +18 -2
- package/dist/errors/catalog.js +10 -0
- package/dist/eval/index.d.ts +3 -3
- package/dist/eval/index.js +15 -1
- package/dist/harness/defineHarness.d.ts +149 -3
- package/dist/harness/defineHarness.js +110 -1
- package/dist/index.d.ts +38 -18
- package/dist/index.js +30 -16
- package/dist/local/index.d.ts +36 -0
- package/dist/local/index.js +24 -0
- package/dist/local/local-sandbox.d.ts +25 -0
- package/dist/local/local-sandbox.js +368 -0
- package/dist/local/local-workspace.d.ts +56 -0
- package/dist/local/local-workspace.js +496 -0
- package/dist/local/ref-hash.d.ts +6 -0
- package/dist/local/ref-hash.js +9 -0
- package/dist/local/sqlite-storage.d.ts +106 -0
- package/dist/local/sqlite-storage.js +680 -0
- package/dist/models/adapter-utils.d.ts +52 -0
- package/dist/models/adapter-utils.js +81 -0
- package/dist/models/registry.js +28 -37
- package/dist/models/stream-pump.d.ts +16 -0
- package/dist/models/stream-pump.js +77 -0
- package/dist/ports/base-model-provider.d.ts +7 -1
- package/dist/ports/base-model-provider.js +384 -87
- package/dist/ports/capabilities.d.ts +16 -2
- package/dist/ports/context-checkpoints.d.ts +63 -0
- package/dist/ports/context-checkpoints.js +33 -0
- package/dist/ports/index.d.ts +1 -0
- package/dist/ports/index.js +1 -0
- package/dist/ports/model-provider.d.ts +94 -0
- package/dist/runtime/durable.d.ts +11 -0
- package/dist/runtime/durable.js +15 -2
- package/dist/runtime/sessionDurable.js +47 -21
- package/dist/runtime/steps.d.ts +22 -1
- package/dist/runtime/steps.js +53 -2
- package/dist/sessions/index.d.ts +17 -6
- package/dist/sessions/index.js +345 -84
- package/dist/skills/index.d.ts +0 -2
- package/dist/skills/index.js +0 -8
- package/dist/state/in-memory.js +6 -6
- package/dist/telemetry/shim.js +2 -6
- package/dist/telemetry/span-attrs.d.ts +9 -0
- package/dist/telemetry/span-attrs.js +27 -0
- package/dist/testing/durableWorkspaceStoreContract.js +69 -0
- package/dist/testing/fakeLogger.d.ts +29 -0
- package/dist/testing/fakeLogger.js +47 -0
- package/dist/testing/fakeSandbox.d.ts +27 -0
- package/dist/testing/fakeSandbox.js +153 -0
- package/dist/testing/fakeStateStore.d.ts +36 -0
- package/dist/testing/fakeStateStore.js +66 -0
- package/dist/testing/index.d.ts +10 -4
- package/dist/testing/index.js +14 -4
- package/dist/testing/loggerContract.d.ts +9 -0
- package/dist/testing/loggerContract.js +62 -0
- package/dist/testing/modelProviderContract.d.ts +12 -0
- package/dist/testing/modelProviderContract.js +222 -0
- package/dist/testing/recordEvents.d.ts +3 -0
- package/dist/testing/recordEvents.js +8 -0
- package/dist/testing/stateStoreContract.js +27 -0
- package/dist/tools/index.js +26 -1
- package/dist/tools/mcp/http.d.ts +2 -0
- package/dist/tools/mcp/http.js +34 -21
- package/dist/tools/mcp/runner.d.ts +4 -0
- package/dist/tools/mcp/runner.js +75 -21
- package/dist/tools/mcp/stdio.d.ts +7 -1
- package/dist/tools/mcp/stdio.js +102 -23
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workspace/in-memory.d.ts +1 -0
- package/dist/workspace/in-memory.js +47 -12
- package/package.json +5 -4
package/README.md
CHANGED
|
@@ -15,6 +15,12 @@ Telemetry defaults to dual GenAI and OpenInference attributes with no content
|
|
|
15
15
|
capture. `InvokeOptions.traceparent` and `tracestate` accept inbound W3C Trace
|
|
16
16
|
Context so application traces can parent harness run spans.
|
|
17
17
|
|
|
18
|
+
Workflows can orchestrate typed child agents with `ctx.agents.<id>(input)`.
|
|
19
|
+
Child-agent calls are disabled until a workflow declares `delegation` or the
|
|
20
|
+
harness opts in with `defaults.delegation.enabled: true`. Opted-in workflows get
|
|
21
|
+
bounded fan-out, agent allowlists, per-agent model alias overrides, and
|
|
22
|
+
lineage-rich run events.
|
|
23
|
+
|
|
18
24
|
See [Evaluating Prompts](https://github.com/puristajs/harness/blob/main/docs/guides/evaluating-prompts.md)
|
|
19
25
|
for the execution model, scorer limits, and privacy behavior.
|
|
20
26
|
|
package/dist/agents/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { Logger } from '../logger/index.js';
|
|
2
2
|
import type { JsonValue } from '../models/json.js';
|
|
3
3
|
import type { Message } from '../models/state.js';
|
|
4
|
-
import type { AgentDefinition, ResolvedSkill, RunEvent, ToolsConfig } from '../harness/defineHarness.js';
|
|
4
|
+
import type { AgentDefinition, ContextCheckpoints, ResolvedSkill, RunEvent, ToolsConfig } from '../harness/defineHarness.js';
|
|
5
5
|
import type { MemoryFacade } from '../ports/memory.js';
|
|
6
6
|
import type { SandboxSession } from '../sandbox/index.js';
|
|
7
7
|
import { type TelemetryShim } from '../telemetry/index.js';
|
|
@@ -12,15 +12,19 @@ export declare function runDefaultAgent(args: {
|
|
|
12
12
|
runId: string;
|
|
13
13
|
sessionId: string;
|
|
14
14
|
workflowId?: string;
|
|
15
|
+
delegationCallId?: string;
|
|
16
|
+
delegationDepth?: number;
|
|
15
17
|
input: unknown;
|
|
16
18
|
history: Message[];
|
|
17
19
|
agent: AgentDefinition<any>;
|
|
20
|
+
modelAlias?: string;
|
|
18
21
|
models: Record<string, any>;
|
|
19
22
|
skills: Record<string, ResolvedSkill>;
|
|
20
23
|
customTools: ToolsConfig;
|
|
21
24
|
mcpRegistry?: McpRunnerRegistry;
|
|
22
25
|
session: SandboxSession;
|
|
23
26
|
memory: MemoryFacade;
|
|
27
|
+
checkpoints: ContextCheckpoints;
|
|
24
28
|
mountedSkills: Set<string>;
|
|
25
29
|
historyWindow?: number;
|
|
26
30
|
maxSteps: number;
|
|
@@ -35,3 +39,5 @@ export declare function runDefaultAgent(args: {
|
|
|
35
39
|
output: JsonValue;
|
|
36
40
|
emitted: Message[];
|
|
37
41
|
}>;
|
|
42
|
+
/** Runs `fn` over `items` with bounded concurrency, preserving input order. */
|
|
43
|
+
export declare function runLimited<T, R>(items: readonly T[], limit: number, fn: (item: T) => Promise<R>): Promise<R[]>;
|
package/dist/agents/index.js
CHANGED
|
@@ -7,6 +7,7 @@ import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } fr
|
|
|
7
7
|
import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
|
|
8
8
|
import { ulid } from '../ulid/index.js';
|
|
9
9
|
import { abortError, withAbortSignal } from '../runtime/abort.js';
|
|
10
|
+
import { metadataSpanAttrs } from '../telemetry/span-attrs.js';
|
|
10
11
|
function stringifyInput(input) { return typeof input === 'string' ? input : JSON.stringify(input); }
|
|
11
12
|
function isReadonlyBuiltin(name) { return ['read', 'list', 'glob', 'grep'].includes(name); }
|
|
12
13
|
async function checkPermission(agentId, runId, sessionId, def, toolName, input) {
|
|
@@ -93,6 +94,8 @@ export async function runDefaultAgent(args) {
|
|
|
93
94
|
'harness.session.id': args.sessionId,
|
|
94
95
|
'harness.run.id': args.runId,
|
|
95
96
|
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
97
|
+
...(args.delegationCallId ? { 'harness.agent.delegation_call_id': args.delegationCallId } : {}),
|
|
98
|
+
...(args.delegationDepth !== undefined ? { 'harness.agent.delegation_depth': args.delegationDepth } : {}),
|
|
96
99
|
'harness.agent.id': args.agentId,
|
|
97
100
|
'gen_ai.operation.name': 'invoke_agent',
|
|
98
101
|
'openinference.span.kind': 'AGENT',
|
|
@@ -100,33 +103,24 @@ export async function runDefaultAgent(args) {
|
|
|
100
103
|
'metadata.agent_id': args.agentId,
|
|
101
104
|
[ATTR_GEN_AI_AGENT_NAME]: args.agentId,
|
|
102
105
|
[ATTR_GEN_AI_AGENT_ID]: args.agentId,
|
|
103
|
-
'harness.agent.model': args.agent.model,
|
|
106
|
+
'harness.agent.model': args.modelAlias ?? args.agent.model,
|
|
107
|
+
...(args.modelAlias && args.modelAlias !== args.agent.model ? { 'harness.agent.default_model': args.agent.model } : {}),
|
|
104
108
|
'harness.agent.has_handler': args.agent.handler !== undefined,
|
|
105
109
|
...metadataSpanAttrs(args.metadata)
|
|
106
110
|
};
|
|
107
111
|
const metrics = createMetrics(args.telemetry, agentAttrs);
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
continue;
|
|
116
|
-
if (typeof value === 'string') {
|
|
117
|
-
if (value.length <= 256)
|
|
118
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
119
|
-
continue;
|
|
120
|
-
}
|
|
121
|
-
if (typeof value === 'number' && Number.isFinite(value)) {
|
|
122
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
123
|
-
continue;
|
|
112
|
+
// Spec 08 §9: the harness tracks activated skill names per run when the
|
|
113
|
+
// `read` tool loads `/skills/<name>/SKILL.md`. Only the count is emitted —
|
|
114
|
+
// skill names stay out of telemetry.
|
|
115
|
+
const activatedSkills = new Set();
|
|
116
|
+
return args.telemetry.span(`invoke_agent ${args.agentId}`, agentAttrs, async (span) => {
|
|
117
|
+
try {
|
|
118
|
+
return await runDefaultAgentInner({ ...args, metrics, activatedSkills });
|
|
124
119
|
}
|
|
125
|
-
|
|
126
|
-
|
|
120
|
+
finally {
|
|
121
|
+
span.setAttribute('harness.agent.skills_activated', activatedSkills.size);
|
|
127
122
|
}
|
|
128
|
-
}
|
|
129
|
-
return attrs;
|
|
123
|
+
});
|
|
130
124
|
}
|
|
131
125
|
async function runDefaultAgentInner(args) {
|
|
132
126
|
if (args.signal.aborted)
|
|
@@ -134,12 +128,11 @@ async function runDefaultAgentInner(args) {
|
|
|
134
128
|
const inputSchema = args.agent.input ?? z.string();
|
|
135
129
|
const outputSchema = args.agent.output ?? z.string();
|
|
136
130
|
const parsedInput = parseAgentSchema(inputSchema, args.input, 'agent_input');
|
|
137
|
-
const
|
|
138
|
-
if (!
|
|
139
|
-
throw new ValidationError('Unknown model alias', { where: 'agent_input', issues: { model:
|
|
131
|
+
const selectedModelAlias = args.modelAlias ?? args.agent.model;
|
|
132
|
+
if (!args.models[selectedModelAlias])
|
|
133
|
+
throw new ValidationError('Unknown model alias', { where: 'agent_input', issues: { model: selectedModelAlias } });
|
|
140
134
|
const skillIds = args.agent.skills ?? [];
|
|
141
135
|
await mountSkillsOnce(args.session, args.mountedSkills, args.skills, skillIds);
|
|
142
|
-
const activatedSkills = new Set();
|
|
143
136
|
if (args.agent.handler) {
|
|
144
137
|
const handler = args.agent.handler;
|
|
145
138
|
const output = await withAbortSignal(args.signal, 'run', 'Run was cancelled.', () => handler({
|
|
@@ -150,6 +143,7 @@ async function runDefaultAgentInner(args) {
|
|
|
150
143
|
sessionId: args.sessionId,
|
|
151
144
|
history: { list: async () => args.history },
|
|
152
145
|
memory: args.memory,
|
|
146
|
+
checkpoints: args.checkpoints,
|
|
153
147
|
metadata: args.metadata ?? {},
|
|
154
148
|
metrics: args.metrics
|
|
155
149
|
}));
|
|
@@ -157,7 +151,7 @@ async function runDefaultAgentInner(args) {
|
|
|
157
151
|
return { output: validated, emitted: [{ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
|
|
158
152
|
}
|
|
159
153
|
const baseInstructions = typeof args.agent.instructions === 'function'
|
|
160
|
-
? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, metadata: args.metadata ?? {}, metrics: args.metrics })
|
|
154
|
+
? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, checkpoints: args.checkpoints, metadata: args.metadata ?? {}, metrics: args.metrics })
|
|
161
155
|
: args.agent.instructions;
|
|
162
156
|
const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
|
|
163
157
|
const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
|
|
@@ -180,6 +174,7 @@ async function runDefaultAgentInner(args) {
|
|
|
180
174
|
});
|
|
181
175
|
const mcpSpecs = args.mcpRegistry ? await getMcpToolSpecs(args.customTools, enabledCustomTools, { registry: args.mcpRegistry, signal: args.signal, toolTimeoutMs: args.toolTimeoutMs, sandbox: args.session, sandboxKey: args.sessionId }) : [];
|
|
182
176
|
const customSpecs = [...tsCustomSpecs, ...mcpSpecs];
|
|
177
|
+
const allToolSpecs = [...builtinSpecs, ...customSpecs];
|
|
183
178
|
const nonSystem = args.history.filter((m) => m.role !== 'system');
|
|
184
179
|
const system = args.history.filter((m) => m.role === 'system');
|
|
185
180
|
const cappedNonSystem = args.historyWindow === undefined ? nonSystem : args.historyWindow === 0 ? [] : nonSystem.slice(-args.historyWindow);
|
|
@@ -193,36 +188,79 @@ async function runDefaultAgentInner(args) {
|
|
|
193
188
|
const emitted = [];
|
|
194
189
|
const maxSteps = Math.min(args.agent.maxSteps ?? args.maxSteps, 64);
|
|
195
190
|
let steps = 0;
|
|
196
|
-
|
|
191
|
+
const agentEventMeta = {
|
|
192
|
+
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
193
|
+
...(args.delegationCallId ? { delegationCallId: args.delegationCallId } : {}),
|
|
194
|
+
...(args.delegationDepth !== undefined ? { delegationDepth: args.delegationDepth } : {}),
|
|
195
|
+
modelAlias: selectedModelAlias
|
|
196
|
+
};
|
|
197
|
+
await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), ...agentEventMeta });
|
|
197
198
|
try {
|
|
198
199
|
while (true) {
|
|
199
200
|
if (args.signal.aborted)
|
|
200
201
|
throw abortError(args.signal, 'run', 'Run was cancelled.');
|
|
201
202
|
if (steps >= maxSteps)
|
|
202
203
|
throw new AgentLoopBudgetError('Agent loop budget exceeded.', { agent_id: args.agentId, reason: 'iterations_exceeded', limit: maxSteps });
|
|
204
|
+
const prepared = await args.agent.prepareStep?.({
|
|
205
|
+
input: parsedInput,
|
|
206
|
+
runId: args.runId,
|
|
207
|
+
sessionId: args.sessionId,
|
|
208
|
+
history: { list: async () => args.history },
|
|
209
|
+
memory: args.memory,
|
|
210
|
+
checkpoints: args.checkpoints,
|
|
211
|
+
metadata: args.metadata ?? {},
|
|
212
|
+
metrics: args.metrics,
|
|
213
|
+
step: steps,
|
|
214
|
+
model: selectedModelAlias,
|
|
215
|
+
messages: modelMessages,
|
|
216
|
+
tools: allToolSpecs
|
|
217
|
+
});
|
|
218
|
+
const stepModelAlias = prepared?.model ?? selectedModelAlias;
|
|
219
|
+
const model = args.models[stepModelAlias];
|
|
220
|
+
if (!model)
|
|
221
|
+
throw new ValidationError('Unknown model alias', { where: 'agent_input', issues: { model: stepModelAlias } });
|
|
222
|
+
const stepTools = filterActiveTools(allToolSpecs, prepared?.activeTools, args.agentId);
|
|
223
|
+
const stepMessages = prepared?.messages ? [...prepared.messages] : modelMessages;
|
|
224
|
+
const stepInstructions = prepared?.instructions ?? instructions;
|
|
203
225
|
const response = await model.object({
|
|
204
226
|
messages: [
|
|
205
|
-
{ role: 'system', content:
|
|
206
|
-
...
|
|
227
|
+
{ role: 'system', content: stepInstructions },
|
|
228
|
+
...stepMessages
|
|
207
229
|
],
|
|
208
|
-
tools:
|
|
209
|
-
schema: z.toJSONSchema(outputSchema)
|
|
230
|
+
tools: stepTools,
|
|
231
|
+
schema: z.toJSONSchema(outputSchema),
|
|
232
|
+
...(prepared?.call ? { call: prepared.call } : {})
|
|
210
233
|
}, args.signal, {
|
|
211
234
|
harnessName: args.harnessName,
|
|
212
235
|
sessionId: args.sessionId,
|
|
213
236
|
runId: args.runId,
|
|
214
237
|
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
215
|
-
agentId: args.agentId
|
|
238
|
+
agentId: args.agentId,
|
|
239
|
+
modelAlias: stepModelAlias
|
|
216
240
|
});
|
|
217
241
|
// Emit one usage-bearing model event per model round-trip (including
|
|
218
242
|
// tool-call steps) so run-summary modelCalls and tokenTotals are accurate
|
|
219
243
|
// for multi-step runs.
|
|
220
|
-
await args.emitEvent?.({
|
|
244
|
+
await args.emitEvent?.({
|
|
245
|
+
type: 'model.object',
|
|
246
|
+
runId: args.runId,
|
|
247
|
+
agentId: args.agentId,
|
|
248
|
+
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
249
|
+
modelAlias: stepModelAlias,
|
|
250
|
+
object: (response.object ?? null),
|
|
251
|
+
usage: response.usage
|
|
252
|
+
});
|
|
221
253
|
const toolCalls = (response.toolCalls ?? []);
|
|
254
|
+
if (await shouldStopAgentLoop(args, parsedInput, stepModelAlias, steps, modelMessages, allToolSpecs, response, toolCalls)) {
|
|
255
|
+
const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
|
|
256
|
+
emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
|
|
257
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated, ...agentEventMeta });
|
|
258
|
+
return { output: validated, emitted };
|
|
259
|
+
}
|
|
222
260
|
if (toolCalls.length === 0) {
|
|
223
261
|
const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
|
|
224
262
|
emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
|
|
225
|
-
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
|
|
263
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated, ...agentEventMeta });
|
|
226
264
|
return { output: validated, emitted };
|
|
227
265
|
}
|
|
228
266
|
const assistantMsg = {
|
|
@@ -238,8 +276,7 @@ async function runDefaultAgentInner(args) {
|
|
|
238
276
|
});
|
|
239
277
|
const outcomes = await runLimited(toolCalls, args.maxParallelToolCalls, (call) => executeToolCall({
|
|
240
278
|
...args,
|
|
241
|
-
enabledCustomTools
|
|
242
|
-
activatedSkills
|
|
279
|
+
enabledCustomTools
|
|
243
280
|
}, call));
|
|
244
281
|
for (const outcome of outcomes) {
|
|
245
282
|
emitted.push(outcome.emitted);
|
|
@@ -250,11 +287,47 @@ async function runDefaultAgentInner(args) {
|
|
|
250
287
|
}
|
|
251
288
|
catch (error) {
|
|
252
289
|
// Pair every agent.started with an agent.finished, even on error/cancel/budget.
|
|
253
|
-
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error) });
|
|
290
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error), ...agentEventMeta });
|
|
254
291
|
throw error;
|
|
255
292
|
}
|
|
256
293
|
}
|
|
257
|
-
|
|
294
|
+
function filterActiveTools(tools, activeTools, agentId) {
|
|
295
|
+
if (!activeTools)
|
|
296
|
+
return [...tools];
|
|
297
|
+
const requested = new Set(activeTools);
|
|
298
|
+
const filtered = tools.filter((tool) => requested.has(tool.name));
|
|
299
|
+
if (filtered.length !== requested.size) {
|
|
300
|
+
const available = new Set(tools.map((tool) => tool.name));
|
|
301
|
+
const unknown = [...requested].filter((name) => !available.has(name));
|
|
302
|
+
throw new ValidationError('prepareStep referenced an unknown active tool.', {
|
|
303
|
+
where: 'agent_input',
|
|
304
|
+
issues: { agentId, activeTools: unknown }
|
|
305
|
+
});
|
|
306
|
+
}
|
|
307
|
+
return filtered;
|
|
308
|
+
}
|
|
309
|
+
async function shouldStopAgentLoop(args, input, selectedModelAlias, step, messages, tools, response, toolCalls) {
|
|
310
|
+
if (!args.agent.stopWhen)
|
|
311
|
+
return false;
|
|
312
|
+
return args.agent.stopWhen({
|
|
313
|
+
input,
|
|
314
|
+
runId: args.runId,
|
|
315
|
+
sessionId: args.sessionId,
|
|
316
|
+
history: { list: async () => args.history },
|
|
317
|
+
memory: args.memory,
|
|
318
|
+
checkpoints: args.checkpoints,
|
|
319
|
+
metadata: args.metadata ?? {},
|
|
320
|
+
metrics: args.metrics,
|
|
321
|
+
step,
|
|
322
|
+
model: selectedModelAlias,
|
|
323
|
+
messages,
|
|
324
|
+
tools,
|
|
325
|
+
response,
|
|
326
|
+
toolCalls
|
|
327
|
+
});
|
|
328
|
+
}
|
|
329
|
+
/** Runs `fn` over `items` with bounded concurrency, preserving input order. */
|
|
330
|
+
export async function runLimited(items, limit, fn) {
|
|
258
331
|
const concurrency = Math.max(1, Math.min(limit, items.length));
|
|
259
332
|
const results = new Array(items.length);
|
|
260
333
|
let next = 0;
|
|
@@ -262,10 +335,10 @@ async function runLimited(items, limit, fn) {
|
|
|
262
335
|
while (true) {
|
|
263
336
|
const index = next;
|
|
264
337
|
next += 1;
|
|
265
|
-
|
|
266
|
-
if (
|
|
338
|
+
// Index-based termination: an `undefined` element must not truncate the batch.
|
|
339
|
+
if (index >= items.length)
|
|
267
340
|
return;
|
|
268
|
-
results[index] = await fn(
|
|
341
|
+
results[index] = await fn(items[index]);
|
|
269
342
|
}
|
|
270
343
|
}
|
|
271
344
|
await Promise.all(Array.from({ length: concurrency }, () => worker()));
|
|
@@ -332,9 +405,18 @@ async function executeToolCall(args, call) {
|
|
|
332
405
|
catch (error) {
|
|
333
406
|
const failure = normalizeToolFailure(canonical, error, toolKind);
|
|
334
407
|
if (failure instanceof OperationCancelledError) {
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
408
|
+
const cancellation = args.signal.aborted
|
|
409
|
+
? new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure)
|
|
410
|
+
: failure;
|
|
411
|
+
// Pair tool.started with a best-effort tool.finished even on cancellation,
|
|
412
|
+
// matching the deliberate started/finished pairing policy above.
|
|
413
|
+
try {
|
|
414
|
+
await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, error: serializeError(cancellation) });
|
|
415
|
+
}
|
|
416
|
+
catch {
|
|
417
|
+
// Best-effort: never mask the cancellation with an emit failure.
|
|
418
|
+
}
|
|
419
|
+
throw cancellation;
|
|
338
420
|
}
|
|
339
421
|
result = { error: serializeError(failure) };
|
|
340
422
|
}
|
package/dist/errors/catalog.d.ts
CHANGED
|
@@ -62,7 +62,12 @@ export declare class ModelError extends HarnessError {
|
|
|
62
62
|
model: string;
|
|
63
63
|
method: string;
|
|
64
64
|
status?: number;
|
|
65
|
-
reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
|
|
65
|
+
reason?: 'http_error' | 'network' | 'rate_limited' | 'provider_unavailable' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
|
|
66
|
+
retryKind?: 'none' | 'active' | 'deferred';
|
|
67
|
+
retryAfterMs?: number;
|
|
68
|
+
retryAttempt?: number;
|
|
69
|
+
retryMaxAttempts?: number;
|
|
70
|
+
rateLimit?: unknown;
|
|
66
71
|
providerCode?: string;
|
|
67
72
|
providerType?: string;
|
|
68
73
|
providerParam?: string;
|
|
@@ -98,6 +103,7 @@ export declare class ToolNotFoundError extends HarnessError {
|
|
|
98
103
|
export declare class SkillNotFoundError extends HarnessError {
|
|
99
104
|
constructor(message: string, meta: {
|
|
100
105
|
skill_id: string;
|
|
106
|
+
agent_id?: string;
|
|
101
107
|
}, cause?: unknown);
|
|
102
108
|
}
|
|
103
109
|
/** Skill manifest/frontmatter/config validation failure. */
|
|
@@ -124,6 +130,16 @@ export declare class AgentLoopBudgetError extends HarnessError {
|
|
|
124
130
|
limit: number;
|
|
125
131
|
}, cause?: unknown);
|
|
126
132
|
}
|
|
133
|
+
/** Workflow child-agent delegation was denied or exceeded a configured budget. */
|
|
134
|
+
export declare class DelegationPolicyError extends HarnessError {
|
|
135
|
+
constructor(message: string, meta: {
|
|
136
|
+
workflow_id: string;
|
|
137
|
+
agent_id: string;
|
|
138
|
+
reason: 'delegation_disabled' | 'agent_not_allowed' | 'max_child_agent_calls_exceeded' | 'max_parallel_child_agent_calls_exceeded' | 'max_delegation_depth_exceeded' | 'model_alias_not_allowed';
|
|
139
|
+
limit?: number;
|
|
140
|
+
model_alias?: string;
|
|
141
|
+
}, cause?: unknown);
|
|
142
|
+
}
|
|
127
143
|
/** Session attempted to invoke unknown workflow id. */
|
|
128
144
|
export declare class WorkflowNotFoundError extends HarnessError {
|
|
129
145
|
constructor(message: string, meta: {
|
|
@@ -146,7 +162,7 @@ export declare class SessionBusyError extends HarnessError {
|
|
|
146
162
|
/** State backend operation failed. */
|
|
147
163
|
export declare class StateError extends HarnessError {
|
|
148
164
|
constructor(message: string, meta: {
|
|
149
|
-
op: 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents' | 'memory.get' | 'memory.set' | 'memory.delete' | 'memory.list' | 'memory.search';
|
|
165
|
+
op: 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'replaceMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents' | 'contextCheckpointWrite' | 'contextCheckpointRead' | 'contextCheckpointList' | 'contextCheckpointDelete' | 'memory.get' | 'memory.set' | 'memory.delete' | 'memory.list' | 'memory.search';
|
|
150
166
|
reason?: 'duplicate_message_id' | string;
|
|
151
167
|
adapter?: 'memory' | string;
|
|
152
168
|
memory_provider?: string;
|
package/dist/errors/catalog.js
CHANGED
|
@@ -33,7 +33,11 @@ export class SandboxNoExecutorError extends HarnessError {
|
|
|
33
33
|
export class ModelError extends HarnessError {
|
|
34
34
|
constructor(message, meta, cause) {
|
|
35
35
|
const retriable = meta.reason === 'network'
|
|
36
|
+
|| meta.reason === 'rate_limited'
|
|
37
|
+
|| meta.reason === 'provider_unavailable'
|
|
36
38
|
|| meta.status === 429
|
|
39
|
+
|| meta.status === 408
|
|
40
|
+
|| meta.status === 409
|
|
37
41
|
|| (typeof meta.status === 'number' && meta.status >= 500);
|
|
38
42
|
super({ code: 'MODEL_ERROR', category: 'model', retriable, message, meta, cause });
|
|
39
43
|
}
|
|
@@ -87,6 +91,12 @@ export class AgentLoopBudgetError extends HarnessError {
|
|
|
87
91
|
super({ code: 'AGENT_LOOP_BUDGET_EXCEEDED', category: 'validation', retriable: false, message, meta, cause });
|
|
88
92
|
}
|
|
89
93
|
}
|
|
94
|
+
/** Workflow child-agent delegation was denied or exceeded a configured budget. */
|
|
95
|
+
export class DelegationPolicyError extends HarnessError {
|
|
96
|
+
constructor(message, meta, cause) {
|
|
97
|
+
super({ code: 'DELEGATION_POLICY_ERROR', category: 'validation', retriable: false, message, meta, cause });
|
|
98
|
+
}
|
|
99
|
+
}
|
|
90
100
|
/** Session attempted to invoke unknown workflow id. */
|
|
91
101
|
export class WorkflowNotFoundError extends HarnessError {
|
|
92
102
|
constructor(message, meta, cause) {
|
package/dist/eval/index.d.ts
CHANGED
|
@@ -28,7 +28,7 @@ export interface ScorerResult {
|
|
|
28
28
|
passed: boolean;
|
|
29
29
|
evidence?: JsonValue;
|
|
30
30
|
}
|
|
31
|
-
export interface PromptCandidate
|
|
31
|
+
export interface PromptCandidate {
|
|
32
32
|
id: string;
|
|
33
33
|
prompt: string;
|
|
34
34
|
metadata?: Record<string, JsonValue>;
|
|
@@ -47,10 +47,10 @@ export interface CandidateScore {
|
|
|
47
47
|
scorerCount: number;
|
|
48
48
|
}
|
|
49
49
|
export interface EvaluatePromptCandidatesInput<I = unknown> {
|
|
50
|
-
candidates: PromptCandidate
|
|
50
|
+
candidates: PromptCandidate[];
|
|
51
51
|
items: EvaluationItem<I>[];
|
|
52
52
|
scorer: (target: ScorerTarget, signal: AbortSignal) => Promise<ScorerResult>;
|
|
53
|
-
runCandidate: (candidate: PromptCandidate
|
|
53
|
+
runCandidate: (candidate: PromptCandidate, item: EvaluationItem<I>, signal: AbortSignal) => Promise<unknown>;
|
|
54
54
|
signal: AbortSignal;
|
|
55
55
|
}
|
|
56
56
|
export declare function evaluateDeterministicScorer(definition: DeterministicScorerDefinition, target: ScorerTarget): ScorerResult;
|
package/dist/eval/index.js
CHANGED
|
@@ -167,8 +167,22 @@ function matchesType(value, type) {
|
|
|
167
167
|
function isRecord(value) {
|
|
168
168
|
return value !== null && typeof value === 'object' && !Array.isArray(value);
|
|
169
169
|
}
|
|
170
|
+
/** Structural deep equality, insensitive to object key order. */
|
|
170
171
|
function deepEqual(a, b) {
|
|
171
|
-
|
|
172
|
+
if (Object.is(a, b))
|
|
173
|
+
return true;
|
|
174
|
+
if (Array.isArray(a) || Array.isArray(b)) {
|
|
175
|
+
if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length)
|
|
176
|
+
return false;
|
|
177
|
+
return a.every((entry, index) => deepEqual(entry, b[index]));
|
|
178
|
+
}
|
|
179
|
+
if (isRecord(a) && isRecord(b)) {
|
|
180
|
+
const keysA = Object.keys(a);
|
|
181
|
+
if (keysA.length !== Object.keys(b).length)
|
|
182
|
+
return false;
|
|
183
|
+
return keysA.every((key) => Object.hasOwn(b, key) && deepEqual(a[key], b[key]));
|
|
184
|
+
}
|
|
185
|
+
return false;
|
|
172
186
|
}
|
|
173
187
|
function toJsonValue(value) {
|
|
174
188
|
if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean')
|