@purista/harness 1.2.6 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/agents/index.d.ts +7 -1
- package/dist/agents/index.js +56 -38
- package/dist/errors/catalog.d.ts +18 -2
- package/dist/errors/catalog.js +10 -0
- package/dist/eval/index.d.ts +3 -3
- package/dist/eval/index.js +15 -1
- package/dist/harness/defineHarness.d.ts +91 -1
- package/dist/harness/defineHarness.js +110 -1
- package/dist/index.d.ts +37 -17
- package/dist/index.js +30 -16
- package/dist/local/index.d.ts +36 -0
- package/dist/local/index.js +24 -0
- package/dist/local/local-sandbox.d.ts +25 -0
- package/dist/local/local-sandbox.js +368 -0
- package/dist/local/local-workspace.d.ts +56 -0
- package/dist/local/local-workspace.js +496 -0
- package/dist/local/ref-hash.d.ts +6 -0
- package/dist/local/ref-hash.js +9 -0
- package/dist/local/sqlite-storage.d.ts +106 -0
- package/dist/local/sqlite-storage.js +680 -0
- package/dist/models/adapter-utils.d.ts +52 -0
- package/dist/models/adapter-utils.js +81 -0
- package/dist/models/registry.js +28 -37
- package/dist/models/stream-pump.d.ts +16 -0
- package/dist/models/stream-pump.js +77 -0
- package/dist/ports/base-model-provider.d.ts +7 -1
- package/dist/ports/base-model-provider.js +384 -87
- package/dist/ports/capabilities.d.ts +16 -2
- package/dist/ports/context-checkpoints.d.ts +63 -0
- package/dist/ports/context-checkpoints.js +33 -0
- package/dist/ports/index.d.ts +1 -0
- package/dist/ports/index.js +1 -0
- package/dist/ports/model-provider.d.ts +94 -0
- package/dist/runtime/durable.d.ts +11 -0
- package/dist/runtime/durable.js +15 -2
- package/dist/runtime/sessionDurable.js +47 -21
- package/dist/sessions/index.d.ts +17 -6
- package/dist/sessions/index.js +337 -81
- package/dist/skills/index.d.ts +0 -2
- package/dist/skills/index.js +0 -8
- package/dist/state/in-memory.js +6 -6
- package/dist/telemetry/shim.js +2 -6
- package/dist/telemetry/span-attrs.d.ts +9 -0
- package/dist/telemetry/span-attrs.js +27 -0
- package/dist/testing/durableWorkspaceStoreContract.js +69 -0
- package/dist/testing/fakeLogger.d.ts +29 -0
- package/dist/testing/fakeLogger.js +47 -0
- package/dist/testing/fakeSandbox.d.ts +27 -0
- package/dist/testing/fakeSandbox.js +153 -0
- package/dist/testing/fakeStateStore.d.ts +36 -0
- package/dist/testing/fakeStateStore.js +66 -0
- package/dist/testing/index.d.ts +10 -4
- package/dist/testing/index.js +14 -4
- package/dist/testing/loggerContract.d.ts +9 -0
- package/dist/testing/loggerContract.js +62 -0
- package/dist/testing/modelProviderContract.d.ts +12 -0
- package/dist/testing/modelProviderContract.js +222 -0
- package/dist/testing/recordEvents.d.ts +3 -0
- package/dist/testing/recordEvents.js +8 -0
- package/dist/testing/stateStoreContract.js +27 -0
- package/dist/tools/index.js +26 -1
- package/dist/tools/mcp/http.d.ts +2 -0
- package/dist/tools/mcp/http.js +34 -21
- package/dist/tools/mcp/runner.d.ts +4 -0
- package/dist/tools/mcp/runner.js +75 -21
- package/dist/tools/mcp/stdio.d.ts +7 -1
- package/dist/tools/mcp/stdio.js +102 -23
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workspace/in-memory.d.ts +1 -0
- package/dist/workspace/in-memory.js +47 -12
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -15,6 +15,12 @@ Telemetry defaults to dual GenAI and OpenInference attributes with no content
|
|
|
15
15
|
capture. `InvokeOptions.traceparent` and `tracestate` accept inbound W3C Trace
|
|
16
16
|
Context so application traces can parent harness run spans.
|
|
17
17
|
|
|
18
|
+
Workflows can orchestrate typed child agents with `ctx.agents.<id>(input)`.
|
|
19
|
+
Child-agent calls are disabled until a workflow declares `delegation` or the
|
|
20
|
+
harness opts in with `defaults.delegation.enabled: true`. Opted-in workflows get
|
|
21
|
+
bounded fan-out, agent allowlists, per-agent model alias overrides, and
|
|
22
|
+
lineage-rich run events.
|
|
23
|
+
|
|
18
24
|
See [Evaluating Prompts](https://github.com/puristajs/harness/blob/main/docs/guides/evaluating-prompts.md)
|
|
19
25
|
for the execution model, scorer limits, and privacy behavior.
|
|
20
26
|
|
package/dist/agents/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { Logger } from '../logger/index.js';
|
|
2
2
|
import type { JsonValue } from '../models/json.js';
|
|
3
3
|
import type { Message } from '../models/state.js';
|
|
4
|
-
import type { AgentDefinition, ResolvedSkill, RunEvent, ToolsConfig } from '../harness/defineHarness.js';
|
|
4
|
+
import type { AgentDefinition, ContextCheckpoints, ResolvedSkill, RunEvent, ToolsConfig } from '../harness/defineHarness.js';
|
|
5
5
|
import type { MemoryFacade } from '../ports/memory.js';
|
|
6
6
|
import type { SandboxSession } from '../sandbox/index.js';
|
|
7
7
|
import { type TelemetryShim } from '../telemetry/index.js';
|
|
@@ -12,15 +12,19 @@ export declare function runDefaultAgent(args: {
|
|
|
12
12
|
runId: string;
|
|
13
13
|
sessionId: string;
|
|
14
14
|
workflowId?: string;
|
|
15
|
+
delegationCallId?: string;
|
|
16
|
+
delegationDepth?: number;
|
|
15
17
|
input: unknown;
|
|
16
18
|
history: Message[];
|
|
17
19
|
agent: AgentDefinition<any>;
|
|
20
|
+
modelAlias?: string;
|
|
18
21
|
models: Record<string, any>;
|
|
19
22
|
skills: Record<string, ResolvedSkill>;
|
|
20
23
|
customTools: ToolsConfig;
|
|
21
24
|
mcpRegistry?: McpRunnerRegistry;
|
|
22
25
|
session: SandboxSession;
|
|
23
26
|
memory: MemoryFacade;
|
|
27
|
+
checkpoints: ContextCheckpoints;
|
|
24
28
|
mountedSkills: Set<string>;
|
|
25
29
|
historyWindow?: number;
|
|
26
30
|
maxSteps: number;
|
|
@@ -35,3 +39,5 @@ export declare function runDefaultAgent(args: {
|
|
|
35
39
|
output: JsonValue;
|
|
36
40
|
emitted: Message[];
|
|
37
41
|
}>;
|
|
42
|
+
/** Runs `fn` over `items` with bounded concurrency, preserving input order. */
|
|
43
|
+
export declare function runLimited<T, R>(items: readonly T[], limit: number, fn: (item: T) => Promise<R>): Promise<R[]>;
|
package/dist/agents/index.js
CHANGED
|
@@ -7,6 +7,7 @@ import { BUILTIN_ALIAS_TO_CANONICAL, getBuiltinToolSpecs, invokeBuiltinTool } fr
|
|
|
7
7
|
import { getMcpToolSpecs, invokeMcpTool, isMcpToolDefinition } from '../tools/mcp/runner.js';
|
|
8
8
|
import { ulid } from '../ulid/index.js';
|
|
9
9
|
import { abortError, withAbortSignal } from '../runtime/abort.js';
|
|
10
|
+
import { metadataSpanAttrs } from '../telemetry/span-attrs.js';
|
|
10
11
|
function stringifyInput(input) { return typeof input === 'string' ? input : JSON.stringify(input); }
|
|
11
12
|
function isReadonlyBuiltin(name) { return ['read', 'list', 'glob', 'grep'].includes(name); }
|
|
12
13
|
async function checkPermission(agentId, runId, sessionId, def, toolName, input) {
|
|
@@ -93,6 +94,8 @@ export async function runDefaultAgent(args) {
|
|
|
93
94
|
'harness.session.id': args.sessionId,
|
|
94
95
|
'harness.run.id': args.runId,
|
|
95
96
|
...(args.workflowId ? { 'harness.workflow.id': args.workflowId } : {}),
|
|
97
|
+
...(args.delegationCallId ? { 'harness.agent.delegation_call_id': args.delegationCallId } : {}),
|
|
98
|
+
...(args.delegationDepth !== undefined ? { 'harness.agent.delegation_depth': args.delegationDepth } : {}),
|
|
96
99
|
'harness.agent.id': args.agentId,
|
|
97
100
|
'gen_ai.operation.name': 'invoke_agent',
|
|
98
101
|
'openinference.span.kind': 'AGENT',
|
|
@@ -100,33 +103,24 @@ export async function runDefaultAgent(args) {
|
|
|
100
103
|
'metadata.agent_id': args.agentId,
|
|
101
104
|
[ATTR_GEN_AI_AGENT_NAME]: args.agentId,
|
|
102
105
|
[ATTR_GEN_AI_AGENT_ID]: args.agentId,
|
|
103
|
-
'harness.agent.model': args.agent.model,
|
|
106
|
+
'harness.agent.model': args.modelAlias ?? args.agent.model,
|
|
107
|
+
...(args.modelAlias && args.modelAlias !== args.agent.model ? { 'harness.agent.default_model': args.agent.model } : {}),
|
|
104
108
|
'harness.agent.has_handler': args.agent.handler !== undefined,
|
|
105
109
|
...metadataSpanAttrs(args.metadata)
|
|
106
110
|
};
|
|
107
111
|
const metrics = createMetrics(args.telemetry, agentAttrs);
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
continue;
|
|
116
|
-
if (typeof value === 'string') {
|
|
117
|
-
if (value.length <= 256)
|
|
118
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
119
|
-
continue;
|
|
120
|
-
}
|
|
121
|
-
if (typeof value === 'number' && Number.isFinite(value)) {
|
|
122
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
123
|
-
continue;
|
|
112
|
+
// Spec 08 §9: the harness tracks activated skill names per run when the
|
|
113
|
+
// `read` tool loads `/skills/<name>/SKILL.md`. Only the count is emitted —
|
|
114
|
+
// skill names stay out of telemetry.
|
|
115
|
+
const activatedSkills = new Set();
|
|
116
|
+
return args.telemetry.span(`invoke_agent ${args.agentId}`, agentAttrs, async (span) => {
|
|
117
|
+
try {
|
|
118
|
+
return await runDefaultAgentInner({ ...args, metrics, activatedSkills });
|
|
124
119
|
}
|
|
125
|
-
|
|
126
|
-
|
|
120
|
+
finally {
|
|
121
|
+
span.setAttribute('harness.agent.skills_activated', activatedSkills.size);
|
|
127
122
|
}
|
|
128
|
-
}
|
|
129
|
-
return attrs;
|
|
123
|
+
});
|
|
130
124
|
}
|
|
131
125
|
async function runDefaultAgentInner(args) {
|
|
132
126
|
if (args.signal.aborted)
|
|
@@ -134,12 +128,12 @@ async function runDefaultAgentInner(args) {
|
|
|
134
128
|
const inputSchema = args.agent.input ?? z.string();
|
|
135
129
|
const outputSchema = args.agent.output ?? z.string();
|
|
136
130
|
const parsedInput = parseAgentSchema(inputSchema, args.input, 'agent_input');
|
|
137
|
-
const
|
|
131
|
+
const selectedModelAlias = args.modelAlias ?? args.agent.model;
|
|
132
|
+
const model = args.models[selectedModelAlias];
|
|
138
133
|
if (!model)
|
|
139
|
-
throw new ValidationError('Unknown model alias', { where: 'agent_input', issues: { model:
|
|
134
|
+
throw new ValidationError('Unknown model alias', { where: 'agent_input', issues: { model: selectedModelAlias } });
|
|
140
135
|
const skillIds = args.agent.skills ?? [];
|
|
141
136
|
await mountSkillsOnce(args.session, args.mountedSkills, args.skills, skillIds);
|
|
142
|
-
const activatedSkills = new Set();
|
|
143
137
|
if (args.agent.handler) {
|
|
144
138
|
const handler = args.agent.handler;
|
|
145
139
|
const output = await withAbortSignal(args.signal, 'run', 'Run was cancelled.', () => handler({
|
|
@@ -150,6 +144,7 @@ async function runDefaultAgentInner(args) {
|
|
|
150
144
|
sessionId: args.sessionId,
|
|
151
145
|
history: { list: async () => args.history },
|
|
152
146
|
memory: args.memory,
|
|
147
|
+
checkpoints: args.checkpoints,
|
|
153
148
|
metadata: args.metadata ?? {},
|
|
154
149
|
metrics: args.metrics
|
|
155
150
|
}));
|
|
@@ -157,7 +152,7 @@ async function runDefaultAgentInner(args) {
|
|
|
157
152
|
return { output: validated, emitted: [{ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() }] };
|
|
158
153
|
}
|
|
159
154
|
const baseInstructions = typeof args.agent.instructions === 'function'
|
|
160
|
-
? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, metadata: args.metadata ?? {}, metrics: args.metrics })
|
|
155
|
+
? args.agent.instructions({ input: parsedInput, runId: args.runId, sessionId: args.sessionId, history: { list: async () => args.history }, memory: args.memory, checkpoints: args.checkpoints, metadata: args.metadata ?? {}, metrics: args.metrics })
|
|
161
156
|
: args.agent.instructions;
|
|
162
157
|
const instructions = `${baseInstructions}${buildSkillIndex(args.skills, skillIds)}`;
|
|
163
158
|
const enabledBuiltins = args.agent.builtinTools === false ? [] : args.agent.builtinTools?.slice() ?? ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
|
|
@@ -193,7 +188,13 @@ async function runDefaultAgentInner(args) {
|
|
|
193
188
|
const emitted = [];
|
|
194
189
|
const maxSteps = Math.min(args.agent.maxSteps ?? args.maxSteps, 64);
|
|
195
190
|
let steps = 0;
|
|
196
|
-
|
|
191
|
+
const agentEventMeta = {
|
|
192
|
+
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
193
|
+
...(args.delegationCallId ? { delegationCallId: args.delegationCallId } : {}),
|
|
194
|
+
...(args.delegationDepth !== undefined ? { delegationDepth: args.delegationDepth } : {}),
|
|
195
|
+
modelAlias: selectedModelAlias
|
|
196
|
+
};
|
|
197
|
+
await args.emitEvent?.({ type: 'agent.started', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), ...agentEventMeta });
|
|
197
198
|
try {
|
|
198
199
|
while (true) {
|
|
199
200
|
if (args.signal.aborted)
|
|
@@ -217,12 +218,20 @@ async function runDefaultAgentInner(args) {
|
|
|
217
218
|
// Emit one usage-bearing model event per model round-trip (including
|
|
218
219
|
// tool-call steps) so run-summary modelCalls and tokenTotals are accurate
|
|
219
220
|
// for multi-step runs.
|
|
220
|
-
await args.emitEvent?.({
|
|
221
|
+
await args.emitEvent?.({
|
|
222
|
+
type: 'model.object',
|
|
223
|
+
runId: args.runId,
|
|
224
|
+
agentId: args.agentId,
|
|
225
|
+
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
226
|
+
modelAlias: selectedModelAlias,
|
|
227
|
+
object: (response.object ?? null),
|
|
228
|
+
usage: response.usage
|
|
229
|
+
});
|
|
221
230
|
const toolCalls = (response.toolCalls ?? []);
|
|
222
231
|
if (toolCalls.length === 0) {
|
|
223
232
|
const validated = parseAgentSchema(outputSchema, response.object, 'agent_output');
|
|
224
233
|
emitted.push({ id: `msg_${ulid()}_a`, sessionId: args.sessionId, runId: args.runId, role: 'assistant', content: JSON.stringify(validated), timestamp: new Date().toISOString() });
|
|
225
|
-
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated });
|
|
234
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), output: validated, ...agentEventMeta });
|
|
226
235
|
return { output: validated, emitted };
|
|
227
236
|
}
|
|
228
237
|
const assistantMsg = {
|
|
@@ -238,8 +247,7 @@ async function runDefaultAgentInner(args) {
|
|
|
238
247
|
});
|
|
239
248
|
const outcomes = await runLimited(toolCalls, args.maxParallelToolCalls, (call) => executeToolCall({
|
|
240
249
|
...args,
|
|
241
|
-
enabledCustomTools
|
|
242
|
-
activatedSkills
|
|
250
|
+
enabledCustomTools
|
|
243
251
|
}, call));
|
|
244
252
|
for (const outcome of outcomes) {
|
|
245
253
|
emitted.push(outcome.emitted);
|
|
@@ -250,11 +258,12 @@ async function runDefaultAgentInner(args) {
|
|
|
250
258
|
}
|
|
251
259
|
catch (error) {
|
|
252
260
|
// Pair every agent.started with an agent.finished, even on error/cancel/budget.
|
|
253
|
-
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error) });
|
|
261
|
+
await args.emitEvent?.({ type: 'agent.finished', runId: args.runId, agentId: args.agentId, at: new Date().toISOString(), error: serializeError(error), ...agentEventMeta });
|
|
254
262
|
throw error;
|
|
255
263
|
}
|
|
256
264
|
}
|
|
257
|
-
|
|
265
|
+
/** Runs `fn` over `items` with bounded concurrency, preserving input order. */
|
|
266
|
+
export async function runLimited(items, limit, fn) {
|
|
258
267
|
const concurrency = Math.max(1, Math.min(limit, items.length));
|
|
259
268
|
const results = new Array(items.length);
|
|
260
269
|
let next = 0;
|
|
@@ -262,10 +271,10 @@ async function runLimited(items, limit, fn) {
|
|
|
262
271
|
while (true) {
|
|
263
272
|
const index = next;
|
|
264
273
|
next += 1;
|
|
265
|
-
|
|
266
|
-
if (
|
|
274
|
+
// Index-based termination: an `undefined` element must not truncate the batch.
|
|
275
|
+
if (index >= items.length)
|
|
267
276
|
return;
|
|
268
|
-
results[index] = await fn(
|
|
277
|
+
results[index] = await fn(items[index]);
|
|
269
278
|
}
|
|
270
279
|
}
|
|
271
280
|
await Promise.all(Array.from({ length: concurrency }, () => worker()));
|
|
@@ -332,9 +341,18 @@ async function executeToolCall(args, call) {
|
|
|
332
341
|
catch (error) {
|
|
333
342
|
const failure = normalizeToolFailure(canonical, error, toolKind);
|
|
334
343
|
if (failure instanceof OperationCancelledError) {
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
344
|
+
const cancellation = args.signal.aborted
|
|
345
|
+
? new OperationCancelledError('Run was cancelled.', { scope: 'run' }, args.signal.reason ?? failure)
|
|
346
|
+
: failure;
|
|
347
|
+
// Pair tool.started with a best-effort tool.finished even on cancellation,
|
|
348
|
+
// matching the deliberate started/finished pairing policy above.
|
|
349
|
+
try {
|
|
350
|
+
await args.emitEvent?.({ type: 'tool.finished', runId: args.runId, agentId: args.agentId, toolId: canonical, callId: call.id, error: serializeError(cancellation) });
|
|
351
|
+
}
|
|
352
|
+
catch {
|
|
353
|
+
// Best-effort: never mask the cancellation with an emit failure.
|
|
354
|
+
}
|
|
355
|
+
throw cancellation;
|
|
338
356
|
}
|
|
339
357
|
result = { error: serializeError(failure) };
|
|
340
358
|
}
|
package/dist/errors/catalog.d.ts
CHANGED
|
@@ -62,7 +62,12 @@ export declare class ModelError extends HarnessError {
|
|
|
62
62
|
model: string;
|
|
63
63
|
method: string;
|
|
64
64
|
status?: number;
|
|
65
|
-
reason?: 'http_error' | 'network' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
|
|
65
|
+
reason?: 'http_error' | 'network' | 'rate_limited' | 'provider_unavailable' | 'unstructured_response' | 'malformed_response' | 'context_length_exceeded' | 'embedding_count_mismatch' | 'rerank_result_mismatch';
|
|
66
|
+
retryKind?: 'none' | 'active' | 'deferred';
|
|
67
|
+
retryAfterMs?: number;
|
|
68
|
+
retryAttempt?: number;
|
|
69
|
+
retryMaxAttempts?: number;
|
|
70
|
+
rateLimit?: unknown;
|
|
66
71
|
providerCode?: string;
|
|
67
72
|
providerType?: string;
|
|
68
73
|
providerParam?: string;
|
|
@@ -98,6 +103,7 @@ export declare class ToolNotFoundError extends HarnessError {
|
|
|
98
103
|
export declare class SkillNotFoundError extends HarnessError {
|
|
99
104
|
constructor(message: string, meta: {
|
|
100
105
|
skill_id: string;
|
|
106
|
+
agent_id?: string;
|
|
101
107
|
}, cause?: unknown);
|
|
102
108
|
}
|
|
103
109
|
/** Skill manifest/frontmatter/config validation failure. */
|
|
@@ -124,6 +130,16 @@ export declare class AgentLoopBudgetError extends HarnessError {
|
|
|
124
130
|
limit: number;
|
|
125
131
|
}, cause?: unknown);
|
|
126
132
|
}
|
|
133
|
+
/** Workflow child-agent delegation was denied or exceeded a configured budget. */
|
|
134
|
+
export declare class DelegationPolicyError extends HarnessError {
|
|
135
|
+
constructor(message: string, meta: {
|
|
136
|
+
workflow_id: string;
|
|
137
|
+
agent_id: string;
|
|
138
|
+
reason: 'delegation_disabled' | 'agent_not_allowed' | 'max_child_agent_calls_exceeded' | 'max_parallel_child_agent_calls_exceeded' | 'max_delegation_depth_exceeded' | 'model_alias_not_allowed';
|
|
139
|
+
limit?: number;
|
|
140
|
+
model_alias?: string;
|
|
141
|
+
}, cause?: unknown);
|
|
142
|
+
}
|
|
127
143
|
/** Session attempted to invoke unknown workflow id. */
|
|
128
144
|
export declare class WorkflowNotFoundError extends HarnessError {
|
|
129
145
|
constructor(message: string, meta: {
|
|
@@ -146,7 +162,7 @@ export declare class SessionBusyError extends HarnessError {
|
|
|
146
162
|
/** State backend operation failed. */
|
|
147
163
|
export declare class StateError extends HarnessError {
|
|
148
164
|
constructor(message: string, meta: {
|
|
149
|
-
op: 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents' | 'memory.get' | 'memory.set' | 'memory.delete' | 'memory.list' | 'memory.search';
|
|
165
|
+
op: 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'replaceMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents' | 'contextCheckpointWrite' | 'contextCheckpointRead' | 'contextCheckpointList' | 'contextCheckpointDelete' | 'memory.get' | 'memory.set' | 'memory.delete' | 'memory.list' | 'memory.search';
|
|
150
166
|
reason?: 'duplicate_message_id' | string;
|
|
151
167
|
adapter?: 'memory' | string;
|
|
152
168
|
memory_provider?: string;
|
package/dist/errors/catalog.js
CHANGED
|
@@ -33,7 +33,11 @@ export class SandboxNoExecutorError extends HarnessError {
|
|
|
33
33
|
export class ModelError extends HarnessError {
|
|
34
34
|
constructor(message, meta, cause) {
|
|
35
35
|
const retriable = meta.reason === 'network'
|
|
36
|
+
|| meta.reason === 'rate_limited'
|
|
37
|
+
|| meta.reason === 'provider_unavailable'
|
|
36
38
|
|| meta.status === 429
|
|
39
|
+
|| meta.status === 408
|
|
40
|
+
|| meta.status === 409
|
|
37
41
|
|| (typeof meta.status === 'number' && meta.status >= 500);
|
|
38
42
|
super({ code: 'MODEL_ERROR', category: 'model', retriable, message, meta, cause });
|
|
39
43
|
}
|
|
@@ -87,6 +91,12 @@ export class AgentLoopBudgetError extends HarnessError {
|
|
|
87
91
|
super({ code: 'AGENT_LOOP_BUDGET_EXCEEDED', category: 'validation', retriable: false, message, meta, cause });
|
|
88
92
|
}
|
|
89
93
|
}
|
|
94
|
+
/** Workflow child-agent delegation was denied or exceeded a configured budget. */
|
|
95
|
+
export class DelegationPolicyError extends HarnessError {
|
|
96
|
+
constructor(message, meta, cause) {
|
|
97
|
+
super({ code: 'DELEGATION_POLICY_ERROR', category: 'validation', retriable: false, message, meta, cause });
|
|
98
|
+
}
|
|
99
|
+
}
|
|
90
100
|
/** Session attempted to invoke unknown workflow id. */
|
|
91
101
|
export class WorkflowNotFoundError extends HarnessError {
|
|
92
102
|
constructor(message, meta, cause) {
|
package/dist/eval/index.d.ts
CHANGED
|
@@ -28,7 +28,7 @@ export interface ScorerResult {
|
|
|
28
28
|
passed: boolean;
|
|
29
29
|
evidence?: JsonValue;
|
|
30
30
|
}
|
|
31
|
-
export interface PromptCandidate
|
|
31
|
+
export interface PromptCandidate {
|
|
32
32
|
id: string;
|
|
33
33
|
prompt: string;
|
|
34
34
|
metadata?: Record<string, JsonValue>;
|
|
@@ -47,10 +47,10 @@ export interface CandidateScore {
|
|
|
47
47
|
scorerCount: number;
|
|
48
48
|
}
|
|
49
49
|
export interface EvaluatePromptCandidatesInput<I = unknown> {
|
|
50
|
-
candidates: PromptCandidate
|
|
50
|
+
candidates: PromptCandidate[];
|
|
51
51
|
items: EvaluationItem<I>[];
|
|
52
52
|
scorer: (target: ScorerTarget, signal: AbortSignal) => Promise<ScorerResult>;
|
|
53
|
-
runCandidate: (candidate: PromptCandidate
|
|
53
|
+
runCandidate: (candidate: PromptCandidate, item: EvaluationItem<I>, signal: AbortSignal) => Promise<unknown>;
|
|
54
54
|
signal: AbortSignal;
|
|
55
55
|
}
|
|
56
56
|
export declare function evaluateDeterministicScorer(definition: DeterministicScorerDefinition, target: ScorerTarget): ScorerResult;
|
package/dist/eval/index.js
CHANGED
|
@@ -167,8 +167,22 @@ function matchesType(value, type) {
|
|
|
167
167
|
function isRecord(value) {
|
|
168
168
|
return value !== null && typeof value === 'object' && !Array.isArray(value);
|
|
169
169
|
}
|
|
170
|
+
/** Structural deep equality, insensitive to object key order. */
|
|
170
171
|
function deepEqual(a, b) {
|
|
171
|
-
|
|
172
|
+
if (Object.is(a, b))
|
|
173
|
+
return true;
|
|
174
|
+
if (Array.isArray(a) || Array.isArray(b)) {
|
|
175
|
+
if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length)
|
|
176
|
+
return false;
|
|
177
|
+
return a.every((entry, index) => deepEqual(entry, b[index]));
|
|
178
|
+
}
|
|
179
|
+
if (isRecord(a) && isRecord(b)) {
|
|
180
|
+
const keysA = Object.keys(a);
|
|
181
|
+
if (keysA.length !== Object.keys(b).length)
|
|
182
|
+
return false;
|
|
183
|
+
return keysA.every((key) => Object.hasOwn(b, key) && deepEqual(a[key], b[key]));
|
|
184
|
+
}
|
|
185
|
+
return false;
|
|
172
186
|
}
|
|
173
187
|
function toJsonValue(value) {
|
|
174
188
|
if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean')
|
|
@@ -6,6 +6,8 @@ import type { Metrics, TelemetryShim } from '../telemetry/index.js';
|
|
|
6
6
|
import type { HarnessAdapterContext } from '../ports/harness-context.js';
|
|
7
7
|
import type { MemoryAdapter, MemoryFacade, SessionMemory } from '../ports/memory.js';
|
|
8
8
|
import type { DurableWorkspaceStore } from '../ports/workspace.js';
|
|
9
|
+
import type { ContextCheckpointStore } from '../ports/context-checkpoints.js';
|
|
10
|
+
import type { ContextCheckpoint, ContextCheckpointQuery } from '../ports/context-checkpoints.js';
|
|
9
11
|
import type { JsonValue } from '../models/json.js';
|
|
10
12
|
import type { Message } from '../models/state.js';
|
|
11
13
|
import type { RunStatus } from '../models/state.js';
|
|
@@ -43,6 +45,32 @@ export interface HarnessDefaults {
|
|
|
43
45
|
* `undefined` keeps all history, `0` keeps only system messages.
|
|
44
46
|
*/
|
|
45
47
|
historyWindow?: number;
|
|
48
|
+
/** Default workflow child-agent delegation budgets. */
|
|
49
|
+
delegation?: DelegationDefaults;
|
|
50
|
+
}
|
|
51
|
+
/** Workflow child-agent delegation defaults. Delegation is disabled unless explicitly enabled. */
|
|
52
|
+
export interface DelegationDefaults {
|
|
53
|
+
/**
|
|
54
|
+
* Enable workflow child-agent calls for workflows that do not declare their
|
|
55
|
+
* own `delegation` policy. Default: `false`.
|
|
56
|
+
*/
|
|
57
|
+
enabled?: boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Maximum child-agent calls one workflow run may start. Default: `32`.
|
|
60
|
+
* Set per workflow with `workflow.delegation.maxChildAgentCalls`.
|
|
61
|
+
*/
|
|
62
|
+
maxChildAgentCalls?: number;
|
|
63
|
+
/**
|
|
64
|
+
* Maximum child-agent calls active at the same time inside one workflow run.
|
|
65
|
+
* Default: `8`.
|
|
66
|
+
*/
|
|
67
|
+
maxParallelChildAgentCalls?: number;
|
|
68
|
+
/**
|
|
69
|
+
* Maximum local delegation depth. Default: `1`.
|
|
70
|
+
* Current harness workflows invoke leaf agents, so `1` allows normal
|
|
71
|
+
* workflow-to-agent calls and `0` disables child-agent delegation.
|
|
72
|
+
*/
|
|
73
|
+
maxDepth?: number;
|
|
46
74
|
}
|
|
47
75
|
/** Top-level harness options passed to {@link defineHarness}. */
|
|
48
76
|
export interface HarnessOptions {
|
|
@@ -333,21 +361,43 @@ export interface AgentContextMinimal<S extends BuilderState, I> {
|
|
|
333
361
|
runId: string;
|
|
334
362
|
history: ConversationHistory;
|
|
335
363
|
memory: MemoryFacade;
|
|
364
|
+
checkpoints: ContextCheckpoints;
|
|
336
365
|
metadata: Readonly<Record<string, JsonValue>>;
|
|
337
366
|
metrics: Metrics;
|
|
338
367
|
}
|
|
368
|
+
/** Run-bound facade for explicit long-horizon context checkpoints. */
|
|
369
|
+
export interface ContextCheckpoints {
|
|
370
|
+
write(input: {
|
|
371
|
+
sequence: number;
|
|
372
|
+
kind: ContextCheckpoint['kind'];
|
|
373
|
+
payload: JsonValue;
|
|
374
|
+
metadata?: Record<string, JsonValue>;
|
|
375
|
+
}): Promise<void>;
|
|
376
|
+
list(query?: Omit<ContextCheckpointQuery, 'runId' | 'sessionId' | 'workflowId' | 'agentId' | 'signal'>): Promise<readonly ContextCheckpoint[]>;
|
|
377
|
+
read(ref: {
|
|
378
|
+
sequence: number;
|
|
379
|
+
kind: ContextCheckpoint['kind'];
|
|
380
|
+
}): Promise<ContextCheckpoint | undefined>;
|
|
381
|
+
delete(ref: {
|
|
382
|
+
sequence: number;
|
|
383
|
+
kind: ContextCheckpoint['kind'];
|
|
384
|
+
}): Promise<void>;
|
|
385
|
+
}
|
|
339
386
|
/** Full context passed to workflow handlers. */
|
|
340
387
|
export interface WorkflowContext<S extends BuilderState, I, O> {
|
|
341
388
|
input: I;
|
|
342
389
|
agents: {
|
|
343
|
-
[K in keyof NonNullable<S['agents']>]: (input: AgentInput<S, K>, opts?:
|
|
390
|
+
[K in keyof NonNullable<S['agents']>]: (input: AgentInput<S, K>, opts?: WorkflowAgentInvokeOptions<S, K>) => Promise<AgentOutput<S, K>>;
|
|
344
391
|
};
|
|
345
392
|
models: ModelHandles<S>;
|
|
393
|
+
/** Harness logger scoped for workflow handler code (spec 10 `WorkflowContext`). */
|
|
394
|
+
log: Logger;
|
|
346
395
|
signal: AbortSignal;
|
|
347
396
|
runId: string;
|
|
348
397
|
sessionId: string;
|
|
349
398
|
metadata: Readonly<Record<string, JsonValue>>;
|
|
350
399
|
memory: MemoryFacade;
|
|
400
|
+
checkpoints: ContextCheckpoints;
|
|
351
401
|
metrics: Metrics;
|
|
352
402
|
/**
|
|
353
403
|
* Runs `fn` as a durable step. Under a durable invocation the output is
|
|
@@ -357,6 +407,15 @@ export interface WorkflowContext<S extends BuilderState, I, O> {
|
|
|
357
407
|
step<T extends JsonValue>(stepId: string, fn: () => Promise<T>): Promise<T>;
|
|
358
408
|
output?: O;
|
|
359
409
|
}
|
|
410
|
+
/** Invoke options accepted by workflow-local child-agent calls. */
|
|
411
|
+
export type WorkflowAgentInvokeOptions<S extends BuilderState, K extends keyof NonNullable<S['agents']>> = InvokeOptions & {
|
|
412
|
+
/**
|
|
413
|
+
* Optional model alias override for this child-agent call.
|
|
414
|
+
* The alias must exist on the harness model registry and be allowed by the
|
|
415
|
+
* workflow delegation policy.
|
|
416
|
+
*/
|
|
417
|
+
model?: keyof NonNullable<S['models']> & string;
|
|
418
|
+
};
|
|
360
419
|
/** Full context passed to custom agent handlers. */
|
|
361
420
|
export interface AgentContext<S extends BuilderState, I, O> extends AgentContextMinimal<S, I> {
|
|
362
421
|
models: ModelHandles<S>;
|
|
@@ -381,6 +440,7 @@ export interface AgentDefinition<S extends BuilderState, I extends z.ZodTypeAny
|
|
|
381
440
|
export interface WorkflowDefinition<S extends BuilderState, I extends z.ZodTypeAny = z.ZodTypeAny, O extends z.ZodTypeAny = z.ZodTypeAny> {
|
|
382
441
|
input?: I;
|
|
383
442
|
output?: O;
|
|
443
|
+
delegation?: WorkflowDelegationPolicy<S>;
|
|
384
444
|
handler: (ctx: WorkflowContext<S, z.infer<I>, z.infer<O>>) => Promise<z.infer<O>>;
|
|
385
445
|
}
|
|
386
446
|
type AgentSchemaFields = {
|
|
@@ -423,8 +483,26 @@ type WorkflowSchemaFields = {
|
|
|
423
483
|
type WorkflowDefinitionResolved<S extends BuilderState, I extends z.ZodTypeAny, O extends z.ZodTypeAny> = {
|
|
424
484
|
input?: I;
|
|
425
485
|
output?: O;
|
|
486
|
+
delegation?: WorkflowDelegationPolicy<S>;
|
|
426
487
|
handler: (ctx: WorkflowContext<S, z.infer<I>, z.infer<O>>) => Promise<z.infer<O>>;
|
|
427
488
|
};
|
|
489
|
+
/** Policy for workflow-local child-agent delegation through `ctx.agents`. */
|
|
490
|
+
export interface WorkflowDelegationPolicy<S extends BuilderState = BuilderState> {
|
|
491
|
+
/** Enable or disable child-agent calls for this workflow. A policy object without this field enables delegation. */
|
|
492
|
+
enabled?: boolean;
|
|
493
|
+
/** Child agent ids this workflow may call. Omit to allow all registered agents. */
|
|
494
|
+
agents?: readonly (keyof NonNullable<S['agents']> & string)[];
|
|
495
|
+
/** Per-run child-agent call limit. Overrides `defaults.delegation.maxChildAgentCalls`. */
|
|
496
|
+
maxChildAgentCalls?: number;
|
|
497
|
+
/** Per-run active child-agent call limit. Overrides `defaults.delegation.maxParallelChildAgentCalls`. */
|
|
498
|
+
maxParallelChildAgentCalls?: number;
|
|
499
|
+
/** Maximum local delegation depth. Overrides `defaults.delegation.maxDepth`. */
|
|
500
|
+
maxDepth?: number;
|
|
501
|
+
/** Model aliases allowed for every child-agent call in this workflow, including calls running on the agent's default `model`. */
|
|
502
|
+
modelAliases?: readonly (keyof NonNullable<S['models']> & string)[];
|
|
503
|
+
/** Per-child-agent model alias allowlists. These replace `modelAliases` for the named agent. */
|
|
504
|
+
agentModelAliases?: Partial<Record<keyof NonNullable<S['agents']> & string, readonly (keyof NonNullable<S['models']> & string)[]>>;
|
|
505
|
+
}
|
|
428
506
|
type WorkflowDefinitionFor<S extends BuilderState, D> = D extends {
|
|
429
507
|
input: infer I extends z.ZodTypeAny;
|
|
430
508
|
output: infer O extends z.ZodTypeAny;
|
|
@@ -576,11 +654,21 @@ export type RunEvent = {
|
|
|
576
654
|
runId: string;
|
|
577
655
|
agentId: string;
|
|
578
656
|
at: string;
|
|
657
|
+
workflowId?: string;
|
|
658
|
+
parentAgentId?: string;
|
|
659
|
+
delegationCallId?: string;
|
|
660
|
+
delegationDepth?: number;
|
|
661
|
+
modelAlias?: string;
|
|
579
662
|
} | {
|
|
580
663
|
type: 'agent.finished';
|
|
581
664
|
runId: string;
|
|
582
665
|
agentId: string;
|
|
583
666
|
at: string;
|
|
667
|
+
workflowId?: string;
|
|
668
|
+
parentAgentId?: string;
|
|
669
|
+
delegationCallId?: string;
|
|
670
|
+
delegationDepth?: number;
|
|
671
|
+
modelAlias?: string;
|
|
584
672
|
output?: JsonValue;
|
|
585
673
|
error?: SerializedError;
|
|
586
674
|
} | {
|
|
@@ -657,6 +745,7 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
|
|
|
657
745
|
memory(adapter: MemoryAdapter): HarnessBuilder<S>;
|
|
658
746
|
runtime(runtime: DurableRuntimeAdapter): HarnessBuilder<S>;
|
|
659
747
|
workspaceStore(store: DurableWorkspaceStore): HarnessBuilder<S>;
|
|
748
|
+
checkpoints(store: ContextCheckpointStore): HarnessBuilder<S>;
|
|
660
749
|
requires(capabilities: readonly AdapterCapability[]): HarnessBuilder<S>;
|
|
661
750
|
defaults(defaults: HarnessDefaults): HarnessBuilder<S>;
|
|
662
751
|
models<const M extends ModelsConfig>(models: M): HarnessBuilder<S & {
|
|
@@ -745,6 +834,7 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
|
|
|
745
834
|
* summarize_ticket: {
|
|
746
835
|
* input: z.object({ ticket: z.string() }),
|
|
747
836
|
* output: z.string(),
|
|
837
|
+
* delegation: { agents: ['summarize'] },
|
|
748
838
|
* handler: (ctx) => ctx.agents.summarize(ctx.input.ticket)
|
|
749
839
|
* }
|
|
750
840
|
* })
|