@botbotgo/agent-harness 0.0.124 → 0.0.125
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/upstream-runtime-ab-benchmark.d.ts +32 -2
- package/dist/benchmark/upstream-runtime-ab-benchmark.js +58 -2
- package/dist/contracts/runtime.d.ts +32 -0
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/runtime/agent-runtime-adapter.d.ts +3 -3
- package/dist/runtime/agent-runtime-adapter.js +31 -7
- package/dist/runtime/harness/run/helpers.d.ts +1 -1
- package/dist/runtime/harness/run/helpers.js +1 -1
- package/dist/runtime/harness/run/stream-run.js +1 -8
- package/dist/runtime/harness/run/thread-records.d.ts +1 -1
- package/dist/runtime/harness/run/thread-records.js +10 -7
- package/dist/runtime/harness.js +2 -2
- package/package.json +1 -1
|
@@ -1,15 +1,23 @@
|
|
|
1
|
-
export declare const DEFAULT_UPSTREAM_BENCHMARK_PATHS: readonly ["harness", "raw-langchain-v1", "raw-deepagent"];
|
|
1
|
+
export declare const DEFAULT_UPSTREAM_BENCHMARK_PATHS: readonly ["harness", "harness-minimal-upstream", "raw-langchain-v1", "raw-deepagent"];
|
|
2
2
|
export declare const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD: "tool";
|
|
3
|
+
export declare const DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS: readonly ["normal", "complex", "extreme"];
|
|
3
4
|
export type UpstreamBenchmarkPath = (typeof DEFAULT_UPSTREAM_BENCHMARK_PATHS)[number];
|
|
4
5
|
export type UpstreamBenchmarkWorkload = "tool" | "no-tool";
|
|
6
|
+
export type UpstreamBenchmarkScenario = (typeof DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS)[number];
|
|
5
7
|
export type UpstreamBenchmarkRunSummary = {
|
|
6
8
|
providerLabel: string;
|
|
7
9
|
model: string;
|
|
10
|
+
scenario: UpstreamBenchmarkScenario;
|
|
8
11
|
path: UpstreamBenchmarkPath;
|
|
9
12
|
runNumber: number;
|
|
10
13
|
status: "completed" | "failed";
|
|
11
14
|
totalMs: number;
|
|
12
15
|
firstTokenMs: number | null;
|
|
16
|
+
setupMs: number | null;
|
|
17
|
+
firstToolMs: number | null;
|
|
18
|
+
lastToolMs: number | null;
|
|
19
|
+
finalOutputMs: number | null;
|
|
20
|
+
cleanupMs: number | null;
|
|
13
21
|
outputLength: number;
|
|
14
22
|
normalizedOutputLength: number;
|
|
15
23
|
toolCallCount: number;
|
|
@@ -19,6 +27,7 @@ export type UpstreamBenchmarkRunSummary = {
|
|
|
19
27
|
export type UpstreamBenchmarkAggregateSummary = {
|
|
20
28
|
providerLabel: string;
|
|
21
29
|
model: string;
|
|
30
|
+
scenario: UpstreamBenchmarkScenario;
|
|
22
31
|
path: UpstreamBenchmarkPath;
|
|
23
32
|
repetitions: number;
|
|
24
33
|
successCount: number;
|
|
@@ -31,6 +40,11 @@ export type UpstreamBenchmarkAggregateSummary = {
|
|
|
31
40
|
trimmedAvgFirstTokenMs: number | null;
|
|
32
41
|
medianFirstTokenMs: number | null;
|
|
33
42
|
p95FirstTokenMs: number | null;
|
|
43
|
+
avgSetupMs: number | null;
|
|
44
|
+
avgFirstToolMs: number | null;
|
|
45
|
+
avgLastToolMs: number | null;
|
|
46
|
+
avgFinalOutputMs: number | null;
|
|
47
|
+
avgCleanupMs: number | null;
|
|
34
48
|
avgOutputLength: number | null;
|
|
35
49
|
avgNormalizedOutputLength: number | null;
|
|
36
50
|
avgToolCallCount: number | null;
|
|
@@ -44,8 +58,24 @@ export type UpstreamBenchmarkComparison = {
|
|
|
44
58
|
avgFirstTokenMsDelta: number | null;
|
|
45
59
|
avgFirstTokenMsOverheadPct: number | null;
|
|
46
60
|
};
|
|
61
|
+
export type UpstreamBenchmarkPhaseCheckpoint = {
|
|
62
|
+
label: string;
|
|
63
|
+
atMs: number | null;
|
|
64
|
+
};
|
|
65
|
+
export type UpstreamBenchmarkTemperature = "cold" | "warm";
|
|
66
|
+
export type UpstreamBenchmarkDurationSummary = {
|
|
67
|
+
count: number;
|
|
68
|
+
totalMs: number;
|
|
69
|
+
avgMs: number | null;
|
|
70
|
+
maxMs: number | null;
|
|
71
|
+
};
|
|
47
72
|
export declare function resolveUpstreamBenchmarkPaths(rawValue?: string): readonly UpstreamBenchmarkPath[];
|
|
48
73
|
export declare function resolveUpstreamBenchmarkWorkload(rawValue?: string): UpstreamBenchmarkWorkload;
|
|
74
|
+
export declare function resolveUpstreamBenchmarkScenarios(rawValue?: string): readonly UpstreamBenchmarkScenario[];
|
|
49
75
|
export declare function extractLastMatchingToken(output: string, prefixes: readonly string[]): string;
|
|
50
|
-
export declare function aggregateUpstreamBenchmarkRuns(providerLabel: string, model: string, path: UpstreamBenchmarkPath, runs: UpstreamBenchmarkRunSummary[]): UpstreamBenchmarkAggregateSummary;
|
|
76
|
+
export declare function aggregateUpstreamBenchmarkRuns(providerLabel: string, model: string, scenario: UpstreamBenchmarkScenario, path: UpstreamBenchmarkPath, runs: UpstreamBenchmarkRunSummary[]): UpstreamBenchmarkAggregateSummary;
|
|
77
|
+
export declare function withUpstreamBenchmarkCleanup(summary: UpstreamBenchmarkRunSummary, cleanupMs: number | null): UpstreamBenchmarkRunSummary;
|
|
78
|
+
export declare function summarizeUpstreamBenchmarkPhases(checkpoints: readonly UpstreamBenchmarkPhaseCheckpoint[]): Record<string, number | null>;
|
|
79
|
+
export declare function selectUpstreamBenchmarkRunsByTemperature(runs: readonly UpstreamBenchmarkRunSummary[], temperature: UpstreamBenchmarkTemperature): UpstreamBenchmarkRunSummary[];
|
|
80
|
+
export declare function summarizeUpstreamBenchmarkDurations(values: readonly number[]): UpstreamBenchmarkDurationSummary;
|
|
51
81
|
export declare function compareUpstreamBenchmarkPaths(baseline: UpstreamBenchmarkAggregateSummary, candidate: UpstreamBenchmarkAggregateSummary): UpstreamBenchmarkComparison;
|
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
export const DEFAULT_UPSTREAM_BENCHMARK_PATHS = Object.freeze([
|
|
2
2
|
"harness",
|
|
3
|
+
"harness-minimal-upstream",
|
|
3
4
|
"raw-langchain-v1",
|
|
4
5
|
"raw-deepagent",
|
|
5
6
|
]);
|
|
6
7
|
export const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD = "tool";
|
|
8
|
+
export const DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS = Object.freeze([
|
|
9
|
+
"normal",
|
|
10
|
+
"complex",
|
|
11
|
+
"extreme",
|
|
12
|
+
]);
|
|
7
13
|
function average(values) {
|
|
8
14
|
return Number((values.reduce((sum, value) => sum + value, 0) / values.length).toFixed(2));
|
|
9
15
|
}
|
|
@@ -47,12 +53,25 @@ export function resolveUpstreamBenchmarkPaths(rawValue) {
|
|
|
47
53
|
const parsed = rawValue
|
|
48
54
|
.split(",")
|
|
49
55
|
.map((value) => value.trim().toLowerCase())
|
|
50
|
-
.filter((value) => value === "harness" ||
|
|
56
|
+
.filter((value) => value === "harness" ||
|
|
57
|
+
value === "harness-minimal-upstream" ||
|
|
58
|
+
value === "raw-langchain-v1" ||
|
|
59
|
+
value === "raw-deepagent");
|
|
51
60
|
return parsed.length > 0 ? parsed : [...DEFAULT_UPSTREAM_BENCHMARK_PATHS];
|
|
52
61
|
}
|
|
53
62
|
export function resolveUpstreamBenchmarkWorkload(rawValue) {
|
|
54
63
|
return rawValue?.trim().toLowerCase() === "no-tool" ? "no-tool" : DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD;
|
|
55
64
|
}
|
|
65
|
+
export function resolveUpstreamBenchmarkScenarios(rawValue) {
|
|
66
|
+
if (!rawValue) {
|
|
67
|
+
return [...DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS];
|
|
68
|
+
}
|
|
69
|
+
const parsed = rawValue
|
|
70
|
+
.split(",")
|
|
71
|
+
.map((value) => value.trim().toLowerCase())
|
|
72
|
+
.filter((value) => value === "normal" || value === "complex" || value === "extreme");
|
|
73
|
+
return parsed.length > 0 ? parsed : [...DEFAULT_UPSTREAM_BENCHMARK_SCENARIOS];
|
|
74
|
+
}
|
|
56
75
|
export function extractLastMatchingToken(output, prefixes) {
|
|
57
76
|
const normalized = output.replace(/\s+/g, " ").trim();
|
|
58
77
|
let matched = "";
|
|
@@ -69,7 +88,7 @@ export function extractLastMatchingToken(output, prefixes) {
|
|
|
69
88
|
}
|
|
70
89
|
return matched || normalized;
|
|
71
90
|
}
|
|
72
|
-
export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs) {
|
|
91
|
+
export function aggregateUpstreamBenchmarkRuns(providerLabel, model, scenario, path, runs) {
|
|
73
92
|
const successfulRuns = runs.filter((run) => run.status === "completed");
|
|
74
93
|
const totalValues = successfulRuns.map((run) => run.totalMs);
|
|
75
94
|
const firstTokenValues = successfulRuns
|
|
@@ -78,6 +97,7 @@ export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs)
|
|
|
78
97
|
return {
|
|
79
98
|
providerLabel,
|
|
80
99
|
model,
|
|
100
|
+
scenario,
|
|
81
101
|
path,
|
|
82
102
|
repetitions: runs.length,
|
|
83
103
|
successCount: successfulRuns.length,
|
|
@@ -90,12 +110,48 @@ export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs)
|
|
|
90
110
|
trimmedAvgFirstTokenMs: trimmedAverageOrNull(firstTokenValues, 0.1),
|
|
91
111
|
medianFirstTokenMs: medianOrNull(firstTokenValues),
|
|
92
112
|
p95FirstTokenMs: percentileOrNull(firstTokenValues, 0.95),
|
|
113
|
+
avgSetupMs: averageOrNull(successfulRuns.map((run) => run.setupMs).filter((value) => value !== null)),
|
|
114
|
+
avgFirstToolMs: averageOrNull(successfulRuns.map((run) => run.firstToolMs).filter((value) => value !== null)),
|
|
115
|
+
avgLastToolMs: averageOrNull(successfulRuns.map((run) => run.lastToolMs).filter((value) => value !== null)),
|
|
116
|
+
avgFinalOutputMs: averageOrNull(successfulRuns.map((run) => run.finalOutputMs).filter((value) => value !== null)),
|
|
117
|
+
avgCleanupMs: averageOrNull(successfulRuns.map((run) => run.cleanupMs).filter((value) => value !== null)),
|
|
93
118
|
avgOutputLength: averageOrNull(successfulRuns.map((run) => run.outputLength)),
|
|
94
119
|
avgNormalizedOutputLength: averageOrNull(successfulRuns.map((run) => run.normalizedOutputLength)),
|
|
95
120
|
avgToolCallCount: averageOrNull(successfulRuns.map((run) => run.toolCallCount)),
|
|
96
121
|
exactOutputMatchCount: successfulRuns.filter((run) => run.exactOutputMatch).length,
|
|
97
122
|
};
|
|
98
123
|
}
|
|
124
|
+
export function withUpstreamBenchmarkCleanup(summary, cleanupMs) {
|
|
125
|
+
return {
|
|
126
|
+
...summary,
|
|
127
|
+
cleanupMs,
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
export function summarizeUpstreamBenchmarkPhases(checkpoints) {
|
|
131
|
+
let previousAtMs = 0;
|
|
132
|
+
const durations = {};
|
|
133
|
+
for (const checkpoint of checkpoints) {
|
|
134
|
+
const key = `${checkpoint.label}Ms`;
|
|
135
|
+
if (checkpoint.atMs === null) {
|
|
136
|
+
durations[key] = null;
|
|
137
|
+
continue;
|
|
138
|
+
}
|
|
139
|
+
durations[key] = Number((checkpoint.atMs - previousAtMs).toFixed(2));
|
|
140
|
+
previousAtMs = checkpoint.atMs;
|
|
141
|
+
}
|
|
142
|
+
return durations;
|
|
143
|
+
}
|
|
144
|
+
export function selectUpstreamBenchmarkRunsByTemperature(runs, temperature) {
|
|
145
|
+
return runs.filter((run) => (temperature === "cold" ? run.runNumber === 1 : run.runNumber > 1));
|
|
146
|
+
}
|
|
147
|
+
export function summarizeUpstreamBenchmarkDurations(values) {
|
|
148
|
+
return {
|
|
149
|
+
count: values.length,
|
|
150
|
+
totalMs: Number(values.reduce((sum, value) => sum + value, 0).toFixed(2)),
|
|
151
|
+
avgMs: values.length > 0 ? average(values) : null,
|
|
152
|
+
maxMs: values.length > 0 ? Number(Math.max(...values).toFixed(2)) : null,
|
|
153
|
+
};
|
|
154
|
+
}
|
|
99
155
|
function computeOverhead(candidate, baseline) {
|
|
100
156
|
if (candidate === null || baseline === null) {
|
|
101
157
|
return { delta: null, pct: null };
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
import type { RunState } from "./core.js";
|
|
2
2
|
import type { CompiledAgentBinding, CompiledModel, CompiledTool, ParsedAgentObject, ParsedToolObject, WorkspaceBundle } from "./workspace.js";
|
|
3
|
+
/**
|
|
4
|
+
* Persisted runtime summary for an inspectable conversation thread.
|
|
5
|
+
* This projects upstream session/thread execution state into a stable runtime
|
|
6
|
+
* inspection surface.
|
|
7
|
+
*/
|
|
3
8
|
export type ThreadSummary = {
|
|
4
9
|
agentId: string;
|
|
5
10
|
threadId: string;
|
|
@@ -11,6 +16,11 @@ export type ThreadSummary = {
|
|
|
11
16
|
export type SessionRecord = ThreadSummary;
|
|
12
17
|
export type KnownHarnessEventType = "run.created" | "run.queued" | "run.dequeued" | "run.state.changed" | "run.resumed" | "approval.requested" | "approval.resolved" | "artifact.created" | "output.delta" | "runtime.health.changed" | "runtime.synthetic_fallback";
|
|
13
18
|
export type HarnessEventType = KnownHarnessEventType | (string & {});
|
|
19
|
+
/**
|
|
20
|
+
* Persisted runtime event recorded by the harness runtime.
|
|
21
|
+
* Event payload semantics should stay aligned with upstream/runtime behavior
|
|
22
|
+
* rather than introducing a second execution protocol.
|
|
23
|
+
*/
|
|
14
24
|
export type HarnessEvent = {
|
|
15
25
|
eventId: string;
|
|
16
26
|
eventType: HarnessEventType;
|
|
@@ -45,6 +55,10 @@ export type RuntimeHealthSymptom = {
|
|
|
45
55
|
firstSeenAt: string;
|
|
46
56
|
lastSeenAt: string;
|
|
47
57
|
};
|
|
58
|
+
/**
|
|
59
|
+
* Harness-operated operational state built from persisted records and runtime telemetry.
|
|
60
|
+
* This is runtime ops state, not an upstream execution semantic.
|
|
61
|
+
*/
|
|
48
62
|
export type RuntimeHealthSnapshot = {
|
|
49
63
|
status: HealthStatus;
|
|
50
64
|
updatedAt: string;
|
|
@@ -153,8 +167,15 @@ export type ThreadRunRecord = {
|
|
|
153
167
|
checkpointRef: string | null;
|
|
154
168
|
resumable: boolean;
|
|
155
169
|
};
|
|
170
|
+
/**
|
|
171
|
+
* Persisted run summary projected from upstream execution state plus runtime lifecycle metadata.
|
|
172
|
+
*/
|
|
156
173
|
export type RunSummary = ThreadRunRecord;
|
|
157
174
|
export type RunRecord = RunSummary;
|
|
175
|
+
/**
|
|
176
|
+
* Persisted thread inspection record assembled from runtime records.
|
|
177
|
+
* This is an inspectable projection, not a second thread semantic model.
|
|
178
|
+
*/
|
|
158
179
|
export type ThreadRecord = {
|
|
159
180
|
threadId: string;
|
|
160
181
|
entryAgentId: string;
|
|
@@ -188,6 +209,11 @@ export type RestartConversationOptions = {
|
|
|
188
209
|
mode: "restart-in-thread" | "restart-new-thread";
|
|
189
210
|
input: string;
|
|
190
211
|
};
|
|
212
|
+
/**
|
|
213
|
+
* Persisted approval inspection record.
|
|
214
|
+
* Approval decision semantics should stay aligned with upstream interrupt/approval
|
|
215
|
+
* behavior even though the record is stored and resolved through harness persistence.
|
|
216
|
+
*/
|
|
191
217
|
export type ApprovalRecord = {
|
|
192
218
|
approvalId: string;
|
|
193
219
|
pendingActionId: string;
|
|
@@ -240,6 +266,12 @@ export type RuntimeAdapterOptions = {
|
|
|
240
266
|
checkpointerResolver?: RuntimeCheckpointerResolver;
|
|
241
267
|
storeResolver?: RuntimeStoreResolver;
|
|
242
268
|
backendResolver?: RuntimeBackendResolver;
|
|
269
|
+
/**
|
|
270
|
+
* DeepAgent execution semantics stay upstream-owned.
|
|
271
|
+
* `minimal` keeps harness runtime persistence/ops active while only attaching
|
|
272
|
+
* upstream substrate objects when the binding explicitly needs them.
|
|
273
|
+
*/
|
|
274
|
+
deepAgentUpstreamSubstrateMode?: "full" | "minimal";
|
|
243
275
|
};
|
|
244
276
|
export type ToolKindAdapter = {
|
|
245
277
|
type: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.124";
|
package/dist/package-version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.124";
|
|
@@ -32,9 +32,9 @@ export declare function buildDeepAgentCreateParams(input: {
|
|
|
32
32
|
resolvedTools: unknown[];
|
|
33
33
|
resolvedMiddleware: unknown[];
|
|
34
34
|
resolvedSubagents: UpstreamSubagentConfig[];
|
|
35
|
-
resolvedCheckpointer
|
|
36
|
-
resolvedStore
|
|
37
|
-
resolvedBackend
|
|
35
|
+
resolvedCheckpointer?: unknown;
|
|
36
|
+
resolvedStore?: unknown;
|
|
37
|
+
resolvedBackend?: unknown;
|
|
38
38
|
resolvedInterruptOn?: Record<string, {
|
|
39
39
|
allowedDecisions: Array<"approve" | "edit" | "reject">;
|
|
40
40
|
}>;
|
|
@@ -18,7 +18,7 @@ export { applyDeepAgentDelegationPromptCompatibility, materializeDeepAgentSkillS
|
|
|
18
18
|
export { buildAuthOmittingFetch, normalizeOpenAICompatibleInit } from "./adapter/compat/openai-compatible.js";
|
|
19
19
|
export { buildToolNameMapping, createModelFacingToolNameCandidates, createModelFacingToolNameLookupCandidates, resolveModelFacingToolName, sanitizeToolNameForModel, } from "./adapter/tool/tool-name-mapping.js";
|
|
20
20
|
export { computeRemainingTimeoutMs, isRetryableProviderError, resolveBindingTimeout, resolveProviderRetryPolicy, resolveStreamIdleTimeout, resolveTimeoutMs, } from "./adapter/resilience.js";
|
|
21
|
-
import { getBindingAdapterKind, getBindingExecutionKind, getBindingExecutionParams, getBindingFilesystemConfig, getBindingInterruptCompatibilityRules, getBindingPrimaryModel, getBindingSkills, getBindingSubagents, getBindingToolCount, getBindingPrimaryTools, getBindingSystemPrompt, isDeepAgentBinding, isLangChainBinding, } from "./support/compiled-binding.js";
|
|
21
|
+
import { getBindingBackendConfig, getBindingAdapterKind, getBindingExecutionKind, getBindingExecutionParams, getBindingFilesystemConfig, getBindingInterruptCompatibilityRules, getBindingMemorySources, getBindingMiddlewareConfigs, getBindingPrimaryModel, getBindingSkills, getBindingStoreConfig, getBindingSubagents, getBindingToolCount, getBindingPrimaryTools, getBindingSystemPrompt, isDeepAgentBinding, isLangChainBinding, } from "./support/compiled-binding.js";
|
|
22
22
|
const AGENT_INTERRUPT_SENTINEL_PREFIX = "__agent_harness_interrupt__:";
|
|
23
23
|
const UPSTREAM_BUILTIN_MIDDLEWARE_TOOL_NAMES = Object.freeze([
|
|
24
24
|
"write_todos",
|
|
@@ -116,13 +116,28 @@ export function buildDeepAgentCreateParams(input) {
|
|
|
116
116
|
model: input.resolvedModel,
|
|
117
117
|
tools: input.resolvedTools,
|
|
118
118
|
middleware: input.resolvedMiddleware,
|
|
119
|
-
checkpointer: input.resolvedCheckpointer,
|
|
120
|
-
store: input.resolvedStore,
|
|
121
119
|
subagents: input.resolvedSubagents,
|
|
122
|
-
backend: input.resolvedBackend,
|
|
123
120
|
interruptOn: input.resolvedInterruptOn,
|
|
121
|
+
...(input.resolvedCheckpointer !== undefined ? { checkpointer: input.resolvedCheckpointer } : {}),
|
|
122
|
+
...(input.resolvedStore !== undefined ? { store: input.resolvedStore } : {}),
|
|
123
|
+
...(input.resolvedBackend !== undefined ? { backend: input.resolvedBackend } : {}),
|
|
124
124
|
};
|
|
125
125
|
}
|
|
126
|
+
function shouldAttachMinimalDeepAgentCheckpointer(binding, resolvedInterruptOn) {
|
|
127
|
+
if (binding.harnessRuntime.checkpointer !== undefined) {
|
|
128
|
+
return true;
|
|
129
|
+
}
|
|
130
|
+
return resolvedInterruptOn !== undefined && Object.keys(resolvedInterruptOn).length > 0;
|
|
131
|
+
}
|
|
132
|
+
function shouldAttachMinimalDeepAgentStore(binding) {
|
|
133
|
+
return getBindingStoreConfig(binding) !== undefined || getBindingMemorySources(binding).length > 0;
|
|
134
|
+
}
|
|
135
|
+
function shouldAttachMinimalDeepAgentBackend(binding) {
|
|
136
|
+
return (getBindingBackendConfig(binding) !== undefined ||
|
|
137
|
+
getBindingMemorySources(binding).length > 0 ||
|
|
138
|
+
getBindingSkills(binding).length > 0 ||
|
|
139
|
+
(getBindingMiddlewareConfigs(binding)?.length ?? 0) > 0);
|
|
140
|
+
}
|
|
126
141
|
export class AgentRuntimeAdapter {
|
|
127
142
|
options;
|
|
128
143
|
modelCache = new Map();
|
|
@@ -351,10 +366,19 @@ export class AgentRuntimeAdapter {
|
|
|
351
366
|
const resolvedTools = this.resolveTools(primaryTools, binding);
|
|
352
367
|
const resolvedMiddleware = await this.resolveMiddleware(binding);
|
|
353
368
|
const resolvedSubagents = await this.resolveSubagents(getBindingSubagents(binding), binding);
|
|
354
|
-
const resolvedCheckpointer = resolveRunnableCheckpointer(this.options, binding);
|
|
355
|
-
const resolvedStore = this.options.storeResolver?.(binding);
|
|
356
|
-
const resolvedBackend = this.options.backendResolver?.(binding);
|
|
357
369
|
const resolvedInterruptOn = resolveRunnableInterruptOn(binding);
|
|
370
|
+
const substrateMode = this.options.deepAgentUpstreamSubstrateMode ?? "minimal";
|
|
371
|
+
const resolvedCheckpointer = substrateMode === "minimal"
|
|
372
|
+
? (shouldAttachMinimalDeepAgentCheckpointer(binding, resolvedInterruptOn)
|
|
373
|
+
? resolveRunnableCheckpointer(this.options, binding)
|
|
374
|
+
: undefined)
|
|
375
|
+
: resolveRunnableCheckpointer(this.options, binding);
|
|
376
|
+
const resolvedStore = substrateMode === "minimal"
|
|
377
|
+
? (shouldAttachMinimalDeepAgentStore(binding) ? this.options.storeResolver?.(binding) : undefined)
|
|
378
|
+
: this.options.storeResolver?.(binding);
|
|
379
|
+
const resolvedBackend = substrateMode === "minimal"
|
|
380
|
+
? (shouldAttachMinimalDeepAgentBackend(binding) ? this.options.backendResolver?.(binding) : undefined)
|
|
381
|
+
: this.options.backendResolver?.(binding);
|
|
358
382
|
const resolvedSkills = resolveDeepAgentSkillSourcePaths({
|
|
359
383
|
workspaceRoot: binding.harnessRuntime.workspaceRoot,
|
|
360
384
|
runRoot: binding.harnessRuntime.runRoot,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { ApprovalRecord, HarnessEvent, InternalApprovalRecord, InvocationEnvelope, RunListeners, RunOptions, RunResult, RunStartOptions, MessageContent } from "../../../contracts/types.js";
|
|
2
|
-
export declare function
|
|
2
|
+
export declare function toInspectableApprovalRecord(approval: InternalApprovalRecord): ApprovalRecord;
|
|
3
3
|
export declare function normalizeInvocationEnvelope(options: RunStartOptions): {
|
|
4
4
|
context?: Record<string, unknown>;
|
|
5
5
|
state?: Record<string, unknown>;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { normalizeMessageContent } from "../../../utils/message-content.js";
|
|
2
|
-
export function
|
|
2
|
+
export function toInspectableApprovalRecord(approval) {
|
|
3
3
|
const { toolCallId: _toolCallId, checkpointRef: _checkpointRef, eventRefs: _eventRefs, ...publicApproval } = approval;
|
|
4
4
|
return publicApproval;
|
|
5
5
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { AGENT_INTERRUPT_SENTINEL_PREFIX, RuntimeOperationTimeoutError } from "../../agent-runtime-adapter.js";
|
|
2
2
|
import { renderRuntimeFailure, renderToolFailure } from "../../support/harness-support.js";
|
|
3
|
-
import { createContentBlocksItem, createToolResultKey,
|
|
3
|
+
import { createContentBlocksItem, createToolResultKey, } from "../events/streaming.js";
|
|
4
4
|
function normalizeStreamChunk(chunk) {
|
|
5
5
|
if (typeof chunk === "string") {
|
|
6
6
|
if (chunk.startsWith(AGENT_INTERRUPT_SENTINEL_PREFIX)) {
|
|
@@ -33,7 +33,6 @@ export async function* streamHarnessRun(options) {
|
|
|
33
33
|
let releaseRunSlot = async () => undefined;
|
|
34
34
|
let emitted = false;
|
|
35
35
|
let streamActivityObserved = false;
|
|
36
|
-
const emitOutputDelta = (content) => emitOutputDeltaAndCreateItem(options.emit, options.threadId, options.runId, options.selectedAgentId, content);
|
|
37
36
|
try {
|
|
38
37
|
const [priorHistory, acquiredReleaseRunSlot] = await Promise.all([
|
|
39
38
|
priorHistoryPromise,
|
|
@@ -114,12 +113,10 @@ export async function* streamHarnessRun(options) {
|
|
|
114
113
|
}
|
|
115
114
|
emitted = true;
|
|
116
115
|
assistantOutput += normalizedChunk.content;
|
|
117
|
-
yield await emitOutputDelta(normalizedChunk.content);
|
|
118
116
|
}
|
|
119
117
|
if (!assistantOutput && toolErrors.length > 0) {
|
|
120
118
|
assistantOutput = toolErrors.join("\n\n");
|
|
121
119
|
emitted = true;
|
|
122
|
-
yield await emitOutputDelta(assistantOutput);
|
|
123
120
|
}
|
|
124
121
|
if (!assistantOutput) {
|
|
125
122
|
const actual = await options.invokeWithHistory(options.binding, options.input, options.threadId, options.runId);
|
|
@@ -129,7 +126,6 @@ export async function* streamHarnessRun(options) {
|
|
|
129
126
|
if (actual.output) {
|
|
130
127
|
assistantOutput = actual.output;
|
|
131
128
|
emitted = true;
|
|
132
|
-
yield await emitOutputDelta(actual.output);
|
|
133
129
|
}
|
|
134
130
|
}
|
|
135
131
|
await options.appendAssistantMessage(options.threadId, options.runId, assistantOutput);
|
|
@@ -216,9 +212,6 @@ export async function* streamHarnessRun(options) {
|
|
|
216
212
|
if (Array.isArray(actual.contentBlocks) && actual.contentBlocks.length > 0) {
|
|
217
213
|
yield createContentBlocksItem(options.threadId, options.runId, options.selectedAgentId, actual.contentBlocks);
|
|
218
214
|
}
|
|
219
|
-
if (actual.output) {
|
|
220
|
-
yield await emitOutputDelta(actual.output);
|
|
221
|
-
}
|
|
222
215
|
yield {
|
|
223
216
|
type: "result",
|
|
224
217
|
result: {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { ApprovalRecord, ThreadRecord, ThreadSummary } from "../../../contracts/types.js";
|
|
2
2
|
import type { RuntimePersistence } from "../../../persistence/types.js";
|
|
3
|
-
export declare function getThreadRecord(input: {
|
|
3
|
+
export declare function buildThreadInspectionRecord(input: {
|
|
4
4
|
persistence: RuntimePersistence;
|
|
5
5
|
getSession: (threadId: string) => Promise<ThreadSummary | null>;
|
|
6
6
|
}, threadId: string): Promise<ThreadRecord | null>;
|
|
@@ -1,5 +1,10 @@
|
|
|
1
|
-
import { isTerminalRunState,
|
|
2
|
-
export async function getThreadRecord(input, threadId) {
|
|
1
|
+
import { isTerminalRunState, toInspectableApprovalRecord } from "./helpers.js";
|
|
2
|
+
function selectLatestPendingApproval(approvals) {
|
|
3
|
+
return approvals
|
|
4
|
+
.filter((approval) => approval.status === "pending")
|
|
5
|
+
.sort((left, right) => right.requestedAt.localeCompare(left.requestedAt))[0];
|
|
6
|
+
}
|
|
7
|
+
export async function buildThreadInspectionRecord(input, threadId) {
|
|
3
8
|
const [threadSummary, meta, messages, runs] = await Promise.all([
|
|
4
9
|
input.getSession(threadId),
|
|
5
10
|
input.persistence.getThreadMeta(threadId),
|
|
@@ -11,9 +16,7 @@ export async function getThreadRecord(input, threadId) {
|
|
|
11
16
|
}
|
|
12
17
|
const latestRunId = threadSummary.latestRunId;
|
|
13
18
|
const latestApprovals = await input.persistence.getRunApprovals(threadId, latestRunId);
|
|
14
|
-
const pendingApproval = latestApprovals
|
|
15
|
-
.filter((approval) => approval.status === "pending")
|
|
16
|
-
.sort((left, right) => right.requestedAt.localeCompare(left.requestedAt))[0];
|
|
19
|
+
const pendingApproval = selectLatestPendingApproval(latestApprovals);
|
|
17
20
|
return {
|
|
18
21
|
threadId,
|
|
19
22
|
entryAgentId: meta.entryAgentId,
|
|
@@ -36,11 +39,11 @@ export async function getThreadRecord(input, threadId) {
|
|
|
36
39
|
}
|
|
37
40
|
export async function listPublicApprovals(input, filter) {
|
|
38
41
|
const approvals = await input.persistence.listApprovals(filter);
|
|
39
|
-
return approvals.map((approval) =>
|
|
42
|
+
return approvals.map((approval) => toInspectableApprovalRecord(approval));
|
|
40
43
|
}
|
|
41
44
|
export async function getPublicApproval(input, approvalId) {
|
|
42
45
|
const approval = await input.persistence.getApproval(approvalId);
|
|
43
|
-
return approval ?
|
|
46
|
+
return approval ? toInspectableApprovalRecord(approval) : null;
|
|
44
47
|
}
|
|
45
48
|
export async function deleteThreadRecord(input, threadId) {
|
|
46
49
|
const thread = await input.getThread(threadId);
|
package/dist/runtime/harness.js
CHANGED
|
@@ -29,7 +29,7 @@ import { resolveRuntimeAdapterOptions } from "./support/runtime-adapter-options.
|
|
|
29
29
|
import { initializeHarnessRuntime, reclaimExpiredClaimedRuns as reclaimHarnessExpiredClaimedRuns, recoverStartupRuns as recoverHarnessStartupRuns, isStaleRunningRun as isHarnessStaleRunningRun, } from "./harness/run/startup-runtime.js";
|
|
30
30
|
import { streamHarnessRun } from "./harness/run/stream-run.js";
|
|
31
31
|
import { defaultRequestedAgentId, prepareRunStart } from "./harness/run/start-run.js";
|
|
32
|
-
import { deleteThreadRecord, getPublicApproval,
|
|
32
|
+
import { buildThreadInspectionRecord, deleteThreadRecord, getPublicApproval, listPublicApprovals, } from "./harness/run/thread-records.js";
|
|
33
33
|
export class AgentHarnessRuntime {
|
|
34
34
|
workspace;
|
|
35
35
|
runtimeAdapterOptions;
|
|
@@ -199,7 +199,7 @@ export class AgentHarnessRuntime {
|
|
|
199
199
|
return this.persistence.getSession(threadId);
|
|
200
200
|
}
|
|
201
201
|
async getThread(threadId) {
|
|
202
|
-
return
|
|
202
|
+
return buildThreadInspectionRecord({
|
|
203
203
|
persistence: this.persistence,
|
|
204
204
|
getSession: (currentThreadId) => this.getSession(currentThreadId),
|
|
205
205
|
}, threadId);
|