@botbotgo/agent-harness 0.0.84 → 0.0.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/upstream-runtime-ab-benchmark.d.ts +51 -0
- package/dist/benchmark/upstream-runtime-ab-benchmark.js +123 -0
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/runtime/agent-runtime-adapter.d.ts +0 -1
- package/dist/runtime/agent-runtime-adapter.js +5 -78
- package/dist/runtime/harness.d.ts +6 -0
- package/dist/runtime/harness.js +126 -55
- package/dist/runtime/thread-memory-sync.js +0 -1
- package/package.json +3 -2
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/** Benchmark path names, in default order; used as the fallback by resolveUpstreamBenchmarkPaths. */
export declare const DEFAULT_UPSTREAM_BENCHMARK_PATHS: readonly ["harness", "raw-langchain-v1", "raw-deepagent"];
|
|
2
|
+
/** Workload returned by resolveUpstreamBenchmarkWorkload when the raw value is not "no-tool". */
export declare const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD: "tool";
|
|
3
|
+
/** Union of the path names listed in DEFAULT_UPSTREAM_BENCHMARK_PATHS. */
export type UpstreamBenchmarkPath = (typeof DEFAULT_UPSTREAM_BENCHMARK_PATHS)[number];
|
|
4
|
+
/**
 * Workload selector accepted by the benchmark.
 * NOTE(review): presumably distinguishes prompts that exercise tool calls from those that do not — confirm against the benchmark runner.
 */
export type UpstreamBenchmarkWorkload = "tool" | "no-tool";
|
|
5
|
+
/**
 * Measurements recorded for a single benchmark run.
 * aggregateUpstreamBenchmarkRuns only folds runs whose status is "completed" into its statistics.
 */
export type UpstreamBenchmarkRunSummary = {
    providerLabel: string;
    model: string;
    path: UpstreamBenchmarkPath;
    runNumber: number;
    status: "completed" | "failed";
    totalMs: number;
    /** null values are excluded from the first-token statistics. */
    firstTokenMs: number | null;
    outputLength: number;
    normalizedOutputLength: number;
    toolCallCount: number;
    exactOutputMatch: boolean;
    errorMessage?: string;
};
|
|
19
|
+
/**
 * Statistics for one provider/model/path combination, as produced by aggregateUpstreamBenchmarkRuns.
 * Every nullable figure is null when there were no values to aggregate.
 */
export type UpstreamBenchmarkAggregateSummary = {
    providerLabel: string;
    model: string;
    path: UpstreamBenchmarkPath;
    /** Total number of runs supplied, successful or not. */
    repetitions: number;
    successCount: number;
    failureCount: number;
    avgTotalMs: number | null;
    trimmedAvgTotalMs: number | null;
    medianTotalMs: number | null;
    p95TotalMs: number | null;
    avgFirstTokenMs: number | null;
    trimmedAvgFirstTokenMs: number | null;
    medianFirstTokenMs: number | null;
    p95FirstTokenMs: number | null;
    avgOutputLength: number | null;
    avgNormalizedOutputLength: number | null;
    avgToolCallCount: number | null;
    exactOutputMatchCount: number;
};
|
|
39
|
+
/**
 * Result of compareUpstreamBenchmarkPaths: candidate-minus-baseline differences.
 * Deltas are null when either average is missing; percentages are additionally null when the baseline is 0.
 */
export type UpstreamBenchmarkComparison = {
    baselinePath: UpstreamBenchmarkPath;
    candidatePath: UpstreamBenchmarkPath;
    avgTotalMsDelta: number | null;
    avgTotalMsOverheadPct: number | null;
    avgFirstTokenMsDelta: number | null;
    avgFirstTokenMsOverheadPct: number | null;
};
|
|
47
|
+
/**
 * Parses a comma-separated list of path names (trimmed, case-insensitive), dropping unknown entries.
 * Returns a copy of the default paths when the value is empty or contains no recognised name.
 */
export declare function resolveUpstreamBenchmarkPaths(rawValue?: string): readonly UpstreamBenchmarkPath[];
|
|
48
|
+
/** Returns "no-tool" only when the trimmed, lower-cased value equals it; otherwise the default workload. */
export declare function resolveUpstreamBenchmarkWorkload(rawValue?: string): UpstreamBenchmarkWorkload;
|
|
49
|
+
/**
 * Collapses whitespace in `output` and returns the whitespace-delimited token that starts at the
 * last scanned occurrence of a prefix; prefixes later in the list override earlier ones.
 * Returns the whitespace-normalised output when nothing matches.
 */
export declare function extractLastMatchingToken(output: string, prefixes: readonly string[]): string;
|
|
50
|
+
/** Summarises `runs` for one provider/model/path; timing and size statistics cover completed runs only. */
export declare function aggregateUpstreamBenchmarkRuns(providerLabel: string, model: string, path: UpstreamBenchmarkPath, runs: UpstreamBenchmarkRunSummary[]): UpstreamBenchmarkAggregateSummary;
|
|
51
|
+
/** Computes candidate-minus-baseline deltas and percentage overheads for avgTotalMs and avgFirstTokenMs. */
export declare function compareUpstreamBenchmarkPaths(baseline: UpstreamBenchmarkAggregateSummary, candidate: UpstreamBenchmarkAggregateSummary): UpstreamBenchmarkComparison;
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/** Frozen list of benchmark path names, in the order they are used by default. */
export const DEFAULT_UPSTREAM_BENCHMARK_PATHS = Object.freeze(["harness", "raw-langchain-v1", "raw-deepagent"]);
|
|
6
|
+
export const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD = "tool";
|
|
7
|
+
/**
 * Arithmetic mean of `values`, rounded to two decimal places.
 * An empty array yields NaN (0 / 0); callers go through averageOrNull to avoid that.
 *
 * @param {number[]} values
 * @returns {number}
 */
function average(values) {
    let total = 0;
    for (const value of values) {
        total += value;
    }
    const mean = total / values.length;
    return Number(mean.toFixed(2));
}
|
|
10
|
+
/**
 * Median of `values`, rounded to two decimal places.
 * Even-length input averages the two central elements; missing elements count as 0,
 * so an empty array yields 0. The input array is not mutated.
 *
 * @param {number[]} values
 * @returns {number}
 */
function median(values) {
    const ordered = Array.from(values);
    ordered.sort((a, b) => a - b);
    const mid = Math.floor(ordered.length / 2);
    const hasSingleCentre = ordered.length % 2 !== 0;
    const centre = hasSingleCentre
        ? (ordered[mid] ?? 0)
        : ((ordered[mid - 1] ?? 0) + (ordered[mid] ?? 0)) / 2;
    return Number(centre.toFixed(2));
}
|
|
18
|
+
/**
 * Nearest-rank percentile of `values`, rounded to two decimal places.
 * The rank is ceil(n * quantile) - 1, clamped to the valid index range, so quantiles
 * at or below 0 select the minimum and quantiles at or above 1 select the maximum.
 * When no element can be selected (empty input) the result is 0, matching median,
 * instead of throwing on `undefined.toFixed`. The input array is not mutated.
 *
 * @param {number[]} values
 * @param {number} quantile fraction in [0, 1], e.g. 0.95 for p95
 * @returns {number}
 */
function percentile(values, quantile) {
    const sorted = [...values].sort((left, right) => left - right);
    const rank = Math.ceil(sorted.length * quantile) - 1;
    const index = Math.max(0, Math.min(sorted.length - 1, rank));
    return Number((sorted[index] ?? 0).toFixed(2));
}
|
|
23
|
+
/** Rounded mean of `values`, or null when there is nothing to average. */
function averageOrNull(values) {
    if (!(values.length > 0)) {
        return null;
    }
    return average(values);
}
|
|
26
|
+
/** Rounded median of `values`, or null when the list is empty. */
function medianOrNull(values) {
    if (!(values.length > 0)) {
        return null;
    }
    return median(values);
}
|
|
29
|
+
/** Rounded nearest-rank percentile of `values`, or null when the list is empty. */
function percentileOrNull(values, quantile) {
    if (!(values.length > 0)) {
        return null;
    }
    return percentile(values, quantile);
}
|
|
32
|
+
/**
 * Returns a copy of `values` with the lowest and highest `trimFraction` share removed.
 * Lists of two or fewer elements are copied as-is (unsorted). Longer lists are sorted
 * ascending, and the number dropped per side is capped so at least one element remains.
 *
 * @param {number[]} values
 * @param {number} trimFraction share to drop from each end, e.g. 0.1
 * @returns {number[]}
 */
function trimValues(values, trimFraction) {
    const count = values.length;
    if (count <= 2) {
        return [...values];
    }
    const ascending = [...values].sort((a, b) => a - b);
    const requestedPerSide = Math.floor(count * trimFraction);
    const maxPerSide = Math.floor((count - 1) / 2);
    const dropPerSide = Math.min(requestedPerSide, maxPerSide);
    return ascending.slice(dropPerSide, count - dropPerSide);
}
|
|
40
|
+
/** Rounded mean of `values` after trimming `trimFraction` from each end, or null for an empty list. */
function trimmedAverageOrNull(values, trimFraction) {
    if (!(values.length > 0)) {
        return null;
    }
    const kept = trimValues(values, trimFraction);
    return average(kept);
}
|
|
43
|
+
/**
 * Turns a comma-separated list of path names into the list of paths to benchmark.
 * Entries are trimmed and lower-cased; anything that is not a known path name is dropped.
 * An empty/missing value, or one with no recognised entry, yields a fresh copy of the defaults.
 *
 * @param {string} [rawValue]
 * @returns {string[]}
 */
export function resolveUpstreamBenchmarkPaths(rawValue) {
    const defaults = () => [...DEFAULT_UPSTREAM_BENCHMARK_PATHS];
    if (!rawValue) {
        return defaults();
    }
    const recognised = [];
    for (const piece of rawValue.split(",")) {
        const candidate = piece.trim().toLowerCase();
        switch (candidate) {
            case "harness":
            case "raw-langchain-v1":
            case "raw-deepagent":
                recognised.push(candidate);
                break;
            default:
                break;
        }
    }
    return recognised.length > 0 ? recognised : defaults();
}
|
|
53
|
+
/**
 * Maps a raw workload string to a supported workload.
 * Only a value that trims and lower-cases to "no-tool" selects that workload; everything else,
 * including a missing value, selects the default.
 *
 * @param {string} [rawValue]
 * @returns {"tool" | "no-tool"}
 */
export function resolveUpstreamBenchmarkWorkload(rawValue) {
    const requested = rawValue?.trim().toLowerCase();
    if (requested === "no-tool") {
        return "no-tool";
    }
    return DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD;
}
|
|
56
|
+
/**
 * Finds the last token in `output` that begins with one of `prefixes`.
 *
 * Whitespace runs in `output` are collapsed to single spaces first. For each prefix, in order,
 * its non-overlapping occurrences are scanned left to right and the whitespace-delimited token
 * starting at each occurrence is remembered, so a prefix later in the list overrides an earlier one.
 * Empty prefixes are ignored: `indexOf("")` matches at every position without advancing,
 * which previously made the scan loop forever.
 *
 * @param {string} output raw model output
 * @param {readonly string[]} prefixes token prefixes to look for
 * @returns {string} the last matching token, or the whitespace-normalised output when nothing matches
 */
export function extractLastMatchingToken(output, prefixes) {
    const normalized = output.replace(/\s+/g, " ").trim();
    let matched = "";
    for (const prefix of prefixes) {
        if (prefix.length === 0) {
            // Nothing to match, and the scan below could never advance past index 0.
            continue;
        }
        let searchIndex = normalized.indexOf(prefix);
        while (searchIndex >= 0) {
            const suffix = normalized.slice(searchIndex);
            const token = (suffix.split(/\s/)[0] ?? suffix).trim();
            // A prefix that starts with whitespace produces an empty token; keep the previous match.
            if (token.length > 0) {
                matched = token;
            }
            searchIndex = normalized.indexOf(prefix, searchIndex + prefix.length);
        }
    }
    return matched || normalized;
}
|
|
72
|
+
/**
 * Builds the aggregate summary for one provider/model/path from its individual runs.
 * Only runs whose status is "completed" contribute to the statistics; first-token figures
 * additionally skip runs whose firstTokenMs is null. Trimmed averages drop 10% from each end.
 *
 * @param {string} providerLabel
 * @param {string} model
 * @param {string} path
 * @param {object[]} runs per-run summaries
 * @returns {object} aggregate summary; statistics are null when no values were available
 */
export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs) {
    const completed = runs.filter(({ status }) => status === "completed");
    const totals = [];
    const firstTokens = [];
    const outputLengths = [];
    const normalizedOutputLengths = [];
    const toolCallCounts = [];
    let exactMatches = 0;
    for (const run of completed) {
        totals.push(run.totalMs);
        if (run.firstTokenMs !== null) {
            firstTokens.push(run.firstTokenMs);
        }
        outputLengths.push(run.outputLength);
        normalizedOutputLengths.push(run.normalizedOutputLength);
        toolCallCounts.push(run.toolCallCount);
        if (run.exactOutputMatch) {
            exactMatches += 1;
        }
    }
    const successCount = completed.length;
    return {
        providerLabel,
        model,
        path,
        repetitions: runs.length,
        successCount,
        failureCount: runs.length - successCount,
        avgTotalMs: averageOrNull(totals),
        trimmedAvgTotalMs: trimmedAverageOrNull(totals, 0.1),
        medianTotalMs: medianOrNull(totals),
        p95TotalMs: percentileOrNull(totals, 0.95),
        avgFirstTokenMs: averageOrNull(firstTokens),
        trimmedAvgFirstTokenMs: trimmedAverageOrNull(firstTokens, 0.1),
        medianFirstTokenMs: medianOrNull(firstTokens),
        p95FirstTokenMs: percentileOrNull(firstTokens, 0.95),
        avgOutputLength: averageOrNull(outputLengths),
        avgNormalizedOutputLength: averageOrNull(normalizedOutputLengths),
        avgToolCallCount: averageOrNull(toolCallCounts),
        exactOutputMatchCount: exactMatches,
    };
}
|
|
99
|
+
/**
 * Absolute and relative difference of `candidate` against `baseline`, both rounded to two decimals.
 * Both fields are null when either input is null; the percentage alone is null when the
 * baseline is 0, since the ratio is undefined there.
 *
 * @param {number | null} candidate
 * @param {number | null} baseline
 * @returns {{ delta: number | null, pct: number | null }}
 */
function computeOverhead(candidate, baseline) {
    const round2 = (value) => Number(value.toFixed(2));
    if (candidate === null || baseline === null) {
        return { delta: null, pct: null };
    }
    const difference = candidate - baseline;
    const delta = round2(difference);
    const pct = baseline === 0 ? null : round2((difference / baseline) * 100);
    return { delta, pct };
}
|
|
112
|
+
/**
 * Compares two aggregate summaries on average total latency and average first-token latency.
 * Differences are candidate minus baseline; see computeOverhead for the null rules.
 *
 * @param {object} baseline aggregate summary used as the reference
 * @param {object} candidate aggregate summary being compared against the reference
 * @returns {object} comparison record naming both paths and their deltas/overheads
 */
export function compareUpstreamBenchmarkPaths(baseline, candidate) {
    const { delta: avgTotalMsDelta, pct: avgTotalMsOverheadPct } = computeOverhead(candidate.avgTotalMs, baseline.avgTotalMs);
    const { delta: avgFirstTokenMsDelta, pct: avgFirstTokenMsOverheadPct } = computeOverhead(candidate.avgFirstTokenMs, baseline.avgFirstTokenMs);
    return {
        baselinePath: baseline.path,
        candidatePath: candidate.path,
        avgTotalMsDelta,
        avgTotalMsOverheadPct,
        avgFirstTokenMsDelta,
        avgFirstTokenMsOverheadPct,
    };
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export declare const AGENT_HARNESS_VERSION = "0.0.84";
|
package/dist/package-version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const AGENT_HARNESS_VERSION = "0.0.
|
|
1
|
+
export const AGENT_HARNESS_VERSION = "0.0.84";
|
|
@@ -35,7 +35,6 @@ export declare class AgentRuntimeAdapter {
|
|
|
35
35
|
private materializeModelStream;
|
|
36
36
|
private createModelFallbackRunnable;
|
|
37
37
|
private applyStrictToolJsonInstruction;
|
|
38
|
-
private synthesizeDeepAgentAnswer;
|
|
39
38
|
private resolveModel;
|
|
40
39
|
private buildToolNameMapping;
|
|
41
40
|
private buildAgentMessages;
|
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
|
-
import { existsSync, statSync } from "node:fs";
|
|
3
|
-
import { cp, mkdir, rm } from "node:fs/promises";
|
|
4
2
|
import { Command, MemorySaver } from "@langchain/langgraph";
|
|
5
3
|
import { tool as createLangChainTool } from "@langchain/core/tools";
|
|
6
4
|
import { HumanMessage, ToolMessage } from "@langchain/core/messages";
|
|
@@ -85,49 +83,12 @@ export function relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) {
|
|
|
85
83
|
return relative.split(path.sep).join("/");
|
|
86
84
|
});
|
|
87
85
|
}
|
|
88
|
-
function isDeepAgentSkillDirectory(sourcePath) {
|
|
89
|
-
return existsSync(sourcePath) && statSync(sourcePath).isDirectory() && existsSync(path.join(sourcePath, "SKILL.md"));
|
|
90
|
-
}
|
|
91
|
-
function toWorkspaceRelativePath(workspaceRoot, targetPath) {
|
|
92
|
-
if (!workspaceRoot) {
|
|
93
|
-
return targetPath;
|
|
94
|
-
}
|
|
95
|
-
const relative = path.relative(workspaceRoot, targetPath);
|
|
96
|
-
if (!relative || relative.startsWith("..")) {
|
|
97
|
-
return targetPath;
|
|
98
|
-
}
|
|
99
|
-
return relative.split(path.sep).join("/");
|
|
100
|
-
}
|
|
101
86
|
export async function materializeDeepAgentSkillSourcePaths(options) {
|
|
102
|
-
const { workspaceRoot,
|
|
87
|
+
const { workspaceRoot, skillPaths } = options;
|
|
103
88
|
if (!skillPaths) {
|
|
104
89
|
return skillPaths;
|
|
105
90
|
}
|
|
106
|
-
|
|
107
|
-
if (!workspaceRoot || !runRoot) {
|
|
108
|
-
return materialized;
|
|
109
|
-
}
|
|
110
|
-
const sourceRoot = path.join(runRoot, "deepagent-skill-sources", ownerId);
|
|
111
|
-
let wroteSyntheticSource = false;
|
|
112
|
-
const resolvedSources = [];
|
|
113
|
-
for (const [index, sourcePath] of materialized.entries()) {
|
|
114
|
-
const absolutePath = path.isAbsolute(sourcePath) ? sourcePath : path.resolve(workspaceRoot, sourcePath);
|
|
115
|
-
if (!isDeepAgentSkillDirectory(absolutePath)) {
|
|
116
|
-
resolvedSources.push(sourcePath);
|
|
117
|
-
continue;
|
|
118
|
-
}
|
|
119
|
-
if (!wroteSyntheticSource) {
|
|
120
|
-
await rm(sourceRoot, { recursive: true, force: true });
|
|
121
|
-
await mkdir(sourceRoot, { recursive: true });
|
|
122
|
-
wroteSyntheticSource = true;
|
|
123
|
-
}
|
|
124
|
-
const skillDirectoryName = path.basename(absolutePath);
|
|
125
|
-
const syntheticSourcePath = path.join(sourceRoot, `${String(index + 1).padStart(3, "0")}-${skillDirectoryName}`);
|
|
126
|
-
await mkdir(syntheticSourcePath, { recursive: true });
|
|
127
|
-
await cp(absolutePath, path.join(syntheticSourcePath, skillDirectoryName), { recursive: true });
|
|
128
|
-
resolvedSources.push(toWorkspaceRelativePath(workspaceRoot, syntheticSourcePath));
|
|
129
|
-
}
|
|
130
|
-
return resolvedSources;
|
|
91
|
+
return relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) ?? skillPaths;
|
|
131
92
|
}
|
|
132
93
|
function buildAuthOmittingFetch(baseFetch = fetch) {
|
|
133
94
|
return async (input, init) => {
|
|
@@ -673,31 +634,6 @@ export class AgentRuntimeAdapter {
|
|
|
673
634
|
}
|
|
674
635
|
return binding;
|
|
675
636
|
}
|
|
676
|
-
async synthesizeDeepAgentAnswer(binding, input, result) {
|
|
677
|
-
const params = getBindingDeepAgentParams(binding);
|
|
678
|
-
if (!params) {
|
|
679
|
-
return "";
|
|
680
|
-
}
|
|
681
|
-
const toolContext = extractToolFallbackContext(result);
|
|
682
|
-
if (!toolContext) {
|
|
683
|
-
return "";
|
|
684
|
-
}
|
|
685
|
-
const model = (await this.resolveModel(params.model));
|
|
686
|
-
if (!model?.invoke) {
|
|
687
|
-
return "";
|
|
688
|
-
}
|
|
689
|
-
const synthesized = await this.withTimeout(() => model.invoke([
|
|
690
|
-
{
|
|
691
|
-
role: "system",
|
|
692
|
-
content: "The previous agent run completed tool work but did not produce a final user-facing answer. Write the final answer now using the tool results provided. Do not expose internal state, tools, or reasoning.",
|
|
693
|
-
},
|
|
694
|
-
{
|
|
695
|
-
role: "user",
|
|
696
|
-
content: `Original user request:\n${extractMessageText(input)}\n\nTool results:\n${toolContext}`,
|
|
697
|
-
},
|
|
698
|
-
]), this.resolveBindingTimeout(binding), "deepagent synthesis invoke", "invoke");
|
|
699
|
-
return sanitizeVisibleText(extractVisibleOutput(synthesized));
|
|
700
|
-
}
|
|
701
637
|
async resolveModel(model) {
|
|
702
638
|
const cacheKey = this.getModelCacheKey(model);
|
|
703
639
|
const cached = this.modelCache.get(cacheKey);
|
|
@@ -1166,7 +1102,7 @@ export class AgentRuntimeAdapter {
|
|
|
1166
1102
|
if (!isDeepAgentBinding(binding)) {
|
|
1167
1103
|
return [];
|
|
1168
1104
|
}
|
|
1169
|
-
return
|
|
1105
|
+
return [];
|
|
1170
1106
|
}
|
|
1171
1107
|
async resolveMiddleware(binding, interruptOn) {
|
|
1172
1108
|
const declarativeMiddleware = await resolveDeclaredMiddleware(getBindingMiddlewareConfigs(binding), {
|
|
@@ -1469,19 +1405,10 @@ export class AgentRuntimeAdapter {
|
|
|
1469
1405
|
const visibleOutput = extractedOutput && !isLikelyToolArgsObject(tryParseJson(extractedOutput)) ? extractedOutput : "";
|
|
1470
1406
|
const emptyAssistantMessageFailure = extractEmptyAssistantMessageFailure(result);
|
|
1471
1407
|
const toolFallback = extractToolFallbackContext(result);
|
|
1472
|
-
|
|
1473
|
-
try {
|
|
1474
|
-
synthesizedOutput = await this.synthesizeDeepAgentAnswer(binding, input, result);
|
|
1475
|
-
}
|
|
1476
|
-
catch (error) {
|
|
1477
|
-
if (!(error instanceof RuntimeOperationTimeoutError) || !toolFallback) {
|
|
1478
|
-
throw error;
|
|
1479
|
-
}
|
|
1480
|
-
}
|
|
1481
|
-
if (!visibleOutput && !synthesizedOutput && !toolFallback && emptyAssistantMessageFailure) {
|
|
1408
|
+
if (!visibleOutput && !toolFallback && emptyAssistantMessageFailure) {
|
|
1482
1409
|
throw new Error(emptyAssistantMessageFailure);
|
|
1483
1410
|
}
|
|
1484
|
-
const output = visibleOutput ||
|
|
1411
|
+
const output = visibleOutput || toolFallback || JSON.stringify(result, null, 2);
|
|
1485
1412
|
const finalMessageText = sanitizeVisibleText(output);
|
|
1486
1413
|
const outputContent = extractOutputContent(result);
|
|
1487
1414
|
const contentBlocks = extractContentBlocks(result);
|
|
@@ -5,6 +5,8 @@ import type { RequirementAssessmentOptions } from "./skill-requirements.js";
|
|
|
5
5
|
export declare class AgentHarnessRuntime {
|
|
6
6
|
private readonly workspace;
|
|
7
7
|
private readonly runtimeAdapterOptions;
|
|
8
|
+
private static readonly DEFAULT_HOST_AGENT_ID;
|
|
9
|
+
private static readonly BACKGROUND_EVENT_TYPES;
|
|
8
10
|
private readonly eventBus;
|
|
9
11
|
private readonly persistence;
|
|
10
12
|
private readonly policyEngine;
|
|
@@ -27,6 +29,7 @@ export declare class AgentHarnessRuntime {
|
|
|
27
29
|
private readonly healthMonitor;
|
|
28
30
|
private readonly recoveryConfig;
|
|
29
31
|
private readonly concurrencyConfig;
|
|
32
|
+
private readonly backgroundTasks;
|
|
30
33
|
private readonly workerId;
|
|
31
34
|
private activeRunSlots;
|
|
32
35
|
private pendingRunInsertionOrder;
|
|
@@ -38,6 +41,7 @@ export declare class AgentHarnessRuntime {
|
|
|
38
41
|
private listHostBindings;
|
|
39
42
|
private defaultRunRoot;
|
|
40
43
|
private heuristicRoute;
|
|
44
|
+
private getDefaultHostAgentId;
|
|
41
45
|
private buildRoutingInput;
|
|
42
46
|
private resolveSelectedAgentId;
|
|
43
47
|
private resolveStore;
|
|
@@ -82,6 +86,7 @@ export declare class AgentHarnessRuntime {
|
|
|
82
86
|
threadId?: string;
|
|
83
87
|
}): Promise<string>;
|
|
84
88
|
private emit;
|
|
89
|
+
private trackBackgroundTask;
|
|
85
90
|
private ensureThreadStarted;
|
|
86
91
|
private loadPriorHistory;
|
|
87
92
|
private loadRunInput;
|
|
@@ -99,6 +104,7 @@ export declare class AgentHarnessRuntime {
|
|
|
99
104
|
private finalizeContinuedRun;
|
|
100
105
|
private emitOutputDeltaAndCreateItem;
|
|
101
106
|
private createContentBlocksItem;
|
|
107
|
+
private createToolResultKey;
|
|
102
108
|
private emitRunCreated;
|
|
103
109
|
private setRunStateAndEmit;
|
|
104
110
|
private requestApprovalAndEmit;
|
package/dist/runtime/harness.js
CHANGED
|
@@ -22,6 +22,14 @@ import { describeWorkspaceInventory, listAgentSkills as listWorkspaceAgentSkills
|
|
|
22
22
|
export class AgentHarnessRuntime {
|
|
23
23
|
workspace;
|
|
24
24
|
runtimeAdapterOptions;
|
|
25
|
+
static DEFAULT_HOST_AGENT_ID = "orchestra";
|
|
26
|
+
static BACKGROUND_EVENT_TYPES = new Set([
|
|
27
|
+
"run.created",
|
|
28
|
+
"run.queued",
|
|
29
|
+
"run.dequeued",
|
|
30
|
+
"output.delta",
|
|
31
|
+
"reasoning.delta",
|
|
32
|
+
]);
|
|
25
33
|
eventBus = new EventBus();
|
|
26
34
|
persistence;
|
|
27
35
|
policyEngine = new PolicyEngine();
|
|
@@ -44,6 +52,7 @@ export class AgentHarnessRuntime {
|
|
|
44
52
|
healthMonitor;
|
|
45
53
|
recoveryConfig;
|
|
46
54
|
concurrencyConfig;
|
|
55
|
+
backgroundTasks = new Set();
|
|
47
56
|
workerId = `worker-${createPersistentId()}`;
|
|
48
57
|
activeRunSlots = 0;
|
|
49
58
|
pendingRunInsertionOrder = 0;
|
|
@@ -76,6 +85,13 @@ export class AgentHarnessRuntime {
|
|
|
76
85
|
const { primaryBinding, secondaryBinding } = inferRoutingBindings(this.workspace);
|
|
77
86
|
return heuristicRoute(extractMessageText(input), primaryBinding, secondaryBinding);
|
|
78
87
|
}
|
|
88
|
+
getDefaultHostAgentId() {
|
|
89
|
+
const orchestraBinding = this.workspace.bindings.get(AgentHarnessRuntime.DEFAULT_HOST_AGENT_ID);
|
|
90
|
+
if (orchestraBinding && orchestraBinding.harnessRuntime.hostFacing !== false) {
|
|
91
|
+
return orchestraBinding.agent.id;
|
|
92
|
+
}
|
|
93
|
+
return this.heuristicRoute("");
|
|
94
|
+
}
|
|
79
95
|
async buildRoutingInput(input, threadId) {
|
|
80
96
|
const inputText = extractMessageText(input);
|
|
81
97
|
if (!threadId) {
|
|
@@ -107,7 +123,7 @@ export class AgentHarnessRuntime {
|
|
|
107
123
|
return thread.agentId;
|
|
108
124
|
}
|
|
109
125
|
}
|
|
110
|
-
return this.
|
|
126
|
+
return this.getDefaultHostAgentId();
|
|
111
127
|
}
|
|
112
128
|
return requestedAgentId;
|
|
113
129
|
}
|
|
@@ -404,18 +420,29 @@ export class AgentHarnessRuntime {
|
|
|
404
420
|
}
|
|
405
421
|
async emit(threadId, runId, sequence, eventType, payload, source = "runtime") {
|
|
406
422
|
const event = createHarnessEvent(threadId, runId, sequence, eventType, payload, source);
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
423
|
+
if (AgentHarnessRuntime.BACKGROUND_EVENT_TYPES.has(event.eventType)) {
|
|
424
|
+
this.trackBackgroundTask(this.persistence.appendEvent(event).catch(() => {
|
|
425
|
+
// Fail open for telemetry-style event persistence.
|
|
426
|
+
}));
|
|
427
|
+
}
|
|
428
|
+
else {
|
|
429
|
+
await this.persistence.appendEvent(event);
|
|
411
430
|
}
|
|
431
|
+
this.eventBus.publish(event);
|
|
412
432
|
return event;
|
|
413
433
|
}
|
|
434
|
+
trackBackgroundTask(task) {
|
|
435
|
+
this.backgroundTasks.add(task);
|
|
436
|
+
void task.finally(() => {
|
|
437
|
+
this.backgroundTasks.delete(task);
|
|
438
|
+
});
|
|
439
|
+
}
|
|
414
440
|
async ensureThreadStarted(selectedAgentId, binding, input, existingThreadId) {
|
|
415
441
|
const threadId = existingThreadId ?? createPersistentId();
|
|
416
442
|
const runId = createPersistentId();
|
|
417
443
|
const createdAt = new Date().toISOString();
|
|
418
|
-
|
|
444
|
+
const isNewThread = !existingThreadId;
|
|
445
|
+
if (isNewThread) {
|
|
419
446
|
await this.persistence.createThread({
|
|
420
447
|
threadId,
|
|
421
448
|
agentId: selectedAgentId,
|
|
@@ -424,21 +451,23 @@ export class AgentHarnessRuntime {
|
|
|
424
451
|
createdAt,
|
|
425
452
|
});
|
|
426
453
|
}
|
|
427
|
-
await
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
454
|
+
await Promise.all([
|
|
455
|
+
this.persistence.appendThreadMessage(threadId, {
|
|
456
|
+
role: "user",
|
|
457
|
+
content: normalizeMessageContent(input),
|
|
458
|
+
runId,
|
|
459
|
+
createdAt,
|
|
460
|
+
}),
|
|
461
|
+
this.persistence.createRun({
|
|
462
|
+
threadId,
|
|
463
|
+
runId,
|
|
464
|
+
agentId: binding.agent.id,
|
|
465
|
+
executionMode: getBindingAdapterKind(binding),
|
|
466
|
+
adapterKind: getBindingAdapterKind(binding),
|
|
467
|
+
createdAt,
|
|
468
|
+
}),
|
|
469
|
+
]);
|
|
470
|
+
return { threadId, runId, createdAt, isNewThread };
|
|
442
471
|
}
|
|
443
472
|
async loadPriorHistory(threadId, runId) {
|
|
444
473
|
const history = await this.persistence.listThreadMessages(threadId);
|
|
@@ -500,11 +529,11 @@ export class AgentHarnessRuntime {
|
|
|
500
529
|
output: reason ? `cancelled: ${reason}` : "cancelled",
|
|
501
530
|
};
|
|
502
531
|
}
|
|
503
|
-
async invokeWithHistory(binding, input, threadId, runId, resumePayload, options = {}) {
|
|
504
|
-
const
|
|
532
|
+
async invokeWithHistory(binding, input, threadId, runId, resumePayload, priorHistory, options = {}) {
|
|
533
|
+
const history = priorHistory ?? await this.loadPriorHistory(threadId, runId);
|
|
505
534
|
const startedAt = Date.now();
|
|
506
535
|
try {
|
|
507
|
-
const result = await this.runtimeAdapter.invoke(binding, input, threadId, runId, resumePayload,
|
|
536
|
+
const result = await this.runtimeAdapter.invoke(binding, input, threadId, runId, resumePayload, history, options);
|
|
508
537
|
this.healthMonitor.recordLlmSuccess(Date.now() - startedAt);
|
|
509
538
|
return result;
|
|
510
539
|
}
|
|
@@ -603,7 +632,7 @@ export class AgentHarnessRuntime {
|
|
|
603
632
|
});
|
|
604
633
|
}
|
|
605
634
|
try {
|
|
606
|
-
const actual = await this.invokeWithHistory(binding, input, threadId, runId, undefined, {
|
|
635
|
+
const actual = await this.invokeWithHistory(binding, input, threadId, runId, undefined, options.priorHistory, {
|
|
607
636
|
context: options.context,
|
|
608
637
|
state: options.state,
|
|
609
638
|
files: options.files,
|
|
@@ -683,6 +712,16 @@ export class AgentHarnessRuntime {
|
|
|
683
712
|
contentBlocks,
|
|
684
713
|
};
|
|
685
714
|
}
|
|
715
|
+
createToolResultKey(toolName, output, isError) {
|
|
716
|
+
let serializedOutput = "";
|
|
717
|
+
try {
|
|
718
|
+
serializedOutput = JSON.stringify(output);
|
|
719
|
+
}
|
|
720
|
+
catch {
|
|
721
|
+
serializedOutput = String(output);
|
|
722
|
+
}
|
|
723
|
+
return JSON.stringify([toolName, serializedOutput, isError === true]);
|
|
724
|
+
}
|
|
686
725
|
async emitRunCreated(threadId, runId, payload) {
|
|
687
726
|
return this.emit(threadId, runId, 1, "run.created", payload);
|
|
688
727
|
}
|
|
@@ -757,22 +796,29 @@ export class AgentHarnessRuntime {
|
|
|
757
796
|
await listener(value);
|
|
758
797
|
}
|
|
759
798
|
async acquireRunSlot(threadId, runId, activeState = "running", priority = 0) {
|
|
760
|
-
if (threadId && runId) {
|
|
761
|
-
await this.persistence.enqueueRun({ threadId, runId, priority });
|
|
762
|
-
}
|
|
763
799
|
let stopHeartbeat = () => undefined;
|
|
764
|
-
const beginLease = async () => {
|
|
800
|
+
const beginLease = async (mode) => {
|
|
765
801
|
if (!threadId || !runId) {
|
|
766
802
|
return;
|
|
767
803
|
}
|
|
768
804
|
const claimedAt = new Date().toISOString();
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
805
|
+
if (mode === "queue-claim") {
|
|
806
|
+
await this.persistence.claimQueuedRun({
|
|
807
|
+
threadId,
|
|
808
|
+
runId,
|
|
809
|
+
workerId: this.workerId,
|
|
810
|
+
claimedAt,
|
|
811
|
+
leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
|
|
812
|
+
});
|
|
813
|
+
}
|
|
814
|
+
else {
|
|
815
|
+
await this.persistence.renewRunLease({
|
|
816
|
+
runId,
|
|
817
|
+
workerId: this.workerId,
|
|
818
|
+
heartbeatAt: claimedAt,
|
|
819
|
+
leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
|
|
820
|
+
});
|
|
821
|
+
}
|
|
776
822
|
if (this.concurrencyConfig.heartbeatIntervalMs <= 0) {
|
|
777
823
|
return;
|
|
778
824
|
}
|
|
@@ -797,14 +843,19 @@ export class AgentHarnessRuntime {
|
|
|
797
843
|
};
|
|
798
844
|
const maxConcurrentRuns = this.concurrencyConfig.maxConcurrentRuns;
|
|
799
845
|
if (!maxConcurrentRuns) {
|
|
800
|
-
await beginLease();
|
|
846
|
+
await beginLease("direct-heartbeat");
|
|
801
847
|
return async () => {
|
|
802
848
|
await releaseLease();
|
|
803
849
|
};
|
|
804
850
|
}
|
|
805
|
-
|
|
851
|
+
const canActivateImmediately = this.activeRunSlots < maxConcurrentRuns;
|
|
852
|
+
const useDirectHeartbeatFastPath = canActivateImmediately && maxConcurrentRuns > 1;
|
|
853
|
+
if (canActivateImmediately) {
|
|
806
854
|
this.activeRunSlots += 1;
|
|
807
|
-
|
|
855
|
+
if (threadId && runId && !useDirectHeartbeatFastPath) {
|
|
856
|
+
await this.persistence.enqueueRun({ threadId, runId, priority });
|
|
857
|
+
}
|
|
858
|
+
await beginLease(useDirectHeartbeatFastPath ? "direct-heartbeat" : "queue-claim");
|
|
808
859
|
let released = false;
|
|
809
860
|
return async () => {
|
|
810
861
|
if (released) {
|
|
@@ -833,11 +884,12 @@ export class AgentHarnessRuntime {
|
|
|
833
884
|
await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
|
|
834
885
|
previousState: "queued",
|
|
835
886
|
});
|
|
836
|
-
await beginLease();
|
|
887
|
+
await beginLease("queue-claim");
|
|
837
888
|
}
|
|
838
889
|
return "activate";
|
|
839
890
|
};
|
|
840
891
|
if (threadId && runId) {
|
|
892
|
+
await this.persistence.enqueueRun({ threadId, runId, priority });
|
|
841
893
|
const slotAcquisition = new Promise((resolve, reject) => {
|
|
842
894
|
const displacedEntries = this.enqueuePendingRunSlot({
|
|
843
895
|
threadId,
|
|
@@ -994,16 +1046,19 @@ export class AgentHarnessRuntime {
|
|
|
994
1046
|
if (!policyDecision.allowed) {
|
|
995
1047
|
throw new Error(`Policy evaluation blocked agent ${selectedAgentId}: ${policyDecision.reasons.join(", ")}`);
|
|
996
1048
|
}
|
|
997
|
-
const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
|
|
1049
|
+
const { threadId, runId, isNewThread } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
|
|
998
1050
|
const priority = this.normalizeRunPriority(options.priority);
|
|
999
|
-
|
|
1000
|
-
|
|
1051
|
+
const runRequestPromise = this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation, priority));
|
|
1052
|
+
const runCreatedEventPromise = this.emitRunCreated(threadId, runId, {
|
|
1001
1053
|
agentId: binding.agent.id,
|
|
1002
1054
|
requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
|
|
1003
1055
|
selectedAgentId,
|
|
1004
1056
|
executionMode: getBindingAdapterKind(binding),
|
|
1005
1057
|
});
|
|
1006
|
-
const
|
|
1058
|
+
const releaseRunSlotPromise = this.acquireRunSlot(threadId, runId, "running", priority);
|
|
1059
|
+
await runRequestPromise;
|
|
1060
|
+
await runCreatedEventPromise;
|
|
1061
|
+
const releaseRunSlot = await releaseRunSlotPromise;
|
|
1007
1062
|
try {
|
|
1008
1063
|
return await this.executeQueuedRun(binding, options.input, threadId, runId, selectedAgentId, {
|
|
1009
1064
|
context: invocation.context,
|
|
@@ -1012,6 +1067,7 @@ export class AgentHarnessRuntime {
|
|
|
1012
1067
|
previousState: "running",
|
|
1013
1068
|
stateSequence: 6,
|
|
1014
1069
|
approvalSequence: 7,
|
|
1070
|
+
priorHistory: isNewThread ? [] : undefined,
|
|
1015
1071
|
});
|
|
1016
1072
|
}
|
|
1017
1073
|
finally {
|
|
@@ -1037,22 +1093,31 @@ export class AgentHarnessRuntime {
|
|
|
1037
1093
|
}
|
|
1038
1094
|
let emitted = false;
|
|
1039
1095
|
let streamActivityObserved = false;
|
|
1040
|
-
const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
|
|
1096
|
+
const { threadId, runId, isNewThread } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
|
|
1041
1097
|
const priority = this.normalizeRunPriority(options.priority);
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1098
|
+
const priorHistoryPromise = Promise.resolve(isNewThread ? [] : undefined).then((historyHint) => historyHint ?? this.loadPriorHistory(threadId, runId));
|
|
1099
|
+
const runRequestPromise = this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation, priority));
|
|
1100
|
+
const runCreatedEventPromise = this.emitRunCreated(threadId, runId, {
|
|
1101
|
+
agentId: selectedAgentId,
|
|
1102
|
+
requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
|
|
1103
|
+
selectedAgentId,
|
|
1104
|
+
input: options.input,
|
|
1105
|
+
state: "running",
|
|
1106
|
+
});
|
|
1107
|
+
yield { type: "event", event: await runCreatedEventPromise };
|
|
1108
|
+
const releaseRunSlotPromise = this.acquireRunSlot(threadId, runId, "running", priority);
|
|
1109
|
+
let releaseRunSlot = async () => undefined;
|
|
1051
1110
|
try {
|
|
1052
1111
|
try {
|
|
1053
|
-
const priorHistory = await
|
|
1112
|
+
const [priorHistory, acquiredReleaseRunSlot] = await Promise.all([
|
|
1113
|
+
priorHistoryPromise,
|
|
1114
|
+
releaseRunSlotPromise,
|
|
1115
|
+
runRequestPromise,
|
|
1116
|
+
]).then(([loadedPriorHistory, resolvedReleaseRunSlot]) => [loadedPriorHistory, resolvedReleaseRunSlot]);
|
|
1117
|
+
releaseRunSlot = acquiredReleaseRunSlot;
|
|
1054
1118
|
let assistantOutput = "";
|
|
1055
1119
|
const toolErrors = [];
|
|
1120
|
+
let lastToolResultKey = null;
|
|
1056
1121
|
for await (const chunk of this.runtimeAdapter.stream(binding, options.input, threadId, priorHistory, {
|
|
1057
1122
|
context: invocation.context,
|
|
1058
1123
|
state: invocation.state,
|
|
@@ -1120,6 +1185,11 @@ export class AgentHarnessRuntime {
|
|
|
1120
1185
|
continue;
|
|
1121
1186
|
}
|
|
1122
1187
|
if (normalizedChunk.kind === "tool-result") {
|
|
1188
|
+
const toolResultKey = this.createToolResultKey(normalizedChunk.toolName, normalizedChunk.output, normalizedChunk.isError);
|
|
1189
|
+
if (toolResultKey === lastToolResultKey) {
|
|
1190
|
+
continue;
|
|
1191
|
+
}
|
|
1192
|
+
lastToolResultKey = toolResultKey;
|
|
1123
1193
|
if (normalizedChunk.isError) {
|
|
1124
1194
|
toolErrors.push(renderToolFailure(normalizedChunk.toolName, normalizedChunk.output));
|
|
1125
1195
|
}
|
|
@@ -1425,6 +1495,7 @@ export class AgentHarnessRuntime {
|
|
|
1425
1495
|
await this.checkpointMaintenance?.stop();
|
|
1426
1496
|
await this.runtimeRecordMaintenance?.stop();
|
|
1427
1497
|
this.unregisterThreadMemorySync();
|
|
1498
|
+
await Promise.allSettled(Array.from(this.backgroundTasks));
|
|
1428
1499
|
await this.threadMemorySync.close();
|
|
1429
1500
|
}
|
|
1430
1501
|
async stop() {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@botbotgo/agent-harness",
|
|
3
|
-
"version": "0.0.84",
|
|
3
|
+
"version": "0.0.85",
|
|
4
4
|
"description": "Workspace runtime for multi-agent applications",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"packageManager": "npm@10.9.2",
|
|
@@ -53,7 +53,8 @@
|
|
|
53
53
|
"scripts": {
|
|
54
54
|
"build": "rm -rf dist tsconfig.tsbuildinfo && tsc -p tsconfig.json && cp -R config dist/",
|
|
55
55
|
"check": "tsc -p tsconfig.json --noEmit",
|
|
56
|
-
"test": "vitest run test/hello-file.test.ts test/public-api.test.ts test/runtime-health.test.ts test/memory-runtime.test.ts test/sqlite-persistence.test.ts test/runtime-queue-lease.test.ts test/runtime-cancel.test.ts test/runtime-record-maintenance.test.ts test/resource-optional-provider.test.ts test/resource-isolation.test.ts test/stock-research-app-load-harness.test.ts test/stock-research-app-run.test.ts test/stock-research-app-config.test.ts test/release-workflow.test.ts test/release-version.test.ts test/gitignore.test.ts test/package-lock.test.ts test/readme.test.ts test/product-boundary-docs.test.ts test/long-term-memory-docs.test.ts test/local-docs-persistence-inventory.test.ts test/docs-site.test.ts test/runtime-adapter-regressions.test.ts test/runtime-capabilities.test.ts test/runtime-recovery.test.ts test/tool-extension-gaps.test.ts test/checkpoint-maintenance.test.ts test/llamaindex-dependency-compat.test.ts test/skill-standard.test.ts test/routing-config.test.ts test/workspace-compat-regressions.test.ts test/upstream-compat-regressions.test.ts test/yaml-format.test.ts test/config-secrets.test.ts test/init-command.test.ts test/coding-agent-guide.test.ts",
|
|
56
|
+
"test": "vitest run test/hello-file.test.ts test/public-api.test.ts test/runtime-health.test.ts test/memory-runtime.test.ts test/sqlite-persistence.test.ts test/runtime-queue-lease.test.ts test/runtime-cancel.test.ts test/runtime-record-maintenance.test.ts test/resource-optional-provider.test.ts test/resource-isolation.test.ts test/stock-research-app-load-harness.test.ts test/stock-research-app-run.test.ts test/stock-research-app-config.test.ts test/release-workflow.test.ts test/release-version.test.ts test/gitignore.test.ts test/package-lock.test.ts test/readme.test.ts test/product-boundary-docs.test.ts test/long-term-memory-docs.test.ts test/local-docs-persistence-inventory.test.ts test/docs-site.test.ts test/runtime-adapter-regressions.test.ts test/runtime-capabilities.test.ts test/runtime-recovery.test.ts test/tool-extension-gaps.test.ts test/checkpoint-maintenance.test.ts test/llamaindex-dependency-compat.test.ts test/skill-standard.test.ts test/routing-config.test.ts test/workspace-compat-regressions.test.ts test/upstream-compat-regressions.test.ts test/upstream-runtime-ab-benchmark.test.ts test/yaml-format.test.ts test/config-secrets.test.ts test/init-command.test.ts test/coding-agent-guide.test.ts",
|
|
57
|
+
"test:upstream-ab-real": "vitest run test/upstream-runtime-ab-real.test.ts",
|
|
57
58
|
"test:real-providers": "vitest run test/real-provider-harness.test.ts",
|
|
58
59
|
"release:prepare": "npm version patch --no-git-tag-version && node ./scripts/sync-example-version.mjs",
|
|
59
60
|
"release:pack": "npm pack --dry-run",
|