@botbotgo/agent-harness 0.0.84 → 0.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
/** Benchmark paths returned by resolveUpstreamBenchmarkPaths when no valid selection is supplied. */
+ export declare const DEFAULT_UPSTREAM_BENCHMARK_PATHS: readonly ["harness", "raw-langchain-v1", "raw-deepagent"];
2
/** Workload returned by resolveUpstreamBenchmarkWorkload unless "no-tool" is requested. */
+ export declare const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD: "tool";
3
/** Any one of the identifiers listed in DEFAULT_UPSTREAM_BENCHMARK_PATHS. */
+ export type UpstreamBenchmarkPath = (typeof DEFAULT_UPSTREAM_BENCHMARK_PATHS)[number];
4
/** Workload identifiers that resolveUpstreamBenchmarkWorkload can return. */
+ export type UpstreamBenchmarkWorkload = "tool" | "no-tool";
5
/**
 * Result of a single benchmark run; the input element type of
 * aggregateUpstreamBenchmarkRuns.
 */
+ export type UpstreamBenchmarkRunSummary = {
6
+ providerLabel: string;
7
+ model: string;
8
+ path: UpstreamBenchmarkPath;
9
+ runNumber: number;
10
/** Only "completed" runs contribute to aggregate statistics; anything else counts as a failure. */
+ status: "completed" | "failed";
11
/** Presumably the end-to-end duration of the run in milliseconds; confirm against the producer. */
+ totalMs: number;
12
/** Runs with a null value are left out of the first-token statistics. */
+ firstTokenMs: number | null;
13
+ outputLength: number;
14
+ normalizedOutputLength: number;
15
+ toolCallCount: number;
16
+ exactOutputMatch: boolean;
17
+ errorMessage?: string;
18
+ };
19
/**
 * Statistics produced by aggregateUpstreamBenchmarkRuns for one
 * provider/model/path combination.
 *
 * Every avg/trimmedAvg/median/p95 field is computed over completed runs only,
 * rounded to two decimals, and is null when there are no values to summarise.
 */
+ export type UpstreamBenchmarkAggregateSummary = {
20
+ providerLabel: string;
21
+ model: string;
22
+ path: UpstreamBenchmarkPath;
23
/** Total number of runs supplied, completed or not. */
+ repetitions: number;
24
/** Number of runs whose status is "completed". */
+ successCount: number;
25
/** repetitions minus successCount. */
+ failureCount: number;
26
+ avgTotalMs: number | null;
27
/** Mean after trimming a 0.1 fraction of the sorted values from each end. */
+ trimmedAvgTotalMs: number | null;
28
+ medianTotalMs: number | null;
29
+ p95TotalMs: number | null;
30
+ avgFirstTokenMs: number | null;
31
/** Mean after trimming a 0.1 fraction of the sorted values from each end. */
+ trimmedAvgFirstTokenMs: number | null;
32
+ medianFirstTokenMs: number | null;
33
+ p95FirstTokenMs: number | null;
34
+ avgOutputLength: number | null;
35
+ avgNormalizedOutputLength: number | null;
36
+ avgToolCallCount: number | null;
37
/** Number of completed runs whose exactOutputMatch flag is truthy. */
+ exactOutputMatchCount: number;
38
+ };
39
/**
 * Difference between two aggregate summaries as produced by
 * compareUpstreamBenchmarkPaths.
 *
 * Each delta is candidate minus baseline; each overhead percentage is that
 * difference divided by the baseline, times 100. Both are rounded to two
 * decimals. Delta and percentage are null when either average is null, and the
 * percentage alone is null when the baseline average is 0.
 */
+ export type UpstreamBenchmarkComparison = {
40
+ baselinePath: UpstreamBenchmarkPath;
41
+ candidatePath: UpstreamBenchmarkPath;
42
+ avgTotalMsDelta: number | null;
43
+ avgTotalMsOverheadPct: number | null;
44
+ avgFirstTokenMsDelta: number | null;
45
+ avgFirstTokenMsOverheadPct: number | null;
46
+ };
47
/**
 * Parses a comma-separated list of path names (trimmed, case-insensitive),
 * dropping unknown names. Returns a copy of the defaults when the value is
 * empty or contains no known name.
 */
+ export declare function resolveUpstreamBenchmarkPaths(rawValue?: string): readonly UpstreamBenchmarkPath[];
48
/** Returns "no-tool" when the trimmed, lower-cased value is exactly "no-tool"; otherwise the default workload. */
+ export declare function resolveUpstreamBenchmarkWorkload(rawValue?: string): UpstreamBenchmarkWorkload;
49
/**
 * Collapses whitespace in `output`, then returns a whitespace-delimited token
 * that starts with one of `prefixes`; falls back to the whole normalised
 * output when nothing matches.
 */
+ export declare function extractLastMatchingToken(output: string, prefixes: readonly string[]): string;
50
/** Summarises `runs` into counts and timing/length statistics; only completed runs feed the statistics. */
+ export declare function aggregateUpstreamBenchmarkRuns(providerLabel: string, model: string, path: UpstreamBenchmarkPath, runs: UpstreamBenchmarkRunSummary[]): UpstreamBenchmarkAggregateSummary;
51
/** Computes the average total and first-token latency differences of `candidate` relative to `baseline`. */
+ export declare function compareUpstreamBenchmarkPaths(baseline: UpstreamBenchmarkAggregateSummary, candidate: UpstreamBenchmarkAggregateSummary): UpstreamBenchmarkComparison;
@@ -0,0 +1,123 @@
1
/** Frozen list of benchmark paths used when the caller selects none. */
export const DEFAULT_UPSTREAM_BENCHMARK_PATHS = Object.freeze(["harness", "raw-langchain-v1", "raw-deepagent"]);

/** Workload used when the caller does not request "no-tool". */
export const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD = "tool";
7
/**
 * Arithmetic mean of `values`, rounded to two decimals.
 * Yields NaN for an empty array (0 / 0).
 */
function average(values) {
    const total = values.reduce((runningTotal, current) => runningTotal + current, 0);
    const mean = total / values.length;
    return Number(mean.toFixed(2));
}
10
/**
 * Median of `values`, rounded to two decimals. The input is not mutated.
 * An even-length input averages the two middle values; an empty input gives 0.
 */
function median(values) {
    const ordered = [...values];
    ordered.sort((a, b) => a - b);
    const mid = Math.floor(ordered.length / 2);
    const upper = ordered[mid] ?? 0;
    if (ordered.length % 2 !== 0) {
        return Number(upper.toFixed(2));
    }
    const lower = ordered[mid - 1] ?? 0;
    return Number(((lower + upper) / 2).toFixed(2));
}
18
/**
 * Nearest-rank percentile of `values`, rounded to two decimals.
 *
 * The rank is ceil(n * quantile) - 1, clamped into [0, n - 1], so quantiles
 * outside [0, 1] select the smallest or largest value. The input is not
 * mutated.
 *
 * @param values numbers to rank
 * @param quantile fraction in [0, 1], e.g. 0.95 for p95
 * @returns the selected value, or 0 when none can be selected (empty input or
 *          NaN quantile), matching the fallback `median` uses
 */
function percentile(values, quantile) {
    const sorted = [...values].sort((left, right) => left - right);
    const index = Math.max(0, Math.min(sorted.length - 1, Math.ceil(sorted.length * quantile) - 1));
    // sorted[index] is undefined for an empty array or a NaN index; fall back
    // to 0 instead of throwing on `.toFixed`.
    return Number((sorted[index] ?? 0).toFixed(2));
}
23
/** Mean of `values`, or null when there is nothing to average. */
function averageOrNull(values) {
    if (values.length > 0) {
        return average(values);
    }
    return null;
}
26
/** Median of `values`, or null when the array is empty. */
function medianOrNull(values) {
    if (values.length > 0) {
        return median(values);
    }
    return null;
}
29
/** Percentile of `values` at `quantile`, or null when the array is empty. */
function percentileOrNull(values, quantile) {
    if (values.length > 0) {
        return percentile(values, quantile);
    }
    return null;
}
32
/**
 * Returns a copy of `values` with the lowest and highest `trimFraction` share
 * of entries removed.
 *
 * Inputs of two or fewer values are copied unchanged (and unsorted). Larger
 * inputs are sorted ascending, and at least one value always survives.
 *
 * @param values numbers to trim; not mutated
 * @param trimFraction share to drop from each end, e.g. 0.1; negative, NaN or
 *        missing values are treated as 0 (nothing trimmed)
 * @returns the retained values
 */
function trimValues(values, trimFraction) {
    if (values.length <= 2) {
        return [...values];
    }
    const sorted = [...values].sort((left, right) => left - right);
    // A negative count would make slice() drop the low values, and a NaN count
    // would produce an empty slice, so only positive fractions are honoured.
    const fraction = trimFraction > 0 ? trimFraction : 0;
    const maxTrimCount = Math.floor((sorted.length - 1) / 2);
    const trimCount = Math.min(Math.floor(sorted.length * fraction), maxTrimCount);
    return sorted.slice(trimCount, sorted.length - trimCount);
}
40
/** Mean of `values` after trimming `trimFraction` from each end, or null for an empty array. */
function trimmedAverageOrNull(values, trimFraction) {
    if (values.length > 0) {
        const retained = trimValues(values, trimFraction);
        return average(retained);
    }
    return null;
}
43
/**
 * Resolves the benchmark paths to run from a comma-separated setting.
 *
 * Entries are trimmed and lower-cased. Names that are not listed in
 * DEFAULT_UPSTREAM_BENCHMARK_PATHS are dropped, and repeated names are kept
 * once, in first-seen order, so the same path is not benchmarked twice.
 *
 * @param rawValue comma-separated path names; may be undefined or empty
 * @returns the selected paths, or a fresh copy of the defaults when
 *          `rawValue` is empty or names no known path
 */
export function resolveUpstreamBenchmarkPaths(rawValue) {
    if (!rawValue) {
        return [...DEFAULT_UPSTREAM_BENCHMARK_PATHS];
    }
    const requested = rawValue.split(",").map((value) => value.trim().toLowerCase());
    // Validate against the single source of truth rather than a second
    // hard-coded list; Set iteration keeps first-seen order.
    const parsed = [...new Set(requested)].filter((value) => DEFAULT_UPSTREAM_BENCHMARK_PATHS.includes(value));
    return parsed.length > 0 ? parsed : [...DEFAULT_UPSTREAM_BENCHMARK_PATHS];
}
53
/**
 * Resolves the benchmark workload from a raw setting: "no-tool" (after
 * trimming and lower-casing) selects the no-tool workload, anything else,
 * including undefined, selects the default.
 */
export function resolveUpstreamBenchmarkWorkload(rawValue) {
    const requested = rawValue?.trim().toLowerCase();
    if (requested === "no-tool") {
        return "no-tool";
    }
    return DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD;
}
56
/**
 * Extracts a prefixed token from model output.
 *
 * Whitespace runs in `output` are collapsed to single spaces and the result is
 * trimmed. For each prefix, in order, occurrences are scanned left to right
 * without overlap; a token runs from the start of the prefix to the next
 * space. The last token found wins, so a later prefix in `prefixes` takes
 * precedence over an earlier one regardless of position in the text.
 *
 * @param output raw text to search
 * @param prefixes token prefixes to look for; empty prefixes are ignored
 * @returns the winning token, or the normalised output when nothing matches
 */
export function extractLastMatchingToken(output, prefixes) {
    const normalized = output.replace(/\s+/g, " ").trim();
    let matched = "";
    for (const prefix of prefixes) {
        // indexOf("", i) returns i, so an empty prefix would never advance the
        // scan below and the loop would not terminate.
        if (prefix.length === 0) {
            continue;
        }
        let searchIndex = normalized.indexOf(prefix);
        while (searchIndex >= 0) {
            // After normalisation the only whitespace left is a single space.
            const tokenEnd = normalized.indexOf(" ", searchIndex);
            const token = tokenEnd === -1
                ? normalized.slice(searchIndex)
                : normalized.slice(searchIndex, tokenEnd);
            // Empty only when the prefix itself starts with a space.
            if (token.length > 0) {
                matched = token;
            }
            searchIndex = normalized.indexOf(prefix, searchIndex + prefix.length);
        }
    }
    return matched || normalized;
}
72
/**
 * Summarises the runs of one provider/model/path combination.
 *
 * Only runs whose status is "completed" feed the statistics; every other run
 * is counted as a failure. First-token statistics additionally skip runs whose
 * `firstTokenMs` is null. Trimmed averages use a 0.1 trim fraction and the
 * percentile fields use the 0.95 quantile.
 */
export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs) {
    const completed = runs.filter(({ status }) => status === "completed");
    const totals = [];
    const firstTokens = [];
    for (const run of completed) {
        totals.push(run.totalMs);
        if (run.firstTokenMs !== null) {
            firstTokens.push(run.firstTokenMs);
        }
    }
    const pluck = (field) => completed.map((run) => run[field]);
    const succeeded = completed.length;
    return {
        providerLabel,
        model,
        path,
        repetitions: runs.length,
        successCount: succeeded,
        failureCount: runs.length - succeeded,
        avgTotalMs: averageOrNull(totals),
        trimmedAvgTotalMs: trimmedAverageOrNull(totals, 0.1),
        medianTotalMs: medianOrNull(totals),
        p95TotalMs: percentileOrNull(totals, 0.95),
        avgFirstTokenMs: averageOrNull(firstTokens),
        trimmedAvgFirstTokenMs: trimmedAverageOrNull(firstTokens, 0.1),
        medianFirstTokenMs: medianOrNull(firstTokens),
        p95FirstTokenMs: percentileOrNull(firstTokens, 0.95),
        avgOutputLength: averageOrNull(pluck("outputLength")),
        avgNormalizedOutputLength: averageOrNull(pluck("normalizedOutputLength")),
        avgToolCallCount: averageOrNull(pluck("toolCallCount")),
        exactOutputMatchCount: completed.reduce((count, run) => (run.exactOutputMatch ? count + 1 : count), 0),
    };
}
99
/**
 * Absolute and relative difference of `candidate` against `baseline`.
 *
 * Both results are rounded to two decimals. Either input being null yields
 * null for both; a baseline of 0 yields a delta but a null percentage.
 */
function computeOverhead(candidate, baseline) {
    const eitherMissing = candidate === null || baseline === null;
    if (eitherMissing) {
        return { delta: null, pct: null };
    }
    const difference = candidate - baseline;
    const delta = Number(difference.toFixed(2));
    const pct = baseline === 0 ? null : Number(((difference / baseline) * 100).toFixed(2));
    return { delta, pct };
}
112
/**
 * Compares two aggregate summaries, reporting how much slower (positive) or
 * faster (negative) `candidate` is than `baseline` for average total latency
 * and average first-token latency.
 */
export function compareUpstreamBenchmarkPaths(baseline, candidate) {
    const { delta: avgTotalMsDelta, pct: avgTotalMsOverheadPct } = computeOverhead(candidate.avgTotalMs, baseline.avgTotalMs);
    const { delta: avgFirstTokenMsDelta, pct: avgFirstTokenMsOverheadPct } = computeOverhead(candidate.avgFirstTokenMs, baseline.avgFirstTokenMs);
    return {
        baselinePath: baseline.path,
        candidatePath: candidate.path,
        avgTotalMsDelta,
        avgTotalMsOverheadPct,
        avgFirstTokenMsDelta,
        avgFirstTokenMsOverheadPct,
    };
}
@@ -1 +1 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.83";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.85";
@@ -1 +1 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.83";
1
+ export const AGENT_HARNESS_VERSION = "0.0.85";
@@ -15,6 +15,7 @@ export declare class SqlitePersistence implements RuntimePersistence {
15
15
  private threadDir;
16
16
  private runDir;
17
17
  private execute;
18
+ private executeTransaction;
18
19
  private selectOne;
19
20
  private selectAll;
20
21
  private mapThreadSummary;
@@ -29,6 +30,18 @@ export declare class SqlitePersistence implements RuntimePersistence {
29
30
  status: RunState;
30
31
  createdAt: string;
31
32
  }): Promise<void>;
33
+ bootstrapRun(input: {
34
+ threadId: string;
35
+ agentId: string;
36
+ runId: string;
37
+ status: RunState;
38
+ createdAt: string;
39
+ executionMode: string;
40
+ adapterKind?: string;
41
+ userMessage: TranscriptMessage;
42
+ runRequest: PersistedRunRequest;
43
+ createThread: boolean;
44
+ }): Promise<void>;
32
45
  createRun(input: {
33
46
  threadId: string;
34
47
  runId: string;
@@ -263,6 +263,31 @@ export class SqlitePersistence {
263
263
  }
264
264
  await client.execute(sql);
265
265
  }
266
/**
 * Executes `steps` in order inside a single transaction.
 *
 * Each step is an object with a `sql` string and optional `args`; `args` is
 * forwarded to the client only when it is truthy. The transaction is opened
 * with BEGIN IMMEDIATE and committed after the last step. If any statement
 * (including COMMIT) throws, a ROLLBACK is attempted and the original error is
 * rethrown.
 *
 * NOTE(review): every statement goes through the client returned by
 * getClient(); presumably that is one shared connection, in which case
 * unrelated statements issued between these awaits would join this
 * transaction. Confirm against the client implementation.
 */
+ async executeTransaction(steps) {
267
+ await this.ensureInitialized();
268
+ const client = await this.getClient();
269
+ await client.execute("BEGIN IMMEDIATE");
270
+ try {
271
+ for (const step of steps) {
272
+ if (step.args) {
273
+ await client.execute(step.sql, step.args);
274
+ }
275
+ else {
276
+ await client.execute(step.sql);
277
+ }
278
+ }
279
+ await client.execute("COMMIT");
280
+ }
281
+ catch (error) {
282
+ try {
283
+ await client.execute("ROLLBACK");
284
+ }
285
+ catch {
286
+ // Ignore rollback failures and preserve the original error.
287
+ }
288
+ throw error;
289
+ }
290
+ }
266
291
  async selectOne(sql, args) {
267
292
  await this.ensureInitialized();
268
293
  const client = await this.getClient();
@@ -347,6 +372,61 @@ export class SqlitePersistence {
347
372
  (thread_id, workspace_id, entry_agent_id, status, latest_run_id, created_at, updated_at)
348
373
  VALUES (?, ?, ?, ?, ?, ?, ?)`, [input.threadId, "default", input.agentId, input.status, input.runId, input.createdAt, input.createdAt]);
349
374
  }
375
/**
 * Creates everything a new run needs in one transaction: optionally the
 * thread row, then the run row, its run_control row, the user's message and
 * the persisted run request.
 *
 * The thread and run directories are created on disk first, outside the
 * transaction, so they remain if the transaction later fails.
 *
 * @param input run bootstrap data; `createThread` controls whether the
 *        threads row is written
 */
+ async bootstrapRun(input) {
376
+ await mkdir(this.threadDir(input.threadId), { recursive: true });
377
+ await mkdir(path.join(this.runDir(input.threadId, input.runId), "events"), { recursive: true });
378
+ const steps = [];
379
// The threads row is written only when the caller asks for a new thread.
+ if (input.createThread) {
380
+ steps.push({
381
+ sql: `INSERT OR REPLACE INTO threads
382
+ (thread_id, workspace_id, entry_agent_id, status, latest_run_id, created_at, updated_at)
383
+ VALUES (?, ?, ?, ?, ?, ?, ?)`,
384
+ args: [input.threadId, "default", input.agentId, input.status, input.runId, input.createdAt, input.createdAt],
385
+ });
386
+ }
387
+ steps.push({
388
+ sql: `INSERT OR REPLACE INTO runs
389
+ (run_id, thread_id, agent_id, execution_mode, adapter_kind, created_at, updated_at, state, previous_state, state_entered_at, last_transition_at, resumable, checkpoint_ref)
390
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
391
+ args: [
392
+ input.runId,
393
+ input.threadId,
394
+ input.agentId,
395
+ input.executionMode,
396
// adapter_kind falls back to the execution mode when not supplied.
+ input.adapterKind ?? input.executionMode ?? null,
397
+ input.createdAt,
398
+ input.createdAt,
399
// The run state is always "running" here; input.status is used only for the threads row.
+ "running",
400
+ null,
401
+ input.createdAt,
402
+ input.createdAt,
403
+ 0,
404
+ null,
405
+ ],
406
+ }, {
407
+ sql: `INSERT OR REPLACE INTO run_control
408
+ (run_id, cancel_requested, cancel_reason, cancel_requested_at, heartbeat_at, worker_id, worker_started_at)
409
+ VALUES (?, 0, NULL, NULL, NULL, NULL, NULL)`,
410
+ args: [input.runId],
411
+ }, {
412
// Unlike the other statements this is a plain INSERT, not INSERT OR REPLACE.
+ sql: `INSERT INTO thread_messages
413
+ (thread_id, role, content_json, run_id, created_at)
414
+ VALUES (?, ?, ?, ?, ?)`,
415
+ args: [
416
+ input.threadId,
417
+ input.userMessage.role,
418
+ JSON.stringify(input.userMessage.content),
419
+ input.userMessage.runId,
420
+ input.userMessage.createdAt,
421
+ ],
422
+ }, {
423
+ sql: `INSERT OR REPLACE INTO run_requests
424
+ (run_id, thread_id, request_json, saved_at)
425
+ VALUES (?, ?, ?, ?)`,
426
+ args: [input.runId, input.threadId, JSON.stringify(input.runRequest), input.runRequest.savedAt],
427
+ });
428
+ await this.executeTransaction(steps);
429
+ }
350
430
  async createRun(input) {
351
431
  await mkdir(path.join(this.runDir(input.threadId, input.runId), "events"), { recursive: true });
352
432
  await this.execute(`INSERT OR REPLACE INTO runs
@@ -75,6 +75,18 @@ export type ApprovalFilter = {
75
75
  };
76
76
  export interface RuntimePersistence {
77
77
  initialize(): Promise<void>;
78
+ bootstrapRun?(input: {
79
+ threadId: string;
80
+ agentId: string;
81
+ runId: string;
82
+ status: RunState;
83
+ createdAt: string;
84
+ executionMode: string;
85
+ adapterKind?: string;
86
+ userMessage: TranscriptMessage;
87
+ runRequest: PersistedRunRequest;
88
+ createThread: boolean;
89
+ }): Promise<void>;
78
90
  createThread(input: {
79
91
  threadId: string;
80
92
  agentId: string;
@@ -35,7 +35,6 @@ export declare class AgentRuntimeAdapter {
35
35
  private materializeModelStream;
36
36
  private createModelFallbackRunnable;
37
37
  private applyStrictToolJsonInstruction;
38
- private synthesizeDeepAgentAnswer;
39
38
  private resolveModel;
40
39
  private buildToolNameMapping;
41
40
  private buildAgentMessages;
@@ -1,6 +1,4 @@
1
1
  import path from "node:path";
2
- import { existsSync, statSync } from "node:fs";
3
- import { cp, mkdir, rm } from "node:fs/promises";
4
2
  import { Command, MemorySaver } from "@langchain/langgraph";
5
3
  import { tool as createLangChainTool } from "@langchain/core/tools";
6
4
  import { HumanMessage, ToolMessage } from "@langchain/core/messages";
@@ -85,49 +83,12 @@ export function relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) {
85
83
  return relative.split(path.sep).join("/");
86
84
  });
87
85
  }
88
- function isDeepAgentSkillDirectory(sourcePath) {
89
- return existsSync(sourcePath) && statSync(sourcePath).isDirectory() && existsSync(path.join(sourcePath, "SKILL.md"));
90
- }
91
- function toWorkspaceRelativePath(workspaceRoot, targetPath) {
92
- if (!workspaceRoot) {
93
- return targetPath;
94
- }
95
- const relative = path.relative(workspaceRoot, targetPath);
96
- if (!relative || relative.startsWith("..")) {
97
- return targetPath;
98
- }
99
- return relative.split(path.sep).join("/");
100
- }
101
86
  export async function materializeDeepAgentSkillSourcePaths(options) {
102
- const { workspaceRoot, runRoot, ownerId, skillPaths } = options;
87
+ const { workspaceRoot, skillPaths } = options;
103
88
  if (!skillPaths) {
104
89
  return skillPaths;
105
90
  }
106
- const materialized = relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) ?? skillPaths;
107
- if (!workspaceRoot || !runRoot) {
108
- return materialized;
109
- }
110
- const sourceRoot = path.join(runRoot, "deepagent-skill-sources", ownerId);
111
- let wroteSyntheticSource = false;
112
- const resolvedSources = [];
113
- for (const [index, sourcePath] of materialized.entries()) {
114
- const absolutePath = path.isAbsolute(sourcePath) ? sourcePath : path.resolve(workspaceRoot, sourcePath);
115
- if (!isDeepAgentSkillDirectory(absolutePath)) {
116
- resolvedSources.push(sourcePath);
117
- continue;
118
- }
119
- if (!wroteSyntheticSource) {
120
- await rm(sourceRoot, { recursive: true, force: true });
121
- await mkdir(sourceRoot, { recursive: true });
122
- wroteSyntheticSource = true;
123
- }
124
- const skillDirectoryName = path.basename(absolutePath);
125
- const syntheticSourcePath = path.join(sourceRoot, `${String(index + 1).padStart(3, "0")}-${skillDirectoryName}`);
126
- await mkdir(syntheticSourcePath, { recursive: true });
127
- await cp(absolutePath, path.join(syntheticSourcePath, skillDirectoryName), { recursive: true });
128
- resolvedSources.push(toWorkspaceRelativePath(workspaceRoot, syntheticSourcePath));
129
- }
130
- return resolvedSources;
91
+ return relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) ?? skillPaths;
131
92
  }
132
93
  function buildAuthOmittingFetch(baseFetch = fetch) {
133
94
  return async (input, init) => {
@@ -673,31 +634,6 @@ export class AgentRuntimeAdapter {
673
634
  }
674
635
  return binding;
675
636
  }
676
- async synthesizeDeepAgentAnswer(binding, input, result) {
677
- const params = getBindingDeepAgentParams(binding);
678
- if (!params) {
679
- return "";
680
- }
681
- const toolContext = extractToolFallbackContext(result);
682
- if (!toolContext) {
683
- return "";
684
- }
685
- const model = (await this.resolveModel(params.model));
686
- if (!model?.invoke) {
687
- return "";
688
- }
689
- const synthesized = await this.withTimeout(() => model.invoke([
690
- {
691
- role: "system",
692
- content: "The previous agent run completed tool work but did not produce a final user-facing answer. Write the final answer now using the tool results provided. Do not expose internal state, tools, or reasoning.",
693
- },
694
- {
695
- role: "user",
696
- content: `Original user request:\n${extractMessageText(input)}\n\nTool results:\n${toolContext}`,
697
- },
698
- ]), this.resolveBindingTimeout(binding), "deepagent synthesis invoke", "invoke");
699
- return sanitizeVisibleText(extractVisibleOutput(synthesized));
700
- }
701
637
  async resolveModel(model) {
702
638
  const cacheKey = this.getModelCacheKey(model);
703
639
  const cached = this.modelCache.get(cacheKey);
@@ -1166,7 +1102,7 @@ export class AgentRuntimeAdapter {
1166
1102
  if (!isDeepAgentBinding(binding)) {
1167
1103
  return [];
1168
1104
  }
1169
- return this.resolveAutomaticSummarizationMiddleware(binding);
1105
+ return [];
1170
1106
  }
1171
1107
  async resolveMiddleware(binding, interruptOn) {
1172
1108
  const declarativeMiddleware = await resolveDeclaredMiddleware(getBindingMiddlewareConfigs(binding), {
@@ -1469,19 +1405,10 @@ export class AgentRuntimeAdapter {
1469
1405
  const visibleOutput = extractedOutput && !isLikelyToolArgsObject(tryParseJson(extractedOutput)) ? extractedOutput : "";
1470
1406
  const emptyAssistantMessageFailure = extractEmptyAssistantMessageFailure(result);
1471
1407
  const toolFallback = extractToolFallbackContext(result);
1472
- let synthesizedOutput = "";
1473
- try {
1474
- synthesizedOutput = await this.synthesizeDeepAgentAnswer(binding, input, result);
1475
- }
1476
- catch (error) {
1477
- if (!(error instanceof RuntimeOperationTimeoutError) || !toolFallback) {
1478
- throw error;
1479
- }
1480
- }
1481
- if (!visibleOutput && !synthesizedOutput && !toolFallback && emptyAssistantMessageFailure) {
1408
+ if (!visibleOutput && !toolFallback && emptyAssistantMessageFailure) {
1482
1409
  throw new Error(emptyAssistantMessageFailure);
1483
1410
  }
1484
- const output = visibleOutput || synthesizedOutput || toolFallback || JSON.stringify(result, null, 2);
1411
+ const output = visibleOutput || toolFallback || JSON.stringify(result, null, 2);
1485
1412
  const finalMessageText = sanitizeVisibleText(output);
1486
1413
  const outputContent = extractOutputContent(result);
1487
1414
  const contentBlocks = extractContentBlocks(result);
@@ -5,6 +5,8 @@ import type { RequirementAssessmentOptions } from "./skill-requirements.js";
5
5
  export declare class AgentHarnessRuntime {
6
6
  private readonly workspace;
7
7
  private readonly runtimeAdapterOptions;
8
+ private static readonly DEFAULT_HOST_AGENT_ID;
9
+ private static readonly BACKGROUND_EVENT_TYPES;
8
10
  private readonly eventBus;
9
11
  private readonly persistence;
10
12
  private readonly policyEngine;
@@ -27,6 +29,7 @@ export declare class AgentHarnessRuntime {
27
29
  private readonly healthMonitor;
28
30
  private readonly recoveryConfig;
29
31
  private readonly concurrencyConfig;
32
+ private readonly backgroundTasks;
30
33
  private readonly workerId;
31
34
  private activeRunSlots;
32
35
  private pendingRunInsertionOrder;
@@ -38,6 +41,7 @@ export declare class AgentHarnessRuntime {
38
41
  private listHostBindings;
39
42
  private defaultRunRoot;
40
43
  private heuristicRoute;
44
+ private getDefaultHostAgentId;
41
45
  private buildRoutingInput;
42
46
  private resolveSelectedAgentId;
43
47
  private resolveStore;
@@ -82,6 +86,7 @@ export declare class AgentHarnessRuntime {
82
86
  threadId?: string;
83
87
  }): Promise<string>;
84
88
  private emit;
89
+ private trackBackgroundTask;
85
90
  private ensureThreadStarted;
86
91
  private loadPriorHistory;
87
92
  private loadRunInput;
@@ -99,6 +104,7 @@ export declare class AgentHarnessRuntime {
99
104
  private finalizeContinuedRun;
100
105
  private emitOutputDeltaAndCreateItem;
101
106
  private createContentBlocksItem;
107
+ private createToolResultKey;
102
108
  private emitRunCreated;
103
109
  private setRunStateAndEmit;
104
110
  private requestApprovalAndEmit;
@@ -22,6 +22,14 @@ import { describeWorkspaceInventory, listAgentSkills as listWorkspaceAgentSkills
22
22
  export class AgentHarnessRuntime {
23
23
  workspace;
24
24
  runtimeAdapterOptions;
25
+ static DEFAULT_HOST_AGENT_ID = "orchestra";
26
+ static BACKGROUND_EVENT_TYPES = new Set([
27
+ "run.created",
28
+ "run.queued",
29
+ "run.dequeued",
30
+ "output.delta",
31
+ "reasoning.delta",
32
+ ]);
25
33
  eventBus = new EventBus();
26
34
  persistence;
27
35
  policyEngine = new PolicyEngine();
@@ -44,6 +52,7 @@ export class AgentHarnessRuntime {
44
52
  healthMonitor;
45
53
  recoveryConfig;
46
54
  concurrencyConfig;
55
+ backgroundTasks = new Set();
47
56
  workerId = `worker-${createPersistentId()}`;
48
57
  activeRunSlots = 0;
49
58
  pendingRunInsertionOrder = 0;
@@ -76,6 +85,13 @@ export class AgentHarnessRuntime {
76
85
  const { primaryBinding, secondaryBinding } = inferRoutingBindings(this.workspace);
77
86
  return heuristicRoute(extractMessageText(input), primaryBinding, secondaryBinding);
78
87
  }
88
+ getDefaultHostAgentId() {
89
+ const orchestraBinding = this.workspace.bindings.get(AgentHarnessRuntime.DEFAULT_HOST_AGENT_ID);
90
+ if (orchestraBinding && orchestraBinding.harnessRuntime.hostFacing !== false) {
91
+ return orchestraBinding.agent.id;
92
+ }
93
+ return this.heuristicRoute("");
94
+ }
79
95
  async buildRoutingInput(input, threadId) {
80
96
  const inputText = extractMessageText(input);
81
97
  if (!threadId) {
@@ -107,7 +123,7 @@ export class AgentHarnessRuntime {
107
123
  return thread.agentId;
108
124
  }
109
125
  }
110
- return this.routeAgent(input, { threadId });
126
+ return this.getDefaultHostAgentId();
111
127
  }
112
128
  return requestedAgentId;
113
129
  }
@@ -404,41 +420,72 @@ export class AgentHarnessRuntime {
404
420
  }
405
421
  async emit(threadId, runId, sequence, eventType, payload, source = "runtime") {
406
422
  const event = createHarnessEvent(threadId, runId, sequence, eventType, payload, source);
407
- await this.persistence.appendEvent(event);
408
- this.eventBus.publish(event);
409
- if (this.threadMemorySync.shouldHandle(event)) {
410
- await this.threadMemorySync.handleEvent(event);
423
+ if (AgentHarnessRuntime.BACKGROUND_EVENT_TYPES.has(event.eventType)) {
424
+ this.trackBackgroundTask(this.persistence.appendEvent(event).catch(() => {
425
+ // Fail open for telemetry-style event persistence.
426
+ }));
427
+ }
428
+ else {
429
+ await this.persistence.appendEvent(event);
411
430
  }
431
+ this.eventBus.publish(event);
412
432
  return event;
413
433
  }
414
- async ensureThreadStarted(selectedAgentId, binding, input, existingThreadId) {
434
+ trackBackgroundTask(task) {
435
+ this.backgroundTasks.add(task);
436
+ void task.finally(() => {
437
+ this.backgroundTasks.delete(task);
438
+ });
439
+ }
440
+ async ensureThreadStarted(selectedAgentId, binding, input, runRequest, existingThreadId) {
415
441
  const threadId = existingThreadId ?? createPersistentId();
416
442
  const runId = createPersistentId();
417
443
  const createdAt = new Date().toISOString();
418
- if (!existingThreadId) {
419
- await this.persistence.createThread({
444
+ const isNewThread = !existingThreadId;
445
+ const userMessage = {
446
+ role: "user",
447
+ content: normalizeMessageContent(input),
448
+ runId,
449
+ createdAt,
450
+ };
451
+ if (typeof this.persistence.bootstrapRun === "function") {
452
+ await this.persistence.bootstrapRun({
420
453
  threadId,
421
- agentId: selectedAgentId,
454
+ agentId: binding.agent.id,
422
455
  runId,
423
456
  status: "running",
424
457
  createdAt,
458
+ executionMode: getBindingAdapterKind(binding),
459
+ adapterKind: getBindingAdapterKind(binding),
460
+ userMessage,
461
+ runRequest,
462
+ createThread: isNewThread,
425
463
  });
426
464
  }
427
- await this.persistence.appendThreadMessage(threadId, {
428
- role: "user",
429
- content: normalizeMessageContent(input),
430
- runId,
431
- createdAt,
432
- });
433
- await this.persistence.createRun({
434
- threadId,
435
- runId,
436
- agentId: binding.agent.id,
437
- executionMode: getBindingAdapterKind(binding),
438
- adapterKind: getBindingAdapterKind(binding),
439
- createdAt,
440
- });
441
- return { threadId, runId, createdAt };
465
+ else {
466
+ if (isNewThread) {
467
+ await this.persistence.createThread({
468
+ threadId,
469
+ agentId: selectedAgentId,
470
+ runId,
471
+ status: "running",
472
+ createdAt,
473
+ });
474
+ }
475
+ await Promise.all([
476
+ this.persistence.appendThreadMessage(threadId, userMessage),
477
+ this.persistence.createRun({
478
+ threadId,
479
+ runId,
480
+ agentId: binding.agent.id,
481
+ executionMode: getBindingAdapterKind(binding),
482
+ adapterKind: getBindingAdapterKind(binding),
483
+ createdAt,
484
+ }),
485
+ this.persistence.saveRunRequest(threadId, runId, runRequest),
486
+ ]);
487
+ }
488
+ return { threadId, runId, createdAt, isNewThread };
442
489
  }
443
490
  async loadPriorHistory(threadId, runId) {
444
491
  const history = await this.persistence.listThreadMessages(threadId);
@@ -500,11 +547,11 @@ export class AgentHarnessRuntime {
500
547
  output: reason ? `cancelled: ${reason}` : "cancelled",
501
548
  };
502
549
  }
503
- async invokeWithHistory(binding, input, threadId, runId, resumePayload, options = {}) {
504
- const priorHistory = await this.loadPriorHistory(threadId, runId);
550
+ async invokeWithHistory(binding, input, threadId, runId, resumePayload, priorHistory, options = {}) {
551
+ const history = priorHistory ?? await this.loadPriorHistory(threadId, runId);
505
552
  const startedAt = Date.now();
506
553
  try {
507
- const result = await this.runtimeAdapter.invoke(binding, input, threadId, runId, resumePayload, priorHistory, options);
554
+ const result = await this.runtimeAdapter.invoke(binding, input, threadId, runId, resumePayload, history, options);
508
555
  this.healthMonitor.recordLlmSuccess(Date.now() - startedAt);
509
556
  return result;
510
557
  }
@@ -603,7 +650,7 @@ export class AgentHarnessRuntime {
603
650
  });
604
651
  }
605
652
  try {
606
- const actual = await this.invokeWithHistory(binding, input, threadId, runId, undefined, {
653
+ const actual = await this.invokeWithHistory(binding, input, threadId, runId, undefined, options.priorHistory, {
607
654
  context: options.context,
608
655
  state: options.state,
609
656
  files: options.files,
@@ -683,6 +730,16 @@ export class AgentHarnessRuntime {
683
730
  contentBlocks,
684
731
  };
685
732
  }
733
+ createToolResultKey(toolName, output, isError) {
734
+ let serializedOutput = "";
735
+ try {
736
+ serializedOutput = JSON.stringify(output);
737
+ }
738
+ catch {
739
+ serializedOutput = String(output);
740
+ }
741
+ return JSON.stringify([toolName, serializedOutput, isError === true]);
742
+ }
686
743
  async emitRunCreated(threadId, runId, payload) {
687
744
  return this.emit(threadId, runId, 1, "run.created", payload);
688
745
  }
@@ -757,22 +814,29 @@ export class AgentHarnessRuntime {
757
814
  await listener(value);
758
815
  }
759
816
  async acquireRunSlot(threadId, runId, activeState = "running", priority = 0) {
760
- if (threadId && runId) {
761
- await this.persistence.enqueueRun({ threadId, runId, priority });
762
- }
763
817
  let stopHeartbeat = () => undefined;
764
- const beginLease = async () => {
818
+ const beginLease = async (mode) => {
765
819
  if (!threadId || !runId) {
766
820
  return;
767
821
  }
768
822
  const claimedAt = new Date().toISOString();
769
- await this.persistence.claimQueuedRun({
770
- threadId,
771
- runId,
772
- workerId: this.workerId,
773
- claimedAt,
774
- leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
775
- });
823
+ if (mode === "queue-claim") {
824
+ await this.persistence.claimQueuedRun({
825
+ threadId,
826
+ runId,
827
+ workerId: this.workerId,
828
+ claimedAt,
829
+ leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
830
+ });
831
+ }
832
+ else {
833
+ await this.persistence.renewRunLease({
834
+ runId,
835
+ workerId: this.workerId,
836
+ heartbeatAt: claimedAt,
837
+ leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
838
+ });
839
+ }
776
840
  if (this.concurrencyConfig.heartbeatIntervalMs <= 0) {
777
841
  return;
778
842
  }
@@ -797,14 +861,19 @@ export class AgentHarnessRuntime {
797
861
  };
798
862
  const maxConcurrentRuns = this.concurrencyConfig.maxConcurrentRuns;
799
863
  if (!maxConcurrentRuns) {
800
- await beginLease();
864
+ await beginLease("direct-heartbeat");
801
865
  return async () => {
802
866
  await releaseLease();
803
867
  };
804
868
  }
805
- if (this.activeRunSlots < maxConcurrentRuns) {
869
+ const canActivateImmediately = this.activeRunSlots < maxConcurrentRuns;
870
+ const useDirectHeartbeatFastPath = canActivateImmediately && maxConcurrentRuns > 1;
871
+ if (canActivateImmediately) {
806
872
  this.activeRunSlots += 1;
807
- await beginLease();
873
+ if (threadId && runId && !useDirectHeartbeatFastPath) {
874
+ await this.persistence.enqueueRun({ threadId, runId, priority });
875
+ }
876
+ await beginLease(useDirectHeartbeatFastPath ? "direct-heartbeat" : "queue-claim");
808
877
  let released = false;
809
878
  return async () => {
810
879
  if (released) {
@@ -833,11 +902,12 @@ export class AgentHarnessRuntime {
833
902
  await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
834
903
  previousState: "queued",
835
904
  });
836
- await beginLease();
905
+ await beginLease("queue-claim");
837
906
  }
838
907
  return "activate";
839
908
  };
840
909
  if (threadId && runId) {
910
+ await this.persistence.enqueueRun({ threadId, runId, priority });
841
911
  const slotAcquisition = new Promise((resolve, reject) => {
842
912
  const displacedEntries = this.enqueuePendingRunSlot({
843
913
  threadId,
@@ -994,16 +1064,18 @@ export class AgentHarnessRuntime {
994
1064
  if (!policyDecision.allowed) {
995
1065
  throw new Error(`Policy evaluation blocked agent ${selectedAgentId}: ${policyDecision.reasons.join(", ")}`);
996
1066
  }
997
- const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
998
1067
  const priority = this.normalizeRunPriority(options.priority);
999
- await this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation, priority));
1000
- await this.emitRunCreated(threadId, runId, {
1068
+ const runRequest = this.buildPersistedRunRequest(options.input, invocation, priority);
1069
+ const { threadId, runId, isNewThread } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, runRequest, options.threadId);
1070
+ const runCreatedEventPromise = this.emitRunCreated(threadId, runId, {
1001
1071
  agentId: binding.agent.id,
1002
1072
  requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
1003
1073
  selectedAgentId,
1004
1074
  executionMode: getBindingAdapterKind(binding),
1005
1075
  });
1006
- const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "running", priority);
1076
+ const releaseRunSlotPromise = this.acquireRunSlot(threadId, runId, "running", priority);
1077
+ await runCreatedEventPromise;
1078
+ const releaseRunSlot = await releaseRunSlotPromise;
1007
1079
  try {
1008
1080
  return await this.executeQueuedRun(binding, options.input, threadId, runId, selectedAgentId, {
1009
1081
  context: invocation.context,
@@ -1012,6 +1084,7 @@ export class AgentHarnessRuntime {
1012
1084
  previousState: "running",
1013
1085
  stateSequence: 6,
1014
1086
  approvalSequence: 7,
1087
+ priorHistory: isNewThread ? [] : undefined,
1015
1088
  });
1016
1089
  }
1017
1090
  finally {
@@ -1037,22 +1110,30 @@ export class AgentHarnessRuntime {
1037
1110
  }
1038
1111
  let emitted = false;
1039
1112
  let streamActivityObserved = false;
1040
- const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
1041
1113
  const priority = this.normalizeRunPriority(options.priority);
1042
- await this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation, priority));
1043
- yield { type: "event", event: await this.emitRunCreated(threadId, runId, {
1044
- agentId: selectedAgentId,
1045
- requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
1046
- selectedAgentId,
1047
- input: options.input,
1048
- state: "running",
1049
- }) };
1050
- const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "running", priority);
1114
+ const runRequest = this.buildPersistedRunRequest(options.input, invocation, priority);
1115
+ const { threadId, runId, isNewThread } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, runRequest, options.threadId);
1116
+ const priorHistoryPromise = Promise.resolve(isNewThread ? [] : undefined).then((historyHint) => historyHint ?? this.loadPriorHistory(threadId, runId));
1117
+ const runCreatedEventPromise = this.emitRunCreated(threadId, runId, {
1118
+ agentId: selectedAgentId,
1119
+ requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
1120
+ selectedAgentId,
1121
+ input: options.input,
1122
+ state: "running",
1123
+ });
1124
+ yield { type: "event", event: await runCreatedEventPromise };
1125
+ const releaseRunSlotPromise = this.acquireRunSlot(threadId, runId, "running", priority);
1126
+ let releaseRunSlot = async () => undefined;
1051
1127
  try {
1052
1128
  try {
1053
- const priorHistory = await this.loadPriorHistory(threadId, runId);
1129
+ const [priorHistory, acquiredReleaseRunSlot] = await Promise.all([
1130
+ priorHistoryPromise,
1131
+ releaseRunSlotPromise,
1132
+ ]).then(([loadedPriorHistory, resolvedReleaseRunSlot]) => [loadedPriorHistory, resolvedReleaseRunSlot]);
1133
+ releaseRunSlot = acquiredReleaseRunSlot;
1054
1134
  let assistantOutput = "";
1055
1135
  const toolErrors = [];
1136
+ let lastToolResultKey = null;
1056
1137
  for await (const chunk of this.runtimeAdapter.stream(binding, options.input, threadId, priorHistory, {
1057
1138
  context: invocation.context,
1058
1139
  state: invocation.state,
@@ -1120,6 +1201,11 @@ export class AgentHarnessRuntime {
1120
1201
  continue;
1121
1202
  }
1122
1203
  if (normalizedChunk.kind === "tool-result") {
1204
+ const toolResultKey = this.createToolResultKey(normalizedChunk.toolName, normalizedChunk.output, normalizedChunk.isError);
1205
+ if (toolResultKey === lastToolResultKey) {
1206
+ continue;
1207
+ }
1208
+ lastToolResultKey = toolResultKey;
1123
1209
  if (normalizedChunk.isError) {
1124
1210
  toolErrors.push(renderToolFailure(normalizedChunk.toolName, normalizedChunk.output));
1125
1211
  }
@@ -1425,6 +1511,7 @@ export class AgentHarnessRuntime {
1425
1511
  await this.checkpointMaintenance?.stop();
1426
1512
  await this.runtimeRecordMaintenance?.stop();
1427
1513
  this.unregisterThreadMemorySync();
1514
+ await Promise.allSettled(Array.from(this.backgroundTasks));
1428
1515
  await this.threadMemorySync.close();
1429
1516
  }
1430
1517
  async stop() {
@@ -79,7 +79,6 @@ export class ThreadMemorySync {
79
79
  this.pending.delete(task);
80
80
  });
81
81
  this.pending.add(task);
82
- await task;
83
82
  }
84
83
  async syncThread(threadId) {
85
84
  const thread = await this.persistence.getSession(threadId);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.84",
3
+ "version": "0.0.86",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",
@@ -53,7 +53,8 @@
53
53
  "scripts": {
54
54
  "build": "rm -rf dist tsconfig.tsbuildinfo && tsc -p tsconfig.json && cp -R config dist/",
55
55
  "check": "tsc -p tsconfig.json --noEmit",
56
- "test": "vitest run test/hello-file.test.ts test/public-api.test.ts test/runtime-health.test.ts test/memory-runtime.test.ts test/sqlite-persistence.test.ts test/runtime-queue-lease.test.ts test/runtime-cancel.test.ts test/runtime-record-maintenance.test.ts test/resource-optional-provider.test.ts test/resource-isolation.test.ts test/stock-research-app-load-harness.test.ts test/stock-research-app-run.test.ts test/stock-research-app-config.test.ts test/release-workflow.test.ts test/release-version.test.ts test/gitignore.test.ts test/package-lock.test.ts test/readme.test.ts test/product-boundary-docs.test.ts test/long-term-memory-docs.test.ts test/local-docs-persistence-inventory.test.ts test/docs-site.test.ts test/runtime-adapter-regressions.test.ts test/runtime-capabilities.test.ts test/runtime-recovery.test.ts test/tool-extension-gaps.test.ts test/checkpoint-maintenance.test.ts test/llamaindex-dependency-compat.test.ts test/skill-standard.test.ts test/routing-config.test.ts test/workspace-compat-regressions.test.ts test/upstream-compat-regressions.test.ts test/yaml-format.test.ts test/config-secrets.test.ts test/init-command.test.ts test/coding-agent-guide.test.ts",
56
+ "test": "vitest run test/hello-file.test.ts test/public-api.test.ts test/runtime-health.test.ts test/memory-runtime.test.ts test/sqlite-persistence.test.ts test/runtime-queue-lease.test.ts test/runtime-cancel.test.ts test/runtime-record-maintenance.test.ts test/resource-optional-provider.test.ts test/resource-isolation.test.ts test/stock-research-app-load-harness.test.ts test/stock-research-app-run.test.ts test/stock-research-app-config.test.ts test/release-workflow.test.ts test/release-version.test.ts test/gitignore.test.ts test/package-lock.test.ts test/readme.test.ts test/product-boundary-docs.test.ts test/long-term-memory-docs.test.ts test/local-docs-persistence-inventory.test.ts test/docs-site.test.ts test/runtime-adapter-regressions.test.ts test/runtime-capabilities.test.ts test/runtime-recovery.test.ts test/tool-extension-gaps.test.ts test/checkpoint-maintenance.test.ts test/llamaindex-dependency-compat.test.ts test/skill-standard.test.ts test/routing-config.test.ts test/workspace-compat-regressions.test.ts test/upstream-compat-regressions.test.ts test/upstream-runtime-ab-benchmark.test.ts test/yaml-format.test.ts test/config-secrets.test.ts test/init-command.test.ts test/coding-agent-guide.test.ts",
57
+ "test:upstream-ab-real": "vitest run test/upstream-runtime-ab-real.test.ts",
57
58
  "test:real-providers": "vitest run test/real-provider-harness.test.ts",
58
59
  "release:prepare": "npm version patch --no-git-tag-version && node ./scripts/sync-example-version.mjs",
59
60
  "release:pack": "npm pack --dry-run",