@botbotgo/agent-harness 0.0.84 → 0.0.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ export declare const DEFAULT_UPSTREAM_BENCHMARK_PATHS: readonly ["harness", "raw-langchain-v1", "raw-deepagent"];
2
+ export declare const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD: "tool";
3
+ export type UpstreamBenchmarkPath = (typeof DEFAULT_UPSTREAM_BENCHMARK_PATHS)[number];
4
+ export type UpstreamBenchmarkWorkload = "tool" | "no-tool";
5
+ export type UpstreamBenchmarkRunSummary = {
6
+ providerLabel: string;
7
+ model: string;
8
+ path: UpstreamBenchmarkPath;
9
+ runNumber: number;
10
+ status: "completed" | "failed";
11
+ totalMs: number;
12
+ firstTokenMs: number | null;
13
+ outputLength: number;
14
+ normalizedOutputLength: number;
15
+ toolCallCount: number;
16
+ exactOutputMatch: boolean;
17
+ errorMessage?: string;
18
+ };
19
+ export type UpstreamBenchmarkAggregateSummary = {
20
+ providerLabel: string;
21
+ model: string;
22
+ path: UpstreamBenchmarkPath;
23
+ repetitions: number;
24
+ successCount: number;
25
+ failureCount: number;
26
+ avgTotalMs: number | null;
27
+ trimmedAvgTotalMs: number | null;
28
+ medianTotalMs: number | null;
29
+ p95TotalMs: number | null;
30
+ avgFirstTokenMs: number | null;
31
+ trimmedAvgFirstTokenMs: number | null;
32
+ medianFirstTokenMs: number | null;
33
+ p95FirstTokenMs: number | null;
34
+ avgOutputLength: number | null;
35
+ avgNormalizedOutputLength: number | null;
36
+ avgToolCallCount: number | null;
37
+ exactOutputMatchCount: number;
38
+ };
39
+ export type UpstreamBenchmarkComparison = {
40
+ baselinePath: UpstreamBenchmarkPath;
41
+ candidatePath: UpstreamBenchmarkPath;
42
+ avgTotalMsDelta: number | null;
43
+ avgTotalMsOverheadPct: number | null;
44
+ avgFirstTokenMsDelta: number | null;
45
+ avgFirstTokenMsOverheadPct: number | null;
46
+ };
47
+ export declare function resolveUpstreamBenchmarkPaths(rawValue?: string): readonly UpstreamBenchmarkPath[];
48
+ export declare function resolveUpstreamBenchmarkWorkload(rawValue?: string): UpstreamBenchmarkWorkload;
49
+ export declare function extractLastMatchingToken(output: string, prefixes: readonly string[]): string;
50
+ export declare function aggregateUpstreamBenchmarkRuns(providerLabel: string, model: string, path: UpstreamBenchmarkPath, runs: UpstreamBenchmarkRunSummary[]): UpstreamBenchmarkAggregateSummary;
51
+ export declare function compareUpstreamBenchmarkPaths(baseline: UpstreamBenchmarkAggregateSummary, candidate: UpstreamBenchmarkAggregateSummary): UpstreamBenchmarkComparison;
@@ -0,0 +1,123 @@
1
/**
 * Benchmark paths used when the caller does not request specific ones.
 * The array is frozen so the shared default cannot be mutated by consumers.
 */
export const DEFAULT_UPSTREAM_BENCHMARK_PATHS = Object.freeze(["harness", "raw-langchain-v1", "raw-deepagent"]);

/** Workload used when the caller does not explicitly ask for "no-tool". */
export const DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD = "tool";
7
/**
 * Arithmetic mean of `values`, rounded to two decimal places.
 *
 * An empty array produces NaN (0 / 0); this helper does not guard against it.
 *
 * @param values Numbers to average.
 * @returns The mean rounded to two decimals.
 */
function average(values) {
    const total = values.reduce((runningTotal, entry) => runningTotal + entry, 0);
    const mean = total / values.length;
    return Number(mean.toFixed(2));
}
10
/**
 * Median of `values`, rounded to two decimal places.
 *
 * The input is copied before sorting, so the caller's array is left untouched.
 * Missing elements are treated as 0, which means an empty array yields 0.
 *
 * @param values Numbers to take the median of.
 * @returns The middle value (odd count) or the mean of the two middle values (even count).
 */
function median(values) {
    const ordered = Array.from(values);
    ordered.sort((a, b) => a - b);
    const upperIndex = Math.floor(ordered.length / 2);
    const upper = ordered[upperIndex] ?? 0;
    if (ordered.length % 2 !== 0) {
        return Number(upper.toFixed(2));
    }
    // Even count: average the two values straddling the midpoint.
    const lower = ordered[upperIndex - 1] ?? 0;
    return Number(((lower + upper) / 2).toFixed(2));
}
18
/**
 * Nearest-rank percentile of `values`, rounded to two decimal places.
 *
 * The input is copied before sorting. The computed rank is clamped into the
 * valid index range, so quantiles below 0 or above 1 resolve to the smallest
 * or largest value respectively.
 *
 * @param values Numbers to rank.
 * @param quantile Fraction in [0, 1], e.g. 0.95 for p95.
 * @returns The selected value, or 0 when `values` is empty (same fallback as `median`).
 * @throws RangeError when `quantile` is not a number or is NaN.
 */
function percentile(values, quantile) {
    // Without this guard a NaN rank would index `undefined` and silently
    // collapse to the empty-input fallback for non-empty data.
    if (typeof quantile !== "number" || Number.isNaN(quantile)) {
        throw new RangeError("percentile quantile must be a number");
    }
    const sorted = [...values].sort((left, right) => left - right);
    const rank = Math.ceil(sorted.length * quantile) - 1;
    const index = Math.max(0, Math.min(sorted.length - 1, rank));
    // `sorted[index]` is undefined only for an empty input; fall back to 0
    // instead of throwing on `undefined.toFixed`.
    return Number((sorted[index] ?? 0).toFixed(2));
}
23
/**
 * Null-safe wrapper around `average`.
 *
 * @param values Numbers to average.
 * @returns The rounded mean, or null when there are no values.
 */
function averageOrNull(values) {
    if (values.length > 0) {
        return average(values);
    }
    return null;
}
26
/**
 * Null-safe wrapper around `median`.
 *
 * @param values Numbers to take the median of.
 * @returns The rounded median, or null when there are no values.
 */
function medianOrNull(values) {
    if (values.length > 0) {
        return median(values);
    }
    return null;
}
29
/**
 * Null-safe wrapper around `percentile`.
 *
 * @param values Numbers to rank.
 * @param quantile Fraction passed through to `percentile`.
 * @returns The rounded percentile, or null when there are no values.
 */
function percentileOrNull(values, quantile) {
    if (values.length > 0) {
        return percentile(values, quantile);
    }
    return null;
}
32
/**
 * Returns a copy of `values` with the same number of smallest and largest
 * entries removed.
 *
 * Arrays of two or fewer elements are copied unchanged (and unsorted). For
 * longer arrays the result is sorted ascending, and the per-side trim count is
 * capped so that at least one element always remains.
 *
 * @param values Numbers to trim.
 * @param trimFraction Fraction of the sample to drop from each end.
 * @returns A new array; the input is never mutated.
 */
function trimValues(values, trimFraction) {
    const count = values.length;
    if (count > 2) {
        const ascending = Array.from(values);
        ascending.sort((a, b) => a - b);
        const proportional = Math.floor(count * trimFraction);
        const ceiling = Math.floor((count - 1) / 2);
        const dropPerSide = Math.min(proportional, ceiling);
        return ascending.slice(dropPerSide, count - dropPerSide);
    }
    return Array.from(values);
}
40
/**
 * Mean of `values` after trimming the extremes with `trimValues`.
 *
 * @param values Numbers to average.
 * @param trimFraction Fraction of the sample to drop from each end.
 * @returns The rounded trimmed mean, or null when there are no values.
 */
function trimmedAverageOrNull(values, trimFraction) {
    if (values.length === 0) {
        return null;
    }
    const kept = trimValues(values, trimFraction);
    return average(kept);
}
43
/**
 * Parses a comma-separated list of benchmark paths.
 *
 * Entries are trimmed and lower-cased; anything not listed in
 * `DEFAULT_UPSTREAM_BENCHMARK_PATHS` is dropped. Repeated entries are collapsed
 * to their first occurrence so the same path is not returned twice.
 *
 * @param rawValue Raw comma-separated value; may be undefined or empty.
 * @returns The recognised paths in the order given, or a fresh copy of the
 *          defaults when the input is missing or contains no recognised path.
 */
export function resolveUpstreamBenchmarkPaths(rawValue) {
    if (!rawValue) {
        return [...DEFAULT_UPSTREAM_BENCHMARK_PATHS];
    }
    const recognised = rawValue
        .split(",")
        .map((value) => value.trim().toLowerCase())
        // Validate against the exported list so the allow-list lives in one place.
        .filter((value) => DEFAULT_UPSTREAM_BENCHMARK_PATHS.includes(value));
    // A Set keeps insertion order, so the caller's ordering is preserved.
    const unique = [...new Set(recognised)];
    return unique.length > 0 ? unique : [...DEFAULT_UPSTREAM_BENCHMARK_PATHS];
}
53
/**
 * Resolves the benchmark workload from a raw value.
 *
 * Only "no-tool" (ignoring case and surrounding whitespace) selects the
 * no-tool workload; every other value, including undefined, resolves to
 * `DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD`.
 *
 * @param rawValue Raw workload value; may be undefined.
 * @returns "no-tool" or the default workload.
 */
export function resolveUpstreamBenchmarkWorkload(rawValue) {
    const normalized = rawValue?.trim().toLowerCase();
    if (normalized === "no-tool") {
        return "no-tool";
    }
    return DEFAULT_UPSTREAM_BENCHMARK_WORKLOAD;
}
56
/**
 * Extracts a whitespace-delimited token that starts at an occurrence of one of
 * `prefixes` inside `output`.
 *
 * The output is first normalised: every run of whitespace becomes a single
 * space and the ends are trimmed. Prefixes are scanned in the order given and,
 * for each prefix, occurrences are scanned left to right. The result is
 * therefore the last occurrence of the last prefix that matched at all.
 *
 * Empty prefixes are ignored: an empty string matches at every index without
 * advancing the search position, which would otherwise loop forever.
 *
 * @param output Raw text to search.
 * @param prefixes Candidate token prefixes.
 * @returns The matched token, or the whole normalised output when nothing matches.
 */
export function extractLastMatchingToken(output, prefixes) {
    const normalized = output.replace(/\s+/g, " ").trim();
    let matched = "";
    for (const prefix of prefixes) {
        if (!prefix) {
            continue;
        }
        let searchIndex = normalized.indexOf(prefix);
        while (searchIndex >= 0) {
            const suffix = normalized.slice(searchIndex);
            // The token runs from the match up to the next whitespace character.
            const token = (suffix.split(/\s/)[0] ?? suffix).trim();
            if (token.length > 0) {
                matched = token;
            }
            searchIndex = normalized.indexOf(prefix, searchIndex + prefix.length);
        }
    }
    return matched || normalized;
}
72
/**
 * Collapses the individual runs of one benchmark path into a single summary.
 *
 * Only runs whose status is "completed" contribute to the timing, length,
 * tool-call and exact-match figures. First-token statistics additionally skip
 * runs whose `firstTokenMs` is null. Each statistic is null when there is no
 * qualifying sample.
 *
 * @param providerLabel Label copied into the summary.
 * @param model Model name copied into the summary.
 * @param path Benchmark path copied into the summary.
 * @param runs Per-run summaries for that path.
 * @returns The aggregate summary; trimmed averages use a 0.1 trim fraction and
 *          the p95 fields use a 0.95 quantile.
 */
export function aggregateUpstreamBenchmarkRuns(providerLabel, model, path, runs) {
    const completed = runs.filter(({ status }) => status === "completed");
    const pluck = (field) => completed.map((run) => run[field]);
    const totals = pluck("totalMs");
    const firstTokens = pluck("firstTokenMs").filter((ms) => ms !== null);
    const exactMatches = completed.filter(({ exactOutputMatch }) => exactOutputMatch);
    return {
        providerLabel,
        model,
        path,
        repetitions: runs.length,
        successCount: completed.length,
        failureCount: runs.length - completed.length,
        avgTotalMs: averageOrNull(totals),
        trimmedAvgTotalMs: trimmedAverageOrNull(totals, 0.1),
        medianTotalMs: medianOrNull(totals),
        p95TotalMs: percentileOrNull(totals, 0.95),
        avgFirstTokenMs: averageOrNull(firstTokens),
        trimmedAvgFirstTokenMs: trimmedAverageOrNull(firstTokens, 0.1),
        medianFirstTokenMs: medianOrNull(firstTokens),
        p95FirstTokenMs: percentileOrNull(firstTokens, 0.95),
        avgOutputLength: averageOrNull(pluck("outputLength")),
        avgNormalizedOutputLength: averageOrNull(pluck("normalizedOutputLength")),
        avgToolCallCount: averageOrNull(pluck("toolCallCount")),
        exactOutputMatchCount: exactMatches.length,
    };
}
99
/**
 * Computes how far `candidate` is from `baseline`, both as an absolute
 * difference and as a percentage of the baseline.
 *
 * @param candidate Candidate measurement, or null when unavailable.
 * @param baseline Baseline measurement, or null when unavailable.
 * @returns `{ delta, pct }`, each rounded to two decimals. Both are null when
 *          either input is null; `pct` alone is null when the baseline is 0,
 *          because a relative overhead is undefined there.
 */
function computeOverhead(candidate, baseline) {
    const roundToCents = (value) => Number(value.toFixed(2));
    if (candidate === null || baseline === null) {
        return { delta: null, pct: null };
    }
    const difference = candidate - baseline;
    const delta = roundToCents(difference);
    // The percentage is derived from the unrounded difference.
    const pct = baseline === 0 ? null : roundToCents((difference / baseline) * 100);
    return { delta, pct };
}
112
/**
 * Compares two aggregate summaries on their average total time and average
 * first-token time.
 *
 * @param baseline Aggregate summary used as the reference.
 * @param candidate Aggregate summary measured against the reference.
 * @returns The two paths plus, for each metric, the absolute delta and the
 *          percentage overhead as produced by `computeOverhead`.
 */
export function compareUpstreamBenchmarkPaths(baseline, candidate) {
    const { delta: avgTotalMsDelta, pct: avgTotalMsOverheadPct } = computeOverhead(candidate.avgTotalMs, baseline.avgTotalMs);
    const { delta: avgFirstTokenMsDelta, pct: avgFirstTokenMsOverheadPct } = computeOverhead(candidate.avgFirstTokenMs, baseline.avgFirstTokenMs);
    return {
        baselinePath: baseline.path,
        candidatePath: candidate.path,
        avgTotalMsDelta,
        avgTotalMsOverheadPct,
        avgFirstTokenMsDelta,
        avgFirstTokenMsOverheadPct,
    };
}
@@ -1 +1 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.83";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.84";
@@ -1 +1 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.83";
1
+ export const AGENT_HARNESS_VERSION = "0.0.84";
@@ -35,7 +35,6 @@ export declare class AgentRuntimeAdapter {
35
35
  private materializeModelStream;
36
36
  private createModelFallbackRunnable;
37
37
  private applyStrictToolJsonInstruction;
38
- private synthesizeDeepAgentAnswer;
39
38
  private resolveModel;
40
39
  private buildToolNameMapping;
41
40
  private buildAgentMessages;
@@ -1,6 +1,4 @@
1
1
  import path from "node:path";
2
- import { existsSync, statSync } from "node:fs";
3
- import { cp, mkdir, rm } from "node:fs/promises";
4
2
  import { Command, MemorySaver } from "@langchain/langgraph";
5
3
  import { tool as createLangChainTool } from "@langchain/core/tools";
6
4
  import { HumanMessage, ToolMessage } from "@langchain/core/messages";
@@ -85,49 +83,12 @@ export function relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) {
85
83
  return relative.split(path.sep).join("/");
86
84
  });
87
85
  }
88
- function isDeepAgentSkillDirectory(sourcePath) {
89
- return existsSync(sourcePath) && statSync(sourcePath).isDirectory() && existsSync(path.join(sourcePath, "SKILL.md"));
90
- }
91
- function toWorkspaceRelativePath(workspaceRoot, targetPath) {
92
- if (!workspaceRoot) {
93
- return targetPath;
94
- }
95
- const relative = path.relative(workspaceRoot, targetPath);
96
- if (!relative || relative.startsWith("..")) {
97
- return targetPath;
98
- }
99
- return relative.split(path.sep).join("/");
100
- }
101
86
  export async function materializeDeepAgentSkillSourcePaths(options) {
102
- const { workspaceRoot, runRoot, ownerId, skillPaths } = options;
87
+ const { workspaceRoot, skillPaths } = options;
103
88
  if (!skillPaths) {
104
89
  return skillPaths;
105
90
  }
106
- const materialized = relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) ?? skillPaths;
107
- if (!workspaceRoot || !runRoot) {
108
- return materialized;
109
- }
110
- const sourceRoot = path.join(runRoot, "deepagent-skill-sources", ownerId);
111
- let wroteSyntheticSource = false;
112
- const resolvedSources = [];
113
- for (const [index, sourcePath] of materialized.entries()) {
114
- const absolutePath = path.isAbsolute(sourcePath) ? sourcePath : path.resolve(workspaceRoot, sourcePath);
115
- if (!isDeepAgentSkillDirectory(absolutePath)) {
116
- resolvedSources.push(sourcePath);
117
- continue;
118
- }
119
- if (!wroteSyntheticSource) {
120
- await rm(sourceRoot, { recursive: true, force: true });
121
- await mkdir(sourceRoot, { recursive: true });
122
- wroteSyntheticSource = true;
123
- }
124
- const skillDirectoryName = path.basename(absolutePath);
125
- const syntheticSourcePath = path.join(sourceRoot, `${String(index + 1).padStart(3, "0")}-${skillDirectoryName}`);
126
- await mkdir(syntheticSourcePath, { recursive: true });
127
- await cp(absolutePath, path.join(syntheticSourcePath, skillDirectoryName), { recursive: true });
128
- resolvedSources.push(toWorkspaceRelativePath(workspaceRoot, syntheticSourcePath));
129
- }
130
- return resolvedSources;
91
+ return relativizeDeepAgentSkillSourcePaths(workspaceRoot, skillPaths) ?? skillPaths;
131
92
  }
132
93
  function buildAuthOmittingFetch(baseFetch = fetch) {
133
94
  return async (input, init) => {
@@ -673,31 +634,6 @@ export class AgentRuntimeAdapter {
673
634
  }
674
635
  return binding;
675
636
  }
676
- async synthesizeDeepAgentAnswer(binding, input, result) {
677
- const params = getBindingDeepAgentParams(binding);
678
- if (!params) {
679
- return "";
680
- }
681
- const toolContext = extractToolFallbackContext(result);
682
- if (!toolContext) {
683
- return "";
684
- }
685
- const model = (await this.resolveModel(params.model));
686
- if (!model?.invoke) {
687
- return "";
688
- }
689
- const synthesized = await this.withTimeout(() => model.invoke([
690
- {
691
- role: "system",
692
- content: "The previous agent run completed tool work but did not produce a final user-facing answer. Write the final answer now using the tool results provided. Do not expose internal state, tools, or reasoning.",
693
- },
694
- {
695
- role: "user",
696
- content: `Original user request:\n${extractMessageText(input)}\n\nTool results:\n${toolContext}`,
697
- },
698
- ]), this.resolveBindingTimeout(binding), "deepagent synthesis invoke", "invoke");
699
- return sanitizeVisibleText(extractVisibleOutput(synthesized));
700
- }
701
637
  async resolveModel(model) {
702
638
  const cacheKey = this.getModelCacheKey(model);
703
639
  const cached = this.modelCache.get(cacheKey);
@@ -1166,7 +1102,7 @@ export class AgentRuntimeAdapter {
1166
1102
  if (!isDeepAgentBinding(binding)) {
1167
1103
  return [];
1168
1104
  }
1169
- return this.resolveAutomaticSummarizationMiddleware(binding);
1105
+ return [];
1170
1106
  }
1171
1107
  async resolveMiddleware(binding, interruptOn) {
1172
1108
  const declarativeMiddleware = await resolveDeclaredMiddleware(getBindingMiddlewareConfigs(binding), {
@@ -1469,19 +1405,10 @@ export class AgentRuntimeAdapter {
1469
1405
  const visibleOutput = extractedOutput && !isLikelyToolArgsObject(tryParseJson(extractedOutput)) ? extractedOutput : "";
1470
1406
  const emptyAssistantMessageFailure = extractEmptyAssistantMessageFailure(result);
1471
1407
  const toolFallback = extractToolFallbackContext(result);
1472
- let synthesizedOutput = "";
1473
- try {
1474
- synthesizedOutput = await this.synthesizeDeepAgentAnswer(binding, input, result);
1475
- }
1476
- catch (error) {
1477
- if (!(error instanceof RuntimeOperationTimeoutError) || !toolFallback) {
1478
- throw error;
1479
- }
1480
- }
1481
- if (!visibleOutput && !synthesizedOutput && !toolFallback && emptyAssistantMessageFailure) {
1408
+ if (!visibleOutput && !toolFallback && emptyAssistantMessageFailure) {
1482
1409
  throw new Error(emptyAssistantMessageFailure);
1483
1410
  }
1484
- const output = visibleOutput || synthesizedOutput || toolFallback || JSON.stringify(result, null, 2);
1411
+ const output = visibleOutput || toolFallback || JSON.stringify(result, null, 2);
1485
1412
  const finalMessageText = sanitizeVisibleText(output);
1486
1413
  const outputContent = extractOutputContent(result);
1487
1414
  const contentBlocks = extractContentBlocks(result);
@@ -5,6 +5,8 @@ import type { RequirementAssessmentOptions } from "./skill-requirements.js";
5
5
  export declare class AgentHarnessRuntime {
6
6
  private readonly workspace;
7
7
  private readonly runtimeAdapterOptions;
8
+ private static readonly DEFAULT_HOST_AGENT_ID;
9
+ private static readonly BACKGROUND_EVENT_TYPES;
8
10
  private readonly eventBus;
9
11
  private readonly persistence;
10
12
  private readonly policyEngine;
@@ -27,6 +29,7 @@ export declare class AgentHarnessRuntime {
27
29
  private readonly healthMonitor;
28
30
  private readonly recoveryConfig;
29
31
  private readonly concurrencyConfig;
32
+ private readonly backgroundTasks;
30
33
  private readonly workerId;
31
34
  private activeRunSlots;
32
35
  private pendingRunInsertionOrder;
@@ -38,6 +41,7 @@ export declare class AgentHarnessRuntime {
38
41
  private listHostBindings;
39
42
  private defaultRunRoot;
40
43
  private heuristicRoute;
44
+ private getDefaultHostAgentId;
41
45
  private buildRoutingInput;
42
46
  private resolveSelectedAgentId;
43
47
  private resolveStore;
@@ -82,6 +86,7 @@ export declare class AgentHarnessRuntime {
82
86
  threadId?: string;
83
87
  }): Promise<string>;
84
88
  private emit;
89
+ private trackBackgroundTask;
85
90
  private ensureThreadStarted;
86
91
  private loadPriorHistory;
87
92
  private loadRunInput;
@@ -99,6 +104,7 @@ export declare class AgentHarnessRuntime {
99
104
  private finalizeContinuedRun;
100
105
  private emitOutputDeltaAndCreateItem;
101
106
  private createContentBlocksItem;
107
+ private createToolResultKey;
102
108
  private emitRunCreated;
103
109
  private setRunStateAndEmit;
104
110
  private requestApprovalAndEmit;
@@ -22,6 +22,14 @@ import { describeWorkspaceInventory, listAgentSkills as listWorkspaceAgentSkills
22
22
  export class AgentHarnessRuntime {
23
23
  workspace;
24
24
  runtimeAdapterOptions;
25
+ static DEFAULT_HOST_AGENT_ID = "orchestra";
26
+ static BACKGROUND_EVENT_TYPES = new Set([
27
+ "run.created",
28
+ "run.queued",
29
+ "run.dequeued",
30
+ "output.delta",
31
+ "reasoning.delta",
32
+ ]);
25
33
  eventBus = new EventBus();
26
34
  persistence;
27
35
  policyEngine = new PolicyEngine();
@@ -44,6 +52,7 @@ export class AgentHarnessRuntime {
44
52
  healthMonitor;
45
53
  recoveryConfig;
46
54
  concurrencyConfig;
55
+ backgroundTasks = new Set();
47
56
  workerId = `worker-${createPersistentId()}`;
48
57
  activeRunSlots = 0;
49
58
  pendingRunInsertionOrder = 0;
@@ -76,6 +85,13 @@ export class AgentHarnessRuntime {
76
85
  const { primaryBinding, secondaryBinding } = inferRoutingBindings(this.workspace);
77
86
  return heuristicRoute(extractMessageText(input), primaryBinding, secondaryBinding);
78
87
  }
88
+ getDefaultHostAgentId() {
89
+ const orchestraBinding = this.workspace.bindings.get(AgentHarnessRuntime.DEFAULT_HOST_AGENT_ID);
90
+ if (orchestraBinding && orchestraBinding.harnessRuntime.hostFacing !== false) {
91
+ return orchestraBinding.agent.id;
92
+ }
93
+ return this.heuristicRoute("");
94
+ }
79
95
  async buildRoutingInput(input, threadId) {
80
96
  const inputText = extractMessageText(input);
81
97
  if (!threadId) {
@@ -107,7 +123,7 @@ export class AgentHarnessRuntime {
107
123
  return thread.agentId;
108
124
  }
109
125
  }
110
- return this.routeAgent(input, { threadId });
126
+ return this.getDefaultHostAgentId();
111
127
  }
112
128
  return requestedAgentId;
113
129
  }
@@ -404,18 +420,29 @@ export class AgentHarnessRuntime {
404
420
  }
405
421
  async emit(threadId, runId, sequence, eventType, payload, source = "runtime") {
406
422
  const event = createHarnessEvent(threadId, runId, sequence, eventType, payload, source);
407
- await this.persistence.appendEvent(event);
408
- this.eventBus.publish(event);
409
- if (this.threadMemorySync.shouldHandle(event)) {
410
- await this.threadMemorySync.handleEvent(event);
423
+ if (AgentHarnessRuntime.BACKGROUND_EVENT_TYPES.has(event.eventType)) {
424
+ this.trackBackgroundTask(this.persistence.appendEvent(event).catch(() => {
425
+ // Fail open for telemetry-style event persistence.
426
+ }));
427
+ }
428
+ else {
429
+ await this.persistence.appendEvent(event);
411
430
  }
431
+ this.eventBus.publish(event);
412
432
  return event;
413
433
  }
434
+ trackBackgroundTask(task) {
435
+ this.backgroundTasks.add(task);
436
+ void task.finally(() => {
437
+ this.backgroundTasks.delete(task);
438
+ });
439
+ }
414
440
  async ensureThreadStarted(selectedAgentId, binding, input, existingThreadId) {
415
441
  const threadId = existingThreadId ?? createPersistentId();
416
442
  const runId = createPersistentId();
417
443
  const createdAt = new Date().toISOString();
418
- if (!existingThreadId) {
444
+ const isNewThread = !existingThreadId;
445
+ if (isNewThread) {
419
446
  await this.persistence.createThread({
420
447
  threadId,
421
448
  agentId: selectedAgentId,
@@ -424,21 +451,23 @@ export class AgentHarnessRuntime {
424
451
  createdAt,
425
452
  });
426
453
  }
427
- await this.persistence.appendThreadMessage(threadId, {
428
- role: "user",
429
- content: normalizeMessageContent(input),
430
- runId,
431
- createdAt,
432
- });
433
- await this.persistence.createRun({
434
- threadId,
435
- runId,
436
- agentId: binding.agent.id,
437
- executionMode: getBindingAdapterKind(binding),
438
- adapterKind: getBindingAdapterKind(binding),
439
- createdAt,
440
- });
441
- return { threadId, runId, createdAt };
454
+ await Promise.all([
455
+ this.persistence.appendThreadMessage(threadId, {
456
+ role: "user",
457
+ content: normalizeMessageContent(input),
458
+ runId,
459
+ createdAt,
460
+ }),
461
+ this.persistence.createRun({
462
+ threadId,
463
+ runId,
464
+ agentId: binding.agent.id,
465
+ executionMode: getBindingAdapterKind(binding),
466
+ adapterKind: getBindingAdapterKind(binding),
467
+ createdAt,
468
+ }),
469
+ ]);
470
+ return { threadId, runId, createdAt, isNewThread };
442
471
  }
443
472
  async loadPriorHistory(threadId, runId) {
444
473
  const history = await this.persistence.listThreadMessages(threadId);
@@ -500,11 +529,11 @@ export class AgentHarnessRuntime {
500
529
  output: reason ? `cancelled: ${reason}` : "cancelled",
501
530
  };
502
531
  }
503
- async invokeWithHistory(binding, input, threadId, runId, resumePayload, options = {}) {
504
- const priorHistory = await this.loadPriorHistory(threadId, runId);
532
+ async invokeWithHistory(binding, input, threadId, runId, resumePayload, priorHistory, options = {}) {
533
+ const history = priorHistory ?? await this.loadPriorHistory(threadId, runId);
505
534
  const startedAt = Date.now();
506
535
  try {
507
- const result = await this.runtimeAdapter.invoke(binding, input, threadId, runId, resumePayload, priorHistory, options);
536
+ const result = await this.runtimeAdapter.invoke(binding, input, threadId, runId, resumePayload, history, options);
508
537
  this.healthMonitor.recordLlmSuccess(Date.now() - startedAt);
509
538
  return result;
510
539
  }
@@ -603,7 +632,7 @@ export class AgentHarnessRuntime {
603
632
  });
604
633
  }
605
634
  try {
606
- const actual = await this.invokeWithHistory(binding, input, threadId, runId, undefined, {
635
+ const actual = await this.invokeWithHistory(binding, input, threadId, runId, undefined, options.priorHistory, {
607
636
  context: options.context,
608
637
  state: options.state,
609
638
  files: options.files,
@@ -683,6 +712,16 @@ export class AgentHarnessRuntime {
683
712
  contentBlocks,
684
713
  };
685
714
  }
715
+ createToolResultKey(toolName, output, isError) {
716
+ let serializedOutput = "";
717
+ try {
718
+ serializedOutput = JSON.stringify(output);
719
+ }
720
+ catch {
721
+ serializedOutput = String(output);
722
+ }
723
+ return JSON.stringify([toolName, serializedOutput, isError === true]);
724
+ }
686
725
  async emitRunCreated(threadId, runId, payload) {
687
726
  return this.emit(threadId, runId, 1, "run.created", payload);
688
727
  }
@@ -757,22 +796,29 @@ export class AgentHarnessRuntime {
757
796
  await listener(value);
758
797
  }
759
798
  async acquireRunSlot(threadId, runId, activeState = "running", priority = 0) {
760
- if (threadId && runId) {
761
- await this.persistence.enqueueRun({ threadId, runId, priority });
762
- }
763
799
  let stopHeartbeat = () => undefined;
764
- const beginLease = async () => {
800
+ const beginLease = async (mode) => {
765
801
  if (!threadId || !runId) {
766
802
  return;
767
803
  }
768
804
  const claimedAt = new Date().toISOString();
769
- await this.persistence.claimQueuedRun({
770
- threadId,
771
- runId,
772
- workerId: this.workerId,
773
- claimedAt,
774
- leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
775
- });
805
+ if (mode === "queue-claim") {
806
+ await this.persistence.claimQueuedRun({
807
+ threadId,
808
+ runId,
809
+ workerId: this.workerId,
810
+ claimedAt,
811
+ leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
812
+ });
813
+ }
814
+ else {
815
+ await this.persistence.renewRunLease({
816
+ runId,
817
+ workerId: this.workerId,
818
+ heartbeatAt: claimedAt,
819
+ leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
820
+ });
821
+ }
776
822
  if (this.concurrencyConfig.heartbeatIntervalMs <= 0) {
777
823
  return;
778
824
  }
@@ -797,14 +843,19 @@ export class AgentHarnessRuntime {
797
843
  };
798
844
  const maxConcurrentRuns = this.concurrencyConfig.maxConcurrentRuns;
799
845
  if (!maxConcurrentRuns) {
800
- await beginLease();
846
+ await beginLease("direct-heartbeat");
801
847
  return async () => {
802
848
  await releaseLease();
803
849
  };
804
850
  }
805
- if (this.activeRunSlots < maxConcurrentRuns) {
851
+ const canActivateImmediately = this.activeRunSlots < maxConcurrentRuns;
852
+ const useDirectHeartbeatFastPath = canActivateImmediately && maxConcurrentRuns > 1;
853
+ if (canActivateImmediately) {
806
854
  this.activeRunSlots += 1;
807
- await beginLease();
855
+ if (threadId && runId && !useDirectHeartbeatFastPath) {
856
+ await this.persistence.enqueueRun({ threadId, runId, priority });
857
+ }
858
+ await beginLease(useDirectHeartbeatFastPath ? "direct-heartbeat" : "queue-claim");
808
859
  let released = false;
809
860
  return async () => {
810
861
  if (released) {
@@ -833,11 +884,12 @@ export class AgentHarnessRuntime {
833
884
  await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
834
885
  previousState: "queued",
835
886
  });
836
- await beginLease();
887
+ await beginLease("queue-claim");
837
888
  }
838
889
  return "activate";
839
890
  };
840
891
  if (threadId && runId) {
892
+ await this.persistence.enqueueRun({ threadId, runId, priority });
841
893
  const slotAcquisition = new Promise((resolve, reject) => {
842
894
  const displacedEntries = this.enqueuePendingRunSlot({
843
895
  threadId,
@@ -994,16 +1046,19 @@ export class AgentHarnessRuntime {
994
1046
  if (!policyDecision.allowed) {
995
1047
  throw new Error(`Policy evaluation blocked agent ${selectedAgentId}: ${policyDecision.reasons.join(", ")}`);
996
1048
  }
997
- const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
1049
+ const { threadId, runId, isNewThread } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
998
1050
  const priority = this.normalizeRunPriority(options.priority);
999
- await this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation, priority));
1000
- await this.emitRunCreated(threadId, runId, {
1051
+ const runRequestPromise = this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation, priority));
1052
+ const runCreatedEventPromise = this.emitRunCreated(threadId, runId, {
1001
1053
  agentId: binding.agent.id,
1002
1054
  requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
1003
1055
  selectedAgentId,
1004
1056
  executionMode: getBindingAdapterKind(binding),
1005
1057
  });
1006
- const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "running", priority);
1058
+ const releaseRunSlotPromise = this.acquireRunSlot(threadId, runId, "running", priority);
1059
+ await runRequestPromise;
1060
+ await runCreatedEventPromise;
1061
+ const releaseRunSlot = await releaseRunSlotPromise;
1007
1062
  try {
1008
1063
  return await this.executeQueuedRun(binding, options.input, threadId, runId, selectedAgentId, {
1009
1064
  context: invocation.context,
@@ -1012,6 +1067,7 @@ export class AgentHarnessRuntime {
1012
1067
  previousState: "running",
1013
1068
  stateSequence: 6,
1014
1069
  approvalSequence: 7,
1070
+ priorHistory: isNewThread ? [] : undefined,
1015
1071
  });
1016
1072
  }
1017
1073
  finally {
@@ -1037,22 +1093,31 @@ export class AgentHarnessRuntime {
1037
1093
  }
1038
1094
  let emitted = false;
1039
1095
  let streamActivityObserved = false;
1040
- const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
1096
+ const { threadId, runId, isNewThread } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
1041
1097
  const priority = this.normalizeRunPriority(options.priority);
1042
- await this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation, priority));
1043
- yield { type: "event", event: await this.emitRunCreated(threadId, runId, {
1044
- agentId: selectedAgentId,
1045
- requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
1046
- selectedAgentId,
1047
- input: options.input,
1048
- state: "running",
1049
- }) };
1050
- const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "running", priority);
1098
+ const priorHistoryPromise = Promise.resolve(isNewThread ? [] : undefined).then((historyHint) => historyHint ?? this.loadPriorHistory(threadId, runId));
1099
+ const runRequestPromise = this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation, priority));
1100
+ const runCreatedEventPromise = this.emitRunCreated(threadId, runId, {
1101
+ agentId: selectedAgentId,
1102
+ requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
1103
+ selectedAgentId,
1104
+ input: options.input,
1105
+ state: "running",
1106
+ });
1107
+ yield { type: "event", event: await runCreatedEventPromise };
1108
+ const releaseRunSlotPromise = this.acquireRunSlot(threadId, runId, "running", priority);
1109
+ let releaseRunSlot = async () => undefined;
1051
1110
  try {
1052
1111
  try {
1053
- const priorHistory = await this.loadPriorHistory(threadId, runId);
1112
+ const [priorHistory, acquiredReleaseRunSlot] = await Promise.all([
1113
+ priorHistoryPromise,
1114
+ releaseRunSlotPromise,
1115
+ runRequestPromise,
1116
+ ]).then(([loadedPriorHistory, resolvedReleaseRunSlot]) => [loadedPriorHistory, resolvedReleaseRunSlot]);
1117
+ releaseRunSlot = acquiredReleaseRunSlot;
1054
1118
  let assistantOutput = "";
1055
1119
  const toolErrors = [];
1120
+ let lastToolResultKey = null;
1056
1121
  for await (const chunk of this.runtimeAdapter.stream(binding, options.input, threadId, priorHistory, {
1057
1122
  context: invocation.context,
1058
1123
  state: invocation.state,
@@ -1120,6 +1185,11 @@ export class AgentHarnessRuntime {
1120
1185
  continue;
1121
1186
  }
1122
1187
  if (normalizedChunk.kind === "tool-result") {
1188
+ const toolResultKey = this.createToolResultKey(normalizedChunk.toolName, normalizedChunk.output, normalizedChunk.isError);
1189
+ if (toolResultKey === lastToolResultKey) {
1190
+ continue;
1191
+ }
1192
+ lastToolResultKey = toolResultKey;
1123
1193
  if (normalizedChunk.isError) {
1124
1194
  toolErrors.push(renderToolFailure(normalizedChunk.toolName, normalizedChunk.output));
1125
1195
  }
@@ -1425,6 +1495,7 @@ export class AgentHarnessRuntime {
1425
1495
  await this.checkpointMaintenance?.stop();
1426
1496
  await this.runtimeRecordMaintenance?.stop();
1427
1497
  this.unregisterThreadMemorySync();
1498
+ await Promise.allSettled(Array.from(this.backgroundTasks));
1428
1499
  await this.threadMemorySync.close();
1429
1500
  }
1430
1501
  async stop() {
@@ -79,7 +79,6 @@ export class ThreadMemorySync {
79
79
  this.pending.delete(task);
80
80
  });
81
81
  this.pending.add(task);
82
- await task;
83
82
  }
84
83
  async syncThread(threadId) {
85
84
  const thread = await this.persistence.getSession(threadId);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.84",
3
+ "version": "0.0.85",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",
@@ -53,7 +53,8 @@
53
53
  "scripts": {
54
54
  "build": "rm -rf dist tsconfig.tsbuildinfo && tsc -p tsconfig.json && cp -R config dist/",
55
55
  "check": "tsc -p tsconfig.json --noEmit",
56
- "test": "vitest run test/hello-file.test.ts test/public-api.test.ts test/runtime-health.test.ts test/memory-runtime.test.ts test/sqlite-persistence.test.ts test/runtime-queue-lease.test.ts test/runtime-cancel.test.ts test/runtime-record-maintenance.test.ts test/resource-optional-provider.test.ts test/resource-isolation.test.ts test/stock-research-app-load-harness.test.ts test/stock-research-app-run.test.ts test/stock-research-app-config.test.ts test/release-workflow.test.ts test/release-version.test.ts test/gitignore.test.ts test/package-lock.test.ts test/readme.test.ts test/product-boundary-docs.test.ts test/long-term-memory-docs.test.ts test/local-docs-persistence-inventory.test.ts test/docs-site.test.ts test/runtime-adapter-regressions.test.ts test/runtime-capabilities.test.ts test/runtime-recovery.test.ts test/tool-extension-gaps.test.ts test/checkpoint-maintenance.test.ts test/llamaindex-dependency-compat.test.ts test/skill-standard.test.ts test/routing-config.test.ts test/workspace-compat-regressions.test.ts test/upstream-compat-regressions.test.ts test/yaml-format.test.ts test/config-secrets.test.ts test/init-command.test.ts test/coding-agent-guide.test.ts",
56
+ "test": "vitest run test/hello-file.test.ts test/public-api.test.ts test/runtime-health.test.ts test/memory-runtime.test.ts test/sqlite-persistence.test.ts test/runtime-queue-lease.test.ts test/runtime-cancel.test.ts test/runtime-record-maintenance.test.ts test/resource-optional-provider.test.ts test/resource-isolation.test.ts test/stock-research-app-load-harness.test.ts test/stock-research-app-run.test.ts test/stock-research-app-config.test.ts test/release-workflow.test.ts test/release-version.test.ts test/gitignore.test.ts test/package-lock.test.ts test/readme.test.ts test/product-boundary-docs.test.ts test/long-term-memory-docs.test.ts test/local-docs-persistence-inventory.test.ts test/docs-site.test.ts test/runtime-adapter-regressions.test.ts test/runtime-capabilities.test.ts test/runtime-recovery.test.ts test/tool-extension-gaps.test.ts test/checkpoint-maintenance.test.ts test/llamaindex-dependency-compat.test.ts test/skill-standard.test.ts test/routing-config.test.ts test/workspace-compat-regressions.test.ts test/upstream-compat-regressions.test.ts test/upstream-runtime-ab-benchmark.test.ts test/yaml-format.test.ts test/config-secrets.test.ts test/init-command.test.ts test/coding-agent-guide.test.ts",
57
+ "test:upstream-ab-real": "vitest run test/upstream-runtime-ab-real.test.ts",
57
58
  "test:real-providers": "vitest run test/real-provider-harness.test.ts",
58
59
  "release:prepare": "npm version patch --no-git-tag-version && node ./scripts/sync-example-version.mjs",
59
60
  "release:pack": "npm pack --dry-run",