@botbotgo/agent-harness 0.0.79 → 0.0.81

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information it contains is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import type { ApprovalRecord, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, MessageContent, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, ResumeOptions, RunOptions, RunResult, RunSummary, ThreadSummary, ThreadRecord, WorkspaceBundle } from "../contracts/types.js";
1
+ import type { ApprovalRecord, CancelOptions, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, MessageContent, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, ResumeOptions, RunOptions, RunResult, RunSummary, ThreadSummary, ThreadRecord, WorkspaceBundle } from "../contracts/types.js";
2
2
  import { type ToolMcpServerOptions } from "../mcp.js";
3
3
  import { type InventoryAgentRecord, type InventorySkillRecord } from "./inventory.js";
4
4
  import type { RequirementAssessmentOptions } from "./skill-requirements.js";
@@ -27,6 +27,7 @@ export declare class AgentHarnessRuntime {
27
27
  private readonly healthMonitor;
28
28
  private readonly recoveryConfig;
29
29
  private readonly concurrencyConfig;
30
+ private readonly workerId;
30
31
  private activeRunSlots;
31
32
  private readonly pendingRunSlots;
32
33
  private runtimeEventSequence;
@@ -84,6 +85,9 @@ export declare class AgentHarnessRuntime {
84
85
  private loadPriorHistory;
85
86
  private loadRunInput;
86
87
  private appendAssistantMessage;
88
+ private getRunCancellation;
89
+ private expirePendingApprovals;
90
+ private finalizeCancelledRun;
87
91
  private invokeWithHistory;
88
92
  private buildPersistedRunRequest;
89
93
  private executeQueuedRun;
@@ -100,6 +104,7 @@ export declare class AgentHarnessRuntime {
100
104
  private isDecisionRun;
101
105
  private notifyListener;
102
106
  private acquireRunSlot;
107
+ private dropPendingRunSlot;
103
108
  private dispatchRunListeners;
104
109
  run(options: RunOptions): Promise<RunResult>;
105
110
  streamEvents(options: RunStartOptions): AsyncGenerator<HarnessStreamItem>;
@@ -110,6 +115,9 @@ export declare class AgentHarnessRuntime {
110
115
  }>;
111
116
  close(): Promise<void>;
112
117
  stop(): Promise<void>;
118
+ cancelRun(options: CancelOptions): Promise<RunResult>;
113
119
  private recoverStartupRuns;
120
+ private reclaimExpiredClaimedRuns;
121
+ private isStaleRunningRun;
114
122
  }
115
123
  export { AgentHarnessRuntime as AgentHarness };
@@ -44,6 +44,7 @@ export class AgentHarnessRuntime {
44
44
  healthMonitor;
45
45
  recoveryConfig;
46
46
  concurrencyConfig;
47
+ workerId = `worker-${createPersistentId()}`;
47
48
  activeRunSlots = 0;
48
49
  pendingRunSlots = [];
49
50
  runtimeEventSequence = 0;
@@ -98,6 +99,13 @@ export class AgentHarnessRuntime {
98
99
  }
99
100
  async resolveSelectedAgentId(input, requestedAgentId, threadId) {
100
101
  if (!requestedAgentId || requestedAgentId === AUTO_AGENT_ID) {
102
+ if (threadId) {
103
+ const thread = await this.getSession(threadId);
104
+ const threadBinding = thread ? this.workspace.bindings.get(thread.agentId) : undefined;
105
+ if (thread?.agentId && threadBinding?.harnessRuntime.hostFacing !== false) {
106
+ return thread.agentId;
107
+ }
108
+ }
101
109
  return this.routeAgent(input, { threadId });
102
110
  }
103
111
  return requestedAgentId;
@@ -480,6 +488,46 @@ export class AgentHarnessRuntime {
480
488
  createdAt: new Date().toISOString(),
481
489
  });
482
490
  }
491
+ async getRunCancellation(runId) {
492
+ const control = await this.persistence.getRunControl(runId);
493
+ return {
494
+ requested: control?.cancelRequested === true,
495
+ ...(control?.cancelReason ? { reason: control.cancelReason } : {}),
496
+ };
497
+ }
498
+ async expirePendingApprovals(threadId, runId) {
499
+ const approvals = await this.persistence.getRunApprovals(threadId, runId);
500
+ for (const approval of approvals) {
501
+ if (approval.status !== "pending") {
502
+ continue;
503
+ }
504
+ await this.persistence.resolveApproval(threadId, runId, approval.approvalId, "expired");
505
+ await this.emit(threadId, runId, 6, "approval.resolved", {
506
+ approvalId: approval.approvalId,
507
+ pendingActionId: approval.pendingActionId,
508
+ decision: "cancel",
509
+ toolName: approval.toolName,
510
+ });
511
+ }
512
+ }
513
+ async finalizeCancelledRun(threadId, runId, previousState, reason) {
514
+ await this.expirePendingApprovals(threadId, runId);
515
+ await this.persistence.releaseRunClaim(runId);
516
+ await this.persistence.clearRunCancel(runId);
517
+ await this.persistence.clearRunRequest(threadId, runId);
518
+ await this.setRunStateAndEmit(threadId, runId, 104, "cancelled", {
519
+ previousState,
520
+ ...(reason ? { error: reason } : {}),
521
+ });
522
+ const runMeta = await this.persistence.getRunMeta(threadId, runId);
523
+ return {
524
+ threadId,
525
+ runId,
526
+ agentId: runMeta.agentId,
527
+ state: "cancelled",
528
+ output: reason ? `cancelled: ${reason}` : "cancelled",
529
+ };
530
+ }
483
531
  async invokeWithHistory(binding, input, threadId, runId, resumePayload, options = {}) {
484
532
  const priorHistory = await this.loadPriorHistory(threadId, runId);
485
533
  const startedAt = Date.now();
@@ -514,6 +562,20 @@ export class AgentHarnessRuntime {
514
562
  }
515
563
  async executeQueuedRun(binding, input, threadId, runId, agentId, options = {}) {
516
564
  const previousState = options.previousState ?? "running";
565
+ const currentRun = await this.persistence.getRun(runId);
566
+ if (currentRun?.state === "cancelled") {
567
+ return {
568
+ threadId,
569
+ runId,
570
+ agentId,
571
+ state: "cancelled",
572
+ output: "cancelled",
573
+ };
574
+ }
575
+ const cancellation = await this.getRunCancellation(runId);
576
+ if (cancellation.requested) {
577
+ return this.finalizeCancelledRun(threadId, runId, previousState, cancellation.reason);
578
+ }
517
579
  if (previousState === "queued") {
518
580
  await this.emit(threadId, runId, 101, "run.dequeued", {
519
581
  queuePosition: 0,
@@ -531,6 +593,10 @@ export class AgentHarnessRuntime {
531
593
  state: options.state,
532
594
  files: options.files,
533
595
  });
596
+ const cancelledAfterInvoke = await this.getRunCancellation(runId);
597
+ if (cancelledAfterInvoke.requested) {
598
+ return this.finalizeCancelledRun(threadId, runId, previousState === "queued" ? "running" : previousState, cancelledAfterInvoke.reason);
599
+ }
534
600
  const finalized = await this.finalizeContinuedRun(threadId, runId, input, actual, {
535
601
  previousState: previousState === "queued" ? "running" : previousState,
536
602
  stateSequence: options.stateSequence ?? 103,
@@ -676,21 +742,64 @@ export class AgentHarnessRuntime {
676
742
  await listener(value);
677
743
  }
678
744
  async acquireRunSlot(threadId, runId, activeState = "running") {
745
+ if (threadId && runId) {
746
+ await this.persistence.enqueueRun({ threadId, runId });
747
+ }
748
+ let stopHeartbeat = () => undefined;
749
+ const beginLease = async () => {
750
+ if (!threadId || !runId) {
751
+ return;
752
+ }
753
+ const claimedAt = new Date().toISOString();
754
+ await this.persistence.claimQueuedRun({
755
+ threadId,
756
+ runId,
757
+ workerId: this.workerId,
758
+ claimedAt,
759
+ leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
760
+ });
761
+ if (this.concurrencyConfig.heartbeatIntervalMs <= 0) {
762
+ return;
763
+ }
764
+ const timer = setInterval(() => {
765
+ void this.persistence.renewRunLease({
766
+ runId,
767
+ workerId: this.workerId,
768
+ heartbeatAt: new Date().toISOString(),
769
+ leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
770
+ });
771
+ }, this.concurrencyConfig.heartbeatIntervalMs);
772
+ timer.unref?.();
773
+ stopHeartbeat = () => {
774
+ clearInterval(timer);
775
+ };
776
+ };
777
+ const releaseLease = async () => {
778
+ stopHeartbeat();
779
+ if (runId) {
780
+ await this.persistence.releaseRunClaim(runId);
781
+ }
782
+ };
679
783
  const maxConcurrentRuns = this.concurrencyConfig.maxConcurrentRuns;
680
784
  if (!maxConcurrentRuns) {
681
- return () => undefined;
785
+ await beginLease();
786
+ return async () => {
787
+ await releaseLease();
788
+ };
682
789
  }
683
790
  if (this.activeRunSlots < maxConcurrentRuns) {
684
791
  this.activeRunSlots += 1;
792
+ await beginLease();
685
793
  let released = false;
686
- return () => {
794
+ return async () => {
687
795
  if (released) {
688
796
  return;
689
797
  }
690
798
  released = true;
799
+ await releaseLease();
691
800
  this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
692
801
  const next = this.pendingRunSlots.shift();
693
- void next?.();
802
+ void next?.activate();
694
803
  };
695
804
  }
696
805
  if (threadId && runId) {
@@ -704,38 +813,57 @@ export class AgentHarnessRuntime {
704
813
  maxConcurrentRuns,
705
814
  });
706
815
  }
707
- await new Promise((resolve, reject) => {
708
- this.pendingRunSlots.push(async () => {
709
- try {
710
- this.activeRunSlots += 1;
711
- if (threadId && runId) {
712
- await this.emit(threadId, runId, 4, "run.dequeued", {
713
- queuePosition: 0,
714
- activeRunCount: this.activeRunSlots,
715
- maxConcurrentRuns,
716
- });
717
- await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
718
- previousState: "queued",
719
- });
816
+ const slotAcquisition = await new Promise((resolve, reject) => {
817
+ this.pendingRunSlots.push({ runId, activate: async () => {
818
+ try {
819
+ const currentRun = runId ? await this.persistence.getRun(runId) : null;
820
+ if (currentRun?.state === "cancelled") {
821
+ resolve("abort");
822
+ return;
823
+ }
824
+ this.activeRunSlots += 1;
825
+ if (threadId && runId) {
826
+ await this.emit(threadId, runId, 4, "run.dequeued", {
827
+ queuePosition: 0,
828
+ activeRunCount: this.activeRunSlots,
829
+ maxConcurrentRuns,
830
+ });
831
+ await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
832
+ previousState: "queued",
833
+ });
834
+ await beginLease();
835
+ }
836
+ resolve("activate");
720
837
  }
721
- resolve();
722
- }
723
- catch (error) {
724
- reject(error);
725
- }
726
- });
838
+ catch (error) {
839
+ reject(error);
840
+ }
841
+ }, abort: () => resolve("abort") });
727
842
  });
843
+ if (slotAcquisition === "abort") {
844
+ return async () => undefined;
845
+ }
728
846
  let released = false;
729
- return () => {
847
+ return async () => {
730
848
  if (released) {
731
849
  return;
732
850
  }
733
851
  released = true;
852
+ await releaseLease();
734
853
  this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
735
854
  const next = this.pendingRunSlots.shift();
736
- void next?.();
855
+ void next?.activate();
737
856
  };
738
857
  }
858
+ dropPendingRunSlot(runId) {
859
+ const index = this.pendingRunSlots.findIndex((entry) => entry.runId === runId);
860
+ if (index < 0) {
861
+ return false;
862
+ }
863
+ const [entry] = this.pendingRunSlots.splice(index, 1);
864
+ entry?.abort();
865
+ return true;
866
+ }
739
867
  async dispatchRunListeners(stream, listeners) {
740
868
  let latestEvent;
741
869
  let latestResult;
@@ -843,7 +971,7 @@ export class AgentHarnessRuntime {
843
971
  });
844
972
  }
845
973
  finally {
846
- releaseRunSlot();
974
+ await releaseRunSlot();
847
975
  }
848
976
  }
849
977
  async *streamEvents(options) {
@@ -1104,7 +1232,7 @@ export class AgentHarnessRuntime {
1104
1232
  }
1105
1233
  finally {
1106
1234
  await this.persistence.clearRunRequest(threadId, runId);
1107
- releaseRunSlot();
1235
+ await releaseRunSlot();
1108
1236
  }
1109
1237
  }
1110
1238
  async resume(options) {
@@ -1125,6 +1253,10 @@ export class AgentHarnessRuntime {
1125
1253
  throw new Error(`Unknown agent ${thread.agentId}`);
1126
1254
  }
1127
1255
  const resumePayload = this.buildResumePayload(binding, approval, options);
1256
+ const cancellation = await this.getRunCancellation(runId);
1257
+ if (cancellation.requested) {
1258
+ return this.finalizeCancelledRun(threadId, runId, thread.status, cancellation.reason);
1259
+ }
1128
1260
  await this.persistence.setRunState(threadId, runId, "resuming", `checkpoints/${threadId}/${runId}/cp-1`);
1129
1261
  const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "resuming");
1130
1262
  try {
@@ -1156,6 +1288,10 @@ export class AgentHarnessRuntime {
1156
1288
  try {
1157
1289
  const actual = await this.runtimeAdapter.invoke(binding, "", threadId, runId, resumePayload, priorHistory);
1158
1290
  this.healthMonitor.recordLlmSuccess(Date.now() - startedAt);
1291
+ const cancelledAfterInvoke = await this.getRunCancellation(runId);
1292
+ if (cancelledAfterInvoke.requested) {
1293
+ return this.finalizeCancelledRun(threadId, runId, "resuming", cancelledAfterInvoke.reason);
1294
+ }
1159
1295
  await this.persistence.clearRecoveryIntent(threadId, runId);
1160
1296
  const finalized = await this.finalizeContinuedRun(threadId, runId, runInput, actual, {
1161
1297
  previousState: "resuming",
@@ -1174,7 +1310,7 @@ export class AgentHarnessRuntime {
1174
1310
  }
1175
1311
  }
1176
1312
  finally {
1177
- releaseRunSlot();
1313
+ await releaseRunSlot();
1178
1314
  }
1179
1315
  }
1180
1316
  buildResumePayload(binding, approval, options) {
@@ -1249,10 +1385,44 @@ export class AgentHarnessRuntime {
1249
1385
  async stop() {
1250
1386
  await this.close();
1251
1387
  }
1388
+ async cancelRun(options) {
1389
+ const run = await this.persistence.getRun(options.runId);
1390
+ if (!run) {
1391
+ throw new Error(`Unknown run ${options.runId}`);
1392
+ }
1393
+ if (this.isTerminalRunState(run.state)) {
1394
+ return {
1395
+ threadId: run.threadId,
1396
+ runId: run.runId,
1397
+ agentId: run.agentId,
1398
+ state: run.state,
1399
+ output: run.state,
1400
+ };
1401
+ }
1402
+ await this.persistence.requestRunCancel(run.runId, options.reason);
1403
+ if (run.state === "queued" || run.state === "waiting_for_approval" || run.state === "claimed") {
1404
+ if (run.state === "queued") {
1405
+ this.dropPendingRunSlot(run.runId);
1406
+ }
1407
+ return this.finalizeCancelledRun(run.threadId, run.runId, run.state, options.reason);
1408
+ }
1409
+ await this.setRunStateAndEmit(run.threadId, run.runId, 103, "cancelling", {
1410
+ previousState: run.state,
1411
+ ...(options.reason ? { error: options.reason } : {}),
1412
+ });
1413
+ return {
1414
+ threadId: run.threadId,
1415
+ runId: run.runId,
1416
+ agentId: run.agentId,
1417
+ state: "cancelling",
1418
+ output: options.reason ? `cancelling: ${options.reason}` : "cancelling",
1419
+ };
1420
+ }
1252
1421
  async recoverStartupRuns() {
1253
1422
  if (!this.recoveryConfig.enabled) {
1254
1423
  return;
1255
1424
  }
1425
+ await this.reclaimExpiredClaimedRuns();
1256
1426
  const threads = await this.persistence.listSessions();
1257
1427
  for (const thread of threads) {
1258
1428
  if (thread.status === "queued") {
@@ -1269,7 +1439,7 @@ export class AgentHarnessRuntime {
1269
1439
  });
1270
1440
  continue;
1271
1441
  }
1272
- const releaseRunSlot = await this.acquireRunSlot();
1442
+ const releaseRunSlot = await this.acquireRunSlot(thread.threadId, thread.latestRunId);
1273
1443
  try {
1274
1444
  await this.executeQueuedRun(binding, request.input, thread.threadId, thread.latestRunId, runMeta.agentId, {
1275
1445
  context: request.invocation?.context,
@@ -1281,21 +1451,38 @@ export class AgentHarnessRuntime {
1281
1451
  });
1282
1452
  }
1283
1453
  finally {
1284
- releaseRunSlot();
1454
+ await releaseRunSlot();
1285
1455
  }
1286
1456
  continue;
1287
1457
  }
1288
1458
  if (thread.status === "running") {
1459
+ const isStale = await this.isStaleRunningRun(thread);
1460
+ if (!isStale) {
1461
+ continue;
1462
+ }
1289
1463
  const runMeta = await this.persistence.getRunMeta(thread.threadId, thread.latestRunId);
1290
1464
  const binding = this.workspace.bindings.get(runMeta.agentId);
1291
- if (!binding || !this.supportsRunningReplay(binding)) {
1465
+ if (!binding) {
1466
+ continue;
1467
+ }
1468
+ if (!this.supportsRunningReplay(binding)) {
1469
+ await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
1470
+ previousState: "running",
1471
+ error: "stale running run cannot be replayed safely",
1472
+ });
1473
+ await this.persistence.releaseRunClaim(thread.latestRunId);
1292
1474
  continue;
1293
1475
  }
1294
1476
  const request = await this.persistence.getRunRequest(thread.threadId, thread.latestRunId);
1295
1477
  if (!request) {
1478
+ await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
1479
+ previousState: "running",
1480
+ error: "missing persisted run request for stale running run recovery",
1481
+ });
1482
+ await this.persistence.releaseRunClaim(thread.latestRunId);
1296
1483
  continue;
1297
1484
  }
1298
- const releaseRunSlot = await this.acquireRunSlot();
1485
+ const releaseRunSlot = await this.acquireRunSlot(thread.threadId, thread.latestRunId, "running");
1299
1486
  try {
1300
1487
  await this.emit(thread.threadId, thread.latestRunId, 100, "run.resumed", {
1301
1488
  resumeKind: "startup-running-recovery",
@@ -1311,7 +1498,7 @@ export class AgentHarnessRuntime {
1311
1498
  });
1312
1499
  }
1313
1500
  finally {
1314
- releaseRunSlot();
1501
+ await releaseRunSlot();
1315
1502
  }
1316
1503
  continue;
1317
1504
  }
@@ -1369,5 +1556,49 @@ export class AgentHarnessRuntime {
1369
1556
  }
1370
1557
  }
1371
1558
  }
1559
+ async reclaimExpiredClaimedRuns(nowIso = new Date().toISOString()) {
1560
+ const expiredClaims = await this.persistence.listExpiredClaimedRuns(nowIso);
1561
+ for (const claim of expiredClaims) {
1562
+ const thread = await this.persistence.getSession(claim.threadId);
1563
+ if (!thread) {
1564
+ await this.persistence.releaseRunClaim(claim.runId);
1565
+ continue;
1566
+ }
1567
+ const lifecycle = await this.persistence.getRunLifecycle(claim.threadId, claim.runId);
1568
+ if (lifecycle.state === "claimed") {
1569
+ await this.persistence.enqueueRun({
1570
+ threadId: claim.threadId,
1571
+ runId: claim.runId,
1572
+ priority: claim.priority,
1573
+ queueKey: claim.queueKey,
1574
+ availableAt: nowIso,
1575
+ });
1576
+ await this.setRunStateAndEmit(claim.threadId, claim.runId, 99, "queued", {
1577
+ previousState: "claimed",
1578
+ });
1579
+ await this.emit(claim.threadId, claim.runId, 100, "run.queued", {
1580
+ queuePosition: 0,
1581
+ activeRunCount: this.activeRunSlots,
1582
+ maxConcurrentRuns: this.concurrencyConfig.maxConcurrentRuns,
1583
+ recoveredOnStartup: true,
1584
+ reclaimReason: "expired-lease",
1585
+ });
1586
+ continue;
1587
+ }
1588
+ await this.persistence.releaseRunClaim(claim.runId);
1589
+ }
1590
+ }
1591
+ async isStaleRunningRun(thread, nowMs = Date.now()) {
1592
+ const control = await this.persistence.getRunControl(thread.latestRunId);
1593
+ const heartbeatAt = control?.heartbeatAt;
1594
+ if (!heartbeatAt) {
1595
+ return true;
1596
+ }
1597
+ const heartbeatAtMs = Date.parse(heartbeatAt);
1598
+ if (!Number.isFinite(heartbeatAtMs)) {
1599
+ return true;
1600
+ }
1601
+ return nowMs - heartbeatAtMs >= this.concurrencyConfig.heartbeatTimeoutMs;
1602
+ }
1372
1603
  }
1373
1604
  export { AgentHarnessRuntime as AgentHarness };
@@ -422,7 +422,7 @@ export class HealthMonitor {
422
422
  }
423
423
  countStuckRuns(runs, nowMs) {
424
424
  return runs.filter((run) => {
425
- if (!["running", "resuming", "queued"].includes(run.state)) {
425
+ if (!["claimed", "running", "resuming", "queued", "cancelling"].includes(run.state)) {
426
426
  return false;
427
427
  }
428
428
  const updatedAtMs = Date.parse(run.updatedAt);
@@ -91,9 +91,11 @@ export async function maintainSqliteRuntimeRecords(dbPath, config, nowMs = Date.
91
91
  "DELETE FROM artifacts WHERE thread_id = ?",
92
92
  "DELETE FROM approvals WHERE thread_id = ?",
93
93
  "DELETE FROM events WHERE thread_id = ?",
94
+ "DELETE FROM run_queue WHERE thread_id = ?",
94
95
  "DELETE FROM run_requests WHERE thread_id = ?",
95
96
  "DELETE FROM recovery_intents WHERE thread_id = ?",
96
97
  "DELETE FROM thread_messages WHERE thread_id = ?",
98
+ "DELETE FROM run_control WHERE run_id IN (SELECT run_id FROM runs WHERE thread_id = ?)",
97
99
  "DELETE FROM runs WHERE thread_id = ?",
98
100
  "DELETE FROM threads WHERE thread_id = ?",
99
101
  ].map((sql) => ({ sql, args: [threadId] })), "write");