@botbotgo/agent-harness 0.0.80 → 0.0.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,6 +44,7 @@ export class AgentHarnessRuntime {
44
44
  healthMonitor;
45
45
  recoveryConfig;
46
46
  concurrencyConfig;
47
+ workerId = `worker-${createPersistentId()}`;
47
48
  activeRunSlots = 0;
48
49
  pendingRunSlots = [];
49
50
  runtimeEventSequence = 0;
@@ -98,6 +99,13 @@ export class AgentHarnessRuntime {
98
99
  }
99
100
  async resolveSelectedAgentId(input, requestedAgentId, threadId) {
100
101
  if (!requestedAgentId || requestedAgentId === AUTO_AGENT_ID) {
102
+ if (threadId) {
103
+ const thread = await this.getSession(threadId);
104
+ const threadBinding = thread ? this.workspace.bindings.get(thread.agentId) : undefined;
105
+ if (thread?.agentId && threadBinding?.harnessRuntime.hostFacing !== false) {
106
+ return thread.agentId;
107
+ }
108
+ }
101
109
  return this.routeAgent(input, { threadId });
102
110
  }
103
111
  return requestedAgentId;
@@ -253,26 +261,10 @@ export class AgentHarnessRuntime {
253
261
  return tools.every((tool) => tool.retryable === true);
254
262
  }
255
263
  async listThreads(filter) {
256
- const threadSummaries = await this.persistence.listSessions();
257
- if (!filter?.agentId) {
258
- return threadSummaries;
259
- }
260
- return threadSummaries.filter((thread) => thread.agentId === filter.agentId);
264
+ return this.persistence.listSessions(filter);
261
265
  }
262
266
  async listRuns(filter) {
263
- const runs = await this.persistence.listRuns();
264
- return runs.filter((run) => {
265
- if (filter?.agentId && run.agentId !== filter.agentId) {
266
- return false;
267
- }
268
- if (filter?.threadId && run.threadId !== filter.threadId) {
269
- return false;
270
- }
271
- if (filter?.state && run.state !== filter.state) {
272
- return false;
273
- }
274
- return true;
275
- });
267
+ return this.persistence.listRuns(filter);
276
268
  }
277
269
  async getRun(runId) {
278
270
  return this.persistence.getRun(runId);
@@ -316,21 +308,8 @@ export class AgentHarnessRuntime {
316
308
  };
317
309
  }
318
310
  async listApprovals(filter) {
319
- const approvals = filter?.threadId && filter?.runId
320
- ? await this.persistence.getRunApprovals(filter.threadId, filter.runId)
321
- : await this.persistence.listApprovals();
322
- return approvals.filter((approval) => {
323
- if (filter?.status && approval.status !== filter.status) {
324
- return false;
325
- }
326
- if (filter?.threadId && approval.threadId !== filter.threadId) {
327
- return false;
328
- }
329
- if (filter?.runId && approval.runId !== filter.runId) {
330
- return false;
331
- }
332
- return true;
333
- }).map((approval) => this.toPublicApprovalRecord(approval));
311
+ const approvals = await this.persistence.listApprovals(filter);
312
+ return approvals.map((approval) => this.toPublicApprovalRecord(approval));
334
313
  }
335
314
  async getApproval(approvalId) {
336
315
  const approval = await this.persistence.getApproval(approvalId);
@@ -480,6 +459,46 @@ export class AgentHarnessRuntime {
480
459
  createdAt: new Date().toISOString(),
481
460
  });
482
461
  }
462
+ async getRunCancellation(runId) {
463
+ const control = await this.persistence.getRunControl(runId);
464
+ return {
465
+ requested: control?.cancelRequested === true,
466
+ ...(control?.cancelReason ? { reason: control.cancelReason } : {}),
467
+ };
468
+ }
469
+ async expirePendingApprovals(threadId, runId) {
470
+ const approvals = await this.persistence.getRunApprovals(threadId, runId);
471
+ for (const approval of approvals) {
472
+ if (approval.status !== "pending") {
473
+ continue;
474
+ }
475
+ await this.persistence.resolveApproval(threadId, runId, approval.approvalId, "expired");
476
+ await this.emit(threadId, runId, 6, "approval.resolved", {
477
+ approvalId: approval.approvalId,
478
+ pendingActionId: approval.pendingActionId,
479
+ decision: "cancel",
480
+ toolName: approval.toolName,
481
+ });
482
+ }
483
+ }
484
+ async finalizeCancelledRun(threadId, runId, previousState, reason) {
485
+ await this.expirePendingApprovals(threadId, runId);
486
+ await this.persistence.releaseRunClaim(runId);
487
+ await this.persistence.clearRunCancel(runId);
488
+ await this.persistence.clearRunRequest(threadId, runId);
489
+ await this.setRunStateAndEmit(threadId, runId, 104, "cancelled", {
490
+ previousState,
491
+ ...(reason ? { error: reason } : {}),
492
+ });
493
+ const runMeta = await this.persistence.getRunMeta(threadId, runId);
494
+ return {
495
+ threadId,
496
+ runId,
497
+ agentId: runMeta.agentId,
498
+ state: "cancelled",
499
+ output: reason ? `cancelled: ${reason}` : "cancelled",
500
+ };
501
+ }
483
502
  async invokeWithHistory(binding, input, threadId, runId, resumePayload, options = {}) {
484
503
  const priorHistory = await this.loadPriorHistory(threadId, runId);
485
504
  const startedAt = Date.now();
@@ -514,6 +533,20 @@ export class AgentHarnessRuntime {
514
533
  }
515
534
  async executeQueuedRun(binding, input, threadId, runId, agentId, options = {}) {
516
535
  const previousState = options.previousState ?? "running";
536
+ const currentRun = await this.persistence.getRun(runId);
537
+ if (currentRun?.state === "cancelled") {
538
+ return {
539
+ threadId,
540
+ runId,
541
+ agentId,
542
+ state: "cancelled",
543
+ output: "cancelled",
544
+ };
545
+ }
546
+ const cancellation = await this.getRunCancellation(runId);
547
+ if (cancellation.requested) {
548
+ return this.finalizeCancelledRun(threadId, runId, previousState, cancellation.reason);
549
+ }
517
550
  if (previousState === "queued") {
518
551
  await this.emit(threadId, runId, 101, "run.dequeued", {
519
552
  queuePosition: 0,
@@ -531,6 +564,10 @@ export class AgentHarnessRuntime {
531
564
  state: options.state,
532
565
  files: options.files,
533
566
  });
567
+ const cancelledAfterInvoke = await this.getRunCancellation(runId);
568
+ if (cancelledAfterInvoke.requested) {
569
+ return this.finalizeCancelledRun(threadId, runId, previousState === "queued" ? "running" : previousState, cancelledAfterInvoke.reason);
570
+ }
534
571
  const finalized = await this.finalizeContinuedRun(threadId, runId, input, actual, {
535
572
  previousState: previousState === "queued" ? "running" : previousState,
536
573
  stateSequence: options.stateSequence ?? 103,
@@ -676,21 +713,64 @@ export class AgentHarnessRuntime {
676
713
  await listener(value);
677
714
  }
678
715
  async acquireRunSlot(threadId, runId, activeState = "running") {
716
+ if (threadId && runId) {
717
+ await this.persistence.enqueueRun({ threadId, runId });
718
+ }
719
+ let stopHeartbeat = () => undefined;
720
+ const beginLease = async () => {
721
+ if (!threadId || !runId) {
722
+ return;
723
+ }
724
+ const claimedAt = new Date().toISOString();
725
+ await this.persistence.claimQueuedRun({
726
+ threadId,
727
+ runId,
728
+ workerId: this.workerId,
729
+ claimedAt,
730
+ leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
731
+ });
732
+ if (this.concurrencyConfig.heartbeatIntervalMs <= 0) {
733
+ return;
734
+ }
735
+ const timer = setInterval(() => {
736
+ void this.persistence.renewRunLease({
737
+ runId,
738
+ workerId: this.workerId,
739
+ heartbeatAt: new Date().toISOString(),
740
+ leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
741
+ });
742
+ }, this.concurrencyConfig.heartbeatIntervalMs);
743
+ timer.unref?.();
744
+ stopHeartbeat = () => {
745
+ clearInterval(timer);
746
+ };
747
+ };
748
+ const releaseLease = async () => {
749
+ stopHeartbeat();
750
+ if (runId) {
751
+ await this.persistence.releaseRunClaim(runId);
752
+ }
753
+ };
679
754
  const maxConcurrentRuns = this.concurrencyConfig.maxConcurrentRuns;
680
755
  if (!maxConcurrentRuns) {
681
- return () => undefined;
756
+ await beginLease();
757
+ return async () => {
758
+ await releaseLease();
759
+ };
682
760
  }
683
761
  if (this.activeRunSlots < maxConcurrentRuns) {
684
762
  this.activeRunSlots += 1;
763
+ await beginLease();
685
764
  let released = false;
686
- return () => {
765
+ return async () => {
687
766
  if (released) {
688
767
  return;
689
768
  }
690
769
  released = true;
770
+ await releaseLease();
691
771
  this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
692
772
  const next = this.pendingRunSlots.shift();
693
- void next?.();
773
+ void next?.activate();
694
774
  };
695
775
  }
696
776
  if (threadId && runId) {
@@ -704,38 +784,57 @@ export class AgentHarnessRuntime {
704
784
  maxConcurrentRuns,
705
785
  });
706
786
  }
707
- await new Promise((resolve, reject) => {
708
- this.pendingRunSlots.push(async () => {
709
- try {
710
- this.activeRunSlots += 1;
711
- if (threadId && runId) {
712
- await this.emit(threadId, runId, 4, "run.dequeued", {
713
- queuePosition: 0,
714
- activeRunCount: this.activeRunSlots,
715
- maxConcurrentRuns,
716
- });
717
- await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
718
- previousState: "queued",
719
- });
787
+ const slotAcquisition = await new Promise((resolve, reject) => {
788
+ this.pendingRunSlots.push({ runId, activate: async () => {
789
+ try {
790
+ const currentRun = runId ? await this.persistence.getRun(runId) : null;
791
+ if (currentRun?.state === "cancelled") {
792
+ resolve("abort");
793
+ return;
794
+ }
795
+ this.activeRunSlots += 1;
796
+ if (threadId && runId) {
797
+ await this.emit(threadId, runId, 4, "run.dequeued", {
798
+ queuePosition: 0,
799
+ activeRunCount: this.activeRunSlots,
800
+ maxConcurrentRuns,
801
+ });
802
+ await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
803
+ previousState: "queued",
804
+ });
805
+ await beginLease();
806
+ }
807
+ resolve("activate");
720
808
  }
721
- resolve();
722
- }
723
- catch (error) {
724
- reject(error);
725
- }
726
- });
809
+ catch (error) {
810
+ reject(error);
811
+ }
812
+ }, abort: () => resolve("abort") });
727
813
  });
814
+ if (slotAcquisition === "abort") {
815
+ return async () => undefined;
816
+ }
728
817
  let released = false;
729
- return () => {
818
+ return async () => {
730
819
  if (released) {
731
820
  return;
732
821
  }
733
822
  released = true;
823
+ await releaseLease();
734
824
  this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
735
825
  const next = this.pendingRunSlots.shift();
736
- void next?.();
826
+ void next?.activate();
737
827
  };
738
828
  }
829
+ dropPendingRunSlot(runId) {
830
+ const index = this.pendingRunSlots.findIndex((entry) => entry.runId === runId);
831
+ if (index < 0) {
832
+ return false;
833
+ }
834
+ const [entry] = this.pendingRunSlots.splice(index, 1);
835
+ entry?.abort();
836
+ return true;
837
+ }
739
838
  async dispatchRunListeners(stream, listeners) {
740
839
  let latestEvent;
741
840
  let latestResult;
@@ -843,7 +942,7 @@ export class AgentHarnessRuntime {
843
942
  });
844
943
  }
845
944
  finally {
846
- releaseRunSlot();
945
+ await releaseRunSlot();
847
946
  }
848
947
  }
849
948
  async *streamEvents(options) {
@@ -1104,7 +1203,7 @@ export class AgentHarnessRuntime {
1104
1203
  }
1105
1204
  finally {
1106
1205
  await this.persistence.clearRunRequest(threadId, runId);
1107
- releaseRunSlot();
1206
+ await releaseRunSlot();
1108
1207
  }
1109
1208
  }
1110
1209
  async resume(options) {
@@ -1125,6 +1224,10 @@ export class AgentHarnessRuntime {
1125
1224
  throw new Error(`Unknown agent ${thread.agentId}`);
1126
1225
  }
1127
1226
  const resumePayload = this.buildResumePayload(binding, approval, options);
1227
+ const cancellation = await this.getRunCancellation(runId);
1228
+ if (cancellation.requested) {
1229
+ return this.finalizeCancelledRun(threadId, runId, thread.status, cancellation.reason);
1230
+ }
1128
1231
  await this.persistence.setRunState(threadId, runId, "resuming", `checkpoints/${threadId}/${runId}/cp-1`);
1129
1232
  const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "resuming");
1130
1233
  try {
@@ -1156,6 +1259,10 @@ export class AgentHarnessRuntime {
1156
1259
  try {
1157
1260
  const actual = await this.runtimeAdapter.invoke(binding, "", threadId, runId, resumePayload, priorHistory);
1158
1261
  this.healthMonitor.recordLlmSuccess(Date.now() - startedAt);
1262
+ const cancelledAfterInvoke = await this.getRunCancellation(runId);
1263
+ if (cancelledAfterInvoke.requested) {
1264
+ return this.finalizeCancelledRun(threadId, runId, "resuming", cancelledAfterInvoke.reason);
1265
+ }
1159
1266
  await this.persistence.clearRecoveryIntent(threadId, runId);
1160
1267
  const finalized = await this.finalizeContinuedRun(threadId, runId, runInput, actual, {
1161
1268
  previousState: "resuming",
@@ -1174,7 +1281,7 @@ export class AgentHarnessRuntime {
1174
1281
  }
1175
1282
  }
1176
1283
  finally {
1177
- releaseRunSlot();
1284
+ await releaseRunSlot();
1178
1285
  }
1179
1286
  }
1180
1287
  buildResumePayload(binding, approval, options) {
@@ -1249,10 +1356,44 @@ export class AgentHarnessRuntime {
1249
1356
  async stop() {
1250
1357
  await this.close();
1251
1358
  }
1359
+ async cancelRun(options) {
1360
+ const run = await this.persistence.getRun(options.runId);
1361
+ if (!run) {
1362
+ throw new Error(`Unknown run ${options.runId}`);
1363
+ }
1364
+ if (this.isTerminalRunState(run.state)) {
1365
+ return {
1366
+ threadId: run.threadId,
1367
+ runId: run.runId,
1368
+ agentId: run.agentId,
1369
+ state: run.state,
1370
+ output: run.state,
1371
+ };
1372
+ }
1373
+ await this.persistence.requestRunCancel(run.runId, options.reason);
1374
+ if (run.state === "queued" || run.state === "waiting_for_approval" || run.state === "claimed") {
1375
+ if (run.state === "queued") {
1376
+ this.dropPendingRunSlot(run.runId);
1377
+ }
1378
+ return this.finalizeCancelledRun(run.threadId, run.runId, run.state, options.reason);
1379
+ }
1380
+ await this.setRunStateAndEmit(run.threadId, run.runId, 103, "cancelling", {
1381
+ previousState: run.state,
1382
+ ...(options.reason ? { error: options.reason } : {}),
1383
+ });
1384
+ return {
1385
+ threadId: run.threadId,
1386
+ runId: run.runId,
1387
+ agentId: run.agentId,
1388
+ state: "cancelling",
1389
+ output: options.reason ? `cancelling: ${options.reason}` : "cancelling",
1390
+ };
1391
+ }
1252
1392
  async recoverStartupRuns() {
1253
1393
  if (!this.recoveryConfig.enabled) {
1254
1394
  return;
1255
1395
  }
1396
+ await this.reclaimExpiredClaimedRuns();
1256
1397
  const threads = await this.persistence.listSessions();
1257
1398
  for (const thread of threads) {
1258
1399
  if (thread.status === "queued") {
@@ -1269,7 +1410,7 @@ export class AgentHarnessRuntime {
1269
1410
  });
1270
1411
  continue;
1271
1412
  }
1272
- const releaseRunSlot = await this.acquireRunSlot();
1413
+ const releaseRunSlot = await this.acquireRunSlot(thread.threadId, thread.latestRunId);
1273
1414
  try {
1274
1415
  await this.executeQueuedRun(binding, request.input, thread.threadId, thread.latestRunId, runMeta.agentId, {
1275
1416
  context: request.invocation?.context,
@@ -1281,21 +1422,38 @@ export class AgentHarnessRuntime {
1281
1422
  });
1282
1423
  }
1283
1424
  finally {
1284
- releaseRunSlot();
1425
+ await releaseRunSlot();
1285
1426
  }
1286
1427
  continue;
1287
1428
  }
1288
1429
  if (thread.status === "running") {
1430
+ const isStale = await this.isStaleRunningRun(thread);
1431
+ if (!isStale) {
1432
+ continue;
1433
+ }
1289
1434
  const runMeta = await this.persistence.getRunMeta(thread.threadId, thread.latestRunId);
1290
1435
  const binding = this.workspace.bindings.get(runMeta.agentId);
1291
- if (!binding || !this.supportsRunningReplay(binding)) {
1436
+ if (!binding) {
1437
+ continue;
1438
+ }
1439
+ if (!this.supportsRunningReplay(binding)) {
1440
+ await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
1441
+ previousState: "running",
1442
+ error: "stale running run cannot be replayed safely",
1443
+ });
1444
+ await this.persistence.releaseRunClaim(thread.latestRunId);
1292
1445
  continue;
1293
1446
  }
1294
1447
  const request = await this.persistence.getRunRequest(thread.threadId, thread.latestRunId);
1295
1448
  if (!request) {
1449
+ await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
1450
+ previousState: "running",
1451
+ error: "missing persisted run request for stale running run recovery",
1452
+ });
1453
+ await this.persistence.releaseRunClaim(thread.latestRunId);
1296
1454
  continue;
1297
1455
  }
1298
- const releaseRunSlot = await this.acquireRunSlot();
1456
+ const releaseRunSlot = await this.acquireRunSlot(thread.threadId, thread.latestRunId, "running");
1299
1457
  try {
1300
1458
  await this.emit(thread.threadId, thread.latestRunId, 100, "run.resumed", {
1301
1459
  resumeKind: "startup-running-recovery",
@@ -1311,7 +1469,7 @@ export class AgentHarnessRuntime {
1311
1469
  });
1312
1470
  }
1313
1471
  finally {
1314
- releaseRunSlot();
1472
+ await releaseRunSlot();
1315
1473
  }
1316
1474
  continue;
1317
1475
  }
@@ -1369,5 +1527,49 @@ export class AgentHarnessRuntime {
1369
1527
  }
1370
1528
  }
1371
1529
  }
1530
+ async reclaimExpiredClaimedRuns(nowIso = new Date().toISOString()) {
1531
+ const expiredClaims = await this.persistence.listExpiredClaimedRuns(nowIso);
1532
+ for (const claim of expiredClaims) {
1533
+ const thread = await this.persistence.getSession(claim.threadId);
1534
+ if (!thread) {
1535
+ await this.persistence.releaseRunClaim(claim.runId);
1536
+ continue;
1537
+ }
1538
+ const lifecycle = await this.persistence.getRunLifecycle(claim.threadId, claim.runId);
1539
+ if (lifecycle.state === "claimed") {
1540
+ await this.persistence.enqueueRun({
1541
+ threadId: claim.threadId,
1542
+ runId: claim.runId,
1543
+ priority: claim.priority,
1544
+ queueKey: claim.queueKey,
1545
+ availableAt: nowIso,
1546
+ });
1547
+ await this.setRunStateAndEmit(claim.threadId, claim.runId, 99, "queued", {
1548
+ previousState: "claimed",
1549
+ });
1550
+ await this.emit(claim.threadId, claim.runId, 100, "run.queued", {
1551
+ queuePosition: 0,
1552
+ activeRunCount: this.activeRunSlots,
1553
+ maxConcurrentRuns: this.concurrencyConfig.maxConcurrentRuns,
1554
+ recoveredOnStartup: true,
1555
+ reclaimReason: "expired-lease",
1556
+ });
1557
+ continue;
1558
+ }
1559
+ await this.persistence.releaseRunClaim(claim.runId);
1560
+ }
1561
+ }
1562
+ async isStaleRunningRun(thread, nowMs = Date.now()) {
1563
+ const control = await this.persistence.getRunControl(thread.latestRunId);
1564
+ const heartbeatAt = control?.heartbeatAt;
1565
+ if (!heartbeatAt) {
1566
+ return true;
1567
+ }
1568
+ const heartbeatAtMs = Date.parse(heartbeatAt);
1569
+ if (!Number.isFinite(heartbeatAtMs)) {
1570
+ return true;
1571
+ }
1572
+ return nowMs - heartbeatAtMs >= this.concurrencyConfig.heartbeatTimeoutMs;
1573
+ }
1372
1574
  }
1373
1575
  export { AgentHarnessRuntime as AgentHarness };
@@ -422,7 +422,7 @@ export class HealthMonitor {
422
422
  }
423
423
  countStuckRuns(runs, nowMs) {
424
424
  return runs.filter((run) => {
425
- if (!["running", "resuming", "queued"].includes(run.state)) {
425
+ if (!["claimed", "running", "resuming", "queued", "cancelling"].includes(run.state)) {
426
426
  return false;
427
427
  }
428
428
  const updatedAtMs = Date.parse(run.updatedAt);
@@ -91,9 +91,11 @@ export async function maintainSqliteRuntimeRecords(dbPath, config, nowMs = Date.
91
91
  "DELETE FROM artifacts WHERE thread_id = ?",
92
92
  "DELETE FROM approvals WHERE thread_id = ?",
93
93
  "DELETE FROM events WHERE thread_id = ?",
94
+ "DELETE FROM run_queue WHERE thread_id = ?",
94
95
  "DELETE FROM run_requests WHERE thread_id = ?",
95
96
  "DELETE FROM recovery_intents WHERE thread_id = ?",
96
97
  "DELETE FROM thread_messages WHERE thread_id = ?",
98
+ "DELETE FROM run_control WHERE run_id IN (SELECT run_id FROM runs WHERE thread_id = ?)",
97
99
  "DELETE FROM runs WHERE thread_id = ?",
98
100
  "DELETE FROM threads WHERE thread_id = ?",
99
101
  ].map((sql) => ({ sql, args: [threadId] })), "write");