@botbotgo/agent-harness 0.0.80 → 0.0.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.d.ts +2 -1
- package/dist/api.js +3 -0
- package/dist/contracts/types.d.ts +5 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/persistence/file-store.d.ts +32 -4
- package/dist/persistence/file-store.js +197 -6
- package/dist/persistence/sqlite-store.d.ts +32 -4
- package/dist/persistence/sqlite-store.js +174 -9
- package/dist/persistence/types.d.ts +64 -3
- package/dist/runtime/agent-runtime-adapter.d.ts +5 -0
- package/dist/runtime/agent-runtime-adapter.js +93 -19
- package/dist/runtime/harness.d.ts +9 -1
- package/dist/runtime/harness.js +267 -65
- package/dist/runtime/health-monitor.js +1 -1
- package/dist/runtime/runtime-record-maintenance.js +2 -0
- package/dist/workspace/object-loader.js +133 -7
- package/dist/workspace/support/workspace-ref-utils.d.ts +3 -0
- package/dist/workspace/support/workspace-ref-utils.js +30 -1
- package/package.json +2 -2
package/dist/runtime/harness.js
CHANGED
|
@@ -44,6 +44,7 @@ export class AgentHarnessRuntime {
|
|
|
44
44
|
healthMonitor;
|
|
45
45
|
recoveryConfig;
|
|
46
46
|
concurrencyConfig;
|
|
47
|
+
workerId = `worker-${createPersistentId()}`;
|
|
47
48
|
activeRunSlots = 0;
|
|
48
49
|
pendingRunSlots = [];
|
|
49
50
|
runtimeEventSequence = 0;
|
|
@@ -98,6 +99,13 @@ export class AgentHarnessRuntime {
|
|
|
98
99
|
}
|
|
99
100
|
async resolveSelectedAgentId(input, requestedAgentId, threadId) {
|
|
100
101
|
if (!requestedAgentId || requestedAgentId === AUTO_AGENT_ID) {
|
|
102
|
+
if (threadId) {
|
|
103
|
+
const thread = await this.getSession(threadId);
|
|
104
|
+
const threadBinding = thread ? this.workspace.bindings.get(thread.agentId) : undefined;
|
|
105
|
+
if (thread?.agentId && threadBinding?.harnessRuntime.hostFacing !== false) {
|
|
106
|
+
return thread.agentId;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
101
109
|
return this.routeAgent(input, { threadId });
|
|
102
110
|
}
|
|
103
111
|
return requestedAgentId;
|
|
@@ -253,26 +261,10 @@ export class AgentHarnessRuntime {
|
|
|
253
261
|
return tools.every((tool) => tool.retryable === true);
|
|
254
262
|
}
|
|
255
263
|
async listThreads(filter) {
|
|
256
|
-
|
|
257
|
-
if (!filter?.agentId) {
|
|
258
|
-
return threadSummaries;
|
|
259
|
-
}
|
|
260
|
-
return threadSummaries.filter((thread) => thread.agentId === filter.agentId);
|
|
264
|
+
return this.persistence.listSessions(filter);
|
|
261
265
|
}
|
|
262
266
|
async listRuns(filter) {
|
|
263
|
-
|
|
264
|
-
return runs.filter((run) => {
|
|
265
|
-
if (filter?.agentId && run.agentId !== filter.agentId) {
|
|
266
|
-
return false;
|
|
267
|
-
}
|
|
268
|
-
if (filter?.threadId && run.threadId !== filter.threadId) {
|
|
269
|
-
return false;
|
|
270
|
-
}
|
|
271
|
-
if (filter?.state && run.state !== filter.state) {
|
|
272
|
-
return false;
|
|
273
|
-
}
|
|
274
|
-
return true;
|
|
275
|
-
});
|
|
267
|
+
return this.persistence.listRuns(filter);
|
|
276
268
|
}
|
|
277
269
|
async getRun(runId) {
|
|
278
270
|
return this.persistence.getRun(runId);
|
|
@@ -316,21 +308,8 @@ export class AgentHarnessRuntime {
|
|
|
316
308
|
};
|
|
317
309
|
}
|
|
318
310
|
async listApprovals(filter) {
|
|
319
|
-
const approvals =
|
|
320
|
-
|
|
321
|
-
: await this.persistence.listApprovals();
|
|
322
|
-
return approvals.filter((approval) => {
|
|
323
|
-
if (filter?.status && approval.status !== filter.status) {
|
|
324
|
-
return false;
|
|
325
|
-
}
|
|
326
|
-
if (filter?.threadId && approval.threadId !== filter.threadId) {
|
|
327
|
-
return false;
|
|
328
|
-
}
|
|
329
|
-
if (filter?.runId && approval.runId !== filter.runId) {
|
|
330
|
-
return false;
|
|
331
|
-
}
|
|
332
|
-
return true;
|
|
333
|
-
}).map((approval) => this.toPublicApprovalRecord(approval));
|
|
311
|
+
const approvals = await this.persistence.listApprovals(filter);
|
|
312
|
+
return approvals.map((approval) => this.toPublicApprovalRecord(approval));
|
|
334
313
|
}
|
|
335
314
|
async getApproval(approvalId) {
|
|
336
315
|
const approval = await this.persistence.getApproval(approvalId);
|
|
@@ -480,6 +459,46 @@ export class AgentHarnessRuntime {
|
|
|
480
459
|
createdAt: new Date().toISOString(),
|
|
481
460
|
});
|
|
482
461
|
}
|
|
462
|
+
async getRunCancellation(runId) {
|
|
463
|
+
const control = await this.persistence.getRunControl(runId);
|
|
464
|
+
return {
|
|
465
|
+
requested: control?.cancelRequested === true,
|
|
466
|
+
...(control?.cancelReason ? { reason: control.cancelReason } : {}),
|
|
467
|
+
};
|
|
468
|
+
}
|
|
469
|
+
async expirePendingApprovals(threadId, runId) {
|
|
470
|
+
const approvals = await this.persistence.getRunApprovals(threadId, runId);
|
|
471
|
+
for (const approval of approvals) {
|
|
472
|
+
if (approval.status !== "pending") {
|
|
473
|
+
continue;
|
|
474
|
+
}
|
|
475
|
+
await this.persistence.resolveApproval(threadId, runId, approval.approvalId, "expired");
|
|
476
|
+
await this.emit(threadId, runId, 6, "approval.resolved", {
|
|
477
|
+
approvalId: approval.approvalId,
|
|
478
|
+
pendingActionId: approval.pendingActionId,
|
|
479
|
+
decision: "cancel",
|
|
480
|
+
toolName: approval.toolName,
|
|
481
|
+
});
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
async finalizeCancelledRun(threadId, runId, previousState, reason) {
|
|
485
|
+
await this.expirePendingApprovals(threadId, runId);
|
|
486
|
+
await this.persistence.releaseRunClaim(runId);
|
|
487
|
+
await this.persistence.clearRunCancel(runId);
|
|
488
|
+
await this.persistence.clearRunRequest(threadId, runId);
|
|
489
|
+
await this.setRunStateAndEmit(threadId, runId, 104, "cancelled", {
|
|
490
|
+
previousState,
|
|
491
|
+
...(reason ? { error: reason } : {}),
|
|
492
|
+
});
|
|
493
|
+
const runMeta = await this.persistence.getRunMeta(threadId, runId);
|
|
494
|
+
return {
|
|
495
|
+
threadId,
|
|
496
|
+
runId,
|
|
497
|
+
agentId: runMeta.agentId,
|
|
498
|
+
state: "cancelled",
|
|
499
|
+
output: reason ? `cancelled: ${reason}` : "cancelled",
|
|
500
|
+
};
|
|
501
|
+
}
|
|
483
502
|
async invokeWithHistory(binding, input, threadId, runId, resumePayload, options = {}) {
|
|
484
503
|
const priorHistory = await this.loadPriorHistory(threadId, runId);
|
|
485
504
|
const startedAt = Date.now();
|
|
@@ -514,6 +533,20 @@ export class AgentHarnessRuntime {
|
|
|
514
533
|
}
|
|
515
534
|
async executeQueuedRun(binding, input, threadId, runId, agentId, options = {}) {
|
|
516
535
|
const previousState = options.previousState ?? "running";
|
|
536
|
+
const currentRun = await this.persistence.getRun(runId);
|
|
537
|
+
if (currentRun?.state === "cancelled") {
|
|
538
|
+
return {
|
|
539
|
+
threadId,
|
|
540
|
+
runId,
|
|
541
|
+
agentId,
|
|
542
|
+
state: "cancelled",
|
|
543
|
+
output: "cancelled",
|
|
544
|
+
};
|
|
545
|
+
}
|
|
546
|
+
const cancellation = await this.getRunCancellation(runId);
|
|
547
|
+
if (cancellation.requested) {
|
|
548
|
+
return this.finalizeCancelledRun(threadId, runId, previousState, cancellation.reason);
|
|
549
|
+
}
|
|
517
550
|
if (previousState === "queued") {
|
|
518
551
|
await this.emit(threadId, runId, 101, "run.dequeued", {
|
|
519
552
|
queuePosition: 0,
|
|
@@ -531,6 +564,10 @@ export class AgentHarnessRuntime {
|
|
|
531
564
|
state: options.state,
|
|
532
565
|
files: options.files,
|
|
533
566
|
});
|
|
567
|
+
const cancelledAfterInvoke = await this.getRunCancellation(runId);
|
|
568
|
+
if (cancelledAfterInvoke.requested) {
|
|
569
|
+
return this.finalizeCancelledRun(threadId, runId, previousState === "queued" ? "running" : previousState, cancelledAfterInvoke.reason);
|
|
570
|
+
}
|
|
534
571
|
const finalized = await this.finalizeContinuedRun(threadId, runId, input, actual, {
|
|
535
572
|
previousState: previousState === "queued" ? "running" : previousState,
|
|
536
573
|
stateSequence: options.stateSequence ?? 103,
|
|
@@ -676,21 +713,64 @@ export class AgentHarnessRuntime {
|
|
|
676
713
|
await listener(value);
|
|
677
714
|
}
|
|
678
715
|
async acquireRunSlot(threadId, runId, activeState = "running") {
|
|
716
|
+
if (threadId && runId) {
|
|
717
|
+
await this.persistence.enqueueRun({ threadId, runId });
|
|
718
|
+
}
|
|
719
|
+
let stopHeartbeat = () => undefined;
|
|
720
|
+
const beginLease = async () => {
|
|
721
|
+
if (!threadId || !runId) {
|
|
722
|
+
return;
|
|
723
|
+
}
|
|
724
|
+
const claimedAt = new Date().toISOString();
|
|
725
|
+
await this.persistence.claimQueuedRun({
|
|
726
|
+
threadId,
|
|
727
|
+
runId,
|
|
728
|
+
workerId: this.workerId,
|
|
729
|
+
claimedAt,
|
|
730
|
+
leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
|
|
731
|
+
});
|
|
732
|
+
if (this.concurrencyConfig.heartbeatIntervalMs <= 0) {
|
|
733
|
+
return;
|
|
734
|
+
}
|
|
735
|
+
const timer = setInterval(() => {
|
|
736
|
+
void this.persistence.renewRunLease({
|
|
737
|
+
runId,
|
|
738
|
+
workerId: this.workerId,
|
|
739
|
+
heartbeatAt: new Date().toISOString(),
|
|
740
|
+
leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
|
|
741
|
+
});
|
|
742
|
+
}, this.concurrencyConfig.heartbeatIntervalMs);
|
|
743
|
+
timer.unref?.();
|
|
744
|
+
stopHeartbeat = () => {
|
|
745
|
+
clearInterval(timer);
|
|
746
|
+
};
|
|
747
|
+
};
|
|
748
|
+
const releaseLease = async () => {
|
|
749
|
+
stopHeartbeat();
|
|
750
|
+
if (runId) {
|
|
751
|
+
await this.persistence.releaseRunClaim(runId);
|
|
752
|
+
}
|
|
753
|
+
};
|
|
679
754
|
const maxConcurrentRuns = this.concurrencyConfig.maxConcurrentRuns;
|
|
680
755
|
if (!maxConcurrentRuns) {
|
|
681
|
-
|
|
756
|
+
await beginLease();
|
|
757
|
+
return async () => {
|
|
758
|
+
await releaseLease();
|
|
759
|
+
};
|
|
682
760
|
}
|
|
683
761
|
if (this.activeRunSlots < maxConcurrentRuns) {
|
|
684
762
|
this.activeRunSlots += 1;
|
|
763
|
+
await beginLease();
|
|
685
764
|
let released = false;
|
|
686
|
-
return () => {
|
|
765
|
+
return async () => {
|
|
687
766
|
if (released) {
|
|
688
767
|
return;
|
|
689
768
|
}
|
|
690
769
|
released = true;
|
|
770
|
+
await releaseLease();
|
|
691
771
|
this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
|
|
692
772
|
const next = this.pendingRunSlots.shift();
|
|
693
|
-
void next?.();
|
|
773
|
+
void next?.activate();
|
|
694
774
|
};
|
|
695
775
|
}
|
|
696
776
|
if (threadId && runId) {
|
|
@@ -704,38 +784,57 @@ export class AgentHarnessRuntime {
|
|
|
704
784
|
maxConcurrentRuns,
|
|
705
785
|
});
|
|
706
786
|
}
|
|
707
|
-
await new Promise((resolve, reject) => {
|
|
708
|
-
this.pendingRunSlots.push(async () => {
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
787
|
+
const slotAcquisition = await new Promise((resolve, reject) => {
|
|
788
|
+
this.pendingRunSlots.push({ runId, activate: async () => {
|
|
789
|
+
try {
|
|
790
|
+
const currentRun = runId ? await this.persistence.getRun(runId) : null;
|
|
791
|
+
if (currentRun?.state === "cancelled") {
|
|
792
|
+
resolve("abort");
|
|
793
|
+
return;
|
|
794
|
+
}
|
|
795
|
+
this.activeRunSlots += 1;
|
|
796
|
+
if (threadId && runId) {
|
|
797
|
+
await this.emit(threadId, runId, 4, "run.dequeued", {
|
|
798
|
+
queuePosition: 0,
|
|
799
|
+
activeRunCount: this.activeRunSlots,
|
|
800
|
+
maxConcurrentRuns,
|
|
801
|
+
});
|
|
802
|
+
await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
|
|
803
|
+
previousState: "queued",
|
|
804
|
+
});
|
|
805
|
+
await beginLease();
|
|
806
|
+
}
|
|
807
|
+
resolve("activate");
|
|
720
808
|
}
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
}
|
|
726
|
-
});
|
|
809
|
+
catch (error) {
|
|
810
|
+
reject(error);
|
|
811
|
+
}
|
|
812
|
+
}, abort: () => resolve("abort") });
|
|
727
813
|
});
|
|
814
|
+
if (slotAcquisition === "abort") {
|
|
815
|
+
return async () => undefined;
|
|
816
|
+
}
|
|
728
817
|
let released = false;
|
|
729
|
-
return () => {
|
|
818
|
+
return async () => {
|
|
730
819
|
if (released) {
|
|
731
820
|
return;
|
|
732
821
|
}
|
|
733
822
|
released = true;
|
|
823
|
+
await releaseLease();
|
|
734
824
|
this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
|
|
735
825
|
const next = this.pendingRunSlots.shift();
|
|
736
|
-
void next?.();
|
|
826
|
+
void next?.activate();
|
|
737
827
|
};
|
|
738
828
|
}
|
|
829
|
+
dropPendingRunSlot(runId) {
|
|
830
|
+
const index = this.pendingRunSlots.findIndex((entry) => entry.runId === runId);
|
|
831
|
+
if (index < 0) {
|
|
832
|
+
return false;
|
|
833
|
+
}
|
|
834
|
+
const [entry] = this.pendingRunSlots.splice(index, 1);
|
|
835
|
+
entry?.abort();
|
|
836
|
+
return true;
|
|
837
|
+
}
|
|
739
838
|
async dispatchRunListeners(stream, listeners) {
|
|
740
839
|
let latestEvent;
|
|
741
840
|
let latestResult;
|
|
@@ -843,7 +942,7 @@ export class AgentHarnessRuntime {
|
|
|
843
942
|
});
|
|
844
943
|
}
|
|
845
944
|
finally {
|
|
846
|
-
releaseRunSlot();
|
|
945
|
+
await releaseRunSlot();
|
|
847
946
|
}
|
|
848
947
|
}
|
|
849
948
|
async *streamEvents(options) {
|
|
@@ -1104,7 +1203,7 @@ export class AgentHarnessRuntime {
|
|
|
1104
1203
|
}
|
|
1105
1204
|
finally {
|
|
1106
1205
|
await this.persistence.clearRunRequest(threadId, runId);
|
|
1107
|
-
releaseRunSlot();
|
|
1206
|
+
await releaseRunSlot();
|
|
1108
1207
|
}
|
|
1109
1208
|
}
|
|
1110
1209
|
async resume(options) {
|
|
@@ -1125,6 +1224,10 @@ export class AgentHarnessRuntime {
|
|
|
1125
1224
|
throw new Error(`Unknown agent ${thread.agentId}`);
|
|
1126
1225
|
}
|
|
1127
1226
|
const resumePayload = this.buildResumePayload(binding, approval, options);
|
|
1227
|
+
const cancellation = await this.getRunCancellation(runId);
|
|
1228
|
+
if (cancellation.requested) {
|
|
1229
|
+
return this.finalizeCancelledRun(threadId, runId, thread.status, cancellation.reason);
|
|
1230
|
+
}
|
|
1128
1231
|
await this.persistence.setRunState(threadId, runId, "resuming", `checkpoints/${threadId}/${runId}/cp-1`);
|
|
1129
1232
|
const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "resuming");
|
|
1130
1233
|
try {
|
|
@@ -1156,6 +1259,10 @@ export class AgentHarnessRuntime {
|
|
|
1156
1259
|
try {
|
|
1157
1260
|
const actual = await this.runtimeAdapter.invoke(binding, "", threadId, runId, resumePayload, priorHistory);
|
|
1158
1261
|
this.healthMonitor.recordLlmSuccess(Date.now() - startedAt);
|
|
1262
|
+
const cancelledAfterInvoke = await this.getRunCancellation(runId);
|
|
1263
|
+
if (cancelledAfterInvoke.requested) {
|
|
1264
|
+
return this.finalizeCancelledRun(threadId, runId, "resuming", cancelledAfterInvoke.reason);
|
|
1265
|
+
}
|
|
1159
1266
|
await this.persistence.clearRecoveryIntent(threadId, runId);
|
|
1160
1267
|
const finalized = await this.finalizeContinuedRun(threadId, runId, runInput, actual, {
|
|
1161
1268
|
previousState: "resuming",
|
|
@@ -1174,7 +1281,7 @@ export class AgentHarnessRuntime {
|
|
|
1174
1281
|
}
|
|
1175
1282
|
}
|
|
1176
1283
|
finally {
|
|
1177
|
-
releaseRunSlot();
|
|
1284
|
+
await releaseRunSlot();
|
|
1178
1285
|
}
|
|
1179
1286
|
}
|
|
1180
1287
|
buildResumePayload(binding, approval, options) {
|
|
@@ -1249,10 +1356,44 @@ export class AgentHarnessRuntime {
|
|
|
1249
1356
|
async stop() {
|
|
1250
1357
|
await this.close();
|
|
1251
1358
|
}
|
|
1359
|
+
async cancelRun(options) {
|
|
1360
|
+
const run = await this.persistence.getRun(options.runId);
|
|
1361
|
+
if (!run) {
|
|
1362
|
+
throw new Error(`Unknown run ${options.runId}`);
|
|
1363
|
+
}
|
|
1364
|
+
if (this.isTerminalRunState(run.state)) {
|
|
1365
|
+
return {
|
|
1366
|
+
threadId: run.threadId,
|
|
1367
|
+
runId: run.runId,
|
|
1368
|
+
agentId: run.agentId,
|
|
1369
|
+
state: run.state,
|
|
1370
|
+
output: run.state,
|
|
1371
|
+
};
|
|
1372
|
+
}
|
|
1373
|
+
await this.persistence.requestRunCancel(run.runId, options.reason);
|
|
1374
|
+
if (run.state === "queued" || run.state === "waiting_for_approval" || run.state === "claimed") {
|
|
1375
|
+
if (run.state === "queued") {
|
|
1376
|
+
this.dropPendingRunSlot(run.runId);
|
|
1377
|
+
}
|
|
1378
|
+
return this.finalizeCancelledRun(run.threadId, run.runId, run.state, options.reason);
|
|
1379
|
+
}
|
|
1380
|
+
await this.setRunStateAndEmit(run.threadId, run.runId, 103, "cancelling", {
|
|
1381
|
+
previousState: run.state,
|
|
1382
|
+
...(options.reason ? { error: options.reason } : {}),
|
|
1383
|
+
});
|
|
1384
|
+
return {
|
|
1385
|
+
threadId: run.threadId,
|
|
1386
|
+
runId: run.runId,
|
|
1387
|
+
agentId: run.agentId,
|
|
1388
|
+
state: "cancelling",
|
|
1389
|
+
output: options.reason ? `cancelling: ${options.reason}` : "cancelling",
|
|
1390
|
+
};
|
|
1391
|
+
}
|
|
1252
1392
|
async recoverStartupRuns() {
|
|
1253
1393
|
if (!this.recoveryConfig.enabled) {
|
|
1254
1394
|
return;
|
|
1255
1395
|
}
|
|
1396
|
+
await this.reclaimExpiredClaimedRuns();
|
|
1256
1397
|
const threads = await this.persistence.listSessions();
|
|
1257
1398
|
for (const thread of threads) {
|
|
1258
1399
|
if (thread.status === "queued") {
|
|
@@ -1269,7 +1410,7 @@ export class AgentHarnessRuntime {
|
|
|
1269
1410
|
});
|
|
1270
1411
|
continue;
|
|
1271
1412
|
}
|
|
1272
|
-
const releaseRunSlot = await this.acquireRunSlot();
|
|
1413
|
+
const releaseRunSlot = await this.acquireRunSlot(thread.threadId, thread.latestRunId);
|
|
1273
1414
|
try {
|
|
1274
1415
|
await this.executeQueuedRun(binding, request.input, thread.threadId, thread.latestRunId, runMeta.agentId, {
|
|
1275
1416
|
context: request.invocation?.context,
|
|
@@ -1281,21 +1422,38 @@ export class AgentHarnessRuntime {
|
|
|
1281
1422
|
});
|
|
1282
1423
|
}
|
|
1283
1424
|
finally {
|
|
1284
|
-
releaseRunSlot();
|
|
1425
|
+
await releaseRunSlot();
|
|
1285
1426
|
}
|
|
1286
1427
|
continue;
|
|
1287
1428
|
}
|
|
1288
1429
|
if (thread.status === "running") {
|
|
1430
|
+
const isStale = await this.isStaleRunningRun(thread);
|
|
1431
|
+
if (!isStale) {
|
|
1432
|
+
continue;
|
|
1433
|
+
}
|
|
1289
1434
|
const runMeta = await this.persistence.getRunMeta(thread.threadId, thread.latestRunId);
|
|
1290
1435
|
const binding = this.workspace.bindings.get(runMeta.agentId);
|
|
1291
|
-
if (!binding
|
|
1436
|
+
if (!binding) {
|
|
1437
|
+
continue;
|
|
1438
|
+
}
|
|
1439
|
+
if (!this.supportsRunningReplay(binding)) {
|
|
1440
|
+
await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
|
|
1441
|
+
previousState: "running",
|
|
1442
|
+
error: "stale running run cannot be replayed safely",
|
|
1443
|
+
});
|
|
1444
|
+
await this.persistence.releaseRunClaim(thread.latestRunId);
|
|
1292
1445
|
continue;
|
|
1293
1446
|
}
|
|
1294
1447
|
const request = await this.persistence.getRunRequest(thread.threadId, thread.latestRunId);
|
|
1295
1448
|
if (!request) {
|
|
1449
|
+
await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
|
|
1450
|
+
previousState: "running",
|
|
1451
|
+
error: "missing persisted run request for stale running run recovery",
|
|
1452
|
+
});
|
|
1453
|
+
await this.persistence.releaseRunClaim(thread.latestRunId);
|
|
1296
1454
|
continue;
|
|
1297
1455
|
}
|
|
1298
|
-
const releaseRunSlot = await this.acquireRunSlot();
|
|
1456
|
+
const releaseRunSlot = await this.acquireRunSlot(thread.threadId, thread.latestRunId, "running");
|
|
1299
1457
|
try {
|
|
1300
1458
|
await this.emit(thread.threadId, thread.latestRunId, 100, "run.resumed", {
|
|
1301
1459
|
resumeKind: "startup-running-recovery",
|
|
@@ -1311,7 +1469,7 @@ export class AgentHarnessRuntime {
|
|
|
1311
1469
|
});
|
|
1312
1470
|
}
|
|
1313
1471
|
finally {
|
|
1314
|
-
releaseRunSlot();
|
|
1472
|
+
await releaseRunSlot();
|
|
1315
1473
|
}
|
|
1316
1474
|
continue;
|
|
1317
1475
|
}
|
|
@@ -1369,5 +1527,49 @@ export class AgentHarnessRuntime {
|
|
|
1369
1527
|
}
|
|
1370
1528
|
}
|
|
1371
1529
|
}
|
|
1530
|
+
async reclaimExpiredClaimedRuns(nowIso = new Date().toISOString()) {
|
|
1531
|
+
const expiredClaims = await this.persistence.listExpiredClaimedRuns(nowIso);
|
|
1532
|
+
for (const claim of expiredClaims) {
|
|
1533
|
+
const thread = await this.persistence.getSession(claim.threadId);
|
|
1534
|
+
if (!thread) {
|
|
1535
|
+
await this.persistence.releaseRunClaim(claim.runId);
|
|
1536
|
+
continue;
|
|
1537
|
+
}
|
|
1538
|
+
const lifecycle = await this.persistence.getRunLifecycle(claim.threadId, claim.runId);
|
|
1539
|
+
if (lifecycle.state === "claimed") {
|
|
1540
|
+
await this.persistence.enqueueRun({
|
|
1541
|
+
threadId: claim.threadId,
|
|
1542
|
+
runId: claim.runId,
|
|
1543
|
+
priority: claim.priority,
|
|
1544
|
+
queueKey: claim.queueKey,
|
|
1545
|
+
availableAt: nowIso,
|
|
1546
|
+
});
|
|
1547
|
+
await this.setRunStateAndEmit(claim.threadId, claim.runId, 99, "queued", {
|
|
1548
|
+
previousState: "claimed",
|
|
1549
|
+
});
|
|
1550
|
+
await this.emit(claim.threadId, claim.runId, 100, "run.queued", {
|
|
1551
|
+
queuePosition: 0,
|
|
1552
|
+
activeRunCount: this.activeRunSlots,
|
|
1553
|
+
maxConcurrentRuns: this.concurrencyConfig.maxConcurrentRuns,
|
|
1554
|
+
recoveredOnStartup: true,
|
|
1555
|
+
reclaimReason: "expired-lease",
|
|
1556
|
+
});
|
|
1557
|
+
continue;
|
|
1558
|
+
}
|
|
1559
|
+
await this.persistence.releaseRunClaim(claim.runId);
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
async isStaleRunningRun(thread, nowMs = Date.now()) {
|
|
1563
|
+
const control = await this.persistence.getRunControl(thread.latestRunId);
|
|
1564
|
+
const heartbeatAt = control?.heartbeatAt;
|
|
1565
|
+
if (!heartbeatAt) {
|
|
1566
|
+
return true;
|
|
1567
|
+
}
|
|
1568
|
+
const heartbeatAtMs = Date.parse(heartbeatAt);
|
|
1569
|
+
if (!Number.isFinite(heartbeatAtMs)) {
|
|
1570
|
+
return true;
|
|
1571
|
+
}
|
|
1572
|
+
return nowMs - heartbeatAtMs >= this.concurrencyConfig.heartbeatTimeoutMs;
|
|
1573
|
+
}
|
|
1372
1574
|
}
|
|
1373
1575
|
export { AgentHarnessRuntime as AgentHarness };
|
|
@@ -422,7 +422,7 @@ export class HealthMonitor {
|
|
|
422
422
|
}
|
|
423
423
|
countStuckRuns(runs, nowMs) {
|
|
424
424
|
return runs.filter((run) => {
|
|
425
|
-
if (!["running", "resuming", "queued"].includes(run.state)) {
|
|
425
|
+
if (!["claimed", "running", "resuming", "queued", "cancelling"].includes(run.state)) {
|
|
426
426
|
return false;
|
|
427
427
|
}
|
|
428
428
|
const updatedAtMs = Date.parse(run.updatedAt);
|
|
@@ -91,9 +91,11 @@ export async function maintainSqliteRuntimeRecords(dbPath, config, nowMs = Date.
|
|
|
91
91
|
"DELETE FROM artifacts WHERE thread_id = ?",
|
|
92
92
|
"DELETE FROM approvals WHERE thread_id = ?",
|
|
93
93
|
"DELETE FROM events WHERE thread_id = ?",
|
|
94
|
+
"DELETE FROM run_queue WHERE thread_id = ?",
|
|
94
95
|
"DELETE FROM run_requests WHERE thread_id = ?",
|
|
95
96
|
"DELETE FROM recovery_intents WHERE thread_id = ?",
|
|
96
97
|
"DELETE FROM thread_messages WHERE thread_id = ?",
|
|
98
|
+
"DELETE FROM run_control WHERE run_id IN (SELECT run_id FROM runs WHERE thread_id = ?)",
|
|
97
99
|
"DELETE FROM runs WHERE thread_id = ?",
|
|
98
100
|
"DELETE FROM threads WHERE thread_id = ?",
|
|
99
101
|
].map((sql) => ({ sql, args: [threadId] })), "write");
|