@botbotgo/agent-harness 0.0.79 → 0.0.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -2
- package/README.zh.md +7 -2
- package/dist/api.d.ts +2 -1
- package/dist/api.js +3 -0
- package/dist/contracts/types.d.ts +5 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/persistence/file-store.d.ts +29 -1
- package/dist/persistence/file-store.js +164 -0
- package/dist/persistence/sqlite-store.d.ts +29 -1
- package/dist/persistence/sqlite-store.js +143 -1
- package/dist/persistence/types.d.ts +48 -0
- package/dist/runtime/agent-runtime-adapter.js +9 -1
- package/dist/runtime/harness.d.ts +9 -1
- package/dist/runtime/harness.js +263 -32
- package/dist/runtime/health-monitor.js +1 -1
- package/dist/runtime/runtime-record-maintenance.js +2 -0
- package/dist/workspace/object-loader.js +133 -7
- package/dist/workspace/support/workspace-ref-utils.d.ts +3 -0
- package/dist/workspace/support/workspace-ref-utils.js +30 -1
- package/package.json +2 -2
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ApprovalRecord, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, MessageContent, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, ResumeOptions, RunOptions, RunResult, RunSummary, ThreadSummary, ThreadRecord, WorkspaceBundle } from "../contracts/types.js";
|
|
1
|
+
import type { ApprovalRecord, CancelOptions, HarnessEvent, HarnessStreamItem, RuntimeHealthSnapshot, MessageContent, RunRecord, RunStartOptions, RestartConversationOptions, RuntimeAdapterOptions, ResumeOptions, RunOptions, RunResult, RunSummary, ThreadSummary, ThreadRecord, WorkspaceBundle } from "../contracts/types.js";
|
|
2
2
|
import { type ToolMcpServerOptions } from "../mcp.js";
|
|
3
3
|
import { type InventoryAgentRecord, type InventorySkillRecord } from "./inventory.js";
|
|
4
4
|
import type { RequirementAssessmentOptions } from "./skill-requirements.js";
|
|
@@ -27,6 +27,7 @@ export declare class AgentHarnessRuntime {
|
|
|
27
27
|
private readonly healthMonitor;
|
|
28
28
|
private readonly recoveryConfig;
|
|
29
29
|
private readonly concurrencyConfig;
|
|
30
|
+
private readonly workerId;
|
|
30
31
|
private activeRunSlots;
|
|
31
32
|
private readonly pendingRunSlots;
|
|
32
33
|
private runtimeEventSequence;
|
|
@@ -84,6 +85,9 @@ export declare class AgentHarnessRuntime {
|
|
|
84
85
|
private loadPriorHistory;
|
|
85
86
|
private loadRunInput;
|
|
86
87
|
private appendAssistantMessage;
|
|
88
|
+
private getRunCancellation;
|
|
89
|
+
private expirePendingApprovals;
|
|
90
|
+
private finalizeCancelledRun;
|
|
87
91
|
private invokeWithHistory;
|
|
88
92
|
private buildPersistedRunRequest;
|
|
89
93
|
private executeQueuedRun;
|
|
@@ -100,6 +104,7 @@ export declare class AgentHarnessRuntime {
|
|
|
100
104
|
private isDecisionRun;
|
|
101
105
|
private notifyListener;
|
|
102
106
|
private acquireRunSlot;
|
|
107
|
+
private dropPendingRunSlot;
|
|
103
108
|
private dispatchRunListeners;
|
|
104
109
|
run(options: RunOptions): Promise<RunResult>;
|
|
105
110
|
streamEvents(options: RunStartOptions): AsyncGenerator<HarnessStreamItem>;
|
|
@@ -110,6 +115,9 @@ export declare class AgentHarnessRuntime {
|
|
|
110
115
|
}>;
|
|
111
116
|
close(): Promise<void>;
|
|
112
117
|
stop(): Promise<void>;
|
|
118
|
+
cancelRun(options: CancelOptions): Promise<RunResult>;
|
|
113
119
|
private recoverStartupRuns;
|
|
120
|
+
private reclaimExpiredClaimedRuns;
|
|
121
|
+
private isStaleRunningRun;
|
|
114
122
|
}
|
|
115
123
|
export { AgentHarnessRuntime as AgentHarness };
|
package/dist/runtime/harness.js
CHANGED
|
@@ -44,6 +44,7 @@ export class AgentHarnessRuntime {
|
|
|
44
44
|
healthMonitor;
|
|
45
45
|
recoveryConfig;
|
|
46
46
|
concurrencyConfig;
|
|
47
|
+
workerId = `worker-${createPersistentId()}`;
|
|
47
48
|
activeRunSlots = 0;
|
|
48
49
|
pendingRunSlots = [];
|
|
49
50
|
runtimeEventSequence = 0;
|
|
@@ -98,6 +99,13 @@ export class AgentHarnessRuntime {
|
|
|
98
99
|
}
|
|
99
100
|
async resolveSelectedAgentId(input, requestedAgentId, threadId) {
|
|
100
101
|
if (!requestedAgentId || requestedAgentId === AUTO_AGENT_ID) {
|
|
102
|
+
if (threadId) {
|
|
103
|
+
const thread = await this.getSession(threadId);
|
|
104
|
+
const threadBinding = thread ? this.workspace.bindings.get(thread.agentId) : undefined;
|
|
105
|
+
if (thread?.agentId && threadBinding?.harnessRuntime.hostFacing !== false) {
|
|
106
|
+
return thread.agentId;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
101
109
|
return this.routeAgent(input, { threadId });
|
|
102
110
|
}
|
|
103
111
|
return requestedAgentId;
|
|
@@ -480,6 +488,46 @@ export class AgentHarnessRuntime {
|
|
|
480
488
|
createdAt: new Date().toISOString(),
|
|
481
489
|
});
|
|
482
490
|
}
|
|
491
|
+
async getRunCancellation(runId) {
|
|
492
|
+
const control = await this.persistence.getRunControl(runId);
|
|
493
|
+
return {
|
|
494
|
+
requested: control?.cancelRequested === true,
|
|
495
|
+
...(control?.cancelReason ? { reason: control.cancelReason } : {}),
|
|
496
|
+
};
|
|
497
|
+
}
|
|
498
|
+
async expirePendingApprovals(threadId, runId) {
|
|
499
|
+
const approvals = await this.persistence.getRunApprovals(threadId, runId);
|
|
500
|
+
for (const approval of approvals) {
|
|
501
|
+
if (approval.status !== "pending") {
|
|
502
|
+
continue;
|
|
503
|
+
}
|
|
504
|
+
await this.persistence.resolveApproval(threadId, runId, approval.approvalId, "expired");
|
|
505
|
+
await this.emit(threadId, runId, 6, "approval.resolved", {
|
|
506
|
+
approvalId: approval.approvalId,
|
|
507
|
+
pendingActionId: approval.pendingActionId,
|
|
508
|
+
decision: "cancel",
|
|
509
|
+
toolName: approval.toolName,
|
|
510
|
+
});
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
async finalizeCancelledRun(threadId, runId, previousState, reason) {
|
|
514
|
+
await this.expirePendingApprovals(threadId, runId);
|
|
515
|
+
await this.persistence.releaseRunClaim(runId);
|
|
516
|
+
await this.persistence.clearRunCancel(runId);
|
|
517
|
+
await this.persistence.clearRunRequest(threadId, runId);
|
|
518
|
+
await this.setRunStateAndEmit(threadId, runId, 104, "cancelled", {
|
|
519
|
+
previousState,
|
|
520
|
+
...(reason ? { error: reason } : {}),
|
|
521
|
+
});
|
|
522
|
+
const runMeta = await this.persistence.getRunMeta(threadId, runId);
|
|
523
|
+
return {
|
|
524
|
+
threadId,
|
|
525
|
+
runId,
|
|
526
|
+
agentId: runMeta.agentId,
|
|
527
|
+
state: "cancelled",
|
|
528
|
+
output: reason ? `cancelled: ${reason}` : "cancelled",
|
|
529
|
+
};
|
|
530
|
+
}
|
|
483
531
|
async invokeWithHistory(binding, input, threadId, runId, resumePayload, options = {}) {
|
|
484
532
|
const priorHistory = await this.loadPriorHistory(threadId, runId);
|
|
485
533
|
const startedAt = Date.now();
|
|
@@ -514,6 +562,20 @@ export class AgentHarnessRuntime {
|
|
|
514
562
|
}
|
|
515
563
|
async executeQueuedRun(binding, input, threadId, runId, agentId, options = {}) {
|
|
516
564
|
const previousState = options.previousState ?? "running";
|
|
565
|
+
const currentRun = await this.persistence.getRun(runId);
|
|
566
|
+
if (currentRun?.state === "cancelled") {
|
|
567
|
+
return {
|
|
568
|
+
threadId,
|
|
569
|
+
runId,
|
|
570
|
+
agentId,
|
|
571
|
+
state: "cancelled",
|
|
572
|
+
output: "cancelled",
|
|
573
|
+
};
|
|
574
|
+
}
|
|
575
|
+
const cancellation = await this.getRunCancellation(runId);
|
|
576
|
+
if (cancellation.requested) {
|
|
577
|
+
return this.finalizeCancelledRun(threadId, runId, previousState, cancellation.reason);
|
|
578
|
+
}
|
|
517
579
|
if (previousState === "queued") {
|
|
518
580
|
await this.emit(threadId, runId, 101, "run.dequeued", {
|
|
519
581
|
queuePosition: 0,
|
|
@@ -531,6 +593,10 @@ export class AgentHarnessRuntime {
|
|
|
531
593
|
state: options.state,
|
|
532
594
|
files: options.files,
|
|
533
595
|
});
|
|
596
|
+
const cancelledAfterInvoke = await this.getRunCancellation(runId);
|
|
597
|
+
if (cancelledAfterInvoke.requested) {
|
|
598
|
+
return this.finalizeCancelledRun(threadId, runId, previousState === "queued" ? "running" : previousState, cancelledAfterInvoke.reason);
|
|
599
|
+
}
|
|
534
600
|
const finalized = await this.finalizeContinuedRun(threadId, runId, input, actual, {
|
|
535
601
|
previousState: previousState === "queued" ? "running" : previousState,
|
|
536
602
|
stateSequence: options.stateSequence ?? 103,
|
|
@@ -676,21 +742,64 @@ export class AgentHarnessRuntime {
|
|
|
676
742
|
await listener(value);
|
|
677
743
|
}
|
|
678
744
|
async acquireRunSlot(threadId, runId, activeState = "running") {
|
|
745
|
+
if (threadId && runId) {
|
|
746
|
+
await this.persistence.enqueueRun({ threadId, runId });
|
|
747
|
+
}
|
|
748
|
+
let stopHeartbeat = () => undefined;
|
|
749
|
+
const beginLease = async () => {
|
|
750
|
+
if (!threadId || !runId) {
|
|
751
|
+
return;
|
|
752
|
+
}
|
|
753
|
+
const claimedAt = new Date().toISOString();
|
|
754
|
+
await this.persistence.claimQueuedRun({
|
|
755
|
+
threadId,
|
|
756
|
+
runId,
|
|
757
|
+
workerId: this.workerId,
|
|
758
|
+
claimedAt,
|
|
759
|
+
leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
|
|
760
|
+
});
|
|
761
|
+
if (this.concurrencyConfig.heartbeatIntervalMs <= 0) {
|
|
762
|
+
return;
|
|
763
|
+
}
|
|
764
|
+
const timer = setInterval(() => {
|
|
765
|
+
void this.persistence.renewRunLease({
|
|
766
|
+
runId,
|
|
767
|
+
workerId: this.workerId,
|
|
768
|
+
heartbeatAt: new Date().toISOString(),
|
|
769
|
+
leaseExpiresAt: new Date(Date.now() + this.concurrencyConfig.leaseMs).toISOString(),
|
|
770
|
+
});
|
|
771
|
+
}, this.concurrencyConfig.heartbeatIntervalMs);
|
|
772
|
+
timer.unref?.();
|
|
773
|
+
stopHeartbeat = () => {
|
|
774
|
+
clearInterval(timer);
|
|
775
|
+
};
|
|
776
|
+
};
|
|
777
|
+
const releaseLease = async () => {
|
|
778
|
+
stopHeartbeat();
|
|
779
|
+
if (runId) {
|
|
780
|
+
await this.persistence.releaseRunClaim(runId);
|
|
781
|
+
}
|
|
782
|
+
};
|
|
679
783
|
const maxConcurrentRuns = this.concurrencyConfig.maxConcurrentRuns;
|
|
680
784
|
if (!maxConcurrentRuns) {
|
|
681
|
-
|
|
785
|
+
await beginLease();
|
|
786
|
+
return async () => {
|
|
787
|
+
await releaseLease();
|
|
788
|
+
};
|
|
682
789
|
}
|
|
683
790
|
if (this.activeRunSlots < maxConcurrentRuns) {
|
|
684
791
|
this.activeRunSlots += 1;
|
|
792
|
+
await beginLease();
|
|
685
793
|
let released = false;
|
|
686
|
-
return () => {
|
|
794
|
+
return async () => {
|
|
687
795
|
if (released) {
|
|
688
796
|
return;
|
|
689
797
|
}
|
|
690
798
|
released = true;
|
|
799
|
+
await releaseLease();
|
|
691
800
|
this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
|
|
692
801
|
const next = this.pendingRunSlots.shift();
|
|
693
|
-
void next?.();
|
|
802
|
+
void next?.activate();
|
|
694
803
|
};
|
|
695
804
|
}
|
|
696
805
|
if (threadId && runId) {
|
|
@@ -704,38 +813,57 @@ export class AgentHarnessRuntime {
|
|
|
704
813
|
maxConcurrentRuns,
|
|
705
814
|
});
|
|
706
815
|
}
|
|
707
|
-
await new Promise((resolve, reject) => {
|
|
708
|
-
this.pendingRunSlots.push(async () => {
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
816
|
+
const slotAcquisition = await new Promise((resolve, reject) => {
|
|
817
|
+
this.pendingRunSlots.push({ runId, activate: async () => {
|
|
818
|
+
try {
|
|
819
|
+
const currentRun = runId ? await this.persistence.getRun(runId) : null;
|
|
820
|
+
if (currentRun?.state === "cancelled") {
|
|
821
|
+
resolve("abort");
|
|
822
|
+
return;
|
|
823
|
+
}
|
|
824
|
+
this.activeRunSlots += 1;
|
|
825
|
+
if (threadId && runId) {
|
|
826
|
+
await this.emit(threadId, runId, 4, "run.dequeued", {
|
|
827
|
+
queuePosition: 0,
|
|
828
|
+
activeRunCount: this.activeRunSlots,
|
|
829
|
+
maxConcurrentRuns,
|
|
830
|
+
});
|
|
831
|
+
await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
|
|
832
|
+
previousState: "queued",
|
|
833
|
+
});
|
|
834
|
+
await beginLease();
|
|
835
|
+
}
|
|
836
|
+
resolve("activate");
|
|
720
837
|
}
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
}
|
|
726
|
-
});
|
|
838
|
+
catch (error) {
|
|
839
|
+
reject(error);
|
|
840
|
+
}
|
|
841
|
+
}, abort: () => resolve("abort") });
|
|
727
842
|
});
|
|
843
|
+
if (slotAcquisition === "abort") {
|
|
844
|
+
return async () => undefined;
|
|
845
|
+
}
|
|
728
846
|
let released = false;
|
|
729
|
-
return () => {
|
|
847
|
+
return async () => {
|
|
730
848
|
if (released) {
|
|
731
849
|
return;
|
|
732
850
|
}
|
|
733
851
|
released = true;
|
|
852
|
+
await releaseLease();
|
|
734
853
|
this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
|
|
735
854
|
const next = this.pendingRunSlots.shift();
|
|
736
|
-
void next?.();
|
|
855
|
+
void next?.activate();
|
|
737
856
|
};
|
|
738
857
|
}
|
|
858
|
+
dropPendingRunSlot(runId) {
|
|
859
|
+
const index = this.pendingRunSlots.findIndex((entry) => entry.runId === runId);
|
|
860
|
+
if (index < 0) {
|
|
861
|
+
return false;
|
|
862
|
+
}
|
|
863
|
+
const [entry] = this.pendingRunSlots.splice(index, 1);
|
|
864
|
+
entry?.abort();
|
|
865
|
+
return true;
|
|
866
|
+
}
|
|
739
867
|
async dispatchRunListeners(stream, listeners) {
|
|
740
868
|
let latestEvent;
|
|
741
869
|
let latestResult;
|
|
@@ -843,7 +971,7 @@ export class AgentHarnessRuntime {
|
|
|
843
971
|
});
|
|
844
972
|
}
|
|
845
973
|
finally {
|
|
846
|
-
releaseRunSlot();
|
|
974
|
+
await releaseRunSlot();
|
|
847
975
|
}
|
|
848
976
|
}
|
|
849
977
|
async *streamEvents(options) {
|
|
@@ -1104,7 +1232,7 @@ export class AgentHarnessRuntime {
|
|
|
1104
1232
|
}
|
|
1105
1233
|
finally {
|
|
1106
1234
|
await this.persistence.clearRunRequest(threadId, runId);
|
|
1107
|
-
releaseRunSlot();
|
|
1235
|
+
await releaseRunSlot();
|
|
1108
1236
|
}
|
|
1109
1237
|
}
|
|
1110
1238
|
async resume(options) {
|
|
@@ -1125,6 +1253,10 @@ export class AgentHarnessRuntime {
|
|
|
1125
1253
|
throw new Error(`Unknown agent ${thread.agentId}`);
|
|
1126
1254
|
}
|
|
1127
1255
|
const resumePayload = this.buildResumePayload(binding, approval, options);
|
|
1256
|
+
const cancellation = await this.getRunCancellation(runId);
|
|
1257
|
+
if (cancellation.requested) {
|
|
1258
|
+
return this.finalizeCancelledRun(threadId, runId, thread.status, cancellation.reason);
|
|
1259
|
+
}
|
|
1128
1260
|
await this.persistence.setRunState(threadId, runId, "resuming", `checkpoints/${threadId}/${runId}/cp-1`);
|
|
1129
1261
|
const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "resuming");
|
|
1130
1262
|
try {
|
|
@@ -1156,6 +1288,10 @@ export class AgentHarnessRuntime {
|
|
|
1156
1288
|
try {
|
|
1157
1289
|
const actual = await this.runtimeAdapter.invoke(binding, "", threadId, runId, resumePayload, priorHistory);
|
|
1158
1290
|
this.healthMonitor.recordLlmSuccess(Date.now() - startedAt);
|
|
1291
|
+
const cancelledAfterInvoke = await this.getRunCancellation(runId);
|
|
1292
|
+
if (cancelledAfterInvoke.requested) {
|
|
1293
|
+
return this.finalizeCancelledRun(threadId, runId, "resuming", cancelledAfterInvoke.reason);
|
|
1294
|
+
}
|
|
1159
1295
|
await this.persistence.clearRecoveryIntent(threadId, runId);
|
|
1160
1296
|
const finalized = await this.finalizeContinuedRun(threadId, runId, runInput, actual, {
|
|
1161
1297
|
previousState: "resuming",
|
|
@@ -1174,7 +1310,7 @@ export class AgentHarnessRuntime {
|
|
|
1174
1310
|
}
|
|
1175
1311
|
}
|
|
1176
1312
|
finally {
|
|
1177
|
-
releaseRunSlot();
|
|
1313
|
+
await releaseRunSlot();
|
|
1178
1314
|
}
|
|
1179
1315
|
}
|
|
1180
1316
|
buildResumePayload(binding, approval, options) {
|
|
@@ -1249,10 +1385,44 @@ export class AgentHarnessRuntime {
|
|
|
1249
1385
|
async stop() {
|
|
1250
1386
|
await this.close();
|
|
1251
1387
|
}
|
|
1388
|
+
async cancelRun(options) {
|
|
1389
|
+
const run = await this.persistence.getRun(options.runId);
|
|
1390
|
+
if (!run) {
|
|
1391
|
+
throw new Error(`Unknown run ${options.runId}`);
|
|
1392
|
+
}
|
|
1393
|
+
if (this.isTerminalRunState(run.state)) {
|
|
1394
|
+
return {
|
|
1395
|
+
threadId: run.threadId,
|
|
1396
|
+
runId: run.runId,
|
|
1397
|
+
agentId: run.agentId,
|
|
1398
|
+
state: run.state,
|
|
1399
|
+
output: run.state,
|
|
1400
|
+
};
|
|
1401
|
+
}
|
|
1402
|
+
await this.persistence.requestRunCancel(run.runId, options.reason);
|
|
1403
|
+
if (run.state === "queued" || run.state === "waiting_for_approval" || run.state === "claimed") {
|
|
1404
|
+
if (run.state === "queued") {
|
|
1405
|
+
this.dropPendingRunSlot(run.runId);
|
|
1406
|
+
}
|
|
1407
|
+
return this.finalizeCancelledRun(run.threadId, run.runId, run.state, options.reason);
|
|
1408
|
+
}
|
|
1409
|
+
await this.setRunStateAndEmit(run.threadId, run.runId, 103, "cancelling", {
|
|
1410
|
+
previousState: run.state,
|
|
1411
|
+
...(options.reason ? { error: options.reason } : {}),
|
|
1412
|
+
});
|
|
1413
|
+
return {
|
|
1414
|
+
threadId: run.threadId,
|
|
1415
|
+
runId: run.runId,
|
|
1416
|
+
agentId: run.agentId,
|
|
1417
|
+
state: "cancelling",
|
|
1418
|
+
output: options.reason ? `cancelling: ${options.reason}` : "cancelling",
|
|
1419
|
+
};
|
|
1420
|
+
}
|
|
1252
1421
|
async recoverStartupRuns() {
|
|
1253
1422
|
if (!this.recoveryConfig.enabled) {
|
|
1254
1423
|
return;
|
|
1255
1424
|
}
|
|
1425
|
+
await this.reclaimExpiredClaimedRuns();
|
|
1256
1426
|
const threads = await this.persistence.listSessions();
|
|
1257
1427
|
for (const thread of threads) {
|
|
1258
1428
|
if (thread.status === "queued") {
|
|
@@ -1269,7 +1439,7 @@ export class AgentHarnessRuntime {
|
|
|
1269
1439
|
});
|
|
1270
1440
|
continue;
|
|
1271
1441
|
}
|
|
1272
|
-
const releaseRunSlot = await this.acquireRunSlot();
|
|
1442
|
+
const releaseRunSlot = await this.acquireRunSlot(thread.threadId, thread.latestRunId);
|
|
1273
1443
|
try {
|
|
1274
1444
|
await this.executeQueuedRun(binding, request.input, thread.threadId, thread.latestRunId, runMeta.agentId, {
|
|
1275
1445
|
context: request.invocation?.context,
|
|
@@ -1281,21 +1451,38 @@ export class AgentHarnessRuntime {
|
|
|
1281
1451
|
});
|
|
1282
1452
|
}
|
|
1283
1453
|
finally {
|
|
1284
|
-
releaseRunSlot();
|
|
1454
|
+
await releaseRunSlot();
|
|
1285
1455
|
}
|
|
1286
1456
|
continue;
|
|
1287
1457
|
}
|
|
1288
1458
|
if (thread.status === "running") {
|
|
1459
|
+
const isStale = await this.isStaleRunningRun(thread);
|
|
1460
|
+
if (!isStale) {
|
|
1461
|
+
continue;
|
|
1462
|
+
}
|
|
1289
1463
|
const runMeta = await this.persistence.getRunMeta(thread.threadId, thread.latestRunId);
|
|
1290
1464
|
const binding = this.workspace.bindings.get(runMeta.agentId);
|
|
1291
|
-
if (!binding
|
|
1465
|
+
if (!binding) {
|
|
1466
|
+
continue;
|
|
1467
|
+
}
|
|
1468
|
+
if (!this.supportsRunningReplay(binding)) {
|
|
1469
|
+
await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
|
|
1470
|
+
previousState: "running",
|
|
1471
|
+
error: "stale running run cannot be replayed safely",
|
|
1472
|
+
});
|
|
1473
|
+
await this.persistence.releaseRunClaim(thread.latestRunId);
|
|
1292
1474
|
continue;
|
|
1293
1475
|
}
|
|
1294
1476
|
const request = await this.persistence.getRunRequest(thread.threadId, thread.latestRunId);
|
|
1295
1477
|
if (!request) {
|
|
1478
|
+
await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
|
|
1479
|
+
previousState: "running",
|
|
1480
|
+
error: "missing persisted run request for stale running run recovery",
|
|
1481
|
+
});
|
|
1482
|
+
await this.persistence.releaseRunClaim(thread.latestRunId);
|
|
1296
1483
|
continue;
|
|
1297
1484
|
}
|
|
1298
|
-
const releaseRunSlot = await this.acquireRunSlot();
|
|
1485
|
+
const releaseRunSlot = await this.acquireRunSlot(thread.threadId, thread.latestRunId, "running");
|
|
1299
1486
|
try {
|
|
1300
1487
|
await this.emit(thread.threadId, thread.latestRunId, 100, "run.resumed", {
|
|
1301
1488
|
resumeKind: "startup-running-recovery",
|
|
@@ -1311,7 +1498,7 @@ export class AgentHarnessRuntime {
|
|
|
1311
1498
|
});
|
|
1312
1499
|
}
|
|
1313
1500
|
finally {
|
|
1314
|
-
releaseRunSlot();
|
|
1501
|
+
await releaseRunSlot();
|
|
1315
1502
|
}
|
|
1316
1503
|
continue;
|
|
1317
1504
|
}
|
|
@@ -1369,5 +1556,49 @@ export class AgentHarnessRuntime {
|
|
|
1369
1556
|
}
|
|
1370
1557
|
}
|
|
1371
1558
|
}
|
|
1559
|
+
async reclaimExpiredClaimedRuns(nowIso = new Date().toISOString()) {
|
|
1560
|
+
const expiredClaims = await this.persistence.listExpiredClaimedRuns(nowIso);
|
|
1561
|
+
for (const claim of expiredClaims) {
|
|
1562
|
+
const thread = await this.persistence.getSession(claim.threadId);
|
|
1563
|
+
if (!thread) {
|
|
1564
|
+
await this.persistence.releaseRunClaim(claim.runId);
|
|
1565
|
+
continue;
|
|
1566
|
+
}
|
|
1567
|
+
const lifecycle = await this.persistence.getRunLifecycle(claim.threadId, claim.runId);
|
|
1568
|
+
if (lifecycle.state === "claimed") {
|
|
1569
|
+
await this.persistence.enqueueRun({
|
|
1570
|
+
threadId: claim.threadId,
|
|
1571
|
+
runId: claim.runId,
|
|
1572
|
+
priority: claim.priority,
|
|
1573
|
+
queueKey: claim.queueKey,
|
|
1574
|
+
availableAt: nowIso,
|
|
1575
|
+
});
|
|
1576
|
+
await this.setRunStateAndEmit(claim.threadId, claim.runId, 99, "queued", {
|
|
1577
|
+
previousState: "claimed",
|
|
1578
|
+
});
|
|
1579
|
+
await this.emit(claim.threadId, claim.runId, 100, "run.queued", {
|
|
1580
|
+
queuePosition: 0,
|
|
1581
|
+
activeRunCount: this.activeRunSlots,
|
|
1582
|
+
maxConcurrentRuns: this.concurrencyConfig.maxConcurrentRuns,
|
|
1583
|
+
recoveredOnStartup: true,
|
|
1584
|
+
reclaimReason: "expired-lease",
|
|
1585
|
+
});
|
|
1586
|
+
continue;
|
|
1587
|
+
}
|
|
1588
|
+
await this.persistence.releaseRunClaim(claim.runId);
|
|
1589
|
+
}
|
|
1590
|
+
}
|
|
1591
|
+
async isStaleRunningRun(thread, nowMs = Date.now()) {
|
|
1592
|
+
const control = await this.persistence.getRunControl(thread.latestRunId);
|
|
1593
|
+
const heartbeatAt = control?.heartbeatAt;
|
|
1594
|
+
if (!heartbeatAt) {
|
|
1595
|
+
return true;
|
|
1596
|
+
}
|
|
1597
|
+
const heartbeatAtMs = Date.parse(heartbeatAt);
|
|
1598
|
+
if (!Number.isFinite(heartbeatAtMs)) {
|
|
1599
|
+
return true;
|
|
1600
|
+
}
|
|
1601
|
+
return nowMs - heartbeatAtMs >= this.concurrencyConfig.heartbeatTimeoutMs;
|
|
1602
|
+
}
|
|
1372
1603
|
}
|
|
1373
1604
|
export { AgentHarnessRuntime as AgentHarness };
|
|
@@ -422,7 +422,7 @@ export class HealthMonitor {
|
|
|
422
422
|
}
|
|
423
423
|
countStuckRuns(runs, nowMs) {
|
|
424
424
|
return runs.filter((run) => {
|
|
425
|
-
if (!["running", "resuming", "queued"].includes(run.state)) {
|
|
425
|
+
if (!["claimed", "running", "resuming", "queued", "cancelling"].includes(run.state)) {
|
|
426
426
|
return false;
|
|
427
427
|
}
|
|
428
428
|
const updatedAtMs = Date.parse(run.updatedAt);
|
|
@@ -91,9 +91,11 @@ export async function maintainSqliteRuntimeRecords(dbPath, config, nowMs = Date.
|
|
|
91
91
|
"DELETE FROM artifacts WHERE thread_id = ?",
|
|
92
92
|
"DELETE FROM approvals WHERE thread_id = ?",
|
|
93
93
|
"DELETE FROM events WHERE thread_id = ?",
|
|
94
|
+
"DELETE FROM run_queue WHERE thread_id = ?",
|
|
94
95
|
"DELETE FROM run_requests WHERE thread_id = ?",
|
|
95
96
|
"DELETE FROM recovery_intents WHERE thread_id = ?",
|
|
96
97
|
"DELETE FROM thread_messages WHERE thread_id = ?",
|
|
98
|
+
"DELETE FROM run_control WHERE run_id IN (SELECT run_id FROM runs WHERE thread_id = ?)",
|
|
97
99
|
"DELETE FROM runs WHERE thread_id = ?",
|
|
98
100
|
"DELETE FROM threads WHERE thread_id = ?",
|
|
99
101
|
].map((sql) => ({ sql, args: [threadId] })), "write");
|