@okrlinkhub/agent-factory 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,6 +48,13 @@ const claimedJobValidator = v.object({
48
48
  payload: queuePayloadValidator,
49
49
  });
50
50
 
51
/**
 * Validator for a worker's `assignment` record: the conversation/agent pair
 * the worker is bound to, the lease id of the claim that produced the binding,
 * and the numeric `assignedAt` value recorded with it.
 */
const workerAssignmentValidator = v.object({
  conversationId: v.string(),
  agentKey: v.string(),
  leaseId: v.string(),
  assignedAt: v.number(),
});
57
+
51
58
  const secretStatusValidator = v.object({
52
59
  secretRef: v.string(),
53
60
  hasActive: v.boolean(),
@@ -166,6 +173,10 @@ export const enqueueMessage = mutation({
166
173
  contextHistory: [],
167
174
  pendingToolCalls: [],
168
175
  });
176
+ } else if (existingConversation.agentKey !== args.agentKey) {
177
+ throw new Error(
178
+ `Conversation '${args.conversationId}' is already bound to agent '${existingConversation.agentKey}', cannot enqueue for '${args.agentKey}'.`,
179
+ );
169
180
  }
170
181
 
171
182
  const priority = Math.min(
@@ -1024,11 +1035,13 @@ export const attachMessageMetadata = mutation({
1024
1035
  export const claimNextJob = mutation({
1025
1036
  args: {
1026
1037
  workerId: v.string(),
1038
+ conversationId: v.optional(v.string()),
1027
1039
  nowMs: v.optional(v.number()),
1028
1040
  },
1029
1041
  returns: v.union(v.null(), claimedJobValidator),
1030
1042
  handler: async (ctx, args) => {
1031
1043
  const nowMs = args.nowMs ?? Date.now();
1044
+ const staleHeartbeatCutoff = nowMs - DEFAULT_CONFIG.lease.staleAfterMs;
1032
1045
  const worker = await ctx.db
1033
1046
  .query("workers")
1034
1047
  .withIndex("by_workerId", (q) => q.eq("workerId", args.workerId))
@@ -1036,6 +1049,14 @@ export const claimNextJob = mutation({
1036
1049
  if (worker && !isWorkerClaimable(worker.status)) {
1037
1050
  return null;
1038
1051
  }
1052
+ if (
1053
+ worker?.assignment &&
1054
+ args.conversationId &&
1055
+ worker.assignment.conversationId !== args.conversationId
1056
+ ) {
1057
+ return null;
1058
+ }
1059
+ const workers = await ctx.db.query("workers").collect();
1039
1060
  const candidates = await ctx.db
1040
1061
  .query("messageQueue")
1041
1062
  .withIndex("by_status_and_scheduledFor", (q) =>
@@ -1050,6 +1071,16 @@ export const claimNextJob = mutation({
1050
1071
  });
1051
1072
 
1052
1073
  for (const candidate of candidates) {
1074
+ if (args.conversationId && candidate.conversationId !== args.conversationId) {
1075
+ continue;
1076
+ }
1077
+ if (
1078
+ worker?.assignment &&
1079
+ candidate.conversationId !== worker.assignment.conversationId
1080
+ ) {
1081
+ continue;
1082
+ }
1083
+
1053
1084
  const conversation = await ctx.db
1054
1085
  .query("conversations")
1055
1086
  .withIndex("by_conversationId", (q) =>
@@ -1057,12 +1088,34 @@ export const claimNextJob = mutation({
1057
1088
  )
1058
1089
  .unique();
1059
1090
  if (!conversation) continue;
1091
+ if (conversation.agentKey !== candidate.agentKey) continue;
1092
+ if (
1093
+ worker?.assignment &&
1094
+ conversation.agentKey !== worker.assignment.agentKey
1095
+ ) {
1096
+ continue;
1097
+ }
1098
+ const existingOwner = findActiveAssignmentOwner(workers, {
1099
+ conversationId: candidate.conversationId,
1100
+ agentKey: candidate.agentKey,
1101
+ excludeWorkerId: args.workerId,
1102
+ staleHeartbeatCutoff,
1103
+ });
1104
+ if (existingOwner) {
1105
+ continue;
1106
+ }
1060
1107
 
1061
1108
  const lock = conversation.processingLock;
1062
1109
  if (lock && lock.leaseExpiresAt > nowMs) continue;
1063
1110
 
1064
1111
  const leaseId = `${nowMs}-${Math.random().toString(36).slice(2, 10)}`;
1065
1112
  const leaseExpiresAt = nowMs + DEFAULT_CONFIG.lease.leaseMs;
1113
+ const nextAssignment = {
1114
+ conversationId: candidate.conversationId,
1115
+ agentKey: candidate.agentKey,
1116
+ leaseId,
1117
+ assignedAt: worker?.assignment?.assignedAt ?? nowMs,
1118
+ };
1066
1119
 
1067
1120
  await ctx.db.patch(candidate._id, {
1068
1121
  status: "processing",
@@ -1091,6 +1144,7 @@ export const claimNextJob = mutation({
1091
1144
  lastClaimAt: nowMs,
1092
1145
  scheduledShutdownAt: undefined,
1093
1146
  stoppedAt: undefined,
1147
+ assignment: nextAssignment,
1094
1148
  capabilities: [],
1095
1149
  });
1096
1150
  } else {
@@ -1101,6 +1155,7 @@ export const claimNextJob = mutation({
1101
1155
  lastClaimAt: nowMs,
1102
1156
  scheduledShutdownAt: undefined,
1103
1157
  stoppedAt: undefined,
1158
+ assignment: nextAssignment,
1104
1159
  });
1105
1160
  }
1106
1161
 
@@ -1168,7 +1223,29 @@ export const heartbeatJob = mutation({
1168
1223
  .withIndex("by_workerId", (q) => q.eq("workerId", args.workerId))
1169
1224
  .unique();
1170
1225
  if (worker && isWorkerRunning(worker.status)) {
1171
- await ctx.db.patch(worker._id, { heartbeatAt: nowMs });
1226
+ const nextPatch: {
1227
+ heartbeatAt: number;
1228
+ assignment?: {
1229
+ conversationId: string;
1230
+ agentKey: string;
1231
+ leaseId: string;
1232
+ assignedAt: number;
1233
+ };
1234
+ } = { heartbeatAt: nowMs };
1235
+ if (
1236
+ !worker.assignment ||
1237
+ worker.assignment.conversationId !== message.conversationId ||
1238
+ worker.assignment.agentKey !== message.agentKey ||
1239
+ worker.assignment.leaseId !== args.leaseId
1240
+ ) {
1241
+ nextPatch.assignment = {
1242
+ conversationId: message.conversationId,
1243
+ agentKey: message.agentKey,
1244
+ leaseId: args.leaseId,
1245
+ assignedAt: worker.assignment?.assignedAt ?? nowMs,
1246
+ };
1247
+ }
1248
+ await ctx.db.patch(worker._id, nextPatch);
1172
1249
  }
1173
1250
 
1174
1251
  return true;
@@ -1229,6 +1306,7 @@ export const completeJob = mutation({
1229
1306
  load: nextLoad,
1230
1307
  heartbeatAt: nowMs,
1231
1308
  scheduledShutdownAt: nextScheduledShutdownAt,
1309
+ assignment: getAssignmentForCompletedConversation(worker, message),
1232
1310
  });
1233
1311
  if (nextScheduledShutdownAt !== undefined) {
1234
1312
  await scheduleIdleShutdownWatchdog(ctx, nextScheduledShutdownAt, nowMs, args.providerConfig);
@@ -1317,6 +1395,7 @@ export const failJob = mutation({
1317
1395
  load: nextLoad,
1318
1396
  heartbeatAt: nowMs,
1319
1397
  scheduledShutdownAt: nextScheduledShutdownAt,
1398
+ assignment: getAssignmentForCompletedConversation(worker, message),
1320
1399
  });
1321
1400
  if (nextScheduledShutdownAt !== undefined) {
1322
1401
  await scheduleIdleShutdownWatchdog(ctx, nextScheduledShutdownAt, nowMs, args.providerConfig);
@@ -1402,6 +1481,7 @@ export const releaseExpiredLeases = internalMutation({
1402
1481
  load: nextLoad,
1403
1482
  heartbeatAt: nowMs,
1404
1483
  scheduledShutdownAt: nextScheduledShutdownAt,
1484
+ assignment: clearAssignmentForMessage(worker, message, nextLoad),
1405
1485
  });
1406
1486
  if (nextScheduledShutdownAt !== undefined) {
1407
1487
  await scheduleIdleShutdownWatchdog(ctx, nextScheduledShutdownAt, nowMs);
@@ -1485,6 +1565,7 @@ export const releaseStuckJobs = mutation({
1485
1565
  load: nextLoad,
1486
1566
  heartbeatAt: nowMs,
1487
1567
  scheduledShutdownAt: nextScheduledShutdownAt,
1568
+ assignment: clearAssignmentForMessage(worker, message, nextLoad),
1488
1569
  });
1489
1570
  if (nextScheduledShutdownAt !== undefined) {
1490
1571
  await scheduleIdleShutdownWatchdog(ctx, nextScheduledShutdownAt, nowMs);
@@ -1714,6 +1795,33 @@ export const getActiveConversationCountForScheduler = internalQuery({
1714
1795
  },
1715
1796
  });
1716
1797
 
1798
/**
 * Internal query that lists the distinct conversation ids which currently
 * have work in the message queue.
 *
 * A conversation is included when it has at least one job that is either
 *  - `queued` with `scheduledFor <= nowMs`, or
 *  - `processing` with `leaseExpiresAt > nowMs`.
 *
 * Args:
 *  - `nowMs`  reference time; defaults to `Date.now()`.
 *  - `limit`  maximum rows read from EACH of the two scans (default 1000,
 *             never less than 1). The returned array can therefore be built
 *             from up to `2 * limit` jobs.
 *
 * Returns the de-duplicated conversation ids in ascending default sort order.
 */
export const getActiveConversationIdsForScheduler = internalQuery({
  args: {
    nowMs: v.optional(v.number()),
    limit: v.optional(v.number()),
  },
  returns: v.array(v.string()),
  handler: async (ctx, args) => {
    const nowMs = args.nowMs ?? Date.now();
    const limit = Math.max(1, args.limit ?? 1000);

    // Jobs that are waiting and already due.
    const queuedJobs = await ctx.db
      .query("messageQueue")
      .withIndex("by_status_and_scheduledFor", (q) =>
        q.eq("status", "queued").lte("scheduledFor", nowMs),
      )
      .take(limit);

    // Jobs in flight whose lease has not expired yet.
    const processingJobs = await ctx.db
      .query("messageQueue")
      .withIndex("by_status_and_leaseExpiresAt", (q) =>
        q.eq("status", "processing").gt("leaseExpiresAt", nowMs),
      )
      .take(limit);

    // Collapse to unique conversation ids; sorting keeps the output stable.
    const conversationIds = new Set(
      [...queuedJobs, ...processingJobs].map((job) => job.conversationId),
    );
    return Array.from(conversationIds).sort();
  },
});
1824
+
1717
1825
  export const listJobsByStatus = query({
1718
1826
  args: {
1719
1827
  status: queueStatusValidator,
@@ -1788,6 +1896,7 @@ export const upsertWorkerState = internalMutation({
1788
1896
  args.status === "stopped" || args.status === "stopping"
1789
1897
  ? (args.stoppedAt ?? nowMs)
1790
1898
  : undefined,
1899
+ assignment: undefined,
1791
1900
  machineRef:
1792
1901
  args.machineId && args.appName
1793
1902
  ? {
@@ -1817,6 +1926,7 @@ export const upsertWorkerState = internalMutation({
1817
1926
  ? (args.stoppedAt ?? worker.stoppedAt ?? nowMs)
1818
1927
  : undefined,
1819
1928
  lastSnapshotId: args.clearLastSnapshotId ? undefined : worker.lastSnapshotId,
1929
+ assignment: worker.assignment,
1820
1930
  machineRef:
1821
1931
  args.clearMachineRef
1822
1932
  ? undefined
@@ -1979,10 +2089,9 @@ export const getLatestDataSnapshotForRestore = query({
1979
2089
  snapshot.archiveFileId !== undefined &&
1980
2090
  snapshot.expiresAt > nowMs,
1981
2091
  );
1982
- const preferred =
1983
- (args.conversationId
1984
- ? ready.find((snapshot) => snapshot.conversationId === args.conversationId)
1985
- : undefined) ?? ready[0];
2092
+ const preferred = args.conversationId
2093
+ ? ready.find((snapshot) => snapshot.conversationId === args.conversationId)
2094
+ : ready[0];
1986
2095
  if (!preferred || !preferred.archiveFileId) return null;
1987
2096
  const downloadUrl = await ctx.storage.getUrl(preferred.archiveFileId);
1988
2097
  if (!downloadUrl) return null;
@@ -2008,6 +2117,7 @@ export const listWorkersForScheduler = internalQuery({
2008
2117
  scheduledShutdownAt: v.union(v.null(), v.number()),
2009
2118
  stoppedAt: v.union(v.null(), v.number()),
2010
2119
  lastSnapshotId: v.union(v.null(), v.id("dataSnapshots")),
2120
+ assignment: v.union(v.null(), workerAssignmentValidator),
2011
2121
  machineId: v.union(v.null(), v.string()),
2012
2122
  appName: v.union(v.null(), v.string()),
2013
2123
  region: v.union(v.null(), v.string()),
@@ -2024,6 +2134,7 @@ export const listWorkersForScheduler = internalQuery({
2024
2134
  scheduledShutdownAt: worker.scheduledShutdownAt ?? null,
2025
2135
  stoppedAt: worker.stoppedAt ?? null,
2026
2136
  lastSnapshotId: worker.lastSnapshotId ?? null,
2137
+ assignment: worker.assignment ?? null,
2027
2138
  machineId: worker.machineRef?.machineId ?? null,
2028
2139
  appName: worker.machineRef?.appName ?? null,
2029
2140
  region: worker.machineRef?.region ?? null,
@@ -2192,7 +2303,7 @@ async function scheduleIdleShutdownWatchdog(
2192
2303
  }
2193
2304
  }
2194
2305
 
2195
- async function scheduleLeaseRecoveryWatchdog(ctx: any, nowMs: number) {
2306
+ async function scheduleLeaseRecoveryWatchdog(ctx: any, _nowMs: number) {
2196
2307
  const delayMs = DEFAULT_CONFIG.lease.leaseMs + 1_000;
2197
2308
  try {
2198
2309
  await ctx.scheduler.runAfter(delayMs, (internal.scheduler as any).reconcileWorkerPoolInternal, {
@@ -2222,6 +2333,89 @@ function computeNextScheduledShutdownAt(
2222
2333
  return worker.scheduledShutdownAt ?? shutdownBaseMs + DEFAULT_CONFIG.scaling.idleTimeoutMs;
2223
2334
  }
2224
2335
 
2336
/**
 * Computes the `assignment` value to store on a worker after one of its jobs
 * has finished.
 *
 * The worker stays bound to its conversation. When the finished message
 * belongs to the conversation/agent the worker is assigned to, a copy of the
 * assignment is returned with `leaseId` refreshed from the message (falling
 * back to the existing lease id when the message carries none). In every
 * other case the current assignment (possibly `undefined`) is returned as-is.
 *
 * @param worker  Worker row; only `assignment` is read.
 * @param message Finished queue message; `leaseId` is optional.
 * @returns The assignment to persist, or `undefined` when the worker has none.
 */
function getAssignmentForCompletedConversation(
  worker: {
    assignment?: {
      conversationId: string;
      agentKey: string;
      leaseId: string;
      assignedAt: number;
    };
  },
  message: {
    conversationId: string;
    agentKey: string;
    leaseId?: string;
  },
) {
  const current = worker.assignment;
  if (!current) {
    return current;
  }

  const isSameBinding =
    current.conversationId === message.conversationId &&
    current.agentKey === message.agentKey;
  if (!isSameBinding) {
    // The message belongs to another conversation/agent: leave the binding alone.
    return current;
  }

  return {
    ...current,
    leaseId: message.leaseId ?? current.leaseId,
  };
}
2363
+
2364
/**
 * Decides whether a worker's assignment should be dropped when one of its
 * messages is released.
 *
 * The assignment is cleared (the function returns `undefined`) only when all
 * of the following hold:
 *  - the worker will have no remaining load (`nextLoad === 0`),
 *  - the worker currently has an assignment, and
 *  - that assignment targets the same conversation and agent as `message`.
 * Otherwise the current assignment is returned unchanged.
 *
 * @param worker   Worker row; only `assignment` is read.
 * @param message  The message being released.
 * @param nextLoad The worker's load after the release.
 * @returns `undefined` to clear the assignment, or the existing assignment.
 */
function clearAssignmentForMessage(
  worker: {
    assignment?: {
      conversationId: string;
      agentKey: string;
      leaseId: string;
      assignedAt: number;
    };
  },
  message: {
    conversationId: string;
    agentKey: string;
  },
  nextLoad: number,
) {
  const current = worker.assignment;
  if (!current || nextLoad !== 0) {
    // Nothing to clear, or the worker still has other work in flight.
    return current;
  }

  const isSameBinding =
    current.conversationId === message.conversationId &&
    current.agentKey === message.agentKey;
  return isSameBinding ? undefined : current;
}
2389
+
2390
/**
 * Finds another live worker that is already assigned to the given
 * conversation/agent pair.
 *
 * A worker qualifies as the owner when it
 *  - is not the worker identified by `args.excludeWorkerId`,
 *  - has a status accepted by `isWorkerClaimable`,
 *  - has a heartbeat strictly newer than `args.staleHeartbeatCutoff`, and
 *  - carries an assignment matching both `conversationId` and `agentKey`.
 *
 * @param workers Worker rows to search.
 * @param args    Conversation/agent to look for, the worker to skip, and the
 *                heartbeat cutoff below which a worker is considered stale.
 * @returns The first matching worker, or `undefined` when there is none.
 */
function findActiveAssignmentOwner(
  workers: Array<{
    workerId: string;
    status: "active" | "draining" | "stopping" | "stopped";
    heartbeatAt: number;
    assignment?: {
      conversationId: string;
      agentKey: string;
      leaseId: string;
      assignedAt: number;
    };
  }>,
  args: {
    conversationId: string;
    agentKey: string;
    excludeWorkerId: string;
    staleHeartbeatCutoff: number;
  },
) {
  return workers.find((candidate) => {
    if (candidate.workerId === args.excludeWorkerId) {
      return false;
    }
    if (!isWorkerClaimable(candidate.status)) {
      return false;
    }
    // Only workers with a fresh heartbeat can hold on to a conversation.
    if (!(candidate.heartbeatAt > args.staleHeartbeatCutoff)) {
      return false;
    }
    const assignment = candidate.assignment;
    if (!assignment) {
      return false;
    }
    return (
      assignment.conversationId === args.conversationId &&
      assignment.agentKey === args.agentKey
    );
  });
}
2418
+
2225
2419
  function dedupeMessagesById<T extends { _id: string }>(messages: Array<T>): Array<T> {
2226
2420
  const seen = new Set<string>();
2227
2421
  const deduped: Array<T> = [];
@@ -64,6 +64,12 @@ type SchedulerWorkerRow = {
64
64
  scheduledShutdownAt: number | null;
65
65
  stoppedAt: number | null;
66
66
  lastSnapshotId: string | null;
67
+ assignment: {
68
+ conversationId: string;
69
+ agentKey: string;
70
+ leaseId: string;
71
+ assignedAt: number;
72
+ } | null;
67
73
  machineId: string | null;
68
74
  appName: string | null;
69
75
  region: string | null;
@@ -174,10 +180,11 @@ async function runReconcileWorkerPool(
174
180
  }
175
181
  const workspaceId = args.workspaceId ?? "default";
176
182
  const provider = resolveProvider(providerConfig.kind, flyApiToken);
177
- const activeConversationCount: number = await ctx.runQuery(
178
- (internal.queue as any).getActiveConversationCountForScheduler,
183
+ const activeConversationIds: Array<string> = await ctx.runQuery(
184
+ (internal.queue as any).getActiveConversationIdsForScheduler,
179
185
  { nowMs, limit: 1000 },
180
186
  );
187
+ const activeConversationCount = activeConversationIds.length;
181
188
  const cycle = await runWorkerLifecycleCycle(ctx, {
182
189
  nowMs,
183
190
  provider,
@@ -186,6 +193,7 @@ async function runReconcileWorkerPool(
186
193
  allowSpawn: true,
187
194
  convexUrl,
188
195
  workspaceId,
196
+ activeConversationIds,
189
197
  desiredActiveWorkers: clamp(activeConversationCount, 0, scaling.maxWorkers),
190
198
  });
191
199
  if (activeConversationCount > 0 || cycle.pending > 0) {
@@ -244,6 +252,7 @@ async function runEnforceIdleShutdowns(
244
252
  scaling: DEFAULT_CONFIG.scaling,
245
253
  allowSpawn: false,
246
254
  desiredActiveWorkers: 0,
255
+ activeConversationIds: [],
247
256
  });
248
257
 
249
258
  if (cycle.pending > 0) {
@@ -267,6 +276,7 @@ async function runWorkerLifecycleCycle(
267
276
  scaling: typeof DEFAULT_CONFIG.scaling;
268
277
  allowSpawn: boolean;
269
278
  desiredActiveWorkers: number;
279
+ activeConversationIds: Array<string>;
270
280
  convexUrl?: string;
271
281
  workspaceId?: string;
272
282
  },
@@ -320,9 +330,11 @@ async function runWorkerLifecycleCycle(
320
330
 
321
331
  let spawned = 0;
322
332
  if (input.allowSpawn && input.desiredActiveWorkers > 0) {
323
- const claimableWorkers = filterScopedWorkers(workerRows, input.providerConfig.appName).filter(
324
- (worker) => isWorkerClaimable(worker.status) && worker.heartbeatAt > staleHeartbeatCutoff,
325
- ).length;
333
+ const claimableWorkers = countWorkersAvailableForActiveConversations(
334
+ filterScopedWorkers(workerRows, input.providerConfig.appName),
335
+ input.activeConversationIds,
336
+ staleHeartbeatCutoff,
337
+ );
326
338
  if (input.desiredActiveWorkers > claimableWorkers) {
327
339
  const toSpawn = Math.min(
328
340
  input.scaling.spawnStep,
@@ -747,6 +759,31 @@ function filterScopedWorkers(workerRows: Array<SchedulerWorkerRow>, appName: str
747
759
  return workerRows.filter((worker) => worker.appName === null || worker.appName === appName);
748
760
  }
749
761
 
762
/**
 * Counts how many live workers can serve the currently active conversations.
 *
 * Workers are ignored when `isWorkerClaimable` rejects their status or when
 * their heartbeat is at or before `staleHeartbeatCutoff`. Of the remaining
 * workers:
 *  - every worker without an assignment counts as one available worker;
 *  - workers assigned to an active conversation count once per distinct
 *    (agentKey, conversationId) pair, so several workers bound to the same
 *    pair are not double-counted;
 *  - workers assigned to a conversation that is not active contribute nothing.
 *
 * @param workerRows            Worker rows to inspect.
 * @param activeConversationIds Conversation ids that currently have work.
 * @param staleHeartbeatCutoff  Heartbeats at or before this value are stale.
 * @returns Unassigned live workers plus distinct active assigned pairs.
 */
function countWorkersAvailableForActiveConversations(
  workerRows: Array<SchedulerWorkerRow>,
  activeConversationIds: Array<string>,
  staleHeartbeatCutoff: number,
): number {
  const activeConversationSet = new Set(activeConversationIds);
  const assignedConversationKeys = new Set<string>();
  let unassignedWorkers = 0;

  for (const worker of workerRows) {
    if (!isWorkerClaimable(worker.status) || worker.heartbeatAt <= staleHeartbeatCutoff) {
      continue;
    }

    const assignment = worker.assignment;
    if (!assignment) {
      unassignedWorkers += 1;
      continue;
    }

    if (activeConversationSet.has(assignment.conversationId)) {
      // Encode the pair as a JSON tuple: a plain "a::b" join is ambiguous when
      // either part itself contains the separator, which would merge two
      // different pairs into one key and under-count available workers.
      assignedConversationKeys.add(
        JSON.stringify([assignment.agentKey, assignment.conversationId]),
      );
    }
  }

  return unassignedWorkers + assignedConversationKeys.size;
}
786
+
750
787
  function deriveScheduledShutdownAt(
751
788
  worker: SchedulerWorkerRow,
752
789
  nowMs: number,
@@ -119,6 +119,14 @@ export default defineSchema({
119
119
  scheduledShutdownAt: v.optional(v.number()),
120
120
  stoppedAt: v.optional(v.number()),
121
121
  lastSnapshotId: v.optional(v.id("dataSnapshots")),
122
+ assignment: v.optional(
123
+ v.object({
124
+ conversationId: v.string(),
125
+ agentKey: v.string(),
126
+ leaseId: v.string(),
127
+ assignedAt: v.number(),
128
+ }),
129
+ ),
122
130
  capabilities: v.array(v.string()),
123
131
  })
124
132
  .index("by_workerId", ["workerId"])