@okrlinkhub/agent-factory 0.2.14 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -4
- package/dist/client/index.d.ts +1 -1
- package/dist/client/index.d.ts.map +1 -1
- package/dist/client/index.js +0 -3
- package/dist/client/index.js.map +1 -1
- package/dist/component/_generated/component.d.ts +0 -34
- package/dist/component/_generated/component.d.ts.map +1 -1
- package/dist/component/lib.d.ts +1 -1
- package/dist/component/lib.d.ts.map +1 -1
- package/dist/component/lib.js +1 -1
- package/dist/component/lib.js.map +1 -1
- package/dist/component/providers/fly.d.ts +14 -0
- package/dist/component/providers/fly.d.ts.map +1 -1
- package/dist/component/providers/fly.js +35 -5
- package/dist/component/providers/fly.js.map +1 -1
- package/dist/component/queue.d.ts +5 -20
- package/dist/component/queue.d.ts.map +1 -1
- package/dist/component/queue.js +41 -107
- package/dist/component/queue.js.map +1 -1
- package/dist/component/scheduler.d.ts.map +1 -1
- package/dist/component/scheduler.js +127 -81
- package/dist/component/scheduler.js.map +1 -1
- package/dist/component/schema.d.ts +5 -13
- package/dist/component/schema.d.ts.map +1 -1
- package/dist/component/schema.js +0 -4
- package/dist/component/schema.js.map +1 -1
- package/package.json +1 -1
- package/src/client/index.ts +0 -3
- package/src/component/_generated/component.ts +0 -42
- package/src/component/lib.test.ts +348 -88
- package/src/component/lib.ts +0 -1
- package/src/component/providers/fly.ts +50 -5
- package/src/component/queue.ts +52 -135
- package/src/component/scheduler.ts +211 -96
- package/src/component/schema.ts +0 -4
|
@@ -44,6 +44,20 @@ type ReconcileWorkerPoolArgs = {
|
|
|
44
44
|
providerConfig?: typeof DEFAULT_CONFIG.provider;
|
|
45
45
|
};
|
|
46
46
|
|
|
47
|
+
type SchedulerWorkerRow = {
|
|
48
|
+
workerId: string;
|
|
49
|
+
status: "active" | "stopped";
|
|
50
|
+
load: number;
|
|
51
|
+
heartbeatAt: number;
|
|
52
|
+
lastClaimAt: number | null;
|
|
53
|
+
scheduledShutdownAt: number | null;
|
|
54
|
+
stoppedAt: number | null;
|
|
55
|
+
lastSnapshotId: string | null;
|
|
56
|
+
machineId: string | null;
|
|
57
|
+
appName: string | null;
|
|
58
|
+
region: string | null;
|
|
59
|
+
};
|
|
60
|
+
|
|
47
61
|
export const reconcileWorkerPool = action({
|
|
48
62
|
args: reconcileWorkerPoolArgs,
|
|
49
63
|
returns: reconcileWorkerPoolReturns,
|
|
@@ -126,6 +140,7 @@ async function runReconcileWorkerPool(
|
|
|
126
140
|
lastClaimAt: number | null;
|
|
127
141
|
scheduledShutdownAt: number | null;
|
|
128
142
|
stoppedAt: number | null;
|
|
143
|
+
lastSnapshotId: string | null;
|
|
129
144
|
machineId: string | null;
|
|
130
145
|
appName: string | null;
|
|
131
146
|
region: string | null;
|
|
@@ -172,11 +187,11 @@ async function runReconcileWorkerPool(
|
|
|
172
187
|
}
|
|
173
188
|
const workspaceId = args.workspaceId ?? "default";
|
|
174
189
|
const provider = resolveProvider(providerConfig.kind, flyApiToken);
|
|
175
|
-
|
|
190
|
+
const isScopedWorker = (worker: SchedulerWorkerRow) =>
|
|
191
|
+
worker.appName === null || worker.appName === providerConfig.appName;
|
|
192
|
+
const scopedWorkerRows = () => workerRows.filter(isScopedWorker);
|
|
176
193
|
const localWorkersWithMachine = workerRows.filter(
|
|
177
|
-
(worker) =>
|
|
178
|
-
worker.machineId &&
|
|
179
|
-
(worker.appName === null || worker.appName === providerConfig.appName),
|
|
194
|
+
(worker) => isScopedWorker(worker) && worker.machineId,
|
|
180
195
|
);
|
|
181
196
|
const liveMachineIds = new Set<string>();
|
|
182
197
|
const liveMachineImages = new Set<string>();
|
|
@@ -205,18 +220,9 @@ async function runReconcileWorkerPool(
|
|
|
205
220
|
(worker) => worker.machineId && !liveMachineIds.has(worker.machineId),
|
|
206
221
|
);
|
|
207
222
|
for (const worker of staleWorkers) {
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
status: "stopped",
|
|
212
|
-
load: 0,
|
|
213
|
-
nowMs,
|
|
214
|
-
scheduledShutdownAt: worker.scheduledShutdownAt ?? undefined,
|
|
215
|
-
stoppedAt: nowMs,
|
|
216
|
-
machineId: worker.machineId ?? undefined,
|
|
217
|
-
appName: providerConfig.appName,
|
|
218
|
-
region: providerConfig.region,
|
|
219
|
-
});
|
|
223
|
+
if (worker.status !== "stopped") {
|
|
224
|
+
await transitionWorkerToStopped(ctx, worker, providerConfig, nowMs, false);
|
|
225
|
+
}
|
|
220
226
|
}
|
|
221
227
|
if (staleWorkers.length > 0) {
|
|
222
228
|
workerRows = await ctx.runQuery((internal.queue as any).listWorkersForScheduler, {});
|
|
@@ -224,7 +230,7 @@ async function runReconcileWorkerPool(
|
|
|
224
230
|
}
|
|
225
231
|
|
|
226
232
|
let spawned = 0;
|
|
227
|
-
let terminated = staleWorkers.length;
|
|
233
|
+
let terminated = staleWorkers.filter((worker) => worker.status !== "stopped").length;
|
|
228
234
|
|
|
229
235
|
const dedicatedVolumeMode =
|
|
230
236
|
providerConfig.volumeName.trim().length > 0 && providerConfig.volumePath.trim().length > 0;
|
|
@@ -237,7 +243,7 @@ async function runReconcileWorkerPool(
|
|
|
237
243
|
`[scheduler] dedicated volume mode enabled for ${providerConfig.volumeName}; clamping desired workers to 1`,
|
|
238
244
|
);
|
|
239
245
|
}
|
|
240
|
-
const activeWorkers =
|
|
246
|
+
const activeWorkers = scopedWorkerRows().filter(
|
|
241
247
|
(worker) => worker.status === "active" && worker.heartbeatAt > staleHeartbeatCutoff,
|
|
242
248
|
).length;
|
|
243
249
|
|
|
@@ -273,46 +279,49 @@ async function runReconcileWorkerPool(
|
|
|
273
279
|
appName: providerConfig.appName,
|
|
274
280
|
region: created.region,
|
|
275
281
|
});
|
|
282
|
+
await scheduleIdleShutdownWatch(ctx, providerConfig, nowMs + scaling.idleTimeoutMs, nowMs);
|
|
276
283
|
spawned += 1;
|
|
277
284
|
}
|
|
278
285
|
}
|
|
279
286
|
|
|
280
|
-
const dueIdleTimeout =
|
|
281
|
-
.filter(
|
|
282
|
-
(worker) =>
|
|
283
|
-
worker.status === "active" &&
|
|
284
|
-
worker.load === 0 &&
|
|
285
|
-
worker.scheduledShutdownAt !== null &&
|
|
286
|
-
worker.scheduledShutdownAt <= nowMs,
|
|
287
|
-
)
|
|
288
|
-
.sort((a, b) => (a.scheduledShutdownAt ?? 0) - (b.scheduledShutdownAt ?? 0));
|
|
287
|
+
const dueIdleTimeout = getDueIdleWorkers(scopedWorkerRows(), nowMs);
|
|
289
288
|
for (const worker of dueIdleTimeout) {
|
|
290
|
-
const
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
machineIsLive,
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
289
|
+
const machineIsLive = worker.machineId ? liveMachineIds.has(worker.machineId) : false;
|
|
290
|
+
await transitionWorkerToStopped(
|
|
291
|
+
ctx,
|
|
292
|
+
worker,
|
|
293
|
+
providerConfig,
|
|
294
|
+
nowMs,
|
|
295
|
+
machineIsLive && requiresFinalSnapshot(worker),
|
|
296
|
+
);
|
|
297
|
+
terminated += 1;
|
|
298
|
+
}
|
|
299
|
+
if (dueIdleTimeout.length > 0) {
|
|
300
|
+
workerRows = await ctx.runQuery((internal.queue as any).listWorkersForScheduler, {});
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
let pendingFinalization = 0;
|
|
304
|
+
const stoppedWorkersAwaitingTeardown = getStoppedWorkersAwaitingTeardown(
|
|
305
|
+
scopedWorkerRows(),
|
|
306
|
+
nowMs,
|
|
307
|
+
);
|
|
308
|
+
for (const worker of stoppedWorkersAwaitingTeardown) {
|
|
309
|
+
if (!hasFinalSnapshotReady(worker)) {
|
|
310
|
+
pendingFinalization += 1;
|
|
301
311
|
continue;
|
|
302
312
|
}
|
|
303
|
-
await
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
nowMs,
|
|
309
|
-
scheduledShutdownAt: worker.scheduledShutdownAt ?? undefined,
|
|
310
|
-
stoppedAt: nowMs,
|
|
311
|
-
machineId: machineId ?? undefined,
|
|
312
|
-
appName: providerConfig.appName,
|
|
313
|
-
region: providerConfig.region,
|
|
313
|
+
const finalized = await finalizeStoppedWorkerTeardown({
|
|
314
|
+
provider,
|
|
315
|
+
providerConfig,
|
|
316
|
+
worker,
|
|
317
|
+
liveMachineIds,
|
|
314
318
|
});
|
|
315
|
-
|
|
319
|
+
if (!finalized) {
|
|
320
|
+
pendingFinalization += 1;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
if (pendingFinalization > 0) {
|
|
324
|
+
await scheduleIdleShutdownRetry(ctx, providerConfig);
|
|
316
325
|
}
|
|
317
326
|
|
|
318
327
|
await ctx.runMutation((internal.queue as any).expireOldDataSnapshots, {
|
|
@@ -361,25 +370,21 @@ async function runEnforceIdleShutdowns(
|
|
|
361
370
|
);
|
|
362
371
|
}
|
|
363
372
|
const provider = resolveProvider(providerConfig.kind, flyApiToken);
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
const dueIdleTimeout =
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
worker.scheduledShutdownAt !== null &&
|
|
378
|
-
worker.scheduledShutdownAt <= nowMs,
|
|
379
|
-
)
|
|
380
|
-
.sort((a, b) => (a.scheduledShutdownAt ?? 0) - (b.scheduledShutdownAt ?? 0));
|
|
373
|
+
let workerRows: Array<SchedulerWorkerRow> = await ctx.runQuery(
|
|
374
|
+
(internal.queue as any).listWorkersForScheduler,
|
|
375
|
+
{},
|
|
376
|
+
);
|
|
377
|
+
const scopedWorkers = () =>
|
|
378
|
+
workerRows.filter(
|
|
379
|
+
(worker) => worker.appName === null || worker.appName === providerConfig.appName,
|
|
380
|
+
);
|
|
381
|
+
const dueIdleTimeout = getDueIdleWorkers(scopedWorkers(), nowMs);
|
|
382
|
+
const stoppedWorkersAwaitingTeardown = getStoppedWorkersAwaitingTeardown(
|
|
383
|
+
scopedWorkers(),
|
|
384
|
+
nowMs,
|
|
385
|
+
);
|
|
381
386
|
|
|
382
|
-
if (dueIdleTimeout.length === 0) {
|
|
387
|
+
if (dueIdleTimeout.length === 0 && stoppedWorkersAwaitingTeardown.length === 0) {
|
|
383
388
|
return {
|
|
384
389
|
checked: 0,
|
|
385
390
|
stopped: 0,
|
|
@@ -391,51 +396,131 @@ async function runEnforceIdleShutdowns(
|
|
|
391
396
|
const providerWorkers = await provider.listWorkers(providerConfig.appName);
|
|
392
397
|
const liveMachineIds = new Set(providerWorkers.map((worker) => worker.machineId));
|
|
393
398
|
|
|
394
|
-
let stopped = 0;
|
|
395
|
-
let pending = 0;
|
|
396
399
|
for (const worker of dueIdleTimeout) {
|
|
397
|
-
const
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
machineIsLive,
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
400
|
+
const machineIsLive = worker.machineId ? liveMachineIds.has(worker.machineId) : false;
|
|
401
|
+
await transitionWorkerToStopped(
|
|
402
|
+
ctx,
|
|
403
|
+
worker,
|
|
404
|
+
providerConfig,
|
|
405
|
+
nowMs,
|
|
406
|
+
machineIsLive && requiresFinalSnapshot(worker),
|
|
407
|
+
);
|
|
408
|
+
}
|
|
409
|
+
if (dueIdleTimeout.length > 0) {
|
|
410
|
+
workerRows = await ctx.runQuery((internal.queue as any).listWorkersForScheduler, {});
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
const stopped = dueIdleTimeout.length;
|
|
414
|
+
let pending = 0;
|
|
415
|
+
for (const worker of getStoppedWorkersAwaitingTeardown(scopedWorkers(), nowMs)) {
|
|
416
|
+
if (!hasFinalSnapshotReady(worker)) {
|
|
407
417
|
pending += 1;
|
|
408
418
|
continue;
|
|
409
419
|
}
|
|
410
|
-
await
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
nowMs,
|
|
416
|
-
scheduledShutdownAt: worker.scheduledShutdownAt ?? undefined,
|
|
417
|
-
stoppedAt: nowMs,
|
|
418
|
-
machineId: machineId ?? undefined,
|
|
419
|
-
appName: providerConfig.appName,
|
|
420
|
-
region: providerConfig.region,
|
|
420
|
+
const finalized = await finalizeStoppedWorkerTeardown({
|
|
421
|
+
provider,
|
|
422
|
+
providerConfig,
|
|
423
|
+
worker,
|
|
424
|
+
liveMachineIds,
|
|
421
425
|
});
|
|
422
|
-
|
|
426
|
+
if (!finalized) {
|
|
427
|
+
pending += 1;
|
|
428
|
+
}
|
|
423
429
|
}
|
|
424
430
|
|
|
425
431
|
if (pending > 0) {
|
|
426
|
-
await ctx
|
|
427
|
-
providerConfig,
|
|
428
|
-
});
|
|
432
|
+
await scheduleIdleShutdownRetry(ctx, providerConfig);
|
|
429
433
|
}
|
|
430
434
|
|
|
431
435
|
return {
|
|
432
|
-
checked: dueIdleTimeout.length,
|
|
436
|
+
checked: dueIdleTimeout.length + stoppedWorkersAwaitingTeardown.length,
|
|
433
437
|
stopped,
|
|
434
438
|
pending,
|
|
435
439
|
nextCheckScheduled: pending > 0,
|
|
436
440
|
};
|
|
437
441
|
}
|
|
438
442
|
|
|
443
|
+
function getDueIdleWorkers(workerRows: Array<SchedulerWorkerRow>, nowMs: number) {
|
|
444
|
+
return workerRows
|
|
445
|
+
.filter(
|
|
446
|
+
(worker) =>
|
|
447
|
+
worker.status === "active" &&
|
|
448
|
+
worker.load === 0 &&
|
|
449
|
+
worker.scheduledShutdownAt !== null &&
|
|
450
|
+
worker.scheduledShutdownAt <= nowMs,
|
|
451
|
+
)
|
|
452
|
+
.sort((a, b) => (a.scheduledShutdownAt ?? 0) - (b.scheduledShutdownAt ?? 0));
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
function getStoppedWorkersAwaitingTeardown(workerRows: Array<SchedulerWorkerRow>, nowMs: number) {
|
|
456
|
+
return workerRows
|
|
457
|
+
.filter(
|
|
458
|
+
(worker) =>
|
|
459
|
+
worker.status === "stopped" &&
|
|
460
|
+
worker.scheduledShutdownAt !== null &&
|
|
461
|
+
worker.scheduledShutdownAt <= nowMs,
|
|
462
|
+
)
|
|
463
|
+
.sort((a, b) => (a.stoppedAt ?? a.scheduledShutdownAt ?? 0) - (b.stoppedAt ?? b.scheduledShutdownAt ?? 0));
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
function requiresFinalSnapshot(worker: SchedulerWorkerRow) {
|
|
467
|
+
return worker.lastClaimAt !== null;
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
function hasFinalSnapshotReady(worker: SchedulerWorkerRow) {
|
|
471
|
+
return !requiresFinalSnapshot(worker) || worker.lastSnapshotId !== null;
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
async function transitionWorkerToStopped(
|
|
475
|
+
ctx: any,
|
|
476
|
+
worker: SchedulerWorkerRow,
|
|
477
|
+
providerConfig: typeof DEFAULT_CONFIG.provider,
|
|
478
|
+
nowMs: number,
|
|
479
|
+
clearLastSnapshotId: boolean,
|
|
480
|
+
) {
|
|
481
|
+
await ctx.runMutation(internal.queue.upsertWorkerState, {
|
|
482
|
+
workerId: worker.workerId,
|
|
483
|
+
provider: providerConfig.kind,
|
|
484
|
+
status: "stopped",
|
|
485
|
+
load: 0,
|
|
486
|
+
nowMs,
|
|
487
|
+
scheduledShutdownAt: worker.scheduledShutdownAt ?? undefined,
|
|
488
|
+
stoppedAt: worker.stoppedAt ?? nowMs,
|
|
489
|
+
machineId: worker.machineId ?? undefined,
|
|
490
|
+
appName: providerConfig.appName,
|
|
491
|
+
region: worker.region ?? providerConfig.region,
|
|
492
|
+
clearLastSnapshotId,
|
|
493
|
+
});
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
async function finalizeStoppedWorkerTeardown(input: {
|
|
497
|
+
provider: WorkerProvider;
|
|
498
|
+
providerConfig: typeof DEFAULT_CONFIG.provider;
|
|
499
|
+
worker: SchedulerWorkerRow;
|
|
500
|
+
liveMachineIds: Set<string>;
|
|
501
|
+
}) {
|
|
502
|
+
const machineId = input.worker.machineId;
|
|
503
|
+
const machineIsLive = machineId ? input.liveMachineIds.has(machineId) : false;
|
|
504
|
+
const terminatedNow = await drainAndTerminateWorker({
|
|
505
|
+
provider: input.provider,
|
|
506
|
+
appName: input.providerConfig.appName,
|
|
507
|
+
machineId,
|
|
508
|
+
machineIsLive,
|
|
509
|
+
workerId: input.worker.workerId,
|
|
510
|
+
});
|
|
511
|
+
if (!terminatedNow) {
|
|
512
|
+
return false;
|
|
513
|
+
}
|
|
514
|
+
await input.provider.cleanupWorkerStorage({
|
|
515
|
+
appName: input.providerConfig.appName,
|
|
516
|
+
workerId: input.worker.workerId,
|
|
517
|
+
machineId,
|
|
518
|
+
region: input.worker.region ?? input.providerConfig.region,
|
|
519
|
+
volumeName: input.providerConfig.volumeName,
|
|
520
|
+
});
|
|
521
|
+
return true;
|
|
522
|
+
}
|
|
523
|
+
|
|
439
524
|
async function drainAndTerminateWorker(input: {
|
|
440
525
|
provider: WorkerProvider;
|
|
441
526
|
appName: string;
|
|
@@ -467,6 +552,36 @@ async function drainAndTerminateWorker(input: {
|
|
|
467
552
|
}
|
|
468
553
|
}
|
|
469
554
|
|
|
555
|
+
async function scheduleIdleShutdownWatch(
|
|
556
|
+
ctx: {
|
|
557
|
+
scheduler: {
|
|
558
|
+
runAfter: (delayMs: number, fn: unknown, args: { providerConfig: typeof DEFAULT_CONFIG.provider }) => Promise<unknown>;
|
|
559
|
+
};
|
|
560
|
+
},
|
|
561
|
+
providerConfig: typeof DEFAULT_CONFIG.provider,
|
|
562
|
+
scheduledShutdownAt: number,
|
|
563
|
+
nowMs: number,
|
|
564
|
+
) {
|
|
565
|
+
const delayMs = Math.max(0, scheduledShutdownAt - nowMs) + 1_000;
|
|
566
|
+
await ctx.scheduler.runAfter(delayMs, (internal.scheduler as any).enforceIdleShutdowns, {
|
|
567
|
+
providerConfig,
|
|
568
|
+
});
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
async function scheduleIdleShutdownRetry(
|
|
572
|
+
ctx: {
|
|
573
|
+
scheduler: {
|
|
574
|
+
runAfter: (delayMs: number, fn: unknown, args: { providerConfig: typeof DEFAULT_CONFIG.provider }) => Promise<unknown>;
|
|
575
|
+
};
|
|
576
|
+
},
|
|
577
|
+
providerConfig: typeof DEFAULT_CONFIG.provider,
|
|
578
|
+
delayMs = 60_000,
|
|
579
|
+
) {
|
|
580
|
+
await ctx.scheduler.runAfter(delayMs, (internal.scheduler as any).enforceIdleShutdowns, {
|
|
581
|
+
providerConfig,
|
|
582
|
+
});
|
|
583
|
+
}
|
|
584
|
+
|
|
470
585
|
function resolveProvider(kind: string, flyApiToken: string): WorkerProvider {
|
|
471
586
|
switch (kind) {
|
|
472
587
|
case "fly":
|
package/src/component/schema.ts
CHANGED
|
@@ -4,11 +4,7 @@ import { v } from "convex/values";
|
|
|
4
4
|
export default defineSchema({
|
|
5
5
|
agentProfiles: defineTable({
|
|
6
6
|
agentKey: v.string(),
|
|
7
|
-
providerUserId: v.optional(v.string()),
|
|
8
7
|
version: v.string(),
|
|
9
|
-
soulMd: v.optional(v.string()),
|
|
10
|
-
clientMd: v.optional(v.string()),
|
|
11
|
-
skills: v.optional(v.array(v.string())),
|
|
12
8
|
secretsRef: v.array(v.string()),
|
|
13
9
|
bridgeConfig: v.optional(
|
|
14
10
|
v.object({
|