@wopr-network/platform-core 1.39.4 → 1.39.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/credits/credit-expiry-cron.test.js +30 -0
- package/dist/credits/ledger.js +11 -5
- package/dist/credits/ledger.test.js +87 -0
- package/dist/db/schema/ledger.js +6 -0
- package/dist/fleet/fleet-manager.d.ts +14 -35
- package/dist/fleet/fleet-manager.js +52 -236
- package/dist/fleet/fleet-manager.test.js +13 -85
- package/dist/fleet/instance.d.ts +58 -3
- package/dist/fleet/instance.js +297 -33
- package/dist/fleet/instance.test.d.ts +1 -0
- package/dist/fleet/instance.test.js +343 -0
- package/dist/node-agent/types.d.ts +1 -1
- package/package.json +1 -1
- package/src/credits/README.md +106 -0
- package/src/credits/credit-expiry-cron.test.ts +36 -0
- package/src/credits/ledger.test.ts +113 -0
- package/src/credits/ledger.ts +13 -7
- package/src/db/schema/ledger.ts +6 -0
- package/src/fleet/fleet-manager.test.ts +13 -111
- package/src/fleet/fleet-manager.ts +50 -255
- package/src/fleet/instance.test.ts +390 -0
- package/src/fleet/instance.ts +318 -33
package/src/fleet/instance.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
import { PassThrough } from "node:stream";
|
|
1
2
|
import type Docker from "dockerode";
|
|
2
3
|
import { logger } from "../config/logger.js";
|
|
4
|
+
import type { BotMetricsTracker } from "../gateway/bot-metrics-tracker.js";
|
|
3
5
|
import type { ProxyManagerInterface } from "../proxy/types.js";
|
|
4
6
|
import type { IBotInstanceRepository } from "./bot-instance-repository.js";
|
|
5
7
|
import type { BotEventType, FleetEventEmitter } from "./fleet-event-emitter.js";
|
|
6
|
-
import type { BotProfile } from "./types.js";
|
|
8
|
+
import type { BotProfile, BotStatus, ContainerStats } from "./types.js";
|
|
7
9
|
|
|
8
10
|
/**
|
|
9
11
|
* Instance — a runtime handle to a container.
|
|
@@ -25,6 +27,7 @@ export interface InstanceDeps {
|
|
|
25
27
|
instanceRepo?: IBotInstanceRepository;
|
|
26
28
|
proxyManager?: ProxyManagerInterface;
|
|
27
29
|
eventEmitter?: FleetEventEmitter;
|
|
30
|
+
botMetricsTracker?: BotMetricsTracker;
|
|
28
31
|
}
|
|
29
32
|
|
|
30
33
|
export class Instance {
|
|
@@ -38,6 +41,10 @@ export class Instance {
|
|
|
38
41
|
private readonly instanceRepo: IBotInstanceRepository | undefined;
|
|
39
42
|
private readonly proxyManager: ProxyManagerInterface | undefined;
|
|
40
43
|
private readonly eventEmitter: FleetEventEmitter | undefined;
|
|
44
|
+
private readonly botMetricsTracker: BotMetricsTracker | undefined;
|
|
45
|
+
|
|
46
|
+
/** Simple per-instance mutex to serialize start/stop/restart/remove. */
|
|
47
|
+
private lockPromise = Promise.resolve();
|
|
41
48
|
|
|
42
49
|
constructor(deps: InstanceDeps) {
|
|
43
50
|
this.id = deps.profile.id;
|
|
@@ -49,6 +56,36 @@ export class Instance {
|
|
|
49
56
|
this.instanceRepo = deps.instanceRepo;
|
|
50
57
|
this.proxyManager = deps.proxyManager;
|
|
51
58
|
this.eventEmitter = deps.eventEmitter;
|
|
59
|
+
this.botMetricsTracker = deps.botMetricsTracker;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/**
 * Whether this handle refers to a remote instance, i.e. its containerId
 * carries the "remote:" prefix (for example "remote:node-3").
 * Local Docker operations are not supported for such instances — callers
 * (e.g. wopr-platform) handle remote delegation at a higher level via
 * NodeCommandBus.
 */
private get isRemote(): boolean {
  const remotePrefix = "remote:";
  return this.containerId.slice(0, remotePrefix.length) === remotePrefix;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Guard used by operations that talk to the local Docker daemon.
 *
 * @param operation - Label of the calling operation; interpolated into the error message.
 * @throws Error when this instance is remote (see `isRemote`).
 */
private assertLocal(operation: string): void {
  if (!this.isRemote) {
    return;
  }
  const message = `${operation} is not supported on remote instances — use node agent`;
  throw new Error(message);
}
|
|
76
|
+
|
|
77
|
+
/**
 * Run `fn` after every previously queued `withLock` call on this instance
 * has settled, and release the slot afterwards whether `fn` resolves or rejects.
 *
 * @param fn - Async work to run while holding the per-instance lock.
 * @returns Whatever `fn` resolves to; a rejection from `fn` propagates to the caller.
 */
private async withLock<T>(fn: () => Promise<T>): Promise<T> {
  // Capture the current tail of the queue before replacing it with our own gate.
  const previous = this.lockPromise;
  let release: () => void = () => {};
  const gate = new Promise<void>((done) => {
    // The executor runs synchronously, so `release` is set before it is ever called.
    release = done;
  });
  this.lockPromise = gate;
  try {
    await previous;
    const result = await fn();
    return result;
  } finally {
    // Always open the gate so the next queued operation can proceed.
    release();
  }
}
|
|
53
90
|
|
|
54
91
|
/** Emit bot.created — call only from FleetManager.create(), not getInstance() */
|
|
@@ -57,51 +94,109 @@ export class Instance {
|
|
|
57
94
|
}
|
|
58
95
|
|
|
59
96
|
/**
 * Start this instance's container, then log and emit `bot.started`.
 * The Docker call runs under the per-instance lock (`withLock`).
 * Rejects if the instance is remote or if Docker fails to start the container.
 */
async start(): Promise<void> {
  this.assertLocal("start()");
  const startContainer = async (): Promise<void> => {
    const container = this.docker.getContainer(this.containerId);
    await container.start();
    const details = { id: this.id, containerName: this.containerName, url: this.url };
    logger.info(`Instance started`, details);
    this.emit("bot.started");
  };
  return this.withLock(startContainer);
}
|
|
65
105
|
|
|
66
106
|
/**
 * Stop this instance's container (10 second stop timeout), then log and
 * emit `bot.stopped`. Runs under the per-instance lock (`withLock`).
 *
 * Errors whose message contains "not running" or "already stopped" are
 * treated as success; any other error is rethrown.
 * Rejects if the instance is remote.
 */
async stop(): Promise<void> {
  this.assertLocal("stop()");
  return this.withLock(async () => {
    const container = this.docker.getContainer(this.containerId);
    try {
      await container.stop({ t: 10 });
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      // The container being down already is the outcome we wanted.
      const benign = ["not running", "already stopped"].some((needle) => message.includes(needle));
      if (!benign) {
        throw err;
      }
    }
    const details = { id: this.id, containerName: this.containerName };
    logger.info(`Instance stopped`, details);
    this.emit("bot.stopped");
  });
}
|
|
79
122
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
123
|
+
/**
 * Restart the container.
 * Callers that need an image update should call pullImage() first.
 *
 * Runs under the per-instance lock (`withLock`). The container is inspected
 * first and the restart is refused unless its status is one of
 * running / stopped / exited / dead. Application metrics are reset only
 * after that check passes, so a refused restart (or a failed inspect)
 * does not discard the metrics of a bot that keeps running.
 *
 * @throws Error when the instance is remote, when the container is in a
 *   state that cannot be restarted, or when Docker rejects the call.
 */
async restart(): Promise<void> {
  this.assertLocal("restart()");
  return this.withLock(async () => {
    const container = this.docker.getContainer(this.containerId);
    const info = await container.inspect();
    // NOTE(review): "stopped" does not appear to be a status Docker reports
    // (it uses "exited"); kept so the accepted set and error text stay unchanged.
    const validStates = new Set(["running", "stopped", "exited", "dead"]);
    const currentState = typeof info.State.Status === "string" && info.State.Status ? info.State.Status : "unknown";
    if (!validStates.has(currentState)) {
      throw new Error(
        `Cannot restart instance ${this.id}: container is in state "${currentState}". ` +
          `Valid states: ${[...validStates].join(", ")}.`,
      );
    }
    // Reset only once we know the restart will actually be attempted.
    this.botMetricsTracker?.reset(this.id);
    await container.restart();
    logger.info(`Instance restarted`, { id: this.id, containerName: this.containerName });
    this.emit("bot.restarted");
  });
}
|
|
91
146
|
|
|
92
|
-
|
|
147
|
+
/**
 * Pull this instance's image (`profile.image`) and wait for the pull to finish.
 * Call before restart() to update the image before restarting.
 *
 * Registry credentials are sent only when both REGISTRY_USERNAME and
 * REGISTRY_PASSWORD are set; REGISTRY_SERVER falls back to "ghcr.io" when unset.
 * Rejects if the instance is remote or if the pull reports an error.
 */
async pullImage(): Promise<void> {
  this.assertLocal("pullImage()");
  logger.info(`Pulling image ${this.profile.image}`, { id: this.id });

  const {
    REGISTRY_USERNAME: username,
    REGISTRY_PASSWORD: password,
    REGISTRY_SERVER: server,
  } = process.env;
  const pullOptions =
    username && password ? { authconfig: { username, password, serveraddress: server ?? "ghcr.io" } } : {};

  const stream = await this.docker.pull(this.profile.image, pullOptions);
  // followProgress is callback-based; wrap it so the pull can be awaited.
  await new Promise<void>((resolve, reject) => {
    const onFinished = (err: Error | null): void => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    };
    this.docker.modem.followProgress(stream, onFinished);
  });

  logger.info(`Image pulled`, { id: this.id, image: this.profile.image });
}
|
|
167
|
+
|
|
168
|
+
/**
 * Stop and force-remove this instance's container, drop its proxy route
 * (when a proxy manager is configured), then log and emit `bot.removed`.
 * Runs under the per-instance lock (`withLock`).
 *
 * @param removeVolumes - Passed to Docker as the `v` flag of the remove call.
 * @throws Error when the instance is remote, or when removal fails with a
 *   message that does not contain "No such container".
 */
async remove(removeVolumes = false): Promise<void> {
  this.assertLocal("remove()");
  return this.withLock(async () => {
    const container = this.docker.getContainer(this.containerId);
    try {
      // Best-effort graceful stop; the forced removal below does not depend on it.
      await container.stop({ t: 5 }).catch(() => {});
      await container.remove({ force: true, v: removeVolumes });
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      const alreadyGone = message.includes("No such container");
      if (!alreadyGone) {
        throw err;
      }
    }

    const proxy = this.proxyManager;
    if (proxy) {
      try {
        await proxy.removeRoute(this.id);
      } catch (cleanupError) {
        // Route cleanup must not fail the removal itself.
        logger.warn("Proxy route cleanup failed (non-fatal)", { id: this.id, err: cleanupError });
      }
    }

    const details = { id: this.id, containerName: this.containerName };
    logger.info(`Instance removed`, details);
    this.emit("bot.removed");
  });
}
|
|
103
194
|
|
|
104
|
-
|
|
195
|
+
/**
|
|
196
|
+
* Simple container state check (running / stopped / gone).
|
|
197
|
+
*/
|
|
198
|
+
async containerState(): Promise<"running" | "stopped" | "gone"> {
|
|
199
|
+
this.assertLocal("containerState()");
|
|
105
200
|
try {
|
|
106
201
|
const container = this.docker.getContainer(this.containerId);
|
|
107
202
|
const info = await container.inspect();
|
|
@@ -111,6 +206,158 @@ export class Instance {
|
|
|
111
206
|
}
|
|
112
207
|
}
|
|
113
208
|
|
|
209
|
+
/**
 * Build a full BotStatus: profile fields, inspected container state,
 * resource stats (only sampled while the container is running) and
 * application metrics from the metrics tracker, when one is configured.
 *
 * Any failure while inspecting the container yields the offline
 * placeholder from `offlineStatus()` instead of an error. A failure while
 * sampling stats only leaves `stats` as null.
 * Rejects if the instance is remote.
 */
async status(): Promise<BotStatus> {
  this.assertLocal("status()");
  try {
    const container = this.docker.getContainer(this.containerId);
    const info = await container.inspect();
    const containerState = info.State;

    // Stats are optional: unavailable stats must not fail the whole status call.
    const sampleStats = async (): Promise<ContainerStats | null> => {
      if (!containerState.Running) {
        return null;
      }
      try {
        return await this.getStats(container);
      } catch {
        return null;
      }
    };
    const stats = await sampleStats();

    const timestamp = new Date().toISOString();
    const result: BotStatus = {
      id: this.profile.id,
      name: this.profile.name,
      description: this.profile.description,
      image: this.profile.image,
      containerId: info.Id,
      state: containerState.Status as BotStatus["state"],
      health: containerState.Health?.Status ?? null,
      uptime: containerState.Running && containerState.StartedAt ? containerState.StartedAt : null,
      startedAt: containerState.StartedAt || null,
      createdAt: info.Created || timestamp,
      updatedAt: timestamp,
      stats,
      applicationMetrics: this.botMetricsTracker?.getMetrics(this.profile.id) ?? null,
    };
    return result;
  } catch {
    return this.offlineStatus();
  }
}
|
|
248
|
+
|
|
249
|
+
/**
 * Fetch the last `tail` log lines (stdout and stderr, with timestamps)
 * and return them as plain text.
 *
 * @param tail - Number of trailing log lines to request (default 100).
 * @returns Demultiplexed log text; the raw buffer as UTF-8 when no complete
 *   frame could be extracted (e.g. TTY container).
 */
async logs(tail = 100): Promise<string> {
  this.assertLocal("logs()");
  const raw = await this.docker.getContainer(this.containerId).logs({
    stdout: true,
    stderr: true,
    tail,
    timestamps: true,
  });

  // Docker returns multiplexed binary frames when Tty is false (the default):
  // each frame is an 8-byte header, with the payload size as a big-endian
  // uint32 at byte 4, followed by the payload. Keep only the payloads.
  const buf = Buffer.isBuffer(raw) ? raw : Buffer.from(raw as unknown as string, "binary");
  const headerSize = 8;
  const payloads: Buffer[] = [];
  for (let cursor = 0; cursor + headerSize <= buf.length; ) {
    const payloadSize = buf.readUInt32BE(cursor + 4);
    const next = cursor + headerSize + payloadSize;
    if (next > buf.length) {
      // Declared size runs past the buffer: stop demultiplexing here.
      break;
    }
    payloads.push(buf.subarray(cursor + headerSize, next));
    cursor = next;
  }

  if (payloads.length === 0) {
    // Nothing demultiplexed (e.g. TTY container): fall back to the raw string.
    return buf.toString("utf-8");
  }
  return Buffer.concat(payloads).toString("utf-8");
}
|
|
278
|
+
|
|
279
|
+
/**
 * Stream container logs in real-time (follow mode).
 * Returns a Node.js ReadableStream that emits plain-text log chunks (already demultiplexed).
 * Caller is responsible for destroying the stream when done.
 *
 * Lifecycle of the returned stream is tied to the underlying Docker stream:
 * - when the Docker stream ends, the returned stream ends;
 * - an error on the Docker stream is re-emitted on the returned stream;
 * - when the returned stream closes (including via destroy()), the Docker
 *   stream is destroyed so it is not left open.
 *
 * @param opts.since - Optional lower bound forwarded to Docker as `since`.
 * @param opts.tail - Number of trailing lines to start from (default 100).
 * @throws Error when the instance is remote, or when Docker rejects the logs call.
 */
async logStream(opts: { since?: string; tail?: number }): Promise<NodeJS.ReadableStream> {
  this.assertLocal("logStream()");
  const container = this.docker.getContainer(this.containerId);
  const logOpts: Record<string, unknown> = {
    stdout: true,
    stderr: true,
    follow: true,
    tail: opts.tail ?? 100,
    timestamps: true,
  };
  if (opts.since) {
    logOpts.since = opts.since;
  }

  // Docker returns a multiplexed binary stream when Tty is false (the default for
  // containers created by createContainer without Tty:true). Demultiplex it so
  // callers receive plain text without 8-byte binary frame headers.
  const multiplexed = (await container.logs(logOpts)) as unknown as NodeJS.ReadableStream;
  const pt = new PassThrough();
  (
    this.docker.modem as unknown as {
      demuxStream(stream: NodeJS.ReadableStream, stdout: PassThrough, stderr: PassThrough): void;
    }
  ).demuxStream(multiplexed, pt, pt);

  // demuxStream only forwards data, so propagate end and errors explicitly.
  // Otherwise consumers never see the stream finish or fail.
  multiplexed.on("end", () => pt.end());
  multiplexed.on("error", (err: Error) => pt.destroy(err));

  // Release the Docker stream once the consumer is done with ours.
  pt.on("close", () => {
    const source = multiplexed as NodeJS.ReadableStream & { destroy?: () => void };
    source.destroy?.();
  });

  return pt;
}
|
|
310
|
+
|
|
311
|
+
/**
 * Report disk usage of this instance's /data volume by running
 * `df -B1 /data` inside the container.
 *
 * @returns Byte counts parsed from the last line of the df output, or null
 *   when the container is not running, the output cannot be parsed, or the
 *   inspect/exec call fails (a warning is logged in the failure case).
 */
async getVolumeUsage(): Promise<{ usedBytes: number; totalBytes: number; availableBytes: number } | null> {
  this.assertLocal("getVolumeUsage()");
  try {
    const container = this.docker.getContainer(this.containerId);
    const info = await container.inspect();
    if (!info.State.Running) {
      return null;
    }

    const exec = await container.exec({
      Cmd: ["df", "-B1", "/data"],
      AttachStdout: true,
      AttachStderr: false,
    });

    // exec.start is callback-based; collect everything the stream emits.
    const output = await new Promise<string>((resolve, reject) => {
      exec.start({}, (err: Error | null, stream: import("node:stream").Duplex | undefined) => {
        if (err) {
          reject(err);
          return;
        }
        if (!stream) {
          reject(new Error("No stream from exec"));
          return;
        }
        const pieces: string[] = [];
        stream.on("data", (chunk: Buffer) => {
          pieces.push(chunk.toString());
        });
        stream.on("end", () => resolve(pieces.join("")));
        stream.on("error", reject);
      });
    });

    // df prints a header row first; the figures are read from the last line.
    const lines = output.trim().split("\n");
    if (lines.length < 2) {
      return null;
    }

    const parts = lines[lines.length - 1].split(/\s+/);
    if (parts.length < 4) {
      return null;
    }

    // Columns 1..3 are read as total, used and available, in that order.
    const [totalBytes, usedBytes, availableBytes] = parts.slice(1, 4).map((field) => parseInt(field, 10));
    const hasInvalidNumber = [totalBytes, usedBytes, availableBytes].some((value) => Number.isNaN(value));
    if (hasInvalidNumber) {
      return null;
    }

    return { usedBytes, totalBytes, availableBytes };
  } catch {
    logger.warn(`Failed to get volume usage for instance ${this.id}`);
    return null;
  }
}
|
|
360
|
+
|
|
114
361
|
/**
|
|
115
362
|
* Register this instance in the billing system.
|
|
116
363
|
* Skip for ephemeral instances — they bill per-token, not per-instance.
|
|
@@ -158,6 +405,44 @@ export class Instance {
|
|
|
158
405
|
}
|
|
159
406
|
}
|
|
160
407
|
|
|
408
|
+
/**
 * Placeholder BotStatus used when the container cannot be inspected:
 * profile fields are filled in, state is "stopped", every container-derived
 * field is null, and both timestamps are the current time.
 */
private offlineStatus(): BotStatus {
  const timestamp = new Date().toISOString();
  const { id, name, description, image } = this.profile;
  const placeholder: BotStatus = {
    id,
    name,
    description,
    image,
    containerId: null,
    state: "stopped",
    health: null,
    uptime: null,
    startedAt: null,
    createdAt: timestamp,
    updatedAt: timestamp,
    stats: null,
    applicationMetrics: null,
  };
  return placeholder;
}
|
|
426
|
+
|
|
427
|
+
/**
 * Take a one-shot (non-streaming) stats sample and reduce it to CPU and
 * memory figures.
 *
 * CPU percent is the container's CPU-time delta over the system CPU-time
 * delta, scaled by the online CPU count (1 when not reported); it is 0 when
 * the system delta is not positive. Memory figures are in MiB, percentages
 * are rounded to two decimals.
 *
 * @param container - Docker container handle to sample.
 */
private async getStats(container: Docker.Container): Promise<ContainerStats> {
  const raw = await container.stats({ stream: false });
  const roundToTwoDecimals = (value: number): number => Math.round(value * 100) / 100;
  const bytesToMb = (bytes: number): number => Math.round(bytes / 1024 / 1024);

  const cpuDelta = raw.cpu_stats.cpu_usage.total_usage - raw.precpu_stats.cpu_usage.total_usage;
  const systemDelta = raw.cpu_stats.system_cpu_usage - raw.precpu_stats.system_cpu_usage;
  const numCpus = raw.cpu_stats.online_cpus || 1;
  let cpuPercent = 0;
  if (systemDelta > 0) {
    cpuPercent = (cpuDelta / systemDelta) * numCpus * 100;
  }

  const memUsage = raw.memory_stats.usage || 0;
  // Falling back to 1 avoids dividing by zero when no limit is reported.
  const memLimit = raw.memory_stats.limit || 1;

  return {
    cpuPercent: roundToTwoDecimals(cpuPercent),
    memoryUsageMb: bytesToMb(memUsage),
    memoryLimitMb: bytesToMb(memLimit),
    memoryPercent: roundToTwoDecimals((memUsage / memLimit) * 100),
  };
}
|
|
445
|
+
|
|
161
446
|
private emit(type: BotEventType): void {
|
|
162
447
|
if (this.eventEmitter) {
|
|
163
448
|
this.eventEmitter.emit({
|