@wopr-network/platform-core 1.39.4 → 1.39.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,11 @@
1
+ import { PassThrough } from "node:stream";
1
2
  import type Docker from "dockerode";
2
3
  import { logger } from "../config/logger.js";
4
+ import type { BotMetricsTracker } from "../gateway/bot-metrics-tracker.js";
3
5
  import type { ProxyManagerInterface } from "../proxy/types.js";
4
6
  import type { IBotInstanceRepository } from "./bot-instance-repository.js";
5
7
  import type { BotEventType, FleetEventEmitter } from "./fleet-event-emitter.js";
6
- import type { BotProfile } from "./types.js";
8
+ import type { BotProfile, BotStatus, ContainerStats } from "./types.js";
7
9
 
8
10
  /**
9
11
  * Instance — a runtime handle to a container.
@@ -25,6 +27,7 @@ export interface InstanceDeps {
25
27
  instanceRepo?: IBotInstanceRepository;
26
28
  proxyManager?: ProxyManagerInterface;
27
29
  eventEmitter?: FleetEventEmitter;
30
+ botMetricsTracker?: BotMetricsTracker;
28
31
  }
29
32
 
30
33
  export class Instance {
@@ -38,6 +41,10 @@ export class Instance {
38
41
  private readonly instanceRepo: IBotInstanceRepository | undefined;
39
42
  private readonly proxyManager: ProxyManagerInterface | undefined;
40
43
  private readonly eventEmitter: FleetEventEmitter | undefined;
44
+ private readonly botMetricsTracker: BotMetricsTracker | undefined;
45
+
46
+ /** Simple per-instance mutex to serialize start/stop/restart/remove. */
47
+ private lockPromise = Promise.resolve();
41
48
 
42
49
  constructor(deps: InstanceDeps) {
43
50
  this.id = deps.profile.id;
@@ -49,6 +56,36 @@ export class Instance {
49
56
  this.instanceRepo = deps.instanceRepo;
50
57
  this.proxyManager = deps.proxyManager;
51
58
  this.eventEmitter = deps.eventEmitter;
59
+ this.botMetricsTracker = deps.botMetricsTracker;
60
+ }
61
+
62
/**
 * True when this handle refers to a container that is not managed locally.
 * Remote instances carry a containerId such as "remote:node-3". Local Docker
 * operations are not supported for them — callers (e.g. wopr-platform) handle
 * remote delegation at a higher level via NodeCommandBus.
 */
private get isRemote(): boolean {
  const remotePrefix = "remote:";
  return this.containerId.startsWith(remotePrefix);
}
70
+
71
/**
 * Guard used by operations that need a local container.
 *
 * @param operation Label of the calling operation; it becomes the prefix of the error message.
 * @throws Error when this instance is remote.
 */
private assertLocal(operation: string): void {
  if (!this.isRemote) return;
  throw new Error(`${operation} is not supported on remote instances — use node agent`);
}
76
+
77
/**
 * Run `fn` only after every previously queued locked operation has finished.
 *
 * Each call replaces `lockPromise` with a fresh gate and waits on the gate
 * it replaced, so calls execute one at a time in the order they were made.
 *
 * @param fn The operation to run while holding the lock.
 * @returns Whatever `fn` resolves with; a rejection from `fn` propagates to the caller.
 */
private async withLock<T>(fn: () => Promise<T>): Promise<T> {
  const previous = this.lockPromise;
  let release!: () => void;
  // Publish our gate before awaiting so that later callers queue behind this call.
  this.lockPromise = new Promise<void>((open) => {
    release = open;
  });
  try {
    await previous;
    return await fn();
  } finally {
    // Open the gate whether fn succeeded or failed, so the queue never stalls.
    release();
  }
}
53
90
 
54
91
  /** Emit bot.created — call only from FleetManager.create(), not getInstance() */
@@ -57,51 +94,109 @@ export class Instance {
57
94
  }
58
95
 
59
96
  async start(): Promise<void> {
60
- const container = this.docker.getContainer(this.containerId);
61
- await container.start();
62
- logger.info(`Instance started`, { id: this.id, containerName: this.containerName, url: this.url });
63
- this.emit("bot.started");
97
+ this.assertLocal("start()");
98
+ return this.withLock(async () => {
99
+ const container = this.docker.getContainer(this.containerId);
100
+ await container.start();
101
+ logger.info(`Instance started`, { id: this.id, containerName: this.containerName, url: this.url });
102
+ this.emit("bot.started");
103
+ });
64
104
  }
65
105
 
66
106
  async stop(): Promise<void> {
67
- const container = this.docker.getContainer(this.containerId);
68
- try {
69
- await container.stop({ t: 10 });
70
- } catch (err: unknown) {
71
- const msg = err instanceof Error ? err.message : String(err);
72
- if (!msg.includes("not running") && !msg.includes("already stopped")) {
73
- throw err;
107
+ this.assertLocal("stop()");
108
+ return this.withLock(async () => {
109
+ const container = this.docker.getContainer(this.containerId);
110
+ try {
111
+ await container.stop({ t: 10 });
112
+ } catch (err: unknown) {
113
+ const msg = err instanceof Error ? err.message : String(err);
114
+ if (!msg.includes("not running") && !msg.includes("already stopped")) {
115
+ throw err;
116
+ }
74
117
  }
75
- }
76
- logger.info(`Instance stopped`, { id: this.id, containerName: this.containerName });
77
- this.emit("bot.stopped");
118
+ logger.info(`Instance stopped`, { id: this.id, containerName: this.containerName });
119
+ this.emit("bot.stopped");
120
+ });
78
121
  }
79
122
 
80
- async remove(): Promise<void> {
81
- const container = this.docker.getContainer(this.containerId);
82
- try {
83
- await container.stop({ t: 5 }).catch(() => {});
84
- await container.remove({ force: true });
85
- } catch (err: unknown) {
86
- const msg = err instanceof Error ? err.message : String(err);
87
- if (!msg.includes("No such container")) {
88
- throw err;
123
/**
 * Restart the container.
 * Callers that need an image update should call pullImage() first.
 *
 * Rejects for remote instances. The container is inspected first and the
 * restart is refused unless its status is one of the accepted states.
 * Application metrics are reset only after that check passes, so a refused
 * restart (or a failed inspect) leaves the existing metrics untouched.
 *
 * @throws Error when the container is in a state that cannot be restarted.
 */
async restart(): Promise<void> {
  this.assertLocal("restart()");
  return this.withLock(async () => {
    const container = this.docker.getContainer(this.containerId);
    const info = await container.inspect();
    const validStates = new Set(["running", "stopped", "exited", "dead"]);
    // A missing or empty status is reported as "unknown", which is never a valid state.
    const currentState = typeof info.State.Status === "string" && info.State.Status ? info.State.Status : "unknown";
    if (!validStates.has(currentState)) {
      throw new Error(
        `Cannot restart instance ${this.id}: container is in state "${currentState}". ` +
          `Valid states: ${[...validStates].join(", ")}.`,
      );
    }
    // Reset just before the restart is attempted — not before validation — so
    // metrics are not discarded for a restart that never happens.
    this.botMetricsTracker?.reset(this.id);
    await container.restart();
    logger.info(`Instance restarted`, { id: this.id, containerName: this.containerName });
    this.emit("bot.restarted");
  });
}
91
146
 
92
- if (this.proxyManager) {
147
/**
 * Pull this instance's image (`profile.image`) through Docker.
 * Call before restart() to update the image before restarting.
 *
 * Registry credentials are taken from REGISTRY_USERNAME / REGISTRY_PASSWORD;
 * they are sent only when both are set, with REGISTRY_SERVER as the server
 * address (falling back to "ghcr.io"). Resolves once the pull's progress
 * stream has completed and rejects if it reports an error.
 */
async pullImage(): Promise<void> {
  this.assertLocal("pullImage()");
  logger.info(`Pulling image ${this.profile.image}`, { id: this.id });

  const {
    REGISTRY_USERNAME: username,
    REGISTRY_PASSWORD: password,
    REGISTRY_SERVER: server,
  } = process.env;
  const pullOptions =
    username && password ? { authconfig: { username, password, serveraddress: server ?? "ghcr.io" } } : {};

  const progress = await this.docker.pull(this.profile.image, pullOptions);
  await new Promise<void>((resolve, reject) => {
    const onFinished = (err: Error | null): void => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    };
    this.docker.modem.followProgress(progress, onFinished);
  });

  logger.info(`Image pulled`, { id: this.id, image: this.profile.image });
}
167
+
168
/**
 * Remove the container and clean up its proxy route.
 *
 * Rejects for remote instances. The container is stopped on a best-effort
 * basis (`t: 5`, failures ignored) and then force-removed. A removal error
 * whose message contains "No such container" is tolerated; anything else is
 * rethrown. Proxy route cleanup failures are logged and never fatal.
 * Finally the removal is logged and "bot.removed" is emitted.
 *
 * @param removeVolumes Passed to Docker as the `v` option of the remove call.
 */
async remove(removeVolumes = false): Promise<void> {
  this.assertLocal("remove()");
  return this.withLock(async () => {
    const target = this.docker.getContainer(this.containerId);
    try {
      // Graceful stop is best-effort only; the forced remove below follows regardless.
      await target.stop({ t: 5 }).catch(() => {});
      await target.remove({ force: true, v: removeVolumes });
    } catch (err: unknown) {
      const text = err instanceof Error ? err.message : String(err);
      const alreadyGone = text.includes("No such container");
      if (!alreadyGone) {
        throw err;
      }
    }

    const proxy = this.proxyManager;
    if (proxy) {
      try {
        await proxy.removeRoute(this.id);
      } catch (err) {
        logger.warn("Proxy route cleanup failed (non-fatal)", { id: this.id, err });
      }
    }

    logger.info(`Instance removed`, { id: this.id, containerName: this.containerName });
    this.emit("bot.removed");
  });
}
103
194
 
104
- async status(): Promise<"running" | "stopped" | "gone"> {
195
+ /**
196
+ * Simple container state check (running / stopped / gone).
197
+ */
198
+ async containerState(): Promise<"running" | "stopped" | "gone"> {
199
+ this.assertLocal("containerState()");
105
200
  try {
106
201
  const container = this.docker.getContainer(this.containerId);
107
202
  const info = await container.inspect();
@@ -111,6 +206,158 @@ export class Instance {
111
206
  }
112
207
  }
113
208
 
209
/**
 * Build the full BotStatus for this instance: profile fields, the inspected
 * container state, resource stats and application metrics.
 *
 * Rejects for remote instances. Stats are collected only while the container
 * is running, and a stats failure simply yields `stats: null`. If inspecting
 * the container fails for any reason, the offline status is returned instead.
 */
async status(): Promise<BotStatus> {
  this.assertLocal("status()");
  try {
    const container = this.docker.getContainer(this.containerId);
    const info = await container.inspect();
    const containerState = info.State;

    // Stats are optional: unavailable stats must not fail the whole status call.
    const stats: ContainerStats | null = containerState.Running
      ? await this.getStats(container).catch(() => null)
      : null;

    const timestamp = new Date().toISOString();
    const startedAt = containerState.StartedAt;
    return {
      id: this.profile.id,
      name: this.profile.name,
      description: this.profile.description,
      image: this.profile.image,
      containerId: info.Id,
      state: containerState.Status as BotStatus["state"],
      health: containerState.Health?.Status ?? null,
      // "uptime" carries the start timestamp, and only while the container is running.
      uptime: containerState.Running && startedAt ? startedAt : null,
      startedAt: startedAt || null,
      createdAt: info.Created || timestamp,
      updatedAt: timestamp,
      stats,
      applicationMetrics: this.botMetricsTracker?.getMetrics(this.profile.id) ?? null,
    };
  } catch {
    return this.offlineStatus();
  }
}
248
+
249
/**
 * Fetch the container's recent logs as plain text (demultiplexed).
 *
 * @param tail Number of trailing log lines requested from Docker (default 100).
 * @returns stdout and stderr combined, with timestamps, decoded as UTF-8.
 */
async logs(tail = 100): Promise<string> {
  this.assertLocal("logs()");
  const container = this.docker.getContainer(this.containerId);
  const raw = await container.logs({
    stdout: true,
    stderr: true,
    tail,
    timestamps: true,
  });
  const buf = Buffer.isBuffer(raw) ? raw : Buffer.from(raw as unknown as string, "binary");

  // Docker returns multiplexed binary frames when Tty is false (the default).
  // Each frame is an 8-byte header followed by its payload; the payload length
  // is read as a big-endian uint32 from header bytes 4-7. Keeping only the
  // payloads gives callers plain text instead of binary headers mixed into the log.
  const headerSize = 8;
  const payloads: Buffer[] = [];
  for (let pos = 0; pos + headerSize <= buf.length; ) {
    const payloadStart = pos + headerSize;
    const payloadEnd = payloadStart + buf.readUInt32BE(pos + 4);
    if (payloadEnd > buf.length) break; // incomplete frame — stop walking
    payloads.push(buf.subarray(payloadStart, payloadEnd));
    pos = payloadEnd;
  }

  // If demux produced nothing (e.g. TTY container), fall back to the raw string.
  if (payloads.length === 0) {
    return buf.toString("utf-8");
  }
  return Buffer.concat(payloads).toString("utf-8");
}
278
+
279
/**
 * Stream container logs in real-time (follow mode).
 * Returns a Node.js ReadableStream that emits plain-text log chunks (already demultiplexed).
 * Caller is responsible for destroying the stream when done.
 *
 * Lifecycle of the returned stream:
 * - it ends when the underlying Docker log stream ends or closes;
 * - an error on the Docker stream is forwarded to it as an "error" event;
 * - destroying it also tears down the underlying Docker stream.
 *
 * @param opts.since Optional lower bound passed through to Docker's `since` option.
 * @param opts.tail Number of trailing lines to start from (default 100).
 */
async logStream(opts: { since?: string; tail?: number }): Promise<NodeJS.ReadableStream> {
  this.assertLocal("logStream()");
  const container = this.docker.getContainer(this.containerId);
  const logOpts: Record<string, unknown> = {
    stdout: true,
    stderr: true,
    follow: true,
    tail: opts.tail ?? 100,
    timestamps: true,
  };
  if (opts.since) {
    logOpts.since = opts.since;
  }

  // Docker returns a multiplexed binary stream when Tty is false (the default for
  // containers created by createContainer without Tty:true). Demultiplex it so
  // callers receive plain text without 8-byte binary frame headers.
  const multiplexed = (await container.logs(logOpts)) as unknown as NodeJS.ReadableStream & {
    destroy?: (error?: Error) => void;
  };
  const pt = new PassThrough();
  (
    this.docker.modem as unknown as {
      demuxStream(stream: NodeJS.ReadableStream, stdout: PassThrough, stderr: PassThrough): void;
    }
  ).demuxStream(multiplexed, pt, pt);

  // demuxStream only forwards data; it never ends the output streams. Without
  // this wiring the PassThrough would stay open forever after the source finishes.
  const finish = (): void => {
    if (!pt.writableEnded && !pt.destroyed) pt.end();
  };
  multiplexed.on("end", finish);
  multiplexed.on("close", finish);
  multiplexed.on("error", (err: Error) => pt.destroy(err));

  // When the consumer destroys (or finishes with) the PassThrough, release the
  // underlying Docker connection as well so it is not left open.
  pt.on("close", () => multiplexed.destroy?.());

  return pt;
}
310
+
311
/**
 * Get disk usage for this instance's /data volume by running `df -B1 /data`
 * inside the container.
 *
 * Rejects for remote instances. Returns null if the container is not running,
 * if the exec fails (the failure is logged with its error), or if the `df`
 * output cannot be parsed.
 *
 * @returns Byte counts read from the last line of the `df` output, or null.
 */
async getVolumeUsage(): Promise<{ usedBytes: number; totalBytes: number; availableBytes: number } | null> {
  this.assertLocal("getVolumeUsage()");
  try {
    const container = this.docker.getContainer(this.containerId);
    const info = await container.inspect();
    if (!info.State.Running) return null;

    const exec = await container.exec({
      Cmd: ["df", "-B1", "/data"],
      AttachStdout: true,
      AttachStderr: false,
    });

    const output = await new Promise<string>((resolve, reject) => {
      exec.start({}, (err: Error | null, stream: import("node:stream").Duplex | undefined) => {
        if (err) return reject(err);
        if (!stream) return reject(new Error("No stream from exec"));
        let data = "";
        stream.on("data", (chunk: Buffer) => {
          data += chunk.toString();
        });
        stream.on("end", () => resolve(data));
        // A stream that is torn down without emitting "end" would otherwise leave
        // this promise pending forever; settling twice is harmless.
        stream.on("close", () => resolve(data));
        stream.on("error", reject);
      });
    });

    // Parse df output — a header line followed by the data line; the numbers
    // are read from the last line.
    const lines = output.trim().split("\n");
    if (lines.length < 2) return null;

    // Columns: filesystem, total, used, available, ...
    const parts = lines[lines.length - 1].split(/\s+/);
    if (parts.length < 4) return null;

    const totalBytes = parseInt(parts[1], 10);
    const usedBytes = parseInt(parts[2], 10);
    const availableBytes = parseInt(parts[3], 10);

    if (Number.isNaN(totalBytes) || Number.isNaN(usedBytes) || Number.isNaN(availableBytes)) return null;

    return { usedBytes, totalBytes, availableBytes };
  } catch (err: unknown) {
    // Best-effort: report the cause instead of discarding it, then fall back to null.
    logger.warn(`Failed to get volume usage for instance ${this.id}`, { id: this.id, err });
    return null;
  }
}
360
+
114
361
  /**
115
362
  * Register this instance in the billing system.
116
363
  * Skip for ephemeral instances — they bill per-token, not per-instance.
@@ -158,6 +405,44 @@ export class Instance {
158
405
  }
159
406
  }
160
407
 
408
/**
 * Build the BotStatus reported when the container cannot be inspected:
 * profile fields only, state "stopped", no container id, and no health,
 * uptime, stats or metrics. Both timestamps are set to the current time.
 */
private offlineStatus(): BotStatus {
  const { id, name, description, image } = this.profile;
  const timestamp = new Date().toISOString();
  const offline: BotStatus = {
    id,
    name,
    description,
    image,
    containerId: null,
    state: "stopped",
    health: null,
    uptime: null,
    startedAt: null,
    createdAt: timestamp,
    updatedAt: timestamp,
    stats: null,
    applicationMetrics: null,
  };
  return offline;
}
426
+
427
/**
 * Take a single (non-streaming) stats sample from Docker and reduce it to
 * CPU and memory figures.
 *
 * CPU percent is the container's CPU-time delta over the system CPU-time
 * delta, scaled by the number of online CPUs (treated as 1 when absent);
 * it is 0 when the system delta is not positive. Memory usage falls back
 * to 0 and the limit to 1 when absent, which avoids dividing by zero.
 *
 * @param container The Docker container handle to sample.
 * @returns Percentages rounded to two decimals and memory sizes in whole MB.
 */
private async getStats(container: Docker.Container): Promise<ContainerStats> {
  const sample = await container.stats({ stream: false });
  const { cpu_stats: cpuNow, precpu_stats: cpuBefore, memory_stats: memory } = sample;

  const roundTo2 = (value: number): number => Math.round(value * 100) / 100;
  const bytesToMb = (bytes: number): number => Math.round(bytes / 1024 / 1024);

  const containerDelta = cpuNow.cpu_usage.total_usage - cpuBefore.cpu_usage.total_usage;
  const hostDelta = cpuNow.system_cpu_usage - cpuBefore.system_cpu_usage;
  const cpuCount = cpuNow.online_cpus || 1;
  let cpuPercent = 0;
  if (hostDelta > 0) {
    cpuPercent = (containerDelta / hostDelta) * cpuCount * 100;
  }

  const usedBytes = memory.usage || 0;
  const limitBytes = memory.limit || 1;

  return {
    cpuPercent: roundTo2(cpuPercent),
    memoryUsageMb: bytesToMb(usedBytes),
    memoryLimitMb: bytesToMb(limitBytes),
    memoryPercent: roundTo2((usedBytes / limitBytes) * 100),
  };
}
445
+
161
446
  private emit(type: BotEventType): void {
162
447
  if (this.eventEmitter) {
163
448
  this.eventEmitter.emit({