@wopr-network/platform-core 1.22.0 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33):
  1. package/dist/billing/crypto/btc/watcher.d.ts +5 -1
  2. package/dist/billing/crypto/btc/watcher.js +8 -4
  3. package/dist/billing/crypto/cursor-store.d.ts +28 -0
  4. package/dist/billing/crypto/cursor-store.js +43 -0
  5. package/dist/billing/crypto/evm/eth-watcher.d.ts +12 -4
  6. package/dist/billing/crypto/evm/eth-watcher.js +23 -9
  7. package/dist/billing/crypto/evm/watcher.d.ts +6 -0
  8. package/dist/billing/crypto/evm/watcher.js +54 -17
  9. package/dist/billing/crypto/index.d.ts +2 -0
  10. package/dist/billing/crypto/index.js +1 -0
  11. package/dist/db/schema/crypto.d.ts +121 -0
  12. package/dist/db/schema/crypto.js +16 -1
  13. package/dist/fleet/__tests__/rollout-orchestrator.test.d.ts +1 -0
  14. package/dist/fleet/__tests__/rollout-orchestrator.test.js +262 -0
  15. package/dist/fleet/index.d.ts +1 -0
  16. package/dist/fleet/index.js +1 -0
  17. package/dist/fleet/rollout-orchestrator.d.ts +69 -0
  18. package/dist/fleet/rollout-orchestrator.js +204 -0
  19. package/dist/fleet/services.d.ts +6 -0
  20. package/dist/fleet/services.js +22 -0
  21. package/drizzle/migrations/0007_watcher_cursors.sql +12 -0
  22. package/drizzle/migrations/meta/_journal.json +7 -0
  23. package/package.json +1 -1
  24. package/src/billing/crypto/btc/watcher.ts +11 -4
  25. package/src/billing/crypto/cursor-store.ts +61 -0
  26. package/src/billing/crypto/evm/eth-watcher.ts +25 -10
  27. package/src/billing/crypto/evm/watcher.ts +57 -19
  28. package/src/billing/crypto/index.ts +2 -0
  29. package/src/db/schema/crypto.ts +22 -1
  30. package/src/fleet/__tests__/rollout-orchestrator.test.ts +321 -0
  31. package/src/fleet/index.ts +1 -0
  32. package/src/fleet/rollout-orchestrator.ts +262 -0
  33. package/src/fleet/services.ts +28 -0
@@ -0,0 +1,262 @@
1
+ /**
2
+ * RolloutOrchestrator — coordinates fleet-wide container updates using
3
+ * pluggable rollout strategies and volume snapshots for nuclear rollback.
4
+ *
5
+ * Sits between ImagePoller (detects new digests) and ContainerUpdater
6
+ * (handles per-bot pull/stop/recreate/health). Adds:
7
+ * - Strategy-driven batching (rolling wave, single bot, immediate)
8
+ * - Pre-update volume snapshots via VolumeSnapshotManager
9
+ * - Volume restore on health check failure (nuclear rollback)
10
+ * - Per-tenant update orchestration
11
+ */
12
+
13
+ import { logger } from "../config/logger.js";
14
+ import type { IRolloutStrategy } from "./rollout-strategy.js";
15
+ import type { BotProfile } from "./types.js";
16
+ import type { ContainerUpdater, UpdateResult } from "./updater.js";
17
+ import type { VolumeSnapshotManager } from "./volume-snapshot-manager.js";
18
+
19
/**
 * Collaborators and optional hooks handed to the RolloutOrchestrator
 * constructor.
 */
export interface RolloutOrchestratorDeps {
  /** Performs the actual per-bot container update (`updateBot(botId)`). */
  updater: ContainerUpdater;
  /** Takes, restores and deletes the pre-update volume snapshots. */
  snapshotManager: VolumeSnapshotManager;
  /** Decides batch composition, pause between waves, and what to do on failure. */
  strategy: IRolloutStrategy;
  /**
   * Resolve the profiles that need updating. Called once, without arguments,
   * at the start of every rollout.
   */
  getUpdatableProfiles: () => Promise<BotProfile[]>;
  /** Optional callback after each bot update attempt (success or failure) */
  onBotUpdated?: (result: BotUpdateResult) => void;
  /**
   * Optional callback when a rollout completes. Not invoked when the rollout
   * returns early (already running, or nothing to update).
   */
  onRolloutComplete?: (results: RolloutResult) => void;
}
30
+
31
/**
 * Outcome of one update attempt for one bot: the updater's own result plus
 * the orchestrator's volume-rollback status.
 */
export interface BotUpdateResult extends UpdateResult {
  /** True when a pre-update volume snapshot was restored after the update failed. */
  volumeRestored: boolean;
}
34
+
35
/** Summary returned by (and reported via `onRolloutComplete` from) a rollout. */
export interface RolloutResult {
  /** Number of profiles returned by `getUpdatableProfiles` when the rollout began. */
  totalBots: number;
  /** Count of successful updates. */
  succeeded: number;
  /** Count of failed updates. */
  failed: number;
  /** Count of bots left unprocessed when the rollout ended. */
  skipped: number;
  /** True when the strategy's failure policy answered "abort". */
  aborted: boolean;
  /** True when a concurrent rollout was already in progress */
  alreadyRunning: boolean;
  /** One entry per update attempt, in the order the attempts were made. */
  results: BotUpdateResult[];
}
45
+
46
/**
 * Drives a fleet-wide update: asks the strategy for successive batches of
 * bots, updates each bot sequentially (snapshotting its volume first and
 * restoring it if the update fails), and applies the strategy's failure
 * policy ("abort" | "skip" | "retry") after every failed attempt.
 *
 * Only one rollout runs at a time per instance; a second call while one is
 * in flight returns immediately with `alreadyRunning: true`.
 */
export class RolloutOrchestrator {
  private readonly updater: ContainerUpdater;
  private readonly snapshotManager: VolumeSnapshotManager;
  private readonly strategy: IRolloutStrategy;
  private readonly getUpdatableProfiles: () => Promise<BotProfile[]>;
  private readonly onBotUpdated?: (result: BotUpdateResult) => void;
  private readonly onRolloutComplete?: (results: RolloutResult) => void;
  /** Re-entrancy guard: true from the start of rollout() until its finally block. */
  private rolling = false;

  constructor(deps: RolloutOrchestratorDeps) {
    this.updater = deps.updater;
    this.snapshotManager = deps.snapshotManager;
    this.strategy = deps.strategy;
    this.getUpdatableProfiles = deps.getUpdatableProfiles;
    this.onBotUpdated = deps.onBotUpdated;
    this.onRolloutComplete = deps.onRolloutComplete;
  }

  /** Whether a rollout is currently in progress. */
  get isRolling(): boolean {
    return this.rolling;
  }

  /**
   * Execute a rollout across all updatable bots.
   * Uses the configured strategy for batching, pausing, and failure handling.
   *
   * @returns A summary in which `results` holds every attempt (including
   *   retries), while `succeeded` / `failed` / `skipped` count *bots*:
   *   a bot is classified by its final attempt, and `skipped` is the number
   *   of bots that were never attempted. For unique bot ids,
   *   `succeeded + failed + skipped === totalBots`.
   */
  async rollout(): Promise<RolloutResult> {
    if (this.rolling) {
      logger.warn("Rollout already in progress — skipping");
      return { totalBots: 0, succeeded: 0, failed: 0, skipped: 0, aborted: false, alreadyRunning: true, results: [] };
    }

    this.rolling = true;
    const allResults: BotUpdateResult[] = [];
    let aborted = false;

    try {
      let remaining = await this.getUpdatableProfiles();
      const totalBots = remaining.length;

      if (totalBots === 0) {
        logger.info("Rollout: no bots to update");
        return {
          totalBots: 0,
          succeeded: 0,
          failed: 0,
          skipped: 0,
          aborted: false,
          alreadyRunning: false,
          results: [],
        };
      }

      logger.info(`Rollout starting: ${totalBots} bots to update`);

      while (remaining.length > 0 && !aborted) {
        const batch = this.strategy.nextBatch(remaining);
        // An empty batch means the strategy has nothing more to schedule.
        if (batch.length === 0) break;

        logger.info(`Rollout wave: ${batch.length} bots (${remaining.length} remaining)`);

        // Process batch — each bot sequentially within a wave for safety
        const retryProfiles: BotProfile[] = [];
        for (const profile of batch) {
          if (aborted) break;

          const result = await this.updateBot(profile);
          allResults.push(result);
          this.onBotUpdated?.(result);

          if (!result.success) {
            const action = this.handleFailure(profile.id, result, allResults);
            if (action === "abort") {
              aborted = true;
              logger.warn(`Rollout aborted after bot ${profile.id} failure`);
            } else if (action === "retry") {
              retryProfiles.push(profile);
            }
            // "skip" → don't re-add, bot is dropped
          }
        }

        // Drop everything that was in this batch, then queue retries at the end.
        const processedIds = new Set(batch.map((b) => b.id));
        remaining = [...remaining.filter((b) => !processedIds.has(b.id)), ...retryProfiles];

        // Pause between waves (unless aborted or done)
        if (remaining.length > 0 && !aborted) {
          const pause = this.strategy.pauseDuration();
          if (pause > 0) {
            logger.info(`Rollout: pausing ${pause}ms before next wave`);
            await sleep(pause);
          }
        }
      }

      // Tally per bot, not per attempt: allResults gains one entry for every
      // retry, so counting raw entries would double-count retried bots and
      // could push `skipped` below zero. Later attempts overwrite earlier ones.
      const finalAttemptByBot = new Map<string, BotUpdateResult>();
      for (const attempt of allResults) {
        finalAttemptByBot.set(attempt.botId, attempt);
      }
      let succeeded = 0;
      for (const attempt of finalAttemptByBot.values()) {
        if (attempt.success) succeeded += 1;
      }
      const failed = finalAttemptByBot.size - succeeded;
      const skipped = Math.max(0, totalBots - finalAttemptByBot.size);

      const rolloutResult: RolloutResult = {
        totalBots,
        succeeded,
        failed,
        skipped,
        aborted,
        alreadyRunning: false,
        results: allResults,
      };

      logger.info(`Rollout complete: ${succeeded} succeeded, ${failed} failed, ${skipped} skipped, aborted=${aborted}`);
      this.onRolloutComplete?.(rolloutResult);

      return rolloutResult;
    } finally {
      // Always release the guard, even if a dependency or callback threw.
      this.rolling = false;
    }
  }

  /**
   * Update a single bot with volume snapshot + nuclear rollback.
   *
   * Never throws: an unexpected error is converted into a failed result
   * after attempting a volume restore.
   */
  private async updateBot(profile: BotProfile): Promise<BotUpdateResult> {
    const snapshotIds: string[] = [];

    try {
      // Step 1: Snapshot volumes before update
      if (profile.volumeName) {
        try {
          const snap = await this.snapshotManager.snapshot(profile.volumeName);
          snapshotIds.push(snap.id);
          logger.info(`Pre-update snapshot for ${profile.id}: ${snap.id}`);
        } catch (err) {
          // Best effort: a failed snapshot does not block the update.
          logger.warn(`Volume snapshot failed for ${profile.id} — proceeding without backup`, { err });
        }
      }

      // Step 2: Delegate to ContainerUpdater
      const result = await this.updater.updateBot(profile.id);

      if (result.success) {
        // Clean up snapshots on success
        await this.cleanupSnapshots(snapshotIds);
        return { ...result, volumeRestored: false };
      }

      // Step 3: Nuclear rollback — restore volumes if update failed
      const volumeRestored = await this.restoreVolumes(profile.id, snapshotIds);
      return { ...result, volumeRestored };
    } catch (err) {
      logger.error(`Unexpected error updating bot ${profile.id}`, { err });

      // Attempt volume restore on unexpected errors too
      const volumeRestored = await this.restoreVolumes(profile.id, snapshotIds);

      return {
        botId: profile.id,
        success: false,
        previousImage: profile.image,
        newImage: profile.image,
        previousDigest: null,
        newDigest: null,
        rolledBack: false,
        volumeRestored,
        error: err instanceof Error ? err.message : String(err),
      };
    }
  }

  /**
   * Ask the strategy what to do about a failed attempt.
   *
   * The strategy receives the number of failed attempts recorded so far for
   * this bot (the current one included, since it is already in `allResults`).
   */
  private handleFailure(
    botId: string,
    result: BotUpdateResult,
    allResults: BotUpdateResult[],
  ): "abort" | "skip" | "retry" {
    const error = new Error(result.error ?? "Unknown error");
    const failCount = allResults.filter((r) => r.botId === botId && !r.success).length;
    return this.strategy.onBotFailure(botId, error, failCount);
  }

  /**
   * Try each snapshot in order and stop at the first one that restores.
   *
   * @returns true if a snapshot was restored; false if there were none or
   *   every restore attempt failed (failures are logged, not thrown).
   */
  private async restoreVolumes(botId: string, snapshotIds: string[]): Promise<boolean> {
    if (snapshotIds.length === 0) return false;

    for (const id of snapshotIds) {
      try {
        await this.snapshotManager.restore(id);
        logger.info(`Volume restored for ${botId} from snapshot ${id}`);
        return true;
      } catch (err) {
        logger.error(`Volume restore failed for ${botId} snapshot ${id}`, { err });
      }
    }
    return false;
  }

  /** Delete the given snapshots; individual failures are logged and ignored. */
  private async cleanupSnapshots(snapshotIds: string[]): Promise<void> {
    for (const id of snapshotIds) {
      try {
        await this.snapshotManager.delete(id);
      } catch (err) {
        logger.warn(`Failed to clean up snapshot ${id}`, { err });
      }
    }
  }
}
259
+
260
/** Resolve after roughly `ms` milliseconds (timer-based, never rejects). */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
@@ -32,6 +32,8 @@ import { SystemResourceMonitor } from "../observability/system-resources.js";
32
32
  // Stub re-exports so existing references compile; consumers must call initPlatformServices().
33
33
  // TODO: Replace with proper DI / service-locator pattern in platform-core.
34
34
  import { DrizzleTwoFactorRepository } from "../security/two-factor-repository.js";
35
+ import type { RolloutOrchestrator } from "./rollout-orchestrator.js";
36
+ import type { VolumeSnapshotManager } from "./volume-snapshot-manager.js";
35
37
 
36
38
  // Platform singletons (getAdminAuditLog, getCreditLedger, etc.) are wired by
37
39
  // the consuming application's own composition root (e.g. wopr-platform's
@@ -136,6 +138,8 @@ let _restoreLogStore: IRestoreLogStore | null = null;
136
138
  let _restoreService: RestoreService | null = null;
137
139
  let _backupStatusStore: IBackupStatusStore | null = null;
138
140
  let _snapshotManager: SnapshotManager | null = null;
141
+ let _volumeSnapshotManager: VolumeSnapshotManager | null = null;
142
+ let _rolloutOrchestrator: RolloutOrchestrator | null = null;
139
143
 
140
144
  const S3_BUCKET = process.env.S3_BUCKET || "wopr-backups";
141
145
 
@@ -537,6 +541,28 @@ export function getSnapshotManager(): SnapshotManager {
537
541
  return _snapshotManager;
538
542
  }
539
543
 
544
/**
 * Return the registered VolumeSnapshotManager singleton.
 *
 * @throws Error if setVolumeSnapshotManager() has not been called yet.
 */
export function getVolumeSnapshotManager(): VolumeSnapshotManager {
  const mgr = _volumeSnapshotManager;
  if (mgr) {
    return mgr;
  }
  throw new Error("VolumeSnapshotManager not initialized — call setVolumeSnapshotManager() first");
}
550
+
551
/** Register the VolumeSnapshotManager instance returned by getVolumeSnapshotManager(). */
export function setVolumeSnapshotManager(mgr: VolumeSnapshotManager): void {
  _volumeSnapshotManager = mgr;
}
554
+
555
/**
 * Return the registered RolloutOrchestrator singleton.
 *
 * @throws Error if setRolloutOrchestrator() has not been called yet.
 */
export function getRolloutOrchestrator(): RolloutOrchestrator {
  const orch = _rolloutOrchestrator;
  if (orch) {
    return orch;
  }
  throw new Error("RolloutOrchestrator not initialized — call setRolloutOrchestrator() first");
}
561
+
562
/** Register the RolloutOrchestrator instance returned by getRolloutOrchestrator(). */
export function setRolloutOrchestrator(orch: RolloutOrchestrator): void {
  _rolloutOrchestrator = orch;
}
565
+
540
566
  export function getRestoreService(): RestoreService {
541
567
  if (!_restoreService) {
542
568
  _restoreService = new RestoreService({
@@ -877,6 +903,8 @@ export function _resetForTest(): void {
877
903
  _restoreService = null;
878
904
  _backupStatusStore = null;
879
905
  _snapshotManager = null;
906
+ _volumeSnapshotManager = null;
907
+ _rolloutOrchestrator = null;
880
908
  _botBilling = null;
881
909
  _phoneNumberRepo = null;
882
910
  _affiliateRepo = null;