@celilo/cli 0.5.0-alpha.8 → 0.5.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/package.json +1 -1
  2. package/src/api-clients/proxmox.test.ts +78 -0
  3. package/src/api-clients/proxmox.ts +96 -1
  4. package/src/cli/command-registry.ts +32 -3
  5. package/src/cli/commands/backup-delete.ts +10 -7
  6. package/src/cli/commands/backup-import.ts +11 -8
  7. package/src/cli/commands/backup-restore.ts +11 -8
  8. package/src/cli/commands/events.ts +8 -3
  9. package/src/cli/commands/machine-add.ts +178 -163
  10. package/src/cli/commands/machine-remove.ts +10 -7
  11. package/src/cli/commands/module-config.test.ts +78 -0
  12. package/src/cli/commands/module-config.ts +18 -3
  13. package/src/cli/commands/module-import.ts +9 -5
  14. package/src/cli/commands/module-remove.ts +20 -9
  15. package/src/cli/commands/module-status.ts +15 -0
  16. package/src/cli/commands/module-upgrade.ts +10 -6
  17. package/src/cli/commands/proxmox-node-list.ts +101 -0
  18. package/src/cli/commands/proxmox-template-selection.ts +16 -15
  19. package/src/cli/commands/service-add-digitalocean.ts +120 -109
  20. package/src/cli/commands/service-add-proxmox.ts +275 -260
  21. package/src/cli/commands/service-reconfigure.ts +171 -153
  22. package/src/cli/commands/service-remove.ts +19 -13
  23. package/src/cli/commands/service-verify.ts +9 -10
  24. package/src/cli/commands/storage-add-local.ts +120 -107
  25. package/src/cli/commands/storage-add-s3.ts +145 -131
  26. package/src/cli/commands/storage-remove.ts +11 -8
  27. package/src/cli/commands/system-init.ts +119 -128
  28. package/src/cli/completion.ts +15 -0
  29. package/src/cli/index.ts +25 -0
  30. package/src/cli/service-credential.ts +54 -0
  31. package/src/services/bus-interview.ts +232 -0
  32. package/src/services/module-config.ts +12 -0
  33. package/src/services/module-deploy.ts +6 -1
  34. package/src/services/placement-reconcile.test.ts +86 -0
  35. package/src/services/placement-reconcile.ts +108 -0
  36. package/src/services/programmatic-responder.ts +34 -0
  37. package/src/services/terminal-responder.ts +113 -0
  38. package/src/templates/generator.test.ts +30 -0
  39. package/src/templates/generator.ts +86 -31
@@ -106,6 +106,18 @@ export function upsertModuleConfig(
106
106
  .run();
107
107
  }
108
108
 
109
+ /**
110
+ * Delete a module config row if present (no-op if absent). Used to drop a
111
+ * derived/cached key that should no longer persist — e.g. `__infra_target_node`,
112
+ * now resolved live from Proxmox each generate rather than cached in the DB
113
+ * (ISS-0090: "the DB stores intent, Proxmox reports reality").
114
+ */
115
+ export function deleteModuleConfig(db: DbClient, moduleId: string, key: string): void {
116
+ db.delete(moduleConfigs)
117
+ .where(and(eq(moduleConfigs.moduleId, moduleId), eq(moduleConfigs.key, key)))
118
+ .run();
119
+ }
120
+
109
121
  /**
110
122
  * Parse a stored module_configs row into its canonical typed value.
111
123
  * Throws if valueJson is null — that's a row written before the
@@ -907,7 +907,12 @@ async function deployModuleImpl(
907
907
  lines.push(` ${key} = ${value}`);
908
908
  }
909
909
  if (resolution.skipped.length > 0) {
910
- lines.push(` (skipped user-configured: ${resolution.skipped.join(', ')})`);
910
+ // These are infrastructure-managed vars with no value this deploy (e.g.
911
+ // vmid/target_node on a machine deploy). NOT honored operator overrides —
912
+ // such keys are rejected at `config set` (ISS-0069); don't imply otherwise.
913
+ lines.push(
914
+ ` (auto-managed, not applicable this deploy: ${resolution.skipped.join(', ')})`,
915
+ );
911
916
  }
912
917
  log.success(lines.join('\n'));
913
918
 
@@ -0,0 +1,86 @@
1
+ import { describe, expect, test } from 'bun:test';
2
+ import type { DeployedSystem } from '@celilo/capabilities';
3
+ import { formatPlacementLine, resolveOnePlacement } from './placement-reconcile';
4
+
5
+ function sys(
6
+ overrides: Partial<DeployedSystem> & { infrastructure: DeployedSystem['infrastructure'] },
7
+ ): DeployedSystem {
8
+ return {
9
+ name: 'main',
10
+ hostname: 'caddy',
11
+ ipv4_address: '10.0.10.10',
12
+ zone: 'dmz',
13
+ ...overrides,
14
+ } as DeployedSystem;
15
+ }
16
+
17
+ const SVC = 'svc-1';
18
+ const cs = (vmid?: number) =>
19
+ sys({
20
+ infrastructure: {
21
+ type: 'container_service',
22
+ serviceId: SVC,
23
+ ...(vmid != null ? { vmid } : {}),
24
+ },
25
+ });
26
+
27
+ describe('resolveOnePlacement (ISS-0060 — node reconciled from Proxmox, not a cached DB value)', () => {
28
+ test('machine-pool system → machine (no Proxmox node)', () => {
29
+ const s = sys({ infrastructure: { type: 'machine', machineId: 'm1' } });
30
+ expect(resolveOnePlacement(s, new Map(), new Set(), new Set())).toEqual({ kind: 'machine' });
31
+ });
32
+
33
+ test('non-Proxmox container service → other', () => {
34
+ expect(resolveOnePlacement(cs(200), new Map(), new Set(), new Set([SVC]))).toEqual({
35
+ kind: 'other',
36
+ });
37
+ });
38
+
39
+ test('container_service without a vmid → uncreated', () => {
40
+ expect(resolveOnePlacement(cs(undefined), new Map(), new Set(), new Set())).toEqual({
41
+ kind: 'uncreated',
42
+ });
43
+ });
44
+
45
+ test('Proxmox unreachable for the service → unreachable (never a stale fallback)', () => {
46
+ expect(resolveOnePlacement(cs(200), new Map(), new Set([SVC]), new Set())).toEqual({
47
+ kind: 'unreachable',
48
+ });
49
+ });
50
+
51
+ test('vmid present in the cluster → node (the reconciled reality)', () => {
52
+ expect(resolveOnePlacement(cs(200), new Map([[200, 'node2']]), new Set(), new Set())).toEqual({
53
+ kind: 'node',
54
+ node: 'node2',
55
+ });
56
+ });
57
+
58
+ test('vmid absent from the cluster → absent', () => {
59
+ expect(resolveOnePlacement(cs(200), new Map([[999, 'node2']]), new Set(), new Set())).toEqual({
60
+ kind: 'absent',
61
+ });
62
+ });
63
+ });
64
+
65
+ describe('formatPlacementLine', () => {
66
+ const s = cs(200);
67
+
68
+ test('node resolution shows the real node + vmid', () => {
69
+ expect(formatPlacementLine(s, { kind: 'node', node: 'node2' })).toBe(
70
+ 'caddy (vmid 200) → node2 (zone dmz)',
71
+ );
72
+ });
73
+
74
+ test('unreachable is explicit — no silent stale value', () => {
75
+ expect(formatPlacementLine(s, { kind: 'unreachable' })).toContain('Proxmox unreachable');
76
+ });
77
+
78
+ test('absent flags a vmid not in the cluster', () => {
79
+ expect(formatPlacementLine(s, { kind: 'absent' })).toContain('not found in Proxmox');
80
+ });
81
+
82
+ test('machine line omits vmid/node', () => {
83
+ const m = sys({ hostname: 'iot', zone: 'internal', infrastructure: { type: 'machine' } });
84
+ expect(formatPlacementLine(m, { kind: 'machine' })).toBe('iot — machine (zone internal)');
85
+ });
86
+ });
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Reconcile a module's ACTUAL node placement from Proxmox (ISS-0060 pt 2).
3
+ *
4
+ * "The DB stores intent, Proxmox reports reality." `module status` / `list`
5
+ * must show where a container ACTUALLY lives — queried live from the Proxmox
6
+ * cluster — not a cached `__infra_target_node` that drifts (caddy recorded
7
+ * node3, ran on node2). Machine-pool systems and non-Proxmox container services
8
+ * have no Proxmox node to reconcile.
9
+ *
10
+ * Never throws: a Proxmox outage yields an 'unreachable' resolution so a status
11
+ * or list command still renders instead of erroring.
12
+ */
13
+
14
+ import type { DeployedSystem } from '@celilo/capabilities';
15
+ import { ProxmoxClient, type ProxmoxCredentials } from '../api-clients/proxmox';
16
+ import { getContainerService, getServiceCredentials } from './container-service';
17
+
18
+ export type PlacementResolution =
19
+ | { kind: 'node'; node: string } // reconciled: lives on <node>
20
+ | { kind: 'unreachable' } // a Proxmox container, but the API couldn't be reached
21
+ | { kind: 'absent' } // a Proxmox container with a vmid not present in the cluster
22
+ | { kind: 'uncreated' } // a container_service system not yet created (no vmid)
23
+ | { kind: 'machine' } // a machine-pool system (no Proxmox node)
24
+ | { kind: 'other' }; // a non-Proxmox container service (e.g. DigitalOcean)
25
+
26
+ /** One-line placement description for `module status`. Pure (Rule 10). */
27
+ export function formatPlacementLine(sys: DeployedSystem, res: PlacementResolution): string {
28
+ const zone = `zone ${sys.zone}`;
29
+ const vmid = sys.infrastructure.vmid;
30
+ switch (res.kind) {
31
+ case 'machine':
32
+ return `${sys.hostname} — machine (${zone})`;
33
+ case 'other':
34
+ return `${sys.hostname} — container_service, non-Proxmox (${zone})`;
35
+ case 'uncreated':
36
+ return `${sys.hostname} — not yet created (${zone})`;
37
+ case 'unreachable':
38
+ return `${sys.hostname} (vmid ${vmid}) → node unknown — Proxmox unreachable (${zone})`;
39
+ case 'absent':
40
+ return `${sys.hostname} (vmid ${vmid}) → not found in Proxmox — not created? (${zone})`;
41
+ case 'node':
42
+ return `${sys.hostname} (vmid ${vmid}) → ${res.node} (${zone})`;
43
+ }
44
+ }
45
+
46
+ /** Pure resolution of one system given the reconciled vmid→node map + service classification. */
47
+ export function resolveOnePlacement(
48
+ sys: DeployedSystem,
49
+ vmidToNode: Map<number, string>,
50
+ unreachable: Set<string>,
51
+ nonProxmox: Set<string>,
52
+ ): PlacementResolution {
53
+ const infra = sys.infrastructure;
54
+ if (infra.type !== 'container_service') return { kind: 'machine' };
55
+ if (infra.serviceId && nonProxmox.has(infra.serviceId)) return { kind: 'other' };
56
+ if (infra.vmid == null) return { kind: 'uncreated' };
57
+ if (infra.serviceId && unreachable.has(infra.serviceId)) return { kind: 'unreachable' };
58
+ const node = vmidToNode.get(infra.vmid);
59
+ return node ? { kind: 'node', node } : { kind: 'absent' };
60
+ }
61
+
62
+ /**
63
+ * Reconcile each system's real node from Proxmox. One `/cluster/resources` fetch
64
+ * per distinct Proxmox service (usually one); machine + non-Proxmox systems need
65
+ * no call. Returns each system paired with its resolution, in input order.
66
+ */
67
+ export async function reconcilePlacement(
68
+ systems: DeployedSystem[],
69
+ ): Promise<Array<{ system: DeployedSystem; resolution: PlacementResolution }>> {
70
+ const serviceIds = [
71
+ ...new Set(
72
+ systems
73
+ .filter((s) => s.infrastructure.type === 'container_service')
74
+ .map((s) => s.infrastructure.serviceId)
75
+ .filter((id): id is string => !!id),
76
+ ),
77
+ ];
78
+
79
+ const vmidToNode = new Map<number, string>();
80
+ const unreachable = new Set<string>();
81
+ const nonProxmox = new Set<string>();
82
+
83
+ for (const serviceId of serviceIds) {
84
+ const service = await getContainerService(serviceId);
85
+ if (!service || service.providerName !== 'proxmox') {
86
+ nonProxmox.add(serviceId);
87
+ continue;
88
+ }
89
+ try {
90
+ const creds = (await getServiceCredentials(serviceId)) as ProxmoxCredentials;
91
+ const result = await new ProxmoxClient(creds).clusterResources();
92
+ if (!result.success) {
93
+ unreachable.add(serviceId);
94
+ continue;
95
+ }
96
+ for (const r of result.data) {
97
+ if (typeof r.vmid === 'number' && r.node) vmidToNode.set(r.vmid, r.node);
98
+ }
99
+ } catch {
100
+ unreachable.add(serviceId);
101
+ }
102
+ }
103
+
104
+ return systems.map((system) => ({
105
+ system,
106
+ resolution: resolveOnePlacement(system, vmidToNode, unreachable, nonProxmox),
107
+ }));
108
+ }
@@ -23,6 +23,7 @@ import { getOrCreateMasterKey } from '../secrets/master-key';
23
23
  import type {
24
24
  ConfigRequiredPayload,
25
25
  EnsureRequiredPayload,
26
+ InterviewRequiredPayload,
26
27
  SecretRequiredPayload,
27
28
  } from './bus-interview';
28
29
  import { readModuleSecretKey, writeModuleSecretKey } from './config-interview';
@@ -58,6 +59,13 @@ export interface ResponderValues {
58
59
  secretValues?: Record<string, string>;
59
60
  }
60
61
  >;
62
+ /**
63
+ * Generic interview answers keyed by `<scope>.<key>` (ISS-0127). When a
64
+ * command emits `interview.required.<scope>.<key>`, the responder replies
65
+ * with `{ value }`. The value's shape should match the payload's `kind`
66
+ * (string for text/select, string[] for multiselect, boolean for confirm).
67
+ */
68
+ interview?: Record<string, unknown>;
61
69
  }
62
70
 
63
71
  export interface ProgrammaticResponderOptions {
@@ -108,6 +116,7 @@ export interface ProgrammaticResponderHandle {
108
116
  seenConfigPayloads(): ConfigRequiredPayload[];
109
117
  seenSecretPayloads(): SecretRequiredPayload[];
110
118
  seenEnsurePayloads(): EnsureRequiredPayload[];
119
+ seenInterviewPayloads(): InterviewRequiredPayload[];
111
120
  /** Stop watching. Caller still owns the db client. */
112
121
  close(): void;
113
122
  }
@@ -126,6 +135,7 @@ export function startProgrammaticResponder(
126
135
  const seenConfig: ConfigRequiredPayload[] = [];
127
136
  const seenSecret: SecretRequiredPayload[] = [];
128
137
  const seenEnsure: EnsureRequiredPayload[] = [];
138
+ const seenInterview: InterviewRequiredPayload[] = [];
129
139
  let lastActivityAt = Date.now();
130
140
 
131
141
  const me = opts.emittedBy ?? 'programmatic';
@@ -276,6 +286,28 @@ export function startProgrammaticResponder(
276
286
  answered.push({ type: event.type, key: lookupKey });
277
287
  });
278
288
 
289
+ const interviewWatch = bus.watch('interview.required.*.*', async (event) => {
290
+ if (event.replyFor !== null) return;
291
+ lastActivityAt = Date.now();
292
+
293
+ const payload = event.payload as InterviewRequiredPayload;
294
+ if (!payload || typeof payload.scope !== 'string' || typeof payload.key !== 'string') {
295
+ missed.push({ type: event.type, key: '?', reason: 'malformed payload' });
296
+ return;
297
+ }
298
+ seenInterview.push(payload);
299
+
300
+ const lookupKey = `${payload.scope}.${payload.key}`;
301
+ const value = opts.values.interview?.[lookupKey];
302
+ if (value === undefined) {
303
+ handleMissing(event.type, lookupKey, `no interview value for "${lookupKey}"`);
304
+ return;
305
+ }
306
+
307
+ bus.emitRaw(`${event.type}.reply`, { value }, { replyFor: event.id, emittedBy: me });
308
+ answered.push({ type: event.type, key: lookupKey });
309
+ });
310
+
279
311
  // Liveness probe: a non-interactive caller (e.g. `module generate`
280
312
  // with no TTY) emits `responder.probe` to detect whether any
281
313
  // responder is listening before calling busInterview (which waits
@@ -298,10 +330,12 @@ export function startProgrammaticResponder(
298
330
  seenConfigPayloads: () => [...seenConfig],
299
331
  seenSecretPayloads: () => [...seenSecret],
300
332
  seenEnsurePayloads: () => [...seenEnsure],
333
+ seenInterviewPayloads: () => [...seenInterview],
301
334
  close: () => {
302
335
  configWatch.close();
303
336
  secretWatch.close();
304
337
  ensureWatch.close();
338
+ interviewWatch.close();
305
339
  probeWatch.close();
306
340
  bus.close();
307
341
  },
@@ -37,6 +37,7 @@ import { getOrCreateMasterKey } from '../secrets/master-key';
37
37
  import type {
38
38
  ConfigRequiredPayload,
39
39
  EnsureRequiredPayload,
40
+ InterviewRequiredPayload,
40
41
  SecretRequiredPayload,
41
42
  } from './bus-interview';
42
43
  import { readModuleSecretKey, writeModuleSecretKey } from './config-interview';
@@ -283,6 +284,117 @@ export function startTerminalResponder(): TerminalResponderHandle {
283
284
  });
284
285
  });
285
286
 
287
+ // Generic interview family (ISS-0127). Non-deploy commands (e.g.
288
+ // `service reconfigure`) ask their questions here so they're driveable
289
+ // over the bus like a deploy. Renders by `kind`. Like the other watches,
290
+ // we ignore reply events (replyFor !== null) and de-dupe by event id.
291
+ const interviewWatch = bus.watch('interview.required.*.*', async (event) => {
292
+ if (event.replyFor !== null) return;
293
+ if (handled.has(event.id)) return;
294
+ handled.add(event.id);
295
+
296
+ const payload = event.payload as InterviewRequiredPayload;
297
+ if (!payload || typeof payload.scope !== 'string' || typeof payload.key !== 'string') {
298
+ log.warn(
299
+ `Terminal responder skipped malformed interview event ${event.type} (id ${event.id}): missing scope/key`,
300
+ );
301
+ return;
302
+ }
303
+
304
+ const message = payload.description
305
+ ? `${payload.message} — ${payload.description}`
306
+ : payload.message;
307
+
308
+ let value: unknown;
309
+
310
+ if (payload.kind === 'confirm') {
311
+ const answer = await p.confirm({
312
+ message,
313
+ initialValue: payload.defaultValue === 'true',
314
+ });
315
+ if (p.isCancel(answer)) {
316
+ log.warn(
317
+ `Terminal responder: cancelled prompt for ${payload.scope}.${payload.key}; no reply emitted`,
318
+ );
319
+ return;
320
+ }
321
+ value = answer;
322
+ } else if (payload.kind === 'select') {
323
+ const answer = await p.select({
324
+ message,
325
+ options: (payload.options ?? []).map((opt) => ({
326
+ value: opt.value,
327
+ label: opt.label,
328
+ hint: opt.hint,
329
+ })),
330
+ initialValue: payload.defaultValue,
331
+ });
332
+ if (p.isCancel(answer)) {
333
+ log.warn(
334
+ `Terminal responder: cancelled prompt for ${payload.scope}.${payload.key}; no reply emitted`,
335
+ );
336
+ return;
337
+ }
338
+ value = answer;
339
+ } else if (payload.kind === 'multiselect') {
340
+ const answer = await p.multiselect({
341
+ message,
342
+ options: (payload.options ?? []).map((opt) => ({
343
+ value: opt.value,
344
+ label: opt.label,
345
+ hint: opt.hint,
346
+ })),
347
+ required: payload.required,
348
+ });
349
+ if (p.isCancel(answer)) {
350
+ log.warn(
351
+ `Terminal responder: cancelled prompt for ${payload.scope}.${payload.key}; no reply emitted`,
352
+ );
353
+ return;
354
+ }
355
+ value = answer;
356
+ } else {
357
+ // kind === 'text'
358
+ const type = payload.type ?? 'string';
359
+ const typeHint = describeTypeHint(type);
360
+ const answer = await promptText({
361
+ message: typeHint ? `${message} (${typeHint})` : message,
362
+ defaultValue: payload.defaultValue,
363
+ placeholder: payload.placeholder,
364
+ validate: (val) => {
365
+ if (payload.required && (!val || val.trim() === '')) {
366
+ return 'This field is required';
367
+ }
368
+ if (payload.pattern && val) {
369
+ const re = new RegExp(payload.pattern);
370
+ if (!re.test(val)) return `Value must match: ${payload.pattern}`;
371
+ }
372
+ try {
373
+ coerceValue(val, type);
374
+ } catch (err) {
375
+ return err instanceof Error ? err.message : String(err);
376
+ }
377
+ },
378
+ });
379
+ if (answer === undefined) {
380
+ log.warn(
381
+ `Terminal responder: cancelled prompt for ${payload.scope}.${payload.key}; no reply emitted`,
382
+ );
383
+ return;
384
+ }
385
+ value = coerceValue(answer, type);
386
+ }
387
+
388
+ bus.emitRaw(
389
+ `${event.type}.reply`,
390
+ { value },
391
+ {
392
+ replyFor: event.id,
393
+ emittedBy: me,
394
+ },
395
+ );
396
+ });
397
+
286
398
  // Liveness probe: lets a non-interactive caller in another shell
287
399
  // (e.g. `module generate`) detect that a terminal-responder is
288
400
  // running here and that calling busInterview is safe.
@@ -300,6 +412,7 @@ export function startTerminalResponder(): TerminalResponderHandle {
300
412
  configWatch.close();
301
413
  secretWatch.close();
302
414
  ensureWatch.close();
415
+ interviewWatch.close();
303
416
  probeWatch.close();
304
417
  bus.close();
305
418
  },
@@ -6,6 +6,7 @@ import { type DbClient, createDbClient } from '../db/client';
6
6
  import { capabilities } from '../db/schema';
7
7
  import { upsertModuleConfig } from '../services/module-config';
8
8
  import {
9
+ decideTargetNode,
9
10
  discoverTemplateFiles,
10
11
  generateTemplates,
11
12
  getOutputFilename,
@@ -794,3 +795,32 @@ describe("targetNodeFromTfState (ISS-0090 — terraform state is celilo's placem
794
795
  expect(targetNodeFromTfState({})).toBeNull();
795
796
  });
796
797
  });
798
+
799
+ describe('decideTargetNode (ISS-0090 — deploy follows reality: Proxmox > state > default)', () => {
800
+ test('Proxmox reality wins — adopts a hand-migration tf-state would miss', () => {
801
+ expect(
802
+ decideTargetNode({ proxmoxNode: 'node2', stateNode: 'node3', defaultNode: 'node3' }),
803
+ ).toEqual({ node: 'node2', source: 'proxmox' });
804
+ });
805
+
806
+ test('falls back to terraform state when Proxmox is unknown/unreachable', () => {
807
+ expect(
808
+ decideTargetNode({ proxmoxNode: null, stateNode: 'node2', defaultNode: 'node3' }),
809
+ ).toEqual({ node: 'node2', source: 'state' });
810
+ });
811
+
812
+ test('first deploy (no Proxmox, no state) → service default', () => {
813
+ expect(decideTargetNode({ proxmoxNode: null, stateNode: null, defaultNode: 'node3' })).toEqual({
814
+ node: 'node3',
815
+ source: 'default',
816
+ });
817
+ });
818
+
819
+ test('a changed default never relocates a running container (reality overrides default)', () => {
820
+ // default flipped node2→node3, but the container actually runs on node2:
821
+ // resolution stays node2, so the redeploy is an in-place update, not a move.
822
+ expect(
823
+ decideTargetNode({ proxmoxNode: 'node2', stateNode: null, defaultNode: 'node3' }),
824
+ ).toEqual({ node: 'node2', source: 'proxmox' });
825
+ });
826
+ });
@@ -5,6 +5,7 @@ import { dirname, join, relative } from 'node:path';
5
5
  import { and, eq } from 'drizzle-orm';
6
6
  import { generateInventory } from '../ansible/inventory';
7
7
  import { generateAnsibleSecrets } from '../ansible/secrets';
8
+ import { ProxmoxClient, type ProxmoxCredentials } from '../api-clients/proxmox';
8
9
  import { log } from '../cli/prompts';
9
10
  import { getModuleStoragePath } from '../config/paths';
10
11
  import { type DbClient, getDb } from '../db/client';
@@ -19,12 +20,14 @@ import {
19
20
  import { getSingularSystemSpec } from '../manifest/schema';
20
21
  import type { AnsibleCollection, ModuleManifest } from '../manifest/schema';
21
22
  import { validateZoneRequirements } from '../manifest/validate';
23
+ import { getServiceCredentials } from '../services/container-service';
24
+ import { getModuleSystems } from '../services/deployed-systems';
22
25
  import {
23
26
  describeCapabilityProblem,
24
27
  findBrokenCapabilityDerivations,
25
28
  } from '../services/fleet-checks';
26
29
  import { selectInfrastructure } from '../services/infrastructure-selector';
27
- import { upsertModuleConfig } from '../services/module-config';
30
+ import { deleteModuleConfig, upsertModuleConfig } from '../services/module-config';
28
31
  import type { InfrastructureSelection } from '../types/infrastructure';
29
32
  import { convertSecretsToJinja } from '../variables/ansible-resolver';
30
33
  import { buildResolutionContext } from '../variables/context';
@@ -261,6 +264,49 @@ async function readDeployedTargetNode(moduleId: string): Promise<string | null>
261
264
  }
262
265
  }
263
266
 
267
+ /**
268
+ * The node a module's container ACTUALLY lives on, from Proxmox (ISS-0090).
269
+ * Proxmox is the ultimate source of truth for current location — it sees a
270
+ * hand-migration that celilo's terraform state wouldn't. Returns null when the
271
+ * module has no deployed vmid yet, the service is unreachable, or the vmid isn't
272
+ * in the cluster — the caller then falls back to terraform state / the default.
273
+ * Never throws: a Proxmox outage must not block a deploy.
274
+ */
275
+ async function readProxmoxNodeForModule(
276
+ moduleId: string,
277
+ serviceId: string,
278
+ db: DbClient,
279
+ ): Promise<string | null> {
280
+ const vmid = getModuleSystems(moduleId, db).find(
281
+ (s) => s.infrastructure.type === 'container_service' && s.infrastructure.vmid != null,
282
+ )?.infrastructure.vmid;
283
+ if (vmid == null) return null;
284
+ try {
285
+ const creds = (await getServiceCredentials(serviceId)) as ProxmoxCredentials;
286
+ const result = await new ProxmoxClient(creds).nodeForVmid(vmid);
287
+ return result.success ? result.data : null;
288
+ } catch {
289
+ return null;
290
+ }
291
+ }
292
+
293
+ /**
294
+ * Decide which node to target for a deploy (ISS-0090). Pure (Rule 10):
295
+ * Proxmox reality > recorded terraform state > service default.
296
+ * `default_target_node` governs only a FIRST placement; a changed default must
297
+ * never relocate a running container. A hand-migration (seen by Proxmox but not
298
+ * tf-state) is adopted. Deliberate moves are an explicit migrate (ISS-0062).
299
+ */
300
+ export function decideTargetNode(opts: {
301
+ proxmoxNode: string | null;
302
+ stateNode: string | null;
303
+ defaultNode: string;
304
+ }): { node: string; source: 'proxmox' | 'state' | 'default' } {
305
+ if (opts.proxmoxNode) return { node: opts.proxmoxNode, source: 'proxmox' };
306
+ if (opts.stateNode) return { node: opts.stateNode, source: 'state' };
307
+ return { node: opts.defaultNode, source: 'default' };
308
+ }
309
+
264
310
  /**
265
311
  * Discover template files in directory recursively
266
312
  *
@@ -684,6 +730,9 @@ export async function generateTemplates(options: GenerateOptions): Promise<Gener
684
730
  // Infrastructure Properties Resolution (Proxmox provider config)
685
731
  // For Proxmox services, extract provider config and store as temporary values
686
732
  // This happens during generation so templates can access target_node, lxc_template, etc.
733
+ // Resolved live each generate (ISS-0090) and injected into the context below,
734
+ // never cached in the DB. undefined for non-Proxmox / machine deploys.
735
+ let resolvedTargetNode: string | undefined;
687
736
  if (isContainerService && isProxmoxService && infrastructureSelection?.serviceId) {
688
737
  const service = await db
689
738
  .select()
@@ -698,37 +747,36 @@ export async function generateTemplates(options: GenerateOptions): Promise<Gener
698
747
  storage: string;
699
748
  };
700
749
 
701
- // ISS-0090: target the node the container ACTUALLY lives on, not the
702
- // service default. `default_target_node` governs only NEW placement; a
703
- // changed default must NEVER relocate a running system. celilo's terraform
704
- // state is its authoritative record of where it placed this container (kept
705
- // in sync by deploy and by `module migrate`), so read the deployed node from
706
- // there. Fall back to the default only when there's no state yet — a first
707
- // deploy. Deliberate relocation is an explicit `module migrate`, not a
708
- // side-effect of the default changing.
709
- let targetNode = providerConfig.default_target_node;
710
- const deployedNode = await readDeployedTargetNode(moduleId);
711
- if (deployedNode) {
712
- if (deployedNode !== targetNode) {
713
- log.info(
714
- `${moduleId} is already deployed on node '${deployedNode}' — targeting it (service default is '${providerConfig.default_target_node}'). Relocating requires a deliberate migration.`,
715
- );
716
- }
717
- targetNode = deployedNode;
750
+ // ISS-0090: deploy follows REALITY. Resolve the node from Proxmox (it sees
751
+ // a hand-migration that tf-state wouldn't), else the recorded terraform
752
+ // state, else the service default (FIRST placement only). A changed default
753
+ // must never relocate a running container; deliberate moves are an explicit
754
+ // migrate (ISS-0062).
755
+ const decision = decideTargetNode({
756
+ proxmoxNode: await readProxmoxNodeForModule(
757
+ moduleId,
758
+ infrastructureSelection.serviceId,
759
+ db,
760
+ ),
761
+ stateNode: await readDeployedTargetNode(moduleId),
762
+ defaultNode: providerConfig.default_target_node,
763
+ });
764
+ resolvedTargetNode = decision.node;
765
+ if (decision.source !== 'default' && decision.node !== providerConfig.default_target_node) {
766
+ const from = decision.source === 'proxmox' ? 'Proxmox' : 'terraform state';
767
+ log.info(
768
+ `${moduleId} → node '${decision.node}' (from ${from}; service default is '${providerConfig.default_target_node}'). Relocating requires a deliberate migration.`,
769
+ );
718
770
  }
719
771
 
720
- // Store provider config values as temporary config (similar to IPAM allocation)
721
- const infraProperties = [
722
- { key: 'target_node', value: targetNode },
723
- { key: 'lxc_template', value: providerConfig.lxc_template },
724
- { key: 'storage', value: providerConfig.storage },
725
- ];
726
-
727
- for (const prop of infraProperties) {
728
- upsertModuleConfig(db, moduleId, `__infra_${prop.key}`, prop.value);
729
- }
772
+ // Persist only the non-drift provider values. target_node is reality — it's
773
+ // injected into the resolution context below, never cached (ISS-0090); drop
774
+ // any stale __infra_target_node a prior generate left behind.
775
+ upsertModuleConfig(db, moduleId, '__infra_lxc_template', providerConfig.lxc_template);
776
+ upsertModuleConfig(db, moduleId, '__infra_storage', providerConfig.storage);
777
+ deleteModuleConfig(db, moduleId, '__infra_target_node');
730
778
 
731
- log.success(`Infrastructure properties resolved from service: target_node=${targetNode}`);
779
+ log.success(`Infrastructure resolved: target_node=${decision.node} (${decision.source})`);
732
780
  }
733
781
  }
734
782
 
@@ -760,8 +808,15 @@ export async function generateTemplates(options: GenerateOptions): Promise<Gener
760
808
  context.selfConfig.target_ip = ipConfig.value!;
761
809
  }
762
810
 
763
- // Add infrastructure properties to context (target_node, lxc_template, storage)
764
- const infraKeys = ['target_node', 'lxc_template', 'storage'];
811
+ // target_node is the live-resolved reality (ISS-0090) — inject it directly,
812
+ // never from a cached __infra_target_node row (which drifts).
813
+ if (resolvedTargetNode) {
814
+ context.selfConfig.target_node = resolvedTargetNode;
815
+ }
816
+
817
+ // lxc_template / storage are provider config (intent, not drift-prone) — read
818
+ // them back from the __infra_* rows persisted above.
819
+ const infraKeys = ['lxc_template', 'storage'];
765
820
  for (const key of infraKeys) {
766
821
  const infraConfig = db
767
822
  .select()