@celilo/cli 0.5.0-alpha.7 → 0.5.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/package.json +2 -2
  2. package/src/api-clients/proxmox.test.ts +78 -0
  3. package/src/api-clients/proxmox.ts +96 -1
  4. package/src/cli/command-registry.ts +32 -3
  5. package/src/cli/commands/backup-delete.ts +10 -7
  6. package/src/cli/commands/backup-import.ts +11 -8
  7. package/src/cli/commands/backup-restore.ts +11 -8
  8. package/src/cli/commands/events.ts +8 -3
  9. package/src/cli/commands/machine-add.ts +178 -163
  10. package/src/cli/commands/machine-remove.ts +10 -7
  11. package/src/cli/commands/module-config.test.ts +78 -0
  12. package/src/cli/commands/module-config.ts +18 -3
  13. package/src/cli/commands/module-import.ts +9 -5
  14. package/src/cli/commands/module-remove.ts +20 -9
  15. package/src/cli/commands/module-status.ts +15 -0
  16. package/src/cli/commands/module-upgrade.ts +10 -6
  17. package/src/cli/commands/proxmox-node-list.ts +101 -0
  18. package/src/cli/commands/proxmox-template-selection.ts +16 -15
  19. package/src/cli/commands/service-add-digitalocean.ts +120 -109
  20. package/src/cli/commands/service-add-proxmox.ts +275 -260
  21. package/src/cli/commands/service-reconfigure.ts +171 -153
  22. package/src/cli/commands/service-remove.ts +19 -13
  23. package/src/cli/commands/service-verify.ts +9 -10
  24. package/src/cli/commands/storage-add-local.ts +120 -107
  25. package/src/cli/commands/storage-add-s3.ts +145 -131
  26. package/src/cli/commands/storage-remove.ts +11 -8
  27. package/src/cli/commands/system-init.ts +119 -128
  28. package/src/cli/completion.ts +15 -0
  29. package/src/cli/index.ts +25 -0
  30. package/src/cli/service-credential.ts +54 -0
  31. package/src/services/bus-interview.ts +232 -0
  32. package/src/services/deploy-validation.test.ts +52 -2
  33. package/src/services/deploy-validation.ts +27 -36
  34. package/src/services/fleet-checks.test.ts +13 -0
  35. package/src/services/fleet-checks.ts +15 -0
  36. package/src/services/module-config.ts +12 -0
  37. package/src/services/module-deploy.ts +7 -6
  38. package/src/services/placement-reconcile.test.ts +86 -0
  39. package/src/services/placement-reconcile.ts +108 -0
  40. package/src/services/programmatic-responder.ts +34 -0
  41. package/src/services/terminal-responder.ts +113 -0
  42. package/src/templates/generator.test.ts +30 -0
  43. package/src/templates/generator.ts +86 -31
@@ -37,6 +37,7 @@ import { getOrCreateMasterKey } from '../secrets/master-key';
37
37
  import type {
38
38
  ConfigRequiredPayload,
39
39
  EnsureRequiredPayload,
40
+ InterviewRequiredPayload,
40
41
  SecretRequiredPayload,
41
42
  } from './bus-interview';
42
43
  import { readModuleSecretKey, writeModuleSecretKey } from './config-interview';
@@ -283,6 +284,117 @@ export function startTerminalResponder(): TerminalResponderHandle {
283
284
  });
284
285
  });
285
286
 
287
+ // Generic interview family (ISS-0127). Non-deploy commands (e.g.
288
+ // `service reconfigure`) ask their questions here so they're driveable
289
+ // over the bus like a deploy. Renders by `kind`. Like the other watches,
290
+ // we ignore reply events (replyFor !== null) and de-dupe by event id.
291
+ const interviewWatch = bus.watch('interview.required.*.*', async (event) => {
292
+ if (event.replyFor !== null) return;
293
+ if (handled.has(event.id)) return;
294
+ handled.add(event.id);
295
+
296
+ const payload = event.payload as InterviewRequiredPayload;
297
+ if (!payload || typeof payload.scope !== 'string' || typeof payload.key !== 'string') {
298
+ log.warn(
299
+ `Terminal responder skipped malformed interview event ${event.type} (id ${event.id}): missing scope/key`,
300
+ );
301
+ return;
302
+ }
303
+
304
+ const message = payload.description
305
+ ? `${payload.message} — ${payload.description}`
306
+ : payload.message;
307
+
308
+ let value: unknown;
309
+
310
+ if (payload.kind === 'confirm') {
311
+ const answer = await p.confirm({
312
+ message,
313
+ initialValue: payload.defaultValue === 'true',
314
+ });
315
+ if (p.isCancel(answer)) {
316
+ log.warn(
317
+ `Terminal responder: cancelled prompt for ${payload.scope}.${payload.key}; no reply emitted`,
318
+ );
319
+ return;
320
+ }
321
+ value = answer;
322
+ } else if (payload.kind === 'select') {
323
+ const answer = await p.select({
324
+ message,
325
+ options: (payload.options ?? []).map((opt) => ({
326
+ value: opt.value,
327
+ label: opt.label,
328
+ hint: opt.hint,
329
+ })),
330
+ initialValue: payload.defaultValue,
331
+ });
332
+ if (p.isCancel(answer)) {
333
+ log.warn(
334
+ `Terminal responder: cancelled prompt for ${payload.scope}.${payload.key}; no reply emitted`,
335
+ );
336
+ return;
337
+ }
338
+ value = answer;
339
+ } else if (payload.kind === 'multiselect') {
340
+ const answer = await p.multiselect({
341
+ message,
342
+ options: (payload.options ?? []).map((opt) => ({
343
+ value: opt.value,
344
+ label: opt.label,
345
+ hint: opt.hint,
346
+ })),
347
+ required: payload.required,
348
+ });
349
+ if (p.isCancel(answer)) {
350
+ log.warn(
351
+ `Terminal responder: cancelled prompt for ${payload.scope}.${payload.key}; no reply emitted`,
352
+ );
353
+ return;
354
+ }
355
+ value = answer;
356
+ } else {
357
+ // kind === 'text'
358
+ const type = payload.type ?? 'string';
359
+ const typeHint = describeTypeHint(type);
360
+ const answer = await promptText({
361
+ message: typeHint ? `${message} (${typeHint})` : message,
362
+ defaultValue: payload.defaultValue,
363
+ placeholder: payload.placeholder,
364
+ validate: (val) => {
365
+ if (payload.required && (!val || val.trim() === '')) {
366
+ return 'This field is required';
367
+ }
368
+ if (payload.pattern && val) {
369
+ const re = new RegExp(payload.pattern);
370
+ if (!re.test(val)) return `Value must match: ${payload.pattern}`;
371
+ }
372
+ try {
373
+ coerceValue(val, type);
374
+ } catch (err) {
375
+ return err instanceof Error ? err.message : String(err);
376
+ }
377
+ },
378
+ });
379
+ if (answer === undefined) {
380
+ log.warn(
381
+ `Terminal responder: cancelled prompt for ${payload.scope}.${payload.key}; no reply emitted`,
382
+ );
383
+ return;
384
+ }
385
+ value = coerceValue(answer, type);
386
+ }
387
+
388
+ bus.emitRaw(
389
+ `${event.type}.reply`,
390
+ { value },
391
+ {
392
+ replyFor: event.id,
393
+ emittedBy: me,
394
+ },
395
+ );
396
+ });
397
+
286
398
  // Liveness probe: lets a non-interactive caller in another shell
287
399
  // (e.g. `module generate`) detect that a terminal-responder is
288
400
  // running here and that calling busInterview is safe.
@@ -300,6 +412,7 @@ export function startTerminalResponder(): TerminalResponderHandle {
300
412
  configWatch.close();
301
413
  secretWatch.close();
302
414
  ensureWatch.close();
415
+ interviewWatch.close();
303
416
  probeWatch.close();
304
417
  bus.close();
305
418
  },
@@ -6,6 +6,7 @@ import { type DbClient, createDbClient } from '../db/client';
6
6
  import { capabilities } from '../db/schema';
7
7
  import { upsertModuleConfig } from '../services/module-config';
8
8
  import {
9
+ decideTargetNode,
9
10
  discoverTemplateFiles,
10
11
  generateTemplates,
11
12
  getOutputFilename,
@@ -794,3 +795,32 @@ describe("targetNodeFromTfState (ISS-0090 — terraform state is celilo's placem
794
795
  expect(targetNodeFromTfState({})).toBeNull();
795
796
  });
796
797
  });
798
+
799
+ describe('decideTargetNode (ISS-0090 — deploy follows reality: Proxmox > state > default)', () => {
800
+ test('Proxmox reality wins — adopts a hand-migration tf-state would miss', () => {
801
+ expect(
802
+ decideTargetNode({ proxmoxNode: 'node2', stateNode: 'node3', defaultNode: 'node3' }),
803
+ ).toEqual({ node: 'node2', source: 'proxmox' });
804
+ });
805
+
806
+ test('falls back to terraform state when Proxmox is unknown/unreachable', () => {
807
+ expect(
808
+ decideTargetNode({ proxmoxNode: null, stateNode: 'node2', defaultNode: 'node3' }),
809
+ ).toEqual({ node: 'node2', source: 'state' });
810
+ });
811
+
812
+ test('first deploy (no Proxmox, no state) → service default', () => {
813
+ expect(decideTargetNode({ proxmoxNode: null, stateNode: null, defaultNode: 'node3' })).toEqual({
814
+ node: 'node3',
815
+ source: 'default',
816
+ });
817
+ });
818
+
819
+ test('a changed default never relocates a running container (reality overrides default)', () => {
820
+ // default flipped node2→node3, but the container actually runs on node2:
821
+ // resolution stays node2, so the redeploy is an in-place update, not a move.
822
+ expect(
823
+ decideTargetNode({ proxmoxNode: 'node2', stateNode: null, defaultNode: 'node3' }),
824
+ ).toEqual({ node: 'node2', source: 'proxmox' });
825
+ });
826
+ });
@@ -5,6 +5,7 @@ import { dirname, join, relative } from 'node:path';
5
5
  import { and, eq } from 'drizzle-orm';
6
6
  import { generateInventory } from '../ansible/inventory';
7
7
  import { generateAnsibleSecrets } from '../ansible/secrets';
8
+ import { ProxmoxClient, type ProxmoxCredentials } from '../api-clients/proxmox';
8
9
  import { log } from '../cli/prompts';
9
10
  import { getModuleStoragePath } from '../config/paths';
10
11
  import { type DbClient, getDb } from '../db/client';
@@ -19,12 +20,14 @@ import {
19
20
  import { getSingularSystemSpec } from '../manifest/schema';
20
21
  import type { AnsibleCollection, ModuleManifest } from '../manifest/schema';
21
22
  import { validateZoneRequirements } from '../manifest/validate';
23
+ import { getServiceCredentials } from '../services/container-service';
24
+ import { getModuleSystems } from '../services/deployed-systems';
22
25
  import {
23
26
  describeCapabilityProblem,
24
27
  findBrokenCapabilityDerivations,
25
28
  } from '../services/fleet-checks';
26
29
  import { selectInfrastructure } from '../services/infrastructure-selector';
27
- import { upsertModuleConfig } from '../services/module-config';
30
+ import { deleteModuleConfig, upsertModuleConfig } from '../services/module-config';
28
31
  import type { InfrastructureSelection } from '../types/infrastructure';
29
32
  import { convertSecretsToJinja } from '../variables/ansible-resolver';
30
33
  import { buildResolutionContext } from '../variables/context';
@@ -261,6 +264,49 @@ async function readDeployedTargetNode(moduleId: string): Promise<string | null>
261
264
  }
262
265
  }
263
266
 
267
+ /**
268
+ * The node a module's container ACTUALLY lives on, from Proxmox (ISS-0090).
269
+ * Proxmox is the ultimate source of truth for current location — it sees a
270
+ * hand-migration that celilo's terraform state wouldn't. Returns null when the
271
+ * module has no deployed vmid yet, the service is unreachable, or the vmid isn't
272
+ * in the cluster — the caller then falls back to terraform state / the default.
273
+ * Never throws: a Proxmox outage must not block a deploy.
274
+ */
275
+ async function readProxmoxNodeForModule(
276
+ moduleId: string,
277
+ serviceId: string,
278
+ db: DbClient,
279
+ ): Promise<string | null> {
280
+ const vmid = getModuleSystems(moduleId, db).find(
281
+ (s) => s.infrastructure.type === 'container_service' && s.infrastructure.vmid != null,
282
+ )?.infrastructure.vmid;
283
+ if (vmid == null) return null;
284
+ try {
285
+ const creds = (await getServiceCredentials(serviceId)) as ProxmoxCredentials;
286
+ const result = await new ProxmoxClient(creds).nodeForVmid(vmid);
287
+ return result.success ? result.data : null;
288
+ } catch {
289
+ return null;
290
+ }
291
+ }
292
+
293
+ /**
294
+ * Decide which node to target for a deploy (ISS-0090). Pure (Rule 10):
295
+ * Proxmox reality > recorded terraform state > service default.
296
+ * `default_target_node` governs only a FIRST placement; a changed default must
297
+ * never relocate a running container. A hand-migration (seen by Proxmox but not
298
+ * tf-state) is adopted. Deliberate moves are an explicit migrate (ISS-0062).
299
+ */
300
+ export function decideTargetNode(opts: {
301
+ proxmoxNode: string | null;
302
+ stateNode: string | null;
303
+ defaultNode: string;
304
+ }): { node: string; source: 'proxmox' | 'state' | 'default' } {
305
+ if (opts.proxmoxNode) return { node: opts.proxmoxNode, source: 'proxmox' };
306
+ if (opts.stateNode) return { node: opts.stateNode, source: 'state' };
307
+ return { node: opts.defaultNode, source: 'default' };
308
+ }
309
+
264
310
  /**
265
311
  * Discover template files in directory recursively
266
312
  *
@@ -684,6 +730,9 @@ export async function generateTemplates(options: GenerateOptions): Promise<Gener
684
730
  // Infrastructure Properties Resolution (Proxmox provider config)
685
731
  // For Proxmox services, extract provider config and store as temporary values
686
732
  // This happens during generation so templates can access target_node, lxc_template, etc.
733
+ // Resolved live each generate (ISS-0090) and injected into the context below,
734
+ // never cached in the DB. undefined for non-Proxmox / machine deploys.
735
+ let resolvedTargetNode: string | undefined;
687
736
  if (isContainerService && isProxmoxService && infrastructureSelection?.serviceId) {
688
737
  const service = await db
689
738
  .select()
@@ -698,37 +747,36 @@ export async function generateTemplates(options: GenerateOptions): Promise<Gener
698
747
  storage: string;
699
748
  };
700
749
 
701
- // ISS-0090: target the node the container ACTUALLY lives on, not the
702
- // service default. `default_target_node` governs only NEW placement; a
703
- // changed default must NEVER relocate a running system. celilo's terraform
704
- // state is its authoritative record of where it placed this container (kept
705
- // in sync by deploy and by `module migrate`), so read the deployed node from
706
- // there. Fall back to the default only when there's no state yet — a first
707
- // deploy. Deliberate relocation is an explicit `module migrate`, not a
708
- // side-effect of the default changing.
709
- let targetNode = providerConfig.default_target_node;
710
- const deployedNode = await readDeployedTargetNode(moduleId);
711
- if (deployedNode) {
712
- if (deployedNode !== targetNode) {
713
- log.info(
714
- `${moduleId} is already deployed on node '${deployedNode}' — targeting it (service default is '${providerConfig.default_target_node}'). Relocating requires a deliberate migration.`,
715
- );
716
- }
717
- targetNode = deployedNode;
750
+ // ISS-0090: deploy follows REALITY. Resolve the node from Proxmox (it sees
751
+ // a hand-migration that tf-state wouldn't), else the recorded terraform
752
+ // state, else the service default (FIRST placement only). A changed default
753
+ // must never relocate a running container; deliberate moves are an explicit
754
+ // migrate (ISS-0062).
755
+ const decision = decideTargetNode({
756
+ proxmoxNode: await readProxmoxNodeForModule(
757
+ moduleId,
758
+ infrastructureSelection.serviceId,
759
+ db,
760
+ ),
761
+ stateNode: await readDeployedTargetNode(moduleId),
762
+ defaultNode: providerConfig.default_target_node,
763
+ });
764
+ resolvedTargetNode = decision.node;
765
+ if (decision.source !== 'default' && decision.node !== providerConfig.default_target_node) {
766
+ const from = decision.source === 'proxmox' ? 'Proxmox' : 'terraform state';
767
+ log.info(
768
+ `${moduleId} → node '${decision.node}' (from ${from}; service default is '${providerConfig.default_target_node}'). Relocating requires a deliberate migration.`,
769
+ );
718
770
  }
719
771
 
720
- // Store provider config values as temporary config (similar to IPAM allocation)
721
- const infraProperties = [
722
- { key: 'target_node', value: targetNode },
723
- { key: 'lxc_template', value: providerConfig.lxc_template },
724
- { key: 'storage', value: providerConfig.storage },
725
- ];
726
-
727
- for (const prop of infraProperties) {
728
- upsertModuleConfig(db, moduleId, `__infra_${prop.key}`, prop.value);
729
- }
772
+ // Persist only the non-drift provider values. target_node is reality — it's
773
+ // injected into the resolution context below, never cached (ISS-0090); drop
774
+ // any stale __infra_target_node a prior generate left behind.
775
+ upsertModuleConfig(db, moduleId, '__infra_lxc_template', providerConfig.lxc_template);
776
+ upsertModuleConfig(db, moduleId, '__infra_storage', providerConfig.storage);
777
+ deleteModuleConfig(db, moduleId, '__infra_target_node');
730
778
 
731
- log.success(`Infrastructure properties resolved from service: target_node=${targetNode}`);
779
+ log.success(`Infrastructure resolved: target_node=${decision.node} (${decision.source})`);
732
780
  }
733
781
  }
734
782
 
@@ -760,8 +808,15 @@ export async function generateTemplates(options: GenerateOptions): Promise<Gener
760
808
  context.selfConfig.target_ip = ipConfig.value!;
761
809
  }
762
810
 
763
- // Add infrastructure properties to context (target_node, lxc_template, storage)
764
- const infraKeys = ['target_node', 'lxc_template', 'storage'];
811
+ // target_node is the live-resolved reality (ISS-0090) — inject it directly,
812
+ // never from a cached __infra_target_node row (which drifts).
813
+ if (resolvedTargetNode) {
814
+ context.selfConfig.target_node = resolvedTargetNode;
815
+ }
816
+
817
+ // lxc_template / storage are provider config (intent, not drift-prone) — read
818
+ // them back from the __infra_* rows persisted above.
819
+ const infraKeys = ['lxc_template', 'storage'];
765
820
  for (const key of infraKeys) {
766
821
  const infraConfig = db
767
822
  .select()