@celilo/cli 0.4.0-alpha.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/drizzle/0008_aspect_consent.sql +1 -0
  2. package/drizzle/meta/_journal.json +7 -0
  3. package/package.json +5 -6
  4. package/src/cli/command-registry.ts +38 -0
  5. package/src/cli/commands/backup-pull.test.ts +48 -0
  6. package/src/cli/commands/backup-pull.ts +116 -0
  7. package/src/cli/commands/events.test.ts +108 -0
  8. package/src/cli/commands/events.ts +243 -0
  9. package/src/cli/commands/module-generate.ts +5 -4
  10. package/src/cli/commands/module-import-aspect.test.ts +116 -0
  11. package/src/cli/commands/module-import.ts +12 -1
  12. package/src/cli/commands/restore.ts +5 -0
  13. package/src/cli/commands/storage-add-s3.ts +91 -46
  14. package/src/cli/completion.ts +2 -1
  15. package/src/cli/index.ts +11 -0
  16. package/src/db/client.ts +4 -0
  17. package/src/db/schema.ts +9 -1
  18. package/src/hooks/capability-loader.test.ts +31 -1
  19. package/src/hooks/capability-loader.ts +65 -16
  20. package/src/manifest/contracts/v1.ts +12 -0
  21. package/src/manifest/schema.ts +13 -1
  22. package/src/manifest/template-validator.ts +1 -0
  23. package/src/module/import.ts +10 -5
  24. package/src/module/packaging/build.test.ts +75 -0
  25. package/src/module/packaging/build.ts +9 -20
  26. package/src/module/packaging/package-rules.ts +44 -0
  27. package/src/secrets/generators.test.ts +14 -1
  28. package/src/secrets/generators.ts +63 -1
  29. package/src/services/aspect-approvals.test.ts +30 -10
  30. package/src/services/aspect-approvals.ts +61 -31
  31. package/src/services/aspect-runner.test.ts +161 -8
  32. package/src/services/aspect-runner.ts +156 -34
  33. package/src/services/backup-create.ts +11 -2
  34. package/src/services/bus-ensure-flow.test.ts +19 -1
  35. package/src/services/bus-interview.ts +56 -0
  36. package/src/services/bus-secret-flow.test.ts +19 -1
  37. package/src/services/celilo-events.test.ts +122 -0
  38. package/src/services/celilo-events.ts +144 -0
  39. package/src/services/celilo-mgmt-hooks.test.ts +30 -3
  40. package/src/services/config-interview.ts +38 -19
  41. package/src/services/deploy-planner.test.ts +66 -0
  42. package/src/services/deploy-planner.ts +16 -2
  43. package/src/services/deploy-preflight.ts +18 -1
  44. package/src/services/deployed-systems.ts +30 -1
  45. package/src/services/dns-provider-backfill.test.ts +150 -0
  46. package/src/services/dns-provider-backfill.ts +72 -2
  47. package/src/services/e2e-guard.test.ts +38 -0
  48. package/src/services/e2e-guard.ts +43 -0
  49. package/src/services/module-deploy.ts +12 -26
  50. package/src/services/responder-probe.test.ts +87 -0
  51. package/src/services/responder-probe.ts +29 -0
  52. package/src/services/restore-from-file.test.ts +46 -0
  53. package/src/services/restore-from-file.ts +106 -9
  54. package/src/services/storage-providers/s3.test.ts +101 -0
  55. package/src/templates/generator.test.ts +77 -0
  56. package/src/templates/generator.ts +69 -2
  57. package/src/variables/context.ts +34 -0
  58. package/src/variables/lxc-nameserver.test.ts +86 -0
@@ -16,13 +16,18 @@
16
16
  * of the restore the hook itself cannot do (live DB is open).
17
17
  */
18
18
 
19
+ import { Database } from 'bun:sqlite';
19
20
  import { execSync } from 'node:child_process';
20
21
  import {
21
22
  chmodSync,
23
+ closeSync,
22
24
  copyFileSync,
25
+ cpSync,
23
26
  existsSync,
24
27
  mkdirSync,
28
+ openSync,
25
29
  readFileSync,
30
+ readSync,
26
31
  readdirSync,
27
32
  rmSync,
28
33
  writeFileSync,
@@ -30,7 +35,7 @@ import {
30
35
  import { tmpdir } from 'node:os';
31
36
  import { dirname, join } from 'node:path';
32
37
  import { eq } from 'drizzle-orm';
33
- import { getDbPath, getMasterKeyPath } from '../config/paths';
38
+ import { getDbPath, getMasterKeyPath, getModuleStoragePath } from '../config/paths';
34
39
  import { closeDb, getDb } from '../db/client';
35
40
  import { runMigrations } from '../db/migrate';
36
41
  import { modules } from '../db/schema';
@@ -61,6 +66,8 @@ export interface RestoreFromFileResult {
61
66
  masterKeyApplied?: boolean;
62
67
  /** Was the fleet SSH keypair restored to <dataDir>/.ssh/? */
63
68
  sshKeyApplied?: boolean;
69
+ /** How many module source dirs were laid down at the target's modules dir. */
70
+ moduleSourcesApplied?: number;
64
71
  }
65
72
 
66
73
  export interface RestoreFromFileOptions {
@@ -233,21 +240,32 @@ export async function restoreFromArtifactFile(
233
240
  }
234
241
  }
235
242
 
236
- // 7. Apply staged system files (celilo.db + master.key).
237
- // These were staged by celilo-mgmt's on_restore at
238
- // restore_dir/system/. The live process has the DB open, so
239
- // this swap must happen AFTER the hook returns (which is now).
243
+ // 7. Complete the operation BEFORE swapping the DB. The swap
244
+ // (applyStagedSystemFiles) is the final, irreversible step: it closes
245
+ // the live DB and overwrites celilo.db with the artifact's. Marking the
246
+ // operation complete writes to module_operations; if done AFTER the
247
+ // swap it reopens the freshly-staged artifact DB in THIS process and
248
+ // throws SQLITE_IOERR (a fresh process opens the same file fine; it's a
249
+ // same-process post-swap reopen artifact). The operation row lives in
250
+ // the pre-swap DB the swap discards anyway, so completing it here —
251
+ // while that DB is still open — is correct and avoids the reopen.
252
+ // Nothing in-process must touch the DB after the swap; migrateRestoredDb
253
+ // (the CLI's next step) is best-effort and tolerates a reopen failure.
254
+ completeOperation(opId);
255
+
256
+ // 8. Apply staged system files (celilo.db + master.key + fleet SSH key).
257
+ // Staged by celilo-mgmt's on_restore at restore_dir/system/. The live
258
+ // process had the DB open, so this swap happens AFTER the hook returns.
240
259
  const systemStaging = join(restoreDataDir, 'system');
241
260
  const apply = applyStagedSystemFiles(systemStaging);
242
261
 
243
- completeOperation(opId);
244
-
245
262
  return {
246
263
  success: true,
247
264
  crossModuleApplied,
248
265
  systemDbApplied: apply.dbApplied,
249
266
  masterKeyApplied: apply.keyApplied,
250
267
  sshKeyApplied: apply.sshApplied,
268
+ moduleSourcesApplied: apply.moduleSourcesApplied,
251
269
  };
252
270
  } finally {
253
271
  try {
@@ -262,6 +280,61 @@ export interface StagedSystemApplyResult {
262
280
  dbApplied: boolean;
263
281
  keyApplied: boolean;
264
282
  sshApplied: boolean;
283
+ /** How many module source dirs were laid down at the target's modules dir. */
284
+ moduleSourcesApplied: number;
285
+ }
286
+
287
/**
 * Cheap check for the SQLite file magic without reading the whole DB.
 *
 * Reads only the first 16 bytes of `path` and compares them with the SQLite
 * header string "SQLite format 3" followed by a NUL terminator.
 *
 * @param path - File to probe.
 * @returns `true` only when a full 16-byte header was read and it matches the
 *   magic exactly. `false` when the file cannot be opened or read (missing,
 *   unreadable, a directory), is shorter than 16 bytes, or starts with
 *   anything else.
 */
function looksLikeSqlite(path: string): boolean {
  // "SQLite format 3" (15 bytes) + NUL terminator = 16 bytes.
  const magic = Buffer.from('SQLite format 3\0', 'latin1');

  let fd: number;
  try {
    fd = openSync(path, 'r');
  } catch {
    return false;
  }

  try {
    const header = Buffer.alloc(magic.length);
    let filled = 0;
    // readSync may return fewer bytes than requested; keep reading until the
    // header is complete or EOF (0 bytes) is reached.
    while (filled < header.length) {
      const bytesRead = readSync(fd, header, filled, header.length - filled, filled);
      if (bytesRead === 0) break;
      filled += bytesRead;
    }
    // Requiring a full read stops a 15-byte file from passing on the strength
    // of the zero-initialised buffer's final byte.
    return filled === header.length && header.equals(magic);
  } catch {
    // A read failure (e.g. EISDIR when `path` is a directory) means "not a
    // SQLite file" for this probe, the same as a failed open.
    return false;
  } finally {
    closeSync(fd);
  }
}
304
+
305
/**
 * Point every row of `modules.source_path` in the STAGED database file at this
 * machine's module storage directory, before that file is swapped in.
 *
 * A backup artifact records the absolute `source_path` of the machine that
 * produced it (for example a macOS
 * `~/Library/Application Support/celilo/modules/<id>` path). On another box or
 * OS that path does not exist, so the restored DB would reference module code
 * at a dead location and no deploy could generate. Because source_path is, by
 * construction (import.ts), always `${dataDir}/modules/<id>`, recomputing it
 * for the target is safe, and it is a no-op when the paths already match.
 *
 * The rewrite uses a fresh connection to the staged file, never the live DB,
 * so nothing reopens the DB in-process after the swap (ISS-0037). The
 * journal-mode PRAGMA forces commits into the main file (no `-wal` sibling),
 * so the subsequent file copy captures the rewrite; the live DB re-enters WAL
 * when celilo reopens it.
 *
 * @param stagedDbPath - Path of the staged `celilo.db` that is about to be
 *   copied over the live database.
 */
function rewriteStagedModuleSourcePaths(stagedDbPath: string): void {
  // Some callers/tests stage placeholder bytes just to exercise the file-copy
  // path. Anything without the "SQLite format 3\0" magic is skipped; a file
  // with a valid header but corrupt contents still fails loudly below.
  const isSqliteFile = looksLikeSqlite(stagedDbPath);
  if (!isSqliteFile) {
    return;
  }

  const modulesRoot = getModuleStoragePath();
  const connection = new Database(stagedDbPath);
  try {
    connection.query('PRAGMA journal_mode = DELETE').run();

    const moduleRows = connection.query('SELECT id FROM modules').all() as Array<{ id: string }>;
    const setSourcePath = connection.query('UPDATE modules SET source_path = ? WHERE id = ?');
    moduleRows.forEach((row) => {
      setSourcePath.run(join(modulesRoot, row.id), row.id);
    });
  } finally {
    connection.close();
  }
}
266
339
 
267
340
  /**
@@ -281,14 +354,16 @@ export function applyStagedSystemFiles(systemStagingDir: string): StagedSystemAp
281
354
  let dbApplied = false;
282
355
  let keyApplied = false;
283
356
  let sshApplied = false;
357
+ let moduleSourcesApplied = 0;
284
358
 
285
359
  if (!existsSync(systemStagingDir)) {
286
- return { dbApplied, keyApplied, sshApplied };
360
+ return { dbApplied, keyApplied, sshApplied, moduleSourcesApplied };
287
361
  }
288
362
 
289
363
  const stagedDb = join(systemStagingDir, 'celilo.db');
290
364
  const stagedKey = join(systemStagingDir, 'master.key');
291
365
  const stagedSsh = join(systemStagingDir, 'ssh');
366
+ const stagedModuleSrc = join(systemStagingDir, 'module_src');
292
367
 
293
368
  // master.key first: it's not load-bearing for the running process
294
369
  // (already in memory if the daemon's running). Safe to swap before
@@ -319,9 +394,31 @@ export function applyStagedSystemFiles(systemStagingDir: string): StagedSystemAp
319
394
  sshApplied = true;
320
395
  }
321
396
 
397
+ // Module SOURCE code → lay down at THIS box's modules dir so the restored
398
+ // box has the code for EVERY module (incl. non-registry ones like lunacycle).
399
+ // on_backup captured each module's source; without this the restored DB
400
+ // references modules whose code isn't on disk and no deploy can generate.
401
+ // The artifact carries no generated/ subtree, so any existing generated/
402
+ // under a module dir (e.g. restored TF state) is preserved by the merge.
403
+ if (existsSync(stagedModuleSrc)) {
404
+ const moduleStorageBase = getModuleStoragePath();
405
+ mkdirSync(moduleStorageBase, { recursive: true });
406
+ for (const entry of readdirSync(stagedModuleSrc, { withFileTypes: true })) {
407
+ if (!entry.isDirectory()) continue;
408
+ cpSync(join(stagedModuleSrc, entry.name), join(moduleStorageBase, entry.name), {
409
+ recursive: true,
410
+ });
411
+ moduleSourcesApplied += 1;
412
+ }
413
+ }
414
+
322
415
  // celilo.db: must close the live DB connection before replacing
323
416
  // the file or SQLite gets confused (macOS holds a vnode handle).
324
417
  if (existsSync(stagedDb)) {
418
+ // Reconcile module source_paths to THIS box on the staged file BEFORE the
419
+ // swap (and before closeDb) — fixes cross-host / cross-OS restores. See
420
+ // rewriteStagedModuleSourcePaths.
421
+ rewriteStagedModuleSourcePaths(stagedDb);
325
422
  closeDb();
326
423
  const livePath = getDbPath();
327
424
  mkdirSync(join(livePath, '..'), { recursive: true });
@@ -338,7 +435,7 @@ export function applyStagedSystemFiles(systemStagingDir: string): StagedSystemAp
338
435
  dbApplied = true;
339
436
  }
340
437
 
341
- return { dbApplied, keyApplied, sshApplied };
438
+ return { dbApplied, keyApplied, sshApplied, moduleSourcesApplied };
342
439
  }
343
440
 
344
441
  /**
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Recurrence gate for ISS-0016: the S3 provider's upload() must send a
3
+ * self-describing, replayable Buffer body — NOT a one-shot Node read stream.
4
+ *
5
+ * The original bug passed `createReadStream(localPath)` as PutObject Body with
6
+ * no ContentLength; AWS SDK v3 fell back to aws-chunked streaming and failed
7
+ * with "The request body terminated unexpectedly" (and couldn't replay the
8
+ * stream across S3's retries/redirects). The verify path used a string body
9
+ * (length known) and so never exercised the broken path — every real S3 backup
10
+ * silently failed. These tests upload a REAL on-disk file and assert the body
11
+ * is a Buffer, so a regression back to a stream is caught here.
12
+ *
13
+ * No live S3 / MinIO harness exists, so we intercept S3Client.prototype.send
14
+ * and inspect the command the provider builds.
15
+ */
16
+
17
+ import { type Mock, afterEach, describe, expect, it, spyOn } from 'bun:test';
18
+ import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
19
+ import { tmpdir } from 'node:os';
20
+ import { join } from 'node:path';
21
+ import { Readable } from 'node:stream';
22
+ import { GetObjectCommand, PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
23
+ import { type S3StorageConfig, createS3StorageProvider } from './s3';
24
+
25
// Provider configuration shared by every test; the endpoint is never contacted
// because S3Client.prototype.send is replaced with a spy.
const CONFIG: S3StorageConfig = {
  bucket: 'test-bucket',
  region: 'us-east-1',
  endpoint: 'http://localhost:9000',
  accessKeyId: 'test-key',
  secretAccessKey: 'test-secret',
};

describe('s3 storage provider (ISS-0016)', () => {
  let dir: string | undefined;
  // biome-ignore lint/suspicious/noExplicitAny: send() is heavily overloaded; the test only reads .input.
  let sendSpy: Mock<(command: any) => Promise<unknown>> | undefined;

  // Undo the prototype spy and remove the per-test scratch directory.
  afterEach(() => {
    sendSpy?.mockRestore();
    sendSpy = undefined;
    if (!dir) {
      return;
    }
    rmSync(dir, { recursive: true, force: true });
    dir = undefined;
  });

  it('uploads a real on-disk file as a self-describing Buffer body, not a stream', async () => {
    dir = mkdtempSync(join(tmpdir(), 's3-upload-'));
    const localPath = join(dir, 'envelope.tar.gz');
    // A multi-KB real file — the case that broke against live S3.
    const payload = Buffer.from('celilo backup envelope payload — '.repeat(2000));
    writeFileSync(localPath, payload);

    // biome-ignore lint/suspicious/noExplicitAny: capturing the built command.
    const captured: any[] = [];
    sendSpy = spyOn(S3Client.prototype, 'send').mockImplementation(async (command: unknown) => {
      captured.push(command);
      return {};
    });

    await createS3StorageProvider(CONFIG).upload(localPath, 'celilo-mgmt/2026/envelope.tar.gz');

    expect(captured).toHaveLength(1);
    const [putCommand] = captured;
    expect(putCommand).toBeInstanceOf(PutObjectCommand);

    const uploadedBody = putCommand.input.Body;
    // The regression gate: a Node read stream (the original bug) is not a Buffer.
    expect(Buffer.isBuffer(uploadedBody)).toBe(true);
    expect(uploadedBody instanceof Readable).toBe(false);
    // The body's length equals the whole file, so the SDK can derive
    // ContentLength and replay the body across retries/redirects.
    expect(uploadedBody.length).toBe(payload.length);
    expect(Buffer.compare(uploadedBody, payload)).toBe(0);
    // Key is prefixed with the backup namespace.
    expect(putCommand.input.Key).toBe('celilo-backups/celilo-mgmt/2026/envelope.tar.gz');
    expect(putCommand.input.Bucket).toBe('test-bucket');
  });

  it('downloads a multi-chunk response body to disk intact (short-read safe)', async () => {
    dir = mkdtempSync(join(tmpdir(), 's3-download-'));
    const destination = join(dir, 'restored.bin');
    const pieces = [Buffer.from('chunk-1-'), Buffer.from('chunk-2-'), Buffer.from('chunk-3')];
    const wholeBody = Buffer.concat(pieces);

    sendSpy = spyOn(S3Client.prototype, 'send').mockImplementation(async (command: unknown) => {
      expect(command).toBeInstanceOf(GetObjectCommand);
      const stream = new Readable({ read() {} });
      // Push several chunks separately to model short reads over the wire.
      for (const piece of pieces) {
        stream.push(piece);
      }
      stream.push(null);
      return { Body: stream };
    });

    await createS3StorageProvider(CONFIG).download('celilo-mgmt/2026/envelope.tar.gz', destination);

    expect(readFileSync(destination)).toEqual(wholeBody);
  });
});
@@ -9,6 +9,7 @@ import {
9
9
  discoverTemplateFiles,
10
10
  generateTemplates,
11
11
  getOutputFilename,
12
+ injectProxmoxLxcDns,
12
13
  isTemplateFile,
13
14
  readTemplateFiles,
14
15
  writeGeneratedFiles,
@@ -633,4 +634,80 @@ resource "proxmox_lxc" "container" {
633
634
  }
634
635
  });
635
636
  });
637
+
638
describe('injectProxmoxLxcDns', () => {
  // Minimal proxmox_lxc resource with one nested block, shared by most cases.
  const LXC = [
    'resource "proxmox_lxc" "caddy" {',
    ' target_node = "pve"',
    ' start = true',
    ' network {',
    ' bridge = "vmbr0"',
    ' }',
    '}',
  ].join('\n');

  test('injects nameserver + lifecycle when a nameserver is computable', () => {
    const result = injectProxmoxLxcDns(LXC, true);
    for (const fragment of [
      ' nameserver = "$self:lxc_nameserver"',
      ' lifecycle {',
      ' ignore_changes = [nameserver]',
    ]) {
      expect(result).toContain(fragment);
    }
    // Injected immediately after the opening line, before author attributes.
    const [first, second, third] = result.split('\n');
    expect(first).toBe('resource "proxmox_lxc" "caddy" {');
    expect(second).toBe(' nameserver = "$self:lxc_nameserver"');
    expect(third).toBe(' lifecycle {');
    // Author's attributes and nested blocks are untouched.
    expect(result).toContain(' target_node = "pve"');
    expect(result).toContain(' network {');
  });

  test('injects lifecycle only (no nameserver) when none is computable', () => {
    const result = injectProxmoxLxcDns(LXC, false);
    expect(result).not.toContain('nameserver = "$self:lxc_nameserver"');
    expect(result).toContain(' lifecycle {');
    expect(result).toContain(' ignore_changes = [nameserver]');
  });

  test('is idempotent — already-injected content is returned unchanged', () => {
    const firstPass = injectProxmoxLxcDns(LXC, true);
    expect(injectProxmoxLxcDns(firstPass, true)).toBe(firstPass);
  });

  test('does not double-inject nameserver when the template already has one', () => {
    // A stale copied module (or an author-set nameserver) — injecting a second
    // would be a terraform "Attribute redefined" error.
    const withNameserver = [
      'resource "proxmox_lxc" "caddy" {',
      ' target_node = "pve"',
      ' nameserver = "$self:lxc_nameserver"',
      ' start = true',
      '}',
    ].join('\n');
    const result = injectProxmoxLxcDns(withNameserver, true);
    // Exactly one nameserver attribute survives.
    expect(result.match(/nameserver\s*=/g)?.length).toBe(1);
    // The lifecycle guard is still added.
    expect(result).toContain('ignore_changes = [nameserver]');
  });

  test('leaves non-proxmox_lxc resources untouched', () => {
    const vm = ['resource "proxmox_vm" "build" {', ' cores = 4', '}'].join('\n');
    expect(injectProxmoxLxcDns(vm, true)).toBe(vm);
  });

  test('injects into every proxmox_lxc block in a multi-resource file', () => {
    const twoResources = [LXC, LXC.replace('"caddy"', '"forgejo"')].join('\n\n');
    const result = injectProxmoxLxcDns(twoResources, true);
    expect(result.match(/ignore_changes = \[nameserver\]/g)?.length).toBe(2);
  });

  test('matches the indentation of the resource opening line', () => {
    const indented = [' resource "proxmox_lxc" "x" {', ' cores = 1', ' }'].join('\n');
    const result = injectProxmoxLxcDns(indented, true);
    expect(result).toContain(' nameserver = "$self:lxc_nameserver"');
    expect(result).toContain(' lifecycle {');
    expect(result).toContain(' ignore_changes = [nameserver]');
  });
});
636
713
  });
@@ -126,6 +126,67 @@ export function getOutputFilename(templateFilename: string): string {
126
126
  return result;
127
127
  }
128
128
 
129
/**
 * Inject framework-owned DNS-at-birth into every `proxmox_lxc` resource.
 *
 * An LXC's nameserver is infrastructure celilo owns — like vmid, target_ip, and
 * inventory — not something a module author hand-writes. Authors declare zero
 * DNS terraform; this stamps it onto each `proxmox_lxc` block at generate time.
 * Directly after each resource's opening line it emits:
 *
 *   - `nameserver = "$self:lxc_nameserver"` — only when a nameserver is
 *     computable (`hasNameserver`) and the file does not already contain a
 *     `nameserver = …` attribute (a second one is a terraform "Attribute
 *     redefined" error). The first LXC, deployed before any `dns_internal`
 *     provider exists, has no value: it inherits the Proxmox node default and
 *     the `dns-client-config` aspect repairs resolv.conf post-deploy.
 *   - `lifecycle { ignore_changes = [nameserver] }` — always. Existing LXCs
 *     were born without a nameserver, so setting one is an in-place UPDATE the
 *     terraform-safety guard (create-only, see terraform-safety.ts) rejects.
 *     `ignore_changes` makes nameserver birth-only: terraform sets it at create
 *     and never diffs it again, so the guard never trips on a redeploy. The
 *     aspect owns the live resolv.conf from then on. See v2/LXC_INTERNAL_DNS.md.
 *
 * Anchored on the resource's opening line — it never brace-matches the nested
 * rootfs/network/features blocks, so it is robust to attribute order/formatting.
 *
 * Idempotent at file granularity: a `.tf` that already ignores `nameserver`
 * changes (a re-run, or an author who opted in) is returned untouched. The
 * detection tolerates any whitespace around `=` (e.g. `terraform fmt`
 * alignment with sibling attributes) and `nameserver` appearing anywhere in a
 * longer `ignore_changes` list, so such files do not receive a second
 * `lifecycle` block.
 *
 * NOTE(review): a proxmox_lxc block that already carries a `lifecycle` block
 * which does NOT ignore `nameserver` still gets a second `lifecycle` block
 * injected; merging would require brace-matching. Confirm no template does this.
 *
 * Policy function (Rule 10.1) - pure string transformation, no I/O.
 *
 * @param content - Raw terraform template content (pre variable-resolution)
 * @param hasNameserver - Whether `$self:lxc_nameserver` resolves to a value
 * @returns Content with DNS injected into each proxmox_lxc resource
 */
export function injectProxmoxLxcDns(content: string, hasNameserver: boolean): string {
  // Already injected (idempotent) or author opted into the lifecycle — done.
  // Whitespace- and list-tolerant: matches `ignore_changes = [nameserver]`,
  // `ignore_changes        = [nameserver]` and `ignore_changes = [tags, nameserver]`.
  const alreadyGuarded = /\bignore_changes\s*=\s*\[[^\]]*\bnameserver\b[^\]]*\]/.test(content);
  if (alreadyGuarded) {
    return content;
  }

  // A template that still carries a `nameserver = …` attribute — a stale copied
  // module, or an author who set it by hand — already supplies the value. Skip
  // the value line in that case and add only the lifecycle guard (the
  // load-bearing part). Both cases converge on one nameserver + ignore_changes.
  const alreadyHasNameserver = /^[ \t]*nameserver[ \t]*=/m.test(content);

  const openLineRe = /^([ \t]*)resource\s+"proxmox_lxc"\s+"[^"]+"\s*\{[ \t]*$/gm;
  // The replacement is produced by a callback, so the `$` in
  // "$self:lxc_nameserver" is emitted literally (no `$n` substitution).
  return content.replace(openLineRe, (openLine: string, indent: string) => {
    const inner = `${indent} `;
    const injected = [openLine];
    if (hasNameserver && !alreadyHasNameserver) {
      injected.push(`${inner}nameserver = "$self:lxc_nameserver"`);
    }
    injected.push(`${inner}lifecycle {`);
    injected.push(`${inner} ignore_changes = [nameserver]`);
    injected.push(`${inner}}`);
    return injected.join('\n');
  });
}
189
+
129
190
  /**
130
191
  * Discover template files in directory recursively
131
192
  *
@@ -833,9 +894,15 @@ export async function generateTemplates(options: GenerateOptions): Promise<Gener
833
894
  } else {
834
895
  // Template files (.tpl, .j2) - apply variable resolution
835
896
  const isAnsibleTemplate = template.targetPath.includes('ansible/');
897
+ // Framework-owned DNS-at-birth: stamp nameserver + ignore_changes onto
898
+ // every proxmox_lxc resource before resolution (terraform files only).
899
+ const content =
900
+ !isAnsibleTemplate && template.targetPath.endsWith('.tf')
901
+ ? injectProxmoxLxcDns(template.content, Boolean(context.selfConfig.lxc_nameserver))
902
+ : template.content;
836
903
  const result = isAnsibleTemplate
837
- ? await convertSecretsToJinja(template.content, context, db)
838
- : await resolveTemplate(template.content, context, db);
904
+ ? await convertSecretsToJinja(content, context, db)
905
+ : await resolveTemplate(content, context, db);
839
906
 
840
907
  if (!result.success) {
841
908
  resolutionErrors.push({
@@ -471,6 +471,40 @@ export async function buildResolutionContext(
471
471
  systemConfigMap[row.key] = row.value;
472
472
  }
473
473
 
474
+ // LXC nameserver list (v2/LXC_INTERNAL_DNS.md). Proxmox's `nameserver` is a
475
+ // space-separated primary/secondary list. Compose it so every celilo-built
476
+ // LXC boots with a working resolver — before Ansible's apt update, and
477
+ // independent of which Proxmox node it lands on (the nodes' DNS defaults
478
+ // diverge): the internal dns_internal resolver first (it recurses, so it
479
+ // also answers external names), then the public resolvers as fallback.
480
+ // Bootstrap (no dns_internal provider registered yet — e.g. the DNS module's
481
+ // own LXC) → public only, so apt still works. Injected as
482
+ // $self:lxc_nameserver, mirroring how inventory.* are auto-derived; the
483
+ // proxmox_lxc terraform template drops it straight into `nameserver`.
484
+ {
485
+ const publicDns: string[] = [];
486
+ for (const key of ['dns.primary', 'dns.fallback']) {
487
+ const raw = systemConfigMap[key];
488
+ if (raw) {
489
+ for (const part of raw.split(',')) {
490
+ const ip = part.trim();
491
+ if (ip) publicDns.push(ip);
492
+ }
493
+ }
494
+ }
495
+ const dnsInternal = capabilitiesMap.dns_internal as { server?: { ip?: unknown } } | undefined;
496
+ // The dns_internal capability advertises the provider's address, which may
497
+ // carry a CIDR suffix (technitium's server.ip resolves from target_ip,
498
+ // e.g. "192.168.0.151/24"). A nameserver must be a bare IP — strip it.
499
+ const rawInternalIp =
500
+ typeof dnsInternal?.server?.ip === 'string' ? dnsInternal.server.ip : undefined;
501
+ const internalIp = rawInternalIp ? rawInternalIp.split('/')[0] : undefined;
502
+ const nameservers = internalIp ? [internalIp, ...publicDns] : publicDns;
503
+ if (nameservers.length > 0) {
504
+ selfConfig.lxc_nameserver = nameservers.join(' ');
505
+ }
506
+ }
507
+
474
508
  // Fetch system secrets (for $system_secret: variables)
475
509
  const systemSecretsMap: Record<string, string> = {};
476
510
  try {
@@ -0,0 +1,86 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
2
+ import { existsSync } from 'node:fs';
3
+ import { rm } from 'node:fs/promises';
4
+ import { type DbClient, createDbClient } from '../db/client';
5
+ import { capabilities, moduleConfigs, modules, systemConfig } from '../db/schema';
6
+ import { buildResolutionContext } from './context';
7
+
8
const TEST_DB_PATH = './test-lxc-nameserver.db';

/**
 * Coverage for the generate-time `lxc_nameserver` composition
 * (v2/LXC_INTERNAL_DNS.md). proxmox_lxc templates read `$self:lxc_nameserver`,
 * a space-separated resolver list: the internal dns_internal resolver first
 * (CIDR suffix stripped), followed by the public dns.primary/dns.fallback
 * resolvers. With no dns_internal provider registered (bootstrap) the list is
 * public-only.
 */
describe('lxc_nameserver composition', () => {
  let db: DbClient;

  // Seed a consumer module plus the public resolver system config.
  beforeEach(() => {
    db = createDbClient({ path: TEST_DB_PATH });

    db.insert(modules)
      .values({
        id: 'consumer',
        name: 'consumer',
        version: '1.0.0',
        manifestData: { requires: { machine: { zone: 'app' } } },
        sourcePath: '/tmp/consumer',
      })
      .run();

    db.insert(moduleConfigs)
      .values({ moduleId: 'consumer', key: 'hostname', value: 'consumer', valueJson: '"consumer"' })
      .run();

    const publicResolvers = [
      { key: 'dns.primary', value: '1.1.1.1' },
      { key: 'dns.fallback', value: '1.0.0.1,8.8.8.8' },
    ];
    for (const row of publicResolvers) {
      db.insert(systemConfig).values(row).run();
    }
  });

  // Close the connection, then delete the DB file and its -shm/-wal siblings.
  afterEach(async () => {
    db.$client.close();
    const leftovers = ['', '-shm', '-wal'].map((suffix) => `${TEST_DB_PATH}${suffix}`);
    for (const file of leftovers) {
      if (existsSync(file)) {
        await rm(file);
      }
    }
  });

  /** Register a provider module advertising `dns_internal` at `ip`. */
  function registerInternalDns(ip: string): void {
    db.insert(modules)
      .values({
        id: 'dns-provider',
        name: 'dns-provider',
        version: '1.0.0',
        manifestData: {},
        sourcePath: '/tmp/dns',
      })
      .run();

    db.insert(capabilities)
      .values({
        moduleId: 'dns-provider',
        capabilityName: 'dns_internal',
        version: '1.0.0',
        data: { server: { ip } },
      })
      .run();
  }

  /** Build the consumer's resolution context and return its composed list. */
  async function composedNameserver(): Promise<unknown> {
    const ctx = await buildResolutionContext('consumer', db);
    return ctx.selfConfig.lxc_nameserver;
  }

  test('internal resolver first (CIDR stripped) + public fallback', async () => {
    registerInternalDns('192.168.0.151/24');
    expect(await composedNameserver()).toBe('192.168.0.151 1.1.1.1 1.0.0.1 8.8.8.8');
  });

  test('bootstrap: no dns_internal provider → public resolvers only', async () => {
    expect(await composedNameserver()).toBe('1.1.1.1 1.0.0.1 8.8.8.8');
  });

  test('already-bare internal IP is passed through unchanged', async () => {
    registerInternalDns('10.0.20.30');
    expect(await composedNameserver()).toBe('10.0.20.30 1.1.1.1 1.0.0.1 8.8.8.8');
  });
});