@celilo/cli 0.3.30 → 0.4.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/drizzle/0005_module_operations.sql +12 -0
  2. package/drizzle/0006_base_module_aspects.sql +15 -0
  3. package/drizzle/0007_module_systems.sql +17 -0
  4. package/drizzle/meta/_journal.json +21 -0
  5. package/package.json +5 -4
  6. package/schemas/system_config.json +14 -28
  7. package/src/ansible/inventory.test.ts +46 -62
  8. package/src/ansible/inventory.ts +48 -25
  9. package/src/capabilities/registration.ts +25 -7
  10. package/src/capabilities/validation.test.ts +30 -0
  11. package/src/capabilities/validation.ts +8 -0
  12. package/src/cli/backup-rename.test.ts +95 -0
  13. package/src/cli/cli.test.ts +17 -23
  14. package/src/cli/command-registry.ts +199 -0
  15. package/src/cli/commands/backup-list.ts +1 -1
  16. package/src/cli/commands/events.ts +96 -0
  17. package/src/cli/commands/machine-add.ts +103 -59
  18. package/src/cli/commands/module-import.ts +153 -4
  19. package/src/cli/commands/module-remove.ts +86 -17
  20. package/src/cli/commands/module-status.ts +6 -2
  21. package/src/cli/commands/publish/alpha.test.ts +185 -0
  22. package/src/cli/commands/publish/alpha.ts +226 -0
  23. package/src/cli/commands/publish/changesets.test.ts +89 -0
  24. package/src/cli/commands/publish/changesets.ts +144 -0
  25. package/src/cli/commands/publish/consumer-pins.test.ts +155 -0
  26. package/src/cli/commands/publish/consumer-pins.ts +149 -0
  27. package/src/cli/commands/publish/execute.ts +131 -0
  28. package/src/cli/commands/publish/global-install.test.ts +154 -0
  29. package/src/cli/commands/publish/global-install.ts +171 -0
  30. package/src/cli/commands/publish/helpers.ts +227 -0
  31. package/src/cli/commands/publish/index.ts +365 -0
  32. package/src/cli/commands/publish/module-registry.test.ts +40 -0
  33. package/src/cli/commands/publish/module-registry.ts +64 -0
  34. package/src/cli/commands/publish/plan.ts +107 -0
  35. package/src/cli/commands/publish/preflight.ts +238 -0
  36. package/src/cli/commands/publish/types.ts +264 -0
  37. package/src/cli/commands/publish/workspace.test.ts +323 -0
  38. package/src/cli/commands/publish/workspace.ts +596 -0
  39. package/src/cli/commands/restore.ts +126 -0
  40. package/src/cli/commands/storage-add-local.ts +1 -1
  41. package/src/cli/commands/storage-add-s3.ts +1 -1
  42. package/src/cli/commands/subscribers-add.ts +68 -0
  43. package/src/cli/commands/subscribers-list.ts +48 -0
  44. package/src/cli/commands/subscribers-remove.ts +38 -0
  45. package/src/cli/commands/subscribers-serve.ts +77 -0
  46. package/src/cli/commands/subscribers-status.ts +33 -0
  47. package/src/cli/commands/subscribers-test.ts +71 -0
  48. package/src/cli/commands/system-apply-config-equivalence.test.ts +108 -0
  49. package/src/cli/commands/system-apply-config.test.ts +70 -0
  50. package/src/cli/commands/system-apply-config.ts +130 -0
  51. package/src/cli/commands/system-audit.ts +2 -1
  52. package/src/cli/commands/system-init-deprecation.test.ts +90 -0
  53. package/src/cli/commands/system-init.ts +36 -70
  54. package/src/cli/commands/system-update.ts +3 -2
  55. package/src/cli/completion.ts +22 -1
  56. package/src/cli/index.ts +214 -6
  57. package/src/cli/interactive-config.test.ts +19 -0
  58. package/src/cli/restore-command.test.ts +131 -0
  59. package/src/db/client.ts +42 -0
  60. package/src/db/schema.test.ts +13 -16
  61. package/src/db/schema.ts +161 -9
  62. package/src/hooks/capability-loader-firewall.test.ts +6 -15
  63. package/src/hooks/capability-loader.test.ts +2 -3
  64. package/src/hooks/capability-loader.ts +36 -2
  65. package/src/hooks/define-hook.test.ts +4 -0
  66. package/src/hooks/executor.test.ts +18 -0
  67. package/src/hooks/executor.ts +21 -2
  68. package/src/hooks/load-hook-config.test.ts +26 -24
  69. package/src/hooks/load-hook-config.ts +11 -2
  70. package/src/hooks/run-named-hook.ts +16 -0
  71. package/src/hooks/types.ts +9 -1
  72. package/src/manifest/contracts/v1.ts +70 -0
  73. package/src/manifest/schema.ts +262 -16
  74. package/src/manifest/validate-privileged.test.ts +84 -0
  75. package/src/manifest/validate.test.ts +156 -0
  76. package/src/manifest/validate.ts +69 -0
  77. package/src/module/import.ts +12 -0
  78. package/src/services/aspect-approvals.test.ts +231 -0
  79. package/src/services/aspect-approvals.ts +120 -0
  80. package/src/services/aspect-runner.test.ts +493 -0
  81. package/src/services/aspect-runner.ts +438 -0
  82. package/src/services/aspect-template-resolver.test.ts +101 -0
  83. package/src/services/aspect-template-resolver.ts +122 -0
  84. package/src/services/backup-create.ts +104 -25
  85. package/src/services/backup-envelope-roundtrip.test.ts +199 -0
  86. package/src/services/backup-in-flight-refusal.test.ts +163 -0
  87. package/src/services/backup-manifest.test.ts +115 -0
  88. package/src/services/backup-manifest.ts +163 -0
  89. package/src/services/backup-restore.ts +154 -19
  90. package/src/services/build-bus/delivery-events.ts +92 -0
  91. package/src/services/build-bus/event-factory.ts +54 -0
  92. package/src/services/build-bus/fan-out.test.ts +279 -0
  93. package/src/services/build-bus/fan-out.ts +161 -0
  94. package/src/services/build-bus/hook-dispatch-mgmt.test.ts +157 -0
  95. package/src/services/build-bus/hook-dispatch.test.ts +207 -0
  96. package/src/services/build-bus/hook-dispatch.ts +198 -0
  97. package/src/services/build-bus/hook-dispatcher.ts +115 -0
  98. package/src/services/build-bus/index.ts +41 -0
  99. package/src/services/build-bus/receiver-server.test.ts +179 -0
  100. package/src/services/build-bus/receiver-server.ts +159 -0
  101. package/src/services/build-bus/status.test.ts +212 -0
  102. package/src/services/build-bus/status.ts +213 -0
  103. package/src/services/build-bus/subscriber-store.ts +113 -0
  104. package/src/services/celilo-events.test.ts +70 -0
  105. package/src/services/celilo-events.ts +92 -0
  106. package/src/services/celilo-mgmt-hooks.test.ts +296 -0
  107. package/src/services/config-interview.ts +13 -95
  108. package/src/services/cross-module-data-manager.ts +2 -31
  109. package/src/services/cross-module-read.test.ts +250 -0
  110. package/src/services/cross-module-read.ts +232 -0
  111. package/src/services/deploy-validation.ts +7 -0
  112. package/src/services/deployed-systems.test.ts +235 -0
  113. package/src/services/deployed-systems.ts +308 -0
  114. package/src/services/dns-provider-backfill.ts +75 -0
  115. package/src/services/health-runner.ts +19 -3
  116. package/src/services/infrastructure-variable-resolver.test.ts +6 -32
  117. package/src/services/infrastructure-variable-resolver.ts +3 -13
  118. package/src/services/machine-detector.ts +104 -48
  119. package/src/services/machine-pool.ts +145 -2
  120. package/src/services/module-config.ts +78 -120
  121. package/src/services/module-deploy.ts +113 -40
  122. package/src/services/module-operations.test.ts +154 -0
  123. package/src/services/module-operations.ts +154 -0
  124. package/src/services/module-subscriptions.test.ts +58 -0
  125. package/src/services/module-subscriptions.ts +24 -1
  126. package/src/services/module-types-generator.test.ts +3 -3
  127. package/src/services/module-types-generator.ts +7 -2
  128. package/src/services/proxmox-reconcile.test.ts +333 -0
  129. package/src/services/proxmox-reconcile.ts +156 -0
  130. package/src/services/proxmox-state-recovery.ts +3 -24
  131. package/src/services/restore-from-file.test.ts +177 -0
  132. package/src/services/restore-from-file.ts +355 -0
  133. package/src/services/restore-preflight.test.ts +127 -0
  134. package/src/services/restore-preflight.ts +118 -0
  135. package/src/services/storage-providers/s3.ts +10 -2
  136. package/src/services/system-identity.ts +30 -0
  137. package/src/services/system-init.test.ts +64 -21
  138. package/src/services/system-init.ts +28 -26
  139. package/src/templates/generator.test.ts +7 -16
  140. package/src/templates/generator.ts +28 -115
  141. package/src/test-utils/integration.ts +5 -2
  142. package/src/types/infrastructure.ts +8 -0
  143. package/src/variables/computed/computed-integration.test.ts +191 -0
  144. package/src/variables/computed/computed.test.ts +177 -0
  145. package/src/variables/computed/evaluate.ts +271 -0
  146. package/src/variables/computed/marker.ts +53 -0
  147. package/src/variables/computed/parse.ts +262 -0
  148. package/src/variables/computed/provider-lookup.ts +130 -0
  149. package/src/variables/context.test.ts +89 -28
  150. package/src/variables/context.ts +196 -191
  151. package/src/variables/parser.ts +3 -3
  152. package/src/variables/resolver.test.ts +61 -0
  153. package/src/variables/resolver.ts +81 -0
  154. package/src/variables/types.ts +23 -1
  155. package/src/services/dns-auto-register.ts +0 -211
@@ -0,0 +1,438 @@
1
+ /**
2
+ * Aspect runner — executes a module's base-module aspect across
3
+ * the fleet.
4
+ *
5
+ * Per v2/CELILO_BASE.md Phase 1: when a module with an approved
6
+ * `base_module_aspect` triggers a fan-out event, the runner walks
7
+ * every non-`api_only` system in the aspect's `applicable_zones`,
8
+ * materializes an Ansible inventory + playbook in a scratch
9
+ * directory, copies the aspect role files there, and invokes
10
+ * `executeAnsible` against the result. The existing Ansible
11
+ * machinery does the actual SSH + playbook execution; the runner
12
+ * only sets up the per-aspect Ansible workspace.
13
+ *
14
+ * Scope NOT covered in SC3:
15
+ * - Container_service (Proxmox LXC) systems are deferred to a
16
+ * follow-up — `getSystemsByZone` covers the machines table only.
17
+ * - Proxmox `nameserver` reconciliation is SC5.
18
+ * - Trigger wiring (when `on_install` / `on_new_system_in_zone`
19
+ * etc. fire) is SC4. SC3 ships the pure execution surface so
20
+ * SC4 can call it from the deploy planner.
21
+ * - Capability data injection into aspect host_vars is Phase 2+.
22
+ */
23
+
24
+ import { existsSync } from 'node:fs';
25
+ import { cp, mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
26
+ import { tmpdir } from 'node:os';
27
+ import { join } from 'node:path';
28
+ import { eq } from 'drizzle-orm';
29
+ import { type InventoryHost, generateHostsIni } from '../ansible/inventory';
30
+ import { log } from '../cli/prompts';
31
+ import type { getDb } from '../db/client';
32
+ import { modules } from '../db/schema';
33
+ import type { BaseModuleAspect, BaseModuleAspectTrigger, ModuleManifest } from '../manifest/schema';
34
+ import type { Machine } from '../types/infrastructure';
35
+ import { checkAspectApproval } from './aspect-approvals';
36
+ import { resolveAspectTemplateRecord } from './aspect-template-resolver';
37
+ import { executeAnsible } from './deploy-ansible';
38
+ import { getSystemsByZone } from './machine-pool';
39
+ import { executeProxmoxReconcile, planProxmoxReconcile } from './proxmox-reconcile';
40
+ import { writeTemporarySshKey } from './ssh-key-manager';
41
+
42
+ type DbClient = ReturnType<typeof getDb>;
43
+
44
/**
 * Result of the planning phase of an aspect fan-out: the machines the
 * aspect will run against, plus the zone matches that were left out.
 */
export interface AspectFanOutPlan {
  /**
   * Machines that matched the aspect's zones but will not be run against,
   * each paired with the reason for the exclusion (e.g. `api_only`).
   */
  skipped: { machine: Machine; reason: string }[];
  /** Machines the aspect will actually be applied to. */
  targetSystems: Machine[];
}
50
+
51
/**
 * Outcome of one aspect fan-out, as handed back to the caller for
 * logging and event emission.
 */
export interface AspectRunResult {
  /** The fan-out plan this result belongs to. */
  plan: AspectFanOutPlan;
  /** Whether the fan-out completed successfully. */
  success: boolean;
  /** Ansible recap-level summary; the empty string when nothing was run. */
  output: string;
  /** Failure description; set when `success === false`. */
  error?: string;
}
60
+
61
/** Caller-supplied knobs for a single aspect fan-out. */
export interface AspectRunOptions {
  /** The trigger that caused this fan-out; recorded in log output. */
  trigger: BaseModuleAspectTrigger;
  /**
   * Extra hostnames to leave out of the fan-out, in addition to the
   * `api_only` exclusion. Typically the primary deploy's own host(s);
   * per v2/CELILO_BASE.md D3 the framework never forces this skip —
   * aspect authors opt in when they want it.
   */
  excludeHostnames?: string[];
  /**
   * Forwarded to `executeAnsible` as its `noInteractive` flag. Treated
   * as `true` when omitted: fan-outs run inside a larger orchestration
   * (the deploy flow), so no operator is waiting at this step.
   */
  noInteractive?: boolean;
}
79
+
80
/**
 * Planning phase (Rule 10.1): work out which machines an aspect would
 * run on, without executing anything. The only side effect is the
 * database read performed by `getSystemsByZone`.
 *
 * @param aspect  The aspect declaration; its `applicable_zones` select the candidates.
 * @param options Optional `excludeHostnames` forwarded to the zone lookup.
 * @returns The machines to target and the machines skipped (with reasons).
 */
export async function planAspectFanOut(
  aspect: BaseModuleAspect,
  options: Pick<AspectRunOptions, 'excludeHostnames'> = {},
): Promise<AspectFanOutPlan> {
  // Deliberately ask for api_only machines as well (excludeApiOnly: false):
  // they are separated out below so the skip shows up in the plan instead
  // of silently disappearing from the candidate set.
  const candidates = await getSystemsByZone(aspect.applicable_zones, {
    excludeApiOnly: false,
    excludeHostnames: options.excludeHostnames,
  });

  const plan: AspectFanOutPlan = { targetSystems: [], skipped: [] };
  for (const candidate of candidates) {
    if (!candidate.apiOnly) {
      plan.targetSystems.push(candidate);
      continue;
    }
    plan.skipped.push({ machine: candidate, reason: 'api_only' });
  }
  return plan;
}
108
+
109
/**
 * Materialize the per-aspect Ansible workspace in a fresh temp dir:
 *
 *   <tmp>/
 *     ansible/
 *       inventory/
 *         hosts.ini
 *         host_vars/<hostname>.yml          (target_zone fact)
 *         group_vars/all/aspect_vars.yml    (resolved ansible_vars, if any)
 *       playbook.yml                        (synthesized)
 *       roles/<aspect_role>/                (copied from the module's
 *                                            base-module-aspect/ tree)
 *
 * When `aspect.ansible_vars` is non-empty, each template is resolved via
 * `resolveAspectTemplateRecord` against the providing module and written
 * to group_vars/all/aspect_vars.yml, so the role reads the values as
 * `{{ var_name }}`.
 *
 * @param args.aspect            The aspect declaration being fanned out.
 * @param args.moduleSourcePath  Absolute path to the module's imported
 *   source dir. The role is expected at
 *   `<moduleSourcePath>/base-module-aspect/ansible/roles/<aspect.ansible_role>`.
 * @param args.targetSystems     Machines to list in the inventory.
 * @param args.providerModuleId  Module that owns the aspect; required when
 *   `ansible_vars` is declared.
 * @param args.db                DB client for template resolution; required
 *   when `ansible_vars` is declared.
 * @returns The temp dir path. On success the caller owns it and is
 *   responsible for removing it.
 * @throws If the role directory is missing, if `ansible_vars` is declared
 *   without `providerModuleId`/`db`, or if any staging step fails. When the
 *   failure happens after the temp dir was created, the dir is removed
 *   before the error propagates — the caller never learns the path in that
 *   case, so nobody else could clean it up.
 */
export async function materializeAspectAnsible(args: {
  aspect: BaseModuleAspect;
  /** Absolute path to the module's imported source dir. The aspect
   * role is expected at `<moduleSourcePath>/base-module-aspect/ansible/roles/<aspect.ansible_role>`. */
  moduleSourcePath: string;
  targetSystems: Machine[];
  /** Provider module ID — used to resolve $self / $capability /
   * $system references in ansible_vars. */
  providerModuleId?: string;
  /** DB client for context resolution. Required when ansible_vars
   * is declared on the aspect. */
  db?: DbClient;
}): Promise<string> {
  const { aspect, moduleSourcePath, targetSystems, providerModuleId, db } = args;
  const roleSrcDir = join(
    moduleSourcePath,
    'base-module-aspect',
    'ansible',
    'roles',
    aspect.ansible_role,
  );
  if (!existsSync(roleSrcDir)) {
    throw new Error(
      `Aspect role not found at ${roleSrcDir}. The module declared base_module_aspect.ansible_role='${aspect.ansible_role}' but the corresponding directory is missing.`,
    );
  }

  const workDir = await mkdtemp(join(tmpdir(), 'celilo-aspect-'));
  try {
    const ansibleDir = join(workDir, 'ansible');
    const inventoryDir = join(ansibleDir, 'inventory');
    const hostVarsDir = join(inventoryDir, 'host_vars');
    const rolesDir = join(ansibleDir, 'roles');
    await mkdir(hostVarsDir, { recursive: true });
    await mkdir(rolesDir, { recursive: true });

    // Stage each system's SSH key and build inventory rows.
    // NOTE(review): the key files come from writeTemporarySshKey and are
    // not tracked here; presumably their lifetime is managed by
    // ssh-key-manager — confirm.
    const inventoryHosts: InventoryHost[] = [];
    for (const m of targetSystems) {
      const keyPath = await writeTemporarySshKey(m.id);
      inventoryHosts.push({
        hostname: m.hostname,
        ansibleHost: m.ipAddress,
        ansibleUser: m.sshUser,
        groups: ['aspect_targets', m.zone],
        ansibleSshPrivateKeyFile: keyPath,
      });

      // Per-host vars: target_zone is the only fact the framework
      // guarantees today (per D3).
      const hostVars = `---\n# Aspect host vars for ${m.hostname}\ntarget_zone: ${m.zone}\n`;
      await writeFile(join(hostVarsDir, `${m.hostname}.yml`), hostVars, 'utf-8');
    }

    // hosts.ini groups every target under 'aspect_targets' and its own
    // zone. The synthesized playbook targets 'aspect_targets'.
    const hostsIni = generateHostsIni(inventoryHosts);
    await writeFile(join(inventoryDir, 'hosts.ini'), hostsIni, 'utf-8');

    // Resolve aspect ansible_vars (if any) into group_vars/all so every
    // target sees concrete values rather than template strings.
    if (aspect.ansible_vars && Object.keys(aspect.ansible_vars).length > 0) {
      if (!providerModuleId || !db) {
        throw new Error(
          'materializeAspectAnsible: aspect declares ansible_vars but providerModuleId/db were not supplied. This is a framework bug.',
        );
      }
      const resolved = await resolveAspectTemplateRecord(
        aspect.ansible_vars,
        providerModuleId,
        db,
        'base_module_aspect.ansible_vars',
      );
      const groupVarsAllDir = join(inventoryDir, 'group_vars', 'all');
      await mkdir(groupVarsAllDir, { recursive: true });
      // JSON.stringify yields a double-quoted scalar, which is also valid
      // YAML, so values with special characters stay intact.
      const lines = [
        '---',
        `# Resolved base_module_aspect.ansible_vars for ${providerModuleId}`,
        ...Object.entries(resolved).map(([k, v]) => `${k}: ${JSON.stringify(v)}`),
        '',
      ];
      await writeFile(join(groupVarsAllDir, 'aspect_vars.yml'), lines.join('\n'), 'utf-8');
    }

    // Copy the whole role tree (tasks/, templates/, handlers/, vars/,
    // defaults/, ...) next to the playbook so Ansible finds it via the
    // default role path.
    await cp(roleSrcDir, join(rolesDir, aspect.ansible_role), { recursive: true });

    // One play, every host in 'aspect_targets', the single role. Become
    // is on because aspect roles typically modify /etc/* files.
    const playbook = [
      '---',
      `- name: Aspect '${aspect.ansible_role}' fan-out`,
      '  hosts: aspect_targets',
      '  become: true',
      '  gather_facts: true',
      '  roles:',
      `    - role: ${aspect.ansible_role}`,
      '',
    ].join('\n');
    await writeFile(join(ansibleDir, 'playbook.yml'), playbook, 'utf-8');

    return workDir;
  } catch (err) {
    // The caller only receives workDir on success, so a half-built
    // workspace has to be removed here or it is leaked.
    try {
      await rm(workDir, { recursive: true, force: true });
    } catch {
      // Best-effort: the staging failure is the error worth reporting.
    }
    throw err;
  }
}
241
+
242
/**
 * Execution phase: plan the fan-out, build the Ansible workspace, run it,
 * and always remove the workspace afterwards.
 *
 * Failure semantics (v2/CELILO_BASE.md D4): aspects are idempotent and
 * forward-progress only. A failed run is reported through the result and
 * whatever partial state exists (some systems updated, others not) is
 * left in place — there is no rollback. How to surface the failure is the
 * caller's decision.
 *
 * @returns `success: true` with empty output when no system is eligible;
 *   otherwise the success/output/error reported by `executeAnsible`,
 *   together with the executed plan.
 */
export async function runAspectFanOut(args: {
  moduleId: string;
  aspect: BaseModuleAspect;
  moduleSourcePath: string;
  options: AspectRunOptions;
  db: DbClient;
}): Promise<AspectRunResult> {
  const { moduleId, aspect, moduleSourcePath, options, db } = args;

  const plan = await planAspectFanOut(aspect, {
    excludeHostnames: options.excludeHostnames,
  });
  const targetCount = plan.targetSystems.length;

  // Nothing to do is a success, not an error.
  if (targetCount === 0) {
    log.info(
      `Aspect fan-out for '${moduleId}' (${options.trigger}): no eligible systems in zones [${aspect.applicable_zones.join(', ')}]`,
    );
    return { success: true, output: '', plan };
  }

  log.info(
    `Aspect fan-out for '${moduleId}' (${options.trigger}): ${targetCount} target(s) in zones [${aspect.applicable_zones.join(', ')}]`,
  );

  let scratchDir: string | undefined;
  try {
    scratchDir = await materializeAspectAnsible({
      aspect,
      moduleSourcePath,
      targetSystems: plan.targetSystems,
      providerModuleId: moduleId,
      db,
    });

    const ansibleResult = await executeAnsible(scratchDir, {
      noInteractive: options.noInteractive ?? true,
    });

    // Proxmox reconciliation (D5) is attempted only after a successful
    // Ansible run: warning about persisted-config drift is pointless when
    // the running config was not updated in the first place.
    const wantsReconcile = ansibleResult.success && aspect.proxmox_reconcile;
    if (wantsReconcile) {
      try {
        const reconcilePlan = await planProxmoxReconcile({
          aspect,
          providerModuleId: moduleId,
          db,
        });
        executeProxmoxReconcile(reconcilePlan);
      } catch (err) {
        // A reconciliation problem (e.g. missing capability data) must not
        // fail the fan-out — the running config is already updated.
        const detail = err instanceof Error ? err.message : String(err);
        log.warn(`Proxmox reconciliation planning failed for '${moduleId}': ${detail}`);
      }
    }

    const { success, output, error } = ansibleResult;
    return { success, output, error, plan };
  } finally {
    if (scratchDir) {
      try {
        await rm(scratchDir, { recursive: true, force: true });
      } catch {
        // Cleanup is best-effort; a failed rm must not mask the run result.
      }
    }
  }
}
333
+
334
/**
 * Why a fan-out was skipped without being an error. Every member is a
 * "not applicable right now" outcome: worth logging, never a deploy
 * failure.
 *
 * - `no_aspect`: the module did not declare `base_module_aspect`.
 * - `trigger_not_declared`: `aspect.triggers` does not list this trigger.
 * - `no_approval`: the operator has not approved the aspect (D2).
 * - `scope_changed`: an approval exists, but `applicable_zones`/triggers
 *   have diverged from what was approved (D7).
 */
export type AspectSkipReason =
  | 'no_aspect'
  | 'trigger_not_declared'
  | 'no_approval'
  | 'scope_changed';
344
+
345
/** What the deploy-flow glue reports back after considering a trigger. */
export interface AspectGlueResult {
  /** `true` when the runner was invoked, `false` when the fan-out was skipped. */
  ran: boolean;
  /** Skip reason; set when `ran === false`. */
  reason?: AspectSkipReason;
  /** Runner outcome; set when `ran === true`. */
  success?: boolean;
  /** Fan-out plan plus Ansible recap; set when `ran === true`. */
  runResult?: AspectRunResult;
}
354
+
355
/**
 * Deploy-flow glue (SC4): called by `module-deploy.ts` once a primary
 * deploy has succeeded, to decide whether the module's aspect should be
 * fanned out for `trigger`, and to dispatch it if so.
 *
 * Gates, evaluated in this order:
 *
 *   1. Manifest has no `base_module_aspect`          → skipped, `no_aspect`.
 *   2. `aspect.triggers` does not contain `trigger`  → skipped, `trigger_not_declared`.
 *   3. Module row missing, or approval check says
 *      `no_approval`                                 → skipped, `no_approval` (warned).
 *   4. Approval check says `scope_changed`           → skipped, `scope_changed`
 *      (warned; re-approval required, D7).
 *   5. Anything else                                 → the runner is invoked.
 *
 * `runner` exists so unit tests can avoid real Ansible; it falls back to
 * `runAspectFanOut`.
 *
 * Failure semantics (D4): a failed fan-out comes back as
 * `{ ran: true, success: false, ... }`. The primary deploy is never rolled
 * back — aspects are forward-progress only and a partially updated fleet
 * is expected to converge on the next fan-out. Callers should log the
 * failure loudly without changing the primary deploy's status.
 */
export async function maybeRunAspectForTrigger(args: {
  moduleId: string;
  manifest: ModuleManifest;
  trigger: BaseModuleAspectTrigger;
  db: DbClient;
  runner?: typeof runAspectFanOut;
  excludeHostnames?: string[];
}): Promise<AspectGlueResult> {
  const { moduleId, manifest, trigger, db } = args;

  const aspect = manifest.base_module_aspect;
  if (!aspect) {
    return { ran: false, reason: 'no_aspect' };
  }
  if (!aspect.triggers.includes(trigger)) {
    return { ran: false, reason: 'trigger_not_declared' };
  }

  const moduleRow = db.select().from(modules).where(eq(modules.id, moduleId)).get();
  if (!moduleRow) {
    // The deploy flow just acted on this module, so a missing row points
    // at a deeper bug. Reported as "no_approval" (with a log line) since
    // there is nothing meaningful to fan out to.
    log.warn(`Aspect glue: module '${moduleId}' not found in DB, skipping fan-out`);
    return { ran: false, reason: 'no_approval' };
  }

  switch (checkAspectApproval(moduleId, moduleRow.version, aspect, db)) {
    case 'no_approval':
      log.warn(
        `Aspect for '${moduleId}' is declared but not approved. Re-import the module to grant consent; aspect skipped.`,
      );
      return { ran: false, reason: 'no_approval' };
    case 'scope_changed':
      log.warn(
        `Aspect for '${moduleId}@${moduleRow.version}' has scope changes from the prior approval. Re-import to re-approve; aspect skipped.`,
      );
      return { ran: false, reason: 'scope_changed' };
    default:
      // Any other status means the approval is in order — fall through.
      break;
  }

  const dispatch = args.runner ?? runAspectFanOut;
  const runResult = await dispatch({
    moduleId,
    aspect,
    moduleSourcePath: moduleRow.sourcePath,
    options: {
      trigger,
      excludeHostnames: args.excludeHostnames,
    },
    db,
  });
  return { ran: true, success: runResult.success, runResult };
}
@@ -0,0 +1,101 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
2
+ import { existsSync } from 'node:fs';
3
+ import { rm } from 'node:fs/promises';
4
+ import { type DbClient, createDbClient } from '../db/client';
5
+ import { moduleConfigs, modules } from '../db/schema';
6
+ import { resolveAspectTemplate, resolveAspectTemplateRecord } from './aspect-template-resolver';
7
+ import { upsertDeployedSystem } from './deployed-systems';
8
+
9
+ const TEST_DB_PATH = './test-aspect-resolver.db';
10
+
11
/**
 * Test fixture: inserts a module row, one `hostname` module config, and a
 * deployed system named `main` with the given IPv4 address.
 *
 * Exists for regression coverage of the `$infra:<name>.<field>` selector
 * inside base_module_aspect ansible_vars. That path has its own
 * hand-rolled resolver (separate from variables/resolver.ts) which
 * previously handled only $self:/$system:/$capability:, so knot's migrated
 * `knot_server_ip: $infra:main.ipv4_address` leaked into resolv.conf
 * unresolved and broke DNS on every fleet machine.
 * See v2/MODULE_SYSTEMS_ADDRESSING.md.
 */
function seedModuleWithSystem(db: DbClient, moduleId: string, ip: string): void {
  const moduleRow = {
    id: moduleId,
    name: moduleId,
    version: '1.0.0',
    manifestData: {},
    sourcePath: `/tmp/${moduleId}`,
  };
  db.insert(modules).values(moduleRow).run();

  const hostnameConfig = {
    moduleId,
    key: 'hostname',
    value: moduleId,
    valueJson: JSON.stringify(moduleId),
  };
  db.insert(moduleConfigs).values(hostnameConfig).run();

  // The system the `$infra:main.*` selectors in the tests resolve against.
  upsertDeployedSystem(db, moduleId, {
    name: 'main',
    hostname: `${moduleId}-host`,
    ipv4Address: ip,
    zone: 'internal',
    infraType: 'machine',
  });
}
40
+
41
/**
 * `$infra:` selector coverage for the aspect template resolver. Each test
 * runs against a fresh on-disk SQLite database seeded with one module and
 * one deployed system named `main`.
 */
describe('resolveAspectTemplate — $infra: selector', () => {
  let db: DbClient;

  beforeEach(() => {
    db = createDbClient({ path: TEST_DB_PATH });
    seedModuleWithSystem(db, 'knot-unbound-internal', '192.168.0.10');
  });

  afterEach(async () => {
    db.$client.close();
    // Remove the main database file and its -shm / -wal companions, if present.
    for (const suffix of ['', '-shm', '-wal']) {
      const p = `${TEST_DB_PATH}${suffix}`;
      if (existsSync(p)) await rm(p);
    }
  });

  test('resolves $infra:main.ipv4_address to the recorded system IP', async () => {
    const resolved = await resolveAspectTemplate(
      '$infra:main.ipv4_address',
      'knot-unbound-internal',
      db,
      'base_module_aspect.ansible_vars',
    );
    expect(resolved).toBe('192.168.0.10');
  });

  test('resolves the braced form and embeds in surrounding text', async () => {
    const resolved = await resolveAspectTemplate(
      'nameserver ${infra:main.ipv4_address}',
      'knot-unbound-internal',
      db,
      'base_module_aspect.ansible_vars',
    );
    expect(resolved).toBe('nameserver 192.168.0.10');
  });

  test('resolveAspectTemplateRecord resolves $infra: across keys', async () => {
    const resolved = await resolveAspectTemplateRecord(
      { knot_server_ip: '$infra:main.ipv4_address', knot_host: '$infra:main.hostname' },
      'knot-unbound-internal',
      db,
      'base_module_aspect.ansible_vars',
    );
    expect(resolved).toEqual({
      knot_server_ip: '192.168.0.10',
      knot_host: 'knot-unbound-internal-host',
    });
  });

  // `.rejects` assertions are promises: they must be awaited, otherwise the
  // test can finish (and the DB can be closed by afterEach) before the
  // assertion has actually been evaluated.
  test('throws on an unknown system name', async () => {
    await expect(
      resolveAspectTemplate('$infra:replica.ipv4_address', 'knot-unbound-internal', db, 'scope'),
    ).rejects.toThrow(/Cannot resolve \$infra:replica\.ipv4_address/);
  });

  test('throws on an unknown field', async () => {
    await expect(
      resolveAspectTemplate('$infra:main.bogus_field', 'knot-unbound-internal', db, 'scope'),
    ).rejects.toThrow(/Cannot resolve \$infra:main\.bogus_field/);
  });
});
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Shared template resolution for base-module aspect declarations.
3
+ *
4
+ * Both `proxmox_reconcile.tfvars` (SC5) and `ansible_vars` (SC6)
5
+ * accept string templates with the same substitution rules celilo
6
+ * uses for manifest variables — `$self:`, `$capability:`,
7
+ * `$system:`, `$infra:`. Resolution happens against the PROVIDING module's
8
+ * context: the values come from the module that owns the aspect,
9
+ * not the target system the aspect runs on.
10
+ *
11
+ * Factored out so the two callers stay consistent — a fix to the
12
+ * substitution rules lands here once.
13
+ */
14
+
15
+ import type { getDb } from '../db/client';
16
+ import { buildResolutionContext } from '../variables/context';
17
+
18
+ type DbClient = ReturnType<typeof getDb>;
19
+
20
/** One identifier segment of a reference (`[a-zA-Z0-9_]+`). */
const ASPECT_REF_SEGMENT = '[a-zA-Z0-9_]+';

/**
 * Dot-separated segments. Unlike a plain `[a-zA-Z0-9_.]+` this can never
 * end on a dot, so a sentence-ending period after a bare reference
 * (`... $system:dns.primary.`) stays literal text instead of becoming part
 * of the key.
 */
const ASPECT_REF_DOTTED = `${ASPECT_REF_SEGMENT}(?:\\.${ASPECT_REF_SEGMENT})*`;

/**
 * Replace every `$<selector>:<arg>` and `${<selector>:<arg>}` in `input`
 * with `resolve(arg)`.
 *
 * The braced and bare forms are separate alternatives, so braces are only
 * consumed as a balanced pair: a bare reference followed by a literal `}`
 * keeps the `}`. `argPattern` must not contain capturing groups.
 */
function substituteAspectRefs(
  input: string,
  selector: string,
  argPattern: string,
  resolve: (arg: string) => string,
): string {
  const pattern = new RegExp(
    `\\$\\{${selector}:(${argPattern})\\}|\\$${selector}:(${argPattern})`,
    'g',
  );
  return input.replace(
    pattern,
    (_match: string, braced: string | undefined, bare: string | undefined) =>
      resolve(braced ?? bare ?? ''),
  );
}

/**
 * Resolve a template string against `providerModuleId`'s context.
 *
 * Supported references, each in bare (`$x:...`) or braced (`${x:...}`) form:
 *
 *   - `$system:<dotted.key>`            → `ctx.systemConfig[key]`
 *   - `$self:<key>`                     → `ctx.selfConfig[key]`
 *   - `$infra:<system-name>.<field>`    → a field of the provider's deployed
 *                                         system with that name
 *   - `$capability:<name>.<dotted.path>`→ a value inside the named
 *                                         capability's data
 *
 * The selectors are applied as four passes in that order, each over the
 * output of the previous one.
 *
 * @param template         Text containing zero or more references.
 * @param providerModuleId Module whose context the references resolve against.
 * @param db               DB client handed to `buildResolutionContext`.
 * @param scopeLabel       Included in error messages so failures point at the
 *   right manifest block (e.g. 'proxmox_reconcile.tfvars',
 *   'base_module_aspect.ansible_vars').
 * @returns The template with every reference replaced by its value.
 * @throws Error when a reference names a value that does not exist (or, for
 *   `$infra:`, is empty; for `$capability:`, is null/undefined). This is an
 *   aspect-author bug the caller is expected to surface to the operator —
 *   failing is preferable to writing an unresolved or "undefined" value
 *   into a generated config file.
 */
export async function resolveAspectTemplate(
  template: string,
  providerModuleId: string,
  db: DbClient,
  scopeLabel: string,
): Promise<string> {
  const ctx = await buildResolutionContext(providerModuleId, db);

  let result = substituteAspectRefs(template, 'system', ASPECT_REF_DOTTED, (key) => {
    const value = ctx.systemConfig[key];
    if (value === undefined) {
      throw new Error(
        `Cannot resolve $system:${key} in ${scopeLabel} (provider module: ${providerModuleId})`,
      );
    }
    return String(value);
  });

  result = substituteAspectRefs(result, 'self', ASPECT_REF_SEGMENT, (key) => {
    const value = ctx.selfConfig[key];
    if (value === undefined) {
      throw new Error(
        `Cannot resolve $self:${key} in ${scopeLabel} (provider module: ${providerModuleId})`,
      );
    }
    return String(value);
  });

  // $infra:<system-name>.<field> — the provider's deployed system, by name
  // (v2/MODULE_SYSTEMS_ADDRESSING.md). This is how an aspect references the
  // provider's own host IP (e.g. knot's dns-client-config aspect sets every
  // fleet machine's nameserver to $infra:main.ipv4_address).
  result = substituteAspectRefs(
    result,
    'infra',
    `[a-z0-9-]+\\.${ASPECT_REF_SEGMENT}`,
    (ref) => {
      // System names cannot contain dots, so the first dot is the separator.
      const dot = ref.indexOf('.');
      const sysName = ref.slice(0, dot);
      const field = ref.slice(dot + 1);
      const sys = ctx.systems?.[sysName] as unknown as Record<string, unknown> | undefined;
      const value = sys?.[field];
      if (value === undefined || value === '') {
        throw new Error(
          `Cannot resolve $infra:${sysName}.${field} in ${scopeLabel} (provider module: ${providerModuleId}) — no such deployed system or field`,
        );
      }
      return String(value);
    },
  );

  result = substituteAspectRefs(
    result,
    'capability',
    `${ASPECT_REF_SEGMENT}\\.${ASPECT_REF_DOTTED}`,
    (ref) => {
      const dot = ref.indexOf('.');
      const capName = ref.slice(0, dot);
      const path = ref.slice(dot + 1);
      const capData = ctx.capabilities[capName];
      if (!capData) {
        throw new Error(
          `Cannot resolve $capability:${capName}.${path} — capability not registered (provider module: ${providerModuleId}, in ${scopeLabel})`,
        );
      }
      const missingField = (): Error =>
        new Error(
          `Cannot resolve $capability:${capName}.${path} — field missing on capability data (provider module: ${providerModuleId}, in ${scopeLabel})`,
        );
      let cur: unknown = capData;
      for (const p of path.split('.')) {
        if (cur && typeof cur === 'object' && p in (cur as Record<string, unknown>)) {
          cur = (cur as Record<string, unknown>)[p];
        } else {
          throw missingField();
        }
      }
      // A key that exists but holds null/undefined would otherwise be
      // emitted as the literal text "null"/"undefined".
      if (cur === undefined || cur === null) {
        throw missingField();
      }
      return String(cur);
    },
  );

  return result;
}
106
+
107
/**
 * Resolve every template in a name → template record and return a
 * name → resolved-string record with the same keys.
 *
 * Entries are resolved one after another in `Object.entries` order; the
 * first failing template rejects the whole call.
 *
 * @param templates        Templates keyed by variable name.
 * @param providerModuleId Module whose context the templates resolve against.
 * @param db               DB client forwarded to `resolveAspectTemplate`.
 * @param scopeLabel       Manifest block name used in error messages.
 */
export async function resolveAspectTemplateRecord(
  templates: Record<string, string>,
  providerModuleId: string,
  db: DbClient,
  scopeLabel: string,
): Promise<Record<string, string>> {
  const output: Record<string, string> = {};
  for (const entry of Object.entries(templates)) {
    const key = entry[0];
    const rawTemplate = entry[1];
    const value = await resolveAspectTemplate(rawTemplate, providerModuleId, db, scopeLabel);
    output[key] = value;
  }
  return output;
}