underpost 3.2.10 → 3.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.vscode/extensions.json +9 -9
  2. package/.vscode/settings.json +12 -1
  3. package/CHANGELOG.md +92 -1
  4. package/CLI-HELP.md +80 -26
  5. package/README.md +6 -10
  6. package/bin/build.js +9 -6
  7. package/bin/build.template.js +187 -0
  8. package/bin/deploy.js +29 -18
  9. package/conf.js +1 -4
  10. package/manifests/cronjobs/dd-cron/dd-cron-backup.yaml +1 -1
  11. package/manifests/cronjobs/dd-cron/dd-cron-dns.yaml +1 -1
  12. package/manifests/deployment/dd-default-development/deployment.yaml +2 -2
  13. package/manifests/deployment/dd-test-development/deployment.yaml +2 -2
  14. package/manifests/lxd/lxd-admin-profile.yaml +12 -3
  15. package/manifests/mongodb-4.4/headless-service.yaml +10 -0
  16. package/manifests/mongodb-4.4/kustomization.yaml +3 -1
  17. package/manifests/mongodb-4.4/mongodb-nodeport.yaml +17 -0
  18. package/manifests/mongodb-4.4/pv-pvc.yaml +10 -14
  19. package/manifests/mongodb-4.4/statefulset.yaml +79 -0
  20. package/manifests/mongodb-4.4/storage-class.yaml +9 -0
  21. package/manifests/valkey/statefulset.yaml +1 -1
  22. package/manifests/valkey/valkey-nodeport.yaml +17 -0
  23. package/package.json +3 -3
  24. package/scripts/ipxe-setup.sh +52 -49
  25. package/scripts/k3s-node-setup.sh +84 -68
  26. package/scripts/lxd-vm-setup.sh +193 -8
  27. package/scripts/maas-nat-firewalld.sh +145 -0
  28. package/src/cli/baremetal.js +115 -93
  29. package/src/cli/cluster.js +548 -221
  30. package/src/cli/deploy.js +131 -166
  31. package/src/cli/fs.js +11 -3
  32. package/src/cli/index.js +75 -17
  33. package/src/cli/lxd.js +1034 -240
  34. package/src/cli/monitor.js +9 -3
  35. package/src/cli/release.js +72 -36
  36. package/src/cli/repository.js +10 -16
  37. package/src/cli/run.js +72 -55
  38. package/src/cli/secrets.js +11 -2
  39. package/src/client/components/core/Auth.js +4 -3
  40. package/src/client/components/core/ClientEvents.js +76 -0
  41. package/src/client/components/core/EventBus.js +4 -0
  42. package/src/client/components/core/Modal.js +82 -41
  43. package/src/db/DataBaseProvider.js +9 -9
  44. package/src/db/mariadb/MariaDB.js +2 -1
  45. package/src/db/mongo/MongoBootstrap.js +592 -522
  46. package/src/db/mongo/MongooseDB.js +19 -15
  47. package/src/index.js +1 -1
  48. package/src/server/conf.js +67 -19
  49. package/src/server/proxy.js +9 -2
  50. package/src/server/start.js +8 -4
  51. package/src/server/valkey.js +2 -0
  52. package/bin/file.js +0 -220
  53. package/bin/vs.js +0 -74
  54. package/bin/zed.js +0 -84
package/src/cli/lxd.js CHANGED
@@ -3,27 +3,6 @@
3
3
  * @module src/cli/lxd.js
4
4
  * @namespace UnderpostLxd
5
5
  *
6
- * ### Proxy Device Safety
7
- *
8
- * Proxy devices (created by `--expose`) attach LXD proxy devices to VMs. If you
9
- * stop + delete a VM without removing proxy devices first, LXD may crash or
10
- * leave stale NAT rules in iptables. Both `_safeDeleteVm()` and reset() now
11
- * enumerate and remove proxy devices before stopping/deleting VMs.
12
- *
13
- * ### Idempotency
14
- *
15
- * Every destructive operation (deleteVm, reset) is safe to re-run. If a VM is
16
- * already gone, proxy device removal is silently skipped. If the LXD snap is
17
- * already removed, reset continues gracefully.
18
- *
19
- * ### Lifecycle
20
- *
21
- * - `--reset` is the only complete teardown path: cleans ALL VMs, profiles,
22
- * networks, and finally the LXD snap itself.
23
- * - `--delete-vm` is a single-VM teardown that removes proxy devices first.
24
- * - `--init-vm` handles OS + K3s setup. Engine replication is a separate step
25
- * via `--bootstrap-engine`.
26
- * - `--bootstrap-engine` replicates /home/dd/engine into the VM after init.
27
6
  */
28
7
 
29
8
  import { getNpmRootPath } from '../server/conf.js';
@@ -35,39 +14,92 @@ import Underpost from '../index.js';
35
14
 
36
15
  const logger = loggerFactory(import.meta);
37
16
 
17
+ const ENGINE_ROOT_IN_VM = '/home/dd/engine';
18
+ const ENGINE_ROOT_ON_HOST = '/home/dd/engine';
19
+ const ADMIN_PROFILE = 'admin-profile';
20
+ const BRIDGE_NETWORK = 'lxdbr0';
21
+ const BRIDGE_SUBNET_PREFIX = '10.250.250';
22
+
38
23
  class UnderpostLxd {
24
+ static _project = '';
25
+
26
+ static _lxcCmd() {
27
+ return UnderpostLxd._project ? `lxc --project ${UnderpostLxd._project}` : 'lxc';
28
+ }
29
+
39
30
  static API = {
40
31
  /**
41
32
  * @method callback
42
33
  * @description Main entry point for all LXD CLI operations.
34
+ * @param {string} [vmId=''] - Positional VM identifier for boolean
35
+ * vm lifecycle flags.
43
36
  * @param {object} options
44
37
  * @param {boolean} [options.init=false] - Initialize LXD via preseed.
45
- * @param {boolean} [options.reset=false] - Complete safe reset: cleans all VMs
46
- * (proxy devices removed first), profiles, networks, then removes LXD snap.
38
+ * @param {boolean} [options.reset=false] - Host-safe teardown of VMs, proxy
39
+ * devices, admin-profile, and lxdbr0. Does NOT touch the LXD snap or
40
+ * storage pools.
41
+ * @param {boolean} [options.purge=false] - Gracefully shut the LXD daemon
42
+ * down (60s timeout) and remove the LXD snap. Combine with `--reset` to
43
+ * wipe per-VM state first. Without `--reset`, snap removal alone wipes
44
+ * everything.
45
+ * @param {boolean} [options.shutdown=false] - Pre-host-reboot procedure:
46
+ * gracefully stop every VM and the LXD daemon. Run before `reboot` /
47
+ * `poweroff` to keep the host bootable.
48
+ * @param {boolean} [options.restore=false] - Symmetric to `--shutdown`:
49
+ * starts the LXD daemon (`snap start lxd`), waits for it to become
50
+ * responsive, then starts every VM that exists. VMs created with
51
+ * `admin-profile` have `boot.autostart=false`, so this is the explicit
52
+ * "bring the lab back online" command.
47
53
  * @param {boolean} [options.dev=false] - Use local paths instead of npm global.
48
54
  * @param {boolean} [options.install=false] - Install LXD snap.
49
- * @param {boolean} [options.createVirtualNetwork=false] - Create lxdbr0 bridge network.
50
- * @param {string} [options.ipv4Address='10.250.250.1/24'] - IPv4 address/CIDR for lxdbr0.
55
+ * @param {boolean} [options.createVirtualNetwork=false] - Create lxdbr0 as an LXD-managed
56
+ * bridge with NAT, but with DHCP/DNS off and dnsmasq neutralized (raw.dnsmasq=port=0) so
57
+ * it coexists with MAAS. The managed subnet enables static NIC IPs for `--expose` proxies.
58
+ * @param {string} [options.ipv4Address='10.250.250.1/24'] - Managed gateway address/CIDR
59
+ * for lxdbr0 (LXD assigns this to the bridge and masquerades VM egress).
51
60
  * @param {boolean} [options.createAdminProfile=false] - Create admin-profile for VMs.
52
61
  * @param {boolean} [options.control=false] - Initialize VM as K3s control plane.
53
62
  * @param {boolean} [options.worker=false] - Initialize VM as K3s worker.
54
- * @param {string} [options.initVm=''] - VM name to initialize as K3s node.
55
- * @param {string} [options.deleteVm=''] - VM name to safely stop and delete
56
- * (removes proxy devices first).
57
- * @param {string} [options.createVm=''] - VM name to create (copies command to clipboard).
58
- * @param {string} [options.infoVm=''] - VM name to inspect.
63
+ * @param {boolean} [options.vmInit=false] - Bring the VM identified by
64
+ * `vmId` up as a K3s node end-to-end.
65
+ * @param {boolean} [options.vmDelete=false] - Safely stop and delete the
66
+ * VM identified by `vmId`.
67
+ * @param {boolean} [options.vmCreate=false] - Surface the launch command
68
+ * for the VM identified by `vmId`.
69
+ * @param {boolean} [options.vmInfo=false] - Inspect the VM identified by
70
+ * `vmId`.
59
71
  * @param {string} [options.rootSize=''] - Root disk size in GiB for new VMs.
60
72
  * @param {string} [options.joinNode=''] - Join format: 'workerName,controlName'.
61
73
  * @param {string} [options.expose=''] - Expose VM ports to host: 'vmName:port1,port2'.
74
+ * @param {string} [options.nodePort=''] - Custom VM-side (connect) port for `--expose`. When set, the
75
+ * host listens on each requested port but the proxy connects to this port inside the VM (e.g. expose
76
+ * host 27017 → VM NodePort 32017). Defaults to the same port on both sides.
62
77
  * @param {string} [options.deleteExpose=''] - Remove exposed ports: 'vmName:port1,port2'.
63
- * @param {string} [options.test=''] - VM name for connectivity and health checks.
64
- * @param {string} [options.bootstrapEngine=''] - VM name to replicate /home/dd/engine into.
78
+ * @param {boolean} [options.vmTest=false] - Run connectivity and health
79
+ * checks on the VM identified by `vmId`.
80
+ * @param {boolean} [options.vmSyncEngine=false] - Re-copy the host engine
81
+ * source into the VM identified by `vmId`, overriding whatever is
82
+ * currently there. Equivalent to step 2 of `--vm-init` in isolation.
83
+ * @param {boolean} [options.copy=false] - For two-phase flows that surface a
84
+ * command for the user to execute (e.g. `--create-admin-profile` phase 1):
85
+ * when set, copy the command to the clipboard. When unset, print it to
86
+ * the terminal so the user can read it directly.
87
+ * @param {string} [options.maasProject=''] - LXD project managed by MAAS
88
+ * (e.g. 'k3s-cluster'). When set, all lxc commands target this project so
89
+ * MAAS can enumerate the VMs in its machines UI.
90
+ * @param {boolean} [options.moveToProject=false] - Stop the VM identified
91
+ * by `vmId`, move it from the default project to `maasProject`, then start
92
+ * it so MAAS picks it up. Requires `--maas-project`.
65
93
  * @memberof UnderpostLxd
66
94
  */
67
95
  async callback(
96
+ vmId = '',
68
97
  options = {
69
98
  init: false,
70
99
  reset: false,
100
+ purge: false,
101
+ shutdown: false,
102
+ restore: false,
71
103
  dev: false,
72
104
  install: false,
73
105
  createVirtualNetwork: false,
@@ -75,62 +107,72 @@ class UnderpostLxd {
75
107
  createAdminProfile: false,
76
108
  control: false,
77
109
  worker: false,
78
- initVm: '',
79
- deleteVm: '',
80
- createVm: '',
81
- infoVm: '',
110
+ vmInit: false,
111
+ vmDelete: false,
112
+ vmCreate: false,
113
+ vmInfo: false,
82
114
  rootSize: '',
83
115
  joinNode: '',
84
116
  expose: '',
117
+ nodePort: '',
85
118
  deleteExpose: '',
86
- test: '',
87
- bootstrapEngine: '',
119
+ vmTest: false,
120
+ vmSyncEngine: false,
121
+ copy: false,
122
+ maasProject: '',
123
+ moveToProject: false,
88
124
  },
89
125
  ) {
90
126
  const npmRoot = getNpmRootPath();
91
127
  const underpostRoot = options?.dev === true ? '.' : `${npmRoot}/underpost`;
128
+ const currentVmId = vmId ? String(vmId).trim() : '';
129
+ const vmCreate = options.vmCreate === true;
130
+ const vmDelete = options.vmDelete === true;
131
+ const vmInfo = options.vmInfo === true;
132
+ const vmInit = options.vmInit === true;
133
+ const vmTest = options.vmTest === true;
134
+ const vmSyncEngine = options.vmSyncEngine === true;
135
+ UnderpostLxd._project = options.maasProject ? String(options.maasProject).trim() : '';
92
136
 
93
137
  // =====================================================================
94
- // RESET: Complete, safe teardown of all LXD state
138
+ // SHUTDOWN: graceful pre-host-reboot procedure
95
139
  // =====================================================================
96
- if (options.reset === true) {
97
- logger.info('=== SAFE LXD RESET ===');
98
- logger.info('Phase 1/5: Enumerating all VMs and removing proxy devices...');
99
- const vmList = UnderpostLxd._listVms();
100
- for (const vmName of vmList) {
101
- UnderpostLxd._removeProxyDevices(vmName);
102
- }
103
-
104
- logger.info('Phase 2/5: Stopping all VMs gracefully...');
105
- for (const vmName of vmList) {
106
- logger.info(` Stopping VM: ${vmName}`);
107
- shellExec(`lxc stop ${vmName} --timeout 30 2>/dev/null || true`, { silent: true, silentOnError: true });
108
- }
109
-
110
- logger.info('Phase 3/5: Deleting all VMs...');
111
- for (const vmName of vmList) {
112
- logger.info(` Deleting VM: ${vmName}`);
113
- shellExec(`lxc delete ${vmName} --force 2>/dev/null || true`, { silent: true, silentOnError: true });
114
- }
115
-
116
- logger.info('Phase 4/5: Removing admin-profile and network...');
117
- shellExec(`lxc profile delete admin-profile 2>/dev/null || true`, { silent: true, silentOnError: true });
118
- shellExec(`lxc network delete lxdbr0 2>/dev/null || true`, { silent: true, silentOnError: true });
119
-
120
- logger.info('Phase 5/5: Stopping LXD snap daemon and purging snap...');
121
- shellExec(`sudo systemctl stop snap.lxd.daemon 2>/dev/null || true`, { silent: true, silentOnError: true });
122
- shellExec(`sudo snap remove lxd --purge 2>/dev/null || true`, { silent: true, silentOnError: true });
140
+ if (options.shutdown === true) {
141
+ UnderpostLxd._gracefulShutdownAll();
142
+ return;
143
+ }
123
144
 
124
- logger.info('=== LXD RESET COMPLETE ===');
125
- logger.info('All VMs, proxy devices, profiles, networks, and the LXD snap have been removed.');
145
+ // =====================================================================
146
+ // RESTORE: symmetric counterpart to --shutdown
147
+ // =====================================================================
148
+ if (options.restore === true) {
149
+ UnderpostLxd._restoreAll();
126
150
  return;
127
151
  }
128
152
 
129
153
  // =====================================================================
130
- // INSTALL
154
+ // RESET / PURGE: host-safe teardown variants
155
+ // --reset wipes VMs, proxy devices, admin-profile, lxdbr0
156
+ // --purge gracefully stops the daemon, then snap remove --purge
157
+ // --reset --purge both, in order
158
+ // =====================================================================
159
+ if (options.reset === true) {
160
+ UnderpostLxd._safeReset();
161
+ }
162
+ if (options.purge === true) {
163
+ UnderpostLxd._safePurge();
164
+ }
165
+ if (options.reset === true || options.purge === true) return;
166
+
167
+ // =====================================================================
168
+ // INSTALL (idempotent: skip if already installed)
131
169
  // =====================================================================
132
170
  if (options.install === true) {
133
- shellExec(`sudo snap install lxd`);
171
+ if (UnderpostLxd._snapInstalled('lxd')) {
172
+ logger.info('LXD snap is already installed; skipping.');
173
+ } else {
174
+ shellExec(`sudo snap install lxd`);
175
+ }
134
176
  }
135
177
 
136
178
  // =====================================================================
@@ -139,197 +181,336 @@ class UnderpostLxd {
139
181
  if (options.init === true) {
140
182
  shellExec(`sudo systemctl start snap.lxd.daemon`);
141
183
  shellExec(`sudo systemctl status snap.lxd.daemon`);
142
- const lxdPreseedContent = fs
143
- .readFileSync(`${underpostRoot}/manifests/lxd/lxd-preseed.yaml`, 'utf8')
144
- .replaceAll(`127.0.0.1`, Underpost.dns.getLocalIPv4Address());
145
- shellExec(`echo "${lxdPreseedContent}" | lxd init --preseed`);
146
- shellExec(`lxc cluster list`);
184
+ if (UnderpostLxd._lxdInitialized()) {
185
+ logger.info('LXD is already initialized (storage pool present); skipping preseed.');
186
+ } else {
187
+ const lxdPreseedContent = fs
188
+ .readFileSync(`${underpostRoot}/manifests/lxd/lxd-preseed.yaml`, 'utf8')
189
+ .replaceAll(`127.0.0.1`, Underpost.dns.getLocalIPv4Address());
190
+ shellExec(`echo "${lxdPreseedContent}" | lxd init --preseed`);
191
+ }
192
+ shellExec(`${UnderpostLxd._lxcCmd()} cluster list`);
147
193
  }
148
194
 
149
195
  // =====================================================================
150
196
  // CREATE VIRTUAL NETWORK
197
+ //
198
+ // LXD-managed bridge so its native features work — the host is the
199
+ // gateway (10.250.250.1), LXD masquerades VM egress (`ipv4.nat`), and the
200
+ // managed subnet lets instance NICs carry a static `ipv4.address`, which
201
+ // NAT-mode proxy devices (`--expose`) require.
202
+ //
203
+ // MAAS harmony: LXD spawns a dnsmasq for any managed subnet, and on this
204
+ // host that dnsmasq cannot bind :53/:67 (MAAS's named/dhcpd already own
205
+ // them), so a default managed bridge dies with "Address already in use".
206
+ // We neutralize dnsmasq instead of dropping the subnet:
207
+ // - ipv4.dhcp=false → no DHCP, no :67 bind (MAAS owns DHCP).
208
+ // - dns.mode=none + raw.dnsmasq=port=0 → no DNS, no :53 bind. dns.mode
209
+ // alone does not force port=0 in dnsmasq, hence the explicit raw line.
210
+ // dnsmasq then starts but opens no listening sockets — no collision.
211
+ //
212
+ // Settings are applied inline at create time so dnsmasq is neutralized on
213
+ // its first spawn (not after a racing default-config start). Idempotent:
214
+ // reconcile when the network already exists.
151
215
  // =====================================================================
152
216
  if (options.createVirtualNetwork === true) {
153
- const ipv4Address = options.ipv4Address ? options.ipv4Address : '10.250.250.1/24';
154
- shellExec(`lxc network create lxdbr0 \
155
- ipv4.address=${ipv4Address} \
156
- ipv4.nat=true \
157
- ipv4.dhcp=true \
158
- ipv6.address=none`);
217
+ const gatewayCidr = options.ipv4Address ? options.ipv4Address : '10.250.250.1/24';
218
+ // Order matters for the reconcile path: neutralize dnsmasq (no DHCP, no
219
+ // DNS listener) BEFORE assigning ipv4.address, or setting the subnet on
220
+ // an existing bridge spawns a default dnsmasq that collides with MAAS
221
+ // before raw.dnsmasq lands. On a fresh inline create this is atomic.
222
+ const bridgeSettings = {
223
+ 'ipv4.dhcp': 'false',
224
+ 'dns.mode': 'none',
225
+ 'raw.dnsmasq': 'port=0',
226
+ 'ipv6.address': 'none',
227
+ 'ipv4.address': gatewayCidr,
228
+ 'ipv4.nat': 'true',
229
+ 'ipv4.firewall': 'true',
230
+ };
231
+
232
+ if (UnderpostLxd._networkExists(BRIDGE_NETWORK)) {
233
+ logger.info(`Network '${BRIDGE_NETWORK}' already exists; reconciling managed bridge settings.`);
234
+ for (const [key, value] of Object.entries(bridgeSettings)) {
235
+ shellExec(`${UnderpostLxd._lxcCmd()} network set ${BRIDGE_NETWORK} ${key} "${value}"`);
236
+ }
237
+ } else {
238
+ const inlineConfig = Object.entries(bridgeSettings)
239
+ .map(([key, value]) => `${key}="${value}"`)
240
+ .join(' ');
241
+ shellExec(`${UnderpostLxd._lxcCmd()} network create ${BRIDGE_NETWORK} ${inlineConfig}`);
242
+ }
243
+
244
+ UnderpostLxd._ensureBridgeInTrustedZone(BRIDGE_NETWORK);
245
+ UnderpostLxd._ensureBridgeForwardingAccept(BRIDGE_NETWORK);
159
246
  }
160
247
 
161
248
  // =====================================================================
162
- // CREATE ADMIN PROFILE
249
+ // CREATE ADMIN PROFILE (two-phase to sidestep `lxc profile create` hangs)
250
+ //
251
+ // Phase 1 (profile absent): print `lxc profile create admin-profile` (or,
252
+ // with --copy, put it on the clipboard). The user runs it in their shell.
253
+ // Phase 2 (profile present): load the YAML into the existing profile.
254
+ //
255
+ // Driven by an explicit pre-condition check; no shell command runs that
256
+ // could hang waiting on stdin/tty.
163
257
  // =====================================================================
164
258
  if (options.createAdminProfile === true) {
165
- const existingProfiles = await new Promise((resolve) => {
166
- shellExec(`lxc profile show admin-profile`, {
167
- silent: true,
168
- callback: (...args) => resolve(JSON.stringify(args)),
169
- });
170
- });
171
- if (existingProfiles.toLowerCase().match('error')) {
172
- logger.warn('Profile does not exist. Use the following command to create it:');
173
- pbcopy(`lxc profile create admin-profile`);
259
+ if (!UnderpostLxd._profileExists(ADMIN_PROFILE)) {
260
+ const createCmd = `lxc profile create ${ADMIN_PROFILE}`;
261
+ if (options.copy === true) {
262
+ logger.warn(
263
+ `Profile '${ADMIN_PROFILE}' does not exist. The create command has been copied to your clipboard — run it, then re-run this command to load the YAML.`,
264
+ );
265
+ pbcopy(createCmd);
266
+ } else {
267
+ logger.warn(
268
+ `Profile '${ADMIN_PROFILE}' does not exist. Run the command below in your shell, then re-run this command to load the YAML. (Pass --copy to put it on the clipboard instead.)`,
269
+ );
270
+ console.log(`\n ${createCmd}\n`);
271
+ }
174
272
  } else {
175
- shellExec(`cat ${underpostRoot}/manifests/lxd/lxd-admin-profile.yaml | lxc profile edit admin-profile`);
176
- shellExec(`lxc profile show admin-profile`);
273
+ shellExec(
274
+ `cat ${underpostRoot}/manifests/lxd/lxd-admin-profile.yaml | ${UnderpostLxd._lxcCmd()} profile edit ${ADMIN_PROFILE}`,
275
+ );
276
+ shellExec(`${UnderpostLxd._lxcCmd()} profile show ${ADMIN_PROFILE}`);
177
277
  }
178
278
  }
179
279
 
180
280
  // =====================================================================
181
- // DELETE VM (safe: removes proxy devices first)
281
+ // DELETE VM (idempotent via pre-condition checks; no silent errors)
182
282
  // =====================================================================
183
- if (options.deleteVm) {
184
- const vmName = options.deleteVm;
185
- UnderpostLxd._safeDeleteVm(vmName);
283
+ if (vmDelete) {
284
+ if (!currentVmId) {
285
+ throw new Error(`--vm-delete requires the [vm-id] command argument.`);
286
+ }
287
+ UnderpostLxd._safeDeleteVm(currentVmId);
186
288
  }
187
289
 
188
290
  // =====================================================================
189
- // CREATE VM (copy launch command to clipboard)
291
+ // MOVE VM TO PROJECT (stop + cross-project move + start for MAAS)
190
292
  // =====================================================================
191
- if (options.createVm) {
192
- pbcopy(
193
- `lxc launch images:rockylinux/9 ${options.createVm
194
- } --vm --target lxd-node1 -c limits.cpu=2 -c limits.memory=4GB --profile admin-profile -d root,size=${options.rootSize ? options.rootSize + 'GiB' : '32GiB'
195
- }`,
196
- );
293
+ if (options.moveToProject === true) {
294
+ if (!currentVmId) {
295
+ throw new Error(`--move-to-project requires the [vm-id] command argument.`);
296
+ }
297
+ if (!UnderpostLxd._project) {
298
+ throw new Error(`--move-to-project requires --maas-project <projectName>.`);
299
+ }
300
+ const rawState = shellExec(`lxc list ${currentVmId} --format json`, { stdout: true }).trim();
301
+ const arr = JSON.parse(rawState || '[]');
302
+ const inst = Array.isArray(arr) ? arr.find((i) => i?.name === currentVmId) : null;
303
+ if (!inst) throw new Error(`VM '${currentVmId}' not found in the default project.`);
304
+
305
+ // Ensure every profile the VM references exists in the target project.
306
+ // `lxc move` across projects fails with "Profile not found" if the
307
+ // target project does not have the same profiles as the source.
308
+ //
309
+ // Two-phase pattern (mirrors --create-admin-profile): `lxc profile create`
310
+ // can hang waiting on stdin/tty, so we NEVER run it programmatically.
311
+ // Phase 1 (profile absent in target): surface the create command and exit.
312
+ // Phase 2 (profile present in target): sync the YAML from the default project.
313
+ const vmProfiles = Array.isArray(inst.profiles) ? inst.profiles : [];
314
+ const targetProfilesRaw = shellExec(`${UnderpostLxd._lxcCmd()} profile list --format json`, {
315
+ stdout: true,
316
+ }).trim();
317
+ const targetProfiles = JSON.parse(targetProfilesRaw || '[]');
318
+ const targetProfileNames = Array.isArray(targetProfiles) ? targetProfiles.map((p) => p?.name) : [];
319
+ for (const profileName of vmProfiles) {
320
+ if (profileName === 'default') continue; // every project already has 'default'
321
+ if (!targetProfileNames.includes(profileName)) {
322
+ const createCmd = `lxc --project ${UnderpostLxd._project} profile create ${profileName}`;
323
+ if (options.copy === true) {
324
+ logger.warn(
325
+ `Profile '${profileName}' not found in project '${UnderpostLxd._project}'. The create command has been copied to your clipboard — run it, then re-run --move-to-project.`,
326
+ );
327
+ pbcopy(createCmd);
328
+ } else {
329
+ logger.warn(
330
+ `Profile '${profileName}' not found in project '${UnderpostLxd._project}'. Run the command below in your shell, then re-run --move-to-project. (Pass --copy to put it on the clipboard instead.)`,
331
+ );
332
+ console.log(`\n ${createCmd}\n`);
333
+ }
334
+ return;
335
+ }
336
+ // Phase 2: profile exists in target — sync YAML from default project.
337
+ // Explicitly use --project default on the source side so the read is
338
+ // unambiguous regardless of any active project context.
339
+ logger.info(`Syncing profile '${profileName}' YAML into project '${UnderpostLxd._project}'...`);
340
+ shellExec(
341
+ `lxc --project default profile show ${profileName} | ${UnderpostLxd._lxcCmd()} profile edit ${profileName}`,
342
+ );
343
+ logger.info(` Profile '${profileName}' synced.`);
344
+ }
345
+
346
+ if (inst.status === 'Running' || inst.status === 'Frozen') {
347
+ logger.info(`Stopping VM '${currentVmId}' before cross-project move...`);
348
+ shellExec(`lxc stop ${currentVmId} --timeout 60`);
349
+ }
350
+ logger.info(`Moving VM '${currentVmId}' to project '${UnderpostLxd._project}'...`);
351
+ shellExec(`lxc move ${currentVmId} ${currentVmId} --target-project ${UnderpostLxd._project}`);
352
+ logger.info(`VM '${currentVmId}' is now in project '${UnderpostLxd._project}'. Starting...`);
353
+ shellExec(`${UnderpostLxd._lxcCmd()} start ${currentVmId}`);
354
+ logger.info(`VM '${currentVmId}' started in project '${UnderpostLxd._project}'.`);
355
+ return;
197
356
  }
198
357
 
199
358
  // =====================================================================
200
- // INIT VM (OS setup + K3s role, no engine push)
359
+ // CREATE VM (surface the launch command for the user to run)
360
+ //
361
+ // Default: print to terminal. With `--copy`: copy to clipboard.
362
+ // Same two-phase pattern as `--create-admin-profile`: the CLI never runs
363
+ // `lxc launch` itself (it can hang on first image fetch or AppArmor
364
+ // negotiation in some snap setups), so the user always invokes it.
201
365
  // =====================================================================
202
- if (options.initVm) {
203
- const vmName = options.initVm;
204
- const lxdSetupPath = `${underpostRoot}/scripts/lxd-vm-setup.sh`;
205
- const k3sSetupPath = `${underpostRoot}/scripts/k3s-node-setup.sh`;
206
-
207
- // Step 1: OS base setup (disk, packages, kernel modules)
208
- shellExec(`cat ${lxdSetupPath} | lxc exec ${vmName} -- bash`);
209
-
210
- // Step 2: K3s role setup (installs Node, npm deps, then k3s via underpost CLI)
211
- // Engine source replication is a separate step via --bootstrap-engine.
212
- if (options.worker === true) {
213
- if (options.joinNode) {
214
- const controlNode = options.joinNode.includes(',') ? options.joinNode.split(',').pop() : options.joinNode;
215
- const k3sToken = shellExec(
216
- `lxc exec ${controlNode} -- bash -c 'sudo cat /var/lib/rancher/k3s/server/node-token'`,
217
- { stdout: true },
218
- ).trim();
219
- const controlPlaneIp = shellExec(
220
- `lxc list ${controlNode} --format json | jq -r '.[0].state.network.enp5s0.addresses[] | select(.family=="inet") | .address'`,
221
- { stdout: true },
222
- ).trim();
223
- logger.info(`Initializing worker ${vmName} and joining control plane ${controlNode} (${controlPlaneIp})`);
224
- shellExec(
225
- `cat ${k3sSetupPath} | lxc exec ${vmName} -- bash -s -- --worker --control-ip=${controlPlaneIp} --token=${k3sToken}`,
226
- );
227
- } else {
228
- shellExec(`cat ${k3sSetupPath} | lxc exec ${vmName} -- bash -s -- --worker`);
229
- }
366
+ if (vmCreate) {
367
+ if (!currentVmId) {
368
+ throw new Error(`--vm-create requires the [vm-id] command argument.`);
369
+ }
370
+ const vmName = currentVmId;
371
+ const launchCmd = `${UnderpostLxd._lxcCmd()} launch images:rockylinux/9 ${
372
+ vmName
373
+ } --vm --target lxd-node1 -c limits.cpu=2 -c limits.memory=4GB --profile ${ADMIN_PROFILE} -d root,size=${
374
+ options.rootSize ? options.rootSize + 'GiB' : '32GiB'
375
+ }`;
376
+ if (options.copy === true) {
377
+ logger.info(`Launch command for VM '${vmName}' copied to clipboard. Run it in your shell.`);
378
+ pbcopy(launchCmd);
230
379
  } else {
231
- shellExec(`cat ${k3sSetupPath} | lxc exec ${vmName} -- bash -s -- --control`);
380
+ logger.info(
381
+ `Run the launch command below in your shell to create VM '${vmName}'. (Pass --copy to put it on the clipboard instead.)`,
382
+ );
383
+ console.log(`\n ${launchCmd}\n`);
232
384
  }
233
385
  }
234
386
 
235
387
  // =====================================================================
236
- // BOOTSTRAP ENGINE: Replicate /home/dd/engine into a VM
388
+ // INIT VM (OS setup + engine bootstrap + K3s role)
237
389
  // =====================================================================
238
- if (options.bootstrapEngine) {
239
- const vmName = options.bootstrapEngine;
240
- logger.info(`Bootstrapping engine source into VM: ${vmName}...`);
390
+ if (vmInit) {
391
+ if (!currentVmId) {
392
+ throw new Error(`--vm-init requires the [vm-id] command argument.`);
393
+ }
394
+ const vmName = currentVmId;
395
+ if (!UnderpostLxd._vmExists(vmName)) {
396
+ throw new Error(`VM '${vmName}' does not exist. Create it first with 'underpost lxd ${vmName} --vm-create'.`);
397
+ }
398
+ const lxdSetupPath = `${underpostRoot}/scripts/lxd-vm-setup.sh`;
399
+ const k3sSetupPath = `${underpostRoot}/scripts/k3s-node-setup.sh`;
241
400
 
242
- const includesFile = `/tmp/lxd-push-${vmName}-${Date.now()}.txt`;
243
- const srcPath = `/home/dd/engine`;
244
- const files = await new Promise((resolve) =>
245
- walk({ path: srcPath, ignoreFiles: ['.gitignore'], includeEmpty: false, follow: false }, (_, result) =>
246
- resolve(result),
247
- ),
401
+ const fallbackIp = UnderpostLxd._allocateFallbackIp(vmName);
402
+ logger.info(`[${vmName}] Step 1/3: OS base setup (DHCP fallback IP: ${fallbackIp}/24)...`);
403
+ shellExec(
404
+ `cat ${lxdSetupPath} | ${UnderpostLxd._lxcCmd()} exec ${vmName} --env LXD_FALLBACK_IPV4_CIDR=${fallbackIp}/24 --env LXD_NODE_NAME=${vmName} -- bash`,
248
405
  );
249
- fs.writeFileSync(includesFile, files.join('\n'));
250
- shellExec(`lxc exec ${vmName} -- bash -c 'rm -rf /home/dd/engine && mkdir -p /home/dd/engine'`);
251
- shellExec(`tar -C ${srcPath} -cf - --files-from=${includesFile} | lxc exec ${vmName} -- tar -C /home/dd/engine -xf -`);
252
- fs.removeSync(includesFile);
253
-
254
- // Also push engine-private if it exists
255
- const privateSrcPath = `/home/dd/engine/engine-private`;
256
- if (fs.existsSync(privateSrcPath)) {
257
- const privateFiles = await new Promise((resolve) =>
258
- walk(
259
- {
260
- path: privateSrcPath,
261
- ignoreFiles: ['/home/dd/engine/.gitignore', '.gitignore'],
262
- includeEmpty: false,
263
- follow: false,
264
- },
265
- (_, result) => resolve(result),
266
- ),
406
+
407
+ logger.info(`[${vmName}] Step 2/3: Bootstrapping engine source into VM...`);
408
+ await UnderpostLxd._bootstrapEngineSource(vmName);
409
+
410
+ // Step 3: K3s role setup, driven by the local engine source.
411
+ logger.info(`[${vmName}] Step 3/3: K3s role setup...`);
412
+ const baseArgs = `--engine-root=${ENGINE_ROOT_IN_VM}`;
413
+ if (options.worker === true) {
414
+ if (!options.joinNode) {
415
+ throw new Error(
416
+ `--vm-init --worker requires --join-node <controlVmName>. A worker is meaningless without a control plane to join; the script would only fail after npm install completes.`,
417
+ );
418
+ }
419
+ const controlNode = options.joinNode.includes(',') ? options.joinNode.split(',').pop() : options.joinNode;
420
+ const { ip: controlPlaneIp, token: k3sToken } = UnderpostLxd._readControlPlaneJoinInfo(controlNode);
421
+ logger.info(`[${vmName}] Joining control plane ${controlNode} (${controlPlaneIp})`);
422
+ shellExec(
423
+ `cat ${k3sSetupPath} | ${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -s -- ${baseArgs} --worker --control-ip=${controlPlaneIp} --token=${k3sToken}`,
424
+ );
425
+ UnderpostLxd._labelWorkerNodeRole(controlNode, vmName);
426
+ } else {
427
+ shellExec(
428
+ `cat ${k3sSetupPath} | ${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -s -- ${baseArgs} --control`,
267
429
  );
268
- const privateIncludes = `/tmp/lxd-push-${vmName}-private-${Date.now()}.txt`;
269
- fs.writeFileSync(privateIncludes, privateFiles.join('\n'));
270
- shellExec(`lxc exec ${vmName} -- bash -c 'rm -rf /home/dd/engine/engine-private && mkdir -p /home/dd/engine/engine-private'`);
271
- shellExec(`tar -C ${privateSrcPath} -cf - --files-from=${privateIncludes} | lxc exec ${vmName} -- tar -C /home/dd/engine/engine-private -xf -`);
272
- fs.removeSync(privateIncludes);
273
430
  }
274
-
275
- logger.info(`Engine source bootstrapped into ${vmName}:/home/dd/engine`);
431
+ logger.info(`[${vmName}] Init complete. Engine mirrored at ${ENGINE_ROOT_IN_VM}.`);
276
432
  }
277
433
 
278
434
  // =====================================================================
279
435
  // STANDALONE JOIN
280
436
  // =====================================================================
281
- if (options.joinNode && !options.initVm) {
437
+ if (options.joinNode && !vmInit) {
282
438
  const [workerNode, controlNode] = options.joinNode.split(',');
283
- const k3sToken = shellExec(
284
- `lxc exec ${controlNode} -- bash -c 'sudo cat /var/lib/rancher/k3s/server/node-token'`,
285
- { stdout: true },
286
- ).trim();
287
- const controlPlaneIp = shellExec(
288
- `lxc list ${controlNode} --format json | jq -r '.[0].state.network.enp5s0.addresses[] | select(.family=="inet") | .address'`,
289
- { stdout: true },
290
- ).trim();
439
+ if (!workerNode || !controlNode) {
440
+ throw new Error(`--join-node standalone requires 'workerName,controlName' format.`);
441
+ }
442
+ if (!UnderpostLxd._vmExists(workerNode)) {
443
+ throw new Error(`Worker VM '${workerNode}' does not exist.`);
444
+ }
445
+ const { ip: controlPlaneIp, token: k3sToken } = UnderpostLxd._readControlPlaneJoinInfo(controlNode);
446
+ const k3sSetupPath = `${underpostRoot}/scripts/k3s-node-setup.sh`;
291
447
  logger.info(`Joining K3s worker ${workerNode} to control plane ${controlNode} (${controlPlaneIp})`);
292
448
  shellExec(
293
- `lxc exec ${workerNode} -- bash -c 'K3S_URL=https://${controlPlaneIp}:6443 K3S_TOKEN=${k3sToken} curl -sfL https://get.k3s.io | sh -s - agent'`,
449
+ `cat ${k3sSetupPath} | ${UnderpostLxd._lxcCmd()} exec ${workerNode} -- bash -s -- --engine-root=${ENGINE_ROOT_IN_VM} --worker --control-ip=${controlPlaneIp} --token=${k3sToken}`,
294
450
  );
451
+ UnderpostLxd._labelWorkerNodeRole(controlNode, workerNode);
295
452
  logger.info(`Worker ${workerNode} joined successfully.`);
296
453
  }
297
454
 
298
455
  // =====================================================================
299
456
  // INFO VM
300
457
  // =====================================================================
301
- if (options.infoVm) {
302
- shellExec(`lxc config show ${options.infoVm}`);
303
- shellExec(`lxc info --show-log ${options.infoVm}`);
304
- shellExec(`lxc info ${options.infoVm}`);
305
- shellExec(`lxc list ${options.infoVm}`);
458
+ if (vmInfo) {
459
+ if (!currentVmId) {
460
+ throw new Error(`--vm-info requires the [vm-id] command argument.`);
461
+ }
462
+ const vmName = currentVmId;
463
+ shellExec(`${UnderpostLxd._lxcCmd()} config show ${vmName}`);
464
+ shellExec(`${UnderpostLxd._lxcCmd()} info --show-log ${vmName}`);
465
+ shellExec(`${UnderpostLxd._lxcCmd()} info ${vmName}`);
466
+ shellExec(`${UnderpostLxd._lxcCmd()} list ${vmName}`);
306
467
  }
307
468
 
308
469
  // =====================================================================
309
- // EXPOSE (proxy host ports to VM)
470
+ // EXPOSE (host LAN port -> VM NodePort via LXD NAT-mode proxy device)
471
+ //
472
+ // NAT-mode proxy on a VM requires the host to be the gateway (it is) and a
473
+ // static ipv4.address on the instance NIC. _ensureNicStaticIpv4 pins that
474
+ // (with security.ipv4_filtering so it's accepted on the DHCP-less bridge),
475
+ // then the proxy device forwards listen=host -> connect=VM, preserving the
476
+ // client address via NAT. When exposing known service ports (MongoDB 27017,
477
+ // Valkey 6379), also persist the host-side runtime env so `node src/server`
478
+ // on the physical host dials the LXD proxy instead of localhost defaults.
310
479
  // =====================================================================
311
480
  if (options.expose) {
312
481
  const [vmName, ports] = options.expose.split(':');
313
482
  const protocols = ['tcp'];
314
483
  const hostIp = Underpost.dns.getLocalIPv4Address();
315
- const vmIp = shellExec(
316
- `lxc list ${vmName} --format json | jq -r '.[0].state.network.enp5s0.addresses[] | select(.family=="inet") | .address'`,
317
- { stdout: true },
318
- ).trim();
484
+ const exposedHostPorts = ports
485
+ .split(',')
486
+ .map((port) => port.trim())
487
+ .filter((port) => port.length > 0);
488
+ const vmIp = UnderpostLxd._vmIpv4(vmName);
319
489
  if (!vmIp) {
320
- logger.error(`Could not get VM IP for ${vmName}. Cannot expose ports.`);
321
- return;
490
+ throw new Error(`Could not resolve VM IP for ${vmName}. Cannot expose ports.`);
322
491
  }
323
- for (const port of ports.split(',')) {
492
+ UnderpostLxd._ensureNicStaticIpv4(vmName, vmIp);
493
+ for (const port of exposedHostPorts) {
494
+ const connectPort = options.nodePort ? options.nodePort : port;
324
495
  for (const protocol of protocols) {
325
496
  const deviceName = `${vmName}-${protocol}-port-${port}`;
326
- shellExec(`lxc config device remove ${vmName} ${deviceName}`);
497
+ if (UnderpostLxd._vmHasDevice(vmName, deviceName)) {
498
+ shellExec(`${UnderpostLxd._lxcCmd()} config device remove ${vmName} ${deviceName}`);
499
+ }
327
500
  shellExec(
328
- `lxc config device add ${vmName} ${deviceName} proxy listen=${protocol}:${hostIp}:${port} connect=${protocol}:${vmIp}:${port} nat=true`,
501
+ `${UnderpostLxd._lxcCmd()} config device add ${vmName} ${deviceName} proxy listen=${protocol}:${hostIp}:${port} connect=${protocol}:${vmIp}:${connectPort} nat=true`,
329
502
  );
330
- logger.info(`Exposed ${protocol}:${hostIp}:${port} -> ${vmIp}:${port} on ${vmName}`);
503
+ logger.info(`Exposed ${protocol}:${hostIp}:${port} -> ${vmIp}:${connectPort} on ${vmName}`);
331
504
  }
332
505
  }
506
+ if (exposedHostPorts.includes('27017') || exposedHostPorts.includes('6379')) {
507
+ Underpost.cluster.syncServiceConnectionEnv({
508
+ serviceHost: hostIp,
509
+ mongodb: exposedHostPorts.includes('27017'),
510
+ valkey: exposedHostPorts.includes('6379'),
511
+ options,
512
+ });
513
+ }
333
514
  }
334
515
 
335
516
  // =====================================================================
@@ -340,99 +521,712 @@ ipv6.address=none`);
340
521
  const protocols = ['tcp'];
341
522
  for (const port of ports.split(',')) {
342
523
  for (const protocol of protocols) {
343
- shellExec(`lxc config device remove ${vmName} ${vmName}-${protocol}-port-${port}`);
524
+ const deviceName = `${vmName}-${protocol}-port-${port}`;
525
+ if (UnderpostLxd._vmHasDevice(vmName, deviceName)) {
526
+ shellExec(`${UnderpostLxd._lxcCmd()} config device remove ${vmName} ${deviceName}`);
527
+ } else {
528
+ logger.info(`Device ${deviceName} not present on ${vmName}; skipping.`);
529
+ }
344
530
  }
345
531
  }
346
532
  }
347
533
 
534
+ // =====================================================================
535
+ // SYNC ENGINE (re-copy host engine source into VM)
536
+ // =====================================================================
537
+ if (vmSyncEngine) {
538
+ if (!currentVmId) {
539
+ throw new Error(`--vm-sync-engine requires the [vm-id] command argument.`);
540
+ }
541
+ const vmName = currentVmId;
542
+ if (!UnderpostLxd._vmExists(vmName)) {
543
+ throw new Error(`VM '${vmName}' does not exist.`);
544
+ }
545
+ logger.info(`[${vmName}] Syncing engine source from host...`);
546
+ await UnderpostLxd._bootstrapEngineSource(vmName);
547
+ UnderpostLxd._execVmNodeCommand(vmName, `cd ${ENGINE_ROOT_IN_VM} && npm install`, { requireNpm: true });
548
+ logger.info(`[${vmName}] Engine source sync complete.`);
549
+ return;
550
+ }
551
+
348
552
  // =====================================================================
349
553
  // TEST (connectivity and health checks)
350
554
  // =====================================================================
351
- if (options.test) {
352
- const vmName = options.test;
353
- const vmIp = shellExec(
354
- `lxc list ${vmName} --format json | jq -r '.[0].state.network.enp5s0.addresses[] | select(.family=="inet") | .address'`,
355
- { stdout: true },
356
- ).trim();
555
+ if (vmTest) {
556
+ if (!currentVmId) {
557
+ throw new Error(`--vm-test requires the [vm-id] command argument.`);
558
+ }
559
+ const vmName = currentVmId;
560
+ const vmIp = UnderpostLxd._vmIpv4(vmName);
357
561
  logger.info(`VM ${vmName} IPv4: ${vmIp || 'none'}`);
358
562
  const httpStatus = shellExec(
359
- `lxc exec ${vmName} -- curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://google.com`,
563
+ `${UnderpostLxd._lxcCmd()} exec ${vmName} -- curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://google.com`,
360
564
  { stdout: true },
361
565
  ).trim();
362
566
  logger.info(`VM ${vmName} HTTP connectivity: ${httpStatus}`);
363
567
  logger.info(`Health report for VM: ${vmName}`);
364
- shellExec(`lxc list ${vmName} --format json`);
365
- shellExec(`lxc exec ${vmName} -- bash -c 'top -bn1 | grep "Cpu(s)"'`);
366
- shellExec(`lxc exec ${vmName} -- bash -c 'free -m'`);
367
- shellExec(`lxc exec ${vmName} -- bash -c 'df -h /'`);
368
- shellExec(`lxc exec ${vmName} -- bash -c 'ip a'`);
369
- shellExec(`lxc exec ${vmName} -- bash -c 'cat /etc/resolv.conf'`);
370
- shellExec(`lxc exec ${vmName} -- bash -c 'sudo k3s kubectl get nodes'`);
568
+ shellExec(`${UnderpostLxd._lxcCmd()} list ${vmName} --format json`);
569
+ shellExec(`${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c 'top -bn1 | grep "Cpu(s)"'`);
570
+ shellExec(`${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c 'free -m'`);
571
+ shellExec(`${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c 'df -h /'`);
572
+ shellExec(`${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c 'ip a'`);
573
+ shellExec(`${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c 'cat /etc/resolv.conf'`);
574
+ shellExec(`${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c 'sudo k3s kubectl get nodes'`);
371
575
  }
372
576
  },
373
577
  };
374
578
 
375
579
  // =====================================================================
376
- // PRIVATE HELPERS
580
+ // PRIVATE HELPERS — lookups that legitimately tolerate "absent" return
581
+ // values do so via list-style commands that always exit 0, not by
582
+ // suppressing error signals from destructive commands.
377
583
  // =====================================================================
378
584
 
379
585
  /**
380
- * Lists all LXD VM (virtual-machine) instance names.
381
- * @returns {string[]} Array of VM names.
586
+ * Lists all LXD VM (virtual-machine) instance names. Returns [] when no VMs.
587
+ * `lxc list --format json` always exits 0; an empty cluster yields `[]`.
588
+ * @returns {string[]}
382
589
  * @private
383
590
  */
384
591
  static _listVms() {
385
592
  const raw = shellExec(
386
- `lxc list --format json | jq -r '.[] | select(.type=="virtual-machine") | .name // empty' 2>/dev/null || true`,
387
- { stdout: true, silent: true, silentOnError: true },
593
+ `${UnderpostLxd._lxcCmd()} list --format json | jq -r '.[] | select(.type=="virtual-machine") | .name // empty'`,
594
+ {
595
+ stdout: true,
596
+ },
388
597
  ).trim();
389
598
  if (!raw) return [];
390
599
  return raw.split('\n').filter((n) => n.length > 0);
391
600
  }
392
601
 
393
602
  /**
394
- * Enumerates and removes all proxy devices attached to a VM.
395
- * Proxy devices are named with the pattern <vmName>-<protocol>-port-<port>.
396
- * Fails silently if the VM or device is already gone (idempotent).
397
- * @param {string} vmName - The VM name to clean proxy devices from.
603
+ * Returns the named VM's status string (e.g. 'Running', 'Stopped', 'Frozen')
604
+ * or `null` if the VM does not exist. Never throws on absence.
605
+ * @param {string} vmName
606
+ * @returns {string|null}
607
+ * @private
608
+ */
609
+ static _vmState(vmName) {
610
+ const raw = shellExec(`${UnderpostLxd._lxcCmd()} list ${vmName} --format json`, { stdout: true }).trim();
611
+ if (!raw) return null;
612
+ const arr = JSON.parse(raw);
613
+ const inst = Array.isArray(arr) ? arr.find((i) => i?.name === vmName) : null;
614
+ return inst ? inst.status || 'Unknown' : null;
615
+ }
616
+
617
+ /**
618
+ * @param {string} vmName
619
+ * @returns {boolean}
620
+ * @private
621
+ */
622
+ static _vmExists(vmName) {
623
+ return UnderpostLxd._vmState(vmName) !== null;
624
+ }
625
+
626
+ /**
627
+ * Resolves the VM's primary IPv4, preferring the guest interface that owns
628
+ * the default route. This avoids selecting K3s bridge/CNI addresses like
629
+ * 10.42.0.1 after the control plane comes up.
630
+ * @param {string} vmName
631
+ * @returns {string}
632
+ * @private
633
+ */
634
+ static _vmIpv4(vmName) {
635
+ const defaultRoute = shellExec(`${UnderpostLxd._lxcCmd()} exec ${vmName} -- ip -4 -o route show default`, {
636
+ stdout: true,
637
+ }).trim();
638
+ const defaultRouteTokens = defaultRoute ? defaultRoute.split(/\s+/) : [];
639
+ const devIndex = defaultRouteTokens.indexOf('dev');
640
+ const defaultIface = devIndex >= 0 ? defaultRouteTokens[devIndex + 1] || '' : '';
641
+
642
+ if (defaultIface) {
643
+ const defaultIfaceAddr = shellExec(
644
+ `${UnderpostLxd._lxcCmd()} exec ${vmName} -- ip -4 -o addr show dev ${defaultIface} scope global`,
645
+ {
646
+ stdout: true,
647
+ },
648
+ ).trim();
649
+ const routeScopedIp = defaultIfaceAddr.match(/\binet\s+([0-9.]+)\//)?.[1] || '';
650
+ if (routeScopedIp) return routeScopedIp;
651
+ }
652
+
653
+ return shellExec(
654
+ `${UnderpostLxd._lxcCmd()} list ${vmName} --format json | jq -r '[.[0].state.network | to_entries[] | select(.key!="lo") | .value.addresses[]? | select(.family=="inet" and .scope=="global") | .address | select(test("^10\\.42\\.|^10\\.43\\.|^169\\.254\\.") | not)] | .[0] // empty'`,
655
+ { stdout: true },
656
+ ).trim();
657
+ }
658
+
659
+ /**
660
+ * Pins the VM's lxdbr0 NIC to a static `ipv4.address` equal to `vmIp`, which
661
+ * NAT-mode proxy devices require. `security.ipv4_filtering=true` is set in the
662
+ * same call: lxdbr0 runs no DHCP, and LXD only permits a static NIC address
663
+ * on a DHCP-less managed bridge when filtering is enabled (it also anti-spoofs
664
+ * the VM to that IP). The eth0 NIC comes from admin-profile, so override it on
665
+ * first touch and set thereafter. Same IP as the current lease -> no disruption.
666
+ * @param {string} vmName
667
+ * @param {string} vmIp
668
+ * @private
669
+ */
670
+ static _ensureNicStaticIpv4(vmName, vmIp) {
671
+ const nic = 'eth0';
672
+ const raw = shellExec(`${UnderpostLxd._lxcCmd()} list ${vmName} --format json`, { stdout: true }).trim();
673
+ const arr = JSON.parse(raw || '[]');
674
+ const inst = Array.isArray(arr) ? arr.find((i) => i?.name === vmName) : null;
675
+ const instanceDevices = inst?.devices || {};
676
+ const currentStatic = instanceDevices[nic]?.['ipv4.address'] || '';
677
+ const hasLocalNic = !!instanceDevices[nic];
678
+ const verb = hasLocalNic ? 'set' : 'override';
679
+ if (currentStatic !== vmIp) {
680
+ shellExec(
681
+ `${UnderpostLxd._lxcCmd()} config device ${verb} ${vmName} ${nic} ipv4.address=${vmIp} security.ipv4_filtering=true`,
682
+ );
683
+ logger.info(` Pinned ${vmName} NIC ${nic} to static ${vmIp} (required for NAT proxy on VMs).`);
684
+ } else {
685
+ shellExec(`${UnderpostLxd._lxcCmd()} config device set ${vmName} ${nic} security.ipv4_filtering=true`, {
686
+ silentOnError: true,
687
+ });
688
+ logger.info(` NIC ${nic} on ${vmName} already pinned to static ${vmIp}.`);
689
+ }
690
+ }
691
+
692
+ /**
693
+ * Returns true if a named device is currently attached (expanded) to the VM.
694
+ * @param {string} vmName
695
+ * @param {string} deviceName
696
+ * @returns {boolean}
697
+ * @private
698
+ */
699
+ static _vmHasDevice(vmName, deviceName) {
700
+ if (!UnderpostLxd._vmExists(vmName)) return false;
701
+ const raw = shellExec(`${UnderpostLxd._lxcCmd()} list ${vmName} --format json`, { stdout: true }).trim();
702
+ const arr = JSON.parse(raw || '[]');
703
+ const inst = Array.isArray(arr) ? arr.find((i) => i?.name === vmName) : null;
704
+ if (!inst) return false;
705
+ return Object.prototype.hasOwnProperty.call(inst.expanded_devices || {}, deviceName);
706
+ }
707
+
708
+ /**
709
+ * @param {string} name
710
+ * @returns {boolean}
711
+ * @private
712
+ */
713
+ static _profileExists(name) {
714
+ const raw = shellExec(`${UnderpostLxd._lxcCmd()} profile list --format json`, { stdout: true }).trim();
715
+ const arr = JSON.parse(raw || '[]');
716
+ return Array.isArray(arr) && arr.some((p) => p?.name === name);
717
+ }
718
+
719
+ /**
720
+ * @param {string} name
721
+ * @returns {boolean}
722
+ * @private
723
+ */
724
+ static _networkExists(name) {
725
+ const raw = shellExec(`${UnderpostLxd._lxcCmd()} network list --format json`, { stdout: true }).trim();
726
+ const arr = JSON.parse(raw || '[]');
727
+ return Array.isArray(arr) && arr.some((n) => n?.name === name);
728
+ }
729
+
730
+ /**
731
+ * True once `lxd init --preseed` has bootstrapped this daemon. Detected by the
732
+ * presence of any storage pool: the preseed creates `local`, and a fresh
733
+ * daemon has none. `lxc storage list` exits 0 with `[]` before init, so this
734
+ * never throws on a not-yet-initialized host.
735
+ * @returns {boolean}
736
+ * @private
737
+ */
738
+ static _lxdInitialized() {
739
+ const raw = shellExec(`${UnderpostLxd._lxcCmd()} storage list --format json`, {
740
+ stdout: true,
741
+ silentOnError: true,
742
+ }).trim();
743
+ if (!raw) return false;
744
+ let arr;
745
+ try {
746
+ arr = JSON.parse(raw);
747
+ } catch {
748
+ return false;
749
+ }
750
+ return Array.isArray(arr) && arr.length > 0;
751
+ }
752
+
753
+ /**
754
+ * Adds the bridge to the firewalld `trusted` zone so VM<->host and VM
755
+ * outbound traffic isn't dropped by the host firewall. Idempotent and
756
+ * resilient: re-adding an already-trusted interface is a no-op, and hosts
757
+ * without firewalld are skipped rather than aborting bridge creation.
758
+ * @param {string} bridge
759
+ * @private
760
+ */
761
+ static _ensureBridgeInTrustedZone(bridge) {
762
+ const hasFirewalld = shellExec(`command -v firewall-cmd`, { stdout: true, silentOnError: true }).trim();
763
+ if (!hasFirewalld) {
764
+ logger.info(`firewall-cmd not found; skipping trusted-zone binding for ${bridge}.`);
765
+ return;
766
+ }
767
+ shellExec(`sudo firewall-cmd --permanent --zone=trusted --add-interface=${bridge}`, { silentOnError: true });
768
+ shellExec(`sudo firewall-cmd --reload`, { silentOnError: true });
769
+ }
770
+
771
+ /**
772
+ * Explicitly accepts forwarded traffic to/from the plain bridge in the
773
+ * iptables FORWARD chain. A LXD-managed bridge inserts these itself
774
+ * (`ipv4.firewall=true`); a plain bridge does not, so on a host where Docker
775
+ * has set the FORWARD policy to DROP, VM<->VM traffic on lxdbr0 (e.g. a k3s
776
+ * worker dialing the control plane API) is silently dropped once br_netfilter
777
+ * routes bridged frames through netfilter. Rules are prepended (position 1) so
778
+ * they win over Docker's DROP, and guarded by `-C` so re-runs don't duplicate.
779
+ * Not persisted across reboots/iptables flush by design — re-run
780
+ * `--create-virtual-network`, consistent with the rest of this lab flow.
781
+ * @param {string} bridge
782
+ * @private
783
+ */
784
+ static _ensureBridgeForwardingAccept(bridge) {
785
+ const hasIptables = shellExec(`command -v iptables`, { stdout: true, silentOnError: true }).trim();
786
+ if (!hasIptables) {
787
+ logger.info(`iptables not found; skipping FORWARD accept rules for ${bridge}.`);
788
+ return;
789
+ }
790
+ for (const dir of ['-i', '-o']) {
791
+ const present = shellExec(`sudo iptables -C FORWARD ${dir} ${bridge} -j ACCEPT`, { silentOnError: true });
792
+ if (present.code !== 0) {
793
+ shellExec(`sudo iptables -I FORWARD 1 ${dir} ${bridge} -j ACCEPT`);
794
+ }
795
+ }
796
+ logger.info(`Ensured FORWARD ACCEPT for ${bridge} (counters Docker/default DROP for VM<->VM traffic).`);
797
+ }
798
+
799
+ /**
800
+ * Returns true if a snap with the given name is installed. `snap list` exits
801
+ * 0 with the full installed-snap table; we grep for an exact-name row.
802
+ * @param {string} name
803
+ * @returns {boolean}
804
+ * @private
805
+ */
806
+ static _snapInstalled(name) {
807
+ const raw = shellExec(`snap list`, { stdout: true });
808
+ return raw.split('\n').some((line) => new RegExp(`^${name}\\s`).test(line));
809
+ }
810
+
811
+ /**
812
+ * Single-quotes a shell argument for safe `bash -lc '...'` usage.
813
+ * @param {string} value
814
+ * @returns {string}
815
+ * @private
816
+ */
817
+ static _shellSingleQuote(value) {
818
+ return `'${`${value}`.replace(/'/g, `'\\''`)}'`;
819
+ }
820
+
821
+ /**
822
+ * Runs a command inside a VM with Node/NPM restored from the NVM install
823
+ * that k3s-node-setup.sh lays down. Non-login `bash -c` shells do not keep
824
+ * that PATH, so resolve it explicitly here.
825
+ * @param {string} vmName
826
+ * @param {string} command
827
+ * @param {object} [options]
828
+ * @param {boolean} [options.requireNpm=false]
829
+ * @param {number} [options.timeoutSeconds=0]
830
+ * @private
831
+ */
832
+ static _execVmNodeCommand(vmName, command, options = { requireNpm: false, timeoutSeconds: 0 }) {
833
+ const { requireNpm = false, timeoutSeconds = 0 } = options;
834
+ const runtimeBootstrap = [
835
+ 'export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"',
836
+ '[ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"',
837
+ `if ! command -v node >/dev/null 2>&1${requireNpm ? ' || ! command -v npm >/dev/null 2>&1' : ''}; then latest_nvm_bin="$(ls -d "$NVM_DIR"/versions/node/*/bin 2>/dev/null | sort -V | tail -n 1)"; if [ -n "$latest_nvm_bin" ]; then export PATH="$latest_nvm_bin:$PATH"; fi; fi`,
838
+ 'command -v node >/dev/null 2>&1 || { echo "ERROR: node not found in PATH or NVM_DIR=$NVM_DIR" >&2; exit 127; }',
839
+ requireNpm
840
+ ? 'command -v npm >/dev/null 2>&1 || { echo "ERROR: npm not found in PATH or NVM_DIR=$NVM_DIR" >&2; exit 127; }'
841
+ : '',
842
+ command,
843
+ ]
844
+ .filter(Boolean)
845
+ .join(' && ');
846
+ const lxcExecCmd = `${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -lc ${UnderpostLxd._shellSingleQuote(runtimeBootstrap)}`;
847
+ return shellExec(timeoutSeconds > 0 ? `timeout ${timeoutSeconds} ${lxcExecCmd}` : lxcExecCmd);
848
+ }
849
+
850
+ /**
851
+ * Deterministic per-VM IPv4 in the lxdbr0 /24, used as the static fallback
852
+ * inside lxd-vm-setup.sh when DHCP is unavailable. The previous shared
853
+ * `10.250.250.100/24` fallback caused IP collisions across VMs and broke
854
+ * worker→control K3s joins (the worker dialed its own NIC). Offset is the
855
+ * sum of vmName char codes mod 253, +2 — avoids .0, .1 (gateway), .255.
856
+ * @param {string} vmName
857
+ * @returns {string}
858
+ * @private
859
+ */
860
+ static _allocateFallbackIp(vmName) {
861
+ let sum = 0;
862
+ for (let i = 0; i < vmName.length; i++) sum += vmName.charCodeAt(i);
863
+ return `${BRIDGE_SUBNET_PREFIX}.${(sum % 253) + 2}`;
864
+ }
865
+
866
+ /**
867
+ * Reads the K3s join info (control plane IPv4 + node token) from the control
868
+ * VM. The control must already be running — VMs created with `admin-profile`
869
+ * have `boot.autostart=false`, so after a host reboot bring it up explicitly
870
+ * ('lxc start <control>' or 'node bin lxd --restore') before joining a worker.
871
+ * Throws if either value is missing — callers depend on both.
872
+ * @param {string} controlNode
873
+ * @returns {{ip: string, token: string}}
874
+ * @private
875
+ */
876
+ static _readControlPlaneJoinInfo(controlNode) {
877
+ const state = UnderpostLxd._vmState(controlNode);
878
+ if (state === null) {
879
+ throw new Error(`Control node VM '${controlNode}' does not exist.`);
880
+ }
881
+ if (state !== 'Running') {
882
+ throw new Error(
883
+ `Control node VM '${controlNode}' is ${state}. Start it first ('lxc start ${controlNode}' or 'node bin lxd --restore'), then re-run the worker join.`,
884
+ );
885
+ }
886
+ const token = shellExec(
887
+ `${UnderpostLxd._lxcCmd()} exec ${controlNode} -- bash -c 'sudo cat /var/lib/rancher/k3s/server/node-token'`,
888
+ {
889
+ stdout: true,
890
+ },
891
+ ).trim();
892
+ const ip = UnderpostLxd._vmIpv4(controlNode);
893
+ if (!ip || !token) {
894
+ throw new Error(`Could not read join info from control node '${controlNode}' (ip='${ip}', token='${token}').`);
895
+ }
896
+ return { ip, token };
897
+ }
898
+
899
+ /**
900
+ * Applies the `node-role.kubernetes.io/worker` label to a freshly joined
901
+ * worker. A K3s agent cannot self-apply `node-role.kubernetes.io/*` labels
902
+ * (the NodeRestriction admission plugin rejects them), so the label must be
903
+ * set from the control plane after the worker registers — otherwise the node
904
+ * shows ROLES `<none>`. The K3s node name defaults to the VM hostname, which
905
+ * LXD sets to the instance name, so `workerName` is the VM name. Waits up to
906
+ * 60s for the node to appear before labeling.
907
+ * @param {string} controlNode
908
+ * @param {string} workerName
909
+ * @private
910
+ */
911
+ static _labelWorkerNodeRole(controlNode, workerName) {
912
+ logger.info(`Labeling worker '${workerName}' as node-role.kubernetes.io/worker (from control '${controlNode}')...`);
913
+ shellExec(
914
+ `${UnderpostLxd._lxcCmd()} exec ${controlNode} -- bash -c 'for i in $(seq 1 30); do if sudo k3s kubectl get node ${workerName} >/dev/null 2>&1; then sudo k3s kubectl label node ${workerName} node-role.kubernetes.io/worker=worker --overwrite && exit 0; fi; sleep 2; done; echo "WARN: worker ${workerName} did not register within 60s; role label not applied." >&2'`,
915
+ );
916
+ }
917
+
918
+ /**
919
+ * Enumerates and removes every device of `type: proxy` attached to a VM
920
+ * (the `--expose` NAT proxy devices). Naming-agnostic. Skips if the VM is
921
+ * already gone; otherwise every `lxc config device remove` propagates errors.
922
+ * @param {string} vmName
398
923
  * @private
399
924
  */
400
925
  static _removeProxyDevices(vmName) {
926
+ if (!UnderpostLxd._vmExists(vmName)) {
927
+ logger.info(` Skipping proxy cleanup: VM '${vmName}' is already gone.`);
928
+ return;
929
+ }
401
930
  logger.info(` Removing proxy devices from ${vmName}...`);
402
- const devicesRaw = shellExec(
403
- `lxc config device list ${vmName} 2>/dev/null | grep -E "^${vmName}-tcp-port-" || true`,
404
- { stdout: true, silent: true, silentOnError: true },
405
- ).trim();
406
- if (!devicesRaw) {
931
+ const raw = shellExec(`${UnderpostLxd._lxcCmd()} list ${vmName} --format json`, { stdout: true }).trim();
932
+ const arr = JSON.parse(raw || '[]');
933
+ const inst = Array.isArray(arr) ? arr.find((i) => i?.name === vmName) : null;
934
+ const expandedDevices = inst?.expanded_devices || {};
935
+ const proxyNames = Object.entries(expandedDevices)
936
+ .filter(([, dev]) => dev?.type === 'proxy')
937
+ .map(([name]) => name);
938
+ if (proxyNames.length === 0) {
407
939
  logger.info(` No proxy devices found on ${vmName}.`);
408
940
  return;
409
941
  }
410
- for (const deviceName of devicesRaw.split('\n')) {
411
- const name = deviceName.trim();
412
- if (!name) continue;
942
+ for (const name of proxyNames) {
413
943
  logger.info(` Removing device: ${name}`);
414
- shellExec(`lxc config device remove ${vmName} ${name} 2>/dev/null || true`, {
415
- silent: true,
416
- silentOnError: true,
417
- });
944
+ shellExec(`${UnderpostLxd._lxcCmd()} config device remove ${vmName} ${name}`);
418
945
  }
419
946
  }
420
947
 
421
948
  /**
422
- * Safely deletes a single VM: removes proxy devices first, then stops and deletes.
423
- * Idempotent: safe to re-run if VM is already gone.
424
- * @param {string} vmName - The VM name to delete.
949
+ * Delegates K3s teardown inside a running VM to the centralized
950
+ * `safeResetK3s` in src/cli/cluster.js via `lxc exec`. No-op when K3s or the
951
+ * engine mirror is missing. Bounded by `timeout 300`.
952
+ * @param {string} vmName
953
+ * @param {'drain'|'full'} resetMode - `drain` preserves K3s for next boot
954
+ * (`--shutdown`); `full` uninstalls (`--vm-delete` / `--reset` / `--purge`).
955
+ * @private
956
+ */
957
+ static _resetK3sInVm(vmName, resetMode) {
958
+ if (UnderpostLxd._vmState(vmName) !== 'Running') return;
959
+ const m = resetMode === 'drain' ? 'drain' : 'full';
960
+ const probe = `if test -x /usr/local/bin/k3s && test -d ${ENGINE_ROOT_IN_VM}/bin; then echo yes; else echo no; fi`;
961
+ const probeOut = shellExec(`${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c '${probe}'`, {
962
+ stdout: true,
963
+ }).trim();
964
+ if (probeOut !== 'yes') {
965
+ logger.info(` [${vmName}] No K3s+engine detected (probe=${probeOut}); skipping K3s reset.`);
966
+ return;
967
+ }
968
+ logger.info(` [${vmName}] Resetting K3s (resetMode=${m}) via 'node bin cluster --reset --k3s --reset-mode=${m}'`);
969
+ UnderpostLxd._execVmNodeCommand(
970
+ vmName,
971
+ `cd ${ENGINE_ROOT_IN_VM} && node bin cluster --dev --reset --k3s --reset-mode=${m}`,
972
+ { timeoutSeconds: 300 },
973
+ );
974
+ }
975
+
976
+ /**
977
+ * Safely deletes a single VM. Pre-conditions gate every step; absence is a
978
+ * no-op, but unexpected failures propagate.
979
+ *
980
+ * 1. If VM is absent → log and return.
981
+ * 2. Remove every proxy device (clears iptables NAT before the VM goes away).
982
+ * 3. If state is Running/Frozen → graceful stop with 30 s timeout.
983
+ * 4. Delete the VM.
984
+ *
985
+ * @param {string} vmName
425
986
  * @private
426
987
  */
427
988
  static _safeDeleteVm(vmName) {
428
- logger.info(`Safely deleting VM: ${vmName}`);
989
+ const state = UnderpostLxd._vmState(vmName);
990
+ if (state === null) {
991
+ logger.info(`VM '${vmName}' does not exist. Nothing to do.`);
992
+ return;
993
+ }
994
+ logger.info(`Deleting VM '${vmName}' (current state: ${state})...`);
429
995
  UnderpostLxd._removeProxyDevices(vmName);
430
- logger.info(` Stopping VM: ${vmName}`);
431
- shellExec(`lxc stop ${vmName} --timeout 30 2>/dev/null || true`, { silent: true, silentOnError: true });
996
+ if (state === 'Running' || state === 'Frozen') {
997
+ UnderpostLxd._resetK3sInVm(vmName, 'full');
998
+ logger.info(` Stopping VM: ${vmName}`);
999
+ shellExec(`${UnderpostLxd._lxcCmd()} stop ${vmName} --timeout 60`);
1000
+ }
432
1001
  logger.info(` Deleting VM: ${vmName}`);
433
- shellExec(`lxc delete ${vmName} --force 2>/dev/null || true`, { silent: true, silentOnError: true });
434
- logger.info(`VM ${vmName} safely deleted.`);
1002
+ shellExec(`${UnderpostLxd._lxcCmd()} delete ${vmName}`);
1003
+ logger.info(`VM ${vmName} deleted.`);
1004
+ }
1005
+
1006
+ /**
1007
+ * Host-safe reset. Wipes per-VM state and the network/profile this CLI owns.
1008
+ * Leaves the LXD snap and storage pools intact so the host stays bootable
1009
+ * even if the daemon has internal issues. Use `--purge` for snap removal.
1010
+ *
1011
+ * Phase 1: Remove proxy devices from every VM (clears iptables NAT rules).
1012
+ * Phase 2: Stop running VMs gracefully (30 s timeout each).
1013
+ * Phase 3: Delete every VM.
1014
+ * Phase 4: Drop `admin-profile` and the `lxdbr0` network if they exist.
1015
+ *
1016
+ * @private
1017
+ */
1018
+ static _safeReset() {
1019
+ logger.info('=== LXD RESET (host-safe) ===');
1020
+ const vmList = UnderpostLxd._listVms();
1021
+
1022
+ logger.info(`Phase 1/4: Removing proxy devices from ${vmList.length} VM(s)...`);
1023
+ for (const vmName of vmList) {
1024
+ UnderpostLxd._removeProxyDevices(vmName);
1025
+ }
1026
+
1027
+ logger.info('Phase 2/4: Full K3s teardown + stopping running VMs gracefully...');
1028
+ for (const vmName of vmList) {
1029
+ const state = UnderpostLxd._vmState(vmName);
1030
+ if (state === 'Running' || state === 'Frozen') {
1031
+ UnderpostLxd._resetK3sInVm(vmName, 'full');
1032
+ logger.info(` Stopping VM: ${vmName}`);
1033
+ shellExec(`${UnderpostLxd._lxcCmd()} stop ${vmName} --timeout 60`);
1034
+ } else if (state !== null) {
1035
+ logger.info(` VM ${vmName} already in state: ${state}`);
1036
+ }
1037
+ }
1038
+
1039
+ logger.info('Phase 3/4: Deleting all VMs...');
1040
+ for (const vmName of vmList) {
1041
+ if (UnderpostLxd._vmExists(vmName)) {
1042
+ logger.info(` Deleting VM: ${vmName}`);
1043
+ shellExec(`${UnderpostLxd._lxcCmd()} delete ${vmName}`);
1044
+ }
1045
+ }
1046
+
1047
+ logger.info(`Phase 4/4: Removing ${ADMIN_PROFILE} and ${BRIDGE_NETWORK} if present...`);
1048
+ if (UnderpostLxd._profileExists(ADMIN_PROFILE)) {
1049
+ shellExec(`${UnderpostLxd._lxcCmd()} profile delete ${ADMIN_PROFILE}`);
1050
+ }
1051
+ if (UnderpostLxd._networkExists(BRIDGE_NETWORK)) {
1052
+ shellExec(`${UnderpostLxd._lxcCmd()} network delete ${BRIDGE_NETWORK}`);
1053
+ }
1054
+
1055
+ logger.info('=== LXD RESET COMPLETE ===');
1056
+ logger.info('Snap and storage pools were NOT touched. Use --purge to remove the LXD snap.');
1057
+ }
1058
+
1059
+ /**
1060
+ * Removes the LXD snap. ALWAYS preceded by `lxd shutdown --timeout 60` so
1061
+ * the daemon flushes the ZFS pool cleanly. Without that flush, removing the
1062
+ * snap while VMs are running and the pool is dirty has historically left the
1063
+ * host unbootable. This is the safe variant.
1064
+ *
1065
+ * @private
1066
+ */
1067
+ static _safePurge() {
1068
+ logger.info('=== LXD PURGE (DESTRUCTIVE) ===');
1069
+ if (!UnderpostLxd._snapInstalled('lxd')) {
1070
+ logger.info('LXD snap is not installed. Nothing to purge.');
1071
+ return;
1072
+ }
1073
+ // Drain K3s inside every VM before lxd shutdown so containerd unmounts
1074
+ // cleanly and the ZFS pool isn't dirty when the daemon flushes.
1075
+ const vmList = UnderpostLxd._listVms();
1076
+ if (vmList.length > 0) {
1077
+ logger.info(`Phase 1/3: Full K3s teardown inside ${vmList.length} VM(s)...`);
1078
+ for (const vmName of vmList) UnderpostLxd._resetK3sInVm(vmName, 'full');
1079
+ } else {
1080
+ logger.info('Phase 1/3: No VMs to process.');
1081
+ }
1082
+ logger.info('Phase 2/3: Asking LXD daemon to shut down cleanly (60s timeout)...');
1083
+ // `lxd` lives at /snap/bin/lxd which is not in sudo's secure_path on most
1084
+ // distros. Forward PATH explicitly so sudo can resolve the binary.
1085
+ shellExec(`sudo env PATH="$PATH:/snap/bin" lxd shutdown --timeout 60`);
1086
+ logger.info('Phase 3/3: Removing LXD snap and ALL its data (instances, storage pools)...');
1087
+ shellExec(`sudo snap remove lxd --purge`);
1088
+ logger.info('=== LXD PURGE COMPLETE ===');
1089
+ }
1090
+
1091
+ /**
1092
+ * Pre-host-reboot procedure. Gracefully stops every running VM, then asks
1093
+ * the LXD daemon to shut down. Run this before `reboot` / `poweroff` so the
1094
+ * storage pool is clean on next boot.
1095
+ *
1096
+ * @private
1097
+ */
1098
+ static _gracefulShutdownAll() {
1099
+ logger.info('=== LXD GRACEFUL SHUTDOWN (pre-host-reboot) ===');
1100
+ const vmList = UnderpostLxd._listVms();
1101
+ for (const vmName of vmList) {
1102
+ const state = UnderpostLxd._vmState(vmName);
1103
+ if (state === 'Running' || state === 'Frozen') {
1104
+ UnderpostLxd._resetK3sInVm(vmName, 'drain');
1105
+ logger.info(` Stopping VM: ${vmName} (timeout 60s)`);
1106
+ shellExec(`${UnderpostLxd._lxcCmd()} stop ${vmName} --timeout 60`);
1107
+ } else {
1108
+ logger.info(` VM ${vmName} already in state: ${state}`);
1109
+ }
1110
+ }
1111
+ if (UnderpostLxd._snapInstalled('lxd')) {
1112
+ logger.info('Asking LXD daemon to shut down cleanly (timeout 60s)...');
1113
+ // sudo's secure_path excludes /snap/bin on most distros — forward PATH.
1114
+ shellExec(`sudo env PATH="$PATH:/snap/bin" lxd shutdown --timeout 60`);
1115
+ }
1116
+ logger.info('=== HOST IS SAFE TO REBOOT/POWEROFF ===');
1117
+ }
1118
+
1119
+ /**
1120
+ * Symmetric counterpart to `_gracefulShutdownAll`. Brings the lab back up:
1121
+ *
1122
+ * 1. Start the LXD daemon via `snap start lxd` (idempotent).
1123
+ * 2. Wait up to 30 s for `lxc info` to respond, so we don't race the
1124
+ * daemon's socket-bring-up.
1125
+ * 3. Start every VM that exists. Skips VMs that are already Running.
1126
+ *
1127
+ * VMs created with `admin-profile` have `boot.autostart=false` by design
1128
+ * (host-safety), so this command is how you explicitly bring them online.
1129
+ *
1130
+ * @private
1131
+ */
1132
+ static _restoreAll() {
1133
+ logger.info('=== LXD RESTORE (bring lab back up) ===');
1134
+ if (!UnderpostLxd._snapInstalled('lxd')) {
1135
+ throw new Error('LXD snap is not installed; nothing to restore.');
1136
+ }
1137
+ logger.info('Starting LXD daemon...');
1138
+ shellExec(`sudo snap start lxd`);
1139
+
1140
+ // Wait for the daemon's REST socket to be responsive before issuing
1141
+ // instance commands. `lxc info` (no args) is the cheapest readiness probe.
1142
+ logger.info('Waiting for LXD daemon to become responsive...');
1143
+ let ready = false;
1144
+ for (let i = 0; i < 15; i++) {
1145
+ try {
1146
+ shellExec(`lxc info`, { stdout: true });
1147
+ ready = true;
1148
+ break;
1149
+ } catch (err) {
1150
+ if (i === 0) logger.info(` (daemon not ready yet: ${err.message.split('\n')[0]})`);
1151
+ }
1152
+ shellExec(`sleep 2`);
1153
+ }
1154
+ if (!ready) {
1155
+ throw new Error('LXD daemon did not become responsive within 30s.');
1156
+ }
1157
+ logger.info('LXD daemon is responsive.');
1158
+
1159
+ const vmList = UnderpostLxd._listVms();
1160
+ logger.info(`Starting ${vmList.length} VM(s)...`);
1161
+ for (const vmName of vmList) {
1162
+ const state = UnderpostLxd._vmState(vmName);
1163
+ if (state === 'Running') {
1164
+ logger.info(` ${vmName} already running.`);
1165
+ } else {
1166
+ logger.info(` Starting VM: ${vmName} (was: ${state})`);
1167
+ shellExec(`${UnderpostLxd._lxcCmd()} start ${vmName}`);
1168
+ }
1169
+ }
1170
+ logger.info('=== LXD RESTORE COMPLETE ===');
1171
+ }
1172
+
1173
+ /**
1174
+ * Replicates `/home/dd/engine` on the host into the VM, respecting the
1175
+ * project `.gitignore`. If `engine-private/` exists on the host it is
1176
+ * pushed in a second pass (it is gitignored at the root by design).
1177
+ *
1178
+ * Idempotent: replaces only the contents of `ENGINE_ROOT_IN_VM`, not the
1179
+ * directory inode (avoids races with running watchers / shells inside the VM).
1180
+ *
1181
+ * @param {string} vmName
1182
+ * @private
1183
+ */
1184
+ static async _bootstrapEngineSource(vmName) {
1185
+ if (!UnderpostLxd._vmExists(vmName)) {
1186
+ throw new Error(`Cannot bootstrap engine into '${vmName}': VM does not exist.`);
1187
+ }
1188
+ if (!fs.existsSync(ENGINE_ROOT_ON_HOST)) {
1189
+ throw new Error(`Host engine source missing at ${ENGINE_ROOT_ON_HOST}.`);
1190
+ }
1191
+
1192
+ const includesFile = `/tmp/lxd-push-${vmName}-${Date.now()}.txt`;
1193
+ const files = await new Promise((resolve, reject) =>
1194
+ walk(
1195
+ { path: ENGINE_ROOT_ON_HOST, ignoreFiles: ['.gitignore'], includeEmpty: false, follow: false },
1196
+ (err, result) => (err ? reject(err) : resolve(result)),
1197
+ ),
1198
+ );
1199
+ fs.writeFileSync(includesFile, files.join('\n'));
1200
+
1201
+ shellExec(
1202
+ `${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c 'mkdir -p ${ENGINE_ROOT_IN_VM} && find ${ENGINE_ROOT_IN_VM} -mindepth 1 -delete'`,
1203
+ );
1204
+ shellExec(
1205
+ `tar -C ${ENGINE_ROOT_ON_HOST} -cf - --files-from=${includesFile} | ${UnderpostLxd._lxcCmd()} exec ${vmName} -- tar -C ${ENGINE_ROOT_IN_VM} -xf -`,
1206
+ );
1207
+ fs.removeSync(includesFile);
1208
+
1209
+ const privateSrcPath = `${ENGINE_ROOT_ON_HOST}/engine-private`;
1210
+ if (fs.existsSync(privateSrcPath)) {
1211
+ const privateFiles = await new Promise((resolve, reject) =>
1212
+ walk(
1213
+ { path: privateSrcPath, ignoreFiles: ['.gitignore'], includeEmpty: false, follow: false },
1214
+ (err, result) => (err ? reject(err) : resolve(result)),
1215
+ ),
1216
+ );
1217
+ const privateIncludes = `/tmp/lxd-push-${vmName}-private-${Date.now()}.txt`;
1218
+ fs.writeFileSync(privateIncludes, privateFiles.join('\n'));
1219
+ shellExec(
1220
+ `${UnderpostLxd._lxcCmd()} exec ${vmName} -- bash -c 'mkdir -p ${ENGINE_ROOT_IN_VM}/engine-private && find ${ENGINE_ROOT_IN_VM}/engine-private -mindepth 1 -delete'`,
1221
+ );
1222
+ shellExec(
1223
+ `tar -C ${privateSrcPath} -cf - --files-from=${privateIncludes} | ${UnderpostLxd._lxcCmd()} exec ${vmName} -- tar -C ${ENGINE_ROOT_IN_VM}/engine-private -xf -`,
1224
+ );
1225
+ fs.removeSync(privateIncludes);
1226
+ }
1227
+
1228
+ logger.info(` Engine source mirrored into ${vmName}:${ENGINE_ROOT_IN_VM}`);
435
1229
  }
436
1230
  }
437
1231
 
438
- export default UnderpostLxd;
1232
+ export default UnderpostLxd;