cyberia 3.2.12 → 3.2.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.github/workflows/engine-cyberia.cd.yml +1 -0
  2. package/.github/workflows/engine-cyberia.ci.yml +14 -2
  3. package/.github/workflows/ghpkg.ci.yml +1 -0
  4. package/.github/workflows/npmpkg.ci.yml +9 -5
  5. package/CHANGELOG.md +151 -1
  6. package/CLI-HELP.md +975 -1130
  7. package/bin/build.js +97 -136
  8. package/bin/build.template.js +25 -179
  9. package/bin/cyberia.js +11 -6
  10. package/bin/deploy.js +4 -1
  11. package/bin/index.js +11 -6
  12. package/conf.js +1 -0
  13. package/deployment.yaml +74 -2
  14. package/hardhat/package-lock.json +4 -4
  15. package/hardhat/package.json +1 -1
  16. package/manifests/cronjobs/dd-cron/dd-cron-backup.yaml +2 -2
  17. package/manifests/cronjobs/dd-cron/dd-cron-dns.yaml +1 -1
  18. package/manifests/deployment/dd-cyberia-development/deployment.yaml +74 -2
  19. package/manifests/deployment/dd-default-development/deployment.yaml +2 -2
  20. package/package.json +7 -7
  21. package/scripts/link-local-underpost-cli.sh +6 -0
  22. package/scripts/test-monitor.sh +250 -0
  23. package/src/api/cyberia-server-defaults/cyberia-server-defaults.js +7 -0
  24. package/src/cli/deploy.js +200 -282
  25. package/src/cli/env.js +1 -4
  26. package/src/cli/image.js +58 -4
  27. package/src/cli/index.js +47 -0
  28. package/src/cli/monitor.js +387 -6
  29. package/src/cli/release.js +26 -11
  30. package/src/cli/repository.js +101 -7
  31. package/src/cli/run.js +159 -73
  32. package/src/client/components/core/PanelForm.js +44 -44
  33. package/src/client/components/cyberia/SharedDefaultsCyberia.js +1 -1
  34. package/src/client/public/cyberia-docs/ACTION-SYSTEM.md +55 -1
  35. package/src/client/public/cyberia-docs/ARCHITECTURE.md +272 -50
  36. package/src/client/public/cyberia-docs/CYBERIA-SERVER.md +20 -11
  37. package/src/client/public/cyberia-docs/QUEST-SYSTEM.md +23 -1
  38. package/src/client/public/cyberia-docs/ROADMAP.md +1 -1
  39. package/src/client/public/cyberia-docs/WHITE-PAPER.md +1 -1
  40. package/src/db/mongo/MongooseDB.js +2 -1
  41. package/src/index.js +1 -1
  42. package/src/runtime/cyberia-client/Dockerfile +4 -22
  43. package/src/runtime/cyberia-client/Dockerfile.dev +3 -18
  44. package/src/runtime/cyberia-server/Dockerfile +3 -23
  45. package/src/runtime/cyberia-server/Dockerfile.dev +3 -27
  46. package/src/runtime/wp/Dockerfile +3 -3
  47. package/src/server/catalog-underpost.js +61 -0
  48. package/src/server/catalog.js +77 -0
  49. package/src/server/conf.js +414 -56
  50. package/src/server/ipfs-client.js +5 -3
  51. package/src/server/runtime-status.js +235 -0
  52. package/src/server/start.js +32 -11
  53. package/test/deploy-monitor.test.js +251 -0
  54. package/manifests/deployment/dd-test-development/deployment.yaml +0 -256
  55. package/manifests/deployment/dd-test-development/proxy.yaml +0 -102
package/src/cli/image.js CHANGED
@@ -122,6 +122,63 @@ class UnderpostImage {
122
122
  else if (kubeadm === true) shellExec(`sudo ctr -n k8s.io images import ${tarFile}`);
123
123
  else if (k3s === true) shellExec(`sudo k3s ctr images import ${tarFile}`);
124
124
  },
125
+ /**
126
+ * @method getCurrentLoaded
127
+ * @description Retrieves the currently loaded images in the Kubernetes cluster.
128
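+ * @param {string} [node='kind-worker'] - Node to inspect. Only the default 'kind-worker' is queried through `docker exec`; any other value runs `crictl images` on the local host.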
129
+ * @param {object} options - Options for the image retrieval.
130
+ * @param {boolean} options.spec - Whether to retrieve images from the pod specifications.
131
+ * @param {string} options.namespace - Kubernetes namespace to filter pods.
132
+ * @returns {Array<object>} - With `options.spec`, `{ pod, image }` entries parsed from the pod specs; otherwise one object per `crictl images` row, keyed by the table's column headers.
133
+ * @memberof UnderpostImage
134
+ */
135
+ getCurrentLoaded(node = 'kind-worker', options = { spec: false, namespace: '' }) {
136
+ if (options.spec) {
137
+ const raw = shellExec(
138
+ `kubectl get pods ${options.namespace ? `--namespace ${options.namespace}` : `--all-namespaces`} -o=jsonpath='{range .items[*]}{"\\n"}{.metadata.namespace}{"/"}{.metadata.name}{":\\t"}{range .spec.containers[*]}{.image}{", "}{end}{end}'`,
139
+ {
140
+ stdout: true,
141
+ silent: true,
142
+ },
143
+ );
144
+ return raw
145
+ .split(`\n`)
146
+ .map((lines) => ({
147
+ pod: lines.split('\t')[0].replaceAll(':', '').trim(),
148
+ image: lines.split('\t')[1] ? lines.split('\t')[1].replaceAll(',', '').trim() : null,
149
+ }))
150
+ .filter((o) => o.image);
151
+ }
152
+ const raw = shellExec(node === 'kind-worker' ? `docker exec -i ${node} crictl images` : `crictl images`, {
153
+ stdout: true,
154
+ silent: true,
155
+ });
156
+
157
+ const heads = raw
158
+ .split(`\n`)[0]
159
+ .split(' ')
160
+ .filter((_r) => _r.trim());
161
+
162
+ const pods = raw
163
+ .split(`\n`)
164
+ .filter((r) => !r.match('IMAGE'))
165
+ .map((r) => r.split(' ').filter((_r) => _r.trim()));
166
+
167
+ const result = [];
168
+
169
+ for (const row of pods) {
170
+ if (row.length === 0) continue;
171
+ const pod = {};
172
+ let index = -1;
173
+ for (const head of heads) {
174
+ if (head in pod) continue;
175
+ index++;
176
+ pod[head] = row[index];
177
+ }
178
+ result.push(pod);
179
+ }
180
+ return result;
181
+ },
125
182
  /**
126
183
  * @method list
127
184
  * @description Lists currently loaded Docker images in the specified Kubernetes cluster node.
@@ -139,10 +196,7 @@ class UnderpostImage {
139
196
  list(options = { nodeName: '', namespace: '', spec: false, log: false, k3s: false, kubeadm: false, kind: false }) {
140
197
  if ((options.kubeadm === true || options.k3s === true) && !options.nodeName)
141
198
  options.nodeName = shellExec('echo $HOSTNAME', { stdout: true, silent: true }).trim();
142
- const list = Underpost.deploy.getCurrentLoadedImages(
143
- options.nodeName ? options.nodeName : 'kind-worker',
144
- options,
145
- );
199
+ const list = Underpost.image.getCurrentLoaded(options.nodeName ? options.nodeName : 'kind-worker', options);
146
200
  if (options.log) console.table(list);
147
201
  return list;
148
202
  },
package/src/cli/index.js CHANGED
@@ -70,6 +70,10 @@ program
70
70
  '--pull-bundle',
71
71
  'Downloads the pre-built client bundle from Cloudinary via pull-bundle before starting. Use together with --skip-full-build to skip the local build entirely.',
72
72
  )
73
+ .option(
74
+ '--private-test-repo',
75
+ 'During --build, clone the private test source repo (engine-test-<id>) instead of the production engine-<id> repo.',
76
+ )
73
77
  .action(Underpost.start.callback)
74
78
  .description('Initiates application servers, build pipelines, or other defined services based on the deployment ID.');
75
79
 
@@ -124,6 +128,10 @@ program
124
128
  '--is-remote-repo <url-repo>',
125
129
  'Checks whether a remote Git repository URL is reachable. Prints true or false.',
126
130
  )
131
+ .option(
132
+ '--has-changes',
133
+ 'Prints "1" if there are staged or unstaged git changes in the repository, empty string otherwise.',
134
+ )
127
135
  .description('Manages commits to a GitHub repository, supporting various commit types and options.')
128
136
  .action(Underpost.repo.commit);
129
137
 
@@ -315,6 +323,12 @@ program
315
323
  .option('--expose', 'Exposes services matching the provided deployment ID list.')
316
324
  .option('--cert', 'Resets TLS/SSL certificate secrets for deployments.')
317
325
  .option('--cert-hosts <hosts>', 'Resets TLS/SSL certificate secrets for specified hosts.')
326
+ .option(
327
+ '--self-signed',
328
+ 'Use a pre-created self-signed TLS secret (kubernetes.io/tls) instead of cert-manager. ' +
329
+ 'The secret must already exist in the namespace with the same name as the host. ' +
330
+ 'Enables TLS in the Contour HTTPProxy virtualhost without requiring a production ClusterIssuer.',
331
+ )
318
332
  .option('--node <node>', 'Sets optional node for deployment operations.')
319
333
  .option(
320
334
  '--build-manifest',
@@ -332,6 +346,8 @@ program
332
346
  .option('--retry-count <count>', 'Sets HTTPProxy per-route retry count (e.g., 3).')
333
347
  .option('--retry-per-try-timeout <duration>', 'Sets HTTPProxy retry per-try timeout (e.g., "150ms").')
334
348
  .option('--disable-update-deployment', 'Disables updates to deployments.')
349
+ .option('--disable-runtime-probes', 'Omits the internal-status HTTP probes from generated deployment manifests.')
350
+ .option('--tcp-probes', 'Generates legacy TCP socket probes instead of HTTP internal-status probes (migration).')
335
351
  .option('--disable-update-proxy', 'Disables updates to proxies.')
336
352
  .option('--disable-deployment-proxy', 'Disables proxies of deployments.')
337
353
  .option('--disable-update-volume', 'Disables updates to volume mounts during deployment.')
@@ -351,6 +367,14 @@ program
351
367
  '--expose-port <port>',
352
368
  'Sets the local:remote port to expose when --expose is active (overrides auto-detected service port).',
353
369
  )
370
+ .option(
371
+ '--expose-local-port <port>',
372
+ 'Sets a different local port for --expose (e.g. 80) while keeping the remote service port. Useful for /etc/hosts local access without specifying a port in the browser.',
373
+ )
374
+ .option(
375
+ '--local-proxy',
376
+ 'Forward all service TCP ports locally and start the Node.js path-routing proxy. Enables full path-based routing (e.g. /wp alongside /) without needing --expose-local-port. Requires --expose.',
377
+ )
354
378
  .option('--cmd <cmd>', 'Custom initialization command for deployment (comma-separated commands).')
355
379
  .option(
356
380
  '--skip-full-build',
@@ -364,6 +388,12 @@ program
364
388
  '--image-pull-policy <policy>',
365
389
  'Override container imagePullPolicy in the generated deployment manifest (Always, IfNotPresent, Never). Defaults to Never for localhost/ images and IfNotPresent otherwise.',
366
390
  )
391
+ .option(
392
+ '--tls',
393
+ 'Enables TLS for the local proxy started by --expose --local-proxy. ' +
394
+ 'The proxy will serve HTTPS on port 443 using self-signed certificates resolved from the local SSL store. ' +
395
+ 'Use together with --expose and --local-proxy.',
396
+ )
367
397
  .description('Manages application deployments, defaulting to deploying development pods.')
368
398
  .action(Underpost.deploy.callback);
369
399
 
@@ -701,6 +731,10 @@ program
701
731
  'Explicitly download the pre-built client bundle from Cloudinary inside the container (supported by: sync, template-deploy). Use together with --skip-full-build.',
702
732
  )
703
733
  .option('--remove', 'Remove/teardown resources')
734
+ .option(
735
+ '--test',
736
+ 'Enables test/generic-purpose mode for the runner (e.g. use self-signed TLS instead of cert-manager).',
737
+ )
704
738
  .description('Runs specified scripts using various runners.')
705
739
  .action(Underpost.run.callback);
706
740
 
@@ -889,6 +923,19 @@ program
889
923
  '--dry-run',
890
924
  'For --build: previews version-bump changes (per-file substitution counts) without writing files or running downstream commands.',
891
925
  )
926
+ .option(
927
+ '--mongo-host <host>',
928
+ 'For --build: override DB_HOST in the template .env.example for the smoke test (e.g., "192.168.1.82:27017").',
929
+ )
930
+ .option('--mongo-user <user>', 'For --build: override DB_USER in the template .env.example for the smoke test.')
931
+ .option(
932
+ '--mongo-password <password>',
933
+ 'For --build: override DB_PASSWORD in the template .env.example for the smoke test.',
934
+ )
935
+ .option(
936
+ '--valkey-host <host>',
937
+ 'For --build: override VALKEY_HOST in the template .env.example for the smoke test (e.g., "192.168.1.82").',
938
+ )
892
939
  .description('Release orchestrator for building new versions and deploying releases of the Underpost CLI.')
893
940
  .action(async (version, options) => {
894
941
  if (options.build) return Underpost.release.build(version, options);
package/src/cli/monitor.js CHANGED
@@ -10,10 +10,19 @@ import {
10
10
  loadConfServerJson,
11
11
  loadCronDeployEnv,
12
12
  etcHostFactory,
13
+ deployRangePortFactory,
13
14
  } from '../server/conf.js';
14
15
  import { loggerFactory } from '../server/logger.js';
16
+ import { timer } from '../client/components/core/CommonJs.js';
17
+ import {
18
+ RUNTIME_STATUS,
19
+ INTERNAL_STATUS_PATH,
20
+ normalizeContainerStatus,
21
+ deployStatusPort,
22
+ } from '../server/runtime-status.js';
15
23
  import axios from 'axios';
16
24
  import fs from 'fs-extra';
25
+ import net from 'node:net';
17
26
  import { shellExec } from '../server/process.js';
18
27
  import Underpost from '../index.js';
19
28
 
@@ -93,13 +102,13 @@ class UnderpostMonitor {
93
102
  }
94
103
 
95
104
  if (options.readyDeployment) {
96
- for (const version of options.versions.split(',')) {
97
- (async () => {
98
- await Underpost.deploy.monitorReadyRunner(deployId, env, version, [], options.namespace, 'underpost');
105
+ await Promise.all(
106
+ options.versions.split(',').map(async (version) => {
107
+ await Underpost.monitor.monitorReadyRunner(deployId, env, version, [], options.namespace);
99
108
  if (options.promote)
100
109
  Underpost.deploy.switchTraffic(deployId, env, version, options.replicas, options.namespace, options);
101
- })();
102
- }
110
+ }),
111
+ );
103
112
  return;
104
113
  }
105
114
 
@@ -227,7 +236,7 @@ class UnderpostMonitor {
227
236
  monitorPodName = undefined;
228
237
  }
229
238
  const checkDeploymentReadyStatus = async () => {
230
- const { ready, notReadyPods, readyPods } = await Underpost.deploy.checkDeploymentReadyStatus(
239
+ const { ready, notReadyPods, readyPods } = await Underpost.monitor.checkDeploymentReadyStatus(
231
240
  deployId,
232
241
  env,
233
242
  traffic,
@@ -272,6 +281,378 @@ class UnderpostMonitor {
272
281
  };
273
282
  return new Promise((...args) => monitorCallBack(...args));
274
283
  },
284
+ /**
285
+ * Checks the status of a deployment.
286
+ * @param {string} deployId - Deployment ID for which the status is being checked.
287
+ * @param {string} env - Environment for which the status is being checked.
288
+ * @param {string} traffic - Current traffic status for the deployment.
289
+ * @param {Array<string>} ignoresNames - List of pod names to ignore.
290
+ * @param {string} [namespace='default'] - Kubernetes namespace for the deployment.
291
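+ * @returns {Promise<{ready: boolean, notReadyPods: Array<object>, readyPods: Array<object>}>} - `ready` is true only when at least one pod was considered and none is not-Ready.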
292
+ * @memberof UnderpostMonitor
293
+ */
294
+ async checkDeploymentReadyStatus(deployId, env, traffic, ignoresNames = [], namespace = 'default') {
295
+ const pods = Underpost.kubectl.get(`${deployId}-${env}-${traffic}`, 'pods', namespace);
296
+ const readyPods = [];
297
+ const notReadyPods = [];
298
+
299
+ // Readiness signal: the pod's Kubernetes `Ready` condition driven by the
300
+ // container's readinessProbe (TCP socket, HTTP get, or exec). Set by kubelet
301
+ // when the probe passes. A failed or crashing runtime never becomes Ready —
302
+ // kubelet surfaces CrashLoopBackOff and this gate stays closed.
303
+ for (const pod of pods) {
304
+ const { NAME } = pod;
305
+ if (ignoresNames && ignoresNames.find((t) => NAME.trim().toLowerCase().match(t.trim().toLowerCase()))) continue;
306
+
307
+ let podJson = null;
308
+ try {
309
+ // Pod may not exist yet (between deployment apply and pod
310
+ // scheduling). silentOnError lets the monitor loop continue
311
+ // instead of aborting on the transient NotFound exit.
312
+ const raw = shellExec(`sudo kubectl get pod ${NAME} -n ${namespace} -o json`, {
313
+ silent: true,
314
+ disableLog: true,
315
+ stdout: true,
316
+ silentOnError: true,
317
+ });
318
+ podJson = raw ? JSON.parse(raw) : null;
319
+ } catch (_) {
320
+ podJson = null;
321
+ }
322
+ const conditions = podJson?.status?.conditions || [];
323
+ const readyCondition = conditions.find((c) => c.type === 'Ready');
324
+ const k8sReady = readyCondition?.status === 'True';
325
+
326
+ pod.out = JSON.stringify({ k8sReady, condition: readyCondition ?? null });
327
+
328
+ if (k8sReady) readyPods.push(pod);
329
+ else notReadyPods.push(pod);
330
+ }
331
+ const consideredCount = readyPods.length + notReadyPods.length;
332
+ return {
333
+ ready: consideredCount > 0 && notReadyPods.length === 0,
334
+ notReadyPods,
335
+ readyPods,
336
+ };
337
+ },
338
+ /**
339
+ * Resolves a free ephemeral TCP port on the loopback interface, used as the
340
+ * local end of the `kubectl port-forward` tunnel so it never collides with
341
+ * host-local services.
342
+ * @returns {Promise<number>}
343
+ * @memberof UnderpostMonitor
344
+ */
345
+ findFreePort() {
346
+ return new Promise((resolve) => {
347
+ const srv = net.createServer();
348
+ srv.once('error', () => resolve(20000 + Math.floor(Math.random() * 20000)));
349
+ srv.listen(0, '127.0.0.1', () => {
350
+ const { port } = srv.address();
351
+ srv.close(() => resolve(port));
352
+ });
353
+ });
354
+ },
355
+ /**
356
+ * Resolves the deployment's internal status port (Phase-2 transport target).
357
+ *
358
+ * Canonical value is `fromPort - 1` from the deployment router — the exact
359
+ * port `buildManifest` injects into the pod (UNDERPOST_INTERNAL_PORT) and
360
+ * uses for the probes — so the tunnel target always matches the in-pod bind.
361
+ * `UNDERPOST_INTERNAL_PORT` overrides; ambient resolution is the last resort.
362
+ *
363
+ * @param {string} deployId
364
+ * @param {string} env
365
+ * @returns {Promise<number>}
366
+ * @memberof UnderpostMonitor
367
+ */
368
+ async deployInternalPort(deployId, env) {
369
+ const override = parseInt(process.env.UNDERPOST_INTERNAL_PORT);
370
+ if (!Number.isNaN(override)) return override;
371
+ try {
372
+ const router = await Underpost.deploy.routerFactory(deployId, env);
373
+ const { fromPort } = deployRangePortFactory(router);
374
+ if (Number.isFinite(fromPort) && fromPort > 0) return fromPort - 1;
375
+ } catch (_) {
376
+ /* fall through to ambient resolution */
377
+ }
378
+ return deployStatusPort(deployId, env) ?? 3000;
379
+ },
380
+ /**
381
+ * Reads Phase-2 runtime status from a single pod using the selected transport.
382
+ *
383
+ * - `exec` (default): `kubectl exec … underpost config get container-status`
384
+ * reads the env-file value. Synchronous, no background process — required
385
+ * for custom instances (cyberia-server/client) and the safe choice for
386
+ * CI/SSH. See `Deploy custom instance to K8S.md`.
387
+ * - `http`: port-forward to the in-pod `/_internal/status` endpoint served
388
+ * by the `underpost start` launcher (dd-* runtime deploys). Opt-in.
389
+ *
390
+ * Transport failures are reported as `{ ok: false }` and must never be read
391
+ * as success — they are retried, not promoted.
392
+ *
393
+ * @param {string} podName
394
+ * @param {string} namespace
395
+ * @param {number} internalPort
396
+ * @param {('http'|'exec')} [transport='exec']
397
+ * @returns {Promise<{ok: boolean, status?: (string|null), transportError?: string}>}
398
+ * @memberof UnderpostMonitor
399
+ */
400
+ async readRuntimeStatus(podName, namespace, internalPort, transport = 'exec') {
401
+ return transport === 'exec'
402
+ ? Underpost.monitor.readRuntimeStatusViaExec(podName, namespace)
403
+ : Underpost.monitor.readRuntimeStatusViaHttp(podName, namespace, internalPort);
404
+ },
405
+ /**
406
+ * Phase-2 read over `kubectl exec` (env-file transport). Works for any pod
407
+ * whose image bakes the underpost CLI — notably custom instances that stamp
408
+ * `container-status` from `lifecycle.postStart`/`preStop` hooks.
409
+ * @param {string} podName
410
+ * @param {string} namespace
411
+ * @returns {{ok: boolean, status?: (string|null), transportError?: string}}
412
+ * @memberof UnderpostMonitor
413
+ */
414
+ readRuntimeStatusViaExec(podName, namespace) {
415
+ try {
416
+ const raw = shellExec(
417
+ `sudo kubectl exec ${podName} -n ${namespace} -- sh -c 'underpost config get container-status --plain'`,
418
+ { silent: true, disableLog: true, stdout: true, silentOnError: true },
419
+ );
420
+ const status = normalizeContainerStatus(raw ? raw.toString().trim() : '');
421
+ return status === undefined ? { ok: false, transportError: 'empty_status' } : { ok: true, status };
422
+ } catch (error) {
423
+ return { ok: false, transportError: error?.code || error?.message || 'exec_failed' };
424
+ }
425
+ },
426
+ /**
427
+ * Phase-2 read over `kubectl port-forward` + HTTP `/_internal/status`.
428
+ *
429
+ * The local side of the tunnel MUST be an ephemeral free port: pinning it to
430
+ * internalPort collides with any host-local service on that number (e.g. a
431
+ * dev runtime on the same machine as the cluster), making port-forward fail
432
+ * to bind and every read return a false transport error.
433
+ *
434
+ * @param {string} podName
435
+ * @param {string} namespace
436
+ * @param {number} internalPort
437
+ * @returns {Promise<{ok: boolean, status?: (string|null), transportError?: string}>}
438
+ * @memberof UnderpostMonitor
439
+ */
440
+ async readRuntimeStatusViaHttp(podName, namespace, internalPort) {
441
+ const override = parseInt(process.env.UNDERPOST_PF_LOCAL_PORT);
442
+ const localPort = Number.isNaN(override) ? await Underpost.monitor.findFreePort() : override;
443
+ const url = `http://127.0.0.1:${localPort}${INTERNAL_STATUS_PATH}`;
444
+ let portForward;
445
+ try {
446
+ // `exec` makes the tracked child the sudo/kubectl process (so kill
447
+ // reaches it); stdio is redirected to /dev/null so the tunnel never
448
+ // inherits — and therefore never holds open — a CI/SSH session's pipes,
449
+ // which would hang the job after a successful deploy.
450
+ portForward = shellExec(
451
+ `exec sudo kubectl port-forward pod/${podName} ${localPort}:${internalPort} -n ${namespace} </dev/null >/dev/null 2>&1`,
452
+ { async: true, silent: true, disableLog: true, silentOnError: true },
453
+ );
454
+ } catch (_) {
455
+ portForward = undefined;
456
+ }
457
+ try {
458
+ let lastError;
459
+ const attempts = parseInt(process.env.UNDERPOST_PF_ATTEMPTS) || 20;
460
+ for (let attempt = 0; attempt < attempts; attempt++) {
461
+ try {
462
+ const res = await axios.get(url, { timeout: 2500 });
463
+ const raw = res?.data?.status ?? null;
464
+ return { ok: true, status: normalizeContainerStatus(raw) ?? raw, payload: res.data };
465
+ } catch (error) {
466
+ lastError = error;
467
+ await timer(350);
468
+ }
469
+ }
470
+ return { ok: false, transportError: lastError?.code || lastError?.message || 'transport_failed' };
471
+ } finally {
472
+ if (portForward && typeof portForward.kill === 'function') {
473
+ try {
474
+ portForward.kill('SIGTERM');
475
+ } catch (_) {
476
+ /* tunnel already gone */
477
+ }
478
+ }
479
+ }
480
+ },
481
+ /**
482
+ * Monitors a deployment to terminal readiness using a deterministic
483
+ * two-phase state machine.
484
+ *
485
+ * Phase 1 (Kubernetes): pod `Ready` condition via `checkDeploymentReadyStatus`.
486
+ * Phase 2 (Runtime): `container-status`, read via the selected transport.
487
+ *
488
+ * Two deployment shapes are supported via `options`:
489
+ * - `runtime` gate (default, dd-* deploys): the `underpost start` launcher
490
+ * stamps `running-deployment`. Success requires K8S Ready AND every pod
491
+ * reporting `running-deployment`.
492
+ * - `kubernetes` gate (custom instances, e.g. cyberia): the runtime is a
493
+ * bare binary; K8S `readinessProbe` (TCP) IS the running signal and
494
+ * `container-status` is stamped to `initializing`/`stopping` by lifecycle
495
+ * hooks. Success requires K8S Ready; the status read is used only for
496
+ * fast `error` detection and display.
497
+ *
498
+ * Phase-2 transport defaults to `exec` (`kubectl exec`, no background
499
+ * process). The `http` transport (`kubectl port-forward` → `/_internal/status`)
500
+ * is opt-in via `options.statusTransport='http'` or
501
+ * `UNDERPOST_STATUS_TRANSPORT=http`; it must not be used in CI/SSH sessions
502
+ * where a stray tunnel can hang the job.
503
+ *
504
+ * Contract (both shapes):
505
+ * - Runtime readiness is never declared before Kubernetes readiness.
506
+ * - An explicit runtime `error` (or a fatal pod status) transitions
507
+ * immediately to `failed` (throw → CD exit 1).
508
+ * - Transport failures never count as success and never advance state.
509
+ * - `timeout` is a distinct terminal state from `failed`.
510
+ * - Every transition emits a structured, secret-free event.
511
+ *
512
+ * @param {string} deployId - Deployment ID for which the ready status is being monitored.
513
+ * @param {string} env - Environment for which the ready status is being monitored.
514
+ * @param {string} targetTraffic - Target traffic status for the deployment.
515
+ * @param {Array<string>} ignorePods - List of pod names to ignore.
516
+ * @param {string} [namespace='default'] - Kubernetes namespace for the deployment.
517
+ * @param {object} [options] - Monitoring shape.
518
+ * @param {('runtime'|'kubernetes')} [options.readyGate='runtime'] - Running-signal owner.
519
+ * @param {('http'|'exec')} [options.statusTransport='exec'] - Phase-2 read transport; falls back to `UNDERPOST_STATUS_TRANSPORT` when unset.
520
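+ * @returns {Promise<object>} - The final `checkDeploymentReadyStatus` result (`{ ready, notReadyPods, readyPods }`) once both phases pass; throws on a fatal pod status, a runtime `error`/regression, or timeout.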
521
+ * @memberof UnderpostMonitor
522
+ */
523
+ async monitorReadyRunner(deployId, env, targetTraffic, ignorePods = [], namespace = 'default', options = {}) {
524
+ const delayMs = parseInt(process.env.UNDERPOST_MONITOR_DELAY_MS) || 1000;
525
+ const maxIterations = parseInt(process.env.UNDERPOST_MONITOR_MAX_ITERATIONS) || 3000;
526
+ const deploymentId = `${deployId}-${env}-${targetTraffic}`;
527
+ const tag = `[${deploymentId}]`;
528
+ const expectedStatus = RUNTIME_STATUS.RUNNING;
529
+ const readyGate = options.readyGate === 'kubernetes' ? 'kubernetes' : 'runtime';
530
+ // Default to `exec`: a single synchronous `kubectl exec` read leaves no
531
+ // background process behind. The `http` transport spawns `kubectl
532
+ // port-forward` children that, if orphaned, inherit a CI/SSH session's
533
+ // stdio and hang the job after a successful deploy — opt in explicitly.
534
+ const statusTransport =
535
+ (options.statusTransport || process.env.UNDERPOST_STATUS_TRANSPORT) === 'http' ? 'http' : 'exec';
536
+ const internalPort =
537
+ statusTransport === 'http' ? await Underpost.monitor.deployInternalPort(deployId, env) : null;
538
+ const podErrorStates = ['error', 'crashloopbackoff', 'oomkilled', 'imagepullbackoff', 'errimagepull'];
539
+
540
+ const emit = (state, status) =>
541
+ logger.info('deploy-monitor', {
542
+ deployId: deploymentId,
543
+ phase: state.startsWith('runtime') ? 'runtime' : 'kubernetes',
544
+ state,
545
+ status: status ?? null,
546
+ timestamp: new Date().toISOString(),
547
+ });
548
+
549
+ logger.info('Deployment init', {
550
+ deployId,
551
+ env,
552
+ targetTraffic,
553
+ namespace,
554
+ internalPort,
555
+ readyGate,
556
+ statusTransport,
557
+ });
558
+ emit('pending');
559
+
560
+ const runtimeStatusCache = new Map();
561
+ const advancedPods = new Set();
562
+
563
+ for (let i = 0; i < maxIterations; i++) {
564
+ const result = await Underpost.monitor.checkDeploymentReadyStatus(
565
+ deployId,
566
+ env,
567
+ targetTraffic,
568
+ ignorePods,
569
+ namespace,
570
+ );
571
+ const allPods = [...result.readyPods, ...result.notReadyPods];
572
+
573
+ if (allPods.length === 0) {
574
+ emit('pending');
575
+ await timer(delayMs);
576
+ continue;
577
+ }
578
+ emit('pod_scheduled');
579
+
580
+ // Phase 1 fatal: a Kubernetes-level pod failure is terminal (failed,
581
+ // not timeout) — fail the CD runner immediately instead of waiting out
582
+ // the full window.
583
+ for (const pod of allPods) {
584
+ const podStatus = (pod.STATUS || '').toLowerCase().trim();
585
+ if (podErrorStates.find((s) => podStatus.includes(s)))
586
+ throw new Error(`Pod ${pod.NAME} has error pod status: ${pod.STATUS}`);
587
+ }
588
+
589
+ const allPodsK8sReady = result.notReadyPods.length === 0;
590
+ if (allPodsK8sReady) emit('pod_ready');
591
+
592
+ // Phase 2: runtime status via the selected transport. Transport failures
593
+ // neither advance state nor count as success; explicit `error` is terminal.
594
+ let allRuntimeRead = true;
595
+ for (const pod of allPods) {
596
+ if (!pod?.NAME) continue;
597
+ const read = await Underpost.monitor.readRuntimeStatus(pod.NAME, namespace, internalPort, statusTransport);
598
+ if (!read.ok) {
599
+ allRuntimeRead = false;
600
+ emit('runtime_booting', `transport:${read.transportError}`);
601
+ continue;
602
+ }
603
+ const status = read.status;
604
+ if (status === RUNTIME_STATUS.ERROR) throw new Error(`Pod ${pod.NAME} reported runtime status=error`);
605
+ // Regression (advanced → empty/build) means a pod restarted. Under the
606
+ // kubernetes gate the runtime never advances past `initializing`, so
607
+ // only treat a drop to empty/build as a regression there.
608
+ if (advancedPods.has(pod.NAME) && (!status || status === RUNTIME_STATUS.BUILD))
609
+ throw new Error(`Pod ${pod.NAME} runtime status regressed (${status ?? 'empty'}) — pod likely restarted`);
610
+ if (status && status !== RUNTIME_STATUS.BUILD) advancedPods.add(pod.NAME);
611
+ runtimeStatusCache.set(pod.NAME, status);
612
+ emit('runtime_booting', status);
613
+ }
614
+
615
+ // Under the kubernetes gate the readinessProbe is the running signal, so
616
+ // K8S Ready alone confirms Phase 2; the status read above is kept only
617
+ // for `error` fast-fail and display.
618
+ const allRuntimeReady =
619
+ readyGate === 'kubernetes'
620
+ ? true
621
+ : allRuntimeRead && allPods.every((pod) => runtimeStatusCache.get(pod.NAME) === expectedStatus);
622
+
623
+ for (const pod of allPods) {
624
+ const status = runtimeStatusCache.get(pod.NAME) || 'waiting for status';
625
+ const podStatus = pod.STATUS || 'Unknown';
626
+ const statusDisplay = status === expectedStatus ? status : `${status} (pending)`;
627
+ console.log(
628
+ 'Target pod:',
629
+ pod.NAME[pod.NAME.includes('green') ? 'bgGreen' : 'bgBlue'].bold.black,
630
+ '| Pod status:',
631
+ podStatus.bold.yellow,
632
+ '| Runtime status:',
633
+ statusDisplay.bold.cyan,
634
+ );
635
+ }
636
+
637
+ // Terminal success requires both phases. runtime_ready cannot precede
638
+ // Kubernetes readiness.
639
+ if (allPodsK8sReady && allRuntimeReady) {
640
+ const readySignal = readyGate === 'kubernetes' ? 'K8S readinessProbe' : `runtime ${expectedStatus}`;
641
+ emit('runtime_ready', readyGate === 'kubernetes' ? 'k8s-ready' : expectedStatus);
642
+ logger.info(`${tag} | Deployment ready (K8S Ready + ${readySignal})`);
643
+ return result;
644
+ }
645
+
646
+ await timer(delayMs);
647
+ if ((i + 1) % 10 === 0) logger.info(`${tag} | In progress... iteration ${i + 1}`);
648
+ }
649
+
650
+ emit('timeout');
651
+ logger.error(`${tag} | Deployment timeout after ${maxIterations} iterations`);
652
+ throw new Error(
653
+ `monitorReadyRunner timeout: ${deploymentId} did not become Ready within ${maxIterations}*${delayMs}ms`,
654
+ );
655
+ },
275
656
  };
276
657
  }
277
658