@oneuptime/common 10.0.68 → 10.0.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/Models/DatabaseModels/KubernetesResource.ts +19 -0
  2. package/Server/API/KubernetesResourceAPI.ts +2 -0
  3. package/Server/Infrastructure/Postgres/SchemaMigrations/1776865086264-MigrationName.ts +14 -0
  4. package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +2 -0
  5. package/Server/Services/DatabaseService.ts +36 -4
  6. package/Server/Services/KubernetesResourceService.ts +300 -8
  7. package/Server/Utils/VM/VMRunner.ts +39 -22
  8. package/Types/IsolatedVM/ReturnResult.ts +6 -0
  9. package/Types/Kubernetes/KubernetesInventoryExtractor.ts +15 -1
  10. package/build/dist/Models/DatabaseModels/KubernetesResource.js +20 -0
  11. package/build/dist/Models/DatabaseModels/KubernetesResource.js.map +1 -1
  12. package/build/dist/Server/API/KubernetesResourceAPI.js +2 -0
  13. package/build/dist/Server/API/KubernetesResourceAPI.js.map +1 -1
  14. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1776865086264-MigrationName.js +12 -0
  15. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1776865086264-MigrationName.js.map +1 -0
  16. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +2 -0
  17. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
  18. package/build/dist/Server/Services/DatabaseService.js +15 -4
  19. package/build/dist/Server/Services/DatabaseService.js.map +1 -1
  20. package/build/dist/Server/Services/KubernetesResourceService.js +202 -8
  21. package/build/dist/Server/Services/KubernetesResourceService.js.map +1 -1
  22. package/build/dist/Server/Utils/VM/VMRunner.js +33 -19
  23. package/build/dist/Server/Utils/VM/VMRunner.js.map +1 -1
  24. package/build/dist/Types/Kubernetes/KubernetesInventoryExtractor.js +7 -1
  25. package/build/dist/Types/Kubernetes/KubernetesInventoryExtractor.js.map +1 -1
  26. package/package.json +1 -1
@@ -405,6 +405,25 @@ export default class KubernetesResource extends BaseModel {
405
405
  })
406
406
  public spec?: JSONObject = undefined;
407
407
 
408
+ @ColumnAccessControl({
409
+ create: [],
410
+ read: READ_PERMISSIONS,
411
+ update: [],
412
+ })
413
+ @TableColumn({
414
+ required: false,
415
+ type: TableColumnType.Number,
416
+ canReadOnRelationQuery: true,
417
+ title: "Container Count",
418
+ description:
419
+ "For Pods: count of entries in spec.containers, cached so the overview page can SUM it without scanning JSONB. Null for non-Pod kinds.",
420
+ })
421
+ @Column({
422
+ nullable: true,
423
+ type: ColumnType.Number,
424
+ })
425
+ public containerCount?: number = undefined;
426
+
408
427
  @ColumnAccessControl({
409
428
  create: [],
410
429
  read: READ_PERMISSIONS,
@@ -122,6 +122,8 @@ export default class KubernetesResourceAPI extends BaseAPI<
122
122
  hpaCount: summary.countsByKind["HorizontalPodAutoscaler"] || 0,
123
123
  vpaCount: summary.countsByKind["VerticalPodAutoscaler"] || 0,
124
124
  containerCount: summary.containerCount,
125
+ degradedPods: summary.degradedPods as unknown as JSONObject,
126
+ degradedNodes: summary.degradedNodes as unknown as JSONObject,
125
127
  };
126
128
 
127
129
  return Response.sendJsonObjectResponse(req, res, responseBody);
@@ -0,0 +1,14 @@
1
+ import { MigrationInterface, QueryRunner } from "typeorm";
2
+
3
+ export class MigrationName1776865086264 implements MigrationInterface {
4
+ name = 'MigrationName1776865086264'
5
+
6
+ public async up(queryRunner: QueryRunner): Promise<void> {
7
+ await queryRunner.query(`ALTER TABLE "KubernetesResource" ADD "containerCount" integer`);
8
+ }
9
+
10
+ public async down(queryRunner: QueryRunner): Promise<void> {
11
+ await queryRunner.query(`ALTER TABLE "KubernetesResource" DROP COLUMN "containerCount"`);
12
+ }
13
+
14
+ }
@@ -287,6 +287,7 @@ import { MigrationName1776541018853 } from "./1776541018853-MigrationName";
287
287
  import { MigrationName1776544084793 } from "./1776544084793-MigrationName";
288
288
  import { MigrationName1776761171349 } from "./1776761171349-MigrationName";
289
289
  import { MigrationName1776801030808 } from "./1776801030808-MigrationName";
290
+ import { MigrationName1776865086264 } from "./1776865086264-MigrationName";
290
291
  export default [
291
292
  InitialMigration,
292
293
  MigrationName1717678334852,
@@ -577,4 +578,5 @@ export default [
577
578
  MigrationName1776544084793,
578
579
  MigrationName1776761171349,
579
580
  MigrationName1776801030808,
581
+ MigrationName1776865086264,
580
582
  ];
@@ -31,7 +31,6 @@ import UpdateByIDAndFetch from "../Types/Database/UpdateByIDAndFetch";
31
31
  import UpdateOneBy from "../Types/Database/UpdateOneBy";
32
32
  import Encryption from "../Utils/Encryption";
33
33
  import logger, { LogAttributes } from "../Utils/Logger";
34
- import AuditLogService from "./AuditLogService";
35
34
  import BaseService from "./BaseService";
36
35
  import BaseModel from "../../Models/DatabaseModels/DatabaseBaseModel/DatabaseBaseModel";
37
36
  import { WorkflowRoute } from "../../ServiceRoute";
@@ -777,7 +776,21 @@ class DatabaseService<TBaseModel extends BaseModel> extends BaseService {
777
776
  !createBy.props.ignoreHooks &&
778
777
  this.getModel().enableAuditLogOn?.create
779
778
  ) {
780
- await AuditLogService.recordCreate({
779
+ /*
780
+ * Lazy require to avoid circular dependency between DatabaseService and
781
+ * AuditLogService (which depends on ProjectService/UserService, both of
782
+ * which extend DatabaseService). A top-level import leaves
783
+ * DatabaseService undefined at class-extension time for subclasses.
784
+ */
785
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
786
+ const auditLogService: {
787
+ recordCreate: (data: {
788
+ model: TBaseModel;
789
+ createdItem: TBaseModel;
790
+ props: DatabaseCommonInteractionProps;
791
+ }) => Promise<void>;
792
+ } = require("./AuditLogService").default;
793
+ await auditLogService.recordCreate({
781
794
  model: this.getModel(),
782
795
  createdItem: createBy.data,
783
796
  props: createBy.props,
@@ -1224,9 +1237,18 @@ class DatabaseService<TBaseModel extends BaseModel> extends BaseService {
1224
1237
  }
1225
1238
 
1226
1239
  if (this.getModel().enableAuditLogOn?.delete && items.length > 0) {
1240
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
1241
+ const auditLogService: {
1242
+ recordDelete: (args: {
1243
+ model: TBaseModel;
1244
+ deletedItem: TBaseModel;
1245
+ itemId: ObjectID;
1246
+ props: DatabaseCommonInteractionProps;
1247
+ }) => Promise<void>;
1248
+ } = require("./AuditLogService").default;
1227
1249
  for (const item of items) {
1228
1250
  if (item.id) {
1229
- await AuditLogService.recordDelete({
1251
+ await auditLogService.recordDelete({
1230
1252
  model: this.getModel(),
1231
1253
  deletedItem: item,
1232
1254
  itemId: item.id,
@@ -1645,7 +1667,17 @@ class DatabaseService<TBaseModel extends BaseModel> extends BaseService {
1645
1667
  !this.hasSameValues({ item, updatedItem }) &&
1646
1668
  item.id
1647
1669
  ) {
1648
- await AuditLogService.recordUpdate({
1670
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
1671
+ const auditLogService: {
1672
+ recordUpdate: (args: {
1673
+ model: TBaseModel;
1674
+ before: TBaseModel;
1675
+ updatedFields: JSONObject;
1676
+ itemId: ObjectID;
1677
+ props: DatabaseCommonInteractionProps;
1678
+ }) => Promise<void>;
1679
+ } = require("./AuditLogService").default;
1680
+ await auditLogService.recordUpdate({
1649
1681
  model: this.getModel(),
1650
1682
  before: item,
1651
1683
  updatedFields: data as JSONObject,
@@ -22,10 +22,28 @@ import logger from "../Utils/Logger";
22
22
 
23
23
  export type { ParsedKubernetesResource };
24
24
 
25
/**
 * One pod sampled for the "Why is this cluster degraded?" overview card.
 * reason/message are distilled from the pod's status block (container
 * waiting/terminated reasons, scheduler reason, or a non-True condition);
 * either may be "" when the status offers nothing useful.
 */
export interface DegradedPod {
  name: string;
  namespace: string;
  phase: string;
  reason: string;
  message: string;
}

/**
 * One node sampled for the degraded-cluster overview card. The boolean
 * flags mirror the denormalized health columns on the row; reason/message
 * come from the matching status condition (may be "" when absent).
 */
export interface DegradedNode {
  name: string;
  isReady: boolean;
  hasMemoryPressure: boolean;
  hasDiskPressure: boolean;
  hasPidPressure: boolean;
  reason: string;
  message: string;
}
42
+
25
43
  export interface InventorySummary {
26
44
  countsByKind: Record<string, number>;
27
45
  /*
28
- * Sum of `jsonb_array_length(spec->'containers')` across all pods in
46
+ * Sum of the denormalized containerCount column across all pods in
29
47
  * the cluster. Containers aren't a top-level kind in the inventory,
30
48
  * so we derive the total server-side so the sidebar badge and the
31
49
  * Containers page agree.
@@ -47,6 +65,201 @@ export interface InventorySummary {
47
65
  diskPressure: number;
48
66
  pidPressure: number;
49
67
  };
68
+ /*
69
+ * Top offenders that explain a Degraded/Unhealthy cluster state. Capped
70
+ * so a pathological cluster can't blow up the overview payload; the
71
+ * dedicated Pods/Nodes pages are the source of truth for the full list.
72
+ */
73
+ degradedPods: Array<DegradedPod>;
74
+ degradedNodes: Array<DegradedNode>;
75
+ }
76
// Cap on degraded pod/node samples returned by the overview summary queries
// (used as the SQL LIMIT) so a pathological cluster can't blow up the payload.
const DEGRADED_SAMPLE_LIMIT: number = 20;
78
+
79
+ /*
80
+ * Pull the first meaningful reason/message off a pod's status block.
81
+ * KubernetesInventoryExtractor stores containerStatuses as an array of
82
+ * { name, ready, state: "running"|"waiting"|"terminated", reason, message, ... }.
83
+ * A waiting container with a reason (ImagePullBackOff, CrashLoopBackOff,
84
+ * CreateContainerConfigError, ...) is exactly what the user needs to see,
85
+ * so we surface that first. We fall back to terminated reasons (OOMKilled,
86
+ * Error, ContainerCannotRun) and then to status-level conditions.
87
+ */
88
+ function buildDegradedPod(row: {
89
+ name: string;
90
+ namespaceKey: string;
91
+ phase: string | null;
92
+ status: unknown;
93
+ }): DegradedPod {
94
+ const status: Record<string, unknown> =
95
+ row.status && typeof row.status === "object"
96
+ ? (row.status as Record<string, unknown>)
97
+ : {};
98
+
99
+ let reason: string = "";
100
+ let message: string = "";
101
+
102
+ const containerStatuses: Array<Record<string, unknown>> = Array.isArray(
103
+ status["containerStatuses"],
104
+ )
105
+ ? (status["containerStatuses"] as Array<Record<string, unknown>>)
106
+ : [];
107
+ const initContainerStatuses: Array<Record<string, unknown>> = Array.isArray(
108
+ status["initContainerStatuses"],
109
+ )
110
+ ? (status["initContainerStatuses"] as Array<Record<string, unknown>>)
111
+ : [];
112
+
113
+ const scanForReason: (
114
+ list: Array<Record<string, unknown>>,
115
+ targetState: string,
116
+ ) => { reason: string; message: string } | null = (list, targetState) => {
117
+ for (const cs of list) {
118
+ if (cs["state"] !== targetState) {
119
+ continue;
120
+ }
121
+ const r: unknown = cs["reason"];
122
+ if (typeof r === "string" && r) {
123
+ const m: unknown = cs["message"];
124
+ return {
125
+ reason: r,
126
+ message: typeof m === "string" ? m : "",
127
+ };
128
+ }
129
+ }
130
+ return null;
131
+ };
132
+
133
+ const waitingHit: { reason: string; message: string } | null =
134
+ scanForReason(containerStatuses, "waiting") ||
135
+ scanForReason(initContainerStatuses, "waiting");
136
+ const terminatedHit: { reason: string; message: string } | null = waitingHit
137
+ ? null
138
+ : scanForReason(containerStatuses, "terminated") ||
139
+ scanForReason(initContainerStatuses, "terminated");
140
+ const hit: { reason: string; message: string } | null =
141
+ waitingHit || terminatedHit;
142
+
143
+ if (hit) {
144
+ reason = hit.reason;
145
+ message = hit.message;
146
+ } else {
147
+ // Fall back to the pod-level reason/message fields set by the scheduler
148
+ // (e.g. "Unschedulable" with "0/3 nodes are available: ...").
149
+ const topReason: unknown = status["reason"];
150
+ const topMessage: unknown = status["message"];
151
+ if (typeof topReason === "string") {
152
+ reason = topReason;
153
+ }
154
+ if (typeof topMessage === "string") {
155
+ message = topMessage;
156
+ }
157
+
158
+ // If still nothing, pull from the first non-True condition.
159
+ if (!reason) {
160
+ const conditions: Array<Record<string, unknown>> = Array.isArray(
161
+ status["conditions"],
162
+ )
163
+ ? (status["conditions"] as Array<Record<string, unknown>>)
164
+ : [];
165
+ for (const cond of conditions) {
166
+ if (cond["status"] !== "True") {
167
+ const r: unknown = cond["reason"];
168
+ const m: unknown = cond["message"];
169
+ if (typeof r === "string" && r) {
170
+ reason = r;
171
+ message = typeof m === "string" ? m : "";
172
+ break;
173
+ }
174
+ }
175
+ }
176
+ }
177
+ }
178
+
179
+ return {
180
+ name: row.name,
181
+ namespace: row.namespaceKey || "",
182
+ phase: row.phase || "Unknown",
183
+ reason,
184
+ message,
185
+ };
186
+ }
187
+
188
+ /*
189
+ * For a Node: if isReady is false, the "Ready" condition carries the real
190
+ * story (e.g. "KubeletNotReady: PLEG is not healthy"). If only pressure
191
+ * flags are tripped, pick the tripped condition's reason/message.
192
+ */
193
+ function buildDegradedNode(row: {
194
+ name: string;
195
+ isReady: boolean | null;
196
+ hasMemoryPressure: boolean | null;
197
+ hasDiskPressure: boolean | null;
198
+ hasPidPressure: boolean | null;
199
+ status: unknown;
200
+ }): DegradedNode {
201
+ const status: Record<string, unknown> =
202
+ row.status && typeof row.status === "object"
203
+ ? (row.status as Record<string, unknown>)
204
+ : {};
205
+
206
+ const conditions: Array<Record<string, unknown>> = Array.isArray(
207
+ status["conditions"],
208
+ )
209
+ ? (status["conditions"] as Array<Record<string, unknown>>)
210
+ : [];
211
+
212
+ const findCondition: (
213
+ predicate: (c: Record<string, unknown>) => boolean,
214
+ ) => Record<string, unknown> | null = (predicate) => {
215
+ for (const c of conditions) {
216
+ if (predicate(c)) {
217
+ return c;
218
+ }
219
+ }
220
+ return null;
221
+ };
222
+
223
+ let picked: Record<string, unknown> | null = null;
224
+ if (row.isReady === false) {
225
+ picked = findCondition((c: Record<string, unknown>) => {
226
+ return c["type"] === "Ready" && c["status"] !== "True";
227
+ });
228
+ }
229
+ if (!picked && row.hasMemoryPressure === true) {
230
+ picked = findCondition((c: Record<string, unknown>) => {
231
+ return c["type"] === "MemoryPressure" && c["status"] === "True";
232
+ });
233
+ }
234
+ if (!picked && row.hasDiskPressure === true) {
235
+ picked = findCondition((c: Record<string, unknown>) => {
236
+ return c["type"] === "DiskPressure" && c["status"] === "True";
237
+ });
238
+ }
239
+ if (!picked && row.hasPidPressure === true) {
240
+ picked = findCondition((c: Record<string, unknown>) => {
241
+ return c["type"] === "PIDPressure" && c["status"] === "True";
242
+ });
243
+ }
244
+
245
+ const reason: string =
246
+ picked && typeof picked["reason"] === "string"
247
+ ? (picked["reason"] as string)
248
+ : "";
249
+ const message: string =
250
+ picked && typeof picked["message"] === "string"
251
+ ? (picked["message"] as string)
252
+ : "";
253
+
254
+ return {
255
+ name: row.name,
256
+ isReady: row.isReady === true,
257
+ hasMemoryPressure: row.hasMemoryPressure === true,
258
+ hasDiskPressure: row.hasDiskPressure === true,
259
+ hasPidPressure: row.hasPidPressure === true,
260
+ reason,
261
+ message,
262
+ };
50
263
  }
51
264
 
52
265
  const UPSERT_BATCH_SIZE: number = 500;
@@ -72,6 +285,7 @@ const UPSERT_COLUMNS: Array<keyof ParsedKubernetesResource | string> = [
72
285
  "annotations",
73
286
  "ownerReferences",
74
287
  "spec",
288
+ "containerCount",
75
289
  "status",
76
290
  "lastSeenAt",
77
291
  "resourceCreationTimestamp",
@@ -133,6 +347,7 @@ export class Service extends DatabaseService<Model> {
133
347
  r.annotations ? JSON.stringify(r.annotations) : null,
134
348
  r.ownerReferences ? JSON.stringify(r.ownerReferences) : null,
135
349
  r.spec ? JSON.stringify(r.spec) : null,
350
+ r.containerCount,
136
351
  r.status ? JSON.stringify(r.status) : null,
137
352
  r.lastSeenAt,
138
353
  r.resourceCreationTimestamp,
@@ -145,7 +360,7 @@ export class Service extends DatabaseService<Model> {
145
360
  "projectId", "kubernetesClusterId", "kind", "namespaceKey", "name",
146
361
  "uid", "phase", "isReady",
147
362
  "hasMemoryPressure", "hasDiskPressure", "hasPidPressure",
148
- "labels", "annotations", "ownerReferences", "spec", "status",
363
+ "labels", "annotations", "ownerReferences", "spec", "containerCount", "status",
149
364
  "lastSeenAt", "resourceCreationTimestamp", "version"
150
365
  )
151
366
  VALUES ${valueFragments.join(", ")}
@@ -161,6 +376,7 @@ export class Service extends DatabaseService<Model> {
161
376
  "annotations" = EXCLUDED."annotations",
162
377
  "ownerReferences" = EXCLUDED."ownerReferences",
163
378
  "spec" = EXCLUDED."spec",
379
+ "containerCount" = EXCLUDED."containerCount",
164
380
  "status" = EXCLUDED."status",
165
381
  "lastSeenAt" = EXCLUDED."lastSeenAt",
166
382
  "resourceCreationTimestamp" = EXCLUDED."resourceCreationTimestamp",
@@ -218,7 +434,14 @@ export class Service extends DatabaseService<Model> {
218
434
  const manager: ReturnType<Service["getRepository"]>["manager"] =
219
435
  this.getRepository().manager;
220
436
 
221
- const [kindRows, podRows, nodeRows, containerRows]: [
437
+ const [
438
+ kindRows,
439
+ podRows,
440
+ nodeRows,
441
+ containerRows,
442
+ degradedPodRows,
443
+ degradedNodeRows,
444
+ ]: [
222
445
  Array<{ kind: string; count: string }>,
223
446
  Array<{ phase: string | null; count: string }>,
224
447
  Array<{
@@ -229,6 +452,20 @@ export class Service extends DatabaseService<Model> {
229
452
  pidPressure: string;
230
453
  }>,
231
454
  Array<{ total: string }>,
455
+ Array<{
456
+ name: string;
457
+ namespaceKey: string;
458
+ phase: string | null;
459
+ status: unknown;
460
+ }>,
461
+ Array<{
462
+ name: string;
463
+ isReady: boolean | null;
464
+ hasMemoryPressure: boolean | null;
465
+ hasDiskPressure: boolean | null;
466
+ hasPidPressure: boolean | null;
467
+ status: unknown;
468
+ }>,
232
469
  ] = await Promise.all([
233
470
  manager.query(
234
471
  `SELECT "kind", COUNT(*)::text AS count
@@ -256,15 +493,61 @@ export class Service extends DatabaseService<Model> {
256
493
  [data.projectId.toString(), data.kubernetesClusterId.toString()],
257
494
  ),
258
495
  manager.query(
259
- `SELECT COALESCE(SUM(
260
- CASE WHEN jsonb_typeof("spec"->'containers') = 'array'
261
- THEN jsonb_array_length("spec"->'containers')
262
- ELSE 0 END
263
- ), 0)::text AS total
496
+ /*
497
+ * containerCount is cached on the row during ingest
498
+ * (KubernetesInventoryExtractor sets it from
499
+ * spec.containers.length), so this is a plain int sum instead
500
+ * of a JSONB scan. Rows written before that ingest change may
501
+ * have NULL; SUM treats those as 0, which matches the old
502
+ * behavior.
503
+ */
504
+ `SELECT COALESCE(SUM("containerCount"), 0)::text AS total
264
505
  FROM "KubernetesResource"
265
506
  WHERE "projectId" = $1 AND "kubernetesClusterId" = $2 AND "kind" = 'Pod' AND "deletedAt" IS NULL`,
266
507
  [data.projectId.toString(), data.kubernetesClusterId.toString()],
267
508
  ),
509
+ /*
510
+ * Top-N offenders powering the "Why is this cluster degraded?" card.
511
+ * Failed first (hardest outage), then Pending, then Unknown, so the
512
+ * user sees the worst stuff first without having to sort client-side.
513
+ */
514
+ manager.query(
515
+ `SELECT "name", "namespaceKey", "phase", "status"
516
+ FROM "KubernetesResource"
517
+ WHERE "projectId" = $1
518
+ AND "kubernetesClusterId" = $2
519
+ AND "kind" = 'Pod'
520
+ AND "deletedAt" IS NULL
521
+ AND ("phase" IS NULL OR "phase" NOT IN ('Running', 'Succeeded'))
522
+ ORDER BY
523
+ CASE "phase"
524
+ WHEN 'Failed' THEN 0
525
+ WHEN 'Pending' THEN 1
526
+ ELSE 2
527
+ END,
528
+ "lastSeenAt" DESC
529
+ LIMIT ${DEGRADED_SAMPLE_LIMIT}`,
530
+ [data.projectId.toString(), data.kubernetesClusterId.toString()],
531
+ ),
532
+ manager.query(
533
+ `SELECT "name", "isReady", "hasMemoryPressure", "hasDiskPressure", "hasPidPressure", "status"
534
+ FROM "KubernetesResource"
535
+ WHERE "projectId" = $1
536
+ AND "kubernetesClusterId" = $2
537
+ AND "kind" = 'Node'
538
+ AND "deletedAt" IS NULL
539
+ AND (
540
+ "isReady" IS FALSE
541
+ OR "hasMemoryPressure" IS TRUE
542
+ OR "hasDiskPressure" IS TRUE
543
+ OR "hasPidPressure" IS TRUE
544
+ )
545
+ ORDER BY
546
+ CASE WHEN "isReady" IS FALSE THEN 0 ELSE 1 END,
547
+ "lastSeenAt" DESC
548
+ LIMIT ${DEGRADED_SAMPLE_LIMIT}`,
549
+ [data.projectId.toString(), data.kubernetesClusterId.toString()],
550
+ ),
268
551
  ]);
269
552
 
270
553
  const countsByKind: Record<string, number> = {};
@@ -308,6 +591,13 @@ export class Service extends DatabaseService<Model> {
308
591
  const containerCount: number =
309
592
  parseInt(containerRows[0]?.total || "0", 10) || 0;
310
593
 
594
+ const degradedPods: Array<DegradedPod> = degradedPodRows.map((row) => {
595
+ return buildDegradedPod(row);
596
+ });
597
+ const degradedNodes: Array<DegradedNode> = degradedNodeRows.map((row) => {
598
+ return buildDegradedNode(row);
599
+ });
600
+
311
601
  return {
312
602
  countsByKind,
313
603
  containerCount,
@@ -321,6 +611,8 @@ export class Service extends DatabaseService<Model> {
321
611
  diskPressure: parseInt(nodeRow?.diskPressure || "0", 10) || 0,
322
612
  pidPressure: parseInt(nodeRow?.pidPressure || "0", 10) || 0,
323
613
  },
614
+ degradedPods,
615
+ degradedNodes,
324
616
  };
325
617
  }
326
618
 
@@ -224,7 +224,7 @@ function createSandboxProxy(
224
224
  * Recursively unwraps sandbox proxies in a return value so the host code
225
225
  * receives original objects (e.g. Buffers that pass `instanceof` checks).
226
226
  */
227
- function deepUnwrapProxies(
227
+ export function deepUnwrapProxies(
228
228
  value: unknown,
229
229
  visited?: WeakSet<GenericObject>,
230
230
  ): unknown {
@@ -469,33 +469,50 @@ export default class VMRunner {
469
469
  })()`;
470
470
 
471
471
  try {
472
- /*
473
- * vm timeout only covers synchronous CPU time, so wrap with
474
- * Promise.race to also cover async operations (network, timers, etc.)
475
- */
476
- const vmPromise: Promise<unknown> = vm.runInContext(script, sandbox, {
477
- timeout: timeout,
478
- });
479
-
480
- const overallTimeout: Promise<never> = new Promise(
481
- (_resolve: (value: never) => void, reject: (reason: Error) => void) => {
482
- const handle: NodeJS.Timeout = global.setTimeout(() => {
483
- reject(new Error("Script execution timed out"));
484
- }, timeout + 5000);
485
- // Don't let this timer keep the process alive
486
- handle.unref();
487
- },
488
- );
472
+ let returnVal: unknown;
473
+ let scriptError: Error | undefined;
474
+
475
+ try {
476
+ /*
477
+ * vm timeout only covers synchronous CPU time, so wrap with
478
+ * Promise.race to also cover async operations (network, timers, etc.)
479
+ */
480
+ const vmPromise: Promise<unknown> = vm.runInContext(script, sandbox, {
481
+ timeout: timeout,
482
+ });
489
483
 
490
- const returnVal: unknown = await Promise.race([
491
- vmPromise,
492
- overallTimeout,
493
- ]);
484
+ const overallTimeout: Promise<never> = new Promise(
485
+ (
486
+ _resolve: (value: never) => void,
487
+ reject: (reason: Error) => void,
488
+ ) => {
489
+ const handle: NodeJS.Timeout = global.setTimeout(() => {
490
+ reject(new Error("Script execution timed out"));
491
+ }, timeout + 5000);
492
+ // Don't let this timer keep the process alive
493
+ handle.unref();
494
+ },
495
+ );
496
+
497
+ returnVal = await Promise.race([vmPromise, overallTimeout]);
498
+ } catch (err: unknown) {
499
+ /*
500
+ * Capture user-thrown errors (including timeouts) so the caller can
501
+ * still access side-channel data collected before the throw — e.g.
502
+ * screenshots assigned to a host-realm object passed via `context`.
503
+ * Rethrowing here would discard those partial results.
504
+ */
505
+ scriptError =
506
+ err instanceof Error
507
+ ? err
508
+ : new Error(typeof err === "string" ? err : String(err));
509
+ }
494
510
 
495
511
  return {
496
512
  returnValue: deepUnwrapProxies(returnVal),
497
513
  logMessages,
498
514
  capturedMetrics,
515
+ scriptError,
499
516
  };
500
517
  } finally {
501
518
  // Clean up any lingering timers to prevent resource leaks
@@ -4,4 +4,10 @@ export default interface ReturnResult {
4
4
  returnValue: any;
5
5
  logMessages: string[];
6
6
  capturedMetrics: CapturedMetric[];
7
+ /**
8
+ * Populated when user-supplied code threw (or timed out). The runner still
9
+ * returns collected side-channel data (logs, metrics, and any host-realm
10
+ * context objects the caller passed in) so partial state survives the throw.
11
+ */
12
+ scriptError?: Error | undefined;
7
13
  }
@@ -79,6 +79,12 @@ export interface ParsedKubernetesResource {
79
79
  annotations: JSONObject | null;
80
80
  ownerReferences: JSONObject | null;
81
81
  spec: JSONObject | null;
82
+ /*
83
+ * For Pod kinds: length of spec.containers at parse time. Lets the
84
+ * overview summary SUM() a plain int column instead of scanning
85
+ * every pod's JSONB spec on every page load.
86
+ */
87
+ containerCount: number | null;
82
88
  status: JSONObject | null;
83
89
  lastSeenAt: Date;
84
90
  resourceCreationTimestamp: Date | null;
@@ -255,13 +261,20 @@ export function extractInventoryResource(data: {
255
261
  return null;
256
262
  }
257
263
 
258
- // Pod-specific hot column
264
+ // Pod-specific hot columns
259
265
  let phase: string | null = null;
266
+ let containerCount: number | null = null;
260
267
  if (kind === "Pod") {
261
268
  const podStatus: KubernetesPodObject["status"] = (
262
269
  parsed as KubernetesPodObject
263
270
  ).status;
264
271
  phase = podStatus?.phase || null;
272
+ const podSpec: KubernetesPodObject["spec"] | undefined = (
273
+ parsed as KubernetesPodObject
274
+ ).spec;
275
+ containerCount = Array.isArray(podSpec?.containers)
276
+ ? podSpec.containers.length
277
+ : 0;
265
278
  }
266
279
 
267
280
  // Node-specific hot columns
@@ -318,6 +331,7 @@ export function extractInventoryResource(data: {
318
331
  } as unknown as JSONObject)
319
332
  : null,
320
333
  spec: (anyParsed.spec as JSONObject | undefined) || null,
334
+ containerCount,
321
335
  status: (anyParsed.status as JSONObject | undefined) || null,
322
336
  lastSeenAt: data.lastSeenAt,
323
337
  resourceCreationTimestamp: parseCreationTimestamp(