@oneuptime/common 10.0.68 → 10.0.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/Models/DatabaseModels/KubernetesCluster.ts +5 -0
  2. package/Models/DatabaseModels/KubernetesResource.ts +19 -0
  3. package/Server/API/KubernetesResourceAPI.ts +2 -0
  4. package/Server/Infrastructure/Postgres/SchemaMigrations/1776865086264-MigrationName.ts +17 -0
  5. package/Server/Infrastructure/Postgres/SchemaMigrations/1776881254913-DedupeKubernetesClustersAndAddUniqueIndex.ts +134 -0
  6. package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +4 -0
  7. package/Server/Services/DatabaseService.ts +19 -4
  8. package/Server/Services/KubernetesResourceService.ts +323 -8
  9. package/Server/Types/Database/QueryHelper.ts +127 -0
  10. package/Server/Types/Database/QueryUtil.ts +244 -0
  11. package/Server/Utils/VM/VMRunner.ts +39 -22
  12. package/Types/BaseDatabase/EndsWith.ts +41 -0
  13. package/Types/BaseDatabase/IncludesAll.ts +45 -0
  14. package/Types/BaseDatabase/IncludesNone.ts +48 -0
  15. package/Types/BaseDatabase/NotContains.ts +41 -0
  16. package/Types/BaseDatabase/StartsWith.ts +41 -0
  17. package/Types/IsolatedVM/ReturnResult.ts +6 -0
  18. package/Types/JSON.ts +20 -0
  19. package/Types/Kubernetes/KubernetesInventoryExtractor.ts +15 -1
  20. package/Types/SerializableObjectDictionary.ts +10 -0
  21. package/UI/Components/Filters/BooleanFilter.tsx +1 -0
  22. package/UI/Components/Filters/DateFilter.tsx +212 -25
  23. package/UI/Components/Filters/DropdownFilter.tsx +1 -0
  24. package/UI/Components/Filters/EntityFilter.tsx +214 -41
  25. package/UI/Components/Filters/FilterViewer.tsx +228 -146
  26. package/UI/Components/Filters/FilterViewerItem.tsx +1 -11
  27. package/UI/Components/Filters/FiltersForm.tsx +148 -97
  28. package/UI/Components/Filters/NumberFilter.tsx +219 -34
  29. package/UI/Components/Filters/OperatorSelector.tsx +91 -0
  30. package/UI/Components/Filters/TextFilter.tsx +182 -71
  31. package/UI/Components/Filters/Types/FilterOperator.ts +73 -0
  32. package/UI/Components/ModelTable/BaseModelTable.tsx +8 -0
  33. package/build/dist/Models/DatabaseModels/KubernetesCluster.js +7 -1
  34. package/build/dist/Models/DatabaseModels/KubernetesCluster.js.map +1 -1
  35. package/build/dist/Models/DatabaseModels/KubernetesResource.js +20 -0
  36. package/build/dist/Models/DatabaseModels/KubernetesResource.js.map +1 -1
  37. package/build/dist/Server/API/KubernetesResourceAPI.js +2 -0
  38. package/build/dist/Server/API/KubernetesResourceAPI.js.map +1 -1
  39. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1776865086264-MigrationName.js +12 -0
  40. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1776865086264-MigrationName.js.map +1 -0
  41. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1776881254913-DedupeKubernetesClustersAndAddUniqueIndex.js +123 -0
  42. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1776881254913-DedupeKubernetesClustersAndAddUniqueIndex.js.map +1 -0
  43. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +4 -0
  44. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
  45. package/build/dist/Server/Services/DatabaseService.js +18 -4
  46. package/build/dist/Server/Services/DatabaseService.js.map +1 -1
  47. package/build/dist/Server/Services/KubernetesResourceService.js +204 -8
  48. package/build/dist/Server/Services/KubernetesResourceService.js.map +1 -1
  49. package/build/dist/Server/Types/Database/QueryHelper.js +110 -0
  50. package/build/dist/Server/Types/Database/QueryHelper.js.map +1 -1
  51. package/build/dist/Server/Types/Database/QueryUtil.js +180 -0
  52. package/build/dist/Server/Types/Database/QueryUtil.js.map +1 -1
  53. package/build/dist/Server/Utils/VM/VMRunner.js +33 -19
  54. package/build/dist/Server/Utils/VM/VMRunner.js.map +1 -1
  55. package/build/dist/Types/BaseDatabase/EndsWith.js +31 -0
  56. package/build/dist/Types/BaseDatabase/EndsWith.js.map +1 -0
  57. package/build/dist/Types/BaseDatabase/IncludesAll.js +34 -0
  58. package/build/dist/Types/BaseDatabase/IncludesAll.js.map +1 -0
  59. package/build/dist/Types/BaseDatabase/IncludesNone.js +34 -0
  60. package/build/dist/Types/BaseDatabase/IncludesNone.js.map +1 -0
  61. package/build/dist/Types/BaseDatabase/NotContains.js +31 -0
  62. package/build/dist/Types/BaseDatabase/NotContains.js.map +1 -0
  63. package/build/dist/Types/BaseDatabase/StartsWith.js +31 -0
  64. package/build/dist/Types/BaseDatabase/StartsWith.js.map +1 -0
  65. package/build/dist/Types/JSON.js +5 -0
  66. package/build/dist/Types/JSON.js.map +1 -1
  67. package/build/dist/Types/Kubernetes/KubernetesInventoryExtractor.js +7 -1
  68. package/build/dist/Types/Kubernetes/KubernetesInventoryExtractor.js.map +1 -1
  69. package/build/dist/Types/SerializableObjectDictionary.js +10 -0
  70. package/build/dist/Types/SerializableObjectDictionary.js.map +1 -1
  71. package/build/dist/UI/Components/Filters/BooleanFilter.js +1 -1
  72. package/build/dist/UI/Components/Filters/BooleanFilter.js.map +1 -1
  73. package/build/dist/UI/Components/Filters/DateFilter.js +158 -14
  74. package/build/dist/UI/Components/Filters/DateFilter.js.map +1 -1
  75. package/build/dist/UI/Components/Filters/DropdownFilter.js +1 -1
  76. package/build/dist/UI/Components/Filters/DropdownFilter.js.map +1 -1
  77. package/build/dist/UI/Components/Filters/EntityFilter.js +174 -30
  78. package/build/dist/UI/Components/Filters/EntityFilter.js.map +1 -1
  79. package/build/dist/UI/Components/Filters/FilterViewer.js +188 -97
  80. package/build/dist/UI/Components/Filters/FilterViewer.js.map +1 -1
  81. package/build/dist/UI/Components/Filters/FilterViewerItem.js +1 -6
  82. package/build/dist/UI/Components/Filters/FilterViewerItem.js.map +1 -1
  83. package/build/dist/UI/Components/Filters/FiltersForm.js +46 -38
  84. package/build/dist/UI/Components/Filters/FiltersForm.js.map +1 -1
  85. package/build/dist/UI/Components/Filters/NumberFilter.js +165 -23
  86. package/build/dist/UI/Components/Filters/NumberFilter.js.map +1 -1
  87. package/build/dist/UI/Components/Filters/OperatorSelector.js +41 -0
  88. package/build/dist/UI/Components/Filters/OperatorSelector.js.map +1 -0
  89. package/build/dist/UI/Components/Filters/TextFilter.js +130 -53
  90. package/build/dist/UI/Components/Filters/TextFilter.js.map +1 -1
  91. package/build/dist/UI/Components/Filters/Types/FilterOperator.js +63 -0
  92. package/build/dist/UI/Components/Filters/Types/FilterOperator.js.map +1 -0
  93. package/build/dist/UI/Components/ModelTable/BaseModelTable.js +7 -0
  94. package/build/dist/UI/Components/ModelTable/BaseModelTable.js.map +1 -1
  95. package/package.json +1 -1
@@ -73,6 +73,11 @@ import {
73
73
  })
74
74
  @CrudApiEndpoint(new Route("/kubernetes-cluster"))
75
75
  @SlugifyColumn("name", "slug")
76
+ // Enforce one cluster row per (projectId, clusterIdentifier) at the DB level.
77
+ // Without this, two pods emitting OTel telemetry for a new cluster at the
78
+ // same time (e.g. when the agent is first installed or during a rolling
79
+ // update) race in findOrCreateByClusterIdentifier and create duplicate rows.
80
+ @Index(["projectId", "clusterIdentifier"], { unique: true })
76
81
  @TableMetadata({
77
82
  tableName: "KubernetesCluster",
78
83
  singularName: "Kubernetes Cluster",
@@ -405,6 +405,25 @@ export default class KubernetesResource extends BaseModel {
405
405
  })
406
406
  public spec?: JSONObject = undefined;
407
407
 
408
+ @ColumnAccessControl({
409
+ create: [],
410
+ read: READ_PERMISSIONS,
411
+ update: [],
412
+ })
413
+ @TableColumn({
414
+ required: false,
415
+ type: TableColumnType.Number,
416
+ canReadOnRelationQuery: true,
417
+ title: "Container Count",
418
+ description:
419
+ "For Pods: count of entries in spec.containers, cached so the overview page can SUM it without scanning JSONB. Null for non-Pod kinds.",
420
+ })
421
+ @Column({
422
+ nullable: true,
423
+ type: ColumnType.Number,
424
+ })
425
+ public containerCount?: number = undefined;
426
+
408
427
  @ColumnAccessControl({
409
428
  create: [],
410
429
  read: READ_PERMISSIONS,
@@ -122,6 +122,8 @@ export default class KubernetesResourceAPI extends BaseAPI<
122
122
  hpaCount: summary.countsByKind["HorizontalPodAutoscaler"] || 0,
123
123
  vpaCount: summary.countsByKind["VerticalPodAutoscaler"] || 0,
124
124
  containerCount: summary.containerCount,
125
+ degradedPods: summary.degradedPods as unknown as JSONObject,
126
+ degradedNodes: summary.degradedNodes as unknown as JSONObject,
125
127
  };
126
128
 
127
129
  return Response.sendJsonObjectResponse(req, res, responseBody);
@@ -0,0 +1,17 @@
1
+ import { MigrationInterface, QueryRunner } from "typeorm";
2
+
3
+ export class MigrationName1776865086264 implements MigrationInterface {
4
+ public name: string = "MigrationName1776865086264";
5
+
6
+ public async up(queryRunner: QueryRunner): Promise<void> {
7
+ await queryRunner.query(
8
+ `ALTER TABLE "KubernetesResource" ADD "containerCount" integer`,
9
+ );
10
+ }
11
+
12
+ public async down(queryRunner: QueryRunner): Promise<void> {
13
+ await queryRunner.query(
14
+ `ALTER TABLE "KubernetesResource" DROP COLUMN "containerCount"`,
15
+ );
16
+ }
17
+ }
@@ -0,0 +1,134 @@
1
+ import { MigrationInterface, QueryRunner } from "typeorm";
2
+
3
+ /*
4
+ * Before this migration, KubernetesCluster had an app-level
5
+ * @UniqueColumnBy("projectId") check and a non-unique index on
6
+ * clusterIdentifier, but no DB-level uniqueness. Under concurrent telemetry
7
+ * from multiple agent pods (happens every time the agent is installed or
8
+ * rolls out), findOrCreateByClusterIdentifier would race between its find
9
+ * and its create, and the DB accepted both inserts — producing duplicate
10
+ * rows with identical (projectId, clusterIdentifier).
11
+ *
12
+ * This migration:
13
+ * 1. Reparents all FKs that reference duplicate clusters — KubernetesResource,
14
+ * KubernetesClusterOwnerUser, KubernetesClusterOwnerTeam — onto the
15
+ * oldest surviving row in each duplicate group.
16
+ * 2. Deletes the duplicate (non-survivor) rows.
17
+ * 3. Creates a DB-level unique index on (projectId, clusterIdentifier) so
18
+ * future races are rejected by the DB — the service's existing
19
+ * catch-and-refetch in findOrCreateByClusterIdentifier then returns the
20
+ * winning row instead of producing a duplicate.
21
+ *
22
+ * The auto-generator also picked up unrelated OnCallDutyPolicyScheduleLayer
23
+ * default-value drift. That's dev-environment drift, not the bug we're fixing;
24
+ * stripped from this migration.
25
+ */
26
+ export class DedupeKubernetesClustersAndAddUniqueIndex1776881254913
27
+ implements MigrationInterface
28
+ {
29
+ public name: string = "DedupeKubernetesClustersAndAddUniqueIndex1776881254913";
30
+
31
+ public async up(queryRunner: QueryRunner): Promise<void> {
32
+ // 1: reparent KubernetesResource FKs from duplicates -> survivor.
33
+ await queryRunner.query(`
34
+ WITH survivors AS (
35
+ SELECT DISTINCT ON ("projectId", "clusterIdentifier")
36
+ _id AS survivor_id,
37
+ "projectId",
38
+ "clusterIdentifier"
39
+ FROM "KubernetesCluster"
40
+ ORDER BY "projectId", "clusterIdentifier", "createdAt" ASC, _id ASC
41
+ ),
42
+ losers AS (
43
+ SELECT kc._id AS loser_id, s.survivor_id
44
+ FROM "KubernetesCluster" kc
45
+ JOIN survivors s
46
+ ON s."projectId" = kc."projectId"
47
+ AND s."clusterIdentifier" = kc."clusterIdentifier"
48
+ WHERE kc._id <> s.survivor_id
49
+ )
50
+ UPDATE "KubernetesResource" kr
51
+ SET "kubernetesClusterId" = l.survivor_id
52
+ FROM losers l
53
+ WHERE kr."kubernetesClusterId" = l.loser_id;
54
+ `);
55
+
56
+ // 2: reparent KubernetesClusterOwnerUser FKs.
57
+ await queryRunner.query(`
58
+ WITH survivors AS (
59
+ SELECT DISTINCT ON ("projectId", "clusterIdentifier")
60
+ _id AS survivor_id,
61
+ "projectId",
62
+ "clusterIdentifier"
63
+ FROM "KubernetesCluster"
64
+ ORDER BY "projectId", "clusterIdentifier", "createdAt" ASC, _id ASC
65
+ ),
66
+ losers AS (
67
+ SELECT kc._id AS loser_id, s.survivor_id
68
+ FROM "KubernetesCluster" kc
69
+ JOIN survivors s
70
+ ON s."projectId" = kc."projectId"
71
+ AND s."clusterIdentifier" = kc."clusterIdentifier"
72
+ WHERE kc._id <> s.survivor_id
73
+ )
74
+ UPDATE "KubernetesClusterOwnerUser" o
75
+ SET "kubernetesClusterId" = l.survivor_id
76
+ FROM losers l
77
+ WHERE o."kubernetesClusterId" = l.loser_id;
78
+ `);
79
+
80
+ // 3: reparent KubernetesClusterOwnerTeam FKs.
81
+ await queryRunner.query(`
82
+ WITH survivors AS (
83
+ SELECT DISTINCT ON ("projectId", "clusterIdentifier")
84
+ _id AS survivor_id,
85
+ "projectId",
86
+ "clusterIdentifier"
87
+ FROM "KubernetesCluster"
88
+ ORDER BY "projectId", "clusterIdentifier", "createdAt" ASC, _id ASC
89
+ ),
90
+ losers AS (
91
+ SELECT kc._id AS loser_id, s.survivor_id
92
+ FROM "KubernetesCluster" kc
93
+ JOIN survivors s
94
+ ON s."projectId" = kc."projectId"
95
+ AND s."clusterIdentifier" = kc."clusterIdentifier"
96
+ WHERE kc._id <> s.survivor_id
97
+ )
98
+ UPDATE "KubernetesClusterOwnerTeam" o
99
+ SET "kubernetesClusterId" = l.survivor_id
100
+ FROM losers l
101
+ WHERE o."kubernetesClusterId" = l.loser_id;
102
+ `);
103
+
104
+ // 4: delete duplicate rows now that nothing references them.
105
+ await queryRunner.query(`
106
+ WITH survivors AS (
107
+ SELECT DISTINCT ON ("projectId", "clusterIdentifier")
108
+ _id AS survivor_id,
109
+ "projectId",
110
+ "clusterIdentifier"
111
+ FROM "KubernetesCluster"
112
+ ORDER BY "projectId", "clusterIdentifier", "createdAt" ASC, _id ASC
113
+ )
114
+ DELETE FROM "KubernetesCluster" kc
115
+ USING survivors s
116
+ WHERE s."projectId" = kc."projectId"
117
+ AND s."clusterIdentifier" = kc."clusterIdentifier"
118
+ AND kc._id <> s.survivor_id;
119
+ `);
120
+
121
+ // 5: add the DB-level composite unique index.
122
+ await queryRunner.query(
123
+ `CREATE UNIQUE INDEX "IDX_9756988b48848f4f7532a2af0d" ON "KubernetesCluster" ("projectId", "clusterIdentifier") `,
124
+ );
125
+ }
126
+
127
+ public async down(queryRunner: QueryRunner): Promise<void> {
128
+ await queryRunner.query(
129
+ `DROP INDEX "public"."IDX_9756988b48848f4f7532a2af0d"`,
130
+ );
131
+ // Duplicate rows dropped in up() are lost — a down-migration cannot
132
+ // resurrect them (and reinstating duplicates is not desirable anyway).
133
+ }
134
+ }
@@ -287,6 +287,8 @@ import { MigrationName1776541018853 } from "./1776541018853-MigrationName";
287
287
  import { MigrationName1776544084793 } from "./1776544084793-MigrationName";
288
288
  import { MigrationName1776761171349 } from "./1776761171349-MigrationName";
289
289
  import { MigrationName1776801030808 } from "./1776801030808-MigrationName";
290
+ import { MigrationName1776865086264 } from "./1776865086264-MigrationName";
291
+ import { DedupeKubernetesClustersAndAddUniqueIndex1776881254913 } from "./1776881254913-DedupeKubernetesClustersAndAddUniqueIndex";
290
292
  export default [
291
293
  InitialMigration,
292
294
  MigrationName1717678334852,
@@ -577,4 +579,6 @@ export default [
577
579
  MigrationName1776544084793,
578
580
  MigrationName1776761171349,
579
581
  MigrationName1776801030808,
582
+ MigrationName1776865086264,
583
+ DedupeKubernetesClustersAndAddUniqueIndex1776881254913,
580
584
  ];
@@ -31,7 +31,6 @@ import UpdateByIDAndFetch from "../Types/Database/UpdateByIDAndFetch";
31
31
  import UpdateOneBy from "../Types/Database/UpdateOneBy";
32
32
  import Encryption from "../Utils/Encryption";
33
33
  import logger, { LogAttributes } from "../Utils/Logger";
34
- import AuditLogService from "./AuditLogService";
35
34
  import BaseService from "./BaseService";
36
35
  import BaseModel from "../../Models/DatabaseModels/DatabaseBaseModel/DatabaseBaseModel";
37
36
  import { WorkflowRoute } from "../../ServiceRoute";
@@ -70,6 +69,7 @@ import { FindWhere } from "../../Types/BaseDatabase/Query";
70
69
  import Realtime from "../Utils/Realtime";
71
70
  import ModelEventType from "../../Types/Realtime/ModelEventType";
72
71
  import CaptureSpan from "../Utils/Telemetry/CaptureSpan";
72
+ import type AuditLogServiceType from "./AuditLogService";
73
73
 
74
74
  class DatabaseService<TBaseModel extends BaseModel> extends BaseService {
75
75
  public modelType!: { new (): TBaseModel };
@@ -777,7 +777,16 @@ class DatabaseService<TBaseModel extends BaseModel> extends BaseService {
777
777
  !createBy.props.ignoreHooks &&
778
778
  this.getModel().enableAuditLogOn?.create
779
779
  ) {
780
- await AuditLogService.recordCreate({
780
+ /*
781
+ * Lazy require to avoid circular dependency between DatabaseService and
782
+ * AuditLogService (which depends on ProjectService/UserService, both of
783
+ * which extend DatabaseService). A top-level import leaves
784
+ * DatabaseService undefined at class-extension time for subclasses.
785
+ */
786
+ const auditLogService: typeof AuditLogServiceType =
787
+ // eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires
788
+ require("./AuditLogService").default;
789
+ await auditLogService.recordCreate({
781
790
  model: this.getModel(),
782
791
  createdItem: createBy.data,
783
792
  props: createBy.props,
@@ -1224,9 +1233,12 @@ class DatabaseService<TBaseModel extends BaseModel> extends BaseService {
1224
1233
  }
1225
1234
 
1226
1235
  if (this.getModel().enableAuditLogOn?.delete && items.length > 0) {
1236
+ const auditLogService: typeof AuditLogServiceType =
1237
+ // eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires
1238
+ require("./AuditLogService").default;
1227
1239
  for (const item of items) {
1228
1240
  if (item.id) {
1229
- await AuditLogService.recordDelete({
1241
+ await auditLogService.recordDelete({
1230
1242
  model: this.getModel(),
1231
1243
  deletedItem: item,
1232
1244
  itemId: item.id,
@@ -1645,7 +1657,10 @@ class DatabaseService<TBaseModel extends BaseModel> extends BaseService {
1645
1657
  !this.hasSameValues({ item, updatedItem }) &&
1646
1658
  item.id
1647
1659
  ) {
1648
- await AuditLogService.recordUpdate({
1660
+ const auditLogService: typeof AuditLogServiceType =
1661
+ // eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires
1662
+ require("./AuditLogService").default;
1663
+ await auditLogService.recordUpdate({
1649
1664
  model: this.getModel(),
1650
1665
  before: item,
1651
1666
  updatedFields: data as JSONObject,
@@ -22,10 +22,28 @@ import logger from "../Utils/Logger";
22
22
 
23
23
  export type { ParsedKubernetesResource };
24
24
 
25
+ export interface DegradedPod {
26
+ name: string;
27
+ namespace: string;
28
+ phase: string;
29
+ reason: string;
30
+ message: string;
31
+ }
32
+
33
+ export interface DegradedNode {
34
+ name: string;
35
+ isReady: boolean;
36
+ hasMemoryPressure: boolean;
37
+ hasDiskPressure: boolean;
38
+ hasPidPressure: boolean;
39
+ reason: string;
40
+ message: string;
41
+ }
42
+
25
43
  export interface InventorySummary {
26
44
  countsByKind: Record<string, number>;
27
45
  /*
28
- * Sum of `jsonb_array_length(spec->'containers')` across all pods in
46
+ * Sum of the denormalized containerCount column across all pods in
29
47
  * the cluster. Containers aren't a top-level kind in the inventory,
30
48
  * so we derive the total server-side so the sidebar badge and the
31
49
  * Containers page agree.
@@ -47,6 +65,208 @@ export interface InventorySummary {
47
65
  diskPressure: number;
48
66
  pidPressure: number;
49
67
  };
68
+ /*
69
+ * Top offenders that explain a Degraded/Unhealthy cluster state. Capped
70
+ * so a pathological cluster can't blow up the overview payload; the
71
+ * dedicated Pods/Nodes pages are the source of truth for the full list.
72
+ */
73
+ degradedPods: Array<DegradedPod>;
74
+ degradedNodes: Array<DegradedNode>;
75
+ }
76
+
77
+ const DEGRADED_SAMPLE_LIMIT: number = 20;
78
+
79
+ /*
80
+ * Pull the first meaningful reason/message off a pod's status block.
81
+ * KubernetesInventoryExtractor stores containerStatuses as an array of
82
+ * { name, ready, state: "running"|"waiting"|"terminated", reason, message, ... }.
83
+ * A waiting container with a reason (ImagePullBackOff, CrashLoopBackOff,
84
+ * CreateContainerConfigError, ...) is exactly what the user needs to see,
85
+ * so we surface that first. We fall back to terminated reasons (OOMKilled,
86
+ * Error, ContainerCannotRun) and then to status-level conditions.
87
+ */
88
+ function buildDegradedPod(row: {
89
+ name: string;
90
+ namespaceKey: string;
91
+ phase: string | null;
92
+ status: unknown;
93
+ }): DegradedPod {
94
+ const status: Record<string, unknown> =
95
+ row.status && typeof row.status === "object"
96
+ ? (row.status as Record<string, unknown>)
97
+ : {};
98
+
99
+ let reason: string = "";
100
+ let message: string = "";
101
+
102
+ const containerStatuses: Array<Record<string, unknown>> = Array.isArray(
103
+ status["containerStatuses"],
104
+ )
105
+ ? (status["containerStatuses"] as Array<Record<string, unknown>>)
106
+ : [];
107
+ const initContainerStatuses: Array<Record<string, unknown>> = Array.isArray(
108
+ status["initContainerStatuses"],
109
+ )
110
+ ? (status["initContainerStatuses"] as Array<Record<string, unknown>>)
111
+ : [];
112
+
113
+ const scanForReason: (
114
+ list: Array<Record<string, unknown>>,
115
+ targetState: string,
116
+ ) => { reason: string; message: string } | null = (
117
+ list: Array<Record<string, unknown>>,
118
+ targetState: string,
119
+ ) => {
120
+ for (const cs of list) {
121
+ if (cs["state"] !== targetState) {
122
+ continue;
123
+ }
124
+ const r: unknown = cs["reason"];
125
+ if (typeof r === "string" && r) {
126
+ const m: unknown = cs["message"];
127
+ return {
128
+ reason: r,
129
+ message: typeof m === "string" ? m : "",
130
+ };
131
+ }
132
+ }
133
+ return null;
134
+ };
135
+
136
+ const waitingHit: { reason: string; message: string } | null =
137
+ scanForReason(containerStatuses, "waiting") ||
138
+ scanForReason(initContainerStatuses, "waiting");
139
+ const terminatedHit: { reason: string; message: string } | null = waitingHit
140
+ ? null
141
+ : scanForReason(containerStatuses, "terminated") ||
142
+ scanForReason(initContainerStatuses, "terminated");
143
+ const hit: { reason: string; message: string } | null =
144
+ waitingHit || terminatedHit;
145
+
146
+ if (hit) {
147
+ reason = hit.reason;
148
+ message = hit.message;
149
+ } else {
150
+ /*
151
+ * Fall back to the pod-level reason/message fields set by the scheduler
152
+ * (e.g. "Unschedulable" with "0/3 nodes are available: ...").
153
+ */
154
+ const topReason: unknown = status["reason"];
155
+ const topMessage: unknown = status["message"];
156
+ if (typeof topReason === "string") {
157
+ reason = topReason;
158
+ }
159
+ if (typeof topMessage === "string") {
160
+ message = topMessage;
161
+ }
162
+
163
+ // If still nothing, pull from the first non-True condition.
164
+ if (!reason) {
165
+ const conditions: Array<Record<string, unknown>> = Array.isArray(
166
+ status["conditions"],
167
+ )
168
+ ? (status["conditions"] as Array<Record<string, unknown>>)
169
+ : [];
170
+ for (const cond of conditions) {
171
+ if (cond["status"] !== "True") {
172
+ const r: unknown = cond["reason"];
173
+ const m: unknown = cond["message"];
174
+ if (typeof r === "string" && r) {
175
+ reason = r;
176
+ message = typeof m === "string" ? m : "";
177
+ break;
178
+ }
179
+ }
180
+ }
181
+ }
182
+ }
183
+
184
+ return {
185
+ name: row.name,
186
+ namespace: row.namespaceKey || "",
187
+ phase: row.phase || "Unknown",
188
+ reason,
189
+ message,
190
+ };
191
+ }
192
+
193
+ /*
194
+ * For a Node: if isReady is false, the "Ready" condition carries the real
195
+ * story (e.g. "KubeletNotReady: PLEG is not healthy"). If only pressure
196
+ * flags are tripped, pick the tripped condition's reason/message.
197
+ */
198
+ function buildDegradedNode(row: {
199
+ name: string;
200
+ isReady: boolean | null;
201
+ hasMemoryPressure: boolean | null;
202
+ hasDiskPressure: boolean | null;
203
+ hasPidPressure: boolean | null;
204
+ status: unknown;
205
+ }): DegradedNode {
206
+ const status: Record<string, unknown> =
207
+ row.status && typeof row.status === "object"
208
+ ? (row.status as Record<string, unknown>)
209
+ : {};
210
+
211
+ const conditions: Array<Record<string, unknown>> = Array.isArray(
212
+ status["conditions"],
213
+ )
214
+ ? (status["conditions"] as Array<Record<string, unknown>>)
215
+ : [];
216
+
217
+ const findCondition: (
218
+ predicate: (c: Record<string, unknown>) => boolean,
219
+ ) => Record<string, unknown> | null = (
220
+ predicate: (c: Record<string, unknown>) => boolean,
221
+ ) => {
222
+ for (const c of conditions) {
223
+ if (predicate(c)) {
224
+ return c;
225
+ }
226
+ }
227
+ return null;
228
+ };
229
+
230
+ let picked: Record<string, unknown> | null = null;
231
+ if (row.isReady === false) {
232
+ picked = findCondition((c: Record<string, unknown>) => {
233
+ return c["type"] === "Ready" && c["status"] !== "True";
234
+ });
235
+ }
236
+ if (!picked && row.hasMemoryPressure === true) {
237
+ picked = findCondition((c: Record<string, unknown>) => {
238
+ return c["type"] === "MemoryPressure" && c["status"] === "True";
239
+ });
240
+ }
241
+ if (!picked && row.hasDiskPressure === true) {
242
+ picked = findCondition((c: Record<string, unknown>) => {
243
+ return c["type"] === "DiskPressure" && c["status"] === "True";
244
+ });
245
+ }
246
+ if (!picked && row.hasPidPressure === true) {
247
+ picked = findCondition((c: Record<string, unknown>) => {
248
+ return c["type"] === "PIDPressure" && c["status"] === "True";
249
+ });
250
+ }
251
+
252
+ const reason: string =
253
+ picked && typeof picked["reason"] === "string"
254
+ ? (picked["reason"] as string)
255
+ : "";
256
+ const message: string =
257
+ picked && typeof picked["message"] === "string"
258
+ ? (picked["message"] as string)
259
+ : "";
260
+
261
+ return {
262
+ name: row.name,
263
+ isReady: row.isReady === true,
264
+ hasMemoryPressure: row.hasMemoryPressure === true,
265
+ hasDiskPressure: row.hasDiskPressure === true,
266
+ hasPidPressure: row.hasPidPressure === true,
267
+ reason,
268
+ message,
269
+ };
50
270
  }
51
271
 
52
272
  const UPSERT_BATCH_SIZE: number = 500;
@@ -72,6 +292,7 @@ const UPSERT_COLUMNS: Array<keyof ParsedKubernetesResource | string> = [
72
292
  "annotations",
73
293
  "ownerReferences",
74
294
  "spec",
295
+ "containerCount",
75
296
  "status",
76
297
  "lastSeenAt",
77
298
  "resourceCreationTimestamp",
@@ -133,6 +354,7 @@ export class Service extends DatabaseService<Model> {
133
354
  r.annotations ? JSON.stringify(r.annotations) : null,
134
355
  r.ownerReferences ? JSON.stringify(r.ownerReferences) : null,
135
356
  r.spec ? JSON.stringify(r.spec) : null,
357
+ r.containerCount,
136
358
  r.status ? JSON.stringify(r.status) : null,
137
359
  r.lastSeenAt,
138
360
  r.resourceCreationTimestamp,
@@ -145,7 +367,7 @@ export class Service extends DatabaseService<Model> {
145
367
  "projectId", "kubernetesClusterId", "kind", "namespaceKey", "name",
146
368
  "uid", "phase", "isReady",
147
369
  "hasMemoryPressure", "hasDiskPressure", "hasPidPressure",
148
- "labels", "annotations", "ownerReferences", "spec", "status",
370
+ "labels", "annotations", "ownerReferences", "spec", "containerCount", "status",
149
371
  "lastSeenAt", "resourceCreationTimestamp", "version"
150
372
  )
151
373
  VALUES ${valueFragments.join(", ")}
@@ -161,6 +383,7 @@ export class Service extends DatabaseService<Model> {
161
383
  "annotations" = EXCLUDED."annotations",
162
384
  "ownerReferences" = EXCLUDED."ownerReferences",
163
385
  "spec" = EXCLUDED."spec",
386
+ "containerCount" = EXCLUDED."containerCount",
164
387
  "status" = EXCLUDED."status",
165
388
  "lastSeenAt" = EXCLUDED."lastSeenAt",
166
389
  "resourceCreationTimestamp" = EXCLUDED."resourceCreationTimestamp",
@@ -218,7 +441,14 @@ export class Service extends DatabaseService<Model> {
218
441
  const manager: ReturnType<Service["getRepository"]>["manager"] =
219
442
  this.getRepository().manager;
220
443
 
221
- const [kindRows, podRows, nodeRows, containerRows]: [
444
+ const [
445
+ kindRows,
446
+ podRows,
447
+ nodeRows,
448
+ containerRows,
449
+ degradedPodRows,
450
+ degradedNodeRows,
451
+ ]: [
222
452
  Array<{ kind: string; count: string }>,
223
453
  Array<{ phase: string | null; count: string }>,
224
454
  Array<{
@@ -229,6 +459,20 @@ export class Service extends DatabaseService<Model> {
229
459
  pidPressure: string;
230
460
  }>,
231
461
  Array<{ total: string }>,
462
+ Array<{
463
+ name: string;
464
+ namespaceKey: string;
465
+ phase: string | null;
466
+ status: unknown;
467
+ }>,
468
+ Array<{
469
+ name: string;
470
+ isReady: boolean | null;
471
+ hasMemoryPressure: boolean | null;
472
+ hasDiskPressure: boolean | null;
473
+ hasPidPressure: boolean | null;
474
+ status: unknown;
475
+ }>,
232
476
  ] = await Promise.all([
233
477
  manager.query(
234
478
  `SELECT "kind", COUNT(*)::text AS count
@@ -256,15 +500,61 @@ export class Service extends DatabaseService<Model> {
256
500
  [data.projectId.toString(), data.kubernetesClusterId.toString()],
257
501
  ),
258
502
  manager.query(
259
- `SELECT COALESCE(SUM(
260
- CASE WHEN jsonb_typeof("spec"->'containers') = 'array'
261
- THEN jsonb_array_length("spec"->'containers')
262
- ELSE 0 END
263
- ), 0)::text AS total
503
+ /*
504
+ * containerCount is cached on the row during ingest
505
+ * (KubernetesInventoryExtractor sets it from
506
+ * spec.containers.length), so this is a plain int sum instead
507
+ * of a JSONB scan. Rows written before that ingest change may
508
+ * have NULL; SUM treats those as 0, which matches the old
509
+ * behavior.
510
+ */
511
+ `SELECT COALESCE(SUM("containerCount"), 0)::text AS total
264
512
  FROM "KubernetesResource"
265
513
  WHERE "projectId" = $1 AND "kubernetesClusterId" = $2 AND "kind" = 'Pod' AND "deletedAt" IS NULL`,
266
514
  [data.projectId.toString(), data.kubernetesClusterId.toString()],
267
515
  ),
516
+ /*
517
+ * Top-N offenders powering the "Why is this cluster degraded?" card.
518
+ * Failed first (hardest outage), then Pending, then Unknown, so the
519
+ * user sees the worst stuff first without having to sort client-side.
520
+ */
521
+ manager.query(
522
+ `SELECT "name", "namespaceKey", "phase", "status"
523
+ FROM "KubernetesResource"
524
+ WHERE "projectId" = $1
525
+ AND "kubernetesClusterId" = $2
526
+ AND "kind" = 'Pod'
527
+ AND "deletedAt" IS NULL
528
+ AND ("phase" IS NULL OR "phase" NOT IN ('Running', 'Succeeded'))
529
+ ORDER BY
530
+ CASE "phase"
531
+ WHEN 'Failed' THEN 0
532
+ WHEN 'Pending' THEN 1
533
+ ELSE 2
534
+ END,
535
+ "lastSeenAt" DESC
536
+ LIMIT ${DEGRADED_SAMPLE_LIMIT}`,
537
+ [data.projectId.toString(), data.kubernetesClusterId.toString()],
538
+ ),
539
+ manager.query(
540
+ `SELECT "name", "isReady", "hasMemoryPressure", "hasDiskPressure", "hasPidPressure", "status"
541
+ FROM "KubernetesResource"
542
+ WHERE "projectId" = $1
543
+ AND "kubernetesClusterId" = $2
544
+ AND "kind" = 'Node'
545
+ AND "deletedAt" IS NULL
546
+ AND (
547
+ "isReady" IS FALSE
548
+ OR "hasMemoryPressure" IS TRUE
549
+ OR "hasDiskPressure" IS TRUE
550
+ OR "hasPidPressure" IS TRUE
551
+ )
552
+ ORDER BY
553
+ CASE WHEN "isReady" IS FALSE THEN 0 ELSE 1 END,
554
+ "lastSeenAt" DESC
555
+ LIMIT ${DEGRADED_SAMPLE_LIMIT}`,
556
+ [data.projectId.toString(), data.kubernetesClusterId.toString()],
557
+ ),
268
558
  ]);
269
559
 
270
560
  const countsByKind: Record<string, number> = {};
@@ -308,6 +598,29 @@ export class Service extends DatabaseService<Model> {
308
598
  const containerCount: number =
309
599
  parseInt(containerRows[0]?.total || "0", 10) || 0;
310
600
 
601
+ const degradedPods: Array<DegradedPod> = degradedPodRows.map(
602
+ (row: {
603
+ name: string;
604
+ namespaceKey: string;
605
+ phase: string | null;
606
+ status: unknown;
607
+ }) => {
608
+ return buildDegradedPod(row);
609
+ },
610
+ );
611
+ const degradedNodes: Array<DegradedNode> = degradedNodeRows.map(
612
+ (row: {
613
+ name: string;
614
+ isReady: boolean | null;
615
+ hasMemoryPressure: boolean | null;
616
+ hasDiskPressure: boolean | null;
617
+ hasPidPressure: boolean | null;
618
+ status: unknown;
619
+ }) => {
620
+ return buildDegradedNode(row);
621
+ },
622
+ );
623
+
311
624
  return {
312
625
  countsByKind,
313
626
  containerCount,
@@ -321,6 +634,8 @@ export class Service extends DatabaseService<Model> {
321
634
  diskPressure: parseInt(nodeRow?.diskPressure || "0", 10) || 0,
322
635
  pidPressure: parseInt(nodeRow?.pidPressure || "0", 10) || 0,
323
636
  },
637
+ degradedPods,
638
+ degradedNodes,
324
639
  };
325
640
  }
326
641