@oneuptime/common 10.0.84 → 10.0.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/Models/DatabaseModels/Index.ts +2 -0
  2. package/Models/DatabaseModels/KubernetesContainer.ts +552 -0
  3. package/Models/DatabaseModels/KubernetesResource.ts +130 -0
  4. package/Models/DatabaseModels/LlmLog.ts +2 -1
  5. package/Models/DatabaseModels/LlmProvider.ts +5 -4
  6. package/Models/DatabaseModels/Project.ts +40 -0
  7. package/Server/API/KubernetesResourceAPI.ts +144 -12
  8. package/Server/Infrastructure/Postgres/SchemaMigrations/1777550162848-MigrationName.ts +29 -0
  9. package/Server/Infrastructure/Postgres/SchemaMigrations/1777571961028-MigrationName.ts +99 -0
  10. package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +4 -0
  11. package/Server/Infrastructure/Queue.ts +60 -0
  12. package/Server/Infrastructure/QueueWorker.ts +39 -1
  13. package/Server/Middleware/HttpMetricsMiddleware.ts +92 -0
  14. package/Server/Services/AuditLogService.ts +19 -1
  15. package/Server/Services/KubernetesContainerService.ts +264 -0
  16. package/Server/Services/KubernetesResourceService.ts +233 -0
  17. package/Server/Services/StatusPageSubscriberService.ts +4 -4
  18. package/Server/Types/Database/Permissions/AccessControlPermission.ts +3 -3
  19. package/Server/Utils/LLM/LLMService.ts +132 -11
  20. package/Server/Utils/Monitor/MonitorAlert.ts +1 -1
  21. package/Server/Utils/Monitor/MonitorIncident.ts +1 -1
  22. package/Server/Utils/StartServer.ts +2 -0
  23. package/Server/Utils/Telemetry/AppMetrics.ts +211 -0
  24. package/Server/Utils/Telemetry/RuntimeMetrics.ts +169 -0
  25. package/Server/Utils/Telemetry.ts +98 -0
  26. package/Server/Utils/Workspace/Slack/Actions/Alert.ts +2 -2
  27. package/Server/Utils/Workspace/Slack/Actions/Incident.ts +2 -2
  28. package/Server/Utils/Workspace/Slack/Actions/ScheduledMaintenance.ts +2 -2
  29. package/Tests/jest.setup.ts +18 -0
  30. package/Types/Kubernetes/KubernetesInventoryExtractor.ts +171 -5
  31. package/Types/LLM/LlmType.ts +3 -0
  32. package/UI/Components/Forms/ModelForm.tsx +3 -3
  33. package/UI/Components/LogsViewer/components/LogsAnalyticsView.tsx +2 -2
  34. package/Utils/UUID.ts +1 -3
  35. package/build/dist/Models/DatabaseModels/Index.js +2 -0
  36. package/build/dist/Models/DatabaseModels/Index.js.map +1 -1
  37. package/build/dist/Models/DatabaseModels/KubernetesContainer.js +581 -0
  38. package/build/dist/Models/DatabaseModels/KubernetesContainer.js.map +1 -0
  39. package/build/dist/Models/DatabaseModels/KubernetesResource.js +135 -0
  40. package/build/dist/Models/DatabaseModels/KubernetesResource.js.map +1 -1
  41. package/build/dist/Models/DatabaseModels/LlmLog.js +1 -1
  42. package/build/dist/Models/DatabaseModels/LlmLog.js.map +1 -1
  43. package/build/dist/Models/DatabaseModels/LlmProvider.js +4 -4
  44. package/build/dist/Models/DatabaseModels/LlmProvider.js.map +1 -1
  45. package/build/dist/Models/DatabaseModels/Project.js +41 -0
  46. package/build/dist/Models/DatabaseModels/Project.js.map +1 -1
  47. package/build/dist/Server/API/KubernetesResourceAPI.js +106 -9
  48. package/build/dist/Server/API/KubernetesResourceAPI.js.map +1 -1
  49. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1777550162848-MigrationName.js +16 -0
  50. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1777550162848-MigrationName.js.map +1 -0
  51. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1777571961028-MigrationName.js +40 -0
  52. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1777571961028-MigrationName.js.map +1 -0
  53. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +4 -0
  54. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
  55. package/build/dist/Server/Infrastructure/Queue.js +44 -0
  56. package/build/dist/Server/Infrastructure/Queue.js.map +1 -1
  57. package/build/dist/Server/Infrastructure/QueueWorker.js +31 -1
  58. package/build/dist/Server/Infrastructure/QueueWorker.js.map +1 -1
  59. package/build/dist/Server/Middleware/HttpMetricsMiddleware.js +61 -0
  60. package/build/dist/Server/Middleware/HttpMetricsMiddleware.js.map +1 -0
  61. package/build/dist/Server/Services/AuditLogService.js +14 -1
  62. package/build/dist/Server/Services/AuditLogService.js.map +1 -1
  63. package/build/dist/Server/Services/KubernetesContainerService.js +179 -0
  64. package/build/dist/Server/Services/KubernetesContainerService.js.map +1 -0
  65. package/build/dist/Server/Services/KubernetesResourceService.js +175 -0
  66. package/build/dist/Server/Services/KubernetesResourceService.js.map +1 -1
  67. package/build/dist/Server/Services/StatusPageSubscriberService.js +4 -4
  68. package/build/dist/Server/Services/StatusPageSubscriberService.js.map +1 -1
  69. package/build/dist/Server/Types/Database/Permissions/AccessControlPermission.js +3 -3
  70. package/build/dist/Server/Utils/LLM/LLMService.js +111 -13
  71. package/build/dist/Server/Utils/LLM/LLMService.js.map +1 -1
  72. package/build/dist/Server/Utils/Monitor/MonitorAlert.js +1 -1
  73. package/build/dist/Server/Utils/Monitor/MonitorAlert.js.map +1 -1
  74. package/build/dist/Server/Utils/Monitor/MonitorIncident.js +1 -1
  75. package/build/dist/Server/Utils/Monitor/MonitorIncident.js.map +1 -1
  76. package/build/dist/Server/Utils/StartServer.js +2 -0
  77. package/build/dist/Server/Utils/StartServer.js.map +1 -1
  78. package/build/dist/Server/Utils/Telemetry/AppMetrics.js +167 -0
  79. package/build/dist/Server/Utils/Telemetry/AppMetrics.js.map +1 -0
  80. package/build/dist/Server/Utils/Telemetry/RuntimeMetrics.js +141 -0
  81. package/build/dist/Server/Utils/Telemetry/RuntimeMetrics.js.map +1 -0
  82. package/build/dist/Server/Utils/Telemetry.js +47 -0
  83. package/build/dist/Server/Utils/Telemetry.js.map +1 -1
  84. package/build/dist/Server/Utils/Workspace/Slack/Actions/Alert.js +2 -2
  85. package/build/dist/Server/Utils/Workspace/Slack/Actions/Incident.js +2 -2
  86. package/build/dist/Server/Utils/Workspace/Slack/Actions/ScheduledMaintenance.js +2 -2
  87. package/build/dist/Tests/jest.setup.js +17 -0
  88. package/build/dist/Tests/jest.setup.js.map +1 -1
  89. package/build/dist/Types/Kubernetes/KubernetesInventoryExtractor.js +116 -4
  90. package/build/dist/Types/Kubernetes/KubernetesInventoryExtractor.js.map +1 -1
  91. package/build/dist/Types/LLM/LlmType.js +3 -0
  92. package/build/dist/Types/LLM/LlmType.js.map +1 -1
  93. package/build/dist/UI/Components/Forms/ModelForm.js +3 -3
  94. package/build/dist/UI/Components/LogsViewer/components/LogsAnalyticsView.js.map +1 -1
  95. package/build/dist/Utils/UUID.js +1 -2
  96. package/build/dist/Utils/UUID.js.map +1 -1
  97. package/package.json +6 -8
@@ -39,6 +39,7 @@ const NAME_CANDIDATE_FIELDS: ReadonlyArray<string> = [
39
39
  interface CachedProjectSettings {
40
40
  enableAuditLogs: boolean;
41
41
  retentionInDays: number;
42
+ storeSystemEventsInAuditLogs: boolean;
42
43
  planName: PlanType | undefined;
43
44
  expiresAt: number;
44
45
  }
@@ -263,7 +264,7 @@ export class AuditLogService extends AnalyticsDatabaseService<AuditLog> {
263
264
 
264
265
  private isEligible(
265
266
  settings: CachedProjectSettings | null,
266
- _props: DatabaseCommonInteractionProps,
267
+ props: DatabaseCommonInteractionProps,
267
268
  ): boolean {
268
269
  if (!settings) {
269
270
  return false;
@@ -273,6 +274,10 @@ export class AuditLogService extends AnalyticsDatabaseService<AuditLog> {
273
274
  return false;
274
275
  }
275
276
 
277
+ if (!settings.storeSystemEventsInAuditLogs && this.isSystemEvent(props)) {
278
+ return false;
279
+ }
280
+
276
281
  if (IsEnterpriseEdition) {
277
282
  return true;
278
283
  }
@@ -288,6 +293,15 @@ export class AuditLogService extends AnalyticsDatabaseService<AuditLog> {
288
293
  return false;
289
294
  }
290
295
 
296
/**
 * Decide whether an interaction counts as a "system event".
 *
 * A system event is one not initiated by a user: no userType is set and
 * the operation is running with root privileges (e.g. background jobs,
 * internal cleanup tasks).
 *
 * @param props - Interaction props of the operation being considered.
 * @returns true only when props.userType is falsy and props.isRoot is truthy.
 */
private isSystemEvent(props: DatabaseCommonInteractionProps): boolean {
  if (props.userType) {
    // A userType is present, so this was initiated by a user.
    return false;
  }

  return Boolean(props.isRoot);
}
304
+
291
305
  private async getProjectSettings(
292
306
  projectId: ObjectID,
293
307
  ): Promise<CachedProjectSettings | null> {
@@ -306,6 +320,7 @@ export class AuditLogService extends AnalyticsDatabaseService<AuditLog> {
306
320
  _id: true,
307
321
  enableAuditLogs: true,
308
322
  auditLogsRetentionInDays: true,
323
+ storeSystemEventsInAuditLogs: true,
309
324
  planName: true,
310
325
  },
311
326
  props: { isRoot: true },
@@ -318,6 +333,9 @@ export class AuditLogService extends AnalyticsDatabaseService<AuditLog> {
318
333
  const settings: CachedProjectSettings = {
319
334
  enableAuditLogs: Boolean(project.enableAuditLogs),
320
335
  retentionInDays: project.auditLogsRetentionInDays ?? 7,
336
+ storeSystemEventsInAuditLogs: Boolean(
337
+ project.storeSystemEventsInAuditLogs,
338
+ ),
321
339
  planName: project.planName,
322
340
  expiresAt: now + PROJECT_SETTINGS_CACHE_TTL_MS,
323
341
  };
@@ -0,0 +1,264 @@
1
+ import DatabaseService from "./DatabaseService";
2
+ import Model from "../../Models/DatabaseModels/KubernetesContainer";
3
+ import CaptureSpan from "../Utils/Telemetry/CaptureSpan";
4
+ import ObjectID from "../../Types/ObjectID";
5
+ import logger from "../Utils/Logger";
6
+
7
+ /*
8
+ * ------------------------------------------------------------------
9
+ * KubernetesContainerService
10
+ *
11
+ * Inventory + latest-metric writes for container rows. Mirrors the
12
+ * shape of KubernetesResourceService but keyed by
13
+ * (podNamespaceKey, podName, name) since containers don't have a
14
+ * top-level metadata.name of their own.
15
+ *
16
+ * Callers:
17
+ * - OtelLogsIngestService -> bulkUpsert (snapshot)
18
+ * - OtelMetricsIngestService -> bulkUpdateLatestMetrics
19
+ * - CleanupStaleResources worker -> deleteStaleForCluster
20
+ * ------------------------------------------------------------------
21
+ */
22
+
23
/**
 * One container snapshot row as consumed by the bulk upsert in this file.
 *
 * Rows are identified by (podNamespaceKey, podName, name) within a
 * (project, cluster) pair, since containers don't have a top-level
 * metadata.name of their own.
 */
export interface ParsedKubernetesContainer {
  /** Natural-key part: namespace key of the owning pod. */
  podNamespaceKey: string;
  /** Natural-key part: name of the owning pod. */
  podName: string;
  /** Natural-key part: container name. */
  name: string;
  /** Written to the "image" column; null when not known. */
  image: string | null;
  /** Written to the "state" column; null when not known. */
  state: string | null;
  /** Written to the "reason" column; null when not known. */
  reason: string | null;
  /** Written to the "isReady" column; null when not known. */
  isReady: boolean | null;
  /** Written to the "restartCount" column; null when not known. */
  restartCount: number | null;
  /** Truncated to an integer and sent as a string when upserted; null when not known. */
  memoryLimitBytes: number | null;
  /** Snapshot time; the upsert only overwrites a stored row whose lastSeenAt is not newer. */
  lastSeenAt: Date;
}
35
+
36
/**
 * Latest CPU / memory observation for one container, keyed by
 * (podNamespaceKey, podName, name) like the snapshot rows.
 */
export interface ContainerLatestMetric {
  /** Natural-key part: namespace key of the owning pod. */
  podNamespaceKey: string;
  /** Natural-key part: name of the owning pod. */
  podName: string;
  /** Natural-key part: container name. */
  name: string;
  /** New latestCpuPercent value; null leaves the stored value untouched. */
  cpuPercent: number | null;
  /** New latestMemoryBytes value (truncated to an integer when written); null leaves the stored value untouched. */
  memoryBytes: number | null;
  /** Observation time; written to metricsUpdatedAt and used to reject out-of-order points. */
  observedAt: Date;
}
44
+
45
/** Maximum number of rows packed into one multi-row SQL statement. */
const UPSERT_BATCH_SIZE: number = 500;

/** A stale-row cleanup that deletes more rows than this logs a warning. */
const STALE_DELETE_WARN_THRESHOLD: number = 500;

/*
 * Columns written per row by the bulk upsert. Only the length is read at
 * runtime (one placeholder per entry), so this list must stay in step with
 * the column list spelled out in the INSERT statement and with the values
 * pushed for each row.
 */
const UPSERT_COLUMNS: Array<string> = [
  // Scope
  "projectId",
  "kubernetesClusterId",
  // Natural key
  "podNamespaceKey",
  "podName",
  "name",
  // Snapshot payload
  "image",
  "state",
  "reason",
  "isReady",
  "restartCount",
  "memoryLimitBytes",
  "lastSeenAt",
  // Bookkeeping
  "version",
];
63
+
64
/**
 * Raw-SQL write path for KubernetesContainer rows: snapshot upserts,
 * latest-metric updates and stale-row cleanup. All statements are
 * parameterized and scoped to a single Kubernetes cluster.
 */
export class Service extends DatabaseService<Model> {
  public constructor() {
    super(Model);
  }

  /**
   * Build a collision-free map key from the container natural key.
   * JSON encoding keeps ("a", "b/c") and ("a/b", "c") distinct.
   */
  private static naturalKey(
    podNamespaceKey: string,
    podName: string,
    name: string,
  ): string {
    return JSON.stringify([podNamespaceKey, podName, name]);
  }

  /**
   * Keep one snapshot per natural key: the one with the greatest
   * lastSeenAt (a later array entry wins a tie). Order of first
   * appearance is preserved.
   *
   * Needed because Postgres aborts an INSERT ... ON CONFLICT DO UPDATE
   * statement that proposes the same conflict key twice.
   */
  private static keepNewestSnapshotPerContainer(
    containers: Array<ParsedKubernetesContainer>,
  ): Array<ParsedKubernetesContainer> {
    const newestByKey: Map<string, ParsedKubernetesContainer> = new Map();

    for (const container of containers) {
      const key: string = Service.naturalKey(
        container.podNamespaceKey,
        container.podName,
        container.name,
      );
      const seen: ParsedKubernetesContainer | undefined = newestByKey.get(key);

      if (
        !seen ||
        container.lastSeenAt.getTime() >= seen.lastSeenAt.getTime()
      ) {
        newestByKey.set(key, container);
      }
    }

    return Array.from(newestByKey.values());
  }

  /**
   * Keep one metric per natural key: the one with the greatest
   * observedAt. Entries tied on observedAt are merged field by field,
   * with a later entry's non-null values taking precedence. Values from
   * strictly older entries are never carried forward, so the merged row
   * cannot smuggle an old reading past the metricsUpdatedAt guard.
   *
   * Needed because UPDATE ... FROM uses an unpredictable source row when
   * a target row joins to more than one of them.
   */
  private static keepNewestMetricPerContainer(
    metrics: Array<ContainerLatestMetric>,
  ): Array<ContainerLatestMetric> {
    const newestByKey: Map<string, ContainerLatestMetric> = new Map();

    for (const metric of metrics) {
      const key: string = Service.naturalKey(
        metric.podNamespaceKey,
        metric.podName,
        metric.name,
      );
      const seen: ContainerLatestMetric | undefined = newestByKey.get(key);

      if (!seen) {
        newestByKey.set(key, metric);
        continue;
      }

      const metricTime: number = metric.observedAt.getTime();
      const seenTime: number = seen.observedAt.getTime();

      if (metricTime > seenTime) {
        newestByKey.set(key, metric);
      } else if (metricTime === seenTime) {
        newestByKey.set(key, {
          ...seen,
          cpuPercent: metric.cpuPercent ?? seen.cpuPercent,
          memoryBytes: metric.memoryBytes ?? seen.memoryBytes,
        });
      }
    }

    return Array.from(newestByKey.values());
  }

  /**
   * Upsert a batch of parsed containers for a single (project, cluster)
   * pair. Uses ON CONFLICT on (projectId, kubernetesClusterId,
   * podNamespaceKey, podName, name) with a dominance guard on
   * lastSeenAt so out-of-order ingest never regresses a newer snapshot.
   *
   * Duplicate natural keys inside the input are collapsed to the newest
   * snapshot first; without that a single duplicated container would
   * make Postgres reject the whole statement.
   *
   * The upsert deliberately leaves latestCpuPercent / latestMemoryBytes /
   * metricsUpdatedAt untouched: those are owned by the separate metric
   * write path.
   *
   * @param data.projectId - Project owning the rows.
   * @param data.kubernetesClusterId - Cluster the containers belong to.
   * @param data.containers - Snapshot rows; an empty array is a no-op.
   */
  @CaptureSpan()
  public async bulkUpsert(data: {
    projectId: ObjectID;
    kubernetesClusterId: ObjectID;
    containers: Array<ParsedKubernetesContainer>;
  }): Promise<void> {
    if (data.containers.length === 0) {
      return;
    }

    const containers: Array<ParsedKubernetesContainer> =
      Service.keepNewestSnapshotPerContainer(data.containers);

    for (
      let offset: number = 0;
      offset < containers.length;
      offset += UPSERT_BATCH_SIZE
    ) {
      const chunk: Array<ParsedKubernetesContainer> = containers.slice(
        offset,
        offset + UPSERT_BATCH_SIZE,
      );

      const valueFragments: Array<string> = [];
      const params: Array<unknown> = [];

      for (const c of chunk) {
        // Placeholders are 1-based and continue where the previous row stopped.
        const firstPlaceholder: number = params.length + 1;
        const placeholders: Array<string> = UPSERT_COLUMNS.map(
          (_column: string, index: number): string => {
            return `$${firstPlaceholder + index}`;
          },
        );
        valueFragments.push(`(${placeholders.join(", ")})`);

        // Order must match UPSERT_COLUMNS and the INSERT column list below.
        params.push(
          data.projectId.toString(),
          data.kubernetesClusterId.toString(),
          c.podNamespaceKey,
          c.podName,
          c.name,
          c.image,
          c.state,
          c.reason,
          c.isReady,
          c.restartCount,
          // Sent as an integer string (truncated) rather than a JS number.
          c.memoryLimitBytes !== null && c.memoryLimitBytes !== undefined
            ? Math.trunc(c.memoryLimitBytes).toString()
            : null,
          c.lastSeenAt,
          0, // version (BaseModel @VersionColumn)
        );
      }

      const sql: string = `
        INSERT INTO "KubernetesContainer" (
          "projectId", "kubernetesClusterId",
          "podNamespaceKey", "podName", "name",
          "image", "state", "reason", "isReady", "restartCount",
          "memoryLimitBytes", "lastSeenAt", "version"
        )
        VALUES ${valueFragments.join(", ")}
        ON CONFLICT ("projectId", "kubernetesClusterId", "podNamespaceKey", "podName", "name")
        DO UPDATE SET
          "image" = EXCLUDED."image",
          "state" = EXCLUDED."state",
          "reason" = EXCLUDED."reason",
          "isReady" = EXCLUDED."isReady",
          "restartCount" = EXCLUDED."restartCount",
          "memoryLimitBytes" = EXCLUDED."memoryLimitBytes",
          "lastSeenAt" = EXCLUDED."lastSeenAt",
          "updatedAt" = now()
        WHERE EXCLUDED."lastSeenAt" >= "KubernetesContainer"."lastSeenAt"
      `;

      await this.getRepository().manager.query(sql, params);
    }
  }

  /**
   * Hard-delete all containers in a cluster whose last snapshot is
   * older than olderThan, and warn when the delete is unusually large.
   *
   * @param data.kubernetesClusterId - Cluster to clean up.
   * @param data.olderThan - Rows with lastSeenAt strictly before this are deleted.
   * @returns The deleted-row count when the query result is an array whose
   *   second element is a number; 0 for any other result shape.
   */
  @CaptureSpan()
  public async deleteStaleForCluster(data: {
    kubernetesClusterId: ObjectID;
    olderThan: Date;
  }): Promise<number> {
    const result: unknown = await this.getRepository().manager.query(
      `DELETE FROM "KubernetesContainer" WHERE "kubernetesClusterId" = $1 AND "lastSeenAt" < $2`,
      [data.kubernetesClusterId.toString(), data.olderThan],
    );

    /*
     * NOTE(review): presumably the Postgres driver answers a DELETE with
     * [rows, affectedCount]; confirm against the TypeORM version in use.
     */
    let affected: number = 0;
    if (Array.isArray(result) && result.length >= 2) {
      const second: unknown = result[1];
      if (typeof second === "number") {
        affected = second;
      }
    }

    if (affected > STALE_DELETE_WARN_THRESHOLD) {
      logger.warn(
        `KubernetesContainer cleanup deleted ${affected} stale rows for cluster ${data.kubernetesClusterId.toString()} — larger than expected; investigate agent health.`,
      );
    }

    return affected;
  }

  /**
   * Update latestCpuPercent / latestMemoryBytes / metricsUpdatedAt for a
   * batch of containers. Plain UPDATE: if the snapshot row doesn't exist
   * yet, the metric write is silently skipped; the next snapshot creates
   * the row and the next metric flush catches up.
   *
   * A null cpuPercent / memoryBytes keeps the stored value (COALESCE).
   * The WHERE guard ensures out-of-order metric points don't regress a
   * newer observation. Duplicate natural keys inside the input are
   * collapsed to the newest observation first, so which point wins no
   * longer depends on which joined row Postgres happens to use.
   *
   * @param data.projectId - Project owning the rows.
   * @param data.kubernetesClusterId - Cluster the containers belong to.
   * @param data.metrics - Metric points; an empty array is a no-op.
   */
  @CaptureSpan()
  public async bulkUpdateLatestMetrics(data: {
    projectId: ObjectID;
    kubernetesClusterId: ObjectID;
    metrics: Array<ContainerLatestMetric>;
  }): Promise<void> {
    if (data.metrics.length === 0) {
      return;
    }

    const metrics: Array<ContainerLatestMetric> =
      Service.keepNewestMetricPerContainer(data.metrics);

    /*
     * One UPDATE per chunk: a VALUES table joined to the live table on
     * the natural key, instead of one UPDATE per row.
     */
    for (
      let offset: number = 0;
      offset < metrics.length;
      offset += UPSERT_BATCH_SIZE
    ) {
      const chunk: Array<ContainerLatestMetric> = metrics.slice(
        offset,
        offset + UPSERT_BATCH_SIZE,
      );

      // $1 / $2 are shared by every row of the chunk.
      const params: Array<unknown> = [
        data.projectId.toString(),
        data.kubernetesClusterId.toString(),
      ];
      const valueFragments: Array<string> = [];

      for (const m of chunk) {
        const base: number = params.length; // next free placeholder is base + 1
        valueFragments.push(
          `($${base + 1}, $${base + 2}, $${base + 3}, $${base + 4}::numeric, $${base + 5}::bigint, $${base + 6}::timestamptz)`,
        );
        params.push(
          m.podNamespaceKey,
          m.podName,
          m.name,
          m.cpuPercent ?? null,
          m.memoryBytes !== null && m.memoryBytes !== undefined
            ? Math.trunc(m.memoryBytes).toString()
            : null,
          m.observedAt,
        );
      }

      const sql: string = `
        UPDATE "KubernetesContainer" AS k
        SET
          "latestCpuPercent" = COALESCE(v."cpu", k."latestCpuPercent"),
          "latestMemoryBytes" = COALESCE(v."mem", k."latestMemoryBytes"),
          "metricsUpdatedAt" = v."observedAt",
          "updatedAt" = now()
        FROM (VALUES ${valueFragments.join(", ")})
          AS v("ns", "pod", "name", "cpu", "mem", "observedAt")
        WHERE
          k."projectId" = $1
          AND k."kubernetesClusterId" = $2
          AND k."podNamespaceKey" = v."ns"
          AND k."podName" = v."pod"
          AND k."name" = v."name"
          AND (k."metricsUpdatedAt" IS NULL OR v."observedAt" >= k."metricsUpdatedAt")
      `;

      await this.getRepository().manager.query(sql, params);
    }
  }
}

export default new Service();
@@ -40,6 +40,23 @@ export interface DegradedNode {
40
40
  message: string;
41
41
  }
42
42
 
43
/**
 * Latest CPU / memory observation for one Kubernetes resource, matched
 * to its stored row by (kind, namespaceKey, name).
 */
export interface ResourceLatestMetric {
  /** Match key: resource kind. */
  kind: string;
  /** Match key: namespace key of the resource. */
  namespaceKey: string;
  /** Match key: resource name. */
  name: string;
  /** New latestCpuPercent value; null leaves the stored value untouched. */
  cpuPercent: number | null;
  /** New latestMemoryBytes value (truncated to an integer when written); null leaves the stored value untouched. */
  memoryBytes: number | null;
  /** Observation time; written to metricsUpdatedAt and used to reject out-of-order points. */
  observedAt: Date;
  /*
   * Optional Pod controller lineage. Read from
   * resource.k8s.deployment.name / resource.k8s.cronjob.name on the
   * metric stream. Persisted via COALESCE so once written they stick
   * even if a later batch lacks the attribute.
   */
  controllerDeploymentName?: string | null;
  controllerCronJobName?: string | null;
}
59
+
43
60
  export interface InventorySummary {
44
61
  countsByKind: Record<string, number>;
45
62
  /*
@@ -395,6 +412,83 @@ export class Service extends DatabaseService<Model> {
395
412
  }
396
413
  }
397
414
 
415
/**
 * Update latestCpuPercent / latestMemoryBytes / metricsUpdatedAt (and
 * the optional controller lineage columns) for a batch of resources.
 * Plain UPDATE: if the snapshot row doesn't exist yet, the metric write
 * is silently skipped; the next snapshot creates the row, and the next
 * metric flush catches up.
 *
 * Null / missing values keep whatever is stored (COALESCE). The WHERE
 * guard on metricsUpdatedAt keeps out-of-order points from regressing a
 * newer observation.
 *
 * Entries sharing (kind, namespaceKey, name) are collapsed before the
 * SQL is built. When a target row joins to several VALUES rows, Postgres
 * uses an unpredictable one of them, so an older duplicate could
 * otherwise win. The newest observedAt is kept; entries tied on
 * observedAt are merged field by field, with a later entry's non-null
 * values taking precedence.
 *
 * @param data.projectId - Project owning the rows.
 * @param data.kubernetesClusterId - Cluster the resources belong to.
 * @param data.metrics - Metric points; an empty array is a no-op.
 */
@CaptureSpan()
public async bulkUpdateLatestMetrics(data: {
  projectId: ObjectID;
  kubernetesClusterId: ObjectID;
  metrics: Array<ResourceLatestMetric>;
}): Promise<void> {
  if (data.metrics.length === 0) {
    return;
  }

  // Collapse duplicates; Map keeps the order of first appearance.
  const newestByKey: Map<string, ResourceLatestMetric> = new Map();
  for (const metric of data.metrics) {
    // JSON encoding keeps the three key parts unambiguous.
    const key: string = JSON.stringify([
      metric.kind,
      metric.namespaceKey,
      metric.name,
    ]);
    const seen: ResourceLatestMetric | undefined = newestByKey.get(key);

    if (!seen) {
      newestByKey.set(key, metric);
      continue;
    }

    const metricTime: number = metric.observedAt.getTime();
    const seenTime: number = seen.observedAt.getTime();

    if (metricTime > seenTime) {
      newestByKey.set(key, metric);
    } else if (metricTime === seenTime) {
      // Same observation time: safe to combine the non-null fields.
      newestByKey.set(key, {
        ...seen,
        cpuPercent: metric.cpuPercent ?? seen.cpuPercent,
        memoryBytes: metric.memoryBytes ?? seen.memoryBytes,
        controllerDeploymentName:
          metric.controllerDeploymentName ?? seen.controllerDeploymentName,
        controllerCronJobName:
          metric.controllerCronJobName ?? seen.controllerCronJobName,
      });
    }
  }
  const metrics: Array<ResourceLatestMetric> = Array.from(
    newestByKey.values(),
  );

  for (
    let offset: number = 0;
    offset < metrics.length;
    offset += UPSERT_BATCH_SIZE
  ) {
    const chunk: Array<ResourceLatestMetric> = metrics.slice(
      offset,
      offset + UPSERT_BATCH_SIZE,
    );

    // $1 / $2 are shared by every row of the chunk.
    const params: Array<unknown> = [
      data.projectId.toString(),
      data.kubernetesClusterId.toString(),
    ];
    const valueFragments: Array<string> = [];

    for (const m of chunk) {
      const base: number = params.length; // next free placeholder is base + 1
      valueFragments.push(
        `($${base + 1}, $${base + 2}, $${base + 3}, $${base + 4}::numeric, $${base + 5}::bigint, $${base + 6}::timestamptz, $${base + 7}, $${base + 8})`,
      );
      params.push(
        m.kind,
        m.namespaceKey,
        m.name,
        m.cpuPercent ?? null,
        // Sent as an integer string (truncated) rather than a JS number.
        m.memoryBytes !== null && m.memoryBytes !== undefined
          ? Math.trunc(m.memoryBytes).toString()
          : null,
        m.observedAt,
        m.controllerDeploymentName ?? null,
        m.controllerCronJobName ?? null,
      );
    }

    const sql: string = `
      UPDATE "KubernetesResource" AS k
      SET
        "latestCpuPercent" = COALESCE(v."cpu", k."latestCpuPercent"),
        "latestMemoryBytes" = COALESCE(v."mem", k."latestMemoryBytes"),
        "metricsUpdatedAt" = v."observedAt",
        "controllerDeploymentName" = COALESCE(v."deployName", k."controllerDeploymentName"),
        "controllerCronJobName" = COALESCE(v."cronName", k."controllerCronJobName"),
        "updatedAt" = now()
      FROM (VALUES ${valueFragments.join(", ")})
        AS v("kind", "ns", "name", "cpu", "mem", "observedAt", "deployName", "cronName")
      WHERE
        k."projectId" = $1
        AND k."kubernetesClusterId" = $2
        AND k."kind" = v."kind"
        AND k."namespaceKey" = v."ns"
        AND k."name" = v."name"
        AND (k."metricsUpdatedAt" IS NULL OR v."observedAt" >= k."metricsUpdatedAt")
    `;

    await this.getRepository().manager.query(sql, params);
  }
}
491
+
398
492
  /**
399
493
  * Hard-delete all resources in a cluster whose last snapshot is
400
494
  * older than olderThan. Returns the number of deleted rows.
@@ -639,6 +733,145 @@ export class Service extends DatabaseService<Model> {
639
733
  };
640
734
  }
641
735
 
736
/**
 * Sum the latest pod CPU / memory per namespace for one cluster.
 *
 * Only non-deleted Pod rows whose metricsUpdatedAt is set and not older
 * than staleAfter are counted, so stale numbers are not surfaced as
 * current.
 *
 * @param data.projectId - Project owning the rows.
 * @param data.kubernetesClusterId - Cluster to aggregate.
 * @param data.staleAfter - Cutoff; rows with metricsUpdatedAt before it are ignored.
 * @returns Map from namespaceKey ("" when the key is empty or null) to the
 *   summed values; sums that are null or unparsable are reported as 0.
 */
@CaptureSpan()
public async getLatestMetricsByNamespace(data: {
  projectId: ObjectID;
  kubernetesClusterId: ObjectID;
  staleAfter: Date;
}): Promise<Map<string, { cpuPercent: number; memoryBytes: number }>> {
  type NamespaceRow = {
    namespaceKey: string;
    cpu: string | null;
    mem: string | null;
  };

  const queryParams: Array<unknown> = [
    data.projectId.toString(),
    data.kubernetesClusterId.toString(),
    data.staleAfter,
  ];

  // Sums are cast to text in SQL and parsed back into numbers below.
  const rows: Array<NamespaceRow> = await this.getRepository().manager.query(
    `SELECT "namespaceKey",
            SUM("latestCpuPercent")::text AS cpu,
            SUM("latestMemoryBytes")::text AS mem
     FROM "KubernetesResource"
     WHERE "projectId" = $1
       AND "kubernetesClusterId" = $2
       AND "kind" = 'Pod'
       AND "deletedAt" IS NULL
       AND "metricsUpdatedAt" IS NOT NULL
       AND "metricsUpdatedAt" >= $3
     GROUP BY "namespaceKey"`,
    queryParams,
  );

  const totals: Map<string, { cpuPercent: number; memoryBytes: number }> =
    new Map();

  for (const row of rows) {
    const cpuTotal: number = row.cpu ? parseFloat(row.cpu) : 0;
    const memTotal: number = row.mem ? parseInt(row.mem, 10) : 0;

    totals.set(row.namespaceKey || "", {
      // "|| 0" turns a NaN parse result into 0.
      cpuPercent: cpuTotal || 0,
      memoryBytes: memTotal || 0,
    });
  }

  return totals;
}
781
+
782
/**
 * Sum the latest pod CPU / memory per owning workload of one kind.
 *
 * "Deployment" and "CronJob" are grouped by the denormalized
 * controllerDeploymentName / controllerCronJobName columns populated by
 * the metric ingest path, because Pods are not owned by those kinds
 * directly. Every other ownerKind is matched against the entries of the
 * Pod's ownerReferences JSONB ("items" array) by their "kind".
 *
 * Only non-deleted Pod rows whose metricsUpdatedAt is set and not older
 * than staleAfter are counted, so the sum reflects "right now," not
 * "ever observed."
 *
 * @param data.projectId - Project owning the rows.
 * @param data.kubernetesClusterId - Cluster to aggregate.
 * @param data.ownerKind - Owner kind to group by.
 * @param data.staleAfter - Cutoff; rows with metricsUpdatedAt before it are ignored.
 * @returns Map keyed by owner name; rows without an owner name are dropped
 *   and sums that are null or unparsable are reported as 0.
 */
@CaptureSpan()
public async getLatestMetricsByOwner(data: {
  projectId: ObjectID;
  kubernetesClusterId: ObjectID;
  ownerKind: string;
  staleAfter: Date;
}): Promise<Map<string, { cpuPercent: number; memoryBytes: number }>> {
  type OwnerRow = {
    ownerName: string;
    cpu: string | null;
    mem: string | null;
  };

  // Owner kinds that are resolved through a denormalized column.
  const denormalizedColumns: Map<string, string> = new Map([
    ["Deployment", "controllerDeploymentName"],
    ["CronJob", "controllerCronJobName"],
  ]);
  const column: string | undefined = denormalizedColumns.get(data.ownerKind);

  // $1..$3 are common to both query shapes.
  const baseParams: Array<unknown> = [
    data.projectId.toString(),
    data.kubernetesClusterId.toString(),
    data.staleAfter,
  ];

  let rows: Array<OwnerRow>;

  if (column !== undefined) {
    // column comes from the fixed map above, never from caller input.
    rows = await this.getRepository().manager.query(
      `SELECT
         "${column}" AS "ownerName",
         SUM("latestCpuPercent")::text AS cpu,
         SUM("latestMemoryBytes")::text AS mem
       FROM "KubernetesResource"
       WHERE "projectId" = $1
         AND "kubernetesClusterId" = $2
         AND "kind" = 'Pod'
         AND "deletedAt" IS NULL
         AND "metricsUpdatedAt" IS NOT NULL
         AND "metricsUpdatedAt" >= $3
         AND "${column}" IS NOT NULL
       GROUP BY "${column}"`,
      baseParams,
    );
  } else {
    rows = await this.getRepository().manager.query(
      `SELECT
         (owner->>'name') AS "ownerName",
         SUM("latestCpuPercent")::text AS cpu,
         SUM("latestMemoryBytes")::text AS mem
       FROM "KubernetesResource",
            jsonb_array_elements("ownerReferences"->'items') AS owner
       WHERE "projectId" = $1
         AND "kubernetesClusterId" = $2
         AND "kind" = 'Pod'
         AND "deletedAt" IS NULL
         AND "metricsUpdatedAt" IS NOT NULL
         AND "metricsUpdatedAt" >= $3
         AND "ownerReferences" IS NOT NULL
         AND owner->>'kind' = $4
       GROUP BY (owner->>'name')`,
      [...baseParams, data.ownerKind],
    );
  }

  const totals: Map<string, { cpuPercent: number; memoryBytes: number }> =
    new Map();

  for (const row of rows) {
    if (row.ownerName) {
      const cpuTotal: number = row.cpu ? parseFloat(row.cpu) : 0;
      const memTotal: number = row.mem ? parseInt(row.mem, 10) : 0;

      totals.set(row.ownerName, {
        // "|| 0" turns a NaN parse result into 0.
        cpuPercent: cpuTotal || 0,
        memoryBytes: memTotal || 0,
      });
    }
  }

  return totals;
}
874
+
642
875
  /**
643
876
  * Helper for the cleanup worker: snapshot-interval aware cutoff.
644
877
  * 3× the 5-minute snapshot interval. Tune via CLEANUP_THRESHOLD_MINUTES.
@@ -463,14 +463,14 @@ export class Service extends DatabaseService<Model> {
463
463
  logger.debug("Subscriber has an email.", {
464
464
  projectId: createdItem.projectId?.toString(),
465
465
  } as LogAttributes);
466
- const isSubcriptionConfirmed: boolean = Boolean(
466
+ const isSubscriptionConfirmed: boolean = Boolean(
467
467
  createdItem.isSubscriptionConfirmed,
468
468
  );
469
- logger.debug(`Is Subscription Confirmed: ${isSubcriptionConfirmed}`, {
469
+ logger.debug(`Is Subscription Confirmed: ${isSubscriptionConfirmed}`, {
470
470
  projectId: createdItem.projectId?.toString(),
471
471
  } as LogAttributes);
472
472
 
473
- if (!isSubcriptionConfirmed) {
473
+ if (!isSubscriptionConfirmed) {
474
474
  logger.debug(
475
475
  "Subscription is not confirmed. Sending confirmation email.",
476
476
  { projectId: createdItem.projectId?.toString() } as LogAttributes,
@@ -480,7 +480,7 @@ export class Service extends DatabaseService<Model> {
480
480
  });
481
481
  }
482
482
 
483
- if (isSubcriptionConfirmed && createdItem.sendYouHaveSubscribedMessage) {
483
+ if (isSubscriptionConfirmed && createdItem.sendYouHaveSubscribedMessage) {
484
484
  logger.debug(
485
485
  "Subscription is confirmed and sendYouHaveSubscribedMessage is true. Sending 'You have subscribed' email.",
486
486
  { projectId: createdItem.projectId?.toString() } as LogAttributes,
@@ -149,10 +149,10 @@ export default class AccessControlPermission {
149
149
  model.getAccessControlColumn();
150
150
 
151
151
  if (modelAccessControlColumnName) {
152
- const accessControlIdsWhcihUserHasAccessTo: Array<ObjectID> =
152
+ const accessControlIdsWhichUserHasAccessTo: Array<ObjectID> =
153
153
  this.getAccessControlIdsForModel(modelType, props, type);
154
154
 
155
- if (accessControlIdsWhcihUserHasAccessTo.length === 0) {
155
+ if (accessControlIdsWhichUserHasAccessTo.length === 0) {
156
156
  return; // The user has access to all resources, if no labels are specified.
157
157
  }
158
158
 
@@ -164,7 +164,7 @@ export default class AccessControlPermission {
164
164
  }
165
165
 
166
166
  const accessControlIdsWhichUserHasAccessToAsStrings: Array<string> =
167
- accessControlIdsWhcihUserHasAccessTo.map((id: ObjectID) => {
167
+ accessControlIdsWhichUserHasAccessTo.map((id: ObjectID) => {
168
168
  return id.toString();
169
169
  }) || [];
170
170