@oneuptime/common 10.4.14 → 10.4.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. package/Models/AnalyticsModels/AnalyticsBaseModel/AnalyticsBaseModel.ts +49 -0
  2. package/Models/AnalyticsModels/AuditLog.ts +8 -0
  3. package/Models/AnalyticsModels/ExceptionInstance.ts +1 -0
  4. package/Models/AnalyticsModels/Log.ts +1 -0
  5. package/Models/AnalyticsModels/Metric.ts +10 -0
  6. package/Models/AnalyticsModels/MonitorLog.ts +1 -0
  7. package/Models/AnalyticsModels/Profile.ts +1 -0
  8. package/Models/AnalyticsModels/ProfileSample.ts +1 -0
  9. package/Models/AnalyticsModels/Span.ts +1 -0
  10. package/Models/DatabaseModels/AlertCustomField.ts +37 -0
  11. package/Models/DatabaseModels/IncidentCustomField.ts +37 -0
  12. package/Models/DatabaseModels/IncidentMember.ts +9 -0
  13. package/Models/DatabaseModels/MonitorCustomField.ts +37 -0
  14. package/Models/DatabaseModels/OnCallDutyPolicyCustomField.ts +37 -0
  15. package/Models/DatabaseModels/ScheduledMaintenanceCustomField.ts +37 -0
  16. package/Models/DatabaseModels/StatusPageCustomField.ts +37 -0
  17. package/Models/DatabaseModels/TableView.ts +40 -0
  18. package/Models/DatabaseModels/TeamMemberCustomField.ts +37 -0
  19. package/Server/API/BaseAnalyticsAPI.ts +128 -20
  20. package/Server/API/MetricAPI.ts +5 -138
  21. package/Server/API/StatusAPI.ts +103 -7
  22. package/Server/Infrastructure/Postgres/SchemaMigrations/1779536271671-AddFacetsToTableView.ts +13 -0
  23. package/Server/Infrastructure/Postgres/SchemaMigrations/1779540427366-AddIsMemberNotifiedIndex.ts +34 -0
  24. package/Server/Infrastructure/Postgres/SchemaMigrations/1779619108628-AddDropdownOptionsToCustomFields.ts +67 -0
  25. package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +6 -0
  26. package/Server/Services/AccessTokenService.ts +1 -1
  27. package/Server/Services/AnalyticsDatabaseService.ts +24 -4
  28. package/Server/Services/MetricService.ts +113 -0
  29. package/Server/Services/ProjectService.ts +21 -1
  30. package/Server/Utils/Response.ts +4 -1
  31. package/Server/Utils/UserPermission/UserPermission.ts +17 -1
  32. package/Tests/Server/Services/AnalyticsDatabaseService.test.ts +2 -2
  33. package/Types/API/HTTPResponse.ts +16 -0
  34. package/Types/BaseDatabase/ListResult.ts +6 -0
  35. package/Types/CustomField/CustomFieldType.ts +2 -0
  36. package/Types/Date.ts +9 -1
  37. package/Types/ListData.ts +14 -0
  38. package/Types/Monitor/DnsMonitor/DnsMonitorResponse.ts +3 -0
  39. package/Types/Monitor/DnssecMonitor/DnssecMonitorResponse.ts +5 -0
  40. package/Types/Monitor/DomainMonitor/DomainMonitorResponse.ts +4 -0
  41. package/Types/Monitor/ExternalStatusPageMonitor/ExternalStatusPageMonitorResponse.ts +4 -0
  42. package/Types/Monitor/SnmpMonitor/SnmpMonitorResponse.ts +3 -0
  43. package/Types/Probe/ProbeAttempt.ts +9 -0
  44. package/Types/Probe/ProbeMonitorResponse.ts +3 -0
  45. package/UI/Components/BulkUpdate/BulkOwnerActions.tsx +504 -0
  46. package/UI/Components/BulkUpdate/BulkUpdateForm.tsx +64 -54
  47. package/UI/Components/CustomFields/CustomFieldsDetail.tsx +38 -0
  48. package/UI/Components/CustomFields/DropdownOptionsInput.tsx +150 -0
  49. package/UI/Components/Detail/Detail.tsx +78 -11
  50. package/UI/Components/List/List.tsx +6 -0
  51. package/UI/Components/ModelTable/BaseModelTable.tsx +74 -2
  52. package/UI/Components/ModelTable/TableView.tsx +74 -30
  53. package/UI/Components/Pagination/Pagination.tsx +75 -33
  54. package/UI/Components/Table/Table.tsx +6 -0
  55. package/UI/Utils/AnalyticsModelAPI/AnalyticsModelAPI.ts +1 -0
  56. package/build/dist/Models/AnalyticsModels/AnalyticsBaseModel/AnalyticsBaseModel.js +33 -0
  57. package/build/dist/Models/AnalyticsModels/AnalyticsBaseModel/AnalyticsBaseModel.js.map +1 -1
  58. package/build/dist/Models/AnalyticsModels/AuditLog.js +8 -0
  59. package/build/dist/Models/AnalyticsModels/AuditLog.js.map +1 -1
  60. package/build/dist/Models/AnalyticsModels/ExceptionInstance.js +1 -0
  61. package/build/dist/Models/AnalyticsModels/ExceptionInstance.js.map +1 -1
  62. package/build/dist/Models/AnalyticsModels/Log.js +1 -0
  63. package/build/dist/Models/AnalyticsModels/Log.js.map +1 -1
  64. package/build/dist/Models/AnalyticsModels/Metric.js +10 -0
  65. package/build/dist/Models/AnalyticsModels/Metric.js.map +1 -1
  66. package/build/dist/Models/AnalyticsModels/MonitorLog.js +1 -0
  67. package/build/dist/Models/AnalyticsModels/MonitorLog.js.map +1 -1
  68. package/build/dist/Models/AnalyticsModels/Profile.js +1 -0
  69. package/build/dist/Models/AnalyticsModels/Profile.js.map +1 -1
  70. package/build/dist/Models/AnalyticsModels/ProfileSample.js +1 -0
  71. package/build/dist/Models/AnalyticsModels/ProfileSample.js.map +1 -1
  72. package/build/dist/Models/AnalyticsModels/Span.js +1 -0
  73. package/build/dist/Models/AnalyticsModels/Span.js.map +1 -1
  74. package/build/dist/Models/DatabaseModels/AlertCustomField.js +38 -0
  75. package/build/dist/Models/DatabaseModels/AlertCustomField.js.map +1 -1
  76. package/build/dist/Models/DatabaseModels/IncidentCustomField.js +38 -0
  77. package/build/dist/Models/DatabaseModels/IncidentCustomField.js.map +1 -1
  78. package/build/dist/Models/DatabaseModels/IncidentMember.js +11 -1
  79. package/build/dist/Models/DatabaseModels/IncidentMember.js.map +1 -1
  80. package/build/dist/Models/DatabaseModels/MonitorCustomField.js +38 -0
  81. package/build/dist/Models/DatabaseModels/MonitorCustomField.js.map +1 -1
  82. package/build/dist/Models/DatabaseModels/OnCallDutyPolicyCustomField.js +38 -0
  83. package/build/dist/Models/DatabaseModels/OnCallDutyPolicyCustomField.js.map +1 -1
  84. package/build/dist/Models/DatabaseModels/ScheduledMaintenanceCustomField.js +38 -0
  85. package/build/dist/Models/DatabaseModels/ScheduledMaintenanceCustomField.js.map +1 -1
  86. package/build/dist/Models/DatabaseModels/StatusPageCustomField.js +38 -0
  87. package/build/dist/Models/DatabaseModels/StatusPageCustomField.js.map +1 -1
  88. package/build/dist/Models/DatabaseModels/TableView.js +40 -0
  89. package/build/dist/Models/DatabaseModels/TableView.js.map +1 -1
  90. package/build/dist/Models/DatabaseModels/TeamMemberCustomField.js +38 -0
  91. package/build/dist/Models/DatabaseModels/TeamMemberCustomField.js.map +1 -1
  92. package/build/dist/Server/API/BaseAnalyticsAPI.js +105 -18
  93. package/build/dist/Server/API/BaseAnalyticsAPI.js.map +1 -1
  94. package/build/dist/Server/API/MetricAPI.js +5 -113
  95. package/build/dist/Server/API/MetricAPI.js.map +1 -1
  96. package/build/dist/Server/API/StatusAPI.js +75 -8
  97. package/build/dist/Server/API/StatusAPI.js.map +1 -1
  98. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1779536271671-AddFacetsToTableView.js +12 -0
  99. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1779536271671-AddFacetsToTableView.js.map +1 -0
  100. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1779540427366-AddIsMemberNotifiedIndex.js +27 -0
  101. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1779540427366-AddIsMemberNotifiedIndex.js.map +1 -0
  102. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1779619108628-AddDropdownOptionsToCustomFields.js +28 -0
  103. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1779619108628-AddDropdownOptionsToCustomFields.js.map +1 -0
  104. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +6 -0
  105. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
  106. package/build/dist/Server/Services/AccessTokenService.js +1 -1
  107. package/build/dist/Server/Services/AccessTokenService.js.map +1 -1
  108. package/build/dist/Server/Services/AnalyticsDatabaseService.js +22 -3
  109. package/build/dist/Server/Services/AnalyticsDatabaseService.js.map +1 -1
  110. package/build/dist/Server/Services/MetricService.js +89 -0
  111. package/build/dist/Server/Services/MetricService.js.map +1 -1
  112. package/build/dist/Server/Services/ProjectService.js +19 -1
  113. package/build/dist/Server/Services/ProjectService.js.map +1 -1
  114. package/build/dist/Server/Utils/Response.js +6 -5
  115. package/build/dist/Server/Utils/Response.js.map +1 -1
  116. package/build/dist/Server/Utils/UserPermission/UserPermission.js +13 -1
  117. package/build/dist/Server/Utils/UserPermission/UserPermission.js.map +1 -1
  118. package/build/dist/Tests/Server/Services/AnalyticsDatabaseService.test.js +2 -2
  119. package/build/dist/Tests/Server/Services/AnalyticsDatabaseService.test.js.map +1 -1
  120. package/build/dist/Types/API/HTTPResponse.js +15 -0
  121. package/build/dist/Types/API/HTTPResponse.js.map +1 -1
  122. package/build/dist/Types/CustomField/CustomFieldType.js +2 -0
  123. package/build/dist/Types/CustomField/CustomFieldType.js.map +1 -1
  124. package/build/dist/Types/Date.js +10 -1
  125. package/build/dist/Types/Date.js.map +1 -1
  126. package/build/dist/Types/ListData.js +4 -0
  127. package/build/dist/Types/ListData.js.map +1 -1
  128. package/build/dist/Types/Probe/ProbeAttempt.js +2 -0
  129. package/build/dist/Types/Probe/ProbeAttempt.js.map +1 -0
  130. package/build/dist/UI/Components/BulkUpdate/BulkOwnerActions.js +376 -0
  131. package/build/dist/UI/Components/BulkUpdate/BulkOwnerActions.js.map +1 -0
  132. package/build/dist/UI/Components/BulkUpdate/BulkUpdateForm.js +32 -25
  133. package/build/dist/UI/Components/BulkUpdate/BulkUpdateForm.js.map +1 -1
  134. package/build/dist/UI/Components/CustomFields/CustomFieldsDetail.js +32 -0
  135. package/build/dist/UI/Components/CustomFields/CustomFieldsDetail.js.map +1 -1
  136. package/build/dist/UI/Components/CustomFields/DropdownOptionsInput.js +84 -0
  137. package/build/dist/UI/Components/CustomFields/DropdownOptionsInput.js.map +1 -0
  138. package/build/dist/UI/Components/Detail/Detail.js +34 -3
  139. package/build/dist/UI/Components/Detail/Detail.js.map +1 -1
  140. package/build/dist/UI/Components/List/List.js +1 -1
  141. package/build/dist/UI/Components/List/List.js.map +1 -1
  142. package/build/dist/UI/Components/ModelTable/BaseModelTable.js +45 -5
  143. package/build/dist/UI/Components/ModelTable/BaseModelTable.js.map +1 -1
  144. package/build/dist/UI/Components/ModelTable/TableView.js +44 -19
  145. package/build/dist/UI/Components/ModelTable/TableView.js.map +1 -1
  146. package/build/dist/UI/Components/Pagination/Pagination.js +62 -36
  147. package/build/dist/UI/Components/Pagination/Pagination.js.map +1 -1
  148. package/build/dist/UI/Components/Table/Table.js +1 -1
  149. package/build/dist/UI/Components/Table/Table.js.map +1 -1
  150. package/build/dist/UI/Utils/AnalyticsModelAPI/AnalyticsModelAPI.js +1 -0
  151. package/build/dist/UI/Utils/AnalyticsModelAPI/AnalyticsModelAPI.js.map +1 -1
  152. package/package.json +1 -1
@@ -29,6 +29,22 @@ import { UserPermission } from "../../Types/Permission";
29
29
  import PositiveNumber from "../../Types/PositiveNumber";
30
30
  import AggregatedResult from "../../Types/BaseDatabase/AggregatedResult";
31
31
  import CaptureSpan from "../Utils/Telemetry/CaptureSpan";
32
+ import GlobalCache from "../Infrastructure/GlobalCache";
33
+ import logger from "../Utils/Logger";
34
+
35
+ /*
36
+ * Aggregate cache TTL. Dashboards typically auto-refresh every 30s+,
37
+ * so an 8s window collapses bursts of identical requests (e.g. 12
38
+ * widgets loading on the same page) onto a single ClickHouse query
39
+ * while still looking real-time to humans.
40
+ *
41
+ * Project-scoped only: analytics data is project-wide and the
42
+ * service layer enforces project-scoped read permissions, so
43
+ * caching across users within the same project is safe. Endpoints
44
+ * with row-level access scoping should override `getAggregate` to
45
+ * skip the cache (or shape the key to include the access scope).
46
+ */
47
+ const ANALYTICS_AGGREGATE_CACHE_TTL_SECONDS: number = 8;
32
48
 
33
49
  export default class BaseAnalyticsAPI<
34
50
  TAnalyticsDataModel extends AnalyticsDataModel,
@@ -268,29 +284,46 @@ export default class BaseAnalyticsAPI<
268
284
  const databaseProps: DatabaseCommonInteractionProps =
269
285
  await CommonAPI.getDatabaseCommonInteractionProps(req);
270
286
 
271
- const [list, count] = await Promise.all([
272
- this.service.findBy({
273
- query,
274
- select,
275
- skip: skip,
276
- limit: limit,
277
- sort: sort,
278
- groupBy: groupBy,
279
- props: databaseProps,
280
- }),
281
- this.service.countBy({
282
- query,
283
- groupBy: groupBy,
284
- props: databaseProps,
285
- }),
286
- ]);
287
+ /*
288
+ * Skip the parallel countBy on analytics tables. countBy on Log /
289
+ * Span / Metric over wide time ranges scans every matching block
290
+ * (no LIMIT) and routinely dominates list-endpoint latency under
291
+ * heavy ingest. Instead we over-fetch by one row and derive
292
+ * `hasMore` from whether the extra row showed up. `count` is
293
+ * emitted as a lower bound (`skip + data.length + (hasMore ? 1 : 0)`) so
294
+ * older clients that read `count` keep rendering something
295
+ * sensible while newer clients use `hasMore` for prev/next.
296
+ */
297
+ const overfetchLimit: PositiveNumber = new PositiveNumber(
298
+ limit.toNumber() + 1,
299
+ );
300
+
301
+ const list: Array<AnalyticsDataModel> = await this.service.findBy({
302
+ query,
303
+ select,
304
+ skip: skip,
305
+ limit: overfetchLimit,
306
+ sort: sort,
307
+ groupBy: groupBy,
308
+ props: databaseProps,
309
+ });
310
+
311
+ const hasMore: boolean = list.length > limit.toNumber();
312
+ if (hasMore) {
313
+ list.length = limit.toNumber();
314
+ }
315
+
316
+ const lowerBoundCount: PositiveNumber = new PositiveNumber(
317
+ skip.toNumber() + list.length + (hasMore ? 1 : 0),
318
+ );
287
319
 
288
320
  return Response.sendEntityArrayResponse(
289
321
  req,
290
322
  res,
291
323
  list,
292
- count,
324
+ lowerBoundCount,
293
325
  this.entityType,
326
+ { hasMore },
294
327
  );
295
328
  }
296
329
 
@@ -327,14 +360,89 @@ export default class BaseAnalyticsAPI<
327
360
  const databaseProps: DatabaseCommonInteractionProps =
328
361
  await CommonAPI.getDatabaseCommonInteractionProps(req);
329
362
 
363
+ /*
364
+ * Short-lived project-scoped cache. A dashboard refresh fires
365
+ * one /aggregate call per widget — typically 10+ identical or
366
+ * near-identical aggregations against the same time window
367
+ * inside a few hundred milliseconds. Cache the result for 8s
368
+ * so the underlying ClickHouse aggregation runs once per
369
+ * burst. On cache outage (Redis down, parse error, …) we fall
370
+ * through to a live query so behavior degrades to today's.
371
+ */
372
+ const projectId: string | undefined = databaseProps.tenantId?.toString();
373
+ const cacheNamespace: string = `${this.getEntityName()}-aggregate`;
374
+ const cacheKey: string | null = projectId
375
+ ? `${projectId}:${this.buildAggregateCacheKey(aggregateBy)}`
376
+ : null;
377
+
378
+ if (cacheKey) {
379
+ try {
380
+ const cached: JSONObject | null = await GlobalCache.getJSONObject(
381
+ cacheNamespace,
382
+ cacheKey,
383
+ );
384
+ if (cached) {
385
+ return Response.sendJsonObjectResponse(req, res, cached);
386
+ }
387
+ } catch (err) {
388
+ logger.debug(`${cacheNamespace} cache read failed`);
389
+ logger.debug(err);
390
+ }
391
+ }
392
+
330
393
  const aggregateResult: AggregatedResult = await this.service.aggregateBy({
331
394
  ...aggregateBy,
332
395
  props: databaseProps,
333
396
  });
334
397
 
335
- return Response.sendJsonObjectResponse(req, res, {
336
- ...(aggregateResult as any),
337
- });
398
+ const responseBody: JSONObject = { ...(aggregateResult as any) };
399
+
400
+ if (cacheKey) {
401
+ try {
402
+ await GlobalCache.setJSON(cacheNamespace, cacheKey, responseBody, {
403
+ expiresInSeconds: ANALYTICS_AGGREGATE_CACHE_TTL_SECONDS,
404
+ });
405
+ } catch (err) {
406
+ logger.debug(`${cacheNamespace} cache write failed`);
407
+ logger.debug(err);
408
+ }
409
+ }
410
+
411
+ return Response.sendJsonObjectResponse(req, res, responseBody);
412
+ }
413
+
414
+ /*
415
+ * Stable serialization for the aggregate cache key. Date instances
416
+ * are normalized to ISO so two logically-equal time windows hit
417
+ * the same cache slot, and we sort object keys so the ordering is
418
+ * deterministic across clients and across V8 versions.
419
+ */
420
+ protected buildAggregateCacheKey(
421
+ aggregateBy: AggregateBy<AnalyticsDataModel>,
422
+ ): string {
423
+ return JSON.stringify(
424
+ aggregateBy,
425
+ (_key: string, value: unknown): unknown => {
426
+ if (value instanceof Date) {
427
+ return value.toISOString();
428
+ }
429
+ if (
430
+ value &&
431
+ typeof value === "object" &&
432
+ !Array.isArray(value) &&
433
+ (value as Record<string, unknown>).constructor === Object
434
+ ) {
435
+ const sorted: Record<string, unknown> = {};
436
+ for (const k of Object.keys(
437
+ value as Record<string, unknown>,
438
+ ).sort()) {
439
+ sorted[k] = (value as Record<string, unknown>)[k];
440
+ }
441
+ return sorted;
442
+ }
443
+ return value;
444
+ },
445
+ );
338
446
  }
339
447
 
340
448
  @CaptureSpan()
@@ -1,149 +1,16 @@
1
- import AggregateBy from "../../Types/BaseDatabase/AggregateBy";
2
- import AggregatedResult from "../../Types/BaseDatabase/AggregatedResult";
3
- import DatabaseCommonInteractionProps from "../../Types/BaseDatabase/DatabaseCommonInteractionProps";
4
- import BadRequestException from "../../Types/Exception/BadRequestException";
5
- import { JSONObject } from "../../Types/JSON";
6
- import JSONFunctions from "../../Types/JSONFunctions";
7
1
  import Metric from "../../Models/AnalyticsModels/Metric";
8
2
  import { MetricService } from "../Services/MetricService";
9
- import GlobalCache from "../Infrastructure/GlobalCache";
10
- import logger from "../Utils/Logger";
11
- import CaptureSpan from "../Utils/Telemetry/CaptureSpan";
12
- import { ExpressRequest, ExpressResponse } from "../Utils/Express";
13
- import Response from "../Utils/Response";
14
- import CommonAPI from "./CommonAPI";
15
3
  import BaseAnalyticsAPI from "./BaseAnalyticsAPI";
16
4
 
17
5
  /*
18
- * Aggregate cache TTL. Dashboards typically auto-refresh every 30s+, so
19
- * an 8s window collapses bursts of identical requests (e.g. 12 widgets
20
- * loading on the same page) onto a single ClickHouse query while still
21
- * looking real-time to humans.
6
+ * Metric CRUD + aggregate endpoints. The 8-second project-scoped
7
+ * aggregate cache that used to live here has been promoted to
8
+ * `BaseAnalyticsAPI.getAggregate` so Log/Span/AuditLog/etc. benefit
9
+ * from the same dashboard-widget-burst collapse without duplicating
10
+ * the wrapper on every analytics API.
22
11
  */
23
- const AGGREGATE_CACHE_NAMESPACE: string = "metric-aggregate";
24
- const AGGREGATE_CACHE_TTL_SECONDS: number = 8;
25
-
26
12
  export default class MetricAPI extends BaseAnalyticsAPI<Metric, MetricService> {
27
13
  public constructor(service: MetricService) {
28
14
  super(Metric, service);
29
15
  }
30
-
31
- /*
32
- * Cached override of BaseAnalyticsAPI.getAggregate.
33
- *
34
- * Why a cache: each chart/value/gauge/table widget on a dashboard
35
- * issues its own /aggregate call. With 10+ widgets and a small group
36
- * of users hitting the same dashboard the underlying ClickHouse
37
- * cluster sees the same heavy aggregation many times in close
38
- * succession. Aggregations are read-only and pure (same input ->
39
- * same output for the bucket interval), so a brief result cache is
40
- * safe.
41
- *
42
- * Cache key: tenant project + the deserialized aggregateBy payload.
43
- * We must include the project so cross-tenant collisions cannot
44
- * leak data; we deliberately do NOT key on user id, because the
45
- * service layer applies project-scoped read permissions and metric
46
- * data is project-wide.
47
- *
48
- * Cache miss / Redis down: we fall through to the live query, so
49
- * cache outages degrade to today's behavior, never error.
50
- */
51
- @CaptureSpan()
52
- public override async getAggregate(
53
- req: ExpressRequest,
54
- res: ExpressResponse,
55
- ): Promise<void> {
56
- await this.onBeforeList(req, res);
57
-
58
- let aggregateBy: AggregateBy<Metric> | null = null;
59
-
60
- if (req.body && req.body["aggregateBy"]) {
61
- aggregateBy = JSONFunctions.deserialize(
62
- req.body["aggregateBy"] as JSONObject,
63
- ) as any;
64
- }
65
-
66
- if (!aggregateBy) {
67
- throw new BadRequestException("AggregateBy is required");
68
- }
69
-
70
- const databaseProps: DatabaseCommonInteractionProps =
71
- await CommonAPI.getDatabaseCommonInteractionProps(req);
72
-
73
- const projectId: string | undefined = databaseProps.tenantId?.toString();
74
- const cacheKey: string | null = projectId
75
- ? `${projectId}:${this.buildCacheKey(aggregateBy)}`
76
- : null;
77
-
78
- if (cacheKey) {
79
- try {
80
- const cached: JSONObject | null = await GlobalCache.getJSONObject(
81
- AGGREGATE_CACHE_NAMESPACE,
82
- cacheKey,
83
- );
84
- if (cached) {
85
- return Response.sendJsonObjectResponse(req, res, cached);
86
- }
87
- } catch (err) {
88
- // Cache fetch failed — fall through to a live query.
89
- logger.debug("MetricAPI aggregate cache read failed");
90
- logger.debug(err);
91
- }
92
- }
93
-
94
- const aggregateResult: AggregatedResult = await this.service.aggregateBy({
95
- ...aggregateBy,
96
- props: databaseProps,
97
- });
98
-
99
- const responseBody: JSONObject = { ...(aggregateResult as any) };
100
-
101
- if (cacheKey) {
102
- try {
103
- await GlobalCache.setJSON(
104
- AGGREGATE_CACHE_NAMESPACE,
105
- cacheKey,
106
- responseBody,
107
- { expiresInSeconds: AGGREGATE_CACHE_TTL_SECONDS },
108
- );
109
- } catch (err) {
110
- logger.debug("MetricAPI aggregate cache write failed");
111
- logger.debug(err);
112
- }
113
- }
114
-
115
- return Response.sendJsonObjectResponse(req, res, responseBody);
116
- }
117
-
118
- private buildCacheKey(aggregateBy: AggregateBy<Metric>): string {
119
- /*
120
- * Stable serialization. Date instances are normalized to ISO so two
121
- * logically-equal time windows hit the same cache slot, and we sort
122
- * keys via JSON.stringify replacer to keep ordering deterministic
123
- * across clients and across versions of V8.
124
- */
125
- return JSON.stringify(
126
- aggregateBy,
127
- (_key: string, value: unknown): unknown => {
128
- if (value instanceof Date) {
129
- return value.toISOString();
130
- }
131
- if (
132
- value &&
133
- typeof value === "object" &&
134
- !Array.isArray(value) &&
135
- (value as Record<string, unknown>).constructor === Object
136
- ) {
137
- const sorted: Record<string, unknown> = {};
138
- for (const k of Object.keys(
139
- value as Record<string, unknown>,
140
- ).sort()) {
141
- sorted[k] = (value as Record<string, unknown>)[k];
142
- }
143
- return sorted;
144
- }
145
- return value;
146
- },
147
- );
148
- }
149
16
  }
@@ -1,4 +1,5 @@
1
1
  import BadRequestException from "../../Types/Exception/BadRequestException";
2
+ import InMemoryTTLCache from "../Infrastructure/InMemoryTTLCache";
2
3
  import LocalCache from "../Infrastructure/LocalCache";
3
4
  import Express, {
4
5
  ExpressRequest,
@@ -20,7 +21,93 @@ export interface StatusAPIOptions {
20
21
  databaseCheck?: (() => Promise<void>) | undefined;
21
22
  }
22
23
 
24
+ /**
25
+ * Result of a recently executed health check, cached for HEALTH_CHECK_CACHE_TTL_MS.
26
+ * We cache both success AND failure: caching failure protects an already
27
+ * unhealthy backend from being hammered by retry traffic during an outage. The
28
+ * 5s TTL is short enough that k8s probe semantics (default 10s interval,
29
+ * failureThreshold 3 → ~30s to unready) are essentially unchanged.
30
+ */
31
+ type CachedHealthCheckResult = { ok: true } | { ok: false; error: Error };
32
+
23
33
  export default class StatusAPI {
34
+ /**
35
+ * Cache of recent health check results, keyed by check name. Each entry
36
+ * lives for HEALTH_CHECK_CACHE_TTL_MS. Bounded to a small max size — there
37
+ * are only ~5 distinct check names in this API.
38
+ */
39
+ private static checkResultCache: InMemoryTTLCache<CachedHealthCheckResult> =
40
+ new InMemoryTTLCache<CachedHealthCheckResult>(64);
41
+
42
+ /**
43
+ * In-flight check promises keyed by check name. When a cache miss occurs
44
+ * and multiple concurrent requests arrive, they all attach to the same
45
+ * promise instead of each triggering their own DB query. The entry is
46
+ * cleared as soon as the check settles.
47
+ */
48
+ private static inflightChecks: Map<string, Promise<void>> = new Map();
49
+
50
+ /**
51
+ * Cache TTL for health-check results. Chosen so that:
52
+ * - Two-thirds of typical k8s probes (default periodSeconds=10) hit
53
+ * the cache, removing constant DB load from liveness/readiness traffic.
54
+ * - Time-to-detect for a failing dependency only grows by ≤5s, which is
55
+ * well within the failureThreshold window k8s probes already tolerate.
56
+ */
57
+ private static readonly HEALTH_CHECK_CACHE_TTL_MS: number = 5000;
58
+
59
+ /**
60
+ * Runs `checkFn` with two layers of protection:
61
+ * 1. TTL cache — if the same check ran in the last HEALTH_CHECK_CACHE_TTL_MS
62
+ * ms, reuse its result (success or failure) without re-running.
63
+ * 2. Single-flight — if a check is already in flight, concurrent callers
64
+ * await the same promise instead of starting their own.
65
+ *
66
+ * On cache hit this is effectively free; on cache miss we run the check
67
+ * exactly once regardless of how many requests arrived concurrently.
68
+ */
69
+ private static async runCachedCheck(
70
+ checkName: string,
71
+ checkFn: () => Promise<void>,
72
+ ): Promise<void> {
73
+ const cached: CachedHealthCheckResult | undefined =
74
+ this.checkResultCache.get(checkName);
75
+ if (cached) {
76
+ if (cached.ok) {
77
+ return;
78
+ }
79
+ throw cached.error;
80
+ }
81
+
82
+ let inflight: Promise<void> | undefined =
83
+ this.inflightChecks.get(checkName);
84
+ if (!inflight) {
85
+ inflight = (async (): Promise<void> => {
86
+ try {
87
+ await checkFn();
88
+ this.checkResultCache.set(
89
+ checkName,
90
+ { ok: true },
91
+ this.HEALTH_CHECK_CACHE_TTL_MS,
92
+ );
93
+ } catch (e) {
94
+ const error: Error = e instanceof Error ? e : new Error(String(e));
95
+ this.checkResultCache.set(
96
+ checkName,
97
+ { ok: false, error },
98
+ this.HEALTH_CHECK_CACHE_TTL_MS,
99
+ );
100
+ throw error;
101
+ } finally {
102
+ this.inflightChecks.delete(checkName);
103
+ }
104
+ })();
105
+ this.inflightChecks.set(checkName, inflight);
106
+ }
107
+
108
+ await inflight;
109
+ }
110
+
24
111
  @CaptureSpan()
25
112
  public static init(options: StatusAPIOptions): ExpressRouter {
26
113
  const statusCheckSuccessCounter: TelemetryCounter = Telemetry.getCounter({
@@ -127,8 +214,11 @@ export default class StatusAPI {
127
214
  res: ExpressResponse,
128
215
  ): Promise<void> {
129
216
  try {
130
- logger.info("Ready check: Init", getLogAttributesFromRequest(req as any));
131
- await options.readyCheck();
217
+ /*
218
+ * Cached for HEALTH_CHECK_CACHE_TTL_MS so k8s probe traffic does not
219
+ * hammer the underlying check on every request.
220
+ */
221
+ await this.runCachedCheck("ready", options.readyCheck);
132
222
  logger.info("Ready check: ok", getLogAttributesFromRequest(req as any));
133
223
  stausReadySuccess.add(1);
134
224
 
@@ -160,8 +250,8 @@ export default class StatusAPI {
160
250
  res: ExpressResponse,
161
251
  ): Promise<void> {
162
252
  try {
163
- logger.info("Live check: Init", getLogAttributesFromRequest(req as any));
164
- await options.liveCheck();
253
+ // Cached for HEALTH_CHECK_CACHE_TTL_MS — see runCachedCheck for rationale.
254
+ await this.runCachedCheck("live", options.liveCheck);
165
255
  logger.info("Live check: ok", getLogAttributesFromRequest(req as any));
166
256
  stausLiveSuccess.add(1);
167
257
 
@@ -195,7 +285,8 @@ export default class StatusAPI {
195
285
  getLogAttributesFromRequest(req as any),
196
286
  );
197
287
  if (options.globalCacheCheck) {
198
- await options.globalCacheCheck();
288
+ // Cached — see runCachedCheck for rationale.
289
+ await this.runCachedCheck("global-cache", options.globalCacheCheck);
199
290
  } else {
200
291
  throw new BadRequestException("Global cache check not implemented");
201
292
  }
@@ -230,7 +321,11 @@ export default class StatusAPI {
230
321
  getLogAttributesFromRequest(req as any),
231
322
  );
232
323
  if (options.analyticsDatabaseCheck) {
233
- await options.analyticsDatabaseCheck();
324
+ // Cached — see runCachedCheck for rationale.
325
+ await this.runCachedCheck(
326
+ "analytics-database",
327
+ options.analyticsDatabaseCheck,
328
+ );
234
329
  } else {
235
330
  throw new BadRequestException(
236
331
  "Analytics database check not implemented",
@@ -265,7 +360,8 @@ export default class StatusAPI {
265
360
  logger.debug("Database check", getLogAttributesFromRequest(req as any));
266
361
 
267
362
  if (options.databaseCheck) {
268
- await options.databaseCheck();
363
+ // Cached — see runCachedCheck for rationale.
364
+ await this.runCachedCheck("database", options.databaseCheck);
269
365
  } else {
270
366
  throw new BadRequestException("Database check not implemented");
271
367
  }
@@ -0,0 +1,13 @@
1
+ import { MigrationInterface, QueryRunner } from "typeorm";
2
+
3
+ export class AddFacetsToTableView1779536271671 implements MigrationInterface {
4
+ public name: string = "AddFacetsToTableView1779536271671";
5
+
6
+ public async up(queryRunner: QueryRunner): Promise<void> {
7
+ await queryRunner.query(`ALTER TABLE "TableView" ADD "facets" jsonb`);
8
+ }
9
+
10
+ public async down(queryRunner: QueryRunner): Promise<void> {
11
+ await queryRunner.query(`ALTER TABLE "TableView" DROP COLUMN "facets"`);
12
+ }
13
+ }
@@ -0,0 +1,34 @@
1
+ import { MigrationInterface, QueryRunner } from "typeorm";
2
+
3
+ /*
4
+ * Adds an index on `IncidentMember.isMemberNotified`. The
5
+ * `IncidentMembers/SendMemberAddedNotification` cron polls
6
+ * `WHERE isMemberNotified = false` every minute; without the
7
+ * index that's a full table scan that scales with every
8
+ * incident-member assignment ever made.
9
+ *
10
+ * The autogenerated diff also picked up unrelated drift between
11
+ * the local dev database and the current entity models (drops
12
+ * of `User.themePreference`, `StatusPage.showGridView`,
13
+ * `StatusPageResource.statusPageGridColumnId`, and ALTER COLUMN
14
+ * default changes on `OnCallDutyPolicyScheduleLayer`). Those
15
+ * changes belong to other branches and must not run here — they
16
+ * were stripped to keep this migration tightly scoped.
17
+ */
18
+ export class AddIsMemberNotifiedIndex1779540427366
19
+ implements MigrationInterface
20
+ {
21
+ public name: string = "AddIsMemberNotifiedIndex1779540427366";
22
+
23
+ public async up(queryRunner: QueryRunner): Promise<void> {
24
+ await queryRunner.query(
25
+ `CREATE INDEX "IDX_0c2358ce5ea46732d76c4cbb26" ON "IncidentMember" ("isMemberNotified") `,
26
+ );
27
+ }
28
+
29
+ public async down(queryRunner: QueryRunner): Promise<void> {
30
+ await queryRunner.query(
31
+ `DROP INDEX "public"."IDX_0c2358ce5ea46732d76c4cbb26"`,
32
+ );
33
+ }
34
+ }
@@ -0,0 +1,67 @@
1
+ import { MigrationInterface, QueryRunner } from "typeorm";
2
+
3
+ export class AddDropdownOptionsToCustomFields1779619108628 // TypeORM migration: adds a "dropdownOptions" varchar(500) column to seven *CustomField tables.
4
+ implements MigrationInterface
5
+ {
6
+ public name: string = "AddDropdownOptionsToCustomFields1779619108628"; // Same string as the class name, including the numeric suffix.
7
+
8
+ public async up(queryRunner: QueryRunner): Promise<void> { // Forward step: seven ADD COLUMN statements, then two SET DEFAULT statements.
9
+ await queryRunner.query( // Each ADD below omits NOT NULL and DEFAULT.
10
+ `ALTER TABLE "IncidentCustomField" ADD "dropdownOptions" character varying(500)`,
11
+ );
12
+ await queryRunner.query(
13
+ `ALTER TABLE "MonitorCustomField" ADD "dropdownOptions" character varying(500)`,
14
+ );
15
+ await queryRunner.query(
16
+ `ALTER TABLE "OnCallDutyPolicyCustomField" ADD "dropdownOptions" character varying(500)`,
17
+ );
18
+ await queryRunner.query(
19
+ `ALTER TABLE "ScheduledMaintenanceCustomField" ADD "dropdownOptions" character varying(500)`,
20
+ );
21
+ await queryRunner.query(
22
+ `ALTER TABLE "StatusPageCustomField" ADD "dropdownOptions" character varying(500)`,
23
+ );
24
+ await queryRunner.query(
25
+ `ALTER TABLE "TeamMemberCustomField" ADD "dropdownOptions" character varying(500)`,
26
+ );
27
+ await queryRunner.query(
28
+ `ALTER TABLE "AlertCustomField" ADD "dropdownOptions" character varying(500)`,
29
+ );
30
+ await queryRunner.query( // NOTE(review): this default and the next touch "OnCallDutyPolicyScheduleLayer", not a custom-field table — looks unrelated to dropdownOptions; confirm it is intended.
31
+ `ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "rotation" SET DEFAULT '{"_type":"Recurring","value":{"intervalType":"Day","intervalCount":{"_type":"PositiveNumber","value":1}}}'`,
32
+ );
33
+ await queryRunner.query( // The JSON literal here differs from the one in down() only by whitespace.
34
+ `ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "restrictionTimes" SET DEFAULT '{"_type":"RestrictionTimes","value":{"restictionType":"None","dayRestrictionTimes":null,"weeklyRestrictionTimes":[]}}'`,
35
+ );
36
+ }
37
+
38
+ public async down(queryRunner: QueryRunner): Promise<void> { // Rollback step: mirrors up() in reverse order.
39
+ await queryRunner.query( // Restores the whitespace-separated form of the same JSON default.
40
+ `ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "restrictionTimes" SET DEFAULT '{"_type": "RestrictionTimes", "value": {"restictionType": "None", "dayRestrictionTimes": null, "weeklyRestrictionTimes": []}}'`,
41
+ );
42
+ await queryRunner.query(
43
+ `ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "rotation" SET DEFAULT '{"_type": "Recurring", "value": {"intervalType": "Day", "intervalCount": {"_type": "PositiveNumber", "value": 1}}}'`,
44
+ );
45
+ await queryRunner.query( // From here on: drop "dropdownOptions" from each table, last-added first.
46
+ `ALTER TABLE "AlertCustomField" DROP COLUMN "dropdownOptions"`,
47
+ );
48
+ await queryRunner.query(
49
+ `ALTER TABLE "TeamMemberCustomField" DROP COLUMN "dropdownOptions"`,
50
+ );
51
+ await queryRunner.query(
52
+ `ALTER TABLE "StatusPageCustomField" DROP COLUMN "dropdownOptions"`,
53
+ );
54
+ await queryRunner.query(
55
+ `ALTER TABLE "ScheduledMaintenanceCustomField" DROP COLUMN "dropdownOptions"`,
56
+ );
57
+ await queryRunner.query(
58
+ `ALTER TABLE "OnCallDutyPolicyCustomField" DROP COLUMN "dropdownOptions"`,
59
+ );
60
+ await queryRunner.query(
61
+ `ALTER TABLE "MonitorCustomField" DROP COLUMN "dropdownOptions"`,
62
+ );
63
+ await queryRunner.query(
64
+ `ALTER TABLE "IncidentCustomField" DROP COLUMN "dropdownOptions"`,
65
+ );
66
+ }
67
+ }
@@ -344,6 +344,9 @@ import { AttachKubernetesAndDockerToIncidentAndAlert1779302536475 } from "./1779
344
344
  import { AttachServiceToIncidentAndAlert1779303924241 } from "./1779303924241-AttachServiceToIncidentAndAlert";
345
345
  import { AddAgentVersionToKubernetesDockerHost1779392865146 } from "./1779392865146-AddAgentVersionToKubernetesDockerHost";
346
346
  import { AddPerformanceIndexes1779392970424 } from "./1779392970424-AddPerformanceIndexes";
347
+ import { AddFacetsToTableView1779536271671 } from "./1779536271671-AddFacetsToTableView";
348
+ import { AddIsMemberNotifiedIndex1779540427366 } from "./1779540427366-AddIsMemberNotifiedIndex";
349
+ import { AddDropdownOptionsToCustomFields1779619108628 } from "./1779619108628-AddDropdownOptionsToCustomFields";
347
350
  export default [
348
351
  InitialMigration,
349
352
  MigrationName1717678334852,
@@ -691,4 +694,7 @@ export default [
691
694
  AttachServiceToIncidentAndAlert1779303924241,
692
695
  AddAgentVersionToKubernetesDockerHost1779392865146,
693
696
  AddPerformanceIndexes1779392970424,
697
+ AddFacetsToTableView1779536271671,
698
+ AddIsMemberNotifiedIndex1779540427366,
699
+ AddDropdownOptionsToCustomFields1779619108628,
694
700
  ];
@@ -199,7 +199,7 @@ export class AccessTokenService extends BaseService {
199
199
 
200
200
  await GlobalCache.setJSON(
201
201
  PermissionNamespace.ProjectPermission,
202
- userId.toString() + projectId.toString(),
202
+ UserPermissionUtil.buildTenantPermissionCacheKey(userId, projectId),
203
203
  permission,
204
204
  );
205
205
 
@@ -535,9 +535,19 @@ export default class AnalyticsDatabaseService<
535
535
  ): Promise<Array<TBaseModel>> {
536
536
  try {
537
537
  if (!findBy.sort || Object.keys(findBy.sort).length === 0) {
538
+ /*
539
+ * Default sort uses the model's declared `defaultSortColumn`
540
+ * (e.g. `time` for Log, `startTime` for Span) so the query
541
+ * streams from the ClickHouse sort key. The historical
542
+ * fallback of `createdAt` is not in the sort key on most
543
+ * analytics tables, which triggered a full sort even on
544
+ * small LIMITed queries.
545
+ */
546
+ const defaultSortColumn: string =
547
+ this.model.defaultSortColumn || "createdAt";
538
548
  findBy.sort = {
539
- createdAt: SortOrder.Descending,
540
- };
549
+ [defaultSortColumn]: SortOrder.Descending,
550
+ } as any;
541
551
 
542
552
  if (!findBy.select) {
543
553
  findBy.select = {} as any;
@@ -910,10 +920,20 @@ export default class AnalyticsDatabaseService<
910
920
  deleteBy.query
911
921
  );
912
922
 
923
+ /*
924
+ * Use ClickHouse lightweight deletes (`DELETE FROM`) rather than
925
+ * `ALTER TABLE … DELETE`. The latter creates an async mutation that
926
+ * rewrites whole parts and is bounded by `number_of_mutations_to_throw`
927
+ * (default 1000). Customers with chatty state transitions hit that
928
+ * ceiling and every subsequent delete fails with TOO_MANY_MUTATIONS.
929
+ * Lightweight deletes only flip the hidden `_row_exists` mask column
930
+ * (a far cheaper mutation than a part rewrite); the rows are physically
931
+ * removed during normal merges, so deletes finish fast instead of piling up.
932
+ */
913
933
  /* eslint-disable prettier/prettier */
914
934
  const statement: Statement = SQL`
915
- ALTER TABLE ${databaseName}.${this.model.tableName}
916
- DELETE WHERE TRUE `.append(whereStatement);
935
+ DELETE FROM ${databaseName}.${this.model.tableName}
936
+ WHERE TRUE `.append(whereStatement);
917
937
 
918
938
  logger.debug(`${this.model.tableName} Delete Statement`, { tableName: this.model.tableName } as LogAttributes);
919
939
  logger.debug(statement, { tableName: this.model.tableName } as LogAttributes);