@oneuptime/common 10.0.54 → 10.0.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/Models/DatabaseModels/DockerHost.ts +662 -0
  2. package/Models/DatabaseModels/GlobalConfig.ts +112 -0
  3. package/Models/DatabaseModels/Index.ts +2 -0
  4. package/Server/API/TelemetryAPI.ts +352 -16
  5. package/Server/Infrastructure/ClickhouseConfig.ts +9 -0
  6. package/Server/Infrastructure/Postgres/SchemaMigrations/1774000000002-MigrationName.ts +76 -0
  7. package/Server/Infrastructure/Postgres/SchemaMigrations/1775766676723-MigrationName.ts +133 -0
  8. package/Server/Infrastructure/Postgres/SchemaMigrations/1775900000000-AddGlobalSmtpOAuth.ts +51 -0
  9. package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +6 -0
  10. package/Server/Services/DockerHostService.ts +173 -0
  11. package/Server/Services/ExceptionAggregationService.ts +335 -0
  12. package/Server/Services/Index.ts +2 -0
  13. package/Server/Services/LogAggregationService.ts +17 -0
  14. package/Server/Services/MonitorProbeService.ts +42 -21
  15. package/Server/Services/MonitorService.ts +21 -21
  16. package/Server/Services/TraceAggregationService.ts +514 -0
  17. package/Server/Utils/Monitor/MonitorCriteriaEvaluator.ts +73 -1
  18. package/Tests/Server/Services/LogAggregationService.test.ts +2 -2
  19. package/Tests/__mocks__/mermaid.js +18 -0
  20. package/Tests/__mocks__/react-markdown.js +17 -0
  21. package/Tests/__mocks__/react-syntax-highlighter.js +19 -0
  22. package/Tests/__mocks__/remark-gfm.js +8 -0
  23. package/Types/Icon/IconProp.ts +1 -0
  24. package/Types/Monitor/DockerAlertTemplates.ts +507 -0
  25. package/Types/Monitor/DockerMetricCatalog.ts +226 -0
  26. package/Types/Monitor/MonitorStep.ts +33 -0
  27. package/Types/Monitor/MonitorStepDockerMonitor.ts +38 -0
  28. package/Types/Monitor/MonitorType.ts +15 -1
  29. package/Types/Permission.ts +38 -0
  30. package/UI/Components/Icon/Icon.tsx +87 -0
  31. package/UI/Components/Markdown.tsx/MarkdownEditor.tsx +7 -132
  32. package/UI/Components/ModelDetail/CardModelDetail.tsx +11 -1
  33. package/UI/Components/TelemetryViewer/TelemetryViewer.tsx +285 -0
  34. package/UI/Components/TelemetryViewer/components/TelemetryActiveFilterChips.tsx +85 -0
  35. package/UI/Components/TelemetryViewer/components/TelemetryDetailPanel.tsx +156 -0
  36. package/UI/Components/TelemetryViewer/components/TelemetryFacetSection.tsx +160 -0
  37. package/UI/Components/TelemetryViewer/components/TelemetryFacetSidebar.tsx +85 -0
  38. package/UI/Components/TelemetryViewer/components/TelemetryFacetValueRow.tsx +102 -0
  39. package/UI/Components/TelemetryViewer/components/TelemetryHistogram.tsx +280 -0
  40. package/UI/Components/TelemetryViewer/components/TelemetryHistogramTooltip.tsx +125 -0
  41. package/UI/Components/TelemetryViewer/components/TelemetryPagination.tsx +114 -0
  42. package/UI/Components/TelemetryViewer/components/TelemetrySearchBar.tsx +378 -0
  43. package/UI/Components/TelemetryViewer/components/TelemetrySearchHelp.tsx +78 -0
  44. package/UI/Components/TelemetryViewer/components/TelemetrySearchSuggestions.tsx +64 -0
  45. package/UI/Components/TelemetryViewer/components/TelemetryTimeRangePicker.tsx +193 -0
  46. package/UI/Components/TelemetryViewer/types.ts +67 -0
  47. package/build/dist/Models/DatabaseModels/DockerHost.js +686 -0
  48. package/build/dist/Models/DatabaseModels/DockerHost.js.map +1 -0
  49. package/build/dist/Models/DatabaseModels/GlobalConfig.js +117 -0
  50. package/build/dist/Models/DatabaseModels/GlobalConfig.js.map +1 -1
  51. package/build/dist/Models/DatabaseModels/Index.js +2 -0
  52. package/build/dist/Models/DatabaseModels/Index.js.map +1 -1
  53. package/build/dist/Server/API/TelemetryAPI.js +237 -16
  54. package/build/dist/Server/API/TelemetryAPI.js.map +1 -1
  55. package/build/dist/Server/Infrastructure/ClickhouseConfig.js +9 -0
  56. package/build/dist/Server/Infrastructure/ClickhouseConfig.js.map +1 -1
  57. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1774000000002-MigrationName.js +35 -0
  58. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1774000000002-MigrationName.js.map +1 -0
  59. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1775766676723-MigrationName.js +52 -0
  60. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1775766676723-MigrationName.js.map +1 -0
  61. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1775900000000-AddGlobalSmtpOAuth.js +26 -0
  62. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1775900000000-AddGlobalSmtpOAuth.js.map +1 -0
  63. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +6 -0
  64. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
  65. package/build/dist/Server/Services/DockerHostService.js +162 -0
  66. package/build/dist/Server/Services/DockerHostService.js.map +1 -0
  67. package/build/dist/Server/Services/ExceptionAggregationService.js +224 -0
  68. package/build/dist/Server/Services/ExceptionAggregationService.js.map +1 -0
  69. package/build/dist/Server/Services/Index.js +2 -0
  70. package/build/dist/Server/Services/Index.js.map +1 -1
  71. package/build/dist/Server/Services/LogAggregationService.js +11 -0
  72. package/build/dist/Server/Services/LogAggregationService.js.map +1 -1
  73. package/build/dist/Server/Services/MonitorProbeService.js +28 -14
  74. package/build/dist/Server/Services/MonitorProbeService.js.map +1 -1
  75. package/build/dist/Server/Services/MonitorService.js +19 -17
  76. package/build/dist/Server/Services/MonitorService.js.map +1 -1
  77. package/build/dist/Server/Services/TraceAggregationService.js +364 -0
  78. package/build/dist/Server/Services/TraceAggregationService.js.map +1 -0
  79. package/build/dist/Server/Utils/Monitor/MonitorCriteriaEvaluator.js +46 -1
  80. package/build/dist/Server/Utils/Monitor/MonitorCriteriaEvaluator.js.map +1 -1
  81. package/build/dist/Tests/Server/Services/LogAggregationService.test.js +2 -2
  82. package/build/dist/Tests/Server/Services/LogAggregationService.test.js.map +1 -1
  83. package/build/dist/Types/Icon/IconProp.js +1 -0
  84. package/build/dist/Types/Icon/IconProp.js.map +1 -1
  85. package/build/dist/Types/Monitor/DockerAlertTemplates.js +410 -0
  86. package/build/dist/Types/Monitor/DockerAlertTemplates.js.map +1 -0
  87. package/build/dist/Types/Monitor/DockerMetricCatalog.js +192 -0
  88. package/build/dist/Types/Monitor/DockerMetricCatalog.js.map +1 -0
  89. package/build/dist/Types/Monitor/MonitorStep.js +23 -0
  90. package/build/dist/Types/Monitor/MonitorStep.js.map +1 -1
  91. package/build/dist/Types/Monitor/MonitorStepDockerMonitor.js +21 -0
  92. package/build/dist/Types/Monitor/MonitorStepDockerMonitor.js.map +1 -0
  93. package/build/dist/Types/Monitor/MonitorType.js +14 -1
  94. package/build/dist/Types/Monitor/MonitorType.js.map +1 -1
  95. package/build/dist/Types/Permission.js +36 -0
  96. package/build/dist/Types/Permission.js.map +1 -1
  97. package/build/dist/UI/Components/Icon/Icon.js +13 -0
  98. package/build/dist/UI/Components/Icon/Icon.js.map +1 -1
  99. package/build/dist/UI/Components/Markdown.tsx/MarkdownEditor.js +7 -75
  100. package/build/dist/UI/Components/Markdown.tsx/MarkdownEditor.js.map +1 -1
  101. package/build/dist/UI/Components/ModelDetail/CardModelDetail.js +8 -1
  102. package/build/dist/UI/Components/ModelDetail/CardModelDetail.js.map +1 -1
  103. package/build/dist/UI/Components/TelemetryViewer/TelemetryViewer.js +71 -0
  104. package/build/dist/UI/Components/TelemetryViewer/TelemetryViewer.js.map +1 -0
  105. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryActiveFilterChips.js +39 -0
  106. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryActiveFilterChips.js.map +1 -0
  107. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryDetailPanel.js +61 -0
  108. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryDetailPanel.js.map +1 -0
  109. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryFacetSection.js +66 -0
  110. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryFacetSection.js.map +1 -0
  111. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryFacetSidebar.js +41 -0
  112. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryFacetSidebar.js.map +1 -0
  113. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryFacetValueRow.js +35 -0
  114. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryFacetValueRow.js.map +1 -0
  115. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryHistogram.js +132 -0
  116. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryHistogram.js.map +1 -0
  117. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryHistogramTooltip.js +65 -0
  118. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryHistogramTooltip.js.map +1 -0
  119. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryPagination.js +52 -0
  120. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryPagination.js.map +1 -0
  121. package/build/dist/UI/Components/TelemetryViewer/components/TelemetrySearchBar.js +224 -0
  122. package/build/dist/UI/Components/TelemetryViewer/components/TelemetrySearchBar.js.map +1 -0
  123. package/build/dist/UI/Components/TelemetryViewer/components/TelemetrySearchHelp.js +35 -0
  124. package/build/dist/UI/Components/TelemetryViewer/components/TelemetrySearchHelp.js.map +1 -0
  125. package/build/dist/UI/Components/TelemetryViewer/components/TelemetrySearchSuggestions.js +27 -0
  126. package/build/dist/UI/Components/TelemetryViewer/components/TelemetrySearchSuggestions.js.map +1 -0
  127. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryTimeRangePicker.js +97 -0
  128. package/build/dist/UI/Components/TelemetryViewer/components/TelemetryTimeRangePicker.js.map +1 -0
  129. package/build/dist/UI/Components/TelemetryViewer/types.js +6 -0
  130. package/build/dist/UI/Components/TelemetryViewer/types.js.map +1 -0
  131. package/jest.config.json +6 -1
  132. package/package.json +1 -1
@@ -0,0 +1,507 @@
1
+ import ObjectID from "../ObjectID";
2
+ import MonitorStep from "./MonitorStep";
3
+ import MonitorCriteria from "./MonitorCriteria";
4
+ import MonitorCriteriaInstance from "./MonitorCriteriaInstance";
5
+ import FilterCondition from "../Filter/FilterCondition";
6
+ import { CheckOn, FilterType, EvaluateOverTimeType } from "./CriteriaFilter";
7
+ import MonitorStepDockerMonitor from "./MonitorStepDockerMonitor";
8
+ import RollingTime from "../RollingTime/RollingTime";
9
+ import MetricsAggregationType from "../Metrics/MetricsAggregationType";
10
+
11
/**
 * Grouping label for a Docker alert template. The templates defined in this
 * file use "Container" and "Resource"; "Host" is declared but not used by any
 * template here.
 */
export type DockerAlertTemplateCategory =
  | "Container"
  | "Resource"
  | "Host";

/** Severity label carried by a Docker alert template. */
export type DockerAlertTemplateSeverity =
  | "Critical"
  | "Warning";
14
+
15
/**
 * Inputs a caller supplies when turning a Docker alert template into a
 * concrete monitor step.
 */
export interface DockerAlertTemplateArgs {
  /** Identifier of the Docker host; copied into the Docker monitor config. */
  hostIdentifier: string;

  /** Monitor status applied when the "healthy" criteria matches. */
  onlineMonitorStatusId: ObjectID;

  /** Monitor status applied when the "unhealthy" criteria matches. */
  offlineMonitorStatusId: ObjectID;

  /** Severity assigned to incidents created by the unhealthy criteria. */
  defaultIncidentSeverityId: ObjectID;

  /** Severity assigned to alerts created by the unhealthy criteria. */
  defaultAlertSeverityId: ObjectID;

  /** Monitor display name; interpolated into incident and alert titles. */
  monitorName: string;
}
23
+
24
/**
 * A predefined Docker alert: descriptive metadata plus a factory that
 * produces the monitor step implementing it.
 */
export interface DockerAlertTemplate {
  /** Stable identifier, matched by getDockerAlertTemplateById. */
  id: string;

  /** Short human-readable title of the template. */
  name: string;

  /** One-sentence explanation of what the template alerts on. */
  description: string;

  /** Grouping label, matched by getDockerAlertTemplatesByCategory. */
  category: DockerAlertTemplateCategory;

  /** Severity label of the template. */
  severity: DockerAlertTemplateSeverity;

  /** Builds the monitor step for this template from caller-supplied args. */
  getMonitorStep: (args: DockerAlertTemplateArgs) => MonitorStep;
}
32
+
33
/**
 * Assembles a MonitorStep for a Docker monitor.
 *
 * The step's criteria list holds the offline (unhealthy) instance first and
 * the online (healthy) instance second. Every monitor-type-specific field
 * other than `dockerMonitor` is explicitly set to undefined.
 *
 * @param args.dockerMonitor - Docker monitor configuration stored on the step.
 * @param args.offlineCriteriaInstance - Criteria describing the unhealthy state.
 * @param args.onlineCriteriaInstance - Criteria describing the healthy state.
 * @returns A new MonitorStep carrying a freshly generated id.
 */
export function buildDockerMonitorStep(args: {
  dockerMonitor: MonitorStepDockerMonitor;
  offlineCriteriaInstance: MonitorCriteriaInstance;
  onlineCriteriaInstance: MonitorCriteriaInstance;
}): MonitorStep {
  const step: MonitorStep = new MonitorStep();
  const criteria: MonitorCriteria = new MonitorCriteria();

  /* Offline criteria is listed ahead of online criteria. */
  const criteriaInstances: Array<MonitorCriteriaInstance> = [
    args.offlineCriteriaInstance,
    args.onlineCriteriaInstance,
  ];
  criteria.data = { monitorCriteriaInstanceArray: criteriaInstances };

  step.data = {
    id: ObjectID.generate().toString(),
    monitorDestination: undefined,
    doNotFollowRedirects: undefined,
    monitorDestinationPort: undefined,
    monitorCriteria: criteria,
    /*
     * NOTE(review): the cast sidesteps the declared type of requestType;
     * presumably a project HTTP-method enum should be used here - confirm.
     */
    requestType: "GET" as any,
    requestHeaders: undefined,
    requestBody: undefined,
    customCode: undefined,
    screenSizeTypes: undefined,
    browserTypes: undefined,
    retryCountOnError: undefined,
    logMonitor: undefined,
    traceMonitor: undefined,
    metricMonitor: undefined,
    exceptionMonitor: undefined,
    snmpMonitor: undefined,
    dnsMonitor: undefined,
    domainMonitor: undefined,
    externalStatusPageMonitor: undefined,
    kubernetesMonitor: undefined,
    profileMonitor: undefined,
    dockerMonitor: args.dockerMonitor,
  };

  return step;
}
77
+
78
/**
 * Builds the "unhealthy" criteria instance for a Docker monitor.
 *
 * The instance has a single metric-value filter on `metricAlias`, switches
 * the monitor to `offlineMonitorStatusId`, and creates one auto-resolving
 * incident and one auto-resolving alert that share the same title and
 * description.
 *
 * Optional text arguments fall back to generic wording derived from
 * `monitorName` when they are omitted or empty.
 *
 * @param args.offlineMonitorStatusId - Status applied when the filter matches.
 * @param args.incidentSeverityId - Severity of the created incident.
 * @param args.alertSeverityId - Severity of the created alert.
 * @param args.monitorName - Used to build the fallback title/description/name.
 * @param args.metricAlias - Alias of the metric the filter is evaluated on.
 * @param args.filterType - Comparison applied between the metric and `value`.
 * @param args.value - Threshold the metric is compared against.
 * @param args.incidentTitle - Optional title for the incident and the alert.
 * @param args.incidentDescription - Optional description for both.
 * @param args.criteriaName - Optional name of the criteria instance.
 * @param args.criteriaDescription - Optional description of the criteria.
 * @returns A new MonitorCriteriaInstance with freshly generated ids.
 */
export function buildDockerOfflineCriteriaInstance(args: {
  offlineMonitorStatusId: ObjectID;
  incidentSeverityId: ObjectID;
  alertSeverityId: ObjectID;
  monitorName: string;
  metricAlias: string;
  filterType: FilterType;
  value: number;
  incidentTitle?: string;
  incidentDescription?: string;
  criteriaName?: string;
  criteriaDescription?: string;
}): MonitorCriteriaInstance {
  const criteriaInstance: MonitorCriteriaInstance =
    new MonitorCriteriaInstance();

  /* Falsy (missing or empty) text falls back to the generic wording. */
  const title: string = args.incidentTitle
    ? args.incidentTitle
    : `${args.monitorName} - Alert Triggered`;
  const details: string = args.incidentDescription
    ? args.incidentDescription
    : `${args.monitorName} has triggered an alert condition. See root cause for detailed Docker container information.`;
  const criteriaName: string = args.criteriaName
    ? args.criteriaName
    : `${args.monitorName} - Unhealthy`;
  const criteriaDescription: string = args.criteriaDescription
    ? args.criteriaDescription
    : `Criteria for detecting unhealthy state.`;

  /* Ids are generated in the order: instance, incident, alert. */
  const instanceId: string = ObjectID.generate().toString();
  const incidentId: string = ObjectID.generate().toString();
  const alertId: string = ObjectID.generate().toString();

  criteriaInstance.data = {
    id: instanceId,
    monitorStatusId: args.offlineMonitorStatusId,
    filterCondition: FilterCondition.Any,
    filters: [
      {
        checkOn: CheckOn.MetricValue,
        filterType: args.filterType,
        metricMonitorOptions: {
          metricAggregationType: EvaluateOverTimeType.AnyValue,
          metricAlias: args.metricAlias,
        },
        value: args.value,
      },
    ],
    incidents: [
      {
        title: title,
        description: details,
        incidentSeverityId: args.incidentSeverityId,
        autoResolveIncident: true,
        id: incidentId,
        onCallPolicyIds: [],
      },
    ],
    alerts: [
      {
        title: title,
        description: details,
        alertSeverityId: args.alertSeverityId,
        autoResolveAlert: true,
        id: alertId,
        onCallPolicyIds: [],
      },
    ],
    changeMonitorStatus: true,
    createIncidents: true,
    createAlerts: true,
    name: criteriaName,
    description: criteriaDescription,
  };

  return criteriaInstance;
}
144
+
145
/**
 * Builds the "healthy" criteria instance for a Docker monitor.
 *
 * The instance has a single metric-value filter on `metricAlias` and switches
 * the monitor to `onlineMonitorStatusId`. It creates no incidents or alerts.
 *
 * @param args.onlineMonitorStatusId - Status applied when the filter matches.
 * @param args.metricAlias - Alias of the metric the filter is evaluated on.
 * @param args.filterType - Comparison applied between the metric and `value`.
 * @param args.value - Threshold the metric is compared against.
 * @returns A new MonitorCriteriaInstance named "Healthy".
 */
export function buildDockerOnlineCriteriaInstance(args: {
  onlineMonitorStatusId: ObjectID;
  metricAlias: string;
  filterType: FilterType;
  value: number;
}): MonitorCriteriaInstance {
  const { onlineMonitorStatusId, metricAlias, filterType, value } = args;

  const criteriaInstance: MonitorCriteriaInstance =
    new MonitorCriteriaInstance();

  criteriaInstance.data = {
    id: ObjectID.generate().toString(),
    monitorStatusId: onlineMonitorStatusId,
    filterCondition: FilterCondition.Any,
    filters: [
      {
        checkOn: CheckOn.MetricValue,
        filterType: filterType,
        metricMonitorOptions: {
          metricAggregationType: EvaluateOverTimeType.AnyValue,
          metricAlias: metricAlias,
        },
        value: value,
      },
    ],
    /* The healthy state only flips the monitor status back. */
    incidents: [],
    alerts: [],
    changeMonitorStatus: true,
    createIncidents: false,
    createAlerts: false,
    name: "Healthy",
    description: "Criteria for healthy state.",
  };

  return criteriaInstance;
}
179
+
180
/**
 * Builds the Docker monitor configuration for a single metric query.
 *
 * The metric alias is reused as the query's variable name, title,
 * description and legend. Container filters, aggregate-by and formula
 * configs are left empty.
 *
 * @param args.hostIdentifier - Docker host the monitor targets.
 * @param args.metricName - Name of the metric to query.
 * @param args.metricAlias - Alias the criteria filters refer to.
 * @param args.rollingTime - Time window stored on the config.
 * @param args.aggregationType - Aggregation applied to the metric.
 * @param args.attributes - Optional attribute filter; defaults to `{}`.
 * @returns The MonitorStepDockerMonitor configuration object.
 */
export function buildDockerMonitorConfig(args: {
  hostIdentifier: string;
  metricName: string;
  metricAlias: string;
  rollingTime: RollingTime;
  aggregationType: MetricsAggregationType;
  attributes?: Record<string, string>;
}): MonitorStepDockerMonitor {
  const alias: string = args.metricAlias;
  const attributeFilter: Record<string, string> = args.attributes
    ? args.attributes
    : {};

  const dockerMonitorConfig: MonitorStepDockerMonitor = {
    hostIdentifier: args.hostIdentifier,
    containerFilters: {},
    metricViewConfig: {
      queryConfigs: [
        {
          metricAliasData: {
            metricVariable: alias,
            title: alias,
            description: alias,
            legend: alias,
            legendUnit: undefined,
          },
          metricQueryData: {
            filterData: {
              metricName: args.metricName,
              attributes: attributeFilter,
              /*
               * NOTE(review): "aggegationType" looks misspelled, but the key
               * presumably matches the externally declared query type - do
               * not rename it here without confirming.
               */
              aggegationType: args.aggregationType,
              aggregateBy: {},
            },
          },
        },
      ],
      formulaConfigs: [],
    },
    rollingTime: args.rollingTime,
  };

  return dockerMonitorConfig;
}
216
+
217
+ // --- Template Definitions ---
218
+
219
/**
 * Template "docker-high-cpu": unhealthy when the container CPU metric is
 * above 80 over the past five minutes, healthy when it is 80 or below.
 */
const highCpuTemplate: DockerAlertTemplate = {
  id: "docker-high-cpu",
  name: "High Container CPU Usage",
  description: "Alert when container CPU usage exceeds 80% sustained.",
  category: "Resource",
  severity: "Warning",
  getMonitorStep: (args: DockerAlertTemplateArgs): MonitorStep => {
    const alias: string = "container_cpu";

    /*
     * Max aggregation lets a single hot container trip the threshold
     * instead of being diluted by idle containers on the host.
     */
    const dockerMonitor: MonitorStepDockerMonitor = buildDockerMonitorConfig({
      hostIdentifier: args.hostIdentifier,
      metricName: "container.cpu.utilization",
      metricAlias: alias,
      rollingTime: RollingTime.Past5Minutes,
      aggregationType: MetricsAggregationType.Max,
    });

    const unhealthyCriteria: MonitorCriteriaInstance =
      buildDockerOfflineCriteriaInstance({
        offlineMonitorStatusId: args.offlineMonitorStatusId,
        incidentSeverityId: args.defaultIncidentSeverityId,
        alertSeverityId: args.defaultAlertSeverityId,
        monitorName: args.monitorName,
        metricAlias: alias,
        filterType: FilterType.GreaterThan,
        value: 80,
        incidentTitle: `[Docker] High CPU Usage (>80%) - ${args.monitorName}`,
        incidentDescription: `A Docker container's CPU usage has exceeded 80%. Sustained high CPU usage can cause performance degradation and throttling. Check the root cause for the specific container and host details.`,
        criteriaName: "High CPU - Usage > 80%",
        criteriaDescription:
          "Triggers when any container's CPU usage exceeds 80% over the monitoring window.",
      });

    const healthyCriteria: MonitorCriteriaInstance =
      buildDockerOnlineCriteriaInstance({
        onlineMonitorStatusId: args.onlineMonitorStatusId,
        metricAlias: alias,
        filterType: FilterType.LessThanOrEqualTo,
        value: 80,
      });

    return buildDockerMonitorStep({
      dockerMonitor: dockerMonitor,
      offlineCriteriaInstance: unhealthyCriteria,
      onlineCriteriaInstance: healthyCriteria,
    });
  },
};
263
+
264
/**
 * Template "docker-high-memory": unhealthy when the container memory
 * percentage is above 85 over the past five minutes, healthy when it is 85
 * or below.
 */
const highMemoryTemplate: DockerAlertTemplate = {
  id: "docker-high-memory",
  name: "High Container Memory Usage",
  description: "Alert when container memory usage exceeds 85% of its limit.",
  category: "Resource",
  severity: "Warning",
  getMonitorStep: (args: DockerAlertTemplateArgs): MonitorStep => {
    const alias: string = "container_memory";

    /*
     * Max aggregation lets a single container breaching its limit trip the
     * threshold instead of being diluted by idle containers.
     */
    const dockerMonitor: MonitorStepDockerMonitor = buildDockerMonitorConfig({
      hostIdentifier: args.hostIdentifier,
      metricName: "container.memory.percent",
      metricAlias: alias,
      rollingTime: RollingTime.Past5Minutes,
      aggregationType: MetricsAggregationType.Max,
    });

    const unhealthyCriteria: MonitorCriteriaInstance =
      buildDockerOfflineCriteriaInstance({
        offlineMonitorStatusId: args.offlineMonitorStatusId,
        incidentSeverityId: args.defaultIncidentSeverityId,
        alertSeverityId: args.defaultAlertSeverityId,
        monitorName: args.monitorName,
        metricAlias: alias,
        filterType: FilterType.GreaterThan,
        value: 85,
        incidentTitle: `[Docker] High Memory Usage (>85%) - ${args.monitorName}`,
        incidentDescription: `A Docker container's memory usage has exceeded 85% of its limit. High memory usage can lead to OOM kills and container restarts. Check the root cause for the specific container and host details.`,
        criteriaName: "High Memory - Usage > 85%",
        criteriaDescription:
          "Triggers when any container's memory usage exceeds 85% over the monitoring window.",
      });

    const healthyCriteria: MonitorCriteriaInstance =
      buildDockerOnlineCriteriaInstance({
        onlineMonitorStatusId: args.onlineMonitorStatusId,
        metricAlias: alias,
        filterType: FilterType.LessThanOrEqualTo,
        value: 85,
      });

    return buildDockerMonitorStep({
      dockerMonitor: dockerMonitor,
      offlineCriteriaInstance: unhealthyCriteria,
      onlineCriteriaInstance: healthyCriteria,
    });
  },
};
308
+
309
/**
 * Template "docker-restart-loop": unhealthy when the container restart
 * metric (Max over the past five minutes) is above 5, healthy when it is 5
 * or below.
 *
 * NOTE(review): if "container.restarts" is a cumulative count rather than a
 * per-window count, this would stay unhealthy after the sixth restart -
 * confirm the metric semantics.
 */
const containerRestartLoopTemplate: DockerAlertTemplate = {
  id: "docker-restart-loop",
  name: "Container Restart Loop",
  description:
    "Alert when a container has restarted more than 5 times, indicating a crash loop.",
  category: "Container",
  severity: "Critical",
  getMonitorStep: (args: DockerAlertTemplateArgs): MonitorStep => {
    const alias: string = "container_restarts";

    const dockerMonitor: MonitorStepDockerMonitor = buildDockerMonitorConfig({
      hostIdentifier: args.hostIdentifier,
      metricName: "container.restarts",
      metricAlias: alias,
      rollingTime: RollingTime.Past5Minutes,
      aggregationType: MetricsAggregationType.Max,
    });

    const unhealthyCriteria: MonitorCriteriaInstance =
      buildDockerOfflineCriteriaInstance({
        offlineMonitorStatusId: args.offlineMonitorStatusId,
        incidentSeverityId: args.defaultIncidentSeverityId,
        alertSeverityId: args.defaultAlertSeverityId,
        monitorName: args.monitorName,
        metricAlias: alias,
        filterType: FilterType.GreaterThan,
        value: 5,
        incidentTitle: `[Docker] Container Restart Loop Detected - ${args.monitorName}`,
        incidentDescription: `A Docker container is repeatedly crashing and restarting. The container restart count has exceeded 5. This indicates a crash loop that needs immediate attention. Check the root cause for the specific container, exit code, and logs.`,
        criteriaName: "Restart Loop - Restarts > 5",
        criteriaDescription:
          "Triggers when any container restart count exceeds 5 in the monitoring window.",
      });

    const healthyCriteria: MonitorCriteriaInstance =
      buildDockerOnlineCriteriaInstance({
        onlineMonitorStatusId: args.onlineMonitorStatusId,
        metricAlias: alias,
        filterType: FilterType.LessThanOrEqualTo,
        value: 5,
      });

    return buildDockerMonitorStep({
      dockerMonitor: dockerMonitor,
      offlineCriteriaInstance: unhealthyCriteria,
      onlineCriteriaInstance: healthyCriteria,
    });
  },
};
350
+
351
/**
 * Template "docker-cpu-throttling": unhealthy when the throttled-time metric
 * is above 0 over the past five minutes, healthy when it equals 0.
 *
 * NOTE(review): if the throttled-time metric is a cumulative counter, it
 * would never return to 0 once throttling has occurred - confirm the metric
 * semantics.
 */
const highCpuThrottlingTemplate: DockerAlertTemplate = {
  id: "docker-cpu-throttling",
  name: "Container CPU Throttling",
  description:
    "Alert when a container is being CPU-throttled, indicating it needs more CPU resources.",
  category: "Resource",
  severity: "Warning",
  getMonitorStep: (args: DockerAlertTemplateArgs): MonitorStep => {
    const alias: string = "cpu_throttled";

    /*
     * Max aggregation lets a single throttled container trip the threshold,
     * rather than summing throttled time across all containers.
     */
    const dockerMonitor: MonitorStepDockerMonitor = buildDockerMonitorConfig({
      hostIdentifier: args.hostIdentifier,
      metricName: "container.cpu.throttling_data.throttled_time",
      metricAlias: alias,
      rollingTime: RollingTime.Past5Minutes,
      aggregationType: MetricsAggregationType.Max,
    });

    const unhealthyCriteria: MonitorCriteriaInstance =
      buildDockerOfflineCriteriaInstance({
        offlineMonitorStatusId: args.offlineMonitorStatusId,
        incidentSeverityId: args.defaultIncidentSeverityId,
        alertSeverityId: args.defaultAlertSeverityId,
        monitorName: args.monitorName,
        metricAlias: alias,
        filterType: FilterType.GreaterThan,
        value: 0,
        incidentTitle: `[Docker] CPU Throttling Detected - ${args.monitorName}`,
        incidentDescription: `A Docker container is being CPU-throttled. This means the container is hitting its CPU limit and performance is degraded. Consider increasing the CPU limit or optimizing the application.`,
        criteriaName: "CPU Throttling - Throttled Time > 0",
        criteriaDescription:
          "Triggers when any container reports CPU throttling.",
      });

    const healthyCriteria: MonitorCriteriaInstance =
      buildDockerOnlineCriteriaInstance({
        onlineMonitorStatusId: args.onlineMonitorStatusId,
        metricAlias: alias,
        filterType: FilterType.EqualTo,
        value: 0,
      });

    return buildDockerMonitorStep({
      dockerMonitor: dockerMonitor,
      offlineCriteriaInstance: unhealthyCriteria,
      onlineCriteriaInstance: healthyCriteria,
    });
  },
};
396
+
397
/**
 * Template "docker-high-pids": unhealthy when the container process-count
 * metric (Max over the past five minutes) is above 500, healthy when it is
 * 500 or below.
 */
const highProcessCountTemplate: DockerAlertTemplate = {
  id: "docker-high-pids",
  name: "High Container Process Count",
  description:
    "Alert when a container has an unusually high number of processes, which may indicate a fork bomb or resource leak.",
  category: "Container",
  severity: "Warning",
  getMonitorStep: (args: DockerAlertTemplateArgs): MonitorStep => {
    const alias: string = "pids_count";

    const dockerMonitor: MonitorStepDockerMonitor = buildDockerMonitorConfig({
      hostIdentifier: args.hostIdentifier,
      metricName: "container.pids.count",
      metricAlias: alias,
      rollingTime: RollingTime.Past5Minutes,
      aggregationType: MetricsAggregationType.Max,
    });

    const unhealthyCriteria: MonitorCriteriaInstance =
      buildDockerOfflineCriteriaInstance({
        offlineMonitorStatusId: args.offlineMonitorStatusId,
        incidentSeverityId: args.defaultIncidentSeverityId,
        alertSeverityId: args.defaultAlertSeverityId,
        monitorName: args.monitorName,
        metricAlias: alias,
        filterType: FilterType.GreaterThan,
        value: 500,
        incidentTitle: `[Docker] High Process Count (>500) - ${args.monitorName}`,
        incidentDescription: `A Docker container has an unusually high number of processes (>500). This may indicate a fork bomb, resource leak, or misconfigured application. Check the container for runaway processes.`,
        criteriaName: "High PIDs - Count > 500",
        criteriaDescription:
          "Triggers when container process count exceeds 500.",
      });

    const healthyCriteria: MonitorCriteriaInstance =
      buildDockerOnlineCriteriaInstance({
        onlineMonitorStatusId: args.onlineMonitorStatusId,
        metricAlias: alias,
        filterType: FilterType.LessThanOrEqualTo,
        value: 500,
      });

    return buildDockerMonitorStep({
      dockerMonitor: dockerMonitor,
      offlineCriteriaInstance: unhealthyCriteria,
      onlineCriteriaInstance: healthyCriteria,
    });
  },
};
438
+
439
/**
 * Template "docker-container-down": unhealthy when the container uptime
 * metric (Min over the past minute) equals 0, healthy when it is above 0.
 *
 * NOTE(review): this assumes a stopped container still reports an uptime of
 * 0 rather than reporting nothing - confirm against the metric source.
 */
const containerUptimeTemplate: DockerAlertTemplate = {
  id: "docker-container-down",
  name: "Container Down (Low Uptime)",
  description:
    "Alert when a container's uptime drops to zero, indicating it has stopped or crashed.",
  category: "Container",
  severity: "Critical",
  getMonitorStep: (args: DockerAlertTemplateArgs): MonitorStep => {
    const alias: string = "container_uptime";

    /* Unlike the other templates, this one uses Min over a 1-minute window. */
    const dockerMonitor: MonitorStepDockerMonitor = buildDockerMonitorConfig({
      hostIdentifier: args.hostIdentifier,
      metricName: "container.uptime",
      metricAlias: alias,
      rollingTime: RollingTime.Past1Minute,
      aggregationType: MetricsAggregationType.Min,
    });

    const unhealthyCriteria: MonitorCriteriaInstance =
      buildDockerOfflineCriteriaInstance({
        offlineMonitorStatusId: args.offlineMonitorStatusId,
        incidentSeverityId: args.defaultIncidentSeverityId,
        alertSeverityId: args.defaultAlertSeverityId,
        monitorName: args.monitorName,
        metricAlias: alias,
        filterType: FilterType.EqualTo,
        value: 0,
        incidentTitle: `[Docker] Container Down - ${args.monitorName}`,
        incidentDescription: `A Docker container has stopped running. The container uptime is zero, indicating it has crashed, been stopped, or been removed. Check the container status and logs for details.`,
        criteriaName: "Container Down - Uptime = 0",
        criteriaDescription: "Triggers when container uptime drops to zero.",
      });

    const healthyCriteria: MonitorCriteriaInstance =
      buildDockerOnlineCriteriaInstance({
        onlineMonitorStatusId: args.onlineMonitorStatusId,
        metricAlias: alias,
        filterType: FilterType.GreaterThan,
        value: 0,
      });

    return buildDockerMonitorStep({
      dockerMonitor: dockerMonitor,
      offlineCriteriaInstance: unhealthyCriteria,
      onlineCriteriaInstance: healthyCriteria,
    });
  },
};
479
+
480
/**
 * Lists every Docker alert template defined in this file.
 *
 * @returns A new array on each call, in a fixed order: CPU, memory,
 * restart loop, CPU throttling, process count, container down.
 */
export function getAllDockerAlertTemplates(): Array<DockerAlertTemplate> {
  const templates: Array<DockerAlertTemplate> = [];

  templates.push(
    highCpuTemplate,
    highMemoryTemplate,
    containerRestartLoopTemplate,
    highCpuThrottlingTemplate,
    highProcessCountTemplate,
    containerUptimeTemplate,
  );

  return templates;
}
490
+
491
/**
 * Returns the Docker alert templates that belong to the given category.
 *
 * @param category - Category to match exactly.
 * @returns Matching templates in their listing order; empty when none match.
 */
export function getDockerAlertTemplatesByCategory(
  category: DockerAlertTemplateCategory,
): Array<DockerAlertTemplate> {
  const matches: Array<DockerAlertTemplate> = [];

  for (const candidate of getAllDockerAlertTemplates()) {
    if (candidate.category === category) {
      matches.push(candidate);
    }
  }

  return matches;
}
500
+
501
/**
 * Looks up a Docker alert template by its id.
 *
 * @param id - Template id to match exactly (for example "docker-high-cpu").
 * @returns The first template with that id, or undefined when none matches.
 */
export function getDockerAlertTemplateById(
  id: string,
): DockerAlertTemplate | undefined {
  for (const candidate of getAllDockerAlertTemplates()) {
    if (candidate.id === id) {
      return candidate;
    }
  }

  return undefined;
}