@oneuptime/common 10.7.1 → 10.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Models/DatabaseModels/CloudResource.ts +846 -0
- package/Models/DatabaseModels/CloudResourceInstance.ts +276 -0
- package/Models/DatabaseModels/CloudResourceLabelRule.ts +510 -0
- package/Models/DatabaseModels/CloudResourceOwnerRule.ts +592 -0
- package/Models/DatabaseModels/CloudResourceOwnerTeam.ts +487 -0
- package/Models/DatabaseModels/CloudResourceOwnerUser.ts +486 -0
- package/Models/DatabaseModels/Host.ts +209 -0
- package/Models/DatabaseModels/Index.ts +36 -0
- package/Models/DatabaseModels/RumApplication.ts +731 -0
- package/Models/DatabaseModels/RumApplicationClient.ts +229 -0
- package/Models/DatabaseModels/RumApplicationLabelRule.ts +510 -0
- package/Models/DatabaseModels/RumApplicationOwnerRule.ts +592 -0
- package/Models/DatabaseModels/RumApplicationOwnerTeam.ts +486 -0
- package/Models/DatabaseModels/RumApplicationOwnerUser.ts +485 -0
- package/Models/DatabaseModels/ServerlessFunction.ts +881 -0
- package/Models/DatabaseModels/ServerlessFunctionInstance.ts +212 -0
- package/Models/DatabaseModels/ServerlessFunctionLabelRule.ts +510 -0
- package/Models/DatabaseModels/ServerlessFunctionOwnerRule.ts +592 -0
- package/Models/DatabaseModels/ServerlessFunctionOwnerTeam.ts +487 -0
- package/Models/DatabaseModels/ServerlessFunctionOwnerUser.ts +486 -0
- package/Models/DatabaseModels/Service.ts +268 -0
- package/Models/DatabaseModels/TelemetryException.ts +15 -1
- package/Models/DatabaseModels/WorkflowLog.ts +52 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780931746908-AddResumeStateToWorkflowLog.ts +21 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780931863719-AddTelemetryResourceMetadataColumns.ts +108 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780933132562-AddServerlessFunctionTables.ts +205 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780935387827-AddCloudResourceTables.ts +195 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780936579718-AddRumApplicationTables.ts +202 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780938407319-AddServerlessFunctionRuleTables.ts +156 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780940721814-AddCloudResourceRuleTables.ts +149 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780940998002-AddRumApplicationRuleTables.ts +149 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780941762204-AddTelemetryResourceInventoryTables.ts +95 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780985763463-AddRumApplicationSdkLanguage.ts +25 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780987192743-RecastCloudResourcesByEnvironment.ts +30 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +22 -0
- package/Server/Infrastructure/Queue.ts +11 -0
- package/Server/Services/CloudResourceInstanceService.ts +76 -0
- package/Server/Services/CloudResourceLabelRuleEngineService.ts +175 -0
- package/Server/Services/CloudResourceLabelRuleService.ts +14 -0
- package/Server/Services/CloudResourceOwnerRuleEngineService.ts +192 -0
- package/Server/Services/CloudResourceOwnerRuleService.ts +14 -0
- package/Server/Services/CloudResourceOwnerTeamService.ts +10 -0
- package/Server/Services/CloudResourceOwnerUserService.ts +10 -0
- package/Server/Services/CloudResourceService.ts +342 -0
- package/Server/Services/ExceptionAggregationService.ts +3 -0
- package/Server/Services/HostService.ts +42 -0
- package/Server/Services/LogAggregationService.ts +3 -0
- package/Server/Services/MetricAggregationService.ts +3 -0
- package/Server/Services/OpenTelemetryIngestService.ts +148 -1
- package/Server/Services/RumApplicationClientService.ts +69 -0
- package/Server/Services/RumApplicationLabelRuleEngineService.ts +175 -0
- package/Server/Services/RumApplicationLabelRuleService.ts +14 -0
- package/Server/Services/RumApplicationOwnerRuleEngineService.ts +192 -0
- package/Server/Services/RumApplicationOwnerRuleService.ts +14 -0
- package/Server/Services/RumApplicationOwnerTeamService.ts +10 -0
- package/Server/Services/RumApplicationOwnerUserService.ts +10 -0
- package/Server/Services/RumApplicationService.ts +301 -0
- package/Server/Services/ServerlessFunctionInstanceService.ts +61 -0
- package/Server/Services/ServerlessFunctionLabelRuleEngineService.ts +182 -0
- package/Server/Services/ServerlessFunctionLabelRuleService.ts +14 -0
- package/Server/Services/ServerlessFunctionOwnerRuleEngineService.ts +199 -0
- package/Server/Services/ServerlessFunctionOwnerRuleService.ts +14 -0
- package/Server/Services/ServerlessFunctionOwnerTeamService.ts +10 -0
- package/Server/Services/ServerlessFunctionOwnerUserService.ts +10 -0
- package/Server/Services/ServerlessFunctionService.ts +351 -0
- package/Server/Services/ServiceService.ts +95 -8
- package/Server/Services/TraceAggregationService.ts +3 -0
- package/Server/Types/Database/Permissions/OwnerTableRegistry.ts +39 -0
- package/Server/Types/Workflow/ComponentCode.ts +9 -0
- package/Server/Types/Workflow/Components/Index.ts +2 -0
- package/Server/Types/Workflow/Components/Sleep.ts +105 -0
- package/Server/Types/Workflow/Workflow.ts +6 -0
- package/Server/Utils/Telemetry/ResourceFacetResolver.ts +150 -0
- package/Tests/Types/Monitor/KubernetesAlertTemplates.test.ts +193 -0
- package/Tests/UI/Components/DuplicateModel.test.tsx +2 -2
- package/Types/Monitor/KubernetesAlertTemplates.ts +239 -14
- package/Types/Permission.ts +692 -1
- package/Types/Telemetry/ServiceType.ts +3 -0
- package/Types/Workflow/ComponentID.ts +1 -0
- package/Types/Workflow/Components/Sleep.ts +71 -0
- package/Types/Workflow/Components.ts +2 -0
- package/Types/Workflow/WorkflowStatus.ts +1 -0
- package/UI/Components/BulkUpdate/BulkLabelActions.tsx +159 -32
- package/UI/Components/Navbar/NavBar.tsx +72 -123
- package/UI/Components/Navbar/NavBarMenuModal.tsx +642 -0
- package/UI/Components/Workflow/WorkflowStatus.tsx +3 -0
- package/build/dist/Models/DatabaseModels/CloudResource.js +871 -0
- package/build/dist/Models/DatabaseModels/CloudResource.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CloudResourceInstance.js +300 -0
- package/build/dist/Models/DatabaseModels/CloudResourceInstance.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CloudResourceLabelRule.js +520 -0
- package/build/dist/Models/DatabaseModels/CloudResourceLabelRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CloudResourceOwnerRule.js +601 -0
- package/build/dist/Models/DatabaseModels/CloudResourceOwnerRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CloudResourceOwnerTeam.js +503 -0
- package/build/dist/Models/DatabaseModels/CloudResourceOwnerTeam.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CloudResourceOwnerUser.js +502 -0
- package/build/dist/Models/DatabaseModels/CloudResourceOwnerUser.js.map +1 -0
- package/build/dist/Models/DatabaseModels/Host.js +215 -0
- package/build/dist/Models/DatabaseModels/Host.js.map +1 -1
- package/build/dist/Models/DatabaseModels/Index.js +36 -0
- package/build/dist/Models/DatabaseModels/Index.js.map +1 -1
- package/build/dist/Models/DatabaseModels/RumApplication.js +751 -0
- package/build/dist/Models/DatabaseModels/RumApplication.js.map +1 -0
- package/build/dist/Models/DatabaseModels/RumApplicationClient.js +252 -0
- package/build/dist/Models/DatabaseModels/RumApplicationClient.js.map +1 -0
- package/build/dist/Models/DatabaseModels/RumApplicationLabelRule.js +520 -0
- package/build/dist/Models/DatabaseModels/RumApplicationLabelRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/RumApplicationOwnerRule.js +601 -0
- package/build/dist/Models/DatabaseModels/RumApplicationOwnerRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/RumApplicationOwnerTeam.js +503 -0
- package/build/dist/Models/DatabaseModels/RumApplicationOwnerTeam.js.map +1 -0
- package/build/dist/Models/DatabaseModels/RumApplicationOwnerUser.js +502 -0
- package/build/dist/Models/DatabaseModels/RumApplicationOwnerUser.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunction.js +908 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunction.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionInstance.js +234 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionInstance.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionLabelRule.js +520 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionLabelRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionOwnerRule.js +601 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionOwnerRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionOwnerTeam.js +503 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionOwnerTeam.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionOwnerUser.js +502 -0
- package/build/dist/Models/DatabaseModels/ServerlessFunctionOwnerUser.js.map +1 -0
- package/build/dist/Models/DatabaseModels/Service.js +276 -0
- package/build/dist/Models/DatabaseModels/Service.js.map +1 -1
- package/build/dist/Models/DatabaseModels/TelemetryException.js +12 -1
- package/build/dist/Models/DatabaseModels/TelemetryException.js.map +1 -1
- package/build/dist/Models/DatabaseModels/WorkflowLog.js +53 -0
- package/build/dist/Models/DatabaseModels/WorkflowLog.js.map +1 -1
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780931746908-AddResumeStateToWorkflowLog.js +14 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780931746908-AddResumeStateToWorkflowLog.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780931863719-AddTelemetryResourceMetadataColumns.js +53 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780931863719-AddTelemetryResourceMetadataColumns.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780933132562-AddServerlessFunctionTables.js +82 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780933132562-AddServerlessFunctionTables.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780935387827-AddCloudResourceTables.js +82 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780935387827-AddCloudResourceTables.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780936579718-AddRumApplicationTables.js +83 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780936579718-AddRumApplicationTables.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780938407319-AddServerlessFunctionRuleTables.js +67 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780938407319-AddServerlessFunctionRuleTables.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780940721814-AddCloudResourceRuleTables.js +60 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780940721814-AddCloudResourceRuleTables.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780940998002-AddRumApplicationRuleTables.js +60 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780940998002-AddRumApplicationRuleTables.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780941762204-AddTelemetryResourceInventoryTables.js +45 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780941762204-AddTelemetryResourceInventoryTables.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780985763463-AddRumApplicationSdkLanguage.js +18 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780985763463-AddRumApplicationSdkLanguage.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780987192743-RecastCloudResourcesByEnvironment.js +27 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780987192743-RecastCloudResourcesByEnvironment.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +22 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
- package/build/dist/Server/Infrastructure/Queue.js +3 -0
- package/build/dist/Server/Infrastructure/Queue.js.map +1 -1
- package/build/dist/Server/Services/CloudResourceInstanceService.js +76 -0
- package/build/dist/Server/Services/CloudResourceInstanceService.js.map +1 -0
- package/build/dist/Server/Services/CloudResourceLabelRuleEngineService.js +160 -0
- package/build/dist/Server/Services/CloudResourceLabelRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/CloudResourceLabelRuleService.js +13 -0
- package/build/dist/Server/Services/CloudResourceLabelRuleService.js.map +1 -0
- package/build/dist/Server/Services/CloudResourceOwnerRuleEngineService.js +179 -0
- package/build/dist/Server/Services/CloudResourceOwnerRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/CloudResourceOwnerRuleService.js +13 -0
- package/build/dist/Server/Services/CloudResourceOwnerRuleService.js.map +1 -0
- package/build/dist/Server/Services/CloudResourceOwnerTeamService.js +9 -0
- package/build/dist/Server/Services/CloudResourceOwnerTeamService.js.map +1 -0
- package/build/dist/Server/Services/CloudResourceOwnerUserService.js +9 -0
- package/build/dist/Server/Services/CloudResourceOwnerUserService.js.map +1 -0
- package/build/dist/Server/Services/CloudResourceService.js +287 -0
- package/build/dist/Server/Services/CloudResourceService.js.map +1 -0
- package/build/dist/Server/Services/ExceptionAggregationService.js +3 -0
- package/build/dist/Server/Services/ExceptionAggregationService.js.map +1 -1
- package/build/dist/Server/Services/HostService.js +29 -1
- package/build/dist/Server/Services/HostService.js.map +1 -1
- package/build/dist/Server/Services/LogAggregationService.js +3 -0
- package/build/dist/Server/Services/LogAggregationService.js.map +1 -1
- package/build/dist/Server/Services/MetricAggregationService.js +3 -0
- package/build/dist/Server/Services/MetricAggregationService.js.map +1 -1
- package/build/dist/Server/Services/OpenTelemetryIngestService.js +98 -2
- package/build/dist/Server/Services/OpenTelemetryIngestService.js.map +1 -1
- package/build/dist/Server/Services/RumApplicationClientService.js +70 -0
- package/build/dist/Server/Services/RumApplicationClientService.js.map +1 -0
- package/build/dist/Server/Services/RumApplicationLabelRuleEngineService.js +160 -0
- package/build/dist/Server/Services/RumApplicationLabelRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/RumApplicationLabelRuleService.js +13 -0
- package/build/dist/Server/Services/RumApplicationLabelRuleService.js.map +1 -0
- package/build/dist/Server/Services/RumApplicationOwnerRuleEngineService.js +179 -0
- package/build/dist/Server/Services/RumApplicationOwnerRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/RumApplicationOwnerRuleService.js +13 -0
- package/build/dist/Server/Services/RumApplicationOwnerRuleService.js.map +1 -0
- package/build/dist/Server/Services/RumApplicationOwnerTeamService.js +9 -0
- package/build/dist/Server/Services/RumApplicationOwnerTeamService.js.map +1 -0
- package/build/dist/Server/Services/RumApplicationOwnerUserService.js +9 -0
- package/build/dist/Server/Services/RumApplicationOwnerUserService.js.map +1 -0
- package/build/dist/Server/Services/RumApplicationService.js +259 -0
- package/build/dist/Server/Services/RumApplicationService.js.map +1 -0
- package/build/dist/Server/Services/ServerlessFunctionInstanceService.js +64 -0
- package/build/dist/Server/Services/ServerlessFunctionInstanceService.js.map +1 -0
- package/build/dist/Server/Services/ServerlessFunctionLabelRuleEngineService.js +160 -0
- package/build/dist/Server/Services/ServerlessFunctionLabelRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/ServerlessFunctionLabelRuleService.js +13 -0
- package/build/dist/Server/Services/ServerlessFunctionLabelRuleService.js.map +1 -0
- package/build/dist/Server/Services/ServerlessFunctionOwnerRuleEngineService.js +179 -0
- package/build/dist/Server/Services/ServerlessFunctionOwnerRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/ServerlessFunctionOwnerRuleService.js +13 -0
- package/build/dist/Server/Services/ServerlessFunctionOwnerRuleService.js.map +1 -0
- package/build/dist/Server/Services/ServerlessFunctionOwnerTeamService.js +9 -0
- package/build/dist/Server/Services/ServerlessFunctionOwnerTeamService.js.map +1 -0
- package/build/dist/Server/Services/ServerlessFunctionOwnerUserService.js +9 -0
- package/build/dist/Server/Services/ServerlessFunctionOwnerUserService.js.map +1 -0
- package/build/dist/Server/Services/ServerlessFunctionService.js +299 -0
- package/build/dist/Server/Services/ServerlessFunctionService.js.map +1 -0
- package/build/dist/Server/Services/ServiceService.js +63 -7
- package/build/dist/Server/Services/ServiceService.js.map +1 -1
- package/build/dist/Server/Services/TraceAggregationService.js +3 -0
- package/build/dist/Server/Services/TraceAggregationService.js.map +1 -1
- package/build/dist/Server/Types/Database/Permissions/OwnerTableRegistry.js +39 -0
- package/build/dist/Server/Types/Database/Permissions/OwnerTableRegistry.js.map +1 -1
- package/build/dist/Server/Types/Workflow/ComponentCode.js.map +1 -1
- package/build/dist/Server/Types/Workflow/Components/Index.js +2 -0
- package/build/dist/Server/Types/Workflow/Components/Index.js.map +1 -1
- package/build/dist/Server/Types/Workflow/Components/Sleep.js +85 -0
- package/build/dist/Server/Types/Workflow/Components/Sleep.js.map +1 -0
- package/build/dist/Server/Utils/Telemetry/ResourceFacetResolver.js +90 -0
- package/build/dist/Server/Utils/Telemetry/ResourceFacetResolver.js.map +1 -1
- package/build/dist/Tests/Types/Monitor/KubernetesAlertTemplates.test.js +121 -0
- package/build/dist/Tests/Types/Monitor/KubernetesAlertTemplates.test.js.map +1 -0
- package/build/dist/Tests/UI/Components/DuplicateModel.test.js +2 -2
- package/build/dist/Tests/UI/Components/DuplicateModel.test.js.map +1 -1
- package/build/dist/Types/Monitor/KubernetesAlertTemplates.js +210 -14
- package/build/dist/Types/Monitor/KubernetesAlertTemplates.js.map +1 -1
- package/build/dist/Types/Permission.js +609 -1
- package/build/dist/Types/Permission.js.map +1 -1
- package/build/dist/Types/Telemetry/ServiceType.js +3 -0
- package/build/dist/Types/Telemetry/ServiceType.js.map +1 -1
- package/build/dist/Types/Workflow/ComponentID.js +1 -0
- package/build/dist/Types/Workflow/ComponentID.js.map +1 -1
- package/build/dist/Types/Workflow/Components/Sleep.js +64 -0
- package/build/dist/Types/Workflow/Components/Sleep.js.map +1 -0
- package/build/dist/Types/Workflow/Components.js +2 -0
- package/build/dist/Types/Workflow/Components.js.map +1 -1
- package/build/dist/Types/Workflow/WorkflowStatus.js +1 -0
- package/build/dist/Types/Workflow/WorkflowStatus.js.map +1 -1
- package/build/dist/UI/Components/BulkUpdate/BulkLabelActions.js +113 -19
- package/build/dist/UI/Components/BulkUpdate/BulkLabelActions.js.map +1 -1
- package/build/dist/UI/Components/Navbar/NavBar.js +34 -66
- package/build/dist/UI/Components/Navbar/NavBar.js.map +1 -1
- package/build/dist/UI/Components/Navbar/NavBarMenuModal.js +412 -0
- package/build/dist/UI/Components/Navbar/NavBarMenuModal.js.map +1 -0
- package/build/dist/UI/Components/Workflow/WorkflowStatus.js +3 -0
- package/build/dist/UI/Components/Workflow/WorkflowStatus.js.map +1 -1
- package/package.json +1 -1
- package/UI/Components/Navbar/NavBarMenu.tsx +0 -183
- package/UI/Components/Navbar/NavBarMenuItem.tsx +0 -146
- package/build/dist/UI/Components/Navbar/NavBarMenu.js +0 -82
- package/build/dist/UI/Components/Navbar/NavBarMenu.js.map +0 -1
- package/build/dist/UI/Components/Navbar/NavBarMenuItem.js +0 -109
- package/build/dist/UI/Components/Navbar/NavBarMenuItem.js.map +0 -1
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import {
|
|
2
|
+
KubernetesAlertTemplate,
|
|
3
|
+
KubernetesAlertTemplateArgs,
|
|
4
|
+
getAllKubernetesAlertTemplates,
|
|
5
|
+
getKubernetesAlertTemplateById,
|
|
6
|
+
} from "../../../Types/Monitor/KubernetesAlertTemplates";
|
|
7
|
+
import MonitorStep from "../../../Types/Monitor/MonitorStep";
|
|
8
|
+
import MonitorStepKubernetesMonitor from "../../../Types/Monitor/MonitorStepKubernetesMonitor";
|
|
9
|
+
import MetricsAggregationType from "../../../Types/Metrics/MetricsAggregationType";
|
|
10
|
+
import ObjectID from "../../../Types/ObjectID";
|
|
11
|
+
|
|
12
|
+
/*
|
|
13
|
+
* These tests lock in the subtle, easy-to-regress decisions in the per-node
|
|
14
|
+
* ratio alert templates (request utilization + usage utilization):
|
|
15
|
+
*
|
|
16
|
+
* 1. Group-by uses the ClickHouse-stored `resource.`-prefixed attribute
|
|
17
|
+
* name (`resource.k8s.node.name`), not the bare `k8s.node.name`.
|
|
18
|
+
* OneUptime stamps OTel resource attributes with a `resource.` prefix
|
|
19
|
+
* at ingest, so the bare key would match nothing and collapse every
|
|
20
|
+
* node into one mislabeled series.
|
|
21
|
+
*
|
|
22
|
+
* 2. The aggregation differs by numerator shape:
|
|
23
|
+
* - Request utilization sums MANY container series per node, and both
|
|
24
|
+
* metrics come from the same `k8s_cluster` scrape, so `Sum` on both
|
|
25
|
+
* sides totals the containers and the scrape multiple cancels.
|
|
26
|
+
* - Usage utilization has ONE series per node, and numerator
|
|
27
|
+
* (kubeletstats) and denominator (k8s_cluster) come from different
|
|
28
|
+
* receivers, so `Avg` on both sides gives the correct per-minute
|
|
29
|
+
* ratio regardless of each receiver's scrape count.
|
|
30
|
+
*
|
|
31
|
+
* 3. The criteria reference the FORMULA alias (the computed percentage),
|
|
32
|
+
* not a raw query alias.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
/*
 * One expected-shape record per ratio alert template under test. Each field
 * is compared against the monitor step the template generates.
 */
interface RatioTemplateCase {
  // Template id, used to look the template up in the registry.
  id: string;
  // Metric name expected on the first (numerator) query.
  numerator: string;
  // Metric name expected on the second (denominator) query.
  denominator: string;
  // Alias of the numerator query as referenced inside the formula.
  numAlias: string;
  // Alias of the denominator query as referenced inside the formula.
  denAlias: string;
  // Alias of the formula result; the criteria filter must point at this.
  resultAlias: string;
  // Aggregation expected on BOTH queries.
  aggregation: MetricsAggregationType;
  // Value expected on the first criteria filter.
  threshold: number;
}
|
|
45
|
+
|
|
46
|
+
/*
 * Expected configuration for every per-node ratio template. The first two
 * entries are the request-utilization templates (Sum on both queries); the
 * last two are the usage-utilization templates (Avg on both queries).
 */
const RATIO_TEMPLATES: Array<RatioTemplateCase> = [
  // CPU requests vs. allocatable CPU — Sum/Sum.
  {
    id: "k8s-node-cpu-request-utilization",
    numerator: "k8s.container.cpu_request",
    denominator: "k8s.node.allocatable_cpu",
    numAlias: "req_cpu",
    denAlias: "alloc_cpu",
    resultAlias: "node_cpu_request_utilization",
    aggregation: MetricsAggregationType.Sum,
    threshold: 90,
  },
  // Memory requests vs. allocatable memory — Sum/Sum.
  {
    id: "k8s-node-memory-request-utilization",
    numerator: "k8s.container.memory_request",
    denominator: "k8s.node.allocatable_memory",
    numAlias: "req_mem",
    denAlias: "alloc_mem",
    resultAlias: "node_memory_request_utilization",
    aggregation: MetricsAggregationType.Sum,
    threshold: 90,
  },
  // Node CPU usage vs. allocatable CPU — Avg/Avg.
  {
    id: "k8s-high-cpu",
    numerator: "k8s.node.cpu.usage",
    denominator: "k8s.node.allocatable_cpu",
    numAlias: "used_cpu",
    denAlias: "alloc_cpu",
    resultAlias: "node_cpu_utilization",
    aggregation: MetricsAggregationType.Avg,
    threshold: 90,
  },
  // Node memory usage vs. allocatable memory — Avg/Avg, lower threshold.
  {
    id: "k8s-high-memory",
    numerator: "k8s.node.memory.usage",
    denominator: "k8s.node.allocatable_memory",
    numAlias: "used_mem",
    denAlias: "alloc_mem",
    resultAlias: "node_memory_utilization",
    aggregation: MetricsAggregationType.Avg,
    threshold: 85,
  },
];
|
|
90
|
+
|
|
91
|
+
/*
 * Produce a fresh set of template arguments for a single test run. The
 * cluster identifier and monitor name are fixed strings; each status and
 * severity id comes from its own ObjectID.generate() call.
 */
function buildArgs(): KubernetesAlertTemplateArgs {
  const templateArgs: KubernetesAlertTemplateArgs = {
    clusterIdentifier: "prod-cluster",
    onlineMonitorStatusId: ObjectID.generate(),
    offlineMonitorStatusId: ObjectID.generate(),
    defaultIncidentSeverityId: ObjectID.generate(),
    defaultAlertSeverityId: ObjectID.generate(),
    monitorName: "Test Monitor",
  };

  return templateArgs;
}
|
|
101
|
+
|
|
102
|
+
/*
 * Return the Kubernetes monitor configuration carried by a monitor step.
 * Throws when the step has no data or no kubernetesMonitor, so callers can
 * use the result without further null checks.
 */
function getKubernetesMonitor(step: MonitorStep): MonitorStepKubernetesMonitor {
  const monitorConfig: MonitorStepKubernetesMonitor | undefined =
    step.data?.kubernetesMonitor;

  if (monitorConfig) {
    return monitorConfig;
  }

  throw new Error("kubernetesMonitor missing from monitor step");
}
|
|
110
|
+
|
|
111
|
+
/*
 * Regression suite for the per-node ratio alert templates. For every entry
 * in RATIO_TEMPLATES it checks that the template is registered and that the
 * monitor step it generates has the expected queries, aggregation, group-by
 * key, formula and criteria filter.
 */
describe("KubernetesAlertTemplates - per-node ratio templates", () => {
  test("all four ratio templates are registered", () => {
    const ids: Array<string> = getAllKubernetesAlertTemplates().map(
      (t: KubernetesAlertTemplate) => {
        return t.id;
      },
    );
    for (const tc of RATIO_TEMPLATES) {
      expect(ids).toContain(tc.id);
    }
  });

  test.each(RATIO_TEMPLATES)(
    "$id is a per-node ($aggregation/$aggregation) ratio keyed on resource.k8s.node.name",
    (tc: RatioTemplateCase) => {
      const template: KubernetesAlertTemplate | undefined =
        getKubernetesAlertTemplateById(tc.id);

      /*
       * Narrow with an explicit guard rather than a non-null assertion so a
       * missing template fails with a message naming the offending id.
       */
      if (!template) {
        throw new Error(`Kubernetes alert template "${tc.id}" not found`);
      }

      const step: MonitorStep = template.getMonitorStep(buildArgs());
      const monitor: MonitorStepKubernetesMonitor = getKubernetesMonitor(step);

      const queryConfigs: Array<any> = monitor.metricViewConfig
        .queryConfigs as Array<any>;
      const formulaConfigs: Array<any> = monitor.metricViewConfig
        .formulaConfigs as Array<any>;

      // Two queries (numerator + denominator) and one formula.
      expect(queryConfigs).toHaveLength(2);
      expect(formulaConfigs).toHaveLength(1);

      const [numerator, denominator] = queryConfigs;

      // Metric names.
      expect(numerator.metricQueryData.filterData.metricName).toBe(
        tc.numerator,
      );
      expect(denominator.metricQueryData.filterData.metricName).toBe(
        tc.denominator,
      );

      /*
       * Both sides must use the same aggregation — Sum for request
       * utilization, Avg for usage utilization. (`aggegationType` is the
       * property name the generated config actually uses.)
       */
      expect(numerator.metricQueryData.filterData.aggegationType).toBe(
        tc.aggregation,
      );
      expect(denominator.metricQueryData.filterData.aggegationType).toBe(
        tc.aggregation,
      );

      /*
       * Both queries must group by the resource-prefixed node attribute so
       * the numerator and denominator series line up in the formula.
       */
      expect(numerator.metricQueryData.groupByAttributeKeys).toEqual([
        "resource.k8s.node.name",
      ]);
      expect(denominator.metricQueryData.groupByAttributeKeys).toEqual([
        "resource.k8s.node.name",
      ]);

      // Formula divides numerator by denominator and scales to a percentage.
      expect(formulaConfigs[0].metricFormulaData.metricFormula).toBe(
        `(${tc.numAlias} / ${tc.denAlias}) * 100`,
      );

      /*
       * The criteria must reference the FORMULA alias (not a raw query), so
       * the threshold is evaluated against the computed percentage.
       *
       * Every link in this chain is optional, so check the result before
       * indexing it: a template with no criteria should fail with a clear
       * message instead of a TypeError on `undefined[0]`.
       */
      const offlineFilters: Array<any> | undefined = step.data?.monitorCriteria
        .data?.monitorCriteriaInstanceArray?.[0]?.data?.filters as
        | Array<any>
        | undefined;

      if (!offlineFilters || offlineFilters.length === 0) {
        throw new Error(
          `Template "${tc.id}" has no filters on its first monitor criteria instance`,
        );
      }

      expect(offlineFilters[0].metricMonitorOptions.metricAlias).toBe(
        tc.resultAlias,
      );
      expect(offlineFilters[0].value).toBe(tc.threshold);
    },
  );
});
|
|
@@ -236,7 +236,7 @@ describe("DuplicateModel", () => {
|
|
|
236
236
|
);
|
|
237
237
|
expect(
|
|
238
238
|
within(errorDialog).getByTestId("confirm-modal-description")?.textContent,
|
|
239
|
-
).toBe("
|
|
239
|
+
).toBe("Could not create Foo");
|
|
240
240
|
expect(
|
|
241
241
|
within(errorDialog).getByTestId("modal-footer-submit-button")
|
|
242
242
|
?.textContent,
|
|
@@ -274,7 +274,7 @@ describe("DuplicateModel", () => {
|
|
|
274
274
|
);
|
|
275
275
|
expect(
|
|
276
276
|
within(errorDialog).getByTestId("confirm-modal-description")?.textContent,
|
|
277
|
-
).toBe("
|
|
277
|
+
).toBe("Could not find Foo with id foo");
|
|
278
278
|
expect(
|
|
279
279
|
within(errorDialog).getByTestId("modal-footer-submit-button")
|
|
280
280
|
?.textContent,
|
|
@@ -221,6 +221,111 @@ export function buildKubernetesMonitorConfig(args: {
|
|
|
221
221
|
};
|
|
222
222
|
}
|
|
223
223
|
|
|
224
|
+
/**
|
|
225
|
+
* Build a per-series ratio monitor: `(numerator / denominator) * 100`,
|
|
226
|
+
* grouped by a single OpenTelemetry attribute so one incident fires per
|
|
227
|
+
* group (e.g. per node).
|
|
228
|
+
*
|
|
229
|
+
* Used for saturation metrics that aren't emitted as a single ready-made
|
|
230
|
+
* series — e.g. node request utilization (summed pod requests ÷ node
|
|
231
|
+
* allocatable) and node usage utilization (node usage ÷ node allocatable),
|
|
232
|
+
* neither of which the kubeletstats receiver exposes as a percentage.
|
|
233
|
+
*
|
|
234
|
+
* Aggregation (`aggregationType`, default `Sum`) — the per-series worker
|
|
235
|
+
* buckets raw rows by (group, minute) and applies this aggregation to
|
|
236
|
+
* EVERY row in the bucket, i.e. across both the grouped entities AND the
|
|
237
|
+
* scrapes in that minute. Pick it based on the numerator:
|
|
238
|
+
*
|
|
239
|
+
* - `Sum` when the numerator must be totalled across multiple series per
|
|
240
|
+
* group (e.g. summing every container's request on a node). The scrape
|
|
241
|
+
* multiple then has to cancel, so numerator and denominator must ride
|
|
242
|
+
* the SAME receiver/scrape — true for the request-utilization
|
|
243
|
+
* templates, where both metrics come from `k8s_cluster`:
|
|
244
|
+
* `(Σrequests × scrapes) / (allocatable × scrapes)`.
|
|
245
|
+
*
|
|
246
|
+
* - `Avg` when the numerator is already ONE series per group (e.g.
|
|
247
|
+
* `k8s.node.cpu.usage`). Avg yields the representative per-minute value
|
|
248
|
+
* independent of scrape count, so it stays correct even when numerator
|
|
249
|
+
* and denominator come from DIFFERENT receivers on independent scrape
|
|
250
|
+
* cycles (node usage is from the kubeletstats DaemonSet; allocatable is
|
|
251
|
+
* from the `k8s_cluster` Deployment). `Sum` there would only cancel if
|
|
252
|
+
* both reported the same row count every minute — fragile across
|
|
253
|
+
* restarts / missed scrapes / minute-boundary jitter.
|
|
254
|
+
*
|
|
255
|
+
* The group-by key is the ClickHouse-stored attribute name, which carries
|
|
256
|
+
* the `resource.` prefix for OTel resource attributes (see
|
|
257
|
+
* OtelMetricsIngestService — resource attributes are stamped with
|
|
258
|
+
* `prefixKeysWithString: "resource"`). So node grouping is
|
|
259
|
+
* `resource.k8s.node.name`, not the bare `k8s.node.name`.
|
|
260
|
+
*/
|
|
261
|
+
export function buildKubernetesRatioMonitorConfig(args: {
  clusterIdentifier: string;
  numeratorMetricName: string;
  denominatorMetricName: string;
  groupByAttributeKey: string;
  numeratorAlias: string;
  denominatorAlias: string;
  resultAlias: string;
  resultLegend: string;
  resourceScope: KubernetesResourceScope;
  rollingTime: RollingTime;
  aggregationType?: MetricsAggregationType | undefined;
}): MonitorStepKubernetesMonitor {
  // Callers that do not choose an aggregation get Sum.
  const effectiveAggregation: MetricsAggregationType =
    args.aggregationType || MetricsAggregationType.Sum;

  /*
   * The two operands of the ratio, numerator first. Order matters: it is
   * the order in which the query configs appear in the returned view.
   */
  const operands: Array<{ alias: string; metricName: string }> = [
    { alias: args.numeratorAlias, metricName: args.numeratorMetricName },
    { alias: args.denominatorAlias, metricName: args.denominatorMetricName },
  ];

  /*
   * One query per operand. Both share the same aggregation and the same
   * group-by key; the alias doubles as variable name, title, description
   * and legend.
   */
  const operandQueries: Array<any> = operands.map(
    (operand: { alias: string; metricName: string }): any => {
      return {
        metricAliasData: {
          metricVariable: operand.alias,
          title: operand.alias,
          description: operand.alias,
          legend: operand.alias,
          legendUnit: undefined,
        },
        metricQueryData: {
          filterData: {
            metricName: operand.metricName,
            attributes: {},
            /*
             * NOTE(review): key spelling ("aggegationType") is kept exactly
             * as written — presumably it matches the field name the consumer
             * reads; confirm before renaming.
             */
            aggegationType: effectiveAggregation,
            aggregateBy: {},
          },
          groupByAttributeKeys: [args.groupByAttributeKey],
        },
      };
    },
  );

  // numerator ÷ denominator, scaled to a percentage.
  const percentFormula: string = `(${args.numeratorAlias} / ${args.denominatorAlias}) * 100`;

  return {
    clusterIdentifier: args.clusterIdentifier,
    resourceScope: args.resourceScope,
    resourceFilters: {},
    metricViewConfig: {
      queryConfigs: operandQueries,
      formulaConfigs: [
        {
          metricAliasData: {
            metricVariable: args.resultAlias,
            title: args.resultLegend,
            description: args.resultLegend,
            legend: args.resultLegend,
            legendUnit: "%",
          },
          metricFormulaData: {
            metricFormula: percentFormula,
          },
        },
      ],
    },
    rollingTime: args.rollingTime,
  };
}
|
|
328
|
+
|
|
224
329
|
// --- Template Definitions ---
|
|
225
330
|
|
|
226
331
|
const crashLoopBackOffTemplate: KubernetesAlertTemplate = {
|
|
@@ -356,19 +461,31 @@ const nodeNotReadyTemplate: KubernetesAlertTemplate = {
|
|
|
356
461
|
const highCpuTemplate: KubernetesAlertTemplate = {
|
|
357
462
|
id: "k8s-high-cpu",
|
|
358
463
|
name: "High Node CPU Utilization",
|
|
359
|
-
description:
|
|
464
|
+
description:
|
|
465
|
+
"Alert when a node's average CPU usage exceeds 90% of its allocatable CPU. Computed per node as k8s.node.cpu.usage ÷ k8s.node.allocatable_cpu × 100 — both are cores, so this is a true percentage (the raw k8s.node.cpu.utilization metric is a misnamed cores gauge, not a percent).",
|
|
360
466
|
category: "Node",
|
|
361
467
|
severity: "Warning",
|
|
362
468
|
getMonitorStep: (args: KubernetesAlertTemplateArgs): MonitorStep => {
|
|
363
|
-
const metricAlias: string = "
|
|
469
|
+
const metricAlias: string = "node_cpu_utilization";
|
|
364
470
|
|
|
365
471
|
return buildKubernetesMonitorStep({
|
|
366
|
-
kubernetesMonitor:
|
|
472
|
+
kubernetesMonitor: buildKubernetesRatioMonitorConfig({
|
|
367
473
|
clusterIdentifier: args.clusterIdentifier,
|
|
368
|
-
|
|
369
|
-
|
|
474
|
+
numeratorMetricName: "k8s.node.cpu.usage",
|
|
475
|
+
denominatorMetricName: "k8s.node.allocatable_cpu",
|
|
476
|
+
groupByAttributeKey: "resource.k8s.node.name",
|
|
477
|
+
numeratorAlias: "used_cpu",
|
|
478
|
+
denominatorAlias: "alloc_cpu",
|
|
479
|
+
resultAlias: metricAlias,
|
|
480
|
+
resultLegend: "Node CPU Utilization (%)",
|
|
370
481
|
resourceScope: KubernetesResourceScope.Node,
|
|
371
482
|
rollingTime: RollingTime.Past5Minutes,
|
|
483
|
+
/*
|
|
484
|
+
* Single series per node from two DIFFERENT receivers (usage =
|
|
485
|
+
* kubeletstats, allocatable = k8s_cluster) — Avg keeps the per-minute
|
|
486
|
+
* ratio correct regardless of each receiver's scrape count. See
|
|
487
|
+
* buildKubernetesRatioMonitorConfig.
|
|
488
|
+
*/
|
|
372
489
|
aggregationType: MetricsAggregationType.Avg,
|
|
373
490
|
}),
|
|
374
491
|
offlineCriteriaInstance: buildOfflineCriteriaInstance({
|
|
@@ -380,10 +497,10 @@ const highCpuTemplate: KubernetesAlertTemplate = {
|
|
|
380
497
|
filterType: FilterType.GreaterThan,
|
|
381
498
|
value: 90,
|
|
382
499
|
incidentTitle: `[K8s] High CPU Utilization (>90%) - ${args.monitorName}`,
|
|
383
|
-
incidentDescription: `
|
|
500
|
+
incidentDescription: `A node's average CPU usage has exceeded 90% of its allocatable CPU. Sustained high CPU usage can cause pod throttling, increased latency, and potential node instability. Check the root cause for the specific node and top CPU-consuming workloads.`,
|
|
384
501
|
criteriaName: "High CPU - Utilization > 90%",
|
|
385
502
|
criteriaDescription:
|
|
386
|
-
"Triggers when
|
|
503
|
+
"Triggers when a node's average CPU usage exceeds 90% of its allocatable CPU over the monitoring window.",
|
|
387
504
|
}),
|
|
388
505
|
onlineCriteriaInstance: buildOnlineCriteriaInstance({
|
|
389
506
|
onlineMonitorStatusId: args.onlineMonitorStatusId,
|
|
@@ -398,19 +515,31 @@ const highCpuTemplate: KubernetesAlertTemplate = {
|
|
|
398
515
|
const highMemoryTemplate: KubernetesAlertTemplate = {
|
|
399
516
|
id: "k8s-high-memory",
|
|
400
517
|
name: "High Node Memory Utilization",
|
|
401
|
-
description:
|
|
518
|
+
description:
|
|
519
|
+
"Alert when a node's average memory usage exceeds 85% of its allocatable memory. Computed per node as k8s.node.memory.usage ÷ k8s.node.allocatable_memory × 100 — both are bytes, so this is a true percentage (the raw k8s.node.memory.usage metric is bytes, not a percent).",
|
|
402
520
|
category: "Node",
|
|
403
521
|
severity: "Warning",
|
|
404
522
|
getMonitorStep: (args: KubernetesAlertTemplateArgs): MonitorStep => {
|
|
405
|
-
const metricAlias: string = "
|
|
523
|
+
const metricAlias: string = "node_memory_utilization";
|
|
406
524
|
|
|
407
525
|
return buildKubernetesMonitorStep({
|
|
408
|
-
kubernetesMonitor:
|
|
526
|
+
kubernetesMonitor: buildKubernetesRatioMonitorConfig({
|
|
409
527
|
clusterIdentifier: args.clusterIdentifier,
|
|
410
|
-
|
|
411
|
-
|
|
528
|
+
numeratorMetricName: "k8s.node.memory.usage",
|
|
529
|
+
denominatorMetricName: "k8s.node.allocatable_memory",
|
|
530
|
+
groupByAttributeKey: "resource.k8s.node.name",
|
|
531
|
+
numeratorAlias: "used_mem",
|
|
532
|
+
denominatorAlias: "alloc_mem",
|
|
533
|
+
resultAlias: metricAlias,
|
|
534
|
+
resultLegend: "Node Memory Utilization (%)",
|
|
412
535
|
resourceScope: KubernetesResourceScope.Node,
|
|
413
536
|
rollingTime: RollingTime.Past5Minutes,
|
|
537
|
+
/*
|
|
538
|
+
* Single series per node from two DIFFERENT receivers (usage =
|
|
539
|
+
* kubeletstats, allocatable = k8s_cluster) — Avg keeps the per-minute
|
|
540
|
+
* ratio correct regardless of each receiver's scrape count. See
|
|
541
|
+
* buildKubernetesRatioMonitorConfig.
|
|
542
|
+
*/
|
|
414
543
|
aggregationType: MetricsAggregationType.Avg,
|
|
415
544
|
}),
|
|
416
545
|
offlineCriteriaInstance: buildOfflineCriteriaInstance({
|
|
@@ -422,10 +551,10 @@ const highMemoryTemplate: KubernetesAlertTemplate = {
|
|
|
422
551
|
filterType: FilterType.GreaterThan,
|
|
423
552
|
value: 85,
|
|
424
553
|
incidentTitle: `[K8s] High Memory Utilization (>85%) - ${args.monitorName}`,
|
|
425
|
-
incidentDescription: `
|
|
554
|
+
incidentDescription: `A node's average memory usage has exceeded 85% of its allocatable memory. High memory usage can lead to OOMKilled pods, node instability, and potential evictions. Check the root cause for the specific node and top memory-consuming workloads.`,
|
|
426
555
|
criteriaName: "High Memory - Utilization > 85%",
|
|
427
556
|
criteriaDescription:
|
|
428
|
-
"Triggers when
|
|
557
|
+
"Triggers when a node's average memory usage exceeds 85% of its allocatable memory over the monitoring window.",
|
|
429
558
|
}),
|
|
430
559
|
onlineCriteriaInstance: buildOnlineCriteriaInstance({
|
|
431
560
|
onlineMonitorStatusId: args.onlineMonitorStatusId,
|
|
@@ -736,6 +865,100 @@ const daemonSetUnavailableTemplate: KubernetesAlertTemplate = {
|
|
|
736
865
|
},
|
|
737
866
|
};
|
|
738
867
|
|
|
868
|
+
/*
 * Alert template for node CPU request commitment: per node, container CPU
 * requests divided by node allocatable CPU, as a percentage. No aggregation
 * type is passed, so buildKubernetesRatioMonitorConfig applies its default
 * (Sum). Goes offline above 90 and back online at 90 or below.
 */
const nodeCpuRequestUtilizationTemplate: KubernetesAlertTemplate = {
  id: "k8s-node-cpu-request-utilization",
  name: "High Node CPU Request Commitment",
  description:
    "Alert when a node's committed CPU requests exceed 90% of its allocatable CPU. Derived per node from summed container CPU requests over node allocatable CPU — both collected by default via the k8s_cluster receiver. A near-full node can't schedule new pods even if actual CPU usage is low.",
  category: "Node",
  severity: "Warning",
  getMonitorStep: (args: KubernetesAlertTemplateArgs): MonitorStep => {
    // Name of the formula result; both criteria below evaluate this variable.
    const resultAlias: string = "node_cpu_request_utilization";

    // Ratio query: requested CPU over allocatable CPU, grouped by node name.
    const ratioMonitor: MonitorStepKubernetesMonitor =
      buildKubernetesRatioMonitorConfig({
        clusterIdentifier: args.clusterIdentifier,
        numeratorMetricName: "k8s.container.cpu_request",
        denominatorMetricName: "k8s.node.allocatable_cpu",
        groupByAttributeKey: "resource.k8s.node.name",
        numeratorAlias: "req_cpu",
        denominatorAlias: "alloc_cpu",
        resultAlias: resultAlias,
        resultLegend: "Node CPU Request Utilization (%)",
        resourceScope: KubernetesResourceScope.Node,
        rollingTime: RollingTime.Past5Minutes,
      });

    return buildKubernetesMonitorStep({
      kubernetesMonitor: ratioMonitor,
      // Trip when the percentage is strictly greater than 90.
      offlineCriteriaInstance: buildOfflineCriteriaInstance({
        offlineMonitorStatusId: args.offlineMonitorStatusId,
        incidentSeverityId: args.defaultIncidentSeverityId,
        alertSeverityId: args.defaultAlertSeverityId,
        monitorName: args.monitorName,
        metricAlias: resultAlias,
        filterType: FilterType.GreaterThan,
        value: 90,
        incidentTitle: `[K8s] High Node CPU Request Commitment (>90%) - ${args.monitorName}`,
        incidentDescription: `A node's committed CPU requests have exceeded 90% of its allocatable CPU. The node is nearly full from a scheduling standpoint and may be unable to place new pods, even if current CPU usage is low. Check the root cause for the specific node and its top CPU-requesting workloads.`,
        criteriaName: "High CPU Request Commitment - Utilization > 90%",
        criteriaDescription:
          "Triggers when any node's summed container CPU requests exceed 90% of its allocatable CPU.",
      }),
      // Recover once the percentage is back at or below the same threshold.
      onlineCriteriaInstance: buildOnlineCriteriaInstance({
        onlineMonitorStatusId: args.onlineMonitorStatusId,
        metricAlias: resultAlias,
        filterType: FilterType.LessThanOrEqualTo,
        value: 90,
      }),
    });
  },
};
|
|
914
|
+
|
|
915
|
+
/*
 * Alert template for node memory request commitment: per node, container
 * memory requests divided by node allocatable memory, as a percentage. No
 * aggregation type is passed, so buildKubernetesRatioMonitorConfig applies
 * its default (Sum). Goes offline above 90 and back online at 90 or below.
 */
const nodeMemoryRequestUtilizationTemplate: KubernetesAlertTemplate = {
  id: "k8s-node-memory-request-utilization",
  name: "High Node Memory Request Commitment",
  description:
    "Alert when a node's committed memory requests exceed 90% of its allocatable memory. Derived per node from summed container memory requests over node allocatable memory — both collected by default via the k8s_cluster receiver. A near-full node can't schedule new pods even if actual memory usage is low.",
  category: "Node",
  severity: "Warning",
  getMonitorStep: (args: KubernetesAlertTemplateArgs): MonitorStep => {
    // Name of the formula result; both criteria below evaluate this variable.
    const resultAlias: string = "node_memory_request_utilization";

    // Ratio query: requested memory over allocatable memory, grouped by node name.
    const ratioMonitor: MonitorStepKubernetesMonitor =
      buildKubernetesRatioMonitorConfig({
        clusterIdentifier: args.clusterIdentifier,
        numeratorMetricName: "k8s.container.memory_request",
        denominatorMetricName: "k8s.node.allocatable_memory",
        groupByAttributeKey: "resource.k8s.node.name",
        numeratorAlias: "req_mem",
        denominatorAlias: "alloc_mem",
        resultAlias: resultAlias,
        resultLegend: "Node Memory Request Utilization (%)",
        resourceScope: KubernetesResourceScope.Node,
        rollingTime: RollingTime.Past5Minutes,
      });

    return buildKubernetesMonitorStep({
      kubernetesMonitor: ratioMonitor,
      // Trip when the percentage is strictly greater than 90.
      offlineCriteriaInstance: buildOfflineCriteriaInstance({
        offlineMonitorStatusId: args.offlineMonitorStatusId,
        incidentSeverityId: args.defaultIncidentSeverityId,
        alertSeverityId: args.defaultAlertSeverityId,
        monitorName: args.monitorName,
        metricAlias: resultAlias,
        filterType: FilterType.GreaterThan,
        value: 90,
        incidentTitle: `[K8s] High Node Memory Request Commitment (>90%) - ${args.monitorName}`,
        incidentDescription: `A node's committed memory requests have exceeded 90% of its allocatable memory. The node is nearly full from a scheduling standpoint and may be unable to place new pods, even if current memory usage is low. Check the root cause for the specific node and its top memory-requesting workloads.`,
        criteriaName: "High Memory Request Commitment - Utilization > 90%",
        criteriaDescription:
          "Triggers when any node's summed container memory requests exceed 90% of its allocatable memory.",
      }),
      // Recover once the percentage is back at or below the same threshold.
      onlineCriteriaInstance: buildOnlineCriteriaInstance({
        onlineMonitorStatusId: args.onlineMonitorStatusId,
        metricAlias: resultAlias,
        filterType: FilterType.LessThanOrEqualTo,
        value: 90,
      }),
    });
  },
};
|
|
961
|
+
|
|
739
962
|
export function getAllKubernetesAlertTemplates(): Array<KubernetesAlertTemplate> {
|
|
740
963
|
return [
|
|
741
964
|
crashLoopBackOffTemplate,
|
|
@@ -750,6 +973,8 @@ export function getAllKubernetesAlertTemplates(): Array<KubernetesAlertTemplate>
|
|
|
750
973
|
schedulerBacklogTemplate,
|
|
751
974
|
highDiskUsageTemplate,
|
|
752
975
|
daemonSetUnavailableTemplate,
|
|
976
|
+
nodeCpuRequestUtilizationTemplate,
|
|
977
|
+
nodeMemoryRequestUtilizationTemplate,
|
|
753
978
|
];
|
|
754
979
|
}
|
|
755
980
|
|