@oneuptime/common 11.0.0 → 11.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Models/DatabaseModels/Alert.ts +110 -0
- package/Models/DatabaseModels/CephCluster.ts +964 -0
- package/Models/DatabaseModels/CephClusterLabelRule.ts +514 -0
- package/Models/DatabaseModels/CephClusterOwnerRule.ts +596 -0
- package/Models/DatabaseModels/CephClusterOwnerTeam.ts +487 -0
- package/Models/DatabaseModels/CephClusterOwnerUser.ts +486 -0
- package/Models/DatabaseModels/CephResource.ts +809 -0
- package/Models/DatabaseModels/Host.ts +64 -0
- package/Models/DatabaseModels/Incident.ts +110 -0
- package/Models/DatabaseModels/Index.ts +24 -0
- package/Models/DatabaseModels/ProxmoxCluster.ts +943 -0
- package/Models/DatabaseModels/ProxmoxClusterLabelRule.ts +514 -0
- package/Models/DatabaseModels/ProxmoxClusterOwnerRule.ts +596 -0
- package/Models/DatabaseModels/ProxmoxClusterOwnerTeam.ts +487 -0
- package/Models/DatabaseModels/ProxmoxClusterOwnerUser.ts +486 -0
- package/Models/DatabaseModels/ProxmoxResource.ts +726 -0
- package/Models/DatabaseModels/ScheduledMaintenance.ts +110 -0
- package/Server/API/BillingInvoiceAPI.ts +47 -7
- package/Server/API/CephResourceAPI.ts +134 -0
- package/Server/API/DashboardAPI.ts +46 -0
- package/Server/API/ProjectAPI.ts +15 -0
- package/Server/API/ProxmoxResourceAPI.ts +132 -0
- package/Server/API/ResellerPlanAPI.ts +17 -0
- package/Server/Infrastructure/GlobalCache.ts +8 -2
- package/Server/Infrastructure/Postgres/SchemaMigrations/1781500000000-AddProxmoxAndCephClusterTables.ts +163 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1781600000000-AddProxmoxCephV2Columns.ts +211 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1781600000001-AddProxmoxCephActivityAndRules.ts +590 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1781700000000-AddProxmoxCephV3Columns.ts +64 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +8 -0
- package/Server/Infrastructure/Redis.ts +40 -12
- package/Server/Services/AnalyticsDatabaseService.ts +1 -1
- package/Server/Services/BillingService.ts +109 -21
- package/Server/Services/CephClusterLabelRuleEngineService.ts +200 -0
- package/Server/Services/CephClusterLabelRuleService.ts +14 -0
- package/Server/Services/CephClusterOwnerRuleEngineService.ts +218 -0
- package/Server/Services/CephClusterOwnerRuleService.ts +14 -0
- package/Server/Services/CephClusterOwnerTeamService.ts +10 -0
- package/Server/Services/CephClusterOwnerUserService.ts +10 -0
- package/Server/Services/CephClusterService.ts +401 -0
- package/Server/Services/CephResourceService.ts +383 -0
- package/Server/Services/CloudResourceService.ts +11 -3
- package/Server/Services/DockerHostService.ts +11 -3
- package/Server/Services/ExceptionAggregationService.ts +2 -0
- package/Server/Services/HostService.ts +11 -3
- package/Server/Services/Index.ts +24 -0
- package/Server/Services/KubernetesClusterService.ts +11 -3
- package/Server/Services/LogAggregationService.ts +2 -0
- package/Server/Services/MetricAggregationService.ts +2 -0
- package/Server/Services/OpenTelemetryIngestService.ts +36 -0
- package/Server/Services/ProxmoxClusterLabelRuleEngineService.ts +204 -0
- package/Server/Services/ProxmoxClusterLabelRuleService.ts +14 -0
- package/Server/Services/ProxmoxClusterOwnerRuleEngineService.ts +222 -0
- package/Server/Services/ProxmoxClusterOwnerRuleService.ts +14 -0
- package/Server/Services/ProxmoxClusterOwnerTeamService.ts +10 -0
- package/Server/Services/ProxmoxClusterOwnerUserService.ts +10 -0
- package/Server/Services/ProxmoxClusterService.ts +382 -0
- package/Server/Services/ProxmoxResourceService.ts +404 -0
- package/Server/Services/RumApplicationService.ts +11 -3
- package/Server/Services/ServerlessFunctionService.ts +11 -3
- package/Server/Services/TelemetryUsageBillingService.ts +41 -3
- package/Server/Services/TraceAggregationService.ts +2 -0
- package/Server/Types/AnalyticsDatabase/AggregateBy.ts +8 -23
- package/Server/Utils/Monitor/MonitorAlert.ts +45 -0
- package/Server/Utils/Monitor/MonitorClusterContext.ts +129 -0
- package/Server/Utils/Monitor/MonitorCriteriaEvaluator.ts +344 -4
- package/Server/Utils/Monitor/MonitorIncident.ts +130 -7
- package/Server/Utils/Monitor/MonitorMaintenanceSuppression.ts +39 -6
- package/Server/Utils/Monitor/MonitorTemplateUtil.ts +3 -1
- package/Server/Utils/Monitor/SeriesResourceLabels.ts +33 -0
- package/Server/Utils/Profiling.ts +37 -2
- package/Server/Utils/Telemetry/EntityRegistry.ts +4 -0
- package/Server/Utils/Telemetry/ProxmoxCephSnapshotScan.ts +1096 -0
- package/Server/Utils/Telemetry/TelemetryEntity.ts +85 -0
- package/Server/Utils/Telemetry.ts +8 -19
- package/Tests/Server/API/BillingInvoiceAPI.test.ts +194 -0
- package/Tests/Server/API/ProjectAPI.test.ts +91 -0
- package/Tests/Server/API/ResellerPlanAPI.test.ts +207 -0
- package/Tests/Server/Infrastructure/GlobalCache.test.ts +100 -0
- package/Tests/Server/Services/BillingService.test.ts +323 -0
- package/Tests/Server/Services/CephResourceService.test.ts +264 -0
- package/Tests/Server/Services/ProxmoxResourceService.test.ts +326 -0
- package/Tests/Server/Utils/Monitor/MonitorCriteriaEvaluator.test.ts +322 -0
- package/Tests/Server/Utils/Monitor/MonitorMaintenanceSuppression.test.ts +13 -0
- package/Tests/Server/Utils/Telemetry/ProxmoxCephSnapshotScan.test.ts +879 -0
- package/Tests/Server/Utils/Telemetry/TelemetryEntity.test.ts +196 -0
- package/Tests/Types/Monitor/CephAlertTemplates.test.ts +1231 -0
- package/Tests/Types/Monitor/ProxmoxAlertTemplates.test.ts +732 -0
- package/Tests/Utils/ModelImportExport.test.ts +366 -0
- package/Tests/Utils/Telemetry/EntityRelationship.test.ts +49 -0
- package/Tests/Utils/Telemetry/HeartbeatAvailability.test.ts +423 -0
- package/Types/BaseDatabase/AggregationIntervalUtil.ts +74 -0
- package/Types/Dashboard/DashboardComponentType.ts +4 -0
- package/Types/Dashboard/DashboardComponents/ComponentArgument.ts +2 -0
- package/Types/Dashboard/DashboardComponents/DashboardCephOsdListComponent.ts +15 -0
- package/Types/Dashboard/DashboardComponents/DashboardCephPoolListComponent.ts +14 -0
- package/Types/Dashboard/DashboardComponents/DashboardProxmoxGuestListComponent.ts +17 -0
- package/Types/Dashboard/DashboardComponents/DashboardProxmoxNodeListComponent.ts +16 -0
- package/Types/Dashboard/DashboardTemplates.ts +446 -0
- package/Types/Icon/IconProp.ts +2 -0
- package/Types/Monitor/CephAlertTemplates.ts +1647 -0
- package/Types/Monitor/CephMetricCatalog.ts +409 -0
- package/Types/Monitor/MetricMonitor/MetricMonitorResponse.ts +44 -0
- package/Types/Monitor/MonitorStep.ts +64 -0
- package/Types/Monitor/MonitorStepCephMonitor.ts +57 -0
- package/Types/Monitor/MonitorStepProxmoxMonitor.ts +81 -0
- package/Types/Monitor/MonitorType.ts +29 -1
- package/Types/Monitor/ProxmoxAlertTemplates.ts +899 -0
- package/Types/Monitor/ProxmoxMetricCatalog.ts +382 -0
- package/Types/Permission.ts +464 -0
- package/Types/Telemetry/EntityType.ts +11 -0
- package/Types/Telemetry/ServiceType.ts +2 -0
- package/UI/Components/Icon/Icon.tsx +84 -0
- package/UI/Components/ImportExport/ExportModelCard.tsx +90 -0
- package/UI/Components/ImportExport/ImportModelsModal.tsx +239 -0
- package/UI/Components/ModelTable/ModelTable.tsx +294 -143
- package/UI/Components/MonitorTemplateVariables/TemplateVariablesCatalog.ts +9 -5
- package/UI/Utils/ModelImportExport.ts +207 -0
- package/UI/Utils/Telemetry/Telemetry.ts +16 -21
- package/UI/Utils/TelemetryService.ts +7 -3
- package/Utils/Dashboard/Components/DashboardCephOsdListComponent.ts +63 -0
- package/Utils/Dashboard/Components/DashboardCephPoolListComponent.ts +32 -0
- package/Utils/Dashboard/Components/DashboardCephResourceListShared.ts +61 -0
- package/Utils/Dashboard/Components/DashboardProxmoxGuestListComponent.ts +69 -0
- package/Utils/Dashboard/Components/DashboardProxmoxNodeListComponent.ts +55 -0
- package/Utils/Dashboard/Components/DashboardProxmoxResourceListShared.ts +61 -0
- package/Utils/Dashboard/Components/Index.ts +28 -0
- package/Utils/ModelImportExport.ts +369 -0
- package/Utils/Telemetry/EntityKey.ts +35 -0
- package/Utils/Telemetry/EntityRelationship.ts +6 -0
- package/Utils/Telemetry/HeartbeatAvailability.ts +262 -0
- package/build/dist/Models/DatabaseModels/Alert.js +108 -0
- package/build/dist/Models/DatabaseModels/Alert.js.map +1 -1
- package/build/dist/Models/DatabaseModels/CephCluster.js +992 -0
- package/build/dist/Models/DatabaseModels/CephCluster.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CephClusterLabelRule.js +522 -0
- package/build/dist/Models/DatabaseModels/CephClusterLabelRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CephClusterOwnerRule.js +603 -0
- package/build/dist/Models/DatabaseModels/CephClusterOwnerRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CephClusterOwnerTeam.js +503 -0
- package/build/dist/Models/DatabaseModels/CephClusterOwnerTeam.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CephClusterOwnerUser.js +502 -0
- package/build/dist/Models/DatabaseModels/CephClusterOwnerUser.js.map +1 -0
- package/build/dist/Models/DatabaseModels/CephResource.js +846 -0
- package/build/dist/Models/DatabaseModels/CephResource.js.map +1 -0
- package/build/dist/Models/DatabaseModels/Host.js +63 -0
- package/build/dist/Models/DatabaseModels/Host.js.map +1 -1
- package/build/dist/Models/DatabaseModels/Incident.js +108 -0
- package/build/dist/Models/DatabaseModels/Incident.js.map +1 -1
- package/build/dist/Models/DatabaseModels/Index.js +24 -0
- package/build/dist/Models/DatabaseModels/Index.js.map +1 -1
- package/build/dist/Models/DatabaseModels/ProxmoxCluster.js +967 -0
- package/build/dist/Models/DatabaseModels/ProxmoxCluster.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ProxmoxClusterLabelRule.js +522 -0
- package/build/dist/Models/DatabaseModels/ProxmoxClusterLabelRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ProxmoxClusterOwnerRule.js +603 -0
- package/build/dist/Models/DatabaseModels/ProxmoxClusterOwnerRule.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ProxmoxClusterOwnerTeam.js +503 -0
- package/build/dist/Models/DatabaseModels/ProxmoxClusterOwnerTeam.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ProxmoxClusterOwnerUser.js +502 -0
- package/build/dist/Models/DatabaseModels/ProxmoxClusterOwnerUser.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ProxmoxResource.js +761 -0
- package/build/dist/Models/DatabaseModels/ProxmoxResource.js.map +1 -0
- package/build/dist/Models/DatabaseModels/ScheduledMaintenance.js +108 -0
- package/build/dist/Models/DatabaseModels/ScheduledMaintenance.js.map +1 -1
- package/build/dist/Server/API/BillingInvoiceAPI.js +35 -5
- package/build/dist/Server/API/BillingInvoiceAPI.js.map +1 -1
- package/build/dist/Server/API/CephResourceAPI.js +98 -0
- package/build/dist/Server/API/CephResourceAPI.js.map +1 -0
- package/build/dist/Server/API/DashboardAPI.js +46 -0
- package/build/dist/Server/API/DashboardAPI.js.map +1 -1
- package/build/dist/Server/API/ProjectAPI.js +11 -0
- package/build/dist/Server/API/ProjectAPI.js.map +1 -1
- package/build/dist/Server/API/ProxmoxResourceAPI.js +95 -0
- package/build/dist/Server/API/ProxmoxResourceAPI.js.map +1 -0
- package/build/dist/Server/API/ResellerPlanAPI.js +17 -3
- package/build/dist/Server/API/ResellerPlanAPI.js.map +1 -1
- package/build/dist/Server/Infrastructure/GlobalCache.js +7 -2
- package/build/dist/Server/Infrastructure/GlobalCache.js.map +1 -1
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1781500000000-AddProxmoxAndCephClusterTables.js +76 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1781500000000-AddProxmoxAndCephClusterTables.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1781600000000-AddProxmoxCephV2Columns.js +108 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1781600000000-AddProxmoxCephV2Columns.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1781600000001-AddProxmoxCephActivityAndRules.js +253 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1781600000001-AddProxmoxCephActivityAndRules.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1781700000000-AddProxmoxCephV3Columns.js +43 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1781700000000-AddProxmoxCephV3Columns.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +8 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
- package/build/dist/Server/Infrastructure/Redis.js +31 -8
- package/build/dist/Server/Infrastructure/Redis.js.map +1 -1
- package/build/dist/Server/Services/AnalyticsDatabaseService.js +1 -1
- package/build/dist/Server/Services/AnalyticsDatabaseService.js.map +1 -1
- package/build/dist/Server/Services/BillingService.js +85 -23
- package/build/dist/Server/Services/BillingService.js.map +1 -1
- package/build/dist/Server/Services/CephClusterLabelRuleEngineService.js +166 -0
- package/build/dist/Server/Services/CephClusterLabelRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/CephClusterLabelRuleService.js +13 -0
- package/build/dist/Server/Services/CephClusterLabelRuleService.js.map +1 -0
- package/build/dist/Server/Services/CephClusterOwnerRuleEngineService.js +186 -0
- package/build/dist/Server/Services/CephClusterOwnerRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/CephClusterOwnerRuleService.js +13 -0
- package/build/dist/Server/Services/CephClusterOwnerRuleService.js.map +1 -0
- package/build/dist/Server/Services/CephClusterOwnerTeamService.js +9 -0
- package/build/dist/Server/Services/CephClusterOwnerTeamService.js.map +1 -0
- package/build/dist/Server/Services/CephClusterOwnerUserService.js +9 -0
- package/build/dist/Server/Services/CephClusterOwnerUserService.js.map +1 -0
- package/build/dist/Server/Services/CephClusterService.js +353 -0
- package/build/dist/Server/Services/CephClusterService.js.map +1 -0
- package/build/dist/Server/Services/CephResourceService.js +257 -0
- package/build/dist/Server/Services/CephResourceService.js.map +1 -0
- package/build/dist/Server/Services/CloudResourceService.js +10 -2
- package/build/dist/Server/Services/CloudResourceService.js.map +1 -1
- package/build/dist/Server/Services/DockerHostService.js +10 -2
- package/build/dist/Server/Services/DockerHostService.js.map +1 -1
- package/build/dist/Server/Services/ExceptionAggregationService.js +2 -0
- package/build/dist/Server/Services/ExceptionAggregationService.js.map +1 -1
- package/build/dist/Server/Services/HostService.js +10 -2
- package/build/dist/Server/Services/HostService.js.map +1 -1
- package/build/dist/Server/Services/Index.js +24 -0
- package/build/dist/Server/Services/Index.js.map +1 -1
- package/build/dist/Server/Services/KubernetesClusterService.js +10 -2
- package/build/dist/Server/Services/KubernetesClusterService.js.map +1 -1
- package/build/dist/Server/Services/LogAggregationService.js +2 -0
- package/build/dist/Server/Services/LogAggregationService.js.map +1 -1
- package/build/dist/Server/Services/MetricAggregationService.js +2 -0
- package/build/dist/Server/Services/MetricAggregationService.js.map +1 -1
- package/build/dist/Server/Services/OpenTelemetryIngestService.js +37 -7
- package/build/dist/Server/Services/OpenTelemetryIngestService.js.map +1 -1
- package/build/dist/Server/Services/ProxmoxClusterLabelRuleEngineService.js +166 -0
- package/build/dist/Server/Services/ProxmoxClusterLabelRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/ProxmoxClusterLabelRuleService.js +13 -0
- package/build/dist/Server/Services/ProxmoxClusterLabelRuleService.js.map +1 -0
- package/build/dist/Server/Services/ProxmoxClusterOwnerRuleEngineService.js +186 -0
- package/build/dist/Server/Services/ProxmoxClusterOwnerRuleEngineService.js.map +1 -0
- package/build/dist/Server/Services/ProxmoxClusterOwnerRuleService.js +13 -0
- package/build/dist/Server/Services/ProxmoxClusterOwnerRuleService.js.map +1 -0
- package/build/dist/Server/Services/ProxmoxClusterOwnerTeamService.js +9 -0
- package/build/dist/Server/Services/ProxmoxClusterOwnerTeamService.js.map +1 -0
- package/build/dist/Server/Services/ProxmoxClusterOwnerUserService.js +9 -0
- package/build/dist/Server/Services/ProxmoxClusterOwnerUserService.js.map +1 -0
- package/build/dist/Server/Services/ProxmoxClusterService.js +337 -0
- package/build/dist/Server/Services/ProxmoxClusterService.js.map +1 -0
- package/build/dist/Server/Services/ProxmoxResourceService.js +285 -0
- package/build/dist/Server/Services/ProxmoxResourceService.js.map +1 -0
- package/build/dist/Server/Services/RumApplicationService.js +10 -2
- package/build/dist/Server/Services/RumApplicationService.js.map +1 -1
- package/build/dist/Server/Services/ServerlessFunctionService.js +10 -2
- package/build/dist/Server/Services/ServerlessFunctionService.js.map +1 -1
- package/build/dist/Server/Services/TelemetryUsageBillingService.js +30 -3
- package/build/dist/Server/Services/TelemetryUsageBillingService.js.map +1 -1
- package/build/dist/Server/Services/TraceAggregationService.js +2 -0
- package/build/dist/Server/Services/TraceAggregationService.js.map +1 -1
- package/build/dist/Server/Types/AnalyticsDatabase/AggregateBy.js +8 -25
- package/build/dist/Server/Types/AnalyticsDatabase/AggregateBy.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorAlert.js +36 -0
- package/build/dist/Server/Utils/Monitor/MonitorAlert.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorClusterContext.js +90 -0
- package/build/dist/Server/Utils/Monitor/MonitorClusterContext.js.map +1 -0
- package/build/dist/Server/Utils/Monitor/MonitorCriteriaEvaluator.js +228 -4
- package/build/dist/Server/Utils/Monitor/MonitorCriteriaEvaluator.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorIncident.js +103 -8
- package/build/dist/Server/Utils/Monitor/MonitorIncident.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorMaintenanceSuppression.js +23 -6
- package/build/dist/Server/Utils/Monitor/MonitorMaintenanceSuppression.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorTemplateUtil.js +3 -1
- package/build/dist/Server/Utils/Monitor/MonitorTemplateUtil.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/SeriesResourceLabels.js +23 -0
- package/build/dist/Server/Utils/Monitor/SeriesResourceLabels.js.map +1 -1
- package/build/dist/Server/Utils/Profiling.js +24 -3
- package/build/dist/Server/Utils/Profiling.js.map +1 -1
- package/build/dist/Server/Utils/Telemetry/EntityRegistry.js +4 -0
- package/build/dist/Server/Utils/Telemetry/EntityRegistry.js.map +1 -1
- package/build/dist/Server/Utils/Telemetry/ProxmoxCephSnapshotScan.js +854 -0
- package/build/dist/Server/Utils/Telemetry/ProxmoxCephSnapshotScan.js.map +1 -0
- package/build/dist/Server/Utils/Telemetry/TelemetryEntity.js +62 -0
- package/build/dist/Server/Utils/Telemetry/TelemetryEntity.js.map +1 -1
- package/build/dist/Server/Utils/Telemetry.js +8 -10
- package/build/dist/Server/Utils/Telemetry.js.map +1 -1
- package/build/dist/Types/BaseDatabase/AggregationIntervalUtil.js +69 -0
- package/build/dist/Types/BaseDatabase/AggregationIntervalUtil.js.map +1 -0
- package/build/dist/Types/Dashboard/DashboardComponentType.js +4 -0
- package/build/dist/Types/Dashboard/DashboardComponentType.js.map +1 -1
- package/build/dist/Types/Dashboard/DashboardComponents/ComponentArgument.js +2 -0
- package/build/dist/Types/Dashboard/DashboardComponents/ComponentArgument.js.map +1 -1
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardCephOsdListComponent.js +2 -0
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardCephOsdListComponent.js.map +1 -0
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardCephPoolListComponent.js +2 -0
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardCephPoolListComponent.js.map +1 -0
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardProxmoxGuestListComponent.js +2 -0
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardProxmoxGuestListComponent.js.map +1 -0
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardProxmoxNodeListComponent.js +2 -0
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardProxmoxNodeListComponent.js.map +1 -0
- package/build/dist/Types/Dashboard/DashboardTemplates.js +394 -0
- package/build/dist/Types/Dashboard/DashboardTemplates.js.map +1 -1
- package/build/dist/Types/Icon/IconProp.js +2 -0
- package/build/dist/Types/Icon/IconProp.js.map +1 -1
- package/build/dist/Types/Monitor/CephAlertTemplates.js +1379 -0
- package/build/dist/Types/Monitor/CephAlertTemplates.js.map +1 -0
- package/build/dist/Types/Monitor/CephMetricCatalog.js +353 -0
- package/build/dist/Types/Monitor/CephMetricCatalog.js.map +1 -0
- package/build/dist/Types/Monitor/MonitorStep.js +46 -0
- package/build/dist/Types/Monitor/MonitorStep.js.map +1 -1
- package/build/dist/Types/Monitor/MonitorStepCephMonitor.js +34 -0
- package/build/dist/Types/Monitor/MonitorStepCephMonitor.js.map +1 -0
- package/build/dist/Types/Monitor/MonitorStepProxmoxMonitor.js +36 -0
- package/build/dist/Types/Monitor/MonitorStepProxmoxMonitor.js.map +1 -0
- package/build/dist/Types/Monitor/MonitorType.js +27 -1
- package/build/dist/Types/Monitor/MonitorType.js.map +1 -1
- package/build/dist/Types/Monitor/ProxmoxAlertTemplates.js +743 -0
- package/build/dist/Types/Monitor/ProxmoxAlertTemplates.js.map +1 -0
- package/build/dist/Types/Monitor/ProxmoxMetricCatalog.js +320 -0
- package/build/dist/Types/Monitor/ProxmoxMetricCatalog.js.map +1 -0
- package/build/dist/Types/Permission.js +408 -0
- package/build/dist/Types/Permission.js.map +1 -1
- package/build/dist/Types/Telemetry/EntityType.js +11 -0
- package/build/dist/Types/Telemetry/EntityType.js.map +1 -1
- package/build/dist/Types/Telemetry/ServiceType.js +2 -0
- package/build/dist/Types/Telemetry/ServiceType.js.map +1 -1
- package/build/dist/UI/Components/Icon/Icon.js +33 -0
- package/build/dist/UI/Components/Icon/Icon.js.map +1 -1
- package/build/dist/UI/Components/ImportExport/ExportModelCard.js +50 -0
- package/build/dist/UI/Components/ImportExport/ExportModelCard.js.map +1 -0
- package/build/dist/UI/Components/ImportExport/ImportModelsModal.js +115 -0
- package/build/dist/UI/Components/ImportExport/ImportModelsModal.js.map +1 -0
- package/build/dist/UI/Components/ModelTable/ModelTable.js +166 -74
- package/build/dist/UI/Components/ModelTable/ModelTable.js.map +1 -1
- package/build/dist/UI/Components/MonitorTemplateVariables/TemplateVariablesCatalog.js +5 -1
- package/build/dist/UI/Components/MonitorTemplateVariables/TemplateVariablesCatalog.js.map +1 -1
- package/build/dist/UI/Utils/ModelImportExport.js +142 -0
- package/build/dist/UI/Utils/ModelImportExport.js.map +1 -0
- package/build/dist/UI/Utils/Telemetry/Telemetry.js +11 -10
- package/build/dist/UI/Utils/Telemetry/Telemetry.js.map +1 -1
- package/build/dist/UI/Utils/TelemetryService.js +5 -2
- package/build/dist/UI/Utils/TelemetryService.js.map +1 -1
- package/build/dist/Utils/Dashboard/Components/DashboardCephOsdListComponent.js +50 -0
- package/build/dist/Utils/Dashboard/Components/DashboardCephOsdListComponent.js.map +1 -0
- package/build/dist/Utils/Dashboard/Components/DashboardCephPoolListComponent.js +27 -0
- package/build/dist/Utils/Dashboard/Components/DashboardCephPoolListComponent.js.map +1 -0
- package/build/dist/Utils/Dashboard/Components/DashboardCephResourceListShared.js +46 -0
- package/build/dist/Utils/Dashboard/Components/DashboardCephResourceListShared.js.map +1 -0
- package/build/dist/Utils/Dashboard/Components/DashboardProxmoxGuestListComponent.js +55 -0
- package/build/dist/Utils/Dashboard/Components/DashboardProxmoxGuestListComponent.js.map +1 -0
- package/build/dist/Utils/Dashboard/Components/DashboardProxmoxNodeListComponent.js +42 -0
- package/build/dist/Utils/Dashboard/Components/DashboardProxmoxNodeListComponent.js.map +1 -0
- package/build/dist/Utils/Dashboard/Components/DashboardProxmoxResourceListShared.js +46 -0
- package/build/dist/Utils/Dashboard/Components/DashboardProxmoxResourceListShared.js.map +1 -0
- package/build/dist/Utils/Dashboard/Components/Index.js +16 -0
- package/build/dist/Utils/Dashboard/Components/Index.js.map +1 -1
- package/build/dist/Utils/ModelImportExport.js +257 -0
- package/build/dist/Utils/ModelImportExport.js.map +1 -0
- package/build/dist/Utils/Telemetry/EntityKey.js +27 -0
- package/build/dist/Utils/Telemetry/EntityKey.js.map +1 -1
- package/build/dist/Utils/Telemetry/EntityRelationship.js +3 -0
- package/build/dist/Utils/Telemetry/EntityRelationship.js.map +1 -1
- package/build/dist/Utils/Telemetry/HeartbeatAvailability.js +174 -0
- package/build/dist/Utils/Telemetry/HeartbeatAvailability.js.map +1 -0
- package/package.json +29 -21
|
@@ -0,0 +1,1231 @@
|
|
|
1
|
+
import {
|
|
2
|
+
CephAlertTemplate,
|
|
3
|
+
CephAlertTemplateArgs,
|
|
4
|
+
getAllCephAlertTemplates,
|
|
5
|
+
getCephAlertTemplateById,
|
|
6
|
+
} from "../../../Types/Monitor/CephAlertTemplates";
|
|
7
|
+
import { getCephMetricByMetricName } from "../../../Types/Monitor/CephMetricCatalog";
|
|
8
|
+
import MonitorStep from "../../../Types/Monitor/MonitorStep";
|
|
9
|
+
import MonitorStepCephMonitor from "../../../Types/Monitor/MonitorStepCephMonitor";
|
|
10
|
+
import MonitorCriteriaInstance from "../../../Types/Monitor/MonitorCriteriaInstance";
|
|
11
|
+
import FilterCondition from "../../../Types/Filter/FilterCondition";
|
|
12
|
+
import {
|
|
13
|
+
FilterType,
|
|
14
|
+
NoDataPolicy,
|
|
15
|
+
} from "../../../Types/Monitor/CriteriaFilter";
|
|
16
|
+
import MetricsAggregationType from "../../../Types/Metrics/MetricsAggregationType";
|
|
17
|
+
import RollingTime from "../../../Types/RollingTime/RollingTime";
|
|
18
|
+
import ObjectID from "../../../Types/ObjectID";
|
|
19
|
+
|
|
20
|
+
/*
|
|
21
|
+
* WI-20: lock in the Ceph alert-template contracts (v2 WI-9 + the v3
|
|
22
|
+
* WI-26 health-check additions). Same two-layer shape as the Proxmox
|
|
23
|
+
* twin:
|
|
24
|
+
*
|
|
25
|
+
* 1. ENUMERATED invariants over getAllCephAlertTemplates() — every
|
|
26
|
+
* template (current and future) must build a valid MonitorStep,
|
|
27
|
+
* reference only catalog metrics, resolve every criteria alias,
|
|
28
|
+
* group only by the raw `ceph_daemon` / `pool_id` datapoint labels,
|
|
29
|
+
* and use disjoint fire/recover thresholds. Health-check templates
|
|
30
|
+
* (ceph_health_detail / ceph_daemon_health_metrics) additionally
|
|
31
|
+
* MUST recover with NoDataPolicy.TreatAsZero: those series exist
|
|
32
|
+
* only while a check is active (Quincy+), so a bare "= 0" recover
|
|
33
|
+
* filter would never match after the series disappears and the
|
|
34
|
+
* monitor would wedge in the unhealthy state.
|
|
35
|
+
*
|
|
36
|
+
* 2. A per-template expectation table pins the spec'd v3 WI-26 rows
|
|
37
|
+
* (severity / filter / Past1Minute exceptions / the
|
|
38
|
+
* MON_DISK_CRIT-before-MON_DISK_LOW criteria ordering) and the v2
|
|
39
|
+
* decisions (Sum/Sum same-receiver ratios; the pg-inactive
|
|
40
|
+
* Sum-difference fix — Max/Max would hide inactive PGs in every
|
|
41
|
+
* pool but the largest).
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
/**
 * A single threshold filter the expectation table pins on a criteria
 * instance of a template.
 */
interface ThresholdExpectation {
  /*
   * Alias the filter refers to; in the expectation table it matches the
   * alias of one of the same template's queries.
   */
  alias: string;
  // Comparison operator expected on the filter.
  filterType: FilterType;
  // Numeric threshold the comparison is made against.
  value: number;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Expected shape of one metric query belonging to a template.
 */
interface CephQueryExpectation {
  // Alias under which the query is referenced by threshold expectations.
  alias: string;
  // Name of the metric the query is expected to read.
  metricName: string;
  /*
   * Attribute key/value pairs expected on the query; an empty object when
   * the query carries none. (Presumably these are attribute filters —
   * confirm against the assertions that consume this table.)
   */
  attributes: Record<string, string>;
}
|
|
55
|
+
|
|
56
|
+
/**
 * One row of the per-template expectation table: the values a Ceph alert
 * template with the given id is expected to carry.
 */
interface CephTemplateExpectation {
  // Template identifier, e.g. "ceph-health-error".
  id: string;
  // Expected category label, e.g. "Cluster Health".
  category: string;
  // Expected severity label, e.g. "Critical" or "Warning".
  severity: string;
  // Expected rolling time window of the template's queries.
  rollingTime: RollingTime;
  // All queries of one template share an aggregation by construction.
  aggregation: MetricsAggregationType;
  // Expected queries, one entry per query alias.
  queries: Array<CephQueryExpectation>;
  // Expected group-by key; null in rows that expect none.
  groupBy: string | null;
  // Expected formula; null in rows that expect none.
  formula: string | null;
  /*
   * One entry per unhealthy criteria instance, in evaluation order
   * (first-match-wins, worst tier first). Inner filters are OR'd
   * (FilterCondition.Any).
   */
  fireCriteria: Array<Array<ThresholdExpectation>>;
  // Expectations for the criteria that returns the monitor to healthy.
  recover: {
    // Threshold filters expected on the recover criteria.
    filters: Array<ThresholdExpectation>;
    // How the recover filters are expected to be combined.
    condition: FilterCondition;
    /*
     * Whether the recover side is expected to treat missing data as zero
     * (presumably NoDataPolicy.TreatAsZero — confirm against the
     * assertions that consume this table).
     */
    treatNoDataAsZero: boolean;
  };
}
|
|
78
|
+
|
|
79
|
+
/*
 * Pinned expectation rows, one per Ceph alert template. The rows are
 * assembled inside an IIFE so the small builder helpers below stay
 * private to this table.
 */
const EXPECTED_TEMPLATES: Array<CephTemplateExpectation> =
  ((): Array<CephTemplateExpectation> => {
    // Query expectation; `attributes` defaults to "no attribute filter".
    function query(
      alias: string,
      metricName: string,
      attributes: Record<string, string> = {},
    ): CephQueryExpectation {
      return { alias, metricName, attributes };
    }

    // Query on ceph_health_detail narrowed to a single health-check name.
    function healthDetail(
      alias: string,
      checkName: string,
    ): CephQueryExpectation {
      return query(alias, "ceph_health_detail", { name: checkName });
    }

    // Generic `<alias> <filterType> <value>` filter expectation.
    function threshold(
      alias: string,
      filterType: FilterType,
      value: number,
    ): ThresholdExpectation {
      return { alias, filterType, value };
    }

    // `<alias> > 0` — the fire filter used by most templates.
    function aboveZero(alias: string): ThresholdExpectation {
      return threshold(alias, FilterType.GreaterThan, 0);
    }

    // `<alias> = 0` — the matching recover filter.
    function equalsZero(alias: string): ThresholdExpectation {
      return threshold(alias, FilterType.EqualTo, 0);
    }

    return [
      {
        id: "ceph-health-error",
        category: "Cluster Health",
        severity: "Critical",
        rollingTime: RollingTime.Past1Minute,
        aggregation: MetricsAggregationType.Max,
        queries: [query("ceph_health_error", "ceph_health_status")],
        groupBy: null,
        formula: null,
        fireCriteria: [
          [threshold("ceph_health_error", FilterType.GreaterThanOrEqualTo, 2)],
        ],
        recover: {
          filters: [threshold("ceph_health_error", FilterType.LessThan, 2)],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-health-warn",
        category: "Cluster Health",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [query("ceph_health", "ceph_health_status")],
        groupBy: null,
        formula: null,
        fireCriteria: [
          [threshold("ceph_health", FilterType.GreaterThanOrEqualTo, 1)],
        ],
        recover: {
          filters: [threshold("ceph_health", FilterType.LessThan, 1)],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-osd-down",
        category: "OSD",
        severity: "Critical",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Min,
        queries: [query("osd_up", "ceph_osd_up")],
        groupBy: "ceph_daemon",
        formula: null,
        fireCriteria: [[threshold("osd_up", FilterType.LessThan, 1)]],
        recover: {
          filters: [threshold("osd_up", FilterType.GreaterThanOrEqualTo, 1)],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-osd-out",
        category: "OSD",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Min,
        queries: [query("osd_in", "ceph_osd_in")],
        groupBy: "ceph_daemon",
        formula: null,
        fireCriteria: [[threshold("osd_in", FilterType.LessThan, 1)]],
        recover: {
          filters: [threshold("osd_in", FilterType.GreaterThanOrEqualTo, 1)],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-osd-high-latency",
        category: "OSD",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Avg,
        queries: [query("osd_apply_latency", "ceph_osd_apply_latency_ms")],
        groupBy: "ceph_daemon",
        formula: null,
        fireCriteria: [
          [threshold("osd_apply_latency", FilterType.GreaterThan, 100)],
        ],
        recover: {
          filters: [
            threshold("osd_apply_latency", FilterType.LessThanOrEqualTo, 100),
          ],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-mon-quorum-degraded",
        category: "Cluster Health",
        severity: "Critical",
        rollingTime: RollingTime.Past1Minute,
        aggregation: MetricsAggregationType.Min,
        queries: [query("mon_quorum", "ceph_mon_quorum_status")],
        groupBy: "ceph_daemon",
        formula: null,
        fireCriteria: [[threshold("mon_quorum", FilterType.LessThan, 1)]],
        recover: {
          filters: [
            threshold("mon_quorum", FilterType.GreaterThanOrEqualTo, 1),
          ],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-pg-degraded",
        category: "PG",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [query("pg_degraded", "ceph_pg_degraded")],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("pg_degraded")]],
        recover: {
          filters: [equalsZero("pg_degraded")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-pg-undersized",
        category: "PG",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [query("pg_undersized", "ceph_pg_undersized")],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("pg_undersized")]],
        recover: {
          filters: [equalsZero("pg_undersized")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        /*
         * Difference of two Sums: ceph_pg_total and ceph_pg_active are
         * PER-POOL series, so Sum folds all pools into a cluster-wide
         * count and the scrape multiple scales both terms equally. With
         * Max each side would collapse to the largest pool, and this
         * Critical alert would never fire for inactive PGs in any other
         * pool.
         */
        id: "ceph-pg-inactive",
        category: "PG",
        severity: "Critical",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Sum,
        queries: [
          query("pg_total", "ceph_pg_total"),
          query("pg_active", "ceph_pg_active"),
        ],
        groupBy: null,
        formula: "pg_total - pg_active",
        fireCriteria: [[aboveZero("pg_inactive")]],
        recover: {
          filters: [equalsZero("pg_inactive")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-cluster-near-full",
        category: "Capacity",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Sum,
        queries: [
          query("used_bytes", "ceph_cluster_total_used_bytes"),
          query("total_bytes", "ceph_cluster_total_bytes"),
        ],
        groupBy: null,
        formula: "(used_bytes / total_bytes) * 100",
        fireCriteria: [
          [threshold("cluster_used_percent", FilterType.GreaterThan, 85)],
        ],
        recover: {
          filters: [
            threshold("cluster_used_percent", FilterType.LessThanOrEqualTo, 85),
          ],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-cluster-full",
        category: "Capacity",
        severity: "Critical",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Sum,
        queries: [
          query("used_bytes", "ceph_cluster_total_used_bytes"),
          query("total_bytes", "ceph_cluster_total_bytes"),
        ],
        groupBy: null,
        formula: "(used_bytes / total_bytes) * 100",
        fireCriteria: [
          [threshold("cluster_used_percent", FilterType.GreaterThan, 95)],
        ],
        recover: {
          filters: [
            threshold("cluster_used_percent", FilterType.LessThanOrEqualTo, 95),
          ],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-pool-near-full",
        category: "Capacity",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Sum,
        queries: [
          query("pool_stored", "ceph_pool_stored"),
          query("pool_max_avail", "ceph_pool_max_avail"),
        ],
        groupBy: "pool_id",
        formula: "(pool_stored / (pool_stored + pool_max_avail)) * 100",
        fireCriteria: [
          [threshold("pool_used_percent", FilterType.GreaterThan, 85)],
        ],
        recover: {
          filters: [
            threshold("pool_used_percent", FilterType.LessThanOrEqualTo, 85),
          ],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      {
        id: "ceph-slow-ops",
        category: "Cluster Health",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [query("slow_ops", "ceph_healthcheck_slow_ops")],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("slow_ops")]],
        recover: {
          filters: [equalsZero("slow_ops")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: false,
        },
      },
      /*
       * --- V3 WI-26 health-check templates ---
       * Per the spec table (ProxmoxCephProductsV3.md §WI-26) every row
       * fires on Max > 0 and recovers on = 0 with TreatAsZero, over
       * Past5Minutes unless noted otherwise.
       */
      {
        /*
         * PG_DAMAGED OR OSD_SCRUB_ERRORS: two separate queries and NO
         * formula, because "a + b" would yield nothing while one of the
         * two checks is inactive.
         */
        id: "ceph-pg-damaged",
        category: "PG",
        severity: "Critical",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [
          healthDetail("pg_damaged", "PG_DAMAGED"),
          healthDetail("scrub_errors", "OSD_SCRUB_ERRORS"),
        ],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("pg_damaged"), aboveZero("scrub_errors")]],
        recover: {
          filters: [equalsZero("pg_damaged"), equalsZero("scrub_errors")],
          // BOTH checks must be clear to recover: the complement of the Any fire.
          condition: FilterCondition.All,
          treatNoDataAsZero: true,
        },
      },
      {
        id: "ceph-daemon-crash",
        category: "Cluster Health",
        severity: "Critical",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [healthDetail("recent_crash", "RECENT_CRASH")],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("recent_crash")]],
        recover: {
          filters: [equalsZero("recent_crash")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: true,
        },
      },
      {
        id: "ceph-osd-slow-heartbeats",
        category: "OSD",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [
          healthDetail("slow_ping_front", "OSD_SLOW_PING_TIME_FRONT"),
          healthDetail("slow_ping_back", "OSD_SLOW_PING_TIME_BACK"),
        ],
        groupBy: null,
        formula: null,
        fireCriteria: [
          [aboveZero("slow_ping_front"), aboveZero("slow_ping_back")],
        ],
        recover: {
          filters: [equalsZero("slow_ping_front"), equalsZero("slow_ping_back")],
          condition: FilterCondition.All,
          treatNoDataAsZero: true,
        },
      },
      {
        id: "ceph-mon-clock-skew",
        category: "Cluster Health",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [healthDetail("mon_clock_skew", "MON_CLOCK_SKEW")],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("mon_clock_skew")]],
        recover: {
          filters: [equalsZero("mon_clock_skew")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: true,
        },
      },
      {
        id: "ceph-osd-nearfull",
        category: "Capacity",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [healthDetail("osd_nearfull", "OSD_NEARFULL")],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("osd_nearfull")]],
        recover: {
          filters: [equalsZero("osd_nearfull")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: true,
        },
      },
      {
        id: "ceph-osd-backfillfull",
        category: "Capacity",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [healthDetail("osd_backfillfull", "OSD_BACKFILLFULL")],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("osd_backfillfull")]],
        recover: {
          filters: [equalsZero("osd_backfillfull")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: true,
        },
      },
      {
        // Spec-table exception: Past1Minute, because writes are already blocked.
        id: "ceph-osd-full",
        category: "Capacity",
        severity: "Critical",
        rollingTime: RollingTime.Past1Minute,
        aggregation: MetricsAggregationType.Max,
        queries: [healthDetail("osd_full", "OSD_FULL")],
        groupBy: null,
        formula: null,
        fireCriteria: [[aboveZero("osd_full")]],
        recover: {
          filters: [equalsZero("osd_full")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: true,
        },
      },
      {
        /*
         * Two tiers in one template. Criteria are first-match-wins, so
         * MON_DISK_CRIT (Critical) has to be evaluated BEFORE
         * MON_DISK_LOW (Warning); in the other order the worst tier
         * could never fire.
         */
        id: "ceph-mon-disk-space",
        category: "Cluster Health",
        severity: "Critical",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [
          healthDetail("mon_disk_crit", "MON_DISK_CRIT"),
          healthDetail("mon_disk_low", "MON_DISK_LOW"),
        ],
        groupBy: null,
        formula: null,
        fireCriteria: [
          [aboveZero("mon_disk_crit")],
          [aboveZero("mon_disk_low")],
        ],
        recover: {
          filters: [equalsZero("mon_disk_crit"), equalsZero("mon_disk_low")],
          condition: FilterCondition.All,
          treatNoDataAsZero: true,
        },
      },
      {
        id: "ceph-daemon-slow-ops",
        category: "Cluster Health",
        severity: "Warning",
        rollingTime: RollingTime.Past5Minutes,
        aggregation: MetricsAggregationType.Max,
        queries: [
          query("daemon_slow_ops", "ceph_daemon_health_metrics", {
            type: "SLOW_OPS",
          }),
        ],
        groupBy: "ceph_daemon",
        formula: null,
        fireCriteria: [[aboveZero("daemon_slow_ops")]],
        recover: {
          filters: [equalsZero("daemon_slow_ops")],
          condition: FilterCondition.Any,
          treatNoDataAsZero: true,
        },
      },
    ];
  })();
|
|
799
|
+
|
|
800
|
+
/**
 * Builds a fresh set of template arguments for one test case.
 *
 * The cluster identifier and monitor name are fixed strings; each of the
 * four id fields gets its own newly generated ObjectID on every call.
 */
function buildArgs(): CephAlertTemplateArgs {
  const templateArgs: CephAlertTemplateArgs = {
    clusterIdentifier: "ceph-prod",
    onlineMonitorStatusId: ObjectID.generate(),
    offlineMonitorStatusId: ObjectID.generate(),
    defaultIncidentSeverityId: ObjectID.generate(),
    defaultAlertSeverityId: ObjectID.generate(),
    monitorName: "Test Monitor",
  };
  return templateArgs;
}
|
|
810
|
+
|
|
811
|
+
/**
 * Reads the Ceph monitor configuration from a monitor step.
 *
 * @param step - Monitor step produced by a template.
 * @returns The step's `cephMonitor` payload.
 * @throws Error when the step carries no `cephMonitor`.
 */
function getCephMonitor(step: MonitorStep): MonitorStepCephMonitor {
  const found: MonitorStepCephMonitor | undefined = step.data?.cephMonitor;
  if (found) {
    return found;
  }
  throw new Error("cephMonitor missing from monitor step");
}
|
|
819
|
+
|
|
820
|
+
/**
 * Reads the criteria instances from a monitor step.
 *
 * @param step - Monitor step produced by a template.
 * @returns The step's non-empty criteria instance array.
 * @throws Error when the array is missing or empty.
 */
function getCriteriaInstances(
  step: MonitorStep,
): Array<MonitorCriteriaInstance> {
  const criteriaInstances: Array<MonitorCriteriaInstance> | undefined =
    step.data?.monitorCriteria.data?.monitorCriteriaInstanceArray;
  if (criteriaInstances && criteriaInstances.length > 0) {
    return criteriaInstances;
  }
  throw new Error("monitorCriteria missing from monitor step");
}
|
|
830
|
+
|
|
831
|
+
/**
 * Collects every alias a criteria filter may reference: the metric
 * variable of each query config followed by that of each formula config
 * (formula configs are optional and default to none).
 *
 * @param monitor - Ceph monitor configuration of a step.
 * @returns The set of metric variables declared by the monitor.
 */
function getReferencableAliases(monitor: MonitorStepCephMonitor): Set<string> {
  const queryConfigs: Array<any> = monitor.metricViewConfig
    .queryConfigs as Array<any>;
  const formulaConfigs: Array<any> = (monitor.metricViewConfig
    .formulaConfigs || []) as Array<any>;

  // Queries first, then formulas — same insertion order as two loops.
  return new Set<string>(
    [...queryConfigs, ...formulaConfigs].map((config: any): string => {
      return config.metricAliasData.metricVariable;
    }),
  );
}
|
|
843
|
+
|
|
844
|
+
/**
 * Tells whether a recover filter is the disjoint counterpart of a fire
 * filter on the same threshold value.
 *
 * Accepted pairs (fire → recover): `>` → `<=`, `>=` → `<`, `<` → `>=`,
 * `<=` → `>`, plus `> 0` → `= 0`.
 * NOTE(review): `> 0` / `= 0` only covers every value when the metric
 * is never negative — presumably these are counts; confirm.
 *
 * @param fire - Operator and threshold of the unhealthy filter.
 * @param recover - Operator and threshold of the healthy filter.
 * @returns true for an accepted pair; false when the values differ or
 *          the fire operator is not one of the four comparisons.
 */
function isDisjointComplement(
  fire: { filterType: FilterType; value: number },
  recover: { filterType: FilterType; value: number },
): boolean {
  // Different thresholds can never be complements of each other.
  if (fire.value !== recover.value) {
    return false;
  }

  const fireOp: FilterType = fire.filterType;
  const recoverOp: FilterType = recover.filterType;

  if (fireOp === FilterType.GreaterThan) {
    if (recoverOp === FilterType.LessThanOrEqualTo) {
      return true;
    }
    // Special case: "> 0" may be paired with "= 0".
    return fire.value === 0 && recoverOp === FilterType.EqualTo;
  }
  if (fireOp === FilterType.GreaterThanOrEqualTo) {
    return recoverOp === FilterType.LessThan;
  }
  if (fireOp === FilterType.LessThan) {
    return recoverOp === FilterType.GreaterThanOrEqualTo;
  }
  if (fireOp === FilterType.LessThanOrEqualTo) {
    return recoverOp === FilterType.GreaterThan;
  }
  return false;
}
|
|
867
|
+
|
|
868
|
+
/**
 * Tells whether a metric is one of the two health-check series, which
 * exist only while their check is active.
 *
 * @param metricName - Metric name to classify.
 * @returns true for `ceph_health_detail` and `ceph_daemon_health_metrics`.
 */
function isHealthCheckMetric(metricName: string): boolean {
  const healthCheckMetricNames: Array<string> = [
    "ceph_health_detail",
    "ceph_daemon_health_metrics",
  ];
  return healthCheckMetricNames.includes(metricName);
}
|
|
875
|
+
|
|
876
|
+
// Every registered Ceph alert template, fetched once at module load and
// reused by all suites below (registry check and per-template invariants).
const ALL_TEMPLATES: Array<CephAlertTemplate> = getAllCephAlertTemplates();
|
|
877
|
+
|
|
878
|
+
// Registry-level check: the real template ids and the expectation table
// must describe exactly the same set of templates.
describe("CephAlertTemplates - registry", () => {
  test("template ids are unique and match the expectation table exactly", () => {
    const actualIds: Array<string> = [];
    for (const template of ALL_TEMPLATES) {
      actualIds.push(template.id);
    }
    const expectedIds: Array<string> = EXPECTED_TEMPLATES.map(
      (expectation: CephTemplateExpectation) => {
        return expectation.id;
      },
    );

    // A Set drops duplicates, so equal sizes mean every id is unique.
    expect(new Set(actualIds).size).toBe(actualIds.length);

    // Compare sorted copies: registration order is not part of the contract.
    const sortedActualIds: Array<string> = [...actualIds].sort();
    const sortedExpectedIds: Array<string> = [...expectedIds].sort();
    expect(sortedActualIds).toEqual(sortedExpectedIds);
  });
});
|
|
891
|
+
|
|
892
|
+
describe("CephAlertTemplates - enumerated invariants (every template)", () => {
|
|
893
|
+
test.each(
|
|
894
|
+
ALL_TEMPLATES.map((t: CephAlertTemplate) => {
|
|
895
|
+
return [t.id, t];
|
|
896
|
+
}),
|
|
897
|
+
)("%s builds a valid MonitorStep", (_id: unknown, template: unknown) => {
|
|
898
|
+
const args: CephAlertTemplateArgs = buildArgs();
|
|
899
|
+
const step: MonitorStep = (template as CephAlertTemplate).getMonitorStep(
|
|
900
|
+
args,
|
|
901
|
+
);
|
|
902
|
+
const monitor: MonitorStepCephMonitor = getCephMonitor(step);
|
|
903
|
+
|
|
904
|
+
// The cluster attribute is injected from the template args.
|
|
905
|
+
expect(monitor.clusterIdentifier).toBe(args.clusterIdentifier);
|
|
906
|
+
expect(monitor.metricViewConfig.queryConfigs.length).toBeGreaterThan(0);
|
|
907
|
+
|
|
908
|
+
const instances: Array<MonitorCriteriaInstance> =
|
|
909
|
+
getCriteriaInstances(step);
|
|
910
|
+
/*
|
|
911
|
+
* ceph-mon-disk-space carries a third (Warning-tier) instance —
|
|
912
|
+
* assert at-least-2, never exactly-2.
|
|
913
|
+
*/
|
|
914
|
+
expect(instances.length).toBeGreaterThanOrEqual(2);
|
|
915
|
+
|
|
916
|
+
const offlineInstances: Array<MonitorCriteriaInstance> = instances.slice(
|
|
917
|
+
0,
|
|
918
|
+
-1,
|
|
919
|
+
);
|
|
920
|
+
const onlineInstance: MonitorCriteriaInstance =
|
|
921
|
+
instances[instances.length - 1]!;
|
|
922
|
+
|
|
923
|
+
for (const offline of offlineInstances) {
|
|
924
|
+
expect(offline.data?.monitorStatusId).toBe(args.offlineMonitorStatusId);
|
|
925
|
+
expect(offline.data?.createIncidents).toBe(true);
|
|
926
|
+
expect(offline.data?.createAlerts).toBe(true);
|
|
927
|
+
expect(offline.data?.incidents).toHaveLength(1);
|
|
928
|
+
expect(offline.data?.alerts).toHaveLength(1);
|
|
929
|
+
expect(offline.data?.incidents?.[0]?.autoResolveIncident).toBe(true);
|
|
930
|
+
expect(offline.data?.alerts?.[0]?.autoResolveAlert).toBe(true);
|
|
931
|
+
}
|
|
932
|
+
|
|
933
|
+
expect(onlineInstance.data?.monitorStatusId).toBe(
|
|
934
|
+
args.onlineMonitorStatusId,
|
|
935
|
+
);
|
|
936
|
+
expect(onlineInstance.data?.createIncidents).toBe(false);
|
|
937
|
+
expect(onlineInstance.data?.createAlerts).toBe(false);
|
|
938
|
+
expect(onlineInstance.data?.name).toBe("Healthy");
|
|
939
|
+
});
|
|
940
|
+
|
|
941
|
+
test.each(
|
|
942
|
+
ALL_TEMPLATES.map((t: CephAlertTemplate) => {
|
|
943
|
+
return [t.id, t];
|
|
944
|
+
}),
|
|
945
|
+
)(
|
|
946
|
+
"%s references only catalog metrics and resolvable aliases",
|
|
947
|
+
(_id: unknown, template: unknown) => {
|
|
948
|
+
const step: MonitorStep = (template as CephAlertTemplate).getMonitorStep(
|
|
949
|
+
buildArgs(),
|
|
950
|
+
);
|
|
951
|
+
const monitor: MonitorStepCephMonitor = getCephMonitor(step);
|
|
952
|
+
|
|
953
|
+
for (const queryConfig of monitor.metricViewConfig
|
|
954
|
+
.queryConfigs as Array<any>) {
|
|
955
|
+
const metricName: string =
|
|
956
|
+
queryConfig.metricQueryData.filterData.metricName;
|
|
957
|
+
expect(getCephMetricByMetricName(metricName)).toBeDefined();
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
const aliases: Set<string> = getReferencableAliases(monitor);
|
|
961
|
+
for (const instance of getCriteriaInstances(step)) {
|
|
962
|
+
for (const filter of instance.data?.filters || []) {
|
|
963
|
+
expect(aliases).toContain(
|
|
964
|
+
(filter as any).metricMonitorOptions.metricAlias,
|
|
965
|
+
);
|
|
966
|
+
}
|
|
967
|
+
}
|
|
968
|
+
},
|
|
969
|
+
);
|
|
970
|
+
|
|
971
|
+
test.each(
|
|
972
|
+
ALL_TEMPLATES.map((t: CephAlertTemplate) => {
|
|
973
|
+
return [t.id, t];
|
|
974
|
+
}),
|
|
975
|
+
)(
|
|
976
|
+
"%s groups by raw datapoint labels only (ceph_daemon / pool_id)",
|
|
977
|
+
(_id: unknown, template: unknown) => {
|
|
978
|
+
const step: MonitorStep = (template as CephAlertTemplate).getMonitorStep(
|
|
979
|
+
buildArgs(),
|
|
980
|
+
);
|
|
981
|
+
const monitor: MonitorStepCephMonitor = getCephMonitor(step);
|
|
982
|
+
|
|
983
|
+
for (const queryConfig of monitor.metricViewConfig
|
|
984
|
+
.queryConfigs as Array<any>) {
|
|
985
|
+
const groupBys: Array<string> =
|
|
986
|
+
queryConfig.metricQueryData.groupByAttributeKeys || [];
|
|
987
|
+
for (const key of groupBys) {
|
|
988
|
+
/*
|
|
989
|
+
* ceph-mgr identity labels are datapoint labels — never
|
|
990
|
+
* `resource.`-prefixed in ClickHouse.
|
|
991
|
+
*/
|
|
992
|
+
expect(["ceph_daemon", "pool_id"]).toContain(key);
|
|
993
|
+
}
|
|
994
|
+
}
|
|
995
|
+
},
|
|
996
|
+
);
|
|
997
|
+
|
|
998
|
+
/*
 * Per-template invariant: each filter on a fire (unhealthy) criteria
 * instance must have a counterpart with the same metric alias on the
 * final criteria instance, and isDisjointComplement must accept the
 * pair of thresholds.
 */
test.each(
  ALL_TEMPLATES.map((entry: CephAlertTemplate) => {
    return [entry.id, entry];
  }),
)(
  "%s has disjoint fire/recover thresholds on the same alias",
  (_id: unknown, template: unknown) => {
    const cephTemplate: CephAlertTemplate = template as CephAlertTemplate;
    const step: MonitorStep = cephTemplate.getMonitorStep(buildArgs());
    const instances: Array<MonitorCriteriaInstance> =
      getCriteriaInstances(step);

    // The final criteria instance is treated as the recover tier.
    const recoverInstance: MonitorCriteriaInstance =
      instances[instances.length - 1]!;
    const onlineFilters: Array<any> = (recoverInstance.data?.filters ||
      []) as Array<any>;

    // Every instance before the final one is a fire tier.
    const fireInstances: Array<MonitorCriteriaInstance> = instances.slice(
      0,
      -1,
    );

    fireInstances.forEach((offline: MonitorCriteriaInstance) => {
      const fireFilters: Array<any> = (offline.data?.filters ||
        []) as Array<any>;

      fireFilters.forEach((fireFilter: any) => {
        // Pair the fire filter with the recover filter on the same alias.
        const recoverFilter: any = onlineFilters.find((onlineFilter: any) => {
          return (
            onlineFilter.metricMonitorOptions.metricAlias ===
            fireFilter.metricMonitorOptions.metricAlias
          );
        });
        expect(recoverFilter).toBeDefined();

        const thresholdsAreDisjoint: boolean = isDisjointComplement(
          {
            filterType: fireFilter.filterType,
            value: fireFilter.value as number,
          },
          {
            filterType: recoverFilter.filterType,
            value: recoverFilter.value as number,
          },
        );
        expect(thresholdsAreDisjoint).toBe(true);
      });
    });
  },
);
|
|
1038
|
+
|
|
1039
|
+
/*
 * Per-template invariant, applied only to templates with at least one
 * query whose metric name satisfies isHealthCheckMetric: the final
 * (recover) criteria instance must have filters, and every one of
 * them must use NoDataPolicy.TreatAsZero. Other templates pass
 * without any assertion.
 */
test.each(
  ALL_TEMPLATES.map((entry: CephAlertTemplate) => {
    return [entry.id, entry];
  }),
)(
  "%s health-check recovery treats missing series as zero",
  (_id: unknown, template: unknown) => {
    /*
     * Why this matters: ceph_health_detail and
     * ceph_daemon_health_metrics series are present ONLY while the
     * check is active, and the evaluator defaults to
     * NoDataPolicy Ignore. A "= 0" recover filter lacking TreatAsZero
     * would therefore stop matching once the series vanishes, leaving
     * the monitor stuck and never back at Healthy. Enumerating every
     * template means future health-check templates are covered too.
     */
    const cephTemplate: CephAlertTemplate = template as CephAlertTemplate;
    const step: MonitorStep = cephTemplate.getMonitorStep(buildArgs());
    const monitor: MonitorStepCephMonitor = getCephMonitor(step);
    const queryConfigs: Array<any> = monitor.metricViewConfig
      .queryConfigs as Array<any>;

    // True when no query reads a health-check series.
    const hasNoHealthCheckSeries: boolean = queryConfigs.every(
      (queryConfig: any) => {
        return !isHealthCheckMetric(
          queryConfig.metricQueryData.filterData.metricName,
        );
      },
    );

    if (hasNoHealthCheckSeries) {
      return;
    }

    const instances: Array<MonitorCriteriaInstance> =
      getCriteriaInstances(step);

    // The final criteria instance is treated as the recover tier.
    const recoverInstance: MonitorCriteriaInstance =
      instances[instances.length - 1]!;
    const onlineFilters: Array<any> = (recoverInstance.data?.filters ||
      []) as Array<any>;

    expect(onlineFilters.length).toBeGreaterThan(0);
    onlineFilters.forEach((filter: any) => {
      expect(filter.metricMonitorOptions.onNoDataPolicy).toBe(
        NoDataPolicy.TreatAsZero,
      );
    });
  },
);
|
|
1084
|
+
|
|
1085
|
+
/*
 * Per-template invariant, applied only to templates that define a
 * formula: there is exactly one formula, every query aggregates with
 * Sum, and every query uses the same groupBy keys as the first query.
 * Templates without a formula pass without any assertion.
 */
test.each(
  ALL_TEMPLATES.map((entry: CephAlertTemplate) => {
    return [entry.id, entry];
  }),
)(
  "%s ratio/formula queries use Sum on both sides (same-receiver contract)",
  (_id: unknown, template: unknown) => {
    const cephTemplate: CephAlertTemplate = template as CephAlertTemplate;
    const step: MonitorStep = cephTemplate.getMonitorStep(buildArgs());
    const monitor: MonitorStepCephMonitor = getCephMonitor(step);

    const queryConfigs: Array<any> = monitor.metricViewConfig
      .queryConfigs as Array<any>;
    const formulaConfigs: Array<any> = (monitor.metricViewConfig
      .formulaConfigs || []) as Array<any>;

    // Nothing to verify for templates that do not use a formula.
    if (formulaConfigs.length === 0) {
      return;
    }

    /*
     * All Ceph metrics arrive through ONE receiver (the active mgr
     * scrape). Each formula, whether a ratio or a difference, must
     * therefore aggregate with Sum on every side: the scrape multiple
     * then cancels out (ratios) or scales both terms alike
     * (differences). Max/Max would reduce ungrouped per-pool series to
     * the single largest pool and hide all the others.
     */
    expect(formulaConfigs).toHaveLength(1);
    queryConfigs.forEach((queryConfig: any) => {
      expect(queryConfig.metricQueryData.filterData.aggegationType).toBe(
        MetricsAggregationType.Sum,
      );
    });

    // Identical groupBy keys on every side keep the join aligned.
    const firstGroupBy: Array<string> =
      queryConfigs[0].metricQueryData.groupByAttributeKeys || [];
    queryConfigs.forEach((queryConfig: any) => {
      expect(queryConfig.metricQueryData.groupByAttributeKeys || []).toEqual(
        firstGroupBy,
      );
    });
  },
);
|
|
1129
|
+
});
|
|
1130
|
+
|
|
1131
|
+
/*
 * Table-driven contract check: each row of EXPECTED_TEMPLATES is compared
 * against the template looked up by id and the monitor step it generates
 * (category, severity, rolling time, queries, formula, fire tiers and the
 * recover tier).
 */
describe("CephAlertTemplates - spec table expectations", () => {
  test.each(
    EXPECTED_TEMPLATES.map((row: CephTemplateExpectation) => {
      return [row.id, row];
    }),
  )(
    "%s matches the spec'd metric/aggregation/threshold contract",
    (_id: unknown, expected: unknown) => {
      const tc: CephTemplateExpectation = expected as CephTemplateExpectation;

      // The template must exist before anything else can be compared.
      const template: CephAlertTemplate | undefined = getCephAlertTemplateById(
        tc.id,
      );
      expect(template).toBeDefined();

      expect(template!.category).toBe(tc.category);
      expect(template!.severity).toBe(tc.severity);

      const step: MonitorStep = template!.getMonitorStep(buildArgs());
      const monitor: MonitorStepCephMonitor = getCephMonitor(step);

      expect(monitor.rollingTime).toBe(tc.rollingTime);

      // Queries: same count, then alias / metric / aggregation / groupBy.
      const queryConfigs: Array<any> = monitor.metricViewConfig
        .queryConfigs as Array<any>;
      expect(queryConfigs).toHaveLength(tc.queries.length);

      tc.queries.forEach(
        (expectedQuery: CephQueryExpectation, queryIndex: number) => {
          const actualQuery: any = queryConfigs[queryIndex];
          expect(actualQuery.metricAliasData.metricVariable).toBe(
            expectedQuery.alias,
          );

          const filterData: any = actualQuery.metricQueryData.filterData;
          expect(filterData.metricName).toBe(expectedQuery.metricName);
          expect(filterData.aggegationType).toBe(tc.aggregation);
          expect(filterData.attributes).toEqual(expectedQuery.attributes);

          const groupBys: Array<string> =
            actualQuery.metricQueryData.groupByAttributeKeys || [];
          expect(groupBys).toEqual(tc.groupBy ? [tc.groupBy] : []);
        },
      );

      // Formula: none when the spec has none, otherwise exactly the one.
      const formulaConfigs: Array<any> = (monitor.metricViewConfig
        .formulaConfigs || []) as Array<any>;
      if (!tc.formula) {
        // Multi-query health-check templates must NOT use a formula.
        expect(formulaConfigs).toHaveLength(0);
      } else {
        expect(formulaConfigs).toHaveLength(1);
        expect(formulaConfigs[0].metricFormulaData.metricFormula).toBe(
          tc.formula,
        );
      }

      const instances: Array<MonitorCriteriaInstance> =
        getCriteriaInstances(step);
      // Fire tiers come first in spec order (worst first); recover is last.
      expect(instances).toHaveLength(tc.fireCriteria.length + 1);

      tc.fireCriteria.forEach(
        (expectedFilters: Array<ThresholdExpectation>, tierIndex: number) => {
          const instance: MonitorCriteriaInstance = instances[tierIndex]!;
          // Filters within an unhealthy instance are combined with OR.
          expect(instance.data?.filterCondition).toBe(FilterCondition.Any);

          const filters: Array<any> = instance.data?.filters as Array<any>;
          expect(filters).toHaveLength(expectedFilters.length);

          expectedFilters.forEach(
            (expectedFilter: ThresholdExpectation, filterIndex: number) => {
              const actualFilter: any = filters[filterIndex];
              expect(actualFilter.metricMonitorOptions.metricAlias).toBe(
                expectedFilter.alias,
              );
              expect(actualFilter.filterType).toBe(expectedFilter.filterType);
              expect(actualFilter.value).toBe(expectedFilter.value);
            },
          );
        },
      );

      // Recover tier: the last criteria instance.
      const onlineInstance: MonitorCriteriaInstance =
        instances[instances.length - 1]!;
      expect(onlineInstance.data?.filterCondition).toBe(tc.recover.condition);

      const onlineFilters: Array<any> = onlineInstance.data
        ?.filters as Array<any>;
      expect(onlineFilters).toHaveLength(tc.recover.filters.length);

      tc.recover.filters.forEach(
        (expectedFilter: ThresholdExpectation, filterIndex: number) => {
          const actualFilter: any = onlineFilters[filterIndex];
          expect(actualFilter.metricMonitorOptions.metricAlias).toBe(
            expectedFilter.alias,
          );
          expect(actualFilter.filterType).toBe(expectedFilter.filterType);
          expect(actualFilter.value).toBe(expectedFilter.value);

          /*
           * The no-data policy is TreatAsZero when the spec asks for it
           * and must be left unset otherwise.
           */
          if (tc.recover.treatNoDataAsZero) {
            expect(actualFilter.metricMonitorOptions.onNoDataPolicy).toBe(
              NoDataPolicy.TreatAsZero,
            );
          } else {
            expect(
              actualFilter.metricMonitorOptions.onNoDataPolicy,
            ).toBeUndefined();
          }
        },
      );
    },
  );
});
|