@oneuptime/common 10.5.32 → 10.5.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Models/DatabaseModels/KubernetesResource.ts +37 -0
- package/Server/API/KubernetesResourceAPI.ts +27 -18
- package/Server/Infrastructure/Postgres/SchemaMigrations/1780651429467-AddKubernetesLatestMemoryPercent.ts +19 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +2 -0
- package/Server/Services/KubernetesResourceService.ts +37 -11
- package/Server/Utils/Monitor/MonitorAlert.ts +34 -0
- package/Server/Utils/Monitor/MonitorIncident.ts +60 -93
- package/Server/Utils/Monitor/MonitorMaintenanceSuppression.ts +229 -0
- package/Server/Utils/Monitor/MonitorResource.ts +18 -0
- package/Server/Utils/Monitor/SeriesResourceLabels.ts +156 -0
- package/Tests/Server/Utils/Monitor/MonitorMaintenanceSuppression.test.ts +211 -0
- package/build/dist/Models/DatabaseModels/KubernetesResource.js +38 -0
- package/build/dist/Models/DatabaseModels/KubernetesResource.js.map +1 -1
- package/build/dist/Server/API/KubernetesResourceAPI.js +6 -4
- package/build/dist/Server/API/KubernetesResourceAPI.js.map +1 -1
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780651429467-AddKubernetesLatestMemoryPercent.js +12 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780651429467-AddKubernetesLatestMemoryPercent.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +2 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
- package/build/dist/Server/Services/KubernetesResourceService.js +13 -5
- package/build/dist/Server/Services/KubernetesResourceService.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorAlert.js +36 -17
- package/build/dist/Server/Utils/Monitor/MonitorAlert.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorIncident.js +60 -107
- package/build/dist/Server/Utils/Monitor/MonitorIncident.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorMaintenanceSuppression.js +165 -0
- package/build/dist/Server/Utils/Monitor/MonitorMaintenanceSuppression.js.map +1 -0
- package/build/dist/Server/Utils/Monitor/MonitorResource.js +16 -0
- package/build/dist/Server/Utils/Monitor/MonitorResource.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/SeriesResourceLabels.js +106 -0
- package/build/dist/Server/Utils/Monitor/SeriesResourceLabels.js.map +1 -0
- package/build/dist/Tests/Server/Utils/Monitor/MonitorMaintenanceSuppression.test.js +142 -0
- package/build/dist/Tests/Server/Utils/Monitor/MonitorMaintenanceSuppression.test.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
import ScheduledMaintenance from "../../../Models/DatabaseModels/ScheduledMaintenance";
|
|
2
|
+
import { LIMIT_PER_PROJECT } from "../../../Types/Database/LimitMax";
|
|
3
|
+
import ObjectID from "../../../Types/ObjectID";
|
|
4
|
+
import { PerSeriesCriteriaMatch } from "../../../Types/Probe/ProbeApiIngestResponse";
|
|
5
|
+
import ScheduledMaintenanceService from "../../Services/ScheduledMaintenanceService";
|
|
6
|
+
import CaptureSpan from "../Telemetry/CaptureSpan";
|
|
7
|
+
import SeriesResourceLabels, {
|
|
8
|
+
SeriesResourceRefs,
|
|
9
|
+
} from "./SeriesResourceLabels";
|
|
10
|
+
|
|
11
|
+
/*
|
|
12
|
+
* Ids and names of one resource type that are currently inside an
|
|
13
|
+
* ongoing maintenance window. Ids are OneUptime database ids; names are
|
|
14
|
+
* the resource's identifier column (hostIdentifier / clusterIdentifier /
|
|
15
|
+
* service name). A series matches if it references the resource by
|
|
16
|
+
* either form.
|
|
17
|
+
*/
|
|
18
|
+
export interface ResourceKeySet {
  // OneUptime database ids of the resources under maintenance.
  ids: Set<string>;
  // Identifier-column values of those same resources.
  names: Set<string>;
}

/*
 * Maintained-resource keys, bucketed per resource type. Keeping one
 * bucket per type means a service and a host that happen to share a
 * name can never suppress each other's series.
 */
export interface MaintainedResourceKeys {
  hosts: ResourceKeySet;
  dockerHosts: ResourceKeySet;
  kubernetesClusters: ResourceKeySet;
  services: ResourceKeySet;
}

/*
 * Per-series counterpart to the whole-monitor
 * `disableActiveMonitoringBecauseOfScheduledMaintenanceEvent` flag.
 *
 * A grouped metric monitor (group-by `host.name`, say) evaluates its
 * criteria once per series and creates one incident/alert per breaching
 * series. The whole-monitor flag is all-or-nothing: it only fires when
 * the *monitor itself* is attached to a maintenance event, and it would
 * silence every series. That leaves a gap — attaching only some of the
 * underlying resources (10 of 100 hosts) to a maintenance window did
 * nothing, because nothing maps an attached host back to the series it
 * owns.
 *
 * This util closes that gap: it returns the fingerprints of the series
 * whose resource is under an ongoing maintenance window, so the
 * incident/alert creation loops can skip exactly those series while the
 * other 90 hosts keep alerting. It covers every resource type a
 * maintenance event can attach to AND a series can identify: Host,
 * DockerHost, KubernetesCluster, and Service.
 */
export default class MonitorMaintenanceSuppression {
  /*
   * Resolve the per-series suppression set for one project.
   *
   * Returns an empty set WITHOUT querying when there are no per-series
   * matches, or when no usable series (fingerprint + labels) carries a
   * resource-identifying label — in both cases nothing could be
   * suppressed. Otherwise exactly one ScheduledMaintenance query is
   * issued; if it finds no attached resources the result is again an
   * empty set.
   */
  @CaptureSpan()
  public static async getSuppressedSeriesFingerprints(input: {
    projectId: ObjectID;
    matchesPerSeries?: Array<PerSeriesCriteriaMatch> | undefined;
  }): Promise<Set<string>> {
    const matchesPerSeries: Array<PerSeriesCriteriaMatch> | undefined =
      input.matchesPerSeries;

    if (!matchesPerSeries || matchesPerSeries.length === 0) {
      return new Set<string>();
    }

    /*
     * A series can only be suppressed through a resource label. If no
     * series carries one, the answer is empty whatever the maintenance
     * state is, so skip the database round trip entirely.
     */
    if (!this.hasAnySeriesWithResourceRefs(matchesPerSeries)) {
      return new Set<string>();
    }

    const maintained: MaintainedResourceKeys =
      await this.getResourcesUnderOngoingMaintenance(input.projectId);

    if (!this.hasAnyMaintainedResource(maintained)) {
      return new Set<string>();
    }

    return this.getSuppressedFingerprintsForMaintainedResources({
      matchesPerSeries,
      maintained,
    });
  }

  /*
   * Pure matching step, split out from the query so it can be unit
   * tested without a database. For each series, pull the resource
   * identifiers out of its labels and suppress the series if any of
   * them is under maintenance. Series without a fingerprint or without
   * labels are skipped.
   */
  public static getSuppressedFingerprintsForMaintainedResources(input: {
    matchesPerSeries: Array<PerSeriesCriteriaMatch>;
    maintained: MaintainedResourceKeys;
  }): Set<string> {
    const suppressed: Set<string> = new Set<string>();

    for (const series of input.matchesPerSeries) {
      if (!series.fingerprint || !series.labels) {
        continue;
      }

      const refs: SeriesResourceRefs = SeriesResourceLabels.extractResourceRefs(
        series.labels,
      );

      if (this.isUnderMaintenance(refs, input.maintained)) {
        suppressed.add(series.fingerprint);
      }
    }

    return suppressed;
  }

  /*
   * True when at least one usable series (fingerprint + labels present)
   * references any Host / DockerHost / KubernetesCluster / Service by id
   * or by name. Uses the same skip rule as the matching step so the two
   * can never disagree about which series are eligible.
   */
  private static hasAnySeriesWithResourceRefs(
    matchesPerSeries: Array<PerSeriesCriteriaMatch>,
  ): boolean {
    for (const series of matchesPerSeries) {
      if (!series.fingerprint || !series.labels) {
        continue;
      }

      const refs: SeriesResourceRefs = SeriesResourceLabels.extractResourceRefs(
        series.labels,
      );

      const refCount: number =
        refs.hostIds.length +
        refs.hostNames.length +
        refs.dockerHostIds.length +
        refs.dockerHostNames.length +
        refs.kubernetesClusterIds.length +
        refs.kubernetesClusterNames.length +
        refs.serviceIds.length +
        refs.serviceNames.length;

      if (refCount > 0) {
        return true;
      }
    }

    return false;
  }

  /*
   * Compare one series' references against the maintained keys. Each
   * reference list is only checked against the bucket of its own
   * resource type and its own form (id vs name).
   */
  private static isUnderMaintenance(
    refs: SeriesResourceRefs,
    maintained: MaintainedResourceKeys,
  ): boolean {
    return (
      this.intersects(refs.hostIds, maintained.hosts.ids) ||
      this.intersects(refs.hostNames, maintained.hosts.names) ||
      this.intersects(refs.dockerHostIds, maintained.dockerHosts.ids) ||
      this.intersects(refs.dockerHostNames, maintained.dockerHosts.names) ||
      this.intersects(
        refs.kubernetesClusterIds,
        maintained.kubernetesClusters.ids,
      ) ||
      this.intersects(
        refs.kubernetesClusterNames,
        maintained.kubernetesClusters.names,
      ) ||
      this.intersects(refs.serviceIds, maintained.services.ids) ||
      this.intersects(refs.serviceNames, maintained.services.names)
    );
  }

  // True when any element of `values` is a member of `set`.
  private static intersects(values: Array<string>, set: Set<string>): boolean {
    for (const value of values) {
      if (set.has(value)) {
        return true;
      }
    }
    return false;
  }

  // True when at least one id or name was collected for any resource type.
  private static hasAnyMaintainedResource(
    maintained: MaintainedResourceKeys,
  ): boolean {
    return (
      maintained.hosts.ids.size > 0 ||
      maintained.hosts.names.size > 0 ||
      maintained.dockerHosts.ids.size > 0 ||
      maintained.dockerHosts.names.size > 0 ||
      maintained.kubernetesClusters.ids.size > 0 ||
      maintained.kubernetesClusters.names.size > 0 ||
      maintained.services.ids.size > 0 ||
      maintained.services.names.size > 0
    );
  }

  /*
   * Collect the ids + identifiers of every Host / DockerHost /
   * KubernetesCluster / Service attached to an ongoing maintenance event
   * in this project. Monitors attached to the event are intentionally
   * not collected here — those are already handled upstream by the
   * whole-monitor disable flag, which short-circuits evaluation before
   * we ever reach per-series creation.
   */
  private static async getResourcesUnderOngoingMaintenance(
    projectId: ObjectID,
  ): Promise<MaintainedResourceKeys> {
    const maintained: MaintainedResourceKeys = {
      hosts: { ids: new Set<string>(), names: new Set<string>() },
      dockerHosts: { ids: new Set<string>(), names: new Set<string>() },
      kubernetesClusters: { ids: new Set<string>(), names: new Set<string>() },
      services: { ids: new Set<string>(), names: new Set<string>() },
    };

    const ongoingEvents: Array<ScheduledMaintenance> =
      await ScheduledMaintenanceService.findBy({
        query: {
          projectId: projectId,
          currentScheduledMaintenanceState: {
            isOngoingState: true,
          },
        },
        select: {
          _id: true,
          hosts: { _id: true, hostIdentifier: true },
          dockerHosts: { _id: true, hostIdentifier: true },
          kubernetesClusters: { _id: true, clusterIdentifier: true },
          services: { _id: true, name: true },
        },
        skip: 0,
        limit: LIMIT_PER_PROJECT,
        props: {
          isRoot: true,
        },
      });

    for (const event of ongoingEvents) {
      for (const host of event.hosts || []) {
        this.addKey(maintained.hosts, host._id, host.hostIdentifier);
      }
      for (const dockerHost of event.dockerHosts || []) {
        this.addKey(
          maintained.dockerHosts,
          dockerHost._id,
          dockerHost.hostIdentifier,
        );
      }
      for (const cluster of event.kubernetesClusters || []) {
        this.addKey(
          maintained.kubernetesClusters,
          cluster._id,
          cluster.clusterIdentifier,
        );
      }
      for (const service of event.services || []) {
        this.addKey(maintained.services, service._id, service.name);
      }
    }

    return maintained;
  }

  /*
   * Record one resource in a key set. Missing/empty ids and names are
   * skipped so the sets never contain a blank key.
   */
  private static addKey(
    keySet: ResourceKeySet,
    id: string | undefined,
    name: string | undefined,
  ): void {
    if (id) {
      // NOTE(review): String() looks defensive against a non-string id at runtime — confirm.
      keySet.ids.add(String(id));
    }
    if (name) {
      keySet.names.add(name);
    }
  }
}
|
|
@@ -35,6 +35,7 @@ import ExceptionMonitorResponse from "../../../Types/Monitor/ExceptionMonitor/Ex
|
|
|
35
35
|
import { TelemetryQuery } from "../../../Types/Telemetry/TelemetryQuery";
|
|
36
36
|
import MonitorIncident from "./MonitorIncident";
|
|
37
37
|
import MonitorAlert from "./MonitorAlert";
|
|
38
|
+
import MonitorMaintenanceSuppression from "./MonitorMaintenanceSuppression";
|
|
38
39
|
import MonitorStatusTimelineUtil from "./MonitorStatusTimeline";
|
|
39
40
|
import CaptureSpan from "../Telemetry/CaptureSpan";
|
|
40
41
|
import ExceptionMessages from "../../../Types/Exception/ExceptionMessages";
|
|
@@ -727,6 +728,21 @@ export default class MonitorResourceUtil {
|
|
|
727
728
|
});
|
|
728
729
|
}
|
|
729
730
|
|
|
731
|
+
/*
|
|
732
|
+
* For grouped metric monitors, work out which breaching series
|
|
733
|
+
* belong to a resource that is currently inside an ongoing
|
|
734
|
+
* scheduled maintenance window. Those series are suppressed
|
|
735
|
+
* below so the monitor keeps alerting on the rest. Computed once
|
|
736
|
+
* and shared by both the incident and alert paths. Cheap on the
|
|
737
|
+
* common path: no per-series matches, or no ongoing maintenance,
|
|
738
|
+
* returns an empty set after at most one query.
|
|
739
|
+
*/
|
|
740
|
+
const suppressedSeriesFingerprints: Set<string> =
|
|
741
|
+
await MonitorMaintenanceSuppression.getSuppressedSeriesFingerprints({
|
|
742
|
+
projectId: monitor.projectId!,
|
|
743
|
+
matchesPerSeries: response.perSeriesMatches,
|
|
744
|
+
});
|
|
745
|
+
|
|
730
746
|
await MonitorIncident.criteriaMetCreateIncidentsAndUpdateMonitorStatus({
|
|
731
747
|
monitor: monitor,
|
|
732
748
|
rootCause: response.rootCause,
|
|
@@ -738,6 +754,7 @@ export default class MonitorResourceUtil {
|
|
|
738
754
|
telemetryQuery: telemetryQuery,
|
|
739
755
|
},
|
|
740
756
|
matchesPerSeries: response.perSeriesMatches,
|
|
757
|
+
suppressedSeriesFingerprints,
|
|
741
758
|
});
|
|
742
759
|
|
|
743
760
|
await MonitorAlert.criteriaMetCreateAlertsAndUpdateMonitorStatus({
|
|
@@ -751,6 +768,7 @@ export default class MonitorResourceUtil {
|
|
|
751
768
|
telemetryQuery: telemetryQuery,
|
|
752
769
|
},
|
|
753
770
|
matchesPerSeries: response.perSeriesMatches,
|
|
771
|
+
suppressedSeriesFingerprints,
|
|
754
772
|
});
|
|
755
773
|
} else if (
|
|
756
774
|
!response.criteriaMetId &&
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { JSONObject } from "../../../Types/JSON";
|
|
2
|
+
|
|
3
|
+
/*
|
|
4
|
+
* A grouped metric monitor (e.g. group-by `resource.host.name`) emits
|
|
5
|
+
* one series per group, and each series carries the group's identity in
|
|
6
|
+
* its labels. Resource identity shows up under several key spellings:
|
|
7
|
+
*
|
|
8
|
+
* - the raw OTel resource attribute (`host.name`)
|
|
9
|
+
* - the ClickHouse `resource.`-prefixed form (`resource.host.name`)
|
|
10
|
+
* - the OneUptime stamps added at ingest (`oneuptime.host.id`
|
|
11
|
+
* / `oneuptime.host.name` and their `resource.`-prefixed twins)
|
|
12
|
+
*
|
|
13
|
+
* This module is the single source of truth for "which label keys
|
|
14
|
+
* identify which resource type", so that everything keying off series
|
|
15
|
+
* identity — incident/alert resource linking AND scheduled-maintenance
|
|
16
|
+
* suppression — stays in agreement. If the two ever diverged, a series
|
|
17
|
+
* could be linked to a host it is not suppressed for (or vice versa).
|
|
18
|
+
*
|
|
19
|
+
* Multi-value labels are flattened, so a series that groups by a
|
|
20
|
+
* multi-valued attribute references every value it carries.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
export const HostIdLabelKeys: ReadonlyArray<string> = [
  "resource.oneuptime.host.id",
  "oneuptime.host.id",
];

export const HostNameLabelKeys: ReadonlyArray<string> = [
  "resource.oneuptime.host.name",
  "oneuptime.host.name",
  "resource.host.name",
  "host.name",
];

/*
 * For Docker hosts we deliberately ignore raw `host.name` /
 * `oneuptime.host.name`: those are the Host's territory. The ingest
 * pipeline stamps `oneuptime.docker.host.*` independently when the
 * source is a docker host, so only those keys identify a DockerHost.
 */
export const DockerHostIdLabelKeys: ReadonlyArray<string> = [
  "resource.oneuptime.docker.host.id",
  "oneuptime.docker.host.id",
];

export const DockerHostNameLabelKeys: ReadonlyArray<string> = [
  "resource.oneuptime.docker.host.name",
  "oneuptime.docker.host.name",
];

export const KubernetesClusterIdLabelKeys: ReadonlyArray<string> = [
  "resource.oneuptime.kubernetes.cluster.id",
  "oneuptime.kubernetes.cluster.id",
];

export const KubernetesClusterNameLabelKeys: ReadonlyArray<string> = [
  "resource.oneuptime.kubernetes.cluster.name",
  "oneuptime.kubernetes.cluster.name",
  "resource.k8s.cluster.name",
  "k8s.cluster.name",
];

/*
 * Services come from OTel-ingested telemetry. The ingest pipeline
 * auto-creates a Service row keyed by `service.name`, so any series
 * label carrying that attribute (raw or prefixed) tells us the emitting
 * service. The `oneuptime.service.id` stamp is also accepted for callers
 * that resolved the ID upstream.
 */
export const ServiceIdLabelKeys: ReadonlyArray<string> = [
  "resource.oneuptime.service.id",
  "oneuptime.service.id",
];

export const ServiceNameLabelKeys: ReadonlyArray<string> = [
  "resource.service.name",
  "service.name",
];

/*
 * The identifiers carried by one series, split by resource type and by
 * id-vs-name. Ids are OneUptime database ids (the `oneuptime.*.id`
 * stamps); names are the human/telemetry identifiers (host.name,
 * k8s.cluster.name, service.name) that map to a resource's identifier
 * column (`hostIdentifier`, `clusterIdentifier`, `name`).
 */
export interface SeriesResourceRefs {
  hostIds: Array<string>;
  hostNames: Array<string>;
  dockerHostIds: Array<string>;
  dockerHostNames: Array<string>;
  kubernetesClusterIds: Array<string>;
  kubernetesClusterNames: Array<string>;
  serviceIds: Array<string>;
  serviceNames: Array<string>;
}

export default class SeriesResourceLabels {
  /*
   * Collect every non-empty string value held at any of `keys` in the
   * series labels, flattening multi-valued (array) labels. Returns a
   * deduped list in first-seen order (key order, then item order).
   *
   * Non-string values and empty strings are ignored, including inside
   * arrays. Missing labels (null / undefined) yield an empty list
   * instead of throwing, so callers do not each have to guard.
   */
  public static collectLabelValues(
    seriesLabels: JSONObject | null | undefined,
    keys: ReadonlyArray<string>,
  ): Array<string> {
    if (!seriesLabels) {
      return [];
    }

    const found: Set<string> = new Set<string>();

    for (const key of keys) {
      const value: unknown = seriesLabels[key];

      // Treat a scalar as a one-item list so both shapes share one filter.
      const candidates: ReadonlyArray<unknown> = Array.isArray(value)
        ? value
        : [value];

      for (const candidate of candidates) {
        if (typeof candidate === "string" && candidate.length > 0) {
          found.add(candidate);
        }
      }
    }

    return Array.from(found);
  }

  /*
   * Split a series' labels into per-resource-type id and name lists
   * using the key tables above. Every field is always present; a field
   * is an empty array when the labels carry none of its keys (or when
   * the labels themselves are missing).
   */
  public static extractResourceRefs(
    seriesLabels: JSONObject | null | undefined,
  ): SeriesResourceRefs {
    return {
      hostIds: this.collectLabelValues(seriesLabels, HostIdLabelKeys),
      hostNames: this.collectLabelValues(seriesLabels, HostNameLabelKeys),
      dockerHostIds: this.collectLabelValues(
        seriesLabels,
        DockerHostIdLabelKeys,
      ),
      dockerHostNames: this.collectLabelValues(
        seriesLabels,
        DockerHostNameLabelKeys,
      ),
      kubernetesClusterIds: this.collectLabelValues(
        seriesLabels,
        KubernetesClusterIdLabelKeys,
      ),
      kubernetesClusterNames: this.collectLabelValues(
        seriesLabels,
        KubernetesClusterNameLabelKeys,
      ),
      serviceIds: this.collectLabelValues(seriesLabels, ServiceIdLabelKeys),
      serviceNames: this.collectLabelValues(seriesLabels, ServiceNameLabelKeys),
    };
  }
}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import MonitorMaintenanceSuppression, {
|
|
2
|
+
MaintainedResourceKeys,
|
|
3
|
+
} from "../../../../Server/Utils/Monitor/MonitorMaintenanceSuppression";
|
|
4
|
+
import SeriesResourceLabels, {
|
|
5
|
+
SeriesResourceRefs,
|
|
6
|
+
} from "../../../../Server/Utils/Monitor/SeriesResourceLabels";
|
|
7
|
+
import { JSONObject } from "../../../../Types/JSON";
|
|
8
|
+
import { PerSeriesCriteriaMatch } from "../../../../Types/Probe/ProbeApiIngestResponse";
|
|
9
|
+
|
|
10
|
+
/*
 * Fixture: a MaintainedResourceKeys value in which every resource type
 * has its own fresh, empty id and name sets. Tests add to the buckets
 * they care about.
 */
function emptyMaintained(): MaintainedResourceKeys {
  const freshKeySet = (): { ids: Set<string>; names: Set<string> } => {
    return { ids: new Set<string>(), names: new Set<string>() };
  };

  const fixture: MaintainedResourceKeys = {
    hosts: freshKeySet(),
    dockerHosts: freshKeySet(),
    kubernetesClusters: freshKeySet(),
    services: freshKeySet(),
  };

  return fixture;
}
|
|
18
|
+
|
|
19
|
+
/*
 * Fixture: a per-series criteria match with the given fingerprint and
 * labels. The criteria id and root cause are fixed placeholder values.
 */
function series(
  fingerprint: string,
  labels: JSONObject,
): PerSeriesCriteriaMatch {
  const match: PerSeriesCriteriaMatch = {
    criteriaMetId: "criteria-1",
    fingerprint: fingerprint,
    labels: labels,
    rootCause: "breached",
  };

  return match;
}
|
|
30
|
+
|
|
31
|
+
// Unit tests for the label-key tables and the two extraction helpers.
describe("SeriesResourceLabels", () => {
  describe("collectLabelValues", () => {
    it("returns a string-valued label", () => {
      expect(
        SeriesResourceLabels.collectLabelValues({ "host.name": "h1" }, [
          "host.name",
        ]),
      ).toEqual(["h1"]);
    });

    it("flattens multi-valued labels and dedupes across keys", () => {
      expect(
        SeriesResourceLabels.collectLabelValues(
          { "host.name": ["h1", "h2"], "resource.host.name": "h2" },
          ["host.name", "resource.host.name"],
        ).sort(),
      ).toEqual(["h1", "h2"]);
    });

    it("ignores empty strings and non-string values", () => {
      expect(
        SeriesResourceLabels.collectLabelValues(
          { "host.name": "", other: 5 as unknown as string },
          ["host.name", "other"],
        ),
      ).toEqual([]);
    });

    // The per-item filter inside arrays was previously untested.
    it("drops empty and non-string items inside a multi-valued label", () => {
      expect(
        SeriesResourceLabels.collectLabelValues(
          { "host.name": ["h1", "", 7] as unknown as Array<string> },
          ["host.name"],
        ),
      ).toEqual(["h1"]);
    });
  });

  describe("extractResourceRefs", () => {
    it("maps host name (prefixed and unprefixed) and id keys", () => {
      const refs: SeriesResourceRefs = SeriesResourceLabels.extractResourceRefs(
        {
          "resource.host.name": "h1",
          "oneuptime.host.id": "host-id-1",
        },
      );
      expect(refs.hostNames).toEqual(["h1"]);
      expect(refs.hostIds).toEqual(["host-id-1"]);
    });

    it("does NOT treat host.name as a docker host (docker keys are distinct)", () => {
      const refs: SeriesResourceRefs = SeriesResourceLabels.extractResourceRefs(
        {
          "host.name": "h1",
        },
      );
      expect(refs.dockerHostNames).toEqual([]);
      expect(refs.hostNames).toEqual(["h1"]);
    });

    it("maps docker host, kubernetes cluster, and service keys", () => {
      const refs: SeriesResourceRefs = SeriesResourceLabels.extractResourceRefs(
        {
          "oneuptime.docker.host.name": "d1",
          "k8s.cluster.name": "c1",
          "service.name": "s1",
        },
      );
      expect(refs.dockerHostNames).toEqual(["d1"]);
      expect(refs.kubernetesClusterNames).toEqual(["c1"]);
      expect(refs.serviceNames).toEqual(["s1"]);
    });

    // Only the host id stamp was covered; pin the other three id tables too.
    it("maps docker host, kubernetes cluster, and service id stamps", () => {
      const refs: SeriesResourceRefs = SeriesResourceLabels.extractResourceRefs(
        {
          "resource.oneuptime.docker.host.id": "docker-id-1",
          "oneuptime.kubernetes.cluster.id": "cluster-id-1",
          "resource.oneuptime.service.id": "service-id-1",
        },
      );
      expect(refs.dockerHostIds).toEqual(["docker-id-1"]);
      expect(refs.kubernetesClusterIds).toEqual(["cluster-id-1"]);
      expect(refs.serviceIds).toEqual(["service-id-1"]);
      expect(refs.hostIds).toEqual([]);
    });
  });
});
|
|
96
|
+
|
|
97
|
+
// Unit tests for the pure matching step; no database is involved.
describe("MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources", () => {
  it("suppresses only the series whose host is under maintenance", () => {
    const maintained: MaintainedResourceKeys = emptyMaintained();
    maintained.hosts.names.add("prod-db-01");

    const result: Set<string> =
      MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources(
        {
          matchesPerSeries: [
            series("fpA", { "resource.host.name": "prod-db-01" }),
            series("fpB", { "resource.host.name": "prod-web-02" }),
          ],
          maintained,
        },
      );

    expect(Array.from(result)).toEqual(["fpA"]);
  });

  it("matches a host by its OneUptime id stamp as well as by name", () => {
    const maintained: MaintainedResourceKeys = emptyMaintained();
    maintained.hosts.ids.add("host-uuid-1");

    const result: Set<string> =
      MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources(
        {
          matchesPerSeries: [
            series("fpA", { "oneuptime.host.id": "host-uuid-1" }),
            series("fpB", { "oneuptime.host.id": "host-uuid-2" }),
          ],
          maintained,
        },
      );

    expect(Array.from(result)).toEqual(["fpA"]);
  });

  it("suppresses across docker host, kubernetes cluster, and service resource types", () => {
    const maintained: MaintainedResourceKeys = emptyMaintained();
    maintained.dockerHosts.names.add("docker-1");
    maintained.kubernetesClusters.names.add("cluster-1");
    maintained.services.names.add("payments");

    const result: Set<string> =
      MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources(
        {
          matchesPerSeries: [
            series("fpDocker", { "oneuptime.docker.host.name": "docker-1" }),
            series("fpCluster", { "k8s.cluster.name": "cluster-1" }),
            series("fpService", { "service.name": "payments" }),
            series("fpClear", { "service.name": "billing" }),
          ],
          maintained,
        },
      );

    expect(Array.from(result).sort()).toEqual([
      "fpCluster",
      "fpDocker",
      "fpService",
    ]);
  });

  it("does not cross-match resource types that happen to share a name", () => {
    /*
     * A service named the same string as the breaching host must not
     * suppress the host's series.
     */
    const maintained: MaintainedResourceKeys = emptyMaintained();
    maintained.services.names.add("prod-db-01");

    const result: Set<string> =
      MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources(
        {
          matchesPerSeries: [
            series("fpHost", { "resource.host.name": "prod-db-01" }),
          ],
          maintained,
        },
      );

    expect(result.size).toBe(0);
  });

  it("returns an empty set when nothing is under maintenance", () => {
    const result: Set<string> =
      MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources(
        {
          matchesPerSeries: [
            series("fpA", { "resource.host.name": "prod-db-01" }),
          ],
          maintained: emptyMaintained(),
        },
      );

    expect(result.size).toBe(0);
  });

  it("skips series with no fingerprint without throwing", () => {
    const maintained: MaintainedResourceKeys = emptyMaintained();
    maintained.hosts.names.add("prod-db-01");

    const result: Set<string> =
      MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources(
        {
          matchesPerSeries: [
            series("", { "resource.host.name": "prod-db-01" }),
          ],
          maintained,
        },
      );

    expect(result.size).toBe(0);
  });

  // The matching step guards on missing labels too; pin that branch.
  it("skips series with no labels without throwing", () => {
    const maintained: MaintainedResourceKeys = emptyMaintained();
    maintained.hosts.names.add("prod-db-01");

    const result: Set<string> =
      MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources(
        {
          matchesPerSeries: [
            series("fpNoLabels", undefined as unknown as JSONObject),
          ],
          maintained,
        },
      );

    expect(result.size).toBe(0);
  });

  // A multi-valued label references every value it carries.
  it("suppresses a series when any value of a multi-valued label is under maintenance", () => {
    const maintained: MaintainedResourceKeys = emptyMaintained();
    maintained.hosts.names.add("prod-db-01");

    const result: Set<string> =
      MonitorMaintenanceSuppression.getSuppressedFingerprintsForMaintainedResources(
        {
          matchesPerSeries: [
            series("fpMulti", { "host.name": ["prod-web-02", "prod-db-01"] }),
            series("fpOther", { "host.name": ["prod-web-02", "prod-web-03"] }),
          ],
          maintained,
        },
      );

    expect(Array.from(result)).toEqual(["fpMulti"]);
  });
});
|
|
@@ -82,6 +82,7 @@ let KubernetesResource = class KubernetesResource extends BaseModel {
|
|
|
82
82
|
this.controllerCronJobName = undefined;
|
|
83
83
|
this.latestCpuPercent = undefined;
|
|
84
84
|
this.latestMemoryBytes = undefined;
|
|
85
|
+
this.latestMemoryPercent = undefined;
|
|
85
86
|
this.metricsUpdatedAt = undefined;
|
|
86
87
|
this.lastSeenAt = undefined;
|
|
87
88
|
this.resourceCreationTimestamp = undefined;
|
|
@@ -578,6 +579,43 @@ __decorate([
|
|
|
578
579
|
}),
|
|
579
580
|
__metadata("design:type", Number)
|
|
580
581
|
], KubernetesResource.prototype, "latestMemoryBytes", void 0);
|
|
582
|
+
__decorate([
|
|
583
|
+
ColumnAccessControl({
|
|
584
|
+
create: [],
|
|
585
|
+
read: READ_PERMISSIONS,
|
|
586
|
+
update: [],
|
|
587
|
+
}),
|
|
588
|
+
TableColumn({
|
|
589
|
+
required: false,
|
|
590
|
+
type: TableColumnType.Number,
|
|
591
|
+
canReadOnRelationQuery: true,
|
|
592
|
+
title: "Latest Memory Percent",
|
|
593
|
+
description: "Most recent memory usage as a percent of the resource's node allocatable memory (Pod or Node). Stored as decimal — mirrors latestCpuPercent — so the workload/namespace list views can SUM a per-pod percentage. Null until the first metric arrives or while the node's allocatable memory is still unknown.",
|
|
594
|
+
}),
|
|
595
|
+
Column({
|
|
596
|
+
nullable: true,
|
|
597
|
+
type: ColumnType.Decimal,
|
|
598
|
+
transformer: {
|
|
599
|
+
to: (value) => {
|
|
600
|
+
if (value === null || value === undefined) {
|
|
601
|
+
return null;
|
|
602
|
+
}
|
|
603
|
+
return value;
|
|
604
|
+
},
|
|
605
|
+
from: (value) => {
|
|
606
|
+
if (value === null || value === undefined) {
|
|
607
|
+
return null;
|
|
608
|
+
}
|
|
609
|
+
if (typeof value === "number") {
|
|
610
|
+
return value;
|
|
611
|
+
}
|
|
612
|
+
const parsed = parseFloat(value);
|
|
613
|
+
return isNaN(parsed) ? null : parsed;
|
|
614
|
+
},
|
|
615
|
+
},
|
|
616
|
+
}),
|
|
617
|
+
__metadata("design:type", Number)
|
|
618
|
+
], KubernetesResource.prototype, "latestMemoryPercent", void 0);
|
|
581
619
|
__decorate([
|
|
582
620
|
ColumnAccessControl({
|
|
583
621
|
create: [],
|