@checkstack/healthcheck-jenkins-backend 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/package.json +23 -0
- package/src/collectors/build-history.test.ts +106 -0
- package/src/collectors/build-history.ts +280 -0
- package/src/collectors/index.ts +5 -0
- package/src/collectors/job-status.test.ts +146 -0
- package/src/collectors/job-status.ts +241 -0
- package/src/collectors/node-health.test.ts +149 -0
- package/src/collectors/node-health.ts +305 -0
- package/src/collectors/queue-info.test.ts +113 -0
- package/src/collectors/queue-info.ts +215 -0
- package/src/collectors/server-info.test.ts +90 -0
- package/src/collectors/server-info.ts +169 -0
- package/src/index.ts +43 -0
- package/src/plugin-metadata.ts +9 -0
- package/src/strategy.test.ts +198 -0
- package/src/strategy.ts +228 -0
- package/src/transport-client.ts +38 -0
- package/tsconfig.json +3 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Versioned,
|
|
3
|
+
z,
|
|
4
|
+
type HealthCheckRunForAggregation,
|
|
5
|
+
type CollectorResult,
|
|
6
|
+
type CollectorStrategy,
|
|
7
|
+
} from "@checkstack/backend-api";
|
|
8
|
+
import {
|
|
9
|
+
healthResultNumber,
|
|
10
|
+
healthResultString,
|
|
11
|
+
healthResultBoolean,
|
|
12
|
+
} from "@checkstack/healthcheck-common";
|
|
13
|
+
import { pluginMetadata } from "../plugin-metadata";
|
|
14
|
+
import type { JenkinsTransportClient } from "../transport-client";
|
|
15
|
+
|
|
16
|
+
// ============================================================================
|
|
17
|
+
// CONFIGURATION SCHEMA
|
|
18
|
+
// ============================================================================
|
|
19
|
+
|
|
20
|
+
/**
 * Configuration accepted by the job status collector.
 */
const jobStatusConfigSchema = z.object({
  // Slash-separated job path; must contain at least one character.
  jobName: z
    .string()
    .min(1)
    .describe("Full job path (e.g., 'folder/job-name' or 'my-job')"),

  // Defaults to true; when enabled, last-build details are added to the result.
  checkLastBuild: z
    .boolean()
    .default(true)
    .describe("Check the last build status"),
});

/** Parsed configuration shape inferred from the schema above. */
export type JobStatusConfig = z.infer<typeof jobStatusConfigSchema>;
|
|
32
|
+
|
|
33
|
+
// ============================================================================
|
|
34
|
+
// RESULT SCHEMAS
|
|
35
|
+
// ============================================================================
|
|
36
|
+
|
|
37
|
+
/**
 * Per-run result produced by the job status collector.
 *
 * The `lastBuild*` and `timeSinceLastBuildMs` fields are optional; the other
 * fields are always present.
 */
const jobStatusResultSchema = z.object({
  // --- Job-level state -----------------------------------------------------
  jobName: healthResultString({ "x-chart-type": "text", "x-chart-label": "Job Name" }),
  buildable: healthResultBoolean({ "x-chart-type": "boolean", "x-chart-label": "Buildable" }),

  // --- Last build details (optional) ---------------------------------------
  lastBuildNumber: healthResultNumber({
    "x-chart-type": "counter",
    "x-chart-label": "Last Build #",
  }).optional(),
  lastBuildResult: healthResultString({
    "x-chart-type": "text",
    "x-chart-label": "Last Build Result",
  }).optional(),
  lastBuildDurationMs: healthResultNumber({
    "x-chart-type": "line",
    "x-chart-label": "Build Duration",
    "x-chart-unit": "ms",
  }).optional(),
  lastBuildTimestamp: healthResultNumber({
    "x-chart-type": "counter",
    "x-chart-label": "Last Build Time",
  }).optional(),
  timeSinceLastBuildMs: healthResultNumber({
    "x-chart-type": "line",
    "x-chart-label": "Time Since Last Build",
    "x-chart-unit": "ms",
  }).optional(),

  // --- Queue / status indicator --------------------------------------------
  inQueue: healthResultBoolean({ "x-chart-type": "boolean", "x-chart-label": "In Queue" }),
  color: healthResultString({ "x-chart-type": "text", "x-chart-label": "Status Color" }),
});

/** Per-run result shape inferred from the schema above. */
export type JobStatusResult = z.infer<typeof jobStatusResultSchema>;
|
|
79
|
+
|
|
80
|
+
/**
 * Aggregate computed over a series of job status runs.
 * Both rate fields are percentages.
 */
const jobStatusAggregatedSchema = z.object({
  // Mean of the recorded last-build durations.
  avgBuildDurationMs: healthResultNumber({
    "x-chart-type": "line",
    "x-chart-label": "Avg Build Duration",
    "x-chart-unit": "ms",
  }),
  // Share of recorded build results equal to "SUCCESS".
  successRate: healthResultNumber({
    "x-chart-type": "gauge",
    "x-chart-label": "Success Rate",
    "x-chart-unit": "%",
  }),
  // Share of runs in which the job was reported buildable.
  buildableRate: healthResultNumber({
    "x-chart-type": "gauge",
    "x-chart-label": "Enabled Rate",
    "x-chart-unit": "%",
  }),
});

/** Aggregated result shape inferred from the schema above. */
export type JobStatusAggregatedResult = z.infer<typeof jobStatusAggregatedSchema>;
|
|
101
|
+
|
|
102
|
+
// ============================================================================
|
|
103
|
+
// JOB STATUS COLLECTOR
|
|
104
|
+
// ============================================================================
|
|
105
|
+
|
|
106
|
+
/**
 * Collector for Jenkins job status.
 *
 * Requests a single job's `api/json` document through the transport client
 * and reports its buildable / queue / color state plus, when
 * `checkLastBuild` is enabled, details of the last build. A last build whose
 * result is `FAILURE` or `ABORTED` is surfaced as a collector error.
 */
export class JobStatusCollector
  implements
    CollectorStrategy<
      JenkinsTransportClient,
      JobStatusConfig,
      JobStatusResult,
      JobStatusAggregatedResult
    >
{
  id = "job-status";
  displayName = "Job Status";
  description = "Monitor Jenkins job status and last build information";

  supportedPlugins = [pluginMetadata];
  allowMultiple = true;

  config = new Versioned({ version: 1, schema: jobStatusConfigSchema });
  result = new Versioned({ version: 1, schema: jobStatusResultSchema });
  aggregatedResult = new Versioned({
    version: 1,
    schema: jobStatusAggregatedSchema,
  });

  /**
   * Fetch the configured job and convert the response into a result.
   *
   * @param config - Validated collector configuration.
   * @param client - Transport client used to issue the request.
   * @returns The job status; `error` is set when the job name contains no
   *   path segments, when the transport reports an error, or when the last
   *   build result is `FAILURE` or `ABORTED`.
   */
  async execute({
    config,
    client,
  }: {
    config: JobStatusConfig;
    client: JenkinsTransportClient;
    pluginId: string;
  }): Promise<CollectorResult<JobStatusResult>> {
    // Leading, trailing, or doubled slashes would otherwise yield empty
    // "job/" segments and a malformed URL, so drop empty segments first.
    const segments = config.jobName
      .split("/")
      .filter((part) => part.length > 0);

    if (segments.length === 0) {
      // e.g. jobName === "/" passes the schema's min(1) but names no job.
      return {
        result: this.placeholderResult(config.jobName),
        error: `Invalid job name: "${config.jobName}"`,
      };
    }

    // Each folder/job level is addressed as "job/<name>" in the URL path.
    const jobPath = segments
      .map((part) => `job/${encodeURIComponent(part)}`)
      .join("/");

    const response = await client.exec({
      path: `/${jobPath}/api/json`,
      query: {
        tree: "name,buildable,color,inQueue,lastBuild[number,result,duration,timestamp]",
      },
    });

    if (response.error) {
      return {
        result: this.placeholderResult(config.jobName),
        error: response.error,
      };
    }

    // Guard against a successful response that carries no body, so the
    // property reads below cannot throw.
    const data = (response.data ?? {}) as {
      name?: string;
      buildable?: boolean;
      color?: string;
      inQueue?: boolean;
      lastBuild?: {
        number?: number;
        result?: string;
        duration?: number;
        timestamp?: number;
      };
    };

    const result: JobStatusResult = {
      jobName: data.name || config.jobName,
      buildable: data.buildable ?? true,
      color: data.color || "notbuilt",
      inQueue: data.inQueue ?? false,
    };

    if (config.checkLastBuild && data.lastBuild) {
      const lastBuild = data.lastBuild;

      result.lastBuildNumber = lastBuild.number;
      // A missing/empty result is reported as "UNKNOWN".
      result.lastBuildResult = lastBuild.result || "UNKNOWN";
      result.lastBuildDurationMs = lastBuild.duration;
      result.lastBuildTimestamp = lastBuild.timestamp;

      if (lastBuild.timestamp) {
        // NOTE(review): assumes the timestamp is epoch milliseconds. Clamp at
        // zero so clock skew between hosts cannot produce a negative age.
        result.timeSinceLastBuildMs = Math.max(
          0,
          Date.now() - lastBuild.timestamp
        );
      }
    }

    // Only FAILURE and ABORTED are treated as errors; any other result
    // (including "UNKNOWN") leaves `error` undefined.
    const isFailure =
      result.lastBuildResult === "FAILURE" ||
      result.lastBuildResult === "ABORTED";

    return {
      result,
      error: isFailure ? `Last build: ${result.lastBuildResult}` : undefined,
    };
  }

  /**
   * Combine a series of runs into average build duration, success rate and
   * buildable rate. Runs lacking the relevant metadata field are ignored for
   * that metric; a metric with no samples is reported as 0.
   *
   * @param runs - Historical runs whose `metadata` holds per-run results.
   * @returns Rounded aggregate values (rates are percentages).
   */
  aggregateResult(
    runs: HealthCheckRunForAggregation<JobStatusResult>[]
  ): JobStatusAggregatedResult {
    const durations = runs
      .map((r) => r.metadata?.lastBuildDurationMs)
      .filter((v): v is number => typeof v === "number");

    const results = runs
      .map((r) => r.metadata?.lastBuildResult)
      .filter((v): v is string => typeof v === "string");

    const buildables = runs
      .map((r) => r.metadata?.buildable)
      .filter((v): v is boolean => typeof v === "boolean");

    const successCount = results.filter((r) => r === "SUCCESS").length;
    const buildableCount = buildables.filter(Boolean).length;

    return {
      avgBuildDurationMs:
        durations.length > 0
          ? Math.round(durations.reduce((a, b) => a + b, 0) / durations.length)
          : 0,
      successRate:
        results.length > 0
          ? Math.round((successCount / results.length) * 100)
          : 0,
      buildableRate:
        buildables.length > 0
          ? Math.round((buildableCount / buildables.length) * 100)
          : 0,
    };
  }

  /** Result payload returned alongside an error when no job data is available. */
  private placeholderResult(jobName: string): JobStatusResult {
    return {
      jobName,
      buildable: false,
      inQueue: false,
      color: "notbuilt",
    };
  }
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import { NodeHealthCollector } from "./node-health";
|
|
3
|
+
import type {
|
|
4
|
+
JenkinsTransportClient,
|
|
5
|
+
JenkinsRequest,
|
|
6
|
+
JenkinsResponse,
|
|
7
|
+
} from "../transport-client";
|
|
8
|
+
|
|
9
|
+
describe("NodeHealthCollector", () => {
  const collector = new NodeHealthCollector();
  const pluginId = "healthcheck-jenkins";

  /**
   * Build a transport client stub that always answers 200 with `data`.
   * `onRequest`, when given, is invoked with every request before replying.
   */
  const stubClient = (
    data: JenkinsResponse["data"],
    onRequest?: (req: JenkinsRequest) => void
  ): JenkinsTransportClient => ({
    exec: async (req: JenkinsRequest) => {
      onRequest?.(req);
      return { statusCode: 200, data };
    },
  });

  it("should collect all nodes info", async () => {
    const computers = [
      { displayName: "master", offline: false, numExecutors: 2, idle: true },
      { displayName: "agent-1", offline: false, numExecutors: 4, idle: false },
      { displayName: "agent-2", offline: true, numExecutors: 4, idle: true },
    ];

    const { result, error } = await collector.execute({
      config: {},
      client: stubClient({
        busyExecutors: 3,
        totalExecutors: 10,
        computer: computers,
      }),
      pluginId,
    });

    expect(result.totalNodes).toBe(3);
    expect(result.onlineNodes).toBe(2);
    expect(result.offlineNodes).toBe(1);
    expect(result.busyExecutors).toBe(3);
    expect(result.totalExecutors).toBe(10);
    expect(result.executorUtilization).toBe(30);
    // One offline node must be surfaced as an error.
    expect(error).toContain("1 of 3 nodes offline");
  });

  it("should collect single node info", async () => {
    let capturedPath = "";
    const client = stubClient(
      {
        displayName: "agent-1",
        offline: false,
        numExecutors: 4,
        idle: false,
      },
      (req) => {
        capturedPath = req.path;
      }
    );

    const { result, error } = await collector.execute({
      config: { nodeName: "agent-1" },
      client,
      pluginId,
    });

    expect(capturedPath).toContain("/computer/agent-1/api/json");
    expect(result.totalNodes).toBe(1);
    expect(result.onlineNodes).toBe(1);
    expect(result.nodeDisplayName).toBe("agent-1");
    expect(error).toBeUndefined();
  });

  it("should report error for offline single node", async () => {
    const { result, error } = await collector.execute({
      config: { nodeName: "agent-1" },
      client: stubClient({
        displayName: "agent-1",
        offline: true,
        offlineCauseReason: "Connection lost",
        numExecutors: 4,
      }),
      pluginId,
    });

    expect(result.offlineNodes).toBe(1);
    expect(result.nodeOffline).toBe(true);
    expect(error).toContain("Connection lost");
  });

  it("should aggregate correctly", () => {
    const firstRun = {
      status: "healthy" as const,
      latencyMs: 100,
      metadata: {
        totalNodes: 6,
        onlineNodes: 5,
        offlineNodes: 1,
        busyExecutors: 10,
        idleExecutors: 10,
        totalExecutors: 20,
        executorUtilization: 50,
      },
    };
    const secondRun = {
      status: "healthy" as const,
      latencyMs: 100,
      metadata: {
        totalNodes: 5,
        onlineNodes: 3,
        offlineNodes: 2,
        busyExecutors: 14,
        idleExecutors: 6,
        totalExecutors: 20,
        executorUtilization: 70,
      },
    };
    const runs: Parameters<typeof collector.aggregateResult>[0] = [
      firstRun,
      secondRun,
    ];

    const aggregated = collector.aggregateResult(runs);

    // (5 + 3) / 2 online nodes, (50 + 70) / 2 utilization, min(5, 3) online.
    expect(aggregated.avgOnlineNodes).toBe(4);
    expect(aggregated.avgUtilization).toBe(60);
    expect(aggregated.minOnlineNodes).toBe(3);
  });
});
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Versioned,
|
|
3
|
+
z,
|
|
4
|
+
type HealthCheckRunForAggregation,
|
|
5
|
+
type CollectorResult,
|
|
6
|
+
type CollectorStrategy,
|
|
7
|
+
} from "@checkstack/backend-api";
|
|
8
|
+
import {
|
|
9
|
+
healthResultBoolean,
|
|
10
|
+
healthResultNumber,
|
|
11
|
+
healthResultString,
|
|
12
|
+
} from "@checkstack/healthcheck-common";
|
|
13
|
+
import { pluginMetadata } from "../plugin-metadata";
|
|
14
|
+
import type { JenkinsTransportClient } from "../transport-client";
|
|
15
|
+
|
|
16
|
+
// ============================================================================
|
|
17
|
+
// CONFIGURATION SCHEMA
|
|
18
|
+
// ============================================================================
|
|
19
|
+
|
|
20
|
+
/**
 * Configuration accepted by the node health collector.
 */
const nodeHealthConfigSchema = z.object({
  // Optional; when absent or empty the collector inspects every node.
  nodeName: z
    .string()
    .optional()
    .describe("Specific node name to check (leave empty for all nodes)"),
});

/** Parsed configuration shape inferred from the schema above. */
export type NodeHealthConfig = z.infer<typeof nodeHealthConfigSchema>;
|
|
28
|
+
|
|
29
|
+
// ============================================================================
|
|
30
|
+
// RESULT SCHEMAS
|
|
31
|
+
// ============================================================================
|
|
32
|
+
|
|
33
|
+
/**
 * Per-run result produced by the node health collector.
 *
 * The count and utilization fields are always present; the `node*` fields are
 * optional and only filled in when a single node is inspected.
 */
const nodeHealthResultSchema = z.object({
  // --- Node counts ---------------------------------------------------------
  totalNodes: healthResultNumber({ "x-chart-type": "counter", "x-chart-label": "Total Nodes" }),
  onlineNodes: healthResultNumber({ "x-chart-type": "counter", "x-chart-label": "Online Nodes" }),
  offlineNodes: healthResultNumber({ "x-chart-type": "counter", "x-chart-label": "Offline Nodes" }),

  // --- Executor counts -----------------------------------------------------
  busyExecutors: healthResultNumber({ "x-chart-type": "counter", "x-chart-label": "Busy Executors" }),
  idleExecutors: healthResultNumber({ "x-chart-type": "counter", "x-chart-label": "Idle Executors" }),
  totalExecutors: healthResultNumber({ "x-chart-type": "counter", "x-chart-label": "Total Executors" }),
  executorUtilization: healthResultNumber({
    "x-chart-type": "gauge",
    "x-chart-label": "Executor Utilization",
    "x-chart-unit": "%",
  }),

  // --- Single node mode only -----------------------------------------------
  nodeDisplayName: healthResultString({
    "x-chart-type": "text",
    "x-chart-label": "Node Name",
  }).optional(),
  nodeOffline: healthResultBoolean({
    "x-chart-type": "boolean",
    "x-chart-label": "Node Offline",
  }).optional(),
  nodeOfflineReason: healthResultString({
    "x-chart-type": "text",
    "x-chart-label": "Offline Reason",
  }).optional(),
});

/** Per-run result shape inferred from the schema above. */
export type NodeHealthResult = z.infer<typeof nodeHealthResultSchema>;
|
|
79
|
+
|
|
80
|
+
/**
 * Aggregate computed over a series of node health runs.
 */
const nodeHealthAggregatedSchema = z.object({
  // Mean number of online nodes across the runs.
  avgOnlineNodes: healthResultNumber({
    "x-chart-type": "line",
    "x-chart-label": "Avg Online Nodes",
  }),
  // Mean executor utilization across the runs, as a percentage.
  avgUtilization: healthResultNumber({
    "x-chart-type": "gauge",
    "x-chart-label": "Avg Utilization",
    "x-chart-unit": "%",
  }),
  // Lowest number of online nodes seen in any run.
  minOnlineNodes: healthResultNumber({
    "x-chart-type": "line",
    "x-chart-label": "Min Online Nodes",
  }),
});

/** Aggregated result shape inferred from the schema above. */
export type NodeHealthAggregatedResult = z.infer<typeof nodeHealthAggregatedSchema>;
|
|
99
|
+
|
|
100
|
+
// ============================================================================
|
|
101
|
+
// NODE HEALTH COLLECTOR
|
|
102
|
+
// ============================================================================
|
|
103
|
+
|
|
104
|
+
/**
 * Collector for Jenkins node/agent health.
 *
 * With `nodeName` configured it inspects that one node; otherwise it
 * summarises every node returned by `/computer/api/json`. Offline nodes are
 * surfaced as a collector error in both modes.
 */
export class NodeHealthCollector
  implements
    CollectorStrategy<
      JenkinsTransportClient,
      NodeHealthConfig,
      NodeHealthResult,
      NodeHealthAggregatedResult
    >
{
  id = "node-health";
  displayName = "Node Health";
  description = "Monitor Jenkins agent/node availability and executor usage";

  supportedPlugins = [pluginMetadata];
  allowMultiple = true;

  config = new Versioned({ version: 1, schema: nodeHealthConfigSchema });
  result = new Versioned({ version: 1, schema: nodeHealthResultSchema });
  aggregatedResult = new Versioned({
    version: 1,
    schema: nodeHealthAggregatedSchema,
  });

  /**
   * Run the collector in single-node or all-nodes mode.
   *
   * @param config - Validated configuration; a non-empty `nodeName` selects
   *   single-node mode.
   * @param client - Transport client used to issue the request.
   * @returns Node and executor counts; `error` is set when the transport
   *   reports an error or when any inspected node is offline.
   */
  async execute({
    config,
    client,
  }: {
    config: NodeHealthConfig;
    client: JenkinsTransportClient;
    pluginId: string;
  }): Promise<CollectorResult<NodeHealthResult>> {
    // An empty string is falsy, so "" falls through to the all-nodes path.
    if (config.nodeName) {
      return this.executeForSingleNode(config.nodeName, client);
    }

    return this.executeForAllNodes(client);
  }

  /**
   * Inspect one node via `/computer/<name>/api/json`.
   *
   * The response only exposes a node-wide `idle` flag, so executor usage is
   * an approximation: an online node that is not flagged idle is counted as
   * having all of its executors busy.
   */
  private async executeForSingleNode(
    nodeName: string,
    client: JenkinsTransportClient
  ): Promise<CollectorResult<NodeHealthResult>> {
    const encodedName = encodeURIComponent(nodeName);
    const response = await client.exec({
      path: `/computer/${encodedName}/api/json`,
      query: {
        tree: "displayName,offline,offlineCauseReason,numExecutors,idle,temporarilyOffline",
      },
    });

    if (response.error) {
      return { result: this.emptyResult(), error: response.error };
    }

    // Guard against a successful response that carries no body, so the
    // property reads below cannot throw.
    const data = (response.data ?? {}) as {
      displayName?: string;
      offline?: boolean;
      offlineCauseReason?: string;
      numExecutors?: number;
      idle?: boolean;
      temporarilyOffline?: boolean;
    };

    const isOffline = data.offline ?? false;
    const numExecutors = data.numExecutors ?? 0;
    // Offline or idle nodes contribute no busy executors; see method docs.
    const busyExecutors = isOffline || data.idle ? 0 : numExecutors;
    const idleExecutors = numExecutors - busyExecutors;

    const result: NodeHealthResult = {
      totalNodes: 1,
      onlineNodes: isOffline ? 0 : 1,
      offlineNodes: isOffline ? 1 : 0,
      busyExecutors,
      idleExecutors,
      totalExecutors: numExecutors,
      executorUtilization:
        numExecutors > 0 ? Math.round((busyExecutors / numExecutors) * 100) : 0,
      // Fall back to the configured name when the response omits displayName.
      nodeDisplayName: data.displayName ?? nodeName,
      nodeOffline: isOffline,
      nodeOfflineReason: data.offlineCauseReason,
    };

    return {
      result,
      error: isOffline
        ? `Node offline: ${data.offlineCauseReason || "Unknown reason"}`
        : undefined,
    };
  }

  /**
   * Summarise all nodes via `/computer/api/json`, using the executor totals
   * reported in the response and counting nodes by their `offline` flag.
   */
  private async executeForAllNodes(
    client: JenkinsTransportClient
  ): Promise<CollectorResult<NodeHealthResult>> {
    const response = await client.exec({
      path: "/computer/api/json",
      query: {
        tree: "busyExecutors,computer[displayName,offline,numExecutors,idle],totalExecutors",
      },
    });

    if (response.error) {
      return { result: this.emptyResult(), error: response.error };
    }

    // Same nullish guard as in single-node mode.
    const data = (response.data ?? {}) as {
      busyExecutors?: number;
      totalExecutors?: number;
      computer?: Array<{
        displayName?: string;
        offline?: boolean;
        numExecutors?: number;
        idle?: boolean;
      }>;
    };

    const nodes = data.computer || [];
    const onlineNodes = nodes.filter((n) => !n.offline).length;
    // Every node is either online or offline, so no second pass is needed.
    const offlineNodes = nodes.length - onlineNodes;
    const totalExecutors = data.totalExecutors ?? 0;
    const busyExecutors = data.busyExecutors ?? 0;
    const idleExecutors = totalExecutors - busyExecutors;

    const result: NodeHealthResult = {
      totalNodes: nodes.length,
      onlineNodes,
      offlineNodes,
      busyExecutors,
      idleExecutors,
      totalExecutors,
      executorUtilization:
        totalExecutors > 0
          ? Math.round((busyExecutors / totalExecutors) * 100)
          : 0,
    };

    return {
      result,
      error:
        offlineNodes > 0
          ? `${offlineNodes} of ${nodes.length} nodes offline`
          : undefined,
    };
  }

  /**
   * Combine a series of runs into average online nodes, average executor
   * utilization and the minimum online-node count. Runs lacking a metric are
   * ignored for that metric; a metric with no samples is reported as 0.
   *
   * @param runs - Historical runs whose `metadata` holds per-run results.
   * @returns Rounded averages plus the minimum online-node count.
   */
  aggregateResult(
    runs: HealthCheckRunForAggregation<NodeHealthResult>[]
  ): NodeHealthAggregatedResult {
    const onlineNodes = runs
      .map((r) => r.metadata?.onlineNodes)
      .filter((v): v is number => typeof v === "number");

    const utilizations = runs
      .map((r) => r.metadata?.executorUtilization)
      .filter((v): v is number => typeof v === "number");

    const average = (values: number[]): number =>
      values.length > 0
        ? Math.round(values.reduce((a, b) => a + b, 0) / values.length)
        : 0;

    return {
      avgOnlineNodes: average(onlineNodes),
      avgUtilization: average(utilizations),
      // Fold instead of Math.min(...values): spreading a very large array into
      // call arguments can exceed the engine's argument limit and throw.
      minOnlineNodes:
        onlineNodes.length > 0
          ? onlineNodes.reduce((lowest, n) => Math.min(lowest, n), Infinity)
          : 0,
    };
  }

  /** All-zero result returned alongside a transport error. */
  private emptyResult(): NodeHealthResult {
    return {
      totalNodes: 0,
      onlineNodes: 0,
      offlineNodes: 0,
      busyExecutors: 0,
      idleExecutors: 0,
      totalExecutors: 0,
      executorUtilization: 0,
    };
  }
}
|