@aztec/end-to-end 0.0.1-commit.bf2612ae → 0.0.1-commit.c2595eba
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dest/e2e_epochs/epochs_test.d.ts +7 -1
- package/dest/e2e_epochs/epochs_test.d.ts.map +1 -1
- package/dest/e2e_epochs/epochs_test.js +28 -9
- package/dest/e2e_l1_publisher/write_json.d.ts +3 -2
- package/dest/e2e_l1_publisher/write_json.d.ts.map +1 -1
- package/dest/e2e_l1_publisher/write_json.js +1 -7
- package/dest/e2e_p2p/shared.d.ts +1 -1
- package/dest/e2e_p2p/shared.d.ts.map +1 -1
- package/dest/e2e_p2p/shared.js +2 -2
- package/dest/fixtures/e2e_prover_test.js +1 -1
- package/dest/fixtures/setup.d.ts +3 -3
- package/dest/fixtures/setup.d.ts.map +1 -1
- package/dest/fixtures/setup.js +20 -15
- package/dest/fixtures/setup_p2p_test.d.ts +4 -5
- package/dest/fixtures/setup_p2p_test.d.ts.map +1 -1
- package/dest/fixtures/setup_p2p_test.js +24 -19
- package/dest/spartan/tx_metrics.d.ts +35 -1
- package/dest/spartan/tx_metrics.d.ts.map +1 -1
- package/dest/spartan/tx_metrics.js +150 -0
- package/dest/spartan/utils/index.d.ts +3 -3
- package/dest/spartan/utils/index.d.ts.map +1 -1
- package/dest/spartan/utils/index.js +2 -2
- package/dest/spartan/utils/k8s.d.ts +29 -1
- package/dest/spartan/utils/k8s.d.ts.map +1 -1
- package/dest/spartan/utils/k8s.js +118 -0
- package/dest/spartan/utils/nodes.d.ts +11 -1
- package/dest/spartan/utils/nodes.d.ts.map +1 -1
- package/dest/spartan/utils/nodes.js +192 -27
- package/package.json +39 -39
- package/src/e2e_epochs/epochs_test.ts +31 -10
- package/src/e2e_l1_publisher/write_json.ts +1 -6
- package/src/e2e_p2p/shared.ts +10 -2
- package/src/fixtures/e2e_prover_test.ts +1 -1
- package/src/fixtures/setup.ts +13 -13
- package/src/fixtures/setup_p2p_test.ts +15 -20
- package/src/spartan/tx_metrics.ts +126 -0
- package/src/spartan/utils/index.ts +2 -0
- package/src/spartan/utils/k8s.ts +152 -0
- package/src/spartan/utils/nodes.ts +236 -24
|
@@ -4,14 +4,13 @@
|
|
|
4
4
|
import { type AztecNodeConfig, AztecNodeService } from '@aztec/aztec-node';
|
|
5
5
|
import { range } from '@aztec/foundation/array';
|
|
6
6
|
import { SecretValue } from '@aztec/foundation/config';
|
|
7
|
-
import {
|
|
7
|
+
import { withLoggerBindings } from '@aztec/foundation/log/server';
|
|
8
8
|
import { bufferToHex } from '@aztec/foundation/string';
|
|
9
9
|
import type { DateProvider } from '@aztec/foundation/timer';
|
|
10
10
|
import type { ProverNodeConfig, ProverNodeDeps } from '@aztec/prover-node';
|
|
11
11
|
import type { PublicDataTreeLeaf } from '@aztec/stdlib/trees';
|
|
12
12
|
|
|
13
13
|
import getPort from 'get-port';
|
|
14
|
-
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
15
14
|
|
|
16
15
|
import { TEST_PEER_CHECK_INTERVAL_MS } from './fixtures.js';
|
|
17
16
|
import { createAndSyncProverNode, getPrivateKeyFromIndex } from './utils.js';
|
|
@@ -22,6 +21,11 @@ import { getEndToEndTestTelemetryClient } from './with_telemetry_utils.js';
|
|
|
22
21
|
// to avoid running validators with the same key
|
|
23
22
|
export const ATTESTER_PRIVATE_KEYS_START_INDEX = 3;
|
|
24
23
|
|
|
24
|
+
// Global counters for actor naming (start at 1)
|
|
25
|
+
let validatorCounter = 1;
|
|
26
|
+
let nodeCounter = 1;
|
|
27
|
+
let proverCounter = 1;
|
|
28
|
+
|
|
25
29
|
export function generatePrivateKeys(startIndex: number, numberOfKeys: number): `0x${string}`[] {
|
|
26
30
|
const privateKeys: `0x${string}`[] = [];
|
|
27
31
|
// Do not start from 0 as it is used during setup
|
|
@@ -44,10 +48,6 @@ export async function createNodes(
|
|
|
44
48
|
validatorsPerNode = 1,
|
|
45
49
|
): Promise<AztecNodeService[]> {
|
|
46
50
|
const nodePromises: Promise<AztecNodeService>[] = [];
|
|
47
|
-
const loggerIdStorage = new AsyncLocalStorage<string>();
|
|
48
|
-
const logNameHandler = (module: string) =>
|
|
49
|
-
loggerIdStorage.getStore() ? `${module}:${loggerIdStorage.getStore()}` : module;
|
|
50
|
-
addLogNameHandler(logNameHandler);
|
|
51
51
|
|
|
52
52
|
for (let i = 0; i < numNodes; i++) {
|
|
53
53
|
const index = indexOffset + i;
|
|
@@ -69,7 +69,6 @@ export async function createNodes(
|
|
|
69
69
|
prefilledPublicData,
|
|
70
70
|
dataDir,
|
|
71
71
|
metricsPort,
|
|
72
|
-
loggerIdStorage,
|
|
73
72
|
);
|
|
74
73
|
nodePromises.push(nodePromise);
|
|
75
74
|
}
|
|
@@ -81,7 +80,6 @@ export async function createNodes(
|
|
|
81
80
|
throw new Error('Sequencer not found');
|
|
82
81
|
}
|
|
83
82
|
|
|
84
|
-
removeLogNameHandler(logNameHandler);
|
|
85
83
|
return nodes;
|
|
86
84
|
}
|
|
87
85
|
|
|
@@ -95,9 +93,9 @@ export async function createNode(
|
|
|
95
93
|
prefilledPublicData?: PublicDataTreeLeaf[],
|
|
96
94
|
dataDirectory?: string,
|
|
97
95
|
metricsPort?: number,
|
|
98
|
-
loggerIdStorage?: AsyncLocalStorage<string>,
|
|
99
96
|
) {
|
|
100
|
-
const
|
|
97
|
+
const actorIndex = validatorCounter++;
|
|
98
|
+
return await withLoggerBindings({ actor: `validator-${actorIndex}` }, async () => {
|
|
101
99
|
const validatorConfig = await createValidatorConfig(config, bootstrapNode, tcpPort, addressIndex, dataDirectory);
|
|
102
100
|
const telemetry = await getEndToEndTestTelemetryClient(metricsPort);
|
|
103
101
|
return await AztecNodeService.createAndSync(
|
|
@@ -105,8 +103,7 @@ export async function createNode(
|
|
|
105
103
|
{ telemetry, dateProvider },
|
|
106
104
|
{ prefilledPublicData, dontStartSequencer: config.dontStartSequencer },
|
|
107
105
|
);
|
|
108
|
-
};
|
|
109
|
-
return loggerIdStorage ? await loggerIdStorage.run(tcpPort.toString(), createNode) : createNode();
|
|
106
|
+
});
|
|
110
107
|
}
|
|
111
108
|
|
|
112
109
|
/** Creates a P2P enabled instance of Aztec Node Service without a validator */
|
|
@@ -118,9 +115,9 @@ export async function createNonValidatorNode(
|
|
|
118
115
|
prefilledPublicData?: PublicDataTreeLeaf[],
|
|
119
116
|
dataDirectory?: string,
|
|
120
117
|
metricsPort?: number,
|
|
121
|
-
loggerIdStorage?: AsyncLocalStorage<string>,
|
|
122
118
|
) {
|
|
123
|
-
const
|
|
119
|
+
const actorIndex = nodeCounter++;
|
|
120
|
+
return await withLoggerBindings({ actor: `node-${actorIndex}` }, async () => {
|
|
124
121
|
const p2pConfig = await createP2PConfig(baseConfig, bootstrapNode, tcpPort, dataDirectory);
|
|
125
122
|
const config: AztecNodeConfig = {
|
|
126
123
|
...p2pConfig,
|
|
@@ -130,8 +127,7 @@ export async function createNonValidatorNode(
|
|
|
130
127
|
};
|
|
131
128
|
const telemetry = await getEndToEndTestTelemetryClient(metricsPort);
|
|
132
129
|
return await AztecNodeService.createAndSync(config, { telemetry, dateProvider }, { prefilledPublicData });
|
|
133
|
-
};
|
|
134
|
-
return loggerIdStorage ? await loggerIdStorage.run(tcpPort.toString(), createNode) : createNode();
|
|
130
|
+
});
|
|
135
131
|
}
|
|
136
132
|
|
|
137
133
|
export async function createProverNode(
|
|
@@ -143,9 +139,9 @@ export async function createProverNode(
|
|
|
143
139
|
prefilledPublicData?: PublicDataTreeLeaf[],
|
|
144
140
|
dataDirectory?: string,
|
|
145
141
|
metricsPort?: number,
|
|
146
|
-
loggerIdStorage?: AsyncLocalStorage<string>,
|
|
147
142
|
) {
|
|
148
|
-
const
|
|
143
|
+
const actorIndex = proverCounter++;
|
|
144
|
+
return await withLoggerBindings({ actor: `prover-${actorIndex}` }, async () => {
|
|
149
145
|
const proverNodePrivateKey = getPrivateKeyFromIndex(ATTESTER_PRIVATE_KEYS_START_INDEX + addressIndex)!;
|
|
150
146
|
const telemetry = await getEndToEndTestTelemetryClient(metricsPort);
|
|
151
147
|
|
|
@@ -165,8 +161,7 @@ export async function createProverNode(
|
|
|
165
161
|
prefilledPublicData,
|
|
166
162
|
{ ...proverNodeDeps, telemetry },
|
|
167
163
|
);
|
|
168
|
-
};
|
|
169
|
-
return loggerIdStorage ? await loggerIdStorage.run(tcpPort.toString(), createProverNode) : createProverNode();
|
|
164
|
+
});
|
|
170
165
|
}
|
|
171
166
|
|
|
172
167
|
export async function createP2PConfig(
|
|
@@ -6,6 +6,132 @@ import { Tx, type TxReceipt } from '@aztec/stdlib/tx';
|
|
|
6
6
|
|
|
7
7
|
import { createHistogram } from 'perf_hooks';
|
|
8
8
|
|
|
9
|
+
/**
 * Collects proving-related benchmark readings and serialises them for the
 * GitHub Actions benchmark reporter.
 *
 * Each reading is optional: only values that were explicitly recorded are emitted.
 */
export class ProvingMetrics {
  private successfulTxs: number | undefined;
  private proofDuration: number | undefined;
  private activeAgents: number | undefined;
  private avgQueueTime: number | undefined;
  private jobRetries: number | undefined;
  private jobDuration: number | undefined;
  private timedOutJobs: number | undefined;
  private resolvedJobs: number | undefined;
  private rejectedJobs: number | undefined;
  private epochProvingDuration: number | undefined;
  private provenTransactions: number | undefined;
  private provenBlocks: number | undefined;

  /** @param prefix - Leading path segment of every emitted metric name (`<prefix>/<metric>`). */
  constructor(private prefix: string) {}

  /** Stores the number of successful transactions (emitted as `successful_txs`, unit `count`). */
  recordSuccessfulTxs(count: number): void {
    this.successfulTxs = count;
  }

  /** Stores the proof duration in seconds (emitted as `proof_duration`, unit `s`). */
  recordProofDuration(seconds: number): void {
    this.proofDuration = seconds;
  }

  /** Stores the number of active agents (emitted as `active_agents`, unit `count`). */
  recordActiveAgents(count: number): void {
    this.activeAgents = count;
  }

  /** Stores the average queue time in milliseconds (emitted as `avg_queue_time`, unit `ms`). */
  recordAvgQueueTime(ms: number): void {
    this.avgQueueTime = ms;
  }

  /** Stores the number of job retries (emitted as `job_retries`, unit `count`). */
  recordJobRetries(count: number): void {
    this.jobRetries = count;
  }

  /** Stores the job duration in milliseconds (emitted as `job_duration`, unit `ms`). */
  recordJobDuration(ms: number): void {
    this.jobDuration = ms;
  }

  /** Stores the number of timed-out jobs (emitted as `timed_out_jobs`, unit `count`). */
  recordTimedOutJobs(count: number): void {
    this.timedOutJobs = count;
  }

  /** Stores the number of resolved jobs (emitted as `resolved_jobs`, unit `count`). */
  recordResolvedJobs(count: number): void {
    this.resolvedJobs = count;
  }

  /** Stores the number of rejected jobs (emitted as `rejected_jobs`, unit `count`). */
  recordRejectedJobs(count: number): void {
    this.rejectedJobs = count;
  }

  /** Stores the epoch proving duration in seconds (emitted as `epoch_proving_duration`, unit `s`). */
  recordEpochProvingDuration(seconds: number): void {
    this.epochProvingDuration = seconds;
  }

  /** Stores the number of proven transactions (emitted as `proven_transactions`, unit `count`). */
  recordProvenTransactions(count: number): void {
    this.provenTransactions = count;
  }

  /** Stores the number of proven blocks (emitted as `proven_blocks`, unit `count`). */
  recordProvenBlocks(count: number): void {
    this.provenBlocks = count;
  }

  /**
   * Builds the benchmark entries for every reading that has been recorded.
   *
   * Entries are named `<prefix>/<metric>`. When the `BENCH_SCENARIO` environment
   * variable is set to a non-blank value, each name is additionally prefixed with
   * `scenario/<BENCH_SCENARIO>/`.
   *
   * @returns One `{ name, unit, value }` entry per recorded reading, in a fixed order.
   */
  toGithubActionBenchmarkJSON(): Array<{ name: string; unit: string; value: number }> {
    // Declaration order here is the emission order.
    const readings: Array<{ suffix: string; unit: string; value: number | undefined }> = [
      { suffix: 'successful_txs', unit: 'count', value: this.successfulTxs },
      { suffix: 'proof_duration', unit: 's', value: this.proofDuration },
      { suffix: 'active_agents', unit: 'count', value: this.activeAgents },
      { suffix: 'avg_queue_time', unit: 'ms', value: this.avgQueueTime },
      { suffix: 'job_retries', unit: 'count', value: this.jobRetries },
      { suffix: 'job_duration', unit: 'ms', value: this.jobDuration },
      { suffix: 'timed_out_jobs', unit: 'count', value: this.timedOutJobs },
      { suffix: 'resolved_jobs', unit: 'count', value: this.resolvedJobs },
      { suffix: 'rejected_jobs', unit: 'count', value: this.rejectedJobs },
      { suffix: 'epoch_proving_duration', unit: 's', value: this.epochProvingDuration },
      { suffix: 'proven_transactions', unit: 'count', value: this.provenTransactions },
      { suffix: 'proven_blocks', unit: 'count', value: this.provenBlocks },
    ];

    // A missing or whitespace-only scenario leaves the names unprefixed.
    const scenario = process.env.BENCH_SCENARIO?.trim();
    const namePrefix = scenario ? `scenario/${scenario}/${this.prefix}` : this.prefix;

    const entries: Array<{ name: string; unit: string; value: number }> = [];
    for (const { suffix, unit, value } of readings) {
      if (value === undefined) {
        continue;
      }
      entries.push({ name: `${namePrefix}/${suffix}`, unit, value });
    }
    return entries;
  }
}
|
|
134
|
+
|
|
9
135
|
export type TxInclusionData = {
|
|
10
136
|
txHash: string;
|
|
11
137
|
sentAt: number;
|
|
@@ -24,6 +24,7 @@ export {
|
|
|
24
24
|
getServiceEndpoint,
|
|
25
25
|
getRPCEndpoint,
|
|
26
26
|
getEthereumEndpoint,
|
|
27
|
+
createResilientPrometheusConnection,
|
|
27
28
|
} from './k8s.js';
|
|
28
29
|
|
|
29
30
|
// Chaos Mesh
|
|
@@ -45,6 +46,7 @@ export { restartBot, installTransferBot, uninstallTransferBot } from './bot.js';
|
|
|
45
46
|
// Node operations (sequencers, validators, pods)
|
|
46
47
|
export {
|
|
47
48
|
awaitCheckpointNumber,
|
|
49
|
+
waitForProvenToAdvance,
|
|
48
50
|
getSequencers,
|
|
49
51
|
updateSequencersConfig,
|
|
50
52
|
getSequencersConfig,
|
package/src/spartan/utils/k8s.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { createLogger } from '@aztec/aztec.js/log';
|
|
2
|
+
import type { Logger } from '@aztec/foundation/log';
|
|
2
3
|
import { promiseWithResolvers } from '@aztec/foundation/promise';
|
|
3
4
|
import { retryUntil } from '@aztec/foundation/retry';
|
|
4
5
|
|
|
@@ -6,6 +7,8 @@ import { type ChildProcess, exec, spawn } from 'child_process';
|
|
|
6
7
|
import path from 'path';
|
|
7
8
|
import { promisify } from 'util';
|
|
8
9
|
|
|
10
|
+
import { AlertTriggeredError, GrafanaClient } from '../../quality_of_service/grafana_client.js';
|
|
11
|
+
|
|
9
12
|
const execAsync = promisify(exec);
|
|
10
13
|
|
|
11
14
|
const logger = createLogger('e2e:k8s-utils');
|
|
@@ -370,6 +373,155 @@ export async function waitForResourcesByName({
|
|
|
370
373
|
);
|
|
371
374
|
}
|
|
372
375
|
|
|
376
|
+
/**
 * Waits for all StatefulSets matching a label to have all their replicas ready.
 *
 * On every poll the StatefulSets are listed with `kubectl get statefulset ... -o json`.
 * The wait succeeds only once at least one StatefulSet matches and every match:
 *  - has a status that reflects its latest spec (`status.observedGeneration` is not
 *    behind `metadata.generation`), so stale counters from before a restart or upgrade
 *    are not mistaken for readiness, and
 *  - reports at least `spec.replicas` ready replicas and updated replicas.
 *
 * @param namespace - Kubernetes namespace
 * @param label - Label selector for StatefulSets (e.g., "app.kubernetes.io/component=sequencer-node")
 * @param timeoutSeconds - Maximum time to wait in seconds (passed to `retryUntil`)
 * @param pollIntervalSeconds - How often to check status (passed to `retryUntil`)
 */
export async function waitForStatefulSetsReady({
  namespace,
  label,
  timeoutSeconds = 600,
  pollIntervalSeconds = 5,
}: {
  namespace: string;
  label: string;
  timeoutSeconds?: number;
  pollIntervalSeconds?: number;
}): Promise<void> {
  // Only the fields read below; everything is optional because the JSON is unvalidated.
  type StatefulSetSummary = {
    metadata?: { name?: string; generation?: number };
    spec?: { replicas?: number };
    status?: { observedGeneration?: number; readyReplicas?: number; updatedReplicas?: number };
  };

  logger.info(`Waiting for StatefulSets with label ${label} to have all replicas ready (timeout: ${timeoutSeconds}s)`);

  await retryUntil(
    async () => {
      // Get all StatefulSets matching the label
      const getCmd = `kubectl get statefulset -l ${label} -n ${namespace} -o json`;
      const { stdout } = await execAsync(getCmd);
      const result: { items?: StatefulSetSummary[] } = JSON.parse(stdout);

      if (!result.items || result.items.length === 0) {
        logger.verbose(`No StatefulSets found with label ${label}`);
        return false;
      }

      // Check each StatefulSet
      for (const sts of result.items) {
        const name = sts.metadata?.name;
        const desired = sts.spec?.replicas ?? 0;
        // A StatefulSet without populated status counts as having nothing ready yet.
        const ready = sts.status?.readyReplicas ?? 0;
        const updated = sts.status?.updatedReplicas ?? 0;

        // If the controller has not yet observed the latest spec, the replica counters
        // still describe the previous generation and must not be trusted.
        const generation = sts.metadata?.generation;
        const observedGeneration = sts.status?.observedGeneration ?? 0;
        if (typeof generation === 'number' && observedGeneration < generation) {
          logger.verbose(
            `StatefulSet ${name}: waiting for spec update to be observed (generation ${generation}, observed ${observedGeneration})`,
          );
          return false;
        }

        if (ready < desired || updated < desired) {
          logger.verbose(`StatefulSet ${name}: ${ready}/${desired} ready, ${updated}/${desired} updated`);
          return false;
        }
      }

      logger.info(`All StatefulSets with label ${label} are ready`);
      return true;
    },
    `StatefulSets with label ${label} to be ready`,
    timeoutSeconds,
    pollIntervalSeconds,
  );
}
|
|
430
|
+
|
|
431
|
+
/**
 * Builds a Prometheus connection helper that can re-create its port-forward when it drops.
 *
 * `connect` first tries `svc/metrics-prometheus-server` in the `metrics` namespace and,
 * if that fails or yields port 0, falls back to `svc/prometheus-server` in `namespace`.
 * `runAlertCheck` connects lazily and retries once after reconnecting when the failure
 * looks like a lost connection.
 *
 * @param namespace - K8s namespace to fall back to if metrics namespace doesn't have Prometheus
 * @param endpoints - Array to track created endpoints for cleanup; every new connection is appended
 * @param log - Logger instance
 * @returns The `connect` and `runAlertCheck` functions sharing one connection state
 */
export function createResilientPrometheusConnection(
  namespace: string,
  endpoints: ServiceEndpoint[],
  log: Logger,
): {
  connect: () => Promise<GrafanaClient>;
  runAlertCheck: (alerts: Parameters<GrafanaClient['runAlertCheck']>[0]) => Promise<void>;
} {
  let alertChecker: GrafanaClient | undefined;
  let currentEndpoint: ServiceEndpoint | undefined;

  /** Formats the local Prometheus API base URL for a forwarded port. */
  const apiUrlFor = (port: number) => `http://127.0.0.1:${port}/api/v1`;

  /** True when the error text suggests the port-forward is no longer reachable. */
  const looksLikeLostConnection = (err: unknown): boolean => {
    const text = String(err);
    return ['fetch failed', 'ECONNREFUSED', 'ECONNRESET'].some(marker => text.includes(marker));
  };

  const connect = async (): Promise<GrafanaClient> => {
    // Tear down the previous port-forward (if any) before opening a new one.
    currentEndpoint?.process?.kill();

    let forwardProcess: ChildProcess | undefined;
    let forwardPort = 0;

    // Preferred target: Prometheus in the shared metrics namespace. Failures here are tolerated.
    try {
      ({ process: forwardProcess, port: forwardPort } = await startPortForward({
        resource: `svc/metrics-prometheus-server`,
        namespace: 'metrics',
        containerPort: 80,
      }));
    } catch {
      log.verbose('Metrics namespace Prometheus not available, trying network namespace');
    }

    // Fallback target: Prometheus in the network namespace. Errors here propagate to the caller.
    if (forwardPort === 0) {
      ({ process: forwardProcess, port: forwardPort } = await startPortForward({
        resource: `svc/prometheus-server`,
        namespace,
        containerPort: 80,
      }));
    }

    if (!forwardProcess || forwardPort === 0) {
      throw new Error('Unable to port-forward to Prometheus');
    }

    const promUrl = apiUrlFor(forwardPort);
    currentEndpoint = { url: promUrl, process: forwardProcess };
    endpoints.push(currentEndpoint);
    alertChecker = new GrafanaClient(log, { grafanaEndpoint: promUrl, grafanaCredentials: '' });
    log.info(`Established Prometheus connection at ${promUrl}`);
    return alertChecker;
  };

  const runAlertCheck = async (alerts: Parameters<GrafanaClient['runAlertCheck']>[0]): Promise<void> => {
    // Connect lazily on first use.
    if (alertChecker === undefined) {
      alertChecker = await connect();
    }

    try {
      await alertChecker.runAlertCheck(alerts);
      return;
    } catch (err) {
      // A triggered alert is a real result, and unrelated failures are not ours to retry.
      if (err instanceof AlertTriggeredError || !looksLikeLostConnection(err)) {
        throw err;
      }
    }

    // The port-forward died: reconnect and retry the check exactly once.
    log.warn(`Prometheus connection lost, re-establishing port-forward...`);
    alertChecker = await connect();
    await alertChecker.runAlertCheck(alerts);
  };

  return { connect, runAlertCheck };
}
|
|
524
|
+
|
|
373
525
|
/**
 * Returns the path of a chart inside the spartan directory.
 *
 * @param spartanDir - Spartan directory; surrounding whitespace is trimmed before joining
 * @param chartName - Name of the chart sub-directory
 */
export function getChartDir(spartanDir: string, chartName: string) {
  const baseDir = spartanDir.trim();
  return path.join(baseDir, chartName);
}
|