@aztec/end-to-end 0.0.1-commit.e61ad554 → 0.0.1-commit.f146247c
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dest/e2e_epochs/epochs_test.d.ts +7 -1
- package/dest/e2e_epochs/epochs_test.d.ts.map +1 -1
- package/dest/e2e_epochs/epochs_test.js +28 -9
- package/dest/e2e_l1_publisher/write_json.d.ts +3 -2
- package/dest/e2e_l1_publisher/write_json.d.ts.map +1 -1
- package/dest/e2e_l1_publisher/write_json.js +1 -7
- package/dest/e2e_p2p/reqresp/utils.d.ts +22 -0
- package/dest/e2e_p2p/reqresp/utils.d.ts.map +1 -0
- package/dest/e2e_p2p/reqresp/utils.js +153 -0
- package/dest/e2e_p2p/shared.d.ts +1 -1
- package/dest/e2e_p2p/shared.d.ts.map +1 -1
- package/dest/e2e_p2p/shared.js +2 -2
- package/dest/fixtures/e2e_prover_test.js +1 -1
- package/dest/fixtures/setup.d.ts +3 -3
- package/dest/fixtures/setup.d.ts.map +1 -1
- package/dest/fixtures/setup.js +20 -15
- package/dest/fixtures/setup_p2p_test.d.ts +4 -5
- package/dest/fixtures/setup_p2p_test.d.ts.map +1 -1
- package/dest/fixtures/setup_p2p_test.js +24 -19
- package/dest/spartan/tx_metrics.d.ts +35 -1
- package/dest/spartan/tx_metrics.d.ts.map +1 -1
- package/dest/spartan/tx_metrics.js +150 -0
- package/dest/spartan/utils/index.d.ts +3 -3
- package/dest/spartan/utils/index.d.ts.map +1 -1
- package/dest/spartan/utils/index.js +2 -2
- package/dest/spartan/utils/k8s.d.ts +29 -1
- package/dest/spartan/utils/k8s.d.ts.map +1 -1
- package/dest/spartan/utils/k8s.js +118 -0
- package/dest/spartan/utils/nodes.d.ts +11 -1
- package/dest/spartan/utils/nodes.d.ts.map +1 -1
- package/dest/spartan/utils/nodes.js +198 -27
- package/package.json +39 -39
- package/src/e2e_epochs/epochs_test.ts +31 -10
- package/src/e2e_l1_publisher/write_json.ts +1 -6
- package/src/e2e_p2p/reqresp/utils.ts +207 -0
- package/src/e2e_p2p/shared.ts +10 -2
- package/src/fixtures/e2e_prover_test.ts +1 -1
- package/src/fixtures/setup.ts +13 -13
- package/src/fixtures/setup_p2p_test.ts +15 -20
- package/src/spartan/tx_metrics.ts +126 -0
- package/src/spartan/utils/index.ts +2 -0
- package/src/spartan/utils/k8s.ts +152 -0
- package/src/spartan/utils/nodes.ts +239 -24
package/src/spartan/tx_metrics.ts
CHANGED

@@ -6,6 +6,132 @@ import { Tx, type TxReceipt } from '@aztec/stdlib/tx';
 
 import { createHistogram } from 'perf_hooks';
 
+/** Metrics class for proving-related benchmarks. */
+export class ProvingMetrics {
+  private successfulTxs: number | undefined;
+  private proofDuration: number | undefined;
+  private activeAgents: number | undefined;
+  private avgQueueTime: number | undefined;
+  private jobRetries: number | undefined;
+  private jobDuration: number | undefined;
+  private timedOutJobs: number | undefined;
+  private resolvedJobs: number | undefined;
+  private rejectedJobs: number | undefined;
+  private epochProvingDuration: number | undefined;
+  private provenTransactions: number | undefined;
+  private provenBlocks: number | undefined;
+
+  constructor(private prefix: string) {}
+
+  recordSuccessfulTxs(count: number): void {
+    this.successfulTxs = count;
+  }
+
+  recordProofDuration(seconds: number): void {
+    this.proofDuration = seconds;
+  }
+
+  recordActiveAgents(count: number): void {
+    this.activeAgents = count;
+  }
+
+  recordAvgQueueTime(ms: number): void {
+    this.avgQueueTime = ms;
+  }
+
+  recordJobRetries(count: number): void {
+    this.jobRetries = count;
+  }
+
+  recordJobDuration(ms: number): void {
+    this.jobDuration = ms;
+  }
+
+  recordTimedOutJobs(count: number): void {
+    this.timedOutJobs = count;
+  }
+
+  recordResolvedJobs(count: number): void {
+    this.resolvedJobs = count;
+  }
+
+  recordRejectedJobs(count: number): void {
+    this.rejectedJobs = count;
+  }
+
+  recordEpochProvingDuration(seconds: number): void {
+    this.epochProvingDuration = seconds;
+  }
+
+  recordProvenTransactions(count: number): void {
+    this.provenTransactions = count;
+  }
+
+  recordProvenBlocks(count: number): void {
+    this.provenBlocks = count;
+  }
+
+  toGithubActionBenchmarkJSON(): Array<{ name: string; unit: string; value: number }> {
+    const data: Array<{ name: string; unit: string; value: number }> = [];
+
+    if (this.successfulTxs !== undefined) {
+      data.push({ name: `${this.prefix}/successful_txs`, unit: 'count', value: this.successfulTxs });
+    }
+
+    if (this.proofDuration !== undefined) {
+      data.push({ name: `${this.prefix}/proof_duration`, unit: 's', value: this.proofDuration });
+    }
+
+    if (this.activeAgents !== undefined) {
+      data.push({ name: `${this.prefix}/active_agents`, unit: 'count', value: this.activeAgents });
+    }
+
+    if (this.avgQueueTime !== undefined) {
+      data.push({ name: `${this.prefix}/avg_queue_time`, unit: 'ms', value: this.avgQueueTime });
+    }
+
+    if (this.jobRetries !== undefined) {
+      data.push({ name: `${this.prefix}/job_retries`, unit: 'count', value: this.jobRetries });
+    }
+
+    if (this.jobDuration !== undefined) {
+      data.push({ name: `${this.prefix}/job_duration`, unit: 'ms', value: this.jobDuration });
+    }
+
+    if (this.timedOutJobs !== undefined) {
+      data.push({ name: `${this.prefix}/timed_out_jobs`, unit: 'count', value: this.timedOutJobs });
+    }
+
+    if (this.resolvedJobs !== undefined) {
+      data.push({ name: `${this.prefix}/resolved_jobs`, unit: 'count', value: this.resolvedJobs });
+    }
+
+    if (this.rejectedJobs !== undefined) {
+      data.push({ name: `${this.prefix}/rejected_jobs`, unit: 'count', value: this.rejectedJobs });
+    }
+
+    if (this.epochProvingDuration !== undefined) {
+      data.push({ name: `${this.prefix}/epoch_proving_duration`, unit: 's', value: this.epochProvingDuration });
+    }
+
+    if (this.provenTransactions !== undefined) {
+      data.push({ name: `${this.prefix}/proven_transactions`, unit: 'count', value: this.provenTransactions });
+    }
+
+    if (this.provenBlocks !== undefined) {
+      data.push({ name: `${this.prefix}/proven_blocks`, unit: 'count', value: this.provenBlocks });
+    }
+
+    const scenario = process.env.BENCH_SCENARIO?.trim();
+    if (!scenario) {
+      return data;
+    }
+
+    const scenarioPrefix = `scenario/${scenario}/`;
+    return data.map(entry => ({ ...entry, name: `${scenarioPrefix}${entry.name}` }));
+  }
+}
+
 export type TxInclusionData = {
   txHash: string;
   sentAt: number;
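The new ProvingMetrics collector only emits entries that were explicitly recorded, and when BENCH_SCENARIO is set every entry name gains a scenario/<name>/ prefix. A minimal usage sketch, not part of the package (the 'e2e-prover' prefix and the values are made up):

const metrics = new ProvingMetrics('e2e-prover');
metrics.recordSuccessfulTxs(32); // unit: count
metrics.recordEpochProvingDuration(420); // unit: s
// Only the two recorded entries are emitted:
// [{"name":"e2e-prover/successful_txs","unit":"count","value":32},
//  {"name":"e2e-prover/epoch_proving_duration","unit":"s","value":420}]
// With BENCH_SCENARIO=proving, each name becomes "scenario/proving/e2e-prover/...".
console.log(JSON.stringify(metrics.toGithubActionBenchmarkJSON()));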
package/src/spartan/utils/index.ts
CHANGED

@@ -24,6 +24,7 @@ export {
   getServiceEndpoint,
   getRPCEndpoint,
   getEthereumEndpoint,
+  createResilientPrometheusConnection,
 } from './k8s.js';
 
 // Chaos Mesh
@@ -45,6 +46,7 @@ export { restartBot, installTransferBot, uninstallTransferBot } from './bot.js';
 // Node operations (sequencers, validators, pods)
 export {
   awaitCheckpointNumber,
+  waitForProvenToAdvance,
   getSequencers,
   updateSequencersConfig,
   getSequencersConfig,
package/src/spartan/utils/k8s.ts
CHANGED
@@ -1,4 +1,5 @@
 import { createLogger } from '@aztec/aztec.js/log';
+import type { Logger } from '@aztec/foundation/log';
 import { promiseWithResolvers } from '@aztec/foundation/promise';
 import { retryUntil } from '@aztec/foundation/retry';
@@ -6,6 +7,8 @@ import { type ChildProcess, exec, spawn } from 'child_process';
 import path from 'path';
 import { promisify } from 'util';
 
+import { AlertTriggeredError, GrafanaClient } from '../../quality_of_service/grafana_client.js';
+
 const execAsync = promisify(exec);
 
 const logger = createLogger('e2e:k8s-utils');
@@ -370,6 +373,155 @@ export async function waitForResourcesByName({
   );
 }
 
+/**
+ * Waits for all StatefulSets matching a label to have all their replicas ready.
+ *
+ * @param namespace - Kubernetes namespace
+ * @param label - Label selector for StatefulSets (e.g., "app.kubernetes.io/component=sequencer-node")
+ * @param timeoutSeconds - Maximum time to wait in seconds
+ * @param pollIntervalSeconds - How often to check status
+ */
+export async function waitForStatefulSetsReady({
+  namespace,
+  label,
+  timeoutSeconds = 600,
+  pollIntervalSeconds = 5,
+}: {
+  namespace: string;
+  label: string;
+  timeoutSeconds?: number;
+  pollIntervalSeconds?: number;
+}): Promise<void> {
+  logger.info(`Waiting for StatefulSets with label ${label} to have all replicas ready (timeout: ${timeoutSeconds}s)`);
+
+  await retryUntil(
+    async () => {
+      // Get all StatefulSets matching the label
+      const getCmd = `kubectl get statefulset -l ${label} -n ${namespace} -o json`;
+      const { stdout } = await execAsync(getCmd);
+      const result = JSON.parse(stdout);
+
+      if (!result.items || result.items.length === 0) {
+        logger.verbose(`No StatefulSets found with label ${label}`);
+        return false;
+      }
+
+      // Check each StatefulSet
+      for (const sts of result.items) {
+        const name = sts.metadata.name;
+        const desired = sts.spec.replicas ?? 0;
+        const ready = sts.status.readyReplicas ?? 0;
+        const updated = sts.status.updatedReplicas ?? 0;
+
+        if (ready < desired || updated < desired) {
+          logger.verbose(`StatefulSet ${name}: ${ready}/${desired} ready, ${updated}/${desired} updated`);
+          return false;
+        }
+      }
+
+      logger.info(`All StatefulSets with label ${label} are ready`);
+      return true;
+    },
+    `StatefulSets with label ${label} to be ready`,
+    timeoutSeconds,
+    pollIntervalSeconds,
+  );
+}
+
+/**
+ * Creates a Prometheus connection that can re-establish port-forward on failure.
+ * Returns functions to connect and run alert checks that automatically reconnect if needed.
+ *
+ * @param namespace - K8s namespace to fall back to if metrics namespace doesn't have Prometheus
+ * @param endpoints - Array to track created endpoints for cleanup
+ * @param log - Logger instance
+ */
+export function createResilientPrometheusConnection(
+  namespace: string,
+  endpoints: ServiceEndpoint[],
+  log: Logger,
+): {
+  connect: () => Promise<GrafanaClient>;
+  runAlertCheck: (alerts: Parameters<GrafanaClient['runAlertCheck']>[0]) => Promise<void>;
+} {
+  let alertChecker: GrafanaClient | undefined;
+  let currentEndpoint: ServiceEndpoint | undefined;
+
+  const connect = async (): Promise<GrafanaClient> => {
+    // Kill existing connection if any
+    if (currentEndpoint?.process) {
+      currentEndpoint.process.kill();
+    }
+
+    // Try metrics namespace first, then network namespace
+    let promPort = 0;
+    let promUrl = '';
+    let promProc: ChildProcess | undefined;
+
+    try {
+      const metricsResult = await startPortForward({
+        resource: `svc/metrics-prometheus-server`,
+        namespace: 'metrics',
+        containerPort: 80,
+      });
+      promProc = metricsResult.process;
+      promPort = metricsResult.port;
+      promUrl = `http://127.0.0.1:${promPort}/api/v1`;
+    } catch {
+      // Metrics namespace might not have Prometheus, try network namespace
+      log.verbose('Metrics namespace Prometheus not available, trying network namespace');
+    }
+
+    if (promPort === 0) {
+      const nsResult = await startPortForward({
+        resource: `svc/prometheus-server`,
+        namespace,
+        containerPort: 80,
+      });
+      promProc = nsResult.process;
+      promPort = nsResult.port;
+      promUrl = `http://127.0.0.1:${promPort}/api/v1`;
+    }
+
+    if (!promProc || promPort === 0) {
+      throw new Error('Unable to port-forward to Prometheus');
+    }
+
+    currentEndpoint = { url: promUrl, process: promProc };
+    endpoints.push(currentEndpoint);
+    alertChecker = new GrafanaClient(log, { grafanaEndpoint: promUrl, grafanaCredentials: '' });
+    log.info(`Established Prometheus connection at ${promUrl}`);
+    return alertChecker;
+  };
+
+  const runAlertCheck = async (alerts: Parameters<GrafanaClient['runAlertCheck']>[0]): Promise<void> => {
+    if (!alertChecker) {
+      alertChecker = await connect();
+    }
+
+    try {
+      await alertChecker.runAlertCheck(alerts);
+    } catch (err) {
+      // If it's an AlertTriggeredError (expected behavior), rethrow it
+      if (err instanceof AlertTriggeredError) {
+        throw err;
+      }
+
+      // Check if it's a connection error (port-forward died)
+      const errorStr = String(err);
+      if (errorStr.includes('fetch failed') || errorStr.includes('ECONNREFUSED') || errorStr.includes('ECONNRESET')) {
+        log.warn(`Prometheus connection lost, re-establishing port-forward...`);
+        alertChecker = await connect();
+        await alertChecker.runAlertCheck(alerts);
+      } else {
+        throw err;
+      }
+    }
+  };
+
+  return { connect, runAlertCheck };
+}
+
 export function getChartDir(spartanDir: string, chartName: string) {
   return path.join(spartanDir.trim(), chartName);
 }
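A hedged sketch of driving the new helper from a test, not taken from the package (the namespace, logger, and alerts payload are placeholders; ServiceEndpoint is assumed to be the { url, process } shape that connect() pushes for later cleanup):

const endpoints: ServiceEndpoint[] = [];
const prom = createResilientPrometheusConnection('my-namespace', endpoints, log);

// runAlertCheck connects lazily; on 'fetch failed'/ECONNREFUSED/ECONNRESET it
// re-establishes the port-forward once and retries, while AlertTriggeredError
// (a genuinely firing alert) still propagates to fail the test.
await prom.runAlertCheck(alerts);

// Teardown: kill any kubectl port-forward processes the helper registered.
endpoints.forEach(e => e.process?.kill());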
package/src/spartan/utils/nodes.ts
CHANGED

@@ -1,8 +1,9 @@
 import { createLogger } from '@aztec/aztec.js/log';
+import { createAztecNodeClient } from '@aztec/aztec.js/node';
 import type { RollupCheatCodes } from '@aztec/aztec/testing';
 import type { CheckpointNumber } from '@aztec/foundation/branded-types';
 import type { Logger } from '@aztec/foundation/log';
-import { makeBackoff, retry } from '@aztec/foundation/retry';
+import { makeBackoff, retry, retryUntil } from '@aztec/foundation/retry';
 import { sleep } from '@aztec/foundation/sleep';
 import {
   type AztecNodeAdmin,
@@ -15,7 +16,14 @@ import { promisify } from 'util';
 
 import type { TestConfig } from './config.js';
 import { execHelmCommand } from './helm.js';
-import {
+import {
+  deleteResourceByLabel,
+  getChartDir,
+  startPortForward,
+  waitForResourceByLabel,
+  waitForResourceByName,
+  waitForStatefulSetsReady,
+} from './k8s.js';
 
 const execAsync = promisify(exec);
 
@@ -42,6 +50,63 @@ export async function awaitCheckpointNumber(
   }
 }
 
+/**
+ * Waits until the proven block number increases.
+ *
+ * @param rpcUrl - URL of an Aztec RPC node to query
+ * @param log - Logger instance
+ * @param timeoutSeconds - Maximum time to wait
+ * @param pollIntervalSeconds - How often to check
+ */
+export async function waitForProvenToAdvance(
+  rpcUrl: string,
+  log: Logger,
+  timeoutSeconds: number = 300,
+  pollIntervalSeconds: number = 12, // slot duration
+): Promise<void> {
+  const node = createAztecNodeClient(rpcUrl);
+
+  log.info('Waiting for proven block to advance (indicating epoch proof just submitted)...');
+
+  // Get current proven block number
+  let initialProvenBlock: number;
+  try {
+    const tips = await node.getL2Tips();
+    initialProvenBlock = Number(tips.proven.block.number);
+    log.info(`Current proven block: ${initialProvenBlock}. Waiting for it to increase...`);
+  } catch (err) {
+    log.warn(`Error getting initial tips: ${err}. Will poll until successful.`);
+    initialProvenBlock = 0;
+  }
+
+  await retryUntil(
+    async () => {
+      try {
+        const tips = await node.getL2Tips();
+        const currentProvenBlock = Number(tips.proven.block.number);
+        const proposedBlock = Number(tips.proposed.number);
+
+        log.verbose(
+          `Chain state: proposed=${proposedBlock}, proven=${currentProvenBlock} (waiting for > ${initialProvenBlock})`,
+        );
+
+        if (currentProvenBlock > initialProvenBlock) {
+          log.info(`Proven block advanced from ${initialProvenBlock} to ${currentProvenBlock}.`);
+          return true;
+        }
+
+        return false;
+      } catch (err) {
+        log.verbose(`Error checking tips: ${err}`);
+        return false;
+      }
+    },
+    'proven block to advance',
+    timeoutSeconds,
+    pollIntervalSeconds,
+  );
+}
+
 export async function getSequencers(namespace: string) {
   const selectors = [
     'app.kubernetes.io/name=validator',
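A one-line usage sketch (the RPC URL is a placeholder): block until the next epoch proof lands, checking once per 12-second slot for up to ten minutes.

await waitForProvenToAdvance('http://localhost:8080', log, 600, 12);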
@@ -90,6 +155,8 @@ export async function withSequencersAdmin<T>(env: TestConfig, fn: (node: AztecNo
   const results = [];
 
   for (const sequencer of sequencers) {
+    // Ensure pod is Ready before attempting port-forward.
+    await waitForResourceByName({ resource: 'pods', name: sequencer, namespace });
     // Wrap port-forward + fetch in a retry to handle flaky port-forwards
     const result = await retry(
       async () => {
@@ -127,6 +194,66 @@ export async function withSequencersAdmin<T>(env: TestConfig, fn: (node: AztecNo
   return results;
 }
 
+async function getAztecImageForMigrations(namespace: string): Promise<string> {
+  const aztecDockerImage = process.env.AZTEC_DOCKER_IMAGE;
+  if (aztecDockerImage) {
+    return aztecDockerImage;
+  }
+
+  const { stdout } = await execAsync(
+    `kubectl get pods -l app.kubernetes.io/name=validator -n ${namespace} -o jsonpath='{.items[0].spec.containers[?(@.name=="aztec")].image}' | cat`,
+  );
+  const image = stdout.trim().replace(/^'|'$/g, '');
+  if (!image) {
+    throw new Error(`Could not detect aztec image from validator pod in namespace ${namespace}`);
+  }
+  return image;
+}
+
+async function getHaDbConnectionUrl(namespace: string): Promise<string> {
+  const secretName = `${namespace}-validator-ha-db-postgres`;
+  const { stdout } = await execAsync(`kubectl get secret ${secretName} -n ${namespace} -o json`);
+  const secret = JSON.parse(stdout);
+  const data = secret?.data ?? {};
+  const decode = (value?: string) => (value ? Buffer.from(value, 'base64').toString('utf8') : '');
+  const user = decode(data.POSTGRES_USER);
+  const password = decode(data.POSTGRES_PASSWORD);
+  const database = decode(data.POSTGRES_DB);
+  if (!user || !password || !database) {
+    throw new Error(`Missing HA DB credentials in secret ${secretName}`);
+  }
+  const host = `${namespace}-validator-ha-db-postgres.${namespace}.svc.cluster.local`;
+  return `postgresql://${encodeURIComponent(user)}:${encodeURIComponent(password)}@${host}:5432/${database}`;
+}
+
+export async function initHADb(namespace: string): Promise<void> {
+  const databaseUrl = await getHaDbConnectionUrl(namespace);
+  const image = await getAztecImageForMigrations(namespace);
+  const jobName = `${namespace}-validator-ha-db-migrate`;
+  await execAsync(`kubectl delete pod ${jobName} -n ${namespace} --ignore-not-found=true`).catch(() => undefined);
+
+  const migrateCmd = [
+    `kubectl run ${jobName} -n ${namespace}`,
+    '--rm -i',
+    '--restart=Never',
+    `--image=${image}`,
+    `--env=DATABASE_URL=${databaseUrl}`,
+    '--command -- node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js migrate-ha-db up',
+  ].join(' ');
+  const migrateCmdForLog = migrateCmd.replace(/--env=DATABASE_URL=\S+/, '--env=DATABASE_URL=<redacted>');
+
+  await retry(
+    async () => {
+      logger.info(`command: ${migrateCmdForLog}`);
+      await execAsync(migrateCmd);
+    },
+    'run HA DB migrations',
+    makeBackoff([1, 2, 4, 8, 16]),
+    logger,
+    true,
+  );
+}
+
 /**
  * Enables or disables probabilistic transaction dropping on validators and waits for rollout.
  * Wired to env vars P2P_DROP_TX and P2P_DROP_TX_CHANCE via Helm values.
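The HA DB path above decodes the Postgres credentials from the chart's secret, builds a postgresql:// URL against the in-cluster service DNS name, and runs the CLI's migrate-ha-db command in a one-shot kubectl run pod, retrying with backoff. A hedged calling sketch (the namespace and image tag are placeholders):

// Optional: pin the migration image; otherwise it is auto-detected from a running validator pod.
process.env.AZTEC_DOCKER_IMAGE = 'aztecprotocol/aztec:some-tag'; // hypothetical tag
await initHADb('my-namespace');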
@@ -239,25 +366,46 @@ export async function enableValidatorDynamicBootNode(
  */
 export async function rollAztecPods(namespace: string, clearState: boolean = false) {
   // Pod components use 'validator', but StatefulSets and PVCs use 'sequencer-node' for validators
-  const podComponents = [
-
+  const podComponents = [
+    'p2p-bootstrap',
+    'prover-node',
+    'prover-broker',
+    'prover-agent',
+    'sequencer-node',
+    'rpc',
+    'validator-ha-db',
+  ];
+  const pvcComponents = ['p2p-bootstrap', 'prover-node', 'prover-broker', 'sequencer-node', 'rpc', 'validator-ha-db'];
   // StatefulSet components that need to be scaled down before PVC deletion
   // Note: validators use 'sequencer-node' as component label, not 'validator'
-  const statefulSetComponents = [
+  const statefulSetComponents = [
+    'p2p-bootstrap',
+    'prover-node',
+    'prover-broker',
+    'sequencer-node',
+    'rpc',
+    'validator-ha-db',
+  ];
 
   if (clearState) {
     // To delete PVCs, we must first scale down StatefulSets so pods release the volumes
     // Otherwise PVC deletion will hang waiting for pods to terminate
-    //
+    // Save original replica counts for all StatefulSets
     const originalReplicas: Map<string, number> = new Map();
     for (const component of statefulSetComponents) {
       try {
-
+        // Get all StatefulSets that match the component label
+        const getCmd = `kubectl get statefulset -l app.kubernetes.io/component=${component} -n ${namespace} -o json`;
         const { stdout } = await execAsync(getCmd);
-        const
-
-
+        const result = JSON.parse(stdout);
+        for (const sts of result.items || []) {
+          const name = sts.metadata.name;
+          const replicas = sts.spec.replicas ?? 1;
+          if (replicas > 0) {
+            originalReplicas.set(name, replicas);
+            logger.debug(`Saved replica count for StatefulSet ${name}: ${replicas}`);
+          }
         }
       } catch {
         // Component might not exist, continue
@@ -276,27 +424,81 @@ export async function rollAztecPods(namespace: string, clearState: boolean = fal
       }
     }
 
-    // Wait for pods to terminate
-
+    // Wait for all pods to fully terminate before deleting PVCs.
+    // terminationGracePeriodSeconds default is 30s.
+    logger.info('Waiting for pods to fully terminate before deleting PVCs...');
+    for (const component of statefulSetComponents) {
+      try {
+        // Wait for all pods with this component label to be deleted
+        const waitCmd = `kubectl wait pods -l app.kubernetes.io/component=${component} --for=delete -n ${namespace} --timeout=2m`;
+        logger.info(`command: ${waitCmd}`);
+        await execAsync(waitCmd);
+      } catch (e) {
+        logger.verbose(`Wait for pod deletion ${component} skipped: ${e}`);
+      }
+    }
+    // Extra buffer to ensure PVC protection finalizers are cleared
+    await sleep(5 * 1000);
 
     // Now delete PVCs (they should no longer be in use)
     for (const component of pvcComponents) {
-
-
-
-
-
+      try {
+        await deleteResourceByLabel({
+          resource: 'persistentvolumeclaims',
+          namespace: namespace,
+          label: `app.kubernetes.io/component=${component}`,
+        });
+      } catch (e) {
+        logger.warn(`Failed to delete PVCs for ${component}: ${e}`);
+      }
     }
 
-    //
-    for (const component of
-    const replicas = originalReplicas.get(component) ?? 1;
+    // Verify PVCs are deleted
+    for (const component of pvcComponents) {
       try {
-      const
+        const waitCmd = `kubectl wait pvc -l app.kubernetes.io/component=${component} --for=delete -n ${namespace} --timeout=2m`;
+        logger.info(`command: ${waitCmd}`);
+        await execAsync(waitCmd);
+      } catch (e) {
+        logger.verbose(`Wait for PVC deletion ${component} skipped: ${e}`);
+      }
+    }
+
+    const haDbStatefulSets = [...originalReplicas.entries()].filter(([name]) => name.includes('validator-ha-db'));
+    const otherStatefulSets = [...originalReplicas.entries()].filter(([name]) => !name.includes('validator-ha-db'));
+
+    // Bring up HA DB first so we can run migrations before validators start
+    for (const [stsName, replicas] of haDbStatefulSets) {
+      try {
+        const scaleCmd = `kubectl scale statefulset ${stsName} -n ${namespace} --replicas=${replicas} --timeout=2m`;
         logger.info(`command: ${scaleCmd}`);
         await execAsync(scaleCmd);
       } catch (e) {
-        logger.verbose(`Scale up ${
+        logger.verbose(`Scale up ${stsName} skipped: ${e}`);
+      }
+    }
+
+    if (haDbStatefulSets.length > 0) {
+      try {
+        await waitForStatefulSetsReady({
+          namespace,
+          label: 'app.kubernetes.io/component=validator-ha-db',
+          timeoutSeconds: 600,
+        });
+        await initHADb(namespace);
+      } catch (e) {
+        logger.warn(`HA DB migration step skipped or failed: ${e}`);
+      }
+    }
+
+    // Scale remaining StatefulSets back up to original replica counts (by name, not label)
+    for (const [stsName, replicas] of otherStatefulSets) {
+      try {
+        const scaleCmd = `kubectl scale statefulset ${stsName} -n ${namespace} --replicas=${replicas} --timeout=2m`;
+        logger.info(`command: ${scaleCmd}`);
+        await execAsync(scaleCmd);
+      } catch (e) {
+        logger.verbose(`Scale up ${stsName} skipped: ${e}`);
      }
    }
  } else {
@@ -312,8 +514,21 @@ export async function rollAztecPods(namespace: string, clearState: boolean = fal
 
     await sleep(10 * 1000);
 
-    // Wait for
-    for (const component of
+    // Wait for StatefulSets to have all replicas ready.
+    for (const component of statefulSetComponents) {
+      try {
+        await waitForStatefulSetsReady({
+          namespace,
+          label: `app.kubernetes.io/component=${component}`,
+          timeoutSeconds: 600, // 10 minutes
+        });
+      } catch (e) {
+        logger.warn(`StatefulSet component ${component} may not be fully ready: ${e}`);
+      }
+    }
+
+    const nonStatefulSetComponents = podComponents.filter(c => !statefulSetComponents.includes(c));
+    for (const component of nonStatefulSetComponents) {
       await waitForResourceByLabel({
         resource: 'pods',
         namespace: namespace,