@aztec/validator-ha-signer 0.0.1-commit.d1f2d6c → 0.0.1-commit.d20b825a7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -2
- package/dest/db/index.d.ts +2 -1
- package/dest/db/index.d.ts.map +1 -1
- package/dest/db/index.js +1 -0
- package/dest/db/lmdb.d.ts +66 -0
- package/dest/db/lmdb.d.ts.map +1 -0
- package/dest/db/lmdb.js +189 -0
- package/dest/db/migrations/1_initial-schema.d.ts +4 -2
- package/dest/db/migrations/1_initial-schema.d.ts.map +1 -1
- package/dest/db/migrations/1_initial-schema.js +34 -4
- package/dest/db/migrations/2_add-checkpoint-number.d.ts +7 -0
- package/dest/db/migrations/2_add-checkpoint-number.d.ts.map +1 -0
- package/dest/db/migrations/2_add-checkpoint-number.js +17 -0
- package/dest/db/postgres.d.ts +20 -4
- package/dest/db/postgres.d.ts.map +1 -1
- package/dest/db/postgres.js +46 -17
- package/dest/db/schema.d.ts +18 -11
- package/dest/db/schema.d.ts.map +1 -1
- package/dest/db/schema.js +45 -23
- package/dest/db/types.d.ts +52 -22
- package/dest/db/types.d.ts.map +1 -1
- package/dest/db/types.js +31 -15
- package/dest/factory.d.ts +39 -4
- package/dest/factory.d.ts.map +1 -1
- package/dest/factory.js +78 -7
- package/dest/metrics.d.ts +51 -0
- package/dest/metrics.d.ts.map +1 -0
- package/dest/metrics.js +103 -0
- package/dest/slashing_protection_service.d.ts +19 -6
- package/dest/slashing_protection_service.d.ts.map +1 -1
- package/dest/slashing_protection_service.js +57 -17
- package/dest/types.d.ts +33 -72
- package/dest/types.d.ts.map +1 -1
- package/dest/types.js +4 -20
- package/dest/validator_ha_signer.d.ts +15 -6
- package/dest/validator_ha_signer.d.ts.map +1 -1
- package/dest/validator_ha_signer.js +26 -11
- package/package.json +10 -5
- package/src/db/index.ts +1 -0
- package/src/db/lmdb.ts +265 -0
- package/src/db/migrations/1_initial-schema.ts +35 -4
- package/src/db/migrations/2_add-checkpoint-number.ts +19 -0
- package/src/db/postgres.ts +47 -12
- package/src/db/schema.ts +47 -23
- package/src/db/types.ts +72 -20
- package/src/factory.ts +96 -6
- package/src/metrics.ts +138 -0
- package/src/slashing_protection_service.ts +79 -21
- package/src/types.ts +56 -103
- package/src/validator_ha_signer.ts +44 -15
- package/dest/config.d.ts +0 -79
- package/dest/config.d.ts.map +0 -1
- package/dest/config.js +0 -73
- package/src/config.ts +0 -125
package/src/db/types.ts
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
1
|
+
import { BlockNumber, CheckpointNumber, type IndexWithinCheckpoint, SlotNumber } from '@aztec/foundation/branded-types';
|
|
2
|
+
import { EthAddress } from '@aztec/foundation/eth-address';
|
|
3
3
|
import type { Signature } from '@aztec/foundation/eth-signature';
|
|
4
|
+
import { DutyType } from '@aztec/stdlib/ha-signing';
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* Row type from PostgreSQL query
|
|
7
8
|
*/
|
|
8
9
|
export interface DutyRow {
|
|
10
|
+
rollup_address: string;
|
|
9
11
|
validator_address: string;
|
|
10
12
|
slot: string;
|
|
11
13
|
block_number: string;
|
|
14
|
+
checkpoint_number: string;
|
|
12
15
|
block_index_within_checkpoint: number;
|
|
13
16
|
duty_type: DutyType;
|
|
14
17
|
status: DutyStatus;
|
|
@@ -22,24 +25,35 @@ export interface DutyRow {
|
|
|
22
25
|
}
|
|
23
26
|
|
|
24
27
|
/**
|
|
25
|
-
*
|
|
28
|
+
* Plain-primitive representation of a duty record suitable for serialization
|
|
29
|
+
* (e.g. msgpackr for LMDB). All domain types are stored as their string/number
|
|
30
|
+
* equivalents. Timestamps are Unix milliseconds.
|
|
26
31
|
*/
|
|
27
|
-
export interface
|
|
28
|
-
|
|
32
|
+
export interface StoredDutyRecord {
|
|
33
|
+
rollupAddress: string;
|
|
34
|
+
validatorAddress: string;
|
|
35
|
+
slot: string;
|
|
36
|
+
blockNumber: string;
|
|
37
|
+
checkpointNumber: string;
|
|
38
|
+
blockIndexWithinCheckpoint: number;
|
|
39
|
+
dutyType: DutyType;
|
|
40
|
+
status: DutyStatus;
|
|
41
|
+
messageHash: string;
|
|
42
|
+
signature?: string;
|
|
43
|
+
nodeId: string;
|
|
44
|
+
lockToken: string;
|
|
45
|
+
/** Unix timestamp in milliseconds when signing started */
|
|
46
|
+
startedAtMs: number;
|
|
47
|
+
/** Unix timestamp in milliseconds when signing completed */
|
|
48
|
+
completedAtMs?: number;
|
|
49
|
+
errorMessage?: string;
|
|
29
50
|
}
|
|
30
51
|
|
|
31
52
|
/**
|
|
32
|
-
*
|
|
53
|
+
* Row type from INSERT_OR_GET_DUTY query (includes is_new flag)
|
|
33
54
|
*/
|
|
34
|
-
export
|
|
35
|
-
|
|
36
|
-
CHECKPOINT_PROPOSAL = 'CHECKPOINT_PROPOSAL',
|
|
37
|
-
ATTESTATION = 'ATTESTATION',
|
|
38
|
-
ATTESTATIONS_AND_SIGNERS = 'ATTESTATIONS_AND_SIGNERS',
|
|
39
|
-
GOVERNANCE_VOTE = 'GOVERNANCE_VOTE',
|
|
40
|
-
SLASHING_VOTE = 'SLASHING_VOTE',
|
|
41
|
-
AUTH_REQUEST = 'AUTH_REQUEST',
|
|
42
|
-
TXS = 'TXS',
|
|
55
|
+
export interface InsertOrGetRow extends DutyRow {
|
|
56
|
+
is_new: boolean;
|
|
43
57
|
}
|
|
44
58
|
|
|
45
59
|
/**
|
|
@@ -50,16 +64,24 @@ export enum DutyStatus {
|
|
|
50
64
|
SIGNED = 'signed',
|
|
51
65
|
}
|
|
52
66
|
|
|
67
|
+
// Re-export DutyType from stdlib
|
|
68
|
+
export { DutyType };
|
|
69
|
+
|
|
53
70
|
/**
|
|
54
|
-
*
|
|
71
|
+
* Rich representation of a validator duty, with branded types and Date objects.
|
|
72
|
+
* This is the common output type returned by all SlashingProtectionDatabase implementations.
|
|
55
73
|
*/
|
|
56
74
|
export interface ValidatorDutyRecord {
|
|
75
|
+
/** Ethereum address of the rollup contract */
|
|
76
|
+
rollupAddress: EthAddress;
|
|
57
77
|
/** Ethereum address of the validator */
|
|
58
78
|
validatorAddress: EthAddress;
|
|
59
79
|
/** Slot number for this duty */
|
|
60
80
|
slot: SlotNumber;
|
|
61
|
-
/** Block number for this duty */
|
|
81
|
+
/** Block number for this duty (0 for non-block-proposal duties) */
|
|
62
82
|
blockNumber: BlockNumber;
|
|
83
|
+
/** Checkpoint number for this duty (0 for attestation and vote duties) */
|
|
84
|
+
checkpointNumber: CheckpointNumber;
|
|
63
85
|
/** Block index within checkpoint (0, 1, 2... for block proposals, -1 for other duty types) */
|
|
64
86
|
blockIndexWithinCheckpoint: number;
|
|
65
87
|
/** Type of duty being performed */
|
|
@@ -78,15 +100,42 @@ export interface ValidatorDutyRecord {
|
|
|
78
100
|
startedAt: Date;
|
|
79
101
|
/** When the duty signing was completed (success or failure) */
|
|
80
102
|
completedAt?: Date;
|
|
81
|
-
/** Error message
|
|
103
|
+
/** Error message (currently unused) */
|
|
82
104
|
errorMessage?: string;
|
|
83
105
|
}
|
|
84
106
|
|
|
107
|
+
/**
|
|
108
|
+
* Convert a {@link StoredDutyRecord} (plain-primitive wire format) to a
|
|
109
|
+
* {@link ValidatorDutyRecord} (rich domain type).
|
|
110
|
+
*
|
|
111
|
+
* Shared by LMDB and any future non-Postgres backend implementations.
|
|
112
|
+
*/
|
|
113
|
+
export function recordFromFields(stored: StoredDutyRecord): ValidatorDutyRecord {
|
|
114
|
+
return {
|
|
115
|
+
rollupAddress: EthAddress.fromString(stored.rollupAddress),
|
|
116
|
+
validatorAddress: EthAddress.fromString(stored.validatorAddress),
|
|
117
|
+
slot: SlotNumber.fromString(stored.slot),
|
|
118
|
+
blockNumber: BlockNumber.fromString(stored.blockNumber),
|
|
119
|
+
checkpointNumber: CheckpointNumber.fromString(stored.checkpointNumber),
|
|
120
|
+
blockIndexWithinCheckpoint: stored.blockIndexWithinCheckpoint,
|
|
121
|
+
dutyType: stored.dutyType,
|
|
122
|
+
status: stored.status,
|
|
123
|
+
messageHash: stored.messageHash,
|
|
124
|
+
signature: stored.signature,
|
|
125
|
+
nodeId: stored.nodeId,
|
|
126
|
+
lockToken: stored.lockToken,
|
|
127
|
+
startedAt: new Date(stored.startedAtMs),
|
|
128
|
+
completedAt: stored.completedAtMs !== undefined ? new Date(stored.completedAtMs) : undefined,
|
|
129
|
+
errorMessage: stored.errorMessage,
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
|
|
85
133
|
/**
|
|
86
134
|
* Duty identifier for block proposals.
|
|
87
135
|
* blockIndexWithinCheckpoint is REQUIRED and must be >= 0.
|
|
88
136
|
*/
|
|
89
137
|
export interface BlockProposalDutyIdentifier {
|
|
138
|
+
rollupAddress: EthAddress;
|
|
90
139
|
validatorAddress: EthAddress;
|
|
91
140
|
slot: SlotNumber;
|
|
92
141
|
/** Block index within checkpoint (0, 1, 2...). Required for block proposals. */
|
|
@@ -99,6 +148,7 @@ export interface BlockProposalDutyIdentifier {
|
|
|
99
148
|
* blockIndexWithinCheckpoint is not applicable (internally stored as -1).
|
|
100
149
|
*/
|
|
101
150
|
export interface OtherDutyIdentifier {
|
|
151
|
+
rollupAddress: EthAddress;
|
|
102
152
|
validatorAddress: EthAddress;
|
|
103
153
|
slot: SlotNumber;
|
|
104
154
|
dutyType:
|
|
@@ -158,8 +208,10 @@ export function getBlockIndexFromDutyIdentifier(duty: DutyIdentifier): number {
|
|
|
158
208
|
* Additional parameters for checking and recording a new duty
|
|
159
209
|
*/
|
|
160
210
|
interface CheckAndRecordExtra {
|
|
161
|
-
/** Block number for this duty */
|
|
162
|
-
blockNumber: BlockNumber
|
|
211
|
+
/** Block number for this duty (0 for non-block-proposal duties) */
|
|
212
|
+
blockNumber: BlockNumber;
|
|
213
|
+
/** Checkpoint number for this duty (0 for attestation and vote duties) */
|
|
214
|
+
checkpointNumber: CheckpointNumber;
|
|
163
215
|
/** The signing root (hash) for this duty */
|
|
164
216
|
messageHash: string;
|
|
165
217
|
/** Identifier for the node that acquired the lock */
|
package/src/factory.ts
CHANGED
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Factory functions for creating validator HA signers
|
|
3
3
|
*/
|
|
4
|
+
import { DateProvider } from '@aztec/foundation/timer';
|
|
5
|
+
import { createStore } from '@aztec/kv-store/lmdb-v2';
|
|
6
|
+
import type { LocalSignerConfig, ValidatorHASignerConfig } from '@aztec/stdlib/ha-signing';
|
|
7
|
+
import { getTelemetryClient } from '@aztec/telemetry-client';
|
|
8
|
+
|
|
4
9
|
import { Pool } from 'pg';
|
|
5
10
|
|
|
6
|
-
import
|
|
11
|
+
import { LmdbSlashingProtectionDatabase } from './db/lmdb.js';
|
|
7
12
|
import { PostgresSlashingProtectionDatabase } from './db/postgres.js';
|
|
8
|
-
import
|
|
13
|
+
import { HASignerMetrics } from './metrics.js';
|
|
14
|
+
import type { CreateHASignerDeps, CreateLocalSignerWithProtectionDeps, SlashingProtectionDatabase } from './types.js';
|
|
9
15
|
import { ValidatorHASigner } from './validator_ha_signer.js';
|
|
10
16
|
|
|
11
17
|
/**
|
|
@@ -23,7 +29,6 @@ import { ValidatorHASigner } from './validator_ha_signer.js';
|
|
|
23
29
|
* ```typescript
|
|
24
30
|
* const { signer, db } = await createHASigner({
|
|
25
31
|
* databaseUrl: process.env.DATABASE_URL,
|
|
26
|
-
* haSigningEnabled: true,
|
|
27
32
|
* nodeId: 'validator-node-1',
|
|
28
33
|
* pollingIntervalMs: 100,
|
|
29
34
|
* signingTimeoutMs: 3000,
|
|
@@ -52,14 +57,19 @@ export async function createHASigner(
|
|
|
52
57
|
const { databaseUrl, poolMaxCount, poolMinCount, poolIdleTimeoutMs, poolConnectionTimeoutMs, ...signerConfig } =
|
|
53
58
|
config;
|
|
54
59
|
|
|
55
|
-
|
|
60
|
+
const databaseUrlValue = databaseUrl?.getValue();
|
|
61
|
+
if (!databaseUrlValue) {
|
|
56
62
|
throw new Error('databaseUrl is required for createHASigner');
|
|
57
63
|
}
|
|
64
|
+
|
|
65
|
+
const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
|
|
66
|
+
const dateProvider = deps?.dateProvider ?? new DateProvider();
|
|
67
|
+
|
|
58
68
|
// Create connection pool (or use provided pool)
|
|
59
69
|
let pool: Pool;
|
|
60
70
|
if (!deps?.pool) {
|
|
61
71
|
pool = new Pool({
|
|
62
|
-
connectionString:
|
|
72
|
+
connectionString: databaseUrlValue,
|
|
63
73
|
max: poolMaxCount ?? 10,
|
|
64
74
|
min: poolMinCount ?? 0,
|
|
65
75
|
idleTimeoutMillis: poolIdleTimeoutMs ?? 10_000,
|
|
@@ -75,8 +85,88 @@ export async function createHASigner(
|
|
|
75
85
|
// Verify database schema is initialized and version matches
|
|
76
86
|
await db.initialize();
|
|
77
87
|
|
|
88
|
+
// Create metrics
|
|
89
|
+
const metrics = new HASignerMetrics(telemetryClient, signerConfig.nodeId);
|
|
90
|
+
|
|
78
91
|
// Create signer
|
|
79
|
-
const signer = new ValidatorHASigner(db, {
|
|
92
|
+
const signer = new ValidatorHASigner(db, signerConfig, { metrics, dateProvider });
|
|
80
93
|
|
|
81
94
|
return { signer, db };
|
|
82
95
|
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Create a local (single-node) signing protection signer backed by LMDB.
|
|
99
|
+
*
|
|
100
|
+
* This provides double-signing protection for nodes that are NOT running in a
|
|
101
|
+
* high-availability (multi-node) setup. It prevents a proposer from sending two
|
|
102
|
+
* proposals for the same slot if the node crashes and restarts mid-proposal.
|
|
103
|
+
*
|
|
104
|
+
* When `config.dataDirectory` is set, the protection database is persisted to disk
|
|
105
|
+
* and survives crashes/restarts. When unset, an ephemeral in-memory store is
|
|
106
|
+
* used which protects within a single run but not across restarts.
|
|
107
|
+
*
|
|
108
|
+
* @param config - Local signer config
|
|
109
|
+
* @param deps - Optional dependencies (telemetry, date provider).
|
|
110
|
+
* @returns An object containing the signer and database instances.
|
|
111
|
+
*/
|
|
112
|
+
export async function createLocalSignerWithProtection(
|
|
113
|
+
config: LocalSignerConfig,
|
|
114
|
+
deps?: CreateLocalSignerWithProtectionDeps,
|
|
115
|
+
): Promise<{
|
|
116
|
+
signer: ValidatorHASigner;
|
|
117
|
+
db: SlashingProtectionDatabase;
|
|
118
|
+
}> {
|
|
119
|
+
const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
|
|
120
|
+
const dateProvider = deps?.dateProvider ?? new DateProvider();
|
|
121
|
+
|
|
122
|
+
const kvStore = await createStore('signing-protection', LmdbSlashingProtectionDatabase.SCHEMA_VERSION, {
|
|
123
|
+
dataDirectory: config.dataDirectory,
|
|
124
|
+
dataStoreMapSizeKb: config.signingProtectionMapSizeKb ?? config.dataStoreMapSizeKb,
|
|
125
|
+
l1Contracts: config.l1Contracts,
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
const db = new LmdbSlashingProtectionDatabase(kvStore, dateProvider);
|
|
129
|
+
|
|
130
|
+
const signerConfig = {
|
|
131
|
+
...config,
|
|
132
|
+
nodeId: config.nodeId || 'local',
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
const metrics = new HASignerMetrics(telemetryClient, signerConfig.nodeId, 'LocalSigningProtectionMetrics');
|
|
136
|
+
|
|
137
|
+
const signer = new ValidatorHASigner(db, signerConfig, { metrics, dateProvider });
|
|
138
|
+
|
|
139
|
+
return { signer, db };
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Create an in-memory LMDB-backed SlashingProtectionDatabase that can be shared across
|
|
144
|
+
* multiple validator nodes in the same process. Used for testing HA setups.
|
|
145
|
+
*/
|
|
146
|
+
export async function createSharedSlashingProtectionDb(
|
|
147
|
+
dateProvider: DateProvider = new DateProvider(),
|
|
148
|
+
): Promise<SlashingProtectionDatabase> {
|
|
149
|
+
const kvStore = await createStore('shared-signing-protection', LmdbSlashingProtectionDatabase.SCHEMA_VERSION, {
|
|
150
|
+
dataStoreMapSizeKb: 1024 * 1024,
|
|
151
|
+
});
|
|
152
|
+
return new LmdbSlashingProtectionDatabase(kvStore, dateProvider);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* Create a ValidatorHASigner backed by a pre-existing SlashingProtectionDatabase.
|
|
157
|
+
* Used for testing HA setups where multiple nodes share the same protection database.
|
|
158
|
+
*/
|
|
159
|
+
export function createSignerFromSharedDb(
|
|
160
|
+
db: SlashingProtectionDatabase,
|
|
161
|
+
config: Pick<
|
|
162
|
+
ValidatorHASignerConfig,
|
|
163
|
+
'nodeId' | 'pollingIntervalMs' | 'signingTimeoutMs' | 'maxStuckDutiesAgeMs' | 'l1Contracts'
|
|
164
|
+
>,
|
|
165
|
+
deps?: CreateLocalSignerWithProtectionDeps,
|
|
166
|
+
): { signer: ValidatorHASigner; db: SlashingProtectionDatabase } {
|
|
167
|
+
const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
|
|
168
|
+
const dateProvider = deps?.dateProvider ?? new DateProvider();
|
|
169
|
+
const metrics = new HASignerMetrics(telemetryClient, config.nodeId, 'SharedSigningProtectionMetrics');
|
|
170
|
+
const signer = new ValidatorHASigner(db, config, { metrics, dateProvider });
|
|
171
|
+
return { signer, db };
|
|
172
|
+
}
|
package/src/metrics.ts
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Attributes,
|
|
3
|
+
type Histogram,
|
|
4
|
+
Metrics,
|
|
5
|
+
type TelemetryClient,
|
|
6
|
+
type UpDownCounter,
|
|
7
|
+
createUpDownCounterWithDefault,
|
|
8
|
+
} from '@aztec/telemetry-client';
|
|
9
|
+
|
|
10
|
+
export type HACleanupType = 'stuck' | 'old' | 'outdated_rollup';
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Metrics for HA signer tracking signing operations, lock acquisition, and cleanup.
|
|
14
|
+
*/
|
|
15
|
+
export class HASignerMetrics {
|
|
16
|
+
// Signing lifecycle metrics
|
|
17
|
+
private signingDuration: Histogram;
|
|
18
|
+
private signingSuccessCount: UpDownCounter;
|
|
19
|
+
private dutyAlreadySignedCount: UpDownCounter;
|
|
20
|
+
private slashingProtectionCount: UpDownCounter;
|
|
21
|
+
private signingErrorCount: UpDownCounter;
|
|
22
|
+
|
|
23
|
+
// Lock acquisition metrics
|
|
24
|
+
private lockAcquiredCount: UpDownCounter;
|
|
25
|
+
|
|
26
|
+
// Cleanup metrics
|
|
27
|
+
private cleanupStuckDutiesCount: UpDownCounter;
|
|
28
|
+
private cleanupOldDutiesCount: UpDownCounter;
|
|
29
|
+
private cleanupOutdatedRollupDutiesCount: UpDownCounter;
|
|
30
|
+
|
|
31
|
+
constructor(
|
|
32
|
+
client: TelemetryClient,
|
|
33
|
+
private nodeId: string,
|
|
34
|
+
name = 'HASignerMetrics',
|
|
35
|
+
) {
|
|
36
|
+
const meter = client.getMeter(name);
|
|
37
|
+
|
|
38
|
+
// Signing lifecycle
|
|
39
|
+
this.signingDuration = meter.createHistogram(Metrics.HA_SIGNER_SIGNING_DURATION);
|
|
40
|
+
this.signingSuccessCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SIGNING_SUCCESS_COUNT);
|
|
41
|
+
this.dutyAlreadySignedCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_DUTY_ALREADY_SIGNED_COUNT);
|
|
42
|
+
this.slashingProtectionCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SLASHING_PROTECTION_COUNT);
|
|
43
|
+
this.signingErrorCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SIGNING_ERROR_COUNT);
|
|
44
|
+
|
|
45
|
+
// Lock acquisition
|
|
46
|
+
this.lockAcquiredCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_LOCK_ACQUIRED_COUNT);
|
|
47
|
+
|
|
48
|
+
// Cleanup
|
|
49
|
+
this.cleanupStuckDutiesCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_CLEANUP_STUCK_DUTIES_COUNT);
|
|
50
|
+
this.cleanupOldDutiesCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_CLEANUP_OLD_DUTIES_COUNT);
|
|
51
|
+
this.cleanupOutdatedRollupDutiesCount = createUpDownCounterWithDefault(
|
|
52
|
+
meter,
|
|
53
|
+
Metrics.HA_SIGNER_CLEANUP_OUTDATED_ROLLUP_DUTIES_COUNT,
|
|
54
|
+
);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Record a successful signing operation.
|
|
59
|
+
* @param dutyType - The type of duty signed
|
|
60
|
+
* @param durationMs - Duration from start of signWithProtection to completion
|
|
61
|
+
*/
|
|
62
|
+
public recordSigningSuccess(dutyType: string, durationMs: number): void {
|
|
63
|
+
const attributes = {
|
|
64
|
+
[Attributes.HA_DUTY_TYPE]: dutyType,
|
|
65
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
66
|
+
};
|
|
67
|
+
this.signingSuccessCount.add(1, attributes);
|
|
68
|
+
this.signingDuration.record(durationMs, attributes);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Record a DutyAlreadySignedError (expected in HA; another node signed first).
|
|
73
|
+
* @param dutyType - The type of duty
|
|
74
|
+
*/
|
|
75
|
+
public recordDutyAlreadySigned(dutyType: string): void {
|
|
76
|
+
const attributes = {
|
|
77
|
+
[Attributes.HA_DUTY_TYPE]: dutyType,
|
|
78
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
79
|
+
};
|
|
80
|
+
this.dutyAlreadySignedCount.add(1, attributes);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Record a SlashingProtectionError (attempted to sign different data for same duty).
|
|
85
|
+
* @param dutyType - The type of duty
|
|
86
|
+
*/
|
|
87
|
+
public recordSlashingProtection(dutyType: string): void {
|
|
88
|
+
const attributes = {
|
|
89
|
+
[Attributes.HA_DUTY_TYPE]: dutyType,
|
|
90
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
91
|
+
};
|
|
92
|
+
this.slashingProtectionCount.add(1, attributes);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Record a signing function failure (lock will be deleted for retry).
|
|
97
|
+
* @param dutyType - The type of duty
|
|
98
|
+
*/
|
|
99
|
+
public recordSigningError(dutyType: string): void {
|
|
100
|
+
const attributes = {
|
|
101
|
+
[Attributes.HA_DUTY_TYPE]: dutyType,
|
|
102
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
103
|
+
};
|
|
104
|
+
this.signingErrorCount.add(1, attributes);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* Record lock acquisition.
|
|
109
|
+
* @param acquired - Whether a new lock was acquired (true) or existing record found (false)
|
|
110
|
+
*/
|
|
111
|
+
public recordLockAcquire(acquired: boolean): void {
|
|
112
|
+
if (acquired) {
|
|
113
|
+
const attributes = {
|
|
114
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
115
|
+
};
|
|
116
|
+
this.lockAcquiredCount.add(1, attributes);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Record cleanup metrics.
|
|
122
|
+
* @param type - Type of cleanup
|
|
123
|
+
* @param count - Number of duties cleaned up
|
|
124
|
+
*/
|
|
125
|
+
public recordCleanup(type: HACleanupType, count: number): void {
|
|
126
|
+
const attributes = {
|
|
127
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
if (type === 'stuck') {
|
|
131
|
+
this.cleanupStuckDutiesCount.add(count, attributes);
|
|
132
|
+
} else if (type === 'old') {
|
|
133
|
+
this.cleanupOldDutiesCount.add(count, attributes);
|
|
134
|
+
} else if (type === 'outdated_rollup') {
|
|
135
|
+
this.cleanupOutdatedRollupDutiesCount.add(count, attributes);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
import { type Logger, createLogger } from '@aztec/foundation/log';
|
|
8
8
|
import { RunningPromise } from '@aztec/foundation/promise';
|
|
9
9
|
import { sleep } from '@aztec/foundation/sleep';
|
|
10
|
+
import type { DateProvider } from '@aztec/foundation/timer';
|
|
11
|
+
import type { BaseSignerConfig } from '@aztec/stdlib/ha-signing';
|
|
10
12
|
|
|
11
13
|
import {
|
|
12
14
|
type CheckAndRecordParams,
|
|
@@ -16,7 +18,13 @@ import {
|
|
|
16
18
|
getBlockIndexFromDutyIdentifier,
|
|
17
19
|
} from './db/types.js';
|
|
18
20
|
import { DutyAlreadySignedError, SlashingProtectionError } from './errors.js';
|
|
19
|
-
import type {
|
|
21
|
+
import type { HASignerMetrics } from './metrics.js';
|
|
22
|
+
import type { SlashingProtectionDatabase } from './types.js';
|
|
23
|
+
|
|
24
|
+
export interface SlashingProtectionServiceDeps {
|
|
25
|
+
metrics: HASignerMetrics;
|
|
26
|
+
dateProvider: DateProvider;
|
|
27
|
+
}
|
|
20
28
|
|
|
21
29
|
/**
|
|
22
30
|
* Slashing Protection Service
|
|
@@ -39,11 +47,16 @@ export class SlashingProtectionService {
|
|
|
39
47
|
private readonly signingTimeoutMs: number;
|
|
40
48
|
private readonly maxStuckDutiesAgeMs: number;
|
|
41
49
|
|
|
50
|
+
private readonly metrics: HASignerMetrics;
|
|
51
|
+
private readonly dateProvider: DateProvider;
|
|
52
|
+
|
|
42
53
|
private cleanupRunningPromise: RunningPromise;
|
|
54
|
+
private lastOldDutiesCleanupAtMs?: number;
|
|
43
55
|
|
|
44
56
|
constructor(
|
|
45
57
|
private readonly db: SlashingProtectionDatabase,
|
|
46
|
-
private readonly config:
|
|
58
|
+
private readonly config: BaseSignerConfig,
|
|
59
|
+
deps: SlashingProtectionServiceDeps,
|
|
47
60
|
) {
|
|
48
61
|
this.log = createLogger('slashing-protection');
|
|
49
62
|
this.pollingIntervalMs = config.pollingIntervalMs;
|
|
@@ -51,11 +64,9 @@ export class SlashingProtectionService {
|
|
|
51
64
|
// Default to 144s (2x 72s Aztec slot duration) if not explicitly configured
|
|
52
65
|
this.maxStuckDutiesAgeMs = config.maxStuckDutiesAgeMs ?? 144_000;
|
|
53
66
|
|
|
54
|
-
this.cleanupRunningPromise = new RunningPromise(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
this.maxStuckDutiesAgeMs,
|
|
58
|
-
);
|
|
67
|
+
this.cleanupRunningPromise = new RunningPromise(this.cleanup.bind(this), this.log, this.maxStuckDutiesAgeMs);
|
|
68
|
+
this.metrics = deps.metrics;
|
|
69
|
+
this.dateProvider = deps.dateProvider;
|
|
59
70
|
}
|
|
60
71
|
|
|
61
72
|
/**
|
|
@@ -67,7 +78,6 @@ export class SlashingProtectionService {
|
|
|
67
78
|
* 2. If insert succeeds, we acquired the lock - return the lockToken
|
|
68
79
|
* 3. If a record exists, handle based on status:
|
|
69
80
|
* - SIGNED: Throw appropriate error (already signed or slashing protection)
|
|
70
|
-
* - FAILED: Delete the failed record
|
|
71
81
|
* - SIGNING: Wait and poll until status changes, then handle result
|
|
72
82
|
*
|
|
73
83
|
* @returns The lockToken that must be used for recordSuccess/deleteDuty
|
|
@@ -76,7 +86,7 @@ export class SlashingProtectionService {
|
|
|
76
86
|
*/
|
|
77
87
|
async checkAndRecord(params: CheckAndRecordParams): Promise<string> {
|
|
78
88
|
const { validatorAddress, slot, dutyType, messageHash, nodeId } = params;
|
|
79
|
-
const startTime =
|
|
89
|
+
const startTime = this.dateProvider.now();
|
|
80
90
|
|
|
81
91
|
this.log.debug(`Checking duty: ${dutyType} for slot ${slot}`, {
|
|
82
92
|
validatorAddress: validatorAddress.toString(),
|
|
@@ -89,10 +99,11 @@ export class SlashingProtectionService {
|
|
|
89
99
|
|
|
90
100
|
if (isNew) {
|
|
91
101
|
// We successfully acquired the lock
|
|
92
|
-
this.log.
|
|
102
|
+
this.log.verbose(`Acquired lock for duty ${dutyType} at slot ${slot}`, {
|
|
93
103
|
validatorAddress: validatorAddress.toString(),
|
|
94
104
|
nodeId,
|
|
95
105
|
});
|
|
106
|
+
this.metrics.recordLockAcquire(true);
|
|
96
107
|
return record.lockToken;
|
|
97
108
|
}
|
|
98
109
|
|
|
@@ -107,6 +118,7 @@ export class SlashingProtectionService {
|
|
|
107
118
|
existingNodeId: record.nodeId,
|
|
108
119
|
attemptingNodeId: nodeId,
|
|
109
120
|
});
|
|
121
|
+
this.metrics.recordSlashingProtection(dutyType);
|
|
110
122
|
throw new SlashingProtectionError(
|
|
111
123
|
slot,
|
|
112
124
|
dutyType,
|
|
@@ -116,15 +128,17 @@ export class SlashingProtectionService {
|
|
|
116
128
|
record.nodeId,
|
|
117
129
|
);
|
|
118
130
|
}
|
|
131
|
+
this.metrics.recordDutyAlreadySigned(dutyType);
|
|
119
132
|
throw new DutyAlreadySignedError(slot, dutyType, record.blockIndexWithinCheckpoint, record.nodeId);
|
|
120
133
|
} else if (record.status === DutyStatus.SIGNING) {
|
|
121
134
|
// Another node is currently signing - check for timeout
|
|
122
|
-
if (
|
|
135
|
+
if (this.dateProvider.now() - startTime > this.signingTimeoutMs) {
|
|
123
136
|
this.log.warn(`Timeout waiting for signing to complete for duty ${dutyType} at slot ${slot}`, {
|
|
124
137
|
validatorAddress: validatorAddress.toString(),
|
|
125
138
|
timeoutMs: this.signingTimeoutMs,
|
|
126
139
|
signingNodeId: record.nodeId,
|
|
127
140
|
});
|
|
141
|
+
this.metrics.recordDutyAlreadySigned(dutyType);
|
|
128
142
|
throw new DutyAlreadySignedError(slot, dutyType, record.blockIndexWithinCheckpoint, 'unknown (timeout)');
|
|
129
143
|
}
|
|
130
144
|
|
|
@@ -149,10 +163,11 @@ export class SlashingProtectionService {
|
|
|
149
163
|
* @returns true if the update succeeded, false if token didn't match
|
|
150
164
|
*/
|
|
151
165
|
async recordSuccess(params: RecordSuccessParams): Promise<boolean> {
|
|
152
|
-
const { validatorAddress, slot, dutyType, signature, nodeId, lockToken } = params;
|
|
166
|
+
const { rollupAddress, validatorAddress, slot, dutyType, signature, nodeId, lockToken } = params;
|
|
153
167
|
const blockIndexWithinCheckpoint = getBlockIndexFromDutyIdentifier(params);
|
|
154
168
|
|
|
155
169
|
const success = await this.db.updateDutySigned(
|
|
170
|
+
rollupAddress,
|
|
156
171
|
validatorAddress,
|
|
157
172
|
slot,
|
|
158
173
|
dutyType,
|
|
@@ -162,7 +177,7 @@ export class SlashingProtectionService {
|
|
|
162
177
|
);
|
|
163
178
|
|
|
164
179
|
if (success) {
|
|
165
|
-
this.log.
|
|
180
|
+
this.log.verbose(`Recorded successful signing for duty ${dutyType} at slot ${slot}`, {
|
|
166
181
|
validatorAddress: validatorAddress.toString(),
|
|
167
182
|
nodeId,
|
|
168
183
|
});
|
|
@@ -184,10 +199,17 @@ export class SlashingProtectionService {
|
|
|
184
199
|
* @returns true if the delete succeeded, false if token didn't match
|
|
185
200
|
*/
|
|
186
201
|
async deleteDuty(params: DeleteDutyParams): Promise<boolean> {
|
|
187
|
-
const { validatorAddress, slot, dutyType, lockToken } = params;
|
|
202
|
+
const { rollupAddress, validatorAddress, slot, dutyType, lockToken } = params;
|
|
188
203
|
const blockIndexWithinCheckpoint = getBlockIndexFromDutyIdentifier(params);
|
|
189
204
|
|
|
190
|
-
const success = await this.db.deleteDuty(
|
|
205
|
+
const success = await this.db.deleteDuty(
|
|
206
|
+
rollupAddress,
|
|
207
|
+
validatorAddress,
|
|
208
|
+
slot,
|
|
209
|
+
dutyType,
|
|
210
|
+
lockToken,
|
|
211
|
+
blockIndexWithinCheckpoint,
|
|
212
|
+
);
|
|
191
213
|
|
|
192
214
|
if (success) {
|
|
193
215
|
this.log.info(`Deleted duty ${dutyType} at slot ${slot} to allow retry`, {
|
|
@@ -213,7 +235,20 @@ export class SlashingProtectionService {
|
|
|
213
235
|
* Start running tasks.
|
|
214
236
|
* Cleanup runs immediately on start to recover from any previous crashes.
|
|
215
237
|
*/
|
|
216
|
-
|
|
238
|
+
/**
|
|
239
|
+
* Start the background cleanup task.
|
|
240
|
+
* Also performs one-time cleanup of duties with outdated rollup addresses.
|
|
241
|
+
*/
|
|
242
|
+
async start() {
|
|
243
|
+
// One-time cleanup at startup: remove duties from previous rollup versions
|
|
244
|
+
const numOutdatedRollupDuties = await this.db.cleanupOutdatedRollupDuties(this.config.l1Contracts.rollupAddress);
|
|
245
|
+
if (numOutdatedRollupDuties > 0) {
|
|
246
|
+
this.log.info(`Cleaned up ${numOutdatedRollupDuties} duties with outdated rollup address at startup`, {
|
|
247
|
+
currentRollupAddress: this.config.l1Contracts.rollupAddress.toString(),
|
|
248
|
+
});
|
|
249
|
+
this.metrics.recordCleanup('outdated_rollup', numOutdatedRollupDuties);
|
|
250
|
+
}
|
|
251
|
+
|
|
217
252
|
this.cleanupRunningPromise.start();
|
|
218
253
|
this.log.info('Slashing protection service started', { nodeId: this.config.nodeId });
|
|
219
254
|
}
|
|
@@ -236,15 +271,38 @@ export class SlashingProtectionService {
|
|
|
236
271
|
}
|
|
237
272
|
|
|
238
273
|
/**
|
|
239
|
-
*
|
|
274
|
+
* Periodic cleanup of stuck duties and optionally old signed duties.
|
|
275
|
+
* Runs in the background via RunningPromise.
|
|
240
276
|
*/
|
|
241
|
-
private async
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
277
|
+
private async cleanup() {
|
|
278
|
+
// 1. Clean up stuck duties (our own node's duties that got stuck in 'signing' status)
|
|
279
|
+
const numStuckDuties = await this.db.cleanupOwnStuckDuties(this.config.nodeId, this.maxStuckDutiesAgeMs);
|
|
280
|
+
if (numStuckDuties > 0) {
|
|
281
|
+
this.log.verbose(`Cleaned up ${numStuckDuties} stuck duties`, {
|
|
245
282
|
nodeId: this.config.nodeId,
|
|
246
283
|
maxStuckDutiesAgeMs: this.maxStuckDutiesAgeMs,
|
|
247
284
|
});
|
|
285
|
+
this.metrics.recordCleanup('stuck', numStuckDuties);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// 2. Clean up old signed duties if configured
|
|
289
|
+
// we shouldn't run this as often as stuck duty cleanup.
|
|
290
|
+
if (this.config.cleanupOldDutiesAfterHours !== undefined) {
|
|
291
|
+
const maxAgeMs = this.config.cleanupOldDutiesAfterHours * 60 * 60 * 1000;
|
|
292
|
+
const nowMs = this.dateProvider.now();
|
|
293
|
+
const shouldRun =
|
|
294
|
+
this.lastOldDutiesCleanupAtMs === undefined || nowMs - this.lastOldDutiesCleanupAtMs >= maxAgeMs;
|
|
295
|
+
if (shouldRun) {
|
|
296
|
+
const numOldDuties = await this.db.cleanupOldDuties(maxAgeMs);
|
|
297
|
+
this.lastOldDutiesCleanupAtMs = nowMs;
|
|
298
|
+
if (numOldDuties > 0) {
|
|
299
|
+
this.log.verbose(`Cleaned up ${numOldDuties} old signed duties`, {
|
|
300
|
+
cleanupOldDutiesAfterHours: this.config.cleanupOldDutiesAfterHours,
|
|
301
|
+
maxAgeMs,
|
|
302
|
+
});
|
|
303
|
+
this.metrics.recordCleanup('old', numOldDuties);
|
|
304
|
+
}
|
|
305
|
+
}
|
|
248
306
|
}
|
|
249
307
|
}
|
|
250
308
|
}
|