@aztec/validator-ha-signer 0.0.1-commit.8afd444 → 0.0.1-commit.8ee97c858
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -2
- package/dest/db/index.d.ts +2 -1
- package/dest/db/index.d.ts.map +1 -1
- package/dest/db/index.js +1 -0
- package/dest/db/lmdb.d.ts +66 -0
- package/dest/db/lmdb.d.ts.map +1 -0
- package/dest/db/lmdb.js +188 -0
- package/dest/db/postgres.d.ts +4 -2
- package/dest/db/postgres.d.ts.map +1 -1
- package/dest/db/postgres.js +15 -17
- package/dest/db/schema.d.ts +5 -4
- package/dest/db/schema.d.ts.map +1 -1
- package/dest/db/schema.js +4 -3
- package/dest/db/types.d.ts +37 -18
- package/dest/db/types.d.ts.map +1 -1
- package/dest/db/types.js +30 -15
- package/dest/factory.d.ts +39 -4
- package/dest/factory.d.ts.map +1 -1
- package/dest/factory.js +75 -5
- package/dest/metrics.d.ts +51 -0
- package/dest/metrics.d.ts.map +1 -0
- package/dest/metrics.js +103 -0
- package/dest/slashing_protection_service.d.ts +12 -3
- package/dest/slashing_protection_service.d.ts.map +1 -1
- package/dest/slashing_protection_service.js +17 -6
- package/dest/types.d.ts +17 -70
- package/dest/types.d.ts.map +1 -1
- package/dest/types.js +3 -20
- package/dest/validator_ha_signer.d.ts +12 -4
- package/dest/validator_ha_signer.d.ts.map +1 -1
- package/dest/validator_ha_signer.js +16 -8
- package/package.json +10 -6
- package/src/db/index.ts +1 -0
- package/src/db/lmdb.ts +264 -0
- package/src/db/postgres.ts +15 -15
- package/src/db/schema.ts +4 -3
- package/src/db/types.ts +61 -16
- package/src/factory.ts +93 -4
- package/src/metrics.ts +138 -0
- package/src/slashing_protection_service.ts +28 -7
- package/src/types.ts +32 -104
- package/src/validator_ha_signer.ts +33 -12
- package/dest/config.d.ts +0 -101
- package/dest/config.d.ts.map +0 -1
- package/dest/config.js +0 -92
- package/src/config.ts +0 -149
package/src/db/postgres.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* PostgreSQL implementation of SlashingProtectionDatabase
|
|
3
3
|
*/
|
|
4
|
-
import {
|
|
4
|
+
import { SlotNumber } from '@aztec/foundation/branded-types';
|
|
5
5
|
import { randomBytes } from '@aztec/foundation/crypto/random';
|
|
6
6
|
import { EthAddress } from '@aztec/foundation/eth-address';
|
|
7
7
|
import { type Logger, createLogger } from '@aztec/foundation/log';
|
|
@@ -20,7 +20,7 @@ import {
|
|
|
20
20
|
UPDATE_DUTY_SIGNED,
|
|
21
21
|
} from './schema.js';
|
|
22
22
|
import type { CheckAndRecordParams, DutyRow, DutyType, InsertOrGetRow, ValidatorDutyRecord } from './types.js';
|
|
23
|
-
import { getBlockIndexFromDutyIdentifier } from './types.js';
|
|
23
|
+
import { getBlockIndexFromDutyIdentifier, recordFromFields } from './types.js';
|
|
24
24
|
|
|
25
25
|
/**
|
|
26
26
|
* Minimal pool interface for database operations.
|
|
@@ -220,14 +220,16 @@ export class PostgresSlashingProtectionDatabase implements SlashingProtectionDat
|
|
|
220
220
|
}
|
|
221
221
|
|
|
222
222
|
/**
|
|
223
|
-
* Convert a database row to a ValidatorDutyRecord
|
|
223
|
+
* Convert a database row to a ValidatorDutyRecord.
|
|
224
|
+
* Maps snake_case column names to StoredDutyRecord (camelCase, ms timestamps),
|
|
225
|
+
* then delegates to the shared recordFromFields() converter.
|
|
224
226
|
*/
|
|
225
227
|
private rowToRecord(row: DutyRow): ValidatorDutyRecord {
|
|
226
|
-
return {
|
|
227
|
-
rollupAddress:
|
|
228
|
-
validatorAddress:
|
|
229
|
-
slot:
|
|
230
|
-
blockNumber:
|
|
228
|
+
return recordFromFields({
|
|
229
|
+
rollupAddress: row.rollup_address,
|
|
230
|
+
validatorAddress: row.validator_address,
|
|
231
|
+
slot: row.slot,
|
|
232
|
+
blockNumber: row.block_number,
|
|
231
233
|
blockIndexWithinCheckpoint: row.block_index_within_checkpoint,
|
|
232
234
|
dutyType: row.duty_type,
|
|
233
235
|
status: row.status,
|
|
@@ -235,10 +237,10 @@ export class PostgresSlashingProtectionDatabase implements SlashingProtectionDat
|
|
|
235
237
|
signature: row.signature ?? undefined,
|
|
236
238
|
nodeId: row.node_id,
|
|
237
239
|
lockToken: row.lock_token,
|
|
238
|
-
|
|
239
|
-
|
|
240
|
+
startedAtMs: row.started_at.getTime(),
|
|
241
|
+
completedAtMs: row.completed_at?.getTime(),
|
|
240
242
|
errorMessage: row.error_message ?? undefined,
|
|
241
|
-
};
|
|
243
|
+
});
|
|
242
244
|
}
|
|
243
245
|
|
|
244
246
|
/**
|
|
@@ -254,8 +256,7 @@ export class PostgresSlashingProtectionDatabase implements SlashingProtectionDat
|
|
|
254
256
|
* @returns the number of duties cleaned up
|
|
255
257
|
*/
|
|
256
258
|
async cleanupOwnStuckDuties(nodeId: string, maxAgeMs: number): Promise<number> {
|
|
257
|
-
const
|
|
258
|
-
const result = await this.pool.query(CLEANUP_OWN_STUCK_DUTIES, [nodeId, cutoff]);
|
|
259
|
+
const result = await this.pool.query(CLEANUP_OWN_STUCK_DUTIES, [nodeId, maxAgeMs]);
|
|
259
260
|
return result.rowCount ?? 0;
|
|
260
261
|
}
|
|
261
262
|
|
|
@@ -277,8 +278,7 @@ export class PostgresSlashingProtectionDatabase implements SlashingProtectionDat
|
|
|
277
278
|
* @returns the number of duties cleaned up
|
|
278
279
|
*/
|
|
279
280
|
async cleanupOldDuties(maxAgeMs: number): Promise<number> {
|
|
280
|
-
const
|
|
281
|
-
const result = await this.pool.query(CLEANUP_OLD_DUTIES, [cutoff]);
|
|
281
|
+
const result = await this.pool.query(CLEANUP_OLD_DUTIES, [maxAgeMs]);
|
|
282
282
|
return result.rowCount ?? 0;
|
|
283
283
|
}
|
|
284
284
|
}
|
package/src/db/schema.ts
CHANGED
|
@@ -203,23 +203,24 @@ WHERE status = 'signed'
|
|
|
203
203
|
|
|
204
204
|
/**
|
|
205
205
|
* Query to clean up old duties (for maintenance)
|
|
206
|
-
* Removes SIGNED duties older than a specified
|
|
206
|
+
* Removes SIGNED duties older than a specified age (in milliseconds)
|
|
207
207
|
*/
|
|
208
208
|
export const CLEANUP_OLD_DUTIES = `
|
|
209
209
|
DELETE FROM validator_duties
|
|
210
210
|
WHERE status = 'signed'
|
|
211
|
-
AND started_at < $1;
|
|
211
|
+
AND started_at < CURRENT_TIMESTAMP - ($1 || ' milliseconds')::INTERVAL;
|
|
212
212
|
`;
|
|
213
213
|
|
|
214
214
|
/**
|
|
215
215
|
* Query to cleanup own stuck duties
|
|
216
216
|
* Removes duties in 'signing' status for a specific node that are older than maxAgeMs
|
|
217
|
+
* Uses DB's CURRENT_TIMESTAMP to avoid clock skew issues between nodes
|
|
217
218
|
*/
|
|
218
219
|
export const CLEANUP_OWN_STUCK_DUTIES = `
|
|
219
220
|
DELETE FROM validator_duties
|
|
220
221
|
WHERE node_id = $1
|
|
221
222
|
AND status = 'signing'
|
|
222
|
-
AND started_at < $2;
|
|
223
|
+
AND started_at < CURRENT_TIMESTAMP - ($2 || ' milliseconds')::INTERVAL;
|
|
223
224
|
`;
|
|
224
225
|
|
|
225
226
|
/**
|
package/src/db/types.ts
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
1
|
+
import {
|
|
2
|
+
BlockNumber,
|
|
3
|
+
type CheckpointNumber,
|
|
4
|
+
type IndexWithinCheckpoint,
|
|
5
|
+
SlotNumber,
|
|
6
|
+
} from '@aztec/foundation/branded-types';
|
|
7
|
+
import { EthAddress } from '@aztec/foundation/eth-address';
|
|
3
8
|
import type { Signature } from '@aztec/foundation/eth-signature';
|
|
9
|
+
import { DutyType } from '@aztec/stdlib/ha-signing';
|
|
4
10
|
|
|
5
11
|
/**
|
|
6
12
|
* Row type from PostgreSQL query
|
|
@@ -23,24 +29,34 @@ export interface DutyRow {
|
|
|
23
29
|
}
|
|
24
30
|
|
|
25
31
|
/**
|
|
26
|
-
*
|
|
32
|
+
* Plain-primitive representation of a duty record suitable for serialization
|
|
33
|
+
* (e.g. msgpackr for LMDB). All domain types are stored as their string/number
|
|
34
|
+
* equivalents. Timestamps are Unix milliseconds.
|
|
27
35
|
*/
|
|
28
|
-
export interface
|
|
29
|
-
|
|
36
|
+
export interface StoredDutyRecord {
|
|
37
|
+
rollupAddress: string;
|
|
38
|
+
validatorAddress: string;
|
|
39
|
+
slot: string;
|
|
40
|
+
blockNumber: string;
|
|
41
|
+
blockIndexWithinCheckpoint: number;
|
|
42
|
+
dutyType: DutyType;
|
|
43
|
+
status: DutyStatus;
|
|
44
|
+
messageHash: string;
|
|
45
|
+
signature?: string;
|
|
46
|
+
nodeId: string;
|
|
47
|
+
lockToken: string;
|
|
48
|
+
/** Unix timestamp in milliseconds when signing started */
|
|
49
|
+
startedAtMs: number;
|
|
50
|
+
/** Unix timestamp in milliseconds when signing completed */
|
|
51
|
+
completedAtMs?: number;
|
|
52
|
+
errorMessage?: string;
|
|
30
53
|
}
|
|
31
54
|
|
|
32
55
|
/**
|
|
33
|
-
*
|
|
56
|
+
* Row type from INSERT_OR_GET_DUTY query (includes is_new flag)
|
|
34
57
|
*/
|
|
35
|
-
export
|
|
36
|
-
|
|
37
|
-
CHECKPOINT_PROPOSAL = 'CHECKPOINT_PROPOSAL',
|
|
38
|
-
ATTESTATION = 'ATTESTATION',
|
|
39
|
-
ATTESTATIONS_AND_SIGNERS = 'ATTESTATIONS_AND_SIGNERS',
|
|
40
|
-
GOVERNANCE_VOTE = 'GOVERNANCE_VOTE',
|
|
41
|
-
SLASHING_VOTE = 'SLASHING_VOTE',
|
|
42
|
-
AUTH_REQUEST = 'AUTH_REQUEST',
|
|
43
|
-
TXS = 'TXS',
|
|
58
|
+
export interface InsertOrGetRow extends DutyRow {
|
|
59
|
+
is_new: boolean;
|
|
44
60
|
}
|
|
45
61
|
|
|
46
62
|
/**
|
|
@@ -51,8 +67,12 @@ export enum DutyStatus {
|
|
|
51
67
|
SIGNED = 'signed',
|
|
52
68
|
}
|
|
53
69
|
|
|
70
|
+
// Re-export DutyType from stdlib
|
|
71
|
+
export { DutyType };
|
|
72
|
+
|
|
54
73
|
/**
|
|
55
|
-
*
|
|
74
|
+
* Rich representation of a validator duty, with branded types and Date objects.
|
|
75
|
+
* This is the common output type returned by all SlashingProtectionDatabase implementations.
|
|
56
76
|
*/
|
|
57
77
|
export interface ValidatorDutyRecord {
|
|
58
78
|
/** Ethereum address of the rollup contract */
|
|
@@ -85,6 +105,31 @@ export interface ValidatorDutyRecord {
|
|
|
85
105
|
errorMessage?: string;
|
|
86
106
|
}
|
|
87
107
|
|
|
108
|
+
/**
|
|
109
|
+
* Convert a {@link StoredDutyRecord} (plain-primitive wire format) to a
|
|
110
|
+
* {@link ValidatorDutyRecord} (rich domain type).
|
|
111
|
+
*
|
|
112
|
+
* Shared by all SlashingProtectionDatabase backends: the Postgres implementation maps its rows into this shape before delegating here.
|
|
113
|
+
*/
|
|
114
|
+
export function recordFromFields(stored: StoredDutyRecord): ValidatorDutyRecord {
|
|
115
|
+
return {
|
|
116
|
+
rollupAddress: EthAddress.fromString(stored.rollupAddress),
|
|
117
|
+
validatorAddress: EthAddress.fromString(stored.validatorAddress),
|
|
118
|
+
slot: SlotNumber.fromString(stored.slot),
|
|
119
|
+
blockNumber: BlockNumber.fromString(stored.blockNumber),
|
|
120
|
+
blockIndexWithinCheckpoint: stored.blockIndexWithinCheckpoint,
|
|
121
|
+
dutyType: stored.dutyType,
|
|
122
|
+
status: stored.status,
|
|
123
|
+
messageHash: stored.messageHash,
|
|
124
|
+
signature: stored.signature,
|
|
125
|
+
nodeId: stored.nodeId,
|
|
126
|
+
lockToken: stored.lockToken,
|
|
127
|
+
startedAt: new Date(stored.startedAtMs),
|
|
128
|
+
completedAt: stored.completedAtMs !== undefined ? new Date(stored.completedAtMs) : undefined,
|
|
129
|
+
errorMessage: stored.errorMessage,
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
|
|
88
133
|
/**
|
|
89
134
|
* Duty identifier for block proposals.
|
|
90
135
|
* blockIndexWithinCheckpoint is REQUIRED and must be >= 0.
|
package/src/factory.ts
CHANGED
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Factory functions for creating validator HA signers
|
|
3
3
|
*/
|
|
4
|
+
import { DateProvider } from '@aztec/foundation/timer';
|
|
5
|
+
import { createStore } from '@aztec/kv-store/lmdb-v2';
|
|
6
|
+
import type { LocalSignerConfig, ValidatorHASignerConfig } from '@aztec/stdlib/ha-signing';
|
|
7
|
+
import { getTelemetryClient } from '@aztec/telemetry-client';
|
|
8
|
+
|
|
4
9
|
import { Pool } from 'pg';
|
|
5
10
|
|
|
6
|
-
import
|
|
11
|
+
import { LmdbSlashingProtectionDatabase } from './db/lmdb.js';
|
|
7
12
|
import { PostgresSlashingProtectionDatabase } from './db/postgres.js';
|
|
8
|
-
import
|
|
13
|
+
import { HASignerMetrics } from './metrics.js';
|
|
14
|
+
import type { CreateHASignerDeps, CreateLocalSignerWithProtectionDeps, SlashingProtectionDatabase } from './types.js';
|
|
9
15
|
import { ValidatorHASigner } from './validator_ha_signer.js';
|
|
10
16
|
|
|
11
17
|
/**
|
|
@@ -23,7 +29,6 @@ import { ValidatorHASigner } from './validator_ha_signer.js';
|
|
|
23
29
|
* ```typescript
|
|
24
30
|
* const { signer, db } = await createHASigner({
|
|
25
31
|
* databaseUrl: process.env.DATABASE_URL,
|
|
26
|
-
* haSigningEnabled: true,
|
|
27
32
|
* nodeId: 'validator-node-1',
|
|
28
33
|
* pollingIntervalMs: 100,
|
|
29
34
|
* signingTimeoutMs: 3000,
|
|
@@ -55,6 +60,10 @@ export async function createHASigner(
|
|
|
55
60
|
if (!databaseUrl) {
|
|
56
61
|
throw new Error('databaseUrl is required for createHASigner');
|
|
57
62
|
}
|
|
63
|
+
|
|
64
|
+
const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
|
|
65
|
+
const dateProvider = deps?.dateProvider ?? new DateProvider();
|
|
66
|
+
|
|
58
67
|
// Create connection pool (or use provided pool)
|
|
59
68
|
let pool: Pool;
|
|
60
69
|
if (!deps?.pool) {
|
|
@@ -75,8 +84,88 @@ export async function createHASigner(
|
|
|
75
84
|
// Verify database schema is initialized and version matches
|
|
76
85
|
await db.initialize();
|
|
77
86
|
|
|
87
|
+
// Create metrics
|
|
88
|
+
const metrics = new HASignerMetrics(telemetryClient, signerConfig.nodeId);
|
|
89
|
+
|
|
78
90
|
// Create signer
|
|
79
|
-
const signer = new ValidatorHASigner(db, {
|
|
91
|
+
const signer = new ValidatorHASigner(db, signerConfig, { metrics, dateProvider });
|
|
80
92
|
|
|
81
93
|
return { signer, db };
|
|
82
94
|
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Create a local (single-node) signing protection signer backed by LMDB.
|
|
98
|
+
*
|
|
99
|
+
* This provides double-signing protection for nodes that are NOT running in a
|
|
100
|
+
* high-availability (multi-node) setup. It prevents a proposer from sending two
|
|
101
|
+
* proposals for the same slot if the node crashes and restarts mid-proposal.
|
|
102
|
+
*
|
|
103
|
+
* When `config.dataDirectory` is set, the protection database is persisted to disk
|
|
104
|
+
* and survives crashes/restarts. When unset, an ephemeral in-memory store is
|
|
105
|
+
* used which protects within a single run but not across restarts.
|
|
106
|
+
*
|
|
107
|
+
* @param config - Local signer config
|
|
108
|
+
* @param deps - Optional dependencies (telemetry, date provider).
|
|
109
|
+
* @returns An object containing the signer and database instances.
|
|
110
|
+
*/
|
|
111
|
+
export async function createLocalSignerWithProtection(
|
|
112
|
+
config: LocalSignerConfig,
|
|
113
|
+
deps?: CreateLocalSignerWithProtectionDeps,
|
|
114
|
+
): Promise<{
|
|
115
|
+
signer: ValidatorHASigner;
|
|
116
|
+
db: SlashingProtectionDatabase;
|
|
117
|
+
}> {
|
|
118
|
+
const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
|
|
119
|
+
const dateProvider = deps?.dateProvider ?? new DateProvider();
|
|
120
|
+
|
|
121
|
+
const kvStore = await createStore('signing-protection', LmdbSlashingProtectionDatabase.SCHEMA_VERSION, {
|
|
122
|
+
dataDirectory: config.dataDirectory,
|
|
123
|
+
dataStoreMapSizeKb: config.signingProtectionMapSizeKb ?? config.dataStoreMapSizeKb,
|
|
124
|
+
l1Contracts: config.l1Contracts,
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
const db = new LmdbSlashingProtectionDatabase(kvStore, dateProvider);
|
|
128
|
+
|
|
129
|
+
const signerConfig = {
|
|
130
|
+
...config,
|
|
131
|
+
nodeId: config.nodeId || 'local',
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
const metrics = new HASignerMetrics(telemetryClient, signerConfig.nodeId, 'LocalSigningProtectionMetrics');
|
|
135
|
+
|
|
136
|
+
const signer = new ValidatorHASigner(db, signerConfig, { metrics, dateProvider });
|
|
137
|
+
|
|
138
|
+
return { signer, db };
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
/**
|
|
142
|
+
* Create an in-memory LMDB-backed SlashingProtectionDatabase that can be shared across
|
|
143
|
+
* multiple validator nodes in the same process. Used for testing HA setups.
|
|
144
|
+
*/
|
|
145
|
+
export async function createSharedSlashingProtectionDb(
|
|
146
|
+
dateProvider: DateProvider = new DateProvider(),
|
|
147
|
+
): Promise<SlashingProtectionDatabase> {
|
|
148
|
+
const kvStore = await createStore('shared-signing-protection', LmdbSlashingProtectionDatabase.SCHEMA_VERSION, {
|
|
149
|
+
dataStoreMapSizeKb: 1024 * 1024,
|
|
150
|
+
});
|
|
151
|
+
return new LmdbSlashingProtectionDatabase(kvStore, dateProvider);
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* Create a ValidatorHASigner backed by a pre-existing SlashingProtectionDatabase.
|
|
156
|
+
* Used for testing HA setups where multiple nodes share the same protection database.
|
|
157
|
+
*/
|
|
158
|
+
export function createSignerFromSharedDb(
|
|
159
|
+
db: SlashingProtectionDatabase,
|
|
160
|
+
config: Pick<
|
|
161
|
+
ValidatorHASignerConfig,
|
|
162
|
+
'nodeId' | 'pollingIntervalMs' | 'signingTimeoutMs' | 'maxStuckDutiesAgeMs' | 'l1Contracts'
|
|
163
|
+
>,
|
|
164
|
+
deps?: CreateLocalSignerWithProtectionDeps,
|
|
165
|
+
): { signer: ValidatorHASigner; db: SlashingProtectionDatabase } {
|
|
166
|
+
const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
|
|
167
|
+
const dateProvider = deps?.dateProvider ?? new DateProvider();
|
|
168
|
+
const metrics = new HASignerMetrics(telemetryClient, config.nodeId, 'SharedSigningProtectionMetrics');
|
|
169
|
+
const signer = new ValidatorHASigner(db, config, { metrics, dateProvider });
|
|
170
|
+
return { signer, db };
|
|
171
|
+
}
|
package/src/metrics.ts
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Attributes,
|
|
3
|
+
type Histogram,
|
|
4
|
+
Metrics,
|
|
5
|
+
type TelemetryClient,
|
|
6
|
+
type UpDownCounter,
|
|
7
|
+
createUpDownCounterWithDefault,
|
|
8
|
+
} from '@aztec/telemetry-client';
|
|
9
|
+
|
|
10
|
+
export type HACleanupType = 'stuck' | 'old' | 'outdated_rollup';
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Metrics for HA signer tracking signing operations, lock acquisition, and cleanup.
|
|
14
|
+
*/
|
|
15
|
+
export class HASignerMetrics {
|
|
16
|
+
// Signing lifecycle metrics
|
|
17
|
+
private signingDuration: Histogram;
|
|
18
|
+
private signingSuccessCount: UpDownCounter;
|
|
19
|
+
private dutyAlreadySignedCount: UpDownCounter;
|
|
20
|
+
private slashingProtectionCount: UpDownCounter;
|
|
21
|
+
private signingErrorCount: UpDownCounter;
|
|
22
|
+
|
|
23
|
+
// Lock acquisition metrics
|
|
24
|
+
private lockAcquiredCount: UpDownCounter;
|
|
25
|
+
|
|
26
|
+
// Cleanup metrics
|
|
27
|
+
private cleanupStuckDutiesCount: UpDownCounter;
|
|
28
|
+
private cleanupOldDutiesCount: UpDownCounter;
|
|
29
|
+
private cleanupOutdatedRollupDutiesCount: UpDownCounter;
|
|
30
|
+
|
|
31
|
+
constructor(
|
|
32
|
+
client: TelemetryClient,
|
|
33
|
+
private nodeId: string,
|
|
34
|
+
name = 'HASignerMetrics',
|
|
35
|
+
) {
|
|
36
|
+
const meter = client.getMeter(name);
|
|
37
|
+
|
|
38
|
+
// Signing lifecycle
|
|
39
|
+
this.signingDuration = meter.createHistogram(Metrics.HA_SIGNER_SIGNING_DURATION);
|
|
40
|
+
this.signingSuccessCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SIGNING_SUCCESS_COUNT);
|
|
41
|
+
this.dutyAlreadySignedCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_DUTY_ALREADY_SIGNED_COUNT);
|
|
42
|
+
this.slashingProtectionCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SLASHING_PROTECTION_COUNT);
|
|
43
|
+
this.signingErrorCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SIGNING_ERROR_COUNT);
|
|
44
|
+
|
|
45
|
+
// Lock acquisition
|
|
46
|
+
this.lockAcquiredCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_LOCK_ACQUIRED_COUNT);
|
|
47
|
+
|
|
48
|
+
// Cleanup
|
|
49
|
+
this.cleanupStuckDutiesCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_CLEANUP_STUCK_DUTIES_COUNT);
|
|
50
|
+
this.cleanupOldDutiesCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_CLEANUP_OLD_DUTIES_COUNT);
|
|
51
|
+
this.cleanupOutdatedRollupDutiesCount = createUpDownCounterWithDefault(
|
|
52
|
+
meter,
|
|
53
|
+
Metrics.HA_SIGNER_CLEANUP_OUTDATED_ROLLUP_DUTIES_COUNT,
|
|
54
|
+
);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Record a successful signing operation.
|
|
59
|
+
* @param dutyType - The type of duty signed
|
|
60
|
+
* @param durationMs - Duration from start of signWithProtection to completion
|
|
61
|
+
*/
|
|
62
|
+
public recordSigningSuccess(dutyType: string, durationMs: number): void {
|
|
63
|
+
const attributes = {
|
|
64
|
+
[Attributes.HA_DUTY_TYPE]: dutyType,
|
|
65
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
66
|
+
};
|
|
67
|
+
this.signingSuccessCount.add(1, attributes);
|
|
68
|
+
this.signingDuration.record(durationMs, attributes);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Record a DutyAlreadySignedError (expected in HA; another node signed first).
|
|
73
|
+
* @param dutyType - The type of duty
|
|
74
|
+
*/
|
|
75
|
+
public recordDutyAlreadySigned(dutyType: string): void {
|
|
76
|
+
const attributes = {
|
|
77
|
+
[Attributes.HA_DUTY_TYPE]: dutyType,
|
|
78
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
79
|
+
};
|
|
80
|
+
this.dutyAlreadySignedCount.add(1, attributes);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Record a SlashingProtectionError (attempted to sign different data for same duty).
|
|
85
|
+
* @param dutyType - The type of duty
|
|
86
|
+
*/
|
|
87
|
+
public recordSlashingProtection(dutyType: string): void {
|
|
88
|
+
const attributes = {
|
|
89
|
+
[Attributes.HA_DUTY_TYPE]: dutyType,
|
|
90
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
91
|
+
};
|
|
92
|
+
this.slashingProtectionCount.add(1, attributes);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Record a signing function failure (lock will be deleted for retry).
|
|
97
|
+
* @param dutyType - The type of duty
|
|
98
|
+
*/
|
|
99
|
+
public recordSigningError(dutyType: string): void {
|
|
100
|
+
const attributes = {
|
|
101
|
+
[Attributes.HA_DUTY_TYPE]: dutyType,
|
|
102
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
103
|
+
};
|
|
104
|
+
this.signingErrorCount.add(1, attributes);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* Record lock acquisition.
|
|
109
|
+
* @param acquired - Whether a new lock was acquired (true) or an existing record was found (false); only the `true` case increments the counter
|
|
110
|
+
*/
|
|
111
|
+
public recordLockAcquire(acquired: boolean): void {
|
|
112
|
+
if (acquired) {
|
|
113
|
+
const attributes = {
|
|
114
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
115
|
+
};
|
|
116
|
+
this.lockAcquiredCount.add(1, attributes);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Record cleanup metrics.
|
|
122
|
+
* @param type - Type of cleanup
|
|
123
|
+
* @param count - Number of duties cleaned up
|
|
124
|
+
*/
|
|
125
|
+
public recordCleanup(type: HACleanupType, count: number): void {
|
|
126
|
+
const attributes = {
|
|
127
|
+
[Attributes.HA_NODE_ID]: this.nodeId,
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
if (type === 'stuck') {
|
|
131
|
+
this.cleanupStuckDutiesCount.add(count, attributes);
|
|
132
|
+
} else if (type === 'old') {
|
|
133
|
+
this.cleanupOldDutiesCount.add(count, attributes);
|
|
134
|
+
} else if (type === 'outdated_rollup') {
|
|
135
|
+
this.cleanupOutdatedRollupDutiesCount.add(count, attributes);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
import { type Logger, createLogger } from '@aztec/foundation/log';
|
|
8
8
|
import { RunningPromise } from '@aztec/foundation/promise';
|
|
9
9
|
import { sleep } from '@aztec/foundation/sleep';
|
|
10
|
+
import type { DateProvider } from '@aztec/foundation/timer';
|
|
11
|
+
import type { BaseSignerConfig } from '@aztec/stdlib/ha-signing';
|
|
10
12
|
|
|
11
13
|
import {
|
|
12
14
|
type CheckAndRecordParams,
|
|
@@ -16,7 +18,13 @@ import {
|
|
|
16
18
|
getBlockIndexFromDutyIdentifier,
|
|
17
19
|
} from './db/types.js';
|
|
18
20
|
import { DutyAlreadySignedError, SlashingProtectionError } from './errors.js';
|
|
19
|
-
import type {
|
|
21
|
+
import type { HASignerMetrics } from './metrics.js';
|
|
22
|
+
import type { SlashingProtectionDatabase } from './types.js';
|
|
23
|
+
|
|
24
|
+
export interface SlashingProtectionServiceDeps {
|
|
25
|
+
metrics: HASignerMetrics;
|
|
26
|
+
dateProvider: DateProvider;
|
|
27
|
+
}
|
|
20
28
|
|
|
21
29
|
/**
|
|
22
30
|
* Slashing Protection Service
|
|
@@ -39,12 +47,16 @@ export class SlashingProtectionService {
|
|
|
39
47
|
private readonly signingTimeoutMs: number;
|
|
40
48
|
private readonly maxStuckDutiesAgeMs: number;
|
|
41
49
|
|
|
50
|
+
private readonly metrics: HASignerMetrics;
|
|
51
|
+
private readonly dateProvider: DateProvider;
|
|
52
|
+
|
|
42
53
|
private cleanupRunningPromise: RunningPromise;
|
|
43
54
|
private lastOldDutiesCleanupAtMs?: number;
|
|
44
55
|
|
|
45
56
|
constructor(
|
|
46
57
|
private readonly db: SlashingProtectionDatabase,
|
|
47
|
-
private readonly config:
|
|
58
|
+
private readonly config: BaseSignerConfig,
|
|
59
|
+
deps: SlashingProtectionServiceDeps,
|
|
48
60
|
) {
|
|
49
61
|
this.log = createLogger('slashing-protection');
|
|
50
62
|
this.pollingIntervalMs = config.pollingIntervalMs;
|
|
@@ -53,6 +65,8 @@ export class SlashingProtectionService {
|
|
|
53
65
|
this.maxStuckDutiesAgeMs = config.maxStuckDutiesAgeMs ?? 144_000;
|
|
54
66
|
|
|
55
67
|
this.cleanupRunningPromise = new RunningPromise(this.cleanup.bind(this), this.log, this.maxStuckDutiesAgeMs);
|
|
68
|
+
this.metrics = deps.metrics;
|
|
69
|
+
this.dateProvider = deps.dateProvider;
|
|
56
70
|
}
|
|
57
71
|
|
|
58
72
|
/**
|
|
@@ -72,7 +86,7 @@ export class SlashingProtectionService {
|
|
|
72
86
|
*/
|
|
73
87
|
async checkAndRecord(params: CheckAndRecordParams): Promise<string> {
|
|
74
88
|
const { validatorAddress, slot, dutyType, messageHash, nodeId } = params;
|
|
75
|
-
const startTime =
|
|
89
|
+
const startTime = this.dateProvider.now();
|
|
76
90
|
|
|
77
91
|
this.log.debug(`Checking duty: ${dutyType} for slot ${slot}`, {
|
|
78
92
|
validatorAddress: validatorAddress.toString(),
|
|
@@ -85,10 +99,11 @@ export class SlashingProtectionService {
|
|
|
85
99
|
|
|
86
100
|
if (isNew) {
|
|
87
101
|
// We successfully acquired the lock
|
|
88
|
-
this.log.
|
|
102
|
+
this.log.verbose(`Acquired lock for duty ${dutyType} at slot ${slot}`, {
|
|
89
103
|
validatorAddress: validatorAddress.toString(),
|
|
90
104
|
nodeId,
|
|
91
105
|
});
|
|
106
|
+
this.metrics.recordLockAcquire(true);
|
|
92
107
|
return record.lockToken;
|
|
93
108
|
}
|
|
94
109
|
|
|
@@ -103,6 +118,7 @@ export class SlashingProtectionService {
|
|
|
103
118
|
existingNodeId: record.nodeId,
|
|
104
119
|
attemptingNodeId: nodeId,
|
|
105
120
|
});
|
|
121
|
+
this.metrics.recordSlashingProtection(dutyType);
|
|
106
122
|
throw new SlashingProtectionError(
|
|
107
123
|
slot,
|
|
108
124
|
dutyType,
|
|
@@ -112,15 +128,17 @@ export class SlashingProtectionService {
|
|
|
112
128
|
record.nodeId,
|
|
113
129
|
);
|
|
114
130
|
}
|
|
131
|
+
this.metrics.recordDutyAlreadySigned(dutyType);
|
|
115
132
|
throw new DutyAlreadySignedError(slot, dutyType, record.blockIndexWithinCheckpoint, record.nodeId);
|
|
116
133
|
} else if (record.status === DutyStatus.SIGNING) {
|
|
117
134
|
// Another node is currently signing - check for timeout
|
|
118
|
-
if (
|
|
135
|
+
if (this.dateProvider.now() - startTime > this.signingTimeoutMs) {
|
|
119
136
|
this.log.warn(`Timeout waiting for signing to complete for duty ${dutyType} at slot ${slot}`, {
|
|
120
137
|
validatorAddress: validatorAddress.toString(),
|
|
121
138
|
timeoutMs: this.signingTimeoutMs,
|
|
122
139
|
signingNodeId: record.nodeId,
|
|
123
140
|
});
|
|
141
|
+
this.metrics.recordDutyAlreadySigned(dutyType);
|
|
124
142
|
throw new DutyAlreadySignedError(slot, dutyType, record.blockIndexWithinCheckpoint, 'unknown (timeout)');
|
|
125
143
|
}
|
|
126
144
|
|
|
@@ -159,7 +177,7 @@ export class SlashingProtectionService {
|
|
|
159
177
|
);
|
|
160
178
|
|
|
161
179
|
if (success) {
|
|
162
|
-
this.log.
|
|
180
|
+
this.log.verbose(`Recorded successful signing for duty ${dutyType} at slot ${slot}`, {
|
|
163
181
|
validatorAddress: validatorAddress.toString(),
|
|
164
182
|
nodeId,
|
|
165
183
|
});
|
|
@@ -228,6 +246,7 @@ export class SlashingProtectionService {
|
|
|
228
246
|
this.log.info(`Cleaned up ${numOutdatedRollupDuties} duties with outdated rollup address at startup`, {
|
|
229
247
|
currentRollupAddress: this.config.l1Contracts.rollupAddress.toString(),
|
|
230
248
|
});
|
|
249
|
+
this.metrics.recordCleanup('outdated_rollup', numOutdatedRollupDuties);
|
|
231
250
|
}
|
|
232
251
|
|
|
233
252
|
this.cleanupRunningPromise.start();
|
|
@@ -263,13 +282,14 @@ export class SlashingProtectionService {
|
|
|
263
282
|
nodeId: this.config.nodeId,
|
|
264
283
|
maxStuckDutiesAgeMs: this.maxStuckDutiesAgeMs,
|
|
265
284
|
});
|
|
285
|
+
this.metrics.recordCleanup('stuck', numStuckDuties);
|
|
266
286
|
}
|
|
267
287
|
|
|
268
288
|
// 2. Clean up old signed duties if configured
|
|
269
289
|
// we shouldn't run this as often as stuck duty cleanup.
|
|
270
290
|
if (this.config.cleanupOldDutiesAfterHours !== undefined) {
|
|
271
291
|
const maxAgeMs = this.config.cleanupOldDutiesAfterHours * 60 * 60 * 1000;
|
|
272
|
-
const nowMs =
|
|
292
|
+
const nowMs = this.dateProvider.now();
|
|
273
293
|
const shouldRun =
|
|
274
294
|
this.lastOldDutiesCleanupAtMs === undefined || nowMs - this.lastOldDutiesCleanupAtMs >= maxAgeMs;
|
|
275
295
|
if (shouldRun) {
|
|
@@ -280,6 +300,7 @@ export class SlashingProtectionService {
|
|
|
280
300
|
cleanupOldDutiesAfterHours: this.config.cleanupOldDutiesAfterHours,
|
|
281
301
|
maxAgeMs,
|
|
282
302
|
});
|
|
303
|
+
this.metrics.recordCleanup('old', numOldDuties);
|
|
283
304
|
}
|
|
284
305
|
}
|
|
285
306
|
}
|