@aztec/validator-ha-signer 0.0.1-commit.7d4e6cd → 0.0.1-commit.8227e42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -37
- package/dest/db/postgres.d.ts +34 -5
- package/dest/db/postgres.d.ts.map +1 -1
- package/dest/db/postgres.js +80 -22
- package/dest/db/schema.d.ts +21 -10
- package/dest/db/schema.d.ts.map +1 -1
- package/dest/db/schema.js +49 -20
- package/dest/db/types.d.ts +76 -31
- package/dest/db/types.d.ts.map +1 -1
- package/dest/db/types.js +32 -8
- package/dest/errors.d.ts +9 -5
- package/dest/errors.d.ts.map +1 -1
- package/dest/errors.js +7 -4
- package/dest/factory.d.ts +6 -14
- package/dest/factory.d.ts.map +1 -1
- package/dest/factory.js +17 -12
- package/dest/metrics.d.ts +51 -0
- package/dest/metrics.d.ts.map +1 -0
- package/dest/metrics.js +103 -0
- package/dest/migrations.d.ts +1 -1
- package/dest/migrations.d.ts.map +1 -1
- package/dest/migrations.js +13 -2
- package/dest/slashing_protection_service.d.ts +25 -6
- package/dest/slashing_protection_service.d.ts.map +1 -1
- package/dest/slashing_protection_service.js +72 -20
- package/dest/test/pglite_pool.d.ts +92 -0
- package/dest/test/pglite_pool.d.ts.map +1 -0
- package/dest/test/pglite_pool.js +210 -0
- package/dest/types.d.ts +38 -18
- package/dest/types.d.ts.map +1 -1
- package/dest/types.js +4 -1
- package/dest/validator_ha_signer.d.ts +18 -13
- package/dest/validator_ha_signer.d.ts.map +1 -1
- package/dest/validator_ha_signer.js +46 -33
- package/package.json +13 -10
- package/src/db/postgres.ts +101 -21
- package/src/db/schema.ts +51 -20
- package/src/db/types.ts +110 -31
- package/src/errors.ts +7 -2
- package/src/factory.ts +20 -14
- package/src/metrics.ts +138 -0
- package/src/migrations.ts +17 -1
- package/src/slashing_protection_service.ts +117 -25
- package/src/test/pglite_pool.ts +256 -0
- package/src/types.ts +63 -19
- package/src/validator_ha_signer.ts +66 -42
- package/dest/config.d.ts +0 -47
- package/dest/config.d.ts.map +0 -1
- package/dest/config.js +0 -64
- package/src/config.ts +0 -116
package/src/factory.ts
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Factory functions for creating validator HA signers
|
|
3
3
|
*/
|
|
4
|
+
import { DateProvider } from '@aztec/foundation/timer';
|
|
5
|
+
import type { ValidatorHASignerConfig } from '@aztec/stdlib/ha-signing';
|
|
6
|
+
import { getTelemetryClient } from '@aztec/telemetry-client';
|
|
7
|
+
|
|
4
8
|
import { Pool } from 'pg';
|
|
5
9
|
|
|
6
|
-
import type { CreateHASignerConfig } from './config.js';
|
|
7
10
|
import { PostgresSlashingProtectionDatabase } from './db/postgres.js';
|
|
11
|
+
import { HASignerMetrics } from './metrics.js';
|
|
8
12
|
import type { CreateHASignerDeps, SlashingProtectionDatabase } from './types.js';
|
|
9
13
|
import { ValidatorHASigner } from './validator_ha_signer.js';
|
|
10
14
|
|
|
@@ -23,7 +27,7 @@ import { ValidatorHASigner } from './validator_ha_signer.js';
|
|
|
23
27
|
* ```typescript
|
|
24
28
|
* const { signer, db } = await createHASigner({
|
|
25
29
|
* databaseUrl: process.env.DATABASE_URL,
|
|
26
|
-
*
|
|
30
|
+
* haSigningEnabled: true,
|
|
27
31
|
* nodeId: 'validator-node-1',
|
|
28
32
|
* pollingIntervalMs: 100,
|
|
29
33
|
* signingTimeoutMs: 3000,
|
|
@@ -35,23 +39,15 @@ import { ValidatorHASigner } from './validator_ha_signer.js';
|
|
|
35
39
|
* await signer.stop(); // On shutdown
|
|
36
40
|
* ```
|
|
37
41
|
*
|
|
38
|
-
*
|
|
39
|
-
*
|
|
40
|
-
* const { signer, db } = await createHASigner({
|
|
41
|
-
* databaseUrl: process.env.DATABASE_URL,
|
|
42
|
-
* enabled: true,
|
|
43
|
-
* nodeId: 'validator-node-1',
|
|
44
|
-
* runMigrations: true, // Auto-run migrations on startup
|
|
45
|
-
* });
|
|
46
|
-
* signer.start();
|
|
47
|
-
* ```
|
|
42
|
+
* Note: Migrations must be run separately using `aztec migrate-ha-db up` before
|
|
43
|
+
* creating the signer. The factory will verify the schema is initialized via `db.initialize()`.
|
|
48
44
|
*
|
|
49
45
|
* @param config - Configuration for the HA signer
|
|
50
46
|
* @param deps - Optional dependencies (e.g., for testing)
|
|
51
47
|
* @returns An object containing the signer and database instances
|
|
52
48
|
*/
|
|
53
49
|
export async function createHASigner(
|
|
54
|
-
config:
|
|
50
|
+
config: ValidatorHASignerConfig,
|
|
55
51
|
deps?: CreateHASignerDeps,
|
|
56
52
|
): Promise<{
|
|
57
53
|
signer: ValidatorHASigner;
|
|
@@ -60,6 +56,13 @@ export async function createHASigner(
|
|
|
60
56
|
const { databaseUrl, poolMaxCount, poolMinCount, poolIdleTimeoutMs, poolConnectionTimeoutMs, ...signerConfig } =
|
|
61
57
|
config;
|
|
62
58
|
|
|
59
|
+
if (!databaseUrl) {
|
|
60
|
+
throw new Error('databaseUrl is required for createHASigner');
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
|
|
64
|
+
const dateProvider = deps?.dateProvider ?? new DateProvider();
|
|
65
|
+
|
|
63
66
|
// Create connection pool (or use provided pool)
|
|
64
67
|
let pool: Pool;
|
|
65
68
|
if (!deps?.pool) {
|
|
@@ -80,8 +83,11 @@ export async function createHASigner(
|
|
|
80
83
|
// Verify database schema is initialized and version matches
|
|
81
84
|
await db.initialize();
|
|
82
85
|
|
|
86
|
+
// Create metrics
|
|
87
|
+
const metrics = new HASignerMetrics(telemetryClient, signerConfig.nodeId);
|
|
88
|
+
|
|
83
89
|
// Create signer
|
|
84
|
-
const signer = new ValidatorHASigner(db, { ...signerConfig, databaseUrl });
|
|
90
|
+
const signer = new ValidatorHASigner(db, { ...signerConfig, databaseUrl }, { metrics, dateProvider });
|
|
85
91
|
|
|
86
92
|
return { signer, db };
|
|
87
93
|
}
|
package/src/metrics.ts
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Attributes,
|
|
3
|
+
type Histogram,
|
|
4
|
+
Metrics,
|
|
5
|
+
type TelemetryClient,
|
|
6
|
+
type UpDownCounter,
|
|
7
|
+
createUpDownCounterWithDefault,
|
|
8
|
+
} from '@aztec/telemetry-client';
|
|
9
|
+
|
|
10
|
+
export type HACleanupType = 'stuck' | 'old' | 'outdated_rollup';

/**
 * Telemetry instruments for the HA signer: signing outcomes, lock acquisition and cleanup.
 *
 * Every data point is tagged with the node id given to the constructor; the signing-related
 * data points are additionally tagged with the duty type.
 */
export class HASignerMetrics {
  // Signing lifecycle metrics
  private readonly signingDuration: Histogram;
  private readonly signingSuccessCount: UpDownCounter;
  private readonly dutyAlreadySignedCount: UpDownCounter;
  private readonly slashingProtectionCount: UpDownCounter;
  private readonly signingErrorCount: UpDownCounter;

  // Lock acquisition metrics
  private readonly lockAcquiredCount: UpDownCounter;

  // Cleanup metrics
  private readonly cleanupStuckDutiesCount: UpDownCounter;
  private readonly cleanupOldDutiesCount: UpDownCounter;
  private readonly cleanupOutdatedRollupDutiesCount: UpDownCounter;

  /**
   * @param client - Telemetry client the meter is obtained from
   * @param nodeId - Identifier of this node; attached to every recorded data point
   * @param name - Name of the meter requested from the client
   */
  constructor(
    client: TelemetryClient,
    private readonly nodeId: string,
    name = 'HASignerMetrics',
  ) {
    const meter = client.getMeter(name);

    // Signing lifecycle
    this.signingDuration = meter.createHistogram(Metrics.HA_SIGNER_SIGNING_DURATION);
    this.signingSuccessCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SIGNING_SUCCESS_COUNT);
    this.dutyAlreadySignedCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_DUTY_ALREADY_SIGNED_COUNT);
    this.slashingProtectionCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SLASHING_PROTECTION_COUNT);
    this.signingErrorCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_SIGNING_ERROR_COUNT);

    // Lock acquisition
    this.lockAcquiredCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_LOCK_ACQUIRED_COUNT);

    // Cleanup
    this.cleanupStuckDutiesCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_CLEANUP_STUCK_DUTIES_COUNT);
    this.cleanupOldDutiesCount = createUpDownCounterWithDefault(meter, Metrics.HA_SIGNER_CLEANUP_OLD_DUTIES_COUNT);
    this.cleanupOutdatedRollupDutiesCount = createUpDownCounterWithDefault(
      meter,
      Metrics.HA_SIGNER_CLEANUP_OUTDATED_ROLLUP_DUTIES_COUNT,
    );
  }

  /**
   * Record a successful signing operation.
   * @param dutyType - The type of duty signed
   * @param durationMs - Duration from start of signWithProtection to completion
   */
  public recordSigningSuccess(dutyType: string, durationMs: number): void {
    // One attribute object is shared by the counter and the histogram.
    const attributes = this.dutyAttributes(dutyType);
    this.signingSuccessCount.add(1, attributes);
    this.signingDuration.record(durationMs, attributes);
  }

  /**
   * Record a DutyAlreadySignedError (expected in HA; another node signed first).
   * @param dutyType - The type of duty
   */
  public recordDutyAlreadySigned(dutyType: string): void {
    this.dutyAlreadySignedCount.add(1, this.dutyAttributes(dutyType));
  }

  /**
   * Record a SlashingProtectionError (attempted to sign different data for same duty).
   * @param dutyType - The type of duty
   */
  public recordSlashingProtection(dutyType: string): void {
    this.slashingProtectionCount.add(1, this.dutyAttributes(dutyType));
  }

  /**
   * Record a signing function failure (lock will be deleted for retry).
   * @param dutyType - The type of duty
   */
  public recordSigningError(dutyType: string): void {
    this.signingErrorCount.add(1, this.dutyAttributes(dutyType));
  }

  /**
   * Record lock acquisition.
   * @param acquired - Whether a new lock was acquired (true) or existing record found (false).
   *   Nothing is recorded when this is false.
   */
  public recordLockAcquire(acquired: boolean): void {
    if (!acquired) {
      return;
    }
    this.lockAcquiredCount.add(1, this.nodeAttributes());
  }

  /**
   * Record cleanup metrics.
   * @param type - Type of cleanup; selects which cleanup counter is incremented
   * @param count - Number of duties cleaned up
   */
  public recordCleanup(type: HACleanupType, count: number): void {
    const attributes = this.nodeAttributes();

    switch (type) {
      case 'stuck':
        this.cleanupStuckDutiesCount.add(count, attributes);
        break;
      case 'old':
        this.cleanupOldDutiesCount.add(count, attributes);
        break;
      case 'outdated_rollup':
        this.cleanupOutdatedRollupDutiesCount.add(count, attributes);
        break;
      default: {
        // Compile-time exhaustiveness check: extending HACleanupType without adding a case
        // above stops this assignment from type-checking. At runtime a value outside the
        // union is ignored, which is what the previous if/else chain did as well.
        const unhandled: never = type;
        void unhandled;
      }
    }
  }

  /** Attributes attached to data points that are only scoped to this node. */
  private nodeAttributes() {
    return {
      [Attributes.HA_NODE_ID]: this.nodeId,
    };
  }

  /** Attributes attached to data points that are scoped to a duty type on this node. */
  private dutyAttributes(dutyType: string) {
    return {
      [Attributes.HA_DUTY_TYPE]: dutyType,
      [Attributes.HA_NODE_ID]: this.nodeId,
    };
  }
}
|
package/src/migrations.ts
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import { createLogger } from '@aztec/foundation/log';
|
|
5
5
|
|
|
6
|
+
import { readdirSync } from 'fs';
|
|
6
7
|
import { runner } from 'node-pg-migrate';
|
|
7
8
|
import { dirname, join } from 'path';
|
|
8
9
|
import { fileURLToPath } from 'url';
|
|
@@ -30,17 +31,32 @@ export async function runMigrations(databaseUrl: string, options: RunMigrationsO
|
|
|
30
31
|
|
|
31
32
|
const log = createLogger('validator-ha-signer:migrations');
|
|
32
33
|
|
|
34
|
+
const migrationsDir = join(__dirname, 'db', 'migrations');
|
|
35
|
+
|
|
33
36
|
try {
|
|
34
37
|
log.info(`Running migrations ${direction}...`);
|
|
35
38
|
|
|
39
|
+
// Filter out .d.ts and .d.ts.map files - node-pg-migrate only needs .js files
|
|
40
|
+
const migrationFiles = readdirSync(migrationsDir);
|
|
41
|
+
const jsMigrationFiles = migrationFiles.filter(
|
|
42
|
+
file => file.endsWith('.js') && !file.endsWith('.d.ts') && !file.endsWith('.d.ts.map'),
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
if (jsMigrationFiles.length === 0) {
|
|
46
|
+
log.info('No migration files found');
|
|
47
|
+
return [];
|
|
48
|
+
}
|
|
49
|
+
|
|
36
50
|
const appliedMigrations = await runner({
|
|
37
51
|
databaseUrl,
|
|
38
|
-
dir:
|
|
52
|
+
dir: migrationsDir,
|
|
39
53
|
direction,
|
|
40
54
|
migrationsTable: 'pgmigrations',
|
|
41
55
|
count: direction === 'down' ? 1 : Infinity,
|
|
42
56
|
verbose,
|
|
43
57
|
log: msg => (verbose ? log.info(msg) : log.debug(msg)),
|
|
58
|
+
// Ignore TypeScript declaration files - node-pg-migrate will try to import them otherwise
|
|
59
|
+
ignorePattern: '.*\\.d\\.(ts|js)$|.*\\.d\\.ts\\.map$',
|
|
44
60
|
});
|
|
45
61
|
|
|
46
62
|
if (appliedMigrations.length === 0) {
|
|
@@ -7,10 +7,24 @@
|
|
|
7
7
|
import { type Logger, createLogger } from '@aztec/foundation/log';
|
|
8
8
|
import { RunningPromise } from '@aztec/foundation/promise';
|
|
9
9
|
import { sleep } from '@aztec/foundation/sleep';
|
|
10
|
+
import type { DateProvider } from '@aztec/foundation/timer';
|
|
11
|
+
import type { ValidatorHASignerConfig } from '@aztec/stdlib/ha-signing';
|
|
10
12
|
|
|
11
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
type CheckAndRecordParams,
|
|
15
|
+
type DeleteDutyParams,
|
|
16
|
+
DutyStatus,
|
|
17
|
+
type RecordSuccessParams,
|
|
18
|
+
getBlockIndexFromDutyIdentifier,
|
|
19
|
+
} from './db/types.js';
|
|
12
20
|
import { DutyAlreadySignedError, SlashingProtectionError } from './errors.js';
|
|
13
|
-
import type {
|
|
21
|
+
import type { HASignerMetrics } from './metrics.js';
|
|
22
|
+
import type { SlashingProtectionDatabase } from './types.js';
|
|
23
|
+
|
|
24
|
+
/** Collaborators handed to the SlashingProtectionService constructor. */
export interface SlashingProtectionServiceDeps {
  /** Sink for lock-acquisition, signing-outcome and cleanup metrics. */
  metrics: HASignerMetrics;
  /** Source of the current time used by the service. */
  dateProvider: DateProvider;
}
|
|
14
28
|
|
|
15
29
|
/**
|
|
16
30
|
* Slashing Protection Service
|
|
@@ -31,22 +45,28 @@ export class SlashingProtectionService {
|
|
|
31
45
|
private readonly log: Logger;
|
|
32
46
|
private readonly pollingIntervalMs: number;
|
|
33
47
|
private readonly signingTimeoutMs: number;
|
|
48
|
+
private readonly maxStuckDutiesAgeMs: number;
|
|
49
|
+
|
|
50
|
+
private readonly metrics: HASignerMetrics;
|
|
51
|
+
private readonly dateProvider: DateProvider;
|
|
34
52
|
|
|
35
53
|
private cleanupRunningPromise: RunningPromise;
|
|
54
|
+
private lastOldDutiesCleanupAtMs?: number;
|
|
36
55
|
|
|
37
56
|
constructor(
  private readonly db: SlashingProtectionDatabase,
  private readonly config: ValidatorHASignerConfig,
  deps: SlashingProtectionServiceDeps,
) {
  const { metrics, dateProvider } = deps;

  this.log = createLogger('slashing-protection');

  // Timing settings copied from the config.
  this.pollingIntervalMs = config.pollingIntervalMs;
  this.signingTimeoutMs = config.signingTimeoutMs;
  // Falls back to 144s (2x 72s Aztec slot duration) when the config leaves it unset.
  this.maxStuckDutiesAgeMs = config.maxStuckDutiesAgeMs ?? 144_000;

  // Background cleanup task; it is only constructed here, start() is what launches it.
  // The stuck-duty age is passed as the task's third argument.
  this.cleanupRunningPromise = new RunningPromise(this.cleanup.bind(this), this.log, this.maxStuckDutiesAgeMs);

  this.metrics = metrics;
  this.dateProvider = dateProvider;
}
|
|
51
71
|
|
|
52
72
|
/**
|
|
@@ -58,7 +78,6 @@ export class SlashingProtectionService {
|
|
|
58
78
|
* 2. If insert succeeds, we acquired the lock - return the lockToken
|
|
59
79
|
* 3. If a record exists, handle based on status:
|
|
60
80
|
* - SIGNED: Throw appropriate error (already signed or slashing protection)
|
|
61
|
-
* - FAILED: Delete the failed record
|
|
62
81
|
* - SIGNING: Wait and poll until status changes, then handle result
|
|
63
82
|
*
|
|
64
83
|
* @returns The lockToken that must be used for recordSuccess/deleteDuty
|
|
@@ -67,7 +86,7 @@ export class SlashingProtectionService {
|
|
|
67
86
|
*/
|
|
68
87
|
async checkAndRecord(params: CheckAndRecordParams): Promise<string> {
|
|
69
88
|
const { validatorAddress, slot, dutyType, messageHash, nodeId } = params;
|
|
70
|
-
const startTime =
|
|
89
|
+
const startTime = this.dateProvider.now();
|
|
71
90
|
|
|
72
91
|
this.log.debug(`Checking duty: ${dutyType} for slot ${slot}`, {
|
|
73
92
|
validatorAddress: validatorAddress.toString(),
|
|
@@ -84,6 +103,7 @@ export class SlashingProtectionService {
|
|
|
84
103
|
validatorAddress: validatorAddress.toString(),
|
|
85
104
|
nodeId,
|
|
86
105
|
});
|
|
106
|
+
this.metrics.recordLockAcquire(true);
|
|
87
107
|
return record.lockToken;
|
|
88
108
|
}
|
|
89
109
|
|
|
@@ -98,18 +118,28 @@ export class SlashingProtectionService {
|
|
|
98
118
|
existingNodeId: record.nodeId,
|
|
99
119
|
attemptingNodeId: nodeId,
|
|
100
120
|
});
|
|
101
|
-
|
|
121
|
+
this.metrics.recordSlashingProtection(dutyType);
|
|
122
|
+
throw new SlashingProtectionError(
|
|
123
|
+
slot,
|
|
124
|
+
dutyType,
|
|
125
|
+
record.blockIndexWithinCheckpoint,
|
|
126
|
+
record.messageHash,
|
|
127
|
+
messageHash,
|
|
128
|
+
record.nodeId,
|
|
129
|
+
);
|
|
102
130
|
}
|
|
103
|
-
|
|
131
|
+
this.metrics.recordDutyAlreadySigned(dutyType);
|
|
132
|
+
throw new DutyAlreadySignedError(slot, dutyType, record.blockIndexWithinCheckpoint, record.nodeId);
|
|
104
133
|
} else if (record.status === DutyStatus.SIGNING) {
|
|
105
134
|
// Another node is currently signing - check for timeout
|
|
106
|
-
if (
|
|
135
|
+
if (this.dateProvider.now() - startTime > this.signingTimeoutMs) {
|
|
107
136
|
this.log.warn(`Timeout waiting for signing to complete for duty ${dutyType} at slot ${slot}`, {
|
|
108
137
|
validatorAddress: validatorAddress.toString(),
|
|
109
138
|
timeoutMs: this.signingTimeoutMs,
|
|
110
139
|
signingNodeId: record.nodeId,
|
|
111
140
|
});
|
|
112
|
-
|
|
141
|
+
this.metrics.recordDutyAlreadySigned(dutyType);
|
|
142
|
+
throw new DutyAlreadySignedError(slot, dutyType, record.blockIndexWithinCheckpoint, 'unknown (timeout)');
|
|
113
143
|
}
|
|
114
144
|
|
|
115
145
|
// Wait and poll
|
|
@@ -133,9 +163,18 @@ export class SlashingProtectionService {
|
|
|
133
163
|
* @returns true if the update succeeded, false if token didn't match
|
|
134
164
|
*/
|
|
135
165
|
async recordSuccess(params: RecordSuccessParams): Promise<boolean> {
|
|
136
|
-
const { validatorAddress, slot, dutyType, signature, nodeId, lockToken } = params;
|
|
166
|
+
const { rollupAddress, validatorAddress, slot, dutyType, signature, nodeId, lockToken } = params;
|
|
167
|
+
const blockIndexWithinCheckpoint = getBlockIndexFromDutyIdentifier(params);
|
|
137
168
|
|
|
138
|
-
const success = await this.db.updateDutySigned(
|
|
169
|
+
const success = await this.db.updateDutySigned(
|
|
170
|
+
rollupAddress,
|
|
171
|
+
validatorAddress,
|
|
172
|
+
slot,
|
|
173
|
+
dutyType,
|
|
174
|
+
signature.toString(),
|
|
175
|
+
lockToken,
|
|
176
|
+
blockIndexWithinCheckpoint,
|
|
177
|
+
);
|
|
139
178
|
|
|
140
179
|
if (success) {
|
|
141
180
|
this.log.info(`Recorded successful signing for duty ${dutyType} at slot ${slot}`, {
|
|
@@ -160,9 +199,17 @@ export class SlashingProtectionService {
|
|
|
160
199
|
* @returns true if the delete succeeded, false if token didn't match
|
|
161
200
|
*/
|
|
162
201
|
async deleteDuty(params: DeleteDutyParams): Promise<boolean> {
|
|
163
|
-
const { validatorAddress, slot, dutyType, lockToken } = params;
|
|
202
|
+
const { rollupAddress, validatorAddress, slot, dutyType, lockToken } = params;
|
|
203
|
+
const blockIndexWithinCheckpoint = getBlockIndexFromDutyIdentifier(params);
|
|
164
204
|
|
|
165
|
-
const success = await this.db.deleteDuty(
|
|
205
|
+
const success = await this.db.deleteDuty(
|
|
206
|
+
rollupAddress,
|
|
207
|
+
validatorAddress,
|
|
208
|
+
slot,
|
|
209
|
+
dutyType,
|
|
210
|
+
lockToken,
|
|
211
|
+
blockIndexWithinCheckpoint,
|
|
212
|
+
);
|
|
166
213
|
|
|
167
214
|
if (success) {
|
|
168
215
|
this.log.info(`Deleted duty ${dutyType} at slot ${slot} to allow retry`, {
|
|
@@ -188,7 +235,20 @@ export class SlashingProtectionService {
|
|
|
188
235
|
* Start running tasks.
|
|
189
236
|
* Cleanup runs immediately on start to recover from any previous crashes.
|
|
190
237
|
*/
|
|
191
|
-
|
|
238
|
+
/**
|
|
239
|
+
* Start the background cleanup task.
|
|
240
|
+
* Also performs one-time cleanup of duties with outdated rollup addresses.
|
|
241
|
+
*/
|
|
242
|
+
async start() {
  // Before the periodic task is launched, purge duties recorded under a different rollup address.
  const currentRollup = this.config.l1Contracts.rollupAddress;
  const removedCount = await this.db.cleanupOutdatedRollupDuties(currentRollup);

  // Only log and record a metric when something was actually removed.
  if (removedCount > 0) {
    this.log.info(`Cleaned up ${removedCount} duties with outdated rollup address at startup`, {
      currentRollupAddress: currentRollup.toString(),
    });
    this.metrics.recordCleanup('outdated_rollup', removedCount);
  }

  this.cleanupRunningPromise.start();
  this.log.info('Slashing protection service started', { nodeId: this.config.nodeId });
}
|
|
@@ -202,15 +262,47 @@ export class SlashingProtectionService {
|
|
|
202
262
|
}
|
|
203
263
|
|
|
204
264
|
/**
|
|
205
|
-
*
|
|
265
|
+
* Close the database connection.
|
|
266
|
+
* Should be called after stop() during graceful shutdown.
|
|
267
|
+
*/
|
|
268
|
+
async close() {
  // Wait for the database to finish closing before reporting it in the log.
  await this.db.close();
  this.log.info('Slashing protection database connection closed');
}
|
|
272
|
+
|
|
273
|
+
/**
|
|
274
|
+
* Periodic cleanup of stuck duties and optionally old signed duties.
|
|
275
|
+
* Runs in the background via RunningPromise.
|
|
206
276
|
*/
|
|
207
|
-
private async
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
277
|
+
private async cleanup() {
  const { nodeId, cleanupOldDutiesAfterHours } = this.config;

  // Step 1: remove this node's own duties that are older than the stuck-duty age limit.
  const stuckCount = await this.db.cleanupOwnStuckDuties(nodeId, this.maxStuckDutiesAgeMs);
  if (stuckCount > 0) {
    this.log.verbose(`Cleaned up ${stuckCount} stuck duties`, {
      nodeId,
      maxStuckDutiesAgeMs: this.maxStuckDutiesAgeMs,
    });
    this.metrics.recordCleanup('stuck', stuckCount);
  }

  // Step 2: old signed duties are only pruned when a retention period is configured.
  if (cleanupOldDutiesAfterHours === undefined) {
    return;
  }

  const maxAgeMs = cleanupOldDutiesAfterHours * 60 * 60 * 1000;
  const nowMs = this.dateProvider.now();

  // Throttle: this pass runs less often than the stuck-duty pass, at most once per
  // retention period. The first invocation always runs.
  const lastRunMs = this.lastOldDutiesCleanupAtMs;
  if (lastRunMs !== undefined && !(nowMs - lastRunMs >= maxAgeMs)) {
    return;
  }

  const oldCount = await this.db.cleanupOldDuties(maxAgeMs);
  // The timestamp is only advanced once the database call has returned without throwing.
  this.lastOldDutiesCleanupAtMs = nowMs;

  if (oldCount > 0) {
    this.log.verbose(`Cleaned up ${oldCount} old signed duties`, {
      cleanupOldDutiesAfterHours,
      maxAgeMs,
    });
    this.metrics.recordCleanup('old', oldCount);
  }
}
|
|
216
308
|
}
|