@aztec/validator-ha-signer 0.0.1-commit.001888fc

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +195 -0
  2. package/dest/db/index.d.ts +5 -0
  3. package/dest/db/index.d.ts.map +1 -0
  4. package/dest/db/index.js +4 -0
  5. package/dest/db/lmdb.d.ts +66 -0
  6. package/dest/db/lmdb.d.ts.map +1 -0
  7. package/dest/db/lmdb.js +188 -0
  8. package/dest/db/migrations/1_initial-schema.d.ts +9 -0
  9. package/dest/db/migrations/1_initial-schema.d.ts.map +1 -0
  10. package/dest/db/migrations/1_initial-schema.js +20 -0
  11. package/dest/db/postgres.d.ts +86 -0
  12. package/dest/db/postgres.d.ts.map +1 -0
  13. package/dest/db/postgres.js +208 -0
  14. package/dest/db/schema.d.ts +96 -0
  15. package/dest/db/schema.d.ts.map +1 -0
  16. package/dest/db/schema.js +230 -0
  17. package/dest/db/test_helper.d.ts +10 -0
  18. package/dest/db/test_helper.d.ts.map +1 -0
  19. package/dest/db/test_helper.js +14 -0
  20. package/dest/db/types.d.ts +185 -0
  21. package/dest/db/types.d.ts.map +1 -0
  22. package/dest/db/types.js +64 -0
  23. package/dest/errors.d.ts +34 -0
  24. package/dest/errors.d.ts.map +1 -0
  25. package/dest/errors.js +34 -0
  26. package/dest/factory.d.ts +60 -0
  27. package/dest/factory.d.ts.map +1 -0
  28. package/dest/factory.js +115 -0
  29. package/dest/metrics.d.ts +51 -0
  30. package/dest/metrics.d.ts.map +1 -0
  31. package/dest/metrics.js +103 -0
  32. package/dest/migrations.d.ts +15 -0
  33. package/dest/migrations.d.ts.map +1 -0
  34. package/dest/migrations.js +53 -0
  35. package/dest/slashing_protection_service.d.ts +93 -0
  36. package/dest/slashing_protection_service.d.ts.map +1 -0
  37. package/dest/slashing_protection_service.js +236 -0
  38. package/dest/test/pglite_pool.d.ts +92 -0
  39. package/dest/test/pglite_pool.d.ts.map +1 -0
  40. package/dest/test/pglite_pool.js +210 -0
  41. package/dest/types.d.ts +99 -0
  42. package/dest/types.d.ts.map +1 -0
  43. package/dest/types.js +4 -0
  44. package/dest/validator_ha_signer.d.ts +79 -0
  45. package/dest/validator_ha_signer.d.ts.map +1 -0
  46. package/dest/validator_ha_signer.js +140 -0
  47. package/package.json +110 -0
  48. package/src/db/index.ts +4 -0
  49. package/src/db/lmdb.ts +264 -0
  50. package/src/db/migrations/1_initial-schema.ts +26 -0
  51. package/src/db/postgres.ts +284 -0
  52. package/src/db/schema.ts +267 -0
  53. package/src/db/test_helper.ts +17 -0
  54. package/src/db/types.ts +251 -0
  55. package/src/errors.ts +47 -0
  56. package/src/factory.ts +139 -0
  57. package/src/metrics.ts +138 -0
  58. package/src/migrations.ts +75 -0
  59. package/src/slashing_protection_service.ts +308 -0
  60. package/src/test/pglite_pool.ts +256 -0
  61. package/src/types.ts +154 -0
  62. package/src/validator_ha_signer.ts +183 -0
package/src/factory.ts ADDED
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Factory functions for creating validator HA signers
3
+ */
4
+ import { DateProvider } from '@aztec/foundation/timer';
5
+ import { createStore } from '@aztec/kv-store/lmdb-v2';
6
+ import type { LocalSignerConfig, ValidatorHASignerConfig } from '@aztec/stdlib/ha-signing';
7
+ import { getTelemetryClient } from '@aztec/telemetry-client';
8
+
9
+ import { Pool } from 'pg';
10
+
11
+ import { LmdbSlashingProtectionDatabase } from './db/lmdb.js';
12
+ import { PostgresSlashingProtectionDatabase } from './db/postgres.js';
13
+ import { HASignerMetrics } from './metrics.js';
14
+ import type { CreateHASignerDeps, CreateLocalSignerWithProtectionDeps, SlashingProtectionDatabase } from './types.js';
15
+ import { ValidatorHASigner } from './validator_ha_signer.js';
16
+
17
/**
 * Create a validator HA signer with PostgreSQL backend
 *
 * After creating the signer, call `signer.start()` to begin background
 * cleanup tasks. Call `signer.stop()` during graceful shutdown.
 *
 * Example with manual migrations (recommended for production):
 * ```bash
 * # Run migrations separately
 * yarn migrate:up
 * ```
 *
 * ```typescript
 * const { signer, db } = await createHASigner({
 *   databaseUrl: process.env.DATABASE_URL,
 *   nodeId: 'validator-node-1',
 *   pollingIntervalMs: 100,
 *   signingTimeoutMs: 3000,
 * });
 * signer.start(); // Start background cleanup
 *
 * // ... use signer ...
 *
 * await signer.stop(); // On shutdown
 * ```
 *
 * Note: Migrations must be run separately using `aztec migrate-ha-db up` before
 * creating the signer. The factory will verify the schema is initialized via `db.initialize()`.
 *
 * Pool ownership: when `deps.pool` is supplied it is used as-is and never ended by this
 * factory. When the factory creates the pool itself and `db.initialize()` rejects, the
 * pool is ended before the error is rethrown so that no connections are left dangling.
 *
 * @param config - Configuration for the HA signer
 * @param deps - Optional dependencies (e.g., for testing)
 * @returns An object containing the signer and database instances
 * @throws Error if `config.databaseUrl` is not set
 * @throws Whatever `db.initialize()` rejects with
 */
export async function createHASigner(
  config: ValidatorHASignerConfig,
  deps?: CreateHASignerDeps,
): Promise<{
  signer: ValidatorHASigner;
  db: SlashingProtectionDatabase;
}> {
  // Split pool tuning options from the options forwarded to the signer.
  const { databaseUrl, poolMaxCount, poolMinCount, poolIdleTimeoutMs, poolConnectionTimeoutMs, ...signerConfig } =
    config;

  if (!databaseUrl) {
    throw new Error('databaseUrl is required for createHASigner');
  }

  const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
  const dateProvider = deps?.dateProvider ?? new DateProvider();

  // Create connection pool (or use provided pool). Remember who owns it so that
  // only a pool created here is torn down on failure.
  const ownsPool = !deps?.pool;
  let pool: Pool;
  if (!deps?.pool) {
    pool = new Pool({
      connectionString: databaseUrl,
      max: poolMaxCount ?? 10,
      min: poolMinCount ?? 0,
      idleTimeoutMillis: poolIdleTimeoutMs ?? 10_000,
      connectionTimeoutMillis: poolConnectionTimeoutMs ?? 0,
    });
  } else {
    pool = deps.pool;
  }

  // Create database instance
  const db = new PostgresSlashingProtectionDatabase(pool);

  // Verify database schema is initialized and version matches
  try {
    await db.initialize();
  } catch (err) {
    if (ownsPool) {
      // Best-effort release: the initialization failure is the error worth surfacing,
      // so a secondary failure while ending the pool is deliberately ignored.
      await pool.end().catch(() => undefined);
    }
    throw err;
  }

  // Create metrics
  const metrics = new HASignerMetrics(telemetryClient, signerConfig.nodeId);

  // Create signer
  const signer = new ValidatorHASigner(db, signerConfig, { metrics, dateProvider });

  return { signer, db };
}
95
+
96
/**
 * Build a single-node signer whose double-signing protection is stored in LMDB.
 *
 * Intended for nodes that are NOT part of a high-availability (multi-node) setup:
 * it stops a proposer from sending two proposals for the same slot when the node
 * crashes and restarts mid-proposal.
 *
 * With `config.dataDirectory` set, the protection database lives on disk and
 * survives crashes/restarts. Without it, an ephemeral in-memory store is used,
 * which protects within a single run but not across restarts.
 *
 * @param config - Local signer config
 * @param deps - Optional dependencies (telemetry, date provider).
 * @returns An object containing the signer and database instances.
 */
export async function createLocalSignerWithProtection(
  config: LocalSignerConfig,
  deps?: CreateLocalSignerWithProtectionDeps,
): Promise<{
  signer: ValidatorHASigner;
  db: SlashingProtectionDatabase;
}> {
  const telemetryClient = deps?.telemetryClient ?? getTelemetryClient();
  const dateProvider = deps?.dateProvider ?? new DateProvider();

  // A dedicated map size for the protection store wins over the general store size.
  const storeOptions = {
    dataDirectory: config.dataDirectory,
    dataStoreMapSizeKb: config.signingProtectionMapSizeKb ?? config.dataStoreMapSizeKb,
    l1Contracts: config.l1Contracts,
  };
  const kvStore = await createStore(
    'signing-protection',
    LmdbSlashingProtectionDatabase.SCHEMA_VERSION,
    storeOptions,
  );
  const db = new LmdbSlashingProtectionDatabase(kvStore, dateProvider);

  // Fall back to a fixed node id when none (or an empty one) is configured.
  const nodeId = config.nodeId || 'local';
  const signerConfig = { ...config, nodeId };

  const metrics = new HASignerMetrics(telemetryClient, nodeId, 'LocalSigningProtectionMetrics');
  const signer = new ValidatorHASigner(db, signerConfig, { metrics, dateProvider });

  return { signer, db };
}
package/src/metrics.ts ADDED
@@ -0,0 +1,138 @@
1
+ import {
2
+ Attributes,
3
+ type Histogram,
4
+ Metrics,
5
+ type TelemetryClient,
6
+ type UpDownCounter,
7
+ createUpDownCounterWithDefault,
8
+ } from '@aztec/telemetry-client';
9
+
10
/** Kinds of cleanup that {@link HASignerMetrics.recordCleanup} can account for. */
export type HACleanupType = 'stuck' | 'old' | 'outdated_rollup';

/**
 * Telemetry for the HA signer: signing outcomes, lock acquisition and cleanup runs.
 * Every data point is tagged with the node id given at construction.
 */
export class HASignerMetrics {
  // Signing lifecycle
  private signingDuration: Histogram;
  private signingSuccessCount: UpDownCounter;
  private dutyAlreadySignedCount: UpDownCounter;
  private slashingProtectionCount: UpDownCounter;
  private signingErrorCount: UpDownCounter;

  // Lock acquisition
  private lockAcquiredCount: UpDownCounter;

  // Cleanup
  private cleanupStuckDutiesCount: UpDownCounter;
  private cleanupOldDutiesCount: UpDownCounter;
  private cleanupOutdatedRollupDutiesCount: UpDownCounter;

  /**
   * @param client - Telemetry client the meter is obtained from
   * @param nodeId - Node id attached to every recorded data point
   * @param name - Meter name; defaults to 'HASignerMetrics'
   */
  constructor(
    client: TelemetryClient,
    private nodeId: string,
    name = 'HASignerMetrics',
  ) {
    const meter = client.getMeter(name);
    const counter = (metric: Parameters<typeof createUpDownCounterWithDefault>[1]): UpDownCounter =>
      createUpDownCounterWithDefault(meter, metric);

    this.signingDuration = meter.createHistogram(Metrics.HA_SIGNER_SIGNING_DURATION);
    this.signingSuccessCount = counter(Metrics.HA_SIGNER_SIGNING_SUCCESS_COUNT);
    this.dutyAlreadySignedCount = counter(Metrics.HA_SIGNER_DUTY_ALREADY_SIGNED_COUNT);
    this.slashingProtectionCount = counter(Metrics.HA_SIGNER_SLASHING_PROTECTION_COUNT);
    this.signingErrorCount = counter(Metrics.HA_SIGNER_SIGNING_ERROR_COUNT);

    this.lockAcquiredCount = counter(Metrics.HA_SIGNER_LOCK_ACQUIRED_COUNT);

    this.cleanupStuckDutiesCount = counter(Metrics.HA_SIGNER_CLEANUP_STUCK_DUTIES_COUNT);
    this.cleanupOldDutiesCount = counter(Metrics.HA_SIGNER_CLEANUP_OLD_DUTIES_COUNT);
    this.cleanupOutdatedRollupDutiesCount = counter(Metrics.HA_SIGNER_CLEANUP_OUTDATED_ROLLUP_DUTIES_COUNT);
  }

  /** Attributes for data points that are specific to one duty type. */
  private dutyAttributes(dutyType: string) {
    return {
      [Attributes.HA_DUTY_TYPE]: dutyType,
      [Attributes.HA_NODE_ID]: this.nodeId,
    };
  }

  /** Attributes for data points that only carry the node id. */
  private nodeAttributes() {
    return {
      [Attributes.HA_NODE_ID]: this.nodeId,
    };
  }

  /**
   * Count one successful signing and record how long it took.
   * @param dutyType - The type of duty signed
   * @param durationMs - Duration from start of signWithProtection to completion
   */
  public recordSigningSuccess(dutyType: string, durationMs: number): void {
    const attrs = this.dutyAttributes(dutyType);
    this.signingSuccessCount.add(1, attrs);
    this.signingDuration.record(durationMs, attrs);
  }

  /**
   * Count a DutyAlreadySignedError (expected in HA; another node signed first).
   * @param dutyType - The type of duty
   */
  public recordDutyAlreadySigned(dutyType: string): void {
    this.dutyAlreadySignedCount.add(1, this.dutyAttributes(dutyType));
  }

  /**
   * Count a SlashingProtectionError (attempted to sign different data for same duty).
   * @param dutyType - The type of duty
   */
  public recordSlashingProtection(dutyType: string): void {
    this.slashingProtectionCount.add(1, this.dutyAttributes(dutyType));
  }

  /**
   * Count a signing function failure (lock will be deleted for retry).
   * @param dutyType - The type of duty
   */
  public recordSigningError(dutyType: string): void {
    this.signingErrorCount.add(1, this.dutyAttributes(dutyType));
  }

  /**
   * Count a lock acquisition. Nothing is recorded when no new lock was acquired.
   * @param acquired - Whether a new lock was acquired (true) or existing record found (false)
   */
  public recordLockAcquire(acquired: boolean): void {
    if (!acquired) {
      return;
    }
    this.lockAcquiredCount.add(1, this.nodeAttributes());
  }

  /**
   * Add the number of duties removed by a cleanup pass to the matching counter.
   * @param type - Type of cleanup
   * @param count - Number of duties cleaned up
   */
  public recordCleanup(type: HACleanupType, count: number): void {
    const attrs = this.nodeAttributes();

    switch (type) {
      case 'stuck':
        this.cleanupStuckDutiesCount.add(count, attrs);
        break;
      case 'old':
        this.cleanupOldDutiesCount.add(count, attrs);
        break;
      case 'outdated_rollup':
        this.cleanupOutdatedRollupDutiesCount.add(count, attrs);
        break;
    }
  }
}
package/src/migrations.ts ADDED
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Programmatic migration runner
3
+ */
4
+ import { createLogger } from '@aztec/foundation/log';
5
+
6
+ import { readdirSync } from 'fs';
7
+ import { runner } from 'node-pg-migrate';
8
+ import { dirname, join } from 'path';
9
+ import { fileURLToPath } from 'url';
10
+
11
+ const __filename = fileURLToPath(import.meta.url);
12
+ const __dirname = dirname(__filename);
13
+
14
/** Optional settings for a programmatic migration run. */
export interface RunMigrationsOptions {
  /** Which way to migrate: 'up' applies migrations, 'down' rolls back. Defaults to 'up'. */
  direction?: 'up' | 'down';
  /** Turn on verbose output. Defaults to false. */
  verbose?: boolean;
}
20
+
21
/**
 * Apply (or roll back) the package's database migrations from code.
 *
 * The migrations are read from the `db/migrations` directory next to this module.
 * Going 'up' applies every pending migration; going 'down' rolls back a single one.
 * Any failure is logged and then rethrown.
 *
 * @param databaseUrl - PostgreSQL connection string
 * @param options - Migration options (direction, verbose)
 * @returns Array of applied migration names
 */
export async function runMigrations(databaseUrl: string, options: RunMigrationsOptions = {}): Promise<string[]> {
  const verbose = options.verbose ?? false;
  const direction = options.direction ?? 'up';

  const log = createLogger('validator-ha-signer:migrations');
  const migrationsDir = join(__dirname, 'db', 'migrations');

  try {
    log.info(`Running migrations ${direction}...`);

    // Only compiled .js files count as migrations. A name ending in '.js' can never
    // end in '.d.ts' or '.d.ts.map', so this single check is all that is needed.
    const hasJsMigrations = readdirSync(migrationsDir).some(file => file.endsWith('.js'));
    if (!hasJsMigrations) {
      log.info('No migration files found');
      return [];
    }

    const applied = await runner({
      databaseUrl,
      dir: migrationsDir,
      direction,
      migrationsTable: 'pgmigrations',
      count: direction === 'down' ? 1 : Infinity,
      verbose,
      log: msg => (verbose ? log.info(msg) : log.debug(msg)),
      // Ignore TypeScript declaration files - node-pg-migrate will try to import them otherwise
      ignorePattern: '.*\\.d\\.(ts|js)$|.*\\.d\\.ts\\.map$',
    });

    const appliedNames = applied.map(m => m.name);

    if (applied.length > 0) {
      log.info(`Applied ${applied.length} migration(s)`, {
        migrations: appliedNames,
      });
    } else {
      log.info('No migrations to apply - schema is up to date');
    }

    return appliedNames;
  } catch (error: any) {
    log.error('Migration failed', error);
    throw error;
  }
}
package/src/slashing_protection_service.ts ADDED
@@ -0,0 +1,308 @@
1
+ /**
2
+ * Slashing Protection Service
3
+ *
4
+ * Provides distributed locking and slashing protection for validator duties.
5
+ * Uses an external database to coordinate across multiple validator nodes.
6
+ */
7
+ import { type Logger, createLogger } from '@aztec/foundation/log';
8
+ import { RunningPromise } from '@aztec/foundation/promise';
9
+ import { sleep } from '@aztec/foundation/sleep';
10
+ import type { DateProvider } from '@aztec/foundation/timer';
11
+ import type { BaseSignerConfig } from '@aztec/stdlib/ha-signing';
12
+
13
+ import {
14
+ type CheckAndRecordParams,
15
+ type DeleteDutyParams,
16
+ DutyStatus,
17
+ type RecordSuccessParams,
18
+ getBlockIndexFromDutyIdentifier,
19
+ } from './db/types.js';
20
+ import { DutyAlreadySignedError, SlashingProtectionError } from './errors.js';
21
+ import type { HASignerMetrics } from './metrics.js';
22
+ import type { SlashingProtectionDatabase } from './types.js';
23
+
24
/** Collaborators handed to the slashing protection service at construction. */
export interface SlashingProtectionServiceDeps {
  /** Receives lock, signing-outcome and cleanup metrics. */
  metrics: HASignerMetrics;
  /** Source of the current time for timeouts and cleanup scheduling. */
  dateProvider: DateProvider;
}
28
+
29
/**
 * Slashing Protection Service
 *
 * Makes sure a validator signs only one block/attestation per slot, even when
 * several redundant nodes run side by side (HA setup).
 *
 * Every node in the HA setup attempts to sign - the first one wins, the others get
 * DutyAlreadySignedError (normal) or SlashingProtectionError (if different data).
 *
 * Flow:
 * 1. checkAndRecord() - Atomically try to acquire lock via tryInsertOrGetExisting
 * 2. Caller performs the signing operation
 * 3. recordSuccess() - Update to 'signed' status with signature
 *    OR deleteDuty() - Delete the record to allow retry
 */
export class SlashingProtectionService {
  private readonly log: Logger;
  private readonly pollingIntervalMs: number;
  private readonly signingTimeoutMs: number;
  private readonly maxStuckDutiesAgeMs: number;

  private readonly metrics: HASignerMetrics;
  private readonly dateProvider: DateProvider;

  private cleanupRunningPromise: RunningPromise;
  private lastOldDutiesCleanupAtMs?: number;

  constructor(
    private readonly db: SlashingProtectionDatabase,
    private readonly config: BaseSignerConfig,
    deps: SlashingProtectionServiceDeps,
  ) {
    this.log = createLogger('slashing-protection');
    this.metrics = deps.metrics;
    this.dateProvider = deps.dateProvider;

    this.pollingIntervalMs = config.pollingIntervalMs;
    this.signingTimeoutMs = config.signingTimeoutMs;
    // Falls back to 144s (2x 72s Aztec slot duration) unless explicitly configured
    this.maxStuckDutiesAgeMs = config.maxStuckDutiesAgeMs ?? 144_000;

    // The stuck-duty age doubles as the interval handed to the background cleanup runner.
    this.cleanupRunningPromise = new RunningPromise(() => this.cleanup(), this.log, this.maxStuckDutiesAgeMs);
  }

  /**
   * Decide whether a duty may be performed and, if so, take the lock for it.
   *
   * Relies on an atomic insert-or-get operation:
   * 1. A new record is inserted -> the lock is ours and its lockToken is returned
   * 2. A record already exists -> act on its status:
   *    - SIGNED: throw the matching error (already signed or slashing protection)
   *    - SIGNING: sleep for the polling interval and look again, until the signing timeout elapses
   *
   * @returns The lockToken that must be used for recordSuccess/deleteDuty
   * @throws DutyAlreadySignedError if the duty was already completed, or if waiting for
   *         an in-progress signing exceeded the signing timeout
   * @throws SlashingProtectionError if attempting to sign different data for same slot/duty
   */
  async checkAndRecord(params: CheckAndRecordParams): Promise<string> {
    const { validatorAddress, slot, dutyType, messageHash, nodeId } = params;
    const startedAtMs = this.dateProvider.now();

    this.log.debug(`Checking duty: ${dutyType} for slot ${slot}`, {
      validatorAddress: validatorAddress.toString(),
      nodeId,
    });

    for (;;) {
      // Insert when absent, otherwise fetch whatever is already stored
      const { isNew, record } = await this.db.tryInsertOrGetExisting(params);

      if (isNew) {
        // The insert went through, so the lock belongs to us
        this.log.verbose(`Acquired lock for duty ${dutyType} at slot ${slot}`, {
          validatorAddress: validatorAddress.toString(),
          nodeId,
        });
        this.metrics.recordLockAcquire(true);
        return record.lockToken;
      }

      switch (record.status) {
        case DutyStatus.SIGNED: {
          // Same payload already signed: the benign "someone else got there first" case
          if (record.messageHash === messageHash) {
            this.metrics.recordDutyAlreadySigned(dutyType);
            throw new DutyAlreadySignedError(slot, dutyType, record.blockIndexWithinCheckpoint, record.nodeId);
          }

          // A different payload was signed for this duty: refuse
          this.log.verbose(`Slashing protection triggered for duty ${dutyType} at slot ${slot}`, {
            validatorAddress: validatorAddress.toString(),
            existingMessageHash: record.messageHash,
            attemptedMessageHash: messageHash,
            existingNodeId: record.nodeId,
            attemptingNodeId: nodeId,
          });
          this.metrics.recordSlashingProtection(dutyType);
          throw new SlashingProtectionError(
            slot,
            dutyType,
            record.blockIndexWithinCheckpoint,
            record.messageHash,
            messageHash,
            record.nodeId,
          );
        }

        case DutyStatus.SIGNING: {
          // Someone else holds the lock; give up once we have waited too long
          const waitedMs = this.dateProvider.now() - startedAtMs;
          if (waitedMs > this.signingTimeoutMs) {
            this.log.warn(`Timeout waiting for signing to complete for duty ${dutyType} at slot ${slot}`, {
              validatorAddress: validatorAddress.toString(),
              timeoutMs: this.signingTimeoutMs,
              signingNodeId: record.nodeId,
            });
            this.metrics.recordDutyAlreadySigned(dutyType);
            throw new DutyAlreadySignedError(slot, dutyType, record.blockIndexWithinCheckpoint, 'unknown (timeout)');
          }

          this.log.debug(`Waiting for signing to complete for duty ${dutyType} at slot ${slot}`, {
            validatorAddress: validatorAddress.toString(),
            signingNodeId: record.nodeId,
          });
          await sleep(this.pollingIntervalMs);
          // Leave the switch; the enclosing loop re-reads the record
          break;
        }

        default:
          throw new Error(`Unknown duty status: ${record.status}`);
      }
    }
  }

  /**
   * Mark a duty as signed and persist its signature.
   * The update only goes through when the lockToken matches (the caller must be the one
   * who created the duty).
   *
   * @returns true if the update succeeded, false if token didn't match
   */
  async recordSuccess(params: RecordSuccessParams): Promise<boolean> {
    const { rollupAddress, validatorAddress, slot, dutyType, signature, nodeId, lockToken } = params;
    const blockIndexWithinCheckpoint = getBlockIndexFromDutyIdentifier(params);

    const success = await this.db.updateDutySigned(
      rollupAddress,
      validatorAddress,
      slot,
      dutyType,
      signature.toString(),
      lockToken,
      blockIndexWithinCheckpoint,
    );

    const logContext = {
      validatorAddress: validatorAddress.toString(),
      nodeId,
    };
    if (success) {
      this.log.verbose(`Recorded successful signing for duty ${dutyType} at slot ${slot}`, logContext);
    } else {
      this.log.warn(
        `Failed to record successful signing for duty ${dutyType} at slot ${slot}: invalid token`,
        logContext,
      );
    }

    return success;
  }

  /**
   * Remove a duty record after signing failed, so that another node/attempt can retry.
   * The delete only goes through when the lockToken matches (the caller must be the one
   * who created the duty).
   *
   * @returns true if the delete succeeded, false if token didn't match
   */
  async deleteDuty(params: DeleteDutyParams): Promise<boolean> {
    const { rollupAddress, validatorAddress, slot, dutyType, lockToken } = params;
    const blockIndexWithinCheckpoint = getBlockIndexFromDutyIdentifier(params);

    const success = await this.db.deleteDuty(
      rollupAddress,
      validatorAddress,
      slot,
      dutyType,
      lockToken,
      blockIndexWithinCheckpoint,
    );

    const logContext = {
      validatorAddress: validatorAddress.toString(),
    };
    if (success) {
      this.log.info(`Deleted duty ${dutyType} at slot ${slot} to allow retry`, logContext);
    } else {
      this.log.warn(`Failed to delete duty ${dutyType} at slot ${slot}: invalid token`, logContext);
    }

    return success;
  }

  /**
   * The node ID this service was configured with
   */
  get nodeId(): string {
    return this.config.nodeId;
  }

  /**
   * Start the background cleanup task.
   * Before doing so, performs a one-time removal of duties recorded under a rollup
   * address other than the currently configured one.
   */
  async start() {
    const { rollupAddress } = this.config.l1Contracts;

    // One-time cleanup at startup: remove duties from previous rollup versions
    const removed = await this.db.cleanupOutdatedRollupDuties(rollupAddress);
    if (removed > 0) {
      this.log.info(`Cleaned up ${removed} duties with outdated rollup address at startup`, {
        currentRollupAddress: this.config.l1Contracts.rollupAddress.toString(),
      });
      this.metrics.recordCleanup('outdated_rollup', removed);
    }

    this.cleanupRunningPromise.start();
    this.log.info('Slashing protection service started', { nodeId: this.config.nodeId });
  }

  /**
   * Stop the background cleanup task.
   */
  async stop() {
    await this.cleanupRunningPromise.stop();
    this.log.info('Slashing protection service stopped', { nodeId: this.config.nodeId });
  }

  /**
   * Close the database connection.
   * Should be called after stop() during graceful shutdown.
   */
  async close() {
    await this.db.close();
    this.log.info('Slashing protection database connection closed');
  }

  /**
   * One pass of the background cleanup: stuck duties first, then (when configured)
   * old signed duties. Invoked by the RunningPromise created in the constructor.
   */
  private async cleanup() {
    await this.cleanupStuckDuties();
    await this.cleanupOldSignedDuties();
  }

  /** Remove this node's own duties that have been stuck in 'signing' status for too long. */
  private async cleanupStuckDuties(): Promise<void> {
    const removed = await this.db.cleanupOwnStuckDuties(this.config.nodeId, this.maxStuckDutiesAgeMs);
    if (removed <= 0) {
      return;
    }
    this.log.verbose(`Cleaned up ${removed} stuck duties`, {
      nodeId: this.config.nodeId,
      maxStuckDutiesAgeMs: this.maxStuckDutiesAgeMs,
    });
    this.metrics.recordCleanup('stuck', removed);
  }

  /**
   * Remove old signed duties, if `cleanupOldDutiesAfterHours` is configured.
   * Runs less often than the stuck-duty cleanup: at most once per configured max age.
   */
  private async cleanupOldSignedDuties(): Promise<void> {
    const hours = this.config.cleanupOldDutiesAfterHours;
    if (hours === undefined) {
      return;
    }

    const maxAgeMs = hours * 60 * 60 * 1000;
    const nowMs = this.dateProvider.now();
    const isDue = this.lastOldDutiesCleanupAtMs === undefined || nowMs - this.lastOldDutiesCleanupAtMs >= maxAgeMs;
    if (!isDue) {
      return;
    }

    const removed = await this.db.cleanupOldDuties(maxAgeMs);
    this.lastOldDutiesCleanupAtMs = nowMs;
    if (removed > 0) {
      this.log.verbose(`Cleaned up ${removed} old signed duties`, {
        cleanupOldDutiesAfterHours: hours,
        maxAgeMs,
      });
      this.metrics.recordCleanup('old', removed);
    }
  }
}