@aztec/prover-node 5.0.0-private.20260319 → 5.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +506 -0
  2. package/dest/actions/download-epoch-proving-job.js +1 -1
  3. package/dest/actions/rerun-epoch-proving-job.d.ts +4 -3
  4. package/dest/actions/rerun-epoch-proving-job.d.ts.map +1 -1
  5. package/dest/actions/rerun-epoch-proving-job.js +103 -21
  6. package/dest/bin/run-failed-epoch.js +1 -3
  7. package/dest/checkpoint-store.d.ts +83 -0
  8. package/dest/checkpoint-store.d.ts.map +1 -0
  9. package/dest/checkpoint-store.js +181 -0
  10. package/dest/config.d.ts +1 -1
  11. package/dest/config.d.ts.map +1 -1
  12. package/dest/config.js +1 -1
  13. package/dest/factory.d.ts +1 -1
  14. package/dest/factory.d.ts.map +1 -1
  15. package/dest/factory.js +22 -8
  16. package/dest/index.d.ts +2 -1
  17. package/dest/index.d.ts.map +1 -1
  18. package/dest/index.js +1 -0
  19. package/dest/job/checkpoint-prover.d.ts +134 -0
  20. package/dest/job/checkpoint-prover.d.ts.map +1 -0
  21. package/dest/job/checkpoint-prover.js +350 -0
  22. package/dest/job/epoch-session.d.ts +146 -0
  23. package/dest/job/epoch-session.d.ts.map +1 -0
  24. package/dest/job/epoch-session.js +709 -0
  25. package/dest/job/top-tree-job.d.ts +82 -0
  26. package/dest/job/top-tree-job.d.ts.map +1 -0
  27. package/dest/job/top-tree-job.js +152 -0
  28. package/dest/metrics.d.ts +29 -5
  29. package/dest/metrics.d.ts.map +1 -1
  30. package/dest/metrics.js +73 -9
  31. package/dest/monitors/epoch-monitor.js +6 -2
  32. package/dest/proof-publishing-service.d.ts +159 -0
  33. package/dest/proof-publishing-service.d.ts.map +1 -0
  34. package/dest/proof-publishing-service.js +334 -0
  35. package/dest/prover-node-publisher.d.ts +18 -11
  36. package/dest/prover-node-publisher.d.ts.map +1 -1
  37. package/dest/prover-node-publisher.js +195 -57
  38. package/dest/prover-node.d.ts +96 -68
  39. package/dest/prover-node.d.ts.map +1 -1
  40. package/dest/prover-node.js +382 -227
  41. package/dest/prover-publisher-factory.d.ts +2 -2
  42. package/dest/prover-publisher-factory.d.ts.map +1 -1
  43. package/dest/prover-publisher-factory.js +3 -3
  44. package/dest/session-manager.d.ts +158 -0
  45. package/dest/session-manager.d.ts.map +1 -0
  46. package/dest/session-manager.js +452 -0
  47. package/dest/test/index.d.ts +7 -6
  48. package/dest/test/index.d.ts.map +1 -1
  49. package/package.json +23 -23
  50. package/src/actions/download-epoch-proving-job.ts +1 -1
  51. package/src/actions/rerun-epoch-proving-job.ts +114 -28
  52. package/src/bin/run-failed-epoch.ts +1 -2
  53. package/src/checkpoint-store.ts +213 -0
  54. package/src/config.ts +2 -1
  55. package/src/factory.ts +18 -10
  56. package/src/index.ts +1 -0
  57. package/src/job/checkpoint-prover.ts +465 -0
  58. package/src/job/epoch-session.ts +424 -0
  59. package/src/job/top-tree-job.ts +227 -0
  60. package/src/metrics.ts +88 -12
  61. package/src/monitors/epoch-monitor.ts +2 -2
  62. package/src/proof-publishing-service.ts +424 -0
  63. package/src/prover-node-publisher.ts +220 -67
  64. package/src/prover-node.ts +439 -249
  65. package/src/prover-publisher-factory.ts +3 -3
  66. package/src/session-manager.ts +552 -0
  67. package/src/test/index.ts +6 -6
  68. package/dest/job/epoch-proving-job.d.ts +0 -63
  69. package/dest/job/epoch-proving-job.d.ts.map +0 -1
  70. package/dest/job/epoch-proving-job.js +0 -762
  71. package/src/job/epoch-proving-job.ts +0 -465
@@ -2,19 +2,28 @@ import type { Archiver } from '@aztec/archiver';
2
2
  import type { RollupContract } from '@aztec/ethereum/contracts';
3
3
  import type { Delayer } from '@aztec/ethereum/l1-tx-utils';
4
4
  import { BlockNumber, CheckpointNumber, EpochNumber } from '@aztec/foundation/branded-types';
5
- import { assertRequired, compact, pick, sum } from '@aztec/foundation/collection';
6
- import type { Fr } from '@aztec/foundation/curves/bn254';
5
+ import { assertRequired, compact, pick } from '@aztec/foundation/collection';
7
6
  import { memoize } from '@aztec/foundation/decorators';
8
7
  import { createLogger } from '@aztec/foundation/log';
9
- import { DateProvider } from '@aztec/foundation/timer';
8
+ import { DateProvider, executeTimeout } from '@aztec/foundation/timer';
9
+ import type { EpochProverFactory } from '@aztec/prover-client';
10
+ import { getLastSiblingPath } from '@aztec/prover-client/helpers';
11
+ import { ChonkCache } from '@aztec/prover-client/orchestrator';
10
12
  import { PublicProcessorFactory } from '@aztec/simulator/server';
11
- import type { L2BlockSource } from '@aztec/stdlib/block';
12
- import type { Checkpoint } from '@aztec/stdlib/checkpoint';
13
+ import {
14
+ type L2BlockSource,
15
+ L2BlockStream,
16
+ type L2BlockStreamEvent,
17
+ type L2BlockStreamEventHandler,
18
+ L2TipsMemoryStore,
19
+ } from '@aztec/stdlib/block';
20
+ import type { Checkpoint, PublishedCheckpoint } from '@aztec/stdlib/checkpoint';
13
21
  import type { ChainConfig } from '@aztec/stdlib/config';
14
22
  import type { ContractDataSource } from '@aztec/stdlib/contract';
15
- import { getProofSubmissionDeadlineTimestamp } from '@aztec/stdlib/epoch-helpers';
23
+ import { type L1RollupConstants, getEpochAtSlot, getProofSubmissionDeadlineEpoch } from '@aztec/stdlib/epoch-helpers';
16
24
  import {
17
25
  type EpochProverManager,
26
+ type EpochProvingJobState,
18
27
  EpochProvingJobTerminalState,
19
28
  type ITxProvider,
20
29
  type ProverNodeApi,
@@ -25,55 +34,79 @@ import {
25
34
  } from '@aztec/stdlib/interfaces/server';
26
35
  import type { DataStoreConfig } from '@aztec/stdlib/kv-store';
27
36
  import type { L1ToL2MessageSource } from '@aztec/stdlib/messaging';
28
- import type { Tx } from '@aztec/stdlib/tx';
37
+ import { MerkleTreeId } from '@aztec/stdlib/trees';
29
38
  import {
30
- Attributes,
31
39
  L1Metrics,
32
40
  type TelemetryClient,
33
41
  type Traceable,
34
42
  type Tracer,
35
43
  getTelemetryClient,
36
- trackSpan,
37
44
  } from '@aztec/telemetry-client';
38
45
 
39
46
  import { uploadEpochProofFailure } from './actions/upload-epoch-proof-failure.js';
47
+ import { CheckpointStore, type RegisterCheckpointData } from './checkpoint-store.js';
40
48
  import type { SpecificProverNodeConfig } from './config.js';
41
- import type { EpochProvingJobData } from './job/epoch-proving-job-data.js';
42
- import { EpochProvingJob, type EpochProvingJobState } from './job/epoch-proving-job.js';
49
+ import type { EpochSession, EpochSessionHooks } from './job/epoch-session.js';
43
50
  import { ProverNodeJobMetrics, ProverNodeRewardsMetrics } from './metrics.js';
44
- import type { EpochMonitor, EpochMonitorHandler } from './monitors/epoch-monitor.js';
45
- import type { ProverNodePublisher } from './prover-node-publisher.js';
51
+ import { ProofPublishingService } from './proof-publishing-service.js';
46
52
  import type { ProverPublisherFactory } from './prover-publisher-factory.js';
53
+ import { SessionManager } from './session-manager.js';
47
54
 
48
55
  type ProverNodeOptions = SpecificProverNodeConfig & Partial<DataStoreOptions>;
49
56
  type DataStoreOptions = Pick<DataStoreConfig, 'dataDirectory'> & Pick<ChainConfig, 'l1ChainId' | 'rollupVersion'>;
50
57
 
51
58
  /**
52
- * An Aztec Prover Node is a standalone process that monitors the unfinalized chain on L1 for unproven epochs,
53
- * fetches their txs from the p2p network or external nodes, re-executes their public functions, creates a rollup
54
- * proof for the epoch, and submits it to L1.
59
+ * Grace period for the proof-publishing service to settle during shutdown. The service waits for
60
+ * any in-flight L1 proof-submission tx to finish; that tx can take a long time to mine, so we cap
61
+ * the wait rather than letting `stop()` hang indefinitely.
55
62
  */
56
- export class ProverNode implements EpochMonitorHandler, ProverNodeApi, Traceable {
63
+ const PUBLISHING_SERVICE_STOP_TIMEOUT_MS = 30_000;
64
+
65
+ /**
66
+ * An Aztec Prover Node is a standalone process that monitors the chain for new checkpoints,
67
+ * starts proving them optimistically as they arrive, and submits epoch proofs to L1 once
68
+ * complete.
69
+ *
70
+ * The class is intentionally thin: it owns the long-lived collections (`CheckpointStore`,
71
+ * `ChonkCache`, `SessionManager`), the L2BlockStream, and a periodic ticker that nudges the
72
+ * manager to pick up newly-complete epochs. Every session lifecycle decision is delegated to
73
+ * the `SessionManager`. Each chain event is translated here into a single method call on it.
74
+ */
75
+ export class ProverNode implements L2BlockStreamEventHandler, ProverNodeApi, Traceable {
57
76
  private log = createLogger('prover-node');
58
77
 
59
- private jobs: Map<string, EpochProvingJob> = new Map();
60
- private config: ProverNodeOptions;
61
- private jobMetrics: ProverNodeJobMetrics;
62
- private rewardsMetrics: ProverNodeRewardsMetrics;
78
+ protected readonly checkpointStore: CheckpointStore;
79
+ protected readonly chonkCache: ChonkCache;
80
+ protected sessionManager: SessionManager | undefined;
81
+
82
+ private readonly config: ProverNodeOptions;
83
+ private readonly jobMetrics: ProverNodeJobMetrics;
84
+ private readonly rewardsMetrics: ProverNodeRewardsMetrics;
85
+
86
+ /** In-memory store for the L2BlockStream's local data provider. */
87
+ private tipsStore: L2TipsMemoryStore;
88
+ /** Block stream for checkpoint and reorg detection. */
89
+ private blockStream: L2BlockStream | undefined;
90
+ /**
91
+ * Highest epoch whose proof-submission window has passed. Monotonic high-water mark.
92
+ * Seeded from the last fully-proven epoch at start(); advanced on every block-stream
93
+ * event by comparing the archiver's latest synced L2 slot against each epoch's
94
+ * submission deadline. Protected so tests can verify the start() seeding.
95
+ */
96
+ protected lastExpiredEpoch: EpochNumber | undefined;
63
97
 
64
98
  public readonly tracer: Tracer;
65
99
 
66
- protected publisher: ProverNodePublisher | undefined;
100
+ protected publishingService: ProofPublishingService | undefined;
67
101
 
68
102
  constructor(
69
- protected readonly prover: EpochProverManager,
103
+ protected readonly prover: EpochProverManager & EpochProverFactory,
70
104
  protected readonly publisherFactory: ProverPublisherFactory,
71
105
  protected readonly l2BlockSource: L2BlockSource & Partial<Service>,
72
106
  protected readonly l1ToL2MessageSource: L1ToL2MessageSource,
73
107
  protected readonly contractDataSource: ContractDataSource,
74
108
  protected readonly worldState: WorldStateSynchronizer,
75
109
  protected readonly p2pClient: { getTxProvider(): ITxProvider } & Partial<Service>,
76
- protected readonly epochsMonitor: EpochMonitor,
77
110
  protected readonly rollupContract: RollupContract,
78
111
  protected readonly l1Metrics: L1Metrics,
79
112
  config: Partial<ProverNodeOptions> = {},
@@ -100,8 +133,33 @@ export class ProverNode implements EpochMonitorHandler, ProverNodeApi, Traceable
100
133
  this.tracer = telemetryClient.getTracer('ProverNode');
101
134
 
102
135
  this.jobMetrics = new ProverNodeJobMetrics(meter, telemetryClient.getTracer('EpochProvingJob'));
103
-
104
136
  this.rewardsMetrics = new ProverNodeRewardsMetrics(meter, this.prover.getProverId(), rollupContract);
137
+
138
+ this.tipsStore = new L2TipsMemoryStore(this.l2BlockSource.getGenesisBlockHash());
139
+
140
+ this.chonkCache = new ChonkCache(this.log.getBindings());
141
+ this.checkpointStore = new CheckpointStore(
142
+ this.l2BlockSource,
143
+ {
144
+ proverFactory: this.prover,
145
+ chonkCache: this.chonkCache,
146
+ publicProcessorFactory: new PublicProcessorFactory(
147
+ this.contractDataSource,
148
+ this.dateProvider,
149
+ this.telemetryClient,
150
+ this.log.getBindings(),
151
+ ),
152
+ dbProvider: this.worldState,
153
+ txProvider: this.p2pClient.getTxProvider(),
154
+ dateProvider: this.dateProvider,
155
+ proverId: this.prover.getProverId(),
156
+ metrics: this.jobMetrics,
157
+ txGatheringTimeoutMs: this.config.txGatheringTimeoutMs,
158
+ deadline: undefined,
159
+ },
160
+ { slotWatcherPollIntervalMs: this.config.proverNodePollingIntervalMs },
161
+ this.log.getBindings(),
162
+ );
105
163
  }
106
164
 
107
165
  public getProverId() {
@@ -112,68 +170,32 @@ export class ProverNode implements EpochMonitorHandler, ProverNodeApi, Traceable
112
170
  return this.p2pClient;
113
171
  }
114
172
 
115
- /** Returns the shared tx delayer for prover L1 txs, if enabled. Test-only. */
173
+ /** Test-only: the shared L1 tx delayer, if enabled. */
116
174
  public getDelayer(): Delayer | undefined {
117
175
  return this.delayer;
118
176
  }
119
177
 
120
- /**
121
- * Handles an epoch being completed by starting a proof for it if there are no active jobs for it.
122
- * @param epochNumber - The epoch number that was just completed.
123
- * @returns false if there is an error, true otherwise
124
- */
125
- async handleEpochReadyToProve(epochNumber: EpochNumber): Promise<boolean> {
126
- try {
127
- this.log.debug(`Running jobs as ${epochNumber} is ready to prove`, {
128
- jobs: Array.from(this.jobs.values()).map(job => `${job.getEpochNumber()}:${job.getId()}`),
129
- });
130
- const activeJobs = await this.getActiveJobsForEpoch(epochNumber);
131
- if (activeJobs.length > 0) {
132
- this.log.warn(`Not starting proof for ${epochNumber} since there are active jobs for the epoch`, {
133
- activeJobs: activeJobs.map(job => job.uuid),
134
- });
135
- return true;
136
- }
137
- await this.startProof(epochNumber);
138
- return true;
139
- } catch (err) {
140
- if (err instanceof EmptyEpochError) {
141
- this.log.info(`Not starting proof for ${epochNumber} since no blocks were found`);
142
- } else {
143
- this.log.error(`Error handling epoch completed`, err);
144
- }
145
- return false;
146
- }
178
+ /** Observability summary for the ProverNodeApi. */
179
+ public getJobs(): Promise<{ uuid: string; status: EpochProvingJobState; epochNumber: EpochNumber }[]> {
180
+ return Promise.resolve(this.sessionManager?.getJobs() ?? []);
147
181
  }
148
182
 
149
- /**
150
- * Starts the prover node so it periodically checks for unproven epochs in the unfinalized chain from L1 and
151
- * starts proving jobs for them.
152
- */
153
- async start() {
154
- this.epochsMonitor.start(this);
155
- await this.publisherFactory.start();
156
- this.publisher = await this.publisherFactory.create();
157
- await this.rewardsMetrics.start();
158
- this.l1Metrics.start();
159
- this.log.info(`Started Prover Node with prover id ${this.prover.getProverId().toString()}`, this.config);
183
+ /** Tests inspect this when validating reconcile behaviour. */
184
+ public getCheckpointStore(): CheckpointStore {
185
+ return this.checkpointStore;
160
186
  }
161
187
 
162
- /**
163
- * Stops the prover node and all its dependencies.
164
- * Resources not owned by this node (shared with the parent aztec-node) are skipped.
165
- */
166
- async stop() {
167
- this.log.info('Stopping ProverNode');
168
- await this.epochsMonitor.stop();
169
- await this.prover.stop();
170
- await tryStop(this.publisherFactory);
171
- this.publisher?.interrupt();
172
- await Promise.all(Array.from(this.jobs.values()).map(job => job.stop()));
173
- this.rewardsMetrics.stop();
174
- this.l1Metrics.stop();
175
- await this.telemetryClient.stop();
176
- this.log.info('Stopped ProverNode');
188
+ /** Tests inspect this to verify chonk-cache release semantics. */
189
+ public getChonkCache(): ChonkCache {
190
+ return this.chonkCache;
191
+ }
192
+
193
+ /** Tests inspect this when looking up live sessions. */
194
+ public getSessionManager(): SessionManager {
195
+ if (!this.sessionManager) {
196
+ throw new Error('SessionManager not yet constructed — start() must be called first.');
197
+ }
198
+ return this.sessionManager;
177
199
  }
178
200
 
179
201
  /** Returns world state status. */
@@ -187,217 +209,389 @@ export class ProverNode implements EpochMonitorHandler, ProverNodeApi, Traceable
187
209
  return this.l2BlockSource.getL2Tips();
188
210
  }
189
211
 
190
- /**
191
- * Starts a proving process and returns immediately.
192
- */
193
- public async startProof(epochNumber: EpochNumber) {
194
- const job = await this.createProvingJob(epochNumber, { skipEpochCheck: true });
195
- void this.runJob(job);
212
+ /** Returns the underlying prover instance. */
213
+ public getProver() {
214
+ return this.prover;
196
215
  }
197
216
 
198
- private async runJob(job: EpochProvingJob) {
199
- const epochNumber = job.getEpochNumber();
200
- const ctx = { id: job.getId(), epochNumber, state: undefined as EpochProvingJobState | undefined };
201
-
202
- try {
203
- await job.run();
204
- const state = job.getState();
205
- ctx.state = state;
206
-
207
- if (state === 'reorg') {
208
- this.log.warn(`Running new job for epoch ${epochNumber} due to reorg`, ctx);
209
- await this.createProvingJob(epochNumber);
210
- } else if (state === 'failed') {
211
- this.log.error(`Job for ${epochNumber} exited with state ${state}`, ctx);
212
- await this.tryUploadEpochFailure(job);
213
- } else {
214
- this.log.verbose(`Job for ${epochNumber} exited with state ${state}`, ctx);
215
- }
216
- } catch (err) {
217
- this.log.error(`Error proving epoch ${epochNumber}`, err, ctx);
218
- } finally {
219
- this.jobs.delete(job.getId());
217
+ // ---------------- L2BlockStream handler ----------------
218
+
219
+ public async handleBlockStreamEvent(event: L2BlockStreamEvent): Promise<void> {
220
+ switch (event.type) {
221
+ case 'chain-checkpointed':
222
+ await this.handleCheckpointEvent(event.checkpoint);
223
+ break;
224
+ case 'chain-pruned':
225
+ await this.handlePruneEvent(event.checkpointed.checkpoint);
226
+ break;
227
+ case 'chain-proven':
228
+ this.publishingService?.onChainProven(BlockNumber(event.block.number));
229
+ break;
230
+ case 'chain-finalized':
231
+ case 'blocks-added':
232
+ break;
220
233
  }
234
+ // Expiry is driven by the archiver's latest synced L2 slot
235
+ await this.checkEpochExpiry();
236
+ // Advance the local tips store only after the proving-side handling has succeeded. Any
237
+ // failure above propagates to the L2BlockStream (which logs and stops this poll pass) and
238
+ // skips this update, so the event is re-emitted on the next poll rather than skipped (A-1041).
239
+ await this.tipsStore.handleBlockStreamEvent(event);
221
240
  }
222
241
 
223
- protected async tryUploadEpochFailure(job: EpochProvingJob) {
224
- if (this.config.proverNodeFailedEpochStore) {
225
- return await uploadEpochProofFailure(
226
- this.config.proverNodeFailedEpochStore,
227
- job.getId(),
228
- job.getProvingData(),
229
- this.l2BlockSource as Archiver,
230
- this.worldState,
231
- assertRequired(pick(this.config, 'l1ChainId', 'rollupVersion', 'dataDirectory')),
232
- this.log,
242
+ /** Register a new checkpoint with the store and notify the session manager. */
243
+ private async handleCheckpointEvent(publishedCheckpoint: PublishedCheckpoint) {
244
+ const checkpoint = publishedCheckpoint.checkpoint;
245
+ const slotNumber = checkpoint.header.slotNumber;
246
+ const l1Constants = await this.getL1Constants();
247
+ const epochNumber = getEpochAtSlot(slotNumber, l1Constants);
248
+
249
+ if (await this.isEpochFullyProven(epochNumber, l1Constants)) {
250
+ this.log.debug(`Skipping checkpoint ${checkpoint.number} for already-proven epoch ${epochNumber}`);
251
+ return;
252
+ }
253
+
254
+ if (await this.isEpochPastProofSubmissionWindow(epochNumber, l1Constants)) {
255
+ this.log.debug(
256
+ `Skipping checkpoint ${checkpoint.number} for epoch ${epochNumber} past its proof-submission window`,
233
257
  );
258
+ return;
234
259
  }
260
+
261
+ this.log.info(`New checkpoint ${checkpoint.number} for epoch ${epochNumber}`, {
262
+ checkpointNumber: checkpoint.number,
263
+ epochNumber,
264
+ slotNumber,
265
+ });
266
+
267
+ const registerData = await this.collectRegisterData(checkpoint, publishedCheckpoint.attestations);
268
+ await this.checkpointStore.addOrUpdate(checkpoint, registerData);
269
+ await this.sessionManager?.onCheckpointAdded(epochNumber);
235
270
  }
236
271
 
237
272
  /**
238
- * Returns the prover instance.
273
+ * Gathers register-time data for a checkpoint: previous block header, L1-to-L2 messages,
274
+ * and the archive sibling path.
239
275
  */
240
- public getProver() {
241
- return this.prover;
276
+ private async collectRegisterData(
277
+ checkpoint: Checkpoint,
278
+ attestations: PublishedCheckpoint['attestations'],
279
+ ): Promise<RegisterCheckpointData> {
280
+ const previousBlockNumber = BlockNumber(checkpoint.blocks[0].number - 1);
281
+ const previousBlockHeader = await this.gatherPreviousBlockHeader(previousBlockNumber);
282
+ const l1ToL2Messages = await this.l1ToL2MessageSource.getL1ToL2Messages(checkpoint.number);
283
+ const lastBlock = checkpoint.blocks.at(-1)!;
284
+ const lastBlockHash = await lastBlock.header.hash();
285
+ await this.worldState.syncImmediate(lastBlock.number, lastBlockHash);
286
+ const previousArchiveSiblingPath = await getLastSiblingPath(
287
+ MerkleTreeId.ARCHIVE,
288
+ this.worldState.getSnapshot(previousBlockNumber),
289
+ );
290
+ return {
291
+ attestations,
292
+ previousBlockHeader,
293
+ l1ToL2Messages,
294
+ previousArchiveSiblingPath,
295
+ };
296
+ }
297
+
298
+ /** Mark every prover above the prune threshold as pruned and notify the session manager. */
299
+ private async handlePruneEvent(prunedCheckpoint: { number: CheckpointNumber; hash: string }) {
300
+ this.log.warn(`Chain pruned to checkpoint ${prunedCheckpoint.number}`, { prunedCheckpoint });
301
+ const affected = this.checkpointStore.markPrunedAfter(prunedCheckpoint.number);
302
+ if (affected.length === 0) {
303
+ return;
304
+ }
305
+ const l1Constants = await this.getL1Constants();
306
+ const affectedEpochs = Array.from(
307
+ new Set(affected.map(p => Number(getEpochAtSlot(p.slotNumber, l1Constants)))),
308
+ ).map(n => EpochNumber(n));
309
+ // The session manager cancels every affected session, which in turn calls
310
+ // publishingService.withdraw(uuid) for each candidate; no separate notification to the
311
+ // publishing service is needed.
312
+ await this.sessionManager?.onPrune(affectedEpochs);
242
313
  }
243
314
 
244
315
  /**
245
- * Returns an array of jobs being processed.
316
+ * Returns true once the chain has advanced past the given epoch's proof-submission window.
317
+ * Used to ignore checkpoints whose epoch can no longer be proven in time — chiefly while the
318
+ * archiver replays old blocks after a restart. Compares the archiver's latest synced L2 slot
319
+ * against the epoch's submission-deadline epoch; conservatively returns false if the slot can't
320
+ * be read yet.
246
321
  */
247
- public getJobs(): Promise<{ uuid: string; status: EpochProvingJobState; epochNumber: EpochNumber }[]> {
248
- return Promise.resolve(
249
- Array.from(this.jobs.entries()).map(([uuid, job]) => ({
250
- uuid,
251
- status: job.getState(),
252
- epochNumber: job.getEpochNumber(),
253
- })),
254
- );
322
+ private async isEpochPastProofSubmissionWindow(
323
+ epochNumber: EpochNumber,
324
+ l1Constants: L1RollupConstants,
325
+ ): Promise<boolean> {
326
+ const latestSlot = await this.l2BlockSource.getSyncedL2SlotNumber();
327
+ if (latestSlot === undefined) {
328
+ return false;
329
+ }
330
+ const latestEpoch = getEpochAtSlot(latestSlot, l1Constants);
331
+ return latestEpoch >= getProofSubmissionDeadlineEpoch(epochNumber, l1Constants);
255
332
  }
256
333
 
257
- protected async getActiveJobsForEpoch(
258
- epochNumber: EpochNumber,
259
- ): Promise<{ uuid: string; status: EpochProvingJobState }[]> {
260
- const jobs = await this.getJobs();
261
- return jobs.filter(job => job.epochNumber === epochNumber && !EpochProvingJobTerminalState.includes(job.status));
334
+ /**
335
+ * Compares the archiver's latest synced L2 slot against `lastExpiredEpoch` and, for each
336
+ * newly-expired epoch, releases the chonk-cache entries for its blocks and reaps any
337
+ * CheckpointProvers in the store. An epoch E is expired once the chain reaches the start
338
+ * of epoch `E + proofSubmissionEpochs + 1`. Silently no-ops if nothing has expired since
339
+ * the last check or the archiver's slot can't be read.
340
+ */
341
+ private async checkEpochExpiry(): Promise<void> {
342
+ const latestSlot = await this.l2BlockSource.getSyncedL2SlotNumber();
343
+ if (latestSlot === undefined) {
344
+ return;
345
+ }
346
+ const l1Constants = await this.getL1Constants();
347
+ const latestEpoch = getEpochAtSlot(latestSlot, l1Constants);
348
+ const offset = l1Constants.proofSubmissionEpochs + 1;
349
+ if (latestEpoch < offset) {
350
+ return;
351
+ }
352
+ const newlyExpiredUpTo = EpochNumber(latestEpoch - offset);
353
+ const from = this.lastExpiredEpoch === undefined ? EpochNumber(0) : EpochNumber(this.lastExpiredEpoch + 1);
354
+ if (newlyExpiredUpTo < from) {
355
+ return;
356
+ }
357
+ for (let e = from; e <= newlyExpiredUpTo; e = EpochNumber(e + 1)) {
358
+ await this.expireEpoch(e);
359
+ }
360
+ this.lastExpiredEpoch = newlyExpiredUpTo;
262
361
  }
263
362
 
264
- private checkMaximumPendingJobs() {
265
- const { proverNodeMaxPendingJobs: maxPendingJobs } = this.config;
266
- if (maxPendingJobs > 0 && this.jobs.size >= maxPendingJobs) {
267
- throw new Error(`Maximum pending proving jobs ${maxPendingJobs} reached. Cannot create new job.`);
363
+ /**
364
+ * Releases chonk-cache entries for every block in the supplied epoch (best-effort) and
365
+ * reaps every CheckpointProver in the store whose epoch number matches.
366
+ */
367
+ private async expireEpoch(epoch: EpochNumber): Promise<void> {
368
+ try {
369
+ const blocks = await this.l2BlockSource.getBlocks({ epoch, onlyCheckpointed: true });
370
+ if (blocks.length > 0) {
371
+ this.chonkCache.releaseForBlocks(blocks);
372
+ }
373
+ } catch (err) {
374
+ this.log.warn(`Could not release chonk-cache entries for expired epoch ${epoch}`, err);
268
375
  }
376
+ this.checkpointStore.reapExpired(epoch);
269
377
  }
270
378
 
271
- @trackSpan('ProverNode.createProvingJob', epochNumber => ({ [Attributes.EPOCH_NUMBER]: epochNumber }))
272
- private async createProvingJob(epochNumber: EpochNumber, opts: { skipEpochCheck?: boolean } = {}) {
273
- this.checkMaximumPendingJobs();
274
-
275
- this.publisher = await this.publisherFactory.create();
276
-
277
- // Gather all data for this epoch
278
- const epochData = await this.gatherEpochData(epochNumber);
279
- const fromCheckpoint = epochData.checkpoints[0].number;
280
- const toCheckpoint = epochData.checkpoints.at(-1)!.number;
281
- const fromBlock = epochData.checkpoints[0].blocks[0].number;
282
- const lastBlock = epochData.checkpoints.at(-1)!.blocks.at(-1)!;
283
- const toBlock = lastBlock.number;
284
- this.log.verbose(
285
- `Creating proving job for epoch ${epochNumber} for checkpoint range ${fromCheckpoint} to ${toCheckpoint} and block range ${fromBlock} to ${toBlock}`,
286
- );
379
+ // ---------------- public API ----------------
287
380
 
288
- // Fast forward world state to right before the target block and get a fork
289
- const lastBlockHash = await lastBlock.header.hash();
290
- await this.worldState.syncImmediate(toBlock, lastBlockHash);
381
+ /**
382
+ * Schedules proving for the given epoch and returns the job id without waiting for completion.
383
+ */
384
+ public async startProof(epochNumber: EpochNumber): Promise<string> {
385
+ if (!this.sessionManager) {
386
+ throw new Error('ProverNode not started');
387
+ }
388
+ return await this.sessionManager.startProof(epochNumber);
389
+ }
291
390
 
292
- // Create a processor factory
293
- const publicProcessorFactory = new PublicProcessorFactory(
294
- this.contractDataSource,
295
- this.dateProvider,
296
- this.telemetryClient,
297
- this.log.getBindings(),
298
- );
391
+ // ---------------- Service lifecycle ----------------
392
+
393
+ async start() {
394
+ await this.checkpointStore.start();
395
+
396
+ await this.publisherFactory.start();
397
+ this.publishingService = new ProofPublishingService({
398
+ publisherFactory: this.publisherFactory,
399
+ l2BlockSource: this.l2BlockSource,
400
+ dateProvider: this.dateProvider,
401
+ config: { skipSubmitProof: !!this.config.proverNodeDisableProofPublish },
402
+ bindings: this.log.getBindings(),
403
+ });
404
+ this.sessionManager = this.createSessionManager(this.publishingService);
405
+ // SessionManager owns its own periodic tick; start it here so it begins picking up
406
+ // epochs that become complete by time (no fresh checkpoint event) and advances once
407
+ // the previous epoch is proven on L1.
408
+ this.sessionManager.start();
409
+ // Now that the store + manager exist, arm the live-state observable gauges.
410
+ this.jobMetrics.observeState(this.checkpointStore, this.sessionManager);
411
+
412
+ const { startingBlock, lastFullyProvenEpoch } = await this.computeStartupState();
413
+ this.lastExpiredEpoch = lastFullyProvenEpoch;
414
+ this.blockStream = new L2BlockStream(this.l2BlockSource, this.tipsStore, this, this.log, {
415
+ pollIntervalMS: this.config.proverNodePollingIntervalMs,
416
+ startingBlock,
417
+ });
418
+ this.blockStream.start();
299
419
 
300
- // Set deadline for this job to run. It will abort if it takes too long.
301
- const deadlineTs = getProofSubmissionDeadlineTimestamp(epochNumber, await this.getL1Constants());
302
- const deadline = new Date(Number(deadlineTs) * 1000);
303
- const job = this.doCreateEpochProvingJob(epochData, deadline, publicProcessorFactory, this.publisher, opts);
304
- this.jobs.set(job.getId(), job);
305
- return job;
420
+ await this.rewardsMetrics.start();
421
+ this.l1Metrics.start();
422
+ this.log.info(`Started Prover Node with prover id ${this.prover.getProverId().toString()}`, this.config);
306
423
  }
307
424
 
308
- @memoize
309
- private getL1Constants() {
310
- return this.l2BlockSource.getL1Constants();
425
+ async stop() {
426
+ this.log.info('Stopping ProverNode');
427
+ this.jobMetrics.stopObservingState();
428
+ await this.blockStream?.stop();
429
+ if (this.sessionManager) {
430
+ await this.sessionManager.stop();
431
+ }
432
+ if (this.publishingService) {
433
+ // Bound the wait: the publishing service blocks until any in-flight L1 proof-submission tx
434
+ // settles, which can outlast a reasonable shutdown window. On timeout we log and move on —
435
+ // the tx may still mine, but shutdown must not hang on it.
436
+ const publishingService = this.publishingService;
437
+ await executeTimeout(
438
+ () => publishingService.stop(),
439
+ PUBLISHING_SERVICE_STOP_TIMEOUT_MS,
440
+ 'prover-node publishing-service stop',
441
+ ).catch(err => this.log.warn(`Timed out stopping proof publishing service`, err));
442
+ }
443
+ await this.checkpointStore.stop();
444
+ this.chonkCache.stop();
445
+ await this.prover.stop();
446
+ await tryStop(this.publisherFactory);
447
+ this.rewardsMetrics.stop();
448
+ this.l1Metrics.stop();
449
+ await this.telemetryClient.stop();
450
+ this.log.info('Stopped ProverNode');
311
451
  }
312
452
 
313
- @trackSpan('ProverNode.gatherEpochData', epochNumber => ({ [Attributes.EPOCH_NUMBER]: epochNumber }))
314
- private async gatherEpochData(epochNumber: EpochNumber): Promise<EpochProvingJobData> {
315
- const checkpoints = await this.gatherCheckpoints(epochNumber);
316
- const txArray = await this.gatherTxs(epochNumber, checkpoints);
317
- const txs = new Map<string, Tx>(txArray.map(tx => [tx.getTxHash().toString(), tx]));
318
- const l1ToL2Messages = await this.gatherMessages(epochNumber, checkpoints);
319
- const [firstBlock] = checkpoints[0].blocks;
320
- const previousBlockHeader = await this.gatherPreviousBlockHeader(epochNumber, firstBlock.number - 1);
321
- const [lastPublishedCheckpoint] = await this.l2BlockSource.getCheckpoints(checkpoints.at(-1)!.number, 1);
322
- const attestations = lastPublishedCheckpoint?.attestations ?? [];
323
-
324
- return { checkpoints, txs, l1ToL2Messages, epochNumber, previousBlockHeader, attestations };
453
+ /**
454
+ * Constructs the session manager. Extracted so subclasses (test harness) can swap
455
+ * the implementation. Wired to `tryUploadSessionFailure` so failed sessions get
456
+ * their proving data uploaded.
457
+ */
458
+ protected createSessionManager(publishingService: ProofPublishingService): SessionManager {
459
+ return new SessionManager({
460
+ checkpointStore: this.checkpointStore,
461
+ l2BlockSource: this.l2BlockSource,
462
+ proverFactory: this.prover,
463
+ proverId: this.prover.getProverId(),
464
+ publishingService,
465
+ metrics: this.jobMetrics,
466
+ dateProvider: this.dateProvider,
467
+ config: {
468
+ maxPendingJobs: this.config.proverNodeMaxPendingJobs,
469
+ tickIntervalMs: this.config.proverNodePollingIntervalMs,
470
+ finalizationDelayMs: this.config.proverNodeEpochProvingDelayMs,
471
+ },
472
+ onSessionFailed: async session => {
473
+ await this.tryUploadSessionFailure(session);
474
+ },
475
+ bindings: this.log.getBindings(),
476
+ });
325
477
  }
326
478
 
327
- private async gatherCheckpoints(epochNumber: EpochNumber) {
328
- const checkpoints = await this.l2BlockSource.getCheckpointsForEpoch(epochNumber);
329
- if (checkpoints.length === 0) {
330
- throw new EmptyEpochError(epochNumber);
479
+ /**
480
+ * Installs session hooks for the e2e harness to interpose around top-tree proving
481
+ * (gate, override, or observe it) without monkey-patching the orchestrator factory.
482
+ * Applies to every session constructed after this call.
483
+ */
484
+ public setSessionHooks(hooks: EpochSessionHooks): void {
485
+ if (!this.sessionManager) {
486
+ throw new Error('ProverNode not started; call start() before setting session hooks.');
331
487
  }
332
- return checkpoints;
488
+ this.sessionManager.setSessionHooks(hooks);
333
489
  }
334
490
 
335
- private async gatherTxs(epochNumber: EpochNumber, checkpoints: Checkpoint[]) {
336
- const deadline = new Date(this.dateProvider.now() + this.config.txGatheringTimeoutMs);
337
- const txProvider = this.p2pClient.getTxProvider();
338
- const blocks = checkpoints.flatMap(checkpoint => checkpoint.blocks);
339
- const txsByBlock = await Promise.all(blocks.map(block => txProvider.getTxsForBlock(block, { deadline })));
340
- const txs = txsByBlock.map(({ txs }) => txs).flat();
341
- const missingTxs = txsByBlock.map(({ missingTxs }) => missingTxs).flat();
342
-
343
- if (missingTxs.length === 0) {
344
- this.log.verbose(`Gathered all ${txs.length} txs for epoch ${epochNumber}`, { epochNumber });
345
- return txs;
491
+ /** Uploads failure snapshots when sessions exit with `failed`. Exposed as a method so tests can spy on it. */
492
+ public async tryUploadSessionFailure(session: EpochSession): Promise<string | undefined> {
493
+ if (!this.config.proverNodeFailedEpochStore) {
494
+ return undefined;
346
495
  }
496
+ const data = SessionManager.buildSessionProvingData(session);
497
+ return await uploadEpochProofFailure(
498
+ this.config.proverNodeFailedEpochStore,
499
+ session.getId(),
500
+ data,
501
+ this.l2BlockSource as Archiver,
502
+ this.worldState,
503
+ assertRequired(pick(this.config, 'l1ChainId', 'rollupVersion', 'dataDirectory')),
504
+ this.log,
505
+ );
506
+ }
507
+
508
+ // ---------------- helpers ----------------
347
509
 
348
- throw new Error(`Txs not found for epoch ${epochNumber}: ${missingTxs.map(hash => hash.toString()).join(', ')}`);
510
+ @memoize
511
+ private getL1Constants(): Promise<L1RollupConstants> {
512
+ return this.l2BlockSource.getL1Constants();
349
513
  }
350
514
 
351
- private async gatherMessages(epochNumber: EpochNumber, checkpoints: Checkpoint[]) {
352
- const messages = await Promise.all(checkpoints.map(c => this.l1ToL2MessageSource.getL1ToL2Messages(c.number)));
353
- const messageCount = sum(messages.map(m => m.length));
354
- this.log.verbose(`Gathered all ${messageCount} messages for epoch ${epochNumber}`, { epochNumber });
355
- const messagesByCheckpoint: Record<CheckpointNumber, Fr[]> = {};
356
- for (let i = 0; i < checkpoints.length; i++) {
357
- messagesByCheckpoint[checkpoints[i].number] = messages[i];
515
+ /**
516
+ * Returns true if every block in the given epoch is proven on L1. An epoch is only
517
+ * fully proven when its *last* block is proven. Protected for direct unit-test access.
518
+ */
519
+ protected async isEpochFullyProven(
520
+ epochNumber: EpochNumber,
521
+ l1Constants: Pick<L1RollupConstants, 'epochDuration'>,
522
+ ): Promise<boolean> {
523
+ const provenBlockNumber = await this.l2BlockSource.getBlockNumber({ tag: 'proven' });
524
+ if (!provenBlockNumber || provenBlockNumber <= 0) {
525
+ return false;
526
+ }
527
+ const provenHeader = (await this.l2BlockSource.getBlockData({ number: BlockNumber(provenBlockNumber) }))?.header;
528
+ if (!provenHeader) {
529
+ return false;
530
+ }
531
+ const provenEpoch = getEpochAtSlot(provenHeader.getSlot(), l1Constants);
532
+ if (epochNumber < provenEpoch) {
533
+ return true;
534
+ }
535
+ if (epochNumber > provenEpoch) {
536
+ return false;
358
537
  }
359
- return messagesByCheckpoint;
538
+ return this.isProvenBlockLastOfItsEpoch(BlockNumber(provenBlockNumber), provenEpoch, l1Constants);
360
539
  }
361
540
 
362
- private async gatherPreviousBlockHeader(epochNumber: EpochNumber, previousBlockNumber: number) {
363
- const header = await (previousBlockNumber === 0
364
- ? this.worldState.getCommitted().getInitialHeader()
365
- : this.l2BlockSource.getBlockHeader(BlockNumber(previousBlockNumber)));
366
-
367
- if (!header) {
368
- throw new Error(`Previous block header ${previousBlockNumber} not found for proving epoch ${epochNumber}`);
541
+ /** Protected for direct unit-test access. */
542
+ protected async isProvenBlockLastOfItsEpoch(
543
+ provenBlockNumber: BlockNumber,
544
+ provenEpoch: EpochNumber,
545
+ l1Constants: Pick<L1RollupConstants, 'epochDuration'>,
546
+ ): Promise<boolean> {
547
+ const nextHeader = (await this.l2BlockSource.getBlockData({ number: BlockNumber(provenBlockNumber + 1) }))?.header;
548
+ if (nextHeader) {
549
+ return getEpochAtSlot(nextHeader.getSlot(), l1Constants) > provenEpoch;
369
550
  }
370
-
371
- this.log.verbose(`Gathered previous block header ${header.getBlockNumber()} for epoch ${epochNumber}`);
372
- return header;
551
+ return this.l2BlockSource.isEpochComplete(provenEpoch);
373
552
  }
374
553
 
375
- /** Extracted for testing purposes. */
376
- protected doCreateEpochProvingJob(
377
- data: EpochProvingJobData,
378
- deadline: Date | undefined,
379
- publicProcessorFactory: PublicProcessorFactory,
380
- publisher: ProverNodePublisher,
381
- opts: { skipEpochCheck?: boolean } = {},
382
- ) {
383
- const { proverNodeMaxParallelBlocksPerEpoch: parallelBlockLimit, proverNodeDisableProofPublish } = this.config;
384
- return new EpochProvingJob(
385
- data,
386
- this.worldState,
387
- this.prover.createEpochProver(),
388
- publicProcessorFactory,
389
- publisher,
390
- this.l2BlockSource,
391
- this.jobMetrics,
392
- deadline,
393
- { parallelBlockLimit, skipSubmitProof: proverNodeDisableProofPublish, ...opts },
394
- this.log.getBindings(),
554
+ /**
555
+ * Resolves the L2BlockStream's starting block and the last fully-proven epoch in one
556
+ * pass. The starting block is the first block of the next unproven epoch (or the start
557
+ * of the partially-proven epoch if the proven tip falls mid-epoch). The fully-proven
558
+ * epoch is `provenEpoch` when the proven tip is the last block of its epoch, otherwise
559
+ * `provenEpoch - 1`, or `undefined` if no block is proven yet.
560
+ */
561
+ protected async computeStartupState(): Promise<{
562
+ startingBlock: BlockNumber;
563
+ lastFullyProvenEpoch: EpochNumber | undefined;
564
+ }> {
565
+ const provenBlockNumber = await this.l2BlockSource.getBlockNumber({ tag: 'proven' });
566
+ if (!provenBlockNumber || provenBlockNumber <= 0) {
567
+ return { startingBlock: BlockNumber(1), lastFullyProvenEpoch: undefined };
568
+ }
569
+ const l1Constants = await this.getL1Constants();
570
+ const provenHeader = (await this.l2BlockSource.getBlockData({ number: BlockNumber(provenBlockNumber) }))?.header;
571
+ if (!provenHeader) {
572
+ return { startingBlock: BlockNumber(provenBlockNumber + 1), lastFullyProvenEpoch: undefined };
573
+ }
574
+ const provenEpoch = getEpochAtSlot(provenHeader.getSlot(), l1Constants);
575
+ if (await this.isProvenBlockLastOfItsEpoch(BlockNumber(provenBlockNumber), provenEpoch, l1Constants)) {
576
+ return { startingBlock: BlockNumber(provenBlockNumber + 1), lastFullyProvenEpoch: provenEpoch };
577
+ }
578
+ const epochCheckpoints = await this.l2BlockSource.getCheckpointsData({ epoch: provenEpoch });
579
+ const firstBlockOfEpoch =
580
+ epochCheckpoints.length > 0 ? epochCheckpoints[0].startBlock : BlockNumber(provenBlockNumber);
581
+ this.log.info(
582
+ `Starting L2BlockStream at block ${firstBlockOfEpoch} (start of partially-proven epoch ${provenEpoch})`,
583
+ { provenBlockNumber, provenEpoch, firstBlockOfEpoch },
395
584
  );
585
+ const lastFullyProvenEpoch = provenEpoch > 0 ? EpochNumber(provenEpoch - 1) : undefined;
586
+ return { startingBlock: firstBlockOfEpoch, lastFullyProvenEpoch };
396
587
  }
397
588
 
398
- /** Extracted for testing purposes. */
399
- protected async triggerMonitors() {
400
- await this.epochsMonitor.work();
589
+ private async gatherPreviousBlockHeader(previousBlockNumber: number) {
590
+ const data = await this.l2BlockSource.getBlockData({ number: BlockNumber(previousBlockNumber) });
591
+ if (!data?.header) {
592
+ throw new Error(`Previous block header ${previousBlockNumber} not found`);
593
+ }
594
+ return data.header;
401
595
  }
402
596
 
403
597
  private validateConfig() {
@@ -416,9 +610,5 @@ export class ProverNode implements EpochMonitorHandler, ProverNodeApi, Traceable
416
610
  }
417
611
  }
418
612
 
419
- class EmptyEpochError extends Error {
420
- constructor(epochNumber: EpochNumber) {
421
- super(`No blocks found for epoch ${epochNumber}`);
422
- this.name = 'EmptyEpochError';
423
- }
424
- }
613
+ // Re-export so handlers can compare states externally.
614
+ export { EpochProvingJobTerminalState };