@aztec/prover-node 5.0.0-private.20260318 → 5.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/README.md +506 -0
  2. package/dest/actions/download-epoch-proving-job.js +1 -1
  3. package/dest/actions/rerun-epoch-proving-job.d.ts +4 -3
  4. package/dest/actions/rerun-epoch-proving-job.d.ts.map +1 -1
  5. package/dest/actions/rerun-epoch-proving-job.js +103 -21
  6. package/dest/bin/run-failed-epoch.js +1 -3
  7. package/dest/checkpoint-store.d.ts +83 -0
  8. package/dest/checkpoint-store.d.ts.map +1 -0
  9. package/dest/checkpoint-store.js +181 -0
  10. package/dest/config.d.ts +1 -1
  11. package/dest/config.d.ts.map +1 -1
  12. package/dest/config.js +1 -1
  13. package/dest/factory.d.ts +1 -1
  14. package/dest/factory.d.ts.map +1 -1
  15. package/dest/factory.js +22 -8
  16. package/dest/index.d.ts +2 -1
  17. package/dest/index.d.ts.map +1 -1
  18. package/dest/index.js +1 -0
  19. package/dest/job/checkpoint-prover.d.ts +134 -0
  20. package/dest/job/checkpoint-prover.d.ts.map +1 -0
  21. package/dest/job/checkpoint-prover.js +350 -0
  22. package/dest/job/epoch-session.d.ts +146 -0
  23. package/dest/job/epoch-session.d.ts.map +1 -0
  24. package/dest/job/epoch-session.js +709 -0
  25. package/dest/job/top-tree-job.d.ts +82 -0
  26. package/dest/job/top-tree-job.d.ts.map +1 -0
  27. package/dest/job/top-tree-job.js +152 -0
  28. package/dest/metrics.d.ts +29 -5
  29. package/dest/metrics.d.ts.map +1 -1
  30. package/dest/metrics.js +73 -9
  31. package/dest/monitors/epoch-monitor.js +6 -2
  32. package/dest/proof-publishing-service.d.ts +159 -0
  33. package/dest/proof-publishing-service.d.ts.map +1 -0
  34. package/dest/proof-publishing-service.js +334 -0
  35. package/dest/prover-node-publisher.d.ts +18 -11
  36. package/dest/prover-node-publisher.d.ts.map +1 -1
  37. package/dest/prover-node-publisher.js +195 -57
  38. package/dest/prover-node.d.ts +96 -68
  39. package/dest/prover-node.d.ts.map +1 -1
  40. package/dest/prover-node.js +382 -227
  41. package/dest/prover-publisher-factory.d.ts +2 -2
  42. package/dest/prover-publisher-factory.d.ts.map +1 -1
  43. package/dest/prover-publisher-factory.js +3 -3
  44. package/dest/session-manager.d.ts +158 -0
  45. package/dest/session-manager.d.ts.map +1 -0
  46. package/dest/session-manager.js +452 -0
  47. package/dest/test/index.d.ts +7 -6
  48. package/dest/test/index.d.ts.map +1 -1
  49. package/package.json +23 -23
  50. package/src/actions/download-epoch-proving-job.ts +1 -1
  51. package/src/actions/rerun-epoch-proving-job.ts +114 -28
  52. package/src/bin/run-failed-epoch.ts +1 -2
  53. package/src/checkpoint-store.ts +213 -0
  54. package/src/config.ts +2 -1
  55. package/src/factory.ts +18 -10
  56. package/src/index.ts +1 -0
  57. package/src/job/checkpoint-prover.ts +465 -0
  58. package/src/job/epoch-session.ts +424 -0
  59. package/src/job/top-tree-job.ts +227 -0
  60. package/src/metrics.ts +88 -12
  61. package/src/monitors/epoch-monitor.ts +2 -2
  62. package/src/proof-publishing-service.ts +424 -0
  63. package/src/prover-node-publisher.ts +220 -67
  64. package/src/prover-node.ts +439 -249
  65. package/src/prover-publisher-factory.ts +3 -3
  66. package/src/session-manager.ts +552 -0
  67. package/src/test/index.ts +6 -6
  68. package/dest/job/epoch-proving-job.d.ts +0 -63
  69. package/dest/job/epoch-proving-job.d.ts.map +0 -1
  70. package/dest/job/epoch-proving-job.js +0 -762
  71. package/src/job/epoch-proving-job.ts +0 -465
package/src/metrics.ts CHANGED
@@ -18,6 +18,9 @@ import {
18
18
 
19
19
  import { formatEther, formatUnits } from 'viem';
20
20
 
21
+ import type { CheckpointStore } from './checkpoint-store.js';
22
+ import type { SessionManager } from './session-manager.js';
23
+
21
24
  export class ProverNodeJobMetrics {
22
25
  proverEpochExecutionDuration: Histogram;
23
26
  provingJobDuration: Histogram;
@@ -26,10 +29,15 @@ export class ProverNodeJobMetrics {
26
29
  provingJobTransactions: Gauge;
27
30
 
28
31
  private blobProcessingDuration: Gauge;
29
- private chonkVerifierDuration: Gauge;
30
32
  private blockProcessingDuration: Histogram;
31
33
  private checkpointProcessingDuration: Histogram;
32
- private allCheckpointsProcessingDuration: Gauge;
34
+
35
+ /** Observable gauges for live state. Registered via `observeState(...)` once the
36
+ * CheckpointStore and SessionManager are available. */
37
+ private activeCheckpoints: ObservableGauge | undefined;
38
+ private activeEpochSessions: ObservableGauge | undefined;
39
+ private stateObserver: ((observer: BatchObservableResult) => void) | undefined;
40
+ private stateObservedMetrics: ObservableGauge[] = [];
33
41
 
34
42
  constructor(
35
43
  private meter: Meter,
@@ -43,12 +51,8 @@ export class ProverNodeJobMetrics {
43
51
  this.provingJobTransactions = this.meter.createGauge(Metrics.PROVER_NODE_JOB_TRANSACTIONS);
44
52
 
45
53
  this.blobProcessingDuration = this.meter.createGauge(Metrics.PROVER_NODE_BLOB_PROCESSING_LAST_DURATION);
46
- this.chonkVerifierDuration = this.meter.createGauge(Metrics.PROVER_NODE_CHONK_VERIFIER_LAST_DURATION);
47
54
  this.blockProcessingDuration = this.meter.createHistogram(Metrics.PROVER_NODE_BLOCK_PROCESSING_DURATION);
48
55
  this.checkpointProcessingDuration = this.meter.createHistogram(Metrics.PROVER_NODE_CHECKPOINT_PROCESSING_DURATION);
49
- this.allCheckpointsProcessingDuration = this.meter.createGauge(
50
- Metrics.PROVER_NODE_ALL_CHECKPOINTS_PROCESSING_LAST_DURATION,
51
- );
52
56
  }
53
57
 
54
58
  public recordProvingJob(
@@ -69,10 +73,6 @@ export class ProverNodeJobMetrics {
69
73
  this.blobProcessingDuration.record(Math.ceil(durationMs));
70
74
  }
71
75
 
72
- public recordChonkVerifier(durationMs: number) {
73
- this.chonkVerifierDuration.record(Math.ceil(durationMs));
74
- }
75
-
76
76
  public recordBlockProcessing(durationMs: number) {
77
77
  this.blockProcessingDuration.record(Math.ceil(durationMs));
78
78
  }
@@ -81,8 +81,47 @@ export class ProverNodeJobMetrics {
81
81
  this.checkpointProcessingDuration.record(Math.ceil(durationMs));
82
82
  }
83
83
 
84
- public recordAllCheckpointsProcessing(durationMs: number) {
85
- this.allCheckpointsProcessingDuration.record(Math.ceil(durationMs));
/**
 * Registers observable gauges for the prover-node's live state: how many canonical
 * checkpoint provers are in the store, and how many non-terminal epoch sessions exist
 * (reported separately for the `'full'` and `'partial'` kinds). Idempotent — any previous
 * registration is torn down first, so repeated calls re-arm with the latest references.
 *
 * Call this once the `SessionManager` has been constructed (i.e. inside `ProverNode.start()`).
 *
 * @param checkpointStore - Store whose `listCanonical()` length is reported as active checkpoints.
 * @param sessionManager - Manager whose `allSessions()` are counted by kind, skipping terminal ones.
 */
public observeState(checkpointStore: CheckpointStore, sessionManager: SessionManager): void {
  this.stopObservingState();

  // Keep the instruments in locals so the callback never has to read (and non-null assert)
  // the mutable fields, which `stopObservingState` resets to `undefined`.
  const activeCheckpoints = this.meter.createObservableGauge(Metrics.PROVER_NODE_ACTIVE_CHECKPOINTS);
  const activeEpochSessions = this.meter.createObservableGauge(Metrics.PROVER_NODE_ACTIVE_EPOCH_SESSIONS);

  const stateObserver = (observer: BatchObservableResult) => {
    observer.observe(activeCheckpoints, checkpointStore.listCanonical().length);
    let full = 0;
    let partial = 0;
    for (const session of sessionManager.allSessions()) {
      if (session.isTerminal()) {
        continue;
      }
      if (session.getKind() === 'full') {
        full++;
      } else {
        partial++;
      }
    }
    observer.observe(activeEpochSessions, full, { [Attributes.EPOCH_SESSION_KIND]: 'full' });
    observer.observe(activeEpochSessions, partial, { [Attributes.EPOCH_SESSION_KIND]: 'partial' });
  };

  this.activeCheckpoints = activeCheckpoints;
  this.activeEpochSessions = activeEpochSessions;
  this.stateObserver = stateObserver;
  this.stateObservedMetrics = [activeCheckpoints, activeEpochSessions];
  this.meter.addBatchObservableCallback(stateObserver, this.stateObservedMetrics);
}
115
+
/**
 * Unregisters the batch callback installed by `observeState` and forgets the gauges it
 * reported to. Does nothing when no callback is currently registered, so it is safe to
 * call repeatedly.
 */
public stopObservingState(): void {
  const registeredObserver = this.stateObserver;
  if (!registeredObserver) {
    return;
  }
  this.meter.removeBatchObservableCallback(registeredObserver, this.stateObservedMetrics);
  this.stateObserver = undefined;
  this.stateObservedMetrics = [];
  this.activeCheckpoints = undefined;
  this.activeEpochSessions = undefined;
}
87
126
  }
88
127
 
@@ -140,6 +179,13 @@ export class ProverNodeRewardsMetrics {
140
179
  };
141
180
  }
142
181
 
182
+ export type EstimatedSubmitProofStats = {
183
+ gasLimit: bigint;
184
+ baseFeePerGas: bigint;
185
+ maxPriorityFeePerGas: bigint;
186
+ estimatedTotalFee: bigint;
187
+ };
188
+
143
189
  export class ProverNodePublisherMetrics {
144
190
  gasPrice: Histogram;
145
191
  txCount: UpDownCounter;
@@ -151,6 +197,10 @@ export class ProverNodePublisherMetrics {
151
197
  txBlobDataGasCost: Histogram;
152
198
  txTotalFee: Histogram;
153
199
 
200
+ private txGasEstimated: Histogram;
201
+ private gasPriceEstimated: Histogram;
202
+ private txTotalFeeEstimated: Histogram;
203
+
154
204
  private senderBalance: Gauge;
155
205
  private meter: Meter;
156
206
 
@@ -182,6 +232,12 @@ export class ProverNodePublisherMetrics {
182
232
 
183
233
  this.txTotalFee = this.meter.createHistogram(Metrics.L1_PUBLISHER_TX_TOTAL_FEE);
184
234
 
235
+ this.txGasEstimated = this.meter.createHistogram(Metrics.PROVER_NODE_ESTIMATED_SUBMISSION_GAS);
236
+
237
+ this.gasPriceEstimated = this.meter.createHistogram(Metrics.PROVER_NODE_ESTIMATED_SUBMISSION_GAS_PRICE);
238
+
239
+ this.txTotalFeeEstimated = this.meter.createHistogram(Metrics.PROVER_NODE_ESTIMATED_SUBMISSION_TOTAL_FEE);
240
+
185
241
  this.senderBalance = this.meter.createGauge(Metrics.L1_PUBLISHER_BALANCE);
186
242
  }
187
243
 
@@ -196,6 +252,26 @@ export class ProverNodePublisherMetrics {
196
252
  this.recordTx(durationMs, stats);
197
253
  }
198
254
 
/**
 * Records the estimate for a `submitProof` L1 transaction: the gas limit (tagged with the
 * tx type), the gas price (base fee plus priority fee) and the total fee.
 * The gas-price and total-fee recordings are best-effort: anything thrown while
 * formatting or recording either value is swallowed.
 *
 * @param stats - Estimated gas limit, fee components and total fee, as bigints.
 */
public recordEstimatedSubmitProof(stats: EstimatedSubmitProofStats) {
  const txTypeAttributes = { [Attributes.L1_TX_TYPE]: 'submitProof' } as const;
  const estimatedGas = Number(stats.gasLimit);
  this.txGasEstimated.record(estimatedGas, txTypeAttributes);

  try {
    const feePerGas = stats.baseFeePerGas + stats.maxPriorityFeePerGas;
    // NOTE(review): relies on viem's `formatEther(value, 'gwei')` rendering a wei amount
    // in gwei; `parseInt` then keeps only the whole part — confirm against the viem docs.
    const formattedGasPrice = formatEther(feePerGas, 'gwei');
    this.gasPriceEstimated.record(parseInt(formattedGasPrice, 10));
  } catch {
    // ignore
  }

  try {
    const formattedTotalFee = formatEther(stats.estimatedTotalFee);
    this.txTotalFeeEstimated.record(parseFloat(formattedTotalFee));
  } catch {
    // ignore
  }
}
274
+
199
275
  public recordSenderBalance(wei: bigint, senderAddress: string) {
200
276
  const eth = parseFloat(formatEther(wei, 'wei'));
201
277
  this.senderBalance.record(eth, {
@@ -96,9 +96,9 @@ export class EpochMonitor implements Traceable {
96
96
  }
97
97
 
98
98
  private async getEpochNumberToProve() {
99
- const lastBlockProven = await this.l2BlockSource.getProvenBlockNumber();
99
+ const lastBlockProven = (await this.l2BlockSource.getBlockNumber({ tag: 'proven' })) ?? BlockNumber.ZERO;
100
100
  const firstBlockToProve = BlockNumber(lastBlockProven + 1);
101
- const firstBlockHeaderToProve = await this.l2BlockSource.getBlockHeader(firstBlockToProve);
101
+ const firstBlockHeaderToProve = (await this.l2BlockSource.getBlockData({ number: firstBlockToProve }))?.header;
102
102
  if (!firstBlockHeaderToProve) {
103
103
  return { epochToProve: undefined, blockNumber: firstBlockToProve };
104
104
  }
@@ -0,0 +1,424 @@
1
+ import type { BatchedBlob } from '@aztec/blob-lib';
2
+ import type { ViemCommitteeAttestation } from '@aztec/ethereum/contracts';
3
+ import { BlockNumber, type CheckpointNumber, type EpochNumber } from '@aztec/foundation/branded-types';
4
+ import { type Logger, type LoggerBindings, createLogger } from '@aztec/foundation/log';
5
+ import { promiseWithResolvers } from '@aztec/foundation/promise';
6
+ import { SerialQueue } from '@aztec/foundation/queue';
7
+ import type { DateProvider } from '@aztec/foundation/timer';
8
+ import type { L2BlockSource } from '@aztec/stdlib/block';
9
+ import type { Proof } from '@aztec/stdlib/proofs';
10
+ import type { RootRollupPublicInputs } from '@aztec/stdlib/rollup';
11
+
12
+ import type { ProverNodePublisher } from './prover-node-publisher.js';
13
+ import type { ProverPublisherFactory } from './prover-publisher-factory.js';
14
+
15
+ /** A single proof candidate offered to the publishing service by an `EpochSession`. */
16
+ export type PublishCandidate = {
17
+ /** Stable id; matches the originating session so `withdraw` can target this entry. */
18
+ id: string;
19
+ epoch: EpochNumber;
20
+ /**
21
+ * Full vs partial. A `partial` candidate is an early-finish optimisation: if the chain's
22
+ * proven tip catches up to or past its `endBlock` before it publishes, it's superseded —
23
+ * publishing would be wasted L1 gas. A `full` candidate covers the entire epoch and is
24
+ * useful to publish even after some other prover-node has already submitted (the rollup
25
+ * contract records the submission per prover-id), so it is never auto-superseded by the
26
+ * proven tip alone.
27
+ */
28
+ kind: 'full' | 'partial';
29
+ /** First L2 block in the candidate's range. */
30
+ startBlock: BlockNumber;
31
+ /** Last L2 block in the candidate's range. */
32
+ endBlock: BlockNumber;
33
+ /**
34
+ * Wall-clock time after which the candidate is no longer worth publishing — typically
35
+ * the L1 proof-submission window deadline. If the candidate is still queued at this
36
+ * time it resolves as `'expired'`. If it's already in flight, the publish runs to
37
+ * completion (the L1 tx may still mine; the deadline only governs whether the service
38
+ * will start a publish). `undefined` disables the per-candidate timer.
39
+ */
40
+ deadline: Date | undefined;
41
+ /** Everything `ProverNodePublisher.submitEpochProof` needs. */
42
+ fromCheckpoint: CheckpointNumber;
43
+ toCheckpoint: CheckpointNumber;
44
+ publicInputs: RootRollupPublicInputs;
45
+ proof: Proof;
46
+ batchedBlobInputs: BatchedBlob;
47
+ attestations: ViemCommitteeAttestation[];
48
+ };
49
+
50
+ /** Terminal outcome for a candidate. The promise from `submit()` resolves with one of these. */
51
+ export type PublishOutcome = 'published' | 'superseded' | 'failed' | 'withdrawn' | 'expired';
52
+
53
+ /** Subset of `ProverPublisherFactory` the service uses — single async `create()` call. */
54
+ export type PublisherFactoryLike = Pick<ProverPublisherFactory, 'create'>;
55
+
56
+ /** Subset of `ProverNodePublisher` the service drives — one publish per fresh publisher. */
57
+ export type PublisherLike = Pick<ProverNodePublisher, 'submitEpochProof' | 'analyzeEpochProofSubmission'>;
58
+
59
+ /** Config for the publishing service. */
60
+ export type ProofPublishingServiceConfig = {
61
+ /** When true, submitting a candidate runs `analyzeEpochProofSubmission` instead of publishing. */
62
+ skipSubmitProof: boolean;
63
+ };
64
+
65
+ export type ProofPublishingServiceDeps = {
66
+ publisherFactory: PublisherFactoryLike;
67
+ l2BlockSource: Pick<L2BlockSource, 'getBlockNumber'>;
68
+ dateProvider: DateProvider;
69
+ config: ProofPublishingServiceConfig;
70
+ bindings?: LoggerBindings;
71
+ };
72
+
73
+ /** Per-epoch bucket: live candidates, their pending-outcome resolvers, and expiry timers. */
74
+ type EpochBucket = {
75
+ candidates: Map<string, PublishCandidate>;
76
+ resolvers: Map<string, (outcome: PublishOutcome) => void>;
77
+ expiryTimers: Map<string, NodeJS.Timeout>;
78
+ };
79
+
80
+ /**
81
+ * Backoff after a transient `publisherFactory.create()` failure. The candidate stays
82
+ * in the queue and the drain is re-scheduled after this delay; if the failure persists
83
+ * the candidate's own `deadline` timer caps the total wait.
84
+ */
85
+ const PUBLISHER_ACQUIRE_RETRY_DELAY_MS = 1_000;
86
+
/**
 * Central owner of L1 proof submission. Sessions offer their proofs here as
 * `PublishCandidate`s; the service serialises one publish at a time, picks the
 * longest candidate per epoch as the winner, and resolves the rest as
 * `'superseded'` without spending L1 gas.
 *
 * Construction-time invariants:
 * - Every publish runs against a freshly-created `ProverNodePublisher` from the factory.
 * - Only one publish is ever in flight (`SerialQueue` drain) — no defensive locks.
 * - Once an L1 publish starts, it runs to completion. `withdraw` is a queue-only
 *   operation: it removes a candidate that has not yet started publishing. An in-flight
 *   candidate is left alone and its outcome (`'published'` / `'failed'`) is reported as
 *   usual — the originating session has already moved to a terminal state via `cancel()`
 *   and ignores the late outcome.
 * - A candidate counts as "in flight" only once a publisher has been acquired. If it is
 *   withdrawn or expires while the publisher is still being acquired, the publish is
 *   skipped.
 *
 * Eligibility for publication is decided against the proven block number read inside
 * the drain (so the value is consistent with the publish that runs on the same drain
 * pass): a candidate is eligible when its predecessor block is proven and (for partial
 * candidates) the candidate's range extends past the proven tip. `onChainProven` is a
 * wake-up signal; it does not pass state into the drain.
 */
export class ProofPublishingService {
  private readonly log: Logger;
  /** Queued candidates, grouped by epoch. Empty buckets are dropped by the drain. */
  private readonly epochs: Map<EpochNumber, EpochBucket> = new Map();
  /**
   * One drain task at a time. Submits, withdrawals, chain-proven advances, and prunes
   * all schedule a `drain` here, so the eligibility re-check and the L1 publish never
   * interleave.
   *
   * Protected so unit tests can `await drainQueue.syncPoint()` to wait for pending
   * drain work to settle deterministically (no sleeps).
   */
  protected readonly drainQueue = new SerialQueue();
  /** Tracks the candidate currently being published. Set while drain is awaiting the L1 publish. */
  private inFlight: { id: string } | undefined;
  /**
   * Pending backoff timer armed after a failed `publisherFactory.create()`. At most one is
   * outstanding at a time, and `stop()` clears it so no timer outlives the service.
   */
  private retryTimer: ReturnType<typeof setTimeout> | undefined;
  private stopped = false;

  constructor(private readonly deps: ProofPublishingServiceDeps) {
    this.log = createLogger('prover-node:proof-publishing-service', deps.bindings);
    this.drainQueue.start();
  }

  /**
   * Offers a proof candidate to the service. The returned promise resolves once the
   * service settles the candidate's fate: `'published'` if it wins and L1 accepts it,
   * `'superseded'` if a longer candidate for the same epoch wins, `'failed'` if the
   * L1 submission errored, `'withdrawn'` if the originating session cancelled,
   * `'expired'` if the candidate's `deadline` elapsed before publishing started.
   *
   * After `stop()` the candidate is not queued and the promise resolves `'withdrawn'`.
   */
  public submit(candidate: PublishCandidate): Promise<PublishOutcome> {
    if (this.stopped) {
      return Promise.resolve<PublishOutcome>('withdrawn');
    }
    const { promise, resolve } = promiseWithResolvers<PublishOutcome>();
    let bucket = this.epochs.get(candidate.epoch);
    if (!bucket) {
      bucket = { candidates: new Map(), resolvers: new Map(), expiryTimers: new Map() };
      this.epochs.set(candidate.epoch, bucket);
    }
    bucket.candidates.set(candidate.id, candidate);
    bucket.resolvers.set(candidate.id, resolve);
    this.scheduleExpiry(bucket, candidate);
    this.log.info(`Candidate proof ${candidate.id} submitted for publishing`, {
      candidateId: candidate.id,
      epoch: candidate.epoch,
      startBlock: candidate.startBlock,
      endBlock: candidate.endBlock,
      deadline: candidate.deadline?.toISOString(),
    });
    this.scheduleDrain();
    return promise;
  }

  /**
   * Pulls a queued candidate from the bucket and resolves its promise as `'withdrawn'`.
   * If the candidate is already being published, the publish runs to completion and the
   * outcome reports whatever L1 returned — callers that cancelled mid-publish must rely
   * on their own terminal-state check to ignore the late outcome. No-op if the candidate
   * is unknown.
   */
  public withdraw(candidateId: string): void {
    if (this.inFlight?.id === candidateId) {
      this.log.debug(`Withdraw for in-flight candidate ${candidateId} ignored; publish will run to completion`, {
        candidateId,
      });
      return;
    }
    for (const bucket of this.epochs.values()) {
      if (bucket.candidates.has(candidateId)) {
        this.log.info(`Candidate ${candidateId} withdrawn`, { candidateId });
        this.resolveCandidate(bucket, candidateId, 'withdrawn');
        this.scheduleDrain();
        return;
      }
    }
  }

  /**
   * Signals that the L1 proven tip has advanced and the queue should be re-evaluated.
   * The drain reads the proven block number from `l2BlockSource` itself rather than
   * relying on the value passed here — that way the eligibility check uses a value read
   * inside the serial drain, not one captured by a concurrent caller of `onChainProven`.
   */
  public onChainProven(_provenBlock: BlockNumber): void {
    this.scheduleDrain();
  }

  /**
   * Stops accepting new submissions, waits for any in-flight publish to settle, and
   * resolves remaining queued candidates as `'withdrawn'`. Also cancels a pending
   * publisher-acquire retry timer.
   */
  public async stop(): Promise<void> {
    this.stopped = true;
    // `scheduleRetry` refuses to arm once `stopped` is set, so clearing here is sufficient.
    if (this.retryTimer !== undefined) {
      clearTimeout(this.retryTimer);
      this.retryTimer = undefined;
    }
    await this.drainQueue.end();
    // Anything still parked in a bucket never ran through drain — resolve it as withdrawn so
    // callers awaiting `submit()` aren't left hanging.
    for (const bucket of Array.from(this.epochs.values())) {
      for (const id of Array.from(bucket.candidates.keys())) {
        this.resolveCandidate(bucket, id, 'withdrawn');
      }
    }
    this.epochs.clear();
  }

  // ---------------- drain ----------------

  /** Enqueues a drain pass on the serial queue; errors are logged, never propagated. No-op once stopped. */
  private scheduleDrain(): void {
    if (this.stopped) {
      return;
    }
    void this.drainQueue
      .put(() => this.drain())
      .catch(err => {
        this.log.error(`Drain task threw`, err);
      });
  }

  /**
   * Arms the publisher-acquire backoff timer unless one is already pending or the service
   * has stopped. When it fires it schedules a fresh drain pass.
   */
  private scheduleRetry(): void {
    if (this.stopped || this.retryTimer !== undefined) {
      return;
    }
    this.retryTimer = setTimeout(() => {
      this.retryTimer = undefined;
      this.scheduleDrain();
    }, PUBLISHER_ACQUIRE_RETRY_DELAY_MS);
  }

  /**
   * One pass over the queue: reads the proven tip, then for each epoch (ascending) picks
   * a winner and publishes it. Empty buckets are removed at the end.
   */
  private async drain(): Promise<void> {
    if (this.stopped) {
      return;
    }
    // Read the proven block number afresh inside the serial drain so the eligibility
    // check is consistent with the publish that follows it on the same drain pass.
    const proven = await this.readProvenBlockNumber();

    // Process epochs in ascending order: the proven tip advances monotonically, so the lower
    // epoch is the natural next eligible candidate.
    const orderedEpochs = Array.from(this.epochs.keys()).sort((a, b) => Number(a) - Number(b));
    for (const epoch of orderedEpochs) {
      const bucket = this.epochs.get(epoch)!;
      const eligible = this.pickEpochWinner(bucket, proven);
      if (!eligible) {
        continue;
      }
      await this.publishWinner(epoch, eligible.winner, bucket);
    }

    // Drop empty buckets
    for (const [key, bucket] of Array.from(this.epochs.entries())) {
      if (bucket.candidates.size === 0) {
        this.epochs.delete(key);
      }
    }
  }

  /**
   * Picks the winning candidate for a given epoch. Candidates whose `deadline` has passed
   * resolve `'expired'`. Partial candidates whose `endBlock` is already proven on-chain
   * resolve `'superseded'`.
   * Full candidates are never auto-superseded by the proven tip — multiple prover-nodes
   * legitimately submit redundant full epoch proofs (one per prover-id) and L1 records each.
   * Among the remaining candidates with their predecessor proven, the one with the highest
   * `endBlock` wins; the others resolve `'superseded'`.
   *
   * @returns The winner, or `undefined` when no candidate has its predecessor block proven.
   */
  private pickEpochWinner(bucket: EpochBucket, proven: BlockNumber): { winner: PublishCandidate } | undefined {
    const now = this.deps.dateProvider.now();
    // Resolve any candidate whose deadline has already passed.
    for (const candidate of Array.from(bucket.candidates.values())) {
      if (candidate.deadline && candidate.deadline.getTime() <= now) {
        this.resolveCandidate(bucket, candidate.id, 'expired');
      }
    }
    // Drop partial candidates the proven chain has already caught up to.
    for (const candidate of Array.from(bucket.candidates.values())) {
      if (candidate.kind === 'partial' && candidate.endBlock <= proven) {
        this.resolveCandidate(bucket, candidate.id, 'superseded');
      }
    }

    const remaining = Array.from(bucket.candidates.values()).filter(c => c.startBlock - 1 <= proven);
    if (remaining.length === 0) {
      return undefined;
    }
    const winner = remaining.reduce((best, c) => (c.endBlock > best.endBlock ? c : best));
    // Every other same-epoch candidate is superseded by the winner.
    for (const candidate of remaining) {
      if (candidate.id !== winner.id) {
        this.resolveCandidate(bucket, candidate.id, 'superseded');
      }
    }
    return { winner };
  }

  /**
   * Acquires a fresh publisher and publishes `winner`, then settles its promise with the
   * outcome. If the publisher cannot be acquired the candidate stays queued and a retry
   * drain is scheduled. If the candidate was settled (withdrawn / expired) while the
   * publisher was being acquired, nothing is published.
   */
  private async publishWinner(epoch: EpochNumber, winner: PublishCandidate, bucket: EpochBucket): Promise<void> {
    let publisher: PublisherLike;
    try {
      publisher = await this.deps.publisherFactory.create();
    } catch (err) {
      // Treat this as transient: the publisher pool may be temporarily exhausted
      // (every signer busy, funding tx in flight, etc.). Leave the candidate queued and
      // schedule another drain after a short backoff. If the failure persists past the
      // candidate's deadline the expiry timer will resolve it as `'expired'`.
      this.log.warn(`Failed to acquire publisher for candidate ${winner.id}; retrying`, {
        candidateId: winner.id,
        epoch: winner.epoch,
        retryDelayMs: PUBLISHER_ACQUIRE_RETRY_DELAY_MS,
        err,
      });
      this.scheduleRetry();
      return;
    }

    // `withdraw()` and `handleExpiry()` run outside the drain queue and only defer to
    // `inFlight`, which is not set yet while the factory call above is pending. If either
    // settled this candidate in the meantime its promise has already resolved, so publishing
    // now would spend L1 gas on a proof nobody is waiting for.
    if (bucket.candidates.get(winner.id) !== winner) {
      this.log.debug(`Candidate ${winner.id} settled while acquiring a publisher; skipping publish`, {
        candidateId: winner.id,
        epoch: winner.epoch,
      });
      return;
    }

    this.inFlight = { id: winner.id };
    this.log.info(`Publishing candidate ${winner.id}`, {
      candidateId: winner.id,
      epoch: winner.epoch,
      startBlock: winner.startBlock,
      endBlock: winner.endBlock,
      fromCheckpoint: winner.fromCheckpoint,
      toCheckpoint: winner.toCheckpoint,
    });

    let outcome: PublishOutcome;
    try {
      outcome = await this.runPublish(winner, publisher);
    } finally {
      // Never leave a stale in-flight marker behind, even on an unexpected throw.
      this.inFlight = undefined;
    }
    this.resolveCandidate(bucket, winner.id, outcome);

    if (bucket.candidates.size === 0) {
      this.epochs.delete(epoch);
    }
  }

  /**
   * Runs the actual submission (or, with `skipSubmitProof`, only the fee analysis) for a
   * candidate and maps the result to an outcome. Never throws: submission errors are logged
   * and reported as `'failed'`; analyze-mode always reports `'published'`.
   */
  private async runPublish(candidate: PublishCandidate, publisher: PublisherLike): Promise<PublishOutcome> {
    const submitArgs = {
      epochNumber: candidate.epoch,
      fromCheckpoint: candidate.fromCheckpoint,
      toCheckpoint: candidate.toCheckpoint,
      publicInputs: candidate.publicInputs,
      proof: candidate.proof,
      batchedBlobInputs: candidate.batchedBlobInputs,
      attestations: candidate.attestations,
      // Stop the L1 tx retrying past the candidate's submission-window deadline.
      deadline: candidate.deadline,
    };

    if (this.deps.config.skipSubmitProof) {
      try {
        await publisher.analyzeEpochProofSubmission(submitArgs);
        return 'published';
      } catch (err) {
        this.log.warn(`Failed to analyze estimated L1 fees for candidate ${candidate.id}`, {
          err,
          candidateId: candidate.id,
          epoch: candidate.epoch,
        });
        // Analyze-mode failures are recorded but the session shouldn't enter `failed` —
        // the operator opted out of submission. Match the previous EpochSession behaviour.
        return 'published';
      }
    }

    try {
      const success = await publisher.submitEpochProof(submitArgs);
      return success ? 'published' : 'failed';
    } catch (err) {
      this.log.error(`Error publishing candidate ${candidate.id}`, err, {
        candidateId: candidate.id,
        epoch: candidate.epoch,
      });
      return 'failed';
    }
  }

  /**
   * Settles a candidate: clears its expiry timer, removes it from the bucket and, if its
   * resolver is still registered, resolves the `submit()` promise with `outcome`. Safe to
   * call for an id that has already been settled (nothing is resolved twice).
   */
  private resolveCandidate(bucket: EpochBucket, id: string, outcome: PublishOutcome): void {
    const resolve = bucket.resolvers.get(id);
    const timer = bucket.expiryTimers.get(id);
    if (timer) {
      clearTimeout(timer);
      bucket.expiryTimers.delete(id);
    }
    bucket.candidates.delete(id);
    bucket.resolvers.delete(id);
    if (resolve) {
      this.log.info(`Candidate ${id} resolved as ${outcome}`, { candidateId: id, outcome });
      resolve(outcome);
    }
  }

  /**
   * Arms a per-candidate expiry timer if the candidate carries a deadline. When the timer
   * fires, the candidate resolves as `'expired'` — unless it is already in flight, in
   * which case the publish runs to completion (the timer becomes a no-op). The timer is
   * cleared by `resolveCandidate` whenever the candidate settles for any other reason.
   */
  private scheduleExpiry(bucket: EpochBucket, candidate: PublishCandidate): void {
    if (!candidate.deadline) {
      return;
    }
    // A deadline already in the past fires on the next timer tick.
    const delay = Math.max(candidate.deadline.getTime() - this.deps.dateProvider.now(), 0);
    const timer = setTimeout(() => this.handleExpiry(candidate.id), delay);
    bucket.expiryTimers.set(candidate.id, timer);
  }

  /**
   * Protected so unit tests can drive the deadline path without waiting on the real
   * `setTimeout` to fire. Production code calls this only via the per-candidate timer
   * armed in `scheduleExpiry`.
   */
  protected handleExpiry(candidateId: string): void {
    if (this.inFlight?.id === candidateId) {
      this.log.debug(`Expiry for in-flight candidate ${candidateId} ignored; publish will run to completion`, {
        candidateId,
      });
      return;
    }
    for (const bucket of this.epochs.values()) {
      if (bucket.candidates.has(candidateId)) {
        this.log.info(`Candidate ${candidateId} expired before publishing`, { candidateId });
        this.resolveCandidate(bucket, candidateId, 'expired');
        this.scheduleDrain();
        return;
      }
    }
  }

  /** Reads the proven L2 block number from the block source, treating "none" as block 0. */
  private async readProvenBlockNumber(): Promise<BlockNumber> {
    const proven = await this.deps.l2BlockSource.getBlockNumber({ tag: 'proven' });
    return BlockNumber(proven ?? 0);
  }
}