@aztec/prover-node 5.0.0-private.20260318 → 5.0.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +506 -0
- package/dest/actions/download-epoch-proving-job.js +1 -1
- package/dest/actions/rerun-epoch-proving-job.d.ts +4 -3
- package/dest/actions/rerun-epoch-proving-job.d.ts.map +1 -1
- package/dest/actions/rerun-epoch-proving-job.js +103 -21
- package/dest/bin/run-failed-epoch.js +1 -3
- package/dest/checkpoint-store.d.ts +83 -0
- package/dest/checkpoint-store.d.ts.map +1 -0
- package/dest/checkpoint-store.js +181 -0
- package/dest/config.d.ts +1 -1
- package/dest/config.d.ts.map +1 -1
- package/dest/config.js +1 -1
- package/dest/factory.d.ts +1 -1
- package/dest/factory.d.ts.map +1 -1
- package/dest/factory.js +22 -8
- package/dest/index.d.ts +2 -1
- package/dest/index.d.ts.map +1 -1
- package/dest/index.js +1 -0
- package/dest/job/checkpoint-prover.d.ts +134 -0
- package/dest/job/checkpoint-prover.d.ts.map +1 -0
- package/dest/job/checkpoint-prover.js +350 -0
- package/dest/job/epoch-session.d.ts +146 -0
- package/dest/job/epoch-session.d.ts.map +1 -0
- package/dest/job/epoch-session.js +709 -0
- package/dest/job/top-tree-job.d.ts +82 -0
- package/dest/job/top-tree-job.d.ts.map +1 -0
- package/dest/job/top-tree-job.js +152 -0
- package/dest/metrics.d.ts +29 -5
- package/dest/metrics.d.ts.map +1 -1
- package/dest/metrics.js +73 -9
- package/dest/monitors/epoch-monitor.js +6 -2
- package/dest/proof-publishing-service.d.ts +159 -0
- package/dest/proof-publishing-service.d.ts.map +1 -0
- package/dest/proof-publishing-service.js +334 -0
- package/dest/prover-node-publisher.d.ts +18 -11
- package/dest/prover-node-publisher.d.ts.map +1 -1
- package/dest/prover-node-publisher.js +195 -57
- package/dest/prover-node.d.ts +96 -68
- package/dest/prover-node.d.ts.map +1 -1
- package/dest/prover-node.js +382 -227
- package/dest/prover-publisher-factory.d.ts +2 -2
- package/dest/prover-publisher-factory.d.ts.map +1 -1
- package/dest/prover-publisher-factory.js +3 -3
- package/dest/session-manager.d.ts +158 -0
- package/dest/session-manager.d.ts.map +1 -0
- package/dest/session-manager.js +452 -0
- package/dest/test/index.d.ts +7 -6
- package/dest/test/index.d.ts.map +1 -1
- package/package.json +23 -23
- package/src/actions/download-epoch-proving-job.ts +1 -1
- package/src/actions/rerun-epoch-proving-job.ts +114 -28
- package/src/bin/run-failed-epoch.ts +1 -2
- package/src/checkpoint-store.ts +213 -0
- package/src/config.ts +2 -1
- package/src/factory.ts +18 -10
- package/src/index.ts +1 -0
- package/src/job/checkpoint-prover.ts +465 -0
- package/src/job/epoch-session.ts +424 -0
- package/src/job/top-tree-job.ts +227 -0
- package/src/metrics.ts +88 -12
- package/src/monitors/epoch-monitor.ts +2 -2
- package/src/proof-publishing-service.ts +424 -0
- package/src/prover-node-publisher.ts +220 -67
- package/src/prover-node.ts +439 -249
- package/src/prover-publisher-factory.ts +3 -3
- package/src/session-manager.ts +552 -0
- package/src/test/index.ts +6 -6
- package/dest/job/epoch-proving-job.d.ts +0 -63
- package/dest/job/epoch-proving-job.d.ts.map +0 -1
- package/dest/job/epoch-proving-job.js +0 -762
- package/src/job/epoch-proving-job.ts +0 -465
|
@@ -0,0 +1,424 @@
|
|
|
1
|
+
import { BlockNumber, type CheckpointNumber, type EpochNumber, type SlotNumber } from '@aztec/foundation/branded-types';
|
|
2
|
+
import type { EthAddress } from '@aztec/foundation/eth-address';
|
|
3
|
+
import { type Logger, type LoggerBindings, createLogger } from '@aztec/foundation/log';
|
|
4
|
+
import { sleep } from '@aztec/foundation/sleep';
|
|
5
|
+
import { type DateProvider, Timer } from '@aztec/foundation/timer';
|
|
6
|
+
import type { EpochProverFactory } from '@aztec/prover-client';
|
|
7
|
+
import { TopTreeCancelledError } from '@aztec/prover-client/orchestrator';
|
|
8
|
+
import { type EpochProvingJobState, EpochProvingJobTerminalState } from '@aztec/stdlib/interfaces/server';
|
|
9
|
+
import { Attributes, type Traceable, type Tracer, trackSpan } from '@aztec/telemetry-client';
|
|
10
|
+
|
|
11
|
+
import * as crypto from 'node:crypto';
|
|
12
|
+
|
|
13
|
+
import type { ProverNodeJobMetrics } from '../metrics.js';
|
|
14
|
+
import type { ProofPublishingService } from '../proof-publishing-service.js';
|
|
15
|
+
import { CheckpointProver } from './checkpoint-prover.js';
|
|
16
|
+
import { TopTreeJob, type TopTreeJobHooks, type TopTreeProof } from './top-tree-job.js';
|
|
17
|
+
|
|
18
|
+
export type { EpochProvingJobState };

/**
 * Flavour of a session. Full and partial sessions behave the same everywhere except at
 * the L1 submission step.
 */
export type SessionKind = 'full' | 'partial';

/**
 * Names the work a session is responsible for: a contiguous range of slots inside one
 * epoch. The set of provers a session actually holds is the *implementation* of this
 * spec; it is fixed when the session is constructed and is derived from the canonical
 * content for `[fromSlot, toSlot]`.
 *
 * `ProverNode` reconciles every kind the same way: when the canonical content for the
 * slot range changes, the existing session is cancelled and a new one is created that
 * **keeps the slot range** while picking up the new checkpoints.
 *
 * The kind only influences the publishing decision (see `EpochSession`).
 */
export interface SessionSpec {
  /** Whether this is a full or a partial session. */
  kind: SessionKind;
  /** Epoch the slot range belongs to. */
  epochNumber: EpochNumber;
  /** Start of the slot range. */
  fromSlot: SlotNumber;
  /** End of the slot range. */
  toSlot: SlotNumber;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Builds a stable string key for a spec, suitable for use as a map key.
 *
 * @param spec - The session spec to encode.
 * @returns A string of the form `<kind>:<epochNumber>:<fromSlot>-<toSlot>`.
 */
export function specKey(spec: SessionSpec): string {
  const { kind, epochNumber, fromSlot, toSlot } = spec;
  const slotRange = `${fromSlot}-${toSlot}`;
  return `${kind}:${epochNumber}:${slotRange}`;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Interposition points around the top-tree prove. They exist so tests can observe or
 * replace the prove step without monkey-patching.
 */
export type EpochSessionHooks = {
  /** Forwarded to the top-tree job as its `beforeProve` hook. */
  beforeTopTreeProve?: () => void | Promise<void>;
  /** Forwarded to the top-tree job as its `afterProve` hook. */
  afterTopTreeProve?: () => void | Promise<void>;
  /** Forwarded to the top-tree job as its `proveOverride` hook; receives the default prove thunk. */
  topTreeProveOverride?: (defaultProve: () => Promise<TopTreeProof>) => Promise<TopTreeProof>;
};

/** Tunables of an `EpochSession`. */
export type EpochSessionOptions = {
  /**
   * When set to a positive number, the session sleeps for this many milliseconds after
   * `start()` and before the `TopTreeJob` is constructed. This gives late-arriving
   * events (e.g. a prune) a chance to be processed before top-tree proving begins.
   */
  finalizationDelayMs?: number;
};

/** Everything an `EpochSession` must be handed when it is constructed. */
export type EpochSessionDeps = {
  /** Factory passed on to the top-tree job. */
  proverFactory: EpochProverFactory;
  proverId: EthAddress;
  /** Only `submit` and `withdraw` of the publishing service are used by the session. */
  publishingService: Pick<ProofPublishingService, 'submit' | 'withdraw'>;
  /** Metrics sink; its `tracer` is also used as the session's tracer. */
  metrics: ProverNodeJobMetrics;
  /** Clock used to compute the time remaining until `deadline`. */
  dateProvider: DateProvider;
  /** Optional proving deadline. The session enters `timed-out` if exceeded. */
  deadline: Date | undefined;
  config: EpochSessionOptions;
  /** Extra logger bindings merged into the session logger. */
  bindings?: LoggerBindings;
  hooks?: EpochSessionHooks;
};
|
|
75
|
+
|
|
76
|
+
/**
 * One attempt at proving and publishing a contiguous slot range. The `SessionSpec` and
 * the prover set are both frozen at construction time; the session does not adapt to
 * reorgs or extensions of canonical content. Instead, `SessionManager` owns the
 * reconciliation loop and replaces invalidated sessions wholesale (cancel + construct
 * a fresh session with the new prover set).
 *
 * Each session does three things in sequence:
 *
 *  1. Run a `TopTreeJob` over its frozen prover subset to produce the epoch proof.
 *  2. Hand the proof to the shared `ProofPublishingService` as a `PublishCandidate`.
 *  3. Translate the service's outcome into a terminal session state.
 *
 * Everything to do with submission — predecessor gating, same-epoch dedup, deadline
 * enforcement, and the L1 transaction itself — is the publishing service's concern.
 * The session is just the producer of one candidate and the observer of its outcome.
 *
 * Lifecycle (happy path):
 *
 *   initialized → awaiting-checkpoints → completed
 *
 * Terminal states map the publishing outcome: `published` → `completed`, `superseded` →
 * `superseded`, `failed` → `failed`, `expired` → `timed-out`, `withdrawn` → `cancelled`.
 * Additionally, the session-level deadline tears the session down exactly like a cancel
 * but settles it as `timed-out` for the pre-submit window (top-tree proving) — the
 * publishing service handles the post-submit window via the candidate's `deadline`.
 *
 * `cancel()` is idempotent. It marks the session terminal, clears the deadline timer,
 * calls `publishingService.withdraw(uuid)` to drop any queued candidate (an in-flight
 * publish runs to natural completion; the session has already settled), and tears down
 * the top-tree job if proving is still in progress.
 *
 * `whenDone()` always resolves with the same terminal state that `getState()` reports.
 */
export class EpochSession implements Traceable {
  public readonly tracer: Tracer;
  /** Unique id of this attempt; also used as the id of the publishing candidate. */
  private readonly uuid: string;
  private readonly log: Logger;
  private state: EpochProvingJobState = 'initialized';
  /** Timer armed in the constructor when a deadline is configured. */
  private deadlineTimeoutHandler: NodeJS.Timeout | undefined;

  /** The top-tree job currently proving, if any. */
  private topTreeJob: TopTreeJob | undefined;
  /** Cancelled top-tree jobs whose teardown is still in flight. Awaited at session stop. */
  private readonly pendingTopTreeCleanups: TopTreeJob[] = [];

  private readonly completionPromise: Promise<EpochProvingJobState>;
  /** Resolver of `completionPromise`; only the first call has any effect. */
  private resolveCompletion!: (state: EpochProvingJobState) => void;

  /** Stable reference; never mutated after construction. */
  private readonly checkpoints: readonly CheckpointProver[];

  /**
   * @param spec - What this session proves.
   * @param checkpoints - Non-empty prover set; copied so later caller mutations are not seen.
   * @param deps - Collaborators and configuration.
   * @throws Error if `checkpoints` is empty.
   */
  constructor(
    private readonly spec: SessionSpec,
    checkpoints: readonly CheckpointProver[],
    private readonly deps: EpochSessionDeps,
  ) {
    if (checkpoints.length === 0) {
      throw new Error(`Cannot construct EpochSession for ${specKey(spec)}: empty checkpoint set`);
    }
    this.checkpoints = [...checkpoints];
    this.uuid = crypto.randomUUID();
    this.log = createLogger('prover-node:epoch-session', {
      ...deps.bindings,
      instanceId: `session-${spec.kind}-${spec.epochNumber}-${spec.fromSlot}-${spec.toSlot}`,
    });
    this.tracer = deps.metrics.tracer;
    this.completionPromise = new Promise(resolve => {
      this.resolveCompletion = resolve;
    });
    // The deadline clock starts at construction, not at start().
    this.scheduleDeadlineStop();
    this.log.info(`Created EpochSession ${this.uuid}`, {
      uuid: this.uuid,
      ...spec,
      checkpointCount: this.checkpoints.length,
      checkpointIds: this.checkpoints.map(c => c.id),
    });
  }

  public getId(): string {
    return this.uuid;
  }

  public getSpec(): SessionSpec {
    return this.spec;
  }

  public getState(): EpochProvingJobState {
    return this.state;
  }

  public getEpochNumber(): EpochNumber {
    return this.spec.epochNumber;
  }

  public getKind(): SessionKind {
    return this.spec.kind;
  }

  public getDeadline(): Date | undefined {
    return this.deps.deadline;
  }

  public getCheckpoints(): readonly CheckpointProver[] {
    return this.checkpoints;
  }

  /** Resolves when the session reaches a terminal state. */
  public whenDone(): Promise<EpochProvingJobState> {
    return this.completionPromise;
  }

  /** True if the session is in a terminal state. */
  public isTerminal(): boolean {
    return EpochProvingJobTerminalState.includes(this.state);
  }

  /** First block this session proves: the first block of the first checkpoint. */
  public getStartBlockNumber(): BlockNumber {
    return BlockNumber(this.checkpoints[0].checkpoint.blocks[0].number);
  }

  /** Last block this session proves: the last block of the last checkpoint. */
  public getEndBlockNumber(): BlockNumber {
    const lastCheckpoint = this.checkpoints[this.checkpoints.length - 1];
    return BlockNumber(lastCheckpoint.checkpoint.blocks[lastCheckpoint.checkpoint.blocks.length - 1].number);
  }

  /**
   * Runs proving + submission to completion. Never rejects: any error is logged and
   * turns into the `failed` state (unless the session already settled otherwise).
   *
   * Callers may either await the returned promise inline or fire-and-forget this call
   * and observe the outcome through `whenDone()`.
   *
   * @returns The session state once the run has finished and the top tree is torn down.
   */
  @trackSpan('EpochSession.start', function () {
    return { [Attributes.EPOCH_NUMBER]: this.spec.epochNumber };
  })
  public async start(): Promise<EpochProvingJobState> {
    try {
      await this.run();
    } catch (err) {
      this.log.error(`Error in EpochSession ${this.uuid}`, err, {
        uuid: this.uuid,
        ...this.spec,
      });
      if (!this.isTerminal()) {
        this.state = 'failed';
      }
    } finally {
      clearTimeout(this.deadlineTimeoutHandler);
      await this.teardownTopTreeIfNeeded();
      this.resolveCompletion(this.state);
    }
    return this.state;
  }

  /**
   * Cancels the session. Idempotent: a no-op once the session is terminal.
   *
   * Marks the session `cancelled`, clears the deadline timer, withdraws the candidate
   * from the publishing service and tears down an in-progress top-tree job. The returned
   * promise resolves after the top-tree teardown has finished.
   *
   * @param reason - Free-form reason, used for logging only.
   */
  public async cancel(reason = 'cancelled'): Promise<void> {
    await this.terminate('cancelled', reason);
  }

  /**
   * Shared teardown for `cancel()` and the deadline path. The terminal state is set
   * *before* anything is awaited or the completion promise is resolved, so `whenDone()`
   * and `getState()` always agree.
   */
  private async terminate(terminalState: 'cancelled' | 'timed-out', reason: string): Promise<void> {
    if (this.isTerminal()) {
      return;
    }
    this.log.info(`Cancelling EpochSession ${this.uuid}: ${reason}`, {
      uuid: this.uuid,
      ...this.spec,
      previousState: this.state,
      reason,
    });
    this.state = terminalState;
    // The timer is armed in the constructor, so it must also be released when the session
    // is cancelled without start() ever running its cleanup.
    clearTimeout(this.deadlineTimeoutHandler);
    this.deadlineTimeoutHandler = undefined;
    try {
      this.deps.publishingService.withdraw(this.uuid);
    } catch (err) {
      // Best effort: a failing withdraw must not prevent the session from settling.
      this.log.error(`Error withdrawing candidate from publishing service`, err);
    }
    // Cancels the current top-tree job (if any) and waits for its teardown.
    await this.teardownTopTreeIfNeeded();
    this.resolveCompletion(this.state);
  }

  /** Drives the session through top-tree proving and proof submission. */
  private async run(): Promise<void> {
    // cancel() or the deadline may already have settled the session before start() ran;
    // a terminal state must never be overwritten or lead to a submission.
    if (this.isTerminal()) {
      return;
    }

    const timer = new Timer();

    const finalizationDelayMs = this.deps.config.finalizationDelayMs;
    if (finalizationDelayMs && finalizationDelayMs > 0) {
      this.log.warn(`Waiting ${finalizationDelayMs}ms before starting top-tree proving`, {
        uuid: this.uuid,
        ...this.spec,
      });
      await sleep(finalizationDelayMs);
      if (this.isTerminal()) {
        return;
      }
    }

    // Stage 1 — top-tree proving.
    const topTreeJob = new TopTreeJob(this.spec.epochNumber, this.checkpoints, {
      proverFactory: this.deps.proverFactory,
      metrics: this.deps.metrics,
      log: this.log,
      hooks: this.toTopTreeHooks(),
    });
    this.topTreeJob = topTreeJob;
    const { fromCheckpoint, toCheckpoint, count } = topTreeJob.getRange();

    this.state = 'awaiting-checkpoints';
    let proof: TopTreeProof;
    try {
      proof = await topTreeJob.start();
    } catch (err) {
      if (err instanceof TopTreeCancelledError) {
        // Session cancel kicked off the underlying teardown; nothing more to do here.
        this.log.info(`Top-tree cancelled for EpochSession ${this.uuid}`, { uuid: this.uuid, ...this.spec });
        return;
      }
      throw err;
    }

    // A cancel can land after the proof resolved but before this continuation runs. The
    // candidate was already withdrawn in that case, so it must not be submitted; start()'s
    // cleanup still stops the top-tree job.
    if (this.isTerminal()) {
      return;
    }

    this.topTreeJob = undefined;
    this.log.info(`Top-tree proof ready for EpochSession ${this.uuid}`, {
      uuid: this.uuid,
      ...this.spec,
      fromCheckpoint,
      toCheckpoint,
      durationMs: timer.ms(),
    });

    // Stage 2 — hand the proof to the publishing service and wait for its verdict.
    await this.submitProof(proof, fromCheckpoint, toCheckpoint, count, timer);
  }

  /**
   * Submits the proof as a candidate and maps the service's outcome onto a session state.
   *
   * @throws Error when the outcome is `failed` or not one of the known outcomes; `start()`
   *   turns that into the `failed` state.
   */
  private async submitProof(
    proof: TopTreeProof,
    fromCheckpoint: CheckpointNumber,
    toCheckpoint: CheckpointNumber,
    checkpointCount: number,
    timer: Timer,
  ): Promise<void> {
    // Attestations come from the highest-numbered registered checkpoint — that's the one
    // whose attestations the L1 contract checks for the proven range.
    const lastCheckpoint = this.checkpoints[this.checkpoints.length - 1];
    const attestations = lastCheckpoint.attestations.map(a => a.toViem());
    const epochSizeBlocks = this.checkpoints.reduce((acc, c) => acc + c.checkpoint.blocks.length, 0);
    const epochSizeTxs = this.checkpoints.reduce(
      (acc, c) => acc + c.checkpoint.blocks.reduce((bAcc, block) => bAcc + block.body.txEffects.length, 0),
      0,
    );

    const outcome = await this.deps.publishingService.submit({
      id: this.uuid,
      epoch: this.spec.epochNumber,
      kind: this.spec.kind,
      startBlock: this.getStartBlockNumber(),
      endBlock: this.getEndBlockNumber(),
      deadline: this.deps.deadline,
      fromCheckpoint,
      toCheckpoint,
      publicInputs: proof.publicInputs,
      proof: proof.proof,
      batchedBlobInputs: proof.batchedBlobInputs,
      attestations,
    });

    if (this.isTerminal()) {
      // cancel() already set the terminal state — don't clobber it.
      return;
    }

    switch (outcome) {
      case 'published': {
        this.log.info(
          `Submitted proof for epoch ${this.spec.epochNumber} (checkpoints ${fromCheckpoint}..${toCheckpoint})`,
          { uuid: this.uuid, ...this.spec },
        );
        this.state = 'completed';
        // NOTE(review): both duration arguments receive the same elapsed time, as before —
        // confirm against the `recordProvingJob` signature whether they should differ.
        const elapsedMs = timer.ms();
        this.deps.metrics.recordProvingJob(elapsedMs, elapsedMs, checkpointCount, epochSizeBlocks, epochSizeTxs);
        return;
      }
      case 'superseded':
        this.log.info(`EpochSession ${this.uuid} superseded by a longer candidate`, {
          uuid: this.uuid,
          ...this.spec,
        });
        this.state = 'superseded';
        return;
      case 'withdrawn':
        // cancel() ran but the terminal-state check above missed it. Defensive: treat as cancelled.
        this.state = 'cancelled';
        return;
      case 'expired':
        this.log.warn(`EpochSession ${this.uuid} expired before publishing`, { uuid: this.uuid, ...this.spec });
        this.state = 'timed-out';
        return;
      case 'failed':
        throw new Error('Failed to submit epoch proof to L1');
      default:
        // Without this, an unrecognised outcome would leave the session non-terminal while
        // its completion promise resolves.
        throw new Error(`Unexpected publishing outcome: ${String(outcome)}`);
    }
  }

  /**
   * Cancels the current top-tree job (if any) and waits until every cancelled job has
   * finished its teardown. Safe to call repeatedly and concurrently.
   */
  private async teardownTopTreeIfNeeded(): Promise<void> {
    if (this.topTreeJob) {
      const job = this.topTreeJob;
      this.topTreeJob = undefined;
      job.cancel();
      this.pendingTopTreeCleanups.push(job);
    }
    if (this.pendingTopTreeCleanups.length > 0) {
      await Promise.allSettled(this.pendingTopTreeCleanups.map(j => j.whenDone()));
      this.pendingTopTreeCleanups.length = 0;
    }
  }

  /** Arms the deadline timer, if a deadline is configured. A past deadline fires immediately. */
  private scheduleDeadlineStop(): void {
    const deadline = this.deps.deadline;
    if (!deadline) {
      return;
    }
    const timeout = Math.max(deadline.getTime() - this.deps.dateProvider.now(), 0);
    this.deadlineTimeoutHandler = setTimeout(() => {
      void this.handleDeadline();
    }, timeout);
  }

  /**
   * Settles the session as `timed-out`, performing the same teardown as `cancel()`.
   * No-op if the session is already terminal. The returned promise resolves once the
   * teardown has propagated. Protected so unit tests can drive the deadline path
   * without waiting on the real `setTimeout` to fire.
   */
  protected async handleDeadline(): Promise<void> {
    if (this.isTerminal()) {
      return;
    }
    this.log.warn(`EpochSession ${this.uuid} hit deadline`, { uuid: this.uuid, ...this.spec });
    // Settle directly as 'timed-out' rather than cancelling and patching the state
    // afterwards, so whenDone() observers never see an intermediate 'cancelled'.
    await this.terminate('timed-out', 'deadline');
  }

  /** Maps the session-level test hooks onto top-tree job hooks; `undefined` when none are set. */
  private toTopTreeHooks(): TopTreeJobHooks | undefined {
    const hooks = this.deps.hooks;
    if (!hooks?.beforeTopTreeProve && !hooks?.afterTopTreeProve && !hooks?.topTreeProveOverride) {
      return undefined;
    }
    return {
      beforeProve: hooks.beforeTopTreeProve,
      afterProve: hooks.afterTopTreeProve,
      proveOverride: hooks.topTreeProveOverride,
    };
  }
}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import type { BatchedBlob } from '@aztec/blob-lib/types';
|
|
2
|
+
import type { CheckpointNumber, EpochNumber } from '@aztec/foundation/branded-types';
|
|
3
|
+
import type { Logger } from '@aztec/foundation/log';
|
|
4
|
+
import { type PromiseWithResolvers, promiseWithResolvers } from '@aztec/foundation/promise';
|
|
5
|
+
import { Timer } from '@aztec/foundation/timer';
|
|
6
|
+
import type { EpochProverFactory } from '@aztec/prover-client';
|
|
7
|
+
import { buildFinalBlobChallenges } from '@aztec/prover-client/helpers';
|
|
8
|
+
import {
|
|
9
|
+
type CheckpointTopTreeData,
|
|
10
|
+
TopTreeCancelledError,
|
|
11
|
+
type TopTreeOrchestrator,
|
|
12
|
+
} from '@aztec/prover-client/orchestrator';
|
|
13
|
+
import type { Proof } from '@aztec/stdlib/proofs';
|
|
14
|
+
import type { RootRollupPublicInputs } from '@aztec/stdlib/rollup';
|
|
15
|
+
|
|
16
|
+
import type { ProverNodeJobMetrics } from '../metrics.js';
|
|
17
|
+
import type { CheckpointProver } from './checkpoint-prover.js';
|
|
18
|
+
|
|
19
|
+
/** What a successful top-tree run produces. */
export type TopTreeProof = {
  publicInputs: RootRollupPublicInputs;
  proof: Proof;
  batchedBlobInputs: BatchedBlob;
};

/**
 * Test hooks that wrap the underlying `topTree.prove(...)` call, so tests do not need to
 * monkey-patch the orchestrator.
 */
export type TopTreeJobHooks = {
  /** Invoked right before the top tree's `prove` runs. */
  beforeProve?: () => void | Promise<void>;
  /** Invoked once `prove` has returned successfully (not on failure / cancellation). */
  afterProve?: () => void | Promise<void>;
  /**
   * When provided, this is called in place of the underlying prove. It is handed a thunk
   * that performs the real call, which lets tests return a synthetic proof, add a delay
   * or throw, without re-implementing the rest of the finalize flow.
   */
  proveOverride?: (defaultProve: () => Promise<TopTreeProof>) => Promise<TopTreeProof>;
};
|
|
42
|
+
|
|
43
|
+
/**
 * Self-contained top-tree job. Constructed from a snapshot of `CheckpointProver`s; runs
 * `topTree.prove(...)` against their pending `blockProofs` promises and exposes the
 * final epoch proof via `result`.
 */
export class TopTreeJob {
  /** Resolves with the final proof on success; rejects on cancellation or any prove error. */
  readonly result: PromiseWithResolvers<TopTreeProof> = promiseWithResolvers();

  /** Snapshot of checkpoint jobs the top tree is built from, in checkpoint-number order. */
  readonly snapshot: readonly CheckpointProver[];

  private readonly topTree: TopTreeOrchestrator;
  private readonly fromCheckpoint: CheckpointNumber;
  private readonly toCheckpoint: CheckpointNumber;
  private cancelled = false;
  /** Tracks the cancel-driven background teardown so `whenDone()` can await it. */
  private cancelPromise?: Promise<void>;
  /** Started at construction; backs `elapsedMs()`. */
  private readonly executionTimer = new Timer();

  /**
   * @param epochNumber - Epoch the proof is for.
   * @param snapshot - Non-empty list of checkpoint provers with consecutive checkpoint
   *   numbers; copied so later caller mutations are not seen.
   * @param deps - Prover factory, metrics sink, logger and optional test hooks.
   * @throws Error if `snapshot` is empty or its checkpoint numbers are not contiguous.
   */
  constructor(
    private readonly epochNumber: EpochNumber,
    snapshot: readonly CheckpointProver[],
    private readonly deps: {
      proverFactory: EpochProverFactory;
      metrics: ProverNodeJobMetrics;
      log: Logger;
      hooks?: TopTreeJobHooks;
    },
  ) {
    if (snapshot.length === 0) {
      throw new Error(`Cannot construct TopTreeJob for epoch ${epochNumber}: empty snapshot`);
    }
    for (let i = 1; i < snapshot.length; i++) {
      const prev = snapshot[i - 1].checkpoint.number;
      const curr = snapshot[i].checkpoint.number;
      if (curr !== prev + 1) {
        throw new Error(
          `Cannot construct TopTreeJob for epoch ${epochNumber}: checkpoint numbers must be contiguous, got gap between ${prev} and ${curr}`,
        );
      }
    }
    // Copy so the job really holds a snapshot rather than a live view of the caller's array.
    this.snapshot = [...snapshot];
    this.fromCheckpoint = this.snapshot[0].checkpoint.number;
    this.toCheckpoint = this.snapshot[this.snapshot.length - 1].checkpoint.number;
    this.topTree = deps.proverFactory.createTopTreeOrchestrator();
    deps.log.info(
      `Created TopTreeJob for epoch ${epochNumber} covering checkpoints ${this.fromCheckpoint}..${this.toCheckpoint}`,
      {
        epochNumber,
        fromCheckpoint: this.fromCheckpoint,
        toCheckpoint: this.toCheckpoint,
        checkpointCount: this.snapshot.length,
      },
    );
    // Mark the result's rejection branch as observed so a cancellation before any
    // consumer awaits does not surface as unhandled.
    this.result.promise.catch(() => {});
  }

  /** Range covered by this attempt — useful for logging and L1 submission. */
  public getRange(): { fromCheckpoint: CheckpointNumber; toCheckpoint: CheckpointNumber; count: number } {
    return { fromCheckpoint: this.fromCheckpoint, toCheckpoint: this.toCheckpoint, count: this.snapshot.length };
  }

  public isCancelled(): boolean {
    return this.cancelled;
  }

  /** Wall-time since construction — used by the owning job for metrics. */
  public elapsedMs(): number {
    return this.executionTimer.ms();
  }

  /** Kicks off the prove. Returns the result promise (also available as `result.promise`). */
  public start(): Promise<TopTreeProof> {
    // run() never rejects: every outcome is routed into `result`.
    void this.run();
    return this.result.promise;
  }

  /**
   * Cancels the in-flight prove. Idempotent. Rejects the result promise with
   * `TopTreeCancelledError`, then kicks off the underlying orchestrator's teardown
   * in the background so callers don't block on it. The teardown promise is exposed
   * via `whenDone()` — the parent collects the cancelled job and awaits all
   * pending top-tree teardowns at the end of the epoch.
   */
  public cancel(): void {
    if (this.cancelled) {
      return;
    }
    this.cancelled = true;
    this.deps.log.info(
      `Cancelling TopTreeJob for epoch ${this.epochNumber} (checkpoints ${this.fromCheckpoint}..${this.toCheckpoint})`,
      {
        epochNumber: this.epochNumber,
        fromCheckpoint: this.fromCheckpoint,
        toCheckpoint: this.toCheckpoint,
        elapsedMs: this.executionTimer.ms(),
      },
    );
    // No effect if the result has already been settled by a finished prove.
    this.result.reject(new TopTreeCancelledError());
    // Fire and forget: parent awaits the cancel-driven teardown via whenDone(); the
    // chained .catch swallows rejections so the unawaited promise doesn't surface
    // as an unhandled rejection.
    this.cancelPromise = this.runCancel().catch(() => {});
  }

  /**
   * Resolves once the cancel-driven teardown of the underlying orchestrator has unwound.
   * Resolves immediately if `cancel()` was never called.
   */
  public async whenDone(): Promise<void> {
    if (this.cancelPromise) {
      await this.cancelPromise;
    }
  }

  /** Cancels and then stops the orchestrator; errors from either step are logged, not thrown. */
  private async runCancel(): Promise<void> {
    try {
      this.topTree.cancel({ abortJobs: true });
    } catch (err) {
      this.deps.log.error('Error cancelling top tree', err);
    }
    try {
      await this.topTree.stop();
    } catch (err) {
      this.deps.log.error('Error stopping top tree', err);
    }
  }

  /**
   * Builds the blob challenges and per-checkpoint inputs, runs the prove (or the test
   * override) and settles `result`. Never rejects. If the job is cancelled before the
   * prove is launched, it returns early: `result` has already been rejected by `cancel()`
   * and the orchestrator is being torn down, so starting a prove would be wasted work.
   */
  private async run(): Promise<void> {
    try {
      if (this.cancelled) {
        return;
      }

      const blobTimer = new Timer();
      // Computed once and reused for the per-checkpoint data below.
      // NOTE(review): assumes `buildFinalBlobChallenges` does not mutate its input — confirm.
      const blobFieldsPerCheckpoint = this.snapshot.map(j => j.checkpoint.toBlobFields());
      const finalBlobBatchingChallenges = await buildFinalBlobChallenges(blobFieldsPerCheckpoint);
      const blobDurationMs = blobTimer.ms();
      this.deps.metrics.recordBlobProcessing(blobDurationMs);
      this.deps.log.verbose(
        `Built final blob batching challenges for epoch ${this.epochNumber} in ${blobDurationMs}ms`,
        {
          epochNumber: this.epochNumber,
          checkpointCount: this.snapshot.length,
          durationMs: blobDurationMs,
        },
      );

      if (this.cancelled) {
        return;
      }

      const checkpointData: CheckpointTopTreeData[] = this.snapshot.map((j, i) => ({
        blockProofs: j.whenBlockProofsReady(),
        l2ToL1MsgsPerBlock: j.checkpoint.blocks.map(b => b.body.txEffects.map(tx => tx.l2ToL1Msgs)),
        blobFields: blobFieldsPerCheckpoint[i],
        previousBlockHeader: j.previousBlockHeader,
        previousArchiveSiblingPath: j.previousArchiveSiblingPath,
      }));

      const defaultProve = (): Promise<TopTreeProof> =>
        this.topTree.prove(this.epochNumber, this.snapshot.length, finalBlobBatchingChallenges, checkpointData);

      await this.deps.hooks?.beforeProve?.();
      if (this.cancelled) {
        return;
      }

      const proveTimer = new Timer();
      this.deps.log.info(
        `Starting top-tree prove for epoch ${this.epochNumber} (checkpoints ${this.fromCheckpoint}..${this.toCheckpoint})`,
        {
          epochNumber: this.epochNumber,
          fromCheckpoint: this.fromCheckpoint,
          toCheckpoint: this.toCheckpoint,
          checkpointCount: this.snapshot.length,
        },
      );
      const proof = await (this.deps.hooks?.proveOverride
        ? this.deps.hooks.proveOverride(defaultProve)
        : defaultProve());
      await this.deps.hooks?.afterProve?.();
      const proveDurationMs = proveTimer.ms();
      this.deps.log.info(`Top-tree prove succeeded for epoch ${this.epochNumber} in ${proveDurationMs}ms`, {
        epochNumber: this.epochNumber,
        fromCheckpoint: this.fromCheckpoint,
        toCheckpoint: this.toCheckpoint,
        durationMs: proveDurationMs,
        totalElapsedMs: this.executionTimer.ms(),
      });

      // No effect if cancel() already rejected the result.
      this.result.resolve(proof);
    } catch (err) {
      // Cancel paths surface as TopTreeCancelledError; everything else propagates as-is.
      this.result.reject(err);
    }
  }
}
|