@aztec/prover-client 0.66.0 → 0.67.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/dest/block_builder/light.d.ts.map +1 -1
  2. package/dest/block_builder/light.js +12 -8
  3. package/dest/mocks/fixtures.d.ts +3 -3
  4. package/dest/mocks/fixtures.d.ts.map +1 -1
  5. package/dest/mocks/fixtures.js +2 -2
  6. package/dest/mocks/test_context.d.ts +7 -7
  7. package/dest/mocks/test_context.d.ts.map +1 -1
  8. package/dest/mocks/test_context.js +6 -6
  9. package/dest/orchestrator/block-building-helpers.d.ts +5 -5
  10. package/dest/orchestrator/block-building-helpers.d.ts.map +1 -1
  11. package/dest/orchestrator/block-building-helpers.js +10 -11
  12. package/dest/orchestrator/orchestrator.d.ts +2 -2
  13. package/dest/orchestrator/orchestrator.d.ts.map +1 -1
  14. package/dest/orchestrator/orchestrator.js +3 -3
  15. package/dest/orchestrator/orchestrator_metrics.d.ts.map +1 -1
  16. package/dest/orchestrator/orchestrator_metrics.js +2 -5
  17. package/dest/prover-agent/memory-proving-queue.d.ts.map +1 -1
  18. package/dest/prover-agent/memory-proving-queue.js +5 -4
  19. package/dest/prover-agent/prover-agent.d.ts.map +1 -1
  20. package/dest/prover-agent/prover-agent.js +3 -3
  21. package/dest/prover-client/prover-client.js +3 -3
  22. package/dest/proving_broker/caching_broker_facade.d.ts +12 -12
  23. package/dest/proving_broker/caching_broker_facade.d.ts.map +1 -1
  24. package/dest/proving_broker/caching_broker_facade.js +32 -29
  25. package/dest/proving_broker/proving_agent.d.ts.map +1 -1
  26. package/dest/proving_broker/proving_agent.js +3 -3
  27. package/dest/proving_broker/proving_broker.d.ts +24 -5
  28. package/dest/proving_broker/proving_broker.d.ts.map +1 -1
  29. package/dest/proving_broker/proving_broker.js +112 -44
  30. package/dest/proving_broker/proving_broker_database/persisted.d.ts.map +1 -1
  31. package/dest/proving_broker/proving_broker_database/persisted.js +4 -8
  32. package/dest/proving_broker/proving_broker_instrumentation.d.ts.map +1 -1
  33. package/dest/proving_broker/proving_broker_instrumentation.js +2 -8
  34. package/dest/proving_broker/rpc.d.ts.map +1 -1
  35. package/dest/proving_broker/rpc.js +3 -2
  36. package/dest/test/mock_prover.d.ts +3 -2
  37. package/dest/test/mock_prover.d.ts.map +1 -1
  38. package/dest/test/mock_prover.js +6 -3
  39. package/package.json +15 -11
  40. package/src/block_builder/light.ts +12 -9
  41. package/src/mocks/fixtures.ts +4 -4
  42. package/src/mocks/test_context.ts +11 -11
  43. package/src/orchestrator/block-building-helpers.ts +13 -14
  44. package/src/orchestrator/orchestrator.ts +5 -5
  45. package/src/orchestrator/orchestrator_metrics.ts +1 -11
  46. package/src/prover-agent/memory-proving-queue.ts +4 -3
  47. package/src/prover-agent/prover-agent.ts +2 -2
  48. package/src/prover-client/prover-client.ts +2 -2
  49. package/src/proving_broker/caching_broker_facade.ts +31 -15
  50. package/src/proving_broker/proving_agent.ts +2 -2
  51. package/src/proving_broker/proving_broker.ts +140 -51
  52. package/src/proving_broker/proving_broker_database/persisted.ts +2 -8
  53. package/src/proving_broker/proving_broker_instrumentation.ts +0 -7
  54. package/src/proving_broker/rpc.ts +2 -1
  55. package/src/test/mock_prover.ts +5 -2
@@ -33,7 +33,7 @@ import {
33
33
  type TubeInputs,
34
34
  } from '@aztec/circuits.js';
35
35
  import { sha256 } from '@aztec/foundation/crypto';
36
- import { createDebugLogger } from '@aztec/foundation/log';
36
+ import { createLogger } from '@aztec/foundation/log';
37
37
  import { retryUntil } from '@aztec/foundation/retry';
38
38
 
39
39
  import { InlineProofStore, type ProofStore } from './proof_store.js';
@@ -52,13 +52,14 @@ export class CachingBrokerFacade implements ServerCircuitProver {
52
52
  private proofStore: ProofStore = new InlineProofStore(),
53
53
  private waitTimeoutMs = MAX_WAIT_MS,
54
54
  private pollIntervalMs = 1000,
55
- private log = createDebugLogger('aztec:prover-client:caching-prover-broker'),
55
+ private log = createLogger('prover-client:caching-prover-broker'),
56
56
  ) {}
57
57
 
58
58
  private async enqueueAndWaitForJob<T extends ProvingRequestType>(
59
59
  id: ProvingJobId,
60
60
  type: T,
61
61
  inputs: ProvingJobInputsMap[T],
62
+ epochNumber = 0,
62
63
  signal?: AbortSignal,
63
64
  ): Promise<ProvingJobResultsMap[T]> {
64
65
  // first try the cache
@@ -95,6 +96,7 @@ export class CachingBrokerFacade implements ServerCircuitProver {
95
96
  id,
96
97
  type,
97
98
  inputsUri,
99
+ epochNumber,
98
100
  });
99
101
  await this.cache.setProvingJobStatus(id, { status: 'in-queue' });
100
102
  } catch (err) {
@@ -107,7 +109,7 @@ export class CachingBrokerFacade implements ServerCircuitProver {
107
109
  // notify broker of cancelled job
108
110
  const abortFn = async () => {
109
111
  signal?.removeEventListener('abort', abortFn);
110
- await this.broker.removeAndCancelProvingJob(id);
112
+ await this.broker.cancelProvingJob(id);
111
113
  };
112
114
 
113
115
  signal?.addEventListener('abort', abortFn);
@@ -147,18 +149,21 @@ export class CachingBrokerFacade implements ServerCircuitProver {
147
149
  }
148
150
  } finally {
149
151
  signal?.removeEventListener('abort', abortFn);
152
+ // we've saved the result in our cache. We can tell the broker to clear its state
153
+ await this.broker.cleanUpProvingJobState(id);
150
154
  }
151
155
  }
152
156
 
153
157
  getAvmProof(
154
158
  inputs: AvmCircuitInputs,
155
159
  signal?: AbortSignal,
156
- _blockNumber?: number,
160
+ epochNumber?: number,
157
161
  ): Promise<ProofAndVerificationKey<typeof AVM_PROOF_LENGTH_IN_FIELDS>> {
158
162
  return this.enqueueAndWaitForJob(
159
163
  this.generateId(ProvingRequestType.PUBLIC_VM, inputs),
160
164
  ProvingRequestType.PUBLIC_VM,
161
165
  inputs,
166
+ epochNumber,
162
167
  signal,
163
168
  );
164
169
  }
@@ -166,12 +171,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
166
171
  getBaseParityProof(
167
172
  inputs: BaseParityInputs,
168
173
  signal?: AbortSignal,
169
- _epochNumber?: number,
174
+ epochNumber?: number,
170
175
  ): Promise<PublicInputsAndRecursiveProof<ParityPublicInputs, typeof RECURSIVE_PROOF_LENGTH>> {
171
176
  return this.enqueueAndWaitForJob(
172
177
  this.generateId(ProvingRequestType.BASE_PARITY, inputs),
173
178
  ProvingRequestType.BASE_PARITY,
174
179
  inputs,
180
+ epochNumber,
175
181
  signal,
176
182
  );
177
183
  }
@@ -179,12 +185,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
179
185
  getBlockMergeRollupProof(
180
186
  input: BlockMergeRollupInputs,
181
187
  signal?: AbortSignal,
182
- _epochNumber?: number,
188
+ epochNumber?: number,
183
189
  ): Promise<PublicInputsAndRecursiveProof<BlockRootOrBlockMergePublicInputs, typeof RECURSIVE_PROOF_LENGTH>> {
184
190
  return this.enqueueAndWaitForJob(
185
191
  this.generateId(ProvingRequestType.BLOCK_MERGE_ROLLUP, input),
186
192
  ProvingRequestType.BLOCK_MERGE_ROLLUP,
187
193
  input,
194
+ epochNumber,
188
195
  signal,
189
196
  );
190
197
  }
@@ -192,12 +199,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
192
199
  getBlockRootRollupProof(
193
200
  input: BlockRootRollupInputs,
194
201
  signal?: AbortSignal,
195
- _epochNumber?: number,
202
+ epochNumber?: number,
196
203
  ): Promise<PublicInputsAndRecursiveProof<BlockRootOrBlockMergePublicInputs, typeof RECURSIVE_PROOF_LENGTH>> {
197
204
  return this.enqueueAndWaitForJob(
198
205
  this.generateId(ProvingRequestType.BLOCK_ROOT_ROLLUP, input),
199
206
  ProvingRequestType.BLOCK_ROOT_ROLLUP,
200
207
  input,
208
+ epochNumber,
201
209
  signal,
202
210
  );
203
211
  }
@@ -205,12 +213,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
205
213
  getEmptyBlockRootRollupProof(
206
214
  input: EmptyBlockRootRollupInputs,
207
215
  signal?: AbortSignal,
208
- _epochNumber?: number,
216
+ epochNumber?: number,
209
217
  ): Promise<PublicInputsAndRecursiveProof<BlockRootOrBlockMergePublicInputs>> {
210
218
  return this.enqueueAndWaitForJob(
211
219
  this.generateId(ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP, input),
212
220
  ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP,
213
221
  input,
222
+ epochNumber,
214
223
  signal,
215
224
  );
216
225
  }
@@ -218,12 +227,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
218
227
  getEmptyPrivateKernelProof(
219
228
  inputs: PrivateKernelEmptyInputData,
220
229
  signal?: AbortSignal,
221
- _epochNumber?: number,
230
+ epochNumber?: number,
222
231
  ): Promise<PublicInputsAndRecursiveProof<KernelCircuitPublicInputs, typeof RECURSIVE_PROOF_LENGTH>> {
223
232
  return this.enqueueAndWaitForJob(
224
233
  this.generateId(ProvingRequestType.PRIVATE_KERNEL_EMPTY, inputs),
225
234
  ProvingRequestType.PRIVATE_KERNEL_EMPTY,
226
235
  inputs,
236
+ epochNumber,
227
237
  signal,
228
238
  );
229
239
  }
@@ -231,24 +241,26 @@ export class CachingBrokerFacade implements ServerCircuitProver {
231
241
  getMergeRollupProof(
232
242
  input: MergeRollupInputs,
233
243
  signal?: AbortSignal,
234
- _epochNumber?: number,
244
+ epochNumber?: number,
235
245
  ): Promise<PublicInputsAndRecursiveProof<BaseOrMergeRollupPublicInputs, typeof RECURSIVE_PROOF_LENGTH>> {
236
246
  return this.enqueueAndWaitForJob(
237
247
  this.generateId(ProvingRequestType.MERGE_ROLLUP, input),
238
248
  ProvingRequestType.MERGE_ROLLUP,
239
249
  input,
250
+ epochNumber,
240
251
  signal,
241
252
  );
242
253
  }
243
254
  getPrivateBaseRollupProof(
244
255
  baseRollupInput: PrivateBaseRollupInputs,
245
256
  signal?: AbortSignal,
246
- _epochNumber?: number,
257
+ epochNumber?: number,
247
258
  ): Promise<PublicInputsAndRecursiveProof<BaseOrMergeRollupPublicInputs, typeof RECURSIVE_PROOF_LENGTH>> {
248
259
  return this.enqueueAndWaitForJob(
249
260
  this.generateId(ProvingRequestType.PRIVATE_BASE_ROLLUP, baseRollupInput),
250
261
  ProvingRequestType.PRIVATE_BASE_ROLLUP,
251
262
  baseRollupInput,
263
+ epochNumber,
252
264
  signal,
253
265
  );
254
266
  }
@@ -256,12 +268,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
256
268
  getPublicBaseRollupProof(
257
269
  inputs: PublicBaseRollupInputs,
258
270
  signal?: AbortSignal,
259
- _epochNumber?: number,
271
+ epochNumber?: number,
260
272
  ): Promise<PublicInputsAndRecursiveProof<BaseOrMergeRollupPublicInputs, typeof RECURSIVE_PROOF_LENGTH>> {
261
273
  return this.enqueueAndWaitForJob(
262
274
  this.generateId(ProvingRequestType.PUBLIC_BASE_ROLLUP, inputs),
263
275
  ProvingRequestType.PUBLIC_BASE_ROLLUP,
264
276
  inputs,
277
+ epochNumber,
265
278
  signal,
266
279
  );
267
280
  }
@@ -269,12 +282,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
269
282
  getRootParityProof(
270
283
  inputs: RootParityInputs,
271
284
  signal?: AbortSignal,
272
- _epochNumber?: number,
285
+ epochNumber?: number,
273
286
  ): Promise<PublicInputsAndRecursiveProof<ParityPublicInputs, typeof NESTED_RECURSIVE_PROOF_LENGTH>> {
274
287
  return this.enqueueAndWaitForJob(
275
288
  this.generateId(ProvingRequestType.ROOT_PARITY, inputs),
276
289
  ProvingRequestType.ROOT_PARITY,
277
290
  inputs,
291
+ epochNumber,
278
292
  signal,
279
293
  );
280
294
  }
@@ -282,12 +296,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
282
296
  getRootRollupProof(
283
297
  input: RootRollupInputs,
284
298
  signal?: AbortSignal,
285
- _epochNumber?: number,
299
+ epochNumber?: number,
286
300
  ): Promise<PublicInputsAndRecursiveProof<RootRollupPublicInputs, typeof RECURSIVE_PROOF_LENGTH>> {
287
301
  return this.enqueueAndWaitForJob(
288
302
  this.generateId(ProvingRequestType.ROOT_ROLLUP, input),
289
303
  ProvingRequestType.ROOT_ROLLUP,
290
304
  input,
305
+ epochNumber,
291
306
  signal,
292
307
  );
293
308
  }
@@ -295,12 +310,13 @@ export class CachingBrokerFacade implements ServerCircuitProver {
295
310
  getTubeProof(
296
311
  tubeInput: TubeInputs,
297
312
  signal?: AbortSignal,
298
- _epochNumber?: number,
313
+ epochNumber?: number,
299
314
  ): Promise<ProofAndVerificationKey<typeof TUBE_PROOF_LENGTH>> {
300
315
  return this.enqueueAndWaitForJob(
301
316
  this.generateId(ProvingRequestType.TUBE_PROOF, tubeInput),
302
317
  ProvingRequestType.TUBE_PROOF,
303
318
  tubeInput,
319
+ epochNumber,
304
320
  signal,
305
321
  );
306
322
  }
@@ -8,7 +8,7 @@ import {
8
8
  ProvingRequestType,
9
9
  type ServerCircuitProver,
10
10
  } from '@aztec/circuit-types';
11
- import { createDebugLogger } from '@aztec/foundation/log';
11
+ import { createLogger } from '@aztec/foundation/log';
12
12
  import { RunningPromise } from '@aztec/foundation/running-promise';
13
13
  import { Timer } from '@aztec/foundation/timer';
14
14
  import { type TelemetryClient } from '@aztec/telemetry-client';
@@ -39,7 +39,7 @@ export class ProvingAgent {
39
39
  private proofAllowList: Array<ProvingRequestType> = [],
40
40
  /** How long to wait between jobs */
41
41
  private pollIntervalMs = 1000,
42
- private log = createDebugLogger('aztec:prover-client:proving-agent'),
42
+ private log = createLogger('prover-client:proving-agent'),
43
43
  ) {
44
44
  this.instrumentation = new ProvingAgentInstrumentation(client);
45
45
  this.runningPromise = new RunningPromise(this.safeWork, this.pollIntervalMs);
@@ -9,7 +9,8 @@ import {
9
9
  type ProvingJobStatus,
10
10
  ProvingRequestType,
11
11
  } from '@aztec/circuit-types';
12
- import { createDebugLogger } from '@aztec/foundation/log';
12
+ import { asyncPool } from '@aztec/foundation/async-pool';
13
+ import { createLogger } from '@aztec/foundation/log';
13
14
  import { type PromiseWithResolvers, RunningPromise, promiseWithResolvers } from '@aztec/foundation/promise';
14
15
  import { PriorityMemoryQueue } from '@aztec/foundation/queue';
15
16
  import { Timer } from '@aztec/foundation/timer';
@@ -30,29 +31,33 @@ type ProofRequestBrokerConfig = {
30
31
  timeoutIntervalMs?: number;
31
32
  jobTimeoutMs?: number;
32
33
  maxRetries?: number;
34
+ maxEpochsToKeepResultsFor?: number;
35
+ maxParallelCleanUps?: number;
33
36
  };
34
37
 
38
+ type EnqueuedProvingJob = Pick<ProvingJob, 'id' | 'epochNumber'>;
39
+
35
40
  /**
36
41
  * A broker that manages proof requests and distributes them to workers based on their priority.
37
42
  * It takes a backend that is responsible for storing and retrieving proof requests and results.
38
43
  */
39
44
  export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
40
45
  private queues: ProvingQueues = {
41
- [ProvingRequestType.PUBLIC_VM]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
42
- [ProvingRequestType.TUBE_PROOF]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
43
- [ProvingRequestType.PRIVATE_KERNEL_EMPTY]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
46
+ [ProvingRequestType.PUBLIC_VM]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
47
+ [ProvingRequestType.TUBE_PROOF]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
48
+ [ProvingRequestType.PRIVATE_KERNEL_EMPTY]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
44
49
 
45
- [ProvingRequestType.PRIVATE_BASE_ROLLUP]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
46
- [ProvingRequestType.PUBLIC_BASE_ROLLUP]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
47
- [ProvingRequestType.MERGE_ROLLUP]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
48
- [ProvingRequestType.ROOT_ROLLUP]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
50
+ [ProvingRequestType.PRIVATE_BASE_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
51
+ [ProvingRequestType.PUBLIC_BASE_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
52
+ [ProvingRequestType.MERGE_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
53
+ [ProvingRequestType.ROOT_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
49
54
 
50
- [ProvingRequestType.BLOCK_MERGE_ROLLUP]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
51
- [ProvingRequestType.BLOCK_ROOT_ROLLUP]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
52
- [ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
55
+ [ProvingRequestType.BLOCK_MERGE_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
56
+ [ProvingRequestType.BLOCK_ROOT_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
57
+ [ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
53
58
 
54
- [ProvingRequestType.BASE_PARITY]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
55
- [ProvingRequestType.ROOT_PARITY]: new PriorityMemoryQueue<ProvingJob>(provingJobComparator),
59
+ [ProvingRequestType.BASE_PARITY]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
60
+ [ProvingRequestType.ROOT_PARITY]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
56
61
  };
57
62
 
58
63
  // holds a copy of the database in memory in order to quickly fulfill requests
@@ -76,23 +81,46 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
76
81
  // a map of promises that will be resolved when a job is settled
77
82
  private promises = new Map<ProvingJobId, PromiseWithResolvers<ProvingJobSettledResult>>();
78
83
 
79
- private timeoutPromise: RunningPromise;
80
- private timeSource = () => Math.floor(Date.now() / 1000);
84
+ private cleanupPromise: RunningPromise;
85
+ private msTimeSource = () => Date.now();
81
86
  private jobTimeoutMs: number;
82
87
  private maxRetries: number;
83
88
 
84
89
  private instrumentation: ProvingBrokerInstrumentation;
85
90
 
91
+ private maxParallelCleanUps: number;
92
+
93
+ /**
94
+ * The broker keeps track of the highest epoch it has seen.
95
+ * This information is used for garbage collection: once it reaches the next epoch, it can start pruning the database of old state.
96
+ * This clean up pass is only done against _settled_ jobs. This pass will not cancel jobs that are in-progress or in-queue.
97
+ * It is a client responsibility to cancel jobs if they are no longer necessary.
98
+ * Example:
99
+ * proving epoch 11 - the broker will wipe all settled jobs for epochs 9 and lower
100
+ * finished proving epoch 11 and got first job for epoch 12 -> the broker will wipe all settled jobs for epochs 10 and lower
101
+ * reorged back to end of epoch 10 -> epoch 11 is skipped and epoch 12 starts -> the broker will wipe all settled jobs for epochs 10 and lower
102
+ */
103
+ private epochHeight = 0;
104
+ private maxEpochsToKeepResultsFor = 1;
105
+
86
106
  public constructor(
87
107
  private database: ProvingBrokerDatabase,
88
108
  client: TelemetryClient,
89
- { jobTimeoutMs = 30_000, timeoutIntervalMs = 10_000, maxRetries = 3 }: ProofRequestBrokerConfig = {},
90
- private logger = createDebugLogger('aztec:prover-client:proving-broker'),
109
+ {
110
+ jobTimeoutMs = 30_000,
111
+ timeoutIntervalMs = 10_000,
112
+ maxRetries = 3,
113
+ maxEpochsToKeepResultsFor = 1,
114
+ maxParallelCleanUps = 20,
115
+ }: ProofRequestBrokerConfig = {},
116
+ private logger = createLogger('prover-client:proving-broker'),
91
117
  ) {
92
118
  this.instrumentation = new ProvingBrokerInstrumentation(client);
93
- this.timeoutPromise = new RunningPromise(this.timeoutCheck, timeoutIntervalMs);
119
+ this.cleanupPromise = new RunningPromise(this.cleanupPass, timeoutIntervalMs);
94
120
  this.jobTimeoutMs = jobTimeoutMs;
95
121
  this.maxRetries = maxRetries;
122
+ this.maxEpochsToKeepResultsFor = maxEpochsToKeepResultsFor;
123
+ this.maxParallelCleanUps = maxParallelCleanUps;
96
124
  }
97
125
 
98
126
  private measureQueueDepth: MonitorCallback = (type: ProvingRequestType) => {
@@ -127,7 +155,7 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
127
155
  }
128
156
  }
129
157
 
130
- this.timeoutPromise.start();
158
+ this.cleanupPromise.start();
131
159
 
132
160
  this.instrumentation.monitorQueueDepth(this.measureQueueDepth);
133
161
  this.instrumentation.monitorActiveJobs(this.countActiveJobs);
@@ -135,8 +163,8 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
135
163
  return Promise.resolve();
136
164
  }
137
165
 
138
- public stop(): Promise<void> {
139
- return this.timeoutPromise.stop();
166
+ public async stop(): Promise<void> {
167
+ await this.cleanupPromise.stop();
140
168
  }
141
169
 
142
170
  public async enqueueProvingJob(job: ProvingJob): Promise<void> {
@@ -159,15 +187,22 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
159
187
  return promiseWithResolvers.promise;
160
188
  }
161
189
 
162
- public async removeAndCancelProvingJob(id: ProvingJobId): Promise<void> {
163
- this.logger.info(`Cancelling job id=${id}`);
164
- await this.database.deleteProvingJobAndResult(id);
165
-
190
+ public async cancelProvingJob(id: ProvingJobId): Promise<void> {
166
191
  // notify listeners of the cancellation
167
192
  if (!this.resultsCache.has(id)) {
168
- this.promises.get(id)?.resolve({ status: 'rejected', reason: 'Aborted' });
193
+ this.logger.info(`Cancelling job id=${id}`);
194
+ await this.reportProvingJobError(id, 'Aborted', false);
195
+ }
196
+ }
197
+
198
+ public async cleanUpProvingJobState(id: ProvingJobId): Promise<void> {
199
+ if (!this.resultsCache.has(id)) {
200
+ this.logger.warn(`Can't cleanup busy proving job: id=${id}`);
201
+ return;
169
202
  }
170
203
 
204
+ this.logger.debug(`Cleaning up state for job id=${id}`);
205
+ await this.database.deleteProvingJobAndResult(id);
171
206
  this.jobsCache.delete(id);
172
207
  this.promises.delete(id);
173
208
  this.resultsCache.delete(id);
@@ -204,14 +239,15 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
204
239
 
205
240
  for (const proofType of allowedProofs) {
206
241
  const queue = this.queues[proofType];
207
- let job: ProvingJob | undefined;
242
+ let enqueuedJob: EnqueuedProvingJob | undefined;
208
243
  // exhaust the queue and make sure we're not sending a job that's already in progress
209
244
  // or has already been completed
210
245
  // this can happen if the broker crashes and restarts
211
246
  // it's possible agents will report progress or results for jobs that are in the queue (after the restart)
212
- while ((job = queue.getImmediate())) {
213
- if (!this.inProgress.has(job.id) && !this.resultsCache.has(job.id)) {
214
- const time = this.timeSource();
247
+ while ((enqueuedJob = queue.getImmediate())) {
248
+ const job = this.jobsCache.get(enqueuedJob.id);
249
+ if (job && !this.inProgress.has(enqueuedJob.id) && !this.resultsCache.has(enqueuedJob.id)) {
250
+ const time = this.msTimeSource();
215
251
  this.inProgress.set(job.id, {
216
252
  id: job.id,
217
253
  startedAt: time,
@@ -246,7 +282,12 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
246
282
  this.inProgress.delete(id);
247
283
  }
248
284
 
249
- if (retry && retries + 1 < this.maxRetries) {
285
+ if (this.resultsCache.has(id)) {
286
+ this.logger.warn(`Proving job id=${id} already is already settled, ignoring error`);
287
+ return;
288
+ }
289
+
290
+ if (retry && retries + 1 < this.maxRetries && !this.isJobStale(item)) {
250
291
  this.logger.info(`Retrying proving job id=${id} type=${ProvingRequestType[item.type]} retry=${retries + 1}`);
251
292
  this.retries.set(id, retries + 1);
252
293
  this.enqueueJobInternal(item);
@@ -254,8 +295,10 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
254
295
  return;
255
296
  }
256
297
 
257
- this.logger.debug(
258
- `Marking proving job id=${id} type=${ProvingRequestType[item.type]} totalAttempts=${retries + 1} as failed`,
298
+ this.logger.warn(
299
+ `Marking proving job as failed id=${id} type=${ProvingRequestType[item.type]} totalAttempts=${
300
+ retries + 1
301
+ } err=${err}`,
259
302
  );
260
303
 
261
304
  await this.database.setProvingJobError(id, err);
@@ -265,8 +308,8 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
265
308
  this.promises.get(id)!.resolve(result);
266
309
  this.instrumentation.incRejectedJobs(item.type);
267
310
  if (info) {
268
- const duration = this.timeSource() - info.startedAt;
269
- this.instrumentation.recordJobDuration(item.type, duration * 1000);
311
+ const duration = this.msTimeSource() - info.startedAt;
312
+ this.instrumentation.recordJobDuration(item.type, duration);
270
313
  }
271
314
  }
272
315
 
@@ -281,8 +324,13 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
281
324
  return filter ? this.getProvingJob(filter) : Promise.resolve(undefined);
282
325
  }
283
326
 
327
+ if (this.resultsCache.has(id)) {
328
+ this.logger.warn(`Proving job id=${id} has already been completed`);
329
+ return filter ? this.getProvingJob(filter) : Promise.resolve(undefined);
330
+ }
331
+
284
332
  const metadata = this.inProgress.get(id);
285
- const now = this.timeSource();
333
+ const now = this.msTimeSource();
286
334
  if (!metadata) {
287
335
  this.logger.warn(
288
336
  `Proving job id=${id} type=${ProvingRequestType[job.type]} not found in the in-progress cache, adding it`,
@@ -293,7 +341,7 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
293
341
  this.inProgress.set(id, {
294
342
  id,
295
343
  startedAt,
296
- lastUpdatedAt: this.timeSource(),
344
+ lastUpdatedAt: this.msTimeSource(),
297
345
  });
298
346
  return Promise.resolve(undefined);
299
347
  } else if (startedAt <= metadata.startedAt) {
@@ -334,6 +382,11 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
334
382
  this.inProgress.delete(id);
335
383
  }
336
384
 
385
+ if (this.resultsCache.has(id)) {
386
+ this.logger.warn(`Proving job id=${id} already settled, ignoring result`);
387
+ return;
388
+ }
389
+
337
390
  this.logger.debug(
338
391
  `Proving job complete id=${id} type=${ProvingRequestType[item.type]} totalAttempts=${retries + 1}`,
339
392
  );
@@ -346,7 +399,31 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
346
399
  this.instrumentation.incResolvedJobs(item.type);
347
400
  }
348
401
 
349
- private timeoutCheck = () => {
402
+ private cleanupPass = async () => {
403
+ await this.cleanupStaleJobs();
404
+ await this.reEnqueueExpiredJobs();
405
+ };
406
+
407
+ private async cleanupStaleJobs() {
408
+ const jobIds = Array.from(this.jobsCache.keys());
409
+ const jobsToClean: ProvingJobId[] = [];
410
+ for (const id of jobIds) {
411
+ const job = this.jobsCache.get(id)!;
412
+ const isComplete = this.resultsCache.has(id);
413
+ if (isComplete && this.isJobStale(job)) {
414
+ jobsToClean.push(id);
415
+ }
416
+ }
417
+
418
+ if (jobsToClean.length > 0) {
419
+ this.logger.info(`Cleaning up [${jobsToClean.join(',')}]`);
420
+ await asyncPool(this.maxParallelCleanUps, jobsToClean, async jobId => {
421
+ await this.cleanUpProvingJobState(jobId);
422
+ });
423
+ }
424
+ }
425
+
426
+ private async reEnqueueExpiredJobs() {
350
427
  const inProgressEntries = Array.from(this.inProgress.entries());
351
428
  for (const [id, metadata] of inProgressEntries) {
352
429
  const item = this.jobsCache.get(id);
@@ -356,28 +433,42 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
356
433
  continue;
357
434
  }
358
435
 
359
- const msSinceLastUpdate = (this.timeSource() - metadata.lastUpdatedAt) * 1000;
436
+ const now = this.msTimeSource();
437
+ const msSinceLastUpdate = now - metadata.lastUpdatedAt;
360
438
  if (msSinceLastUpdate >= this.jobTimeoutMs) {
361
- this.logger.warn(`Proving job id=${id} timed out. Adding it back to the queue.`);
362
- this.inProgress.delete(id);
363
- this.enqueueJobInternal(item);
364
- this.instrumentation.incTimedOutJobs(item.type);
439
+ if (this.isJobStale(item)) {
440
+ // the job has timed out and it's also old, just cancel and move on
441
+ await this.cancelProvingJob(item.id);
442
+ } else {
443
+ this.logger.warn(`Proving job id=${id} timed out. Adding it back to the queue.`);
444
+ this.inProgress.delete(id);
445
+ this.enqueueJobInternal(item);
446
+ this.instrumentation.incTimedOutJobs(item.type);
447
+ }
365
448
  }
366
449
  }
367
- };
450
+ }
368
451
 
369
452
  private enqueueJobInternal(job: ProvingJob): void {
370
453
  if (!this.promises.has(job.id)) {
371
454
  this.promises.set(job.id, promiseWithResolvers());
372
455
  }
373
- this.queues[job.type].put(job);
456
+ this.queues[job.type].put({
457
+ epochNumber: job.epochNumber,
458
+ id: job.id,
459
+ });
374
460
  this.enqueuedAt.set(job.id, new Timer());
461
+ this.epochHeight = Math.max(this.epochHeight, job.epochNumber);
375
462
  this.logger.debug(`Enqueued new proving job id=${job.id}`);
376
463
  }
464
+
465
+ private isJobStale(job: ProvingJob) {
466
+ return job.epochNumber < this.epochHeight - this.maxEpochsToKeepResultsFor;
467
+ }
377
468
  }
378
469
 
379
470
  type ProvingQueues = {
380
- [K in ProvingRequestType]: PriorityMemoryQueue<ProvingJob>;
471
+ [K in ProvingRequestType]: PriorityMemoryQueue<EnqueuedProvingJob>;
381
472
  };
382
473
 
383
474
  /**
@@ -386,12 +477,10 @@ type ProvingQueues = {
386
477
  * @param b - Another proving job
387
478
  * @returns A number indicating the relative priority of the two proving jobs
388
479
  */
389
- function provingJobComparator(a: ProvingJob, b: ProvingJob): -1 | 0 | 1 {
390
- const aBlockNumber = a.blockNumber ?? 0;
391
- const bBlockNumber = b.blockNumber ?? 0;
392
- if (aBlockNumber < bBlockNumber) {
480
+ function provingJobComparator(a: EnqueuedProvingJob, b: EnqueuedProvingJob): -1 | 0 | 1 {
481
+ if (a.epochNumber < b.epochNumber) {
393
482
  return -1;
394
- } else if (aBlockNumber > bBlockNumber) {
483
+ } else if (a.epochNumber > b.epochNumber) {
395
484
  return 1;
396
485
  } else {
397
486
  return 0;
@@ -1,7 +1,7 @@
1
1
  import { type ProofUri, ProvingJob, type ProvingJobId, ProvingJobSettledResult } from '@aztec/circuit-types';
2
2
  import { jsonParseWithSchema, jsonStringify } from '@aztec/foundation/json-rpc';
3
3
  import { type AztecKVStore, type AztecMap } from '@aztec/kv-store';
4
- import { LmdbMetrics, Metrics, type TelemetryClient } from '@aztec/telemetry-client';
4
+ import { Attributes, LmdbMetrics, type TelemetryClient } from '@aztec/telemetry-client';
5
5
 
6
6
  import { type ProvingBrokerDatabase } from '../proving_broker_database.js';
7
7
 
@@ -14,14 +14,8 @@ export class KVBrokerDatabase implements ProvingBrokerDatabase {
14
14
  this.metrics = new LmdbMetrics(
15
15
  client.getMeter('KVBrokerDatabase'),
16
16
  {
17
- name: Metrics.PROVING_QUEUE_DB_MAP_SIZE,
18
- description: 'Database map size for the proving broker',
17
+ [Attributes.DB_DATA_TYPE]: 'prover-broker',
19
18
  },
20
- {
21
- name: Metrics.PROVING_QUEUE_DB_USED_SIZE,
22
- description: 'Database used size for the proving broker',
23
- },
24
- { name: Metrics.PROVING_QUEUE_DB_NUM_ITEMS, description: 'Number of items in the broker database' },
25
19
  () => store.estimateSize(),
26
20
  );
27
21
  this.jobs = store.openMap('proving_jobs');
@@ -9,7 +9,6 @@ import {
9
9
  type TelemetryClient,
10
10
  type UpDownCounter,
11
11
  ValueType,
12
- millisecondBuckets,
13
12
  } from '@aztec/telemetry-client';
14
13
 
15
14
  export type MonitorCallback = (proofType: ProvingRequestType) => number;
@@ -55,18 +54,12 @@ export class ProvingBrokerInstrumentation {
55
54
  description: 'Records how long a job sits in the queue',
56
55
  unit: 'ms',
57
56
  valueType: ValueType.INT,
58
- advice: {
59
- explicitBucketBoundaries: millisecondBuckets(1), // 10ms -> ~327s
60
- },
61
57
  });
62
58
 
63
59
  this.jobDuration = meter.createHistogram(Metrics.PROVING_QUEUE_JOB_DURATION, {
64
60
  description: 'Records how long a job takes to complete',
65
61
  unit: 'ms',
66
62
  valueType: ValueType.INT,
67
- advice: {
68
- explicitBucketBoundaries: millisecondBuckets(1), // 10ms -> ~327s
69
- },
70
63
  });
71
64
  }
72
65
 
@@ -28,7 +28,8 @@ const GetProvingJobResponse = z.object({
28
28
  export const ProvingJobProducerSchema: ApiSchemaFor<ProvingJobProducer> = {
29
29
  enqueueProvingJob: z.function().args(ProvingJob).returns(z.void()),
30
30
  getProvingJobStatus: z.function().args(ProvingJobId).returns(ProvingJobStatus),
31
- removeAndCancelProvingJob: z.function().args(ProvingJobId).returns(z.void()),
31
+ cleanUpProvingJobState: z.function().args(ProvingJobId).returns(z.void()),
32
+ cancelProvingJob: z.function().args(ProvingJobId).returns(z.void()),
32
33
  waitForJobToSettle: z.function().args(ProvingJobId).returns(ProvingJobSettledResult),
33
34
  };
34
35
 
@@ -82,8 +82,11 @@ export class TestBroker implements ProvingJobProducer {
82
82
  getProvingJobStatus(id: ProvingJobId): Promise<ProvingJobStatus> {
83
83
  return this.broker.getProvingJobStatus(id);
84
84
  }
85
- removeAndCancelProvingJob(id: ProvingJobId): Promise<void> {
86
- return this.broker.removeAndCancelProvingJob(id);
85
+ cleanUpProvingJobState(id: ProvingJobId): Promise<void> {
86
+ return this.broker.cleanUpProvingJobState(id);
87
+ }
88
+ cancelProvingJob(id: string): Promise<void> {
89
+ return this.broker.cancelProvingJob(id);
87
90
  }
88
91
  waitForJobToSettle(id: ProvingJobId): Promise<ProvingJobSettledResult> {
89
92
  return this.broker.waitForJobToSettle(id);