@aztec/prover-client 0.66.0 → 0.67.1-devnet
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
- package/dest/block_builder/light.d.ts +4 -3
- package/dest/block_builder/light.d.ts.map +1 -1
- package/dest/block_builder/light.js +30 -20
- package/dest/index.d.ts +0 -1
- package/dest/index.d.ts.map +1 -1
- package/dest/index.js +1 -2
- package/dest/mocks/fixtures.d.ts +3 -3
- package/dest/mocks/fixtures.d.ts.map +1 -1
- package/dest/mocks/fixtures.js +2 -2
- package/dest/mocks/test_context.d.ts +10 -9
- package/dest/mocks/test_context.d.ts.map +1 -1
- package/dest/mocks/test_context.js +24 -13
- package/dest/orchestrator/block-building-helpers.d.ts +10 -6
- package/dest/orchestrator/block-building-helpers.d.ts.map +1 -1
- package/dest/orchestrator/block-building-helpers.js +27 -16
- package/dest/orchestrator/block-proving-state.d.ts +6 -5
- package/dest/orchestrator/block-proving-state.d.ts.map +1 -1
- package/dest/orchestrator/block-proving-state.js +16 -8
- package/dest/orchestrator/epoch-proving-state.d.ts +1 -1
- package/dest/orchestrator/epoch-proving-state.d.ts.map +1 -1
- package/dest/orchestrator/epoch-proving-state.js +3 -3
- package/dest/orchestrator/orchestrator.d.ts +11 -8
- package/dest/orchestrator/orchestrator.d.ts.map +1 -1
- package/dest/orchestrator/orchestrator.js +94 -58
- package/dest/orchestrator/orchestrator_metrics.d.ts.map +1 -1
- package/dest/orchestrator/orchestrator_metrics.js +2 -5
- package/dest/prover-agent/memory-proving-queue.d.ts +2 -1
- package/dest/prover-agent/memory-proving-queue.d.ts.map +1 -1
- package/dest/prover-agent/memory-proving-queue.js +241 -224
- package/dest/prover-agent/prover-agent.d.ts +11 -2
- package/dest/prover-agent/prover-agent.d.ts.map +1 -1
- package/dest/prover-agent/prover-agent.js +187 -160
- package/dest/prover-client/prover-client.d.ts +2 -3
- package/dest/prover-client/prover-client.d.ts.map +1 -1
- package/dest/prover-client/prover-client.js +6 -9
- package/dest/proving_broker/broker_prover_facade.d.ts +26 -0
- package/dest/proving_broker/broker_prover_facade.d.ts.map +1 -0
- package/dest/proving_broker/broker_prover_facade.js +107 -0
- package/dest/proving_broker/proving_agent.d.ts +4 -3
- package/dest/proving_broker/proving_agent.d.ts.map +1 -1
- package/dest/proving_broker/proving_agent.js +74 -65
- package/dest/proving_broker/proving_broker.d.ts +27 -7
- package/dest/proving_broker/proving_broker.d.ts.map +1 -1
- package/dest/proving_broker/proving_broker.js +405 -258
- package/dest/proving_broker/proving_broker_database/persisted.d.ts.map +1 -1
- package/dest/proving_broker/proving_broker_database/persisted.js +4 -8
- package/dest/proving_broker/proving_broker_instrumentation.d.ts.map +1 -1
- package/dest/proving_broker/proving_broker_instrumentation.js +2 -8
- package/dest/proving_broker/proving_job_controller.d.ts +2 -1
- package/dest/proving_broker/proving_job_controller.d.ts.map +1 -1
- package/dest/proving_broker/proving_job_controller.js +15 -14
- package/dest/proving_broker/rpc.js +2 -2
- package/dest/test/mock_prover.d.ts +6 -6
- package/dest/test/mock_prover.d.ts.map +1 -1
- package/dest/test/mock_prover.js +5 -5
- package/package.json +18 -13
- package/src/block_builder/light.ts +31 -22
- package/src/index.ts +0 -1
- package/src/mocks/fixtures.ts +4 -4
- package/src/mocks/test_context.ts +39 -24
- package/src/orchestrator/block-building-helpers.ts +33 -20
- package/src/orchestrator/block-proving-state.ts +17 -6
- package/src/orchestrator/epoch-proving-state.ts +0 -2
- package/src/orchestrator/orchestrator.ts +111 -62
- package/src/orchestrator/orchestrator_metrics.ts +1 -11
- package/src/prover-agent/memory-proving-queue.ts +12 -7
- package/src/prover-agent/prover-agent.ts +67 -48
- package/src/prover-client/prover-client.ts +5 -12
- package/src/proving_broker/{caching_broker_facade.ts → broker_prover_facade.ts} +62 -85
- package/src/proving_broker/proving_agent.ts +74 -78
- package/src/proving_broker/proving_broker.ts +240 -73
- package/src/proving_broker/proving_broker_database/persisted.ts +2 -8
- package/src/proving_broker/proving_broker_instrumentation.ts +0 -7
- package/src/proving_broker/proving_job_controller.ts +13 -12
- package/src/proving_broker/rpc.ts +1 -1
- package/src/test/mock_prover.ts +7 -3
- package/dest/proving_broker/caching_broker_facade.d.ts +0 -30
- package/dest/proving_broker/caching_broker_facade.d.ts.map +0 -1
- package/dest/proving_broker/caching_broker_facade.js +0 -150
- package/dest/proving_broker/prover_cache/memory.d.ts +0 -9
- package/dest/proving_broker/prover_cache/memory.d.ts.map +0 -1
- package/dest/proving_broker/prover_cache/memory.js +0 -16
- package/src/proving_broker/prover_cache/memory.ts +0 -20
package/src/proving_broker/proving_broker.ts

@@ -9,11 +9,12 @@ import {
   type ProvingJobStatus,
   ProvingRequestType,
 } from '@aztec/circuit-types';
-import {
+import { asyncPool } from '@aztec/foundation/async-pool';
+import { createLogger } from '@aztec/foundation/log';
 import { type PromiseWithResolvers, RunningPromise, promiseWithResolvers } from '@aztec/foundation/promise';
 import { PriorityMemoryQueue } from '@aztec/foundation/queue';
 import { Timer } from '@aztec/foundation/timer';
-import { type TelemetryClient } from '@aztec/telemetry-client';
+import { type TelemetryClient, type Traceable, type Tracer, trackSpan } from '@aztec/telemetry-client';

 import assert from 'assert';

@@ -30,29 +31,33 @@ type ProofRequestBrokerConfig = {
   timeoutIntervalMs?: number;
   jobTimeoutMs?: number;
   maxRetries?: number;
+  maxEpochsToKeepResultsFor?: number;
+  maxParallelCleanUps?: number;
 };

+type EnqueuedProvingJob = Pick<ProvingJob, 'id' | 'epochNumber'>;
+
 /**
  * A broker that manages proof requests and distributes them to workers based on their priority.
  * It takes a backend that is responsible for storing and retrieving proof requests and results.
  */
-export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
+export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer, Traceable {
   private queues: ProvingQueues = {
-    [ProvingRequestType.PUBLIC_VM]: new PriorityMemoryQueue<
-    [ProvingRequestType.TUBE_PROOF]: new PriorityMemoryQueue<
-    [ProvingRequestType.PRIVATE_KERNEL_EMPTY]: new PriorityMemoryQueue<
+    [ProvingRequestType.PUBLIC_VM]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
+    [ProvingRequestType.TUBE_PROOF]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
+    [ProvingRequestType.PRIVATE_KERNEL_EMPTY]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),

-    [ProvingRequestType.PRIVATE_BASE_ROLLUP]: new PriorityMemoryQueue<
-    [ProvingRequestType.PUBLIC_BASE_ROLLUP]: new PriorityMemoryQueue<
-    [ProvingRequestType.MERGE_ROLLUP]: new PriorityMemoryQueue<
-    [ProvingRequestType.ROOT_ROLLUP]: new PriorityMemoryQueue<
+    [ProvingRequestType.PRIVATE_BASE_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
+    [ProvingRequestType.PUBLIC_BASE_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
+    [ProvingRequestType.MERGE_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
+    [ProvingRequestType.ROOT_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),

-    [ProvingRequestType.BLOCK_MERGE_ROLLUP]: new PriorityMemoryQueue<
-    [ProvingRequestType.BLOCK_ROOT_ROLLUP]: new PriorityMemoryQueue<
-    [ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP]: new PriorityMemoryQueue<
+    [ProvingRequestType.BLOCK_MERGE_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
+    [ProvingRequestType.BLOCK_ROOT_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
+    [ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),

-    [ProvingRequestType.BASE_PARITY]: new PriorityMemoryQueue<
-    [ProvingRequestType.ROOT_PARITY]: new PriorityMemoryQueue<
+    [ProvingRequestType.BASE_PARITY]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
+    [ProvingRequestType.ROOT_PARITY]: new PriorityMemoryQueue<EnqueuedProvingJob>(provingJobComparator),
   };

   // holds a copy of the database in memory in order to quickly fulfill requests
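
Note: the per-type queues above now hold lightweight `EnqueuedProvingJob` handles (`{ id, epochNumber }`) instead of full jobs, ordered by `provingJobComparator` (lowest epoch first; the comparator itself is rewritten at the end of this file's diff). A minimal, self-contained sketch of that ordering, using a plain sorted array as a stand-in for `PriorityMemoryQueue` (the stand-in class and the sample job IDs are illustrative, not the package's API):

```ts
// Sketch only: a sorted-array stand-in for PriorityMemoryQueue<EnqueuedProvingJob>.
type EnqueuedProvingJob = { id: string; epochNumber: number };

// Same ordering as provingJobComparator in this diff: lower epochs are served first.
function provingJobComparator(a: EnqueuedProvingJob, b: EnqueuedProvingJob): -1 | 0 | 1 {
  return a.epochNumber < b.epochNumber ? -1 : a.epochNumber > b.epochNumber ? 1 : 0;
}

class SortedQueue {
  private items: EnqueuedProvingJob[] = [];
  put(job: EnqueuedProvingJob): void {
    this.items.push(job);
    this.items.sort(provingJobComparator);
  }
  // Mirrors getImmediate(): returns the next item without waiting, or undefined.
  getImmediate(): EnqueuedProvingJob | undefined {
    return this.items.shift();
  }
}

const queue = new SortedQueue();
queue.put({ id: 'job-b', epochNumber: 12 });
queue.put({ id: 'job-a', epochNumber: 11 });
console.log(queue.getImmediate()?.id); // 'job-a' — the older epoch is prioritized
```
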
@@ -76,23 +81,48 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
   // a map of promises that will be resolved when a job is settled
   private promises = new Map<ProvingJobId, PromiseWithResolvers<ProvingJobSettledResult>>();

-  private
-  private
+  private cleanupPromise: RunningPromise;
+  private msTimeSource = () => Date.now();
   private jobTimeoutMs: number;
   private maxRetries: number;

   private instrumentation: ProvingBrokerInstrumentation;
+  public readonly tracer: Tracer;
+
+  private maxParallelCleanUps: number;
+
+  /**
+   * The broker keeps track of the highest epoch its seen.
+   * This information is used for garbage collection: once it reaches the next epoch, it can start pruning the database of old state.
+   * This clean up pass is only done against _settled_ jobs. This pass will not cancel jobs that are in-progress or in-queue.
+   * It is a client responsibility to cancel jobs if they are no longer necessary.
+   * Example:
+   * proving epoch 11 - the broker will wipe all setlled jobs for epochs 9 and lower
+   * finished proving epoch 11 and got first job for epoch 12 -> the broker will wipe all setlled jobs for epochs 10 and lower
+   * reorged back to end of epoch 10 -> epoch 11 is skipped and epoch 12 starts -> the broker will wipe all setlled jobs for epochs 10 and lower
+   */
+  private epochHeight = 0;
+  private maxEpochsToKeepResultsFor = 1;

   public constructor(
     private database: ProvingBrokerDatabase,
     client: TelemetryClient,
-    {
-
+    {
+      jobTimeoutMs = 30_000,
+      timeoutIntervalMs = 10_000,
+      maxRetries = 3,
+      maxEpochsToKeepResultsFor = 1,
+      maxParallelCleanUps = 20,
+    }: ProofRequestBrokerConfig = {},
+    private logger = createLogger('prover-client:proving-broker'),
   ) {
+    this.tracer = client.getTracer('ProvingBroker');
     this.instrumentation = new ProvingBrokerInstrumentation(client);
-    this.
+    this.cleanupPromise = new RunningPromise(this.cleanupPass.bind(this), this.logger, timeoutIntervalMs);
     this.jobTimeoutMs = jobTimeoutMs;
     this.maxRetries = maxRetries;
+    this.maxEpochsToKeepResultsFor = maxEpochsToKeepResultsFor;
+    this.maxParallelCleanUps = maxParallelCleanUps;
   }

   private measureQueueDepth: MonitorCallback = (type: ProvingRequestType) => {
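
The new `epochHeight` and `maxEpochsToKeepResultsFor` fields drive the garbage-collection rule described in the docblock above and implemented in `isJobStale` further down in this diff. A small sketch of the arithmetic, with illustrative values, reproducing the docblock's "proving epoch 11 wipes settled jobs for epochs 9 and lower" example:

```ts
// Sketch of the staleness rule introduced in this diff (values are illustrative).
const maxEpochsToKeepResultsFor = 1; // default from ProofRequestBrokerConfig

function isJobStale(jobEpoch: number, epochHeight: number): boolean {
  return jobEpoch < epochHeight - maxEpochsToKeepResultsFor;
}

// While proving epoch 11 (epochHeight = 11), settled jobs from epochs 9 and
// lower are eligible for cleanup, matching the docblock example above.
console.log(isJobStale(9, 11));  // true  -> cleaned up
console.log(isJobStale(10, 11)); // false -> kept
console.log(isJobStale(11, 11)); // false -> kept
```
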
@@ -113,7 +143,10 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {

   public start(): Promise<void> {
     for (const [item, result] of this.database.allProvingJobs()) {
-      this.logger.info(`Restoring proving job id=${item.id} settled=${!!result}
+      this.logger.info(`Restoring proving job id=${item.id} settled=${!!result}`, {
+        provingJobId: item.id,
+        status: result ? result.status : 'pending',
+      });

       this.jobsCache.set(item.id, item);
       this.promises.set(item.id, promiseWithResolvers());
@@ -122,12 +155,11 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
         this.promises.get(item.id)!.resolve(result);
         this.resultsCache.set(item.id, result);
       } else {
-        this.logger.debug(`Re-enqueuing proving job id=${item.id}`);
         this.enqueueJobInternal(item);
       }
     }

-    this.
+    this.cleanupPromise.start();

     this.instrumentation.monitorQueueDepth(this.measureQueueDepth);
     this.instrumentation.monitorActiveJobs(this.countActiveJobs);
@@ -135,39 +167,75 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
     return Promise.resolve();
   }

-  public stop(): Promise<void> {
-
+  public async stop(): Promise<void> {
+    await this.cleanupPromise.stop();
   }

   public async enqueueProvingJob(job: ProvingJob): Promise<void> {
     if (this.jobsCache.has(job.id)) {
       const existing = this.jobsCache.get(job.id);
       assert.deepStrictEqual(job, existing, 'Duplicate proving job ID');
+      this.logger.debug(`Duplicate proving job id=${job.id} epochNumber=${job.epochNumber}. Ignoring`, {
+        provingJobId: job.id,
+      });
       return;
     }

-
-
-
+    if (this.isJobStale(job)) {
+      this.logger.warn(`Tried enqueueing stale proving job id=${job.id} epochNumber=${job.epochNumber}`, {
+        provingJobId: job.id,
+      });
+      throw new Error(`Epoch too old: job epoch ${job.epochNumber}, current epoch: ${this.epochHeight}`);
+    }
+
+    this.logger.info(`New proving job id=${job.id} epochNumber=${job.epochNumber}`, { provingJobId: job.id });
+    try {
+      // do this first so it acts as a "lock". If this job is enqueued again while we're saving it the if at the top will catch it.
+      this.jobsCache.set(job.id, job);
+      await this.database.addProvingJob(job);
+      this.enqueueJobInternal(job);
+    } catch (err) {
+      this.logger.error(`Failed to save proving job id=${job.id}: ${err}`, err, { provingJobId: job.id });
+      this.jobsCache.delete(job.id);
+      throw err;
+    }
   }

   public waitForJobToSettle(id: ProvingJobId): Promise<ProvingJobSettledResult> {
     const promiseWithResolvers = this.promises.get(id);
     if (!promiseWithResolvers) {
+      this.logger.warn(`Job id=${id} not found`, { provingJobId: id });
       return Promise.resolve({ status: 'rejected', reason: `Job ${id} not found` });
     }
     return promiseWithResolvers.promise;
   }

-  public async
-  this.
-
+  public async cancelProvingJob(id: ProvingJobId): Promise<void> {
+    if (!this.jobsCache.has(id)) {
+      this.logger.warn(`Can't cancel a job that doesn't exist id=${id}`, { provingJobId: id });
+      return;
+    }

     // notify listeners of the cancellation
     if (!this.resultsCache.has(id)) {
-      this.
+      this.logger.info(`Cancelling job id=${id}`, { provingJobId: id });
+      await this.reportProvingJobError(id, 'Aborted', false);
+    }
+  }
+
+  private async cleanUpProvingJobState(id: ProvingJobId): Promise<void> {
+    if (!this.jobsCache.has(id)) {
+      this.logger.warn(`Can't clean up a job that doesn't exist id=${id}`, { provingJobId: id });
+      return;
     }

+    if (!this.resultsCache.has(id)) {
+      this.logger.warn(`Can't cleanup busy proving job: id=${id}`, { provingJobId: id });
+      return;
+    }
+
+    this.logger.debug(`Cleaning up state for job id=${id}`, { provingJobId: id });
+    await this.database.deleteProvingJobAndResult(id);
     this.jobsCache.delete(id);
     this.promises.delete(id);
     this.resultsCache.delete(id);
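
The reworked `enqueueProvingJob` above caches the job before persisting it, so the cache entry doubles as a lock against concurrent enqueues of the same id and is rolled back if the database write fails. A simplified sketch of that flow, with a hypothetical in-memory `database` standing in for `ProvingBrokerDatabase`:

```ts
// Simplified sketch of the cache-as-lock enqueue flow; `database` is a hypothetical stand-in.
type ProvingJob = { id: string; epochNumber: number };

const jobsCache = new Map<string, ProvingJob>();
const queue: ProvingJob[] = [];
const database = {
  addProvingJob: async (_job: ProvingJob): Promise<void> => {
    /* persist to durable storage here */
  },
};

async function enqueueProvingJob(job: ProvingJob): Promise<void> {
  if (jobsCache.has(job.id)) {
    return; // duplicate enqueue: the job is already tracked
  }
  try {
    // Cache first so it acts as a "lock": a concurrent enqueue of the same id
    // hits the check above while the database write is still in flight.
    jobsCache.set(job.id, job);
    await database.addProvingJob(job);
    queue.push(job); // only now is the job visible to agents
  } catch (err) {
    jobsCache.delete(job.id); // roll back the lock if persistence failed
    throw err;
  }
}
```
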
@@ -184,7 +252,7 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
     const item = this.jobsCache.get(id);

     if (!item) {
-      this.logger.warn(`Proving job id=${id} not found
+      this.logger.warn(`Proving job id=${id} not found`, { provingJobId: id });
       return Promise.resolve({ status: 'not-found' });
     }

@@ -204,14 +272,15 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {

     for (const proofType of allowedProofs) {
       const queue = this.queues[proofType];
-      let
+      let enqueuedJob: EnqueuedProvingJob | undefined;
       // exhaust the queue and make sure we're not sending a job that's already in progress
       // or has already been completed
       // this can happen if the broker crashes and restarts
       // it's possible agents will report progress or results for jobs that are in the queue (after the restart)
-      while ((
-
-
+      while ((enqueuedJob = queue.getImmediate())) {
+        const job = this.jobsCache.get(enqueuedJob.id);
+        if (job && !this.inProgress.has(enqueuedJob.id) && !this.resultsCache.has(enqueuedJob.id)) {
+          const time = this.msTimeSource();
           this.inProgress.set(job.id, {
             id: job.id,
             startedAt: time,
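
The dequeue loop above exists because queue entries are only `{ id, epochNumber }` handles and can be stale after a broker restart: an entry is skipped unless the full job is still known, not already in progress, and not already settled. A self-contained sketch of that revalidation (the array-backed `queue` is a stand-in for `PriorityMemoryQueue.getImmediate()`):

```ts
// Sketch of dequeue-time revalidation; the array `queue` stands in for PriorityMemoryQueue.
type EnqueuedProvingJob = { id: string; epochNumber: number };
type ProvingJob = EnqueuedProvingJob & { type: string };

const jobsCache = new Map<string, ProvingJob>();
const inProgress = new Set<string>();
const resultsCache = new Map<string, unknown>();
const queue: EnqueuedProvingJob[] = [];

function nextJobToProve(): ProvingJob | undefined {
  let enqueued: EnqueuedProvingJob | undefined;
  // Exhaust the queue until a still-valid entry is found.
  while ((enqueued = queue.shift())) {
    const job = jobsCache.get(enqueued.id);
    // Skip entries whose job is unknown, already being proven, or already settled
    // (all possible if the broker restarted with old queue state).
    if (job && !inProgress.has(enqueued.id) && !resultsCache.has(enqueued.id)) {
      inProgress.add(enqueued.id);
      return job;
    }
  }
  return undefined;
}
```
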
@@ -236,37 +305,67 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
     const retries = this.retries.get(id) ?? 0;

     if (!item) {
-      this.logger.warn(`
+      this.logger.warn(`Can't set error on unknown proving job id=${id} err=${err}`, { provingJoId: id });
       return;
     }

     if (!info) {
-      this.logger.warn(`Proving job id=${id} type=${ProvingRequestType[item.type]} not in the in-progress set
+      this.logger.warn(`Proving job id=${id} type=${ProvingRequestType[item.type]} not in the in-progress set`, {
+        provingJobId: id,
+      });
     } else {
       this.inProgress.delete(id);
     }

-    if (
-    this.logger.
+    if (this.resultsCache.has(id)) {
+      this.logger.warn(`Proving job id=${id} is already settled, ignoring err=${err}`, {
+        provingJobId: id,
+      });
+      return;
+    }
+
+    if (retry && retries + 1 < this.maxRetries && !this.isJobStale(item)) {
+      this.logger.info(
+        `Retrying proving job id=${id} type=${ProvingRequestType[item.type]} retry=${retries + 1} err=${err}`,
+        {
+          provingJobId: id,
+        },
+      );
       this.retries.set(id, retries + 1);
       this.enqueueJobInternal(item);
       this.instrumentation.incRetriedJobs(item.type);
       return;
     }

-    this.logger.
-      `Marking proving job id=${id} type=${ProvingRequestType[item.type]} totalAttempts=${
+    this.logger.info(
+      `Marking proving job as failed id=${id} type=${ProvingRequestType[item.type]} totalAttempts=${
+        retries + 1
+      } err=${err}`,
+      {
+        provingJobId: id,
+      },
     );

-
-
+    // save the result to the cache and notify clients of the job status
+    // this should work even if our database breaks because the result is cached in memory
     const result: ProvingJobSettledResult = { status: 'rejected', reason: String(err) };
     this.resultsCache.set(id, result);
     this.promises.get(id)!.resolve(result);
+
     this.instrumentation.incRejectedJobs(item.type);
     if (info) {
-      const duration = this.
-      this.instrumentation.recordJobDuration(item.type, duration
+      const duration = this.msTimeSource() - info.startedAt;
+      this.instrumentation.recordJobDuration(item.type, duration);
+    }
+
+    try {
+      await this.database.setProvingJobError(id, err);
+    } catch (saveErr) {
+      this.logger.error(`Failed to save proving job error status id=${id} jobErr=${err}`, saveErr, {
+        provingJobId: id,
+      });
+
+      throw saveErr;
     }
   }

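
Note the ordering in the rewritten error path above (the success path later in this file follows the same shape): the result is cached in memory and waiting clients are resolved before the database write, so a failed write is logged and rethrown without losing the settlement. A reduced sketch of that pattern, with simplified types and a hypothetical `database` stand-in:

```ts
// Reduced sketch of "settle in memory first, then persist"; types and database are stand-ins.
type Settled = { status: 'fulfilled'; value: string } | { status: 'rejected'; reason: string };

const resultsCache = new Map<string, Settled>();
const waiters = new Map<string, (result: Settled) => void>();
const database = {
  setProvingJobError: async (_id: string, _reason: string): Promise<void> => {},
};

async function settleWithError(id: string, err: string): Promise<void> {
  if (resultsCache.has(id)) {
    return; // already settled, ignore the late report
  }
  const result: Settled = { status: 'rejected', reason: err };
  // Clients observe the settlement even if the database write below fails.
  resultsCache.set(id, result);
  waiters.get(id)?.(result);
  try {
    await database.setProvingJobError(id, err);
  } catch (saveErr) {
    console.error(`Failed to save error status for job ${id}`, saveErr);
    throw saveErr; // surface the persistence failure; the in-memory settlement stands
  }
}
```
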
@@ -277,15 +376,21 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
   ): Promise<{ job: ProvingJob; time: number } | undefined> {
     const job = this.jobsCache.get(id);
     if (!job) {
-      this.logger.warn(`Proving job id=${id} does not exist
+      this.logger.warn(`Proving job id=${id} does not exist`, { provingJobId: id });
+      return filter ? this.getProvingJob(filter) : Promise.resolve(undefined);
+    }
+
+    if (this.resultsCache.has(id)) {
+      this.logger.warn(`Proving job id=${id} has already been completed`, { provingJobId: id });
       return filter ? this.getProvingJob(filter) : Promise.resolve(undefined);
     }

     const metadata = this.inProgress.get(id);
-    const now = this.
+    const now = this.msTimeSource();
     if (!metadata) {
       this.logger.warn(
         `Proving job id=${id} type=${ProvingRequestType[job.type]} not found in the in-progress cache, adding it`,
+        { provingJobId: id },
       );
       // the queue will still contain the item at this point!
       // we need to be careful when popping off the queue to make sure we're not sending
@@ -293,16 +398,17 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
       this.inProgress.set(id, {
         id,
         startedAt,
-        lastUpdatedAt: this.
+        lastUpdatedAt: this.msTimeSource(),
       });
       return Promise.resolve(undefined);
     } else if (startedAt <= metadata.startedAt) {
       if (startedAt < metadata.startedAt) {
-        this.logger.
+        this.logger.info(
           `Proving job id=${id} type=${ProvingRequestType[job.type]} startedAt=${startedAt} older agent has taken job`,
+          { provingJobId: id },
         );
       } else {
-        this.logger.debug(`Proving job id=${id} type=${ProvingRequestType[job.type]} heartbeat
+        this.logger.debug(`Proving job id=${id} type=${ProvingRequestType[job.type]} heartbeat`, { provingJobId: id });
       }
       metadata.startedAt = startedAt;
       metadata.lastUpdatedAt = now;
@@ -312,6 +418,7 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
         `Proving job id=${id} type=${
           ProvingRequestType[job.type]
         } already being worked on by another agent. Sending new one`,
+        { provingJobId: id },
       );
       return this.getProvingJob(filter);
     } else {
@@ -324,60 +431,122 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer {
     const item = this.jobsCache.get(id);
     const retries = this.retries.get(id) ?? 0;
     if (!item) {
-      this.logger.warn(`Proving job id=${id} not found
+      this.logger.warn(`Proving job id=${id} not found`, { provingJobId: id });
       return;
     }

     if (!info) {
-      this.logger.warn(`Proving job id=${id} type=${ProvingRequestType[item.type]} not in the in-progress set
+      this.logger.warn(`Proving job id=${id} type=${ProvingRequestType[item.type]} not in the in-progress set`, {
+        provingJobId: id,
+      });
     } else {
       this.inProgress.delete(id);
     }

-    this.
+    if (this.resultsCache.has(id)) {
+      this.logger.warn(`Proving job id=${id} already settled, ignoring result`, { provingJobId: id });
+      return;
+    }
+
+    this.logger.info(
       `Proving job complete id=${id} type=${ProvingRequestType[item.type]} totalAttempts=${retries + 1}`,
+      { provingJobId: id },
     );

-
-
+    // save result to our local cache and notify clients
+    // if save to database fails, that's ok because we have the result in memory
+    // if the broker crashes and needs the result again, we're covered because we can just recompute it
     const result: ProvingJobSettledResult = { status: 'fulfilled', value };
     this.resultsCache.set(id, result);
     this.promises.get(id)!.resolve(result);
+
     this.instrumentation.incResolvedJobs(item.type);
+    if (info) {
+      const duration = this.msTimeSource() - info.startedAt;
+      this.instrumentation.recordJobDuration(item.type, duration);
+    }
+
+    try {
+      await this.database.setProvingJobResult(id, value);
+    } catch (saveErr) {
+      this.logger.error(`Failed to save proving job result id=${id}`, saveErr, {
+        provingJobId: id,
+      });
+
+      throw saveErr;
+    }
   }

-
+  @trackSpan('ProvingBroker.cleanupPass')
+  private async cleanupPass() {
+    await this.cleanupStaleJobs();
+    await this.reEnqueueExpiredJobs();
+  }
+
+  private async cleanupStaleJobs() {
+    const jobIds = Array.from(this.jobsCache.keys());
+    const jobsToClean: ProvingJobId[] = [];
+    for (const id of jobIds) {
+      const job = this.jobsCache.get(id)!;
+      const isComplete = this.resultsCache.has(id);
+      if (isComplete && this.isJobStale(job)) {
+        jobsToClean.push(id);
+      }
+    }
+
+    if (jobsToClean.length > 0) {
+      this.logger.info(`Cleaning up jobs=${jobsToClean.length}`);
+      await asyncPool(this.maxParallelCleanUps, jobsToClean, async jobId => {
+        await this.cleanUpProvingJobState(jobId);
+      });
+    }
+  }
+
+  private async reEnqueueExpiredJobs() {
     const inProgressEntries = Array.from(this.inProgress.entries());
     for (const [id, metadata] of inProgressEntries) {
       const item = this.jobsCache.get(id);
       if (!item) {
-        this.logger.warn(`Proving job id=${id} not found. Removing it from the queue
+        this.logger.warn(`Proving job id=${id} not found. Removing it from the queue.`, { provingJobId: id });
        this.inProgress.delete(id);
        continue;
      }

-      const
+      const now = this.msTimeSource();
+      const msSinceLastUpdate = now - metadata.lastUpdatedAt;
       if (msSinceLastUpdate >= this.jobTimeoutMs) {
-        this.
-
-
-
+        if (this.isJobStale(item)) {
+          // the job has timed out and it's also old, just cancel and move on
+          await this.cancelProvingJob(item.id);
+        } else {
+          this.logger.warn(`Proving job id=${id} timed out. Adding it back to the queue.`, { provingJobId: id });
+          this.inProgress.delete(id);
+          this.enqueueJobInternal(item);
+          this.instrumentation.incTimedOutJobs(item.type);
+        }
       }
     }
-  }
+  }

   private enqueueJobInternal(job: ProvingJob): void {
     if (!this.promises.has(job.id)) {
       this.promises.set(job.id, promiseWithResolvers());
     }
-    this.queues[job.type].put(
+    this.queues[job.type].put({
+      epochNumber: job.epochNumber,
+      id: job.id,
+    });
     this.enqueuedAt.set(job.id, new Timer());
-    this.
+    this.epochHeight = Math.max(this.epochHeight, job.epochNumber);
+  }
+
+  private isJobStale(job: ProvingJob) {
+    return job.epochNumber < this.epochHeight - this.maxEpochsToKeepResultsFor;
   }
 }

 type ProvingQueues = {
-  [K in ProvingRequestType]: PriorityMemoryQueue<
+  [K in ProvingRequestType]: PriorityMemoryQueue<EnqueuedProvingJob>;
 };

 /**
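
The new `cleanupStaleJobs` above deletes settled, stale jobs via `asyncPool` from `@aztec/foundation/async-pool`, capping the number of deletions in flight at `maxParallelCleanUps`. As an illustration of the bounded-concurrency idea only (this is not the foundation package's implementation), a generic pool could look like:

```ts
// Illustrative stand-in for an asyncPool(limit, items, fn) helper.
async function boundedPool<T>(
  limit: number,
  items: readonly T[],
  fn: (item: T) => Promise<void>,
): Promise<void> {
  const executing = new Set<Promise<void>>();
  for (const item of items) {
    const task: Promise<void> = fn(item).finally(() => {
      executing.delete(task);
    });
    executing.add(task);
    if (executing.size >= limit) {
      await Promise.race(executing); // wait for a free slot before scheduling more work
    }
  }
  await Promise.all(executing); // drain the remaining tasks
}

// Usage mirroring the diff: clean up settled, stale jobs with at most
// maxParallelCleanUps deletions running concurrently.
const maxParallelCleanUps = 20;
const jobsToClean = ['job-1', 'job-2', 'job-3'];
const cleanUpProvingJobState = async (id: string) => console.log(`cleaning up ${id}`);

await boundedPool(maxParallelCleanUps, jobsToClean, jobId => cleanUpProvingJobState(jobId));
```
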
@@ -386,12 +555,10 @@ type ProvingQueues = {
  * @param b - Another proving job
  * @returns A number indicating the relative priority of the two proving jobs
  */
-function provingJobComparator(a:
-
-  const bBlockNumber = b.blockNumber ?? 0;
-  if (aBlockNumber < bBlockNumber) {
+function provingJobComparator(a: EnqueuedProvingJob, b: EnqueuedProvingJob): -1 | 0 | 1 {
+  if (a.epochNumber < b.epochNumber) {
     return -1;
-  } else if (
+  } else if (a.epochNumber > b.epochNumber) {
     return 1;
   } else {
     return 0;
package/src/proving_broker/proving_broker_database/persisted.ts

@@ -1,7 +1,7 @@
 import { type ProofUri, ProvingJob, type ProvingJobId, ProvingJobSettledResult } from '@aztec/circuit-types';
 import { jsonParseWithSchema, jsonStringify } from '@aztec/foundation/json-rpc';
 import { type AztecKVStore, type AztecMap } from '@aztec/kv-store';
-import {
+import { Attributes, LmdbMetrics, type TelemetryClient } from '@aztec/telemetry-client';

 import { type ProvingBrokerDatabase } from '../proving_broker_database.js';

@@ -14,14 +14,8 @@ export class KVBrokerDatabase implements ProvingBrokerDatabase {
     this.metrics = new LmdbMetrics(
       client.getMeter('KVBrokerDatabase'),
       {
-
-        description: 'Database map size for the proving broker',
+        [Attributes.DB_DATA_TYPE]: 'prover-broker',
       },
-      {
-        name: Metrics.PROVING_QUEUE_DB_USED_SIZE,
-        description: 'Database used size for the proving broker',
-      },
-      { name: Metrics.PROVING_QUEUE_DB_NUM_ITEMS, description: 'Number of items in the broker database' },
       () => store.estimateSize(),
     );
     this.jobs = store.openMap('proving_jobs');
package/src/proving_broker/proving_broker_instrumentation.ts

@@ -9,7 +9,6 @@ import {
   type TelemetryClient,
   type UpDownCounter,
   ValueType,
-  millisecondBuckets,
 } from '@aztec/telemetry-client';

 export type MonitorCallback = (proofType: ProvingRequestType) => number;
@@ -55,18 +54,12 @@ export class ProvingBrokerInstrumentation {
       description: 'Records how long a job sits in the queue',
       unit: 'ms',
       valueType: ValueType.INT,
-      advice: {
-        explicitBucketBoundaries: millisecondBuckets(1), // 10ms -> ~327s
-      },
     });

     this.jobDuration = meter.createHistogram(Metrics.PROVING_QUEUE_JOB_DURATION, {
       description: 'Records how long a job takes to complete',
       unit: 'ms',
       valueType: ValueType.INT,
-      advice: {
-        explicitBucketBoundaries: millisecondBuckets(1), // 10ms -> ~327s
-      },
     });
   }

package/src/proving_broker/proving_job_controller.ts

@@ -30,6 +30,7 @@ export class ProvingJobController {
   constructor(
     private jobId: ProvingJobId,
     private inputs: ProvingJobInputs,
+    private epochNumber: number,
     private startedAt: number,
     private circuitProver: ServerCircuitProver,
     private onComplete: ProvingJobCompletionCallback,
@@ -100,51 +101,51 @@ export class ProvingJobController {
     const signal = this.abortController.signal;
     switch (type) {
       case ProvingRequestType.PUBLIC_VM: {
-        return await this.circuitProver.getAvmProof(inputs, signal);
+        return await this.circuitProver.getAvmProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.PRIVATE_BASE_ROLLUP: {
-        return await this.circuitProver.getPrivateBaseRollupProof(inputs, signal);
+        return await this.circuitProver.getPrivateBaseRollupProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.PUBLIC_BASE_ROLLUP: {
-        return await this.circuitProver.getPublicBaseRollupProof(inputs, signal);
+        return await this.circuitProver.getPublicBaseRollupProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.MERGE_ROLLUP: {
-        return await this.circuitProver.getMergeRollupProof(inputs, signal);
+        return await this.circuitProver.getMergeRollupProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP: {
-        return await this.circuitProver.getEmptyBlockRootRollupProof(inputs, signal);
+        return await this.circuitProver.getEmptyBlockRootRollupProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.BLOCK_ROOT_ROLLUP: {
-        return await this.circuitProver.getBlockRootRollupProof(inputs, signal);
+        return await this.circuitProver.getBlockRootRollupProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.BLOCK_MERGE_ROLLUP: {
-        return await this.circuitProver.getBlockMergeRollupProof(inputs, signal);
+        return await this.circuitProver.getBlockMergeRollupProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.ROOT_ROLLUP: {
-        return await this.circuitProver.getRootRollupProof(inputs, signal);
+        return await this.circuitProver.getRootRollupProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.BASE_PARITY: {
-        return await this.circuitProver.getBaseParityProof(inputs, signal);
+        return await this.circuitProver.getBaseParityProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.ROOT_PARITY: {
-        return await this.circuitProver.getRootParityProof(inputs, signal);
+        return await this.circuitProver.getRootParityProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.PRIVATE_KERNEL_EMPTY: {
-        return await this.circuitProver.getEmptyPrivateKernelProof(inputs, signal);
+        return await this.circuitProver.getEmptyPrivateKernelProof(inputs, signal, this.epochNumber);
       }

       case ProvingRequestType.TUBE_PROOF: {
-        return await this.circuitProver.getTubeProof(inputs, signal);
+        return await this.circuitProver.getTubeProof(inputs, signal, this.epochNumber);
       }

       default: {