@fluidframework/agent-scheduler 2.0.0-internal.3.0.2 → 2.0.0-internal.3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/scheduler.ts CHANGED
@@ -4,450 +4,500 @@
4
4
  */
5
5
 
6
6
  import { assert, TypedEventEmitter } from "@fluidframework/common-utils";
7
+ import { FluidObject, IFluidHandle, IRequest } from "@fluidframework/core-interfaces";
8
+ import { UsageError } from "@fluidframework/container-utils";
7
9
  import {
8
- FluidObject,
9
- IFluidHandle,
10
- IRequest,
11
- } from "@fluidframework/core-interfaces";
12
- import {
13
- FluidDataStoreRuntime,
14
- FluidObjectHandle,
15
- ISharedObjectRegistry,
10
+ FluidDataStoreRuntime,
11
+ FluidObjectHandle,
12
+ ISharedObjectRegistry,
16
13
  } from "@fluidframework/datastore";
17
14
  import { AttachState } from "@fluidframework/container-definitions";
18
15
  import { ISharedMap, IValueChanged, SharedMap } from "@fluidframework/map";
19
16
  import { ConsensusRegisterCollection } from "@fluidframework/register-collection";
20
17
  import { IFluidDataStoreRuntime, IChannelFactory } from "@fluidframework/datastore-definitions";
21
18
  import {
22
- IFluidDataStoreContext,
23
- IFluidDataStoreFactory,
24
- NamedFluidDataStoreRegistryEntry,
19
+ IFluidDataStoreContext,
20
+ IFluidDataStoreFactory,
21
+ NamedFluidDataStoreRegistryEntry,
25
22
  } from "@fluidframework/runtime-definitions";
26
23
  import { v4 as uuid } from "uuid";
24
+ import { TelemetryDataTag } from "@fluidframework/telemetry-utils";
27
25
  import { IAgentScheduler, IAgentSchedulerEvents } from "./agent";
28
26
 
29
27
  // Note: making sure this ID is unique and does not collide with storage provided clientID
30
28
  const UnattachedClientId = `${uuid()}_unattached`;
31
29
 
32
30
  const mapWait = async <T = any>(map: ISharedMap, key: string): Promise<T> => {
33
- const maybeValue = map.get<T>(key);
34
- if (maybeValue !== undefined) {
35
- return maybeValue;
36
- }
37
-
38
- return new Promise((resolve) => {
39
- const handler = (changed: IValueChanged) => {
40
- if (changed.key === key) {
41
- map.off("valueChanged", handler);
42
- const value = map.get<T>(changed.key);
43
- if (value === undefined) {
44
- throw new Error("Unexpected valueChanged result");
45
- }
46
- resolve(value);
47
- }
48
- };
49
- map.on("valueChanged", handler);
50
- });
31
+ const maybeValue = map.get<T>(key);
32
+ if (maybeValue !== undefined) {
33
+ return maybeValue;
34
+ }
35
+
36
+ return new Promise((resolve) => {
37
+ const handler = (changed: IValueChanged) => {
38
+ if (changed.key === key) {
39
+ map.off("valueChanged", handler);
40
+ const value = map.get<T>(changed.key);
41
+ if (value === undefined) {
42
+ throw new Error("Unexpected valueChanged result");
43
+ }
44
+ resolve(value);
45
+ }
46
+ };
47
+ map.on("valueChanged", handler);
48
+ });
51
49
  };
52
50
 
53
51
  const schedulerId = "scheduler";
54
52
 
55
- export class AgentScheduler extends TypedEventEmitter<IAgentSchedulerEvents> implements IAgentScheduler {
56
- public static async load(runtime: IFluidDataStoreRuntime, context: IFluidDataStoreContext, existing: boolean) {
57
- let root: ISharedMap;
58
- let consensusRegisterCollection: ConsensusRegisterCollection<string | null>;
59
- if (!existing) {
60
- root = SharedMap.create(runtime, "root");
61
- root.bindToContext();
62
- consensusRegisterCollection = ConsensusRegisterCollection.create(runtime);
63
- consensusRegisterCollection.bindToContext();
64
- root.set(schedulerId, consensusRegisterCollection.handle);
65
- } else {
66
- root = await runtime.getChannel("root") as ISharedMap;
67
- const handle = await mapWait<IFluidHandle<ConsensusRegisterCollection<string | null>>>(root, schedulerId);
68
- assert(handle !== undefined, 0x116 /* "Missing handle on scheduler load" */);
69
- consensusRegisterCollection = await handle.get();
70
- }
71
- const agentScheduler = new AgentScheduler(runtime, context, consensusRegisterCollection);
72
- agentScheduler.initialize();
73
-
74
- return agentScheduler;
75
- }
76
-
77
- public get IAgentScheduler() { return this; }
78
- public get IFluidLoadable() { return this; }
79
-
80
- private get clientId(): string {
81
- if (this.runtime.attachState === AttachState.Detached) {
82
- return UnattachedClientId;
83
- }
84
- const clientId = this.runtime.clientId;
85
- assert(!!clientId, 0x117 /* "Trying to get missing clientId!" */);
86
- return clientId;
87
- }
88
-
89
- // Set of tasks registered by this client.
90
- // Has no relationship with lists below.
91
- // The only requirement here - a task can be registered by a client only once.
92
- // Other clients can pick these tasks.
93
- private readonly registeredTasks = new Set<string>();
94
-
95
- // List of all tasks client is capable of running (essentially expressed desire to run)
96
- // Client will proactively attempt to pick up these tasks if they are not assigned to other clients.
97
- // This is a strict superset of tasks running in the client.
98
- private readonly locallyRunnableTasks = new Map<string, () => Promise<void>>();
99
-
100
- // Set of registered tasks client is currently running.
101
- // It's a subset of this.locallyRunnableTasks
102
- private runningTasks = new Set<string>();
103
-
104
- private readonly _handle: IFluidHandle<this>;
105
-
106
- constructor(
107
- private readonly runtime: IFluidDataStoreRuntime,
108
- private readonly context: IFluidDataStoreContext,
109
- private readonly consensusRegisterCollection: ConsensusRegisterCollection<string | null>,
110
- ) {
111
- super();
112
- this._handle = new FluidObjectHandle(this, "", this.runtime.objectsRoutingContext);
113
- }
114
-
115
- public get handle() {
116
- return this._handle;
117
- }
118
-
119
- public async register(...taskUrls: string[]): Promise<void> {
120
- for (const taskUrl of taskUrls) {
121
- if (this.registeredTasks.has(taskUrl)) {
122
- throw new Error(`${taskUrl} is already registered`);
123
- }
124
- }
125
- const unregisteredTasks: string[] = [];
126
- for (const taskUrl of taskUrls) {
127
- this.registeredTasks.add(taskUrl);
128
- // Only register for a new task.
129
- const currentClient = this.getTaskClientId(taskUrl);
130
- if (currentClient === undefined) {
131
- unregisteredTasks.push(taskUrl);
132
- }
133
- }
134
- return this.registerCore(unregisteredTasks);
135
- }
136
-
137
- public async pick(taskId: string, worker: () => Promise<void>): Promise<void> {
138
- if (this.locallyRunnableTasks.has(taskId)) {
139
- throw new Error(`${taskId} is already attempted`);
140
- }
141
- this.locallyRunnableTasks.set(taskId, worker);
142
-
143
- // We have a policy to disallow non-interactive clients from taking tasks. Callers of pick() can
144
- // either perform this check proactively and call conditionally, or catch the error (in which case
145
- // they can know they will not get the task).
146
- assert(this.context.deltaManager.clientDetails.capabilities.interactive,
147
- 0x118 /* "Bad client interactive check" */);
148
-
149
- // Check the current status and express interest if it's a new one (undefined) or currently unpicked (null).
150
- if (this.isActive()) {
151
- const currentClient = this.getTaskClientId(taskId);
152
- if (currentClient === undefined || currentClient === null) {
153
- await this.writeCore(taskId, this.clientId);
154
- }
155
- }
156
- }
157
-
158
- public async release(...taskUrls: string[]): Promise<void> {
159
- const active = this.isActive();
160
- for (const taskUrl of taskUrls) {
161
- if (!this.locallyRunnableTasks.has(taskUrl)) {
162
- throw new Error(`${taskUrl} was never registered`);
163
- }
164
- // Note - the assumption is - we are connected.
165
- // If not - all tasks should have been dropped already on disconnect / attachment
166
- assert(active, 0x119 /* "This agent became inactive while releasing" */);
167
- if (this.getTaskClientId(taskUrl) !== this.clientId) {
168
- throw new Error(`${taskUrl} was never picked`);
169
- }
170
- }
171
- return this.releaseCore([...taskUrls]);
172
- }
173
-
174
- public pickedTasks(): string[] {
175
- return Array.from(this.runningTasks.values());
176
- }
177
-
178
- private async registerCore(taskUrls: string[]): Promise<void> {
179
- if (taskUrls.length > 0) {
180
- const registersP: Promise<void>[] = [];
181
- for (const taskUrl of taskUrls) {
182
- registersP.push(this.writeCore(taskUrl, null));
183
- }
184
- await Promise.all(registersP);
185
-
186
- // The registers should have up to date results now. Check the status.
187
- for (const taskUrl of taskUrls) {
188
- const taskStatus = this.getTaskClientId(taskUrl);
189
-
190
- // Task should be either registered (null) or picked up.
191
- assert(taskStatus !== undefined, 0x11a /* `Unsuccessful registration` */);
192
- }
193
- }
194
- }
195
-
196
- private async releaseCore(taskUrls: string[]) {
197
- if (taskUrls.length > 0) {
198
- const releasesP: Promise<void>[] = [];
199
- for (const taskUrl of taskUrls) {
200
- // Remove from local map so that it can be picked later.
201
- this.locallyRunnableTasks.delete(taskUrl);
202
- releasesP.push(this.writeCore(taskUrl, null));
203
- }
204
- await Promise.all(releasesP);
205
- }
206
- }
207
-
208
- private async clearTasks(taskUrls: string[]) {
209
- assert(this.isActive(), 0x11b /* "Trying to clear tasks on inactive agent" */);
210
- const clearP: Promise<void>[] = [];
211
- for (const taskUrl of taskUrls) {
212
- clearP.push(this.writeCore(taskUrl, null));
213
- }
214
- await Promise.all(clearP);
215
- }
216
-
217
- private getTaskClientId(url: string): string | null | undefined {
218
- return this.consensusRegisterCollection.read(url);
219
- }
220
-
221
- private async writeCore(key: string, clientId: string | null): Promise<void> {
222
- await this.consensusRegisterCollection.write(key, clientId);
223
- }
224
-
225
- private initialize() {
226
- const quorum = this.runtime.getQuorum();
227
- // A client left the quorum. Iterate and clear tasks held by that client.
228
- // Ideally a leader should do this cleanup. But it's complicated when a leader itself leaves.
229
- // Probably okay for now to have every client try to do this.
230
- // eslint-disable-next-line @typescript-eslint/no-misused-promises
231
- quorum.on("removeMember", async (clientId: string) => {
232
- assert(this.runtime.objectsRoutingContext.isAttached, 0x11c /* "Detached object routing context" */);
233
- // Cleanup only if connected. If not, cleanup will happen in initializeCore() that runs on connection.
234
- if (this.isActive()) {
235
- const tasks: Promise<any>[] = [];
236
- const leftTasks: string[] = [];
237
- for (const taskUrl of this.consensusRegisterCollection.keys()) {
238
- if (this.getTaskClientId(taskUrl) === clientId) {
239
- if (this.locallyRunnableTasks.has(taskUrl)) {
240
- tasks.push(this.writeCore(taskUrl, this.clientId));
241
- } else {
242
- leftTasks.push(taskUrl);
243
- }
244
- }
245
- }
246
- tasks.push(this.clearTasks(leftTasks));
247
- await Promise.all(tasks).catch((error) => {
248
- this.sendErrorEvent("AgentScheduler_RemoveMemberError", error);
249
- });
250
- }
251
- });
252
-
253
- // Listeners for new/released tasks. All clients will try to grab at the same time.
254
- // Maybe we want a randomized timer (something like Raft) to reduce chattiness?
255
- // eslint-disable-next-line @typescript-eslint/no-misused-promises
256
- this.consensusRegisterCollection.on("atomicChanged", async (key: string, currentClient: string | null) => {
257
- // Check if this client was chosen.
258
- if (this.isActive() && currentClient === this.clientId) {
259
- this.onNewTaskAssigned(key);
260
- } else {
261
- // The call below mutates the consensusRegisterCollection in
262
- // its event handler, which is not safe.
263
- // We need to force this to be part of a different batch of ops by
264
- // scheduling a microtask in order to work around the current validations.
265
- // This is not recommended and should be avoided.
266
- await Promise.resolve().then(async () => {
267
- await this.onTaskReassigned(key, currentClient);
268
- });
269
- }
270
- });
271
-
272
- if (this.isActive()) {
273
- this.initializeCore();
274
- }
275
-
276
- this.runtime.on("connected", () => {
277
- if (this.isActive()) {
278
- this.initializeCore();
279
- }
280
- });
281
-
282
- if (this.runtime.attachState === AttachState.Detached) {
283
- this.runtime.waitAttached().then(() => {
284
- this.clearRunningTasks();
285
- }).catch((error) => {
286
- this.sendErrorEvent("AgentScheduler_clearRunningTasks", error);
287
- });
288
- }
289
-
290
- this.runtime.on("disconnected", () => {
291
- if (this.runtime.attachState !== AttachState.Detached) {
292
- this.clearRunningTasks();
293
- }
294
- });
295
- }
296
-
297
- private onNewTaskAssigned(key: string) {
298
- assert(!this.runningTasks.has(key), 0x11d /* "task is already running" */);
299
- this.runningTasks.add(key);
300
- const worker = this.locallyRunnableTasks.get(key);
301
- if (worker === undefined) {
302
- this.sendErrorEvent("AgentScheduler_UnwantedChange", undefined, key);
303
- } else {
304
- this.emit("picked", key);
305
- worker().catch((error) => {
306
- this.sendErrorEvent("AgentScheduler_FailedWork", error, key);
307
- });
308
- }
309
- }
310
-
311
- private async onTaskReassigned(key: string, currentClient: string | null) {
312
- if (this.runningTasks.has(key)) {
313
- this.runningTasks.delete(key);
314
- this.emit("released", key);
315
- }
316
- assert(currentClient !== undefined, 0x11e /* "client is undefined" */);
317
- if (this.isActive()) {
318
- // attempt to pick up task if we are connected.
319
- // If not, initializeCore() will do it when connected
320
- if (currentClient === null) {
321
- if (this.locallyRunnableTasks.has(key)) {
322
- await this.writeCore(key, this.clientId);
323
- }
324
- }
325
- // Check if the op came from dropped client
326
- // This could happen when "old" ops are submitted on reconnection.
327
- // They carry "old" ref seq number, but if write is not contested, it will get accepted
328
- else if (this.runtime.getQuorum().getMember(currentClient) === undefined) {
329
- await this.writeCore(key, null);
330
- }
331
- }
332
- }
333
-
334
- private isActive() {
335
- // Scheduler should be active in detached container.
336
- if (this.runtime.attachState === AttachState.Detached) {
337
- return true;
338
- }
339
- if (!this.runtime.connected) {
340
- return false;
341
- }
342
-
343
- // Note: we are not checking for this.context.deltaManager.clientDetails.capabilities.interactive
344
- // here. Instead we assert in pick() if a non-interactive client tries to pick.
345
-
346
- return this.context.deltaManager.active;
347
- }
348
-
349
- private initializeCore() {
350
- // Nobody released the tasks held by last client in previous session.
351
- // Check to see if this client needs to do this.
352
- const clearCandidates: string[] = [];
353
- const tasks: Promise<any>[] = [];
354
-
355
- for (const [taskUrl] of this.locallyRunnableTasks) {
356
- if (!this.getTaskClientId(taskUrl)) {
357
- tasks.push(this.writeCore(taskUrl, this.clientId));
358
- }
359
- }
360
-
361
- for (const taskUrl of this.consensusRegisterCollection.keys()) {
362
- const currentClient = this.getTaskClientId(taskUrl);
363
- if (currentClient && this.runtime.getQuorum().getMember(currentClient) === undefined) {
364
- clearCandidates.push(taskUrl);
365
- }
366
- }
367
-
368
- tasks.push(this.clearTasks(clearCandidates));
369
-
370
- Promise.all(tasks).catch((error) => {
371
- this.sendErrorEvent("AgentScheduler_InitError", error);
372
- });
373
- }
374
-
375
- private clearRunningTasks() {
376
- const tasks = this.runningTasks;
377
- this.runningTasks = new Set<string>();
378
-
379
- if (this.isActive()) {
380
- // Clear all tasks with UnattachedClientId (if was unattached) and reapply for tasks with new clientId
381
- // If we are simply disconnected, then proper cleanup will be done on connection.
382
- this.initializeCore();
383
- }
384
-
385
- for (const task of tasks) {
386
- this.emit("lost", task);
387
- }
388
- }
389
-
390
- private sendErrorEvent(eventName: string, error: any, key?: string) {
391
- this.runtime.logger.sendErrorEvent({ eventName, key }, error);
392
- }
53
+ export class AgentScheduler
54
+ extends TypedEventEmitter<IAgentSchedulerEvents>
55
+ implements IAgentScheduler
56
+ {
57
+ public static async load(
58
+ runtime: IFluidDataStoreRuntime,
59
+ context: IFluidDataStoreContext,
60
+ existing: boolean,
61
+ ) {
62
+ let root: ISharedMap;
63
+ let consensusRegisterCollection: ConsensusRegisterCollection<string | null>;
64
+ if (!existing) {
65
+ root = SharedMap.create(runtime, "root");
66
+ root.bindToContext();
67
+ consensusRegisterCollection = ConsensusRegisterCollection.create(runtime);
68
+ consensusRegisterCollection.bindToContext();
69
+ root.set(schedulerId, consensusRegisterCollection.handle);
70
+ } else {
71
+ root = (await runtime.getChannel("root")) as ISharedMap;
72
+ const handle = await mapWait<IFluidHandle<ConsensusRegisterCollection<string | null>>>(
73
+ root,
74
+ schedulerId,
75
+ );
76
+ assert(handle !== undefined, 0x116 /* "Missing handle on scheduler load" */);
77
+ consensusRegisterCollection = await handle.get();
78
+ }
79
+ const agentScheduler = new AgentScheduler(runtime, context, consensusRegisterCollection);
80
+ agentScheduler.initialize();
81
+
82
+ return agentScheduler;
83
+ }
84
+
85
+ public get IAgentScheduler() {
86
+ return this;
87
+ }
88
+ public get IFluidLoadable() {
89
+ return this;
90
+ }
91
+
92
+ private get clientId(): string {
93
+ if (this.runtime.attachState === AttachState.Detached) {
94
+ return UnattachedClientId;
95
+ }
96
+ const clientId = this.runtime.clientId;
97
+ assert(!!clientId, 0x117 /* "Trying to get missing clientId!" */);
98
+ return clientId;
99
+ }
100
+
101
+ // Set of tasks registered by this client.
102
+ // Has no relationship with lists below.
103
+ // The only requirement here - a task can be registered by a client only once.
104
+ // Other clients can pick these tasks.
105
+ private readonly registeredTasks = new Set<string>();
106
+
107
+ // List of all tasks client is capable of running (essentially expressed desire to run)
108
+ // Client will proactively attempt to pick up these tasks if they are not assigned to other clients.
109
+ // This is a strict superset of tasks running in the client.
110
+ private readonly locallyRunnableTasks = new Map<string, () => Promise<void>>();
111
+
112
+ // Set of registered tasks client is currently running.
113
+ // It's a subset of this.locallyRunnableTasks
114
+ private runningTasks = new Set<string>();
115
+
116
+ private readonly _handle: IFluidHandle<this>;
117
+
118
+ constructor(
119
+ private readonly runtime: IFluidDataStoreRuntime,
120
+ private readonly context: IFluidDataStoreContext,
121
+ private readonly consensusRegisterCollection: ConsensusRegisterCollection<string | null>,
122
+ ) {
123
+ super();
124
+ this._handle = new FluidObjectHandle(this, "", this.runtime.objectsRoutingContext);
125
+ }
126
+
127
+ public get handle() {
128
+ return this._handle;
129
+ }
130
+
131
+ public async register(...taskUrls: string[]): Promise<void> {
132
+ for (const taskUrl of taskUrls) {
133
+ if (this.registeredTasks.has(taskUrl)) {
134
+ throw new UsageError(`Task is already registered`, {
135
+ taskUrl: { tag: TelemetryDataTag.CodeArtifact, value: taskUrl },
136
+ });
137
+ }
138
+ }
139
+ const unregisteredTasks: string[] = [];
140
+ for (const taskUrl of taskUrls) {
141
+ this.registeredTasks.add(taskUrl);
142
+ // Only register for a new task.
143
+ const currentClient = this.getTaskClientId(taskUrl);
144
+ if (currentClient === undefined) {
145
+ unregisteredTasks.push(taskUrl);
146
+ }
147
+ }
148
+ return this.registerCore(unregisteredTasks);
149
+ }
150
+
151
+ public async pick(taskId: string, worker: () => Promise<void>): Promise<void> {
152
+ if (this.locallyRunnableTasks.has(taskId)) {
153
+ throw new UsageError(`Task is already attempted`, {
154
+ taskUrl: { tag: TelemetryDataTag.CodeArtifact, value: taskId },
155
+ });
156
+ }
157
+ this.locallyRunnableTasks.set(taskId, worker);
158
+
159
+ // We have a policy to disallow non-interactive clients from taking tasks. Callers of pick() can
160
+ // either perform this check proactively and call conditionally, or catch the error (in which case
161
+ // they can know they will not get the task).
162
+ assert(
163
+ this.context.deltaManager.clientDetails.capabilities.interactive,
164
+ 0x118 /* "Bad client interactive check" */,
165
+ );
166
+
167
+ // Check the current status and express interest if it's a new one (undefined) or currently unpicked (null).
168
+ if (this.isActive()) {
169
+ const currentClient = this.getTaskClientId(taskId);
170
+ if (currentClient === undefined || currentClient === null) {
171
+ await this.writeCore(taskId, this.clientId);
172
+ }
173
+ }
174
+ }
175
+
176
+ public async release(...taskUrls: string[]): Promise<void> {
177
+ const active = this.isActive();
178
+ for (const taskUrl of taskUrls) {
179
+ if (!this.locallyRunnableTasks.has(taskUrl)) {
180
+ throw new UsageError(`Task was never registered`, {
181
+ taskUrl: { tag: TelemetryDataTag.CodeArtifact, value: taskUrl },
182
+ });
183
+ }
184
+ if (!this.runningTasks.has(taskUrl)) {
185
+ // If we got disconnected (and are attached), tasks that we WERE picked for at the time of disconnect
186
+ // will still show us as holding the task according to getTaskClientId (the CRC is stale), but we
187
+ // should not try to release because our disconnect will already result in either someone else or
188
+ // ourselves clearing the task upon reconnect.
189
+ // This UsageError is to enforce that the caller should check AgentScheduler.pickedTasks before trying
190
+ // to release a task.
191
+ throw new UsageError(`Task is not currently picked`, {
192
+ taskUrl: { tag: TelemetryDataTag.CodeArtifact, value: taskUrl },
193
+ });
194
+ }
195
+ // We may only release tasks that we KNOW we hold (detached state or connected and own the CRC). If we're
196
+ // attached+disconnected then we'll lose the task automatically, and so may not release manually (someone
197
+ // else might hold it by the time we reconnect)
198
+ assert(active, 0x119 /* "This agent became inactive while releasing" */);
199
+ if (this.getTaskClientId(taskUrl) !== this.clientId) {
200
+ throw new UsageError(`Task was never picked`, {
201
+ taskUrl: { tag: TelemetryDataTag.CodeArtifact, value: taskUrl },
202
+ });
203
+ }
204
+ }
205
+ return this.releaseCore([...taskUrls]);
206
+ }
207
+
208
+ public pickedTasks(): string[] {
209
+ return Array.from(this.runningTasks.values());
210
+ }
211
+
212
+ private async registerCore(taskUrls: string[]): Promise<void> {
213
+ if (taskUrls.length > 0) {
214
+ const registersP: Promise<void>[] = [];
215
+ for (const taskUrl of taskUrls) {
216
+ registersP.push(this.writeCore(taskUrl, null));
217
+ }
218
+ await Promise.all(registersP);
219
+
220
+ // The registers should have up to date results now. Check the status.
221
+ for (const taskUrl of taskUrls) {
222
+ const taskStatus = this.getTaskClientId(taskUrl);
223
+
224
+ // Task should be either registered (null) or picked up.
225
+ assert(taskStatus !== undefined, 0x11a /* `Unsuccessful registration` */);
226
+ }
227
+ }
228
+ }
229
+
230
+ private async releaseCore(taskUrls: string[]) {
231
+ if (taskUrls.length > 0) {
232
+ const releasesP: Promise<void>[] = [];
233
+ for (const taskUrl of taskUrls) {
234
+ // Remove from local map so that it can be picked later.
235
+ this.locallyRunnableTasks.delete(taskUrl);
236
+ releasesP.push(this.writeCore(taskUrl, null));
237
+ }
238
+ await Promise.all(releasesP);
239
+ }
240
+ }
241
+
242
+ private async clearTasks(taskUrls: string[]) {
243
+ assert(this.isActive(), 0x11b /* "Trying to clear tasks on inactive agent" */);
244
+ const clearP: Promise<void>[] = [];
245
+ for (const taskUrl of taskUrls) {
246
+ clearP.push(this.writeCore(taskUrl, null));
247
+ }
248
+ await Promise.all(clearP);
249
+ }
250
+
251
+ private getTaskClientId(url: string): string | null | undefined {
252
+ return this.consensusRegisterCollection.read(url);
253
+ }
254
+
255
+ private async writeCore(key: string, clientId: string | null): Promise<void> {
256
+ await this.consensusRegisterCollection.write(key, clientId);
257
+ }
258
+
259
+ private initialize() {
260
+ const quorum = this.runtime.getQuorum();
261
+ // A client left the quorum. Iterate and clear tasks held by that client.
262
+ // Ideally a leader should do this cleanup. But it's complicated when a leader itself leaves.
263
+ // Probably okay for now to have every client try to do this.
264
+ // eslint-disable-next-line @typescript-eslint/no-misused-promises
265
+ quorum.on("removeMember", async (clientId: string) => {
266
+ assert(
267
+ this.runtime.objectsRoutingContext.isAttached,
268
+ 0x11c /* "Detached object routing context" */,
269
+ );
270
+ // Cleanup only if connected. If not, cleanup will happen in initializeCore() that runs on connection.
271
+ if (this.isActive()) {
272
+ const tasks: Promise<any>[] = [];
273
+ const leftTasks: string[] = [];
274
+ for (const taskUrl of this.consensusRegisterCollection.keys()) {
275
+ if (this.getTaskClientId(taskUrl) === clientId) {
276
+ if (this.locallyRunnableTasks.has(taskUrl)) {
277
+ tasks.push(this.writeCore(taskUrl, this.clientId));
278
+ } else {
279
+ leftTasks.push(taskUrl);
280
+ }
281
+ }
282
+ }
283
+ tasks.push(this.clearTasks(leftTasks));
284
+ await Promise.all(tasks).catch((error) => {
285
+ this.sendErrorEvent("AgentScheduler_RemoveMemberError", error);
286
+ });
287
+ }
288
+ });
289
+
290
+ // Listeners for new/released tasks. All clients will try to grab at the same time.
291
+ // Maybe we want a randomized timer (something like Raft) to reduce chattiness?
292
+ this.consensusRegisterCollection.on(
293
+ "atomicChanged",
294
+ // eslint-disable-next-line @typescript-eslint/no-misused-promises
295
+ async (key: string, currentClient: string | null) => {
296
+ // Check if this client was chosen.
297
+ if (this.isActive() && currentClient === this.clientId) {
298
+ this.onNewTaskAssigned(key);
299
+ } else {
300
+ // The call below mutates the consensusRegisterCollection in
301
+ // its event handler, which is not safe.
302
+ // We need to force this to be part of a different batch of ops by
303
+ // scheduling a microtask in order to work around the current validations.
304
+ // This is not recommended and should be avoided.
305
+ await Promise.resolve().then(async () => {
306
+ await this.onTaskReassigned(key, currentClient);
307
+ });
308
+ }
309
+ },
310
+ );
311
+
312
+ if (this.isActive()) {
313
+ this.initializeCore();
314
+ }
315
+
316
+ this.runtime.on("connected", () => {
317
+ if (this.isActive()) {
318
+ this.initializeCore();
319
+ }
320
+ });
321
+
322
+ if (this.runtime.attachState === AttachState.Detached) {
323
+ this.runtime
324
+ .waitAttached()
325
+ .then(() => {
326
+ this.clearRunningTasks();
327
+ })
328
+ .catch((error) => {
329
+ this.sendErrorEvent("AgentScheduler_clearRunningTasks", error);
330
+ });
331
+ }
332
+
333
+ this.runtime.on("disconnected", () => {
334
+ if (this.runtime.attachState !== AttachState.Detached) {
335
+ this.clearRunningTasks();
336
+ }
337
+ });
338
+ }
339
+
340
+ private onNewTaskAssigned(key: string) {
341
+ assert(!this.runningTasks.has(key), 0x11d /* "task is already running" */);
342
+ this.runningTasks.add(key);
343
+ const worker = this.locallyRunnableTasks.get(key);
344
+ if (worker === undefined) {
345
+ this.sendErrorEvent("AgentScheduler_UnwantedChange", undefined, key);
346
+ } else {
347
+ this.emit("picked", key);
348
+ worker().catch((error) => {
349
+ this.sendErrorEvent("AgentScheduler_FailedWork", error, key);
350
+ });
351
+ }
352
+ }
353
+
354
+ private async onTaskReassigned(key: string, currentClient: string | null) {
355
+ if (this.runningTasks.has(key)) {
356
+ this.runningTasks.delete(key);
357
+ this.emit("released", key);
358
+ }
359
+ assert(currentClient !== undefined, 0x11e /* "client is undefined" */);
360
+ if (this.isActive()) {
361
+ // attempt to pick up task if we are connected.
362
+ // If not, initializeCore() will do it when connected
363
+ if (currentClient === null) {
364
+ if (this.locallyRunnableTasks.has(key)) {
365
+ await this.writeCore(key, this.clientId);
366
+ }
367
+ }
368
+ // Check if the op came from dropped client
369
+ // This could happen when "old" ops are submitted on reconnection.
370
+ // They carry "old" ref seq number, but if write is not contested, it will get accepted
371
+ else if (this.runtime.getQuorum().getMember(currentClient) === undefined) {
372
+ await this.writeCore(key, null);
373
+ }
374
+ }
375
+ }
376
+
377
+ private isActive() {
378
+ // Scheduler should be active in detached container.
379
+ if (this.runtime.attachState === AttachState.Detached) {
380
+ return true;
381
+ }
382
+ if (!this.runtime.connected) {
383
+ return false;
384
+ }
385
+
386
+ // Note: we are not checking for this.context.deltaManager.clientDetails.capabilities.interactive
387
+ // here. Instead we assert in pick() if a non-interactive client tries to pick.
388
+
389
+ return this.context.deltaManager.active;
390
+ }
391
+
392
+ private initializeCore() {
393
+ // Nobody released the tasks held by last client in previous session.
394
+ // Check to see if this client needs to do this.
395
+ const clearCandidates: string[] = [];
396
+ const tasks: Promise<any>[] = [];
397
+
398
+ for (const [taskUrl] of this.locallyRunnableTasks) {
399
+ if (!this.getTaskClientId(taskUrl)) {
400
+ tasks.push(this.writeCore(taskUrl, this.clientId));
401
+ }
402
+ }
403
+
404
+ for (const taskUrl of this.consensusRegisterCollection.keys()) {
405
+ const currentClient = this.getTaskClientId(taskUrl);
406
+ if (currentClient && this.runtime.getQuorum().getMember(currentClient) === undefined) {
407
+ clearCandidates.push(taskUrl);
408
+ }
409
+ }
410
+
411
+ tasks.push(this.clearTasks(clearCandidates));
412
+
413
+ Promise.all(tasks).catch((error) => {
414
+ this.sendErrorEvent("AgentScheduler_InitError", error);
415
+ });
416
+ }
417
+
418
+ private clearRunningTasks() {
419
+ const tasks = this.runningTasks;
420
+ this.runningTasks = new Set<string>();
421
+
422
+ if (this.isActive()) {
423
+ // Clear all tasks with UnattachedClientId (if was unattached) and reapply for tasks with new clientId
424
+ // If we are simply disconnected, then proper cleanup will be done on connection.
425
+ this.initializeCore();
426
+ }
427
+
428
+ for (const task of tasks) {
429
+ this.emit("lost", task);
430
+ }
431
+ }
432
+
433
+ private sendErrorEvent(eventName: string, error: any, key?: string) {
434
+ this.runtime.logger.sendErrorEvent({ eventName, key }, error);
435
+ }
393
436
  }
394
437
 
395
438
  class AgentSchedulerRuntime extends FluidDataStoreRuntime {
396
- constructor(
397
- dataStoreContext: IFluidDataStoreContext,
398
- sharedObjectRegistry: ISharedObjectRegistry,
399
- existing: boolean,
400
- ) {
401
- super(
402
- dataStoreContext,
403
- sharedObjectRegistry,
404
- existing,
405
- async () => AgentScheduler.load(this, dataStoreContext, existing));
406
- }
407
- public async request(request: IRequest) {
408
- const response = await super.request(request);
409
- if (response.status === 404) {
410
- if (request.url === "" || request.url === "/") {
411
- const agentScheduler = await this.entryPoint?.get();
412
- assert(agentScheduler !== undefined,
413
- 0x466 /* entryPoint for AgentSchedulerRuntime should have been initialized by now */);
414
-
415
- return { status: 200, mimeType: "fluid/object", value: agentScheduler };
416
- }
417
- }
418
- return response;
419
- }
439
+ constructor(
440
+ dataStoreContext: IFluidDataStoreContext,
441
+ sharedObjectRegistry: ISharedObjectRegistry,
442
+ existing: boolean,
443
+ ) {
444
+ super(dataStoreContext, sharedObjectRegistry, existing, async () =>
445
+ AgentScheduler.load(this, dataStoreContext, existing),
446
+ );
447
+ }
448
+ public async request(request: IRequest) {
449
+ const response = await super.request(request);
450
+ if (response.status === 404) {
451
+ if (request.url === "" || request.url === "/") {
452
+ const agentScheduler = await this.entryPoint?.get();
453
+ assert(
454
+ agentScheduler !== undefined,
455
+ 0x466 /* entryPoint for AgentSchedulerRuntime should have been initialized by now */,
456
+ );
457
+
458
+ return { status: 200, mimeType: "fluid/object", value: agentScheduler };
459
+ }
460
+ }
461
+ return response;
462
+ }
420
463
  }
421
464
 
422
465
  export class AgentSchedulerFactory implements IFluidDataStoreFactory {
423
- public static readonly type = "_scheduler";
424
- public readonly type = AgentSchedulerFactory.type;
425
-
426
- public get IFluidDataStoreFactory() { return this; }
427
-
428
- public static get registryEntry(): NamedFluidDataStoreRegistryEntry {
429
- return [this.type, Promise.resolve(new AgentSchedulerFactory())];
430
- }
431
-
432
- public static async createChildInstance(parentContext: IFluidDataStoreContext): Promise<AgentScheduler> {
433
- const packagePath = [...parentContext.packagePath, AgentSchedulerFactory.type];
434
- const dataStore = await parentContext.containerRuntime.createDataStore(packagePath);
435
- const entryPoint: FluidObject<IAgentScheduler> | undefined = await dataStore.entryPoint?.get();
436
-
437
- // AgentSchedulerRuntime always puts an AgentScheduler object in the data store's entryPoint, but double-check
438
- // while we plumb entryPoints correctly everywhere, so we can be sure the cast below is fine.
439
- assert(entryPoint?.IAgentScheduler !== undefined,
440
- 0x467 /* The data store's entryPoint is not an AgentScheduler! */);
441
- return entryPoint as unknown as AgentScheduler;
442
- }
443
-
444
- public async instantiateDataStore(context: IFluidDataStoreContext, existing: boolean) {
445
- const mapFactory = SharedMap.getFactory();
446
- const consensusRegisterCollectionFactory = ConsensusRegisterCollection.getFactory();
447
- const dataTypes = new Map<string, IChannelFactory>();
448
- dataTypes.set(mapFactory.type, mapFactory);
449
- dataTypes.set(consensusRegisterCollectionFactory.type, consensusRegisterCollectionFactory);
450
-
451
- return new AgentSchedulerRuntime(context, dataTypes, existing);
452
- }
466
+ public static readonly type = "_scheduler";
467
+ public readonly type = AgentSchedulerFactory.type;
468
+
469
+ public get IFluidDataStoreFactory() {
470
+ return this;
471
+ }
472
+
473
+ public static get registryEntry(): NamedFluidDataStoreRegistryEntry {
474
+ return [this.type, Promise.resolve(new AgentSchedulerFactory())];
475
+ }
476
+
477
+ public static async createChildInstance(
478
+ parentContext: IFluidDataStoreContext,
479
+ ): Promise<AgentScheduler> {
480
+ const packagePath = [...parentContext.packagePath, AgentSchedulerFactory.type];
481
+ const dataStore = await parentContext.containerRuntime.createDataStore(packagePath);
482
+ const entryPoint: FluidObject<IAgentScheduler> | undefined =
483
+ await dataStore.entryPoint?.get();
484
+
485
+ // AgentSchedulerRuntime always puts an AgentScheduler object in the data store's entryPoint, but double-check
486
+ // while we plumb entryPoints correctly everywhere, so we can be sure the cast below is fine.
487
+ assert(
488
+ entryPoint?.IAgentScheduler !== undefined,
489
+ 0x467 /* The data store's entryPoint is not an AgentScheduler! */,
490
+ );
491
+ return entryPoint as unknown as AgentScheduler;
492
+ }
493
+
494
+ public async instantiateDataStore(context: IFluidDataStoreContext, existing: boolean) {
495
+ const mapFactory = SharedMap.getFactory();
496
+ const consensusRegisterCollectionFactory = ConsensusRegisterCollection.getFactory();
497
+ const dataTypes = new Map<string, IChannelFactory>();
498
+ dataTypes.set(mapFactory.type, mapFactory);
499
+ dataTypes.set(consensusRegisterCollectionFactory.type, consensusRegisterCollectionFactory);
500
+
501
+ return new AgentSchedulerRuntime(context, dataTypes, existing);
502
+ }
453
503
  }