@fluidframework/container-runtime 1.1.0-76254 → 1.2.0-78837

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/dist/containerRuntime.d.ts +1 -1
  2. package/dist/containerRuntime.d.ts.map +1 -1
  3. package/dist/containerRuntime.js +8 -8
  4. package/dist/containerRuntime.js.map +1 -1
  5. package/dist/dataStore.d.ts +2 -2
  6. package/dist/dataStore.d.ts.map +1 -1
  7. package/dist/dataStore.js +2 -2
  8. package/dist/dataStore.js.map +1 -1
  9. package/dist/dataStoreContext.d.ts +4 -4
  10. package/dist/dataStoreContext.d.ts.map +1 -1
  11. package/dist/dataStoreContext.js.map +1 -1
  12. package/dist/dataStores.d.ts +2 -2
  13. package/dist/dataStores.d.ts.map +1 -1
  14. package/dist/dataStores.js +6 -5
  15. package/dist/dataStores.js.map +1 -1
  16. package/dist/garbageCollection.d.ts +33 -14
  17. package/dist/garbageCollection.d.ts.map +1 -1
  18. package/dist/garbageCollection.js +243 -122
  19. package/dist/garbageCollection.js.map +1 -1
  20. package/dist/packageVersion.d.ts +1 -1
  21. package/dist/packageVersion.js +1 -1
  22. package/dist/packageVersion.js.map +1 -1
  23. package/dist/summarizerTypes.d.ts +5 -2
  24. package/dist/summarizerTypes.d.ts.map +1 -1
  25. package/dist/summarizerTypes.js.map +1 -1
  26. package/dist/summaryFormat.d.ts +6 -3
  27. package/dist/summaryFormat.d.ts.map +1 -1
  28. package/dist/summaryFormat.js +6 -3
  29. package/dist/summaryFormat.js.map +1 -1
  30. package/dist/summaryGenerator.d.ts.map +1 -1
  31. package/dist/summaryGenerator.js +0 -1
  32. package/dist/summaryGenerator.js.map +1 -1
  33. package/garbageCollection.md +7 -7
  34. package/lib/containerRuntime.d.ts +1 -1
  35. package/lib/containerRuntime.d.ts.map +1 -1
  36. package/lib/containerRuntime.js +9 -9
  37. package/lib/containerRuntime.js.map +1 -1
  38. package/lib/dataStore.d.ts +2 -2
  39. package/lib/dataStore.d.ts.map +1 -1
  40. package/lib/dataStore.js +2 -2
  41. package/lib/dataStore.js.map +1 -1
  42. package/lib/dataStoreContext.d.ts +4 -4
  43. package/lib/dataStoreContext.d.ts.map +1 -1
  44. package/lib/dataStoreContext.js.map +1 -1
  45. package/lib/dataStores.d.ts +2 -2
  46. package/lib/dataStores.d.ts.map +1 -1
  47. package/lib/dataStores.js +6 -5
  48. package/lib/dataStores.js.map +1 -1
  49. package/lib/garbageCollection.d.ts +33 -14
  50. package/lib/garbageCollection.d.ts.map +1 -1
  51. package/lib/garbageCollection.js +242 -121
  52. package/lib/garbageCollection.js.map +1 -1
  53. package/lib/packageVersion.d.ts +1 -1
  54. package/lib/packageVersion.js +1 -1
  55. package/lib/packageVersion.js.map +1 -1
  56. package/lib/summarizerTypes.d.ts +5 -2
  57. package/lib/summarizerTypes.d.ts.map +1 -1
  58. package/lib/summarizerTypes.js.map +1 -1
  59. package/lib/summaryFormat.d.ts +6 -3
  60. package/lib/summaryFormat.d.ts.map +1 -1
  61. package/lib/summaryFormat.js +6 -3
  62. package/lib/summaryFormat.js.map +1 -1
  63. package/lib/summaryGenerator.d.ts.map +1 -1
  64. package/lib/summaryGenerator.js +0 -1
  65. package/lib/summaryGenerator.js.map +1 -1
  66. package/package.json +18 -18
  67. package/src/containerRuntime.ts +60 -58
  68. package/src/dataStore.ts +4 -4
  69. package/src/dataStoreContext.ts +4 -4
  70. package/src/dataStores.ts +5 -5
  71. package/src/garbageCollection.ts +308 -167
  72. package/src/packageVersion.ts +1 -1
  73. package/src/summarizerTypes.ts +6 -3
  74. package/src/summaryFormat.ts +6 -3
  75. package/src/summaryGenerator.ts +0 -2
@@ -62,10 +62,10 @@ export const gcBlobPrefix = "__gc";
62
62
 
63
63
  // Feature gate key to turn GC on / off.
64
64
  const runGCKey = "Fluid.GarbageCollection.RunGC";
65
- // Feature gate key to turn GC test mode on / off.
66
- const gcTestModeKey = "Fluid.GarbageCollection.GCTestMode";
67
65
  // Feature gate key to turn GC sweep on / off.
68
66
  const runSweepKey = "Fluid.GarbageCollection.RunSweep";
67
+ // Feature gate key to turn GC test mode on / off.
68
+ const gcTestModeKey = "Fluid.GarbageCollection.GCTestMode";
69
69
  // Feature gate key to write GC data at the root of the summary tree.
70
70
  const writeAtRootKey = "Fluid.GarbageCollection.WriteDataAtRoot";
71
71
  // Feature gate key to expire a session after a set period of time.
@@ -74,9 +74,14 @@ export const runSessionExpiryKey = "Fluid.GarbageCollection.RunSessionExpiry";
74
74
  export const disableSessionExpiryKey = "Fluid.GarbageCollection.DisableSessionExpiry";
75
75
  // Feature gate key to write the gc blob as a handle if the data is the same.
76
76
  export const trackGCStateKey = "Fluid.GarbageCollection.TrackGCState";
77
+ // Feature gate key to turn GC sweep log off.
78
+ const disableSweepLogKey = "Fluid.GarbageCollection.DisableSweepLog";
79
+
80
+ // One day in milliseconds.
81
+ export const oneDayMs = 1 * 24 * 60 * 60 * 1000;
77
82
 
78
- const defaultInactiveTimeoutMs = 7 * 24 * 60 * 60 * 1000; // 7 days
79
- export const defaultSessionExpiryDurationMs = 30 * 24 * 60 * 60 * 1000; // 30 days
83
+ const defaultInactiveTimeoutMs = 7 * oneDayMs; // 7 days
84
+ export const defaultSessionExpiryDurationMs = 30 * oneDayMs; // 30 days
80
85
 
81
86
  /** The statistics of the system state after a garbage collection run. */
82
87
  export interface IGCStats {
@@ -113,19 +118,6 @@ export const GCNodeType = {
113
118
  };
114
119
  export type GCNodeType = typeof GCNodeType[keyof typeof GCNodeType];
115
120
 
116
- /** The event that is logged when unreferenced node is used after a certain time. */
117
- interface IUnreferencedEvent {
118
- eventName: string;
119
- id: string;
120
- type: GCNodeType;
121
- age: number;
122
- timeout: number;
123
- completedGCRuns: number;
124
- lastSummaryTime?: number;
125
- externalRequest?: boolean;
126
- viaHandle?: boolean;
127
- }
128
-
129
121
  /** Defines the APIs for the runtime object to be passed to the garbage collector. */
130
122
  export interface IGarbageCollectionRuntime {
131
123
  /** Before GC runs, called to notify the runtime to update any pending GC state. */
@@ -155,7 +147,7 @@ export interface IGarbageCollector {
155
147
  readonly trackGCState: boolean;
156
148
  /** Run garbage collection and update the reference / used state of the system. */
157
149
  collectGarbage(
158
- options: { logger?: ITelemetryLogger; runGC?: boolean; runSweep?: boolean; fullGC?: boolean; },
150
+ options: { logger?: ITelemetryLogger; runSweep?: boolean; fullGC?: boolean; },
159
151
  ): Promise<IGCStats>;
160
152
  /** Summarizes the GC data and returns it as a summary tree. */
161
153
  summarize(
@@ -191,61 +183,125 @@ export interface IGarbageCollectorCreateParams {
191
183
  readonly metadata: IContainerRuntimeMetadata | undefined;
192
184
  readonly baseSnapshot: ISnapshotTree | undefined;
193
185
  readonly isSummarizerClient: boolean;
194
- readonly getNodePackagePath: (nodePath: string) => readonly string[] | undefined;
186
+ readonly getNodePackagePath: (nodePath: string) => Promise<readonly string[] | undefined>;
195
187
  readonly getLastSummaryTimestampMs: () => number | undefined;
196
188
  readonly readAndParseBlob: ReadAndParseBlob;
189
+ readonly snapshotCacheExpiryMs?: number;
190
+ }
191
+
192
+ /** The state of node that is unreferenced. */
193
+ const UnreferencedState = {
194
+ /** The node is active, i.e., it can become referenced again. */
195
+ Active: "Active",
196
+ /** The node is inactive, i.e., it should not become referenced. */
197
+ Inactive: "Inactive",
198
+ /** The node is ready to be deleted by the sweep phase. */
199
+ SweepReady: "SweepReady",
200
+ };
201
+ export type UnreferencedState = typeof UnreferencedState[keyof typeof UnreferencedState];
202
+
203
+ /** The event that is logged when unreferenced node is used after a certain time. */
204
+ interface IUnreferencedEventProps {
205
+ usageType: "Changed" | "Loaded" | "Revived";
206
+ state: UnreferencedState;
207
+ id: string;
208
+ type: GCNodeType;
209
+ unrefTime: number;
210
+ age: number;
211
+ completedGCRuns: number;
212
+ fromId?: string;
213
+ timeout?: number;
214
+ lastSummaryTime?: number;
215
+ externalRequest?: boolean;
216
+ viaHandle?: boolean;
197
217
  }
198
218
 
199
219
  /**
200
- * Helper class that tracks the state of an unreferenced node such as the time it was unreferenced. It also sets
201
- * the node's state to inactive if it remains unreferenced for a given amount of time (inactiveTimeoutMs).
220
+ * Helper class that tracks the state of an unreferenced node such as the time it was unreferenced and if it can
221
+ * be deleted by the sweep phase.
202
222
  */
203
223
  class UnreferencedStateTracker {
204
- private _inactive: boolean = false;
205
- public get inactive(): boolean {
206
- return this._inactive;
224
+ private _state: UnreferencedState = UnreferencedState.Active;
225
+ public get state(): UnreferencedState {
226
+ return this._state;
207
227
  }
208
228
 
209
- private timer: Timer | undefined;
229
+ private inactiveTimer: Timer | undefined;
230
+ private sweepTimer: ReturnType<typeof setTimeout> | undefined;
210
231
 
211
232
  constructor(
212
233
  public readonly unreferencedTimestampMs: number,
234
+ /** The time after which node transitions to Inactive state. */
213
235
  private readonly inactiveTimeoutMs: number,
236
+ /** The time after which node transitions to SweepReady state; undefined if session expiry is disabled. */
237
+ private readonly sweepTimeoutMs?: number,
238
+ /** The current reference timestamp; undefined if no ops have ever been processed which can happen in tests. */
214
239
  currentReferenceTimestampMs?: number,
215
240
  ) {
216
- // If there is no current reference timestamp, don't track the node's inactive state. This will happen later
217
- // when updateTracking is called with a reference timestamp.
241
+ // If there is no current reference timestamp, don't track the node's unreferenced state. This will happen
242
+ // later when updateTracking is called with a reference timestamp.
218
243
  if (currentReferenceTimestampMs !== undefined) {
219
244
  this.updateTracking(currentReferenceTimestampMs);
220
245
  }
221
246
  }
222
247
 
223
- /**
224
- * Updates the tracking state based on the provided timestamp.
225
- */
248
+ /* Updates the unreferenced state based on the provided timestamp. */
226
249
  public updateTracking(currentReferenceTimestampMs: number) {
227
250
  const unreferencedDurationMs = currentReferenceTimestampMs - this.unreferencedTimestampMs;
228
- // If the timeout has already expired, the node has become inactive.
229
- if (unreferencedDurationMs > this.inactiveTimeoutMs) {
230
- this._inactive = true;
231
- this.timer?.clear();
251
+
252
+ // If the node has been unreferenced for sweep timeout amount of time, update the state to SweepReady.
253
+ if (this.sweepTimeoutMs !== undefined && unreferencedDurationMs >= this.sweepTimeoutMs) {
254
+ this._state = UnreferencedState.SweepReady;
255
+ this.clearTimers();
256
+ return;
257
+ }
258
+
259
+ // If the node has been unreferenced for inactive timeoutMs amount of time, update the state to inactive.
260
+ // Also, start a timer for the sweep timeout.
261
+ if (unreferencedDurationMs >= this.inactiveTimeoutMs) {
262
+ this._state = UnreferencedState.Inactive;
263
+ this.clearTimers();
264
+
265
+ if (this.sweepTimeoutMs !== undefined) {
266
+ setLongTimeout(
267
+ this.sweepTimeoutMs - unreferencedDurationMs,
268
+ () => { this._state = UnreferencedState.SweepReady; },
269
+ (timer) => { this.sweepTimer = timer; },
270
+ );
271
+ }
232
272
  return;
233
273
  }
234
274
 
235
- // The node isn't inactive yet. Restart a timer for the duration remaining for it to become inactive.
275
+ // The node is still active. Start the inactive timer for the remaining duration.
236
276
  const remainingDurationMs = this.inactiveTimeoutMs - unreferencedDurationMs;
237
- if (this.timer === undefined) {
238
- this.timer = new Timer(remainingDurationMs, () => { this._inactive = true; });
277
+ if (this.inactiveTimer === undefined) {
278
+ const inactiveTimeoutHandler = () => {
279
+ this._state = UnreferencedState.Inactive;
280
+ // After the node becomes inactive, start the sweep timer after which the node will be ready for sweep.
281
+ if (this.sweepTimeoutMs !== undefined) {
282
+ setLongTimeout(
283
+ this.sweepTimeoutMs - this.inactiveTimeoutMs,
284
+ () => { this._state = UnreferencedState.SweepReady; },
285
+ (timer) => { this.sweepTimer = timer; },
286
+ );
287
+ }
288
+ };
289
+ this.inactiveTimer = new Timer(remainingDurationMs, () => inactiveTimeoutHandler());
239
290
  }
240
- this.timer.restart(remainingDurationMs);
291
+ this.inactiveTimer.restart(remainingDurationMs);
241
292
  }
242
293
 
243
- /**
244
- * Stop tracking this node. Reset the unreferenced timer, if any, and reset inactive state.
245
- */
294
+ private clearTimers() {
295
+ this.inactiveTimer?.clear();
296
+ if (this.sweepTimer !== undefined) {
297
+ clearTimeout(this.sweepTimer);
298
+ }
299
+ }
300
+
301
+ /** Stop tracking this node. Reset the unreferenced timers and state, if any. */
246
302
  public stopTracking() {
247
- this.timer?.clear();
248
- this._inactive = false;
303
+ this.clearTimers();
304
+ this._state = UnreferencedState.Active;
249
305
  }
250
306
  }
251
307
 
@@ -271,11 +327,6 @@ export class GarbageCollector implements IGarbageCollector {
271
327
  return new GarbageCollector(createParams);
272
328
  }
273
329
 
274
- /**
275
- * The time in ms to expire a session for a client for gc.
276
- */
277
- private readonly sessionExpiryTimeoutMs: number | undefined;
278
-
279
330
  /**
280
331
  * Tells whether the GC state needs to be reset in the next summary. We need to do this if:
281
332
  * 1. GC was enabled and is now disabled. The GC state needs to be removed and everything becomes referenced.
@@ -359,8 +410,6 @@ export class GarbageCollector implements IGarbageCollector {
359
410
  private readonly initializeBaseStateP: Promise<void>;
360
411
  // The map of data store ids to their GC details in the base summary returned in getDataStoreGCDetails().
361
412
  private readonly baseGCDetailsP: Promise<Map<string, IGarbageCollectionDetailsBase>>;
362
- // The time after which an unreferenced node is inactive.
363
- private readonly inactiveTimeoutMs: number;
364
413
  // Map of node ids to their unreferenced state tracker.
365
414
  private readonly unreferencedNodesState: Map<string, UnreferencedStateTracker> = new Map();
366
415
  // The timeout responsible for closing the container when the session has expired
@@ -370,7 +419,7 @@ export class GarbageCollector implements IGarbageCollector {
370
419
  // per event per node.
371
420
  private readonly loggedUnreferencedEvents: Set<string> = new Set();
372
421
  // Queue for unreferenced events that should be logged the next time GC runs.
373
- private readonly pendingEventsQueue: IUnreferencedEvent[] = [];
422
+ private pendingEventsQueue: IUnreferencedEventProps[] = [];
374
423
 
375
424
  // The number of times GC has successfully completed on this instance of GarbageCollector.
376
425
  private completedRuns = 0;
@@ -379,8 +428,15 @@ export class GarbageCollector implements IGarbageCollector {
379
428
  private readonly gcOptions: IGCRuntimeOptions;
380
429
  private readonly isSummarizerClient: boolean;
381
430
 
431
+ /** The time in ms to expire a session for a client for gc. */
432
+ private readonly sessionExpiryTimeoutMs: number | undefined;
433
+ /** The time after which an unreferenced node is inactive. */
434
+ private readonly inactiveTimeoutMs: number;
435
+ /** The time after which an unreferenced node is ready to be swept. */
436
+ private readonly sweepTimeoutMs: number | undefined;
437
+
382
438
  /** For a given node path, returns the node's package path. */
383
- private readonly getNodePackagePath: (nodePath: string) => readonly string[] | undefined;
439
+ private readonly getNodePackagePath: (nodePath: string) => Promise<readonly string[] | undefined>;
384
440
  /** Returns the timestamp of the last summary generated for this container. */
385
441
  private readonly getLastSummaryTimestampMs: () => number | undefined;
386
442
 
@@ -434,24 +490,28 @@ export class GarbageCollector implements IGarbageCollector {
434
490
  }
435
491
  }
436
492
 
437
- // If session expiry is enabled, we need to close the container when the timeout expires
438
- if (this.sessionExpiryTimeoutMs !== undefined
439
- && this.mc.config.getBoolean(disableSessionExpiryKey) !== true) {
440
- // If Test Override config is set, override Session Expiry timeout
493
+ // If session expiry is enabled, we need to close the container when the session expiry timeout expires.
494
+ if (this.sessionExpiryTimeoutMs !== undefined && this.mc.config.getBoolean(disableSessionExpiryKey) !== true) {
495
+ // If Test Override config is set, override Session Expiry timeout.
441
496
  const overrideSessionExpiryTimeoutMs =
442
497
  this.mc.config.getNumber("Fluid.GarbageCollection.TestOverride.SessionExpiryMs");
443
- if (overrideSessionExpiryTimeoutMs !== undefined) {
444
- this.sessionExpiryTimeoutMs = overrideSessionExpiryTimeoutMs;
445
- }
498
+ const timeoutMs = overrideSessionExpiryTimeoutMs ?? this.sessionExpiryTimeoutMs;
446
499
 
447
- const timeoutMs = this.sessionExpiryTimeoutMs;
448
- setLongTimeout(timeoutMs,
449
- () => {
450
- this.runtime.closeFn(new ClientSessionExpiredError(`Client session expired.`, timeoutMs));
451
- },
452
- (timer) => {
453
- this.sessionExpiryTimer = timer;
454
- });
500
+ setLongTimeout(
501
+ timeoutMs,
502
+ () => { this.runtime.closeFn(new ClientSessionExpiredError(`Client session expired.`, timeoutMs)); },
503
+ (timer) => { this.sessionExpiryTimer = timer; },
504
+ );
505
+
506
+ /**
507
+ * Sweep timeout is the time after which unreferenced content can be swept.
508
+ * Sweep timeout = session expiry timeout + snapshot cache expiry timeout + one day buffer. The buffer is
509
+ * added to account for any clock skew. We use server timestamps throughout so the skew should be minimal
510
+ * but make it one day to be safe.
511
+ */
512
+ if (createParams.snapshotCacheExpiryMs !== undefined) {
513
+ this.sweepTimeoutMs = this.sessionExpiryTimeoutMs + createParams.snapshotCacheExpiryMs + oneDayMs;
514
+ }
455
515
  }
456
516
 
457
517
  // For existing document, the latest summary is the one that we loaded from. So, use its GC version as the
@@ -471,24 +531,29 @@ export class GarbageCollector implements IGarbageCollector {
471
531
  && !this.gcOptions.disableGC
472
532
  );
473
533
 
474
- this.trackGCState = this.mc.config.getBoolean(trackGCStateKey) === true;
475
-
476
534
  /**
477
535
  * Whether sweep should run or not. The following conditions have to be met to run sweep:
478
536
  * 1. Overall GC or mark phase must be enabled (this.shouldRunGC).
479
- * 2. Session expiry and sweep should be enabled for this container. Without session expiry we cannot safely
480
- * delete unreferenced objects. This condition (#2) can be overridden via runSweepKey feature flag.
537
+ * 2. Sweep timeout should be available. Without this, we wouldn't know when an object should be deleted.
538
+ * 3. Sweep should be enabled for this container (this.sweepEnabled). This can be overridden via runSweep
539
+ * feature flag.
481
540
  */
482
- this.shouldRunSweep = this.shouldRunGC && (
483
- this.mc.config.getBoolean(runSweepKey) ?? (this.sessionExpiryTimeoutMs !== undefined && this.sweepEnabled)
484
- );
541
+ this.shouldRunSweep = this.shouldRunGC
542
+ && this.sweepTimeoutMs !== undefined
543
+ && (this.mc.config.getBoolean(runSweepKey) ?? this.sweepEnabled);
544
+
545
+ this.trackGCState = this.mc.config.getBoolean(trackGCStateKey) === true;
485
546
 
486
547
  // Override inactive timeout if test config or gc options to override it is set.
487
- this.inactiveTimeoutMs =
488
- this.mc.config.getNumber("Fluid.GarbageCollection.TestOverride.InactiveTimeoutMs") ??
548
+ this.inactiveTimeoutMs = this.mc.config.getNumber("Fluid.GarbageCollection.TestOverride.InactiveTimeoutMs") ??
489
549
  this.gcOptions.inactiveTimeoutMs ??
490
550
  defaultInactiveTimeoutMs;
491
551
 
552
+ // Inactive timeout must not be greater than sweep timeout since a node goes from active -> inactive -> sweep ready.
553
+ if (this.sweepTimeoutMs !== undefined && this.inactiveTimeoutMs > this.sweepTimeoutMs) {
554
+ throw new UsageError("inactive timeout should not be greated than the sweep timeout");
555
+ }
556
+
492
557
  // Whether we are running in test mode. In this mode, unreferenced nodes are immediately deleted.
493
558
  this.testMode = this.mc.config.getBoolean(gcTestModeKey) ?? this.gcOptions.runGCInTestMode === true;
494
559
 
@@ -589,6 +654,7 @@ export class GarbageCollector implements IGarbageCollector {
589
654
  new UnreferencedStateTracker(
590
655
  nodeData.unreferencedTimestampMs,
591
656
  this.inactiveTimeoutMs,
657
+ this.sweepTimeoutMs,
592
658
  currentReferenceTimestampMs,
593
659
  ),
594
660
  );
@@ -641,6 +707,7 @@ export class GarbageCollector implements IGarbageCollector {
641
707
  sessionExpiry: this.sessionExpiryTimeoutMs,
642
708
  inactiveTimeout: this.inactiveTimeoutMs,
643
709
  existing: createParams.existing,
710
+ trackGCState: this.trackGCState,
644
711
  ...this.gcOptions,
645
712
  });
646
713
  if (this.isSummarizerClient) {
@@ -678,7 +745,6 @@ export class GarbageCollector implements IGarbageCollector {
678
745
  },
679
746
  ): Promise<IGCStats> {
680
747
  const {
681
- runSweep = this.shouldRunSweep,
682
748
  fullGC = this.gcOptions.runFullGC === true || this.summaryStateNeedsReset,
683
749
  } = options;
684
750
 
@@ -687,42 +753,56 @@ export class GarbageCollector implements IGarbageCollector {
687
753
  : this.mc.logger;
688
754
 
689
755
  return PerformanceEvent.timedExecAsync(logger, { eventName: "GarbageCollection" }, async (event) => {
690
- await this.initializeBaseStateP;
691
-
692
- // Let the runtime update its pending state before GC runs.
693
- await this.runtime.updateStateBeforeGC();
756
+ await this.runPreGCSteps();
694
757
 
695
758
  // Get the runtime's GC data and run GC on the reference graph in it.
696
759
  const gcData = await this.runtime.getGCData(fullGC);
697
760
  const gcResult = runGarbageCollection(gcData.gcNodes, ["/"]);
698
- const gcStats = this.generateStatsAndLogEvents(gcResult, logger);
699
761
 
700
- // Update the state since the last GC run. There can be nodes that were referenced between the last and
701
- // the current run. We need to identify than and update their unreferenced state if needed.
702
- this.updateStateSinceLastRun(gcData, logger);
762
+ const gcStats = await this.runPostGCSteps(gcData, gcResult, logger);
763
+ event.end({ ...gcStats });
764
+ this.completedRuns++;
765
+ return gcStats;
766
+ }, { end: true, cancel: "error" });
767
+ }
703
768
 
704
- // Update the current state of the system based on the GC run.
705
- const currentReferenceTimestampMs = this.runtime.getCurrentReferenceTimestampMs();
706
- this.updateCurrentState(gcData, gcResult, currentReferenceTimestampMs);
769
+ private async runPreGCSteps() {
770
+ // Ensure that base state has been initialized.
771
+ await this.initializeBaseStateP;
772
+ // Let the runtime update its pending state before GC runs.
773
+ await this.runtime.updateStateBeforeGC();
774
+ }
707
775
 
708
- this.runtime.updateUsedRoutes(gcResult.referencedNodeIds, currentReferenceTimestampMs);
776
+ private async runPostGCSteps(gcData: IGarbageCollectionData, gcResult: IGCResult, logger: ITelemetryLogger) {
777
+ // Generate statistics from the current run. This is done before updating the current state because it
778
+ // generates some of its data based on previous state of the system.
779
+ const gcStats = this.generateStats(gcResult);
709
780
 
710
- if (runSweep) {
711
- // Placeholder for running sweep logic.
712
- }
781
+ // Update the state since the last GC run. There can be nodes that were referenced between the last and
782
+ // the current run. We need to identify them and update their unreferenced state if needed.
783
+ this.updateStateSinceLastRun(gcData, logger);
713
784
 
714
- // If we are running in GC test mode, delete objects for unused routes. This enables testing scenarios
715
- // involving access to deleted data.
716
- if (this.testMode) {
717
- this.runtime.deleteUnusedRoutes(gcResult.deletedNodeIds);
718
- }
785
+ // Update the current state and update the runtime of all routes or ids that are used as per the GC run.
786
+ const currentReferenceTimestampMs = this.runtime.getCurrentReferenceTimestampMs();
787
+ this.updateCurrentState(gcData, gcResult, currentReferenceTimestampMs);
788
+ this.runtime.updateUsedRoutes(gcResult.referencedNodeIds, currentReferenceTimestampMs);
719
789
 
720
- event.end({ ...gcStats });
790
+ // Log events for objects that are ready to be deleted by sweep. When we have sweep enabled, we will
791
+ // delete these objects here instead.
792
+ this.logSweepEvents(logger, currentReferenceTimestampMs);
721
793
 
722
- this.completedRuns++;
794
+ // If we are running in GC test mode, delete objects for unused routes. This enables testing scenarios
795
+ // involving access to deleted data.
796
+ if (this.testMode) {
797
+ this.runtime.deleteUnusedRoutes(gcResult.deletedNodeIds);
798
+ }
723
799
 
724
- return gcStats;
725
- }, { end: true, cancel: "error" });
800
+ // Log pending unreferenced events such as a node being used after inactive. This is done after GC runs and
801
+ // updates its state so that we don't send false positives based on intermediate state. For example, we may get
802
+ // reference to an unreferenced node from another unreferenced node which means the node wasn't revived.
803
+ await this.logUnreferencedEvents(logger);
804
+
805
+ return gcStats;
726
806
  }
727
807
 
728
808
  /**
@@ -863,13 +943,18 @@ export class GarbageCollector implements IGarbageCollector {
863
943
  return;
864
944
  }
865
945
 
866
- this.logIfInactive(
867
- reason,
868
- nodePath,
869
- timestampMs,
870
- packagePath,
871
- requestHeaders,
872
- );
946
+ const nodeStateTracker = this.unreferencedNodesState.get(nodePath);
947
+ if (nodeStateTracker && nodeStateTracker.state !== UnreferencedState.Active) {
948
+ this.inactiveNodeUsed(
949
+ reason,
950
+ nodePath,
951
+ nodeStateTracker,
952
+ undefined /* fromNodeId */,
953
+ packagePath,
954
+ timestampMs,
955
+ requestHeaders,
956
+ );
957
+ }
873
958
  }
874
959
 
875
960
  /**
@@ -888,11 +973,10 @@ export class GarbageCollector implements IGarbageCollector {
888
973
  outboundRoutes.push(toNodePath);
889
974
  this.newReferencesSinceLastRun.set(fromNodePath, outboundRoutes);
890
975
 
891
- // If the node that got referenced is inactive, log an event as that may indicate use-after-delete.
892
- this.logIfInactive(
893
- "Revived",
894
- toNodePath,
895
- );
976
+ const nodeStateTracker = this.unreferencedNodesState.get(toNodePath);
977
+ if (nodeStateTracker && nodeStateTracker.state !== UnreferencedState.Active) {
978
+ this.inactiveNodeUsed("Revived", toNodePath, nodeStateTracker, fromNodePath);
979
+ }
896
980
  }
897
981
 
898
982
  public dispose(): void {
@@ -952,6 +1036,7 @@ export class GarbageCollector implements IGarbageCollector {
952
1036
  new UnreferencedStateTracker(
953
1037
  currentReferenceTimestampMs,
954
1038
  this.inactiveTimeoutMs,
1039
+ this.sweepTimeoutMs,
955
1040
  currentReferenceTimestampMs,
956
1041
  ),
957
1042
  );
@@ -1009,10 +1094,10 @@ export class GarbageCollector implements IGarbageCollector {
1009
1094
  * references added new outbound references before getting deleted, we need to detect them.
1010
1095
  * 2. We need new outbound references since last run because some of them may have been deleted later. If those
1011
1096
  * references added new outbound references before getting deleted, we need to detect them.
1012
- * 3. We need data from the current run because currently we may not detect when DDSs are referenced:
1013
- * - We don't require DDSs handles to be stored in a referenced DDS. For this, we need GC at DDS level
1097
+ * 3. We need data from the current run because currently we may not detect when DDSes are referenced:
1098
+ * - We don't require DDSes handles to be stored in a referenced DDS. For this, we need GC at DDS level
1014
1099
  * which is tracked by https://github.com/microsoft/FluidFramework/issues/8470.
1015
- * - A new data store may have "root" DDSs already created and we don't detect them today.
1100
+ * - A new data store may have "root" DDSes already created and we don't detect them today.
1016
1101
  */
1017
1102
  const gcDataSuperSet = concatGarbageCollectionData(this.previousGCDataFromLastRun, currentGCData);
1018
1103
  this.newReferencesSinceLastRun.forEach((outboundRoutes: string[], sourceNodeId: string) => {
@@ -1093,25 +1178,11 @@ export class GarbageCollector implements IGarbageCollector {
1093
1178
  }
1094
1179
 
1095
1180
  /**
1096
- * Generates the stats of a garbage collection run from the given results of the run. Also, logs any pending events
1097
- * in the pendingEventsQueue. This should be called before updating the current state because it generates stats
1098
- * based on previous state of the system.
1181
+ * Generates the stats of a garbage collection run from the given results of the run.
1099
1182
  * @param gcResult - The result of a GC run.
1100
1183
  * @returns the GC stats of the GC run.
1101
1184
  */
1102
- private generateStatsAndLogEvents(gcResult: IGCResult, logger: ITelemetryLogger): IGCStats {
1103
- // Log pending events for unreferenced nodes after GC has run. We should have the package data available for
1104
- // them now since the GC run should have loaded these nodes.
1105
- let event = this.pendingEventsQueue.shift();
1106
- while (event !== undefined) {
1107
- const pkg = this.getNodePackagePath(event.id);
1108
- logger.sendErrorEvent({
1109
- ...event,
1110
- pkg: pkg ? { value: `/${pkg.join("/")}`, tag: TelemetryDataTag.PackageData } : undefined,
1111
- });
1112
- event = this.pendingEventsQueue.shift();
1113
- }
1114
-
1185
+ private generateStats(gcResult: IGCResult): IGCStats {
1115
1186
  const gcStats: IGCStats = {
1116
1187
  nodeCount: 0,
1117
1188
  dataStoreCount: 0,
@@ -1169,18 +1240,66 @@ export class GarbageCollector implements IGarbageCollector {
1169
1240
  }
1170
1241
 
1171
1242
  /**
1172
- * Logs an event if a node is inactive and is used.
1243
+ * For nodes that are ready to sweep, log an event for now. Until we start running sweep which deletes objects,
1244
+ * this will give us a view into how much deleted content a container has.
1245
+ */
1246
+ private logSweepEvents(logger: ITelemetryLogger, currentReferenceTimestampMs?: number) {
1247
+ if (this.mc.config.getBoolean(disableSweepLogKey) === true
1248
+ || currentReferenceTimestampMs === undefined
1249
+ || this.sweepTimeoutMs === undefined) {
1250
+ return;
1251
+ }
1252
+
1253
+ this.unreferencedNodesState.forEach((nodeStateTracker, nodeId) => {
1254
+ if (nodeStateTracker.state !== UnreferencedState.SweepReady) {
1255
+ return;
1256
+ }
1257
+
1258
+ const nodeType = this.runtime.getNodeType(nodeId);
1259
+ if (nodeType !== GCNodeType.DataStore && nodeType !== GCNodeType.Blob) {
1260
+ return;
1261
+ }
1262
+
1263
+ // Log deleted event for each node only once to reduce noise in telemetry.
1264
+ const uniqueEventId = `Deleted-${nodeId}`;
1265
+ if (this.loggedUnreferencedEvents.has(uniqueEventId)) {
1266
+ return;
1267
+ }
1268
+ this.loggedUnreferencedEvents.add(uniqueEventId);
1269
+ logger.sendTelemetryEvent({
1270
+ eventName: "GCObjectDeleted",
1271
+ id: nodeId,
1272
+ type: nodeType,
1273
+ age: currentReferenceTimestampMs - nodeStateTracker.unreferencedTimestampMs,
1274
+ timeout: this.sweepTimeoutMs,
1275
+ completedGCRuns: this.completedRuns,
1276
+ lastSummaryTime: this.getLastSummaryTimestampMs(),
1277
+ });
1278
+ });
1279
+ }
1280
+
1281
+ /**
1282
+ * Called when a node is used after it has become inactive. Queue up an event that will be logged the next time GC runs.
1173
1283
  */
1174
- private logIfInactive(
1175
- eventType: "Changed" | "Loaded" | "Revived",
1284
+ private inactiveNodeUsed(
1285
+ usageType: "Changed" | "Loaded" | "Revived",
1176
1286
  nodeId: string,
1177
- currentReferenceTimestampMs = this.runtime.getCurrentReferenceTimestampMs(),
1287
+ nodeStateTracker: UnreferencedStateTracker,
1288
+ fromNodeId?: string,
1178
1289
  packagePath?: readonly string[],
1290
+ currentReferenceTimestampMs = this.runtime.getCurrentReferenceTimestampMs(),
1179
1291
  requestHeaders?: IRequestHeader,
1180
1292
  ) {
1181
1293
  // If there is no reference timestamp to work with, no ops have been processed after creation. If so, skip
1182
1294
  // logging as nothing interesting would have happened worth logging.
1183
- if (currentReferenceTimestampMs === undefined) {
1295
+ // If the node is active, skip logging.
1296
+ if (currentReferenceTimestampMs === undefined || nodeStateTracker.state === UnreferencedState.Active) {
1297
+ return;
1298
+ }
1299
+
1300
+ // For non-summarizer clients, only log "Loaded" type events since these objects may not be loaded in the
1301
+ // summarizer clients if they are based off of user actions (such as scrolling to content for these objects).
1302
+ if (!this.isSummarizerClient && usageType !== "Loaded") {
1184
1303
  return;
1185
1304
  }
1186
1305
 
@@ -1191,43 +1310,65 @@ export class GarbageCollector implements IGarbageCollector {
1191
1310
  return;
1192
1311
  }
1193
1312
 
1194
- // For non-summarizer clients, only log "Loaded" type events since these objects may not be loaded in the
1195
- // summarizer clients if they are based off of user actions (such as scrolling to content for these objects).
1196
- if (!this.isSummarizerClient && eventType !== "Loaded") {
1313
+ const state = nodeStateTracker.state;
1314
+ const uniqueEventId = `${state}-${nodeId}-${usageType}`;
1315
+ if (this.loggedUnreferencedEvents.has(uniqueEventId)) {
1197
1316
  return;
1198
1317
  }
1318
+ this.loggedUnreferencedEvents.add(uniqueEventId);
1319
+
1320
+ const propsToLog = {
1321
+ id: nodeId,
1322
+ type: nodeType,
1323
+ unrefTime: nodeStateTracker.unreferencedTimestampMs,
1324
+ age: currentReferenceTimestampMs - nodeStateTracker.unreferencedTimestampMs,
1325
+ timeout: nodeStateTracker.state === UnreferencedState.Inactive
1326
+ ? this.inactiveTimeoutMs
1327
+ : this.sweepTimeoutMs,
1328
+ completedGCRuns: this.completedRuns,
1329
+ lastSummaryTime: this.getLastSummaryTimestampMs(),
1330
+ externalRequest: requestHeaders?.[RuntimeHeaders.externalRequest],
1331
+ viaHandle: requestHeaders?.[RuntimeHeaders.viaHandle],
1332
+ fromId: fromNodeId,
1333
+ };
1199
1334
 
1200
- const eventName = `inactiveObject_${eventType}`;
1201
- // We log a particular event for a given node only once so that it is not too noisy.
1202
- const uniqueEventId = `${nodeId}-${eventName}`;
1203
- const nodeState = this.unreferencedNodesState.get(nodeId);
1204
- if (nodeState?.inactive && !this.loggedUnreferencedEvents.has(uniqueEventId)) {
1205
- this.loggedUnreferencedEvents.add(uniqueEventId);
1206
- // Save all the properties at this point in time so that if we log this later, these values are preserved.
1207
- const event: IUnreferencedEvent = {
1208
- eventName,
1209
- id: nodeId,
1210
- type: nodeType,
1211
- age: currentReferenceTimestampMs - nodeState.unreferencedTimestampMs,
1212
- timeout: this.inactiveTimeoutMs,
1213
- completedGCRuns: this.completedRuns,
1214
- lastSummaryTime: this.getLastSummaryTimestampMs(),
1215
- externalRequest: requestHeaders?.[RuntimeHeaders.externalRequest],
1216
- viaHandle: requestHeaders?.[RuntimeHeaders.viaHandle],
1217
- };
1335
+ // For summarizer client, queue the event so it is logged the next time GC runs if the event is still valid.
1336
+ // For non-summarizer client, log the event now since GC won't run on it. This may result in false positives
1337
+ // but it's a good signal nonetheless and we can consume it with a grain of salt.
1338
+ if (this.isSummarizerClient) {
1339
+ this.pendingEventsQueue.push({ ...propsToLog, usageType, state });
1340
+ } else {
1341
+ this.mc.logger.sendErrorEvent({
1342
+ ...propsToLog,
1343
+ eventName: `${state}Object_${usageType}`,
1344
+ pkg: packagePath ? { value: packagePath.join("/"), tag: TelemetryDataTag.CodeArtifact } : undefined,
1345
+ });
1346
+ }
1347
+ }
1218
1348
 
1219
- // If the package data for the node exists, log immediately. Otherwise, queue it and it will be logged the
1220
- // next time GC runs as the package data should be available then.
1221
- const pkg = packagePath ?? this.getNodePackagePath(nodeId);
1222
- if (pkg !== undefined) {
1223
- this.mc.logger.sendErrorEvent({
1224
- ...event,
1225
- pkg: { value: pkg.join("/"), tag: TelemetryDataTag.PackageData },
1349
+ private async logUnreferencedEvents(logger: ITelemetryLogger) {
1350
+ for (const eventProps of this.pendingEventsQueue) {
1351
+ const { usageType, state, ...propsToLog } = eventProps;
1352
+ /**
1353
+ * Revived event is logged only if the node is active. If the node is not active, the reference to it was
1354
+ * from another unreferenced node and this scenario is not interesting to log.
1355
+ * Loaded and Changed events are logged only if the node is not active. If the node is active, it was
1356
+ * revived and a Revived event will be logged for it.
1357
+ */
1358
+ const nodeStateTracker = this.unreferencedNodesState.get(eventProps.id);
1359
+ const active = nodeStateTracker === undefined || nodeStateTracker.state === UnreferencedState.Active;
1360
+ if ((usageType === "Revived") === active) {
1361
+ const pkg = await this.getNodePackagePath(eventProps.id);
1362
+ const fromPkg = eventProps.fromId ? await this.getNodePackagePath(eventProps.fromId) : undefined;
1363
+ logger.sendErrorEvent({
1364
+ ...propsToLog,
1365
+ eventName: `${state}Object_${usageType}`,
1366
+ pkg: pkg ? { value: pkg.join("/"), tag: TelemetryDataTag.CodeArtifact } : undefined,
1367
+ fromPkg: fromPkg ? { value: fromPkg.join("/"), tag: TelemetryDataTag.CodeArtifact } : undefined,
1226
1368
  });
1227
- } else {
1228
- this.pendingEventsQueue.push(event);
1229
1369
  }
1230
1370
  }
1371
+ this.pendingEventsQueue = [];
1231
1372
  }
1232
1373
  }
1233
1374