@fluidframework/container-runtime 2.0.0-internal.7.4.5 → 2.0.0-internal.7.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/dist/blobManager.d.ts.map +1 -1
  2. package/dist/blobManager.js +0 -5
  3. package/dist/blobManager.js.map +1 -1
  4. package/dist/containerRuntime.d.ts.map +1 -1
  5. package/dist/containerRuntime.js +0 -3
  6. package/dist/containerRuntime.js.map +1 -1
  7. package/dist/dataStores.d.ts +1 -1
  8. package/dist/dataStores.d.ts.map +1 -1
  9. package/dist/dataStores.js +2 -5
  10. package/dist/dataStores.js.map +1 -1
  11. package/dist/gc/garbageCollection.d.ts +15 -14
  12. package/dist/gc/garbageCollection.d.ts.map +1 -1
  13. package/dist/gc/garbageCollection.js +115 -125
  14. package/dist/gc/garbageCollection.js.map +1 -1
  15. package/dist/gc/gcConfigs.d.ts.map +1 -1
  16. package/dist/gc/gcConfigs.js +10 -3
  17. package/dist/gc/gcConfigs.js.map +1 -1
  18. package/dist/gc/gcDefinitions.d.ts +11 -8
  19. package/dist/gc/gcDefinitions.d.ts.map +1 -1
  20. package/dist/gc/gcDefinitions.js +7 -4
  21. package/dist/gc/gcDefinitions.js.map +1 -1
  22. package/dist/gc/gcSummaryStateTracker.d.ts +1 -1
  23. package/dist/gc/gcSummaryStateTracker.d.ts.map +1 -1
  24. package/dist/gc/gcSummaryStateTracker.js +3 -0
  25. package/dist/gc/gcSummaryStateTracker.js.map +1 -1
  26. package/dist/gc/gcTelemetry.d.ts.map +1 -1
  27. package/dist/gc/gcTelemetry.js +4 -14
  28. package/dist/gc/gcTelemetry.js.map +1 -1
  29. package/dist/gc/index.d.ts +1 -1
  30. package/dist/gc/index.d.ts.map +1 -1
  31. package/dist/gc/index.js +2 -2
  32. package/dist/gc/index.js.map +1 -1
  33. package/dist/packageVersion.d.ts +1 -1
  34. package/dist/packageVersion.js +1 -1
  35. package/dist/packageVersion.js.map +1 -1
  36. package/lib/blobManager.d.ts.map +1 -1
  37. package/lib/blobManager.js +0 -5
  38. package/lib/blobManager.js.map +1 -1
  39. package/lib/containerRuntime.d.ts.map +1 -1
  40. package/lib/containerRuntime.js +0 -3
  41. package/lib/containerRuntime.js.map +1 -1
  42. package/lib/dataStores.d.ts +1 -1
  43. package/lib/dataStores.d.ts.map +1 -1
  44. package/lib/dataStores.js +2 -5
  45. package/lib/dataStores.js.map +1 -1
  46. package/lib/gc/garbageCollection.d.ts +15 -14
  47. package/lib/gc/garbageCollection.d.ts.map +1 -1
  48. package/lib/gc/garbageCollection.js +116 -126
  49. package/lib/gc/garbageCollection.js.map +1 -1
  50. package/lib/gc/gcConfigs.d.ts.map +1 -1
  51. package/lib/gc/gcConfigs.js +11 -4
  52. package/lib/gc/gcConfigs.js.map +1 -1
  53. package/lib/gc/gcDefinitions.d.ts +11 -8
  54. package/lib/gc/gcDefinitions.d.ts.map +1 -1
  55. package/lib/gc/gcDefinitions.js +6 -3
  56. package/lib/gc/gcDefinitions.js.map +1 -1
  57. package/lib/gc/gcSummaryStateTracker.d.ts +1 -1
  58. package/lib/gc/gcSummaryStateTracker.d.ts.map +1 -1
  59. package/lib/gc/gcSummaryStateTracker.js +3 -0
  60. package/lib/gc/gcSummaryStateTracker.js.map +1 -1
  61. package/lib/gc/gcTelemetry.d.ts.map +1 -1
  62. package/lib/gc/gcTelemetry.js +4 -14
  63. package/lib/gc/gcTelemetry.js.map +1 -1
  64. package/lib/gc/index.d.ts +1 -1
  65. package/lib/gc/index.d.ts.map +1 -1
  66. package/lib/gc/index.js +1 -1
  67. package/lib/gc/index.js.map +1 -1
  68. package/lib/packageVersion.d.ts +1 -1
  69. package/lib/packageVersion.js +1 -1
  70. package/lib/packageVersion.js.map +1 -1
  71. package/package.json +16 -16
  72. package/src/blobManager.ts +0 -6
  73. package/src/containerRuntime.ts +0 -3
  74. package/src/dataStores.ts +2 -5
  75. package/src/gc/garbageCollection.ts +136 -142
  76. package/src/gc/gcConfigs.ts +13 -3
  77. package/src/gc/gcDefinitions.ts +12 -8
  78. package/src/gc/gcSummaryStateTracker.ts +5 -1
  79. package/src/gc/gcTelemetry.ts +4 -13
  80. package/src/gc/index.ts +1 -1
  81. package/src/packageVersion.ts +1 -1
@@ -3,7 +3,7 @@
3
3
  * Licensed under the MIT License.
4
4
  */
5
5
 
6
- import { LazyPromise, Timer } from "@fluidframework/core-utils";
6
+ import { assert, LazyPromise, Timer } from "@fluidframework/core-utils";
7
7
  import { IRequest } from "@fluidframework/core-interfaces";
8
8
  import {
9
9
  gcTreeKey,
@@ -21,6 +21,7 @@ import {
21
21
  MonitoringContext,
22
22
  PerformanceEvent,
23
23
  } from "@fluidframework/telemetry-utils";
24
+ import { BlobManager } from "../blobManager";
24
25
  import {
25
26
  InactiveResponseHeaderKey,
26
27
  RuntimeHeaderData,
@@ -141,8 +142,6 @@ export class GarbageCollector implements IGarbageCollector {
141
142
  ) => Promise<readonly string[] | undefined>;
142
143
  /** Returns the timestamp of the last summary generated for this container. */
143
144
  private readonly getLastSummaryTimestampMs: () => number | undefined;
144
- /** Returns true if connection is active, i.e. it's "write" connection and the runtime is connected. */
145
- private readonly activeConnection: () => boolean;
146
145
 
147
146
  private readonly submitMessage: (message: ContainerRuntimeGCMessage) => void;
148
147
 
@@ -160,7 +159,6 @@ export class GarbageCollector implements IGarbageCollector {
160
159
  this.isSummarizerClient = createParams.isSummarizerClient;
161
160
  this.getNodePackagePath = createParams.getNodePackagePath;
162
161
  this.getLastSummaryTimestampMs = createParams.getLastSummaryTimestampMs;
163
- this.activeConnection = createParams.activeConnection;
164
162
  this.submitMessage = createParams.submitMessage;
165
163
 
166
164
  const baseSnapshot = createParams.baseSnapshot;
@@ -255,28 +253,18 @@ export class GarbageCollector implements IGarbageCollector {
255
253
  );
256
254
 
257
255
  /**
258
- * Set up the initializer which initializes the GC state from the data in base snapshot. This is done when
259
- * connected in write mode or when GC runs the first time. It sets up all unreferenced nodes from the base
260
- * GC state and updates their inactive or sweep-ready state.
256
+ * Set up the initializer which initializes the GC state from the data in base snapshot. It sets up GC data
257
+ * from the base GC state and starts tracking the state of unreferenced nodes.
258
+ *
259
+ * Must only be called if there is a current reference timestamp.
261
260
  */
262
261
  this.initializeGCStateFromBaseSnapshotP = new LazyPromise<void>(async () => {
263
- /**
264
- * If there is no current reference timestamp, skip initialization. We need the current timestamp to track
265
- * how long objects have been unreferenced and if they can be deleted.
266
- *
267
- * Note that the only scenario where there is no reference timestamp is when no ops have ever been processed
268
- * for this container and it is in read mode. In this scenario, there is no point in running GC anyway
269
- * because references in the container do not change without any ops, i.e., there is nothing to collect.
270
- */
271
262
  const currentReferenceTimestampMs = this.runtime.getCurrentReferenceTimestampMs();
272
- if (currentReferenceTimestampMs === undefined) {
273
- // Log an event so we can evaluate how often we run into this scenario.
274
- this.mc.logger.sendErrorEvent({
275
- eventName: "GarbageCollectorInitializedWithoutTimestamp",
276
- gcConfigs: JSON.stringify(this.configs),
277
- });
278
- return;
279
- }
263
+ assert(
264
+ currentReferenceTimestampMs !== undefined,
265
+ "Trying to initialize GC state without current timestamp",
266
+ );
267
+
280
268
  /**
281
269
  * The base snapshot data will not be present if the container is loaded from:
282
270
  * 1. The first summary created by the detached container.
@@ -284,11 +272,31 @@ export class GarbageCollector implements IGarbageCollector {
284
272
  * 3. A summary that was generated before GC even existed.
285
273
  */
286
274
  const baseSnapshotData = await this.baseSnapshotDataP;
287
- if (baseSnapshotData === undefined) {
275
+ this.summaryStateTracker.initializeBaseState(baseSnapshotData);
276
+
277
+ if (baseSnapshotData?.gcState === undefined) {
288
278
  return;
289
279
  }
290
- this.updateStateFromSnapshotData(baseSnapshotData, currentReferenceTimestampMs);
291
- this.summaryStateTracker.initializeBaseState(baseSnapshotData);
280
+
281
+ // Update unreferenced state tracking as per the GC state in the snapshot data and update gcDataFromLastRun
282
+ // to the GC data from the snapshot data.
283
+ const gcNodes: { [id: string]: string[] } = {};
284
+ for (const [nodeId, nodeData] of Object.entries(baseSnapshotData.gcState.gcNodes)) {
285
+ if (nodeData.unreferencedTimestampMs !== undefined) {
286
+ this.unreferencedNodesState.set(
287
+ nodeId,
288
+ new UnreferencedStateTracker(
289
+ nodeData.unreferencedTimestampMs,
290
+ this.configs.inactiveTimeoutMs,
291
+ currentReferenceTimestampMs,
292
+ this.configs.sweepTimeoutMs,
293
+ this.configs.sweepGracePeriodMs,
294
+ ),
295
+ );
296
+ }
297
+ gcNodes[nodeId] = Array.from(nodeData.outboundRoutes);
298
+ }
299
+ this.gcDataFromLastRun = { gcNodes };
292
300
  });
293
301
 
294
302
  // Get the GC details from the GC state in the base summary. This is returned in getBaseGCDetails which is
@@ -322,8 +330,10 @@ export class GarbageCollector implements IGarbageCollector {
322
330
  }
323
331
 
324
332
  /**
325
- * Called during container initialization. Initialize from the tombstone state in the base snapshot. This is done
326
- * during initialization so that deleted or tombstoned objects are marked as such before they are loaded or used.
333
+ * Called during container initialization. Initializes the tombstone and deleted nodes state from the base snapshot.
334
+ * Also, initializes the GC state including unreferenced nodes tracking if a current reference timestamp exists.
335
+ * Note that if there is any GC state in the base snapshot, then there will definitely be a reference timestamp
336
+ * to work with - The GC state would have been generated using a timestamp which is part of the snapshot.
327
337
  */
328
338
  public async initializeBaseState(): Promise<void> {
329
339
  const baseSnapshotData = await this.baseSnapshotDataP;
@@ -350,115 +360,59 @@ export class GarbageCollector implements IGarbageCollector {
350
360
  this.tombstones = Array.from(baseSnapshotData.tombstones);
351
361
  this.runtime.updateTombstonedRoutes(this.tombstones);
352
362
  }
363
+
364
+ await this.initializeOrUpdateGCState();
353
365
  }
354
366
 
355
367
  /**
356
- * Update state from the given snapshot data. This is done during load and during refreshing state from a snapshot.
357
- * All current tracking is reset and updated from the data in the snapshot.
358
- * @param snapshotData - The snapshot data to update state from. If this is undefined, all GC state and tracking
359
- * is reset.
360
- * @param currentReferenceTimestampMs - The current reference timestamp for marking unreferenced nodes' unreferenced
361
- * timestamp.
368
+ * Initialize the GC state if not already initialized. If GC state is already initialized, update the unreferenced
369
+ * state tracking as per the current reference timestamp.
362
370
  */
363
- private updateStateFromSnapshotData(
364
- snapshotData: IGarbageCollectionSnapshotData | undefined,
365
- currentReferenceTimestampMs: number,
366
- ) {
367
- /**
368
- * Note: "newReferencesSinceLastRun" is not reset here. This is done because there may be references since the
369
- * snapshot that we are updating state from. For example, this client may have processed ops till seq#1000 and
370
- * its refreshing state from a summary that happened at seq#900. In this case, there may be references between
371
- * seq#901 and seq#1000 that we don't want to reset.
372
- * Unfortunately, there is no way to track the seq# of ops that add references, so we choose to not reset any
373
- * references here. This should be fine because, in the worst case, we may end up updating the unreferenced
374
- * timestamp of a node which will delay its deletion. Although not ideal, this will only happen in rare
375
- * scenarios, so it should be okay.
376
- */
377
-
378
- // Clear all existing unreferenced state tracking.
379
- for (const [, nodeStateTracker] of this.unreferencedNodesState) {
380
- nodeStateTracker.stopTracking();
381
- }
382
- this.unreferencedNodesState.clear();
383
-
384
- // If running sweep, the tombstone state represents the list of nodes that have been deleted during sweep.
385
- // If running in tombstone mode, the tombstone state represents the list of nodes that have been marked as
386
- // tombstones.
387
- // If this call is because we are refreshing from a snapshot due to an ack, it is likely that the GC state
388
- // in the snapshot is newer than this client's. And so, the deleted / tombstone nodes need to be updated.
389
- if (this.configs.shouldRunSweep) {
390
- const snapshotDeletedNodes = snapshotData?.deletedNodes
391
- ? new Set(snapshotData.deletedNodes)
392
- : undefined;
393
- // If the snapshot contains deleted nodes that are not yet deleted by this client, ask the runtime to
394
- // delete them.
395
- if (snapshotDeletedNodes !== undefined) {
396
- const newDeletedNodes: string[] = [];
397
- for (const nodeId of snapshotDeletedNodes) {
398
- if (!this.deletedNodes.has(nodeId)) {
399
- newDeletedNodes.push(nodeId);
400
- }
401
- }
402
- if (newDeletedNodes.length > 0) {
403
- // Call container runtime to delete these nodes and add deleted nodes to this.deletedNodes.
404
- }
405
- }
406
- } else if (this.configs.tombstoneMode) {
407
- // The snapshot may contain more or fewer tombstone nodes than this client. Update tombstone state and
408
- // notify the runtime to update its state as well.
409
- this.tombstones = snapshotData?.tombstones ? Array.from(snapshotData.tombstones) : [];
410
- this.runtime.updateTombstonedRoutes(this.tombstones);
371
+ private async initializeOrUpdateGCState() {
372
+ const currentReferenceTimestampMs = this.runtime.getCurrentReferenceTimestampMs();
373
+ if (currentReferenceTimestampMs === undefined) {
374
+ return;
411
375
  }
412
376
 
413
- // If there is no snapshot data, it means this snapshot was generated with GC disabled. Unset all GC state.
414
- if (snapshotData?.gcState === undefined) {
415
- this.gcDataFromLastRun = undefined;
377
+ // If the GC state hasn't been initialized yet, initialize it and return.
378
+ if (this.gcDataFromLastRun === undefined) {
379
+ await this.initializeGCStateFromBaseSnapshotP;
416
380
  return;
417
381
  }
418
382
 
419
- // Update unreferenced state tracking as per the GC state in the snapshot data and update gcDataFromLastRun
420
- // to the GC data from the snapshot data.
421
- const gcNodes: { [id: string]: string[] } = {};
422
- for (const [nodeId, nodeData] of Object.entries(snapshotData.gcState.gcNodes)) {
423
- if (nodeData.unreferencedTimestampMs !== undefined) {
424
- this.unreferencedNodesState.set(
425
- nodeId,
426
- new UnreferencedStateTracker(
427
- nodeData.unreferencedTimestampMs,
428
- this.configs.inactiveTimeoutMs,
429
- currentReferenceTimestampMs,
430
- this.configs.sweepTimeoutMs,
431
- this.configs.sweepGracePeriodMs,
432
- ),
433
- );
434
- }
435
- gcNodes[nodeId] = Array.from(nodeData.outboundRoutes);
383
+ // If the GC state has been initialized, update the tracking of unreferenced nodes as per the current
384
+ // reference timestamp.
385
+ for (const [, nodeStateTracker] of this.unreferencedNodesState) {
386
+ nodeStateTracker.updateTracking(currentReferenceTimestampMs);
436
387
  }
437
- this.gcDataFromLastRun = { gcNodes };
438
388
  }
439
389
 
440
390
  /**
441
391
  * Called when the connection state of the runtime changes, i.e., it connects or disconnects. GC subscribes to this
442
- * to initialize the base state for non-summarizer clients so that they can track inactive / sweep-ready nodes.
392
+ * to initialize or update the unreference state tracking.
443
393
  * @param connected - Whether the runtime connected / disconnected.
444
394
  * @param clientId - The clientId of this runtime.
445
395
  */
446
396
  public setConnectionState(connected: boolean, clientId?: string | undefined): void {
447
397
  /**
448
- * For all clients, initialize the base state when the container becomes active, i.e., it transitions
449
- * to "write" mode. This will ensure that the container's own join op is processed and there is a recent
450
- * reference timestamp that will be used to update the state of unreferenced nodes. Also, all trailing ops which
451
- * could affect the GC state will have been processed.
452
- *
453
- * If GC is up-to-date for the client and the summarizing client, there will be an doubling of both
454
- * InactiveObject_Loaded and SweepReady_Loaded errors, as there will be one from the sending client and one from
455
- * the receiving summarizer client.
398
+ * When the client connects (or reconnects), attempt to initialize or update the GC state. This will keep
399
+ * the unreferenced state tracking updated as per the reference timestamp at the time of connection.
456
400
  *
457
- * Ideally, this initialization should only be done for summarizer client. However, we are currently rolling out
458
- * sweep in phases and we want to track when inactive and sweep-ready objects are used in any client.
401
+ * During GC initialization and during connections in read mode, it is possible that either no ops are
402
+ * processed or only trailing ops are processed. This means that the GC state is not initialized or initialized
403
+ * with an older reference timestamp. So, doing this on every connection will keep the unreferenced state
404
+ * tracking up-to-date.
459
405
  */
460
- if (this.activeConnection() && this.configs.shouldRunGC) {
461
- this.initializeGCStateFromBaseSnapshotP.catch((error) => {});
406
+ if (connected && this.configs.shouldRunGC) {
407
+ this.initializeOrUpdateGCState().catch((error) => {
408
+ this.mc.logger.sendErrorEvent(
409
+ {
410
+ eventName: "GCInitializationOrUpdateFailed",
411
+ gcConfigs: JSON.stringify(this.configs),
412
+ },
413
+ error,
414
+ );
415
+ });
462
416
  }
463
417
  }
464
418
 
@@ -536,8 +490,11 @@ export class GarbageCollector implements IGarbageCollector {
536
490
  const gcStats = await this.runGC(fullGC, currentReferenceTimestampMs, logger);
537
491
  event.end({
538
492
  ...gcStats,
539
- timestamp: currentReferenceTimestampMs,
540
- sweep: this.configs.shouldRunSweep,
493
+ details: {
494
+ timestamp: currentReferenceTimestampMs,
495
+ sweep: this.configs.shouldRunSweep,
496
+ tombstone: this.configs.throwOnTombstoneLoad,
497
+ },
541
498
  });
542
499
 
543
500
  /** Post-GC steps */
@@ -596,8 +553,9 @@ export class GarbageCollector implements IGarbageCollector {
596
553
  );
597
554
 
598
555
  // 4. Run the Sweep phase.
599
- // It will tombstone any tombstone-ready nodes, and initiate the deletion of sweep-ready nodes by sending a
600
- // sweep op. All clients, including this one, will delete these nodes once it processes the op.
556
+ // It will initiate the deletion (sending the GC Sweep op) of any sweep-ready nodes that are
557
+ // allowed to be deleted per config, and tombstone the rest along with the tombstone-ready nodes.
558
+ // Note that no nodes will be deleted until the GC Sweep op is processed.
601
559
  this.runSweepPhase(gcResult, tombstoneReadyNodeIds, sweepReadyNodeIds);
602
560
 
603
561
  this.gcDataFromLastRun = cloneGCData(gcData);
@@ -708,19 +666,34 @@ export class GarbageCollector implements IGarbageCollector {
708
666
  return;
709
667
  }
710
668
 
711
- // If sweep is disabled, we'll tombstone both tombstone-ready and sweep-ready nodes.
669
+ // We'll build up the lists of nodes to be either Tombstoned or Deleted
670
+ // based on the configuration and the nodes' current state.
671
+ // We must Tombstone any sweep-ready node that Sweep won't run for.
712
672
  // This is important because a container may never load during a node's Sweep Grace Period,
713
673
  // so that node would directly become sweep-ready skipping over tombstone-ready state,
714
674
  // but should be Tombstoned since Sweep is disabled.
715
- const { nodesToTombstone, nodesToDelete } = this.configs.shouldRunSweep
716
- ? {
717
- nodesToTombstone: [...tombstoneReadyNodes],
718
- nodesToDelete: [...sweepReadyNodes],
719
- }
720
- : {
721
- nodesToTombstone: [...tombstoneReadyNodes, ...sweepReadyNodes],
722
- nodesToDelete: [],
723
- };
675
+ const { nodesToTombstone, nodesToDelete } = {
676
+ nodesToTombstone: [...tombstoneReadyNodes],
677
+ nodesToDelete: [] as string[],
678
+ };
679
+ switch (this.configs.shouldRunSweep) {
680
+ case "YES":
681
+ nodesToDelete.push(...sweepReadyNodes);
682
+ break;
683
+ case "ONLY_BLOBS":
684
+ sweepReadyNodes.forEach((nodeId) => {
685
+ const nodeType = this.runtime.getNodeType(nodeId);
686
+ if (nodeType === GCNodeType.Blob) {
687
+ nodesToDelete.push(nodeId);
688
+ } else {
689
+ nodesToTombstone.push(nodeId);
690
+ }
691
+ });
692
+ break;
693
+ default: // case "NO":
694
+ nodesToTombstone.push(...sweepReadyNodes);
695
+ break;
696
+ }
724
697
 
725
698
  if (this.configs.tombstoneMode) {
726
699
  this.tombstones = nodesToTombstone;
@@ -728,7 +701,7 @@ export class GarbageCollector implements IGarbageCollector {
728
701
  this.runtime.updateTombstonedRoutes(this.tombstones);
729
702
  }
730
703
 
731
- if (this.configs.shouldRunSweep && nodesToDelete.length > 0) {
704
+ if (nodesToDelete.length > 0) {
732
705
  // Do not send DDS node ids in the GC op. This is an optimization to reduce its size. Since GC applies to
733
706
  // to data store only, all its DDSes are deleted along with it. The DDS ids will be retrieved from the
734
707
  // local state when processing the op.
@@ -924,6 +897,10 @@ export class GarbageCollector implements IGarbageCollector {
924
897
  /**
925
898
  * Delete nodes that are sweep-ready. Call the runtime to delete these nodes and clear the unreferenced state
926
899
  * tracking for nodes that are actually deleted by the runtime.
900
+ *
901
+ * Note that this doesn't check any configuration around whether Sweep is enabled.
902
+ * That happens before the op is submitted, and from that point, any client should execute the delete.
903
+ *
927
904
  * @param sweepReadyNodeIds - The ids of nodes that are ready to be deleted.
928
905
  */
929
906
  private deleteSweepReadyNodes(sweepReadyNodeIds: readonly string[]) {
@@ -1142,8 +1119,9 @@ export class GarbageCollector implements IGarbageCollector {
1142
1119
 
1143
1120
  /**
1144
1121
  * Generates the stats of a garbage collection sweep phase run.
1145
- * @param deletedNodes - The nodes that have been deleted until this run.
1146
- * @param sweepReadyNodes - The nodes that are sweep-ready in this GC run.
1122
+ * @param deletedNodes - The nodes that have already been deleted even before this run.
1123
+ * @param sweepReadyNodes - The nodes that are sweep-ready in this GC run. These will be deleted but are not deleted yet,
1124
+ * due to either sweep not being enabled or the Sweep Op needing to roundtrip before the delete is executed.
1147
1125
  * @param markPhaseStats - The stats of the mark phase run.
1148
1126
  * @returns the stats of the sweep phase run.
1149
1127
  */
@@ -1163,9 +1141,25 @@ export class GarbageCollector implements IGarbageCollector {
1163
1141
  deletedAttachmentBlobCount: 0,
1164
1142
  };
1165
1143
 
1144
+ // The runtime can't reliably identify the type of deleted nodes. So, get the type here. This should
1145
+ // be good enough because the only types that participate in GC today are data stores, DDSes and blobs.
1146
+ const getDeletedNodeType = (nodeId: string): GCNodeType => {
1147
+ const pathParts = nodeId.split("/");
1148
+ if (pathParts[1] === BlobManager.basePath) {
1149
+ return GCNodeType.Blob;
1150
+ }
1151
+ if (pathParts.length === 2) {
1152
+ return GCNodeType.DataStore;
1153
+ }
1154
+ if (pathParts.length === 3) {
1155
+ return GCNodeType.SubDataStore;
1156
+ }
1157
+ return GCNodeType.Other;
1158
+ };
1159
+
1166
1160
  for (const nodeId of deletedNodes) {
1167
1161
  sweepPhaseStats.deletedNodeCount++;
1168
- const nodeType = this.runtime.getNodeType(nodeId);
1162
+ const nodeType = getDeletedNodeType(nodeId);
1169
1163
  if (nodeType === GCNodeType.DataStore) {
1170
1164
  sweepPhaseStats.deletedDataStoreCount++;
1171
1165
  } else if (nodeType === GCNodeType.Blob) {
@@ -1173,19 +1167,18 @@ export class GarbageCollector implements IGarbageCollector {
1173
1167
  }
1174
1168
  }
1175
1169
 
1176
- // If sweep is enabled, the counts from the mark phase stats do not include nodes that have been
1170
+ // The counts from the mark phase stats do not include nodes that were
1177
1171
  // deleted in previous runs. So, add the deleted node counts to life time stats.
1178
1172
  sweepPhaseStats.lifetimeNodeCount += sweepPhaseStats.deletedNodeCount;
1179
1173
  sweepPhaseStats.lifetimeDataStoreCount += sweepPhaseStats.deletedDataStoreCount;
1180
1174
  sweepPhaseStats.lifetimeAttachmentBlobCount += sweepPhaseStats.deletedAttachmentBlobCount;
1181
1175
 
1182
- if (this.configs.shouldRunSweep) {
1183
- return sweepPhaseStats;
1184
- }
1185
-
1186
- // If sweep is not enabled, the current sweep-ready node stats should be added to deleted stats since this
1187
- // is the final state the node will be in.
1188
- // If sweep is enabled, this will happen in the run after the GC op round trips back.
1176
+ // These stats are used to estimate the impact of GC in terms of how much garbage is/will be cleaned up.
1177
+ // So we include the current sweep-ready node stats since these nodes will be deleted eventually.
1178
+ // - If sweep is enabled, this will happen in the run after the GC op round trips back
1179
+ // (they'll be in deletedNodes that time).
1180
+ // - If sweep is not enabled, we still want to include these nodes since they
1181
+ // _will be_ deleted once it is enabled.
1189
1182
  for (const nodeId of sweepReadyNodes) {
1190
1183
  sweepPhaseStats.deletedNodeCount++;
1191
1184
  const nodeType = this.runtime.getNodeType(nodeId);
@@ -1195,6 +1188,7 @@ export class GarbageCollector implements IGarbageCollector {
1195
1188
  sweepPhaseStats.deletedAttachmentBlobCount++;
1196
1189
  }
1197
1190
  }
1191
+
1198
1192
  return sweepPhaseStats;
1199
1193
  }
1200
1194
  }
@@ -31,6 +31,8 @@ import {
31
31
  gcDisableThrowOnTombstoneLoadOptionName,
32
32
  defaultSweepGracePeriodMs,
33
33
  gcGenerationOptionName,
34
+ disableDatastoreSweepKey,
35
+ gcDisableDataStoreSweepOptionName,
34
36
  } from "./gcDefinitions";
35
37
  import { getGCVersion, shouldAllowGcSweep } from "./gcHelpers";
36
38
 
@@ -133,16 +135,24 @@ export function generateGCConfigs(
133
135
  *
134
136
  * Assuming overall GC is enabled and sweepTimeout is provided, the following conditions have to be met to run sweep:
135
137
  *
136
- * 1. Sweep should be enabled for this container.
137
- * 2. Sweep should be enabled for this session.
138
+ * 1. Sweep should be allowed in this container.
139
+ * 2. Sweep should be enabled for this session, optionally restricted to attachment blobs only.
138
140
  *
139
141
  * These conditions can be overridden via the RunSweep feature flag.
140
142
  */
141
- const shouldRunSweep =
143
+ const sweepEnabled: boolean =
142
144
  !shouldRunGC || sweepTimeoutMs === undefined
143
145
  ? false
144
146
  : mc.config.getBoolean(runSweepKey) ??
145
147
  (sweepAllowed && createParams.gcOptions.enableGCSweep === true);
148
+ const disableDatastoreSweep =
149
+ mc.config.getBoolean(disableDatastoreSweepKey) === true ||
150
+ createParams.gcOptions[gcDisableDataStoreSweepOptionName] === true;
151
+ const shouldRunSweep: IGarbageCollectorConfigs["shouldRunSweep"] = sweepEnabled
152
+ ? disableDatastoreSweep
153
+ ? "ONLY_BLOBS"
154
+ : "YES"
155
+ : "NO";
146
156
 
147
157
  // Override inactive timeout if test config or gc options to override it is set.
148
158
  const inactiveTimeoutMs =
@@ -40,6 +40,12 @@ export const nextGCVersion: GCVersion = 4;
40
40
  */
41
41
  export const gcDisableThrowOnTombstoneLoadOptionName = "gcDisableThrowOnTombstoneLoad";
42
42
 
43
+ /**
44
+ * This undocumented GC Option (on ContainerRuntime Options) allows an app to enable Sweep for blobs only.
45
+ * Only applies if enableGCSweep option is set to true.
46
+ */
47
+ export const gcDisableDataStoreSweepOptionName = "disableDataStoreSweep";
48
+
43
49
  /**
44
50
  * This undocumented GC Option (on ContainerRuntime Options) allows configuring which documents can have Sweep enabled.
45
51
  * This provides a way to disable both Tombstone Enforcement and Sweep.
@@ -70,10 +76,8 @@ export const throwOnTombstoneLoadOverrideKey =
70
76
  export const throwOnTombstoneUsageKey = "Fluid.GarbageCollection.ThrowOnTombstoneUsage";
71
77
  /** Config key to enable GC version upgrade. */
72
78
  export const gcVersionUpgradeToV4Key = "Fluid.GarbageCollection.GCVersionUpgradeToV4";
73
- /** Config key to disable GC sweep for datastores. */
79
+ /** Config key to disable GC sweep for datastores. They'll merely be Tombstoned. */
74
80
  export const disableDatastoreSweepKey = "Fluid.GarbageCollection.DisableDataStoreSweep";
75
- /** Config key to disable GC sweep for attachment blobs. */
76
- export const disableAttachmentBlobSweepKey = "Fluid.GarbageCollection.DisableAttachmentBlobSweep";
77
81
 
78
82
  // One day in milliseconds.
79
83
  export const oneDayMs = 1 * 24 * 60 * 60 * 1000;
@@ -353,7 +357,6 @@ export interface IGarbageCollectorCreateParams {
353
357
  readonly getNodePackagePath: (nodePath: string) => Promise<readonly string[] | undefined>;
354
358
  readonly getLastSummaryTimestampMs: () => number | undefined;
355
359
  readonly readAndParseBlob: ReadAndParseBlob;
356
- readonly activeConnection: () => boolean;
357
360
  readonly submitMessage: (message: ContainerRuntimeGCMessage) => void;
358
361
  }
359
362
 
@@ -426,7 +429,7 @@ export interface IGarbageCollectorConfigs {
426
429
  */
427
430
  readonly gcEnabled: boolean;
428
431
  /**
429
- * Tracks if sweep phase is enabled for this document. This is specified during document creation and doesn't change
432
+ * Tracks if sweep phase is allowed for this document. This is specified during document creation and doesn't change
430
433
  * throughout its lifetime.
431
434
  */
432
435
  readonly sweepEnabled: boolean;
@@ -436,10 +439,11 @@ export interface IGarbageCollectorConfigs {
436
439
  */
437
440
  readonly shouldRunGC: boolean;
438
441
  /**
439
- * Tracks if sweep phase should run or not. Even if the sweep phase is enabled for a document (see sweepEnabled), it
440
- * can be explicitly disabled via feature flags. It also won't run if session expiry is not enabled.
442
+ * Tracks if sweep phase should run or not, or if it should run only for attachment blobs.
443
+ * Even if the sweep phase is allowed for a document (see sweepEnabled), it may be disabled or partially enabled
444
+ * for the session, depending on a variety of other configurations present.
441
445
  */
442
- readonly shouldRunSweep: boolean;
446
+ readonly shouldRunSweep: "YES" | "ONLY_BLOBS" | "NO";
443
447
  /**
444
448
  * If true, bypass optimizations and generate GC data for all nodes irrespective of whether a node changed or not.
445
449
  */
@@ -111,7 +111,11 @@ export class GCSummaryStateTracker {
111
111
  /**
112
112
  * Called during GC initialization. Initialize the latest summary data from the base snapshot data.
113
113
  */
114
- public initializeBaseState(baseSnapshotData: IGarbageCollectionSnapshotData) {
114
+ public initializeBaseState(baseSnapshotData: IGarbageCollectionSnapshotData | undefined) {
115
+ if (baseSnapshotData === undefined) {
116
+ return;
117
+ }
118
+
115
119
  // If tracking state across summaries, update latest summary data from the snapshot's GC data.
116
120
  this.latestSummaryData = {
117
121
  serializedGCState: baseSnapshotData.gcState
@@ -240,13 +240,9 @@ export class GCTelemetryTracker {
240
240
  gcConfigs,
241
241
  };
242
242
 
243
- // Do not log the inactive object x events as error events as they are not the best signal for
244
- // detecting something wrong with GC either from the partner or from the runtime itself.
245
- if (state === UnreferencedState.Inactive) {
246
- this.mc.logger.sendTelemetryEvent(event);
247
- } else {
248
- this.mc.logger.sendErrorEvent(event);
249
- }
243
+ // These are logged as generic events and not errors because there can be false positives. The Tombstone
244
+ // and Delete errors are separately logged and are reliable.
245
+ this.mc.logger.sendTelemetryEvent(event);
250
246
  }
251
247
  }
252
248
  }
@@ -393,12 +389,7 @@ export class GCTelemetryTracker {
393
389
  fromPkg: fromPkg?.join("/"),
394
390
  }),
395
391
  };
396
-
397
- if (state === UnreferencedState.Inactive) {
398
- logger.sendTelemetryEvent(event);
399
- } else {
400
- logger.sendErrorEvent(event);
401
- }
392
+ logger.sendTelemetryEvent(event);
402
393
  }
403
394
  }
404
395
  this.pendingEventsQueue = [];
package/src/gc/index.ts CHANGED
@@ -11,6 +11,7 @@ export {
11
11
  defaultSessionExpiryDurationMs,
12
12
  GCNodeType,
13
13
  gcTestModeKey,
14
+ gcDisableDataStoreSweepOptionName,
14
15
  gcDisableThrowOnTombstoneLoadOptionName,
15
16
  gcGenerationOptionName,
16
17
  GCFeatureMatrix,
@@ -31,7 +32,6 @@ export {
31
32
  runSessionExpiryKey,
32
33
  runSweepKey,
33
34
  stableGCVersion,
34
- disableAttachmentBlobSweepKey,
35
35
  disableDatastoreSweepKey,
36
36
  UnreferencedState,
37
37
  throwOnTombstoneLoadOverrideKey,
@@ -6,4 +6,4 @@
6
6
  */
7
7
 
8
8
  export const pkgName = "@fluidframework/container-runtime";
9
- export const pkgVersion = "2.0.0-internal.7.4.5";
9
+ export const pkgVersion = "2.0.0-internal.7.4.7";