@fluidframework/container-runtime 2.0.0-internal.4.2.1 → 2.0.0-internal.4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/CHANGELOG.md +4 -0
  2. package/dist/blobManager.d.ts.map +1 -1
  3. package/dist/blobManager.js +3 -2
  4. package/dist/blobManager.js.map +1 -1
  5. package/dist/connectionTelemetry.d.ts.map +1 -1
  6. package/dist/connectionTelemetry.js +1 -0
  7. package/dist/connectionTelemetry.js.map +1 -1
  8. package/dist/containerRuntime.d.ts.map +1 -1
  9. package/dist/containerRuntime.js +20 -11
  10. package/dist/containerRuntime.js.map +1 -1
  11. package/dist/dataStoreContext.d.ts.map +1 -1
  12. package/dist/dataStoreContext.js +1 -2
  13. package/dist/dataStoreContext.js.map +1 -1
  14. package/dist/dataStores.d.ts +5 -5
  15. package/dist/dataStores.d.ts.map +1 -1
  16. package/dist/dataStores.js +3 -6
  17. package/dist/dataStores.js.map +1 -1
  18. package/dist/gc/garbageCollection.d.ts +56 -70
  19. package/dist/gc/garbageCollection.d.ts.map +1 -1
  20. package/dist/gc/garbageCollection.js +227 -408
  21. package/dist/gc/garbageCollection.js.map +1 -1
  22. package/dist/gc/gcConfigs.d.ts.map +1 -1
  23. package/dist/gc/gcConfigs.js +8 -10
  24. package/dist/gc/gcConfigs.js.map +1 -1
  25. package/dist/gc/gcDefinitions.d.ts +2 -0
  26. package/dist/gc/gcDefinitions.d.ts.map +1 -1
  27. package/dist/gc/gcDefinitions.js.map +1 -1
  28. package/dist/gc/gcHelpers.d.ts +11 -11
  29. package/dist/gc/gcHelpers.d.ts.map +1 -1
  30. package/dist/gc/gcHelpers.js +18 -22
  31. package/dist/gc/gcHelpers.js.map +1 -1
  32. package/dist/gc/gcSummaryStateTracker.d.ts +6 -2
  33. package/dist/gc/gcSummaryStateTracker.d.ts.map +1 -1
  34. package/dist/gc/gcSummaryStateTracker.js +16 -6
  35. package/dist/gc/gcSummaryStateTracker.js.map +1 -1
  36. package/dist/gc/gcTelemetry.d.ts +91 -0
  37. package/dist/gc/gcTelemetry.d.ts.map +1 -0
  38. package/dist/gc/gcTelemetry.js +282 -0
  39. package/dist/gc/gcTelemetry.js.map +1 -0
  40. package/dist/gc/index.d.ts +2 -2
  41. package/dist/gc/index.d.ts.map +1 -1
  42. package/dist/gc/index.js +5 -6
  43. package/dist/gc/index.js.map +1 -1
  44. package/dist/opLifecycle/opGroupingManager.js +1 -1
  45. package/dist/opLifecycle/opGroupingManager.js.map +1 -1
  46. package/dist/opLifecycle/outbox.js +1 -1
  47. package/dist/opLifecycle/outbox.js.map +1 -1
  48. package/dist/opLifecycle/remoteMessageProcessor.d.ts.map +1 -1
  49. package/dist/opLifecycle/remoteMessageProcessor.js +25 -22
  50. package/dist/opLifecycle/remoteMessageProcessor.js.map +1 -1
  51. package/dist/packageVersion.d.ts +1 -1
  52. package/dist/packageVersion.js +1 -1
  53. package/dist/packageVersion.js.map +1 -1
  54. package/dist/pendingStateManager.d.ts +1 -1
  55. package/dist/pendingStateManager.d.ts.map +1 -1
  56. package/dist/pendingStateManager.js.map +1 -1
  57. package/dist/scheduleManager.js +15 -4
  58. package/dist/scheduleManager.js.map +1 -1
  59. package/dist/summary/orderedClientElection.d.ts.map +1 -1
  60. package/dist/summary/orderedClientElection.js +14 -17
  61. package/dist/summary/orderedClientElection.js.map +1 -1
  62. package/dist/summary/summarizer.d.ts +2 -0
  63. package/dist/summary/summarizer.d.ts.map +1 -1
  64. package/dist/summary/summarizer.js +9 -4
  65. package/dist/summary/summarizer.js.map +1 -1
  66. package/dist/summary/summarizerHeuristics.d.ts +8 -9
  67. package/dist/summary/summarizerHeuristics.d.ts.map +1 -1
  68. package/dist/summary/summarizerHeuristics.js +15 -16
  69. package/dist/summary/summarizerHeuristics.js.map +1 -1
  70. package/dist/summary/summarizerTypes.d.ts +2 -0
  71. package/dist/summary/summarizerTypes.d.ts.map +1 -1
  72. package/dist/summary/summarizerTypes.js.map +1 -1
  73. package/dist/summary/summaryGenerator.d.ts.map +1 -1
  74. package/dist/summary/summaryGenerator.js +4 -3
  75. package/dist/summary/summaryGenerator.js.map +1 -1
  76. package/lib/blobManager.d.ts.map +1 -1
  77. package/lib/blobManager.js +3 -2
  78. package/lib/blobManager.js.map +1 -1
  79. package/lib/connectionTelemetry.d.ts.map +1 -1
  80. package/lib/connectionTelemetry.js +1 -0
  81. package/lib/connectionTelemetry.js.map +1 -1
  82. package/lib/containerRuntime.d.ts.map +1 -1
  83. package/lib/containerRuntime.js +20 -11
  84. package/lib/containerRuntime.js.map +1 -1
  85. package/lib/dataStoreContext.d.ts.map +1 -1
  86. package/lib/dataStoreContext.js +1 -2
  87. package/lib/dataStoreContext.js.map +1 -1
  88. package/lib/dataStores.d.ts +5 -5
  89. package/lib/dataStores.d.ts.map +1 -1
  90. package/lib/dataStores.js +3 -6
  91. package/lib/dataStores.js.map +1 -1
  92. package/lib/gc/garbageCollection.d.ts +56 -70
  93. package/lib/gc/garbageCollection.d.ts.map +1 -1
  94. package/lib/gc/garbageCollection.js +231 -412
  95. package/lib/gc/garbageCollection.js.map +1 -1
  96. package/lib/gc/gcConfigs.d.ts.map +1 -1
  97. package/lib/gc/gcConfigs.js +8 -10
  98. package/lib/gc/gcConfigs.js.map +1 -1
  99. package/lib/gc/gcDefinitions.d.ts +2 -0
  100. package/lib/gc/gcDefinitions.d.ts.map +1 -1
  101. package/lib/gc/gcDefinitions.js.map +1 -1
  102. package/lib/gc/gcHelpers.d.ts +11 -11
  103. package/lib/gc/gcHelpers.d.ts.map +1 -1
  104. package/lib/gc/gcHelpers.js +16 -20
  105. package/lib/gc/gcHelpers.js.map +1 -1
  106. package/lib/gc/gcSummaryStateTracker.d.ts +6 -2
  107. package/lib/gc/gcSummaryStateTracker.d.ts.map +1 -1
  108. package/lib/gc/gcSummaryStateTracker.js +16 -6
  109. package/lib/gc/gcSummaryStateTracker.js.map +1 -1
  110. package/lib/gc/gcTelemetry.d.ts +91 -0
  111. package/lib/gc/gcTelemetry.d.ts.map +1 -0
  112. package/lib/gc/gcTelemetry.js +277 -0
  113. package/lib/gc/gcTelemetry.js.map +1 -0
  114. package/lib/gc/index.d.ts +2 -2
  115. package/lib/gc/index.d.ts.map +1 -1
  116. package/lib/gc/index.js +2 -2
  117. package/lib/gc/index.js.map +1 -1
  118. package/lib/opLifecycle/opGroupingManager.js +1 -1
  119. package/lib/opLifecycle/opGroupingManager.js.map +1 -1
  120. package/lib/opLifecycle/outbox.js +1 -1
  121. package/lib/opLifecycle/outbox.js.map +1 -1
  122. package/lib/opLifecycle/remoteMessageProcessor.d.ts.map +1 -1
  123. package/lib/opLifecycle/remoteMessageProcessor.js +25 -22
  124. package/lib/opLifecycle/remoteMessageProcessor.js.map +1 -1
  125. package/lib/packageVersion.d.ts +1 -1
  126. package/lib/packageVersion.js +1 -1
  127. package/lib/packageVersion.js.map +1 -1
  128. package/lib/pendingStateManager.d.ts +1 -1
  129. package/lib/pendingStateManager.d.ts.map +1 -1
  130. package/lib/pendingStateManager.js.map +1 -1
  131. package/lib/scheduleManager.js +15 -4
  132. package/lib/scheduleManager.js.map +1 -1
  133. package/lib/summary/orderedClientElection.d.ts.map +1 -1
  134. package/lib/summary/orderedClientElection.js +14 -17
  135. package/lib/summary/orderedClientElection.js.map +1 -1
  136. package/lib/summary/summarizer.d.ts +2 -0
  137. package/lib/summary/summarizer.d.ts.map +1 -1
  138. package/lib/summary/summarizer.js +9 -4
  139. package/lib/summary/summarizer.js.map +1 -1
  140. package/lib/summary/summarizerHeuristics.d.ts +8 -9
  141. package/lib/summary/summarizerHeuristics.d.ts.map +1 -1
  142. package/lib/summary/summarizerHeuristics.js +15 -16
  143. package/lib/summary/summarizerHeuristics.js.map +1 -1
  144. package/lib/summary/summarizerTypes.d.ts +2 -0
  145. package/lib/summary/summarizerTypes.d.ts.map +1 -1
  146. package/lib/summary/summarizerTypes.js.map +1 -1
  147. package/lib/summary/summaryGenerator.d.ts.map +1 -1
  148. package/lib/summary/summaryGenerator.js +4 -3
  149. package/lib/summary/summaryGenerator.js.map +1 -1
  150. package/package.json +15 -16
  151. package/src/blobManager.ts +3 -2
  152. package/src/connectionTelemetry.ts +1 -0
  153. package/src/containerRuntime.ts +22 -15
  154. package/src/dataStoreContext.ts +1 -2
  155. package/src/dataStores.ts +4 -7
  156. package/src/gc/garbageCollection.ts +316 -561
  157. package/src/gc/gcConfigs.ts +12 -11
  158. package/src/gc/gcDefinitions.ts +2 -0
  159. package/src/gc/gcHelpers.ts +21 -40
  160. package/src/gc/gcSummaryStateTracker.ts +19 -7
  161. package/src/gc/gcTelemetry.ts +408 -0
  162. package/src/gc/index.ts +2 -6
  163. package/src/opLifecycle/README.md +13 -0
  164. package/src/opLifecycle/opGroupingManager.ts +1 -1
  165. package/src/opLifecycle/outbox.ts +2 -2
  166. package/src/opLifecycle/remoteMessageProcessor.ts +37 -28
  167. package/src/packageVersion.ts +1 -1
  168. package/src/pendingStateManager.ts +1 -4
  169. package/src/scheduleManager.ts +19 -7
  170. package/src/summary/orderedClientElection.ts +14 -17
  171. package/src/summary/summarizer.ts +17 -5
  172. package/src/summary/summarizerHeuristics.ts +15 -16
  173. package/src/summary/summarizerTypes.ts +2 -0
  174. package/src/summary/summaryGenerator.ts +5 -4
  175. package/dist/gc/gcSweepReadyUsageDetection.d.ts +0 -53
  176. package/dist/gc/gcSweepReadyUsageDetection.d.ts.map +0 -1
  177. package/dist/gc/gcSweepReadyUsageDetection.js +0 -130
  178. package/dist/gc/gcSweepReadyUsageDetection.js.map +0 -1
  179. package/lib/gc/gcSweepReadyUsageDetection.d.ts +0 -53
  180. package/lib/gc/gcSweepReadyUsageDetection.d.ts.map +0 -1
  181. package/lib/gc/gcSweepReadyUsageDetection.js +0 -125
  182. package/lib/gc/gcSweepReadyUsageDetection.js.map +0 -1
  183. package/src/gc/gcSweepReadyUsageDetection.ts +0 -145
@@ -4,7 +4,7 @@
4
4
  */
5
5
 
6
6
  import { ITelemetryLogger } from "@fluidframework/common-definitions";
7
- import { assert, LazyPromise, Timer } from "@fluidframework/common-utils";
7
+ import { LazyPromise, Timer } from "@fluidframework/common-utils";
8
8
  import { ClientSessionExpiredError, DataProcessingError } from "@fluidframework/container-utils";
9
9
  import { IRequestHeader } from "@fluidframework/core-interfaces";
10
10
  import {
@@ -14,21 +14,18 @@ import {
14
14
  ISummarizeResult,
15
15
  ITelemetryContext,
16
16
  } from "@fluidframework/runtime-definitions";
17
- import { packagePathToTelemetryProperty, ReadAndParseBlob } from "@fluidframework/runtime-utils";
17
+ import { ReadAndParseBlob } from "@fluidframework/runtime-utils";
18
18
  import {
19
19
  ChildLogger,
20
- generateStack,
21
20
  loggerToMonitoringContext,
22
21
  MonitoringContext,
23
22
  PerformanceEvent,
24
- TelemetryDataTag,
25
23
  } from "@fluidframework/telemetry-utils";
26
24
 
27
25
  import { RuntimeHeaders } from "../containerRuntime";
28
- import { ICreateContainerMetadata, RefreshSummaryResult } from "../summary";
26
+ import { RefreshSummaryResult } from "../summary";
29
27
  import { generateGCConfigs } from "./gcConfigs";
30
28
  import {
31
- disableSweepLogKey,
32
29
  GCNodeType,
33
30
  IGarbageCollector,
34
31
  IGarbageCollectorCreateParams,
@@ -39,31 +36,12 @@ import {
39
36
  IGCMetadata,
40
37
  IGarbageCollectorConfigs,
41
38
  } from "./gcDefinitions";
42
- import {
43
- cloneGCData,
44
- concatGarbageCollectionData,
45
- getGCDataFromSnapshot,
46
- sendGCUnexpectedUsageEvent,
47
- } from "./gcHelpers";
39
+ import { cloneGCData, concatGarbageCollectionData, getGCDataFromSnapshot } from "./gcHelpers";
48
40
  import { runGarbageCollection } from "./gcReferenceGraphAlgorithm";
49
41
  import { IGarbageCollectionSnapshotData, IGarbageCollectionState } from "./gcSummaryDefinitions";
50
42
  import { GCSummaryStateTracker } from "./gcSummaryStateTracker";
51
43
  import { UnreferencedStateTracker } from "./gcUnreferencedStateTracker";
52
-
53
- /** The event that is logged when unreferenced node is used after a certain time. */
54
- interface IUnreferencedEventProps {
55
- usageType: "Changed" | "Loaded" | "Revived";
56
- state: UnreferencedState;
57
- id: string;
58
- type: GCNodeType;
59
- unrefTime: number;
60
- age: number;
61
- completedGCRuns: number;
62
- fromId?: string;
63
- timeout?: number;
64
- lastSummaryTime?: number;
65
- viaHandle?: boolean;
66
- }
44
+ import { GCTelemetryTracker } from "./gcTelemetry";
67
45
 
68
46
  /**
69
47
  * The garbage collector for the container runtime. It consolidates the garbage collection functionality and maintains
@@ -121,20 +99,14 @@ export class GarbageCollector implements IGarbageCollector {
121
99
  // The Timer responsible for closing the container when the session has expired
122
100
  private sessionExpiryTimer: Timer | undefined;
123
101
 
124
- // Keeps track of unreferenced events that are logged for a node. This is used to limit the log generation to one
125
- // per event per node.
126
- private readonly loggedUnreferencedEvents: Set<string> = new Set();
127
- // Queue for unreferenced events that should be logged the next time GC runs.
128
- private pendingEventsQueue: IUnreferencedEventProps[] = [];
129
-
130
102
  // The number of times GC has successfully completed on this instance of GarbageCollector.
131
103
  private completedRuns = 0;
132
104
 
133
105
  private readonly runtime: IGarbageCollectionRuntime;
134
- private readonly createContainerMetadata: ICreateContainerMetadata;
135
106
  private readonly isSummarizerClient: boolean;
136
107
 
137
108
  private readonly summaryStateTracker: GCSummaryStateTracker;
109
+ private readonly telemetryTracker: GCTelemetryTracker;
138
110
 
139
111
  /** For a given node path, returns the node's package path. */
140
112
  private readonly getNodePackagePath: (
@@ -149,10 +121,14 @@ export class GarbageCollector implements IGarbageCollector {
149
121
  return this.summaryStateTracker.doesSummaryStateNeedReset;
150
122
  }
151
123
 
124
+ /** Returns the count of data stores whose GC state updated since the last summary. */
125
+ public get updatedDSCountSinceLastSummary(): number {
126
+ return this.summaryStateTracker.updatedDSCountSinceLastSummary;
127
+ }
128
+
152
129
  protected constructor(createParams: IGarbageCollectorCreateParams) {
153
130
  this.runtime = createParams.runtime;
154
131
  this.isSummarizerClient = createParams.isSummarizerClient;
155
- this.createContainerMetadata = createParams.createContainerMetadata;
156
132
  this.getNodePackagePath = createParams.getNodePackagePath;
157
133
  this.getLastSummaryTimestampMs = createParams.getLastSummaryTimestampMs;
158
134
  this.activeConnection = createParams.activeConnection;
@@ -189,6 +165,17 @@ export class GarbageCollector implements IGarbageCollector {
189
165
  baseSnapshot?.trees[gcTreeKey] !== undefined /* wasGCRunInBaseSnapshot */,
190
166
  );
191
167
 
168
+ this.telemetryTracker = new GCTelemetryTracker(
169
+ this.mc,
170
+ this.configs,
171
+ this.isSummarizerClient,
172
+ this.runtime.gcTombstoneEnforcementAllowed,
173
+ createParams.createContainerMetadata,
174
+ (nodeId: string) => this.runtime.getNodeType(nodeId),
175
+ (nodeId: string) => this.unreferencedNodesState.get(nodeId),
176
+ this.getNodePackagePath,
177
+ );
178
+
192
179
  // Get the GC data from the base snapshot. Use LazyPromise because we only want to do this once since it
193
180
  // it involves fetching blobs from storage which is expensive.
194
181
  this.baseSnapshotDataP = new LazyPromise<IGarbageCollectionSnapshotData | undefined>(
@@ -215,10 +202,7 @@ export class GarbageCollector implements IGarbageCollector {
215
202
  // in the snapshot cannot be interpreted correctly. Set everything to undefined except for
216
203
  // deletedNodes because irrespective of GC versions, these nodes have been deleted and cannot be
217
204
  // brought back. The deletedNodes info is needed to identify when these nodes are used.
218
- if (
219
- this.configs.gcVersionInBaseSnapshot !==
220
- this.summaryStateTracker.currentGCVersion
221
- ) {
205
+ if (this.configs.gcVersionInEffect !== this.configs.gcVersionInBaseSnapshot) {
222
206
  return {
223
207
  gcState: undefined,
224
208
  tombstones: undefined,
@@ -447,6 +431,14 @@ export class GarbageCollector implements IGarbageCollector {
447
431
  }
448
432
  }
449
433
 
434
+ /**
435
+ * Returns the GC details generated from the base summary. This is used to initialize the GC state of the nodes
436
+ * in the container.
437
+ */
438
+ public async getBaseGCDetails(): Promise<IGarbageCollectionDetailsBase> {
439
+ return this.baseGCDetailsP;
440
+ }
441
+
450
442
  /**
451
443
  * Runs garbage collection and updates the reference / used state of the nodes in the container.
452
444
  * @returns stats of the GC run or undefined if GC did not run.
@@ -500,80 +492,297 @@ export class GarbageCollector implements IGarbageCollector {
500
492
  logger,
501
493
  { eventName: "GarbageCollection" },
502
494
  async (event) => {
503
- await this.runPreGCSteps();
504
-
505
- // Get the runtime's GC data and run GC on the reference graph in it.
506
- const gcData = await this.runtime.getGCData(fullGC);
507
- const gcResult = runGarbageCollection(gcData.gcNodes, ["/"]);
508
-
509
- const gcStats = await this.runPostGCSteps(
510
- gcData,
511
- gcResult,
512
- logger,
513
- currentReferenceTimestampMs,
514
- );
495
+ /** Pre-GC steps */
496
+ // Ensure that state has been initialized from the base snapshot data.
497
+ await this.initializeGCStateFromBaseSnapshotP;
498
+ // Let the runtime update its pending state before GC runs.
499
+ await this.runtime.updateStateBeforeGC();
500
+
501
+ /** GC step */
502
+ const gcStats = await this.runGC(fullGC, currentReferenceTimestampMs, logger);
515
503
  event.end({ ...gcStats, timestamp: currentReferenceTimestampMs });
504
+
505
+ /** Post-GC steps */
506
+ // Log pending unreferenced events such as a node being used after inactive. This is done after GC runs and
507
+ // updates its state so that we don't send false positives based on intermediate state. For example, we may get
508
+ // reference to an unreferenced node from another unreferenced node which means the node wasn't revived.
509
+ await this.telemetryTracker.logPendingEvents(logger);
510
+ // Update the state of summary state tracker from this run's stats.
511
+ this.summaryStateTracker.updateStateFromGCRunStats(gcStats);
512
+ this.newReferencesSinceLastRun.clear();
516
513
  this.completedRuns++;
514
+
517
515
  return gcStats;
518
516
  },
519
517
  { end: true, cancel: "error" },
520
518
  );
521
519
  }
522
520
 
523
- private async runPreGCSteps() {
524
- // Ensure that state has been initialized from the base snapshot data.
525
- await this.initializeGCStateFromBaseSnapshotP;
526
- // Let the runtime update its pending state before GC runs.
527
- await this.runtime.updateStateBeforeGC();
528
- }
529
-
530
- private async runPostGCSteps(
531
- gcData: IGarbageCollectionData,
532
- gcResult: IGCResult,
533
- logger: ITelemetryLogger,
521
+ /**
522
+ * Runs garbage collection. It does the following:
523
+ * 1. It generates / analyzes the runtime's reference graph.
524
+ * 2. Generates stats for the GC run based on previous / current GC state.
525
+ * 3. Runs Mark phase.
526
+ * 4. Runs Sweep phase.
527
+ */
528
+ private async runGC(
529
+ fullGC: boolean,
534
530
  currentReferenceTimestampMs: number,
531
+ logger: ITelemetryLogger,
535
532
  ): Promise<IGCStats> {
536
- // Generate statistics from the current run. This is done before updating the current state because it
537
- // generates some of its data based on previous state of the system.
533
+ // 1. Generate / analyze the runtime's reference graph.
534
+ // Get the reference graph (gcData) and run GC algorithm to get referenced / unreferenced nodes.
535
+ const gcData = await this.runtime.getGCData(fullGC);
536
+ const gcResult = runGarbageCollection(gcData.gcNodes, ["/"]);
537
+ // Get all referenced nodes - References in this run + references between the previous and current runs.
538
+ const allReferencedNodeIds =
539
+ this.findAllNodesReferencedBetweenGCs(gcData, this.gcDataFromLastRun, logger) ??
540
+ gcResult.referencedNodeIds;
541
+
542
+ // 2. Generate stats based on the previous / current GC state.
543
+ // Must happen before running Mark / Sweep phase because previous GC state will be updated in these stages.
538
544
  const gcStats = this.generateStats(gcResult);
539
545
 
540
- // Update the current mark state and update the runtime of all used routes or ids that used as per the GC run.
541
- const sweepReadyNodes = this.updateMarkPhase(
542
- gcData,
546
+ // 3. Run the Mark phase.
547
+ // It will mark nodes as referenced / unreferenced and return a list of node ids that are ready to be swept.
548
+ const sweepReadyNodeIds = this.runMarkPhase(
549
+ gcResult,
550
+ allReferencedNodeIds,
551
+ currentReferenceTimestampMs,
552
+ );
553
+
554
+ // 4. Run the Sweep phase.
555
+ // It will delete sweep ready nodes and return a list of deleted node ids.
556
+ const deletedNodeIds = this.runSweepPhase(
543
557
  gcResult,
558
+ sweepReadyNodeIds,
544
559
  currentReferenceTimestampMs,
545
560
  logger,
546
561
  );
562
+
563
+ this.gcDataFromLastRun = cloneGCData(
564
+ gcData,
565
+ (id: string) => deletedNodeIds.includes(id) /* filter out deleted nodes */,
566
+ );
567
+ return gcStats;
568
+ }
569
+
570
+ /**
571
+ * Runs the GC Mark phase. It does the following:
572
+ * 1. Marks all referenced nodes in this run by clearing tracking for them.
573
+ * 2. Marks unreferenced nodes in this run by starting tracking for them.
574
+ * 3. Calls the runtime to update nodes that were marked referenced.
575
+ *
576
+ * @param gcResult - The result of the GC run on the gcData.
577
+ * @param allReferencedNodeIds - Nodes referenced in this GC run + referenced between previous and current GC run.
578
+ * @param currentReferenceTimestampMs - The timestamp to be used for unreferenced nodes' timestamp.
579
+ * @returns - A list of sweep ready nodes, i.e., nodes that are ready to be deleted.
580
+ */
581
+ private runMarkPhase(
582
+ gcResult: IGCResult,
583
+ allReferencedNodeIds: string[],
584
+ currentReferenceTimestampMs: number,
585
+ ): string[] {
586
+ // 1. Marks all referenced nodes by clearing their unreferenced tracker, if any.
587
+ for (const nodeId of allReferencedNodeIds) {
588
+ const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
589
+ if (nodeStateTracker !== undefined) {
590
+ // Stop tracking so as to clear out any running timers.
591
+ nodeStateTracker.stopTracking();
592
+ // Delete the node as we don't need to track it any more.
593
+ this.unreferencedNodesState.delete(nodeId);
594
+ }
595
+ }
596
+
597
+ // 2. Mark unreferenced nodes in this run by starting unreferenced tracking for them.
598
+ const sweepReadyNodeIds: string[] = [];
599
+ for (const nodeId of gcResult.deletedNodeIds) {
600
+ const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
601
+ if (nodeStateTracker === undefined) {
602
+ this.unreferencedNodesState.set(
603
+ nodeId,
604
+ new UnreferencedStateTracker(
605
+ currentReferenceTimestampMs,
606
+ this.configs.inactiveTimeoutMs,
607
+ currentReferenceTimestampMs,
608
+ this.configs.sweepTimeoutMs,
609
+ ),
610
+ );
611
+ } else {
612
+ // If a node was already unreferenced, update its tracking information. Since the current reference time
613
+ // is from the ops seen, this will ensure that we keep updating unreferenced state as time moves forward.
614
+ nodeStateTracker.updateTracking(currentReferenceTimestampMs);
615
+
616
+ // If a node is sweep ready, store it so it can be returned.
617
+ if (nodeStateTracker.state === UnreferencedState.SweepReady) {
618
+ sweepReadyNodeIds.push(nodeId);
619
+ }
620
+ }
621
+ }
622
+
623
+ // 3. Call the runtime to update referenced nodes in this run.
547
624
  this.runtime.updateUsedRoutes(gcResult.referencedNodeIds);
548
625
 
549
- // Log events for objects that are ready to be deleted by sweep. When we have sweep enabled, we will
550
- // delete these objects here instead.
551
- this.logSweepEvents(logger, currentReferenceTimestampMs);
626
+ return sweepReadyNodeIds;
627
+ }
552
628
 
553
- let updatedGCData: IGarbageCollectionData = gcData;
629
+ /**
630
+ * Runs the GC Sweep phase. It does the following:
631
+ * 1. Calls the runtime to delete nodes that are sweep ready.
632
+ * 2. Clears tracking for deleted nodes.
633
+ *
634
+ * @param gcResult - The result of the GC run on the gcData.
635
+ * @param sweepReadyNodes - List of nodes that are sweep ready.
636
+ * @param currentReferenceTimestampMs - The timestamp to be used for unreferenced nodes' timestamp.
637
+ * @param logger - The logger to be used to log any telemetry.
638
+ * @returns - A list of nodes that have been deleted.
639
+ */
640
+ private runSweepPhase(
641
+ gcResult: IGCResult,
642
+ sweepReadyNodes: string[],
643
+ currentReferenceTimestampMs: number,
644
+ logger: ITelemetryLogger,
645
+ ): string[] {
646
+ // Log events for objects that are ready to be deleted by sweep. This will give us data on sweep when
647
+ // it's not enabled.
648
+ this.telemetryTracker.logSweepEvents(
649
+ logger,
650
+ currentReferenceTimestampMs,
651
+ this.unreferencedNodesState,
652
+ this.completedRuns,
653
+ this.getLastSummaryTimestampMs(),
654
+ );
554
655
 
555
- if (this.configs.shouldRunSweep) {
556
- updatedGCData = this.runSweepPhase(sweepReadyNodes, gcData);
557
- } else if (this.configs.testMode) {
558
- // If we are running in GC test mode, delete objects for unused routes. This enables testing scenarios
559
- // involving access to deleted data.
656
+ /**
657
+ * Currently, there are 3 modes for sweep:
658
+ * Test mode - Unreferenced nodes are immediately deleted without waiting for them to be sweep ready.
659
+ * Tombstone mode - Sweep ready nodes are marked as tombstones instead of being deleted.
660
+ * Sweep mode - Sweep ready nodes are deleted.
661
+ *
662
+ * These modes serve as staging for applications that want to enable sweep by providing an incremental
663
+ * way to test and validate sweep works as expected.
664
+ */
665
+ if (this.configs.testMode) {
666
+ // If we are running in GC test mode, unreferenced nodes (gcResult.deletedNodeIds) are deleted.
560
667
  this.runtime.updateUnusedRoutes(gcResult.deletedNodeIds);
561
- } else if (this.configs.tombstoneMode) {
668
+ return [];
669
+ }
670
+
671
+ if (this.configs.tombstoneMode) {
562
672
  this.tombstones = sweepReadyNodes;
563
673
  // If we are running in GC tombstone mode, update tombstoned routes. This enables testing scenarios
564
674
  // involving access to "deleted" data without actually deleting the data from summaries.
565
- // Note: we will not tombstone in test mode.
566
675
  this.runtime.updateTombstonedRoutes(this.tombstones);
676
+ return [];
567
677
  }
568
678
 
569
- this.gcDataFromLastRun = cloneGCData(updatedGCData);
679
+ if (!this.configs.shouldRunSweep) {
680
+ return [];
681
+ }
570
682
 
571
- // Log pending unreferenced events such as a node being used after inactive. This is done after GC runs and
572
- // updates its state so that we don't send false positives based on intermediate state. For example, we may get
573
- // reference to an unreferenced node from another unreferenced node which means the node wasn't revived.
574
- await this.logUnreferencedEvents(logger);
683
+ // 1. Call the runtime to delete sweep ready nodes. The runtime returns a list of nodes it deleted.
684
+ // TODO: GC:Validation - validate that removed routes are not double deleted and that the child routes of
685
+ // removed routes are deleted as well.
686
+ const deletedNodeIds = this.runtime.deleteSweepReadyNodes(sweepReadyNodes);
575
687
 
576
- return gcStats;
688
+ // 2. Clear unreferenced state tracking for deleted nodes.
689
+ for (const nodeId of deletedNodeIds) {
690
+ const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
691
+ // TODO: GC:Validation - assert that the nodeStateTracker is defined
692
+ if (nodeStateTracker !== undefined) {
693
+ // Stop tracking so as to clear out any running timers.
694
+ nodeStateTracker.stopTracking();
695
+ // Delete the node as we don't need to track it any more.
696
+ this.unreferencedNodesState.delete(nodeId);
697
+ }
698
+ // TODO: GC:Validation - assert that the deleted node is not a duplicate
699
+ this.deletedNodes.add(nodeId);
700
+ }
701
+ return deletedNodeIds;
702
+ }
703
+
704
+ /**
705
+ * Since GC runs periodically, the GC data that is generated only tells us the state of the world at that point in
706
+ * time. There can be nodes that were referenced in between two runs and their unreferenced state needs to be
707
+ * updated. For example, in the following scenarios not updating the unreferenced timestamp can lead to deletion of
708
+ * these objects while there can be in-memory references to them:
709
+ * 1. A node transitions from `unreferenced -> referenced -> unreferenced` between two runs. When the reference is
710
+ * added, the object may have been accessed and in-memory reference to it added.
711
+ * 2. A reference is added from one unreferenced node to one or more unreferenced nodes. Even though the node[s] were
712
+ * unreferenced, they could have been accessed and in-memory reference to them added.
713
+ *
714
+ * This function identifies nodes that were referenced since the last run.
715
+ * If these nodes are currently unreferenced, they will be assigned new unreferenced state by the current run.
716
+ *
717
+ * @returns - a list of all nodes referenced from the last local summary until now.
718
+ */
719
+ private findAllNodesReferencedBetweenGCs(
720
+ currentGCData: IGarbageCollectionData,
721
+ previousGCData: IGarbageCollectionData | undefined,
722
+ logger: ITelemetryLogger,
723
+ ): string[] | undefined {
724
+ // If we haven't run GC before there is nothing to do.
725
+ // No previousGCData, means nothing is unreferenced, and there are no reference state trackers to clear
726
+ if (previousGCData === undefined) {
727
+ return undefined;
728
+ }
729
+
730
+ /**
731
+ * If there are references that were not explicitly notified to GC, log an error because this should never happen.
732
+ * If it does, this may result in the unreferenced timestamps of these nodes not updated when they were referenced.
733
+ */
734
+ this.telemetryTracker.logIfMissingExplicitReferences(
735
+ currentGCData,
736
+ previousGCData,
737
+ this.newReferencesSinceLastRun,
738
+ logger,
739
+ );
740
+
741
+ // No references were added since the last run so we don't have to update reference states of any unreferenced
742
+ // nodes. There is no in between state at this point.
743
+ if (this.newReferencesSinceLastRun.size === 0) {
744
+ return undefined;
745
+ }
746
+
747
+ /**
748
+ * Generate a super set of the GC data that contains the nodes and edges from last run, plus any new node and
749
+ * edges that have been added since then. To do this, combine the GC data from the last run and the current
750
+ * run, and then add the references since last run.
751
+ *
752
+ * Note on why we need to combine the data from previous run, current run and all references in between -
753
+ * 1. We need data from last run because some of its references may have been deleted since then. If those
754
+ * references added new outbound references before they were deleted, we need to detect them.
755
+ *
756
+ * 2. We need new outbound references since last run because some of them may have been deleted later. If those
757
+ * references added new outbound references before they were deleted, we need to detect them.
758
+ *
759
+ * 3. We need data from the current run because currently we may not detect when DDSes are referenced:
760
+ * - We don't require DDSes handles to be stored in a referenced DDS.
761
+ * - A new data store may have "root" DDSes already created and we don't detect them today.
762
+ */
763
+ const gcDataSuperSet = concatGarbageCollectionData(previousGCData, currentGCData);
764
+ const newOutboundRoutesSinceLastRun: string[] = [];
765
+ this.newReferencesSinceLastRun.forEach((outboundRoutes: string[], sourceNodeId: string) => {
766
+ if (gcDataSuperSet.gcNodes[sourceNodeId] === undefined) {
767
+ gcDataSuperSet.gcNodes[sourceNodeId] = outboundRoutes;
768
+ } else {
769
+ gcDataSuperSet.gcNodes[sourceNodeId].push(...outboundRoutes);
770
+ }
771
+ newOutboundRoutesSinceLastRun.push(...outboundRoutes);
772
+ });
773
+
774
+ /**
775
+ * Run GC on the above reference graph starting with root and all new outbound routes. This will generate a
776
+ * list of all nodes that could have been referenced since the last run. If any of these nodes are unreferenced,
777
+ * stop tracking them and remove them from the unreferenced list.
778
+ * Note that some of these nodes may be unreferenced now and if so, the current run will mark them as
779
+ * unreferenced and add unreferenced state.
780
+ */
781
+ const gcResult = runGarbageCollection(gcDataSuperSet.gcNodes, [
782
+ "/",
783
+ ...newOutboundRoutesSinceLastRun,
784
+ ]);
785
+ return gcResult.referencedNodeIds;
577
786
  }
578
787
 
579
788
  /**
@@ -611,10 +820,10 @@ export class GarbageCollector implements IGarbageCollector {
611
820
  public getMetadata(): IGCMetadata {
612
821
  return {
613
822
  /**
614
- * If GC is enabled, the GC data is written using the current GC version and that is the gcFeature that goes
823
+ * If GC is enabled, the GC data is written using the GC version in effect and that is the gcFeature that goes
615
824
  * into the metadata blob. If GC is disabled, the gcFeature is 0.
616
825
  */
617
- gcFeature: this.configs.gcEnabled ? this.summaryStateTracker.currentGCVersion : 0,
826
+ gcFeature: this.configs.gcEnabled ? this.configs.gcVersionInEffect : 0,
618
827
  gcFeatureMatrix: this.configs.persistedGcFeatureMatrix,
619
828
  sessionExpiryTimeoutMs: this.configs.sessionExpiryTimeoutMs,
620
829
  sweepEnabled: false, // DEPRECATED - to be removed
@@ -622,14 +831,6 @@ export class GarbageCollector implements IGarbageCollector {
622
831
  };
623
832
  }
624
833
 
625
- /**
626
- * Returns a the GC details generated from the base summary. This is used to initialize the GC state of the nodes
627
- * in the container.
628
- */
629
- public async getBaseGCDetails(): Promise<IGarbageCollectionDetailsBase> {
630
- return this.baseGCDetailsP;
631
- }
632
-
633
834
  /**
634
835
  * Called to refresh the latest summary state. This happens when either a pending summary is acked or a snapshot
635
836
  * is downloaded and should be used to update the state.
@@ -686,18 +887,17 @@ export class GarbageCollector implements IGarbageCollector {
686
887
  return;
687
888
  }
688
889
 
689
- const nodeStateTracker = this.unreferencedNodesState.get(nodePath);
690
- if (nodeStateTracker && nodeStateTracker.state !== UnreferencedState.Active) {
691
- this.inactiveNodeUsed(
692
- reason,
693
- nodePath,
694
- nodeStateTracker,
695
- undefined /* fromNodeId */,
696
- packagePath,
697
- timestampMs,
698
- requestHeaders,
699
- );
700
- }
890
+ this.telemetryTracker.nodeUsed({
891
+ id: nodePath,
892
+ usageType: reason,
893
+ currentReferenceTimestampMs:
894
+ timestampMs ?? this.runtime.getCurrentReferenceTimestampMs(),
895
+ packagePath,
896
+ completedGCRuns: this.completedRuns,
897
+ isTombstoned: this.tombstones.includes(nodePath),
898
+ lastSummaryTime: this.getLastSummaryTimestampMs(),
899
+ viaHandle: requestHeaders?.[RuntimeHeaders.viaHandle],
900
+ });
701
901
  }
702
902
 
703
903
  /**
@@ -716,33 +916,16 @@ export class GarbageCollector implements IGarbageCollector {
716
916
  outboundRoutes.push(toNodePath);
717
917
  this.newReferencesSinceLastRun.set(fromNodePath, outboundRoutes);
718
918
 
719
- const nodeStateTracker = this.unreferencedNodesState.get(toNodePath);
720
- if (nodeStateTracker && nodeStateTracker.state !== UnreferencedState.Active) {
721
- this.inactiveNodeUsed("Revived", toNodePath, nodeStateTracker, fromNodePath);
722
- }
723
-
724
- if (this.tombstones.includes(toNodePath)) {
725
- const nodeType = this.runtime.getNodeType(toNodePath);
726
-
727
- let eventName = "GC_Tombstone_SubDatastore_Revived";
728
- if (nodeType === GCNodeType.DataStore) {
729
- eventName = "GC_Tombstone_Datastore_Revived";
730
- } else if (nodeType === GCNodeType.Blob) {
731
- eventName = "GC_Tombstone_Blob_Revived";
732
- }
733
-
734
- sendGCUnexpectedUsageEvent(
735
- this.mc,
736
- {
737
- eventName,
738
- category: "generic",
739
- url: toNodePath,
740
- nodeType,
741
- gcTombstoneEnforcementAllowed: this.runtime.gcTombstoneEnforcementAllowed,
742
- },
743
- undefined /* packagePath */,
744
- );
745
- }
919
+ this.telemetryTracker.nodeUsed({
920
+ id: toNodePath,
921
+ usageType: "Revived",
922
+ currentReferenceTimestampMs: this.runtime.getCurrentReferenceTimestampMs(),
923
+ packagePath: undefined,
924
+ completedGCRuns: this.completedRuns,
925
+ isTombstoned: this.tombstones.includes(toNodePath),
926
+ lastSummaryTime: this.getLastSummaryTimestampMs(),
927
+ fromId: fromNodePath,
928
+ });
746
929
  }
747
930
 
748
931
  /**
@@ -758,267 +941,6 @@ export class GarbageCollector implements IGarbageCollector {
758
941
  this.sessionExpiryTimer = undefined;
759
942
  }
760
943
 
761
- /**
762
- * Updates the state of the system as per the current GC run. It does the following:
763
- * 1. Sets up the current GC state as per the gcData.
764
- * 2. Starts tracking for nodes that have become unreferenced in this run.
765
- * 3. Clears tracking for nodes that were unreferenced but became referenced in this run.
766
- * @param gcData - The data representing the reference graph on which GC is run.
767
- * @param gcResult - The result of the GC run on the gcData.
768
- * @param currentReferenceTimestampMs - The timestamp to be used for unreferenced nodes' timestamp.
769
- * @returns - A list of sweep ready nodes. (Nodes ready to be deleted)
770
- */
771
- private updateMarkPhase(
772
- gcData: IGarbageCollectionData,
773
- gcResult: IGCResult,
774
- currentReferenceTimestampMs: number,
775
- logger: ITelemetryLogger,
776
- ) {
777
- // Get references from the current GC run + references between previous and current run and then update each
778
- // node's state
779
- const allNodesReferencedBetweenGCs =
780
- this.findAllNodesReferencedBetweenGCs(gcData, this.gcDataFromLastRun, logger) ??
781
- gcResult.referencedNodeIds;
782
- this.newReferencesSinceLastRun.clear();
783
-
784
- // Iterate through the referenced nodes and stop tracking if they were unreferenced before.
785
- for (const nodeId of allNodesReferencedBetweenGCs) {
786
- const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
787
- if (nodeStateTracker !== undefined) {
788
- // Stop tracking so as to clear out any running timers.
789
- nodeStateTracker.stopTracking();
790
- // Delete the node as we don't need to track it any more.
791
- this.unreferencedNodesState.delete(nodeId);
792
- }
793
- }
794
-
795
- /**
796
- * If a node became unreferenced in this run, start tracking it.
797
- * If a node was already unreferenced, update its tracking information. Since the current reference time is
798
- * from the ops seen, this will ensure that we keep updating the unreferenced state as time moves forward.
799
- *
800
- * If a node is sweep ready, store and then return it.
801
- */
802
- const sweepReadyNodes: string[] = [];
803
- for (const nodeId of gcResult.deletedNodeIds) {
804
- const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
805
- if (nodeStateTracker === undefined) {
806
- this.unreferencedNodesState.set(
807
- nodeId,
808
- new UnreferencedStateTracker(
809
- currentReferenceTimestampMs,
810
- this.configs.inactiveTimeoutMs,
811
- currentReferenceTimestampMs,
812
- this.configs.sweepTimeoutMs,
813
- ),
814
- );
815
- } else {
816
- nodeStateTracker.updateTracking(currentReferenceTimestampMs);
817
- if (nodeStateTracker.state === UnreferencedState.SweepReady) {
818
- sweepReadyNodes.push(nodeId);
819
- }
820
- }
821
- }
822
-
823
- return sweepReadyNodes;
824
- }
825
-
826
- /**
827
- * Deletes nodes from both the runtime and garbage collection
828
- * @param sweepReadyNodes - nodes that are ready to be deleted
829
- */
830
- private runSweepPhase(sweepReadyNodes: string[], gcData: IGarbageCollectionData) {
831
- // TODO: GC:Validation - validate that removed routes are not double deleted
832
- // TODO: GC:Validation - validate that the child routes of removed routes are deleted as well
833
- const sweptRoutes = this.runtime.deleteSweepReadyNodes(sweepReadyNodes);
834
- const updatedGCData = this.deleteSweptRoutes(sweptRoutes, gcData);
835
-
836
- for (const nodeId of sweptRoutes) {
837
- const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
838
- // TODO: GC:Validation - assert that the nodeStateTracker is defined
839
- if (nodeStateTracker !== undefined) {
840
- // Stop tracking so as to clear out any running timers.
841
- nodeStateTracker.stopTracking();
842
- // Delete the node as we don't need to track it any more.
843
- this.unreferencedNodesState.delete(nodeId);
844
- }
845
- // TODO: GC:Validation - assert that the deleted node is not a duplicate
846
- this.deletedNodes.add(nodeId);
847
- }
848
-
849
- return updatedGCData;
850
- }
851
-
852
- /**
853
- * @returns IGarbageCollectionData after deleting the sweptRoutes from the gcData
854
- */
855
- private deleteSweptRoutes(
856
- sweptRoutes: string[],
857
- gcData: IGarbageCollectionData,
858
- ): IGarbageCollectionData {
859
- const sweptRoutesSet = new Set<string>(sweptRoutes);
860
- const gcNodes: { [id: string]: string[] } = {};
861
- for (const [id, outboundRoutes] of Object.entries(gcData.gcNodes)) {
862
- if (!sweptRoutesSet.has(id)) {
863
- gcNodes[id] = Array.from(outboundRoutes);
864
- }
865
- }
866
-
867
- // TODO: GC:Validation - assert that the nodeId is in gcData
868
-
869
- return {
870
- gcNodes,
871
- };
872
- }
873
-
874
- /**
875
- * Since GC runs periodically, the GC data that is generated only tells us the state of the world at that point in
876
- * time. There can be nodes that were referenced in between two runs and their unreferenced state needs to be
877
- * updated. For example, in the following scenarios not updating the unreferenced timestamp can lead to deletion of
878
- * these objects while there can be in-memory referenced to it:
879
- * 1. A node transitions from `unreferenced -> referenced -> unreferenced` between two runs. When the reference is
880
- * added, the object may have been accessed and in-memory reference to it added.
881
- * 2. A reference is added from one unreferenced node to one or more unreferenced nodes. Even though the node[s] were
882
- * unreferenced, they could have been accessed and in-memory reference to them added.
883
- *
884
- * This function identifies nodes that were referenced since the last run.
885
- * If these nodes are currently unreferenced, they will be assigned new unreferenced state by the current run.
886
- *
887
- * @returns - a list of all nodes referenced from the last local summary until now.
888
- */
889
- private findAllNodesReferencedBetweenGCs(
890
- currentGCData: IGarbageCollectionData,
891
- previousGCData: IGarbageCollectionData | undefined,
892
- logger: ITelemetryLogger,
893
- ): string[] | undefined {
894
- // If we haven't run GC before there is nothing to do.
895
- // No previousGCData, means nothing is unreferenced, and there are no reference state trackers to clear
896
- if (previousGCData === undefined) {
897
- return undefined;
898
- }
899
-
900
- // Find any references that haven't been identified correctly.
901
- const missingExplicitReferences = this.findMissingExplicitReferences(
902
- currentGCData,
903
- previousGCData,
904
- this.newReferencesSinceLastRun,
905
- );
906
-
907
- if (missingExplicitReferences.length > 0) {
908
- missingExplicitReferences.forEach((missingExplicitReference) => {
909
- logger.sendErrorEvent({
910
- eventName: "gcUnknownOutboundReferences",
911
- gcNodeId: missingExplicitReference[0],
912
- gcRoutes: JSON.stringify(missingExplicitReference[1]),
913
- });
914
- });
915
- }
916
-
917
- // No references were added since the last run so we don't have to update reference states of any unreferenced
918
- // nodes. There is no in between state at this point.
919
- if (this.newReferencesSinceLastRun.size === 0) {
920
- return undefined;
921
- }
922
-
923
- /**
924
- * Generate a super set of the GC data that contains the nodes and edges from last run, plus any new node and
925
- * edges that have been added since then. To do this, combine the GC data from the last run and the current
926
- * run, and then add the references since last run.
927
- *
928
- * Note on why we need to combine the data from previous run, current run and all references in between -
929
- * 1. We need data from last run because some of its references may have been deleted since then. If those
930
- * references added new outbound references before they were deleted, we need to detect them.
931
- *
932
- * 2. We need new outbound references since last run because some of them may have been deleted later. If those
933
- * references added new outbound references before they were deleted, we need to detect them.
934
- *
935
- * 3. We need data from the current run because currently we may not detect when DDSes are referenced:
936
- * - We don't require DDSes handles to be stored in a referenced DDS.
937
- * - A new data store may have "root" DDSes already created and we don't detect them today.
938
- */
939
- const gcDataSuperSet = concatGarbageCollectionData(previousGCData, currentGCData);
940
- const newOutboundRoutesSinceLastRun: string[] = [];
941
- this.newReferencesSinceLastRun.forEach((outboundRoutes: string[], sourceNodeId: string) => {
942
- if (gcDataSuperSet.gcNodes[sourceNodeId] === undefined) {
943
- gcDataSuperSet.gcNodes[sourceNodeId] = outboundRoutes;
944
- } else {
945
- gcDataSuperSet.gcNodes[sourceNodeId].push(...outboundRoutes);
946
- }
947
- newOutboundRoutesSinceLastRun.push(...outboundRoutes);
948
- });
949
-
950
- /**
951
- * Run GC on the above reference graph starting with root and all new outbound routes. This will generate a
952
- * list of all nodes that could have been referenced since the last run. If any of these nodes are unreferenced,
953
- * unreferenced, stop tracking them and remove from unreferenced list.
954
- * Note that some of these nodes may be unreferenced now and if so, the current run will mark them as
955
- * unreferenced and add unreferenced state.
956
- */
957
- const gcResult = runGarbageCollection(gcDataSuperSet.gcNodes, [
958
- "/",
959
- ...newOutboundRoutesSinceLastRun,
960
- ]);
961
- return gcResult.referencedNodeIds;
962
- }
963
-
964
- /**
965
- * Finds all new references or outbound routes in the current graph that haven't been explicitly notified to GC.
966
- * The principle is that every new reference or outbound route must be notified to GC via the
967
- * addedOutboundReference method. It it hasn't, its a bug and we want to identify these scenarios.
968
- *
969
- * In more simple terms:
970
- * Missing Explicit References = Current References - Previous References - Explicitly Added References;
971
- *
972
- * @param currentGCData - The GC data (reference graph) from the current GC run.
973
- * @param previousGCData - The GC data (reference graph) from the previous GC run.
974
- * @param explicitReferences - New references added explicity between the previous and the current run.
975
- * @returns - a list of missing explicit references
976
- */
977
- private findMissingExplicitReferences(
978
- currentGCData: IGarbageCollectionData,
979
- previousGCData: IGarbageCollectionData,
980
- explicitReferences: Map<string, string[]>,
981
- ): [string, string[]][] {
982
- assert(
983
- previousGCData !== undefined,
984
- 0x2b7 /* "Can't validate correctness without GC data from last run" */,
985
- );
986
-
987
- const currentGraph = Object.entries(currentGCData.gcNodes);
988
- const missingExplicitReferences: [string, string[]][] = [];
989
- currentGraph.forEach(([nodeId, currentOutboundRoutes]) => {
990
- const previousRoutes = previousGCData.gcNodes[nodeId] ?? [];
991
- const explicitRoutes = explicitReferences.get(nodeId) ?? [];
992
- const missingExplicitRoutes: string[] = [];
993
-
994
- /**
995
- * 1. For routes in the current GC data, routes that were not present in previous GC data and did not have
996
- * explicit references should be added to missing explicit routes list.
997
- * 2. Only include data store and blob routes since GC only works for these two.
998
- * Note: Due to a bug with de-duped blobs, only adding data store routes for now.
999
- * 3. Ignore DDS routes to their parent datastores since those were added implicitly. So, there won't be
1000
- * explicit routes to them.
1001
- */
1002
- currentOutboundRoutes.forEach((route) => {
1003
- const nodeType = this.runtime.getNodeType(route);
1004
- if (
1005
- (nodeType === GCNodeType.DataStore || nodeType === GCNodeType.Blob) &&
1006
- !nodeId.startsWith(route) &&
1007
- !previousRoutes.includes(route) &&
1008
- !explicitRoutes.includes(route)
1009
- ) {
1010
- missingExplicitRoutes.push(route);
1011
- }
1012
- });
1013
- if (missingExplicitRoutes.length > 0) {
1014
- missingExplicitReferences.push([nodeId, missingExplicitRoutes]);
1015
- }
1016
- });
1017
-
1018
- // Ideally missingExplicitReferences should always have a size 0
1019
- return missingExplicitReferences;
1020
- }
1021
-
1022
944
  /**
1023
945
  * Generates the stats of a garbage collection run from the given results of the run.
1024
946
  * @param gcResult - The result of a GC run.
@@ -1081,171 +1003,4 @@ export class GarbageCollector implements IGarbageCollector {
1081
1003
 
1082
1004
  return gcStats;
1083
1005
  }
1084
-
1085
- /**
1086
- * For nodes that are ready to sweep, log an event for now. Until we start running sweep which deletes objects,
1087
- * this will give us a view into how much deleted content a container has.
1088
- */
1089
- private logSweepEvents(logger: ITelemetryLogger, currentReferenceTimestampMs: number) {
1090
- if (
1091
- this.mc.config.getBoolean(disableSweepLogKey) === true ||
1092
- this.configs.sweepTimeoutMs === undefined
1093
- ) {
1094
- return;
1095
- }
1096
-
1097
- this.unreferencedNodesState.forEach((nodeStateTracker, nodeId) => {
1098
- if (nodeStateTracker.state !== UnreferencedState.SweepReady) {
1099
- return;
1100
- }
1101
-
1102
- const nodeType = this.runtime.getNodeType(nodeId);
1103
- if (nodeType !== GCNodeType.DataStore && nodeType !== GCNodeType.Blob) {
1104
- return;
1105
- }
1106
-
1107
- // Log deleted event for each node only once to reduce noise in telemetry.
1108
- const uniqueEventId = `Deleted-${nodeId}`;
1109
- if (this.loggedUnreferencedEvents.has(uniqueEventId)) {
1110
- return;
1111
- }
1112
- this.loggedUnreferencedEvents.add(uniqueEventId);
1113
- logger.sendTelemetryEvent({
1114
- eventName: "GCObjectDeleted",
1115
- id: nodeId,
1116
- type: nodeType,
1117
- age: currentReferenceTimestampMs - nodeStateTracker.unreferencedTimestampMs,
1118
- timeout: this.configs.sweepTimeoutMs,
1119
- completedGCRuns: this.completedRuns,
1120
- lastSummaryTime: this.getLastSummaryTimestampMs(),
1121
- });
1122
- });
1123
- }
1124
-
1125
- /**
1126
- * Called when an inactive node is used after. Queue up an event that will be logged next time GC runs.
1127
- */
1128
- private inactiveNodeUsed(
1129
- usageType: "Changed" | "Loaded" | "Revived",
1130
- nodeId: string,
1131
- nodeStateTracker: UnreferencedStateTracker,
1132
- fromNodeId?: string,
1133
- packagePath?: readonly string[],
1134
- currentReferenceTimestampMs = this.runtime.getCurrentReferenceTimestampMs(),
1135
- requestHeaders?: IRequestHeader,
1136
- ) {
1137
- // If there is no reference timestamp to work with, no ops have been processed after creation. If so, skip
1138
- // logging as nothing interesting would have happened worth logging.
1139
- // If the node is active, skip logging.
1140
- if (
1141
- currentReferenceTimestampMs === undefined ||
1142
- nodeStateTracker.state === UnreferencedState.Active
1143
- ) {
1144
- return;
1145
- }
1146
-
1147
- // We only care about data stores and attachment blobs for this telemetry since GC only marks these objects
1148
- // as unreferenced. Also, if an inactive DDS is used, the corresponding data store store will also be used.
1149
- const nodeType = this.runtime.getNodeType(nodeId);
1150
- if (nodeType !== GCNodeType.DataStore && nodeType !== GCNodeType.Blob) {
1151
- return;
1152
- }
1153
-
1154
- const state = nodeStateTracker.state;
1155
- const uniqueEventId = `${state}-${nodeId}-${usageType}`;
1156
- if (this.loggedUnreferencedEvents.has(uniqueEventId)) {
1157
- return;
1158
- }
1159
- this.loggedUnreferencedEvents.add(uniqueEventId);
1160
-
1161
- const propsToLog = {
1162
- id: nodeId,
1163
- type: nodeType,
1164
- unrefTime: nodeStateTracker.unreferencedTimestampMs,
1165
- age: currentReferenceTimestampMs - nodeStateTracker.unreferencedTimestampMs,
1166
- timeout:
1167
- nodeStateTracker.state === UnreferencedState.Inactive
1168
- ? this.configs.inactiveTimeoutMs
1169
- : this.configs.sweepTimeoutMs,
1170
- completedGCRuns: this.completedRuns,
1171
- lastSummaryTime: this.getLastSummaryTimestampMs(),
1172
- ...this.createContainerMetadata,
1173
- viaHandle: requestHeaders?.[RuntimeHeaders.viaHandle],
1174
- fromId: fromNodeId,
1175
- };
1176
-
1177
- // For summarizer client, queue the event so it is logged the next time GC runs if the event is still valid.
1178
- // For non-summarizer client, log the event now since GC won't run on it. This may result in false positives
1179
- // but it's a good signal nonetheless and we can consume it with a grain of salt.
1180
- // Inactive errors are usages of Objects that are unreferenced for at least a period of 7 days.
1181
- // SweepReady errors are usages of Objects that will be deleted by GC Sweep!
1182
- if (this.isSummarizerClient) {
1183
- this.pendingEventsQueue.push({ ...propsToLog, usageType, state });
1184
- } else {
1185
- // For non-summarizer clients, only log "Loaded" type events since these objects may not be loaded in the
1186
- // summarizer clients if they are based off of user actions (such as scrolling to content for these objects)
1187
- // Events generated:
1188
- // InactiveObject_Loaded, SweepReadyObject_Loaded
1189
- if (usageType === "Loaded") {
1190
- const event = {
1191
- ...propsToLog,
1192
- eventName: `${state}Object_${usageType}`,
1193
- pkg: packagePathToTelemetryProperty(packagePath),
1194
- stack: generateStack(),
1195
- };
1196
-
1197
- // Do not log the inactive object x events as error events as they are not the best signal for
1198
- // detecting something wrong with GC either from the partner or from the runtime itself.
1199
- if (state === UnreferencedState.Inactive) {
1200
- this.mc.logger.sendTelemetryEvent(event);
1201
- } else {
1202
- this.mc.logger.sendErrorEvent(event);
1203
- }
1204
- }
1205
- }
1206
- }
1207
-
1208
- private async logUnreferencedEvents(logger: ITelemetryLogger) {
1209
- // Events sent come only from the summarizer client. In between summaries, events are pushed to a queue and at
1210
- // summary time they are then logged.
1211
- // Events generated:
1212
- // InactiveObject_Loaded, InactiveObject_Changed, InactiveObject_Revived
1213
- // SweepReadyObject_Loaded, SweepReadyObject_Changed, SweepReadyObject_Revived
1214
- for (const eventProps of this.pendingEventsQueue) {
1215
- const { usageType, state, ...propsToLog } = eventProps;
1216
- /**
1217
- * Revived event is logged only if the node is active. If the node is not active, the reference to it was
1218
- * from another unreferenced node and this scenario is not interesting to log.
1219
- * Loaded and Changed events are logged only if the node is not active. If the node is active, it was
1220
- * revived and a Revived event will be logged for it.
1221
- */
1222
- const nodeStateTracker = this.unreferencedNodesState.get(eventProps.id);
1223
- const active =
1224
- nodeStateTracker === undefined ||
1225
- nodeStateTracker.state === UnreferencedState.Active;
1226
- if ((usageType === "Revived") === active) {
1227
- const pkg = await this.getNodePackagePath(eventProps.id);
1228
- const fromPkg = eventProps.fromId
1229
- ? await this.getNodePackagePath(eventProps.fromId)
1230
- : undefined;
1231
- const event = {
1232
- ...propsToLog,
1233
- eventName: `${state}Object_${usageType}`,
1234
- pkg: pkg
1235
- ? { value: pkg.join("/"), tag: TelemetryDataTag.CodeArtifact }
1236
- : undefined,
1237
- fromPkg: fromPkg
1238
- ? { value: fromPkg.join("/"), tag: TelemetryDataTag.CodeArtifact }
1239
- : undefined,
1240
- };
1241
-
1242
- if (state === UnreferencedState.Inactive) {
1243
- logger.sendTelemetryEvent(event);
1244
- } else {
1245
- logger.sendErrorEvent(event);
1246
- }
1247
- }
1248
- }
1249
- this.pendingEventsQueue = [];
1250
- }
1251
1006
  }