@fluidframework/container-runtime 0.52.1 → 0.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/dist/containerHandleContext.d.ts +0 -1
  2. package/dist/containerHandleContext.d.ts.map +1 -1
  3. package/dist/containerHandleContext.js +0 -1
  4. package/dist/containerHandleContext.js.map +1 -1
  5. package/dist/containerRuntime.d.ts +43 -19
  6. package/dist/containerRuntime.d.ts.map +1 -1
  7. package/dist/containerRuntime.js +201 -111
  8. package/dist/containerRuntime.js.map +1 -1
  9. package/dist/dataStoreContext.d.ts +33 -4
  10. package/dist/dataStoreContext.d.ts.map +1 -1
  11. package/dist/dataStoreContext.js +45 -17
  12. package/dist/dataStoreContext.js.map +1 -1
  13. package/dist/dataStores.d.ts +14 -10
  14. package/dist/dataStores.d.ts.map +1 -1
  15. package/dist/dataStores.js +73 -41
  16. package/dist/dataStores.js.map +1 -1
  17. package/dist/garbageCollection.d.ts +82 -15
  18. package/dist/garbageCollection.d.ts.map +1 -1
  19. package/dist/garbageCollection.js +359 -26
  20. package/dist/garbageCollection.js.map +1 -1
  21. package/dist/index.d.ts +2 -2
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +11 -2
  24. package/dist/index.js.map +1 -1
  25. package/dist/packageVersion.d.ts +1 -1
  26. package/dist/packageVersion.js +1 -1
  27. package/dist/packageVersion.js.map +1 -1
  28. package/dist/runningSummarizer.d.ts +3 -2
  29. package/dist/runningSummarizer.d.ts.map +1 -1
  30. package/dist/runningSummarizer.js +6 -6
  31. package/dist/runningSummarizer.js.map +1 -1
  32. package/dist/summarizer.d.ts +23 -3
  33. package/dist/summarizer.d.ts.map +1 -1
  34. package/dist/summarizer.js +135 -45
  35. package/dist/summarizer.js.map +1 -1
  36. package/dist/summarizerTypes.d.ts +3 -10
  37. package/dist/summarizerTypes.d.ts.map +1 -1
  38. package/dist/summarizerTypes.js.map +1 -1
  39. package/dist/summaryFormat.d.ts +10 -1
  40. package/dist/summaryFormat.d.ts.map +1 -1
  41. package/dist/summaryFormat.js +2 -1
  42. package/dist/summaryFormat.js.map +1 -1
  43. package/dist/summaryGenerator.d.ts.map +1 -1
  44. package/dist/summaryGenerator.js +1 -3
  45. package/dist/summaryGenerator.js.map +1 -1
  46. package/dist/summaryManager.d.ts +0 -15
  47. package/dist/summaryManager.d.ts.map +1 -1
  48. package/dist/summaryManager.js +1 -35
  49. package/dist/summaryManager.js.map +1 -1
  50. package/lib/containerHandleContext.d.ts +0 -1
  51. package/lib/containerHandleContext.d.ts.map +1 -1
  52. package/lib/containerHandleContext.js +0 -1
  53. package/lib/containerHandleContext.js.map +1 -1
  54. package/lib/containerRuntime.d.ts +43 -19
  55. package/lib/containerRuntime.d.ts.map +1 -1
  56. package/lib/containerRuntime.js +206 -117
  57. package/lib/containerRuntime.js.map +1 -1
  58. package/lib/dataStoreContext.d.ts +33 -4
  59. package/lib/dataStoreContext.d.ts.map +1 -1
  60. package/lib/dataStoreContext.js +45 -17
  61. package/lib/dataStoreContext.js.map +1 -1
  62. package/lib/dataStores.d.ts +14 -10
  63. package/lib/dataStores.d.ts.map +1 -1
  64. package/lib/dataStores.js +76 -44
  65. package/lib/dataStores.js.map +1 -1
  66. package/lib/garbageCollection.d.ts +82 -15
  67. package/lib/garbageCollection.d.ts.map +1 -1
  68. package/lib/garbageCollection.js +361 -28
  69. package/lib/garbageCollection.js.map +1 -1
  70. package/lib/index.d.ts +2 -2
  71. package/lib/index.d.ts.map +1 -1
  72. package/lib/index.js +2 -1
  73. package/lib/index.js.map +1 -1
  74. package/lib/packageVersion.d.ts +1 -1
  75. package/lib/packageVersion.js +1 -1
  76. package/lib/packageVersion.js.map +1 -1
  77. package/lib/runningSummarizer.d.ts +3 -2
  78. package/lib/runningSummarizer.d.ts.map +1 -1
  79. package/lib/runningSummarizer.js +6 -6
  80. package/lib/runningSummarizer.js.map +1 -1
  81. package/lib/summarizer.d.ts +23 -3
  82. package/lib/summarizer.d.ts.map +1 -1
  83. package/lib/summarizer.js +135 -45
  84. package/lib/summarizer.js.map +1 -1
  85. package/lib/summarizerTypes.d.ts +3 -10
  86. package/lib/summarizerTypes.d.ts.map +1 -1
  87. package/lib/summarizerTypes.js.map +1 -1
  88. package/lib/summaryFormat.d.ts +10 -1
  89. package/lib/summaryFormat.d.ts.map +1 -1
  90. package/lib/summaryFormat.js +1 -0
  91. package/lib/summaryFormat.js.map +1 -1
  92. package/lib/summaryGenerator.d.ts.map +1 -1
  93. package/lib/summaryGenerator.js +1 -3
  94. package/lib/summaryGenerator.js.map +1 -1
  95. package/lib/summaryManager.d.ts +0 -15
  96. package/lib/summaryManager.d.ts.map +1 -1
  97. package/lib/summaryManager.js +1 -34
  98. package/lib/summaryManager.js.map +1 -1
  99. package/package.json +14 -14
  100. package/src/containerHandleContext.ts +0 -1
  101. package/src/containerRuntime.ts +280 -140
  102. package/src/dataStoreContext.ts +59 -20
  103. package/src/dataStores.ts +116 -54
  104. package/src/garbageCollection.ts +492 -29
  105. package/src/index.ts +20 -2
  106. package/src/packageVersion.ts +1 -1
  107. package/src/runningSummarizer.ts +12 -10
  108. package/src/summarizer.ts +154 -53
  109. package/src/summarizerTypes.ts +3 -11
  110. package/src/summaryFormat.ts +11 -1
  111. package/src/summaryGenerator.ts +2 -3
  112. package/src/summaryManager.ts +2 -49
  113. package/dist/localStorageFeatureGates.d.ts +0 -13
  114. package/dist/localStorageFeatureGates.d.ts.map +0 -1
  115. package/dist/localStorageFeatureGates.js +0 -31
  116. package/dist/localStorageFeatureGates.js.map +0 -1
  117. package/lib/localStorageFeatureGates.d.ts +0 -13
  118. package/lib/localStorageFeatureGates.d.ts.map +0 -1
  119. package/lib/localStorageFeatureGates.js +0 -27
  120. package/lib/localStorageFeatureGates.js.map +0 -1
  121. package/src/localStorageFeatureGates.ts +0 -27
@@ -4,30 +4,62 @@
4
4
  */
5
5
 
6
6
  import { ITelemetryLogger } from "@fluidframework/common-definitions";
7
- import { runGarbageCollection } from "@fluidframework/garbage-collector";
7
+ import { assert, LazyPromise, Timer } from "@fluidframework/common-utils";
8
+ import {
9
+ cloneGCData,
10
+ concatGarbageCollectionStates,
11
+ concatGarbageCollectionData,
12
+ IGCResult,
13
+ runGarbageCollection,
14
+ unpackChildNodesGCDetails,
15
+ } from "@fluidframework/garbage-collector";
8
16
  import { ISnapshotTree } from "@fluidframework/protocol-definitions";
9
- import { IGarbageCollectionData } from "@fluidframework/runtime-definitions";
10
- import { ReadAndParseBlob, RefreshSummaryResult } from "@fluidframework/runtime-utils";
11
- import { ChildLogger, PerformanceEvent } from "@fluidframework/telemetry-utils";
17
+ import {
18
+ gcBlobKey,
19
+ IGarbageCollectionData,
20
+ IGarbageCollectionState,
21
+ IGarbageCollectionSummaryDetails,
22
+ ISummaryTreeWithStats,
23
+ } from "@fluidframework/runtime-definitions";
24
+ import {
25
+ ReadAndParseBlob,
26
+ RefreshSummaryResult,
27
+ SummaryTreeBuilder,
28
+ } from "@fluidframework/runtime-utils";
29
+ import {
30
+ ChildLogger,
31
+ loggerToMonitoringContext,
32
+ MonitoringContext,
33
+ PerformanceEvent,
34
+ } from "@fluidframework/telemetry-utils";
12
35
 
13
36
  import { IGCRuntimeOptions } from "./containerRuntime";
14
- import { getLocalStorageFeatureGate } from "./localStorageFeatureGates";
37
+ import { getSummaryForDatastores } from "./dataStores";
15
38
  import {
16
39
  getGCVersion,
17
40
  GCVersion,
18
41
  IContainerRuntimeMetadata,
19
42
  metadataBlobName,
43
+ ReadFluidDataStoreAttributes,
44
+ dataStoreAttributesBlobName,
20
45
  } from "./summaryFormat";
21
46
 
22
47
  /** This is the current version of garbage collection. */
23
48
  const GCVersion = 1;
24
49
 
50
+ // The key for the GC tree in summary.
51
+ export const gcTreeKey = "gc";
52
+ // The prefix for GC blobs in the GC tree in summary.
53
+ export const gcBlobPrefix = "__gc";
54
+
25
55
  // Local storage key to turn GC on / off.
26
- const runGCKey = "FluidRunGC";
56
+ const runGCKey = "Fluid.GarbageCollection.RunGC";
27
57
  // Local storage key to turn GC test mode on / off.
28
- const gcTestModeKey = "FluidGCTestMode";
58
+ const gcTestModeKey = "Fluid.GarbageCollection.GCTestMode";
29
59
  // Local storage key to turn GC sweep on / off.
30
- const runSweepKey = "FluidRunSweep";
60
+ const runSweepKey = "Fluid.GarbageCollection.RunSweep";
61
+
62
+ const defaultDeleteTimeoutMs = 7 * 24 * 60 * 60 * 1000; // 7 days
31
63
 
32
64
  /** The used state statistics of a node. */
33
65
  export interface IUsedStateStats {
@@ -48,7 +80,7 @@ export interface IGarbageCollectionRuntime {
48
80
  /** Returns the garbage collection data of the runtime. */
49
81
  getGCData(fullGC?: boolean): Promise<IGarbageCollectionData>;
50
82
  /** After GC has run, called to notify the runtime of routes that are used in it. */
51
- updateUsedRoutes(usedRoutes: string[]): IUsedStateStats;
83
+ updateUsedRoutes(usedRoutes: string[], gcTimestamp?: number): IUsedStateStats;
52
84
  }
53
85
 
54
86
  /** Defines the contract for the garbage collector. */
@@ -57,18 +89,79 @@ export interface IGarbageCollector {
57
89
  readonly shouldRunGC: boolean;
58
90
  /**
59
91
  * This tracks two things:
60
- * 1. Whether GC is enabled - If this is 0, GC is disabled. If this is > 0, GC is enabled.
92
+ * 1. Whether GC is enabled - If this is 0, GC is disabled. If this is greater than 0, GC is enabled.
61
93
  * 2. If GC is enabled, the version of GC used to generate the GC data written in a summary.
62
94
  */
63
95
  readonly gcSummaryFeatureVersion: number;
64
96
  /** Tells whether the GC version has changed compared to the version in the latest summary. */
65
97
  readonly hasGCVersionChanged: boolean;
98
+ /** Tells whether GC data should be written to the root of the summary tree. */
99
+ readonly writeDataAtRoot: boolean;
66
100
  /** Run garbage collection and update the reference / used state of the system. */
67
101
  collectGarbage(
68
102
  options: { logger?: ITelemetryLogger, runGC?: boolean, runSweep?: boolean, fullGC?: boolean },
69
103
  ): Promise<IGCStats>;
104
+ /** Summarizes the GC data and returns it as a summary tree. */
105
+ summarize(): ISummaryTreeWithStats | undefined;
106
+ /** Returns a map of each data store id to its GC details in the base summary. */
107
+ getDataStoreBaseGCDetails(): Promise<Map<string, IGarbageCollectionSummaryDetails>>;
70
108
  /** Called when the latest summary of the system has been refreshed. */
71
109
  latestSummaryStateRefreshed(result: RefreshSummaryResult, readAndParseBlob: ReadAndParseBlob): Promise<void>;
110
+ /** Called when a node is changed. Used to detect and log when an inactive node is changed. */
111
+ nodeChanged(id: string): void;
112
+ /** Called when a reference is added to a node. Used to identify nodes that were referenced between summaries. */
113
+ addedOutboundReference(fromNodeId: string, toNodeId: string): void;
114
+ }
115
+
116
+ /**
117
+ * Helper class that tracks the state of an unreferenced node such as the time it was unreferenced. It also sets
118
+ * the node's state to inactive if it remains unreferenced for a given amount of time (inactiveTimeoutMs).
119
+ */
120
+ class UnreferencedStateTracker {
121
+ private inactive: boolean = false;
122
+ // Keeps track of all inactive events that are logged. This is used to limit the log generation for each event to 1
123
+ // so that it is not noisy.
124
+ private readonly inactiveEventsLogged: Set<string> = new Set();
125
+ private readonly timer: Timer | undefined;
126
+
127
+ constructor(
128
+ public readonly unreferencedTimestampMs: number,
129
+ inactiveTimeoutMs: number,
130
+ ) {
131
+ // If the timeout has already expired, the node should become inactive immediately. Otherwise, start a timer of
132
+ // inactiveTimeoutMs after which the node will become inactive.
133
+ if (inactiveTimeoutMs <= 0) {
134
+ this.inactive = true;
135
+ } else {
136
+ this.timer = new Timer(inactiveTimeoutMs, () => { this.inactive = true; });
137
+ this.timer.start();
138
+ }
139
+ }
140
+
141
+ /** Stop tracking this node. Reset the unreferenced timer, if any, and reset inactive state. */
142
+ public stopTracking() {
143
+ this.timer?.clear();
144
+ this.inactive = false;
145
+ }
146
+
147
+ /** Logs an error with the given properties if the node is inactive. */
148
+ public logIfInactive(
149
+ logger: ITelemetryLogger,
150
+ eventName: string,
151
+ currentTimestampMs: number,
152
+ deleteTimeoutMs: number,
153
+ inactiveNodeId: string,
154
+ ) {
155
+ if (this.inactive && !this.inactiveEventsLogged.has(eventName)) {
156
+ logger.sendErrorEvent({
157
+ eventName,
158
+ age: currentTimestampMs - this.unreferencedTimestampMs,
159
+ timeout: deleteTimeoutMs,
160
+ id: inactiveNodeId,
161
+ });
162
+ this.inactiveEventsLogged.add(eventName);
163
+ }
164
+ }
72
165
  }
73
166
 
74
167
  /**
@@ -80,11 +173,24 @@ export class GarbageCollector implements IGarbageCollector {
80
173
  provider: IGarbageCollectionRuntime,
81
174
  gcOptions: IGCRuntimeOptions,
82
175
  deleteUnusedRoutes: (unusedRoutes: string[]) => void,
176
+ getCurrentTimestampMs: () => number,
177
+ baseSnapshot: ISnapshotTree | undefined,
178
+ readAndParseBlob: ReadAndParseBlob,
83
179
  baseLogger: ITelemetryLogger,
84
180
  existing: boolean,
85
181
  metadata?: IContainerRuntimeMetadata,
86
182
  ): IGarbageCollector {
87
- return new GarbageCollector(provider, gcOptions, deleteUnusedRoutes, baseLogger, existing, metadata);
183
+ return new GarbageCollector(
184
+ provider,
185
+ gcOptions,
186
+ deleteUnusedRoutes,
187
+ getCurrentTimestampMs,
188
+ baseSnapshot,
189
+ readAndParseBlob,
190
+ baseLogger,
191
+ existing,
192
+ metadata,
193
+ );
88
194
  }
89
195
 
90
196
  /**
@@ -94,7 +200,7 @@ export class GarbageCollector implements IGarbageCollector {
94
200
 
95
201
  /**
96
202
  * This tracks two things:
97
- * 1. Whether GC is enabled - If this is 0, GC is disabled. If this is > 0, GC is enabled.
203
+ * 1. Whether GC is enabled - If this is 0, GC is disabled. If this is greater than 0, GC is enabled.
98
204
  * 2. If GC is enabled, the version of GC used to generate the GC data written in a summary.
99
205
  */
100
206
  public get gcSummaryFeatureVersion(): number {
@@ -118,26 +224,56 @@ export class GarbageCollector implements IGarbageCollector {
118
224
  private readonly gcEnabled: boolean;
119
225
  private readonly shouldRunSweep: boolean;
120
226
  private readonly testMode: boolean;
121
- private readonly logger: ITelemetryLogger;
227
+ private readonly mc: MonitoringContext;
228
+
229
+ /**
230
+ * Tells whether the GC data should be written to the root of the summary tree. We do this under 2 conditions:
231
+ * 1. If `writeDataAtRoot` GC option is enabled.
232
+ * 2. If the base summary has the GC data written at the root. This is to support forward compatibility where when
233
+ * we start writing the GC data at root, older versions can detect that and write at root too.
234
+ */
235
+ private _writeDataAtRoot: boolean = false;
236
+ public get writeDataAtRoot(): boolean {
237
+ return this._writeDataAtRoot;
238
+ }
122
239
 
123
240
  // The current GC version that this container is running.
124
241
  private readonly currentGCVersion = GCVersion;
125
242
  // This is the version of GC data in the latest summary being tracked.
126
243
  private latestSummaryGCVersion: GCVersion;
127
244
 
245
+ // Keeps track of the GC state from the last run.
246
+ private gcDataFromLastRun: IGarbageCollectionData | undefined;
247
+ // Keeps a list of references (edges in the GC graph) between GC runs. Each entry has a node id and a list of
248
+ // outbound routes from that node.
249
+ private readonly referencesSinceLastRun: Map<string, string[]> = new Map();
250
+
251
+ // Promise that, when resolved, initializes the base state of the nodes from the base summary state.
252
+ private readonly initializeBaseStateP: Promise<void>;
253
+ // The map of data store ids to their GC details in the base summary returned in getDataStoreBaseGCDetails().
254
+ private readonly dataStoreGCDetailsP: Promise<Map<string, IGarbageCollectionSummaryDetails>>;
255
+ // The time after which an unreferenced node can be deleted. Currently, we only set the node's state to expired.
256
+ private readonly deleteTimeoutMs: number;
257
+ // Map of node ids to their unreferenced state tracker.
258
+ private readonly unreferencedNodesState: Map<string, UnreferencedStateTracker> = new Map();
259
+
128
260
  protected constructor(
129
261
  private readonly provider: IGarbageCollectionRuntime,
130
262
  private readonly gcOptions: IGCRuntimeOptions,
131
- /**
132
- * After GC has run, called to delete objects in the runtime whose routes are unused. This is not part of the
133
- * provider because its specific to this garbage collector implementation and is not part of the contract.
134
- */
263
+ /** After GC has run, called to delete objects in the runtime whose routes are unused. */
135
264
  private readonly deleteUnusedRoutes: (unusedRoutes: string[]) => void,
265
+ /** Returns the current timestamp to be assigned to nodes that become unreferenced. */
266
+ private readonly getCurrentTimestampMs: () => number,
267
+ baseSnapshot: ISnapshotTree | undefined,
268
+ readAndParseBlob: ReadAndParseBlob,
136
269
  baseLogger: ITelemetryLogger,
137
270
  existing: boolean,
138
271
  metadata?: IContainerRuntimeMetadata,
139
272
  ) {
140
- this.logger = ChildLogger.create(baseLogger, "GarbageCollector");
273
+ this.mc = loggerToMonitoringContext(
274
+ ChildLogger.create(baseLogger, "GarbageCollector"));
275
+
276
+ this.deleteTimeoutMs = this.gcOptions.deleteTimeoutMs ?? defaultDeleteTimeoutMs;
141
277
 
142
278
  let prevSummaryGCVersion: number | undefined;
143
279
  // GC can only be enabled during creation. After that, it can never be enabled again. So, for existing
@@ -156,7 +292,7 @@ export class GarbageCollector implements IGarbageCollector {
156
292
  this.latestSummaryGCVersion = prevSummaryGCVersion ?? this.currentGCVersion;
157
293
 
158
294
  // Whether GC should run or not. Can override with localStorage flag.
159
- this.shouldRunGC = getLocalStorageFeatureGate(runGCKey) ?? (
295
+ this.shouldRunGC = this.mc.config.getBoolean(runGCKey) ?? (
160
296
  // GC must be enabled for the document.
161
297
  this.gcEnabled
162
298
  // GC must not be disabled via GC options.
@@ -166,10 +302,138 @@ export class GarbageCollector implements IGarbageCollector {
166
302
  // Whether GC sweep phase should run or not. If this is false, only GC mark phase is run. Can override with
167
303
  // localStorage flag.
168
304
  this.shouldRunSweep = this.shouldRunGC &&
169
- (getLocalStorageFeatureGate(runSweepKey) ?? gcOptions.runSweep === true);
305
+ (this.mc.config.getBoolean(runSweepKey) ?? gcOptions.runSweep === true);
170
306
 
171
307
  // Whether we are running in test mode. In this mode, unreferenced nodes are immediately deleted.
172
- this.testMode = getLocalStorageFeatureGate(gcTestModeKey) ?? gcOptions.runGCInTestMode === true;
308
+ this.testMode = this.mc.config.getBoolean(gcTestModeKey) ?? gcOptions.runGCInTestMode === true;
309
+
310
+ // If `writeDataAtRoot` GC option is true, we should write the GC data into the root of the summary tree. This
311
+ // GC option is used for testing only. It will be removed once we start writing GC data into root by default.
312
+ this._writeDataAtRoot = this.gcOptions.writeDataAtRoot === true;
313
+
314
+ // Get the GC state from the GC blob in the base snapshot. Use LazyPromise because we only want to do
315
+ // this once since it involves fetching blobs from storage which is expensive.
316
+ const baseSummaryStateP = new LazyPromise<IGarbageCollectionState | undefined>(async () => {
317
+ if (baseSnapshot === undefined) {
318
+ return undefined;
319
+ }
320
+
321
+ // For newer documents, GC data should be present in the GC tree in the root of the snapshot.
322
+ const gcSnapshotTree = baseSnapshot.trees[gcTreeKey];
323
+ if (gcSnapshotTree !== undefined) {
324
+ // forward-compat - If a newer version has written the GC tree at root, we should also do the same.
325
+ this._writeDataAtRoot = true;
326
+ return getGCStateFromSnapshot(gcSnapshotTree, readAndParseBlob);
327
+ }
328
+
329
+ // back-compat - Older documents will have the GC blobs in each data store's summary tree. Get them and
330
+ // consolidate into IGarbageCollectionState format.
331
+ // Add a node for the root node that is not present in older snapshot format.
332
+ const gcState: IGarbageCollectionState = { gcNodes: { "/": { outboundRoutes: [] } } };
333
+ const dataStoreSnaphotTree = getSummaryForDatastores(baseSnapshot, metadata);
334
+ assert(dataStoreSnaphotTree !== undefined,
335
+ 0x2a8 /* "Expected data store snapshot tree in base snapshot" */);
336
+ for (const [dsId, dsSnapshotTree] of Object.entries(dataStoreSnaphotTree.trees)) {
337
+ const blobId = dsSnapshotTree.blobs[gcBlobKey];
338
+ if (blobId === undefined) {
339
+ continue;
340
+ }
341
+
342
+ const gcSummaryDetails = await readAndParseBlob<IGarbageCollectionSummaryDetails>(blobId);
343
+ // If there are no nodes for this data store, skip it.
344
+ if (gcSummaryDetails.gcData?.gcNodes === undefined) {
345
+ continue;
346
+ }
347
+
348
+ const dsRootId = `/${dsId}`;
349
+ // Since we used to write GC data at data store level, we won't have an entry for the root ("/").
350
+ // Construct that entry by adding root data store ids to its outbound routes.
351
+ const initialSnapshotDetails = await readAndParseBlob<ReadFluidDataStoreAttributes>(
352
+ dsSnapshotTree.blobs[dataStoreAttributesBlobName],
353
+ );
354
+ if (initialSnapshotDetails.isRootDataStore) {
355
+ gcState.gcNodes["/"].outboundRoutes.push(dsRootId);
356
+ }
357
+
358
+ for (const [id, outboundRoutes] of Object.entries(gcSummaryDetails.gcData.gcNodes)) {
359
+ // Prefix the data store id to the GC node ids to make them relative to the root from being
360
+ // relative to the data store. Similar to how its done in DataStore::getGCData.
361
+ const rootId = id === "/" ? dsRootId : `${dsRootId}${id}`;
362
+ gcState.gcNodes[rootId] = { outboundRoutes: Array.from(outboundRoutes) };
363
+ }
364
+ assert(gcState.gcNodes[dsRootId] !== undefined,
365
+ 0x2a9 /* `GC nodes for data store ${dsId} not in GC blob` */);
366
+ gcState.gcNodes[dsRootId].unreferencedTimestampMs = gcSummaryDetails.unrefTimestamp;
367
+ }
368
+
369
+ // If there is only one node (root node just added above), either GC is disabled or we are loading from the
370
+ // very first summary generated by detached container. In both cases, GC was not run - return undefined.
371
+ return Object.keys(gcState.gcNodes).length === 1 ? undefined : gcState;
372
+ });
373
+
374
+ // Set up the initializer which initializes the base GC state from the base snapshot. Use lazy promise because
375
+ // we only do this once - the very first time we run GC.
376
+ this.initializeBaseStateP = new LazyPromise<void>(async () => {
377
+ const currentTimestampMs = this.getCurrentTimestampMs();
378
+ const baseState = await baseSummaryStateP;
379
+ if (baseState === undefined) {
380
+ return;
381
+ }
382
+
383
+ const gcNodes: { [ id: string ]: string[] } = {};
384
+ for (const [nodeId, nodeData] of Object.entries(baseState.gcNodes)) {
385
+ const unreferencedTimestampMs = nodeData.unreferencedTimestampMs;
386
+ if (unreferencedTimestampMs !== undefined) {
387
+ // Get how long it has been since the node was unreferenced. Start a timeout for the remaining time
388
+ // left for it to be eligible for deletion.
389
+ const unreferencedDurationMs = currentTimestampMs - unreferencedTimestampMs;
390
+ this.unreferencedNodesState.set(
391
+ nodeId,
392
+ new UnreferencedStateTracker(
393
+ unreferencedTimestampMs,
394
+ this.deleteTimeoutMs - unreferencedDurationMs,
395
+ ),
396
+ );
397
+ }
398
+ gcNodes[nodeId] = Array.from(nodeData.outboundRoutes);
399
+ }
400
+ this.gcDataFromLastRun = { gcNodes };
401
+ });
402
+
403
+ // Get the GC details for each data store from the GC state in the base summary. This is returned in
404
+ // getDataStoreBaseGCDetails and is used to initialize each data store's base GC details.
405
+ this.dataStoreGCDetailsP = new LazyPromise<Map<string, IGarbageCollectionSummaryDetails>>(async () => {
406
+ const baseState = await baseSummaryStateP;
407
+ if (baseState === undefined) {
408
+ return new Map();
409
+ }
410
+
411
+ const gcNodes: { [ id: string ]: string[] } = {};
412
+ for (const [nodeId, nodeData] of Object.entries(baseState.gcNodes)) {
413
+ gcNodes[nodeId] = Array.from(nodeData.outboundRoutes);
414
+ }
415
+ // Run GC on the nodes in the base summary to get the routes used in each node in the container.
416
+ // This is an optimization for space (vs performance) wherein we don't need to store the used routes of
417
+ // each node in the summary.
418
+ const usedRoutes = runGarbageCollection(
419
+ gcNodes,
420
+ [ "/" ],
421
+ this.mc.logger,
422
+ ).referencedNodeIds;
423
+
424
+ const dataStoreGCDetailsMap = unpackChildNodesGCDetails({ gcData: { gcNodes }, usedRoutes });
425
+ // Currently, the data stores write the GC data. So, we need to update their base GC details with the
426
+ // unreferenced timestamp. Once we start writing the GC data here, we won't need to do this anymore.
427
+ for (const [nodeId, nodeData] of Object.entries(baseState.gcNodes)) {
428
+ if (nodeData.unreferencedTimestampMs !== undefined) {
429
+ const dataStoreGCDetails = dataStoreGCDetailsMap.get(nodeId.slice(1));
430
+ if (dataStoreGCDetails !== undefined) {
431
+ dataStoreGCDetails.unrefTimestamp = nodeData.unreferencedTimestampMs;
432
+ }
433
+ }
434
+ }
435
+ return dataStoreGCDetailsMap;
436
+ });
173
437
  }
174
438
 
175
439
  /**
@@ -187,12 +451,14 @@ export class GarbageCollector implements IGarbageCollector {
187
451
  },
188
452
  ): Promise<IGCStats> {
189
453
  const {
190
- logger = this.logger,
454
+ logger = this.mc.logger,
191
455
  runSweep = this.shouldRunSweep,
192
456
  fullGC = this.gcOptions.runFullGC === true || this.hasGCVersionChanged,
193
457
  } = options;
194
458
 
195
459
  return PerformanceEvent.timedExecAsync(logger, { eventName: "GarbageCollection" }, async (event) => {
460
+ await this.initializeBaseStateP;
461
+
196
462
  const gcStats: {
197
463
  deletedNodes?: number,
198
464
  totalNodes?: number,
@@ -202,30 +468,36 @@ export class GarbageCollector implements IGarbageCollector {
202
468
 
203
469
  // Get the runtime's GC data and run GC on the reference graph in it.
204
470
  const gcData = await this.provider.getGCData(fullGC);
205
- const { referencedNodeIds, deletedNodeIds } = runGarbageCollection(
471
+
472
+ this.updateStateSinceLatestRun(gcData);
473
+
474
+ const gcResult = runGarbageCollection(
206
475
  gcData.gcNodes,
207
476
  [ "/" ],
208
477
  logger,
209
478
  );
210
479
 
211
- // Remove this node's route ("/") and notify data stores of routes that are used in it.
212
- const usedRoutes = referencedNodeIds.filter((id: string) => { return id !== "/"; });
213
- const dataStoreUsedStateStats = this.provider.updateUsedRoutes(usedRoutes);
480
+ const currentTimestampMs = this.getCurrentTimestampMs();
481
+ // Update the current state of the system based on the GC run.
482
+ this.updateCurrentState(gcData, gcResult, currentTimestampMs);
483
+
484
+ const dataStoreUsedStateStats =
485
+ this.provider.updateUsedRoutes(gcResult.referencedNodeIds, currentTimestampMs);
214
486
 
215
487
  if (runSweep) {
216
488
  // Placeholder for running sweep logic.
217
489
  }
218
490
 
219
491
  // Update stats to be reported in the performance event.
220
- gcStats.deletedNodes = deletedNodeIds.length;
221
- gcStats.totalNodes = referencedNodeIds.length + deletedNodeIds.length;
492
+ gcStats.deletedNodes = gcResult.deletedNodeIds.length;
493
+ gcStats.totalNodes = gcResult.referencedNodeIds.length + gcResult.deletedNodeIds.length;
222
494
  gcStats.deletedDataStores = dataStoreUsedStateStats.unusedNodeCount;
223
495
  gcStats.totalDataStores = dataStoreUsedStateStats.totalNodeCount;
224
496
 
225
497
  // If we are running in GC test mode, delete objects for unused routes. This enables testing scenarios
226
498
  // involving access to deleted data.
227
499
  if (this.testMode) {
228
- this.deleteUnusedRoutes(deletedNodeIds);
500
+ this.deleteUnusedRoutes(gcResult.deletedNodeIds);
229
501
  }
230
502
  event.end(gcStats);
231
503
  return gcStats as IGCStats;
@@ -233,6 +505,37 @@ export class GarbageCollector implements IGarbageCollector {
233
505
  { end: true, cancel: "error" });
234
506
  }
235
507
 
508
+ /**
509
+ * Summarizes the GC data and returns it as a summary tree.
510
+ * We currently write the entire GC state in a single blob. This can be modified later to write multiple
511
+ * blobs. All the blob keys should start with `gcBlobPrefix`.
512
+ */
513
+ public summarize(): ISummaryTreeWithStats | undefined {
514
+ if (!this.shouldRunGC || this.gcDataFromLastRun === undefined) {
515
+ return;
516
+ }
517
+
518
+ const gcState: IGarbageCollectionState = { gcNodes: {} };
519
+ for (const [nodeId, outboundRoutes] of Object.entries(this.gcDataFromLastRun.gcNodes)) {
520
+ gcState.gcNodes[nodeId] = {
521
+ outboundRoutes,
522
+ unreferencedTimestampMs: this.unreferencedNodesState.get(nodeId)?.unreferencedTimestampMs,
523
+ };
524
+ }
525
+
526
+ const builder = new SummaryTreeBuilder();
527
+ builder.addBlob(`${gcBlobPrefix}_root`, JSON.stringify(gcState));
528
+ return builder.getSummaryTree();
529
+ }
530
+
531
+ /**
532
+ * Returns a map of data store ids to their base GC details generated from the base summary. This is used to
533
+ * initialize the data stores with their base GC state.
534
+ */
535
+ public async getDataStoreBaseGCDetails(): Promise<Map<string, IGarbageCollectionSummaryDetails>> {
536
+ return this.dataStoreGCDetailsP;
537
+ }
538
+
236
539
  /**
237
540
  * Called when the latest summary of the system has been refreshed. This will be used to update the state of the
238
541
  * latest summary tracked.
@@ -256,6 +559,34 @@ export class GarbageCollector implements IGarbageCollector {
256
559
  await this.updateSummaryGCVersionFromSnapshot(result.snapshot, readAndParseBlob);
257
560
  }
258
561
 
562
+ /**
563
+ * Called when a node with the given id is changed. If the node is inactive, log an error.
564
+ */
565
+ public nodeChanged(id: string) {
566
+ // Prefix "/" if needed to make it relative to the root.
567
+ const nodeId = id.startsWith("/") ? id : `/${id}`;
568
+ this.unreferencedNodesState.get(nodeId)?.logIfInactive(
569
+ this.mc.logger,
570
+ "inactiveObjectChanged",
571
+ this.getCurrentTimestampMs(),
572
+ this.deleteTimeoutMs,
573
+ nodeId,
574
+ );
575
+ }
576
+
577
+ /**
578
+ * Called when an outbound reference is added to a node. This is used to identify all nodes that have been
579
+ * referenced between summaries so that their unreferenced timestamp can be reset.
580
+ *
581
+ * @param fromNodeId - The node from which the reference is added.
582
+ * @param toNodeId - The node to which the reference is added.
583
+ */
584
+ public addedOutboundReference(fromNodeId: string, toNodeId: string) {
585
+ const outboundRoutes = this.referencesSinceLastRun.get(fromNodeId) ?? [];
586
+ outboundRoutes.push(toNodeId);
587
+ this.referencesSinceLastRun.set(fromNodeId, outboundRoutes);
588
+ }
589
+
259
590
  /**
260
591
  * Update the latest summary GC version from the metadata blob in the given snapshot.
261
592
  */
@@ -266,4 +597,136 @@ export class GarbageCollector implements IGarbageCollector {
266
597
  this.latestSummaryGCVersion = getGCVersion(metadata);
267
598
  }
268
599
  }
600
+
601
+ /**
602
+ * Updates the state of the system as per the current GC run. It does the following:
603
+ * 1. Sets up the current GC state as per the gcData.
604
+ * 2. Starts tracking for nodes that have become unreferenced in this run.
605
+ * 3. Clears tracking for nodes that were unreferenced but became referenced in this run.
606
+ * @param gcData - The data representing the reference graph on which GC is run.
607
+ * @param gcResult - The result of the GC run on the gcData.
608
+ * @param currentTimestampMs - The current timestamp to be used for unreferenced nodes' timestamp.
609
+ */
610
+ private updateCurrentState(gcData: IGarbageCollectionData, gcResult: IGCResult, currentTimestampMs: number) {
611
+ this.gcDataFromLastRun = cloneGCData(gcData);
612
+ this.referencesSinceLastRun.clear();
613
+
614
+ // Iterate through the deleted nodes and start tracking if they became unreferenced in this run.
615
+ for (const nodeId of gcResult.deletedNodeIds) {
616
+ // The time when the node became unreferenced. This is added to the current GC state.
617
+ let unreferencedTimestampMs: number = currentTimestampMs;
618
+ const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
619
+ if (nodeStateTracker !== undefined) {
620
+ unreferencedTimestampMs = nodeStateTracker.unreferencedTimestampMs;
621
+ } else {
622
+ // Start tracking this node as it became unreferenced in this run.
623
+ this.unreferencedNodesState.set(
624
+ nodeId,
625
+ new UnreferencedStateTracker(unreferencedTimestampMs, this.deleteTimeoutMs),
626
+ );
627
+ }
628
+ }
629
+
630
+ // Iterate through the referenced nodes and stop tracking if they were unreferenced before.
631
+ for (const nodeId of gcResult.referencedNodeIds) {
632
+ const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
633
+ if (nodeStateTracker !== undefined) {
634
+ // If this node has been unreferenced for longer than deleteTimeoutMs and is being referenced,
635
+ // log an error as this may mean the deleteTimeoutMs is not long enough.
636
+ nodeStateTracker.logIfInactive(
637
+ this.mc.logger,
638
+ "inactiveObjectRevived",
639
+ currentTimestampMs,
640
+ this.deleteTimeoutMs,
641
+ nodeId,
642
+ );
643
+ // Stop tracking so as to clear out any running timers.
644
+ nodeStateTracker.stopTracking();
645
+ // Delete the node as we don't need to track it any more.
646
+ this.unreferencedNodesState.delete(nodeId);
647
+ }
648
+ }
649
+ }
650
+
651
+ /**
652
+ * Since GC runs periodically, the GC data that is generated only tells us the state of the world at that point in
653
+ * time. It's possible that nodes transition from `unreferenced -> referenced -> unreferenced` between two runs. The
654
+ * unreferenced timestamp of such nodes needs to be reset as they may have been accessed when they were referenced.
655
+ *
656
+ * This function identifies nodes that were referenced since last run and removes their unreferenced state, if any.
657
+ * If these nodes are currently unreferenced, they will be assigned new unreferenced state by the current run.
658
+ */
659
+ private updateStateSinceLatestRun(currentGCData: IGarbageCollectionData) {
660
+ // If we haven't run GC before or no references were added since the last run, there is nothing to do.
661
+ if (this.gcDataFromLastRun === undefined || this.referencesSinceLastRun.size === 0) {
662
+ return;
663
+ }
664
+
665
+ /**
666
+ * Generate a super set of the GC data that contains the nodes and edges from last run, plus any new node and
667
+ * edges that have been added since then. To do this, combine the GC data from the last run and the current
668
+ * run, and then add the references since last run.
669
+ *
670
+ * Note on why we need to combine the data from previous run, current run and all references in between -
671
+ * 1. We need data from last run because some of its references may have been deleted since then. If those
672
+ * references added new outbound references before getting deleted, we need to detect them.
673
+ * 2. We need new outbound references since last run because some of them may have been deleted later. If those
674
+ * references added new outbound references before getting deleted, we need to detect them.
675
+ * 3. We need data from the current run because currently we may not detect when DDSs are referenced:
676
+ * - We don't require DDS handles to be stored in a referenced DDS. For this, we need GC at DDS level
677
+ * which is tracked by https://github.com/microsoft/FluidFramework/issues/8470.
678
+ * - A new data store may have "root" DDSs already created and we don't detect them today.
679
+ */
680
+ const gcDataSuperSet = concatGarbageCollectionData(this.gcDataFromLastRun, currentGCData);
681
+ this.referencesSinceLastRun.forEach((outboundRoutes: string[], sourceNodeId: string) => {
682
+ if (gcDataSuperSet.gcNodes[sourceNodeId] === undefined) {
683
+ gcDataSuperSet.gcNodes[sourceNodeId] = outboundRoutes;
684
+ } else {
685
+ gcDataSuperSet.gcNodes[sourceNodeId].push(...outboundRoutes);
686
+ }
687
+ });
688
+
689
+ /**
690
+ * Run GC on the above reference graph to find all nodes that are referenced. For each one, if they are
691
+ * unreferenced, stop tracking them and remove from unreferenced list.
692
+ * Some of these nodes may be unreferenced now and if so, the current run will add unreferenced state for them.
693
+ */
694
+ const gcResult = runGarbageCollection(gcDataSuperSet.gcNodes, ["/"], this.mc.logger);
695
+ for (const nodeId of gcResult.referencedNodeIds) {
696
+ const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
697
+ if (nodeStateTracker !== undefined) {
698
+ // Stop tracking so as to clear out any running timers.
699
+ nodeStateTracker.stopTracking();
700
+ // Delete the node as we don't need to track it any more.
701
+ this.unreferencedNodesState.delete(nodeId);
702
+ }
703
+ }
704
+ }
705
+ }
706
+
707
+ /**
708
+ * Gets the garbage collection state from the given snapshot tree. The GC state may be written into multiple blobs.
709
+ * Merge the GC state from all such blobs and return the merged GC state.
710
+ */
711
+ async function getGCStateFromSnapshot(
712
+ gcSnapshotTree: ISnapshotTree,
713
+ readAndParseBlob: ReadAndParseBlob,
714
+ ): Promise<IGarbageCollectionState> {
715
+ let rootGCState: IGarbageCollectionState = { gcNodes: {} };
716
+ for (const key of Object.keys(gcSnapshotTree.blobs)) {
717
+ // Skip blobs that do not start with the GC prefix.
718
+ if (!key.startsWith(gcBlobPrefix)) {
719
+ continue;
720
+ }
721
+
722
+ const blobId = gcSnapshotTree.blobs[key];
723
+ if (blobId === undefined) {
724
+ continue;
725
+ }
726
+ const gcState = await readAndParseBlob<IGarbageCollectionState>(blobId);
727
+ assert(gcState !== undefined, 0x2ad /* "GC blob missing from snapshot" */);
728
+ // Merge the GC state of this blob into the root GC state.
729
+ rootGCState = concatGarbageCollectionStates(rootGCState, gcState);
730
+ }
731
+ return rootGCState;
269
732
  }