@fluidframework/container-runtime 0.56.7 → 0.57.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/dist/blobManager.d.ts.map +1 -1
  2. package/dist/blobManager.js +9 -1
  3. package/dist/blobManager.js.map +1 -1
  4. package/dist/connectionTelemetry.d.ts.map +1 -1
  5. package/dist/connectionTelemetry.js +6 -6
  6. package/dist/connectionTelemetry.js.map +1 -1
  7. package/dist/containerRuntime.d.ts +68 -28
  8. package/dist/containerRuntime.d.ts.map +1 -1
  9. package/dist/containerRuntime.js +148 -89
  10. package/dist/containerRuntime.js.map +1 -1
  11. package/dist/dataStore.d.ts +27 -0
  12. package/dist/dataStore.d.ts.map +1 -0
  13. package/dist/dataStore.js +113 -0
  14. package/dist/dataStore.js.map +1 -0
  15. package/dist/dataStoreContext.d.ts +1 -7
  16. package/dist/dataStoreContext.d.ts.map +1 -1
  17. package/dist/dataStoreContext.js +10 -6
  18. package/dist/dataStoreContext.js.map +1 -1
  19. package/dist/dataStores.d.ts +9 -5
  20. package/dist/dataStores.d.ts.map +1 -1
  21. package/dist/dataStores.js +14 -19
  22. package/dist/dataStores.js.map +1 -1
  23. package/dist/garbageCollection.d.ts +66 -27
  24. package/dist/garbageCollection.d.ts.map +1 -1
  25. package/dist/garbageCollection.js +272 -97
  26. package/dist/garbageCollection.js.map +1 -1
  27. package/dist/index.d.ts +2 -2
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +2 -1
  30. package/dist/index.js.map +1 -1
  31. package/dist/packageVersion.d.ts +1 -1
  32. package/dist/packageVersion.js +1 -1
  33. package/dist/packageVersion.js.map +1 -1
  34. package/dist/runningSummarizer.d.ts +1 -0
  35. package/dist/runningSummarizer.d.ts.map +1 -1
  36. package/dist/runningSummarizer.js +23 -15
  37. package/dist/runningSummarizer.js.map +1 -1
  38. package/dist/summarizerTypes.d.ts +4 -6
  39. package/dist/summarizerTypes.d.ts.map +1 -1
  40. package/dist/summarizerTypes.js.map +1 -1
  41. package/dist/summaryGenerator.d.ts +2 -1
  42. package/dist/summaryGenerator.d.ts.map +1 -1
  43. package/dist/summaryGenerator.js +46 -29
  44. package/dist/summaryGenerator.js.map +1 -1
  45. package/lib/blobManager.d.ts.map +1 -1
  46. package/lib/blobManager.js +9 -1
  47. package/lib/blobManager.js.map +1 -1
  48. package/lib/connectionTelemetry.d.ts.map +1 -1
  49. package/lib/connectionTelemetry.js +6 -6
  50. package/lib/connectionTelemetry.js.map +1 -1
  51. package/lib/containerRuntime.d.ts +68 -28
  52. package/lib/containerRuntime.d.ts.map +1 -1
  53. package/lib/containerRuntime.js +149 -90
  54. package/lib/containerRuntime.js.map +1 -1
  55. package/lib/dataStore.d.ts +27 -0
  56. package/lib/dataStore.d.ts.map +1 -0
  57. package/lib/dataStore.js +108 -0
  58. package/lib/dataStore.js.map +1 -0
  59. package/lib/dataStoreContext.d.ts +1 -7
  60. package/lib/dataStoreContext.d.ts.map +1 -1
  61. package/lib/dataStoreContext.js +10 -6
  62. package/lib/dataStoreContext.js.map +1 -1
  63. package/lib/dataStores.d.ts +9 -5
  64. package/lib/dataStores.d.ts.map +1 -1
  65. package/lib/dataStores.js +13 -18
  66. package/lib/dataStores.js.map +1 -1
  67. package/lib/garbageCollection.d.ts +66 -27
  68. package/lib/garbageCollection.d.ts.map +1 -1
  69. package/lib/garbageCollection.js +274 -99
  70. package/lib/garbageCollection.js.map +1 -1
  71. package/lib/index.d.ts +2 -2
  72. package/lib/index.d.ts.map +1 -1
  73. package/lib/index.js +1 -1
  74. package/lib/index.js.map +1 -1
  75. package/lib/packageVersion.d.ts +1 -1
  76. package/lib/packageVersion.js +1 -1
  77. package/lib/packageVersion.js.map +1 -1
  78. package/lib/runningSummarizer.d.ts +1 -0
  79. package/lib/runningSummarizer.d.ts.map +1 -1
  80. package/lib/runningSummarizer.js +23 -15
  81. package/lib/runningSummarizer.js.map +1 -1
  82. package/lib/summarizerTypes.d.ts +4 -6
  83. package/lib/summarizerTypes.d.ts.map +1 -1
  84. package/lib/summarizerTypes.js.map +1 -1
  85. package/lib/summaryGenerator.d.ts +2 -1
  86. package/lib/summaryGenerator.d.ts.map +1 -1
  87. package/lib/summaryGenerator.js +46 -29
  88. package/lib/summaryGenerator.js.map +1 -1
  89. package/package.json +13 -13
  90. package/src/blobManager.ts +12 -1
  91. package/src/connectionTelemetry.ts +7 -6
  92. package/src/containerRuntime.ts +244 -115
  93. package/src/dataStore.ts +151 -0
  94. package/src/dataStoreContext.ts +11 -14
  95. package/src/dataStores.ts +23 -38
  96. package/src/garbageCollection.ts +385 -150
  97. package/src/index.ts +2 -1
  98. package/src/packageVersion.ts +1 -1
  99. package/src/runningSummarizer.ts +25 -16
  100. package/src/summarizerTypes.ts +4 -8
  101. package/src/summaryGenerator.ts +71 -23
@@ -6,7 +6,8 @@
6
6
  import { ITelemetryLogger } from "@fluidframework/common-definitions";
7
7
  import { assert, LazyPromise, Timer } from "@fluidframework/common-utils";
8
8
  import { ICriticalContainerError } from "@fluidframework/container-definitions";
9
- import { ClientSessionExpiredError } from "@fluidframework/container-utils";
9
+ import { ClientSessionExpiredError, DataProcessingError } from "@fluidframework/container-utils";
10
+ import { IRequestHeader } from "@fluidframework/core-interfaces";
10
11
  import {
11
12
  cloneGCData,
12
13
  concatGarbageCollectionStates,
@@ -33,7 +34,9 @@ import {
33
34
  loggerToMonitoringContext,
34
35
  MonitoringContext,
35
36
  PerformanceEvent,
37
+ TelemetryDataTag,
36
38
  } from "@fluidframework/telemetry-utils";
39
+ import { RuntimeHeaders } from ".";
37
40
 
38
41
  import { IGCRuntimeOptions } from "./containerRuntime";
39
42
  import { getSummaryForDatastores } from "./dataStores";
@@ -62,23 +65,39 @@ const gcTestModeKey = "Fluid.GarbageCollection.GCTestMode";
62
65
  const runSweepKey = "Fluid.GarbageCollection.RunSweep";
63
66
  // Feature gate key to write GC data at the root of the summary tree.
64
67
  const writeAtRootKey = "Fluid.GarbageCollection.WriteDataAtRoot";
68
+ // Feature gate key to expire a session after a set period of time.
69
+ const runSessionExpiry = "Fluid.GarbageCollection.RunSessionExpiry";
65
70
 
66
71
  const defaultDeleteTimeoutMs = 7 * 24 * 60 * 60 * 1000; // 7 days
67
-
68
- /** The used state statistics of a node. */
69
- export interface IUsedStateStats {
70
- totalNodeCount: number;
71
- unusedNodeCount: number;
72
- }
72
+ const defaultSessionExpiryDurationMs = 30 * 24 * 60 * 60 * 1000; // 30 days
73
73
 
74
74
  /** The statistics of the system state after a garbage collection run. */
75
75
  export interface IGCStats {
76
- totalNodes: number;
77
- deletedNodes: number;
78
- totalDataStores: number;
79
- deletedDataStores: number;
76
+ /** The number of nodes in the container. */
77
+ nodeCount: number;
78
+ /** The number of data stores in the container. */
79
+ dataStoreCount: number;
80
+ /** The number of unreferenced nodes in the container. */
81
+ unrefNodeCount: number;
82
+ /** The number of unreferenced data stores in the container. */
83
+ unrefDataStoreCount: number;
84
+ /** The number of nodes whose reference state updated since last GC run. */
85
+ updatedNodeCount: number;
86
+ /** The number of data stores whose reference state updated since last GC run. */
87
+ updatedDataStoreCount: number;
80
88
  }
81
89
 
90
+ /** The event that is logged when an unreferenced node is used after a certain time. */
91
+ interface IUnreferencedEvent {
92
+ eventName: string;
93
+ id: string;
94
+ age: number;
95
+ timeout: number;
96
+ lastSummaryTime?: number;
97
+ externalRequest?: boolean;
98
+ viaHandle?: boolean;
99
+ };
100
+
82
101
  /** Defines the APIs for the runtime object to be passed to the garbage collector. */
83
102
  export interface IGarbageCollectionRuntime {
84
103
  /** Before GC runs, called to notify the runtime to update any pending GC state. */
@@ -86,7 +105,9 @@ export interface IGarbageCollectionRuntime {
86
105
  /** Returns the garbage collection data of the runtime. */
87
106
  getGCData(fullGC?: boolean): Promise<IGarbageCollectionData>;
88
107
  /** After GC has run, called to notify the runtime of routes that are used in it. */
89
- updateUsedRoutes(usedRoutes: string[], gcTimestamp?: number): IUsedStateStats;
108
+ updateUsedRoutes(usedRoutes: string[], gcTimestamp?: number): void;
109
+ /** Called when the runtime should close because of an error. */
110
+ closeFn(error?: ICriticalContainerError): void;
90
111
  }
91
112
 
92
113
  /** Defines the contract for the garbage collector. */
@@ -115,10 +136,16 @@ export interface IGarbageCollector {
115
136
  getDataStoreBaseGCDetails(): Promise<Map<string, IGarbageCollectionDetailsBase>>;
116
137
  /** Called when the latest summary of the system has been refreshed. */
117
138
  latestSummaryStateRefreshed(result: RefreshSummaryResult, readAndParseBlob: ReadAndParseBlob): Promise<void>;
118
- /** Called when a node is changed. Used to detect and log when an inactive node is changed. */
119
- nodeChanged(id: string): void;
139
+ /** Called when a node is updated. Used to detect and log when an inactive node is changed or loaded. */
140
+ nodeUpdated(
141
+ nodePath: string,
142
+ reason: "Loaded" | "Changed",
143
+ timestampMs?: number,
144
+ packagePath?: readonly string[],
145
+ requestHeaders?: IRequestHeader,
146
+ ): void;
120
147
  /** Called when a reference is added to a node. Used to identify nodes that were referenced between summaries. */
121
- addedOutboundReference(fromNodeId: string, toNodeId: string): void;
148
+ addedOutboundReference(fromNodePath: string, toNodePath: string): void;
122
149
  dispose(): void;
123
150
  }
124
151
 
@@ -127,49 +154,51 @@ export interface IGarbageCollector {
127
154
  * the node's state to inactive if it remains unreferenced for a given amount of time (inactiveTimeoutMs).
128
155
  */
129
156
  class UnreferencedStateTracker {
130
- private inactive: boolean = false;
131
- // Keeps track of all inactive events that are logged. This is used to limit the log generation for each event to 1
132
- // so that it is not noisy.
133
- private readonly inactiveEventsLogged: Set<string> = new Set();
134
- private readonly timer: Timer | undefined;
157
+ private _inactive: boolean = false;
158
+ public get inactive(): boolean {
159
+ return this._inactive;
160
+ }
161
+
162
+ private timer: Timer | undefined;
135
163
 
136
164
  constructor(
137
165
  public readonly unreferencedTimestampMs: number,
138
- inactiveTimeoutMs: number,
166
+ private readonly inactiveTimeoutMs: number,
167
+ currentReferenceTimestampMs?: number,
139
168
  ) {
140
- // If the timeout has already expired, the node should become inactive immediately. Otherwise, start a timer of
141
- // inactiveTimeoutMs after which the node will become inactive.
142
- if (inactiveTimeoutMs <= 0) {
143
- this.inactive = true;
144
- } else {
145
- this.timer = new Timer(inactiveTimeoutMs, () => { this.inactive = true; });
146
- this.timer.start();
169
+ // If there is no current reference timestamp, don't track the node's inactive state. This will happen later
170
+ // when updateTracking is called with a reference timestamp.
171
+ if (currentReferenceTimestampMs !== undefined) {
172
+ this.updateTracking(currentReferenceTimestampMs);
147
173
  }
148
174
  }
149
175
 
150
- /** Stop tracking this node. Reset the unreferenced timer, if any, and reset inactive state. */
151
- public stopTracking() {
152
- this.timer?.clear();
153
- this.inactive = false;
154
- }
176
+ /**
177
+ * Updates the tracking state based on the provided timestamp.
178
+ */
179
+ public updateTracking(currentReferenceTimestampMs: number) {
180
+ const unreferencedDurationMs = currentReferenceTimestampMs - this.unreferencedTimestampMs;
181
+ // If the timeout has already expired, the node has become inactive.
182
+ if (unreferencedDurationMs > this.inactiveTimeoutMs) {
183
+ this._inactive = true;
184
+ this.timer?.clear();
185
+ return;
186
+ }
155
187
 
156
- /** Logs an error with the given properties if the node is inactive. */
157
- public logIfInactive(
158
- logger: ITelemetryLogger,
159
- eventName: string,
160
- currentTimestampMs: number,
161
- deleteTimeoutMs: number,
162
- inactiveNodeId: string,
163
- ) {
164
- if (this.inactive && !this.inactiveEventsLogged.has(eventName)) {
165
- logger.sendErrorEvent({
166
- eventName,
167
- age: currentTimestampMs - this.unreferencedTimestampMs,
168
- timeout: deleteTimeoutMs,
169
- id: inactiveNodeId,
170
- });
171
- this.inactiveEventsLogged.add(eventName);
188
+ // The node isn't inactive yet. Restart a timer for the duration remaining for it to become inactive.
189
+ const remainingDurationMs = this.inactiveTimeoutMs - unreferencedDurationMs;
190
+ if (this.timer === undefined) {
191
+ this.timer = new Timer(remainingDurationMs, () => { this._inactive = true; });
172
192
  }
193
+ this.timer.restart(remainingDurationMs);
194
+ }
195
+
196
+ /**
197
+ * Stop tracking this node. Reset the unreferenced timer, if any, and reset inactive state.
198
+ */
199
+ public stopTracking() {
200
+ this.timer?.clear();
201
+ this._inactive = false;
173
202
  }
174
203
  }
175
204
 
@@ -182,8 +211,9 @@ export class GarbageCollector implements IGarbageCollector {
182
211
  provider: IGarbageCollectionRuntime,
183
212
  gcOptions: IGCRuntimeOptions,
184
213
  deleteUnusedRoutes: (unusedRoutes: string[]) => void,
185
- getCurrentTimestampMs: () => number,
186
- closeFn: (error?: ICriticalContainerError) => void,
214
+ getNodePackagePath: (nodeId: string) => readonly string[] | undefined,
215
+ getCurrentReferenceTimestampMs: () => number | undefined,
216
+ getLastSummaryTimestampMs: () => number | undefined,
187
217
  baseSnapshot: ISnapshotTree | undefined,
188
218
  readAndParseBlob: ReadAndParseBlob,
189
219
  baseLogger: ITelemetryLogger,
@@ -194,8 +224,9 @@ export class GarbageCollector implements IGarbageCollector {
194
224
  provider,
195
225
  gcOptions,
196
226
  deleteUnusedRoutes,
197
- getCurrentTimestampMs,
198
- closeFn,
227
+ getNodePackagePath,
228
+ getCurrentReferenceTimestampMs,
229
+ getLastSummaryTimestampMs,
199
230
  baseSnapshot,
200
231
  readAndParseBlob,
201
232
  baseLogger,
@@ -287,14 +318,27 @@ export class GarbageCollector implements IGarbageCollector {
287
318
  // The timeout responsible for closing the container when the session has expired
288
319
  private sessionExpiryTimer?: ReturnType<typeof setTimeout>;
289
320
 
321
+ // Keeps track of unreferenced events that are logged for a node. This is used to limit the log generation to one
322
+ // per event per node.
323
+ private readonly loggedUnreferencedEvents: Set<string> = new Set();
324
+ // Queue for unreferenced events that should be logged the next time GC runs.
325
+ private pendingEventsQueue: IUnreferencedEvent[] = [];
326
+
290
327
  protected constructor(
291
328
  private readonly provider: IGarbageCollectionRuntime,
292
329
  private readonly gcOptions: IGCRuntimeOptions,
293
330
  /** After GC has run, called to delete objects in the runtime whose routes are unused. */
294
331
  private readonly deleteUnusedRoutes: (unusedRoutes: string[]) => void,
295
- /** Returns the current timestamp to be assigned to nodes that become unreferenced. */
296
- private readonly getCurrentTimestampMs: () => number,
297
- private readonly closeFn: (error?: ICriticalContainerError) => void,
332
+ /** For a given node path, returns the node's package path. */
333
+ private readonly getNodePackagePath: (nodePath: string) => readonly string[] | undefined,
334
+ /**
335
+ * Returns a referenced timestamp to be used to track unreferenced nodes. This is a server generated timestamp
336
+ * and may not be available if there aren't any ops processed yet. If so, we skip tracking unreferenced state
337
+ * such as time when node becomes unreferenced or inactive.
338
+ */
339
+ private readonly getCurrentReferenceTimestampMs: () => number | undefined,
340
+ /** Returns the timestamp of the last summary generated for this container. */
341
+ private readonly getLastSummaryTimestampMs: () => number | undefined,
298
342
  baseSnapshot: ISnapshotTree | undefined,
299
343
  readAndParseBlob: ReadAndParseBlob,
300
344
  baseLogger: ITelemetryLogger,
@@ -320,14 +364,22 @@ export class GarbageCollector implements IGarbageCollector {
320
364
  } else {
321
365
  // For new documents, GC has to be explicitly enabled via the gcAllowed flag in GC options.
322
366
  this.gcEnabled = gcOptions.gcAllowed === true;
323
- this.sessionExpiryTimeoutMs = this.gcOptions.gcTestSessionTimeoutMs;
367
+ // Set the Session Expiry only if the flag is enabled or the test option is set.
368
+ if (this.mc.config.getBoolean(runSessionExpiry) && this.gcEnabled) {
369
+ this.sessionExpiryTimeoutMs = defaultSessionExpiryDurationMs;
370
+ }
324
371
  }
325
372
 
326
373
  // If session expiry is enabled, we need to close the container when the timeout expires
327
374
  if (this.sessionExpiryTimeoutMs !== undefined) {
328
- const expiryMs = this.sessionExpiryTimeoutMs;
329
- this.sessionExpiryTimer = setTimeout(() => this.closeFn(
330
- new ClientSessionExpiredError(`Client session expired.`, expiryMs)), expiryMs);
375
+ const timeoutMs = this.sessionExpiryTimeoutMs;
376
+ setLongTimeout(timeoutMs,
377
+ () => {
378
+ this.provider.closeFn(new ClientSessionExpiredError(`Client session expired.`, timeoutMs));
379
+ },
380
+ (timer) => {
381
+ this.sessionExpiryTimer = timer;
382
+ });
331
383
  }
332
384
 
333
385
  // For existing document, the latest summary is the one that we loaded from. So, use its GC version as the
@@ -425,27 +477,27 @@ export class GarbageCollector implements IGarbageCollector {
425
477
  return Object.keys(gcState.gcNodes).length === 1 ? undefined : gcState;
426
478
  });
427
479
 
428
- // Set up the initializer which initializes the base GC state from the base snapshot. Use lazy promise because
429
- // we only do this once - the very first time we run GC.
480
+ /**
481
+ * Set up the initializer which initializes the base GC state from the base snapshot. Note that the reference
482
+ * timestamp may be from old ops which were not summarized and stored in the file. So, the unreferenced state
483
+ * may be out of date. This is fine because the state is updated every time GC runs based on the time then.
484
+ */
430
485
  this.initializeBaseStateP = new LazyPromise<void>(async () => {
431
- const currentTimestampMs = this.getCurrentTimestampMs();
432
- const baseState = await baseSummaryStateP;
486
+ const currentReferenceTimestampMs = this.getCurrentReferenceTimestampMs();
487
+ const baseState = await baseSummaryStateP;
433
488
  if (baseState === undefined) {
434
489
  return;
435
490
  }
436
491
 
437
492
  const gcNodes: { [ id: string ]: string[] } = {};
438
493
  for (const [nodeId, nodeData] of Object.entries(baseState.gcNodes)) {
439
- const unreferencedTimestampMs = nodeData.unreferencedTimestampMs;
440
- if (unreferencedTimestampMs !== undefined) {
441
- // Get how long it has been since the node was unreferenced. Start a timeout for the remaining time
442
- // left for it to be eligible for deletion.
443
- const unreferencedDurationMs = currentTimestampMs - unreferencedTimestampMs;
494
+ if (nodeData.unreferencedTimestampMs !== undefined) {
444
495
  this.unreferencedNodesState.set(
445
496
  nodeId,
446
497
  new UnreferencedStateTracker(
447
- unreferencedTimestampMs,
448
- this.deleteTimeoutMs - unreferencedDurationMs,
498
+ nodeData.unreferencedTimestampMs,
499
+ this.deleteTimeoutMs,
500
+ currentReferenceTimestampMs,
449
501
  ),
450
502
  );
451
503
  }
@@ -488,6 +540,24 @@ export class GarbageCollector implements IGarbageCollector {
488
540
  }
489
541
  return dataStoreGCDetailsMap;
490
542
  });
543
+
544
+ // Initialize the base state. The base GC data is used to detect and log when inactive / deleted objects are
545
+ // used in the container.
546
+ if (this.shouldRunGC) {
547
+ this.initializeBaseStateP.catch((error) => {
548
+ throw new DataProcessingError(
549
+ error?.message,
550
+ "FailedToInitializeGC",
551
+ {
552
+ gcEnabled: this.gcEnabled,
553
+ runSweep: this.shouldRunSweep,
554
+ writeAtRoot: this._writeDataAtRoot,
555
+ testMode: this.testMode,
556
+ sessionExpiry: this.sessionExpiryTimeoutMs,
557
+ },
558
+ );
559
+ });
560
+ }
491
561
  }
492
562
 
493
563
  /**
@@ -516,48 +586,36 @@ export class GarbageCollector implements IGarbageCollector {
516
586
  // Let the runtime update its pending state before GC runs.
517
587
  await this.provider.updateStateBeforeGC();
518
588
 
519
- const gcStats: {
520
- deletedNodes?: number,
521
- totalNodes?: number,
522
- deletedDataStores?: number,
523
- totalDataStores?: number,
524
- } = {};
525
-
526
589
  // Get the runtime's GC data and run GC on the reference graph in it.
527
590
  const gcData = await this.provider.getGCData(fullGC);
528
-
529
- this.updateStateSinceLatestRun(gcData);
530
-
531
591
  const gcResult = runGarbageCollection(
532
592
  gcData.gcNodes,
533
593
  [ "/" ],
534
594
  logger,
535
595
  );
596
+ const gcStats = this.generateStatsAndLogEvents(gcResult);
597
+
598
+ // Update the state since the last GC run. There can be nodes that were referenced between the last and
599
+ // the current run. We need to identify them and update their unreferenced state if needed.
600
+ this.updateStateSinceLastRun(gcData);
536
601
 
537
- const currentTimestampMs = this.getCurrentTimestampMs();
538
602
  // Update the current state of the system based on the GC run.
539
- this.updateCurrentState(gcData, gcResult, currentTimestampMs);
603
+ const currentReferenceTimestampMs = this.getCurrentReferenceTimestampMs();
604
+ this.updateCurrentState(gcData, gcResult, currentReferenceTimestampMs);
540
605
 
541
- const dataStoreUsedStateStats =
542
- this.provider.updateUsedRoutes(gcResult.referencedNodeIds, currentTimestampMs);
606
+ this.provider.updateUsedRoutes(gcResult.referencedNodeIds, currentReferenceTimestampMs);
543
607
 
544
608
  if (runSweep) {
545
609
  // Placeholder for running sweep logic.
546
610
  }
547
611
 
548
- // Update stats to be reported in the peformance event.
549
- gcStats.deletedNodes = gcResult.deletedNodeIds.length;
550
- gcStats.totalNodes = gcResult.referencedNodeIds.length + gcResult.deletedNodeIds.length;
551
- gcStats.deletedDataStores = dataStoreUsedStateStats.unusedNodeCount;
552
- gcStats.totalDataStores = dataStoreUsedStateStats.totalNodeCount;
553
-
554
612
  // If we are running in GC test mode, delete objects for unused routes. This enables testing scenarios
555
613
  // involving access to deleted data.
556
614
  if (this.testMode) {
557
615
  this.deleteUnusedRoutes(gcResult.deletedNodeIds);
558
616
  }
559
- event.end(gcStats);
560
- return gcStats as IGCStats;
617
+ event.end({ ...gcStats });
618
+ return gcStats;
561
619
  },
562
620
  { end: true, cancel: "error" });
563
621
  }
@@ -621,38 +679,61 @@ export class GarbageCollector implements IGarbageCollector {
621
679
  }
622
680
 
623
681
  /**
624
- * Called when a node with the given id is changed. If the node is inactive, log an error.
682
+ * Called when a node with the given id is updated. If the node is inactive, log an error.
683
+ * @param nodePath - The id of the node that changed.
684
+ * @param reason - Whether the node was loaded or changed.
685
+ * @param timestampMs - The timestamp when the node changed.
686
+ * @param packagePath - The package path of the node. This may not be available if the node hasn't been loaded yet.
687
+ * @param requestHeaders - If the node was loaded via request path, the headers in the request.
625
688
  */
626
- public nodeChanged(id: string) {
627
- // Prefix "/" if needed to make it relative to the root.
628
- const nodeId = id.startsWith("/") ? id : `/${id}`;
629
- this.unreferencedNodesState.get(nodeId)?.logIfInactive(
630
- this.mc.logger,
631
- "inactiveObjectChanged",
632
- this.getCurrentTimestampMs(),
633
- this.deleteTimeoutMs,
634
- nodeId,
635
- );
636
- }
637
-
638
- public dispose(): void {
639
- if (this.sessionExpiryTimer !== undefined) {
640
- clearTimeout(this.sessionExpiryTimer);
641
- this.sessionExpiryTimer = undefined;
689
+ public nodeUpdated(
690
+ nodePath: string,
691
+ reason: "Loaded" | "Changed",
692
+ timestampMs?: number,
693
+ packagePath?: readonly string[],
694
+ requestHeaders?: IRequestHeader,
695
+ ) {
696
+ if (!this.shouldRunGC) {
697
+ return;
642
698
  }
699
+
700
+ this.logIfInactive(
701
+ reason,
702
+ nodePath,
703
+ timestampMs,
704
+ packagePath,
705
+ requestHeaders,
706
+ );
643
707
  }
644
708
 
645
709
  /**
646
710
  * Called when an outbound reference is added to a node. This is used to identify all nodes that have been
647
711
  * referenced between summaries so that their unreferenced timestamp can be reset.
648
712
  *
649
- * @param fromNodeId - The node from which the reference is added.
650
- * @param toNodeId - The node to which the reference is added.
713
+ * @param fromNodePath - The node from which the reference is added.
714
+ * @param toNodePath - The node to which the reference is added.
651
715
  */
652
- public addedOutboundReference(fromNodeId: string, toNodeId: string) {
653
- const outboundRoutes = this.referencesSinceLastRun.get(fromNodeId) ?? [];
654
- outboundRoutes.push(toNodeId);
655
- this.referencesSinceLastRun.set(fromNodeId, outboundRoutes);
716
+ public addedOutboundReference(fromNodePath: string, toNodePath: string) {
717
+ if (!this.shouldRunGC) {
718
+ return;
719
+ }
720
+
721
+ const outboundRoutes = this.referencesSinceLastRun.get(fromNodePath) ?? [];
722
+ outboundRoutes.push(toNodePath);
723
+ this.referencesSinceLastRun.set(fromNodePath, outboundRoutes);
724
+
725
+ // If the node that got referenced is inactive, log an event as that may indicate use-after-delete.
726
+ this.logIfInactive(
727
+ "Revived",
728
+ toNodePath,
729
+ );
730
+ }
731
+
732
+ public dispose(): void {
733
+ if (this.sessionExpiryTimer !== undefined) {
734
+ clearTimeout(this.sessionExpiryTimer);
735
+ this.sessionExpiryTimer = undefined;
736
+ }
656
737
  }
657
738
 
658
739
  /**
@@ -673,47 +754,56 @@ export class GarbageCollector implements IGarbageCollector {
673
754
  * 3. Clears tracking for nodes that were unreferenced but became referenced in this run.
674
755
  * @param gcData - The data representing the reference graph on which GC is run.
675
756
  * @param gcResult - The result of the GC run on the gcData.
676
- * @param currentTimestampMs - The current timestamp to be used for unreferenced nodes' timestamp.
757
+ * @param currentReferenceTimestampMs - The timestamp to be used for unreferenced nodes' timestamp.
677
758
  */
678
- private updateCurrentState(gcData: IGarbageCollectionData, gcResult: IGCResult, currentTimestampMs: number) {
759
+ private updateCurrentState(
760
+ gcData: IGarbageCollectionData,
761
+ gcResult: IGCResult,
762
+ currentReferenceTimestampMs?: number,
763
+ ) {
679
764
  this.gcDataFromLastRun = cloneGCData(gcData);
680
765
  this.referencesSinceLastRun.clear();
681
766
 
682
- // Iterate through the deleted nodes and start tracking if they became unreferenced in this run.
683
- for (const nodeId of gcResult.deletedNodeIds) {
684
- // The time when the node became unreferenced. This is added to the current GC state.
685
- let unreferencedTimestampMs: number = currentTimestampMs;
686
- const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
687
- if (nodeStateTracker !== undefined) {
688
- unreferencedTimestampMs = nodeStateTracker.unreferencedTimestampMs;
689
- } else {
690
- // Start tracking this node as it became unreferenced in this run.
691
- this.unreferencedNodesState.set(
692
- nodeId,
693
- new UnreferencedStateTracker(unreferencedTimestampMs, this.deleteTimeoutMs),
694
- );
695
- }
696
- }
697
-
698
767
  // Iterate through the referenced nodes and stop tracking if they were unreferenced before.
699
768
  for (const nodeId of gcResult.referencedNodeIds) {
700
769
  const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
701
770
  if (nodeStateTracker !== undefined) {
702
- // If this node has been unreferenced for longer than deleteTimeoutMs and is being referenced,
703
- // log an error as this may mean the deleteTimeoutMs is not long enough.
704
- nodeStateTracker.logIfInactive(
705
- this.mc.logger,
706
- "inactiveObjectRevived",
707
- currentTimestampMs,
708
- this.deleteTimeoutMs,
709
- nodeId,
710
- );
711
771
  // Stop tracking so as to clear out any running timers.
712
772
  nodeStateTracker.stopTracking();
713
773
  // Delete the node as we don't need to track it any more.
714
774
  this.unreferencedNodesState.delete(nodeId);
715
775
  }
716
776
  }
777
+
778
+ /**
779
+ * If there is no current reference time, skip tracking when a node becomes unreferenced. This would happen
780
+ * if no ops have been processed ever and we still try to run GC. If so, there is nothing interesting to track
781
+ * anyway.
782
+ */
783
+ if (currentReferenceTimestampMs === undefined) {
784
+ return;
785
+ }
786
+
787
+ /**
788
+ * If a node became unreferenced in this run, start tracking it.
789
+ * If a node was already unreferenced, update its tracking information. Since the current reference time is
790
+ * from the ops seen, this will ensure that we keep updating the unreferenced state as time moves forward.
791
+ */
792
+ for (const nodeId of gcResult.deletedNodeIds) {
793
+ const nodeStateTracker = this.unreferencedNodesState.get(nodeId);
794
+ if (nodeStateTracker === undefined) {
795
+ this.unreferencedNodesState.set(
796
+ nodeId,
797
+ new UnreferencedStateTracker(
798
+ currentReferenceTimestampMs,
799
+ this.deleteTimeoutMs,
800
+ currentReferenceTimestampMs,
801
+ ),
802
+ );
803
+ } else {
804
+ nodeStateTracker.updateTracking(currentReferenceTimestampMs);
805
+ }
806
+ }
717
807
  }
718
808
 
719
809
  /**
@@ -724,7 +814,7 @@ export class GarbageCollector implements IGarbageCollector {
724
814
  * This function identifies nodes that were referenced since last run and removes their unreferenced state, if any.
725
815
  * If these nodes are currently unreferenced, they will be assigned new unreferenced state by the current run.
726
816
  */
727
- private updateStateSinceLatestRun(currentGCData: IGarbageCollectionData) {
817
+ private updateStateSinceLastRun(currentGCData: IGarbageCollectionData) {
728
818
  // If we haven't run GC before or no references were added since the last run, there is nothing to do.
729
819
  if (this.gcDataFromLastRun === undefined || this.referencesSinceLastRun.size === 0) {
730
820
  return;
@@ -783,8 +873,10 @@ export class GarbageCollector implements IGarbageCollector {
783
873
  * @param currentGCData - The GC data (reference graph) from the current GC run.
784
874
  */
785
875
  private validateReferenceCorrectness(currentGCData: IGarbageCollectionData) {
786
- assert(this.gcDataFromLastRun !== undefined, 0x2b7
787
- /* "Can't validate correctness without GC data from last run" */);
876
+ assert(
877
+ this.gcDataFromLastRun !== undefined,
878
+ 0x2b7, /* "Can't validate correctness without GC data from last run" */
879
+ );
788
880
 
789
881
  // Get a list of all the outbound routes (or references) in the current GC data.
790
882
  const currentReferences: string[] = [];
@@ -811,9 +903,9 @@ export class GarbageCollector implements IGarbageCollector {
811
903
  // Validate that the current reference graph doesn't have references that we are not already aware of. If this
812
904
  // happens, it might indicate data corruption since we may delete objects prematurely.
813
905
  currentReferences.forEach((route: string) => {
814
- // Validate references for data stores only whose routes are of the format "/dataStoreId". Currently, layers
815
- // below data stores don't have GC implemented so there is no guarantee their references will be notified.
816
- if (route.split("/").length === 2 && !explicitReferences.includes(route)) {
906
+ // Validate references for data stores only. Currently, layers below data stores don't have GC implemented
907
+ // so there is no guarantee their references will be notified.
908
+ if (isDataStoreNode(route) && !explicitReferences.includes(route)) {
817
909
  /**
818
910
  * The following log will be enabled once this issue is resolved:
819
911
  * https://github.com/microsoft/FluidFramework/issues/8878.
@@ -827,12 +919,123 @@ export class GarbageCollector implements IGarbageCollector {
827
919
  }
828
920
  });
829
921
  }
922
+
923
+ /**
924
+ * Generates the stats of a garbage collection run from the given results of the run. Also, logs any pending events
925
+ * in the pendingEventsQueue.
926
+ * @param gcResult - The result of a GC run.
927
+ * @returns the GC stats of the GC run.
928
+ */
929
+ private generateStatsAndLogEvents(gcResult: IGCResult): IGCStats {
930
+ // Log pending events for unreferenced nodes after GC has run. We should have the package data available for
931
+ // them now since the GC run should have loaded these nodes.
932
+ let event = this.pendingEventsQueue.shift();
933
+ while (event !== undefined) {
934
+ const pkg = this.getNodePackagePath(event.id);
935
+ this.mc.logger.sendErrorEvent({
936
+ ...event,
937
+ pkg: pkg ? { value: `/${pkg.join("/")}`, tag: TelemetryDataTag.PackageData } : undefined,
938
+ });
939
+ event = this.pendingEventsQueue.shift();
940
+ }
941
+
942
+ const gcStats: IGCStats = {
943
+ nodeCount: 0,
944
+ dataStoreCount: 0,
945
+ unrefNodeCount: 0,
946
+ unrefDataStoreCount: 0,
947
+ updatedNodeCount: 0,
948
+ updatedDataStoreCount: 0,
949
+ };
950
+
951
+ for (const nodeId of gcResult.referencedNodeIds) {
952
+ gcStats.nodeCount++;
953
+ const isDataStore = isDataStoreNode(nodeId);
954
+ if (isDataStore) {
955
+ gcStats.dataStoreCount++;
956
+ }
957
+ // If a referenced node has an entry in `unreferencedNodesState`, it was previously unreferenced. So, its
958
+ // reference state has changed since the last GC run.
959
+ if (this.unreferencedNodesState.has(nodeId)) {
960
+ gcStats.updatedNodeCount++;
961
+ if (isDataStore) {
962
+ gcStats.updatedDataStoreCount++;
963
+ }
964
+ }
965
+ }
966
+
967
+ for (const nodeId of gcResult.deletedNodeIds) {
968
+ gcStats.nodeCount++;
969
+ gcStats.unrefNodeCount++;
970
+ const isDataStore = isDataStoreNode(nodeId);
971
+ if (isDataStore) {
972
+ gcStats.dataStoreCount++;
973
+ gcStats.unrefDataStoreCount++;
974
+ }
975
+ // If an unreferenced node doesn't have an entry in `unreferencedNodesState`, it was previously referenced. So,
976
+ // its reference state has changed since the last GC run.
977
+ if (!this.unreferencedNodesState.has(nodeId)) {
978
+ gcStats.updatedNodeCount++;
979
+ if (isDataStore) {
980
+ gcStats.updatedDataStoreCount++;
981
+ }
982
+ }
983
+ }
984
+
985
+ return gcStats;
986
+ }
987
+
988
+ /**
989
+ * Logs an event if a node is inactive and is used.
990
+ */
991
+ private logIfInactive(
992
+ eventSuffix: "Changed" | "Loaded" | "Revived",
993
+ nodeId: string,
994
+ currentReferenceTimestampMs = this.getCurrentReferenceTimestampMs(),
995
+ packagePath?: readonly string[],
996
+ requestHeaders?: IRequestHeader,
997
+ ) {
998
+ // If there is no reference timestamp to work with, no ops have been processed after creation. If so, skip
999
+ // logging as nothing interesting would have happened worth logging.
1000
+ if (currentReferenceTimestampMs === undefined) {
1001
+ return;
1002
+ }
1003
+
1004
+ const eventName = `inactiveObject_${eventSuffix}`;
1005
+ // We log a particular event for a given node only once so that it is not too noisy.
1006
+ const uniqueEventId = `${nodeId}-${eventName}`;
1007
+ const nodeState = this.unreferencedNodesState.get(nodeId);
1008
+ if (nodeState?.inactive && !this.loggedUnreferencedEvents.has(uniqueEventId)) {
1009
+ this.loggedUnreferencedEvents.add(uniqueEventId);
1010
+ const event: IUnreferencedEvent = {
1011
+ eventName,
1012
+ id: nodeId,
1013
+ age: currentReferenceTimestampMs - nodeState.unreferencedTimestampMs,
1014
+ timeout: this.deleteTimeoutMs,
1015
+ lastSummaryTime: this.getLastSummaryTimestampMs(),
1016
+ externalRequest: requestHeaders?.[RuntimeHeaders.externalRequest],
1017
+ viaHandle: requestHeaders?.[RuntimeHeaders.viaHandle],
1018
+ };
1019
+
1020
+ // If the package data for the node exists, log immediately. Otherwise, queue it and it will be logged the
1021
+ // next time GC runs as the package data should be available then.
1022
+ const pkg = packagePath ?? this.getNodePackagePath(nodeId);
1023
+ if (pkg !== undefined) {
1024
+ this.mc.logger.sendErrorEvent({
1025
+ ...event,
1026
+ pkg: { value: `/${pkg.join("/")}`, tag: TelemetryDataTag.PackageData },
1027
+ });
1028
+ } else {
1029
+ this.pendingEventsQueue.push(event);
1030
+ }
1031
+ }
1032
+ }
830
1033
  }
831
1034
 
832
1035
  /**
833
1036
  * Gets the garbage collection state from the given snapshot tree. The GC state may be written into multiple blobs.
834
1037
  * Merge the GC state from all such blobs and return the merged GC state.
835
- */
1038
+ */
836
1039
  async function getGCStateFromSnapshot(
837
1040
  gcSnapshotTree: ISnapshotTree,
838
1041
  readAndParseBlob: ReadAndParseBlob,
@@ -855,3 +1058,35 @@ async function getGCStateFromSnapshot(
855
1058
  }
856
1059
  return rootGCState;
857
1060
  }
1061
+
1062
+ /**
1063
+ * setLongTimeout is used for timeouts longer than setTimeout's ~24.8 day max
1064
+ * @param timeoutMs - the total time the timeout needs to last in ms
1065
+ * @param timeoutFn - the function to execute when the timer ends
1066
+ * @param setTimerFn - the function used to update your timer variable
1067
+ */
1068
+ function setLongTimeout(
1069
+ timeoutMs: number,
1070
+ timeoutFn: () => void,
1071
+ setTimerFn: (timer: ReturnType<typeof setTimeout>) => void,
1072
+ ) {
1073
+ // The setTimeout max delay is 2147483647 ms (~24.8 days); a larger delay overflows and the callback fires almost immediately.
1074
+ const maxTimeout = 2147483647;
1075
+ let timer: ReturnType<typeof setTimeout>;
1076
+ if (timeoutMs > maxTimeout) {
1077
+ const newTimeoutMs = timeoutMs - maxTimeout;
1078
+ timer = setTimeout(() => setLongTimeout(newTimeoutMs, timeoutFn, setTimerFn), maxTimeout);
1079
+ } else {
1080
+ timer = setTimeout(() => timeoutFn(), timeoutMs);
1081
+ }
1082
+ setTimerFn(timer);
1083
+ }
1084
+
1085
+ /**
1086
+ * Given a GC nodeId, tells whether it belongs to a data store or not.
1087
+ */
1088
+ function isDataStoreNode(nodeId: string): boolean {
1089
+ const pathParts = nodeId.split("/");
1090
+ // Data store ids are in the format "/dataStoreId".
1091
+ return pathParts.length === 2 && pathParts[1] !== "" ? true : false;
1092
+ }