@fluidframework/container-runtime 2.0.0-rc.3.0.0 → 2.0.0-rc.3.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/api-report/container-runtime.api.md +30 -12
  2. package/dist/channelCollection.d.ts +5 -3
  3. package/dist/channelCollection.d.ts.map +1 -1
  4. package/dist/channelCollection.js +88 -29
  5. package/dist/channelCollection.js.map +1 -1
  6. package/dist/containerRuntime.d.ts +6 -1
  7. package/dist/containerRuntime.d.ts.map +1 -1
  8. package/dist/containerRuntime.js +55 -48
  9. package/dist/containerRuntime.js.map +1 -1
  10. package/dist/dataStoreContext.d.ts +1 -1
  11. package/dist/dataStoreContexts.d.ts +2 -0
  12. package/dist/dataStoreContexts.d.ts.map +1 -1
  13. package/dist/dataStoreContexts.js +7 -0
  14. package/dist/dataStoreContexts.js.map +1 -1
  15. package/dist/gc/garbageCollection.d.ts +4 -11
  16. package/dist/gc/garbageCollection.d.ts.map +1 -1
  17. package/dist/gc/garbageCollection.js +45 -29
  18. package/dist/gc/garbageCollection.js.map +1 -1
  19. package/dist/gc/gcDefinitions.d.ts +26 -5
  20. package/dist/gc/gcDefinitions.d.ts.map +1 -1
  21. package/dist/gc/gcDefinitions.js.map +1 -1
  22. package/dist/gc/gcHelpers.d.ts +5 -4
  23. package/dist/gc/gcHelpers.d.ts.map +1 -1
  24. package/dist/gc/gcHelpers.js +14 -2
  25. package/dist/gc/gcHelpers.js.map +1 -1
  26. package/dist/gc/gcTelemetry.d.ts +13 -2
  27. package/dist/gc/gcTelemetry.d.ts.map +1 -1
  28. package/dist/gc/gcTelemetry.js +24 -21
  29. package/dist/gc/gcTelemetry.js.map +1 -1
  30. package/dist/gc/index.d.ts +2 -2
  31. package/dist/gc/index.d.ts.map +1 -1
  32. package/dist/gc/index.js +2 -2
  33. package/dist/gc/index.js.map +1 -1
  34. package/dist/index.d.ts +2 -2
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +2 -1
  37. package/dist/index.js.map +1 -1
  38. package/dist/legacy.d.ts +1 -0
  39. package/dist/metadata.d.ts +2 -2
  40. package/dist/metadata.d.ts.map +1 -1
  41. package/dist/metadata.js.map +1 -1
  42. package/dist/opLifecycle/batchManager.d.ts +4 -1
  43. package/dist/opLifecycle/batchManager.d.ts.map +1 -1
  44. package/dist/opLifecycle/batchManager.js +0 -10
  45. package/dist/opLifecycle/batchManager.js.map +1 -1
  46. package/dist/opLifecycle/outbox.d.ts +0 -4
  47. package/dist/opLifecycle/outbox.d.ts.map +1 -1
  48. package/dist/opLifecycle/outbox.js +7 -38
  49. package/dist/opLifecycle/outbox.js.map +1 -1
  50. package/dist/packageVersion.d.ts +1 -1
  51. package/dist/packageVersion.d.ts.map +1 -1
  52. package/dist/packageVersion.js +1 -1
  53. package/dist/packageVersion.js.map +1 -1
  54. package/dist/summary/documentSchema.js +1 -1
  55. package/dist/summary/documentSchema.js.map +1 -1
  56. package/lib/channelCollection.d.ts +5 -3
  57. package/lib/channelCollection.d.ts.map +1 -1
  58. package/lib/channelCollection.js +90 -31
  59. package/lib/channelCollection.js.map +1 -1
  60. package/lib/containerRuntime.d.ts +6 -1
  61. package/lib/containerRuntime.d.ts.map +1 -1
  62. package/lib/containerRuntime.js +54 -47
  63. package/lib/containerRuntime.js.map +1 -1
  64. package/lib/dataStoreContext.d.ts +1 -1
  65. package/lib/dataStoreContexts.d.ts +2 -0
  66. package/lib/dataStoreContexts.d.ts.map +1 -1
  67. package/lib/dataStoreContexts.js +7 -0
  68. package/lib/dataStoreContexts.js.map +1 -1
  69. package/lib/gc/garbageCollection.d.ts +4 -11
  70. package/lib/gc/garbageCollection.d.ts.map +1 -1
  71. package/lib/gc/garbageCollection.js +47 -31
  72. package/lib/gc/garbageCollection.js.map +1 -1
  73. package/lib/gc/gcDefinitions.d.ts +26 -5
  74. package/lib/gc/gcDefinitions.d.ts.map +1 -1
  75. package/lib/gc/gcDefinitions.js.map +1 -1
  76. package/lib/gc/gcHelpers.d.ts +5 -4
  77. package/lib/gc/gcHelpers.d.ts.map +1 -1
  78. package/lib/gc/gcHelpers.js +12 -1
  79. package/lib/gc/gcHelpers.js.map +1 -1
  80. package/lib/gc/gcTelemetry.d.ts +13 -2
  81. package/lib/gc/gcTelemetry.d.ts.map +1 -1
  82. package/lib/gc/gcTelemetry.js +24 -21
  83. package/lib/gc/gcTelemetry.js.map +1 -1
  84. package/lib/gc/index.d.ts +2 -2
  85. package/lib/gc/index.d.ts.map +1 -1
  86. package/lib/gc/index.js +1 -1
  87. package/lib/gc/index.js.map +1 -1
  88. package/lib/index.d.ts +2 -2
  89. package/lib/index.d.ts.map +1 -1
  90. package/lib/index.js +1 -1
  91. package/lib/index.js.map +1 -1
  92. package/lib/legacy.d.ts +1 -0
  93. package/lib/metadata.d.ts +2 -2
  94. package/lib/metadata.d.ts.map +1 -1
  95. package/lib/metadata.js.map +1 -1
  96. package/lib/opLifecycle/batchManager.d.ts +4 -1
  97. package/lib/opLifecycle/batchManager.d.ts.map +1 -1
  98. package/lib/opLifecycle/batchManager.js +0 -10
  99. package/lib/opLifecycle/batchManager.js.map +1 -1
  100. package/lib/opLifecycle/outbox.d.ts +0 -4
  101. package/lib/opLifecycle/outbox.d.ts.map +1 -1
  102. package/lib/opLifecycle/outbox.js +7 -38
  103. package/lib/opLifecycle/outbox.js.map +1 -1
  104. package/lib/packageVersion.d.ts +1 -1
  105. package/lib/packageVersion.d.ts.map +1 -1
  106. package/lib/packageVersion.js +1 -1
  107. package/lib/packageVersion.js.map +1 -1
  108. package/lib/summary/documentSchema.js +1 -1
  109. package/lib/summary/documentSchema.js.map +1 -1
  110. package/package.json +20 -20
  111. package/src/channelCollection.ts +108 -49
  112. package/src/containerRuntime.ts +66 -80
  113. package/src/dataStoreContexts.ts +12 -0
  114. package/src/gc/garbageCollection.ts +63 -41
  115. package/src/gc/gcDefinitions.ts +21 -9
  116. package/src/gc/gcHelpers.ts +14 -1
  117. package/src/gc/gcTelemetry.ts +56 -47
  118. package/src/gc/index.ts +2 -1
  119. package/src/index.ts +2 -0
  120. package/src/metadata.ts +2 -2
  121. package/src/opLifecycle/README.md +4 -4
  122. package/src/opLifecycle/batchManager.ts +5 -14
  123. package/src/opLifecycle/outbox.ts +7 -53
  124. package/src/packageVersion.ts +1 -1
  125. package/src/summary/documentSchema.ts +1 -1
  126. package/dist/public.d.ts +0 -12
  127. package/lib/public.d.ts +0 -12
package/src/containerRuntime.ts +66 -80
@@ -150,7 +150,7 @@ import {
150
150
  type OutboundContainerRuntimeMessage,
151
151
  type UnknownContainerRuntimeMessage,
152
152
  } from "./messageTypes.js";
153
- import { IBatchMetadata, IIdAllocationMetadata } from "./metadata.js";
153
+ import { IBatchMetadata, ISavedOpMetadata } from "./metadata.js";
154
154
  import {
155
155
  BatchMessage,
156
156
  IBatch,
@@ -493,6 +493,11 @@ export interface IContainerRuntimeOptions {
493
493
  readonly explicitSchemaControl?: boolean;
494
494
  }
495
495
 
496
+ /**
497
+ * Error responses when requesting a deleted object will have this header set to true
498
+ * @alpha
499
+ */
500
+ export const DeletedResponseHeaderKey = "wasDeleted";
496
501
  /**
497
502
  * Tombstone error responses will have this header set to true
498
503
  * @alpha
@@ -666,11 +671,13 @@ type MessageWithContext =
666
671
  message: InboundSequencedContainerRuntimeMessage;
667
672
  modernRuntimeMessage: true;
668
673
  local: boolean;
674
+ savedOp?: boolean;
669
675
  }
670
676
  | {
671
677
  message: InboundSequencedContainerRuntimeMessageOrSystemMessage;
672
678
  modernRuntimeMessage: false;
673
679
  local: boolean;
680
+ savedOp?: boolean;
674
681
  };
675
682
 
676
683
  const summarizerRequestUrl = "_summarizer";
@@ -1123,7 +1130,7 @@ export class ContainerRuntime
1123
1130
  // Id Compressor serializes final state (see getPendingLocalState()). As result, it needs to skip all ops that preceeded that state
1124
1131
  // (such ops will be marked by Loader layer as savedOp === true)
1125
1132
  // That said, in "delayed" mode it's possible that Id Compressor was never initialized before getPendingLocalState() is called.
1126
- // In such case we have to process all ops, including those marked with saveOp === true.
1133
+ // In such case we have to process all ops, including those marked with savedOp === true.
1127
1134
  private readonly skipSavedCompressorOps: boolean;
1128
1135
 
1129
1136
  public get idCompressorMode() {
@@ -1639,22 +1646,7 @@ export class ContainerRuntime
1639
1646
  getSummaryForDatastores(baseSnapshot, metadata),
1640
1647
  parentContext,
1641
1648
  this.mc.logger,
1642
- (
1643
- path: string,
1644
- reason: "Loaded" | "Changed",
1645
- timestampMs?: number,
1646
- packagePath?: readonly string[],
1647
- request?: IRequest,
1648
- headerData?: RuntimeHeaderData,
1649
- ) =>
1650
- this.garbageCollector.nodeUpdated(
1651
- path,
1652
- reason,
1653
- timestampMs,
1654
- packagePath,
1655
- request,
1656
- headerData,
1657
- ),
1649
+ (props) => this.garbageCollector.nodeUpdated(props),
1658
1650
  (path: string) => this.garbageCollector.isNodeDeleted(path),
1659
1651
  new Map<string, string>(dataStoreAliasMap),
1660
1652
  async (runtime: ChannelCollection) => provideEntryPoint,
@@ -1677,7 +1669,10 @@ export class ContainerRuntime
1677
1669
  }
1678
1670
  },
1679
1671
  blobRequested: (blobPath: string) =>
1680
- this.garbageCollector.nodeUpdated(blobPath, "Loaded"),
1672
+ this.garbageCollector.nodeUpdated({
1673
+ node: { type: "Blob", path: blobPath },
1674
+ reason: "Loaded",
1675
+ }),
1681
1676
  isBlobDeleted: (blobPath: string) => this.garbageCollector.isNodeDeleted(blobPath),
1682
1677
  runtime: this,
1683
1678
  stashedBlobs: pendingRuntimeState?.pendingAttachmentBlobs,
@@ -2325,6 +2320,7 @@ export class ContainerRuntime
2325
2320
  let newState: boolean;
2326
2321
 
2327
2322
  try {
2323
+ this.submitIdAllocationOpIfNeeded(true);
2328
2324
  // replay the ops
2329
2325
  this.pendingStateManager.replayPendingStates();
2330
2326
  } finally {
@@ -2547,21 +2543,28 @@ export class ContainerRuntime
2547
2543
  // We do not need to make a deep copy. Each layer will just replace message.contents itself,
2548
2544
  // but will not modify the contents object (likely it will replace it on the message).
2549
2545
  const messageCopy = { ...messageArg };
2546
+ const savedOp = (messageCopy.metadata as ISavedOpMetadata)?.savedOp;
2550
2547
  for (const message of this.remoteMessageProcessor.process(messageCopy)) {
2551
- if (modernRuntimeMessage) {
2552
- this.processCore({
2553
- // Cast it since we expect it to be this based on modernRuntimeMessage computation above.
2554
- // There is nothing really ensuring that anytime original message.type is Operation that
2555
- // the result messages will be so. In the end modern bool being true only directs to
2556
- // throw error if ultimately unrecognized without compat details saying otherwise.
2557
- message: message as InboundSequencedContainerRuntimeMessage,
2558
- local,
2559
- modernRuntimeMessage,
2560
- });
2561
- } else {
2562
- // Unrecognized message will be ignored.
2563
- this.processCore({ message, local, modernRuntimeMessage });
2564
- }
2548
+ const msg: MessageWithContext = modernRuntimeMessage
2549
+ ? {
2550
+ // Cast it since we expect it to be this based on modernRuntimeMessage computation above.
2551
+ // There is nothing really ensuring that anytime original message.type is Operation that
2552
+ // the result messages will be so. In the end modern bool being true only directs to
2553
+ // throw error if ultimately unrecognized without compat details saying otherwise.
2554
+ message: message as InboundSequencedContainerRuntimeMessage,
2555
+ local,
2556
+ modernRuntimeMessage,
2557
+ }
2558
+ : // Unrecognized message will be ignored.
2559
+ {
2560
+ message,
2561
+ local,
2562
+ modernRuntimeMessage,
2563
+ };
2564
+ msg.savedOp = savedOp;
2565
+
2566
+ // ensure that we observe any re-entrancy, and if needed, rebase ops
2567
+ this.ensureNoDataModelChanges(() => this.processCore(msg));
2565
2568
  }
2566
2569
  }
2567
2570
 
@@ -2649,13 +2652,7 @@ export class ContainerRuntime
2649
2652
  // stashed ops flow. The compressor is stashed with these ops already processed.
2650
2653
  // That said, in idCompressorMode === "delayed", we might not serialize ID compressor, and
2651
2654
  // thus we need to process all the ops.
2652
- if (
2653
- !(
2654
- this.skipSavedCompressorOps &&
2655
- (messageWithContext.message.metadata as IIdAllocationMetadata)?.savedOp ===
2656
- true
2657
- )
2658
- ) {
2655
+ if (!(this.skipSavedCompressorOps && messageWithContext.savedOp === true)) {
2659
2656
  const range = messageWithContext.message.contents;
2660
2657
  // Some other client turned on the id compressor. If we have not turned it on,
2661
2658
  // put it in a pending queue and delay finalization.
@@ -2816,9 +2813,9 @@ export class ContainerRuntime
2816
2813
  let checkpoint: IBatchCheckpoint | undefined;
2817
2814
  let result: T;
2818
2815
  if (this.mc.config.getBoolean("Fluid.ContainerRuntime.EnableRollback")) {
2819
- // Note: we are not touching this.pendingAttachBatch here, for two reasons:
2820
- // 1. It would not help, as we flush attach ops as they become available.
2821
- // 2. There is no way to undo process of data store creation.
2816
+ // Note: we are not touching any batches other than mainBatch here, for two reasons:
2817
+ // 1. It would not help, as other batches are flushed independently from main batch.
2818
+ // 2. There is no way to undo process of data store creation, blob creation, ID compressor ops, or other things tracked by other batches.
2822
2819
  checkpoint = this.outbox.checkpoint().mainBatch;
2823
2820
  }
2824
2821
  try {
@@ -2901,12 +2898,11 @@ export class ContainerRuntime
2901
2898
  "entryPoint must be defined on data store runtime for using getAliasedDataStoreEntryPoint",
2902
2899
  );
2903
2900
  }
2904
- this.garbageCollector.nodeUpdated(
2905
- `/${internalId}`,
2906
- "Loaded",
2907
- undefined /* timestampMs */,
2908
- context.packagePath,
2909
- );
2901
+ this.garbageCollector.nodeUpdated({
2902
+ node: { type: "DataStore", path: `/${internalId}` },
2903
+ reason: "Loaded",
2904
+ packagePath: context.packagePath,
2905
+ });
2910
2906
  return channel.entryPoint;
2911
2907
  }
2912
2908
 
@@ -3492,11 +3488,19 @@ export class ContainerRuntime
3492
3488
  latestSummaryRefSeqNum,
3493
3489
  );
3494
3490
 
3491
+ /**
3492
+ * This was added to validate that the summarizer node tree has the same reference sequence number from the
3493
+ * top running summarizer down to the lowest summarizer node.
3494
+ *
3495
+ * The order of mismatch numbers goes (validate sequence number)-(node sequence number).
3496
+ * Generally the validate sequence number comes from the running summarizer and the node sequence number comes from the
3497
+ * summarizer nodes.
3498
+ */
3495
3499
  if (
3496
3500
  startSummaryResult.invalidNodes > 0 ||
3497
3501
  startSummaryResult.mismatchNumbers.size > 0
3498
3502
  ) {
3499
- summaryLogger.sendErrorEvent({
3503
+ summaryLogger.sendTelemetryEvent({
3500
3504
  eventName: "LatestSummaryRefSeqNumMismatch",
3501
3505
  details: {
3502
3506
  ...startSummaryResult,
@@ -3851,9 +3855,11 @@ export class ContainerRuntime
3851
3855
  return this.blobManager.createBlob(blob, signal);
3852
3856
  }
3853
3857
 
3854
- private submitIdAllocationOpIfNeeded(): void {
3858
+ private submitIdAllocationOpIfNeeded(resubmitOutstandingRanges = false): void {
3855
3859
  if (this._idCompressor) {
3856
- const idRange = this._idCompressor.takeNextCreationRange();
3860
+ const idRange = resubmitOutstandingRanges
3861
+ ? this.idCompressor?.takeUnfinalizedCreationRange()
3862
+ : this._idCompressor.takeNextCreationRange();
3857
3863
  // Don't include the idRange if there weren't any Ids allocated
3858
3864
  if (idRange?.ids !== undefined) {
3859
3865
  const idAllocationMessage: ContainerRuntimeIdAllocationMessage = {
@@ -3933,33 +3939,7 @@ export class ContainerRuntime
3933
3939
  });
3934
3940
  }
3935
3941
 
3936
- // If this is attach message for new data store, and we are in a batch, send this op out of order
3937
- // Is it safe:
3938
- // Yes, this should be safe reordering. Newly created data stores are not visible through API surface.
3939
- // They become visible only when aliased, or handle to some sub-element of newly created datastore
3940
- // is stored in some DDS, i.e. only after some other op.
3941
- // Why:
3942
- // Attach ops are large, and expensive to process. Plus there are scenarios where a lot of new data
3943
- // stores are created, causing issues like relay service throttling (too many ops) and catastrophic
3944
- // failure (batch is too large). Pushing them earlier and outside of main batch should alleviate
3945
- // these issues.
3946
- // Cons:
3947
- // 1. With large batches, relay service may throttle clients. Clients may disconnect while throttled.
3948
- // This change creates new possibility of a lot of newly created data stores never being referenced
3949
- // because client died before it had a change to submit the rest of the ops. This will create more
3950
- // garbage that needs to be collected leveraging GC (Garbage Collection) feature.
3951
- // 2. Sending ops out of order means they are excluded from rollback functionality. This is not an issue
3952
- // today as rollback can't undo creation of data store. To some extent not sending them is a bigger
3953
- // issue than sending.
3954
- // Please note that this does not change file format, so it can be disabled in the future if this
3955
- // optimization no longer makes sense (for example, batch compression may make it less appealing).
3956
- if (
3957
- this.currentlyBatching() &&
3958
- type === ContainerMessageType.Attach &&
3959
- this.disableAttachReorder !== true
3960
- ) {
3961
- this.outbox.submitAttach(message);
3962
- } else if (type === ContainerMessageType.BlobAttach) {
3942
+ if (type === ContainerMessageType.BlobAttach) {
3963
3943
  // BlobAttach ops must have their metadata visible and cannot be grouped (see opGroupingManager.ts)
3964
3944
  this.outbox.submitBlobAttach(message);
3965
3945
  } else {
@@ -4119,7 +4099,13 @@ export class ContainerRuntime
4119
4099
  this.channelCollection.reSubmit(message.type, message.contents, localOpMetadata);
4120
4100
  break;
4121
4101
  case ContainerMessageType.IdAllocation: {
4122
- this.submit(message, localOpMetadata);
4102
+ // Allocation ops are never resubmitted/rebased. This is because they require special handling to
4103
+ // avoid being submitted out of order. For example, if the pending state manager contained
4104
+ // [idOp1, dataOp1, idOp2, dataOp2] and the resubmission of dataOp1 generated idOp3, that would be
4105
+ // placed into the outbox in the same batch as idOp1, but before idOp2 is resubmitted.
4106
+ // To avoid this, allocation ops are simply never resubmitted. Prior to invoking the pending state
4107
+ // manager to replay pending ops, the runtime will always submit a new allocation range that includes
4108
+ // all pending IDs. The resubmitted allocation ops are then ignored here.
4123
4109
  break;
4124
4110
  }
4125
4111
  case ContainerMessageType.ChunkedOp:
package/src/dataStoreContexts.ts +12 -0
@@ -83,9 +83,21 @@ export class DataStoreContexts implements Iterable<[string, FluidDataStoreContex
83
83
  public delete(id: string): boolean {
84
84
  this.deferredContexts.delete(id);
85
85
  this.notBoundContexts.delete(id);
86
+
87
+ // Stash the context here in case it's requested in this session, we can log some details about it
88
+ const context = this._contexts.get(id);
89
+ this._recentlyDeletedContexts.set(id, context);
90
+
86
91
  return this._contexts.delete(id);
87
92
  }
88
93
 
94
+ private readonly _recentlyDeletedContexts: Map<string, FluidDataStoreContext | undefined> =
95
+ new Map();
96
+
97
+ public getRecentlyDeletedContext(id: string) {
98
+ return this._recentlyDeletedContexts.get(id);
99
+ }
100
+
89
101
  /**
90
102
  * Return the unbound local context with the given id,
91
103
  * or undefined if it's not found or not unbound.
package/src/gc/garbageCollection.ts +63 -41
@@ -23,11 +23,7 @@ import {
23
23
  } from "@fluidframework/telemetry-utils/internal";
24
24
 
25
25
  import { BlobManager } from "../blobManager.js";
26
- import {
27
- InactiveResponseHeaderKey,
28
- RuntimeHeaderData,
29
- TombstoneResponseHeaderKey,
30
- } from "../containerRuntime.js";
26
+ import { InactiveResponseHeaderKey, TombstoneResponseHeaderKey } from "../containerRuntime.js";
31
27
  import { ClientSessionExpiredError } from "../error.js";
32
28
  import { ContainerMessageType, ContainerRuntimeGCMessage } from "../messageTypes.js";
33
29
  import { IRefreshSummaryResult } from "../summary/index.js";
@@ -48,12 +44,15 @@ import {
48
44
  ISweepPhaseStats,
49
45
  UnreferencedState,
50
46
  disableAutoRecoveryKey,
47
+ type IGCNodeUpdatedProps,
51
48
  } from "./gcDefinitions.js";
52
49
  import {
53
50
  cloneGCData,
54
51
  compatBehaviorAllowsGCMessageType,
55
52
  concatGarbageCollectionData,
53
+ dataStoreNodePathOnly,
56
54
  getGCDataFromSnapshot,
55
+ urlToGCNodePath,
57
56
  } from "./gcHelpers.js";
58
57
  import { runGarbageCollection } from "./gcReferenceGraphAlgorithm.js";
59
58
  import { IGarbageCollectionSnapshotData, IGarbageCollectionState } from "./gcSummaryDefinitions.js";
@@ -401,17 +400,27 @@ export class GarbageCollector implements IGarbageCollector {
401
400
  return;
402
401
  }
403
402
 
404
- // If the GC state hasn't been initialized yet, initialize it and return.
405
- if (this.gcDataFromLastRun === undefined) {
406
- await this.initializeGCStateFromBaseSnapshotP;
407
- return;
408
- }
403
+ const initialized = this.gcDataFromLastRun !== undefined;
404
+ await PerformanceEvent.timedExecAsync(
405
+ this.mc.logger,
406
+ {
407
+ eventName: "InitializeOrUpdateGCState",
408
+ details: { initialized, unrefNodeCount: this.unreferencedNodesState.size },
409
+ },
410
+ async () => {
411
+ // If the GC state hasn't been initialized yet, initialize it and return.
412
+ if (!initialized) {
413
+ await this.initializeGCStateFromBaseSnapshotP;
414
+ return;
415
+ }
409
416
 
410
- // If the GC state has been initialized, update the tracking of unreferenced nodes as per the current
411
- // reference timestamp.
412
- for (const [, nodeStateTracker] of this.unreferencedNodesState) {
413
- nodeStateTracker.updateTracking(currentReferenceTimestampMs);
414
- }
417
+ // If the GC state has been initialized, update the tracking of unreferenced nodes as per the current
418
+ // reference timestamp.
419
+ for (const [, nodeStateTracker] of this.unreferencedNodesState) {
420
+ nodeStateTracker.updateTracking(currentReferenceTimestampMs);
421
+ }
422
+ },
423
+ );
415
424
  }
416
425
 
417
426
  /**
@@ -978,30 +987,30 @@ export class GarbageCollector implements IGarbageCollector {
978
987
  /**
979
988
  * Called when a node with the given id is updated. If the node is inactive or tombstoned, this will log an error
980
989
  * or throw an error if failing on incorrect usage is configured.
981
- * @param nodePath - The path of the node that changed.
982
- * @param reason - Whether the node was loaded or changed.
983
- * @param timestampMs - The timestamp when the node changed.
984
- * @param packagePath - The package path of the node. This may not be available if the node hasn't been loaded yet.
985
- * @param request - The original request for loads to preserve it in telemetry.
986
- * @param requestHeaders - If the node was loaded via request path, the headers in the request.
990
+ * @param IGCNodeUpdatedProps - Details about the node and how it was updated
987
991
  */
988
- public nodeUpdated(
989
- nodePath: string,
990
- reason: "Loaded" | "Changed",
991
- timestampMs?: number,
992
- packagePath?: readonly string[],
993
- request?: IRequest,
994
- headerData?: RuntimeHeaderData,
995
- ) {
992
+ public nodeUpdated({
993
+ node,
994
+ reason,
995
+ timestampMs,
996
+ packagePath,
997
+ request,
998
+ headerData,
999
+ }: IGCNodeUpdatedProps) {
996
1000
  if (!this.configs.shouldRunGC) {
997
1001
  return;
998
1002
  }
999
1003
 
1000
- const isTombstoned = this.tombstones.includes(nodePath);
1004
+ // trackedId will be either DataStore or Blob ID (not sub-DataStore ID, since some of those are unrecognized by GC)
1005
+ const trackedId = node.path;
1006
+ const isTombstoned = this.tombstones.includes(trackedId);
1007
+ const isInactive = this.unreferencedNodesState.get(trackedId)?.state === "Inactive";
1008
+
1009
+ const fullPath = request !== undefined ? urlToGCNodePath(request.url) : trackedId;
1001
1010
 
1002
1011
  // This will log if appropriate
1003
- this.telemetryTracker.nodeUsed({
1004
- id: nodePath,
1012
+ this.telemetryTracker.nodeUsed(trackedId, {
1013
+ id: fullPath,
1005
1014
  usageType: reason,
1006
1015
  currentReferenceTimestampMs:
1007
1016
  timestampMs ?? this.runtime.getCurrentReferenceTimestampMs(),
@@ -1010,6 +1019,8 @@ export class GarbageCollector implements IGarbageCollector {
1010
1019
  isTombstoned,
1011
1020
  lastSummaryTime: this.getLastSummaryTimestampMs(),
1012
1021
  headers: headerData,
1022
+ requestUrl: request?.url,
1023
+ requestHeaders: JSON.stringify(request?.headers),
1013
1024
  });
1014
1025
 
1015
1026
  // Any time we log a Tombstone Loaded error (via Telemetry Tracker),
@@ -1018,17 +1029,20 @@ export class GarbageCollector implements IGarbageCollector {
1018
1029
  // to be loaded by the Summarizer, and auto-recovery will be triggered then.
1019
1030
  if (isTombstoned && reason === "Loaded") {
1020
1031
  // Note that when a DataStore and its DDS are all loaded, each will trigger AutoRecovery for itself.
1021
- this.triggerAutoRecovery(nodePath);
1032
+ this.triggerAutoRecovery(fullPath);
1022
1033
  }
1023
1034
 
1024
- const nodeType = this.runtime.getNodeType(nodePath);
1035
+ const nodeType = this.runtime.getNodeType(fullPath);
1025
1036
 
1026
1037
  // Unless this is a Loaded event for a Blob or DataStore, we're done after telemetry tracking
1027
- if (reason !== "Loaded" || ![GCNodeType.Blob, GCNodeType.DataStore].includes(nodeType)) {
1038
+ const loadedBlobOrDataStore =
1039
+ reason === "Loaded" &&
1040
+ (nodeType === GCNodeType.Blob || nodeType === GCNodeType.DataStore);
1041
+ if (!loadedBlobOrDataStore) {
1028
1042
  return;
1029
1043
  }
1030
1044
 
1031
- const errorRequest: IRequest = request ?? { url: nodePath };
1045
+ const errorRequest: IRequest = request ?? { url: fullPath };
1032
1046
  if (isTombstoned && this.throwOnTombstoneLoad && headerData?.allowTombstone !== true) {
1033
1047
  // The requested data store is removed by gc. Create a 404 gc response exception.
1034
1048
  throw responseToException(
@@ -1040,7 +1054,7 @@ export class GarbageCollector implements IGarbageCollector {
1040
1054
  }
1041
1055
 
1042
1056
  // If the object is inactive and inactive enforcement is configured, throw an error.
1043
- if (this.unreferencedNodesState.get(nodePath)?.state === "Inactive") {
1057
+ if (isInactive) {
1044
1058
  const shouldThrowOnInactiveLoad =
1045
1059
  !this.isSummarizerClient &&
1046
1060
  this.configs.throwOnInactiveLoad === true &&
@@ -1112,22 +1126,30 @@ export class GarbageCollector implements IGarbageCollector {
1112
1126
  outboundRoutes.push(toNodePath);
1113
1127
  this.newReferencesSinceLastRun.set(fromNodePath, outboundRoutes);
1114
1128
 
1115
- this.telemetryTracker.nodeUsed({
1129
+ // GC won't recognize some subDataStore paths that we encounter (e.g. a path suited for a custom request handler)
1130
+ // So for subDataStore paths we need to check the parent dataStore for current tombstone/inactive status.
1131
+ const trackedId =
1132
+ this.runtime.getNodeType(toNodePath) === "SubDataStore"
1133
+ ? dataStoreNodePathOnly(toNodePath)
1134
+ : toNodePath;
1135
+ this.telemetryTracker.nodeUsed(trackedId, {
1116
1136
  id: toNodePath,
1137
+ fromId: fromNodePath,
1117
1138
  usageType: "Revived",
1118
1139
  currentReferenceTimestampMs: this.runtime.getCurrentReferenceTimestampMs(),
1119
1140
  packagePath: undefined,
1120
1141
  completedGCRuns: this.completedRuns,
1121
- isTombstoned: this.tombstones.includes(toNodePath),
1142
+ isTombstoned: this.tombstones.includes(trackedId),
1122
1143
  lastSummaryTime: this.getLastSummaryTimestampMs(),
1123
- fromId: fromNodePath,
1124
1144
  autorecovery,
1125
1145
  });
1126
1146
 
1127
- // This node is referenced - Clear its unreferenced state
1147
+ // This node is referenced - Clear its unreferenced state if present
1128
1148
  // But don't delete the node id from the map yet.
1129
1149
  // When generating GC stats, the set of nodes in here is used as the baseline for
1130
1150
  // what was unreferenced in the last GC run.
1151
+ // NOTE: We use toNodePath not trackedId even though it may be an unrecognized subDataStore route (hence no-op),
1152
+ // because a reference to such a path is not sufficient to consider the DataStore referenced.
1131
1153
  this.unreferencedNodesState.get(toNodePath)?.stopTracking();
1132
1154
  }
1133
1155
 
package/src/gc/gcDefinitions.ts +21 -9
@@ -245,7 +245,7 @@ export const GCNodeType = {
245
245
  Blob: "Blob",
246
246
  // Nodes that are neither of the above. For example, root node.
247
247
  Other: "Other",
248
- };
248
+ } as const;
249
249
 
250
250
  /**
251
251
  * @alpha
@@ -370,14 +370,7 @@ export interface IGarbageCollector {
370
370
  * Called when a node with the given path is updated. If the node is inactive or tombstoned, this will log an error
371
371
  * or throw an error if failing on incorrect usage is configured.
372
372
  */
373
- nodeUpdated(
374
- nodePath: string,
375
- reason: "Loaded" | "Changed",
376
- timestampMs?: number,
377
- packagePath?: readonly string[],
378
- request?: IRequest,
379
- headerData?: RuntimeHeaderData,
380
- ): void;
373
+ nodeUpdated(props: IGCNodeUpdatedProps): void;
381
374
  /** Called when a reference is added to a node. Used to identify nodes that were referenced between summaries. */
382
375
  addedOutboundReference(fromNodePath: string, toNodePath: string, autorecovery?: true): void;
383
376
  /** Called to process a garbage collection message. */
@@ -388,6 +381,25 @@ export interface IGarbageCollector {
388
381
  dispose(): void;
389
382
  }
390
383
 
384
+ /**
385
+ * Info needed by GC when notified that a node was updated (loaded or changed)
386
+ * @internal
387
+ */
388
+ export interface IGCNodeUpdatedProps {
389
+ /** Type and path of the updated node */
390
+ node: { type: (typeof GCNodeType)["DataStore" | "Blob"]; path: string };
391
+ /** Whether the node (or a subpath) was loaded or changed. */
392
+ reason: "Loaded" | "Changed";
393
+ /** The op-based timestamp when the node changed, if applicable */
394
+ timestampMs?: number;
395
+ /** The package path of the node. This may not be available if the node hasn't been loaded yet */
396
+ packagePath?: readonly string[];
397
+ /** The original request for loads to preserve it in telemetry */
398
+ request?: IRequest;
399
+ /** If the node was loaded via request path, the header data. May be modified from the original request */
400
+ headerData?: RuntimeHeaderData;
401
+ }
402
+
391
403
  /** Parameters necessary for creating a GarbageCollector. */
392
404
  export interface IGarbageCollectorCreateParams {
393
405
  readonly runtime: IGarbageCollectionRuntime;
package/src/gc/gcHelpers.ts +14 -1
@@ -280,10 +280,23 @@ export function unpackChildNodesGCDetails(gcDetails: IGarbageCollectionDetailsBa
280
280
  * @param str - A string that may contain leading and / or trailing slashes.
281
281
  * @returns A new string without leading and trailing slashes.
282
282
  */
283
- export function trimLeadingAndTrailingSlashes(str: string) {
283
+ function trimLeadingAndTrailingSlashes(str: string) {
284
284
  return str.replace(/^\/+|\/+$/g, "");
285
285
  }
286
286
 
287
+ /** Reformats a request URL to match expected format for a GC node path */
288
+ export function urlToGCNodePath(url: string): string {
289
+ return `/${trimLeadingAndTrailingSlashes(url.split("?")[0])}`;
290
+ }
291
+
292
+ /**
293
+ * Pulls out the first path segment and formats it as a GC Node path
294
+ * e.g. "/dataStoreId/ddsId" yields "/dataStoreId"
295
+ */
296
+ export function dataStoreNodePathOnly(subDataStorePath: string): string {
297
+ return `/${subDataStorePath.split("/")[1]}`;
298
+ }
299
+
287
300
  /**
288
301
  * Utility to implement compat behaviors given an unknown message type
289
302
  * The parameters are typed to support compile-time enforcement of handling all known types/behaviors