@fluidframework/container-runtime 0.59.2000-61729 → 0.59.2001

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/dist/containerRuntime.d.ts.map +1 -1
  2. package/dist/containerRuntime.js +6 -6
  3. package/dist/containerRuntime.js.map +1 -1
  4. package/dist/garbageCollection.d.ts +26 -8
  5. package/dist/garbageCollection.d.ts.map +1 -1
  6. package/dist/garbageCollection.js +81 -57
  7. package/dist/garbageCollection.js.map +1 -1
  8. package/dist/packageVersion.d.ts +1 -1
  9. package/dist/packageVersion.d.ts.map +1 -1
  10. package/dist/packageVersion.js +1 -1
  11. package/dist/packageVersion.js.map +1 -1
  12. package/dist/runningSummarizer.d.ts.map +1 -1
  13. package/dist/runningSummarizer.js +11 -10
  14. package/dist/runningSummarizer.js.map +1 -1
  15. package/dist/summarizer.d.ts +1 -0
  16. package/dist/summarizer.d.ts.map +1 -1
  17. package/dist/summarizer.js +8 -4
  18. package/dist/summarizer.js.map +1 -1
  19. package/dist/summarizerTypes.d.ts +47 -1
  20. package/dist/summarizerTypes.d.ts.map +1 -1
  21. package/dist/summarizerTypes.js.map +1 -1
  22. package/dist/summaryGenerator.d.ts +0 -2
  23. package/dist/summaryGenerator.d.ts.map +1 -1
  24. package/dist/summaryGenerator.js.map +1 -1
  25. package/dist/summaryManager.d.ts.map +1 -1
  26. package/dist/summaryManager.js +15 -15
  27. package/dist/summaryManager.js.map +1 -1
  28. package/lib/containerRuntime.d.ts.map +1 -1
  29. package/lib/containerRuntime.js +6 -6
  30. package/lib/containerRuntime.js.map +1 -1
  31. package/lib/garbageCollection.d.ts +26 -8
  32. package/lib/garbageCollection.d.ts.map +1 -1
  33. package/lib/garbageCollection.js +81 -57
  34. package/lib/garbageCollection.js.map +1 -1
  35. package/lib/packageVersion.d.ts +1 -1
  36. package/lib/packageVersion.d.ts.map +1 -1
  37. package/lib/packageVersion.js +1 -1
  38. package/lib/packageVersion.js.map +1 -1
  39. package/lib/runningSummarizer.d.ts.map +1 -1
  40. package/lib/runningSummarizer.js +11 -10
  41. package/lib/runningSummarizer.js.map +1 -1
  42. package/lib/summarizer.d.ts +1 -0
  43. package/lib/summarizer.d.ts.map +1 -1
  44. package/lib/summarizer.js +8 -4
  45. package/lib/summarizer.js.map +1 -1
  46. package/lib/summarizerTypes.d.ts +47 -1
  47. package/lib/summarizerTypes.d.ts.map +1 -1
  48. package/lib/summarizerTypes.js.map +1 -1
  49. package/lib/summaryGenerator.d.ts +0 -2
  50. package/lib/summaryGenerator.d.ts.map +1 -1
  51. package/lib/summaryGenerator.js.map +1 -1
  52. package/lib/summaryManager.d.ts.map +1 -1
  53. package/lib/summaryManager.js +15 -15
  54. package/lib/summaryManager.js.map +1 -1
  55. package/package.json +35 -59
  56. package/src/containerRuntime.ts +4 -6
  57. package/src/garbageCollection.ts +96 -61
  58. package/src/packageVersion.ts +1 -1
  59. package/src/runningSummarizer.ts +13 -10
  60. package/src/summarizer.ts +9 -4
  61. package/src/summarizerTypes.ts +60 -1
  62. package/src/summaryGenerator.ts +2 -44
  63. package/src/summaryManager.ts +17 -19
@@ -3,7 +3,7 @@
3
3
  * Licensed under the MIT License.
4
4
  */
5
5
 
6
- import { ITelemetryLogger } from "@fluidframework/common-definitions";
6
+ import { ITelemetryLogger, ITelemetryPerformanceEvent } from "@fluidframework/common-definitions";
7
7
  import { assert, LazyPromise, Timer } from "@fluidframework/common-utils";
8
8
  import { ICriticalContainerError } from "@fluidframework/container-definitions";
9
9
  import { ClientSessionExpiredError, DataProcessingError } from "@fluidframework/container-utils";
@@ -66,6 +66,8 @@ const runSweepKey = "Fluid.GarbageCollection.RunSweep";
66
66
  const writeAtRootKey = "Fluid.GarbageCollection.WriteDataAtRoot";
67
67
  // Feature gate key to expire a session after a set period of time.
68
68
  const runSessionExpiry = "Fluid.GarbageCollection.RunSessionExpiry";
69
+ // Feature gate key to log error messages if GC reference validation fails.
70
+ const logUnknownOutboundReferencesKey = "Fluid.GarbageCollection.LogUnknownOutboundReferences";
69
71
 
70
72
  const defaultDeleteTimeoutMs = 7 * 24 * 60 * 60 * 1000; // 7 days
71
73
  export const defaultSessionExpiryDurationMs = 30 * 24 * 60 * 60 * 1000; // 30 days
@@ -227,6 +229,19 @@ class UnreferencedStateTracker {
227
229
  /**
228
230
  * The garbage collector for the container runtime. It consolidates the garbage collection functionality and maintains
229
231
  * its state across summaries.
232
+ *
233
+ * Node - represented as nodeId, it's a node on the GC graph
234
+ * Outbound Route - a path from one node to another node, think `nodeA` -> `nodeB`
235
+ * Graph - all nodes with their respective routes
236
+ * GC Graph
237
+ *
238
+ * Node
239
+ * NodeId = "datastore1"
240
+ * / \
241
+ * OutboundRoute OutboundRoute
242
+ * / \
243
+ * Node Node
244
+ * NodeId = "dds1" NodeId = "dds2"
230
245
  */
231
246
  export class GarbageCollector implements IGarbageCollector {
232
247
  public static create(
@@ -320,10 +335,10 @@ export class GarbageCollector implements IGarbageCollector {
320
335
  private latestSummaryGCVersion: GCVersion;
321
336
 
322
337
  // Keeps track of the GC state from the last run.
323
- private gcDataFromLastRun: IGarbageCollectionData | undefined;
338
+ private previousGCDataFromLastRun: IGarbageCollectionData | undefined;
324
339
  // Keeps a list of references (edges in the GC graph) between GC runs. Each entry has a node id and a list of
325
340
  // outbound routes from that node.
326
- private readonly referencesSinceLastRun: Map<string, string[]> = new Map();
341
+ private readonly newReferencesSinceLastRun: Map<string, string[]> = new Map();
327
342
 
328
343
  // Promise when resolved initializes the base state of the nodes from the base summary state.
329
344
  private readonly initializeBaseStateP: Promise<void>;
@@ -513,7 +528,7 @@ export class GarbageCollector implements IGarbageCollector {
513
528
  }
514
529
  gcNodes[nodeId] = Array.from(nodeData.outboundRoutes);
515
530
  }
516
- this.gcDataFromLastRun = { gcNodes };
531
+ this.previousGCDataFromLastRun = { gcNodes };
517
532
  });
518
533
 
519
534
  // Get the GC details for each node from the GC state in the base summary. This is returned in getBaseGCDetails
@@ -637,12 +652,12 @@ export class GarbageCollector implements IGarbageCollector {
637
652
  * blobs. All the blob keys should start with `gcBlobPrefix`.
638
653
  */
639
654
  public summarize(): ISummaryTreeWithStats | undefined {
640
- if (!this.shouldRunGC || this.gcDataFromLastRun === undefined) {
655
+ if (!this.shouldRunGC || this.previousGCDataFromLastRun === undefined) {
641
656
  return;
642
657
  }
643
658
 
644
659
  const gcState: IGarbageCollectionState = { gcNodes: {} };
645
- for (const [nodeId, outboundRoutes] of Object.entries(this.gcDataFromLastRun.gcNodes)) {
660
+ for (const [nodeId, outboundRoutes] of Object.entries(this.previousGCDataFromLastRun.gcNodes)) {
646
661
  gcState.gcNodes[nodeId] = {
647
662
  outboundRoutes,
648
663
  unreferencedTimestampMs: this.unreferencedNodesState.get(nodeId)?.unreferencedTimestampMs,
@@ -729,9 +744,9 @@ export class GarbageCollector implements IGarbageCollector {
729
744
  return;
730
745
  }
731
746
 
732
- const outboundRoutes = this.referencesSinceLastRun.get(fromNodePath) ?? [];
747
+ const outboundRoutes = this.newReferencesSinceLastRun.get(fromNodePath) ?? [];
733
748
  outboundRoutes.push(toNodePath);
734
- this.referencesSinceLastRun.set(fromNodePath, outboundRoutes);
749
+ this.newReferencesSinceLastRun.set(fromNodePath, outboundRoutes);
735
750
 
736
751
  // If the node that got referenced is inactive, log an event as that may indicate use-after-delete.
737
752
  this.logIfInactive(
@@ -772,8 +787,8 @@ export class GarbageCollector implements IGarbageCollector {
772
787
  gcResult: IGCResult,
773
788
  currentReferenceTimestampMs?: number,
774
789
  ) {
775
- this.gcDataFromLastRun = cloneGCData(gcData);
776
- this.referencesSinceLastRun.clear();
790
+ this.previousGCDataFromLastRun = cloneGCData(gcData);
791
+ this.newReferencesSinceLastRun.clear();
777
792
 
778
793
  // Iterate through the referenced nodes and stop tracking if they were unreferenced before.
779
794
  for (const nodeId of gcResult.referencedNodeIds) {
@@ -826,13 +841,37 @@ export class GarbageCollector implements IGarbageCollector {
826
841
  * If these nodes are currently unreferenced, they will be assigned new unreferenced state by the current run.
827
842
  */
828
843
  private updateStateSinceLastRun(currentGCData: IGarbageCollectionData) {
829
- // If we haven't run GC before or no references were added since the last run, there is nothing to do.
830
- if (this.gcDataFromLastRun === undefined || this.referencesSinceLastRun.size === 0) {
844
+ // If we haven't run GC before there is nothing to do.
845
+ if (this.previousGCDataFromLastRun === undefined) {
831
846
  return;
832
847
  }
833
848
 
834
- // Validate that we have identified all references correctly.
835
- this.validateReferenceCorrectness(currentGCData);
849
+ // Find any references that haven't been identified correctly.
850
+ const missingExplicitReferences = this.findMissingExplicitReferences(
851
+ currentGCData,
852
+ this.previousGCDataFromLastRun,
853
+ this.newReferencesSinceLastRun,
854
+ );
855
+
856
+ // The following log will be enabled once this issue is resolved:
857
+ // https://github.com/microsoft/FluidFramework/issues/8878.
858
+ if(this.mc.config.getBoolean(logUnknownOutboundReferencesKey) === true
859
+ && missingExplicitReferences.length > 0) {
860
+ missingExplicitReferences.forEach((missingExplicitReference) => {
861
+ const event: ITelemetryPerformanceEvent = {
862
+ eventName: "gcUnknownOutboundReferences",
863
+ gcNodeId: missingExplicitReference[0],
864
+ gcRoutes: JSON.stringify(missingExplicitReference[1]),
865
+ };
866
+ this.mc.logger.sendPerformanceEvent(event);
867
+ });
868
+ }
869
+
870
+ // No references were added since the last run so we don't have to update reference states of any unreferenced
871
+ // nodes
872
+ if (this.newReferencesSinceLastRun.size === 0) {
873
+ return;
874
+ }
836
875
 
837
876
  /**
838
877
  * Generate a super set of the GC data that contains the nodes and edges from last run, plus any new node and
@@ -849,8 +888,8 @@ export class GarbageCollector implements IGarbageCollector {
849
888
  * which is tracked by https://github.com/microsoft/FluidFramework/issues/8470.
850
889
  * - A new data store may have "root" DDSs already created and we don't detect them today.
851
890
  */
852
- const gcDataSuperSet = concatGarbageCollectionData(this.gcDataFromLastRun, currentGCData);
853
- this.referencesSinceLastRun.forEach((outboundRoutes: string[], sourceNodeId: string) => {
891
+ const gcDataSuperSet = concatGarbageCollectionData(this.previousGCDataFromLastRun, currentGCData);
892
+ this.newReferencesSinceLastRun.forEach((outboundRoutes: string[], sourceNodeId: string) => {
854
893
  if (gcDataSuperSet.gcNodes[sourceNodeId] === undefined) {
855
894
  gcDataSuperSet.gcNodes[sourceNodeId] = outboundRoutes;
856
895
  } else {
@@ -876,59 +915,55 @@ export class GarbageCollector implements IGarbageCollector {
876
915
  }
877
916
 
878
917
  /**
879
- * Validates that all new references are correctly identified and processed. The basic principle for validation is
880
- * that we should not have new references in the reference graph (GC data) that have not been notified to the
881
- * garbage collector via `referenceAdded`.
882
- * We validate that the references in the current reference graph should be a subset of the references in the last
883
- * run's reference graph + references since the last run.
918
+ * Finds all new references or outbound routes in the current graph that haven't been explicitly notified to GC.
919
+ * The principle is that every new reference or outbound route must be notified to GC via the
920
+ * addedOutboundReference method. If it hasn't, it's a bug and we want to identify these scenarios.
921
+ *
922
+ * In more simple terms:
923
+ * Missing Explicit References = Current References - Previous References - Explicitly Added References;
924
+ *
884
925
  * @param currentGCData - The GC data (reference graph) from the current GC run.
926
+ * @param previousGCData - The GC data (reference graph) from the previous GC run.
927
+ * @param explicitReferences - New references added explicitly between the previous and the current run.
928
+ * @returns - a list of missing explicit references
885
929
  */
886
- private validateReferenceCorrectness(currentGCData: IGarbageCollectionData) {
930
+ private findMissingExplicitReferences(
931
+ currentGCData: IGarbageCollectionData,
932
+ previousGCData: IGarbageCollectionData,
933
+ explicitReferences: Map<string, string[]>,
934
+ ): [string, string[]][] {
887
935
  assert(
888
- this.gcDataFromLastRun !== undefined,
936
+ previousGCData !== undefined,
889
937
  0x2b7, /* "Can't validate correctness without GC data from last run" */
890
938
  );
891
939
 
892
- // Get a list of all the outbound routes (or references) in the current GC data.
893
- const currentReferences: string[] = [];
894
- for (const [nodeId, outboundRoutes] of Object.entries(currentGCData.gcNodes)) {
895
- /**
896
- * Remove routes from a child node to its parent which is added implicitly by the runtime. For instance,
897
- * each adds its data store as an outbound route to mark it as referenced if the DDS is referenced.
898
- * We won't get any explicit notification for these references so they must be removed before validation.
899
- */
900
- const explicitRoutes = outboundRoutes.filter((route) => !nodeId.startsWith(route));
901
- currentReferences.push(...explicitRoutes);
902
- }
903
-
904
- // Get a list of outbound routes (or references) from the last run's GC data plus references added since the
905
- // last run that were notified via `referenceAdded`.
906
- const explicitReferences: string[] = [];
907
- for (const [, outboundRoutes] of Object.entries(this.gcDataFromLastRun.gcNodes)) {
908
- explicitReferences.push(...outboundRoutes);
909
- }
910
- this.referencesSinceLastRun.forEach((outboundRoutes: string[]) => {
911
- explicitReferences.push(...outboundRoutes);
912
- });
913
-
914
- // Validate that the current reference graph doesn't have references that we are not already aware of. If this
915
- // happens, it might indicate data corruption since we may delete objects prematurely.
916
- currentReferences.forEach((route: string) => {
917
- // Validate references for data stores only. Currently, layers below data stores don't have GC implemented
918
- // so there is no guarantee their references will be notified.
919
- if (this.runtime.getNodeType(route) === GCNodeType.DataStore && !explicitReferences.includes(route)) {
920
- /**
921
- * The following log will be enabled once this issue is resolved:
922
- * https://github.com/microsoft/FluidFramework/issues/8878.
923
- */
924
- // We should ideally throw a data corruption error here. However, send an error for now until we have
925
- // implemented sweep and have reasonable confidence in the sweep process.
926
- // this.mc.logger.sendErrorEvent({
927
- // eventName: "gcUnknownOutboundRoute",
928
- // route,
929
- // });
940
+ const currentGraph = Object.entries(currentGCData.gcNodes);
941
+ const missingExplicitReferences: [string, string[]][] = [];
942
+ currentGraph.forEach(([nodeId, currentOutboundRoutes]) => {
943
+ const previousRoutes = previousGCData.gcNodes[nodeId] ?? [];
944
+ const explicitRoutes = explicitReferences.get(nodeId) ?? [];
945
+ const missingExplicitRoutes: string[] = [];
946
+ currentOutboundRoutes.forEach((route) => {
947
+ const isBlobOrDataStoreRoute =
948
+ this.runtime.getNodeType(route) === GCNodeType.Blob ||
949
+ this.runtime.getNodeType(route) === GCNodeType.DataStore;
950
+ // Ignore implicitly added DDS routes to their parent datastores
951
+ const notRouteFromDDSToParentDataStore = !nodeId.startsWith(route);
952
+ if (
953
+ isBlobOrDataStoreRoute &&
954
+ notRouteFromDDSToParentDataStore &&
955
+ (!previousRoutes.includes(route) && !explicitRoutes.includes(route))
956
+ ) {
957
+ missingExplicitRoutes.push(route);
958
+ }
959
+ });
960
+ if (missingExplicitRoutes.length > 0) {
961
+ missingExplicitReferences.push([nodeId, missingExplicitRoutes]);
930
962
  }
931
963
  });
964
+
965
+ // Ideally missingExplicitReferences should always have a size 0
966
+ return missingExplicitReferences;
932
967
  }
933
968
 
934
969
  /**
@@ -6,4 +6,4 @@
6
6
  */
7
7
 
8
8
  export const pkgName = "@fluidframework/container-runtime";
9
- export const pkgVersion = "0.59.2000-61729";
9
+ export const pkgVersion = "0.59.2001";
@@ -27,6 +27,7 @@ import {
27
27
  ISummaryCancellationToken,
28
28
  ISummarizeResults,
29
29
  ISummarizeTelemetryProperties,
30
+ ISummarizeRunnerTelemetry,
30
31
  } from "./summarizerTypes";
31
32
  import { IClientSummaryWatcher, SummaryCollection } from "./summaryCollection";
32
33
  import {
@@ -108,13 +109,15 @@ export class RunningSummarizer implements IDisposable {
108
109
  private readonly stopSummarizerCallback: (reason: SummarizerStopReason) => void,
109
110
  { disableHeuristics = false }: Readonly<Partial<ISummarizerOptions>> = {},
110
111
  ) {
112
+ const telemetryProps: ISummarizeRunnerTelemetry = {
113
+ summarizeCount: () => this.summarizeCount,
114
+ summarizerSuccessfulAttempts: () => this.totalSuccessfulAttempts,
115
+ };
116
+
111
117
  this.logger = ChildLogger.create(
112
118
  baseLogger, "Running",
113
119
  {
114
- all: {
115
- summarizeCount: () => this.summarizeCount,
116
- summarizerSuccessfulAttempts: () => this.totalSuccessfulAttempts,
117
- },
120
+ all: telemetryProps,
118
121
  },
119
122
  );
120
123
 
@@ -231,7 +234,7 @@ export class RunningSummarizer implements IDisposable {
231
234
  if (this.summarizingLock === undefined) {
232
235
  this.trySummarizeOnce(
233
236
  // summarizeProps
234
- { summarizeReason: "lastSummary" },
237
+ { reason: "lastSummary" },
235
238
  // ISummarizeOptions, using defaults: { refreshLatestAck: false, fullTree: false }
236
239
  {});
237
240
  }
@@ -330,7 +333,7 @@ export class RunningSummarizer implements IDisposable {
330
333
 
331
334
  /** Heuristics summarize attempt. */
332
335
  private trySummarize(
333
- summarizeReason: SummarizeReason,
336
+ reason: SummarizeReason,
334
337
  cancellationToken = this.cancellationToken): void
335
338
  {
336
339
  if (this.summarizingLock !== undefined) {
@@ -365,7 +368,7 @@ export class RunningSummarizer implements IDisposable {
365
368
  const delaySeconds = overrideDelaySeconds ?? regularDelaySeconds;
366
369
 
367
370
  const summarizeProps: ISummarizeTelemetryProperties = {
368
- summarizeReason,
371
+ reason,
369
372
  summaryAttempts,
370
373
  summaryAttemptsPerPhase,
371
374
  summaryAttemptPhase: summaryAttemptPhase + 1, // make everything 1-based
@@ -402,7 +405,7 @@ export class RunningSummarizer implements IDisposable {
402
405
  // If all attempts failed, log error (with last attempt info) and close the summarizer container
403
406
  this.logger.sendErrorEvent({
404
407
  eventName: "FailToSummarize",
405
- summarizeReason,
408
+ reason,
406
409
  message: lastResult?.message,
407
410
  }, lastResult?.error);
408
411
 
@@ -430,7 +433,7 @@ export class RunningSummarizer implements IDisposable {
430
433
  throw new UsageError("Attempted to run an already-running summarizer on demand");
431
434
  }
432
435
  const result = this.trySummarizeOnce(
433
- { summarizeReason: `onDemand/${reason}` },
436
+ { reason: `onDemand/${reason}` },
434
437
  options,
435
438
  this.cancellationToken,
436
439
  resultsBuilder);
@@ -490,7 +493,7 @@ export class RunningSummarizer implements IDisposable {
490
493
  // Set to undefined first, so that subsequent enqueue attempt while summarize will occur later.
491
494
  this.enqueuedSummary = undefined;
492
495
  this.trySummarizeOnce(
493
- { summarizeReason: `enqueuedSummary/${reason}` },
496
+ { reason: `enqueuedSummary/${reason}` },
494
497
  options,
495
498
  this.cancellationToken,
496
499
  resultsBuilder);
package/src/summarizer.ts CHANGED
@@ -145,8 +145,7 @@ export class Summarizer extends EventEmitter implements ISummarizer {
145
145
  this.stop("summarizerException");
146
146
  throw SummarizingWarning.wrap(error, false /* logged */, this.logger);
147
147
  } finally {
148
- this.dispose();
149
- this.runtime.closeFn();
148
+ this.close();
150
149
  }
151
150
  }
152
151
 
@@ -159,6 +158,13 @@ export class Summarizer extends EventEmitter implements ISummarizer {
159
158
  this.stopDeferred.resolve(reason);
160
159
  }
161
160
 
161
+ public close() {
162
+ // This will result in "summarizerClientDisconnected" stop reason recorded in telemetry,
163
+ // unless stop() was called earlier
164
+ this.dispose();
165
+ this.runtime.closeFn();
166
+ }
167
+
162
168
  private async runCore(
163
169
  onBehalfOf: string,
164
170
  options?: Readonly<Partial<ISummarizerOptions>>): Promise<SummarizerStopReason> {
@@ -344,8 +350,7 @@ export class Summarizer extends EventEmitter implements ISummarizer {
344
350
  const stopReason = await Promise.race([this.stopDeferred.promise, runCoordinator.waitCancelled]);
345
351
  await runningSummarizer.waitStop(false);
346
352
  runCoordinator.stop(stopReason);
347
- this.dispose();
348
- this.runtime.closeFn();
353
+ this.close();
349
354
  }).catch((reason) => {
350
355
  builder.fail("Failed to start summarizer", reason);
351
356
  });
@@ -9,6 +9,7 @@ import {
9
9
  ITelemetryLogger,
10
10
  ITelemetryProperties,
11
11
  } from "@fluidframework/common-definitions";
12
+ import { ITelemetryLoggerPropertyBag } from "@fluidframework/telemetry-utils";
12
13
  import {
13
14
  IFluidLoadable,
14
15
  } from "@fluidframework/core-interfaces";
@@ -296,8 +297,16 @@ export interface ISummarizerEvents extends IEvent {
296
297
 
297
298
  export interface ISummarizer extends
298
299
  IEventProvider<ISummarizerEvents>, IFluidLoadable, Partial<IProvideSummarizer>{
300
+ /*
301
+ * Asks summarizer to move to exit.
302
+ * Summarizer will finish current processes, which may take a while.
303
+ * For example, summarizer may complete last summary before exiting.
304
+ */
299
305
  stop(reason: SummarizerStopReason): void;
300
306
 
307
+ /* Closes summarizer. Any pending processes (summary in flight) are abandoned. */
308
+ close(): void;
309
+
301
310
  run(onBehalfOf: string, options?: Readonly<Partial<ISummarizerOptions>>): Promise<SummarizerStopReason>;
302
311
 
303
312
  /**
@@ -380,7 +389,7 @@ export interface ISummarizeHeuristicRunner {
380
389
 
381
390
  type ISummarizeTelemetryRequiredProperties =
382
391
  /** Reason code for attempting to summarize */
383
- "summarizeReason";
392
+ "reason";
384
393
 
385
394
  type ISummarizeTelemetryOptionalProperties =
386
395
  /** Number of attempts within the last time window, used for calculating the throttle delay. */
@@ -394,3 +403,53 @@ type ISummarizeTelemetryOptionalProperties =
394
403
  export type ISummarizeTelemetryProperties =
395
404
  Pick<ITelemetryProperties, ISummarizeTelemetryRequiredProperties> &
396
405
  Partial<Pick<ITelemetryProperties, ISummarizeTelemetryOptionalProperties>>;
406
+
407
+ type SummaryGeneratorRequiredTelemetryProperties =
408
+ /** True to generate the full tree with no handle reuse optimizations */
409
+ "fullTree" |
410
+ /** Time since we last attempted to generate a summary */
411
+ "timeSinceLastAttempt" |
412
+ /** Time since we last successfully generated a summary */
413
+ "timeSinceLastSummary";
414
+
415
+ type SummaryGeneratorOptionalTelemetryProperties =
416
+ /** Reference sequence number as of the generate summary attempt. */
417
+ "referenceSequenceNumber" |
418
+ /** minimum sequence number (at the reference sequence number) */
419
+ "minimumSequenceNumber" |
420
+ /** Delta between the current reference sequence number and the reference sequence number of the last attempt */
421
+ "opsSinceLastAttempt" |
422
+ /** Delta between the current reference sequence number and the reference sequence number of the last summary */
423
+ "opsSinceLastSummary" |
424
+ /** Delta in sum of op sizes between the current reference sequence number and the reference
425
+ * sequence number of the last summary */
426
+ "opsSizesSinceLastSummary" |
427
+ /** Delta between the number of non-system ops since the last summary @see isSystemMessage */
428
+ "nonSystemOpsSinceLastSummary" |
429
+ /** Time it took to generate the summary tree and stats. */
430
+ "generateDuration" |
431
+ /** The handle returned by storage pointing to the uploaded summary tree. */
432
+ "handle" |
433
+ /** Time it took to upload the summary tree to storage. */
434
+ "uploadDuration" |
435
+ /** The client sequence number of the summarize op submitted for the summary. */
436
+ "clientSequenceNumber" |
437
+ /** Time it took for this summary to be acked after it was generated */
438
+ "ackWaitDuration" |
439
+ /** Reference sequence number of the ack/nack message */
440
+ "ackNackSequenceNumber" |
441
+ /** Actual sequence number of the summary op proposal. */
442
+ "summarySequenceNumber" |
443
+ /** Optional Retry-After time in seconds. If specified, the client should wait this many seconds before retrying. */
444
+ "nackRetryAfter";
445
+
446
+ export type SummaryGeneratorTelemetry =
447
+ Pick<ITelemetryProperties, SummaryGeneratorRequiredTelemetryProperties> &
448
+ Partial<Pick<ITelemetryProperties, SummaryGeneratorOptionalTelemetryProperties>>;
449
+
450
+ export interface ISummarizeRunnerTelemetry extends ITelemetryLoggerPropertyBag {
451
+ /** Number of times the summarizer has run. */
452
+ summarizeCount: () => number;
453
+ /** Number of successful attempts to summarize. */
454
+ summarizerSuccessfulAttempts: () => number;
455
+ }
@@ -3,7 +3,7 @@
3
3
  * Licensed under the MIT License.
4
4
  */
5
5
 
6
- import { ITelemetryLogger, ITelemetryProperties } from "@fluidframework/common-definitions";
6
+ import { ITelemetryLogger } from "@fluidframework/common-definitions";
7
7
  import {
8
8
  assert,
9
9
  Deferred,
@@ -27,6 +27,7 @@ import {
27
27
  SummarizeResultPart,
28
28
  ISummaryCancellationToken,
29
29
  ISummarizeTelemetryProperties,
30
+ SummaryGeneratorTelemetry,
30
31
  } from "./summarizerTypes";
31
32
  import { IClientSummaryWatcher } from "./summaryCollection";
32
33
 
@@ -55,47 +56,6 @@ export async function raceTimer<T>(
55
56
  const maxSummarizeTimeoutTime = 20000; // 20 sec
56
57
  const maxSummarizeTimeoutCount = 5; // Double and resend 5 times
57
58
 
58
- type SummaryGeneratorRequiredTelemetryProperties =
59
- /** True to generate the full tree with no handle reuse optimizations */
60
- "fullTree" |
61
- /** Time since we last attempted to generate a summary */
62
- "timeSinceLastAttempt" |
63
- /** Time since we last successfully generated a summary */
64
- "timeSinceLastSummary";
65
- type SummaryGeneratorOptionalTelemetryProperties =
66
- /** Reference sequence number as of the generate summary attempt. */
67
- "referenceSequenceNumber" |
68
- /** minimum sequence number (at the reference sequence number) */
69
- "minimumSequenceNumber" |
70
- /** Delta between the current reference sequence number and the reference sequence number of the last attempt */
71
- "opsSinceLastAttempt" |
72
- /** Delta between the current reference sequence number and the reference sequence number of the last summary */
73
- "opsSinceLastSummary" |
74
- /** Delta in sum of op sizes between the current reference sequence number and the reference
75
- * sequence number of the last summary */
76
- "opsSizesSinceLastSummary" |
77
- /** Delta between the number of non-system ops since the last summary @see isSystemMessage */
78
- "nonSystemOpsSinceLastSummary" |
79
- /** Time it took to generate the summary tree and stats. */
80
- "generateDuration" |
81
- /** The handle returned by storage pointing to the uploaded summary tree. */
82
- "handle" |
83
- /** Time it took to upload the summary tree to storage. */
84
- "uploadDuration" |
85
- /** The client sequence number of the summarize op submitted for the summary. */
86
- "clientSequenceNumber" |
87
- /** Time it took for this summary to be acked after it was generated */
88
- "ackWaitDuration" |
89
- /** Reference sequence number of the ack/nack message */
90
- "ackNackSequenceNumber" |
91
- /** Actual sequence number of the summary op proposal. */
92
- "summarySequenceNumber" |
93
- /** Optional Retry-After time in seconds. If specified, the client should wait this many seconds before retrying. */
94
- "nackRetryAfter";
95
- type SummaryGeneratorTelemetry =
96
- Pick<ITelemetryProperties, SummaryGeneratorRequiredTelemetryProperties> &
97
- Partial<Pick<ITelemetryProperties, SummaryGeneratorOptionalTelemetryProperties>>;
98
-
99
59
  export type SummarizeReason =
100
60
  /**
101
61
  * Attempt to summarize after idle timeout has elapsed.
@@ -126,8 +86,6 @@ export type SummarizeReason =
126
86
  * stay connected long enough for summarizer client to catch up.
127
87
  */
128
88
  | "lastSummary"
129
- /** Previous summary attempt failed, and we are retrying. */
130
- | `retry${number}`
131
89
  /** On-demand summary requested with specified reason. */
132
90
  | `onDemand;${string}`
133
91
  /** Enqueue summarize attempt with specified reason. */
@@ -193,8 +193,6 @@ export class SummaryManager implements IDisposable {
193
193
 
194
194
  assert(this.summarizer === undefined, 0x262 /* "Old summarizer is still working!" */);
195
195
 
196
- let reason = "unknown";
197
-
198
196
  this.delayBeforeCreatingSummarizer().then(async (startWithInitialDelay: boolean) => {
199
197
  // Re-validate that it needs to be running. Due to asynchrony, it may not be the case anymore
200
198
  // but only if creation was delayed. If it was not, then we want to ensure we always create
@@ -202,7 +200,7 @@ export class SummaryManager implements IDisposable {
202
200
  // document out of broken state if it has too many ops and ordering service keeps nacking main
203
201
  // container (and thus it goes into cycle of reconnects)
204
202
  if (startWithInitialDelay && this.getShouldSummarizeState().shouldSummarize === false) {
205
- return;
203
+ return "early exit";
206
204
  }
207
205
 
208
206
  // We transition to Running before requesting the summarizer, because after requesting we can't predict
@@ -213,26 +211,37 @@ export class SummaryManager implements IDisposable {
213
211
  this.state = SummaryManagerState.Running;
214
212
 
215
213
  const summarizer = await this.requestSummarizerFn();
214
+ this.summarizer = summarizer;
216
215
 
217
216
  // Re-validate that it needs to be running. Due to asynchrony, it may not be the case anymore
218
217
  const shouldSummarizeState = this.getShouldSummarizeState();
219
218
  if (shouldSummarizeState.shouldSummarize === false) {
220
219
  this.state = SummaryManagerState.Starting;
221
220
  summarizer.stop(shouldSummarizeState.stopReason);
222
- return;
221
+ return "early exit after starting summarizer";
223
222
  }
224
223
 
225
- this.summarizer = summarizer;
226
-
227
224
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
228
225
  const clientId = this.latestClientId!;
229
226
 
230
- reason = await PerformanceEvent.timedExecAsync(
227
+ return PerformanceEvent.timedExecAsync(
231
228
  this.logger,
232
229
  { eventName: "RunningSummarizer", attempt: this.startThrottler.numAttempts },
233
230
  async () => summarizer.run(clientId, this.summarizerOptions),
234
231
  );
232
+ }).then((reason: string) => {
233
+ this.logger.sendTelemetryEvent({
234
+ eventName: "EndingSummarizer",
235
+ reason,
236
+ });
235
237
  }).catch((error) => {
238
+ this.logger.sendTelemetryEvent(
239
+ {
240
+ eventName: "EndingSummarizer",
241
+ reason: "exception",
242
+ },
243
+ error);
244
+
236
245
  // Most exceptions happen due to container being closed while loading it, due to
237
246
  // summarizer container losing connection while loading.
238
247
  // Not worth reporting such errors as errors. That said, we might miss some real errors if
@@ -252,25 +261,14 @@ export class SummaryManager implements IDisposable {
252
261
  category,
253
262
  },
254
263
  error);
255
-
256
- // Note that summarizer may keep going (like doing last summary).
257
- // Ideally we await stopping process, but this code path is due to a bug
258
- // that needs to be fixed either way.
259
- if (SummaryManager.isStartingOrRunning(this.state)) {
260
- this.stop("summarizerException");
261
- }
262
264
  }
263
265
  }).finally(() => {
264
266
  assert(this.state !== SummaryManagerState.Off, 0x264 /* "Expected: Not Off" */);
265
267
  this.state = SummaryManagerState.Off;
266
268
 
269
+ this.summarizer?.close();
267
270
  this.summarizer = undefined;
268
271
 
269
- this.logger.sendTelemetryEvent({
270
- eventName: "EndingSummarizer",
271
- reason,
272
- });
273
-
274
272
  if (this.getShouldSummarizeState().shouldSummarize) {
275
273
  this.startSummarization();
276
274
  }