@fluidframework/container-loader 1.4.0-121020 → 2.0.0-dev-rc.1.0.0.224419

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333) hide show
  1. package/.eslintrc.js +18 -21
  2. package/.mocharc.js +12 -0
  3. package/CHANGELOG.md +364 -0
  4. package/README.md +152 -56
  5. package/api-extractor-lint.json +4 -0
  6. package/api-extractor.json +2 -2
  7. package/api-report/container-loader.api.md +143 -0
  8. package/dist/{audience.js → audience.cjs} +15 -13
  9. package/dist/audience.cjs.map +1 -0
  10. package/dist/audience.d.ts +4 -6
  11. package/dist/audience.d.ts.map +1 -1
  12. package/dist/catchUpMonitor.cjs +43 -0
  13. package/dist/catchUpMonitor.cjs.map +1 -0
  14. package/dist/catchUpMonitor.d.ts +29 -0
  15. package/dist/catchUpMonitor.d.ts.map +1 -0
  16. package/dist/{connectionManager.js → connectionManager.cjs} +397 -240
  17. package/dist/connectionManager.cjs.map +1 -0
  18. package/dist/connectionManager.d.ts +23 -33
  19. package/dist/connectionManager.d.ts.map +1 -1
  20. package/dist/{connectionState.js → connectionState.cjs} +5 -7
  21. package/dist/connectionState.cjs.map +1 -0
  22. package/dist/connectionState.d.ts +3 -5
  23. package/dist/connectionState.d.ts.map +1 -1
  24. package/dist/connectionStateHandler.cjs +474 -0
  25. package/dist/connectionStateHandler.cjs.map +1 -0
  26. package/dist/connectionStateHandler.d.ts +127 -29
  27. package/dist/connectionStateHandler.d.ts.map +1 -1
  28. package/dist/container-loader-alpha.d.ts +274 -0
  29. package/dist/container-loader-beta.d.ts +75 -0
  30. package/dist/container-loader-public.d.ts +75 -0
  31. package/dist/container-loader-untrimmed.d.ts +331 -0
  32. package/dist/container.cjs +1585 -0
  33. package/dist/container.cjs.map +1 -0
  34. package/dist/container.d.ts +227 -83
  35. package/dist/container.d.ts.map +1 -1
  36. package/dist/containerContext.cjs +74 -0
  37. package/dist/containerContext.cjs.map +1 -0
  38. package/dist/containerContext.d.ts +33 -59
  39. package/dist/containerContext.d.ts.map +1 -1
  40. package/dist/containerStorageAdapter.cjs +234 -0
  41. package/dist/containerStorageAdapter.cjs.map +1 -0
  42. package/dist/containerStorageAdapter.d.ts +48 -23
  43. package/dist/containerStorageAdapter.d.ts.map +1 -1
  44. package/dist/{contracts.js → contracts.cjs} +5 -5
  45. package/dist/contracts.cjs.map +1 -0
  46. package/dist/contracts.d.ts +45 -17
  47. package/dist/contracts.d.ts.map +1 -1
  48. package/dist/debugLogger.cjs +101 -0
  49. package/dist/debugLogger.cjs.map +1 -0
  50. package/dist/debugLogger.d.ts +30 -0
  51. package/dist/debugLogger.d.ts.map +1 -0
  52. package/dist/{deltaManager.js → deltaManager.cjs} +379 -186
  53. package/dist/deltaManager.cjs.map +1 -0
  54. package/dist/deltaManager.d.ts +54 -18
  55. package/dist/deltaManager.d.ts.map +1 -1
  56. package/dist/{deltaQueue.js → deltaQueue.cjs} +29 -28
  57. package/dist/deltaQueue.cjs.map +1 -0
  58. package/dist/deltaQueue.d.ts +3 -4
  59. package/dist/deltaQueue.d.ts.map +1 -1
  60. package/dist/disposal.cjs +25 -0
  61. package/dist/disposal.cjs.map +1 -0
  62. package/dist/disposal.d.ts +13 -0
  63. package/dist/disposal.d.ts.map +1 -0
  64. package/dist/error.cjs +32 -0
  65. package/dist/error.cjs.map +1 -0
  66. package/dist/error.d.ts +23 -0
  67. package/dist/error.d.ts.map +1 -0
  68. package/dist/index.cjs +19 -0
  69. package/dist/index.cjs.map +1 -0
  70. package/dist/index.d.ts +5 -2
  71. package/dist/index.d.ts.map +1 -1
  72. package/dist/loader.cjs +148 -0
  73. package/dist/loader.cjs.map +1 -0
  74. package/dist/loader.d.ts +38 -19
  75. package/dist/loader.d.ts.map +1 -1
  76. package/dist/location-redirection-utilities/index.cjs +11 -0
  77. package/dist/location-redirection-utilities/index.cjs.map +1 -0
  78. package/dist/location-redirection-utilities/index.d.ts +6 -0
  79. package/dist/location-redirection-utilities/index.d.ts.map +1 -0
  80. package/dist/location-redirection-utilities/resolveWithLocationRedirection.cjs +53 -0
  81. package/dist/location-redirection-utilities/resolveWithLocationRedirection.cjs.map +1 -0
  82. package/dist/location-redirection-utilities/resolveWithLocationRedirection.d.ts +24 -0
  83. package/dist/location-redirection-utilities/resolveWithLocationRedirection.d.ts.map +1 -0
  84. package/dist/{collabWindowTracker.js → noopHeuristic.cjs} +37 -39
  85. package/dist/noopHeuristic.cjs.map +1 -0
  86. package/dist/noopHeuristic.d.ts +23 -0
  87. package/dist/noopHeuristic.d.ts.map +1 -0
  88. package/dist/{packageVersion.js → packageVersion.cjs} +2 -2
  89. package/dist/packageVersion.cjs.map +1 -0
  90. package/dist/packageVersion.d.ts +1 -1
  91. package/dist/packageVersion.d.ts.map +1 -1
  92. package/dist/protocol.cjs +99 -0
  93. package/dist/protocol.cjs.map +1 -0
  94. package/dist/protocol.d.ts +38 -0
  95. package/dist/protocol.d.ts.map +1 -0
  96. package/dist/{protocolTreeDocumentStorageService.js → protocolTreeDocumentStorageService.cjs} +8 -5
  97. package/dist/protocolTreeDocumentStorageService.cjs.map +1 -0
  98. package/dist/protocolTreeDocumentStorageService.d.ts +8 -4
  99. package/dist/protocolTreeDocumentStorageService.d.ts.map +1 -1
  100. package/dist/quorum.cjs +16 -0
  101. package/dist/quorum.cjs.map +1 -0
  102. package/dist/quorum.d.ts +1 -14
  103. package/dist/quorum.d.ts.map +1 -1
  104. package/dist/{retriableDocumentStorageService.js → retriableDocumentStorageService.cjs} +36 -21
  105. package/dist/retriableDocumentStorageService.cjs.map +1 -0
  106. package/dist/retriableDocumentStorageService.d.ts +7 -5
  107. package/dist/retriableDocumentStorageService.d.ts.map +1 -1
  108. package/dist/tsdoc-metadata.json +11 -0
  109. package/dist/{utils.js → utils.cjs} +52 -14
  110. package/dist/utils.cjs.map +1 -0
  111. package/dist/utils.d.ts +34 -1
  112. package/dist/utils.d.ts.map +1 -1
  113. package/lib/{audience.d.ts → audience.d.mts} +4 -10
  114. package/lib/audience.d.mts.map +1 -0
  115. package/lib/{audience.js → audience.mjs} +15 -17
  116. package/lib/audience.mjs.map +1 -0
  117. package/lib/catchUpMonitor.d.mts +29 -0
  118. package/lib/catchUpMonitor.d.mts.map +1 -0
  119. package/lib/catchUpMonitor.mjs +39 -0
  120. package/lib/catchUpMonitor.mjs.map +1 -0
  121. package/lib/{connectionManager.d.ts → connectionManager.d.mts} +23 -33
  122. package/lib/connectionManager.d.mts.map +1 -0
  123. package/lib/{connectionManager.js → connectionManager.mjs} +378 -218
  124. package/lib/connectionManager.mjs.map +1 -0
  125. package/lib/{connectionState.d.ts → connectionState.d.mts} +3 -5
  126. package/lib/connectionState.d.mts.map +1 -0
  127. package/lib/{connectionState.js → connectionState.mjs} +4 -6
  128. package/lib/connectionState.mjs.map +1 -0
  129. package/lib/connectionStateHandler.d.mts +179 -0
  130. package/lib/connectionStateHandler.d.mts.map +1 -0
  131. package/lib/connectionStateHandler.mjs +469 -0
  132. package/lib/connectionStateHandler.mjs.map +1 -0
  133. package/lib/container-loader-alpha.d.mts +274 -0
  134. package/lib/container-loader-beta.d.mts +75 -0
  135. package/lib/container-loader-public.d.mts +75 -0
  136. package/lib/container-loader-untrimmed.d.mts +331 -0
  137. package/lib/container.d.mts +382 -0
  138. package/lib/container.d.mts.map +1 -0
  139. package/lib/container.mjs +1579 -0
  140. package/lib/container.mjs.map +1 -0
  141. package/lib/containerContext.d.mts +58 -0
  142. package/lib/containerContext.d.mts.map +1 -0
  143. package/lib/containerContext.mjs +70 -0
  144. package/lib/containerContext.mjs.map +1 -0
  145. package/lib/containerStorageAdapter.d.mts +73 -0
  146. package/lib/containerStorageAdapter.d.mts.map +1 -0
  147. package/lib/containerStorageAdapter.mjs +228 -0
  148. package/lib/containerStorageAdapter.mjs.map +1 -0
  149. package/lib/{contracts.d.ts → contracts.d.mts} +45 -17
  150. package/lib/contracts.d.mts.map +1 -0
  151. package/lib/{contracts.js → contracts.mjs} +4 -4
  152. package/lib/contracts.mjs.map +1 -0
  153. package/lib/debugLogger.d.mts +30 -0
  154. package/lib/debugLogger.d.mts.map +1 -0
  155. package/lib/debugLogger.mjs +93 -0
  156. package/lib/debugLogger.mjs.map +1 -0
  157. package/lib/{deltaManager.d.ts → deltaManager.d.mts} +54 -18
  158. package/lib/deltaManager.d.mts.map +1 -0
  159. package/lib/{deltaManager.js → deltaManager.mjs} +361 -165
  160. package/lib/deltaManager.mjs.map +1 -0
  161. package/lib/{deltaQueue.d.ts → deltaQueue.d.mts} +3 -4
  162. package/lib/deltaQueue.d.mts.map +1 -0
  163. package/lib/{deltaQueue.js → deltaQueue.mjs} +25 -24
  164. package/lib/deltaQueue.mjs.map +1 -0
  165. package/lib/disposal.d.mts +13 -0
  166. package/lib/disposal.d.mts.map +1 -0
  167. package/lib/disposal.mjs +21 -0
  168. package/lib/disposal.mjs.map +1 -0
  169. package/lib/error.d.mts +23 -0
  170. package/lib/error.d.mts.map +1 -0
  171. package/lib/error.mjs +28 -0
  172. package/lib/error.mjs.map +1 -0
  173. package/lib/index.d.mts +11 -0
  174. package/lib/index.d.mts.map +1 -0
  175. package/lib/index.mjs +10 -0
  176. package/lib/index.mjs.map +1 -0
  177. package/lib/{loader.d.ts → loader.d.mts} +39 -20
  178. package/lib/loader.d.mts.map +1 -0
  179. package/lib/loader.mjs +143 -0
  180. package/lib/loader.mjs.map +1 -0
  181. package/lib/location-redirection-utilities/index.d.mts +6 -0
  182. package/lib/location-redirection-utilities/index.d.mts.map +1 -0
  183. package/lib/location-redirection-utilities/index.mjs +6 -0
  184. package/lib/location-redirection-utilities/index.mjs.map +1 -0
  185. package/lib/location-redirection-utilities/resolveWithLocationRedirection.d.mts +24 -0
  186. package/lib/location-redirection-utilities/resolveWithLocationRedirection.d.mts.map +1 -0
  187. package/lib/location-redirection-utilities/resolveWithLocationRedirection.mjs +48 -0
  188. package/lib/location-redirection-utilities/resolveWithLocationRedirection.mjs.map +1 -0
  189. package/lib/noopHeuristic.d.mts +23 -0
  190. package/lib/noopHeuristic.d.mts.map +1 -0
  191. package/lib/{collabWindowTracker.js → noopHeuristic.mjs} +33 -35
  192. package/lib/noopHeuristic.mjs.map +1 -0
  193. package/lib/{packageVersion.d.ts → packageVersion.d.mts} +1 -1
  194. package/lib/{packageVersion.d.ts.map → packageVersion.d.mts.map} +1 -1
  195. package/lib/{packageVersion.js → packageVersion.mjs} +2 -2
  196. package/lib/packageVersion.mjs.map +1 -0
  197. package/lib/protocol.d.mts +38 -0
  198. package/lib/protocol.d.mts.map +1 -0
  199. package/lib/protocol.mjs +94 -0
  200. package/lib/protocol.mjs.map +1 -0
  201. package/lib/{protocolTreeDocumentStorageService.d.ts → protocolTreeDocumentStorageService.d.mts} +8 -4
  202. package/lib/protocolTreeDocumentStorageService.d.mts.map +1 -0
  203. package/lib/{protocolTreeDocumentStorageService.js → protocolTreeDocumentStorageService.mjs} +8 -5
  204. package/lib/protocolTreeDocumentStorageService.mjs.map +1 -0
  205. package/lib/quorum.d.mts +4 -0
  206. package/lib/quorum.d.mts.map +1 -0
  207. package/lib/quorum.mjs +12 -0
  208. package/lib/quorum.mjs.map +1 -0
  209. package/lib/{retriableDocumentStorageService.d.ts → retriableDocumentStorageService.d.mts} +7 -5
  210. package/lib/retriableDocumentStorageService.d.mts.map +1 -0
  211. package/lib/{retriableDocumentStorageService.js → retriableDocumentStorageService.mjs} +35 -20
  212. package/lib/retriableDocumentStorageService.mjs.map +1 -0
  213. package/lib/utils.d.mts +67 -0
  214. package/lib/utils.d.mts.map +1 -0
  215. package/lib/{utils.js → utils.mjs} +47 -11
  216. package/lib/utils.mjs.map +1 -0
  217. package/package.json +163 -70
  218. package/prettier.config.cjs +8 -0
  219. package/src/audience.ts +59 -49
  220. package/src/catchUpMonitor.ts +61 -0
  221. package/src/connectionManager.ts +1154 -910
  222. package/src/connectionState.ts +22 -25
  223. package/src/connectionStateHandler.ts +689 -319
  224. package/src/container.ts +2476 -1792
  225. package/src/containerContext.ts +98 -330
  226. package/src/containerStorageAdapter.ts +301 -105
  227. package/src/contracts.ts +184 -146
  228. package/src/debugLogger.ts +123 -0
  229. package/src/deltaManager.ts +1165 -900
  230. package/src/deltaQueue.ts +156 -152
  231. package/src/disposal.ts +25 -0
  232. package/src/error.ts +44 -0
  233. package/src/index.ts +14 -15
  234. package/src/loader.ts +356 -427
  235. package/src/location-redirection-utilities/index.ts +9 -0
  236. package/src/location-redirection-utilities/resolveWithLocationRedirection.ts +61 -0
  237. package/src/noopHeuristic.ts +107 -0
  238. package/src/packageVersion.ts +1 -1
  239. package/src/protocol.ts +150 -0
  240. package/src/protocolTreeDocumentStorageService.ts +35 -35
  241. package/src/quorum.ts +11 -50
  242. package/src/retriableDocumentStorageService.ts +135 -95
  243. package/src/utils.ts +159 -86
  244. package/tsc-multi.test.json +4 -0
  245. package/tsconfig.json +10 -12
  246. package/dist/audience.js.map +0 -1
  247. package/dist/collabWindowTracker.d.ts +0 -19
  248. package/dist/collabWindowTracker.d.ts.map +0 -1
  249. package/dist/collabWindowTracker.js.map +0 -1
  250. package/dist/connectionManager.js.map +0 -1
  251. package/dist/connectionState.js.map +0 -1
  252. package/dist/connectionStateHandler.js +0 -280
  253. package/dist/connectionStateHandler.js.map +0 -1
  254. package/dist/container.js +0 -1284
  255. package/dist/container.js.map +0 -1
  256. package/dist/containerContext.js +0 -217
  257. package/dist/containerContext.js.map +0 -1
  258. package/dist/containerStorageAdapter.js +0 -104
  259. package/dist/containerStorageAdapter.js.map +0 -1
  260. package/dist/contracts.js.map +0 -1
  261. package/dist/deltaManager.js.map +0 -1
  262. package/dist/deltaManagerProxy.d.ts +0 -54
  263. package/dist/deltaManagerProxy.d.ts.map +0 -1
  264. package/dist/deltaManagerProxy.js +0 -115
  265. package/dist/deltaManagerProxy.js.map +0 -1
  266. package/dist/deltaQueue.js.map +0 -1
  267. package/dist/index.js +0 -16
  268. package/dist/index.js.map +0 -1
  269. package/dist/loader.js +0 -241
  270. package/dist/loader.js.map +0 -1
  271. package/dist/packageVersion.js.map +0 -1
  272. package/dist/protocolTreeDocumentStorageService.js.map +0 -1
  273. package/dist/quorum.js +0 -44
  274. package/dist/quorum.js.map +0 -1
  275. package/dist/retriableDocumentStorageService.js.map +0 -1
  276. package/dist/utils.js.map +0 -1
  277. package/lib/audience.d.ts.map +0 -1
  278. package/lib/audience.js.map +0 -1
  279. package/lib/collabWindowTracker.d.ts +0 -19
  280. package/lib/collabWindowTracker.d.ts.map +0 -1
  281. package/lib/collabWindowTracker.js.map +0 -1
  282. package/lib/connectionManager.d.ts.map +0 -1
  283. package/lib/connectionManager.js.map +0 -1
  284. package/lib/connectionState.d.ts.map +0 -1
  285. package/lib/connectionState.js.map +0 -1
  286. package/lib/connectionStateHandler.d.ts +0 -81
  287. package/lib/connectionStateHandler.d.ts.map +0 -1
  288. package/lib/connectionStateHandler.js +0 -276
  289. package/lib/connectionStateHandler.js.map +0 -1
  290. package/lib/container.d.ts +0 -238
  291. package/lib/container.d.ts.map +0 -1
  292. package/lib/container.js +0 -1276
  293. package/lib/container.js.map +0 -1
  294. package/lib/containerContext.d.ts +0 -84
  295. package/lib/containerContext.d.ts.map +0 -1
  296. package/lib/containerContext.js +0 -213
  297. package/lib/containerContext.js.map +0 -1
  298. package/lib/containerStorageAdapter.d.ts +0 -48
  299. package/lib/containerStorageAdapter.d.ts.map +0 -1
  300. package/lib/containerStorageAdapter.js +0 -99
  301. package/lib/containerStorageAdapter.js.map +0 -1
  302. package/lib/contracts.d.ts.map +0 -1
  303. package/lib/contracts.js.map +0 -1
  304. package/lib/deltaManager.d.ts.map +0 -1
  305. package/lib/deltaManager.js.map +0 -1
  306. package/lib/deltaManagerProxy.d.ts +0 -54
  307. package/lib/deltaManagerProxy.d.ts.map +0 -1
  308. package/lib/deltaManagerProxy.js +0 -110
  309. package/lib/deltaManagerProxy.js.map +0 -1
  310. package/lib/deltaQueue.d.ts.map +0 -1
  311. package/lib/deltaQueue.js.map +0 -1
  312. package/lib/index.d.ts +0 -8
  313. package/lib/index.d.ts.map +0 -1
  314. package/lib/index.js +0 -8
  315. package/lib/index.js.map +0 -1
  316. package/lib/loader.d.ts.map +0 -1
  317. package/lib/loader.js +0 -236
  318. package/lib/loader.js.map +0 -1
  319. package/lib/packageVersion.js.map +0 -1
  320. package/lib/protocolTreeDocumentStorageService.d.ts.map +0 -1
  321. package/lib/protocolTreeDocumentStorageService.js.map +0 -1
  322. package/lib/quorum.d.ts +0 -21
  323. package/lib/quorum.d.ts.map +0 -1
  324. package/lib/quorum.js +0 -38
  325. package/lib/quorum.js.map +0 -1
  326. package/lib/retriableDocumentStorageService.d.ts.map +0 -1
  327. package/lib/retriableDocumentStorageService.js.map +0 -1
  328. package/lib/utils.d.ts +0 -34
  329. package/lib/utils.d.ts.map +0 -1
  330. package/lib/utils.js.map +0 -1
  331. package/src/collabWindowTracker.ts +0 -102
  332. package/src/deltaManagerProxy.ts +0 -158
  333. package/tsconfig.esnext.json +0 -7
@@ -3,32 +3,282 @@
3
3
  * Licensed under the MIT License.
4
4
  */
5
5
 
6
- import { ITelemetryLogger, ITelemetryProperties } from "@fluidframework/common-definitions";
7
- import { assert, Timer } from "@fluidframework/common-utils";
8
- import { IConnectionDetails } from "@fluidframework/container-definitions";
9
- import { ILocalSequencedClient, IProtocolHandler } from "@fluidframework/protocol-base";
10
- import { ConnectionMode, IQuorumClients } from "@fluidframework/protocol-definitions";
11
- import { PerformanceEvent } from "@fluidframework/telemetry-utils";
6
+ import { ITelemetryProperties, TelemetryEventCategory } from "@fluidframework/core-interfaces";
7
+ import { assert, Timer } from "@fluidframework/core-utils";
8
+ import { IDeltaManager } from "@fluidframework/container-definitions";
9
+ import { ISequencedClient, IClient } from "@fluidframework/protocol-definitions";
10
+ import {
11
+ ITelemetryLoggerExt,
12
+ PerformanceEvent,
13
+ loggerToMonitoringContext,
14
+ } from "@fluidframework/telemetry-utils";
15
+ import { IAnyDriverError } from "@fluidframework/driver-definitions";
16
+ import { CatchUpMonitor, ICatchUpMonitor } from "./catchUpMonitor";
12
17
  import { ConnectionState } from "./connectionState";
18
+ import { IConnectionDetailsInternal, IConnectionStateChangeReason } from "./contracts";
19
+ import { IProtocolHandler } from "./protocol";
20
+
21
+ // Based on recent data, it looks like the majority of cases where we get stuck are due to really slow or
22
+ // timing out ops fetches. So attempt recovery infrequently. Also fetch uses 30 second timeout, so
23
+ // if retrying fixes the problem, we should not see these events.
24
+ const JoinOpTimeoutMs = 45000;
25
+
26
+ // Timeout waiting for "self" join signal, before giving up
27
+ const JoinSignalTimeoutMs = 5000;
13
28
 
14
29
  /** Constructor parameter type for passing in dependencies needed by the ConnectionStateHandler */
15
30
  export interface IConnectionStateHandlerInputs {
16
- /** Provides access to the clients currently in the quorum */
17
- quorumClients: () => IQuorumClients | undefined;
18
- /** Log to telemetry any change in state, included to Connecting */
19
- logConnectionStateChangeTelemetry:
20
- (value: ConnectionState, oldState: ConnectionState, reason?: string | undefined) => void;
21
- /** Whether to expect the client to join in write mode on next connection */
22
- shouldClientJoinWrite: () => boolean;
23
- /** (Optional) How long should we wait on our previous client's Leave op before transitioning to Connected again */
24
- maxClientLeaveWaitTime: number | undefined;
25
- /** Log an issue encountered while in the Connecting state. details will be logged as a JSON string */
26
- logConnectionIssue: (eventName: string, details?: ITelemetryProperties) => void;
27
- /** Callback whenever the ConnectionState changes between Disconnected and Connected */
28
- connectionStateChanged: () => void;
31
+ logger: ITelemetryLoggerExt;
32
+ /** Log to telemetry any change in state, included to Connecting */
33
+ connectionStateChanged: (
34
+ value: ConnectionState,
35
+ oldState: ConnectionState,
36
+ reason?: IConnectionStateChangeReason,
37
+ ) => void;
38
+ /** Whether to expect the client to join in write mode on next connection */
39
+ shouldClientJoinWrite: () => boolean;
40
+ /** (Optional) How long should we wait on our previous client's Leave op before transitioning to Connected again */
41
+ maxClientLeaveWaitTime: number | undefined;
42
+ /** Log an issue encountered while in the Connecting state. details will be logged as a JSON string */
43
+ logConnectionIssue: (
44
+ eventName: string,
45
+ category: TelemetryEventCategory,
46
+ details?: ITelemetryProperties,
47
+ ) => void;
48
+ /** Callback to note that an old local client ID is still present in the Quorum that should have left and should now be considered invalid */
49
+ clientShouldHaveLeft: (clientId: string) => void;
29
50
  }
30
51
 
31
- const JoinOpTimeoutMs = 45000;
52
+ /**
53
+ * interface that connection state handler implements
54
+ */
55
+ export interface IConnectionStateHandler {
56
+ readonly connectionState: ConnectionState;
57
+ readonly pendingClientId: string | undefined;
58
+
59
+ containerSaved(): void;
60
+ dispose(): void;
61
+ initProtocol(protocol: IProtocolHandler): void;
62
+ receivedConnectEvent(details: IConnectionDetailsInternal): void;
63
+ receivedDisconnectEvent(reason: IConnectionStateChangeReason): void;
64
+ establishingConnection(reason: IConnectionStateChangeReason): void;
65
+ /**
66
+ * Switches state to disconnected when we are still establishing connection during container.load(),
67
+ * container connect() or reconnect and the container gets closed or disposed or disconnect happens.
68
+ * @param reason - reason for cancelling the connection.
69
+ */
70
+ cancelEstablishingConnection(reason: IConnectionStateChangeReason): void;
71
+ }
72
+
73
+ export function createConnectionStateHandler(
74
+ inputs: IConnectionStateHandlerInputs,
75
+ deltaManager: IDeltaManager<any, any>,
76
+ clientId?: string,
77
+ ) {
78
+ const mc = loggerToMonitoringContext(inputs.logger);
79
+ return createConnectionStateHandlerCore(
80
+ mc.config.getBoolean("Fluid.Container.CatchUpBeforeDeclaringConnected") === true, // connectedRaisedWhenCaughtUp
81
+ mc.config.getBoolean("Fluid.Container.EnableJoinSignalWait") === true, // readClientsWaitForJoinSignal
82
+ inputs,
83
+ deltaManager,
84
+ clientId,
85
+ );
86
+ }
87
+
88
+ export function createConnectionStateHandlerCore(
89
+ connectedRaisedWhenCaughtUp: boolean,
90
+ readClientsWaitForJoinSignal: boolean,
91
+ inputs: IConnectionStateHandlerInputs,
92
+ deltaManager: IDeltaManager<any, any>,
93
+ clientId?: string,
94
+ ) {
95
+ if (!connectedRaisedWhenCaughtUp) {
96
+ return new ConnectionStateHandler(inputs, readClientsWaitForJoinSignal, clientId);
97
+ }
98
+ return new ConnectionStateCatchup(
99
+ inputs,
100
+ (handler: IConnectionStateHandlerInputs) =>
101
+ new ConnectionStateHandler(handler, readClientsWaitForJoinSignal, clientId),
102
+ deltaManager,
103
+ );
104
+ }
105
+
106
+ /**
107
+ * Helper internal interface to abstract away Audience & Quorum
108
+ */
109
+ interface IMembership {
110
+ on(
111
+ eventName: "addMember" | "removeMember",
112
+ listener: (clientId: string, details: IClient | ISequencedClient) => void,
113
+ );
114
+ getMember(clientId: string): undefined | unknown;
115
+ }
116
+
117
+ /**
118
+ * Class that can be used as a base class for building IConnectionStateHandler adapters / pipeline.
119
+ * It implements both ends of communication interfaces and passes data back and forward
120
+ */
121
+ class ConnectionStateHandlerPassThrough
122
+ implements IConnectionStateHandler, IConnectionStateHandlerInputs
123
+ {
124
+ protected readonly pimpl: IConnectionStateHandler;
125
+
126
+ constructor(
127
+ protected readonly inputs: IConnectionStateHandlerInputs,
128
+ pimplFactory: (handler: IConnectionStateHandlerInputs) => IConnectionStateHandler,
129
+ ) {
130
+ this.pimpl = pimplFactory(this);
131
+ }
132
+
133
+ /**
134
+ * IConnectionStateHandler
135
+ */
136
+ public get connectionState() {
137
+ return this.pimpl.connectionState;
138
+ }
139
+ public get pendingClientId() {
140
+ return this.pimpl.pendingClientId;
141
+ }
142
+
143
+ public containerSaved() {
144
+ return this.pimpl.containerSaved();
145
+ }
146
+ public dispose() {
147
+ return this.pimpl.dispose();
148
+ }
149
+ public initProtocol(protocol: IProtocolHandler) {
150
+ return this.pimpl.initProtocol(protocol);
151
+ }
152
+ public receivedDisconnectEvent(reason: IConnectionStateChangeReason<IAnyDriverError>) {
153
+ return this.pimpl.receivedDisconnectEvent(reason);
154
+ }
155
+
156
+ public establishingConnection(reason: IConnectionStateChangeReason) {
157
+ return this.pimpl.establishingConnection(reason);
158
+ }
159
+
160
+ public cancelEstablishingConnection(reason: IConnectionStateChangeReason) {
161
+ return this.pimpl.cancelEstablishingConnection(reason);
162
+ }
163
+
164
+ public receivedConnectEvent(details: IConnectionDetailsInternal) {
165
+ return this.pimpl.receivedConnectEvent(details);
166
+ }
167
+
168
+ /**
169
+ * IConnectionStateHandlerInputs
170
+ */
171
+
172
+ public get logger() {
173
+ return this.inputs.logger;
174
+ }
175
+ public connectionStateChanged(
176
+ value: ConnectionState,
177
+ oldState: ConnectionState,
178
+ reason?: IConnectionStateChangeReason,
179
+ ) {
180
+ return this.inputs.connectionStateChanged(value, oldState, reason);
181
+ }
182
+ public shouldClientJoinWrite() {
183
+ return this.inputs.shouldClientJoinWrite();
184
+ }
185
+ public get maxClientLeaveWaitTime() {
186
+ return this.inputs.maxClientLeaveWaitTime;
187
+ }
188
+ public logConnectionIssue(
189
+ eventName: string,
190
+ category: TelemetryEventCategory,
191
+ details?: ITelemetryProperties,
192
+ ) {
193
+ return this.inputs.logConnectionIssue(eventName, category, details);
194
+ }
195
+ public clientShouldHaveLeft(clientId: string) {
196
+ return this.inputs.clientShouldHaveLeft(clientId);
197
+ }
198
+ }
199
+
200
+ /**
201
+ * Implementation of IConnectionStateHandler pass-through adapter that waits for specific sequence number
202
+ * before raising connected event
203
+ */
204
+ class ConnectionStateCatchup extends ConnectionStateHandlerPassThrough {
205
+ private catchUpMonitor: ICatchUpMonitor | undefined;
206
+
207
+ constructor(
208
+ inputs: IConnectionStateHandlerInputs,
209
+ pimplFactory: (handler: IConnectionStateHandlerInputs) => IConnectionStateHandler,
210
+ private readonly deltaManager: IDeltaManager<any, any>,
211
+ ) {
212
+ super(inputs, pimplFactory);
213
+ this._connectionState = this.pimpl.connectionState;
214
+ }
215
+
216
+ private _connectionState: ConnectionState;
217
+ public get connectionState() {
218
+ return this._connectionState;
219
+ }
220
+
221
+ public connectionStateChanged(
222
+ value: ConnectionState,
223
+ oldState: ConnectionState,
224
+ reason?: IConnectionStateChangeReason<IAnyDriverError>,
225
+ ) {
226
+ switch (value) {
227
+ case ConnectionState.Connected:
228
+ assert(
229
+ this._connectionState === ConnectionState.CatchingUp,
230
+ 0x3e1 /* connectivity transitions */,
231
+ );
232
+ // Create catch-up monitor here (not earlier), as we might get more exact info by now about how far
233
+ // client is behind through join signal. This is only true if base layer uses signals (i.e. audience,
234
+ // not quorum, including for "rea" connections) to make decisions about moving to "connected" state.
235
+ // In addition to that, in its current form, doing this in ConnectionState.CatchingUp is dangerous as
236
+ // we might get callback right away, and it will screw up state transition (as code outside of switch
237
+ // statement will overwrite current state).
238
+ assert(
239
+ this.catchUpMonitor === undefined,
240
+ 0x3eb /* catchUpMonitor should be gone */,
241
+ );
242
+ this.catchUpMonitor = new CatchUpMonitor(
243
+ this.deltaManager,
244
+ this.transitionToConnectedState,
245
+ );
246
+ return;
247
+ case ConnectionState.Disconnected:
248
+ this.catchUpMonitor?.dispose();
249
+ this.catchUpMonitor = undefined;
250
+ break;
251
+ // ConnectionState.EstablishingConnection state would be set when we start establishing connection
252
+ // during container.connect() or reconnect because of an error.
253
+ case ConnectionState.EstablishingConnection:
254
+ assert(
255
+ this._connectionState === ConnectionState.Disconnected,
256
+ 0x6d2 /* connectivity transition to establishing connection */,
257
+ );
258
+ break;
259
+ case ConnectionState.CatchingUp:
260
+ assert(
261
+ this._connectionState === ConnectionState.EstablishingConnection,
262
+ 0x3e3 /* connectivity transitions */,
263
+ );
264
+ break;
265
+ default:
266
+ }
267
+ this._connectionState = value;
268
+ this.inputs.connectionStateChanged(value, oldState, reason);
269
+ }
270
+
271
+ private readonly transitionToConnectedState = () => {
272
+ // Defensive measure: when this is called, the inner handler must already be Connected while this adapter is still in CatchingUp state.
273
+ const state = this.pimpl.connectionState;
274
+ assert(state === ConnectionState.Connected, 0x3e5 /* invariant broken */);
275
+ assert(this._connectionState === ConnectionState.CatchingUp, 0x3e6 /* invariant broken */);
276
+ this._connectionState = ConnectionState.Connected;
277
+ this.inputs.connectionStateChanged(ConnectionState.Connected, ConnectionState.CatchingUp, {
278
+ text: "caught up",
279
+ });
280
+ };
281
+ }
32
282
 
33
283
  /**
34
284
  * In the lifetime of a container, the connection will likely disconnect and reconnect periodically.
@@ -36,310 +286,430 @@ const JoinOpTimeoutMs = 45000;
36
286
  * sequenced or blocked by the server before emitting the new "connected" event and allowing runtime to resubmit ops.
37
287
  *
38
288
  * Each connection is assigned a clientId by the service, and the connection is book-ended by a Join and a Leave op
39
- * generated by the service. Due to the distributed nature of the ordering service, in the case of reconnect we cannot
289
+ * generated by the service. Due to the distributed nature of the Relay Service, in the case of reconnect we cannot
40
290
  * make any assumptions about ordering of operations between the old and new connections - i.e. new Join op could
41
291
  * be sequenced before old Leave op (and some acks from pending ops that were in flight when we disconnected).
42
292
  *
43
293
  * The job of this class is to encapsulate the transition period during reconnect, which is identified by
44
294
  * ConnectionState.CatchingUp. Specifically, before moving to Connected state with the new clientId, it ensures that:
45
- * (A) We process the Leave op for the previous clientId. This allows us to properly handle any acks from in-flight ops
46
- * that got sequenced with the old clientId (we'll recognize them as local ops). After the Leave op, any other
47
- * pending ops can safely be submitted with the new clientId without fear of duplication in the sequenced op stream.
48
- * (B) We process the Join op for the new clientId (identified when the underlying connection was first established),
49
- * indicating the service is ready to sequence ops sent with the new clientId.
50
295
  *
51
- * For (A) we give up waiting after some time (same timeout as server uses), and go ahead and transition to Connected.
52
- * For (B) we log telemetry if it takes too long, but still only transition to Connected when the Join op is processed
53
- * and we are added to the Quorum.
296
+ * a. We process the Leave op for the previous clientId. This allows us to properly handle any acks from in-flight ops
297
+ * that got sequenced with the old clientId (we'll recognize them as local ops). After the Leave op, any other
298
+ * pending ops can safely be submitted with the new clientId without fear of duplication in the sequenced op stream.
299
+ *
300
+ * b. We process the Join op for the new clientId (identified when the underlying connection was first established),
301
+ * indicating the service is ready to sequence ops sent with the new clientId.
302
+ *
303
+ * c. We process all ops known at the time the underlying connection was established (so we are "caught up")
304
+ *
305
+ * For (a) we give up waiting after some time (same timeout as server uses), and go ahead and transition to Connected.
306
+ *
307
+ * For (b) we log telemetry if it takes too long, but still only transition to Connected when the Join op/signal is
308
+ * processed.
309
+ *
310
+ * For (c) this is optional behavior, controlled by the parameters of receivedConnectEvent
54
311
  */
55
- export class ConnectionStateHandler {
56
- private _connectionState = ConnectionState.Disconnected;
57
- private _pendingClientId: string | undefined;
58
- private readonly prevClientLeftTimer: Timer;
59
- private readonly joinOpTimer: Timer;
60
-
61
- private waitEvent: PerformanceEvent | undefined;
62
-
63
- public get connectionState(): ConnectionState {
64
- return this._connectionState;
65
- }
66
-
67
- public get connected(): boolean {
68
- return this.connectionState === ConnectionState.Connected;
69
- }
70
-
71
- public get clientId(): string | undefined {
72
- return this._clientId;
73
- }
74
-
75
- public get pendingClientId(): string | undefined {
76
- return this._pendingClientId;
77
- }
78
-
79
- constructor(
80
- private readonly handler: IConnectionStateHandlerInputs,
81
- private readonly logger: ITelemetryLogger,
82
- private _clientId?: string,
83
- ) {
84
- this.prevClientLeftTimer = new Timer(
85
- // Default is 5 min for which we are going to wait for its own "leave" message. This is same as
86
- // the max time on server after which leave op is sent.
87
- this.handler.maxClientLeaveWaitTime ?? 300000,
88
- () => {
89
- assert(!this.connected,
90
- 0x2ac /* "Connected when timeout waiting for leave from previous session fired!" */);
91
- this.applyForConnectedState("timeout");
92
- },
93
- );
94
-
95
- // Based on recent data, it looks like majority of cases where we get stuck are due to really slow or
96
- // timing out ops fetches. So attempt recovery infrequently. Also fetch uses 30 second timeout, so
97
- // if retrying fixes the problem, we should not see these events.
98
- this.joinOpTimer = new Timer(
99
- JoinOpTimeoutMs,
100
- () => {
101
- // I've observed timer firing within couple ms from disconnect event, looks like
102
- // queued timer callback is not cancelled if timer is cancelled while callback sits in the queue.
103
- if (this.connectionState !== ConnectionState.CatchingUp) {
104
- return;
105
- }
106
- const quorumClients = this.handler.quorumClients();
107
- const details = {
108
- quorumInitialized: quorumClients !== undefined,
109
- hasPendingClientId: this.pendingClientId !== undefined,
110
- inQuorum: quorumClients?.getMember(this.pendingClientId ?? "") !== undefined,
111
- waitingForLeaveOp: this.waitingForLeaveOp,
112
- };
113
- this.handler.logConnectionIssue("NoJoinOp", details);
114
- },
115
- );
116
- }
117
-
118
- private startJoinOpTimer() {
119
- assert(!this.joinOpTimer.hasTimer, 0x234 /* "has joinOpTimer" */);
120
- this.joinOpTimer.start();
121
- }
122
-
123
- private stopJoinOpTimer() {
124
- assert(this.joinOpTimer.hasTimer, 0x235 /* "no joinOpTimer" */);
125
- this.joinOpTimer.clear();
126
- }
127
-
128
- private get waitingForLeaveOp() {
129
- return this.prevClientLeftTimer.hasTimer;
130
- }
131
-
132
- public dispose() {
133
- assert(!this.joinOpTimer.hasTimer, 0x2a5 /* "join timer" */);
134
- this.prevClientLeftTimer.clear();
135
- }
136
-
137
- public containerSaved() {
138
- // If we were waiting for moving to Connected state, then only apply for state change. Since the container
139
- // is now saved and we don't have any ops to roundtrip, we can clear the timer and apply for connected state.
140
- if (this.waitingForLeaveOp) {
141
- this.prevClientLeftTimer.clear();
142
- this.applyForConnectedState("containerSaved");
143
- }
144
- }
145
-
146
- private receivedAddMemberEvent(clientId: string) {
147
- // This is the only one that requires the pending client ID
148
- if (clientId === this.pendingClientId) {
149
- if (this.joinOpTimer.hasTimer) {
150
- this.stopJoinOpTimer();
151
- } else {
152
- // timer has already fired, meaning it took too long to get join on.
153
- // Record how long it actually took to recover.
154
- this.handler.logConnectionIssue("ReceivedJoinOp");
155
- }
156
- // Start the event in case we are waiting for leave or timeout.
157
- if (this.waitingForLeaveOp) {
158
- this.waitEvent = PerformanceEvent.start(this.logger, {
159
- eventName: "WaitBeforeClientLeave",
160
- details: JSON.stringify({
161
- waitOnClientId: this._clientId,
162
- hadOutstandingOps: this.handler.shouldClientJoinWrite(),
163
- }),
164
- });
165
- }
166
- this.applyForConnectedState("addMemberEvent");
167
- }
168
- }
169
-
170
- private applyForConnectedState(source: "removeMemberEvent" | "addMemberEvent" | "timeout" | "containerSaved") {
171
- const quorumClients = this.handler.quorumClients();
172
- assert(quorumClients !== undefined, 0x236 /* "In all cases it should be already installed" */);
173
-
174
- assert(this.waitingForLeaveOp === false ||
175
- (this.clientId !== undefined && quorumClients.getMember(this.clientId) !== undefined),
176
- 0x2e2 /* "Must only wait for leave message when clientId in quorum" */);
177
-
178
- // Move to connected state only if we are in Connecting state, we have seen our join op
179
- // and there is no timer running which means we are not waiting for previous client to leave
180
- // or timeout has occurred while doing so.
181
- if (this.pendingClientId !== this.clientId
182
- && this.pendingClientId !== undefined
183
- && quorumClients.getMember(this.pendingClientId) !== undefined
184
- && !this.waitingForLeaveOp
185
- ) {
186
- this.waitEvent?.end({ source });
187
- this.setConnectionState(ConnectionState.Connected);
188
- } else {
189
- // Adding this event temporarily so that we can get help debugging if something goes wrong.
190
- this.logger.sendTelemetryEvent({
191
- eventName: "connectedStateRejected",
192
- category: source === "timeout" ? "error" : "generic",
193
- details: JSON.stringify({
194
- source,
195
- pendingClientId: this.pendingClientId,
196
- clientId: this.clientId,
197
- waitingForLeaveOp: this.waitingForLeaveOp,
198
- inQuorum: quorumClients?.getMember(this.pendingClientId ?? "") !== undefined,
199
- }),
200
- });
201
- }
202
- }
203
-
204
- private receivedRemoveMemberEvent(clientId: string) {
205
- // If the client which has left was us, then finish the timer.
206
- if (this.clientId === clientId) {
207
- this.prevClientLeftTimer.clear();
208
- this.applyForConnectedState("removeMemberEvent");
209
- }
210
- }
211
-
212
- public receivedDisconnectEvent(reason: string) {
213
- if (this.joinOpTimer.hasTimer) {
214
- this.stopJoinOpTimer();
215
- }
216
- this.setConnectionState(ConnectionState.Disconnected, reason);
217
- }
218
-
219
- /**
220
- * The "connect" event indicates the connection to the Relay Service is live.
221
- * However, some additional conditions must be met before we can fully transition to
222
- * "Connected" state. This function handles that interim period, known as "Connecting" state.
223
- * @param connectionMode - Read or Write connection
224
- * @param details - Connection details returned from the ordering service
225
- */
226
- public receivedConnectEvent(
227
- connectionMode: ConnectionMode,
228
- details: IConnectionDetails,
229
- ) {
230
- const oldState = this._connectionState;
231
- this._connectionState = ConnectionState.CatchingUp;
232
-
233
- const writeConnection = connectionMode === "write";
234
- assert(writeConnection || !this.handler.shouldClientJoinWrite(),
235
- 0x30a /* shouldClientJoinWrite should imply this is a writeConnection */);
236
- assert(writeConnection || !this.waitingForLeaveOp,
237
- 0x2a6 /* "waitingForLeaveOp should imply writeConnection (we need to be ready to flush pending ops)" */);
238
-
239
- // Note that this may be undefined since the connection is established proactively on load
240
- // and the quorum may still be under initialization.
241
- const quorumClients: IQuorumClients | undefined = this.handler.quorumClients();
242
-
243
- // Stash the clientID to detect when transitioning from connecting (socket.io channel open) to connected
244
- // (have received the join message for the client ID)
245
- // This is especially important in the reconnect case. It's possible there could be outstanding
246
- // ops sent by this client, so we should keep the old client id until we see our own client's
247
- // join message. after we see the join message for our new connection with our new client id,
248
- // we know there can no longer be outstanding ops that we sent with the previous client id.
249
- this._pendingClientId = details.clientId;
250
-
251
- // IMPORTANT: Report telemetry after we set _pendingClientId, but before transitioning to Connected state
252
- this.handler.logConnectionStateChangeTelemetry(ConnectionState.CatchingUp, oldState);
253
-
254
- // For write connections, this pending clientId could be in the quorum already (i.e. join op already processed).
255
- // We are fetching ops from storage in parallel to connecting to Relay Service,
256
- // and given async processes, it's possible that we have already processed our own join message before
257
- // connection was fully established.
258
- // If quorumClients itself is undefined, we expect it will process the join op after it's initialized.
259
- const waitingForJoinOp = writeConnection && quorumClients?.getMember(this._pendingClientId) === undefined;
260
-
261
- if (waitingForJoinOp) {
262
- // Previous client left, and we are waiting for our own join op. When it is processed we'll join the quorum
263
- // and attempt to transition to Connected state via receivedAddMemberEvent.
264
- this.startJoinOpTimer();
265
- } else if (!this.waitingForLeaveOp) {
266
- // We're not waiting for Join or Leave op (if read-only connection those don't even apply),
267
- // go ahead and declare the state to be Connected!
268
- // If we are waiting for Leave op still, do nothing for now, we will transition to Connected later.
269
- this.setConnectionState(ConnectionState.Connected);
270
- }
271
- }
272
-
273
- private setConnectionState(value: ConnectionState.Disconnected, reason: string): void;
274
- private setConnectionState(value: ConnectionState.Connected): void;
275
- private setConnectionState(value: ConnectionState, reason?: string): void {
276
- if (this.connectionState === value) {
277
- // Already in the desired state - exit early
278
- this.logger.sendErrorEvent({ eventName: "setConnectionStateSame", value });
279
- return;
280
- }
281
-
282
- const oldState = this._connectionState;
283
- this._connectionState = value;
284
- const quorumClients = this.handler.quorumClients();
285
- let client: ILocalSequencedClient | undefined;
286
- if (this._clientId !== undefined) {
287
- client = quorumClients?.getMember(this._clientId);
288
- }
289
- if (value === ConnectionState.Connected) {
290
- assert(oldState === ConnectionState.CatchingUp,
291
- 0x1d8 /* "Should only transition from Connecting state" */);
292
- // Mark our old client should have left in the quorum if it's still there
293
- if (client !== undefined) {
294
- client.shouldHaveLeft = true;
295
- }
296
- this._clientId = this.pendingClientId;
297
- } else if (value === ConnectionState.Disconnected) {
298
- // Important as we process our own joinSession message through delta request
299
- this._pendingClientId = undefined;
300
- // Only wait for "leave" message if the connected client exists in the quorum because only the write
301
- // client will exist in the quorum and only for those clients we will receive "removeMember" event and
302
- // the client has some unacked ops.
303
- // Also server would not accept ops from read client. Also check if the timer is not already running as
304
- // we could receive "Disconnected" event multiple times without getting connected and in that case we
305
- // don't want to reset the timer as we still want to wait on original client which started this timer.
306
- if (client !== undefined
307
- && this.handler.shouldClientJoinWrite()
308
- && this.prevClientLeftTimer.hasTimer === false
309
- ) {
310
- this.prevClientLeftTimer.restart();
311
- } else {
312
- // Adding this event temporarily so that we can get help debugging if something goes wrong.
313
- this.logger.sendTelemetryEvent({
314
- eventName: "noWaitOnDisconnected",
315
- details: JSON.stringify({
316
- inQuorum: client !== undefined,
317
- waitingForLeaveOp: this.waitingForLeaveOp,
318
- hadOutstandingOps: this.handler.shouldClientJoinWrite(),
319
- }),
320
- });
321
- }
322
- }
323
-
324
- // Report transition before we propagate event across layers
325
- this.handler.logConnectionStateChangeTelemetry(this._connectionState, oldState, reason);
326
-
327
- // Propagate event across layers
328
- this.handler.connectionStateChanged();
329
- }
330
-
331
- public initProtocol(protocol: IProtocolHandler) {
332
- protocol.quorum.on("addMember", (clientId, _details) => {
333
- this.receivedAddMemberEvent(clientId);
334
- });
335
-
336
- protocol.quorum.on("removeMember", (clientId) => {
337
- this.receivedRemoveMemberEvent(clientId);
338
- });
339
-
340
- // if we have a clientId from a previous container we need to wait for its leave message
341
- if (this.clientId !== undefined && protocol.quorum.getMember(this.clientId) !== undefined) {
342
- this.prevClientLeftTimer.restart();
343
- }
344
- }
312
+ class ConnectionStateHandler implements IConnectionStateHandler {
313
+ private _connectionState = ConnectionState.Disconnected;
314
+ private _pendingClientId: string | undefined;
315
+
316
+ /**
317
+ * Tracks that we observe the "leave" op within the timeout for our previous clientId (see comment on ConnectionStateHandler class)
318
+ * ! This ensures we do not switch to a new clientId until we process all potential messages from old clientId
319
+ * ! i.e. We will always see the "leave" op for a client after we have seen all the ops it has sent
320
+ * ! This check helps prevent the same op from being resubmitted by the PendingStateManager upon reconnecting
321
+ */
322
+ private readonly prevClientLeftTimer: Timer;
323
+
324
+ /**
325
+ * Tracks that we observe our own "join" op within the timeout after receiving a "connected" event from the DeltaManager
326
+ */
327
+ private readonly joinOpTimer: Timer;
328
+
329
+ private protocol?: IProtocolHandler;
330
+ private connection?: IConnectionDetailsInternal;
331
+ private _clientId?: string;
332
+
333
+ /** Track how long we waited to see "leave" op for previous clientId */
334
+ private waitEvent: PerformanceEvent | undefined;
335
+
336
+ public get connectionState(): ConnectionState {
337
+ return this._connectionState;
338
+ }
339
+
340
+ private get clientId(): string | undefined {
341
+ return this._clientId;
342
+ }
343
+
344
+ public get pendingClientId(): string | undefined {
345
+ return this._pendingClientId;
346
+ }
347
+
348
+ constructor(
349
+ private readonly handler: IConnectionStateHandlerInputs,
350
+ private readonly readClientsWaitForJoinSignal: boolean,
351
+ clientIdFromPausedSession?: string,
352
+ ) {
353
+ this._clientId = clientIdFromPausedSession;
354
+ this.prevClientLeftTimer = new Timer(
355
+ // Default is 5 min for which we are going to wait for its own "leave" message. This is same as
356
+ // the max time on server after which leave op is sent.
357
+ this.handler.maxClientLeaveWaitTime ?? 300000,
358
+ () => {
359
+ assert(
360
+ this.connectionState !== ConnectionState.Connected,
361
+ 0x2ac /* "Connected when timeout waiting for leave from previous session fired!" */,
362
+ );
363
+ this.applyForConnectedState("timeout");
364
+ },
365
+ );
366
+
367
+ this.joinOpTimer = new Timer(
368
+ 0, // default value is not used - startJoinOpTimer() explicitly provides timeout
369
+ () => {
370
+ // I've observed timer firing within couple ms from disconnect event, looks like
371
+ // queued timer callback is not cancelled if timer is cancelled while callback sits in the queue.
372
+ if (this.connectionState !== ConnectionState.CatchingUp) {
373
+ return;
374
+ }
375
+ const details = {
376
+ protocolInitialized: this.protocol !== undefined,
377
+ pendingClientId: this.pendingClientId,
378
+ clientJoined: this.hasMember(this.pendingClientId),
379
+ waitingForLeaveOp: this.waitingForLeaveOp,
380
+ };
381
+ this.handler.logConnectionIssue("NoJoinOp", "error", details);
382
+ },
383
+ );
384
+ }
385
+
386
+ private startJoinOpTimer() {
387
+ assert(!this.joinOpTimer.hasTimer, 0x234 /* "has joinOpTimer" */);
388
+ assert(this.connection !== undefined, 0x4b3 /* have connection */);
389
+ this.joinOpTimer.start(
390
+ this.connection.mode === "write" ? JoinOpTimeoutMs : JoinSignalTimeoutMs,
391
+ );
392
+ }
393
+
394
+ private stopJoinOpTimer() {
395
+ assert(this.joinOpTimer.hasTimer, 0x235 /* "no joinOpTimer" */);
396
+ this.joinOpTimer.clear();
397
+ }
398
+
399
+ private get waitingForLeaveOp() {
400
+ return this.prevClientLeftTimer.hasTimer;
401
+ }
402
+
403
+ public dispose() {
404
+ assert(!this.joinOpTimer.hasTimer, 0x2a5 /* "join timer" */);
405
+ this.prevClientLeftTimer.clear();
406
+ }
407
+
408
+ public containerSaved() {
409
+ // If we were waiting for moving to Connected state, then only apply for state change. Since the container
410
+ // is now saved and we don't have any ops to roundtrip, we can clear the timer and apply for connected state.
411
+ if (this.waitingForLeaveOp) {
412
+ this.prevClientLeftTimer.clear();
413
+ this.applyForConnectedState("containerSaved");
414
+ }
415
+ }
416
+
417
+ private receivedAddMemberEvent(clientId: string) {
418
+ // This is the only one that requires the pending client ID
419
+ if (clientId === this.pendingClientId) {
420
+ if (this.joinOpTimer.hasTimer) {
421
+ this.stopJoinOpTimer();
422
+ } else if (this.shouldWaitForJoinSignal()) {
423
+ // timer has already fired, meaning it took too long to get join op/signal.
424
+ // Record how long it actually took to recover.
425
+ // This is generic event, as it by itself is not an error.
426
+ // We also have a case where NoJoinOp happens during container boot (we do not report it as error in such case),
427
+ // if this log statement happens after boot - we do not want to consider it error case.
428
+ this.handler.logConnectionIssue("ReceivedJoinOp", "generic");
429
+ }
430
+ // Start the event in case we are waiting for leave or timeout.
431
+ if (this.waitingForLeaveOp) {
432
+ this.waitEvent = PerformanceEvent.start(this.handler.logger, {
433
+ eventName: "WaitBeforeClientLeave",
434
+ details: JSON.stringify({
435
+ waitOnClientId: this._clientId,
436
+ hadOutstandingOps: this.handler.shouldClientJoinWrite(),
437
+ }),
438
+ });
439
+ }
440
+ this.applyForConnectedState("addMemberEvent");
441
+ } else if (clientId === this.clientId) {
442
+ // If we see our clientId and it's not also our pending ID, it's our own join op
443
+ // being replayed, so start the timer in case our previous client is still in quorum
444
+ assert(
445
+ !this.waitingForLeaveOp,
446
+ 0x5d2 /* Unexpected join op with current clientId while waiting */,
447
+ );
448
+ assert(
449
+ this.connectionState !== ConnectionState.Connected,
450
+ 0x5d3 /* Unexpected join op with current clientId while connected */,
451
+ );
452
+ this.prevClientLeftTimer.restart();
453
+ }
454
+ }
455
+
456
+ private applyForConnectedState(
457
+ source: "removeMemberEvent" | "addMemberEvent" | "timeout" | "containerSaved",
458
+ ) {
459
+ assert(
460
+ this.protocol !== undefined,
461
+ 0x236 /* "In all cases it should be already installed" */,
462
+ );
463
+
464
+ assert(
465
+ !this.waitingForLeaveOp || this.hasMember(this.clientId),
466
+ 0x2e2 /* "Must only wait for leave message when clientId in quorum" */,
467
+ );
468
+
469
+ // Move to connected state only if:
470
+ // 1. We have seen our own "join" op (i.e. for this.pendingClientId)
471
+ // 2. There is no "leave" timer running, meaning this is our first connection or the previous client has left (via this.prevClientLeftTimer)
472
+ if (
473
+ this.pendingClientId !== this.clientId &&
474
+ this.hasMember(this.pendingClientId) &&
475
+ !this.waitingForLeaveOp
476
+ ) {
477
+ this.waitEvent?.end({ source });
478
+ this.setConnectionState(ConnectionState.Connected);
479
+ } else {
480
+ // Adding this event temporarily so that we can get help debugging if something goes wrong.
481
+ // We may not see any ops due to being disconnected all that time - that's not an error!
482
+ const error =
483
+ source === "timeout" && this.connectionState !== ConnectionState.Disconnected;
484
+ this.handler.logger.sendTelemetryEvent({
485
+ eventName: "connectedStateRejected",
486
+ category: error ? "error" : "generic",
487
+ details: JSON.stringify({
488
+ source,
489
+ pendingClientId: this.pendingClientId,
490
+ clientId: this.clientId,
491
+ waitingForLeaveOp: this.waitingForLeaveOp,
492
+ clientJoined: this.hasMember(this.pendingClientId),
493
+ }),
494
+ });
495
+ }
496
+ }
497
+
498
+ private receivedRemoveMemberEvent(clientId: string) {
499
+ // If the client which has left was us, then finish the timer.
500
+ if (this.clientId === clientId) {
501
+ this.prevClientLeftTimer.clear();
502
+ this.applyForConnectedState("removeMemberEvent");
503
+ }
504
+ }
505
+
506
+ public receivedDisconnectEvent(reason: IConnectionStateChangeReason<IAnyDriverError>) {
507
+ this.connection = undefined;
508
+ this.setConnectionState(ConnectionState.Disconnected, reason);
509
+ }
510
+
511
+ public cancelEstablishingConnection(reason: IConnectionStateChangeReason) {
512
+ assert(
513
+ this._connectionState === ConnectionState.EstablishingConnection,
514
+ 0x6d3 /* Connection state should be EstablishingConnection */,
515
+ );
516
+ assert(this.connection === undefined, 0x6d4 /* No connection should be present */);
517
+ const oldState = this._connectionState;
518
+ this._connectionState = ConnectionState.Disconnected;
519
+ this.handler.connectionStateChanged(ConnectionState.Disconnected, oldState, reason);
520
+ }
521
+
522
+ public establishingConnection(reason: IConnectionStateChangeReason) {
523
+ const oldState = this._connectionState;
524
+ this._connectionState = ConnectionState.EstablishingConnection;
525
+ this.handler.connectionStateChanged(ConnectionState.EstablishingConnection, oldState, {
526
+ text: `Establishing Connection due to ${reason.text}`,
527
+ error: reason.error,
528
+ });
529
+ }
530
+
531
+ private shouldWaitForJoinSignal() {
532
+ assert(
533
+ this.connection !== undefined,
534
+ 0x4b4 /* all callers call here with active connection */,
535
+ );
536
+ return this.connection.mode === "write" || this.readClientsWaitForJoinSignal;
537
+ }
538
+
539
+ /**
540
+ * The "connect" event indicates the connection to the Relay Service is live.
541
+ * However, some additional conditions must be met before we can fully transition to
542
+ * "Connected" state. This function handles that interim period, known as "CatchingUp" state.
543
+ * @param details - Connection details returned from the Relay Service
544
+ * @param deltaManager - DeltaManager to be used for delaying Connected transition until caught up.
545
+ * If it's undefined, then don't delay and transition to Connected as soon as Leave/Join op are accounted for
546
+ */
547
+ public receivedConnectEvent(details: IConnectionDetailsInternal) {
548
+ this.connection = details;
549
+
550
+ const oldState = this._connectionState;
551
+ this._connectionState = ConnectionState.CatchingUp;
552
+
553
+ // The following checks are wrong. They are only valid if user has write access to a file.
554
+ // If user lost such access mid-session, user will not be able to get "write" connection.
555
+ //
556
+ // const writeConnection = details.mode === "write";
557
+ // assert(!this.handler.shouldClientJoinWrite() || writeConnection,
558
+ // 0x30a /* shouldClientJoinWrite should imply this is a writeConnection */);
559
+ // assert(!this.waitingForLeaveOp || writeConnection,
560
+ // 0x2a6 /* "waitingForLeaveOp should imply writeConnection (we need to be ready to flush pending ops)" */);
561
+
562
+ // Stash the clientID to detect when transitioning from connecting (socket.io channel open) to connected
563
+ // (have received the join message for the client ID)
564
+ // This is especially important in the reconnect case. It's possible there could be outstanding
565
+ // ops sent by this client, so we should keep the old client id until we see our own client's
566
+ // join message. after we see the join message for our new connection with our new client id,
567
+ // we know there can no longer be outstanding ops that we sent with the previous client id.
568
+ this._pendingClientId = details.clientId;
569
+
570
+ // IMPORTANT: Report telemetry after we set _pendingClientId, but before transitioning to Connected state
571
+ this.handler.connectionStateChanged(ConnectionState.CatchingUp, oldState, details.reason);
572
+
573
+ // Check if we need to wait for join op/signal, and if we need to wait for leave op from previous connection.
574
+ // Pending clientId could have joined already (i.e. join op/signal already processed):
575
+ // We are fetching ops from storage in parallel to connecting to Relay Service,
576
+ // and given async processes, it's possible that we have already processed our own join message before
577
+ // connection was fully established.
578
+ if (!this.hasMember(this._pendingClientId) && this.shouldWaitForJoinSignal()) {
579
+ // We are waiting for our own join op / signal. When it is processed
580
+ // we'll attempt to transition to Connected state via receivedAddMemberEvent() flow.
581
+ this.startJoinOpTimer();
582
+ } else if (!this.waitingForLeaveOp) {
583
+ // We're not waiting for Join or Leave op (if read-only connection those don't even apply),
584
+ // go ahead and declare the state to be Connected!
585
+ // If we are waiting for Leave op still, do nothing for now, we will transition to Connected later.
586
+ this.setConnectionState(ConnectionState.Connected);
587
+ }
588
+ // else - We are waiting for Leave op still, do nothing for now, we will transition to Connected later
589
+ }
590
+
591
+ private setConnectionState(
592
+ value: ConnectionState.Disconnected,
593
+ reason: IConnectionStateChangeReason,
594
+ ): void;
595
+ private setConnectionState(value: ConnectionState.Connected): void;
596
+ private setConnectionState(
597
+ value: ConnectionState.Disconnected | ConnectionState.Connected,
598
+ reason?: IConnectionStateChangeReason,
599
+ ): void {
600
+ if (this.connectionState === value) {
601
+ // Already in the desired state - exit early
602
+ this.handler.logger.sendErrorEvent({ eventName: "setConnectionStateSame", value });
603
+ return;
604
+ }
605
+
606
+ const oldState = this._connectionState;
607
+ this._connectionState = value;
608
+
609
+ // This is the only place in code that deals with quorum. The rest works with audience
610
+ // The code below ensures that we do not send ops until we know that old "write" client's disconnect
611
+ // produced (and sequenced) leave op
612
+ const currentClientInQuorum =
613
+ this._clientId !== undefined &&
614
+ this.protocol?.quorum?.getMember(this._clientId) !== undefined;
615
+ if (value === ConnectionState.Connected) {
616
+ assert(
617
+ oldState === ConnectionState.CatchingUp,
618
+ 0x1d8 /* "Should only transition from Connecting state" */,
619
+ );
620
+ // Mark our old client should have left in the quorum if it's still there
621
+ if (currentClientInQuorum) {
622
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
623
+ this.handler.clientShouldHaveLeft(this._clientId!);
624
+ }
625
+ this._clientId = this.pendingClientId;
626
+ } else if (value === ConnectionState.Disconnected) {
627
+ // Clear pending state immediately to prepare for reconnect
628
+ this._pendingClientId = undefined;
629
+
630
+ if (this.joinOpTimer.hasTimer) {
631
+ this.stopJoinOpTimer();
632
+ }
633
+
634
+ // Only wait for "leave" message if the connected client exists in the quorum and had some non-acked ops
635
+ // Also check if the timer is not already running as
636
+ // we could receive "Disconnected" event multiple times without getting connected and in that case we
637
+ // don't want to reset the timer as we still want to wait on original client which started this timer.
638
+ if (
639
+ currentClientInQuorum &&
640
+ this.handler.shouldClientJoinWrite() &&
641
+ !this.waitingForLeaveOp // same as !this.prevClientLeftTimer.hasTimer
642
+ ) {
643
+ this.prevClientLeftTimer.restart();
644
+ } else {
645
+ // Adding this event temporarily so that we can get help debugging if something goes wrong.
646
+ this.handler.logger.sendTelemetryEvent({
647
+ eventName: "noWaitOnDisconnected",
648
+ details: JSON.stringify({
649
+ clientId: this._clientId,
650
+ inQuorum: currentClientInQuorum,
651
+ waitingForLeaveOp: this.waitingForLeaveOp,
652
+ hadOutstandingOps: this.handler.shouldClientJoinWrite(),
653
+ }),
654
+ });
655
+ }
656
+ }
657
+
658
+ // Report transition before we propagate event across layers
659
+ this.handler.connectionStateChanged(this._connectionState, oldState, reason);
660
+ }
661
+
662
+ // Helper that picks the membership source to consult: the audience or the quorum (undefined while this.protocol is unset).
663
+ // The old design checked only the quorum, for "write" clients.
664
+ // The latest change checks the audience, for all types of connections.
665
+ protected get membership(): IMembership | undefined {
666
+ // We could always use the audience here, and in practice that would probably be correct
667
+ // (including the case when this.readClientsWaitForJoinSignal === false),
668
+ // but only if it is a superset of the quorum, i.e. when filtered to "write" clients, the two are always identical!
669
+ // It is safer to assume that we have bugs, so that engaging the kill-bit switch brings us back to a well-known
670
+ // and tested state!
671
+ return this.readClientsWaitForJoinSignal ? this.protocol?.audience : this.protocol?.quorum;
672
+ }
673
+ // Stores the protocol handler, subscribes to membership add/remove events, and reconciles clients that are already members.
674
+ public initProtocol(protocol: IProtocolHandler) {
675
+ this.protocol = protocol;
676
+ // Forward every "addMember" event to receivedAddMemberEvent. The assert requires each member whose mode is not "read" to already be in the quorum.
677
+ this.membership?.on("addMember", (clientId, details) => {
678
+ assert(
679
+ (details as IClient).mode === "read" ||
680
+ protocol.quorum.getMember(clientId) !== undefined,
681
+ 0x4b5 /* Audience is subset of quorum */,
682
+ );
683
+ this.receivedAddMemberEvent(clientId);
684
+ });
685
+ // Forward every "removeMember" event to receivedRemoveMemberEvent. The assert requires the removed client to be absent from the quorum.
686
+ this.membership?.on("removeMember", (clientId) => {
687
+ assert(
688
+ protocol.quorum.getMember(clientId) === undefined,
689
+ 0x4b6 /* Audience is subset of quorum */,
690
+ );
691
+ this.receivedRemoveMemberEvent(clientId);
692
+ });
693
+
694
+ /* There is a tiny, tiny race possible, where these events happen in this order:
695
+ 1. A connection is established (no "cached" mode is used, so it happens in parallel / faster than other steps)
696
+ 2. Some other client produces a summary
697
+ 3. We get "lucky" and load from that summary as our initial snapshot
698
+ 4. ConnectionStateHandler.initProtocol is called while "self" is already in the quorum.
699
+ We could avoid this sequence (and delete the test case for it) if we moved the connection lower in Container.load().
700
+ When it does happen, the check below reports the pending client as added right away. */
701
+ if (this.hasMember(this.pendingClientId)) {
702
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
703
+ this.receivedAddMemberEvent(this.pendingClientId!);
704
+ }
705
+
706
+ // If we already have a clientId from a previous container and it is still a member, we need to wait for its leave message, so (re)start prevClientLeftTimer.
707
+ if (this.clientId !== undefined && this.hasMember(this.clientId)) {
708
+ this.prevClientLeftTimer.restart();
709
+ }
710
+ }
711
+ // Returns true when the current membership source has an entry for clientId. An undefined clientId is looked up as "", and the result is false while this.membership is undefined.
712
+ protected hasMember(clientId?: string) {
713
+ return this.membership?.getMember(clientId ?? "") !== undefined;
714
+ }
345
715
  }