@effect/cluster 0.50.6 → 0.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/RunnerStorage/package.json +6 -0
  2. package/SqlRunnerStorage/package.json +6 -0
  3. package/dist/cjs/ClusterError.js +2 -24
  4. package/dist/cjs/ClusterError.js.map +1 -1
  5. package/dist/cjs/ClusterMetrics.js +13 -15
  6. package/dist/cjs/ClusterMetrics.js.map +1 -1
  7. package/dist/cjs/ClusterSchema.js +17 -2
  8. package/dist/cjs/ClusterSchema.js.map +1 -1
  9. package/dist/cjs/ClusterWorkflowEngine.js +50 -83
  10. package/dist/cjs/ClusterWorkflowEngine.js.map +1 -1
  11. package/dist/cjs/Entity.js +1 -13
  12. package/dist/cjs/Entity.js.map +1 -1
  13. package/dist/cjs/EntityAddress.js +9 -1
  14. package/dist/cjs/EntityAddress.js.map +1 -1
  15. package/dist/cjs/EntityId.js +7 -1
  16. package/dist/cjs/EntityId.js.map +1 -1
  17. package/dist/cjs/EntityProxy.js +1 -1
  18. package/dist/cjs/EntityProxy.js.map +1 -1
  19. package/dist/cjs/HttpRunner.js +69 -43
  20. package/dist/cjs/HttpRunner.js.map +1 -1
  21. package/dist/cjs/MessageStorage.js +64 -16
  22. package/dist/cjs/MessageStorage.js.map +1 -1
  23. package/dist/cjs/Runner.js +3 -3
  24. package/dist/cjs/Runner.js.map +1 -1
  25. package/dist/cjs/RunnerAddress.js +7 -0
  26. package/dist/cjs/RunnerAddress.js.map +1 -1
  27. package/dist/cjs/RunnerHealth.js +91 -32
  28. package/dist/cjs/RunnerHealth.js.map +1 -1
  29. package/dist/cjs/RunnerServer.js +38 -24
  30. package/dist/cjs/RunnerServer.js.map +1 -1
  31. package/dist/cjs/RunnerStorage.js +100 -0
  32. package/dist/cjs/RunnerStorage.js.map +1 -0
  33. package/dist/cjs/Runners.js +18 -22
  34. package/dist/cjs/Runners.js.map +1 -1
  35. package/dist/cjs/ShardId.js +17 -7
  36. package/dist/cjs/ShardId.js.map +1 -1
  37. package/dist/cjs/Sharding.js +444 -320
  38. package/dist/cjs/Sharding.js.map +1 -1
  39. package/dist/cjs/ShardingConfig.js +10 -14
  40. package/dist/cjs/ShardingConfig.js.map +1 -1
  41. package/dist/cjs/Snowflake.js +1 -1
  42. package/dist/cjs/SocketRunner.js +1 -1
  43. package/dist/cjs/SocketRunner.js.map +1 -1
  44. package/dist/cjs/SqlMessageStorage.js +22 -28
  45. package/dist/cjs/SqlMessageStorage.js.map +1 -1
  46. package/dist/cjs/SqlRunnerStorage.js +375 -0
  47. package/dist/cjs/SqlRunnerStorage.js.map +1 -0
  48. package/dist/cjs/index.js +5 -15
  49. package/dist/cjs/internal/entityManager.js +42 -23
  50. package/dist/cjs/internal/entityManager.js.map +1 -1
  51. package/dist/dts/ClusterError.d.ts +0 -22
  52. package/dist/dts/ClusterError.d.ts.map +1 -1
  53. package/dist/dts/ClusterMetrics.d.ts +4 -14
  54. package/dist/dts/ClusterMetrics.d.ts.map +1 -1
  55. package/dist/dts/ClusterSchema.d.ts +9 -1
  56. package/dist/dts/ClusterSchema.d.ts.map +1 -1
  57. package/dist/dts/ClusterWorkflowEngine.d.ts.map +1 -1
  58. package/dist/dts/Entity.d.ts +3 -14
  59. package/dist/dts/Entity.d.ts.map +1 -1
  60. package/dist/dts/EntityAddress.d.ts +11 -0
  61. package/dist/dts/EntityAddress.d.ts.map +1 -1
  62. package/dist/dts/EntityId.d.ts +5 -0
  63. package/dist/dts/EntityId.d.ts.map +1 -1
  64. package/dist/dts/EntityProxy.d.ts +5 -6
  65. package/dist/dts/EntityProxy.d.ts.map +1 -1
  66. package/dist/dts/HttpRunner.d.ts +48 -25
  67. package/dist/dts/HttpRunner.d.ts.map +1 -1
  68. package/dist/dts/MessageStorage.d.ts +13 -5
  69. package/dist/dts/MessageStorage.d.ts.map +1 -1
  70. package/dist/dts/Runner.d.ts +4 -4
  71. package/dist/dts/Runner.d.ts.map +1 -1
  72. package/dist/dts/RunnerAddress.d.ts +5 -0
  73. package/dist/dts/RunnerAddress.d.ts.map +1 -1
  74. package/dist/dts/RunnerHealth.d.ts +24 -16
  75. package/dist/dts/RunnerHealth.d.ts.map +1 -1
  76. package/dist/dts/RunnerServer.d.ts +5 -4
  77. package/dist/dts/RunnerServer.d.ts.map +1 -1
  78. package/dist/dts/{ShardStorage.d.ts → RunnerStorage.d.ts} +41 -54
  79. package/dist/dts/RunnerStorage.d.ts.map +1 -0
  80. package/dist/dts/Runners.d.ts +15 -11
  81. package/dist/dts/Runners.d.ts.map +1 -1
  82. package/dist/dts/ShardId.d.ts +1 -1
  83. package/dist/dts/ShardId.d.ts.map +1 -1
  84. package/dist/dts/Sharding.d.ts +20 -10
  85. package/dist/dts/Sharding.d.ts.map +1 -1
  86. package/dist/dts/ShardingConfig.d.ts +40 -14
  87. package/dist/dts/ShardingConfig.d.ts.map +1 -1
  88. package/dist/dts/SocketRunner.d.ts +4 -3
  89. package/dist/dts/SocketRunner.d.ts.map +1 -1
  90. package/dist/dts/SqlMessageStorage.d.ts +2 -3
  91. package/dist/dts/SqlMessageStorage.d.ts.map +1 -1
  92. package/dist/dts/SqlRunnerStorage.d.ts +40 -0
  93. package/dist/dts/SqlRunnerStorage.d.ts.map +1 -0
  94. package/dist/dts/index.d.ts +4 -24
  95. package/dist/dts/index.d.ts.map +1 -1
  96. package/dist/esm/ClusterError.js +0 -21
  97. package/dist/esm/ClusterError.js.map +1 -1
  98. package/dist/esm/ClusterMetrics.js +12 -14
  99. package/dist/esm/ClusterMetrics.js.map +1 -1
  100. package/dist/esm/ClusterSchema.js +17 -2
  101. package/dist/esm/ClusterSchema.js.map +1 -1
  102. package/dist/esm/ClusterWorkflowEngine.js +50 -83
  103. package/dist/esm/ClusterWorkflowEngine.js.map +1 -1
  104. package/dist/esm/Entity.js +0 -12
  105. package/dist/esm/Entity.js.map +1 -1
  106. package/dist/esm/EntityAddress.js +7 -0
  107. package/dist/esm/EntityAddress.js.map +1 -1
  108. package/dist/esm/EntityId.js +5 -0
  109. package/dist/esm/EntityId.js.map +1 -1
  110. package/dist/esm/EntityProxy.js +2 -2
  111. package/dist/esm/EntityProxy.js.map +1 -1
  112. package/dist/esm/HttpRunner.js +62 -39
  113. package/dist/esm/HttpRunner.js.map +1 -1
  114. package/dist/esm/MessageStorage.js +65 -17
  115. package/dist/esm/MessageStorage.js.map +1 -1
  116. package/dist/esm/Runner.js +3 -3
  117. package/dist/esm/Runner.js.map +1 -1
  118. package/dist/esm/RunnerAddress.js +7 -0
  119. package/dist/esm/RunnerAddress.js.map +1 -1
  120. package/dist/esm/RunnerHealth.js +88 -30
  121. package/dist/esm/RunnerHealth.js.map +1 -1
  122. package/dist/esm/RunnerServer.js +38 -24
  123. package/dist/esm/RunnerServer.js.map +1 -1
  124. package/dist/esm/RunnerStorage.js +90 -0
  125. package/dist/esm/RunnerStorage.js.map +1 -0
  126. package/dist/esm/Runners.js +19 -23
  127. package/dist/esm/Runners.js.map +1 -1
  128. package/dist/esm/ShardId.js +16 -6
  129. package/dist/esm/ShardId.js.map +1 -1
  130. package/dist/esm/Sharding.js +447 -323
  131. package/dist/esm/Sharding.js.map +1 -1
  132. package/dist/esm/ShardingConfig.js +10 -14
  133. package/dist/esm/ShardingConfig.js.map +1 -1
  134. package/dist/esm/Snowflake.js +1 -1
  135. package/dist/esm/SocketRunner.js +1 -1
  136. package/dist/esm/SocketRunner.js.map +1 -1
  137. package/dist/esm/SqlMessageStorage.js +22 -28
  138. package/dist/esm/SqlMessageStorage.js.map +1 -1
  139. package/dist/esm/SqlRunnerStorage.js +366 -0
  140. package/dist/esm/SqlRunnerStorage.js.map +1 -0
  141. package/dist/esm/index.js +4 -24
  142. package/dist/esm/index.js.map +1 -1
  143. package/dist/esm/internal/entityManager.js +41 -22
  144. package/dist/esm/internal/entityManager.js.map +1 -1
  145. package/package.json +20 -60
  146. package/src/ClusterError.ts +0 -24
  147. package/src/ClusterMetrics.ts +12 -16
  148. package/src/ClusterSchema.ts +17 -2
  149. package/src/ClusterWorkflowEngine.ts +48 -80
  150. package/src/Entity.ts +3 -21
  151. package/src/EntityAddress.ts +10 -0
  152. package/src/EntityId.ts +6 -0
  153. package/src/EntityProxy.ts +10 -10
  154. package/src/HttpRunner.ts +132 -67
  155. package/src/MessageStorage.ts +89 -24
  156. package/src/Runner.ts +4 -4
  157. package/src/RunnerAddress.ts +8 -0
  158. package/src/RunnerHealth.ts +119 -56
  159. package/src/RunnerServer.ts +64 -47
  160. package/src/RunnerStorage.ts +218 -0
  161. package/src/Runners.ts +32 -45
  162. package/src/ShardId.ts +14 -3
  163. package/src/Sharding.ts +561 -417
  164. package/src/ShardingConfig.ts +39 -31
  165. package/src/Snowflake.ts +1 -1
  166. package/src/SocketRunner.ts +6 -4
  167. package/src/SqlMessageStorage.ts +28 -30
  168. package/src/SqlRunnerStorage.ts +537 -0
  169. package/src/index.ts +4 -29
  170. package/src/internal/entityManager.ts +45 -29
  171. package/HttpCommon/package.json +0 -6
  172. package/HttpShardManager/package.json +0 -6
  173. package/ShardManager/package.json +0 -6
  174. package/ShardStorage/package.json +0 -6
  175. package/SocketShardManager/package.json +0 -6
  176. package/SqlShardStorage/package.json +0 -6
  177. package/SynchronizedClock/package.json +0 -6
  178. package/dist/cjs/HttpCommon.js +0 -48
  179. package/dist/cjs/HttpCommon.js.map +0 -1
  180. package/dist/cjs/HttpShardManager.js +0 -139
  181. package/dist/cjs/HttpShardManager.js.map +0 -1
  182. package/dist/cjs/ShardManager.js +0 -549
  183. package/dist/cjs/ShardManager.js.map +0 -1
  184. package/dist/cjs/ShardStorage.js +0 -151
  185. package/dist/cjs/ShardStorage.js.map +0 -1
  186. package/dist/cjs/SocketShardManager.js +0 -32
  187. package/dist/cjs/SocketShardManager.js.map +0 -1
  188. package/dist/cjs/SqlShardStorage.js +0 -253
  189. package/dist/cjs/SqlShardStorage.js.map +0 -1
  190. package/dist/cjs/SynchronizedClock.js +0 -65
  191. package/dist/cjs/SynchronizedClock.js.map +0 -1
  192. package/dist/cjs/internal/shardManager.js +0 -353
  193. package/dist/cjs/internal/shardManager.js.map +0 -1
  194. package/dist/dts/HttpCommon.d.ts +0 -25
  195. package/dist/dts/HttpCommon.d.ts.map +0 -1
  196. package/dist/dts/HttpShardManager.d.ts +0 -119
  197. package/dist/dts/HttpShardManager.d.ts.map +0 -1
  198. package/dist/dts/ShardManager.d.ts +0 -459
  199. package/dist/dts/ShardManager.d.ts.map +0 -1
  200. package/dist/dts/ShardStorage.d.ts.map +0 -1
  201. package/dist/dts/SocketShardManager.d.ts +0 -17
  202. package/dist/dts/SocketShardManager.d.ts.map +0 -1
  203. package/dist/dts/SqlShardStorage.d.ts +0 -38
  204. package/dist/dts/SqlShardStorage.d.ts.map +0 -1
  205. package/dist/dts/SynchronizedClock.d.ts +0 -19
  206. package/dist/dts/SynchronizedClock.d.ts.map +0 -1
  207. package/dist/dts/internal/shardManager.d.ts +0 -2
  208. package/dist/dts/internal/shardManager.d.ts.map +0 -1
  209. package/dist/esm/HttpCommon.js +0 -38
  210. package/dist/esm/HttpCommon.js.map +0 -1
  211. package/dist/esm/HttpShardManager.js +0 -128
  212. package/dist/esm/HttpShardManager.js.map +0 -1
  213. package/dist/esm/ShardManager.js +0 -535
  214. package/dist/esm/ShardManager.js.map +0 -1
  215. package/dist/esm/ShardStorage.js +0 -141
  216. package/dist/esm/ShardStorage.js.map +0 -1
  217. package/dist/esm/SocketShardManager.js +0 -24
  218. package/dist/esm/SocketShardManager.js.map +0 -1
  219. package/dist/esm/SqlShardStorage.js +0 -244
  220. package/dist/esm/SqlShardStorage.js.map +0 -1
  221. package/dist/esm/SynchronizedClock.js +0 -57
  222. package/dist/esm/SynchronizedClock.js.map +0 -1
  223. package/dist/esm/internal/shardManager.js +0 -342
  224. package/dist/esm/internal/shardManager.js.map +0 -1
  225. package/src/HttpCommon.ts +0 -73
  226. package/src/HttpShardManager.ts +0 -273
  227. package/src/ShardManager.ts +0 -823
  228. package/src/ShardStorage.ts +0 -297
  229. package/src/SocketShardManager.ts +0 -48
  230. package/src/SqlShardStorage.ts +0 -329
  231. package/src/SynchronizedClock.ts +0 -82
  232. package/src/internal/shardManager.ts +0 -412
package/src/Sharding.ts CHANGED
@@ -7,41 +7,36 @@ import { type FromServer, RequestId } from "@effect/rpc/RpcMessage"
  import * as Arr from "effect/Array"
  import * as Cause from "effect/Cause"
  import * as Context from "effect/Context"
- import * as Deferred from "effect/Deferred"
  import type { DurationInput } from "effect/Duration"
  import * as Effect from "effect/Effect"
+ import * as Either from "effect/Either"
  import * as Equal from "effect/Equal"
- import * as Exit from "effect/Exit"
  import * as Fiber from "effect/Fiber"
- import * as FiberHandle from "effect/FiberHandle"
  import * as FiberMap from "effect/FiberMap"
  import * as FiberRef from "effect/FiberRef"
+ import * as FiberSet from "effect/FiberSet"
  import { constant } from "effect/Function"
  import * as HashMap from "effect/HashMap"
- import * as Iterable from "effect/Iterable"
+ import * as HashRing from "effect/HashRing"
  import * as Layer from "effect/Layer"
  import * as MutableHashMap from "effect/MutableHashMap"
  import * as MutableHashSet from "effect/MutableHashSet"
  import * as MutableRef from "effect/MutableRef"
  import * as Option from "effect/Option"
- import * as Predicate from "effect/Predicate"
  import * as PubSub from "effect/PubSub"
  import * as Schedule from "effect/Schedule"
  import * as Scope from "effect/Scope"
  import * as Stream from "effect/Stream"
  import type { MailboxFull, PersistenceError } from "./ClusterError.js"
- import {
- AlreadyProcessingMessage,
- EntityNotAssignedToRunner,
- EntityNotManagedByRunner,
- RunnerUnavailable
- } from "./ClusterError.js"
- import * as ClusterError from "./ClusterError.js"
+ import { AlreadyProcessingMessage, EntityNotAssignedToRunner } from "./ClusterError.js"
+ import * as ClusterMetrics from "./ClusterMetrics.js"
  import { Persisted, Uninterruptible } from "./ClusterSchema.js"
  import * as ClusterSchema from "./ClusterSchema.js"
  import type { CurrentAddress, CurrentRunnerAddress, Entity, HandlersFrom } from "./Entity.js"
- import { EntityAddress } from "./EntityAddress.js"
- import { EntityId } from "./EntityId.js"
+ import type { EntityAddress } from "./EntityAddress.js"
+ import { make as makeEntityAddress } from "./EntityAddress.js"
+ import type { EntityId } from "./EntityId.js"
+ import { make as makeEntityId } from "./EntityId.js"
  import * as Envelope from "./Envelope.js"
  import * as EntityManager from "./internal/entityManager.js"
  import { EntityReaper } from "./internal/entityReaper.js"
@@ -51,13 +46,15 @@ import { ResourceMap } from "./internal/resourceMap.js"
  import * as Message from "./Message.js"
  import * as MessageStorage from "./MessageStorage.js"
  import * as Reply from "./Reply.js"
+ import { Runner } from "./Runner.js"
  import type { RunnerAddress } from "./RunnerAddress.js"
+ import * as RunnerHealth from "./RunnerHealth.js"
  import { Runners } from "./Runners.js"
- import { ShardId } from "./ShardId.js"
+ import { RunnerStorage } from "./RunnerStorage.js"
+ import type { ShardId } from "./ShardId.js"
+ import { make as makeShardId } from "./ShardId.js"
  import { ShardingConfig } from "./ShardingConfig.js"
  import { EntityRegistered, type ShardingRegistrationEvent, SingletonRegistered } from "./ShardingRegistrationEvent.js"
- import { ShardManagerClient } from "./ShardManager.js"
- import { ShardStorage } from "./ShardStorage.js"
  import { SingletonAddress } from "./SingletonAddress.js"
  import * as Snowflake from "./Snowflake.js"

@@ -78,6 +75,16 @@ export class Sharding extends Context.Tag("@effect/cluster/Sharding")<Sharding,
  */
  readonly getShardId: (entityId: EntityId, group: string) => ShardId

+ /**
+ * Returns `true` if the specified `shardId` is assigned to this runner.
+ */
+ readonly hasShardId: (shardId: ShardId) => boolean
+
+ /**
+ * Generate a Snowflake ID that is unique to this runner.
+ */
+ readonly getSnowflake: Effect.Effect<Snowflake.Snowflake>
+
  /**
  * Returns `true` if sharding is shutting down, `false` otherwise.
  */
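The two members added above extend the public Sharding service. As a hedged illustration only (not code from the package), a consumer that resolves the Sharding tag could use them roughly like this, assuming the public Sharding and EntityId modules re-export what this file defines:

import * as Effect from "effect/Effect"
import * as EntityId from "@effect/cluster/EntityId"
import { Sharding } from "@effect/cluster/Sharding"

const program = Effect.gen(function*() {
  const sharding = yield* Sharding
  // derive the shard for an entity id within a shard group
  const shardId = sharding.getShardId(EntityId.make("user-123"), "default")
  // `hasShardId` reports whether this runner currently owns that shard
  if (sharding.hasShardId(shardId)) {
    // `getSnowflake` yields a Snowflake id unique to this runner
    const id = yield* sharding.getSnowflake
    yield* Effect.log("shard is local, generated id", id)
  }
})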
@@ -94,7 +101,7 @@ export class Sharding extends Context.Tag("@effect/cluster/Sharding")<Sharding,
  entityId: string
  ) => RpcClient.RpcClient.From<
  Rpcs,
- MailboxFull | AlreadyProcessingMessage | PersistenceError | EntityNotManagedByRunner
+ MailboxFull | AlreadyProcessingMessage | PersistenceError
  >
  >

@@ -134,7 +141,7 @@ export class Sharding extends Context.Tag("@effect/cluster/Sharding")<Sharding,
  */
  readonly send: (message: Message.Incoming<any>) => Effect.Effect<
  void,
- EntityNotManagedByRunner | EntityNotAssignedToRunner | MailboxFull | AlreadyProcessingMessage
+ EntityNotAssignedToRunner | MailboxFull | AlreadyProcessingMessage
  >

  /**
@@ -145,15 +152,17 @@ export class Sharding extends Context.Tag("@effect/cluster/Sharding")<Sharding,
  discard: boolean
  ) => Effect.Effect<
  void,
- EntityNotManagedByRunner | MailboxFull | AlreadyProcessingMessage | PersistenceError
+ MailboxFull | AlreadyProcessingMessage | PersistenceError
  >

  /**
  * Notify sharding that a message has been persisted to storage.
  */
- readonly notify: (message: Message.Incoming<any>) => Effect.Effect<
+ readonly notify: (message: Message.Incoming<any>, options?: {
+ readonly waitUntilRead?: boolean | undefined
+ }) => Effect.Effect<
  void,
- EntityNotManagedByRunner | EntityNotAssignedToRunner | AlreadyProcessingMessage
+ EntityNotAssignedToRunner | AlreadyProcessingMessage
  >

  /**
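With the change above, `notify` accepts an optional options object; `waitUntilRead: true` makes the effect resolve only once the storage read loop has picked the message up. A hedged sketch of a caller (illustrative only; `afterPersist` is a made-up helper name):

import * as Effect from "effect/Effect"
import type * as Message from "@effect/cluster/Message"
import { Sharding } from "@effect/cluster/Sharding"

// notify sharding about an incoming message that was persisted out of band
const afterPersist = (message: Message.Incoming<any>) =>
  Effect.gen(function*() {
    const sharding = yield* Sharding
    yield* sharding.notify(message, { waitUntilRead: true })
  })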
@@ -180,20 +189,25 @@ interface EntityManagerState {
  readonly entity: Entity<any, any>
  readonly scope: Scope.CloseableScope
  readonly manager: EntityManager.EntityManager
+ closed: boolean
  }

  const make = Effect.gen(function*() {
  const config = yield* ShardingConfig

- const runners = yield* Runners
- const shardManager = yield* ShardManagerClient
+ const runnersService = yield* Runners
+ const runnerHealth = yield* RunnerHealth.RunnerHealth
  const snowflakeGen = yield* Snowflake.Generator
  const shardingScope = yield* Effect.scope
  const isShutdown = MutableRef.make(false)
+ const fiberSet = yield* FiberSet.make()
+ const runFork = yield* FiberSet.runtime(fiberSet)<never>().pipe(
+ Effect.mapInputContext((context: Context.Context<never>) => Context.omit(Scope.Scope)(context))
+ )

  const storage = yield* MessageStorage.MessageStorage
  const storageEnabled = storage !== MessageStorage.noop
- const shardStorage = yield* ShardStorage
+ const runnerStorage = yield* RunnerStorage

  const entityManagers = new Map<string, EntityManagerState>()

@@ -212,7 +226,7 @@ const make = Effect.gen(function*() {

  function getShardId(entityId: EntityId, group: string): ShardId {
  const id = Math.abs(hashString(entityId) % config.shardsPerGroup) + 1
- return ShardId.make({ group, id }, { disableValidation: true })
+ return makeShardId(group, id)
  }

  function isEntityOnLocalShards(address: EntityAddress): boolean {
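`getShardId` maps an entity id to a 1-based shard id by hashing the id and reducing it modulo `shardsPerGroup`. A standalone sketch of the same arithmetic (the hash below is a stand-in for the package's internal `hashString`, and 300 is an assumed value for `shardsPerGroup`):

const shardsPerGroup = 300 // assumed configuration value

// stand-in string hash (djb2 xor variant), not the package's implementation
const hash = (s: string): number => {
  let h = 5381
  for (let i = 0; i < s.length; i++) {
    h = (h * 33) ^ s.charCodeAt(i)
  }
  return h | 0
}

// every message for the same entity id lands on the same shard
const shardIdFor = (entityId: string): number =>
  Math.abs(hash(entityId) % shardsPerGroup) + 1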
@@ -220,17 +234,23 @@ const make = Effect.gen(function*() {
  }

  // --- Shard acquisition ---
+ //
+ // Responsible for acquiring and releasing shards from RunnerStorage.
+ //
+ // This should be shutdown last, when all entities have been shutdown, to
+ // allow them to move to another runner.

+ const releasingShards = MutableHashSet.empty<ShardId>()
  if (Option.isSome(config.runnerAddress)) {
  const selfAddress = config.runnerAddress.value
  yield* Scope.addFinalizerExit(shardingScope, () => {
  // the locks expire over time, so if this fails we ignore it
- return Effect.ignore(shardStorage.releaseAll(selfAddress))
+ return Effect.ignore(runnerStorage.releaseAll(selfAddress))
  })

- const releasingShards = MutableHashSet.empty<ShardId>()
  yield* Effect.gen(function*() {
  activeShardsLatch.unsafeOpen()
+
  while (true) {
  yield* activeShardsLatch.await
  activeShardsLatch.unsafeClose()
@@ -241,6 +261,12 @@ const make = Effect.gen(function*() {
  MutableHashSet.remove(acquiredShards, shardId)
  MutableHashSet.add(releasingShards, shardId)
  }
+
+ if (MutableHashSet.size(releasingShards) > 0) {
+ yield* Effect.forkIn(syncSingletons, shardingScope)
+ yield* releaseShards
+ }
+
  // if a shard has been assigned to this runner, we acquire it
  const unacquiredShards = MutableHashSet.empty<ShardId>()
  for (const shardId of selfShards) {
@@ -248,60 +274,64 @@ const make = Effect.gen(function*() {
  MutableHashSet.add(unacquiredShards, shardId)
  }

- if (MutableHashSet.size(releasingShards) > 0) {
- yield* Effect.forkIn(syncSingletons, shardingScope)
- yield* releaseShards
- }
-
  if (MutableHashSet.size(unacquiredShards) === 0) {
  continue
  }

- const acquired = yield* shardStorage.acquire(selfAddress, unacquiredShards)
+ const acquired = yield* runnerStorage.acquire(selfAddress, unacquiredShards)
  yield* Effect.ignore(storage.resetShards(acquired))
  for (const shardId of acquired) {
+ if (MutableHashSet.has(releasingShards, shardId) || !MutableHashSet.has(selfShards, shardId)) {
+ continue
+ }
  MutableHashSet.add(acquiredShards, shardId)
  }
  if (acquired.length > 0) {
  yield* storageReadLatch.open
  yield* Effect.forkIn(syncSingletons, shardingScope)
+
+ // update metrics
+ ClusterMetrics.shards.unsafeUpdate(BigInt(MutableHashSet.size(acquiredShards)), [])
  }
  yield* Effect.sleep(1000)
  activeShardsLatch.unsafeOpen()
  }
  }).pipe(
  Effect.catchAllCause((cause) => Effect.logWarning("Could not acquire/release shards", cause)),
- Effect.forever,
+ Effect.repeat(Schedule.spaced(config.entityMessagePollInterval)),
  Effect.annotateLogs({
  package: "@effect/cluster",
  module: "Sharding",
  fiber: "Shard acquisition loop",
  runner: selfAddress
  }),
- Effect.interruptible,
  Effect.forkIn(shardingScope)
  )

- // refresh the shard locks every 4s
+ // refresh the shard locks every `shardLockRefreshInterval`
  yield* Effect.suspend(() =>
- shardStorage.refresh(selfAddress, [
+ runnerStorage.refresh(selfAddress, [
  ...acquiredShards,
  ...releasingShards
  ])
  ).pipe(
  Effect.flatMap((acquired) => {
  for (const shardId of acquiredShards) {
- if (!acquired.some((_) => _[Equal.symbol](shardId))) {
+ if (!acquired.includes(shardId)) {
  MutableHashSet.remove(acquiredShards, shardId)
  MutableHashSet.add(releasingShards, shardId)
  }
  }
- return MutableHashSet.size(releasingShards) > 0 ?
- Effect.andThen(
- Effect.forkIn(syncSingletons, shardingScope),
- releaseShards
- ) :
- Effect.void
+ for (let i = 0; i < acquired.length; i++) {
+ const shardId = acquired[i]
+ if (!MutableHashSet.has(selfShards, shardId)) {
+ MutableHashSet.remove(acquiredShards, shardId)
+ MutableHashSet.add(releasingShards, shardId)
+ }
+ }
+ return MutableHashSet.size(releasingShards) > 0
+ ? activeShardsLatch.open
+ : Effect.void
  }),
  Effect.retry({
  times: 5,
@@ -312,8 +342,8 @@ const make = Effect.gen(function*() {
  Effect.andThen(clearSelfShards)
  )
  ),
- Effect.schedule(Schedule.fixed(4000)),
- Effect.interruptible,
+ Effect.repeat(Schedule.fixed(config.shardLockRefreshInterval)),
+ Effect.forever,
  Effect.forkIn(shardingScope)
  )

@@ -328,86 +358,41 @@ const make = Effect.gen(function*() {
  (state) => state.manager.interruptShard(shardId),
  { concurrency: "unbounded", discard: true }
  ).pipe(
- Effect.andThen(shardStorage.release(selfAddress, shardId)),
+ Effect.andThen(runnerStorage.release(selfAddress, shardId)),
  Effect.annotateLogs({ runner: selfAddress }),
- Effect.andThen(() => {
+ Effect.flatMap(() => {
  MutableHashSet.remove(releasingShards, shardId)
+ return storage.unregisterShardReplyHandlers(shardId)
  })
  ),
  { concurrency: "unbounded", discard: true }
  )
- ).pipe(Effect.andThen(activeShardsLatch.open))
+ )
+ )
+
+ // open the shard latch every poll interval
+ yield* activeShardsLatch.open.pipe(
+ Effect.delay(config.entityMessagePollInterval),
+ Effect.forever,
+ Effect.forkIn(shardingScope)
  )
  }

- const clearSelfShards = Effect.suspend(() => {
+ const clearSelfShards = Effect.sync(() => {
  MutableHashSet.clear(selfShards)
- return activeShardsLatch.open
+ activeShardsLatch.unsafeOpen()
  })

- // --- Singletons ---
-
- const singletons = new Map<ShardId, MutableHashMap.MutableHashMap<SingletonAddress, Effect.Effect<void>>>()
- const singletonFibers = yield* FiberMap.make<SingletonAddress>()
- const withSingletonLock = Effect.unsafeMakeSemaphore(1).withPermits(1)
-
- const registerSingleton: Sharding["Type"]["registerSingleton"] = Effect.fnUntraced(
- function*(name, run, options) {
- const shardGroup = options?.shardGroup ?? "default"
- const address = new SingletonAddress({
- shardId: getShardId(EntityId.make(name), shardGroup),
- name
- })
-
- let map = singletons.get(address.shardId)
- if (!map) {
- map = MutableHashMap.empty()
- singletons.set(address.shardId, map)
- }
- if (MutableHashMap.has(map, address)) {
- return yield* Effect.dieMessage(`Singleton '${name}' is already registered`)
- }
-
- const context = yield* Effect.context<never>()
- const wrappedRun = run.pipe(
- Effect.locally(FiberRef.currentLogAnnotations, HashMap.empty()),
- Effect.andThen(Effect.never),
- Effect.scoped,
- Effect.provide(context),
- Effect.orDie,
- Effect.interruptible
- ) as Effect.Effect<never>
- MutableHashMap.set(map, address, wrappedRun)
-
- yield* PubSub.publish(events, SingletonRegistered({ address }))
-
- // start if we are on the right shard
- if (MutableHashSet.has(acquiredShards, address.shardId)) {
- yield* Effect.logDebug("Starting singleton", address)
- yield* FiberMap.run(singletonFibers, address, wrappedRun)
- }
- },
- withSingletonLock
- )
-
- const syncSingletons = withSingletonLock(Effect.gen(function*() {
- for (const [shardId, map] of singletons) {
- for (const [address, run] of map) {
- const running = FiberMap.unsafeHas(singletonFibers, address)
- const shouldBeRunning = MutableHashSet.has(acquiredShards, shardId)
- if (running && !shouldBeRunning) {
- yield* Effect.logDebug("Stopping singleton", address)
- internalInterruptors.add(yield* Effect.fiberId)
- yield* FiberMap.remove(singletonFibers, address)
- } else if (!running && shouldBeRunning) {
- yield* Effect.logDebug("Starting singleton", address)
- yield* FiberMap.run(singletonFibers, address, run)
- }
- }
- }
- }))
-
  // --- Storage inbox ---
+ //
+ // Responsible for reading unprocessed messages from storage and sending them
+ // to the appropriate entity manager.
+ //
+ // This should be shutdown before shard acquisition, to ensure no messages are
+ // being processed before the shards are released.
+ //
+ // It should also be shutdown after the entity managers, to ensure interrupt
+ // & ack envelopes can still be processed.

  const storageReadLatch = yield* Effect.makeLatch(true)
  const openStorageReadLatch = constant(storageReadLatch.open)
@@ -415,13 +400,6 @@ const make = Effect.gen(function*() {
  const storageReadLock = Effect.unsafeMakeSemaphore(1)
  const withStorageReadLock = storageReadLock.withPermits(1)

- let storageAlreadyProcessed = (_message: Message.IncomingRequest<any>) => true
-
- // keep track of the last sent request ids to avoid duplicates
- // we only keep the last 30 sets to avoid memory leaks
- const sentRequestIds = new Set<Snowflake.Snowflake>()
- const sentRequestIdSets = new Set<Set<Snowflake.Snowflake>>()
-
  if (storageEnabled && Option.isSome(config.runnerAddress)) {
  const selfAddress = config.runnerAddress.value

@@ -429,17 +407,87 @@ const make = Effect.gen(function*() {
  yield* Effect.logDebug("Starting")
  yield* Effect.addFinalizer(() => Effect.logDebug("Shutting down"))

- sentRequestIds.clear()
- sentRequestIdSets.clear()
+ let index = 0
+ let messages: Array<Message.Incoming<any>> = []
+ const removableNotifications = new Set<PendingNotification>()
+ const resetAddresses = MutableHashSet.empty<EntityAddress>()
+
+ const processMessages = Effect.whileLoop({
+ while: () => index < messages.length,
+ step: () => index++,
+ body: () => send
+ })
+
+ const send = Effect.catchAllCause(
+ Effect.suspend(() => {
+ const message = messages[index]
+ // if we are shutting down, we don't accept new requests
+ if (message._tag === "IncomingRequest" && isShutdown.current) {
+ if (isShutdown.current) {
+ return Effect.void
+ }
+ }
+ const address = message.envelope.address
+ if (!MutableHashSet.has(acquiredShards, address.shardId)) {
+ return Effect.void
+ }
+ const state = entityManagers.get(address.entityType)
+ if (!state) {
+ // reset address in the case that the entity is slow to register
+ MutableHashSet.add(resetAddresses, address)
+ return Effect.void
+ } else if (state.closed) {
+ return Effect.void
+ }
+
+ const isProcessing = state.manager.isProcessingFor(message)

- storageAlreadyProcessed = (message: Message.IncomingRequest<any>) => {
- if (!sentRequestIds.has(message.envelope.requestId)) {
- return false
+ // If the message might affect a currently processing request, we
+ // send it to the entity manager to be processed.
+ if (message._tag === "IncomingEnvelope" && isProcessing) {
+ return state.manager.send(message)
+ } else if (isProcessing) {
+ return Effect.void
+ } else if (message._tag === "IncomingRequest" && pendingNotifications.has(message.envelope.requestId)) {
+ const entry = pendingNotifications.get(message.envelope.requestId)!
+ pendingNotifications.delete(message.envelope.requestId)
+ removableNotifications.delete(entry)
+ entry.resume(Effect.void)
+ }
+
+ // If the entity was resuming in another fiber, we add the message
+ // id to the unprocessed set.
+ const resumptionState = MutableHashMap.get(entityResumptionState, address)
+ if (Option.isSome(resumptionState)) {
+ resumptionState.value.unprocessed.add(message.envelope.requestId)
+ if (message.envelope._tag === "Interrupt") {
+ resumptionState.value.interrupts.set(message.envelope.requestId, message as Message.IncomingEnvelope)
+ }
+ return Effect.void
+ }
+ return state.manager.send(message)
+ }),
+ (cause) => {
+ const message = messages[index]
+ const error = Cause.failureOrCause(cause)
+ // if we get a defect, then update storage
+ if (Either.isRight(error)) {
+ if (Cause.isInterrupted(cause)) {
+ return Effect.void
+ }
+ return Effect.ignore(storage.saveReply(Reply.ReplyWithContext.fromDefect({
+ id: snowflakeGen.unsafeNext(),
+ requestId: message.envelope.requestId,
+ defect: Cause.squash(cause)
+ })))
+ }
+ if (error.left._tag === "MailboxFull") {
+ // MailboxFull can only happen for requests, so this cast is safe
+ return resumeEntityFromStorage(message as Message.IncomingRequest<any>)
+ }
+ return Effect.void
  }
- const state = entityManagers.get(message.envelope.address.entityType)
- if (!state) return true
- return !state.manager.isProcessingFor(message, { excludeReplies: true })
- }
+ )

  while (true) {
  // wait for the next poll interval, or if we get notified of a change
@@ -454,110 +502,47 @@ const make = Effect.gen(function*() {
  // acquired.
  yield* storageReadLock.take(1)

- const messages = yield* storage.unprocessedMessages(acquiredShards)
- const currentSentRequestIds = new Set<Snowflake.Snowflake>()
- sentRequestIdSets.add(currentSentRequestIds)
-
- const send = Effect.catchAllCause(
- Effect.suspend(() => {
- const message = messages[index]
- if (message._tag === "IncomingRequest") {
- if (sentRequestIds.has(message.envelope.requestId)) {
- return Effect.void
- }
- sentRequestIds.add(message.envelope.requestId)
- currentSentRequestIds.add(message.envelope.requestId)
- }
- const address = message.envelope.address
- if (!MutableHashSet.has(acquiredShards, address.shardId)) {
- return Effect.void
- }
- const state = entityManagers.get(address.entityType)
- if (!state) {
- if (message._tag === "IncomingRequest") {
- return Effect.orDie(message.respond(Reply.ReplyWithContext.fromDefect({
- id: snowflakeGen.unsafeNext(),
- requestId: message.envelope.requestId,
- defect: new EntityNotManagedByRunner({ address })
- })))
- }
- return Effect.void
- }
+ entityManagers.forEach((state) => state.manager.clearProcessed())
+ if (pendingNotifications.size > 0) {
+ pendingNotifications.forEach((entry) => removableNotifications.add(entry))
+ }

- const isProcessing = state.manager.isProcessingFor(message)
+ messages = yield* storage.unprocessedMessages(acquiredShards)
+ index = 0
+ yield* processMessages

- // If the message might affect a currently processing request, we
- // send it to the entity manager to be processed.
- if (message._tag === "IncomingEnvelope" && isProcessing) {
- return state.manager.send(message)
- } else if (isProcessing) {
- return Effect.void
- }
-
- // If the entity was resuming in another fiber, we add the message
- // id to the unprocessed set.
- const resumptionState = MutableHashMap.get(entityResumptionState, address)
- if (Option.isSome(resumptionState)) {
- resumptionState.value.unprocessed.add(message.envelope.requestId)
- if (message.envelope._tag === "Interrupt") {
- resumptionState.value.interrupts.set(message.envelope.requestId, message as Message.IncomingEnvelope)
- }
- return Effect.void
- }
- return state.manager.send(message)
- }),
- (cause) => {
- const message = messages[index]
- const error = Cause.failureOption(cause)
- // if we get a defect, then update storage
- if (Option.isNone(error)) {
- if (Cause.isInterrupted(cause)) {
- return Effect.void
- }
- return storage.saveReply(Reply.ReplyWithContext.fromDefect({
- id: snowflakeGen.unsafeNext(),
- requestId: message.envelope.requestId,
- defect: Cause.squash(cause)
- }))
- }
- if (error.value._tag === "MailboxFull") {
- // MailboxFull can only happen for requests, so this cast is safe
- return resumeEntityFromStorage(message as Message.IncomingRequest<any>)
- }
- return Effect.void
+ if (removableNotifications.size > 0) {
+ removableNotifications.forEach(({ message, resume }) => {
+ pendingNotifications.delete(message.envelope.requestId)
+ resume(Effect.fail(new EntityNotAssignedToRunner({ address: message.envelope.address })))
+ })
+ removableNotifications.clear()
+ }
+ if (MutableHashSet.size(resetAddresses) > 0) {
+ for (const address of resetAddresses) {
+ yield* Effect.logWarning("Could not find entity manager for address, retrying").pipe(
+ Effect.annotateLogs({ address })
+ )
+ yield* Effect.forkIn(storage.resetAddress(address), shardingScope)
  }
- )
-
- let index = 0
- yield* Effect.whileLoop({
- while: () => index < messages.length,
- step: () => index++,
- body: constant(send)
- })
+ MutableHashSet.clear(resetAddresses)
+ }

  // let the resuming entities check if they are done
  yield* storageReadLock.release(1)
-
- while (sentRequestIdSets.size > 30) {
- const oldest = Iterable.unsafeHead(sentRequestIdSets)
- sentRequestIdSets.delete(oldest)
- for (const id of oldest) {
- sentRequestIds.delete(id)
- }
- }
  }
  }).pipe(
  Effect.scoped,
  Effect.ensuring(storageReadLock.releaseAll),
  Effect.catchAllCause((cause) => Effect.logWarning("Could not read messages from storage", cause)),
- Effect.repeat(Schedule.spaced(config.entityMessagePollInterval)),
+ Effect.forever,
  Effect.annotateLogs({
  package: "@effect/cluster",
  module: "Sharding",
  fiber: "Storage read loop",
  runner: selfAddress
  }),
- Effect.interruptible,
+ Effect.withUnhandledErrorLogLevel(Option.none()),
  Effect.forkIn(shardingScope)
  )

@@ -565,7 +550,6 @@ const make = Effect.gen(function*() {
  yield* storageReadLatch.open.pipe(
  Effect.delay(config.entityMessagePollInterval),
  Effect.forever,
- Effect.interruptible,
  Effect.forkIn(shardingScope)
  )

@@ -623,7 +607,7 @@ const make = Effect.gen(function*() {

  const sendWithRetry: Effect.Effect<
  void,
- EntityNotManagedByRunner | EntityNotAssignedToRunner
+ EntityNotAssignedToRunner
  > = Effect.catchTags(
  Effect.suspend(() => {
  if (!MutableHashSet.has(acquiredShards, address.shardId)) {
@@ -671,7 +655,7 @@ const make = Effect.gen(function*() {
  while: (e) => e._tag === "PersistenceError",
  schedule: Schedule.spaced(config.entityMessagePollInterval)
  }),
- Effect.catchAllCause((cause) => Effect.logError("Could not resume unprocessed messages", cause)),
+ Effect.catchAllCause((cause) => Effect.logDebug("Could not resume unprocessed messages", cause)),
  (effect, address) =>
  Effect.annotateLogs(effect, {
  package: "@effect/cluster",
@@ -685,107 +669,132 @@ const make = Effect.gen(function*() {
  effect,
  Effect.sync(() => MutableHashMap.remove(entityResumptionState, address))
  ),
- Effect.interruptible,
+ Effect.withUnhandledErrorLogLevel(Option.none()),
  Effect.forkIn(shardingScope)
  )
  }

  // --- Sending messages ---

- const sendLocal = <M extends Message.Outgoing<any> | Message.Incoming<any>>(
- message: M
- ): Effect.Effect<
- void,
- | EntityNotAssignedToRunner
- | EntityNotManagedByRunner
- | MailboxFull
- | AlreadyProcessingMessage
- | (M extends Message.Incoming<any> ? never : PersistenceError)
- > =>
- Effect.suspend(() => {
+ const sendLocal = <M extends Message.Outgoing<any> | Message.Incoming<any>>(message: M) =>
+ Effect.suspend(function loop(): Effect.Effect<
+ void,
+ | EntityNotAssignedToRunner
+ | MailboxFull
+ | AlreadyProcessingMessage
+ | (M extends Message.Incoming<any> ? never : PersistenceError)
+ > {
  const address = message.envelope.address
  if (!isEntityOnLocalShards(address)) {
  return Effect.fail(new EntityNotAssignedToRunner({ address }))
  }
  const state = entityManagers.get(address.entityType)
  if (!state) {
- return Effect.fail(new EntityNotManagedByRunner({ address }))
+ return Effect.flatMap(waitForEntityManager(address.entityType), loop)
+ } else if (state.closed || (isShutdown.current && message._tag === "IncomingRequest")) {
+ // if we are shutting down, we don't accept new requests
+ return Effect.fail(new EntityNotAssignedToRunner({ address }))
  }

  return message._tag === "IncomingRequest" || message._tag === "IncomingEnvelope" ?
  state.manager.send(message) :
- runners.sendLocal({
+ runnersService.sendLocal({
  message,
  send: state.manager.sendLocal,
  simulateRemoteSerialization: config.simulateRemoteSerialization
  }) as any
  })

+ type PendingNotification = {
+ resume: (_: Effect.Effect<void, EntityNotAssignedToRunner>) => void
+ readonly message: Message.IncomingRequest<any>
+ }
+ const pendingNotifications = new Map<Snowflake.Snowflake, PendingNotification>()
  const notifyLocal = <M extends Message.Outgoing<any> | Message.Incoming<any>>(
  message: M,
- discard: boolean
+ discard: boolean,
+ options?: {
+ readonly waitUntilRead?: boolean | undefined
+ }
  ) =>
- Effect.suspend(
- (): Effect.Effect<
- void,
- | EntityNotManagedByRunner
- | EntityNotAssignedToRunner
- | AlreadyProcessingMessage
- | (M extends Message.Incoming<any> ? never : PersistenceError)
- > => {
- const address = message.envelope.address
- if (!entityManagers.has(address.entityType)) {
- return Effect.fail(new EntityNotManagedByRunner({ address }))
- }
+ Effect.suspend(function loop(): Effect.Effect<
+ void,
+ | EntityNotAssignedToRunner
+ | AlreadyProcessingMessage
+ | (M extends Message.Incoming<any> ? never : PersistenceError)
+ > {
+ const address = message.envelope.address
+ const state = entityManagers.get(address.entityType)
+ if (!state) {
+ return Effect.flatMap(waitForEntityManager(address.entityType), loop)
+ } else if (state.closed || !isEntityOnLocalShards(address)) {
+ return Effect.fail(new EntityNotAssignedToRunner({ address }))
+ }

- const isLocal = isEntityOnLocalShards(address)
- const notify = storageEnabled
- ? openStorageReadLatch
- : () => Effect.dieMessage("Sharding.notifyLocal: storage is disabled")
+ const isLocal = isEntityOnLocalShards(address)
+ const notify = storageEnabled
+ ? openStorageReadLatch
+ : () => Effect.die("Sharding.notifyLocal: storage is disabled")

- if (message._tag === "IncomingRequest" || message._tag === "IncomingEnvelope") {
- if (message._tag === "IncomingRequest" && storageAlreadyProcessed(message)) {
- return Effect.fail(new AlreadyProcessingMessage({ address, envelopeId: message.envelope.requestId }))
- } else if (!isLocal) {
- return Effect.fail(new EntityNotAssignedToRunner({ address }))
- }
- return notify()
+ if (message._tag === "IncomingRequest" || message._tag === "IncomingEnvelope") {
+ if (!isLocal) {
+ return Effect.fail(new EntityNotAssignedToRunner({ address }))
+ } else if (
+ message._tag === "IncomingRequest" && state.manager.isProcessingFor(message, { excludeReplies: true })
+ ) {
+ return Effect.fail(new AlreadyProcessingMessage({ address, envelopeId: message.envelope.requestId }))
+ } else if (message._tag === "IncomingRequest" && options?.waitUntilRead) {
+ if (!storageEnabled) return notify()
+ return Effect.async<void, EntityNotAssignedToRunner>((resume) => {
+ let entry = pendingNotifications.get(message.envelope.requestId)
+ if (entry) {
+ const prevResume = entry.resume
+ entry.resume = (effect) => {
+ prevResume(effect)
+ resume(effect)
+ }
+ return
+ }
+ entry = { resume, message }
+ pendingNotifications.set(message.envelope.requestId, entry)
+ storageReadLatch.unsafeOpen()
+ })
  }
-
- return runners.notifyLocal({ message, notify, discard, storageOnly: !isLocal }) as any
+ return notify()
  }
- )

- const isTransientError = Predicate.or(RunnerUnavailable.is, EntityNotAssignedToRunner.is)
+ return runnersService.notifyLocal({ message, notify, discard, storageOnly: !isLocal }) as any
+ })
+
  function sendOutgoing(
  message: Message.Outgoing<any>,
  discard: boolean,
  retries?: number
  ): Effect.Effect<
  void,
- EntityNotManagedByRunner | MailboxFull | AlreadyProcessingMessage | PersistenceError
+ MailboxFull | AlreadyProcessingMessage | PersistenceError
  > {
  return Effect.catchIf(
  Effect.suspend(() => {
  const address = message.envelope.address
- const maybeRunner = MutableHashMap.get(shardAssignments, address.shardId)
  const isPersisted = Context.get(message.rpc.annotations, Persisted)
  if (isPersisted && !storageEnabled) {
- return Effect.dieMessage("Sharding.sendOutgoing: Persisted messages require MessageStorage")
+ return Effect.die("Sharding.sendOutgoing: Persisted messages require MessageStorage")
  }
+ const maybeRunner = MutableHashMap.get(shardAssignments, address.shardId)
  const runnerIsLocal = Option.isSome(maybeRunner) && isLocalRunner(maybeRunner.value)
  if (isPersisted) {
  return runnerIsLocal
  ? notifyLocal(message, discard)
- : runners.notify({ address: maybeRunner, message, discard })
+ : runnersService.notify({ address: maybeRunner, message, discard })
  } else if (Option.isNone(maybeRunner)) {
  return Effect.fail(new EntityNotAssignedToRunner({ address }))
  }
  return runnerIsLocal
  ? sendLocal(message)
- : runners.send({ address: maybeRunner.value, message })
+ : runnersService.send({ address: maybeRunner.value, message })
  }),
- isTransientError,
+ (error) => error._tag === "EntityNotAssignedToRunner" || error._tag === "RunnerUnavailable",
  (error) => {
  if (retries === 0) {
  return Effect.die(error)
@@ -795,158 +804,156 @@ const make = Effect.gen(function*() {
  )
  }

- const reset: Sharding["Type"]["reset"] = Effect.fnUntraced(
- function*(requestId) {
- yield* storage.clearReplies(requestId)
- sentRequestIds.delete(requestId)
- },
- Effect.matchCause({
+ const reset: Sharding["Type"]["reset"] = (requestId) =>
+ Effect.matchCause(storage.clearReplies(requestId), {
  onSuccess: () => true,
  onFailure: () => false
  })
- )
-
- // --- Shard Manager sync ---

- const shardManagerTimeoutFiber = yield* FiberHandle.make().pipe(
- Scope.extend(shardingScope)
- )
- const startShardManagerTimeout = FiberHandle.run(
- shardManagerTimeoutFiber,
- Effect.flatMap(Effect.sleep(config.shardManagerUnavailableTimeout), () => {
- MutableHashMap.clear(shardAssignments)
- return clearSelfShards
- }),
- { onlyIfMissing: true }
- )
- const stopShardManagerTimeout = FiberHandle.clear(shardManagerTimeoutFiber)
+ // --- RunnerStorage sync ---
+ //
+ // This is responsible for syncing the local view of runners and shard
+ // assignments with RunnerStorage.
+ //
+ // It should be shutdown after the clients, so that they can still get correct
+ // shard assignments for outgoing messages (they could still be in use by
+ // entities that are shutting down).
+
+ const selfRunner = Option.isSome(config.runnerAddress) ?
+ new Runner({
+ address: config.runnerAddress.value,
+ groups: config.shardGroups,
+ weight: config.runnerShardWeight
+ }) :
+ undefined
+
+ let allRunners = MutableHashMap.empty<Runner, boolean>()
+ let healthyRunnerCount = 0
+
+ // update metrics
+ if (selfRunner) {
+ ClusterMetrics.runners.unsafeUpdate(BigInt(1), [])
+ ClusterMetrics.runnersHealthy.unsafeUpdate(BigInt(1), [])
+ }

- // Every time the link to the shard manager is lost, we re-register the runner
- // and re-subscribe to sharding events
  yield* Effect.gen(function*() {
- yield* Effect.logDebug("Registering with shard manager")
- if (!isShutdown.current && Option.isSome(config.runnerAddress)) {
- const machineId = yield* shardManager.register(config.runnerAddress.value, config.shardGroups)
- yield* snowflakeGen.setMachineId(machineId)
- }
+ const hashRings = new Map<string, HashRing.HashRing<RunnerAddress>>()
+ let nextRunners = MutableHashMap.empty<Runner, boolean>()
+ const healthyRunners = MutableHashSet.empty<Runner>()
+
+ while (true) {
+ // Ensure the current runner is registered
+ if (selfRunner && !isShutdown.current && !MutableHashMap.has(allRunners, selfRunner)) {
+ yield* Effect.logDebug("Registering runner", selfRunner)
+ const machineId = yield* runnerStorage.register(selfRunner, true)
+ yield* snowflakeGen.setMachineId(machineId)
+ }

- yield* stopShardManagerTimeout
+ const runners = yield* runnerStorage.getRunners
+ let changed = false
+ for (let i = 0; i < runners.length; i++) {
+ const [runner, healthy] = runners[i]
+ MutableHashMap.set(nextRunners, runner, healthy)
+ const wasHealthy = MutableHashSet.has(healthyRunners, runner)
+ if (!healthy || wasHealthy) {
+ if (healthy === wasHealthy || !wasHealthy) {
+ // no change
+ MutableHashMap.remove(allRunners, runner)
+ }
+ continue
+ }
+ changed = true
+ MutableHashSet.add(healthyRunners, runner)
+ MutableHashMap.remove(allRunners, runner)
+ for (let j = 0; j < runner.groups.length; j++) {
+ const group = runner.groups[j]
+ let ring = hashRings.get(group)
+ if (!ring) {
+ ring = HashRing.make()
+ hashRings.set(group, ring)
+ }
+ HashRing.add(ring, runner.address, { weight: runner.weight })
+ }
+ }

- yield* Effect.logDebug("Subscribing to sharding events")
- const mailbox = yield* shardManager.shardingEvents(config.runnerAddress)
- const startedLatch = yield* Deferred.make<void, ClusterError.RunnerNotRegistered>()
+ // Remove runners that are no longer present or healthy
+ MutableHashMap.forEach(allRunners, (_, runner) => {
+ changed = true
+ MutableHashMap.remove(allRunners, runner)
+ MutableHashSet.remove(healthyRunners, runner)
+ runFork(runnersService.onRunnerUnavailable(runner.address))
+ for (let i = 0; i < runner.groups.length; i++) {
+ HashRing.remove(hashRings.get(runner.groups[i])!, runner.address)
+ }
+ })

- const eventsFiber = yield* Effect.gen(function*() {
- while (true) {
- const [events, done] = yield* mailbox.takeAll
- if (done) return
- for (const event of events) {
- yield* Effect.logDebug("Received sharding event", event)
-
- switch (event._tag) {
- case "StreamStarted": {
- yield* Deferred.done(startedLatch, Exit.void)
- break
- }
- case "ShardsAssigned": {
- for (const shard of event.shards) {
- MutableHashMap.set(shardAssignments, shard, event.address)
- }
- if (!MutableRef.get(isShutdown) && isLocalRunner(event.address)) {
- for (const shardId of event.shards) {
- if (MutableHashSet.has(selfShards, shardId)) continue
- MutableHashSet.add(selfShards, shardId)
- }
- yield* activeShardsLatch.open
- }
- break
- }
- case "ShardsUnassigned": {
- for (const shard of event.shards) {
- MutableHashMap.remove(shardAssignments, shard)
- }
- if (isLocalRunner(event.address)) {
- for (const shard of event.shards) {
- MutableHashSet.remove(selfShards, shard)
- }
- yield* activeShardsLatch.open
+ // swap allRunners and nextRunners
+ const prevRunners = allRunners
+ allRunners = nextRunners
+ nextRunners = prevRunners
+ healthyRunnerCount = MutableHashSet.size(healthyRunners)
+
+ // Ensure the current runner is registered
+ if (selfRunner && !isShutdown.current && !MutableHashMap.has(allRunners, selfRunner)) {
+ continue
+ }
+
+ // Recompute shard assignments if the set of healthy runners has changed.
+ if (changed) {
+ MutableHashSet.clear(selfShards)
+ hashRings.forEach((ring, group) => {
+ const newAssignments = HashRing.getShards(ring, config.shardsPerGroup)
+ for (let i = 0; i < config.shardsPerGroup; i++) {
+ const shard = makeShardId(group, i + 1)
+ if (newAssignments) {
+ const runner = newAssignments[i]
+ MutableHashMap.set(shardAssignments, shard, runner)
+ if (isLocalRunner(runner)) {
+ MutableHashSet.add(selfShards, shard)
  }
- break
- }
- case "RunnerUnregistered": {
- if (!isLocalRunner(event.address)) break
- return yield* Effect.fail(new ClusterError.RunnerNotRegistered({ address: event.address }))
+ } else {
+ MutableHashMap.remove(shardAssignments, shard)
  }
  }
+ })
+ yield* Effect.logDebug("New shard assignments", selfShards)
+ activeShardsLatch.unsafeOpen()
+
+ // update metrics
+ if (selfRunner) {
+ ClusterMetrics.runnersHealthy.unsafeUpdate(
+ BigInt(MutableHashSet.has(healthyRunners, selfRunner) ? 1 : 0),
+ []
+ )
  }
  }
- }).pipe(
- Effect.intoDeferred(startedLatch),
- Effect.zipRight(Effect.dieMessage("Shard manager event stream down")),
- Effect.forkScoped
- )

- // Wait for the stream to be established
- yield* Deferred.await(startedLatch)
-
- // perform a full sync every config.refreshAssignmentsInterval
- const syncFiber = yield* syncAssignments.pipe(
- Effect.andThen(Effect.sleep(config.refreshAssignmentsInterval)),
- Effect.forever,
- Effect.forkScoped
- )
+ if (selfRunner && MutableHashSet.size(healthyRunners) === 0) {
+ yield* Effect.logWarning("No healthy runners available")
+ // to prevent a deadlock, we will mark the current node as healthy to
+ // start the health check singleton again
+ yield* runnerStorage.setRunnerHealth(selfRunner.address, true)
+ }

- return yield* Fiber.joinAll([eventsFiber, syncFiber])
+ yield* Effect.sleep(config.refreshAssignmentsInterval)
+ }
  }).pipe(
- Effect.scoped,
  Effect.catchAllCause((cause) => Effect.logDebug(cause)),
- Effect.zipRight(startShardManagerTimeout),
- Effect.repeat(
- Schedule.exponential(1000).pipe(
- Schedule.union(Schedule.spaced(10_000))
- )
- ),
+ Effect.repeat(Schedule.spaced(1000)),
  Effect.annotateLogs({
  package: "@effect/cluster",
  module: "Sharding",
- fiber: "ShardManager sync",
+ fiber: "RunnerStorage sync",
  runner: config.runnerAddress
  }),
- Effect.interruptible,
  Effect.forkIn(shardingScope)
  )

- const syncAssignments = Effect.gen(function*() {
- const assignments = yield* shardManager.getAssignments
- yield* Effect.logDebug("Received shard assignments", assignments)
-
- for (const [shardId, runner] of assignments) {
- if (Option.isNone(runner)) {
- MutableHashMap.remove(shardAssignments, shardId)
- MutableHashSet.remove(selfShards, shardId)
- continue
- }
-
- MutableHashMap.set(shardAssignments, shardId, runner.value)
-
- if (!isLocalRunner(runner.value)) {
- MutableHashSet.remove(selfShards, shardId)
- continue
- }
- if (MutableRef.get(isShutdown) || MutableHashSet.has(selfShards, shardId)) {
- continue
- }
- MutableHashSet.add(selfShards, shardId)
- }
-
- yield* activeShardsLatch.open
- })
-
  // --- Clients ---

  type ClientRequestEntry = {
  readonly rpc: Rpc.AnyWithProps
- readonly context: Context.Context<never>
+ readonly services: Context.Context<never>
  lastChunkId?: Snowflake.Snowflake
  }
  const clientRequests = new Map<Snowflake.Snowflake, ClientRequestEntry>()
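The RunnerStorage sync loop above replaces the ShardManager event stream: every runner now derives shard assignments locally from a weighted consistent-hash ring of healthy runners. A reduced sketch of that assignment step, mirroring the HashRing calls used in the hunk (the addresses, weights and shard count are made-up; the real code keeps one ring per shard group and stores RunnerAddress values):

import * as HashRing from "effect/HashRing"

const ring = HashRing.make<string>()
HashRing.add(ring, "runner-a:34431", { weight: 1 })
HashRing.add(ring, "runner-b:34431", { weight: 2 }) // heavier runners receive more shards

// one slot per shard in the group; slot i corresponds to shard id i + 1,
// and `getShards` yields nothing while the ring is empty
const assignments = HashRing.getShards(ring, 8)
if (assignments) {
  for (let i = 0; i < assignments.length; i++) {
    console.log(`shard ${i + 1} -> ${assignments[i]}`)
  }
}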
@@ -955,7 +962,7 @@ const make = Effect.gen(function*() {
955
962
  Entity<any, any>,
956
963
  (entityId: string) => RpcClient.RpcClient<
957
964
  any,
958
- MailboxFull | AlreadyProcessingMessage | EntityNotManagedByRunner
965
+ MailboxFull | AlreadyProcessingMessage
959
966
  >,
960
967
  never
961
968
  > = yield* ResourceMap.make(Effect.fnUntraced(function*(entity: Entity<string, any>) {
@@ -967,7 +974,7 @@ const make = Effect.gen(function*() {
967
974
  flatten: true,
968
975
  onFromClient(options): Effect.Effect<
969
976
  void,
970
- MailboxFull | AlreadyProcessingMessage | EntityNotManagedByRunner | PersistenceError
977
+ MailboxFull | AlreadyProcessingMessage | PersistenceError
971
978
  > {
972
979
  const address = Context.unsafeGet(options.context, ClientAddressTag)
973
980
  switch (options.message._tag) {
@@ -979,7 +986,7 @@ const make = Effect.gen(function*() {
979
986
  if (!options.discard) {
980
987
  const entry: ClientRequestEntry = {
981
988
  rpc: rpc as any,
982
- context: fiber.currentContext
989
+ services: fiber.currentContext
983
990
  }
984
991
  clientRequests.set(id, entry)
985
992
  respond = makeClientRespond(entry, client.write)
@@ -1028,7 +1035,7 @@ const make = Effect.gen(function*() {
1028
1035
  const entry = clientRequests.get(requestId)!
1029
1036
  if (!entry) return Effect.void
1030
1037
  clientRequests.delete(requestId)
1031
- if (Context.get(entry.rpc.annotations, Uninterruptible)) {
1038
+ if (Uninterruptible.forClient(entry.rpc.annotations)) {
1032
1039
  return Effect.void
1033
1040
  }
1034
1041
  // for durable messages, we ignore interrupts on shutdown or as a
@@ -1065,8 +1072,8 @@ const make = Effect.gen(function*() {
1065
1072
  )
1066
1073
 
1067
1074
  return (entityId: string) => {
1068
- const id = EntityId.make(entityId)
1069
- const address = ClientAddressTag.context(EntityAddress.make({
1075
+ const id = makeEntityId(entityId)
1076
+ const address = ClientAddressTag.context(makeEntityAddress({
1070
1077
  shardId: getShardId(id, entity.getShardGroup(entityId as EntityId)),
1071
1078
  entityId: id,
1072
1079
  entityType: entity.type
@@ -1100,7 +1107,7 @@ const make = Effect.gen(function*() {
1100
1107
  const makeClient = <Type extends string, Rpcs extends Rpc.Any>(entity: Entity<Type, Rpcs>): Effect.Effect<
1101
1108
  (
1102
1109
  entityId: string
1103
- ) => RpcClient.RpcClient.From<Rpcs, MailboxFull | AlreadyProcessingMessage | EntityNotManagedByRunner>
1110
+ ) => RpcClient.RpcClient.From<Rpcs, MailboxFull | AlreadyProcessingMessage>
1104
1111
  > => clients.get(entity) as any
1105
1112
 
1106
1113
  const clientRespondDiscard = (_reply: Reply.Reply<any>) => Effect.void
@@ -1132,14 +1139,88 @@ const make = Effect.gen(function*() {
1132
1139
  }
1133
1140
  }
1134
1141
 
1142
+ // --- Singletons ---
1143
+
1144
+ const singletons = new Map<ShardId, MutableHashMap.MutableHashMap<SingletonAddress, Effect.Effect<void>>>()
1145
+ const singletonFibers = yield* FiberMap.make<SingletonAddress>()
1146
+ const withSingletonLock = Effect.unsafeMakeSemaphore(1).withPermits(1)
1147
+
1148
+ const registerSingleton: Sharding["Type"]["registerSingleton"] = Effect.fnUntraced(
1149
+ function*(name, run, options) {
1150
+ const shardGroup = options?.shardGroup ?? "default"
1151
+ const address = new SingletonAddress({
1152
+ shardId: getShardId(makeEntityId(name), shardGroup),
1153
+ name
1154
+ })
1155
+
1156
+ let map = singletons.get(address.shardId)
1157
+ if (!map) {
1158
+ map = MutableHashMap.empty()
1159
+ singletons.set(address.shardId, map)
1160
+ }
1161
+ if (MutableHashMap.has(map, address)) {
1162
+ return yield* Effect.die(`Singleton '${name}' is already registered`)
1163
+ }
1164
+
1165
+ const context = yield* Effect.context<never>()
1166
+ const wrappedRun = run.pipe(
1167
+ Effect.locally(FiberRef.currentLogAnnotations, HashMap.empty()),
1168
+ Effect.andThen(Effect.never),
1169
+ Effect.scoped,
1170
+ Effect.provide(context),
1171
+ Effect.orDie,
1172
+ Effect.interruptible
1173
+ ) as Effect.Effect<never>
1174
+ MutableHashMap.set(map, address, wrappedRun)
1175
+
1176
+ yield* PubSub.publish(events, SingletonRegistered({ address }))
1177
+
1178
+ // start if we are on the right shard
1179
+ if (MutableHashSet.has(acquiredShards, address.shardId)) {
1180
+ yield* Effect.logDebug("Starting singleton", address)
1181
+ yield* FiberMap.run(singletonFibers, address, wrappedRun)
1182
+ }
1183
+ },
1184
+ withSingletonLock
1185
+ )
1186
+
1187
+ const syncSingletons = withSingletonLock(Effect.gen(function*() {
1188
+ for (const [shardId, map] of singletons) {
1189
+ for (const [address, run] of map) {
1190
+ const running = FiberMap.unsafeHas(singletonFibers, address)
1191
+ const shouldBeRunning = MutableHashSet.has(acquiredShards, shardId)
1192
+ if (running && !shouldBeRunning) {
1193
+ yield* Effect.logDebug("Stopping singleton", address)
1194
+ internalInterruptors.add(Option.getOrThrow(Fiber.getCurrentFiber()).id())
1195
+ yield* FiberMap.remove(singletonFibers, address)
1196
+ } else if (!running && shouldBeRunning) {
1197
+ yield* Effect.logDebug("Starting singleton", address)
1198
+ yield* FiberMap.run(singletonFibers, address, run)
1199
+ }
1200
+ }
1201
+ }
1202
+ ClusterMetrics.singletons.unsafeUpdate(
1203
+ BigInt(yield* FiberMap.size(singletonFibers)),
1204
+ []
1205
+ )
1206
+ }))
1207
+
1135
1208
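The new singleton machinery above derives a shard from each singleton's name (and optional shard group), keeps the wrapped effect in a `FiberMap`, and lets `syncSingletons` start or stop it as shards are acquired or released. A hedged usage sketch from the consumer side, assuming the `Sharding` tag is exported from `@effect/cluster` as in previous releases:

```ts
import { Sharding } from "@effect/cluster"
import { Effect, Schedule } from "effect"

// Registers a cluster-wide singleton: it runs only on the runner that
// currently owns the shard derived from "nightly-report", and is moved
// when that shard is reassigned.
const registerReportJob = Effect.gen(function*() {
  const sharding = yield* Sharding.Sharding

  yield* sharding.registerSingleton(
    "nightly-report",
    Effect.logInfo("generating report").pipe(
      Effect.repeat(Schedule.spaced("1 hour")),
      Effect.asVoid
    ),
    // optional shard group, defaulting to "default" per the diff above
    { shardGroup: "default" }
  )
})
```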
  // --- Entities ---

  const context = yield* Effect.context<ShardingConfig>()
  const reaper = yield* EntityReaper
+ const entityManagerLatches = new Map<string, Effect.Latch>()
+
  const registerEntity: Sharding["Type"]["registerEntity"] = Effect.fnUntraced(
  function*(entity, build, options) {
  if (Option.isNone(config.runnerAddress) || entityManagers.has(entity.type)) return
  const scope = yield* Scope.make()
+ yield* Scope.addFinalizer(
+ scope,
+ Effect.sync(() => {
+ state.closed = true
+ })
+ )
  const manager = yield* EntityManager.make(entity, build, {
  ...options,
  storage,
@@ -1152,11 +1233,22 @@ const make = Effect.gen(function*() {
  Context.add(Snowflake.Generator, snowflakeGen)
  ))
  ) as Effect.Effect<EntityManager.EntityManager>
- entityManagers.set(entity.type, {
+ const state: EntityManagerState = {
  entity,
  scope,
+ closed: false,
  manager
- })
+ }
+
+ // register entities while storage is idle
+ // this ensures message order is preserved
+ yield* withStorageReadLock(Effect.sync(() => {
+ entityManagers.set(entity.type, state)
+ if (entityManagerLatches.has(entity.type)) {
+ entityManagerLatches.get(entity.type)!.unsafeOpen()
+ entityManagerLatches.delete(entity.type)
+ }
+ }))

  yield* PubSub.publish(events, EntityRegistered({ entity }))
  }
@@ -1176,29 +1268,76 @@ const make = Effect.gen(function*() {
  )
  )

- // --- Finalization ---
+ const waitForEntityManager = (entityType: string) => {
+ let latch = entityManagerLatches.get(entityType)
+ if (!latch) {
+ latch = Effect.unsafeMakeLatch()
+ entityManagerLatches.set(entityType, latch)
+ }
+ return latch.await
+ }

- if (Option.isSome(config.runnerAddress)) {
- const selfAddress = config.runnerAddress.value
- // Unregister runner from shard manager when scope is closed
- yield* Scope.addFinalizer(
- shardingScope,
- Effect.gen(function*() {
- yield* Effect.logDebug("Unregistering runner from shard manager", selfAddress)
- yield* shardManager.unregister(selfAddress).pipe(
- Effect.catchAllCause((cause) => Effect.logError("Error calling unregister with shard manager", cause))
+ // --- Runner health checks ---
+
+ if (selfRunner) {
+ const checkRunner = ([runner, healthy]: [Runner, boolean]) =>
+ Effect.flatMap(runnerHealth.isAlive(runner.address), (isAlive) => {
+ if (healthy === isAlive) return Effect.void
+ if (isAlive) {
+ healthyRunnerCount++
+ return Effect.logDebug(`Runner is healthy`, runner).pipe(
+ Effect.andThen(runnerStorage.setRunnerHealth(runner.address, isAlive))
+ )
+ }
+ if (healthyRunnerCount <= 1) {
+ // never mark the last runner as unhealthy, to prevent a deadlock
+ return Effect.void
+ }
+ healthyRunnerCount--
+ return Effect.logDebug(`Runner is unhealthy`, runner).pipe(
+ Effect.andThen(runnerStorage.setRunnerHealth(runner.address, isAlive))
  )
- yield* clearSelfShards
  })
+
+ yield* registerSingleton(
+ "effect/cluster/Sharding/RunnerHealth",
+ Effect.gen(function*() {
+ while (true) {
+ // Skip health checks if we are the only runner
+ if (MutableHashMap.size(allRunners) > 1) {
+ yield* Effect.forEach(allRunners, checkRunner, { discard: true, concurrency: 10 })
+ }
+ yield* Effect.sleep(config.runnerHealthCheckInterval)
+ }
+ }).pipe(
+ Effect.catchAllCause((cause) => Effect.logDebug("Runner health check failed", cause)),
+ Effect.forever,
+ Effect.annotateLogs({
+ package: "@effect/cluster",
+ module: "Sharding",
+ fiber: "Runner health check"
+ })
+ )
  )
  }

- yield* Scope.addFinalizer(
+ // --- Finalization ---
+
+ yield* Scope.addFinalizerExit(
  shardingScope,
- Effect.withFiberRuntime((fiber) => {
+ Effect.fnUntraced(function*(exit) {
+ yield* Effect.logDebug("Shutting down", exit._tag === "Success" ? {} : exit.cause).pipe(
+ Effect.annotateLogs({
+ package: "@effect/cluster",
+ module: "Sharding"
+ })
+ )
+ const fiberId = yield* Effect.fiberId
  MutableRef.set(isShutdown, true)
- internalInterruptors.add(fiber.id())
- return Effect.void
+ internalInterruptors.add(fiberId)
+ if (selfRunner) {
+ yield* Effect.ignore(runnerStorage.unregister(selfRunner.address))
+ }
  })
  )
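Shutdown now goes through `Scope.addFinalizerExit`, so the finalizer can inspect the Exit that closed the sharding scope before unregistering the runner from `RunnerStorage`. A minimal standalone sketch of that Effect pattern (the names below are illustrative, not part of the package):

```ts
import { Effect, Exit, Scope } from "effect"

const program = Effect.gen(function*() {
  const scope = yield* Scope.make()

  // the finalizer receives the Exit that closed the scope, so cleanup
  // can distinguish a clean shutdown from a failure
  yield* Scope.addFinalizerExit(scope, (exit) =>
    Effect.log(Exit.isSuccess(exit) ? "clean shutdown" : "shutting down after failure")
  )

  yield* Scope.close(scope, Exit.void)
})
```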

@@ -1213,13 +1352,18 @@ const make = Effect.gen(function*() {
  const sharding = Sharding.of({
  getRegistrationEvents,
  getShardId,
+ hasShardId(shardId: ShardId) {
+ if (isShutdown.current) return false
+ return MutableHashSet.has(acquiredShards, shardId)
+ },
+ getSnowflake: Effect.sync(() => snowflakeGen.unsafeNext()),
  isShutdown: Effect.sync(() => MutableRef.get(isShutdown)),
  registerEntity,
  registerSingleton,
  makeClient,
  send: sendLocal,
  sendOutgoing: (message, discard) => sendOutgoing(message, discard),
- notify: (message) => notifyLocal(message, false),
+ notify: (message, options) => notifyLocal(message, false, options),
  activeEntityCount,
  pollStorage: storageReadLatch.open,
  reset
@@ -1235,8 +1379,8 @@ const make = Effect.gen(function*() {
  export const layer: Layer.Layer<
  Sharding,
  never,
- ShardingConfig | Runners | ShardManagerClient | MessageStorage.MessageStorage | ShardStorage
- > = Layer.scoped(Sharding, make).pipe(
+ ShardingConfig | Runners | MessageStorage.MessageStorage | RunnerStorage | RunnerHealth.RunnerHealth
+ > = Layer.scoped(Sharding)(make).pipe(
  Layer.provide([Snowflake.layerGenerator, EntityReaper.Default])
  )
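As the final hunk shows, `Sharding.layer` now requires `RunnerStorage` and `RunnerHealth.RunnerHealth` in place of `ShardManagerClient` and `ShardStorage`. A hedged wiring sketch, assuming these modules are re-exported from the package root and that the application supplies concrete implementations (the `dependencies` layer below is a placeholder, not a real export):

```ts
import type {
  MessageStorage,
  RunnerHealth,
  Runners,
  RunnerStorage,
  ShardingConfig
} from "@effect/cluster"
import { Sharding } from "@effect/cluster"
import { Layer } from "effect"

// Placeholder for application-provided implementations, e.g. a SQL-backed
// RunnerStorage plus a RunnerHealth layer that can ping other runners.
declare const dependencies: Layer.Layer<
  | ShardingConfig.ShardingConfig
  | Runners.Runners
  | MessageStorage.MessageStorage
  | RunnerStorage.RunnerStorage
  | RunnerHealth.RunnerHealth
>

const ShardingLive: Layer.Layer<Sharding.Sharding> = Sharding.layer.pipe(
  Layer.provide(dependencies)
)
```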