@effect/cluster 0.50.5 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223):
  1. package/RunnerStorage/package.json +6 -0
  2. package/SqlRunnerStorage/package.json +6 -0
  3. package/dist/cjs/ClusterError.js +2 -24
  4. package/dist/cjs/ClusterError.js.map +1 -1
  5. package/dist/cjs/ClusterMetrics.js +13 -15
  6. package/dist/cjs/ClusterMetrics.js.map +1 -1
  7. package/dist/cjs/ClusterWorkflowEngine.js +41 -81
  8. package/dist/cjs/ClusterWorkflowEngine.js.map +1 -1
  9. package/dist/cjs/Entity.js.map +1 -1
  10. package/dist/cjs/EntityAddress.js +9 -1
  11. package/dist/cjs/EntityAddress.js.map +1 -1
  12. package/dist/cjs/EntityId.js +7 -1
  13. package/dist/cjs/EntityId.js.map +1 -1
  14. package/dist/cjs/EntityProxy.js +1 -1
  15. package/dist/cjs/EntityProxy.js.map +1 -1
  16. package/dist/cjs/HttpRunner.js +69 -43
  17. package/dist/cjs/HttpRunner.js.map +1 -1
  18. package/dist/cjs/MessageStorage.js +64 -16
  19. package/dist/cjs/MessageStorage.js.map +1 -1
  20. package/dist/cjs/Runner.js +3 -3
  21. package/dist/cjs/Runner.js.map +1 -1
  22. package/dist/cjs/RunnerAddress.js +7 -0
  23. package/dist/cjs/RunnerAddress.js.map +1 -1
  24. package/dist/cjs/RunnerHealth.js +91 -32
  25. package/dist/cjs/RunnerHealth.js.map +1 -1
  26. package/dist/cjs/RunnerServer.js +38 -24
  27. package/dist/cjs/RunnerServer.js.map +1 -1
  28. package/dist/cjs/RunnerStorage.js +100 -0
  29. package/dist/cjs/RunnerStorage.js.map +1 -0
  30. package/dist/cjs/Runners.js +18 -22
  31. package/dist/cjs/Runners.js.map +1 -1
  32. package/dist/cjs/ShardId.js +17 -7
  33. package/dist/cjs/ShardId.js.map +1 -1
  34. package/dist/cjs/Sharding.js +435 -318
  35. package/dist/cjs/Sharding.js.map +1 -1
  36. package/dist/cjs/ShardingConfig.js +10 -14
  37. package/dist/cjs/ShardingConfig.js.map +1 -1
  38. package/dist/cjs/Snowflake.js +1 -1
  39. package/dist/cjs/SocketRunner.js +1 -1
  40. package/dist/cjs/SocketRunner.js.map +1 -1
  41. package/dist/cjs/SqlMessageStorage.js +22 -28
  42. package/dist/cjs/SqlMessageStorage.js.map +1 -1
  43. package/dist/cjs/SqlRunnerStorage.js +378 -0
  44. package/dist/cjs/SqlRunnerStorage.js.map +1 -0
  45. package/dist/cjs/index.js +5 -15
  46. package/dist/cjs/internal/entityManager.js +40 -9
  47. package/dist/cjs/internal/entityManager.js.map +1 -1
  48. package/dist/dts/ClusterError.d.ts +0 -22
  49. package/dist/dts/ClusterError.d.ts.map +1 -1
  50. package/dist/dts/ClusterMetrics.d.ts +4 -14
  51. package/dist/dts/ClusterMetrics.d.ts.map +1 -1
  52. package/dist/dts/ClusterWorkflowEngine.d.ts.map +1 -1
  53. package/dist/dts/Entity.d.ts +2 -2
  54. package/dist/dts/Entity.d.ts.map +1 -1
  55. package/dist/dts/EntityAddress.d.ts +11 -0
  56. package/dist/dts/EntityAddress.d.ts.map +1 -1
  57. package/dist/dts/EntityId.d.ts +5 -0
  58. package/dist/dts/EntityId.d.ts.map +1 -1
  59. package/dist/dts/EntityProxy.d.ts +5 -6
  60. package/dist/dts/EntityProxy.d.ts.map +1 -1
  61. package/dist/dts/HttpRunner.d.ts +48 -25
  62. package/dist/dts/HttpRunner.d.ts.map +1 -1
  63. package/dist/dts/MessageStorage.d.ts +13 -5
  64. package/dist/dts/MessageStorage.d.ts.map +1 -1
  65. package/dist/dts/Runner.d.ts +4 -4
  66. package/dist/dts/Runner.d.ts.map +1 -1
  67. package/dist/dts/RunnerAddress.d.ts +5 -0
  68. package/dist/dts/RunnerAddress.d.ts.map +1 -1
  69. package/dist/dts/RunnerHealth.d.ts +24 -16
  70. package/dist/dts/RunnerHealth.d.ts.map +1 -1
  71. package/dist/dts/RunnerServer.d.ts +5 -4
  72. package/dist/dts/RunnerServer.d.ts.map +1 -1
  73. package/dist/dts/{ShardStorage.d.ts → RunnerStorage.d.ts} +41 -54
  74. package/dist/dts/RunnerStorage.d.ts.map +1 -0
  75. package/dist/dts/Runners.d.ts +15 -11
  76. package/dist/dts/Runners.d.ts.map +1 -1
  77. package/dist/dts/ShardId.d.ts +1 -1
  78. package/dist/dts/ShardId.d.ts.map +1 -1
  79. package/dist/dts/Sharding.d.ts +20 -10
  80. package/dist/dts/Sharding.d.ts.map +1 -1
  81. package/dist/dts/ShardingConfig.d.ts +40 -14
  82. package/dist/dts/ShardingConfig.d.ts.map +1 -1
  83. package/dist/dts/SocketRunner.d.ts +4 -3
  84. package/dist/dts/SocketRunner.d.ts.map +1 -1
  85. package/dist/dts/SqlMessageStorage.d.ts +2 -3
  86. package/dist/dts/SqlMessageStorage.d.ts.map +1 -1
  87. package/dist/dts/SqlRunnerStorage.d.ts +40 -0
  88. package/dist/dts/SqlRunnerStorage.d.ts.map +1 -0
  89. package/dist/dts/index.d.ts +4 -24
  90. package/dist/dts/index.d.ts.map +1 -1
  91. package/dist/esm/ClusterError.js +0 -21
  92. package/dist/esm/ClusterError.js.map +1 -1
  93. package/dist/esm/ClusterMetrics.js +12 -14
  94. package/dist/esm/ClusterMetrics.js.map +1 -1
  95. package/dist/esm/ClusterWorkflowEngine.js +41 -81
  96. package/dist/esm/ClusterWorkflowEngine.js.map +1 -1
  97. package/dist/esm/Entity.js.map +1 -1
  98. package/dist/esm/EntityAddress.js +7 -0
  99. package/dist/esm/EntityAddress.js.map +1 -1
  100. package/dist/esm/EntityId.js +5 -0
  101. package/dist/esm/EntityId.js.map +1 -1
  102. package/dist/esm/EntityProxy.js +2 -2
  103. package/dist/esm/EntityProxy.js.map +1 -1
  104. package/dist/esm/HttpRunner.js +62 -39
  105. package/dist/esm/HttpRunner.js.map +1 -1
  106. package/dist/esm/MessageStorage.js +65 -17
  107. package/dist/esm/MessageStorage.js.map +1 -1
  108. package/dist/esm/Runner.js +3 -3
  109. package/dist/esm/Runner.js.map +1 -1
  110. package/dist/esm/RunnerAddress.js +7 -0
  111. package/dist/esm/RunnerAddress.js.map +1 -1
  112. package/dist/esm/RunnerHealth.js +88 -30
  113. package/dist/esm/RunnerHealth.js.map +1 -1
  114. package/dist/esm/RunnerServer.js +38 -24
  115. package/dist/esm/RunnerServer.js.map +1 -1
  116. package/dist/esm/RunnerStorage.js +90 -0
  117. package/dist/esm/RunnerStorage.js.map +1 -0
  118. package/dist/esm/Runners.js +19 -23
  119. package/dist/esm/Runners.js.map +1 -1
  120. package/dist/esm/ShardId.js +16 -6
  121. package/dist/esm/ShardId.js.map +1 -1
  122. package/dist/esm/Sharding.js +438 -321
  123. package/dist/esm/Sharding.js.map +1 -1
  124. package/dist/esm/ShardingConfig.js +10 -14
  125. package/dist/esm/ShardingConfig.js.map +1 -1
  126. package/dist/esm/Snowflake.js +1 -1
  127. package/dist/esm/SocketRunner.js +1 -1
  128. package/dist/esm/SocketRunner.js.map +1 -1
  129. package/dist/esm/SqlMessageStorage.js +22 -28
  130. package/dist/esm/SqlMessageStorage.js.map +1 -1
  131. package/dist/esm/SqlRunnerStorage.js +369 -0
  132. package/dist/esm/SqlRunnerStorage.js.map +1 -0
  133. package/dist/esm/index.js +4 -24
  134. package/dist/esm/index.js.map +1 -1
  135. package/dist/esm/internal/entityManager.js +40 -9
  136. package/dist/esm/internal/entityManager.js.map +1 -1
  137. package/package.json +20 -60
  138. package/src/ClusterError.ts +0 -24
  139. package/src/ClusterMetrics.ts +12 -16
  140. package/src/ClusterWorkflowEngine.ts +38 -78
  141. package/src/Entity.ts +2 -7
  142. package/src/EntityAddress.ts +10 -0
  143. package/src/EntityId.ts +6 -0
  144. package/src/EntityProxy.ts +10 -10
  145. package/src/HttpRunner.ts +132 -67
  146. package/src/MessageStorage.ts +89 -24
  147. package/src/Runner.ts +4 -4
  148. package/src/RunnerAddress.ts +8 -0
  149. package/src/RunnerHealth.ts +119 -56
  150. package/src/RunnerServer.ts +64 -47
  151. package/src/RunnerStorage.ts +218 -0
  152. package/src/Runners.ts +32 -45
  153. package/src/ShardId.ts +14 -3
  154. package/src/Sharding.ts +548 -413
  155. package/src/ShardingConfig.ts +39 -31
  156. package/src/Snowflake.ts +1 -1
  157. package/src/SocketRunner.ts +6 -4
  158. package/src/SqlMessageStorage.ts +28 -30
  159. package/src/SqlRunnerStorage.ts +541 -0
  160. package/src/index.ts +4 -29
  161. package/src/internal/entityManager.ts +44 -10
  162. package/HttpCommon/package.json +0 -6
  163. package/HttpShardManager/package.json +0 -6
  164. package/ShardManager/package.json +0 -6
  165. package/ShardStorage/package.json +0 -6
  166. package/SocketShardManager/package.json +0 -6
  167. package/SqlShardStorage/package.json +0 -6
  168. package/SynchronizedClock/package.json +0 -6
  169. package/dist/cjs/HttpCommon.js +0 -48
  170. package/dist/cjs/HttpCommon.js.map +0 -1
  171. package/dist/cjs/HttpShardManager.js +0 -139
  172. package/dist/cjs/HttpShardManager.js.map +0 -1
  173. package/dist/cjs/ShardManager.js +0 -549
  174. package/dist/cjs/ShardManager.js.map +0 -1
  175. package/dist/cjs/ShardStorage.js +0 -151
  176. package/dist/cjs/ShardStorage.js.map +0 -1
  177. package/dist/cjs/SocketShardManager.js +0 -32
  178. package/dist/cjs/SocketShardManager.js.map +0 -1
  179. package/dist/cjs/SqlShardStorage.js +0 -253
  180. package/dist/cjs/SqlShardStorage.js.map +0 -1
  181. package/dist/cjs/SynchronizedClock.js +0 -65
  182. package/dist/cjs/SynchronizedClock.js.map +0 -1
  183. package/dist/cjs/internal/shardManager.js +0 -353
  184. package/dist/cjs/internal/shardManager.js.map +0 -1
  185. package/dist/dts/HttpCommon.d.ts +0 -25
  186. package/dist/dts/HttpCommon.d.ts.map +0 -1
  187. package/dist/dts/HttpShardManager.d.ts +0 -119
  188. package/dist/dts/HttpShardManager.d.ts.map +0 -1
  189. package/dist/dts/ShardManager.d.ts +0 -459
  190. package/dist/dts/ShardManager.d.ts.map +0 -1
  191. package/dist/dts/ShardStorage.d.ts.map +0 -1
  192. package/dist/dts/SocketShardManager.d.ts +0 -17
  193. package/dist/dts/SocketShardManager.d.ts.map +0 -1
  194. package/dist/dts/SqlShardStorage.d.ts +0 -38
  195. package/dist/dts/SqlShardStorage.d.ts.map +0 -1
  196. package/dist/dts/SynchronizedClock.d.ts +0 -19
  197. package/dist/dts/SynchronizedClock.d.ts.map +0 -1
  198. package/dist/dts/internal/shardManager.d.ts +0 -2
  199. package/dist/dts/internal/shardManager.d.ts.map +0 -1
  200. package/dist/esm/HttpCommon.js +0 -38
  201. package/dist/esm/HttpCommon.js.map +0 -1
  202. package/dist/esm/HttpShardManager.js +0 -128
  203. package/dist/esm/HttpShardManager.js.map +0 -1
  204. package/dist/esm/ShardManager.js +0 -535
  205. package/dist/esm/ShardManager.js.map +0 -1
  206. package/dist/esm/ShardStorage.js +0 -141
  207. package/dist/esm/ShardStorage.js.map +0 -1
  208. package/dist/esm/SocketShardManager.js +0 -24
  209. package/dist/esm/SocketShardManager.js.map +0 -1
  210. package/dist/esm/SqlShardStorage.js +0 -244
  211. package/dist/esm/SqlShardStorage.js.map +0 -1
  212. package/dist/esm/SynchronizedClock.js +0 -57
  213. package/dist/esm/SynchronizedClock.js.map +0 -1
  214. package/dist/esm/internal/shardManager.js +0 -342
  215. package/dist/esm/internal/shardManager.js.map +0 -1
  216. package/src/HttpCommon.ts +0 -73
  217. package/src/HttpShardManager.ts +0 -273
  218. package/src/ShardManager.ts +0 -823
  219. package/src/ShardStorage.ts +0 -297
  220. package/src/SocketShardManager.ts +0 -48
  221. package/src/SqlShardStorage.ts +0 -329
  222. package/src/SynchronizedClock.ts +0 -82
  223. package/src/internal/shardManager.ts +0 -412
@@ -3,33 +3,31 @@ import { RequestId } from "@effect/rpc/RpcMessage";
3
3
  import * as Arr from "effect/Array";
4
4
  import * as Cause from "effect/Cause";
5
5
  import * as Context from "effect/Context";
6
- import * as Deferred from "effect/Deferred";
7
6
  import * as Effect from "effect/Effect";
7
+ import * as Either from "effect/Either";
8
8
  import * as Equal from "effect/Equal";
9
- import * as Exit from "effect/Exit";
10
9
  import * as Fiber from "effect/Fiber";
11
- import * as FiberHandle from "effect/FiberHandle";
12
10
  import * as FiberMap from "effect/FiberMap";
13
11
  import * as FiberRef from "effect/FiberRef";
12
+ import * as FiberSet from "effect/FiberSet";
14
13
  import { constant } from "effect/Function";
15
14
  import * as HashMap from "effect/HashMap";
16
- import * as Iterable from "effect/Iterable";
15
+ import * as HashRing from "effect/HashRing";
17
16
  import * as Layer from "effect/Layer";
18
17
  import * as MutableHashMap from "effect/MutableHashMap";
19
18
  import * as MutableHashSet from "effect/MutableHashSet";
20
19
  import * as MutableRef from "effect/MutableRef";
21
20
  import * as Option from "effect/Option";
22
- import * as Predicate from "effect/Predicate";
23
21
  import * as PubSub from "effect/PubSub";
24
22
  import * as Schedule from "effect/Schedule";
25
23
  import * as Scope from "effect/Scope";
26
24
  import * as Stream from "effect/Stream";
27
- import { AlreadyProcessingMessage, EntityNotAssignedToRunner, EntityNotManagedByRunner, RunnerUnavailable } from "./ClusterError.js";
28
- import * as ClusterError from "./ClusterError.js";
25
+ import { AlreadyProcessingMessage, EntityNotAssignedToRunner } from "./ClusterError.js";
26
+ import * as ClusterMetrics from "./ClusterMetrics.js";
29
27
  import { Persisted, Uninterruptible } from "./ClusterSchema.js";
30
28
  import * as ClusterSchema from "./ClusterSchema.js";
31
- import { EntityAddress } from "./EntityAddress.js";
32
- import { EntityId } from "./EntityId.js";
29
+ import { make as makeEntityAddress } from "./EntityAddress.js";
30
+ import { make as makeEntityId } from "./EntityId.js";
33
31
  import * as Envelope from "./Envelope.js";
34
32
  import * as EntityManager from "./internal/entityManager.js";
35
33
  import { EntityReaper } from "./internal/entityReaper.js";
@@ -39,12 +37,13 @@ import { ResourceMap } from "./internal/resourceMap.js";
39
37
  import * as Message from "./Message.js";
40
38
  import * as MessageStorage from "./MessageStorage.js";
41
39
  import * as Reply from "./Reply.js";
40
+ import { Runner } from "./Runner.js";
41
+ import * as RunnerHealth from "./RunnerHealth.js";
42
42
  import { Runners } from "./Runners.js";
43
- import { ShardId } from "./ShardId.js";
43
+ import { RunnerStorage } from "./RunnerStorage.js";
44
+ import { make as makeShardId } from "./ShardId.js";
44
45
  import { ShardingConfig } from "./ShardingConfig.js";
45
46
  import { EntityRegistered, SingletonRegistered } from "./ShardingRegistrationEvent.js";
46
- import { ShardManagerClient } from "./ShardManager.js";
47
- import { ShardStorage } from "./ShardStorage.js";
48
47
  import { SingletonAddress } from "./SingletonAddress.js";
49
48
  import * as Snowflake from "./Snowflake.js";
50
49
  /**
@@ -54,14 +53,16 @@ import * as Snowflake from "./Snowflake.js";
54
53
  export class Sharding extends /*#__PURE__*/Context.Tag("@effect/cluster/Sharding")() {}
55
54
  const make = /*#__PURE__*/Effect.gen(function* () {
56
55
  const config = yield* ShardingConfig;
57
- const runners = yield* Runners;
58
- const shardManager = yield* ShardManagerClient;
56
+ const runnersService = yield* Runners;
57
+ const runnerHealth = yield* RunnerHealth.RunnerHealth;
59
58
  const snowflakeGen = yield* Snowflake.Generator;
60
59
  const shardingScope = yield* Effect.scope;
61
60
  const isShutdown = MutableRef.make(false);
61
+ const fiberSet = yield* FiberSet.make();
62
+ const runFork = yield* FiberSet.runtime(fiberSet)().pipe(Effect.mapInputContext(context => Context.omit(Scope.Scope)(context)));
62
63
  const storage = yield* MessageStorage.MessageStorage;
63
64
  const storageEnabled = storage !== MessageStorage.noop;
64
- const shardStorage = yield* ShardStorage;
65
+ const runnerStorage = yield* RunnerStorage;
65
66
  const entityManagers = new Map();
66
67
  const shardAssignments = MutableHashMap.empty();
67
68
  const selfShards = MutableHashSet.empty();
@@ -73,24 +74,24 @@ const make = /*#__PURE__*/Effect.gen(function* () {
73
74
  const isLocalRunner = address => Option.isSome(config.runnerAddress) && Equal.equals(address, config.runnerAddress.value);
74
75
  function getShardId(entityId, group) {
75
76
  const id = Math.abs(hashString(entityId) % config.shardsPerGroup) + 1;
76
- return ShardId.make({
77
- group,
78
- id
79
- }, {
80
- disableValidation: true
81
- });
77
+ return makeShardId(group, id);
82
78
  }
83
79
  function isEntityOnLocalShards(address) {
84
80
  return MutableHashSet.has(acquiredShards, address.shardId);
85
81
  }
86
82
  // --- Shard acquisition ---
83
+ //
84
+ // Responsible for acquiring and releasing shards from RunnerStorage.
85
+ //
86
+ // This should be shutdown last, when all entities have been shutdown, to
87
+ // allow them to move to another runner.
88
+ const releasingShards = MutableHashSet.empty();
87
89
  if (Option.isSome(config.runnerAddress)) {
88
90
  const selfAddress = config.runnerAddress.value;
89
91
  yield* Scope.addFinalizerExit(shardingScope, () => {
90
92
  // the locks expire over time, so if this fails we ignore it
91
- return Effect.ignore(shardStorage.releaseAll(selfAddress));
93
+ return Effect.ignore(runnerStorage.releaseAll(selfAddress));
92
94
  });
93
- const releasingShards = MutableHashSet.empty();
94
95
  yield* Effect.gen(function* () {
95
96
  activeShardsLatch.unsafeOpen();
96
97
  while (true) {
@@ -102,140 +103,174 @@ const make = /*#__PURE__*/Effect.gen(function* () {
102
103
  MutableHashSet.remove(acquiredShards, shardId);
103
104
  MutableHashSet.add(releasingShards, shardId);
104
105
  }
106
+ if (MutableHashSet.size(releasingShards) > 0) {
107
+ yield* Effect.forkIn(syncSingletons, shardingScope);
108
+ yield* releaseShards;
109
+ }
105
110
  // if a shard has been assigned to this runner, we acquire it
106
111
  const unacquiredShards = MutableHashSet.empty();
107
112
  for (const shardId of selfShards) {
108
113
  if (MutableHashSet.has(acquiredShards, shardId) || MutableHashSet.has(releasingShards, shardId)) continue;
109
114
  MutableHashSet.add(unacquiredShards, shardId);
110
115
  }
111
- if (MutableHashSet.size(releasingShards) > 0) {
112
- yield* Effect.forkIn(syncSingletons, shardingScope);
113
- yield* releaseShards;
114
- }
115
116
  if (MutableHashSet.size(unacquiredShards) === 0) {
116
117
  continue;
117
118
  }
118
- const acquired = yield* shardStorage.acquire(selfAddress, unacquiredShards);
119
+ const acquired = yield* runnerStorage.acquire(selfAddress, unacquiredShards);
119
120
  yield* Effect.ignore(storage.resetShards(acquired));
120
121
  for (const shardId of acquired) {
122
+ if (MutableHashSet.has(releasingShards, shardId) || !MutableHashSet.has(selfShards, shardId)) {
123
+ continue;
124
+ }
121
125
  MutableHashSet.add(acquiredShards, shardId);
122
126
  }
123
127
  if (acquired.length > 0) {
124
128
  yield* storageReadLatch.open;
125
129
  yield* Effect.forkIn(syncSingletons, shardingScope);
130
+ // update metrics
131
+ ClusterMetrics.shards.unsafeUpdate(BigInt(MutableHashSet.size(acquiredShards)), []);
126
132
  }
127
133
  yield* Effect.sleep(1000);
128
134
  activeShardsLatch.unsafeOpen();
129
135
  }
130
- }).pipe(Effect.catchAllCause(cause => Effect.logWarning("Could not acquire/release shards", cause)), Effect.forever, Effect.annotateLogs({
136
+ }).pipe(Effect.catchAllCause(cause => Effect.logWarning("Could not acquire/release shards", cause)), Effect.repeat(Schedule.spaced(config.entityMessagePollInterval)), Effect.annotateLogs({
131
137
  package: "@effect/cluster",
132
138
  module: "Sharding",
133
139
  fiber: "Shard acquisition loop",
134
140
  runner: selfAddress
135
- }), Effect.interruptible, Effect.forkIn(shardingScope));
136
- // refresh the shard locks every 4s
137
- yield* Effect.suspend(() => shardStorage.refresh(selfAddress, [...acquiredShards, ...releasingShards])).pipe(Effect.flatMap(acquired => {
141
+ }), Effect.forkIn(shardingScope));
142
+ // refresh the shard locks every `shardLockRefreshInterval`
143
+ yield* Effect.suspend(() => runnerStorage.refresh(selfAddress, [...acquiredShards, ...releasingShards])).pipe(Effect.flatMap(acquired => {
138
144
  for (const shardId of acquiredShards) {
139
- if (!acquired.some(_ => _[Equal.symbol](shardId))) {
145
+ if (!acquired.includes(shardId)) {
146
+ MutableHashSet.remove(acquiredShards, shardId);
147
+ MutableHashSet.add(releasingShards, shardId);
148
+ }
149
+ }
150
+ for (let i = 0; i < acquired.length; i++) {
151
+ const shardId = acquired[i];
152
+ if (!MutableHashSet.has(selfShards, shardId)) {
140
153
  MutableHashSet.remove(acquiredShards, shardId);
141
154
  MutableHashSet.add(releasingShards, shardId);
142
155
  }
143
156
  }
144
- return MutableHashSet.size(releasingShards) > 0 ? Effect.andThen(Effect.forkIn(syncSingletons, shardingScope), releaseShards) : Effect.void;
157
+ return MutableHashSet.size(releasingShards) > 0 ? activeShardsLatch.open : Effect.void;
145
158
  }), Effect.retry({
146
159
  times: 5,
147
160
  schedule: Schedule.spaced(50)
148
- }), Effect.catchAllCause(cause => Effect.logError("Could not refresh shard locks", cause).pipe(Effect.andThen(clearSelfShards))), Effect.schedule(Schedule.fixed(4000)), Effect.interruptible, Effect.forkIn(shardingScope));
161
+ }), Effect.catchAllCause(cause => Effect.logError("Could not refresh shard locks", cause).pipe(Effect.andThen(clearSelfShards))), Effect.repeat(Schedule.fixed(config.shardLockRefreshInterval)), Effect.forever, Effect.forkIn(shardingScope));
149
162
  const releaseShardsLock = Effect.unsafeMakeSemaphore(1).withPermits(1);
150
163
  const releaseShards = releaseShardsLock(Effect.suspend(() => Effect.forEach(releasingShards, shardId => Effect.forEach(entityManagers.values(), state => state.manager.interruptShard(shardId), {
151
164
  concurrency: "unbounded",
152
165
  discard: true
153
- }).pipe(Effect.andThen(shardStorage.release(selfAddress, shardId)), Effect.annotateLogs({
166
+ }).pipe(Effect.andThen(runnerStorage.release(selfAddress, shardId)), Effect.annotateLogs({
154
167
  runner: selfAddress
155
- }), Effect.andThen(() => {
168
+ }), Effect.flatMap(() => {
156
169
  MutableHashSet.remove(releasingShards, shardId);
170
+ return storage.unregisterShardReplyHandlers(shardId);
157
171
  })), {
158
172
  concurrency: "unbounded",
159
173
  discard: true
160
- })).pipe(Effect.andThen(activeShardsLatch.open)));
174
+ })));
175
+ // open the shard latch every poll interval
176
+ yield* activeShardsLatch.open.pipe(Effect.delay(config.entityMessagePollInterval), Effect.forever, Effect.forkIn(shardingScope));
161
177
  }
162
- const clearSelfShards = Effect.suspend(() => {
178
+ const clearSelfShards = Effect.sync(() => {
163
179
  MutableHashSet.clear(selfShards);
164
- return activeShardsLatch.open;
180
+ activeShardsLatch.unsafeOpen();
165
181
  });
166
- // --- Singletons ---
167
- const singletons = new Map();
168
- const singletonFibers = yield* FiberMap.make();
169
- const withSingletonLock = Effect.unsafeMakeSemaphore(1).withPermits(1);
170
- const registerSingleton = Effect.fnUntraced(function* (name, run, options) {
171
- const shardGroup = options?.shardGroup ?? "default";
172
- const address = new SingletonAddress({
173
- shardId: getShardId(EntityId.make(name), shardGroup),
174
- name
175
- });
176
- let map = singletons.get(address.shardId);
177
- if (!map) {
178
- map = MutableHashMap.empty();
179
- singletons.set(address.shardId, map);
180
- }
181
- if (MutableHashMap.has(map, address)) {
182
- return yield* Effect.dieMessage(`Singleton '${name}' is already registered`);
183
- }
184
- const context = yield* Effect.context();
185
- const wrappedRun = run.pipe(Effect.locally(FiberRef.currentLogAnnotations, HashMap.empty()), Effect.andThen(Effect.never), Effect.scoped, Effect.provide(context), Effect.orDie, Effect.interruptible);
186
- MutableHashMap.set(map, address, wrappedRun);
187
- yield* PubSub.publish(events, SingletonRegistered({
188
- address
189
- }));
190
- // start if we are on the right shard
191
- if (MutableHashSet.has(acquiredShards, address.shardId)) {
192
- yield* Effect.logDebug("Starting singleton", address);
193
- yield* FiberMap.run(singletonFibers, address, wrappedRun);
194
- }
195
- }, withSingletonLock);
196
- const syncSingletons = withSingletonLock(Effect.gen(function* () {
197
- for (const [shardId, map] of singletons) {
198
- for (const [address, run] of map) {
199
- const running = FiberMap.unsafeHas(singletonFibers, address);
200
- const shouldBeRunning = MutableHashSet.has(acquiredShards, shardId);
201
- if (running && !shouldBeRunning) {
202
- yield* Effect.logDebug("Stopping singleton", address);
203
- internalInterruptors.add(yield* Effect.fiberId);
204
- yield* FiberMap.remove(singletonFibers, address);
205
- } else if (!running && shouldBeRunning) {
206
- yield* Effect.logDebug("Starting singleton", address);
207
- yield* FiberMap.run(singletonFibers, address, run);
208
- }
209
- }
210
- }
211
- }));
212
182
  // --- Storage inbox ---
183
+ //
184
+ // Responsible for reading unprocessed messages from storage and sending them
185
+ // to the appropriate entity manager.
186
+ //
187
+ // This should be shutdown before shard acquisition, to ensure no messages are
188
+ // being processed before the shards are released.
189
+ //
190
+ // It should also be shutdown after the entity managers, to ensure interrupt
191
+ // & ack envelopes can still be processed.
213
192
  const storageReadLatch = yield* Effect.makeLatch(true);
214
193
  const openStorageReadLatch = constant(storageReadLatch.open);
215
194
  const storageReadLock = Effect.unsafeMakeSemaphore(1);
216
195
  const withStorageReadLock = storageReadLock.withPermits(1);
217
- let storageAlreadyProcessed = _message => true;
218
- // keep track of the last sent request ids to avoid duplicates
219
- // we only keep the last 30 sets to avoid memory leaks
220
- const sentRequestIds = new Set();
221
- const sentRequestIdSets = new Set();
222
196
  if (storageEnabled && Option.isSome(config.runnerAddress)) {
223
197
  const selfAddress = config.runnerAddress.value;
224
198
  yield* Effect.gen(function* () {
225
199
  yield* Effect.logDebug("Starting");
226
200
  yield* Effect.addFinalizer(() => Effect.logDebug("Shutting down"));
227
- sentRequestIds.clear();
228
- sentRequestIdSets.clear();
229
- storageAlreadyProcessed = message => {
230
- if (!sentRequestIds.has(message.envelope.requestId)) {
231
- return false;
232
- }
233
- const state = entityManagers.get(message.envelope.address.entityType);
234
- if (!state) return true;
235
- return !state.manager.isProcessingFor(message, {
236
- excludeReplies: true
237
- });
238
- };
201
+ let index = 0;
202
+ let messages = [];
203
+ const removableNotifications = new Set();
204
+ const resetAddresses = MutableHashSet.empty();
205
+ const processMessages = Effect.whileLoop({
206
+ while: () => index < messages.length,
207
+ step: () => index++,
208
+ body: () => send
209
+ });
210
+ const send = Effect.catchAllCause(Effect.suspend(() => {
211
+ const message = messages[index];
212
+ // if we are shutting down, we don't accept new requests
213
+ if (message._tag === "IncomingRequest" && isShutdown.current) {
214
+ if (isShutdown.current) {
215
+ return Effect.void;
216
+ }
217
+ }
218
+ const address = message.envelope.address;
219
+ if (!MutableHashSet.has(acquiredShards, address.shardId)) {
220
+ return Effect.void;
221
+ }
222
+ const state = entityManagers.get(address.entityType);
223
+ if (!state) {
224
+ // reset address in the case that the entity is slow to register
225
+ MutableHashSet.add(resetAddresses, address);
226
+ return Effect.void;
227
+ } else if (state.closed) {
228
+ return Effect.void;
229
+ }
230
+ const isProcessing = state.manager.isProcessingFor(message);
231
+ // If the message might affect a currently processing request, we
232
+ // send it to the entity manager to be processed.
233
+ if (message._tag === "IncomingEnvelope" && isProcessing) {
234
+ return state.manager.send(message);
235
+ } else if (isProcessing) {
236
+ return Effect.void;
237
+ } else if (message._tag === "IncomingRequest" && pendingNotifications.has(message.envelope.requestId)) {
238
+ const entry = pendingNotifications.get(message.envelope.requestId);
239
+ pendingNotifications.delete(message.envelope.requestId);
240
+ removableNotifications.delete(entry);
241
+ entry.resume(Effect.void);
242
+ }
243
+ // If the entity was resuming in another fiber, we add the message
244
+ // id to the unprocessed set.
245
+ const resumptionState = MutableHashMap.get(entityResumptionState, address);
246
+ if (Option.isSome(resumptionState)) {
247
+ resumptionState.value.unprocessed.add(message.envelope.requestId);
248
+ if (message.envelope._tag === "Interrupt") {
249
+ resumptionState.value.interrupts.set(message.envelope.requestId, message);
250
+ }
251
+ return Effect.void;
252
+ }
253
+ return state.manager.send(message);
254
+ }), cause => {
255
+ const message = messages[index];
256
+ const error = Cause.failureOrCause(cause);
257
+ // if we get a defect, then update storage
258
+ if (Either.isRight(error)) {
259
+ if (Cause.isInterrupted(cause)) {
260
+ return Effect.void;
261
+ }
262
+ return Effect.ignore(storage.saveReply(Reply.ReplyWithContext.fromDefect({
263
+ id: snowflakeGen.unsafeNext(),
264
+ requestId: message.envelope.requestId,
265
+ defect: Cause.squash(cause)
266
+ })));
267
+ }
268
+ if (error.left._tag === "MailboxFull") {
269
+ // MailboxFull can only happen for requests, so this cast is safe
270
+ return resumeEntityFromStorage(message);
271
+ }
272
+ return Effect.void;
273
+ });
239
274
  while (true) {
240
275
  // wait for the next poll interval, or if we get notified of a change
241
276
  yield* storageReadLatch.await;
@@ -246,98 +281,45 @@ const make = /*#__PURE__*/Effect.gen(function* () {
246
281
  // more items are added to the unprocessed set while the semaphore is
247
282
  // acquired.
248
283
  yield* storageReadLock.take(1);
249
- const messages = yield* storage.unprocessedMessages(acquiredShards);
250
- const currentSentRequestIds = new Set();
251
- sentRequestIdSets.add(currentSentRequestIds);
252
- const send = Effect.catchAllCause(Effect.suspend(() => {
253
- const message = messages[index];
254
- if (message._tag === "IncomingRequest") {
255
- if (sentRequestIds.has(message.envelope.requestId)) {
256
- return Effect.void;
257
- }
258
- sentRequestIds.add(message.envelope.requestId);
259
- currentSentRequestIds.add(message.envelope.requestId);
260
- }
261
- const address = message.envelope.address;
262
- if (!MutableHashSet.has(acquiredShards, address.shardId)) {
263
- return Effect.void;
264
- }
265
- const state = entityManagers.get(address.entityType);
266
- if (!state) {
267
- if (message._tag === "IncomingRequest") {
268
- return Effect.orDie(message.respond(Reply.ReplyWithContext.fromDefect({
269
- id: snowflakeGen.unsafeNext(),
270
- requestId: message.envelope.requestId,
271
- defect: new EntityNotManagedByRunner({
272
- address
273
- })
274
- })));
275
- }
276
- return Effect.void;
277
- }
278
- const isProcessing = state.manager.isProcessingFor(message);
279
- // If the message might affect a currently processing request, we
280
- // send it to the entity manager to be processed.
281
- if (message._tag === "IncomingEnvelope" && isProcessing) {
282
- return state.manager.send(message);
283
- } else if (isProcessing) {
284
- return Effect.void;
285
- }
286
- // If the entity was resuming in another fiber, we add the message
287
- // id to the unprocessed set.
288
- const resumptionState = MutableHashMap.get(entityResumptionState, address);
289
- if (Option.isSome(resumptionState)) {
290
- resumptionState.value.unprocessed.add(message.envelope.requestId);
291
- if (message.envelope._tag === "Interrupt") {
292
- resumptionState.value.interrupts.set(message.envelope.requestId, message);
293
- }
294
- return Effect.void;
295
- }
296
- return state.manager.send(message);
297
- }), cause => {
298
- const message = messages[index];
299
- const error = Cause.failureOption(cause);
300
- // if we get a defect, then update storage
301
- if (Option.isNone(error)) {
302
- if (Cause.isInterrupted(cause)) {
303
- return Effect.void;
304
- }
305
- return storage.saveReply(Reply.ReplyWithContext.fromDefect({
306
- id: snowflakeGen.unsafeNext(),
307
- requestId: message.envelope.requestId,
308
- defect: Cause.squash(cause)
284
+ entityManagers.forEach(state => state.manager.clearProcessed());
285
+ if (pendingNotifications.size > 0) {
286
+ pendingNotifications.forEach(entry => removableNotifications.add(entry));
287
+ }
288
+ messages = yield* storage.unprocessedMessages(acquiredShards);
289
+ index = 0;
290
+ yield* processMessages;
291
+ if (removableNotifications.size > 0) {
292
+ removableNotifications.forEach(({
293
+ message,
294
+ resume
295
+ }) => {
296
+ pendingNotifications.delete(message.envelope.requestId);
297
+ resume(Effect.fail(new EntityNotAssignedToRunner({
298
+ address: message.envelope.address
299
+ })));
300
+ });
301
+ removableNotifications.clear();
302
+ }
303
+ if (MutableHashSet.size(resetAddresses) > 0) {
304
+ for (const address of resetAddresses) {
305
+ yield* Effect.logWarning("Could not find entity manager for address, retrying").pipe(Effect.annotateLogs({
306
+ address
309
307
  }));
308
+ yield* Effect.forkIn(storage.resetAddress(address), shardingScope);
310
309
  }
311
- if (error.value._tag === "MailboxFull") {
312
- // MailboxFull can only happen for requests, so this cast is safe
313
- return resumeEntityFromStorage(message);
314
- }
315
- return Effect.void;
316
- });
317
- let index = 0;
318
- yield* Effect.whileLoop({
319
- while: () => index < messages.length,
320
- step: () => index++,
321
- body: constant(send)
322
- });
310
+ MutableHashSet.clear(resetAddresses);
311
+ }
323
312
  // let the resuming entities check if they are done
324
313
  yield* storageReadLock.release(1);
325
- while (sentRequestIdSets.size > 30) {
326
- const oldest = Iterable.unsafeHead(sentRequestIdSets);
327
- sentRequestIdSets.delete(oldest);
328
- for (const id of oldest) {
329
- sentRequestIds.delete(id);
330
- }
331
- }
332
314
  }
333
- }).pipe(Effect.scoped, Effect.ensuring(storageReadLock.releaseAll), Effect.catchAllCause(cause => Effect.logWarning("Could not read messages from storage", cause)), Effect.repeat(Schedule.spaced(config.entityMessagePollInterval)), Effect.annotateLogs({
315
+ }).pipe(Effect.scoped, Effect.ensuring(storageReadLock.releaseAll), Effect.catchAllCause(cause => Effect.logWarning("Could not read messages from storage", cause)), Effect.forever, Effect.annotateLogs({
334
316
  package: "@effect/cluster",
335
317
  module: "Sharding",
336
318
  fiber: "Storage read loop",
337
319
  runner: selfAddress
338
- }), Effect.interruptible, Effect.forkIn(shardingScope));
320
+ }), Effect.withUnhandledErrorLogLevel(Option.none()), Effect.forkIn(shardingScope));
339
321
  // open the storage latch every poll interval
340
- yield* storageReadLatch.open.pipe(Effect.delay(config.entityMessagePollInterval), Effect.forever, Effect.interruptible, Effect.forkIn(shardingScope));
322
+ yield* storageReadLatch.open.pipe(Effect.delay(config.entityMessagePollInterval), Effect.forever, Effect.forkIn(shardingScope));
341
323
  // Resume unprocessed messages for entities that reached a full mailbox.
342
324
  const entityResumptionState = MutableHashMap.empty();
343
325
  const resumeEntityFromStorage = lastReceivedMessage => {
@@ -416,16 +398,16 @@ const make = /*#__PURE__*/Effect.gen(function* () {
416
398
  }, Effect.retry({
417
399
  while: e => e._tag === "PersistenceError",
418
400
  schedule: Schedule.spaced(config.entityMessagePollInterval)
419
- }), Effect.catchAllCause(cause => Effect.logError("Could not resume unprocessed messages", cause)), (effect, address) => Effect.annotateLogs(effect, {
401
+ }), Effect.catchAllCause(cause => Effect.logDebug("Could not resume unprocessed messages", cause)), (effect, address) => Effect.annotateLogs(effect, {
420
402
  package: "@effect/cluster",
421
403
  module: "Sharding",
422
404
  fiber: "Resuming unprocessed messages",
423
405
  runner: selfAddress,
424
406
  entity: address
425
- }), (effect, address) => Effect.ensuring(effect, Effect.sync(() => MutableHashMap.remove(entityResumptionState, address))), Effect.interruptible, Effect.forkIn(shardingScope));
407
+ }), (effect, address) => Effect.ensuring(effect, Effect.sync(() => MutableHashMap.remove(entityResumptionState, address))), Effect.withUnhandledErrorLogLevel(Option.none()), Effect.forkIn(shardingScope));
426
408
  }
427
409
  // --- Sending messages ---
428
- const sendLocal = message => Effect.suspend(() => {
410
+ const sendLocal = message => Effect.suspend(function loop() {
429
411
  const address = message.envelope.address;
430
412
  if (!isEntityOnLocalShards(address)) {
431
413
  return Effect.fail(new EntityNotAssignedToRunner({
@@ -434,57 +416,84 @@ const make = /*#__PURE__*/Effect.gen(function* () {
434
416
  }
435
417
  const state = entityManagers.get(address.entityType);
436
418
  if (!state) {
437
- return Effect.fail(new EntityNotManagedByRunner({
419
+ return Effect.flatMap(waitForEntityManager(address.entityType), loop);
420
+ } else if (state.closed || isShutdown.current && message._tag === "IncomingRequest") {
421
+ // if we are shutting down, we don't accept new requests
422
+ return Effect.fail(new EntityNotAssignedToRunner({
438
423
  address
439
424
  }));
440
425
  }
441
- return message._tag === "IncomingRequest" || message._tag === "IncomingEnvelope" ? state.manager.send(message) : runners.sendLocal({
426
+ return message._tag === "IncomingRequest" || message._tag === "IncomingEnvelope" ? state.manager.send(message) : runnersService.sendLocal({
442
427
  message,
443
428
  send: state.manager.sendLocal,
444
429
  simulateRemoteSerialization: config.simulateRemoteSerialization
445
430
  });
446
431
  });
447
- const notifyLocal = (message, discard) => Effect.suspend(() => {
432
+ const pendingNotifications = new Map();
433
+ const notifyLocal = (message, discard, options) => Effect.suspend(function loop() {
448
434
  const address = message.envelope.address;
449
- if (!entityManagers.has(address.entityType)) {
450
- return Effect.fail(new EntityNotManagedByRunner({
435
+ const state = entityManagers.get(address.entityType);
436
+ if (!state) {
437
+ return Effect.flatMap(waitForEntityManager(address.entityType), loop);
438
+ } else if (state.closed || !isEntityOnLocalShards(address)) {
439
+ return Effect.fail(new EntityNotAssignedToRunner({
451
440
  address
452
441
  }));
453
442
  }
454
443
  const isLocal = isEntityOnLocalShards(address);
455
- const notify = storageEnabled ? openStorageReadLatch : () => Effect.dieMessage("Sharding.notifyLocal: storage is disabled");
444
+ const notify = storageEnabled ? openStorageReadLatch : () => Effect.die("Sharding.notifyLocal: storage is disabled");
456
445
  if (message._tag === "IncomingRequest" || message._tag === "IncomingEnvelope") {
457
- if (message._tag === "IncomingRequest" && storageAlreadyProcessed(message)) {
446
+ if (!isLocal) {
447
+ return Effect.fail(new EntityNotAssignedToRunner({
448
+ address
449
+ }));
450
+ } else if (message._tag === "IncomingRequest" && state.manager.isProcessingFor(message, {
451
+ excludeReplies: true
452
+ })) {
458
453
  return Effect.fail(new AlreadyProcessingMessage({
459
454
  address,
460
455
  envelopeId: message.envelope.requestId
461
456
  }));
462
- } else if (!isLocal) {
463
- return Effect.fail(new EntityNotAssignedToRunner({
464
- address
465
- }));
457
+ } else if (message._tag === "IncomingRequest" && options?.waitUntilRead) {
458
+ if (!storageEnabled) return notify();
459
+ return Effect.async(resume => {
460
+ let entry = pendingNotifications.get(message.envelope.requestId);
461
+ if (entry) {
462
+ const prevResume = entry.resume;
463
+ entry.resume = effect => {
464
+ prevResume(effect);
465
+ resume(effect);
466
+ };
467
+ return;
468
+ }
469
+ entry = {
470
+ resume,
471
+ message
472
+ };
473
+ pendingNotifications.set(message.envelope.requestId, entry);
474
+ storageReadLatch.unsafeOpen();
475
+ });
466
476
  }
467
477
  return notify();
468
478
  }
469
- return runners.notifyLocal({
479
+ return runnersService.notifyLocal({
470
480
  message,
471
481
  notify,
472
482
  discard,
473
483
  storageOnly: !isLocal
474
484
  });
475
485
  });
476
- const isTransientError = Predicate.or(RunnerUnavailable.is, EntityNotAssignedToRunner.is);
477
486
  function sendOutgoing(message, discard, retries) {
478
487
  return Effect.catchIf(Effect.suspend(() => {
479
488
  const address = message.envelope.address;
480
- const maybeRunner = MutableHashMap.get(shardAssignments, address.shardId);
481
489
  const isPersisted = Context.get(message.rpc.annotations, Persisted);
482
490
  if (isPersisted && !storageEnabled) {
483
- return Effect.dieMessage("Sharding.sendOutgoing: Persisted messages require MessageStorage");
491
+ return Effect.die("Sharding.sendOutgoing: Persisted messages require MessageStorage");
484
492
  }
493
+ const maybeRunner = MutableHashMap.get(shardAssignments, address.shardId);
485
494
  const runnerIsLocal = Option.isSome(maybeRunner) && isLocalRunner(maybeRunner.value);
486
495
  if (isPersisted) {
487
- return runnerIsLocal ? notifyLocal(message, discard) : runners.notify({
496
+ return runnerIsLocal ? notifyLocal(message, discard) : runnersService.notify({
488
497
  address: maybeRunner,
489
498
  message,
490
499
  discard
@@ -494,127 +503,138 @@ const make = /*#__PURE__*/Effect.gen(function* () {
494
503
  address
495
504
  }));
496
505
  }
497
- return runnerIsLocal ? sendLocal(message) : runners.send({
506
+ return runnerIsLocal ? sendLocal(message) : runnersService.send({
498
507
  address: maybeRunner.value,
499
508
  message
500
509
  });
501
- }), isTransientError, error => {
510
+ }), error => error._tag === "EntityNotAssignedToRunner" || error._tag === "RunnerUnavailable", error => {
502
511
  if (retries === 0) {
503
512
  return Effect.die(error);
504
513
  }
505
514
  return Effect.delay(sendOutgoing(message, discard, retries && retries - 1), config.sendRetryInterval);
506
515
  });
507
516
  }
508
- const reset = Effect.fnUntraced(function* (requestId) {
509
- yield* storage.clearReplies(requestId);
510
- sentRequestIds.delete(requestId);
511
- }, Effect.matchCause({
517
+ const reset = requestId => Effect.matchCause(storage.clearReplies(requestId), {
512
518
  onSuccess: () => true,
513
519
  onFailure: () => false
514
- }));
515
- // --- Shard Manager sync ---
516
- const shardManagerTimeoutFiber = yield* FiberHandle.make().pipe(Scope.extend(shardingScope));
517
- const startShardManagerTimeout = FiberHandle.run(shardManagerTimeoutFiber, Effect.flatMap(Effect.sleep(config.shardManagerUnavailableTimeout), () => {
518
- MutableHashMap.clear(shardAssignments);
519
- return clearSelfShards;
520
- }), {
521
- onlyIfMissing: true
522
520
  });
523
- const stopShardManagerTimeout = FiberHandle.clear(shardManagerTimeoutFiber);
524
- // Every time the link to the shard manager is lost, we re-register the runner
525
- // and re-subscribe to sharding events
521
+ // --- RunnerStorage sync ---
522
+ //
523
+ // This is responsible for syncing the local view of runners and shard
524
+ // assignments with RunnerStorage.
525
+ //
526
+ // It should be shutdown after the clients, so that they can still get correct
527
+ // shard assignments for outgoing messages (they could still be in use by
528
+ // entities that are shutting down).
529
+ const selfRunner = Option.isSome(config.runnerAddress) ? new Runner({
530
+ address: config.runnerAddress.value,
531
+ groups: config.shardGroups,
532
+ weight: config.runnerShardWeight
533
+ }) : undefined;
534
+ let allRunners = MutableHashMap.empty();
535
+ let healthyRunnerCount = 0;
536
+ // update metrics
537
+ if (selfRunner) {
538
+ ClusterMetrics.runners.unsafeUpdate(BigInt(1), []);
539
+ ClusterMetrics.runnersHealthy.unsafeUpdate(BigInt(1), []);
540
+ }
526
541
  yield* Effect.gen(function* () {
527
- yield* Effect.logDebug("Registering with shard manager");
528
- if (!isShutdown.current && Option.isSome(config.runnerAddress)) {
529
- const machineId = yield* shardManager.register(config.runnerAddress.value, config.shardGroups);
530
- yield* snowflakeGen.setMachineId(machineId);
531
- }
532
- yield* stopShardManagerTimeout;
533
- yield* Effect.logDebug("Subscribing to sharding events");
534
- const mailbox = yield* shardManager.shardingEvents(config.runnerAddress);
535
- const startedLatch = yield* Deferred.make();
536
- const eventsFiber = yield* Effect.gen(function* () {
537
- while (true) {
538
- const [events, done] = yield* mailbox.takeAll;
539
- if (done) return;
540
- for (const event of events) {
541
- yield* Effect.logDebug("Received sharding event", event);
542
- switch (event._tag) {
543
- case "StreamStarted":
544
- {
545
- yield* Deferred.done(startedLatch, Exit.void);
546
- break;
547
- }
548
- case "ShardsAssigned":
549
- {
550
- for (const shard of event.shards) {
551
- MutableHashMap.set(shardAssignments, shard, event.address);
552
- }
553
- if (!MutableRef.get(isShutdown) && isLocalRunner(event.address)) {
554
- for (const shardId of event.shards) {
555
- if (MutableHashSet.has(selfShards, shardId)) continue;
556
- MutableHashSet.add(selfShards, shardId);
557
- }
558
- yield* activeShardsLatch.open;
559
- }
560
- break;
561
- }
562
- case "ShardsUnassigned":
563
- {
564
- for (const shard of event.shards) {
565
- MutableHashMap.remove(shardAssignments, shard);
566
- }
567
- if (isLocalRunner(event.address)) {
568
- for (const shard of event.shards) {
569
- MutableHashSet.remove(selfShards, shard);
570
- }
571
- yield* activeShardsLatch.open;
572
- }
573
- break;
574
- }
575
- case "RunnerUnregistered":
576
- {
577
- if (!isLocalRunner(event.address)) break;
578
- return yield* Effect.fail(new ClusterError.RunnerNotRegistered({
579
- address: event.address
580
- }));
581
- }
542
+ const hashRings = new Map();
543
+ let nextRunners = MutableHashMap.empty();
544
+ const healthyRunners = MutableHashSet.empty();
545
+ while (true) {
546
+ // Ensure the current runner is registered
547
+ if (selfRunner && !isShutdown.current && !MutableHashMap.has(allRunners, selfRunner)) {
548
+ yield* Effect.logDebug("Registering runner", selfRunner);
549
+ const machineId = yield* runnerStorage.register(selfRunner, true);
550
+ yield* snowflakeGen.setMachineId(machineId);
551
+ }
552
+ const runners = yield* runnerStorage.getRunners;
553
+ let changed = false;
554
+ for (let i = 0; i < runners.length; i++) {
555
+ const [runner, healthy] = runners[i];
556
+ MutableHashMap.set(nextRunners, runner, healthy);
557
+ const wasHealthy = MutableHashSet.has(healthyRunners, runner);
558
+ if (!healthy || wasHealthy) {
559
+ if (healthy === wasHealthy || !wasHealthy) {
560
+ // no change
561
+ MutableHashMap.remove(allRunners, runner);
582
562
  }
563
+ continue;
564
+ }
565
+ changed = true;
566
+ MutableHashSet.add(healthyRunners, runner);
567
+ MutableHashMap.remove(allRunners, runner);
568
+ for (let j = 0; j < runner.groups.length; j++) {
569
+ const group = runner.groups[j];
570
+ let ring = hashRings.get(group);
571
+ if (!ring) {
572
+ ring = HashRing.make();
573
+ hashRings.set(group, ring);
574
+ }
575
+ HashRing.add(ring, runner.address, {
576
+ weight: runner.weight
577
+ });
583
578
  }
584
579
  }
585
- }).pipe(Effect.intoDeferred(startedLatch), Effect.zipRight(Effect.dieMessage("Shard manager event stream down")), Effect.forkScoped);
586
- // Wait for the stream to be established
587
- yield* Deferred.await(startedLatch);
588
- // perform a full sync every config.refreshAssignmentsInterval
589
- const syncFiber = yield* syncAssignments.pipe(Effect.andThen(Effect.sleep(config.refreshAssignmentsInterval)), Effect.forever, Effect.forkScoped);
590
- return yield* Fiber.joinAll([eventsFiber, syncFiber]);
591
- }).pipe(Effect.scoped, Effect.catchAllCause(cause => Effect.logDebug(cause)), Effect.zipRight(startShardManagerTimeout), Effect.repeat(Schedule.exponential(1000).pipe(Schedule.union(Schedule.spaced(10_000)))), Effect.annotateLogs({
592
- package: "@effect/cluster",
593
- module: "Sharding",
594
- fiber: "ShardManager sync",
595
- runner: config.runnerAddress
596
- }), Effect.interruptible, Effect.forkIn(shardingScope));
597
- const syncAssignments = Effect.gen(function* () {
598
- const assignments = yield* shardManager.getAssignments;
599
- yield* Effect.logDebug("Received shard assignments", assignments);
600
- for (const [shardId, runner] of assignments) {
601
- if (Option.isNone(runner)) {
602
- MutableHashMap.remove(shardAssignments, shardId);
603
- MutableHashSet.remove(selfShards, shardId);
580
+ // Remove runners that are no longer present or healthy
581
+ MutableHashMap.forEach(allRunners, (_, runner) => {
582
+ changed = true;
583
+ MutableHashMap.remove(allRunners, runner);
584
+ MutableHashSet.remove(healthyRunners, runner);
585
+ runFork(runnersService.onRunnerUnavailable(runner.address));
586
+ for (let i = 0; i < runner.groups.length; i++) {
587
+ HashRing.remove(hashRings.get(runner.groups[i]), runner.address);
588
+ }
589
+ });
590
+ // swap allRunners and nextRunners
591
+ const prevRunners = allRunners;
592
+ allRunners = nextRunners;
593
+ nextRunners = prevRunners;
594
+ healthyRunnerCount = MutableHashSet.size(healthyRunners);
595
+ // Ensure the current runner is registered
596
+ if (selfRunner && !isShutdown.current && !MutableHashMap.has(allRunners, selfRunner)) {
604
597
  continue;
605
598
  }
606
- MutableHashMap.set(shardAssignments, shardId, runner.value);
607
- if (!isLocalRunner(runner.value)) {
608
- MutableHashSet.remove(selfShards, shardId);
609
- continue;
599
+ // Recompute shard assignments if the set of healthy runners has changed.
600
+ if (changed) {
601
+ MutableHashSet.clear(selfShards);
602
+ hashRings.forEach((ring, group) => {
603
+ const newAssignments = HashRing.getShards(ring, config.shardsPerGroup);
604
+ for (let i = 0; i < config.shardsPerGroup; i++) {
605
+ const shard = makeShardId(group, i + 1);
606
+ if (newAssignments) {
607
+ const runner = newAssignments[i];
608
+ MutableHashMap.set(shardAssignments, shard, runner);
609
+ if (isLocalRunner(runner)) {
610
+ MutableHashSet.add(selfShards, shard);
611
+ }
612
+ } else {
613
+ MutableHashMap.remove(shardAssignments, shard);
614
+ }
615
+ }
616
+ });
617
+ yield* Effect.logDebug("New shard assignments", selfShards);
618
+ activeShardsLatch.unsafeOpen();
619
+ // update metrics
620
+ if (selfRunner) {
621
+ ClusterMetrics.runnersHealthy.unsafeUpdate(BigInt(MutableHashSet.has(healthyRunners, selfRunner) ? 1 : 0), []);
622
+ }
610
623
  }
611
- if (MutableRef.get(isShutdown) || MutableHashSet.has(selfShards, shardId)) {
612
- continue;
624
+ if (selfRunner && MutableHashSet.size(healthyRunners) === 0) {
625
+ yield* Effect.logWarning("No healthy runners available");
626
+ // to prevent a deadlock, we will mark the current node as healthy to
627
+ // start the health check singleton again
628
+ yield* runnerStorage.setRunnerHealth(selfRunner.address, true);
613
629
  }
614
- MutableHashSet.add(selfShards, shardId);
630
+ yield* Effect.sleep(config.refreshAssignmentsInterval);
615
631
  }
616
- yield* activeShardsLatch.open;
617
- });
632
+ }).pipe(Effect.catchAllCause(cause => Effect.logDebug(cause)), Effect.repeat(Schedule.spaced(1000)), Effect.annotateLogs({
633
+ package: "@effect/cluster",
634
+ module: "Sharding",
635
+ fiber: "RunnerStorage sync",
636
+ runner: config.runnerAddress
637
+ }), Effect.forkIn(shardingScope));
618
638
  const clientRequests = new Map();
619
639
  const clients = yield* ResourceMap.make(Effect.fnUntraced(function* (entity) {
620
640
  const client = yield* RpcClient.makeNoSerialization(entity.protocol, {
@@ -635,7 +655,7 @@ const make = /*#__PURE__*/Effect.gen(function* () {
635
655
  if (!options.discard) {
636
656
  const entry = {
637
657
  rpc: rpc,
638
- context: fiber.currentContext
658
+ services: fiber.currentContext
639
659
  };
640
660
  clientRequests.set(id, entry);
641
661
  respond = makeClientRespond(entry, client.write);
@@ -707,8 +727,8 @@ const make = /*#__PURE__*/Effect.gen(function* () {
707
727
  return Effect.void;
708
728
  }));
709
729
  return entityId => {
710
- const id = EntityId.make(entityId);
711
- const address = ClientAddressTag.context(EntityAddress.make({
730
+ const id = makeEntityId(entityId);
731
+ const address = ClientAddressTag.context(makeEntityAddress({
712
732
  shardId: getShardId(id, entity.getShardGroup(entityId)),
713
733
  entityId: id,
714
734
  entityType: entity.type
@@ -762,23 +782,84 @@ const make = /*#__PURE__*/Effect.gen(function* () {
762
782
  }
763
783
  }
764
784
  };
785
+ // --- Singletons ---
786
+ const singletons = new Map();
787
+ const singletonFibers = yield* FiberMap.make();
788
+ const withSingletonLock = Effect.unsafeMakeSemaphore(1).withPermits(1);
789
+ const registerSingleton = Effect.fnUntraced(function* (name, run, options) {
790
+ const shardGroup = options?.shardGroup ?? "default";
791
+ const address = new SingletonAddress({
792
+ shardId: getShardId(makeEntityId(name), shardGroup),
793
+ name
794
+ });
795
+ let map = singletons.get(address.shardId);
796
+ if (!map) {
797
+ map = MutableHashMap.empty();
798
+ singletons.set(address.shardId, map);
799
+ }
800
+ if (MutableHashMap.has(map, address)) {
801
+ return yield* Effect.die(`Singleton '${name}' is already registered`);
802
+ }
803
+ const context = yield* Effect.context();
804
+ const wrappedRun = run.pipe(Effect.locally(FiberRef.currentLogAnnotations, HashMap.empty()), Effect.andThen(Effect.never), Effect.scoped, Effect.provide(context), Effect.orDie, Effect.interruptible);
805
+ MutableHashMap.set(map, address, wrappedRun);
806
+ yield* PubSub.publish(events, SingletonRegistered({
807
+ address
808
+ }));
809
+ // start if we are on the right shard
810
+ if (MutableHashSet.has(acquiredShards, address.shardId)) {
811
+ yield* Effect.logDebug("Starting singleton", address);
812
+ yield* FiberMap.run(singletonFibers, address, wrappedRun);
813
+ }
814
+ }, withSingletonLock);
815
+ const syncSingletons = withSingletonLock(Effect.gen(function* () {
816
+ for (const [shardId, map] of singletons) {
817
+ for (const [address, run] of map) {
818
+ const running = FiberMap.unsafeHas(singletonFibers, address);
819
+ const shouldBeRunning = MutableHashSet.has(acquiredShards, shardId);
820
+ if (running && !shouldBeRunning) {
821
+ yield* Effect.logDebug("Stopping singleton", address);
822
+ internalInterruptors.add(Option.getOrThrow(Fiber.getCurrentFiber()).id());
823
+ yield* FiberMap.remove(singletonFibers, address);
824
+ } else if (!running && shouldBeRunning) {
825
+ yield* Effect.logDebug("Starting singleton", address);
826
+ yield* FiberMap.run(singletonFibers, address, run);
827
+ }
828
+ }
829
+ }
830
+ ClusterMetrics.singletons.unsafeUpdate(BigInt(yield* FiberMap.size(singletonFibers)), []);
831
+ }));
765
832
  // --- Entities ---
766
833
  const context = yield* Effect.context();
767
834
  const reaper = yield* EntityReaper;
835
+ const entityManagerLatches = new Map();
768
836
  const registerEntity = Effect.fnUntraced(function* (entity, build, options) {
769
837
  if (Option.isNone(config.runnerAddress) || entityManagers.has(entity.type)) return;
770
838
  const scope = yield* Scope.make();
839
+ yield* Scope.addFinalizer(scope, Effect.sync(() => {
840
+ state.closed = true;
841
+ }));
771
842
  const manager = yield* EntityManager.make(entity, build, {
772
843
  ...options,
773
844
  storage,
774
845
  runnerAddress: config.runnerAddress.value,
775
846
  sharding
776
847
  }).pipe(Effect.provide(context.pipe(Context.add(EntityReaper, reaper), Context.add(Scope.Scope, scope), Context.add(Snowflake.Generator, snowflakeGen))));
777
- entityManagers.set(entity.type, {
848
+ const state = {
778
849
  entity,
779
850
  scope,
851
+ closed: false,
780
852
  manager
781
- });
853
+ };
854
+ // register entities while storage is idle
855
+ // this ensures message order is preserved
856
+ yield* withStorageReadLock(Effect.sync(() => {
857
+ entityManagers.set(entity.type, state);
858
+ if (entityManagerLatches.has(entity.type)) {
859
+ entityManagerLatches.get(entity.type).unsafeOpen();
860
+ entityManagerLatches.delete(entity.type);
861
+ }
862
+ }));
782
863
  yield* PubSub.publish(events, EntityRegistered({
783
864
  entity
784
865
  }));
@@ -789,20 +870,51 @@ const make = /*#__PURE__*/Effect.gen(function* () {
789
870
  concurrency: "unbounded",
790
871
  discard: true
791
872
  }));
792
- // --- Finalization ---
793
- if (Option.isSome(config.runnerAddress)) {
794
- const selfAddress = config.runnerAddress.value;
795
- // Unregister runner from shard manager when scope is closed
796
- yield* Scope.addFinalizer(shardingScope, Effect.gen(function* () {
797
- yield* Effect.logDebug("Unregistering runner from shard manager", selfAddress);
798
- yield* shardManager.unregister(selfAddress).pipe(Effect.catchAllCause(cause => Effect.logError("Error calling unregister with shard manager", cause)));
799
- yield* clearSelfShards;
800
- }));
873
+ const waitForEntityManager = entityType => {
874
+ let latch = entityManagerLatches.get(entityType);
875
+ if (!latch) {
876
+ latch = Effect.unsafeMakeLatch();
877
+ entityManagerLatches.set(entityType, latch);
878
+ }
879
+ return latch.await;
880
+ };
881
+ // --- Runner health checks ---
882
+ if (selfRunner) {
883
+ const checkRunner = ([runner, healthy]) => Effect.flatMap(runnerHealth.isAlive(runner.address), isAlive => {
884
+ if (healthy === isAlive) return Effect.void;
885
+ if (isAlive) {
886
+ healthyRunnerCount++;
887
+ return Effect.logDebug(`Runner is healthy`, runner).pipe(Effect.andThen(runnerStorage.setRunnerHealth(runner.address, isAlive)));
888
+ }
889
+ if (healthyRunnerCount <= 1) {
890
+ // never mark the last runner as unhealthy, to prevent a deadlock
891
+ return Effect.void;
892
+ }
893
+ healthyRunnerCount--;
894
+ return Effect.logDebug(`Runner is unhealthy`, runner).pipe(Effect.andThen(runnerStorage.setRunnerHealth(runner.address, isAlive)));
895
+ });
896
+ yield* registerSingleton("effect/cluster/Sharding/RunnerHealth", Effect.gen(function* () {
897
+ while (true) {
898
+ // Skip health checks if we are the only runner
899
+ if (MutableHashMap.size(allRunners) > 1) {
900
+ yield* Effect.forEach(allRunners, checkRunner, {
901
+ discard: true,
902
+ concurrency: 10
903
+ });
904
+ }
905
+ yield* Effect.sleep(config.runnerHealthCheckInterval);
906
+ }
907
+ }).pipe(Effect.catchAllCause(cause => Effect.logDebug("Runner health check failed", cause)), Effect.forever, Effect.annotateLogs({
908
+ package: "@effect/cluster",
909
+ module: "Sharding",
910
+ fiber: "Runner health check"
911
+ })));
801
912
  }
913
+ // --- Finalization ---
802
914
  yield* Scope.addFinalizer(shardingScope, Effect.withFiberRuntime(fiber => {
803
915
  MutableRef.set(isShutdown, true);
804
916
  internalInterruptors.add(fiber.id());
805
- return Effect.void;
917
+ return selfRunner ? Effect.ignore(runnerStorage.unregister(selfRunner.address)) : Effect.void;
806
918
  }));
807
919
  const activeEntityCount = Effect.gen(function* () {
808
920
  let count = 0;
@@ -814,13 +926,18 @@ const make = /*#__PURE__*/Effect.gen(function* () {
814
926
  const sharding = Sharding.of({
815
927
  getRegistrationEvents,
816
928
  getShardId,
929
+ hasShardId(shardId) {
930
+ if (isShutdown.current) return false;
931
+ return MutableHashSet.has(acquiredShards, shardId);
932
+ },
933
+ getSnowflake: Effect.sync(() => snowflakeGen.unsafeNext()),
817
934
  isShutdown: Effect.sync(() => MutableRef.get(isShutdown)),
818
935
  registerEntity,
819
936
  registerSingleton,
820
937
  makeClient,
821
938
  send: sendLocal,
822
939
  sendOutgoing: (message, discard) => sendOutgoing(message, discard),
823
- notify: message => notifyLocal(message, false),
940
+ notify: (message, options) => notifyLocal(message, false, options),
824
941
  activeEntityCount,
825
942
  pollStorage: storageReadLatch.open,
826
943
  reset
@@ -831,7 +948,7 @@ const make = /*#__PURE__*/Effect.gen(function* () {
831
948
  * @since 1.0.0
832
949
  * @category layers
833
950
  */
834
- export const layer = /*#__PURE__*/Layer.scoped(Sharding, make).pipe(/*#__PURE__*/Layer.provide([Snowflake.layerGenerator, EntityReaper.Default]));
951
+ export const layer = /*#__PURE__*/Layer.scoped(Sharding)(make).pipe(/*#__PURE__*/Layer.provide([Snowflake.layerGenerator, EntityReaper.Default]));
835
952
  // Utilities
836
953
  const ClientAddressTag = /*#__PURE__*/Context.GenericTag("@effect/cluster/Sharding/ClientAddress");
837
954
  //# sourceMappingURL=Sharding.js.map