flwr-nightly 1.8.0.dev20240315__py3-none-any.whl → 1.15.0.dev20250114__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (311) hide show
  1. flwr/cli/app.py +16 -2
  2. flwr/cli/build.py +181 -0
  3. flwr/cli/cli_user_auth_interceptor.py +90 -0
  4. flwr/cli/config_utils.py +343 -0
  5. flwr/cli/example.py +4 -1
  6. flwr/cli/install.py +253 -0
  7. flwr/cli/log.py +182 -0
  8. flwr/{server/superlink/state → cli/login}/__init__.py +4 -10
  9. flwr/cli/login/login.py +88 -0
  10. flwr/cli/ls.py +327 -0
  11. flwr/cli/new/__init__.py +1 -0
  12. flwr/cli/new/new.py +210 -66
  13. flwr/cli/new/templates/app/.gitignore.tpl +163 -0
  14. flwr/cli/new/templates/app/LICENSE.tpl +202 -0
  15. flwr/cli/new/templates/app/README.baseline.md.tpl +127 -0
  16. flwr/cli/new/templates/app/README.flowertune.md.tpl +66 -0
  17. flwr/cli/new/templates/app/README.md.tpl +16 -32
  18. flwr/cli/new/templates/app/code/__init__.baseline.py.tpl +1 -0
  19. flwr/cli/new/templates/app/code/__init__.py.tpl +1 -1
  20. flwr/cli/new/templates/app/code/client.baseline.py.tpl +58 -0
  21. flwr/cli/new/templates/app/code/client.huggingface.py.tpl +55 -0
  22. flwr/cli/new/templates/app/code/client.jax.py.tpl +50 -0
  23. flwr/cli/new/templates/app/code/client.mlx.py.tpl +73 -0
  24. flwr/cli/new/templates/app/code/client.numpy.py.tpl +7 -7
  25. flwr/cli/new/templates/app/code/client.pytorch.py.tpl +30 -21
  26. flwr/cli/new/templates/app/code/client.sklearn.py.tpl +63 -0
  27. flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +57 -1
  28. flwr/cli/new/templates/app/code/dataset.baseline.py.tpl +36 -0
  29. flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
  30. flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +126 -0
  31. flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +87 -0
  32. flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +78 -0
  33. flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +94 -0
  34. flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +83 -0
  35. flwr/cli/new/templates/app/code/model.baseline.py.tpl +80 -0
  36. flwr/cli/new/templates/app/code/server.baseline.py.tpl +46 -0
  37. flwr/cli/new/templates/app/code/server.huggingface.py.tpl +38 -0
  38. flwr/cli/new/templates/app/code/server.jax.py.tpl +26 -0
  39. flwr/cli/new/templates/app/code/server.mlx.py.tpl +31 -0
  40. flwr/cli/new/templates/app/code/server.numpy.py.tpl +22 -9
  41. flwr/cli/new/templates/app/code/server.pytorch.py.tpl +21 -18
  42. flwr/cli/new/templates/app/code/server.sklearn.py.tpl +36 -0
  43. flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +29 -1
  44. flwr/cli/new/templates/app/code/strategy.baseline.py.tpl +1 -0
  45. flwr/cli/new/templates/app/code/task.huggingface.py.tpl +102 -0
  46. flwr/cli/new/templates/app/code/task.jax.py.tpl +57 -0
  47. flwr/cli/new/templates/app/code/task.mlx.py.tpl +102 -0
  48. flwr/cli/new/templates/app/code/task.numpy.py.tpl +7 -0
  49. flwr/cli/new/templates/app/code/task.pytorch.py.tpl +29 -24
  50. flwr/cli/new/templates/app/code/task.sklearn.py.tpl +67 -0
  51. flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +53 -0
  52. flwr/cli/new/templates/app/code/utils.baseline.py.tpl +1 -0
  53. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +138 -0
  54. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +68 -0
  55. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +46 -0
  56. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +35 -0
  57. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +39 -0
  58. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +25 -12
  59. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +29 -14
  60. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +35 -0
  61. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +29 -14
  62. flwr/cli/run/__init__.py +1 -0
  63. flwr/cli/run/run.py +212 -34
  64. flwr/cli/stop.py +130 -0
  65. flwr/cli/utils.py +240 -5
  66. flwr/client/__init__.py +3 -2
  67. flwr/client/app.py +432 -255
  68. flwr/client/client.py +1 -11
  69. flwr/client/client_app.py +74 -13
  70. flwr/client/clientapp/__init__.py +22 -0
  71. flwr/client/clientapp/app.py +259 -0
  72. flwr/client/clientapp/clientappio_servicer.py +244 -0
  73. flwr/client/clientapp/utils.py +115 -0
  74. flwr/client/dpfedavg_numpy_client.py +7 -8
  75. flwr/client/grpc_adapter_client/__init__.py +15 -0
  76. flwr/client/grpc_adapter_client/connection.py +98 -0
  77. flwr/client/grpc_client/connection.py +21 -7
  78. flwr/client/grpc_rere_client/__init__.py +1 -1
  79. flwr/client/grpc_rere_client/client_interceptor.py +176 -0
  80. flwr/client/grpc_rere_client/connection.py +163 -56
  81. flwr/client/grpc_rere_client/grpc_adapter.py +167 -0
  82. flwr/client/heartbeat.py +74 -0
  83. flwr/client/message_handler/__init__.py +1 -1
  84. flwr/client/message_handler/message_handler.py +10 -11
  85. flwr/client/mod/__init__.py +5 -5
  86. flwr/client/mod/centraldp_mods.py +4 -2
  87. flwr/client/mod/comms_mods.py +5 -4
  88. flwr/client/mod/localdp_mod.py +10 -5
  89. flwr/client/mod/secure_aggregation/__init__.py +1 -1
  90. flwr/client/mod/secure_aggregation/secaggplus_mod.py +26 -26
  91. flwr/client/mod/utils.py +2 -4
  92. flwr/client/nodestate/__init__.py +26 -0
  93. flwr/client/nodestate/in_memory_nodestate.py +38 -0
  94. flwr/client/nodestate/nodestate.py +31 -0
  95. flwr/client/nodestate/nodestate_factory.py +38 -0
  96. flwr/client/numpy_client.py +8 -31
  97. flwr/client/rest_client/__init__.py +1 -1
  98. flwr/client/rest_client/connection.py +199 -176
  99. flwr/client/run_info_store.py +112 -0
  100. flwr/client/supernode/__init__.py +24 -0
  101. flwr/client/supernode/app.py +321 -0
  102. flwr/client/typing.py +1 -0
  103. flwr/common/__init__.py +17 -11
  104. flwr/common/address.py +47 -3
  105. flwr/common/args.py +153 -0
  106. flwr/common/auth_plugin/__init__.py +24 -0
  107. flwr/common/auth_plugin/auth_plugin.py +121 -0
  108. flwr/common/config.py +243 -0
  109. flwr/common/constant.py +132 -1
  110. flwr/common/context.py +32 -2
  111. flwr/common/date.py +22 -4
  112. flwr/common/differential_privacy.py +2 -2
  113. flwr/common/dp.py +2 -4
  114. flwr/common/exit_handlers.py +3 -3
  115. flwr/common/grpc.py +164 -5
  116. flwr/common/logger.py +230 -12
  117. flwr/common/message.py +191 -106
  118. flwr/common/object_ref.py +179 -44
  119. flwr/common/pyproject.py +1 -0
  120. flwr/common/record/__init__.py +2 -1
  121. flwr/common/record/configsrecord.py +58 -18
  122. flwr/common/record/metricsrecord.py +57 -17
  123. flwr/common/record/parametersrecord.py +88 -20
  124. flwr/common/record/recordset.py +153 -30
  125. flwr/common/record/typeddict.py +30 -55
  126. flwr/common/recordset_compat.py +31 -12
  127. flwr/common/retry_invoker.py +123 -30
  128. flwr/common/secure_aggregation/__init__.py +1 -1
  129. flwr/common/secure_aggregation/crypto/__init__.py +1 -1
  130. flwr/common/secure_aggregation/crypto/shamir.py +11 -11
  131. flwr/common/secure_aggregation/crypto/symmetric_encryption.py +68 -4
  132. flwr/common/secure_aggregation/ndarrays_arithmetic.py +17 -17
  133. flwr/common/secure_aggregation/quantization.py +8 -8
  134. flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
  135. flwr/common/secure_aggregation/secaggplus_utils.py +10 -12
  136. flwr/common/serde.py +298 -19
  137. flwr/common/telemetry.py +65 -29
  138. flwr/common/typing.py +120 -19
  139. flwr/common/version.py +17 -3
  140. flwr/proto/clientappio_pb2.py +45 -0
  141. flwr/proto/clientappio_pb2.pyi +132 -0
  142. flwr/proto/clientappio_pb2_grpc.py +135 -0
  143. flwr/proto/clientappio_pb2_grpc.pyi +53 -0
  144. flwr/proto/exec_pb2.py +62 -0
  145. flwr/proto/exec_pb2.pyi +212 -0
  146. flwr/proto/exec_pb2_grpc.py +237 -0
  147. flwr/proto/exec_pb2_grpc.pyi +93 -0
  148. flwr/proto/fab_pb2.py +31 -0
  149. flwr/proto/fab_pb2.pyi +65 -0
  150. flwr/proto/fab_pb2_grpc.py +4 -0
  151. flwr/proto/fab_pb2_grpc.pyi +4 -0
  152. flwr/proto/fleet_pb2.py +42 -23
  153. flwr/proto/fleet_pb2.pyi +123 -1
  154. flwr/proto/fleet_pb2_grpc.py +170 -0
  155. flwr/proto/fleet_pb2_grpc.pyi +61 -0
  156. flwr/proto/grpcadapter_pb2.py +32 -0
  157. flwr/proto/grpcadapter_pb2.pyi +43 -0
  158. flwr/proto/grpcadapter_pb2_grpc.py +66 -0
  159. flwr/proto/grpcadapter_pb2_grpc.pyi +24 -0
  160. flwr/proto/log_pb2.py +29 -0
  161. flwr/proto/log_pb2.pyi +39 -0
  162. flwr/proto/log_pb2_grpc.py +4 -0
  163. flwr/proto/log_pb2_grpc.pyi +4 -0
  164. flwr/proto/message_pb2.py +41 -0
  165. flwr/proto/message_pb2.pyi +128 -0
  166. flwr/proto/message_pb2_grpc.py +4 -0
  167. flwr/proto/message_pb2_grpc.pyi +4 -0
  168. flwr/proto/node_pb2.py +1 -1
  169. flwr/proto/recordset_pb2.py +35 -33
  170. flwr/proto/recordset_pb2.pyi +40 -14
  171. flwr/proto/run_pb2.py +64 -0
  172. flwr/proto/run_pb2.pyi +268 -0
  173. flwr/proto/run_pb2_grpc.py +4 -0
  174. flwr/proto/run_pb2_grpc.pyi +4 -0
  175. flwr/proto/serverappio_pb2.py +52 -0
  176. flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +62 -20
  177. flwr/proto/serverappio_pb2_grpc.py +410 -0
  178. flwr/proto/serverappio_pb2_grpc.pyi +160 -0
  179. flwr/proto/simulationio_pb2.py +38 -0
  180. flwr/proto/simulationio_pb2.pyi +65 -0
  181. flwr/proto/simulationio_pb2_grpc.py +239 -0
  182. flwr/proto/simulationio_pb2_grpc.pyi +94 -0
  183. flwr/proto/task_pb2.py +7 -8
  184. flwr/proto/task_pb2.pyi +8 -5
  185. flwr/proto/transport_pb2.py +8 -8
  186. flwr/proto/transport_pb2.pyi +9 -6
  187. flwr/server/__init__.py +2 -10
  188. flwr/server/app.py +579 -402
  189. flwr/server/client_manager.py +8 -6
  190. flwr/server/compat/app.py +6 -62
  191. flwr/server/compat/app_utils.py +14 -8
  192. flwr/server/compat/driver_client_proxy.py +25 -58
  193. flwr/server/compat/legacy_context.py +5 -4
  194. flwr/server/driver/__init__.py +2 -0
  195. flwr/server/driver/driver.py +36 -131
  196. flwr/server/driver/grpc_driver.py +217 -81
  197. flwr/server/driver/inmemory_driver.py +182 -0
  198. flwr/server/history.py +28 -29
  199. flwr/server/run_serverapp.py +15 -126
  200. flwr/server/server.py +50 -44
  201. flwr/server/server_app.py +59 -10
  202. flwr/server/serverapp/__init__.py +22 -0
  203. flwr/server/serverapp/app.py +256 -0
  204. flwr/server/serverapp_components.py +52 -0
  205. flwr/server/strategy/__init__.py +2 -2
  206. flwr/server/strategy/aggregate.py +37 -23
  207. flwr/server/strategy/bulyan.py +9 -9
  208. flwr/server/strategy/dp_adaptive_clipping.py +25 -25
  209. flwr/server/strategy/dp_fixed_clipping.py +23 -22
  210. flwr/server/strategy/dpfedavg_adaptive.py +8 -8
  211. flwr/server/strategy/dpfedavg_fixed.py +13 -12
  212. flwr/server/strategy/fault_tolerant_fedavg.py +11 -11
  213. flwr/server/strategy/fedadagrad.py +9 -9
  214. flwr/server/strategy/fedadam.py +20 -10
  215. flwr/server/strategy/fedavg.py +16 -16
  216. flwr/server/strategy/fedavg_android.py +17 -17
  217. flwr/server/strategy/fedavgm.py +9 -9
  218. flwr/server/strategy/fedmedian.py +5 -5
  219. flwr/server/strategy/fedopt.py +6 -6
  220. flwr/server/strategy/fedprox.py +7 -7
  221. flwr/server/strategy/fedtrimmedavg.py +8 -8
  222. flwr/server/strategy/fedxgb_bagging.py +12 -12
  223. flwr/server/strategy/fedxgb_cyclic.py +10 -10
  224. flwr/server/strategy/fedxgb_nn_avg.py +6 -6
  225. flwr/server/strategy/fedyogi.py +9 -9
  226. flwr/server/strategy/krum.py +9 -9
  227. flwr/server/strategy/qfedavg.py +16 -16
  228. flwr/server/strategy/strategy.py +10 -10
  229. flwr/server/superlink/driver/__init__.py +2 -2
  230. flwr/server/superlink/driver/serverappio_grpc.py +61 -0
  231. flwr/server/superlink/driver/serverappio_servicer.py +363 -0
  232. flwr/server/superlink/ffs/__init__.py +24 -0
  233. flwr/server/superlink/ffs/disk_ffs.py +108 -0
  234. flwr/server/superlink/ffs/ffs.py +79 -0
  235. flwr/server/superlink/ffs/ffs_factory.py +47 -0
  236. flwr/server/superlink/fleet/__init__.py +1 -1
  237. flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
  238. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +162 -0
  239. flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
  240. flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +4 -2
  241. flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +3 -2
  242. flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
  243. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -154
  244. flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
  245. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +120 -13
  246. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +228 -0
  247. flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
  248. flwr/server/superlink/fleet/message_handler/message_handler.py +153 -9
  249. flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
  250. flwr/server/superlink/fleet/rest_rere/rest_api.py +119 -81
  251. flwr/server/superlink/fleet/vce/__init__.py +1 -0
  252. flwr/server/superlink/fleet/vce/backend/__init__.py +4 -4
  253. flwr/server/superlink/fleet/vce/backend/backend.py +8 -9
  254. flwr/server/superlink/fleet/vce/backend/raybackend.py +87 -68
  255. flwr/server/superlink/fleet/vce/vce_api.py +208 -146
  256. flwr/server/superlink/linkstate/__init__.py +28 -0
  257. flwr/server/superlink/linkstate/in_memory_linkstate.py +581 -0
  258. flwr/server/superlink/linkstate/linkstate.py +389 -0
  259. flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +19 -10
  260. flwr/server/superlink/linkstate/sqlite_linkstate.py +1236 -0
  261. flwr/server/superlink/linkstate/utils.py +389 -0
  262. flwr/server/superlink/simulation/__init__.py +15 -0
  263. flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
  264. flwr/server/superlink/simulation/simulationio_servicer.py +186 -0
  265. flwr/server/superlink/utils.py +65 -0
  266. flwr/server/typing.py +2 -0
  267. flwr/server/utils/__init__.py +1 -1
  268. flwr/server/utils/tensorboard.py +5 -5
  269. flwr/server/utils/validator.py +31 -11
  270. flwr/server/workflow/default_workflows.py +70 -26
  271. flwr/server/workflow/secure_aggregation/secagg_workflow.py +1 -0
  272. flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +40 -27
  273. flwr/simulation/__init__.py +12 -5
  274. flwr/simulation/app.py +247 -315
  275. flwr/simulation/legacy_app.py +402 -0
  276. flwr/simulation/ray_transport/__init__.py +1 -1
  277. flwr/simulation/ray_transport/ray_actor.py +42 -67
  278. flwr/simulation/ray_transport/ray_client_proxy.py +37 -17
  279. flwr/simulation/ray_transport/utils.py +1 -0
  280. flwr/simulation/run_simulation.py +306 -163
  281. flwr/simulation/simulationio_connection.py +89 -0
  282. flwr/superexec/__init__.py +15 -0
  283. flwr/superexec/app.py +59 -0
  284. flwr/superexec/deployment.py +188 -0
  285. flwr/superexec/exec_grpc.py +80 -0
  286. flwr/superexec/exec_servicer.py +231 -0
  287. flwr/superexec/exec_user_auth_interceptor.py +101 -0
  288. flwr/superexec/executor.py +96 -0
  289. flwr/superexec/simulation.py +124 -0
  290. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250114.dist-info}/METADATA +33 -26
  291. flwr_nightly-1.15.0.dev20250114.dist-info/RECORD +328 -0
  292. flwr_nightly-1.15.0.dev20250114.dist-info/entry_points.txt +12 -0
  293. flwr/cli/flower_toml.py +0 -140
  294. flwr/cli/new/templates/app/flower.toml.tpl +0 -13
  295. flwr/cli/new/templates/app/requirements.numpy.txt.tpl +0 -2
  296. flwr/cli/new/templates/app/requirements.pytorch.txt.tpl +0 -4
  297. flwr/cli/new/templates/app/requirements.tensorflow.txt.tpl +0 -4
  298. flwr/client/node_state.py +0 -48
  299. flwr/client/node_state_tests.py +0 -65
  300. flwr/proto/driver_pb2.py +0 -44
  301. flwr/proto/driver_pb2_grpc.py +0 -169
  302. flwr/proto/driver_pb2_grpc.pyi +0 -66
  303. flwr/server/superlink/driver/driver_grpc.py +0 -54
  304. flwr/server/superlink/driver/driver_servicer.py +0 -129
  305. flwr/server/superlink/state/in_memory_state.py +0 -230
  306. flwr/server/superlink/state/sqlite_state.py +0 -630
  307. flwr/server/superlink/state/state.py +0 -154
  308. flwr_nightly-1.8.0.dev20240315.dist-info/RECORD +0 -211
  309. flwr_nightly-1.8.0.dev20240315.dist-info/entry_points.txt +0 -9
  310. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250114.dist-info}/LICENSE +0 -0
  311. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250114.dist-info}/WHEEL +0 -0
@@ -0,0 +1,581 @@
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """In-memory LinkState implementation."""
16
+
17
+
18
+ import threading
19
+ import time
20
+ from bisect import bisect_right
21
+ from dataclasses import dataclass, field
22
+ from logging import ERROR, WARNING
23
+ from typing import Optional
24
+ from uuid import UUID, uuid4
25
+
26
+ from flwr.common import Context, log, now
27
+ from flwr.common.constant import (
28
+ MESSAGE_TTL_TOLERANCE,
29
+ NODE_ID_NUM_BYTES,
30
+ RUN_ID_NUM_BYTES,
31
+ Status,
32
+ )
33
+ from flwr.common.record import ConfigsRecord
34
+ from flwr.common.typing import Run, RunStatus, UserConfig
35
+ from flwr.proto.task_pb2 import TaskIns, TaskRes # pylint: disable=E0611
36
+ from flwr.server.superlink.linkstate.linkstate import LinkState
37
+ from flwr.server.utils import validate_task_ins_or_res
38
+
39
+ from .utils import (
40
+ generate_rand_int_from_bytes,
41
+ has_valid_sub_status,
42
+ is_valid_transition,
43
+ verify_found_taskres,
44
+ verify_taskins_ids,
45
+ )
46
+
47
+
48
@dataclass
class RunRecord:  # pylint: disable=R0902
    """Container pairing a run with the log entries gathered for it.

    Attributes
    ----------
    run : Run
        The run described by this record.
    logs : list[tuple[float, str]]
        Log entries as ``(float, str)`` pairs; a fresh empty list per record.
    log_lock : threading.Lock
        A lock created per record (presumably protects ``logs`` -- confirm
        against the code that appends to it).
    """

    run: Run
    logs: list[tuple[float, str]] = field(default_factory=list)
    log_lock: threading.Lock = field(default_factory=threading.Lock)
55
+
56
+
57
+ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
58
+ """In-memory LinkState implementation."""
59
+
60
def __init__(self) -> None:
    """Set up empty in-memory stores and the lock shared by the methods."""
    # Re-entrant lock used by the methods that touch the stores below
    self.lock = threading.RLock()

    # Nodes: node_id -> (online_until, ping_interval), plus the two-way
    # node ID <> public key mappings
    self.node_ids: dict[int, tuple[float, float]] = {}
    self.public_key_to_node_id: dict[bytes, int] = {}
    self.node_id_to_public_key: dict[int, bytes] = {}

    # Runs: run_id -> RunRecord, with per-run context and federation options
    self.run_ids: dict[int, RunRecord] = {}
    self.contexts: dict[int, Context] = {}
    self.federation_options: dict[int, ConfigsRecord] = {}

    # Tasks: stored TaskIns/TaskRes and the TaskIns ID -> TaskRes ID link
    self.task_ins_store: dict[UUID, TaskIns] = {}
    self.task_res_store: dict[UUID, TaskRes] = {}
    self.task_ins_id_to_task_res_id: dict[UUID, UUID] = {}

    # Authentication material
    self.node_public_keys: set[bytes] = set()
    self.server_public_key: Optional[bytes] = None
    self.server_private_key: Optional[bytes] = None
80
+
81
def store_task_ins(self, task_ins: TaskIns) -> Optional[UUID]:
    """Validate one TaskIns, assign it a fresh task ID and store it.

    Returns the newly generated task ID, or `None` (after logging an error)
    when validation fails, the run ID is unknown, the producer node ID is
    not 0, or a non-anonymous consumer node ID is unknown.
    """
    # Generic task validation
    validation_errors = validate_task_ins_or_res(task_ins)
    if any(validation_errors):
        log(ERROR, validation_errors)
        return None

    # The run must be known
    if task_ins.run_id not in self.run_ids:
        log(ERROR, "Invalid run ID for TaskIns: %s", task_ins.run_id)
        return None

    # The producer node ID must be 0
    producer_node_id = task_ins.task.producer.node_id
    if producer_node_id != 0:
        log(ERROR, "Invalid source node ID for TaskIns: %s", producer_node_id)
        return None

    # A non-anonymous consumer must be a known node
    consumer = task_ins.task.consumer
    if not consumer.anonymous and consumer.node_id not in self.node_ids:
        log(
            ERROR,
            "Invalid destination node ID for TaskIns: %s",
            consumer.node_id,
        )
        return None

    # Assign a new ID and store the task under it
    new_task_id = uuid4()
    task_ins.task_id = str(new_task_id)
    with self.lock:
        self.task_ins_store[new_task_id] = task_ins

    return new_task_id
120
+
121
def get_task_ins(
    self, node_id: Optional[int], limit: Optional[int]
) -> list[TaskIns]:
    """Return undelivered, unexpired TaskIns for a node and mark them delivered.

    With `node_id` set, only non-anonymous TaskIns addressed to that node are
    selected; with `node_id=None`, only anonymous TaskIns with consumer node
    ID 0 are selected. At most `limit` TaskIns are returned when `limit` is
    given.

    Raises
    ------
    AssertionError
        If `limit` is given and smaller than 1.
    """
    if limit is not None and limit < 1:
        raise AssertionError("`limit` must be >= 1")

    wants_anonymous = node_id is None
    expected_node_id = 0 if wants_anonymous else node_id

    selected: list[TaskIns] = []
    timestamp = time.time()
    with self.lock:
        for candidate in self.task_ins_store.values():
            task = candidate.task
            # Anonymous flag must match the kind of request
            if task.consumer.anonymous is not wants_anonymous:
                continue
            # Addressed to the requesting node (0 for anonymous requests)
            if task.consumer.node_id != expected_node_id:
                continue
            # Skip already delivered tasks
            if task.delivered_at != "":
                continue
            # Skip expired tasks
            if not task.created_at + task.ttl > timestamp:
                continue
            selected.append(candidate)
            if limit and len(selected) == limit:
                break

    # Stamp every selected TaskIns with the same delivery time
    delivery_stamp = now().isoformat()
    for chosen in selected:
        chosen.task.delivered_at = delivery_stamp

    return selected
158
+
159
+ # pylint: disable=R0911
160
def store_task_res(self, task_res: TaskRes) -> Optional[UUID]:
    """Validate one TaskRes against its TaskIns and store it.

    The TaskIns being answered is looked up via the first entry of the
    TaskRes ancestry. Returns the newly generated task ID, or `None` when
    any check fails (most failures are logged).
    """
    problems = validate_task_ins_or_res(task_res)
    if any(problems):
        log(ERROR, problems)
        return None

    with self.lock:
        # Locate the TaskIns this TaskRes replies to
        task_ins_id = task_res.task.ancestry[0]
        task_ins = self.task_ins_store.get(UUID(task_ins_id))

        # Unless either side is anonymous, the TaskIns consumer must be
        # the TaskRes producer
        if task_ins and task_res:
            any_anonymous = (
                task_ins.task.consumer.anonymous or task_res.task.producer.anonymous
            )
            if (
                not any_anonymous
                and task_ins.task.consumer.node_id != task_res.task.producer.node_id
            ):
                return None

        if task_ins is None:
            log(ERROR, "TaskIns with task_id %s does not exist.", task_ins_id)
            return None

        expires_at = task_ins.task.created_at + task_ins.task.ttl
        if expires_at <= time.time():
            log(
                ERROR,
                "Failed to store TaskRes: TaskIns with task_id %s has expired.",
                task_ins_id,
            )
            return None

        # The TaskRes must not outlive the TaskIns it replies to, i.e.
        # TaskIns.created_at + TaskIns.ttl >= TaskRes.created_at + TaskRes.ttl.
        # A small tolerance absorbs floating-point precision issues.
        max_allowed_ttl = expires_at - task_res.task.created_at
        res_ttl = task_res.task.ttl
        if res_ttl and (res_ttl - max_allowed_ttl > MESSAGE_TTL_TOLERANCE):
            log(
                WARNING,
                "Received TaskRes with TTL %.2f "
                "exceeding the allowed maximum TTL %.2f.",
                task_res.task.ttl,
                max_allowed_ttl,
            )
            return None

    # The run must be known
    if task_res.run_id not in self.run_ids:
        log(ERROR, "`run_id` is invalid")
        return None

    # Assign a new ID, store the TaskRes and link it to its TaskIns
    new_task_id = uuid4()
    task_res.task_id = str(new_task_id)
    with self.lock:
        self.task_res_store[new_task_id] = task_res
        self.task_ins_id_to_task_res_id[UUID(task_ins_id)] = new_task_id

    return new_task_id
233
+
234
def get_task_res(self, task_ids: set[UUID]) -> list[TaskRes]:
    """Collect the TaskRes for the given TaskIns IDs and mark them delivered.

    The result combines the output of `verify_taskins_ids` with the output of
    `verify_found_taskres` for the undelivered TaskRes found in the store.
    """
    with self.lock:
        timestamp = time.time()

        # Start from the verification result for the inquired TaskIns IDs
        results: dict[UUID, TaskRes] = verify_taskins_ids(
            inquired_taskins_ids=task_ids,
            found_taskins_dict=self.task_ins_store,
            current_time=timestamp,
        )

        # Gather every linked TaskRes that has not been delivered yet
        undelivered: list[TaskRes] = []
        for ins_id in task_ids:
            res_id = self.task_ins_id_to_task_res_id.get(ins_id)
            if not res_id:
                continue
            candidate = self.task_res_store[res_id]
            if candidate.task.delivered_at == "":
                undelivered.append(candidate)

        results.update(
            verify_found_taskres(
                inquired_taskins_ids=task_ids,
                found_taskins_dict=self.task_ins_store,
                found_taskres_list=undelivered,
                current_time=timestamp,
            )
        )

        # Stamp the TaskRes found above with a common delivery time
        delivery_stamp = now().isoformat()
        for found in undelivered:
            found.task.delivered_at = delivery_stamp

    return list(results.values())
270
+
271
def delete_tasks(self, task_ins_ids: set[UUID]) -> None:
    """Remove the TaskIns with the given IDs together with their TaskRes.

    IDs that are not in the store are ignored.
    """
    if not task_ins_ids:
        return

    with self.lock:
        for ins_id in task_ins_ids:
            # Drop the TaskIns if it is stored
            self.task_ins_store.pop(ins_id, None)
            # Drop the linked TaskRes, if any
            try:
                res_id = self.task_ins_id_to_task_res_id.pop(ins_id)
            except KeyError:
                continue
            del self.task_res_store[res_id]
285
+
286
def get_task_ids_from_run_id(self, run_id: int) -> set[UUID]:
    """Return the IDs of all stored TaskIns whose `run_id` matches."""
    with self.lock:
        matching_ids: set[UUID] = {
            stored_id
            for stored_id, stored_task in self.task_ins_store.items()
            if stored_task.run_id == run_id
        }
    return matching_ids
295
+
296
def num_task_ins(self) -> int:
    """Return how many TaskIns are currently held in the store.

    Delivered TaskIns count as well, as long as they have not been deleted.
    """
    stored_count = len(self.task_ins_store)
    return stored_count
302
+
303
def num_task_res(self) -> int:
    """Return how many TaskRes are currently held in the store.

    Delivered TaskRes count as well, as long as they have not been deleted.
    """
    stored_count = len(self.task_res_store)
    return stored_count
309
+
310
def create_node(self, ping_interval: float) -> int:
    """Register a new node and return its randomly generated `node_id`.

    The node is stored as online until `ping_interval` seconds from now.
    Returns 0 (after logging an error) if the generated ID is already taken.
    """
    # Draw the candidate ID before taking the lock
    candidate_id = generate_rand_int_from_bytes(NODE_ID_NUM_BYTES)

    with self.lock:
        if candidate_id not in self.node_ids:
            online_until = time.time() + ping_interval
            self.node_ids[candidate_id] = (online_until, ping_interval)
            return candidate_id

        log(ERROR, "Unexpected node registration failure.")
        return 0
322
+
323
def delete_node(self, node_id: int) -> None:
    """Delete a node together with its node ID <> public key mappings.

    Raises
    ------
    ValueError
        If `node_id` is not a registered node.
    """
    with self.lock:
        if node_id not in self.node_ids:
            raise ValueError(f"Node {node_id} not found")

        # Remove node ID <> public key mappings. Compare against `None`
        # rather than relying on truthiness: an empty key (b"") is falsy and
        # would otherwise leave a stale reverse mapping behind.
        public_key = self.node_id_to_public_key.pop(node_id, None)
        if public_key is not None:
            del self.public_key_to_node_id[public_key]

        del self.node_ids[node_id]
334
+
335
def get_nodes(self, run_id: int) -> set[int]:
    """Return the IDs of all nodes that are currently online.

    Constraints
    -----------
    If the provided `run_id` does not exist or has no matching nodes,
    an empty `Set` MUST be returned.
    """
    with self.lock:
        if run_id not in self.run_ids:
            return set()

        timestamp = time.time()
        online_nodes: set[int] = set()
        for known_id, (online_until, _) in self.node_ids.items():
            # A node counts as online while its deadline lies in the future
            if online_until > timestamp:
                online_nodes.add(known_id)
        return online_nodes
352
+
353
def set_node_public_key(self, node_id: int, public_key: bytes) -> None:
    """Associate `public_key` with the registered node `node_id`.

    Raises
    ------
    ValueError
        If `node_id` is not registered, or `public_key` is already mapped
        to a node.
    """
    with self.lock:
        node_is_known = node_id in self.node_ids
        if not node_is_known:
            raise ValueError(f"Node {node_id} not found")

        key_is_taken = public_key in self.public_key_to_node_id
        if key_is_taken:
            raise ValueError("Public key already in use")

        # Record the association in both directions
        self.node_id_to_public_key[node_id] = public_key
        self.public_key_to_node_id[public_key] = node_id
364
+
365
def get_node_public_key(self, node_id: int) -> Optional[bytes]:
    """Return the public key stored for `node_id`, or `None` if it has none.

    Raises
    ------
    ValueError
        If `node_id` is not a registered node.
    """
    with self.lock:
        if node_id in self.node_ids:
            return self.node_id_to_public_key.get(node_id)
        raise ValueError(f"Node {node_id} not found")
372
+
373
def get_node_id(self, node_public_key: bytes) -> Optional[int]:
    """Return the `node_id` mapped to `node_public_key`, or `None` if unmapped."""
    node_id = self.public_key_to_node_id.get(node_public_key)
    return node_id
376
+
377
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
378
def create_run(
    self,
    fab_id: Optional[str],
    fab_version: Optional[str],
    fab_hash: Optional[str],
    override_config: UserConfig,
    federation_options: ConfigsRecord,
) -> int:
    """Create a pending run with a randomly generated ID and return that ID.

    Falsy `fab_id`, `fab_version` and `fab_hash` values are stored as empty
    strings. Returns 0 (after logging an error) if the generated run ID is
    already in use.
    """
    with self.lock:
        # Draw a random run_id
        run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)

        # Bail out on an ID collision
        if run_id in self.run_ids:
            log(ERROR, "Unexpected run creation failure.")
            return 0

        initial_status = RunStatus(
            status=Status.PENDING,
            sub_status="",
            details="",
        )
        new_run = Run(
            run_id=run_id,
            fab_id=fab_id or "",
            fab_version=fab_version or "",
            fab_hash=fab_hash or "",
            override_config=override_config,
            pending_at=now().isoformat(),
            starting_at="",
            running_at="",
            finished_at="",
            status=initial_status,
        )
        self.run_ids[run_id] = RunRecord(run=new_run)

        # Keep the federation options of this run as passed in
        self.federation_options[run_id] = federation_options
        return run_id
417
+
418
def store_server_private_public_key(
    self, private_key: bytes, public_key: bytes
) -> None:
    """Store the server key pair, which may only be set while both are unset.

    Raises
    ------
    RuntimeError
        If a server private key or public key is already stored.
    """
    with self.lock:
        already_set = (
            self.server_private_key is not None
            or self.server_public_key is not None
        )
        if already_set:
            raise RuntimeError("Server private and public key already set")

        self.server_private_key = private_key
        self.server_public_key = public_key
428
+
429
def get_server_private_key(self) -> Optional[bytes]:
    """Return the stored `server_private_key`, or `None` if none is stored."""
    private_key = self.server_private_key
    return private_key
432
+
433
def get_server_public_key(self) -> Optional[bytes]:
    """Return the stored `server_public_key`, or `None` if none is stored."""
    public_key = self.server_public_key
    return public_key
436
+
437
def clear_supernode_auth_keys_and_credentials(self) -> None:
    """Reset the server key pair to `None` and empty the node public key set."""
    with self.lock:
        # Empty the existing set in place rather than replacing it
        self.node_public_keys.clear()
        self.server_public_key = None
        self.server_private_key = None
443
+
444
def store_node_public_keys(self, public_keys: set[bytes]) -> None:
    """Add every key in `public_keys` to the stored node public keys."""
    with self.lock:
        known_keys = self.node_public_keys
        known_keys.update(public_keys)
448
+
449
def store_node_public_key(self, public_key: bytes) -> None:
    """Add a single `public_key` to the stored node public keys."""
    with self.lock:
        known_keys = self.node_public_keys
        known_keys.add(public_key)
453
+
454
def get_node_public_keys(self) -> set[bytes]:
    """Return a copy of the currently stored `node_public_keys`.

    Mutating the returned set does not affect the stored keys.
    """
    with self.lock:
        snapshot = self.node_public_keys.copy()
    return snapshot
458
+
459
def get_run_ids(self) -> set[int]:
    """Return the IDs of all registered runs as a new set."""
    with self.lock:
        # Iterating a mapping yields its keys, so this snapshots the run IDs
        registered = set(self.run_ids)
    return registered
463
+
464
def get_run(self, run_id: int) -> Optional[Run]:
    """Return the `run` stored for `run_id`.

    An unknown `run_id` is logged at ERROR level and yields `None`.
    """
    with self.lock:
        record = self.run_ids.get(run_id)
        if record is None:
            log(ERROR, "`run_id` is invalid")
            return None
        return record.run
471
+
472
def get_run_status(self, run_ids: set[int]) -> dict[int, RunStatus]:
    """Return the status of each requested run that is registered.

    IDs in `run_ids` that are not registered are omitted from the result.
    """
    with self.lock:
        statuses: dict[int, RunStatus] = {}
        for requested_id in set(run_ids):
            if requested_id not in self.run_ids:
                continue
            statuses[requested_id] = self.run_ids[requested_id].run.status
        return statuses
480
+
481
def update_run_status(self, run_id: int, new_status: RunStatus) -> bool:
    """Update the status of the run with the specified `run_id`.

    The update is applied only if `run_id` is registered, the transition from
    the current status to `new_status` passes `is_valid_transition`, and
    `new_status` itself passes `has_valid_sub_status`. Every rejection is
    logged at ERROR level.

    Parameters
    ----------
    run_id : int
        Identifier of the run to update.
    new_status : RunStatus
        The status to store for the run.

    Returns
    -------
    bool
        `True` if the new status was stored, `False` if it was rejected.
    """
    with self.lock:
        # Check if the run_id exists
        if run_id not in self.run_ids:
            log(ERROR, "`run_id` is invalid")
            return False

        run_record = self.run_ids[run_id]

        # Check if the status transition is valid
        current_status = run_record.run.status
        if not is_valid_transition(current_status, new_status):
            log(
                ERROR,
                'Invalid status transition: from "%s" to "%s"',
                current_status.status,
                new_status.status,
            )
            return False

        # Check if the sub-status is valid. The *incoming* status is the one
        # about to be stored, so it is the one that must be validated (and
        # reported); checking the current status would let an invalid
        # sub-status through.
        if not has_valid_sub_status(new_status):
            log(
                ERROR,
                'Invalid sub-status "%s" for status "%s"',
                new_status.sub_status,
                new_status.status,
            )
            return False

        # Stamp the timestamp field that corresponds to the new status;
        # any other status leaves the timestamps untouched
        if new_status.status == Status.STARTING:
            run_record.run.starting_at = now().isoformat()
        elif new_status.status == Status.RUNNING:
            run_record.run.running_at = now().isoformat()
        elif new_status.status == Status.FINISHED:
            run_record.run.finished_at = now().isoformat()
        run_record.run.status = new_status
        return True
520
+
521
def get_pending_run_id(self) -> Optional[int]:
    """Return the `run_id` of the first run found with `Status.PENDING`.

    Runs are examined in the iteration order of `run_ids`; `None` is returned
    when no registered run is pending.
    """
    pending_ids = (
        candidate_id
        for candidate_id, record in self.run_ids.items()
        if record.run.status.status == Status.PENDING
    )
    # `next` stops at the first match, like breaking out of a loop
    return next(pending_ids, None)
533
+
534
def get_federation_options(self, run_id: int) -> Optional[ConfigsRecord]:
    """Return the federation options recorded for `run_id`.

    An unknown `run_id` is logged at ERROR level and yields `None`.
    """
    with self.lock:
        if run_id in self.run_ids:
            return self.federation_options[run_id]
        log(ERROR, "`run_id` is invalid")
        return None
541
+
542
def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool:
    """Record a ping from `node_id`, serving as a heartbeat.

    For a known node, `node_ids[node_id]` is replaced with the tuple
    `(time.time() + ping_interval, ping_interval)` and `True` is returned.
    For an unknown node nothing is stored and `False` is returned.
    """
    with self.lock:
        if node_id not in self.node_ids:
            return False
        deadline = time.time() + ping_interval
        self.node_ids[node_id] = (deadline, ping_interval)
        return True
549
+
550
def get_serverapp_context(self, run_id: int) -> Optional[Context]:
    """Return the context stored for `run_id`, or `None` if there is none."""
    stored_context = self.contexts.get(run_id)
    return stored_context
553
+
554
def set_serverapp_context(self, run_id: int, context: Context) -> None:
    """Store `context` as the context for `run_id`.

    Raises
    ------
    ValueError
        If `run_id` is not a registered run.
    """
    if run_id in self.run_ids:
        self.contexts[run_id] = context
        return
    raise ValueError(f"Run {run_id} not found")
559
+
560
def add_serverapp_log(self, run_id: int, log_message: str) -> None:
    """Append `log_message` to the serverapp logs of `run_id`.

    The entry is stored as a `(timestamp, message)` tuple, stamped with
    `now().timestamp()` while the run's `log_lock` is held.

    Raises
    ------
    ValueError
        If `run_id` is not a registered run.
    """
    if run_id not in self.run_ids:
        raise ValueError(f"Run {run_id} not found")
    record = self.run_ids[run_id]
    with record.log_lock:
        entry = (now().timestamp(), log_message)
        record.logs.append(entry)
567
+
568
def get_serverapp_log(
    self, run_id: int, after_timestamp: Optional[float]
) -> tuple[str, float]:
    """Return the serverapp logs of `run_id` recorded after a timestamp.

    Parameters
    ----------
    run_id : int
        Identifier of the run whose logs are requested.
    after_timestamp : Optional[float]
        Lower bound for the returned entries; `None` is treated as `0.0`.
        The search key is `(after_timestamp, "")`, so an entry stamped exactly
        `after_timestamp` with a non-empty message sorts after the key and is
        still included.

    Returns
    -------
    tuple[str, float]
        The selected messages concatenated in stored order, and the timestamp
        of the last stored entry, or `0.0` when no entry was selected.

    Raises
    ------
    ValueError
        If `run_id` is not a registered run.
    """
    if run_id not in self.run_ids:
        raise ValueError(f"Run {run_id} not found")
    record = self.run_ids[run_id]
    threshold = 0.0 if after_timestamp is None else after_timestamp
    with record.log_lock:
        entries = record.logs
        # Binary search relies on `logs` being ordered by timestamp
        start = bisect_right(entries, (threshold, ""))
        if start < len(entries):
            newest_timestamp = entries[-1][0]
        else:
            newest_timestamp = 0.0
        text = "".join(message for _, message in entries[start:])
    return text, newest_timestamp