flwr-nightly 1.8.0.dev20240315__py3-none-any.whl → 1.15.0.dev20250115__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (312) hide show
  1. flwr/cli/app.py +16 -2
  2. flwr/cli/build.py +181 -0
  3. flwr/cli/cli_user_auth_interceptor.py +90 -0
  4. flwr/cli/config_utils.py +343 -0
  5. flwr/cli/example.py +4 -1
  6. flwr/cli/install.py +253 -0
  7. flwr/cli/log.py +182 -0
  8. flwr/{server/superlink/state → cli/login}/__init__.py +4 -10
  9. flwr/cli/login/login.py +88 -0
  10. flwr/cli/ls.py +327 -0
  11. flwr/cli/new/__init__.py +1 -0
  12. flwr/cli/new/new.py +210 -66
  13. flwr/cli/new/templates/app/.gitignore.tpl +163 -0
  14. flwr/cli/new/templates/app/LICENSE.tpl +202 -0
  15. flwr/cli/new/templates/app/README.baseline.md.tpl +127 -0
  16. flwr/cli/new/templates/app/README.flowertune.md.tpl +66 -0
  17. flwr/cli/new/templates/app/README.md.tpl +16 -32
  18. flwr/cli/new/templates/app/code/__init__.baseline.py.tpl +1 -0
  19. flwr/cli/new/templates/app/code/__init__.py.tpl +1 -1
  20. flwr/cli/new/templates/app/code/client.baseline.py.tpl +58 -0
  21. flwr/cli/new/templates/app/code/client.huggingface.py.tpl +55 -0
  22. flwr/cli/new/templates/app/code/client.jax.py.tpl +50 -0
  23. flwr/cli/new/templates/app/code/client.mlx.py.tpl +73 -0
  24. flwr/cli/new/templates/app/code/client.numpy.py.tpl +7 -7
  25. flwr/cli/new/templates/app/code/client.pytorch.py.tpl +30 -21
  26. flwr/cli/new/templates/app/code/client.sklearn.py.tpl +63 -0
  27. flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +57 -1
  28. flwr/cli/new/templates/app/code/dataset.baseline.py.tpl +36 -0
  29. flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
  30. flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +126 -0
  31. flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +87 -0
  32. flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +78 -0
  33. flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +94 -0
  34. flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +83 -0
  35. flwr/cli/new/templates/app/code/model.baseline.py.tpl +80 -0
  36. flwr/cli/new/templates/app/code/server.baseline.py.tpl +46 -0
  37. flwr/cli/new/templates/app/code/server.huggingface.py.tpl +38 -0
  38. flwr/cli/new/templates/app/code/server.jax.py.tpl +26 -0
  39. flwr/cli/new/templates/app/code/server.mlx.py.tpl +31 -0
  40. flwr/cli/new/templates/app/code/server.numpy.py.tpl +22 -9
  41. flwr/cli/new/templates/app/code/server.pytorch.py.tpl +21 -18
  42. flwr/cli/new/templates/app/code/server.sklearn.py.tpl +36 -0
  43. flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +29 -1
  44. flwr/cli/new/templates/app/code/strategy.baseline.py.tpl +1 -0
  45. flwr/cli/new/templates/app/code/task.huggingface.py.tpl +102 -0
  46. flwr/cli/new/templates/app/code/task.jax.py.tpl +57 -0
  47. flwr/cli/new/templates/app/code/task.mlx.py.tpl +102 -0
  48. flwr/cli/new/templates/app/code/task.numpy.py.tpl +7 -0
  49. flwr/cli/new/templates/app/code/task.pytorch.py.tpl +29 -24
  50. flwr/cli/new/templates/app/code/task.sklearn.py.tpl +67 -0
  51. flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +53 -0
  52. flwr/cli/new/templates/app/code/utils.baseline.py.tpl +1 -0
  53. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +138 -0
  54. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +68 -0
  55. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +46 -0
  56. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +35 -0
  57. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +39 -0
  58. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +25 -12
  59. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +29 -14
  60. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +35 -0
  61. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +29 -14
  62. flwr/cli/run/__init__.py +1 -0
  63. flwr/cli/run/run.py +212 -34
  64. flwr/cli/stop.py +130 -0
  65. flwr/cli/utils.py +240 -5
  66. flwr/client/__init__.py +3 -2
  67. flwr/client/app.py +432 -255
  68. flwr/client/client.py +1 -11
  69. flwr/client/client_app.py +74 -13
  70. flwr/client/clientapp/__init__.py +22 -0
  71. flwr/client/clientapp/app.py +259 -0
  72. flwr/client/clientapp/clientappio_servicer.py +244 -0
  73. flwr/client/clientapp/utils.py +115 -0
  74. flwr/client/dpfedavg_numpy_client.py +7 -8
  75. flwr/client/grpc_adapter_client/__init__.py +15 -0
  76. flwr/client/grpc_adapter_client/connection.py +98 -0
  77. flwr/client/grpc_client/connection.py +21 -7
  78. flwr/client/grpc_rere_client/__init__.py +1 -1
  79. flwr/client/grpc_rere_client/client_interceptor.py +176 -0
  80. flwr/client/grpc_rere_client/connection.py +163 -56
  81. flwr/client/grpc_rere_client/grpc_adapter.py +167 -0
  82. flwr/client/heartbeat.py +74 -0
  83. flwr/client/message_handler/__init__.py +1 -1
  84. flwr/client/message_handler/message_handler.py +10 -11
  85. flwr/client/mod/__init__.py +5 -5
  86. flwr/client/mod/centraldp_mods.py +4 -2
  87. flwr/client/mod/comms_mods.py +5 -4
  88. flwr/client/mod/localdp_mod.py +10 -5
  89. flwr/client/mod/secure_aggregation/__init__.py +1 -1
  90. flwr/client/mod/secure_aggregation/secaggplus_mod.py +26 -26
  91. flwr/client/mod/utils.py +2 -4
  92. flwr/client/nodestate/__init__.py +26 -0
  93. flwr/client/nodestate/in_memory_nodestate.py +38 -0
  94. flwr/client/nodestate/nodestate.py +31 -0
  95. flwr/client/nodestate/nodestate_factory.py +38 -0
  96. flwr/client/numpy_client.py +8 -31
  97. flwr/client/rest_client/__init__.py +1 -1
  98. flwr/client/rest_client/connection.py +199 -176
  99. flwr/client/run_info_store.py +112 -0
  100. flwr/client/supernode/__init__.py +24 -0
  101. flwr/client/supernode/app.py +321 -0
  102. flwr/client/typing.py +1 -0
  103. flwr/common/__init__.py +17 -11
  104. flwr/common/address.py +47 -3
  105. flwr/common/args.py +153 -0
  106. flwr/common/auth_plugin/__init__.py +24 -0
  107. flwr/common/auth_plugin/auth_plugin.py +121 -0
  108. flwr/common/config.py +243 -0
  109. flwr/common/constant.py +135 -1
  110. flwr/common/context.py +32 -2
  111. flwr/common/date.py +22 -4
  112. flwr/common/differential_privacy.py +2 -2
  113. flwr/common/dp.py +2 -4
  114. flwr/common/exit_handlers.py +3 -3
  115. flwr/common/grpc.py +164 -5
  116. flwr/common/logger.py +230 -12
  117. flwr/common/message.py +191 -106
  118. flwr/common/object_ref.py +179 -44
  119. flwr/common/pyproject.py +1 -0
  120. flwr/common/record/__init__.py +2 -1
  121. flwr/common/record/configsrecord.py +58 -18
  122. flwr/common/record/metricsrecord.py +57 -17
  123. flwr/common/record/parametersrecord.py +88 -20
  124. flwr/common/record/recordset.py +153 -30
  125. flwr/common/record/typeddict.py +30 -55
  126. flwr/common/recordset_compat.py +31 -12
  127. flwr/common/retry_invoker.py +123 -30
  128. flwr/common/secure_aggregation/__init__.py +1 -1
  129. flwr/common/secure_aggregation/crypto/__init__.py +1 -1
  130. flwr/common/secure_aggregation/crypto/shamir.py +11 -11
  131. flwr/common/secure_aggregation/crypto/symmetric_encryption.py +68 -4
  132. flwr/common/secure_aggregation/ndarrays_arithmetic.py +17 -17
  133. flwr/common/secure_aggregation/quantization.py +8 -8
  134. flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
  135. flwr/common/secure_aggregation/secaggplus_utils.py +10 -12
  136. flwr/common/serde.py +304 -23
  137. flwr/common/telemetry.py +65 -29
  138. flwr/common/typing.py +120 -19
  139. flwr/common/version.py +17 -3
  140. flwr/proto/clientappio_pb2.py +45 -0
  141. flwr/proto/clientappio_pb2.pyi +132 -0
  142. flwr/proto/clientappio_pb2_grpc.py +135 -0
  143. flwr/proto/clientappio_pb2_grpc.pyi +53 -0
  144. flwr/proto/exec_pb2.py +62 -0
  145. flwr/proto/exec_pb2.pyi +212 -0
  146. flwr/proto/exec_pb2_grpc.py +237 -0
  147. flwr/proto/exec_pb2_grpc.pyi +93 -0
  148. flwr/proto/fab_pb2.py +31 -0
  149. flwr/proto/fab_pb2.pyi +65 -0
  150. flwr/proto/fab_pb2_grpc.py +4 -0
  151. flwr/proto/fab_pb2_grpc.pyi +4 -0
  152. flwr/proto/fleet_pb2.py +42 -23
  153. flwr/proto/fleet_pb2.pyi +123 -1
  154. flwr/proto/fleet_pb2_grpc.py +170 -0
  155. flwr/proto/fleet_pb2_grpc.pyi +61 -0
  156. flwr/proto/grpcadapter_pb2.py +32 -0
  157. flwr/proto/grpcadapter_pb2.pyi +43 -0
  158. flwr/proto/grpcadapter_pb2_grpc.py +66 -0
  159. flwr/proto/grpcadapter_pb2_grpc.pyi +24 -0
  160. flwr/proto/log_pb2.py +29 -0
  161. flwr/proto/log_pb2.pyi +39 -0
  162. flwr/proto/log_pb2_grpc.py +4 -0
  163. flwr/proto/log_pb2_grpc.pyi +4 -0
  164. flwr/proto/message_pb2.py +41 -0
  165. flwr/proto/message_pb2.pyi +128 -0
  166. flwr/proto/message_pb2_grpc.py +4 -0
  167. flwr/proto/message_pb2_grpc.pyi +4 -0
  168. flwr/proto/node_pb2.py +2 -2
  169. flwr/proto/node_pb2.pyi +1 -4
  170. flwr/proto/recordset_pb2.py +35 -33
  171. flwr/proto/recordset_pb2.pyi +40 -14
  172. flwr/proto/run_pb2.py +64 -0
  173. flwr/proto/run_pb2.pyi +268 -0
  174. flwr/proto/run_pb2_grpc.py +4 -0
  175. flwr/proto/run_pb2_grpc.pyi +4 -0
  176. flwr/proto/serverappio_pb2.py +52 -0
  177. flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +62 -20
  178. flwr/proto/serverappio_pb2_grpc.py +410 -0
  179. flwr/proto/serverappio_pb2_grpc.pyi +160 -0
  180. flwr/proto/simulationio_pb2.py +38 -0
  181. flwr/proto/simulationio_pb2.pyi +65 -0
  182. flwr/proto/simulationio_pb2_grpc.py +239 -0
  183. flwr/proto/simulationio_pb2_grpc.pyi +94 -0
  184. flwr/proto/task_pb2.py +7 -8
  185. flwr/proto/task_pb2.pyi +8 -5
  186. flwr/proto/transport_pb2.py +8 -8
  187. flwr/proto/transport_pb2.pyi +9 -6
  188. flwr/server/__init__.py +2 -10
  189. flwr/server/app.py +579 -402
  190. flwr/server/client_manager.py +8 -6
  191. flwr/server/compat/app.py +6 -62
  192. flwr/server/compat/app_utils.py +14 -9
  193. flwr/server/compat/driver_client_proxy.py +25 -59
  194. flwr/server/compat/legacy_context.py +5 -4
  195. flwr/server/driver/__init__.py +2 -0
  196. flwr/server/driver/driver.py +36 -131
  197. flwr/server/driver/grpc_driver.py +220 -81
  198. flwr/server/driver/inmemory_driver.py +183 -0
  199. flwr/server/history.py +28 -29
  200. flwr/server/run_serverapp.py +15 -126
  201. flwr/server/server.py +50 -44
  202. flwr/server/server_app.py +59 -10
  203. flwr/server/serverapp/__init__.py +22 -0
  204. flwr/server/serverapp/app.py +256 -0
  205. flwr/server/serverapp_components.py +52 -0
  206. flwr/server/strategy/__init__.py +2 -2
  207. flwr/server/strategy/aggregate.py +37 -23
  208. flwr/server/strategy/bulyan.py +9 -9
  209. flwr/server/strategy/dp_adaptive_clipping.py +25 -25
  210. flwr/server/strategy/dp_fixed_clipping.py +23 -22
  211. flwr/server/strategy/dpfedavg_adaptive.py +8 -8
  212. flwr/server/strategy/dpfedavg_fixed.py +13 -12
  213. flwr/server/strategy/fault_tolerant_fedavg.py +11 -11
  214. flwr/server/strategy/fedadagrad.py +9 -9
  215. flwr/server/strategy/fedadam.py +20 -10
  216. flwr/server/strategy/fedavg.py +16 -16
  217. flwr/server/strategy/fedavg_android.py +17 -17
  218. flwr/server/strategy/fedavgm.py +9 -9
  219. flwr/server/strategy/fedmedian.py +5 -5
  220. flwr/server/strategy/fedopt.py +6 -6
  221. flwr/server/strategy/fedprox.py +7 -7
  222. flwr/server/strategy/fedtrimmedavg.py +8 -8
  223. flwr/server/strategy/fedxgb_bagging.py +12 -12
  224. flwr/server/strategy/fedxgb_cyclic.py +10 -10
  225. flwr/server/strategy/fedxgb_nn_avg.py +6 -6
  226. flwr/server/strategy/fedyogi.py +9 -9
  227. flwr/server/strategy/krum.py +9 -9
  228. flwr/server/strategy/qfedavg.py +16 -16
  229. flwr/server/strategy/strategy.py +10 -10
  230. flwr/server/superlink/driver/__init__.py +2 -2
  231. flwr/server/superlink/driver/serverappio_grpc.py +61 -0
  232. flwr/server/superlink/driver/serverappio_servicer.py +361 -0
  233. flwr/server/superlink/ffs/__init__.py +24 -0
  234. flwr/server/superlink/ffs/disk_ffs.py +108 -0
  235. flwr/server/superlink/ffs/ffs.py +79 -0
  236. flwr/server/superlink/ffs/ffs_factory.py +47 -0
  237. flwr/server/superlink/fleet/__init__.py +1 -1
  238. flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
  239. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +162 -0
  240. flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
  241. flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +4 -2
  242. flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +3 -2
  243. flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
  244. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -154
  245. flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
  246. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +120 -13
  247. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +228 -0
  248. flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
  249. flwr/server/superlink/fleet/message_handler/message_handler.py +156 -13
  250. flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
  251. flwr/server/superlink/fleet/rest_rere/rest_api.py +119 -81
  252. flwr/server/superlink/fleet/vce/__init__.py +1 -0
  253. flwr/server/superlink/fleet/vce/backend/__init__.py +4 -4
  254. flwr/server/superlink/fleet/vce/backend/backend.py +8 -9
  255. flwr/server/superlink/fleet/vce/backend/raybackend.py +87 -68
  256. flwr/server/superlink/fleet/vce/vce_api.py +208 -146
  257. flwr/server/superlink/linkstate/__init__.py +28 -0
  258. flwr/server/superlink/linkstate/in_memory_linkstate.py +569 -0
  259. flwr/server/superlink/linkstate/linkstate.py +376 -0
  260. flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +19 -10
  261. flwr/server/superlink/linkstate/sqlite_linkstate.py +1196 -0
  262. flwr/server/superlink/linkstate/utils.py +399 -0
  263. flwr/server/superlink/simulation/__init__.py +15 -0
  264. flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
  265. flwr/server/superlink/simulation/simulationio_servicer.py +186 -0
  266. flwr/server/superlink/utils.py +65 -0
  267. flwr/server/typing.py +2 -0
  268. flwr/server/utils/__init__.py +1 -1
  269. flwr/server/utils/tensorboard.py +5 -5
  270. flwr/server/utils/validator.py +40 -45
  271. flwr/server/workflow/default_workflows.py +70 -26
  272. flwr/server/workflow/secure_aggregation/secagg_workflow.py +1 -0
  273. flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +40 -27
  274. flwr/simulation/__init__.py +12 -5
  275. flwr/simulation/app.py +247 -315
  276. flwr/simulation/legacy_app.py +404 -0
  277. flwr/simulation/ray_transport/__init__.py +1 -1
  278. flwr/simulation/ray_transport/ray_actor.py +42 -67
  279. flwr/simulation/ray_transport/ray_client_proxy.py +37 -17
  280. flwr/simulation/ray_transport/utils.py +1 -0
  281. flwr/simulation/run_simulation.py +306 -163
  282. flwr/simulation/simulationio_connection.py +89 -0
  283. flwr/superexec/__init__.py +15 -0
  284. flwr/superexec/app.py +59 -0
  285. flwr/superexec/deployment.py +188 -0
  286. flwr/superexec/exec_grpc.py +80 -0
  287. flwr/superexec/exec_servicer.py +231 -0
  288. flwr/superexec/exec_user_auth_interceptor.py +101 -0
  289. flwr/superexec/executor.py +96 -0
  290. flwr/superexec/simulation.py +124 -0
  291. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/METADATA +33 -26
  292. flwr_nightly-1.15.0.dev20250115.dist-info/RECORD +328 -0
  293. flwr_nightly-1.15.0.dev20250115.dist-info/entry_points.txt +12 -0
  294. flwr/cli/flower_toml.py +0 -140
  295. flwr/cli/new/templates/app/flower.toml.tpl +0 -13
  296. flwr/cli/new/templates/app/requirements.numpy.txt.tpl +0 -2
  297. flwr/cli/new/templates/app/requirements.pytorch.txt.tpl +0 -4
  298. flwr/cli/new/templates/app/requirements.tensorflow.txt.tpl +0 -4
  299. flwr/client/node_state.py +0 -48
  300. flwr/client/node_state_tests.py +0 -65
  301. flwr/proto/driver_pb2.py +0 -44
  302. flwr/proto/driver_pb2_grpc.py +0 -169
  303. flwr/proto/driver_pb2_grpc.pyi +0 -66
  304. flwr/server/superlink/driver/driver_grpc.py +0 -54
  305. flwr/server/superlink/driver/driver_servicer.py +0 -129
  306. flwr/server/superlink/state/in_memory_state.py +0 -230
  307. flwr/server/superlink/state/sqlite_state.py +0 -630
  308. flwr/server/superlink/state/state.py +0 -154
  309. flwr_nightly-1.8.0.dev20240315.dist-info/RECORD +0 -211
  310. flwr_nightly-1.8.0.dev20240315.dist-info/entry_points.txt +0 -9
  311. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/LICENSE +0 -0
  312. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/WHEEL +0 -0
@@ -0,0 +1,569 @@
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """In-memory LinkState implementation."""
16
+
17
+
18
+ import threading
19
+ import time
20
+ from bisect import bisect_right
21
+ from dataclasses import dataclass, field
22
+ from logging import ERROR, WARNING
23
+ from typing import Optional
24
+ from uuid import UUID, uuid4
25
+
26
+ from flwr.common import Context, log, now
27
+ from flwr.common.constant import (
28
+ MESSAGE_TTL_TOLERANCE,
29
+ NODE_ID_NUM_BYTES,
30
+ RUN_ID_NUM_BYTES,
31
+ SUPERLINK_NODE_ID,
32
+ Status,
33
+ )
34
+ from flwr.common.record import ConfigsRecord
35
+ from flwr.common.typing import Run, RunStatus, UserConfig
36
+ from flwr.proto.task_pb2 import TaskIns, TaskRes # pylint: disable=E0611
37
+ from flwr.server.superlink.linkstate.linkstate import LinkState
38
+ from flwr.server.utils import validate_task_ins_or_res
39
+
40
+ from .utils import (
41
+ generate_rand_int_from_bytes,
42
+ has_valid_sub_status,
43
+ is_valid_transition,
44
+ verify_found_taskres,
45
+ verify_taskins_ids,
46
+ )
47
+
48
+
49
@dataclass
class RunRecord:  # pylint: disable=R0902
    """Bundle a `Run` with the log entries collected for it."""

    # The run this record describes
    run: Run
    # Log entries stored as `(float, str)` tuples; starts out empty
    logs: list[tuple[float, str]] = field(default_factory=list)
    # One lock per record (presumably guards `logs` -- confirm against callers)
    log_lock: threading.Lock = field(default_factory=threading.Lock)
56
+
57
+
58
+ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
59
+ """In-memory LinkState implementation."""
60
+
61
+ def __init__(self) -> None:
62
+
63
+ # Map node_id to (online_until, ping_interval)
64
+ self.node_ids: dict[int, tuple[float, float]] = {}
65
+ self.public_key_to_node_id: dict[bytes, int] = {}
66
+ self.node_id_to_public_key: dict[int, bytes] = {}
67
+
68
+ # Map run_id to RunRecord
69
+ self.run_ids: dict[int, RunRecord] = {}
70
+ self.contexts: dict[int, Context] = {}
71
+ self.federation_options: dict[int, ConfigsRecord] = {}
72
+ self.task_ins_store: dict[UUID, TaskIns] = {}
73
+ self.task_res_store: dict[UUID, TaskRes] = {}
74
+ self.task_ins_id_to_task_res_id: dict[UUID, UUID] = {}
75
+
76
+ self.node_public_keys: set[bytes] = set()
77
+ self.server_public_key: Optional[bytes] = None
78
+ self.server_private_key: Optional[bytes] = None
79
+
80
+ self.lock = threading.RLock()
81
+
82
+ def store_task_ins(self, task_ins: TaskIns) -> Optional[UUID]:
83
+ """Store one TaskIns."""
84
+ # Validate task
85
+ errors = validate_task_ins_or_res(task_ins)
86
+ if any(errors):
87
+ log(ERROR, errors)
88
+ return None
89
+ # Validate run_id
90
+ if task_ins.run_id not in self.run_ids:
91
+ log(ERROR, "Invalid run ID for TaskIns: %s", task_ins.run_id)
92
+ return None
93
+ # Validate source node ID
94
+ if task_ins.task.producer.node_id != SUPERLINK_NODE_ID:
95
+ log(
96
+ ERROR,
97
+ "Invalid source node ID for TaskIns: %s",
98
+ task_ins.task.producer.node_id,
99
+ )
100
+ return None
101
+ # Validate destination node ID
102
+ if task_ins.task.consumer.node_id not in self.node_ids:
103
+ log(
104
+ ERROR,
105
+ "Invalid destination node ID for TaskIns: %s",
106
+ task_ins.task.consumer.node_id,
107
+ )
108
+ return None
109
+
110
+ # Create task_id
111
+ task_id = uuid4()
112
+
113
+ # Store TaskIns
114
+ task_ins.task_id = str(task_id)
115
+ with self.lock:
116
+ self.task_ins_store[task_id] = task_ins
117
+
118
+ # Return the new task_id
119
+ return task_id
120
+
121
+ def get_task_ins(self, node_id: int, limit: Optional[int]) -> list[TaskIns]:
122
+ """Get all TaskIns that have not been delivered yet."""
123
+ if limit is not None and limit < 1:
124
+ raise AssertionError("`limit` must be >= 1")
125
+
126
+ # Find TaskIns for node_id that were not delivered yet
127
+ task_ins_list: list[TaskIns] = []
128
+ current_time = time.time()
129
+ with self.lock:
130
+ for _, task_ins in self.task_ins_store.items():
131
+ if (
132
+ task_ins.task.consumer.node_id == node_id
133
+ and task_ins.task.delivered_at == ""
134
+ and task_ins.task.created_at + task_ins.task.ttl > current_time
135
+ ):
136
+ task_ins_list.append(task_ins)
137
+ if limit and len(task_ins_list) == limit:
138
+ break
139
+
140
+ # Mark all of them as delivered
141
+ delivered_at = now().isoformat()
142
+ for task_ins in task_ins_list:
143
+ task_ins.task.delivered_at = delivered_at
144
+
145
+ # Return TaskIns
146
+ return task_ins_list
147
+
148
+ # pylint: disable=R0911
149
+ def store_task_res(self, task_res: TaskRes) -> Optional[UUID]:
150
+ """Store one TaskRes."""
151
+ # Validate task
152
+ errors = validate_task_ins_or_res(task_res)
153
+ if any(errors):
154
+ log(ERROR, errors)
155
+ return None
156
+
157
+ with self.lock:
158
+ # Check if the TaskIns it is replying to exists and is valid
159
+ task_ins_id = task_res.task.ancestry[0]
160
+ task_ins = self.task_ins_store.get(UUID(task_ins_id))
161
+
162
+ # Ensure that the consumer_id of taskIns matches the producer_id of taskRes.
163
+ if (
164
+ task_ins
165
+ and task_res
166
+ and task_ins.task.consumer.node_id != task_res.task.producer.node_id
167
+ ):
168
+ return None
169
+
170
+ if task_ins is None:
171
+ log(ERROR, "TaskIns with task_id %s does not exist.", task_ins_id)
172
+ return None
173
+
174
+ if task_ins.task.created_at + task_ins.task.ttl <= time.time():
175
+ log(
176
+ ERROR,
177
+ "Failed to store TaskRes: TaskIns with task_id %s has expired.",
178
+ task_ins_id,
179
+ )
180
+ return None
181
+
182
+ # Fail if the TaskRes TTL exceeds the
183
+ # expiration time of the TaskIns it replies to.
184
+ # Condition: TaskIns.created_at + TaskIns.ttl ≥
185
+ # TaskRes.created_at + TaskRes.ttl
186
+ # A small tolerance is introduced to account
187
+ # for floating-point precision issues.
188
+ max_allowed_ttl = (
189
+ task_ins.task.created_at + task_ins.task.ttl - task_res.task.created_at
190
+ )
191
+ if task_res.task.ttl and (
192
+ task_res.task.ttl - max_allowed_ttl > MESSAGE_TTL_TOLERANCE
193
+ ):
194
+ log(
195
+ WARNING,
196
+ "Received TaskRes with TTL %.2f "
197
+ "exceeding the allowed maximum TTL %.2f.",
198
+ task_res.task.ttl,
199
+ max_allowed_ttl,
200
+ )
201
+ return None
202
+
203
+ # Validate run_id
204
+ if task_res.run_id not in self.run_ids:
205
+ log(ERROR, "`run_id` is invalid")
206
+ return None
207
+
208
+ # Create task_id
209
+ task_id = uuid4()
210
+
211
+ # Store TaskRes
212
+ task_res.task_id = str(task_id)
213
+ with self.lock:
214
+ self.task_res_store[task_id] = task_res
215
+ self.task_ins_id_to_task_res_id[UUID(task_ins_id)] = task_id
216
+
217
+ # Return the new task_id
218
+ return task_id
219
+
220
+ def get_task_res(self, task_ids: set[UUID]) -> list[TaskRes]:
221
+ """Get TaskRes for the given TaskIns IDs."""
222
+ ret: dict[UUID, TaskRes] = {}
223
+
224
+ with self.lock:
225
+ current = time.time()
226
+
227
+ # Verify TaskIns IDs
228
+ ret = verify_taskins_ids(
229
+ inquired_taskins_ids=task_ids,
230
+ found_taskins_dict=self.task_ins_store,
231
+ current_time=current,
232
+ )
233
+
234
+ # Find all TaskRes
235
+ task_res_found: list[TaskRes] = []
236
+ for task_id in task_ids:
237
+ # If TaskRes exists and is not delivered, add it to the list
238
+ if task_res_id := self.task_ins_id_to_task_res_id.get(task_id):
239
+ task_res = self.task_res_store[task_res_id]
240
+ if task_res.task.delivered_at == "":
241
+ task_res_found.append(task_res)
242
+ tmp_ret_dict = verify_found_taskres(
243
+ inquired_taskins_ids=task_ids,
244
+ found_taskins_dict=self.task_ins_store,
245
+ found_taskres_list=task_res_found,
246
+ current_time=current,
247
+ )
248
+ ret.update(tmp_ret_dict)
249
+
250
+ # Mark existing TaskRes to be returned as delivered
251
+ delivered_at = now().isoformat()
252
+ for task_res in task_res_found:
253
+ task_res.task.delivered_at = delivered_at
254
+
255
+ return list(ret.values())
256
+
257
+ def delete_tasks(self, task_ins_ids: set[UUID]) -> None:
258
+ """Delete TaskIns/TaskRes pairs based on provided TaskIns IDs."""
259
+ if not task_ins_ids:
260
+ return
261
+
262
+ with self.lock:
263
+ for task_id in task_ins_ids:
264
+ # Delete TaskIns
265
+ if task_id in self.task_ins_store:
266
+ del self.task_ins_store[task_id]
267
+ # Delete TaskRes
268
+ if task_id in self.task_ins_id_to_task_res_id:
269
+ task_res_id = self.task_ins_id_to_task_res_id.pop(task_id)
270
+ del self.task_res_store[task_res_id]
271
+
272
+ def get_task_ids_from_run_id(self, run_id: int) -> set[UUID]:
273
+ """Get all TaskIns IDs for the given run_id."""
274
+ task_id_list: set[UUID] = set()
275
+ with self.lock:
276
+ for task_id, task_ins in self.task_ins_store.items():
277
+ if task_ins.run_id == run_id:
278
+ task_id_list.add(task_id)
279
+
280
+ return task_id_list
281
+
282
+ def num_task_ins(self) -> int:
283
+ """Calculate the number of task_ins in store.
284
+
285
+ This includes delivered but not yet deleted task_ins.
286
+ """
287
+ return len(self.task_ins_store)
288
+
289
+ def num_task_res(self) -> int:
290
+ """Calculate the number of task_res in store.
291
+
292
+ This includes delivered but not yet deleted task_res.
293
+ """
294
+ return len(self.task_res_store)
295
+
296
+ def create_node(self, ping_interval: float) -> int:
297
+ """Create, store in the link state, and return `node_id`."""
298
+ # Sample a random int64 as node_id
299
+ node_id = generate_rand_int_from_bytes(
300
+ NODE_ID_NUM_BYTES, exclude=[SUPERLINK_NODE_ID, 0]
301
+ )
302
+
303
+ with self.lock:
304
+ if node_id in self.node_ids:
305
+ log(ERROR, "Unexpected node registration failure.")
306
+ return 0
307
+
308
+ self.node_ids[node_id] = (time.time() + ping_interval, ping_interval)
309
+ return node_id
310
+
311
+ def delete_node(self, node_id: int) -> None:
312
+ """Delete a node."""
313
+ with self.lock:
314
+ if node_id not in self.node_ids:
315
+ raise ValueError(f"Node {node_id} not found")
316
+
317
+ # Remove node ID <> public key mappings
318
+ if pk := self.node_id_to_public_key.pop(node_id, None):
319
+ del self.public_key_to_node_id[pk]
320
+
321
+ del self.node_ids[node_id]
322
+
323
+ def get_nodes(self, run_id: int) -> set[int]:
324
+ """Return all available nodes.
325
+
326
+ Constraints
327
+ -----------
328
+ If the provided `run_id` does not exist or has no matching nodes,
329
+ an empty `Set` MUST be returned.
330
+ """
331
+ with self.lock:
332
+ if run_id not in self.run_ids:
333
+ return set()
334
+ current_time = time.time()
335
+ return {
336
+ node_id
337
+ for node_id, (online_until, _) in self.node_ids.items()
338
+ if online_until > current_time
339
+ }
340
+
341
+ def set_node_public_key(self, node_id: int, public_key: bytes) -> None:
342
+ """Set `public_key` for the specified `node_id`."""
343
+ with self.lock:
344
+ if node_id not in self.node_ids:
345
+ raise ValueError(f"Node {node_id} not found")
346
+
347
+ if public_key in self.public_key_to_node_id:
348
+ raise ValueError("Public key already in use")
349
+
350
+ self.public_key_to_node_id[public_key] = node_id
351
+ self.node_id_to_public_key[node_id] = public_key
352
+
353
+ def get_node_public_key(self, node_id: int) -> Optional[bytes]:
354
+ """Get `public_key` for the specified `node_id`."""
355
+ with self.lock:
356
+ if node_id not in self.node_ids:
357
+ raise ValueError(f"Node {node_id} not found")
358
+
359
+ return self.node_id_to_public_key.get(node_id)
360
+
361
+ def get_node_id(self, node_public_key: bytes) -> Optional[int]:
362
+ """Retrieve stored `node_id` filtered by `node_public_keys`."""
363
+ return self.public_key_to_node_id.get(node_public_key)
364
+
365
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
366
+ def create_run(
367
+ self,
368
+ fab_id: Optional[str],
369
+ fab_version: Optional[str],
370
+ fab_hash: Optional[str],
371
+ override_config: UserConfig,
372
+ federation_options: ConfigsRecord,
373
+ ) -> int:
374
+ """Create a new run for the specified `fab_hash`."""
375
+ # Sample a random int64 as run_id
376
+ with self.lock:
377
+ run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
378
+
379
+ if run_id not in self.run_ids:
380
+ run_record = RunRecord(
381
+ run=Run(
382
+ run_id=run_id,
383
+ fab_id=fab_id if fab_id else "",
384
+ fab_version=fab_version if fab_version else "",
385
+ fab_hash=fab_hash if fab_hash else "",
386
+ override_config=override_config,
387
+ pending_at=now().isoformat(),
388
+ starting_at="",
389
+ running_at="",
390
+ finished_at="",
391
+ status=RunStatus(
392
+ status=Status.PENDING,
393
+ sub_status="",
394
+ details="",
395
+ ),
396
+ ),
397
+ )
398
+ self.run_ids[run_id] = run_record
399
+
400
+ # Record federation options. Leave empty if not passed
401
+ self.federation_options[run_id] = federation_options
402
+ return run_id
403
+ log(ERROR, "Unexpected run creation failure.")
404
+ return 0
405
+
406
+ def store_server_private_public_key(
407
+ self, private_key: bytes, public_key: bytes
408
+ ) -> None:
409
+ """Store `server_private_key` and `server_public_key` in the link state."""
410
+ with self.lock:
411
+ if self.server_private_key is None and self.server_public_key is None:
412
+ self.server_private_key = private_key
413
+ self.server_public_key = public_key
414
+ else:
415
+ raise RuntimeError("Server private and public key already set")
416
+
417
+ def get_server_private_key(self) -> Optional[bytes]:
418
+ """Retrieve `server_private_key` in urlsafe bytes."""
419
+ return self.server_private_key
420
+
421
+ def get_server_public_key(self) -> Optional[bytes]:
422
+ """Retrieve `server_public_key` in urlsafe bytes."""
423
+ return self.server_public_key
424
+
425
+ def clear_supernode_auth_keys_and_credentials(self) -> None:
426
+ """Clear stored `node_public_keys` and credentials in the link state if any."""
427
+ with self.lock:
428
+ self.server_private_key = None
429
+ self.server_public_key = None
430
+ self.node_public_keys.clear()
431
+
432
+ def store_node_public_keys(self, public_keys: set[bytes]) -> None:
433
+ """Store a set of `node_public_keys` in the link state."""
434
+ with self.lock:
435
+ self.node_public_keys.update(public_keys)
436
+
437
+ def store_node_public_key(self, public_key: bytes) -> None:
438
+ """Store a `node_public_key` in the link state."""
439
+ with self.lock:
440
+ self.node_public_keys.add(public_key)
441
+
442
+ def get_node_public_keys(self) -> set[bytes]:
443
+ """Retrieve all currently stored `node_public_keys` as a set."""
444
+ with self.lock:
445
+ return self.node_public_keys.copy()
446
+
447
+ def get_run_ids(self) -> set[int]:
448
+ """Retrieve all run IDs."""
449
+ with self.lock:
450
+ return set(self.run_ids.keys())
451
+
452
def get_run(self, run_id: int) -> Optional[Run]:
    """Return the `run` of the record registered under `run_id`.

    Logs an error and returns `None` when `run_id` is not registered.
    """
    with self.lock:
        record = self.run_ids.get(run_id)
        if record is None:
            log(ERROR, "`run_id` is invalid")
            return None
        return record.run
459
+
460
def get_run_status(self, run_ids: set[int]) -> dict[int, RunStatus]:
    """Map each registered ID in `run_ids` to that run's current status.

    IDs that are not registered are left out of the result.
    """
    with self.lock:
        statuses = {}
        for wanted_id in set(run_ids):
            if wanted_id not in self.run_ids:
                continue
            statuses[wanted_id] = self.run_ids[wanted_id].run.status
        return statuses
468
+
469
def update_run_status(self, run_id: int, new_status: RunStatus) -> bool:
    """Update the status of the run with the specified `run_id`.

    Returns `True` once `new_status` has been stored. Returns `False`, after
    logging an error, when `run_id` is not registered, when
    `is_valid_transition` rejects the move from the current status to
    `new_status`, or when `has_valid_sub_status` rejects `new_status`.
    """
    with self.lock:
        # Check if the run_id exists
        if run_id not in self.run_ids:
            log(ERROR, "`run_id` is invalid")
            return False

        run_record = self.run_ids[run_id]

        # Check if the status transition is valid
        current_status = run_record.run.status
        if not is_valid_transition(current_status, new_status):
            log(
                ERROR,
                'Invalid status transition: from "%s" to "%s"',
                current_status.status,
                new_status.status,
            )
            return False

        # Check the sub-status of the *incoming* status: it is the value
        # about to be stored. Checking only the already-stored status would
        # let an invalid sub-status in and then block every later update.
        if not has_valid_sub_status(new_status):
            log(
                ERROR,
                'Invalid sub-status "%s" for status "%s"',
                new_status.sub_status,
                new_status.status,
            )
            return False

        # Record when the run entered the new state, then store the status
        if new_status.status == Status.STARTING:
            run_record.run.starting_at = now().isoformat()
        elif new_status.status == Status.RUNNING:
            run_record.run.running_at = now().isoformat()
        elif new_status.status == Status.FINISHED:
            run_record.run.finished_at = now().isoformat()
        run_record.run.status = new_status
        return True
508
+
509
def get_pending_run_id(self) -> Optional[int]:
    """Get the `run_id` of a run with `Status.PENDING` status, if any.

    Returns the first pending run found while iterating the registered runs,
    or `None` when no run is pending.
    """
    # Hold the state lock while iterating, as the other run accessors do:
    # a run registered concurrently would otherwise change the dict's size
    # mid-iteration.
    # NOTE(review): assumes no caller already holds `self.lock`, or that the
    # lock is re-entrant - confirm against the constructor.
    with self.lock:
        for run_id, run_rec in self.run_ids.items():
            if run_rec.run.status.status == Status.PENDING:
                return run_id
    return None
521
+
522
def get_federation_options(self, run_id: int) -> Optional[ConfigsRecord]:
    """Return the federation options stored for `run_id`.

    Logs an error and returns `None` when `run_id` is not registered.
    """
    with self.lock:
        if run_id in self.run_ids:
            return self.federation_options[run_id]
        log(ERROR, "`run_id` is invalid")
        return None
529
+
530
def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool:
    """Record a ping (heartbeat) received from a node.

    For a registered `node_id`, stores the pair
    `(time.time() + ping_interval, ping_interval)` and returns `True`.
    Returns `False` without changing anything when the node is unknown.
    """
    with self.lock:
        if node_id not in self.node_ids:
            return False
        next_deadline = time.time() + ping_interval
        self.node_ids[node_id] = (next_deadline, ping_interval)
        return True
537
+
538
def get_serverapp_context(self, run_id: int) -> Optional[Context]:
    """Return the context stored for `run_id`, or `None` if there is none."""
    stored_context = self.contexts.get(run_id)
    return stored_context
541
+
542
def set_serverapp_context(self, run_id: int, context: Context) -> None:
    """Store `context` for `run_id`, replacing any previous one.

    Raises `ValueError` when `run_id` is not a registered run.
    """
    is_registered = run_id in self.run_ids
    if not is_registered:
        raise ValueError(f"Run {run_id} not found")
    self.contexts[run_id] = context
547
+
548
def add_serverapp_log(self, run_id: int, log_message: str) -> None:
    """Append `log_message` to the serverapp logs of `run_id`.

    The entry is stored as a `(timestamp, message)` tuple, stamped while the
    run's `log_lock` is held. Raises `ValueError` when `run_id` is not a
    registered run.
    """
    if run_id not in self.run_ids:
        raise ValueError(f"Run {run_id} not found")
    record = self.run_ids[run_id]
    with record.log_lock:
        entry = (now().timestamp(), log_message)
        record.logs.append(entry)
555
+
556
def get_serverapp_log(
    self, run_id: int, after_timestamp: Optional[float]
) -> tuple[str, float]:
    """Get the serverapp logs for the specified `run_id`.

    Returns a tuple `(text, latest_timestamp)`. `text` is the concatenation
    of every stored message whose timestamp is strictly greater than
    `after_timestamp` (all messages when it is `None`). `latest_timestamp`
    is the timestamp of the newest stored entry, or `0.0` when no message
    is returned. Raises `ValueError` when `run_id` is not a registered run.
    """
    if run_id not in self.run_ids:
        raise ValueError(f"Run {run_id} not found")
    run = self.run_ids[run_id]
    with run.log_lock:
        entries = run.logs
        if after_timestamp is None:
            start = 0
        else:
            # Entries are (timestamp, message) tuples appended in order, so
            # bisect finds the neighbourhood of the cut-off quickly.
            start = bisect_right(entries, (after_timestamp, ""))
            # (after_timestamp, "text") sorts *after* (after_timestamp, ""),
            # so step past entries stamped exactly at the cut-off; otherwise
            # polling with the previously returned timestamp would resend
            # the last line.
            while start < len(entries) and entries[start][0] <= after_timestamp:
                start += 1
        fresh = entries[start:]
        latest_timestamp = fresh[-1][0] if fresh else 0.0
        return "".join(message for _, message in fresh), latest_timestamp