flwr-nightly 1.13.0.dev20241106__py3-none-any.whl → 1.13.0.dev20241117__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flwr-nightly might be problematic. Click here for more details.

Files changed (58)
  1. flwr/cli/app.py +2 -0
  2. flwr/cli/build.py +37 -0
  3. flwr/cli/install.py +5 -3
  4. flwr/cli/ls.py +228 -0
  5. flwr/cli/run/run.py +16 -5
  6. flwr/client/app.py +68 -19
  7. flwr/client/clientapp/app.py +51 -35
  8. flwr/client/grpc_rere_client/connection.py +2 -12
  9. flwr/client/nodestate/__init__.py +25 -0
  10. flwr/client/nodestate/in_memory_nodestate.py +38 -0
  11. flwr/client/nodestate/nodestate.py +30 -0
  12. flwr/client/nodestate/nodestate_factory.py +37 -0
  13. flwr/client/rest_client/connection.py +4 -14
  14. flwr/client/supernode/app.py +57 -53
  15. flwr/common/args.py +148 -0
  16. flwr/common/config.py +10 -0
  17. flwr/common/constant.py +21 -7
  18. flwr/common/date.py +18 -0
  19. flwr/common/logger.py +6 -2
  20. flwr/common/object_ref.py +47 -16
  21. flwr/common/serde.py +10 -0
  22. flwr/common/typing.py +32 -11
  23. flwr/proto/exec_pb2.py +23 -17
  24. flwr/proto/exec_pb2.pyi +50 -20
  25. flwr/proto/exec_pb2_grpc.py +34 -0
  26. flwr/proto/exec_pb2_grpc.pyi +13 -0
  27. flwr/proto/run_pb2.py +32 -27
  28. flwr/proto/run_pb2.pyi +44 -1
  29. flwr/proto/simulationio_pb2.py +2 -2
  30. flwr/proto/simulationio_pb2_grpc.py +34 -0
  31. flwr/proto/simulationio_pb2_grpc.pyi +13 -0
  32. flwr/server/app.py +83 -87
  33. flwr/server/driver/driver.py +1 -1
  34. flwr/server/driver/grpc_driver.py +6 -20
  35. flwr/server/driver/inmemory_driver.py +1 -3
  36. flwr/server/run_serverapp.py +8 -238
  37. flwr/server/serverapp/app.py +44 -89
  38. flwr/server/strategy/aggregate.py +4 -4
  39. flwr/server/superlink/fleet/rest_rere/rest_api.py +10 -9
  40. flwr/server/superlink/linkstate/in_memory_linkstate.py +76 -62
  41. flwr/server/superlink/linkstate/linkstate.py +24 -9
  42. flwr/server/superlink/linkstate/sqlite_linkstate.py +87 -128
  43. flwr/server/superlink/linkstate/utils.py +191 -32
  44. flwr/server/superlink/simulation/simulationio_servicer.py +22 -1
  45. flwr/simulation/__init__.py +3 -1
  46. flwr/simulation/app.py +245 -352
  47. flwr/simulation/legacy_app.py +402 -0
  48. flwr/simulation/run_simulation.py +8 -19
  49. flwr/simulation/simulationio_connection.py +2 -2
  50. flwr/superexec/deployment.py +13 -7
  51. flwr/superexec/exec_servicer.py +32 -3
  52. flwr/superexec/executor.py +4 -3
  53. flwr/superexec/simulation.py +52 -145
  54. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/METADATA +10 -7
  55. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/RECORD +58 -51
  56. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/entry_points.txt +1 -0
  57. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/LICENSE +0 -0
  58. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/WHEEL +0 -0
@@ -15,21 +15,23 @@
15
15
  """Utility functions for State."""
16
16
 
17
17
 
18
- import time
19
18
  from logging import ERROR
20
19
  from os import urandom
21
- from uuid import uuid4
20
+ from typing import Optional, Union
21
+ from uuid import UUID, uuid4
22
22
 
23
- from flwr.common import ConfigsRecord, Context, log, serde
23
+ from flwr.common import ConfigsRecord, Context, log, now, serde
24
24
  from flwr.common.constant import ErrorCode, Status, SubStatus
25
25
  from flwr.common.typing import RunStatus
26
- from flwr.proto.error_pb2 import Error # pylint: disable=E0611
27
- from flwr.proto.message_pb2 import Context as ProtoContext # pylint: disable=E0611
28
- from flwr.proto.node_pb2 import Node # pylint: disable=E0611
29
26
 
30
27
  # pylint: disable=E0611
28
+ from flwr.proto.error_pb2 import Error
29
+ from flwr.proto.message_pb2 import Context as ProtoContext
30
+ from flwr.proto.node_pb2 import Node
31
31
  from flwr.proto.recordset_pb2 import ConfigsRecord as ProtoConfigsRecord
32
- from flwr.proto.task_pb2 import Task, TaskIns, TaskRes # pylint: disable=E0611
32
+ from flwr.proto.task_pb2 import Task, TaskIns, TaskRes
33
+
34
+ # pylint: enable=E0611
33
35
 
34
36
  NODE_UNAVAILABLE_ERROR_REASON = (
35
37
  "Error: Node Unavailable - The destination node is currently unavailable. "
@@ -40,12 +42,22 @@ VALID_RUN_STATUS_TRANSITIONS = {
40
42
  (Status.PENDING, Status.STARTING),
41
43
  (Status.STARTING, Status.RUNNING),
42
44
  (Status.RUNNING, Status.FINISHED),
45
+ # Any non-FINISHED status can transition to FINISHED
46
+ (Status.PENDING, Status.FINISHED),
47
+ (Status.STARTING, Status.FINISHED),
43
48
  }
44
49
  VALID_RUN_SUB_STATUSES = {
45
50
  SubStatus.COMPLETED,
46
51
  SubStatus.FAILED,
47
52
  SubStatus.STOPPED,
48
53
  }
54
+ MESSAGE_UNAVAILABLE_ERROR_REASON = (
55
+ "Error: Message Unavailable - The requested message could not be found in the "
56
+ "database. It may have expired due to its TTL or never existed."
57
+ )
58
+ REPLY_MESSAGE_UNAVAILABLE_ERROR_REASON = (
59
+ "Error: Reply Message Unavailable - The reply message has expired."
60
+ )
49
61
 
50
62
 
51
63
  def generate_rand_int_from_bytes(num_bytes: int) -> int:
@@ -161,31 +173,6 @@ def configsrecord_from_bytes(configsrecord_bytes: bytes) -> ConfigsRecord:
161
173
  )
162
174
 
163
175
 
164
- def make_node_unavailable_taskres(ref_taskins: TaskIns) -> TaskRes:
165
- """Generate a TaskRes with a node unavailable error from a TaskIns."""
166
- current_time = time.time()
167
- ttl = ref_taskins.task.ttl - (current_time - ref_taskins.task.created_at)
168
- if ttl < 0:
169
- log(ERROR, "Creating TaskRes for TaskIns that exceeds its TTL.")
170
- ttl = 0
171
- return TaskRes(
172
- task_id=str(uuid4()),
173
- group_id=ref_taskins.group_id,
174
- run_id=ref_taskins.run_id,
175
- task=Task(
176
- producer=Node(node_id=ref_taskins.task.consumer.node_id, anonymous=False),
177
- consumer=Node(node_id=ref_taskins.task.producer.node_id, anonymous=False),
178
- created_at=current_time,
179
- ttl=ttl,
180
- ancestry=[ref_taskins.task_id],
181
- task_type=ref_taskins.task.task_type,
182
- error=Error(
183
- code=ErrorCode.NODE_UNAVAILABLE, reason=NODE_UNAVAILABLE_ERROR_REASON
184
- ),
185
- ),
186
- )
187
-
188
-
189
176
  def is_valid_transition(current_status: RunStatus, new_status: RunStatus) -> bool:
190
177
  """Check if a transition between two run statuses is valid.
191
178
 
@@ -201,6 +188,14 @@ def is_valid_transition(current_status: RunStatus, new_status: RunStatus) -> boo
201
188
  bool
202
189
  True if the transition is valid, False otherwise.
203
190
  """
191
+ # Transition to FINISHED from a non-RUNNING status is only allowed
192
+ # if the sub-status is not COMPLETED
193
+ if (
194
+ current_status.status in [Status.PENDING, Status.STARTING]
195
+ and new_status.status == Status.FINISHED
196
+ ):
197
+ return new_status.sub_status != SubStatus.COMPLETED
198
+
204
199
  return (
205
200
  current_status.status,
206
201
  new_status.status,
@@ -228,3 +223,167 @@ def has_valid_sub_status(status: RunStatus) -> bool:
228
223
  if status.status == Status.FINISHED:
229
224
  return status.sub_status in VALID_RUN_SUB_STATUSES
230
225
  return status.sub_status == ""
226
+
227
+
228
+ def create_taskres_for_unavailable_taskins(taskins_id: Union[str, UUID]) -> TaskRes:
229
+ """Generate a TaskRes with a TaskIns unavailable error.
230
+
231
+ Parameters
232
+ ----------
233
+ taskins_id : Union[str, UUID]
234
+ The ID of the unavailable TaskIns.
235
+
236
+ Returns
237
+ -------
238
+ TaskRes
239
+ A TaskRes with an error code MESSAGE_UNAVAILABLE to indicate that the
240
+ inquired TaskIns ID cannot be found (due to non-existence or expiration).
241
+ """
242
+ current_time = now().timestamp()
243
+ return TaskRes(
244
+ task_id=str(uuid4()),
245
+ group_id="", # Unknown group ID
246
+ run_id=0, # Unknown run ID
247
+ task=Task(
248
+ # This function is only called by SuperLink, and thus it's the producer.
249
+ producer=Node(node_id=0, anonymous=False),
250
+ consumer=Node(node_id=0, anonymous=False),
251
+ created_at=current_time,
252
+ ttl=0,
253
+ ancestry=[str(taskins_id)],
254
+ task_type="", # Unknown message type
255
+ error=Error(
256
+ code=ErrorCode.MESSAGE_UNAVAILABLE,
257
+ reason=MESSAGE_UNAVAILABLE_ERROR_REASON,
258
+ ),
259
+ ),
260
+ )
261
+
262
+
263
+ def create_taskres_for_unavailable_taskres(ref_taskins: TaskIns) -> TaskRes:
264
+ """Generate a TaskRes with a reply message unavailable error from a TaskIns.
265
+
266
+ Parameters
267
+ ----------
268
+ ref_taskins : TaskIns
269
+ The reference TaskIns object.
270
+
271
+ Returns
272
+ -------
273
+ TaskRes
274
+ The generated TaskRes with an error code REPLY_MESSAGE_UNAVAILABLE,
275
+ indicating that the original TaskRes has expired.
276
+ """
277
+ current_time = now().timestamp()
278
+ ttl = ref_taskins.task.ttl - (current_time - ref_taskins.task.created_at)
279
+ if ttl < 0:
280
+ log(ERROR, "Creating TaskRes for TaskIns that exceeds its TTL.")
281
+ ttl = 0
282
+ return TaskRes(
283
+ task_id=str(uuid4()),
284
+ group_id=ref_taskins.group_id,
285
+ run_id=ref_taskins.run_id,
286
+ task=Task(
287
+ # This function is only called by SuperLink, and thus it's the producer.
288
+ producer=Node(node_id=0, anonymous=False),
289
+ consumer=Node(node_id=0, anonymous=False),
290
+ created_at=current_time,
291
+ ttl=ttl,
292
+ ancestry=[ref_taskins.task_id],
293
+ task_type=ref_taskins.task.task_type,
294
+ error=Error(
295
+ code=ErrorCode.REPLY_MESSAGE_UNAVAILABLE,
296
+ reason=REPLY_MESSAGE_UNAVAILABLE_ERROR_REASON,
297
+ ),
298
+ ),
299
+ )
300
+
301
+
302
+ def has_expired(task_ins_or_res: Union[TaskIns, TaskRes], current_time: float) -> bool:
303
+ """Check if the TaskIns/TaskRes has expired."""
304
+ return task_ins_or_res.task.ttl + task_ins_or_res.task.created_at < current_time
305
+
306
+
307
+ def verify_taskins_ids(
308
+ inquired_taskins_ids: set[UUID],
309
+ found_taskins_dict: dict[UUID, TaskIns],
310
+ current_time: Optional[float] = None,
311
+ update_set: bool = True,
312
+ ) -> dict[UUID, TaskRes]:
313
+ """Verify found TaskIns and generate error TaskRes for invalid ones.
314
+
315
+ Parameters
316
+ ----------
317
+ inquired_taskins_ids : set[UUID]
318
+ Set of TaskIns IDs for which to generate error TaskRes if invalid.
319
+ found_taskins_dict : dict[UUID, TaskIns]
320
+ Dictionary containing all found TaskIns indexed by their IDs.
321
+ current_time : Optional[float] (default: None)
322
+ The current time to check for expiration. If set to `None`, the current time
323
+ will automatically be set to the current timestamp using `now().timestamp()`.
324
+ update_set : bool (default: True)
325
+ If True, the `inquired_taskins_ids` will be updated to remove invalid ones,
326
+ by default True.
327
+
328
+ Returns
329
+ -------
330
+ dict[UUID, TaskRes]
331
+ A dictionary of error TaskRes indexed by the corresponding TaskIns ID.
332
+ """
333
+ ret_dict = {}
334
+ current = current_time if current_time else now().timestamp()
335
+ for taskins_id in list(inquired_taskins_ids):
336
+ # Generate error TaskRes if the task_ins doesn't exist or has expired
337
+ taskins = found_taskins_dict.get(taskins_id)
338
+ if taskins is None or has_expired(taskins, current):
339
+ if update_set:
340
+ inquired_taskins_ids.remove(taskins_id)
341
+ taskres = create_taskres_for_unavailable_taskins(taskins_id)
342
+ ret_dict[taskins_id] = taskres
343
+ return ret_dict
344
+
345
+
346
+ def verify_found_taskres(
347
+ inquired_taskins_ids: set[UUID],
348
+ found_taskins_dict: dict[UUID, TaskIns],
349
+ found_taskres_list: list[TaskRes],
350
+ current_time: Optional[float] = None,
351
+ update_set: bool = True,
352
+ ) -> dict[UUID, TaskRes]:
353
+ """Verify found TaskRes and generate error TaskRes for invalid ones.
354
+
355
+ Parameters
356
+ ----------
357
+ inquired_taskins_ids : set[UUID]
358
+ Set of TaskIns IDs for which to generate error TaskRes if invalid.
359
+ found_taskins_dict : dict[UUID, TaskIns]
360
+ Dictionary containing all found TaskIns indexed by their IDs.
361
+ found_taskres_list : list[TaskRes]
362
+ List of found TaskRes to be verified.
363
+ current_time : Optional[float] (default: None)
364
+ The current time to check for expiration. If set to `None`, the current time
365
+ will automatically be set to the current timestamp using `now().timestamp()`.
366
+ update_set : bool (default: True)
367
+ If True, the `inquired_taskins_ids` will be updated to remove ones
368
+ that have a TaskRes, by default True.
369
+
370
+ Returns
371
+ -------
372
+ dict[UUID, TaskRes]
373
+ A dictionary of TaskRes indexed by the corresponding TaskIns ID.
374
+ """
375
+ ret_dict: dict[UUID, TaskRes] = {}
376
+ current = current_time if current_time else now().timestamp()
377
+ for taskres in found_taskres_list:
378
+ taskins_id = UUID(taskres.task.ancestry[0])
379
+ if update_set:
380
+ inquired_taskins_ids.remove(taskins_id)
381
+ # Check if the TaskRes has expired
382
+ if has_expired(taskres, current):
383
+ # No need to insert the error TaskRes
384
+ taskres = create_taskres_for_unavailable_taskres(
385
+ found_taskins_dict[taskins_id]
386
+ )
387
+ taskres.task.delivered_at = now().isoformat()
388
+ ret_dict[taskins_id] = taskres
389
+ return ret_dict
@@ -23,6 +23,7 @@ from grpc import ServicerContext
23
23
  from flwr.common.constant import Status
24
24
  from flwr.common.logger import log
25
25
  from flwr.common.serde import (
26
+ configs_record_to_proto,
26
27
  context_from_proto,
27
28
  context_to_proto,
28
29
  fab_to_proto,
@@ -36,6 +37,8 @@ from flwr.proto.log_pb2 import ( # pylint: disable=E0611
36
37
  PushLogsResponse,
37
38
  )
38
39
  from flwr.proto.run_pb2 import ( # pylint: disable=E0611
40
+ GetFederationOptionsRequest,
41
+ GetFederationOptionsResponse,
39
42
  UpdateRunStatusRequest,
40
43
  UpdateRunStatusResponse,
41
44
  )
@@ -123,10 +126,28 @@ class SimulationIoServicer(simulationio_pb2_grpc.SimulationIoServicer):
123
126
  self, request: PushLogsRequest, context: grpc.ServicerContext
124
127
  ) -> PushLogsResponse:
125
128
  """Push logs."""
126
- log(DEBUG, "ServerAppIoServicer.PushLogs")
129
+ log(DEBUG, "SimultionIoServicer.PushLogs")
127
130
  state = self.state_factory.state()
128
131
 
129
132
  # Add logs to LinkState
130
133
  merged_logs = "".join(request.logs)
131
134
  state.add_serverapp_log(request.run_id, merged_logs)
132
135
  return PushLogsResponse()
136
+
137
+ def GetFederationOptions(
138
+ self, request: GetFederationOptionsRequest, context: ServicerContext
139
+ ) -> GetFederationOptionsResponse:
140
+ """Get Federation Options associated with a run."""
141
+ log(DEBUG, "SimultionIoServicer.GetFederationOptions")
142
+ state = self.state_factory.state()
143
+
144
+ federation_options = state.get_federation_options(request.run_id)
145
+ if federation_options is None:
146
+ context.abort(
147
+ grpc.StatusCode.FAILED_PRECONDITION,
148
+ "Expected federation options to be set, but none available.",
149
+ )
150
+ return GetFederationOptionsResponse()
151
+ return GetFederationOptionsResponse(
152
+ federation_options=configs_record_to_proto(federation_options)
153
+ )
@@ -17,13 +17,14 @@
17
17
 
18
18
  import importlib
19
19
 
20
+ from flwr.simulation.app import run_simulation_process
20
21
  from flwr.simulation.run_simulation import run_simulation
21
22
  from flwr.simulation.simulationio_connection import SimulationIoConnection
22
23
 
23
24
  is_ray_installed = importlib.util.find_spec("ray") is not None
24
25
 
25
26
  if is_ray_installed:
26
- from flwr.simulation.app import start_simulation
27
+ from flwr.simulation.legacy_app import start_simulation
27
28
  else:
28
29
  RAY_IMPORT_ERROR: str = """Unable to import module `ray`.
29
30
 
@@ -40,5 +41,6 @@ To install the necessary dependencies, install `flwr` with the `simulation` extr
40
41
  __all__ = [
41
42
  "SimulationIoConnection",
42
43
  "run_simulation",
44
+ "run_simulation_process",
43
45
  "start_simulation",
44
46
  ]