PyPI - flwr-nightly - Versions diffs - 1.8.0.dev20240314__py3-none-any.whl → 1.11.0.dev20240813__py3-none-any.whl - Mend

flwr-nightly 1.8.0.dev20240314__py3-none-any.whl → 1.11.0.dev20240813__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of flwr-nightly might be problematic. Click here for more details.

Files changed (237)

flwr/cli/app.py +7 -0
flwr/cli/build.py +150 -0
flwr/cli/config_utils.py +219 -0
flwr/cli/example.py +3 -1
flwr/cli/install.py +227 -0
flwr/cli/new/new.py +179 -48
flwr/cli/new/templates/app/.gitignore.tpl +160 -0
flwr/cli/new/templates/app/README.flowertune.md.tpl +56 -0
flwr/cli/new/templates/app/README.md.tpl +1 -5
flwr/cli/new/templates/app/code/__init__.py.tpl +1 -1
flwr/cli/new/templates/app/code/client.huggingface.py.tpl +65 -0
flwr/cli/new/templates/app/code/client.jax.py.tpl +56 -0
flwr/cli/new/templates/app/code/client.mlx.py.tpl +93 -0
flwr/cli/new/templates/app/code/client.numpy.py.tpl +3 -2
flwr/cli/new/templates/app/code/client.pytorch.py.tpl +23 -11
flwr/cli/new/templates/app/code/client.sklearn.py.tpl +97 -0
flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +60 -1
flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +89 -0
flwr/cli/new/templates/app/code/flwr_tune/client.py.tpl +126 -0
flwr/cli/new/templates/app/code/flwr_tune/config.yaml.tpl +34 -0
flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +57 -0
flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +59 -0
flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +48 -0
flwr/cli/new/templates/app/code/flwr_tune/static_config.yaml.tpl +11 -0
flwr/cli/new/templates/app/code/server.huggingface.py.tpl +23 -0
flwr/cli/new/templates/app/code/server.jax.py.tpl +20 -0
flwr/cli/new/templates/app/code/server.mlx.py.tpl +20 -0
flwr/cli/new/templates/app/code/server.numpy.py.tpl +17 -9
flwr/cli/new/templates/app/code/server.pytorch.py.tpl +21 -18
flwr/cli/new/templates/app/code/server.sklearn.py.tpl +24 -0
flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +29 -1
flwr/cli/new/templates/app/code/task.huggingface.py.tpl +99 -0
flwr/cli/new/templates/app/code/task.jax.py.tpl +57 -0
flwr/cli/new/templates/app/code/task.mlx.py.tpl +102 -0
flwr/cli/new/templates/app/code/task.pytorch.py.tpl +28 -23
flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +53 -0
flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +39 -0
flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +38 -0
flwr/cli/new/templates/app/pyproject.jax.toml.tpl +34 -0
flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +39 -0
flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +25 -12
flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +29 -14
flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +33 -0
flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +29 -14
flwr/cli/run/run.py +168 -17
flwr/cli/utils.py +75 -4
flwr/client/__init__.py +6 -1
flwr/client/app.py +239 -248
flwr/client/client_app.py +70 -9
flwr/client/dpfedavg_numpy_client.py +1 -1
flwr/client/grpc_adapter_client/__init__.py +15 -0
flwr/client/grpc_adapter_client/connection.py +97 -0
flwr/client/grpc_client/connection.py +18 -5
flwr/client/grpc_rere_client/__init__.py +1 -1
flwr/client/grpc_rere_client/client_interceptor.py +158 -0
flwr/client/grpc_rere_client/connection.py +127 -33
flwr/client/grpc_rere_client/grpc_adapter.py +140 -0
flwr/client/heartbeat.py +74 -0
flwr/client/message_handler/__init__.py +1 -1
flwr/client/message_handler/message_handler.py +7 -7
flwr/client/mod/__init__.py +5 -5
flwr/client/mod/centraldp_mods.py +4 -2
flwr/client/mod/comms_mods.py +4 -4
flwr/client/mod/localdp_mod.py +9 -4
flwr/client/mod/secure_aggregation/__init__.py +1 -1
flwr/client/mod/secure_aggregation/secaggplus_mod.py +1 -1
flwr/client/mod/utils.py +1 -1
flwr/client/node_state.py +60 -10
flwr/client/node_state_tests.py +4 -3
flwr/client/rest_client/__init__.py +1 -1
flwr/client/rest_client/connection.py +177 -157
flwr/client/supernode/__init__.py +26 -0
flwr/client/supernode/app.py +464 -0
flwr/client/typing.py +1 -0
flwr/common/__init__.py +13 -11
flwr/common/address.py +1 -1
flwr/common/config.py +193 -0
flwr/common/constant.py +42 -1
flwr/common/context.py +26 -1
flwr/common/date.py +1 -1
flwr/common/dp.py +1 -1
flwr/common/grpc.py +6 -2
flwr/common/logger.py +79 -8
flwr/common/message.py +167 -105
flwr/common/object_ref.py +126 -25
flwr/common/record/__init__.py +1 -1
flwr/common/record/parametersrecord.py +0 -1
flwr/common/record/recordset.py +78 -27
flwr/common/recordset_compat.py +8 -1
flwr/common/retry_invoker.py +25 -13
flwr/common/secure_aggregation/__init__.py +1 -1
flwr/common/secure_aggregation/crypto/__init__.py +1 -1
flwr/common/secure_aggregation/crypto/shamir.py +1 -1
flwr/common/secure_aggregation/crypto/symmetric_encryption.py +21 -2
flwr/common/secure_aggregation/ndarrays_arithmetic.py +1 -1
flwr/common/secure_aggregation/quantization.py +1 -1
flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
flwr/common/secure_aggregation/secaggplus_utils.py +1 -1
flwr/common/serde.py +209 -3
flwr/common/telemetry.py +25 -0
flwr/common/typing.py +38 -0
flwr/common/version.py +14 -0
flwr/proto/clientappio_pb2.py +41 -0
flwr/proto/clientappio_pb2.pyi +110 -0
flwr/proto/clientappio_pb2_grpc.py +101 -0
flwr/proto/clientappio_pb2_grpc.pyi +40 -0
flwr/proto/common_pb2.py +36 -0
flwr/proto/common_pb2.pyi +121 -0
flwr/proto/common_pb2_grpc.py +4 -0
flwr/proto/common_pb2_grpc.pyi +4 -0
flwr/proto/driver_pb2.py +26 -19
flwr/proto/driver_pb2.pyi +34 -0
flwr/proto/driver_pb2_grpc.py +70 -0
flwr/proto/driver_pb2_grpc.pyi +28 -0
flwr/proto/exec_pb2.py +43 -0
flwr/proto/exec_pb2.pyi +95 -0
flwr/proto/exec_pb2_grpc.py +101 -0
flwr/proto/exec_pb2_grpc.pyi +41 -0
flwr/proto/fab_pb2.py +30 -0
flwr/proto/fab_pb2.pyi +56 -0
flwr/proto/fab_pb2_grpc.py +4 -0
flwr/proto/fab_pb2_grpc.pyi +4 -0
flwr/proto/fleet_pb2.py +29 -23
flwr/proto/fleet_pb2.pyi +33 -0
flwr/proto/fleet_pb2_grpc.py +102 -0
flwr/proto/fleet_pb2_grpc.pyi +35 -0
flwr/proto/grpcadapter_pb2.py +32 -0
flwr/proto/grpcadapter_pb2.pyi +43 -0
flwr/proto/grpcadapter_pb2_grpc.py +66 -0
flwr/proto/grpcadapter_pb2_grpc.pyi +24 -0
flwr/proto/message_pb2.py +41 -0
flwr/proto/message_pb2.pyi +122 -0
flwr/proto/message_pb2_grpc.py +4 -0
flwr/proto/message_pb2_grpc.pyi +4 -0
flwr/proto/run_pb2.py +35 -0
flwr/proto/run_pb2.pyi +76 -0
flwr/proto/run_pb2_grpc.py +4 -0
flwr/proto/run_pb2_grpc.pyi +4 -0
flwr/proto/task_pb2.py +7 -8
flwr/proto/task_pb2.pyi +8 -5
flwr/server/__init__.py +4 -8
flwr/server/app.py +298 -350
flwr/server/compat/app.py +6 -57
flwr/server/compat/app_utils.py +5 -4
flwr/server/compat/driver_client_proxy.py +29 -48
flwr/server/compat/legacy_context.py +5 -4
flwr/server/driver/__init__.py +2 -0
flwr/server/driver/driver.py +22 -132
flwr/server/driver/grpc_driver.py +224 -74
flwr/server/driver/inmemory_driver.py +183 -0
flwr/server/history.py +20 -20
flwr/server/run_serverapp.py +121 -34
flwr/server/server.py +11 -7
flwr/server/server_app.py +59 -10
flwr/server/serverapp_components.py +52 -0
flwr/server/strategy/__init__.py +2 -2
flwr/server/strategy/bulyan.py +1 -1
flwr/server/strategy/dp_adaptive_clipping.py +3 -3
flwr/server/strategy/dp_fixed_clipping.py +4 -3
flwr/server/strategy/dpfedavg_adaptive.py +1 -1
flwr/server/strategy/dpfedavg_fixed.py +1 -1
flwr/server/strategy/fedadagrad.py +1 -1
flwr/server/strategy/fedadam.py +1 -1
flwr/server/strategy/fedavg_android.py +1 -1
flwr/server/strategy/fedavgm.py +1 -1
flwr/server/strategy/fedmedian.py +1 -1
flwr/server/strategy/fedopt.py +1 -1
flwr/server/strategy/fedprox.py +1 -1
flwr/server/strategy/fedxgb_bagging.py +1 -1
flwr/server/strategy/fedxgb_cyclic.py +1 -1
flwr/server/strategy/fedxgb_nn_avg.py +1 -1
flwr/server/strategy/fedyogi.py +1 -1
flwr/server/strategy/krum.py +1 -1
flwr/server/strategy/qfedavg.py +1 -1
flwr/server/superlink/driver/__init__.py +1 -1
flwr/server/superlink/driver/driver_grpc.py +1 -1
flwr/server/superlink/driver/driver_servicer.py +51 -4
flwr/server/superlink/ffs/__init__.py +24 -0
flwr/server/superlink/ffs/disk_ffs.py +104 -0
flwr/server/superlink/ffs/ffs.py +79 -0
flwr/server/superlink/fleet/__init__.py +1 -1
flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +131 -0
flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +1 -1
flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +1 -1
flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +8 -2
flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +30 -2
flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +214 -0
flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
flwr/server/superlink/fleet/message_handler/message_handler.py +42 -2
flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
flwr/server/superlink/fleet/rest_rere/rest_api.py +59 -1
flwr/server/superlink/fleet/vce/backend/__init__.py +1 -1
flwr/server/superlink/fleet/vce/backend/backend.py +5 -5
flwr/server/superlink/fleet/vce/backend/raybackend.py +53 -56
flwr/server/superlink/fleet/vce/vce_api.py +190 -127
flwr/server/superlink/state/__init__.py +1 -1
flwr/server/superlink/state/in_memory_state.py +159 -42
flwr/server/superlink/state/sqlite_state.py +243 -39
flwr/server/superlink/state/state.py +81 -6
flwr/server/superlink/state/state_factory.py +11 -2
flwr/server/superlink/state/utils.py +62 -0
flwr/server/typing.py +2 -0
flwr/server/utils/__init__.py +1 -1
flwr/server/utils/tensorboard.py +1 -1
flwr/server/utils/validator.py +23 -9
flwr/server/workflow/default_workflows.py +67 -25
flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +18 -6
flwr/simulation/__init__.py +7 -4
flwr/simulation/app.py +67 -36
flwr/simulation/ray_transport/__init__.py +1 -1
flwr/simulation/ray_transport/ray_actor.py +20 -46
flwr/simulation/ray_transport/ray_client_proxy.py +36 -16
flwr/simulation/run_simulation.py +308 -92
flwr/superexec/__init__.py +21 -0
flwr/superexec/app.py +184 -0
flwr/superexec/deployment.py +185 -0
flwr/superexec/exec_grpc.py +55 -0
flwr/superexec/exec_servicer.py +70 -0
flwr/superexec/executor.py +75 -0
flwr/superexec/simulation.py +193 -0
{flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.11.0.dev20240813.dist-info}/METADATA +10 -6
flwr_nightly-1.11.0.dev20240813.dist-info/RECORD +288 -0
flwr_nightly-1.11.0.dev20240813.dist-info/entry_points.txt +10 -0
flwr/cli/flower_toml.py +0 -140
flwr/cli/new/templates/app/flower.toml.tpl +0 -13
flwr/cli/new/templates/app/requirements.numpy.txt.tpl +0 -2
flwr/cli/new/templates/app/requirements.pytorch.txt.tpl +0 -4
flwr/cli/new/templates/app/requirements.tensorflow.txt.tpl +0 -4
flwr_nightly-1.8.0.dev20240314.dist-info/RECORD +0 -211
flwr_nightly-1.8.0.dev20240314.dist-info/entry_points.txt +0 -9
{flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.11.0.dev20240813.dist-info}/LICENSE +0 -0
{flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.11.0.dev20240813.dist-info}/WHEEL +0 -0

flwr/server/utils/validator.py CHANGED Viewed

@@ -31,13 +31,21 @@ def validate_task_ins_or_res(tasks_ins_res: Union[TaskIns, TaskRes]) -> List[str
     if not tasks_ins_res.HasField("task"):
         validation_errors.append("`task` does not set field `task`")
-    # Created/delivered/TTL
-    if tasks_ins_res.task.created_at != "":
-        validation_errors.append("`created_at` must be an empty str")
+    # Created/delivered/TTL/Pushed
+    if (
+        tasks_ins_res.task.created_at < 1711497600.0
+    ):  # unix timestamp of 27 March 2024 00h:00m:00s UTC
+        validation_errors.append(
+            "`created_at` must be a float that records the unix timestamp "
+            "in seconds when the message was created."
+        )
     if tasks_ins_res.task.delivered_at != "":
         validation_errors.append("`delivered_at` must be an empty str")
-    if tasks_ins_res.task.ttl != "":
-        validation_errors.append("`ttl` must be an empty str")
+    if tasks_ins_res.task.ttl <= 0:
+        validation_errors.append("`ttl` must be higher than zero")
+    if tasks_ins_res.task.pushed_at < 1711497600.0:
+        # unix timestamp of 27 March 2024 00h:00m:00s UTC
+        validation_errors.append("`pushed_at` is not a recent timestamp")
     # TaskIns specific
     if isinstance(tasks_ins_res, TaskIns):
@@ -66,8 +74,11 @@ def validate_task_ins_or_res(tasks_ins_res: Union[TaskIns, TaskRes]) -> List[str
         # Content check
         if tasks_ins_res.task.task_type == "":
             validation_errors.append("`task_type` MUST be set")
-        if not tasks_ins_res.task.HasField("recordset"):
-            validation_errors.append("`recordset` MUST be set")
+        if not (
+            tasks_ins_res.task.HasField("recordset")
+            ^ tasks_ins_res.task.HasField("error")
+        ):
+            validation_errors.append("Either `recordset` or `error` MUST be set")
         # Ancestors
         if len(tasks_ins_res.task.ancestry) != 0:
@@ -106,8 +117,11 @@ def validate_task_ins_or_res(tasks_ins_res: Union[TaskIns, TaskRes]) -> List[str
         # Content check
         if tasks_ins_res.task.task_type == "":
             validation_errors.append("`task_type` MUST be set")
-        if not tasks_ins_res.task.HasField("recordset"):
-            validation_errors.append("`recordset` MUST be set")
+        if not (
+            tasks_ins_res.task.HasField("recordset")
+            ^ tasks_ins_res.task.HasField("error")
+        ):
+            validation_errors.append("Either `recordset` or `error` MUST be set")
         # Ancestors
         if len(tasks_ins_res.task.ancestry) == 0:

flwr/server/workflow/default_workflows.py CHANGED Viewed

@@ -17,13 +17,23 @@
 import io
 import timeit
-from logging import INFO
-from typing import Optional, cast
+from logging import INFO, WARN
+from typing import List, Optional, Tuple, Union, cast
 import flwr.common.recordset_compat as compat
-from flwr.common import ConfigsRecord, Context, GetParametersIns, log
+from flwr.common import (
+    Code,
+    ConfigsRecord,
+    Context,
+    EvaluateRes,
+    FitRes,
+    GetParametersIns,
+    ParametersRecord,
+    log,
+)
 from flwr.common.constant import MessageType, MessageTypeLegacy
+from ..client_proxy import ClientProxy
 from ..compat.app_utils import start_update_client_manager_thread
 from ..compat.legacy_context import LegacyContext
 from ..driver import Driver
@@ -88,7 +98,12 @@ class DefaultWorkflow:
         hist = context.history
         log(INFO, "")
         log(INFO, "[SUMMARY]")
-        log(INFO, "Run finished %s rounds in %.2fs", context.config.num_rounds, elapsed)
+        log(
+            INFO,
+            "Run finished %s round(s) in %.2fs",
+            context.config.num_rounds,
+            elapsed,
+        )
         for idx, line in enumerate(io.StringIO(str(hist))):
             if idx == 0:
                 log(INFO, "%s", line.strip("\n"))
@@ -127,13 +142,27 @@ def default_init_params_workflow(driver: Driver, context: Context) -> None:
                     message_type=MessageTypeLegacy.GET_PARAMETERS,
                     dst_node_id=random_client.node_id,
                     group_id="0",
-                    ttl="",
                 )
             ]
         )
-        log(INFO, "Received initial parameters from one random client")
         msg = list(messages)[0]
-        paramsrecord = next(iter(msg.content.parameters_records.values()))
+        if (
+            msg.has_content()
+            and compat._extract_status_from_recordset(  # pylint: disable=W0212
+                "getparametersres", msg.content
+            ).code
+            == Code.OK
+        ):
+            log(INFO, "Received initial parameters from one random client")
+            paramsrecord = next(iter(msg.content.parameters_records.values()))
+        else:
+            log(
+                WARN,
+                "Failed to receive initial parameters from the client."
+                " Empty initial parameters will be used.",
+            )
+            paramsrecord = ParametersRecord()
     context.state.parameters_records[MAIN_PARAMS_RECORD] = paramsrecord
@@ -226,7 +255,6 @@ def default_fit_workflow(  # pylint: disable=R0914
             message_type=MessageType.TRAIN,
             dst_node_id=proxy.node_id,
             group_id=str(current_round),
-            ttl="",
         )
         for proxy, fitins in client_instructions
     ]
@@ -246,14 +274,20 @@ def default_fit_workflow(  # pylint: disable=R0914
     )
     # Aggregate training results
-    results = [
-        (
-            node_id_to_proxy[msg.metadata.src_node_id],
-            compat.recordset_to_fitres(msg.content, False),
-        )
-        for msg in messages
-    ]
-    aggregated_result = context.strategy.aggregate_fit(current_round, results, [])
+    results: List[Tuple[ClientProxy, FitRes]] = []
+    failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]] = []
+    for msg in messages:
+        if msg.has_content():
+            proxy = node_id_to_proxy[msg.metadata.src_node_id]
+            fitres = compat.recordset_to_fitres(msg.content, False)
+            if fitres.status.code == Code.OK:
+                results.append((proxy, fitres))
+            else:
+                failures.append((proxy, fitres))
+        else:
+            failures.append(Exception(msg.error))
+    aggregated_result = context.strategy.aggregate_fit(current_round, results, failures)
     parameters_aggregated, metrics_aggregated = aggregated_result
     # Update the parameters and write history
@@ -267,6 +301,7 @@ def default_fit_workflow(  # pylint: disable=R0914
         )
+# pylint: disable-next=R0914
 def default_evaluate_workflow(driver: Driver, context: Context) -> None:
     """Execute the default workflow for a single evaluate round."""
     if not isinstance(context, LegacyContext):
@@ -306,7 +341,6 @@ def default_evaluate_workflow(driver: Driver, context: Context) -> None:
             message_type=MessageType.EVALUATE,
             dst_node_id=proxy.node_id,
             group_id=str(current_round),
-            ttl="",
         )
         for proxy, evalins in client_instructions
     ]
@@ -326,14 +360,22 @@ def default_evaluate_workflow(driver: Driver, context: Context) -> None:
     )
     # Aggregate the evaluation results
-    results = [
-        (
-            node_id_to_proxy[msg.metadata.src_node_id],
-            compat.recordset_to_evaluateres(msg.content),
-        )
-        for msg in messages
-    ]
-    aggregated_result = context.strategy.aggregate_evaluate(current_round, results, [])
+    results: List[Tuple[ClientProxy, EvaluateRes]] = []
+    failures: List[Union[Tuple[ClientProxy, EvaluateRes], BaseException]] = []
+    for msg in messages:
+        if msg.has_content():
+            proxy = node_id_to_proxy[msg.metadata.src_node_id]
+            evalres = compat.recordset_to_evaluateres(msg.content)
+            if evalres.status.code == Code.OK:
+                results.append((proxy, evalres))
+            else:
+                failures.append((proxy, evalres))
+        else:
+            failures.append(Exception(msg.error))
+    aggregated_result = context.strategy.aggregate_evaluate(
+        current_round, results, failures
+    )
     loss_aggregated, metrics_aggregated = aggregated_result

flwr/server/workflow/secure_aggregation/secaggplus_workflow.py CHANGED Viewed

@@ -81,6 +81,7 @@ class WorkflowState:  # pylint: disable=R0902
     forward_ciphertexts: Dict[int, List[bytes]] = field(default_factory=dict)
     aggregate_ndarrays: NDArrays = field(default_factory=list)
     legacy_results: List[Tuple[ClientProxy, FitRes]] = field(default_factory=list)
+    failures: List[Exception] = field(default_factory=list)
 class SecAggPlusWorkflow:
@@ -373,7 +374,6 @@ class SecAggPlusWorkflow:
                 message_type=MessageType.TRAIN,
                 dst_node_id=nid,
                 group_id=str(cfg[WorkflowKey.CURRENT_ROUND]),
-                ttl="",
             )
         log(
@@ -395,6 +395,7 @@ class SecAggPlusWorkflow:
         for msg in msgs:
             if msg.has_error():
+                state.failures.append(Exception(msg.error))
                 continue
             key_dict = msg.content.configs_records[RECORD_KEY_CONFIGS]
             node_id = msg.metadata.src_node_id
@@ -421,7 +422,6 @@ class SecAggPlusWorkflow:
                 message_type=MessageType.TRAIN,
                 dst_node_id=nid,
                 group_id=str(cfg[WorkflowKey.CURRENT_ROUND]),
-                ttl="",
             )
         # Broadcast public keys to clients and receive secret key shares
@@ -453,6 +453,9 @@ class SecAggPlusWorkflow:
             nid: [] for nid in state.active_node_ids
         }  # dest node ID -> list of src node IDs
         for msg in msgs:
+            if msg.has_error():
+                state.failures.append(Exception(msg.error))
+                continue
             node_id = msg.metadata.src_node_id
             res_dict = msg.content.configs_records[RECORD_KEY_CONFIGS]
             dst_lst = cast(List[int], res_dict[Key.DESTINATION_LIST])
@@ -492,7 +495,6 @@ class SecAggPlusWorkflow:
                 message_type=MessageType.TRAIN,
                 dst_node_id=nid,
                 group_id=str(cfg[WorkflowKey.CURRENT_ROUND]),
-                ttl="",
             )
         log(
@@ -518,6 +520,9 @@ class SecAggPlusWorkflow:
         # Sum collected masked vectors and compute active/dead node IDs
         masked_vector = None
         for msg in msgs:
+            if msg.has_error():
+                state.failures.append(Exception(msg.error))
+                continue
             res_dict = msg.content.configs_records[RECORD_KEY_CONFIGS]
             bytes_list = cast(List[bytes], res_dict[Key.MASKED_PARAMETERS])
             client_masked_vec = [bytes_to_ndarray(b) for b in bytes_list]
@@ -531,6 +536,9 @@ class SecAggPlusWorkflow:
         # Backward compatibility with Strategy
         for msg in msgs:
+            if msg.has_error():
+                state.failures.append(Exception(msg.error))
+                continue
             fitres = compat.recordset_to_fitres(msg.content, True)
             proxy = state.nid_to_proxies[msg.metadata.src_node_id]
             state.legacy_results.append((proxy, fitres))
@@ -563,7 +571,6 @@ class SecAggPlusWorkflow:
                 message_type=MessageType.TRAIN,
                 dst_node_id=nid,
                 group_id=str(current_round),
-                ttl="",
             )
         log(
@@ -588,6 +595,9 @@ class SecAggPlusWorkflow:
         for nid in state.sampled_node_ids:
             collected_shares_dict[nid] = []
         for msg in msgs:
+            if msg.has_error():
+                state.failures.append(Exception(msg.error))
+                continue
             res_dict = msg.content.configs_records[RECORD_KEY_CONFIGS]
             nids = cast(List[int], res_dict[Key.NODE_ID_LIST])
             shares = cast(List[bytes], res_dict[Key.SHARE_LIST])
@@ -656,9 +666,11 @@ class SecAggPlusWorkflow:
             INFO,
             "aggregate_fit: received %s results and %s failures",
             len(results),
-            0,
+            len(state.failures),
+        )
+        aggregated_result = context.strategy.aggregate_fit(
+            current_round, results, state.failures  # type: ignore
         )
-        aggregated_result = context.strategy.aggregate_fit(current_round, results, [])
         parameters_aggregated, metrics_aggregated = aggregated_result
         # Update the parameters and write history

flwr/simulation/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2020 Flower Labs GmbH. All Rights Reserved.
+# Copyright 2021 Flower Labs GmbH. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import importlib
-from flwr.simulation.run_simulation import run_simulation, run_simulation_from_cli
+from flwr.simulation.run_simulation import run_simulation
 is_ray_installed = importlib.util.find_spec("ray") is not None
@@ -28,7 +28,7 @@ else:
 To install the necessary dependencies, install `flwr` with the `simulation` extra:
-    pip install -U flwr["simulation"]
+    pip install -U "flwr[simulation]"
 """
     def start_simulation(*args, **kwargs):  # type: ignore
@@ -36,4 +36,7 @@ To install the necessary dependencies, install `flwr` with the `simulation` extr
         raise ImportError(RAY_IMPORT_ERROR)
-__all__ = ["start_simulation", "run_simulation_from_cli", "run_simulation"]
+__all__ = [
+    "run_simulation",
+    "start_simulation",
+]

flwr/simulation/app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2020 Flower Labs GmbH. All Rights Reserved.
+# Copyright 2021 Flower Labs GmbH. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,6 +15,8 @@
 """Flower simulation app."""
+import asyncio
+import logging
 import sys
 import threading
 import traceback
@@ -25,14 +27,16 @@ from typing import Any, Dict, List, Optional, Type, Union
 import ray
 from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
-from flwr.client import ClientFn
+from flwr.client import ClientFnExt
 from flwr.common import EventType, event
-from flwr.common.logger import log
+from flwr.common.constant import NODE_ID_NUM_BYTES
+from flwr.common.logger import log, set_logger_propagation, warn_unsupported_feature
 from flwr.server.client_manager import ClientManager
 from flwr.server.history import History
 from flwr.server.server import Server, init_defaults, run_fl
 from flwr.server.server_config import ServerConfig
 from flwr.server.strategy import Strategy
+from flwr.server.superlink.state.utils import generate_rand_int_from_bytes
 from flwr.simulation.ray_transport.ray_actor import (
     ClientAppActor,
     VirtualClientEngineActor,
@@ -49,7 +53,7 @@ Invalid Arguments in method:
 `start_simulation(
     *,
     client_fn: ClientFn,
-    num_clients: Optional[int] = None,
+    num_clients: int,
     clients_ids: Optional[List[str]] = None,
     client_resources: Optional[Dict[str, float]] = None,
     server: Optional[Server] = None,
@@ -68,13 +72,29 @@ REASON:
 """
+NodeToPartitionMapping = Dict[int, int]
+def _create_node_id_to_partition_mapping(
+    num_clients: int,
+) -> NodeToPartitionMapping:
+    """Generate a node_id:partition_id mapping."""
+    nodes_mapping: NodeToPartitionMapping = {}  # {node-id; partition-id}
+    for i in range(num_clients):
+        while True:
+            node_id = generate_rand_int_from_bytes(NODE_ID_NUM_BYTES)
+            if node_id not in nodes_mapping:
+                break
+        nodes_mapping[node_id] = i
+    return nodes_mapping
 # pylint: disable=too-many-arguments,too-many-statements,too-many-branches
 def start_simulation(
     *,
-    client_fn: ClientFn,
-    num_clients: Optional[int] = None,
-    clients_ids: Optional[List[str]] = None,
+    client_fn: ClientFnExt,
+    num_clients: int,
+    clients_ids: Optional[List[str]] = None,  # UNSUPPORTED, WILL BE REMOVED
     client_resources: Optional[Dict[str, float]] = None,
     server: Optional[Server] = None,
     config: Optional[ServerConfig] = None,
@@ -90,23 +110,24 @@ def start_simulation(
     Parameters
     ----------
-    client_fn : ClientFn
-        A function creating client instances. The function must take a single
-        `str` argument called `cid`. It should return a single client instance
-        of type Client. Note that the created client instances are ephemeral
-        and will often be destroyed after a single method invocation. Since client
-        instances are not long-lived, they should not attempt to carry state over
-        method invocations. Any state required by the instance (model, dataset,
-        hyperparameters, ...) should be (re-)created in either the call to `client_fn`
-        or the call to any of the client methods (e.g., load evaluation data in the
-        `evaluate` method itself).
-    num_clients : Optional[int]
-        The total number of clients in this simulation. This must be set if
-        `clients_ids` is not set and vice-versa.
+    client_fn : ClientFnExt
+        A function creating `Client` instances. The function must have the signature
+        `client_fn(context: Context)`. It should return
+        a single client instance of type `Client`. Note that the created client
+        instances are ephemeral and will often be destroyed after a single method
+        invocation. Since client instances are not long-lived, they should not attempt
+        to carry state over method invocations. Any state required by the instance
+        (model, dataset, hyperparameters, ...) should be (re-)created in either the
+        call to `client_fn` or the call to any of the client methods (e.g., load
+        evaluation data in the `evaluate` method itself).
+    num_clients : int
+        The total number of clients in this simulation.
     clients_ids : Optional[List[str]]
+        UNSUPPORTED, WILL BE REMOVED. USE `num_clients` INSTEAD.
         List `client_id`s for each client. This is only required if
         `num_clients` is not set. Setting both `num_clients` and `clients_ids`
         with `len(clients_ids)` not equal to `num_clients` generates an error.
+        Using this argument will raise an error.
     client_resources : Optional[Dict[str, float]] (default: `{"num_cpus": 1, "num_gpus": 0.0}`)
         CPU and GPU resources for a single client. Supported keys
         are `num_cpus` and `num_gpus`. To understand the GPU utilization caused by
@@ -167,6 +188,26 @@ def start_simulation(
         {"num_clients": len(clients_ids) if clients_ids is not None else num_clients},
     )
+    if clients_ids is not None:
+        warn_unsupported_feature(
+            "Passing `clients_ids` to `start_simulation` is deprecated and not longer "
+            "used by `start_simulation`. Use `num_clients` exclusively instead."
+        )
+        log(ERROR, "`clients_ids` argument used.")
+        sys.exit()
+    # Set logger propagation
+    loop: Optional[asyncio.AbstractEventLoop] = None
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = None
+    finally:
+        if loop and loop.is_running():
+            # Set logger propagation to False to prevent duplicated log output in Colab.
+            logger = logging.getLogger("flwr")
+            _ = set_logger_propagation(logger, False)
     # Initialize server and server config
     initialized_server, initialized_config = init_defaults(
         server=server,
@@ -181,20 +222,8 @@ def start_simulation(
         initialized_config,
     )
-    # clients_ids takes precedence
-    cids: List[str]
-    if clients_ids is not None:
-        if (num_clients is not None) and (len(clients_ids) != num_clients):
-            log(ERROR, INVALID_ARGUMENTS_START_SIMULATION)
-            sys.exit()
-        else:
-            cids = clients_ids
-    else:
-        if num_clients is None:
-            log(ERROR, INVALID_ARGUMENTS_START_SIMULATION)
-            sys.exit()
-        else:
-            cids = [str(x) for x in range(num_clients)]
+    # Create node-id to partition-id mapping
+    nodes_mapping = _create_node_id_to_partition_mapping(num_clients)
     # Default arguments for Ray initialization
     if not ray_init_args:
@@ -293,10 +322,12 @@ def start_simulation(
     )
     # Register one RayClientProxy object for each client with the ClientManager
-    for cid in cids:
+    for node_id, partition_id in nodes_mapping.items():
         client_proxy = RayActorClientProxy(
             client_fn=client_fn,
-            cid=cid,
+            node_id=node_id,
+            partition_id=partition_id,
+            num_partitions=num_clients,
             actor_pool=pool,
         )
         initialized_server.client_manager().register(client=client_proxy)

flwr/simulation/ray_transport/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2020 Flower Labs GmbH. All Rights Reserved.
+# Copyright 2021 Flower Labs GmbH. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

flwr/simulation/ray_transport/ray_actor.py CHANGED Viewed

@@ -14,9 +14,7 @@
 # ==============================================================================
 """Ray-based Flower Actor and ActorPool implementation."""
-import asyncio
 import threading
-import traceback
 from abc import ABC
 from logging import DEBUG, ERROR, WARNING
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
@@ -25,22 +23,13 @@ import ray
 from ray import ObjectRef
 from ray.util.actor_pool import ActorPool
-from flwr.client.client_app import ClientApp, LoadClientAppError
+from flwr.client.client_app import ClientApp, ClientAppException, LoadClientAppError
 from flwr.common import Context, Message
 from flwr.common.logger import log
 ClientAppFn = Callable[[], ClientApp]
-class ClientException(Exception):
-    """Raised when client side logic crashes with an exception."""
-    def __init__(self, message: str):
-        div = ">" * 7
-        self.message = "\n" + div + "A ClientException occurred." + message
-        super().__init__(self.message)
 class VirtualClientEngineActor(ABC):
     """Abstract base class for VirtualClientEngine Actors."""
@@ -71,17 +60,7 @@ class VirtualClientEngineActor(ABC):
             raise load_ex
         except Exception as ex:
-            client_trace = traceback.format_exc()
-            mssg = (
-                "\n\tSomething went wrong when running your client run."
-                "\n\tClient "
-                + cid
-                + " crashed when the "
-                + self.__class__.__name__
-                + " was running its run."
-                "\n\tException triggered on the client side: " + client_trace,
-            )
-            raise ClientException(str(mssg)) from ex
+            raise ClientAppException(str(ex)) from ex
         return cid, out_message, context
@@ -419,12 +398,6 @@ class VirtualClientEngineActorPool(ActorPool):
         return self._fetch_future_result(cid)
-def init_ray(*args: Any, **kwargs: Any) -> None:
-    """Intialises Ray if not already initialised."""
-    if not ray.is_initialized():
-        ray.init(*args, **kwargs)
 class BasicActorPool:
     """A basic actor pool."""
@@ -437,9 +410,7 @@ class BasicActorPool:
         self.client_resources = client_resources
         # Queue of idle actors
-        self.pool: "asyncio.Queue[Type[VirtualClientEngineActor]]" = asyncio.Queue(
-            maxsize=1024
-        )
+        self.pool: List[VirtualClientEngineActor] = []
         self.num_actors = 0
         # Resolve arguments to pass during actor init
@@ -453,38 +424,37 @@ class BasicActorPool:
         # Figure out how many actors can be created given the cluster resources
         # and the resources the user indicates each VirtualClient will need
         self.actors_capacity = pool_size_from_resources(client_resources)
-        self._future_to_actor: Dict[Any, Type[VirtualClientEngineActor]] = {}
+        self._future_to_actor: Dict[Any, VirtualClientEngineActor] = {}
     def is_actor_available(self) -> bool:
         """Return true if there is an idle actor."""
-        return self.pool.qsize() > 0
+        return len(self.pool) > 0
-    async def add_actors_to_pool(self, num_actors: int) -> None:
+    def add_actors_to_pool(self, num_actors: int) -> None:
         """Add actors to the pool.
         This method may be executed also if new resources are added to your Ray cluster
         (e.g. you add a new node).
         """
         for _ in range(num_actors):
-            await self.pool.put(self.create_actor_fn())  # type: ignore
+            self.pool.append(self.create_actor_fn())  # type: ignore
         self.num_actors += num_actors
-    async def terminate_all_actors(self) -> None:
+    def terminate_all_actors(self) -> None:
         """Terminate actors in pool."""
         num_terminated = 0
-        while self.pool.qsize():
-            actor = await self.pool.get()
+        for actor in self.pool:
             actor.terminate.remote()  # type: ignore
             num_terminated += 1
         log(DEBUG, "Terminated %i actors", num_terminated)
-    async def submit(
+    def submit(
         self, actor_fn: Any, job: Tuple[ClientAppFn, Message, str, Context]
     ) -> Any:
         """On idle actor, submit job and return future."""
         # Remove idle actor from pool
-        actor = await self.pool.get()
+        actor = self.pool.pop()
         # Submit job to actor
         app_fn, mssg, cid, context = job
         future = actor_fn(actor, app_fn, mssg, cid, context)
@@ -493,14 +463,18 @@ class BasicActorPool:
         self._future_to_actor[future] = actor
         return future
-    async def fetch_result_and_return_actor_to_pool(
+    def add_actor_back_to_pool(self, future: Any) -> None:
+        """Add actor assigned to run future back into the pool."""
+        actor = self._future_to_actor.pop(future)
+        self.pool.append(actor)
+    def fetch_result_and_return_actor_to_pool(
         self, future: Any
     ) -> Tuple[Message, Context]:
         """Pull result given a future and add actor back to pool."""
-        # Get actor that ran job
-        actor = self._future_to_actor.pop(future)
-        await self.pool.put(actor)
         # Retrieve result for object store
         # Instead of doing ray.get(future) we await it
-        _, out_mssg, updated_context = await future
+        _, out_mssg, updated_context = ray.get(future)
+        # Return the actor that ran the job to the pool
+        self.add_actor_back_to_pool(future)
         return out_mssg, updated_context

flwr-nightly 1.8.0.dev20240314__py3-none-any.whl → 1.11.0.dev20240813__py3-none-any.whl

Potentially problematic release.

flwr-nightly 1.8.0.dev20240314__py3-none-any.whl → 1.11.0.dev20240813__py3-none-any.whl