PyPI - flwr-nightly - Versions diffs - 1.8.0.dev20240315__py3-none-any.whl → 1.15.0.dev20250115__py3-none-any.whl - Mend

flwr-nightly 1.8.0.dev20240315__py3-none-any.whl → 1.15.0.dev20250115__py3-none-any.whl

Files changed (312) hide show

flwr/cli/app.py +16 -2
flwr/cli/build.py +181 -0
flwr/cli/cli_user_auth_interceptor.py +90 -0
flwr/cli/config_utils.py +343 -0
flwr/cli/example.py +4 -1
flwr/cli/install.py +253 -0
flwr/cli/log.py +182 -0
flwr/{server/superlink/state → cli/login}/__init__.py +4 -10
flwr/cli/login/login.py +88 -0
flwr/cli/ls.py +327 -0
flwr/cli/new/__init__.py +1 -0
flwr/cli/new/new.py +210 -66
flwr/cli/new/templates/app/.gitignore.tpl +163 -0
flwr/cli/new/templates/app/LICENSE.tpl +202 -0
flwr/cli/new/templates/app/README.baseline.md.tpl +127 -0
flwr/cli/new/templates/app/README.flowertune.md.tpl +66 -0
flwr/cli/new/templates/app/README.md.tpl +16 -32
flwr/cli/new/templates/app/code/__init__.baseline.py.tpl +1 -0
flwr/cli/new/templates/app/code/__init__.py.tpl +1 -1
flwr/cli/new/templates/app/code/client.baseline.py.tpl +58 -0
flwr/cli/new/templates/app/code/client.huggingface.py.tpl +55 -0
flwr/cli/new/templates/app/code/client.jax.py.tpl +50 -0
flwr/cli/new/templates/app/code/client.mlx.py.tpl +73 -0
flwr/cli/new/templates/app/code/client.numpy.py.tpl +7 -7
flwr/cli/new/templates/app/code/client.pytorch.py.tpl +30 -21
flwr/cli/new/templates/app/code/client.sklearn.py.tpl +63 -0
flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +57 -1
flwr/cli/new/templates/app/code/dataset.baseline.py.tpl +36 -0
flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +126 -0
flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +87 -0
flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +78 -0
flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +94 -0
flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +83 -0
flwr/cli/new/templates/app/code/model.baseline.py.tpl +80 -0
flwr/cli/new/templates/app/code/server.baseline.py.tpl +46 -0
flwr/cli/new/templates/app/code/server.huggingface.py.tpl +38 -0
flwr/cli/new/templates/app/code/server.jax.py.tpl +26 -0
flwr/cli/new/templates/app/code/server.mlx.py.tpl +31 -0
flwr/cli/new/templates/app/code/server.numpy.py.tpl +22 -9
flwr/cli/new/templates/app/code/server.pytorch.py.tpl +21 -18
flwr/cli/new/templates/app/code/server.sklearn.py.tpl +36 -0
flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +29 -1
flwr/cli/new/templates/app/code/strategy.baseline.py.tpl +1 -0
flwr/cli/new/templates/app/code/task.huggingface.py.tpl +102 -0
flwr/cli/new/templates/app/code/task.jax.py.tpl +57 -0
flwr/cli/new/templates/app/code/task.mlx.py.tpl +102 -0
flwr/cli/new/templates/app/code/task.numpy.py.tpl +7 -0
flwr/cli/new/templates/app/code/task.pytorch.py.tpl +29 -24
flwr/cli/new/templates/app/code/task.sklearn.py.tpl +67 -0
flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +53 -0
flwr/cli/new/templates/app/code/utils.baseline.py.tpl +1 -0
flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +138 -0
flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +68 -0
flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +46 -0
flwr/cli/new/templates/app/pyproject.jax.toml.tpl +35 -0
flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +39 -0
flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +25 -12
flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +29 -14
flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +35 -0
flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +29 -14
flwr/cli/run/__init__.py +1 -0
flwr/cli/run/run.py +212 -34
flwr/cli/stop.py +130 -0
flwr/cli/utils.py +240 -5
flwr/client/__init__.py +3 -2
flwr/client/app.py +432 -255
flwr/client/client.py +1 -11
flwr/client/client_app.py +74 -13
flwr/client/clientapp/__init__.py +22 -0
flwr/client/clientapp/app.py +259 -0
flwr/client/clientapp/clientappio_servicer.py +244 -0
flwr/client/clientapp/utils.py +115 -0
flwr/client/dpfedavg_numpy_client.py +7 -8
flwr/client/grpc_adapter_client/__init__.py +15 -0
flwr/client/grpc_adapter_client/connection.py +98 -0
flwr/client/grpc_client/connection.py +21 -7
flwr/client/grpc_rere_client/__init__.py +1 -1
flwr/client/grpc_rere_client/client_interceptor.py +176 -0
flwr/client/grpc_rere_client/connection.py +163 -56
flwr/client/grpc_rere_client/grpc_adapter.py +167 -0
flwr/client/heartbeat.py +74 -0
flwr/client/message_handler/__init__.py +1 -1
flwr/client/message_handler/message_handler.py +10 -11
flwr/client/mod/__init__.py +5 -5
flwr/client/mod/centraldp_mods.py +4 -2
flwr/client/mod/comms_mods.py +5 -4
flwr/client/mod/localdp_mod.py +10 -5
flwr/client/mod/secure_aggregation/__init__.py +1 -1
flwr/client/mod/secure_aggregation/secaggplus_mod.py +26 -26
flwr/client/mod/utils.py +2 -4
flwr/client/nodestate/__init__.py +26 -0
flwr/client/nodestate/in_memory_nodestate.py +38 -0
flwr/client/nodestate/nodestate.py +31 -0
flwr/client/nodestate/nodestate_factory.py +38 -0
flwr/client/numpy_client.py +8 -31
flwr/client/rest_client/__init__.py +1 -1
flwr/client/rest_client/connection.py +199 -176
flwr/client/run_info_store.py +112 -0
flwr/client/supernode/__init__.py +24 -0
flwr/client/supernode/app.py +321 -0
flwr/client/typing.py +1 -0
flwr/common/__init__.py +17 -11
flwr/common/address.py +47 -3
flwr/common/args.py +153 -0
flwr/common/auth_plugin/__init__.py +24 -0
flwr/common/auth_plugin/auth_plugin.py +121 -0
flwr/common/config.py +243 -0
flwr/common/constant.py +135 -1
flwr/common/context.py +32 -2
flwr/common/date.py +22 -4
flwr/common/differential_privacy.py +2 -2
flwr/common/dp.py +2 -4
flwr/common/exit_handlers.py +3 -3
flwr/common/grpc.py +164 -5
flwr/common/logger.py +230 -12
flwr/common/message.py +191 -106
flwr/common/object_ref.py +179 -44
flwr/common/pyproject.py +1 -0
flwr/common/record/__init__.py +2 -1
flwr/common/record/configsrecord.py +58 -18
flwr/common/record/metricsrecord.py +57 -17
flwr/common/record/parametersrecord.py +88 -20
flwr/common/record/recordset.py +153 -30
flwr/common/record/typeddict.py +30 -55
flwr/common/recordset_compat.py +31 -12
flwr/common/retry_invoker.py +123 -30
flwr/common/secure_aggregation/__init__.py +1 -1
flwr/common/secure_aggregation/crypto/__init__.py +1 -1
flwr/common/secure_aggregation/crypto/shamir.py +11 -11
flwr/common/secure_aggregation/crypto/symmetric_encryption.py +68 -4
flwr/common/secure_aggregation/ndarrays_arithmetic.py +17 -17
flwr/common/secure_aggregation/quantization.py +8 -8
flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
flwr/common/secure_aggregation/secaggplus_utils.py +10 -12
flwr/common/serde.py +304 -23
flwr/common/telemetry.py +65 -29
flwr/common/typing.py +120 -19
flwr/common/version.py +17 -3
flwr/proto/clientappio_pb2.py +45 -0
flwr/proto/clientappio_pb2.pyi +132 -0
flwr/proto/clientappio_pb2_grpc.py +135 -0
flwr/proto/clientappio_pb2_grpc.pyi +53 -0
flwr/proto/exec_pb2.py +62 -0
flwr/proto/exec_pb2.pyi +212 -0
flwr/proto/exec_pb2_grpc.py +237 -0
flwr/proto/exec_pb2_grpc.pyi +93 -0
flwr/proto/fab_pb2.py +31 -0
flwr/proto/fab_pb2.pyi +65 -0
flwr/proto/fab_pb2_grpc.py +4 -0
flwr/proto/fab_pb2_grpc.pyi +4 -0
flwr/proto/fleet_pb2.py +42 -23
flwr/proto/fleet_pb2.pyi +123 -1
flwr/proto/fleet_pb2_grpc.py +170 -0
flwr/proto/fleet_pb2_grpc.pyi +61 -0
flwr/proto/grpcadapter_pb2.py +32 -0
flwr/proto/grpcadapter_pb2.pyi +43 -0
flwr/proto/grpcadapter_pb2_grpc.py +66 -0
flwr/proto/grpcadapter_pb2_grpc.pyi +24 -0
flwr/proto/log_pb2.py +29 -0
flwr/proto/log_pb2.pyi +39 -0
flwr/proto/log_pb2_grpc.py +4 -0
flwr/proto/log_pb2_grpc.pyi +4 -0
flwr/proto/message_pb2.py +41 -0
flwr/proto/message_pb2.pyi +128 -0
flwr/proto/message_pb2_grpc.py +4 -0
flwr/proto/message_pb2_grpc.pyi +4 -0
flwr/proto/node_pb2.py +2 -2
flwr/proto/node_pb2.pyi +1 -4
flwr/proto/recordset_pb2.py +35 -33
flwr/proto/recordset_pb2.pyi +40 -14
flwr/proto/run_pb2.py +64 -0
flwr/proto/run_pb2.pyi +268 -0
flwr/proto/run_pb2_grpc.py +4 -0
flwr/proto/run_pb2_grpc.pyi +4 -0
flwr/proto/serverappio_pb2.py +52 -0
flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +62 -20
flwr/proto/serverappio_pb2_grpc.py +410 -0
flwr/proto/serverappio_pb2_grpc.pyi +160 -0
flwr/proto/simulationio_pb2.py +38 -0
flwr/proto/simulationio_pb2.pyi +65 -0
flwr/proto/simulationio_pb2_grpc.py +239 -0
flwr/proto/simulationio_pb2_grpc.pyi +94 -0
flwr/proto/task_pb2.py +7 -8
flwr/proto/task_pb2.pyi +8 -5
flwr/proto/transport_pb2.py +8 -8
flwr/proto/transport_pb2.pyi +9 -6
flwr/server/__init__.py +2 -10
flwr/server/app.py +579 -402
flwr/server/client_manager.py +8 -6
flwr/server/compat/app.py +6 -62
flwr/server/compat/app_utils.py +14 -9
flwr/server/compat/driver_client_proxy.py +25 -59
flwr/server/compat/legacy_context.py +5 -4
flwr/server/driver/__init__.py +2 -0
flwr/server/driver/driver.py +36 -131
flwr/server/driver/grpc_driver.py +220 -81
flwr/server/driver/inmemory_driver.py +183 -0
flwr/server/history.py +28 -29
flwr/server/run_serverapp.py +15 -126
flwr/server/server.py +50 -44
flwr/server/server_app.py +59 -10
flwr/server/serverapp/__init__.py +22 -0
flwr/server/serverapp/app.py +256 -0
flwr/server/serverapp_components.py +52 -0
flwr/server/strategy/__init__.py +2 -2
flwr/server/strategy/aggregate.py +37 -23
flwr/server/strategy/bulyan.py +9 -9
flwr/server/strategy/dp_adaptive_clipping.py +25 -25
flwr/server/strategy/dp_fixed_clipping.py +23 -22
flwr/server/strategy/dpfedavg_adaptive.py +8 -8
flwr/server/strategy/dpfedavg_fixed.py +13 -12
flwr/server/strategy/fault_tolerant_fedavg.py +11 -11
flwr/server/strategy/fedadagrad.py +9 -9
flwr/server/strategy/fedadam.py +20 -10
flwr/server/strategy/fedavg.py +16 -16
flwr/server/strategy/fedavg_android.py +17 -17
flwr/server/strategy/fedavgm.py +9 -9
flwr/server/strategy/fedmedian.py +5 -5
flwr/server/strategy/fedopt.py +6 -6
flwr/server/strategy/fedprox.py +7 -7
flwr/server/strategy/fedtrimmedavg.py +8 -8
flwr/server/strategy/fedxgb_bagging.py +12 -12
flwr/server/strategy/fedxgb_cyclic.py +10 -10
flwr/server/strategy/fedxgb_nn_avg.py +6 -6
flwr/server/strategy/fedyogi.py +9 -9
flwr/server/strategy/krum.py +9 -9
flwr/server/strategy/qfedavg.py +16 -16
flwr/server/strategy/strategy.py +10 -10
flwr/server/superlink/driver/__init__.py +2 -2
flwr/server/superlink/driver/serverappio_grpc.py +61 -0
flwr/server/superlink/driver/serverappio_servicer.py +361 -0
flwr/server/superlink/ffs/__init__.py +24 -0
flwr/server/superlink/ffs/disk_ffs.py +108 -0
flwr/server/superlink/ffs/ffs.py +79 -0
flwr/server/superlink/ffs/ffs_factory.py +47 -0
flwr/server/superlink/fleet/__init__.py +1 -1
flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +162 -0
flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +4 -2
flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +3 -2
flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -154
flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +120 -13
flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +228 -0
flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
flwr/server/superlink/fleet/message_handler/message_handler.py +156 -13
flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
flwr/server/superlink/fleet/rest_rere/rest_api.py +119 -81
flwr/server/superlink/fleet/vce/__init__.py +1 -0
flwr/server/superlink/fleet/vce/backend/__init__.py +4 -4
flwr/server/superlink/fleet/vce/backend/backend.py +8 -9
flwr/server/superlink/fleet/vce/backend/raybackend.py +87 -68
flwr/server/superlink/fleet/vce/vce_api.py +208 -146
flwr/server/superlink/linkstate/__init__.py +28 -0
flwr/server/superlink/linkstate/in_memory_linkstate.py +569 -0
flwr/server/superlink/linkstate/linkstate.py +376 -0
flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +19 -10
flwr/server/superlink/linkstate/sqlite_linkstate.py +1196 -0
flwr/server/superlink/linkstate/utils.py +399 -0
flwr/server/superlink/simulation/__init__.py +15 -0
flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
flwr/server/superlink/simulation/simulationio_servicer.py +186 -0
flwr/server/superlink/utils.py +65 -0
flwr/server/typing.py +2 -0
flwr/server/utils/__init__.py +1 -1
flwr/server/utils/tensorboard.py +5 -5
flwr/server/utils/validator.py +40 -45
flwr/server/workflow/default_workflows.py +70 -26
flwr/server/workflow/secure_aggregation/secagg_workflow.py +1 -0
flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +40 -27
flwr/simulation/__init__.py +12 -5
flwr/simulation/app.py +247 -315
flwr/simulation/legacy_app.py +404 -0
flwr/simulation/ray_transport/__init__.py +1 -1
flwr/simulation/ray_transport/ray_actor.py +42 -67
flwr/simulation/ray_transport/ray_client_proxy.py +37 -17
flwr/simulation/ray_transport/utils.py +1 -0
flwr/simulation/run_simulation.py +306 -163
flwr/simulation/simulationio_connection.py +89 -0
flwr/superexec/__init__.py +15 -0
flwr/superexec/app.py +59 -0
flwr/superexec/deployment.py +188 -0
flwr/superexec/exec_grpc.py +80 -0
flwr/superexec/exec_servicer.py +231 -0
flwr/superexec/exec_user_auth_interceptor.py +101 -0
flwr/superexec/executor.py +96 -0
flwr/superexec/simulation.py +124 -0
{flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/METADATA +33 -26
flwr_nightly-1.15.0.dev20250115.dist-info/RECORD +328 -0
flwr_nightly-1.15.0.dev20250115.dist-info/entry_points.txt +12 -0
flwr/cli/flower_toml.py +0 -140
flwr/cli/new/templates/app/flower.toml.tpl +0 -13
flwr/cli/new/templates/app/requirements.numpy.txt.tpl +0 -2
flwr/cli/new/templates/app/requirements.pytorch.txt.tpl +0 -4
flwr/cli/new/templates/app/requirements.tensorflow.txt.tpl +0 -4
flwr/client/node_state.py +0 -48
flwr/client/node_state_tests.py +0 -65
flwr/proto/driver_pb2.py +0 -44
flwr/proto/driver_pb2_grpc.py +0 -169
flwr/proto/driver_pb2_grpc.pyi +0 -66
flwr/server/superlink/driver/driver_grpc.py +0 -54
flwr/server/superlink/driver/driver_servicer.py +0 -129
flwr/server/superlink/state/in_memory_state.py +0 -230
flwr/server/superlink/state/sqlite_state.py +0 -630
flwr/server/superlink/state/state.py +0 -154
flwr_nightly-1.8.0.dev20240315.dist-info/RECORD +0 -211
flwr_nightly-1.8.0.dev20240315.dist-info/entry_points.txt +0 -9
{flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/LICENSE +0 -0
{flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/WHEEL +0 -0

flwr/simulation/legacy_app.py ADDED Viewed

@@ -0,0 +1,404 @@
+# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Flower simulation app."""
+import asyncio
+import logging
+import sys
+import threading
+import traceback
+import warnings
+from logging import ERROR, INFO
+from typing import Any, Optional, Union
+import ray
+from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
+from flwr.client import ClientFnExt
+from flwr.common import EventType, event
+from flwr.common.constant import NODE_ID_NUM_BYTES, SUPERLINK_NODE_ID
+from flwr.common.logger import (
+    log,
+    set_logger_propagation,
+    warn_deprecated_feature,
+    warn_unsupported_feature,
+)
+from flwr.server.client_manager import ClientManager
+from flwr.server.history import History
+from flwr.server.server import Server, init_defaults, run_fl
+from flwr.server.server_config import ServerConfig
+from flwr.server.strategy import Strategy
+from flwr.server.superlink.linkstate.utils import generate_rand_int_from_bytes
+from flwr.simulation.ray_transport.ray_actor import (
+    ClientAppActor,
+    VirtualClientEngineActor,
+    VirtualClientEngineActorPool,
+    pool_size_from_resources,
+)
+from flwr.simulation.ray_transport.ray_client_proxy import RayActorClientProxy
+INVALID_ARGUMENTS_START_SIMULATION = """
+INVALID ARGUMENTS ERROR
+Invalid Arguments in method:
+`start_simulation(
+    *,
+    client_fn: ClientFn,
+    num_clients: int,
+    clients_ids: Optional[List[str]] = None,
+    client_resources: Optional[Dict[str, float]] = None,
+    server: Optional[Server] = None,
+    config: ServerConfig = None,
+    strategy: Optional[Strategy] = None,
+    client_manager: Optional[ClientManager] = None,
+    ray_init_args: Optional[Dict[str, Any]] = None,
+) -> None:`
+REASON:
+    Method requires:
+        - Either `num_clients`[int] or `clients_ids`[List[str]]
+        to be set exclusively.
+        OR
+        - `len(clients_ids)` == `num_clients`
+"""  # Error text for bad `start_simulation` arguments. NOTE(review): the quoted signature differs from the current one (ClientFnExt, extra actor_* params, returns History) and nothing in the visible part of this module references the constant.
+NodeToPartitionMapping = dict[int, int]  # keys are node IDs, values are partition IDs
+def _create_node_id_to_partition_mapping(
+    num_clients: int,
+) -> NodeToPartitionMapping:
+    """Generate a node_id:partition_id mapping."""
+    nodes_mapping: NodeToPartitionMapping = {}  # {node-id; partition-id}
+    for i in range(num_clients):
+        while True:
+            node_id = generate_rand_int_from_bytes(
+                NODE_ID_NUM_BYTES, exclude=[SUPERLINK_NODE_ID, 0]
+            )
+            if node_id not in nodes_mapping:
+                break
+        nodes_mapping[node_id] = i
+    return nodes_mapping
+# pylint: disable=too-many-arguments,too-many-statements,too-many-branches
+def start_simulation(
+    *,
+    client_fn: ClientFnExt,
+    num_clients: int,
+    clients_ids: Optional[list[str]] = None,  # UNSUPPORTED, WILL BE REMOVED
+    client_resources: Optional[dict[str, float]] = None,
+    server: Optional[Server] = None,
+    config: Optional[ServerConfig] = None,
+    strategy: Optional[Strategy] = None,
+    client_manager: Optional[ClientManager] = None,
+    ray_init_args: Optional[dict[str, Any]] = None,
+    keep_initialised: Optional[bool] = False,
+    actor_type: type[VirtualClientEngineActor] = ClientAppActor,
+    actor_kwargs: Optional[dict[str, Any]] = None,
+    actor_scheduling: Union[str, NodeAffinitySchedulingStrategy] = "DEFAULT",
+) -> History:
+    """Start a Ray-based Flower simulation server.
+    Warning
+    -------
+    This function is deprecated since 1.13.0. Use :code: `flwr run` to start a Flower
+    simulation.
+    Parameters
+    ----------
+    client_fn : ClientFnExt
+        A function creating `Client` instances. The function must have the signature
+        `client_fn(context: Context). It should return
+        a single client instance of type `Client`. Note that the created client
+        instances are ephemeral and will often be destroyed after a single method
+        invocation. Since client instances are not long-lived, they should not attempt
+        to carry state over method invocations. Any state required by the instance
+        (model, dataset, hyperparameters, ...) should be (re-)created in either the
+        call to `client_fn` or the call to any of the client methods (e.g., load
+        evaluation data in the `evaluate` method itself).
+    num_clients : int
+        The total number of clients in this simulation.
+    clients_ids : Optional[List[str]]
+        UNSUPPORTED, WILL BE REMOVED. USE `num_clients` INSTEAD.
+        List `client_id`s for each client. This is only required if
+        `num_clients` is not set. Setting both `num_clients` and `clients_ids`
+        with `len(clients_ids)` not equal to `num_clients` generates an error.
+        Using this argument will raise an error.
+    client_resources : Optional[Dict[str, float]] (default: `{"num_cpus": 1, "num_gpus": 0.0}`)
+        CPU and GPU resources for a single client. Supported keys
+        are `num_cpus` and `num_gpus`. To understand the GPU utilization caused by
+        `num_gpus`, as well as using custom resources, please consult the Ray
+        documentation.
+    server : Optional[flwr.server.Server] (default: None).
+        An implementation of the abstract base class `flwr.server.Server`. If no
+        instance is provided, then `start_server` will create one.
+    config: ServerConfig (default: None).
+        Currently supported values are `num_rounds` (int, default: 1) and
+        `round_timeout` in seconds (float, default: None).
+    strategy : Optional[flwr.server.Strategy] (default: None)
+        An implementation of the abstract base class `flwr.server.Strategy`. If
+        no strategy is provided, then `start_server` will use
+        `flwr.server.strategy.FedAvg`.
+    client_manager : Optional[flwr.server.ClientManager] (default: None)
+        An implementation of the abstract base class `flwr.server.ClientManager`.
+        If no implementation is provided, then `start_simulation` will use
+        `flwr.server.client_manager.SimpleClientManager`.
+    ray_init_args : Optional[Dict[str, Any]] (default: None)
+        Optional dictionary containing arguments for the call to `ray.init`.
+        If ray_init_args is None (the default), Ray will be initialized with
+        the following default args:
+        { "ignore_reinit_error": True, "include_dashboard": False }
+        An empty dictionary can be used (ray_init_args={}) to prevent any
+        arguments from being passed to ray.init.
+    keep_initialised: Optional[bool] (default: False)
+        Set to True to prevent `ray.shutdown()` in case `ray.is_initialized()=True`.
+    actor_type: VirtualClientEngineActor (default: ClientAppActor)
+        Optionally specify the type of actor to use. The actor object, which
+        persists throughout the simulation, will be the process in charge of
+        executing a ClientApp wrapping input argument `client_fn`.
+    actor_kwargs: Optional[Dict[str, Any]] (default: None)
+        If you want to create your own Actor classes, you might need to pass
+        some input argument. You can use this dictionary for such purpose.
+    actor_scheduling: Optional[Union[str, NodeAffinitySchedulingStrategy]]
+        (default: "DEFAULT")
+        Optional string ("DEFAULT" or "SPREAD") for the VCE to choose in which
+        node the actor is placed. If you are an advanced user needed more control
+        you can use lower-level scheduling strategies to pin actors to specific
+        compute nodes (e.g. via NodeAffinitySchedulingStrategy). Please note this
+        is an advanced feature. For all details, please refer to the Ray documentation:
+        https://docs.ray.io/en/latest/ray-core/scheduling/index.html
+    Returns
+    -------
+    hist : flwr.server.history.History
+        Object containing metrics from training.
+    """  # noqa: E501
+    # pylint: disable-msg=too-many-locals
+    msg = (
+        "flwr.simulation.start_simulation() is deprecated."
+        "\n\tInstead, use the `flwr run` CLI command to start a local simulation "
+        "in your Flower app, as shown for example below:"
+        "\n\n\t\t$ flwr new  # Create a new Flower app from a template"
+        "\n\n\t\t$ flwr run  # Run the Flower app in Simulation Mode"
+        "\n\n\tUsing `start_simulation()` is deprecated."
+    )
+    warn_deprecated_feature(name=msg)
+    event(
+        EventType.START_SIMULATION_ENTER,
+        {"num_clients": len(clients_ids) if clients_ids is not None else num_clients},
+    )
+    if clients_ids is not None:
+        warn_unsupported_feature(
+            "Passing `clients_ids` to `start_simulation` is deprecated and not longer "
+            "used by `start_simulation`. Use `num_clients` exclusively instead."
+        )
+        log(ERROR, "`clients_ids` argument used.")
+        sys.exit()
+    # Set logger propagation
+    loop: Optional[asyncio.AbstractEventLoop] = None
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = None
+    finally:
+        if loop and loop.is_running():
+            # Set logger propagation to False to prevent duplicated log output in Colab.
+            logger = logging.getLogger("flwr")
+            _ = set_logger_propagation(logger, False)
+    # Initialize server and server config
+    initialized_server, initialized_config = init_defaults(
+        server=server,
+        config=config,
+        strategy=strategy,
+        client_manager=client_manager,
+    )
+    log(
+        INFO,
+        "Starting Flower simulation, config: %s",
+        initialized_config,
+    )
+    # Create node-id to partition-id mapping
+    nodes_mapping = _create_node_id_to_partition_mapping(num_clients)
+    # Default arguments for Ray initialization
+    if not ray_init_args:
+        ray_init_args = {
+            "ignore_reinit_error": True,
+            "include_dashboard": False,
+        }
+    # Shut down Ray if it has already been initialized (unless asked not to)
+    if ray.is_initialized() and not keep_initialised:
+        ray.shutdown()
+    # Initialize Ray
+    ray.init(**ray_init_args)
+    cluster_resources = ray.cluster_resources()
+    log(
+        INFO,
+        "Flower VCE: Ray initialized with resources: %s",
+        cluster_resources,
+    )
+    log(
+        INFO,
+        "Optimize your simulation with Flower VCE: "
+        "https://flower.ai/docs/framework/how-to-run-simulations.html",
+    )
+    # Log the resources that a single client will be able to use
+    if client_resources is None:
+        log(
+            INFO,
+            "No `client_resources` specified. Using minimal resources for clients.",
+        )
+        client_resources = {"num_cpus": 1, "num_gpus": 0.0}
+    # Each client needs at the very least one CPU
+    if "num_cpus" not in client_resources:
+        warnings.warn(
+            "No `num_cpus` specified in `client_resources`. "
+            "Using `num_cpus=1` for each client.",
+            stacklevel=2,
+        )
+        client_resources["num_cpus"] = 1
+    log(
+        INFO,
+        "Flower VCE: Resources for each Virtual Client: %s",
+        client_resources,
+    )
+    actor_args = {} if actor_kwargs is None else actor_kwargs
+    # An actor factory. This is called N times to add N actors
+    # to the pool. If at some point the pool can accommodate more actors
+    # this will be called again.
+    def create_actor_fn() -> type[VirtualClientEngineActor]:
+        return actor_type.options(  # type: ignore
+            **client_resources,
+            scheduling_strategy=actor_scheduling,
+        ).remote(**actor_args)
+    # Instantiate ActorPool
+    pool = VirtualClientEngineActorPool(
+        create_actor_fn=create_actor_fn,
+        client_resources=client_resources,
+    )
+    f_stop = threading.Event()
+    # Periodically, check if the cluster has grown (i.e. a new
+    # node has been added). If this happens, we likely want to grow
+    # the actor pool by adding more Actors to it.
+    def update_resources(f_stop: threading.Event) -> None:
+        """Periodically check if more actors can be added to the pool.
+        If so, extend the pool.
+        """
+        if not f_stop.is_set():
+            num_max_actors = pool_size_from_resources(client_resources)
+            if num_max_actors > pool.num_actors:
+                num_new = num_max_actors - pool.num_actors
+                log(
+                    INFO, "The cluster expanded. Adding %s actors to the pool.", num_new
+                )
+                pool.add_actors_to_pool(num_actors=num_new)
+            threading.Timer(10, update_resources, [f_stop]).start()
+    update_resources(f_stop)
+    log(
+        INFO,
+        "Flower VCE: Creating %s with %s actors",
+        pool.__class__.__name__,
+        pool.num_actors,
+    )
+    # Register one RayClientProxy object for each client with the ClientManager
+    for node_id, partition_id in nodes_mapping.items():
+        client_proxy = RayActorClientProxy(
+            client_fn=client_fn,
+            node_id=node_id,
+            partition_id=partition_id,
+            num_partitions=num_clients,
+            actor_pool=pool,
+        )
+        initialized_server.client_manager().register(client=client_proxy)
+    hist = History()
+    # pylint: disable=broad-except
+    try:
+        # Start training
+        hist = run_fl(
+            server=initialized_server,
+            config=initialized_config,
+        )
+    except Exception as ex:
+        log(ERROR, ex)
+        log(ERROR, traceback.format_exc())
+        log(
+            ERROR,
+            "Your simulation crashed :(. This could be because of several reasons. "
+            "The most common are: "
+            "\n\t > Sometimes, issues in the simulation code itself can cause crashes. "
+            "It's always a good idea to double-check your code for any potential bugs "
+            "or inconsistencies that might be contributing to the problem. "
+            "For example: "
+            "\n\t\t - You might be using a class attribute in your clients that "
+            "hasn't been defined."
+            "\n\t\t - There could be an incorrect method call to a 3rd party library "
+            "(e.g., PyTorch)."
+            "\n\t\t - The return types of methods in your clients/strategies might be "
+            "incorrect."
+            "\n\t > Your system couldn't fit a single VirtualClient: try lowering "
+            "`client_resources`."
+            "\n\t > All the actors in your pool crashed. This could be because: "
+            "\n\t\t - You clients hit an out-of-memory (OOM) error and actors couldn't "
+            "recover from it. Try launching your simulation with more generous "
+            "`client_resources` setting (i.e. it seems %s is "
+            "not enough for your run). Use fewer concurrent actors. "
+            "\n\t\t - You were running a multi-node simulation and all worker nodes "
+            "disconnected. The head node might still be alive but cannot accommodate "
+            "any actor with resources: %s."
+            "\nTake a look at the Flower simulation examples for guidance "
+            "<https://flower.ai/docs/framework/how-to-run-simulations.html>.",
+            client_resources,
+            client_resources,
+        )
+        raise RuntimeError("Simulation crashed.") from ex
+    finally:
+        # Stop time monitoring resources in cluster
+        f_stop.set()
+        event(EventType.START_SIMULATION_LEAVE)
+    return hist

flwr/simulation/ray_transport/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2020 Flower Labs GmbH. All Rights Reserved.
+# Copyright 2021 Flower Labs GmbH. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

flwr/simulation/ray_transport/ray_actor.py CHANGED Viewed

@@ -14,33 +14,23 @@
 # ==============================================================================
 """Ray-based Flower Actor and ActorPool implementation."""
-import asyncio
 import threading
-import traceback
 from abc import ABC
 from logging import DEBUG, ERROR, WARNING
-from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
+from typing import Any, Callable, Optional, Union
 import ray
 from ray import ObjectRef
 from ray.util.actor_pool import ActorPool
-from flwr.client.client_app import ClientApp, LoadClientAppError
+from flwr.client.client_app import ClientApp, ClientAppException, LoadClientAppError
 from flwr.common import Context, Message
 from flwr.common.logger import log
 ClientAppFn = Callable[[], ClientApp]
-class ClientException(Exception):
-    """Raised when client side logic crashes with an exception."""
-    def __init__(self, message: str):
-        div = ">" * 7
-        self.message = "\n" + div + "A ClientException occurred." + message
-        super().__init__(self.message)
 class VirtualClientEngineActor(ABC):
     """Abstract base class for VirtualClientEngine Actors."""
@@ -55,7 +45,7 @@ class VirtualClientEngineActor(ABC):
         message: Message,
         cid: str,
         context: Context,
-    ) -> Tuple[str, Message, Context]:
+    ) -> tuple[str, Message, Context]:
         """Run a client run."""
         # Pass message through ClientApp and return a message
         # return also cid which is needed to ensure results
@@ -71,17 +61,7 @@ class VirtualClientEngineActor(ABC):
             raise load_ex
         except Exception as ex:
-            client_trace = traceback.format_exc()
-            mssg = (
-                "\n\tSomething went wrong when running your client run."
-                "\n\tClient "
-                + cid
-                + " crashed when the "
-                + self.__class__.__name__
-                + " was running its run."
-                "\n\tException triggered on the client side: " + client_trace,
-            )
-            raise ClientException(str(mssg)) from ex
+            raise ClientAppException(str(ex)) from ex
         return cid, out_message, context
@@ -102,7 +82,7 @@ class ClientAppActor(VirtualClientEngineActor):
             on_actor_init_fn()
-def pool_size_from_resources(client_resources: Dict[str, Union[int, float]]) -> int:
+def pool_size_from_resources(client_resources: dict[str, Union[int, float]]) -> int:
     """Calculate number of Actors that fit in the cluster.
     For this we consider the resources available on each node and those required per
@@ -145,14 +125,14 @@ def pool_size_from_resources(client_resources: Dict[str, Union[int, float]]) ->
             WARNING,
             "The ActorPool is empty. The system (CPUs=%s, GPUs=%s) "
             "does not meet the criteria to host at least one client with resources:"
-            " %s. Lowering the `client_resources` could help.",
+            " %s. Lowering these resources could help.",
             num_cpus,
             num_gpus,
             client_resources,
         )
         raise ValueError(
             "ActorPool is empty. Stopping Simulation. "
-            "Check 'client_resources' passed to `start_simulation`"
+            "Check `num_cpus` and/or `num_gpus` passed to the simulation engine"
         )
     return total_num_actors
@@ -183,9 +163,9 @@ class VirtualClientEngineActorPool(ActorPool):
     def __init__(
         self,
-        create_actor_fn: Callable[[], Type[VirtualClientEngineActor]],
-        client_resources: Dict[str, Union[int, float]],
-        actor_list: Optional[List[Type[VirtualClientEngineActor]]] = None,
+        create_actor_fn: Callable[[], type[VirtualClientEngineActor]],
+        client_resources: dict[str, Union[int, float]],
+        actor_list: Optional[list[type[VirtualClientEngineActor]]] = None,
     ):
         self.client_resources = client_resources
         self.create_actor_fn = create_actor_fn
@@ -204,10 +184,10 @@ class VirtualClientEngineActorPool(ActorPool):
         # A dict that maps cid to another dict containing: a reference to the remote job
         # and its status (i.e. whether it is ready or not)
-        self._cid_to_future: Dict[
-            str, Dict[str, Union[bool, Optional[ObjectRef[Any]]]]
+        self._cid_to_future: dict[
+            str, dict[str, Union[bool, Optional[ObjectRef[Any]]]]
         ] = {}
-        self.actor_to_remove: Set[str] = set()  # a set
+        self.actor_to_remove: set[str] = set()  # a set
         self.num_actors = len(actors)
         self.lock = threading.RLock()
@@ -231,7 +211,7 @@ class VirtualClientEngineActorPool(ActorPool):
             self._idle_actors.extend(new_actors)
             self.num_actors += num_actors
-    def submit(self, fn: Any, value: Tuple[ClientAppFn, Message, str, Context]) -> None:
+    def submit(self, fn: Any, value: tuple[ClientAppFn, Message, str, Context]) -> None:
         """Take an idle actor and assign it to run a client app and Message.
         Submit a job to an actor by first removing it from the list of idle actors, then
@@ -241,7 +221,7 @@ class VirtualClientEngineActorPool(ActorPool):
         actor = self._idle_actors.pop()
         if self._check_and_remove_actor_from_pool(actor):
             future = fn(actor, app_fn, mssg, cid, context)
-            future_key = tuple(future) if isinstance(future, List) else future
+            future_key = tuple(future) if isinstance(future, list) else future
             self._future_to_actor[future_key] = (self._next_task_index, actor, cid)
             self._next_task_index += 1
@@ -249,7 +229,7 @@ class VirtualClientEngineActorPool(ActorPool):
             self._cid_to_future[cid]["future"] = future_key
     def submit_client_job(
-        self, actor_fn: Any, job: Tuple[ClientAppFn, Message, str, Context]
+        self, actor_fn: Any, job: tuple[ClientAppFn, Message, str, Context]
     ) -> None:
         """Submit a job while tracking client ids."""
         _, _, cid, _ = job
@@ -289,7 +269,7 @@ class VirtualClientEngineActorPool(ActorPool):
         return self._cid_to_future[cid]["ready"]  # type: ignore
-    def _fetch_future_result(self, cid: str) -> Tuple[Message, Context]:
+    def _fetch_future_result(self, cid: str) -> tuple[Message, Context]:
         """Fetch result and updated context for a VirtualClient from Object Store.
         The job submitted by the ClientProxy interfacing with client with cid=cid is
@@ -403,7 +383,7 @@ class VirtualClientEngineActorPool(ActorPool):
     def get_client_result(
         self, cid: str, timeout: Optional[float]
-    ) -> Tuple[Message, Context]:
+    ) -> tuple[Message, Context]:
         """Get result from VirtualClient with specific cid."""
         # Loop until all jobs submitted to the pool are completed. Break early
         # if the result for the ClientProxy calling this method is ready
@@ -419,27 +399,19 @@ class VirtualClientEngineActorPool(ActorPool):
         return self._fetch_future_result(cid)
-def init_ray(*args: Any, **kwargs: Any) -> None:
-    """Intialises Ray if not already initialised."""
-    if not ray.is_initialized():
-        ray.init(*args, **kwargs)
 class BasicActorPool:
     """A basic actor pool."""
     def __init__(
         self,
-        actor_type: Type[VirtualClientEngineActor],
-        client_resources: Dict[str, Union[int, float]],
-        actor_kwargs: Dict[str, Any],
+        actor_type: type[VirtualClientEngineActor],
+        client_resources: dict[str, Union[int, float]],
+        actor_kwargs: dict[str, Any],
     ):
         self.client_resources = client_resources
         # Queue of idle actors
-        self.pool: "asyncio.Queue[Type[VirtualClientEngineActor]]" = asyncio.Queue(
-            maxsize=1024
-        )
+        self.pool: list[VirtualClientEngineActor] = []
         self.num_actors = 0
         # Resolve arguments to pass during actor init
@@ -453,38 +425,37 @@ class BasicActorPool:
         # Figure out how many actors can be created given the cluster resources
         # and the resources the user indicates each VirtualClient will need
         self.actors_capacity = pool_size_from_resources(client_resources)
-        self._future_to_actor: Dict[Any, Type[VirtualClientEngineActor]] = {}
+        self._future_to_actor: dict[Any, VirtualClientEngineActor] = {}
     def is_actor_available(self) -> bool:
         """Return true if there is an idle actor."""
-        return self.pool.qsize() > 0
+        return len(self.pool) > 0
-    async def add_actors_to_pool(self, num_actors: int) -> None:
+    def add_actors_to_pool(self, num_actors: int) -> None:
         """Add actors to the pool.
         This method may be executed also if new resources are added to your Ray cluster
         (e.g. you add a new node).
         """
         for _ in range(num_actors):
-            await self.pool.put(self.create_actor_fn())  # type: ignore
+            self.pool.append(self.create_actor_fn())  # type: ignore
         self.num_actors += num_actors
-    async def terminate_all_actors(self) -> None:
+    def terminate_all_actors(self) -> None:
         """Terminate actors in pool."""
         num_terminated = 0
-        while self.pool.qsize():
-            actor = await self.pool.get()
+        for actor in self.pool:
             actor.terminate.remote()  # type: ignore
             num_terminated += 1
         log(DEBUG, "Terminated %i actors", num_terminated)
-    async def submit(
-        self, actor_fn: Any, job: Tuple[ClientAppFn, Message, str, Context]
+    def submit(
+        self, actor_fn: Any, job: tuple[ClientAppFn, Message, str, Context]
     ) -> Any:
         """On idle actor, submit job and return future."""
         # Remove idle actor from pool
-        actor = await self.pool.get()
+        actor = self.pool.pop()
         # Submit job to actor
         app_fn, mssg, cid, context = job
         future = actor_fn(actor, app_fn, mssg, cid, context)
@@ -493,14 +464,18 @@ class BasicActorPool:
         self._future_to_actor[future] = actor
         return future
-    async def fetch_result_and_return_actor_to_pool(
+    def add_actor_back_to_pool(self, future: Any) -> None:
+        """Add the actor assigned to run future back into the pool."""
+        actor = self._future_to_actor.pop(future)
+        self.pool.append(actor)
+    def fetch_result_and_return_actor_to_pool(
         self, future: Any
-    ) -> Tuple[Message, Context]:
+    ) -> tuple[Message, Context]:
         """Pull result given a future and add actor back to pool."""
-        # Get actor that ran job
-        actor = self._future_to_actor.pop(future)
-        await self.pool.put(actor)
         # Retrieve result from object store
         # Wait on the future until its result is available
-        _, out_mssg, updated_context = await future
+        _, out_mssg, updated_context = ray.get(future)
+        # Return the actor that ran the job to the pool
+        self.add_actor_back_to_pool(future)
         return out_mssg, updated_context

flwr-nightly 1.8.0.dev20240315__py3-none-any.whl → 1.15.0.dev20250115__py3-none-any.whl

flwr-nightly 1.8.0.dev20240315__py3-none-any.whl → 1.15.0.dev20250115__py3-none-any.whl