flwr-nightly 1.8.0.dev20240227__py3-none-any.whl → 1.8.0.dev20240229__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. flwr/client/mod/__init__.py +3 -2
  2. flwr/client/mod/centraldp_mods.py +63 -2
  3. flwr/client/mod/secure_aggregation/secaggplus_mod.py +55 -75
  4. flwr/common/differential_privacy.py +77 -0
  5. flwr/common/differential_privacy_constants.py +1 -0
  6. flwr/common/secure_aggregation/secaggplus_constants.py +49 -27
  7. flwr/proto/error_pb2.py +26 -0
  8. flwr/proto/error_pb2.pyi +25 -0
  9. flwr/proto/error_pb2_grpc.py +4 -0
  10. flwr/proto/error_pb2_grpc.pyi +4 -0
  11. flwr/proto/task_pb2.py +8 -7
  12. flwr/proto/task_pb2.pyi +7 -2
  13. flwr/server/__init__.py +4 -0
  14. flwr/server/app.py +8 -31
  15. flwr/server/client_proxy.py +5 -0
  16. flwr/server/compat/__init__.py +2 -0
  17. flwr/server/compat/app.py +7 -88
  18. flwr/server/compat/app_utils.py +102 -0
  19. flwr/server/compat/driver_client_proxy.py +22 -10
  20. flwr/server/compat/legacy_context.py +55 -0
  21. flwr/server/run_serverapp.py +1 -1
  22. flwr/server/server.py +18 -8
  23. flwr/server/strategy/__init__.py +24 -14
  24. flwr/server/strategy/dp_adaptive_clipping.py +449 -0
  25. flwr/server/strategy/dp_fixed_clipping.py +5 -7
  26. flwr/server/superlink/driver/driver_grpc.py +54 -0
  27. flwr/server/superlink/driver/driver_servicer.py +4 -4
  28. flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +5 -0
  29. flwr/server/superlink/fleet/vce/__init__.py +1 -1
  30. flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -4
  31. flwr/server/superlink/fleet/vce/vce_api.py +236 -16
  32. flwr/server/typing.py +1 -0
  33. flwr/server/workflow/__init__.py +22 -0
  34. flwr/server/workflow/default_workflows.py +357 -0
  35. flwr/simulation/__init__.py +3 -0
  36. flwr/simulation/ray_transport/ray_client_proxy.py +28 -8
  37. flwr/simulation/run_simulation.py +177 -0
  38. {flwr_nightly-1.8.0.dev20240227.dist-info → flwr_nightly-1.8.0.dev20240229.dist-info}/METADATA +4 -3
  39. {flwr_nightly-1.8.0.dev20240227.dist-info → flwr_nightly-1.8.0.dev20240229.dist-info}/RECORD +42 -31
  40. {flwr_nightly-1.8.0.dev20240227.dist-info → flwr_nightly-1.8.0.dev20240229.dist-info}/entry_points.txt +1 -0
  41. {flwr_nightly-1.8.0.dev20240227.dist-info → flwr_nightly-1.8.0.dev20240229.dist-info}/LICENSE +0 -0
  42. {flwr_nightly-1.8.0.dev20240227.dist-info → flwr_nightly-1.8.0.dev20240229.dist-info}/WHEEL +0 -0
@@ -0,0 +1,54 @@
1
+ # Copyright 2020 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Driver gRPC API."""
16
+
17
+ from logging import INFO
18
+ from typing import Optional, Tuple
19
+
20
+ import grpc
21
+
22
+ from flwr.common import GRPC_MAX_MESSAGE_LENGTH
23
+ from flwr.common.logger import log
24
+ from flwr.proto.driver_pb2_grpc import ( # pylint: disable=E0611
25
+ add_DriverServicer_to_server,
26
+ )
27
+ from flwr.server.superlink.state import StateFactory
28
+
29
+ from ..fleet.grpc_bidi.grpc_server import generic_create_grpc_server
30
+ from .driver_servicer import DriverServicer
31
+
32
+
33
def run_driver_api_grpc(
    address: str,
    state_factory: StateFactory,
    certificates: Optional[Tuple[bytes, bytes, bytes]],
) -> grpc.Server:
    """Run Driver API (gRPC, request-response).

    Parameters
    ----------
    address : str
        Host:port the gRPC server will bind to.
    state_factory : StateFactory
        Factory providing the State instance the servicer operates on.
    certificates : Optional[Tuple[bytes, bytes, bytes]]
        (CA cert, server cert, server key) for TLS, or None for insecure mode.

    Returns
    -------
    grpc.Server
        The started gRPC server (caller is responsible for shutdown).
    """
    # Create Driver API gRPC servicer.
    # NOTE: originally annotated as `grpc.Server`, which is wrong —
    # DriverServicer is a request handler, not a server.
    driver_servicer: DriverServicer = DriverServicer(
        state_factory=state_factory,
    )
    driver_add_servicer_to_server_fn = add_DriverServicer_to_server
    driver_grpc_server = generic_create_grpc_server(
        servicer_and_add_fn=(driver_servicer, driver_add_servicer_to_server_fn),
        server_address=address,
        max_message_length=GRPC_MAX_MESSAGE_LENGTH,
        certificates=certificates,
    )

    log(INFO, "Flower ECE: Starting Driver API (gRPC-rere) on %s", address)
    driver_grpc_server.start()

    return driver_grpc_server
@@ -15,7 +15,7 @@
15
15
  """Driver API servicer."""
16
16
 
17
17
 
18
- from logging import INFO
18
+ from logging import DEBUG, INFO
19
19
  from typing import List, Optional, Set
20
20
  from uuid import UUID
21
21
 
@@ -70,7 +70,7 @@ class DriverServicer(driver_pb2_grpc.DriverServicer):
70
70
  self, request: PushTaskInsRequest, context: grpc.ServicerContext
71
71
  ) -> PushTaskInsResponse:
72
72
  """Push a set of TaskIns."""
73
- log(INFO, "DriverServicer.PushTaskIns")
73
+ log(DEBUG, "DriverServicer.PushTaskIns")
74
74
 
75
75
  # Validate request
76
76
  _raise_if(len(request.task_ins_list) == 0, "`task_ins_list` must not be empty")
@@ -95,7 +95,7 @@ class DriverServicer(driver_pb2_grpc.DriverServicer):
95
95
  self, request: PullTaskResRequest, context: grpc.ServicerContext
96
96
  ) -> PullTaskResResponse:
97
97
  """Pull a set of TaskRes."""
98
- log(INFO, "DriverServicer.PullTaskRes")
98
+ log(DEBUG, "DriverServicer.PullTaskRes")
99
99
 
100
100
  # Convert each task_id str to UUID
101
101
  task_ids: Set[UUID] = {UUID(task_id) for task_id in request.task_ids}
@@ -105,7 +105,7 @@ class DriverServicer(driver_pb2_grpc.DriverServicer):
105
105
 
106
106
  # Register callback
107
107
  def on_rpc_done() -> None:
108
- log(INFO, "DriverServicer.PullTaskRes callback: delete TaskIns/TaskRes")
108
+ log(DEBUG, "DriverServicer.PullTaskRes callback: delete TaskIns/TaskRes")
109
109
 
110
110
  if context.is_active():
111
111
  return
@@ -46,6 +46,7 @@ class GrpcClientProxy(ClientProxy):
46
46
  self,
47
47
  ins: common.GetPropertiesIns,
48
48
  timeout: Optional[float],
49
+ group_id: Optional[int],
49
50
  ) -> common.GetPropertiesRes:
50
51
  """Request client's set of internal properties."""
51
52
  get_properties_msg = serde.get_properties_ins_to_proto(ins)
@@ -65,6 +66,7 @@ class GrpcClientProxy(ClientProxy):
65
66
  self,
66
67
  ins: common.GetParametersIns,
67
68
  timeout: Optional[float],
69
+ group_id: Optional[int],
68
70
  ) -> common.GetParametersRes:
69
71
  """Return the current local model parameters."""
70
72
  get_parameters_msg = serde.get_parameters_ins_to_proto(ins)
@@ -84,6 +86,7 @@ class GrpcClientProxy(ClientProxy):
84
86
  self,
85
87
  ins: common.FitIns,
86
88
  timeout: Optional[float],
89
+ group_id: Optional[int],
87
90
  ) -> common.FitRes:
88
91
  """Refine the provided parameters using the locally held dataset."""
89
92
  fit_ins_msg = serde.fit_ins_to_proto(ins)
@@ -102,6 +105,7 @@ class GrpcClientProxy(ClientProxy):
102
105
  self,
103
106
  ins: common.EvaluateIns,
104
107
  timeout: Optional[float],
108
+ group_id: Optional[int],
105
109
  ) -> common.EvaluateRes:
106
110
  """Evaluate the provided parameters using the locally held dataset."""
107
111
  evaluate_msg = serde.evaluate_ins_to_proto(ins)
@@ -119,6 +123,7 @@ class GrpcClientProxy(ClientProxy):
119
123
  self,
120
124
  ins: common.ReconnectIns,
121
125
  timeout: Optional[float],
126
+ group_id: Optional[int],
122
127
  ) -> common.DisconnectRes:
123
128
  """Disconnect and (optionally) reconnect later."""
124
129
  reconnect_ins_msg = serde.reconnect_ins_to_proto(ins)
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
- """Fleet VirtualClientEngine side."""
15
+ """Fleet Simulation Engine side."""
16
16
 
17
17
  from .vce_api import start_vce
18
18
 
@@ -141,13 +141,13 @@ class RayBackend(Backend):
141
141
 
142
142
  Return output message and updated context.
143
143
  """
144
- node_id = message.metadata.dst_node_id
144
+ partition_id = message.metadata.partition_id
145
145
 
146
146
  try:
147
147
  # Submite a task to the pool
148
148
  future = await self.pool.submit(
149
149
  lambda a, a_fn, mssg, cid, state: a.run.remote(a_fn, mssg, cid, state),
150
- (app, message, str(node_id), context),
150
+ (app, message, str(partition_id), context),
151
151
  )
152
152
 
153
153
  await future
@@ -163,10 +163,9 @@ class RayBackend(Backend):
163
163
  except LoadClientAppError as load_ex:
164
164
  log(
165
165
  ERROR,
166
- "An exception was raised when processing a message. Terminating %s",
166
+ "An exception was raised when processing a message by %s",
167
167
  self.__class__.__name__,
168
168
  )
169
- await self.terminate()
170
169
  raise load_ex
171
170
 
172
171
  async def terminate(self) -> None:
@@ -12,19 +12,23 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
- """Fleet VirtualClientEngine API."""
15
+ """Fleet Simulation Engine API."""
16
+
16
17
 
17
18
  import asyncio
18
19
  import json
19
- from logging import ERROR, INFO
20
- from typing import Dict, Optional
20
+ import traceback
21
+ from logging import DEBUG, ERROR, INFO, WARN
22
+ from typing import Callable, Dict, List, Optional
21
23
 
22
- from flwr.client.client_app import ClientApp, load_client_app
24
+ from flwr.client.client_app import ClientApp, LoadClientAppError, load_client_app
23
25
  from flwr.client.node_state import NodeState
24
26
  from flwr.common.logger import log
27
+ from flwr.common.serde import message_from_taskins, message_to_taskres
28
+ from flwr.proto.task_pb2 import TaskIns # pylint: disable=E0611
25
29
  from flwr.server.superlink.state import StateFactory
26
30
 
27
- from .backend import error_messages_backends, supported_backends
31
+ from .backend import Backend, error_messages_backends, supported_backends
28
32
 
29
33
  NodeToPartitionMapping = Dict[int, int]
30
34
 
@@ -42,21 +46,223 @@ def _register_nodes(
42
46
  return nodes_mapping
43
47
 
44
48
 
45
- # pylint: disable=too-many-arguments,unused-argument
49
# pylint: disable=too-many-arguments,too-many-locals
async def worker(
    app_fn: Callable[[], ClientApp],
    queue: "asyncio.Queue[TaskIns]",
    node_states: Dict[int, NodeState],
    state_factory: StateFactory,
    nodes_mapping: NodeToPartitionMapping,
    backend: Backend,
) -> None:
    """Get TaskIns from queue and pass it to an actor in the pool to execute it."""
    state = state_factory.state()
    while True:
        try:
            task_ins: TaskIns = await queue.get()
            node_id = task_ins.task.consumer.node_id

            # Ensure a Context exists for this run, then fetch it
            node_state = node_states[node_id]
            node_state.register_context(run_id=task_ins.run_id)
            context = node_state.retrieve_context(run_id=task_ins.run_id)

            # Rebuild the Message and tag it with the data partition
            # this virtual node is mapped to
            message = message_from_taskins(task_ins)
            message.metadata.partition_id = nodes_mapping[node_id]

            # Hand the message to the backend for execution
            out_mssg, updated_context = await backend.process_message(
                app_fn, message, context
            )

            # Persist the (possibly) updated Context
            node_state.update_context(task_ins.run_id, context=updated_context)

            # Serialize the reply and record it in state
            state.store_task_res(message_to_taskres(out_mssg))

        except asyncio.CancelledError as e:
            # Normal shutdown path: producer cancelled this worker
            log(DEBUG, "Async worker: %s", e)
            break

        except LoadClientAppError as app_ex:
            # ClientApp could not be loaded — fatal, propagate upwards
            log(ERROR, "Async worker: %s", app_ex)
            log(ERROR, traceback.format_exc())
            raise

        except Exception as ex:  # pylint: disable=broad-exception-caught
            # Any other failure stops this worker (but not the engine)
            log(ERROR, ex)
            log(ERROR, traceback.format_exc())
            break
102
+
103
+
104
async def add_taskins_to_queue(
    queue: "asyncio.Queue[TaskIns]",
    state_factory: StateFactory,
    nodes_mapping: NodeToPartitionMapping,
    backend: Backend,
    consumers: List["asyncio.Task[None]"],
    f_stop: asyncio.Event,
) -> None:
    """Retrieve TaskIns and add it to the queue."""
    state = state_factory.state()
    num_initial_consumers = len(consumers)
    while not f_stop.is_set():
        # Pull at most one pending TaskIns per registered node
        for node_id in nodes_mapping:
            pending = state.get_task_ins(node_id=node_id, limit=1)
            if pending:
                await queue.put(pending[0])

        # How many worker tasks are still running?
        num_active = len(consumers) - sum(cc.done() for cc in consumers)

        # Warn when the worker pool has shrunk below half its initial size
        if num_active < num_initial_consumers // 2:
            log(
                WARN,
                "Number of active workers has more than halved: (%i/%i active)",
                num_active,
                num_initial_consumers,
            )

        # No workers left — the simulation cannot make progress
        if num_active == 0:
            raise RuntimeError("All workers have died. Ending Simulation.")

        # Periodic engine statistics
        log(
            DEBUG,
            "Simulation Engine stats: "
            "Active workers: (%i/%i) | %s (%i workers) | Tasks in queue: %i)",
            num_active,
            num_initial_consumers,
            backend.__class__.__name__,
            backend.num_workers,
            queue.qsize(),
        )
        await asyncio.sleep(1.0)
    log(DEBUG, "Async producer: Stopped pulling from StateFactory.")
150
+
151
+
152
async def run(
    app_fn: Callable[[], ClientApp],
    backend_fn: Callable[[], Backend],
    nodes_mapping: NodeToPartitionMapping,
    state_factory: StateFactory,
    node_states: Dict[int, NodeState],
    f_stop: asyncio.Event,
) -> None:
    """Run the VCE async.

    Builds the backend, spawns one worker per backend worker slot, and a
    producer that feeds TaskIns into the shared queue. Runs until `f_stop`
    is set or all workers die; always cancels workers and terminates the
    backend on the way out.
    """
    queue: "asyncio.Queue[TaskIns]" = asyncio.Queue(128)

    # Initialize up front so the `finally` block can always reference
    # them — the original raised NameError/UnboundLocalError here if
    # `backend_fn()` or `backend.build()` failed, masking the real error.
    worker_tasks: List["asyncio.Task[None]"] = []
    backend: Optional[Backend] = None

    try:
        # Instantiate and build backend
        backend = backend_fn()
        await backend.build()

        # Add workers (they submit Messages to Backend)
        worker_tasks = [
            asyncio.create_task(
                worker(
                    app_fn, queue, node_states, state_factory, nodes_mapping, backend
                )
            )
            for _ in range(backend.num_workers)
        ]
        # Create producer (adds TaskIns into Queue)
        producer = asyncio.create_task(
            add_taskins_to_queue(
                queue, state_factory, nodes_mapping, backend, worker_tasks, f_stop
            )
        )

        # Wait for producer to finish
        # The producer runs forever until f_stop is set or until
        # all worker (consumer) coroutines are completed. Workers
        # also run forever and only end if an exception is raised.
        await asyncio.gather(producer)

    except Exception as ex:  # pylint: disable=broad-exception-caught

        log(ERROR, "An exception occurred!! %s", ex)
        log(ERROR, traceback.format_exc())
        log(WARN, "Stopping Simulation Engine.")

        # Manually trigger stopping event
        f_stop.set()

        # Raise exception
        raise RuntimeError("Simulation Engine crashed.") from ex

    finally:
        # Producer task terminated, now cancel worker tasks
        for w_t in worker_tasks:
            _ = w_t.cancel()

        while not all(w_t.done() for w_t in worker_tasks):
            log(DEBUG, "Terminating async workers...")
            await asyncio.sleep(0.5)

        # (The original gathered the not-yet-done workers here, but after
        # the loop above that list is always empty — removed as dead code.)

        # Terminate backend only if it was successfully created
        if backend is not None:
            await backend.terminate()
218
+
219
+
220
+ # pylint: disable=too-many-arguments,unused-argument,too-many-locals
46
221
  def start_vce(
47
- num_supernodes: int,
48
222
  client_app_module_name: str,
49
223
  backend_name: str,
50
224
  backend_config_json_stream: str,
51
- state_factory: StateFactory,
52
225
  working_dir: str,
53
- f_stop: Optional[asyncio.Event] = None,
226
+ f_stop: asyncio.Event,
227
+ num_supernodes: Optional[int] = None,
228
+ state_factory: Optional[StateFactory] = None,
229
+ existing_nodes_mapping: Optional[NodeToPartitionMapping] = None,
54
230
  ) -> None:
55
- """Start Fleet API with the VirtualClientEngine (VCE)."""
56
- # Register SuperNodes
57
- nodes_mapping = _register_nodes(
58
- num_nodes=num_supernodes, state_factory=state_factory
59
- )
231
+ """Start Fleet API with the Simulation Engine."""
232
+ if num_supernodes is not None and existing_nodes_mapping is not None:
233
+ raise ValueError(
234
+ "Both `num_supernodes` and `existing_nodes_mapping` are provided, "
235
+ "but only one is allowed."
236
+ )
237
+ if num_supernodes is None:
238
+ if state_factory is None or existing_nodes_mapping is None:
239
+ raise ValueError(
240
+ "If not passing an existing `state_factory` and associated "
241
+ "`existing_nodes_mapping` you must supply `num_supernodes` to indicate "
242
+ "how many nodes to insert into a new StateFactory that will be created."
243
+ )
244
+ if existing_nodes_mapping:
245
+ if state_factory is None:
246
+ raise ValueError(
247
+ "`existing_nodes_mapping` was passed, but no `state_factory` was "
248
+ "passed."
249
+ )
250
+ log(INFO, "Using exiting NodeToPartitionMapping and StateFactory.")
251
+ # Use mapping constructed externally. This also means nodes
252
+ # have previously being registered.
253
+ nodes_mapping = existing_nodes_mapping
254
+
255
+ if not state_factory:
256
+ log(INFO, "A StateFactory was not supplied to the SimulationEngine.")
257
+ # Create an empty in-memory state factory
258
+ state_factory = StateFactory(":flwr-in-memory-state:")
259
+ log(INFO, "Created new %s.", state_factory.__class__.__name__)
260
+
261
+ if num_supernodes:
262
+ # Register SuperNodes
263
+ nodes_mapping = _register_nodes(
264
+ num_nodes=num_supernodes, state_factory=state_factory
265
+ )
60
266
 
61
267
  # Construct mapping of NodeStates
62
268
  node_states: Dict[int, NodeState] = {}
@@ -69,7 +275,6 @@ def start_vce(
69
275
 
70
276
  try:
71
277
  backend_type = supported_backends[backend_name]
72
- _ = backend_type(backend_config, work_dir=working_dir)
73
278
  except KeyError as ex:
74
279
  log(
75
280
  ERROR,
@@ -83,10 +288,25 @@ def start_vce(
83
288
 
84
289
  raise ex
85
290
 
291
+ def backend_fn() -> Backend:
292
+ """Instantiate a Backend."""
293
+ return backend_type(backend_config, work_dir=working_dir)
294
+
86
295
  log(INFO, "client_app_module_name = %s", client_app_module_name)
87
296
 
88
297
  def _load() -> ClientApp:
89
298
  app: ClientApp = load_client_app(client_app_module_name)
90
299
  return app
91
300
 
92
- # start backend
301
+ app_fn = _load
302
+
303
+ asyncio.run(
304
+ run(
305
+ app_fn,
306
+ backend_fn,
307
+ nodes_mapping,
308
+ state_factory,
309
+ node_states,
310
+ f_stop,
311
+ )
312
+ )
flwr/server/typing.py CHANGED
@@ -22,3 +22,4 @@ from flwr.common import Context
22
22
  from .driver import Driver
23
23
 
24
24
  ServerAppCallable = Callable[[Driver, Context], None]
25
+ Workflow = Callable[[Driver, Context], None]
@@ -0,0 +1,22 @@
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Workflows."""
16
+
17
+
18
+ from .default_workflows import DefaultWorkflow
19
+
20
+ __all__ = [
21
+ "DefaultWorkflow",
22
+ ]