flwr-nightly 1.13.0.dev20241025__py3-none-any.whl → 1.13.0.dev20241029__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/common/date.py +3 -3
- flwr/common/logger.py +31 -0
- flwr/common/serde.py +22 -0
- flwr/proto/driver_pb2.py +24 -23
- flwr/proto/driver_pb2_grpc.py +69 -0
- flwr/proto/driver_pb2_grpc.pyi +27 -0
- flwr/proto/log_pb2.py +29 -0
- flwr/proto/log_pb2.pyi +39 -0
- flwr/proto/log_pb2_grpc.py +4 -0
- flwr/proto/log_pb2_grpc.pyi +4 -0
- flwr/server/app.py +10 -8
- flwr/server/driver/driver.py +14 -0
- flwr/server/driver/grpc_driver.py +8 -15
- flwr/server/driver/inmemory_driver.py +3 -11
- flwr/server/run_serverapp.py +3 -4
- flwr/server/serverapp/app.py +193 -18
- flwr/server/superlink/driver/driver_servicer.py +34 -1
- flwr/server/superlink/linkstate/in_memory_linkstate.py +28 -2
- flwr/server/superlink/linkstate/linkstate.py +35 -0
- flwr/server/superlink/linkstate/sqlite_linkstate.py +50 -0
- flwr/simulation/run_simulation.py +2 -1
- flwr/superexec/deployment.py +3 -37
- flwr/superexec/exec_servicer.py +5 -72
- flwr/superexec/executor.py +3 -4
- flwr/superexec/simulation.py +4 -7
- {flwr_nightly-1.13.0.dev20241025.dist-info → flwr_nightly-1.13.0.dev20241029.dist-info}/METADATA +1 -1
- {flwr_nightly-1.13.0.dev20241025.dist-info → flwr_nightly-1.13.0.dev20241029.dist-info}/RECORD +30 -26
- {flwr_nightly-1.13.0.dev20241025.dist-info → flwr_nightly-1.13.0.dev20241029.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.13.0.dev20241025.dist-info → flwr_nightly-1.13.0.dev20241029.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.13.0.dev20241025.dist-info → flwr_nightly-1.13.0.dev20241029.dist-info}/entry_points.txt +0 -0
flwr/server/run_serverapp.py
CHANGED
|
@@ -171,11 +171,11 @@ def run_server_app() -> None:
|
|
|
171
171
|
if app_path is None:
|
|
172
172
|
# User provided `--run-id`, but not `app_dir`
|
|
173
173
|
driver = GrpcDriver(
|
|
174
|
-
run_id=args.run_id,
|
|
175
174
|
driver_service_address=args.superlink,
|
|
176
175
|
root_certificates=root_certificates,
|
|
177
176
|
)
|
|
178
177
|
flwr_dir = get_flwr_dir(args.flwr_dir)
|
|
178
|
+
driver.init_run(args.run_id)
|
|
179
179
|
run_ = driver.run
|
|
180
180
|
if not run_.fab_hash:
|
|
181
181
|
raise ValueError("FAB hash not provided.")
|
|
@@ -193,7 +193,6 @@ def run_server_app() -> None:
|
|
|
193
193
|
# User provided `app_dir`, but not `--run-id`
|
|
194
194
|
# Create run if run_id is not provided
|
|
195
195
|
driver = GrpcDriver(
|
|
196
|
-
run_id=0, # Will be overwritten
|
|
197
196
|
driver_service_address=args.superlink,
|
|
198
197
|
root_certificates=root_certificates,
|
|
199
198
|
)
|
|
@@ -204,8 +203,8 @@ def run_server_app() -> None:
|
|
|
204
203
|
# Create run
|
|
205
204
|
req = CreateRunRequest(fab_id=fab_id, fab_version=fab_version)
|
|
206
205
|
res: CreateRunResponse = driver._stub.CreateRun(req) # pylint: disable=W0212
|
|
207
|
-
#
|
|
208
|
-
driver.
|
|
206
|
+
# Fetch full `Run` using `run_id`
|
|
207
|
+
driver.init_run(res.run_id) # pylint: disable=W0212
|
|
209
208
|
|
|
210
209
|
# Obtain server app reference and the run config
|
|
211
210
|
server_app_attr = config["tool"]["flwr"]["app"]["components"]["serverapp"]
|
flwr/server/serverapp/app.py
CHANGED
|
@@ -15,11 +15,39 @@
|
|
|
15
15
|
"""Flower ServerApp process."""
|
|
16
16
|
|
|
17
17
|
import argparse
|
|
18
|
-
|
|
18
|
+
import sys
|
|
19
|
+
from logging import DEBUG, ERROR, INFO, WARN
|
|
20
|
+
from os.path import isfile
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from time import sleep
|
|
19
23
|
from typing import Optional
|
|
20
24
|
|
|
25
|
+
from flwr.cli.config_utils import get_fab_metadata
|
|
26
|
+
from flwr.cli.install import install_from_fab
|
|
27
|
+
from flwr.common.config import (
|
|
28
|
+
get_flwr_dir,
|
|
29
|
+
get_fused_config_from_dir,
|
|
30
|
+
get_project_config,
|
|
31
|
+
get_project_dir,
|
|
32
|
+
)
|
|
33
|
+
from flwr.common.constant import Status, SubStatus
|
|
21
34
|
from flwr.common.logger import log
|
|
35
|
+
from flwr.common.serde import (
|
|
36
|
+
context_from_proto,
|
|
37
|
+
context_to_proto,
|
|
38
|
+
fab_from_proto,
|
|
39
|
+
run_from_proto,
|
|
40
|
+
run_status_to_proto,
|
|
41
|
+
)
|
|
42
|
+
from flwr.common.typing import RunStatus
|
|
43
|
+
from flwr.proto.driver_pb2 import ( # pylint: disable=E0611
|
|
44
|
+
PullServerAppInputsRequest,
|
|
45
|
+
PullServerAppInputsResponse,
|
|
46
|
+
PushServerAppOutputsRequest,
|
|
47
|
+
)
|
|
48
|
+
from flwr.proto.run_pb2 import UpdateRunStatusRequest # pylint: disable=E0611
|
|
22
49
|
from flwr.server.driver.grpc_driver import GrpcDriver
|
|
50
|
+
from flwr.server.run_serverapp import run as run_
|
|
23
51
|
|
|
24
52
|
|
|
25
53
|
def flwr_serverapp() -> None:
|
|
@@ -41,8 +69,35 @@ def flwr_serverapp() -> None:
|
|
|
41
69
|
help="Id of the Run this process should start. If not supplied, this "
|
|
42
70
|
"function will request a pending run to the LinkState.",
|
|
43
71
|
)
|
|
72
|
+
parser.add_argument(
|
|
73
|
+
"--flwr-dir",
|
|
74
|
+
default=None,
|
|
75
|
+
help="""The path containing installed Flower Apps.
|
|
76
|
+
By default, this value is equal to:
|
|
77
|
+
|
|
78
|
+
- `$FLWR_HOME/` if `$FLWR_HOME` is defined
|
|
79
|
+
- `$XDG_DATA_HOME/.flwr/` if `$XDG_DATA_HOME` is defined
|
|
80
|
+
- `$HOME/.flwr/` in all other cases
|
|
81
|
+
""",
|
|
82
|
+
)
|
|
83
|
+
parser.add_argument(
|
|
84
|
+
"--insecure",
|
|
85
|
+
action="store_true",
|
|
86
|
+
help="Run the server without HTTPS, regardless of whether certificate "
|
|
87
|
+
"paths are provided. By default, the server runs with HTTPS enabled. "
|
|
88
|
+
"Use this flag only if you understand the risks.",
|
|
89
|
+
)
|
|
90
|
+
parser.add_argument(
|
|
91
|
+
"--root-certificates",
|
|
92
|
+
metavar="ROOT_CERT",
|
|
93
|
+
type=str,
|
|
94
|
+
help="Specifies the path to the PEM-encoded root certificate file for "
|
|
95
|
+
"establishing secure HTTPS connections.",
|
|
96
|
+
)
|
|
44
97
|
args = parser.parse_args()
|
|
45
98
|
|
|
99
|
+
certificates = _try_obtain_certificates(args)
|
|
100
|
+
|
|
46
101
|
log(
|
|
47
102
|
DEBUG,
|
|
48
103
|
"Staring isolated `ServerApp` connected to SuperLink DriverAPI at %s "
|
|
@@ -50,29 +105,149 @@ def flwr_serverapp() -> None:
|
|
|
50
105
|
args.superlink,
|
|
51
106
|
args.run_id,
|
|
52
107
|
)
|
|
53
|
-
run_serverapp(
|
|
108
|
+
run_serverapp(
|
|
109
|
+
superlink=args.superlink,
|
|
110
|
+
run_id=args.run_id,
|
|
111
|
+
flwr_dir_=args.flwr_dir,
|
|
112
|
+
certificates=certificates,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _try_obtain_certificates(
|
|
117
|
+
args: argparse.Namespace,
|
|
118
|
+
) -> Optional[bytes]:
|
|
54
119
|
|
|
120
|
+
if args.insecure:
|
|
121
|
+
if args.root_certificates is not None:
|
|
122
|
+
sys.exit(
|
|
123
|
+
"Conflicting options: The '--insecure' flag disables HTTPS, "
|
|
124
|
+
"but '--root-certificates' was also specified. Please remove "
|
|
125
|
+
"the '--root-certificates' option when running in insecure mode, "
|
|
126
|
+
"or omit '--insecure' to use HTTPS."
|
|
127
|
+
)
|
|
128
|
+
log(
|
|
129
|
+
WARN,
|
|
130
|
+
"Option `--insecure` was set. Starting insecure HTTP channel to %s.",
|
|
131
|
+
args.superlink,
|
|
132
|
+
)
|
|
133
|
+
root_certificates = None
|
|
134
|
+
else:
|
|
135
|
+
# Load the certificates if provided, or load the system certificates
|
|
136
|
+
if not isfile(args.root_certificates):
|
|
137
|
+
sys.exit("Path argument `--root-certificates` does not point to a file.")
|
|
138
|
+
root_certificates = Path(args.root_certificates).read_bytes()
|
|
139
|
+
log(
|
|
140
|
+
DEBUG,
|
|
141
|
+
"Starting secure HTTPS channel to %s "
|
|
142
|
+
"with the following certificates: %s.",
|
|
143
|
+
args.superlink,
|
|
144
|
+
args.root_certificates,
|
|
145
|
+
)
|
|
146
|
+
return root_certificates
|
|
55
147
|
|
|
56
|
-
|
|
148
|
+
|
|
149
|
+
def run_serverapp( # pylint: disable=R0914, disable=W0212
|
|
57
150
|
superlink: str,
|
|
58
151
|
run_id: Optional[int] = None,
|
|
152
|
+
flwr_dir_: Optional[str] = None,
|
|
153
|
+
certificates: Optional[bytes] = None,
|
|
59
154
|
) -> None:
|
|
60
|
-
"""Run Flower ServerApp process.
|
|
61
|
-
|
|
62
|
-
Parameters
|
|
63
|
-
----------
|
|
64
|
-
superlink : str
|
|
65
|
-
Address of SuperLink
|
|
66
|
-
run_id : Optional[int] (default: None)
|
|
67
|
-
Unique identifier of a Run registered at the LinkState. If not supplied,
|
|
68
|
-
this function will request a pending run to the LinkState.
|
|
69
|
-
"""
|
|
70
|
-
_ = GrpcDriver(
|
|
71
|
-
run_id=run_id if run_id else 0,
|
|
155
|
+
"""Run Flower ServerApp process."""
|
|
156
|
+
driver = GrpcDriver(
|
|
72
157
|
driver_service_address=superlink,
|
|
73
|
-
root_certificates=
|
|
158
|
+
root_certificates=certificates,
|
|
74
159
|
)
|
|
75
160
|
|
|
76
|
-
#
|
|
161
|
+
# Resolve directory where FABs are installed
|
|
162
|
+
flwr_dir = get_flwr_dir(flwr_dir_)
|
|
163
|
+
|
|
164
|
+
only_once = run_id is not None
|
|
165
|
+
|
|
166
|
+
while True:
|
|
167
|
+
|
|
168
|
+
try:
|
|
169
|
+
# Pull ServerAppInputs from LinkState
|
|
170
|
+
req = (
|
|
171
|
+
PullServerAppInputsRequest(run_id=run_id)
|
|
172
|
+
if run_id
|
|
173
|
+
else PullServerAppInputsRequest()
|
|
174
|
+
)
|
|
175
|
+
res: PullServerAppInputsResponse = driver._stub.PullServerAppInputs(req)
|
|
176
|
+
if not res.HasField("run"):
|
|
177
|
+
sleep(3)
|
|
178
|
+
run_status = None
|
|
179
|
+
continue
|
|
180
|
+
|
|
181
|
+
context = context_from_proto(res.context)
|
|
182
|
+
run = run_from_proto(res.run)
|
|
183
|
+
fab = fab_from_proto(res.fab)
|
|
184
|
+
|
|
185
|
+
driver.init_run(run.run_id)
|
|
186
|
+
|
|
187
|
+
log(DEBUG, "ServerApp process starts FAB installation.")
|
|
188
|
+
install_from_fab(fab.content, flwr_dir=flwr_dir, skip_prompt=True)
|
|
189
|
+
|
|
190
|
+
fab_id, fab_version = get_fab_metadata(fab.content)
|
|
191
|
+
|
|
192
|
+
app_path = str(get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir))
|
|
193
|
+
config = get_project_config(app_path)
|
|
194
|
+
|
|
195
|
+
# Obtain server app reference and the run config
|
|
196
|
+
server_app_attr = config["tool"]["flwr"]["app"]["components"]["serverapp"]
|
|
197
|
+
server_app_run_config = get_fused_config_from_dir(
|
|
198
|
+
Path(app_path), run.override_config
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
# Update run_config in context
|
|
202
|
+
context.run_config = server_app_run_config
|
|
203
|
+
|
|
204
|
+
log(
|
|
205
|
+
DEBUG,
|
|
206
|
+
"Flower will load ServerApp `%s` in %s",
|
|
207
|
+
server_app_attr,
|
|
208
|
+
app_path,
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
# Change status to Running
|
|
212
|
+
run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
|
|
213
|
+
driver._stub.UpdateRunStatus(
|
|
214
|
+
UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# Load and run the ServerApp with the Driver
|
|
218
|
+
updated_context = run_(
|
|
219
|
+
driver=driver,
|
|
220
|
+
server_app_dir=app_path,
|
|
221
|
+
server_app_attr=server_app_attr,
|
|
222
|
+
context=context,
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
# Send resulting context
|
|
226
|
+
context_proto = context_to_proto(updated_context)
|
|
227
|
+
out_req = PushServerAppOutputsRequest(
|
|
228
|
+
run_id=run.run_id, context=context_proto
|
|
229
|
+
)
|
|
230
|
+
_ = driver._stub.PushServerAppOutputs(out_req)
|
|
231
|
+
|
|
232
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
|
|
233
|
+
|
|
234
|
+
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
235
|
+
exc_entity = "ServerApp"
|
|
236
|
+
log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
|
|
237
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
|
|
238
|
+
|
|
239
|
+
finally:
|
|
240
|
+
if run_status:
|
|
241
|
+
run_status_proto = run_status_to_proto(run_status)
|
|
242
|
+
driver._stub.UpdateRunStatus(
|
|
243
|
+
UpdateRunStatusRequest(
|
|
244
|
+
run_id=run.run_id, run_status=run_status_proto
|
|
245
|
+
)
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
# Stop the loop if `flwr-serverapp` is expected to process a single run
|
|
249
|
+
if only_once:
|
|
250
|
+
break
|
|
77
251
|
|
|
78
|
-
|
|
252
|
+
# Reset the run_id
|
|
253
|
+
run_id = None
|
|
@@ -22,6 +22,7 @@ from typing import Optional
|
|
|
22
22
|
from uuid import UUID
|
|
23
23
|
|
|
24
24
|
import grpc
|
|
25
|
+
from google.protobuf.message import Message as GrpcMessage
|
|
25
26
|
|
|
26
27
|
from flwr.common.constant import Status
|
|
27
28
|
from flwr.common.logger import log
|
|
@@ -30,6 +31,7 @@ from flwr.common.serde import (
|
|
|
30
31
|
context_to_proto,
|
|
31
32
|
fab_from_proto,
|
|
32
33
|
fab_to_proto,
|
|
34
|
+
run_status_from_proto,
|
|
33
35
|
run_to_proto,
|
|
34
36
|
user_config_from_proto,
|
|
35
37
|
)
|
|
@@ -48,12 +50,18 @@ from flwr.proto.driver_pb2 import ( # pylint: disable=E0611
|
|
|
48
50
|
PushTaskInsResponse,
|
|
49
51
|
)
|
|
50
52
|
from flwr.proto.fab_pb2 import GetFabRequest, GetFabResponse # pylint: disable=E0611
|
|
53
|
+
from flwr.proto.log_pb2 import ( # pylint: disable=E0611
|
|
54
|
+
PushLogsRequest,
|
|
55
|
+
PushLogsResponse,
|
|
56
|
+
)
|
|
51
57
|
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
|
52
58
|
from flwr.proto.run_pb2 import ( # pylint: disable=E0611
|
|
53
59
|
CreateRunRequest,
|
|
54
60
|
CreateRunResponse,
|
|
55
61
|
GetRunRequest,
|
|
56
62
|
GetRunResponse,
|
|
63
|
+
UpdateRunStatusRequest,
|
|
64
|
+
UpdateRunStatusResponse,
|
|
57
65
|
)
|
|
58
66
|
from flwr.proto.task_pb2 import TaskRes # pylint: disable=E0611
|
|
59
67
|
from flwr.server.superlink.ffs.ffs import Ffs
|
|
@@ -212,7 +220,7 @@ class DriverServicer(driver_pb2_grpc.DriverServicer):
|
|
|
212
220
|
# Lock access to LinkState, preventing obtaining the same pending run_id
|
|
213
221
|
with self.lock:
|
|
214
222
|
# If run_id is provided, use it, otherwise use the pending run_id
|
|
215
|
-
if request
|
|
223
|
+
if _has_field(request, "run_id"):
|
|
216
224
|
run_id: Optional[int] = request.run_id
|
|
217
225
|
else:
|
|
218
226
|
run_id = state.get_pending_run_id()
|
|
@@ -252,7 +260,32 @@ class DriverServicer(driver_pb2_grpc.DriverServicer):
|
|
|
252
260
|
state.set_serverapp_context(request.run_id, context_from_proto(request.context))
|
|
253
261
|
return PushServerAppOutputsResponse()
|
|
254
262
|
|
|
263
|
+
def UpdateRunStatus(
|
|
264
|
+
self, request: UpdateRunStatusRequest, context: grpc.ServicerContext
|
|
265
|
+
) -> UpdateRunStatusResponse:
|
|
266
|
+
"""Update the status of a run."""
|
|
267
|
+
log(DEBUG, "ControlServicer.UpdateRunStatus")
|
|
268
|
+
state = self.state_factory.state()
|
|
269
|
+
|
|
270
|
+
# Update the run status
|
|
271
|
+
state.update_run_status(
|
|
272
|
+
run_id=request.run_id, new_status=run_status_from_proto(request.run_status)
|
|
273
|
+
)
|
|
274
|
+
return UpdateRunStatusResponse()
|
|
275
|
+
|
|
276
|
+
def PushLogs(
|
|
277
|
+
self, request: PushLogsRequest, context: grpc.ServicerContext
|
|
278
|
+
) -> PushLogsResponse:
|
|
279
|
+
"""Push logs."""
|
|
280
|
+
log(DEBUG, "DriverServicer.PushLogs")
|
|
281
|
+
raise NotImplementedError()
|
|
282
|
+
|
|
255
283
|
|
|
256
284
|
def _raise_if(validation_error: bool, detail: str) -> None:
|
|
257
285
|
if validation_error:
|
|
258
286
|
raise ValueError(f"Malformed PushTaskInsRequest: {detail}")
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _has_field(message: GrpcMessage, field_name: str) -> bool:
|
|
290
|
+
"""Check if a certain field is set for the message, including scalar fields."""
|
|
291
|
+
return field_name in {fld.name for fld, _ in message.ListFields()}
|
|
@@ -17,7 +17,8 @@
|
|
|
17
17
|
|
|
18
18
|
import threading
|
|
19
19
|
import time
|
|
20
|
-
from
|
|
20
|
+
from bisect import bisect_right
|
|
21
|
+
from dataclasses import dataclass, field
|
|
21
22
|
from logging import ERROR, WARNING
|
|
22
23
|
from typing import Optional
|
|
23
24
|
from uuid import UUID, uuid4
|
|
@@ -43,7 +44,7 @@ from .utils import (
|
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
@dataclass
|
|
46
|
-
class RunRecord:
|
|
47
|
+
class RunRecord: # pylint: disable=R0902
|
|
47
48
|
"""The record of a specific run, including its status and timestamps."""
|
|
48
49
|
|
|
49
50
|
run: Run
|
|
@@ -52,6 +53,8 @@ class RunRecord:
|
|
|
52
53
|
starting_at: str = ""
|
|
53
54
|
running_at: str = ""
|
|
54
55
|
finished_at: str = ""
|
|
56
|
+
logs: list[tuple[float, str]] = field(default_factory=list)
|
|
57
|
+
log_lock: threading.Lock = field(default_factory=threading.Lock)
|
|
55
58
|
|
|
56
59
|
|
|
57
60
|
class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
@@ -511,3 +514,26 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
|
511
514
|
if run_id not in self.run_ids:
|
|
512
515
|
raise ValueError(f"Run {run_id} not found")
|
|
513
516
|
self.contexts[run_id] = context
|
|
517
|
+
|
|
518
|
+
def add_serverapp_log(self, run_id: int, log_message: str) -> None:
|
|
519
|
+
"""Add a log entry to the serverapp logs for the specified `run_id`."""
|
|
520
|
+
if run_id not in self.run_ids:
|
|
521
|
+
raise ValueError(f"Run {run_id} not found")
|
|
522
|
+
run = self.run_ids[run_id]
|
|
523
|
+
with run.log_lock:
|
|
524
|
+
run.logs.append((now().timestamp(), log_message))
|
|
525
|
+
|
|
526
|
+
def get_serverapp_log(
|
|
527
|
+
self, run_id: int, after_timestamp: Optional[float]
|
|
528
|
+
) -> tuple[str, float]:
|
|
529
|
+
"""Get the serverapp logs for the specified `run_id`."""
|
|
530
|
+
if run_id not in self.run_ids:
|
|
531
|
+
raise ValueError(f"Run {run_id} not found")
|
|
532
|
+
run = self.run_ids[run_id]
|
|
533
|
+
if after_timestamp is None:
|
|
534
|
+
after_timestamp = 0.0
|
|
535
|
+
with run.log_lock:
|
|
536
|
+
# Find the index where the timestamp would be inserted
|
|
537
|
+
index = bisect_right(run.logs, (after_timestamp, ""))
|
|
538
|
+
latest_timestamp = run.logs[-1][0] if index < len(run.logs) else 0.0
|
|
539
|
+
return "".join(log for _, log in run.logs[index:]), latest_timestamp
|
|
@@ -299,3 +299,38 @@ class LinkState(abc.ABC): # pylint: disable=R0904
|
|
|
299
299
|
context : Context
|
|
300
300
|
The context to be associated with the specified `run_id`.
|
|
301
301
|
"""
|
|
302
|
+
|
|
303
|
+
@abc.abstractmethod
|
|
304
|
+
def add_serverapp_log(self, run_id: int, log_message: str) -> None:
|
|
305
|
+
"""Add a log entry to the ServerApp logs for the specified `run_id`.
|
|
306
|
+
|
|
307
|
+
Parameters
|
|
308
|
+
----------
|
|
309
|
+
run_id : int
|
|
310
|
+
The identifier of the run for which to add a log entry.
|
|
311
|
+
log_message : str
|
|
312
|
+
The log entry to be added to the ServerApp logs.
|
|
313
|
+
"""
|
|
314
|
+
|
|
315
|
+
@abc.abstractmethod
|
|
316
|
+
def get_serverapp_log(
|
|
317
|
+
self, run_id: int, after_timestamp: Optional[float]
|
|
318
|
+
) -> tuple[str, float]:
|
|
319
|
+
"""Get the ServerApp logs for the specified `run_id`.
|
|
320
|
+
|
|
321
|
+
Parameters
|
|
322
|
+
----------
|
|
323
|
+
run_id : int
|
|
324
|
+
The identifier of the run for which to retrieve the ServerApp logs.
|
|
325
|
+
|
|
326
|
+
after_timestamp : Optional[float]
|
|
327
|
+
Retrieve logs after this timestamp. If set to `None`, retrieve all logs.
|
|
328
|
+
|
|
329
|
+
Returns
|
|
330
|
+
-------
|
|
331
|
+
tuple[str, float]
|
|
332
|
+
A tuple containing:
|
|
333
|
+
- The ServerApp logs associated with the specified `run_id`.
|
|
334
|
+
- The timestamp of the latest log entry in the returned logs.
|
|
335
|
+
Returns `0` if no logs are returned.
|
|
336
|
+
"""
|
|
@@ -99,6 +99,17 @@ CREATE TABLE IF NOT EXISTS run(
|
|
|
99
99
|
);
|
|
100
100
|
"""
|
|
101
101
|
|
|
102
|
+
SQL_CREATE_TABLE_LOGS = """
|
|
103
|
+
CREATE TABLE IF NOT EXISTS logs (
|
|
104
|
+
timestamp REAL,
|
|
105
|
+
run_id INTEGER,
|
|
106
|
+
node_id INTEGER,
|
|
107
|
+
log TEXT,
|
|
108
|
+
PRIMARY KEY (timestamp, run_id, node_id),
|
|
109
|
+
FOREIGN KEY (run_id) REFERENCES run(run_id)
|
|
110
|
+
);
|
|
111
|
+
"""
|
|
112
|
+
|
|
102
113
|
SQL_CREATE_TABLE_CONTEXT = """
|
|
103
114
|
CREATE TABLE IF NOT EXISTS context(
|
|
104
115
|
run_id INTEGER UNIQUE,
|
|
@@ -191,6 +202,7 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
|
|
|
191
202
|
|
|
192
203
|
# Create each table if not exists queries
|
|
193
204
|
cur.execute(SQL_CREATE_TABLE_RUN)
|
|
205
|
+
cur.execute(SQL_CREATE_TABLE_LOGS)
|
|
194
206
|
cur.execute(SQL_CREATE_TABLE_CONTEXT)
|
|
195
207
|
cur.execute(SQL_CREATE_TABLE_TASK_INS)
|
|
196
208
|
cur.execute(SQL_CREATE_TABLE_TASK_RES)
|
|
@@ -1015,6 +1027,44 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
|
|
|
1015
1027
|
except sqlite3.IntegrityError:
|
|
1016
1028
|
raise ValueError(f"Run {run_id} not found") from None
|
|
1017
1029
|
|
|
1030
|
+
def add_serverapp_log(self, run_id: int, log_message: str) -> None:
|
|
1031
|
+
"""Add a log entry to the ServerApp logs for the specified `run_id`."""
|
|
1032
|
+
# Convert the uint64 value to sint64 for SQLite
|
|
1033
|
+
sint64_run_id = convert_uint64_to_sint64(run_id)
|
|
1034
|
+
|
|
1035
|
+
# Store log
|
|
1036
|
+
try:
|
|
1037
|
+
query = """
|
|
1038
|
+
INSERT INTO logs (timestamp, run_id, node_id, log) VALUES (?, ?, ?, ?);
|
|
1039
|
+
"""
|
|
1040
|
+
self.query(query, (now().timestamp(), sint64_run_id, 0, log_message))
|
|
1041
|
+
except sqlite3.IntegrityError:
|
|
1042
|
+
raise ValueError(f"Run {run_id} not found") from None
|
|
1043
|
+
|
|
1044
|
+
def get_serverapp_log(
|
|
1045
|
+
self, run_id: int, after_timestamp: Optional[float]
|
|
1046
|
+
) -> tuple[str, float]:
|
|
1047
|
+
"""Get the ServerApp logs for the specified `run_id`."""
|
|
1048
|
+
# Convert the uint64 value to sint64 for SQLite
|
|
1049
|
+
sint64_run_id = convert_uint64_to_sint64(run_id)
|
|
1050
|
+
|
|
1051
|
+
# Check if the run_id exists
|
|
1052
|
+
query = "SELECT run_id FROM run WHERE run_id = ?;"
|
|
1053
|
+
if not self.query(query, (sint64_run_id,)):
|
|
1054
|
+
raise ValueError(f"Run {run_id} not found")
|
|
1055
|
+
|
|
1056
|
+
# Retrieve logs
|
|
1057
|
+
if after_timestamp is None:
|
|
1058
|
+
after_timestamp = 0.0
|
|
1059
|
+
query = """
|
|
1060
|
+
SELECT log, timestamp FROM logs
|
|
1061
|
+
WHERE run_id = ? AND node_id = ? AND timestamp > ?;
|
|
1062
|
+
"""
|
|
1063
|
+
rows = self.query(query, (sint64_run_id, 0, after_timestamp))
|
|
1064
|
+
rows.sort(key=lambda x: x["timestamp"])
|
|
1065
|
+
latest_timestamp = rows[-1]["timestamp"] if rows else 0.0
|
|
1066
|
+
return "".join(row["log"] for row in rows), latest_timestamp
|
|
1067
|
+
|
|
1018
1068
|
def get_valid_task_ins(self, task_id: str) -> Optional[dict[str, Any]]:
|
|
1019
1069
|
"""Check if the TaskIns exists and is valid (not expired).
|
|
1020
1070
|
|
|
@@ -421,7 +421,8 @@ def _main_loop(
|
|
|
421
421
|
server_app_run_config = {}
|
|
422
422
|
|
|
423
423
|
# Initialize Driver
|
|
424
|
-
driver = InMemoryDriver(
|
|
424
|
+
driver = InMemoryDriver(state_factory=state_factory)
|
|
425
|
+
driver.init_run(run_id=run.run_id)
|
|
425
426
|
|
|
426
427
|
# Get and run ServerApp thread
|
|
427
428
|
serverapp_th = run_serverapp_th(
|
flwr/superexec/deployment.py
CHANGED
|
@@ -15,14 +15,12 @@
|
|
|
15
15
|
"""Deployment engine executor."""
|
|
16
16
|
|
|
17
17
|
import hashlib
|
|
18
|
-
import subprocess
|
|
19
18
|
from logging import ERROR, INFO
|
|
20
19
|
from pathlib import Path
|
|
21
20
|
from typing import Optional
|
|
22
21
|
|
|
23
22
|
from typing_extensions import override
|
|
24
23
|
|
|
25
|
-
from flwr.cli.install import install_from_fab
|
|
26
24
|
from flwr.common.constant import DRIVER_API_DEFAULT_ADDRESS
|
|
27
25
|
from flwr.common.logger import log
|
|
28
26
|
from flwr.common.typing import Fab, UserConfig
|
|
@@ -30,7 +28,7 @@ from flwr.server.superlink.ffs import Ffs
|
|
|
30
28
|
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
31
29
|
from flwr.server.superlink.linkstate import LinkState, LinkStateFactory
|
|
32
30
|
|
|
33
|
-
from .executor import Executor
|
|
31
|
+
from .executor import Executor
|
|
34
32
|
|
|
35
33
|
|
|
36
34
|
class DeploymentEngine(Executor):
|
|
@@ -143,11 +141,9 @@ class DeploymentEngine(Executor):
|
|
|
143
141
|
fab_file: bytes,
|
|
144
142
|
override_config: UserConfig,
|
|
145
143
|
federation_config: UserConfig,
|
|
146
|
-
) -> Optional[
|
|
144
|
+
) -> Optional[int]:
|
|
147
145
|
"""Start run using the Flower Deployment Engine."""
|
|
148
146
|
try:
|
|
149
|
-
# Install FAB to flwr dir
|
|
150
|
-
install_from_fab(fab_file, None, True)
|
|
151
147
|
|
|
152
148
|
# Call SuperLink to create run
|
|
153
149
|
run_id: int = self._create_run(
|
|
@@ -155,37 +151,7 @@ class DeploymentEngine(Executor):
|
|
|
155
151
|
)
|
|
156
152
|
log(INFO, "Created run %s", str(run_id))
|
|
157
153
|
|
|
158
|
-
|
|
159
|
-
"flower-server-app",
|
|
160
|
-
"--run-id",
|
|
161
|
-
str(run_id),
|
|
162
|
-
"--superlink",
|
|
163
|
-
str(self.superlink),
|
|
164
|
-
]
|
|
165
|
-
|
|
166
|
-
if self.flwr_dir:
|
|
167
|
-
command.append("--flwr-dir")
|
|
168
|
-
command.append(self.flwr_dir)
|
|
169
|
-
|
|
170
|
-
if self.root_certificates is None:
|
|
171
|
-
command.append("--insecure")
|
|
172
|
-
else:
|
|
173
|
-
command.append("--root-certificates")
|
|
174
|
-
command.append(self.root_certificates)
|
|
175
|
-
|
|
176
|
-
# Execute the command
|
|
177
|
-
proc = subprocess.Popen( # pylint: disable=consider-using-with
|
|
178
|
-
command,
|
|
179
|
-
stdout=subprocess.PIPE,
|
|
180
|
-
stderr=subprocess.PIPE,
|
|
181
|
-
text=True,
|
|
182
|
-
)
|
|
183
|
-
log(INFO, "Started run %s", str(run_id))
|
|
184
|
-
|
|
185
|
-
return RunTracker(
|
|
186
|
-
run_id=run_id,
|
|
187
|
-
proc=proc,
|
|
188
|
-
)
|
|
154
|
+
return run_id
|
|
189
155
|
# pylint: disable-next=broad-except
|
|
190
156
|
except Exception as e:
|
|
191
157
|
log(ERROR, "Could not start run: %s", str(e))
|