experimaestro 2.0.0b8__py3-none-any.whl → 2.0.0b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/__init__.py +12 -5
- experimaestro/cli/__init__.py +239 -126
- experimaestro/cli/filter.py +48 -23
- experimaestro/cli/jobs.py +253 -71
- experimaestro/cli/refactor.py +1 -2
- experimaestro/commandline.py +7 -4
- experimaestro/connectors/__init__.py +9 -1
- experimaestro/connectors/local.py +43 -3
- experimaestro/core/arguments.py +18 -18
- experimaestro/core/identifier.py +11 -11
- experimaestro/core/objects/config.py +96 -39
- experimaestro/core/objects/config_walk.py +3 -3
- experimaestro/core/{subparameters.py → partial.py} +16 -16
- experimaestro/core/partial_lock.py +394 -0
- experimaestro/core/types.py +12 -15
- experimaestro/dynamic.py +290 -0
- experimaestro/experiments/__init__.py +6 -2
- experimaestro/experiments/cli.py +217 -50
- experimaestro/experiments/configuration.py +24 -0
- experimaestro/generators.py +5 -5
- experimaestro/ipc.py +118 -1
- experimaestro/launcherfinder/__init__.py +2 -2
- experimaestro/launcherfinder/registry.py +6 -7
- experimaestro/launcherfinder/specs.py +2 -9
- experimaestro/launchers/slurm/__init__.py +2 -2
- experimaestro/launchers/slurm/base.py +62 -0
- experimaestro/locking.py +957 -1
- experimaestro/notifications.py +89 -201
- experimaestro/progress.py +63 -366
- experimaestro/rpyc.py +0 -2
- experimaestro/run.py +29 -2
- experimaestro/scheduler/__init__.py +8 -1
- experimaestro/scheduler/base.py +629 -53
- experimaestro/scheduler/dependencies.py +20 -16
- experimaestro/scheduler/experiment.py +732 -167
- experimaestro/scheduler/interfaces.py +316 -101
- experimaestro/scheduler/jobs.py +58 -20
- experimaestro/scheduler/remote/adaptive_sync.py +265 -0
- experimaestro/scheduler/remote/client.py +171 -117
- experimaestro/scheduler/remote/protocol.py +8 -193
- experimaestro/scheduler/remote/server.py +95 -71
- experimaestro/scheduler/services.py +53 -28
- experimaestro/scheduler/state_provider.py +663 -2430
- experimaestro/scheduler/state_status.py +1247 -0
- experimaestro/scheduler/transient.py +31 -0
- experimaestro/scheduler/workspace.py +1 -1
- experimaestro/scheduler/workspace_state_provider.py +1273 -0
- experimaestro/scriptbuilder.py +4 -4
- experimaestro/settings.py +36 -0
- experimaestro/tests/conftest.py +33 -5
- experimaestro/tests/connectors/bin/executable.py +1 -1
- experimaestro/tests/fixtures/pre_experiment/experiment_check_env.py +16 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_check_mock.py +14 -0
- experimaestro/tests/fixtures/pre_experiment/experiment_simple.py +12 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_env.py +5 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_error.py +3 -0
- experimaestro/tests/fixtures/pre_experiment/pre_setup_mock.py +8 -0
- experimaestro/tests/launchers/bin/test.py +1 -0
- experimaestro/tests/launchers/test_slurm.py +9 -9
- experimaestro/tests/partial_reschedule.py +46 -0
- experimaestro/tests/restart.py +3 -3
- experimaestro/tests/restart_main.py +1 -0
- experimaestro/tests/scripts/notifyandwait.py +1 -0
- experimaestro/tests/task_partial.py +38 -0
- experimaestro/tests/task_tokens.py +2 -2
- experimaestro/tests/tasks/test_dynamic.py +6 -6
- experimaestro/tests/test_dependencies.py +3 -3
- experimaestro/tests/test_deprecated.py +15 -15
- experimaestro/tests/test_dynamic_locking.py +317 -0
- experimaestro/tests/test_environment.py +24 -14
- experimaestro/tests/test_experiment.py +171 -36
- experimaestro/tests/test_identifier.py +25 -25
- experimaestro/tests/test_identifier_stability.py +3 -5
- experimaestro/tests/test_multitoken.py +2 -4
- experimaestro/tests/{test_subparameters.py → test_partial.py} +25 -25
- experimaestro/tests/test_partial_paths.py +81 -138
- experimaestro/tests/test_pre_experiment.py +219 -0
- experimaestro/tests/test_progress.py +2 -8
- experimaestro/tests/test_remote_state.py +560 -99
- experimaestro/tests/test_stray_jobs.py +261 -0
- experimaestro/tests/test_tasks.py +1 -2
- experimaestro/tests/test_token_locking.py +52 -67
- experimaestro/tests/test_tokens.py +5 -6
- experimaestro/tests/test_transient.py +225 -0
- experimaestro/tests/test_workspace_state_provider.py +768 -0
- experimaestro/tests/token_reschedule.py +1 -3
- experimaestro/tests/utils.py +2 -7
- experimaestro/tokens.py +227 -372
- experimaestro/tools/diff.py +1 -0
- experimaestro/tools/documentation.py +4 -5
- experimaestro/tools/jobs.py +1 -2
- experimaestro/tui/app.py +438 -1966
- experimaestro/tui/app.tcss +162 -0
- experimaestro/tui/dialogs.py +172 -0
- experimaestro/tui/log_viewer.py +253 -3
- experimaestro/tui/messages.py +137 -0
- experimaestro/tui/utils.py +54 -0
- experimaestro/tui/widgets/__init__.py +23 -0
- experimaestro/tui/widgets/experiments.py +468 -0
- experimaestro/tui/widgets/global_services.py +238 -0
- experimaestro/tui/widgets/jobs.py +972 -0
- experimaestro/tui/widgets/log.py +156 -0
- experimaestro/tui/widgets/orphans.py +363 -0
- experimaestro/tui/widgets/runs.py +185 -0
- experimaestro/tui/widgets/services.py +314 -0
- experimaestro/tui/widgets/stray_jobs.py +528 -0
- experimaestro/utils/__init__.py +1 -1
- experimaestro/utils/environment.py +105 -22
- experimaestro/utils/fswatcher.py +124 -0
- experimaestro/utils/jobs.py +1 -2
- experimaestro/utils/jupyter.py +1 -2
- experimaestro/utils/logging.py +72 -0
- experimaestro/version.py +2 -2
- experimaestro/webui/__init__.py +9 -0
- experimaestro/webui/app.py +117 -0
- experimaestro/{server → webui}/data/index.css +66 -11
- experimaestro/webui/data/index.css.map +1 -0
- experimaestro/{server → webui}/data/index.js +82763 -87217
- experimaestro/webui/data/index.js.map +1 -0
- experimaestro/webui/routes/__init__.py +5 -0
- experimaestro/webui/routes/auth.py +53 -0
- experimaestro/webui/routes/proxy.py +117 -0
- experimaestro/webui/server.py +200 -0
- experimaestro/webui/state_bridge.py +152 -0
- experimaestro/webui/websocket.py +413 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/METADATA +5 -6
- experimaestro-2.0.0b17.dist-info/RECORD +219 -0
- experimaestro/cli/progress.py +0 -269
- experimaestro/scheduler/state.py +0 -75
- experimaestro/scheduler/state_db.py +0 -437
- experimaestro/scheduler/state_sync.py +0 -891
- experimaestro/server/__init__.py +0 -467
- experimaestro/server/data/index.css.map +0 -1
- experimaestro/server/data/index.js.map +0 -1
- experimaestro/tests/test_cli_jobs.py +0 -615
- experimaestro/tests/test_file_progress.py +0 -425
- experimaestro/tests/test_file_progress_integration.py +0 -477
- experimaestro/tests/test_state_db.py +0 -434
- experimaestro-2.0.0b8.dist-info/RECORD +0 -187
- /experimaestro/{server → webui}/data/1815e00441357e01619e.ttf +0 -0
- /experimaestro/{server → webui}/data/2463b90d9a316e4e5294.woff2 +0 -0
- /experimaestro/{server → webui}/data/2582b0e4bcf85eceead0.ttf +0 -0
- /experimaestro/{server → webui}/data/89999bdf5d835c012025.woff2 +0 -0
- /experimaestro/{server → webui}/data/914997e1bdfc990d0897.ttf +0 -0
- /experimaestro/{server → webui}/data/c210719e60948b211a12.woff2 +0 -0
- /experimaestro/{server → webui}/data/favicon.ico +0 -0
- /experimaestro/{server → webui}/data/index.html +0 -0
- /experimaestro/{server → webui}/data/login.html +0 -0
- /experimaestro/{server → webui}/data/manifest.json +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b8.dist-info → experimaestro-2.0.0b17.dist-info}/licenses/LICENSE +0 -0
|
@@ -12,11 +12,7 @@ import logging
|
|
|
12
12
|
from dataclasses import dataclass, field
|
|
13
13
|
from datetime import datetime
|
|
14
14
|
from enum import Enum
|
|
15
|
-
from
|
|
16
|
-
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
|
|
17
|
-
|
|
18
|
-
if TYPE_CHECKING:
|
|
19
|
-
from experimaestro.scheduler.interfaces import JobState
|
|
15
|
+
from typing import Any, Dict, Optional, Union
|
|
20
16
|
|
|
21
17
|
logger = logging.getLogger("xpm.remote.protocol")
|
|
22
18
|
|
|
@@ -40,10 +36,10 @@ TIMEOUT_ERROR = -32004
|
|
|
40
36
|
class NotificationMethod(str, Enum):
|
|
41
37
|
"""Server-to-client notification methods"""
|
|
42
38
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
39
|
+
# Generic state event notification (serialized dataclass)
|
|
40
|
+
STATE_EVENT = "notification.state_event"
|
|
41
|
+
|
|
42
|
+
# Control notifications
|
|
47
43
|
FILE_CHANGED = "notification.file_changed"
|
|
48
44
|
SHUTDOWN = "notification.shutdown"
|
|
49
45
|
|
|
@@ -58,9 +54,12 @@ class RPCMethod(str, Enum):
|
|
|
58
54
|
GET_JOB = "get_job"
|
|
59
55
|
GET_ALL_JOBS = "get_all_jobs"
|
|
60
56
|
GET_SERVICES = "get_services"
|
|
57
|
+
GET_TAGS_MAP = "get_tags_map"
|
|
58
|
+
GET_DEPENDENCIES_MAP = "get_dependencies_map"
|
|
61
59
|
KILL_JOB = "kill_job"
|
|
62
60
|
CLEAN_JOB = "clean_job"
|
|
63
61
|
GET_SYNC_INFO = "get_sync_info"
|
|
62
|
+
GET_PROCESS_INFO = "get_process_info"
|
|
64
63
|
|
|
65
64
|
|
|
66
65
|
@dataclass
|
|
@@ -281,187 +280,3 @@ def deserialize_datetime(s: Optional[str]) -> Optional[datetime]:
|
|
|
281
280
|
if s is None:
|
|
282
281
|
return None
|
|
283
282
|
return datetime.fromisoformat(s)
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
def serialize_job(job) -> Dict:
|
|
287
|
-
"""Serialize a job (MockJob or Job) to a dictionary for JSON-RPC"""
|
|
288
|
-
from experimaestro.scheduler.interfaces import JobState
|
|
289
|
-
|
|
290
|
-
result = {
|
|
291
|
-
"identifier": job.identifier,
|
|
292
|
-
"task_id": job.task_id,
|
|
293
|
-
"locator": job.locator,
|
|
294
|
-
"path": str(job.path) if job.path else None,
|
|
295
|
-
"state": job.state.name if isinstance(job.state, JobState) else str(job.state),
|
|
296
|
-
"submittime": serialize_datetime(job.submittime),
|
|
297
|
-
"starttime": serialize_datetime(job.starttime),
|
|
298
|
-
"endtime": serialize_datetime(job.endtime),
|
|
299
|
-
"progress": job.progress,
|
|
300
|
-
"tags": job.tags,
|
|
301
|
-
"experiment_id": getattr(job, "experiment_id", None),
|
|
302
|
-
"run_id": getattr(job, "run_id", None),
|
|
303
|
-
}
|
|
304
|
-
return result
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
def deserialize_job(d: Dict) -> "MockJobData":
|
|
308
|
-
"""Deserialize a dictionary to MockJobData"""
|
|
309
|
-
from experimaestro.scheduler.interfaces import JobState, STATE_NAME_TO_JOBSTATE
|
|
310
|
-
from pathlib import Path
|
|
311
|
-
|
|
312
|
-
state = STATE_NAME_TO_JOBSTATE.get(d["state"], JobState.WAITING)
|
|
313
|
-
return MockJobData(
|
|
314
|
-
identifier=d["identifier"],
|
|
315
|
-
task_id=d["task_id"],
|
|
316
|
-
locator=d["locator"],
|
|
317
|
-
path=Path(d["path"]) if d["path"] else None,
|
|
318
|
-
state=state,
|
|
319
|
-
submittime=deserialize_datetime(d.get("submittime")),
|
|
320
|
-
starttime=deserialize_datetime(d.get("starttime")),
|
|
321
|
-
endtime=deserialize_datetime(d.get("endtime")),
|
|
322
|
-
progress=d.get("progress"),
|
|
323
|
-
tags=d.get("tags", {}),
|
|
324
|
-
experiment_id=d.get("experiment_id"),
|
|
325
|
-
run_id=d.get("run_id"),
|
|
326
|
-
)
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
def serialize_experiment(experiment) -> Dict:
|
|
330
|
-
"""Serialize a MockExperiment to a dictionary for JSON-RPC"""
|
|
331
|
-
result = {
|
|
332
|
-
"experiment_id": experiment.experiment_id,
|
|
333
|
-
"workdir": str(experiment.workdir) if experiment.workdir else None,
|
|
334
|
-
"current_run_id": experiment.current_run_id,
|
|
335
|
-
"total_jobs": experiment.total_jobs,
|
|
336
|
-
"finished_jobs": experiment.finished_jobs,
|
|
337
|
-
"failed_jobs": experiment.failed_jobs,
|
|
338
|
-
"updated_at": serialize_datetime(experiment.updated_at),
|
|
339
|
-
"started_at": serialize_datetime(experiment.started_at),
|
|
340
|
-
"ended_at": serialize_datetime(experiment.ended_at),
|
|
341
|
-
"hostname": experiment.hostname,
|
|
342
|
-
}
|
|
343
|
-
return result
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
def deserialize_experiment(d: Dict) -> "MockExperimentData":
|
|
347
|
-
"""Deserialize a dictionary to MockExperimentData"""
|
|
348
|
-
from pathlib import Path
|
|
349
|
-
|
|
350
|
-
return MockExperimentData(
|
|
351
|
-
experiment_id=d["experiment_id"],
|
|
352
|
-
workdir=Path(d["workdir"]) if d["workdir"] else None,
|
|
353
|
-
current_run_id=d.get("current_run_id"),
|
|
354
|
-
total_jobs=d.get("total_jobs", 0),
|
|
355
|
-
finished_jobs=d.get("finished_jobs", 0),
|
|
356
|
-
failed_jobs=d.get("failed_jobs", 0),
|
|
357
|
-
updated_at=deserialize_datetime(d.get("updated_at")),
|
|
358
|
-
started_at=deserialize_datetime(d.get("started_at")),
|
|
359
|
-
ended_at=deserialize_datetime(d.get("ended_at")),
|
|
360
|
-
hostname=d.get("hostname"),
|
|
361
|
-
)
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
def serialize_service(service) -> Dict:
|
|
365
|
-
"""Serialize a service to a dictionary for JSON-RPC"""
|
|
366
|
-
from experimaestro.scheduler.services import Service
|
|
367
|
-
|
|
368
|
-
# Service has id attribute, description() method, state property, state_dict() method
|
|
369
|
-
state = service.state
|
|
370
|
-
if hasattr(state, "name"):
|
|
371
|
-
state = state.name # Convert ServiceState enum to string
|
|
372
|
-
elif hasattr(state, "value"):
|
|
373
|
-
state = state.value
|
|
374
|
-
|
|
375
|
-
# Get URL if service has it (e.g., TensorboardService)
|
|
376
|
-
url = None
|
|
377
|
-
if hasattr(service, "url"):
|
|
378
|
-
url = service.url
|
|
379
|
-
elif hasattr(service, "get_url"):
|
|
380
|
-
try:
|
|
381
|
-
url = service.get_url()
|
|
382
|
-
except Exception:
|
|
383
|
-
pass
|
|
384
|
-
|
|
385
|
-
# Get state_dict with __class__ and serialize paths
|
|
386
|
-
if hasattr(service, "_full_state_dict"):
|
|
387
|
-
state_dict = Service.serialize_state_dict(service._full_state_dict())
|
|
388
|
-
elif callable(getattr(service, "state_dict", None)):
|
|
389
|
-
# Fallback: serialize paths in the raw state_dict
|
|
390
|
-
state_dict = Service.serialize_state_dict(service.state_dict())
|
|
391
|
-
else:
|
|
392
|
-
state_dict = getattr(service, "state_dict", {})
|
|
393
|
-
|
|
394
|
-
return {
|
|
395
|
-
"service_id": getattr(service, "id", None),
|
|
396
|
-
"description": (
|
|
397
|
-
service.description()
|
|
398
|
-
if callable(service.description)
|
|
399
|
-
else service.description
|
|
400
|
-
),
|
|
401
|
-
"state": state,
|
|
402
|
-
"state_dict": state_dict,
|
|
403
|
-
"experiment_id": getattr(service, "experiment_id", None),
|
|
404
|
-
"run_id": getattr(service, "run_id", None),
|
|
405
|
-
"url": url,
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
def serialize_run(run) -> Dict:
|
|
410
|
-
"""Serialize an experiment run to a dictionary for JSON-RPC
|
|
411
|
-
|
|
412
|
-
Handles both dictionary and object inputs (get_experiment_runs returns dicts).
|
|
413
|
-
"""
|
|
414
|
-
if isinstance(run, dict):
|
|
415
|
-
# Already a dictionary - just ensure datetime serialization
|
|
416
|
-
return {
|
|
417
|
-
"run_id": run.get("run_id"),
|
|
418
|
-
"experiment_id": run.get("experiment_id"),
|
|
419
|
-
"hostname": run.get("hostname"),
|
|
420
|
-
"started_at": run.get("started_at"), # Already serialized
|
|
421
|
-
"ended_at": run.get("ended_at"), # Already serialized
|
|
422
|
-
"status": run.get("status"),
|
|
423
|
-
}
|
|
424
|
-
else:
|
|
425
|
-
# Object with attributes
|
|
426
|
-
return {
|
|
427
|
-
"run_id": run.run_id,
|
|
428
|
-
"experiment_id": run.experiment_id,
|
|
429
|
-
"hostname": getattr(run, "hostname", None),
|
|
430
|
-
"started_at": serialize_datetime(run.started_at),
|
|
431
|
-
"ended_at": serialize_datetime(run.ended_at),
|
|
432
|
-
"status": run.status,
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
@dataclass
|
|
437
|
-
class MockJobData:
|
|
438
|
-
"""Deserialized job data from remote"""
|
|
439
|
-
|
|
440
|
-
identifier: str
|
|
441
|
-
task_id: str
|
|
442
|
-
locator: str
|
|
443
|
-
path: Optional["Path"]
|
|
444
|
-
state: "JobState"
|
|
445
|
-
submittime: Optional[datetime]
|
|
446
|
-
starttime: Optional[datetime]
|
|
447
|
-
endtime: Optional[datetime]
|
|
448
|
-
progress: Optional[float]
|
|
449
|
-
tags: Dict
|
|
450
|
-
experiment_id: Optional[str]
|
|
451
|
-
run_id: Optional[str]
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
@dataclass
|
|
455
|
-
class MockExperimentData:
|
|
456
|
-
"""Deserialized experiment data from remote"""
|
|
457
|
-
|
|
458
|
-
experiment_id: str
|
|
459
|
-
workdir: Optional["Path"]
|
|
460
|
-
current_run_id: Optional[str]
|
|
461
|
-
total_jobs: int
|
|
462
|
-
finished_jobs: int
|
|
463
|
-
failed_jobs: int
|
|
464
|
-
updated_at: Optional[datetime]
|
|
465
|
-
started_at: Optional[datetime]
|
|
466
|
-
ended_at: Optional[datetime]
|
|
467
|
-
hostname: Optional[str]
|
|
@@ -13,11 +13,10 @@ import threading
|
|
|
13
13
|
from pathlib import Path
|
|
14
14
|
from typing import IO, Callable, Dict, Optional
|
|
15
15
|
|
|
16
|
-
from experimaestro.scheduler.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
)
|
|
16
|
+
from experimaestro.scheduler.workspace_state_provider import WorkspaceStateProvider
|
|
17
|
+
from dataclasses import asdict
|
|
18
|
+
|
|
19
|
+
from experimaestro.scheduler.state_status import EventBase
|
|
21
20
|
from experimaestro.scheduler.remote.protocol import (
|
|
22
21
|
RPCMethod,
|
|
23
22
|
NotificationMethod,
|
|
@@ -25,9 +24,6 @@ from experimaestro.scheduler.remote.protocol import (
|
|
|
25
24
|
create_success_response,
|
|
26
25
|
create_error_response,
|
|
27
26
|
create_notification,
|
|
28
|
-
serialize_job,
|
|
29
|
-
serialize_experiment,
|
|
30
|
-
serialize_run,
|
|
31
27
|
serialize_datetime,
|
|
32
28
|
deserialize_datetime,
|
|
33
29
|
PARSE_ERROR,
|
|
@@ -82,9 +78,12 @@ class SSHStateProviderServer:
|
|
|
82
78
|
RPCMethod.GET_JOB.value: self._handle_get_job,
|
|
83
79
|
RPCMethod.GET_ALL_JOBS.value: self._handle_get_all_jobs,
|
|
84
80
|
RPCMethod.GET_SERVICES.value: self._handle_get_services,
|
|
81
|
+
RPCMethod.GET_TAGS_MAP.value: self._handle_get_tags_map,
|
|
82
|
+
RPCMethod.GET_DEPENDENCIES_MAP.value: self._handle_get_dependencies_map,
|
|
85
83
|
RPCMethod.KILL_JOB.value: self._handle_kill_job,
|
|
86
84
|
RPCMethod.CLEAN_JOB.value: self._handle_clean_job,
|
|
87
85
|
RPCMethod.GET_SYNC_INFO.value: self._handle_get_sync_info,
|
|
86
|
+
RPCMethod.GET_PROCESS_INFO.value: self._handle_get_process_info,
|
|
88
87
|
}
|
|
89
88
|
|
|
90
89
|
def start(self):
|
|
@@ -101,12 +100,10 @@ class SSHStateProviderServer:
|
|
|
101
100
|
)
|
|
102
101
|
return
|
|
103
102
|
|
|
104
|
-
# Initialize state provider in read-only mode
|
|
103
|
+
# Initialize state provider in read-only mode with event watcher
|
|
105
104
|
try:
|
|
106
105
|
self._state_provider = WorkspaceStateProvider.get_instance(
|
|
107
|
-
self.workspace_path
|
|
108
|
-
read_only=True,
|
|
109
|
-
sync_on_start=True,
|
|
106
|
+
self.workspace_path
|
|
110
107
|
)
|
|
111
108
|
except Exception as e:
|
|
112
109
|
logger.exception("Failed to initialize state provider")
|
|
@@ -231,60 +228,34 @@ class SSHStateProviderServer:
|
|
|
231
228
|
{"reason": "error", "code": code, "message": message},
|
|
232
229
|
)
|
|
233
230
|
|
|
234
|
-
def _on_state_event(self, event:
|
|
231
|
+
def _on_state_event(self, event: EventBase):
|
|
235
232
|
"""Handle state change events from the state provider
|
|
236
233
|
|
|
237
234
|
Converts events to JSON-RPC notifications and sends them to the client.
|
|
235
|
+
Uses generic serialization via dataclasses.asdict.
|
|
238
236
|
"""
|
|
239
237
|
try:
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
"job_id": event.data.get("job_id"),
|
|
262
|
-
"experiment_id": event.data.get("experiment_id"),
|
|
263
|
-
"run_id": event.data.get("run_id"),
|
|
264
|
-
"state": event.data.get("state"),
|
|
265
|
-
"data": event.data,
|
|
266
|
-
},
|
|
267
|
-
)
|
|
268
|
-
# Also send file_changed notification for job metadata
|
|
269
|
-
if "path" in event.data and event.data["path"]:
|
|
270
|
-
self._send_notification(
|
|
271
|
-
NotificationMethod.FILE_CHANGED,
|
|
272
|
-
{
|
|
273
|
-
"path": f"{event.data['path']}/.experimaestro/",
|
|
274
|
-
"change_type": "modified",
|
|
275
|
-
},
|
|
276
|
-
)
|
|
277
|
-
elif event.event_type == StateEventType.SERVICE_UPDATED:
|
|
278
|
-
self._send_notification(
|
|
279
|
-
NotificationMethod.SERVICE_UPDATED,
|
|
280
|
-
{
|
|
281
|
-
"service_id": event.data.get("service_id"),
|
|
282
|
-
"experiment_id": event.data.get("experiment_id"),
|
|
283
|
-
"run_id": event.data.get("run_id"),
|
|
284
|
-
"state": event.data.get("state"),
|
|
285
|
-
"data": event.data,
|
|
286
|
-
},
|
|
287
|
-
)
|
|
238
|
+
# Serialize event to dict, filtering out None values and non-serializable objects
|
|
239
|
+
event_dict = {}
|
|
240
|
+
for key, value in asdict(event).items():
|
|
241
|
+
# Skip None values and complex objects (like job references)
|
|
242
|
+
if value is not None and not isinstance(value, (Path,)):
|
|
243
|
+
# Try to serialize - skip if not JSON-serializable
|
|
244
|
+
try:
|
|
245
|
+
import json
|
|
246
|
+
|
|
247
|
+
json.dumps(value)
|
|
248
|
+
event_dict[key] = value
|
|
249
|
+
except (TypeError, ValueError):
|
|
250
|
+
pass
|
|
251
|
+
|
|
252
|
+
self._send_notification(
|
|
253
|
+
NotificationMethod.STATE_EVENT,
|
|
254
|
+
{
|
|
255
|
+
"event_type": type(event).__name__,
|
|
256
|
+
"data": event_dict,
|
|
257
|
+
},
|
|
258
|
+
)
|
|
288
259
|
except Exception as e:
|
|
289
260
|
logger.exception("Error sending notification: %s", e)
|
|
290
261
|
|
|
@@ -296,7 +267,7 @@ class SSHStateProviderServer:
|
|
|
296
267
|
"""Handle get_experiments request"""
|
|
297
268
|
since = deserialize_datetime(params.get("since"))
|
|
298
269
|
experiments = self._state_provider.get_experiments(since=since)
|
|
299
|
-
return [
|
|
270
|
+
return [exp.state_dict() for exp in experiments]
|
|
300
271
|
|
|
301
272
|
def _handle_get_experiment(self, params: Dict) -> Optional[Dict]:
|
|
302
273
|
"""Handle get_experiment request"""
|
|
@@ -307,7 +278,7 @@ class SSHStateProviderServer:
|
|
|
307
278
|
experiment = self._state_provider.get_experiment(experiment_id)
|
|
308
279
|
if experiment is None:
|
|
309
280
|
return None
|
|
310
|
-
return
|
|
281
|
+
return experiment.state_dict()
|
|
311
282
|
|
|
312
283
|
def _handle_get_experiment_runs(self, params: Dict) -> list:
|
|
313
284
|
"""Handle get_experiment_runs request"""
|
|
@@ -316,7 +287,7 @@ class SSHStateProviderServer:
|
|
|
316
287
|
raise TypeError("experiment_id is required")
|
|
317
288
|
|
|
318
289
|
runs = self._state_provider.get_experiment_runs(experiment_id)
|
|
319
|
-
return [
|
|
290
|
+
return [run.state_dict() for run in runs]
|
|
320
291
|
|
|
321
292
|
def _handle_get_jobs(self, params: Dict) -> list:
|
|
322
293
|
"""Handle get_jobs request"""
|
|
@@ -329,7 +300,7 @@ class SSHStateProviderServer:
|
|
|
329
300
|
tags=params.get("tags"),
|
|
330
301
|
since=since,
|
|
331
302
|
)
|
|
332
|
-
return [
|
|
303
|
+
return [job.state_dict() for job in jobs]
|
|
333
304
|
|
|
334
305
|
def _handle_get_job(self, params: Dict) -> Optional[Dict]:
|
|
335
306
|
"""Handle get_job request"""
|
|
@@ -345,7 +316,7 @@ class SSHStateProviderServer:
|
|
|
345
316
|
)
|
|
346
317
|
if job is None:
|
|
347
318
|
return None
|
|
348
|
-
return
|
|
319
|
+
return job.state_dict()
|
|
349
320
|
|
|
350
321
|
def _handle_get_all_jobs(self, params: Dict) -> list:
|
|
351
322
|
"""Handle get_all_jobs request"""
|
|
@@ -355,19 +326,46 @@ class SSHStateProviderServer:
|
|
|
355
326
|
tags=params.get("tags"),
|
|
356
327
|
since=since,
|
|
357
328
|
)
|
|
358
|
-
return [
|
|
329
|
+
return [job.state_dict() for job in jobs]
|
|
359
330
|
|
|
360
331
|
def _handle_get_services(self, params: Dict) -> list:
|
|
361
332
|
"""Handle get_services request
|
|
362
333
|
|
|
363
|
-
|
|
364
|
-
recreate Service objects. This allows the client to handle module
|
|
365
|
-
loading and show appropriate error messages.
|
|
334
|
+
Returns serialized service data using full_state_dict().
|
|
366
335
|
"""
|
|
367
|
-
|
|
336
|
+
services = self._state_provider.get_services(
|
|
368
337
|
experiment_id=params.get("experiment_id"),
|
|
369
338
|
run_id=params.get("run_id"),
|
|
370
339
|
)
|
|
340
|
+
return [svc.full_state_dict() for svc in services]
|
|
341
|
+
|
|
342
|
+
def _handle_get_tags_map(self, params: Dict) -> Dict[str, Dict[str, str]]:
|
|
343
|
+
"""Handle get_tags_map request
|
|
344
|
+
|
|
345
|
+
Returns tags map for jobs in an experiment/run.
|
|
346
|
+
"""
|
|
347
|
+
experiment_id = params.get("experiment_id")
|
|
348
|
+
if not experiment_id:
|
|
349
|
+
raise TypeError("experiment_id is required")
|
|
350
|
+
|
|
351
|
+
return self._state_provider.get_tags_map(
|
|
352
|
+
experiment_id=experiment_id,
|
|
353
|
+
run_id=params.get("run_id"),
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
def _handle_get_dependencies_map(self, params: Dict) -> dict[str, list[str]]:
|
|
357
|
+
"""Handle get_dependencies_map request
|
|
358
|
+
|
|
359
|
+
Returns dependencies map for jobs in an experiment/run.
|
|
360
|
+
"""
|
|
361
|
+
experiment_id = params.get("experiment_id")
|
|
362
|
+
if not experiment_id:
|
|
363
|
+
raise TypeError("experiment_id is required")
|
|
364
|
+
|
|
365
|
+
return self._state_provider.get_dependencies_map(
|
|
366
|
+
experiment_id=experiment_id,
|
|
367
|
+
run_id=params.get("run_id"),
|
|
368
|
+
)
|
|
371
369
|
|
|
372
370
|
def _handle_kill_job(self, params: Dict) -> Dict:
|
|
373
371
|
"""Handle kill_job request"""
|
|
@@ -421,3 +419,29 @@ class SSHStateProviderServer:
|
|
|
421
419
|
else None
|
|
422
420
|
),
|
|
423
421
|
}
|
|
422
|
+
|
|
423
|
+
def _handle_get_process_info(self, params: Dict) -> Optional[Dict]:
|
|
424
|
+
"""Handle get_process_info request"""
|
|
425
|
+
job_id = params.get("job_id")
|
|
426
|
+
experiment_id = params.get("experiment_id")
|
|
427
|
+
run_id = params.get("run_id")
|
|
428
|
+
|
|
429
|
+
if not job_id or not experiment_id:
|
|
430
|
+
raise TypeError("job_id and experiment_id are required")
|
|
431
|
+
|
|
432
|
+
# Get the job first
|
|
433
|
+
job = self._state_provider.get_job(job_id, experiment_id, run_id)
|
|
434
|
+
if job is None:
|
|
435
|
+
return None
|
|
436
|
+
|
|
437
|
+
# Get process info
|
|
438
|
+
pinfo = self._state_provider.get_process_info(job)
|
|
439
|
+
if pinfo is None:
|
|
440
|
+
return None
|
|
441
|
+
|
|
442
|
+
# Serialize ProcessInfo to dict
|
|
443
|
+
return {
|
|
444
|
+
"pid": pinfo.pid,
|
|
445
|
+
"type": pinfo.type,
|
|
446
|
+
"running": pinfo.running,
|
|
447
|
+
}
|
|
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
|
|
|
16
16
|
class ServiceListener:
|
|
17
17
|
"""A service listener"""
|
|
18
18
|
|
|
19
|
-
def service_state_changed(service):
|
|
19
|
+
def service_state_changed(self, service):
|
|
20
20
|
pass
|
|
21
21
|
|
|
22
22
|
|
|
@@ -78,15 +78,21 @@ class Service(BaseService):
|
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
Returns:
|
|
81
|
-
Dict with constructor kwargs
|
|
81
|
+
Dict with constructor kwargs.
|
|
82
82
|
"""
|
|
83
83
|
return {}
|
|
84
84
|
|
|
85
|
-
def
|
|
86
|
-
"""
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
85
|
+
def full_state_dict(self) -> dict:
|
|
86
|
+
"""Serialize service to dictionary for JSON serialization.
|
|
87
|
+
|
|
88
|
+
Overrides BaseService.full_state_dict() to properly serialize Path objects.
|
|
89
|
+
"""
|
|
90
|
+
return {
|
|
91
|
+
"service_id": self.id,
|
|
92
|
+
"description": self.description(),
|
|
93
|
+
"class": f"{self.__class__.__module__}.{self.__class__.__name__}",
|
|
94
|
+
"state_dict": self.serialize_state_dict(self.state_dict()),
|
|
95
|
+
}
|
|
90
96
|
|
|
91
97
|
@staticmethod
|
|
92
98
|
def serialize_state_dict(data: dict) -> dict:
|
|
@@ -101,21 +107,29 @@ class Service(BaseService):
|
|
|
101
107
|
Returns:
|
|
102
108
|
Serializable dictionary with paths converted
|
|
103
109
|
"""
|
|
104
|
-
|
|
105
|
-
|
|
110
|
+
|
|
111
|
+
def serialize_value(v):
|
|
106
112
|
if isinstance(v, Path):
|
|
107
|
-
|
|
113
|
+
return {"__path__": str(v)}
|
|
114
|
+
elif isinstance(v, dict):
|
|
115
|
+
return {k: serialize_value(val) for k, val in v.items()}
|
|
116
|
+
elif isinstance(v, (list, tuple)):
|
|
117
|
+
return [serialize_value(item) for item in v]
|
|
108
118
|
else:
|
|
109
|
-
|
|
110
|
-
|
|
119
|
+
return v
|
|
120
|
+
|
|
121
|
+
return {k: serialize_value(v) for k, v in data.items()}
|
|
111
122
|
|
|
112
123
|
@staticmethod
|
|
113
124
|
def from_state_dict(
|
|
114
|
-
|
|
125
|
+
service_class: str,
|
|
126
|
+
data: dict,
|
|
127
|
+
path_translator: Optional[Callable[[str], Path]] = None,
|
|
115
128
|
) -> "Service":
|
|
116
129
|
"""Recreate a service from a state dictionary.
|
|
117
130
|
|
|
118
131
|
Args:
|
|
132
|
+
service_class: Fully qualified class name (e.g., "module.ClassName")
|
|
119
133
|
data: Dictionary from :meth:`state_dict` (may be serialized)
|
|
120
134
|
path_translator: Optional function to translate remote paths to local.
|
|
121
135
|
Used by remote clients to map paths to local cache.
|
|
@@ -124,7 +138,7 @@ class Service(BaseService):
|
|
|
124
138
|
A new Service instance, or raises if the class cannot be loaded.
|
|
125
139
|
|
|
126
140
|
Raises:
|
|
127
|
-
ValueError: If __unserializable__ is True or
|
|
141
|
+
ValueError: If __unserializable__ is True or class cannot be loaded
|
|
128
142
|
"""
|
|
129
143
|
import importlib
|
|
130
144
|
|
|
@@ -134,28 +148,39 @@ class Service(BaseService):
|
|
|
134
148
|
f"Service cannot be recreated: {data.get('__reason__', 'unknown reason')}"
|
|
135
149
|
)
|
|
136
150
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
raise ValueError("Missing '__class__' in service state_dict")
|
|
151
|
+
if not service_class:
|
|
152
|
+
raise ValueError("Missing service_class")
|
|
140
153
|
|
|
141
|
-
module_name, class_name =
|
|
154
|
+
module_name, class_name = service_class.rsplit(".", 1)
|
|
142
155
|
module = importlib.import_module(module_name)
|
|
143
156
|
cls = getattr(module, class_name)
|
|
144
157
|
|
|
145
|
-
# Build kwargs, detecting and translating paths automatically
|
|
158
|
+
# Build kwargs, detecting and translating paths automatically (handles nested)
|
|
159
|
+
def deserialize_value(v):
|
|
160
|
+
if isinstance(v, dict):
|
|
161
|
+
if "__path__" in v:
|
|
162
|
+
# Serialized path - deserialize with optional translation
|
|
163
|
+
path_str = v["__path__"]
|
|
164
|
+
if path_translator:
|
|
165
|
+
return path_translator(path_str)
|
|
166
|
+
else:
|
|
167
|
+
return Path(path_str)
|
|
168
|
+
else:
|
|
169
|
+
return {
|
|
170
|
+
k: deserialize_value(val)
|
|
171
|
+
for k, val in v.items()
|
|
172
|
+
if not k.startswith("__")
|
|
173
|
+
}
|
|
174
|
+
elif isinstance(v, list):
|
|
175
|
+
return [deserialize_value(item) for item in v]
|
|
176
|
+
else:
|
|
177
|
+
return v
|
|
178
|
+
|
|
146
179
|
kwargs = {}
|
|
147
180
|
for k, v in data.items():
|
|
148
181
|
if k.startswith("__"):
|
|
149
182
|
continue # Skip special keys
|
|
150
|
-
|
|
151
|
-
# Serialized path - deserialize with optional translation
|
|
152
|
-
path_str = v["__path__"]
|
|
153
|
-
if path_translator:
|
|
154
|
-
kwargs[k] = path_translator(path_str)
|
|
155
|
-
else:
|
|
156
|
-
kwargs[k] = Path(path_str)
|
|
157
|
-
else:
|
|
158
|
-
kwargs[k] = v
|
|
183
|
+
kwargs[k] = deserialize_value(v)
|
|
159
184
|
|
|
160
185
|
logger.debug("Creating %s with kwargs: %s", cls.__name__, kwargs)
|
|
161
186
|
return cls(**kwargs)
|