experimaestro 2.0.0b4__py3-none-any.whl → 2.0.0b8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of experimaestro might be problematic. Click here for more details.
- experimaestro/cli/__init__.py +177 -31
- experimaestro/experiments/cli.py +6 -2
- experimaestro/scheduler/base.py +21 -0
- experimaestro/scheduler/experiment.py +64 -34
- experimaestro/scheduler/interfaces.py +27 -0
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/client.py +874 -0
- experimaestro/scheduler/remote/protocol.py +467 -0
- experimaestro/scheduler/remote/server.py +423 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +158 -32
- experimaestro/scheduler/state_db.py +58 -9
- experimaestro/scheduler/state_provider.py +512 -91
- experimaestro/scheduler/state_sync.py +65 -8
- experimaestro/tests/test_cli_jobs.py +3 -3
- experimaestro/tests/test_remote_state.py +671 -0
- experimaestro/tests/test_state_db.py +8 -8
- experimaestro/tui/app.py +100 -8
- experimaestro/version.py +2 -2
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +4 -4
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/RECORD +24 -18
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/entry_points.txt +0 -0
- {experimaestro-2.0.0b4.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
|
@@ -97,6 +97,35 @@ def read_services_json(exp_dir: Path) -> Dict[str, Dict]:
|
|
|
97
97
|
return {}
|
|
98
98
|
|
|
99
99
|
|
|
100
|
+
def read_informations_json(exp_dir: Path) -> Dict:
    """Read the informations.json file containing experiment metadata.

    The read is best-effort: a missing file, an unreadable file, invalid
    JSON, or a top-level JSON value that is not an object is logged and an
    empty dictionary is returned instead of raising.

    Args:
        exp_dir: Path to the experiment directory

    Returns:
        Dictionary with the experiment information, including:
            - runs: Dict[run_id, {hostname, started_at}]
        or an empty dictionary when nothing usable could be read.
    """
    info_path = exp_dir / "informations.json"

    if not info_path.exists():
        logger.debug("No informations.json found in %s", exp_dir)
        return {}

    # Keep the try body limited to the operations that can actually fail.
    try:
        with info_path.open("r") as f:
            info_data = json.load(f)
    except json.JSONDecodeError as e:
        logger.warning("Failed to parse informations.json: %s", e)
        return {}
    except Exception as e:
        # Deliberately broad: this is a best-effort read and must not
        # abort the caller on I/O or decoding problems.
        logger.warning("Failed to read informations.json from %s: %s", info_path, e)
        return {}

    # json.load may return any JSON value (list, str, number, None).
    # Callers treat the result as a mapping (e.g. ``.get("runs", {})``),
    # so anything that is not a JSON object is rejected here.
    if not isinstance(info_data, dict):
        logger.warning(
            "Ignoring informations.json in %s: expected a JSON object, got %s",
            exp_dir,
            type(info_data).__name__,
        )
        return {}

    logger.debug("Read informations.json from %s", exp_dir)
    return info_data
|
|
127
|
+
|
|
128
|
+
|
|
100
129
|
def acquire_sync_lock(
|
|
101
130
|
workspace_path: Path, blocking: bool = True
|
|
102
131
|
) -> Optional[fasteners.InterProcessLock]:
|
|
@@ -551,6 +580,10 @@ def sync_workspace_from_disk( # noqa: C901
|
|
|
551
580
|
# Read services.json to get services for this experiment
|
|
552
581
|
service_records = read_services_json(exp_dir)
|
|
553
582
|
|
|
583
|
+
# Read informations.json for run metadata (hostname, etc.)
|
|
584
|
+
info_data = read_informations_json(exp_dir)
|
|
585
|
+
runs_info = info_data.get("runs", {})
|
|
586
|
+
|
|
554
587
|
if write_mode:
|
|
555
588
|
# Ensure experiment exists in database
|
|
556
589
|
now = datetime.now()
|
|
@@ -575,16 +608,32 @@ def sync_workspace_from_disk( # noqa: C901
|
|
|
575
608
|
# Use the most recent run as current
|
|
576
609
|
current_run_id = existing_runs[0].run_id
|
|
577
610
|
runs_found += len(existing_runs)
|
|
611
|
+
|
|
612
|
+
# Update hostname from informations.json if available
|
|
613
|
+
if write_mode:
|
|
614
|
+
for run in existing_runs:
|
|
615
|
+
run_info = runs_info.get(run.run_id, {})
|
|
616
|
+
hostname = run_info.get("hostname")
|
|
617
|
+
if hostname and not run.hostname:
|
|
618
|
+
ExperimentRunModel.update(hostname=hostname).where(
|
|
619
|
+
(ExperimentRunModel.experiment_id == experiment_id)
|
|
620
|
+
& (ExperimentRunModel.run_id == run.run_id)
|
|
621
|
+
).execute()
|
|
578
622
|
else:
|
|
579
623
|
# Create initial run
|
|
580
624
|
current_run_id = "initial"
|
|
581
625
|
runs_found += 1
|
|
582
626
|
|
|
627
|
+
# Get hostname from informations.json if available
|
|
628
|
+
run_info = runs_info.get(current_run_id, {})
|
|
629
|
+
hostname = run_info.get("hostname")
|
|
630
|
+
|
|
583
631
|
if write_mode:
|
|
584
632
|
ExperimentRunModel.insert(
|
|
585
633
|
experiment_id=experiment_id,
|
|
586
634
|
run_id=current_run_id,
|
|
587
635
|
status="active",
|
|
636
|
+
hostname=hostname,
|
|
588
637
|
).on_conflict_ignore().execute()
|
|
589
638
|
|
|
590
639
|
# Update experiment's current_run_id
|
|
@@ -603,17 +652,29 @@ def sync_workspace_from_disk( # noqa: C901
|
|
|
603
652
|
if write_mode and service_records:
|
|
604
653
|
for service_id, service_data in service_records.items():
|
|
605
654
|
now = datetime.now()
|
|
606
|
-
#
|
|
607
|
-
|
|
655
|
+
# Extract only the state_dict keys (not metadata like
|
|
656
|
+
# service_id, description, state, url, timestamp)
|
|
657
|
+
# The state_dict should have __class__ and service-specific
|
|
658
|
+
# fields like 'path' for TensorboardService
|
|
659
|
+
metadata_keys = {
|
|
660
|
+
"service_id",
|
|
661
|
+
"description",
|
|
662
|
+
"url",
|
|
663
|
+
"timestamp",
|
|
664
|
+
}
|
|
665
|
+
state_dict = {
|
|
666
|
+
k: v
|
|
667
|
+
for k, v in service_data.items()
|
|
668
|
+
if k not in metadata_keys
|
|
669
|
+
}
|
|
670
|
+
state_dict_json = json.dumps(state_dict)
|
|
608
671
|
ServiceModel.insert(
|
|
609
672
|
service_id=service_id,
|
|
610
673
|
experiment_id=experiment_id,
|
|
611
674
|
run_id=current_run_id,
|
|
612
675
|
description=service_data.get("description", ""),
|
|
613
|
-
state=service_data.get("state", "STOPPED"),
|
|
614
676
|
state_dict=state_dict_json,
|
|
615
677
|
created_at=now,
|
|
616
|
-
updated_at=now,
|
|
617
678
|
).on_conflict(
|
|
618
679
|
conflict_target=[
|
|
619
680
|
ServiceModel.service_id,
|
|
@@ -624,11 +685,7 @@ def sync_workspace_from_disk( # noqa: C901
|
|
|
624
685
|
ServiceModel.description: service_data.get(
|
|
625
686
|
"description", ""
|
|
626
687
|
),
|
|
627
|
-
ServiceModel.state: service_data.get(
|
|
628
|
-
"state", "STOPPED"
|
|
629
|
-
),
|
|
630
688
|
ServiceModel.state_dict: state_dict_json,
|
|
631
|
-
ServiceModel.updated_at: now,
|
|
632
689
|
},
|
|
633
690
|
).execute()
|
|
634
691
|
logger.debug(
|
|
@@ -33,7 +33,7 @@ def workspace_path(tmp_path):
|
|
|
33
33
|
xpm_dir = ws_path / ".experimaestro"
|
|
34
34
|
xpm_dir.mkdir()
|
|
35
35
|
db_path = xpm_dir / "workspace.db"
|
|
36
|
-
db = initialize_workspace_database(db_path, read_only=False)
|
|
36
|
+
db, _ = initialize_workspace_database(db_path, read_only=False)
|
|
37
37
|
|
|
38
38
|
# Create jobs directory
|
|
39
39
|
jobs_dir = ws_path / "jobs"
|
|
@@ -48,7 +48,7 @@ def workspace_path(tmp_path):
|
|
|
48
48
|
def workspace_with_jobs(workspace_path):
|
|
49
49
|
"""Create a workspace with some test jobs in the database"""
|
|
50
50
|
db_path = workspace_path / ".experimaestro" / "workspace.db"
|
|
51
|
-
db = initialize_workspace_database(db_path, read_only=False)
|
|
51
|
+
db, _ = initialize_workspace_database(db_path, read_only=False)
|
|
52
52
|
|
|
53
53
|
with db.bind_ctx(ALL_MODELS):
|
|
54
54
|
# Create experiment and run
|
|
@@ -152,7 +152,7 @@ def test_jobs_list_fullpath(workspace_with_jobs):
|
|
|
152
152
|
def workspace_with_timed_jobs(workspace_path):
|
|
153
153
|
"""Create a workspace with jobs that have different submission times"""
|
|
154
154
|
db_path = workspace_path / ".experimaestro" / "workspace.db"
|
|
155
|
-
db = initialize_workspace_database(db_path, read_only=False)
|
|
155
|
+
db, _ = initialize_workspace_database(db_path, read_only=False)
|
|
156
156
|
|
|
157
157
|
with db.bind_ctx(ALL_MODELS):
|
|
158
158
|
ExperimentModel.create(experiment_id="test_exp", current_run_id="run_001")
|