experimaestro 1.11.1__py3-none-any.whl → 2.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (133)
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +140 -16
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/progress.py +269 -0
  7. experimaestro/cli/refactor.py +249 -0
  8. experimaestro/click.py +0 -1
  9. experimaestro/commandline.py +19 -3
  10. experimaestro/connectors/__init__.py +22 -3
  11. experimaestro/connectors/local.py +12 -0
  12. experimaestro/core/arguments.py +192 -37
  13. experimaestro/core/identifier.py +127 -12
  14. experimaestro/core/objects/__init__.py +6 -0
  15. experimaestro/core/objects/config.py +702 -285
  16. experimaestro/core/objects/config_walk.py +24 -6
  17. experimaestro/core/serialization.py +91 -34
  18. experimaestro/core/serializers.py +1 -8
  19. experimaestro/core/subparameters.py +164 -0
  20. experimaestro/core/types.py +198 -83
  21. experimaestro/exceptions.py +26 -0
  22. experimaestro/experiments/cli.py +107 -25
  23. experimaestro/generators.py +50 -9
  24. experimaestro/huggingface.py +3 -1
  25. experimaestro/launcherfinder/parser.py +29 -0
  26. experimaestro/launcherfinder/registry.py +3 -3
  27. experimaestro/launchers/__init__.py +26 -1
  28. experimaestro/launchers/direct.py +12 -0
  29. experimaestro/launchers/slurm/base.py +154 -2
  30. experimaestro/mkdocs/base.py +6 -8
  31. experimaestro/mkdocs/metaloader.py +0 -1
  32. experimaestro/mypy.py +452 -7
  33. experimaestro/notifications.py +75 -16
  34. experimaestro/progress.py +404 -0
  35. experimaestro/rpyc.py +0 -1
  36. experimaestro/run.py +19 -6
  37. experimaestro/scheduler/__init__.py +18 -1
  38. experimaestro/scheduler/base.py +504 -959
  39. experimaestro/scheduler/dependencies.py +43 -28
  40. experimaestro/scheduler/dynamic_outputs.py +259 -130
  41. experimaestro/scheduler/experiment.py +582 -0
  42. experimaestro/scheduler/interfaces.py +474 -0
  43. experimaestro/scheduler/jobs.py +485 -0
  44. experimaestro/scheduler/services.py +186 -12
  45. experimaestro/scheduler/signal_handler.py +32 -0
  46. experimaestro/scheduler/state.py +1 -1
  47. experimaestro/scheduler/state_db.py +388 -0
  48. experimaestro/scheduler/state_provider.py +2345 -0
  49. experimaestro/scheduler/state_sync.py +834 -0
  50. experimaestro/scheduler/workspace.py +52 -10
  51. experimaestro/scriptbuilder.py +7 -0
  52. experimaestro/server/__init__.py +153 -32
  53. experimaestro/server/data/index.css +0 -125
  54. experimaestro/server/data/index.css.map +1 -1
  55. experimaestro/server/data/index.js +194 -58
  56. experimaestro/server/data/index.js.map +1 -1
  57. experimaestro/settings.py +47 -6
  58. experimaestro/sphinx/__init__.py +3 -3
  59. experimaestro/taskglobals.py +20 -0
  60. experimaestro/tests/conftest.py +80 -0
  61. experimaestro/tests/core/test_generics.py +2 -2
  62. experimaestro/tests/identifier_stability.json +45 -0
  63. experimaestro/tests/launchers/bin/sacct +6 -2
  64. experimaestro/tests/launchers/bin/sbatch +4 -2
  65. experimaestro/tests/launchers/common.py +2 -2
  66. experimaestro/tests/launchers/test_slurm.py +80 -0
  67. experimaestro/tests/restart.py +1 -1
  68. experimaestro/tests/tasks/all.py +7 -0
  69. experimaestro/tests/tasks/test_dynamic.py +231 -0
  70. experimaestro/tests/test_checkers.py +2 -2
  71. experimaestro/tests/test_cli_jobs.py +615 -0
  72. experimaestro/tests/test_dependencies.py +11 -17
  73. experimaestro/tests/test_deprecated.py +630 -0
  74. experimaestro/tests/test_environment.py +200 -0
  75. experimaestro/tests/test_experiment.py +3 -3
  76. experimaestro/tests/test_file_progress.py +425 -0
  77. experimaestro/tests/test_file_progress_integration.py +477 -0
  78. experimaestro/tests/test_forward.py +3 -3
  79. experimaestro/tests/test_generators.py +93 -0
  80. experimaestro/tests/test_identifier.py +520 -169
  81. experimaestro/tests/test_identifier_stability.py +458 -0
  82. experimaestro/tests/test_instance.py +16 -21
  83. experimaestro/tests/test_multitoken.py +442 -0
  84. experimaestro/tests/test_mypy.py +433 -0
  85. experimaestro/tests/test_objects.py +314 -30
  86. experimaestro/tests/test_outputs.py +8 -8
  87. experimaestro/tests/test_param.py +22 -26
  88. experimaestro/tests/test_partial_paths.py +231 -0
  89. experimaestro/tests/test_progress.py +2 -50
  90. experimaestro/tests/test_resumable_task.py +480 -0
  91. experimaestro/tests/test_serializers.py +141 -60
  92. experimaestro/tests/test_state_db.py +434 -0
  93. experimaestro/tests/test_subparameters.py +160 -0
  94. experimaestro/tests/test_tags.py +151 -15
  95. experimaestro/tests/test_tasks.py +137 -160
  96. experimaestro/tests/test_token_locking.py +252 -0
  97. experimaestro/tests/test_tokens.py +25 -19
  98. experimaestro/tests/test_types.py +133 -11
  99. experimaestro/tests/test_validation.py +19 -19
  100. experimaestro/tests/test_workspace_triggers.py +158 -0
  101. experimaestro/tests/token_reschedule.py +5 -3
  102. experimaestro/tests/utils.py +2 -2
  103. experimaestro/tokens.py +154 -57
  104. experimaestro/tools/diff.py +8 -1
  105. experimaestro/tui/__init__.py +8 -0
  106. experimaestro/tui/app.py +2303 -0
  107. experimaestro/tui/app.tcss +353 -0
  108. experimaestro/tui/log_viewer.py +228 -0
  109. experimaestro/typingutils.py +11 -2
  110. experimaestro/utils/__init__.py +23 -0
  111. experimaestro/utils/environment.py +148 -0
  112. experimaestro/utils/git.py +129 -0
  113. experimaestro/utils/resources.py +1 -1
  114. experimaestro/version.py +34 -0
  115. {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +70 -39
  116. experimaestro-2.0.0b4.dist-info/RECORD +181 -0
  117. {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
  118. experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
  119. experimaestro/compat.py +0 -6
  120. experimaestro/core/objects.pyi +0 -225
  121. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  122. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  123. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  124. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  125. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  126. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  127. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  128. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  129. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  130. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  131. experimaestro-1.11.1.dist-info/RECORD +0 -158
  132. experimaestro-1.11.1.dist-info/entry_points.txt +0 -17
  133. {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info/licenses}/LICENSE +0 -0
experimaestro/cli/jobs.py CHANGED
@@ -1,8 +1,6 @@
1
1
  # flake8: noqa: T201
2
- import asyncio
3
2
  import subprocess
4
3
  from typing import Optional
5
- from shutil import rmtree
6
4
  import click
7
5
  from pathlib import Path
8
6
  from termcolor import colored, cprint
@@ -34,6 +32,9 @@ def jobs(
34
32
  selects jobs where the tag model is "bm25", the tag mode is either
35
33
  "a" or "b", and the state is running.
36
34
 
35
+ Note: Jobs are read from the workspace database. If jobs are missing,
36
+ run 'experimaestro experiments sync' to synchronize the database
37
+ with the filesystem.
37
38
  """
38
39
  ws = ctx.obj.workspace = find_workspace(workdir=workdir, workspace=workspace)
39
40
  check_xp_path(ctx, None, ws.path)
@@ -44,117 +45,117 @@ def process(
44
45
  *,
45
46
  experiment="",
46
47
  tags="",
47
- ready=False,
48
48
  clean=False,
49
49
  kill=False,
50
50
  filter="",
51
51
  perform=False,
52
52
  fullpath=False,
53
- check=False,
53
+ count=0,
54
54
  ):
55
- from .filter import createFilter, JobInformation
55
+ """Process jobs from the workspace database
56
+
57
+ Args:
58
+ workspace: Workspace settings
59
+ experiment: Filter by experiment ID
60
+ tags: Show tags in output
61
+ clean: Clean finished jobs
62
+ kill: Kill running jobs
63
+ filter: Filter expression
64
+ perform: Actually perform kill/clean (dry run if False)
65
+ fullpath: Show full paths instead of short names
66
+ count: Limit output to N most recent jobs (0 = no limit)
67
+ """
68
+ from .filter import createFilter
69
+ from experimaestro.scheduler.state_provider import WorkspaceStateProvider
56
70
  from experimaestro.scheduler import JobState
57
71
 
58
- _filter = createFilter(filter) if filter else lambda x: True
59
-
60
- # Get all jobs from experiments
61
- job2xp = {}
62
-
63
- path = workspace.path
64
- for p in (path / "xp").glob("*"):
65
- for job in p.glob("jobs/*/*"):
66
- job_path = job.resolve()
67
- if job_path.is_dir():
68
- job2xp.setdefault(job_path.name, set()).add(p.name)
69
-
70
- if (p / "jobs.bak").is_dir():
71
- cprint(f" Experiment {p.name} has not finished yet", "red")
72
- if (not perform) and (kill or clean):
73
- cprint(
74
- " Preventing kill/clean (use --perform if you want to)", "yellow"
75
- )
76
- kill = False
77
- clean = False
78
-
79
- # Now, process jobs
80
- for job in path.glob("jobs/*/*"):
81
- info = None
82
- p = job.resolve()
83
- if p.is_dir():
84
- *_, scriptname = p.parent.name.rsplit(".", 1)
85
- xps = job2xp.get(job.name, set())
86
- if experiment and experiment not in xps:
87
- continue
88
-
89
- info = JobInformation(p, scriptname, check=check)
90
- job_str = (
91
- (str(job.resolve()) if fullpath else f"{job.parent.name}/{job.name}")
92
- + " "
93
- + ",".join(xps)
94
- )
95
-
96
- if filter:
97
- if not _filter(info):
98
- continue
99
-
100
- if info.state is None:
101
- print(colored(f"NODIR {job_str}", "red"), end="")
102
- elif info.state.running():
72
+ _filter = createFilter(filter) if filter else None
73
+
74
+ # Get state provider (write mode for kill/clean operations)
75
+ read_only = not (kill or clean)
76
+ provider = WorkspaceStateProvider.get_instance(workspace.path, read_only=read_only)
77
+
78
+ try:
79
+ # Get all jobs from the database
80
+ all_jobs = provider.get_all_jobs()
81
+
82
+ # Filter by experiment if specified
83
+ if experiment:
84
+ all_jobs = [j for j in all_jobs if j.experiment_id == experiment]
85
+
86
+ # Apply filter expression
87
+ if _filter:
88
+ all_jobs = [j for j in all_jobs if _filter(j)]
89
+
90
+ # Sort by submission time (most recent first)
91
+ # Jobs without submittime go to the end
92
+ all_jobs.sort(key=lambda j: j.submittime or 0, reverse=True)
93
+
94
+ # Limit to N most recent jobs if count is specified
95
+ if count > 0:
96
+ all_jobs = all_jobs[:count]
97
+
98
+ if not all_jobs:
99
+ cprint("No jobs found.", "yellow")
100
+ return
101
+
102
+ # Process each job
103
+ for job in all_jobs:
104
+ job_str = str(job.path) if fullpath else f"{job.task_id}/{job.identifier}"
105
+
106
+ # Add experiment info
107
+ if job.experiment_id:
108
+ job_str += f" [{job.experiment_id}]"
109
+
110
+ if job.state is None or job.state == JobState.UNSCHEDULED:
111
+ print(colored(f"UNSCHED {job_str}", "red"), end="")
112
+ elif job.state.running():
103
113
  if kill:
104
114
  if perform:
105
- process = info.getprocess()
106
- if process is None:
107
- cprint(
108
- "internal error – no process could be retrieved",
109
- "red",
110
- )
115
+ if provider.kill_job(job, perform=True):
116
+ cprint(f"KILLED {job_str}", "light_red")
111
117
  else:
112
- cprint(f"KILLING {process}", "light_red")
113
- process.kill()
118
+ cprint(f"KILL FAILED {job_str}", "red")
114
119
  else:
115
- print("KILLING (not performing)", process)
116
- print(
117
- colored(f"{info.state.name:8}{job_str}", "yellow"),
118
- end="",
119
- )
120
- elif info.state == JobState.DONE:
121
- print(
122
- colored(f"DONE {job_str}", "green"),
123
- end="",
124
- )
125
- elif info.state == JobState.ERROR:
120
+ cprint(f"KILLING {job_str} (dry run)", "yellow")
121
+ else:
122
+ print(colored(f"{job.state.name:8}{job_str}", "yellow"), end="")
123
+ elif job.state == JobState.DONE:
124
+ print(colored(f"DONE {job_str}", "green"), end="")
125
+ elif job.state == JobState.ERROR:
126
126
  print(colored(f"FAIL {job_str}", "red"), end="")
127
127
  else:
128
- print(
129
- colored(f"{info.state.name:8}{job_str}", "red"),
130
- end="",
131
- )
128
+ print(colored(f"{job.state.name:8}{job_str}", "red"), end="")
132
129
 
133
- else:
134
- if not ready:
135
- continue
136
- print(colored(f"READY {job_path}", "yellow"), end="")
130
+ # Show tags if requested
131
+ if tags and job.tags:
132
+ print(f""" {" ".join(f"{k}={v}" for k, v in job.tags.items())}""")
133
+ elif not (kill and perform):
134
+ print()
137
135
 
138
- if tags:
139
- print(f""" {" ".join(f"{k}={v}" for k, v in info.tags.items())}""")
140
- else:
141
- print()
136
+ # Clean finished jobs
137
+ if clean and job.state and job.state.finished():
138
+ if perform:
139
+ if provider.clean_job(job, perform=True):
140
+ cprint(" Cleaned", "red")
141
+ else:
142
+ cprint(" Clean failed", "red")
143
+ else:
144
+ cprint(" Would clean (dry run)", "yellow")
142
145
 
143
- if clean and info.state and info.state.finished():
144
- if perform:
145
- cprint("Cleaning...", "red")
146
- rmtree(p)
147
- else:
148
- cprint("Cleaning... (not performed)", "red")
149
- print()
146
+ print()
147
+
148
+ finally:
149
+ # Close provider if we created it for write mode
150
+ if not read_only:
151
+ provider.close()
150
152
 
151
153
 
152
154
  @click.option("--experiment", default=None, help="Restrict to this experiment")
153
155
  @click.option("--tags", is_flag=True, help="Show tags")
154
- @click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
155
156
  @click.option("--filter", default="", help="Filter expression")
156
157
  @click.option("--fullpath", is_flag=True, help="Prints full paths")
157
- @click.option("--no-check", is_flag=True, help="Check that running jobs")
158
+ @click.option("--count", "-c", default=0, type=int, help="Limit to N most recent jobs")
158
159
  @jobs.command()
159
160
  @click.pass_context
160
161
  def list(
@@ -162,24 +163,22 @@ def list(
162
163
  experiment: str,
163
164
  filter: str,
164
165
  tags: bool,
165
- ready: bool,
166
166
  fullpath: bool,
167
- no_check: bool,
167
+ count: int,
168
168
  ):
169
+ """List all jobs in the workspace (sorted by submission date, most recent first)"""
169
170
  process(
170
171
  ctx.obj.workspace,
171
172
  experiment=experiment,
172
173
  filter=filter,
173
174
  tags=tags,
174
- ready=ready,
175
175
  fullpath=fullpath,
176
- check=not no_check,
176
+ count=count,
177
177
  )
178
178
 
179
179
 
180
180
  @click.option("--experiment", default=None, help="Restrict to this experiment")
181
181
  @click.option("--tags", is_flag=True, help="Show tags")
182
- @click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
183
182
  @click.option("--filter", default="", help="Filter expression")
184
183
  @click.option("--perform", is_flag=True, help="Really perform the killing")
185
184
  @click.option("--fullpath", is_flag=True, help="Prints full paths")
@@ -190,17 +189,15 @@ def kill(
190
189
  experiment: str,
191
190
  filter: str,
192
191
  tags: bool,
193
- ready: bool,
194
192
  fullpath: bool,
195
193
  perform: bool,
196
- check: bool,
197
194
  ):
195
+ """Kill running jobs"""
198
196
  process(
199
197
  ctx.obj.workspace,
200
198
  experiment=experiment,
201
199
  filter=filter,
202
200
  tags=tags,
203
- ready=ready,
204
201
  kill=True,
205
202
  perform=perform,
206
203
  fullpath=fullpath,
@@ -209,7 +206,6 @@ def kill(
209
206
 
210
207
  @click.option("--experiment", default=None, help="Restrict to this experiment")
211
208
  @click.option("--tags", is_flag=True, help="Show tags")
212
- @click.option("--ready", is_flag=True, help="Include tasks which are not yet scheduled")
213
209
  @click.option("--filter", default="", help="Filter expression")
214
210
  @click.option("--perform", is_flag=True, help="Really perform the cleaning")
215
211
  @click.option("--fullpath", is_flag=True, help="Prints full paths")
@@ -220,16 +216,15 @@ def clean(
220
216
  experiment: str,
221
217
  filter: str,
222
218
  tags: bool,
223
- ready: bool,
224
219
  fullpath: bool,
225
220
  perform: bool,
226
221
  ):
222
+ """Clean finished jobs (delete directories and DB entries)"""
227
223
  process(
228
224
  ctx.obj.workspace,
229
225
  experiment=experiment,
230
226
  filter=filter,
231
227
  tags=tags,
232
- ready=ready,
233
228
  clean=True,
234
229
  perform=perform,
235
230
  fullpath=fullpath,
@@ -244,25 +239,81 @@ def clean(
244
239
  @jobs.command()
245
240
  @click.pass_context
246
241
  def log(ctx, jobid: str, follow: bool, std: bool):
242
+ """View job log (stderr by default, stdout with --std)
243
+
244
+ JOBID format: task.name/hash (e.g., mymodule.MyTask/abc123)
245
+ """
247
246
  task_name, task_hash = jobid.split("/")
248
247
  _, name = task_name.rsplit(".", 1)
249
- path = (
248
+ log_path = (
250
249
  ctx.obj.workspace.path
251
250
  / "jobs"
252
251
  / task_name
253
252
  / task_hash
254
253
  / f"""{name}.{'out' if std else 'err'}"""
255
254
  )
255
+ if not log_path.exists():
256
+ cprint(f"Log file not found: {log_path}", "red")
257
+ return
256
258
  if follow:
257
- subprocess.run(["tail", "-f", path])
259
+ subprocess.run(["tail", "-f", log_path])
258
260
  else:
259
- subprocess.run(["less", "-r", path])
261
+ subprocess.run(["less", "-r", log_path])
260
262
 
261
263
 
262
264
@click.argument("jobid", type=str)
@jobs.command()
@click.pass_context
def path(ctx, jobid: str):
    """Print the path to a job directory

    JOBID format: task.name/hash (e.g., mymodule.MyTask/abc123)
    """
    # A JOBID without exactly one "/" cannot be unpacked into two parts;
    # report it as a usage error instead of letting a ValueError traceback
    # escape to the user.
    try:
        task_name, task_hash = jobid.split("/")
    except ValueError:
        raise click.ClickException(
            "JOBID must be in format task.name/hash"
        ) from None

    job_path = ctx.obj.workspace.path / "jobs" / task_name / task_hash
    if not job_path.exists():
        cprint(f"Job directory not found: {job_path}", "red")
        return
    print(job_path)
278
+
279
+
280
@click.option("--perform", is_flag=True, help="Actually delete orphan partials")
@jobs.command("cleanup-partials")
@click.pass_context
def cleanup_partials(ctx, perform: bool):
    """Clean up orphan partial directories

    Partial directories are shared checkpoint locations created by
    subparameters. When all jobs using a partial are deleted, the
    partial becomes orphaned and can be cleaned up.

    This command finds all orphan partials and deletes them (or shows
    what would be deleted in dry-run mode).
    """
    from experimaestro.scheduler.state_provider import WorkspaceStateProvider

    # Without --perform the provider is opened read-only and nothing is deleted
    dry_run = not perform
    provider = WorkspaceStateProvider.get_instance(
        ctx.obj.workspace.path, read_only=dry_run
    )

    try:
        orphan_paths = provider.cleanup_orphan_partials(perform=perform)
        if not orphan_paths:
            cprint("No orphan partials found.", "green")
            return

        # Pick the header and the per-path label once, then list every path
        if dry_run:
            cprint(f"Found {len(orphan_paths)} orphan partial(s) (dry run):", "yellow")
            label, label_color = " Would delete: ", "yellow"
        else:
            cprint(f"Cleaned {len(orphan_paths)} orphan partial(s):", "green")
            label, label_color = " Deleted: ", "red"

        for orphan in orphan_paths:
            print(colored(f"{label}{orphan}", label_color))

    finally:
        # Only the write-mode provider is closed here
        if not dry_run:
            provider.close()
@@ -0,0 +1,269 @@
1
+ """Simplified CLI commands for managing and viewing progress files"""
2
+
3
+ import time
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Optional, Dict
7
+
8
+ import click
9
+ from termcolor import colored
10
+
11
+ try:
12
+ from tqdm import tqdm
13
+
14
+ TQDM_AVAILABLE = True
15
+ except ImportError:
16
+ TQDM_AVAILABLE = False
17
+
18
+ from experimaestro.progress import ProgressEntry, ProgressFileReader
19
+ from experimaestro.settings import find_workspace
20
+ from . import cli
21
+
22
+
23
@click.option("--workspace", default="", help="Experimaestro workspace")
@click.option("--workdir", type=Path, default=None)
@cli.group()
@click.pass_context
def progress(
    ctx,
    workdir: Optional[Path],
    workspace: Optional[str],
):
    """Progress tracking commands"""
    # Resolve the workspace once and store it on the click context object so
    # that the sub-commands of this group can read it from ctx.obj.workspace.
    resolved = find_workspace(workdir=workdir, workspace=workspace)
    ctx.obj.workspace = resolved
34
+
35
+
36
def format_timestamp(timestamp: float) -> str:
    """Render a POSIX timestamp as a local-time ``YYYY-MM-DD HH:MM:SS`` string."""
    return f"{datetime.fromtimestamp(timestamp):%Y-%m-%d %H:%M:%S}"
40
+
41
+
42
@click.argument("jobid", type=str)
@progress.command()
@click.pass_context
def show(ctx, jobid: str):
    """Show current progress state (default command)

    JOBID format: task_name/task_hash
    """
    try:
        task_name, task_hash = jobid.split("/")
    except ValueError:
        raise click.ClickException("JOBID must be in format task_name/task_hash")

    task_path = ctx.obj.workspace.path / "jobs" / task_name / task_hash
    if not task_path.exists():
        raise click.ClickException(f"Job directory not found: {task_path}")

    # Level -1 entries are end-of-job markers, not displayable progress levels
    raw_progress = ProgressFileReader(task_path).get_current_progress()
    levels = {
        level: entry for level, entry in (raw_progress or {}).items() if level != -1
    }
    if not levels:
        click.echo("No progress information available")
        return

    click.echo(f"Progress for job {jobid}")
    click.echo("=" * 80)

    # One two-line text summary per level, lowest level first
    for level, entry in sorted(levels.items(), key=lambda item: item[0]):
        indent = " " * level
        progress_pct = f"{entry.progress * 100:5.1f}%"
        desc = entry.desc or f"Level {level}"
        timestamp = format_timestamp(entry.timestamp)

        # Green once the level reports completion, yellow while in progress
        color = "yellow" if entry.progress < 1.0 else "green"
        click.echo(colored(f"{indent}L{level}: {progress_pct} - {desc}", color))
        click.echo(colored(f"{indent} Last updated: {timestamp}", "cyan"))
89
+
90
+
91
def create_progress_bar(
    level: int,
    desc: str,
    progress: float = 0.0,
) -> "tqdm":
    """Create a tqdm bar for one progress level, aligned dashboard-style.

    Args:
        level: Nesting level of the bar; used as the tqdm ``position``, to
            build the ``L<level>`` label, and to pick the bar colour.
        desc: Description shown after the bar (truncated to 50 characters);
            falls back to ``Level <level>`` when empty.
        progress: Initial progress as a fraction; it is multiplied by 100
            because the bar is created with ``total=100``.

    Returns:
        The newly created tqdm progress bar.
    """
    # The return annotation is a string on purpose: tqdm is an optional
    # import in this module, and an unquoted annotation would be evaluated
    # when the function is defined, raising NameError if tqdm is missing.
    if level > 0:
        indent = " " * (level - 1) + "└─ "
    else:
        indent = ""
    label = f"{indent}L{level}"

    # Cycle through a fixed palette so neighbouring levels are distinguishable
    colors = ["blue", "yellow", "magenta", "cyan", "white"]
    bar_color = colors[level % len(colors)]

    unit = desc[:50] if desc else f"Level {level}"
    ncols = 100
    # Shrink the bar by the indent width so that all bars end at the same column
    bar_width = 50 - len(indent)

    return tqdm(
        total=100,
        desc=label,
        position=level,
        leave=True,
        bar_format=f"{{desc}}: {{percentage:3.0f}}%|{{bar:{bar_width}}}| {{unit}}",
        ncols=ncols,
        unit=unit,
        colour=bar_color,
        initial=progress * 100,
    )
121
+
122
+
123
def _update_progress_display(
    reader: "ProgressFileReader", progress_bars: "Dict[int, tqdm]"
) -> bool:
    """Synchronise the tqdm bars with the reader's current progress state.

    Args:
        reader: Source of progress entries; ``get_current_state()`` is
            expected to return a mapping from level to entry.
        progress_bars: Bars currently displayed, keyed by level. The mapping
            is updated in place: bars are created for new levels and closed
            and removed for levels that are no longer reported.

    Returns:
        ``False`` when no progress entry (other than the level -1 end-of-job
        marker) is available, ``True`` otherwise.

    This helper prints nothing itself: it is meant to be polled, so any
    "no progress yet" message is left to the caller, which can print it once.
    """
    # Annotations are strings because tqdm is an optional import in this
    # module; unquoted, they would raise NameError at definition time.
    current_state = {
        level: entry
        for level, entry in reader.get_current_state().items()
        if level != -1  # -1 is the end-of-job marker
    }

    if not current_state:
        return False

    # Update existing bars and create the missing ones
    for entry in current_state.values():
        desc = entry.desc or f"Level {entry.level}"

        if entry.level not in progress_bars:
            # create_progress_bar takes a fraction and scales it to percent
            # itself, so pass the raw fraction here (not the percentage).
            progress_bars[entry.level] = create_progress_bar(
                entry.level, desc, entry.progress
            )

        bar = progress_bars[entry.level]
        bar.unit = desc[:50]
        bar.n = entry.progress * 100  # bars are created with total=100
        bar.refresh()

    # Close and drop the bars of levels that are no longer reported
    for level in set(progress_bars) - set(current_state):
        progress_bars[level].close()
        del progress_bars[level]

    return True
158
+
159
+
160
@click.argument("jobid", type=str)
@click.option("--refresh-rate", "-r", default=0.5, help="Refresh rate in seconds")
@progress.command()
@click.pass_context
def live(ctx, jobid: str, refresh_rate: float):
    """Show live progress with tqdm-style bars

    JOBID format: task_name/task_hash
    """
    # Without tqdm there are no bars to draw: delegate to the static display
    if not TQDM_AVAILABLE:
        click.echo("tqdm is not available. Install with: pip install tqdm")
        click.echo("Falling back to basic display...")
        ctx.invoke(show, jobid=jobid)
        return

    try:
        task_name, task_hash = jobid.split("/")
    except ValueError:
        raise click.ClickException("JOBID must be in format task_name/task_hash")

    task_path = ctx.obj.workspace.path / "jobs" / task_name / task_hash
    if not task_path.exists():
        raise click.ClickException(f"Job directory not found: {task_path}")

    reader = ProgressFileReader(task_path)
    progress_bars: Dict[int, tqdm] = {}

    def cleanup_bars():
        """Close every displayed bar and forget it"""
        for active_bar in progress_bars.values():
            active_bar.close()
        progress_bars.clear()

    for banner_line in (
        f"Live progress for job {jobid}",
        "Press Ctrl+C to stop",
        "=" * 80,
    ):
        click.echo(banner_line)

    try:
        # First refresh happens immediately, before the first sleep
        has_state = _update_progress_display(reader, progress_bars)
        if not has_state:
            click.echo("No progress information available yet...")

        while True:
            time.sleep(refresh_rate)

            has_state = _update_progress_display(reader, progress_bars)
            # The reader is only asked whether the job is done when it has
            # no progress entries to display
            if not has_state and reader.is_done():
                click.echo("\nJob completed!")
                break

            # Stop once at least one bar exists and none is below 100%
            if progress_bars and not any(
                bar.n < 100 for bar in progress_bars.values()
            ):
                cleanup_bars()
                click.echo("\nAll progress completed!")
                break

    except KeyboardInterrupt:
        click.echo("\nStopped monitoring progress")
    finally:
        cleanup_bars()
222
+
223
+
224
@progress.command(name="list")
@click.pass_context
def list_jobs(ctx):
    """List all jobs with progress information"""
    jobs_path = ctx.obj.workspace.path / "jobs"

    if not jobs_path.exists():
        click.echo("No jobs directory found")
        return

    # iterdir() yields entries in arbitrary order; sort both levels so the
    # listing is stable from one invocation to the next.
    for task_dir in sorted(jobs_path.iterdir()):
        if not task_dir.is_dir():
            continue

        for job_dir in sorted(task_dir.iterdir()):
            if not job_dir.is_dir():
                continue

            progress_dir = job_dir / ".experimaestro"
            if not progress_dir.exists():
                continue

            # Only jobs with at least one progress file are listed; any()
            # stops at the first match instead of collecting every file.
            if not any(progress_dir.glob("progress-*.jsonl")):
                continue

            job_id = f"{task_dir.name}/{job_dir.name}"
            current_state = ProgressFileReader(job_dir).get_current_state()

            if not current_state:
                click.echo(f"{job_id:50} No progress data")
                continue

            # Level 0 carries the overall progress of the job
            level_0 = current_state.get(0)
            if not level_0:
                click.echo(f"{job_id:50} No level 0 progress")
                continue

            color = "green" if level_0.progress >= 1.0 else "yellow"
            desc = f"{level_0.desc}" if level_0.desc else ""
            progress_pct = f"{level_0.progress * 100:5.1f}%"
            click.echo(colored(f"{job_id:50} - {progress_pct} - {desc}", color))