experimaestro 2.0.0a8__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. Click here for more details.

Files changed (122) hide show
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +278 -7
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/refactor.py +249 -0
  7. experimaestro/click.py +0 -1
  8. experimaestro/commandline.py +19 -3
  9. experimaestro/connectors/__init__.py +20 -1
  10. experimaestro/connectors/local.py +12 -0
  11. experimaestro/core/arguments.py +182 -46
  12. experimaestro/core/identifier.py +107 -6
  13. experimaestro/core/objects/__init__.py +6 -0
  14. experimaestro/core/objects/config.py +542 -25
  15. experimaestro/core/objects/config_walk.py +20 -0
  16. experimaestro/core/serialization.py +91 -34
  17. experimaestro/core/subparameters.py +164 -0
  18. experimaestro/core/types.py +175 -38
  19. experimaestro/exceptions.py +26 -0
  20. experimaestro/experiments/cli.py +111 -25
  21. experimaestro/generators.py +50 -9
  22. experimaestro/huggingface.py +3 -1
  23. experimaestro/launcherfinder/parser.py +29 -0
  24. experimaestro/launchers/__init__.py +26 -1
  25. experimaestro/launchers/direct.py +12 -0
  26. experimaestro/launchers/slurm/base.py +154 -2
  27. experimaestro/mkdocs/metaloader.py +0 -1
  28. experimaestro/mypy.py +452 -7
  29. experimaestro/notifications.py +63 -13
  30. experimaestro/progress.py +0 -2
  31. experimaestro/rpyc.py +0 -1
  32. experimaestro/run.py +19 -6
  33. experimaestro/scheduler/base.py +510 -125
  34. experimaestro/scheduler/dependencies.py +43 -28
  35. experimaestro/scheduler/dynamic_outputs.py +259 -130
  36. experimaestro/scheduler/experiment.py +256 -31
  37. experimaestro/scheduler/interfaces.py +501 -0
  38. experimaestro/scheduler/jobs.py +216 -206
  39. experimaestro/scheduler/remote/__init__.py +31 -0
  40. experimaestro/scheduler/remote/client.py +874 -0
  41. experimaestro/scheduler/remote/protocol.py +467 -0
  42. experimaestro/scheduler/remote/server.py +423 -0
  43. experimaestro/scheduler/remote/sync.py +144 -0
  44. experimaestro/scheduler/services.py +323 -23
  45. experimaestro/scheduler/state_db.py +437 -0
  46. experimaestro/scheduler/state_provider.py +2766 -0
  47. experimaestro/scheduler/state_sync.py +891 -0
  48. experimaestro/scheduler/workspace.py +52 -10
  49. experimaestro/scriptbuilder.py +7 -0
  50. experimaestro/server/__init__.py +147 -57
  51. experimaestro/server/data/index.css +0 -125
  52. experimaestro/server/data/index.css.map +1 -1
  53. experimaestro/server/data/index.js +194 -58
  54. experimaestro/server/data/index.js.map +1 -1
  55. experimaestro/settings.py +44 -5
  56. experimaestro/sphinx/__init__.py +3 -3
  57. experimaestro/taskglobals.py +20 -0
  58. experimaestro/tests/conftest.py +80 -0
  59. experimaestro/tests/core/test_generics.py +2 -2
  60. experimaestro/tests/identifier_stability.json +45 -0
  61. experimaestro/tests/launchers/bin/sacct +6 -2
  62. experimaestro/tests/launchers/bin/sbatch +4 -2
  63. experimaestro/tests/launchers/test_slurm.py +80 -0
  64. experimaestro/tests/tasks/test_dynamic.py +231 -0
  65. experimaestro/tests/test_cli_jobs.py +615 -0
  66. experimaestro/tests/test_deprecated.py +630 -0
  67. experimaestro/tests/test_environment.py +200 -0
  68. experimaestro/tests/test_file_progress_integration.py +1 -1
  69. experimaestro/tests/test_forward.py +3 -3
  70. experimaestro/tests/test_identifier.py +372 -41
  71. experimaestro/tests/test_identifier_stability.py +458 -0
  72. experimaestro/tests/test_instance.py +3 -3
  73. experimaestro/tests/test_multitoken.py +442 -0
  74. experimaestro/tests/test_mypy.py +433 -0
  75. experimaestro/tests/test_objects.py +312 -5
  76. experimaestro/tests/test_outputs.py +2 -2
  77. experimaestro/tests/test_param.py +8 -12
  78. experimaestro/tests/test_partial_paths.py +231 -0
  79. experimaestro/tests/test_progress.py +0 -48
  80. experimaestro/tests/test_remote_state.py +671 -0
  81. experimaestro/tests/test_resumable_task.py +480 -0
  82. experimaestro/tests/test_serializers.py +141 -1
  83. experimaestro/tests/test_state_db.py +434 -0
  84. experimaestro/tests/test_subparameters.py +160 -0
  85. experimaestro/tests/test_tags.py +136 -0
  86. experimaestro/tests/test_tasks.py +107 -121
  87. experimaestro/tests/test_token_locking.py +252 -0
  88. experimaestro/tests/test_tokens.py +17 -13
  89. experimaestro/tests/test_types.py +123 -1
  90. experimaestro/tests/test_workspace_triggers.py +158 -0
  91. experimaestro/tests/token_reschedule.py +4 -2
  92. experimaestro/tests/utils.py +2 -2
  93. experimaestro/tokens.py +154 -57
  94. experimaestro/tools/diff.py +1 -1
  95. experimaestro/tui/__init__.py +8 -0
  96. experimaestro/tui/app.py +2395 -0
  97. experimaestro/tui/app.tcss +353 -0
  98. experimaestro/tui/log_viewer.py +228 -0
  99. experimaestro/utils/__init__.py +23 -0
  100. experimaestro/utils/environment.py +148 -0
  101. experimaestro/utils/git.py +129 -0
  102. experimaestro/utils/resources.py +1 -1
  103. experimaestro/version.py +34 -0
  104. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
  105. experimaestro-2.0.0b8.dist-info/RECORD +187 -0
  106. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
  107. experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
  108. experimaestro/compat.py +0 -6
  109. experimaestro/core/objects.pyi +0 -221
  110. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  111. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  112. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  113. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  114. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  115. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  116. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  117. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  118. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  119. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  120. experimaestro-2.0.0a8.dist-info/RECORD +0 -166
  121. experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
  122. {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,501 @@
1
+ """Base interfaces for job and experiment data
2
+
3
+ This module defines abstract interfaces that represent job and experiment information.
4
+ These interfaces provide a common API between live jobs/experiments and those
5
+ loaded from the database.
6
+
7
+ - JobState: Base class for job states with singleton instances
8
+ - JobFailureStatus: Enum for failure reasons
9
+ - BaseJob: Interface defining job attributes and metadata operations
10
+ - BaseExperiment: Interface defining experiment attributes
11
+
12
+ The existing Job and experiment classes should provide these same attributes
13
+ to enable unified access in the TUI and other monitoring tools.
14
+ """
15
+
16
+ import enum
17
+ import json
18
+ import logging
19
+ from datetime import datetime
20
+ from pathlib import Path
21
+ from typing import Dict, List, Optional
22
+
23
+ logger = logging.getLogger("xpm.interfaces")
24
+
25
+
26
+ # =============================================================================
27
+ # Job State Classes
28
+ # =============================================================================
29
+
30
+
31
class JobState:
    """Base class for job states.

    Job states are represented as instances of JobState subclasses. Each
    subclass provides a readable ``name`` and a numeric ``value``; the value
    drives ordering predicates, equality and hashing.

    Singleton instances are attached to this class as attributes elsewhere in
    the module (e.g. ``JobState.DONE``) for backward compatibility.
    """

    name: str  # Readable name
    value: int  # Numeric value for ordering comparisons

    def notstarted(self):
        """Return True if the job hasn't started yet (value up to READY)."""
        return self.value <= 2  # READY

    def running(self):
        """Return True if the job is currently running or scheduled."""
        return self.value == 4 or self.value == 3  # RUNNING or SCHEDULED

    def finished(self):
        """Return True if the job has finished (success or error)."""
        return self.value >= 5  # DONE or ERROR

    def is_error(self):
        """Return True for error states; the base implementation is False."""
        return False

    def __eq__(self, other):
        """Compare job states by their numeric value."""
        if isinstance(other, JobState):
            return self.value == other.value
        return False

    def __hash__(self):
        """Hash on the numeric value so states can be used as dict keys."""
        return hash(self.value)

    def __repr__(self):
        """String representation of the job state."""
        return f"{self.__class__.__name__}()"

    @staticmethod
    def _parse_failure_status(raw) -> Optional["JobFailureStatus"]:
        """Map a raw JSON field to a JobFailureStatus member, if possible.

        Args:
            raw: Value read from the ``.failed`` file (any JSON type).

        Returns:
            The matching enum member (case-insensitive lookup by name), or
            None when ``raw`` is empty, not a string, or not a known name.
        """
        # Guard the type: a non-string value (e.g. an integer) has no
        # ``upper()`` and must not crash the state lookup.
        if not isinstance(raw, str) or not raw:
            return None
        try:
            return JobFailureStatus[raw.upper()]
        except KeyError:
            return None

    @staticmethod
    def from_path(basepath: Path, scriptname: str) -> Optional["JobState"]:
        """Read job state from .done or .failed files

        Args:
            basepath: The job directory path
            scriptname: The script name (used for file naming)

        Returns:
            JobState.DONE if .done exists, JobStateError with details if .failed exists,
            or None if neither exists.
        """
        donepath = basepath / f"{scriptname}.done"
        failedpath = basepath / f"{scriptname}.failed"

        if donepath.is_file():
            return JobState.DONE

        if not failedpath.is_file():
            return None

        content = failedpath.read_text().strip()

        # Try JSON first
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            data = None

        if isinstance(data, dict):
            # New format uses "failure_status"; the legacy format uses "reason"
            for key in ("failure_status", "reason"):
                failure_status = JobState._parse_failure_status(data.get(key))
                if failure_status is not None:
                    return JobStateError(failure_status)
            return JobStateError(JobFailureStatus.FAILED)

        # Fall back to legacy integer format. Only JSON objects are handled
        # above, so a bare exit code (which is also valid JSON) reaches here.
        try:
            code = int(content)
        except ValueError:
            logger.warning(
                "Could not parse failed file %s: %s", failedpath, content
            )
            return JobStateError(JobFailureStatus.FAILED)

        if code == 0:
            return JobState.DONE
        return JobStateError(JobFailureStatus.FAILED)
131
+
132
+
133
class JobStateUnscheduled(JobState):
    """State of a job that has not been scheduled yet"""

    # Lowest numeric value of the state ordering
    value = 0
    name = "unscheduled"
138
+
139
+
140
class JobStateWaiting(JobState):
    """State of a job whose dependencies are not done yet"""

    value = 1
    name = "waiting"
145
+
146
+
147
class JobStateReady(JobState):
    """State of a job that can be run"""

    # JobState.notstarted() compares against this value (<= 2)
    value = 2
    name = "ready"
152
+
153
+
154
class JobStateScheduled(JobState):
    """State of a job handed to a scheduler (e.g., in SLURM queue)"""

    # Counted as "running" by JobState.running() together with value 4
    value = 3
    name = "scheduled"
159
+
160
+
161
class JobStateRunning(JobState):
    """State of a job that is being executed"""

    value = 4
    name = "running"
166
+
167
+
168
class JobStateDone(JobState):
    """State of a job that finished successfully"""

    # JobState.finished() is True from this value upwards (>= 5)
    value = 5
    name = "done"
173
+
174
+
175
class JobFailureStatus(enum.Enum):
    """Enumeration of the reasons why a job can fail

    Members are looked up by upper-cased name when a ``.failed`` file is
    parsed, so member names are part of the on-disk format.
    """

    #: A dependency of the job failed
    DEPENDENCY = 0

    #: The job itself failed (generic failure)
    FAILED = 1

    #: Memory-related failure
    MEMORY = 2

    #: Timeout (can retry for resumable tasks)
    TIMEOUT = 3
189
+
190
+
191
class JobStateError(JobState):
    """Job has failed

    This state carries information about the failure reason via JobFailureStatus enum.
    """

    name = "error"
    value = 6

    def __init__(self, failure_reason: Optional[JobFailureStatus] = None):
        """Create an error state, optionally with failure details

        Args:
            failure_reason: Optional reason for the failure (JobFailureStatus enum value)
        """
        self.failure_reason = failure_reason

    def __repr__(self):
        """Representation including the failure reason when one is set"""
        if self.failure_reason:
            return f"JobStateError(failure_reason={self.failure_reason})"
        return "JobStateError()"

    def __eq__(self, other):
        """Error states are equal if they have the same value

        Note: We intentionally ignore failure_reason in equality comparison
        to maintain backward compatibility with code that does:
            if job.state == JobState.ERROR: ...
        """
        if isinstance(other, JobState):
            return self.value == other.value
        return False

    def __hash__(self):
        """Hash on the numeric value, consistently with ``__eq__``

        A class that defines ``__eq__`` without ``__hash__`` gets
        ``__hash__ = None`` and becomes unhashable, so the hash must be
        redefined here for error states to remain usable as dict keys or
        set members like every other job state.
        """
        return hash(self.value)

    def is_error(self):
        """Error states always report True"""
        return True
226
+
227
+
228
+ # FIXME: Get rid of those
229
+ # Create singleton instances for backward compatibility
230
+ # These can be used in comparisons: if state == JobState.DONE: ...
231
JobState.UNSCHEDULED = JobStateUnscheduled()
JobState.WAITING = JobStateWaiting()
JobState.READY = JobStateReady()
JobState.SCHEDULED = JobStateScheduled()
JobState.RUNNING = JobStateRunning()
JobState.DONE = JobStateDone()
JobState.ERROR = JobStateError()  # default error without failure details


# Mapping from state name string to JobState singleton, keyed by each
# singleton's own ``name`` attribute (listed in increasing ``value`` order)
STATE_NAME_TO_JOBSTATE = {
    state.name: state
    for state in (
        JobState.UNSCHEDULED,
        JobState.WAITING,
        JobState.READY,
        JobState.SCHEDULED,
        JobState.RUNNING,
        JobState.DONE,
        JobState.ERROR,
    )
}
250
+
251
+
252
+ # =============================================================================
253
+ # Base Job Interface
254
+ # =============================================================================
255
+
256
+
257
class BaseJob:
    """Base interface for job information and metadata operations

    This class defines the interface for job data and provides methods for
    reading/writing job metadata files. Both live Job instances and
    database-loaded MockJob instances should provide these attributes.

    Attributes:
        identifier: Unique identifier for the job (hash)
        task_id: Task class identifier (string)
        locator: Full task locator (identifier)
        path: Path to job directory
        state: Current job state (JobState object or compatible)
        submittime: When job was submitted (Unix timestamp or None)
        starttime: When job started running (Unix timestamp or None)
        endtime: When job finished (Unix timestamp or None)
        progress: List of progress updates
        tags: Dictionary of tag key-value pairs
        exit_code: Process exit code (optional)
        retry_count: Number of retries
    """

    identifier: str
    task_id: str
    locator: str
    path: Path
    state: "JobState"
    submittime: Optional[float]
    starttime: Optional[float]
    endtime: Optional[float]
    progress: List[Dict]
    tags: Dict[str, str]
    exit_code: Optional[int]
    retry_count: int

    # -------------------------------------------------------------------------
    # Static path computation (for use without a job instance)
    # -------------------------------------------------------------------------

    @staticmethod
    def get_scriptname(task_id: str) -> str:
        """Extract script name from task_id (last component after '.')"""
        return task_id.rsplit(".", 1)[-1]

    @staticmethod
    def get_xpm_dir(job_path: Path) -> Path:
        """Get .experimaestro directory path for a job path"""
        return job_path / ".experimaestro"

    @staticmethod
    def get_metadata_path(job_path: Path) -> Path:
        """Get metadata file path (information.json in the .experimaestro directory)"""
        return BaseJob.get_xpm_dir(job_path) / "information.json"

    @staticmethod
    def get_pidfile(job_path: Path, scriptname: str) -> Path:
        """Get PID file path"""
        return job_path / f"{scriptname}.pid"

    @staticmethod
    def get_donefile(job_path: Path, scriptname: str) -> Path:
        """Get done marker file path"""
        return job_path / f"{scriptname}.done"

    @staticmethod
    def get_failedfile(job_path: Path, scriptname: str) -> Path:
        """Get failed marker file path"""
        return job_path / f"{scriptname}.failed"

    # -------------------------------------------------------------------------
    # Instance properties (using static methods for consistency)
    # -------------------------------------------------------------------------

    @property
    def scriptname(self) -> str:
        """The script name derived from task_id"""
        return BaseJob.get_scriptname(self.task_id)

    @property
    def xpm_dir(self) -> Path:
        """Path to the .experimaestro directory within job path"""
        return BaseJob.get_xpm_dir(self.path)

    @property
    def metadata_path(self) -> Path:
        """Path to the job metadata file"""
        return BaseJob.get_metadata_path(self.path)

    @property
    def pidfile(self) -> Path:
        """Path to the .pid file"""
        return BaseJob.get_pidfile(self.path, self.scriptname)

    @property
    def donefile(self) -> Path:
        """Path to the .done file"""
        return BaseJob.get_donefile(self.path, self.scriptname)

    @property
    def failedfile(self) -> Path:
        """Path to the .failed file"""
        return BaseJob.get_failedfile(self.path, self.scriptname)

    # -------------------------------------------------------------------------
    # Metadata I/O
    # -------------------------------------------------------------------------

    def write_metadata(self, **extra_fields) -> None:
        """Write or update job metadata in .experimaestro/information.json file

        Extracts metadata from job attributes (identifier, task_id, state,
        submittime, starttime, endtime, exit_code, retry_count) and writes it
        to the metadata file.

        The content is written to a temporary file which then replaces the
        metadata file. If metadata already exists and is a JSON object, new
        fields are merged into it; unreadable or non-object content is
        ignored with a warning. The last_updated timestamp is always refreshed.

        Args:
            **extra_fields: Optional extra fields (e.g., launcher, launcher_job_id, exit_code)

        Raises:
            Exception: Any error raised while writing or replacing the file is
                logged and re-raised after the temporary file is removed.
        """
        # Ensure .experimaestro directory exists
        self.xpm_dir.mkdir(parents=True, exist_ok=True)
        metadata_path = self.metadata_path

        # Read existing metadata (best effort: failures only produce a warning)
        existing = {}
        if metadata_path.exists():
            try:
                with metadata_path.open("r") as f:
                    loaded = json.load(f)
            except Exception as e:
                logger.warning(
                    "Failed to read existing metadata from %s: %s", metadata_path, e
                )
            else:
                # Valid JSON that is not an object (list, number, ...) cannot
                # be merged with new fields, so it is dropped
                if isinstance(loaded, dict):
                    existing = loaded
                else:
                    logger.warning(
                        "Ignoring existing metadata in %s: expected a JSON object, got %s",
                        metadata_path,
                        type(loaded).__name__,
                    )

        # Build metadata from job attributes
        fields = {
            "job_id": self.identifier,
            "task_id": self.task_id,
            "state": self.state.name if self.state else None,
        }

        # Add timing information and exit code when available
        optional_fields = (
            ("submitted_time", self.submittime),
            ("started_time", self.starttime),
            ("ended_time", self.endtime),
            ("exit_code", self.exit_code),
        )
        for key, value in optional_fields:
            if value is not None:
                fields[key] = value

        # Add retry count
        if hasattr(self, "retry_count"):
            fields["retry_count"] = self.retry_count

        # Merge with extra fields (for launcher info, exit_code, etc.)
        fields.update(extra_fields)

        # Merge with existing and update timestamp
        existing.update(fields)
        existing["last_updated"] = datetime.now().timestamp()

        # Write to a temporary file, then move it over the metadata file
        temp_path = metadata_path.with_suffix(".json.tmp")
        try:
            with temp_path.open("w") as f:
                json.dump(existing, f, indent=2)
            temp_path.replace(metadata_path)
            logger.debug("Wrote metadata to %s: %s", metadata_path, list(fields.keys()))
        except Exception as e:
            logger.error("Failed to write metadata to %s: %s", metadata_path, e)
            # Remove the leftover temporary file; it may already be gone
            try:
                temp_path.unlink()
            except FileNotFoundError:
                pass
            raise

    def read_metadata(self) -> Optional[dict]:
        """Read job metadata from .experimaestro/information.json file

        Returns:
            The parsed metadata, or None if the file doesn't exist or cannot
            be read/parsed (a warning is logged in the latter case)
        """
        metadata_path = self.metadata_path
        if not metadata_path.exists():
            return None

        try:
            with metadata_path.open("r") as f:
                return json.load(f)
        except Exception as e:
            logger.warning("Failed to read metadata from %s: %s", metadata_path, e)
            return None
450
+
451
+
452
+ # =============================================================================
453
+ # Base Experiment Interface
454
+ # =============================================================================
455
+
456
+
457
class BaseExperiment:
    """Common interface for experiment information

    Live experiment instances and database-loaded MockExperiment instances
    are both expected to expose the attributes below.

    Attributes:
        workdir: Path to experiment directory
        current_run_id: Current/latest run ID (or None)
    """

    workdir: Path
    current_run_id: Optional[str]

    @property
    def experiment_id(self) -> str:
        """Identifier of the experiment: the final component of ``workdir``"""
        directory = self.workdir
        return directory.name
475
+
476
+
477
class BaseService:
    """Common interface for service information

    Live Service instances and MockService instances are both expected to
    provide these attributes and methods. Every member defined here raises
    NotImplementedError and has to be supplied by the concrete class.

    Attributes:
        id: Unique identifier for the service
        state: Current service state (ServiceState enum or compatible)
    """

    id: str

    @property
    def state(self):
        """Current service state (to be provided by subclasses)"""
        raise NotImplementedError()

    def description(self) -> str:
        """Human-readable description of the service (to be provided by subclasses)"""
        raise NotImplementedError()

    def state_dict(self) -> dict:
        """Dictionary representation for serialization (to be provided by subclasses)"""
        raise NotImplementedError()