experimaestro 1.11.1__py3-none-any.whl → 2.0.0b4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of experimaestro might be problematic. More details are available via the link on the original diff page.

Files changed (133)
  1. experimaestro/__init__.py +10 -11
  2. experimaestro/annotations.py +167 -206
  3. experimaestro/cli/__init__.py +140 -16
  4. experimaestro/cli/filter.py +42 -74
  5. experimaestro/cli/jobs.py +157 -106
  6. experimaestro/cli/progress.py +269 -0
  7. experimaestro/cli/refactor.py +249 -0
  8. experimaestro/click.py +0 -1
  9. experimaestro/commandline.py +19 -3
  10. experimaestro/connectors/__init__.py +22 -3
  11. experimaestro/connectors/local.py +12 -0
  12. experimaestro/core/arguments.py +192 -37
  13. experimaestro/core/identifier.py +127 -12
  14. experimaestro/core/objects/__init__.py +6 -0
  15. experimaestro/core/objects/config.py +702 -285
  16. experimaestro/core/objects/config_walk.py +24 -6
  17. experimaestro/core/serialization.py +91 -34
  18. experimaestro/core/serializers.py +1 -8
  19. experimaestro/core/subparameters.py +164 -0
  20. experimaestro/core/types.py +198 -83
  21. experimaestro/exceptions.py +26 -0
  22. experimaestro/experiments/cli.py +107 -25
  23. experimaestro/generators.py +50 -9
  24. experimaestro/huggingface.py +3 -1
  25. experimaestro/launcherfinder/parser.py +29 -0
  26. experimaestro/launcherfinder/registry.py +3 -3
  27. experimaestro/launchers/__init__.py +26 -1
  28. experimaestro/launchers/direct.py +12 -0
  29. experimaestro/launchers/slurm/base.py +154 -2
  30. experimaestro/mkdocs/base.py +6 -8
  31. experimaestro/mkdocs/metaloader.py +0 -1
  32. experimaestro/mypy.py +452 -7
  33. experimaestro/notifications.py +75 -16
  34. experimaestro/progress.py +404 -0
  35. experimaestro/rpyc.py +0 -1
  36. experimaestro/run.py +19 -6
  37. experimaestro/scheduler/__init__.py +18 -1
  38. experimaestro/scheduler/base.py +504 -959
  39. experimaestro/scheduler/dependencies.py +43 -28
  40. experimaestro/scheduler/dynamic_outputs.py +259 -130
  41. experimaestro/scheduler/experiment.py +582 -0
  42. experimaestro/scheduler/interfaces.py +474 -0
  43. experimaestro/scheduler/jobs.py +485 -0
  44. experimaestro/scheduler/services.py +186 -12
  45. experimaestro/scheduler/signal_handler.py +32 -0
  46. experimaestro/scheduler/state.py +1 -1
  47. experimaestro/scheduler/state_db.py +388 -0
  48. experimaestro/scheduler/state_provider.py +2345 -0
  49. experimaestro/scheduler/state_sync.py +834 -0
  50. experimaestro/scheduler/workspace.py +52 -10
  51. experimaestro/scriptbuilder.py +7 -0
  52. experimaestro/server/__init__.py +153 -32
  53. experimaestro/server/data/index.css +0 -125
  54. experimaestro/server/data/index.css.map +1 -1
  55. experimaestro/server/data/index.js +194 -58
  56. experimaestro/server/data/index.js.map +1 -1
  57. experimaestro/settings.py +47 -6
  58. experimaestro/sphinx/__init__.py +3 -3
  59. experimaestro/taskglobals.py +20 -0
  60. experimaestro/tests/conftest.py +80 -0
  61. experimaestro/tests/core/test_generics.py +2 -2
  62. experimaestro/tests/identifier_stability.json +45 -0
  63. experimaestro/tests/launchers/bin/sacct +6 -2
  64. experimaestro/tests/launchers/bin/sbatch +4 -2
  65. experimaestro/tests/launchers/common.py +2 -2
  66. experimaestro/tests/launchers/test_slurm.py +80 -0
  67. experimaestro/tests/restart.py +1 -1
  68. experimaestro/tests/tasks/all.py +7 -0
  69. experimaestro/tests/tasks/test_dynamic.py +231 -0
  70. experimaestro/tests/test_checkers.py +2 -2
  71. experimaestro/tests/test_cli_jobs.py +615 -0
  72. experimaestro/tests/test_dependencies.py +11 -17
  73. experimaestro/tests/test_deprecated.py +630 -0
  74. experimaestro/tests/test_environment.py +200 -0
  75. experimaestro/tests/test_experiment.py +3 -3
  76. experimaestro/tests/test_file_progress.py +425 -0
  77. experimaestro/tests/test_file_progress_integration.py +477 -0
  78. experimaestro/tests/test_forward.py +3 -3
  79. experimaestro/tests/test_generators.py +93 -0
  80. experimaestro/tests/test_identifier.py +520 -169
  81. experimaestro/tests/test_identifier_stability.py +458 -0
  82. experimaestro/tests/test_instance.py +16 -21
  83. experimaestro/tests/test_multitoken.py +442 -0
  84. experimaestro/tests/test_mypy.py +433 -0
  85. experimaestro/tests/test_objects.py +314 -30
  86. experimaestro/tests/test_outputs.py +8 -8
  87. experimaestro/tests/test_param.py +22 -26
  88. experimaestro/tests/test_partial_paths.py +231 -0
  89. experimaestro/tests/test_progress.py +2 -50
  90. experimaestro/tests/test_resumable_task.py +480 -0
  91. experimaestro/tests/test_serializers.py +141 -60
  92. experimaestro/tests/test_state_db.py +434 -0
  93. experimaestro/tests/test_subparameters.py +160 -0
  94. experimaestro/tests/test_tags.py +151 -15
  95. experimaestro/tests/test_tasks.py +137 -160
  96. experimaestro/tests/test_token_locking.py +252 -0
  97. experimaestro/tests/test_tokens.py +25 -19
  98. experimaestro/tests/test_types.py +133 -11
  99. experimaestro/tests/test_validation.py +19 -19
  100. experimaestro/tests/test_workspace_triggers.py +158 -0
  101. experimaestro/tests/token_reschedule.py +5 -3
  102. experimaestro/tests/utils.py +2 -2
  103. experimaestro/tokens.py +154 -57
  104. experimaestro/tools/diff.py +8 -1
  105. experimaestro/tui/__init__.py +8 -0
  106. experimaestro/tui/app.py +2303 -0
  107. experimaestro/tui/app.tcss +353 -0
  108. experimaestro/tui/log_viewer.py +228 -0
  109. experimaestro/typingutils.py +11 -2
  110. experimaestro/utils/__init__.py +23 -0
  111. experimaestro/utils/environment.py +148 -0
  112. experimaestro/utils/git.py +129 -0
  113. experimaestro/utils/resources.py +1 -1
  114. experimaestro/version.py +34 -0
  115. {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/METADATA +70 -39
  116. experimaestro-2.0.0b4.dist-info/RECORD +181 -0
  117. {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info}/WHEEL +1 -1
  118. experimaestro-2.0.0b4.dist-info/entry_points.txt +16 -0
  119. experimaestro/compat.py +0 -6
  120. experimaestro/core/objects.pyi +0 -225
  121. experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
  122. experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
  123. experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
  124. experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
  125. experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
  126. experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
  127. experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
  128. experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
  129. experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
  130. experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
  131. experimaestro-1.11.1.dist-info/RECORD +0 -158
  132. experimaestro-1.11.1.dist-info/entry_points.txt +0 -17
  133. {experimaestro-1.11.1.dist-info → experimaestro-2.0.0b4.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,231 @@
1
+ """Integration tests for partial paths and cleanup"""
2
+
3
+ from pathlib import Path
4
+ from experimaestro import (
5
+ Task,
6
+ Param,
7
+ Meta,
8
+ field,
9
+ PathGenerator,
10
+ subparameters,
11
+ param_group,
12
+ )
13
+ from experimaestro.scheduler import JobState
14
+
15
+ from .utils import TemporaryExperiment, TemporaryDirectory
16
+
17
+
18
+ # Define parameter groups
19
+ iter_group = param_group("iter")
20
+
21
+
22
+ class TaskWithPartial(Task):
23
+ """Task that uses subparameters for partial paths"""
24
+
25
+ # Define a subparameters set
26
+ checkpoints = subparameters(exclude_groups=[iter_group])
27
+
28
+ # Parameter in iter_group - excluded from partial identifier
29
+ max_iter: Param[int] = field(groups=[iter_group])
30
+
31
+ # Parameter not in any group - included in partial identifier
32
+ learning_rate: Param[float]
33
+
34
+ # Path generated using the partial identifier
35
+ checkpoint_path: Meta[Path] = field(
36
+ default_factory=PathGenerator("checkpoint", partial=checkpoints)
37
+ )
38
+
39
+ def execute(self):
40
+ # Create the checkpoint directory and a marker file
41
+ self.checkpoint_path.mkdir(parents=True, exist_ok=True)
42
+ (self.checkpoint_path / "model.pt").write_text("checkpoint data")
43
+
44
+
45
+ def test_partial_path_created():
46
+ """Test that partial paths are correctly created during task execution"""
47
+ with TemporaryDirectory(prefix="xpm", suffix="partial") as workdir:
48
+ with TemporaryExperiment("partial_test", workdir=workdir, maxwait=30):
49
+ task = TaskWithPartial.C(max_iter=100, learning_rate=0.1).submit()
50
+
51
+ assert task.__xpm__.job.state == JobState.DONE
52
+
53
+ # Verify the partial path was created
54
+ assert task.checkpoint_path.exists()
55
+ assert (task.checkpoint_path / "model.pt").exists()
56
+
57
+ # Verify the path is in the partials directory
58
+ partials_path = workdir / "partials"
59
+ assert partials_path.exists()
60
+
61
+ # The checkpoint_path should be under partials/TASK_ID/checkpoints/PARTIAL_ID/
62
+ # Use resolve() to handle symlinks like /var -> /private/var on macOS
63
+ assert task.checkpoint_path.resolve().is_relative_to(partials_path.resolve())
64
+
65
+
66
+ def test_partial_path_shared_across_tasks():
67
+ """Test that tasks with same non-excluded params share partial paths"""
68
+ with TemporaryDirectory(prefix="xpm", suffix="partial_shared") as workdir:
69
+ with TemporaryExperiment("partial_shared", workdir=workdir, maxwait=30):
70
+ # Submit two tasks with different max_iter but same learning_rate
71
+ task1 = TaskWithPartial.C(max_iter=100, learning_rate=0.1).submit()
72
+ task2 = TaskWithPartial.C(max_iter=200, learning_rate=0.1).submit()
73
+
74
+ assert task1.__xpm__.job.state == JobState.DONE
75
+ assert task2.__xpm__.job.state == JobState.DONE
76
+
77
+ # They should share the same partial path
78
+ assert task1.checkpoint_path == task2.checkpoint_path
79
+
80
+
81
+ def test_partial_path_different_for_different_params():
82
+ """Test that tasks with different non-excluded params have different partial paths"""
83
+ with TemporaryDirectory(prefix="xpm", suffix="partial_diff") as workdir:
84
+ with TemporaryExperiment("partial_diff", workdir=workdir, maxwait=30):
85
+ # Submit two tasks with different learning_rate
86
+ task1 = TaskWithPartial.C(max_iter=100, learning_rate=0.1).submit()
87
+ task2 = TaskWithPartial.C(max_iter=100, learning_rate=0.2).submit()
88
+
89
+ assert task1.__xpm__.job.state == JobState.DONE
90
+ assert task2.__xpm__.job.state == JobState.DONE
91
+
92
+ # They should have different partial paths
93
+ assert task1.checkpoint_path != task2.checkpoint_path
94
+
95
+
96
+ def test_partial_registered_in_database():
97
+ """Test that partials are registered in the database when jobs are submitted"""
98
+ from experimaestro.scheduler.state_provider import WorkspaceStateProvider
99
+ from experimaestro.scheduler.state_db import PartialModel, JobPartialModel
100
+
101
+ with TemporaryDirectory(prefix="xpm", suffix="partial_db") as workdir:
102
+ with TemporaryExperiment("partial_db", workdir=workdir, maxwait=30) as xp:
103
+ task = TaskWithPartial.C(max_iter=100, learning_rate=0.1).submit()
104
+
105
+ assert task.__xpm__.job.state == JobState.DONE
106
+
107
+ # Get the state provider and check database
108
+ # Note: Must use read_only=False since the experiment left a singleton
109
+ # with read_only=False that hasn't been closed yet
110
+ provider = WorkspaceStateProvider.get_instance(workdir, read_only=False)
111
+
112
+ try:
113
+ with provider.workspace_db.bind_ctx([PartialModel, JobPartialModel]):
114
+ # Check that partial is registered
115
+ partials = list(PartialModel.select())
116
+ assert len(partials) == 1
117
+ assert partials[0].subparameters_name == "checkpoints"
118
+
119
+ # Check that job is linked to partial
120
+ job_partials = list(JobPartialModel.select())
121
+ assert len(job_partials) == 1
122
+ assert job_partials[0].partial_id == partials[0].partial_id
123
+ assert job_partials[0].experiment_id == xp.workdir.name
124
+ finally:
125
+ provider.close()
126
+
127
+
128
+ def test_orphan_partial_cleanup():
129
+ """Test that orphan partials are cleaned up when jobs are deleted"""
130
+ from experimaestro.scheduler.state_provider import WorkspaceStateProvider
131
+ from experimaestro.scheduler.state_db import PartialModel, JobPartialModel
132
+
133
+ with TemporaryDirectory(prefix="xpm", suffix="partial_cleanup") as workdir:
134
+ with TemporaryExperiment("partial_cleanup", workdir=workdir, maxwait=30) as xp:
135
+ task = TaskWithPartial.C(max_iter=100, learning_rate=0.1).submit()
136
+
137
+ assert task.__xpm__.job.state == JobState.DONE
138
+ checkpoint_path = task.checkpoint_path
139
+
140
+ # Verify partial path exists
141
+ assert checkpoint_path.exists()
142
+
143
+ # Get the state provider
144
+ provider = WorkspaceStateProvider.get_instance(workdir, read_only=False)
145
+
146
+ try:
147
+ # Delete the job
148
+ with provider.workspace_db.bind_ctx([PartialModel, JobPartialModel]):
149
+ job_partials = list(JobPartialModel.select())
150
+ assert len(job_partials) == 1
151
+
152
+ # Delete job (this also removes job-partial link)
153
+ provider.delete_job(
154
+ task.__xpm__.job.identifier,
155
+ xp.workdir.name,
156
+ xp.run_id,
157
+ )
158
+
159
+ # Now the partial should be orphaned
160
+ orphans = provider.get_orphan_partials()
161
+ assert len(orphans) == 1
162
+
163
+ # Cleanup orphan partials
164
+ deleted = provider.cleanup_orphan_partials(perform=True)
165
+ assert len(deleted) == 1
166
+
167
+ # Verify partial directory is deleted
168
+ assert not checkpoint_path.exists()
169
+
170
+ # Verify partial is removed from database
171
+ with provider.workspace_db.bind_ctx([PartialModel]):
172
+ partials = list(PartialModel.select())
173
+ assert len(partials) == 0
174
+ finally:
175
+ provider.close()
176
+
177
+
178
+ def test_shared_partial_not_orphaned():
179
+ """Test that partials shared by multiple jobs are not orphaned until all jobs deleted"""
180
+ from experimaestro.scheduler.state_provider import WorkspaceStateProvider
181
+
182
+ with TemporaryDirectory(prefix="xpm", suffix="partial_shared_cleanup") as workdir:
183
+ with TemporaryExperiment(
184
+ "partial_shared_cleanup", workdir=workdir, maxwait=30
185
+ ) as xp:
186
+ # Submit two tasks with same learning_rate (same partial)
187
+ task1 = TaskWithPartial.C(max_iter=100, learning_rate=0.1).submit()
188
+ task2 = TaskWithPartial.C(max_iter=200, learning_rate=0.1).submit()
189
+
190
+ assert task1.__xpm__.job.state == JobState.DONE
191
+ assert task2.__xpm__.job.state == JobState.DONE
192
+
193
+ # They share the same partial path
194
+ checkpoint_path = task1.checkpoint_path
195
+ assert checkpoint_path == task2.checkpoint_path
196
+ assert checkpoint_path.exists()
197
+
198
+ provider = WorkspaceStateProvider.get_instance(workdir, read_only=False)
199
+
200
+ try:
201
+ # Delete first job
202
+ provider.delete_job(
203
+ task1.__xpm__.job.identifier,
204
+ xp.workdir.name,
205
+ xp.run_id,
206
+ )
207
+
208
+ # Partial should NOT be orphaned (still used by task2)
209
+ orphans = provider.get_orphan_partials()
210
+ assert len(orphans) == 0
211
+
212
+ # Partial directory should still exist
213
+ assert checkpoint_path.exists()
214
+
215
+ # Delete second job
216
+ provider.delete_job(
217
+ task2.__xpm__.job.identifier,
218
+ xp.workdir.name,
219
+ xp.run_id,
220
+ )
221
+
222
+ # Now partial should be orphaned
223
+ orphans = provider.get_orphan_partials()
224
+ assert len(orphans) == 1
225
+
226
+ # Cleanup
227
+ deleted = provider.cleanup_orphan_partials(perform=True)
228
+ assert len(deleted) == 1
229
+ assert not checkpoint_path.exists()
230
+ finally:
231
+ provider.close()
@@ -72,7 +72,7 @@ def test_progress_basic():
72
72
  listener = ProgressListener()
73
73
  xp.scheduler.addlistener(listener)
74
74
 
75
- out = ProgressingTask().submit()
75
+ out = ProgressingTask.C().submit()
76
76
  path = out.path # type: Path
77
77
  job = out.__xpm__.job
78
78
 
@@ -90,54 +90,6 @@ def test_progress_basic():
90
90
  assert info.progress == v
91
91
 
92
92
 
93
- def test_progress_multiple():
94
- """Test that even with two schedulers, we get notified"""
95
- max_wait = 5
96
-
97
- with TemporaryExperiment(
98
- "progress-progress-multiple-1", maxwait=max_wait, port=0
99
- ) as xp1:
100
- assert xp1.server is not None
101
- assert xp1.server.port > 0
102
-
103
- listener1 = ProgressListener()
104
- xp1.scheduler.addlistener(listener1)
105
-
106
- out = ProgressingTask().submit()
107
- path = out.path # type: Path
108
- job = out.__xpm__.job
109
-
110
- logger.info("Waiting for job to start (1)")
111
- while job.state.notstarted():
112
- time.sleep(1e-2)
113
-
114
- with TemporaryExperiment(
115
- "progress-progress-multiple-2",
116
- workdir=xp1.workdir,
117
- maxwait=max_wait,
118
- port=0,
119
- ) as xp2:
120
- assert xp2.server is not None
121
- assert xp2.server.port > 0
122
- listener2 = ProgressListener()
123
- xp2.scheduler.addlistener(listener2)
124
-
125
- out = ProgressingTask().submit()
126
- job = out.__xpm__.job # type: CommandLineJob
127
- logger.info("Waiting for job to start (2)")
128
- while job.state.notstarted():
129
- time.sleep(1e-2)
130
-
131
- # Both schedulers should receive the job progress information
132
- logger.info("Checking job progress")
133
- progresses = [i / 10.0 for i in range(11)]
134
- for v in progresses:
135
- writeprogress(path, v)
136
- if v < 1:
137
- assert listener1.progresses.get()[0].progress == v
138
- assert listener2.progresses.get()[0].progress == v
139
-
140
-
141
93
  NestedTasks = Tuple[str, Union[int, List["NestedTasks"]]]
142
94
 
143
95
 
@@ -217,7 +169,7 @@ def test_progress_nested():
217
169
  listener = ProgressListener()
218
170
  xp.scheduler.addlistener(listener)
219
171
 
220
- out = NestedProgressingTask().submit()
172
+ out = NestedProgressingTask.C().submit()
221
173
  job = out.__xpm__.job
222
174
  path = out.path # type: Path
223
175