experimaestro-2.0.0a8-py3-none-any.whl → experimaestro-2.0.0b8-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of experimaestro has been flagged as potentially problematic.
- experimaestro/__init__.py +10 -11
- experimaestro/annotations.py +167 -206
- experimaestro/cli/__init__.py +278 -7
- experimaestro/cli/filter.py +42 -74
- experimaestro/cli/jobs.py +157 -106
- experimaestro/cli/refactor.py +249 -0
- experimaestro/click.py +0 -1
- experimaestro/commandline.py +19 -3
- experimaestro/connectors/__init__.py +20 -1
- experimaestro/connectors/local.py +12 -0
- experimaestro/core/arguments.py +182 -46
- experimaestro/core/identifier.py +107 -6
- experimaestro/core/objects/__init__.py +6 -0
- experimaestro/core/objects/config.py +542 -25
- experimaestro/core/objects/config_walk.py +20 -0
- experimaestro/core/serialization.py +91 -34
- experimaestro/core/subparameters.py +164 -0
- experimaestro/core/types.py +175 -38
- experimaestro/exceptions.py +26 -0
- experimaestro/experiments/cli.py +111 -25
- experimaestro/generators.py +50 -9
- experimaestro/huggingface.py +3 -1
- experimaestro/launcherfinder/parser.py +29 -0
- experimaestro/launchers/__init__.py +26 -1
- experimaestro/launchers/direct.py +12 -0
- experimaestro/launchers/slurm/base.py +154 -2
- experimaestro/mkdocs/metaloader.py +0 -1
- experimaestro/mypy.py +452 -7
- experimaestro/notifications.py +63 -13
- experimaestro/progress.py +0 -2
- experimaestro/rpyc.py +0 -1
- experimaestro/run.py +19 -6
- experimaestro/scheduler/base.py +510 -125
- experimaestro/scheduler/dependencies.py +43 -28
- experimaestro/scheduler/dynamic_outputs.py +259 -130
- experimaestro/scheduler/experiment.py +256 -31
- experimaestro/scheduler/interfaces.py +501 -0
- experimaestro/scheduler/jobs.py +216 -206
- experimaestro/scheduler/remote/__init__.py +31 -0
- experimaestro/scheduler/remote/client.py +874 -0
- experimaestro/scheduler/remote/protocol.py +467 -0
- experimaestro/scheduler/remote/server.py +423 -0
- experimaestro/scheduler/remote/sync.py +144 -0
- experimaestro/scheduler/services.py +323 -23
- experimaestro/scheduler/state_db.py +437 -0
- experimaestro/scheduler/state_provider.py +2766 -0
- experimaestro/scheduler/state_sync.py +891 -0
- experimaestro/scheduler/workspace.py +52 -10
- experimaestro/scriptbuilder.py +7 -0
- experimaestro/server/__init__.py +147 -57
- experimaestro/server/data/index.css +0 -125
- experimaestro/server/data/index.css.map +1 -1
- experimaestro/server/data/index.js +194 -58
- experimaestro/server/data/index.js.map +1 -1
- experimaestro/settings.py +44 -5
- experimaestro/sphinx/__init__.py +3 -3
- experimaestro/taskglobals.py +20 -0
- experimaestro/tests/conftest.py +80 -0
- experimaestro/tests/core/test_generics.py +2 -2
- experimaestro/tests/identifier_stability.json +45 -0
- experimaestro/tests/launchers/bin/sacct +6 -2
- experimaestro/tests/launchers/bin/sbatch +4 -2
- experimaestro/tests/launchers/test_slurm.py +80 -0
- experimaestro/tests/tasks/test_dynamic.py +231 -0
- experimaestro/tests/test_cli_jobs.py +615 -0
- experimaestro/tests/test_deprecated.py +630 -0
- experimaestro/tests/test_environment.py +200 -0
- experimaestro/tests/test_file_progress_integration.py +1 -1
- experimaestro/tests/test_forward.py +3 -3
- experimaestro/tests/test_identifier.py +372 -41
- experimaestro/tests/test_identifier_stability.py +458 -0
- experimaestro/tests/test_instance.py +3 -3
- experimaestro/tests/test_multitoken.py +442 -0
- experimaestro/tests/test_mypy.py +433 -0
- experimaestro/tests/test_objects.py +312 -5
- experimaestro/tests/test_outputs.py +2 -2
- experimaestro/tests/test_param.py +8 -12
- experimaestro/tests/test_partial_paths.py +231 -0
- experimaestro/tests/test_progress.py +0 -48
- experimaestro/tests/test_remote_state.py +671 -0
- experimaestro/tests/test_resumable_task.py +480 -0
- experimaestro/tests/test_serializers.py +141 -1
- experimaestro/tests/test_state_db.py +434 -0
- experimaestro/tests/test_subparameters.py +160 -0
- experimaestro/tests/test_tags.py +136 -0
- experimaestro/tests/test_tasks.py +107 -121
- experimaestro/tests/test_token_locking.py +252 -0
- experimaestro/tests/test_tokens.py +17 -13
- experimaestro/tests/test_types.py +123 -1
- experimaestro/tests/test_workspace_triggers.py +158 -0
- experimaestro/tests/token_reschedule.py +4 -2
- experimaestro/tests/utils.py +2 -2
- experimaestro/tokens.py +154 -57
- experimaestro/tools/diff.py +1 -1
- experimaestro/tui/__init__.py +8 -0
- experimaestro/tui/app.py +2395 -0
- experimaestro/tui/app.tcss +353 -0
- experimaestro/tui/log_viewer.py +228 -0
- experimaestro/utils/__init__.py +23 -0
- experimaestro/utils/environment.py +148 -0
- experimaestro/utils/git.py +129 -0
- experimaestro/utils/resources.py +1 -1
- experimaestro/version.py +34 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/METADATA +68 -38
- experimaestro-2.0.0b8.dist-info/RECORD +187 -0
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/WHEEL +1 -1
- experimaestro-2.0.0b8.dist-info/entry_points.txt +16 -0
- experimaestro/compat.py +0 -6
- experimaestro/core/objects.pyi +0 -221
- experimaestro/server/data/0c35d18bf06992036b69.woff2 +0 -0
- experimaestro/server/data/219aa9140e099e6c72ed.woff2 +0 -0
- experimaestro/server/data/3a4004a46a653d4b2166.woff +0 -0
- experimaestro/server/data/3baa5b8f3469222b822d.woff +0 -0
- experimaestro/server/data/4d73cb90e394b34b7670.woff +0 -0
- experimaestro/server/data/4ef4218c522f1eb6b5b1.woff2 +0 -0
- experimaestro/server/data/5d681e2edae8c60630db.woff +0 -0
- experimaestro/server/data/6f420cf17cc0d7676fad.woff2 +0 -0
- experimaestro/server/data/c380809fd3677d7d6903.woff2 +0 -0
- experimaestro/server/data/f882956fd323fd322f31.woff +0 -0
- experimaestro-2.0.0a8.dist-info/RECORD +0 -166
- experimaestro-2.0.0a8.dist-info/entry_points.txt +0 -17
- {experimaestro-2.0.0a8.dist-info → experimaestro-2.0.0b8.dist-info}/licenses/LICENSE +0 -0
experimaestro/tests/test_multitoken.py (new file)
@@ -0,0 +1,442 @@
+"""Tests for multi-token task scheduling
+
+Verifies that:
+1. Token limits are never exceeded when using multiple different tokens
+2. No deadlock occurs when tasks require combinations of tokens
+3. All tasks complete successfully
+"""
+
+import threading
+import time
+import logging
+from pathlib import Path
+from dataclasses import dataclass, field as dataclass_field
+from typing import List
+
+from experimaestro import Task, Param, field
+from experimaestro.tokens import CounterToken
+from experimaestro.scheduler import JobState
+from .utils import TemporaryExperiment
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TokenUsageTracker:
+    """Thread-safe tracker for token usage during task execution"""
+
+    memory_limit: int
+    cpu_limit: int
+    memory_used: int = 0
+    cpu_used: int = 0
+    max_memory_used: int = 0
+    max_cpu_used: int = 0
+    violations: List[str] = dataclass_field(default_factory=list)
+    lock: threading.Lock = dataclass_field(default_factory=threading.Lock)
+
+    def acquire(self, memory: int, cpu: int, task_id: str):
+        """Record token acquisition"""
+        with self.lock:
+            self.memory_used += memory
+            self.cpu_used += cpu
+            self.max_memory_used = max(self.max_memory_used, self.memory_used)
+            self.max_cpu_used = max(self.max_cpu_used, self.cpu_used)
+
+            if self.memory_used > self.memory_limit:
+                self.violations.append(
+                    f"Memory limit exceeded: {self.memory_used} > {self.memory_limit} "
+                    f"(task {task_id})"
+                )
+            if self.cpu_used > self.cpu_limit:
+                self.violations.append(
+                    f"CPU limit exceeded: {self.cpu_used} > {self.cpu_limit} "
+                    f"(task {task_id})"
+                )
+
+            logger.debug(
+                "Task %s acquired: memory=%d/%d, cpu=%d/%d",
+                task_id,
+                self.memory_used,
+                self.memory_limit,
+                self.cpu_used,
+                self.cpu_limit,
+            )
+
+    def release(self, memory: int, cpu: int, task_id: str):
+        """Record token release"""
+        with self.lock:
+            self.memory_used -= memory
+            self.cpu_used -= cpu
+            logger.debug(
+                "Task %s released: memory=%d/%d, cpu=%d/%d",
+                task_id,
+                self.memory_used,
+                self.memory_limit,
+                self.cpu_used,
+                self.cpu_limit,
+            )
+
+
+class MultiTokenTask(Task):
+    """Task that requires multiple tokens and tracks usage"""
+
+    task_id: Param[int]
+    memory_tokens: Param[int]
+    cpu_tokens: Param[int]
+    execution_time: Param[float] = field(default=0.01)
+    tracker_path: Param[Path]
+
+    def execute(self):
+        import json
+
+        # Use files to track concurrent usage
+        usage_file = self.tracker_path.parent / f"usage_{self.task_id}.json"
+
+        # Record start
+        start_info = {
+            "task_id": self.task_id,
+            "memory": self.memory_tokens,
+            "cpu": self.cpu_tokens,
+            "start_time": time.time(),
+        }
+
+        usage_file.write_text(json.dumps(start_info))
+
+        # Simulate work
+        time.sleep(self.execution_time)
+
+        # Record end
+        start_info["end_time"] = time.time()
+        usage_file.write_text(json.dumps(start_info))
+
+
+def analyze_execution(tracker_path: Path, memory_limit: int, cpu_limit: int):
+    """Analyze task execution logs to verify token constraints were respected"""
+    import json
+
+    usage_files = list(tracker_path.parent.glob("usage_*.json"))
+    events = []
+
+    for f in usage_files:
+        try:
+            data = json.loads(f.read_text())
+            events.append(("start", data["start_time"], data["memory"], data["cpu"]))
+            if "end_time" in data:
+                events.append(("end", data["end_time"], -data["memory"], -data["cpu"]))
+        except Exception as e:
+            logger.warning("Could not read %s: %s", f, e)
+
+    # Sort by time
+    events.sort(key=lambda x: x[1])
+
+    # Simulate execution
+    memory_used = 0
+    cpu_used = 0
+    max_memory = 0
+    max_cpu = 0
+    violations = []
+
+    for _event_type, timestamp, memory_delta, cpu_delta in events:
+        memory_used += memory_delta
+        cpu_used += cpu_delta
+        max_memory = max(max_memory, memory_used)
+        max_cpu = max(max_cpu, cpu_used)
+
+        if memory_used > memory_limit:
+            violations.append(f"Memory exceeded at {timestamp}: {memory_used}")
+        if cpu_used > cpu_limit:
+            violations.append(f"CPU exceeded at {timestamp}: {cpu_used}")
+
+    return max_memory, max_cpu, violations
+
+
+def test_multitoken_basic():
+    """Test basic multi-token scheduling with two tokens"""
+    import json
+
+    with TemporaryExperiment("multitoken", maxwait=60) as xp:
+        # Create two tokens: memory (4 units) and cpu (2 cores)
+        memory_limit = 4
+        cpu_limit = 2
+
+        memory_token = CounterToken("memory", xp.workdir / "token_memory", memory_limit)
+        cpu_token = CounterToken("cpu", xp.workdir / "token_cpu", cpu_limit)
+
+        # Create tracker info file
+        tracker_path = xp.workdir / "tracker.json"
+        tracker_path.write_text(
+            json.dumps({"memory_limit": memory_limit, "cpu_limit": cpu_limit})
+        )
+
+        tasks = []
+
+        # Create tasks with various token requirements:
+        # - Some need 1 memory + 1 cpu
+        # - Some need 2 memory + 1 cpu
+        # - Some need 1 memory + 2 cpu
+        task_configs = [
+            (1, 1),  # Task 0: 1 mem, 1 cpu
+            (2, 1),  # Task 1: 2 mem, 1 cpu
+            (1, 2),  # Task 2: 1 mem, 2 cpu (uses all CPUs)
+            (1, 1),  # Task 3: 1 mem, 1 cpu
+            (2, 1),  # Task 4: 2 mem, 1 cpu
+            (1, 1),  # Task 5: 1 mem, 1 cpu
+            (3, 1),  # Task 6: 3 mem, 1 cpu
+            (1, 1),  # Task 7: 1 mem, 1 cpu
+        ]
+
+        for i, (mem, cpu) in enumerate(task_configs):
+            task = MultiTokenTask.C(
+                task_id=i,
+                memory_tokens=mem,
+                cpu_tokens=cpu,
+                execution_time=0.05,
+                tracker_path=tracker_path,
+            )
+            task.add_dependencies(memory_token.dependency(mem))
+            task.add_dependencies(cpu_token.dependency(cpu))
+            tasks.append(task.submit())
+
+        # Wait for all tasks to complete
+        xp.wait()
+
+        # Verify all tasks completed successfully
+        for i, task in enumerate(tasks):
+            state = task.__xpm__.job.state
+            assert state == JobState.DONE, f"Task {i} ended with state {state}"
+
+        # Analyze execution to verify token limits
+        max_mem, max_cpu, violations = analyze_execution(
+            tracker_path, memory_limit, cpu_limit
+        )
+
+        logger.info("Max memory used: %d/%d", max_mem, memory_limit)
+        logger.info("Max CPU used: %d/%d", max_cpu, cpu_limit)
+
+        assert not violations, f"Token violations detected: {violations}"
+        assert max_mem <= memory_limit, f"Memory limit exceeded: {max_mem}"
+        assert max_cpu <= cpu_limit, f"CPU limit exceeded: {max_cpu}"
+
+
+def test_multitoken_stress():
+    """Stress test with many tasks requiring various token combinations
+
+    This test submits many tasks concurrently to stress the token
+    acquisition/release mechanism and verify no deadlocks occur.
+    """
+    import json
+
+    with TemporaryExperiment("multitoken_stress", maxwait=120) as xp:
+        # Create tokens with limited capacity to force contention
+        memory_limit = 8
+        cpu_limit = 4
+
+        memory_token = CounterToken(
+            "memory_stress", xp.workdir / "token_memory", memory_limit
+        )
+        cpu_token = CounterToken("cpu_stress", xp.workdir / "token_cpu", cpu_limit)
+
+        tracker_path = xp.workdir / "tracker.json"
+        tracker_path.write_text(
+            json.dumps({"memory_limit": memory_limit, "cpu_limit": cpu_limit})
+        )
+
+        tasks = []
+        num_tasks = 20
+
+        # Generate diverse task configurations
+        import random
+
+        random.seed(42)  # Reproducible
+
+        for i in range(num_tasks):
+            # Random token requirements within limits
+            mem = random.randint(1, min(4, memory_limit))
+            cpu = random.randint(1, min(2, cpu_limit))
+
+            task = MultiTokenTask.C(
+                task_id=i,
+                memory_tokens=mem,
+                cpu_tokens=cpu,
+                execution_time=random.uniform(0.01, 0.05),
+                tracker_path=tracker_path,
+            )
+            task.add_dependencies(memory_token.dependency(mem))
+            task.add_dependencies(cpu_token.dependency(cpu))
+            tasks.append(task.submit())
+
+        # Wait for all tasks - if there's a deadlock, this will timeout
+        xp.wait()
+
+        # Verify all tasks completed
+        completed = sum(1 for t in tasks if t.__xpm__.job.state == JobState.DONE)
+        assert completed == num_tasks, f"Only {completed}/{num_tasks} tasks completed"
+
+        # Analyze execution
+        max_mem, max_cpu, violations = analyze_execution(
+            tracker_path, memory_limit, cpu_limit
+        )
+
+        logger.info(
+            "Stress test: max memory=%d/%d, max cpu=%d/%d",
+            max_mem,
+            memory_limit,
+            max_cpu,
+            cpu_limit,
+        )
+
+        assert not violations, f"Token violations: {violations}"
+
+
+def test_multitoken_large_requirements():
+    """Test tasks that require most of the available tokens
+
+    Ensures that tasks requiring large token counts still complete
+    and don't cause deadlock when competing for resources.
+    """
+    import json
+
+    with TemporaryExperiment("multitoken_large", maxwait=60) as xp:
+        memory_limit = 10
+        cpu_limit = 4
+
+        memory_token = CounterToken(
+            "memory_large", xp.workdir / "token_memory", memory_limit
+        )
+        cpu_token = CounterToken("cpu_large", xp.workdir / "token_cpu", cpu_limit)
+
+        tracker_path = xp.workdir / "tracker.json"
+        tracker_path.write_text(
+            json.dumps({"memory_limit": memory_limit, "cpu_limit": cpu_limit})
+        )
+
+        tasks = []
+
+        # Mix of large and small tasks
+        task_configs = [
+            (8, 3),  # Large: needs most resources
+            (1, 1),  # Small
+            (7, 2),  # Large
+            (2, 1),  # Small
+            (9, 4),  # Very large: needs almost all
+            (1, 1),  # Small
+            (5, 2),  # Medium
+            (3, 1),  # Medium
+        ]
+
+        for i, (mem, cpu) in enumerate(task_configs):
+            task = MultiTokenTask.C(
+                task_id=i,
+                memory_tokens=mem,
+                cpu_tokens=cpu,
+                execution_time=0.03,
+                tracker_path=tracker_path,
+            )
+            task.add_dependencies(memory_token.dependency(mem))
+            task.add_dependencies(cpu_token.dependency(cpu))
+            tasks.append(task.submit())
+
+        xp.wait()
+
+        # Verify completion
+        for i, task in enumerate(tasks):
+            state = task.__xpm__.job.state
+            assert state == JobState.DONE, f"Task {i} state: {state}"
+
+        max_mem, max_cpu, violations = analyze_execution(
+            tracker_path, memory_limit, cpu_limit
+        )
+
+        assert not violations, f"Violations: {violations}"
+        assert max_mem <= memory_limit
+        assert max_cpu <= cpu_limit
+
+
+def test_multitoken_single_task_all_tokens():
+    """Test that a single task can acquire all available tokens"""
+    import json
+
+    with TemporaryExperiment("multitoken_all", maxwait=30) as xp:
+        memory_limit = 4
+        cpu_limit = 2
+
+        memory_token = CounterToken(
+            "memory_all", xp.workdir / "token_memory", memory_limit
+        )
+        cpu_token = CounterToken("cpu_all", xp.workdir / "token_cpu", cpu_limit)
+
+        tracker_path = xp.workdir / "tracker.json"
+        tracker_path.write_text(
+            json.dumps({"memory_limit": memory_limit, "cpu_limit": cpu_limit})
+        )
+
+        # Single task requiring all tokens
+        task = MultiTokenTask.C(
+            task_id=0,
+            memory_tokens=memory_limit,
+            cpu_tokens=cpu_limit,
+            execution_time=0.02,
+            tracker_path=tracker_path,
+        )
+        task.add_dependencies(memory_token.dependency(memory_limit))
+        task.add_dependencies(cpu_token.dependency(cpu_limit))
+        submitted = task.submit()
+
+        xp.wait()
+
+        assert submitted.__xpm__.job.state == JobState.DONE
+
+
+def test_multitoken_sequential_dependency():
+    """Test that tasks waiting on tokens eventually run when tokens free up
+
+    Submits tasks that together require more tokens than available,
+    verifying they run sequentially without deadlock.
+    """
+    import json
+
+    with TemporaryExperiment("multitoken_seq", maxwait=60) as xp:
+        memory_limit = 2
+        cpu_limit = 1
+
+        memory_token = CounterToken(
+            "memory_seq", xp.workdir / "token_memory", memory_limit
+        )
+        cpu_token = CounterToken("cpu_seq", xp.workdir / "token_cpu", cpu_limit)
+
+        tracker_path = xp.workdir / "tracker.json"
+        tracker_path.write_text(
+            json.dumps({"memory_limit": memory_limit, "cpu_limit": cpu_limit})
+        )
+
+        tasks = []
+
+        # All tasks require all tokens - must run one at a time
+        for i in range(5):
+            task = MultiTokenTask.C(
+                task_id=i,
+                memory_tokens=memory_limit,
+                cpu_tokens=cpu_limit,
+                execution_time=0.02,
+                tracker_path=tracker_path,
+            )
+            task.add_dependencies(memory_token.dependency(memory_limit))
+            task.add_dependencies(cpu_token.dependency(cpu_limit))
+            tasks.append(task.submit())
+
+        xp.wait()
+
+        # All should complete
+        for i, task in enumerate(tasks):
+            assert task.__xpm__.job.state == JobState.DONE, f"Task {i} failed"
+
+        # Verify sequential execution (max usage should equal limits)
+        max_mem, max_cpu, violations = analyze_execution(
+            tracker_path, memory_limit, cpu_limit
+        )
+
+        assert not violations
+        # Since all tasks need all tokens, they must run sequentially
+        assert max_mem <= memory_limit
+        assert max_cpu <= cpu_limit
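
The verification in these tests reduces to an event replay: analyze_execution turns each usage_<id>.json file into a positive delta event at the task's start time and a matching negative delta event at its end time, sorts the events, and tracks the running sums, whose peaks must never exceed the token capacities. The stand-alone sketch below shows the same computation in isolation; the hard-coded events, timestamps, and deltas are illustrative only and are not taken from any actual test run.

# Minimal replay of the check performed by analyze_execution above.
# Each tuple is (kind, timestamp, memory_delta, cpu_delta); a task contributes
# positive deltas when it starts and the matching negative deltas when it ends.
events = [
    ("start", 0.0, 2, 1),    # task A acquires 2 memory, 1 cpu
    ("start", 0.1, 1, 1),    # task B acquires 1 memory, 1 cpu
    ("end", 0.3, -2, -1),    # task A releases
    ("start", 0.4, 3, 1),    # task C acquires 3 memory, 1 cpu
    ("end", 0.5, -1, -1),    # task B releases
    ("end", 0.7, -3, -1),    # task C releases
]

memory_used = cpu_used = max_memory = max_cpu = 0
for _kind, _timestamp, memory_delta, cpu_delta in sorted(events, key=lambda e: e[1]):
    memory_used += memory_delta
    cpu_used += cpu_delta
    max_memory = max(max_memory, memory_used)
    max_cpu = max(max_cpu, cpu_used)

# With limits of 4 memory units and 2 cpu units (as in test_multitoken_basic),
# this schedule is valid: the peaks are exactly 4 and 2 and never exceed the limits.
assert (max_memory, max_cpu) == (4, 2)

Because start and end times are recorded by the tasks themselves, a scheduler bug that over-allocates a token shows up directly as a peak above the configured limit, independently of how the scheduler ordered the jobs.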