hud-python 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of hud-python might be problematic.
- hud/__init__.py +4 -3
- hud/adapters/claude/adapter.py +5 -14
- hud/adapters/common/adapter.py +3 -3
- hud/adapters/common/tests/__init__.py +0 -0
- hud/adapters/common/tests/test_adapter.py +277 -0
- hud/adapters/common/types.py +3 -3
- hud/adapters/operator/adapter.py +16 -23
- hud/agent/__init__.py +8 -1
- hud/agent/base.py +28 -28
- hud/agent/claude.py +69 -60
- hud/agent/langchain.py +32 -26
- hud/agent/operator.py +75 -67
- hud/env/__init__.py +5 -5
- hud/env/client.py +2 -2
- hud/env/docker_client.py +37 -39
- hud/env/environment.py +91 -66
- hud/env/local_docker_client.py +5 -7
- hud/env/remote_client.py +39 -32
- hud/env/remote_docker_client.py +13 -3
- hud/evaluators/__init__.py +2 -3
- hud/evaluators/base.py +4 -3
- hud/evaluators/inspect.py +3 -8
- hud/evaluators/judge.py +34 -58
- hud/evaluators/match.py +42 -49
- hud/evaluators/remote.py +13 -26
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +12 -0
- hud/evaluators/tests/test_judge.py +231 -0
- hud/evaluators/tests/test_match.py +115 -0
- hud/evaluators/tests/test_remote.py +98 -0
- hud/exceptions.py +167 -0
- hud/gym.py +9 -7
- hud/job.py +179 -109
- hud/server/__init__.py +2 -2
- hud/server/requests.py +148 -186
- hud/server/tests/__init__.py +0 -0
- hud/server/tests/test_requests.py +275 -0
- hud/settings.py +3 -2
- hud/task.py +9 -19
- hud/taskset.py +44 -11
- hud/trajectory.py +6 -9
- hud/types.py +12 -9
- hud/utils/__init__.py +2 -2
- hud/utils/common.py +36 -15
- hud/utils/config.py +45 -30
- hud/utils/progress.py +34 -21
- hud/utils/telemetry.py +10 -11
- hud/utils/tests/__init__.py +0 -0
- hud/utils/tests/test_common.py +52 -0
- hud/utils/tests/test_config.py +129 -0
- hud/utils/tests/test_progress.py +225 -0
- hud/utils/tests/test_telemetry.py +37 -0
- hud/utils/tests/test_version.py +8 -0
- {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/METADATA +9 -6
- hud_python-0.2.4.dist-info/RECORD +62 -0
- hud_python-0.2.2.dist-info/RECORD +0 -46
- {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/WHEEL +0 -0
- {hud_python-0.2.2.dist-info → hud_python-0.2.4.dist-info}/licenses/LICENSE +0 -0
hud/job.py
CHANGED

@@ -11,8 +11,8 @@ from typing import TYPE_CHECKING, Any, TypeVar, cast
 
 from pydantic import BaseModel, PrivateAttr, TypeAdapter
 
+import hud.server
 from hud import gym
-from hud.server import make_request
 from hud.settings import settings
 from hud.task import Task
 from hud.taskset import TaskSet
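The import change above swaps a function binding (`from hud.server import make_request`) for a module binding (`import hud.server`), so every call site in hud/job.py now resolves `hud.server.make_request` at call time. One plausible motivation, given the test suites added in this release, is easier stubbing; a minimal sketch of that pattern, with a hypothetical stub and payload:

```python
from unittest import mock

import hud.server

async def fake_make_request(**kwargs):
    # Hypothetical stub; the payload mirrors the fields hud/job.py reads.
    return {
        "id": "job_1",
        "name": "demo",
        "metadata": {},
        "created_at": "2025-01-01T00:00:00",
        "status": "created",
    }

# Because hud/job.py now looks the attribute up at call time, patching
# the module attribute is seen by every call site in hud/job.py.
patcher = mock.patch.object(hud.server, "make_request", fake_make_request)
```

Under the old symbol import, the same patch would have missed the local binding inside hud.job.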
@@ -31,6 +31,7 @@ T = TypeVar("T", bound=Callable)
 # Global registry to store active jobs created by decorators
 _ACTIVE_JOBS = {}
 
+
 class Job(BaseModel):
     """
     A job represents a collection of related trajectories.
@@ -43,15 +44,15 @@ class Job(BaseModel):
     metadata: dict[str, Any] | None = None
     created_at: datetime.datetime
     status: str
-
+
     # Internal cache for trajectories
     _trajectories: list[Trajectory] | None = PrivateAttr(default=None)
     # Store execution errors for debugging
     errors: list[dict[str, Any]] = []
 
     async def load_trajectories(
-
-
+        self, *, api_key: str | None = None, force_reload: bool = False
+    ) -> list[Trajectory]:
         """
         Loads the trajectories associated with this job.
         Uses cached results unless force_reload is True.
@@ -66,12 +67,12 @@
         if self._trajectories is not None and not force_reload:
             logger.debug("Returning cached trajectories for Job %s", self.id)
             return self._trajectories
-
+
         logger.debug("Fetching trajectories for Job %s from API...", self.id)
         api_key = api_key or settings.api_key
-
+
         try:
-            data = await make_request(
+            data = await hud.server.make_request(
                 method="GET",
                 url=f"{settings.base_url}/v2/jobs/{self.id}/trajectories",
                 api_key=api_key,
@@ -81,8 +82,8 @@
             return self._trajectories
         except Exception as e:
             logger.exception("Failed to load trajectories for Job %s: %s", self.id, e)
-            self._trajectories = None
-            return []
+            self._trajectories = None  # Ensure cache is cleared on error
+            return []  # Return empty list on error
 
     async def get_analytics(self, *, force_reload: bool = False) -> dict[str, Any]:
         """
@@ -95,10 +96,10 @@
             Dictionary containing analytics (e.g., task_count, avg_reward).
         """
         trajectories = await self.load_trajectories(force_reload=force_reload)
-
+
         task_count = len(trajectories)
         if task_count == 0:
-            return {"task_count": 0, "avg_reward": None, "success_rate": None}
+            return {"task_count": 0, "avg_reward": None, "success_rate": None}  # Or other default
 
         total_reward = 0
         successful_tasks = 0
@@ -111,9 +112,9 @@
                 total_reward += traj.reward
                 valid_rewards += 1
                 if traj.reward >= 1.0:
-
+                    successful_tasks += 1
             # Add more complex logic here if needed based on traj.evaluation_result or metadata
-
+
         avg_reward = (total_reward / valid_rewards) if valid_rewards > 0 else None
         success_rate = (successful_tasks / task_count) * 100 if task_count > 0 else None
 
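As a quick worked example against the analytics logic above (illustrative numbers, not library output): a trajectory counts as successful when its reward reaches 1.0, and the average is taken over trajectories with a usable reward.

```python
rewards = [1.0, 0.5, 1.0]               # three trajectories, all with valid rewards
task_count = len(rewards)               # 3
avg_reward = sum(rewards) / task_count  # ~0.833
success_rate = (2 / task_count) * 100   # ~66.7, since two rewards are >= 1.0
```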
@@ -124,9 +125,13 @@
             # Add other relevant stats here
         }
 
-
-async def create_job(name: str, gym_id: str | None = None,
-
+
+async def create_job(
+    name: str,
+    gym_id: str | None = None,
+    evalset_id: str | None = None,
+    metadata: dict[str, Any] | None = None,
+) -> Job:
     """
     Creates a new job.
 
@@ -140,7 +145,7 @@ async def create_job(name: str, gym_id: str | None = None,
     api_key = settings.api_key
     metadata = metadata or {}
 
-    data = await make_request(
+    data = await hud.server.make_request(
         method="POST",
         url=f"{settings.base_url}/v2/jobs",
         json={
@@ -151,19 +156,19 @@
         },
         api_key=api_key,
     )
-
+
     # Assume the backend API returns the full job data upon creation
     # or at least the necessary fields (id, name, metadata, created_at, status)
     # If not, we might need to make a subsequent GET request
-    job_data = data
+    job_data = data  # Adjust if the API response structure is different
 
     logger.info("[HUD] View job at https://app.hud.so/jobs/%s.", job_data["id"])
 
     return Job(
         id=job_data["id"],
         name=job_data["name"],
-        metadata=job_data.get("metadata", {}),
-        created_at=datetime.datetime.fromisoformat(job_data["created_at"]),
+        metadata=job_data.get("metadata", {}),  # Ensure metadata is dict
+        created_at=datetime.datetime.fromisoformat(job_data["created_at"]),  # Parse datetime
         status=job_data["status"],
     )
 
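The reflowed signature spells out the optional linkage parameters (`gym_id`, `evalset_id`) that run_job now passes through. A minimal call under the new signature (name and metadata below are placeholders; the API key comes from hud.settings, as shown above):

```python
import asyncio

from hud.job import create_job

async def main() -> None:
    job = await create_job(
        name="nightly-eval",          # placeholder job name
        gym_id=None,                  # optional: link a gym
        evalset_id=None,              # optional: link an evalset
        metadata={"branch": "main"},  # placeholder metadata
    )
    print(job.id, job.status)

asyncio.run(main())
```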
@@ -179,63 +184,58 @@ async def load_job(job_id: str, api_key: str | None = None) -> Job:
         Job: The retrieved job instance
     """
     api_key = api_key or settings.api_key
-
-    data = await make_request(
+
+    data = await hud.server.make_request(
         method="GET",
         url=f"{settings.base_url}/v2/jobs/{job_id}",
         api_key=api_key,
     )
-
+
     if not data:
         raise ValueError(f"Job {job_id} not found")
-
+
     # Validate and create the Job instance from the fetched data
     return Job.model_validate(data)
 
 
-def job(
-    name: str,
-    metadata: dict[str, Any] | None = None
-) -> Callable[[T], T]:
+def job(name: str, metadata: dict[str, Any] | None = None) -> Callable[[T], T]:
     """
     Decorator to automatically create and associate a job with all environments
     created within the decorated function.
-
+
     Args:
         name: The name of the job
         metadata: Additional metadata for the job
-
+
     Returns:
         A decorator function that creates a job and associates it with environments
     """
+
     def decorator(func: T) -> T:
         @functools.wraps(func)
         async def wrapper(*args: Any, **kwargs: Any) -> Any:
             # Create a job for this function call using the new function
-            job = await create_job(
-
-                metadata=metadata
-            )
-
+            job = await create_job(name=name, metadata=metadata)
+
             # Store in global registry with a unique key based on function and call
             call_id = f"{func.__module__}.{func.__qualname__}_{id(wrapper)}"
             _ACTIVE_JOBS[call_id] = job
-
+
             try:
                 # Add the function's frame to the stack for lookup
                 frame = inspect.currentframe()
                 if frame:
                     frame.f_locals["_job_call_id"] = call_id
-
+
                 # Run the decorated function
                 result = await func(*args, **kwargs)
                 return result
             finally:
                 # Clean up
-
-
-
-
+                _ACTIVE_JOBS.pop(call_id, None)
+
+        return cast("T", wrapper)
+
     return decorator
 
 
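The decorator now builds the job with a single create_job call and always evicts the registry entry via `_ACTIVE_JOBS.pop(call_id, None)` in the finally block. Typical usage, as a sketch (the decorated function body is illustrative):

```python
from hud.job import job

@job(name="smoke-test", metadata={"suite": "ci"})
async def run_suite() -> None:
    # Environments created in here (e.g., via hud.gym.make) find the
    # active job through get_active_job(), per the docstring above.
    ...
```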
@@ -243,7 +243,7 @@ def get_active_job() -> Job | None:
     """
     Get the currently active job from the call stack, if any.
     Used internally by gym.make to automatically associate environments with jobs.
-
+
     Returns:
         The active job or None if no job is active
     """
@@ -255,10 +255,9 @@
         if call_id in _ACTIVE_JOBS:
             return _ACTIVE_JOBS[call_id]
         frame = frame.f_back
-
+
     return None
 
-# --- Moved helper functions from runner.py ---
 
 async def _execute_task(
     agent_cls: type[Agent],
@@ -329,17 +328,25 @@ async def _execute_task(
                     break
 
             except Exception as agent_step_err:
-                logger.exception(
-
+                logger.exception(
+                    "[Job: %s/%s, Task: %s] Step %d Error: %s",
+                    job.name,
+                    job.id,
+                    task_id,
+                    step + 1,
+                    agent_step_err,
+                )
                 step_error = f"Error at step {step + 1}: {agent_step_err}"
                 # Store step error in job
-                job.errors.append(
-
-
-
-
-
-
+                job.errors.append(
+                    {
+                        "task_id": task_id,
+                        "type": "step_error",
+                        "step": step + 1,
+                        "error": str(agent_step_err),
+                        "timestamp": datetime.datetime.now().isoformat(),
+                    }
+                )
                 break
         else:
             logger.warning("[Job: %s/%s, Task: %s] Max steps reached.", job.name, job.id, task_id)
@@ -355,29 +362,38 @@ async def _execute_task(
             status = "completed"
             error_msg = None
         except Exception as eval_err:
-            logger.exception(
-
+            logger.exception(
+                "[Job: %s/%s, Task: %s] Evaluation Error: %s",
+                job.name,
+                job.id,
+                task_id,
+                eval_err,
+            )
             status = "error"
             error_msg = f"Evaluation failed: {eval_err}"
             # Store evaluation error in job
-            job.errors.append(
-
-
-
-
-
+            job.errors.append(
+                {
+                    "task_id": task_id,
+                    "type": "evaluation_error",
+                    "error": str(eval_err),
+                    "timestamp": datetime.datetime.now().isoformat(),
+                }
+            )
 
     except Exception as e:
         logger.exception("[Job: %s/%s, Task: %s] Setup/Run Error: %s", job.name, job.id, task_id, e)
         status = "error"
         error_msg = str(e)
         # Store setup/initialization error in job
-        job.errors.append(
-
-
-
-
-
+        job.errors.append(
+            {
+                "task_id": task_id,
+                "type": "setup_error",
+                "error": str(e),
+                "timestamp": datetime.datetime.now().isoformat(),
+            }
+        )
 
     finally:
         if tracker:
@@ -386,19 +402,29 @@ async def _execute_task(
         try:
             await env.close()
         except Exception as close_err:
-            logger.exception(
-
+            logger.exception(
+                "[Job: %s/%s, Task: %s] Close Error: %s", job.name, job.id, task_id, close_err
+            )
             # Store environment close error in job
-            job.errors.append(
-
-
-
-
-
+            job.errors.append(
+                {
+                    "task_id": task_id,
+                    "type": "env_close_error",
+                    "error": str(close_err),
+                    "timestamp": datetime.datetime.now().isoformat(),
+                }
+            )
 
     log_suffix = f" Error: {error_msg}" if status == "error" else f" Eval: {evaluation_result}"
-    logger.info(
-
+    logger.info(
+        "[Job: %s/%s, Task: %s] Finished local execution. Status: %s.%s",
+        job.name,
+        job.id,
+        task_id,
+        status,
+        log_suffix,
+    )
+
 
 async def _progress_monitor(tracker: StepProgressTracker, interval: float = 1.0) -> None:
     """Coroutine to periodically display progress using the tracker."""
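Each failure path in _execute_task now appends a uniformly shaped record to job.errors, so failures can be filtered by type after a run. The keys below are the ones written above; the values are illustrative:

```python
# One entry in job.errors (illustrative values):
error_record = {
    "task_id": "task_3",
    "type": "step_error",  # or "evaluation_error", "setup_error", "env_close_error"
    "step": 2,             # present only on step errors
    "error": "TimeoutError: ...",
    "timestamp": "2025-01-01T12:00:00",
}
```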
@@ -422,6 +448,7 @@ async def _progress_monitor(tracker: StepProgressTracker, interval: float = 1.0)
 
 # --- New run_job function ---
 
+
 async def run_job(
     agent_cls: type[Agent],
     task_or_taskset: Task | TaskSet,
@@ -441,12 +468,12 @@
     """
     Creates Job, executes tasks locally, linking them to the Job.
     Instantiates agent/adapter per task. Shows step-based progress.
-
+
     Controls concurrency in three ways:
     1. Limits concurrent environment creations
     2. Limits concurrent agent predictions
     3. Limits overall concurrent tasks (when run_parallel=True)
-
+
     All concurrency controls use semaphores for reliability.
     Tracks all errors that occur during execution in job.errors.
 
@@ -471,10 +498,27 @@
     tasks_to_run: list[Task] = []
     created_job: Job | None = None
 
+    evalset_id = None
+    if isinstance(task_or_taskset, TaskSet):
+        evalset_id = task_or_taskset.id
+
+    gym_id = None
+    if isinstance(task_or_taskset, Task):
+        gym_id = task_or_taskset.gym if isinstance(task_or_taskset.gym, str) else None
+    elif isinstance(task_or_taskset, TaskSet):
+        gym_id = (
+            task_or_taskset.tasks[0].gym if isinstance(task_or_taskset.tasks[0].gym, str) else None
+        )
+
     # --- Create Job ---
     try:
         logger.info("Creating job with name: '%s'", job_name)
-        created_job = await create_job(
+        created_job = await create_job(
+            name=job_name,
+            metadata=job_metadata,
+            evalset_id=evalset_id,
+            gym_id=gym_id,
+        )
         logger.info("Created job with ID: %s", created_job.id)
     except Exception as e:
         logger.exception("Failed to create job '%s': %s", job_name, e)
@@ -493,7 +537,7 @@
     if not tasks_to_run:
         logger.warning("Job '%s' (%s): No tasks found to run.", created_job.name, created_job.id)
         return created_job
-
+
     task_ids = [(str(task.id) if task.id else f"task_{i}") for i, task in enumerate(tasks_to_run)]
     num_tasks = len(tasks_to_run)
 
@@ -501,15 +545,17 @@
     env_creation_sema = None
     if max_concurrent_env_creations and max_concurrent_env_creations > 0:
         env_creation_sema = asyncio.Semaphore(max_concurrent_env_creations)
-        logger.info(
-
-
+        logger.info(
+            "Limiting concurrent environment creations to %d.", max_concurrent_env_creations
+        )
+
     agent_predict_sema = None
     if max_concurrent_agent_predictions and max_concurrent_agent_predictions > 0:
         agent_predict_sema = asyncio.Semaphore(max_concurrent_agent_predictions)
-        logger.info(
-
-
+        logger.info(
+            "Limiting concurrent agent predictions to %d.", max_concurrent_agent_predictions
+        )
+
     task_execution_sema = None
     effective_concurrency = num_tasks  # Default to running all if parallel
     if run_parallel and max_concurrent_tasks and max_concurrent_tasks > 0:
@@ -518,7 +564,7 @@
         logger.info("Limiting concurrent task executions to %d.", effective_concurrency)
     elif not run_parallel:
         effective_concurrency = 1  # Sequential means concurrency of 1
-
+
     # --- Instantiate Tracker & Start Monitor ---
     tracker = None
     monitor_task = None
@@ -528,51 +574,71 @@
 
     # --- Execute Tasks ---
     job_desc_suffix = f" (Job ID: {created_job.id})"
-
+
     async def task_wrapper(task_coro: Coroutine, semaphore: asyncio.Semaphore | None) -> None:
         if semaphore:
             async with semaphore:
                 await task_coro
         else:
-
+            await task_coro
 
     try:
         if run_parallel and is_taskset:
-            logger.info(
-
-
+            logger.info(
+                "Job '%s'%s: Running %d tasks with concurrency %d.",
+                created_job.name,
+                job_desc_suffix,
+                num_tasks,
+                effective_concurrency,
+            )
+
             task_coroutines = [
                 _execute_task(
-                    agent_cls=agent_cls,
-
+                    agent_cls=agent_cls,
+                    adapter_cls=adapter_cls,
+                    agent_kwargs=agent_kwargs,
+                    adapter_kwargs=adapter_kwargs,
+                    task=task,
+                    job_name=created_job.name,
                     task_id=task_id,
-                    max_steps_per_task=max_steps_per_task,
+                    max_steps_per_task=max_steps_per_task,
+                    job=created_job,
+                    tracker=tracker,
                     env_creation_semaphore=env_creation_sema,
                     agent_predict_semaphore=agent_predict_sema,
                 )
                 for task, task_id in zip(tasks_to_run, task_ids, strict=True)
             ]
-
+
             # Wrap coroutines with semaphore management if limiting concurrency
            wrapped_tasks = [
-                task_wrapper(coro, task_execution_sema)
-                for i, coro in enumerate(task_coroutines)
+                task_wrapper(coro, task_execution_sema) for i, coro in enumerate(task_coroutines)
             ]
-
+
             # Run all wrapped tasks
             await asyncio.gather(*wrapped_tasks)
-
+
         else:
             # SEQUENTIAL (or single task)
-            logger.info(
-
+            logger.info(
+                "Job '%s'%s: Running %d tasks sequentially.",
+                created_job.name,
+                job_desc_suffix,
+                num_tasks,
+            )
             for i, task in enumerate(tasks_to_run):
                 task_id = task_ids[i]
                 await _execute_task(
-                    agent_cls=agent_cls,
-
+                    agent_cls=agent_cls,
+                    adapter_cls=adapter_cls,
+                    agent_kwargs=agent_kwargs,
+                    adapter_kwargs=adapter_kwargs,
+                    task=task,
+                    job_name=created_job.name,
                     task_id=task_id,
-                    max_steps_per_task=max_steps_per_task,
+                    max_steps_per_task=max_steps_per_task,
+                    job=created_job,
+                    tracker=tracker,
                     env_creation_semaphore=env_creation_sema,
                     agent_predict_semaphore=agent_predict_sema,
                 )
@@ -588,6 +654,10 @@
     except Exception as e:
         logger.error("Error awaiting progress monitor task: %s", e)
 
-    logger.info(
-
+    logger.info(
+        "Job '%s'%s finished local execution phase for %d tasks.",
+        created_job.name,
+        job_desc_suffix,
+        num_tasks,
+    )
     return created_job
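Taken together, run_job now derives `evalset_id` and `gym_id` from its input, forwards the created Job and progress tracker into each _execute_task, and gates work behind up to three semaphores. A sketch of a call that engages all three limits; the agent class, taskset, and values are placeholders, and only the keyword names are taken from the call sites above:

```python
import asyncio

from hud.job import run_job

async def main() -> None:
    taskset = ...    # a hud.taskset.TaskSet obtained elsewhere
    agent_cls = ...  # an Agent subclass, e.g. from hud.agent

    job = await run_job(
        agent_cls=agent_cls,
        task_or_taskset=taskset,
        job_name="browser-evals",            # placeholder name
        max_steps_per_task=20,
        run_parallel=True,
        max_concurrent_tasks=8,              # overall task concurrency
        max_concurrent_env_creations=4,      # cap simultaneous environment boots
        max_concurrent_agent_predictions=4,  # cap simultaneous model calls
    )
    print(len(job.errors), "errors recorded")

asyncio.run(main())
```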
hud/server/__init__.py
CHANGED

@@ -1,5 +1,5 @@
 from __future__ import annotations
 
-from .requests import
+from .requests import make_request, make_request_sync
 
-__all__ = ["
+__all__ = ["make_request", "make_request_sync"]