hud-python 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of hud-python might be problematic.

Files changed (59)
  1. hud/__init__.py +5 -3
  2. hud/adapters/__init__.py +2 -1
  3. hud/adapters/claude/adapter.py +13 -17
  4. hud/adapters/common/adapter.py +3 -3
  5. hud/adapters/common/tests/__init__.py +0 -0
  6. hud/adapters/common/tests/test_adapter.py +277 -0
  7. hud/adapters/common/types.py +3 -6
  8. hud/adapters/operator/adapter.py +22 -29
  9. hud/agent/__init__.py +9 -1
  10. hud/agent/base.py +28 -28
  11. hud/agent/claude.py +69 -60
  12. hud/agent/langchain.py +204 -0
  13. hud/agent/operator.py +75 -67
  14. hud/env/__init__.py +5 -5
  15. hud/env/client.py +2 -2
  16. hud/env/docker_client.py +37 -39
  17. hud/env/environment.py +91 -66
  18. hud/env/local_docker_client.py +5 -7
  19. hud/env/remote_client.py +40 -29
  20. hud/env/remote_docker_client.py +13 -3
  21. hud/evaluators/__init__.py +2 -3
  22. hud/evaluators/base.py +4 -3
  23. hud/evaluators/inspect.py +3 -8
  24. hud/evaluators/judge.py +34 -58
  25. hud/evaluators/match.py +42 -49
  26. hud/evaluators/remote.py +13 -26
  27. hud/evaluators/tests/__init__.py +0 -0
  28. hud/evaluators/tests/test_inspect.py +12 -0
  29. hud/evaluators/tests/test_judge.py +231 -0
  30. hud/evaluators/tests/test_match.py +115 -0
  31. hud/evaluators/tests/test_remote.py +98 -0
  32. hud/exceptions.py +167 -0
  33. hud/gym.py +12 -10
  34. hud/job.py +525 -47
  35. hud/server/__init__.py +2 -2
  36. hud/server/requests.py +148 -186
  37. hud/server/tests/__init__.py +0 -0
  38. hud/server/tests/test_requests.py +275 -0
  39. hud/settings.py +3 -2
  40. hud/task.py +12 -22
  41. hud/taskset.py +44 -11
  42. hud/trajectory.py +6 -9
  43. hud/types.py +14 -9
  44. hud/utils/__init__.py +2 -2
  45. hud/utils/common.py +37 -13
  46. hud/utils/config.py +44 -29
  47. hud/utils/progress.py +149 -0
  48. hud/utils/telemetry.py +10 -11
  49. hud/utils/tests/__init__.py +0 -0
  50. hud/utils/tests/test_common.py +52 -0
  51. hud/utils/tests/test_config.py +129 -0
  52. hud/utils/tests/test_progress.py +225 -0
  53. hud/utils/tests/test_telemetry.py +37 -0
  54. hud/utils/tests/test_version.py +8 -0
  55. {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/METADATA +44 -21
  56. hud_python-0.2.3.dist-info/RECORD +62 -0
  57. hud_python-0.2.1.dist-info/RECORD +0 -44
  58. {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/WHEEL +0 -0
  59. {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/licenses/LICENSE +0 -0
hud/job.py CHANGED
@@ -1,17 +1,27 @@
 from __future__ import annotations
 
+import asyncio
 import datetime
 import functools
 import inspect
 import logging
-from collections.abc import Callable
-from typing import Any, TypeVar, cast
+import sys
+from collections.abc import Callable, Coroutine
+from typing import TYPE_CHECKING, Any, TypeVar, cast
 
-from pydantic import BaseModel, TypeAdapter
+from pydantic import BaseModel, PrivateAttr, TypeAdapter
 
-from hud.server import make_request
+import hud.server
+from hud import gym
 from hud.settings import settings
+from hud.task import Task
+from hud.taskset import TaskSet
 from hud.trajectory import Trajectory
+from hud.utils.progress import StepProgressTracker
+
+if TYPE_CHECKING:
+    from hud.adapters.common import Adapter
+    from hud.agent.base import Agent
 
 logger = logging.getLogger("hud.job")
 
@@ -21,11 +31,12 @@ T = TypeVar("T", bound=Callable)
 # Global registry to store active jobs created by decorators
 _ACTIVE_JOBS = {}
 
+
 class Job(BaseModel):
     """
     A job represents a collection of related trajectories.
     It holds metadata and provides methods to interact with job data.
-    Instances should typically be obtained via `create_job` or `load_job`.
+    Instances should typically be obtained via `create_job`, `load_job`, or the new `run_job`.
     """
 
     id: str
@@ -33,28 +44,94 @@ class Job(BaseModel):
     metadata: dict[str, Any] | None = None
     created_at: datetime.datetime
     status: str
-
-    async def load_trajectories(self, *, api_key: str | None = None) -> list[Trajectory]:
+
+    # Internal cache for trajectories
+    _trajectories: list[Trajectory] | None = PrivateAttr(default=None)
+    # Store execution errors for debugging
+    errors: list[dict[str, Any]] = []
+
+    async def load_trajectories(
+        self, *, api_key: str | None = None, force_reload: bool = False
+    ) -> list[Trajectory]:
         """
         Loads the trajectories associated with this job.
+        Uses cached results unless force_reload is True.
+
+        Args:
+            api_key: Optional API key.
+            force_reload: If True, fetches trajectories from the API even if cached.
 
         Returns:
             List[Trajectory]: The trajectories in the job
         """
+        if self._trajectories is not None and not force_reload:
+            logger.debug("Returning cached trajectories for Job %s", self.id)
+            return self._trajectories
+
+        logger.debug("Fetching trajectories for Job %s from API...", self.id)
         api_key = api_key or settings.api_key
-
-        data = await make_request(
-            method="GET",
-            url=f"{settings.base_url}/v2/jobs/{self.id}/trajectories",
-            api_key=api_key,
-        )
-
-        return TypeAdapter(list[Trajectory]).validate_python(data)
 
+        try:
+            data = await hud.server.make_request(
+                method="GET",
+                url=f"{settings.base_url}/v2/jobs/{self.id}/trajectories",
+                api_key=api_key,
+            )
+            self._trajectories = TypeAdapter(list[Trajectory]).validate_python(data)
+            logger.debug("Loaded %d trajectories for Job %s", len(self._trajectories), self.id)
+            return self._trajectories
+        except Exception as e:
+            logger.exception("Failed to load trajectories for Job %s: %s", self.id, e)
+            self._trajectories = None  # Ensure cache is cleared on error
+            return []  # Return empty list on error
+
+    async def get_analytics(self, *, force_reload: bool = False) -> dict[str, Any]:
+        """
+        Calculates and returns analytics for the job based on its trajectories.
 
-async def create_job(name: str, gym_id: str | None = None,
-                     evalset_id: str | None = None,
-                     metadata: dict[str, Any] | None = None) -> Job:
+        Args:
+            force_reload: If True, re-fetches trajectories before calculating.
+
+        Returns:
+            Dictionary containing analytics (e.g., task_count, avg_reward).
+        """
+        trajectories = await self.load_trajectories(force_reload=force_reload)
+
+        task_count = len(trajectories)
+        if task_count == 0:
+            return {"task_count": 0, "avg_reward": None, "success_rate": None}  # Or other default
+
+        total_reward = 0
+        successful_tasks = 0
+        valid_rewards = 0
+
+        for traj in trajectories:
+            # Example: Assume reward is numeric and success is reward >= 1.0
+            # Adjust based on actual trajectory data structure and evaluation logic
+            if isinstance(traj.reward, int | float):
+                total_reward += traj.reward
+                valid_rewards += 1
+                if traj.reward >= 1.0:
+                    successful_tasks += 1
+            # Add more complex logic here if needed based on traj.evaluation_result or metadata
+
+        avg_reward = (total_reward / valid_rewards) if valid_rewards > 0 else None
+        success_rate = (successful_tasks / task_count) * 100 if task_count > 0 else None
+
+        return {
+            "task_count": task_count,
+            "avg_reward": avg_reward,
+            "success_rate": success_rate,
+            # Add other relevant stats here
+        }
+
+
+async def create_job(
+    name: str,
+    gym_id: str | None = None,
+    evalset_id: str | None = None,
+    metadata: dict[str, Any] | None = None,
+) -> Job:
     """
     Creates a new job.
 
@@ -68,7 +145,7 @@ async def create_job(name: str, gym_id: str | None = None,
     api_key = settings.api_key
     metadata = metadata or {}
 
-    data = await make_request(
+    data = await hud.server.make_request(
         method="POST",
         url=f"{settings.base_url}/v2/jobs",
         json={
@@ -79,17 +156,19 @@
         },
         api_key=api_key,
     )
-
+
     # Assume the backend API returns the full job data upon creation
     # or at least the necessary fields (id, name, metadata, created_at, status)
    # If not, we might need to make a subsequent GET request
-    job_data = data # Adjust if the API response structure is different
-
+    job_data = data  # Adjust if the API response structure is different
+
+    logger.info("[HUD] View job at https://app.hud.so/jobs/%s.", job_data["id"])
+
     return Job(
         id=job_data["id"],
         name=job_data["name"],
-        metadata=job_data.get("metadata", {}), # Ensure metadata is dict
-        created_at=datetime.datetime.fromisoformat(job_data["created_at"]), # Parse datetime
+        metadata=job_data.get("metadata", {}),  # Ensure metadata is dict
+        created_at=datetime.datetime.fromisoformat(job_data["created_at"]),  # Parse datetime
         status=job_data["status"],
     )
 
@@ -105,63 +184,58 @@ async def load_job(job_id: str, api_key: str | None = None) -> Job:
         Job: The retrieved job instance
     """
     api_key = api_key or settings.api_key
-
-    data = await make_request(
+
+    data = await hud.server.make_request(
         method="GET",
         url=f"{settings.base_url}/v2/jobs/{job_id}",
         api_key=api_key,
     )
-
+
     if not data:
         raise ValueError(f"Job {job_id} not found")
-
+
     # Validate and create the Job instance from the fetched data
     return Job.model_validate(data)
 
 
-def job(
-    name: str,
-    metadata: dict[str, Any] | None = None
-) -> Callable[[T], T]:
+def job(name: str, metadata: dict[str, Any] | None = None) -> Callable[[T], T]:
     """
     Decorator to automatically create and associate a job with all environments
     created within the decorated function.
-
+
     Args:
         name: The name of the job
        metadata: Additional metadata for the job
-
+
     Returns:
         A decorator function that creates a job and associates it with environments
     """
+
    def decorator(func: T) -> T:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Create a job for this function call using the new function
-            job = await create_job(
-                name=name,
-                metadata=metadata
-            )
-
+            job = await create_job(name=name, metadata=metadata)
+
            # Store in global registry with a unique key based on function and call
            call_id = f"{func.__module__}.{func.__qualname__}_{id(wrapper)}"
            _ACTIVE_JOBS[call_id] = job
-
+
            try:
                # Add the function's frame to the stack for lookup
                frame = inspect.currentframe()
                if frame:
                    frame.f_locals["_job_call_id"] = call_id
-
+
                # Run the decorated function
                result = await func(*args, **kwargs)
                return result
            finally:
                # Clean up
-                if call_id in _ACTIVE_JOBS:
-                    del _ACTIVE_JOBS[call_id]
-
-        return cast(T, wrapper)
+                _ACTIVE_JOBS.pop(call_id, None)
+
+        return cast("T", wrapper)
+
    return decorator
 
 
@@ -169,7 +243,7 @@ def get_active_job() -> Job | None:
     """
     Get the currently active job from the call stack, if any.
     Used internally by gym.make to automatically associate environments with jobs.
-
+
     Returns:
         The active job or None if no job is active
     """
@@ -181,5 +255,409 @@ def get_active_job() -> Job | None:
         if call_id in _ACTIVE_JOBS:
             return _ACTIVE_JOBS[call_id]
         frame = frame.f_back
-
+
     return None
+
+
+async def _execute_task(
+    agent_cls: type[Agent],
+    adapter_cls: type[Adapter] | None,
+    agent_kwargs: dict[str, Any] | None,
+    adapter_kwargs: dict[str, Any] | None,
+    task: Task,
+    job_name: str,
+    task_id: str,
+    max_steps_per_task: int,
+    job: Job,
+    tracker: StepProgressTracker | None = None,
+    # Use semaphores instead of rate limiter
+    env_creation_semaphore: asyncio.Semaphore | None = None,
+    agent_predict_semaphore: asyncio.Semaphore | None = None,
+) -> None:
+    """Helper function to instantiate/run/evaluate a single task, with concurrency limits via
+    semaphores."""
+    if tracker:
+        tracker.start_task(task_id)
+    env = None
+    agent_instance: Agent | None = None
+    status = "error"
+    error_msg = "Initialization failed"
+    try:
+        adapter_instance = None
+        if adapter_cls:
+            adapter_instance = adapter_cls(**(adapter_kwargs or {}))
+        agent_instance = agent_cls(adapter=adapter_instance, **(agent_kwargs or {}))
+        if agent_instance is None:
+            raise RuntimeError("Agent could not be instantiated")
+
+        # Environment creation with semaphore
+        if env_creation_semaphore:
+            async with env_creation_semaphore:
+                env = await gym.make(task, job=job)
+        else:
+            env = await gym.make(task, job=job)
+
+        obs_tuple = await env.reset()
+        if obs_tuple is None:
+            raise ValueError(f"env.reset() returned None for task {task_id}")
+        obs, _ = obs_tuple
+
+        step_error = None
+        for step in range(max_steps_per_task):
+            action, done = (None, False)
+            try:
+                # Agent prediction with semaphore
+                if agent_predict_semaphore:
+                    async with agent_predict_semaphore:
+                        action, done = await agent_instance.predict(obs)
+                else:
+                    action, done = await agent_instance.predict(obs)
+
+                if tracker:
+                    tracker.increment_step(task_id)
+
+                if action is None and not done:
+                    done = True
+
+                step_result = await env.step(action)
+                if step_result is None:
+                    terminated = True
+                else:
+                    obs, _, terminated, _ = step_result
+                if terminated or done:
+                    break
+
+            except Exception as agent_step_err:
+                logger.exception(
+                    "[Job: %s/%s, Task: %s] Step %d Error: %s",
+                    job.name,
+                    job.id,
+                    task_id,
+                    step + 1,
+                    agent_step_err,
+                )
+                step_error = f"Error at step {step + 1}: {agent_step_err}"
+                # Store step error in job
+                job.errors.append(
+                    {
+                        "task_id": task_id,
+                        "type": "step_error",
+                        "step": step + 1,
+                        "error": str(agent_step_err),
+                        "timestamp": datetime.datetime.now().isoformat(),
+                    }
+                )
+                break
+        else:
+            logger.warning("[Job: %s/%s, Task: %s] Max steps reached.", job.name, job.id, task_id)
+
+        # --- Evaluate Task ---
+        evaluation_result = None
+        if step_error:
+            status = "error"
+            error_msg = step_error
+        else:
+            try:
+                evaluation_result = await env.evaluate()
+                status = "completed"
+                error_msg = None
+            except Exception as eval_err:
+                logger.exception(
+                    "[Job: %s/%s, Task: %s] Evaluation Error: %s",
+                    job.name,
+                    job.id,
+                    task_id,
+                    eval_err,
+                )
+                status = "error"
+                error_msg = f"Evaluation failed: {eval_err}"
+                # Store evaluation error in job
+                job.errors.append(
+                    {
+                        "task_id": task_id,
+                        "type": "evaluation_error",
+                        "error": str(eval_err),
+                        "timestamp": datetime.datetime.now().isoformat(),
+                    }
+                )
+
+    except Exception as e:
+        logger.exception("[Job: %s/%s, Task: %s] Setup/Run Error: %s", job.name, job.id, task_id, e)
+        status = "error"
+        error_msg = str(e)
+        # Store setup/initialization error in job
+        job.errors.append(
+            {
+                "task_id": task_id,
+                "type": "setup_error",
+                "error": str(e),
+                "timestamp": datetime.datetime.now().isoformat(),
+            }
+        )
+
+    finally:
+        if tracker:
+            tracker.finish_task(task_id)
+        if env:
+            try:
+                await env.close()
+            except Exception as close_err:
+                logger.exception(
+                    "[Job: %s/%s, Task: %s] Close Error: %s", job.name, job.id, task_id, close_err
+                )
+                # Store environment close error in job
+                job.errors.append(
+                    {
+                        "task_id": task_id,
+                        "type": "env_close_error",
+                        "error": str(close_err),
+                        "timestamp": datetime.datetime.now().isoformat(),
+                    }
+                )
+
+        log_suffix = f" Error: {error_msg}" if status == "error" else f" Eval: {evaluation_result}"
+        logger.info(
+            "[Job: %s/%s, Task: %s] Finished local execution. Status: %s.%s",
+            job.name,
+            job.id,
+            task_id,
+            status,
+            log_suffix,
+        )
+
+
+async def _progress_monitor(tracker: StepProgressTracker, interval: float = 1.0) -> None:
+    """Coroutine to periodically display progress using the tracker."""
+    try:
+        while not tracker.is_finished():
+            sys.stderr.write(f"\r{tracker.display()}")
+            sys.stderr.flush()
+            await asyncio.sleep(interval)
+        sys.stderr.write(f"\r{tracker.display()}\n")
+        sys.stderr.flush()
+        logger.debug("Progress monitor finished.")
+    except asyncio.CancelledError:
+        sys.stderr.write("\nProgress monitor cancelled.\n")
+        sys.stderr.flush()
+        logger.debug("Progress monitor cancelled.")
+    except Exception as e:
+        sys.stderr.write(f"\nProgress monitor error: {e}\n")
+        sys.stderr.flush()
+        logger.exception("Progress monitor error: %s", e)
+
+
+# --- New run_job function ---
+
+
+async def run_job(
+    agent_cls: type[Agent],
+    task_or_taskset: Task | TaskSet,
+    job_name: str,
+    adapter_cls: type[Adapter] | None = None,
+    agent_kwargs: dict[str, Any] | None = None,
+    adapter_kwargs: dict[str, Any] | None = None,
+    max_steps_per_task: int = 20,
+    run_parallel: bool = True,
+    job_metadata: dict[str, Any] | None = None,
+    show_progress: bool = True,
+    # Concurrency control with semaphores
+    max_concurrent_env_creations: int | None = 30,  # Limits env.make calls
+    max_concurrent_agent_predictions: int | None = 30,  # Limits agent.predict calls
+    max_concurrent_tasks: int | None = 30,  # Limits overall task concurrency
+) -> Job:
+    """
+    Creates Job, executes tasks locally, linking them to the Job.
+    Instantiates agent/adapter per task. Shows step-based progress.
+
+    Controls concurrency in three ways:
+    1. Limits concurrent environment creations
+    2. Limits concurrent agent predictions
+    3. Limits overall concurrent tasks (when run_parallel=True)
+
+    All concurrency controls use semaphores for reliability.
+    Tracks all errors that occur during execution in job.errors.
+
+    Args:
+        agent_cls: Agent class to instantiate.
+        task_or_taskset: Task or TaskSet to run.
+        job_name: Name for the Job.
+        adapter_cls: Optional Adapter class.
+        agent_kwargs: Optional kwargs for agent constructor.
+        adapter_kwargs: Optional kwargs for adapter constructor.
+        max_steps_per_task: Step limit per task.
+        run_parallel: Run TaskSet tasks concurrently if True (limited by max_concurrent_tasks).
+        job_metadata: Metadata for the created Job.
+        show_progress: Display the step-based progress tracker.
+        max_concurrent_env_creations: Max concurrent environment creation calls.
+        max_concurrent_agent_predictions: Max concurrent agent prediction calls.
+        max_concurrent_tasks: Max number of tasks to run actively at the same time.
+
+    Returns:
+        The created Job object with errors stored in job.errors.
+    """
+    tasks_to_run: list[Task] = []
+    created_job: Job | None = None
+
+    evalset_id = None
+    if isinstance(task_or_taskset, TaskSet):
+        evalset_id = task_or_taskset.id
+
+    gym_id = None
+    if isinstance(task_or_taskset, Task):
+        gym_id = task_or_taskset.gym if isinstance(task_or_taskset.gym, str) else None
+    elif isinstance(task_or_taskset, TaskSet):
+        gym_id = (
+            task_or_taskset.tasks[0].gym if isinstance(task_or_taskset.tasks[0].gym, str) else None
+        )
+
+    # --- Create Job ---
+    try:
+        logger.info("Creating job with name: '%s'", job_name)
+        created_job = await create_job(
+            name=job_name,
+            metadata=job_metadata,
+            evalset_id=evalset_id,
+            gym_id=gym_id,
+        )
+        logger.info("Created job with ID: %s", created_job.id)
+    except Exception as e:
+        logger.exception("Failed to create job '%s': %s", job_name, e)
+        raise
+
+    # --- Task Setup ---
+    is_taskset = isinstance(task_or_taskset, TaskSet)
+    if is_taskset:
+        tasks_to_run = task_or_taskset.tasks if task_or_taskset.tasks else []
+    elif isinstance(task_or_taskset, Task):
+        tasks_to_run = [task_or_taskset]
+        run_parallel = False
+    else:
+        raise TypeError("task_or_taskset must be either a Task or a TaskSet")
+
+    if not tasks_to_run:
+        logger.warning("Job '%s' (%s): No tasks found to run.", created_job.name, created_job.id)
+        return created_job
+
+    task_ids = [(str(task.id) if task.id else f"task_{i}") for i, task in enumerate(tasks_to_run)]
+    num_tasks = len(tasks_to_run)
+
+    # --- Create semaphores for concurrency control ---
+    env_creation_sema = None
+    if max_concurrent_env_creations and max_concurrent_env_creations > 0:
+        env_creation_sema = asyncio.Semaphore(max_concurrent_env_creations)
+        logger.info(
+            "Limiting concurrent environment creations to %d.", max_concurrent_env_creations
+        )
+
+    agent_predict_sema = None
+    if max_concurrent_agent_predictions and max_concurrent_agent_predictions > 0:
+        agent_predict_sema = asyncio.Semaphore(max_concurrent_agent_predictions)
+        logger.info(
+            "Limiting concurrent agent predictions to %d.", max_concurrent_agent_predictions
+        )
+
+    task_execution_sema = None
+    effective_concurrency = num_tasks  # Default to running all if parallel
+    if run_parallel and max_concurrent_tasks and max_concurrent_tasks > 0:
+        effective_concurrency = min(num_tasks, max_concurrent_tasks)
+        task_execution_sema = asyncio.Semaphore(effective_concurrency)
+        logger.info("Limiting concurrent task executions to %d.", effective_concurrency)
+    elif not run_parallel:
+        effective_concurrency = 1  # Sequential means concurrency of 1
+
+    # --- Instantiate Tracker & Start Monitor ---
+    tracker = None
+    monitor_task = None
+    if show_progress and num_tasks > 0:
+        tracker = StepProgressTracker(total_tasks=num_tasks, max_steps_per_task=max_steps_per_task)
+        monitor_task = asyncio.create_task(_progress_monitor(tracker))
+
+    # --- Execute Tasks ---
+    job_desc_suffix = f" (Job ID: {created_job.id})"
+
+    async def task_wrapper(task_coro: Coroutine, semaphore: asyncio.Semaphore | None) -> None:
+        if semaphore:
+            async with semaphore:
+                await task_coro
+        else:
+            await task_coro
+
+    try:
+        if run_parallel and is_taskset:
+            logger.info(
+                "Job '%s'%s: Running %d tasks with concurrency %d.",
+                created_job.name,
+                job_desc_suffix,
+                num_tasks,
+                effective_concurrency,
+            )
+
+            task_coroutines = [
+                _execute_task(
+                    agent_cls=agent_cls,
+                    adapter_cls=adapter_cls,
+                    agent_kwargs=agent_kwargs,
+                    adapter_kwargs=adapter_kwargs,
+                    task=task,
+                    job_name=created_job.name,
+                    task_id=task_id,
+                    max_steps_per_task=max_steps_per_task,
+                    job=created_job,
+                    tracker=tracker,
+                    env_creation_semaphore=env_creation_sema,
+                    agent_predict_semaphore=agent_predict_sema,
+                )
+                for task, task_id in zip(tasks_to_run, task_ids, strict=True)
+            ]
+
+            # Wrap coroutines with semaphore management if limiting concurrency
+            wrapped_tasks = [
+                task_wrapper(coro, task_execution_sema) for i, coro in enumerate(task_coroutines)
+            ]
+
+            # Run all wrapped tasks
+            await asyncio.gather(*wrapped_tasks)
+
+        else:
+            # SEQUENTIAL (or single task)
+            logger.info(
+                "Job '%s'%s: Running %d tasks sequentially.",
+                created_job.name,
+                job_desc_suffix,
+                num_tasks,
+            )
+            for i, task in enumerate(tasks_to_run):
+                task_id = task_ids[i]
+                await _execute_task(
+                    agent_cls=agent_cls,
+                    adapter_cls=adapter_cls,
+                    agent_kwargs=agent_kwargs,
+                    adapter_kwargs=adapter_kwargs,
+                    task=task,
+                    job_name=created_job.name,
+                    task_id=task_id,
+                    max_steps_per_task=max_steps_per_task,
+                    job=created_job,
+                    tracker=tracker,
+                    env_creation_semaphore=env_creation_sema,
+                    agent_predict_semaphore=agent_predict_sema,
+                )
+
+    finally:
+        # Ensure monitor task is stopped and awaited cleanly
+        if monitor_task is not None and not monitor_task.done():
+            monitor_task.cancel()
+            try:
+                await monitor_task
+            except asyncio.CancelledError:
+                pass
+            except Exception as e:
+                logger.error("Error awaiting progress monitor task: %s", e)
+
+    logger.info(
+        "Job '%s'%s finished local execution phase for %d tasks.",
+        created_job.name,
+        job_desc_suffix,
+        num_tasks,
+    )
+    return created_job
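
Usage note: the sketch below shows the call shape of the new run_job entry point, inferred only from the signature and docstring in the hunk above. MyAgent, MyAdapter, and my_taskset are hypothetical stand-ins for a user-supplied Agent subclass, Adapter subclass, and a loaded Task/TaskSet; none of them are defined in this diff.

    import asyncio

    from hud.job import run_job

    async def main() -> None:
        # MyAgent / MyAdapter / my_taskset are placeholders for whatever
        # Agent and Adapter subclasses and Task/TaskSet instance the caller has.
        job = await run_job(
            agent_cls=MyAgent,
            task_or_taskset=my_taskset,
            job_name="nightly-eval",
            adapter_cls=MyAdapter,
            max_steps_per_task=20,
            run_parallel=True,
            max_concurrent_tasks=10,  # caps tasks in flight; env/predict caps default to 30
        )
        # Per-task failures are collected rather than raised:
        print(job.errors)
        # Aggregate stats computed from the job's trajectories:
        print(await job.get_analytics())

    asyncio.run(main())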
hud/server/__init__.py CHANGED
@@ -1,5 +1,5 @@
 from __future__ import annotations
 
-from .requests import RequestError, make_request, make_request_sync
+from .requests import make_request, make_request_sync
 
-__all__ = ["RequestError", "make_request", "make_request_sync"]
+__all__ = ["make_request", "make_request_sync"]
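
Migration note: RequestError is no longer re-exported from hud.server in 0.2.3. Given the new hud/exceptions.py module added in this release (see the file list above), the replacement import below is a plausible guess; the exact new location is an assumption not confirmed by this diff.

    # 0.2.1:
    #   from hud.server import RequestError
    # 0.2.3 (assumed new home, based on the new hud/exceptions.py module):
    from hud.exceptions import RequestError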