hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hud-python might be problematic.

Files changed (88)
  1. hud/__init__.py +13 -1
  2. hud/agents/base.py +14 -3
  3. hud/agents/lite_llm.py +1 -1
  4. hud/agents/openai_chat_generic.py +15 -3
  5. hud/agents/tests/test_base.py +9 -2
  6. hud/agents/tests/test_base_runtime.py +164 -0
  7. hud/cli/__init__.py +18 -25
  8. hud/cli/build.py +35 -27
  9. hud/cli/dev.py +11 -29
  10. hud/cli/eval.py +114 -145
  11. hud/cli/tests/test_analyze_module.py +120 -0
  12. hud/cli/tests/test_build.py +26 -3
  13. hud/cli/tests/test_build_failure.py +41 -0
  14. hud/cli/tests/test_build_module.py +50 -0
  15. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  16. hud/cli/tests/test_cli_root.py +134 -0
  17. hud/cli/tests/test_eval.py +4 -0
  18. hud/cli/tests/test_mcp_server.py +8 -7
  19. hud/cli/tests/test_push_happy.py +74 -0
  20. hud/cli/tests/test_push_wrapper.py +23 -0
  21. hud/cli/utils/docker.py +120 -1
  22. hud/cli/utils/runner.py +1 -1
  23. hud/cli/utils/tasks.py +4 -1
  24. hud/cli/utils/tests/__init__.py +0 -0
  25. hud/cli/utils/tests/test_config.py +58 -0
  26. hud/cli/utils/tests/test_docker.py +93 -0
  27. hud/cli/utils/tests/test_docker_hints.py +71 -0
  28. hud/cli/utils/tests/test_env_check.py +74 -0
  29. hud/cli/utils/tests/test_environment.py +42 -0
  30. hud/cli/utils/tests/test_interactive_module.py +60 -0
  31. hud/cli/utils/tests/test_local_runner.py +50 -0
  32. hud/cli/utils/tests/test_logging_utils.py +23 -0
  33. hud/cli/utils/tests/test_metadata.py +49 -0
  34. hud/cli/utils/tests/test_package_runner.py +35 -0
  35. hud/cli/utils/tests/test_registry_utils.py +49 -0
  36. hud/cli/utils/tests/test_remote_runner.py +25 -0
  37. hud/cli/utils/tests/test_runner_modules.py +52 -0
  38. hud/cli/utils/tests/test_source_hash.py +36 -0
  39. hud/cli/utils/tests/test_tasks.py +80 -0
  40. hud/cli/utils/version_check.py +257 -0
  41. hud/clients/base.py +1 -1
  42. hud/clients/mcp_use.py +3 -1
  43. hud/datasets/parallel.py +2 -2
  44. hud/datasets/runner.py +85 -24
  45. hud/datasets/tests/__init__.py +0 -0
  46. hud/datasets/tests/test_runner.py +106 -0
  47. hud/datasets/tests/test_utils.py +228 -0
  48. hud/otel/config.py +8 -6
  49. hud/otel/context.py +4 -4
  50. hud/otel/exporters.py +231 -57
  51. hud/otel/tests/__init__.py +0 -1
  52. hud/otel/tests/test_instrumentation.py +207 -0
  53. hud/rl/learner.py +1 -1
  54. hud/server/tests/test_server_extra.py +2 -0
  55. hud/shared/exceptions.py +35 -9
  56. hud/shared/hints.py +25 -0
  57. hud/shared/requests.py +15 -3
  58. hud/shared/tests/test_exceptions.py +39 -30
  59. hud/shared/tests/test_hints.py +167 -0
  60. hud/telemetry/__init__.py +30 -6
  61. hud/telemetry/async_context.py +331 -0
  62. hud/telemetry/job.py +51 -12
  63. hud/telemetry/tests/test_async_context.py +242 -0
  64. hud/telemetry/tests/test_instrument.py +414 -0
  65. hud/telemetry/tests/test_job.py +609 -0
  66. hud/telemetry/tests/test_trace.py +184 -6
  67. hud/telemetry/trace.py +16 -17
  68. hud/tools/computer/qwen.py +4 -1
  69. hud/tools/computer/settings.py +2 -2
  70. hud/tools/executors/base.py +4 -2
  71. hud/tools/tests/test_submit.py +85 -0
  72. hud/tools/tests/test_types.py +193 -0
  73. hud/types.py +7 -1
  74. hud/utils/agent_factories.py +1 -3
  75. hud/utils/mcp.py +1 -1
  76. hud/utils/task_tracking.py +223 -0
  77. hud/utils/tests/test_agent_factories.py +60 -0
  78. hud/utils/tests/test_mcp.py +4 -6
  79. hud/utils/tests/test_pretty_errors.py +186 -0
  80. hud/utils/tests/test_tasks.py +187 -0
  81. hud/utils/tests/test_tool_shorthand.py +154 -0
  82. hud/utils/tests/test_version.py +1 -1
  83. hud/version.py +1 -1
  84. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
  85. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
  86. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
  87. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
  88. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/utils/task_tracking.py
@@ -0,0 +1,223 @@
+"""Task tracking for async telemetry operations.
+
+This module provides infrastructure to track async tasks created during
+telemetry operations (status updates, metric logging) to ensure they
+complete before process shutdown, preventing telemetry loss.
+
+The task tracker maintains strong references to tasks and explicitly cleans
+them up when they complete via callbacks. This ensures tasks are not garbage
+collected before they finish executing.
+
+Thread Safety:
+    Uses threading.Lock (not asyncio.Lock) because done callbacks run
+    synchronously and need to modify the task set safely.
+
+Race Condition Prevention:
+    The wait_all() method uses a multi-pass approach to catch tasks that
+    are created while waiting for existing tasks to complete.
+
+This is an internal module used by async context managers and cleanup
+routines. Users typically don't interact with it directly.
+"""
+
+import asyncio
+import contextlib
+import logging
+import threading
+from collections.abc import Coroutine
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Module exports
+__all__ = ["TaskTracker", "track_task", "wait_all_tasks"]
+
+# Global singleton task tracker
+_global_tracker: "TaskTracker | None" = None
+
+
+class TaskTracker:
+    """Tracks async tasks to ensure completion before shutdown.
+
+    Maintains a set of tasks with thread-safe access for both async code
+    and synchronous callbacks. Tasks are automatically removed when they
+    complete via done callbacks.
+    """
+
+    def __init__(self) -> None:
+        self._tasks: set[asyncio.Task] = set()
+        # Use threading.Lock for synchronous access from done callbacks
+        self._lock = threading.Lock()
+
+    def track_task(self, coro: Coroutine[Any, Any, Any], name: str = "task") -> asyncio.Task | None:
+        """Create and track an async task.
+
+        Args:
+            coro: The coroutine to run
+            name: Descriptive name for debugging and logging
+
+        Returns:
+            The created asyncio.Task, or None if no event loop is available
+        """
+        try:
+            task = asyncio.create_task(coro, name=name)
+
+            # Add task to tracking set (thread-safe)
+            with self._lock:
+                self._tasks.add(task)
+                task_count = len(self._tasks)
+
+            # Setup cleanup callback
+            def cleanup_callback(completed_task: asyncio.Task) -> None:
+                """Remove completed task from tracking set and log failures."""
+                with self._lock:
+                    self._tasks.discard(completed_task)
+
+                # Log exceptions outside lock to avoid blocking
+                with contextlib.suppress(Exception):
+                    if not completed_task.cancelled():
+                        with contextlib.suppress(Exception):
+                            exc = completed_task.exception()
+                            if exc:
+                                logger.warning("Task '%s' failed: %s", name, exc)
+
+            task.add_done_callback(cleanup_callback)
+            logger.debug("Tracking task '%s' (total active: %d)", name, task_count)
+            return task
+
+        except RuntimeError as e:
+            # No event loop - fall back to fire_and_forget
+            logger.warning("Cannot track task '%s': %s", name, e)
+            from hud.utils.async_utils import fire_and_forget
+
+            fire_and_forget(coro, name)
+            return None
+
+    async def wait_all(self, *, timeout_seconds: float = 30.0) -> int:
+        """Wait for all tracked tasks to complete.
+
+        Uses a multi-pass approach to handle race conditions where tasks are
+        added while waiting for existing tasks to complete. This ensures that
+        status updates created near the end of execution are still waited for.
+
+        Args:
+            timeout_seconds: Maximum time to wait in seconds
+
+        Returns:
+            Number of tasks that completed
+        """
+        total_completed = 0
+        time_remaining = timeout_seconds
+        max_passes = 10  # Prevent infinite loops if tasks keep spawning
+
+        for pass_num in range(max_passes):
+            # Get snapshot of pending tasks (thread-safe)
+            with self._lock:
+                pending = [t for t in self._tasks if not t.done()]
+
+            if not pending:
+                if pass_num == 0:
+                    logger.debug("No pending tasks to wait for")
+                else:
+                    logger.debug("All tasks completed after %d passes", pass_num)
+                break

+            # Log progress
+            if pass_num == 0:
+                logger.info("Waiting for %d pending tasks...", len(pending))
+            else:
+                logger.debug("Pass %d: Waiting for %d tasks", pass_num + 1, len(pending))
+
+            # Wait for this batch (max 5s per pass to check for new tasks)
+            batch_timeout = min(time_remaining, 5.0) if time_remaining > 0 else 5.0
+            start_time = asyncio.get_event_loop().time()
+
+            try:
+                done, still_pending = await asyncio.wait(
+                    pending, timeout=batch_timeout, return_when=asyncio.ALL_COMPLETED
+                )
+            except Exception as e:
+                logger.error("Error waiting for tasks: %s", e)
+                break
+
+            # Update timing
+            elapsed = asyncio.get_event_loop().time() - start_time
+            time_remaining -= elapsed
+            total_completed += len(done)
+
+            # Handle timeout
+            if still_pending:
+                if time_remaining <= 0:
+                    logger.warning(
+                        "%d tasks still pending after %ss timeout - cancelling",
+                        len(still_pending),
+                        timeout_seconds,
+                    )
+                    for task in still_pending:
+                        task.cancel()
+                    break
+                # Otherwise continue to next pass
+            else:
+                # All tasks from this batch completed, check for new ones
+                with self._lock:
+                    new_pending = [t for t in self._tasks if not t.done()]
+
+                if not new_pending:
+                    # No new tasks were added - we're done
+                    break
+                # Otherwise loop to wait for the new tasks
+
+        if total_completed > 0:
+            logger.info("Completed %d tasks", total_completed)
+
+        return total_completed
+
+    def get_pending_count(self) -> int:
+        """Get number of pending tasks (thread-safe)."""
+        with self._lock:
+            return sum(1 for t in self._tasks if not t.done())
+
+
+def get_global_tracker() -> TaskTracker:
+    """Get or create the global task tracker."""
+    global _global_tracker
+    if _global_tracker is None:
+        _global_tracker = TaskTracker()
+    return _global_tracker
+
+
+def track_task(coro: Coroutine[Any, Any, Any], name: str = "task") -> asyncio.Task | None:
+    """Create and track an async task for telemetry operations.
+
+    This is a convenience function that uses the global tracker to ensure
+    the task completes before shutdown. Used internally by async context
+    managers for status updates and metric logging.
+
+    Args:
+        coro: The coroutine to track
+        name: Descriptive name for debugging
+
+    Returns:
+        The created task, or None if no event loop is available
+    """
+    tracker = get_global_tracker()
+    return tracker.track_task(coro, name)
+
+
+async def wait_all_tasks(*, timeout_seconds: float = 30.0) -> int:
+    """Wait for all tracked telemetry tasks to complete.
+
+    Ensures that all async telemetry operations (status updates, logs)
+    complete before the calling function returns, preventing telemetry loss.
+
+    Uses a multi-pass approach to handle race conditions where status updates
+    are created while waiting for other tasks to complete.
+
+    Args:
+        timeout_seconds: Maximum time to wait for tasks in seconds
+
+    Returns:
+        Number of tasks that completed
+    """
+    tracker = get_global_tracker()
+    return await tracker.wait_all(timeout_seconds=timeout_seconds)
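
For orientation, here is a minimal usage sketch of the new module. Only track_task and wait_all_tasks come from the diff above; the caller and the send_status_update coroutine are hypothetical stand-ins.

import asyncio

from hud.utils.task_tracking import track_task, wait_all_tasks


async def send_status_update() -> None:
    # Hypothetical stand-in for a real telemetry call (status update, metric log).
    await asyncio.sleep(0.1)


async def main() -> None:
    # Schedule work through the global tracker so it holds a strong reference.
    track_task(send_status_update(), name="status_update")
    # Flush everything before shutdown; the multi-pass wait also picks up
    # tasks spawned while earlier ones were completing.
    completed = await wait_all_tasks(timeout_seconds=5.0)
    print(f"flushed {completed} telemetry task(s)")


asyncio.run(main())
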
hud/utils/tests/test_agent_factories.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+
+def test_create_openai_agent():
+    from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
+    from hud.utils.agent_factories import create_openai_agent
+
+    agent = create_openai_agent(
+        api_key="test_key", model_name="test_model", completion_kwargs={"temperature": 0.5}
+    )
+    assert isinstance(agent, GenericOpenAIChatAgent)
+    assert agent.model_name == "test_model"
+    assert agent.completion_kwargs["temperature"] == 0.5
+
+
+def test_create_grounded_agent():
+    with (
+        patch("hud.utils.agent_factories.AsyncOpenAI") as mock_async_openai,
+        patch("hud.utils.agent_factories.GrounderConfig"),
+        patch("hud.utils.agent_factories.GroundedOpenAIChatAgent") as mock_agent_class,
+    ):
+        mock_agent = MagicMock()
+        mock_agent_class.return_value = mock_agent
+
+        from hud.utils.agent_factories import create_grounded_agent
+
+        agent = create_grounded_agent(
+            api_key="test_key",
+            grounder_api_key="grounder_key",
+            model_name="test_model",
+        )
+
+        assert agent == mock_agent
+        mock_async_openai.assert_called_with(api_key="test_key", base_url=None)
+        mock_agent_class.assert_called_once()
+
+
+def test_create_grounded_agent_custom_grounder():
+    with (
+        patch("hud.utils.agent_factories.AsyncOpenAI"),
+        patch("hud.utils.agent_factories.GrounderConfig") as mock_grounder_config,
+        patch("hud.utils.agent_factories.GroundedOpenAIChatAgent"),
+    ):
+        from hud.utils.agent_factories import create_grounded_agent
+
+        create_grounded_agent(
+            api_key="test_key",
+            grounder_api_key="grounder_key",
+            model_name="test_model",
+            grounder_api_base="https://custom.api",
+            grounder_model="custom/model",
+        )
+
+        mock_grounder_config.assert_called_with(
+            api_base="https://custom.api",
+            model="custom/model",
+            api_key="grounder_key",
+        )
hud/utils/tests/test_mcp.py
@@ -90,12 +90,10 @@ class TestPatchMCPConfig:
 class TestSetupHUDTelemetry:
     """Tests for setup_hud_telemetry function."""
 
-    def test_empty_config_raises_error(self):
-        """Test that empty config raises ValueError."""
-        with pytest.raises(
-            ValueError, match="Please run initialize\\(\\) before setting up client-side telemetry"
-        ):
-            setup_hud_telemetry({})
+    def test_empty_config_returns_none(self):
+        """Test that empty config returns None (no servers to set up telemetry for)."""
+        result = setup_hud_telemetry({})
+        assert result is None
 
     def test_none_config_raises_error(self):
        """Test that None config raises ValueError."""
hud/utils/tests/test_pretty_errors.py
@@ -0,0 +1,186 @@
+from __future__ import annotations
+
+import sys
+from unittest.mock import MagicMock, patch
+
+from hud.utils.pretty_errors import (
+    _async_exception_handler,
+    _render_and_fallback,
+    install_pretty_errors,
+)
+
+
+def test_render_and_fallback_hud_exception():
+    """Test _render_and_fallback with HudException."""
+    from hud.shared.exceptions import HudException
+
+    exc = HudException("Test error")
+
+    with (
+        patch("sys.__excepthook__") as mock_excepthook,
+        patch("hud.utils.pretty_errors.hud_console") as mock_console,
+        patch("sys.stderr.flush"),
+    ):
+        _render_and_fallback(HudException, exc, None)
+
+        mock_excepthook.assert_called_once()
+        mock_console.render_exception.assert_called_once_with(exc)
+
+
+def test_render_and_fallback_non_hud_exception():
+    """Test _render_and_fallback with non-HudException."""
+    exc = ValueError("Test error")
+
+    with (
+        patch("sys.__excepthook__") as mock_excepthook,
+        patch("hud.utils.pretty_errors.hud_console") as mock_console,
+    ):
+        _render_and_fallback(ValueError, exc, None)
+
+        mock_excepthook.assert_called_once()
+        # Should not render for non-HudException
+        mock_console.render_exception.assert_not_called()
+
+
+def test_render_and_fallback_rendering_error():
+    """Test _render_and_fallback handles rendering errors gracefully."""
+    from hud.shared.exceptions import HudException
+
+    exc = HudException("Test error")
+
+    with (
+        patch("sys.__excepthook__") as mock_excepthook,
+        patch("hud.utils.pretty_errors.hud_console") as mock_console,
+    ):
+        mock_console.render_exception.side_effect = Exception("Render failed")
+
+        # Should not raise
+        _render_and_fallback(HudException, exc, None)
+
+        mock_excepthook.assert_called_once()
+
+
+def test_async_exception_handler_with_exception():
+    """Test _async_exception_handler with exception in context."""
+    mock_loop = MagicMock()
+    context = {"exception": ValueError("Test error")}
+
+    with patch("hud.utils.pretty_errors.hud_console") as mock_console:
+        _async_exception_handler(mock_loop, context)
+
+        mock_console.render_exception.assert_called_once()
+        mock_loop.default_exception_handler.assert_called_once_with(context)
+
+
+def test_async_exception_handler_with_message():
+    """Test _async_exception_handler with message only."""
+    mock_loop = MagicMock()
+    context = {"message": "Error message"}
+
+    with patch("hud.utils.pretty_errors.hud_console") as mock_console:
+        _async_exception_handler(mock_loop, context)
+
+        mock_console.error.assert_called_once_with("Error message")
+        mock_console.render_support_hint.assert_called_once()
+        mock_loop.default_exception_handler.assert_called_once()
+
+
+def test_async_exception_handler_rendering_error():
+    """Test _async_exception_handler handles rendering errors."""
+    mock_loop = MagicMock()
+    context = {"exception": ValueError("Test")}
+
+    with patch("hud.utils.pretty_errors.hud_console") as mock_console:
+        mock_console.render_exception.side_effect = Exception("Render failed")
+
+        # Should not raise, should call default handler
+        _async_exception_handler(mock_loop, context)
+
+        mock_loop.default_exception_handler.assert_called_once()
+
+
+def test_install_pretty_errors_with_running_loop():
+    """Test install_pretty_errors with a running event loop."""
+    mock_loop = MagicMock()
+
+    with patch("asyncio.get_running_loop", return_value=mock_loop):
+        install_pretty_errors()
+
+        assert sys.excepthook == _render_and_fallback
+        mock_loop.set_exception_handler.assert_called_once_with(_async_exception_handler)
+
+
+def test_install_pretty_errors_no_running_loop():
+    """Test install_pretty_errors without a running loop."""
+    with (
+        patch("asyncio.get_running_loop", side_effect=RuntimeError("No running loop")),
+        patch("asyncio.new_event_loop") as mock_new_loop,
+    ):
+        mock_loop = MagicMock()
+        mock_new_loop.return_value = mock_loop
+
+        install_pretty_errors()
+
+        assert sys.excepthook == _render_and_fallback
+        mock_loop.set_exception_handler.assert_called_once()
+
+
+def test_install_pretty_errors_new_loop_fails():
+    """Test install_pretty_errors when creating new loop fails."""
+    with (
+        patch("asyncio.get_running_loop", side_effect=RuntimeError("No running loop")),
+        patch("asyncio.new_event_loop", side_effect=Exception("Can't create loop")),
+    ):
+        # Should not raise
+        install_pretty_errors()
+
+        assert sys.excepthook == _render_and_fallback
+
+
+def test_install_pretty_errors_set_handler_fails():
+    """Test install_pretty_errors when set_exception_handler fails."""
+    mock_loop = MagicMock()
+    mock_loop.set_exception_handler.side_effect = Exception("Can't set handler")
+
+    with patch("asyncio.get_running_loop", return_value=mock_loop):
+        # Should not raise
+        install_pretty_errors()
+
+        assert sys.excepthook == _render_and_fallback
+
+
+def test_async_exception_handler_no_exception_or_message():
+    """Test _async_exception_handler with empty context."""
+    mock_loop = MagicMock()
+    context = {}
+
+    with patch("hud.utils.pretty_errors.hud_console") as mock_console:
+        _async_exception_handler(mock_loop, context)
+
+        mock_console.render_exception.assert_not_called()
+        mock_console.error.assert_not_called()
+        mock_loop.default_exception_handler.assert_called_once()
+
+
+def test_render_and_fallback_with_traceback():
+    """Test _render_and_fallback includes traceback."""
+    from hud.shared.exceptions import HudException
+
+    exc = HudException("Test error")
+
+    # Create a fake traceback
+    try:
+        raise exc
+    except HudException as e:
+        tb = e.__traceback__
+
+    with (
+        patch("sys.__excepthook__") as mock_excepthook,
+        patch("hud.utils.pretty_errors.hud_console"),
+        patch("sys.stderr.flush"),
+    ):
+        _render_and_fallback(HudException, exc, tb)
+
+        # Should call excepthook with traceback
+        call_args = mock_excepthook.call_args[0]
+        assert call_args[2] == tb
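
Taken together, these tests pin down the module's public entry point. A minimal sketch of its intended use, assuming only what the tests assert:

from hud.utils.pretty_errors import install_pretty_errors

# Installs _render_and_fallback as sys.excepthook and registers
# _async_exception_handler on the running (or a newly created) event loop;
# per the tests, failures during installation are swallowed, not raised.
install_pretty_errors()
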
hud/utils/tests/test_tasks.py
@@ -0,0 +1,187 @@
+from __future__ import annotations
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from hud.types import Task
+from hud.utils.tasks import load_tasks
+
+
+def test_load_tasks_from_list():
+    """Test loading tasks from a list of dictionaries."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    tasks = load_tasks(task_dicts)
+
+    assert len(tasks) == 2
+    assert all(isinstance(t, Task) for t in tasks)
+    assert tasks[0].prompt == "Test task 1"  # type: ignore
+    assert tasks[1].prompt == "Test task 2"  # type: ignore
+
+
+def test_load_tasks_from_list_raw():
+    """Test loading tasks from a list in raw mode."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    tasks = load_tasks(task_dicts, raw=True)
+
+    assert len(tasks) == 2
+    assert all(isinstance(t, dict) for t in tasks)
+    assert tasks[0]["prompt"] == "Test task 1"  # type: ignore
+
+
+def test_load_tasks_from_json_file():
+    """Test loading tasks from a JSON file."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as f:
+        json.dump(task_dicts, f)
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, Task) for t in tasks)
+        assert tasks[0].prompt == "Test task 1"  # type: ignore
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_from_json_file_raw():
+    """Test loading tasks from a JSON file in raw mode."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as f:
+        json.dump(task_dicts, f)
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path, raw=True)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, dict) for t in tasks)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_from_jsonl_file():
+    """Test loading tasks from a JSONL file."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
+    ) as f:
+        for task_dict in task_dicts:
+            f.write(json.dumps(task_dict) + "\n")
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, Task) for t in tasks)
+        assert tasks[0].prompt == "Test task 1"  # type: ignore
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_from_jsonl_file_with_empty_lines():
+    """Test loading tasks from a JSONL file with empty lines."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(json.dumps(task_dicts[0]) + "\n")
+        f.write("\n")  # Empty line
+        f.write(json.dumps(task_dicts[1]) + "\n")
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, Task) for t in tasks)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_from_jsonl_file_with_list():
+    """Test loading tasks from a JSONL file where a line contains a list."""
+    task_dict = {"id": "1", "prompt": "Test task 1", "mcp_config": {}}
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(json.dumps([task_dict, task_dict]) + "\n")
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, Task) for t in tasks)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_json_not_array_error():
+    """Test that loading from JSON file with non-array raises error."""
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as f:
+        json.dump({"not": "an array"}, f)
+        temp_path = f.name
+
+    try:
+        with pytest.raises(ValueError, match="JSON file must contain an array"):
+            load_tasks(temp_path)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_invalid_jsonl_format():
+    """Test that loading from JSONL with invalid format raises error."""
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(json.dumps("invalid") + "\n")
+        temp_path = f.name
+
+    try:
+        with pytest.raises(ValueError, match="Invalid JSONL format"):
+            load_tasks(temp_path)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_invalid_input_type():
+    """Test that invalid input type raises TypeError."""
+    with pytest.raises(TypeError, match="tasks_input must be str or list"):
+        load_tasks(123)  # type: ignore
+
+
+def test_load_tasks_nonexistent_file():
+    """Test that loading from nonexistent file raises error."""
+    with pytest.raises(ValueError, match="neither a file path nor a HuggingFace dataset"):
+        load_tasks("nonexistent_file_without_slash")