PyPI - hud-python - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (192)

hud/__init__.py +22 -89
hud/agents/__init__.py +15 -0
hud/agents/art.py +101 -0
hud/agents/base.py +599 -0
hud/{mcp → agents}/claude.py +373 -321
hud/{mcp → agents}/langchain.py +250 -250
hud/agents/misc/__init__.py +7 -0
hud/{agent → agents}/misc/response_agent.py +80 -80
hud/{mcp → agents}/openai.py +352 -334
hud/agents/openai_chat_generic.py +154 -0
hud/{mcp → agents}/tests/__init__.py +1 -1
hud/agents/tests/test_base.py +742 -0
hud/agents/tests/test_claude.py +324 -0
hud/{mcp → agents}/tests/test_client.py +363 -324
hud/{mcp → agents}/tests/test_openai.py +237 -238
hud/cli/__init__.py +617 -0
hud/cli/__main__.py +8 -0
hud/cli/analyze.py +371 -0
hud/cli/analyze_metadata.py +230 -0
hud/cli/build.py +427 -0
hud/cli/clone.py +185 -0
hud/cli/cursor.py +92 -0
hud/cli/debug.py +392 -0
hud/cli/docker_utils.py +83 -0
hud/cli/init.py +281 -0
hud/cli/interactive.py +353 -0
hud/cli/mcp_server.py +756 -0
hud/cli/pull.py +336 -0
hud/cli/push.py +370 -0
hud/cli/remote_runner.py +311 -0
hud/cli/runner.py +160 -0
hud/cli/tests/__init__.py +3 -0
hud/cli/tests/test_analyze.py +284 -0
hud/cli/tests/test_cli_init.py +265 -0
hud/cli/tests/test_cli_main.py +27 -0
hud/cli/tests/test_clone.py +142 -0
hud/cli/tests/test_cursor.py +253 -0
hud/cli/tests/test_debug.py +453 -0
hud/cli/tests/test_mcp_server.py +139 -0
hud/cli/tests/test_utils.py +388 -0
hud/cli/utils.py +263 -0
hud/clients/README.md +143 -0
hud/clients/__init__.py +16 -0
hud/clients/base.py +379 -0
hud/clients/fastmcp.py +222 -0
hud/clients/mcp_use.py +278 -0
hud/clients/tests/__init__.py +1 -0
hud/clients/tests/test_client_integration.py +111 -0
hud/clients/tests/test_fastmcp.py +342 -0
hud/clients/tests/test_protocol.py +188 -0
hud/clients/utils/__init__.py +1 -0
hud/clients/utils/retry_transport.py +160 -0
hud/datasets.py +322 -192
hud/misc/__init__.py +1 -0
hud/{agent → misc}/claude_plays_pokemon.py +292 -283
hud/otel/__init__.py +35 -0
hud/otel/collector.py +142 -0
hud/otel/config.py +164 -0
hud/otel/context.py +536 -0
hud/otel/exporters.py +366 -0
hud/otel/instrumentation.py +97 -0
hud/otel/processors.py +118 -0
hud/otel/tests/__init__.py +1 -0
hud/otel/tests/test_processors.py +197 -0
hud/server/__init__.py +5 -5
hud/server/context.py +114 -0
hud/server/helper/__init__.py +5 -0
hud/server/low_level.py +132 -0
hud/server/server.py +166 -0
hud/server/tests/__init__.py +3 -0
hud/settings.py +73 -79
hud/shared/__init__.py +5 -0
hud/{exceptions.py → shared/exceptions.py} +180 -180
hud/{server → shared}/requests.py +264 -264
hud/shared/tests/test_exceptions.py +157 -0
hud/{server → shared}/tests/test_requests.py +275 -275
hud/telemetry/__init__.py +25 -30
hud/telemetry/instrument.py +379 -0
hud/telemetry/job.py +309 -141
hud/telemetry/replay.py +74 -0
hud/telemetry/trace.py +83 -0
hud/tools/__init__.py +33 -34
hud/tools/base.py +365 -65
hud/tools/bash.py +161 -137
hud/tools/computer/__init__.py +15 -13
hud/tools/computer/anthropic.py +437 -420
hud/tools/computer/hud.py +376 -334
hud/tools/computer/openai.py +295 -292
hud/tools/computer/settings.py +82 -0
hud/tools/edit.py +314 -290
hud/tools/executors/__init__.py +30 -30
hud/tools/executors/base.py +539 -532
hud/tools/executors/pyautogui.py +621 -619
hud/tools/executors/tests/__init__.py +1 -1
hud/tools/executors/tests/test_base_executor.py +338 -338
hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
hud/tools/executors/xdo.py +511 -503
hud/tools/{playwright_tool.py → playwright.py} +412 -379
hud/tools/tests/__init__.py +3 -3
hud/tools/tests/test_base.py +282 -0
hud/tools/tests/test_bash.py +158 -152
hud/tools/tests/test_bash_extended.py +197 -0
hud/tools/tests/test_computer.py +425 -52
hud/tools/tests/test_computer_actions.py +34 -34
hud/tools/tests/test_edit.py +259 -240
hud/tools/tests/test_init.py +27 -27
hud/tools/tests/test_playwright_tool.py +183 -183
hud/tools/tests/test_tools.py +145 -157
hud/tools/tests/test_utils.py +156 -156
hud/tools/types.py +72 -0
hud/tools/utils.py +50 -50
hud/types.py +136 -89
hud/utils/__init__.py +10 -16
hud/utils/async_utils.py +65 -0
hud/utils/design.py +168 -0
hud/utils/mcp.py +55 -0
hud/utils/progress.py +149 -149
hud/utils/telemetry.py +66 -66
hud/utils/tests/test_async_utils.py +173 -0
hud/utils/tests/test_init.py +17 -21
hud/utils/tests/test_progress.py +261 -225
hud/utils/tests/test_telemetry.py +82 -37
hud/utils/tests/test_version.py +8 -8
hud/version.py +7 -7
hud_python-0.4.1.dist-info/METADATA +476 -0
hud_python-0.4.1.dist-info/RECORD +132 -0
hud_python-0.4.1.dist-info/entry_points.txt +3 -0
{hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/licenses/LICENSE +21 -21
hud/adapters/__init__.py +0 -8
hud/adapters/claude/__init__.py +0 -5
hud/adapters/claude/adapter.py +0 -180
hud/adapters/claude/tests/__init__.py +0 -1
hud/adapters/claude/tests/test_adapter.py +0 -519
hud/adapters/common/__init__.py +0 -6
hud/adapters/common/adapter.py +0 -178
hud/adapters/common/tests/test_adapter.py +0 -289
hud/adapters/common/types.py +0 -446
hud/adapters/operator/__init__.py +0 -5
hud/adapters/operator/adapter.py +0 -108
hud/adapters/operator/tests/__init__.py +0 -1
hud/adapters/operator/tests/test_adapter.py +0 -370
hud/agent/__init__.py +0 -19
hud/agent/base.py +0 -126
hud/agent/claude.py +0 -271
hud/agent/langchain.py +0 -215
hud/agent/misc/__init__.py +0 -3
hud/agent/operator.py +0 -268
hud/agent/tests/__init__.py +0 -1
hud/agent/tests/test_base.py +0 -202
hud/env/__init__.py +0 -11
hud/env/client.py +0 -35
hud/env/docker_client.py +0 -349
hud/env/environment.py +0 -446
hud/env/local_docker_client.py +0 -358
hud/env/remote_client.py +0 -212
hud/env/remote_docker_client.py +0 -292
hud/gym.py +0 -130
hud/job.py +0 -773
hud/mcp/__init__.py +0 -17
hud/mcp/base.py +0 -631
hud/mcp/client.py +0 -312
hud/mcp/tests/test_base.py +0 -512
hud/mcp/tests/test_claude.py +0 -294
hud/task.py +0 -149
hud/taskset.py +0 -237
hud/telemetry/_trace.py +0 -347
hud/telemetry/context.py +0 -230
hud/telemetry/exporter.py +0 -575
hud/telemetry/instrumentation/__init__.py +0 -3
hud/telemetry/instrumentation/mcp.py +0 -259
hud/telemetry/instrumentation/registry.py +0 -59
hud/telemetry/mcp_models.py +0 -270
hud/telemetry/tests/__init__.py +0 -1
hud/telemetry/tests/test_context.py +0 -210
hud/telemetry/tests/test_trace.py +0 -312
hud/tools/helper/README.md +0 -56
hud/tools/helper/__init__.py +0 -9
hud/tools/helper/mcp_server.py +0 -78
hud/tools/helper/server_initialization.py +0 -115
hud/tools/helper/utils.py +0 -58
hud/trajectory.py +0 -94
hud/utils/agent.py +0 -37
hud/utils/common.py +0 -256
hud/utils/config.py +0 -120
hud/utils/deprecation.py +0 -115
hud/utils/misc.py +0 -53
hud/utils/tests/test_common.py +0 -277
hud/utils/tests/test_config.py +0 -129
hud_python-0.3.5.dist-info/METADATA +0 -284
hud_python-0.3.5.dist-info/RECORD +0 -120
/hud/{adapters/common → shared}/tests/__init__.py +0 -0
{hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/WHEEL +0 -0

hud/datasets.py CHANGED Viewed

@@ -1,192 +1,322 @@
-"""Dataset utilities for working with HuggingFace datasets and TaskConfigs."""
-from __future__ import annotations
-import asyncio
-import logging
-from string import Template
-from typing import TYPE_CHECKING, Any
-from mcp.types import CallToolRequestParams as MCPToolParams
-from pydantic import BaseModel, Field, field_validator
-from hud.telemetry.job import job
-if TYPE_CHECKING:
-    from datasets import Dataset
-    from hud.mcp.base import AgentResult, BaseMCPAgent
-logger = logging.getLogger("hud.datasets")
-class TaskConfig(BaseModel):
-    """
-    A task configuration that can be used to create a task.
-    The mcp_config field supports environment variable substitution using
-    template placeholders in the format ${VAR_NAME} or ${VAR_NAME:default_value}.
-    Example:
-        mcp_config: {
-            "hud": {
-                "url": "${HUD_MCP_URL:https://mcp.hud.so/v3/mcp}",
-                "headers": {
-                    "Authorization": "Bearer ${HUD_API_KEY}",
-                    "Run-Id": "${RUN_ID}",
-                    "Mcp-Image": "your-mcp-image"
-                }
-            }
-        }
-    """
-    id: str | None = None
-    prompt: str
-    mcp_config: dict[str, Any]
-    setup_tool: MCPToolParams | None = None
-    evaluate_tool: MCPToolParams | None = None
-    metadata: dict[str, Any] = Field(default_factory=dict)
-    @field_validator("mcp_config", mode="before")
-    @classmethod
-    def resolve_env_vars(cls, v: dict[str, Any]) -> dict[str, Any]:
-        """
-        Automatically resolve environment variables in mcp_config using Template.
-        Supports ${VAR_NAME} syntax with variable substitution from:
-        1. System environment variables (including HUD_API_KEY, etc.)
-        2. Runtime context variables (e.g., RUN_ID from telemetry context)
-        Missing variables resolve to empty strings.
-        """
-        import os
-        from hud.telemetry.context import get_current_task_run_id
-        # Start with current environment variables
-        mapping = dict(os.environ)
-        # Add runtime context variables if available
-        run_id = get_current_task_run_id()
-        if run_id:
-            mapping["RUN_ID"] = run_id
-        def substitute_in_value(obj: Any) -> Any:
-            """Recursively substitute variables in nested structures."""
-            if isinstance(obj, str):
-                # Use Template's substitute with defaultdict - missing vars become empty strings
-                from collections import defaultdict
-                safe_mapping = defaultdict(str, mapping)
-                return Template(obj).substitute(safe_mapping)
-            elif isinstance(obj, dict):
-                return {k: substitute_in_value(v) for k, v in obj.items()}
-            elif isinstance(obj, list):
-                return [substitute_in_value(item) for item in obj]
-            else:
-                return obj
-        return substitute_in_value(v)
-def to_taskconfigs(dataset: Dataset) -> Dataset:
-    """
-    Convert a HuggingFace dataset to contain TaskConfig objects.
-    Args:
-        dataset: HuggingFace dataset with task data
-    Returns:
-        Dataset with 'task' column containing TaskConfig objects
-    Example:
-        >>> dataset = load_dataset("hud/sheetbench-v1", split="test")
-        >>> tasks = to_taskconfigs(dataset)
-        >>> tasks[0]["task"]  # This is a TaskConfig object
-    """
-    def _convert(example: dict[str, Any]) -> dict[str, TaskConfig]:
-        return {"task": TaskConfig(**example)}
-    # Map and keep only the task column
-    return dataset.map(_convert, remove_columns=dataset.column_names)
-async def run_dataset(
-    name: str,
-    dataset: Dataset,
-    agent_class: type[BaseMCPAgent],
-    agent_config: dict[str, Any] | None = None,
-    max_concurrent: int = 5,
-    metadata: dict[str, Any] | None = None,
-) -> list[Any]:
-    """
-    Run all tasks in a dataset with automatic job tracking.
-    Args:
-        name: Name for the job
-        dataset: HuggingFace Dataset (raw, not converted)
-        agent_class: Agent class to instantiate (e.g., ClaudeMCPAgent)
-        agent_config: Configuration for agent (model, etc.)
-        max_concurrent: Maximum parallel task execution
-        metadata: Optional metadata for the job
-    Returns:
-        List of results from agent.run() in dataset order
-    Example:
-        >>> from datasets import load_dataset
-        >>> from hud.mcp import ClaudeMCPAgent
-        >>> dataset = load_dataset("hud/sheetbench-v1", split="test")
-        >>> results = await run_dataset(
-        ...     "sheetbench_eval",
-        ...     dataset,
-        ...     ClaudeMCPAgent,
-        ...     {"model": "claude-3-5-sonnet-20241022"},
-        ...     max_concurrent=3,
-        ... )
-    """
-    # Import here to avoid circular imports
-    import hud
-    from hud.mcp.client import MCPClient
-    # Convert dataset to TaskConfigs internally
-    tasks = to_taskconfigs(dataset)
-    # Create job context
-    job_metadata = metadata or {}
-    job_metadata["agent_class"] = agent_class.__name__
-    if agent_config:
-        job_metadata["agent_config"] = agent_config
-    with job(name, metadata=job_metadata):
-        # Run tasks with semaphore for concurrency control
-        sem = asyncio.Semaphore(max_concurrent)
-        results: list[AgentResult | None] = [None] * len(tasks)
-        async def _worker(index: int, row: Any) -> None:
-            async with sem:
-                task = row["task"]
-                # Create trace for this task
-                with hud.trace(f"task_{index}"):
-                    # Create fresh MCP client per task
-                    if task.mcp_config:
-                        client = MCPClient(mcp_config=task.mcp_config)
-                        agent = agent_class(mcp_client=client, **(agent_config or {}))
-                        try:
-                            results[index] = await agent.run(task)
-                        finally:
-                            await client.close()
-                    else:
-                        logger.warning("Task %d has no mcp_config defined", index)
-                        results[index] = None
-        # Execute all tasks
-        await asyncio.gather(
-            *[_worker(i, row) for i, row in enumerate(tasks)],
-            return_exceptions=True,  # Don't fail entire batch on one error
-        )
-    return results
+"""Dataset utilities for working with HuggingFace datasets and Tasks."""
+from __future__ import annotations
+import asyncio
+import json
+import logging
+from string import Template
+from typing import TYPE_CHECKING, Any, cast
+from datasets import Dataset, load_dataset
+from pydantic import BaseModel, Field, field_validator
+from hud.agents.misc import ResponseAgent
+from .types import MCPToolCall
+if TYPE_CHECKING:
+    from hud.agents import MCPAgent
+logger = logging.getLogger("hud.datasets")
+class Task(BaseModel):
+    """
+    A task configuration that can be used to create a task.
+    The mcp_config field supports environment variable substitution using
+    template placeholders in the format ${VAR_NAME} or ${VAR_NAME:default_value}.
+    Example:
+        mcp_config: {
+            "hud": {
+                "url": "${HUD_MCP_URL:https://mcp.hud.so/v3/mcp}",
+                "headers": {
+                    "Authorization": "Bearer ${HUD_API_KEY}",
+                    "Mcp-Image": "your-mcp-image"
+                }
+            }
+        }
+    """
+    id: str | None = None
+    prompt: str
+    mcp_config: dict[str, Any]
+    setup_tool: MCPToolCall | list[MCPToolCall] | None = None
+    evaluate_tool: MCPToolCall | list[MCPToolCall] | None = None
+    system_prompt: str | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)
+    @field_validator("mcp_config", "metadata", mode="before")
+    @classmethod
+    def parse_json_strings(cls, v: Any) -> Any:
+        """Parse JSON strings into dictionaries."""
+        if isinstance(v, str):
+            try:
+                return json.loads(v)
+            except json.JSONDecodeError as e:
+                raise ValueError(f"Invalid JSON string: {e}") from e
+        return v
+    @field_validator("setup_tool", "evaluate_tool", mode="before")
+    @classmethod
+    def convert_dict_to_tool_call(cls, v: Any) -> Any:
+        """Convert dict to MCPToolCall instance, parsing JSON strings first."""
+        if v is None:
+            return None
+        # Parse JSON string if needed
+        if isinstance(v, str):
+            try:
+                v = json.loads(v)
+            except json.JSONDecodeError as e:
+                raise ValueError(f"Invalid JSON string: {e}") from e
+        if isinstance(v, dict):
+            return MCPToolCall(**v)
+        if isinstance(v, list):
+            return [MCPToolCall(**item) if isinstance(item, dict) else item for item in v]
+        return v
+    @field_validator("mcp_config", mode="before")
+    @classmethod
+    def resolve_env_vars(cls, v: dict[str, Any]) -> dict[str, Any]:
+        """
+        Automatically resolve environment variables in mcp_config using Template.
+        Supports ${VAR_NAME} syntax with variable substitution from
+        System environment variables (including HUD_API_KEY, etc.)
+        Missing variables resolve to empty strings.
+        """
+        import os
+        # Start with current environment variables
+        mapping = dict(os.environ)
+        def substitute_in_value(obj: Any) -> Any:
+            """Recursively substitute variables in nested structures."""
+            if isinstance(obj, str):
+                # Use Template's substitute with defaultdict - missing vars become empty strings
+                from collections import defaultdict
+                safe_mapping = defaultdict(str, mapping)
+                return Template(obj).substitute(safe_mapping)
+            elif isinstance(obj, dict):
+                return {k: substitute_in_value(v) for k, v in obj.items()}
+            elif isinstance(obj, list):
+                return [substitute_in_value(item) for item in obj]
+            else:
+                return obj
+        return substitute_in_value(v)
+async def fetch_system_prompt_from_dataset(dataset_id: str) -> str | None:
+    """
+    Fetch system_prompt.txt from a HuggingFace dataset repository.
+    Args:
+        dataset_id: HuggingFace dataset identifier (e.g., "hud-evals/SheetBench-50")
+    Returns:
+        System prompt text if found, None otherwise
+    """
+    try:
+        # Import here to avoid unnecessary dependency
+        from huggingface_hub import hf_hub_download
+        from huggingface_hub.errors import EntryNotFoundError
+        # Try to download the system_prompt.txt file
+        try:
+            file_path = hf_hub_download(
+                repo_id=dataset_id, filename="system_prompt.txt", repo_type="dataset"
+            )
+            # Read and return the content
+            with open(file_path, encoding="utf-8") as f:  # noqa: ASYNC230
+                content = f.read().strip()
+                if content:
+                    logger.info(
+                        "Loaded system prompt from %s (length: %d chars)", dataset_id, len(content)
+                    )
+                    return content
+                else:
+                    logger.warning("System prompt file is empty in %s", dataset_id)
+                    return None
+        except EntryNotFoundError:
+            logger.debug("No system_prompt.txt found in dataset %s", dataset_id)
+            return None
+    except ImportError:
+        logger.warning(
+            "huggingface_hub not installed. Install it to fetch system prompts from datasets."
+        )
+        return None
+    except Exception as e:
+        logger.error("Error fetching system prompt from %s: %s", dataset_id, e)
+        return None
+async def run_dataset(
+    name: str,
+    dataset: str | Dataset | list[dict[str, Any]],
+    agent_class: type[MCPAgent],
+    agent_config: dict[str, Any] | None = None,
+    max_concurrent: int = 50,
+    metadata: dict[str, Any] | None = None,
+    max_steps: int = 40,
+    split: str = "train",
+    auto_respond: bool = False,
+    custom_system_prompt: str | None = None,
+) -> list[Any]:
+    """
+    Run all tasks in a dataset with automatic job tracking.
+    Args:
+        name: Name for the job
+        dataset: HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50"),
+                Dataset object, OR list of Task objects
+        agent_class: Agent class to instantiate (e.g., ClaudeAgent)
+        agent_config: Configuration/kwargs for agent (model, etc.)
+        max_concurrent: Maximum parallel task execution
+        metadata: Optional metadata for the job
+        max_steps: Maximum steps per task
+        split: Dataset split to use when loading from string (default: "train")
+        auto_respond: Whether to use auto-response agent
+    Returns:
+        List of results from agent.run() in dataset order
+    Example:
+        >>> from hud.agents import ClaudeAgent
+        >>> # Option 1: From dataset string identifier
+        >>> results = await run_dataset(
+        ...     "SheetBench Eval",
+        ...     "hud-evals/SheetBench-50",
+        ...     ClaudeAgent,
+        ...     {"model": "claude-3-5-sonnet-20241022"},
+        ... )
+        >>> # Option 2: From HuggingFace dataset object
+        >>> from datasets import load_dataset
+        >>> dataset = load_dataset("hud-evals/SheetBench-50", split="train")
+        >>> results = await run_dataset("my_eval", dataset, ClaudeAgent)
+        >>> # Option 3: From list of dicts
+        >>> tasks = [{"prompt": "...", "mcp_config": {...}, ...}, ...]
+        >>> results = await run_dataset("browser_eval", tasks, ClaudeAgent)
+    """
+    # Import here to avoid circular imports
+    import hud
+    dataset_link = None
+    # Load dataset from string if needed
+    if isinstance(dataset, str):
+        logger.info("Loading dataset %s from HuggingFace...", dataset)
+        dataset_link = dataset
+        # Load dataset from HuggingFace
+        dataset = cast("Dataset", load_dataset(dataset, split=split))
+    # Create job context
+    job_metadata = metadata or {}
+    job_metadata["agent_class"] = agent_class.__name__
+    job_metadata["agent_config"] = agent_config
+    # Extract dataset verification info if available
+    if isinstance(dataset, Dataset) and not dataset_link:
+        general_info = next(iter(dataset.info.__dict__["download_checksums"].keys())).split("/")
+        project = general_info[3]
+        dataset_name = general_info[4].split("@")[0]
+        dataset_link = f"{project}/{dataset_name}"
+    with hud.job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
+        # Run tasks with semaphore for concurrency control
+        sem = asyncio.Semaphore(max_concurrent)
+        results: list[Any | None] = [None] * len(dataset)
+        async def _worker(index: int, task_dict: Any, max_steps: int = 40) -> None:
+            async with sem:
+                # Create trace for this task
+                task_name = task_dict.get("prompt") or f"Task {index}"
+                if "system_prompt" not in task_dict:
+                    task_dict["system_prompt"] = custom_system_prompt
+                with hud.trace(task_name, job_id=job_obj.id, task_id=task_dict.get("id")):
+                    # Convert dict to Task here, at trace level
+                    task = Task(**task_dict)
+                    agent = agent_class(**(agent_config or {}))
+                    if auto_respond:
+                        agent.response_agent = ResponseAgent()
+                    results[index] = await agent.run(task, max_steps=max_steps)
+        # Execute all tasks
+        await asyncio.gather(
+            *[_worker(i, task, max_steps=max_steps) for i, task in enumerate(dataset)],
+            return_exceptions=True,  # Don't fail entire batch on one error
+        )
+    return results
+def save_tasks(
+    tasks: list[dict[str, Any]], repo_id: str, fields: list[str] | None = None, **kwargs: Any
+) -> None:
+    """
+    Save data to HuggingFace dataset with JSON string serialization.
+    Complex fields (dicts, lists) are serialized as JSON strings to maintain clean schema
+    and avoid null value pollution in HuggingFace datasets.
+    Args:
+        tasks: List of dictionaries to save
+        repo_id: HuggingFace repository ID (e.g., "hud-evals/my-tasks")
+        fields: Optional list of fields to save. If None, saves all fields from each dict.
+        **kwargs: Additional arguments passed to dataset.push_to_hub()
+    """
+    from datasets import Dataset
+    # Safety check: Ensure we're not saving Task objects (which have resolved env vars)
+    if tasks and isinstance(tasks[0], Task):
+        raise ValueError(
+            "save_tasks expects dictionaries, not Task objects. "
+            "Task objects have resolved environment variables which would expose secrets. "
+            "Please pass raw dictionaries with template strings like '${HUD_API_KEY}' preserved."
+        )
+    # Convert to rows with JSON string fields
+    data = []
+    for i, tc_dict in enumerate(tasks):
+        # Additional safety check for each item
+        if isinstance(tc_dict, Task):
+            raise ValueError(
+                f"Item {i} is a Task object, not a dictionary. "
+                "This would expose resolved environment variables. "
+                "Please convert to dictionary format with template strings preserved."
+            )
+        row = {}
+        # Determine which fields to process
+        fields_to_process = fields if fields is not None else list(tc_dict.keys())
+        for field in fields_to_process:
+            if field in tc_dict:
+                value = tc_dict[field]
+                # Serialize complex types as JSON strings
+                if isinstance(value, (dict | list)):
+                    row[field] = json.dumps(value)
+                elif isinstance(value, (str | int | float | bool | type(None))):
+                    row[field] = value if value is not None else ""
+                else:
+                    # For other types, convert to string
+                    row[field] = str(value)
+        data.append(row)
+    # Create and push dataset
+    dataset = Dataset.from_list(data)
+    dataset.push_to_hub(repo_id, **kwargs)

hud/misc/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Miscellaneous utilities for HUD SDK."""

hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

Potentially problematic release.

hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl