hud-python 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic.

hud/taskset.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from pathlib import PosixPath
+from typing import TYPE_CHECKING, Any, get_args
 from venv import logger
 
 from pydantic import BaseModel
@@ -9,6 +10,7 @@ from hud.env.environment import create_remote_config
 from hud.server import make_request
 from hud.settings import settings
 from hud.task import Task
+from hud.types import CustomGym, ServerGym
 from hud.utils.config import REMOTE_EVALUATE, REMOTE_SETUP
 
 if TYPE_CHECKING:
@@ -86,16 +88,45 @@ class TaskSet(BaseModel):
         # Convert all tasks to expanded configs
         processed_tasks = []
         for task in self.tasks:
-            setup_config = create_remote_config(None, task.setup, REMOTE_SETUP)[0].args[0]
-            evaluate_config = create_remote_config(None, task.evaluate, REMOTE_EVALUATE)[0].args[0]
+            if task.setup is not None:
+                setup_config = (
+                    create_remote_config(None, task.setup, REMOTE_SETUP)[0].args[0].model_dump()
+                )
+            else:
+                setup_config = None
+            if task.evaluate is not None:
+                evaluate_config = (
+                    create_remote_config(None, task.evaluate, REMOTE_EVALUATE)[0]
+                    .args[0]
+                    .model_dump()
+                )
+            else:
+                evaluate_config = None
+
+            if isinstance(task.gym, CustomGym):
+                if isinstance(task.gym.image_or_build_context, PosixPath):
+                    raise ValueError(
+                        "Local build contexts are not supported for "
+                        "remote tasksets, attach an image or existing "
+                        "gym id."
+                    )
+                gym_str = "docker"
+                image_uri = task.gym.image_or_build_context
+            elif isinstance(task.gym, str) and task.gym in get_args(ServerGym):
+                gym_str = task.gym
+                image_uri = None
+            else:
+                raise ValueError(f"Unknown gym type: {type(task.gym)}")
 
             processed_tasks.append(
                 {
                     "prompt": task.prompt,
-                    "gym": task.gym,
-                    "setup": setup_config.model_dump(),
-                    "evaluate": evaluate_config.model_dump(),
+                    "gym": gym_str,
+                    "setup": setup_config,
+                    "evaluate": evaluate_config,
                     "config": task.config,
+                    "image_uri": image_uri,
+                    "description": task.description,
                 }
             )
 
@@ -113,7 +144,15 @@ class TaskSet(BaseModel):
             "Taskset %s uploaded successfully, see it on app.hud.so/evalsets/%s", name, name
         )
 
-    async def fit(self, agent: Agent | type[Agent]) -> None:
+    def _apply(self, dict: dict[str, Any]) -> None:
+        """
+        Applies a parameter to all tasks in the taskset.
+        """
+        for task in self.tasks:
+            for key, value in dict.items():
+                setattr(task, key, value)
+
+    def fit(self, agent: Agent | type[Agent]) -> None:
         """
         Automatically adapts the taskset to the agent's transfer_gyms.
         """
@@ -121,19 +160,27 @@ class TaskSet(BaseModel):
             agent = agent()
 
         for task in self.tasks:
-            if task.gym is None:
+            if task.gym is None or isinstance(task.gym, CustomGym):
                 continue
             task.gym = agent.transfer_gyms.get(task.gym, task.gym)
 
 
-async def load_taskset(taskset_id: str, api_key: str | None = None) -> TaskSet:
+async def load_taskset(
+    taskset_id: str,
+    api_key: str | None = None,
+    metadata: dict[str, Any] | None = None,
+    load_custom_as_local: bool = False,
+    system_prompt: str | None = None,
+) -> TaskSet:
     """
     Loads a TaskSet by its ID.
 
     Args:
         taskset_id: The ID of the taskset to load
        api_key: Optional API key to use for the request
-
+        metadata: Optional metadata to apply to the taskset
+        load_custom_as_local: Whether to load custom gyms as local
+        system_prompt: Optional system prompt to override the default
     Returns:
         TaskSet: The loaded taskset
     """
@@ -149,13 +196,33 @@ async def load_taskset(taskset_id: str, api_key: str | None = None) -> TaskSet:
 
     logger.info(f"Taskset {taskset_id} loaded successfully")
 
-    return TaskSet.model_validate(
+    tasks = data["evalset"]
+    for task in tasks:
+        if system_prompt:
+            task["system_prompt"] = system_prompt
+        if task["gym"] == "docker":
+            if "image_uri" not in task:
+                raise ValueError(
+                    "No `image_uri` key found. This taskset may be "
+                    "incompatible with your version of HUD SDK."
+                )
+
+            task["gym"] = CustomGym(
+                location="local" if load_custom_as_local else "remote",
+                image_or_build_context=task["image_uri"],
+            )
+
+    taskset = TaskSet.model_validate(
         {
             "id": taskset_id,
-            "tasks": data["evalset"],
+            "tasks": tasks,
        }
    )
 
+    taskset._apply({"metadata": metadata})
+
+    return taskset
+
 
 def load_from_inspect(dataset: Dataset) -> TaskSet:
     """
hud/telemetry/context.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import contextvars
 import logging
+from collections import defaultdict
 from datetime import datetime
 from typing import Any, TypeVar
 
@@ -11,7 +12,6 @@ from hud.telemetry.mcp_models import (
     MCPNotificationCall,
     MCPRequestCall,
     MCPResponseCall,
-    MCPTelemetryRecord,
     StatusType,
 )
 
@@ -21,9 +21,8 @@ logger = logging.getLogger("hud.telemetry")
 current_task_run_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
     "current_task_run_id", default=None
 )
-mcp_calls_buffer: contextvars.ContextVar[list[BaseMCPCall] | None] = contextvars.ContextVar(
-    "mcp_calls_buffer", default=None
-)
+# NEW: Global dictionary for buffering, keyed by task_run_id
+_GLOBAL_MCP_CALL_BUFFERS: defaultdict[str, list[BaseMCPCall]] = defaultdict(list)
 is_root_trace: contextvars.ContextVar[bool] = contextvars.ContextVar("is_root_trace", default=False)
 
 # Maximum buffer size before automatic flush
@@ -35,51 +34,37 @@ T = TypeVar("T", bound=BaseMCPCall)
 
 def get_current_task_run_id() -> str | None:
     """Get the task_run_id for the current trace context."""
-    value = current_task_run_id.get()
-    # Convert empty string sentinel back to None
-    return None if value == "" else value
+    return current_task_run_id.get()
 
 
 def set_current_task_run_id(task_run_id: str | None) -> None:
     """Set the task_run_id for the current trace context."""
-    # Handle None value by using empty string as sentinel
-    value_to_set = "" if task_run_id is None else task_run_id
-    current_task_run_id.set(value_to_set)
+    current_task_run_id.set(task_run_id)
 
 
 def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
-    """
-    Add an MCP call to the buffer for the current trace.
-
-    Args:
-        record: Either a Pydantic model instance or dictionary with MCP call data
-    """
-    # Only buffer if we have an active trace
     task_run_id = get_current_task_run_id()
-    if task_run_id is not None and task_run_id != "":
-        buffer = mcp_calls_buffer.get()
-        if buffer is None:
-            buffer = []
 
-        # Convert dictionary to proper model if needed
-        if isinstance(record, dict):
-            record = BaseMCPCall.from_dict(record)
+    if not task_run_id:
+        logger.warning(
+            "BUFFER_MCP_CALL: No task_run_id. Skipping buffer for %s", type(record).__name__
+        )
+        return
 
-        # Ensure the record has the current task_run_id
-        if record.task_run_id != task_run_id:
-            # Create a copy with the current task_run_id
-            record_dict = record.model_dump()
-            record_dict["task_run_id"] = task_run_id
-            record = BaseMCPCall.from_dict(record_dict)
+    # Ensure 'record' is a Pydantic model instance from here
+    if isinstance(record, dict):
+        try:
+            record_model = BaseMCPCall.from_dict(record)
+            record = record_model
+        except Exception as e_conv:
+            logger.exception("BUFFER_MCP_CALL: Failed to convert dict to BaseMCPCall: %s", e_conv)
+            return
 
-        # Add to buffer
-        buffer.append(record)
-        mcp_calls_buffer.set(buffer)
+    _GLOBAL_MCP_CALL_BUFFERS[task_run_id].append(record)
+    buffer_len = len(_GLOBAL_MCP_CALL_BUFFERS[task_run_id])
 
-        # Auto-flush if buffer gets too large
-        if len(buffer) >= MAX_BUFFER_SIZE:
-            logger.debug("MCP calls buffer reached size %d, auto-flushing", len(buffer))
-            flush_buffer(export=True)
+    if buffer_len >= MAX_BUFFER_SIZE:
+        flush_buffer(export=True)
 
 
 def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
@@ -92,25 +77,16 @@ def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
     Returns:
         The list of buffered MCP calls
     """
-    buffer = mcp_calls_buffer.get()
-    if buffer is None:
-        buffer = []
-    # Reset buffer to empty list
-    mcp_calls_buffer.set([])
-
-    if export and buffer and len(buffer) > 0:
-        task_id = buffer[0].task_run_id if buffer else None
-        if task_id:
-            logger.debug("Exporting %d MCP calls for task run %s", len(buffer), task_id)
-            # Create a telemetry record for export
-            _telemetry_record = MCPTelemetryRecord(task_run_id=task_id, records=buffer)
-            # In the future, we could call an export function here
-            # For now, just log that we have telemetry
-            logger.debug("MCP telemetry record created with %d calls", len(buffer))
-        else:
-            logger.warning("No task_run_id found in buffer, skipping export")
-
-    return buffer
+    task_run_id = get_current_task_run_id()
+    if not task_run_id:
+        logger.warning("FLUSH_BUFFER: No current task_run_id. Cannot flush.")
+        return []
+
+    buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(
+        task_run_id, []
+    )  # Get and remove the list for this task
+
+    return buffer_for_task  # Return the flushed items
 
 
 def create_request_record(
@@ -150,6 +126,7 @@ def create_response_record(
         is_error=is_error,
         **kwargs,
     )
+
    buffer_mcp_call(record)
    return record
 
@@ -189,5 +166,4 @@ def create_manual_test_record(**custom_data: Any) -> MCPManualTestCall | None:
 def reset_context() -> None:
     """Reset all telemetry context variables. Useful for test isolation."""
     set_current_task_run_id(None)
-    mcp_calls_buffer.set([])
     is_root_trace.set(False)
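
Buffering is now keyed by whichever task run id is active when buffer_mcp_call runs, and flush_buffer pops only that task's records. A minimal sketch of the behaviour, using placeholder ids:

    from hud.telemetry.context import flush_buffer, set_current_task_run_id

    # Instrumented MCP calls invoke buffer_mcp_call(record) internally; records
    # land in _GLOBAL_MCP_CALL_BUFFERS under the id that is active at that moment.
    set_current_task_run_id("task-a")
    # ... MCP traffic for task-a is buffered here ...

    set_current_task_run_id("task-b")
    assert flush_buffer() == []  # task-b has its own, still-empty buffer

    set_current_task_run_id("task-a")
    calls = flush_buffer()       # pops and returns only task-a's records
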
hud/telemetry/exporter.py CHANGED
@@ -298,12 +298,10 @@ async def _export_trace_payload_async(payload: dict[str, Any]) -> None:
         "telemetry": payload.get("mcp_calls", []),
     }
 
-    # Ensure mcp_calls is not empty if that's a requirement, or send as is. For now, send as is.
-    # if not data_to_send["mcp_calls"]:
-    #     logger.debug("No MCP calls in payload for task run %s, skipping specific export if "
-    #                  "desired.", task_run_id)
-    #     # Depending on backend, might not want to send empty mcp_calls list, or it's fine.
+    await send_telemetry_to_server(task_run_id, data_to_send)
 
+
+async def send_telemetry_to_server(task_run_id: str, data: dict[str, Any]) -> None:
     telemetry_url = f"{settings.base_url}/v2/task_runs/{task_run_id}/telemetry-upload"
 
     try:
@@ -320,7 +318,7 @@ async def _export_trace_payload_async(payload: dict[str, Any]) -> None:
             )
             response = await client.post(
                 telemetry_url,
-                json=data_to_send,  # Send the structured attributes and mcp_calls
+                json=data,  # Send the structured attributes and mcp_calls
                 headers=headers,
                 timeout=30.0,
             )
@@ -31,9 +31,6 @@ from hud.telemetry.mcp_models import DirectionType, MCPCallType, MCPManualTestCa
 
 logger = logging.getLogger(__name__)
 
-# Ensure no OTel imports remain
-# from opentelemetry import context as otel_context, propagate  # Should be removed
-
 
 class MCPInstrumentor:
     """
@@ -140,9 +140,13 @@ class TestMCPCallBuffer:
 
         # Flush should return all calls from both tasks
         result = flush_buffer()
-        assert len(result) == 2
-        assert result[0] == mock_call_1
-        assert result[1] == mock_call_2
+        assert len(result) == 1
+        assert result[0] == mock_call_2
+
+        set_current_task_run_id("task-1")
+        result2 = flush_buffer()
+        assert len(result2) == 1
+        assert result2[0] == mock_call_1
 
     def test_buffer_mcp_call_without_task_id(self):
         """Test adding MCP call when no task run ID is set."""
hud/trajectory.py CHANGED
@@ -6,6 +6,8 @@ import datetime
 from IPython.display import HTML, Markdown, display
 from pydantic import BaseModel, Field
 
+from .adapters.common.types import LogType
+
 
 class TrajectoryStep(BaseModel):
     """Model representing a single task run's trajectory information."""
@@ -13,6 +15,7 @@ class TrajectoryStep(BaseModel):
     observation_url: str | None = None
     observation_text: str | None = None
     actions: list[dict]
+    logs: LogType | None = None
     start_timestamp: str | None = None
     end_timestamp: str | None = None
 
hud/types.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import enum
 from pathlib import Path
-from typing import Literal, TypeAlias
+from typing import Any, Literal, TypeAlias
 
 from pydantic import BaseModel
 
@@ -28,6 +28,9 @@ class CustomGym(BaseModel):
     # B. If string, then it is the uri of the docker image to use.
     # The controller must already be installed in the image.
     image_or_build_context: str | Path
+    # host_config will be passed to the docker client when creating the environment.
+    # refer to official docker api documentation for available configs.
+    host_config: dict[str, Any] | None = None
 
 
 class EnvironmentStatus(str, enum.Enum):
@@ -48,7 +51,30 @@ class EnvironmentStatus(str, enum.Enum):
 
 
 # Available HUD gyms
-ServerGym: TypeAlias = Literal["qa", "hud-browser", "hud-ubuntu", "OSWorld-Ubuntu"]
+ServerGym: TypeAlias = Literal["qa", "hud-browser", "OSWorld-Ubuntu", "docker"]
 
 # Gyms can be either custom or server-side
 Gym: TypeAlias = CustomGym | ServerGym
+
+
+# Metadata keys for the environment.
+# partial: Whether the environment evaluator should give partial grades.
+# eval_model: The model to use for evaluation when running a VLM. Wraps langchain.
+# agent_name: The name of the agent that was used for running this task.
+ServerMetadataKeys: TypeAlias = Literal["partial", "eval_model", "agent_name"]
+MetadataKeys: TypeAlias = str | ServerMetadataKeys
+
+
+# Dictionary of sensitive data (only supported for hud-browser environments)
+# key: website name or page identifier
+# value: Dictionary of credentials for the sensitive data
+# Example:
+# {
+#     "google.com": {
+#         "google_username": "my_username",
+#         "google_password": "my_password"
+#     }
+# }
+# The agent only has access to the key of the credential, not the value. (i.e. google_username)
+# The value is only available to the environment. (i.e. my_username)
+SensitiveData: TypeAlias = dict[str, dict[str, str]]
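
The new host_config field and the SensitiveData alias can be used roughly as follows; the image name and the shm_size option are illustrative placeholders, not values taken from this release:

    from hud.types import CustomGym, SensitiveData

    gym = CustomGym(
        location="local",
        image_or_build_context="my-org/my-controller:latest",  # placeholder image
        host_config={"shm_size": "2g"},  # forwarded to the docker client as-is
    )

    credentials: SensitiveData = {
        "google.com": {
            "google_username": "my_username",
            "google_password": "my_password",
        }
    }

The agent only ever sees the placeholder keys (e.g. google_username); the values stay inside the environment.
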
hud/utils/agent.py ADDED
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from hud.task import Task
+
+AGENT_PROMPT = (
+    "You are an AI agent whose goal is to accomplish the ultimate task following the instructions."
+)
+
+
+def format_agent_prompt(environment_prompt: str | None, task: Task | None) -> str:
+    """
+    Format the agent prompt with the environment prompt and the task prompt.
+    """
+    prompt = AGENT_PROMPT
+
+    # User-provided system prompt takes precedence over environment prompt
+    if task and task.system_prompt:
+        prompt += f"\n\n{task.system_prompt}"
+    elif environment_prompt:
+        prompt += f"\n\n{environment_prompt}"
+
+    if task:
+        if task.sensitive_data:
+            prompt += "\n\nHere are placeholders for sensitive data for each domain:"
+            for domain, credentials in task.sensitive_data.items():
+                prompt += f"\n{domain}: "
+                placeholders = [f"{key}" for key in credentials]
+                prompt += f"{', '.join(placeholders)}"
+            prompt += "\n\nYou can type these placeholders to enter the sensitive data when needed."
+
+        if task.prompt:
+            prompt += f"\n\n{task.prompt}"
+
+    return prompt
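
A quick sketch of the new helper; passing task=None keeps it self-contained, while a Task with system_prompt, sensitive_data, or prompt set would have those sections appended as shown above:

    from hud.utils.agent import format_agent_prompt

    # Only the base AGENT_PROMPT plus the environment prompt is used here,
    # because no Task (and therefore no system_prompt override) is supplied.
    text = format_agent_prompt(
        environment_prompt="You are operating a web browser.",  # illustrative prompt
        task=None,
    )
    print(text)
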
hud/utils/common.py CHANGED
@@ -6,6 +6,7 @@ import tarfile
 import zipfile
 from typing import TYPE_CHECKING, Any, TypedDict
 
+from pathspec import PathSpec
 from pydantic import BaseModel
 
 from hud.server.requests import make_request
@@ -67,8 +68,8 @@ class Observation(BaseModel):
 
     def __str__(self) -> str:
         return f"""Observation(screenshot={
-            self.screenshot[:100] if self.screenshot else "None"
-        }..., text={self.text}...)"""
+            f"{self.screenshot[:100]}..." if self.screenshot else "None"
+        }, text={f"{self.text[:100]}..." if self.text else "None"})"""
 
 
 class ExecuteResult(TypedDict):
@@ -86,44 +87,159 @@ class ExecuteResult(TypedDict):
     exit_code: int
 
 
-def directory_to_tar_bytes(directory_path: Path) -> bytes:
+# ---------------------------------------------------------------------------
+# Helper functions for handling ignore patterns
+# ---------------------------------------------------------------------------
+
+
+def _read_ignore_file(file_path: Path) -> list[str]:
+    """Return patterns from *file_path* (ignoring blanks / comments)."""
+    if not file_path.exists():
+        return []
+
+    patterns: list[str] = []
+    for line in file_path.read_text().splitlines():
+        stripped = line.strip()
+        if not stripped or stripped.startswith("#"):
+            continue
+        patterns.append(stripped)
+    return patterns
+
+
+def _gather_ignore_patterns(root_dir: Path, filename: str) -> list[str]:
+    """Collect *filename* patterns throughout *root_dir* respecting hierarchy.
+
+    For a nested ignore file located at ``sub/dir/.gitignore`` containing the
+    pattern ``foo/``, the returned pattern will be ``sub/dir/foo/`` so that it
+    is evaluated relative to *root_dir* when passed to ``PathSpec``.
     """
-    Converts a directory to a tar archive and returns it as bytes.
+    gathered: list[str] = []
+
+    root_dir = root_dir.resolve()
+
+    for ignore_file in root_dir.rglob(filename):
+        prefix = ignore_file.parent.relative_to(root_dir).as_posix()
+        base_prefix = "" if prefix == "." else prefix
+
+        for pat in _read_ignore_file(ignore_file):
+            negate = pat.startswith("!")
+            pat_body = pat[1:] if negate else pat
+
+            # Leading slash means relative to the directory the ignore file is
+            # located in - remove it so we can prepend *prefix* below.
+            if pat_body.startswith("/"):
+                pat_body = pat_body.lstrip("/")
 
-    This function creates a tar archive of the specified directory in memory,
-    without writing to a temporary file on disk.
+            full_pattern = f"{base_prefix}/{pat_body}" if base_prefix else pat_body
+            if negate:
+                full_pattern = f"!{full_pattern}"
 
-    Args:
-        path: Path to the directory to convert
+            gathered.append(full_pattern)
 
-    Returns:
-        Bytes of the tar archive
+    return gathered
+
+
+def _compile_pathspec(
+    directory: Path,
+    *,
+    respect_gitignore: bool,
+    respect_dockerignore: bool,
+    respect_hudignore: bool,
+) -> PathSpec | None:
+    """Compile a ``PathSpec`` from all relevant ignore files under *directory*.
+
+    In addition to the standard ``.gitignore`` and ``.dockerignore`` files we now
+    recognise a project-specific ``.hudignore`` file that shares the same pattern
+    syntax. Each file can be toggled independently through the corresponding
+    ``respect_*`` keyword argument.
+    """
+    patterns: list[str] = []
+
+    if respect_gitignore:
+        patterns.extend(_gather_ignore_patterns(directory, ".gitignore"))
+    if respect_dockerignore:
+        patterns.extend(_gather_ignore_patterns(directory, ".dockerignore"))
+    if respect_hudignore:
+        patterns.extend(_gather_ignore_patterns(directory, ".hudignore"))
+
+    if not patterns:
+        return None
+
+    return PathSpec.from_lines("gitwildmatch", patterns)
+
+
+def _iter_files(
+    directory: Path,
+    *,
+    respect_gitignore: bool,
+    respect_dockerignore: bool,
+    respect_hudignore: bool,
+) -> Iterator[tuple[Path, Path]]:
+    """Yield ``(file_path, relative_path)`` while respecting ignore files."""
+    spec = _compile_pathspec(
+        directory,
+        respect_gitignore=respect_gitignore,
+        respect_dockerignore=respect_dockerignore,
+        respect_hudignore=respect_hudignore,
+    )
+
+    for file_path in directory.rglob("*"):
+        if not file_path.is_file():
+            continue
+        rel_path = file_path.relative_to(directory)
+        rel_str = rel_path.as_posix()
+        if spec and spec.match_file(rel_str):
+            continue
+        yield file_path, rel_path
+
+
+def directory_to_tar_bytes(
+    directory_path: Path,
+    *,
+    respect_gitignore: bool = False,
+    respect_dockerignore: bool = False,
+    respect_hudignore: bool = True,
+) -> bytes:
+    """
+    Converts a directory to a tar archive and returns it as bytes.
+
+    By default the archive respects ignore rules defined in ``.gitignore``,
+    ``.dockerignore`` and ``.hudignore`` (each can be disabled via kwargs).
     """
     output = io.BytesIO()
 
     with tarfile.open(fileobj=output, mode="w") as tar:
-        # Walk through the directory
-        for file_path in directory_path.rglob("*"):
-            if file_path.is_file():
-                # Calculate relative path for the archive
-                rel_path = file_path.relative_to(directory_path)
-                logger.debug("Adding %s to tar archive", rel_path)
-                tar.add(file_path, arcname=str(rel_path))
-
-    # Get the bytes from the BytesIO object
+        for file_path, rel_path in _iter_files(
+            directory_path,
+            respect_gitignore=respect_gitignore,
+            respect_dockerignore=respect_dockerignore,
+            respect_hudignore=respect_hudignore,
+        ):
+            logger.debug("Adding %s to tar archive", rel_path)
+            tar.add(file_path, arcname=str(rel_path))
+
    output.seek(0)
    return output.getvalue()
 
 
-def directory_to_zip_bytes(context_dir: Path) -> bytes:
-    """Zip a directory and return the zip archive as bytes."""
+def directory_to_zip_bytes(
+    context_dir: Path,
+    *,
+    respect_gitignore: bool = False,
+    respect_dockerignore: bool = False,
+    respect_hudignore: bool = True,
+) -> bytes:
+    """Zip *context_dir* and return the zip archive as bytes, respecting ignore rules."""
     output = io.BytesIO()
     with zipfile.ZipFile(output, "w", zipfile.ZIP_DEFLATED) as zipf:
-        for file_path in context_dir.rglob("*"):
-            if file_path.is_file():
-                rel_path = file_path.relative_to(context_dir)
-                logger.debug("Adding %s to zip archive", rel_path)
-                zipf.write(str(file_path), arcname=str(rel_path))
+        for file_path, rel_path in _iter_files(
+            context_dir,
+            respect_gitignore=respect_gitignore,
+            respect_dockerignore=respect_dockerignore,
+            respect_hudignore=respect_hudignore,
+        ):
+            logger.debug("Adding %s to zip archive", rel_path)
+            zipf.write(str(file_path), arcname=str(rel_path))
     return output.getvalue()
 
 
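
A minimal sketch of the new archive helpers; the directory path is a placeholder. Note that .hudignore is honoured by default while .gitignore and .dockerignore must be opted into:

    from pathlib import Path

    from hud.utils.common import directory_to_tar_bytes

    tar_bytes = directory_to_tar_bytes(
        Path("./my_env"),            # placeholder build context
        respect_gitignore=True,      # off by default
        respect_dockerignore=True,   # off by default
        # respect_hudignore defaults to True
    )
    print(f"archive size: {len(tar_bytes)} bytes")
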
hud/utils/config.py CHANGED
@@ -103,6 +103,17 @@ def expand_config(config: FunctionConfigs) -> list[FunctionConfig]:
 
         return [FunctionConfig(function=function_name, args=args)]
 
+    if isinstance(config, list):
+        result = []
+        for item in config:
+            if isinstance(item, tuple) and len(item) >= 1 and isinstance(item[0], str):
+                function_name = item[0]
+                args = list(item[1:]) if len(item) > 1 else []
+                result.append(FunctionConfig(function=function_name, args=args))
+            else:
+                raise ValueError(f"Invalid list item configuration: {item}")
+        return result
+
     # Unknown configuration type
     error_msg = f"Unknown configuration type: {type(config)}"
     logger.error(error_msg)
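
The new list branch lets a config be given as a list of (function_name, *args) tuples, each expanded into its own FunctionConfig. A small sketch with made-up function names:

    from hud.utils.config import expand_config

    configs = expand_config(
        [
            ("browser.goto", "https://example.com"),  # hypothetical function name
            ("chrome.maximize",),                     # tuple with no args
        ]
    )
    for cfg in configs:
        print(cfg.function, cfg.args)
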