agentpool 2.1.9__py3-none-any.whl → 2.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acp/__init__.py +13 -0
- acp/bridge/README.md +15 -2
- acp/bridge/__init__.py +3 -2
- acp/bridge/__main__.py +60 -19
- acp/bridge/ws_server.py +173 -0
- acp/bridge/ws_server_cli.py +89 -0
- acp/notifications.py +2 -1
- acp/stdio.py +39 -9
- acp/transports.py +362 -2
- acp/utils.py +15 -2
- agentpool/__init__.py +4 -1
- agentpool/agents/__init__.py +2 -0
- agentpool/agents/acp_agent/acp_agent.py +203 -88
- agentpool/agents/acp_agent/acp_converters.py +46 -21
- agentpool/agents/acp_agent/client_handler.py +157 -3
- agentpool/agents/acp_agent/session_state.py +4 -1
- agentpool/agents/agent.py +314 -107
- agentpool/agents/agui_agent/__init__.py +0 -2
- agentpool/agents/agui_agent/agui_agent.py +90 -21
- agentpool/agents/agui_agent/agui_converters.py +0 -131
- agentpool/agents/base_agent.py +163 -1
- agentpool/agents/claude_code_agent/claude_code_agent.py +626 -179
- agentpool/agents/claude_code_agent/converters.py +71 -3
- agentpool/agents/claude_code_agent/history.py +474 -0
- agentpool/agents/context.py +40 -0
- agentpool/agents/events/__init__.py +2 -0
- agentpool/agents/events/builtin_handlers.py +2 -1
- agentpool/agents/events/event_emitter.py +29 -2
- agentpool/agents/events/events.py +20 -0
- agentpool/agents/modes.py +54 -0
- agentpool/agents/tool_call_accumulator.py +213 -0
- agentpool/common_types.py +21 -0
- agentpool/config_resources/__init__.py +38 -1
- agentpool/config_resources/claude_code_agent.yml +3 -0
- agentpool/delegation/pool.py +37 -29
- agentpool/delegation/team.py +1 -0
- agentpool/delegation/teamrun.py +1 -0
- agentpool/diagnostics/__init__.py +53 -0
- agentpool/diagnostics/lsp_manager.py +1593 -0
- agentpool/diagnostics/lsp_proxy.py +41 -0
- agentpool/diagnostics/lsp_proxy_script.py +229 -0
- agentpool/diagnostics/models.py +398 -0
- agentpool/mcp_server/__init__.py +0 -2
- agentpool/mcp_server/client.py +12 -3
- agentpool/mcp_server/manager.py +25 -31
- agentpool/mcp_server/registries/official_registry_client.py +25 -0
- agentpool/mcp_server/tool_bridge.py +78 -66
- agentpool/messaging/__init__.py +0 -2
- agentpool/messaging/compaction.py +72 -197
- agentpool/messaging/message_history.py +12 -0
- agentpool/messaging/messages.py +52 -9
- agentpool/messaging/processing.py +3 -1
- agentpool/models/acp_agents/base.py +0 -22
- agentpool/models/acp_agents/mcp_capable.py +8 -148
- agentpool/models/acp_agents/non_mcp.py +129 -72
- agentpool/models/agents.py +35 -13
- agentpool/models/claude_code_agents.py +33 -2
- agentpool/models/manifest.py +43 -0
- agentpool/repomap.py +1 -1
- agentpool/resource_providers/__init__.py +9 -1
- agentpool/resource_providers/aggregating.py +52 -3
- agentpool/resource_providers/base.py +57 -1
- agentpool/resource_providers/mcp_provider.py +23 -0
- agentpool/resource_providers/plan_provider.py +130 -41
- agentpool/resource_providers/pool.py +2 -0
- agentpool/resource_providers/static.py +2 -0
- agentpool/sessions/__init__.py +2 -1
- agentpool/sessions/manager.py +31 -2
- agentpool/sessions/models.py +50 -0
- agentpool/skills/registry.py +13 -8
- agentpool/storage/manager.py +217 -1
- agentpool/testing.py +537 -19
- agentpool/utils/file_watcher.py +269 -0
- agentpool/utils/identifiers.py +121 -0
- agentpool/utils/pydantic_ai_helpers.py +46 -0
- agentpool/utils/streams.py +690 -1
- agentpool/utils/subprocess_utils.py +155 -0
- agentpool/utils/token_breakdown.py +461 -0
- {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/METADATA +27 -7
- {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/RECORD +170 -112
- {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/WHEEL +1 -1
- agentpool_cli/__main__.py +4 -0
- agentpool_cli/serve_acp.py +41 -20
- agentpool_cli/serve_agui.py +87 -0
- agentpool_cli/serve_opencode.py +119 -0
- agentpool_commands/__init__.py +30 -0
- agentpool_commands/agents.py +74 -1
- agentpool_commands/history.py +62 -0
- agentpool_commands/mcp.py +176 -0
- agentpool_commands/models.py +56 -3
- agentpool_commands/tools.py +57 -0
- agentpool_commands/utils.py +51 -0
- agentpool_config/builtin_tools.py +77 -22
- agentpool_config/commands.py +24 -1
- agentpool_config/compaction.py +258 -0
- agentpool_config/mcp_server.py +131 -1
- agentpool_config/storage.py +46 -1
- agentpool_config/tools.py +7 -1
- agentpool_config/toolsets.py +92 -148
- agentpool_server/acp_server/acp_agent.py +134 -150
- agentpool_server/acp_server/commands/acp_commands.py +216 -51
- agentpool_server/acp_server/commands/docs_commands/fetch_repo.py +10 -10
- agentpool_server/acp_server/server.py +23 -79
- agentpool_server/acp_server/session.py +181 -19
- agentpool_server/opencode_server/.rules +95 -0
- agentpool_server/opencode_server/ENDPOINTS.md +362 -0
- agentpool_server/opencode_server/__init__.py +27 -0
- agentpool_server/opencode_server/command_validation.py +172 -0
- agentpool_server/opencode_server/converters.py +869 -0
- agentpool_server/opencode_server/dependencies.py +24 -0
- agentpool_server/opencode_server/input_provider.py +269 -0
- agentpool_server/opencode_server/models/__init__.py +228 -0
- agentpool_server/opencode_server/models/agent.py +53 -0
- agentpool_server/opencode_server/models/app.py +60 -0
- agentpool_server/opencode_server/models/base.py +26 -0
- agentpool_server/opencode_server/models/common.py +23 -0
- agentpool_server/opencode_server/models/config.py +37 -0
- agentpool_server/opencode_server/models/events.py +647 -0
- agentpool_server/opencode_server/models/file.py +88 -0
- agentpool_server/opencode_server/models/mcp.py +25 -0
- agentpool_server/opencode_server/models/message.py +162 -0
- agentpool_server/opencode_server/models/parts.py +190 -0
- agentpool_server/opencode_server/models/provider.py +81 -0
- agentpool_server/opencode_server/models/pty.py +43 -0
- agentpool_server/opencode_server/models/session.py +99 -0
- agentpool_server/opencode_server/routes/__init__.py +25 -0
- agentpool_server/opencode_server/routes/agent_routes.py +442 -0
- agentpool_server/opencode_server/routes/app_routes.py +139 -0
- agentpool_server/opencode_server/routes/config_routes.py +241 -0
- agentpool_server/opencode_server/routes/file_routes.py +392 -0
- agentpool_server/opencode_server/routes/global_routes.py +94 -0
- agentpool_server/opencode_server/routes/lsp_routes.py +319 -0
- agentpool_server/opencode_server/routes/message_routes.py +705 -0
- agentpool_server/opencode_server/routes/pty_routes.py +299 -0
- agentpool_server/opencode_server/routes/session_routes.py +1205 -0
- agentpool_server/opencode_server/routes/tui_routes.py +139 -0
- agentpool_server/opencode_server/server.py +430 -0
- agentpool_server/opencode_server/state.py +121 -0
- agentpool_server/opencode_server/time_utils.py +8 -0
- agentpool_storage/__init__.py +16 -0
- agentpool_storage/base.py +103 -0
- agentpool_storage/claude_provider.py +907 -0
- agentpool_storage/file_provider.py +129 -0
- agentpool_storage/memory_provider.py +61 -0
- agentpool_storage/models.py +3 -0
- agentpool_storage/opencode_provider.py +730 -0
- agentpool_storage/project_store.py +325 -0
- agentpool_storage/session_store.py +6 -0
- agentpool_storage/sql_provider/__init__.py +4 -2
- agentpool_storage/sql_provider/models.py +48 -0
- agentpool_storage/sql_provider/sql_provider.py +134 -1
- agentpool_storage/sql_provider/utils.py +10 -1
- agentpool_storage/text_log_provider.py +1 -0
- agentpool_toolsets/builtin/__init__.py +0 -8
- agentpool_toolsets/builtin/code.py +95 -56
- agentpool_toolsets/builtin/debug.py +16 -21
- agentpool_toolsets/builtin/execution_environment.py +99 -103
- agentpool_toolsets/builtin/file_edit/file_edit.py +115 -7
- agentpool_toolsets/builtin/skills.py +86 -4
- agentpool_toolsets/fsspec_toolset/__init__.py +13 -1
- agentpool_toolsets/fsspec_toolset/diagnostics.py +860 -73
- agentpool_toolsets/fsspec_toolset/grep.py +74 -2
- agentpool_toolsets/fsspec_toolset/image_utils.py +161 -0
- agentpool_toolsets/fsspec_toolset/toolset.py +159 -38
- agentpool_toolsets/mcp_discovery/__init__.py +5 -0
- agentpool_toolsets/mcp_discovery/data/mcp_servers.parquet +0 -0
- agentpool_toolsets/mcp_discovery/toolset.py +454 -0
- agentpool_toolsets/mcp_run_toolset.py +84 -6
- agentpool_toolsets/builtin/agent_management.py +0 -239
- agentpool_toolsets/builtin/history.py +0 -36
- agentpool_toolsets/builtin/integration.py +0 -85
- agentpool_toolsets/builtin/tool_management.py +0 -90
- {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/entry_points.txt +0 -0
- {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/licenses/LICENSE +0 -0
agentpool/testing.py
CHANGED
|
@@ -1,32 +1,35 @@
|
|
|
1
|
-
"""Testing utilities for end-to-end ACP testing.
|
|
1
|
+
"""Testing utilities for end-to-end ACP testing and CI integration.
|
|
2
2
|
|
|
3
|
-
This module provides
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
This module provides:
|
|
4
|
+
- A lightweight test harness for running end-to-end tests against the agentpool
|
|
5
|
+
ACP server using ACPAgent as the client
|
|
6
|
+
- GitHub CI integration for programmatically triggering and monitoring workflow runs
|
|
6
7
|
|
|
7
8
|
Example:
|
|
8
9
|
```python
|
|
10
|
+
# ACP testing
|
|
9
11
|
async def test_basic_prompt():
|
|
10
12
|
async with acp_test_session("tests/fixtures/simple.yml") as agent:
|
|
11
13
|
result = await agent.run("Say hello")
|
|
12
14
|
assert result.content
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
) as agent:
|
|
20
|
-
result = await agent.run("List files in the current directory")
|
|
21
|
-
assert "pyproject.toml" in result.content
|
|
16
|
+
# CI testing
|
|
17
|
+
async def test_commit_in_ci():
|
|
18
|
+
result = await run_ci_tests("abc123") # or "HEAD"
|
|
19
|
+
assert result.all_passed
|
|
20
|
+
print(result.summary())
|
|
22
21
|
```
|
|
23
22
|
"""
|
|
24
23
|
|
|
25
24
|
from __future__ import annotations
|
|
26
25
|
|
|
26
|
+
import asyncio
|
|
27
27
|
from contextlib import asynccontextmanager
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
import json
|
|
28
30
|
from pathlib import Path
|
|
29
|
-
|
|
31
|
+
import subprocess
|
|
32
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
30
33
|
|
|
31
34
|
|
|
32
35
|
if TYPE_CHECKING:
|
|
@@ -44,7 +47,6 @@ async def acp_test_session(
|
|
|
44
47
|
*,
|
|
45
48
|
file_access: bool = True,
|
|
46
49
|
terminal_access: bool = True,
|
|
47
|
-
providers: list[str] | None = None,
|
|
48
50
|
debug_messages: bool = False,
|
|
49
51
|
debug_file: str | None = None,
|
|
50
52
|
debug_commands: bool = False,
|
|
@@ -63,7 +65,6 @@ async def acp_test_session(
|
|
|
63
65
|
config: Path to agent configuration YAML file. If None, uses default config.
|
|
64
66
|
file_access: Enable file system access for agents.
|
|
65
67
|
terminal_access: Enable terminal access for agents.
|
|
66
|
-
providers: Model providers to search for models.
|
|
67
68
|
debug_messages: Save raw JSON-RPC messages to debug file.
|
|
68
69
|
debug_file: File path for JSON-RPC debug messages.
|
|
69
70
|
debug_commands: Enable debug slash commands for testing.
|
|
@@ -98,10 +99,6 @@ async def acp_test_session(
|
|
|
98
99
|
if not terminal_access:
|
|
99
100
|
args.append("--no-terminal-access")
|
|
100
101
|
|
|
101
|
-
if providers:
|
|
102
|
-
for provider in providers:
|
|
103
|
-
args.extend(["--model-provider", provider])
|
|
104
|
-
|
|
105
102
|
if debug_messages:
|
|
106
103
|
args.append("--debug-messages")
|
|
107
104
|
|
|
@@ -127,3 +124,524 @@ async def acp_test_session(
|
|
|
127
124
|
event_handlers=event_handlers,
|
|
128
125
|
) as acp_agent:
|
|
129
126
|
yield acp_agent
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# --- GitHub CI Testing ---
|
|
130
|
+
|
|
131
|
+
CheckResult = Literal["success", "failure", "skipped", "cancelled", "pending"]
|
|
132
|
+
OSChoice = Literal["ubuntu-latest", "macos-latest", "windows-latest"]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass
class CITestResult:
    """Result of a CI test run.

    Holds the per-check outcome (lint, format, typecheck, test) of one GitHub
    Actions workflow run together with metadata about the run, and offers
    helpers for summarising the outcome and fetching logs of failed steps
    through the ``gh`` CLI.
    """

    commit: str
    """The commit SHA that was tested."""

    run_id: int
    """GitHub Actions run ID."""

    run_url: str
    """URL to the workflow run."""

    lint: CheckResult = "pending"
    """Result of ruff check."""

    format: CheckResult = "pending"
    """Result of ruff format check."""

    typecheck: CheckResult = "pending"
    """Result of mypy type checking."""

    test: CheckResult = "pending"
    """Result of pytest."""

    os: str = "ubuntu-latest"
    """Operating system used for the run."""

    python_version: str = "3.13"
    """Python version used for the run."""

    duration_seconds: float = 0.0
    """Total duration of the CI run."""

    raw_jobs: list[dict[str, Any]] = field(default_factory=list)
    """Raw job data from GitHub API."""

    failed_logs: str | None = None
    """Logs from failed steps (fetched on demand)."""

    _repo: str | None = field(default=None, repr=False)
    """Repository for fetching logs."""

    @property
    def all_passed(self) -> bool:
        """Check if all enabled checks passed (skipped checks are ignored)."""
        return all(
            result in ("success", "skipped")
            for result in [self.lint, self.format, self.typecheck, self.test]
        )

    @property
    def any_failed(self) -> bool:
        """Check if any check failed."""
        return any(
            result == "failure" for result in [self.lint, self.format, self.typecheck, self.test]
        )

    def summary(self) -> str:
        """Generate a human-readable summary.

        Returns:
            Multi-line text with the commit, run URL, environment, one line per
            check and the total duration. A status that has no icon is shown
            with "?" instead of raising.
        """
        status_icons = {
            "success": "✓",
            "failure": "✗",
            "skipped": "○",
            "cancelled": "⊘",
            "pending": "…",
        }

        def icon(status: str) -> str:
            # Tolerate unexpected status strings so that reporting never crashes.
            return status_icons.get(status, "?")

        lines = [
            f"CI Results for {self.commit[:8]}",
            f"Run: {self.run_url}",
            f"OS: {self.os} | Python: {self.python_version}",
            "",
            f" {icon(self.lint)} Lint (ruff check): {self.lint}",
            f" {icon(self.format)} Format (ruff format): {self.format}",
            f" {icon(self.typecheck)} Type check (mypy): {self.typecheck}",
            f" {icon(self.test)} Tests (pytest): {self.test}",
            "",
            f"Duration: {self.duration_seconds:.1f}s",
        ]
        return "\n".join(lines)

    @staticmethod
    def _tail(lines: list[str], count: int) -> list[str]:
        """Return the last ``count`` entries of ``lines`` (none when ``count <= 0``)."""
        # A plain lines[-count:] would return *everything* for count == 0.
        return lines[-count:] if count > 0 else []

    def fetch_failed_logs(self, max_lines: int = 200) -> str:
        """Fetch logs from failed steps.

        Runs ``gh run view <run_id> --log-failed`` and stores the (truncated)
        output in ``failed_logs``.

        Args:
            max_lines: Maximum number of log lines to return.

        Returns:
            Log output from failed steps, or empty string if no check failed
            or the ``gh`` command could not be run successfully.
        """
        if not self.any_failed:
            return ""

        repo_args = ["-R", self._repo] if self._repo else []
        try:
            result = subprocess.run(
                ["gh", "run", "view", str(self.run_id), "--log-failed", *repo_args],
                capture_output=True,
                text=True,
                check=True,
            )
        except (subprocess.CalledProcessError, OSError):
            # Best effort: a failing or missing gh binary yields "no logs".
            return ""

        # Keep the last N lines (most relevant)
        lines = self._tail(result.stdout.strip().split("\n"), max_lines)
        self.failed_logs = "\n".join(lines)
        return self.failed_logs

    def get_failure_summary(self, max_lines: int = 50) -> str:
        """Get a concise summary of failures.

        Args:
            max_lines: Maximum number of lines in the returned summary.

        Returns:
            Summary including the test/check that failed and key error lines,
            or a fixed notice when no failure logs are available.
        """
        logs = self.fetch_failed_logs(max_lines=max_lines * 2)
        if not logs:
            return "No failure logs available."

        # Extract key lines (errors, failures, assertions)
        key_patterns = ["FAILED", "Error", "error:", "AssertionError", "Timeout", "Exception"]
        key_lines = []
        for line in logs.split("\n"):
            if any(p in line for p in key_patterns):
                # Clean up the line: with three or more tab-separated columns
                # only the last column (the message) is kept.
                parts = line.split("\t")
                if len(parts) >= 3:  # noqa: PLR2004
                    key_lines.append(parts[-1].strip())
                else:
                    key_lines.append(line.strip())

        if key_lines:
            return "\n".join(key_lines[: max(max_lines, 0)])
        # Fall back to last N lines
        return "\n".join(self._tail(logs.split("\n"), max_lines))
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _run_gh(*args: str) -> str:
    """Invoke the GitHub CLI with ``args`` and return its stripped stdout.

    Raises:
        subprocess.CalledProcessError: If ``gh`` exits with a non-zero status.
    """
    command = ["gh", *args]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    output = completed.stdout
    return output.strip()
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _resolve_commit(commit: str) -> str:
|
|
286
|
+
"""Resolve a commit reference to a full SHA."""
|
|
287
|
+
if commit.upper() == "HEAD":
|
|
288
|
+
result = subprocess.run(
|
|
289
|
+
["git", "rev-parse", "HEAD"],
|
|
290
|
+
capture_output=True,
|
|
291
|
+
text=True,
|
|
292
|
+
check=True,
|
|
293
|
+
)
|
|
294
|
+
return result.stdout.strip()
|
|
295
|
+
return commit
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
async def run_ci_tests(
    commit: str = "HEAD",
    *,
    repo: str | None = None,
    poll_interval: float = 10.0,
    timeout: float = 600.0,
    os: OSChoice = "ubuntu-latest",
    python_version: str = "3.13",
    run_lint: bool = True,
    run_format: bool = True,
    run_typecheck: bool = True,
    test_command: str | None = "pytest --tb=short",
) -> CITestResult:
    """Trigger CI tests for a commit and wait for results.

    This function triggers the test-commit.yml workflow via the GitHub CLI,
    polls for completion, and returns structured results. All ``gh``/``git``
    invocations run in a worker thread so the event loop is not blocked.

    Args:
        commit: Commit SHA or "HEAD" to test. Defaults to HEAD.
        repo: Repository in "owner/repo" format. Auto-detected if None.
        poll_interval: Seconds between status checks. Defaults to 10.
        timeout: Maximum seconds to wait for completion. Defaults to 600 (10 min).
        os: Operating system to run on. Defaults to "ubuntu-latest".
        python_version: Python version to use. Defaults to "3.13".
        run_lint: Whether to run ruff check. Defaults to True.
        run_format: Whether to run ruff format check. Defaults to True.
        run_typecheck: Whether to run mypy type checking. Defaults to True.
        test_command: Pytest command to run, or None to skip tests.
            Defaults to "pytest --tb=short". Use "-k pattern" to filter tests.

    Returns:
        CITestResult with individual check results.

    Raises:
        TimeoutError: If the workflow doesn't complete within timeout.
        RuntimeError: If the triggered workflow run cannot be located.
        subprocess.CalledProcessError: If gh CLI commands fail.

    Example:
        ```python
        # Run all checks
        result = await run_ci_tests("abc123")

        # Run specific test on Windows
        result = await run_ci_tests(
            "abc123",
            os="windows-latest",
            run_lint=False,
            run_format=False,
            run_typecheck=False,
            test_command="pytest -k test_acp_agent --tb=short",
        )

        if result.all_passed:
            print("All checks passed!")
        else:
            print(result.summary())
        ```
    """
    import time

    discovery_attempts = 5  # how often to look for the freshly created run
    discovery_delay = 2  # seconds to wait before each look-up

    commit_sha = await asyncio.to_thread(_resolve_commit, commit)
    start_time = time.monotonic()

    # Build repo flag if specified
    repo_args = ["-R", repo] if repo else []

    list_args = [
        "run",
        "list",
        "--workflow=test-commit.yml",
        "--json=databaseId,headSha,status,url",
        "--limit=5",
        *repo_args,
    ]

    # Remember which runs already exist so the run we are about to create can be
    # told apart from older runs of the same workflow that are still active.
    existing_json = await asyncio.to_thread(_run_gh, *list_args)
    known_ids = {run["databaseId"] for run in json.loads(existing_json or "[]")}

    # Trigger the workflow with parameters
    inputs = {
        "commit": commit_sha,
        "os": os,
        "python_version": python_version,
        "run_lint": str(run_lint).lower(),
        "run_format": str(run_format).lower(),
        "run_typecheck": str(run_typecheck).lower(),
        "test_command": test_command or "",
    }
    workflow_args = ["workflow", "run", "test-commit.yml"]
    for key, value in inputs.items():
        workflow_args += ["-f", f"{key}={value}"]
    workflow_args += repo_args
    await asyncio.to_thread(_run_gh, *workflow_args)

    # Find the run ID. The run is created asynchronously after the dispatch,
    # so look for it a few times instead of relying on one fixed delay.
    # NOTE: a workflow_dispatch run reports the branch HEAD as headSha, not the
    # "commit" input, so the run cannot be matched by SHA; a dispatch issued by
    # someone else at the same moment can therefore still be picked up.
    run_id: int | None = None
    run_url = ""
    for _ in range(discovery_attempts):
        await asyncio.sleep(discovery_delay)
        runs_json = await asyncio.to_thread(_run_gh, *list_args)
        runs = json.loads(runs_json or "[]")
        new_run = next((run for run in runs if run["databaseId"] not in known_ids), None)
        if new_run is not None:
            run_id = new_run["databaseId"]
            run_url = new_run["url"]
            break

    if run_id is None:
        msg = f"Could not find workflow run for commit {commit_sha}"
        raise RuntimeError(msg)

    # Poll for completion
    while True:
        elapsed = time.monotonic() - start_time
        if elapsed > timeout:
            msg = f"Workflow run {run_id} did not complete within {timeout}s"
            raise TimeoutError(msg)

        run_json = await asyncio.to_thread(
            _run_gh,
            "run",
            "view",
            str(run_id),
            "--json=status,conclusion,jobs",
            *repo_args,
        )
        run_data = json.loads(run_json)

        if run_data["status"] == "completed":
            break

        await asyncio.sleep(poll_interval)

    # Parse job results
    duration = time.monotonic() - start_time
    jobs = run_data.get("jobs", [])

    run_test = test_command is not None and test_command != ""

    result = CITestResult(
        commit=commit_sha,
        run_id=run_id,
        run_url=run_url,
        os=os,
        python_version=python_version,
        duration_seconds=duration,
        raw_jobs=jobs,
        _repo=repo,
        # Set skipped for disabled checks
        lint="skipped" if not run_lint else "pending",
        format="skipped" if not run_format else "pending",
        typecheck="skipped" if not run_typecheck else "pending",
        test="skipped" if not run_test else "pending",
    )

    # Map job names to results (only for enabled checks)
    for job in jobs:
        name = job.get("name", "").lower()
        conclusion = job.get("conclusion", "pending")

        # Normalize conclusion to our type
        if conclusion not in ("success", "failure", "skipped", "cancelled"):
            conclusion = "pending"

        if "lint" in name and "format" not in name and run_lint:
            result.lint = conclusion
        elif "format" in name and run_format:
            result.format = conclusion
        elif ("type" in name or "mypy" in name) and run_typecheck:
            result.typecheck = conclusion
        elif ("test" in name or "pytest" in name) and run_test:
            result.test = conclusion

    return result
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
@dataclass
class BisectResult:
    """Outcome of bisecting a commit range against CI."""

    first_bad_commit: str
    """Earliest commit in the range whose checks did not pass."""

    last_good_commit: str
    """Commit directly preceding the first bad one (known or assumed good)."""

    commits_tested: list[CITestResult] = field(default_factory=list)
    """CI results of every commit probed while bisecting, in probe order."""

    total_commits_in_range: int = 0
    """Number of commits in the half-open range (good, bad]."""

    steps_taken: int = 0
    """How many bisection steps were executed."""

    def summary(self) -> str:
        """Render the bisect outcome as multi-line, human-readable text."""
        header = [
            "Bisect Results",
            "=" * 40,
            f"First bad commit: {self.first_bad_commit[:12]}",
            f"Last good commit: {self.last_good_commit[:12]}",
            f"Commits in range: {self.total_commits_in_range}",
            f"Steps taken: {self.steps_taken}",
            "",
            "Tested commits:",
        ]
        # One line per probed commit, marked with a pass/fail symbol.
        probes = [
            f" {'✓' if probe.all_passed else '✗'} {probe.commit[:12]}"
            for probe in self.commits_tested
        ]
        return "\n".join(header + probes)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _get_commits_between(good: str, bad: str) -> list[str]:
    """List the commits after ``good`` up to and including ``bad``, oldest first.

    Uses ``git rev-list --ancestry-path good..bad``; an empty list is returned
    when git reports no commits for that range.
    """
    completed = subprocess.run(
        ["git", "rev-list", "--ancestry-path", f"{good}..{bad}"],
        capture_output=True,
        text=True,
        check=True,
    )
    listing = completed.stdout.strip()
    if not listing:
        return []
    # git prints newest first; bisection wants chronological order.
    return listing.split("\n")[::-1]
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
async def bisect_ci(
    good_commit: str,
    bad_commit: str = "HEAD",
    *,
    repo: str | None = None,
    poll_interval: float = 10.0,
    timeout: float = 600.0,
    os: OSChoice = "ubuntu-latest",
    python_version: str = "3.13",
    run_lint: bool = True,
    run_format: bool = True,
    run_typecheck: bool = True,
    test_command: str | None = "pytest --tb=short",
) -> BisectResult:
    """Locate the first CI-breaking commit by binary search.

    The commits after ``good_commit`` up to and including ``bad_commit`` are
    bisected; each probe runs the CI workflow through ``run_ci_tests``.
    ``bad_commit`` itself is assumed to fail and is never probed.

    Args:
        good_commit: A commit SHA known to pass all enabled checks.
        bad_commit: A commit SHA known to fail. Defaults to HEAD.
        repo: Repository in "owner/repo" format. Auto-detected if None.
        poll_interval: Seconds between status checks. Defaults to 10.
        timeout: Timeout per CI run in seconds. Defaults to 600.
        os: Operating system to run on. Defaults to "ubuntu-latest".
        python_version: Python version to use. Defaults to "3.13".
        run_lint: Whether to run ruff check. Defaults to True.
        run_format: Whether to run ruff format check. Defaults to True.
        run_typecheck: Whether to run mypy type checking. Defaults to True.
        test_command: Pytest command to run, or None to skip tests.

    Returns:
        BisectResult with the first bad commit and bisection details.

    Raises:
        ValueError: If there are no commits between the two references.

    Example:
        ```python
        # Find which commit broke a specific test on Windows
        result = await bisect_ci(
            good_commit="abc123",
            bad_commit="HEAD",
            os="windows-latest",
            run_lint=False,
            run_format=False,
            run_typecheck=False,
            test_command="pytest -k test_acp_agent --tb=short",
        )
        print(f"Tests broke at: {result.first_bad_commit}")
        ```
    """
    good_sha = _resolve_commit(good_commit)
    bad_sha = _resolve_commit(bad_commit)

    # Candidate commits, oldest first
    candidates = _get_commits_between(good_sha, bad_sha)
    if not candidates:
        msg = f"No commits found between {good_sha[:12]} and {bad_sha[:12]}"
        raise ValueError(msg)

    # Options shared by every CI probe
    ci_options: dict[str, Any] = {
        "repo": repo,
        "poll_interval": poll_interval,
        "timeout": timeout,
        "os": os,
        "python_version": python_version,
        "run_lint": run_lint,
        "run_format": run_format,
        "run_typecheck": run_typecheck,
        "test_command": test_command,
    }

    history: list[CITestResult] = []
    step_count = 0
    lo, hi = 0, len(candidates) - 1

    # Invariant: everything before candidates[lo] is good, candidates[hi] is bad
    while lo < hi:
        probe = (lo + hi) // 2
        step_count += 1

        outcome = await run_ci_tests(candidates[probe], **ci_options)
        history.append(outcome)

        if outcome.all_passed:
            lo = probe + 1  # probe is good: the culprit is later
        else:
            hi = probe  # probe is bad: the culprit is here or earlier

    # The commit right before the culprit; the caller's good commit when the
    # culprit is the very first candidate.
    predecessor = candidates[hi - 1] if hi != 0 else good_sha

    return BisectResult(
        first_bad_commit=candidates[hi],
        last_good_commit=predecessor,
        commits_tested=history,
        total_commits_in_range=len(candidates),
        steps_taken=step_count,
    )
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
async def quick_ci_check(commit: str = "HEAD") -> bool:
    """Report whether a commit passes every CI check.

    Thin convenience wrapper that runs ``run_ci_tests`` with its default
    options and collapses the result to a boolean.

    Args:
        commit: Commit SHA or "HEAD" to test.

    Returns:
        True if all checks passed, False otherwise.
    """
    outcome = await run_ci_tests(commit)
    passed = outcome.all_passed
    return passed
|