agentpool 2.1.9__py3-none-any.whl → 2.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. acp/__init__.py +13 -0
  2. acp/bridge/README.md +15 -2
  3. acp/bridge/__init__.py +3 -2
  4. acp/bridge/__main__.py +60 -19
  5. acp/bridge/ws_server.py +173 -0
  6. acp/bridge/ws_server_cli.py +89 -0
  7. acp/notifications.py +2 -1
  8. acp/stdio.py +39 -9
  9. acp/transports.py +362 -2
  10. acp/utils.py +15 -2
  11. agentpool/__init__.py +4 -1
  12. agentpool/agents/__init__.py +2 -0
  13. agentpool/agents/acp_agent/acp_agent.py +203 -88
  14. agentpool/agents/acp_agent/acp_converters.py +46 -21
  15. agentpool/agents/acp_agent/client_handler.py +157 -3
  16. agentpool/agents/acp_agent/session_state.py +4 -1
  17. agentpool/agents/agent.py +314 -107
  18. agentpool/agents/agui_agent/__init__.py +0 -2
  19. agentpool/agents/agui_agent/agui_agent.py +90 -21
  20. agentpool/agents/agui_agent/agui_converters.py +0 -131
  21. agentpool/agents/base_agent.py +163 -1
  22. agentpool/agents/claude_code_agent/claude_code_agent.py +626 -179
  23. agentpool/agents/claude_code_agent/converters.py +71 -3
  24. agentpool/agents/claude_code_agent/history.py +474 -0
  25. agentpool/agents/context.py +40 -0
  26. agentpool/agents/events/__init__.py +2 -0
  27. agentpool/agents/events/builtin_handlers.py +2 -1
  28. agentpool/agents/events/event_emitter.py +29 -2
  29. agentpool/agents/events/events.py +20 -0
  30. agentpool/agents/modes.py +54 -0
  31. agentpool/agents/tool_call_accumulator.py +213 -0
  32. agentpool/common_types.py +21 -0
  33. agentpool/config_resources/__init__.py +38 -1
  34. agentpool/config_resources/claude_code_agent.yml +3 -0
  35. agentpool/delegation/pool.py +37 -29
  36. agentpool/delegation/team.py +1 -0
  37. agentpool/delegation/teamrun.py +1 -0
  38. agentpool/diagnostics/__init__.py +53 -0
  39. agentpool/diagnostics/lsp_manager.py +1593 -0
  40. agentpool/diagnostics/lsp_proxy.py +41 -0
  41. agentpool/diagnostics/lsp_proxy_script.py +229 -0
  42. agentpool/diagnostics/models.py +398 -0
  43. agentpool/mcp_server/__init__.py +0 -2
  44. agentpool/mcp_server/client.py +12 -3
  45. agentpool/mcp_server/manager.py +25 -31
  46. agentpool/mcp_server/registries/official_registry_client.py +25 -0
  47. agentpool/mcp_server/tool_bridge.py +78 -66
  48. agentpool/messaging/__init__.py +0 -2
  49. agentpool/messaging/compaction.py +72 -197
  50. agentpool/messaging/message_history.py +12 -0
  51. agentpool/messaging/messages.py +52 -9
  52. agentpool/messaging/processing.py +3 -1
  53. agentpool/models/acp_agents/base.py +0 -22
  54. agentpool/models/acp_agents/mcp_capable.py +8 -148
  55. agentpool/models/acp_agents/non_mcp.py +129 -72
  56. agentpool/models/agents.py +35 -13
  57. agentpool/models/claude_code_agents.py +33 -2
  58. agentpool/models/manifest.py +43 -0
  59. agentpool/repomap.py +1 -1
  60. agentpool/resource_providers/__init__.py +9 -1
  61. agentpool/resource_providers/aggregating.py +52 -3
  62. agentpool/resource_providers/base.py +57 -1
  63. agentpool/resource_providers/mcp_provider.py +23 -0
  64. agentpool/resource_providers/plan_provider.py +130 -41
  65. agentpool/resource_providers/pool.py +2 -0
  66. agentpool/resource_providers/static.py +2 -0
  67. agentpool/sessions/__init__.py +2 -1
  68. agentpool/sessions/manager.py +31 -2
  69. agentpool/sessions/models.py +50 -0
  70. agentpool/skills/registry.py +13 -8
  71. agentpool/storage/manager.py +217 -1
  72. agentpool/testing.py +537 -19
  73. agentpool/utils/file_watcher.py +269 -0
  74. agentpool/utils/identifiers.py +121 -0
  75. agentpool/utils/pydantic_ai_helpers.py +46 -0
  76. agentpool/utils/streams.py +690 -1
  77. agentpool/utils/subprocess_utils.py +155 -0
  78. agentpool/utils/token_breakdown.py +461 -0
  79. {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/METADATA +27 -7
  80. {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/RECORD +170 -112
  81. {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/WHEEL +1 -1
  82. agentpool_cli/__main__.py +4 -0
  83. agentpool_cli/serve_acp.py +41 -20
  84. agentpool_cli/serve_agui.py +87 -0
  85. agentpool_cli/serve_opencode.py +119 -0
  86. agentpool_commands/__init__.py +30 -0
  87. agentpool_commands/agents.py +74 -1
  88. agentpool_commands/history.py +62 -0
  89. agentpool_commands/mcp.py +176 -0
  90. agentpool_commands/models.py +56 -3
  91. agentpool_commands/tools.py +57 -0
  92. agentpool_commands/utils.py +51 -0
  93. agentpool_config/builtin_tools.py +77 -22
  94. agentpool_config/commands.py +24 -1
  95. agentpool_config/compaction.py +258 -0
  96. agentpool_config/mcp_server.py +131 -1
  97. agentpool_config/storage.py +46 -1
  98. agentpool_config/tools.py +7 -1
  99. agentpool_config/toolsets.py +92 -148
  100. agentpool_server/acp_server/acp_agent.py +134 -150
  101. agentpool_server/acp_server/commands/acp_commands.py +216 -51
  102. agentpool_server/acp_server/commands/docs_commands/fetch_repo.py +10 -10
  103. agentpool_server/acp_server/server.py +23 -79
  104. agentpool_server/acp_server/session.py +181 -19
  105. agentpool_server/opencode_server/.rules +95 -0
  106. agentpool_server/opencode_server/ENDPOINTS.md +362 -0
  107. agentpool_server/opencode_server/__init__.py +27 -0
  108. agentpool_server/opencode_server/command_validation.py +172 -0
  109. agentpool_server/opencode_server/converters.py +869 -0
  110. agentpool_server/opencode_server/dependencies.py +24 -0
  111. agentpool_server/opencode_server/input_provider.py +269 -0
  112. agentpool_server/opencode_server/models/__init__.py +228 -0
  113. agentpool_server/opencode_server/models/agent.py +53 -0
  114. agentpool_server/opencode_server/models/app.py +60 -0
  115. agentpool_server/opencode_server/models/base.py +26 -0
  116. agentpool_server/opencode_server/models/common.py +23 -0
  117. agentpool_server/opencode_server/models/config.py +37 -0
  118. agentpool_server/opencode_server/models/events.py +647 -0
  119. agentpool_server/opencode_server/models/file.py +88 -0
  120. agentpool_server/opencode_server/models/mcp.py +25 -0
  121. agentpool_server/opencode_server/models/message.py +162 -0
  122. agentpool_server/opencode_server/models/parts.py +190 -0
  123. agentpool_server/opencode_server/models/provider.py +81 -0
  124. agentpool_server/opencode_server/models/pty.py +43 -0
  125. agentpool_server/opencode_server/models/session.py +99 -0
  126. agentpool_server/opencode_server/routes/__init__.py +25 -0
  127. agentpool_server/opencode_server/routes/agent_routes.py +442 -0
  128. agentpool_server/opencode_server/routes/app_routes.py +139 -0
  129. agentpool_server/opencode_server/routes/config_routes.py +241 -0
  130. agentpool_server/opencode_server/routes/file_routes.py +392 -0
  131. agentpool_server/opencode_server/routes/global_routes.py +94 -0
  132. agentpool_server/opencode_server/routes/lsp_routes.py +319 -0
  133. agentpool_server/opencode_server/routes/message_routes.py +705 -0
  134. agentpool_server/opencode_server/routes/pty_routes.py +299 -0
  135. agentpool_server/opencode_server/routes/session_routes.py +1205 -0
  136. agentpool_server/opencode_server/routes/tui_routes.py +139 -0
  137. agentpool_server/opencode_server/server.py +430 -0
  138. agentpool_server/opencode_server/state.py +121 -0
  139. agentpool_server/opencode_server/time_utils.py +8 -0
  140. agentpool_storage/__init__.py +16 -0
  141. agentpool_storage/base.py +103 -0
  142. agentpool_storage/claude_provider.py +907 -0
  143. agentpool_storage/file_provider.py +129 -0
  144. agentpool_storage/memory_provider.py +61 -0
  145. agentpool_storage/models.py +3 -0
  146. agentpool_storage/opencode_provider.py +730 -0
  147. agentpool_storage/project_store.py +325 -0
  148. agentpool_storage/session_store.py +6 -0
  149. agentpool_storage/sql_provider/__init__.py +4 -2
  150. agentpool_storage/sql_provider/models.py +48 -0
  151. agentpool_storage/sql_provider/sql_provider.py +134 -1
  152. agentpool_storage/sql_provider/utils.py +10 -1
  153. agentpool_storage/text_log_provider.py +1 -0
  154. agentpool_toolsets/builtin/__init__.py +0 -8
  155. agentpool_toolsets/builtin/code.py +95 -56
  156. agentpool_toolsets/builtin/debug.py +16 -21
  157. agentpool_toolsets/builtin/execution_environment.py +99 -103
  158. agentpool_toolsets/builtin/file_edit/file_edit.py +115 -7
  159. agentpool_toolsets/builtin/skills.py +86 -4
  160. agentpool_toolsets/fsspec_toolset/__init__.py +13 -1
  161. agentpool_toolsets/fsspec_toolset/diagnostics.py +860 -73
  162. agentpool_toolsets/fsspec_toolset/grep.py +74 -2
  163. agentpool_toolsets/fsspec_toolset/image_utils.py +161 -0
  164. agentpool_toolsets/fsspec_toolset/toolset.py +159 -38
  165. agentpool_toolsets/mcp_discovery/__init__.py +5 -0
  166. agentpool_toolsets/mcp_discovery/data/mcp_servers.parquet +0 -0
  167. agentpool_toolsets/mcp_discovery/toolset.py +454 -0
  168. agentpool_toolsets/mcp_run_toolset.py +84 -6
  169. agentpool_toolsets/builtin/agent_management.py +0 -239
  170. agentpool_toolsets/builtin/history.py +0 -36
  171. agentpool_toolsets/builtin/integration.py +0 -85
  172. agentpool_toolsets/builtin/tool_management.py +0 -90
  173. {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/entry_points.txt +0 -0
  174. {agentpool-2.1.9.dist-info → agentpool-2.2.3.dist-info}/licenses/LICENSE +0 -0
agentpool/testing.py CHANGED
@@ -1,32 +1,35 @@
1
- """Testing utilities for end-to-end ACP testing.
1
+ """Testing utilities for end-to-end ACP testing and CI integration.
2
2
 
3
- This module provides a lightweight test harness for running end-to-end tests
4
- against the agentpool ACP server. It uses ACPAgent as the client, connecting
5
- to a agentpool serve-acp subprocess.
3
+ This module provides:
4
+ - A lightweight test harness for running end-to-end tests against the agentpool
5
+ ACP server using ACPAgent as the client
6
+ - GitHub CI integration for programmatically triggering and monitoring workflow runs
6
7
 
7
8
  Example:
8
9
  ```python
10
+ # ACP testing
9
11
  async def test_basic_prompt():
10
12
  async with acp_test_session("tests/fixtures/simple.yml") as agent:
11
13
  result = await agent.run("Say hello")
12
14
  assert result.content
13
15
 
14
- async def test_filesystem_tool():
15
- async with acp_test_session(
16
- "tests/fixtures/with_tools.yml",
17
- file_access=True,
18
- terminal_access=True,
19
- ) as agent:
20
- result = await agent.run("List files in the current directory")
21
- assert "pyproject.toml" in result.content
16
+ # CI testing
17
+ async def test_commit_in_ci():
18
+ result = await run_ci_tests("abc123") # or "HEAD"
19
+ assert result.all_passed
20
+ print(result.summary())
22
21
  ```
23
22
  """
24
23
 
25
24
  from __future__ import annotations
26
25
 
26
+ import asyncio
27
27
  from contextlib import asynccontextmanager
28
+ from dataclasses import dataclass, field
29
+ import json
28
30
  from pathlib import Path
29
- from typing import TYPE_CHECKING, Any
31
+ import subprocess
32
+ from typing import TYPE_CHECKING, Any, Literal
30
33
 
31
34
 
32
35
  if TYPE_CHECKING:
@@ -44,7 +47,6 @@ async def acp_test_session(
44
47
  *,
45
48
  file_access: bool = True,
46
49
  terminal_access: bool = True,
47
- providers: list[str] | None = None,
48
50
  debug_messages: bool = False,
49
51
  debug_file: str | None = None,
50
52
  debug_commands: bool = False,
@@ -63,7 +65,6 @@ async def acp_test_session(
63
65
  config: Path to agent configuration YAML file. If None, uses default config.
64
66
  file_access: Enable file system access for agents.
65
67
  terminal_access: Enable terminal access for agents.
66
- providers: Model providers to search for models.
67
68
  debug_messages: Save raw JSON-RPC messages to debug file.
68
69
  debug_file: File path for JSON-RPC debug messages.
69
70
  debug_commands: Enable debug slash commands for testing.
@@ -98,10 +99,6 @@ async def acp_test_session(
98
99
  if not terminal_access:
99
100
  args.append("--no-terminal-access")
100
101
 
101
- if providers:
102
- for provider in providers:
103
- args.extend(["--model-provider", provider])
104
-
105
102
  if debug_messages:
106
103
  args.append("--debug-messages")
107
104
 
@@ -127,3 +124,524 @@ async def acp_test_session(
127
124
  event_handlers=event_handlers,
128
125
  ) as acp_agent:
129
126
  yield acp_agent
127
+
128
+
129
+ # --- GitHub CI Testing ---
130
+
131
+ CheckResult = Literal["success", "failure", "skipped", "cancelled", "pending"]
132
+ OSChoice = Literal["ubuntu-latest", "macos-latest", "windows-latest"]
133
+
134
+
135
@dataclass
class CITestResult:
    """Result of a CI test run.

    Holds one result per check (lint, format, typecheck, test) plus metadata
    about the GitHub Actions run they came from. Every check defaults to
    "pending"; "skipped" checks are treated as passing by ``all_passed``.
    """

    commit: str
    """The commit SHA that was tested."""

    run_id: int
    """GitHub Actions run ID."""

    run_url: str
    """URL to the workflow run."""

    lint: CheckResult = "pending"
    """Result of ruff check."""

    format: CheckResult = "pending"
    """Result of ruff format check."""

    typecheck: CheckResult = "pending"
    """Result of mypy type checking."""

    test: CheckResult = "pending"
    """Result of pytest."""

    os: str = "ubuntu-latest"
    """Operating system used for the run."""

    python_version: str = "3.13"
    """Python version used for the run."""

    duration_seconds: float = 0.0
    """Total duration of the CI run."""

    raw_jobs: list[dict[str, Any]] = field(default_factory=list)
    """Raw job data from GitHub API."""

    failed_logs: str | None = None
    """Logs from failed steps (populated by ``fetch_failed_logs``)."""

    _repo: str | None = field(default=None, repr=False)
    """Repository for fetching logs."""

    def _checks(self) -> list[str]:
        """Return the four check results in display order."""
        return [self.lint, self.format, self.typecheck, self.test]

    @staticmethod
    def _tail(lines: list[str], count: int) -> list[str]:
        """Return the last ``count`` entries of ``lines`` (none if ``count <= 0``)."""
        # A bare ``lines[-count:]`` would return the *entire* list for count == 0.
        return lines[-count:] if count > 0 else []

    @property
    def all_passed(self) -> bool:
        """Check if all enabled checks passed (skipped checks are ignored)."""
        return all(result in ("success", "skipped") for result in self._checks())

    @property
    def any_failed(self) -> bool:
        """Check if any check failed."""
        return any(result == "failure" for result in self._checks())

    def summary(self) -> str:
        """Generate a human-readable summary.

        Returns:
            Multi-line text with the commit prefix, run URL, environment,
            one line per check, and the run duration.
        """
        status_icons = {
            "success": "✓",
            "failure": "✗",
            "skipped": "○",
            "cancelled": "⊘",
            "pending": "…",
        }
        lines = [
            f"CI Results for {self.commit[:8]}",
            f"Run: {self.run_url}",
            f"OS: {self.os} | Python: {self.python_version}",
            "",
            f" {status_icons[self.lint]} Lint (ruff check): {self.lint}",
            f" {status_icons[self.format]} Format (ruff format): {self.format}",
            f" {status_icons[self.typecheck]} Type check (mypy): {self.typecheck}",
            f" {status_icons[self.test]} Tests (pytest): {self.test}",
            "",
            f"Duration: {self.duration_seconds:.1f}s",
        ]
        return "\n".join(lines)

    def fetch_failed_logs(self, max_lines: int = 200) -> str:
        """Fetch logs from failed steps via ``gh run view --log-failed``.

        This is a best-effort helper: any failure to obtain the logs (the
        ``gh`` command exiting non-zero, or ``gh`` not being runnable at all)
        yields an empty string instead of an exception. On success the
        truncated log is also stored in ``failed_logs``.

        Args:
            max_lines: Maximum number of log lines to return.

        Returns:
            Log output from failed steps, or empty string if no failures
            were recorded or the logs could not be fetched.
        """
        if not self.any_failed:
            return ""

        repo_args = ["-R", self._repo] if self._repo else []
        try:
            result = subprocess.run(
                ["gh", "run", "view", str(self.run_id), "--log-failed", *repo_args],
                capture_output=True,
                text=True,
                check=True,
            )
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing / non-executable ``gh`` binary, which
            # would otherwise escape this otherwise non-raising helper.
            return ""

        # Keep only the last N lines (most relevant).
        lines = self._tail(result.stdout.strip().split("\n"), max_lines)
        self.failed_logs = "\n".join(lines)
        return self.failed_logs

    def get_failure_summary(self, max_lines: int = 50) -> str:
        """Get a concise summary of failures.

        Args:
            max_lines: Maximum number of lines in the returned summary.
                Twice this many raw log lines are fetched to search through.

        Returns:
            Summary including the test/check that failed and key error lines,
            or a fixed placeholder message when no logs are available.
        """
        logs = self.fetch_failed_logs(max_lines=max_lines * 2)
        if not logs:
            return "No failure logs available."

        # Extract key lines (errors, failures, assertions)
        key_patterns = ("FAILED", "Error", "error:", "AssertionError", "Timeout", "Exception")
        log_lines = logs.split("\n")
        key_lines = []
        for line in log_lines:
            if not any(pattern in line for pattern in key_patterns):
                continue
            # Lines with at least three tab-separated fields are reduced to
            # their final field (drops the leading prefix columns).
            parts = line.split("\t")
            if len(parts) >= 3:  # noqa: PLR2004
                key_lines.append(parts[-1].strip())
            else:
                key_lines.append(line.strip())

        if key_lines:
            return "\n".join(key_lines[:max_lines])
        # Fall back to last N lines
        return "\n".join(self._tail(log_lines, max_lines))
272
+
273
+
274
def _run_gh(*args: str) -> str:
    """Invoke the ``gh`` CLI with ``args`` and return its stripped stdout.

    Raises:
        subprocess.CalledProcessError: If ``gh`` exits with a non-zero status.
    """
    command = ["gh", *args]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    return completed.stdout.strip()
283
+
284
+
285
+ def _resolve_commit(commit: str) -> str:
286
+ """Resolve a commit reference to a full SHA."""
287
+ if commit.upper() == "HEAD":
288
+ result = subprocess.run(
289
+ ["git", "rev-parse", "HEAD"],
290
+ capture_output=True,
291
+ text=True,
292
+ check=True,
293
+ )
294
+ return result.stdout.strip()
295
+ return commit
296
+
297
+
298
async def run_ci_tests(
    commit: str = "HEAD",
    *,
    repo: str | None = None,
    poll_interval: float = 10.0,
    timeout: float = 600.0,
    os: OSChoice = "ubuntu-latest",
    python_version: str = "3.13",
    run_lint: bool = True,
    run_format: bool = True,
    run_typecheck: bool = True,
    test_command: str | None = "pytest --tb=short",
) -> CITestResult:
    """Trigger CI tests for a commit and wait for results.

    This function triggers the test-commit.yml workflow via the GitHub CLI,
    polls for completion, and returns structured results. All ``gh``
    invocations run in a worker thread so the event loop is not blocked
    while the CLI is executing.

    The new run is identified by comparing the workflow's run list before
    and after the dispatch: the first run ID that was not present beforehand
    is taken to be ours. (``workflow_dispatch`` runs report the branch HEAD
    as their SHA, so matching on the commit is not possible.)

    Args:
        commit: Commit SHA or "HEAD" to test. Defaults to HEAD.
        repo: Repository in "owner/repo" format. Auto-detected if None.
        poll_interval: Seconds between status checks. Defaults to 10.
        timeout: Maximum seconds to wait for completion. Defaults to 600 (10 min).
        os: Operating system to run on. Defaults to "ubuntu-latest".
        python_version: Python version to use. Defaults to "3.13".
        run_lint: Whether to run ruff check. Defaults to True.
        run_format: Whether to run ruff format check. Defaults to True.
        run_typecheck: Whether to run mypy type checking. Defaults to True.
        test_command: Pytest command to run, or None to skip tests.
            Defaults to "pytest --tb=short". Use "-k pattern" to filter tests.

    Returns:
        CITestResult with individual check results.

    Raises:
        TimeoutError: If the workflow doesn't complete within timeout.
        RuntimeError: If the dispatched workflow run cannot be located.
        subprocess.CalledProcessError: If gh CLI commands fail.

    Example:
        ```python
        # Run all checks
        result = await run_ci_tests("abc123")

        # Run specific test on Windows
        result = await run_ci_tests(
            "abc123",
            os="windows-latest",
            run_lint=False,
            run_format=False,
            run_typecheck=False,
            test_command="pytest -k test_acp_agent --tb=short",
        )

        if result.all_passed:
            print("All checks passed!")
        else:
            print(result.summary())
        ```
    """
    import time

    # How often / how long to look for the freshly dispatched run.
    discovery_attempts = 5
    discovery_delay = 2.0

    commit_sha = _resolve_commit(commit)
    start_time = time.monotonic()

    # Build repo flag if specified
    repo_args = ["-R", repo] if repo else []

    async def gh(*args: str) -> str:
        # _run_gh blocks on a subprocess; keep the event loop responsive.
        return await asyncio.to_thread(_run_gh, *args, *repo_args)

    async def list_runs() -> list[dict[str, Any]]:
        listing = await gh(
            "run",
            "list",
            "--workflow=test-commit.yml",
            "--json=databaseId,headSha,status,url",
            "--limit=5",
        )
        return json.loads(listing)

    # Snapshot existing runs so a run that was already queued / in progress
    # before our dispatch is never mistaken for the one we trigger below.
    existing_runs = await list_runs()
    known_run_ids = {run["databaseId"] for run in existing_runs}

    # Trigger the workflow with parameters
    await gh(
        "workflow",
        "run",
        "test-commit.yml",
        "-f",
        f"commit={commit_sha}",
        "-f",
        f"os={os}",
        "-f",
        f"python_version={python_version}",
        "-f",
        f"run_lint={str(run_lint).lower()}",
        "-f",
        f"run_format={str(run_format).lower()}",
        "-f",
        f"run_typecheck={str(run_typecheck).lower()}",
        "-f",
        f"test_command={test_command or ''}",
    )

    # Find the run ID. The run may take a moment to be created, so retry a
    # few times rather than relying on a single fixed delay.
    run_id: int | None = None
    run_url = ""
    for _ in range(discovery_attempts):
        await asyncio.sleep(discovery_delay)
        for run in await list_runs():
            if run["databaseId"] not in known_run_ids:
                run_id = run["databaseId"]
                run_url = run["url"]
                break
        if run_id is not None:
            break

    if run_id is None:
        msg = f"Could not find workflow run for commit {commit_sha}"
        raise RuntimeError(msg)

    # Poll for completion
    while True:
        elapsed = time.monotonic() - start_time
        if elapsed > timeout:
            msg = f"Workflow run {run_id} did not complete within {timeout}s"
            raise TimeoutError(msg)

        run_data = json.loads(
            await gh("run", "view", str(run_id), "--json=status,conclusion,jobs")
        )
        if run_data["status"] == "completed":
            break

        await asyncio.sleep(poll_interval)

    # Parse job results
    duration = time.monotonic() - start_time
    jobs = run_data.get("jobs", [])

    # An empty command string disables the test step just like None does.
    run_test = bool(test_command)

    result = CITestResult(
        commit=commit_sha,
        run_id=run_id,
        run_url=run_url,
        os=os,
        python_version=python_version,
        duration_seconds=duration,
        raw_jobs=jobs,
        _repo=repo,
        # Set skipped for disabled checks
        lint="skipped" if not run_lint else "pending",
        format="skipped" if not run_format else "pending",
        typecheck="skipped" if not run_typecheck else "pending",
        test="skipped" if not run_test else "pending",
    )

    # Map job names to results (only for enabled checks)
    for job in jobs:
        name = job.get("name", "").lower()
        conclusion = job.get("conclusion", "pending")

        # Normalize conclusion to our type (anything unknown, including a
        # missing/None conclusion, counts as still pending)
        if conclusion not in ("success", "failure", "skipped", "cancelled"):
            conclusion = "pending"

        if "lint" in name and "format" not in name and run_lint:
            result.lint = conclusion
        elif "format" in name and run_format:
            result.format = conclusion
        elif ("type" in name or "mypy" in name) and run_typecheck:
            result.typecheck = conclusion
        elif ("test" in name or "pytest" in name) and run_test:
            result.test = conclusion

    return result
478
+
479
+
480
@dataclass
class BisectResult:
    """Outcome of bisecting a commit range against CI."""

    first_bad_commit: str
    """The first commit that failed the checks."""

    last_good_commit: str
    """The last commit that passed the checks."""

    commits_tested: list[CITestResult] = field(default_factory=list)
    """Results for all commits tested during bisection."""

    total_commits_in_range: int = 0
    """Total number of commits in the range (good, bad]."""

    steps_taken: int = 0
    """Number of bisection steps performed."""

    def summary(self) -> str:
        """Render the bisect outcome as multi-line, human-readable text.

        Returns:
            A header block followed by one line per tested commit, each
            marked with a pass or fail symbol.
        """
        header = [
            "Bisect Results",
            "=" * 40,
            f"First bad commit: {self.first_bad_commit[:12]}",
            f"Last good commit: {self.last_good_commit[:12]}",
            f"Commits in range: {self.total_commits_in_range}",
            f"Steps taken: {self.steps_taken}",
            "",
            "Tested commits:",
        ]
        per_commit = [
            f" {'✓' if outcome.all_passed else '✗'} {outcome.commit[:12]}"
            for outcome in self.commits_tested
        ]
        return "\n".join([*header, *per_commit])
515
+
516
+
517
def _get_commits_between(good: str, bad: str) -> list[str]:
    """List the commits in ``good..bad`` on the ancestry path, oldest first.

    ``good`` itself is excluded and ``bad`` is included.

    Raises:
        subprocess.CalledProcessError: If ``git rev-list`` fails.
    """
    revision_range = f"{good}..{bad}"
    completed = subprocess.run(
        ["git", "rev-list", "--ancestry-path", revision_range],
        capture_output=True,
        text=True,
        check=True,
    )
    listing = completed.stdout.strip()
    if not listing:
        return []
    # git prints newest first; bisection wants the oldest commit at index 0.
    return listing.split("\n")[::-1]
528
+
529
+
530
async def bisect_ci(
    good_commit: str,
    bad_commit: str = "HEAD",
    *,
    repo: str | None = None,
    poll_interval: float = 10.0,
    timeout: float = 600.0,
    os: OSChoice = "ubuntu-latest",
    python_version: str = "3.13",
    run_lint: bool = True,
    run_format: bool = True,
    run_typecheck: bool = True,
    test_command: str | None = "pytest --tb=short",
) -> BisectResult:
    """Locate the first CI-breaking commit by binary search.

    The commits between a known-good and a known-bad commit are bisected,
    running CI once per probe, until the earliest failing commit is found.
    Neither endpoint is re-tested: ``bad_commit`` is assumed to fail and
    ``good_commit`` is assumed to pass.

    Args:
        good_commit: A commit SHA known to pass all enabled checks.
        bad_commit: A commit SHA known to fail. Defaults to HEAD.
        repo: Repository in "owner/repo" format. Auto-detected if None.
        poll_interval: Seconds between status checks. Defaults to 10.
        timeout: Timeout per CI run in seconds. Defaults to 600.
        os: Operating system to run on. Defaults to "ubuntu-latest".
        python_version: Python version to use. Defaults to "3.13".
        run_lint: Whether to run ruff check. Defaults to True.
        run_format: Whether to run ruff format check. Defaults to True.
        run_typecheck: Whether to run mypy type checking. Defaults to True.
        test_command: Pytest command to run, or None to skip tests.

    Returns:
        BisectResult with the first bad commit and bisection details.

    Raises:
        ValueError: If there are no commits between the two endpoints.

    Example:
        ```python
        # Find which commit broke a specific test on Windows
        result = await bisect_ci(
            good_commit="abc123",
            bad_commit="HEAD",
            os="windows-latest",
            run_lint=False,
            run_format=False,
            run_typecheck=False,
            test_command="pytest -k test_acp_agent --tb=short",
        )
        print(f"Tests broke at: {result.first_bad_commit}")
        ```
    """
    known_good = _resolve_commit(good_commit)
    known_bad = _resolve_commit(bad_commit)

    # Candidates are ordered oldest -> newest; the last one is the known-bad commit.
    candidates = _get_commits_between(known_good, known_bad)
    if not candidates:
        msg = f"No commits found between {known_good[:12]} and {known_bad[:12]}"
        raise ValueError(msg)

    async def probe(sha: str) -> CITestResult:
        """Run CI for one candidate using the caller's settings."""
        return await run_ci_tests(
            sha,
            repo=repo,
            poll_interval=poll_interval,
            timeout=timeout,
            os=os,
            python_version=python_version,
            run_lint=run_lint,
            run_format=run_format,
            run_typecheck=run_typecheck,
            test_command=test_command,
        )

    outcomes: list[CITestResult] = []
    lo, hi = 0, len(candidates) - 1

    # Invariant: everything before ``lo`` is good, ``candidates[hi]`` is bad.
    while lo < hi:
        middle = (lo + hi) // 2
        outcome = await probe(candidates[middle])
        outcomes.append(outcome)

        if outcome.all_passed:
            # Breakage lies strictly after the probed commit.
            lo = middle + 1
        else:
            # Probed commit is bad; it may itself be the first bad one.
            hi = middle

    # The commit just before the first bad one is the last good one; if the
    # very first candidate is bad, that is the caller-supplied good commit.
    last_good = candidates[hi - 1] if hi != 0 else known_good

    return BisectResult(
        first_bad_commit=candidates[hi],
        last_good_commit=last_good,
        commits_tested=outcomes,
        total_commits_in_range=len(candidates),
        # Exactly one CI run is recorded per bisection step.
        steps_taken=len(outcomes),
    )
633
+
634
+
635
async def quick_ci_check(commit: str = "HEAD") -> bool:
    """Report whether ``commit`` passes CI, as a plain boolean.

    Thin wrapper over ``run_ci_tests`` called with its default settings.

    Args:
        commit: Commit SHA or "HEAD" to test.

    Returns:
        True if all checks passed, False otherwise.
    """
    return (await run_ci_tests(commit)).all_passed