gdmcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. gdmcode-0.1.0.dist-info/METADATA +240 -0
  2. gdmcode-0.1.0.dist-info/RECORD +131 -0
  3. gdmcode-0.1.0.dist-info/WHEEL +4 -0
  4. gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/_internal/__init__.py +0 -0
  7. src/_internal/constants.py +244 -0
  8. src/_internal/domain_skills.py +339 -0
  9. src/agent/__init__.py +0 -0
  10. src/agent/commit_classifier.py +91 -0
  11. src/agent/context_budget.py +391 -0
  12. src/agent/daemon.py +681 -0
  13. src/agent/dag_validator.py +153 -0
  14. src/agent/debug_loop.py +473 -0
  15. src/agent/impact_analyzer.py +149 -0
  16. src/agent/impact_graph.py +117 -0
  17. src/agent/loop.py +1410 -0
  18. src/agent/orchestrator.py +141 -0
  19. src/agent/regression_guard.py +251 -0
  20. src/agent/review_gate.py +648 -0
  21. src/agent/risk_scorer.py +169 -0
  22. src/agent/self_healing.py +145 -0
  23. src/agent/smart_test_selector.py +89 -0
  24. src/agent/system_prompt.py +226 -0
  25. src/agent/task_tracker.py +320 -0
  26. src/agent/test_validator.py +210 -0
  27. src/agent/tool_orchestrator.py +402 -0
  28. src/agent/transcript.py +230 -0
  29. src/agent/verification_loop.py +133 -0
  30. src/agent/work_director.py +136 -0
  31. src/agent/worktree_manager.py +53 -0
  32. src/artifacts/__init__.py +16 -0
  33. src/artifacts/artifact_store.py +456 -0
  34. src/artifacts/verification_graph.py +75 -0
  35. src/auth.py +411 -0
  36. src/cli.py +1290 -0
  37. src/commands.py +1398 -0
  38. src/config.py +762 -0
  39. src/cost_tracker.py +348 -0
  40. src/db/__init__.py +4 -0
  41. src/db/migrations.py +337 -0
  42. src/enterprise/__init__.py +3 -0
  43. src/enterprise/audit_log.py +182 -0
  44. src/enterprise/identity.py +90 -0
  45. src/enterprise/rbac.py +100 -0
  46. src/enterprise/team_config.py +125 -0
  47. src/enterprise/usage_analytics.py +261 -0
  48. src/exceptions.py +207 -0
  49. src/git_workflow.py +651 -0
  50. src/integrations/__init__.py +6 -0
  51. src/integrations/github_actions.py +106 -0
  52. src/integrations/mcp_server.py +333 -0
  53. src/integrations/sentry_integration.py +100 -0
  54. src/integrations/sentry_server.py +82 -0
  55. src/integrations/webhook_security.py +19 -0
  56. src/main.py +27 -0
  57. src/memory/__init__.py +0 -0
  58. src/memory/code_index.py +376 -0
  59. src/memory/compressor.py +378 -0
  60. src/memory/context_memory.py +135 -0
  61. src/memory/continuous_memory.py +234 -0
  62. src/memory/conventions.py +495 -0
  63. src/memory/db.py +1119 -0
  64. src/memory/document_index.py +205 -0
  65. src/memory/file_cache.py +128 -0
  66. src/memory/project_scanner.py +178 -0
  67. src/memory/session_store.py +201 -0
  68. src/models/__init__.py +0 -0
  69. src/models/client.py +715 -0
  70. src/models/definitions.py +459 -0
  71. src/models/router.py +418 -0
  72. src/models/schemas.py +389 -0
  73. src/permissions.py +294 -0
  74. src/remote/__init__.py +5 -0
  75. src/remote/command_filter.py +33 -0
  76. src/remote/models.py +31 -0
  77. src/remote/permission_handler.py +79 -0
  78. src/remote/phone_ui.py +48 -0
  79. src/remote/protocol.py +59 -0
  80. src/remote/qr.py +65 -0
  81. src/remote/server.py +586 -0
  82. src/remote/token_manager.py +61 -0
  83. src/remote/tunnel.py +212 -0
  84. src/repl.py +475 -0
  85. src/runtime/__init__.py +1 -0
  86. src/runtime/branch_farm.py +372 -0
  87. src/runtime/replay.py +351 -0
  88. src/sandbox/__init__.py +2 -0
  89. src/sandbox/hermetic.py +214 -0
  90. src/sandbox/policy.py +44 -0
  91. src/sdk/__init__.py +3 -0
  92. src/sdk/plugin_base.py +39 -0
  93. src/sdk/plugin_host.py +100 -0
  94. src/sdk/plugin_loader.py +101 -0
  95. src/security.py +409 -0
  96. src/server/__init__.py +7 -0
  97. src/server/bridge.py +427 -0
  98. src/server/bridge_cli.py +103 -0
  99. src/server/bridge_client.py +170 -0
  100. src/server/protocol_version.py +103 -0
  101. src/session/__init__.py +10 -0
  102. src/session/event_fanout.py +46 -0
  103. src/session/input_broker.py +38 -0
  104. src/session/permission_bridge.py +100 -0
  105. src/tools/__init__.py +160 -0
  106. src/tools/_atomic.py +72 -0
  107. src/tools/agent_tools.py +423 -0
  108. src/tools/ask_user_tool.py +83 -0
  109. src/tools/bash_tool.py +384 -0
  110. src/tools/browser_tool.py +352 -0
  111. src/tools/browser_tools.py +179 -0
  112. src/tools/dep_tools.py +210 -0
  113. src/tools/document_reader.py +167 -0
  114. src/tools/document_tool.py +240 -0
  115. src/tools/document_writer.py +171 -0
  116. src/tools/impact_tools.py +240 -0
  117. src/tools/playwright_tool.py +172 -0
  118. src/tools/quality_tools.py +366 -0
  119. src/tools/read_tools.py +318 -0
  120. src/tools/result_cache.py +157 -0
  121. src/tools/search_tools.py +310 -0
  122. src/tools/shell_tools.py +311 -0
  123. src/tools/write_tools.py +337 -0
  124. src/voice/__init__.py +25 -0
  125. src/voice/audio_capture.py +92 -0
  126. src/voice/audio_playback.py +68 -0
  127. src/voice/errors.py +14 -0
  128. src/voice/models.py +35 -0
  129. src/voice/providers.py +143 -0
  130. src/voice/vad.py +55 -0
  131. src/voice/voice_loop.py +156 -0
@@ -0,0 +1,153 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Optional
3
+ import copy
4
+
5
+
6
@dataclass
class DagNode:
    """A single node of a task DAG.

    Attributes:
        id: Identifier that other nodes reference in ``depends_on``.
        description: Free-text description of the node.
        depends_on: IDs of the nodes this node depends on (empty by default).
        model_tier: Tier label for the node; defaults to ``"sonnet"``.
        write_capable: Boolean flag, ``False`` by default.
            NOTE(review): presumably marks nodes allowed to modify files --
            confirm against the consumer of this DAG.
    """

    id: str
    description: str
    # default_factory gives every node its own list instead of a shared one.
    depends_on: list[str] = field(default_factory=list)
    model_tier: str = "sonnet"
    write_capable: bool = False
13
+
14
+
15
@dataclass
class ValidationResult:
    """Outcome of ``DagValidator.validate()``.

    Attributes:
        is_valid: ``True`` when validation produced no errors.
        dag: The parsed nodes (empty when the raw input could not be parsed).
        errors: Human-readable problems found, empty when valid.
        repairable: Flag set by the validator to tell the caller whether a
            repair attempt is worthwhile; ``False`` by default.
    """

    is_valid: bool
    dag: list[DagNode]
    # default_factory gives every result its own error list.
    errors: list[str] = field(default_factory=list)
    repairable: bool = False
21
+
22
+
23
class DagValidator:
    """Validate, repair, or linearise a task DAG given as a list of raw dicts.

    Each raw dict is expected to carry at least ``id`` and ``description``;
    keys that are not ``DagNode`` fields are ignored during validation.

    Limits enforced by ``validate()``:
        MAX_NODES: maximum number of nodes.
        MAX_DEPTH: maximum length (in nodes) of the longest dependency chain.
        MAX_FAN_OUT: maximum number of nodes that may depend on one node.
    """

    MAX_NODES = 12
    MAX_DEPTH = 4
    MAX_FAN_OUT = 3

    def __init__(self, raw: list[dict]):
        """Store the raw node dicts; they are never mutated by this class."""
        self._raw = raw

    def validate(self) -> ValidationResult:
        """Check the raw DAG against structural rules and size limits.

        Returns:
            A ``ValidationResult``. ``repairable`` is ``False`` when the input
            cannot be parsed into nodes or exceeds ``MAX_NODES``, ``True``
            for a detected cycle, and otherwise ``True`` exactly when at
            least one error was recorded.
        """
        errors: list[str] = []
        try:
            # Drop unknown keys so extra metadata in the raw dicts is tolerated.
            nodes = [
                DagNode(**{k: v for k, v in n.items() if k in DagNode.__dataclass_fields__})
                for n in self._raw
            ]
        except Exception as e:
            return ValidationResult(is_valid=False, dag=[], errors=[str(e)], repairable=False)

        ids = [n.id for n in nodes]
        id_set = set(ids)

        # Unique IDs
        if len(ids) != len(id_set):
            errors.append("Duplicate node IDs")

        # Node cap -- checked early; an oversized DAG is rejected outright.
        if len(nodes) > self.MAX_NODES:
            errors.append(f"Too many nodes: {len(nodes)} > {self.MAX_NODES}")
            return ValidationResult(is_valid=False, dag=nodes, errors=errors, repairable=False)

        # All deps exist
        for n in nodes:
            for dep in n.depends_on:
                if dep not in id_set:
                    errors.append(f"Missing dep '{dep}' in node '{n.id}'")

        # Self-deps
        for n in nodes:
            if n.id in n.depends_on:
                errors.append(f"Self-dependency in '{n.id}'")

        # Cycle detection (Kahn's algorithm). Must run before the depth check:
        # _max_depth recurses along dependencies and needs an acyclic graph.
        if self._has_cycle(nodes):
            errors.append("Cycle detected in DAG")
            return ValidationResult(is_valid=False, dag=nodes, errors=errors, repairable=True)

        # Depth cap
        depth = self._max_depth(nodes)
        if depth > self.MAX_DEPTH:
            errors.append(f"DAG depth {depth} exceeds max {self.MAX_DEPTH}")

        # Fan-out cap: number of nodes that list n as a dependency.
        for n in nodes:
            fan_out = sum(1 for m in nodes if n.id in m.depends_on)
            if fan_out > self.MAX_FAN_OUT:
                errors.append(f"Node '{n.id}' fan-out {fan_out} exceeds max {self.MAX_FAN_OUT}")

        # Cycles returned above, so any error left here is a non-cycle error.
        # (A substring test on the joined messages would misfire on node IDs
        # that happen to contain the word "Cycle".)
        return ValidationResult(
            is_valid=not errors,
            dag=nodes,
            errors=errors,
            repairable=bool(errors),
        )

    def auto_repair(self) -> list[dict]:
        """Remove back-edges that cause cycles. Returns raw dict list.

        Works on a deep copy of the raw input. A depth-first walk along
        ``depends_on`` records every edge that points back to a node still
        on the current DFS path (including self-dependencies); those edges
        are then removed. Nodes are keyed by ``id``, so when several raw
        dicts share an ID only the last one is kept in the result.
        """
        raw = copy.deepcopy(self._raw)
        id_map = {n["id"]: n for n in raw}
        visited, stack = set(), set()
        to_remove = []

        def dfs(node_id):
            visited.add(node_id)
            stack.add(node_id)
            # Unknown IDs resolve to {} and therefore have no outgoing edges.
            for dep in list(id_map.get(node_id, {}).get("depends_on", [])):
                if dep not in visited:
                    dfs(dep)
                elif dep in stack:
                    to_remove.append((node_id, dep))
            stack.discard(node_id)

        for nid in id_map:
            if nid not in visited:
                dfs(nid)
        # Edges are removed after the walk so no list is mutated mid-iteration.
        for node_id, dep in to_remove:
            id_map[node_id]["depends_on"].remove(dep)
        return list(id_map.values())

    def fallback_linear(self) -> list[dict]:
        """Return a linear chain (no parallelism) built from the raw nodes.

        Keeps at most ``MAX_NODES`` nodes in their original order and makes
        each node depend only on its predecessor, so the result is acyclic
        with fan-out 1.

        NOTE(review): a chain of more than ``MAX_DEPTH`` nodes still exceeds
        the depth cap enforced by ``validate()``, and duplicate IDs are not
        resolved here -- confirm callers do not re-validate the result.
        """
        raw = copy.deepcopy(self._raw[: self.MAX_NODES])
        for i, node in enumerate(raw):
            node["depends_on"] = [raw[i - 1]["id"]] if i > 0 else []
        return raw

    def _has_cycle(self, nodes: list[DagNode]) -> bool:
        """Kahn's algorithm: cycle exists if topological sort can't include all IDs.

        Dependencies on unknown IDs are ignored. The graph is keyed by node
        ID, so the sort is complete when every *distinct* ID was emitted;
        comparing against ``len(nodes)`` would report a spurious cycle
        whenever two nodes share an ID.
        """
        in_degree = {n.id: 0 for n in nodes}
        adj = {n.id: [] for n in nodes}
        for n in nodes:
            for dep in n.depends_on:
                if dep in in_degree:
                    in_degree[n.id] += 1
                    adj[dep].append(n.id)

        queue = [nid for nid, deg in in_degree.items() if deg == 0]
        count = 0
        while queue:
            nid = queue.pop()
            count += 1
            for child in adj.get(nid, []):
                in_degree[child] -= 1
                if in_degree[child] == 0:
                    queue.append(child)
        return count != len(in_degree)

    def _max_depth(self, nodes: list[DagNode]) -> int:
        """Return the node count of the longest dependency chain (0 if empty).

        Dependencies on unknown IDs are ignored. The graph must be acyclic:
        the recursion below does not terminate on a cycle, which is why
        ``validate()`` runs the cycle check first.
        """
        id_map = {n.id: n for n in nodes}
        memo = {}

        def depth(nid):
            if nid in memo:
                return memo[nid]
            node = id_map.get(nid)
            valid_deps = [dep for dep in node.depends_on if dep in id_map] if node else []
            d = 1 + max(depth(dep) for dep in valid_deps) if valid_deps else 1
            memo[nid] = d
            return d

        return max((depth(n.id) for n in nodes), default=0)
@@ -0,0 +1,473 @@
1
+ """Iterative debug loop — fix → test → fix cycle with model-tier escalation.
2
+
3
+ Drives an automated bug-fix workflow:
4
+ 1. Run the test suite.
5
+ 2. If tests fail, ask the agent to apply a fix.
6
+ 3. Re-run tests.
7
+ 4. Repeat up to *max_cycles* times, escalating to stronger model tiers on
8
+ repeated failures and optionally trying an ensemble patch strategy.
9
+
10
+ Phase 1: single-loop sequential fixes, ensemble stub (Phase 3 full impl).
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ import shlex
16
+ import subprocess
17
+ from dataclasses import dataclass, field
18
+ from typing import TYPE_CHECKING, Generator
19
+
20
+ from src._internal.constants import _ENSEMBLE_PATCH_COST_CAP_USD, _MAX_DEBUG_CYCLES
21
+ from src.agent.loop import AgentEvent, AgentLoop, EventType
22
+ from src.models.definitions import ModelTier
23
+ from src.tools.shell_tools import (
24
+ _extract_error_for_search,
25
+ _format_search_injection,
26
+ _parse_search_results,
27
+ web_search_raw,
28
+ )
29
+
30
+ if TYPE_CHECKING:
31
+ from src.config import GdmConfig
32
+
33
+ __all__ = ["DebugAttempt", "DebugLoop", "DebugResult"]
34
+
35
+ log = logging.getLogger(__name__)
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Constants
39
+ # ---------------------------------------------------------------------------
40
+
41
# Timeout, in seconds, passed to subprocess.run for every test-suite run.
_TEST_TIMEOUT_SECS: int = 120
# Highest attempt number that still uses the Coder tier.
_CODER_MAX_ATTEMPT: int = 2
# The attempt number that uses the Thinker tier; later attempts use Reasoner.
_THINKER_ATTEMPT: int = 3
# Number of failed attempts after which run() starts trying the ensemble patch.
_ENSEMBLE_TRIGGER_FAILURES: int = 2
# Test output is truncated to this many characters before entering a prompt.
_MAX_FIX_PROMPT_ERROR_CHARS: int = 2_000
# Patch descriptions recorded on an attempt are truncated to this length.
_MAX_PATCH_DESC_CHARS: int = 200

# Referenced by callers for budget enforcement.
_COST_CAP_USD = _ENSEMBLE_PATCH_COST_CAP_USD
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Data classes
54
+ # ---------------------------------------------------------------------------
55
+
56
+
57
@dataclass
class DebugAttempt:
    """Record of one fix attempt in the debug loop.

    Attributes:
        attempt_num: 1-based number of the attempt.
        patch_applied: Brief description of what was changed.
        test_result: Either ``"pass"`` or ``"fail"``.
        error_output: Test output on failure, empty string on pass.
        model_tier_used: Model tier label associated with the attempt.
    """

    attempt_num: int
    patch_applied: str
    test_result: str
    error_output: str
    model_tier_used: str
66
+
67
+
68
@dataclass
class DebugResult:
    """Outcome of a complete DebugLoop.run() call.

    Attributes:
        success: ``True`` when the test command passed.
        attempts: Every ``DebugAttempt`` recorded during the run, in order.
        final_model_tier: Tier of the last attempt that was started
            (``ModelTier.CODER`` when no attempt ran).
        was_rolled_back: Rollback flag reported by the debug loop.
        regression_detected: Regression flag; ``False`` by default.
    """

    success: bool
    # default_factory gives every result its own list.
    attempts: list[DebugAttempt] = field(default_factory=list)
    final_model_tier: str = ModelTier.CODER
    was_rolled_back: bool = False
    regression_detected: bool = False
77
+
78
+
79
+ # ---------------------------------------------------------------------------
80
+ # Debug loop
81
+ # ---------------------------------------------------------------------------
82
+
83
+
84
class DebugLoop:
    """Iterative bug fix loop: test -> fix -> test, up to ``max_cycles`` attempts.

    Model tier per attempt (see ``_get_tier_for_attempt``):

    * Attempt 1-2: Coder tier
    * Attempt 3:   Thinker tier
    * Attempt 4+:  Reasoner tier

    NOTE(review): within this class the tier is logged, recorded on the
    attempt and mentioned in the fix prompt, but it is never passed to the
    AgentLoop -- confirm the actual model switch happens elsewhere.

    Once two attempts have failed, every further cycle first tries an
    ensemble patch: three prompt strategies are run one after another from a
    clean working tree, stopping at the first whose changes make the test
    command pass.

    Usage::

        debug = DebugLoop(loop, cfg)
        result = debug.run(
            task_description="Fix TypeError in auth.py line 45",
            test_command="pytest tests/test_auth.py",
            files_to_watch=["src/auth.py"],
        )
        if result.success:
            print(f"Fixed in {len(result.attempts)} attempt(s)")
        elif result.was_rolled_back:
            print("All attempts failed -- changes rolled back")
    """

    def __init__(self, base_loop: AgentLoop, cfg: GdmConfig) -> None:
        """Initialise the debug loop.

        Args:
            base_loop: A fully-configured AgentLoop used for fix turns.
            cfg: Session configuration (provider, cost limits, etc.).
        """
        self._loop = base_loop
        self._cfg = cfg
        self._test_command: str = ""  # stored at run() time; used by ensemble

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def run(
        self,
        task_description: str,
        test_command: str,
        files_to_watch: list[str] | None = None,
        *,
        max_cycles: int = _MAX_DEBUG_CYCLES,
    ) -> DebugResult:
        """Run the debug loop. Returns DebugResult (never raises).

        Each cycle runs the tests, returns early if they pass, otherwise
        (optionally) tries the ensemble patch, asks the agent for a fix and
        re-runs the tests. When every cycle failed, a soft git rollback is
        attempted and ``was_rolled_back`` reports whether it actually ran.

        Args:
            task_description: Human-readable description of the bug to fix.
            test_command: Command line whose exit code signals pass/fail.
                It is split with ``shlex`` and executed without a shell.
            files_to_watch: Optional list of files relevant to the fix.
                Accepted for interface compatibility; not used by this method.
            max_cycles: Maximum fix attempts before giving up.
        """
        attempts: list[DebugAttempt] = []
        final_tier = ModelTier.CODER
        self._test_command = test_command  # expose to ensemble
        search_context = ""  # injection block; prepended to one fix prompt only
        auto_search_iter: int = getattr(self._cfg, "debug_auto_search_iteration", 3)
        try:
            for attempt_num in range(1, max_cycles + 1):
                tier = self._get_tier_for_attempt(attempt_num)
                final_tier = tier
                log.info("Debug attempt %d/%d tier=%s", attempt_num, max_cycles, tier)

                passed, output = self._run_tests(test_command)
                if passed:
                    attempts.append(DebugAttempt(attempt_num, "(pre-check)", "pass", "", tier))
                    return DebugResult(True, attempts, tier, False, False)

                # One-shot auto web search: attempt numbers are unique, so
                # this fires at most once per run() call.
                if auto_search_iter > 0 and attempt_num == auto_search_iter:
                    search_context = self._auto_search(attempt_num, output)

                failed_so_far = sum(1 for a in attempts if a.test_result == "fail")
                if failed_so_far >= _ENSEMBLE_TRIGGER_FAILURES:
                    ok, desc = self._try_ensemble_patch(task_description, output)
                    if ok:
                        attempts.append(DebugAttempt(attempt_num, desc, "pass", "", tier))
                        return DebugResult(True, attempts, tier, False, False)

                patch_desc = self._drain_fix(
                    self._apply_fix(output, task_description, attempt_num, search_context)
                )
                search_context = ""  # consume search context -- only used once
                passed_after, out_after = self._run_tests(test_command)
                test_result = "pass" if passed_after else "fail"
                err_out = "" if passed_after else out_after
                attempts.append(DebugAttempt(attempt_num, patch_desc, test_result, err_out, tier))

                if passed_after:
                    return DebugResult(True, attempts, tier, False, False)

            # Report a rollback only if one was actually performed.
            rolled_back = False
            if self._should_rollback(attempts, max_cycles):
                rolled_back = self._soft_rollback(len(attempts))
            self._log_debate_hypotheses(attempts)
            return DebugResult(False, attempts, final_tier, rolled_back, False)
        except Exception as exc:  # noqa: BLE001
            log.exception("DebugLoop.run failed: %s", exc)
            return DebugResult(False, attempts, final_tier, False, False)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _auto_search(self, attempt_num: int, output: str) -> str:
        """Web-search the failing output; return the prompt injection block.

        Search, parsing and formatting failures are logged and yield ``""``
        so the debug loop continues without search context.
        """
        error_query = _extract_error_for_search(output)
        try:
            raw = web_search_raw(error_query)
            results = _parse_search_results(raw)
            context = _format_search_injection(error_query, results)
            log.info(
                "DebugLoop: auto web-search fired (iter=%d query=%r results=%d)",
                attempt_num, error_query[:60], len(results),
            )
            return context
        except Exception as exc:  # noqa: BLE001
            log.warning("DebugLoop: auto web-search failed (continuing): %s", exc)
            return ""

    def _soft_rollback(self, attempt_count: int) -> bool:
        """Soft-roll the project back via GitWorkflow; return True if it ran.

        Returns ``False`` when the project is not a git repository or when
        the rollback raised (best effort: the error is only logged).
        """
        try:
            from src.git_workflow import GitWorkflow  # noqa: PLC0415
            wf = GitWorkflow(self._cfg.project_root)
            if not wf.is_git_repo():
                return False
            wf.rollback_to_pre_task(hard=False)
            log.warning(
                "DebugLoop: all %d attempts failed — soft rollback applied",
                attempt_count,
            )
            return True
        except Exception as exc:  # noqa: BLE001
            log.warning("DebugLoop: rollback skipped: %s", exc)
            return False

    def _log_debate_hypotheses(self, attempts: list[DebugAttempt]) -> None:
        """Ask ReviewGate for fresh hypotheses and log up to three of them.

        Purely informational and best effort: any failure is logged at
        debug level and otherwise ignored.
        """
        try:
            from src.agent.review_gate import ReviewGate  # noqa: PLC0415
            last_error_text = attempts[-1].error_output if attempts else ""
            gate = ReviewGate(cfg=self._cfg)
            hypotheses = gate.debate_debug(
                error=last_error_text,
                attempts_history=[str(a.patch_applied) for a in attempts[-3:]],
            )
            log.info(
                "debate_debug hypotheses: %s",
                "; ".join(hypotheses.hypotheses[:3]),
            )
        except Exception as exc:  # noqa: BLE001
            log.debug("debate_debug skipped: %s", exc)

    def _run_tests(self, test_command: str) -> tuple[bool, str]:
        """Run the test command. Returns (passed, combined output).

        The command is split with ``shlex`` (non-POSIX rules on Windows) and
        executed without a shell, with a ``_TEST_TIMEOUT_SECS`` timeout.
        Never raises for the expected failure modes: an unparsable or empty
        command, a command that cannot be started, and a timeout all return
        ``(False, <message>)``.
        """
        import sys  # noqa: PLC0415

        try:
            argv = shlex.split(test_command or "", posix=not sys.platform.startswith("win"))
        except ValueError as exc:  # e.g. unbalanced quotes
            log.error("Failed to parse test command %r: %s", test_command, exc)
            return False, f"Failed to run tests: {exc}"
        if not argv:
            log.error("Failed to run test command %r: empty command", test_command)
            return False, "Failed to run tests: empty test command"

        try:
            result = subprocess.run(
                argv,
                shell=False,
                capture_output=True,
                text=True,
                errors="replace",  # undecodable test output must not abort the loop
                timeout=_TEST_TIMEOUT_SECS,
            )
        except subprocess.TimeoutExpired:
            log.warning("Test command timed out after %ds", _TEST_TIMEOUT_SECS)
            return False, "Test timed out"
        except OSError as exc:
            log.error("Failed to run test command %r: %s", test_command, exc)
            return False, f"Failed to run tests: {exc}"

        output = result.stdout + result.stderr
        passed = result.returncode == 0
        log.debug("Test run: rc=%d output_chars=%d", result.returncode, len(output))
        return passed, output

    def _apply_fix(
        self,
        error_output: str,
        task_description: str,
        attempt_num: int,
        search_context: str = "",
    ) -> Generator[AgentEvent, None, None]:
        """Run one agent turn asking it to fix the failing tests.

        Yields AgentEvents from the underlying AgentLoop.run() call.

        Args:
            error_output: Combined test output from the failing run; only the
                first ``_MAX_FIX_PROMPT_ERROR_CHARS`` characters are sent.
            task_description: Original bug description for context.
            attempt_num: Current attempt number (used in the prompt).
            search_context: Optional web-search injection block (one-shot).
        """
        tier = self._get_tier_for_attempt(attempt_num)
        search_block = f"\n\n{search_context}" if search_context else ""
        fix_prompt = (
            f"Task: {task_description}\n\n"
            f"Attempt {attempt_num} -- model tier: {tier}.\n\n"
            "The following tests are failing. Analyse the error carefully "
            "and apply a targeted fix:\n\n"
            f"```\n{error_output[:_MAX_FIX_PROMPT_ERROR_CHARS]}\n```"
            f"{search_block}"
        )
        log.info("Applying fix (attempt=%d tier=%s)", attempt_num, tier)
        yield from self._loop.run(fix_prompt)

    def _drain_fix(self, events: Generator[AgentEvent, None, None]) -> str:
        """Drain an _apply_fix generator and return a brief patch description.

        Joins the content of all RESPONSE events with spaces and trims the
        result to ``_MAX_PATCH_DESC_CHARS``. Returns ``"(fix applied)"`` when
        there is no response text. Events whose content is ``None`` are
        skipped so they cannot break the join.
        """
        parts: list[str] = [
            event.content
            for event in events
            if event.type == EventType.RESPONSE and event.content is not None
        ]
        return (" ".join(parts))[:_MAX_PATCH_DESC_CHARS] or "(fix applied)"

    def _get_tier_for_attempt(self, attempt_num: int) -> str:
        """Return the appropriate ModelTier string for *attempt_num*.

        Escalation schedule:
        * 1-2 -> CODER
        * 3   -> THINKER
        * 4+  -> REASONER
        """
        if attempt_num <= _CODER_MAX_ATTEMPT:
            return ModelTier.CODER
        if attempt_num == _THINKER_ATTEMPT:
            return ModelTier.THINKER
        return ModelTier.REASONER

    def _try_ensemble_patch(
        self, task_description: str, error_output: str
    ) -> tuple[bool, str]:
        """Try 3 fix strategies; keep the first one that passes the tests.

        Pending changes are stashed, then each strategy starts from a clean
        checkout of HEAD. On success the winning diff is applied on top of
        clean HEAD, the stashed pre-ensemble changes are discarded, and
        ``(True, "ensemble/<strategy>")`` is returned. In every failure case
        (no strategy passes, the patch cannot be applied, git unavailable)
        the stashed changes are restored and ``(False, reason)`` is returned.

        Cost guard: strategies run sequentially and the loop stops at the
        first passing strategy to minimise API spend.

        NOTE(review): the candidate patch comes from ``git diff HEAD``, which
        does not include untracked files a strategy may have created.
        """
        import tempfile  # noqa: PLC0415
        from pathlib import Path as _Path  # noqa: PLC0415

        if not self._test_command:
            log.info("Ensemble patch skipped: no test_command stored")
            return False, "ensemble skipped (no test command)"

        # Verify git is available
        try:
            root_proc = subprocess.run(
                ["git", "rev-parse", "--show-toplevel"],
                capture_output=True, text=True, timeout=5,
            )
            if root_proc.returncode != 0:
                return False, "ensemble skipped (not a git repo)"
            root = root_proc.stdout.strip()
        except Exception as exc:  # noqa: BLE001
            log.debug("Ensemble: git check failed: %s", exc)
            return False, "ensemble skipped (git unavailable)"

        def _git(*args: str) -> subprocess.CompletedProcess[str]:
            return subprocess.run(
                ["git", *args], capture_output=True, text=True, cwd=root, timeout=60
            )

        # Stash current state -> all strategies start from the same clean base
        stash_tag = f"gdm-ensemble-{id(self)}"
        stash_proc = _git("stash", "push", "-u", "-m", stash_tag)
        did_stash = (
            stash_proc.returncode == 0
            and "No local changes" not in stash_proc.stdout
        )

        strategies = [
            (
                "minimal",
                "Apply the minimal possible fix. Change only what is strictly necessary.",
            ),
            (
                "refactor",
                "Refactor for correctness and clarity. Prioritise correctness over brevity.",
            ),
            (
                "alternative",
                "Re-think the root cause. Use a completely different algorithmic approach.",
            ),
        ]

        best_pass_count = -1
        best_patch = ""
        best_name = ""
        winner_found = False

        try:
            for strategy_name, strategy_hint in strategies:
                # Reset working tree to the stashed base (last commit state)
                _git("checkout", "--", ".")
                _git("clean", "-fdq")

                # Fresh loop for this strategy to avoid transcript pollution
                try:
                    strategy_loop = AgentLoop(
                        session_id=(
                            f"{getattr(self._loop, '_session_id', '')}"
                            f"-ens-{strategy_name}"
                        ),
                        db=getattr(self._loop, "_db", None),
                        cost_tracker=getattr(self._loop, "_cost_tracker", None),
                        config=self._cfg,
                    )
                except Exception as exc:  # noqa: BLE001
                    log.debug(
                        "Ensemble strategy=%s: fresh AgentLoop unavailable (%s); "
                        "reusing main loop",
                        strategy_name,
                        exc,
                    )
                    strategy_loop = self._loop  # fallback: reuse main loop

                prompt = (
                    f"Task: {task_description}\n\n"
                    f"Ensemble strategy: {strategy_hint}\n\n"
                    f"Failing tests:\n```\n"
                    f"{error_output[:_MAX_FIX_PROMPT_ERROR_CHARS]}\n```"
                )
                self._drain_fix(strategy_loop.run(prompt))

                diff = _git("diff", "HEAD").stdout
                if not diff.strip():
                    log.info("Ensemble strategy=%s: no changes produced", strategy_name)
                    continue

                passed, _ = self._run_tests(self._test_command)
                pass_count = 1 if passed else 0
                log.info(
                    "Ensemble strategy=%s passed=%s diff_lines=%d",
                    strategy_name,
                    passed,
                    len(diff.splitlines()),
                )

                if pass_count > best_pass_count:
                    best_pass_count = pass_count
                    best_patch = diff
                    best_name = strategy_name

                if passed:
                    break  # found working strategy -- stop early to save cost

            winner_found = best_pass_count > 0 and bool(best_patch)

        except Exception as exc:  # noqa: BLE001
            log.exception("Ensemble inner loop failed: %s", exc)

        finally:
            # Always leave the strategy loop with a clean tree. Without a
            # winner, restore the pre-ensemble changes right away so the
            # outer loop continues from its previous state. With a winner the
            # stash is kept until the patch is known to apply.
            _git("checkout", "--", ".")
            _git("clean", "-fdq")
            if did_stash and not winner_found:
                _git("stash", "pop")

        if not winner_found:
            return False, "ensemble: no strategy passed tests"

        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".patch", delete=False, encoding="utf-8"
        ) as fh:
            fh.write(best_patch)
            patch_path = fh.name
        try:
            apply_proc = _git("apply", patch_path)
        finally:
            _Path(patch_path).unlink(missing_ok=True)

        if apply_proc.returncode != 0:
            log.warning("Ensemble apply failed: %s", apply_proc.stderr[:200])
            # The patch did not land: bring back the pre-ensemble changes
            # instead of leaving the caller with a bare HEAD checkout.
            if did_stash:
                _git("stash", "pop")
            return False, "ensemble: patch apply failed"

        # The ensemble patch replaces the pre-ensemble changes; drop them now
        # that the patch is safely applied.
        if did_stash:
            _git("stash", "drop")
        return True, f"ensemble/{best_name}"

    def _should_rollback(
        self, attempts: list[DebugAttempt], max_cycles: int = _MAX_DEBUG_CYCLES
    ) -> bool:
        """True if all attempts failed and changes should be rolled back.

        Rollback is triggered when at least one attempt failed and the
        failed-attempt count equals or exceeds *max_cycles*.

        Args:
            attempts: Attempts recorded so far.
            max_cycles: Cycle budget the run was started with; defaults to
                the configured maximum debug cycles.
        """
        failed_attempts = sum(1 for a in attempts if a.test_result == "fail")
        return failed_attempts > 0 and failed_attempts >= max_cycles