gdmcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. gdmcode-0.1.0.dist-info/METADATA +240 -0
  2. gdmcode-0.1.0.dist-info/RECORD +131 -0
  3. gdmcode-0.1.0.dist-info/WHEEL +4 -0
  4. gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/_internal/__init__.py +0 -0
  7. src/_internal/constants.py +244 -0
  8. src/_internal/domain_skills.py +339 -0
  9. src/agent/__init__.py +0 -0
  10. src/agent/commit_classifier.py +91 -0
  11. src/agent/context_budget.py +391 -0
  12. src/agent/daemon.py +681 -0
  13. src/agent/dag_validator.py +153 -0
  14. src/agent/debug_loop.py +473 -0
  15. src/agent/impact_analyzer.py +149 -0
  16. src/agent/impact_graph.py +117 -0
  17. src/agent/loop.py +1410 -0
  18. src/agent/orchestrator.py +141 -0
  19. src/agent/regression_guard.py +251 -0
  20. src/agent/review_gate.py +648 -0
  21. src/agent/risk_scorer.py +169 -0
  22. src/agent/self_healing.py +145 -0
  23. src/agent/smart_test_selector.py +89 -0
  24. src/agent/system_prompt.py +226 -0
  25. src/agent/task_tracker.py +320 -0
  26. src/agent/test_validator.py +210 -0
  27. src/agent/tool_orchestrator.py +402 -0
  28. src/agent/transcript.py +230 -0
  29. src/agent/verification_loop.py +133 -0
  30. src/agent/work_director.py +136 -0
  31. src/agent/worktree_manager.py +53 -0
  32. src/artifacts/__init__.py +16 -0
  33. src/artifacts/artifact_store.py +456 -0
  34. src/artifacts/verification_graph.py +75 -0
  35. src/auth.py +411 -0
  36. src/cli.py +1290 -0
  37. src/commands.py +1398 -0
  38. src/config.py +762 -0
  39. src/cost_tracker.py +348 -0
  40. src/db/__init__.py +4 -0
  41. src/db/migrations.py +337 -0
  42. src/enterprise/__init__.py +3 -0
  43. src/enterprise/audit_log.py +182 -0
  44. src/enterprise/identity.py +90 -0
  45. src/enterprise/rbac.py +100 -0
  46. src/enterprise/team_config.py +125 -0
  47. src/enterprise/usage_analytics.py +261 -0
  48. src/exceptions.py +207 -0
  49. src/git_workflow.py +651 -0
  50. src/integrations/__init__.py +6 -0
  51. src/integrations/github_actions.py +106 -0
  52. src/integrations/mcp_server.py +333 -0
  53. src/integrations/sentry_integration.py +100 -0
  54. src/integrations/sentry_server.py +82 -0
  55. src/integrations/webhook_security.py +19 -0
  56. src/main.py +27 -0
  57. src/memory/__init__.py +0 -0
  58. src/memory/code_index.py +376 -0
  59. src/memory/compressor.py +378 -0
  60. src/memory/context_memory.py +135 -0
  61. src/memory/continuous_memory.py +234 -0
  62. src/memory/conventions.py +495 -0
  63. src/memory/db.py +1119 -0
  64. src/memory/document_index.py +205 -0
  65. src/memory/file_cache.py +128 -0
  66. src/memory/project_scanner.py +178 -0
  67. src/memory/session_store.py +201 -0
  68. src/models/__init__.py +0 -0
  69. src/models/client.py +715 -0
  70. src/models/definitions.py +459 -0
  71. src/models/router.py +418 -0
  72. src/models/schemas.py +389 -0
  73. src/permissions.py +294 -0
  74. src/remote/__init__.py +5 -0
  75. src/remote/command_filter.py +33 -0
  76. src/remote/models.py +31 -0
  77. src/remote/permission_handler.py +79 -0
  78. src/remote/phone_ui.py +48 -0
  79. src/remote/protocol.py +59 -0
  80. src/remote/qr.py +65 -0
  81. src/remote/server.py +586 -0
  82. src/remote/token_manager.py +61 -0
  83. src/remote/tunnel.py +212 -0
  84. src/repl.py +475 -0
  85. src/runtime/__init__.py +1 -0
  86. src/runtime/branch_farm.py +372 -0
  87. src/runtime/replay.py +351 -0
  88. src/sandbox/__init__.py +2 -0
  89. src/sandbox/hermetic.py +214 -0
  90. src/sandbox/policy.py +44 -0
  91. src/sdk/__init__.py +3 -0
  92. src/sdk/plugin_base.py +39 -0
  93. src/sdk/plugin_host.py +100 -0
  94. src/sdk/plugin_loader.py +101 -0
  95. src/security.py +409 -0
  96. src/server/__init__.py +7 -0
  97. src/server/bridge.py +427 -0
  98. src/server/bridge_cli.py +103 -0
  99. src/server/bridge_client.py +170 -0
  100. src/server/protocol_version.py +103 -0
  101. src/session/__init__.py +10 -0
  102. src/session/event_fanout.py +46 -0
  103. src/session/input_broker.py +38 -0
  104. src/session/permission_bridge.py +100 -0
  105. src/tools/__init__.py +160 -0
  106. src/tools/_atomic.py +72 -0
  107. src/tools/agent_tools.py +423 -0
  108. src/tools/ask_user_tool.py +83 -0
  109. src/tools/bash_tool.py +384 -0
  110. src/tools/browser_tool.py +352 -0
  111. src/tools/browser_tools.py +179 -0
  112. src/tools/dep_tools.py +210 -0
  113. src/tools/document_reader.py +167 -0
  114. src/tools/document_tool.py +240 -0
  115. src/tools/document_writer.py +171 -0
  116. src/tools/impact_tools.py +240 -0
  117. src/tools/playwright_tool.py +172 -0
  118. src/tools/quality_tools.py +366 -0
  119. src/tools/read_tools.py +318 -0
  120. src/tools/result_cache.py +157 -0
  121. src/tools/search_tools.py +310 -0
  122. src/tools/shell_tools.py +311 -0
  123. src/tools/write_tools.py +337 -0
  124. src/voice/__init__.py +25 -0
  125. src/voice/audio_capture.py +92 -0
  126. src/voice/audio_playback.py +68 -0
  127. src/voice/errors.py +14 -0
  128. src/voice/models.py +35 -0
  129. src/voice/providers.py +143 -0
  130. src/voice/vad.py +55 -0
  131. src/voice/voice_loop.py +156 -0
src/agent/loop.py ADDED
@@ -0,0 +1,1410 @@
1
+ """Agent event loop — the generator-based heart of gdm code.
2
+
3
+ Drives the model-tool-model cycle. Yields AgentEvents so callers (REPL,
4
+ daemon, tests) can react to each step without coupling to the loop internals.
5
+
6
+ Design principles:
7
+ - TranscriptStore is the SOLE source of truth for message history.
8
+ _transcript.to_messages() is always passed to the API — no parallel list.
9
+ - One assistant Turn per model response, carrying both content and tool_calls.
10
+ - Budget is synced from transcript after each change, never additively tracked.
11
+ - All errors surface as EventType.ERROR followed by EventType.DONE; never raised.
12
+
13
+ Phase 1: non-streaming, sequential tool execution.
14
+ Phase 2 (future): streaming text deltas, parallel tool execution.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import concurrent.futures as _cf
19
+ import json
20
+ import logging
21
+ import re
22
+ import time
23
+ from dataclasses import dataclass, field
24
+ from enum import Enum, auto
25
+ from typing import TYPE_CHECKING, Any, Generator
26
+
27
+ from src.exceptions import ApiError, BudgetExceededError, FatalApiError
28
+ from src.models.client import GdmClient
29
+ from src.models.router import CircuitBreaker
30
+
31
+ from src.agent.context_budget import ContextBudget, count_tokens
32
+ from src.agent.system_prompt import build_system_prompt
33
+ from src.agent.transcript import TranscriptStore, Turn
34
+ from src.cost_tracker import CostTracker
35
+ from src.models.definitions import ModelTier, Provider, get_model
36
+
37
+ if TYPE_CHECKING:
38
+ from src.agent.tool_orchestrator import ToolOrchestrator
39
+ from src.config import GdmConfig
40
+ from src.memory.db import GdmDatabase
41
+ from src.models.router import ModelRouter
42
+
43
+ __all__ = ["AgentEvent", "AgentLoop", "EventType", "CONFIDENCE_SCHEMA"]
44
+
45
+ log = logging.getLogger(__name__)
46
+
47
# Optional enterprise usage analytics. Any failure while importing the module
# or constructing the recorder disables analytics for the whole process:
# ``_analytics`` is left as None and ``_record_usage`` becomes a no-op.
try:
    from src.enterprise.usage_analytics import UsageAnalytics as _UsageAnalytics
    from src.enterprise.usage_analytics import UsageEvent as _UsageEvent
    _analytics: "_UsageAnalytics | None" = _UsageAnalytics()
except Exception:  # noqa: BLE001
    _analytics = None

# Optional patch risk scorer. ``_check_patch_risk`` consults this flag and
# returns None instead of scoring when the import did not succeed.
try:
    from src.agent.risk_scorer import score_patch, RiskTier
    _risk_scorer_available = True
except Exception:  # noqa: BLE001
    _risk_scorer_available = False
59
+
60
+
61
def _check_patch_risk(diff, file_paths=None, autonomy_level=2):
    """Score *diff* with the risk scorer and apply the autonomy-level gates.

    Args:
        diff: Patch text handed to ``score_patch``.
        file_paths: Optional paths forwarded to ``score_patch`` unchanged.
        autonomy_level: Caller's autonomy level; higher levels relax the gates.

    Returns:
        The result object produced by ``score_patch``, or ``None`` when the
        optional risk scorer could not be imported.

    Raises:
        RuntimeError: The scorer marked the patch as blocked and
            ``autonomy_level`` is below 4.
    """
    if _risk_scorer_available:
        verdict = score_patch(diff, file_paths)
    else:
        return None

    log.info(
        "patch_risk score=%.3f tier=%s blocked=%s",
        verdict.score,
        verdict.tier,
        verdict.blocked,
    )
    if verdict.triggered_signals:
        log.debug("patch_risk rationale:\n%s", verdict.rationale)

    # Hard gate: blocked patches only pass at autonomy level 4 and above.
    if verdict.blocked and autonomy_level < 4:
        raise RuntimeError(f"Patch blocked: {verdict.rationale}")

    # Soft gate: HIGH-tier patches are only reported, never stopped.
    if verdict.tier == RiskTier.HIGH and autonomy_level < 3:
        log.warning(
            "High-risk patch detected (score=%.3f). Review before applying:\n%s",
            verdict.score,
            verdict.rationale,
        )
    return verdict
80
+
81
def _record_usage(
    session_id: str,
    actor_id: str,
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    tool_calls: int,
) -> None:
    """Record one usage event with the analytics backend. Never raises.

    Does nothing when analytics is unavailable (``_analytics`` is None).
    Recording is best-effort: a failure while building or recording the event
    is logged at debug level and otherwise ignored, so analytics problems can
    never interrupt the agent loop.

    Args:
        session_id: Session the usage belongs to.
        actor_id: Identifier of the acting user or agent.
        model: Model identifier used for the call.
        prompt_tokens: Prompt token count for the call.
        completion_tokens: Completion token count for the call.
        tool_calls: Number of tool calls attributed to the call.
    """
    if _analytics is None:
        return
    try:
        _analytics.record(
            _UsageEvent(  # type: ignore[name-defined]
                session_id=session_id,
                actor_id=actor_id,
                model=model,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                tool_calls=tool_calls,
            )
        )
    except Exception as exc:  # noqa: BLE001
        # Deliberately swallowed, but leave a trace so failures are debuggable.
        log.debug("usage analytics record failed: %s", exc)
105
+
106
+
107
# Single-worker executor for async checkpoint writes — serialises DB flushes
# so concurrent checkpoints don't interleave, and the agent thread is never blocked.
# Created at import time and shared by every AgentLoop in the process.
_CHECKPOINT_EXECUTOR: _cf.ThreadPoolExecutor = _cf.ThreadPoolExecutor(
    max_workers=1, thread_name_prefix="gdm-ckpt"
)
# AgentLoop.run() triggers a checkpoint once more than this many seconds of
# monotonic time have passed since the last one (in addition to every 5th
# model turn).
_WALL_CLOCK_CHECKPOINT_INTERVAL: float = 60.0  # seconds

# File path fragments that indicate security-sensitive code and warrant extra scanning.
_SECURITY_PATH_PATTERNS: tuple[str, ...] = (
    "auth", "session", "password", "token", "secret", "crypto",
    "permission", "sql", "query", "database",
)
119
+
120
+ # ---------------------------------------------------------------------------
121
+ # Autonomy circuit breaker (module-level for testability)
122
+ # ---------------------------------------------------------------------------
123
+
124
# task_id -> number of fix attempts already granted by check_circuit_breaker().
_fix_attempts: dict[str, int] = {}


def check_circuit_breaker(task_id: str, policy: "Any") -> None:
    """Consume one fix attempt for *task_id*, or refuse when none remain.

    Each successful call increments the per-task counter, so the call itself
    counts as an attempt.

    Args:
        task_id: Key identifying the task being fixed.
        policy: Object exposing ``max_fix_attempts``.

    Raises:
        PermissionError: The task has already used ``policy.max_fix_attempts``
            attempts.
    """
    used = _fix_attempts.get(task_id, 0)
    exhausted = used >= policy.max_fix_attempts
    if not exhausted:
        _fix_attempts[task_id] = used + 1
        return
    raise PermissionError(
        f"Circuit breaker: {used} fix attempts reached for task {task_id}. Human input required."
    )


def reset_circuit_breaker(task_id: str) -> None:
    """Forget the attempt count for *task_id*; unknown ids are ignored."""
    _fix_attempts.pop(task_id, None)
140
+
141
# JSON schema emitted to the model when requesting a confidence self-assessment.
# It describes an object with exactly two required keys and no extras:
# an integer ``score`` in 0..100 and a ``reasons`` array of strings.
# AgentLoop.parse_confidence_response() checks a reply against these rules.
CONFIDENCE_SCHEMA: dict[str, Any] = {
    "type": "object",
    "required": ["score", "reasons"],
    "additionalProperties": False,
    "properties": {
        "score": {
            "type": "integer",
            "minimum": 0,
            "maximum": 100,
            "description": "Confidence score 0-100 for the proposed change.",
        },
        "reasons": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Short bullet-point reasons justifying the score.",
        },
    },
}
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Grok native web_search spec (replaces registry spec when provider == grok)
163
+ # ---------------------------------------------------------------------------
164
+
165
# Function-tool definition named ``web_search`` that takes a single required
# string parameter, ``query``.
_GROK_WEB_SEARCH_SPEC: dict[str, Any] = {
    "type": "function",
    "function": {
        "name": "web_search",
        "description": (
            "Search the live web. Use for external APIs, errors not in codebase, "
            "or unknown technology behaviour."
        ),
        "parameters": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    },
}
180
+
181
+
182
+ # ---------------------------------------------------------------------------
183
+ # Event types
184
+ # ---------------------------------------------------------------------------
185
+
186
class EventType(Enum):
    """Tag identifying which kind of payload an AgentEvent carries."""

    # Text of a model reasoning step.
    THINKING = auto()
    # The model asked for a tool to be invoked.
    TOOL_CALL = auto()
    # A tool invocation has finished.
    TOOL_RESULT = auto()
    # A chunk of the model's final answer.
    RESPONSE = auto()
    # Task decomposition, meant for display.
    SUBTASK = auto()
    # A tool failure or a recoverable loop error.
    ERROR = auto()
    # The cost tracker reported an update.
    COST_UPDATE = auto()
    # Non-fatal warning (injection, budget, etc.).
    WARNING = auto()
    # The task is complete; nothing follows.
    DONE = auto()
197
+
198
+
199
@dataclass
class AgentEvent:
    """One item of the event stream produced by AgentLoop.run().

    Only ``type`` is required. Every other field has a neutral default so
    producers fill in just what is relevant to the event kind.
    """

    # Discriminant telling consumers how to interpret the remaining fields.
    type: EventType
    # Text payload: response text, or a warning / error message.
    content: str = ""
    # Tool-related fields — presumably populated for TOOL_CALL / TOOL_RESULT
    # events; verify against the tool execution code.
    tool_name: str = ""
    tool_call_id: str = ""
    args: dict[str, Any] = field(default_factory=dict)
    # ToolResult | None
    result: Any = None
    cost_usd: float = 0.0
    # Index of the loop turn that emitted the event.
    turn: int = 0
211
+
212
+
213
+ # ---------------------------------------------------------------------------
214
+ # Artifact auto-detection helper
215
+ # ---------------------------------------------------------------------------
216
+
217
+
218
+ def _should_auto_save(
219
+ response_text: str,
220
+ user_input: str,
221
+ *,
222
+ auto_detect: bool,
223
+ ) -> bool:
224
+ """Return True when at least 2 auto-save signals are present.
225
+
226
+ Signals:
227
+ (a) Explicit annotation: ``<!-- artifact -->`` in response text
228
+ (b) Structured block: markdown table or fenced diagram (mermaid/dot/plantuml)
229
+ (c) User command: ``/save`` in the current turn's user input
230
+
231
+ Auto-detection is opt-in (``auto_detect=False`` by default). When
232
+ disabled, always returns ``False`` regardless of signals, preventing
233
+ alert fatigue.
234
+ """
235
+ if not auto_detect:
236
+ return False
237
+ signals = 0
238
+ if "<!-- artifact -->" in response_text:
239
+ signals += 1
240
+ if re.search(
241
+ r"(^\|.+\|$|```(mermaid|dot|plantuml))", response_text, re.MULTILINE
242
+ ):
243
+ signals += 1
244
+ if user_input.strip().startswith("/save"):
245
+ signals += 1
246
+ return signals >= 2
247
+
248
+
249
+ # ---------------------------------------------------------------------------
250
+ # Agent loop
251
+ # ---------------------------------------------------------------------------
252
+
253
+ class AgentLoop:
254
+ """Generator-based model-tool-model loop.
255
+
256
+ Callers iterate over AgentLoop.run(user_message) to receive AgentEvents.
257
+ TranscriptStore is the single source of truth — no parallel message list.
258
+
259
+ Usage::
260
+
261
+ loop = AgentLoop(cfg, orchestrator, transcript, budget, cost_tracker)
262
+ for event in loop.run("Fix the auth bug"):
263
+ if event.type == EventType.RESPONSE:
264
+ console.print(event.content)
265
+ """
266
+
267
def __init__(
    self,
    cfg: GdmConfig,
    orchestrator: ToolOrchestrator,
    transcript: TranscriptStore,
    budget: ContextBudget,
    cost_tracker: CostTracker,
    *,
    model_tier: str = ModelTier.CODER,
    router: ModelRouter | None = None,
    db: GdmDatabase | None = None,
    session_id: str = "",
    project_id: str = "",
    circuit_breaker: CircuitBreaker | None = None,
    continuous_memory: Any = None,
) -> None:
    """Store collaborators, resolve the model, and reset per-session state.

    Args:
        cfg: Configuration; ``provider`` and the optional ``proxy_*``
            attributes are read here.
        orchestrator: Tool orchestrator used by the loop.
        transcript: Message history store.
        budget: Context budget kept in sync with the transcript.
        cost_tracker: Cost tracker consulted by the loop guards.
        model_tier: Tier resolved to a model via ``get_model``.
        router: Optional model router for per-run tier selection.
        db: Optional database handle.
        session_id: Session identifier ("" when there is none).
        project_id: Project identifier ("" when there is none).
        circuit_breaker: Optional breaker consulted at the top of each turn.
        continuous_memory: Optional ContinuousMemory instance.
    """
    # Collaborators handed in by the caller.
    self._cfg = cfg
    self._orchestrator = orchestrator
    self._transcript = transcript
    self._budget = budget
    self._cost_tracker = cost_tracker

    # Active model for the requested tier.
    self._model = get_model(model_tier, cfg.provider)
    self._model_id = self._model.id

    # Proxy state must exist before the first client is created.
    # The isinstance checks keep MagicMock configs in tests from
    # accidentally switching proxy mode on.
    candidate_url = getattr(cfg, "proxy_url", "") or ""
    candidate_token = getattr(cfg, "proxy_token", None)
    url_is_str = isinstance(candidate_url, str)
    token_is_str = isinstance(candidate_token, str)
    self._proxy_active: bool = (
        bool(getattr(cfg, "proxy_enabled", False))
        and url_is_str and bool(candidate_url)
        and token_is_str and bool(candidate_token)
    )
    self._proxy_url: str = candidate_url if url_is_str else ""
    self._proxy_token: str = candidate_token if token_is_str else ""
    self._gdm_client = self._make_client()

    self._router = router
    self._db = db
    self._session_id = session_id
    self._project_id = project_id
    self._initialized = False

    # Path objects from write tools this turn.
    self._files_written: list[Any] = []
    # Incremented each model call; drives checkpoints.
    self._model_turn_count: int = 0
    # Monotonic time of last checkpoint write.
    self._last_checkpoint_at: float = 0.0
    # Pending async checkpoint.
    self._checkpoint_future: "_cf.Future[None] | None" = None
    # Event log ID for the in-flight turn.
    self._current_event_id: str | None = None
    # GitWorkflow | None — set by _try_create_task_branch.
    self._git_wf: Any = None
    # "on" | "off" | "auto" — set via /reasoning.
    self._reasoning_mode: str = "auto"
    # Consecutive forced-REASONER turn counter.
    self._reasoning_escalation_count: int = 0
    # True once fallback provider is active.
    self._using_fallback: bool = False
    # RegressionGuard | None — set externally.
    self._regression_guard: Any = None
    # VerificationGraph | None — set externally.
    self._vg: Any = None
    self._circuit_breaker: CircuitBreaker | None = circuit_breaker
    # Drives re-compression guard.
    self._last_compressed_at_turn: int = -10
    # ContinuousMemory | None.
    self._continuous_memory: Any = continuous_memory
321
+
322
+ # ------------------------------------------------------------------
323
+ # Fallback helpers
324
+ # ------------------------------------------------------------------
325
+
326
def _make_client(self, fallback_provider: str | None = None) -> GdmClient:
    """Build a GdmClient that honours the current proxy state.

    Every place that rebuilds the client (init, tier switching, fallback)
    is expected to call this helper so proxy mode is never silently lost.
    While the proxy is active it wins over *fallback_provider*: all calls go
    through the proxy, because users in geo-restricted regions need it for
    every call, not only for the primary provider.

    Args:
        fallback_provider: Provider to build the client for when the proxy
            is not active; ``None`` or "" selects the configured default.

    Returns:
        A proxy client, a provider-specific client, or the default client,
        in that order of precedence.
    """
    # getattr defaults let this run before __init__ has set the proxy fields.
    active = getattr(self, "_proxy_active", False)
    url = getattr(self, "_proxy_url", "")
    token = getattr(self, "_proxy_token", "")

    if not (active and url and token):
        if fallback_provider:
            return GdmClient.for_provider(fallback_provider, self._cfg)
        return GdmClient(self._cfg)
    return GdmClient.for_proxy(url, token)
343
+
344
@staticmethod
def parse_confidence_response(raw: Any) -> "dict[str, Any] | None":
    """Return a cleaned confidence dict, or ``None`` when *raw* is malformed.

    A well-formed reply is a dict whose ``score`` is an ``int`` (not a
    ``bool``) within 0..100 and whose ``reasons`` is a list containing only
    strings. Any other keys in *raw* are dropped from the result.

    Args:
        raw: Candidate value, typically decoded model output.

    Returns:
        ``{"score": ..., "reasons": ...}`` on success, otherwise ``None``.
    """
    if not isinstance(raw, dict):
        return None

    score, reasons = raw.get("score"), raw.get("reasons")

    # bool is a subclass of int, so it has to be excluded explicitly.
    score_ok = (
        isinstance(score, int)
        and not isinstance(score, bool)
        and 0 <= score <= 100
    )
    reasons_ok = isinstance(reasons, list) and all(
        isinstance(item, str) for item in reasons
    )

    if score_ok and reasons_ok:
        return {"score": score, "reasons": reasons}
    return None
364
+
365
@staticmethod
def _should_try_fallback(exc: ApiError) -> bool:
    """Return True if *exc* is a transient error that warrants a fallback.

    Checks are tried in order:

    1. *exc* is an ``ApiRateLimitError``.
    2. ``exc.status_code`` (or ``exc.status``) is 429, 500 or 503.
    3. As a last resort, the message text contains one of those codes
       preceded by a space or ``=``.

    The text match requires a word boundary after the code, so longer
    numbers such as "after 5000 ms" are not mistaken for an HTTP status.

    Args:
        exc: The API error raised by the client.
    """
    from src.exceptions import ApiRateLimitError

    if isinstance(exc, ApiRateLimitError):
        return True

    # Check HTTP status if present.
    status = getattr(exc, "status_code", None) or getattr(exc, "status", None)
    if status in (429, 500, 503):
        return True

    # Fall back on a status embedded in the message as a last resort.
    return re.search(r"(?<=[ =])(?:429|500|503)\b", str(exc)) is not None
378
+
379
def _switch_to_fallback(self, error: ApiError) -> str:
    """Move this loop onto the configured fallback provider.

    The switch is sticky: once made, ``_using_fallback`` stays set and a
    second call fails instead of switching again. The fallback model id is
    taken from ``cfg.model_id_map`` keyed by the current model id; when that
    has no entry, the first model listed for the fallback provider is used.

    Args:
        error: The API error that triggered the switch.

    Returns:
        A human-readable notification describing the switch.

    Raises:
        FatalApiError: Already on the fallback (both providers failed), or
            the fallback provider has no models.
        ApiError: No fallback is configured; *error* itself is re-raised.
    """
    if self._using_fallback:
        raise FatalApiError(
            f"Both primary and fallback providers failed. Last error: {error}"
        )

    provider = self._cfg.fallback_provider
    if not provider:
        # Nothing to switch to — hand the original failure back.
        raise error

    # Prefer an explicit mapping from the current model id.
    target_model = self._cfg.model_id_map.get(self._model_id)
    if not target_model:
        from src.models.definitions import models_for_provider

        candidates = models_for_provider(provider)
        if not candidates:
            raise FatalApiError(
                f"No models available for fallback provider '{provider}'"
            )
        target_model = candidates[0].id

    self._gdm_client = self._make_client(fallback_provider=provider)
    self._model_id = target_model
    self._using_fallback = True
    log.info(
        "API fallback: switched to provider=%s model=%s (triggered by: %s)",
        provider,
        target_model,
        error,
    )
    return (
        f"Primary API unavailable ({error}). "
        f"Switched to fallback provider '{provider}' model '{target_model}'."
    )
420
+
421
+ # ------------------------------------------------------------------
422
+ # Escalation helper
423
+ # ------------------------------------------------------------------
424
+
425
def _handle_api_error_escalation(self, exc: ApiError, turn_num: int) -> None:
    """Report an API failure to the router and the circuit breaker.

    Does nothing when neither a router nor a circuit breaker is attached.
    Otherwise an ``EscalationContext`` describing the failure is built; the
    router (if any) receives it via ``escalate_with_context`` and the
    breaker (if any) records an escalation at the current session cost.

    Args:
        exc: The API error being reported.
        turn_num: Loop turn index, passed on as the attempt number.
    """
    router = self._router
    breaker = self._circuit_breaker
    if router is None and breaker is None:
        return

    from src.models.router import EscalationContext, FailureType

    context = EscalationContext(
        current_tier=getattr(self._model, "tier", ModelTier.CODER),
        failure_type=FailureType.API_ERROR,
        failure_detail=str(exc),
        attempt_number=turn_num,
        cost_spent_usd=self._cost_tracker.session_total_usd,
        transcript_summary="",
    )
    if router is not None:
        router.escalate_with_context(context)
    if breaker is not None:
        breaker.record_escalation(self._cost_tracker.session_total_usd)
442
+
443
+ # ------------------------------------------------------------------
444
+ # Public runtime-control methods
445
+ # ------------------------------------------------------------------
446
+
447
def set_tier(self, tier: str) -> None:
    """Change the active model tier at runtime (/model command).

    Resolves *tier* for the configured provider, then rebuilds the client.
    Any failure is logged as a warning and not raised.

    NOTE(review): the model is resolved for ``cfg.provider`` and the client
    is rebuilt without a fallback provider even when ``_using_fallback`` is
    set — confirm that is intended.

    Args:
        tier: Tier name passed to ``get_model``.
    """
    try:
        selected = get_model(tier, self._cfg.provider)
        self._model = selected
        self._model_id = selected.id
        self._gdm_client = self._make_client()
    except Exception as exc:
        log.warning("set_tier(%r) failed: %s", tier, exc)
455
+
456
def set_proxy(self, url: str, token: str) -> None:
    """Turn proxy mode on so later LLM calls are routed through *url*.

    Stores the proxy credentials, marks the proxy active and rebuilds the
    client immediately.

    Args:
        url: Proxy endpoint.
        token: Token passed to the proxy client.
    """
    self._proxy_url, self._proxy_token = url, token
    self._proxy_active = True
    self._gdm_client = self._make_client()
    log.info("Proxy mode enabled: %s", url)
463
+
464
def clear_proxy(self) -> None:
    """Turn proxy mode off and go back to direct provider calls.

    The stored URL and token are kept; only the active flag is cleared
    before the client is rebuilt.
    """
    self._proxy_active = False
    direct_client = self._make_client()
    self._gdm_client = direct_client
    log.info("Proxy mode disabled")
469
+
470
+ # ------------------------------------------------------------------
471
+ # Public generator
472
+ # ------------------------------------------------------------------
473
+
474
def run(self, user_message: str) -> Generator[AgentEvent, None, None]:
    """Drive the model-tool-model cycle for one user message.

    The user message is appended to the transcript, then the loop runs for at
    most ``cfg.max_turns`` turns. Each turn checks the cost, circuit-breaker
    and DB budget guards, calls the model, stores one assistant turn, and
    either executes the requested tool calls (and loops) or finishes.

    Failures handled here are reported as an ``ERROR`` event followed by
    ``DONE`` rather than being raised. For transient API errors (see
    ``_should_try_fallback``) one switch to the fallback provider is
    attempted before giving up.

    Args:
        user_message: The user's prompt for this run.

    Yields:
        AgentEvent: WARNING / COST_UPDATE notices, THINKING on a provider
        switch, RESPONSE for assistant text, whatever the cost, tool,
        self-critique and review-gate helpers emit, ERROR on failure, and a
        final DONE.
    """
    if not self._initialized:
        self._try_create_task_branch(user_message)
    # NOTE(review): nesting reconstructed from an unindented rendering —
    # confirm this call is meant to sit outside the guard above.
    self._ensure_initialized()

    # User injection check — warn-only, structural patterns only
    from src.security import check_user_injection
    _user_inj = check_user_injection(user_message)
    if _user_inj.is_injected:
        yield AgentEvent(
            EventType.WARNING,
            content=(
                f"⚠ User message matched structural injection pattern "
                f"({_user_inj.pattern}). Message will still be processed."
            ),
        )

    # Dynamic model tier routing — re-pick tier based on prompt complexity
    # Skip router when on fallback to keep session-sticky provider/model.
    if self._router is not None and not self._using_fallback:
        from src.models.router import TaskContext
        ctx = TaskContext(
            prompt=user_message,
            token_count=self._budget.used_tokens,
            provider=self._cfg.provider,
        )
        tier = self._router.select_tier_for_turn(
            ctx,
            reasoning_mode=self._reasoning_mode,
        )
        self._model = get_model(tier, self._cfg.provider)
        self._model_id = self._model.id
        self._gdm_client = self._make_client()
        log.debug(
            "ModelRouter selected tier=%s mode=%s for prompt=%r",
            tier,
            self._reasoning_mode,
            user_message[:60],
        )
        # Cost-budget guard: warn after N consecutive forced-REASONER turns
        if self._reasoning_mode == "on" and tier == ModelTier.REASONER:
            self._reasoning_escalation_count += 1
            if self._reasoning_escalation_count > 5:
                yield AgentEvent(
                    EventType.COST_UPDATE,
                    content=(
                        f"⚠ Reasoning mode forced ON — "
                        f"{self._reasoning_escalation_count} consecutive REASONER calls. "
                        "Use /reasoning auto to save cost."
                    ),
                )
        else:
            # Streak broken: the counter only tracks consecutive forced turns.
            self._reasoning_escalation_count = 0

    self._files_written = []  # reset per-run file tracking

    self._transcript.append(
        Turn(role="user", content=user_message, tokens=count_tokens(user_message))
    )
    self._budget.sync_from_transcript(self._transcript)

    max_turns = self._cfg.max_turns
    for turn_num in range(max_turns):
        # Cost guard — soft limit from config
        if self._cost_tracker.exceeds(self._cfg.cost_limit_usd):
            yield AgentEvent(EventType.ERROR, content="Cost limit exceeded", turn=turn_num)
            yield AgentEvent(EventType.DONE, turn=turn_num)
            return
        # Circuit breaker guard
        if self._circuit_breaker is not None and self._circuit_breaker.should_halt():
            yield AgentEvent(EventType.ERROR,
                             content=self._circuit_breaker.halt_reason(), turn=turn_num)
            yield AgentEvent(EventType.DONE, turn=turn_num)
            return
        # Budget enforcement — hard-stop limit from DB
        if self._db is not None:
            try:
                self._cost_tracker.check_budget(self._db)
            except BudgetExceededError as exc:
                yield AgentEvent(EventType.ERROR, content=str(exc), turn=turn_num)
                yield AgentEvent(EventType.DONE, turn=turn_num)
                return

        # Checkpoint every 5th model turn, or when the wall-clock interval
        # since the last checkpoint has elapsed.
        self._model_turn_count += 1
        if (self._model_turn_count % 5 == 0
                or time.monotonic() - self._last_checkpoint_at > _WALL_CLOCK_CHECKPOINT_INTERVAL):
            self._checkpoint()

        # Begin event log entry for this turn
        self._current_event_id = self._event_log_begin(
            turn_num, user_message if turn_num == 0 else None
        )

        # Compress if needed BEFORE building the API payload
        self._maybe_compress(turn_num)

        # Drain BTW queue — inject any pending out-of-band messages as user turns
        if self._db is not None and self._session_id:
            try:
                pending_btw = self._db.btw_dequeue_pending(self._session_id)
                if pending_btw:
                    for btw in pending_btw:
                        note = f"[OUT-OF-BAND NOTE]: {btw['message']}"
                        self._transcript.append(
                            Turn(
                                role="user",
                                content=note,
                                # Count the text actually stored (prefix
                                # included), as the other turns do.
                                tokens=count_tokens(note),
                            )
                        )
                    self._db.btw_mark_delivered([b["id"] for b in pending_btw])
                    log.debug("Injected %d BTW message(s) into transcript", len(pending_btw))
            except Exception as _btw_exc:  # noqa: BLE001
                log.warning("BTW queue drain failed: %s", _btw_exc)

        tools = self._build_tool_specs()
        try:
            response = self._gdm_client.complete(
                self._transcript.to_messages(),
                model=self._model_id,
                tools=tools if tools else None,
            )
        except ApiError as exc:
            # For 429/500/503, attempt a one-time provider fallback
            if self._should_try_fallback(exc):
                try:
                    switch_msg = self._switch_to_fallback(exc)
                except FatalApiError as fatal_exc:
                    yield AgentEvent(EventType.ERROR, content=str(fatal_exc), turn=turn_num)
                    self._checkpoint()
                    self._flush_checkpoint_sync()
                    yield AgentEvent(EventType.DONE, turn=turn_num)
                    return
                except ApiError:
                    # No fallback configured — propagate original error
                    yield AgentEvent(EventType.ERROR, content=str(exc), turn=turn_num)
                    self._checkpoint()
                    self._flush_checkpoint_sync()
                    yield AgentEvent(EventType.DONE, turn=turn_num)
                    return
                # Switched successfully — notify and retry with fallback
                yield AgentEvent(EventType.THINKING, content=switch_msg, turn=turn_num)
                try:
                    response = self._gdm_client.complete(
                        self._transcript.to_messages(),
                        model=self._model_id,
                        tools=tools if tools else None,
                    )
                except ApiError as retry_exc:
                    yield AgentEvent(EventType.ERROR, content=str(retry_exc), turn=turn_num)
                    self._checkpoint()
                    self._flush_checkpoint_sync()
                    yield AgentEvent(EventType.DONE, turn=turn_num)
                    return
            else:
                self._handle_api_error_escalation(exc, turn_num)
                yield AgentEvent(EventType.ERROR, content=str(exc), turn=turn_num)
                self._checkpoint()
                self._flush_checkpoint_sync()
                yield AgentEvent(EventType.DONE, turn=turn_num)
                return

        choice = response.choices[0]
        msg = choice.message
        finish_reason = choice.finish_reason

        # Record cost
        yield from self._record_cost(response, turn_num)

        # --- Build ONE unified assistant turn (content + tool_calls combined) ---
        raw_tool_calls: list[dict[str, Any]] | None = None
        if msg.tool_calls:
            raw_tool_calls = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments,
                    },
                }
                for tc in msg.tool_calls
            ]

        asst_content = msg.content or ""
        # Complete event log entry with token counts and response text
        if self._db is not None and self._current_event_id:
            try:
                _usage = response.usage
                _in_tok = (getattr(_usage, "prompt_tokens", 0) or 0) if _usage else 0
                _out_tok = (getattr(_usage, "completion_tokens", 0) or 0) if _usage else 0
                self._db.event_log_complete_turn(
                    self._current_event_id, asst_content,
                    _in_tok, _out_tok, 0,
                    self._cost_tracker.session_total_usd,
                )
            except Exception as _exc:  # noqa: BLE001
                log.debug("event_log_complete_turn: %s", _exc)
        asst_tokens = count_tokens(
            asst_content + (json.dumps(raw_tool_calls) if raw_tool_calls else "")
        )
        self._transcript.append(Turn(
            role="assistant",
            content=asst_content,
            tokens=asst_tokens,
            tool_calls=raw_tool_calls,
        ))
        self._budget.sync_from_transcript(self._transcript)

        # Emit text response
        if asst_content:
            yield AgentEvent(EventType.RESPONSE, content=asst_content, turn=turn_num)

        # Execute tool calls
        if raw_tool_calls:
            yield from self._execute_tool_calls(msg.tool_calls, turn_num)
            continue  # loop back to model with tool results

        # Guard: the model claimed tool calls but supplied none. Looping
        # again would re-send the same transcript, so stop with a specific
        # error instead.
        if finish_reason == "tool_calls":
            yield AgentEvent(EventType.ERROR, turn=turn_num,
                             content="finish_reason=tool_calls but no tool_calls in response")
            self._checkpoint()
            self._flush_checkpoint_sync()
            yield AgentEvent(EventType.DONE, turn=turn_num)
            return

        # Natural completion
        if finish_reason in ("stop", "end_turn", None):
            # Self-critique before finishing if any files were written this run
            if self._files_written:
                yield from self._run_self_critique(turn_num)
            # NOTE(review): nesting reconstructed from an unindented
            # rendering — confirm the review gate is meant to run regardless
            # of _files_written.
            yield from self._maybe_review_gate(turn_num)
            self._checkpoint()
            self._flush_checkpoint_sync()
            if self._db is not None and self._session_id:
                try:
                    self._db.session_set_status(self._session_id, "complete")
                except Exception as exc:  # noqa: BLE001
                    log.debug("session_set_status complete: %s", exc)
            self._git_checkpoint_on_completion()
            yield AgentEvent(EventType.DONE, turn=turn_num)
            return

        # Unexpected finish reason
        yield AgentEvent(EventType.ERROR, turn=turn_num,
                         content=f"Unexpected finish_reason: {finish_reason!r}")
        self._checkpoint()
        self._flush_checkpoint_sync()
        yield AgentEvent(EventType.DONE, turn=turn_num)
        return

    # The for-loop ran out of turns without a natural completion.
    yield AgentEvent(EventType.ERROR, turn=max_turns - 1,
                     content=f"Max turns ({max_turns}) reached without completion")
    self._checkpoint()
    self._flush_checkpoint_sync()
    yield AgentEvent(EventType.DONE, turn=max_turns - 1)
729
+
730
+ # ------------------------------------------------------------------
731
+ # Private helpers
732
+ # ------------------------------------------------------------------
733
+
734
def _try_create_task_branch(self, user_message: str = "") -> None:
    """Attempt to create a gdm task branch for this session.

    Best-effort: any exception is logged at debug level and swallowed.
    Outcomes, in order of checks:

    - project_root is not a git repo: nothing happens, ``_git_wf`` is untouched.
    - already on a ``gdm/*`` branch: ``_git_wf`` is set, no branch is created.
    - working tree is dirty: ``_git_wf`` is set, no branch is created.
    - otherwise: a task branch is created and ``_git_wf`` is set.

    Args:
        user_message: Text whose first 40 characters seed the branch slug.
            When empty, the first 8 characters of the session id are used.
    """
    try:
        from src.git_workflow import GitWorkflow
        wf = GitWorkflow(self._cfg.project_root)
        if not wf.is_git_repo():
            return
        current = wf.current_branch()
        if current.startswith("gdm/"):
            # Already on a task branch (resumed session)
            self._git_wf = wf
            return
        if not wf.is_clean():
            # Dirty tree: don't create a branch (rollback would be unsafe).
            # Still set _git_wf so /diff and /commit work, just skip branch.
            log.info(
                "git: working tree has uncommitted changes — skipping task branch creation"
            )
            self._git_wf = wf
            return
        # Slug source: message prefix when given, else session-id prefix.
        # The session id may be unset; slicing None would raise TypeError,
        # which the broad handler below would swallow, silently disabling
        # git integration. Fall back to a fixed token instead.
        if user_message:
            slug_src = user_message[:40]
        else:
            slug_src = (self._session_id or "task")[:8]
        wf.create_task_branch(slug_src)
        self._git_wf = wf
        log.info("git: task branch created from '%s'", current)
    except Exception as exc:  # noqa: BLE001
        log.debug("Git task branch skipped (non-git or no commits): %s", exc)
768
+
769
def _ensure_initialized(self) -> None:
    """Install the system prompt once; subsequent calls return immediately.

    Builds the prompt from the config and the registered tools, prepends it
    to the transcript, re-syncs the budget, flips ``_initialized`` and, when
    a DB and session id are available, marks the session "active"
    (failures there are only logged).
    """
    if self._initialized:
        return

    from src.tools import REGISTRY

    system_prompt = build_system_prompt(
        self._cfg,
        REGISTRY.all_tools(),
        db=self._db,
        project_id=self._project_id,
    )
    self._transcript.prepend_system(system_prompt, count_tokens(system_prompt))
    self._budget.sync_from_transcript(self._transcript)
    self._initialized = True

    if self._db is None or not self._session_id:
        return
    try:
        self._db.session_set_status(self._session_id, "active")
    except Exception as exc:  # noqa: BLE001
        log.debug("session_set_status active: %s", exc)
789
+
790
def _build_tool_specs(self) -> list[dict[str, Any]]:
    """Return the tool specs to send with the API call.

    The permitted specs come from the orchestrator. When the configured
    provider is Grok, any spec whose function name is ``web_search`` is
    removed and the native Grok web-search spec is appended instead.
    """
    permitted = self._orchestrator.get_permitted_specs()
    if self._cfg.provider == Provider.GROK:
        # Swap the registry web_search (if any) for the native Grok spec.
        kept = [
            spec
            for spec in permitted
            if spec.get("function", {}).get("name") != "web_search"
        ]
        return [*kept, _GROK_WEB_SEARCH_SPEC]
    return permitted
802
+
803
def _record_cost(
    self, response: Any, turn_num: int
) -> Generator[AgentEvent, None, None]:
    """Record token usage for one response and emit a COST_UPDATE event.

    Does nothing (yields nothing) when ``response.usage`` is None. Missing
    or falsy token counts are treated as 0.
    """
    usage = response.usage
    if usage is not None:
        prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
        completion_tokens = getattr(usage, "completion_tokens", 0) or 0

        self._cost_tracker.record(
            tier=self._model.tier,  # type: ignore[arg-type]
            input_tokens=prompt_tokens,
            output_tokens=completion_tokens,
        )
        _record_usage(
            session_id=self._session_id or "unknown",
            actor_id=getattr(self._cfg, "actor_id", None) or "unknown",
            model=self._model_id,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            tool_calls=0,
        )

        running_total = self._cost_tracker.session_total_usd
        yield AgentEvent(EventType.COST_UPDATE, cost_usd=running_total, turn=turn_num)
827
+
828
def _execute_tool_calls(
    self, tool_calls: list[Any], turn_num: int
) -> Generator[AgentEvent, None, None]:
    """Execute each tool call and append its result to the transcript.

    For every call: yields a TOOL_CALL event, runs the tool through the
    orchestrator, yields a TOOL_RESULT event, and appends one ``tool`` turn.
    Tools named in ``_WRITE_TOOLS`` additionally get a regression baseline
    captured before the edit and, after a successful edit with a known
    path, event-log patch recording, auto-quality notes, continuous-memory
    logging and a regression check. Quality and regression notes are
    appended to the tool result content.

    Args:
        tool_calls: Objects exposing ``id``, ``function.name`` and
            ``function.arguments`` (a JSON string).
        turn_num: Turn number stamped on every emitted event.

    Yields:
        TOOL_CALL and TOOL_RESULT ``AgentEvent`` objects, in call order.
    """
    from pathlib import Path as _Path
    from src._internal.constants import _WRITE_TOOLS  # noqa: PLC0415
    for call_index, tc in enumerate(tool_calls):
        tool_name = tc.function.name
        tool_call_id = tc.id

        try:
            parsed = json.loads(tc.function.arguments or "{}")
        except json.JSONDecodeError:
            parsed = {}
            log.warning("Malformed JSON args for %r — using {}", tool_name)
        if not isinstance(parsed, dict):
            # Valid JSON that is not an object (null, list, string, number)
            # would make every args.get(...) below raise AttributeError.
            log.warning("Non-object JSON args for %r — using {}", tool_name)
            parsed = {}
        args: dict[str, Any] = parsed

        yield AgentEvent(EventType.TOOL_CALL, tool_name=tool_name,
                         tool_call_id=tool_call_id, args=args, turn=turn_num)

        # Pre-write: capture regression baseline before the edit
        is_write = tool_name in _WRITE_TOOLS
        baseline = None
        pre_write_path: str = ""
        if is_write and self._regression_guard is not None:
            pre_write_path = (
                args.get("path") or args.get("file_path") or ""
            )
            if pre_write_path:
                try:
                    baseline = self._regression_guard.capture_baseline(
                        _Path(pre_write_path)
                    )
                except Exception as _rg_exc:  # noqa: BLE001
                    log.debug("regression baseline capture failed: %s", _rg_exc)

        # warn-only verification graph precondition gate
        if is_write and getattr(self, "_vg", None) is not None:
            _vg_p = args.get("path") or args.get("file_path") or ""
            if _vg_p and self._session_id:
                try:
                    _unsafe = self._vg.check_edit_preconditions(self._session_id, _vg_p)
                    if _unsafe:
                        log.warning("[vg] prior edits lack verification for %s: %s", _vg_p, _unsafe)
                except Exception as _vge:  # noqa: BLE001
                    log.debug("vg precondition check: %s", _vge)

        tool_result = self._orchestrator.execute(tool_name, args,
                                                 model_id=self._model_id)

        yield AgentEvent(EventType.TOOL_RESULT, tool_name=tool_name,
                         tool_call_id=tool_call_id, result=tool_result,
                         turn=turn_num)

        # Log tool call to event log
        if self._db is not None and self._current_event_id:
            try:
                self._db.event_log_record_tool_call(
                    self._current_event_id, call_index,
                    tool_name, tool_call_id, args,
                    result={"output": (tool_result.output or "")[:500],
                            "error": tool_result.error},
                    ok=tool_result.error is None,
                    error=tool_result.error,
                )
            except Exception as _exc:  # noqa: BLE001
                log.debug("event_log_record_tool_call: %s", _exc)
        # Also count premium tool calls for budget tracking
        self._cost_tracker.record_tool_call(tool_name)

        result_content = tool_result.as_message_content()

        # Post-write quality checks for successful file writes
        if is_write and tool_result.error is None:
            # Prefer metadata["path"] (resolved), fall back to args
            path_arg = (
                (tool_result.metadata or {}).get("path")
                or args.get("path")
                or args.get("file_path")
                or ""
            )
            if path_arg:
                self._files_written.append(_Path(path_arg))
                # Log patch to event log
                if self._db is not None and self._current_event_id:
                    try:
                        patch_text = args.get("content") or args.get("new_content") or ""
                        self._db.event_log_record_patch(
                            self._current_event_id, path_arg,
                            str(patch_text)[:10_000],
                        )
                    except Exception as _exc:  # noqa: BLE001
                        log.debug("event_log_record_patch: %s", _exc)
                quality_note = self._auto_quality(path_arg)
                if quality_note:
                    result_content += f"\n\n{quality_note}"

                # Log to continuous memory (non-fatal); use getattr for backward compat
                # with tests that use AgentLoop.__new__ without calling __init__.
                _cm = getattr(self, "_continuous_memory", None)
                if _cm is not None:
                    try:
                        _cm.log_decision(
                            self._session_id,
                            path_arg,
                            tool_name,
                            path_arg,
                            turn_num,
                        )
                        _cm.update_hotspot(
                            self._session_id, path_arg, self._project_id
                        )
                    except Exception as _cm_exc:  # noqa: BLE001
                        log.warning("ContinuousMemory update failed: %s", _cm_exc)

                # Post-write regression check. A baseline exists only when the
                # args carried a path, so path_arg is always set here.
                if baseline is not None and self._regression_guard is not None:
                    try:
                        regression_result = self._regression_guard.verify_after_edit(
                            _Path(pre_write_path)
                        )
                        if regression_result.new_failures:
                            fix_note = self._enter_fix_loop(regression_result, path_arg)
                            if fix_note:
                                result_content += f"\n\n{fix_note}"
                    except Exception as _rg_exc:  # noqa: BLE001
                        log.debug("regression verify failed: %s", _rg_exc)

        # Build the tool result turn, marking write-tool results as non_droppable
        # so the compressor never drops or truncates unverified file writes.
        path_for_nd = args.get("path") or args.get("file_path") or "?"
        tool_turn = Turn(
            role="tool",
            content=result_content,
            tokens=count_tokens(result_content),
            tool_name=tool_name,
            tool_call_id=tool_call_id,
            non_droppable=is_write,
            non_droppable_reason=(
                f"unverified write: {path_for_nd}" if is_write else ""
            ),
        )
        self._transcript.append(tool_turn)

    self._budget.sync_from_transcript(self._transcript)
971
+
972
def _auto_quality(self, path_str: str) -> str:
    """Run post-write quality tools on a file and summarise what they found.

    Lint and complexity checks always run; the type check runs for ``.py``
    files and the security scan when the lower-cased path contains one of
    ``_SECURITY_PATH_PATTERNS``. All selected tools are submitted to one
    thread pool. A tool that raises is logged and skipped; if the pool
    itself fails, '' is returned.

    Returns:
        A formatted warning string when issues were found, '' otherwise.
    """
    from pathlib import Path as _Path
    from src.tools.quality_tools import (
        ComplexityCheckTool, LintFileTool, SecurityScanTool, TypeCheckTool,
    )

    lowered_path = path_str.lower()
    python_file = _Path(path_str).suffix.lower() == ".py"
    security_sensitive = any(
        pattern in lowered_path for pattern in _SECURITY_PATH_PATTERNS
    )

    jobs: list[tuple[str, Any, dict[str, Any]]] = [
        ("lint", LintFileTool(), {"path": path_str}),
    ]
    if python_file:
        jobs.append(("typecheck", TypeCheckTool(), {"files": [path_str]}))
    if security_sensitive:
        jobs.append(("security", SecurityScanTool(), {"path": path_str}))
    jobs.append(("complexity", ComplexityCheckTool(), {"path": path_str}))

    findings: list[str] = []
    try:
        with _cf.ThreadPoolExecutor(max_workers=len(jobs)) as pool:
            submitted = [
                (pool.submit(tool.execute, params), label)
                for label, tool, params in jobs
            ]
            # Collect in submission order so the report order is stable.
            for future, label in submitted:
                try:
                    outcome = future.result(timeout=60)
                    if label == "complexity":
                        # Complexity reports through its output text only.
                        if outcome.output and "High complexity" in outcome.output:
                            findings.append(f"Complexity: {outcome.output}")
                    elif outcome.error:
                        findings.append(f"{label.capitalize()}: {outcome.error}")
                except Exception as exc:  # noqa: BLE001
                    log.debug("Quality tool %s raised: %s", label, exc)
    except Exception as exc:  # noqa: BLE001
        log.debug("_auto_quality pool failed for %s: %s", path_str, exc)
        return ""

    if findings:
        joined = "\n".join(findings)
        return (
            f"⚠ Auto-quality issues in {path_str}:\n{joined}\n"
            "Fix these before marking the task complete."
        )
    return ""
1022
+
1023
+ def _auto_lint(self, path_str: str) -> str:
1024
+ """Backward-compatible alias for _auto_quality."""
1025
+ return self._auto_quality(path_str)
1026
+
1027
def _enter_fix_loop(self, regression_result: Any, file_path: str) -> str:
    """Format a regression report for the caller to attach to a tool result.

    The report lists the new failures, adds the coverage drop when one is
    present, and ends with either a rollback recommendation or a request to
    fix the failing tests. The regression is also logged as a warning.

    Returns:
        The multi-line warning text.
    """
    failure_block = "\n".join(f" ✗ {t}" for t in regression_result.new_failures)
    report = [f"⚠ Regression detected after editing {file_path}:", failure_block]

    if regression_result.coverage_drop is not None:
        report.append(
            f" Coverage dropped by {regression_result.coverage_drop:.1f}pp"
        )

    closing_line = (
        " ⛔ Rollback recommended — 2+ consecutive failures on this file."
        " Consider reverting your last edit."
        if regression_result.rollback_recommended
        else " Fix the failing tests before marking the task complete."
    )
    report.append(closing_line)

    log.warning("Regression detected in %s: %s", file_path, regression_result.new_failures)
    return "\n".join(report)
1053
+
1054
def _git_checkpoint_on_completion(self) -> None:
    """Ask the git workflow for one checkpoint commit covering written files.

    No-op when there is no git workflow or no file was written. The commit
    message names up to three written files plus a "+N more" suffix; the
    full list of written paths is passed as ``files``. Failures are logged
    at debug level and swallowed.
    """
    written = self._files_written
    if self._git_wf is None or not written:
        return
    try:
        summary = ", ".join(str(p.name) for p in written[:3])
        overflow = len(written) - 3
        if overflow > 0:
            summary += f" +{overflow} more"
        self._git_wf.checkpoint(
            f"task: wrote {summary}",
            turn_id=self._session_id,
            files=list(written),
        )
        log.info("git: checkpoint commit after task completion")
    except Exception as exc:  # noqa: BLE001
        log.debug("Git checkpoint on completion skipped: %s", exc)
1074
+
1075
+ def _maybe_compress(self, turn_num: int = 0) -> int:
1076
+ """Compress context when budget is high, using LLM digest if possible.
1077
+
1078
+ Triggered either by 5-turn cadence (every 5th turn) or when the budget
1079
+ is near the limit (>= 80 %). A re-compression guard prevents firing
1080
+ twice within fewer than 5 turns.
1081
+
1082
+ Tries SessionCompressor first (smart LLM digest). Falls back to
1083
+ simple eviction if compression fails or cfg has no API key.
1084
+ Returns count of turns removed/replaced.
1085
+ """
1086
+ near_limit = self._budget.is_near_limit()
1087
+ proactive = (turn_num % 5 == 0)
1088
+ if not (near_limit or proactive):
1089
+ return 0
1090
+ # Re-compression guard: don't re-fire if we just compressed
1091
+ if turn_num - self._last_compressed_at_turn < 5:
1092
+ return 0
1093
+ if not (near_limit or self._budget.needs_compression):
1094
+ return 0
1095
+
1096
+ # Gather non-system turns available for compression (oldest half)
1097
+ non_sys = [t for t in self._transcript._turns if t.role != "system"]
1098
+ if len(non_sys) < 4:
1099
+ # Not enough history to compress — just evict
1100
+ evicted = self._transcript.maybe_evict()
1101
+ self._budget.sync_from_transcript(self._transcript)
1102
+ return evicted
1103
+
1104
+ half = max(2, len(non_sys) // 2)
1105
+ candidates = non_sys[:half]
1106
+ candidate_msgs = [t.to_compress_dict() for t in candidates]
1107
+
1108
+ try:
1109
+ from src.memory.compressor import SessionCompressor
1110
+ compressor = SessionCompressor(self._cfg)
1111
+ result = compressor.compress(candidate_msgs, task_description="")
1112
+ if result.digest and result.tokens_freed > 0:
1113
+ # Remove the compressed turns by index (avoids id() reuse after GC)
1114
+ all_turns = list(self._transcript._turns)
1115
+ compressed_indices = frozenset(
1116
+ i for i, t in enumerate(all_turns) if t in candidates
1117
+ )
1118
+ remaining = [t for i, t in enumerate(all_turns) if i not in compressed_indices]
1119
+ from collections import deque
1120
+ self._transcript._turns = deque(remaining)
1121
+ self._transcript._total_tokens = sum(
1122
+ t.tokens for t in self._transcript._turns
1123
+ )
1124
+ # Inject digest as a system-prefixed turn after the real system turn
1125
+ digest_turn = Turn(
1126
+ role="system",
1127
+ content=result.digest,
1128
+ tokens=count_tokens(result.digest),
1129
+ )
1130
+ turns_list = list(self._transcript._turns)
1131
+ insert_pos = 1 if (turns_list and turns_list[0].role == "system") else 0
1132
+ turns_list.insert(insert_pos, digest_turn)
1133
+ from collections import deque
1134
+ self._transcript._turns = deque(turns_list)
1135
+ self._transcript._total_tokens += digest_turn.tokens
1136
+ self._budget.sync_from_transcript(self._transcript)
1137
+ self._last_compressed_at_turn = turn_num
1138
+ log.info(
1139
+ "Compressed %d turns into digest, freed ~%d tokens",
1140
+ result.turns_compressed, result.tokens_freed,
1141
+ )
1142
+ return result.turns_compressed
1143
+ except Exception as exc: # noqa: BLE001
1144
+ log.warning("SessionCompressor failed, falling back to eviction: %s", exc)
1145
+
1146
+ # Fallback: simple eviction
1147
+ evicted = self._transcript.maybe_evict()
1148
+ self._budget.sync_from_transcript(self._transcript)
1149
+ if evicted:
1150
+ log.info("Evicted %d turns (fallback compression)", evicted)
1151
+ return evicted
1152
+
1153
def _checkpoint(self) -> None:
    """Queue a transcript snapshot for persistence on the checkpoint executor.

    Snapshots every non-system turn as a plain dict and submits the DB
    write to ``_CHECKPOINT_EXECUTOR``. A previously submitted write that
    has not finished is asked to cancel first, since the newer snapshot
    supersedes it. Records the submission time in ``_last_checkpoint_at``.

    Returns immediately when there is no DB or no session id.
    """
    if self._db is None or not self._session_id:
        return

    snapshot = []
    for turn in self._transcript.to_turns():
        if turn.role == "system":
            # system prompt is rebuilt on restore
            continue
        snapshot.append({
            "role": turn.role,
            "content": turn.content,
            "tokens": turn.tokens,
            "tool_name": turn.tool_name,
            "tool_call_id": turn.tool_call_id,
            "tool_calls": turn.tool_calls,
        })

    # Bind to locals so the worker does not read attributes off self later.
    db, session_id = self._db, self._session_id

    def _write() -> None:
        try:
            db.memory_save_turns(session_id, snapshot)
            log.debug(
                "Checkpointed %d turns for session %s",
                len(snapshot), session_id,
            )
        except Exception as exc:  # noqa: BLE001
            log.warning("Checkpoint failed (non-fatal): %s", exc)

    # Cancel any pending (not-yet-started) future — newer snapshot wins
    previous = self._checkpoint_future
    if previous is not None and not previous.done():
        previous.cancel()
    self._checkpoint_future = _CHECKPOINT_EXECUTOR.submit(_write)
    self._last_checkpoint_at = time.monotonic()
1196
+
1197
def _flush_checkpoint_sync(self, timeout: float = 10.0) -> None:
    """Wait for the outstanding checkpoint write, then forget it.

    Blocks up to ``timeout`` seconds on the stored future. A timeout or any
    other error is logged as a warning rather than raised. The stored
    future is cleared in every case. No-op when nothing is pending.
    """
    pending = self._checkpoint_future
    if pending is None:
        return
    try:
        pending.result(timeout=timeout)
    except _cf.TimeoutError:
        log.warning(
            "Checkpoint flush timed out after %.1fs — last snapshot may not persist",
            timeout,
        )
    except Exception as exc:  # noqa: BLE001
        log.warning("Checkpoint flush error: %s", exc)
    finally:
        self._checkpoint_future = None
1215
+
1216
def restore_from_db(self, session_id: str | None = None) -> int:
    """Replace the non-system transcript turns with the saved checkpoint.

    System turns currently in the transcript are kept; every other turn is
    discarded and rebuilt from the rows returned by the DB. A session whose
    ``updated_at`` is more than 24 hours old only triggers a warning.
    Any exception is logged and reported as 0.

    Args:
        session_id: Session to restore from; defaults to the current
            ``self._session_id`` when not given.

    Returns:
        The number of turns reloaded; 0 if nothing was restored.
    """
    target_session = session_id or self._session_id
    if self._db is None or not target_session:
        return 0
    try:
        # 24-hour staleness guard (warn only)
        sess_row = self._db.execute_one(
            "SELECT updated_at FROM sessions WHERE session_id = ?",
            (target_session,),
        )
        if sess_row and sess_row["updated_at"]:
            from datetime import datetime, timedelta, timezone
            try:
                iso_text = str(sess_row["updated_at"]).replace("Z", "+00:00")
                updated_at = datetime.fromisoformat(iso_text)
                now_utc = datetime.now(timezone.utc)
                age = now_utc - updated_at.astimezone(timezone.utc)
                if age > timedelta(hours=24):
                    log.warning(
                        "⚠ Session %s is over 24 hours old — context may be stale."
                        " Continuing anyway.",
                        target_session[:8],
                    )
            except (ValueError, AttributeError):
                # Unparseable timestamp: skip the staleness warning.
                pass

        rows = self._db.memory_load_turns(target_session)
        if not rows:
            return 0

        from collections import deque
        kept_system = [t for t in self._transcript.to_turns() if t.role == "system"]
        self._transcript._turns = deque(kept_system)
        self._transcript._total_tokens = sum(t.tokens for t in kept_system)

        for row in rows:
            self._transcript.append(
                Turn(
                    role=row["role"],
                    content=row.get("content") or "",
                    tokens=row.get("tokens") or 0,
                    tool_name=row.get("tool_name"),
                    tool_call_id=row.get("tool_call_id"),
                    tool_calls=row.get("tool_calls"),
                )
            )
        self._budget.sync_from_transcript(self._transcript)

        restored = len(rows)
        log.info(
            "Restored %d turns from DB for session %s",
            restored, target_session,
        )
        return restored
    except Exception as exc:  # noqa: BLE001
        log.warning("restore_from_db failed (non-fatal): %s", exc)
        return 0
1280
+
1281
def _event_log_begin(self, turn_num: int, user_message: str | None = None) -> str | None:
    """Open an event-log entry for the turn and return its id.

    Returns None when there is no DB or session id, or when the DB call
    raises (the error is logged at debug level). Note that the turn index
    sent to the DB is ``self._model_turn_count``; ``turn_num`` is not read
    in this body.
    """
    if self._db is not None and self._session_id:
        try:
            return self._db.event_log_begin_turn(
                self._session_id,
                self._model_turn_count,
                self._model_id,
                self._cfg.provider,
                getattr(self._model, "tier", "coder"),
                user_message=user_message,
            )
        except Exception as exc:  # noqa: BLE001
            log.debug("event_log_begin_turn: %s", exc)
    return None
1297
+
1298
def _run_self_critique(
    self, turn_num: int
) -> Generator[AgentEvent, None, None]:
    """Ask the model for a short critique of this run's file changes.

    Sends the last four transcript messages plus a critique prompt naming
    up to five written files, using ``self._model_id`` with a 300-token
    cap. A non-empty reply is yielded as a THINKING event prefixed with
    ``[self-critique]``. Yields nothing when no files were written; any
    failure is logged at debug level and skipped.
    """
    written = self._files_written
    if not written:
        return

    touched = ", ".join(str(p) for p in written[:5])
    critique_prompt = "\n".join((
        f"You just modified: {touched}.",
        "Briefly critique the changes you made:",
        "- Any logic errors or edge cases missed?",
        "- Any security or correctness concerns?",
        "- Is the code consistent with the surrounding style?",
        "Be concise — 3–5 sentences max.",
    ))
    try:
        recent_context = self._transcript.to_messages()[-4:]
        messages = [*recent_context, {"role": "user", "content": critique_prompt}]
        response = self._gdm_client.complete(
            messages,
            model=self._model_id,
            max_tokens=300,
        )
        critique = (response.choices[0].message.content or "").strip()
        if critique:
            yield AgentEvent(EventType.THINKING, content=f"[self-critique] {critique}",
                             turn=turn_num)
    except Exception as exc:  # noqa: BLE001
        log.debug("Self-critique skipped: %s", exc)
1334
+
1335
def _maybe_review_gate(
    self, turn_num: int
) -> Generator[AgentEvent, None, None]:
    """Run ReviewGate over the written files when ReviewTrigger asks for it.

    Yields one THINKING event: the list of findings when the gate result
    blocks the merge, otherwise a "passed" note with the trigger reason.
    Yields nothing when no files were written or no review is needed. Any
    failure is logged at debug level and skipped.
    """
    written = self._files_written
    if not written:
        return
    try:
        from src.agent.review_gate import ReviewGate, ReviewTrigger
        trigger = ReviewTrigger()
        if not trigger.should_review(written):
            return
        reason = trigger.classify(written)
        gate = ReviewGate(cfg=self._cfg)
        # Collect actual diff content (fixes empty-string bug)
        from src.agent.review_gate import _collect_diff
        collected = _collect_diff(written, self._cfg.project_root)
        gate_result = gate.review(
            files_changed=written,
            diff_text=collected or "(diff unavailable)",
            trigger_reason=reason,
        )
        if not gate_result.blocks_merge:
            yield AgentEvent(
                EventType.THINKING,
                content=f"[review-gate] ✅ Review passed ({reason})",
                turn=turn_num,
            )
        else:
            findings_text = "\n".join(
                f"- [{finding.severity.value.upper()}] {finding.message}"
                for finding in gate_result.report.findings
            )
            yield AgentEvent(
                EventType.THINKING,
                content=f"[review-gate] ⚠ Review flagged issues:\n{findings_text}",
                turn=turn_num,
            )
    except Exception as exc:  # noqa: BLE001
        log.debug("ReviewGate skipped: %s", exc)
1376
+
1377
+
1378
+ # ---------------------------------------------------------------------------
1379
+ # Autonomy audit log (module-level for testability)
1380
+ # ---------------------------------------------------------------------------
1381
+
1382
+
1383
def write_autonomy_audit(
    db_conn: Any,
    session_id: str,
    level: int,
    action: str,
    details: dict,  # type: ignore[type-arg]
    checkpoint_id: str | None = None,
) -> None:
    """Write an audit entry for L3+ autonomy actions.

    Silently skips levels below 3 — no-op by design. Otherwise creates the
    ``autonomy_audit`` table if it is missing, inserts one row stamped with
    the current ``time.time()``, and commits.

    Args:
        db_conn: Connection object exposing ``execute`` and ``commit``.
        session_id: Session the action belongs to.
        level: Autonomy level of the action; rows are written for >= 3.
        action: Short action name.
        details: Extra context, stored as a JSON string.
        checkpoint_id: Optional checkpoint identifier stored with the row.
    """
    if level < 3:
        return
    import json
    import time

    # Deliberate: values JSON cannot encode natively (e.g. pathlib.Path) are
    # stored as their str() form. Dropping the whole audit row on a
    # TypeError would be worse than a stringified field.
    details_json = json.dumps(details, default=str)

    db_conn.execute(
        "CREATE TABLE IF NOT EXISTS autonomy_audit "
        "(session_id TEXT, timestamp REAL, level INTEGER, action TEXT, details TEXT, checkpoint_id TEXT)",
    )
    db_conn.execute(
        "INSERT INTO autonomy_audit "
        "(session_id, timestamp, level, action, details, checkpoint_id) VALUES (?,?,?,?,?,?)",
        (session_id, time.time(), level, action, details_json, checkpoint_id),
    )
    db_conn.commit()
1410
+