gdmcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gdmcode-0.1.0.dist-info/METADATA +240 -0
- gdmcode-0.1.0.dist-info/RECORD +131 -0
- gdmcode-0.1.0.dist-info/WHEEL +4 -0
- gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
- src/__init__.py +1 -0
- src/_internal/__init__.py +0 -0
- src/_internal/constants.py +244 -0
- src/_internal/domain_skills.py +339 -0
- src/agent/__init__.py +0 -0
- src/agent/commit_classifier.py +91 -0
- src/agent/context_budget.py +391 -0
- src/agent/daemon.py +681 -0
- src/agent/dag_validator.py +153 -0
- src/agent/debug_loop.py +473 -0
- src/agent/impact_analyzer.py +149 -0
- src/agent/impact_graph.py +117 -0
- src/agent/loop.py +1410 -0
- src/agent/orchestrator.py +141 -0
- src/agent/regression_guard.py +251 -0
- src/agent/review_gate.py +648 -0
- src/agent/risk_scorer.py +169 -0
- src/agent/self_healing.py +145 -0
- src/agent/smart_test_selector.py +89 -0
- src/agent/system_prompt.py +226 -0
- src/agent/task_tracker.py +320 -0
- src/agent/test_validator.py +210 -0
- src/agent/tool_orchestrator.py +402 -0
- src/agent/transcript.py +230 -0
- src/agent/verification_loop.py +133 -0
- src/agent/work_director.py +136 -0
- src/agent/worktree_manager.py +53 -0
- src/artifacts/__init__.py +16 -0
- src/artifacts/artifact_store.py +456 -0
- src/artifacts/verification_graph.py +75 -0
- src/auth.py +411 -0
- src/cli.py +1290 -0
- src/commands.py +1398 -0
- src/config.py +762 -0
- src/cost_tracker.py +348 -0
- src/db/__init__.py +4 -0
- src/db/migrations.py +337 -0
- src/enterprise/__init__.py +3 -0
- src/enterprise/audit_log.py +182 -0
- src/enterprise/identity.py +90 -0
- src/enterprise/rbac.py +100 -0
- src/enterprise/team_config.py +125 -0
- src/enterprise/usage_analytics.py +261 -0
- src/exceptions.py +207 -0
- src/git_workflow.py +651 -0
- src/integrations/__init__.py +6 -0
- src/integrations/github_actions.py +106 -0
- src/integrations/mcp_server.py +333 -0
- src/integrations/sentry_integration.py +100 -0
- src/integrations/sentry_server.py +82 -0
- src/integrations/webhook_security.py +19 -0
- src/main.py +27 -0
- src/memory/__init__.py +0 -0
- src/memory/code_index.py +376 -0
- src/memory/compressor.py +378 -0
- src/memory/context_memory.py +135 -0
- src/memory/continuous_memory.py +234 -0
- src/memory/conventions.py +495 -0
- src/memory/db.py +1119 -0
- src/memory/document_index.py +205 -0
- src/memory/file_cache.py +128 -0
- src/memory/project_scanner.py +178 -0
- src/memory/session_store.py +201 -0
- src/models/__init__.py +0 -0
- src/models/client.py +715 -0
- src/models/definitions.py +459 -0
- src/models/router.py +418 -0
- src/models/schemas.py +389 -0
- src/permissions.py +294 -0
- src/remote/__init__.py +5 -0
- src/remote/command_filter.py +33 -0
- src/remote/models.py +31 -0
- src/remote/permission_handler.py +79 -0
- src/remote/phone_ui.py +48 -0
- src/remote/protocol.py +59 -0
- src/remote/qr.py +65 -0
- src/remote/server.py +586 -0
- src/remote/token_manager.py +61 -0
- src/remote/tunnel.py +212 -0
- src/repl.py +475 -0
- src/runtime/__init__.py +1 -0
- src/runtime/branch_farm.py +372 -0
- src/runtime/replay.py +351 -0
- src/sandbox/__init__.py +2 -0
- src/sandbox/hermetic.py +214 -0
- src/sandbox/policy.py +44 -0
- src/sdk/__init__.py +3 -0
- src/sdk/plugin_base.py +39 -0
- src/sdk/plugin_host.py +100 -0
- src/sdk/plugin_loader.py +101 -0
- src/security.py +409 -0
- src/server/__init__.py +7 -0
- src/server/bridge.py +427 -0
- src/server/bridge_cli.py +103 -0
- src/server/bridge_client.py +170 -0
- src/server/protocol_version.py +103 -0
- src/session/__init__.py +10 -0
- src/session/event_fanout.py +46 -0
- src/session/input_broker.py +38 -0
- src/session/permission_bridge.py +100 -0
- src/tools/__init__.py +160 -0
- src/tools/_atomic.py +72 -0
- src/tools/agent_tools.py +423 -0
- src/tools/ask_user_tool.py +83 -0
- src/tools/bash_tool.py +384 -0
- src/tools/browser_tool.py +352 -0
- src/tools/browser_tools.py +179 -0
- src/tools/dep_tools.py +210 -0
- src/tools/document_reader.py +167 -0
- src/tools/document_tool.py +240 -0
- src/tools/document_writer.py +171 -0
- src/tools/impact_tools.py +240 -0
- src/tools/playwright_tool.py +172 -0
- src/tools/quality_tools.py +366 -0
- src/tools/read_tools.py +318 -0
- src/tools/result_cache.py +157 -0
- src/tools/search_tools.py +310 -0
- src/tools/shell_tools.py +311 -0
- src/tools/write_tools.py +337 -0
- src/voice/__init__.py +25 -0
- src/voice/audio_capture.py +92 -0
- src/voice/audio_playback.py +68 -0
- src/voice/errors.py +14 -0
- src/voice/models.py +35 -0
- src/voice/providers.py +143 -0
- src/voice/vad.py +55 -0
- src/voice/voice_loop.py +156 -0
src/agent/loop.py
ADDED
|
@@ -0,0 +1,1410 @@
|
|
|
1
|
+
"""Agent event loop — the generator-based heart of gdm code.
|
|
2
|
+
|
|
3
|
+
Drives the model-tool-model cycle. Yields AgentEvents so callers (REPL,
|
|
4
|
+
daemon, tests) can react to each step without coupling to the loop internals.
|
|
5
|
+
|
|
6
|
+
Design principles:
|
|
7
|
+
- TranscriptStore is the SOLE source of truth for message history.
|
|
8
|
+
_transcript.to_messages() is always passed to the API — no parallel list.
|
|
9
|
+
- One assistant Turn per model response, carrying both content and tool_calls.
|
|
10
|
+
- Budget is synced from transcript after each change, never additively tracked.
|
|
11
|
+
- All errors surface as EventType.ERROR followed by EventType.DONE; never raised.
|
|
12
|
+
|
|
13
|
+
Phase 1: non-streaming, sequential tool execution.
|
|
14
|
+
Phase 2 (future): streaming text deltas, parallel tool execution.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import concurrent.futures as _cf
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import re
|
|
22
|
+
import time
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from enum import Enum, auto
|
|
25
|
+
from typing import TYPE_CHECKING, Any, Generator
|
|
26
|
+
|
|
27
|
+
from src.exceptions import ApiError, BudgetExceededError, FatalApiError
|
|
28
|
+
from src.models.client import GdmClient
|
|
29
|
+
from src.models.router import CircuitBreaker
|
|
30
|
+
|
|
31
|
+
from src.agent.context_budget import ContextBudget, count_tokens
|
|
32
|
+
from src.agent.system_prompt import build_system_prompt
|
|
33
|
+
from src.agent.transcript import TranscriptStore, Turn
|
|
34
|
+
from src.cost_tracker import CostTracker
|
|
35
|
+
from src.models.definitions import ModelTier, Provider, get_model
|
|
36
|
+
|
|
37
|
+
if TYPE_CHECKING:
|
|
38
|
+
from src.agent.tool_orchestrator import ToolOrchestrator
|
|
39
|
+
from src.config import GdmConfig
|
|
40
|
+
from src.memory.db import GdmDatabase
|
|
41
|
+
from src.models.router import ModelRouter
|
|
42
|
+
|
|
43
|
+
__all__ = ["AgentEvent", "AgentLoop", "EventType", "CONFIDENCE_SCHEMA"]
|
|
44
|
+
|
|
45
|
+
log = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
try:
|
|
48
|
+
from src.enterprise.usage_analytics import UsageAnalytics as _UsageAnalytics
|
|
49
|
+
from src.enterprise.usage_analytics import UsageEvent as _UsageEvent
|
|
50
|
+
_analytics: "_UsageAnalytics | None" = _UsageAnalytics()
|
|
51
|
+
except Exception: # noqa: BLE001
|
|
52
|
+
_analytics = None
|
|
53
|
+
|
|
54
|
+
try:
|
|
55
|
+
from src.agent.risk_scorer import score_patch, RiskTier
|
|
56
|
+
_risk_scorer_available = True
|
|
57
|
+
except Exception: # noqa: BLE001
|
|
58
|
+
_risk_scorer_available = False
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _check_patch_risk(diff, file_paths=None, autonomy_level=2):
    """Score *diff* with the optional risk scorer and apply autonomy gates.

    Args:
        diff: Patch text handed to ``score_patch``.
        file_paths: Optional paths forwarded unchanged to ``score_patch``.
        autonomy_level: Caller's autonomy level; levels below 4 may not apply
            a blocked patch, and levels below 3 get a warning for HIGH tier.

    Returns:
        The object returned by ``score_patch``, or ``None`` when the risk
        scorer module could not be imported.

    Raises:
        RuntimeError: The scorer marked the patch as blocked and
            ``autonomy_level`` is below 4.
    """
    if not _risk_scorer_available:
        return None

    assessment = score_patch(diff, file_paths)
    log.info(
        "patch_risk score=%.3f tier=%s blocked=%s",
        assessment.score,
        assessment.tier,
        assessment.blocked,
    )

    # The rationale is only worth logging when at least one signal fired.
    if assessment.triggered_signals:
        log.debug("patch_risk rationale:\n%s", assessment.rationale)

    # Hard gate: blocked patches need autonomy level 4 or above.
    must_stop = assessment.blocked and autonomy_level < 4
    if must_stop:
        raise RuntimeError(f"Patch blocked: {assessment.rationale}")

    # Soft gate: HIGH tier below autonomy level 3 is logged, not blocked.
    needs_review = assessment.tier == RiskTier.HIGH and autonomy_level < 3
    if needs_review:
        log.warning(
            "High-risk patch detected (score=%.3f). Review before applying:\n%s",
            assessment.score,
            assessment.rationale,
        )

    return assessment
|
|
80
|
+
|
|
81
|
+
def _record_usage(
    session_id: str,
    actor_id: str,
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    tool_calls: int,
) -> None:
    """Record one usage event with the module-level analytics sink.

    Best-effort telemetry: returns immediately when analytics failed to
    initialise, and any exception raised while building or recording the
    event is logged at DEBUG level instead of propagating. Never raises.

    Args:
        session_id: Session the usage belongs to.
        actor_id: Identifier of the acting user/agent.
        model: Model identifier used for the call.
        prompt_tokens: Prompt token count to record.
        completion_tokens: Completion token count to record.
        tool_calls: Number of tool calls to record.
    """
    if _analytics is None:
        return
    try:
        event = _UsageEvent(  # type: ignore[name-defined]
            session_id=session_id,
            actor_id=actor_id,
            model=model,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            tool_calls=tool_calls,
        )
        _analytics.record(event)
    except Exception:  # noqa: BLE001 - telemetry must never break the agent loop
        # Keep the failure visible for debugging rather than discarding it.
        log.debug("usage analytics record failed", exc_info=True)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Single-worker executor for async checkpoint writes — serialises DB flushes
|
|
108
|
+
# so concurrent checkpoints don't interleave, and the agent thread is never blocked.
|
|
109
|
+
_CHECKPOINT_EXECUTOR: _cf.ThreadPoolExecutor = _cf.ThreadPoolExecutor(
|
|
110
|
+
max_workers=1, thread_name_prefix="gdm-ckpt"
|
|
111
|
+
)
|
|
112
|
+
_WALL_CLOCK_CHECKPOINT_INTERVAL: float = 60.0 # seconds
|
|
113
|
+
|
|
114
|
+
# File path fragments that indicate security-sensitive code and warrant extra scanning.
|
|
115
|
+
_SECURITY_PATH_PATTERNS: tuple[str, ...] = (
|
|
116
|
+
"auth", "session", "password", "token", "secret", "crypto",
|
|
117
|
+
"permission", "sql", "query", "database",
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
# Autonomy circuit breaker (module-level for testability)
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
|
|
124
|
+
# Per-task count of fix attempts consumed so far, keyed by task id.
_fix_attempts: dict[str, int] = {}


def check_circuit_breaker(task_id: str, policy: "Any") -> None:
    """Consume one fix attempt for *task_id*, or refuse if none remain.

    Each successful call increments the stored attempt counter, so this is
    both the check and the bookkeeping step.

    Args:
        task_id: Key identifying the task whose attempts are counted.
        policy: Object exposing ``max_fix_attempts``.

    Raises:
        PermissionError: The task has already used ``policy.max_fix_attempts``
            attempts.
    """
    used = _fix_attempts.get(task_id, 0)
    if used < policy.max_fix_attempts:
        _fix_attempts[task_id] = used + 1
        return
    raise PermissionError(
        f"Circuit breaker: {used} fix attempts reached for task {task_id}. Human input required."
    )


def reset_circuit_breaker(task_id: str) -> None:
    """Forget the attempt counter for *task_id* (no-op if none is stored)."""
    _fix_attempts.pop(task_id, None)
|
|
140
|
+
|
|
141
|
+
# JSON schema sent to the model when asking it to self-assess its confidence:
# an object carrying exactly an integer ``score`` (0-100) and a ``reasons``
# list of strings; no other properties are allowed.
CONFIDENCE_SCHEMA: dict[str, Any] = {
    "type": "object",
    "required": ["score", "reasons"],
    "additionalProperties": False,
    "properties": {
        "score": {
            "type": "integer",
            "minimum": 0,
            "maximum": 100,
            "description": "Confidence score 0-100 for the proposed change.",
        },
        "reasons": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Short bullet-point reasons justifying the score.",
        },
    },
}
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# Grok native web_search spec (replaces registry spec when provider == grok)
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
_GROK_WEB_SEARCH_SPEC: dict[str, Any] = {
|
|
166
|
+
"type": "function",
|
|
167
|
+
"function": {
|
|
168
|
+
"name": "web_search",
|
|
169
|
+
"description": (
|
|
170
|
+
"Search the live web. Use for external APIs, errors not in codebase, "
|
|
171
|
+
"or unknown technology behaviour."
|
|
172
|
+
),
|
|
173
|
+
"parameters": {
|
|
174
|
+
"type": "object",
|
|
175
|
+
"properties": {"query": {"type": "string"}},
|
|
176
|
+
"required": ["query"],
|
|
177
|
+
},
|
|
178
|
+
},
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# ---------------------------------------------------------------------------
|
|
183
|
+
# Event types
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
|
|
186
|
+
class EventType(Enum):
    """Kinds of events an agent loop can emit (the tag of an AgentEvent)."""

    # Model reasoning step (text).
    THINKING = auto()
    # The model requested a tool call.
    TOOL_CALL = auto()
    # A tool was executed.
    TOOL_RESULT = auto()
    # Final answer chunk from the model.
    RESPONSE = auto()
    # Task decomposition, for display.
    SUBTASK = auto()
    # Tool failure or recoverable loop error.
    ERROR = auto()
    # The cost tracker fired.
    COST_UPDATE = auto()
    # Non-fatal warning (injection, budget, etc.).
    WARNING = auto()
    # Task complete.
    DONE = auto()
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
@dataclass
class AgentEvent:
    """One event yielded by AgentLoop.run().

    Only ``type`` is required; every other field has a neutral default so
    emitters fill in just what is relevant for that event kind.
    """

    # Event discriminant.
    type: EventType
    # Free-form text payload.
    content: str = ""
    # Tool name and call id, for tool-related events.
    tool_name: str = ""
    tool_call_id: str = ""
    # Tool arguments; a fresh dict per instance.
    args: dict[str, Any] = field(default_factory=dict)
    # ToolResult | None
    result: Any = None
    # Cost in USD attached to the event.
    cost_usd: float = 0.0
    # Turn index the event belongs to.
    turn: int = 0
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# ---------------------------------------------------------------------------
|
|
214
|
+
# Artifact auto-detection helper
|
|
215
|
+
# ---------------------------------------------------------------------------
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _should_auto_save(
|
|
219
|
+
response_text: str,
|
|
220
|
+
user_input: str,
|
|
221
|
+
*,
|
|
222
|
+
auto_detect: bool,
|
|
223
|
+
) -> bool:
|
|
224
|
+
"""Return True when at least 2 auto-save signals are present.
|
|
225
|
+
|
|
226
|
+
Signals:
|
|
227
|
+
(a) Explicit annotation: ``<!-- artifact -->`` in response text
|
|
228
|
+
(b) Structured block: markdown table or fenced diagram (mermaid/dot/plantuml)
|
|
229
|
+
(c) User command: ``/save`` in the current turn's user input
|
|
230
|
+
|
|
231
|
+
Auto-detection is opt-in (``auto_detect=False`` by default). When
|
|
232
|
+
disabled, always returns ``False`` regardless of signals, preventing
|
|
233
|
+
alert fatigue.
|
|
234
|
+
"""
|
|
235
|
+
if not auto_detect:
|
|
236
|
+
return False
|
|
237
|
+
signals = 0
|
|
238
|
+
if "<!-- artifact -->" in response_text:
|
|
239
|
+
signals += 1
|
|
240
|
+
if re.search(
|
|
241
|
+
r"(^\|.+\|$|```(mermaid|dot|plantuml))", response_text, re.MULTILINE
|
|
242
|
+
):
|
|
243
|
+
signals += 1
|
|
244
|
+
if user_input.strip().startswith("/save"):
|
|
245
|
+
signals += 1
|
|
246
|
+
return signals >= 2
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
# ---------------------------------------------------------------------------
|
|
250
|
+
# Agent loop
|
|
251
|
+
# ---------------------------------------------------------------------------
|
|
252
|
+
|
|
253
|
+
class AgentLoop:
|
|
254
|
+
"""Generator-based model-tool-model loop.
|
|
255
|
+
|
|
256
|
+
Callers iterate over AgentLoop.run(user_message) to receive AgentEvents.
|
|
257
|
+
TranscriptStore is the single source of truth — no parallel message list.
|
|
258
|
+
|
|
259
|
+
Usage::
|
|
260
|
+
|
|
261
|
+
loop = AgentLoop(cfg, orchestrator, transcript, budget, cost_tracker)
|
|
262
|
+
for event in loop.run("Fix the auth bug"):
|
|
263
|
+
if event.type == EventType.RESPONSE:
|
|
264
|
+
console.print(event.content)
|
|
265
|
+
"""
|
|
266
|
+
|
|
267
|
+
def __init__(
    self,
    cfg: GdmConfig,
    orchestrator: ToolOrchestrator,
    transcript: TranscriptStore,
    budget: ContextBudget,
    cost_tracker: CostTracker,
    *,
    model_tier: str = ModelTier.CODER,
    router: ModelRouter | None = None,
    db: GdmDatabase | None = None,
    session_id: str = "",
    project_id: str = "",
    circuit_breaker: CircuitBreaker | None = None,
    continuous_memory: Any = None,
) -> None:
    """Store collaborators, resolve the model, and build the first client.

    Args:
        cfg: Configuration; ``provider`` is read here, and the optional
            ``proxy_enabled`` / ``proxy_url`` / ``proxy_token`` attributes
            seed the proxy state.
        orchestrator: Tool orchestrator used by the loop.
        transcript: Transcript store holding the message history.
        budget: Context budget tracker.
        cost_tracker: Cost tracker for the session.
        model_tier: Tier passed to ``get_model`` for the initial model.
        router: Optional model router.
        db: Optional database handle.
        session_id: Session identifier.
        project_id: Project identifier.
        circuit_breaker: Optional escalation circuit breaker.
        continuous_memory: Optional continuous-memory object.
    """
    # Injected collaborators.
    self._cfg = cfg
    self._orchestrator = orchestrator
    self._transcript = transcript
    self._budget = budget
    self._cost_tracker = cost_tracker

    # Initial model for the requested tier on the configured provider.
    self._model = get_model(model_tier, cfg.provider)
    self._model_id = self._model.id

    # Proxy state must exist before the first client is created. Values that
    # are not real strings (e.g. MagicMock attributes in tests) are coerced
    # to "", so proxy mode cannot be switched on by accident.
    raw_url = getattr(cfg, "proxy_url", "") or ""
    raw_token = getattr(cfg, "proxy_token", None)
    clean_url: str = raw_url if isinstance(raw_url, str) else ""
    clean_token: str = raw_token if isinstance(raw_token, str) else ""
    self._proxy_active: bool = (
        bool(getattr(cfg, "proxy_enabled", False))
        and bool(clean_url)
        and bool(clean_token)
    )
    self._proxy_url: str = clean_url
    self._proxy_token: str = clean_token
    self._gdm_client = self._make_client()

    # Optional services and identifiers.
    self._router = router
    self._db = db
    self._session_id = session_id
    self._project_id = project_id
    self._initialized = False

    # Path objects from write tools this turn.
    self._files_written: list[Any] = []

    # Checkpoint bookkeeping.
    self._model_turn_count: int = 0  # incremented each model call; drives checkpoints
    self._last_checkpoint_at: float = 0.0  # monotonic time of last checkpoint write
    self._checkpoint_future: "_cf.Future[None] | None" = None  # pending async checkpoint
    self._current_event_id: str | None = None  # event log ID for the in-flight turn

    # GitWorkflow | None - set by _try_create_task_branch.
    self._git_wf: Any = None

    # Reasoning controls.
    self._reasoning_mode: str = "auto"  # "on" | "off" | "auto" - set via /reasoning
    self._reasoning_escalation_count: int = 0  # consecutive forced-REASONER turn counter

    # True once the fallback provider is active.
    self._using_fallback: bool = False

    # Hooks assigned from outside after construction.
    self._regression_guard: Any = None  # RegressionGuard | None
    self._vg: Any = None  # VerificationGraph | None

    self._circuit_breaker: CircuitBreaker | None = circuit_breaker
    self._last_compressed_at_turn: int = -10  # drives re-compression guard
    self._continuous_memory: Any = continuous_memory  # ContinuousMemory | None
|
|
321
|
+
|
|
322
|
+
# ------------------------------------------------------------------
|
|
323
|
+
# Fallback helpers
|
|
324
|
+
# ------------------------------------------------------------------
|
|
325
|
+
|
|
326
|
+
def _make_client(self, fallback_provider: str | None = None) -> GdmClient:
    """Build a GdmClient that honours the current proxy state.

    Every place that rebuilds the client (init, tier switching, fallback)
    goes through here so proxy mode is never silently dropped. While the
    proxy is active, the proxy client is returned even when a fallback
    provider is requested.

    Args:
        fallback_provider: Provider to build a direct client for when the
            proxy is not in use; ``None`` means the configured default.

    Returns:
        A proxy client, a provider-specific client, or the default client,
        in that order of precedence.
    """
    # getattr with defaults: tolerate a call before the proxy attributes
    # have been assigned.
    url = getattr(self, "_proxy_url", "")
    token = getattr(self, "_proxy_token", "")
    if getattr(self, "_proxy_active", False) and url and token:
        return GdmClient.for_proxy(url, token)

    if not fallback_provider:
        return GdmClient(self._cfg)
    return GdmClient.for_provider(fallback_provider, self._cfg)
|
|
343
|
+
|
|
344
|
+
@staticmethod
|
|
345
|
+
def parse_confidence_response(raw: Any) -> "dict[str, Any] | None":
|
|
346
|
+
"""Validate and return a confidence response dict, or None if malformed.
|
|
347
|
+
|
|
348
|
+
Accepts a dict with ``score`` (int, 0–100) and ``reasons`` (list[str]).
|
|
349
|
+
Returns None for any type mismatch, out-of-range score, or missing keys.
|
|
350
|
+
"""
|
|
351
|
+
if not isinstance(raw, dict):
|
|
352
|
+
return None
|
|
353
|
+
score = raw.get("score")
|
|
354
|
+
reasons = raw.get("reasons")
|
|
355
|
+
if not isinstance(score, int) or isinstance(score, bool):
|
|
356
|
+
return None
|
|
357
|
+
if not (0 <= score <= 100):
|
|
358
|
+
return None
|
|
359
|
+
if not isinstance(reasons, list):
|
|
360
|
+
return None
|
|
361
|
+
if not all(isinstance(r, str) for r in reasons):
|
|
362
|
+
return None
|
|
363
|
+
return {"score": score, "reasons": reasons}
|
|
364
|
+
|
|
365
|
+
@staticmethod
def _should_try_fallback(exc: ApiError) -> bool:
    """Return True if *exc* is a transient error that warrants a fallback.

    Checked in order:
      1. *exc* is an ``ApiRateLimitError``.
      2. Its ``status_code`` (or, failing that, ``status``) attribute is
         429, 500 or 503.
      3. Last resort: the message contains one of those codes directly
         after a space or ``=`` and not followed by another digit, so
         longer numbers such as "50000" are not mistaken for a status.
    """
    from src.exceptions import ApiRateLimitError

    if isinstance(exc, ApiRateLimitError):
        return True

    # Check HTTP status if present.
    status = getattr(exc, "status_code", None) or getattr(exc, "status", None)
    if status in (429, 500, 503):
        return True

    # Fall back on a status embedded in the message. The negative lookahead
    # rejects longer numbers that merely start with a status code.
    return re.search(r"[ =](?:429|500|503)(?!\d)", str(exc)) is not None
|
|
378
|
+
|
|
379
|
+
def _switch_to_fallback(self, error: ApiError) -> str:
    """Move this loop onto the configured fallback provider (session-sticky).

    The fallback model id comes from ``cfg.model_id_map`` keyed by the
    current model id; when there is no mapping, the first model listed for
    the fallback provider is used.

    Args:
        error: The API error that triggered the switch.

    Returns:
        A human-readable notice describing the switch.

    Raises:
        FatalApiError: Already running on the fallback (both providers
            failed), or the fallback provider has no models.
        ApiError: No fallback provider is configured; *error* itself is
            re-raised.
    """
    if self._using_fallback:
        raise FatalApiError(
            f"Both primary and fallback providers failed. Last error: {error}"
        )

    provider = self._cfg.fallback_provider
    if not provider:
        # Nothing to fall back to: hand the original error back to the caller.
        raise error

    # Prefer an explicit mapping from the current model id.
    model_id = self._cfg.model_id_map.get(self._model_id)
    if not model_id:
        from src.models.definitions import models_for_provider

        candidates = models_for_provider(provider)
        if not candidates:
            raise FatalApiError(
                f"No models available for fallback provider '{provider}'"
            )
        model_id = candidates[0].id

    # Build the client first so a failure here leaves the loop state untouched.
    self._gdm_client = self._make_client(fallback_provider=provider)
    self._model_id = model_id
    self._using_fallback = True

    log.info(
        "API fallback: switched to provider=%s model=%s (triggered by: %s)",
        provider,
        model_id,
        error,
    )
    notice = (
        f"Primary API unavailable ({error}). "
        f"Switched to fallback provider '{provider}' model '{model_id}'."
    )
    return notice
|
|
420
|
+
|
|
421
|
+
# ------------------------------------------------------------------
|
|
422
|
+
# Escalation helper
|
|
423
|
+
# ------------------------------------------------------------------
|
|
424
|
+
|
|
425
|
+
def _handle_api_error_escalation(self, exc: ApiError, turn_num: int) -> None:
    """Report an API error to the router and/or circuit breaker, if present.

    Does nothing when neither a router nor a circuit breaker is attached.
    Otherwise builds an ``EscalationContext`` tagged ``FailureType.API_ERROR``
    from the current model tier, the error text, *turn_num* and the session
    cost so far, passes it to the router, and records the escalation with
    the circuit breaker.

    Args:
        exc: The API error being escalated.
        turn_num: Used as the context's ``attempt_number``.
    """
    have_router = self._router is not None
    have_breaker = self._circuit_breaker is not None
    if not (have_router or have_breaker):
        return

    from src.models.router import EscalationContext, FailureType

    # Tier defaults to CODER when the model object carries no ``tier``.
    tier_now = getattr(self._model, "tier", ModelTier.CODER)
    context = EscalationContext(
        current_tier=tier_now,
        failure_type=FailureType.API_ERROR,
        failure_detail=str(exc),
        attempt_number=turn_num,
        cost_spent_usd=self._cost_tracker.session_total_usd,
        transcript_summary="",
    )

    if self._router is not None:
        self._router.escalate_with_context(context)
    if self._circuit_breaker is not None:
        self._circuit_breaker.record_escalation(self._cost_tracker.session_total_usd)
|
|
442
|
+
|
|
443
|
+
# ------------------------------------------------------------------
|
|
444
|
+
# Public runtime-control methods
|
|
445
|
+
# ------------------------------------------------------------------
|
|
446
|
+
|
|
447
|
+
def set_tier(self, tier: str) -> None:
    """Switch the active model tier at runtime (/model command).

    The new model and a fresh client are both resolved before anything is
    committed, so a failure leaves the previous model, model id and client
    intact instead of a half-applied switch. Failures are logged as a
    warning and never raised.

    Args:
        tier: Tier passed to ``get_model`` for the configured provider.
    """
    try:
        model = get_model(tier, self._cfg.provider)
        model_id = model.id
        client = self._make_client()
    except Exception as exc:  # noqa: BLE001 - a bad /model must not kill the session
        log.warning("set_tier(%r) failed: %s", tier, exc)
        return

    # Commit only after every step succeeded.
    self._model = model
    self._model_id = model_id
    self._gdm_client = client
|
|
455
|
+
|
|
456
|
+
def set_proxy(self, url: str, token: str) -> None:
    """Turn proxy mode on and rebuild the client.

    Stores *url* and *token*, marks the proxy active, then recreates the
    client through ``_make_client`` and logs the proxy URL.
    """
    self._proxy_url, self._proxy_token = url, token
    self._proxy_active = True
    # Rebuild only after the proxy fields are in place, since the client
    # factory reads them.
    self._gdm_client = self._make_client()
    log.info("Proxy mode enabled: %s", url)
|
|
463
|
+
|
|
464
|
+
def clear_proxy(self) -> None:
    """Turn proxy mode off and rebuild the client for direct provider calls.

    Only the active flag is cleared; the stored proxy URL and token are
    left as they are.
    """
    self._proxy_active = False
    rebuilt = self._make_client()
    self._gdm_client = rebuilt
    log.info("Proxy mode disabled")
|
|
469
|
+
|
|
470
|
+
# ------------------------------------------------------------------
|
|
471
|
+
# Public generator
|
|
472
|
+
# ------------------------------------------------------------------
|
|
473
|
+
|
|
474
|
+
def run(self, user_message: str) -> Generator[AgentEvent, None, None]:
    """Main loop. Yields AgentEvent stream until task is done.

    Before the loop: performs first-run setup, emits a WARNING event when the
    user-injection check flags the message (the message is still processed),
    optionally re-selects the model tier through the router, resets the
    per-run written-file list and appends the user turn to the transcript.

    Each iteration (at most ``self._cfg.max_turns``): guard checks (cost
    limit, circuit breaker, DB budget), periodic checkpoint, compression,
    out-of-band message drain, model call with one provider-fallback retry,
    cost recording, assistant-turn append, then either tool execution
    (loops again) or termination.

    Every return path in this method yields a final ``EventType.DONE`` event.

    Args:
        user_message: The user's request for this run.

    Yields:
        AgentEvent: warnings, cost updates, responses, tool events, errors
        and the terminal DONE event.
    """
    if not self._initialized:
        self._try_create_task_branch(user_message)
    self._ensure_initialized()

    # User injection check — warn-only, structural patterns only
    from src.security import check_user_injection
    _user_inj = check_user_injection(user_message)
    if _user_inj.is_injected:
        yield AgentEvent(
            EventType.WARNING,
            content=(
                f"⚠ User message matched structural injection pattern "
                f"({_user_inj.pattern}). Message will still be processed."
            ),
        )

    # Dynamic model tier routing — re-pick tier based on prompt complexity
    # Skip router when on fallback to keep session-sticky provider/model.
    if self._router is not None and not self._using_fallback:
        from src.models.router import TaskContext
        ctx = TaskContext(
            prompt=user_message,
            token_count=self._budget.used_tokens,
            provider=self._cfg.provider,
        )
        tier = self._router.select_tier_for_turn(
            ctx,
            reasoning_mode=self._reasoning_mode,
        )
        # Model, model id and client are all refreshed for the chosen tier.
        self._model = get_model(tier, self._cfg.provider)
        self._model_id = self._model.id
        self._gdm_client = self._make_client()
        log.debug(
            "ModelRouter selected tier=%s mode=%s for prompt=%r",
            tier,
            self._reasoning_mode,
            user_message[:60],
        )
        # Cost-budget guard: warn after N consecutive forced-REASONER turns
        if self._reasoning_mode == "on" and tier == ModelTier.REASONER:
            self._reasoning_escalation_count += 1
            if self._reasoning_escalation_count > 5:
                yield AgentEvent(
                    EventType.COST_UPDATE,
                    content=(
                        f"⚠ Reasoning mode forced ON — "
                        f"{self._reasoning_escalation_count} consecutive REASONER calls. "
                        "Use /reasoning auto to save cost."
                    ),
                )
        else:
            # Any non-forced-REASONER run breaks the streak.
            self._reasoning_escalation_count = 0

    self._files_written = []  # reset per-run file tracking

    self._transcript.append(
        Turn(role="user", content=user_message, tokens=count_tokens(user_message))
    )
    self._budget.sync_from_transcript(self._transcript)

    max_turns = self._cfg.max_turns
    for turn_num in range(max_turns):
        # Cost guard — soft limit from config
        if self._cost_tracker.exceeds(self._cfg.cost_limit_usd):
            yield AgentEvent(EventType.ERROR, content="Cost limit exceeded", turn=turn_num)
            yield AgentEvent(EventType.DONE, turn=turn_num)
            return
        # Circuit breaker guard
        if self._circuit_breaker is not None and self._circuit_breaker.should_halt():
            yield AgentEvent(EventType.ERROR,
                             content=self._circuit_breaker.halt_reason(), turn=turn_num)
            yield AgentEvent(EventType.DONE, turn=turn_num)
            return
        # Budget enforcement — hard-stop limit from DB
        if self._db is not None:
            try:
                self._cost_tracker.check_budget(self._db)
            except BudgetExceededError as exc:
                yield AgentEvent(EventType.ERROR, content=str(exc), turn=turn_num)
                yield AgentEvent(EventType.DONE, turn=turn_num)
                return

        # Checkpoint every 5th model turn, or when the wall-clock interval
        # since the last checkpoint has elapsed.
        self._model_turn_count += 1
        if (self._model_turn_count % 5 == 0
                or time.monotonic() - self._last_checkpoint_at > _WALL_CLOCK_CHECKPOINT_INTERVAL):
            self._checkpoint()

        # Begin event log entry for this turn
        # (the user message is only passed along for the first turn)
        self._current_event_id = self._event_log_begin(
            turn_num, user_message if turn_num == 0 else None
        )

        # Compress if needed BEFORE building the API payload
        self._maybe_compress(turn_num)

        # Drain BTW queue — inject any pending out-of-band messages as user turns
        if self._db is not None and self._session_id:
            try:
                pending_btw = self._db.btw_dequeue_pending(self._session_id)
                if pending_btw:
                    for btw in pending_btw:
                        # NOTE(review): tokens are counted on the raw message,
                        # not on the prefixed content that is stored.
                        self._transcript.append(
                            Turn(
                                role="user",
                                content=f"[OUT-OF-BAND NOTE]: {btw['message']}",
                                tokens=count_tokens(btw["message"]),
                            )
                        )
                    self._db.btw_mark_delivered([b["id"] for b in pending_btw])
                    log.debug("Injected %d BTW message(s) into transcript", len(pending_btw))
            except Exception as _btw_exc:  # noqa: BLE001
                # Best-effort: a failing queue must not stop the turn.
                log.warning("BTW queue drain failed: %s", _btw_exc)

        tools = self._build_tool_specs()
        try:
            response = self._gdm_client.complete(
                self._transcript.to_messages(),
                model=self._model_id,
                tools=tools if tools else None,
            )
        except ApiError as exc:
            # For 429/500/503, attempt a one-time provider fallback
            if self._should_try_fallback(exc):
                try:
                    switch_msg = self._switch_to_fallback(exc)
                except FatalApiError as fatal_exc:
                    yield AgentEvent(EventType.ERROR, content=str(fatal_exc), turn=turn_num)
                    self._checkpoint()
                    self._flush_checkpoint_sync()
                    yield AgentEvent(EventType.DONE, turn=turn_num)
                    return
                except ApiError:
                    # No fallback configured — propagate original error
                    yield AgentEvent(EventType.ERROR, content=str(exc), turn=turn_num)
                    self._checkpoint()
                    self._flush_checkpoint_sync()
                    yield AgentEvent(EventType.DONE, turn=turn_num)
                    return
                # Switched successfully — notify and retry with fallback
                yield AgentEvent(EventType.THINKING, content=switch_msg, turn=turn_num)
                try:
                    response = self._gdm_client.complete(
                        self._transcript.to_messages(),
                        model=self._model_id,
                        tools=tools if tools else None,
                    )
                except ApiError as retry_exc:
                    # The retry is attempted once only; a second failure ends the run.
                    yield AgentEvent(EventType.ERROR, content=str(retry_exc), turn=turn_num)
                    self._checkpoint()
                    self._flush_checkpoint_sync()
                    yield AgentEvent(EventType.DONE, turn=turn_num)
                    return
            else:
                self._handle_api_error_escalation(exc, turn_num)
                yield AgentEvent(EventType.ERROR, content=str(exc), turn=turn_num)
                self._checkpoint()
                self._flush_checkpoint_sync()
                yield AgentEvent(EventType.DONE, turn=turn_num)
                return

        # Only the first choice of the response is consumed.
        choice = response.choices[0]
        msg = choice.message
        finish_reason = choice.finish_reason

        # Record cost
        yield from self._record_cost(response, turn_num)

        # --- Build ONE unified assistant turn (content + tool_calls combined) ---
        raw_tool_calls: list[dict[str, Any]] | None = None
        if msg.tool_calls:
            raw_tool_calls = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments,
                    },
                }
                for tc in msg.tool_calls
            ]

        asst_content = msg.content or ""
        # Complete event log entry with token counts and response text
        if self._db is not None and self._current_event_id:
            try:
                _usage = response.usage
                # Parsed as ``(getattr(...) or 0) if _usage else 0``.
                _in_tok = getattr(_usage, "prompt_tokens", 0) or 0 if _usage else 0
                _out_tok = getattr(_usage, "completion_tokens", 0) or 0 if _usage else 0
                self._db.event_log_complete_turn(
                    self._current_event_id, asst_content,
                    _in_tok, _out_tok, 0,
                    self._cost_tracker.session_total_usd,
                )
            except Exception as _exc:  # noqa: BLE001
                log.debug("event_log_complete_turn: %s", _exc)
        # Token estimate covers the text plus the serialized tool calls.
        asst_tokens = count_tokens(
            asst_content + (json.dumps(raw_tool_calls) if raw_tool_calls else "")
        )
        self._transcript.append(Turn(
            role="assistant",
            content=asst_content,
            tokens=asst_tokens,
            tool_calls=raw_tool_calls,
        ))
        self._budget.sync_from_transcript(self._transcript)

        # Emit text response
        if asst_content:
            yield AgentEvent(EventType.RESPONSE, content=asst_content, turn=turn_num)

        # Execute tool calls
        if raw_tool_calls:
            # Guard: finish_reason=tool_calls with empty list → infinite loop
            # NOTE(review): raw_tool_calls is only set when msg.tool_calls is
            # truthy, so this branch looks unreachable; an empty tool_calls
            # list falls through to the finish_reason handling below instead.
            if not msg.tool_calls:
                yield AgentEvent(EventType.ERROR, turn=turn_num,
                                 content="finish_reason=tool_calls but no tool_calls in response")
                yield AgentEvent(EventType.DONE, turn=turn_num)
                return

            yield from self._execute_tool_calls(msg.tool_calls, turn_num)
            continue  # loop back to model with tool results

        # Natural completion
        if finish_reason in ("stop", "end_turn", None):
            # Self-critique before finishing if any files were written this run
            if self._files_written:
                yield from self._run_self_critique(turn_num)
            yield from self._maybe_review_gate(turn_num)
            self._checkpoint()
            self._flush_checkpoint_sync()
            if self._db is not None and self._session_id:
                try:
                    self._db.session_set_status(self._session_id, "complete")
                except Exception as exc:  # noqa: BLE001
                    log.debug("session_set_status complete: %s", exc)
            self._git_checkpoint_on_completion()
            yield AgentEvent(EventType.DONE, turn=turn_num)
            return

        # Unexpected finish reason
        yield AgentEvent(EventType.ERROR, turn=turn_num,
                         content=f"Unexpected finish_reason: {finish_reason!r}")
        self._checkpoint()
        self._flush_checkpoint_sync()
        yield AgentEvent(EventType.DONE, turn=turn_num)
        return

    # The loop ran out of turns without a terminal response.
    yield AgentEvent(EventType.ERROR, turn=max_turns - 1,
                     content=f"Max turns ({max_turns}) reached without completion")
    self._checkpoint()
    self._flush_checkpoint_sync()
    yield AgentEvent(EventType.DONE, turn=max_turns - 1)
|
|
729
|
+
|
|
730
|
+
# ------------------------------------------------------------------
|
|
731
|
+
# Private helpers
|
|
732
|
+
# ------------------------------------------------------------------
|
|
733
|
+
|
|
734
|
+
def _try_create_task_branch(self, user_message: str = "") -> None:
    """Best-effort creation of a gdm task branch for this session.

    Outcomes, in the order they are evaluated:

    - project root is not a git repo: nothing is recorded;
    - current branch already starts with ``gdm/``: the workflow object is
      kept on ``self._git_wf`` and no branch is created;
    - working tree is not clean: an info message is logged, the workflow
      object is kept, and no branch is created;
    - otherwise: a task branch is created from the first 40 characters of
      *user_message*, or from the first 8 characters of the session id when
      the message is empty.

    Any exception raised along the way is logged at debug level and swallowed.

    Args:
        user_message: Text used as the source for the branch slug.
    """
    try:
        from src.git_workflow import GitWorkflow

        workflow = GitWorkflow(self._cfg.project_root)
        if not workflow.is_git_repo():
            return

        base_branch = workflow.current_branch()
        if base_branch.startswith("gdm/"):
            # Already on a task branch (resumed session)
            self._git_wf = workflow
        elif not workflow.is_clean():
            # Dirty tree: don't create a branch (rollback would be unsafe).
            # Still set _git_wf so /diff and /commit work, just skip branch.
            log.info(
                "git: working tree has uncommitted changes — skipping task branch creation"
            )
            self._git_wf = workflow
        else:
            # Slug source: message prefix when present, else session id prefix.
            if user_message:
                slug_source = user_message[:40]
            else:
                slug_source = self._session_id[:8]
            workflow.create_task_branch(slug_source)
            self._git_wf = workflow
            log.info("git: task branch created from '%s'", base_branch)
    except Exception as exc:  # noqa: BLE001
        log.debug("Git task branch skipped (non-git or no commits): %s", exc)
|
|
768
|
+
|
|
769
|
+
def _ensure_initialized(self) -> None:
|
|
770
|
+
"""Inject system prompt on first run (idempotent)."""
|
|
771
|
+
if self._initialized:
|
|
772
|
+
return
|
|
773
|
+
from src.tools import REGISTRY
|
|
774
|
+
prompt = build_system_prompt(
|
|
775
|
+
self._cfg,
|
|
776
|
+
REGISTRY.all_tools(),
|
|
777
|
+
db=self._db,
|
|
778
|
+
project_id=self._project_id,
|
|
779
|
+
)
|
|
780
|
+
tokens = count_tokens(prompt)
|
|
781
|
+
self._transcript.prepend_system(prompt, tokens)
|
|
782
|
+
self._budget.sync_from_transcript(self._transcript)
|
|
783
|
+
self._initialized = True
|
|
784
|
+
if self._db is not None and self._session_id:
|
|
785
|
+
try:
|
|
786
|
+
self._db.session_set_status(self._session_id, "active")
|
|
787
|
+
except Exception as exc: # noqa: BLE001
|
|
788
|
+
log.debug("session_set_status active: %s", exc)
|
|
789
|
+
|
|
790
|
+
def _build_tool_specs(self) -> list[dict[str, Any]]:
    """Return the tool specs to send with the API call.

    For every provider except Grok the orchestrator's permitted specs are
    returned as-is. For Grok, any spec whose function name is ``web_search``
    is dropped and the native Grok web-search spec is appended instead.
    """
    permitted = self._orchestrator.get_permitted_specs()
    if self._cfg.provider != Provider.GROK:
        return permitted

    # Drop registry web_search (if present) and inject native Grok spec
    without_registry_search = [
        spec
        for spec in permitted
        if spec.get("function", {}).get("name") != "web_search"
    ]
    return [*without_registry_search, _GROK_WEB_SEARCH_SPEC]
|
|
802
|
+
|
|
803
|
+
def _record_cost(
|
|
804
|
+
self, response: Any, turn_num: int
|
|
805
|
+
) -> Generator[AgentEvent, None, None]:
|
|
806
|
+
usage = response.usage
|
|
807
|
+
if usage is None:
|
|
808
|
+
return
|
|
809
|
+
in_tok = getattr(usage, "prompt_tokens", 0) or 0
|
|
810
|
+
out_tok = getattr(usage, "completion_tokens", 0) or 0
|
|
811
|
+
self._cost_tracker.record(
|
|
812
|
+
tier=self._model.tier, # type: ignore[arg-type]
|
|
813
|
+
input_tokens=in_tok,
|
|
814
|
+
output_tokens=out_tok,
|
|
815
|
+
)
|
|
816
|
+
_record_usage(
|
|
817
|
+
session_id=self._session_id or "unknown",
|
|
818
|
+
actor_id=getattr(self._cfg, "actor_id", None) or "unknown",
|
|
819
|
+
model=self._model_id,
|
|
820
|
+
prompt_tokens=in_tok,
|
|
821
|
+
completion_tokens=out_tok,
|
|
822
|
+
tool_calls=0,
|
|
823
|
+
)
|
|
824
|
+
yield AgentEvent(EventType.COST_UPDATE,
|
|
825
|
+
cost_usd=self._cost_tracker.session_total_usd,
|
|
826
|
+
turn=turn_num)
|
|
827
|
+
|
|
828
|
+
def _execute_tool_calls(
    self, tool_calls: list[Any], turn_num: int
) -> Generator[AgentEvent, None, None]:
    """Execute each tool call and append results to transcript.

    For every call, in order: parse the JSON arguments, yield a TOOL_CALL
    event, capture a regression baseline for write tools, run the warn-only
    verification-graph precondition check, execute the tool, yield a
    TOOL_RESULT event, log to the event log, run post-write quality and
    memory bookkeeping, run the post-write regression check, and append a
    ``tool`` turn to the transcript. The budget is re-synced once after all
    calls have been processed.

    Args:
        tool_calls: Tool-call objects exposing ``id``, ``function.name`` and
            ``function.arguments`` (a JSON string).
        turn_num: Turn index attached to emitted events.

    Yields:
        AgentEvent: A TOOL_CALL and a TOOL_RESULT event per tool call.
    """
    from pathlib import Path as _Path
    from src._internal.constants import _WRITE_TOOLS  # noqa: PLC0415
    for call_index, tc in enumerate(tool_calls):
        tool_name = tc.function.name
        tool_call_id = tc.id

        try:
            args: dict[str, Any] = json.loads(tc.function.arguments or "{}")
        except json.JSONDecodeError:
            args = {}
            log.warning("Malformed JSON args for %r — using {}", tool_name)
        if not isinstance(args, dict):
            # Valid JSON that is not an object ("null", a list, a number...)
            # would make every later ``args.get`` raise AttributeError.
            log.warning("Non-object JSON args for %r — using {}", tool_name)
            args = {}

        yield AgentEvent(EventType.TOOL_CALL, tool_name=tool_name,
                         tool_call_id=tool_call_id, args=args, turn=turn_num)

        # Pre-write: capture regression baseline before the edit
        is_write = tool_name in _WRITE_TOOLS
        baseline = None
        pre_write_path: str = ""
        # Reset per call so the regression check below can never see a path
        # left over from a previous iteration (or an unbound name).
        path_arg = ""
        if is_write and self._regression_guard is not None:
            pre_write_path = (
                args.get("path") or args.get("file_path") or ""
            )
            if pre_write_path:
                try:
                    baseline = self._regression_guard.capture_baseline(
                        _Path(pre_write_path)
                    )
                except Exception as _rg_exc:  # noqa: BLE001
                    log.debug("regression baseline capture failed: %s", _rg_exc)

        # warn-only verification graph precondition gate
        if is_write and getattr(self, "_vg", None) is not None:
            _vg_p = args.get("path") or args.get("file_path") or ""
            if _vg_p and self._session_id:
                try:
                    _unsafe = self._vg.check_edit_preconditions(self._session_id, _vg_p)
                    if _unsafe:
                        log.warning("[vg] prior edits lack verification for %s: %s", _vg_p, _unsafe)
                except Exception as _vge:  # noqa: BLE001
                    log.debug("vg precondition check: %s", _vge)
        tool_result = self._orchestrator.execute(tool_name, args,
                                                 model_id=self._model_id)

        yield AgentEvent(EventType.TOOL_RESULT, tool_name=tool_name,
                         tool_call_id=tool_call_id, result=tool_result,
                         turn=turn_num)

        # Log tool call to event log
        if self._db is not None and self._current_event_id:
            try:
                self._db.event_log_record_tool_call(
                    self._current_event_id, call_index,
                    tool_name, tool_call_id, args,
                    # Only the first 500 characters of the output are stored.
                    result={"output": (tool_result.output or "")[:500],
                            "error": tool_result.error},
                    ok=tool_result.error is None,
                    error=tool_result.error,
                )
            except Exception as _exc:  # noqa: BLE001
                log.debug("event_log_record_tool_call: %s", _exc)
        # Also count premium tool calls for budget tracking
        self._cost_tracker.record_tool_call(tool_name)

        result_content = tool_result.as_message_content()

        # Post-write quality checks for successful file writes
        if is_write and tool_result.error is None:
            # Prefer metadata["path"] (resolved), fall back to args
            path_arg = (
                (tool_result.metadata or {}).get("path")
                or args.get("path")
                or args.get("file_path")
                or ""
            )
            if path_arg:
                self._files_written.append(_Path(path_arg))
                # Log patch to event log
                if self._db is not None and self._current_event_id:
                    try:
                        patch_text = args.get("content") or args.get("new_content") or ""
                        self._db.event_log_record_patch(
                            self._current_event_id, path_arg,
                            str(patch_text)[:10_000],
                        )
                    except Exception as _exc:  # noqa: BLE001
                        log.debug("event_log_record_patch: %s", _exc)
                quality_note = self._auto_quality(path_arg)
                if quality_note:
                    result_content += f"\n\n{quality_note}"

                # Log to continuous memory (non-fatal); use getattr for backward compat
                # with tests that use AgentLoop.__new__ without calling __init__.
                _cm = getattr(self, "_continuous_memory", None)
                if _cm is not None:
                    try:
                        _cm.log_decision(
                            self._session_id,
                            path_arg,
                            tool_name,
                            path_arg,
                            turn_num,
                        )
                        _cm.update_hotspot(
                            self._session_id, path_arg, self._project_id
                        )
                    except Exception as _cm_exc:  # noqa: BLE001
                        log.warning("ContinuousMemory update failed: %s", _cm_exc)

        # Post-write regression check
        if baseline is not None and self._regression_guard is not None:
            try:
                regression_result = self._regression_guard.verify_after_edit(
                    _Path(pre_write_path)
                )
                if regression_result.new_failures:
                    # Fall back to the pre-write path when the write itself
                    # failed and no resolved path was recorded.
                    fix_note = self._enter_fix_loop(
                        regression_result, path_arg or pre_write_path
                    )
                    if fix_note:
                        result_content += f"\n\n{fix_note}"
            except Exception as _rg_exc:  # noqa: BLE001
                log.debug("regression verify failed: %s", _rg_exc)

        # Build the tool result turn, marking write-tool results as non_droppable
        # so the compressor never drops or truncates unverified file writes.
        path_for_nd = args.get("path") or args.get("file_path") or "?"
        tool_turn = Turn(
            role="tool",
            content=result_content,
            tokens=count_tokens(result_content),
            tool_name=tool_name,
            tool_call_id=tool_call_id,
            non_droppable=is_write,
            non_droppable_reason=(
                f"unverified write: {path_for_nd}" if is_write else ""
            ),
        )
        self._transcript.append(tool_turn)

    self._budget.sync_from_transcript(self._transcript)
|
|
971
|
+
|
|
972
|
+
def _auto_quality(self, path_str: str) -> str:
    """Run the post-write quality tools on one file and summarize the findings.

    Lint and the complexity check are always scheduled; the type check is
    added for ``.py`` files and the security scan for paths matching one of
    ``_SECURITY_PATH_PATTERNS``. All scheduled tools are submitted to a
    thread pool and their results collected in submission order, each with a
    60-second wait.

    Args:
        path_str: Path of the file that was just written.

    Returns:
        A formatted warning string when any tool reported an issue, ``''``
        when everything was clean or the pool itself failed.
    """
    from pathlib import Path as _Path
    from src.tools.quality_tools import (
        ComplexityCheckTool, LintFileTool, SecurityScanTool, TypeCheckTool,
    )

    lowered_path = path_str.lower()
    is_py = _Path(path_str).suffix.lower() == ".py"
    is_security = any(pattern in lowered_path for pattern in _SECURITY_PATH_PATTERNS)

    checks: list[tuple[str, Any, dict[str, Any]]] = [
        ("lint", LintFileTool(), {"path": path_str}),
    ]
    if is_py:
        checks.append(("typecheck", TypeCheckTool(), {"files": [path_str]}))
    if is_security:
        checks.append(("security", SecurityScanTool(), {"path": path_str}))
    checks.append(("complexity", ComplexityCheckTool(), {"path": path_str}))

    problems: list[str] = []
    try:
        with _cf.ThreadPoolExecutor(max_workers=len(checks)) as pool:
            submitted = [
                (pool.submit(tool.execute, params), label)
                for label, tool, params in checks
            ]
            for future, label in submitted:
                try:
                    outcome = future.result(timeout=60)
                    if label == "complexity":
                        # Complexity reports through its output text, not its error.
                        if outcome.output and "High complexity" in outcome.output:
                            problems.append(f"Complexity: {outcome.output}")
                    elif outcome.error:
                        problems.append(f"{label.capitalize()}: {outcome.error}")
                except Exception as exc:  # noqa: BLE001
                    log.debug("Quality tool %s raised: %s", label, exc)
    except Exception as exc:  # noqa: BLE001
        log.debug("_auto_quality pool failed for %s: %s", path_str, exc)
        return ""

    if problems:
        joined = "\n".join(problems)
        return (
            f"⚠ Auto-quality issues in {path_str}:\n{joined}\n"
            "Fix these before marking the task complete."
        )
    return ""
|
|
1022
|
+
|
|
1023
|
+
def _auto_lint(self, path_str: str) -> str:
|
|
1024
|
+
"""Backward-compatible alias for _auto_quality."""
|
|
1025
|
+
return self._auto_quality(path_str)
|
|
1026
|
+
|
|
1027
|
+
def _enter_fix_loop(self, regression_result: Any, file_path: str) -> str:
    """Format a regression report for the model and log the regression.

    The report lists each new failure, the coverage drop when one is
    recorded, and either a rollback recommendation or a request to fix the
    failing tests. This method only builds and returns the text; the caller
    decides where it goes.

    Args:
        regression_result: Object exposing ``new_failures``,
            ``coverage_drop`` and ``rollback_recommended``.
        file_path: Path of the edited file, used in the report and the log.

    Returns:
        The multi-line warning text.
    """
    report = [
        f"⚠ Regression detected after editing {file_path}:",
        "\n".join(f" ✗ {test_name}" for test_name in regression_result.new_failures),
    ]

    drop = regression_result.coverage_drop
    if drop is not None:
        report.append(f" Coverage dropped by {drop:.1f}pp")

    advice = (
        (
            " ⛔ Rollback recommended — 2+ consecutive failures on this file."
            " Consider reverting your last edit."
        )
        if regression_result.rollback_recommended
        else " Fix the failing tests before marking the task complete."
    )
    report.append(advice)

    log.warning("Regression detected in %s: %s", file_path, regression_result.new_failures)
    return "\n".join(report)
|
|
1053
|
+
|
|
1054
|
+
def _git_checkpoint_on_completion(self) -> None:
|
|
1055
|
+
"""Create a git checkpoint commit when a task completes naturally.
|
|
1056
|
+
|
|
1057
|
+
Called once at the end of a successful run(), not after each write.
|
|
1058
|
+
Uses 'git add -A' — caller ensures this is safe (task completion context).
|
|
1059
|
+
"""
|
|
1060
|
+
if self._git_wf is None or not self._files_written:
|
|
1061
|
+
return
|
|
1062
|
+
try:
|
|
1063
|
+
files = ", ".join(str(p.name) for p in self._files_written[:3])
|
|
1064
|
+
if len(self._files_written) > 3:
|
|
1065
|
+
files += f" +{len(self._files_written) - 3} more"
|
|
1066
|
+
self._git_wf.checkpoint(
|
|
1067
|
+
f"task: wrote {files}",
|
|
1068
|
+
turn_id=self._session_id,
|
|
1069
|
+
files=list(self._files_written),
|
|
1070
|
+
)
|
|
1071
|
+
log.info("git: checkpoint commit after task completion")
|
|
1072
|
+
except Exception as exc: # noqa: BLE001
|
|
1073
|
+
log.debug("Git checkpoint on completion skipped: %s", exc)
|
|
1074
|
+
|
|
1075
|
+
def _maybe_compress(self, turn_num: int = 0) -> int:
|
|
1076
|
+
"""Compress context when budget is high, using LLM digest if possible.
|
|
1077
|
+
|
|
1078
|
+
Triggered either by 5-turn cadence (every 5th turn) or when the budget
|
|
1079
|
+
is near the limit (>= 80 %). A re-compression guard prevents firing
|
|
1080
|
+
twice within fewer than 5 turns.
|
|
1081
|
+
|
|
1082
|
+
Tries SessionCompressor first (smart LLM digest). Falls back to
|
|
1083
|
+
simple eviction if compression fails or cfg has no API key.
|
|
1084
|
+
Returns count of turns removed/replaced.
|
|
1085
|
+
"""
|
|
1086
|
+
near_limit = self._budget.is_near_limit()
|
|
1087
|
+
proactive = (turn_num % 5 == 0)
|
|
1088
|
+
if not (near_limit or proactive):
|
|
1089
|
+
return 0
|
|
1090
|
+
# Re-compression guard: don't re-fire if we just compressed
|
|
1091
|
+
if turn_num - self._last_compressed_at_turn < 5:
|
|
1092
|
+
return 0
|
|
1093
|
+
if not (near_limit or self._budget.needs_compression):
|
|
1094
|
+
return 0
|
|
1095
|
+
|
|
1096
|
+
# Gather non-system turns available for compression (oldest half)
|
|
1097
|
+
non_sys = [t for t in self._transcript._turns if t.role != "system"]
|
|
1098
|
+
if len(non_sys) < 4:
|
|
1099
|
+
# Not enough history to compress — just evict
|
|
1100
|
+
evicted = self._transcript.maybe_evict()
|
|
1101
|
+
self._budget.sync_from_transcript(self._transcript)
|
|
1102
|
+
return evicted
|
|
1103
|
+
|
|
1104
|
+
half = max(2, len(non_sys) // 2)
|
|
1105
|
+
candidates = non_sys[:half]
|
|
1106
|
+
candidate_msgs = [t.to_compress_dict() for t in candidates]
|
|
1107
|
+
|
|
1108
|
+
try:
|
|
1109
|
+
from src.memory.compressor import SessionCompressor
|
|
1110
|
+
compressor = SessionCompressor(self._cfg)
|
|
1111
|
+
result = compressor.compress(candidate_msgs, task_description="")
|
|
1112
|
+
if result.digest and result.tokens_freed > 0:
|
|
1113
|
+
# Remove the compressed turns by index (avoids id() reuse after GC)
|
|
1114
|
+
all_turns = list(self._transcript._turns)
|
|
1115
|
+
compressed_indices = frozenset(
|
|
1116
|
+
i for i, t in enumerate(all_turns) if t in candidates
|
|
1117
|
+
)
|
|
1118
|
+
remaining = [t for i, t in enumerate(all_turns) if i not in compressed_indices]
|
|
1119
|
+
from collections import deque
|
|
1120
|
+
self._transcript._turns = deque(remaining)
|
|
1121
|
+
self._transcript._total_tokens = sum(
|
|
1122
|
+
t.tokens for t in self._transcript._turns
|
|
1123
|
+
)
|
|
1124
|
+
# Inject digest as a system-prefixed turn after the real system turn
|
|
1125
|
+
digest_turn = Turn(
|
|
1126
|
+
role="system",
|
|
1127
|
+
content=result.digest,
|
|
1128
|
+
tokens=count_tokens(result.digest),
|
|
1129
|
+
)
|
|
1130
|
+
turns_list = list(self._transcript._turns)
|
|
1131
|
+
insert_pos = 1 if (turns_list and turns_list[0].role == "system") else 0
|
|
1132
|
+
turns_list.insert(insert_pos, digest_turn)
|
|
1133
|
+
from collections import deque
|
|
1134
|
+
self._transcript._turns = deque(turns_list)
|
|
1135
|
+
self._transcript._total_tokens += digest_turn.tokens
|
|
1136
|
+
self._budget.sync_from_transcript(self._transcript)
|
|
1137
|
+
self._last_compressed_at_turn = turn_num
|
|
1138
|
+
log.info(
|
|
1139
|
+
"Compressed %d turns into digest, freed ~%d tokens",
|
|
1140
|
+
result.turns_compressed, result.tokens_freed,
|
|
1141
|
+
)
|
|
1142
|
+
return result.turns_compressed
|
|
1143
|
+
except Exception as exc: # noqa: BLE001
|
|
1144
|
+
log.warning("SessionCompressor failed, falling back to eviction: %s", exc)
|
|
1145
|
+
|
|
1146
|
+
# Fallback: simple eviction
|
|
1147
|
+
evicted = self._transcript.maybe_evict()
|
|
1148
|
+
self._budget.sync_from_transcript(self._transcript)
|
|
1149
|
+
if evicted:
|
|
1150
|
+
log.info("Evicted %d turns (fallback compression)", evicted)
|
|
1151
|
+
return evicted
|
|
1152
|
+
|
|
1153
|
+
def _checkpoint(self) -> None:
|
|
1154
|
+
"""Submit a non-blocking checkpoint write to the background executor.
|
|
1155
|
+
|
|
1156
|
+
Takes a snapshot of non-system transcript turns and submits the DB
|
|
1157
|
+
write to _CHECKPOINT_EXECUTOR (max_workers=1) so the agent thread is
|
|
1158
|
+
never blocked. Any pending (not-yet-started) future is cancelled
|
|
1159
|
+
first — the newer snapshot supersedes it.
|
|
1160
|
+
|
|
1161
|
+
Silently no-ops if db or session_id are unavailable so tests without
|
|
1162
|
+
a DB are unaffected.
|
|
1163
|
+
"""
|
|
1164
|
+
if self._db is None or not self._session_id:
|
|
1165
|
+
return
|
|
1166
|
+
turns_snapshot = [
|
|
1167
|
+
{
|
|
1168
|
+
"role": t.role,
|
|
1169
|
+
"content": t.content,
|
|
1170
|
+
"tokens": t.tokens,
|
|
1171
|
+
"tool_name": t.tool_name,
|
|
1172
|
+
"tool_call_id": t.tool_call_id,
|
|
1173
|
+
"tool_calls": t.tool_calls,
|
|
1174
|
+
}
|
|
1175
|
+
for t in self._transcript.to_turns()
|
|
1176
|
+
if t.role != "system" # system prompt is rebuilt on restore
|
|
1177
|
+
]
|
|
1178
|
+
db = self._db
|
|
1179
|
+
session_id = self._session_id
|
|
1180
|
+
|
|
1181
|
+
def _write() -> None:
|
|
1182
|
+
try:
|
|
1183
|
+
db.memory_save_turns(session_id, turns_snapshot)
|
|
1184
|
+
log.debug(
|
|
1185
|
+
"Checkpointed %d turns for session %s",
|
|
1186
|
+
len(turns_snapshot), session_id,
|
|
1187
|
+
)
|
|
1188
|
+
except Exception as exc: # noqa: BLE001
|
|
1189
|
+
log.warning("Checkpoint failed (non-fatal): %s", exc)
|
|
1190
|
+
|
|
1191
|
+
# Cancel any pending (not-yet-started) future — newer snapshot wins
|
|
1192
|
+
if self._checkpoint_future is not None and not self._checkpoint_future.done():
|
|
1193
|
+
self._checkpoint_future.cancel()
|
|
1194
|
+
self._checkpoint_future = _CHECKPOINT_EXECUTOR.submit(_write)
|
|
1195
|
+
self._last_checkpoint_at = time.monotonic()
|
|
1196
|
+
|
|
1197
|
+
def _flush_checkpoint_sync(self, timeout: float = 10.0) -> None:
|
|
1198
|
+
"""Block until the pending async checkpoint write completes.
|
|
1199
|
+
|
|
1200
|
+
Call on clean exit and at all terminal run() returns to ensure the last
|
|
1201
|
+
transcript snapshot is durable before the process moves on.
|
|
1202
|
+
"""
|
|
1203
|
+
if self._checkpoint_future is not None:
|
|
1204
|
+
try:
|
|
1205
|
+
self._checkpoint_future.result(timeout=timeout)
|
|
1206
|
+
except _cf.TimeoutError:
|
|
1207
|
+
log.warning(
|
|
1208
|
+
"Checkpoint flush timed out after %.1fs — last snapshot may not persist",
|
|
1209
|
+
timeout,
|
|
1210
|
+
)
|
|
1211
|
+
except Exception as exc: # noqa: BLE001
|
|
1212
|
+
log.warning("Checkpoint flush error: %s", exc)
|
|
1213
|
+
finally:
|
|
1214
|
+
self._checkpoint_future = None
|
|
1215
|
+
|
|
1216
|
+
def restore_from_db(self, session_id: str | None = None) -> int:
|
|
1217
|
+
"""Reload checkpointed transcript turns from the DB.
|
|
1218
|
+
|
|
1219
|
+
Called on /resume to recover from a crash. Replaces all non-system
|
|
1220
|
+
turns in the transcript with the last saved checkpoint.
|
|
1221
|
+
|
|
1222
|
+
Args:
|
|
1223
|
+
session_id: Optional session to restore from. Defaults to the
|
|
1224
|
+
current ``self._session_id`` if not given.
|
|
1225
|
+
|
|
1226
|
+
Returns:
|
|
1227
|
+
The number of turns reloaded; 0 if no checkpoint found.
|
|
1228
|
+
"""
|
|
1229
|
+
target_session = session_id or self._session_id
|
|
1230
|
+
if self._db is None or not target_session:
|
|
1231
|
+
return 0
|
|
1232
|
+
try:
|
|
1233
|
+
# 24-hour staleness guard
|
|
1234
|
+
sess_row = self._db.execute_one(
|
|
1235
|
+
"SELECT updated_at FROM sessions WHERE session_id = ?",
|
|
1236
|
+
(target_session,),
|
|
1237
|
+
)
|
|
1238
|
+
if sess_row and sess_row["updated_at"]:
|
|
1239
|
+
from datetime import datetime, timedelta, timezone
|
|
1240
|
+
try:
|
|
1241
|
+
updated_at = datetime.fromisoformat(
|
|
1242
|
+
str(sess_row["updated_at"]).replace("Z", "+00:00")
|
|
1243
|
+
)
|
|
1244
|
+
age = datetime.now(timezone.utc) - updated_at.astimezone(timezone.utc)
|
|
1245
|
+
if age > timedelta(hours=24):
|
|
1246
|
+
log.warning(
|
|
1247
|
+
"⚠ Session %s is over 24 hours old — context may be stale."
|
|
1248
|
+
" Continuing anyway.",
|
|
1249
|
+
target_session[:8],
|
|
1250
|
+
)
|
|
1251
|
+
except (ValueError, AttributeError):
|
|
1252
|
+
pass
|
|
1253
|
+
|
|
1254
|
+
rows = self._db.memory_load_turns(target_session)
|
|
1255
|
+
if not rows:
|
|
1256
|
+
return 0
|
|
1257
|
+
from collections import deque
|
|
1258
|
+
system_turns = [t for t in self._transcript.to_turns() if t.role == "system"]
|
|
1259
|
+
self._transcript._turns = deque(system_turns)
|
|
1260
|
+
self._transcript._total_tokens = sum(t.tokens for t in system_turns)
|
|
1261
|
+
for row in rows:
|
|
1262
|
+
turn = Turn(
|
|
1263
|
+
role=row["role"],
|
|
1264
|
+
content=row.get("content") or "",
|
|
1265
|
+
tokens=row.get("tokens") or 0,
|
|
1266
|
+
tool_name=row.get("tool_name"),
|
|
1267
|
+
tool_call_id=row.get("tool_call_id"),
|
|
1268
|
+
tool_calls=row.get("tool_calls"),
|
|
1269
|
+
)
|
|
1270
|
+
self._transcript.append(turn)
|
|
1271
|
+
self._budget.sync_from_transcript(self._transcript)
|
|
1272
|
+
log.info(
|
|
1273
|
+
"Restored %d turns from DB for session %s",
|
|
1274
|
+
len(rows), target_session,
|
|
1275
|
+
)
|
|
1276
|
+
return len(rows)
|
|
1277
|
+
except Exception as exc: # noqa: BLE001
|
|
1278
|
+
log.warning("restore_from_db failed (non-fatal): %s", exc)
|
|
1279
|
+
return 0
|
|
1280
|
+
|
|
1281
|
+
def _event_log_begin(self, turn_num: int, user_message: str | None = None) -> str | None:
|
|
1282
|
+
"""Insert a session_events row for this turn; return event_id or None."""
|
|
1283
|
+
if self._db is None or not self._session_id:
|
|
1284
|
+
return None
|
|
1285
|
+
try:
|
|
1286
|
+
return self._db.event_log_begin_turn(
|
|
1287
|
+
self._session_id,
|
|
1288
|
+
self._model_turn_count,
|
|
1289
|
+
self._model_id,
|
|
1290
|
+
self._cfg.provider,
|
|
1291
|
+
getattr(self._model, "tier", "coder"),
|
|
1292
|
+
user_message=user_message,
|
|
1293
|
+
)
|
|
1294
|
+
except Exception as exc: # noqa: BLE001
|
|
1295
|
+
log.debug("event_log_begin_turn: %s", exc)
|
|
1296
|
+
return None
|
|
1297
|
+
|
|
1298
|
+
def _run_self_critique(
    self, turn_num: int
) -> Generator[AgentEvent, None, None]:
    """Ask the model for a short self-critique before DONE is emitted.

    Does nothing when no files were written. Otherwise it sends the last
    four transcript messages plus a critique prompt naming up to five
    written files, capped at 300 tokens, and yields the non-empty reply as
    a single THINKING event prefixed with ``[self-critique]``.

    NOTE(review): the original documentation says this uses the Coder
    model (not Reasoner) to keep cost low; the request is sent with
    ``self._model_id`` — confirm that is the intended model.

    Any failure is logged at debug level and the critique is skipped.
    """
    written = self._files_written
    if not written:
        return

    # Only the first five paths are named, to keep the prompt short.
    file_list = ", ".join(map(str, written[:5]))
    critique_prompt = (
        f"You just modified: {file_list}.\n"
        "Briefly critique the changes you made:\n"
        "- Any logic errors or edge cases missed?\n"
        "- Any security or correctness concerns?\n"
        "- Is the code consistent with the surrounding style?\n"
        "Be concise — 3–5 sentences max."
    )
    try:
        recent_context = self._transcript.to_messages()[-4:]
        messages = [*recent_context, {"role": "user", "content": critique_prompt}]
        response = self._gdm_client.complete(
            messages,
            model=self._model_id,
            max_tokens=300,
        )
        critique = (response.choices[0].message.content or "").strip()
        if critique:
            yield AgentEvent(
                EventType.THINKING,
                content=f"[self-critique] {critique}",
                turn=turn_num,
            )
    except Exception as exc:  # noqa: BLE001
        log.debug("Self-critique skipped: %s", exc)
|
|
1334
|
+
|
|
1335
|
+
def _maybe_review_gate(
    self, turn_num: int
) -> Generator[AgentEvent, None, None]:
    """Run the ReviewGate over written files when the trigger asks for it.

    Does nothing when no files were written or when
    ``ReviewTrigger.should_review`` declines (per the original note, the
    trigger fires for security-sensitive files). Otherwise yields exactly
    one THINKING event: the list of findings when the review blocks the
    merge, or a pass message carrying the trigger reason.

    Any failure, including a failed import of the review module, is logged
    at debug level and the gate is skipped.
    """
    changed = self._files_written
    if not changed:
        return
    try:
        from src.agent.review_gate import ReviewGate, ReviewTrigger

        trigger = ReviewTrigger()
        if not trigger.should_review(changed):
            return
        reason = trigger.classify(changed)
        gate = ReviewGate(cfg=self._cfg)
        # Pass the real diff content to the reviewer; the placeholder is
        # used only when nothing could be collected.
        from src.agent.review_gate import _collect_diff

        collected = _collect_diff(changed, self._cfg.project_root)
        gate_result = gate.review(
            files_changed=changed,
            diff_text=collected or "(diff unavailable)",
            trigger_reason=reason,
        )
        if gate_result.blocks_merge:
            bullet_lines = [
                f"- [{f.severity.value.upper()}] {f.message}"
                for f in gate_result.report.findings
            ]
            findings_text = "\n".join(bullet_lines)
            summary = f"[review-gate] ⚠ Review flagged issues:\n{findings_text}"
        else:
            summary = f"[review-gate] ✅ Review passed ({reason})"
        yield AgentEvent(EventType.THINKING, content=summary, turn=turn_num)
    except Exception as exc:  # noqa: BLE001
        log.debug("ReviewGate skipped: %s", exc)
|
|
1376
|
+
|
|
1377
|
+
|
|
1378
|
+
# ---------------------------------------------------------------------------
|
|
1379
|
+
# Autonomy audit log (module-level for testability)
|
|
1380
|
+
# ---------------------------------------------------------------------------
|
|
1381
|
+
|
|
1382
|
+
|
|
1383
|
+
def write_autonomy_audit(
|
|
1384
|
+
db_conn: Any,
|
|
1385
|
+
session_id: str,
|
|
1386
|
+
level: int,
|
|
1387
|
+
action: str,
|
|
1388
|
+
details: dict, # type: ignore[type-arg]
|
|
1389
|
+
checkpoint_id: str | None = None,
|
|
1390
|
+
) -> None:
|
|
1391
|
+
"""Write an audit entry for L3+ autonomy actions.
|
|
1392
|
+
|
|
1393
|
+
Silently skips levels below 3 — no-op by design.
|
|
1394
|
+
"""
|
|
1395
|
+
if level < 3:
|
|
1396
|
+
return
|
|
1397
|
+
import json
|
|
1398
|
+
import time
|
|
1399
|
+
|
|
1400
|
+
db_conn.execute(
|
|
1401
|
+
"CREATE TABLE IF NOT EXISTS autonomy_audit "
|
|
1402
|
+
"(session_id TEXT, timestamp REAL, level INTEGER, action TEXT, details TEXT, checkpoint_id TEXT)",
|
|
1403
|
+
)
|
|
1404
|
+
db_conn.execute(
|
|
1405
|
+
"INSERT INTO autonomy_audit "
|
|
1406
|
+
"(session_id, timestamp, level, action, details, checkpoint_id) VALUES (?,?,?,?,?,?)",
|
|
1407
|
+
(session_id, time.time(), level, action, json.dumps(details), checkpoint_id),
|
|
1408
|
+
)
|
|
1409
|
+
db_conn.commit()
|
|
1410
|
+
|