gdmcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gdmcode-0.1.0.dist-info/METADATA +240 -0
- gdmcode-0.1.0.dist-info/RECORD +131 -0
- gdmcode-0.1.0.dist-info/WHEEL +4 -0
- gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
- src/__init__.py +1 -0
- src/_internal/__init__.py +0 -0
- src/_internal/constants.py +244 -0
- src/_internal/domain_skills.py +339 -0
- src/agent/__init__.py +0 -0
- src/agent/commit_classifier.py +91 -0
- src/agent/context_budget.py +391 -0
- src/agent/daemon.py +681 -0
- src/agent/dag_validator.py +153 -0
- src/agent/debug_loop.py +473 -0
- src/agent/impact_analyzer.py +149 -0
- src/agent/impact_graph.py +117 -0
- src/agent/loop.py +1410 -0
- src/agent/orchestrator.py +141 -0
- src/agent/regression_guard.py +251 -0
- src/agent/review_gate.py +648 -0
- src/agent/risk_scorer.py +169 -0
- src/agent/self_healing.py +145 -0
- src/agent/smart_test_selector.py +89 -0
- src/agent/system_prompt.py +226 -0
- src/agent/task_tracker.py +320 -0
- src/agent/test_validator.py +210 -0
- src/agent/tool_orchestrator.py +402 -0
- src/agent/transcript.py +230 -0
- src/agent/verification_loop.py +133 -0
- src/agent/work_director.py +136 -0
- src/agent/worktree_manager.py +53 -0
- src/artifacts/__init__.py +16 -0
- src/artifacts/artifact_store.py +456 -0
- src/artifacts/verification_graph.py +75 -0
- src/auth.py +411 -0
- src/cli.py +1290 -0
- src/commands.py +1398 -0
- src/config.py +762 -0
- src/cost_tracker.py +348 -0
- src/db/__init__.py +4 -0
- src/db/migrations.py +337 -0
- src/enterprise/__init__.py +3 -0
- src/enterprise/audit_log.py +182 -0
- src/enterprise/identity.py +90 -0
- src/enterprise/rbac.py +100 -0
- src/enterprise/team_config.py +125 -0
- src/enterprise/usage_analytics.py +261 -0
- src/exceptions.py +207 -0
- src/git_workflow.py +651 -0
- src/integrations/__init__.py +6 -0
- src/integrations/github_actions.py +106 -0
- src/integrations/mcp_server.py +333 -0
- src/integrations/sentry_integration.py +100 -0
- src/integrations/sentry_server.py +82 -0
- src/integrations/webhook_security.py +19 -0
- src/main.py +27 -0
- src/memory/__init__.py +0 -0
- src/memory/code_index.py +376 -0
- src/memory/compressor.py +378 -0
- src/memory/context_memory.py +135 -0
- src/memory/continuous_memory.py +234 -0
- src/memory/conventions.py +495 -0
- src/memory/db.py +1119 -0
- src/memory/document_index.py +205 -0
- src/memory/file_cache.py +128 -0
- src/memory/project_scanner.py +178 -0
- src/memory/session_store.py +201 -0
- src/models/__init__.py +0 -0
- src/models/client.py +715 -0
- src/models/definitions.py +459 -0
- src/models/router.py +418 -0
- src/models/schemas.py +389 -0
- src/permissions.py +294 -0
- src/remote/__init__.py +5 -0
- src/remote/command_filter.py +33 -0
- src/remote/models.py +31 -0
- src/remote/permission_handler.py +79 -0
- src/remote/phone_ui.py +48 -0
- src/remote/protocol.py +59 -0
- src/remote/qr.py +65 -0
- src/remote/server.py +586 -0
- src/remote/token_manager.py +61 -0
- src/remote/tunnel.py +212 -0
- src/repl.py +475 -0
- src/runtime/__init__.py +1 -0
- src/runtime/branch_farm.py +372 -0
- src/runtime/replay.py +351 -0
- src/sandbox/__init__.py +2 -0
- src/sandbox/hermetic.py +214 -0
- src/sandbox/policy.py +44 -0
- src/sdk/__init__.py +3 -0
- src/sdk/plugin_base.py +39 -0
- src/sdk/plugin_host.py +100 -0
- src/sdk/plugin_loader.py +101 -0
- src/security.py +409 -0
- src/server/__init__.py +7 -0
- src/server/bridge.py +427 -0
- src/server/bridge_cli.py +103 -0
- src/server/bridge_client.py +170 -0
- src/server/protocol_version.py +103 -0
- src/session/__init__.py +10 -0
- src/session/event_fanout.py +46 -0
- src/session/input_broker.py +38 -0
- src/session/permission_bridge.py +100 -0
- src/tools/__init__.py +160 -0
- src/tools/_atomic.py +72 -0
- src/tools/agent_tools.py +423 -0
- src/tools/ask_user_tool.py +83 -0
- src/tools/bash_tool.py +384 -0
- src/tools/browser_tool.py +352 -0
- src/tools/browser_tools.py +179 -0
- src/tools/dep_tools.py +210 -0
- src/tools/document_reader.py +167 -0
- src/tools/document_tool.py +240 -0
- src/tools/document_writer.py +171 -0
- src/tools/impact_tools.py +240 -0
- src/tools/playwright_tool.py +172 -0
- src/tools/quality_tools.py +366 -0
- src/tools/read_tools.py +318 -0
- src/tools/result_cache.py +157 -0
- src/tools/search_tools.py +310 -0
- src/tools/shell_tools.py +311 -0
- src/tools/write_tools.py +337 -0
- src/voice/__init__.py +25 -0
- src/voice/audio_capture.py +92 -0
- src/voice/audio_playback.py +68 -0
- src/voice/errors.py +14 -0
- src/voice/models.py +35 -0
- src/voice/providers.py +143 -0
- src/voice/vad.py +55 -0
- src/voice/voice_loop.py +156 -0
src/models/router.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
"""ModelRouter — smart model tier routing based on task complexity.
|
|
2
|
+
|
|
3
|
+
Routes tasks to the appropriate model tier (Scout, Coder, Thinker, Reasoner)
|
|
4
|
+
using keyword analysis, context size, file count, and failure history.
|
|
5
|
+
|
|
6
|
+
Three complementary routing mechanisms:
|
|
7
|
+
1. route_task(ctx) — overall task complexity → tier (for planning / first call)
|
|
8
|
+
2. route_tool(name) — per-tool optimal tier (for individual tool executions)
|
|
9
|
+
3. select_tier_for_turn() — per-turn micro-toggle respecting reasoning_mode flag
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
|
|
17
|
+
from src.models.definitions import ModelTier, Provider
|
|
18
|
+
|
|
19
|
+
# Names exported by a star-import of this module.
__all__ = [
    "TaskContext",
    "ModelRouter",
    "FailureType",
    "EscalationContext",
    "EscalationDecision",
    "CircuitBreaker",
]

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Keyword sets for complexity scoring
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
# Keyword groups used by ModelRouter._keyword_score. Each entry is tested as a
# substring of the lower-cased prompt, so multi-word phrases are allowed.

# Phrases that earn the heavy keyword score.
_HEAVY_KEYWORDS: frozenset[str] = frozenset((
    "architect",
    "design",
    "why does",
    "debug",
    "analyze",
    "performance",
    "security",
    "refactor",
    "migrate",
    "integrate",
))

# Phrases that earn the medium keyword score.
_MEDIUM_KEYWORDS: frozenset[str] = frozenset((
    "implement",
    "create",
    "build",
    "add feature",
    "fix",
    "update",
))

# Phrases that earn the light keyword score.
_LIGHT_KEYWORDS: frozenset[str] = frozenset((
    "rename",
    "comment",
    "format",
    "typo",
    "add test",
))
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Tool routing sets
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
# Tools that ModelRouter.route_tool maps to the SCOUT tier.
_SCOUT_TOOLS: frozenset[str] = frozenset((
    "read_file",
    "grep",
    "list_dir",
    "find_symbol",
    "git_log",
    "summarize",
))

# Tools that ModelRouter.route_tool maps to the CODER tier.
_CODER_TOOLS: frozenset[str] = frozenset((
    "apply_patch",
    "write_file",
    "write_tests",
    "git_commit",
))

# Tools that score points toward a higher tier (per-turn classifier)
_WRITE_TOOLS: frozenset[str] = frozenset((
    "apply_patch",
    "write_file",
    "write_tests",
))
|
|
83
|
+
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
# Scoring constants
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
# Keyword sub-scores: one of these is awarded for the highest-severity
# keyword group found in the prompt.
_HEAVY_SCORE: int = 6
_MEDIUM_SCORE: int = 4
_LIGHT_SCORE: int = 1

# Context size: a token count strictly above the threshold earns the bonus.
_LARGE_CONTEXT_THRESHOLD: int = 60_000
_LARGE_CONTEXT_SCORE: int = 2

# File count: more than this many touched files earns the bonus.
_MANY_FILES_THRESHOLD: int = 5
_MANY_FILES_SCORE: int = 1

# Task-level scoring: points per failed attempt, and the cap on the total.
_FAILURE_SCORE: int = 2
_MAX_COMPLEXITY_SCORE: int = 10

# Minimum score (inclusive) needed to reach each tier; below the CODER
# threshold the router picks SCOUT.
_REASONER_THRESHOLD: int = 8
_THINKER_THRESHOLD: int = 5
_CODER_THRESHOLD: int = 2

# Per-turn scoring (models-002)
_TOOL_ONLY_PENALTY: int = -3       # pure tool turn, no text generation
_WRITE_TOOL_SCORE: int = 4         # apply_patch / write_file / write_tests
_TEST_FAILURE_SCORE: int = 5       # prompt contains failure output
_MANY_PATHS_THRESHOLD: int = 3     # N file paths in prompt triggers bonus
_MANY_PATHS_SCORE: int = 3         # bonus for 3+ file paths
_FAILURE_ADDITIVE_SCORE: int = 3   # per failed_attempt (additive)
|
|
112
|
+
|
|
113
|
+
# Injection cap: tool-result-only spike → cap at THINKER
_INJECTION_CAP_TIER: str = ModelTier.THINKER

# Cost-budget guard threshold
# NOTE(review): not referenced by any code visible in this module —
# presumably consumed elsewhere; confirm before removing.
_FORCED_REASONER_WARNING_THRESHOLD: int = 5

# Escalation ladder — ordered lowest → highest
_TIER_ORDER: list[str] = [
    ModelTier.SCOUT,
    ModelTier.CODER,
    ModelTier.THINKER,
    ModelTier.REASONER,
]
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Failure-type enumeration and escalation dataclasses
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
class FailureType(str, Enum):
    """Kinds of failure that can be reported when asking for an escalation.

    The ``str`` mix-in makes each member compare equal to its raw string value.
    """

    API_ERROR = "api_error"
    TOOL_ERROR = "tool_error"
    TEST_FAILURE = "test_failure"
    QUALITY_FAILURE = "quality_failure"
    REFUSAL = "refusal"
    CONTEXT_OVERFLOW = "context_overflow"
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass
class EscalationContext:
    """Describes one failed attempt, as passed to ModelRouter.escalate_with_context."""

    # Required: the tier in use when the failure happened, and what kind of failure it was.
    current_tier: str
    failure_type: "FailureType"

    # Optional details; all default to "empty" values.
    failure_detail: str = ""        # free text interpolated into the retry prompt
    attempt_number: int = 1
    cost_spent_usd: float = 0.0
    transcript_summary: str = ""
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@dataclass
class EscalationDecision:
    """Outcome returned by ModelRouter.escalate_with_context."""

    # Tier to retry on; may be None (escalate_with_context returns None for refusals).
    next_tier: "str | None"
    # Text to add to the retry prompt; may be empty.
    retry_prompt_addition: str
    # Flag telling the caller whether the transcript should be carried over.
    carry_transcript: bool
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@dataclass
class CircuitBreaker:
    """Tracks escalation count and spend, and signals when either limit is hit.

    Callers report each escalation through ``record_escalation`` and poll
    ``should_halt``. The running totals are excluded from ``__init__`` and
    ``repr`` and are exposed read-only through properties.
    """

    max_cost_usd: float = 10.0
    max_escalations: int = 3
    _total_cost_usd: float = field(default=0.0, init=False, repr=False)
    _escalation_count: int = field(default=0, init=False, repr=False)

    def _cost_limit_hit(self) -> bool:
        """Return True once accumulated cost has reached ``max_cost_usd``."""
        return self._total_cost_usd >= self.max_cost_usd

    def _escalation_limit_hit(self) -> bool:
        """Return True once the escalation count has reached ``max_escalations``."""
        return self._escalation_count >= self.max_escalations

    def should_halt(self) -> bool:
        """Return True when the cost limit or the escalation limit is reached."""
        return self._cost_limit_hit() or self._escalation_limit_hit()

    def record_escalation(self, cost_usd: float = 0.0) -> None:
        """Count one escalation and add ``cost_usd`` to the running total."""
        self._escalation_count += 1
        self._total_cost_usd += cost_usd

    def halt_reason(self) -> str:
        """Describe which limit tripped the breaker.

        The cost limit takes precedence when both limits are reached.
        Returns an empty string when neither limit has been reached, so the
        message never claims a limit was hit while ``should_halt()`` is False.
        """
        if self._cost_limit_hit():
            return f"Cost limit {self._total_cost_usd:.3f}/{self.max_cost_usd:.3f} USD reached"
        if self._escalation_limit_hit():
            return f"escalations {self._escalation_count}/{self.max_escalations} reached"
        return ""

    @property
    def total_cost_usd(self) -> float:
        """Cost accumulated so far through ``record_escalation``."""
        return self._total_cost_usd

    @property
    def escalation_count(self) -> int:
        """Number of escalations recorded so far."""
        return self._escalation_count
|
|
186
|
+
|
|
187
|
+
@dataclass
class TaskContext:
    """Agent-state snapshot that the router reads when choosing a tier."""

    # Text that is scanned for keywords (and, per turn, for failure output and file extensions).
    prompt: str
    # Compared against the large-context threshold.
    token_count: int = 0
    # Compared against the many-files threshold in task-level scoring.
    files_touched: int = 0
    # Each failed attempt adds points to the score.
    failed_attempts: int = 0
    # Not read by any routing code visible in this module.
    provider: str = Provider.GROK
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class ModelRouter:
    """Routes tasks to the appropriate model tier based on complexity.

    Three routing levels:
      1. route_task(ctx)          — overall task complexity → model tier string
      2. route_tool(name)         — per-tool optimal tier (Scout for reads, etc.)
      3. select_tier_for_turn()   — per-turn micro-toggle with mode override

    Escalation is implicit in the complexity score: each failed_attempts
    adds _FAILURE_SCORE points, pushing the task to a higher tier.
    The explicit escalate() method is available for callers that need to
    manually bump one tier (e.g. after a tool execution error), and
    escalate_with_context() additionally builds a retry prompt from the
    failure type.
    """

    def route_task(self, ctx: TaskContext) -> str:
        """Return the ModelTier constant for the overall task.

        The tier is derived from the capped score of ``_complexity_score``.
        """
        score = self._complexity_score(ctx)
        log.debug(
            "Task complexity score=%d for prompt=%r",
            score,
            ctx.prompt[:80],
        )
        return self._tier_for_score(score)

    def route_tool(self, tool_name: str) -> str:
        """Return optimal ModelTier for a specific tool operation.

        Tools in neither the scout set nor the coder set default to THINKER.
        """
        if tool_name in _SCOUT_TOOLS:
            return ModelTier.SCOUT
        if tool_name in _CODER_TOOLS:
            return ModelTier.CODER
        return ModelTier.THINKER

    def escalate(self, current_tier: str) -> str:
        """Return next tier up from current_tier (caps at REASONER).

        An unknown tier is logged and treated as needing REASONER.
        """
        try:
            idx = _TIER_ORDER.index(current_tier)
        except ValueError:
            log.warning(
                "Unknown tier %r passed to escalate — defaulting to REASONER",
                current_tier,
            )
            return ModelTier.REASONER
        return _TIER_ORDER[min(idx + 1, len(_TIER_ORDER) - 1)]

    def select_tier_for_turn(
        self,
        ctx: TaskContext,
        *,
        reasoning_mode: str = "auto",
        tool_names: list[str] | None = None,
        is_tool_result_only: bool = False,
    ) -> str:
        """Return ModelTier constant for a single agent turn.

        Args:
            ctx: Current task context (prompt, tokens, failures).
            reasoning_mode: "on" | "off" | "auto". "on"/"off" override the
                classifier; matching ignores case and surrounding whitespace.
                Any other value (including None or a non-string) is treated
                as "auto".
            tool_names: Tools about to be called (or just called). Used in
                the per-turn scoring table.
            is_tool_result_only: True when the prompt text is only a tool
                result (no user input this turn). Enables injection-cap logic.

        Returns:
            ModelTier constant string.
        """
        # Normalise defensively so a non-string mode falls back to "auto"
        # instead of raising on ``.lower()``.
        mode = reasoning_mode.strip().lower() if isinstance(reasoning_mode, str) else "auto"

        if mode == "off":
            log.info("gdm.router mode=off → SCOUT (forced)")
            return ModelTier.SCOUT

        if mode == "on":
            log.info("gdm.router mode=on → REASONER (forced)")
            return ModelTier.REASONER

        # ── auto mode: deterministic scoring ──────────────────────────────
        score, signals = self._per_turn_score(ctx, tool_names or [])
        tier = self._tier_for_score(score)

        # Prompt-injection cap: on a tool-result-only turn, a REASONER result
        # is lowered to _INJECTION_CAP_TIER unless the keyword score of
        # ctx.prompt alone reaches the CODER threshold.
        if (
            is_tool_result_only
            and tier == ModelTier.REASONER
            and self._keyword_score(ctx.prompt) < _CODER_THRESHOLD
        ):
            log.info(
                "gdm.router injection-cap applied: tool-result score=%d → capping at THINKER",
                score,
            )
            tier = _INJECTION_CAP_TIER

        log.info(
            "gdm.router turn=auto score=%d tier=%s signals=%s",
            score,
            tier,
            signals,
        )
        return tier

    def escalate_with_context(self, ctx: "EscalationContext") -> "EscalationDecision":
        """Decide how to retry after a failure described by ``ctx``.

        * REFUSAL: no retry (``next_tier`` is None).
        * API_ERROR: retry on the same tier with a "Retry: ..." prompt.
        * Any other failure: move one tier up via ``escalate`` (capped at the
          top of the ladder) with a failure-specific prompt. The transcript
          is carried over only for CONTEXT_OVERFLOW.
        """
        ft = ctx.failure_type
        # FailureType is a str-enum, so callers may pass the raw string value;
        # fall back to the object itself when it has no ``.value``.
        ft_label = getattr(ft, "value", ft)
        log.warning(
            "gdm.router escalating failure=%s tier=%s attempt=%d cost=%.3f",
            ft_label, ctx.current_tier, ctx.attempt_number, ctx.cost_spent_usd,
        )

        if ft == FailureType.REFUSAL:
            return EscalationDecision(next_tier=None, retry_prompt_addition="", carry_transcript=False)
        if ft == FailureType.API_ERROR:
            return EscalationDecision(
                next_tier=ctx.current_tier,
                retry_prompt_addition=f"Retry: {ctx.failure_detail}",
                carry_transcript=False,
            )

        # Reuse the single-step ladder logic; unknown tiers resolve to REASONER.
        next_tier = self.escalate(ctx.current_tier)
        carry = ft == FailureType.CONTEXT_OVERFLOW

        if ft == FailureType.TOOL_ERROR:
            prompt = f"Tool error ({ctx.failure_detail}). Retry with a different approach."
        elif ft == FailureType.TEST_FAILURE:
            prompt = f"Fix the failures: {ctx.failure_detail}"
        elif ft == FailureType.QUALITY_FAILURE:
            prompt = f"Quality issue: {ctx.failure_detail}. Improve the solution."
        elif ft == FailureType.CONTEXT_OVERFLOW:
            prompt = f"Summarize the context and continue. {ctx.failure_detail}".strip()
        else:
            prompt = f"Escalating due to {ft_label}: {ctx.failure_detail}"
        return EscalationDecision(next_tier=next_tier, retry_prompt_addition=prompt, carry_transcript=carry)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _tier_for_score(self, score: int) -> str:
        """Map a numeric score onto a tier using the inclusive thresholds."""
        if score >= _REASONER_THRESHOLD:
            return ModelTier.REASONER
        if score >= _THINKER_THRESHOLD:
            return ModelTier.THINKER
        if score >= _CODER_THRESHOLD:
            return ModelTier.CODER
        return ModelTier.SCOUT

    def _per_turn_score(self, ctx: TaskContext, tool_names: list[str]) -> tuple[int, list[str]]:
        """Compute per-turn score and return (score, signals) tuple.

        ``signals`` lists a short label for every rule that contributed,
        in the order the rules are evaluated. The score is not capped.
        """
        score = 0
        signals: list[str] = []

        # Tool-only turn penalty (only when purely read/scout tools, no write tools)
        has_write_tools = any(t in _WRITE_TOOLS for t in tool_names)
        if tool_names and not ctx.prompt.strip() and not has_write_tools:
            score += _TOOL_ONLY_PENALTY
            signals.append(f"tool_only{_TOOL_ONLY_PENALTY}")

        # Write tools
        if has_write_tools:
            score += _WRITE_TOOL_SCORE
            signals.append(f"write_tool+{_WRITE_TOOL_SCORE}")

        # Test failure output: "error" plus at least one failure marker
        prompt_lower = ctx.prompt.lower()
        if "error" in prompt_lower and any(
            marker in prompt_lower for marker in ("traceback", "assert", "failed")
        ):
            score += _TEST_FAILURE_SCORE
            signals.append(f"test_failure+{_TEST_FAILURE_SCORE}")

        # Many file paths: approximated by counting source-file extension
        # substrings. This is a substring count, so ".json" also counts as
        # ".js" and ".tsx" as ".ts".
        path_count = sum(prompt_lower.count(ext) for ext in (".py", ".ts", ".js"))
        if path_count >= _MANY_PATHS_THRESHOLD:
            score += _MANY_PATHS_SCORE
            signals.append(f"many_files+{_MANY_PATHS_SCORE}")

        # Large context
        if ctx.token_count > _LARGE_CONTEXT_THRESHOLD:
            score += _LARGE_CONTEXT_SCORE
            signals.append(f"large_ctx+{_LARGE_CONTEXT_SCORE}")

        # Failed attempts (additive)
        if ctx.failed_attempts > 0:
            fa_score = ctx.failed_attempts * _FAILURE_ADDITIVE_SCORE
            score += fa_score
            signals.append(f"failures+{fa_score}")

        # Keywords
        kw_score = self._keyword_score(ctx.prompt)
        if kw_score > 0:
            label = "heavy" if kw_score >= _HEAVY_SCORE else ("medium" if kw_score >= _MEDIUM_SCORE else "light")
            score += kw_score
            signals.append(f"{label}_kw+{kw_score}")

        return score, signals

    def _complexity_score(self, ctx: TaskContext) -> int:
        """Score 0-10 based on keywords, context size, file count, and failure count."""
        score = self._keyword_score(ctx.prompt)
        if ctx.token_count > _LARGE_CONTEXT_THRESHOLD:
            score += _LARGE_CONTEXT_SCORE
        if ctx.files_touched > _MANY_FILES_THRESHOLD:
            score += _MANY_FILES_SCORE
        score += ctx.failed_attempts * _FAILURE_SCORE
        # Clamp at both ends so the documented 0-10 range holds even if a
        # caller passes a negative failed_attempts.
        return max(0, min(score, _MAX_COMPLEXITY_SCORE))

    def _keyword_score(self, prompt: str) -> int:
        """Return keyword-based sub-score for the prompt text.

        Checks tier groups in descending order; returns the score for the
        highest-severity keyword found (not additive across tiers).

        Matching is a case-insensitive substring test, so a keyword also
        matches inside longer words (e.g. "fix" inside "prefix").
        """
        prompt_lower = prompt.lower()
        if any(kw in prompt_lower for kw in _HEAVY_KEYWORDS):
            return _HEAVY_SCORE
        if any(kw in prompt_lower for kw in _MEDIUM_KEYWORDS):
            return _MEDIUM_SCORE
        if any(kw in prompt_lower for kw in _LIGHT_KEYWORDS):
            return _LIGHT_SCORE
        return 0
|