gdmcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. gdmcode-0.1.0.dist-info/METADATA +240 -0
  2. gdmcode-0.1.0.dist-info/RECORD +131 -0
  3. gdmcode-0.1.0.dist-info/WHEEL +4 -0
  4. gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/_internal/__init__.py +0 -0
  7. src/_internal/constants.py +244 -0
  8. src/_internal/domain_skills.py +339 -0
  9. src/agent/__init__.py +0 -0
  10. src/agent/commit_classifier.py +91 -0
  11. src/agent/context_budget.py +391 -0
  12. src/agent/daemon.py +681 -0
  13. src/agent/dag_validator.py +153 -0
  14. src/agent/debug_loop.py +473 -0
  15. src/agent/impact_analyzer.py +149 -0
  16. src/agent/impact_graph.py +117 -0
  17. src/agent/loop.py +1410 -0
  18. src/agent/orchestrator.py +141 -0
  19. src/agent/regression_guard.py +251 -0
  20. src/agent/review_gate.py +648 -0
  21. src/agent/risk_scorer.py +169 -0
  22. src/agent/self_healing.py +145 -0
  23. src/agent/smart_test_selector.py +89 -0
  24. src/agent/system_prompt.py +226 -0
  25. src/agent/task_tracker.py +320 -0
  26. src/agent/test_validator.py +210 -0
  27. src/agent/tool_orchestrator.py +402 -0
  28. src/agent/transcript.py +230 -0
  29. src/agent/verification_loop.py +133 -0
  30. src/agent/work_director.py +136 -0
  31. src/agent/worktree_manager.py +53 -0
  32. src/artifacts/__init__.py +16 -0
  33. src/artifacts/artifact_store.py +456 -0
  34. src/artifacts/verification_graph.py +75 -0
  35. src/auth.py +411 -0
  36. src/cli.py +1290 -0
  37. src/commands.py +1398 -0
  38. src/config.py +762 -0
  39. src/cost_tracker.py +348 -0
  40. src/db/__init__.py +4 -0
  41. src/db/migrations.py +337 -0
  42. src/enterprise/__init__.py +3 -0
  43. src/enterprise/audit_log.py +182 -0
  44. src/enterprise/identity.py +90 -0
  45. src/enterprise/rbac.py +100 -0
  46. src/enterprise/team_config.py +125 -0
  47. src/enterprise/usage_analytics.py +261 -0
  48. src/exceptions.py +207 -0
  49. src/git_workflow.py +651 -0
  50. src/integrations/__init__.py +6 -0
  51. src/integrations/github_actions.py +106 -0
  52. src/integrations/mcp_server.py +333 -0
  53. src/integrations/sentry_integration.py +100 -0
  54. src/integrations/sentry_server.py +82 -0
  55. src/integrations/webhook_security.py +19 -0
  56. src/main.py +27 -0
  57. src/memory/__init__.py +0 -0
  58. src/memory/code_index.py +376 -0
  59. src/memory/compressor.py +378 -0
  60. src/memory/context_memory.py +135 -0
  61. src/memory/continuous_memory.py +234 -0
  62. src/memory/conventions.py +495 -0
  63. src/memory/db.py +1119 -0
  64. src/memory/document_index.py +205 -0
  65. src/memory/file_cache.py +128 -0
  66. src/memory/project_scanner.py +178 -0
  67. src/memory/session_store.py +201 -0
  68. src/models/__init__.py +0 -0
  69. src/models/client.py +715 -0
  70. src/models/definitions.py +459 -0
  71. src/models/router.py +418 -0
  72. src/models/schemas.py +389 -0
  73. src/permissions.py +294 -0
  74. src/remote/__init__.py +5 -0
  75. src/remote/command_filter.py +33 -0
  76. src/remote/models.py +31 -0
  77. src/remote/permission_handler.py +79 -0
  78. src/remote/phone_ui.py +48 -0
  79. src/remote/protocol.py +59 -0
  80. src/remote/qr.py +65 -0
  81. src/remote/server.py +586 -0
  82. src/remote/token_manager.py +61 -0
  83. src/remote/tunnel.py +212 -0
  84. src/repl.py +475 -0
  85. src/runtime/__init__.py +1 -0
  86. src/runtime/branch_farm.py +372 -0
  87. src/runtime/replay.py +351 -0
  88. src/sandbox/__init__.py +2 -0
  89. src/sandbox/hermetic.py +214 -0
  90. src/sandbox/policy.py +44 -0
  91. src/sdk/__init__.py +3 -0
  92. src/sdk/plugin_base.py +39 -0
  93. src/sdk/plugin_host.py +100 -0
  94. src/sdk/plugin_loader.py +101 -0
  95. src/security.py +409 -0
  96. src/server/__init__.py +7 -0
  97. src/server/bridge.py +427 -0
  98. src/server/bridge_cli.py +103 -0
  99. src/server/bridge_client.py +170 -0
  100. src/server/protocol_version.py +103 -0
  101. src/session/__init__.py +10 -0
  102. src/session/event_fanout.py +46 -0
  103. src/session/input_broker.py +38 -0
  104. src/session/permission_bridge.py +100 -0
  105. src/tools/__init__.py +160 -0
  106. src/tools/_atomic.py +72 -0
  107. src/tools/agent_tools.py +423 -0
  108. src/tools/ask_user_tool.py +83 -0
  109. src/tools/bash_tool.py +384 -0
  110. src/tools/browser_tool.py +352 -0
  111. src/tools/browser_tools.py +179 -0
  112. src/tools/dep_tools.py +210 -0
  113. src/tools/document_reader.py +167 -0
  114. src/tools/document_tool.py +240 -0
  115. src/tools/document_writer.py +171 -0
  116. src/tools/impact_tools.py +240 -0
  117. src/tools/playwright_tool.py +172 -0
  118. src/tools/quality_tools.py +366 -0
  119. src/tools/read_tools.py +318 -0
  120. src/tools/result_cache.py +157 -0
  121. src/tools/search_tools.py +310 -0
  122. src/tools/shell_tools.py +311 -0
  123. src/tools/write_tools.py +337 -0
  124. src/voice/__init__.py +25 -0
  125. src/voice/audio_capture.py +92 -0
  126. src/voice/audio_playback.py +68 -0
  127. src/voice/errors.py +14 -0
  128. src/voice/models.py +35 -0
  129. src/voice/providers.py +143 -0
  130. src/voice/vad.py +55 -0
  131. src/voice/voice_loop.py +156 -0
src/models/router.py ADDED
@@ -0,0 +1,418 @@
1
+ """ModelRouter — smart model tier routing based on task complexity.
2
+
3
+ Routes tasks to the appropriate model tier (Scout, Coder, Thinker, Reasoner)
4
+ using keyword analysis, context size, file count, and failure history.
5
+
6
+ Three complementary routing mechanisms:
7
+ 1. route_task(ctx) — overall task complexity → tier (for planning / first call)
8
+ 2. route_tool(name) — per-tool optimal tier (for individual tool executions)
9
+ 3. select_tier_for_turn() — per-turn micro-toggle respecting reasoning_mode flag
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ from enum import Enum
15
+ from dataclasses import dataclass, field
16
+
17
+ from src.models.definitions import ModelTier, Provider
18
+
19
# Public names exported by this module.
__all__ = [
    "TaskContext",
    "ModelRouter",
    "FailureType",
    "EscalationContext",
    "EscalationDecision",
    "CircuitBreaker",
]

# Module-level logger shared by every routing helper in this file.
log = logging.getLogger(__name__)
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Keyword sets for complexity scoring
25
+ # ---------------------------------------------------------------------------
26
+
27
# Keyword groups consumed by ModelRouter._keyword_score, which matches them as
# case-insensitive substrings of the prompt and returns the score of the
# highest-severity group that has a hit.

# Heavy: design / diagnosis / large-change vocabulary.
_HEAVY_KEYWORDS: frozenset[str] = frozenset((
    "analyze", "architect", "debug", "design", "integrate",
    "migrate", "performance", "refactor", "security", "why does",
))

# Medium: ordinary implementation work.
_MEDIUM_KEYWORDS: frozenset[str] = frozenset((
    "add feature", "build", "create", "fix", "implement", "update",
))

# Light: small mechanical edits.
_LIGHT_KEYWORDS: frozenset[str] = frozenset((
    "add test", "comment", "format", "rename", "typo",
))
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Tool routing sets
59
+ # ---------------------------------------------------------------------------
60
+
61
# Tools that ModelRouter.route_tool sends to the SCOUT tier.
_SCOUT_TOOLS: frozenset[str] = frozenset((
    "find_symbol", "git_log", "grep", "list_dir", "read_file", "summarize",
))

# Tools that ModelRouter.route_tool sends to the CODER tier.
_CODER_TOOLS: frozenset[str] = frozenset((
    "apply_patch", "git_commit", "write_file", "write_tests",
))

# Tools that score points toward a higher tier (per-turn classifier).
_WRITE_TOOLS: frozenset[str] = frozenset((
    "apply_patch", "write_file", "write_tests",
))
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Scoring constants
86
+ # ---------------------------------------------------------------------------
87
+
88
# Keyword sub-scores returned by ModelRouter._keyword_score.
_HEAVY_SCORE: int = 6
_MEDIUM_SCORE: int = 4
_LIGHT_SCORE: int = 1

# Bonus when the context token count exceeds the threshold.
_LARGE_CONTEXT_THRESHOLD: int = 60_000
_LARGE_CONTEXT_SCORE: int = 2

# Bonus when more than this many files have been touched (task-level score).
_MANY_FILES_THRESHOLD: int = 5
_MANY_FILES_SCORE: int = 1

# Task-level points per failed attempt, and the cap on the task-level score.
_FAILURE_SCORE: int = 2
_MAX_COMPLEXITY_SCORE: int = 10

# Minimum score required for each tier; anything lower routes to SCOUT.
_REASONER_THRESHOLD: int = 8
_THINKER_THRESHOLD: int = 5
_CODER_THRESHOLD: int = 2

# Per-turn scoring (models-002)
_TOOL_ONLY_PENALTY: int = -3       # pure tool turn, no text generation
_WRITE_TOOL_SCORE: int = 4         # apply_patch / write_file / write_tests
_TEST_FAILURE_SCORE: int = 5       # prompt contains failure output
_MANY_PATHS_THRESHOLD: int = 3     # N file paths in prompt triggers bonus
_MANY_PATHS_SCORE: int = 3         # bonus for 3+ file paths
_FAILURE_ADDITIVE_SCORE: int = 3   # per failed_attempt (additive)

# Injection cap: tool-result-only spike → cap at THINKER
_INJECTION_CAP_TIER: str = ModelTier.THINKER

# Cost-budget guard threshold
_FORCED_REASONER_WARNING_THRESHOLD: int = 5

# Escalation ladder — ordered lowest → highest
_TIER_ORDER: list[str] = [
    ModelTier.SCOUT,
    ModelTier.CODER,
    ModelTier.THINKER,
    ModelTier.REASONER,
]
126
+
127
+
128
+
129
+ # ---------------------------------------------------------------------------
130
+ # Failure-type enumeration and escalation dataclasses
131
+ # ---------------------------------------------------------------------------
132
+
133
class FailureType(str, Enum):
    """Closed set of failure categories used when deciding how to escalate.

    Members are also ``str`` instances, so each compares equal to its raw
    string value.
    """

    API_ERROR = "api_error"
    TOOL_ERROR = "tool_error"
    TEST_FAILURE = "test_failure"
    QUALITY_FAILURE = "quality_failure"
    REFUSAL = "refusal"
    CONTEXT_OVERFLOW = "context_overflow"
140
+
141
+
142
@dataclass
class EscalationContext:
    """Inputs describing one failed attempt, passed to escalate_with_context()."""

    # Tier the failed attempt ran on.
    current_tier: str
    # Category of the failure; selects the retry strategy.
    failure_type: "FailureType"
    # Free-form detail that is embedded into the retry prompt.
    failure_detail: str = ""
    # Attempt counter (only logged by the router).
    attempt_number: int = 1
    # Cost so far in USD (only logged by the router).
    cost_spent_usd: float = 0.0
    # Summary of the transcript so far; not read by the router in this file.
    transcript_summary: str = ""
150
+
151
+
152
@dataclass
class EscalationDecision:
    """Outcome of an escalation: where to retry and what to tell the model."""

    # Tier to retry on; None means there is no tier to retry on.
    next_tier: "str | None"
    # Text to append to the retry prompt (may be empty).
    retry_prompt_addition: str
    # Whether the existing transcript should be carried into the retry.
    carry_transcript: bool
157
+
158
+
159
@dataclass
class CircuitBreaker:
    """Budget guard that signals when escalation should stop.

    Accumulates the cost and the number of escalations recorded through
    record_escalation() and compares them with the configured limits.

    Attributes:
        max_cost_usd: Halt once the accumulated cost reaches this value.
        max_escalations: Halt once this many escalations were recorded.
    """

    max_cost_usd: float = 10.0
    max_escalations: int = 3
    # Running totals: not constructor arguments and hidden from repr().
    _total_cost_usd: float = field(default=0.0, init=False, repr=False)
    _escalation_count: int = field(default=0, init=False, repr=False)

    def _cost_exhausted(self) -> bool:
        """Return True when the accumulated cost has reached the cost limit."""
        return self._total_cost_usd >= self.max_cost_usd

    def _escalations_exhausted(self) -> bool:
        """Return True when the escalation count has reached its limit."""
        return self._escalation_count >= self.max_escalations

    def should_halt(self) -> bool:
        """Return True when either the cost or the escalation limit is reached."""
        return self._cost_exhausted() or self._escalations_exhausted()

    def record_escalation(self, cost_usd: float = 0.0) -> None:
        """Count one escalation and add ``cost_usd`` to the running cost."""
        self._escalation_count += 1
        self._total_cost_usd += cost_usd

    def halt_reason(self) -> str:
        """Describe which limit tripped the breaker.

        The cost limit takes precedence when both limits are reached.
        Returns an empty string when no limit has been reached, so the
        message never claims a limit was hit while the breaker is still open.
        """
        if self._cost_exhausted():
            return f"Cost limit {self._total_cost_usd:.3f}/{self.max_cost_usd:.3f} USD reached"
        if self._escalations_exhausted():
            return f"escalations {self._escalation_count}/{self.max_escalations} reached"
        return ""

    @property
    def total_cost_usd(self) -> float:
        """Accumulated cost in USD recorded so far (read-only)."""
        return self._total_cost_usd

    @property
    def escalation_count(self) -> int:
        """Number of escalations recorded so far (read-only)."""
        return self._escalation_count
186
+
187
@dataclass
class TaskContext:
    """Agent-state snapshot that the router scores to pick a tier."""

    # Prompt text; scanned for keywords and failure markers.
    prompt: str
    # Context size in tokens; compared with the large-context threshold.
    token_count: int = 0
    # Number of files touched; compared with the many-files threshold.
    files_touched: int = 0
    # Number of failed attempts so far; each one raises the score.
    failed_attempts: int = 0
    # Provider identifier; not read by the routing logic in this file.
    provider: str = Provider.GROK
196
+
197
+
198
class ModelRouter:
    """Route tasks and individual turns to a model tier based on complexity.

    Three routing entry points:
      1. route_task(ctx)        — overall task complexity → model tier
      2. route_tool(name)       — per-tool optimal tier (Scout for reads, etc.)
      3. select_tier_for_turn() — per-turn micro-toggle with mode override

    Escalation is implicit in the scores: each failed attempt adds
    _FAILURE_SCORE points at task level (_FAILURE_ADDITIVE_SCORE per turn),
    pushing the work to a higher tier.  escalate() bumps one tier explicitly
    (e.g. after a tool execution error) and escalate_with_context() also
    chooses a retry prompt based on the failure type.
    """

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def route_task(self, ctx: TaskContext) -> str:
        """Return the ModelTier constant for the task described by ``ctx``.

        The capped complexity score (keywords, context size, file count,
        failures) is mapped onto the tier thresholds.
        """
        score = self._complexity_score(ctx)
        log.debug(
            "Task complexity score=%d for prompt=%r",
            score,
            ctx.prompt[:80],
        )
        return self._tier_for_score(score)

    def route_tool(self, tool_name: str) -> str:
        """Return the ModelTier for a single tool operation.

        Scout tools map to SCOUT, coder tools to CODER; any other tool name
        falls through to THINKER.
        """
        if tool_name in _SCOUT_TOOLS:
            return ModelTier.SCOUT
        if tool_name in _CODER_TOOLS:
            return ModelTier.CODER
        return ModelTier.THINKER

    def escalate(self, current_tier: str) -> str:
        """Return the next tier up from ``current_tier`` (caps at REASONER).

        An unknown tier is logged as a warning and mapped to REASONER.
        """
        try:
            idx = _TIER_ORDER.index(current_tier)
        except ValueError:
            log.warning(
                "Unknown tier %r passed to escalate — defaulting to REASONER",
                current_tier,
            )
            return ModelTier.REASONER
        return _TIER_ORDER[min(idx + 1, len(_TIER_ORDER) - 1)]

    def select_tier_for_turn(
        self,
        ctx: TaskContext,
        *,
        reasoning_mode: str = "auto",
        tool_names: list[str] | None = None,
        is_tool_result_only: bool = False,
    ) -> str:
        """Return ModelTier constant for a single agent turn.

        Args:
            ctx: Current task context (prompt, tokens, failures).
            reasoning_mode: "on" | "off" | "auto" (case-insensitive,
                surrounding whitespace ignored).  "on" forces REASONER and
                "off" forces SCOUT; any other or non-string value is
                treated as "auto".
            tool_names: Tools about to be called (or just called). Used in
                the per-turn scoring table.
            is_tool_result_only: True when the prompt text is only a tool
                result (no user input this turn). Enables injection-cap logic.

        Returns:
            ModelTier constant string.
        """
        # Normalise defensively so stray whitespace or a non-string flag
        # degrades to auto mode instead of raising.
        if isinstance(reasoning_mode, str):
            mode = reasoning_mode.strip().lower()
        else:
            mode = "auto"

        if mode == "off":
            log.info("gdm.router mode=off → SCOUT (forced)")
            return ModelTier.SCOUT

        if mode == "on":
            log.info("gdm.router mode=on → REASONER (forced)")
            return ModelTier.REASONER

        # ── auto mode: deterministic scoring ──────────────────────────────
        score, signals = self._per_turn_score(ctx, tool_names or [])
        tier = self._tier_for_score(score)

        # Prompt-injection cap: a tool-result-only turn whose prompt keyword
        # score is below the CODER threshold may not reach REASONER.  The
        # keyword scan runs last so it is skipped on ordinary turns.
        # NOTE(review): the score is taken from ctx.prompt; if that field holds
        # the tool output on such turns this does not measure the user's
        # original message — confirm against callers.
        if (
            is_tool_result_only
            and tier == ModelTier.REASONER
            and self._keyword_score(ctx.prompt) < _CODER_THRESHOLD
        ):
            log.info(
                "gdm.router injection-cap applied: tool-result score=%d → capping at THINKER",
                score,
            )
            tier = _INJECTION_CAP_TIER

        log.info(
            "gdm.router turn=auto score=%d tier=%s signals=%s",
            score,
            tier,
            signals,
        )
        return tier

    def escalate_with_context(self, ctx: "EscalationContext") -> "EscalationDecision":
        """Decide how to retry after a failure.

        * REFUSAL — no retry tier (``next_tier`` is None).
        * API_ERROR — retry on the same tier without the transcript.
        * anything else — move one tier up via escalate(), with a retry
          prompt tailored to the failure type; the transcript is carried
          only for CONTEXT_OVERFLOW.

        ``ctx.failure_type`` may be a FailureType member or its raw string
        value.
        """
        ft = ctx.failure_type
        # Plain strings have no ``.value``; fall back to the value itself.
        failure_label = getattr(ft, "value", ft)
        log.warning(
            "gdm.router escalating failure=%s tier=%s attempt=%d cost=%.3f",
            failure_label, ctx.current_tier, ctx.attempt_number, ctx.cost_spent_usd,
        )
        if ft == FailureType.REFUSAL:
            return EscalationDecision(next_tier=None, retry_prompt_addition="", carry_transcript=False)
        if ft == FailureType.API_ERROR:
            return EscalationDecision(
                next_tier=ctx.current_tier,
                retry_prompt_addition=f"Retry: {ctx.failure_detail}",
                carry_transcript=False,
            )

        # Reuse the shared ladder so unknown tiers are handled (and logged)
        # the same way as in escalate().
        next_tier = self.escalate(ctx.current_tier)
        carry = ft == FailureType.CONTEXT_OVERFLOW
        if ft == FailureType.TOOL_ERROR:
            prompt = f"Tool error ({ctx.failure_detail}). Retry with a different approach."
        elif ft == FailureType.TEST_FAILURE:
            prompt = f"Fix the failures: {ctx.failure_detail}"
        elif ft == FailureType.QUALITY_FAILURE:
            prompt = f"Quality issue: {ctx.failure_detail}. Improve the solution."
        elif ft == FailureType.CONTEXT_OVERFLOW:
            prompt = f"Summarize the context and continue. {ctx.failure_detail}".strip()
        else:
            prompt = f"Escalating due to {failure_label}: {ctx.failure_detail}"
        return EscalationDecision(next_tier=next_tier, retry_prompt_addition=prompt, carry_transcript=carry)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _tier_for_score(score: int) -> str:
        """Map a numeric score onto the tier thresholds (highest match wins)."""
        if score >= _REASONER_THRESHOLD:
            return ModelTier.REASONER
        if score >= _THINKER_THRESHOLD:
            return ModelTier.THINKER
        if score >= _CODER_THRESHOLD:
            return ModelTier.CODER
        return ModelTier.SCOUT

    def _per_turn_score(self, ctx: TaskContext, tool_names: list[str]) -> tuple[int, list[str]]:
        """Compute the per-turn score.

        Returns:
            ``(score, signals)`` where ``signals`` lists a short label for
            every rule that contributed, in evaluation order.  The score is
            not capped and can be negative.
        """
        score = 0
        signals: list[str] = []

        # Tool-only turn penalty (only when purely read/scout tools, no write tools)
        has_write_tools = any(t in _WRITE_TOOLS for t in tool_names)
        if tool_names and not ctx.prompt.strip() and not has_write_tools:
            score += _TOOL_ONLY_PENALTY
            # The penalty is negative, so its own sign appears in the label.
            signals.append(f"tool_only{_TOOL_ONLY_PENALTY}")

        # Write tools
        if has_write_tools:
            score += _WRITE_TOOL_SCORE
            signals.append(f"write_tool+{_WRITE_TOOL_SCORE}")

        # Test failure output: "error" plus at least one failure marker
        prompt_lower = ctx.prompt.lower()
        if "error" in prompt_lower and ("traceback" in prompt_lower or "assert" in prompt_lower or "failed" in prompt_lower):
            score += _TEST_FAILURE_SCORE
            signals.append(f"test_failure+{_TEST_FAILURE_SCORE}")

        # Many file paths, approximated by counting source-file extensions.
        # These are plain substring counts, so ".json" also counts as ".js"
        # and ".tsx" as ".ts".
        path_count = prompt_lower.count(".py") + prompt_lower.count(".ts") + prompt_lower.count(".js")
        if path_count >= _MANY_PATHS_THRESHOLD:
            score += _MANY_PATHS_SCORE
            signals.append(f"many_files+{_MANY_PATHS_SCORE}")

        # Large context
        if ctx.token_count > _LARGE_CONTEXT_THRESHOLD:
            score += _LARGE_CONTEXT_SCORE
            signals.append(f"large_ctx+{_LARGE_CONTEXT_SCORE}")

        # Failed attempts (additive)
        if ctx.failed_attempts > 0:
            fa_score = ctx.failed_attempts * _FAILURE_ADDITIVE_SCORE
            score += fa_score
            signals.append(f"failures+{fa_score}")

        # Keywords
        kw_score = self._keyword_score(ctx.prompt)
        if kw_score > 0:
            label = "heavy" if kw_score >= _HEAVY_SCORE else ("medium" if kw_score >= _MEDIUM_SCORE else "light")
            score += kw_score
            signals.append(f"{label}_kw+{kw_score}")

        return score, signals

    def _complexity_score(self, ctx: TaskContext) -> int:
        """Return the task-level score, capped at _MAX_COMPLEXITY_SCORE.

        Sums the keyword sub-score, a large-context bonus, a many-files
        bonus, and _FAILURE_SCORE points per failed attempt.
        """
        score = self._keyword_score(ctx.prompt)
        if ctx.token_count > _LARGE_CONTEXT_THRESHOLD:
            score += _LARGE_CONTEXT_SCORE
        if ctx.files_touched > _MANY_FILES_THRESHOLD:
            score += _MANY_FILES_SCORE
        score += ctx.failed_attempts * _FAILURE_SCORE
        return min(score, _MAX_COMPLEXITY_SCORE)

    def _keyword_score(self, prompt: str) -> int:
        """Return keyword-based sub-score for the prompt text.

        Checks tier groups in descending order; returns the score for the
        highest-severity keyword found (not additive across tiers).
        Matching is a case-insensitive substring test, so a keyword also
        matches inside longer words (e.g. "fix" in "prefix").
        """
        prompt_lower = prompt.lower()
        if any(kw in prompt_lower for kw in _HEAVY_KEYWORDS):
            return _HEAVY_SCORE
        if any(kw in prompt_lower for kw in _MEDIUM_KEYWORDS):
            return _MEDIUM_SCORE
        if any(kw in prompt_lower for kw in _LIGHT_KEYWORDS):
            return _LIGHT_SCORE
        return 0