@0dai-dev/cli 4.3.6 → 4.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +12 -11
  2. package/bin/0dai.js +127 -30
  3. package/lib/ai/manifest/mcp-exposure-contract.json +121 -0
  4. package/lib/ai/meta/manifest/mcp-tool-tiers.json +435 -0
  5. package/lib/ai/registry/mcp-catalog.json +98 -0
  6. package/lib/commands/auth.js +2 -1
  7. package/lib/commands/compliance.js +1 -1
  8. package/lib/commands/doctor.js +506 -12
  9. package/lib/commands/experience.js +40 -5
  10. package/lib/commands/feedback.js +157 -15
  11. package/lib/commands/gh.js +26 -0
  12. package/lib/commands/graph.js +9 -4
  13. package/lib/commands/heatmap.js +1 -1
  14. package/lib/commands/init.js +209 -27
  15. package/lib/commands/mcp.js +111 -33
  16. package/lib/commands/models.js +138 -41
  17. package/lib/commands/provider.js +30 -59
  18. package/lib/commands/quota.js +1 -1
  19. package/lib/commands/receipt.js +1 -1
  20. package/lib/commands/run.js +14 -6
  21. package/lib/commands/runner.js +31 -1
  22. package/lib/commands/status.js +38 -10
  23. package/lib/commands/swarm.js +130 -12
  24. package/lib/commands/update.js +184 -38
  25. package/lib/commands/usage.js +1 -1
  26. package/lib/commands/validate.js +32 -3
  27. package/lib/commands/vault.js +43 -8
  28. package/lib/python/__init__.py +0 -0
  29. package/lib/python/agent_quotas.py +525 -0
  30. package/lib/python/anomaly_alert.py +397 -0
  31. package/lib/python/anti_pattern_detector.py +799 -0
  32. package/lib/python/auth.py +443 -0
  33. package/lib/python/capi_profile_guard.py +477 -0
  34. package/lib/python/compliance_report.py +581 -0
  35. package/lib/python/drift_detector.py +388 -0
  36. package/lib/python/experience_pipeline.py +1130 -0
  37. package/lib/python/graph.py +19 -0
  38. package/lib/python/graph_core.py +293 -0
  39. package/lib/python/graph_io.py +179 -0
  40. package/lib/python/graph_legacy.py +2052 -0
  41. package/lib/python/graph_legacy_helpers.py +221 -0
  42. package/lib/python/graph_outcomes_core.py +85 -0
  43. package/lib/python/graph_queries.py +171 -0
  44. package/lib/python/graph_slice.py +198 -0
  45. package/lib/python/graph_slicer.py +576 -0
  46. package/lib/python/graph_slicer_cli.py +60 -0
  47. package/lib/python/graph_validation.py +64 -0
  48. package/lib/python/heatmap.py +934 -0
  49. package/lib/python/json_utils.py +193 -0
  50. package/lib/python/mcp_exposure_check.py +247 -0
  51. package/lib/python/model_router.py +1434 -0
  52. package/lib/python/project_manager.py +621 -0
  53. package/lib/python/provider_profiles.py +1618 -0
  54. package/lib/python/provider_registry.py +1211 -0
  55. package/lib/python/provider_registry_cli.py +125 -0
  56. package/lib/python/receipt_png.py +727 -0
  57. package/lib/python/structural_memory.py +325 -0
  58. package/lib/python/swarm_cost.py +177 -0
  59. package/lib/python/usage_ledger.py +569 -0
  60. package/lib/scripts/mcp_tier_config.py +240 -0
  61. package/lib/shared.js +95 -12
  62. package/lib/tui/index.mjs +35174 -0
  63. package/lib/utils/activation_telemetry.js +1 -4
  64. package/lib/utils/constants.js +7 -1
  65. package/lib/utils/identity.js +184 -0
  66. package/lib/utils/mcp-auth.js +81 -15
  67. package/lib/utils/plan.js +1 -1
  68. package/lib/vault/index.js +19 -3
  69. package/lib/vault/storage.js +21 -2
  70. package/lib/wizard.js +5 -2
  71. package/package.json +9 -3
  72. package/scripts/build-python-bundle.js +106 -0
  73. package/scripts/build-tui.js +14 -1
  74. package/scripts/harvest_experience.py +523 -0
  75. package/scripts/postinstall.js +15 -9
@@ -0,0 +1,1434 @@
1
+ #!/usr/bin/env python3
2
+ """Model routing recommendations from experience data.
3
+
4
+ Analyzes historical events to recommend the best agent/model for a given
5
+ task type, using a weighted composite score across quality, success rate,
6
+ cost efficiency, and speed.
7
+
8
+ Issue: #86
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import dataclasses
13
+ import json
14
+ import logging
15
+ import math
16
+ import os
17
+ import pathlib
18
+ import sys
19
+ from typing import Any
20
+
21
+ log = logging.getLogger("0dai.model_router")
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Task-type specific weights (must sum to 1.0)
25
+ # ---------------------------------------------------------------------------
26
+
27
# Per-task scoring weights. Each row lists the weights in the fixed order
# (quality, success, cost, speed) and must sum to 1.0.
TASK_WEIGHTS: dict[str, dict[str, float]] = {
    task_name: dict(zip(("quality", "success", "cost", "speed"), row))
    for task_name, row in (
        ("feat", (0.35, 0.30, 0.20, 0.15)),
        ("fix", (0.30, 0.40, 0.15, 0.15)),
        ("refactor", (0.45, 0.30, 0.15, 0.10)),
        ("test", (0.20, 0.35, 0.30, 0.15)),
        ("docs", (0.15, 0.25, 0.35, 0.25)),
        ("triage", (0.15, 0.30, 0.35, 0.20)),
        ("long_context_audit", (0.35, 0.30, 0.15, 0.20)),
        ("long_context_audit_ru", (0.35, 0.30, 0.15, 0.20)),
        ("cheap_triage", (0.15, 0.30, 0.35, 0.20)),
        ("cheap_triage_ru", (0.15, 0.30, 0.35, 0.20)),
        ("doc_generation", (0.15, 0.25, 0.35, 0.25)),
        ("hotfix", (0.30, 0.40, 0.15, 0.15)),
        ("design", (0.45, 0.30, 0.10, 0.15)),
    )
}

# Weights for task types that have no dedicated row above.
DEFAULT_WEIGHTS = dict(quality=0.30, success=0.30, cost=0.25, speed=0.15)
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Task-type provider matrix (#2198 Phase 1)
47
+ #
48
+ # Used for cold-start routing before enough experience data exists. Providers
49
+ # here are dispatch providers, not necessarily CLI binary names. Existing
50
+ # Codex/Claude paths stay present for code-heavy and reasoning-heavy work while
51
+ # cheap/high-context direct APIs absorb mechanical and volume-heavy tasks.
52
+ # ---------------------------------------------------------------------------
53
+
54
# Ordered provider routes per canonical task type. Every row is written as
# (provider, model, agent, tier, effort, billing_class); the first row of a
# task is its primary route and the remaining rows are fallbacks in order.
TASK_PROVIDER_MATRIX: dict[str, list[dict[str, str]]] = {
    task_name: [
        dict(zip(("provider", "model", "agent", "tier", "effort", "billing_class"), row))
        for row in rows
    ]
    for task_name, rows in {
        "read": (
            ("codex", "gpt-5.4-mini", "codex", "fast", "low", "subscription"),
            ("gemini-direct", "gemini-2.5-flash-lite", "gemini", "fast", "low", "subscription"),
        ),
        "search": (
            ("codex", "gpt-5.4-mini", "codex", "fast", "low", "subscription"),
            ("gemini-direct", "gemini-2.5-flash-lite", "gemini", "fast", "low", "subscription"),
        ),
        "summary": (
            ("codex", "gpt-5.4-mini", "codex", "fast", "low", "subscription"),
            ("gemini-direct", "gemini-2.5-flash-lite", "gemini", "fast", "low", "subscription"),
        ),
        "test_output": (
            ("codex", "gpt-5.4-mini", "codex", "fast", "low", "subscription"),
            ("gemini-direct", "gemini-2.5-flash-lite", "gemini", "fast", "low", "subscription"),
        ),
        "format": (
            ("codex", "gpt-5.4-mini", "codex", "fast", "low", "subscription"),
        ),
        "small_fix": (
            ("codex", "gpt-5.4-mini", "codex", "fast", "medium", "subscription"),
            ("opencode-go", "opencode-go/minimax-m2.7", "opencode-go", "fast", "medium", "subscription"),
        ),
        "triage": (
            ("deepseek", "deepseek-chat", "deepseek", "balanced", "medium", "pay_per_use"),
            ("openrouter", "qwen/qwen3-coder", "openrouter", "fast", "low", "pay_per_use"),
            ("codex", "gpt-5.4-mini", "codex", "fast", "low", "subscription"),
        ),
        "long_context_audit": (
            ("kimi", "moonshotai/kimi-k2.5", "gemini", "deep", "high", "pay_per_use"),
            ("gemini-direct", "gemini-2.5-pro", "gemini", "deep", "high", "subscription"),
        ),
        "long_context_audit_ru": (
            ("yandexgpt", "yandexgpt", "gemini", "balanced", "medium", "pay_per_use"),
            ("gemini-direct", "gemini-2.5-pro", "gemini", "deep", "high", "subscription"),
        ),
        "cheap_triage_ru": (
            ("gigachat", "GigaChat-2-Pro", "deepseek", "balanced", "medium", "pay_per_use"),
            ("deepseek", "deepseek-chat", "deepseek", "balanced", "medium", "pay_per_use"),
        ),
        "cheap_triage": (
            ("deepseek", "deepseek-chat", "deepseek", "balanced", "medium", "pay_per_use"),
            ("openrouter", "qwen/qwen3-coder", "openrouter", "fast", "low", "pay_per_use"),
            ("glm", "accounts/fireworks/models/glm-5p1", "deepseek", "balanced", "medium", "pay_per_use"),
        ),
        "legacy_long_context_audit": (
            ("gemini-direct", "gemini-2.5-pro", "gemini", "deep", "high", "subscription"),
            ("claude-opus", "opus", "claude", "deep", "high", "subscription"),
        ),
        "refactor": (
            ("codex", "gpt-5.4", "codex", "balanced", "medium", "subscription"),
            ("opencode-go", "opencode-go/kimi-k2.6", "opencode-go", "balanced", "medium", "subscription"),
        ),
        "broad_refactor": (
            ("openrouter", "anthropic/claude-opus-4-8", "openrouter", "deep", "high", "pay_per_use"),
            ("codex", "gpt-5.3-codex", "codex", "deep", "high", "subscription"),
            ("gemini-direct", "gemini-2.5-pro", "gemini", "deep", "high", "subscription"),
        ),
        "design": (
            ("openrouter", "anthropic/claude-opus-4-8", "openrouter", "deep", "high", "pay_per_use"),
            ("claude-opus", "opus", "claude", "deep", "high", "subscription"),
            ("claude-sonnet", "sonnet", "claude", "balanced", "medium", "subscription"),
        ),
        "doc_generation": (
            ("codex", "gpt-5.4-mini", "codex", "fast", "medium", "subscription"),
            ("gemini-direct", "gemini-2.5-flash-lite", "gemini", "fast", "low", "subscription"),
        ),
        "hotfix": (
            ("codex", "gpt-5.4", "codex", "balanced", "medium", "subscription"),
            ("claude-sonnet", "sonnet", "claude", "balanced", "medium", "subscription"),
        ),
        "review": (
            ("codex", "gpt-5.4", "codex", "balanced", "medium", "subscription"),
            ("gemini-direct", "gemini-2.5-pro", "gemini", "balanced", "medium", "subscription"),
            # #3770: cheap + premium OpenRouter options as later-tier
            # fallbacks for the review route. Primary stays codex/gpt-5.4 so
            # existing guardrails and budgets are unchanged.
            ("openrouter", "qwen/qwen3-coder", "openrouter", "fast", "low", "pay_per_use"),
            ("openrouter", "anthropic/claude-opus-4-8", "openrouter", "deep", "high", "pay_per_use"),
        ),
        "security": (
            ("openrouter", "anthropic/claude-opus-4-8", "openrouter", "deep", "high", "pay_per_use"),
            ("claude-opus", "opus", "claude", "deep", "high", "subscription"),
            ("codex", "gpt-5.3-codex", "codex", "deep", "high", "subscription"),
        ),
    }.items()
}
141
+
142
# Accepted task-type spellings mapped to their canonical matrix key. The rows
# are grouped by canonical key; the aliases inside each group keep their
# original relative order so dict iteration order is unchanged.
TASK_PROVIDER_ALIASES: dict[str, str] = {
    alias: canonical_task
    for canonical_task, aliases in (
        ("refactor", ("", "general", "feat", "feature", "implement", "implementation")),
        ("hotfix", ("fix", "bugfix")),
        ("small_fix", ("small_fix", "small-fix")),
        ("doc_generation", ("docs", "doc", "documentation", "docs_typo", "docs-typo")),
        ("read", ("docs_read", "docs-read", "read", "reading", "simple", "simple_read", "simple-read")),
        ("search", ("search", "grep")),
        ("summary", ("summary", "summarize")),
        ("test_output", ("test", "tests", "test_output", "test-output")),
        ("format", ("format", "formatting")),
        ("review", ("review", "code_review", "code-review")),
        ("security", ("security", "security_review", "security-review")),
        ("broad_refactor", ("broad_refactor", "broad-refactor", "root_cause", "root-cause")),
        (
            "long_context_audit",
            (
                "audit",
                "long-context-audit",
                "long_context",
                "long_context_audit_en",
                "long-context-audit-en",
                "long_context_en",
            ),
        ),
        (
            "long_context_audit_ru",
            (
                "long_context_audit_ru",
                "long-context-audit-ru",
                "long_context_ru",
                "ru_audit",
                "audit_ru",
            ),
        ),
        ("cheap_triage", ("cheap_triage", "cheap-triage")),
        ("cheap_triage_ru", ("cheap_triage_ru", "cheap-triage-ru", "triage_ru", "ru_triage")),
    )
    for alias in aliases
}
203
+
204
# Accepted provider spellings mapped to their canonical provider id, grouped
# by canonical id with the original alias order preserved inside each group.
PROVIDER_ALIASES: dict[str, str] = {
    alias: canonical_provider
    for canonical_provider, aliases in (
        ("gemini-direct", ("gemini", "gemini-direct", "gemini-1.5")),
        ("deepseek", ("deepseek",)),
        ("codex", ("codex",)),
        ("claude-opus", ("claude-opus", "opus")),
        ("claude-sonnet", ("claude-sonnet", "sonnet")),
        ("gigachat", ("gigachat", "giga-chat", "sber", "sberbank")),
        ("yandexgpt", ("yandexgpt", "yandex-gpt", "yandex", "ya-gpt")),
        ("glm", ("glm", "glm-5", "glm-5.1", "glm-5p1", "zhipu", "z-ai")),
        ("kimi", ("kimi", "kimi-200k", "kimi-k2.5", "moonshot")),
        ("opencode-go", ("opencode-go",)),
        # OpenRouter unified gateway — review-bot roster (#3770). These
        # aliases resolve provider-ONLY: each one selects the gateway, never a
        # specific model, so the bare "qwen" alias is not ambiguous between
        # qwen3-coder and qwen3-max. The concrete slug (qwen/qwen3-coder vs
        # qwen/qwen3-max) is supplied separately (model_override / the
        # review-bot chain) and passed straight through to OpenRouter. The
        # qwen3-coder / qwen3-max aliases exist so an operator typing either
        # resolves cleanly.
        (
            "openrouter",
            (
                "openrouter",
                "open-router",
                "or",
                "qwen",
                "qwen3-coder",
                "qwen3-max",
                "opus-4-8",
                "claude-opus-4-8",
            ),
        ),
    )
    for alias in aliases
}
250
+
251
# Directory holding the per-provider KEY=value secret files. Resolved once at
# import time from the current user's home directory.
_SECRETS_DIR = pathlib.Path.home() / ".config" / "secrets"

# Static REST backend metadata for providers that are dispatched directly.
# "secret_path" / "fallback_secret_path" are stored as strings.
PROVIDER_BACKENDS: dict[str, dict[str, Any]] = {
    "gigachat": dict(
        vendor="Sber",
        endpoint="https://gigachat.devices.sberbank.ru/api/v1/chat/completions",
        default_model="GigaChat-2-Pro",
        auth_env="GIGACHAT_AUTH_KEY",
        secret_path=str(_SECRETS_DIR / "gigachat.env"),
        transport="openai-chat-rest",
        context_tokens="32k",
    ),
    "yandexgpt": dict(
        vendor="Yandex Cloud",
        endpoint="https://llm.api.cloud.yandex.net/foundationModels/v1/completion",
        default_model="yandexgpt",
        auth_env="YANDEX_API_KEY",
        secondary_auth_env="YANDEX_FOLDER_ID",
        secret_path=str(_SECRETS_DIR / "yandex.env"),
        transport="yandex-completion-rest",
        context_tokens="32k",
    ),
    "glm": dict(
        vendor="Z.ai",
        endpoint="https://api.fireworks.ai/inference/v1/chat/completions",
        fallback_endpoint="https://openrouter.ai/api/v1/chat/completions",
        default_model="accounts/fireworks/models/glm-5p1",
        fallback_model="z-ai/glm-5.1",
        auth_env="FIREWORKS_API_KEY",
        fallback_auth_env="OPENROUTER_API_KEY",
        secret_path=str(_SECRETS_DIR / "fireworks.env"),
        fallback_secret_path=str(_SECRETS_DIR / "openrouter.env"),
        transport="openai-chat-rest",
        context_tokens="202k",
    ),
    "kimi": dict(
        vendor="Moonshot",
        endpoint="https://openrouter.ai/api/v1/chat/completions",
        default_model="moonshotai/kimi-k2.5",
        auth_env="OPENROUTER_API_KEY",
        secret_path=str(_SECRETS_DIR / "openrouter.env"),
        transport="openai-chat-rest",
        context_tokens="262k",
    ),
}

# Credential requirements per provider. "required" lists names that must all
# be present; "any_of" lists alternative groups of names; "secret_paths" are
# the files consulted when a name is not set in the environment.
PROVIDER_CREDENTIALS: dict[str, dict[str, Any]] = {
    "gigachat": dict(
        required=("GIGACHAT_AUTH_KEY",),
        secret_paths=(_SECRETS_DIR / "gigachat.env",),
    ),
    "yandexgpt": dict(
        required=("YANDEX_API_KEY", "YANDEX_FOLDER_ID"),
        secret_paths=(_SECRETS_DIR / "yandex.env",),
    ),
    "glm": dict(
        any_of=(("FIREWORKS_API_KEY",), ("OPENROUTER_API_KEY",)),
        secret_paths=(
            _SECRETS_DIR / "fireworks.env",
            _SECRETS_DIR / "openrouter.env",
        ),
    ),
    "kimi": dict(
        required=("OPENROUTER_API_KEY",),
        secret_paths=(_SECRETS_DIR / "openrouter.env",),
    ),
}
316
+
317
# Provider -> agent and provider -> default model, both derived by walking
# every route of TASK_PROVIDER_MATRIX in definition order.
# NOTE(review): a provider that appears in several routes keeps the value of
# its LAST occurrence (later dict keys overwrite earlier ones) — confirm that
# last-wins is the intended default for multi-route providers.
PROVIDER_AGENT: dict[str, str] = dict(
    (entry["provider"], entry["agent"])
    for task_routes in TASK_PROVIDER_MATRIX.values()
    for entry in task_routes
)

PROVIDER_DEFAULT_MODEL: dict[str, str] = dict(
    (entry["provider"], entry["model"])
    for task_routes in TASK_PROVIDER_MATRIX.values()
    for entry in task_routes
)
328
+
329
+
330
def normalize_task_type(task_type: str) -> str:
    """Map a free-form task type onto a canonical matrix key.

    The input is stringified, trimmed, lower-cased and has hyphens replaced by
    underscores. A known alias resolves to its target; a name that is already
    a matrix key is returned as-is; anything else falls back to "refactor".
    """
    lookup = str(task_type or "").strip().lower().replace("-", "_")
    if lookup in TASK_PROVIDER_ALIASES:
        return TASK_PROVIDER_ALIASES[lookup]
    if lookup in TASK_PROVIDER_MATRIX:
        return lookup
    return "refactor"
334
+
335
+
336
def normalize_provider(provider: str) -> str:
    """Resolve a provider override to its canonical provider id.

    The input is stringified, trimmed, lower-cased and has underscores
    replaced by hyphens before the alias lookup.

    Raises:
        ValueError: if the input is empty after normalization, or if it is not
            a known alias. The latter message lists the providers that appear
            in the task matrix.
    """
    lookup = str(provider or "").strip().lower().replace("_", "-")
    if lookup == "":
        raise ValueError("provider override is empty")
    resolved = PROVIDER_ALIASES.get(lookup)
    if resolved:
        return resolved
    supported = ", ".join(sorted(PROVIDER_AGENT))
    raise ValueError(f"unsupported provider {provider!r}; supported providers: {supported}")
346
+
347
+
348
def provider_backend_for_provider(provider: str) -> dict[str, Any]:
    """Look up the static REST backend metadata of a provider.

    Returns a fresh copy of the provider's PROVIDER_BACKENDS entry with an
    extra "provider" key holding the canonical provider id.

    Raises:
        ValueError: if the provider is unknown (from normalize_provider) or
            has no entry in PROVIDER_BACKENDS.
    """
    canonical = normalize_provider(provider)
    registration = PROVIDER_BACKENDS.get(canonical)
    if registration:
        return {**registration, "provider": canonical}
    raise ValueError(f"provider {provider!r} has no direct backend registration")
355
+
356
+
357
def _read_secret_env_file(path: pathlib.Path) -> dict[str, str]:
    """Parse a KEY=value env file without logging or exposing values.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped. A
    leading ``export `` is ignored. Keys and values are whitespace-trimmed,
    and one pair of matching surrounding quotes (single or double) is removed
    from the value. Entries with an empty key or empty value are dropped.

    Args:
        path: Location of the env file.

    Returns:
        Mapping of key to value. Empty when the path is not a regular file,
        cannot be read, or is not valid UTF-8.
    """
    try:
        if not path.is_file():
            return {}
        # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM, which
        # would otherwise become part of the first key.
        lines = path.read_text(encoding="utf-8-sig").splitlines()
    except (OSError, UnicodeDecodeError):
        # An unreadable or undecodable secret file is treated as absent.
        return {}

    values: dict[str, str] = {}
    for raw in lines:
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("export "):
            line = line[len("export "):].strip()
        key, separator, value = line.partition("=")
        if not separator:
            continue
        key = key.strip()
        value = value.strip()
        # Remove only a matching outer pair, so a quote character that is
        # part of the secret itself is kept intact.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        if key and value:
            values[key] = value
    return values
380
+
381
+
382
def _credential_keys(spec: dict[str, Any]) -> set[str]:
    """Return every credential name mentioned by a spec.

    Collects the names under "required" plus the names inside every group
    under "any_of", each converted to ``str``.
    """
    names = {str(name) for name in spec.get("required", ())}
    names |= {str(name) for group in spec.get("any_of", ()) for name in group}
    return names
387
+
388
+
389
def provider_credential_values(provider: str) -> dict[str, str]:
    """Collect the credential values currently available for a provider.

    For each name in the provider's credential spec, the process environment
    is consulted first; if the variable is unset or blank, the value parsed
    from the spec's secret files is used. When several secret files define
    the same name, the file listed later wins. Names without a non-empty
    value are omitted.

    Returns:
        Mapping of credential name to stripped value; empty when the provider
        has no entry in PROVIDER_CREDENTIALS.

    Raises:
        ValueError: if the provider is unknown (from normalize_provider).
    """
    canonical = normalize_provider(provider)
    spec = PROVIDER_CREDENTIALS.get(canonical)
    if not spec:
        return {}

    from_files: dict[str, str] = {}
    for secret_path in spec.get("secret_paths", ()):
        parsed = _read_secret_env_file(pathlib.Path(secret_path).expanduser())
        from_files.update(parsed)

    resolved: dict[str, str] = {}
    for name in sorted(_credential_keys(spec)):
        candidate = os.environ.get(name, "").strip()
        if not candidate:
            candidate = from_files.get(name, "").strip()
        if candidate:
            resolved[name] = candidate
    return resolved
405
+
406
+
407
def provider_credentials_available(provider: str) -> bool:
    """Return whether a provider has enough credentials for direct dispatch.

    A provider without an entry in PROVIDER_CREDENTIALS is always considered
    available. Otherwise every name under "required" must have a value AND,
    when "any_of" is present, at least one of its groups must be fully
    satisfied. Both conditions are enforced together so that a spec declaring
    both keys is not silently reduced to its "required" part.

    Raises:
        ValueError: if the provider is unknown (from normalize_provider).
    """
    canonical = normalize_provider(provider)
    spec = PROVIDER_CREDENTIALS.get(canonical)
    if not spec:
        return True

    values = provider_credential_values(canonical)

    required = tuple(str(key) for key in spec.get("required", ()))
    if not all(values.get(key) for key in required):
        return False

    groups = spec.get("any_of", ())
    if groups and not any(
        all(values.get(str(key)) for key in group) for group in groups
    ):
        return False

    return True
421
+
422
+
423
def provider_route_for_provider(provider: str, *, model: str = "") -> dict[str, str]:
    """Build a route dict for an explicitly requested provider.

    Args:
        provider: Provider name or alias; resolved via normalize_provider.
        model: Model to use. When empty, the provider's entry in
            PROVIDER_DEFAULT_MODEL is used, or "" if it has none.

    Returns:
        Dict with "provider", "model", "agent" and "source" keys; "agent"
        falls back to the canonical provider id, and "source" is always
        "provider-override".

    Raises:
        ValueError: if the provider is empty or unsupported.
    """
    canonical = normalize_provider(provider)
    chosen_model = model if model else PROVIDER_DEFAULT_MODEL.get(canonical, "")
    chosen_agent = PROVIDER_AGENT.get(canonical, canonical)
    return dict(
        provider=canonical,
        model=chosen_model,
        agent=chosen_agent,
        source="provider-override",
    )
432
+
433
+
434
def provider_matrix_for_task(
    task_type: str,
    *,
    require_credentials: bool = False,
) -> list[dict[str, str]]:
    """Return the ordered provider routes for a task type.

    Each returned route is a copy of the matrix row with an added
    ``"source": "task-matrix"`` key.

    Args:
        task_type: Task name or alias; resolved via normalize_task_type.
        require_credentials: When true, routes whose provider lacks
            credentials are filtered out. If that would leave nothing, the
            full unfiltered list is returned instead.
    """
    canonical = normalize_task_type(task_type)
    routes = [{**row, "source": "task-matrix"} for row in TASK_PROVIDER_MATRIX[canonical]]
    if not require_credentials:
        return routes
    usable = [row for row in routes if provider_credentials_available(row["provider"])]
    # Never hand back an empty list: fall back to the unfiltered routes.
    return usable or routes
450
+
451
+
452
def provider_route_for_task(
    task_type: str,
    *,
    require_credentials: bool = False,
) -> dict[str, str]:
    """Return the first (primary) route that provider_matrix_for_task yields.

    Both arguments are forwarded unchanged to provider_matrix_for_task.
    """
    candidates = provider_matrix_for_task(
        task_type,
        require_credentials=require_credentials,
    )
    return candidates[0]
462
+
463
+
464
+ # ---------------------------------------------------------------------------
465
+ # RuAPI mirror fallback (#2251)
466
+ #
467
+ # Primary: ruapi (api.stepanovikov.uno)
468
+ # Secondary: ruapi-shop (ruapi.shop) — same ANTHROPIC_AUTH_TOKEN, same models
469
+ #
470
+ # Callers that opt into fallback should use ``dispatch_ruapi_with_fallback``.
471
+ # Existing single-endpoint clients are unaffected — primary behaviour stays
472
+ # identical when the fallback path is not invoked.
473
+ # ---------------------------------------------------------------------------
474
+
475
# Provider ids of the RuAPI mirror chain, in the order they are attempted.
RUAPI_MIRRORS: tuple[str, ...] = (
    "ruapi",       # primary
    "ruapi-shop",  # secondary
)

# HTTP statuses after which the next mirror is tried: 408 and 429 are treated
# as transient, the 5xx codes as upstream/server errors.
RUAPI_RETRY_STATUSES: frozenset[int] = frozenset((408, 429, 500, 502, 503, 504))
480
+
481
+
482
def _load_provider_registry_endpoints() -> dict[str, str]:
    """Read provider-registry.json and return {provider_id: endpoint}.

    stdlib-only — no jsonschema dep. Used for fallback dispatch so the
    endpoints stay aligned with the canonical registry without duplication.

    The registry is looked up at ``ai/meta/manifest/provider-registry.json``
    relative to the parent of this module's directory.

    Returns:
        Mapping of provider id to its non-empty string endpoint. Empty when
        the file is missing, unreadable, not valid UTF-8 / JSON, or does not
        have an object under "providers". Entries whose spec is not an object
        or whose endpoint is not a non-empty string are skipped.
    """
    registry_path = (
        pathlib.Path(__file__).resolve().parent.parent
        / "ai" / "meta" / "manifest" / "provider-registry.json"
    )
    try:
        data = json.loads(registry_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}

    # A syntactically valid registry can still have the wrong shape (e.g. a
    # top-level list); treat that like a missing registry instead of raising.
    providers = data.get("providers") if isinstance(data, dict) else None
    if not isinstance(providers, dict):
        return {}

    endpoints: dict[str, str] = {}
    for pid, spec in providers.items():
        if not isinstance(spec, dict):
            continue
        endpoint = spec.get("endpoint")
        # Callers apply string methods to the endpoint, so only keep strings.
        if isinstance(endpoint, str) and endpoint:
            endpoints[pid] = endpoint
    return endpoints
502
+
503
+
504
def ruapi_mirror_endpoints() -> list[tuple[str, str]]:
    """Return the RuAPI mirror chain as ordered (provider_id, endpoint) pairs.

    The pairs follow the order of RUAPI_MIRRORS (primary first). A mirror
    whose endpoint is missing from the registry, or blank after stripping, is
    left out without any error.
    """
    known = _load_provider_registry_endpoints()
    candidates = ((pid, known.get(pid, "").strip()) for pid in RUAPI_MIRRORS)
    return [(pid, url) for pid, url in candidates if url]
518
+
519
+
520
def dispatch_ruapi_with_fallback(
    payload: dict,
    *,
    auth_token: str,
    timeout: float = 30.0,
    transport: Any = None,
    extra_headers: dict[str, str] | None = None,
) -> dict:
    """Dispatch a chat-completions request through the RuAPI mirror chain.

    Mirrors are tried in the order given by ``ruapi_mirror_endpoints``. A
    mirror is skipped (and the next one tried) when the transport raises, when
    it answers with a status in ``RUAPI_RETRY_STATUSES``, or when a 2xx body
    is not valid JSON. Any other status aborts immediately without trying
    further mirrors.

    Args:
        payload: OpenAI-compat chat.completions request body (already JSON-able).
        auth_token: ANTHROPIC_AUTH_TOKEN — sent as ``Authorization: Bearer ...``.
        timeout: Per-attempt timeout in seconds.
        transport: Optional callable for tests:
            ``transport(endpoint, headers, body, timeout) -> (status, body_text)``
            Defaults to a stdlib urllib-based transport.
        extra_headers: Optional headers merged on top of the default
            Content-Type, User-Agent and Authorization headers.

    Returns:
        Parsed JSON response body from the first mirror that returns a 2xx.

    Raises:
        RuntimeError: if no mirror is configured, if a mirror returns a
            non-retryable status, or if every mirror fails. In the last case
            the message lists each mirror's failure mode in attempt order and
            the most recent underlying exception (if any) is attached as
            ``__cause__``.
    """
    chain = ruapi_mirror_endpoints()
    if not chain:
        raise RuntimeError("ruapi fallback: no mirrors configured in provider-registry.json")

    headers = {
        "Content-Type": "application/json",
        "User-Agent": "curl/8.5.0",
        "Authorization": f"Bearer {auth_token}",
    }
    if extra_headers:
        headers.update(extra_headers)

    body_bytes = json.dumps(payload).encode("utf-8")
    transport_fn = transport or _default_ruapi_transport

    failures: list[str] = []
    # Most recent exception seen while trying mirrors; chained onto the final
    # error so the original traceback is not lost.
    last_error: Exception | None = None
    for provider_id, endpoint in chain:
        try:
            status, response_text = transport_fn(endpoint, headers, body_bytes, timeout)
        except Exception as exc:  # noqa: BLE001 — mirror fallback: network/timeout/etc → try next mirror
            failures.append(f"{provider_id}: {type(exc).__name__}: {exc}")
            log.warning("ruapi mirror %s failed (%s); falling back", provider_id, exc)
            last_error = exc
            continue

        if 200 <= status < 300:
            try:
                return json.loads(response_text)
            except json.JSONDecodeError as exc:
                failures.append(f"{provider_id}: invalid JSON ({exc})")
                log.warning("ruapi mirror %s returned non-JSON; falling back", provider_id)
                last_error = exc
                continue

        if status in RUAPI_RETRY_STATUSES:
            failures.append(f"{provider_id}: HTTP {status}")
            log.warning("ruapi mirror %s returned HTTP %s; falling back", provider_id, status)
            continue

        # Non-retryable error (e.g. 4xx auth/payload). Surface immediately —
        # falling back would not help since both mirrors share the same key.
        failures.append(f"{provider_id}: HTTP {status} (non-retryable)")
        raise RuntimeError(
            f"ruapi dispatch failed (non-retryable): {failures[-1]}; body={response_text[:200]!r}"
        )

    raise RuntimeError(
        "ruapi dispatch failed across all mirrors: " + "; ".join(failures)
    ) from last_error
596
+
597
+
598
def _default_ruapi_transport(
    endpoint: str,
    headers: dict[str, str],
    body: bytes,
    timeout: float,
) -> tuple[int, str]:
    """Default urllib-based transport for ruapi fallback.

    Sends ``body`` as a POST request to ``endpoint`` with the given headers.

    Returns:
        ``(status_code, response_text)``. Responses that urllib reports as
        ``HTTPError`` are returned the same way rather than raised, so the
        caller can decide whether to fall back or surface the error. The body
        is decoded as UTF-8 with undecodable bytes replaced.

    Raises:
        Any non-HTTPError exception from ``urlopen`` (connection-level
        failures such as timeouts, DNS, TLS) — caller treats those as a
        mirror miss.
    """
    import urllib.error
    import urllib.request

    request = urllib.request.Request(
        endpoint, data=body, headers=headers, method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            text = response.read().decode("utf-8", errors="replace")
            return response.status, text
    except urllib.error.HTTPError as exc:
        # HTTPError is also a Response — preserve status + body so caller can
        # decide whether to fall back (5xx) or surface (4xx).
        body_text = ""
        try:
            body_text = exc.read().decode("utf-8", errors="replace")
        except Exception:  # noqa: BLE001 — best-effort HTTP body capture; pragma: no cover
            pass
        finally:
            # The error object wraps the open response; close it so the
            # underlying connection is released instead of waiting for GC.
            try:
                exc.close()
            except Exception:  # noqa: BLE001 — closing is best-effort too; pragma: no cover
                pass
        return exc.code, body_text
628
+
629
+
630
# Smallest number of events an agent/model pair needs before it is reported.
MIN_EVENTS = 3

# (minimum score, letter grade) pairs, highest threshold first; display only.
QUALITY_GRADES = [
    (90, "A"), (80, "B+"), (70, "B"), (60, "C+"),
    (50, "C"), (40, "D"), (0, "F"),
]


def quality_grade(score: float) -> str:
    """Map a numeric quality score onto its letter grade.

    The first entry of ``QUALITY_GRADES`` whose threshold is not above
    ``score`` wins; scores below every threshold grade as "F".
    """
    matching = (letter for floor, letter in QUALITY_GRADES if score >= floor)
    return next(matching, "F")
646
+
647
+
648
+ # ---------------------------------------------------------------------------
649
+ # Percentile helper (from cost_predictor, duplicated for independence)
650
+ # ---------------------------------------------------------------------------
651
+
652
+ def _percentile(sorted_values: list[float], p: float) -> float:
653
+ if not sorted_values:
654
+ return 0.0
655
+ if len(sorted_values) == 1:
656
+ return sorted_values[0]
657
+ k = (len(sorted_values) - 1) * p
658
+ f = math.floor(k)
659
+ c = min(math.ceil(k), len(sorted_values) - 1)
660
+ if f == c:
661
+ return sorted_values[f]
662
+ return sorted_values[f] + (sorted_values[c] - sorted_values[f]) * (k - f)
663
+
664
+
665
+ # ---------------------------------------------------------------------------
666
+ # Data aggregation
667
+ # ---------------------------------------------------------------------------
668
+
669
def aggregate_model_stats(
    target_path: pathlib.Path | str,
    task_type: str = "",
    period: str = "30d",
) -> dict[str, dict]:
    """Aggregate per-model stats from experience events.

    Args:
        target_path: Project root handed to ``experience_pipeline.load_events``.
        task_type: When non-empty, only events whose ``task.task_type`` equals
            this value are counted.
        period: Lookback window passed as ``since`` to ``load_events``.

    Returns:
        Dict keyed by ``"agent/model"``. Only combinations with at least
        ``MIN_EVENTS`` events are included. Each value holds ``events``,
        ``success_rate``, ``quality_avg``, ``cost_median``/``cost_p25``/
        ``cost_p75`` and ``time_median``/``time_p25``/``time_p75``.
    """
    # experience_pipeline sits next to this file. Add that directory to
    # sys.path only once so repeated calls do not keep growing the path.
    module_dir = str(pathlib.Path(__file__).parent)
    if module_dir not in sys.path:
        sys.path.insert(0, module_dir)
    import experience_pipeline as ep

    def _as_float(value) -> float:
        """Coerce a task metric to float; null or unparsable values count as 0."""
        try:
            return float(value or 0)
        except (TypeError, ValueError):
            return 0.0

    target = pathlib.Path(target_path).resolve()
    events = ep.load_events(target, since=period, limit=5000, include_archive=False)

    # Keep only task_* events.
    task_events = [
        e for e in events
        if str(e.get("event_type", "")).startswith("task_")
    ]

    # Optional task_type filter
    if task_type:
        task_events = [
            e for e in task_events
            if (e.get("task") or {}).get("task_type") == task_type
        ]

    # Group by agent/model
    groups: dict[str, list[dict]] = {}
    for e in task_events:
        agent = e.get("agent", "unknown")
        model = e.get("model", "unknown")
        groups.setdefault(f"{agent}/{model}", []).append(e)

    result: dict[str, dict] = {}
    for key, events_list in groups.items():
        if len(events_list) < MIN_EVENTS:
            continue

        tasks = [(e.get("task") or {}) for e in events_list]
        # A key that is present but null would crash a bare float(); treat it
        # like a missing key (0), which is what the .get default already does.
        costs = sorted(_as_float(t.get("cost_usd", 0)) for t in tasks)
        elapsed_list = sorted(_as_float(t.get("elapsed_seconds", 0)) for t in tasks)
        successes = sum(
            1 for t in tasks if t.get("result") in ("success", "partial")
        )

        # Quality: share of the recorded checks that passed, per event.
        quality_scores = []
        for e in events_list:
            q = e.get("quality", {})
            if not isinstance(q, dict) or not q:
                continue
            checks = [q.get("lint_clean"), q.get("no_secrets"),
                      q.get("commit_message_valid"), q.get("acceptance_criteria_met")]
            passed = sum(1 for c in checks if c is True)
            total = sum(1 for c in checks if c is not None)
            if total > 0:
                quality_scores.append(passed / total * 100)

        quality_avg = (
            round(sum(quality_scores) / len(quality_scores), 1)
            if quality_scores
            else round(successes / len(events_list) * 80, 1)  # fallback: success → ~80 max
        )

        result[key] = {
            "events": len(events_list),
            "success_rate": round(successes / len(events_list), 2),
            "quality_avg": quality_avg,
            "cost_median": round(_percentile(costs, 0.5), 4),
            "cost_p25": round(_percentile(costs, 0.25), 4),
            "cost_p75": round(_percentile(costs, 0.75), 4),
            "time_median": round(_percentile(elapsed_list, 0.5), 0),
            "time_p25": round(_percentile(elapsed_list, 0.25), 0),
            "time_p75": round(_percentile(elapsed_list, 0.75), 0),
        }

    return result
756
+
757
+
758
+ # ---------------------------------------------------------------------------
759
+ # Scoring
760
+ # ---------------------------------------------------------------------------
761
+
762
def compute_model_score(stats: dict, weights: dict, pool_max: dict) -> float:
    """Blend quality, success, cost and speed into one composite score.

    score = (quality_avg/100 * quality_weight) +
            (success_rate * success_weight) +
            ((1 - normalized_cost) * cost_weight) +
            ((1 - normalized_time) * speed_weight)

    Cost and time are normalized against ``pool_max["max_cost"]`` and
    ``pool_max["max_time"]``; a non-positive maximum normalizes to 0.
    Missing weights default to 0.3 / 0.3 / 0.2 / 0.15.

    Returns:
        The score clamped to ``[0.0, 1.0]`` and rounded to three decimals.
        Higher is better.
    """
    def _relative(value, ceiling):
        # Fraction of the pool maximum; 0 when the maximum is not positive.
        return value / ceiling if ceiling > 0 else 0

    cost_share = _relative(stats.get("cost_median", 0), pool_max.get("max_cost", 1.0))
    time_share = _relative(stats.get("time_median", 0), pool_max.get("max_time", 1.0))

    total = (
        stats.get("quality_avg", 0) / 100.0 * weights.get("quality", 0.3)
        + stats.get("success_rate", 0) * weights.get("success", 0.3)
        + (1.0 - cost_share) * weights.get("cost", 0.2)
        + (1.0 - time_share) * weights.get("speed", 0.15)
    )
    bounded = min(1.0, max(0.0, total))
    return round(bounded, 3)
788
+
789
+
790
+ # ---------------------------------------------------------------------------
791
+ # Constraints
792
+ # ---------------------------------------------------------------------------
793
+
794
+ def apply_constraints(
795
+ candidates: dict[str, dict],
796
+ constraints: dict | None,
797
+ ) -> tuple[dict[str, dict], list[dict]]:
798
+ """Filter models that don't meet constraints.
799
+
800
+ Returns (filtered_candidates, filtered_out_list).
801
+ """
802
+ if not constraints:
803
+ return candidates, []
804
+
805
+ filtered_out: list[dict] = []
806
+ result: dict[str, dict] = {}
807
+
808
+ max_cost = constraints.get("max_cost")
809
+ min_quality = constraints.get("min_quality")
810
+ max_time = constraints.get("max_time")
811
+
812
+ for key, stats in candidates.items():
813
+ reasons = []
814
+ if max_cost is not None and stats.get("cost_median", 0) > max_cost:
815
+ reasons.append(f"cost ${stats['cost_median']:.4f} > max ${max_cost}")
816
+ if min_quality is not None and stats.get("quality_avg", 0) < min_quality:
817
+ reasons.append(f"quality {stats['quality_avg']} < min {min_quality}")
818
+ if max_time is not None and stats.get("time_median", 0) > max_time:
819
+ reasons.append(f"time {stats['time_median']}s > max {max_time}s")
820
+
821
+ if reasons:
822
+ filtered_out.append({"model": key, "reasons": reasons})
823
+ else:
824
+ result[key] = stats
825
+
826
+ return result, filtered_out
827
+
828
+
829
+ # ---------------------------------------------------------------------------
830
+ # Recommendation
831
+ # ---------------------------------------------------------------------------
832
+
833
+ @dataclasses.dataclass
834
+ class ModelRecommendation:
835
+ """Structured model recommendation result."""
836
+ recommended: dict | None
837
+ alternatives: list[dict]
838
+ ranking_factors: dict
839
+ constraints_applied: dict
840
+ task_type: str
841
+ data_period: str
842
+ total_events_analyzed: int
843
+ source: str = "experience" # "experience" (default) | "task-matrix"
844
+
845
+
846
def _keyword_task_type(goal: str) -> str:
    """Guess a task type from keywords in ``goal``; defaults to "feat"."""
    gl = goal.lower()
    keyword_map = (
        ("fix", ("fix", "bug", "patch")),
        ("refactor", ("refactor", "restructure")),
        ("test", ("test", "spec")),
        ("docs", ("doc", "readme")),
    )
    for inferred, keywords in keyword_map:
        if any(k in gl for k in keywords):
            return inferred
    return "feat"


def _constraints_payload(constraints: dict | None, filtered_out: list[dict]) -> dict:
    """Build the ``constraints_applied`` section shared by every result."""
    limits = constraints or {}
    return {
        "max_cost": limits.get("max_cost"),
        "min_quality": limits.get("min_quality"),
        "max_time": limits.get("max_time"),
        "filtered_out": filtered_out,
    }


def _matrix_route_payload(route: dict, task_type: str, *, primary: bool) -> dict:
    """Describe one cold-start provider-matrix route as a recommendation dict."""
    reason = f"Cold-start provider matrix route for {task_type} tasks."
    if primary:
        reason = (
            f"Cold-start provider matrix route for {task_type} tasks "
            "(no experience data yet)."
        )
    entry = {
        "agent": route["agent"],
        "model": route["model"],
        "reason": reason,
        "expected_cost": None,
        "expected_quality": None,
        "expected_time": None,
        "success_rate": None,
    }
    if primary:
        entry["confidence"] = "task-matrix"
        entry["sample_size"] = 0
    else:
        entry["tradeoff"] = "matrix-fallback"
    entry["score"] = 0.0
    entry["provider"] = route["provider"]
    entry["tier"] = route.get("tier", "")
    entry["effort"] = route.get("effort", "")
    entry["billing_class"] = route.get("billing_class", "")
    return entry


def _split_agent_model(key: str) -> tuple[str, str]:
    """Split an ``"agent/model"`` key; the model is "default" without a slash."""
    if "/" in key:
        agent, model = key.split("/", 1)
        return agent, model
    return key, "default"


def recommend_model(
    target_path: pathlib.Path | str,
    task_type: str,
    goal: str = "",
    constraints: dict | None = None,
    period: str = "30d",
) -> ModelRecommendation:
    """Recommend best model for a task type based on experience data.

    Args:
        target_path: Project root.
        task_type: One of feat, fix, refactor, test, docs. When empty it is
            inferred from ``goal`` and otherwise defaults to "feat".
        goal: Optional goal text, used only to infer a missing task type.
        constraints: Optional {max_cost, min_quality, max_time}.
        period: Lookback period (default 30d).

    Returns:
        ModelRecommendation with the top pick and up to three alternatives.
        With no usable experience data the provider matrix supplies the
        routes (``source="task-matrix"``). If that is empty too,
        ``recommended`` is ``None``.
    """
    target = pathlib.Path(target_path).resolve()

    # Infer task type from goal if needed
    if not task_type and goal:
        try:
            import cost_predictor
            task_type = cost_predictor.infer_task_type(goal)
        except ImportError:
            # Fallback: basic keyword inference
            task_type = _keyword_task_type(goal)
    if not task_type:
        task_type = "feat"

    weights = TASK_WEIGHTS.get(task_type, DEFAULT_WEIGHTS)

    # Aggregate stats; retry without the task_type filter if nothing matched.
    all_stats = aggregate_model_stats(target, task_type, period)
    if not all_stats:
        all_stats = aggregate_model_stats(target, "", period)

    total_events = sum(s["events"] for s in all_stats.values())

    # Apply constraints
    candidates, filtered_out = apply_constraints(all_stats, constraints)

    # If all filtered out, fall back to unconstrained
    if not candidates and all_stats:
        candidates = all_stats
        filtered_out = []  # reset — we're ignoring constraints

    constraints_applied = _constraints_payload(constraints, filtered_out)

    if not candidates:
        fallback_list = provider_matrix_for_task(task_type)
        if fallback_list:
            return ModelRecommendation(
                recommended=_matrix_route_payload(
                    fallback_list[0], task_type, primary=True,
                ),
                alternatives=[
                    _matrix_route_payload(route, task_type, primary=False)
                    for route in fallback_list[1:]
                ],
                ranking_factors=weights,
                constraints_applied=constraints_applied,
                task_type=task_type,
                data_period=period,
                total_events_analyzed=total_events,
                source="task-matrix",
            )
        # If even the fallback table has no entry — keep the original None return
        return ModelRecommendation(
            recommended=None,
            alternatives=[],
            ranking_factors=weights,
            constraints_applied=constraints_applied,
            task_type=task_type,
            data_period=period,
            total_events_analyzed=total_events,
            source="experience",
        )

    # Compute pool maximums for normalization
    pool_max = {
        "max_cost": max(s["cost_median"] for s in candidates.values()) or 0.01,
        "max_time": max(s["time_median"] for s in candidates.values()) or 1.0,
    }

    # Score and rank (stable sort, best score first)
    scored: list[tuple[str, dict, float]] = sorted(
        (
            (key, stats, compute_model_score(stats, weights, pool_max))
            for key, stats in candidates.items()
        ),
        key=lambda item: -item[2],
    )

    # Build recommendation
    top_key, top_stats, top_score = scored[0]
    agent, model = _split_agent_model(top_key)

    recommended = {
        "agent": agent,
        "model": model,
        "reason": _generate_reason(top_stats, task_type, is_top=True),
        "expected_cost": top_stats["cost_median"],
        "expected_quality": top_stats["quality_avg"],
        "expected_time": top_stats["time_median"],
        "success_rate": top_stats["success_rate"],
        "confidence": _recommendation_confidence(top_stats["events"]),
        "sample_size": top_stats["events"],
        "score": top_score,
    }

    alternatives = []
    for key, stats, score in scored[1:4]:  # top 3 alternatives
        a_agent, a_model = _split_agent_model(key)
        # Percentage saved relative to the top pick (0 when the top pick is free).
        cost_diff = (
            round((1 - stats["cost_median"] / top_stats["cost_median"]) * 100)
            if top_stats["cost_median"] > 0 else 0
        )
        tradeoff = "cost_saving" if cost_diff > 20 else "similar"
        if stats["quality_avg"] > top_stats["quality_avg"]:
            tradeoff = "higher_quality"

        alternatives.append({
            "agent": a_agent,
            "model": a_model,
            "reason": _generate_reason(stats, task_type, is_top=False, cost_diff=cost_diff),
            "expected_cost": stats["cost_median"],
            "expected_quality": stats["quality_avg"],
            "expected_time": stats["time_median"],
            "success_rate": stats["success_rate"],
            "tradeoff": tradeoff,
            "score": score,
        })

    return ModelRecommendation(
        recommended=recommended,
        alternatives=alternatives,
        ranking_factors=weights,
        constraints_applied=constraints_applied,
        task_type=task_type,
        data_period=period,
        total_events_analyzed=total_events,
    )
1046
+
1047
+
1048
+ def _recommendation_confidence(sample_size: int) -> str:
1049
+ if sample_size >= 20:
1050
+ return "high"
1051
+ if sample_size >= 5:
1052
+ return "medium"
1053
+ return "low"
1054
+
1055
+
1056
def _generate_reason(
    stats: dict, task_type: str, *, is_top: bool, cost_diff: int = 0,
) -> str:
    """Generate a human-readable reason for the recommendation.

    Args:
        stats: Model stats; reads ``quality_avg``, ``success_rate`` and
            ``cost_median``.
        task_type: Task type, which selects the wording for the top pick.
        is_top: True for the top pick, False for an alternative.
        cost_diff: Percentage cheaper than the top pick (alternatives only).

    Returns:
        A one-line explanation string.
    """
    grade = quality_grade(stats["quality_avg"])
    # round(), not int(): 0.57 * 100 is 56.99999999999999 in binary floating
    # point, which int() would truncate to a misleading "56%".
    sr = f"{round(stats['success_rate'] * 100)}%"

    if is_top:
        if task_type == "refactor":
            return f"Highest quality for refactor tasks ({stats['quality_avg']:.0f} avg, {grade} grade)"
        if task_type == "fix":
            return f"Best success rate for fixes ({sr} success, {grade} quality)"
        if task_type in ("test", "docs"):
            return f"Best value for {task_type} tasks ({sr} success, ${stats['cost_median']:.2f} avg)"
        return f"Best overall score ({grade} quality, {sr} success)"

    if cost_diff > 30:
        return f"{cost_diff}% cheaper, {sr} success rate"
    if stats["success_rate"] > 0.85:
        return f"Similar success ({sr}), ${stats['cost_median']:.4f} median cost"
    return f"{grade} quality, {sr} success, ${stats['cost_median']:.4f} cost"
1077
+
1078
+
1079
+ # ---------------------------------------------------------------------------
1080
+ # Serialization
1081
+ # ---------------------------------------------------------------------------
1082
+
1083
+ def recommendation_to_dict(rec: ModelRecommendation) -> dict:
1084
+ """Convert to JSON-serializable dict."""
1085
+ return {
1086
+ "recommended": rec.recommended,
1087
+ "alternatives": rec.alternatives,
1088
+ "ranking_factors": rec.ranking_factors,
1089
+ "constraints_applied": rec.constraints_applied,
1090
+ "task_type": rec.task_type,
1091
+ "data_period": rec.data_period,
1092
+ "total_events_analyzed": rec.total_events_analyzed,
1093
+ "source": rec.source,
1094
+ }
1095
+
1096
+
1097
+ # ---------------------------------------------------------------------------
1098
+ # Benchmark table
1099
+ # ---------------------------------------------------------------------------
1100
+
1101
def get_benchmark(
    target_path: pathlib.Path | str,
    period: str = "30d",
) -> dict:
    """Rank every model by composite score and name the best per task type.

    Returns:
        ``{"models": [...], "best_for": {task_type: model_key}, "period": period}``.
        Each ``models`` row carries key, events, success_rate, quality_avg,
        quality_grade, cost_median, time_median and score, best score first.
        ``models`` and ``best_for`` are empty when no stats are available.
    """
    def _pool_ceiling(stats_by_key: dict) -> dict:
        # Normalization maxima; a zero maximum falls back to a small positive value.
        return {
            "max_cost": max(s["cost_median"] for s in stats_by_key.values()) or 0.01,
            "max_time": max(s["time_median"] for s in stats_by_key.values()) or 1.0,
        }

    target = pathlib.Path(target_path).resolve()
    overall = aggregate_model_stats(target, "", period)
    if not overall:
        return {"models": [], "best_for": {}, "period": period}

    # Overall ranking uses the default weights.
    overall_pool = _pool_ceiling(overall)
    rows = [
        {
            "key": key,
            "events": stats["events"],
            "success_rate": stats["success_rate"],
            "quality_avg": stats["quality_avg"],
            "quality_grade": quality_grade(stats["quality_avg"]),
            "cost_median": stats["cost_median"],
            "time_median": stats["time_median"],
            "score": compute_model_score(stats, DEFAULT_WEIGHTS, overall_pool),
        }
        for key, stats in overall.items()
    ]
    ranked = sorted(rows, key=lambda row: -row["score"])

    # Best model per task type, scored with that task type's own weights.
    best_for = {}
    for tt, tt_weights in TASK_WEIGHTS.items():
        tt_stats = aggregate_model_stats(target, tt, period)
        if tt_stats:
            tt_pool = _pool_ceiling(tt_stats)
            best_for[tt] = max(
                tt_stats,
                key=lambda k: compute_model_score(tt_stats[k], tt_weights, tt_pool),
            )

    return {"models": ranked, "best_for": best_for, "period": period}
1156
+
1157
+
1158
+ # ---------------------------------------------------------------------------
1159
+ # Agent Outcome Ledger — pick best agent for THIS repo's history (#483 follow-up)
1160
+ # ---------------------------------------------------------------------------
1161
+
1162
+
1163
def pick_best_agent_for_repo(
    target_path: pathlib.Path | str,
    task_type: str = "",
    *,
    top_n: int = 3,
) -> list[tuple[str, float, int]]:
    """Return top-N (agent, weighted_score, sample_count) for this repo.

    Thin wrapper: resolves ``target_path`` and delegates to
    ``task_outcomes.pick_best_agent_for_repo``, which reads the outcome
    ledger written by swarm.cmd_done via task_outcomes.

    Per the original note, the result is an empty list on cold start
    (<5 records) and the caller should then fall back to static TIER_MODELS;
    that threshold is enforced inside task_outcomes, not here.

    See ai/docs/plan-30d-2026-04-17.md §2.1 for rationale.
    """
    # Reuse the shared loader instead of repeating the package/script import
    # fallback in this function.
    task_outcomes = _load_task_outcomes_module()
    target = pathlib.Path(target_path).resolve()
    return task_outcomes.pick_best_agent_for_repo(target, task_type, top_n=top_n)
1183
+
1184
+
1185
def _load_task_outcomes_module():
    """Import and return ``task_outcomes``.

    Tries the package-relative import first and falls back to a plain
    top-level import when that raises ImportError.
    """
    try:
        from . import task_outcomes as module
    except ImportError:
        import task_outcomes as module  # type: ignore
    return module
1191
+
1192
+
1193
def ledger_recommendation(
    target_path: pathlib.Path | str,
    task_type: str = "",
    period: str = "30d",
    *,
    top_n: int = 3,
) -> dict:
    """Assemble the ledger + static recommendation payload used by the CLI.

    Schema (stable — consumed by `0dai models recommend` and /models dashboard):
    {
        ledger_recommendations: [{agent, avg_score, tasks_count, avg_cost_per_task}],
        static_recommendations: [{agent, model, score, expected_cost,
                                  expected_quality, success_rate, reason, ...}],
        ledger_size: int,
        cold_start: bool,
        cold_start_threshold: int,
        task_type: str,
        data_period: str,
    }

    ``static_recommendations`` holds the top pick from ``recommend_model``
    followed by at most ``top_n - 1`` of its alternatives.
    """
    task_outcomes = _load_task_outcomes_module()
    target = pathlib.Path(target_path).resolve()

    summary = task_outcomes.summarize_ledger(target)
    ranked = task_outcomes.pick_best_agent_for_repo(target, task_type, top_n=top_n)

    # Per-agent ledger rows, with the average cost looked up from the summary.
    per_agent = summary.get("agents") or {}
    ledger_recs: list[dict] = [
        {
            "agent": agent,
            "avg_score": float(avg_score),
            "tasks_count": int(tasks_count),
            "avg_cost_per_task": round(
                float((per_agent.get(agent) or {}).get("avg_cost_usd") or 0.0), 4,
            ),
        }
        for agent, avg_score, tasks_count in ranked
    ]

    rec = recommend_model(target, task_type, period=period)

    # Keys copied verbatim from both the top pick and each alternative.
    shared_keys = (
        "agent", "model", "score", "expected_cost",
        "expected_quality", "success_rate", "reason",
    )
    static_recs: list[dict] = []
    if rec.recommended:
        top = rec.recommended
        top_entry = {name: top[name] for name in shared_keys}
        top_entry["confidence"] = top["confidence"]
        top_entry["sample_size"] = top["sample_size"]
        static_recs.append(top_entry)
    for alt in rec.alternatives[: max(0, top_n - 1)]:
        alt_entry = {name: alt[name] for name in shared_keys}
        alt_entry["tradeoff"] = alt.get("tradeoff")
        static_recs.append(alt_entry)

    return {
        "ledger_recommendations": ledger_recs,
        "static_recommendations": static_recs,
        "ledger_size": int(summary.get("total_tasks") or 0),
        "cold_start": bool(summary.get("cold_start", True)),
        "cold_start_threshold": task_outcomes._COLD_START_THRESHOLD,
        "task_type": task_type or rec.task_type,
        "data_period": period,
    }
1265
+
1266
+
1267
+ def _render_ledger_recommendation(data: dict) -> str:
1268
+ """Human-readable rendering for the CLI (no colour — stays TTY-neutral)."""
1269
+ lines: list[str] = []
1270
+ task_type = data.get("task_type") or "feat"
1271
+ lines.append(f"\n Model recommendation for {task_type} tasks:")
1272
+
1273
+ if data.get("cold_start"):
1274
+ n = data.get("ledger_size", 0)
1275
+ thr = data.get("cold_start_threshold", 5)
1276
+ lines.append(
1277
+ f"\n \u23f3 ledger warming up ({n}/{thr} records) — "
1278
+ f"showing provider matrix rankings only."
1279
+ )
1280
+ else:
1281
+ lines.append("\n From your repo's history:")
1282
+ ledger = data.get("ledger_recommendations") or []
1283
+ if not ledger:
1284
+ lines.append(" (no ledger matches for this task type)")
1285
+ else:
1286
+ medals = ["\U0001f947", "\U0001f948", "\U0001f949"]
1287
+ for i, item in enumerate(ledger):
1288
+ medal = medals[i] if i < len(medals) else f" {i + 1}."
1289
+ lines.append(
1290
+ f" {medal} {item['agent']} "
1291
+ f"— avg_score {item['avg_score']:.2f} "
1292
+ f"({item['tasks_count']} tasks, "
1293
+ f"${item['avg_cost_per_task']:.4f}/task)"
1294
+ )
1295
+
1296
+ static = data.get("static_recommendations") or []
1297
+ lines.append("\n Provider matrix rankings:")
1298
+ if not static:
1299
+ lines.append(" (no experience-event recommendation — run swarm tasks to build history)")
1300
+ else:
1301
+ medals = ["\U0001f947", "\U0001f948", "\U0001f949"]
1302
+ for i, item in enumerate(static):
1303
+ medal = medals[i] if i < len(medals) else f" {i + 1}."
1304
+ eq = item.get("expected_quality") or 0
1305
+ grade = quality_grade(eq)
1306
+ sr = int((item.get("success_rate") or 0.0) * 100)
1307
+ ec = item.get("expected_cost") or 0.0
1308
+ lines.append(
1309
+ f" {medal} {item['agent']}/{item['model']} "
1310
+ f"— Quality: {eq:.0f} ({grade}) | "
1311
+ f"Success: {sr}% | Cost: ${ec:.4f}"
1312
+ )
1313
+ reason = item.get("reason")
1314
+ if reason:
1315
+ lines.append(f" \"{reason}\"")
1316
+
1317
+ lines.append("")
1318
+ return "\n".join(lines)
1319
+
1320
+
1321
+ # ---------------------------------------------------------------------------
1322
+ # CLI entry point
1323
+ # ---------------------------------------------------------------------------
1324
+
1325
if __name__ == "__main__":
    # Command-line front end: `recommend`, `ledger-recommend` and `benchmark`
    # subcommands, each with a --json switch for machine-readable output.
    import argparse

    # Sibling modules are imported by bare name, so this directory must be
    # on sys.path.
    sys.path.insert(0, str(pathlib.Path(__file__).parent))

    parser = argparse.ArgumentParser(description="Model routing recommendations")
    sub = parser.add_subparsers(dest="command")

    rec_p = sub.add_parser("recommend")
    rec_p.add_argument("--target", default=".")
    rec_p.add_argument("--task", default="")
    rec_p.add_argument("--goal", default="")
    rec_p.add_argument("--max-cost", type=float, default=None)
    rec_p.add_argument("--min-quality", type=float, default=None)
    rec_p.add_argument("--max-time", type=float, default=None)
    rec_p.add_argument("--period", default="30d")
    rec_p.add_argument("--json", action="store_true")

    bench_p = sub.add_parser("benchmark")
    bench_p.add_argument("--target", default=".")
    bench_p.add_argument("--period", default="30d")
    bench_p.add_argument("--json", action="store_true")

    lg_p = sub.add_parser(
        "ledger-recommend",
        help="Recommend agents using the per-repo outcome ledger + provider matrix.",
    )
    lg_p.add_argument("--target", default=".")
    lg_p.add_argument("--task", default="")
    lg_p.add_argument("--period", default="30d")
    lg_p.add_argument("--top-n", type=int, default=3)
    lg_p.add_argument("--json", action="store_true")

    args = parser.parse_args()

    if args.command == "recommend":
        # Only forward the limits the user actually supplied.
        constraints = {}
        if args.max_cost is not None:
            constraints["max_cost"] = args.max_cost
        if args.min_quality is not None:
            constraints["min_quality"] = args.min_quality
        if args.max_time is not None:
            constraints["max_time"] = args.max_time

        rec = recommend_model(
            args.target, args.task, args.goal,
            constraints or None, args.period,
        )
        if args.json:
            print(json.dumps(recommendation_to_dict(rec), indent=2, ensure_ascii=False))
        else:
            d = recommendation_to_dict(rec)
            print(f"\nRecommended for {d['task_type']} tasks:\n")
            r = d["recommended"]
            if r:
                # Matrix-fallback entries carry None metrics; show them as 0.
                eq = r["expected_quality"] or 0
                g = quality_grade(eq)
                print(f" \U0001f947 {r['agent']}/{r['model']}")
                # round(), not int(): 0.57 * 100 is 56.99999999999999 and
                # int() would print 56%.
                sr_pct = round((r["success_rate"] or 0.0) * 100)
                ec = r["expected_cost"] or 0.0
                et = r["expected_time"] or 0.0
                print(f" Quality: {eq:.0f} ({g}) | Success: {sr_pct}% | Cost: ${ec:.4f} | Time: {et:.0f}s")
                print(f" \"{r['reason']}\"")
                for i, alt in enumerate(d["alternatives"], 2):
                    # Silver for the first alternative, bronze for the rest.
                    medal = ["\U0001f948", "\U0001f949"][min(i - 2, 1)]
                    aeq = alt["expected_quality"] or 0
                    ag = quality_grade(aeq)
                    print(f"\n {medal} {alt['agent']}/{alt['model']}")
                    asr = round((alt["success_rate"] or 0.0) * 100)
                    aec = alt["expected_cost"] or 0.0
                    aet = alt["expected_time"] or 0.0
                    print(f" Quality: {aeq:.0f} ({ag}) | Success: {asr}% | Cost: ${aec:.4f} | Time: {aet:.0f}s")
                    print(f" \"{alt['reason']}\"")
                print(f"\n Based on {d['total_events_analyzed']} events ({d['data_period']}). Confidence: {r['confidence']}.")
            else:
                print(" No recommendation available — not enough experience data.")
                print(" Run some swarm tasks to build up history.")
            print()

    elif args.command == "ledger-recommend":
        data = ledger_recommendation(
            args.target, args.task, args.period, top_n=args.top_n,
        )
        if args.json:
            print(json.dumps(data, indent=2, ensure_ascii=False))
        else:
            print(_render_ledger_recommendation(data))

    elif args.command == "benchmark":
        data = get_benchmark(args.target, args.period)
        if args.json:
            print(json.dumps(data, indent=2, ensure_ascii=False))
        else:
            models = data["models"]
            if not models:
                print("\n No benchmark data — not enough experience events.\n")
            else:
                print(f"\n Model benchmark (last {data['period']}):\n")
                print(f" {'Agent/Model':<25} {'Tasks':>5} {'Success':>8} {'Quality':>10} {'Avg Cost':>10} {'Avg Time':>10} {'Score':>6}")
                print(f" {'-'*76}")
                for m in models:
                    print(f" {m['key']:<25} {m['events']:>5} {round(m['success_rate']*100):>7}% {m['quality_avg']:>6.0f} ({m['quality_grade']:<2}) ${m['cost_median']:>8.4f} {m['time_median']:>8.0f}s {m['score']:>6.3f}")
                bf = data.get("best_for", {})
                if bf:
                    print("\n Best for:")
                    for tt, key in sorted(bf.items()):
                        print(f" {tt:<10} {key}")
                print()
    else:
        parser.print_help()