minima-cli 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. minima/__init__.py +5 -0
  2. minima/api/__init__.py +1 -0
  3. minima/api/auth.py +39 -0
  4. minima/api/errors.py +40 -0
  5. minima/api/routers/__init__.py +1 -0
  6. minima/api/routers/calibration.py +50 -0
  7. minima/api/routers/feedback.py +279 -0
  8. minima/api/routers/health.py +50 -0
  9. minima/api/routers/models.py +42 -0
  10. minima/api/routers/recommend.py +66 -0
  11. minima/api/routers/savings.py +55 -0
  12. minima/api/routers/strategies.py +33 -0
  13. minima/catalog/__init__.py +1 -0
  14. minima/catalog/data/capability_priors.json +210 -0
  15. minima/catalog/data/model_aliases.json +12 -0
  16. minima/catalog/merge.py +69 -0
  17. minima/catalog/refresh.py +54 -0
  18. minima/catalog/sources/__init__.py +1 -0
  19. minima/catalog/sources/litellm.py +19 -0
  20. minima/catalog/sources/openrouter.py +25 -0
  21. minima/catalog/store.py +86 -0
  22. minima/config.py +288 -0
  23. minima/deps.py +35 -0
  24. minima/llm/__init__.py +1 -0
  25. minima/llm/anthropic.py +106 -0
  26. minima/llm/base.py +196 -0
  27. minima/llm/gemini.py +124 -0
  28. minima/llm/registry.py +54 -0
  29. minima/logging.py +28 -0
  30. minima/main.py +109 -0
  31. minima/memory/__init__.py +1 -0
  32. minima/memory/adapter.py +572 -0
  33. minima/memory/keys.py +83 -0
  34. minima/memory/records.py +190 -0
  35. minima/memory/threadpool.py +41 -0
  36. minima/metrics/__init__.py +1 -0
  37. minima/metrics/calibration.py +415 -0
  38. minima/metrics/report.py +116 -0
  39. minima/metrics/savings.py +98 -0
  40. minima/recommender/__init__.py +1 -0
  41. minima/recommender/_pg_pool.py +38 -0
  42. minima/recommender/_redis_client.py +32 -0
  43. minima/recommender/aggregate.py +157 -0
  44. minima/recommender/classify.py +165 -0
  45. minima/recommender/decisionlog.py +505 -0
  46. minima/recommender/durablerefs.py +312 -0
  47. minima/recommender/engine.py +997 -0
  48. minima/recommender/escalation.py +83 -0
  49. minima/recommender/propensity.py +189 -0
  50. minima/recommender/recstore.py +368 -0
  51. minima/recommender/score.py +318 -0
  52. minima/recommender/types.py +166 -0
  53. minima/schemas/__init__.py +1 -0
  54. minima/schemas/common.py +73 -0
  55. minima/schemas/feedback.py +34 -0
  56. minima/schemas/models_catalog.py +36 -0
  57. minima/schemas/recommend.py +104 -0
  58. minima/schemas/savings.py +39 -0
  59. minima/schemas/strategies.py +57 -0
  60. minima/schemas/workflow.py +43 -0
  61. minima/seeding/__init__.py +1 -0
  62. minima/seeding/items.py +42 -0
  63. minima/seeding/llmrouterbench.py +232 -0
  64. minima/seeding/routerbench.py +141 -0
  65. minima/seeding/run_seed.py +56 -0
  66. minima/seeding/synthetic.py +70 -0
  67. minima/tenancy/__init__.py +8 -0
  68. minima/tenancy/context.py +37 -0
  69. minima/tenancy/passthrough.py +110 -0
  70. minima/version.py +3 -0
  71. minima_cli-0.4.9.dist-info/METADATA +275 -0
  72. minima_cli-0.4.9.dist-info/RECORD +161 -0
  73. minima_cli-0.4.9.dist-info/WHEEL +4 -0
  74. minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
  75. minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
  76. minima_client/__init__.py +19 -0
  77. minima_client/autocapture.py +101 -0
  78. minima_client/client.py +301 -0
  79. minima_client/errors.py +23 -0
  80. minima_harness/LICENSE_PI +32 -0
  81. minima_harness/__init__.py +16 -0
  82. minima_harness/agent/__init__.py +72 -0
  83. minima_harness/agent/agent.py +276 -0
  84. minima_harness/agent/events.py +124 -0
  85. minima_harness/agent/loop.py +311 -0
  86. minima_harness/agent/state.py +79 -0
  87. minima_harness/agent/tools.py +97 -0
  88. minima_harness/ai/__init__.py +66 -0
  89. minima_harness/ai/compat.py +71 -0
  90. minima_harness/ai/errors.py +96 -0
  91. minima_harness/ai/events.py +117 -0
  92. minima_harness/ai/openrouter_catalog.py +153 -0
  93. minima_harness/ai/provider_catalog.py +299 -0
  94. minima_harness/ai/provider_quirks.py +37 -0
  95. minima_harness/ai/providers/__init__.py +75 -0
  96. minima_harness/ai/providers/_common.py +48 -0
  97. minima_harness/ai/providers/anthropic.py +290 -0
  98. minima_harness/ai/providers/base.py +65 -0
  99. minima_harness/ai/providers/faux.py +173 -0
  100. minima_harness/ai/providers/google.py +221 -0
  101. minima_harness/ai/providers/openai_compat.py +278 -0
  102. minima_harness/ai/registry.py +184 -0
  103. minima_harness/ai/stream.py +82 -0
  104. minima_harness/ai/tools.py +51 -0
  105. minima_harness/ai/types.py +204 -0
  106. minima_harness/ai/usage.py +41 -0
  107. minima_harness/minima/__init__.py +40 -0
  108. minima_harness/minima/cache.py +102 -0
  109. minima_harness/minima/config.py +85 -0
  110. minima_harness/minima/goals.py +226 -0
  111. minima_harness/minima/judge.py +144 -0
  112. minima_harness/minima/mapping.py +147 -0
  113. minima_harness/minima/meter.py +143 -0
  114. minima_harness/minima/router.py +220 -0
  115. minima_harness/minima/runtime.py +544 -0
  116. minima_harness/minima/signals.py +195 -0
  117. minima_harness/session/__init__.py +14 -0
  118. minima_harness/session/format.py +35 -0
  119. minima_harness/session/store.py +236 -0
  120. minima_harness/tasks/__init__.py +17 -0
  121. minima_harness/tasks/task_set.py +78 -0
  122. minima_harness/tools/__init__.py +7 -0
  123. minima_harness/tools/_io.py +34 -0
  124. minima_harness/tools/bash.py +70 -0
  125. minima_harness/tools/builtin.py +23 -0
  126. minima_harness/tools/edit.py +50 -0
  127. minima_harness/tools/find.py +38 -0
  128. minima_harness/tools/grep.py +73 -0
  129. minima_harness/tools/ls.py +35 -0
  130. minima_harness/tools/read.py +38 -0
  131. minima_harness/tools/tasks.py +75 -0
  132. minima_harness/tools/write.py +36 -0
  133. minima_harness/tui/__init__.py +3 -0
  134. minima_harness/tui/analytics.py +111 -0
  135. minima_harness/tui/app.py +1927 -0
  136. minima_harness/tui/bridge.py +103 -0
  137. minima_harness/tui/cli.py +227 -0
  138. minima_harness/tui/clipboard.py +60 -0
  139. minima_harness/tui/commands.py +49 -0
  140. minima_harness/tui/compaction.py +17 -0
  141. minima_harness/tui/config_cli.py +141 -0
  142. minima_harness/tui/config_store.py +237 -0
  143. minima_harness/tui/context.py +93 -0
  144. minima_harness/tui/customize.py +95 -0
  145. minima_harness/tui/diff.py +53 -0
  146. minima_harness/tui/editor.py +43 -0
  147. minima_harness/tui/extensions.py +84 -0
  148. minima_harness/tui/extra_models.py +52 -0
  149. minima_harness/tui/history.py +71 -0
  150. minima_harness/tui/mubit.py +295 -0
  151. minima_harness/tui/overlays.py +593 -0
  152. minima_harness/tui/packages.py +59 -0
  153. minima_harness/tui/run_modes.py +66 -0
  154. minima_harness/tui/theme.py +77 -0
  155. minima_harness/tui/welcome.py +83 -0
  156. minima_harness/tui/widgets/__init__.py +3 -0
  157. minima_harness/tui/widgets/banner.py +38 -0
  158. minima_harness/tui/widgets/editor.py +83 -0
  159. minima_harness/tui/widgets/footer.py +73 -0
  160. minima_harness/tui/widgets/messages.py +151 -0
  161. minima_harness/tui/widgets/status.py +57 -0
@@ -0,0 +1,544 @@
1
+ """MinimaAgent — an :class:`~minima_harness.agent.Agent` that routes each prompt
2
+ through Minima and feeds the realized outcome back.
3
+
4
+ Per top-level ``prompt()``: (1) ask Minima which model and set ``state.model``, (2) run
5
+ the agent loop (delegate to the base Agent, so tool turns keep working), (3) judge the
6
+ final answer and send ``POST /v1/feedback`` with realized tokens/cost/latency. Routing is
7
+ bypassable: if Minima is unreachable and ``allow_offline`` is set, the run proceeds on the
8
+ current model with no feedback. Bookkeeping failures are logged-and-swallowed so the
9
+ Minima round-trip never breaks the caller's run.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ import os
16
+ from collections.abc import Awaitable, Callable
17
+ from pathlib import Path
18
+ from time import monotonic
19
+ from typing import TYPE_CHECKING, Any
20
+
21
+ from minima_harness.agent.agent import Agent
22
+ from minima_harness.agent.tools import ThinkingLevel
23
+ from minima_harness.ai.errors import classify_provider_error, is_auth_error
24
+ from minima_harness.ai.types import AssistantMessage, ContentBlock, Message, Model, Usage
25
+ from minima_harness.minima.config import HarnessConfig
26
+ from minima_harness.minima.judge import (
27
+ ConstJudge,
28
+ LLMJudge,
29
+ QualityJudge,
30
+ clamp01,
31
+ )
32
+ from minima_harness.minima.mapping import ModelMapping
33
+ from minima_harness.minima.meter import CostMeter
34
+ from minima_harness.minima.router import MinimaRouter, Ranking, RoutingResult
35
+ from minima_harness.minima.signals import ContextExtractor, extract_or_none
36
+ from minima_harness.tasks.task_set import grade_outcome
37
+
38
if TYPE_CHECKING:
    # Placeholder: there are currently no imports that are needed only for type checking.
    pass

# Module-level logger used by the routing / feedback code below.
_log = logging.getLogger("minima_harness.runtime")

# Signature of the optional ``before_route`` hook: it receives the proposed RoutingResult and the
# task text, and is awaited before the model runs.
# Inspect/override a recommendation before the model runs. Return a (possibly modified)
# RoutingResult to override the model; None to accept as-is; a result with
# recommendation_id=None to veto (run a different model with no feedback attribution).
BeforeRoute = Callable[[RoutingResult, str], Awaitable[RoutingResult | None]]
47
+
48
+
49
+ class MinimaAgent(Agent):
50
def __init__(
    self,
    config: HarnessConfig,
    *,
    router: MinimaRouter | None = None,
    judge: QualityJudge | None = None,
    mapping: ModelMapping | None = None,
    model: Model | None = None,
    tools: list | None = None,
    system_prompt: str | None = None,
    task_type: str | None = None,
    thinking_level: ThinkingLevel = "off",
    max_turns: int = 50,
    meter: CostMeter | None = None,
    before_route: BeforeRoute | None = None,
    extractor: ContextExtractor | None = None,
) -> None:
    """Wire up the routing collaborators, reset per-session state, then init the base Agent.

    Args:
        config: Harness configuration; also used to build the default router and judge.
        router: Routing client. When omitted, one is built from ``config`` and the mapping.
        judge: Quality judge. When ``None``, ``_default_judge(config)`` picks one.
        mapping: Model mapping. Falls back to the router's mapping, then a fresh one.
        model: Initial model. Falls back to the mapping's default model.
        tools: Tools handed to the base Agent (copied into a new list).
        system_prompt: Passed through to the base Agent.
        task_type: Default task-type hint used when ``prompt()`` is not given one.
        thinking_level: Passed through to the base Agent.
        max_turns: Passed through to the base Agent.
        meter: Optional cost meter that receives one record per prompt.
        before_route: Optional hook awaited with each routing proposal.
        extractor: Optional context extractor used while routing.
    """
    self.config = config

    # --- collaborators: the mapping must exist before a default router can be built from it.
    if mapping:
        self.mapping = mapping
    elif router:
        self.mapping = router.mapping
    else:
        self.mapping = ModelMapping()
    self.router = router if router else MinimaRouter.for_config(config, self.mapping)
    if judge is None:
        judge = _default_judge(config)
    self.judge = judge
    self.meter = meter
    self.before_route = before_route
    self.extractor = extractor
    self._task_type_hint = task_type

    # --- counters
    # Number of top-level prompts started so far (drives the judge_every cadence).
    self._prompts_run = 0
    # Tool calls the human turned down during the current turn (diff approval). A rejection
    # is treated as a ground-truth negative and caps the quality sent in feedback.
    self._rejected_tools = 0

    # --- offline fallback status
    # Reason the most recent route degraded to offline; None means routing worked. Kept so
    # the TUI can make an offline turn visible.
    self._offline_reason: str | None = None
    # Whether retrying (/reconnect) could help with that fallback. False for credential
    # problems (missing or rejected Mubit key), where the user has to fix the key first.
    self._offline_retryable: bool = True

    # --- last-turn provider failure
    # Classified headline for why the last model call failed; None means it ran. Provider
    # failures arrive as an empty assistant message rather than an exception, so this is
    # how the TUI / --print learn the cause.
    self._last_error: str | None = None
    # The provider's own, unclassified error text for that failure, kept verbatim so an
    # ambiguous status (e.g. 403 vs 429) can be diagnosed from the provider's wording.
    self._last_error_raw: str | None = None

    # --- dead-key handling
    # Providers whose key failed authentication this session. They are removed from the
    # candidate set when routing; /reconnect empties the set again.
    self._excluded_providers: set[str] = set()
    # One-line explanation shown when a turn was moved off a dead-key provider; None when
    # no such reroute happened this turn.
    self._reroute_note: str | None = None

    starting_model = model if model else self.mapping.default_model()
    super().__init__(
        model=starting_model,
        tools=list(tools) if tools else [],
        system_prompt=system_prompt,
        thinking_level=thinking_level,
        max_turns=max_turns,
    )
110
+
111
async def prompt(  # type: ignore[override] # widens base with optional routing kwargs
    self,
    content: str | list[ContentBlock] | Message | list[Any],
    *,
    task_type: str | None = None,
    slider: float | None = None,
    files: list[str | Path] | None = None,
    tags: list[str] | None = None,
) -> RoutingResult | None:
    """Route one top-level prompt, run it on the base Agent, then report the outcome.

    Steps: reset the per-turn state, pick a model via ``_route``, delegate the run to
    ``Agent.prompt``, auto-reroute on a provider auth failure when another candidate is
    runnable, send feedback (skipped for auth failures and when there is no routing),
    record the turn in the cost meter if one is set, and roll a failed turn out of
    ``state.messages``.

    Args:
        content: The prompt, forwarded unchanged to the base Agent.
        task_type: Task type for this prompt; falls back to the constructor hint.
        slider: Forwarded to ``_route``.
        files: Forwarded to ``_route``.
        tags: Forwarded to ``_route``.

    Returns:
        The routing result of the last routing attempt, or ``None`` when ``_route``
        returned ``None`` (offline fallback).

    Raises:
        BaseException: Whatever the base ``Agent.prompt`` raised, re-raised after feedback,
            metering and rollback have run.
    """
    task_text = _text_of(content)
    effective_task_type = task_type or self._task_type_hint
    self._prompts_run += 1
    self._rejected_tools = 0  # reset per-turn reject tally
    self._last_error = None  # reset per-turn error
    self._last_error_raw = None
    self._reroute_note = None  # reset per-turn auto-reroute note

    routing: RoutingResult | None = None
    last: AssistantMessage | None = None
    run_error: BaseException | None = None
    latency_ms = 0
    turns_taken = 0
    # Snapshot history so a failed turn can be rolled back out of the agent's context entirely.
    msgs_before = len(self.state.messages)
    # A hard auth failure (bad/invalid/missing key) is deterministic — the same provider fails
    # identically on every call. So when one occurs and a *different* provider's key works,
    # blacklist the dead provider for the session and re-run the SAME message on an alternative,
    # rescuing this turn instead of wasting it. The exclusion set grows by one per pass, so the
    # loop always terminates; range() is a hard backstop.
    for _attempt in range(self._reroute_budget() + 1):
        routing = await self._route(
            task_text, effective_task_type, slider, files=files, tags=tags, reroute=_attempt > 0
        )
        # On a REROUTE pass: if routing handed back a provider already blacklisted this turn,
        # re-running it would just fail identically — stop and surface the prior error. This
        # catches the cases the candidate filter can't: an offline route (which can't switch
        # models) and a recommender that ignores the candidate constraint and re-picks the dead
        # model. Gated to reroute passes so the FIRST attempt always runs (and surfaces a real
        # error) even when the only model's provider was excluded on a previous turn.
        run_provider = _provider_of(self.state.model.id if self.state.model else None)
        already_dead = run_provider is not None and run_provider in self._excluded_providers
        if _attempt > 0 and already_dead:
            break
        # Re-snapshot per attempt: a rerouted attempt starts from the already-trimmed history.
        msgs_before = len(self.state.messages)
        start = monotonic()
        run_error = None
        try:
            await super().prompt(content)
        except BaseException as exc:  # noqa: BLE001 - capture, then re-raise after feedback
            run_error = exc
        latency_ms = int((monotonic() - start) * 1000)
        turns_taken = self.state.turns_taken
        last = self._last_assistant()
        # A provider call that failed (bad/missing key, 404, network) is swallowed by the
        # provider into an empty-text assistant with stop_reason="error" — NOT a raised
        # exception. Treat that as a failed turn so (a) Minima is never told a broken turn
        # "succeeded" (which would poison routing), and (b) the caller can surface why.
        provider_error = last is not None and getattr(last, "stop_reason", None) == "error"
        if provider_error and last is not None:
            self._last_error = classify_provider_error(last.error_message, last.model)
            self._last_error_raw = last.error_message
            # Log the raw provider error so it's recoverable even off the TUI (--print).
            _log.warning("provider_error_raw model=%s: %s", last.model, last.error_message)
        # Auto-reroute off a dead-key provider — but never second-guess an explicit pin.
        if (
            provider_error
            and run_error is None
            and last is not None
            and not self.config.pinned
            and is_auth_error(last.error_message)
        ):
            provider = _provider_of(last.model)
            if provider:
                self._excluded_providers.add(provider)
                if self._has_runnable_candidate():
                    self._note_reroute(provider)
                    del self.state.messages[msgs_before:]  # drop failed attempt, then retry
                    self._last_error = self._last_error_raw = None
                    continue
        # Reached on success, on a non-auth failure, or when no alternative provider is runnable.
        break

    provider_error = last is not None and getattr(last, "stop_reason", None) == "error"
    if provider_error and last is not None:
        # The final loop pass may have cleared these on a reroute `continue` that then couldn't
        # actually switch providers (offline, or a recommender that re-picked the dead model).
        # Re-derive so a still-failing turn surfaces the real error (not a blank "success") and
        # drop the optimistic reroute note (the reroute did NOT rescue the turn).
        self._last_error = classify_provider_error(last.error_message, last.model)
        self._last_error_raw = last.error_message
        self._reroute_note = None
    # An auth/infra failure is a credential problem, not a model-quality signal — don't feed it
    # back to Minima (it would poison the model's success estimate in this namespace).
    auth_failed = bool(
        provider_error and last is not None and is_auth_error(last.error_message)
    )
    failed = run_error is not None or provider_error
    quality: float | None = None
    outcome = "success"
    if routing is not None and not auth_failed:
        quality, outcome = await self._feedback_safely(
            task_text, routing, latency_ms, failed, turns_taken
        )
    if self.meter is not None:
        actual = last.usage.cost.total if last is not None else 0.0
        # A failed turn is always metered as quality 0.0 / "failure", whatever feedback returned.
        self.meter.record(
            label=_short_label(task_text),
            routing=routing,
            actual_cost_usd=actual,
            quality=quality if not failed else 0.0,
            outcome=("failure" if failed else outcome),
            turns=turns_taken,
        )
    # Roll a failed turn fully out of the agent's context — both the empty error-assistant
    # AND the user message that triggered it. A failed turn produced no usable exchange, so
    # leaving it in history only poisons the NEXT turn (the loop's _drop_failed_calls guard
    # already strips the empty assistant; this also avoids a dangling user turn). Done after
    # feedback/meter so they still see the failed turn's signal.
    if failed:
        del self.state.messages[msgs_before:]
    if run_error is not None:
        raise run_error
    return routing
233
+
234
+ # ------------------------------------------------------------------ routing
235
+
236
async def _route(
    self,
    task_text: str,
    task_type: str | None,
    slider: float | None,
    *,
    files: list[str | Path] | None = None,
    tags: list[str] | None = None,
    reroute: bool = False,
) -> RoutingResult | None:
    """Choose the model for this attempt and store it on ``state.model``.

    A hard pin short-circuits everything and runs the pinned model without calling the
    router. Otherwise context signals are extracted, the router is asked for a
    recommendation, and the optional ``before_route`` hook may replace the result.

    Args:
        task_text: Plain-text form of the prompt.
        task_type: Task type passed to the router.
        slider: Passed to the router unchanged.
        files: Paths handed to the context extractor.
        tags: Caller tags, appended after the extractor's own tags.
        reroute: True on a retry pass; the ``before_route`` hook is then skipped so a
            single user turn yields one routing line / one confirmation.

    Returns:
        The routing result in effect, or ``None`` when the recommend call failed and
        ``config.allow_offline`` permits continuing on the current model.

    Raises:
        Exception: The recommend failure, when offline fallback is not allowed.
    """
    hook_enabled = not reroute and self.before_route is not None

    async def _consult_hook(proposal):
        # Let the hook swap in its own result; a None answer means "keep the proposal".
        if not hook_enabled:
            return proposal
        replacement = await self.before_route(proposal, task_text)
        return proposal if replacement is None else replacement

    # A pin is a deliberate override: there is nothing to route, so the router is never asked
    # (a single-model constraint for an id outside its catalog would be rejected anyway).
    direct = self._pinned_route()
    if direct is not None:
        self._offline_reason, self._offline_retryable = None, True
        direct = await _consult_hook(direct)
        self.state.model = direct.model
        return direct

    paths = [Path(f) for f in files] if files else None
    bundle = await extract_or_none(self.extractor, task_text, paths)
    # Extractor-derived tags come first, caller-supplied tags (e.g. a goal tag) after them.
    combined_tags = (bundle.tags if bundle else []) + (tags or [])
    if bundle:
        difficulty = bundle.difficulty
        exp_tokens = bundle.expected_input_tokens
    else:
        difficulty = exp_tokens = None

    try:
        routing = await self.router.recommend(
            task_text,
            task_type=task_type,
            slider=slider,
            tags=combined_tags or None,
            difficulty=difficulty,
            expected_input_tokens=exp_tokens,
            candidates=self._effective_candidates(),
        )
        self._offline_reason, self._offline_retryable = None, True
    except Exception as exc:  # noqa: BLE001
        if self.config.allow_offline:
            key_configured = bool((self.config.minima_api_key or "").strip())
            self._offline_reason, self._offline_retryable = _classify_offline_reason(
                exc, key_configured
            )
            # An offline fallback is an expected degradation: short reason at WARNING, the
            # traceback only at DEBUG.
            _log.warning("minima_recommend_failed_offline_fallback: %s", self._offline_reason)
            _log.debug("offline_fallback_detail", exc_info=True)
            return None
        raise

    routing = await _consult_hook(routing)
    self.state.model = routing.model
    return routing
303
+
304
def _pinned_route(self) -> RoutingResult | None:
    """Build a routing result for a hard-pinned model without calling the router.

    Returns:
        A ``RoutingResult`` with ``recommendation_id=None`` for the pinned model, or
        ``None`` when the config is not pinned, does not hold exactly one candidate, or
        the pinned id cannot be resolved to a model (normal routing then applies).
    """
    from minima_harness.ai.registry import find_model_by_id

    only = self.config.candidates or []
    is_hard_pin = bool(self.config.pinned) and len(only) == 1
    if not is_hard_pin:
        return None

    pinned_id = only[0]
    resolved = find_model_by_id(pinned_id)
    if resolved is None and "/" in pinned_id:
        # "provider/model" style ids get a second chance through the mapping's resolver,
        # keyed by the part before the first slash.
        vendor = pinned_id.partition("/")[0]
        resolved = self.mapping._resolve(vendor, pinned_id)
    if resolved is None:
        return None

    sole_ranking = Ranking(
        model_id=pinned_id,
        provider=resolved.provider,
        predicted_success=1.0,
        est_cost_usd=0.0,
        decision_basis="pinned",
    )
    # recommendation_id stays None: a manual pin is not a recommendation to learn from.
    return RoutingResult(
        recommendation_id=None,
        chosen_model_id=pinned_id,
        model=resolved,
        est_cost_usd=0.0,
        decision_basis="pinned",
        ranked=[sole_ranking],
        confidence=1.0,
    )
336
+
337
+ # ------------------------------------------------------ key-aware candidates
338
+
339
def _effective_candidates(self) -> list[str]:
    """Return the candidate ids that routing should consider.

    Starts from ``runnable_candidates(config.candidates)`` and removes every model whose
    provider auth-failed this session. If that would remove everything, the unpruned
    runnable set is returned instead, so routing still has something to attempt.
    """
    from minima_harness.ai.provider_catalog import runnable_candidates

    runnable = runnable_candidates(self.config.candidates)
    dead = self._excluded_providers
    if not dead:
        return runnable
    survivors = [cid for cid in runnable if _provider_of(cid) not in dead]
    return survivors if survivors else runnable
351
+
352
def _reroute_budget(self) -> int:
    """Return how many auto-reroute passes one turn may use.

    This is the number of distinct, known providers among the configured candidates
    (so each can be tried once), and never less than 1.
    """
    known_providers = set()
    for candidate_id in self.config.candidates or ():
        provider = _provider_of(candidate_id)
        if provider is not None:
            known_providers.add(provider)
    return max(1, len(known_providers))
358
+
359
def _has_runnable_candidate(self) -> bool:
    """Report whether at least one candidate can still run.

    A candidate counts when its provider is known, is not in the session's excluded set,
    and ``provider_key_present`` reports a key for it.
    """
    from minima_harness.ai.provider_catalog import provider_key_present

    providers = map(_provider_of, self.config.candidates or [])
    return any(
        provider_key_present(provider)
        for provider in providers
        if provider is not None and provider not in self._excluded_providers
    )
370
+
371
def _note_reroute(self, provider: str) -> None:
    """Store a one-line, user-facing note explaining a reroute off ``provider``.

    The note names the provider (its catalog display name when a spec exists) and, when a
    key variable is known, tells the user which variable to fix.
    """
    from minima_harness.ai.provider_catalog import env_vars_for_provider, spec_for

    catalog_entry = spec_for(provider)
    if catalog_entry:
        pname = catalog_entry.display_name
    else:
        pname = provider

    keyvar = ""
    if provider:
        # The first listed environment variable is the one shown to the user.
        keyvar = env_vars_for_provider(provider)[0]

    hint = ""
    if keyvar:
        hint = f" — fix {keyvar} (/config) to re-enable"
    self._reroute_note = f"{pname} key rejected; excluded this session{hint}"
380
+
381
async def reconnect(self) -> None:
    """Rebuild the Minima client from the current environment.

    Routing auth + the endpoint URL are captured when the client is built, so a key or
    ``MINIMA_URL`` set via ``/config`` mid-session doesn't take effect until the client is
    rebuilt. ``/reconnect`` (and a routing-key change in ``/config``) call this so the fix
    applies without restarting the app.

    The stale client is closed best-effort: by the time it is closed the new client is
    already installed and the offline/blacklist state already reset, so a failure while
    closing is logged and swallowed rather than reported as a failed reconnect.
    """
    self.config.refresh_routing_env()
    stale = self.router
    self.router = MinimaRouter.for_config(self.config, self.mapping)
    self._offline_reason = None
    self._offline_retryable = True
    # A key fixed via /config (which triggers reconnect) may revive an auth-failed provider —
    # clear the session blacklist so routing can choose it again.
    self._excluded_providers.clear()
    try:
        await stale.aclose()
    except Exception:  # noqa: BLE001 - closing the replaced client must not fail the reconnect
        _log.warning("stale_minima_client_close_failed", exc_info=True)
398
+
399
async def _feedback_safely(
    self,
    task_text: str,
    routing: RoutingResult,
    latency_ms: int,
    failed: bool,
    turns_taken: int = 0,
) -> tuple[float | None, str]:
    """Report the turn's outcome to the router and hand back what was reported.

    Args:
        task_text: Plain-text prompt, given to the judge.
        routing: The routing result the turn ran under. Nothing is sent when it lacks a
            recommendation id or a chosen model id.
        latency_ms: Wall-clock duration of the run.
        failed: True when the turn raised or ended in a provider error; such a turn is
            reported as quality 0.0 / "failure" without consulting the judge.
        turns_taken: Agent-loop turns used; sent as ``iterations`` (0 is sent as ``None``).

    Returns:
        ``(quality, outcome)`` as computed for the feedback call. Exceptions raised while
        judging or sending are logged and swallowed, so whatever was computed up to that
        point is returned.
    """
    rec_id = routing.recommendation_id
    model_id = routing.chosen_model_id
    if rec_id is None or model_id is None:
        return None, "success"

    # Defaults double as the "not judged" and "judge abstained" result: no quality signal.
    quality: float | None = None
    outcome = "success"
    try:
        reply = self._last_assistant()
        usage = reply.usage if reply is not None else Usage()

        if failed:
            quality, outcome = 0.0, "failure"
        elif self._should_judge():
            answer = reply.text if reply is not None else ""
            verdict = await self.judge.grade(task_text, answer)
            if verdict is not None:
                quality = clamp01(verdict)
                outcome = grade_outcome(quality)

        if not failed and self._rejected_tools > 0:
            # A human rejection outranks the judge (and applies even when judging is off):
            # cap the reported quality at 0.25.
            quality = 0.25 if quality is None else min(quality, 0.25)
            outcome = grade_outcome(quality)

        await self.router.feedback(
            rec_id,
            model_id,
            outcome,
            quality=quality,
            usage=usage,
            latency_ms=latency_ms,
            iterations=turns_taken or None,
        )
    except Exception:  # noqa: BLE001 - feedback must never break a successful run
        _log.warning("minima_feedback_failed", exc_info=True)
    return quality, outcome
453
+
454
+ # ----------------------------------------------------------------- helpers
455
+
456
def record_tool_rejection(self) -> None:
    """Count one human rejection of a proposed edit for the current turn.

    The TUI calls this from diff approval; the tally is reset at the start of each prompt.
    """
    self._rejected_tools = self._rejected_tools + 1
459
+
460
def _should_judge(self) -> bool:
    """Decide whether the current prompt gets judged.

    Judging is disabled when ``config.judge_every`` is zero or negative; otherwise every
    ``judge_every``-th prompt (by the running prompt count) is judged.
    """
    cadence = self.config.judge_every
    return cadence > 0 and self._prompts_run % cadence == 0
465
+
466
def _last_assistant(self) -> AssistantMessage | None:
    """Return the most recent message in ``state.messages`` whose role is "assistant".

    Returns ``None`` when the history holds no assistant message.
    """
    newest_first = reversed(self.state.messages)
    return next(
        (msg for msg in newest_first if msg.role == "assistant"),  # type: ignore[misc]
        None,
    )
471
+
472
+
473
def _default_judge(config: HarnessConfig) -> QualityJudge:
    """Pick the judge used when the caller did not supply one.

    Returns an ``LLMJudge`` on ``config.judge_model`` when an Anthropic credential
    (``ANTHROPIC_API_KEY`` or ``ANTHROPIC_OAUTH_TOKEN``) is set in the environment and the
    judge can be built. In every other case a warning is logged and ``ConstJudge(None)``
    is returned, which (per the warning text) abstains instead of fabricating a score.

    Never raises for a judge-construction failure: the cause is logged at DEBUG so a
    present-but-unusable credential can still be diagnosed.
    """
    if os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_OAUTH_TOKEN"):
        try:
            from minima_harness.ai import get_model

            return LLMJudge(get_model("anthropic", config.judge_model))
        except Exception:  # noqa: BLE001 - any failure degrades to the abstaining judge
            # Previously swallowed silently; keep the fallback but leave a trace of why.
            _log.debug("llm_judge_unavailable_falling_back", exc_info=True)
    _log.warning(
        "no_judge_configured_abstaining -- pass judge=LLMJudge/DeterministicJudge for real "
        "learning; set judge_every=0 to skip judging entirely. Abstaining feeds NO quality "
        "signal (better than a fabricated neutral 0.5 that would poison the feedback loop)."
    )
    return ConstJudge(None)
488
+
489
+
490
def _classify_offline_reason(exc: BaseException, has_key: bool = True) -> tuple[str, bool]:
    """Explain an offline fallback and say whether a retry is worth offering.

    Args:
        exc: The exception raised by the recommend call.
        has_key: Whether a routing API key is configured; only affects the 401/403 wording.

    Returns:
        ``(reason, retryable)``. ``retryable`` is False for a 401/403 status, where the user
        has to add or fix a credential first; True for timeouts, connection failures and
        anything else. The generic reason is the first line of the exception text (or the
        exception class name when the text is blank), cut to 80 characters.
    """
    if getattr(exc, "status", None) in (401, 403):
        if has_key:
            return ("Mubit API key rejected — check MUBIT_API_KEY (/config)", False)
        return ("no Mubit API key — add MUBIT_API_KEY via /config to enable routing", False)

    exc_name = type(exc).__name__
    # Matched on the class name, in this order: a "ConnectTimeout"-style name is a timeout.
    for marker, reason in (("Timeout", "Minima timed out"), ("Connect", "Minima unreachable")):
        if marker in exc_name:
            return (reason, True)

    message = str(exc).strip()
    headline = message.splitlines()[0] if message else exc_name
    return (headline[:80], True)
509
+
510
+
511
def _text_of(content: str | list[ContentBlock] | Message | list[Any]) -> str:
    """Flatten prompt content to plain text.

    A string is returned as-is and a ``Message`` contributes its ``text``. A list is
    flattened item by item (strings, messages, or anything with a ``text`` attribute),
    empty pieces are dropped, and the rest is joined with newlines. Any other value is
    converted with ``str()``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, Message):
        return content.text
    if not isinstance(content, list):
        return str(content)

    def _piece(item):
        # Items without a usable text contribute an empty piece, which is filtered out.
        if isinstance(item, str):
            return item
        if isinstance(item, Message):
            return item.text
        return getattr(item, "text", "")

    return "\n".join(filter(None, map(_piece, content)))
527
+
528
+
529
def _short_label(task_text: str) -> str:
    """One-line label for a cost-meter row (first non-empty line, truncated).

    Leading blank or whitespace-only lines are skipped, so a prompt that starts with a
    newline is still labelled by its first real line. The label is stripped, cut to
    45 characters plus "..." when longer than 48, and is "(empty)" when the text has no
    non-blank line (or is empty/None).
    """
    stripped_lines = (line.strip() for line in (task_text or "").splitlines())
    first = next((line for line in stripped_lines if line), "")
    if len(first) > 48:
        first = first[:45] + "..."
    return first or "(empty)"
535
+
536
+
537
def _provider_of(model_id: str | None) -> str | None:
    """Look up which provider a model id belongs to.

    Returns ``None`` for an empty/missing id and for ids the model registry does not know.
    """
    if model_id:
        from minima_harness.ai.registry import find_model_by_id

        registered = find_model_by_id(model_id)
        if registered:
            return registered.provider
    return None