minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
|
@@ -0,0 +1,544 @@
|
|
|
1
|
+
"""MinimaAgent — an :class:`~minima_harness.agent.Agent` that routes each prompt
|
|
2
|
+
through Minima and feeds the realized outcome back.
|
|
3
|
+
|
|
4
|
+
Per top-level ``prompt()``: (1) ask Minima which model and set ``state.model``, (2) run
|
|
5
|
+
the agent loop (delegate to the base Agent, so tool turns keep working), (3) judge the
|
|
6
|
+
final answer and send ``POST /v1/feedback`` with realized tokens/cost/latency. Routing is
|
|
7
|
+
bypassable: if Minima is unreachable and ``allow_offline`` is set, the run proceeds on the
|
|
8
|
+
current model with no feedback. Bookkeeping failures are logged-and-swallowed so the
|
|
9
|
+
Minima round-trip never breaks the caller's run.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
from collections.abc import Awaitable, Callable
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from time import monotonic
|
|
19
|
+
from typing import TYPE_CHECKING, Any
|
|
20
|
+
|
|
21
|
+
from minima_harness.agent.agent import Agent
|
|
22
|
+
from minima_harness.agent.tools import ThinkingLevel
|
|
23
|
+
from minima_harness.ai.errors import classify_provider_error, is_auth_error
|
|
24
|
+
from minima_harness.ai.types import AssistantMessage, ContentBlock, Message, Model, Usage
|
|
25
|
+
from minima_harness.minima.config import HarnessConfig
|
|
26
|
+
from minima_harness.minima.judge import (
|
|
27
|
+
ConstJudge,
|
|
28
|
+
LLMJudge,
|
|
29
|
+
QualityJudge,
|
|
30
|
+
clamp01,
|
|
31
|
+
)
|
|
32
|
+
from minima_harness.minima.mapping import ModelMapping
|
|
33
|
+
from minima_harness.minima.meter import CostMeter
|
|
34
|
+
from minima_harness.minima.router import MinimaRouter, Ranking, RoutingResult
|
|
35
|
+
from minima_harness.minima.signals import ContextExtractor, extract_or_none
|
|
36
|
+
from minima_harness.tasks.task_set import grade_outcome
|
|
37
|
+
|
|
38
|
+
if TYPE_CHECKING:
    # Placeholder: there are currently no typing-only imports in this module.
    pass

# Module logger. The name is spelled out explicitly instead of being derived from __name__.
_log = logging.getLogger("minima_harness.runtime")

# Signature of the optional ``before_route`` hook: an async callable awaited with
# ``(routing_result, task_text)`` so a caller can inspect/override a recommendation before
# the model runs. Return a (possibly modified) RoutingResult to override the model; None to
# accept the recommendation as-is; a result with recommendation_id=None to veto (run a
# different model with no feedback attribution).
BeforeRoute = Callable[[RoutingResult, str], Awaitable[RoutingResult | None]]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class MinimaAgent(Agent):
|
|
50
|
+
def __init__(
    self,
    config: HarnessConfig,
    *,
    router: MinimaRouter | None = None,
    judge: QualityJudge | None = None,
    mapping: ModelMapping | None = None,
    model: Model | None = None,
    tools: list | None = None,
    system_prompt: str | None = None,
    task_type: str | None = None,
    thinking_level: ThinkingLevel = "off",
    max_turns: int = 50,
    meter: CostMeter | None = None,
    before_route: BeforeRoute | None = None,
    extractor: ContextExtractor | None = None,
) -> None:
    """Set up routing collaborators and per-turn bookkeeping, then initialise the base Agent.

    Args:
        config: Harness configuration; stored as ``self.config``.
        router: Minima router to use. When falsy, one is built with
            ``MinimaRouter.for_config(config, mapping)``.
        judge: Quality judge. When ``None``, ``_default_judge(config)`` picks one.
        mapping: Model mapping. When falsy, the router's mapping is reused if a router was
            given, otherwise a fresh ``ModelMapping()`` is created.
        model: Initial model. When falsy, ``mapping.default_model()`` is used.
        tools: Tools handed to the base Agent (copied into a new list).
        system_prompt: Forwarded to the base Agent.
        task_type: Default task-type hint used when ``prompt()`` is called without one.
        thinking_level: Forwarded to the base Agent.
        max_turns: Forwarded to the base Agent.
        meter: Optional cost meter; stored as ``self.meter``.
        before_route: Optional async hook; stored as ``self.before_route``.
        extractor: Optional context extractor; stored as ``self.extractor``.
    """
    self.config = config

    # Resolve the mapping first: building a default router needs it.
    if not mapping:
        mapping = router.mapping if router else ModelMapping()
    self.mapping = mapping
    if not router:
        router = MinimaRouter.for_config(config, self.mapping)
    self.router = router
    self.judge = _default_judge(config) if judge is None else judge

    self.meter = meter
    self.before_route = before_route
    self.extractor = extractor
    self._task_type_hint = task_type
    self._prompts_run = 0

    # Number of tool calls the human turned down during the current turn (diff-approval).
    # A rejection is ground truth and outranks the noisier judge score in feedback.
    self._rejected_tools = 0

    # Reason the most recent route degraded to offline; None means routing worked. The TUI
    # shows it so an offline turn is never silent.
    self._offline_reason: str | None = None
    # Whether /reconnect could plausibly fix that offline state. False for config/auth
    # problems (missing or invalid Mubit key): the user has to fix a credential, so the TUI
    # must not suggest a misleading "/reconnect".
    self._offline_retryable: bool = True

    # Classified headline for why the last turn's model call failed (None = it ran). Provider
    # errors (bad key, 404, network) come back as an empty assistant message, so this is what
    # lets the TUI / --print explain a blank turn.
    self._last_error: str | None = None
    # The provider's unclassified error text for that same failure. Keeping the exact wording
    # (e.g. Gemini's "RESOURCE_EXHAUSTED … quota …" vs "PERMISSION_DENIED …") makes an
    # ambiguous 403/429 diagnosable.
    self._last_error_raw: str | None = None

    # Providers whose key hard-failed auth during this session. They are removed from the
    # candidate set so routing stops recommending them and the turn is re-routed elsewhere.
    # /reconnect clears the set because the key may have been fixed.
    self._excluded_providers: set[str] = set()
    # One-line user-facing note for a turn that was auto-rerouted away from a dead-key
    # provider (None = no reroute this turn), so the provider switch is explained.
    self._reroute_note: str | None = None

    super().__init__(
        model=model or self.mapping.default_model(),
        tools=list(tools or []),
        system_prompt=system_prompt,
        thinking_level=thinking_level,
        max_turns=max_turns,
    )
|
|
110
|
+
|
|
111
|
+
async def prompt(  # type: ignore[override]  # widens base with optional routing kwargs
    self,
    content: str | list[ContentBlock] | Message | list[Any],
    *,
    task_type: str | None = None,
    slider: float | None = None,
    files: list[str | Path] | None = None,
    tags: list[str] | None = None,
) -> RoutingResult | None:
    """Route one top-level prompt, run it on the base Agent, then report the outcome.

    Steps: reset per-turn state, call ``_route`` (which selects ``state.model``), delegate
    to the base ``prompt``, detect failures, optionally re-route off a provider whose key
    failed auth, send feedback, record a meter row, and roll a failed turn out of history.

    Args:
        content: The prompt; also flattened to text via ``_text_of`` for routing/judging.
        task_type: Task-type hint for this call; falls back to the constructor's hint.
        slider: Forwarded unchanged to ``_route``.
        files: Forwarded unchanged to ``_route``.
        tags: Forwarded unchanged to ``_route``.

    Returns:
        The ``RoutingResult`` from the last ``_route`` call, or ``None`` when that call
        returned ``None`` (offline fallback).

    Raises:
        BaseException: Whatever the base ``prompt`` raised, re-raised after feedback, meter
            recording and history rollback have run. Errors from ``_route`` propagate
            immediately.
    """
    task_text = _text_of(content)
    effective_task_type = task_type or self._task_type_hint
    self._prompts_run += 1
    self._rejected_tools = 0  # reset per-turn reject tally
    self._last_error = None  # reset per-turn error
    self._last_error_raw = None
    self._reroute_note = None  # reset per-turn auto-reroute note

    routing: RoutingResult | None = None
    last: AssistantMessage | None = None
    run_error: BaseException | None = None
    latency_ms = 0
    turns_taken = 0
    # Snapshot history so a failed turn can be rolled back out of the agent's context entirely.
    msgs_before = len(self.state.messages)
    # A hard auth failure (bad/invalid/missing key) is deterministic — the same provider fails
    # identically on every call. So when one occurs and a *different* provider's key works,
    # blacklist the dead provider for the session and re-run the SAME message on an alternative,
    # rescuing this turn instead of wasting it. The exclusion set grows by one per pass, so the
    # loop always terminates; range() is a hard backstop.
    for _attempt in range(self._reroute_budget() + 1):
        routing = await self._route(
            task_text, effective_task_type, slider, files=files, tags=tags, reroute=_attempt > 0
        )
        # On a REROUTE pass: if routing handed back a provider already blacklisted this turn,
        # re-running it would just fail identically — stop and surface the prior error. This
        # catches the cases the candidate filter can't: an offline route (which can't switch
        # models) and a recommender that ignores the candidate constraint and re-picks the dead
        # model. Gated to reroute passes so the FIRST attempt always runs (and surfaces a real
        # error) even when the only model's provider was excluded on a previous turn.
        run_provider = _provider_of(self.state.model.id if self.state.model else None)
        already_dead = run_provider is not None and run_provider in self._excluded_providers
        if _attempt > 0 and already_dead:
            break
        # Re-snapshot per attempt: everything appended from here on belongs to this attempt.
        msgs_before = len(self.state.messages)
        start = monotonic()
        run_error = None
        try:
            await super().prompt(content)
        except BaseException as exc:  # noqa: BLE001 - capture, then re-raise after feedback
            run_error = exc
        latency_ms = int((monotonic() - start) * 1000)
        turns_taken = self.state.turns_taken
        last = self._last_assistant()
        # A provider call that failed (bad/missing key, 404, network) is swallowed by the
        # provider into an empty-text assistant with stop_reason="error" — NOT a raised
        # exception. Treat that as a failed turn so (a) Minima is never told a broken turn
        # "succeeded" (which would poison routing), and (b) the caller can surface why.
        provider_error = last is not None and getattr(last, "stop_reason", None) == "error"
        if provider_error and last is not None:
            self._last_error = classify_provider_error(last.error_message, last.model)
            self._last_error_raw = last.error_message
            # Log the raw provider error so it's recoverable even off the TUI (--print).
            _log.warning("provider_error_raw model=%s: %s", last.model, last.error_message)
        # Auto-reroute off a dead-key provider — but never second-guess an explicit pin.
        if (
            provider_error
            and run_error is None
            and last is not None
            and not self.config.pinned
            and is_auth_error(last.error_message)
        ):
            provider = _provider_of(last.model)
            if provider:
                self._excluded_providers.add(provider)
                if self._has_runnable_candidate():
                    self._note_reroute(provider)
                    del self.state.messages[msgs_before:]  # drop failed attempt, then retry
                    self._last_error = self._last_error_raw = None
                    continue
        # No reroute was triggered: this attempt's result (good or bad) is final.
        break

    provider_error = last is not None and getattr(last, "stop_reason", None) == "error"
    if provider_error and last is not None:
        # The final loop pass may have cleared these on a reroute `continue` that then couldn't
        # actually switch providers (offline, or a recommender that re-picked the dead model).
        # Re-derive so a still-failing turn surfaces the real error (not a blank "success") and
        # drop the optimistic reroute note (the reroute did NOT rescue the turn).
        self._last_error = classify_provider_error(last.error_message, last.model)
        self._last_error_raw = last.error_message
        self._reroute_note = None
    # An auth/infra failure is a credential problem, not a model-quality signal — don't feed it
    # back to Minima (it would poison the model's success estimate in this namespace).
    auth_failed = bool(
        provider_error and last is not None and is_auth_error(last.error_message)
    )
    failed = run_error is not None or provider_error
    quality: float | None = None
    outcome = "success"
    if routing is not None and not auth_failed:
        quality, outcome = await self._feedback_safely(
            task_text, routing, latency_ms, failed, turns_taken
        )
    if self.meter is not None:
        # Realized cost is read from the most recent assistant message's usage.
        actual = last.usage.cost.total if last is not None else 0.0
        self.meter.record(
            label=_short_label(task_text),
            routing=routing,
            actual_cost_usd=actual,
            quality=quality if not failed else 0.0,
            outcome=("failure" if failed else outcome),
            turns=turns_taken,
        )
    # Roll a failed turn fully out of the agent's context — both the empty error-assistant
    # AND the user message that triggered it. A failed turn produced no usable exchange, so
    # leaving it in history only poisons the NEXT turn (the loop's _drop_failed_calls guard
    # already strips the empty assistant; this also avoids a dangling user turn). Done after
    # feedback/meter so they still see the failed turn's signal.
    if failed:
        del self.state.messages[msgs_before:]
    if run_error is not None:
        raise run_error
    return routing
|
|
233
|
+
|
|
234
|
+
# ------------------------------------------------------------------ routing
|
|
235
|
+
|
|
236
|
+
async def _route(
    self,
    task_text: str,
    task_type: str | None,
    slider: float | None,
    *,
    files: list[str | Path] | None = None,
    tags: list[str] | None = None,
    reroute: bool = False,
) -> RoutingResult | None:
    """Pick the model for this attempt and store it on ``state.model``.

    A hard pin short-circuits Minima entirely; otherwise the router's ``recommend`` is asked,
    using extracted context signals plus any caller tags. When ``recommend`` fails and
    ``config.allow_offline`` is set, the failure is classified, logged, and ``None`` is
    returned so the run continues on the current model; without ``allow_offline`` the error
    propagates.

    Args:
        task_text: Plain-text form of the prompt.
        task_type: Optional task-type hint passed to the router.
        slider: Passed to the router unchanged.
        files: Optional paths handed to the context extractor (converted to ``Path``).
        tags: Caller tags, appended after the extractor's tags.
        reroute: True on an auto-reroute pass; suppresses the ``before_route`` hook.

    Returns:
        The routing result in effect (possibly replaced by the hook), or ``None`` on an
        offline fallback.
    """
    # The hook emits the routing rationale line (and the confirm modal in confirm mode). One
    # user turn must yield ONE of those, so reroute passes skip it; the auto-reroute note
    # explains the switch instead.
    hook_enabled = not reroute and self.before_route is not None

    async def _commit(result: RoutingResult) -> RoutingResult:
        """Offer ``result`` to the hook (when enabled), then select its model."""
        if hook_enabled:
            replacement = await self.before_route(result, task_text)
            if replacement is not None:
                result = replacement
        self.state.model = result.model
        return result

    # A hard pin (exactly one candidate, set via /model) is a deliberate override with nothing
    # to route. Sending a single-model constraint to Minima 422s when the pinned id is not in
    # its catalog (e.g. OpenRouter-style `google/gemini-2.5-flash`), which used to degrade to
    # a *different* offline model — so the recommend call is skipped altogether.
    direct = self._pinned_route()
    if direct is not None:
        self._offline_reason, self._offline_retryable = None, True
        return await _commit(direct)

    paths = [Path(f) for f in files] if files else None
    bundle = await extract_or_none(self.extractor, task_text, paths)
    # Code-derived signal tags come first, then caller tags (e.g. a goal tag, so that a goal's
    # turns cluster together in Minima's memory).
    signal_tags = bundle.tags if bundle else []
    combined_tags = signal_tags + (tags or [])
    difficulty = bundle.difficulty if bundle else None
    expected_tokens = bundle.expected_input_tokens if bundle else None
    try:
        routing = await self.router.recommend(
            task_text,
            task_type=task_type,
            slider=slider,
            tags=combined_tags or None,
            difficulty=difficulty,
            expected_input_tokens=expected_tokens,
            candidates=self._effective_candidates(),
        )
        self._offline_reason = None
        self._offline_retryable = True
    except Exception as exc:  # noqa: BLE001
        if not self.config.allow_offline:
            raise
        key_configured = bool((self.config.minima_api_key or "").strip())
        reason, retryable = _classify_offline_reason(exc, key_configured)
        self._offline_reason, self._offline_retryable = reason, retryable
        # This degradation is expected and recoverable: short reason at WARNING, full
        # traceback only at DEBUG so a healthy fallback doesn't dump a stack trace.
        _log.warning("minima_recommend_failed_offline_fallback: %s", self._offline_reason)
        _log.debug("offline_fallback_detail", exc_info=True)
        return None
    return await _commit(routing)
|
|
303
|
+
|
|
304
|
+
def _pinned_route(self) -> RoutingResult | None:
    """Build a routing result for a hard-pinned model without calling Minima.

    Applies only when ``config.pinned`` is set and there is exactly one candidate. Returns
    ``None`` when that is not the case, or when the pinned id cannot be resolved to a
    registered model (normal routing then runs).
    """
    from minima_harness.ai.registry import find_model_by_id

    candidates = self.config.candidates or []
    if not (self.config.pinned and len(candidates) == 1):
        return None

    pin = candidates[0]
    resolved = find_model_by_id(pin)
    if resolved is None and "/" in pin:
        # OpenRouter-style "provider/model" id: retry through the mapping with the prefix.
        prefix = pin.split("/", 1)[0]
        resolved = self.mapping._resolve(prefix, pin)
    if resolved is None:
        return None

    sole_ranking = Ranking(
        model_id=pin,
        provider=resolved.provider,
        predicted_success=1.0,
        est_cost_usd=0.0,
        decision_basis="pinned",
    )
    # recommendation_id stays None: a manual pin is not a Minima recommendation to learn from.
    return RoutingResult(
        recommendation_id=None,
        chosen_model_id=pin,
        model=resolved,
        est_cost_usd=0.0,
        decision_basis="pinned",
        ranked=[sole_ranking],
        confidence=1.0,
    )
|
|
336
|
+
|
|
337
|
+
# ------------------------------------------------------ key-aware candidates
|
|
338
|
+
|
|
339
|
+
def _effective_candidates(self) -> list[str]:
    """Return the candidate model ids that are worth sending to the router.

    Starts from ``runnable_candidates(config.candidates)`` and drops every id whose provider
    is in the session's excluded set. If that would remove everything, the unpruned runnable
    list is returned instead, so routing still attempts a model (and the auth banner explains
    the situation) rather than locking the user out.
    """
    from minima_harness.ai.provider_catalog import runnable_candidates

    runnable = runnable_candidates(self.config.candidates)
    if not self._excluded_providers:
        return runnable
    survivors = [cid for cid in runnable if _provider_of(cid) not in self._excluded_providers]
    return survivors or runnable
|
|
351
|
+
|
|
352
|
+
def _reroute_budget(self) -> int:
    """Upper bound on auto-reroute passes for one turn.

    Equals the number of distinct, known providers among ``config.candidates`` (so each can
    be tried at most once), and never less than 1.
    """
    providers = set()
    for candidate in self.config.candidates or []:
        provider = _provider_of(candidate)
        if provider is not None:
            providers.add(provider)
    return max(1, len(providers))
|
|
358
|
+
|
|
359
|
+
def _has_runnable_candidate(self) -> bool:
    """Report whether any candidate could still run.

    A candidate counts when its provider is known, is not in the excluded set, and
    ``provider_key_present`` says a key is configured for it.
    """
    from minima_harness.ai.provider_catalog import provider_key_present

    providers = (_provider_of(cid) for cid in self.config.candidates or [])
    return any(
        prov is not None and prov not in self._excluded_providers and provider_key_present(prov)
        for prov in providers
    )
|
|
370
|
+
|
|
371
|
+
def _note_reroute(self, provider: str) -> None:
    """Record a one-line, user-facing explanation for an auto-reroute off a dead key.

    The note names the provider (its display name when a spec is registered, else the raw
    provider id) and, when an env var is known for it, which variable to fix.

    Args:
        provider: Provider id whose key was rejected.
    """
    from minima_harness.ai.provider_catalog import env_vars_for_provider, spec_for

    spec = spec_for(provider)
    pname = spec.display_name if spec else provider
    # Building this note must never break the turn it is explaining: tolerate a provider with
    # no registered env vars (empty/None result) instead of raising IndexError on `[0]`.
    env_vars = env_vars_for_provider(provider) if provider else None
    keyvar = env_vars[0] if env_vars else ""
    hint = f" — fix {keyvar} (/config) to re-enable" if keyvar else ""
    self._reroute_note = f"{pname} key rejected; excluded this session{hint}"
|
|
380
|
+
|
|
381
|
+
async def reconnect(self) -> None:
    """Rebuild the Minima client from the current environment.

    Routing auth + the endpoint URL are captured when the client is built, so a key or
    ``MINIMA_URL`` set via ``/config`` mid-session doesn't take effect until the client is
    rebuilt. ``/reconnect`` (and a routing-key change in ``/config``) call this so the fix
    applies without restarting the app. The stale client is closed best-effort: a failure
    while closing it is logged and swallowed, because the new client is already in place.
    """
    self.config.refresh_routing_env()
    old = self.router
    self.router = MinimaRouter.for_config(self.config, self.mapping)
    self._offline_reason = None
    self._offline_retryable = True
    # A key fixed via /config (which triggers reconnect) may revive an auth-failed provider —
    # clear the session blacklist so routing can choose it again.
    self._excluded_providers.clear()
    try:
        await old.aclose()
    except Exception:  # noqa: BLE001 - closing the replaced client must not fail reconnect
        _log.warning("stale_router_close_failed", exc_info=True)
|
|
398
|
+
|
|
399
|
+
async def _feedback_safely(
    self,
    task_text: str,
    routing: RoutingResult,
    latency_ms: int,
    failed: bool,
    turns_taken: int = 0,
) -> tuple[float | None, str]:
    """Report the turn's outcome to Minima and return the ``(quality, outcome)`` that was used.

    Nothing is sent when the routing carries no recommendation id or no chosen model id.
    Any exception raised while judging or sending is logged and swallowed.

    Args:
        task_text: Plain-text prompt, given to the judge.
        routing: The routing result the turn ran under.
        latency_ms: Wall-clock duration of the run in milliseconds.
        failed: True when the turn raised OR the model returned a provider error (empty
            output); such a turn is a ground-truth failure regardless of judging.
        turns_taken: Loop iterations used; sent as ``iterations`` (``None`` when 0).

    Returns:
        ``(quality, outcome)``; ``quality`` is ``None`` when no quality signal was produced.
    """
    attributable = routing.recommendation_id is not None and routing.chosen_model_id is not None
    if not attributable:
        return None, "success"

    # Defaults double as the "no signal" result: judging skipped, or the judge abstained
    # (API error / unparseable) — cost and latency are still reported, quality is not faked.
    quality: float | None = None
    outcome = "success"
    try:
        final = self._last_assistant()
        usage = Usage() if final is None else final.usage
        if failed:
            quality, outcome = 0.0, "failure"
        elif self._should_judge():
            answer = "" if final is None else final.text
            verdict = await self.judge.grade(task_text, answer)
            if verdict is not None:
                quality = clamp01(verdict)
                outcome = grade_outcome(quality)
        if not failed and self._rejected_tools > 0:
            # The human rejected the model's edit(s): a strong ground-truth negative that caps
            # quality at 0.25, overriding the judge (and applying even when judging is off).
            quality = min(quality if quality is not None else 0.25, 0.25)
            outcome = grade_outcome(quality)
        await self.router.feedback(
            routing.recommendation_id,
            routing.chosen_model_id,
            outcome,
            quality=quality,
            usage=usage,
            latency_ms=latency_ms,
            iterations=turns_taken or None,
        )
    except Exception:  # noqa: BLE001 - feedback must never break a successful run
        _log.warning("minima_feedback_failed", exc_info=True)
    return quality, outcome
|
|
453
|
+
|
|
454
|
+
# ----------------------------------------------------------------- helpers
|
|
455
|
+
|
|
456
|
+
def record_tool_rejection(self) -> None:
    """Count one more human-rejected edit (diff approval) for the current turn; TUI entry point."""
    self._rejected_tools = self._rejected_tools + 1
|
|
459
|
+
|
|
460
|
+
def _should_judge(self) -> bool:
    """Decide whether the current prompt is judged.

    With ``config.judge_every`` <= 0 judging is off; otherwise every ``judge_every``-th
    prompt (by the running ``_prompts_run`` count) is judged.
    """
    interval = self.config.judge_every
    return False if interval <= 0 else (self._prompts_run % interval) == 0
|
|
465
|
+
|
|
466
|
+
def _last_assistant(self) -> AssistantMessage | None:
    """Return the newest message in ``state.messages`` whose role is "assistant", else None."""
    newest_first = reversed(self.state.messages)
    return next((msg for msg in newest_first if msg.role == "assistant"), None)  # type: ignore[return-value]
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _default_judge(config: HarnessConfig) -> QualityJudge:
    """Pick the judge used when the caller supplies none.

    Returns an ``LLMJudge`` on the Anthropic model named by ``config.judge_model`` when an
    Anthropic credential (``ANTHROPIC_API_KEY`` or ``ANTHROPIC_OAUTH_TOKEN``) is present in
    the environment and the judge can be constructed; otherwise an abstaining
    ``ConstJudge(None)``. Never raises for a judge-construction failure.
    """
    if os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_OAUTH_TOKEN"):
        try:
            from minima_harness.ai import get_model

            return LLMJudge(get_model("anthropic", config.judge_model))
        except Exception:  # noqa: BLE001 - judge setup is best-effort; fall back to abstaining
            # Keep the cause recoverable: without this, a credential that IS present but a
            # judge that failed to build is indistinguishable from "no judge configured".
            _log.debug(
                "llm_judge_init_failed model=%s",
                getattr(config, "judge_model", None),
                exc_info=True,
            )
    _log.warning(
        "no_judge_configured_abstaining -- pass judge=LLMJudge/DeterministicJudge for real "
        "learning; set judge_every=0 to skip judging entirely. Abstaining feeds NO quality "
        "signal (better than a fabricated neutral 0.5 that would poison the feedback loop)."
    )
    return ConstJudge(None)
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _classify_offline_reason(exc: BaseException, has_key: bool = True) -> tuple[str, bool]:
    """Explain an offline fallback and say whether a retry could help.

    Args:
        exc: The exception that made routing fall back to offline.
        has_key: Whether a Minima/Mubit API key is configured at all.

    Returns:
        ``(reason, retryable)``. ``retryable`` is False for a 401/403 ``status`` on the
        exception — a credential must be added or fixed first, so ``/reconnect`` alone won't
        help and the TUI should show that actionable step instead. Timeouts, connection
        errors and everything else are retryable; for the generic case the reason is the
        first line of the exception text (or the exception type name), cut to 80 characters.
    """
    if getattr(exc, "status", None) in (401, 403):
        if has_key:
            return ("Mubit API key rejected — check MUBIT_API_KEY (/config)", False)
        return ("no Mubit API key — add MUBIT_API_KEY via /config to enable routing", False)

    exc_name = type(exc).__name__
    # Matched on the class name; "Timeout" is checked before "Connect".
    for marker, reason in (("Timeout", "Minima timed out"), ("Connect", "Minima unreachable")):
        if marker in exc_name:
            return (reason, True)

    text = str(exc).strip()
    headline = text.splitlines()[0] if text else exc_name
    return (headline[:80], True)
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _text_of(content: str | list[ContentBlock] | Message | list[Any]) -> str:
    """Flatten prompt content to plain text.

    A string is returned as-is and a ``Message`` yields its ``text``. For a list, each item
    contributes itself (str), its ``text`` (Message), or its ``text`` attribute if it has one;
    empty pieces are dropped and the rest are joined with newlines. Anything else is
    converted with ``str()``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, Message):
        return content.text
    if not isinstance(content, list):
        return str(content)

    def _piece(item: Any) -> str:
        """Text contributed by a single list item ("" when it has none)."""
        if isinstance(item, str):
            return item
        if isinstance(item, Message):
            return item.text
        return getattr(item, "text", "")

    pieces = [_piece(item) for item in content]
    return "\n".join(piece for piece in pieces if piece)
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def _short_label(task_text: str) -> str:
    """One-line label for a cost-meter row (first non-empty line, truncated).

    Leading blank or whitespace-only lines are skipped, so a prompt that starts with a
    newline still gets a meaningful label. Labels longer than 48 characters are cut to 45
    characters plus "..."; a prompt with no non-blank line yields "(empty)".
    """
    stripped_lines = (line.strip() for line in (task_text or "").splitlines())
    first = next((line for line in stripped_lines if line), "")
    if len(first) > 48:
        first = first[:45] + "..."
    return first or "(empty)"
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def _provider_of(model_id: str | None) -> str | None:
    """Look up the provider of a model id in the registry; None for a missing or unknown id."""
    if model_id:
        from minima_harness.ai.registry import find_model_by_id

        entry = find_model_by_id(model_id)
        if entry:
            return entry.provider
    return None
|