minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Turn a raw provider error into a short, actionable, provider-aware message.
|
|
2
|
+
|
|
3
|
+
When a provider's HTTP call fails (bad/missing key, model not found, rate limit, network),
|
|
4
|
+
the provider swallows the exception into an ``ErrorEvent`` carrying an empty assistant and a
|
|
5
|
+
raw ``error_message`` (e.g. ``Client error '401 Unauthorized' for url ...``). That string is
|
|
6
|
+
useless to a user. :func:`classify_provider_error` maps it to a one-line explanation that
|
|
7
|
+
names the provider and the env var to set — so "other LLMs don't work" becomes
|
|
8
|
+
"Authentication failed for Anthropic (Claude) running claude-opus-4-8 — set ANTHROPIC_API_KEY (/config)".
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_auth_error(raw: str | None) -> bool:
    """Return True when a provider's raw error text describes a credential failure.

    A credential failure is a bad, invalid or missing API key (HTTP 401,
    ``invalid x-api-key``, ``invalid_api_key``, ``authentication`` ...). This predicate is
    the single source of truth for both the user-facing banner
    (:func:`classify_provider_error`) and the runtime's decision to blacklist a provider
    whose key doesn't work for the rest of the session.

    A schema rejection (pydantic ``extra_forbidden`` / ``are not permitted`` /
    ``validation error``) is NOT an auth error and always yields False, so a tool-schema
    problem is never misread as a dead key.

    Args:
        raw: The provider's raw error message; ``None`` and ``""`` are treated as "no error".

    Returns:
        True if the text looks like an authentication failure, otherwise False.
    """
    import re

    low = (raw or "").lower()

    # Schema rejections take precedence over every auth marker below.
    schema_markers = ("extra_forbidden", "are not permitted", "validation error")
    if any(marker in low for marker in schema_markers):
        return False

    # Match the status code only as a standalone number: a plain substring test would also
    # fire on longer digit runs such as "140123 tokens" or a request id, and a false
    # positive here blacklists a healthy provider.
    if re.search(r"(?<!\d)401(?!\d)", low):
        return True

    if "missing" in low and "key" in low:
        return True

    auth_markers = (
        "unauthor",
        "invalid_api_key",
        "invalid api key",
        "invalid x-api-key",
        "no api key",
        "authentication",
    )
    return any(marker in low for marker in auth_markers)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def classify_provider_error(raw: str | None, model_id: str | None) -> str:
    """Build a one-line, actionable summary of a provider failure.

    Args:
        raw: The provider's ``error_message`` text (may be ``None`` or empty).
        model_id: Id of the model that failed; used to look up the provider's display
            name and the first env var that supplies its key.

    Returns:
        A short message naming the provider and, where it applies, the fix. Errors that
        match no known category fall back to the first line of ``raw`` (capped at 160
        characters), or ``"provider error"`` when ``raw`` is blank.
    """
    from minima_harness.ai.provider_catalog import env_vars_for_provider, spec_for
    from minima_harness.ai.registry import find_model_by_id

    resolved = None
    if model_id:
        resolved = find_model_by_id(model_id)
    provider = resolved.provider if resolved else ""

    spec = spec_for(provider)
    if spec:
        pname = spec.display_name
    else:
        pname = provider or "the provider"

    keyvar = ""
    if provider:
        keyvar = env_vars_for_provider(provider)[0]

    where = f" running {model_id}" if model_id else ""
    low = (raw or "").lower()

    def mentions(*needles: str) -> bool:
        """True when any of ``needles`` occurs in the lower-cased error text."""
        return any(needle in low for needle in needles)

    # Client-side request/schema rejection (NOT a provider auth/quota problem). This must
    # be tested first: a pydantic ValidationError's "extra_forbidden" / "are not permitted"
    # text would otherwise hit the "forbidden"/"permission" branch and masquerade as a 403.
    # The usual cause is a tool whose JSON schema a given model won't accept.
    if mentions(
        "validation error",
        "extra_forbidden",
        "are not permitted",
        "generatecontentconfig",
    ):
        return (
            f"{pname} rejected the request{where} — a tool's schema isn't accepted by this "
            "model; pin another model (/model) or report it"
        )

    if is_auth_error(raw):
        if keyvar:
            hint = f" — set {keyvar} (/config)"
        else:
            hint = " — check the API key (/config)"
        return f"Authentication failed for {pname}{where}{hint}"

    # "insufficient" only counts together with "credit"; the grouping is explicit here.
    if mentions("402", "payment required") or ("insufficient" in low and "credit" in low):
        return f"{pname} needs credits{where} (402) — top up billing or pick a free/cheaper model"

    if mentions("403", "forbidden", "permission"):
        if keyvar:
            fix = f"check {keyvar} (/config)"
        else:
            fix = "check the API key (/config)"
        return (
            f"Access denied by {pname}{where} (key lacks permission, or no quota) "
            f"— {fix} or pin another model (/model)"
        )

    if mentions("429", "rate limit", "rate_limit", "quota"):
        return f"{pname} rate-limited{where} (429) — wait a moment and retry"

    if mentions("404", "not found", "does not exist", "no such model"):
        return f"{pname} doesn't recognize {model_id or 'that model'} (404) — pick another model"

    if mentions(
        "connect",
        "timeout",
        "timed out",
        "name or service not known",
        "getaddrinfo",
        "ssl",
    ):
        return f"Couldn't reach {pname}{where} — network or endpoint problem"

    # Unknown category: surface the first line of the raw text, truncated.
    stripped = (raw or "").strip()
    first = stripped.splitlines()[0] if stripped else "provider error"
    return f"{pname} error{where}: {first[:160]}"
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Streaming event types emitted during assistant message generation.
|
|
2
|
+
|
|
3
|
+
A faithful port of PI's event taxonomy. Events are immutable dataclasses so they can
|
|
4
|
+
be safely fanned out to multiple subscribers. ``content_index`` associates each delta
|
|
5
|
+
or end event with its block (providers interleave deltas across text/thinking/tools).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
from minima_harness.ai.types import AssistantMessage, StopReason, ToolCall
|
|
14
|
+
|
|
15
|
+
# Allowed values for ``ErrorEvent.reason``.
StreamEventReason = Literal["error", "aborted"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True, slots=True)
|
|
19
|
+
class StartEvent:
|
|
20
|
+
"""Stream begins. ``partial`` is the initial assistant message skeleton."""
|
|
21
|
+
|
|
22
|
+
type: Literal["start"] = "start"
|
|
23
|
+
partial: AssistantMessage | None = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True, slots=True)
|
|
27
|
+
class TextStartEvent:
|
|
28
|
+
type: Literal["text_start"] = "text_start"
|
|
29
|
+
content_index: int = 0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True, slots=True)
|
|
33
|
+
class TextDeltaEvent:
|
|
34
|
+
type: Literal["text_delta"] = "text_delta"
|
|
35
|
+
delta: str = ""
|
|
36
|
+
content_index: int = 0
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True, slots=True)
|
|
40
|
+
class TextEndEvent:
|
|
41
|
+
type: Literal["text_end"] = "text_end"
|
|
42
|
+
content: str = ""
|
|
43
|
+
content_index: int = 0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass(frozen=True, slots=True)
|
|
47
|
+
class ThinkingStartEvent:
|
|
48
|
+
type: Literal["thinking_start"] = "thinking_start"
|
|
49
|
+
content_index: int = 0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True, slots=True)
|
|
53
|
+
class ThinkingDeltaEvent:
|
|
54
|
+
type: Literal["thinking_delta"] = "thinking_delta"
|
|
55
|
+
delta: str = ""
|
|
56
|
+
content_index: int = 0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass(frozen=True, slots=True)
|
|
60
|
+
class ThinkingEndEvent:
|
|
61
|
+
type: Literal["thinking_end"] = "thinking_end"
|
|
62
|
+
content: str = ""
|
|
63
|
+
content_index: int = 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(frozen=True, slots=True)
|
|
67
|
+
class ToolCallStartEvent:
|
|
68
|
+
type: Literal["toolcall_start"] = "toolcall_start"
|
|
69
|
+
content_index: int = 0
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass(frozen=True, slots=True)
|
|
73
|
+
class ToolCallDeltaEvent:
|
|
74
|
+
"""Partial tool arguments (best-effort parse; fields may be missing)."""
|
|
75
|
+
|
|
76
|
+
type: Literal["toolcall_delta"] = "toolcall_delta"
|
|
77
|
+
delta: str = ""
|
|
78
|
+
content_index: int = 0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass(frozen=True, slots=True)
|
|
82
|
+
class ToolCallEndEvent:
|
|
83
|
+
type: Literal["toolcall_end"] = "toolcall_end"
|
|
84
|
+
tool_call: ToolCall = None # type: ignore[assignment] # set by provider
|
|
85
|
+
content_index: int = 0
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass(frozen=True, slots=True)
|
|
89
|
+
class DoneEvent:
|
|
90
|
+
type: Literal["done"] = "done"
|
|
91
|
+
reason: StopReason = "stop"
|
|
92
|
+
message: AssistantMessage = None # type: ignore[assignment] # set by provider
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass(frozen=True, slots=True)
|
|
96
|
+
class ErrorEvent:
|
|
97
|
+
"""Emitted on provider error or abort. ``error`` carries partial content."""
|
|
98
|
+
|
|
99
|
+
type: Literal["error"] = "error"
|
|
100
|
+
reason: StreamEventReason = "error"
|
|
101
|
+
error: AssistantMessage = None # type: ignore[assignment] # set by provider
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Union of every stream event class in this module, grouped by the block kind they describe.
Event = (
    StartEvent
    | TextStartEvent | TextDeltaEvent | TextEndEvent
    | ThinkingStartEvent | ThinkingDeltaEvent | ThinkingEndEvent
    | ToolCallStartEvent | ToolCallDeltaEvent | ToolCallEndEvent
    | DoneEvent
    | ErrorEvent
)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Live OpenRouter catalog: one ``OPENROUTER_API_KEY`` → all of OpenRouter's models.
|
|
2
|
+
|
|
3
|
+
OpenRouter is an aggregator — its value is *one key, hundreds of upstream models*. Hardcoding a
|
|
4
|
+
handful of ids (as the static catalog does) throws that away and drifts out of date. This module
|
|
5
|
+
fetches OpenRouter's authoritative ``GET /api/v1/models`` list, parses each entry into a harness
|
|
6
|
+
:class:`~minima_harness.ai.types.Model` (id, live pricing, context window, modalities, reasoning),
|
|
7
|
+
and registers them so any OpenRouter model is callable, pinnable, and routable.
|
|
8
|
+
|
|
9
|
+
It is **offline-safe and fast**: the response is cached to ``~/.minima-harness/cache`` with a TTL,
|
|
10
|
+
so only the first run (or a stale cache) touches the network, and a fetch failure falls back to the
|
|
11
|
+
cache, then to the static curated set — startup never blocks or breaks on a network hiccup.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import logging
|
|
18
|
+
import time
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import httpx
|
|
22
|
+
|
|
23
|
+
from minima_harness.ai.types import Modality, Model, ModelCost
|
|
24
|
+
|
|
25
|
+
_log = logging.getLogger("minima_harness.ai.openrouter")

# Endpoints: the catalog listing, and the base URL stamped onto every parsed model.
_MODELS_URL = "https://openrouter.ai/api/v1/models"
_BASE_URL = "https://openrouter.ai/api/v1"

# Input-modality tuples assigned to text-only and image-capable models.
_TEXT = (Modality.text,)
_MM = (Modality.text, Modality.image)

# Tunables, all in seconds or tokens.
_CACHE_TTL_S = 24 * 60 * 60  # refresh at most once a day
_DEFAULT_MAX_TOKENS = 8192  # used when an entry reports no completion limit
_FETCH_TIMEOUT_S = 12.0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _cache_path() -> Path:
|
|
37
|
+
return Path.home() / ".minima-harness" / "cache" / "openrouter_models.json"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _positive_int(value: object, default: int) -> int:
    """Coerce ``value`` to a positive int; return ``default`` if it is missing or unusable."""
    try:
        parsed = int(value)  # type: ignore[call-overload]
    except (TypeError, ValueError, OverflowError):
        return default
    return parsed if parsed > 0 else default


def _to_model(entry: dict) -> Model | None:
    """Parse one OpenRouter ``/models`` entry into a harness Model.

    Args:
        entry: One element of the response's ``data`` list.

    Returns:
        The parsed :class:`Model`, or ``None`` to skip the entry: it is not a dict, has no
        ``id``, or does not produce text output (embedding / image-gen / audio-only).

    A malformed numeric field never raises. Token limits fall back to their defaults and
    unusable prices fall back to ``0.0``, so one bad entry cannot abort parsing of the
    whole catalog.
    """
    if not isinstance(entry, dict):
        return None
    mid = entry.get("id")
    if not mid:
        return None

    arch = entry.get("architecture") or {}
    out_mods = arch.get("output_modalities") or ["text"]
    if "text" not in out_mods:  # skip embedding / image-gen / audio-only models
        return None
    in_mods = arch.get("input_modalities") or ["text"]

    pricing = entry.get("pricing") or {}
    # OpenRouter prices are USD per token (strings); the harness stores USD per 1M tokens.
    try:
        cost_in = float(pricing.get("prompt") or 0.0) * 1_000_000
        cost_out = float(pricing.get("completion") or 0.0) * 1_000_000
    except (TypeError, ValueError):
        cost_in = cost_out = 0.0
    else:
        # A negative or non-finite price is not a real price (router-style entries can
        # report a negative placeholder — verify against the OpenRouter API reference).
        # Treat it like an unparsable one rather than storing a negative cost. NaN fails
        # both comparisons, so it is rejected here too.
        finite_non_negative = (
            0.0 <= cost_in < float("inf") and 0.0 <= cost_out < float("inf")
        )
        if not finite_non_negative:
            cost_in = cost_out = 0.0

    top = entry.get("top_provider") or {}
    max_out = _positive_int(top.get("max_completion_tokens"), _DEFAULT_MAX_TOKENS)
    supported = entry.get("supported_parameters") or []
    reasoning = "reasoning" in supported or "include_reasoning" in supported

    return Model(
        id=mid,
        provider="openrouter",
        api="openai-completions",
        name=entry.get("name") or mid,
        cost=ModelCost(input=cost_in, output=cost_out),
        context_window=_positive_int(entry.get("context_length"), 128_000),
        max_tokens=min(max_out, 32_768),  # cap the advertised completion limit
        input=_MM if "image" in in_mods else _TEXT,
        reasoning=reasoning,
        base_url=_BASE_URL,
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _parse_payload(payload: dict) -> list[Model]:
|
|
76
|
+
data = payload.get("data") or []
|
|
77
|
+
out: list[Model] = []
|
|
78
|
+
for entry in data:
|
|
79
|
+
model = _to_model(entry)
|
|
80
|
+
if model is not None:
|
|
81
|
+
out.append(model)
|
|
82
|
+
return out
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _read_cache(*, max_age_s: float | None) -> list[Model] | None:
    """Load the cached model list from disk, treating every failure as a cache miss.

    Args:
        max_age_s: Maximum accepted age of the cache file in seconds, measured from its
            modification time. ``None`` accepts a cache of any age.

    Returns:
        The parsed models, or ``None`` when the file is absent, older than ``max_age_s``,
        unreadable or corrupt. This function never raises: callers invoke it outside
        their own error handling.
    """
    path = _cache_path()
    try:
        if not path.is_file():
            return None
        # The age check sits inside the try so that a file removed or made unreadable
        # between the existence check and the stat is a miss, not an exception.
        if max_age_s is not None and (time.time() - path.stat().st_mtime) > max_age_s:
            return None
        # Explicit encoding: do not depend on the platform's locale default.
        return _parse_payload(json.loads(path.read_text(encoding="utf-8")))
    except Exception:  # noqa: BLE001 - a corrupt or vanished cache is just a miss
        return None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _write_cache(payload: dict) -> None:
    """Persist the raw ``/models`` payload to the cache file, best-effort.

    Creates the cache directory if needed. Any failure (serialization or filesystem) is
    logged at debug level and swallowed, so this function never raises.
    """
    target = _cache_path()
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(payload)
        target.write_text(serialized)
    except Exception:  # noqa: BLE001 - caching is best-effort
        _log.debug("openrouter_cache_write_failed", exc_info=True)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def fetch_openrouter_models(
    api_key: str | None = None,
    *,
    timeout: float = _FETCH_TIMEOUT_S,
    ttl_s: float = _CACHE_TTL_S,
    force: bool = False,
) -> list[Model]:
    """Return OpenRouter's model list as harness Models: cache first, network second.

    Resolution order: fresh disk cache → live fetch (then cache it) → stale cache → ``[]``.
    An empty result lets the caller keep the static curated OpenRouter set as a last resort.

    Args:
        api_key: Sent as a bearer token when given; otherwise no auth header is sent.
        timeout: Timeout in seconds passed to the HTTP request.
        ttl_s: Maximum age in seconds for the disk cache to count as fresh.
        force: Skip the fresh-cache lookup and go straight to the network.

    Returns:
        The models found, or ``[]`` when neither the network nor any cache yields one.
        Fetch errors are logged at debug level, not raised.
    """
    fresh = None if force else _read_cache(max_age_s=ttl_s)
    if fresh:
        return fresh

    headers: dict[str, str] = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        response = httpx.get(_MODELS_URL, headers=headers, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
        live = _parse_payload(payload)
        if live:
            # Only a non-empty parse is worth caching or returning.
            _write_cache(payload)
            return live
    except Exception:  # noqa: BLE001 - degrade to cache; the harness must still start
        _log.debug("openrouter_models_fetch_failed", exc_info=True)

    # Reached after a failed fetch or an empty live result: any age beats nothing.
    return _read_cache(max_age_s=None) or []
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def register_openrouter_models(api_key: str | None = None) -> int:
    """Register OpenRouter's live model catalog into the harness registry.

    The key is ``api_key`` when given, otherwise the ``OPENROUTER_API_KEY`` environment
    variable. When the fetch yields nothing, nothing is registered and 0 is returned, so
    previously registered models are left untouched.

    Returns:
        The number of models registered.
    """
    import os

    from minima_harness.ai.registry import register_model

    resolved_key = api_key or os.environ.get("OPENROUTER_API_KEY")
    fetched = fetch_openrouter_models(resolved_key)
    for entry in fetched:
        register_model(entry)

    count = len(fetched)
    if count:
        _log.debug("registered %d openrouter models (live catalog)", count)
    return count
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""Provider catalog: where each LLM provider lives and which models it serves.
|
|
2
|
+
|
|
3
|
+
The single source of truth for multi-provider integration. Almost every provider speaks
|
|
4
|
+
the OpenAI Chat-Completions protocol (``POST {base_url}/chat/completions``), so the generic
|
|
5
|
+
:mod:`~minima_harness.ai.providers.openai_compat` provider can call all of them given a
|
|
6
|
+
``base_url`` and the right API-key env var — that mapping lives here.
|
|
7
|
+
|
|
8
|
+
Three things the rest of the harness reads from this module:
|
|
9
|
+
- :func:`env_vars_for_provider` — provider id -> the env vars that supply its key. Used by
|
|
10
|
+
``openai_compat`` (resolve the call key for *this* model's provider) and by the offline
|
|
11
|
+
fallback / candidate gating (is a model runnable with the configured keys?).
|
|
12
|
+
- :func:`register_catalog_models` — register a curated, current set of models for every
|
|
13
|
+
provider whose key is configured (lean: a user only sees models they can actually run).
|
|
14
|
+
- :data:`PROVIDERS` / :func:`config_providers` — drive the ``minima config`` provider section.
|
|
15
|
+
|
|
16
|
+
Model ids + pricing were verified against each provider's official docs (June 2026). Native
|
|
17
|
+
Anthropic / Google models stay on their own SDK providers; everything else is OpenAI-compatible.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import os
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
|
|
25
|
+
from minima_harness.ai.types import ApiId, Modality, Model, ModelCost
|
|
26
|
+
|
|
27
|
+
# Input-modality tuples: text-only, and text plus image.
_TEXT = (Modality.text,)
_MM = (Modality.text, Modality.image)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True, slots=True)
|
|
32
|
+
class ProviderSpec:
|
|
33
|
+
"""How to reach a provider + which key env vars supply its credential."""
|
|
34
|
+
|
|
35
|
+
name: str
|
|
36
|
+
display_name: str
|
|
37
|
+
category: str # closed-native | aggregator | open-source-host | local-runtime
|
|
38
|
+
api: ApiId
|
|
39
|
+
env_vars: tuple[str, ...]
|
|
40
|
+
base_url: str | None = None # OpenAI-compat base; None = native (anthropic/google) or OpenAI
|
|
41
|
+
requires_key: bool = True
|
|
42
|
+
show_in_config: bool = False # surface as a field in `minima config`
|
|
43
|
+
blurb: str = ""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass(frozen=True, slots=True)
|
|
47
|
+
class ModelSpec:
|
|
48
|
+
id: str
|
|
49
|
+
name: str
|
|
50
|
+
input: float # USD / 1M input tokens
|
|
51
|
+
output: float # USD / 1M output tokens
|
|
52
|
+
context_window: int = 128_000
|
|
53
|
+
max_tokens: int = 8192
|
|
54
|
+
reasoning: bool = False
|
|
55
|
+
multimodal: bool = False
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# --------------------------------------------------------------------------- providers
|
|
59
|
+
# env_vars order = resolution order (first set wins). Native providers carry base_url=None.
|
|
60
|
+
PROVIDERS: tuple[ProviderSpec, ...] = (
    # --- closed-source / proprietary frontier APIs ---
    ProviderSpec(
        "anthropic", "Anthropic (Claude)", "closed-native", "anthropic-messages",
        env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_OAUTH_TOKEN"),
        base_url=None, requires_key=True, show_in_config=True,
        blurb="Claude — Opus / Sonnet / Haiku",
    ),
    ProviderSpec(
        "openai", "OpenAI", "closed-native", "openai-completions",
        env_vars=("OPENAI_API_KEY",),
        base_url=None, requires_key=True, show_in_config=True,
        blurb="GPT-5.x / GPT-4o",
    ),
    ProviderSpec(
        "google", "Google Gemini", "closed-native", "google-generative-ai",
        env_vars=("GEMINI_API_KEY", "GOOGLE_API_KEY", "GOOGLE_GENAI_API_KEY"),
        base_url=None, requires_key=True, show_in_config=True,
        blurb="Gemini 2.5 / 3.5",
    ),
    ProviderSpec(
        "xai", "xAI (Grok)", "closed-native", "openai-completions",
        env_vars=("XAI_API_KEY",),
        base_url="https://api.x.ai/v1", requires_key=True, show_in_config=True,
        blurb="Grok 4.x",
    ),
    ProviderSpec(
        "deepseek", "DeepSeek", "closed-native", "openai-completions",
        env_vars=("DEEPSEEK_API_KEY",),
        base_url="https://api.deepseek.com", requires_key=True, show_in_config=True,
        blurb="DeepSeek V4 (open-weight, cheap)",
    ),
    ProviderSpec(
        "mistral", "Mistral AI", "closed-native", "openai-completions",
        env_vars=("MISTRAL_API_KEY",),
        base_url="https://api.mistral.ai/v1", requires_key=True, show_in_config=True,
        blurb="Mistral Large / Codestral",
    ),
    ProviderSpec(
        "cohere", "Cohere", "closed-native", "openai-completions",
        env_vars=("COHERE_API_KEY", "CO_API_KEY"),
        base_url="https://api.cohere.ai/compatibility/v1",
        requires_key=True, show_in_config=False,
        blurb="Command A / R",
    ),
    ProviderSpec(
        "perplexity", "Perplexity (Sonar)", "closed-native", "openai-completions",
        env_vars=("PERPLEXITY_API_KEY",),
        base_url="https://api.perplexity.ai", requires_key=True, show_in_config=False,
        blurb="Sonar — web-grounded",
    ),
    # --- aggregator (one key, many upstream models) ---
    ProviderSpec(
        "openrouter", "OpenRouter", "aggregator", "openai-completions",
        env_vars=("OPENROUTER_API_KEY",),
        base_url="https://openrouter.ai/api/v1", requires_key=True, show_in_config=True,
        blurb="one key, 100s of open + closed models",
    ),
    # --- open-source / open-weight inference hosts ---
    ProviderSpec(
        "groq", "Groq", "open-source-host", "openai-completions",
        env_vars=("GROQ_API_KEY",),
        base_url="https://api.groq.com/openai/v1", requires_key=True, show_in_config=True,
        blurb="very fast open-weight inference",
    ),
    ProviderSpec(
        "together", "Together AI", "open-source-host", "openai-completions",
        env_vars=("TOGETHER_API_KEY",),
        base_url="https://api.together.ai/v1", requires_key=True, show_in_config=True,
        blurb="Llama / Qwen / DeepSeek open-weight",
    ),
    ProviderSpec(
        "fireworks", "Fireworks AI", "open-source-host", "openai-completions",
        env_vars=("FIREWORKS_API_KEY",),
        base_url="https://api.fireworks.ai/inference/v1",
        requires_key=True, show_in_config=False,
        blurb="open-weight inference",
    ),
    ProviderSpec(
        "deepinfra", "DeepInfra", "open-source-host", "openai-completions",
        env_vars=("DEEPINFRA_TOKEN", "DEEPINFRA_API_KEY"),
        base_url="https://api.deepinfra.com/v1/openai",
        requires_key=True, show_in_config=False,
        blurb="open-weight inference",
    ),
    ProviderSpec(
        "cerebras", "Cerebras", "open-source-host", "openai-completions",
        env_vars=("CEREBRAS_API_KEY",),
        base_url="https://api.cerebras.ai/v1", requires_key=True, show_in_config=False,
        blurb="wafer-scale fast inference",
    ),
    ProviderSpec(
        "hyperbolic", "Hyperbolic", "open-source-host", "openai-completions",
        env_vars=("HYPERBOLIC_API_KEY",),
        base_url="https://api.hyperbolic.xyz/v1", requires_key=True, show_in_config=False,
        blurb="open-weight inference",
    ),
    ProviderSpec(
        "novita", "Novita AI", "open-source-host", "openai-completions",
        env_vars=("NOVITA_API_KEY",),
        base_url="https://api.novita.ai/openai", requires_key=True, show_in_config=False,
        blurb="open-weight inference",
    ),
    # --- local / self-hosted runtimes (no key; model ids are whatever you loaded) ---
    ProviderSpec(
        "ollama", "Ollama (local)", "local-runtime", "openai-completions",
        env_vars=("OLLAMA_API_KEY",),
        base_url="http://localhost:11434/v1", requires_key=False, show_in_config=False,
        blurb="local models via Ollama",
    ),
    ProviderSpec(
        "lmstudio", "LM Studio (local)", "local-runtime", "openai-completions",
        env_vars=("LMSTUDIO_API_KEY",),
        base_url="http://localhost:1234/v1", requires_key=False, show_in_config=False,
        blurb="local models via LM Studio",
    ),
    ProviderSpec(
        "vllm", "vLLM (local)", "local-runtime", "openai-completions",
        env_vars=("VLLM_API_KEY",),
        base_url="http://localhost:8000/v1", requires_key=False, show_in_config=False,
        blurb="self-hosted vLLM",
    ),
    ProviderSpec(
        "llamacpp", "llama.cpp (local)", "local-runtime", "openai-completions",
        env_vars=("LLAMA_API_KEY",),
        base_url="http://localhost:8080/v1", requires_key=False, show_in_config=False,
        blurb="self-hosted llama-server",
    ),
    ProviderSpec(
        "localai", "LocalAI (local)", "local-runtime", "openai-completions",
        env_vars=("LOCALAI_API_KEY",),
        base_url="http://localhost:8080/v1", requires_key=False, show_in_config=False,
        blurb="self-hosted LocalAI",
    ),
)
|
|
126
|
+
|
|
127
|
+
# Lookup table: provider id -> its spec.
_BY_NAME: dict[str, ProviderSpec] = {spec.name: spec for spec in PROVIDERS}

# Generic fallback env vars for an unknown/custom provider (e.g. a models.json "openai-compat"
# entry), so a hand-rolled OpenAI-compatible endpoint can still resolve a key.
_GENERIC_ENV_VARS: tuple[str, ...] = ("OPENAI_COMPAT_API_KEY", "OPENAI_API_KEY")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# --------------------------------------------------------------------------- curated models
|
|
135
|
+
# Verified current ids + USD/1M pricing (official docs, June 2026). The native anthropic/google
|
|
136
|
+
# /openai base set is seeded in registry.py; here we ADD the multi-provider catalog. Models are
|
|
137
|
+
# only registered for a provider once its key is configured (see register_catalog_models).
|
|
138
|
+
# Curated models grouped by provider id. register_catalog_models() looks each key up in
# _BY_NAME and skips keys that have no ProviderSpec.
# Positional fields are (id, name, input price, output price, context_window, max_tokens);
# _to_model() reads them back as m.id / m.name / m.input / m.output / ... .
# NOTE(review): the trailing positional booleans are presumably (reasoning, multimodal) --
# confirm against the ModelSpec definition, which is outside this section.
# NOTE(review): some ids repeat across providers (e.g. "openai/gpt-oss-120b" under both
# "groq" and "together"); confirm register_model / find_model_by_id do not key on id alone.
CATALOG_MODELS: dict[str, list[ModelSpec]] = {
    "openai": [
        ModelSpec("gpt-5.4-nano", "GPT-5.4 nano", 0.20, 1.25, 400_000, 16_384, multimodal=True),
        ModelSpec("gpt-5.4-mini", "GPT-5.4 mini", 0.75, 4.50, 400_000, 16_384, multimodal=True),
        ModelSpec("gpt-5.4", "GPT-5.4", 2.50, 15.0, 1_050_000, 16_384, True, True),
        ModelSpec("gpt-5.5", "GPT-5.5", 5.0, 30.0, 1_050_000, 16_384, True, True),
    ],
    "google": [
        ModelSpec("gemini-2.5-flash-lite", "Gemini 2.5 Flash-Lite", 0.10, 0.40,
                  1_048_576, 8192, multimodal=True),
        ModelSpec("gemini-3.5-flash", "Gemini 3.5 Flash", 1.50, 9.0, 1_048_576, 8192, True, True),
    ],
    "xai": [
        ModelSpec("grok-build-0.1", "Grok Build 0.1 (coding)", 1.0, 2.0, 256_000, 16_384, True),
        ModelSpec("grok-4.3", "Grok 4.3", 1.25, 2.50, 1_000_000, 16_384, True),
    ],
    "deepseek": [
        ModelSpec("deepseek-v4-flash", "DeepSeek V4 Flash", 0.14, 0.28, 1_000_000, 16_384, True),
        ModelSpec("deepseek-v4-pro", "DeepSeek V4 Pro", 0.435, 0.87, 1_000_000, 16_384, True),
    ],
    "mistral": [
        ModelSpec("mistral-small-latest", "Mistral Small 4", 0.15, 0.60, 128_000, 8192),
        ModelSpec("codestral-latest", "Codestral (code)", 0.30, 0.90, 256_000, 16_384),
        ModelSpec("mistral-medium-latest", "Mistral Medium 3.5", 1.50, 7.50, 128_000, 8192),
    ],
    "cohere": [
        ModelSpec("command-r-08-2024", "Command R (08-2024)", 0.15, 0.60, 128_000, 4096),
        ModelSpec("command-a-03-2025", "Command A (03-2025)", 2.50, 10.0, 256_000, 8192),
    ],
    "perplexity": [
        ModelSpec("sonar", "Sonar (web-grounded)", 1.0, 1.0, 128_000, 8192),
        ModelSpec("sonar-pro", "Sonar Pro (web-grounded)", 3.0, 15.0, 200_000, 8192),
    ],
    "openrouter": [
        ModelSpec("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B (OpenRouter)",
                  0.10, 0.32, 131_072, 8192),
        ModelSpec("deepseek/deepseek-chat-v3.1", "DeepSeek V3.1 (OpenRouter)",
                  0.21, 0.79, 163_840, 8192, True),
        ModelSpec("qwen/qwen3-235b-a22b", "Qwen3 235B (OpenRouter)", 0.455, 1.82, 131_072, 8192),
        ModelSpec("meta-llama/llama-4-maverick", "Llama 4 Maverick (OpenRouter)",
                  0.15, 0.60, 1_048_576, 8192, multimodal=True),
    ],
    "groq": [
        ModelSpec("llama-3.1-8b-instant", "Llama 3.1 8B Instant (Groq)", 0.05, 0.08, 131_072, 8192),
        ModelSpec("openai/gpt-oss-120b", "GPT-OSS 120B (Groq)", 0.15, 0.60, 131_072, 8192, True),
        ModelSpec("llama-3.3-70b-versatile", "Llama 3.3 70B (Groq)", 0.59, 0.79, 131_072, 8192),
    ],
    "together": [
        ModelSpec("openai/gpt-oss-120b", "GPT-OSS 120B (Together)",
                  0.15, 0.60, 131_072, 8192, True),
        ModelSpec("Qwen/Qwen3-235B-A22B-Instruct-2507-tput", "Qwen3 235B (Together)",
                  0.20, 0.60, 262_144, 8192),
        ModelSpec("meta-llama/Llama-3.3-70B-Instruct-Turbo", "Llama 3.3 70B (Together)",
                  1.04, 1.04, 131_072, 8192),
    ],
    "fireworks": [
        ModelSpec("accounts/fireworks/models/gpt-oss-120b", "GPT-OSS 120B (Fireworks)",
                  0.15, 0.60, 131_072, 8192, True),
        ModelSpec("accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
                  "Qwen3 235B (Fireworks)", 0.22, 0.88, 262_144, 8192),
    ],
    "deepinfra": [
        ModelSpec("meta-llama/Llama-3.3-70B-Instruct-Turbo", "Llama 3.3 70B (DeepInfra)",
                  0.10, 0.32, 131_072, 8192),
        ModelSpec("deepseek-ai/DeepSeek-V4-Flash", "DeepSeek V4 Flash (DeepInfra)",
                  0.10, 0.20, 1_000_000, 16_384, True),
    ],
    "cerebras": [
        ModelSpec("gpt-oss-120b", "GPT-OSS 120B (Cerebras)", 0.35, 0.75, 131_072, 8192, True),
    ],
    "hyperbolic": [
        ModelSpec("deepseek-ai/DeepSeek-V3", "DeepSeek V3 (Hyperbolic)", 0.25, 0.25, 131_072, 8192),
        ModelSpec("meta-llama/Llama-3.3-70B-Instruct", "Llama 3.3 70B (Hyperbolic)",
                  0.40, 0.40, 131_072, 8192),
    ],
    "novita": [
        ModelSpec("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B (Novita)",
                  0.135, 0.40, 131_072, 8192),
        ModelSpec("deepseek/deepseek-v3", "DeepSeek V3 (Novita)", 0.27, 1.12, 163_840, 8192),
    ],
}
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# --------------------------------------------------------------------------- helpers
|
|
222
|
+
def spec_for(provider: str) -> ProviderSpec | None:
    """Look up the ``ProviderSpec`` for ``provider`` (case-insensitive), or ``None``
    when the id is not one of the built-in providers."""
    lookup_key = provider.lower()
    return _BY_NAME.get(lookup_key)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def env_vars_for_provider(provider: str) -> tuple[str, ...]:
    """Return the env var names that can hold ``provider``'s API key.

    The provider id is matched case-insensitively. An id that is not a built-in
    provider gets the generic OpenAI-compatible variables instead, so a custom
    (e.g. models.json) endpoint can still resolve a key.
    """
    known = _BY_NAME.get(provider.lower())
    if not known:
        return _GENERIC_ENV_VARS
    return known.env_vars
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def provider_key_present(provider: str) -> bool:
    """Report whether ``provider`` is usable from a credentials point of view.

    Returns ``True`` immediately for a known provider whose spec says no key is
    required. Otherwise returns ``True`` only if at least one of the provider's
    env vars is set to a non-empty value.
    """
    known = _BY_NAME.get(provider.lower())
    key_needed = known is None or known.requires_key
    if not key_needed:
        return True
    for var_name in env_vars_for_provider(provider):
        # An empty string counts as "not set", same as a missing variable.
        if os.environ.get(var_name):
            return True
    return False
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def configured_providers() -> list[str]:
    """List the ids of key-requiring providers that currently have a key set.

    Providers whose spec has ``requires_key`` false are never included. The result
    follows the declaration order of ``PROVIDERS``.
    """
    names: list[str] = []
    for entry in PROVIDERS:
        if not entry.requires_key:
            continue
        if provider_key_present(entry.name):
            names.append(entry.name)
    return names
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def config_providers() -> list[ProviderSpec]:
    """Return the specs flagged ``show_in_config``, i.e. the providers surfaced as
    fields in ``minima config``, in ``PROVIDERS`` declaration order."""
    visible: list[ProviderSpec] = []
    for entry in PROVIDERS:
        if entry.show_in_config:
            visible.append(entry)
    return visible
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def runnable_candidates(candidate_ids: list[str]) -> list[str]:
    """Narrow routing candidates to the models the user can actually call.

    A candidate is kept when ``find_model_by_id`` returns ``None`` for it (the local
    registry does not know it, but Minima may) or when its provider's key is present.
    If that leaves nothing, a copy of the original list is returned instead, so
    routing surfaces a clear auth error rather than an empty candidate set.
    """
    from minima_harness.ai.registry import find_model_by_id

    def _callable_now(model_id: str) -> bool:
        entry = find_model_by_id(model_id)
        if entry is None:
            return True
        return provider_key_present(entry.provider)

    kept = [model_id for model_id in candidate_ids if _callable_now(model_id)]
    if kept:
        return kept
    return list(candidate_ids)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _to_model(provider: str, spec: ProviderSpec, m: ModelSpec) -> Model:
    """Build a registry ``Model`` from a catalog entry ``m``.

    Identity, pricing, limits and capability flags come from ``m``; the wire API and
    base URL come from the provider ``spec``; ``provider`` is stored as given.
    """
    pricing = ModelCost(input=m.input, output=m.output)
    if m.multimodal:
        accepted_input = _MM
    else:
        accepted_input = _TEXT
    return Model(
        id=m.id,
        provider=provider,
        api=spec.api,
        name=m.name,
        cost=pricing,
        context_window=m.context_window,
        max_tokens=m.max_tokens,
        input=accepted_input,
        reasoning=m.reasoning,
        base_url=spec.base_url,
    )
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def register_catalog_models(*, present_keys_only: bool = True) -> list[str]:
    """Register the curated ``CATALOG_MODELS`` entries with the model registry.

    With ``present_keys_only`` (the default), a provider that requires a key is
    skipped unless that key is currently set, which keeps the registry limited to
    models the user can run. Catalog keys with no matching ``ProviderSpec`` are
    always skipped.

    Returns the provider ids whose models were registered, in catalog order.
    """
    from minima_harness.ai.registry import register_model

    done: list[str] = []
    for provider_id, entries in CATALOG_MODELS.items():
        provider_spec = _BY_NAME.get(provider_id)
        if provider_spec is None:
            continue
        if present_keys_only:
            # Only consult the environment when filtering was requested.
            if provider_spec.requires_key and not provider_key_present(provider_id):
                continue
        for entry in entries:
            register_model(_to_model(provider_id, provider_spec, entry))
        done.append(provider_id)
    return done
|