minima-cli 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. minima/__init__.py +5 -0
  2. minima/api/__init__.py +1 -0
  3. minima/api/auth.py +39 -0
  4. minima/api/errors.py +40 -0
  5. minima/api/routers/__init__.py +1 -0
  6. minima/api/routers/calibration.py +50 -0
  7. minima/api/routers/feedback.py +279 -0
  8. minima/api/routers/health.py +50 -0
  9. minima/api/routers/models.py +42 -0
  10. minima/api/routers/recommend.py +66 -0
  11. minima/api/routers/savings.py +55 -0
  12. minima/api/routers/strategies.py +33 -0
  13. minima/catalog/__init__.py +1 -0
  14. minima/catalog/data/capability_priors.json +210 -0
  15. minima/catalog/data/model_aliases.json +12 -0
  16. minima/catalog/merge.py +69 -0
  17. minima/catalog/refresh.py +54 -0
  18. minima/catalog/sources/__init__.py +1 -0
  19. minima/catalog/sources/litellm.py +19 -0
  20. minima/catalog/sources/openrouter.py +25 -0
  21. minima/catalog/store.py +86 -0
  22. minima/config.py +288 -0
  23. minima/deps.py +35 -0
  24. minima/llm/__init__.py +1 -0
  25. minima/llm/anthropic.py +106 -0
  26. minima/llm/base.py +196 -0
  27. minima/llm/gemini.py +124 -0
  28. minima/llm/registry.py +54 -0
  29. minima/logging.py +28 -0
  30. minima/main.py +109 -0
  31. minima/memory/__init__.py +1 -0
  32. minima/memory/adapter.py +572 -0
  33. minima/memory/keys.py +83 -0
  34. minima/memory/records.py +190 -0
  35. minima/memory/threadpool.py +41 -0
  36. minima/metrics/__init__.py +1 -0
  37. minima/metrics/calibration.py +415 -0
  38. minima/metrics/report.py +116 -0
  39. minima/metrics/savings.py +98 -0
  40. minima/recommender/__init__.py +1 -0
  41. minima/recommender/_pg_pool.py +38 -0
  42. minima/recommender/_redis_client.py +32 -0
  43. minima/recommender/aggregate.py +157 -0
  44. minima/recommender/classify.py +165 -0
  45. minima/recommender/decisionlog.py +505 -0
  46. minima/recommender/durablerefs.py +312 -0
  47. minima/recommender/engine.py +997 -0
  48. minima/recommender/escalation.py +83 -0
  49. minima/recommender/propensity.py +189 -0
  50. minima/recommender/recstore.py +368 -0
  51. minima/recommender/score.py +318 -0
  52. minima/recommender/types.py +166 -0
  53. minima/schemas/__init__.py +1 -0
  54. minima/schemas/common.py +73 -0
  55. minima/schemas/feedback.py +34 -0
  56. minima/schemas/models_catalog.py +36 -0
  57. minima/schemas/recommend.py +104 -0
  58. minima/schemas/savings.py +39 -0
  59. minima/schemas/strategies.py +57 -0
  60. minima/schemas/workflow.py +43 -0
  61. minima/seeding/__init__.py +1 -0
  62. minima/seeding/items.py +42 -0
  63. minima/seeding/llmrouterbench.py +232 -0
  64. minima/seeding/routerbench.py +141 -0
  65. minima/seeding/run_seed.py +56 -0
  66. minima/seeding/synthetic.py +70 -0
  67. minima/tenancy/__init__.py +8 -0
  68. minima/tenancy/context.py +37 -0
  69. minima/tenancy/passthrough.py +110 -0
  70. minima/version.py +3 -0
  71. minima_cli-0.4.9.dist-info/METADATA +275 -0
  72. minima_cli-0.4.9.dist-info/RECORD +161 -0
  73. minima_cli-0.4.9.dist-info/WHEEL +4 -0
  74. minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
  75. minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
  76. minima_client/__init__.py +19 -0
  77. minima_client/autocapture.py +101 -0
  78. minima_client/client.py +301 -0
  79. minima_client/errors.py +23 -0
  80. minima_harness/LICENSE_PI +32 -0
  81. minima_harness/__init__.py +16 -0
  82. minima_harness/agent/__init__.py +72 -0
  83. minima_harness/agent/agent.py +276 -0
  84. minima_harness/agent/events.py +124 -0
  85. minima_harness/agent/loop.py +311 -0
  86. minima_harness/agent/state.py +79 -0
  87. minima_harness/agent/tools.py +97 -0
  88. minima_harness/ai/__init__.py +66 -0
  89. minima_harness/ai/compat.py +71 -0
  90. minima_harness/ai/errors.py +96 -0
  91. minima_harness/ai/events.py +117 -0
  92. minima_harness/ai/openrouter_catalog.py +153 -0
  93. minima_harness/ai/provider_catalog.py +299 -0
  94. minima_harness/ai/provider_quirks.py +37 -0
  95. minima_harness/ai/providers/__init__.py +75 -0
  96. minima_harness/ai/providers/_common.py +48 -0
  97. minima_harness/ai/providers/anthropic.py +290 -0
  98. minima_harness/ai/providers/base.py +65 -0
  99. minima_harness/ai/providers/faux.py +173 -0
  100. minima_harness/ai/providers/google.py +221 -0
  101. minima_harness/ai/providers/openai_compat.py +278 -0
  102. minima_harness/ai/registry.py +184 -0
  103. minima_harness/ai/stream.py +82 -0
  104. minima_harness/ai/tools.py +51 -0
  105. minima_harness/ai/types.py +204 -0
  106. minima_harness/ai/usage.py +41 -0
  107. minima_harness/minima/__init__.py +40 -0
  108. minima_harness/minima/cache.py +102 -0
  109. minima_harness/minima/config.py +85 -0
  110. minima_harness/minima/goals.py +226 -0
  111. minima_harness/minima/judge.py +144 -0
  112. minima_harness/minima/mapping.py +147 -0
  113. minima_harness/minima/meter.py +143 -0
  114. minima_harness/minima/router.py +220 -0
  115. minima_harness/minima/runtime.py +544 -0
  116. minima_harness/minima/signals.py +195 -0
  117. minima_harness/session/__init__.py +14 -0
  118. minima_harness/session/format.py +35 -0
  119. minima_harness/session/store.py +236 -0
  120. minima_harness/tasks/__init__.py +17 -0
  121. minima_harness/tasks/task_set.py +78 -0
  122. minima_harness/tools/__init__.py +7 -0
  123. minima_harness/tools/_io.py +34 -0
  124. minima_harness/tools/bash.py +70 -0
  125. minima_harness/tools/builtin.py +23 -0
  126. minima_harness/tools/edit.py +50 -0
  127. minima_harness/tools/find.py +38 -0
  128. minima_harness/tools/grep.py +73 -0
  129. minima_harness/tools/ls.py +35 -0
  130. minima_harness/tools/read.py +38 -0
  131. minima_harness/tools/tasks.py +75 -0
  132. minima_harness/tools/write.py +36 -0
  133. minima_harness/tui/__init__.py +3 -0
  134. minima_harness/tui/analytics.py +111 -0
  135. minima_harness/tui/app.py +1927 -0
  136. minima_harness/tui/bridge.py +103 -0
  137. minima_harness/tui/cli.py +227 -0
  138. minima_harness/tui/clipboard.py +60 -0
  139. minima_harness/tui/commands.py +49 -0
  140. minima_harness/tui/compaction.py +17 -0
  141. minima_harness/tui/config_cli.py +141 -0
  142. minima_harness/tui/config_store.py +237 -0
  143. minima_harness/tui/context.py +93 -0
  144. minima_harness/tui/customize.py +95 -0
  145. minima_harness/tui/diff.py +53 -0
  146. minima_harness/tui/editor.py +43 -0
  147. minima_harness/tui/extensions.py +84 -0
  148. minima_harness/tui/extra_models.py +52 -0
  149. minima_harness/tui/history.py +71 -0
  150. minima_harness/tui/mubit.py +295 -0
  151. minima_harness/tui/overlays.py +593 -0
  152. minima_harness/tui/packages.py +59 -0
  153. minima_harness/tui/run_modes.py +66 -0
  154. minima_harness/tui/theme.py +77 -0
  155. minima_harness/tui/welcome.py +83 -0
  156. minima_harness/tui/widgets/__init__.py +3 -0
  157. minima_harness/tui/widgets/banner.py +38 -0
  158. minima_harness/tui/widgets/editor.py +83 -0
  159. minima_harness/tui/widgets/footer.py +73 -0
  160. minima_harness/tui/widgets/messages.py +151 -0
  161. minima_harness/tui/widgets/status.py +57 -0
@@ -0,0 +1,96 @@
1
+ """Turn a raw provider error into a short, actionable, provider-aware message.
2
+
3
+ When a provider's HTTP call fails (bad/missing key, model not found, rate limit, network),
4
+ the provider swallows the exception into an ``ErrorEvent`` carrying an empty assistant and a
5
+ raw ``error_message`` (e.g. ``Client error '401 Unauthorized' for url ...``). That string is
6
+ useless to a user. :func:`classify_provider_error` maps it to a one-line explanation that
7
+ names the provider and the env var to set — so "other LLMs don't work" becomes
8
+ "Authentication failed for Anthropic (Claude) running claude-opus-4-8 — set ANTHROPIC_API_KEY (/config)".
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+
14
def is_auth_error(raw: str | None) -> bool:
    """Return True when a provider's raw error text describes a credential failure.

    A credential failure is a bad/invalid/missing API key (HTTP 401, ``invalid x-api-key``,
    ``invalid_api_key``, ``authentication``...). This is the single source of truth for both
    the user-facing banner (:func:`classify_provider_error`) and the runtime's decision to
    blacklist a provider whose key doesn't work for the rest of the session.

    A schema rejection (pydantic ``extra_forbidden`` / ``are not permitted`` /
    ``validation error``) is NOT an auth error and is excluded first, so a tool-schema
    problem never gets misread as a dead key.

    Args:
        raw: The provider's raw error message; ``None`` and ``""`` are treated as no error.

    Returns:
        ``True`` for a credential failure, ``False`` otherwise.
    """
    import re

    low = (raw or "").lower()

    # Schema rejections win over everything else, even if the text also mentions a 401.
    if any(marker in low for marker in ("extra_forbidden", "are not permitted", "validation error")):
        return False

    # Match the status code only as a standalone token. A bare substring test would also
    # fire on request ids, token counts or model ids that merely contain "401", and a false
    # positive here blacklists a working provider for the whole session.
    if re.search(r"(?<![0-9a-z])401(?![0-9a-z])", low):
        return True

    if "missing" in low and "key" in low:
        return True

    return any(
        marker in low
        for marker in (
            "unauthor",
            "invalid_api_key",
            "invalid api key",
            "invalid x-api-key",
            "no api key",
            "authentication",
        )
    )
34
+
35
+
36
def classify_provider_error(raw: str | None, model_id: str | None) -> str:
    """Summarize a provider failure as one short, actionable, provider-aware line.

    Args:
        raw: The provider's ``error_message`` (may be ``None`` or empty).
        model_id: Id of the model that failed. It is looked up in the registry to name the
            provider and the first env var that supplies its key.

    Returns:
        A one-line message. Checks run in this order: client-side schema rejection,
        authentication, 402 credits, 403 access, 429 rate limit, 404 unknown model,
        network. Anything else falls back to the first line of ``raw`` (truncated to 160
        characters), or ``"provider error"`` when ``raw`` is blank.
    """
    import re

    from minima_harness.ai.provider_catalog import env_vars_for_provider, spec_for
    from minima_harness.ai.registry import find_model_by_id

    model = find_model_by_id(model_id) if model_id else None
    provider = model.provider if model else ""
    spec = spec_for(provider)
    pname = spec.display_name if spec else (provider or "the provider")
    keyvar = env_vars_for_provider(provider)[0] if provider else ""
    where = f" running {model_id}" if model_id else ""
    low = (raw or "").lower()

    def has_status(code: str) -> bool:
        # Only a standalone status code counts; "404" inside a request id, token count or
        # model id must not be read as an HTTP status.
        return re.search(rf"(?<![0-9a-z]){code}(?![0-9a-z])", low) is not None

    def mentions(*needles: str) -> bool:
        return any(needle in low for needle in needles)

    # Client-side request/schema rejection (NOT a provider auth/quota problem). Catch this
    # first: a pydantic ValidationError's "extra_forbidden" / "are not permitted" text would
    # otherwise match the "forbidden"/"permission" branch below and masquerade as a 403. The
    # usual cause is a tool whose JSON schema a given model won't accept.
    if mentions("validation error", "extra_forbidden", "are not permitted", "generatecontentconfig"):
        return (
            f"{pname} rejected the request{where} — a tool's schema isn't accepted by this "
            "model; pin another model (/model) or report it"
        )

    if is_auth_error(raw):
        hint = f" — set {keyvar} (/config)" if keyvar else " — check the API key (/config)"
        return f"Authentication failed for {pname}{where}{hint}"

    if has_status("402") or mentions("payment required") or ("insufficient" in low and "credit" in low):
        return f"{pname} needs credits{where} (402) — top up billing or pick a free/cheaper model"

    if has_status("403") or mentions("forbidden", "permission"):
        fix = f"check {keyvar} (/config)" if keyvar else "check the API key (/config)"
        return (
            f"Access denied by {pname}{where} (key lacks permission, or no quota) "
            f"— {fix} or pin another model (/model)"
        )

    if has_status("429") or mentions("rate limit", "rate_limit", "quota"):
        return f"{pname} rate-limited{where} (429) — wait a moment and retry"

    if has_status("404") or mentions("not found", "does not exist", "no such model"):
        return f"{pname} doesn't recognize {model_id or 'that model'} (404) — pick another model"

    if mentions("connect", "timeout", "timed out", "name or service not known", "getaddrinfo", "ssl"):
        return f"Couldn't reach {pname}{where} — network or endpoint problem"

    # Unknown failure: show the first line of the raw text, capped so the banner stays short.
    stripped = (raw or "").strip()
    first = stripped.splitlines()[0] if stripped else "provider error"
    return f"{pname} error{where}: {first[:160]}"
@@ -0,0 +1,117 @@
1
+ """Streaming event types emitted during assistant message generation.
2
+
3
+ A faithful port of PI's event taxonomy. Events are immutable dataclasses so they can
4
+ be safely fanned out to multiple subscribers. ``content_index`` associates each delta
5
+ or end event with its block (providers interleave deltas across text/thinking/tools).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from typing import Literal
12
+
13
+ from minima_harness.ai.types import AssistantMessage, StopReason, ToolCall
14
+
15
+ StreamEventReason = Literal["error", "aborted"]
16
+
17
+
18
+ @dataclass(frozen=True, slots=True)
19
+ class StartEvent:
20
+ """Stream begins. ``partial`` is the initial assistant message skeleton."""
21
+
22
+ type: Literal["start"] = "start"
23
+ partial: AssistantMessage | None = None
24
+
25
+
26
+ @dataclass(frozen=True, slots=True)
27
+ class TextStartEvent:
28
+ type: Literal["text_start"] = "text_start"
29
+ content_index: int = 0
30
+
31
+
32
+ @dataclass(frozen=True, slots=True)
33
+ class TextDeltaEvent:
34
+ type: Literal["text_delta"] = "text_delta"
35
+ delta: str = ""
36
+ content_index: int = 0
37
+
38
+
39
+ @dataclass(frozen=True, slots=True)
40
+ class TextEndEvent:
41
+ type: Literal["text_end"] = "text_end"
42
+ content: str = ""
43
+ content_index: int = 0
44
+
45
+
46
+ @dataclass(frozen=True, slots=True)
47
+ class ThinkingStartEvent:
48
+ type: Literal["thinking_start"] = "thinking_start"
49
+ content_index: int = 0
50
+
51
+
52
+ @dataclass(frozen=True, slots=True)
53
+ class ThinkingDeltaEvent:
54
+ type: Literal["thinking_delta"] = "thinking_delta"
55
+ delta: str = ""
56
+ content_index: int = 0
57
+
58
+
59
+ @dataclass(frozen=True, slots=True)
60
+ class ThinkingEndEvent:
61
+ type: Literal["thinking_end"] = "thinking_end"
62
+ content: str = ""
63
+ content_index: int = 0
64
+
65
+
66
+ @dataclass(frozen=True, slots=True)
67
+ class ToolCallStartEvent:
68
+ type: Literal["toolcall_start"] = "toolcall_start"
69
+ content_index: int = 0
70
+
71
+
72
+ @dataclass(frozen=True, slots=True)
73
+ class ToolCallDeltaEvent:
74
+ """Partial tool arguments (best-effort parse; fields may be missing)."""
75
+
76
+ type: Literal["toolcall_delta"] = "toolcall_delta"
77
+ delta: str = ""
78
+ content_index: int = 0
79
+
80
+
81
+ @dataclass(frozen=True, slots=True)
82
+ class ToolCallEndEvent:
83
+ type: Literal["toolcall_end"] = "toolcall_end"
84
+ tool_call: ToolCall = None # type: ignore[assignment] # set by provider
85
+ content_index: int = 0
86
+
87
+
88
+ @dataclass(frozen=True, slots=True)
89
+ class DoneEvent:
90
+ type: Literal["done"] = "done"
91
+ reason: StopReason = "stop"
92
+ message: AssistantMessage = None # type: ignore[assignment] # set by provider
93
+
94
+
95
+ @dataclass(frozen=True, slots=True)
96
+ class ErrorEvent:
97
+ """Emitted on provider error or abort. ``error`` carries partial content."""
98
+
99
+ type: Literal["error"] = "error"
100
+ reason: StreamEventReason = "error"
101
+ error: AssistantMessage = None # type: ignore[assignment] # set by provider
102
+
103
+
104
+ Event = (
105
+ StartEvent
106
+ | TextStartEvent
107
+ | TextDeltaEvent
108
+ | TextEndEvent
109
+ | ThinkingStartEvent
110
+ | ThinkingDeltaEvent
111
+ | ThinkingEndEvent
112
+ | ToolCallStartEvent
113
+ | ToolCallDeltaEvent
114
+ | ToolCallEndEvent
115
+ | DoneEvent
116
+ | ErrorEvent
117
+ )
@@ -0,0 +1,153 @@
1
+ """Live OpenRouter catalog: one ``OPENROUTER_API_KEY`` → all of OpenRouter's models.
2
+
3
+ OpenRouter is an aggregator — its value is *one key, hundreds of upstream models*. Hardcoding a
4
+ handful of ids (as the static catalog does) throws that away and drifts out of date. This module
5
+ fetches OpenRouter's authoritative ``GET /api/v1/models`` list, parses each entry into a harness
6
+ :class:`~minima_harness.ai.types.Model` (id, live pricing, context window, modalities, reasoning),
7
+ and registers them so any OpenRouter model is callable, pinnable, and routable.
8
+
9
+ It is **offline-safe and fast**: the response is cached to ``~/.minima-harness/cache`` with a TTL,
10
+ so only the first run (or a stale cache) touches the network, and a fetch failure falls back to the
11
+ cache, then to the static curated set — startup never blocks or breaks on a network hiccup.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ import time
19
+ from pathlib import Path
20
+
21
+ import httpx
22
+
23
+ from minima_harness.ai.types import Modality, Model, ModelCost
24
+
25
+ _log = logging.getLogger("minima_harness.ai.openrouter")
26
+
27
+ _MODELS_URL = "https://openrouter.ai/api/v1/models"
28
+ _BASE_URL = "https://openrouter.ai/api/v1"
29
+ _TEXT = (Modality.text,)
30
+ _MM = (Modality.text, Modality.image)
31
+ _CACHE_TTL_S = 24 * 3600 # refresh at most once a day
32
+ _DEFAULT_MAX_TOKENS = 8192
33
+ _FETCH_TIMEOUT_S = 12.0
34
+
35
+
36
+ def _cache_path() -> Path:
37
+ return Path.home() / ".minima-harness" / "cache" / "openrouter_models.json"
38
+
39
+
40
+ def _to_model(entry: dict) -> Model | None:
41
+ """Parse one OpenRouter /models entry into a harness Model (None to skip non-chat models)."""
42
+ mid = entry.get("id")
43
+ if not mid:
44
+ return None
45
+ arch = entry.get("architecture") or {}
46
+ out_mods = arch.get("output_modalities") or ["text"]
47
+ if "text" not in out_mods: # skip embedding / image-gen / audio-only models
48
+ return None
49
+ in_mods = arch.get("input_modalities") or ["text"]
50
+ pricing = entry.get("pricing") or {}
51
+ # OpenRouter prices are USD per token (strings); the harness stores USD per 1M tokens.
52
+ try:
53
+ cost_in = float(pricing.get("prompt") or 0.0) * 1_000_000
54
+ cost_out = float(pricing.get("completion") or 0.0) * 1_000_000
55
+ except (TypeError, ValueError):
56
+ cost_in = cost_out = 0.0
57
+ top = entry.get("top_provider") or {}
58
+ max_out = int(top.get("max_completion_tokens") or 0) or _DEFAULT_MAX_TOKENS
59
+ supported = entry.get("supported_parameters") or []
60
+ reasoning = "reasoning" in supported or "include_reasoning" in supported
61
+ return Model(
62
+ id=mid,
63
+ provider="openrouter",
64
+ api="openai-completions",
65
+ name=entry.get("name") or mid,
66
+ cost=ModelCost(input=cost_in, output=cost_out),
67
+ context_window=int(entry.get("context_length") or 128_000),
68
+ max_tokens=min(max_out, 32_768),
69
+ input=_MM if "image" in in_mods else _TEXT,
70
+ reasoning=reasoning,
71
+ base_url=_BASE_URL,
72
+ )
73
+
74
+
75
+ def _parse_payload(payload: dict) -> list[Model]:
76
+ data = payload.get("data") or []
77
+ out: list[Model] = []
78
+ for entry in data:
79
+ model = _to_model(entry)
80
+ if model is not None:
81
+ out.append(model)
82
+ return out
83
+
84
+
85
def _read_cache(*, max_age_s: float | None) -> list[Model] | None:
    """Load models from the disk cache, treating every failure as a cache miss.

    Args:
        max_age_s: Maximum accepted age of the cache file in seconds, judged by its
            mtime; ``None`` accepts a cache of any age.

    Returns:
        The parsed models, or ``None`` when the cache is absent, too old, unreadable
        or corrupt.
    """
    try:
        path = _cache_path()
        if not path.is_file():
            return None
        # stat() stays inside the try: the file can vanish between is_file() and stat(),
        # and callers that promise never to raise rely on this returning None instead.
        if max_age_s is not None and (time.time() - path.stat().st_mtime) > max_age_s:
            return None
        return _parse_payload(json.loads(path.read_text(encoding="utf-8")))
    except Exception:  # noqa: BLE001 - a corrupt or vanished cache is just a miss
        return None
95
+
96
+
97
def _write_cache(payload: dict) -> None:
    """Persist the raw ``/models`` payload to the cache file.

    Best-effort: any failure creating the directory, serializing or writing is only
    logged at debug level.
    """
    target = _cache_path()
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(payload)
        target.write_text(serialized)
    except Exception:  # noqa: BLE001 - caching is best-effort
        _log.debug("openrouter_cache_write_failed", exc_info=True)
104
+
105
+
106
def fetch_openrouter_models(
    api_key: str | None = None,
    *,
    timeout: float = _FETCH_TIMEOUT_S,
    ttl_s: float = _CACHE_TTL_S,
    force: bool = False,
) -> list[Model]:
    """Return OpenRouter's model list as harness Models. Cache-first, network-second.

    Resolution order: fresh disk cache → live fetch (then cache it) → stale cache → ``[]``.
    Returning ``[]`` lets the caller keep the static curated OpenRouter set as a last resort.

    Args:
        api_key: Optional OpenRouter key, sent as a bearer token when given.
        timeout: Timeout in seconds passed to the HTTP request.
        ttl_s: Maximum age in seconds for the disk cache to count as fresh.
        force: Skip the fresh-cache check and go to the network first.

    Returns:
        The parsed models, or ``[]`` when neither the network nor any cache yields one.
    """
    if not force:
        cached = _read_cache(max_age_s=ttl_s)
        if cached:
            return cached

    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    models: list[Model] = []
    try:
        resp = httpx.get(_MODELS_URL, headers=headers, timeout=timeout)
        resp.raise_for_status()
        payload = resp.json()
        models = _parse_payload(payload)
        if models:
            _write_cache(payload)
    except Exception:  # noqa: BLE001 - degrade to cache; the harness must still start
        _log.debug("openrouter_models_fetch_failed", exc_info=True)

    if models:
        return models
    # A fetch that failed OR came back with no usable models falls through to the stale
    # cache, so an empty or garbled response never hides a catalog we already have on disk.
    stale = _read_cache(max_age_s=None)  # any age beats nothing
    return stale or []
135
+
136
+
137
def register_openrouter_models(api_key: str | None = None) -> int:
    """Register OpenRouter's live model catalog in the harness registry.

    The key is ``api_key`` when given, otherwise the ``OPENROUTER_API_KEY`` environment
    variable. This function only adds models: when the fetch yields nothing it registers
    nothing and returns 0, leaving whatever is already registered untouched.

    Returns:
        The number of models registered.
    """
    import os

    from minima_harness.ai.registry import register_model

    resolved_key = api_key or os.environ.get("OPENROUTER_API_KEY")
    live = fetch_openrouter_models(resolved_key)
    count = len(live)
    for entry in live:
        register_model(entry)
    if count:
        _log.debug("registered %d openrouter models (live catalog)", count)
    return count
@@ -0,0 +1,299 @@
1
+ """Provider catalog: where each LLM provider lives and which models it serves.
2
+
3
+ The single source of truth for multi-provider integration. Almost every provider speaks
4
+ the OpenAI Chat-Completions protocol (``POST {base_url}/chat/completions``), so the generic
5
+ :mod:`~minima_harness.ai.providers.openai_compat` provider can call all of them given a
6
+ ``base_url`` and the right API-key env var — that mapping lives here.
7
+
8
+ Three things the rest of the harness reads from this module:
9
+ - :func:`env_vars_for_provider` — provider id -> the env vars that supply its key. Used by
10
+ ``openai_compat`` (resolve the call key for *this* model's provider) and by the offline
11
+ fallback / candidate gating (is a model runnable with the configured keys?).
12
+ - :func:`register_catalog_models` — register a curated, current set of models for every
13
+ provider whose key is configured (lean: a user only sees models they can actually run).
14
+ - :data:`PROVIDERS` / :func:`config_providers` — drive the ``minima config`` provider section.
15
+
16
+ Model ids + pricing were verified against each provider's official docs (June 2026). Native
17
+ Anthropic / Google models stay on their own SDK providers; everything else is OpenAI-compatible.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import os
23
+ from dataclasses import dataclass
24
+
25
+ from minima_harness.ai.types import ApiId, Modality, Model, ModelCost
26
+
27
+ _TEXT = (Modality.text,)
28
+ _MM = (Modality.text, Modality.image)
29
+
30
+
31
+ @dataclass(frozen=True, slots=True)
32
+ class ProviderSpec:
33
+ """How to reach a provider + which key env vars supply its credential."""
34
+
35
+ name: str
36
+ display_name: str
37
+ category: str # closed-native | aggregator | open-source-host | local-runtime
38
+ api: ApiId
39
+ env_vars: tuple[str, ...]
40
+ base_url: str | None = None # OpenAI-compat base; None = native (anthropic/google) or OpenAI
41
+ requires_key: bool = True
42
+ show_in_config: bool = False # surface as a field in `minima config`
43
+ blurb: str = ""
44
+
45
+
46
+ @dataclass(frozen=True, slots=True)
47
+ class ModelSpec:
48
+ id: str
49
+ name: str
50
+ input: float # USD / 1M input tokens
51
+ output: float # USD / 1M output tokens
52
+ context_window: int = 128_000
53
+ max_tokens: int = 8192
54
+ reasoning: bool = False
55
+ multimodal: bool = False
56
+
57
+
58
+ # --------------------------------------------------------------------------- providers
59
+ # env_vars order = resolution order (first set wins). Native providers carry base_url=None.
60
+ PROVIDERS: tuple[ProviderSpec, ...] = (
61
+ # --- closed-source / proprietary frontier APIs ---
62
+ ProviderSpec("anthropic", "Anthropic (Claude)", "closed-native", "anthropic-messages",
63
+ ("ANTHROPIC_API_KEY", "ANTHROPIC_OAUTH_TOKEN"), None, True, True,
64
+ "Claude — Opus / Sonnet / Haiku"),
65
+ ProviderSpec("openai", "OpenAI", "closed-native", "openai-completions",
66
+ ("OPENAI_API_KEY",), None, True, True, "GPT-5.x / GPT-4o"),
67
+ ProviderSpec("google", "Google Gemini", "closed-native", "google-generative-ai",
68
+ ("GEMINI_API_KEY", "GOOGLE_API_KEY", "GOOGLE_GENAI_API_KEY"), None, True, True,
69
+ "Gemini 2.5 / 3.5"),
70
+ ProviderSpec("xai", "xAI (Grok)", "closed-native", "openai-completions",
71
+ ("XAI_API_KEY",), "https://api.x.ai/v1", True, True, "Grok 4.x"),
72
+ ProviderSpec("deepseek", "DeepSeek", "closed-native", "openai-completions",
73
+ ("DEEPSEEK_API_KEY",), "https://api.deepseek.com", True, True,
74
+ "DeepSeek V4 (open-weight, cheap)"),
75
+ ProviderSpec("mistral", "Mistral AI", "closed-native", "openai-completions",
76
+ ("MISTRAL_API_KEY",), "https://api.mistral.ai/v1", True, True,
77
+ "Mistral Large / Codestral"),
78
+ ProviderSpec("cohere", "Cohere", "closed-native", "openai-completions",
79
+ ("COHERE_API_KEY", "CO_API_KEY"), "https://api.cohere.ai/compatibility/v1",
80
+ True, False, "Command A / R"),
81
+ ProviderSpec("perplexity", "Perplexity (Sonar)", "closed-native", "openai-completions",
82
+ ("PERPLEXITY_API_KEY",), "https://api.perplexity.ai", True, False,
83
+ "Sonar — web-grounded"),
84
+ # --- aggregator (one key, many upstream models) ---
85
+ ProviderSpec("openrouter", "OpenRouter", "aggregator", "openai-completions",
86
+ ("OPENROUTER_API_KEY",), "https://openrouter.ai/api/v1", True, True,
87
+ "one key, 100s of open + closed models"),
88
+ # --- open-source / open-weight inference hosts ---
89
+ ProviderSpec("groq", "Groq", "open-source-host", "openai-completions",
90
+ ("GROQ_API_KEY",), "https://api.groq.com/openai/v1", True, True,
91
+ "very fast open-weight inference"),
92
+ ProviderSpec("together", "Together AI", "open-source-host", "openai-completions",
93
+ ("TOGETHER_API_KEY",), "https://api.together.ai/v1", True, True,
94
+ "Llama / Qwen / DeepSeek open-weight"),
95
+ ProviderSpec("fireworks", "Fireworks AI", "open-source-host", "openai-completions",
96
+ ("FIREWORKS_API_KEY",), "https://api.fireworks.ai/inference/v1", True, False,
97
+ "open-weight inference"),
98
+ ProviderSpec("deepinfra", "DeepInfra", "open-source-host", "openai-completions",
99
+ ("DEEPINFRA_TOKEN", "DEEPINFRA_API_KEY"), "https://api.deepinfra.com/v1/openai",
100
+ True, False, "open-weight inference"),
101
+ ProviderSpec("cerebras", "Cerebras", "open-source-host", "openai-completions",
102
+ ("CEREBRAS_API_KEY",), "https://api.cerebras.ai/v1", True, False,
103
+ "wafer-scale fast inference"),
104
+ ProviderSpec("hyperbolic", "Hyperbolic", "open-source-host", "openai-completions",
105
+ ("HYPERBOLIC_API_KEY",), "https://api.hyperbolic.xyz/v1", True, False,
106
+ "open-weight inference"),
107
+ ProviderSpec("novita", "Novita AI", "open-source-host", "openai-completions",
108
+ ("NOVITA_API_KEY",), "https://api.novita.ai/openai", True, False,
109
+ "open-weight inference"),
110
+ # --- local / self-hosted runtimes (no key; model ids are whatever you loaded) ---
111
+ ProviderSpec("ollama", "Ollama (local)", "local-runtime", "openai-completions",
112
+ ("OLLAMA_API_KEY",), "http://localhost:11434/v1", False, False,
113
+ "local models via Ollama"),
114
+ ProviderSpec("lmstudio", "LM Studio (local)", "local-runtime", "openai-completions",
115
+ ("LMSTUDIO_API_KEY",), "http://localhost:1234/v1", False, False,
116
+ "local models via LM Studio"),
117
+ ProviderSpec("vllm", "vLLM (local)", "local-runtime", "openai-completions",
118
+ ("VLLM_API_KEY",), "http://localhost:8000/v1", False, False, "self-hosted vLLM"),
119
+ ProviderSpec("llamacpp", "llama.cpp (local)", "local-runtime", "openai-completions",
120
+ ("LLAMA_API_KEY",), "http://localhost:8080/v1", False, False,
121
+ "self-hosted llama-server"),
122
+ ProviderSpec("localai", "LocalAI (local)", "local-runtime", "openai-completions",
123
+ ("LOCALAI_API_KEY",), "http://localhost:8080/v1", False, False,
124
+ "self-hosted LocalAI"),
125
+ )
126
+
127
+ _BY_NAME: dict[str, ProviderSpec] = {p.name: p for p in PROVIDERS}
128
+
129
+ # Generic fallback env vars for an unknown/custom provider (e.g. a models.json "openai-compat"
130
+ # entry). Lets a hand-rolled OpenAI-compatible endpoint resolve a key.
131
+ _GENERIC_ENV_VARS: tuple[str, ...] = ("OPENAI_COMPAT_API_KEY", "OPENAI_API_KEY")
132
+
133
+
134
+ # --------------------------------------------------------------------------- curated models
135
+ # Verified current ids + USD/1M pricing (official docs, June 2026). The native anthropic/google
136
+ # /openai base set is seeded in registry.py; here we ADD the multi-provider catalog. Models are
137
+ # only registered for a provider once its key is configured (see register_catalog_models).
138
+ CATALOG_MODELS: dict[str, list[ModelSpec]] = {
139
+ "openai": [
140
+ ModelSpec("gpt-5.4-nano", "GPT-5.4 nano", 0.20, 1.25, 400_000, 16_384, multimodal=True),
141
+ ModelSpec("gpt-5.4-mini", "GPT-5.4 mini", 0.75, 4.50, 400_000, 16_384, multimodal=True),
142
+ ModelSpec("gpt-5.4", "GPT-5.4", 2.50, 15.0, 1_050_000, 16_384, True, True),
143
+ ModelSpec("gpt-5.5", "GPT-5.5", 5.0, 30.0, 1_050_000, 16_384, True, True),
144
+ ],
145
+ "google": [
146
+ ModelSpec("gemini-2.5-flash-lite", "Gemini 2.5 Flash-Lite", 0.10, 0.40,
147
+ 1_048_576, 8192, multimodal=True),
148
+ ModelSpec("gemini-3.5-flash", "Gemini 3.5 Flash", 1.50, 9.0, 1_048_576, 8192, True, True),
149
+ ],
150
+ "xai": [
151
+ ModelSpec("grok-build-0.1", "Grok Build 0.1 (coding)", 1.0, 2.0, 256_000, 16_384, True),
152
+ ModelSpec("grok-4.3", "Grok 4.3", 1.25, 2.50, 1_000_000, 16_384, True),
153
+ ],
154
+ "deepseek": [
155
+ ModelSpec("deepseek-v4-flash", "DeepSeek V4 Flash", 0.14, 0.28, 1_000_000, 16_384, True),
156
+ ModelSpec("deepseek-v4-pro", "DeepSeek V4 Pro", 0.435, 0.87, 1_000_000, 16_384, True),
157
+ ],
158
+ "mistral": [
159
+ ModelSpec("mistral-small-latest", "Mistral Small 4", 0.15, 0.60, 128_000, 8192),
160
+ ModelSpec("codestral-latest", "Codestral (code)", 0.30, 0.90, 256_000, 16_384),
161
+ ModelSpec("mistral-medium-latest", "Mistral Medium 3.5", 1.50, 7.50, 128_000, 8192),
162
+ ],
163
+ "cohere": [
164
+ ModelSpec("command-r-08-2024", "Command R (08-2024)", 0.15, 0.60, 128_000, 4096),
165
+ ModelSpec("command-a-03-2025", "Command A (03-2025)", 2.50, 10.0, 256_000, 8192),
166
+ ],
167
+ "perplexity": [
168
+ ModelSpec("sonar", "Sonar (web-grounded)", 1.0, 1.0, 128_000, 8192),
169
+ ModelSpec("sonar-pro", "Sonar Pro (web-grounded)", 3.0, 15.0, 200_000, 8192),
170
+ ],
171
+ "openrouter": [
172
+ ModelSpec("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B (OpenRouter)",
173
+ 0.10, 0.32, 131_072, 8192),
174
+ ModelSpec("deepseek/deepseek-chat-v3.1", "DeepSeek V3.1 (OpenRouter)",
175
+ 0.21, 0.79, 163_840, 8192, True),
176
+ ModelSpec("qwen/qwen3-235b-a22b", "Qwen3 235B (OpenRouter)", 0.455, 1.82, 131_072, 8192),
177
+ ModelSpec("meta-llama/llama-4-maverick", "Llama 4 Maverick (OpenRouter)",
178
+ 0.15, 0.60, 1_048_576, 8192, multimodal=True),
179
+ ],
180
+ "groq": [
181
+ ModelSpec("llama-3.1-8b-instant", "Llama 3.1 8B Instant (Groq)", 0.05, 0.08, 131_072, 8192),
182
+ ModelSpec("openai/gpt-oss-120b", "GPT-OSS 120B (Groq)", 0.15, 0.60, 131_072, 8192, True),
183
+ ModelSpec("llama-3.3-70b-versatile", "Llama 3.3 70B (Groq)", 0.59, 0.79, 131_072, 8192),
184
+ ],
185
+ "together": [
186
+ ModelSpec("openai/gpt-oss-120b", "GPT-OSS 120B (Together)",
187
+ 0.15, 0.60, 131_072, 8192, True),
188
+ ModelSpec("Qwen/Qwen3-235B-A22B-Instruct-2507-tput", "Qwen3 235B (Together)",
189
+ 0.20, 0.60, 262_144, 8192),
190
+ ModelSpec("meta-llama/Llama-3.3-70B-Instruct-Turbo", "Llama 3.3 70B (Together)",
191
+ 1.04, 1.04, 131_072, 8192),
192
+ ],
193
+ "fireworks": [
194
+ ModelSpec("accounts/fireworks/models/gpt-oss-120b", "GPT-OSS 120B (Fireworks)",
195
+ 0.15, 0.60, 131_072, 8192, True),
196
+ ModelSpec("accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
197
+ "Qwen3 235B (Fireworks)", 0.22, 0.88, 262_144, 8192),
198
+ ],
199
+ "deepinfra": [
200
+ ModelSpec("meta-llama/Llama-3.3-70B-Instruct-Turbo", "Llama 3.3 70B (DeepInfra)",
201
+ 0.10, 0.32, 131_072, 8192),
202
+ ModelSpec("deepseek-ai/DeepSeek-V4-Flash", "DeepSeek V4 Flash (DeepInfra)",
203
+ 0.10, 0.20, 1_000_000, 16_384, True),
204
+ ],
205
+ "cerebras": [
206
+ ModelSpec("gpt-oss-120b", "GPT-OSS 120B (Cerebras)", 0.35, 0.75, 131_072, 8192, True),
207
+ ],
208
+ "hyperbolic": [
209
+ ModelSpec("deepseek-ai/DeepSeek-V3", "DeepSeek V3 (Hyperbolic)", 0.25, 0.25, 131_072, 8192),
210
+ ModelSpec("meta-llama/Llama-3.3-70B-Instruct", "Llama 3.3 70B (Hyperbolic)",
211
+ 0.40, 0.40, 131_072, 8192),
212
+ ],
213
+ "novita": [
214
+ ModelSpec("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B (Novita)",
215
+ 0.135, 0.40, 131_072, 8192),
216
+ ModelSpec("deepseek/deepseek-v3", "DeepSeek V3 (Novita)", 0.27, 1.12, 163_840, 8192),
217
+ ],
218
+ }
219
+
220
+
221
+ # --------------------------------------------------------------------------- helpers
222
def spec_for(provider: str) -> ProviderSpec | None:
    """Return the ``ProviderSpec`` registered under ``provider``, or ``None`` if there is none.

    The name is lower-cased before it is looked up in ``_BY_NAME``.
    """
    lookup_key = provider.lower()
    return _BY_NAME.get(lookup_key)
224
+
225
+
226
def env_vars_for_provider(provider: str) -> tuple[str, ...]:
    """Return the env var names that can hold ``provider``'s API key, in resolution order.

    A provider with no entry in ``_BY_NAME`` (unknown/custom) gets the generic OpenAI-compat
    vars instead, so a models.json entry for it still works.
    """
    known = _BY_NAME.get(provider.lower())
    if known:
        return known.env_vars
    # Unknown/custom provider: fall back to the generic variables.
    return _GENERIC_ENV_VARS
231
+
232
+
233
def provider_key_present(provider: str) -> bool:
    """Report whether ``provider`` is usable from a credentials point of view.

    Returns ``True`` when the provider's spec says no key is required (local runtime), or
    when at least one of its key env vars is set to a non-empty value.
    """
    known = _BY_NAME.get(provider.lower())
    if known is not None and not known.requires_key:
        return True  # keyless provider: nothing to look for in the environment

    for var_name in env_vars_for_provider(provider):
        # An unset variable and an empty string both count as "no key".
        if os.environ.get(var_name):
            return True
    return False
239
+
240
+
241
def configured_providers() -> list[str]:
    """List the ids of key-requiring providers whose key is currently set in the environment.

    Providers that need no key are left out; the result follows the order of ``PROVIDERS``.
    """
    names: list[str] = []
    for entry in PROVIDERS:
        if not entry.requires_key:
            continue
        if provider_key_present(entry.name):
            names.append(entry.name)
    return names
244
+
245
+
246
def config_providers() -> list[ProviderSpec]:
    """Return the providers flagged ``show_in_config``: the curated, popular subset that is
    surfaced as fields in ``minima config``. Order follows ``PROVIDERS``."""
    shown: list[ProviderSpec] = []
    for entry in PROVIDERS:
        if entry.show_in_config:
            shown.append(entry)
    return shown
249
+
250
+
251
def runnable_candidates(candidate_ids: list[str]) -> list[str]:
    """Narrow routing candidates down to models the user can actually call.

    A candidate is kept when its model is not in the registry (Minima may still know it) or
    when its provider's key is configured. If that leaves nothing, a copy of the original
    list is returned so routing still yields a clear auth error rather than an empty
    candidate set.
    """
    from minima_harness.ai.registry import find_model_by_id

    def _is_runnable(cid: str) -> bool:
        found = find_model_by_id(cid)
        return found is None or provider_key_present(found.provider)

    kept = [cid for cid in candidate_ids if _is_runnable(cid)]
    if kept:
        return kept
    # Nothing runnable: hand back every candidate unchanged.
    return list(candidate_ids)
264
+
265
+
266
def _to_model(provider: str, spec: ProviderSpec, m: ModelSpec) -> Model:
    """Build a registry ``Model`` from catalog entry ``m`` of ``provider``.

    Per-model fields (id, name, pricing, limits, reasoning flag, modalities) come from ``m``;
    the transport fields (``api``, ``base_url``) come from the provider's ``spec``.
    """
    pricing = ModelCost(input=m.input, output=m.output)
    # Multimodal entries advertise the ``_MM`` input set; everything else uses ``_TEXT``.
    modalities = _MM if m.multimodal else _TEXT
    return Model(
        id=m.id,
        provider=provider,
        api=spec.api,
        name=m.name,
        cost=pricing,
        context_window=m.context_window,
        max_tokens=m.max_tokens,
        input=modalities,
        reasoning=m.reasoning,
        base_url=spec.base_url,
    )
279
+
280
+
281
def register_catalog_models(*, present_keys_only: bool = True) -> list[str]:
    """Register the curated catalog models and return the provider ids that were registered.

    With ``present_keys_only`` (the default), a provider that requires a key is skipped
    unless that key is set, which keeps the registry (and the model picker) lean: a user
    sees models they can actually run. Catalog providers without an entry in ``_BY_NAME``
    are always skipped. The native anthropic/google/openai base models are seeded
    separately in :mod:`registry` and are unaffected.
    """
    from minima_harness.ai.registry import register_model

    done: list[str] = []
    for provider, models in CATALOG_MODELS.items():
        spec = _BY_NAME.get(provider)
        # Eligible when the provider is known and either filtering is off, no key is
        # needed, or the key is present (evaluated in that order).
        if spec is not None and (
            not present_keys_only
            or not spec.requires_key
            or provider_key_present(provider)
        ):
            for entry in models:
                register_model(_to_model(provider, spec, entry))
            done.append(provider)
    return done