minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Google Generative AI (Gemini) provider via ``google-genai``.
|
|
2
|
+
|
|
3
|
+
Reuses minima's optional ``reasoner-gemini`` / ``harness`` extra. Iterates
|
|
4
|
+
``generate_content_stream`` chunks, mapping incremental text/thought/function-call parts
|
|
5
|
+
onto PI's event taxonomy. Gemini does not stream function-call arguments incrementally,
|
|
6
|
+
so a full ``toolcall_end`` is emitted when a ``function_call`` part arrives (matches PI's
|
|
7
|
+
documented behaviour).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import AsyncIterator
|
|
13
|
+
from typing import TYPE_CHECKING, Any
|
|
14
|
+
|
|
15
|
+
from minima_harness.ai.compat import normalize_for_target
|
|
16
|
+
from minima_harness.ai.events import (
|
|
17
|
+
DoneEvent,
|
|
18
|
+
ErrorEvent,
|
|
19
|
+
StartEvent,
|
|
20
|
+
TextDeltaEvent,
|
|
21
|
+
TextEndEvent,
|
|
22
|
+
TextStartEvent,
|
|
23
|
+
ThinkingDeltaEvent,
|
|
24
|
+
ThinkingEndEvent,
|
|
25
|
+
ThinkingStartEvent,
|
|
26
|
+
ToolCall,
|
|
27
|
+
ToolCallEndEvent,
|
|
28
|
+
ToolCallStartEvent,
|
|
29
|
+
)
|
|
30
|
+
from minima_harness.ai.providers._common import resolve_api_key, to_json_schema
|
|
31
|
+
from minima_harness.ai.types import (
|
|
32
|
+
AssistantMessage,
|
|
33
|
+
ImageContent,
|
|
34
|
+
Message,
|
|
35
|
+
TextContent,
|
|
36
|
+
ThinkingContent,
|
|
37
|
+
)
|
|
38
|
+
from minima_harness.ai.usage import attach_cost
|
|
39
|
+
|
|
40
|
+
if TYPE_CHECKING:
|
|
41
|
+
from minima_harness.ai.events import Event
|
|
42
|
+
from minima_harness.ai.types import Context, Model
|
|
43
|
+
|
|
44
|
+
# Gemini finish-reason name -> harness stop reason. Reasons that are not listed here
# fall back to "stop" at the lookup site in ``GoogleProvider.stream``.
_FINISH_MAP = {"STOP": "stop", "MAX_TOKENS": "length", "SAFETY": "stop"}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class GoogleProvider:
    """Gemini provider that adapts ``google-genai`` streaming output to harness events.

    :meth:`stream` yields a ``StartEvent``, then thinking / text / tool-call events as
    response parts arrive, and finally either a ``DoneEvent`` carrying the assembled
    ``AssistantMessage`` or an ``ErrorEvent`` when the request raised.
    """

    api_id = "google-generative-ai"

    def __init__(self, client: Any | None = None) -> None:
        """Keep an optional pre-built client; when given it is used instead of building one."""
        self._client = client

    def _build_client(self, options: dict[str, Any]) -> Any:
        """Return the injected client, or build a ``google.genai.Client`` from *options*.

        ``options["timeout"]`` is read in seconds (default 60.0) and multiplied by 1000
        before being passed as the SDK's ``http_options`` timeout.
        """
        if self._client is not None:
            return self._client
        import google.genai as genai  # lazy; optional extra

        api_key = resolve_api_key(
            options, "GEMINI_API_KEY", "GOOGLE_API_KEY", "GOOGLE_GENAI_API_KEY"
        )
        timeout = int(float(options.get("timeout", 60.0)) * 1000)
        return genai.Client(api_key=api_key, http_options={"timeout": timeout})

    @staticmethod
    def _finish_name(finish_reason: object) -> str:
        """Return the bare finish-reason name, e.g. ``"MAX_TOKENS"``.

        ``str()`` of an ``Enum`` member (including ``str``-mixin enums) is
        ``"ClassName.MEMBER"``, which never matches the bare keys of ``_FINISH_MAP``.
        Prefer the member's ``name``; for plain strings drop any ``"ClassName."`` prefix.
        """
        name = getattr(finish_reason, "name", None)
        if not isinstance(name, str) or not name:
            name = str(finish_reason)
        return name.rsplit(".", 1)[-1]

    async def stream(
        self,
        model: Model,
        context: Context,
        *,
        options: dict[str, Any] | None = None,
        signal: object | None = None,
    ) -> AsyncIterator[Event]:
        """Stream one Gemini completion as harness events.

        Args:
            model: Target model; ``model.id`` is sent as the Gemini model name.
            context: Conversation (messages, system prompt, tools) to send.
            options: Optional per-call settings (``timeout``, ``max_tokens``, ``thinking``,
                plus whatever ``resolve_api_key`` reads).
            signal: Accepted for interface parity; not used by this provider.

        Yields:
            ``StartEvent``, then delta events, then the ``*End`` events and a ``DoneEvent``.
            If the request or the stream raises, a single ``ErrorEvent`` is yielded instead
            of the closing events.
        """
        options = options or {}
        client = self._build_client(options)
        config = _build_config(model, context, options)

        text_buf: list[str] = []
        think_buf: list[str] = []
        tool_calls: list[ToolCall] = []
        seen_text = seen_think = False
        in_tokens = out_tokens = thought_tokens = cache_read = 0
        stop_reason = "stop"

        assistant = AssistantMessage(content=[], model=model.id, stop_reason="stop")
        yield StartEvent(partial=assistant)
        try:
            contents = _to_contents(context)
            stream = await client.aio.models.generate_content_stream(
                model=model.id, contents=contents, config=config
            )
            async for chunk in stream:
                usage = getattr(chunk, "usage_metadata", None)
                if usage is not None:
                    # Each chunk's usage replaces the previous values; the last one wins.
                    in_tokens = getattr(usage, "prompt_token_count", 0) or 0
                    out_tokens = getattr(usage, "candidates_token_count", 0) or 0
                    thought_tokens = getattr(usage, "thoughts_token_count", 0) or 0
                    cache_read = getattr(usage, "cached_content_token_count", 0) or 0
                for cand in getattr(chunk, "candidates", None) or []:
                    fr = getattr(cand, "finish_reason", None)
                    if fr:
                        # Look up by bare member name so enum values actually match.
                        stop_reason = _FINISH_MAP.get(self._finish_name(fr), "stop")
                    content = getattr(cand, "content", None)
                    for part in getattr(content, "parts", None) or []:
                        if getattr(part, "thought", False):
                            txt = getattr(part, "text", "") or ""
                            if txt:
                                if not seen_think:
                                    seen_think = True
                                    yield ThinkingStartEvent(content_index=0)
                                think_buf.append(txt)
                                yield ThinkingDeltaEvent(delta=txt, content_index=0)
                        elif getattr(part, "function_call", None):
                            # Function calls arrive whole, so start and end are emitted
                            # back to back with no argument deltas in between.
                            fc = part.function_call
                            name = getattr(fc, "name", "") or ""
                            args = dict(getattr(fc, "args", None) or {})
                            idx = len(tool_calls)
                            call = ToolCall(id=f"call_{idx}", name=name, arguments=args)
                            tool_calls.append(call)
                            yield ToolCallStartEvent(content_index=idx)
                            yield ToolCallEndEvent(tool_call=call, content_index=idx)
                        else:
                            txt = getattr(part, "text", None)
                            if txt:
                                if not seen_text:
                                    seen_text = True
                                    yield TextStartEvent(content_index=0)
                                text_buf.append(txt)
                                yield TextDeltaEvent(delta=txt, content_index=0)
        except Exception as exc:  # noqa: BLE001
            err = AssistantMessage(
                content=[TextContent(text="")], stop_reason="error", error_message=str(exc)
            )
            err.model = model.id
            yield ErrorEvent(reason="error", error=err)
            return

        # Assemble content in canonical order: thinking, text, tool calls.
        blocks: list[Any] = []
        if seen_think:
            thinking = "".join(think_buf)
            blocks.append(ThinkingContent(thinking=thinking))
            yield ThinkingEndEvent(content=thinking, content_index=0)
        if seen_text:
            text = "".join(text_buf)
            blocks.append(TextContent(text=text))
            yield TextEndEvent(content=text, content_index=0)
        blocks.extend(tool_calls)
        if not blocks:
            # Always hand back at least one (empty) text block.
            blocks.append(TextContent(text=""))

        if tool_calls:
            # Any function call overrides whatever finish reason the stream reported.
            stop_reason = "toolUse"
        assistant.content = blocks
        assistant.stop_reason = stop_reason  # type: ignore[assignment]
        assistant.usage.input = in_tokens
        # Thought tokens are added to the output total.
        assistant.usage.output = out_tokens + thought_tokens
        assistant.usage.cache_read = cache_read
        attach_cost(model, assistant.usage)
        yield DoneEvent(reason=assistant.stop_reason, message=assistant)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _build_config(model: Model, context: Context, options: dict[str, Any]) -> dict[str, Any]:
    """Assemble the ``config`` dict passed to ``generate_content_stream``.

    Always sets ``max_output_tokens`` (``options["max_tokens"]`` when present, otherwise
    ``model.max_tokens``). Adds ``system_instruction``, ``tools`` and ``thinking_config``
    only when the context / options call for them.
    """
    token_cap = options.get("max_tokens", model.max_tokens)
    settings: dict[str, Any] = {"max_output_tokens": token_cap}

    system_prompt = context.system_prompt
    if system_prompt:
        settings["system_instruction"] = system_prompt

    tools = context.tools
    if tools:
        declarations: list[dict[str, Any]] = []
        for tool in tools:
            declarations.append(
                {
                    "name": tool.name,
                    "description": tool.description,
                    # Send plain JSON Schema via `parameters_json_schema` rather than the
                    # SDK's strict `parameters` Schema model: the strict model rejects the
                    # `$ref`/`$defs` that pydantic emits for nested models and fails the
                    # whole call with a ValidationError.
                    "parameters_json_schema": to_json_schema(tool.parameters),
                }
            )
        settings["tools"] = [{"function_declarations": declarations}]

    # Thought output is requested only when the caller asked for it and the model
    # is flagged as a reasoning model.
    wants_thoughts = options.get("thinking") and model.reasoning
    if wants_thoughts:
        settings["thinking_config"] = {"include_thoughts": True}

    return settings
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _to_contents(context: Context) -> list[dict[str, Any]]:
    """Build google-genai ``contents`` (role user/model + parts) from the context.

    Messages are first normalised for the ``google-generative-ai`` target, then mapped:

    * ``toolResult`` messages become ``function_response`` parts in a ``user`` content.
      Consecutive tool results share one content, because Gemini expects the responses
      to a (parallel) function-call turn to be grouped together.
    * Other messages contribute text, inline image and ``function_call`` parts; block
      types not listed here are not forwarded.
    * Empty text parts and messages that end up with no parts are skipped, since the
      Gemini API rejects contents with an empty ``parts`` list or an unset part.

    Returns:
        The list of content dicts in conversation order.
    """
    messages = normalize_for_target(context.messages, "google-generative-ai")
    out: list[dict[str, Any]] = []
    # True while the most recently appended content holds only function responses.
    tail_is_tool_results = False
    for m in messages:
        if m.role == "toolResult":
            response_part = {
                "function_response": {
                    "name": m.tool_name or "",
                    "response": {"result": _flatten_text(m)},
                }
            }
            if tail_is_tool_results:
                out[-1]["parts"].append(response_part)
            else:
                out.append({"role": "user", "parts": [response_part]})
                tail_is_tool_results = True
            continue

        blocks = m.content if not isinstance(m.content, str) else [TextContent(text=m.content)]
        parts: list[dict[str, Any]] = []
        for b in blocks:
            if isinstance(b, TextContent):
                if b.text:
                    parts.append({"text": b.text})
            elif isinstance(b, ImageContent):
                parts.append({"inline_data": {"mime_type": b.mime_type, "data": b.data}})
            elif isinstance(b, ToolCall):
                parts.append({"function_call": {"name": b.name, "args": b.arguments or {}}})
        if not parts:
            # Nothing sendable in this message (e.g. only empty text).
            continue
        role = "model" if m.role == "assistant" else "user"
        out.append({"role": role, "parts": parts})
        tail_is_tool_results = False
    return out
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _flatten_text(m: Message) -> str:
    """Return the message's text: the content itself when it is a string, otherwise the
    concatenation of its ``TextContent`` blocks (other block types are ignored)."""
    body = m.content
    if not isinstance(body, str):
        pieces = [item.text for item in body if isinstance(item, TextContent)]
        body = "".join(pieces)
    return body
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""OpenAI-compatible Chat Completions provider (raw httpx, no ``openai`` SDK).
|
|
2
|
+
|
|
3
|
+
One implementation covers openai, openrouter, groq, xai, together, and any server
|
|
4
|
+
speaking the ``POST {base_url}/chat/completions`` SSE protocol — selected by
|
|
5
|
+
``Model.base_url``. Matches PI's fetch-based approach and keeps dependencies lean.
|
|
6
|
+
|
|
7
|
+
Streaming deltas carry: ``choices[0].delta.content`` (text), ``.tool_calls`` (function
|
|
8
|
+
calls, assembled from partial JSON), and ``.reasoning_content`` / ``.reasoning`` (thinking
|
|
9
|
+
for deepseek/openrouter-style models). The final chunk carries ``usage`` when
|
|
10
|
+
``stream_options.include_usage`` is honoured.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
from collections.abc import AsyncIterator
|
|
17
|
+
from typing import TYPE_CHECKING, Any
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
from minima_harness.ai.compat import normalize_for_target
|
|
22
|
+
from minima_harness.ai.events import (
|
|
23
|
+
DoneEvent,
|
|
24
|
+
ErrorEvent,
|
|
25
|
+
StartEvent,
|
|
26
|
+
TextDeltaEvent,
|
|
27
|
+
TextEndEvent,
|
|
28
|
+
TextStartEvent,
|
|
29
|
+
ThinkingDeltaEvent,
|
|
30
|
+
ThinkingEndEvent,
|
|
31
|
+
ThinkingStartEvent,
|
|
32
|
+
ToolCallDeltaEvent,
|
|
33
|
+
ToolCallEndEvent,
|
|
34
|
+
ToolCallStartEvent,
|
|
35
|
+
)
|
|
36
|
+
from minima_harness.ai.provider_quirks import quirks_for
|
|
37
|
+
from minima_harness.ai.providers._common import resolve_api_key, to_json_schema
|
|
38
|
+
from minima_harness.ai.types import (
|
|
39
|
+
AssistantMessage,
|
|
40
|
+
ImageContent,
|
|
41
|
+
Message,
|
|
42
|
+
TextContent,
|
|
43
|
+
ThinkingContent,
|
|
44
|
+
ToolCall,
|
|
45
|
+
)
|
|
46
|
+
from minima_harness.ai.usage import attach_cost
|
|
47
|
+
|
|
48
|
+
if TYPE_CHECKING:
|
|
49
|
+
from minima_harness.ai.events import Event
|
|
50
|
+
from minima_harness.ai.types import Context, Model
|
|
51
|
+
|
|
52
|
+
_DEFAULT_BASE = "https://api.openai.com/v1"
|
|
53
|
+
_FINISH_MAP = {
|
|
54
|
+
"stop": "stop",
|
|
55
|
+
"length": "length",
|
|
56
|
+
"tool_calls": "toolUse",
|
|
57
|
+
"function_call": "toolUse",
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class OpenAICompatProvider:
    """Chat Completions provider for any ``POST {base_url}/chat/completions`` SSE server.

    The endpoint is chosen by ``Model.base_url`` (falling back to ``_DEFAULT_BASE``).
    Transport and HTTP failures are reported as an ``ErrorEvent`` rather than raised.
    """

    api_id = "openai-completions"

    async def stream(
        self,
        model: Model,
        context: Context,
        *,
        options: dict[str, Any] | None = None,
        signal: object | None = None,
    ) -> AsyncIterator[Event]:
        """Stream one completion as harness events.

        Args:
            model: Target model; supplies ``id``, ``provider``, ``base_url`` and ``headers``.
            context: Conversation (messages, system prompt, tools) to send.
            options: Optional per-call settings. ``timeout`` (seconds, default 60.0) and
                ``httpx_client`` (a caller-owned client that is used but never closed
                here) are read directly; the rest is consumed by the helpers.
            signal: Accepted for interface parity; not used by this provider.

        Yields:
            The events produced by ``_consume_sse``; if the request or the stream raises,
            a single ``ErrorEvent`` follows whatever was already yielded.
        """
        options = options or {}
        # Resolve the key for THIS model's provider (e.g. a Groq model -> GROQ_API_KEY), so a
        # key for one provider can't be sent to another provider's endpoint. Unknown/custom
        # providers fall back to the generic OpenAI-compat vars via env_vars_for_provider.
        from minima_harness.ai.provider_catalog import env_vars_for_provider

        api_key = resolve_api_key(options, *env_vars_for_provider(model.provider))
        base = (model.base_url or _DEFAULT_BASE).rstrip("/")
        url = f"{base}/chat/completions"
        payload = _build_payload(model, context, options)
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        # Model-level headers are applied last, so they win over the defaults above.
        headers.update(model.headers)

        timeout = options.get("timeout", 60.0)
        try:
            injected = options.get("httpx_client")
            client = injected or httpx.AsyncClient(timeout=timeout)
            try:
                req = client.build_request("POST", url, json=payload, headers=headers)
                resp = await client.send(req, stream=True)
                try:
                    resp.raise_for_status()
                    async for ev in _consume_sse(resp, model):
                        yield ev
                finally:
                    await resp.aclose()
            finally:
                # Close a client we created ourselves on every exit path, including a
                # failed build_request()/send() or a failing resp.aclose(). A
                # caller-supplied client stays open; its lifetime belongs to the caller.
                if not injected:
                    await client.aclose()
        except Exception as exc:  # noqa: BLE001 - surface as an error event, not a raise
            err = AssistantMessage(
                content=[TextContent(text="")], stop_reason="error", error_message=str(exc)
            )
            err.model = model.id
            yield ErrorEvent(reason="error", error=err)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _build_payload(model: Model, context: Context, options: dict[str, Any]) -> dict[str, Any]:
    """Build the JSON body for a streaming ``/chat/completions`` request.

    The system prompt (when set) is sent as a leading ``system`` message, followed by the
    normalised conversation. Streaming with usage reporting is always requested, and
    ``tools`` is included only when the context declares any.
    """
    normalized = normalize_for_target(context.messages, "openai-completions")

    wire_messages: list[dict[str, Any]] = []
    system_prompt = context.system_prompt
    if system_prompt:
        wire_messages.append({"role": "system", "content": system_prompt})
    for message in normalized:
        wire_messages.append(_to_wire(message))

    body: dict[str, Any] = {
        "model": model.id,
        "messages": wire_messages,
        "stream": True,
        "stream_options": {"include_usage": True},
    }
    # The name of the token-limit field is a per-provider quirk (e.g. OpenAI GPT-5 needs
    # max_completion_tokens), so it comes from the quirks table instead of branching on
    # the provider here.
    token_field = quirks_for(model.provider).token_param
    body[token_field] = options.get("max_tokens", model.max_tokens)

    if context.tools:
        tool_specs: list[dict[str, Any]] = []
        for tool in context.tools:
            function_spec = {
                "name": tool.name,
                "description": tool.description,
                "parameters": to_json_schema(tool.parameters),
            }
            tool_specs.append({"type": "function", "function": function_spec})
        body["tools"] = tool_specs
    return body
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _to_wire(m: Message) -> dict[str, Any]:
    """Convert one harness message into a Chat Completions message dict.

    Tool results become ``role: "tool"`` entries keyed by ``tool_call_id``. For other
    roles, ``content`` is the joined text, or a list of typed parts when the message
    carries images; tool calls are attached under ``tool_calls`` with JSON-encoded
    arguments. Block types other than text, image and tool call are not forwarded.
    """
    if m.role == "toolResult":
        return {
            "role": "tool",
            "tool_call_id": m.tool_call_id,
            "content": _flatten_text(m),
        }

    raw = m.content
    blocks = [TextContent(text=raw)] if isinstance(raw, str) else raw

    calls = [b for b in blocks if isinstance(b, ToolCall)]
    text = "".join(b.text for b in blocks if isinstance(b, TextContent))
    pictures = [b for b in blocks if isinstance(b, ImageContent)]

    entry: dict[str, Any] = {"role": m.role}
    if pictures:
        # Multi-part form: optional text part first, then one image_url part per image.
        content_parts: list[dict[str, Any]] = []
        if text:
            content_parts.append({"type": "text", "text": text})
        for img in pictures:
            data_url = f"data:{img.mime_type};base64,{img.data}"
            content_parts.append({"type": "image_url", "image_url": {"url": data_url}})
        entry["content"] = content_parts
    else:
        # Text-only (possibly empty) messages use the plain string form.
        entry["content"] = text

    if calls:
        wire_calls: list[dict[str, Any]] = []
        for tc in calls:
            function = {"name": tc.name, "arguments": json.dumps(tc.arguments)}
            wire_calls.append({"id": tc.id, "type": "function", "function": function})
        entry["tool_calls"] = wire_calls
    return entry
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _flatten_text(m: Message) -> str:
    """Collapse a message's content to plain text.

    String content is returned unchanged; for block content only ``TextContent`` blocks
    contribute, joined with no separator.
    """
    content = m.content
    if isinstance(content, str):
        return content
    collected: list[str] = []
    for item in content:
        if isinstance(item, TextContent):
            collected.append(item.text)
    return "".join(collected)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
async def _consume_sse(resp: httpx.Response, model: Model) -> AsyncIterator[Event]:
    """Translate a Chat Completions SSE response into harness events.

    Yields a ``StartEvent`` first, then thinking / text / tool-argument deltas as
    ``data:`` lines arrive. After the stream ends (or ``[DONE]`` is seen) it emits the
    closing ``*End`` events, the tool-call start/end pairs, and a ``DoneEvent`` carrying
    the assembled ``AssistantMessage`` with usage and cost attached.

    Lines that are not ``data:`` lines, and payloads that are not valid JSON, are skipped.

    Raises:
        RuntimeError: when a chunk carries a truthy top-level ``error`` field, so the
            failure reaches the caller instead of ending as an empty successful message.
    """
    text_buf: dict[int, list[str]] = {}
    think_buf: dict[int, list[str]] = {}
    # tool index -> {id, name, args}; "args" accumulates the raw JSON fragments
    tools: dict[int, dict[str, str]] = {}
    seen_text = seen_think = False
    finish_reason = "stop"
    usage_input = usage_output = 0
    assistant = AssistantMessage(content=[], model=model.id, stop_reason="stop")
    yield StartEvent(partial=assistant)

    async for line in resp.aiter_lines():
        line = line.strip()
        if not line.startswith("data:"):
            continue
        data = line[5:].strip()
        if data == "[DONE]":
            break
        try:
            chunk = json.loads(data)
        except json.JSONDecodeError:
            continue

        error = chunk.get("error")
        if error:
            detail = error.get("message") if isinstance(error, dict) else None
            raise RuntimeError(str(detail or error))

        usage = chunk.get("usage")
        if usage:
            # Ignore missing/null counters instead of overwriting the totals with None.
            prompt_tokens = usage.get("prompt_tokens")
            if prompt_tokens is not None:
                usage_input = prompt_tokens
            completion_tokens = usage.get("completion_tokens")
            if completion_tokens is not None:
                usage_output = completion_tokens

        choices = chunk.get("choices") or []
        if not choices:
            continue
        choice = choices[0]
        delta = choice.get("delta") or {}
        fr = choice.get("finish_reason")
        if fr:
            finish_reason = _FINISH_MAP.get(fr, "stop")

        # `reasoning_content` and `reasoning` are alternative spellings of the same
        # field; when a server sends both, emit the text once rather than twice.
        reasoning = delta.get("reasoning_content") or delta.get("reasoning")
        if reasoning:
            idx = 0
            think_buf.setdefault(idx, []).append(reasoning)
            if not seen_think:
                seen_think = True
                yield ThinkingStartEvent(content_index=idx)
            yield ThinkingDeltaEvent(delta=reasoning, content_index=idx)

        content = delta.get("content")
        if content:
            idx = 0
            text_buf.setdefault(idx, []).append(content)
            if not seen_text:
                seen_text = True
                yield TextStartEvent(content_index=idx)
            yield TextDeltaEvent(delta=content, content_index=idx)

        for tc in delta.get("tool_calls") or []:
            # A missing or null index is treated as slot 0.
            idx = tc.get("index") or 0
            slot = tools.setdefault(idx, {"id": "", "name": "", "args": ""})
            fn = tc.get("function") or {}
            if tc.get("id") and not slot["id"]:
                slot["id"] = tc["id"]
            if fn.get("name") and not slot["name"]:
                slot["name"] = fn["name"]
            if fn.get("arguments"):
                slot["args"] += fn["arguments"]
                # NOTE(review): these deltas are emitted before the matching
                # ToolCallStartEvent, which is only sent during finalisation below —
                # confirm consumers tolerate delta-before-start.
                yield ToolCallDeltaEvent(delta=fn["arguments"], content_index=idx)

    # finalize blocks in stable index order: thinking(0) -> text(0) -> tools
    if seen_think:
        idx = 0
        thinking = "".join(think_buf.get(idx, []))
        assistant.content.append(ThinkingContent(thinking=thinking))
        yield ThinkingEndEvent(content=thinking, content_index=idx)
    if seen_text:
        idx = 0
        text = "".join(text_buf.get(idx, []))
        assistant.content.append(TextContent(text=text))
        yield TextEndEvent(content=text, content_index=idx)
    for idx in sorted(tools):
        slot = tools[idx]
        raw_args = slot["args"] or "{}"
        try:
            args = json.loads(raw_args) if raw_args.strip() else {}
        except json.JSONDecodeError:
            args = {"_raw": raw_args}
        if not isinstance(args, dict):
            # Valid JSON that is not an object (null, list, scalar) gets the same
            # fallback as unparseable text, so `arguments` is always a dict.
            args = {"_raw": raw_args}
        call = ToolCall(id=slot["id"] or f"call_{idx}", name=slot["name"], arguments=args)
        assistant.content.append(call)
        yield ToolCallStartEvent(content_index=idx)
        yield ToolCallEndEvent(tool_call=call, content_index=idx)

    assistant.stop_reason = finish_reason  # type: ignore[assignment]
    if not assistant.content:
        # Always hand back at least one (empty) text block.
        assistant.content.append(TextContent(text=""))
    assistant.usage.input = usage_input
    assistant.usage.output = usage_output
    attach_cost(model, assistant.usage)
    yield DoneEvent(reason=assistant.stop_reason, message=assistant)
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Model registry: ``get_model(provider, id)`` and discovery helpers.
|
|
2
|
+
|
|
3
|
+
The seed table covers the candidate set Minima's example agents route over plus a few
|
|
4
|
+
OpenAI/OpenRouter entries. Prices (per-million-token USD) are sourced from the comments
|
|
5
|
+
in ``examples/agent_warmup.py`` so the harness agrees with Minima's existing catalog
|
|
6
|
+
expectations out of the box; Phase 3's mapping layer reconciles against Minima's live
|
|
7
|
+
``GET /v1/models`` catalog where they diverge.
|
|
8
|
+
|
|
9
|
+
PI exposes ~25 providers; this lean port starts with 4 (anthropic, google, openai,
|
|
10
|
+
openrouter) and is extensible via :func:`register_model`.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from minima_harness.ai.types import Modality, Model, ModelCost
|
|
16
|
+
|
|
17
|
+
# (provider, model_id) -> Model
|
|
18
|
+
_MODELS: dict[tuple[str, str], Model] = {}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def register_model(model: Model) -> Model:
    """Add *model* to the registry under ``(model.provider, model.id)`` and return it.

    An existing entry with the same key is replaced.
    """
    key = (model.provider, model.id)
    _MODELS[key] = model
    return model
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_model(provider: str, model_id: str) -> Model:
    """Return the model registered for ``(provider, model_id)``.

    Raises:
        KeyError: if no such model is registered; the message lists every known
            ``provider/id`` pair in sorted order (or ``<none>`` when the registry is empty).
    """
    key = (provider, model_id)
    if key in _MODELS:
        return _MODELS[key]
    catalog = [f"{p}/{m}" for p, m in sorted(_MODELS)]
    known = ", ".join(catalog) or "<none>"
    raise KeyError(f"unknown model {provider}/{model_id!r}; known: {known}") from None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def try_get_model(provider: str, model_id: str) -> Model | None:
    """Return the model registered for ``(provider, model_id)``, or ``None`` if absent."""
    key = (provider, model_id)
    return _MODELS.get(key)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_models(provider: str) -> list[Model]:
    """Return every model registered for *provider*, in registration order."""
    matches: list[Model] = []
    for (owner, _model_id), model in _MODELS.items():
        if owner == provider:
            matches.append(model)
    return matches
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_providers() -> list[str]:
    """Return the distinct provider names present in the registry, sorted."""
    names: set[str] = set()
    for provider, _model_id in _MODELS:
        names.add(provider)
    return sorted(names)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def all_models() -> list[Model]:
    """Return a new list of every registered model, in registration order."""
    return [*_MODELS.values()]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def find_model_by_id(model_id: str) -> Model | None:
    """Return the first registered model whose id equals *model_id*, whatever its
    provider, or ``None`` when there is none. Used by cross-provider compat to infer
    which api produced a replayed assistant message."""
    candidates = (model for (_provider, mid), model in _MODELS.items() if mid == model_id)
    return next(candidates, None)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
# Seed catalog
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
_TEXT = (Modality.text,)
|
|
64
|
+
_MULTIMODAL = (Modality.text, Modality.image)
|
|
65
|
+
|
|
66
|
+
_OPENROUTER_BASE = "https://openrouter.ai/api/v1"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _seed() -> None:
    """Register the built-in seed catalog.

    Each entry below is turned into a ``Model`` (its ``cost`` mapping into a
    ``ModelCost``) and registered in the order listed.
    """
    specs = [
        # --- Anthropic (api: anthropic-messages) ---
        {
            "id": "claude-haiku-4-5",
            "provider": "anthropic",
            "api": "anthropic-messages",
            "name": "Claude Haiku 4.5",
            "cost": {"input": 1.0, "output": 5.0, "cache_read": 0.08, "cache_write": 1.25},
            "context_window": 200_000,
            "max_tokens": 8192,
            "input": _MULTIMODAL,
            "reasoning": False,
        },
        {
            "id": "claude-sonnet-4-6",
            "provider": "anthropic",
            "api": "anthropic-messages",
            "name": "Claude Sonnet 4.6",
            "cost": {"input": 3.0, "output": 15.0, "cache_read": 0.30, "cache_write": 3.75},
            "context_window": 200_000,
            "max_tokens": 16_384,
            "input": _MULTIMODAL,
            "reasoning": True,
        },
        {
            "id": "claude-opus-4-8",
            "provider": "anthropic",
            "api": "anthropic-messages",
            "name": "Claude Opus 4.8",
            "cost": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
            "context_window": 200_000,
            "max_tokens": 16_384,
            "input": _MULTIMODAL,
            "reasoning": True,
        },
        # --- Google / Gemini (api: google-generative-ai) ---
        {
            "id": "gemini-2.5-flash",
            "provider": "google",
            "api": "google-generative-ai",
            "name": "Gemini 2.5 Flash",
            "cost": {"input": 0.30, "output": 2.50},
            "context_window": 1_000_000,
            "max_tokens": 8192,
            "input": _MULTIMODAL,
            "reasoning": True,
        },
        {
            "id": "gemini-2.5-pro",
            "provider": "google",
            "api": "google-generative-ai",
            "name": "Gemini 2.5 Pro",
            "cost": {"input": 1.25, "output": 10.0},
            "context_window": 2_000_000,
            "max_tokens": 8192,
            "input": _MULTIMODAL,
            "reasoning": True,
        },
        # --- OpenAI (api: openai-completions via raw httpx in Phase 1) ---
        {
            "id": "gpt-4o-mini",
            "provider": "openai",
            "api": "openai-completions",
            "name": "GPT-4o mini",
            "cost": {"input": 0.15, "output": 0.60, "cache_read": 0.075},
            "context_window": 128_000,
            "max_tokens": 16_384,
            "input": _MULTIMODAL,
            "reasoning": False,
        },
        {
            "id": "gpt-4o",
            "provider": "openai",
            "api": "openai-completions",
            "name": "GPT-4o",
            "cost": {"input": 2.5, "output": 10.0, "cache_read": 1.25},
            "context_window": 128_000,
            "max_tokens": 16_384,
            "input": _MULTIMODAL,
            "reasoning": False,
        },
        # --- OpenRouter (api: openai-completions; base_url set) ---
        {
            "id": "google/gemini-2.5-flash",
            "provider": "openrouter",
            "api": "openai-completions",
            "name": "Gemini 2.5 Flash (OpenRouter)",
            "cost": {"input": 0.30, "output": 2.50},
            "context_window": 1_000_000,
            "max_tokens": 8192,
            "input": _MULTIMODAL,
            "reasoning": True,
            "base_url": _OPENROUTER_BASE,
        },
    ]
    for spec in specs:
        # Swap the plain price mapping for a ModelCost in place, keeping keyword order.
        spec["cost"] = ModelCost(**spec["cost"])
        register_model(Model(**spec))
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
_seed()
|