minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Per-provider request quirks for the OpenAI-compatible provider, as DATA not control flow.
|
|
2
|
+
|
|
3
|
+
Most OpenAI-compatible hosts speak the identical wire protocol, but a few diverge on small
|
|
4
|
+
request details — e.g. OpenAI's GPT-5 / o-series reject ``max_tokens`` and require
|
|
5
|
+
``max_completion_tokens``. Encoding those as a lookup table (rather than a growing chain of
|
|
6
|
+
``if model.provider == ...`` branches in :mod:`~minima_harness.ai.providers.openai_compat`)
|
|
7
|
+
keeps the single hand-rolled provider lean and makes the next quirk a one-line data entry.
|
|
8
|
+
|
|
9
|
+
This is the Python-appropriate analogue of OpenCode's per-provider "compatibility lowering":
|
|
10
|
+
a small table, not a class hierarchy. It is the single place provider param drift is encoded.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True, slots=True)
|
|
19
|
+
class ProviderQuirks:
|
|
20
|
+
"""How one provider's chat-completions request differs from the OpenAI baseline."""
|
|
21
|
+
|
|
22
|
+
# Name of the max-output-tokens param. OpenAI's GPT-5/o-series need max_completion_tokens;
|
|
23
|
+
# every other OpenAI-compatible host (groq, openrouter, deepseek, …) uses the classic name.
|
|
24
|
+
token_param: str = "max_tokens"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Baseline quirks: returned for every provider that has no entry in ``_QUIRKS``.
_DEFAULT = ProviderQuirks()

# Harness provider id -> quirks. List ONLY providers that deviate from the baseline;
# adding the next quirk should be a one-line entry here.
_QUIRKS: dict[str, ProviderQuirks] = dict(
    openai=ProviderQuirks(token_param="max_completion_tokens"),
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def quirks_for(provider: str) -> ProviderQuirks:
    """Return the quirks recorded for ``provider``, or the OpenAI-compatible baseline.

    Providers without an entry in the quirks table get the shared default instance.
    """
    try:
        return _QUIRKS[provider]
    except KeyError:
        return _DEFAULT
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Provider registry entry point.
|
|
2
|
+
|
|
3
|
+
Real providers self-register on import. Their modules are imported defensively so a
|
|
4
|
+
missing optional SDK only skips that provider (logged at debug) rather than breaking
|
|
5
|
+
``import minima_harness``. The faux provider registers on demand via
|
|
6
|
+
:func:`register_faux_provider`.
|
|
7
|
+
|
|
8
|
+
Always available:
|
|
9
|
+
- openai-completions (raw httpx; httpx is a core dep)
|
|
10
|
+
Conditionally (need the ``harness`` extra: ``anthropic`` + ``google-genai``):
|
|
11
|
+
- anthropic-messages
|
|
12
|
+
- google-generative-ai
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
|
|
19
|
+
from minima_harness.ai.providers.base import (
|
|
20
|
+
Provider,
|
|
21
|
+
get_provider,
|
|
22
|
+
register_provider,
|
|
23
|
+
registered_apis,
|
|
24
|
+
unregister_provider,
|
|
25
|
+
)
|
|
26
|
+
from minima_harness.ai.providers.faux import register_faux_provider
|
|
27
|
+
|
|
28
|
+
_log = logging.getLogger("minima_harness.ai.providers")
|
|
29
|
+
|
|
30
|
+
_REGISTERED = False
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def ensure_providers_registered() -> None:
    """Idempotently import the real provider modules and register their providers.

    Called lazily from :func:`minima_harness.ai.stream` to keep ``import minima_harness``
    side-effect-free. Only the first call does any work; later calls return at once,
    even if a provider failed to register the first time. Failures never propagate:

    - ``ImportError`` (e.g. an optional SDK is not installed) is logged at debug.
    - Any other exception is logged at warning, so a genuinely broken provider is
      visible instead of being silently skipped.
    """
    global _REGISTERED
    if _REGISTERED:
        return
    # Flip the flag before importing so a failing provider is not retried on every call.
    _REGISTERED = True

    import importlib

    package = "minima_harness.ai.providers"

    # The OpenAI-compatible provider is keyed by its own ``api_id`` attribute.
    try:
        openai_compat = importlib.import_module(f"{package}.openai_compat")
        provider_cls = openai_compat.OpenAICompatProvider
        register_provider(provider_cls.api_id, provider_cls())
    except ImportError as exc:
        _log.debug("openai-completions provider not registered: %s", exc)
    except Exception as exc:  # noqa: BLE001 - httpx should always be present
        _log.warning("openai-completions provider failed to register: %s", exc)

    # Optional providers: (module name, api id to register under, provider class name).
    for mod, api, cls in (
        ("anthropic", "anthropic-messages", "AnthropicProvider"),
        ("google", "google-generative-ai", "GoogleProvider"),
    ):
        try:
            provider_cls = getattr(importlib.import_module(f"{package}.{mod}"), cls)
            register_provider(api, provider_cls())
        except ImportError as exc:
            _log.debug("%s provider skipped (SDK not installed): %s", api, exc)
        except Exception as exc:  # noqa: BLE001
            _log.warning("%s provider failed to register: %s", api, exc)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
__all__ = [
|
|
68
|
+
"Provider",
|
|
69
|
+
"ensure_providers_registered",
|
|
70
|
+
"get_provider",
|
|
71
|
+
"register_faux_provider",
|
|
72
|
+
"register_provider",
|
|
73
|
+
"registered_apis",
|
|
74
|
+
"unregister_provider",
|
|
75
|
+
]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Shared helpers for provider implementations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def resolve_api_key(options: dict[str, Any] | None, *env_vars: str) -> str | None:
    """Pick the API key: an explicit ``options["api_key"]`` first, else the environment.

    Environment variables are consulted in the order given; unset or empty ones are
    skipped. Returns ``None`` when no source supplies a non-empty key.
    """
    explicit = options.get("api_key") if options else None
    if explicit:
        return str(explicit)
    return next((found for found in map(os.environ.get, env_vars) if found), None)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def to_json_schema(model_cls: type[BaseModel]) -> dict[str, Any]:
    """Build a provider-neutral JSON Schema from a pydantic parameter model.

    The schema pydantic generates is post-processed in place: ``title`` entries are
    dropped, and ``anyOf``-of-``const`` unions (how ``Literal`` surfaces) become plain
    ``enum`` lists so Google's constrained schema dialect accepts them.
    """
    generated = model_cls.model_json_schema()
    generated.pop("title", None)
    _clean_schema(generated)
    return generated
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _clean_schema(node: Any) -> None:
|
|
37
|
+
if isinstance(node, dict):
|
|
38
|
+
node.pop("title", None)
|
|
39
|
+
# Literal -> pydantic emits {"anyOf":[{"const": v}, ...]}; flatten to {"enum":[...]}.
|
|
40
|
+
any_of = node.get("anyOf")
|
|
41
|
+
if isinstance(any_of, list) and all(isinstance(a, dict) and "const" in a for a in any_of):
|
|
42
|
+
node["enum"] = [a["const"] for a in any_of]
|
|
43
|
+
node.pop("anyOf", None)
|
|
44
|
+
for value in node.values():
|
|
45
|
+
_clean_schema(value)
|
|
46
|
+
elif isinstance(node, list):
|
|
47
|
+
for item in node:
|
|
48
|
+
_clean_schema(item)
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""Anthropic Messages API provider (wraps the ``anthropic`` SDK, async).
|
|
2
|
+
|
|
3
|
+
Reuses minima's optional ``reasoner-anthropic`` / ``harness`` extra. Maps the SDK's raw
|
|
4
|
+
stream events onto PI's event taxonomy and assembles the final AssistantMessage with
|
|
5
|
+
realized token usage (input from ``message_start``, output from ``message_delta``).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import AsyncIterator
|
|
11
|
+
from typing import TYPE_CHECKING, Any
|
|
12
|
+
|
|
13
|
+
from minima_harness.ai.compat import normalize_for_target
|
|
14
|
+
from minima_harness.ai.events import (
|
|
15
|
+
DoneEvent,
|
|
16
|
+
ErrorEvent,
|
|
17
|
+
StartEvent,
|
|
18
|
+
TextDeltaEvent,
|
|
19
|
+
TextEndEvent,
|
|
20
|
+
TextStartEvent,
|
|
21
|
+
ThinkingDeltaEvent,
|
|
22
|
+
ThinkingEndEvent,
|
|
23
|
+
ThinkingStartEvent,
|
|
24
|
+
ToolCall,
|
|
25
|
+
ToolCallDeltaEvent,
|
|
26
|
+
ToolCallEndEvent,
|
|
27
|
+
ToolCallStartEvent,
|
|
28
|
+
)
|
|
29
|
+
from minima_harness.ai.providers._common import resolve_api_key, to_json_schema
|
|
30
|
+
from minima_harness.ai.types import (
|
|
31
|
+
AssistantMessage,
|
|
32
|
+
ImageContent,
|
|
33
|
+
Message,
|
|
34
|
+
TextContent,
|
|
35
|
+
ThinkingContent,
|
|
36
|
+
)
|
|
37
|
+
from minima_harness.ai.usage import attach_cost
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
from anthropic import AsyncAnthropic
|
|
41
|
+
|
|
42
|
+
from minima_harness.ai.events import Event
|
|
43
|
+
from minima_harness.ai.types import Context, Model
|
|
44
|
+
|
|
45
|
+
_STOP_MAP = {
|
|
46
|
+
"end_turn": "stop",
|
|
47
|
+
"stop_sequence": "stop",
|
|
48
|
+
"max_tokens": "length",
|
|
49
|
+
"tool_use": "toolUse",
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class AnthropicProvider:
    """Streaming provider for the Anthropic Messages API (api id ``anthropic-messages``).

    Drives the async ``anthropic`` SDK client and translates its raw stream events into
    harness events, assembling the final ``AssistantMessage`` along the way.
    """

    api_id = "anthropic-messages"

    def __init__(self, client: AsyncAnthropic | None = None) -> None:
        # An injected client is reused for every call; with ``None`` a client is built
        # on each ``stream()`` call from that call's options.
        self._client = client

    def _build_client(self, options: dict[str, Any]) -> AsyncAnthropic:
        """Return the injected client, or build one from ``options`` and the environment.

        The key comes from ``options["api_key"]``, then ``ANTHROPIC_API_KEY``, then
        ``ANTHROPIC_OAUTH_TOKEN``; the timeout from ``options["timeout"]`` (default 60s).
        """
        if self._client is not None:
            return self._client
        # Function-scope import: the SDK is only needed when a client is actually built.
        from anthropic import AsyncAnthropic

        api_key = resolve_api_key(options, "ANTHROPIC_API_KEY", "ANTHROPIC_OAUTH_TOKEN")
        return AsyncAnthropic(api_key=api_key, timeout=options.get("timeout", 60.0))

    @staticmethod
    def _decode_tool_args(raw: str) -> Any:
        """Decode the accumulated ``input_json_delta`` text of one tool call.

        Blank input decodes to ``{}``. Text that is not valid JSON is preserved as
        ``{"_raw": raw}`` so the malformed arguments stay visible downstream.
        """
        import json

        if not raw.strip():
            return {}
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return {"_raw": raw}

    async def stream(
        self,
        model: Model,
        context: Context,
        *,
        options: dict[str, Any] | None = None,
        signal: object | None = None,
    ) -> AsyncIterator[Event]:
        """Stream one assistant turn, translating SDK events into harness events.

        Yields ``StartEvent`` first, then per-block start/delta/end events, and finally
        either ``DoneEvent`` (carrying the assembled message with realized usage and
        cost) or ``ErrorEvent`` if the SDK stream raised. Every text/thinking block
        start is matched by an end event, even when the block received no deltas.

        Errors raised while building the client or the request kwargs propagate to the
        caller; only failures inside the SDK stream are turned into ``ErrorEvent``.
        ``signal`` is accepted for interface compatibility and is not consulted here.
        """
        options = options or {}
        client = self._build_client(options)
        kwargs = _build_kwargs(model, context, options)
        assistant = AssistantMessage(content=[], model=model.id, stop_reason="stop")
        # Block index -> type announced by content_block_start (text / thinking only).
        opened: dict[int, str] = {}
        text_buf: dict[int, list[str]] = {}
        think_buf: dict[int, list[str]] = {}
        sig_buf: dict[int, list[str]] = {}  # signature_delta chunks per thinking block
        # Block index -> {"id", "name", "args"}; "args" is a list of JSON fragments.
        tools_acc: dict[int, dict[str, Any]] = {}
        in_tokens = out_tokens = cache_read = cache_write = 0

        yield StartEvent(partial=assistant)
        try:
            async with client.messages.stream(**kwargs) as s:
                async for ev in s:
                    etype = getattr(ev, "type", "")

                    if etype == "message_start":
                        # Input-side usage is reported once, up front.
                        usage = getattr(getattr(ev, "message", None), "usage", None)
                        if usage is not None:
                            in_tokens = getattr(usage, "input_tokens", 0) or 0
                            cache_read = getattr(usage, "cache_read_input_tokens", 0) or 0
                            cache_write = getattr(usage, "cache_creation_input_tokens", 0) or 0

                    elif etype == "content_block_start":
                        idx = getattr(ev, "index", 0)
                        block = getattr(ev, "content_block", None)
                        btype = getattr(block, "type", "")
                        if btype == "text":
                            opened[idx] = btype
                            yield TextStartEvent(content_index=idx)
                        elif btype == "thinking":
                            opened[idx] = btype
                            yield ThinkingStartEvent(content_index=idx)
                        elif btype == "tool_use":
                            tools_acc[idx] = {
                                "id": getattr(block, "id", "") or f"call_{idx}",
                                "name": getattr(block, "name", ""),
                                "args": [],
                            }
                            yield ToolCallStartEvent(content_index=idx)

                    elif etype == "content_block_delta":
                        idx = getattr(ev, "index", 0)
                        delta = getattr(ev, "delta", None)
                        dtype = getattr(delta, "type", "")
                        if dtype == "text_delta":
                            txt = getattr(delta, "text", "") or ""
                            text_buf.setdefault(idx, []).append(txt)
                            yield TextDeltaEvent(delta=txt, content_index=idx)
                        elif dtype == "thinking_delta":
                            txt = getattr(delta, "thinking", "") or ""
                            think_buf.setdefault(idx, []).append(txt)
                            yield ThinkingDeltaEvent(delta=txt, content_index=idx)
                        elif dtype == "signature_delta":
                            # Anthropic signs each thinking block; capture it so it can be
                            # echoed back when the block is replayed (required, or the
                            # API 400s).
                            sig = getattr(delta, "signature", "") or ""
                            sig_buf.setdefault(idx, []).append(sig)
                        elif dtype == "input_json_delta":
                            partial = getattr(delta, "partial_json", "") or ""
                            if idx in tools_acc:
                                tools_acc[idx]["args"].append(partial)
                            yield ToolCallDeltaEvent(delta=partial, content_index=idx)

                    elif etype == "content_block_stop":
                        idx = getattr(ev, "index", 0)
                        kind = opened.pop(idx, "")
                        if idx in tools_acc:
                            slot = tools_acc[idx]
                            call = ToolCall(
                                id=slot["id"],
                                name=slot["name"],
                                arguments=self._decode_tool_args("".join(slot["args"])),
                            )
                            assistant.content.append(call)
                            yield ToolCallEndEvent(tool_call=call, content_index=idx)
                        elif kind == "thinking" or idx in think_buf or idx in sig_buf:
                            thinking = "".join(think_buf.get(idx, []))
                            signature = "".join(sig_buf.get(idx, []))
                            # Record the block when it carried thinking text or a
                            # signature; a signature-only block must not be lost, since
                            # the signature is what makes the block replayable.
                            if idx in think_buf or signature:
                                assistant.content.append(
                                    ThinkingContent(thinking=thinking, signature=signature)
                                )
                            yield ThinkingEndEvent(content=thinking, content_index=idx)
                        elif kind == "text" or idx in text_buf:
                            text = "".join(text_buf.get(idx, []))
                            # A text block that never received a delta still gets its
                            # end event, but no empty TextContent is added for it.
                            if idx in text_buf:
                                assistant.content.append(TextContent(text=text))
                            yield TextEndEvent(content=text, content_index=idx)

                    elif etype == "message_delta":
                        delta = getattr(ev, "delta", None)
                        stop = getattr(delta, "stop_reason", None)
                        if stop:
                            # Unmapped stop reasons are reported as a plain "stop".
                            assistant.stop_reason = _STOP_MAP.get(stop, "stop")  # type: ignore[assignment]
                        usage = getattr(ev, "usage", None)
                        if usage is not None:
                            out_tokens = getattr(usage, "output_tokens", 0) or 0
        except Exception as exc:  # noqa: BLE001
            err = AssistantMessage(
                content=[TextContent(text="")], stop_reason="error", error_message=str(exc)
            )
            err.model = model.id
            yield ErrorEvent(reason="error", error=err)
            return

        # A message with no recorded blocks still carries one (empty) text block.
        if not assistant.content:
            assistant.content.append(TextContent(text=""))
        assistant.usage.input = in_tokens
        assistant.usage.output = out_tokens
        assistant.usage.cache_read = cache_read
        assistant.usage.cache_write = cache_write
        attach_cost(model, assistant.usage)
        yield DoneEvent(reason=assistant.stop_reason, message=assistant)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
_EPHEMERAL = {"type": "ephemeral"}
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _build_kwargs(model: Model, context: Context, options: dict[str, Any]) -> dict[str, Any]:
    """Assemble the keyword arguments for ``client.messages.stream``.

    Prompt caching is ON unless ``options["prompt_cache"]`` is falsy: the agent re-sends
    a large stable prefix (system prompt + tool schemas + conversation history) every
    turn, and ``cache_control`` breakpoints mark the longest prefix to cache, which
    Anthropic reads at ~0.1x on the next turn. Callers with unique one-shot prompts
    (e.g. the LLM judge) pass ``prompt_cache=False`` to avoid a pointless cache write.
    Below the per-model minimum cacheable size the API simply ignores the breakpoint,
    so enabling it is always safe.

    Thinking is opt-in (``options["thinking"]``, and only for reasoning models) to avoid
    surprise token spend; its budget comes from ``options["thinking_budget"]``
    (default 1024).
    """
    use_cache = bool(options.get("prompt_cache", True))
    normalized = normalize_for_target(context.messages, "anthropic-messages")
    wire = [_to_wire(msg) for msg in normalized]
    kwargs: dict[str, Any] = {
        "model": model.id,
        "max_tokens": options.get("max_tokens", model.max_tokens),
        "messages": wire,
    }

    if context.system_prompt and use_cache:
        # Block form is required to attach a cache breakpoint to the system prompt.
        kwargs["system"] = [
            {"type": "text", "text": context.system_prompt, "cache_control": _EPHEMERAL}
        ]
    elif context.system_prompt:
        kwargs["system"] = context.system_prompt

    if context.tools:
        tools = []
        for tool in context.tools:
            tools.append(
                {
                    "name": tool.name,
                    "description": tool.description,
                    "input_schema": to_json_schema(tool.parameters),
                }
            )
        if use_cache and tools:
            # A breakpoint on the LAST tool caches the whole (stable) tool array.
            tools[-1] = {**tools[-1], "cache_control": _EPHEMERAL}
        kwargs["tools"] = tools

    if use_cache and wire:
        _mark_last_block(wire[-1])

    if options.get("thinking") and model.reasoning:
        budget_tokens = int(options.get("thinking_budget", 1024))
        kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget_tokens}
    return kwargs
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _mark_last_block(wire_msg: dict[str, Any]) -> None:
|
|
236
|
+
"""Add a cache_control breakpoint to the last content block of a wire message.
|
|
237
|
+
|
|
238
|
+
Caches the conversation prefix incrementally: each turn extends the cached prefix, so
|
|
239
|
+
the prior history is re-read at ~0.1x rather than re-charged at the full input rate.
|
|
240
|
+
"""
|
|
241
|
+
content = wire_msg.get("content")
|
|
242
|
+
if isinstance(content, list) and content and isinstance(content[-1], dict):
|
|
243
|
+
content[-1] = {**content[-1], "cache_control": _EPHEMERAL}
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _to_wire(m: Message) -> dict[str, Any]:
|
|
247
|
+
if m.role == "toolResult":
|
|
248
|
+
return {
|
|
249
|
+
"role": "user",
|
|
250
|
+
"content": [
|
|
251
|
+
{
|
|
252
|
+
"type": "tool_result",
|
|
253
|
+
"tool_use_id": m.tool_call_id,
|
|
254
|
+
"content": _flatten_text(m),
|
|
255
|
+
"is_error": m.is_error,
|
|
256
|
+
}
|
|
257
|
+
],
|
|
258
|
+
}
|
|
259
|
+
blocks = m.content if not isinstance(m.content, str) else [TextContent(text=m.content)]
|
|
260
|
+
content: list[dict[str, Any]] = []
|
|
261
|
+
for b in blocks:
|
|
262
|
+
if isinstance(b, TextContent):
|
|
263
|
+
content.append({"type": "text", "text": b.text})
|
|
264
|
+
elif isinstance(b, ImageContent):
|
|
265
|
+
content.append(
|
|
266
|
+
{
|
|
267
|
+
"type": "image",
|
|
268
|
+
"source": {"type": "base64", "media_type": b.mime_type, "data": b.data},
|
|
269
|
+
}
|
|
270
|
+
)
|
|
271
|
+
elif isinstance(b, ThinkingContent):
|
|
272
|
+
# A thinking block can only be replayed WITH its signature — Anthropic 400s on
|
|
273
|
+
# "thinking.signature: Field required" otherwise. We only have a signature for blocks
|
|
274
|
+
# this provider produced; drop any unsigned thinking (e.g. from another provider or an
|
|
275
|
+
# older session) rather than send an invalid block.
|
|
276
|
+
if b.signature:
|
|
277
|
+
content.append(
|
|
278
|
+
{"type": "thinking", "thinking": b.thinking, "signature": b.signature}
|
|
279
|
+
)
|
|
280
|
+
elif isinstance(b, ToolCall):
|
|
281
|
+
content.append(
|
|
282
|
+
{"type": "tool_use", "id": b.id, "name": b.name, "input": b.arguments or {}}
|
|
283
|
+
)
|
|
284
|
+
return {"role": m.role, "content": content}
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _flatten_text(m: Message) -> str:
|
|
288
|
+
if isinstance(m.content, str):
|
|
289
|
+
return m.content
|
|
290
|
+
return "".join(b.text for b in m.content if isinstance(b, TextContent))
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Provider protocol and the provider registry.
|
|
2
|
+
|
|
3
|
+
A provider owns the ``stream()`` implementation for one ``api`` id (e.g.
|
|
4
|
+
``anthropic-messages``). Real providers register themselves at import time in
|
|
5
|
+
Phase 1; the faux provider registers on demand for hermetic tests.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import AsyncIterator
|
|
11
|
+
from typing import TYPE_CHECKING, Protocol, runtime_checkable
|
|
12
|
+
|
|
13
|
+
from minima_harness.ai.events import Event
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from minima_harness.ai.types import Context, Model
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@runtime_checkable
class Provider(Protocol):
    """Structural interface of a streaming provider bound to one ``Model.api`` id."""

    # The ``Model.api`` id this provider is bound to.
    api_id: str

    def stream(
        self,
        model: Model,
        context: Context,
        *,
        options: dict | None = None,
        signal: object | None = None,
    ) -> AsyncIterator[Event]:
        """Produce the event stream for one request.

        The stream ends with either ``DoneEvent`` or ``ErrorEvent``.

        Implementations are async generator functions (``async def`` containing
        ``yield``). That is why the protocol declares a plain ``def`` returning
        ``AsyncIterator``: calling it hands back the iterator directly, with no
        ``await``.
        """
        ...
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# api id -> provider instance. Instances are reused; stateful providers (faux) expose
|
|
43
|
+
# per-test handles rather than mutating this singleton.
|
|
44
|
+
_REGISTRY: dict[str, Provider] = {}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def register_provider(api: str, provider: Provider) -> None:
    """Bind ``provider`` to ``api``, replacing any provider already registered for it."""
    _REGISTRY.update({api: provider})
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def unregister_provider(api: str) -> None:
    """Remove the provider registered for ``api``; a no-op when there is none."""
    if api in _REGISTRY:
        del _REGISTRY[api]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_provider(api: str) -> Provider:
    """Return the provider registered for ``api``.

    Raises:
        KeyError: no provider is registered for ``api``. The message lists the
            registered api ids in sorted order, or ``<none>`` when the registry
            is empty.
    """
    if api in _REGISTRY:
        return _REGISTRY[api]
    available = ", ".join(sorted(_REGISTRY)) or "<none>"
    raise KeyError(f"no provider registered for api {api!r} (available: {available})") from None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def registered_apis() -> list[str]:
    """List the api ids that currently have a provider, in sorted order."""
    apis = list(_REGISTRY)
    apis.sort()
    return apis
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""In-memory provider for hermetic tests and demos.
|
|
2
|
+
|
|
3
|
+
Minimal port of PI's ``registerFauxProvider``. Opt-in; not registered by default.
|
|
4
|
+
One deterministic scripted flow per registration.
|
|
5
|
+
Usage is estimated at roughly 1 token per 4 characters when not provided on the message.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections import deque
|
|
11
|
+
from collections.abc import AsyncIterator
|
|
12
|
+
from typing import TYPE_CHECKING
|
|
13
|
+
|
|
14
|
+
from minima_harness.ai.events import (
|
|
15
|
+
DoneEvent,
|
|
16
|
+
ErrorEvent,
|
|
17
|
+
StartEvent,
|
|
18
|
+
TextDeltaEvent,
|
|
19
|
+
TextEndEvent,
|
|
20
|
+
TextStartEvent,
|
|
21
|
+
ThinkingDeltaEvent,
|
|
22
|
+
ThinkingEndEvent,
|
|
23
|
+
ThinkingStartEvent,
|
|
24
|
+
ToolCallEndEvent,
|
|
25
|
+
ToolCallStartEvent,
|
|
26
|
+
)
|
|
27
|
+
from minima_harness.ai.providers.base import Provider, register_provider, unregister_provider
|
|
28
|
+
from minima_harness.ai.types import (
|
|
29
|
+
AssistantMessage,
|
|
30
|
+
Modality,
|
|
31
|
+
Model,
|
|
32
|
+
ModelCost,
|
|
33
|
+
TextContent,
|
|
34
|
+
ThinkingContent,
|
|
35
|
+
ToolCall,
|
|
36
|
+
)
|
|
37
|
+
from minima_harness.ai.usage import attach_cost
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
from minima_harness.ai.events import Event
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Default model handed out by a faux registration when the caller supplies none.
_FAUX_MODEL = Model(
    id="faux",
    provider="faux",
    api="faux",
    name="Faux (test)",
    cost=ModelCost(input=0.0, output=0.0),
    context_window=8192,
    max_tokens=4096,
    input=(Modality.text,),
    reasoning=False,
)

# Divisor for the usage estimate: roughly 1 token per 4 characters, per PI's faux provider.
_CHARS_PER_TOKEN = 4
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _estimate_usage(msg: AssistantMessage) -> None:
|
|
60
|
+
if msg.usage.input or msg.usage.output:
|
|
61
|
+
return
|
|
62
|
+
char_len = sum(
|
|
63
|
+
len(b.text) if isinstance(b, TextContent) else len(getattr(b, "thinking", ""))
|
|
64
|
+
for b in msg.content
|
|
65
|
+
if not isinstance(b, str)
|
|
66
|
+
)
|
|
67
|
+
msg.usage.output = max(1, char_len // _CHARS_PER_TOKEN)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class FauxProviderState:
    """Observable state of one faux registration: call counter and queued responses."""

    def __init__(self) -> None:
        # Counter of calls made against the provider; starts at zero.
        self.call_count = 0
        # Scripted responses still waiting to be served, oldest first.
        self.responses: deque[AssistantMessage] = deque()

    @property
    def pending_response_count(self) -> int:
        """Number of queued responses that have not been consumed yet."""
        remaining = len(self.responses)
        return remaining
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class FauxRegistration:
    """Handle returned by :func:`register_faux_provider`.

    Owns the scripted-response queue and the provider instance, and controls whether
    that provider is registered under the ``faux`` api id. Usable as a context manager:
    entering registers the provider, leaving unregisters it.
    """

    def __init__(self, *, models: list[Model] | None = None) -> None:
        # An empty or missing list falls back to the single built-in faux model.
        self.models = models if models else [_FAUX_MODEL]
        self.state = FauxProviderState()
        self._provider = _FauxProvider(self.state, self.models)

    def get_model(self, model_id: str | None = None) -> Model:
        """Return the model with ``model_id``, or the first model when it is ``None``.

        Raises:
            KeyError: no model of this registration has that id.
        """
        if model_id is None:
            return self.models[0]
        matches = (candidate for candidate in self.models if candidate.id == model_id)
        for found in matches:
            return found
        raise KeyError(model_id)

    def set_responses(self, messages: list[AssistantMessage]) -> None:
        """Replace the queue of scripted responses with ``messages``."""
        self.state.responses = deque(messages)

    def append_responses(self, messages: list[AssistantMessage]) -> None:
        """Add ``messages`` to the end of the scripted-response queue."""
        self.state.responses.extend(messages)

    def register(self) -> FauxRegistration:
        """Register this registration's provider under ``faux`` and return ``self``."""
        register_provider("faux", self._provider)
        return self

    def unregister(self) -> None:
        """Remove whatever provider is registered under ``faux``."""
        unregister_provider("faux")

    def __enter__(self) -> FauxRegistration:
        register_provider("faux", self._provider)
        return self

    def __exit__(self, *exc: object) -> None:
        unregister_provider("faux")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class _FauxProvider(Provider):
    """Provider that replays the scripted responses queued on a faux registration."""

    def __init__(self, state: FauxProviderState, models: list[Model]) -> None:
        self.api_id = "faux"
        self.state = state
        self.models = models

    async def stream(
        self,
        model: Model,
        context: object,
        *,
        options: dict | None = None,
        signal: object | None = None,
    ) -> AsyncIterator[Event]:
        """Replay the next queued response as a start / per-block / done event sequence.

        Each call increments the call counter. With an empty queue, a single
        ``ErrorEvent`` is yielded and the stream ends. Otherwise the next response is
        popped, stamped with ``model.id``, given estimated usage and cost, and each of
        its blocks is emitted as whole-block events (one delta at most per block).
        ``context``, ``options`` and ``signal`` are not consulted.
        """
        state = self.state
        state.call_count += 1
        if not state.responses:
            err = AssistantMessage(
                content=[TextContent(text="")],
                stop_reason="error",
                error_message="No more faux responses queued",
            )
            err.model = model.id
            yield ErrorEvent(reason="error", error=err)
            return

        msg = state.responses.popleft()
        # The queued message object itself is updated and handed out, not a copy.
        msg.model = model.id
        _estimate_usage(msg)
        attach_cost(model, msg.usage)

        yield StartEvent(partial=msg)
        for position, piece in enumerate(msg.content):
            if isinstance(piece, TextContent):
                yield TextStartEvent(content_index=position)
                if piece.text:
                    yield TextDeltaEvent(delta=piece.text, content_index=position)
                yield TextEndEvent(content=piece.text, content_index=position)
                continue
            if isinstance(piece, ThinkingContent):
                yield ThinkingStartEvent(content_index=position)
                if piece.thinking:
                    yield ThinkingDeltaEvent(delta=piece.thinking, content_index=position)
                yield ThinkingEndEvent(content=piece.thinking, content_index=position)
                continue
            if isinstance(piece, ToolCall):
                # Tool calls are emitted whole: a start and an end, no argument deltas.
                yield ToolCallStartEvent(content_index=position)
                yield ToolCallEndEvent(tool_call=piece, content_index=position)
        yield DoneEvent(reason=msg.stop_reason, message=msg)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def register_faux_provider(*, models: list[Model] | None = None) -> FauxRegistration:
    """Create a faux registration, register its in-memory provider, and return the handle.

    Intended for tests and demos. Call ``.unregister()`` on the returned handle (or use
    the handle as a context manager) so the faux api id does not leak across tests.
    """
    registration = FauxRegistration(models=models)
    return registration.register()
|