minima-cli 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. minima/__init__.py +5 -0
  2. minima/api/__init__.py +1 -0
  3. minima/api/auth.py +39 -0
  4. minima/api/errors.py +40 -0
  5. minima/api/routers/__init__.py +1 -0
  6. minima/api/routers/calibration.py +50 -0
  7. minima/api/routers/feedback.py +279 -0
  8. minima/api/routers/health.py +50 -0
  9. minima/api/routers/models.py +42 -0
  10. minima/api/routers/recommend.py +66 -0
  11. minima/api/routers/savings.py +55 -0
  12. minima/api/routers/strategies.py +33 -0
  13. minima/catalog/__init__.py +1 -0
  14. minima/catalog/data/capability_priors.json +210 -0
  15. minima/catalog/data/model_aliases.json +12 -0
  16. minima/catalog/merge.py +69 -0
  17. minima/catalog/refresh.py +54 -0
  18. minima/catalog/sources/__init__.py +1 -0
  19. minima/catalog/sources/litellm.py +19 -0
  20. minima/catalog/sources/openrouter.py +25 -0
  21. minima/catalog/store.py +86 -0
  22. minima/config.py +288 -0
  23. minima/deps.py +35 -0
  24. minima/llm/__init__.py +1 -0
  25. minima/llm/anthropic.py +106 -0
  26. minima/llm/base.py +196 -0
  27. minima/llm/gemini.py +124 -0
  28. minima/llm/registry.py +54 -0
  29. minima/logging.py +28 -0
  30. minima/main.py +109 -0
  31. minima/memory/__init__.py +1 -0
  32. minima/memory/adapter.py +572 -0
  33. minima/memory/keys.py +83 -0
  34. minima/memory/records.py +190 -0
  35. minima/memory/threadpool.py +41 -0
  36. minima/metrics/__init__.py +1 -0
  37. minima/metrics/calibration.py +415 -0
  38. minima/metrics/report.py +116 -0
  39. minima/metrics/savings.py +98 -0
  40. minima/recommender/__init__.py +1 -0
  41. minima/recommender/_pg_pool.py +38 -0
  42. minima/recommender/_redis_client.py +32 -0
  43. minima/recommender/aggregate.py +157 -0
  44. minima/recommender/classify.py +165 -0
  45. minima/recommender/decisionlog.py +505 -0
  46. minima/recommender/durablerefs.py +312 -0
  47. minima/recommender/engine.py +997 -0
  48. minima/recommender/escalation.py +83 -0
  49. minima/recommender/propensity.py +189 -0
  50. minima/recommender/recstore.py +368 -0
  51. minima/recommender/score.py +318 -0
  52. minima/recommender/types.py +166 -0
  53. minima/schemas/__init__.py +1 -0
  54. minima/schemas/common.py +73 -0
  55. minima/schemas/feedback.py +34 -0
  56. minima/schemas/models_catalog.py +36 -0
  57. minima/schemas/recommend.py +104 -0
  58. minima/schemas/savings.py +39 -0
  59. minima/schemas/strategies.py +57 -0
  60. minima/schemas/workflow.py +43 -0
  61. minima/seeding/__init__.py +1 -0
  62. minima/seeding/items.py +42 -0
  63. minima/seeding/llmrouterbench.py +232 -0
  64. minima/seeding/routerbench.py +141 -0
  65. minima/seeding/run_seed.py +56 -0
  66. minima/seeding/synthetic.py +70 -0
  67. minima/tenancy/__init__.py +8 -0
  68. minima/tenancy/context.py +37 -0
  69. minima/tenancy/passthrough.py +110 -0
  70. minima/version.py +3 -0
  71. minima_cli-0.4.9.dist-info/METADATA +275 -0
  72. minima_cli-0.4.9.dist-info/RECORD +161 -0
  73. minima_cli-0.4.9.dist-info/WHEEL +4 -0
  74. minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
  75. minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
  76. minima_client/__init__.py +19 -0
  77. minima_client/autocapture.py +101 -0
  78. minima_client/client.py +301 -0
  79. minima_client/errors.py +23 -0
  80. minima_harness/LICENSE_PI +32 -0
  81. minima_harness/__init__.py +16 -0
  82. minima_harness/agent/__init__.py +72 -0
  83. minima_harness/agent/agent.py +276 -0
  84. minima_harness/agent/events.py +124 -0
  85. minima_harness/agent/loop.py +311 -0
  86. minima_harness/agent/state.py +79 -0
  87. minima_harness/agent/tools.py +97 -0
  88. minima_harness/ai/__init__.py +66 -0
  89. minima_harness/ai/compat.py +71 -0
  90. minima_harness/ai/errors.py +96 -0
  91. minima_harness/ai/events.py +117 -0
  92. minima_harness/ai/openrouter_catalog.py +153 -0
  93. minima_harness/ai/provider_catalog.py +299 -0
  94. minima_harness/ai/provider_quirks.py +37 -0
  95. minima_harness/ai/providers/__init__.py +75 -0
  96. minima_harness/ai/providers/_common.py +48 -0
  97. minima_harness/ai/providers/anthropic.py +290 -0
  98. minima_harness/ai/providers/base.py +65 -0
  99. minima_harness/ai/providers/faux.py +173 -0
  100. minima_harness/ai/providers/google.py +221 -0
  101. minima_harness/ai/providers/openai_compat.py +278 -0
  102. minima_harness/ai/registry.py +184 -0
  103. minima_harness/ai/stream.py +82 -0
  104. minima_harness/ai/tools.py +51 -0
  105. minima_harness/ai/types.py +204 -0
  106. minima_harness/ai/usage.py +41 -0
  107. minima_harness/minima/__init__.py +40 -0
  108. minima_harness/minima/cache.py +102 -0
  109. minima_harness/minima/config.py +85 -0
  110. minima_harness/minima/goals.py +226 -0
  111. minima_harness/minima/judge.py +144 -0
  112. minima_harness/minima/mapping.py +147 -0
  113. minima_harness/minima/meter.py +143 -0
  114. minima_harness/minima/router.py +220 -0
  115. minima_harness/minima/runtime.py +544 -0
  116. minima_harness/minima/signals.py +195 -0
  117. minima_harness/session/__init__.py +14 -0
  118. minima_harness/session/format.py +35 -0
  119. minima_harness/session/store.py +236 -0
  120. minima_harness/tasks/__init__.py +17 -0
  121. minima_harness/tasks/task_set.py +78 -0
  122. minima_harness/tools/__init__.py +7 -0
  123. minima_harness/tools/_io.py +34 -0
  124. minima_harness/tools/bash.py +70 -0
  125. minima_harness/tools/builtin.py +23 -0
  126. minima_harness/tools/edit.py +50 -0
  127. minima_harness/tools/find.py +38 -0
  128. minima_harness/tools/grep.py +73 -0
  129. minima_harness/tools/ls.py +35 -0
  130. minima_harness/tools/read.py +38 -0
  131. minima_harness/tools/tasks.py +75 -0
  132. minima_harness/tools/write.py +36 -0
  133. minima_harness/tui/__init__.py +3 -0
  134. minima_harness/tui/analytics.py +111 -0
  135. minima_harness/tui/app.py +1927 -0
  136. minima_harness/tui/bridge.py +103 -0
  137. minima_harness/tui/cli.py +227 -0
  138. minima_harness/tui/clipboard.py +60 -0
  139. minima_harness/tui/commands.py +49 -0
  140. minima_harness/tui/compaction.py +17 -0
  141. minima_harness/tui/config_cli.py +141 -0
  142. minima_harness/tui/config_store.py +237 -0
  143. minima_harness/tui/context.py +93 -0
  144. minima_harness/tui/customize.py +95 -0
  145. minima_harness/tui/diff.py +53 -0
  146. minima_harness/tui/editor.py +43 -0
  147. minima_harness/tui/extensions.py +84 -0
  148. minima_harness/tui/extra_models.py +52 -0
  149. minima_harness/tui/history.py +71 -0
  150. minima_harness/tui/mubit.py +295 -0
  151. minima_harness/tui/overlays.py +593 -0
  152. minima_harness/tui/packages.py +59 -0
  153. minima_harness/tui/run_modes.py +66 -0
  154. minima_harness/tui/theme.py +77 -0
  155. minima_harness/tui/welcome.py +83 -0
  156. minima_harness/tui/widgets/__init__.py +3 -0
  157. minima_harness/tui/widgets/banner.py +38 -0
  158. minima_harness/tui/widgets/editor.py +83 -0
  159. minima_harness/tui/widgets/footer.py +73 -0
  160. minima_harness/tui/widgets/messages.py +151 -0
  161. minima_harness/tui/widgets/status.py +57 -0
@@ -0,0 +1,82 @@
1
+ """Unified generation entry points: ``stream()`` and ``complete()``.
2
+
3
+ Dispatches to the provider registered for ``model.api``. ``stream()`` returns an async
4
+ iterable that also exposes ``await s.result()`` (mirrors PI's TS stream object).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import AsyncIterator
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ from minima_harness.ai.events import DoneEvent, ErrorEvent, Event
13
+ from minima_harness.ai.providers.base import get_provider
14
+
15
+ if TYPE_CHECKING:
16
+ from minima_harness.ai.types import AssistantMessage, Context, Model
17
+
18
+
19
class Stream:
    """Wrap a provider's async event iterator and remember the terminal message.

    Iterating yields every event unchanged. When a ``DoneEvent`` or ``ErrorEvent``
    passes through, its message is captured so ``await stream.result()`` can return it.
    """

    def __init__(self, gen: AsyncIterator[Event]) -> None:
        # Underlying provider event source.
        self._gen = gen
        # Final assistant message; filled in by the first done/error event seen.
        self._result: AssistantMessage | None = None
        # Set once a terminal event was seen or the source ran dry.
        self._consumed = False

    def __aiter__(self) -> Stream:
        return self

    async def __anext__(self) -> Event:
        """Return the next event, recording the final message on done/error events."""
        try:
            event = await self._gen.__anext__()
        except StopAsyncIteration:
            self._consumed = True
            raise

        if isinstance(event, DoneEvent):
            terminal = event.message
        elif isinstance(event, ErrorEvent):
            terminal = event.error
        else:
            # Ordinary (non-terminal) event: pass it through untouched.
            return event

        self._result = terminal
        self._consumed = True
        return event

    async def result(self) -> AssistantMessage:
        """Exhaust the remaining events and return the final assistant message.

        Raises:
            RuntimeError: the source ended without ever emitting a done/error event.
        """
        async for _event in self:
            continue
        final = self._result
        if final is None:  # pragma: no cover - defensive
            raise RuntimeError("stream ended without a done/error event")
        return final
51
+
52
+
53
def stream(
    model: Model,
    context: Context,
    *,
    options: dict[str, Any] | None = None,
    signal: object | None = None,
) -> Stream:
    """Start a streaming generation of ``context`` on ``model``.

    This is a plain (non-async) function: it hands back a :class:`Stream` right away,
    mirroring PI's TS ``stream()`` which is not a promise. Consume the events with
    ``async for`` and/or ``await s.result()`` to obtain the final message.

    ``options`` and ``signal`` are forwarded to the provider unchanged.
    """
    # Imported inside the function, as in the original module layout.
    from minima_harness.ai.providers import ensure_providers_registered

    ensure_providers_registered()
    # Dispatch on the model's API id.
    backend = get_provider(model.api)
    events = backend.stream(model, context, options=options, signal=signal)
    return Stream(events)
71
+
72
+
73
async def complete(
    model: Model,
    context: Context,
    *,
    options: dict[str, Any] | None = None,
    signal: object | None = None,
) -> AssistantMessage:
    """Run a generation to the end and return only the final assistant message.

    Convenience wrapper: opens a stream with the same arguments and awaits its
    ``result()``, discarding the intermediate events.
    """
    return await stream(model, context, options=options, signal=signal).result()
@@ -0,0 +1,51 @@
1
+ """Tool argument validation — the pydantic analogue of PI's TypeBox ``validateToolCall``.
2
+
3
+ Tools declare their parameters as a pydantic ``BaseModel`` subclass. The agent loop
4
+ auto-validates before execution; failures are returned to the model as tool errors so it
5
+ can retry (matching PI's behaviour).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ from pydantic import BaseModel, ValidationError
13
+
14
+ from minima_harness.ai.types import Tool, ToolCall
15
+
16
+
17
class ToolParamError(ValueError):
    """Signals that the arguments of a tool call did not pass schema validation."""
19
+
20
+
21
class UnknownToolError(KeyError):
    """Signals that a tool call names a tool that is not in the supplied tool set."""
23
+
24
+
25
def find_tool(tools: list[Tool], name: str) -> Tool:
    """Return the first tool in ``tools`` whose ``name`` equals ``name``.

    Raises:
        UnknownToolError: no tool in ``tools`` carries that name.
    """
    match = next((candidate for candidate in tools if candidate.name == name), None)
    if match is None:
        raise UnknownToolError(name)
    return match
30
+
31
+
32
def validate_tool_call(tools: list[Tool], call: ToolCall) -> BaseModel:
    """Check ``call.arguments`` against the parameter model of the tool it names.

    The tool is looked up by ``call.name``; its ``parameters`` model then parses the
    arguments. On success the parsed model instance is returned.

    Raises:
        ToolParamError: the arguments do not satisfy the tool's schema; the message is
            meant to be shown to the model so it can retry.
        UnknownToolError: propagated from ``find_tool`` when no tool has that name.
    """
    return _parse(find_tool(tools, call.name).parameters, call.arguments)
40
+
41
+
42
def _parse(model_cls: type[BaseModel], arguments: dict[str, Any]) -> BaseModel:
    """Validate ``arguments`` with ``model_cls`` and return the parsed instance.

    Raises:
        ToolParamError: validation failed. The message is one ``location: reason`` entry
            per pydantic error, joined with ``"; "``; errors without a location are
            reported under ``<root>``.
    """
    try:
        return model_cls.model_validate(arguments)
    except ValidationError as exc:

        def _describe(problem: Any) -> str:
            # Dotted path to the offending field, e.g. "items.0.path".
            where = ".".join(map(str, problem["loc"])) or "<root>"
            return f"{where}: {problem['msg']}"

        # Collapse pydantic's structured errors into one short line the model can read.
        summary = "; ".join(_describe(problem) for problem in exc.errors())
        raise ToolParamError(summary) from exc
@@ -0,0 +1,204 @@
1
+ """Core LLM types — a lean Python port of the ``@earendil-works/pi-ai`` data model.
2
+
3
+ Wire-contract discriminator values (``type`` / ``role`` / ``stopReason``) intentionally
4
+ match PI's so anyone familiar with the TS library recognizes the shapes. Field names
5
+ are snake-cased to stay pythonic; serialization is therefore *not* byte-compatible with
6
+ the TS library, which is fine — this port is consumed in-process, not over the wire.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from enum import StrEnum
13
+ from typing import Annotated, Any, Literal
14
+
15
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Cost / usage
19
+ # ---------------------------------------------------------------------------
20
+
21
+
22
class Cost(BaseModel):
    """Dollar (USD) cost of one generation, split by component.

    ``input`` and ``output`` price the uncached tokens, while ``cache_read`` and
    ``cache_write`` price the prompt-cache traffic (on Anthropic roughly 0.1x and 1.25x
    the input rate respectively). ``total`` is the realized spend over all four parts and
    is the figure reported to Minima as ``actual_cost_usd``, so the observed cost tier
    reflects post-cache economics.
    """

    # Every component defaults to zero.
    input: float = 0.0
    output: float = 0.0
    cache_read: float = 0.0
    cache_write: float = 0.0
    total: float = 0.0
36
+
37
+
38
class Usage(BaseModel):
    """Token counts for one generation (the counterpart of PI's ``AssistantMessage.usage``)."""

    # Token counts; all default to zero.
    input: int = 0
    output: int = 0
    cache_read: int = 0
    cache_write: int = 0
    # Priced view of the counts above; a fresh all-zero ``Cost`` per instance by default.
    cost: Cost = Field(default_factory=Cost)
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Modalities & model descriptor
50
+ # ---------------------------------------------------------------------------
51
+
52
+
53
+ class Modality(StrEnum):
54
+ text = "text"
55
+ image = "image"
56
+
57
+
58
# Provider API identifiers. The spellings are the ones PI's registry uses, so provider
# dispatch stays recognizable to anyone who knows the TS library.
ApiId = Literal["anthropic-messages", "google-generative-ai", "openai-completions", "faux"]
65
+
66
+
67
+ @dataclass(slots=True)
68
+ class ModelCost:
69
+ """Per-million-token USD prices."""
70
+
71
+ input: float
72
+ output: float
73
+ cache_read: float = 0.0
74
+ cache_write: float = 0.0
75
+
76
+
77
@dataclass(slots=True)
class Model:
    """Descriptor of a callable model.

    Custom / OpenAI-compatible endpoints are expressed by setting ``base_url``.
    """

    # Required identity and pricing.
    id: str
    provider: str
    api: ApiId
    name: str
    cost: ModelCost
    # Required token limits.
    context_window: int
    max_tokens: int
    # Accepted input modalities; text-only unless stated otherwise.
    input: tuple[Modality, ...] = (Modality.text,)
    reasoning: bool = False
    base_url: str | None = None
    # Each instance gets its own dict, so mutating one model's headers never leaks.
    headers: dict[str, str] = field(default_factory=dict)
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Content blocks
96
+ # ---------------------------------------------------------------------------
97
+
98
+
99
class TextContent(BaseModel):
    """Content block holding plain text; discriminated by ``type == "text"``."""

    type: Literal["text"] = "text"
    text: str
102
+
103
+
104
class ImageContent(BaseModel):
    """Content block holding an inline image; discriminated by ``type == "image"``."""

    type: Literal["image"] = "image"
    # Image bytes, base64-encoded.
    data: str
    mime_type: str = "image/png"
108
+
109
+
110
class ThinkingContent(BaseModel):
    """Content block holding model reasoning; discriminated by ``type == "thinking"``."""

    type: Literal["thinking"] = "thinking"
    thinking: str
    # Anthropic signs each thinking block. When the block is replayed in history
    # (including inside a tool-use turn) the signature has to be sent back verbatim,
    # otherwise the API answers 400 "thinking.signature: Field required". Providers that
    # do not sign (e.g. Gemini) leave this empty.
    signature: str = ""
117
+
118
+
119
class ToolCall(BaseModel):
    """Content block requesting a tool invocation; discriminated by ``type == "toolCall"``."""

    type: Literal["toolCall"] = "toolCall"
    id: str
    name: str
    # Can be incomplete while a response is still streaming. Defaults to an empty dict
    # and is never ``None`` (same as PI).
    arguments: dict[str, Any] = Field(default_factory=dict)
125
+
126
+
127
# Union of every content-block model; pydantic picks the concrete class from the
# ``type`` field of the incoming data.
ContentBlock = Annotated[
    TextContent | ImageContent | ThinkingContent | ToolCall, Field(discriminator="type")
]
131
+
132
+ # ---------------------------------------------------------------------------
133
+ # Messages
134
+ # ---------------------------------------------------------------------------
135
+
136
# Who authored a message.
Role = Literal["user", "assistant", "toolResult"]
# Why an assistant turn ended.
StopReason = Literal["stop", "length", "toolUse", "error", "aborted"]
138
+
139
+
140
class Message(BaseModel):
    """One conversation message made of typed content blocks.

    For convenience ``content`` may be given as a bare ``str``; it is wrapped into a
    single ``TextContent`` block before field validation runs.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    role: Role
    content: list[ContentBlock]
    timestamp: int | None = None
    # The remaining fields are only used by ``toolResult`` messages.
    tool_call_id: str | None = None
    tool_name: str | None = None
    is_error: bool = False

    @field_validator("content", mode="before")
    @classmethod
    def _coerce_content(cls, value: object) -> object:
        """Turn a plain string into ``[TextContent]``; pass anything else through."""
        return [TextContent(text=value)] if isinstance(value, str) else value

    @property
    def text(self) -> str:
        """All ``TextContent`` texts joined in order; ``""`` when there are none."""
        pieces = [block.text for block in self.content if isinstance(block, TextContent)]
        return "".join(pieces)
164
+
165
+
166
class AssistantMessage(Message):
    """A message produced by the model, with usage, stop reason and optional error details."""

    role: Literal["assistant"] = "assistant"
    model: str = ""
    stop_reason: StopReason = "stop"
    # A fresh all-zero ``Usage`` per instance by default.
    usage: Usage = Field(default_factory=Usage)
    error_message: str | None = None
    response_id: str | None = None

    @property
    def tool_calls(self) -> list[ToolCall]:
        """The ``ToolCall`` blocks of this message, in content order."""
        blocks = self.content
        # Guard kept from the original: a raw string content carries no tool calls.
        return [] if isinstance(blocks, str) else [b for b in blocks if isinstance(b, ToolCall)]
181
+
182
+
183
+ # ---------------------------------------------------------------------------
184
+ # Tools (declared here to avoid an import cycle — logic lives in tools.py)
185
+ # ---------------------------------------------------------------------------
186
+
187
+
188
+ @dataclass(slots=True)
189
+ class Tool:
190
+ """A callable tool. ``parameters`` is a pydantic model class (the TypeBox analogue)."""
191
+
192
+ name: str
193
+ description: str
194
+ parameters: type[BaseModel]
195
+
196
+
197
class Context(BaseModel):
    """Everything sent with a generation request: system prompt, messages and tools."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    system_prompt: str | None = None
    # Both lists default to a fresh empty list per instance.
    messages: list[Message] = Field(default_factory=list)
    tools: list[Tool] = Field(default_factory=list)
@@ -0,0 +1,41 @@
1
+ """Cost computation: realized tokens x per-model prices -> USD.
2
+
3
+ Feeds Minima's ``actual_cost_usd`` feedback field. Keeping the realized-cost basis in
4
+ the harness (rather than echoing Minima's *prior* ``est_cost_usd``) lets Minima climb
5
+ estimate -> observed -> rescaled, which is its single biggest accuracy lever.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from minima_harness.ai.types import Cost, Model, Usage
11
+
12
# Prices in the registry are quoted per one million tokens, so a token count times a
# price has to be divided by this to yield USD.
_PER_MTOK = 1_000_000.0
14
+
15
+
16
def cost_for(model: Model, usage: Usage) -> Cost:
    """Price ``usage`` with ``model``'s per-million-token rates and return the USD cost.

    The prompt-cache components are part of ``total`` (cache reads cost about 0.1x and
    cache writes about 1.25x the input rate). Anthropic reports ``input`` as the uncached
    share only, so leaving the cache parts out would understate the realized cost;
    including them lets the cost meter show genuine savings and lets Minima's observed
    tier learn post-cache economics.
    """
    prices = model.cost

    def _usd(tokens: int, per_mtok: float) -> float:
        # (tokens * price) / 1e6 — the same evaluation order as a direct expression.
        return tokens * per_mtok / _PER_MTOK

    uncached_in = _usd(usage.input, prices.input)
    generated = _usd(usage.output, prices.output)
    cache_hits = _usd(usage.cache_read, prices.cache_read)
    cache_fills = _usd(usage.cache_write, prices.cache_write)
    return Cost(
        input=uncached_in,
        output=generated,
        cache_read=cache_hits,
        cache_write=cache_fills,
        total=uncached_in + generated + cache_hits + cache_fills,
    )
36
+
37
+
38
def attach_cost(model: Model, usage: Usage) -> Usage:
    """Fill in ``usage.cost`` for ``model`` and hand the same ``usage`` object back.

    The passed-in ``usage`` is modified in place; no copy is made.
    """
    priced = cost_for(model, usage)
    usage.cost = priced
    return usage
@@ -0,0 +1,40 @@
1
+ """minima_harness.minima — the routing/judging integration layer.
2
+
3
+ Wires the ported agent runtime to Minima: each ``MinimaAgent.prompt`` recommends a model,
4
+ runs the turn, judges quality, and feeds the realized tokens/cost/latency back so Minima's
5
+ memory sharpens (recommend -> run -> judge -> feedback).
6
+ """
7
+
8
+ from minima_harness.minima.config import DEFAULT_CANDIDATES, HarnessConfig
9
+ from minima_harness.minima.judge import (
10
+ ConstJudge,
11
+ DeterministicJudge,
12
+ LLMJudge,
13
+ QualityJudge,
14
+ )
15
+ from minima_harness.minima.mapping import ModelMapping
16
+ from minima_harness.minima.meter import CostMeter, CostRow, CostTotals
17
+ from minima_harness.minima.router import MinimaRouter, Ranking, RoutingResult
18
+ from minima_harness.minima.runtime import BeforeRoute, MinimaAgent
19
+ from minima_harness.minima.signals import CodeHealthExtractor, ContextExtractor, SignalBundle
20
+
21
# Public names of the package: exactly the names imported above, re-exported for
# ``from minima_harness.minima import ...``.
__all__ = [
    "BeforeRoute",
    "CodeHealthExtractor",
    "ConstJudge",
    "ContextExtractor",
    "DEFAULT_CANDIDATES",
    "CostMeter",
    "CostRow",
    "CostTotals",
    "DeterministicJudge",
    "HarnessConfig",
    "LLMJudge",
    "MinimaAgent",
    "MinimaRouter",
    "ModelMapping",
    "QualityJudge",
    "Ranking",
    "RoutingResult",
    "SignalBundle",
]
@@ -0,0 +1,102 @@
1
+ """Semantic response cache — a free 'recommendation' when a near-duplicate prompt repeats.
2
+
3
+ A cache HIT returns a prior response with ZERO LLM cost (and ~no latency). Similarity
4
+ defaults to a cheap, dependency-free normalized-token Jaccard, which catches exact and
5
+ near-duplicate coding prompts (the realistic hit case for a coding agent); inject
6
+ ``similarity_fn`` (e.g. embedding cosine via Mubit's ANN) for true semantic matching.
7
+ Bounded LRU with an optional TTL. Disabled by default at the call site
8
+ (``HarnessConfig.cache_enabled``); a too-loose threshold risks stale hits, so it ships off.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from collections import OrderedDict
15
+ from collections.abc import Callable
16
+ from dataclasses import dataclass
17
+
18
# A token is a maximal run of lowercase ASCII letters, digits or underscores.
_WORD = re.compile(r"[a-z0-9_]+")
19
+
20
+
21
def _tokens(text: str) -> set[str]:
    """Return the distinct ``_WORD`` tokens of ``text`` after lowercasing it."""
    lowered = text.lower()
    words = _WORD.findall(lowered)
    return set(words)
23
+
24
+
25
def jaccard(a: str, b: str) -> float:
    """Jaccard similarity of the two strings' token sets, a value in [0, 1].

    Cheap and dependency-free, but blind to paraphrase. Two token-less strings count as
    identical (1.0); exactly one token-less string gives 0.0.
    """
    left = _tokens(a)
    right = _tokens(b)
    if not (left or right):
        return 1.0
    if not (left and right):
        return 0.0
    shared = left.intersection(right)
    combined = left.union(right)
    return len(shared) / len(combined)
33
+
34
+
35
+ @dataclass(slots=True)
36
+ class CacheHit:
37
+ response: str
38
+ similarity: float
39
+ prompt: str
40
+
41
+
42
# Signature of a pluggable similarity measure: (prompt_a, prompt_b) -> score.
SimilarityFn = Callable[[str, str], float]
43
+
44
+
45
class SemanticCache:
    """Bounded prompt -> response cache looked up by similarity instead of equality.

    ``get`` scores the query against every stored prompt and returns the best-scoring
    entry if its similarity reaches ``threshold``; otherwise it returns ``None``. Entries
    are kept in least-recently-used order and the oldest are dropped once more than
    ``max_entries`` are stored. With ``ttl_s`` set, entries older than that many seconds
    are discarded at the start of each ``get``.

    Args:
        threshold: minimum similarity for a hit.
        max_entries: maximum number of stored prompts.
        similarity_fn: ``(query, stored_prompt) -> score``; defaults to ``jaccard``.
        now_fn: clock returning seconds as a float. When omitted and ``ttl_s`` is given,
            ``time.monotonic`` is used so the TTL actually takes effect.
        ttl_s: entry lifetime in seconds, or ``None`` for no expiry.
    """

    def __init__(
        self,
        *,
        threshold: float = 0.95,
        max_entries: int = 512,
        similarity_fn: SimilarityFn | None = None,
        now_fn: Callable[[], float] | None = None,
        ttl_s: float | None = None,
    ) -> None:
        self.threshold = threshold
        self.max_entries = max_entries
        self._sim = similarity_fn or jaccard
        if now_fn is None and ttl_s is not None:
            # A TTL without a clock used to be silently ignored (every entry was stamped
            # 0.0 and expiry was skipped). Fall back to a monotonic clock instead.
            import time

            now_fn = time.monotonic
        self._now = now_fn
        self._ttl = ttl_s
        # prompt -> (response, insertion timestamp); ordered oldest -> most recently used.
        self._store: OrderedDict[str, tuple[str, float]] = OrderedDict()
        self.hits = 0
        self.misses = 0

    def get(self, prompt: str) -> CacheHit | None:
        """Return the best match for ``prompt`` as a ``CacheHit``, or ``None`` on a miss.

        Updates the ``hits`` / ``misses`` counters and, on a hit, marks the matched entry
        as most recently used.
        """
        self._expire()
        best_prompt: str | None = None
        best_resp = ""
        best_sim = 0.0
        for stored, (resp, _ts) in self._store.items():
            sim = self._sim(prompt, stored)
            # Strict ">" keeps the earliest entry when several score the same.
            if sim > best_sim:
                best_sim, best_prompt, best_resp = sim, stored, resp
        if best_prompt is not None and best_sim >= self.threshold:
            self.hits += 1
            self._store.move_to_end(best_prompt)
            return CacheHit(response=best_resp, similarity=best_sim, prompt=best_prompt)
        self.misses += 1
        return None

    def put(self, prompt: str, response: str) -> None:
        """Store ``response`` under ``prompt``; empty responses are not cached."""
        if not response:
            return
        ts = self._now() if self._now is not None else 0.0
        self._store[prompt] = (response, ts)
        self._store.move_to_end(prompt)
        # Evict from the least-recently-used end until the size bound holds.
        while len(self._store) > self.max_entries:
            self._store.popitem(last=False)

    def _expire(self) -> None:
        """Drop every entry whose timestamp is older than ``ttl_s`` seconds."""
        if self._ttl is None or self._now is None:
            return
        cutoff = self._now() - self._ttl
        # Collect the keys first: the dict must not change size while it is iterated.
        stale = [p for p, (_r, ts) in self._store.items() if ts < cutoff]
        for p in stale:
            self._store.pop(p, None)

    @property
    def hit_rate(self) -> float:
        """Fraction of ``get`` calls that were hits; ``0.0`` before any lookup."""
        total = self.hits + self.misses
        return self.hits / total if total else 0.0
@@ -0,0 +1,85 @@
1
+ """Harness configuration: where Minima lives, the candidate pool, and judge policy.
2
+
3
+ Defaults target the **hosted** Minima (``https://api.minima.sh``) so a freshly installed
4
+ ``minima`` works out of the box — set ``MUBIT_API_KEY`` (routing auth) and a provider key
5
+ and routing just works. For local development against ``make run`` on :8080, set
6
+ ``MINIMA_URL=http://localhost:8080`` (the repo's ``.env.harness`` does this explicitly).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ from dataclasses import dataclass, field
13
+
14
# Product default: the hosted service. Local development overrides it via MINIMA_URL.
DEFAULT_MINIMA_URL = "https://api.minima.sh"
DEFAULT_JUDGE_MODEL = "claude-haiku-4-5"

# Same candidate set as examples/agent_warmup.py, so cold-start routing behaves alike.
DEFAULT_CANDIDATES: list[str] = [
    "gemini-2.5-flash",
    "claude-haiku-4-5",
    "claude-sonnet-4-6",
    "gemini-2.5-pro",
    "claude-opus-4-8",
]
26
+
27
+
28
@dataclass(slots=True)
class HarnessConfig:
    """Routing + judging policy for a :class:`MinimaAgent` run."""

    minima_url: str = DEFAULT_MINIMA_URL
    minima_api_key: str | None = None
    # Model ids Minima is allowed to pick from (-> Constraints.candidate_models).
    candidates: list[str] = field(default_factory=lambda: list(DEFAULT_CANDIDATES))
    # True when the user explicitly pinned a single model via /model: routing is bypassed
    # and that model (candidates[0]) runs directly. Distinct from "candidates happens to
    # be length 1" (which can occur from key-gating) — only an explicit pin skips Minima.
    pinned: bool = False
    # Memory isolation lane (-> namespace). None = default lane.
    namespace: str | None = None
    # cost/quality slider: 0=cheapest acceptable, 10=highest quality.
    cost_quality_tradeoff: float = 5.0
    # Independent grader model (different provider avoids self-grading bias).
    judge_model: str = DEFAULT_JUDGE_MODEL
    # Judge every Nth terminal turn (1 = every turn). 0 disables judging.
    judge_every: int = 1
    baseline_model_id: str | None = None
    # Minima HTTP timeout (s). Cold-start recommend can take >10s when Minima consults its
    # LLM reasoner (thin evidence), so a tight timeout silently degrades to OFFLINE
    # routing. 30s comfortably covers reasoner + recall. Override with MINIMA_TIMEOUT.
    timeout: float = 30.0
    # When True, an unreachable Minima falls back to a fixed default model instead of
    # raising. Keeps ad-hoc runs working without a Minima instance.
    allow_offline: bool = True
    # Semantic response cache (/cache): a near-duplicate prompt returns a prior answer for
    # free. Off by default — a too-loose threshold risks stale hits, and coding prompts
    # are mostly unique. threshold is the min similarity for a hit.
    cache_enabled: bool = False
    cache_threshold: float = 0.95

    @classmethod
    def from_env(cls, **overrides: object) -> HarnessConfig:
        """Build a config from defaults, then the environment, then ``overrides``.

        Reads ``MINIMA_URL``, ``MINIMA_API_KEY`` / ``MUBIT_API_KEY`` and
        ``MINIMA_TIMEOUT``. A ``MINIMA_TIMEOUT`` that is not a positive number is ignored
        and the default timeout is kept. ``overrides`` are applied last with ``setattr``,
        so a key that is not a field of this slotted dataclass raises ``AttributeError``.
        """
        cfg = cls()
        cfg.refresh_routing_env()
        raw_timeout = os.environ.get("MINIMA_TIMEOUT")
        if raw_timeout:
            try:
                parsed = float(raw_timeout)
            except ValueError:
                # Deliberate best-effort: an unparsable value keeps the default.
                parsed = None
            # ``> 0`` also rejects NaN; a zero/negative timeout could never succeed.
            if parsed is not None and parsed > 0:
                cfg.timeout = parsed
        for key, value in overrides.items():
            setattr(cfg, key, value)
        return cfg

    def refresh_routing_env(self) -> None:
        """Re-read just the Minima endpoint + routing auth from the environment, in place.

        Used when a key/URL is set via the ``/config`` overlay mid-session: those land in
        ``os.environ`` but this dataclass (and the live Minima client built from it) were
        captured at startup. Refreshing here lets ``/reconnect`` rebuild a working client
        without a restart. Leaves the candidate pool, namespace, judge policy, etc.
        untouched.

        A variable that is unset or empty leaves the current value alone, so a URL or key
        that was configured explicitly is not wiped by a refresh.
        """
        self.minima_url = os.environ.get("MINIMA_URL") or self.minima_url
        self.minima_api_key = (
            os.environ.get("MINIMA_API_KEY")
            or os.environ.get("MUBIT_API_KEY")
            or self.minima_api_key
        )