minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""CLI: ``minima-seed`` — bulk-load cold-start outcome records into Mubit."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import asyncio
|
|
7
|
+
|
|
8
|
+
from minima.catalog.store import load_aliases
|
|
9
|
+
from minima.config import get_settings
|
|
10
|
+
from minima.memory.adapter import MubitMemory
|
|
11
|
+
from minima.seeding import routerbench, synthetic
|
|
12
|
+
from minima.seeding.items import SeedItem, build_item, chunked
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _load(dataset: str, limit: int) -> list[SeedItem]:
    """Load seed items for ``dataset``, forwarding ``limit`` to the loader.

    ``"synthetic"`` is produced by ``synthetic.generate``. Every other dataset
    name goes through ``routerbench.load_records``, which also receives the
    alias table returned by ``load_aliases()``.
    """
    if dataset != "synthetic":
        # RouterBench is the fallback for any non-synthetic dataset name.
        return routerbench.load_records(limit, load_aliases())
    return synthetic.generate(limit)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def _seed(args: argparse.Namespace) -> None:
    """Prepare seed records and write them to Mubit memory in batches.

    Reads ``dataset``, ``limit``, ``lane``, ``chunk`` and ``dry_run`` from
    ``args``. With ``dry_run`` set, the first three prepared items are printed
    and nothing is written.
    """
    settings = get_settings()
    memory = MubitMemory(settings)
    # An explicit --lane wins; otherwise fall back to the configured seed lane.
    target_lane = args.lane if args.lane else settings.minima_seed_lane

    items = [build_item(seed) for seed in _load(args.dataset, args.limit)]
    total = len(items)
    print(f"prepared {total} records from '{args.dataset}' -> lane '{target_lane}'")

    if args.dry_run:
        for preview in items[:3]:
            print(preview)
        print("dry-run: nothing written")
        return

    written = 0
    for batch in chunked(items, args.chunk):
        response = await memory.batch_insert(run_id=target_lane, items=batch, deduplicate=True)
        # A response without a "count" entry contributes 0 to the running total.
        written += int(response.get("count", 0))
        print(f"inserted {written}/{total}")
    print(f"done: {written} records into lane '{target_lane}'")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def main() -> None:
    """Entry point for the ``minima-seed`` command.

    Parses the command-line flags, then runs the async seeding routine to
    completion with ``asyncio.run``.
    """
    parser = argparse.ArgumentParser(description="Seed Minima cold-start memory into Mubit.")
    # (flag, add_argument keyword arguments), registered in declaration order.
    options = (
        ("--dataset", {"choices": ["routerbench", "synthetic"], "default": "routerbench"}),
        ("--limit", {"type": int, "default": 2000}),
        ("--lane", {"default": None, "help": "memory lane (default: MINIMA_SEED_LANE)"}),
        ("--chunk", {"type": int, "default": 200}),
        ("--dry-run", {"action": "store_true"}),
    )
    for flag, spec in options:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()
    asyncio.run(_seed(args))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Deterministic synthetic seed data.
|
|
2
|
+
|
|
3
|
+
Generates plausible (task -> model -> outcome) records where cheaper/weaker models
|
|
4
|
+
succeed on easy tasks and fail on hard ones. Useful for smoke-testing the full
|
|
5
|
+
ingest -> recall -> reinforce loop without network or an external dataset.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import random
|
|
11
|
+
|
|
12
|
+
from minima.catalog.store import load_snapshot_cards
|
|
13
|
+
from minima.memory.keys import build_content, task_cluster, task_fingerprint
|
|
14
|
+
from minima.memory.records import OutcomeRecord
|
|
15
|
+
from minima.schemas.common import Difficulty, TaskType
|
|
16
|
+
from minima.seeding.items import SeedItem
|
|
17
|
+
|
|
18
|
+
# Minimum capability prior a model needs for a synthetic task of the given
# difficulty to be recorded as a success.
_DIFFICULTY_REQUIREMENT = {
    Difficulty.easy: 0.5,
    Difficulty.medium: 0.7,
    Difficulty.hard: 0.85,
}


def generate(n: int, seed: int = 42) -> list[SeedItem]:
    """Build ``n`` synthetic seed items from the snapshot catalog.

    Each item pairs a randomly drawn task type, difficulty and model card. The
    outcome is a success when the card's capability prior for that task type
    (0.5 when the card lists none) reaches the difficulty's threshold. All
    draws come from a private ``random.Random(seed)``, so the random sequence
    is reproducible for a given ``seed``.
    """
    cards, _ = load_snapshot_cards()
    rng = random.Random(seed)
    task_types = list(TaskType)
    difficulties = list(_DIFFICULTY_REQUIREMENT)
    # Every synthetic call is priced for the same fixed token counts.
    input_tokens, output_tokens = 1200, 400

    def _make(index: int) -> SeedItem:
        # Draw order (task type, difficulty, card) fixes the random sequence.
        task_type = rng.choice(task_types)
        difficulty = rng.choice(difficulties)
        card = rng.choice(cards)

        text = (
            f"Synthetic {task_type.value} task #{index} at {difficulty.value} difficulty: "
            f"handle the {task_type.value} request described here."
        )
        prior = card.capability_by_task_type.get(task_type, 0.5)
        success = prior >= _DIFFICULTY_REQUIREMENT[difficulty]
        quality, outcome = (0.9, "success") if success else (0.2, "failure")

        # Catalog prices are per million tokens.
        input_cost = (input_tokens / 1_000_000) * card.input_cost_per_mtok
        output_cost = (output_tokens / 1_000_000) * card.output_cost_per_mtok
        cost = input_cost + output_cost

        record = OutcomeRecord(
            model_id=card.model_id,
            provider=card.provider,
            task_type=task_type.value,
            difficulty=difficulty.value,
            task_fingerprint=task_fingerprint(text),
            task_cluster=task_cluster(task_type.value, difficulty.value),
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost_usd=round(cost, 6),
            quality_score=quality,
            outcome=outcome,
            source_dataset="synthetic",
        )
        return SeedItem(
            item_id=f"syn-{index}",
            content=build_content(task_type.value, difficulty.value, text),
            record=record,
            env_tags=["seed:synthetic"],
        )

    return [_make(index) for index in range(n)]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Tenancy: pass-through auth — callers use their Mubit API key directly."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from minima.tenancy.context import TenantContext
|
|
6
|
+
from minima.tenancy.passthrough import PassthroughRuntime
|
|
7
|
+
|
|
8
|
+
__all__ = ["PassthroughRuntime", "TenantContext"]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""The per-request tenant context: everything resolved from the caller's credential."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from minima.memory.adapter import Memory
|
|
8
|
+
from minima.recommender.decisionlog import DecisionLog
|
|
9
|
+
from minima.recommender.durablerefs import DurableRefs
|
|
10
|
+
from minima.recommender.engine import Recommender
|
|
11
|
+
from minima.recommender.recstore import LaneCounter, RecStore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True)
|
|
15
|
+
class TenantContext:
|
|
16
|
+
"""Resolved per-request scope. In single-tenant mode there is one of these
|
|
17
|
+
(``org_id="default"``) wrapping the process singletons; in multi-tenant mode one is
|
|
18
|
+
built/cached per org and bound to that org's own Mubit instance."""
|
|
19
|
+
|
|
20
|
+
org_id: str
|
|
21
|
+
memory: Memory
|
|
22
|
+
recommender: Recommender
|
|
23
|
+
recstore: RecStore
|
|
24
|
+
lane_counter: LaneCounter
|
|
25
|
+
lane_prefix: str
|
|
26
|
+
mubit_endpoint: str
|
|
27
|
+
decision_log: DecisionLog | None = None
|
|
28
|
+
durable_refs: DurableRefs | None = None
|
|
29
|
+
|
|
30
|
+
def lane(self, namespace: str | None) -> str:
|
|
31
|
+
"""Intra-org sub-scope lane. The ORG boundary is the Mubit instance/key, not this
|
|
32
|
+
string — so namespace is a benign within-org partition (team/project/env)."""
|
|
33
|
+
return f"{self.lane_prefix}:{namespace or 'default'}"
|
|
34
|
+
|
|
35
|
+
def counter_key(self, lane: str) -> str:
|
|
36
|
+
"""Org-qualified key so reflection cadence never mixes across orgs."""
|
|
37
|
+
return f"{self.org_id}:{lane}"
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Pass-through auth: the caller's Mubit API key IS their credential.
|
|
2
|
+
|
|
3
|
+
No provisioning, no mnim_ keys. The caller passes their Mubit key as
|
|
4
|
+
``Authorization: Bearer <mubit_key>``; Minima uses it directly against the
|
|
5
|
+
configured MUBIT_ENDPOINT. One TenantContext is built and cached per key.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from threading import Lock
|
|
13
|
+
|
|
14
|
+
from minima.catalog.store import CatalogStore
|
|
15
|
+
from minima.config import Settings
|
|
16
|
+
from minima.llm.base import Reasoner
|
|
17
|
+
from minima.memory.adapter import Memory, MubitMemory
|
|
18
|
+
from minima.recommender.decisionlog import DecisionLog, MemoryDecisionLog, OrgScopedDecisionLog
|
|
19
|
+
from minima.recommender.durablerefs import (
|
|
20
|
+
DurableRefs,
|
|
21
|
+
MemoryDurableRefs,
|
|
22
|
+
OrgScopedDurableRefs,
|
|
23
|
+
)
|
|
24
|
+
from minima.recommender.engine import Recommender
|
|
25
|
+
from minima.recommender.propensity import OrgScopedPropensity, Propensity
|
|
26
|
+
from minima.recommender.recstore import LaneCounter, OrgScopedRecStore, RecStore
|
|
27
|
+
from minima.tenancy.context import TenantContext
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _org_id(key: str) -> str:
    """Return a stable org id for a Mubit key.

    Keys shaped like ``mbt_<instance>_<...>_<...>`` (at least four
    ``_``-separated parts with a non-empty instance) yield the instance
    segment. Any other key yields the first 16 hex characters of its SHA-256
    digest.
    """
    # NOTE(review): the instance segment is read from the key text as-is and
    # nothing here verifies the key — confirm callers only rely on the result
    # for keys that have been validated elsewhere.
    prefix, *rest = key.split("_", 3)
    if prefix != "mbt" or len(rest) < 3 or not rest[0]:
        digest = hashlib.sha256(key.encode()).hexdigest()
        return digest[:16]
    return rest[0]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PassthroughRuntime:
    """One process-wide runtime; per-key TenantContexts are lazily built and cached.

    Contexts are cached under the SHA-256 hex digest of the caller's Mubit API
    key, so the raw key is never used as a dictionary key.

    NOTE(review): this class only ever adds cache entries and never removes
    them. If ``resolve`` can be reached with arbitrary, unvalidated keys the
    cache can grow without bound — confirm keys are validated upstream or
    bound the cache.
    """

    def __init__(
        self,
        *,
        settings: Settings,
        catalog_store: CatalogStore,
        reasoner: Reasoner | None,
        recstore_backend: RecStore,
        propensity_backend: Propensity,
        lane_counter: LaneCounter,
        memory_factory: Callable[[str], Memory] | None = None,
        decision_log_backend: DecisionLog | None = None,
        durable_refs_backend: DurableRefs | None = None,
    ):
        """Store the shared backends that every tenant context is built from.

        Args:
            settings: Process settings; also passed to each ``Recommender``.
            catalog_store: Model catalog shared by all tenants.
            reasoner: Optional reasoner handed to each ``Recommender``.
            recstore_backend: Shared store, wrapped per org in ``OrgScopedRecStore``.
            propensity_backend: Shared backend, wrapped per org in ``OrgScopedPropensity``.
            lane_counter: Shared counter placed on every ``TenantContext``.
            memory_factory: Optional callable building a ``Memory`` from a key.
                When ``None``, ``MubitMemory(settings, api_key=key)`` is used.
            decision_log_backend: Shared decision log. When ``None``, a
                ``MemoryDecisionLog`` is created from
                ``settings.minima_decision_log_retention_days``.
            durable_refs_backend: Shared durable-refs store. When ``None``, a
                ``MemoryDurableRefs`` is created.
        """
        self._settings = settings
        self._catalog_store = catalog_store
        self._reasoner = reasoner
        self._recstore_backend = recstore_backend
        self._propensity_backend = propensity_backend
        self._lane_counter = lane_counter
        self._memory_factory = memory_factory
        # Compare against None instead of relying on truthiness: a supplied
        # backend that happens to be falsy (for example one defining __len__
        # while empty) must not be silently replaced by an in-memory default.
        if decision_log_backend is None:
            decision_log_backend = MemoryDecisionLog(settings.minima_decision_log_retention_days)
        if durable_refs_backend is None:
            durable_refs_backend = MemoryDurableRefs()
        self._decision_log_backend = decision_log_backend
        self._durable_refs_backend = durable_refs_backend
        self._cache: dict[str, TenantContext] = {}
        self._lock = Lock()

    def resolve(self, mubit_api_key: str) -> TenantContext:
        """Return the TenantContext for ``mubit_api_key``, building it on first use.

        The context is constructed without holding the lock. If two callers
        race on the same new key, the first context stored is the one both
        receive and the other is discarded.
        """
        key_hash = hashlib.sha256(mubit_api_key.encode()).hexdigest()
        with self._lock:
            cached = self._cache.get(key_hash)
        if cached is not None:
            return cached

        ctx = self._build_context(mubit_api_key)
        with self._lock:
            # setdefault keeps an entry stored by a concurrent caller, if any.
            return self._cache.setdefault(key_hash, ctx)

    def _build_context(self, mubit_api_key: str) -> TenantContext:
        """Assemble a new, uncached TenantContext for ``mubit_api_key``."""
        org_id = _org_id(mubit_api_key)
        if self._memory_factory is not None:
            memory = self._memory_factory(mubit_api_key)
        else:
            memory = MubitMemory(self._settings, api_key=mubit_api_key)
        # The shared backends are wrapped so each is scoped to this org id.
        scoped_recstore = OrgScopedRecStore(self._recstore_backend, org_id)
        scoped_decision_log = OrgScopedDecisionLog(self._decision_log_backend, org_id)
        scoped_durable_refs = OrgScopedDurableRefs(self._durable_refs_backend, org_id)
        recommender = Recommender(
            self._settings,
            memory,
            self._catalog_store,
            scoped_recstore,
            reasoner=self._reasoner,
            propensity=OrgScopedPropensity(self._propensity_backend, org_id),
            decision_log=scoped_decision_log,
            org_id=org_id,
            durable_refs=scoped_durable_refs,
        )
        return TenantContext(
            org_id=org_id,
            memory=memory,
            recommender=recommender,
            recstore=scoped_recstore,
            lane_counter=self._lane_counter,
            lane_prefix=self._settings.minima_lane_prefix,
            mubit_endpoint=self._settings.mubit_endpoint,
            decision_log=scoped_decision_log,
            durable_refs=scoped_durable_refs,
        )
|
minima/version.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: minima-cli
|
|
3
|
+
Version: 0.4.9
|
|
4
|
+
Summary: Minima CLI: cost-aware LLM model routing — recommend cheaper models, backed by Mubit memory.
|
|
5
|
+
Project-URL: Homepage, https://docs.minima.sh
|
|
6
|
+
Project-URL: Documentation, https://docs.minima.sh
|
|
7
|
+
Project-URL: Repository, https://github.com/mubit-ai/minima
|
|
8
|
+
Project-URL: Issues, https://github.com/mubit-ai/minima/issues
|
|
9
|
+
Author: Mubit
|
|
10
|
+
License: FSL-1.1-Apache-2.0
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agent,cost-optimization,llm,model-router,mubit,routing
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: anyio>=4.4
|
|
23
|
+
Requires-Dist: httpx>=0.27
|
|
24
|
+
Requires-Dist: mubit-sdk>=0.10.0
|
|
25
|
+
Requires-Dist: pydantic-settings>=2.3
|
|
26
|
+
Requires-Dist: pydantic>=2.7
|
|
27
|
+
Requires-Dist: structlog>=24.1
|
|
28
|
+
Requires-Dist: tenacity>=8.4
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: anthropic>=0.40; extra == 'dev'
|
|
31
|
+
Requires-Dist: fastapi>=0.115; extra == 'dev'
|
|
32
|
+
Requires-Dist: google-genai>=0.3; extra == 'dev'
|
|
33
|
+
Requires-Dist: keyring>=24; extra == 'dev'
|
|
34
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
35
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest>=8.2; extra == 'dev'
|
|
38
|
+
Requires-Dist: redis>=5.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
40
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
41
|
+
Requires-Dist: textual>=3.0; extra == 'dev'
|
|
42
|
+
Requires-Dist: uvicorn[standard]>=0.30; extra == 'dev'
|
|
43
|
+
Provides-Extra: harness
|
|
44
|
+
Requires-Dist: anthropic>=0.40; extra == 'harness'
|
|
45
|
+
Requires-Dist: google-genai>=0.3; extra == 'harness'
|
|
46
|
+
Provides-Extra: reasoner-anthropic
|
|
47
|
+
Requires-Dist: anthropic>=0.40; extra == 'reasoner-anthropic'
|
|
48
|
+
Provides-Extra: reasoner-gemini
|
|
49
|
+
Requires-Dist: google-genai>=0.3; extra == 'reasoner-gemini'
|
|
50
|
+
Provides-Extra: seed
|
|
51
|
+
Requires-Dist: datasets>=2.20; extra == 'seed'
|
|
52
|
+
Requires-Dist: huggingface-hub>=0.24; extra == 'seed'
|
|
53
|
+
Provides-Extra: server
|
|
54
|
+
Requires-Dist: fastapi>=0.115; extra == 'server'
|
|
55
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == 'server'
|
|
56
|
+
Requires-Dist: redis>=5.0; extra == 'server'
|
|
57
|
+
Requires-Dist: uvicorn[standard]>=0.30; extra == 'server'
|
|
58
|
+
Provides-Extra: tui
|
|
59
|
+
Requires-Dist: keyring>=24; extra == 'tui'
|
|
60
|
+
Requires-Dist: textual>=3.0; extra == 'tui'
|
|
61
|
+
Description-Content-Type: text/markdown
|
|
62
|
+
|
|
63
|
+
# Minima
|
|
64
|
+
|
|
65
|
+
**Recommend a cheaper LLM model for each task, so LLM-driven workflows spend fewer tokens
|
|
66
|
+
without losing the quality the task actually needs.**
|
|
67
|
+
|
|
68
|
+
Minima **only recommends** — it never proxies a call, runs a model, rewrites a prompt, or
|
|
69
|
+
caches. It is a stack-agnostic advice layer backed by [Mubit](https://docs.mubit.ai) memory:
|
|
70
|
+
ask which model to use, run that model yourself, then tell Minima how it went. Because it
|
|
71
|
+
sits *beside* your call rather than in front of it, **it adds zero latency to your real LLM
|
|
72
|
+
request.**
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
POST /v1/recommend   ──▶   you run the model   ──▶   POST /v1/feedback
  (recall + rank)            (your stack)    (write outcome, reinforce memory)
        ▲                                                        │
        └────────────── recommendations get sharper ─────────────┘
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Why it works
|
|
82
|
+
|
|
83
|
+
Minima is backed by [Mubit](https://mubit.ai) memory. Every `POST /v1/feedback` writes a `task → model → outcome` record; every `POST /v1/recommend` recalls the most similar past records and picks the cheapest model expected to clear a quality bar. The longer it runs, the sharper the picks.
|
|
84
|
+
|
|
85
|
+
A `cost_quality_tradeoff` slider (0 = cheapest acceptable, 10 = highest quality) moves the
|
|
86
|
+
bar. When memory is thin or conflicting, Minima can escalate to a cheap-LLM reasoner
|
|
87
|
+
(configurable, off by default).
|
|
88
|
+
|
|
89
|
+
### Cost ranking that reflects reality
|
|
90
|
+
|
|
91
|
+
A flat token estimate assumes a fixed completion length, so it ignores reasoning/thinking
|
|
92
|
+
tokens and mis-ranks a model with cheap list prices but heavy internal reasoning. Minima
|
|
93
|
+
ranks candidates by what they **really** cost, choosing one basis for the whole candidate
|
|
94
|
+
set:
|
|
95
|
+
|
|
96
|
+
- **rescaled** (best) — this request's input priced + the model's *observed* output-token
|
|
97
|
+
behavior; size-exact **and** reasoning-aware.
|
|
98
|
+
- **observed** — robust median of realized `$/call` from recalled outcomes.
|
|
99
|
+
- **estimate** (cold start) — token estimate from catalog prices.
|
|
100
|
+
|
|
101
|
+
The basis climbs `estimate → observed → rescaled` as your `/feedback` calls accumulate
|
|
102
|
+
realized tokens and cost. See [Concepts → Cost-basis tiers](docs/concepts.md#cost-basis-tiers-estimate--observed--rescaled).
|
|
103
|
+
|
|
104
|
+
## Endpoints
|
|
105
|
+
|
|
106
|
+
| Endpoint | Purpose |
|
|
107
|
+
|----------|---------|
|
|
108
|
+
| `POST /v1/recommend` | Recommend a model for one task. |
|
|
109
|
+
| `POST /v1/recommend/workflow` | Recommend a model per step of a multi-step workflow. |
|
|
110
|
+
| `POST /v1/feedback` | Report an outcome and close the learning loop. |
|
|
111
|
+
| `GET /v1/models` | The current model catalog (cost + capability priors). |
|
|
112
|
+
| `GET /v1/strategies` | Rules Mubit has promoted for a namespace (explainability). |
|
|
113
|
+
| `GET /v1/health` | Service, Mubit, catalog, and reasoner status. |
|
|
114
|
+
| `POST\|GET\|DELETE /v1/admin/tenants` | Tenant provisioning (multi-tenant mode only). |
|
|
115
|
+
|
|
116
|
+
Full schemas, fields, warnings, and error formats: **[API Reference](docs/api-reference.md)**.
|
|
117
|
+
|
|
118
|
+
## Quickstart
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
uv sync --extra dev
|
|
122
|
+
cp .env.example .env # set MUBIT_API_KEY (+ MUBIT_ENDPOINT if not local)
|
|
123
|
+
|
|
124
|
+
# optional: seed cold-start memory so day-one picks are grounded
|
|
125
|
+
uv run minima-seed --dataset synthetic --limit 2000 --lane minima:default
|
|
126
|
+
|
|
127
|
+
make run # uvicorn on :8080 (interactive docs at /docs)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# recommend
|
|
132
|
+
curl -s localhost:8080/v1/recommend -H 'content-type: application/json' -d '{
|
|
133
|
+
"task": {"task": "Summarize this incident report into 3 bullets.",
|
|
134
|
+
"task_type": "summarization"},
|
|
135
|
+
"cost_quality_tradeoff": 3
|
|
136
|
+
}' | jq
|
|
137
|
+
|
|
138
|
+
# ...run the recommended model yourself, then close the loop
|
|
139
|
+
curl -s localhost:8080/v1/feedback -H 'content-type: application/json' -d '{
|
|
140
|
+
"recommendation_id": "<from above>", "chosen_model_id": "claude-haiku-4-5",
|
|
141
|
+
"outcome": "success", "quality_score": 0.95,
|
|
142
|
+
"input_tokens": 1760, "output_tokens": 110, "actual_cost_usd": 0.0021,
|
|
143
|
+
"verified_in_production": true
|
|
144
|
+
}' | jq
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Minima talks to a Mubit runtime at `MUBIT_ENDPOINT` (defaults to `http://127.0.0.1:3000`;
|
|
148
|
+
start one with `make run-mubit` in the Mubit repo) and uses Mubit's server-side embeddings,
|
|
149
|
+
so it needs no embedding model of its own.
|
|
150
|
+
|
|
151
|
+
## Python client
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from minima_client import MinimaClient
|
|
155
|
+
|
|
156
|
+
with MinimaClient("http://localhost:8080") as minima:
|
|
157
|
+
rec = minima.recommend("Write a Python CSV parser.", cost_quality_tradeoff=3)
|
|
158
|
+
# ... run rec.recommended_model.model_id yourself ...
|
|
159
|
+
minima.feedback(rec.recommendation_id, rec.recommended_model.model_id, "success",
|
|
160
|
+
quality_score=0.95, input_tokens=180, output_tokens=640,
|
|
161
|
+
actual_cost_usd=0.0034, verified_in_production=True)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Sync + async clients and zero-code `autocapture`: **[Python Client SDK](docs/client-sdk.md)**.
|
|
165
|
+
|
|
166
|
+
## Documentation
|
|
167
|
+
|
|
168
|
+
| Doc | What's in it |
|
|
169
|
+
|-----|--------------|
|
|
170
|
+
| [Getting Started](docs/getting-started.md) | Install, configure, run, first recommendation. |
|
|
171
|
+
| [Concepts](docs/concepts.md) | The loop, the algorithm, cost-basis tiers, escalation, how it improves. |
|
|
172
|
+
| [API Reference](docs/api-reference.md) | Every endpoint, full schemas, warnings, errors. |
|
|
173
|
+
| [Configuration](docs/configuration.md) | Every environment variable + tuning guidance. |
|
|
174
|
+
| [Python Client SDK](docs/client-sdk.md) | `minima_client` clients + autocapture. |
|
|
175
|
+
| [Cold-Start Seeding](docs/seeding.md) | Load history so day-one picks are grounded. |
|
|
176
|
+
| [Multi-Tenancy](docs/multi-tenancy.md) | One deployment, many orgs, per-org Mubit instances. |
|
|
177
|
+
| [Operations](docs/operations.md) | Deployment, health, degradation, monitoring, secrets. |
|
|
178
|
+
| [Examples](docs/examples.md) | Guided tour of the runnable examples. |
|
|
179
|
+
| [Agent Harness](docs/harness.md) | `minima_harness`: a Minima-routing port of PI's agent toolkit. |
|
|
180
|
+
|
|
181
|
+
## Examples
|
|
182
|
+
|
|
183
|
+
Runnable, progressively advanced — in **[`examples/`](examples/)**:
|
|
184
|
+
|
|
185
|
+
| # | Example | Shows |
|
|
186
|
+
|---|---------|-------|
|
|
187
|
+
| 1 | [`01_quickstart.sh`](examples/01_quickstart.sh) | Raw `curl` against every endpoint. |
|
|
188
|
+
| 2 | [`02_recommend_and_feedback.py`](examples/02_recommend_and_feedback.py) | The core loop with the SDK. |
|
|
189
|
+
| 3 | [`03_constraints_and_tradeoff.py`](examples/03_constraints_and_tradeoff.py) | Constraints + slider sweep. |
|
|
190
|
+
| 4 | [`04_workflow.py`](examples/04_workflow.py) | Per-step workflow recommendations. |
|
|
191
|
+
| 5 | [`05_autocapture.py`](examples/05_autocapture.py) | Zero-code intake via `mubit.learn`. |
|
|
192
|
+
| 6 | [`06_routed_llm_call.py`](examples/06_routed_llm_call.py) | Routing a real Claude call + feedback. |
|
|
193
|
+
| 7 | [`07_multitenant_admin.py`](examples/07_multitenant_admin.py) | Provision an org, call as that tenant. |
|
|
194
|
+
| 8 | [`harness_warmup.py`](examples/harness_warmup.py) | The `minima_harness` agent loop (demo mode needs no keys). |
|
|
195
|
+
|
|
196
|
+
## Agent harness
|
|
197
|
+
|
|
198
|
+
[`minima_harness/`](src/minima_harness) is a lean Python port of
|
|
199
|
+
[`@earendil-works/pi`](https://github.com/earendil-works/pi)'s agent toolkit, made
|
|
200
|
+
Minima-native: an `Agent` runtime with tool calling **plus** a `MinimaAgent` that routes
|
|
201
|
+
every prompt through Minima and feeds the realized tokens/cost/quality back. It is the
|
|
202
|
+
"run the model yourself" half of the Minima loop, packaged.
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from minima_harness.minima import MinimaAgent, HarnessConfig
|
|
206
|
+
|
|
207
|
+
agent = MinimaAgent(HarnessConfig.from_env()) # MINIMA_URL, candidates, judge policy
|
|
208
|
+
await agent.prompt("Summarize this incident.", task_type="summarization", slider=3)
|
|
209
|
+
# -> Minima picked the model, the agent ran it, judged quality, fed the outcome back
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
Try it with no keys via the in-process demo:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
uv run python examples/harness_warmup.py # demo (in-process Minima + fake provider)
|
|
216
|
+
uv run python examples/harness_warmup.py --live # real Minima + real providers
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Full architecture, the loop mapping, and extension guide:
|
|
220
|
+
**[Agent Harness](docs/harness.md)**.
|
|
221
|
+
|
|
222
|
+
## Configuration
|
|
223
|
+
|
|
224
|
+
All configuration is via environment variables (see [`.env.example`](.env.example) and
|
|
225
|
+
[Configuration](docs/configuration.md)). The only required value is `MUBIT_API_KEY` (in
|
|
226
|
+
single-tenant mode). Notable knobs:
|
|
227
|
+
|
|
228
|
+
- `MINIMA_USE_OBSERVED_COST` / `MINIMA_OBSERVED_COST_MIN_N` — rank by realized cost.
|
|
229
|
+
- `MINIMA_REASONER_PROVIDER` — enable the cheap-LLM escalation tier (`anthropic` / `gemini`).
|
|
230
|
+
- `MINIMA_RECOMMENDATION_STORE=sqlite` — durable recommendation resolution (multi-worker).
|
|
231
|
+
- `MINIMA_MULTITENANT` — serve many orgs from one deployment.
|
|
232
|
+
|
|
233
|
+
## Development
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
make install # uv sync --extra dev
|
|
237
|
+
make test # unit + integration (no Mubit needed)
|
|
238
|
+
make lint # ruff + mypy
|
|
239
|
+
make live # end-to-end against a running Mubit (pytest -m live)
|
|
240
|
+
make eval # offline RouterBench savings evaluation (pytest -m eval)
|
|
241
|
+
make fmt # ruff --fix + format
|
|
242
|
+
make seed # minima-seed (LIMIT=, LANE= overridable)
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## Project layout
|
|
246
|
+
|
|
247
|
+
```
|
|
248
|
+
src/minima/
|
|
249
|
+
api/routers/ recommend · feedback · models · strategies · health · admin
|
|
250
|
+
recommender/ engine · classify · aggregate · score · escalation · propensity · recstore
|
|
251
|
+
memory/ adapter (only Mubit touchpoint) · records · keys · threadpool
|
|
252
|
+
catalog/ store · merge · refresh · sources/{litellm,openrouter} · data/*.json
|
|
253
|
+
llm/ base · anthropic · gemini · registry (the escalation reasoner)
|
|
254
|
+
tenancy/ runtime · registry · context · keys · secrets
|
|
255
|
+
seeding/ routerbench · synthetic · run_seed (minima-seed CLI)
|
|
256
|
+
schemas/ common · recommend · workflow · feedback · models_catalog · strategies · admin
|
|
257
|
+
src/minima_harness/ ported pi-ai (ai/) + pi-agent-core (agent/) + Minima integration (minima/) — see docs/harness.md
|
|
258
|
+
client_sdk/minima_client/ client (sync+async) · autocapture · errors
|
|
259
|
+
docs/ full documentation
examples/ runnable examples
|
|
260
|
+
tests/ unit · integration (FakeMemory) · live (-m live) · eval (-m eval)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## License
|
|
264
|
+
|
|
265
|
+
Minima is **source-available** under the [Functional Source License,
|
|
266
|
+
Version 1.1, Apache 2.0 Future License](LICENSE) (`FSL-1.1-Apache-2.0`).
|
|
267
|
+
|
|
268
|
+
You may use, copy, modify, and self-host Minima for any **Permitted Purpose** —
|
|
269
|
+
internal use, non-commercial education/research, and professional services for a
|
|
270
|
+
licensee. The one restriction is a **Competing Use**: you may not offer Minima
|
|
271
|
+
(or a substantially similar product/service) to others as a commercial or hosted
|
|
272
|
+
offering that competes with us. Two years after each version is published, that
|
|
273
|
+
version automatically converts to the **Apache License 2.0**.
|
|
274
|
+
|
|
275
|
+
Copyright 2026 Mubit.
|