minima-cli 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. minima/__init__.py +5 -0
  2. minima/api/__init__.py +1 -0
  3. minima/api/auth.py +39 -0
  4. minima/api/errors.py +40 -0
  5. minima/api/routers/__init__.py +1 -0
  6. minima/api/routers/calibration.py +50 -0
  7. minima/api/routers/feedback.py +279 -0
  8. minima/api/routers/health.py +50 -0
  9. minima/api/routers/models.py +42 -0
  10. minima/api/routers/recommend.py +66 -0
  11. minima/api/routers/savings.py +55 -0
  12. minima/api/routers/strategies.py +33 -0
  13. minima/catalog/__init__.py +1 -0
  14. minima/catalog/data/capability_priors.json +210 -0
  15. minima/catalog/data/model_aliases.json +12 -0
  16. minima/catalog/merge.py +69 -0
  17. minima/catalog/refresh.py +54 -0
  18. minima/catalog/sources/__init__.py +1 -0
  19. minima/catalog/sources/litellm.py +19 -0
  20. minima/catalog/sources/openrouter.py +25 -0
  21. minima/catalog/store.py +86 -0
  22. minima/config.py +288 -0
  23. minima/deps.py +35 -0
  24. minima/llm/__init__.py +1 -0
  25. minima/llm/anthropic.py +106 -0
  26. minima/llm/base.py +196 -0
  27. minima/llm/gemini.py +124 -0
  28. minima/llm/registry.py +54 -0
  29. minima/logging.py +28 -0
  30. minima/main.py +109 -0
  31. minima/memory/__init__.py +1 -0
  32. minima/memory/adapter.py +572 -0
  33. minima/memory/keys.py +83 -0
  34. minima/memory/records.py +190 -0
  35. minima/memory/threadpool.py +41 -0
  36. minima/metrics/__init__.py +1 -0
  37. minima/metrics/calibration.py +415 -0
  38. minima/metrics/report.py +116 -0
  39. minima/metrics/savings.py +98 -0
  40. minima/recommender/__init__.py +1 -0
  41. minima/recommender/_pg_pool.py +38 -0
  42. minima/recommender/_redis_client.py +32 -0
  43. minima/recommender/aggregate.py +157 -0
  44. minima/recommender/classify.py +165 -0
  45. minima/recommender/decisionlog.py +505 -0
  46. minima/recommender/durablerefs.py +312 -0
  47. minima/recommender/engine.py +997 -0
  48. minima/recommender/escalation.py +83 -0
  49. minima/recommender/propensity.py +189 -0
  50. minima/recommender/recstore.py +368 -0
  51. minima/recommender/score.py +318 -0
  52. minima/recommender/types.py +166 -0
  53. minima/schemas/__init__.py +1 -0
  54. minima/schemas/common.py +73 -0
  55. minima/schemas/feedback.py +34 -0
  56. minima/schemas/models_catalog.py +36 -0
  57. minima/schemas/recommend.py +104 -0
  58. minima/schemas/savings.py +39 -0
  59. minima/schemas/strategies.py +57 -0
  60. minima/schemas/workflow.py +43 -0
  61. minima/seeding/__init__.py +1 -0
  62. minima/seeding/items.py +42 -0
  63. minima/seeding/llmrouterbench.py +232 -0
  64. minima/seeding/routerbench.py +141 -0
  65. minima/seeding/run_seed.py +56 -0
  66. minima/seeding/synthetic.py +70 -0
  67. minima/tenancy/__init__.py +8 -0
  68. minima/tenancy/context.py +37 -0
  69. minima/tenancy/passthrough.py +110 -0
  70. minima/version.py +3 -0
  71. minima_cli-0.4.9.dist-info/METADATA +275 -0
  72. minima_cli-0.4.9.dist-info/RECORD +161 -0
  73. minima_cli-0.4.9.dist-info/WHEEL +4 -0
  74. minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
  75. minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
  76. minima_client/__init__.py +19 -0
  77. minima_client/autocapture.py +101 -0
  78. minima_client/client.py +301 -0
  79. minima_client/errors.py +23 -0
  80. minima_harness/LICENSE_PI +32 -0
  81. minima_harness/__init__.py +16 -0
  82. minima_harness/agent/__init__.py +72 -0
  83. minima_harness/agent/agent.py +276 -0
  84. minima_harness/agent/events.py +124 -0
  85. minima_harness/agent/loop.py +311 -0
  86. minima_harness/agent/state.py +79 -0
  87. minima_harness/agent/tools.py +97 -0
  88. minima_harness/ai/__init__.py +66 -0
  89. minima_harness/ai/compat.py +71 -0
  90. minima_harness/ai/errors.py +96 -0
  91. minima_harness/ai/events.py +117 -0
  92. minima_harness/ai/openrouter_catalog.py +153 -0
  93. minima_harness/ai/provider_catalog.py +299 -0
  94. minima_harness/ai/provider_quirks.py +37 -0
  95. minima_harness/ai/providers/__init__.py +75 -0
  96. minima_harness/ai/providers/_common.py +48 -0
  97. minima_harness/ai/providers/anthropic.py +290 -0
  98. minima_harness/ai/providers/base.py +65 -0
  99. minima_harness/ai/providers/faux.py +173 -0
  100. minima_harness/ai/providers/google.py +221 -0
  101. minima_harness/ai/providers/openai_compat.py +278 -0
  102. minima_harness/ai/registry.py +184 -0
  103. minima_harness/ai/stream.py +82 -0
  104. minima_harness/ai/tools.py +51 -0
  105. minima_harness/ai/types.py +204 -0
  106. minima_harness/ai/usage.py +41 -0
  107. minima_harness/minima/__init__.py +40 -0
  108. minima_harness/minima/cache.py +102 -0
  109. minima_harness/minima/config.py +85 -0
  110. minima_harness/minima/goals.py +226 -0
  111. minima_harness/minima/judge.py +144 -0
  112. minima_harness/minima/mapping.py +147 -0
  113. minima_harness/minima/meter.py +143 -0
  114. minima_harness/minima/router.py +220 -0
  115. minima_harness/minima/runtime.py +544 -0
  116. minima_harness/minima/signals.py +195 -0
  117. minima_harness/session/__init__.py +14 -0
  118. minima_harness/session/format.py +35 -0
  119. minima_harness/session/store.py +236 -0
  120. minima_harness/tasks/__init__.py +17 -0
  121. minima_harness/tasks/task_set.py +78 -0
  122. minima_harness/tools/__init__.py +7 -0
  123. minima_harness/tools/_io.py +34 -0
  124. minima_harness/tools/bash.py +70 -0
  125. minima_harness/tools/builtin.py +23 -0
  126. minima_harness/tools/edit.py +50 -0
  127. minima_harness/tools/find.py +38 -0
  128. minima_harness/tools/grep.py +73 -0
  129. minima_harness/tools/ls.py +35 -0
  130. minima_harness/tools/read.py +38 -0
  131. minima_harness/tools/tasks.py +75 -0
  132. minima_harness/tools/write.py +36 -0
  133. minima_harness/tui/__init__.py +3 -0
  134. minima_harness/tui/analytics.py +111 -0
  135. minima_harness/tui/app.py +1927 -0
  136. minima_harness/tui/bridge.py +103 -0
  137. minima_harness/tui/cli.py +227 -0
  138. minima_harness/tui/clipboard.py +60 -0
  139. minima_harness/tui/commands.py +49 -0
  140. minima_harness/tui/compaction.py +17 -0
  141. minima_harness/tui/config_cli.py +141 -0
  142. minima_harness/tui/config_store.py +237 -0
  143. minima_harness/tui/context.py +93 -0
  144. minima_harness/tui/customize.py +95 -0
  145. minima_harness/tui/diff.py +53 -0
  146. minima_harness/tui/editor.py +43 -0
  147. minima_harness/tui/extensions.py +84 -0
  148. minima_harness/tui/extra_models.py +52 -0
  149. minima_harness/tui/history.py +71 -0
  150. minima_harness/tui/mubit.py +295 -0
  151. minima_harness/tui/overlays.py +593 -0
  152. minima_harness/tui/packages.py +59 -0
  153. minima_harness/tui/run_modes.py +66 -0
  154. minima_harness/tui/theme.py +77 -0
  155. minima_harness/tui/welcome.py +83 -0
  156. minima_harness/tui/widgets/__init__.py +3 -0
  157. minima_harness/tui/widgets/banner.py +38 -0
  158. minima_harness/tui/widgets/editor.py +83 -0
  159. minima_harness/tui/widgets/footer.py +73 -0
  160. minima_harness/tui/widgets/messages.py +151 -0
  161. minima_harness/tui/widgets/status.py +57 -0
@@ -0,0 +1,56 @@
1
+ """CLI: ``minima-seed`` — bulk-load cold-start outcome records into Mubit."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import asyncio
7
+
8
+ from minima.catalog.store import load_aliases
9
+ from minima.config import get_settings
10
+ from minima.memory.adapter import MubitMemory
11
+ from minima.seeding import routerbench, synthetic
12
+ from minima.seeding.items import SeedItem, build_item, chunked
13
+
14
+
15
def _load(dataset: str, limit: int) -> list[SeedItem]:
    """Produce the seed items for ``dataset``, forwarding ``limit`` to the loader.

    ``"synthetic"`` is generated locally by :mod:`minima.seeding.synthetic`; any
    other dataset name is served by the RouterBench loader, which is also handed
    the catalog's model aliases.
    """
    wants_routerbench = dataset != "synthetic"
    if wants_routerbench:
        # Aliases are only loaded on this path, exactly as before.
        return routerbench.load_records(limit, load_aliases())
    return synthetic.generate(limit)
19
+
20
+
21
async def _seed(args: argparse.Namespace) -> None:
    """Prepare seed records and bulk-insert them into a memory lane.

    Reads ``dataset``, ``limit``, ``lane``, ``chunk`` and ``dry_run`` from ``args``.
    When ``dry_run`` is set, the first three prepared items are printed and
    nothing is written.
    """
    settings = get_settings()
    memory = MubitMemory(settings)
    # An explicit --lane wins; otherwise use the configured seed lane.
    lane = args.lane if args.lane else settings.minima_seed_lane

    items = [build_item(seed) for seed in _load(args.dataset, args.limit)]
    print(f"prepared {len(items)} records from '{args.dataset}' -> lane '{lane}'")

    if args.dry_run:
        preview = items[:3]
        for entry in preview:
            print(entry)
        print("dry-run: nothing written")
        return

    inserted = 0
    for batch in chunked(items, args.chunk):
        response = await memory.batch_insert(run_id=lane, items=batch, deduplicate=True)
        # A response without a "count" entry contributes 0 to the running total.
        inserted += int(response.get("count", 0))
        print(f"inserted {inserted}/{len(items)}")
    print(f"done: {inserted} records into lane '{lane}'")
42
+
43
+
44
def _positive_int(raw: str) -> int:
    """argparse ``type=`` callable that accepts only integers >= 1."""
    try:
        value = int(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected an integer, got {raw!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
    return value


def main(argv: list[str] | None = None) -> None:
    """Entry point for the ``minima-seed`` CLI.

    Args:
        argv: Argument list to parse. ``None`` (the default, and what the
            console-script entry point uses) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments (exit status 2),
            including a ``--chunk`` value that is not an integer >= 1.
    """
    parser = argparse.ArgumentParser(description="Seed Minima cold-start memory into Mubit.")
    parser.add_argument("--dataset", choices=["routerbench", "synthetic"], default="routerbench")
    parser.add_argument("--limit", type=int, default=2000)
    parser.add_argument("--lane", default=None, help="memory lane (default: MINIMA_SEED_LANE)")
    # The chunk size is handed straight to the batching helper, so reject
    # zero/negative sizes here with a clear usage error instead of letting them
    # reach it unvalidated.
    parser.add_argument("--chunk", type=_positive_int, default=200)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args(argv)
    asyncio.run(_seed(args))


if __name__ == "__main__":
    main()
@@ -0,0 +1,70 @@
1
+ """Deterministic synthetic seed data.
2
+
3
+ Generates plausible (task -> model -> outcome) records where cheaper/weaker models
4
+ succeed on easy tasks and fail on hard ones. Useful for smoke-testing the full
5
+ ingest -> recall -> reinforce loop without network or an external dataset.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import random
11
+
12
+ from minima.catalog.store import load_snapshot_cards
13
+ from minima.memory.keys import build_content, task_cluster, task_fingerprint
14
+ from minima.memory.records import OutcomeRecord
15
+ from minima.schemas.common import Difficulty, TaskType
16
+ from minima.seeding.items import SeedItem
17
+
18
# Capability prior a model must reach for a synthetic task of the given
# difficulty to be recorded as a success.
_DIFFICULTY_REQUIREMENT = {
    Difficulty.easy: 0.5,
    Difficulty.medium: 0.7,
    Difficulty.hard: 0.85,
}


def generate(n: int, seed: int = 42) -> list[SeedItem]:
    """Build ``n`` synthetic seed items.

    Every random draw comes from a private ``random.Random(seed)``, so the
    sequence of (task type, difficulty, model card) picks is fixed for a given
    ``seed`` and card snapshot. A record is a success when the card's capability
    prior for the task type (0.5 when the card has none) reaches the threshold
    for the drawn difficulty.

    Args:
        n: Number of items to generate.
        seed: Seed for the private random generator.

    Returns:
        The generated items, with ids ``syn-0`` .. ``syn-{n-1}``.
    """
    cards, _ = load_snapshot_cards()
    rng = random.Random(seed)
    task_types = list(TaskType)
    difficulties = list(_DIFFICULTY_REQUIREMENT)
    # Every synthetic call is priced and recorded with the same token counts.
    input_tokens, output_tokens = 1200, 400

    def _make(index: int) -> SeedItem:
        # Draw order (task type, difficulty, card) determines the generated data.
        task_type = rng.choice(task_types)
        difficulty = rng.choice(difficulties)
        card = rng.choice(cards)

        kind = task_type.value
        level = difficulty.value
        text = (
            f"Synthetic {task_type.value} task #{index} at {difficulty.value} difficulty: "
            f"handle the {task_type.value} request described here."
        )

        prior = card.capability_by_task_type.get(task_type, 0.5)
        success = prior >= _DIFFICULTY_REQUIREMENT[difficulty]
        # Catalog prices are per million tokens.
        cost = (input_tokens / 1_000_000) * card.input_cost_per_mtok + (
            output_tokens / 1_000_000
        ) * card.output_cost_per_mtok

        record = OutcomeRecord(
            model_id=card.model_id,
            provider=card.provider,
            task_type=kind,
            difficulty=level,
            task_fingerprint=task_fingerprint(text),
            task_cluster=task_cluster(kind, level),
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost_usd=round(cost, 6),
            quality_score=0.9 if success else 0.2,
            outcome="success" if success else "failure",
            source_dataset="synthetic",
        )
        return SeedItem(
            item_id=f"syn-{index}",
            content=build_content(kind, level, text),
            record=record,
            env_tags=["seed:synthetic"],
        )

    return [_make(i) for i in range(n)]
@@ -0,0 +1,8 @@
1
+ """Tenancy: pass-through auth — callers use their Mubit API key directly."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from minima.tenancy.context import TenantContext
6
+ from minima.tenancy.passthrough import PassthroughRuntime
7
+
8
+ __all__ = ["PassthroughRuntime", "TenantContext"]
@@ -0,0 +1,37 @@
1
+ """The per-request tenant context: everything resolved from the caller's credential."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from minima.memory.adapter import Memory
8
+ from minima.recommender.decisionlog import DecisionLog
9
+ from minima.recommender.durablerefs import DurableRefs
10
+ from minima.recommender.engine import Recommender
11
+ from minima.recommender.recstore import LaneCounter, RecStore
12
+
13
+
14
+ @dataclass(slots=True)
15
+ class TenantContext:
16
+ """Resolved per-request scope. In single-tenant mode there is one of these
17
+ (``org_id="default"``) wrapping the process singletons; in multi-tenant mode one is
18
+ built/cached per org and bound to that org's own Mubit instance."""
19
+
20
+ org_id: str
21
+ memory: Memory
22
+ recommender: Recommender
23
+ recstore: RecStore
24
+ lane_counter: LaneCounter
25
+ lane_prefix: str
26
+ mubit_endpoint: str
27
+ decision_log: DecisionLog | None = None
28
+ durable_refs: DurableRefs | None = None
29
+
30
+ def lane(self, namespace: str | None) -> str:
31
+ """Intra-org sub-scope lane. The ORG boundary is the Mubit instance/key, not this
32
+ string — so namespace is a benign within-org partition (team/project/env)."""
33
+ return f"{self.lane_prefix}:{namespace or 'default'}"
34
+
35
+ def counter_key(self, lane: str) -> str:
36
+ """Org-qualified key so reflection cadence never mixes across orgs."""
37
+ return f"{self.org_id}:{lane}"
@@ -0,0 +1,110 @@
1
+ """Pass-through auth: the caller's Mubit API key IS their credential.
2
+
3
+ No provisioning, no mnim_ keys. The caller passes their Mubit key as
4
+ ``Authorization: Bearer <mubit_key>``; Minima uses it directly against the
5
+ configured MUBIT_ENDPOINT. One TenantContext is built and cached per key.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ from collections.abc import Callable
12
+ from threading import Lock
13
+
14
+ from minima.catalog.store import CatalogStore
15
+ from minima.config import Settings
16
+ from minima.llm.base import Reasoner
17
+ from minima.memory.adapter import Memory, MubitMemory
18
+ from minima.recommender.decisionlog import DecisionLog, MemoryDecisionLog, OrgScopedDecisionLog
19
+ from minima.recommender.durablerefs import (
20
+ DurableRefs,
21
+ MemoryDurableRefs,
22
+ OrgScopedDurableRefs,
23
+ )
24
+ from minima.recommender.engine import Recommender
25
+ from minima.recommender.propensity import OrgScopedPropensity, Propensity
26
+ from minima.recommender.recstore import LaneCounter, OrgScopedRecStore, RecStore
27
+ from minima.tenancy.context import TenantContext
28
+
29
+
30
+ def _org_id(key: str) -> str:
31
+ """Derive a stable org_id from a Mubit key (mbt_<instance>_...) or its hash."""
32
+ parts = key.split("_", 3)
33
+ if len(parts) >= 4 and parts[0] == "mbt" and parts[1]:
34
+ return parts[1]
35
+ return hashlib.sha256(key.encode()).hexdigest()[:16]
36
+
37
+
38
class PassthroughRuntime:
    """One process-wide runtime; per-key TenantContexts are lazily built and cached.

    Contexts are cached under the SHA-256 hex digest of the caller's Mubit API
    key rather than under the raw key.

    NOTE(review): nothing in this class evicts cache entries, so the cache grows
    by one context per distinct key seen — confirm that keys are validated or
    bounded upstream.
    """

    def __init__(
        self,
        *,
        settings: Settings,
        catalog_store: CatalogStore,
        reasoner: Reasoner | None,
        recstore_backend: RecStore,
        propensity_backend: Propensity,
        lane_counter: LaneCounter,
        memory_factory: Callable[[str], Memory] | None = None,
        decision_log_backend: DecisionLog | None = None,
        durable_refs_backend: DurableRefs | None = None,
    ):
        """Store the shared backends that every tenant context is built from.

        Args:
            settings: Application settings, passed to each Recommender and used
                for the lane prefix and Mubit endpoint of every context.
            catalog_store: Catalog shared by all tenants.
            reasoner: Optional reasoner handed to each Recommender.
            recstore_backend: Shared store, wrapped per org in ``resolve``.
            propensity_backend: Shared propensity backend, wrapped per org.
            lane_counter: Counter shared by all contexts.
            memory_factory: Optional callable building a Memory from an API key;
                when omitted, ``MubitMemory`` is constructed directly.
            decision_log_backend: Shared decision log; when ``None`` an in-memory
                log with the configured retention is created.
            durable_refs_backend: Shared durable refs; when ``None`` an in-memory
                implementation is created.
        """
        self._settings = settings
        self._catalog_store = catalog_store
        self._reasoner = reasoner
        self._recstore_backend = recstore_backend
        self._propensity_backend = propensity_backend
        self._lane_counter = lane_counter
        self._memory_factory = memory_factory
        # Compare against None instead of relying on truthiness: a backend that
        # was supplied but evaluates falsy (e.g. it defines __len__ and is
        # currently empty) must not be silently swapped for an in-memory one.
        if decision_log_backend is None:
            decision_log_backend = MemoryDecisionLog(
                settings.minima_decision_log_retention_days
            )
        if durable_refs_backend is None:
            durable_refs_backend = MemoryDurableRefs()
        self._decision_log_backend = decision_log_backend
        self._durable_refs_backend = durable_refs_backend
        self._cache: dict[str, TenantContext] = {}
        self._lock = Lock()

    def resolve(self, mubit_api_key: str) -> TenantContext:
        """Return the TenantContext for ``mubit_api_key``, building it on first use.

        The cache is consulted under the lock, the context is built without
        holding the lock, and the cache is consulted again before publishing.
        If two callers build a context for the same key concurrently, both
        receive the one that was stored first and the other is discarded.
        """
        key_hash = hashlib.sha256(mubit_api_key.encode()).hexdigest()
        with self._lock:
            ctx = self._cache.get(key_hash)
            if ctx is not None:
                return ctx

        org_id = _org_id(mubit_api_key)
        if self._memory_factory is not None:
            memory = self._memory_factory(mubit_api_key)
        else:
            memory = MubitMemory(self._settings, api_key=mubit_api_key)
        # Shared backends are wrapped so each one is scoped to this org id.
        scoped_recstore = OrgScopedRecStore(self._recstore_backend, org_id)
        scoped_decision_log = OrgScopedDecisionLog(self._decision_log_backend, org_id)
        scoped_durable_refs = OrgScopedDurableRefs(self._durable_refs_backend, org_id)
        recommender = Recommender(
            self._settings,
            memory,
            self._catalog_store,
            scoped_recstore,
            reasoner=self._reasoner,
            propensity=OrgScopedPropensity(self._propensity_backend, org_id),
            decision_log=scoped_decision_log,
            org_id=org_id,
            durable_refs=scoped_durable_refs,
        )
        ctx = TenantContext(
            org_id=org_id,
            memory=memory,
            recommender=recommender,
            recstore=scoped_recstore,
            lane_counter=self._lane_counter,
            lane_prefix=self._settings.minima_lane_prefix,
            mubit_endpoint=self._settings.mubit_endpoint,
            decision_log=scoped_decision_log,
            durable_refs=scoped_durable_refs,
        )
        with self._lock:
            # Keeps an entry another caller stored meanwhile; otherwise stores ours.
            return self._cache.setdefault(key_hash, ctx)
minima/version.py ADDED
@@ -0,0 +1,3 @@
1
+ # Keep in sync with [project].version in pyproject.toml — this is what /v1/health and the
2
+ # FastAPI app report at runtime.
3
+ __version__ = "0.4.9"
@@ -0,0 +1,275 @@
1
+ Metadata-Version: 2.4
2
+ Name: minima-cli
3
+ Version: 0.4.9
4
+ Summary: Minima CLI: cost-aware LLM model routing — recommend cheaper models, backed by Mubit memory.
5
+ Project-URL: Homepage, https://docs.minima.sh
6
+ Project-URL: Documentation, https://docs.minima.sh
7
+ Project-URL: Repository, https://github.com/mubit-ai/minima
8
+ Project-URL: Issues, https://github.com/mubit-ai/minima/issues
9
+ Author: Mubit
10
+ License: FSL-1.1-Apache-2.0
11
+ License-File: LICENSE
12
+ Keywords: agent,cost-optimization,llm,model-router,mubit,routing
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: anyio>=4.4
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: mubit-sdk>=0.10.0
25
+ Requires-Dist: pydantic-settings>=2.3
26
+ Requires-Dist: pydantic>=2.7
27
+ Requires-Dist: structlog>=24.1
28
+ Requires-Dist: tenacity>=8.4
29
+ Provides-Extra: dev
30
+ Requires-Dist: anthropic>=0.40; extra == 'dev'
31
+ Requires-Dist: fastapi>=0.115; extra == 'dev'
32
+ Requires-Dist: google-genai>=0.3; extra == 'dev'
33
+ Requires-Dist: keyring>=24; extra == 'dev'
34
+ Requires-Dist: mypy>=1.10; extra == 'dev'
35
+ Requires-Dist: psycopg2-binary>=2.9; extra == 'dev'
36
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
37
+ Requires-Dist: pytest>=8.2; extra == 'dev'
38
+ Requires-Dist: redis>=5.0; extra == 'dev'
39
+ Requires-Dist: respx>=0.21; extra == 'dev'
40
+ Requires-Dist: ruff>=0.5; extra == 'dev'
41
+ Requires-Dist: textual>=3.0; extra == 'dev'
42
+ Requires-Dist: uvicorn[standard]>=0.30; extra == 'dev'
43
+ Provides-Extra: harness
44
+ Requires-Dist: anthropic>=0.40; extra == 'harness'
45
+ Requires-Dist: google-genai>=0.3; extra == 'harness'
46
+ Provides-Extra: reasoner-anthropic
47
+ Requires-Dist: anthropic>=0.40; extra == 'reasoner-anthropic'
48
+ Provides-Extra: reasoner-gemini
49
+ Requires-Dist: google-genai>=0.3; extra == 'reasoner-gemini'
50
+ Provides-Extra: seed
51
+ Requires-Dist: datasets>=2.20; extra == 'seed'
52
+ Requires-Dist: huggingface-hub>=0.24; extra == 'seed'
53
+ Provides-Extra: server
54
+ Requires-Dist: fastapi>=0.115; extra == 'server'
55
+ Requires-Dist: psycopg2-binary>=2.9; extra == 'server'
56
+ Requires-Dist: redis>=5.0; extra == 'server'
57
+ Requires-Dist: uvicorn[standard]>=0.30; extra == 'server'
58
+ Provides-Extra: tui
59
+ Requires-Dist: keyring>=24; extra == 'tui'
60
+ Requires-Dist: textual>=3.0; extra == 'tui'
61
+ Description-Content-Type: text/markdown
62
+
63
+ # Minima
64
+
65
+ **Recommend a cheaper LLM model for each task, so LLM-driven workflows spend fewer tokens
66
+ without losing the quality the task actually needs.**
67
+
68
+ Minima **only recommends** — it never proxies a call, runs a model, rewrites a prompt, or
69
+ caches. It is a stack-agnostic advice layer backed by [Mubit](https://docs.mubit.ai) memory:
70
+ ask which model to use, run that model yourself, then tell Minima how it went. Because it
71
+ sits *beside* your call rather than in front of it, **it adds zero latency to your real LLM
72
+ request.**
73
+
74
+ ```
75
+ POST /v1/recommend ──▶ you run the model ──▶ POST /v1/feedback
76
+ (recall + rank) (your stack) (write outcome, reinforce memory)
77
+ ▲ │
78
+ └────────────── recommendations get sharper ─────────────┘
79
+ ```
80
+
81
+ ## Why it works
82
+
83
+ Minima is backed by [Mubit](https://mubit.ai) memory. Every `POST /v1/feedback` writes a `task → model → outcome` record; every `POST /v1/recommend` recalls the most similar past records and picks the cheapest model expected to clear a quality bar. The longer it runs, the sharper the picks.
84
+
85
+ A `cost_quality_tradeoff` slider (0 = cheapest acceptable, 10 = highest quality) moves the
86
+ bar. When memory is thin or conflicting, Minima can escalate to a cheap-LLM reasoner
87
+ (configurable, off by default).
88
+
89
+ ### Cost ranking that reflects reality
90
+
91
+ A flat token estimate assumes a fixed completion length, so it ignores reasoning/thinking
92
+ tokens and mis-ranks a model with cheap list prices but heavy internal reasoning. Minima
93
+ ranks candidates by what they **really** cost, choosing one basis for the whole candidate
94
+ set:
95
+
96
+ - **rescaled** (best) — this request's input priced + the model's *observed* output-token
97
+ behavior; size-exact **and** reasoning-aware.
98
+ - **observed** — robust median of realized `$/call` from recalled outcomes.
99
+ - **estimate** (cold start) — token estimate from catalog prices.
100
+
101
+ The basis climbs `estimate → observed → rescaled` as your `/feedback` calls accumulate
102
+ realized tokens and cost. See [Concepts → Cost-basis tiers](docs/concepts.md#cost-basis-tiers-estimate--observed--rescaled).
103
+
104
+ ## Endpoints
105
+
106
+ | Endpoint | Purpose |
107
+ |----------|---------|
108
+ | `POST /v1/recommend` | Recommend a model for one task. |
109
+ | `POST /v1/recommend/workflow` | Recommend a model per step of a multi-step workflow. |
110
+ | `POST /v1/feedback` | Report an outcome and close the learning loop. |
111
+ | `GET /v1/models` | The current model catalog (cost + capability priors). |
112
+ | `GET /v1/strategies` | Rules Mubit has promoted for a namespace (explainability). |
113
+ | `GET /v1/health` | Service, Mubit, catalog, and reasoner status. |
114
+ | `POST\|GET\|DELETE /v1/admin/tenants` | Tenant provisioning (multi-tenant mode only). |
115
+
116
+ Full schemas, fields, warnings, and error formats: **[API Reference](docs/api-reference.md)**.
117
+
118
+ ## Quickstart
119
+
120
+ ```bash
121
+ uv sync --extra dev
122
+ cp .env.example .env # set MUBIT_API_KEY (+ MUBIT_ENDPOINT if not local)
123
+
124
+ # optional: seed cold-start memory so day-one picks are grounded
125
+ uv run minima-seed --dataset synthetic --limit 2000 --lane minima:default
126
+
127
+ make run # uvicorn on :8080 (interactive docs at /docs)
128
+ ```
129
+
130
+ ```bash
131
+ # recommend
132
+ curl -s localhost:8080/v1/recommend -H 'content-type: application/json' -d '{
133
+ "task": {"task": "Summarize this incident report into 3 bullets.",
134
+ "task_type": "summarization"},
135
+ "cost_quality_tradeoff": 3
136
+ }' | jq
137
+
138
+ # ...run the recommended model yourself, then close the loop
139
+ curl -s localhost:8080/v1/feedback -H 'content-type: application/json' -d '{
140
+ "recommendation_id": "<from above>", "chosen_model_id": "claude-haiku-4-5",
141
+ "outcome": "success", "quality_score": 0.95,
142
+ "input_tokens": 1760, "output_tokens": 110, "actual_cost_usd": 0.0021,
143
+ "verified_in_production": true
144
+ }' | jq
145
+ ```
146
+
147
+ Minima talks to a Mubit runtime at `MUBIT_ENDPOINT` (defaults to `http://127.0.0.1:3000`;
148
+ start one with `make run-mubit` in the Mubit repo) and uses Mubit's server-side embeddings,
149
+ so it needs no embedding model of its own.
150
+
151
+ ## Python client
152
+
153
+ ```python
154
+ from minima_client import MinimaClient
155
+
156
+ with MinimaClient("http://localhost:8080") as minima:
157
+ rec = minima.recommend("Write a Python CSV parser.", cost_quality_tradeoff=3)
158
+ # ... run rec.recommended_model.model_id yourself ...
159
+ minima.feedback(rec.recommendation_id, rec.recommended_model.model_id, "success",
160
+ quality_score=0.95, input_tokens=180, output_tokens=640,
161
+ actual_cost_usd=0.0034, verified_in_production=True)
162
+ ```
163
+
164
+ Sync + async clients and zero-code `autocapture`: **[Python Client SDK](docs/client-sdk.md)**.
165
+
166
+ ## Documentation
167
+
168
+ | Doc | What's in it |
169
+ |-----|--------------|
170
+ | [Getting Started](docs/getting-started.md) | Install, configure, run, first recommendation. |
171
+ | [Concepts](docs/concepts.md) | The loop, the algorithm, cost-basis tiers, escalation, how it improves. |
172
+ | [API Reference](docs/api-reference.md) | Every endpoint, full schemas, warnings, errors. |
173
+ | [Configuration](docs/configuration.md) | Every environment variable + tuning guidance. |
174
+ | [Python Client SDK](docs/client-sdk.md) | `minima_client` clients + autocapture. |
175
+ | [Cold-Start Seeding](docs/seeding.md) | Load history so day-one picks are grounded. |
176
+ | [Multi-Tenancy](docs/multi-tenancy.md) | One deployment, many orgs, per-org Mubit instances. |
177
+ | [Operations](docs/operations.md) | Deployment, health, degradation, monitoring, secrets. |
178
+ | [Examples](docs/examples.md) | Guided tour of the runnable examples. |
179
+ | [Agent Harness](docs/harness.md) | `minima_harness`: a Minima-routing port of PI's agent toolkit. |
180
+
181
+ ## Examples
182
+
183
+ Runnable, progressively advanced — in **[`examples/`](examples/)**:
184
+
185
+ | # | Example | Shows |
186
+ |---|---------|-------|
187
+ | 1 | [`01_quickstart.sh`](examples/01_quickstart.sh) | Raw `curl` against every endpoint. |
188
+ | 2 | [`02_recommend_and_feedback.py`](examples/02_recommend_and_feedback.py) | The core loop with the SDK. |
189
+ | 3 | [`03_constraints_and_tradeoff.py`](examples/03_constraints_and_tradeoff.py) | Constraints + slider sweep. |
190
+ | 4 | [`04_workflow.py`](examples/04_workflow.py) | Per-step workflow recommendations. |
191
+ | 5 | [`05_autocapture.py`](examples/05_autocapture.py) | Zero-code intake via `mubit.learn`. |
192
+ | 6 | [`06_routed_llm_call.py`](examples/06_routed_llm_call.py) | Routing a real Claude call + feedback. |
193
+ | 7 | [`07_multitenant_admin.py`](examples/07_multitenant_admin.py) | Provision an org, call as that tenant. |
194
+ | 8 | [`harness_warmup.py`](examples/harness_warmup.py) | The `minima_harness` agent loop (demo mode needs no keys). |
195
+
196
+ ## Agent harness
197
+
198
+ [`minima_harness/`](src/minima_harness) is a lean Python port of
199
+ [`@earendil-works/pi`](https://github.com/earendil-works/pi)'s agent toolkit, made
200
+ Minima-native: an `Agent` runtime with tool calling **plus** a `MinimaAgent` that routes
201
+ every prompt through Minima and feeds the realized tokens/cost/quality back. It is the
202
+ "run the model yourself" half of the Minima loop, packaged.
203
+
204
+ ```python
205
+ from minima_harness.minima import MinimaAgent, HarnessConfig
206
+
207
+ agent = MinimaAgent(HarnessConfig.from_env()) # MINIMA_URL, candidates, judge policy
208
+ await agent.prompt("Summarize this incident.", task_type="summarization", slider=3)
209
+ # -> Minima picked the model, the agent ran it, judged quality, fed the outcome back
210
+ ```
211
+
212
+ Try it with no keys via the in-process demo:
213
+
214
+ ```bash
215
+ uv run python examples/harness_warmup.py # demo (in-process Minima + fake provider)
216
+ uv run python examples/harness_warmup.py --live # real Minima + real providers
217
+ ```
218
+
219
+ Full architecture, the loop mapping, and extension guide:
220
+ **[Agent Harness](docs/harness.md)**.
221
+
222
+ ## Configuration
223
+
224
+ All configuration is via environment variables (see [`.env.example`](.env.example) and
225
+ [Configuration](docs/configuration.md)). The only required value is `MUBIT_API_KEY` (in
226
+ single-tenant mode). Notable knobs:
227
+
228
+ - `MINIMA_USE_OBSERVED_COST` / `MINIMA_OBSERVED_COST_MIN_N` — rank by realized cost.
229
+ - `MINIMA_REASONER_PROVIDER` — enable the cheap-LLM escalation tier (`anthropic` / `gemini`).
230
+ - `MINIMA_RECOMMENDATION_STORE=sqlite` — durable recommendation resolution (multi-worker).
231
+ - `MINIMA_MULTITENANT` — serve many orgs from one deployment.
232
+
233
+ ## Development
234
+
235
+ ```bash
236
+ make install # uv sync --extra dev
237
+ make test # unit + integration (no Mubit needed)
238
+ make lint # ruff + mypy
239
+ make live # end-to-end against a running Mubit (pytest -m live)
240
+ make eval # offline RouterBench savings evaluation (pytest -m eval)
241
+ make fmt # ruff --fix + format
242
+ make seed # minima-seed (LIMIT=, LANE= overridable)
243
+ ```
244
+
245
+ ## Project layout
246
+
247
+ ```
248
+ src/minima/
249
+ api/routers/ recommend · feedback · models · strategies · health · admin
250
+ recommender/ engine · classify · aggregate · score · escalation · propensity · recstore
251
+ memory/ adapter (only Mubit touchpoint) · records · keys · threadpool
252
+ catalog/ store · merge · refresh · sources/{litellm,openrouter} · data/*.json
253
+ llm/ base · anthropic · gemini · registry (the escalation reasoner)
254
+ tenancy/ runtime · registry · context · keys · secrets
255
+ seeding/ routerbench · synthetic · run_seed (minima-seed CLI)
256
+ schemas/ common · recommend · workflow · feedback · models_catalog · strategies · admin
257
+ src/minima_harness/ ported pi-ai (ai/) + pi-agent-core (agent/) + Minima integration (minima/) — see docs/harness.md
258
+ client_sdk/minima_client/ client (sync+async) · autocapture · errors
259
+ docs/ full documentation examples/ runnable examples
260
+ tests/ unit · integration (FakeMemory) · live (-m live) · eval (-m eval)
261
+ ```
262
+
263
+ ## License
264
+
265
+ Minima is **source-available** under the [Functional Source License,
266
+ Version 1.1, Apache 2.0 Future License](LICENSE) (`FSL-1.1-Apache-2.0`).
267
+
268
+ You may use, copy, modify, and self-host Minima for any **Permitted Purpose** —
269
+ internal use, non-commercial education/research, and professional services for a
270
+ licensee. The one restriction is a **Competing Use**: you may not offer Minima
271
+ (or a substantially similar product/service) to others as a commercial or hosted
272
+ offering that competes with us. Two years after each version is published, that
273
+ version automatically converts to the **Apache License 2.0**.
274
+
275
+ Copyright 2026 Mubit.