minima-cli 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. minima/__init__.py +5 -0
  2. minima/api/__init__.py +1 -0
  3. minima/api/auth.py +39 -0
  4. minima/api/errors.py +40 -0
  5. minima/api/routers/__init__.py +1 -0
  6. minima/api/routers/calibration.py +50 -0
  7. minima/api/routers/feedback.py +279 -0
  8. minima/api/routers/health.py +50 -0
  9. minima/api/routers/models.py +42 -0
  10. minima/api/routers/recommend.py +66 -0
  11. minima/api/routers/savings.py +55 -0
  12. minima/api/routers/strategies.py +33 -0
  13. minima/catalog/__init__.py +1 -0
  14. minima/catalog/data/capability_priors.json +210 -0
  15. minima/catalog/data/model_aliases.json +12 -0
  16. minima/catalog/merge.py +69 -0
  17. minima/catalog/refresh.py +54 -0
  18. minima/catalog/sources/__init__.py +1 -0
  19. minima/catalog/sources/litellm.py +19 -0
  20. minima/catalog/sources/openrouter.py +25 -0
  21. minima/catalog/store.py +86 -0
  22. minima/config.py +288 -0
  23. minima/deps.py +35 -0
  24. minima/llm/__init__.py +1 -0
  25. minima/llm/anthropic.py +106 -0
  26. minima/llm/base.py +196 -0
  27. minima/llm/gemini.py +124 -0
  28. minima/llm/registry.py +54 -0
  29. minima/logging.py +28 -0
  30. minima/main.py +109 -0
  31. minima/memory/__init__.py +1 -0
  32. minima/memory/adapter.py +572 -0
  33. minima/memory/keys.py +83 -0
  34. minima/memory/records.py +190 -0
  35. minima/memory/threadpool.py +41 -0
  36. minima/metrics/__init__.py +1 -0
  37. minima/metrics/calibration.py +415 -0
  38. minima/metrics/report.py +116 -0
  39. minima/metrics/savings.py +98 -0
  40. minima/recommender/__init__.py +1 -0
  41. minima/recommender/_pg_pool.py +38 -0
  42. minima/recommender/_redis_client.py +32 -0
  43. minima/recommender/aggregate.py +157 -0
  44. minima/recommender/classify.py +165 -0
  45. minima/recommender/decisionlog.py +505 -0
  46. minima/recommender/durablerefs.py +312 -0
  47. minima/recommender/engine.py +997 -0
  48. minima/recommender/escalation.py +83 -0
  49. minima/recommender/propensity.py +189 -0
  50. minima/recommender/recstore.py +368 -0
  51. minima/recommender/score.py +318 -0
  52. minima/recommender/types.py +166 -0
  53. minima/schemas/__init__.py +1 -0
  54. minima/schemas/common.py +73 -0
  55. minima/schemas/feedback.py +34 -0
  56. minima/schemas/models_catalog.py +36 -0
  57. minima/schemas/recommend.py +104 -0
  58. minima/schemas/savings.py +39 -0
  59. minima/schemas/strategies.py +57 -0
  60. minima/schemas/workflow.py +43 -0
  61. minima/seeding/__init__.py +1 -0
  62. minima/seeding/items.py +42 -0
  63. minima/seeding/llmrouterbench.py +232 -0
  64. minima/seeding/routerbench.py +141 -0
  65. minima/seeding/run_seed.py +56 -0
  66. minima/seeding/synthetic.py +70 -0
  67. minima/tenancy/__init__.py +8 -0
  68. minima/tenancy/context.py +37 -0
  69. minima/tenancy/passthrough.py +110 -0
  70. minima/version.py +3 -0
  71. minima_cli-0.4.9.dist-info/METADATA +275 -0
  72. minima_cli-0.4.9.dist-info/RECORD +161 -0
  73. minima_cli-0.4.9.dist-info/WHEEL +4 -0
  74. minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
  75. minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
  76. minima_client/__init__.py +19 -0
  77. minima_client/autocapture.py +101 -0
  78. minima_client/client.py +301 -0
  79. minima_client/errors.py +23 -0
  80. minima_harness/LICENSE_PI +32 -0
  81. minima_harness/__init__.py +16 -0
  82. minima_harness/agent/__init__.py +72 -0
  83. minima_harness/agent/agent.py +276 -0
  84. minima_harness/agent/events.py +124 -0
  85. minima_harness/agent/loop.py +311 -0
  86. minima_harness/agent/state.py +79 -0
  87. minima_harness/agent/tools.py +97 -0
  88. minima_harness/ai/__init__.py +66 -0
  89. minima_harness/ai/compat.py +71 -0
  90. minima_harness/ai/errors.py +96 -0
  91. minima_harness/ai/events.py +117 -0
  92. minima_harness/ai/openrouter_catalog.py +153 -0
  93. minima_harness/ai/provider_catalog.py +299 -0
  94. minima_harness/ai/provider_quirks.py +37 -0
  95. minima_harness/ai/providers/__init__.py +75 -0
  96. minima_harness/ai/providers/_common.py +48 -0
  97. minima_harness/ai/providers/anthropic.py +290 -0
  98. minima_harness/ai/providers/base.py +65 -0
  99. minima_harness/ai/providers/faux.py +173 -0
  100. minima_harness/ai/providers/google.py +221 -0
  101. minima_harness/ai/providers/openai_compat.py +278 -0
  102. minima_harness/ai/registry.py +184 -0
  103. minima_harness/ai/stream.py +82 -0
  104. minima_harness/ai/tools.py +51 -0
  105. minima_harness/ai/types.py +204 -0
  106. minima_harness/ai/usage.py +41 -0
  107. minima_harness/minima/__init__.py +40 -0
  108. minima_harness/minima/cache.py +102 -0
  109. minima_harness/minima/config.py +85 -0
  110. minima_harness/minima/goals.py +226 -0
  111. minima_harness/minima/judge.py +144 -0
  112. minima_harness/minima/mapping.py +147 -0
  113. minima_harness/minima/meter.py +143 -0
  114. minima_harness/minima/router.py +220 -0
  115. minima_harness/minima/runtime.py +544 -0
  116. minima_harness/minima/signals.py +195 -0
  117. minima_harness/session/__init__.py +14 -0
  118. minima_harness/session/format.py +35 -0
  119. minima_harness/session/store.py +236 -0
  120. minima_harness/tasks/__init__.py +17 -0
  121. minima_harness/tasks/task_set.py +78 -0
  122. minima_harness/tools/__init__.py +7 -0
  123. minima_harness/tools/_io.py +34 -0
  124. minima_harness/tools/bash.py +70 -0
  125. minima_harness/tools/builtin.py +23 -0
  126. minima_harness/tools/edit.py +50 -0
  127. minima_harness/tools/find.py +38 -0
  128. minima_harness/tools/grep.py +73 -0
  129. minima_harness/tools/ls.py +35 -0
  130. minima_harness/tools/read.py +38 -0
  131. minima_harness/tools/tasks.py +75 -0
  132. minima_harness/tools/write.py +36 -0
  133. minima_harness/tui/__init__.py +3 -0
  134. minima_harness/tui/analytics.py +111 -0
  135. minima_harness/tui/app.py +1927 -0
  136. minima_harness/tui/bridge.py +103 -0
  137. minima_harness/tui/cli.py +227 -0
  138. minima_harness/tui/clipboard.py +60 -0
  139. minima_harness/tui/commands.py +49 -0
  140. minima_harness/tui/compaction.py +17 -0
  141. minima_harness/tui/config_cli.py +141 -0
  142. minima_harness/tui/config_store.py +237 -0
  143. minima_harness/tui/context.py +93 -0
  144. minima_harness/tui/customize.py +95 -0
  145. minima_harness/tui/diff.py +53 -0
  146. minima_harness/tui/editor.py +43 -0
  147. minima_harness/tui/extensions.py +84 -0
  148. minima_harness/tui/extra_models.py +52 -0
  149. minima_harness/tui/history.py +71 -0
  150. minima_harness/tui/mubit.py +295 -0
  151. minima_harness/tui/overlays.py +593 -0
  152. minima_harness/tui/packages.py +59 -0
  153. minima_harness/tui/run_modes.py +66 -0
  154. minima_harness/tui/theme.py +77 -0
  155. minima_harness/tui/welcome.py +83 -0
  156. minima_harness/tui/widgets/__init__.py +3 -0
  157. minima_harness/tui/widgets/banner.py +38 -0
  158. minima_harness/tui/widgets/editor.py +83 -0
  159. minima_harness/tui/widgets/footer.py +73 -0
  160. minima_harness/tui/widgets/messages.py +151 -0
  161. minima_harness/tui/widgets/status.py +57 -0
minima/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Minima — Mubit-backed LLM model recommender (recommend-only)."""
2
+
3
+ from minima.version import __version__
4
+
5
+ __all__ = ["__version__"]
minima/api/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """HTTP API layer."""
minima/api/auth.py ADDED
@@ -0,0 +1,39 @@
1
+ """Auth dependency: resolve the caller's Mubit API key to a TenantContext.
2
+
3
+ Pass-through mode: the caller presents their own Mubit key as
4
+ ``Authorization: Bearer <mubit_api_key>``. Minima uses it directly against
5
+ MUBIT_ENDPOINT; no Minima-issued keys, no provisioning step.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from fastapi import Request
11
+
12
+ from minima.api.errors import ApiError
13
+ from minima.tenancy.context import TenantContext
14
+
15
+
16
def bearer_key(request: Request) -> str | None:
    """Extract the bearer token from the request's ``Authorization`` header.

    The ``Bearer`` scheme is matched case-insensitively. Returns ``None`` when
    the header is missing, uses another scheme, or carries an empty token.
    """
    header = request.headers.get("authorization", "")
    if not header.lower().startswith("bearer "):
        return None
    # Everything after the 7-character "bearer " prefix, surrounding whitespace removed.
    token = header[len("bearer "):].strip()
    return token if token else None
21
+
22
+
23
async def get_tenant(request: Request) -> TenantContext:
    """Resolve the caller's bearer key to a ``TenantContext``.

    Raises:
        ApiError: 401 "Unauthorized" when the request carries no bearer key.
    """
    api_key = bearer_key(request)
    if api_key:
        # The pass-through runtime on app state maps the presented key to a tenant.
        return request.app.state.passthrough_runtime.resolve(api_key)
    raise ApiError(
        401,
        "Unauthorized",
        "pass your Mubit API key as: Authorization: Bearer <key>",
    )
32
+
33
+
34
async def get_tenant_optional(request: Request) -> TenantContext | None:
    """Like get_tenant but returns None instead of 401 (for health probes)."""
    api_key = bearer_key(request)
    if api_key:
        return request.app.state.passthrough_runtime.resolve(api_key)
    # No bearer key presented: the caller is treated as unauthenticated.
    return None
minima/api/errors.py ADDED
@@ -0,0 +1,40 @@
1
+ """RFC7807-style problem+json error handlers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import FastAPI, Request
6
+ from fastapi.responses import JSONResponse
7
+
8
+ from minima.recommender.engine import NoCandidatesError
9
+
10
+
11
class ApiError(Exception):
    """A problem+json error with an explicit status/title (e.g. auth failures).

    ``status``, ``title`` and ``detail`` are kept as attributes; ``detail`` is
    also the exception message, so ``str(exc)`` returns it.
    """

    def __init__(self, status: int, title: str, detail: str):
        super().__init__(detail)
        self.status, self.title, self.detail = status, title, detail
19
+
20
+
21
def _problem(status: int, title: str, detail: str) -> JSONResponse:
    """Build an ``application/problem+json`` response for the given status/title/detail.

    ``type`` is always ``about:blank``; ``status`` is repeated in the body and
    used as the HTTP status code.
    """
    body = {"type": "about:blank", "title": title, "status": status, "detail": detail}
    return JSONResponse(
        content=body,
        status_code=status,
        media_type="application/problem+json",
    )
27
+
28
+
29
def register_error_handlers(app: FastAPI) -> None:
    """Install the problem+json exception handlers on ``app``.

    * ``ApiError``          -> the error's own status/title/detail
    * ``NoCandidatesError`` -> 422 "No candidate models"
    * ``ValueError``        -> 400 "Invalid request"
    """

    async def _api_error(_request: Request, exc: ApiError) -> JSONResponse:
        return _problem(exc.status, exc.title, exc.detail)

    async def _no_candidates(_request: Request, exc: NoCandidatesError) -> JSONResponse:
        return _problem(422, "No candidate models", str(exc))

    async def _value_error(_request: Request, exc: ValueError) -> JSONResponse:
        # NOTE(review): this maps every ValueError that escapes a route, not only
        # request-validation ones, and echoes its message to the caller.
        return _problem(400, "Invalid request", str(exc))

    # Explicit registration; equivalent to decorating each handler with
    # ``@app.exception_handler(<exception class>)``.
    for exc_type, handler in (
        (ApiError, _api_error),
        (NoCandidatesError, _no_candidates),
        (ValueError, _value_error),
    ):
        app.add_exception_handler(exc_type, handler)
minima/api/routers/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """API route modules."""
minima/api/routers/calibration.py ADDED
@@ -0,0 +1,50 @@
1
+ """Calibration endpoint — is predicted_success telling the truth for this org?"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+
7
+ from fastapi import APIRouter, Depends, Query
8
+
9
+ from minima.api.auth import get_tenant
10
+ from minima.config import Settings
11
+ from minima.deps import get_settings
12
+ from minima.metrics.calibration import calibration_by_task_type, cusum_flags, routing_health
13
+ from minima.schemas.savings import CalibrationResponse
14
+ from minima.tenancy.context import TenantContext
15
+
16
+ router = APIRouter(prefix="/v1", tags=["calibration"])
17
+
18
+ _SECONDS_PER_DAY = 86_400.0
19
+
20
+
21
@router.get("/calibration", response_model=CalibrationResponse)
async def calibration(
    tenant: TenantContext = Depends(get_tenant),
    settings: Settings = Depends(get_settings),
    namespace: str | None = Query(None, description="restrict to one namespace lane"),
    days: float | None = Query(None, gt=0, le=365, description="lookback window in days"),
) -> CalibrationResponse:
    """Report routing health, per-task-type calibration and drift flags for the tenant.

    The lookback window is ``days`` when supplied, otherwise the configured
    ``minima_calibration_window_days``. When ``namespace`` is given, rows are
    restricted to the lane ``<lane_prefix>:<namespace>``. A tenant without a
    decision log is reported over an empty row set.
    """
    if days is None:
        window_days = float(settings.minima_calibration_window_days)
    else:
        window_days = days
    since = time.time() - window_days * _SECONDS_PER_DAY

    lane = None
    if namespace:
        lane = f"{tenant.lane_prefix}:{namespace}"

    rows = []
    if tenant.decision_log is not None:
        rows = tenant.decision_log.rows(since=since, lane=lane)

    health = routing_health(rows)
    reports = calibration_by_task_type(
        rows,
        n_bins=settings.minima_calibration_bins,
        shrinkage_k=settings.minima_calibration_shrinkage_k,
    )
    drift_flags = cusum_flags(rows, k=settings.minima_cusum_k, h=settings.minima_cusum_h)

    return CalibrationResponse(
        org_id=tenant.org_id,
        since=since,
        days=window_days,
        namespace=namespace,
        health=health,
        reports=reports,
        drift_flags=drift_flags,
    )
minima/api/routers/feedback.py ADDED
@@ -0,0 +1,279 @@
1
+ """Feedback endpoint — writes the outcome to Mubit and closes the learning loop."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import time
7
+
8
+ from fastapi import APIRouter, Depends
9
+
10
+ from minima.api.auth import get_tenant
11
+ from minima.config import Settings
12
+ from minima.deps import get_settings
13
+ from minima.logging import get_logger
14
+ from minima.memory.adapter import Memory
15
+ from minima.memory.keys import (
16
+ build_lesson_content,
17
+ lesson_upsert_key,
18
+ outcome_idempotency_key,
19
+ outcome_upsert_key,
20
+ )
21
+ from minima.memory.records import (
22
+ OutcomeRecord,
23
+ quality_from_outcome,
24
+ reconcile_quality,
25
+ signal_from_outcome,
26
+ )
27
+ from minima.recommender.decisionlog import DecisionRecord, Reconciliation
28
+ from minima.schemas.common import OutcomeLabel
29
+ from minima.schemas.feedback import FeedbackRequest, FeedbackResponse
30
+ from minima.tenancy.context import TenantContext
31
+
32
+ log = get_logger("minima.feedback")
33
+ router = APIRouter(prefix="/v1", tags=["feedback"])
34
+
35
+
36
# Strong references to in-flight reflect tasks. The event loop keeps only weak
# references to tasks, so a task whose handle is dropped can be garbage-collected
# before it finishes; each task is held here until its done-callback removes it.
_REFLECT_TASKS: set[asyncio.Task[None]] = set()


def _fire_reflect(memory: Memory, lane: str, user_id: str | None) -> None:
    """Schedule ``memory.reflect`` for ``lane`` as a fire-and-forget background task.

    Must be called from within a running event loop. Failures inside the task
    are logged as ``reflect_failed`` and never propagate to the caller.

    Args:
        memory: memory adapter whose ``reflect`` coroutine is awaited.
        lane: lane passed through to ``reflect``.
        user_id: optional user id passed through to ``reflect``.
    """

    async def _run() -> None:
        try:
            await memory.reflect(lane=lane, user_id=user_id)
        except Exception as exc:  # noqa: BLE001
            log.warning("reflect_failed", lane=lane, error=str(exc))

    task = asyncio.create_task(_run())
    # Keep the task alive until completion, then release the reference.
    _REFLECT_TASKS.add(task)
    task.add_done_callback(_REFLECT_TASKS.discard)
44
+
45
+
46
@router.post("/feedback", response_model=FeedbackResponse)
async def feedback(
    req: FeedbackRequest,
    tenant: TenantContext = Depends(get_tenant),
    settings: Settings = Depends(get_settings),
) -> FeedbackResponse:
    """Record the realized outcome of a recommendation.

    Steps, in order:

    1. Look up the stored recommendation in the tenant's recstore; when it is
       gone, fall back to ``_late_feedback`` if the decision log still has it.
    2. Write the outcome record to memory (``remember_outcome``).
    3. Remember the returned record id in ``durable_refs`` (best-effort).
    4. Reinforce the recalled neighbors for the chosen model (``record_outcome``).
    5. Optionally promote a lesson for a verified-in-production success.
    6. Bump the lane counter and possibly fire a background reflect.
    7. Reconcile the decision-log row.

    Returns ``accepted=False`` with a warning when the recommendation is unknown
    or the outcome write fails; failures in steps 3-5 and 7 are logged (steps 4
    and 5 also add a response warning) and do not reject the request.
    """
    memory = tenant.memory
    # Org-scoped store: a recommendation_id minted for another org resolves to None here,
    # so org A cannot credit or poison org B's recommendation.
    stored = tenant.recstore.get(req.recommendation_id)
    if stored is None:
        # Degraded late-feedback path: the recstore TTL expired but the decision log
        # (longer retention) still knows the recommendation. The outcome record is still
        # written (the durable (cluster, model) upsert keeps learning) and the decision
        # log row is reconciled; neighbor reinforcement, durable-ref bookkeeping, lesson
        # promotion and the reflect trigger are skipped — the recalled-neighbor ids lived
        # in the recstore alone.
        if settings.minima_late_feedback_enabled and tenant.decision_log is not None:
            decision = tenant.decision_log.get(req.recommendation_id)
            if decision is not None:
                return await _late_feedback(req, tenant, decision)
        return FeedbackResponse(accepted=False, warnings=["unknown_recommendation"])

    # Derive the quality score, reconcile it against the outcome label (a non-empty
    # ``mismatch`` is surfaced as a warning), then derive the reinforcement signal.
    quality = quality_from_outcome(req.outcome.value, req.quality_score)
    quality, mismatch = reconcile_quality(req.outcome.value, quality)
    signal = signal_from_outcome(req.outcome.value, quality)
    is_success = req.outcome == OutcomeLabel.success
    warnings: list[str] = []
    if mismatch:
        warnings.append(mismatch)
        log.warning(
            "quality_outcome_mismatch",
            outcome=req.outcome.value,
            supplied_quality=req.quality_score,
            clamped_quality=quality,
            model_id=req.chosen_model_id,
            cluster=stored.task_cluster,
        )

    # Task descriptors come from the stored recommendation; usage numbers come from
    # the request, with missing token counts / cost recorded as zero.
    record = OutcomeRecord(
        model_id=req.chosen_model_id,
        task_type=stored.task_type,
        difficulty=stored.difficulty,
        task_fingerprint=stored.task_fingerprint,
        task_cluster=stored.task_cluster,
        input_tokens=req.input_tokens or 0,
        output_tokens=req.output_tokens or 0,
        cost_usd=req.actual_cost_usd or 0.0,
        latency_ms=req.latency_ms,
        iterations=req.iterations,
        quality_score=quality,
        outcome=req.outcome.value,
        recommendation_id=req.recommendation_id,
        verified_in_production=req.verified_in_production,
        recorded_at=time.time(),
    )
    upsert_key = outcome_upsert_key(stored.task_cluster, req.chosen_model_id)
    # A caller-supplied idempotency key wins; otherwise one is derived from
    # (recommendation_id, chosen_model_id).
    idem = req.idempotency_key or outcome_idempotency_key(
        req.recommendation_id, req.chosen_model_id
    )
    importance = "high" if (req.verified_in_production and is_success) else "medium"

    try:
        record_id = await memory.remember_outcome(
            content=stored.content,
            record=record,
            lane=stored.lane,
            upsert_key=upsert_key,
            idempotency_key=idem,
            user_id=stored.user_id,
            env_tags=stored.env_tags or None,
            importance=importance,
            source="human",
        )
    except Exception as exc:  # noqa: BLE001
        # The outcome write is the one step whose failure rejects the feedback.
        log.warning("remember_outcome_failed", error=str(exc))
        return FeedbackResponse(accepted=False, warnings=["memory_write_failed"])

    # The upserted (cluster, model) record's id is stable across feedbacks and
    # dereferenceable — remember it for the exact-match recall fast path.
    if record_id and tenant.durable_refs is not None:
        try:
            # NOTE(review): record_id is passed for both trailing arguments — confirm
            # against the durable_refs.upsert signature that this is intended.
            tenant.durable_refs.upsert(
                stored.lane, stored.task_cluster, req.chosen_model_id, record_id, record_id
            )
        except Exception as exc:  # noqa: BLE001 — bookkeeping must never fail feedback
            log.warning("durable_ref_upsert_failed", error=str(exc))

    # Neighbors recalled for the chosen model are (entry_id, reference_id) pairs.
    # Reinforce against the first non-empty reference, else the record just written.
    neighbors = stored.neighbors_by_model.get(req.chosen_model_id, [])
    entry_ids = [eid for (eid, _ref) in neighbors if eid]
    primary_ref = next((ref for (_eid, ref) in neighbors if ref), None) or record_id

    updated_confidence: float | None = None
    if primary_ref:
        try:
            oc = await memory.record_outcome(
                lane=stored.lane,
                reference_id=primary_ref,
                outcome=req.outcome.value,
                signal=signal,
                entry_ids=entry_ids or None,
                user_id=stored.user_id,
                verified_in_production=req.verified_in_production,
                idempotency_key=f"oc:{idem}",
                rationale=f"minima feedback {req.recommendation_id}: ran {req.chosen_model_id}",
            )
            value = oc.get("updated_confidence")
            updated_confidence = float(value) if value is not None else None
        except Exception as exc:  # noqa: BLE001
            log.warning("record_outcome_failed", error=str(exc))
            warnings.append("reinforcement_failed")

    # Promote a verified-in-production strong success to a durable Lesson. Lessons pass
    # the server's validation gate and feed reflect()/surface_strategies rule promotion;
    # a per-(cluster, model) upsert_key keeps one accumulating lesson instead of flooding.
    lesson_promoted = False
    if (
        settings.minima_lesson_on_verified_prod
        and req.verified_in_production
        and is_success
        and quality >= settings.minima_lesson_min_quality
    ):
        try:
            await memory.remember_lesson(
                content=build_lesson_content(stored.task_cluster, req.chosen_model_id, quality),
                lane=stored.lane,
                upsert_key=lesson_upsert_key(stored.task_cluster, req.chosen_model_id),
                user_id=stored.user_id,
                env_tags=stored.env_tags or None,
                metadata={
                    "kind": "lesson",
                    "task_cluster": stored.task_cluster,
                    "model_id": req.chosen_model_id,
                    "verified_in_production": True,
                },
                idempotency_key=f"lsn:{idem}",
            )
            lesson_promoted = True
        except Exception as exc:  # noqa: BLE001 — lesson promotion is best-effort
            log.warning("lesson_promotion_failed", error=str(exc))
            warnings.append("lesson_promotion_failed")

    # Reflect every N accepted feedbacks on this lane (when N > 0), and on any
    # verified-in-production non-success.
    reflection_triggered = False
    count = tenant.lane_counter.bump(tenant.counter_key(stored.lane))
    every = settings.minima_reflect_every_n
    if (every > 0 and count % every == 0) or (req.verified_in_production and not is_success):
        _fire_reflect(memory, stored.lane, stored.user_id)
        reflection_triggered = True

    _reconcile_decision(tenant, req, quality, late=False)

    return FeedbackResponse(
        accepted=True,
        record_id=record_id,
        reinforced_entry_ids=entry_ids,
        updated_confidence=updated_confidence,
        reflection_triggered=reflection_triggered,
        lesson_promoted=lesson_promoted,
        warnings=warnings,
    )
205
+
206
+
207
def _reconcile_decision(
    tenant: TenantContext, req: FeedbackRequest, quality: float, *, late: bool
) -> None:
    """Fill the decision-log row's realized columns (best-effort analytics).

    Does nothing when the tenant has no decision log. Any exception raised while
    building or writing the reconciliation is logged as
    ``decision_reconcile_failed`` and swallowed.
    """
    decision_log = tenant.decision_log
    if decision_log is None:
        return
    try:
        realized = Reconciliation(
            model_id=req.chosen_model_id,
            outcome=req.outcome.value,
            quality=quality,
            cost_usd=req.actual_cost_usd,
            latency_ms=req.latency_ms,
            ts=time.time(),
            late=late,
        )
        decision_log.reconcile(req.recommendation_id, realized)
    except Exception as exc:  # noqa: BLE001 — analytics must never fail feedback
        log.warning("decision_reconcile_failed", error=str(exc))
228
+
229
+
230
async def _late_feedback(
    req: FeedbackRequest,
    tenant: TenantContext,
    decision: DecisionRecord,
) -> FeedbackResponse:
    """Accept feedback after recstore expiry: write the outcome, skip attribution.

    Task descriptors, lane, content, user and env tags are taken from the
    decision-log row. The response always carries the
    ``late_feedback_no_attribution`` warning. If the memory write fails, the
    feedback is rejected with ``memory_write_failed``.
    """
    outcome_label = req.outcome.value
    model_id = req.chosen_model_id

    raw_quality = quality_from_outcome(outcome_label, req.quality_score)
    quality, mismatch = reconcile_quality(outcome_label, raw_quality)
    notes = ["late_feedback_no_attribution"]
    if mismatch:
        notes.append(mismatch)

    outcome_record = OutcomeRecord(
        model_id=model_id,
        task_type=decision.task_type,
        difficulty=decision.difficulty,
        task_fingerprint=decision.fingerprint,
        task_cluster=decision.cluster,
        input_tokens=req.input_tokens or 0,
        output_tokens=req.output_tokens or 0,
        cost_usd=req.actual_cost_usd or 0.0,
        latency_ms=req.latency_ms,
        iterations=req.iterations,
        quality_score=quality,
        outcome=outcome_label,
        recommendation_id=req.recommendation_id,
        verified_in_production=req.verified_in_production,
        recorded_at=time.time(),
    )
    # A caller-supplied idempotency key takes precedence over the derived one.
    idem_key = req.idempotency_key
    if not idem_key:
        idem_key = outcome_idempotency_key(req.recommendation_id, model_id)

    try:
        record_id = await tenant.memory.remember_outcome(
            content=decision.content,
            record=outcome_record,
            lane=decision.lane,
            upsert_key=outcome_upsert_key(decision.cluster, model_id),
            idempotency_key=idem_key,
            user_id=decision.user_id,
            env_tags=decision.env_tags or None,
            importance="medium",
            source="human",
        )
    except Exception as exc:  # noqa: BLE001
        log.warning("late_remember_outcome_failed", error=str(exc))
        return FeedbackResponse(accepted=False, warnings=["memory_write_failed", *notes])

    _reconcile_decision(tenant, req, quality, late=True)
    return FeedbackResponse(accepted=True, record_id=record_id, warnings=notes)
minima/api/routers/health.py ADDED
@@ -0,0 +1,50 @@
1
+ """Health endpoint — always 200; reports degraded state in the body."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from fastapi import APIRouter, Depends
8
+
9
+ from minima.api.auth import get_tenant_optional
10
+ from minima.catalog.store import CatalogStore
11
+ from minima.config import Settings
12
+ from minima.deps import get_catalog_store, get_settings
13
+ from minima.tenancy.context import TenantContext
14
+ from minima.version import __version__
15
+
16
+ router = APIRouter(prefix="/v1", tags=["health"])
17
+
18
+
19
@router.get("/health")
async def health(
    tenant: TenantContext | None = Depends(get_tenant_optional),
    catalog_store: CatalogStore = Depends(get_catalog_store),
    settings: Settings = Depends(get_settings),
) -> dict[str, Any]:
    """Report service liveness plus catalog, reasoner and (when authenticated) Mubit state.

    ``status`` is ``"degraded"`` only when the Mubit probe reports a falsy,
    non-``None`` ``reachable``; an unauthenticated probe (``reachable`` is
    ``None``) is still ``"ok"``. A probe that raises is reported as
    ``reachable: False`` with the exception type under ``error``, so this
    handler itself does not turn a Mubit failure into an error response.
    """
    catalog = catalog_store.get()
    # An unauthenticated probe still gets service liveness; the Mubit block is
    # filled in only when the caller's bearer (Mubit) key resolved to a tenant.
    if tenant is None:
        mubit: dict[str, Any] = {"reachable": None, "scope": "unauthenticated"}
    else:
        try:
            # Copy before annotating so the endpoint/org keys added below never
            # leak into a dict the memory adapter may still hold a reference to.
            mubit = dict(await tenant.memory.health())
        except Exception as exc:  # noqa: BLE001 — a failed probe is a degraded report
            mubit = {"reachable": False, "error": type(exc).__name__}
        mubit["endpoint"] = tenant.mubit_endpoint
        mubit["org_id"] = tenant.org_id
    reachable = mubit.get("reachable")
    return {
        "status": "ok" if reachable or reachable is None else "degraded",
        "mubit": mubit,
        "auth": "passthrough",
        "catalog": {
            "version": catalog.version,
            "cost_source": catalog.cost_source,
            "stale": catalog.stale,
            "models": len(catalog.cards),
        },
        "reasoner": {
            "provider": settings.minima_reasoner_provider,
            "configured": settings.reasoner_enabled,
        },
        "version": __version__,
    }
minima/api/routers/models.py ADDED
@@ -0,0 +1,42 @@
1
+ """Model catalog endpoint."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import APIRouter, Depends
6
+
7
+ from minima.catalog.store import CatalogStore
8
+ from minima.deps import get_catalog_store
9
+ from minima.schemas.common import TaskType
10
+ from minima.schemas.models_catalog import ModelsResponse
11
+
12
+ router = APIRouter(prefix="/v1", tags=["models"])
13
+
14
+
15
@router.get("/models", response_model=ModelsResponse)
async def list_models(
    provider: str | None = None,
    task_type: TaskType | None = None,
    max_cost: float | None = None,
    include_stale: bool = True,
    catalog_store: CatalogStore = Depends(get_catalog_store),
) -> ModelsResponse:
    """List catalog models, optionally filtered, cheapest input cost first.

    Filters: ``provider`` (case-insensitive match), ``task_type`` (must be a key
    of the card's ``capability_by_task_type``), and ``max_cost`` (upper bound on
    the larger of the card's input/output cost per Mtok). With
    ``include_stale=False``, cards flagged ``cost_stale`` are dropped unless
    that would leave nothing.
    """
    catalog = catalog_store.get()
    wanted_provider = provider.lower() if provider else None

    def _matches(card) -> bool:
        if wanted_provider is not None and card.provider.lower() != wanted_provider:
            return False
        if task_type is not None and task_type not in card.capability_by_task_type:
            return False
        if max_cost is not None:
            return max(card.input_cost_per_mtok, card.output_cost_per_mtok) <= max_cost
        return True

    selected = [card for card in catalog.cards if _matches(card)]

    if not include_stale:
        fresh_only = [card for card in selected if not card.cost_stale]
        if fresh_only:  # never return empty solely due to staleness
            selected = fresh_only

    ordered = sorted(selected, key=lambda card: card.input_cost_per_mtok)
    return ModelsResponse(
        models=ordered,
        catalog_version=catalog.version,
        refreshed_at=catalog.refreshed_at,
        stale=catalog.stale,
    )
minima/api/routers/recommend.py ADDED
@@ -0,0 +1,66 @@
1
+ """Recommendation endpoints (per-call and per-workflow-step)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+
7
+ from fastapi import APIRouter, Depends
8
+
9
+ from minima.api.auth import get_tenant
10
+ from minima.schemas.recommend import RecommendRequest, RecommendResponse
11
+ from minima.schemas.workflow import (
12
+ StepRecommendation,
13
+ WorkflowRequest,
14
+ WorkflowResponse,
15
+ )
16
+ from minima.tenancy.context import TenantContext
17
+
18
+ router = APIRouter(prefix="/v1", tags=["recommend"])
19
+
20
+
21
@router.post("/recommend", response_model=RecommendResponse)
async def recommend(
    req: RecommendRequest, tenant: TenantContext = Depends(get_tenant)
) -> RecommendResponse:
    """Return the authenticated tenant's recommender result for a single request."""
    recommendation = await tenant.recommender.recommend(req)
    return recommendation
26
+
27
+
28
@router.post("/recommend/workflow", response_model=WorkflowResponse)
async def recommend_workflow(
    req: WorkflowRequest, tenant: TenantContext = Depends(get_tenant)
) -> WorkflowResponse:
    """Recommend a model for every workflow step, one step at a time.

    Step constraints, when present, are merged over the workflow constraints.
    The response totals the estimated cost of the recommended models, the cost
    had each step used its most expensive ranked model, and the mean step
    confidence (0.0 for a workflow with no steps).
    """
    engine = tenant.recommender
    step_results: list[StepRecommendation] = []
    step_confidences: list[float] = []
    chosen_total = 0.0
    premium_total = 0.0

    for step in req.steps:
        if step.constraints:
            effective = step.constraints.merged_over(req.constraints)
        else:
            effective = req.constraints

        result = await engine.recommend(
            RecommendRequest(
                task=step.task,
                cost_quality_tradeoff=req.cost_quality_tradeoff,
                constraints=effective,
                user_id=req.user_id,
                namespace=req.namespace,
                allow_llm_escalation=req.allow_llm_escalation,
            )
        )
        step_results.append(StepRecommendation(step_id=step.step_id, recommendation=result))

        chosen_total += result.recommended_model.est_cost_usd
        # Most expensive ranked option; falls back to the pick when nothing is ranked.
        ranked_costs = (candidate.est_cost_usd for candidate in result.ranked)
        premium_total += max(ranked_costs, default=result.recommended_model.est_cost_usd)
        step_confidences.append(result.confidence)

    if step_confidences:
        mean_confidence = sum(step_confidences) / len(step_confidences)
    else:
        mean_confidence = 0.0

    return WorkflowResponse(
        workflow_recommendation_id=uuid.uuid4().hex,
        steps=step_results,
        total_est_cost_usd=round(chosen_total, 8),
        total_est_cost_if_all_premium=round(premium_total, 8),
        confidence=round(mean_confidence, 4),
    )
minima/api/routers/savings.py ADDED
@@ -0,0 +1,55 @@
1
+ """Savings endpoint — counterfactual cost accounting from the decision log.
2
+
3
+ Tenant-scoped: a caller sees only their own org's decisions (the decision log handed
4
+ to this router is already org-bound by the pass-through runtime).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import time
10
+
11
+ from fastapi import APIRouter, Depends, Query
12
+
13
+ from minima.api.auth import get_tenant
14
+ from minima.metrics.calibration import routing_health
15
+ from minima.metrics.savings import group_rows, summarize
16
+ from minima.schemas.savings import SavingsGroup, SavingsResponse
17
+ from minima.tenancy.context import TenantContext
18
+
19
+ router = APIRouter(prefix="/v1", tags=["savings"])
20
+
21
+ _SECONDS_PER_DAY = 86_400.0
22
+
23
+
24
@router.get("/savings", response_model=SavingsResponse)
async def savings(
    tenant: TenantContext = Depends(get_tenant),
    namespace: str | None = Query(None, description="restrict to one namespace lane"),
    days: float = Query(30.0, gt=0, le=365, description="lookback window in days"),
    group_by: str | None = Query(
        None, pattern="^(cluster|task_type|lane)$", description="optional breakdown"
    ),
) -> SavingsResponse:
    """Summarize the tenant's decision-log rows over the last ``days`` days.

    When ``namespace`` is given, rows are restricted to the lane
    ``<lane_prefix>:<namespace>``. A tenant without a decision log is summarized
    over an empty row set. Groups from ``group_rows(rows, group_by)`` are
    returned sorted by key, each with its own summary and health.
    """
    window_start = time.time() - days * _SECONDS_PER_DAY

    lane = None
    if namespace:
        lane = f"{tenant.lane_prefix}:{namespace}"

    rows = []
    if tenant.decision_log is not None:
        rows = tenant.decision_log.rows(since=window_start, lane=lane)

    overall_summary = summarize(rows)
    overall_health = routing_health(rows)

    breakdown = []
    for key, members in sorted(group_rows(rows, group_by).items()):
        breakdown.append(
            SavingsGroup(key=key, summary=summarize(members), health=routing_health(members))
        )

    return SavingsResponse(
        org_id=tenant.org_id,
        since=window_start,
        days=days,
        namespace=namespace,
        summary=overall_summary,
        health=overall_health,
        group_by=group_by,
        groups=breakdown,
    )
minima/api/routers/strategies.py ADDED
@@ -0,0 +1,33 @@
1
+ """Strategy-exposure endpoint — surfaces the rules Mubit has promoted for a namespace."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+
7
+ from fastapi import APIRouter, Depends, Query
8
+
9
+ from minima.api.auth import get_tenant
10
+ from minima.logging import get_logger
11
+ from minima.schemas.strategies import StrategiesResponse, Strategy
12
+ from minima.tenancy.context import TenantContext
13
+
14
+ log = get_logger("minima.strategies")
15
+ router = APIRouter(prefix="/v1", tags=["strategies"])
16
+
17
+
18
@router.get("/strategies", response_model=StrategiesResponse)
async def strategies(
    namespace: str | None = None,
    lesson_types: list[str] | None = Query(default=None),
    max_strategies: int = Query(default=5, ge=1, le=50),
    tenant: TenantContext = Depends(get_tenant),
) -> StrategiesResponse:
    """Return the strategies the memory backend surfaces for the namespace's lane.

    A non-mapping reply is treated as "no strategies"; within a mapping reply,
    non-mapping entries under ``"strategies"`` are skipped.
    """
    lane = tenant.lane(namespace)
    reply = await tenant.memory.surface_strategies(
        lane=lane, lesson_types=lesson_types, max_strategies=max_strategies
    )

    entries = []
    if isinstance(reply, Mapping):
        entries = reply.get("strategies") or []

    surfaced = [Strategy.from_emergent(entry) for entry in entries if isinstance(entry, Mapping)]
    return StrategiesResponse(
        namespace=namespace, lane=lane, strategies=surfaced, count=len(surfaced)
    )
minima/catalog/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Model catalog: cost (live) + capability priors (static fallback)."""