minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
minima/__init__.py
ADDED
minima/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""HTTP API layer."""
|
minima/api/auth.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Auth dependency: resolve the caller's Mubit API key to a TenantContext.
|
|
2
|
+
|
|
3
|
+
Pass-through mode: the caller presents their own Mubit key as
|
|
4
|
+
``Authorization: Bearer <mubit_api_key>``. Minima uses it directly against
|
|
5
|
+
MUBIT_ENDPOINT; no Minima-issued keys, no provisioning step.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from fastapi import Request
|
|
11
|
+
|
|
12
|
+
from minima.api.errors import ApiError
|
|
13
|
+
from minima.tenancy.context import TenantContext
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def bearer_key(request: Request) -> str | None:
    """Return the bearer token carried by the ``Authorization`` header, if any.

    The ``bearer `` scheme prefix is matched case-insensitively and the token is
    stripped of surrounding whitespace. ``None`` is returned when the header is
    missing, uses a different scheme, or carries an empty token.
    """
    scheme_prefix = "bearer "
    header = request.headers.get("authorization", "")
    if not header.lower().startswith(scheme_prefix):
        return None
    token = header[len(scheme_prefix):].strip()
    return token if token else None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def get_tenant(request: Request) -> TenantContext:
    """Resolve the caller's bearer key to a tenant context.

    The key extracted by ``bearer_key`` is handed to the application's
    ``passthrough_runtime.resolve``.

    Raises:
        ApiError: 401 "Unauthorized" when the request carries no bearer key.
    """
    token = bearer_key(request)
    if token:
        return request.app.state.passthrough_runtime.resolve(token)
    raise ApiError(
        401,
        "Unauthorized",
        "pass your Mubit API key as: Authorization: Bearer <key>",
    )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def get_tenant_optional(request: Request) -> TenantContext | None:
    """Resolve the bearer key like ``get_tenant``, but yield ``None`` when it is absent.

    Intended for endpoints such as health probes that must answer without
    credentials instead of failing with a 401.
    """
    token = bearer_key(request)
    return request.app.state.passthrough_runtime.resolve(token) if token else None
|
minima/api/errors.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""RFC7807-style problem+json error handlers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from fastapi import FastAPI, Request
|
|
6
|
+
from fastapi.responses import JSONResponse
|
|
7
|
+
|
|
8
|
+
from minima.recommender.engine import NoCandidatesError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ApiError(Exception):
    """Exception carrying an explicit HTTP status, title and detail.

    The registered handler renders it as a problem+json response (used e.g. for
    auth failures). ``detail`` doubles as the exception message.
    """

    def __init__(self, status: int, title: str, detail: str):
        # The detail text is the exception's only positional argument.
        super().__init__(detail)
        self.status = status
        self.title = title
        self.detail = detail
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _problem(status: int, title: str, detail: str) -> JSONResponse:
    """Build a problem+json response with the given status, title and detail.

    The ``type`` member is always ``about:blank``; ``status`` is repeated in the
    body as well as being used as the HTTP status code.
    """
    payload = {
        "type": "about:blank",
        "title": title,
        "status": status,
        "detail": detail,
    }
    return JSONResponse(
        content=payload,
        status_code=status,
        media_type="application/problem+json",
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def register_error_handlers(app: FastAPI) -> None:
    """Install the problem+json exception handlers on *app*.

    Mapping:
      * ``ApiError``          -> the error's own status/title/detail
      * ``NoCandidatesError`` -> 422 "No candidate models"
      * ``ValueError``        -> 400 "Invalid request"
    """

    async def _api_error(_request: Request, exc: ApiError) -> JSONResponse:
        return _problem(exc.status, exc.title, exc.detail)

    async def _no_candidates(_request: Request, exc: NoCandidatesError) -> JSONResponse:
        return _problem(422, "No candidate models", str(exc))

    async def _value_error(_request: Request, exc: ValueError) -> JSONResponse:
        return _problem(400, "Invalid request", str(exc))

    # Registered in the same order the decorator form would have used.
    app.add_exception_handler(ApiError, _api_error)
    app.add_exception_handler(NoCandidatesError, _no_candidates)
    app.add_exception_handler(ValueError, _value_error)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""API route modules."""
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Calibration endpoint — is predicted_success telling the truth for this org?"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from fastapi import APIRouter, Depends, Query
|
|
8
|
+
|
|
9
|
+
from minima.api.auth import get_tenant
|
|
10
|
+
from minima.config import Settings
|
|
11
|
+
from minima.deps import get_settings
|
|
12
|
+
from minima.metrics.calibration import calibration_by_task_type, cusum_flags, routing_health
|
|
13
|
+
from minima.schemas.savings import CalibrationResponse
|
|
14
|
+
from minima.tenancy.context import TenantContext
|
|
15
|
+
|
|
16
|
+
router = APIRouter(prefix="/v1", tags=["calibration"])
|
|
17
|
+
|
|
18
|
+
_SECONDS_PER_DAY = 86_400.0
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@router.get("/calibration", response_model=CalibrationResponse)
async def calibration(
    tenant: TenantContext = Depends(get_tenant),
    settings: Settings = Depends(get_settings),
    namespace: str | None = Query(None, description="restrict to one namespace lane"),
    days: float | None = Query(None, gt=0, le=365, description="lookback window in days"),
) -> CalibrationResponse:
    """Report calibration, routing health and drift flags for the caller's org.

    The lookback window is ``days`` when given, otherwise the configured
    ``minima_calibration_window_days``. When ``namespace`` is set, only rows of
    the lane ``<lane_prefix>:<namespace>`` are read. A tenant without a decision
    log yields a report computed over no rows.
    """
    if days is None:
        window_days = float(settings.minima_calibration_window_days)
    else:
        window_days = days
    since = time.time() - window_days * _SECONDS_PER_DAY

    lane = None
    if namespace:
        lane = f"{tenant.lane_prefix}:{namespace}"

    if tenant.decision_log is None:
        rows = []
    else:
        rows = tenant.decision_log.rows(since=since, lane=lane)

    health = routing_health(rows)
    reports = calibration_by_task_type(
        rows,
        n_bins=settings.minima_calibration_bins,
        shrinkage_k=settings.minima_calibration_shrinkage_k,
    )
    drift_flags = cusum_flags(rows, k=settings.minima_cusum_k, h=settings.minima_cusum_h)
    return CalibrationResponse(
        org_id=tenant.org_id,
        since=since,
        days=window_days,
        namespace=namespace,
        health=health,
        reports=reports,
        drift_flags=drift_flags,
    )
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""Feedback endpoint — writes the outcome to Mubit and closes the learning loop."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
from fastapi import APIRouter, Depends
|
|
9
|
+
|
|
10
|
+
from minima.api.auth import get_tenant
|
|
11
|
+
from minima.config import Settings
|
|
12
|
+
from minima.deps import get_settings
|
|
13
|
+
from minima.logging import get_logger
|
|
14
|
+
from minima.memory.adapter import Memory
|
|
15
|
+
from minima.memory.keys import (
|
|
16
|
+
build_lesson_content,
|
|
17
|
+
lesson_upsert_key,
|
|
18
|
+
outcome_idempotency_key,
|
|
19
|
+
outcome_upsert_key,
|
|
20
|
+
)
|
|
21
|
+
from minima.memory.records import (
|
|
22
|
+
OutcomeRecord,
|
|
23
|
+
quality_from_outcome,
|
|
24
|
+
reconcile_quality,
|
|
25
|
+
signal_from_outcome,
|
|
26
|
+
)
|
|
27
|
+
from minima.recommender.decisionlog import DecisionRecord, Reconciliation
|
|
28
|
+
from minima.schemas.common import OutcomeLabel
|
|
29
|
+
from minima.schemas.feedback import FeedbackRequest, FeedbackResponse
|
|
30
|
+
from minima.tenancy.context import TenantContext
|
|
31
|
+
|
|
32
|
+
log = get_logger("minima.feedback")
|
|
33
|
+
router = APIRouter(prefix="/v1", tags=["feedback"])
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Strong references to in-flight reflect tasks. The event loop keeps only weak
# references to tasks, so a task nobody else references can be garbage-collected
# before it finishes; each task removes itself from this set when it is done.
_REFLECT_TASKS: set[asyncio.Task[None]] = set()


def _fire_reflect(memory: Memory, lane: str, user_id: str | None) -> None:
    """Schedule ``memory.reflect`` for *lane* in the background without awaiting it.

    Must be called while an event loop is running (``asyncio.create_task``
    requires one). Any exception raised by the reflect call is logged as
    ``reflect_failed`` and swallowed, so the background task never surfaces an
    error to the caller.

    Args:
        memory: adapter whose ``reflect`` coroutine is invoked.
        lane: lane passed through to ``reflect``.
        user_id: optional user id passed through to ``reflect``.
    """

    async def _run() -> None:
        try:
            await memory.reflect(lane=lane, user_id=user_id)
        except Exception as exc:  # noqa: BLE001 — fire-and-forget, errors are logged
            log.warning("reflect_failed", lane=lane, error=str(exc))

    task = asyncio.create_task(_run())
    # Hold the task until completion, then drop the reference.
    _REFLECT_TASKS.add(task)
    task.add_done_callback(_REFLECT_TASKS.discard)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@router.post("/feedback", response_model=FeedbackResponse)
async def feedback(
    req: FeedbackRequest,
    tenant: TenantContext = Depends(get_tenant),
    settings: Settings = Depends(get_settings),
) -> FeedbackResponse:
    """Record the realized outcome of a recommendation and close the learning loop.

    In order: look the recommendation up in the tenant's recstore (falling back
    to ``_late_feedback`` through the decision log when it is missing), write the
    outcome record, remember its id as a durable ref, reinforce the recalled
    neighbors, optionally promote a lesson, optionally fire a background
    reflect, and reconcile the decision-log row.

    The response has ``accepted=False`` when the recommendation is unknown or the
    outcome write fails; failures in the later steps only add warnings.
    """
    memory = tenant.memory
    rec_id = req.recommendation_id
    model_id = req.chosen_model_id

    # Org-scoped store: a recommendation_id minted for another org resolves to None
    # here, so org A cannot credit or poison org B's recommendation.
    stored = tenant.recstore.get(rec_id)
    if stored is None:
        # Degraded late-feedback path: the recstore TTL expired but the decision log
        # (longer retention) still knows the recommendation. The outcome record is
        # still written; only neighbor attribution and lesson promotion are skipped,
        # because the recalled-neighbor ids lived in the recstore alone.
        late_ok = settings.minima_late_feedback_enabled and tenant.decision_log is not None
        decision = tenant.decision_log.get(rec_id) if late_ok else None
        if decision is not None:
            return await _late_feedback(req, tenant, decision)
        return FeedbackResponse(accepted=False, warnings=["unknown_recommendation"])

    outcome_label = req.outcome.value
    verified = req.verified_in_production
    cluster = stored.task_cluster

    quality, mismatch = reconcile_quality(
        outcome_label, quality_from_outcome(outcome_label, req.quality_score)
    )
    signal = signal_from_outcome(outcome_label, quality)
    is_success = req.outcome == OutcomeLabel.success

    notes: list[str] = []
    if mismatch:
        notes.append(mismatch)
        log.warning(
            "quality_outcome_mismatch",
            outcome=outcome_label,
            supplied_quality=req.quality_score,
            clamped_quality=quality,
            model_id=model_id,
            cluster=cluster,
        )

    record = OutcomeRecord(
        model_id=model_id,
        task_type=stored.task_type,
        difficulty=stored.difficulty,
        task_fingerprint=stored.task_fingerprint,
        task_cluster=cluster,
        input_tokens=req.input_tokens or 0,
        output_tokens=req.output_tokens or 0,
        cost_usd=req.actual_cost_usd or 0.0,
        latency_ms=req.latency_ms,
        iterations=req.iterations,
        quality_score=quality,
        outcome=outcome_label,
        recommendation_id=rec_id,
        verified_in_production=verified,
        recorded_at=time.time(),
    )
    upsert_key = outcome_upsert_key(cluster, model_id)
    idem = req.idempotency_key or outcome_idempotency_key(rec_id, model_id)
    importance = "medium"
    if verified and is_success:
        importance = "high"

    # Without the outcome write there is nothing to reinforce, so a failure here
    # rejects the feedback outright.
    try:
        record_id = await memory.remember_outcome(
            content=stored.content,
            record=record,
            lane=stored.lane,
            upsert_key=upsert_key,
            idempotency_key=idem,
            user_id=stored.user_id,
            env_tags=stored.env_tags or None,
            importance=importance,
            source="human",
        )
    except Exception as exc:  # noqa: BLE001
        log.warning("remember_outcome_failed", error=str(exc))
        return FeedbackResponse(accepted=False, warnings=["memory_write_failed"])

    # The upserted (cluster, model) record's id is stable across feedbacks and
    # dereferenceable — remember it for the exact-match recall fast path.
    if record_id and tenant.durable_refs is not None:
        try:
            tenant.durable_refs.upsert(stored.lane, cluster, model_id, record_id, record_id)
        except Exception as exc:  # noqa: BLE001 — bookkeeping must never fail feedback
            log.warning("durable_ref_upsert_failed", error=str(exc))

    neighbors = stored.neighbors_by_model.get(model_id, [])
    entry_ids = [entry_id for entry_id, _ in neighbors if entry_id]
    # First non-empty neighbor reference wins; otherwise use the fresh record's id.
    primary_ref = record_id
    for _, neighbor_ref in neighbors:
        if neighbor_ref:
            primary_ref = neighbor_ref
            break

    updated_confidence: float | None = None
    if primary_ref:
        try:
            reinforced = await memory.record_outcome(
                lane=stored.lane,
                reference_id=primary_ref,
                outcome=outcome_label,
                signal=signal,
                entry_ids=entry_ids or None,
                user_id=stored.user_id,
                verified_in_production=verified,
                idempotency_key=f"oc:{idem}",
                rationale=f"minima feedback {rec_id}: ran {model_id}",
            )
            raw_confidence = reinforced.get("updated_confidence")
            if raw_confidence is not None:
                updated_confidence = float(raw_confidence)
        except Exception as exc:  # noqa: BLE001
            log.warning("record_outcome_failed", error=str(exc))
            notes.append("reinforcement_failed")

    # Promote a verified-in-production strong success to a durable Lesson. A
    # per-(cluster, model) upsert_key keeps one accumulating lesson instead of
    # flooding.
    lesson_promoted = False
    if (
        settings.minima_lesson_on_verified_prod
        and verified
        and is_success
        and quality >= settings.minima_lesson_min_quality
    ):
        try:
            await memory.remember_lesson(
                content=build_lesson_content(cluster, model_id, quality),
                lane=stored.lane,
                upsert_key=lesson_upsert_key(cluster, model_id),
                user_id=stored.user_id,
                env_tags=stored.env_tags or None,
                metadata={
                    "kind": "lesson",
                    "task_cluster": cluster,
                    "model_id": model_id,
                    "verified_in_production": True,
                },
                idempotency_key=f"lsn:{idem}",
            )
        except Exception as exc:  # noqa: BLE001 — lesson promotion is best-effort
            log.warning("lesson_promotion_failed", error=str(exc))
            notes.append("lesson_promotion_failed")
        else:
            lesson_promoted = True

    # Reflect every N-th feedback on the lane, and always after a verified
    # production failure.
    count = tenant.lane_counter.bump(tenant.counter_key(stored.lane))
    every = settings.minima_reflect_every_n
    periodic_due = every > 0 and count % every == 0
    reflection_triggered = bool(periodic_due or (verified and not is_success))
    if reflection_triggered:
        _fire_reflect(memory, stored.lane, stored.user_id)

    _reconcile_decision(tenant, req, quality, late=False)

    return FeedbackResponse(
        accepted=True,
        record_id=record_id,
        reinforced_entry_ids=entry_ids,
        updated_confidence=updated_confidence,
        reflection_triggered=reflection_triggered,
        lesson_promoted=lesson_promoted,
        warnings=notes,
    )
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _reconcile_decision(
    tenant: TenantContext, req: FeedbackRequest, quality: float, *, late: bool
) -> None:
    """Write the realized outcome onto the decision-log row (best-effort analytics).

    Does nothing when the tenant has no decision log. Any exception from building
    or storing the reconciliation is logged as ``decision_reconcile_failed`` and
    swallowed, so feedback handling never fails because of it.

    Args:
        tenant: tenant whose decision log is updated.
        req: the feedback request supplying model, outcome, cost and latency.
        quality: the reconciled quality score to record.
        late: whether this feedback arrived through the late-feedback path.
    """
    if tenant.decision_log is None:
        return
    try:
        realized = Reconciliation(
            model_id=req.chosen_model_id,
            outcome=req.outcome.value,
            quality=quality,
            cost_usd=req.actual_cost_usd,
            latency_ms=req.latency_ms,
            ts=time.time(),
            late=late,
        )
        tenant.decision_log.reconcile(req.recommendation_id, realized)
    except Exception as exc:  # noqa: BLE001 — analytics must never fail feedback
        log.warning("decision_reconcile_failed", error=str(exc))
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
async def _late_feedback(
    req: FeedbackRequest,
    tenant: TenantContext,
    decision: DecisionRecord,
) -> FeedbackResponse:
    """Accept feedback whose recstore entry is gone, using the decision-log row.

    The outcome record is written with task metadata taken from *decision*;
    neighbor reinforcement and lesson promotion are not attempted. Every response
    carries the ``late_feedback_no_attribution`` warning, and a failed outcome
    write yields ``accepted=False`` with ``memory_write_failed`` in front of it.
    """
    outcome_label = req.outcome.value
    model_id = req.chosen_model_id

    quality, mismatch = reconcile_quality(
        outcome_label, quality_from_outcome(outcome_label, req.quality_score)
    )
    notes = ["late_feedback_no_attribution"]
    if mismatch:
        notes.append(mismatch)

    record = OutcomeRecord(
        model_id=model_id,
        task_type=decision.task_type,
        difficulty=decision.difficulty,
        task_fingerprint=decision.fingerprint,
        task_cluster=decision.cluster,
        input_tokens=req.input_tokens or 0,
        output_tokens=req.output_tokens or 0,
        cost_usd=req.actual_cost_usd or 0.0,
        latency_ms=req.latency_ms,
        iterations=req.iterations,
        quality_score=quality,
        outcome=outcome_label,
        recommendation_id=req.recommendation_id,
        verified_in_production=req.verified_in_production,
        recorded_at=time.time(),
    )
    idem = req.idempotency_key or outcome_idempotency_key(req.recommendation_id, model_id)

    try:
        record_id = await tenant.memory.remember_outcome(
            content=decision.content,
            record=record,
            lane=decision.lane,
            upsert_key=outcome_upsert_key(decision.cluster, model_id),
            idempotency_key=idem,
            user_id=decision.user_id,
            env_tags=decision.env_tags or None,
            importance="medium",
            source="human",
        )
    except Exception as exc:  # noqa: BLE001
        log.warning("late_remember_outcome_failed", error=str(exc))
        return FeedbackResponse(accepted=False, warnings=["memory_write_failed", *notes])

    _reconcile_decision(tenant, req, quality, late=True)
    return FeedbackResponse(accepted=True, record_id=record_id, warnings=notes)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Health endpoint — always 200; reports degraded state in the body."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from fastapi import APIRouter, Depends
|
|
8
|
+
|
|
9
|
+
from minima.api.auth import get_tenant_optional
|
|
10
|
+
from minima.catalog.store import CatalogStore
|
|
11
|
+
from minima.config import Settings
|
|
12
|
+
from minima.deps import get_catalog_store, get_settings
|
|
13
|
+
from minima.tenancy.context import TenantContext
|
|
14
|
+
from minima.version import __version__
|
|
15
|
+
|
|
16
|
+
router = APIRouter(prefix="/v1", tags=["health"])
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@router.get("/health")
async def health(
    tenant: TenantContext | None = Depends(get_tenant_optional),
    catalog_store: CatalogStore = Depends(get_catalog_store),
    settings: Settings = Depends(get_settings),
) -> dict[str, Any]:
    """Report service liveness plus catalog, reasoner and (when authenticated) Mubit state.

    Without a resolved tenant the Mubit block is a placeholder with
    ``reachable=None`` and ``scope="unauthenticated"``. With a tenant it is the
    result of ``tenant.memory.health()`` extended with the tenant's endpoint and
    org id. ``status`` is ``"degraded"`` only when ``reachable`` is falsy and not
    ``None``; otherwise it is ``"ok"``.
    """
    catalog = catalog_store.get()

    if tenant is not None:
        mubit = await tenant.memory.health()
        mubit["endpoint"] = tenant.mubit_endpoint
        mubit["org_id"] = tenant.org_id
    else:
        # Unauthenticated probe: no tenant, so Mubit reachability is unknown.
        mubit: dict[str, Any] = {"reachable": None, "scope": "unauthenticated"}

    reachable = mubit.get("reachable")
    degraded = reachable is not None and not reachable
    catalog_info = {
        "version": catalog.version,
        "cost_source": catalog.cost_source,
        "stale": catalog.stale,
        "models": len(catalog.cards),
    }
    reasoner_info = {
        "provider": settings.minima_reasoner_provider,
        "configured": settings.reasoner_enabled,
    }
    return {
        "status": "degraded" if degraded else "ok",
        "mubit": mubit,
        "auth": "passthrough",
        "catalog": catalog_info,
        "reasoner": reasoner_info,
        "version": __version__,
    }
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Model catalog endpoint."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, Depends
|
|
6
|
+
|
|
7
|
+
from minima.catalog.store import CatalogStore
|
|
8
|
+
from minima.deps import get_catalog_store
|
|
9
|
+
from minima.schemas.common import TaskType
|
|
10
|
+
from minima.schemas.models_catalog import ModelsResponse
|
|
11
|
+
|
|
12
|
+
router = APIRouter(prefix="/v1", tags=["models"])
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@router.get("/models", response_model=ModelsResponse)
async def list_models(
    provider: str | None = None,
    task_type: TaskType | None = None,
    max_cost: float | None = None,
    include_stale: bool = True,
    catalog_store: CatalogStore = Depends(get_catalog_store),
) -> ModelsResponse:
    """List catalog models, optionally filtered, ordered by ascending input cost.

    Filters, applied in this order:
      * ``provider``: case-insensitive match on the card's provider.
      * ``task_type``: the card must list it in ``capability_by_task_type``.
      * ``max_cost``: the larger of the input/output per-Mtok costs must not exceed it.
      * ``include_stale=False``: drop cards with stale cost, unless that would
        leave nothing, in which case the stale cards are kept.
    """
    catalog = catalog_store.get()
    selected = list(catalog.cards)

    if provider:
        wanted = provider.lower()
        selected = [card for card in selected if card.provider.lower() == wanted]
    if task_type is not None:
        selected = [card for card in selected if task_type in card.capability_by_task_type]
    if max_cost is not None:
        selected = [
            card
            for card in selected
            if max(card.input_cost_per_mtok, card.output_cost_per_mtok) <= max_cost
        ]
    if not include_stale:
        # Never return empty solely due to staleness.
        selected = [card for card in selected if not card.cost_stale] or selected

    ordered = sorted(selected, key=lambda card: card.input_cost_per_mtok)
    return ModelsResponse(
        models=ordered,
        catalog_version=catalog.version,
        refreshed_at=catalog.refreshed_at,
        stale=catalog.stale,
    )
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Recommendation endpoints (per-call and per-workflow-step)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
from fastapi import APIRouter, Depends
|
|
8
|
+
|
|
9
|
+
from minima.api.auth import get_tenant
|
|
10
|
+
from minima.schemas.recommend import RecommendRequest, RecommendResponse
|
|
11
|
+
from minima.schemas.workflow import (
|
|
12
|
+
StepRecommendation,
|
|
13
|
+
WorkflowRequest,
|
|
14
|
+
WorkflowResponse,
|
|
15
|
+
)
|
|
16
|
+
from minima.tenancy.context import TenantContext
|
|
17
|
+
|
|
18
|
+
router = APIRouter(prefix="/v1", tags=["recommend"])
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@router.post("/recommend", response_model=RecommendResponse)
async def recommend(
    req: RecommendRequest, tenant: TenantContext = Depends(get_tenant)
) -> RecommendResponse:
    """Return the tenant recommender's answer for a single request."""
    engine = tenant.recommender
    response = await engine.recommend(req)
    return response
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@router.post("/recommend/workflow", response_model=WorkflowResponse)
async def recommend_workflow(
    req: WorkflowRequest, tenant: TenantContext = Depends(get_tenant)
) -> WorkflowResponse:
    """Recommend a model for every workflow step and aggregate cost and confidence.

    Steps are handled one after another. A step's own constraints, when present,
    are merged over the workflow-level constraints. The response reports the
    summed estimated cost of the recommended models, the summed cost of the most
    expensive ranked model per step, and the mean per-step confidence (``0.0``
    for a workflow without steps).
    """
    engine = tenant.recommender
    step_results: list[StepRecommendation] = []
    confidences: list[float] = []
    # Running totals are accumulated with += to keep the float results stable.
    chosen_total = 0.0
    premium_total = 0.0

    for step in req.steps:
        if step.constraints:
            effective = step.constraints.merged_over(req.constraints)
        else:
            effective = req.constraints
        step_request = RecommendRequest(
            task=step.task,
            cost_quality_tradeoff=req.cost_quality_tradeoff,
            constraints=effective,
            user_id=req.user_id,
            namespace=req.namespace,
            allow_llm_escalation=req.allow_llm_escalation,
        )
        result = await engine.recommend(step_request)
        step_results.append(StepRecommendation(step_id=step.step_id, recommendation=result))
        chosen_total += result.recommended_model.est_cost_usd
        # Priciest ranked candidate; falls back to the chosen model when nothing is ranked.
        premium_total += max(
            (ranked.est_cost_usd for ranked in result.ranked),
            default=result.recommended_model.est_cost_usd,
        )
        confidences.append(result.confidence)

    if confidences:
        mean_confidence = sum(confidences) / len(confidences)
    else:
        mean_confidence = 0.0

    return WorkflowResponse(
        workflow_recommendation_id=uuid.uuid4().hex,
        steps=step_results,
        total_est_cost_usd=round(chosen_total, 8),
        total_est_cost_if_all_premium=round(premium_total, 8),
        confidence=round(mean_confidence, 4),
    )
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Savings endpoint — counterfactual cost accounting from the decision log.
|
|
2
|
+
|
|
3
|
+
Tenant-scoped: a caller sees only their own org's decisions (the decision log handed
|
|
4
|
+
to this router is already org-bound by the pass-through runtime).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import time
|
|
10
|
+
|
|
11
|
+
from fastapi import APIRouter, Depends, Query
|
|
12
|
+
|
|
13
|
+
from minima.api.auth import get_tenant
|
|
14
|
+
from minima.metrics.calibration import routing_health
|
|
15
|
+
from minima.metrics.savings import group_rows, summarize
|
|
16
|
+
from minima.schemas.savings import SavingsGroup, SavingsResponse
|
|
17
|
+
from minima.tenancy.context import TenantContext
|
|
18
|
+
|
|
19
|
+
router = APIRouter(prefix="/v1", tags=["savings"])
|
|
20
|
+
|
|
21
|
+
_SECONDS_PER_DAY = 86_400.0
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@router.get("/savings", response_model=SavingsResponse)
async def savings(
    tenant: TenantContext = Depends(get_tenant),
    namespace: str | None = Query(None, description="restrict to one namespace lane"),
    days: float = Query(30.0, gt=0, le=365, description="lookback window in days"),
    group_by: str | None = Query(
        None, pattern="^(cluster|task_type|lane)$", description="optional breakdown"
    ),
) -> SavingsResponse:
    """Summarize savings and routing health from the tenant's decision log.

    Rows are read from ``days`` ago until now, restricted to the lane
    ``<lane_prefix>:<namespace>`` when ``namespace`` is given. A tenant without a
    decision log is summarized over no rows. Groups returned by ``group_rows``
    are reported in sorted key order, each with its own summary and health.
    """
    since = time.time() - days * _SECONDS_PER_DAY

    lane = None
    if namespace:
        lane = f"{tenant.lane_prefix}:{namespace}"

    if tenant.decision_log is None:
        rows = []
    else:
        rows = tenant.decision_log.rows(since=since, lane=lane)

    overall_summary = summarize(rows)
    overall_health = routing_health(rows)

    breakdown = []
    for key, members in sorted(group_rows(rows, group_by).items()):
        breakdown.append(
            SavingsGroup(key=key, summary=summarize(members), health=routing_health(members))
        )

    return SavingsResponse(
        org_id=tenant.org_id,
        since=since,
        days=days,
        namespace=namespace,
        summary=overall_summary,
        health=overall_health,
        group_by=group_by,
        groups=breakdown,
    )
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Strategy-exposure endpoint — surfaces the rules Mubit has promoted for a namespace."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
|
|
7
|
+
from fastapi import APIRouter, Depends, Query
|
|
8
|
+
|
|
9
|
+
from minima.api.auth import get_tenant
|
|
10
|
+
from minima.logging import get_logger
|
|
11
|
+
from minima.schemas.strategies import StrategiesResponse, Strategy
|
|
12
|
+
from minima.tenancy.context import TenantContext
|
|
13
|
+
|
|
14
|
+
log = get_logger("minima.strategies")
|
|
15
|
+
router = APIRouter(prefix="/v1", tags=["strategies"])
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@router.get("/strategies", response_model=StrategiesResponse)
async def strategies(
    namespace: str | None = None,
    lesson_types: list[str] | None = Query(default=None),
    max_strategies: int = Query(default=5, ge=1, le=50),
    tenant: TenantContext = Depends(get_tenant),
) -> StrategiesResponse:
    """Return the strategies the memory backend surfaces for a namespace's lane.

    The backend reply is used only when it is a mapping; its ``strategies``
    entries that are themselves mappings are parsed with
    ``Strategy.from_emergent``, and everything else is ignored.
    """
    lane = tenant.lane(namespace)
    reply = await tenant.memory.surface_strategies(
        lane=lane, lesson_types=lesson_types, max_strategies=max_strategies
    )

    candidates = []
    if isinstance(reply, Mapping):
        candidates = reply.get("strategies") or []

    parsed = []
    for candidate in candidates:
        if isinstance(candidate, Mapping):
            parsed.append(Strategy.from_emergent(candidate))

    return StrategiesResponse(
        namespace=namespace, lane=lane, strategies=parsed, count=len(parsed)
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Model catalog: cost (live) + capability priors (static fallback)."""
|