minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
minima/config.py
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""Environment-driven configuration.
|
|
2
|
+
|
|
3
|
+
Every setting is read from an environment variable with the same (case-insensitive)
|
|
4
|
+
name, optionally from a local ``.env`` file. The only required value is ``MUBIT_API_KEY``.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from functools import lru_cache
|
|
10
|
+
|
|
11
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Settings(BaseSettings):
    # Typed view over the process configuration. Field names double as the
    # (case-insensitive) environment variable names; values may also come from a
    # local ``.env`` file, and unknown keys found there are ignored.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False,
    )

    # --- Mubit memory backend ---
    mubit_endpoint: str = "http://127.0.0.1:3000"
    mubit_api_key: str | None = None
    mubit_transport: str = "auto"  # auto | grpc | http
    mubit_timeout_ms: int = 30_000

    # --- Memory read path ---
    minima_memory_recall_timeout_ms: int = 2500
    minima_memory_recall_limit: int = 25
    # Recall mode. direct_bypass is the faster option but only works when the Mubit
    # instance has enable_direct_search=true (off by default on hosted api.mubit.ai);
    # agent_routed works on every instance type.
    minima_recall_mode: str = "agent_routed"  # agent_routed | direct_bypass
    minima_lane_prefix: str = "minima"
    minima_seed_lane: str = "minima:default"
    # Comma-separated LTM entry types to recall. Minima evidence is stored under exactly
    # two types (seeds ingest as "fact", feedback as "observation"), so filtering
    # server-side keeps traces/lessons/etc. out of the candidate pool.
    # Empty string = no filter (legacy).
    minima_recall_entry_types: str = "fact,observation"
    # Server-side ranking strategy: "relevance" | "freshness" | "balanced" | "" (omit).
    # With "balanced", recency influences WHICH neighbors are retrieved; how much each
    # neighbor then counts is decided by the client-side age decay (see the evidence
    # half-life below). Using "freshness" on top of that decay would double-discount
    # old evidence.
    minima_recall_rank_by: str = "balanced"
    # Hard recency window in days: only evidence from the last N days is recalled
    # (0 = no window).
    minima_recall_max_age_days: int = 0
    # Mubit search budget tier: "low" | "mid" | "high" ("" = server default).
    minima_recall_budget: str = "mid"
    # Ask for per-evidence score breakdowns (ExplainInfo) and log them. Diagnostic only:
    # it adds payload weight, so keep it off in prod unless investigating recall quality.
    minima_recall_explain: bool = False

    # --- Recommender tuning ---
    minima_tau_min: float = 0.55
    minima_tau_max: float = 0.92
    minima_beta_pseudocount: float = 2.5
    minima_escalation_w_min: float = 1.5
    minima_escalation_n_min: int = 3
    minima_escalation_c_min: float = 0.45
    minima_escalation_tie_delta: float = 0.05
    # Escalation trigger mode. "legacy" keeps the four independent heuristics.
    # "uncertainty" swaps thin_evidence + low_confidence for one posterior-interval-width
    # gate on the recommended candidate; conflict remains a hard override and tie is
    # kept because it captures rank instability the interval doesn't. Shadow
    # "uncertainty" before switching the default.
    minima_escalation_mode: str = "legacy"  # legacy | uncertainty
    minima_escalation_interval_width: float = 0.25
    # "near_threshold" trigger: escalate when the recommended model's predicted success
    # sits within this margin above tau, i.e. a fragile pick that one more failure round
    # would drop. 0.0 = disabled. Recommended starting value: 0.10.
    minima_escalation_near_threshold_delta: float = 0.10
    minima_default_input_tokens: int = 1500
    minima_default_output_tokens: int = 500
    minima_reflect_every_n: int = 25
    # Rank eligible models by the OBSERVED average $/call from recalled outcomes (Mubit
    # stores cost_usd per outcome) rather than a flat token estimate. The estimate
    # assumes a fixed completion length and therefore ignores reasoning/thinking tokens,
    # which can mis-rank a cheap-listed model that is expensive in practice (e.g. a
    # "flash" model that spends heavily on internal reasoning). The estimate is used as
    # the fallback when a candidate has fewer than minima_observed_cost_min_n cost
    # observations.
    minima_use_observed_cost: bool = True
    minima_observed_cost_min_n: int = 3
    # Evidence age decay: the weight of each recalled outcome halves every half-life.
    # For records carrying a recorded_at timestamp this replaces the old binary stale
    # 0.5x; supersession (is_stale) still caps the multiplier at 0.5.
    # knowledge_confidence is deliberately NOT touched: its server-side recency component
    # reflects *reinforcement* recency while this decay reflects *observation* age. They
    # are distinct signals, so multiplying both is intended, but adding extra recency
    # factors on top is not.
    minima_evidence_half_life_days: float = 30.0
    minima_evidence_decay_floor: float = 0.1
    # Seed-vs-live weighting: seeded outcomes (source_dataset set) count at this weight
    # and decay linearly to zero once a model has crowdout_n live outcomes in the
    # recalled set, so live evidence replaces the bootstrap instead of competing with it
    # forever.
    minima_seed_weight: float = 0.5
    minima_seed_crowdout_n: int = 5
    # Latency-aware ranking: candidates are annotated with a robust observed latency
    # percentile and Constraints.max_latency_ms is enforced against it, but only for
    # candidates with at least min_n latency observations (a model is never excluded
    # without evidence).
    minima_latency_percentile: float = 0.75
    minima_latency_min_n: int = 3
    # Multipliers on the default output-token count, keyed by classified difficulty.
    # Applied when the caller does not supply expected_output_tokens; this affects the
    # "estimate" cost basis only.
    minima_difficulty_output_multipliers: dict[str, float] = {
        "trivial": 0.5,
        "easy": 0.75,
        "medium": 1.0,
        "hard": 1.5,
        "expert": 2.0,
    }

    # --- Cheap-LLM reasoner (recommend-only) ---
    minima_reasoner_provider: str = "none"  # none | anthropic | gemini
    minima_reasoner_model: str | None = None  # default per provider (anthropic -> claude-haiku-4-5)
    # Per-attempt timeout. The reasoner is the explicit slow tier (only consulted on
    # escalation): a real ranking call with structured output takes ~6-8s, so a tight
    # budget makes it time out and silently degrade. It never touches the caller's own
    # LLM call (Minima adds zero latency there).
    minima_reasoner_timeout_ms: int = 15_000
    # Hard output cap (the reasoner stops early when done). Gemini 3.x "flash" spends
    # output tokens on internal reasoning before emitting the JSON, so a small cap
    # truncates the structured response; keep headroom. Anthropic forced-tool-use is
    # compact and won't approach this.
    minima_reasoner_max_tokens: int = 4096
    minima_reasoner_blend: float = 0.5  # weight on the LLM estimate vs the deterministic one
    # Adaptive blend: instead of the fixed minima_reasoner_blend, weight the LLM estimate
    # by how thin the deterministic evidence is
    # (blend = blend_max * (1 - confidence), clamped to [0.1, 0.9]). Heavy evidence
    # barely moves; cold candidates lean on the LLM.
    minima_reasoner_blend_adaptive: bool = True
    minima_reasoner_blend_max: float = 0.8
    minima_reasoner_classify: bool = True  # let the reasoner refine ambiguous task classification
    anthropic_api_key: str | None = None
    gemini_api_key: str | None = None

    # --- Selection-bias correction (inverse propensity weighting) ---
    minima_ipw_enabled: bool = True
    minima_ipw_clip_low: float = 0.1
    minima_ipw_clip_high: float = 10.0

    # --- Learning maturity ---
    # Cluster granularity sets the upsert grouping (one durable record per
    # cluster+model). "coarse" = task_type:difficulty; "fine" additionally appends a
    # salient-keyword signature bucket so topically-distinct tasks of the same
    # type/difficulty accumulate separately.
    minima_cluster_granularity: str = "coarse"  # coarse | fine
    minima_cluster_signature_tokens: int = 4
    # Promote a verified-in-production strong success to a durable Lesson (feeds reflect()).
    minima_lesson_on_verified_prod: bool = True
    minima_lesson_min_quality: float = 0.8
    # Optimistic exploration bonus added to the predicted success of under-explored
    # candidates, scaled by their uncertainty. 0.0 = off (no exploration; pure
    # exploitation).
    minima_exploration_bonus: float = 0.0

    # --- Catalog ---
    minima_catalog_refresh_seconds: int = 21_600
    minima_catalog_stale_after_seconds: int = 86_400
    minima_litellm_prices_url: str = (
        "https://raw.githubusercontent.com/BerriAI/litellm/main/"
        "model_prices_and_context_window.json"
    )
    minima_openrouter_models_url: str = "https://openrouter.ai/api/v1/models"
    openrouter_api_key: str | None = None

    # --- Service ---
    minima_host: str = "0.0.0.0"
    minima_port: int = 8080
    minima_log_level: str = "info"
    # memory | sqlite | cloudsql. Selects the backend for DecisionLog, Propensity, and
    # (unless MINIMA_RECSTORE_BACKEND overrides) RecStore + DurableRefs.
    minima_recommendation_store: str = "memory"
    # 7 days, because feedback often arrives well after the recommendation (batch evals,
    # prod verification). Past the TTL the late-feedback degraded path still accepts the
    # outcome (without neighbor attribution) via the decision log.
    minima_recommendation_ttl_seconds: int = 604_800
    minima_sqlite_path: str = "minima_state.db"  # durable recstore + propensity backing file

    # --- Persistent store backends (Cloud SQL + Redis) ---
    # PostgreSQL DSN for DecisionLog, Propensity, and optionally RecStore + DurableRefs.
    # Cloud Run format: postgresql://user:pass@/dbname?host=/cloudsql/PROJECT:REGION:INSTANCE
    minima_database_url: str | None = None
    # Redis URL for RecStore + DurableRefs when MINIMA_RECSTORE_BACKEND=redis.
    minima_redis_url: str = "redis://localhost:6379/0"
    # Backend override for RecStore + DurableRefs only (memory | sqlite | cloudsql | redis).
    # Empty string means inherit from MINIMA_RECOMMENDATION_STORE.
    minima_recstore_backend: str = ""
    # Accept feedback whose recommendation_id has already expired from the recstore by
    # falling back to the decision log: the outcome record is still written (the durable
    # (cluster, model) upsert), but neighbor attribution and lesson promotion are skipped.
    minima_late_feedback_enabled: bool = True

    # --- Decision logging & off-policy evaluation ---
    # Every recommendation is logged (candidate set, propensity vector, tau, baselines)
    # and reconciled with realized outcomes at feedback time. This powers /v1/savings,
    # /v1/calibration, feedback-coverage, and offline policy evaluation.
    minima_decision_log_retention_days: int = 90
    # Comma-separated orgs opting into epsilon-stochastic selection: with probability
    # epsilon the pick is sampled from a softmax over the tau-ELIGIBLE candidates instead
    # of the strict cheapest-eligible. This makes logged propensities non-degenerate so
    # IPW and off-policy evaluation are valid. Default: nobody (deterministic argmin
    # everywhere).
    minima_epsilon_selection_orgs: str = ""
    minima_epsilon: float = 0.03
    minima_epsilon_softmax_temperature: float = 0.1
    # Comma-separated orgs opting into Thompson (posterior-sampling) selection instead of
    # epsilon-softmax: each decision samples theta_m ~ Beta(alpha_m, beta_m) and picks
    # the cheapest model clearing tau under the sample. Monte-Carlo selection frequencies
    # are logged as propensities so IPW/OPE stay valid. Takes precedence over epsilon if
    # both are set.
    minima_thompson_selection_orgs: str = ""
    minima_thompson_samples: int = 128

    # --- Calibration monitoring ---
    minima_calibration_window_days: int = 30
    minima_calibration_shrinkage_k: float = 20.0
    minima_calibration_bins: int = 10
    # CUSUM slack/threshold are sized for BINARY residuals: a single failure on a 0.8
    # prediction is a 0.8 residual, so the slack has to absorb routine noise
    # (k ~ 0.5 sigma ~ 0.25) and the threshold has to require a sustained run
    # (h ~ 4-5 sigma). Smaller values flag every healthy stream.
    minima_cusum_k: float = 0.25
    minima_cusum_h: float = 2.0

    # --- Calibration APPLY (remap predicted_success before the tau decision) ---
    # The monitoring settings above MEASURE calibration; these decide whether a fitted
    # isotonic remap is actually applied so predicted_success is a truthful probability.
    # Safe by construction: with < min_n reconciled outcomes the fit returns identity
    # (no-op), and each slice shrinks toward identity by n/(n+shrinkage_k). Reuses the
    # calibration window + shrinkage_k above. Refit is lazy and cached per Recommender
    # (org).
    minima_calibration_apply: bool = True
    minima_calibration_min_n: int = 30
    minima_calibration_refresh_seconds: int = 600

    # --- Routing-collapse margin guard ---
    # Scalar-score + cheapest-clearing-tau can collapse to the single most expensive
    # model at high quality bars (arXiv 2602.03478). When the cheapest-eligible pick IS
    # the priciest candidate, prefer a cheaper candidate whose success credible interval
    # could still clear tau. The optimism is TAU-AWARE, so it shrinks as the quality bar
    # rises:
    #   eligible_optimistic = predicted + margin * (1 - tau) * 0.5 * interval_width.
    # margin >= 0: 0 disables the guard. The (1 - tau) factor keeps the guard gentle at
    # high cost_quality (where the user wants quality) and active at low (cost-leaning).
    # The judge / escalation loop is the safety net that catches an over-optimistic
    # cheap pick.
    minima_collapse_margin: float = 1.0

    # --- Lever-aware cost (prompt caching) ---
    # When on, the ESTIMATE cost tier prices a cache-supporting model's input at a blend
    # of its cache-read and full rates (assuming the caller applies prompt caching, as
    # the harness does), so ranking can favor a cache-friendly model that is cheaper in
    # practice. Off by default (no behavior change). Observed/rescaled tiers stay
    # evidence-based: they already reflect real caching via the realized cost in
    # feedback, so they self-correct. recommend() also returns `recommended_actions`
    # (e.g. enable_prompt_cache) regardless.
    minima_cost_lever_aware: bool = False
    minima_cost_cache_input_fraction: float = 0.5

    # --- Neighbor-vote classification ---
    # When the heuristic classifier returns `other`, disambiguate the task_type from the
    # types of the ANN-recalled semantic neighbors (free + semantic) instead of (or
    # before) a paid LLM-classify call. Embedding-based routing already happens via
    # recall; this just makes the cluster KEY semantically coherent for ambiguous prompts.
    minima_neighbor_classify: bool = True

    # --- Shadow bandit (advisory only) ---
    # When on, a UCB contextual-bandit policy computes what it WOULD pick and logs it on
    # the decision row (shadow_chosen_model_id) alongside the deployed conjugate pick.
    # It NEVER overrides the recommendation; it exists so agreement / regret can be
    # measured offline before considering promotion. alpha scales the exploration
    # optimism.
    minima_shadow_bandit: bool = False
    minima_shadow_ucb_alpha: float = 1.0

    # --- Durable-record fast path ---
    # Dereference the durable (cluster, model) outcome records alongside ANN recall so
    # the highest-signal evidence is always present regardless of embedding noise.
    #   off    — disabled entirely (no Dereference calls)
    #   shadow — fetch and log what ANN missed, but do NOT merge into scoring
    #   on     — merge dereferenced records into the evidence set
    minima_durable_fastpath: str = "off"  # off | shadow | on
    minima_durable_fastpath_max_refs: int = 8

    # --- Multi-tenancy (T3: hosted, per-org Mubit instance) ---
    # org id used for state partitioning (recstore / propensity) in single-key mode
    minima_default_org_id: str = "default"

    @property
    def reasoner_enabled(self) -> bool:
        """Whether a reasoner provider is configured.

        False when ``minima_reasoner_provider`` is empty or "none" (compared
        case-insensitively); True for any other value.
        """
        provider = self.minima_reasoner_provider.lower()
        return not (provider == "" or provider == "none")

    def lane(self, namespace: str | None) -> str:
        """Return the lane name ``<minima_lane_prefix>:<namespace>``.

        A missing or empty ``namespace`` is replaced by ``"default"``.
        """
        suffix = namespace if namespace else "default"
        return f"{self.minima_lane_prefix}:{suffix}"
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Build the ``Settings`` object once and hand back the cached instance afterwards.

    The single-slot ``lru_cache`` means ``Settings()`` is only constructed on the
    first call; later calls return that same object.
    """
    settings = Settings()
    return settings
|
minima/deps.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""FastAPI dependency providers (read singletons stashed on app.state)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from fastapi import Request
|
|
6
|
+
|
|
7
|
+
from minima.catalog.store import CatalogStore
|
|
8
|
+
from minima.config import Settings
|
|
9
|
+
from minima.memory.adapter import Memory
|
|
10
|
+
from minima.recommender.engine import Recommender
|
|
11
|
+
from minima.recommender.recstore import LaneCounter, RecStore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_settings(request: Request) -> Settings:
    """Return the ``settings`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.settings
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_memory(request: Request) -> Memory:
    """Return the ``memory`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.memory
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_catalog_store(request: Request) -> CatalogStore:
    """Return the ``catalog_store`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.catalog_store
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_recstore(request: Request) -> RecStore:
    """Return the ``recstore`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.recstore
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_lane_counter(request: Request) -> LaneCounter:
    """Return the ``lane_counter`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.lane_counter
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_recommender(request: Request) -> Recommender:
    """Return the ``recommender`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.recommender
|
minima/llm/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Cheap-LLM reasoner: a recommend-only escalation tier for thin/conflicting memory."""
|
minima/llm/anthropic.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Anthropic-backed reasoner (Claude Haiku by default).
|
|
2
|
+
|
|
3
|
+
Uses forced tool use for guaranteed structured output (most robust across SDK
|
|
4
|
+
versions): a single `submit_*` tool with a strict schema, read back from the
|
|
5
|
+
tool_use block's already-parsed `input`. Haiku does not support effort/thinking,
|
|
6
|
+
so neither is set. Any error degrades gracefully to None (caller keeps the
|
|
7
|
+
deterministic recommendation).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Sequence
|
|
13
|
+
|
|
14
|
+
from anthropic import AsyncAnthropic
|
|
15
|
+
|
|
16
|
+
from minima.llm.base import (
|
|
17
|
+
CLASSIFY_SCHEMA,
|
|
18
|
+
CLASSIFY_SYSTEM,
|
|
19
|
+
RANK_SYSTEM,
|
|
20
|
+
RANKING_SCHEMA,
|
|
21
|
+
CandidateView,
|
|
22
|
+
ReasonerResult,
|
|
23
|
+
build_rank_user,
|
|
24
|
+
parse_classification,
|
|
25
|
+
parse_ranking,
|
|
26
|
+
)
|
|
27
|
+
from minima.logging import get_logger
|
|
28
|
+
from minima.schemas.common import Difficulty, TaskType
|
|
29
|
+
|
|
30
|
+
log = get_logger("minima.llm.anthropic")
|
|
31
|
+
|
|
32
|
+
DEFAULT_MODEL = "claude-haiku-4-5"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class AnthropicReasoner:
    """Reasoner that obtains structured output from Anthropic via a forced tool call.

    Every request offers exactly one tool and forces the model to call it, so the
    result is read from the tool_use block's ``input`` rather than parsed out of
    free text. A failed API call is logged and reported to the caller as ``None``.
    """

    def __init__(self, *, model: str, api_key: str, timeout_ms: int, max_tokens: int):
        # timeout_ms is divided by 1000 before being handed to the SDK client.
        self._client = AsyncAnthropic(api_key=api_key, timeout=timeout_ms / 1000.0)
        self._max_tokens = max_tokens
        self._model = model

    async def _tool_call(
        self, *, system: str, user: str, tool_name: str, schema: dict
    ) -> dict | None:
        """Send one forced-tool request and return the tool's input payload.

        Args:
            system: System prompt for the request.
            user: Content of the single user message.
            tool_name: Name of the only tool offered; ``tool_choice`` forces it.
            schema: JSON schema used as the tool's ``input_schema``.

        Returns:
            The ``input`` of the first ``tool_use`` block in the response when it is
            a dict. ``None`` when the call raised, when the response has no
            ``tool_use`` block, or when that block's ``input`` is not a dict.
        """
        forced_tool = {
            "name": tool_name,
            "description": "Submit the structured result.",
            "strict": True,
            "input_schema": schema,
        }
        try:
            resp = await self._client.messages.create(
                model=self._model,
                max_tokens=self._max_tokens,
                system=system,
                messages=[{"role": "user", "content": user}],
                tools=[forced_tool],
                tool_choice={"type": "tool", "name": tool_name},
            )
        except Exception as exc:  # noqa: BLE001 — reasoner must never break a recommendation
            log.warning("reasoner_call_failed", model=self._model, error=str(exc))
            return None

        # Only the first tool_use block is considered. Attributes are read with getattr
        # because the SDK's content block is a union and only the tool_use variant
        # carries `input`.
        tool_block = next(
            (blk for blk in resp.content if getattr(blk, "type", None) == "tool_use"),
            None,
        )
        if tool_block is None:
            return None
        payload = getattr(tool_block, "input", None)
        if isinstance(payload, dict):
            return payload
        return None

    async def rank(
        self,
        *,
        task: str,
        task_type: str,
        difficulty: str,
        candidates: Sequence[CandidateView],
        memory_block: str,
        cost_quality_tradeoff: float,
    ) -> ReasonerResult | None:
        """Ask the model to rank ``candidates`` for ``task``.

        The user prompt comes from ``build_rank_user``; the reply is submitted through
        the ``submit_ranking`` tool and handed to ``parse_ranking`` together with the
        set of candidate model ids. Returns ``None`` when the tool call produced no
        payload.
        """
        data = await self._tool_call(
            system=RANK_SYSTEM,
            user=build_rank_user(
                task=task,
                task_type=task_type,
                difficulty=difficulty,
                candidates=candidates,
                memory_block=memory_block,
                cost_quality_tradeoff=cost_quality_tradeoff,
            ),
            tool_name="submit_ranking",
            schema=RANKING_SCHEMA,
        )
        if data is None:
            return None
        candidate_ids = {view.model_id for view in candidates}
        return parse_ranking(data, candidate_ids)

    async def classify(self, *, task: str) -> tuple[TaskType, Difficulty] | None:
        """Ask the model to classify ``task``.

        Only the first 2000 characters of ``task`` are sent. The reply is submitted
        through the ``submit_classification`` tool and handed to
        ``parse_classification``. Returns ``None`` when the tool call produced no
        payload.
        """
        prompt = f"Classify this task:\n\n{task[:2000]}"
        data = await self._tool_call(
            system=CLASSIFY_SYSTEM,
            user=prompt,
            tool_name="submit_classification",
            schema=CLASSIFY_SCHEMA,
        )
        if data is None:
            return None
        return parse_classification(data)
|
minima/llm/base.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Reasoner protocol, shared prompt construction, and strict-output parsing.
|
|
2
|
+
|
|
3
|
+
The reasoner is consulted ONLY when memory evidence is thin or conflicting. It ranks
|
|
4
|
+
candidate models for a task; it never writes prompts, runs models, or does the task.
|
|
5
|
+
Its estimates are blended with the deterministic ones — it advises, it does not decide.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from collections.abc import Sequence
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Any, Protocol, runtime_checkable
|
|
14
|
+
|
|
15
|
+
from minima.memory.records import clamp01
|
|
16
|
+
from minima.schemas.common import Difficulty, TaskType
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(slots=True)
|
|
20
|
+
class CandidateView:
|
|
21
|
+
"""The view of a candidate model handed to the reasoner."""
|
|
22
|
+
|
|
23
|
+
model_id: str
|
|
24
|
+
provider: str
|
|
25
|
+
input_cost_per_mtok: float
|
|
26
|
+
output_cost_per_mtok: float
|
|
27
|
+
context_window: int
|
|
28
|
+
capability_prior: float
|
|
29
|
+
est_cost_usd: float
|
|
30
|
+
predicted_success: float
|
|
31
|
+
# Observed latency percentile (ms) from similar past outcomes; None without evidence.
|
|
32
|
+
est_latency_ms: float | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(slots=True)
|
|
36
|
+
class ReasonerRanking:
|
|
37
|
+
model_id: str
|
|
38
|
+
predicted_success: float
|
|
39
|
+
rationale: str
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(slots=True)
|
|
43
|
+
class ReasonerResult:
|
|
44
|
+
rankings: list[ReasonerRanking]
|
|
45
|
+
recommended: str | None = None
|
|
46
|
+
fallback: str | None = None
|
|
47
|
+
|
|
48
|
+
def by_model(self) -> dict[str, ReasonerRanking]:
|
|
49
|
+
return {r.model_id: r for r in self.rankings}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@runtime_checkable
class Reasoner(Protocol):
    """Structural interface for objects that can rank candidate models for a task.

    Being ``runtime_checkable``, it can be used with ``isinstance`` to test whether an
    object exposes a ``rank`` attribute.
    """

    async def rank(
        self,
        *,
        task: str,
        task_type: str,
        difficulty: str,
        candidates: Sequence[CandidateView],
        memory_block: str,
        cost_quality_tradeoff: float,
    ) -> ReasonerResult | None:
        """Rank ``candidates`` for ``task``.

        Args:
            task: The task text.
            task_type: Task category label.
            difficulty: Difficulty label.
            candidates: Models to choose between.
            memory_block: Text describing past outcomes on similar tasks.
            cost_quality_tradeoff: Requested cost/quality balance.

        Returns:
            The reasoner's result, or ``None`` when no usable ranking is available.
        """
        ...
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# --- structured output schemas (additionalProperties:false everywhere for strict mode) ---
|
|
67
|
+
|
|
68
|
+
# Shape of one element of the "ranking" array: every field is required and no extra
# keys are allowed.
_RANKING_ENTRY_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "model_id": {"type": "string"},
        "predicted_success": {"type": "number"},
        "rationale": {"type": "string"},
    },
    "required": ["model_id", "predicted_success", "rationale"],
    "additionalProperties": False,
}

# Top-level ranking payload: a recommended model, a nullable fallback, and the
# per-model entries.
RANKING_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "recommended": {"type": "string"},
        "fallback": {"type": ["string", "null"]},
        "ranking": {"type": "array", "items": _RANKING_ENTRY_SCHEMA},
    },
    "required": ["recommended", "fallback", "ranking"],
    "additionalProperties": False,
}
|
|
90
|
+
|
|
91
|
+
# Allowed labels, taken from the enum members in definition order.
_TASK_TYPE_VALUES = [member.value for member in TaskType]
_DIFFICULTY_VALUES = [member.value for member in Difficulty]

# Classification payload: both fields are required strings restricted to the enum
# values above, and no extra keys are allowed.
CLASSIFY_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "task_type": {"type": "string", "enum": _TASK_TYPE_VALUES},
        "difficulty": {"type": "string", "enum": _DIFFICULTY_VALUES},
    },
    "required": ["task_type", "difficulty"],
    "additionalProperties": False,
}
|
|
100
|
+
|
|
101
|
+
# System prompt for the ranking call, written one sentence per element; the sentences
# are joined with single spaces.
RANK_SYSTEM = " ".join(
    (
        "You are a model-selection advisor for an LLM cost-optimization service.",
        (
            "Given a task, a table of candidate models (id, provider, token prices, "
            "a capability prior in [0,1], and a current estimated success in [0,1]), "
            "and a memory block of past outcomes on similar tasks, rank the candidates "
            "by how likely each is to complete THIS task well."
        ),
        (
            "Prefer cheaper models when their expected quality is adequate for the "
            "requested cost/quality tradeoff (0 = cheapest acceptable, 10 = highest quality)."
        ),
        "You do NOT write prompts, run models, or perform the task — you only rank models.",
        "Return predicted_success in [0,1] for each candidate via the submit_ranking tool.",
    )
)
|
|
111
|
+
|
|
112
|
+
# System prompt for the classification call.
CLASSIFY_SYSTEM = " ".join(
    (
        "Classify an LLM task by type and difficulty for routing.",
        "Respond only via the tool.",
    )
)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def build_rank_user(
    *,
    task: str,
    task_type: str,
    difficulty: str,
    candidates: Sequence[CandidateView],
    memory_block: str,
    cost_quality_tradeoff: float,
) -> str:
    """Render the user message for a ranking request.

    The message has four sections separated by blank lines: a header with the task
    type, difficulty and cost/quality tradeoff; the task text (first 2000 characters);
    the candidates as an indented JSON array; and the memory block (first 4000
    characters after stripping, or a placeholder when it is blank).

    Numeric candidate fields are rounded before serialisation, and
    ``observed_latency_ms`` is emitted only for candidates that have a latency value.
    """
    rows = []
    for cand in candidates:
        row = {
            "model_id": cand.model_id,
            "provider": cand.provider,
            "input_per_mtok": round(cand.input_cost_per_mtok, 4),
            "output_per_mtok": round(cand.output_cost_per_mtok, 4),
            "context_window": cand.context_window,
            "capability_prior": round(cand.capability_prior, 3),
            "current_estimate": round(cand.predicted_success, 3),
            "est_cost_usd": round(cand.est_cost_usd, 6),
        }
        if cand.est_latency_ms is not None:
            row["observed_latency_ms"] = round(cand.est_latency_ms, 0)
        rows.append(row)

    recalled = memory_block.strip()
    if not recalled:
        recalled = "(no past outcomes recalled)"

    sections = [
        (
            f"task_type: {task_type}\ndifficulty: {difficulty}\n"
            f"cost_quality_tradeoff: {cost_quality_tradeoff}"
        ),
        f"TASK:\n{task[:2000]}",
        f"CANDIDATE MODELS:\n{json.dumps(rows, indent=2)}",
        f"MEMORY OF PAST OUTCOMES:\n{recalled[:4000]}",
    ]
    return "\n\n".join(sections)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def parse_ranking(data: Any, valid_ids: set[str]) -> ReasonerResult | None:
    """Validate a raw ranking payload and convert it into a ``ReasonerResult``.

    The payload is treated as untrusted: anything malformed is dropped rather than
    raised on.

    Args:
        data: The decoded tool output; anything other than a dict yields ``None``.
        valid_ids: Model ids that were actually offered. Ranking entries and the
            ``recommended`` / ``fallback`` picks naming any other id are discarded.

    Returns:
        A ``ReasonerResult``, or ``None`` when the payload contains neither a usable
        ranking entry nor a usable ``recommended`` pick.
    """
    if not isinstance(data, dict):
        return None

    def is_known(candidate: Any) -> bool:
        # Check the type first: testing an unhashable value (list, dict) for
        # membership in a set raises TypeError instead of returning False.
        return isinstance(candidate, str) and candidate in valid_ids

    raw_items = data.get("ranking")
    if not isinstance(raw_items, (list, tuple)):
        # Missing, null, or a non-sequence such as a number: nothing to iterate.
        raw_items = ()

    rankings: list[ReasonerRanking] = []
    for item in raw_items:
        if not isinstance(item, dict):
            continue
        model_id = item.get("model_id")
        if not is_known(model_id):
            continue
        rationale = item.get("rationale")
        rankings.append(
            ReasonerRanking(
                model_id=model_id,
                predicted_success=clamp01(_as_float(item.get("predicted_success"))),
                # A null rationale becomes empty text rather than the string "None".
                rationale="" if rationale is None else str(rationale)[:300],
            )
        )

    recommended = data.get("recommended")
    fallback = data.get("fallback")
    recommended = recommended if is_known(recommended) else None
    fallback = fallback if is_known(fallback) else None

    if not rankings and recommended is None:
        return None
    return ReasonerResult(rankings=rankings, recommended=recommended, fallback=fallback)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def parse_classification(data: Any) -> tuple[TaskType, Difficulty] | None:
    """Turn a raw classification payload into a ``(TaskType, Difficulty)`` pair.

    Returns ``None`` when ``data`` is not a dict, when either key is missing, or when
    either value is not accepted by its enum.
    """
    if isinstance(data, dict):
        try:
            task_type = TaskType(data["task_type"])
            difficulty = Difficulty(data["difficulty"])
        except (KeyError, ValueError):
            return None
        return task_type, difficulty
    return None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _as_float(value: Any, default: float = 0.0) -> float:
|
|
193
|
+
try:
|
|
194
|
+
return float(value)
|
|
195
|
+
except (TypeError, ValueError):
|
|
196
|
+
return default
|