minima-cli 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. minima/__init__.py +5 -0
  2. minima/api/__init__.py +1 -0
  3. minima/api/auth.py +39 -0
  4. minima/api/errors.py +40 -0
  5. minima/api/routers/__init__.py +1 -0
  6. minima/api/routers/calibration.py +50 -0
  7. minima/api/routers/feedback.py +279 -0
  8. minima/api/routers/health.py +50 -0
  9. minima/api/routers/models.py +42 -0
  10. minima/api/routers/recommend.py +66 -0
  11. minima/api/routers/savings.py +55 -0
  12. minima/api/routers/strategies.py +33 -0
  13. minima/catalog/__init__.py +1 -0
  14. minima/catalog/data/capability_priors.json +210 -0
  15. minima/catalog/data/model_aliases.json +12 -0
  16. minima/catalog/merge.py +69 -0
  17. minima/catalog/refresh.py +54 -0
  18. minima/catalog/sources/__init__.py +1 -0
  19. minima/catalog/sources/litellm.py +19 -0
  20. minima/catalog/sources/openrouter.py +25 -0
  21. minima/catalog/store.py +86 -0
  22. minima/config.py +288 -0
  23. minima/deps.py +35 -0
  24. minima/llm/__init__.py +1 -0
  25. minima/llm/anthropic.py +106 -0
  26. minima/llm/base.py +196 -0
  27. minima/llm/gemini.py +124 -0
  28. minima/llm/registry.py +54 -0
  29. minima/logging.py +28 -0
  30. minima/main.py +109 -0
  31. minima/memory/__init__.py +1 -0
  32. minima/memory/adapter.py +572 -0
  33. minima/memory/keys.py +83 -0
  34. minima/memory/records.py +190 -0
  35. minima/memory/threadpool.py +41 -0
  36. minima/metrics/__init__.py +1 -0
  37. minima/metrics/calibration.py +415 -0
  38. minima/metrics/report.py +116 -0
  39. minima/metrics/savings.py +98 -0
  40. minima/recommender/__init__.py +1 -0
  41. minima/recommender/_pg_pool.py +38 -0
  42. minima/recommender/_redis_client.py +32 -0
  43. minima/recommender/aggregate.py +157 -0
  44. minima/recommender/classify.py +165 -0
  45. minima/recommender/decisionlog.py +505 -0
  46. minima/recommender/durablerefs.py +312 -0
  47. minima/recommender/engine.py +997 -0
  48. minima/recommender/escalation.py +83 -0
  49. minima/recommender/propensity.py +189 -0
  50. minima/recommender/recstore.py +368 -0
  51. minima/recommender/score.py +318 -0
  52. minima/recommender/types.py +166 -0
  53. minima/schemas/__init__.py +1 -0
  54. minima/schemas/common.py +73 -0
  55. minima/schemas/feedback.py +34 -0
  56. minima/schemas/models_catalog.py +36 -0
  57. minima/schemas/recommend.py +104 -0
  58. minima/schemas/savings.py +39 -0
  59. minima/schemas/strategies.py +57 -0
  60. minima/schemas/workflow.py +43 -0
  61. minima/seeding/__init__.py +1 -0
  62. minima/seeding/items.py +42 -0
  63. minima/seeding/llmrouterbench.py +232 -0
  64. minima/seeding/routerbench.py +141 -0
  65. minima/seeding/run_seed.py +56 -0
  66. minima/seeding/synthetic.py +70 -0
  67. minima/tenancy/__init__.py +8 -0
  68. minima/tenancy/context.py +37 -0
  69. minima/tenancy/passthrough.py +110 -0
  70. minima/version.py +3 -0
  71. minima_cli-0.4.9.dist-info/METADATA +275 -0
  72. minima_cli-0.4.9.dist-info/RECORD +161 -0
  73. minima_cli-0.4.9.dist-info/WHEEL +4 -0
  74. minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
  75. minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
  76. minima_client/__init__.py +19 -0
  77. minima_client/autocapture.py +101 -0
  78. minima_client/client.py +301 -0
  79. minima_client/errors.py +23 -0
  80. minima_harness/LICENSE_PI +32 -0
  81. minima_harness/__init__.py +16 -0
  82. minima_harness/agent/__init__.py +72 -0
  83. minima_harness/agent/agent.py +276 -0
  84. minima_harness/agent/events.py +124 -0
  85. minima_harness/agent/loop.py +311 -0
  86. minima_harness/agent/state.py +79 -0
  87. minima_harness/agent/tools.py +97 -0
  88. minima_harness/ai/__init__.py +66 -0
  89. minima_harness/ai/compat.py +71 -0
  90. minima_harness/ai/errors.py +96 -0
  91. minima_harness/ai/events.py +117 -0
  92. minima_harness/ai/openrouter_catalog.py +153 -0
  93. minima_harness/ai/provider_catalog.py +299 -0
  94. minima_harness/ai/provider_quirks.py +37 -0
  95. minima_harness/ai/providers/__init__.py +75 -0
  96. minima_harness/ai/providers/_common.py +48 -0
  97. minima_harness/ai/providers/anthropic.py +290 -0
  98. minima_harness/ai/providers/base.py +65 -0
  99. minima_harness/ai/providers/faux.py +173 -0
  100. minima_harness/ai/providers/google.py +221 -0
  101. minima_harness/ai/providers/openai_compat.py +278 -0
  102. minima_harness/ai/registry.py +184 -0
  103. minima_harness/ai/stream.py +82 -0
  104. minima_harness/ai/tools.py +51 -0
  105. minima_harness/ai/types.py +204 -0
  106. minima_harness/ai/usage.py +41 -0
  107. minima_harness/minima/__init__.py +40 -0
  108. minima_harness/minima/cache.py +102 -0
  109. minima_harness/minima/config.py +85 -0
  110. minima_harness/minima/goals.py +226 -0
  111. minima_harness/minima/judge.py +144 -0
  112. minima_harness/minima/mapping.py +147 -0
  113. minima_harness/minima/meter.py +143 -0
  114. minima_harness/minima/router.py +220 -0
  115. minima_harness/minima/runtime.py +544 -0
  116. minima_harness/minima/signals.py +195 -0
  117. minima_harness/session/__init__.py +14 -0
  118. minima_harness/session/format.py +35 -0
  119. minima_harness/session/store.py +236 -0
  120. minima_harness/tasks/__init__.py +17 -0
  121. minima_harness/tasks/task_set.py +78 -0
  122. minima_harness/tools/__init__.py +7 -0
  123. minima_harness/tools/_io.py +34 -0
  124. minima_harness/tools/bash.py +70 -0
  125. minima_harness/tools/builtin.py +23 -0
  126. minima_harness/tools/edit.py +50 -0
  127. minima_harness/tools/find.py +38 -0
  128. minima_harness/tools/grep.py +73 -0
  129. minima_harness/tools/ls.py +35 -0
  130. minima_harness/tools/read.py +38 -0
  131. minima_harness/tools/tasks.py +75 -0
  132. minima_harness/tools/write.py +36 -0
  133. minima_harness/tui/__init__.py +3 -0
  134. minima_harness/tui/analytics.py +111 -0
  135. minima_harness/tui/app.py +1927 -0
  136. minima_harness/tui/bridge.py +103 -0
  137. minima_harness/tui/cli.py +227 -0
  138. minima_harness/tui/clipboard.py +60 -0
  139. minima_harness/tui/commands.py +49 -0
  140. minima_harness/tui/compaction.py +17 -0
  141. minima_harness/tui/config_cli.py +141 -0
  142. minima_harness/tui/config_store.py +237 -0
  143. minima_harness/tui/context.py +93 -0
  144. minima_harness/tui/customize.py +95 -0
  145. minima_harness/tui/diff.py +53 -0
  146. minima_harness/tui/editor.py +43 -0
  147. minima_harness/tui/extensions.py +84 -0
  148. minima_harness/tui/extra_models.py +52 -0
  149. minima_harness/tui/history.py +71 -0
  150. minima_harness/tui/mubit.py +295 -0
  151. minima_harness/tui/overlays.py +593 -0
  152. minima_harness/tui/packages.py +59 -0
  153. minima_harness/tui/run_modes.py +66 -0
  154. minima_harness/tui/theme.py +77 -0
  155. minima_harness/tui/welcome.py +83 -0
  156. minima_harness/tui/widgets/__init__.py +3 -0
  157. minima_harness/tui/widgets/banner.py +38 -0
  158. minima_harness/tui/widgets/editor.py +83 -0
  159. minima_harness/tui/widgets/footer.py +73 -0
  160. minima_harness/tui/widgets/messages.py +151 -0
  161. minima_harness/tui/widgets/status.py +57 -0
minima/config.py ADDED
@@ -0,0 +1,288 @@
1
+ """Environment-driven configuration.
2
+
3
+ Every setting is read from an environment variable with the same (case-insensitive)
4
+ name, optionally from a local ``.env`` file. The only required value is ``MUBIT_API_KEY``.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from functools import lru_cache
10
+
11
+ from pydantic_settings import BaseSettings, SettingsConfigDict
12
+
13
+
14
class Settings(BaseSettings):
    """Typed service configuration loaded by pydantic-settings.

    Each field is populated from the environment variable of the same name
    (matched case-insensitively) or from a local UTF-8 ``.env`` file; keys that
    do not correspond to a field are ignored. Every field carries a default, so
    the class can be instantiated with an empty environment.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False,
    )

    # --- Mubit memory backend ---
    mubit_endpoint: str = "http://127.0.0.1:3000"
    # NOTE(review): the module docstring calls MUBIT_API_KEY the only required value,
    # yet this field defaults to None, so nothing in this class rejects a missing key —
    # presumably enforced where the Mubit client is built; confirm.
    mubit_api_key: str | None = None
    mubit_transport: str = "auto"  # auto | grpc | http
    mubit_timeout_ms: int = 30_000

    # --- Memory read path ---
    minima_memory_recall_timeout_ms: int = 2500
    minima_memory_recall_limit: int = 25
    # direct_bypass is faster but requires enable_direct_search=true on the Mubit instance
    # (off by default on hosted api.mubit.ai). agent_routed works on all instance types.
    minima_recall_mode: str = "agent_routed"  # agent_routed | direct_bypass
    minima_lane_prefix: str = "minima"
    minima_seed_lane: str = "minima:default"
    # LTM entry-type filter on recall. Minima evidence lives under exactly two types
    # (seeds ingest as "fact", feedback as "observation"); filtering at the server keeps
    # traces/lessons/etc. out of the candidate pool. Empty string = no filter (legacy).
    minima_recall_entry_types: str = "fact,observation"
    # Server-side ranking strategy: "relevance" | "freshness" | "balanced" | "" (omit).
    # "balanced" lets recency influence WHICH neighbors are retrieved; how much each
    # neighbor then counts is the client-side age decay (see evidence half-life below) —
    # "freshness" on top of that decay would double-discount old evidence.
    minima_recall_rank_by: str = "balanced"
    # Hard recency window: only recall evidence from the last N days (0 = no window).
    minima_recall_max_age_days: int = 0
    # Mubit search budget tier: "low" | "mid" | "high" ("" = server default).
    minima_recall_budget: str = "mid"
    # Request per-evidence score breakdowns (ExplainInfo) and log them. Diagnostic;
    # adds payload weight, keep off in prod unless investigating recall quality.
    minima_recall_explain: bool = False

    # --- Recommender tuning ---
    minima_tau_min: float = 0.55
    minima_tau_max: float = 0.92
    minima_beta_pseudocount: float = 2.5
    minima_escalation_w_min: float = 1.5
    minima_escalation_n_min: int = 3
    minima_escalation_c_min: float = 0.45
    minima_escalation_tie_delta: float = 0.05
    # Escalation trigger mode. "legacy" = the four independent heuristics. "uncertainty"
    # replaces thin_evidence + low_confidence with a single posterior-interval-width gate
    # on the recommended candidate (conflict stays as a hard override; tie is kept — it
    # captures rank instability the interval doesn't). Shadow "uncertainty" before
    # switching the default.
    minima_escalation_mode: str = "legacy"  # legacy | uncertainty
    minima_escalation_interval_width: float = 0.25
    # "near_threshold" trigger: escalate when the recommended model's predicted success is
    # within this margin above tau — a fragile pick that one more failure round would drop.
    # 0.0 = disabled. Recommended starting value: 0.10.
    minima_escalation_near_threshold_delta: float = 0.10
    minima_default_input_tokens: int = 1500
    minima_default_output_tokens: int = 500
    minima_reflect_every_n: int = 25
    # Rank eligible models by OBSERVED avg $/call from recalled outcomes (Mubit stores
    # cost_usd per outcome) instead of a flat token estimate. The estimate assumes a fixed
    # completion length and so ignores reasoning/thinking tokens, which can mis-rank a
    # cheap-listed model that is expensive in practice (e.g. a "flash" model that spends
    # heavily on internal reasoning). Falls back to the estimate when fewer than
    # minima_observed_cost_min_n cost observations exist for the candidate.
    minima_use_observed_cost: bool = True
    minima_observed_cost_min_n: int = 3
    # Evidence age decay: each recalled outcome's weight halves every half-life. Replaces
    # the old binary stale 0.5x for records that carry a recorded_at timestamp; supersession
    # (is_stale) still caps the multiplier at 0.5. knowledge_confidence is deliberately NOT
    # touched — its server-side recency component reflects *reinforcement* recency, while
    # this decay reflects *observation* age (distinct signals; multiplying both is intended,
    # adding extra recency factors on top is not).
    minima_evidence_half_life_days: float = 30.0
    minima_evidence_decay_floor: float = 0.1
    # Seed-vs-live weighting: seeded outcomes (source_dataset set) count at this weight,
    # decaying linearly to zero once a model has crowdout_n live outcomes in the recalled
    # set — live evidence replaces the bootstrap instead of competing with it forever.
    minima_seed_weight: float = 0.5
    minima_seed_crowdout_n: int = 5
    # Latency-aware ranking: annotate candidates with a robust observed latency percentile
    # and enforce Constraints.max_latency_ms against it (only for candidates with at least
    # min_n latency observations — a model is never excluded without evidence).
    minima_latency_percentile: float = 0.75
    minima_latency_min_n: int = 3
    # Default-output-token multipliers by classified difficulty, applied when the caller
    # does not supply expected_output_tokens (affects the "estimate" cost basis only).
    minima_difficulty_output_multipliers: dict[str, float] = {
        "trivial": 0.5,
        "easy": 0.75,
        "medium": 1.0,
        "hard": 1.5,
        "expert": 2.0,
    }

    # --- Cheap-LLM reasoner (recommend-only) ---
    minima_reasoner_provider: str = "none"  # none | anthropic | gemini
    minima_reasoner_model: str | None = None  # default per provider (anthropic -> claude-haiku-4-5)
    # The reasoner is the explicit slow tier (only consulted on escalation): a real
    # ranking call with structured output takes ~6-8s, so a tight budget makes it time
    # out and silently degrade. This is per-attempt; it never touches the caller's own
    # LLM call (Minima adds zero latency there).
    minima_reasoner_timeout_ms: int = 15_000
    # A hard output cap (the reasoner stops early when done). Gemini 3.x "flash" spends
    # output tokens on internal reasoning before emitting the JSON, so a small cap
    # truncates the structured response — keep headroom. Anthropic forced-tool-use is
    # compact and won't approach this.
    minima_reasoner_max_tokens: int = 4096
    minima_reasoner_blend: float = 0.5  # weight on the LLM estimate vs the deterministic one
    # Adaptive blend: weight the LLM estimate by how thin the deterministic evidence is
    # (blend = blend_max * (1 - confidence), clamped to [0.1, 0.9]) instead of the fixed
    # minima_reasoner_blend. Heavy evidence barely moves; cold candidates lean on the LLM.
    minima_reasoner_blend_adaptive: bool = True
    minima_reasoner_blend_max: float = 0.8
    minima_reasoner_classify: bool = True  # let the reasoner refine ambiguous task classification
    anthropic_api_key: str | None = None
    gemini_api_key: str | None = None

    # --- Selection-bias correction (inverse propensity weighting) ---
    minima_ipw_enabled: bool = True
    minima_ipw_clip_low: float = 0.1
    minima_ipw_clip_high: float = 10.0

    # --- Learning maturity ---
    # Cluster granularity controls the upsert grouping (one durable record per cluster+model).
    # "coarse" = task_type:difficulty; "fine" appends a salient-keyword signature bucket so
    # topically-distinct tasks of the same type/difficulty accumulate separately.
    minima_cluster_granularity: str = "coarse"  # coarse | fine
    minima_cluster_signature_tokens: int = 4
    # Promote a verified-in-production strong success to a durable Lesson (feeds reflect()).
    minima_lesson_on_verified_prod: bool = True
    minima_lesson_min_quality: float = 0.8
    # Optimistic exploration bonus added to under-explored candidates' predicted success,
    # scaled by their uncertainty. 0.0 = off (no exploration; pure exploitation).
    minima_exploration_bonus: float = 0.0

    # --- Catalog ---
    minima_catalog_refresh_seconds: int = 21_600
    minima_catalog_stale_after_seconds: int = 86_400
    minima_litellm_prices_url: str = (
        "https://raw.githubusercontent.com/BerriAI/litellm/main/"
        "model_prices_and_context_window.json"
    )
    minima_openrouter_models_url: str = "https://openrouter.ai/api/v1/models"
    openrouter_api_key: str | None = None

    # --- Service ---
    minima_host: str = "0.0.0.0"
    minima_port: int = 8080
    minima_log_level: str = "info"
    # memory | sqlite | cloudsql — controls DecisionLog, Propensity, and (unless
    # MINIMA_RECSTORE_BACKEND overrides) RecStore + DurableRefs.
    minima_recommendation_store: str = "memory"
    # 7 days: feedback often arrives well after the recommendation (batch evals, prod
    # verification). Past the TTL the late-feedback degraded path still accepts the
    # outcome (without neighbor attribution) via the decision log.
    minima_recommendation_ttl_seconds: int = 604_800
    minima_sqlite_path: str = "minima_state.db"  # durable recstore + propensity backing file

    # --- Persistent store backends (Cloud SQL + Redis) ---
    # PostgreSQL DSN for DecisionLog, Propensity, and optionally RecStore + DurableRefs.
    # Cloud Run format: postgresql://user:pass@/dbname?host=/cloudsql/PROJECT:REGION:INSTANCE
    minima_database_url: str | None = None
    # Redis URL for RecStore + DurableRefs when MINIMA_RECSTORE_BACKEND=redis.
    minima_redis_url: str = "redis://localhost:6379/0"
    # Backend override for RecStore + DurableRefs only (memory | sqlite | cloudsql | redis).
    # Empty string means inherit from MINIMA_RECOMMENDATION_STORE.
    minima_recstore_backend: str = ""
    # Accept feedback whose recommendation_id has expired from the recstore by falling
    # back to the decision log: the outcome record is still written (the durable
    # (cluster, model) upsert), but neighbor attribution and lesson promotion are skipped.
    minima_late_feedback_enabled: bool = True

    # --- Decision logging & off-policy evaluation ---
    # Every recommendation is logged (candidate set, propensity vector, tau, baselines)
    # and reconciled with realized outcomes at feedback time. This powers /v1/savings,
    # /v1/calibration, feedback-coverage, and offline policy evaluation.
    minima_decision_log_retention_days: int = 90
    # Orgs (comma-separated) that opt into epsilon-stochastic selection: with probability
    # epsilon the pick is sampled from a softmax over the tau-ELIGIBLE candidates instead
    # of the strict cheapest-eligible. Makes logged propensities non-degenerate so IPW and
    # off-policy evaluation are valid. Default: nobody (deterministic argmin everywhere).
    minima_epsilon_selection_orgs: str = ""
    minima_epsilon: float = 0.03
    minima_epsilon_softmax_temperature: float = 0.1
    # Orgs (comma-separated) that opt into Thompson (posterior-sampling) selection instead of
    # epsilon-softmax: each decision samples theta_m ~ Beta(alpha_m, beta_m) and picks the
    # cheapest model clearing tau under the sample. Monte-Carlo selection frequencies are
    # logged as propensities so IPW/OPE stay valid. Takes precedence over epsilon if both set.
    minima_thompson_selection_orgs: str = ""
    minima_thompson_samples: int = 128

    # --- Calibration monitoring ---
    minima_calibration_window_days: int = 30
    minima_calibration_shrinkage_k: float = 20.0
    minima_calibration_bins: int = 10
    # CUSUM slack/threshold sized for BINARY residuals: a single failure on a 0.8
    # prediction is a 0.8 residual, so the slack must absorb routine noise (k ~ 0.5
    # sigma ~ 0.25) and the threshold must require a sustained run (h ~ 4-5 sigma).
    # Smaller values flag every healthy stream.
    minima_cusum_k: float = 0.25
    minima_cusum_h: float = 2.0

    # --- Calibration APPLY (remap predicted_success before the tau decision) ---
    # The monitoring above MEASURES calibration; these control whether a fitted isotonic
    # remap is actually applied so predicted_success is a truthful probability. Safe by
    # construction: with < min_n reconciled outcomes the fit returns identity (no-op), and
    # each slice shrinks toward identity by n/(n+shrinkage_k). Reuses the calibration
    # window + shrinkage_k above. Refit is lazy and cached per Recommender (org).
    minima_calibration_apply: bool = True
    minima_calibration_min_n: int = 30
    minima_calibration_refresh_seconds: int = 600

    # --- Routing-collapse margin guard ---
    # Scalar-score + cheapest-clearing-tau can collapse to the single most expensive model
    # at high quality bars (arXiv 2602.03478). When the cheapest-eligible pick IS the
    # priciest candidate, prefer a cheaper candidate whose success credible interval could
    # still clear tau. The optimism is TAU-AWARE so it shrinks as the quality bar rises:
    # eligible_optimistic = predicted + margin * (1 - tau) * 0.5 * interval_width.
    # margin >= 0: 0 disables the guard. The (1 - tau) factor keeps the guard gentle at high
    # cost_quality (where the user wants quality) and active at low (cost-leaning). The judge
    # / escalation loop is the safety net that catches an over-optimistic cheap pick.
    minima_collapse_margin: float = 1.0

    # --- Lever-aware cost (prompt caching) ---
    # When on, the ESTIMATE cost tier prices a cache-supporting model's input at a blend of
    # its cache-read and full rates (assuming the caller applies prompt caching, as the
    # harness does), so ranking can favor a cache-friendly model that is cheaper in practice.
    # Off by default (no behavior change). Observed/rescaled tiers stay evidence-based — they
    # already reflect real caching via the realized cost in feedback, so they self-correct.
    # recommend() also returns `recommended_actions` (e.g. enable_prompt_cache) regardless.
    minima_cost_lever_aware: bool = False
    minima_cost_cache_input_fraction: float = 0.5

    # --- Neighbor-vote classification ---
    # When the heuristic classifier returns `other`, disambiguate the task_type from the
    # ANN-recalled semantic neighbors' types (free + semantic) instead of (or before) a paid
    # LLM-classify call. Embedding-based routing already happens via recall; this just makes
    # the cluster KEY semantically coherent for ambiguous prompts.
    minima_neighbor_classify: bool = True

    # --- Shadow bandit (advisory only) ---
    # When on, a UCB contextual-bandit policy computes what it WOULD pick and logs it on the
    # decision row (shadow_chosen_model_id) alongside the deployed conjugate pick. It NEVER
    # overrides the recommendation — it exists so we can measure agreement / regret offline
    # before considering promotion. alpha scales the exploration optimism.
    minima_shadow_bandit: bool = False
    minima_shadow_ucb_alpha: float = 1.0

    # --- Durable-record fast path ---
    # Dereference the durable (cluster, model) outcome records alongside ANN recall so the
    # highest-signal evidence is always present regardless of embedding noise.
    #   off    — disabled entirely (no Dereference calls)
    #   shadow — fetch and log what ANN missed, but do NOT merge into scoring
    #   on     — merge dereferenced records into the evidence set
    minima_durable_fastpath: str = "off"  # off | shadow | on
    minima_durable_fastpath_max_refs: int = 8

    # --- Multi-tenancy (T3: hosted, per-org Mubit instance) ---
    # org id used for state partitioning (recstore / propensity) in single-key mode
    minima_default_org_id: str = "default"

    @property
    def reasoner_enabled(self) -> bool:
        """Whether a reasoner provider is configured.

        False when ``minima_reasoner_provider`` is the empty string or ``"none"``
        (compared case-insensitively); True for any other value. The value is not
        checked against the supported provider names here.
        """
        return self.minima_reasoner_provider.lower() not in ("", "none")

    def lane(self, namespace: str | None) -> str:
        """Return the lane name ``"<minima_lane_prefix>:<namespace>"``.

        A ``None`` or empty ``namespace`` falls back to ``"default"``.
        """
        return f"{self.minima_lane_prefix}:{namespace or 'default'}"
284
+
285
+
286
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the cached ``Settings`` instance.

    The first call constructs ``Settings()``; the single-slot ``lru_cache``
    hands the same object back on every later call until ``cache_clear()``.
    """
    settings = Settings()
    return settings
minima/deps.py ADDED
@@ -0,0 +1,35 @@
1
+ """FastAPI dependency providers (read singletons stashed on app.state)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import Request
6
+
7
+ from minima.catalog.store import CatalogStore
8
+ from minima.config import Settings
9
+ from minima.memory.adapter import Memory
10
+ from minima.recommender.engine import Recommender
11
+ from minima.recommender.recstore import LaneCounter, RecStore
12
+
13
+
14
def get_settings(request: Request) -> Settings:
    """Return the ``settings`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.settings
16
+
17
+
18
def get_memory(request: Request) -> Memory:
    """Return the ``memory`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.memory
20
+
21
+
22
def get_catalog_store(request: Request) -> CatalogStore:
    """Return the ``catalog_store`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.catalog_store
24
+
25
+
26
def get_recstore(request: Request) -> RecStore:
    """Return the ``recstore`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.recstore
28
+
29
+
30
def get_lane_counter(request: Request) -> LaneCounter:
    """Return the ``lane_counter`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.lane_counter
32
+
33
+
34
def get_recommender(request: Request) -> Recommender:
    """Return the ``recommender`` object stored on ``request.app.state``."""
    app_state = request.app.state
    return app_state.recommender
minima/llm/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Cheap-LLM reasoner: a recommend-only escalation tier for thin/conflicting memory."""
minima/llm/anthropic.py ADDED
@@ -0,0 +1,106 @@
1
+ """Anthropic-backed reasoner (Claude Haiku by default).
2
+
3
+ Uses forced tool use for guaranteed structured output (most robust across SDK
4
+ versions): a single `submit_*` tool with a strict schema, read back from the
5
+ tool_use block's already-parsed `input`. Haiku does not support effort/thinking,
6
+ so neither is set. Any error degrades gracefully to None (caller keeps the
7
+ deterministic recommendation).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections.abc import Sequence
13
+
14
+ from anthropic import AsyncAnthropic
15
+
16
+ from minima.llm.base import (
17
+ CLASSIFY_SCHEMA,
18
+ CLASSIFY_SYSTEM,
19
+ RANK_SYSTEM,
20
+ RANKING_SCHEMA,
21
+ CandidateView,
22
+ ReasonerResult,
23
+ build_rank_user,
24
+ parse_classification,
25
+ parse_ranking,
26
+ )
27
+ from minima.logging import get_logger
28
+ from minima.schemas.common import Difficulty, TaskType
29
+
30
# Module-level logger; used below to report failed reasoner calls.
log = get_logger("minima.llm.anthropic")

# Default Anthropic model id for the reasoner — presumably applied by the caller
# that builds AnthropicReasoner when no model is configured; confirm in the registry.
DEFAULT_MODEL = "claude-haiku-4-5"
33
+
34
+
35
class AnthropicReasoner:
    """Reasoner that asks an Anthropic model for structured output via forced tool use.

    Each request offers exactly one strict-schema tool and forces the model to
    call it, then reads the already-parsed ``input`` of the resulting tool_use
    block. A failed API call is logged and reported as ``None``.
    """

    def __init__(self, *, model: str, api_key: str, timeout_ms: int, max_tokens: int):
        """Store the model settings and build the async client.

        ``timeout_ms`` is converted to seconds before it is handed to the SDK.
        """
        timeout_seconds = timeout_ms / 1000.0
        self._model = model
        self._max_tokens = max_tokens
        self._client = AsyncAnthropic(api_key=api_key, timeout=timeout_seconds)

    async def _tool_call(
        self, *, system: str, user: str, tool_name: str, schema: dict
    ) -> dict | None:
        """Force a single tool call and return the tool's parsed input.

        Returns ``None`` when the API call raises, when the response holds no
        tool_use block, or when the first tool_use block's ``input`` is not a dict.
        """
        tool_spec = {
            "name": tool_name,
            "description": "Submit the structured result.",
            "strict": True,
            "input_schema": schema,
        }
        try:
            response = await self._client.messages.create(
                model=self._model,
                max_tokens=self._max_tokens,
                system=system,
                messages=[{"role": "user", "content": user}],
                tools=[tool_spec],
                tool_choice={"type": "tool", "name": tool_name},
            )
        except Exception as exc:  # noqa: BLE001 — reasoner must never break a recommendation
            log.warning("reasoner_call_failed", model=self._model, error=str(exc))
            return None

        # Only the first tool_use block is considered. Attributes are read with
        # getattr because the SDK's content block is a union and only the
        # tool_use variant carries `input`.
        tool_block = next(
            (blk for blk in response.content if getattr(blk, "type", None) == "tool_use"),
            None,
        )
        if tool_block is None:
            return None
        payload = getattr(tool_block, "input", None)
        if isinstance(payload, dict):
            return payload
        return None

    async def rank(
        self,
        *,
        task: str,
        task_type: str,
        difficulty: str,
        candidates: Sequence[CandidateView],
        memory_block: str,
        cost_quality_tradeoff: float,
    ) -> ReasonerResult | None:
        """Ask the model to rank ``candidates`` for ``task``.

        Returns the parsed ranking restricted to the candidates' model ids, or
        ``None`` when the tool call yields nothing usable.
        """
        prompt = build_rank_user(
            task=task,
            task_type=task_type,
            difficulty=difficulty,
            candidates=candidates,
            memory_block=memory_block,
            cost_quality_tradeoff=cost_quality_tradeoff,
        )
        payload = await self._tool_call(
            system=RANK_SYSTEM, user=prompt, tool_name="submit_ranking", schema=RANKING_SCHEMA
        )
        if payload is None:
            return None
        known_ids = {cand.model_id for cand in candidates}
        return parse_ranking(payload, known_ids)

    async def classify(self, *, task: str) -> tuple[TaskType, Difficulty] | None:
        """Ask the model for a (task type, difficulty) pair.

        Only the first 2000 characters of ``task`` are sent. Returns ``None``
        when the tool call yields nothing usable.
        """
        prompt = f"Classify this task:\n\n{task[:2000]}"
        payload = await self._tool_call(
            system=CLASSIFY_SYSTEM,
            user=prompt,
            tool_name="submit_classification",
            schema=CLASSIFY_SCHEMA,
        )
        if payload is None:
            return None
        return parse_classification(payload)
minima/llm/base.py ADDED
@@ -0,0 +1,196 @@
1
+ """Reasoner protocol, shared prompt construction, and strict-output parsing.
2
+
3
+ The reasoner is consulted ONLY when memory evidence is thin or conflicting. It ranks
4
+ candidate models for a task; it never writes prompts, runs models, or does the task.
5
+ Its estimates are blended with the deterministic ones — it advises, it does not decide.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from collections.abc import Sequence
12
+ from dataclasses import dataclass
13
+ from typing import Any, Protocol, runtime_checkable
14
+
15
+ from minima.memory.records import clamp01
16
+ from minima.schemas.common import Difficulty, TaskType
17
+
18
+
19
@dataclass(slots=True)
class CandidateView:
    """The view of a candidate model handed to the reasoner.

    ``build_rank_user`` serializes these fields into the candidate table of the
    ranking prompt. Field order is the positional constructor order.
    """

    model_id: str
    provider: str
    # Token prices; presumably USD per million tokens, going by the names — confirm.
    input_cost_per_mtok: float
    output_cost_per_mtok: float
    context_window: int
    # Capability prior; the ranking prompt describes it as lying in [0, 1].
    capability_prior: float
    est_cost_usd: float
    # Current success estimate; shown to the reasoner as "current_estimate".
    predicted_success: float
    # Observed latency percentile (ms) from similar past outcomes; None without evidence.
    est_latency_ms: float | None = None
33
+
34
+
35
@dataclass(slots=True)
class ReasonerRanking:
    """One candidate's entry in a reasoner ranking."""

    model_id: str
    # Success estimate for this candidate; parse_ranking clamps it via clamp01.
    predicted_success: float
    # Free-text justification; parse_ranking truncates it to 300 characters.
    rationale: str
40
+
41
+
42
+ @dataclass(slots=True)
43
+ class ReasonerResult:
44
+ rankings: list[ReasonerRanking]
45
+ recommended: str | None = None
46
+ fallback: str | None = None
47
+
48
+ def by_model(self) -> dict[str, ReasonerRanking]:
49
+ return {r.model_id: r for r in self.rankings}
50
+
51
+
52
@runtime_checkable
class Reasoner(Protocol):
    """Structural interface for a model-ranking reasoner.

    Only ``rank`` is part of the protocol. Because the protocol is
    ``runtime_checkable``, ``isinstance`` checks only that a ``rank`` attribute
    exists, not its signature.
    """

    async def rank(
        self,
        *,
        task: str,
        task_type: str,
        difficulty: str,
        candidates: Sequence[CandidateView],
        memory_block: str,
        cost_quality_tradeoff: float,
    ) -> ReasonerResult | None: ...
64
+
65
+
66
# --- structured output schemas (additionalProperties:false everywhere for strict mode) ---

# JSON Schema for the ranking result. "fallback" is nullable but still listed
# under "required", so all three top-level keys are always expected.
RANKING_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "recommended": {"type": "string"},
        "fallback": {"type": ["string", "null"]},
        "ranking": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "model_id": {"type": "string"},
                    "predicted_success": {"type": "number"},
                    "rationale": {"type": "string"},
                },
                "required": ["model_id", "predicted_success", "rationale"],
                "additionalProperties": False,
            },
        },
    },
    "required": ["recommended", "fallback", "ranking"],
    "additionalProperties": False,
}

# JSON Schema for the classification result; the allowed values are generated
# from the TaskType and Difficulty enums at import time.
CLASSIFY_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "task_type": {"type": "string", "enum": [t.value for t in TaskType]},
        "difficulty": {"type": "string", "enum": [d.value for d in Difficulty]},
    },
    "required": ["task_type", "difficulty"],
    "additionalProperties": False,
}

# System prompt for ranking. It names the submit_ranking tool explicitly and
# states the cost/quality scale as 0 (cheapest acceptable) to 10 (highest quality).
RANK_SYSTEM = (
    "You are a model-selection advisor for an LLM cost-optimization service. "
    "Given a task, a table of candidate models (id, provider, token prices, a capability "
    "prior in [0,1], and a current estimated success in [0,1]), and a memory block of past "
    "outcomes on similar tasks, rank the candidates by how likely each is to complete THIS "
    "task well. Prefer cheaper models when their expected quality is adequate for the "
    "requested cost/quality tradeoff (0 = cheapest acceptable, 10 = highest quality). "
    "You do NOT write prompts, run models, or perform the task — you only rank models. "
    "Return predicted_success in [0,1] for each candidate via the submit_ranking tool."
)

# System prompt for task classification.
CLASSIFY_SYSTEM = (
    "Classify an LLM task by type and difficulty for routing. Respond only via the tool."
)
115
+
116
+
117
def build_rank_user(
    *,
    task: str,
    task_type: str,
    difficulty: str,
    candidates: Sequence[CandidateView],
    memory_block: str,
    cost_quality_tradeoff: float,
) -> str:
    """Build the user message for a ranking request.

    The message contains the task metadata, the task text (first 2000
    characters), the candidates as an indented JSON table with rounded numbers,
    and the recalled memory (first 4000 characters, or a placeholder when the
    block is blank). ``observed_latency_ms`` appears in a row only when the
    candidate has a latency estimate.
    """
    rows = []
    for cand in candidates:
        row = {
            "model_id": cand.model_id,
            "provider": cand.provider,
            "input_per_mtok": round(cand.input_cost_per_mtok, 4),
            "output_per_mtok": round(cand.output_cost_per_mtok, 4),
            "context_window": cand.context_window,
            "capability_prior": round(cand.capability_prior, 3),
            "current_estimate": round(cand.predicted_success, 3),
            "est_cost_usd": round(cand.est_cost_usd, 6),
        }
        if cand.est_latency_ms is not None:
            # Added last so the key order matches rows that carry a latency.
            row["observed_latency_ms"] = round(cand.est_latency_ms, 0)
        rows.append(row)

    recalled = memory_block.strip()
    if not recalled:
        recalled = "(no past outcomes recalled)"

    return (
        f"task_type: {task_type}\ndifficulty: {difficulty}\n"
        f"cost_quality_tradeoff: {cost_quality_tradeoff}\n\n"
        f"TASK:\n{task[:2000]}\n\n"
        f"CANDIDATE MODELS:\n{json.dumps(rows, indent=2)}\n\n"
        f"MEMORY OF PAST OUTCOMES:\n{recalled[:4000]}"
    )
152
+
153
+
154
def parse_ranking(data: Any, valid_ids: set[str]) -> ReasonerResult | None:
    """Validate raw reasoner output and convert it into a ``ReasonerResult``.

    Parsing is tolerant: anything malformed is dropped rather than raised, so a
    bad reasoner response cannot break the caller.

    Args:
        data: The decoded tool input; expected to be a dict with ``ranking``,
            ``recommended`` and ``fallback`` keys.
        valid_ids: Model ids of the candidates that were offered. Entries that
            name any other model are discarded.

    Returns:
        The parsed result, or ``None`` when ``data`` is not a dict or when it
        yields neither a usable ranking entry nor a valid ``recommended`` id.
    """
    if not isinstance(data, dict):
        return None

    raw_items = data.get("ranking")
    if not isinstance(raw_items, (list, tuple)):
        # Missing, null, or a non-array value: treat as "no rankings" rather
        # than trying to iterate it.
        raw_items = ()

    def _known_id(value: Any) -> str | None:
        # The isinstance check comes first: testing an unhashable value
        # (list/dict) for set membership raises TypeError.
        if isinstance(value, str) and value in valid_ids:
            return value
        return None

    # Validate the picks before building any result object so the
    # "nothing usable" case can return early.
    recommended = _known_id(data.get("recommended"))
    fallback = _known_id(data.get("fallback"))

    rankings: list[ReasonerRanking] = []
    for item in raw_items:
        if not isinstance(item, dict):
            continue
        model_id = _known_id(item.get("model_id"))
        if model_id is None:
            continue
        raw_rationale = item.get("rationale")
        # A null rationale becomes "" instead of the literal string "None".
        rationale = "" if raw_rationale is None else str(raw_rationale)
        rankings.append(
            ReasonerRanking(
                model_id=model_id,
                predicted_success=clamp01(_as_float(item.get("predicted_success"))),
                rationale=rationale[:300],
            )
        )

    if not rankings and recommended is None:
        return None
    return ReasonerResult(rankings=rankings, recommended=recommended, fallback=fallback)
181
+
182
+
183
def parse_classification(data: Any) -> tuple[TaskType, Difficulty] | None:
    """Convert raw classifier output into a ``(TaskType, Difficulty)`` pair.

    Returns ``None`` when ``data`` is not a dict, when either key is missing,
    or when a value is not a member of its enum.
    """
    if not isinstance(data, dict):
        return None
    try:
        kind = TaskType(data["task_type"])
        level = Difficulty(data["difficulty"])
    except (KeyError, ValueError):
        return None
    return kind, level
190
+
191
+
192
+ def _as_float(value: Any, default: float = 0.0) -> float:
193
+ try:
194
+ return float(value)
195
+ except (TypeError, ValueError):
196
+ return default