minima-cli 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minima/__init__.py +5 -0
- minima/api/__init__.py +1 -0
- minima/api/auth.py +39 -0
- minima/api/errors.py +40 -0
- minima/api/routers/__init__.py +1 -0
- minima/api/routers/calibration.py +50 -0
- minima/api/routers/feedback.py +279 -0
- minima/api/routers/health.py +50 -0
- minima/api/routers/models.py +42 -0
- minima/api/routers/recommend.py +66 -0
- minima/api/routers/savings.py +55 -0
- minima/api/routers/strategies.py +33 -0
- minima/catalog/__init__.py +1 -0
- minima/catalog/data/capability_priors.json +210 -0
- minima/catalog/data/model_aliases.json +12 -0
- minima/catalog/merge.py +69 -0
- minima/catalog/refresh.py +54 -0
- minima/catalog/sources/__init__.py +1 -0
- minima/catalog/sources/litellm.py +19 -0
- minima/catalog/sources/openrouter.py +25 -0
- minima/catalog/store.py +86 -0
- minima/config.py +288 -0
- minima/deps.py +35 -0
- minima/llm/__init__.py +1 -0
- minima/llm/anthropic.py +106 -0
- minima/llm/base.py +196 -0
- minima/llm/gemini.py +124 -0
- minima/llm/registry.py +54 -0
- minima/logging.py +28 -0
- minima/main.py +109 -0
- minima/memory/__init__.py +1 -0
- minima/memory/adapter.py +572 -0
- minima/memory/keys.py +83 -0
- minima/memory/records.py +190 -0
- minima/memory/threadpool.py +41 -0
- minima/metrics/__init__.py +1 -0
- minima/metrics/calibration.py +415 -0
- minima/metrics/report.py +116 -0
- minima/metrics/savings.py +98 -0
- minima/recommender/__init__.py +1 -0
- minima/recommender/_pg_pool.py +38 -0
- minima/recommender/_redis_client.py +32 -0
- minima/recommender/aggregate.py +157 -0
- minima/recommender/classify.py +165 -0
- minima/recommender/decisionlog.py +505 -0
- minima/recommender/durablerefs.py +312 -0
- minima/recommender/engine.py +997 -0
- minima/recommender/escalation.py +83 -0
- minima/recommender/propensity.py +189 -0
- minima/recommender/recstore.py +368 -0
- minima/recommender/score.py +318 -0
- minima/recommender/types.py +166 -0
- minima/schemas/__init__.py +1 -0
- minima/schemas/common.py +73 -0
- minima/schemas/feedback.py +34 -0
- minima/schemas/models_catalog.py +36 -0
- minima/schemas/recommend.py +104 -0
- minima/schemas/savings.py +39 -0
- minima/schemas/strategies.py +57 -0
- minima/schemas/workflow.py +43 -0
- minima/seeding/__init__.py +1 -0
- minima/seeding/items.py +42 -0
- minima/seeding/llmrouterbench.py +232 -0
- minima/seeding/routerbench.py +141 -0
- minima/seeding/run_seed.py +56 -0
- minima/seeding/synthetic.py +70 -0
- minima/tenancy/__init__.py +8 -0
- minima/tenancy/context.py +37 -0
- minima/tenancy/passthrough.py +110 -0
- minima/version.py +3 -0
- minima_cli-0.4.9.dist-info/METADATA +275 -0
- minima_cli-0.4.9.dist-info/RECORD +161 -0
- minima_cli-0.4.9.dist-info/WHEEL +4 -0
- minima_cli-0.4.9.dist-info/entry_points.txt +5 -0
- minima_cli-0.4.9.dist-info/licenses/LICENSE +295 -0
- minima_client/__init__.py +19 -0
- minima_client/autocapture.py +101 -0
- minima_client/client.py +301 -0
- minima_client/errors.py +23 -0
- minima_harness/LICENSE_PI +32 -0
- minima_harness/__init__.py +16 -0
- minima_harness/agent/__init__.py +72 -0
- minima_harness/agent/agent.py +276 -0
- minima_harness/agent/events.py +124 -0
- minima_harness/agent/loop.py +311 -0
- minima_harness/agent/state.py +79 -0
- minima_harness/agent/tools.py +97 -0
- minima_harness/ai/__init__.py +66 -0
- minima_harness/ai/compat.py +71 -0
- minima_harness/ai/errors.py +96 -0
- minima_harness/ai/events.py +117 -0
- minima_harness/ai/openrouter_catalog.py +153 -0
- minima_harness/ai/provider_catalog.py +299 -0
- minima_harness/ai/provider_quirks.py +37 -0
- minima_harness/ai/providers/__init__.py +75 -0
- minima_harness/ai/providers/_common.py +48 -0
- minima_harness/ai/providers/anthropic.py +290 -0
- minima_harness/ai/providers/base.py +65 -0
- minima_harness/ai/providers/faux.py +173 -0
- minima_harness/ai/providers/google.py +221 -0
- minima_harness/ai/providers/openai_compat.py +278 -0
- minima_harness/ai/registry.py +184 -0
- minima_harness/ai/stream.py +82 -0
- minima_harness/ai/tools.py +51 -0
- minima_harness/ai/types.py +204 -0
- minima_harness/ai/usage.py +41 -0
- minima_harness/minima/__init__.py +40 -0
- minima_harness/minima/cache.py +102 -0
- minima_harness/minima/config.py +85 -0
- minima_harness/minima/goals.py +226 -0
- minima_harness/minima/judge.py +144 -0
- minima_harness/minima/mapping.py +147 -0
- minima_harness/minima/meter.py +143 -0
- minima_harness/minima/router.py +220 -0
- minima_harness/minima/runtime.py +544 -0
- minima_harness/minima/signals.py +195 -0
- minima_harness/session/__init__.py +14 -0
- minima_harness/session/format.py +35 -0
- minima_harness/session/store.py +236 -0
- minima_harness/tasks/__init__.py +17 -0
- minima_harness/tasks/task_set.py +78 -0
- minima_harness/tools/__init__.py +7 -0
- minima_harness/tools/_io.py +34 -0
- minima_harness/tools/bash.py +70 -0
- minima_harness/tools/builtin.py +23 -0
- minima_harness/tools/edit.py +50 -0
- minima_harness/tools/find.py +38 -0
- minima_harness/tools/grep.py +73 -0
- minima_harness/tools/ls.py +35 -0
- minima_harness/tools/read.py +38 -0
- minima_harness/tools/tasks.py +75 -0
- minima_harness/tools/write.py +36 -0
- minima_harness/tui/__init__.py +3 -0
- minima_harness/tui/analytics.py +111 -0
- minima_harness/tui/app.py +1927 -0
- minima_harness/tui/bridge.py +103 -0
- minima_harness/tui/cli.py +227 -0
- minima_harness/tui/clipboard.py +60 -0
- minima_harness/tui/commands.py +49 -0
- minima_harness/tui/compaction.py +17 -0
- minima_harness/tui/config_cli.py +141 -0
- minima_harness/tui/config_store.py +237 -0
- minima_harness/tui/context.py +93 -0
- minima_harness/tui/customize.py +95 -0
- minima_harness/tui/diff.py +53 -0
- minima_harness/tui/editor.py +43 -0
- minima_harness/tui/extensions.py +84 -0
- minima_harness/tui/extra_models.py +52 -0
- minima_harness/tui/history.py +71 -0
- minima_harness/tui/mubit.py +295 -0
- minima_harness/tui/overlays.py +593 -0
- minima_harness/tui/packages.py +59 -0
- minima_harness/tui/run_modes.py +66 -0
- minima_harness/tui/theme.py +77 -0
- minima_harness/tui/welcome.py +83 -0
- minima_harness/tui/widgets/__init__.py +3 -0
- minima_harness/tui/widgets/banner.py +38 -0
- minima_harness/tui/widgets/editor.py +83 -0
- minima_harness/tui/widgets/footer.py +73 -0
- minima_harness/tui/widgets/messages.py +151 -0
- minima_harness/tui/widgets/status.py +57 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "fallback-snapshot-2026-06",
|
|
3
|
+
"note": "Cold-start fallback only. Live cost is overlaid from LiteLLM/OpenRouter at runtime; capability_by_task_type are normalized [0,1] heuristic priors, intended to be corrected by accumulated memory. Prices are approximate USD per million tokens.",
|
|
4
|
+
"models": [
|
|
5
|
+
{
|
|
6
|
+
"model_id": "claude-haiku-4-5",
|
|
7
|
+
"provider": "anthropic",
|
|
8
|
+
"display_name": "Claude Haiku 4.5",
|
|
9
|
+
"input_cost_per_mtok": 1.0,
|
|
10
|
+
"output_cost_per_mtok": 5.0,
|
|
11
|
+
"cache_read_cost_per_mtok": 0.1,
|
|
12
|
+
"supports_prompt_caching": true,
|
|
13
|
+
"context_window": 200000,
|
|
14
|
+
"max_output_tokens": 64000,
|
|
15
|
+
"capability_priors": {"intelligence_index": 0.5},
|
|
16
|
+
"capability_by_task_type": {
|
|
17
|
+
"code": 0.7, "summarization": 0.85, "extraction": 0.88, "qa": 0.8,
|
|
18
|
+
"reasoning": 0.6, "classification": 0.9, "translation": 0.85,
|
|
19
|
+
"creative": 0.75, "rag": 0.82, "tool_use": 0.75, "other": 0.72
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"model_id": "claude-sonnet-4-6",
|
|
24
|
+
"provider": "anthropic",
|
|
25
|
+
"display_name": "Claude Sonnet 4.6",
|
|
26
|
+
"input_cost_per_mtok": 3.0,
|
|
27
|
+
"output_cost_per_mtok": 15.0,
|
|
28
|
+
"cache_read_cost_per_mtok": 0.3,
|
|
29
|
+
"supports_prompt_caching": true,
|
|
30
|
+
"context_window": 200000,
|
|
31
|
+
"max_output_tokens": 64000,
|
|
32
|
+
"capability_priors": {"intelligence_index": 0.72},
|
|
33
|
+
"capability_by_task_type": {
|
|
34
|
+
"code": 0.88, "summarization": 0.9, "extraction": 0.92, "qa": 0.9,
|
|
35
|
+
"reasoning": 0.85, "classification": 0.93, "translation": 0.9,
|
|
36
|
+
"creative": 0.88, "rag": 0.9, "tool_use": 0.9, "other": 0.86
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"model_id": "claude-opus-4-8",
|
|
41
|
+
"provider": "anthropic",
|
|
42
|
+
"display_name": "Claude Opus 4.8",
|
|
43
|
+
"input_cost_per_mtok": 15.0,
|
|
44
|
+
"output_cost_per_mtok": 75.0,
|
|
45
|
+
"cache_read_cost_per_mtok": 1.5,
|
|
46
|
+
"supports_prompt_caching": true,
|
|
47
|
+
"context_window": 200000,
|
|
48
|
+
"max_output_tokens": 64000,
|
|
49
|
+
"capability_priors": {"intelligence_index": 0.95},
|
|
50
|
+
"capability_by_task_type": {
|
|
51
|
+
"code": 0.96, "summarization": 0.93, "extraction": 0.94, "qa": 0.95,
|
|
52
|
+
"reasoning": 0.97, "classification": 0.95, "translation": 0.93,
|
|
53
|
+
"creative": 0.94, "rag": 0.94, "tool_use": 0.95, "other": 0.93
|
|
54
|
+
}
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"model_id": "gpt-4o-mini",
|
|
58
|
+
"provider": "openai",
|
|
59
|
+
"display_name": "GPT-4o mini",
|
|
60
|
+
"input_cost_per_mtok": 0.15,
|
|
61
|
+
"output_cost_per_mtok": 0.6,
|
|
62
|
+
"cache_read_cost_per_mtok": 0.075,
|
|
63
|
+
"supports_prompt_caching": true,
|
|
64
|
+
"context_window": 128000,
|
|
65
|
+
"max_output_tokens": 16384,
|
|
66
|
+
"capability_priors": {"intelligence_index": 0.45},
|
|
67
|
+
"capability_by_task_type": {
|
|
68
|
+
"code": 0.68, "summarization": 0.84, "extraction": 0.86, "qa": 0.78,
|
|
69
|
+
"reasoning": 0.55, "classification": 0.9, "translation": 0.83,
|
|
70
|
+
"creative": 0.72, "rag": 0.8, "tool_use": 0.72, "other": 0.7
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"model_id": "gpt-4o",
|
|
75
|
+
"provider": "openai",
|
|
76
|
+
"display_name": "GPT-4o",
|
|
77
|
+
"input_cost_per_mtok": 2.5,
|
|
78
|
+
"output_cost_per_mtok": 10.0,
|
|
79
|
+
"cache_read_cost_per_mtok": 1.25,
|
|
80
|
+
"supports_prompt_caching": true,
|
|
81
|
+
"context_window": 128000,
|
|
82
|
+
"max_output_tokens": 16384,
|
|
83
|
+
"capability_priors": {"intelligence_index": 0.7},
|
|
84
|
+
"capability_by_task_type": {
|
|
85
|
+
"code": 0.85, "summarization": 0.9, "extraction": 0.9, "qa": 0.89,
|
|
86
|
+
"reasoning": 0.82, "classification": 0.92, "translation": 0.9,
|
|
87
|
+
"creative": 0.87, "rag": 0.89, "tool_use": 0.89, "other": 0.85
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"model_id": "gemini-2.5-flash-lite",
|
|
92
|
+
"provider": "google",
|
|
93
|
+
"display_name": "Gemini 2.5 Flash-Lite",
|
|
94
|
+
"input_cost_per_mtok": 0.075,
|
|
95
|
+
"output_cost_per_mtok": 0.30,
|
|
96
|
+
"cache_read_cost_per_mtok": 0.018,
|
|
97
|
+
"supports_prompt_caching": true,
|
|
98
|
+
"context_window": 1048576,
|
|
99
|
+
"max_output_tokens": 65536,
|
|
100
|
+
"capability_priors": {"intelligence_index": 0.40},
|
|
101
|
+
"capability_by_task_type": {
|
|
102
|
+
"code": 0.62, "summarization": 0.80, "extraction": 0.84, "qa": 0.74,
|
|
103
|
+
"reasoning": 0.50, "classification": 0.86, "translation": 0.82,
|
|
104
|
+
"creative": 0.68, "rag": 0.78, "tool_use": 0.68, "other": 0.64
|
|
105
|
+
}
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
"model_id": "gemini-3.1-flash-lite",
|
|
109
|
+
"provider": "google",
|
|
110
|
+
"display_name": "Gemini 3.1 Flash-Lite",
|
|
111
|
+
"input_cost_per_mtok": 0.08,
|
|
112
|
+
"output_cost_per_mtok": 0.35,
|
|
113
|
+
"cache_read_cost_per_mtok": 0.02,
|
|
114
|
+
"supports_prompt_caching": true,
|
|
115
|
+
"context_window": 1048576,
|
|
116
|
+
"max_output_tokens": 65536,
|
|
117
|
+
"capability_priors": {"intelligence_index": 0.48},
|
|
118
|
+
"capability_by_task_type": {
|
|
119
|
+
"code": 0.68, "summarization": 0.83, "extraction": 0.86, "qa": 0.78,
|
|
120
|
+
"reasoning": 0.58, "classification": 0.88, "translation": 0.84,
|
|
121
|
+
"creative": 0.72, "rag": 0.80, "tool_use": 0.72, "other": 0.68
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
"model_id": "gemini-2.5-flash",
|
|
126
|
+
"provider": "google",
|
|
127
|
+
"display_name": "Gemini 2.5 Flash",
|
|
128
|
+
"input_cost_per_mtok": 0.3,
|
|
129
|
+
"output_cost_per_mtok": 2.5,
|
|
130
|
+
"cache_read_cost_per_mtok": 0.075,
|
|
131
|
+
"supports_prompt_caching": true,
|
|
132
|
+
"context_window": 1048576,
|
|
133
|
+
"max_output_tokens": 65536,
|
|
134
|
+
"capability_priors": {"intelligence_index": 0.55},
|
|
135
|
+
"capability_by_task_type": {
|
|
136
|
+
"code": 0.74, "summarization": 0.88, "extraction": 0.88, "qa": 0.82,
|
|
137
|
+
"reasoning": 0.66, "classification": 0.9, "translation": 0.88,
|
|
138
|
+
"creative": 0.78, "rag": 0.86, "tool_use": 0.8, "other": 0.76
|
|
139
|
+
}
|
|
140
|
+
},
|
|
141
|
+
{
|
|
142
|
+
"model_id": "gemini-3.5-flash",
|
|
143
|
+
"provider": "google",
|
|
144
|
+
"display_name": "Gemini 3.5 Flash",
|
|
145
|
+
"input_cost_per_mtok": 0.50,
|
|
146
|
+
"output_cost_per_mtok": 3.00,
|
|
147
|
+
"cache_read_cost_per_mtok": 0.125,
|
|
148
|
+
"supports_prompt_caching": true,
|
|
149
|
+
"context_window": 1048576,
|
|
150
|
+
"max_output_tokens": 65536,
|
|
151
|
+
"capability_priors": {"intelligence_index": 0.65},
|
|
152
|
+
"capability_by_task_type": {
|
|
153
|
+
"code": 0.85, "summarization": 0.88, "extraction": 0.90, "qa": 0.84,
|
|
154
|
+
"reasoning": 0.72, "classification": 0.91, "translation": 0.88,
|
|
155
|
+
"creative": 0.82, "rag": 0.87, "tool_use": 0.88, "other": 0.80
|
|
156
|
+
}
|
|
157
|
+
},
|
|
158
|
+
{
|
|
159
|
+
"model_id": "gemini-3-flash-preview",
|
|
160
|
+
"provider": "google",
|
|
161
|
+
"display_name": "Gemini 3 Flash (Preview)",
|
|
162
|
+
"input_cost_per_mtok": 0.35,
|
|
163
|
+
"output_cost_per_mtok": 2.50,
|
|
164
|
+
"cache_read_cost_per_mtok": 0.088,
|
|
165
|
+
"supports_prompt_caching": true,
|
|
166
|
+
"context_window": 1048576,
|
|
167
|
+
"max_output_tokens": 65536,
|
|
168
|
+
"capability_priors": {"intelligence_index": 0.62},
|
|
169
|
+
"capability_by_task_type": {
|
|
170
|
+
"code": 0.80, "summarization": 0.87, "extraction": 0.88, "qa": 0.83,
|
|
171
|
+
"reasoning": 0.70, "classification": 0.90, "translation": 0.87,
|
|
172
|
+
"creative": 0.80, "rag": 0.85, "tool_use": 0.84, "other": 0.78
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
"model_id": "gemini-2.5-pro",
|
|
177
|
+
"provider": "google",
|
|
178
|
+
"display_name": "Gemini 2.5 Pro",
|
|
179
|
+
"input_cost_per_mtok": 1.25,
|
|
180
|
+
"output_cost_per_mtok": 10.0,
|
|
181
|
+
"cache_read_cost_per_mtok": 0.31,
|
|
182
|
+
"supports_prompt_caching": true,
|
|
183
|
+
"context_window": 1048576,
|
|
184
|
+
"max_output_tokens": 65536,
|
|
185
|
+
"capability_priors": {"intelligence_index": 0.8},
|
|
186
|
+
"capability_by_task_type": {
|
|
187
|
+
"code": 0.9, "summarization": 0.92, "extraction": 0.92, "qa": 0.91,
|
|
188
|
+
"reasoning": 0.88, "classification": 0.93, "translation": 0.92,
|
|
189
|
+
"creative": 0.89, "rag": 0.91, "tool_use": 0.9, "other": 0.88
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
,{
|
|
193
|
+
"model_id": "gemini-3.1-pro-preview",
|
|
194
|
+
"provider": "google",
|
|
195
|
+
"display_name": "Gemini 3.1 Pro (Preview)",
|
|
196
|
+
"input_cost_per_mtok": 2.00,
|
|
197
|
+
"output_cost_per_mtok": 12.00,
|
|
198
|
+
"cache_read_cost_per_mtok": 0.50,
|
|
199
|
+
"supports_prompt_caching": true,
|
|
200
|
+
"context_window": 1048576,
|
|
201
|
+
"max_output_tokens": 65536,
|
|
202
|
+
"capability_priors": {"intelligence_index": 0.90},
|
|
203
|
+
"capability_by_task_type": {
|
|
204
|
+
"code": 0.93, "summarization": 0.93, "extraction": 0.93, "qa": 0.93,
|
|
205
|
+
"reasoning": 0.95, "classification": 0.94, "translation": 0.93,
|
|
206
|
+
"creative": 0.91, "rag": 0.93, "tool_use": 0.92, "other": 0.91
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
]
|
|
210
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"note": "Maps canonical Minima model_id -> alternate keys used by LiteLLM/OpenRouter price maps and by offline datasets (e.g. RouterBench). Used to overlay live cost and to normalize seeded model ids.",
|
|
3
|
+
"aliases": {
|
|
4
|
+
"claude-haiku-4-5": ["claude-haiku-4-5", "anthropic/claude-haiku-4.5", "claude-3-5-haiku-20241022", "claude-3-5-haiku-latest"],
|
|
5
|
+
"claude-sonnet-4-6": ["claude-sonnet-4-6", "anthropic/claude-sonnet-4.6", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-latest"],
|
|
6
|
+
"claude-opus-4-8": ["claude-opus-4-8", "anthropic/claude-opus-4.8", "claude-3-opus-20240229"],
|
|
7
|
+
"gpt-4o-mini": ["gpt-4o-mini", "openai/gpt-4o-mini", "gpt-4o-mini-2024-07-18"],
|
|
8
|
+
"gpt-4o": ["gpt-4o", "openai/gpt-4o", "gpt-4o-2024-08-06"],
|
|
9
|
+
"gemini-2.5-flash": ["gemini-2.5-flash", "gemini/gemini-2.5-flash", "google/gemini-2.5-flash"],
|
|
10
|
+
"gemini-2.5-pro": ["gemini-2.5-pro", "gemini/gemini-2.5-pro", "google/gemini-2.5-pro"]
|
|
11
|
+
}
|
|
12
|
+
}
|
minima/catalog/merge.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Overlay live cost data onto the static capability snapshot."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from minima.schemas.models_catalog import ModelCard
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _per_mtok(per_token: Any) -> float | None:
|
|
12
|
+
try:
|
|
13
|
+
return float(per_token) * 1_000_000.0
|
|
14
|
+
except (TypeError, ValueError):
|
|
15
|
+
return None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def overlay_litellm(
    cards: list[ModelCard],
    litellm_map: dict[str, Any],
    aliases: dict[str, list[str]],
) -> tuple[list[ModelCard], int]:
    """Overlay live LiteLLM prices onto catalog cards.

    For each card, the keys listed for its ``model_id`` in ``aliases`` (or the
    ``model_id`` itself when it has no alias entry) are tried in order; the
    first key whose value in ``litellm_map`` is a dict supplies the live data.
    Cards without a matching entry are passed through unchanged; matched cards
    are replaced by the result of ``card.model_copy(update=...)``.

    Args:
        cards: Current catalog cards.
        litellm_map: Raw ``{model_key: {...cost fields...}}`` price map.
        aliases: ``{model_id: [candidate keys in litellm_map]}``.

    Returns:
        ``(new_cards, updated)``: ``new_cards`` holds one card per input card,
        in the same order, and ``updated`` counts the cards that received at
        least one live price.
    """
    now = datetime.now(UTC)
    out: list[ModelCard] = []
    updated = 0

    for card in cards:
        entry: dict[str, Any] | None = None
        for key in aliases.get(card.model_id, [card.model_id]):
            candidate = litellm_map.get(key)
            if isinstance(candidate, dict):
                entry = candidate
                break
        if entry is None:
            out.append(card)
            continue

        in_cost = _per_mtok(entry.get("input_cost_per_token"))
        out_cost = _per_mtok(entry.get("output_cost_per_token"))
        cache_cost = _per_mtok(entry.get("cache_read_input_token_cost"))

        # The price map is external data: a non-numeric context-window value
        # must not abort the whole overlay, so fall back to the card's value.
        raw_ctx = entry.get("max_input_tokens") or entry.get("max_tokens")
        try:
            ctx = int(raw_ctx) if raw_ctx else 0
        except (TypeError, ValueError, OverflowError):
            ctx = 0

        # An absent or explicit-null flag keeps what the card already says.
        caching = entry.get("supports_prompt_caching")

        update: dict[str, Any] = {
            "input_cost_per_mtok": (
                in_cost if in_cost is not None else card.input_cost_per_mtok
            ),
            "output_cost_per_mtok": (
                out_cost if out_cost is not None else card.output_cost_per_mtok
            ),
            "cache_read_cost_per_mtok": (
                cache_cost
                if cache_cost is not None
                else card.cache_read_cost_per_mtok
            ),
            "context_window": ctx if ctx > 0 else card.context_window,
            "supports_prompt_caching": (
                card.supports_prompt_caching if caching is None else bool(caching)
            ),
        }

        # Only claim live provenance when a price was actually parsed; a
        # matched entry without usable prices leaves the cost fields (and
        # their source/staleness markers) exactly as they were.
        if in_cost is not None or out_cost is not None or cache_cost is not None:
            update["cost_source"] = "litellm"
            update["cost_fetched_at"] = now
            update["cost_stale"] = False
            updated += 1

        out.append(card.model_copy(update=update))

    return out, updated
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Catalog refresh: fetch live cost, overlay onto snapshot, swap the store."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
|
|
8
|
+
from minima.catalog.merge import overlay_litellm
|
|
9
|
+
from minima.catalog.sources.litellm import fetch_litellm_prices
|
|
10
|
+
from minima.catalog.store import Catalog, CatalogStore, load_aliases
|
|
11
|
+
from minima.config import Settings
|
|
12
|
+
from minima.logging import get_logger
|
|
13
|
+
|
|
14
|
+
log = get_logger("minima.catalog")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def refresh_catalog(settings: Settings, store: CatalogStore) -> bool:
    """Fetch live prices, overlay them on the current catalog and swap it in.

    Best-effort: if the fetch or the overlay fails, or no card receives live
    data, the catalog held by ``store`` is left untouched.

    Args:
        settings: Supplies ``minima_litellm_prices_url`` and
            ``minima_catalog_stale_after_seconds``.
        store: Catalog holder; read once up front and replaced on success.

    Returns:
        ``True`` if a catalog with live cost was stored, ``False`` otherwise.
    """
    base = store.get()
    aliases = load_aliases()
    try:
        litellm_map = await fetch_litellm_prices(settings.minima_litellm_prices_url)
    except Exception as exc:  # noqa: BLE001 — keep last-good catalog on any failure
        log.warning("catalog_fetch_failed", error=str(exc))
        return False

    # The payload is external data; a malformed entry must degrade to
    # "keep last-good catalog" just like a failed fetch does.
    try:
        new_cards, updated = overlay_litellm(base.cards, litellm_map, aliases)
    except Exception as exc:  # noqa: BLE001 — keep last-good catalog on any failure
        log.warning("catalog_overlay_failed", error=str(exc))
        return False

    if not updated:
        log.warning("catalog_no_models_matched", total=len(new_cards))
        return False

    store.set(
        Catalog(
            cards=new_cards,
            version=base.version,
            refreshed_at=datetime.now(UTC),
            cost_source="litellm",
            stale_after_seconds=settings.minima_catalog_stale_after_seconds,
        )
    )
    log.info("catalog_refreshed", updated=updated, total=len(new_cards))
    return True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def refresh_loop(settings: Settings, store: CatalogStore) -> None:
    """Run ``refresh_catalog`` forever, pausing between attempts.

    Intended as a background task for the app lifespan; it only ends when the
    task is cancelled. Any other error raised by a refresh attempt is logged
    and the loop continues after the usual pause of
    ``settings.minima_catalog_refresh_seconds`` seconds.
    """
    while True:
        try:
            await refresh_catalog(settings, store)
        except Exception as exc:  # noqa: BLE001
            # asyncio.CancelledError derives from BaseException, so it is not
            # caught here and cancellation propagates to the caller.
            log.warning("catalog_refresh_loop_error", error=str(exc))
        pause = settings.minima_catalog_refresh_seconds
        await asyncio.sleep(pause)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""External catalog data sources (best-effort, network)."""
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Fetch the LiteLLM community price map (per-token costs for thousands of models)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def fetch_litellm_prices(url: str, timeout: float = 20.0) -> dict[str, Any]:
    """Download the LiteLLM price map and return it as a dict.

    Args:
        url: Location of the JSON price map.
        timeout: Timeout passed to the HTTP client.

    Returns:
        The decoded JSON body when it is a dict (``{model_key: {...cost
        fields...}}``), otherwise an empty dict.

    Raises on network/HTTP error; callers treat failure as "keep last-good
    catalog".
    """
    async with httpx.AsyncClient(timeout=timeout) as http:
        response = await http.get(url)
        response.raise_for_status()
        payload = response.json()
    if not isinstance(payload, dict):
        return {}
    return payload
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Fetch the OpenRouter model list (pricing + context windows).
|
|
2
|
+
|
|
3
|
+
Used as a secondary reconciliation source; wired into refresh in a later phase.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
async def fetch_openrouter_models(
    url: str, api_key: str | None = None, timeout: float = 20.0
) -> dict[str, dict[str, Any]]:
    """Download the OpenRouter model list and index it by model id.

    Args:
        url: Location of the model-list endpoint.
        api_key: Optional key; when truthy it is sent as a Bearer token.
        timeout: Timeout passed to the HTTP client.

    Returns:
        ``{model_id: model_object}`` built from the dict rows of the
        response's ``"data"`` list that carry a truthy ``"id"``. An empty dict
        is returned when the response does not have that shape.
    """
    headers: dict[str, str] = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    async with httpx.AsyncClient(timeout=timeout) as http:
        response = await http.get(url, headers=headers)
        response.raise_for_status()
        payload = response.json()

    if not isinstance(payload, dict):
        return {}
    rows = payload.get("data")
    if not isinstance(rows, list):
        return {}

    models: dict[str, dict[str, Any]] = {}
    for row in rows:
        if isinstance(row, dict) and row.get("id"):
            models[str(row.get("id"))] = row
    return models
|
minima/catalog/store.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""In-memory model catalog snapshot, atomically swappable on refresh."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import UTC, datetime
|
|
8
|
+
from importlib.resources import files
|
|
9
|
+
|
|
10
|
+
from minima.config import Settings
|
|
11
|
+
from minima.schemas.models_catalog import ModelCard
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _data_text(name: str) -> str:
    """Return the UTF-8 text of ``data/<name>`` inside ``minima.catalog``."""
    resource = files("minima.catalog").joinpath("data", name)
    return resource.read_text(encoding="utf-8")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load_aliases() -> dict[str, list[str]]:
    """Load the ``aliases`` mapping from the packaged ``model_aliases.json``.

    Returns an empty dict when the document has no ``"aliases"`` key.
    """
    document = json.loads(_data_text("model_aliases.json"))
    return document.get("aliases", {})
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def load_snapshot_cards() -> tuple[list[ModelCard], str]:
    """Build model cards from the packaged ``capability_priors.json``.

    Every card is tagged as coming from the fallback snapshot: its cost source
    and capability source are ``"fallback-snapshot"``, it has no fetch
    timestamp and its cost is marked stale.

    Returns:
        ``(cards, version)`` where ``version`` is the document's ``"version"``
        value, or ``"fallback-snapshot"`` when that key is absent.
    """
    document = json.loads(_data_text("capability_priors.json"))
    snapshot_cards: list[ModelCard] = [
        ModelCard(
            # Required keys: a missing one raises KeyError.
            model_id=spec["model_id"],
            provider=spec["provider"],
            display_name=spec.get("display_name", ""),
            input_cost_per_mtok=spec["input_cost_per_mtok"],
            output_cost_per_mtok=spec["output_cost_per_mtok"],
            # Optional keys with their defaults.
            cache_read_cost_per_mtok=spec.get("cache_read_cost_per_mtok"),
            supports_prompt_caching=spec.get("supports_prompt_caching", False),
            context_window=spec.get("context_window", 0),
            max_output_tokens=spec.get("max_output_tokens"),
            capability_priors=spec.get("capability_priors", {}),
            capability_by_task_type=spec.get("capability_by_task_type", {}),
            # Provenance markers for snapshot data.
            cost_source="fallback-snapshot",
            cost_fetched_at=None,
            cost_stale=True,
            capability_source="fallback-snapshot",
        )
        for spec in document.get("models", [])
    ]
    return snapshot_cards, document.get("version", "fallback-snapshot")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(slots=True)
|
|
50
|
+
class Catalog:
|
|
51
|
+
cards: list[ModelCard]
|
|
52
|
+
version: str
|
|
53
|
+
refreshed_at: datetime | None
|
|
54
|
+
cost_source: str
|
|
55
|
+
stale_after_seconds: int = 86_400
|
|
56
|
+
|
|
57
|
+
def by_id(self) -> dict[str, ModelCard]:
|
|
58
|
+
return {c.model_id: c for c in self.cards}
|
|
59
|
+
|
|
60
|
+
@property
|
|
61
|
+
def stale(self) -> bool:
|
|
62
|
+
if self.cost_source == "fallback-snapshot" or self.refreshed_at is None:
|
|
63
|
+
return True
|
|
64
|
+
age = (datetime.now(UTC) - self.refreshed_at).total_seconds()
|
|
65
|
+
return age > self.stale_after_seconds
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class CatalogStore:
    """Holder for the current catalog.

    ``get`` returns the current ``Catalog`` reference and ``set`` rebinds it;
    neither takes a lock, so a refresh replaces the catalog by swapping a
    single attribute.
    """

    def __init__(self, settings: Settings):
        """Start from the packaged snapshot, marked as never refreshed."""
        self._settings = settings
        snapshot_cards, snapshot_version = load_snapshot_cards()
        initial = Catalog(
            cards=snapshot_cards,
            version=snapshot_version,
            refreshed_at=None,
            cost_source="fallback-snapshot",
            stale_after_seconds=settings.minima_catalog_stale_after_seconds,
        )
        self._catalog = initial

    def get(self) -> Catalog:
        """Return the catalog currently held."""
        return self._catalog

    def set(self, catalog: Catalog) -> None:
        """Replace the held catalog with ``catalog``."""
        self._catalog = catalog
|