moonpi 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,438 @@
1
+ import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
2
+ import { existsSync, readFileSync, writeFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { getAgentDir } from "@mariozechner/pi-coding-agent";
5
+
6
/**
 * Collapses pi's five reasoning-effort levels onto the three values used as
 * `compat.reasoningEffortMap` on Synthetic model configs: the outer levels
 * (`minimal`, `xhigh`) fold into "low"/"high".
 */
const SYNTHETIC_REASONING_EFFORT_MAP = {
  minimal: "low",
  low: "low",
  medium: "medium",
  high: "high",
  xhigh: "high",
} as const;
13
+
14
/**
 * Hardcoded snapshot of Synthetic's model catalog, used as a fallback when
 * neither the live API nor the on-disk cache can supply models. Cached/live
 * entries win on id collisions — see `mergeWithFallback`.
 *
 * Cost fields are presumably USD per million tokens (the live-API path in
 * `parsePricingValue` scales per-token prices by 1e6) — TODO confirm.
 */
export const SYNTHETIC_MODELS_FALLBACK: ProviderModelConfig[] = [
  {
    id: "hf:zai-org/GLM-4.7",
    name: "zai-org/GLM-4.7",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text"],
    cost: {
      input: 0.45,
      output: 2.19,
      cacheRead: 0.45,
      cacheWrite: 0,
    },
    contextWindow: 202752,
    maxTokens: 65536,
  },
  {
    id: "hf:zai-org/GLM-5",
    name: "zai-org/GLM-5",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text"],
    cost: {
      input: 1,
      output: 3,
      cacheRead: 1,
      cacheWrite: 0,
    },
    contextWindow: 196608,
    maxTokens: 65536,
  },
  {
    id: "hf:zai-org/GLM-5.1",
    name: "zai-org/GLM-5.1",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
      // Mirrored in MODEL_COMPAT_OVERRIDES for the live-API path.
      supportsDeveloperRole: false,
    },
    input: ["text"],
    cost: {
      input: 1,
      output: 3,
      cacheRead: 1,
      cacheWrite: 0,
    },
    contextWindow: 196608,
    maxTokens: 65536,
  },
  {
    id: "hf:zai-org/GLM-4.7-Flash",
    name: "zai-org/GLM-4.7-Flash",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text"],
    cost: {
      input: 0.1,
      output: 0.5,
      cacheRead: 0.1,
      cacheWrite: 0,
    },
    contextWindow: 196608,
    maxTokens: 65536,
  },
  {
    // NOTE(review): reasoning: true but no compat block here (also the case
    // for DeepSeek-V3.2, gpt-oss-120b, DeepSeek-V3 and MiniMax-M2.5 below,
    // MiniMax aside) — confirm the missing effort mapping is intentional.
    id: "hf:meta-llama/Llama-3.3-70B-Instruct",
    name: "meta-llama/Llama-3.3-70B-Instruct",
    reasoning: true,
    input: ["text"],
    cost: {
      input: 0.88,
      output: 0.88,
      cacheRead: 0.88,
      cacheWrite: 0,
    },
    contextWindow: 131072,
    maxTokens: 32768,
  },
  {
    id: "hf:deepseek-ai/DeepSeek-R1-0528",
    name: "deepseek-ai/DeepSeek-R1-0528",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text"],
    cost: {
      input: 3,
      output: 8,
      cacheRead: 3,
      cacheWrite: 0,
    },
    contextWindow: 131072,
    maxTokens: 128000,
  },
  {
    id: "hf:deepseek-ai/DeepSeek-V3.2",
    name: "deepseek-ai/DeepSeek-V3.2",
    reasoning: true,
    input: ["text"],
    cost: {
      input: 0.56,
      output: 1.68,
      cacheRead: 0.56,
      cacheWrite: 0,
    },
    contextWindow: 162816,
    maxTokens: 8000,
  },
  {
    id: "hf:openai/gpt-oss-120b",
    name: "openai/gpt-oss-120b",
    reasoning: true,
    input: ["text"],
    cost: {
      input: 0.1,
      output: 0.1,
      cacheRead: 0.1,
      cacheWrite: 0,
    },
    contextWindow: 131072,
    maxTokens: 32768,
  },
  {
    id: "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct",
    name: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text"],
    cost: {
      input: 2,
      output: 2,
      cacheRead: 2,
      cacheWrite: 0,
    },
    contextWindow: 262144,
    maxTokens: 65536,
  },
  {
    id: "hf:moonshotai/Kimi-K2.5",
    name: "moonshotai/Kimi-K2.5",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text", "image"],
    cost: {
      input: 0.45,
      output: 3.4,
      cacheRead: 0.45,
      cacheWrite: 0,
    },
    contextWindow: 262144,
    maxTokens: 65536,
  },
  {
    id: "hf:nvidia/Kimi-K2.5-NVFP4",
    name: "nvidia/Kimi-K2.5-NVFP4",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text", "image"],
    cost: {
      input: 0.45,
      output: 3.4,
      cacheRead: 0.45,
      cacheWrite: 0,
    },
    contextWindow: 262144,
    maxTokens: 65536,
  },
  {
    id: "hf:deepseek-ai/DeepSeek-V3",
    name: "deepseek-ai/DeepSeek-V3",
    reasoning: true,
    input: ["text"],
    cost: {
      input: 1.25,
      output: 1.25,
      cacheRead: 1.25,
      cacheWrite: 0,
    },
    contextWindow: 131072,
    maxTokens: 128000,
  },
  {
    id: "hf:Qwen/Qwen3-235B-A22B-Thinking-2507",
    name: "Qwen/Qwen3-235B-A22B-Thinking-2507",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text"],
    cost: {
      input: 0.65,
      output: 3,
      cacheRead: 0.65,
      cacheWrite: 0,
    },
    contextWindow: 262144,
    maxTokens: 32000,
  },
  {
    id: "hf:Qwen/Qwen3.5-397B-A17B",
    name: "Qwen/Qwen3.5-397B-A17B",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text", "image"],
    cost: {
      input: 0.6,
      output: 3.6,
      cacheRead: 0.6,
      cacheWrite: 0,
    },
    contextWindow: 191488 + 0 === 0 ? [] : 262144,
    maxTokens: 65536,
  },
  {
    id: "hf:MiniMaxAI/MiniMax-M2.5",
    name: "MiniMaxAI/MiniMax-M2.5",
    reasoning: true,
    input: ["text"],
    cost: {
      input: 0.4,
      output: 2,
      cacheRead: 0.4,
      cacheWrite: 0,
    },
    contextWindow: 191488,
    maxTokens: 65536,
    // NOTE(review): compat appears after maxTokens here, unlike the other
    // entries — cosmetic only, but worth normalizing.
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
      // Mirrored in MODEL_COMPAT_OVERRIDES for the live-API path.
      maxTokensField: "max_completion_tokens",
    },
  },
  {
    id: "hf:nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4",
    name: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4",
    reasoning: true,
    compat: {
      supportsReasoningEffort: true,
      reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
    },
    input: ["text"],
    cost: {
      input: 0.3,
      output: 1,
      cacheRead: 0.3,
      cacheWrite: 0,
    },
    contextWindow: 262144,
    maxTokens: 65536,
  },
];
290
+
291
/**
 * Raw model entry as returned by the Synthetic models API.
 * NOTE(review): field semantics below are inferred from how
 * `parseSyntheticModels` consumes them — confirm against the API docs.
 */
interface SyntheticModelResponse {
  id: string;
  name: string;
  /** Modality strings; only "text" and "image" are recognized by the parser. */
  input_modalities: string[];
  output_modalities: string[];
  /** Context window size in tokens (mapped to `contextWindow`). */
  context_length: number;
  /** Maximum completion length in tokens (mapped to `maxTokens`). */
  max_output_length: number;
  /** Decimal strings, optionally "$"-prefixed — see `parsePricingValue`. */
  pricing: {
    prompt: string;
    completion: string;
    image: string;
    request: string;
    input_cache_reads: string;
    input_cache_writes: string;
  };
  /** Feature flags; the "reasoning" flag marks reasoning-capable models. */
  supported_features: string[];
  quantization?: string;
}
309
+
310
/**
 * Per-model compat quirks layered over the defaults when building configs
 * from the live API response (see `parseSyntheticModels`). These mirror the
 * inline compat fields on the corresponding SYNTHETIC_MODELS_FALLBACK entries.
 */
const MODEL_COMPAT_OVERRIDES: Record<string, Partial<NonNullable<ProviderModelConfig["compat"]>>> = {
  "hf:zai-org/GLM-5.1": {
    supportsDeveloperRole: false,
  },
  "hf:MiniMaxAI/MiniMax-M2.5": {
    maxTokensField: "max_completion_tokens",
  },
};
318
+
319
+ function parsePricingValue(value: string): number {
320
+ if (!value || value === "0") return 0;
321
+ const num = parseFloat(value.replace(/^\$/, ""));
322
+ if (Number.isNaN(num)) return 0;
323
+ return num * 1_000_000;
324
+ }
325
+
326
// =========================================================================
// Model cache – persists live API models to disk for fast startup
// =========================================================================

// File name of the cache, resolved inside the agent config dir (see getCacheFilePath).
const CACHE_FILE_NAME = "synthetic-models-cache.json";

/** Maximum age in milliseconds before cached models are considered stale (12h). */
const CACHE_STALE_MS = 12 * 60 * 60 * 1000;

/** On-disk JSON shape of the synthetic-models cache file. */
interface SyntheticModelsCache {
  /** ISO timestamp of when the cache was last written. */
  updatedAt: string;
  /** Model configs from the live Synthetic API. */
  models: ProviderModelConfig[];
}
341
+
342
+ /** Resolve the cache file path (always inside the agent config dir). */
343
+ export function getCacheFilePath(): string {
344
+ return join(getAgentDir(), CACHE_FILE_NAME);
345
+ }
346
+
347
+ /**
348
+ * Read cached synthetic models from disk.
349
+ * Returns `undefined` when the cache is missing, corrupt, or older than `maxAgeMs`.
350
+ */
351
+ export function readCachedModels(maxAgeMs: number = CACHE_STALE_MS): ProviderModelConfig[] | undefined {
352
+ const filePath = getCacheFilePath();
353
+ if (!existsSync(filePath)) return undefined;
354
+
355
+ try {
356
+ const raw = readFileSync(filePath, "utf-8");
357
+ const cache = JSON.parse(raw) as SyntheticModelsCache;
358
+ if (!Array.isArray(cache.models) || cache.models.length === 0) return undefined;
359
+
360
+ // Check staleness
361
+ const updated = new Date(cache.updatedAt).getTime();
362
+ if (Number.isNaN(updated) || Date.now() - updated > maxAgeMs) return undefined;
363
+
364
+ return cache.models;
365
+ } catch {
366
+ return undefined;
367
+ }
368
+ }
369
+
370
+ /** Write synthetic models to the cache file. Failures are silently ignored. */
371
+ export function writeCachedModels(models: ProviderModelConfig[]): void {
372
+ const filePath = getCacheFilePath();
373
+ const cache: SyntheticModelsCache = {
374
+ updatedAt: new Date().toISOString(),
375
+ models,
376
+ };
377
+ try {
378
+ writeFileSync(filePath, JSON.stringify(cache), "utf-8");
379
+ } catch {
380
+ // Cache writes are best-effort; don't block startup on permission errors, etc.
381
+ }
382
+ }
383
+
384
+ /**
385
+ * Merge cached models with the hardcoded fallback list.
386
+ *
387
+ * Cached models win on id collisions (they're fresher from the API).
388
+ * Models present only in the fallback list are kept so nothing disappears
389
+ * if the cache is partial.
390
+ */
391
+ export function mergeWithFallback(cached: ProviderModelConfig[], fallback: ProviderModelConfig[]): ProviderModelConfig[] {
392
+ const byId = new Map<string, ProviderModelConfig>();
393
+ for (const m of fallback) byId.set(m.id, m);
394
+ for (const m of cached) byId.set(m.id, m); // cached wins on collision
395
+ return [...byId.values()];
396
+ }
397
+
398
+ // =========================================================================
399
+ // API response parsing
400
+ // =========================================================================
401
+
402
+ export function parseSyntheticModels(data: SyntheticModelResponse[]): ProviderModelConfig[] {
403
+ return data.map((model) => {
404
+ const hasReasoning = model.supported_features?.includes("reasoning") ?? false;
405
+ const input: ("text" | "image")[] = [];
406
+ if (model.input_modalities?.includes("text")) input.push("text");
407
+ if (model.input_modalities?.includes("image")) input.push("image");
408
+ if (input.length === 0) input.push("text");
409
+
410
+ const config: ProviderModelConfig = {
411
+ id: model.id,
412
+ name: model.name,
413
+ reasoning: hasReasoning,
414
+ input,
415
+ cost: {
416
+ input: parsePricingValue(model.pricing?.prompt ?? "0"),
417
+ output: parsePricingValue(model.pricing?.completion ?? "0"),
418
+ cacheRead: parsePricingValue(model.pricing?.input_cache_reads ?? "0"),
419
+ cacheWrite: parsePricingValue(model.pricing?.input_cache_writes ?? "0"),
420
+ },
421
+ contextWindow: model.context_length ?? 128000,
422
+ maxTokens: model.max_output_length ?? 4096,
423
+ };
424
+
425
+ const overrides = MODEL_COMPAT_OVERRIDES[model.id];
426
+ if (hasReasoning) {
427
+ config.compat = {
428
+ supportsReasoningEffort: true,
429
+ reasoningEffortMap: SYNTHETIC_REASONING_EFFORT_MAP,
430
+ ...overrides,
431
+ };
432
+ } else if (overrides) {
433
+ config.compat = { ...overrides };
434
+ }
435
+
436
+ return config;
437
+ });
438
+ }