pi-free 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,328 @@
1
+ /**
2
+ * Model Telemetry — tracks real-world performance of free models.
3
+ *
4
+ * Hooks into Pi's turn_end event to capture token usage, latency, and
5
+ * success/failure per model. Persists to ~/.pi/free-telemetry.json.
6
+ *
7
+ * Provides a real-world performance signal alongside static CI benchmarks.
8
+ */
9
+
10
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
11
+ import { homedir } from "node:os";
12
+ import { join } from "node:path";
13
+ import { createLogger } from "./logger.ts";
14
+
15
+ const _logger = createLogger("telemetry");
16
+
17
+ // =============================================================================
18
+ // Types
19
+ // =============================================================================
20
+
21
+ export interface TelemetryEntry {
22
+ timestamp: number;
23
+ provider: string;
24
+ model: string;
25
+ success: boolean;
26
+ latencyMs: number;
27
+ promptTokens: number;
28
+ completionTokens: number;
29
+ totalTokens: number;
30
+ tokensPerSecond: number;
31
+ cost: number;
32
+ stopReason?: string;
33
+ error?: string;
34
+ }
35
+
36
+ export interface ModelTelemetry {
37
+ /** Total calls tracked for this model. */
38
+ totalCalls: number;
39
+ /** Successful calls. */
40
+ successCalls: number;
41
+ /** Failed calls. */
42
+ errorCalls: number;
43
+ /** Total tokens consumed (input + output). */
44
+ totalTokens: number;
45
+ /** Total prompt (input) tokens. */
46
+ totalPromptTokens: number;
47
+ /** Total completion (output) tokens. */
48
+ totalCompletionTokens: number;
49
+ /** Sum of all latencies in ms (for avg calculation). */
50
+ totalLatencyMs: number;
51
+ /** Sum of all costs. */
52
+ totalCost: number;
53
+
54
+ // Derived (recomputed and persisted whenever a call is recorded)
55
+ avgLatencyMs: number;
56
+ avgTokensPerSecond: number;
57
+ successRate: number;
58
+
59
+ /** Recent calls (last 50). */
60
+ recentCalls: TelemetryEntry[];
61
+ }
62
+
63
+ export interface TelemetryStore {
64
+ /** Keyed by "provider/model" */
65
+ models: Record<string, ModelTelemetry>;
66
+ /** When the store was last updated. */
67
+ lastUpdated: number;
68
+ }
69
+
70
+ // =============================================================================
71
+ // Constants
72
+ // =============================================================================
73
+
74
+ const TELEMETRY_DIR = join(homedir(), ".pi");
75
+ const TELEMETRY_FILE = join(TELEMETRY_DIR, "free-telemetry.json");
76
+ const MAX_RECENT_CALLS = 50;
77
+
78
+ // In-flight tracking: keyed by "provider/model", value is start timestamp
79
+ const _inFlight = new Map<string, number>();
80
+
81
+ // =============================================================================
82
+ // Storage
83
+ // =============================================================================
84
+
85
/**
 * Make sure the telemetry directory exists before the store is written.
 *
 * `mkdirSync` with `recursive: true` is a no-op when the directory is already
 * present, so no separate existence check is needed; dropping the check also
 * removes the window between "check" and "create".
 */
function ensureDir(): void {
  mkdirSync(TELEMETRY_DIR, { recursive: true });
}
90
+
91
/**
 * Read the telemetry store from disk.
 *
 * Returns a fresh, empty store when the file does not exist, cannot be read,
 * is not valid JSON, or does not contain an object with a `models` map. The
 * shape check matters because callers index into `store.models` directly and
 * would otherwise throw on a file containing e.g. `null` or `[]`.
 *
 * @returns The persisted store, or an empty store on any failure (a warning
 *   is logged in the failure case).
 */
function loadStore(): TelemetryStore {
  const emptyStore = (): TelemetryStore => ({ models: {}, lastUpdated: Date.now() });

  try {
    if (!existsSync(TELEMETRY_FILE)) {
      return emptyStore();
    }

    const parsed: unknown = JSON.parse(readFileSync(TELEMETRY_FILE, "utf-8"));
    if (typeof parsed !== "object" || parsed === null) {
      throw new Error("telemetry store is not a JSON object");
    }

    const models = (parsed as { models?: unknown }).models;
    if (typeof models !== "object" || models === null || Array.isArray(models)) {
      throw new Error('telemetry store has no valid "models" map');
    }

    return parsed as TelemetryStore;
  } catch (err) {
    _logger.warn("Failed to load telemetry store, resetting", {
      error: String(err),
    });
    return emptyStore();
  }
}
105
+
106
/**
 * Persist the store to the telemetry file as pretty-printed JSON.
 *
 * Stamps `store.lastUpdated` with the current time (mutating the argument)
 * before writing. Failures are logged as warnings and never thrown.
 *
 * @param store - The store to write
 */
function saveStore(store: TelemetryStore): void {
  try {
    ensureDir();
    store.lastUpdated = Date.now();
    const serialized = JSON.stringify(store, null, 2);
    writeFileSync(TELEMETRY_FILE, serialized, "utf-8");
  } catch (err) {
    const details = { error: String(err) };
    _logger.warn("Failed to save telemetry store", details);
  }
}
117
+
118
+ // =============================================================================
119
+ // Entry management
120
+ // =============================================================================
121
+
122
/**
 * Aggregate a list of telemetry entries into a per-model summary.
 *
 * Counters and sums cover every entry passed in. The latency and
 * tokens-per-second averages are computed from successful entries only.
 * `recentCalls` in the result is the trailing MAX_RECENT_CALLS entries.
 *
 * @param modelKey - "provider/model" key (not used in the computation)
 * @param entries - Entries to aggregate
 * @returns The aggregated summary
 */
function deriveModelTelemetry(modelKey: string, entries: TelemetryEntry[]): ModelTelemetry {
  let successCalls = 0;
  let tokenSum = 0;
  let promptSum = 0;
  let completionSum = 0;
  let latencySum = 0;
  let costSum = 0;
  // Sums restricted to successful calls; these feed the averages.
  let okTokens = 0;
  let okLatencyMs = 0;

  for (const call of entries) {
    tokenSum += call.totalTokens;
    promptSum += call.promptTokens;
    completionSum += call.completionTokens;
    latencySum += call.latencyMs;
    costSum += call.cost;

    if (call.success) {
      successCalls += 1;
      okTokens += call.totalTokens;
      okLatencyMs += call.latencyMs;
    }
  }

  const totalCalls = entries.length;

  let avgLatencyMs = 0;
  if (successCalls > 0) {
    avgLatencyMs = Math.round(okLatencyMs / successCalls);
  }

  let avgTokensPerSecond = 0;
  if (okLatencyMs > 0) {
    avgTokensPerSecond = parseFloat((okTokens / (okLatencyMs / 1000)).toFixed(1));
  }

  let successRate = 0;
  if (totalCalls > 0) {
    successRate = parseFloat((successCalls / totalCalls * 100).toFixed(1));
  }

  return {
    totalCalls,
    successCalls,
    errorCalls: totalCalls - successCalls,
    totalTokens: tokenSum,
    totalPromptTokens: promptSum,
    totalCompletionTokens: completionSum,
    totalLatencyMs: latencySum,
    totalCost: costSum,
    avgLatencyMs,
    avgTokensPerSecond,
    successRate,
    recentCalls: entries.slice(-MAX_RECENT_CALLS),
  };
}
165
+
166
/**
 * Record one entry for its model and persist the updated store.
 *
 * Lifetime counters (calls, tokens, latency, cost) and the success rate are
 * accumulated on top of the values already stored, so they keep growing once
 * the recent-call window is full. Previously they were recomputed from
 * `recentCalls` alone, which is capped at MAX_RECENT_CALLS, so the "totals"
 * silently stopped increasing after ~50 calls.
 *
 * Only the last MAX_RECENT_CALLS entries are kept as `recentCalls`; the
 * latency and tokens-per-second averages come from `deriveModelTelemetry`
 * over that window.
 *
 * @param entry - The completed call to record
 */
function addEntry(entry: TelemetryEntry): void {
  const store = loadStore();
  const modelKey = `${entry.provider}/${entry.model}`;
  const previous = store.models[modelKey];

  // Tolerate a hand-edited or partially written record without an array.
  const stored = previous?.recentCalls;
  const history: TelemetryEntry[] = Array.isArray(stored) ? stored : [];
  const recent = [...history, entry].slice(-MAX_RECENT_CALLS);

  // Window-based figures (averages, recentCalls) come from the recent entries.
  const windowed = deriveModelTelemetry(modelKey, recent);

  const totalCalls = (previous?.totalCalls ?? 0) + 1;
  const successCalls = (previous?.successCalls ?? 0) + (entry.success ? 1 : 0);

  store.models[modelKey] = {
    ...windowed,
    totalCalls,
    successCalls,
    errorCalls: totalCalls - successCalls,
    totalTokens: (previous?.totalTokens ?? 0) + entry.totalTokens,
    totalPromptTokens: (previous?.totalPromptTokens ?? 0) + entry.promptTokens,
    totalCompletionTokens: (previous?.totalCompletionTokens ?? 0) + entry.completionTokens,
    totalLatencyMs: (previous?.totalLatencyMs ?? 0) + entry.latencyMs,
    totalCost: (previous?.totalCost ?? 0) + entry.cost,
    // Keep the rate consistent with the lifetime counters above.
    successRate: parseFloat(((successCalls / totalCalls) * 100).toFixed(1)),
  };
  saveStore(store);
}
179
+
180
+ // =============================================================================
181
+ // Public API
182
+ // =============================================================================
183
+
184
/**
 * Load the store from disk and return the telemetry of every tracked model.
 *
 * @returns Map from "provider/model" key to that model's telemetry
 */
export function getAllTelemetry(): Record<string, ModelTelemetry> {
  const { models } = loadStore();
  return models;
}
191
+
192
/**
 * Look up the telemetry recorded for one provider/model pair.
 *
 * @param provider - The provider ID
 * @param model - The model ID
 * @returns The stored telemetry, or null when nothing is recorded for the pair
 */
export function getModelTelemetry(provider: string, model: string): ModelTelemetry | null {
  const { models } = loadStore();
  const found = models[`${provider}/${model}`];
  return found ?? null;
}
199
+
200
/**
 * Format a model's telemetry as a human-readable string (for status bar / /model list).
 *
 * The call count is always shown. The success rate is shown whenever it is a
 * finite number — including 0%, which is the case a user most needs to see
 * (a model that has failed every call). Average latency and throughput are
 * shown only when positive, i.e. when at least one timed successful call exists.
 *
 * @param provider - The provider ID
 * @param model - The model ID
 * @returns The formatted summary, or undefined if no telemetry data is available
 */
export function formatModelTelemetry(provider: string, model: string): string | undefined {
  const telemetry = getModelTelemetry(provider, model);
  // `!(x > 0)` also rejects NaN / missing counts from a damaged store file.
  if (!telemetry || !(telemetry.totalCalls > 0)) return undefined;

  const parts: string[] = [`${telemetry.totalCalls} calls`];
  if (Number.isFinite(telemetry.successRate)) {
    parts.push(`${telemetry.successRate}% ok`);
  }
  if (telemetry.avgLatencyMs > 0) {
    parts.push(`${telemetry.avgLatencyMs}ms`);
  }
  if (telemetry.avgTokensPerSecond > 0) {
    parts.push(`${telemetry.avgTokensPerSecond} tok/s`);
  }

  return parts.join(" · ");
}
224
+
225
/**
 * Summarize telemetry across every model of one provider.
 *
 * A model belongs to the provider when its store key starts with
 * "<provider>/".
 *
 * @param provider - The provider ID
 * @returns Combined call count, combined cost, and number of matching models
 */
export function getProviderTelemetry(provider: string): {
  totalCalls: number;
  totalCost: number;
  models: number;
} {
  const prefix = `${provider}/`;

  return Object.entries(loadStore().models)
    .filter(([modelKey]) => modelKey.startsWith(prefix))
    .reduce(
      (summary, [, data]) => {
        summary.totalCalls += data.totalCalls;
        summary.totalCost += data.totalCost;
        summary.models += 1;
        return summary;
      },
      { totalCalls: 0, totalCost: 0, models: 0 },
    );
}
248
+
249
/**
 * Remember the current time as the start of a call to the given model.
 * Call this from before_agent_start or model_select.
 *
 * The timestamp is stored per "provider/model" key, so a later start for the
 * same key replaces the earlier one.
 *
 * @param provider - The provider ID
 * @param model - The model ID
 */
export function startModelCall(provider: string, model: string): void {
  const startedAt = Date.now();
  _inFlight.set(`${provider}/${model}`, startedAt);
}
257
+
258
/**
 * Record a finished model call together with its usage figures.
 * Call this from turn_end when the message is an AssistantMessage.
 *
 * Latency is measured from the timestamp stored by `startModelCall` for the
 * same provider/model; when none is stored, the current time is used as the
 * start. The in-flight timestamp is removed afterwards. When
 * `usage.totalTokens` is falsy, the total is taken as input + output.
 *
 * @param provider - The provider ID
 * @param model - The model ID
 * @param usage - Token usage { input, output, totalTokens }
 * @param cost - Cost in USD
 * @param success - Whether the call succeeded
 * @param stopReason - The stop reason (e.g. "stop", "error")
 * @param errorMessage - Error message if failed
 */
export function recordModelCall(
  provider: string,
  model: string,
  usage: { input: number; output: number; totalTokens: number },
  cost: number,
  success: boolean,
  stopReason?: string,
  errorMessage?: string,
): void {
  const callKey = `${provider}/${model}`;
  const startedAt = _inFlight.get(callKey) ?? Date.now();
  const latencyMs = Date.now() - startedAt;
  _inFlight.delete(callKey);

  const totalTokens = usage.totalTokens || usage.input + usage.output;

  // Throughput is only defined for a positive duration.
  let tokensPerSecond = 0;
  if (latencyMs > 0) {
    tokensPerSecond = parseFloat((totalTokens / (latencyMs / 1000)).toFixed(1));
  }

  const entry: TelemetryEntry = {
    timestamp: Date.now(),
    provider,
    model,
    success,
    latencyMs,
    promptTokens: usage.input,
    completionTokens: usage.output,
    totalTokens,
    tokensPerSecond,
    cost,
    stopReason,
  };
  // The error field is only present when a non-empty message was given.
  if (errorMessage) {
    entry.error = errorMessage;
  }

  addEntry(entry);

  const summary = { latencyMs, totalTokens, tokensPerSecond, success, cost };
  _logger.info(`Telemetry: ${provider}/${model}`, summary);
}
314
+
315
/**
 * Reset telemetry by overwriting the stored data with an empty store.
 */
export function clearTelemetry(): void {
  saveStore({ models: {}, lastUpdated: Date.now() });
}
322
+
323
/**
 * Report where telemetry is persisted.
 *
 * @returns The path of the telemetry JSON file
 */
export function getTelemetryPath(): string {
  const telemetryPath: string = TELEMETRY_FILE;
  return telemetryPath;
}
package/lib/util.ts CHANGED
@@ -341,6 +341,7 @@ export function mapOpenRouterModel(m: {
341
341
  input_modalities?: string[] | null;
342
342
  output_modalities?: string[] | null;
343
343
  };
344
+ isFree?: boolean;
344
345
  }): ProviderModelConfig {
345
346
  const promptPrice = Number.parseFloat(m.pricing?.prompt ?? "0");
346
347
  const completionPrice = Number.parseFloat(m.pricing?.completion ?? "0");
@@ -362,7 +363,15 @@ export function mapOpenRouterModel(m: {
362
363
  maxTokens:
363
364
  m.max_completion_tokens ?? m.top_provider?.max_completion_tokens ?? 4096,
364
365
  _pricingKnown: true,
365
- } as ProviderModelConfig & { _pricingKnown?: boolean };
366
+ ...(typeof m.isFree === "boolean" && {
367
+ _freeKnown: true,
368
+ _isFree: m.isFree,
369
+ }),
370
+ } as ProviderModelConfig & {
371
+ _pricingKnown?: boolean;
372
+ _freeKnown?: boolean;
373
+ _isFree?: boolean;
374
+ };
366
375
  }
367
376
 
368
377
  // =============================================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-free",
3
- "version": "2.0.13",
3
+ "version": "2.0.15",
4
4
  "type": "module",
5
5
  "description": "AI model providers for Pi with free model filtering and dynamic model fetching",
6
6
  "keywords": [
@@ -225,6 +225,8 @@ function stripCommonSuffixes(ctx: {
225
225
  /-bf\d+$/g, // -bf16
226
226
  /-preview$/g, // -preview
227
227
  /-exp$/g, // -exp (experimental)
228
+ /-turbo$/g, // -turbo (Together AI suffix)
229
+ /-instant$/g, // -instant (Groq suffix for fast-response models)
228
230
  /-instruct-0\.\d+$/g, // HuggingFace revision tags
229
231
  ];
230
232
  for (const pattern of suffixesToStrip) {
@@ -248,8 +250,28 @@ function applyProviderNormalization(
248
250
 
249
251
  if (provider === "nvidia") normalizeNvidia(ctx);
250
252
  if (provider === "cloudflare") normalizeCloudflare(ctx);
253
+ // Strip generic org/ prefix (e.g., "google/", "mistralai/") before everything
254
+ const stripped = ctx.normalized.replace(/^[^/]+\//, "");
255
+ if (stripped !== ctx.normalized) {
256
+ ctx.normalized = stripped;
257
+ ctx.strategies.push("strip-org-prefix");
258
+ }
259
+
251
260
  normalizeFreeSuffix(ctx);
252
- if (provider === "ollama") normalizeOllama(ctx);
261
+ // Also strip -free suffix (used by ZenMux, etc.)
262
+ if (ctx.normalized.endsWith("-free")) {
263
+ ctx.normalized = ctx.normalized.replaceAll(/-free$/g, "");
264
+ ctx.strategies.push("strip-free-suffix");
265
+ }
266
+
267
+ // General normalization: convert llamaN → llama-N (e.g., llama3-70b → llama-3-70b)
268
+ if (/^llama\d/.test(ctx.normalized)) {
269
+ ctx.normalized = ctx.normalized.replaceAll(/^llama(\d)/g, "llama-$1");
270
+ ctx.strategies.push("llama-dash-general");
271
+ }
272
+
273
+ if (provider === "ollama" || provider === "ollama-cloud")
274
+ normalizeOllama(ctx);
253
275
  if (provider === "groq") normalizeGroq(ctx);
254
276
  if (provider === "cerebras") normalizeCerebras(ctx);
255
277
  if (provider === "mistral") normalizeMistral(ctx);
@@ -281,6 +303,8 @@ const VARIANT_QUALIFIER_SEGMENTS = new Set([
281
303
  "preview",
282
304
  "adaptive",
283
305
  "fast",
306
+ "instruct",
307
+ "chat",
284
308
  ]);
285
309
 
286
310
  /**
@@ -301,7 +325,13 @@ function isVariantQualifier(segment: string): boolean {
301
325
  // Two-digit year like "25", "24"
302
326
  if (/^\d{2}$/.test(segment)) return true;
303
327
  // Special variant suffixes
304
- if (segment === "speciale" || segment === "chatgpt" || segment === "latest")
328
+ if (
329
+ segment === "speciale" ||
330
+ segment === "chatgpt" ||
331
+ segment === "latest" ||
332
+ segment === "instruct" ||
333
+ segment === "chat"
334
+ )
305
335
  return true;
306
336
  return false;
307
337
  }
@@ -397,12 +427,10 @@ function findBestVariantByPrefix(
397
427
  if (candidates.length === 0) return null;
398
428
 
399
429
  // Pick the candidate with the highest codingIndex
400
- // If tied or no CI, use normalizedScore as tiebreaker
401
430
  candidates.sort((a, b) => {
402
431
  const ciA = a.data.codingIndex ?? -1;
403
432
  const ciB = b.data.codingIndex ?? -1;
404
- if (ciB !== ciA) return ciB - ciA;
405
- return (b.data.normalizedScore ?? 0) - (a.data.normalizedScore ?? 0);
433
+ return ciB - ciA;
406
434
  });
407
435
 
408
436
  // Only return if the best candidate has a codingIndex
@@ -438,7 +466,12 @@ const MODEL_VARIANTS: Record<string, string[]> = {
438
466
  ],
439
467
  "claude-3-opus": ["claude-3-opus", "opus-3"],
440
468
  "llama-3.1-instruct-405b": ["llama-3.1-405b", "llama3.1-405b", "llama-405b"],
441
- "llama-3.1-instruct-70b": ["llama-3.1-70b", "llama3.1-70b", "llama-70b"],
469
+ "llama-3.1-instruct-70b": [
470
+ "llama-3.1-70b",
471
+ "llama3.1-70b",
472
+ "llama-70b",
473
+ "llama-3.1-70b-versatile",
474
+ ],
442
475
  "gemini-1.5-pro": ["gemini-1.5-pro", "gemini1.5-pro", "gemini-pro-1.5"],
443
476
  "qwen2.5-instruct-72b": ["qwen2.5-72b", "qwen-2.5-72b"],
444
477
  "deepseek-v3.2-non-reasoning": ["deepseek-v3", "deepseekv3", "deepseek-chat"],
@@ -453,6 +486,58 @@ const MODEL_VARIANTS: Record<string, string[]> = {
453
486
  "nemotron-super",
454
487
  "nemotron-3",
455
488
  ],
489
+ "glm-4.6v-non-reasoning": [
490
+ "glm-4.6v",
491
+ "glm-4.6v-flash",
492
+ "glm-4.6v-flash-free",
493
+ ],
494
+ "glm-4.7-flash-non-reasoning": ["glm-4.7-flash", "glm-4.7-flash-free"],
495
+ "mistral-small-4-non-reasoning": [
496
+ "mistral-small-24b",
497
+ "mistral-small-24b-instruct",
498
+ "mistral-small-24b-2501",
499
+ ],
500
+ "qwen2.5-coder-instruct-7b": ["qwen2.5-7b", "qwen2.5-7b-instruct"],
501
+ "llama-3.2-instruct-3b": ["llama-3.2-3b", "llama-3.2-3b-instruct"],
502
+ "llama-3.2-instruct-1b": [
503
+ "llama-3.2-1b",
504
+ "llama-3.2-1b-instruct",
505
+ "llama3.2-1b",
506
+ ],
507
+
508
+ // --- Claude 4 series (providers use date-stamped IDs like claude-sonnet-4-20250514) ---
509
+ // Order matters: more specific aliases first to avoid false prefix matches
510
+ "claude-4.5-sonnet-reasoning": [
511
+ "claude-sonnet-4.5",
512
+ "claude-sonnet-4.5-20250601",
513
+ ],
514
+ "claude-4-sonnet-reasoning": ["claude-sonnet-4", "claude-sonnet-4-20250514"],
515
+ "claude-4-opus-reasoning": ["claude-opus-4", "claude-opus-4-20250514"],
516
+
517
+ // --- Qwen Max → Qwen3 Max ---
518
+ "qwen3-max": ["qwen-max", "qwen/qwen-max"],
519
+
520
+ // --- Mistral Large 2411 → Mistral Large 2 (Nov '24) ---
521
+ "mistral-large-2-nov-24": [
522
+ "mistral-large-2411",
523
+ "mistralai/mistral-large-2411",
524
+ ],
525
+
526
+ // --- Groq-specific variants (versatile suffix, numeric context suffixes) ---
527
+ "llama-3.3-instruct-70b": [
528
+ "llama-3.3-70b-versatile",
529
+ "llama3.3-70b",
530
+ "llama-3.3-70b",
531
+ ],
532
+ "llama-3-instruct-70b": ["llama3-70b-8192", "llama3-70b"],
533
+ "llama-3-instruct-8b": ["llama3-8b-8192", "llama3-8b"],
534
+ "llama-3.1-instruct-8b": [
535
+ "llama3.1-8b",
536
+ "llama-3.1-8b",
537
+ "llama-3.1-8b-instant",
538
+ "llama3.1-8b-instruct",
539
+ ],
540
+ "mistral-small-3.1": ["mistral-small-2501", "mistral-small-24b-2501"],
456
541
  };
457
542
 
458
543
  // =============================================================================
@@ -593,7 +678,8 @@ export function findHardcodedBenchmark(
593
678
  modelId: string,
594
679
  provider?: string,
595
680
  ): HardcodedBenchmark | null {
596
- const search = `${modelName} ${modelId}`.toLowerCase();
681
+ // Normalize: convert colons to dashes (Ollama model:tag format)
682
+ const search = `${modelName} ${modelId}`.toLowerCase().replaceAll(":", "-");
597
683
 
598
684
  logDebug({ provider, modelId, modelName, action: "attempt" });
599
685
 
@@ -640,7 +726,7 @@ export function getHardcodedScore(
640
726
  provider?: string,
641
727
  ): number | null {
642
728
  const benchmark = findHardcodedBenchmark(modelName, modelId, provider);
643
- return benchmark?.normalizedScore ?? null;
729
+ return benchmark?.codingIndex ?? null;
644
730
  }
645
731
 
646
732
  /**