npm - pi-free - Versions diffs - 2.0.13 → 2.0.15 - Mend

pi-free 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/CHANGELOG.md +28 -0
package/README.md +9 -5
package/config.ts +15 -0
package/constants.ts +3 -0
package/index.ts +135 -0
package/lib/built-in-toggle.ts +4 -4
package/lib/probe-cache.ts +86 -0
package/lib/provider-compat.ts +33 -0
package/lib/registry.ts +25 -3
package/lib/telemetry.ts +328 -0
package/lib/util.ts +10 -1
package/package.json +1 -1
package/provider-failover/benchmark-lookup.ts +94 -8
package/provider-failover/benchmarks-chunk-0.ts +599 -890
package/provider-failover/benchmarks-chunk-1.ts +655 -924
package/provider-failover/benchmarks-chunk-2.ts +675 -966
package/provider-failover/benchmarks-chunk-3.ts +676 -967
package/provider-failover/benchmarks-chunk-4.ts +704 -954
package/provider-failover/benchmarks-chunk-5.ts +1301 -0
package/provider-failover/hardcoded-benchmarks.ts +9 -3
package/providers/cline/cline-models.ts +200 -68
package/providers/cline/cline.ts +3 -3
package/providers/dynamic-built-in/index.ts +1 -1
package/providers/kilo/kilo.ts +2 -2
package/providers/model-fetcher.ts +3 -1
package/providers/nvidia/nvidia.ts +54 -16
package/providers/ollama/ollama.ts +103 -46
package/providers/opencode-session.ts +398 -371
package/providers/qwen/qwen.ts +2 -2
package/providers/routeway/routeway.ts +391 -0

package/lib/telemetry.ts ADDED Viewed

@@ -0,0 +1,328 @@
+/**
+ * Model Telemetry — tracks real-world performance of free models.
+ *
+ * Hooks into Pi's turn_end event to capture token usage, latency, and
+ * success/failure per model. Persists to ~/.pi/free-telemetry.json.
+ *
+ * Provides a real-world performance signal alongside static CI benchmarks.
+ */
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import { createLogger } from "./logger.ts";
+const _logger = createLogger("telemetry");
+// =============================================================================
+// Types
+// =============================================================================
+export interface TelemetryEntry { // One recorded model call, as built by recordModelCall
+	timestamp: number; // Date.now() value (epoch ms) taken when the entry was built
+	provider: string; // Provider ID
+	model: string; // Model ID
+	success: boolean; // Whether the call succeeded
+	latencyMs: number; // Ms elapsed since startModelCall for this key; ~0 when no start was recorded
+	promptTokens: number; // Input tokens (usage.input)
+	completionTokens: number; // Output tokens (usage.output)
+	totalTokens: number; // usage.totalTokens, or input + output when that value is falsy
+	tokensPerSecond: number; // totalTokens / latency in seconds, one decimal; 0 when latencyMs is 0
+	cost: number; // Cost in USD
+	stopReason?: string; // Stop reason, e.g. "stop" or "error"
+	error?: string; // Error message; only set when a non-empty message was supplied
+}
+export interface ModelTelemetry { // Aggregated statistics stored per "provider/model" key
+	/** Total calls tracked for this model. */
+	totalCalls: number;
+	/** Successful calls. */
+	successCalls: number;
+	/** Failed calls. */
+	errorCalls: number;
+	/** Total tokens consumed (input + output). */
+	totalTokens: number;
+	/** Total prompt (input) tokens. */
+	totalPromptTokens: number;
+	/** Total completion (output) tokens. */
+	totalCompletionTokens: number;
+	/** Sum of all latencies in ms (for avg calculation). */
+	totalLatencyMs: number;
+	/** Sum of all costs. */
+	totalCost: number;
+	// Derived (computed by deriveModelTelemetry when an entry is added, then persisted; not recomputed on read)
+	avgLatencyMs: number; // Mean latency of successful calls, rounded to whole ms; 0 if none succeeded
+	avgTokensPerSecond: number; // Tokens of successful calls / their summed latency in seconds, one decimal; 0 if that latency is 0
+	successRate: number; // Percentage (0-100) with one decimal; 0 when there are no calls
+	/** Recent calls (last 50). */
+	recentCalls: TelemetryEntry[];
+}
+export interface TelemetryStore { // Shape of the JSON document persisted in the telemetry file
+	/** Per-model statistics, keyed by "provider/model". */
+	models: Record<string, ModelTelemetry>;
+	/** When the store was last updated (Date.now() epoch ms; refreshed by saveStore on every write). */
+	lastUpdated: number;
+}
+// =============================================================================
+// Constants
+// =============================================================================
+const TELEMETRY_DIR = join(homedir(), ".pi"); // <home>/.pi
+const TELEMETRY_FILE = join(TELEMETRY_DIR, "free-telemetry.json"); // <home>/.pi/free-telemetry.json
+const MAX_RECENT_CALLS = 50; // Size of the per-model recentCalls window
+// In-flight tracking: keyed by "provider/model", value is start timestamp (one slot per key, so a second start for the same key overwrites the first)
+const _inFlight = new Map<string, number>();
+// =============================================================================
+// Storage
+// =============================================================================
+/** Create the telemetry directory (and any missing parents) unless the path already exists. */
+function ensureDir(): void {
+	if (existsSync(TELEMETRY_DIR)) return;
+	mkdirSync(TELEMETRY_DIR, { recursive: true });
+}
+/**
+ * Read the telemetry store from disk.
+ *
+ * Returns a fresh empty store when the file does not exist, cannot be read,
+ * is not valid JSON, or does not contain an object-valued `models` property.
+ * The shape check matters because every caller indexes `store.models`
+ * outside of any try/catch; a file holding `null`, `[]` or `{}` would
+ * otherwise make them throw.
+ *
+ * @returns The parsed store, or an empty store on any failure (a warning is logged).
+ */
+function loadStore(): TelemetryStore {
+	const emptyStore = (): TelemetryStore => ({ models: {}, lastUpdated: Date.now() });
+	try {
+		if (!existsSync(TELEMETRY_FILE)) {
+			return emptyStore();
+		}
+		const parsed: unknown = JSON.parse(readFileSync(TELEMETRY_FILE, "utf-8"));
+		// JSON.parse can legally yield null, arrays and primitives; accept only
+		// an object that carries a plain-object `models` map.
+		const models =
+			typeof parsed === "object" && parsed !== null
+				? (parsed as { models?: unknown }).models
+				: undefined;
+		if (typeof models !== "object" || models === null || Array.isArray(models)) {
+			// Routed through the catch below so the reset is logged in one place.
+			throw new Error("unexpected telemetry store shape");
+		}
+		return parsed as TelemetryStore;
+	} catch (err) {
+		_logger.warn("Failed to load telemetry store, resetting", {
+			error: String(err),
+		});
+		return emptyStore();
+	}
+}
+/**
+ * Persist the store to the telemetry file as pretty-printed JSON.
+ *
+ * Stamps `store.lastUpdated` (mutating the argument) before writing.
+ * Best-effort: any failure is logged as a warning and swallowed.
+ */
+function saveStore(store: TelemetryStore): void {
+	try {
+		ensureDir();
+		store.lastUpdated = Date.now();
+		const serialized = JSON.stringify(store, null, 2);
+		writeFileSync(TELEMETRY_FILE, serialized, "utf-8");
+	} catch (err) {
+		const details = { error: String(err) };
+		_logger.warn("Failed to save telemetry store", details);
+	}
+}
+// =============================================================================
+// Entry management
+// =============================================================================
+/**
+ * Aggregate a list of call entries into a ModelTelemetry record.
+ *
+ * Totals cover every entry passed in; the latency and throughput averages
+ * use successful entries only. `recentCalls` keeps the last
+ * MAX_RECENT_CALLS entries.
+ *
+ * @param modelKey - "provider/model" key (currently unused by the computation)
+ * @param entries - Entries to aggregate, oldest first
+ */
+function deriveModelTelemetry(modelKey: string, entries: TelemetryEntry[]): ModelTelemetry {
+	const oneDecimal = (value: number): number => parseFloat(value.toFixed(1));
+	let successCalls = 0;
+	let totalTokens = 0;
+	let totalPromptTokens = 0;
+	let totalCompletionTokens = 0;
+	let totalLatencyMs = 0;
+	let totalCost = 0;
+	let okTokens = 0;
+	let okLatencyMs = 0;
+	// Single pass: overall sums plus the success-only sums used for the averages.
+	for (const call of entries) {
+		totalTokens += call.totalTokens;
+		totalPromptTokens += call.promptTokens;
+		totalCompletionTokens += call.completionTokens;
+		totalLatencyMs += call.latencyMs;
+		totalCost += call.cost;
+		if (call.success) {
+			successCalls += 1;
+			okTokens += call.totalTokens;
+			okLatencyMs += call.latencyMs;
+		}
+	}
+	const totalCalls = entries.length;
+	let avgLatencyMs = 0;
+	if (successCalls > 0) {
+		avgLatencyMs = Math.round(okLatencyMs / successCalls);
+	}
+	let avgTokensPerSecond = 0;
+	if (okLatencyMs > 0) {
+		avgTokensPerSecond = oneDecimal(okTokens / (okLatencyMs / 1000));
+	}
+	let successRate = 0;
+	if (totalCalls > 0) {
+		successRate = oneDecimal((successCalls / totalCalls) * 100);
+	}
+	return {
+		totalCalls,
+		successCalls,
+		errorCalls: totalCalls - successCalls,
+		totalTokens,
+		totalPromptTokens,
+		totalCompletionTokens,
+		totalLatencyMs,
+		totalCost,
+		avgLatencyMs,
+		avgTokensPerSecond,
+		successRate,
+		recentCalls: entries.slice(-MAX_RECENT_CALLS),
+	};
+}
+/**
+ * Append one call entry to its model's record and persist the store.
+ *
+ * Only `recentCalls` (at most MAX_RECENT_CALLS entries) is persisted, so the
+ * lifetime counters cannot be rebuilt from it: re-deriving them from that
+ * window capped `totalCalls`, token totals and `totalCost` shortly after
+ * MAX_RECENT_CALLS calls. The counters are therefore accumulated from the
+ * previously stored record, and `successRate` follows those lifetime
+ * counters. The latency and throughput averages still describe the recent
+ * window.
+ *
+ * @param entry - The completed call to record
+ */
+function addEntry(entry: TelemetryEntry): void {
+	const store = loadStore();
+	const modelKey = `${entry.provider}/${entry.model}`;
+	const previous: ModelTelemetry | undefined = store.models[modelKey];
+	const recentWindow = [...(previous?.recentCalls ?? []), entry].slice(-MAX_RECENT_CALLS);
+	// Window-based values (averages, recentCalls) come from the derive helper.
+	const windowStats = deriveModelTelemetry(modelKey, recentWindow);
+	// Lifetime counters: previous stored value plus this entry.
+	const totalCalls = (previous?.totalCalls ?? 0) + 1;
+	const successCalls = (previous?.successCalls ?? 0) + (entry.success ? 1 : 0);
+	store.models[modelKey] = {
+		...windowStats,
+		totalCalls,
+		successCalls,
+		errorCalls: totalCalls - successCalls,
+		totalTokens: (previous?.totalTokens ?? 0) + entry.totalTokens,
+		totalPromptTokens: (previous?.totalPromptTokens ?? 0) + entry.promptTokens,
+		totalCompletionTokens: (previous?.totalCompletionTokens ?? 0) + entry.completionTokens,
+		totalLatencyMs: (previous?.totalLatencyMs ?? 0) + entry.latencyMs,
+		totalCost: (previous?.totalCost ?? 0) + entry.cost,
+		successRate: parseFloat(((successCalls / totalCalls) * 100).toFixed(1)),
+	};
+	saveStore(store);
+}
+// =============================================================================
+// Public API
+// =============================================================================
+/**
+ * Load the store from disk and return the telemetry of every tracked model,
+ * keyed by "provider/model".
+ */
+export function getAllTelemetry(): Record<string, ModelTelemetry> {
+	const { models } = loadStore();
+	return models;
+}
+/**
+ * Look up the stored telemetry for one provider/model pair.
+ *
+ * @returns The stored record, or null when nothing is recorded under "provider/model".
+ */
+export function getModelTelemetry(provider: string, model: string): ModelTelemetry | null {
+	const { models } = loadStore();
+	const record = models[`${provider}/${model}`];
+	return record ?? null;
+}
+/**
+ * Format a model's telemetry as a human-readable string (for status bar / /model list).
+ *
+ * The result joins the call count, success rate, average latency and average
+ * throughput with " · ". The success rate is shown even when it is 0, so a
+ * model that failed every call reads "3 calls · 0% ok" instead of just
+ * "3 calls". Latency and throughput are omitted when they are 0 (no
+ * successful call has been measured).
+ *
+ * @param provider - The provider ID
+ * @param model - The model ID
+ * @returns The formatted summary, or undefined if no telemetry data is available.
+ */
+export function formatModelTelemetry(provider: string, model: string): string | undefined {
+	const telemetry = getModelTelemetry(provider, model);
+	// Also rejects a missing or non-positive count coming from a damaged store file.
+	if (!telemetry || !(telemetry.totalCalls > 0)) return undefined;
+	const parts: string[] = [`${telemetry.totalCalls} calls`];
+	if (Number.isFinite(telemetry.successRate)) {
+		parts.push(`${telemetry.successRate}% ok`);
+	}
+	if (telemetry.avgLatencyMs > 0) {
+		parts.push(`${telemetry.avgLatencyMs}ms`);
+	}
+	if (telemetry.avgTokensPerSecond > 0) {
+		parts.push(`${telemetry.avgTokensPerSecond} tok/s`);
+	}
+	return parts.join(" · ");
+}
+/**
+ * Summarize telemetry across every model whose key starts with "<provider>/".
+ *
+ * @returns Summed call count and cost, plus the number of matching models.
+ */
+export function getProviderTelemetry(provider: string): {
+	totalCalls: number;
+	totalCost: number;
+	models: number;
+} {
+	const prefix = `${provider}/`;
+	const matching = Object.entries(loadStore().models).filter(([key]) =>
+		key.startsWith(prefix),
+	);
+	return {
+		totalCalls: matching.reduce((sum, [, data]) => sum + data.totalCalls, 0),
+		totalCost: matching.reduce((sum, [, data]) => sum + data.totalCost, 0),
+		models: matching.length,
+	};
+}
+/**
+ * Remember the current time as the start of a call to the given model.
+ * Call this from before_agent_start or model_select.
+ *
+ * The timestamp is stored under "provider/model", replacing any earlier
+ * start recorded for the same key.
+ */
+export function startModelCall(provider: string, model: string): void {
+	const startedAt = Date.now();
+	_inFlight.set(`${provider}/${model}`, startedAt);
+}
+/**
+ * Record a finished model call together with its usage data.
+ * Call this from turn_end when the message is an AssistantMessage.
+ *
+ * Latency is measured from the matching startModelCall; when none was
+ * recorded the start defaults to "now". The in-flight slot for the key is
+ * cleared, the entry is persisted, and a summary is logged.
+ *
+ * @param provider - The provider ID
+ * @param model - The model ID
+ * @param usage - Token usage { input, output, totalTokens }
+ * @param cost - Cost in USD
+ * @param success - Whether the call succeeded
+ * @param stopReason - The stop reason (e.g. "stop", "error")
+ * @param errorMessage - Error message if failed
+ */
+export function recordModelCall(
+	provider: string,
+	model: string,
+	usage: { input: number; output: number; totalTokens: number },
+	cost: number,
+	success: boolean,
+	stopReason?: string,
+	errorMessage?: string,
+): void {
+	const callKey = `${provider}/${model}`;
+	const startedAt = _inFlight.get(callKey) ?? Date.now();
+	const latencyMs = Date.now() - startedAt;
+	_inFlight.delete(callKey);
+	// Fall back to input + output when the reported total is falsy.
+	const totalTokens = usage.totalTokens ? usage.totalTokens : usage.input + usage.output;
+	let tokensPerSecond = 0;
+	if (latencyMs > 0) {
+		const seconds = latencyMs / 1000;
+		tokensPerSecond = parseFloat((totalTokens / seconds).toFixed(1));
+	}
+	const entry: TelemetryEntry = {
+		timestamp: Date.now(),
+		provider,
+		model,
+		success,
+		latencyMs,
+		promptTokens: usage.input,
+		completionTokens: usage.output,
+		totalTokens,
+		tokensPerSecond,
+		cost,
+		stopReason,
+	};
+	// The error key is only attached when a non-empty message was supplied.
+	if (errorMessage) {
+		entry.error = errorMessage;
+	}
+	addEntry(entry);
+	const summary = { latencyMs, totalTokens, tokensPerSecond, success, cost };
+	_logger.info(`Telemetry: ${provider}/${model}`, summary);
+}
+/**
+ * Reset telemetry by overwriting the stored data with an empty store.
+ */
+export function clearTelemetry(): void {
+	saveStore({ models: {}, lastUpdated: Date.now() });
+}
+/**
+ * Get the path to the telemetry file.
+ *
+ * @returns TELEMETRY_FILE, i.e. "free-telemetry.json" inside the ".pi"
+ * directory under the user's home directory. The file itself may not exist yet.
+ */
+export function getTelemetryPath(): string {
+	return TELEMETRY_FILE;
+}

package/lib/util.ts CHANGED Viewed

@@ -341,6 +341,7 @@ export function mapOpenRouterModel(m: {
 		input_modalities?: string[] | null;
 		output_modalities?: string[] | null;
 	};
+	isFree?: boolean;
 }): ProviderModelConfig {
 	const promptPrice = Number.parseFloat(m.pricing?.prompt ?? "0");
 	const completionPrice = Number.parseFloat(m.pricing?.completion ?? "0");
@@ -362,7 +363,15 @@ export function mapOpenRouterModel(m: {
 		maxTokens:
 			m.max_completion_tokens ?? m.top_provider?.max_completion_tokens ?? 4096,
 		_pricingKnown: true,
-	} as ProviderModelConfig & { _pricingKnown?: boolean };
+		...(typeof m.isFree === "boolean" && {
+			_freeKnown: true,
+			_isFree: m.isFree,
+		}),
+	} as ProviderModelConfig & {
+		_pricingKnown?: boolean;
+		_freeKnown?: boolean;
+		_isFree?: boolean;
+	};
 }
 // =============================================================================

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "pi-free",
-	"version": "2.0.13",
+	"version": "2.0.15",
 	"type": "module",
 	"description": "AI model providers for Pi with free model filtering and dynamic model fetching",
 	"keywords": [

package/provider-failover/benchmark-lookup.ts CHANGED Viewed

@@ -225,6 +225,8 @@ function stripCommonSuffixes(ctx: {
 		/-bf\d+$/g, // -bf16
 		/-preview$/g, // -preview
 		/-exp$/g, // -exp (experimental)
+		/-turbo$/g, // -turbo (Together AI suffix)
+		/-instant$/g, // -instant (Groq suffix for fast-response models)
 		/-instruct-0\.\d+$/g, // HuggingFace revision tags
 	];
 	for (const pattern of suffixesToStrip) {
@@ -248,8 +250,28 @@ function applyProviderNormalization(
 	if (provider === "nvidia") normalizeNvidia(ctx);
 	if (provider === "cloudflare") normalizeCloudflare(ctx);
+	// Strip generic org/ prefix (e.g., "google/", "mistralai/") before everything
+	const stripped = ctx.normalized.replace(/^[^/]+\//, "");
+	if (stripped !== ctx.normalized) {
+		ctx.normalized = stripped;
+		ctx.strategies.push("strip-org-prefix");
+	}
 	normalizeFreeSuffix(ctx);
-	if (provider === "ollama") normalizeOllama(ctx);
+	// Also strip -free suffix (used by ZenMux, etc.)
+	if (ctx.normalized.endsWith("-free")) {
+		ctx.normalized = ctx.normalized.replaceAll(/-free$/g, "");
+		ctx.strategies.push("strip-free-suffix");
+	}
+	// General normalization: convert llamaN → llama-N (e.g., llama3-70b → llama-3-70b)
+	if (/^llama\d/.test(ctx.normalized)) {
+		ctx.normalized = ctx.normalized.replaceAll(/^llama(\d)/g, "llama-$1");
+		ctx.strategies.push("llama-dash-general");
+	}
+	if (provider === "ollama" || provider === "ollama-cloud")
+		normalizeOllama(ctx);
 	if (provider === "groq") normalizeGroq(ctx);
 	if (provider === "cerebras") normalizeCerebras(ctx);
 	if (provider === "mistral") normalizeMistral(ctx);
@@ -281,6 +303,8 @@ const VARIANT_QUALIFIER_SEGMENTS = new Set([
 	"preview",
 	"adaptive",
 	"fast",
+	"instruct",
+	"chat",
 ]);
 /**
@@ -301,7 +325,13 @@ function isVariantQualifier(segment: string): boolean {
 	// Two-digit year like "25", "24"
 	if (/^\d{2}$/.test(segment)) return true;
 	// Special variant suffixes
-	if (segment === "speciale" || segment === "chatgpt" || segment === "latest")
+	if (
+		segment === "speciale" ||
+		segment === "chatgpt" ||
+		segment === "latest" ||
+		segment === "instruct" ||
+		segment === "chat"
+	)
 		return true;
 	return false;
 }
@@ -397,12 +427,10 @@ function findBestVariantByPrefix(
 	if (candidates.length === 0) return null;
 	// Pick the candidate with the highest codingIndex
-	// If tied or no CI, use normalizedScore as tiebreaker
 	candidates.sort((a, b) => {
 		const ciA = a.data.codingIndex ?? -1;
 		const ciB = b.data.codingIndex ?? -1;
-		if (ciB !== ciA) return ciB - ciA;
-		return (b.data.normalizedScore ?? 0) - (a.data.normalizedScore ?? 0);
+		return ciB - ciA;
 	});
 	// Only return if the best candidate has a codingIndex
@@ -438,7 +466,12 @@ const MODEL_VARIANTS: Record<string, string[]> = {
 	],
 	"claude-3-opus": ["claude-3-opus", "opus-3"],
 	"llama-3.1-instruct-405b": ["llama-3.1-405b", "llama3.1-405b", "llama-405b"],
-	"llama-3.1-instruct-70b": ["llama-3.1-70b", "llama3.1-70b", "llama-70b"],
+	"llama-3.1-instruct-70b": [
+		"llama-3.1-70b",
+		"llama3.1-70b",
+		"llama-70b",
+		"llama-3.1-70b-versatile",
+	],
 	"gemini-1.5-pro": ["gemini-1.5-pro", "gemini1.5-pro", "gemini-pro-1.5"],
 	"qwen2.5-instruct-72b": ["qwen2.5-72b", "qwen-2.5-72b"],
 	"deepseek-v3.2-non-reasoning": ["deepseek-v3", "deepseekv3", "deepseek-chat"],
@@ -453,6 +486,58 @@ const MODEL_VARIANTS: Record<string, string[]> = {
 		"nemotron-super",
 		"nemotron-3",
 	],
+	"glm-4.6v-non-reasoning": [
+		"glm-4.6v",
+		"glm-4.6v-flash",
+		"glm-4.6v-flash-free",
+	],
+	"glm-4.7-flash-non-reasoning": ["glm-4.7-flash", "glm-4.7-flash-free"],
+	"mistral-small-4-non-reasoning": [
+		"mistral-small-24b",
+		"mistral-small-24b-instruct",
+		"mistral-small-24b-2501",
+	],
+	"qwen2.5-coder-instruct-7b": ["qwen2.5-7b", "qwen2.5-7b-instruct"],
+	"llama-3.2-instruct-3b": ["llama-3.2-3b", "llama-3.2-3b-instruct"],
+	"llama-3.2-instruct-1b": [
+		"llama-3.2-1b",
+		"llama-3.2-1b-instruct",
+		"llama3.2-1b",
+	],
+	// --- Claude 4 series (providers use date-stamped IDs like claude-sonnet-4-20250514) ---
+	// Order matters: more specific aliases first to avoid false prefix matches
+	"claude-4.5-sonnet-reasoning": [
+		"claude-sonnet-4.5",
+		"claude-sonnet-4.5-20250601",
+	],
+	"claude-4-sonnet-reasoning": ["claude-sonnet-4", "claude-sonnet-4-20250514"],
+	"claude-4-opus-reasoning": ["claude-opus-4", "claude-opus-4-20250514"],
+	// --- Qwen Max → Qwen3 Max ---
+	"qwen3-max": ["qwen-max", "qwen/qwen-max"],
+	// --- Mistral Large 2411 → Mistral Large 2 (Nov '24) ---
+	"mistral-large-2-nov-24": [
+		"mistral-large-2411",
+		"mistralai/mistral-large-2411",
+	],
+	// --- Groq-specific variants (versatile suffix, numeric context suffixes) ---
+	"llama-3.3-instruct-70b": [
+		"llama-3.3-70b-versatile",
+		"llama3.3-70b",
+		"llama-3.3-70b",
+	],
+	"llama-3-instruct-70b": ["llama3-70b-8192", "llama3-70b"],
+	"llama-3-instruct-8b": ["llama3-8b-8192", "llama3-8b"],
+	"llama-3.1-instruct-8b": [
+		"llama3.1-8b",
+		"llama-3.1-8b",
+		"llama-3.1-8b-instant",
+		"llama3.1-8b-instruct",
+	],
+	"mistral-small-3.1": ["mistral-small-2501", "mistral-small-24b-2501"],
 };
 // =============================================================================
@@ -593,7 +678,8 @@ export function findHardcodedBenchmark(
 	modelId: string,
 	provider?: string,
 ): HardcodedBenchmark | null {
-	const search = `${modelName} ${modelId}`.toLowerCase();
+	// Normalize: convert colons to dashes (Ollama model:tag format)
+	const search = `${modelName} ${modelId}`.toLowerCase().replaceAll(":", "-");
 	logDebug({ provider, modelId, modelName, action: "attempt" });
@@ -640,7 +726,7 @@ export function getHardcodedScore(
 	provider?: string,
 ): number | null {
 	const benchmark = findHardcodedBenchmark(modelName, modelId, provider);
-	return benchmark?.normalizedScore ?? null;
+	return benchmark?.codingIndex ?? null;
 }
 /**