npm - pi-free - Versions diffs - 2.0.9 → 2.0.11 - Mend

pi-free 2.0.9 → 2.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/CHANGELOG.md +576 -544
package/README.md +16 -0
package/banner.svg +12 -10
package/config.ts +86 -20
package/constants.ts +3 -0
package/index.ts +3 -0
package/lib/util.ts +72 -8
package/package.json +1 -1
package/providers/crofai/crofai.ts +106 -15
package/providers/deepinfra/deepinfra.ts +108 -11
package/providers/ollama/ollama.ts +400 -85
package/providers/ollama/thinking-levels.ts +96 -0
package/providers/together/together.ts +197 -0
package/providers/zenmux/zenmux.ts +32 -17

package/providers/deepinfra/deepinfra.ts CHANGED Viewed

@@ -4,6 +4,10 @@
  * DeepInfra is an AI inference cloud with an OpenAI-compatible API for
  * 100+ open-source models (Llama, DeepSeek, Mistral, Qwen, Mixtral, etc.).
  *
+ * NOTE: DeepInfra's /v1/openai/models buries real model data in a "metadata"
+ * field (context_length, max_tokens, pricing, tags). We extract it here.
+ * Pricing is per-MILLION tokens.
+ *
  * Free tier:
  *   - $5 one-time credit on signup (no credit card)
  *   - ~5M tokens, expires after 90 days
@@ -30,14 +34,112 @@ import type {
 	ProviderModelConfig,
 } from "@earendil-works/pi-coding-agent";
 import { getDeepinfraApiKey } from "../../config.ts";
-import { BASE_URL_DEEPINFRA, PROVIDER_DEEPINFRA } from "../../constants.ts";
+import {
+	BASE_URL_DEEPINFRA,
+	DEFAULT_FETCH_TIMEOUT_MS,
+	PROVIDER_DEEPINFRA,
+} from "../../constants.ts";
 import { createLogger } from "../../lib/logger.ts";
+import {
+	getProxyModelCompat,
+	isLikelyReasoningModel,
+} from "../../lib/provider-compat.ts";
 import { registerWithGlobalToggle } from "../../lib/registry.ts";
-import { fetchOpenAICompatibleModels } from "../../lib/util.ts";
+import { fetchWithRetry } from "../../lib/util.ts";
 import { createReRegister, setupProvider } from "../../provider-helper.ts";
 const _logger = createLogger("deepinfra");
+// =============================================================================
+// Types
+// =============================================================================
+/** Prices found under `metadata.pricing`; quoted per MILLION tokens. */
+interface DeepInfraPricing {
+	input_tokens?: number;
+	output_tokens?: number;
+}
+/**
+ * The `metadata` object attached to a model entry. Every field is optional,
+ * so consumers must supply their own defaults.
+ */
+interface DeepInfraMetadata {
+	context_length?: number;
+	max_tokens?: number;
+	description?: string;
+	pricing?: DeepInfraPricing;
+	tags?: string[];
+}
+/** One entry of the `data` array returned by the DeepInfra models endpoint. */
+interface DeepInfraModel {
+	id: string;
+	metadata?: DeepInfraMetadata;
+}
+// =============================================================================
+// Fetch
+// =============================================================================
+/** Lowercased id fragments that mark a model as not being a chat model. */
+const NON_CHAT_ID_MARKERS = ["embed", "rerank", "whisper", "speech"] as const;
+/** Values used when a model entry's metadata omits the corresponding field. */
+const DEEPINFRA_FALLBACKS = {
+	inputPerMillion: 0.3,
+	outputPerMillion: 0.9,
+	contextWindow: 128_000,
+	maxTokens: 16_384,
+} as const;
+/** Divisor that converts a per-million-token price into a per-token price. */
+const TOKENS_PER_MILLION = 1_000_000;
+/**
+ * Fetches the DeepInfra model list and converts the chat-capable entries into
+ * provider model configs.
+ *
+ * Entries whose id contains "embed", "rerank", "whisper" or "speech"
+ * (case-insensitive) are dropped, as are malformed entries without a string
+ * id. Missing metadata fields are replaced by `DEEPINFRA_FALLBACKS`.
+ *
+ * @param apiKey - DeepInfra API key, sent as a Bearer token.
+ * @returns One config per retained model; empty when the response carries no
+ *   usable `data` array.
+ * @throws Error when the HTTP response is not OK. Whatever `fetchWithRetry`
+ *   or `response.json()` throw also propagates.
+ */
+async function fetchDeepinfraModels(
+	apiKey: string,
+): Promise<ProviderModelConfig[]> {
+	// NOTE(review): the positional arguments 3 / 1000 / DEFAULT_FETCH_TIMEOUT_MS
+	// are presumably retry count, retry delay (ms) and timeout (ms) — confirm
+	// against fetchWithRetry's signature.
+	const response = await fetchWithRetry(
+		`${BASE_URL_DEEPINFRA}/models`,
+		{
+			headers: {
+				Authorization: `Bearer ${apiKey}`,
+				"Content-Type": "application/json",
+			},
+		},
+		3,
+		1000,
+		DEFAULT_FETCH_TIMEOUT_MS,
+	);
+	if (!response.ok) {
+		throw new Error(
+			`DeepInfra API error: ${response.status} ${response.statusText}`,
+		);
+	}
+	// The payload is external input: accept `data` only when it really is an
+	// array, so an unexpected body yields "no models" instead of a TypeError.
+	const payload: unknown = await response.json();
+	const data = (payload as { data?: unknown } | null)?.data;
+	const models: DeepInfraModel[] = Array.isArray(data) ? data : [];
+	_logger.info(`[deepinfra] Fetched ${models.length} models`);
+	return models
+		.filter((m) => {
+			// An entry without a string id cannot be registered and would
+			// otherwise crash on toLowerCase().
+			if (typeof m?.id !== "string") return false;
+			const id = m.id.toLowerCase();
+			// Filter out non-chat models
+			return !NON_CHAT_ID_MARKERS.some((marker) => id.includes(marker));
+		})
+		.map((m): ProviderModelConfig => {
+			const meta = m.metadata;
+			// Display name is the last path segment of the id ("org/model" -> "model").
+			const name = m.id.split("/").pop() || m.id;
+			// Reasoning: tags decide whenever the entry has a tags array. `??` only
+			// falls through on null/undefined, so the name heuristic is consulted
+			// solely when tags are absent — not when they merely lack "reasoning".
+			const reasoning =
+				meta?.tags?.includes("reasoning") ??
+				isLikelyReasoningModel({ id: m.id, name });
+			// Pricing is per-MILLION tokens. Divide to get per-token (Pi convention).
+			const inputCost =
+				(meta?.pricing?.input_tokens ?? DEEPINFRA_FALLBACKS.inputPerMillion) /
+				TOKENS_PER_MILLION;
+			const outputCost =
+				(meta?.pricing?.output_tokens ?? DEEPINFRA_FALLBACKS.outputPerMillion) /
+				TOKENS_PER_MILLION;
+			return {
+				id: m.id,
+				name,
+				reasoning,
+				input: ["text"],
+				cost: {
+					input: inputCost,
+					output: outputCost,
+					cacheRead: 0,
+					cacheWrite: 0,
+				},
+				contextWindow: meta?.context_length ?? DEEPINFRA_FALLBACKS.contextWindow,
+				maxTokens: meta?.max_tokens ?? DEEPINFRA_FALLBACKS.maxTokens,
+				compat: getProxyModelCompat({ id: m.id, name }),
+			};
+		});
+}
 // =============================================================================
 // Extension Entry Point
 // =============================================================================
@@ -52,16 +154,11 @@ export default async function deepinfraProvider(pi: ExtensionAPI) {
 		return;
 	}
-	// Fetch models via shared OpenAI-compatible helper
-	const allModels = await fetchOpenAICompatibleModels(
-		"deepinfra",
-		BASE_URL_DEEPINFRA,
-		apiKey,
-		{ cost: { input: 0.3, output: 0.9 } },
-	);
+	// Fetch models
+	const allModels = await fetchDeepinfraModels(apiKey);
 	if (allModels.length === 0) {
-		_logger.warn("[deepinfra] No models available");
+		_logger.warn("[deepinfra] No chat models available");
 		return;
 	}
@@ -72,7 +169,7 @@ export default async function deepinfraProvider(pi: ExtensionAPI) {
 	const stored = { free: freeModels, all: allModels };
 	_logger.info(
-		`[deepinfra] Registered ${allModels.length} models (trial credit, 0 free)`,
+		`[deepinfra] Registered ${allModels.length} chat models (trial credit, 0 free)`,
 	);
 	// Create re-register function