npm - pi-free - Versions diffs - 2.0.9 → 2.0.11 - Mend

pi-free 2.0.9 → 2.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/CHANGELOG.md +576 -544
package/README.md +16 -0
package/banner.svg +12 -10
package/config.ts +86 -20
package/constants.ts +3 -0
package/index.ts +3 -0
package/lib/util.ts +72 -8
package/package.json +1 -1
package/providers/crofai/crofai.ts +106 -15
package/providers/deepinfra/deepinfra.ts +108 -11
package/providers/ollama/ollama.ts +400 -85
package/providers/ollama/thinking-levels.ts +96 -0
package/providers/together/together.ts +197 -0
package/providers/zenmux/zenmux.ts +32 -17

package/providers/ollama/thinking-levels.ts ADDED Viewed

@@ -0,0 +1,96 @@
+/**
+ * Thinking level mapping for Ollama Cloud models.
+ *
+ * Maps Pi's thinking levels to Ollama Cloud's OpenAI-compatible
+ * `reasoning_effort` values. The API accepts "none", "low", "medium",
+ * "high", and "max". On simple prompts, "max" can be a no-op over
+ * "high", but on harder prompts it can increase thinking substantially
+ * (e.g. deepseek-v4-pro: ~32k tokens on high vs ~55k on max).
+ *
+ * A `null` value means the level is hidden in Pi's UI.
+ *
+ * Model-specific behavior discovered through testing
+ * (see https://github.com/fgrehm/pi-ollama-cloud/blob/main/docs/think-experiment.md):
+ *   - Most models: all levels work, "none" disables thinking
+ *   - GPT-OSS: no off mode, only low/medium/high
+ *   - Qwen 3.x (non-VL): binary-only (think/nothink) - off works
+ *   - Qwen 3 VL: "none" doesn't disable thinking - off is hidden
+ *   - Kimi K2 Thinking: "none" doesn't disable thinking - off is hidden
+ *   - MiniMax M2.x: "none" doesn't disable thinking - off is hidden
+ *
+ * Reference: https://docs.ollama.com/api/openai-compatibility
+ */
+import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
+/**
+ * Non-nullable form of the `thinkingLevelMap` property declared on
+ * `ProviderModelConfig`. In the maps below a `null` entry means the
+ * level is hidden in Pi's UI.
+ */
+export type ThinkingLevelMap = NonNullable<ProviderModelConfig["thinkingLevelMap"]>;
+/**
+ * Default: off/low/medium/high/xhigh with minimal hidden.
+ *
+ * "off" is sent as "none" and "xhigh" as "max"; low/medium/high are sent
+ * under their own names. A `null` entry means the level is hidden in
+ * Pi's UI.
+ */
+export const DEFAULT: ThinkingLevelMap = {
+	off: "none",
+	minimal: null,
+	low: "low",
+	medium: "medium",
+	high: "high",
+	xhigh: "max",
+};
+/**
+ * GPT-OSS: can't disable thinking, only low/medium/high.
+ * "off", "minimal" and "xhigh" are all `null`, i.e. hidden in Pi's UI,
+ * so "max" is never sent for these models.
+ * https://ollama.com/library/gpt-oss
+ */
+export const GPT_OSS: ThinkingLevelMap = {
+	off: null,
+	minimal: null,
+	low: "low",
+	medium: "medium",
+	high: "high",
+	xhigh: null,
+};
+/**
+ * Qwen 3.x: binary-only (think/nothink), no gradation.
+ * Only two levels are exposed: "off" (sent as "none") and "medium"
+ * (sent as "medium"), which stands in for "thinking on". Every other
+ * level is `null`, i.e. hidden in Pi's UI.
+ * https://docs.ollama.com/capabilities/thinking
+ */
+export const QWEN3: ThinkingLevelMap = {
+	off: "none",
+	minimal: null,
+	low: null,
+	medium: "medium",
+	high: null,
+	xhigh: null,
+};
+/**
+ * "none" doesn't disable thinking - off is hidden.
+ * Used by kimi-k2-thinking, minimax family, qwen3-vl.
+ * "off" and "minimal" are `null` (hidden in Pi's UI); low/medium/high
+ * are sent under their own names and "xhigh" is sent as "max".
+ */
+export const NO_OFF: ThinkingLevelMap = {
+	off: null,
+	minimal: null,
+	low: "low",
+	medium: "medium",
+	high: "high",
+	xhigh: "max",
+};
+/**
+ * Resolve the thinking level map for a model.
+ *
+ * Returns `undefined` when `capabilities` does not include "thinking".
+ * Otherwise the model ID is compared case-sensitively against the rules
+ * below; the first rule that matches wins:
+ *   - starts with "gpt-oss"      -> GPT_OSS
+ *   - starts with "qwen3-vl"     -> NO_OFF
+ *   - starts with "qwen3"        -> QWEN3
+ *   - equals "kimi-k2-thinking"  -> NO_OFF (exact match, not a prefix)
+ *   - starts with "minimax"      -> NO_OFF
+ *   - anything else              -> DEFAULT
+ *
+ * @param id - Model ID to classify.
+ * @param capabilities - Capability names reported for the model. The
+ *   array is only read, so readonly arrays are accepted as well.
+ * @returns The map to use for the model, or `undefined` for models
+ *   without the "thinking" capability.
+ */
+export function resolveThinkingMap(
+	id: string,
+	capabilities: readonly string[],
+): ThinkingLevelMap | undefined {
+	if (!capabilities.includes("thinking")) return undefined;
+	if (id.startsWith("gpt-oss")) return GPT_OSS;
+	// Must be tested before the broader "qwen3" prefix, which would
+	// otherwise claim the VL models too.
+	if (id.startsWith("qwen3-vl")) return NO_OFF;
+	if (id.startsWith("qwen3")) return QWEN3;
+	// Exact ID only: other IDs that merely begin with this string fall
+	// through to the rules below.
+	if (id === "kimi-k2-thinking") return NO_OFF;
+	if (id.startsWith("minimax")) return NO_OFF;
+	return DEFAULT;
+}

package/providers/together/together.ts ADDED Viewed

@@ -0,0 +1,197 @@
+/**
+ * Together AI Provider Extension
+ *
+ * Together AI provides fast inference on 200+ open-source models through an
+ * OpenAI-compatible API. Known for Llama, DeepSeek, Qwen, Mixtral, and other
+ * popular models at competitive per-token pricing.
+ *
+ * Free tier:
+ *   - $1 one-time credit on signup (no credit card)
+ *   - 60 RPM, 600 RPD (varies by model)
+ *   - Sign up at https://api.together.ai/
+ *
+ * Paid: pay-per-token after credits exhaust
+ *
+ * NOTE: Together AI's /v1/models returns a plain array (not { data: [...] }),
+ * uses per-million-token pricing (not per-token), and includes a "type" field
+ * we use to filter to chat models only.
+ *
+ * Endpoint:
+ *   Chat: https://api.together.xyz/v1/chat/completions
+ *
+ * Setup:
+ *   1. Sign up at https://api.together.ai/
+ *   2. Get API key from https://api.together.ai/settings/api-keys
+ *   3. Set TOGETHER_AI_API_KEY env var (or add to ~/.pi/free.json)
+ *
+ * Usage:
+ *   pi install git:github.com/apmantza/pi-free
+ *   # Set TOGETHER_AI_API_KEY env var
+ *   # Models appear in /model selector as "together/deepseek-ai/..."
+ */
+import type {
+	ExtensionAPI,
+	ProviderModelConfig,
+} from "@earendil-works/pi-coding-agent";
+import { getTogetherApiKey, getTogetherShowPaid } from "../../config.ts";
+import {
+	BASE_URL_TOGETHER,
+	DEFAULT_FETCH_TIMEOUT_MS,
+	PROVIDER_TOGETHER,
+} from "../../constants.ts";
+import { createLogger } from "../../lib/logger.ts";
+import {
+	getProxyModelCompat,
+	isLikelyReasoningModel,
+} from "../../lib/provider-compat.ts";
+import { registerWithGlobalToggle } from "../../lib/registry.ts";
+import { fetchWithRetry } from "../../lib/util.ts";
+import { createReRegister, setupProvider } from "../../provider-helper.ts";
+const _logger = createLogger("together");
+// =============================================================================
+// Types
+// =============================================================================
+interface TogetherModel { // Fields of a Together AI /models entry that this provider reads.
+	id: string; // Model ID; also the source of the fallback display name (last "/"-separated segment).
+	display_name?: string; // Preferred human-readable name when present.
+	type?: string; // Model kind; only entries with type "chat" are kept.
+	context_length?: number; // Used as the model's context window when present.
+	pricing?: { // Prices are quoted per MILLION tokens and converted to per-token costs.
+		input?: number; // Becomes cost.input.
+		output?: number; // Becomes cost.output.
+		cached_input?: number; // Becomes cost.cacheRead.
+	};
+}
+// =============================================================================
+// Fetch
+// =============================================================================
+/**
+ * Convert a Together AI price quoted per million tokens into a per-token
+ * price. Missing values and values that do not convert to a finite
+ * number count as 0, so a malformed price can never produce a NaN cost.
+ */
+function togetherPerTokenPrice(perMillion: unknown): number {
+	const value = Number(perMillion ?? 0);
+	return Number.isFinite(value) ? value / 1_000_000 : 0;
+}
+/**
+ * Fetch the Together AI model list and convert its chat models into Pi
+ * model configs.
+ *
+ * Only entries with `type === "chat"`, a non-empty string `id`, and no
+ * "embed" in the ID are kept.
+ *
+ * @param apiKey - Together AI API key, sent as a Bearer token.
+ * @returns One `ProviderModelConfig` per chat model.
+ * @throws Error when the response status is not OK, or when the response
+ *   body is neither an array nor an object with an array `data` field.
+ */
+async function fetchTogetherModels(
+	apiKey: string,
+): Promise<ProviderModelConfig[]> {
+	// NOTE(review): the three trailing arguments are presumably retry count,
+	// retry delay (ms) and timeout (ms) - confirm against fetchWithRetry.
+	const response = await fetchWithRetry(
+		`${BASE_URL_TOGETHER}/models`,
+		{
+			headers: {
+				Authorization: `Bearer ${apiKey}`,
+				"Content-Type": "application/json",
+			},
+		},
+		3,
+		1000,
+		DEFAULT_FETCH_TIMEOUT_MS,
+	);
+	if (!response.ok) {
+		throw new Error(
+			`Together AI API error: ${response.status} ${response.statusText}`,
+		);
+	}
+	// Together AI returns a plain array (not { data: [...] }). The
+	// OpenAI-style envelope is accepted too, and anything else is rejected
+	// with a clear error instead of failing later on `.filter`.
+	const payload: unknown = await response.json();
+	const entries: unknown = Array.isArray(payload)
+		? payload
+		: (payload as { data?: unknown } | null | undefined)?.data;
+	if (!Array.isArray(entries)) {
+		throw new Error(
+			"Together AI API error: unexpected response shape from /models",
+		);
+	}
+	const models = entries as TogetherModel[];
+	_logger.info(`[together] Fetched ${models.length} models`);
+	return models
+		.filter(
+			(m) =>
+				m != null &&
+				m.type === "chat" &&
+				typeof m.id === "string" &&
+				m.id !== "" &&
+				!m.id.includes("embed"),
+		)
+		.map((m): ProviderModelConfig => {
+			const name = m.display_name || m.id.split("/").pop() || m.id;
+			// A missing or non-positive context length falls back to 128k
+			// rather than registering a model with an unusable window.
+			const contextWindow =
+				typeof m.context_length === "number" && m.context_length > 0
+					? m.context_length
+					: 128_000;
+			return {
+				id: m.id,
+				name,
+				reasoning: isLikelyReasoningModel({ id: m.id, name }),
+				input: ["text"],
+				// Together AI pricing is per-MILLION tokens; the helper
+				// divides by 1_000_000 to get per-token cost (Pi convention).
+				cost: {
+					input: togetherPerTokenPrice(m.pricing?.input),
+					output: togetherPerTokenPrice(m.pricing?.output),
+					cacheRead: togetherPerTokenPrice(m.pricing?.cached_input),
+					cacheWrite: 0,
+				},
+				contextWindow,
+				maxTokens: 16_384,
+				compat: getProxyModelCompat({ id: m.id, name }),
+			};
+		});
+}
+// =============================================================================
+// Extension Entry Point
+// =============================================================================
+/**
+ * Extension entry point for the Together AI provider.
+ *
+ * Does nothing when no API key is configured, when the model list cannot
+ * be fetched, or when the list contains no chat models; each of these
+ * cases is logged and the function returns without registering anything.
+ * Otherwise it registers the provider with the global toggle, sets up the
+ * provider, and registers all fetched models.
+ *
+ * @param pi - Extension API handle passed through to the registration helpers.
+ */
+export default async function togetherProvider(
+	pi: ExtensionAPI,
+): Promise<void> {
+	const apiKey = getTogetherApiKey();
+	if (!apiKey) {
+		_logger.info(
+			"[together] Skipping — TOGETHER_AI_API_KEY not set. Sign up at https://api.together.ai/",
+		);
+		return;
+	}
+	// Fetch models. A network failure or API error skips this provider
+	// instead of rejecting the extension entry point.
+	let allModels: ProviderModelConfig[];
+	try {
+		allModels = await fetchTogetherModels(apiKey);
+	} catch (err: unknown) {
+		const reason = err instanceof Error ? err.message : String(err);
+		_logger.warn(`[together] Failed to fetch models: ${reason}`);
+		return;
+	}
+	if (allModels.length === 0) {
+		_logger.warn("[together] No chat models available");
+		return;
+	}
+	// Together AI is a pay-per-token provider with $1 trial credit.
+	// Zero-cost models (if any) are marked free; all others are paid.
+	const freeModels = allModels.filter(
+		(m) =>
+			m.cost.input === 0 &&
+			m.cost.output === 0 &&
+			m.cost.cacheRead === 0 &&
+			m.cost.cacheWrite === 0,
+	);
+	const stored = { free: freeModels, all: allModels };
+	_logger.info(
+		`[together] ${allModels.length} chat models (${freeModels.length} free)`,
+	);
+	// Create re-register function
+	const reRegister = createReRegister(pi, {
+		providerId: PROVIDER_TOGETHER,
+		baseUrl: BASE_URL_TOGETHER,
+		apiKey,
+	});
+	// Register with global toggle
+	// NOTE(review): the meaning of the trailing `true` is not visible here -
+	// confirm against registerWithGlobalToggle.
+	registerWithGlobalToggle(PROVIDER_TOGETHER, stored, reRegister, true);
+	// Setup provider with toggle command
+	setupProvider(
+		pi,
+		{
+			providerId: PROVIDER_TOGETHER,
+			initialShowPaid: getTogetherShowPaid(),
+			tosUrl: "https://api.together.ai/",
+			// Copy any replacement model lists into `stored` before
+			// re-registering, so `stored` always reflects the latest lists.
+			reRegister: (models, _stored) => {
+				if (_stored) {
+					stored.free = _stored.free;
+					stored.all = _stored.all;
+				}
+				reRegister(models);
+			},
+		},
+		stored,
+	);
+	// Initial registration — show all models (trial credit provider)
+	reRegister(stored.all);
+}

package/providers/zenmux/zenmux.ts CHANGED Viewed

@@ -27,10 +27,7 @@ import {
 	PROVIDER_ZENMUX,
 } from "../../constants.ts";
 import { createLogger } from "../../lib/logger.ts";
-import {
-	getProxyModelCompat,
-	isLikelyReasoningModel,
-} from "../../lib/provider-compat.ts";
+import { getProxyModelCompat } from "../../lib/provider-compat.ts";
 import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
 import { fetchWithRetry } from "../../lib/util.ts";
 import { createReRegister, setupProvider } from "../../provider-helper.ts";
@@ -43,17 +40,33 @@ const _logger = createLogger("zenmux");
 interface ZenMuxModel {
 	id: string;
-	name?: string;
+	display_name?: string;
 	context_length?: number;
-	pricing?: {
-		prompt?: number;
-		completion?: number;
+	input_modalities?: string[];
+	output_modalities?: string[];
+	capabilities?: {
+		reasoning?: boolean;
+	};
+	pricings?: {
+		prompt?: Array<{ value: number }>;
+		completion?: Array<{ value: number }>;
+		input_cache_read?: Array<{ value: number }>;
 	};
 }
-function isZenmuxReasoningModel(model: Pick<ZenMuxModel, "id" | "name">) {
-	const haystack = `${model.id} ${model.name ?? ""}`.toLowerCase();
-	return isLikelyReasoningModel(model) || haystack.includes("claude");
+/**
+ * Extract the first pricing value from a ZenMux pricings array.
+ * ZenMux uses a structured format: pricings.prompt[0].value (per-million-tokens).
+ * We divide by 1_000_000 to convert to per-token price (Pi's convention).
+ *
+ * @param pricings - The model's `pricings` object; may be undefined.
+ * @param key - Which price list to read.
+ * @returns The per-token price, or 0 if the pricing is missing, the list
+ *   is empty, the first entry is absent, or its value does not convert to
+ *   a finite number.
+ */
+function extractZenmuxPrice(
+	pricings: ZenMuxModel["pricings"],
+	key: "prompt" | "completion" | "input_cache_read",
+): number {
+	// Optional chaining covers a missing pricings object, a missing or
+	// empty list, and a null first entry in a single step.
+	const perMillion = Number(pricings?.[key]?.[0]?.value ?? 0);
+	return Number.isFinite(perMillion) ? perMillion / 1_000_000 : 0;
 }
 async function fetchZenmuxModels(
@@ -87,13 +100,15 @@ async function fetchZenmuxModels(
 		return models.map(
 			(m): ProviderModelConfig => ({
 				id: m.id,
-				name: m.name || m.id,
-				reasoning: isZenmuxReasoningModel(m),
-				input: ["text"],
+				name: m.display_name || m.id,
+				reasoning: m.capabilities?.reasoning ?? false,
+				input: m.input_modalities?.includes("image")
+					? ["text", "image"]
+					: ["text"],
 				cost: {
-					input: m.pricing?.prompt || 0,
-					output: m.pricing?.completion || 0,
-					cacheRead: 0,
+					input: extractZenmuxPrice(m.pricings, "prompt"),
+					output: extractZenmuxPrice(m.pricings, "completion"),
+					cacheRead: extractZenmuxPrice(m.pricings, "input_cache_read"),
 					cacheWrite: 0,
 				},
 				contextWindow: m.context_length || 128000,