npm - pi-free - Versions diffs - 2.2.3 → 2.2.4 - Mend

pi-free 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/CHANGELOG.md +16 -49
package/README.md +41 -532
package/banner.svg +23 -20
package/config.ts +82 -10
package/constants.ts +11 -1
package/index.ts +15 -1
package/lib/model-detection.ts +296 -296
package/lib/model-metadata.ts +10 -3
package/lib/telemetry.ts +36 -44
package/package.json +3 -2
package/provider-failover/benchmark-lookup.ts +30 -15
package/provider-helper.ts +27 -8
package/providers/bai/bai.ts +2 -7
package/providers/cline/cline-xml-bridge.ts +31 -25
package/providers/cline/cline.ts +17 -8
package/providers/kilo/kilo.ts +11 -6
package/providers/model-fetcher.ts +1 -1
package/providers/opencode-session.ts +2 -2
package/providers/openmodel/openmodel.ts +525 -0
package/providers/qoder/auth.ts +548 -0
package/providers/qoder/cosy.ts +236 -0
package/providers/qoder/encoding.ts +48 -0
package/providers/qoder/models.ts +321 -0
package/providers/qoder/qoder.ts +154 -0
package/providers/qoder/stream.ts +677 -0
package/providers/qoder/thinking-parser.ts +251 -0
package/providers/qoder/transform.ts +189 -0
package/providers/tokenrouter/tokenrouter.ts +3 -6

package/providers/qoder/thinking-parser.ts ADDED Viewed

@@ -0,0 +1,251 @@
+/**
+ * Streaming parser for HTML-style thinking tags in LLM responses.
+ *
+ * Some providers (Qoder, DeepSeek via certain gateways) emit reasoning in
+ * HTML-style tags like <thinking>, <think>, <reasoning>, <thought> within
+ * the text stream, rather than via a structured reasoning_content field.
+ *
+ * This parser handles streaming chunks safely — it never emits partial tags
+ * by tracking trailing tag prefixes and deferring output until the boundary
+ * is clear.
+ */
+import type {
+	AssistantMessage,
+	AssistantMessageEventStream,
+	TextContent,
+	ThinkingContent,
+} from "@earendil-works/pi-ai";
+/**
+ * Open/close tag pairs that are treated as reasoning delimiters:
+ * <thinking>, <think>, <reasoning> and <thought>, in that order.
+ */
+const THINKING_TAG_VARIANTS: Array<{ open: string; close: string }> = [
+	"thinking",
+	"think",
+	"reasoning",
+	"thought",
+].map((tagName) => ({ open: `<${tagName}>`, close: `</${tagName}>` }));
+/**
+ * Returns the length of the longest proper prefix of `tag` (at most
+ * `tag.length - 1` characters) that `text` ends with, or 0 when `text` does
+ * not end with any such prefix. A complete trailing `tag` is not counted.
+ */
+function getTrailingPossibleTagPrefixLength(text: string, tag: string): number {
+	// Try the longest candidate first and shrink until one matches.
+	let candidate = Math.min(text.length, tag.length - 1);
+	while (candidate > 0 && !text.endsWith(tag.slice(0, candidate))) {
+		candidate -= 1;
+	}
+	// An empty tag makes the starting candidate -1; report that as "no prefix".
+	return Math.max(candidate, 0);
+}
+/**
+ * Returns the largest trailing-prefix length found across `tags`, i.e. how
+ * many characters at the end of `text` could still grow into one of the tags.
+ * Returns 0 when `tags` is empty or nothing matches.
+ */
+function getMaxTrailingPossibleTagPrefixLength(
+	text: string,
+	tags: string[],
+): number {
+	return tags.reduce(
+		(longest, candidateTag) =>
+			Math.max(longest, getTrailingPossibleTagPrefixLength(text, candidateTag)),
+		0,
+	);
+}
+/**
+ * Streaming parser that extracts <thinking>/<think>/<reasoning>/<thought> tags
+ * from a text stream and emits them as thinking_start/thinking_delta/thinking_end
+ * events on the Pi event stream. Text outside those tags is emitted as
+ * text_start/text_delta events; no text_end event is pushed by this class.
+ *
+ * Content blocks are appended to (or spliced into) `output.content` as they
+ * are discovered, and every event carries `output` as its `partial`.
+ *
+ * Usage:
+ * ```ts
+ * const parser = new ThinkingTagParser(output, stream);
+ * for (const chunk of textChunks) {
+ *   parser.processChunk(chunk);
+ * }
+ * parser.finalize();
+ * ```
+ */
+export class ThinkingTagParser {
+	/** Text received but not yet emitted. */
+	private textBuffer = "";
+	/** True while the parser is between an opening tag and its closing tag. */
+	private inThinking = false;
+	/** Index in `output.content` of the open thinking block, or null if none is open. */
+	private thinkingBlockIndex: number | null = null;
+	/** Index in `output.content` of the text block being appended to, or null. */
+	private textBlockIndex: number | null = null;
+	/** Closing tag that terminates the current thinking section. */
+	private activeEndTag = "";
+	/**
+	 * @param output Message whose `content` array receives the parsed blocks.
+	 * @param stream Event stream that receives the text and thinking events.
+	 */
+	constructor(
+		private readonly output: AssistantMessage,
+		private readonly stream: AssistantMessageEventStream,
+	) {
+		// Set initial active end tag to the first variant's close
+		this.activeEndTag = THINKING_TAG_VARIANTS[0]!.close;
+	}
+	/**
+	 * Appends `chunk` to the buffer and emits everything that can be classified
+	 * unambiguously. A trailing fragment that could be the start of a tag is
+	 * kept in the buffer until a later chunk (or finalize) resolves it.
+	 */
+	processChunk(chunk: string): void {
+		this.textBuffer += chunk;
+		while (this.textBuffer.length > 0) {
+			const prevLength = this.textBuffer.length;
+			if (!this.inThinking) {
+				this.processBeforeThinking();
+				if (this.textBuffer.length === 0) break;
+			}
+			if (this.inThinking) {
+				this.processInsideThinking();
+				if (this.textBuffer.length === 0) break;
+			}
+			// No progress means only a possible partial tag is left; wait for more input.
+			if (this.textBuffer.length >= prevLength) break;
+		}
+	}
+	/**
+	 * Flushes whatever is still buffered at end of stream.
+	 *
+	 * Outside a thinking section the remainder is emitted as text. Inside one,
+	 * the remainder (a held-back partial close tag) is emitted as thinking and
+	 * the open thinking block is always closed with a thinking_end event, even
+	 * when the buffer is already empty. Calling finalize again emits nothing.
+	 */
+	finalize(): void {
+		const remainder = this.textBuffer;
+		this.textBuffer = "";
+		if (!this.inThinking) {
+			this.emitText(remainder);
+			return;
+		}
+		// The stream ended before the closing tag arrived: the leftover belongs to
+		// the thinking section, and the block must not be left without an end event.
+		this.emitThinking(remainder);
+		this.closeThinkingBlock();
+		this.inThinking = false;
+	}
+	/** Get the index of the final text block (after thinking, or null if none) */
+	getTextBlockIndex(): number | null {
+		return this.textBlockIndex;
+	}
+	/**
+	 * Handles buffered data while outside a thinking section: emits text up to
+	 * the earliest opening tag and enters thinking mode, or, when no tag is
+	 * present, emits everything except a trailing possible tag prefix.
+	 */
+	private processBeforeThinking(): void {
+		let bestPos = -1;
+		let bestVariant: (typeof THINKING_TAG_VARIANTS)[number] | null = null;
+		for (const variant of THINKING_TAG_VARIANTS) {
+			const pos = this.textBuffer.indexOf(variant.open);
+			if (pos !== -1 && (bestPos === -1 || pos < bestPos)) {
+				bestPos = pos;
+				bestVariant = variant;
+			}
+		}
+		if (bestPos !== -1 && bestVariant) {
+			if (bestPos > 0) this.emitText(this.textBuffer.slice(0, bestPos));
+			this.textBuffer = this.textBuffer.slice(
+				bestPos + bestVariant.open.length,
+			);
+			// Only the matching close tag ends this section.
+			this.activeEndTag = bestVariant.close;
+			this.inThinking = true;
+			return;
+		}
+		// No thinking tag found yet, but the buffer might end with a partial tag
+		const trailingPrefixLength = getMaxTrailingPossibleTagPrefixLength(
+			this.textBuffer,
+			THINKING_TAG_VARIANTS.map((variant) => variant.open),
+		);
+		const safeLen = this.textBuffer.length - trailingPrefixLength;
+		if (safeLen > 0) {
+			this.emitText(this.textBuffer.slice(0, safeLen));
+			this.textBuffer = this.textBuffer.slice(safeLen);
+		}
+	}
+	/**
+	 * Handles buffered data while inside a thinking section: on the active
+	 * close tag, emits the preceding thinking, closes the block and returns to
+	 * text mode; otherwise emits everything except a trailing possible prefix
+	 * of the close tag.
+	 */
+	private processInsideThinking(): void {
+		const endPos = this.textBuffer.indexOf(this.activeEndTag);
+		if (endPos !== -1) {
+			if (endPos > 0) this.emitThinking(this.textBuffer.slice(0, endPos));
+			this.closeThinkingBlock();
+			this.textBuffer = this.textBuffer.slice(
+				endPos + this.activeEndTag.length,
+			);
+			this.inThinking = false;
+			// Text after the thinking section starts a fresh text block.
+			this.textBlockIndex = null;
+			// Drop one blank-line separator directly after the closing tag.
+			if (this.textBuffer.startsWith("\n\n"))
+				this.textBuffer = this.textBuffer.slice(2);
+			return;
+		}
+		// Buffer might end with a partial close tag
+		const trailingPrefixLength = getTrailingPossibleTagPrefixLength(
+			this.textBuffer,
+			this.activeEndTag,
+		);
+		const safeLen = this.textBuffer.length - trailingPrefixLength;
+		if (safeLen > 0) {
+			this.emitThinking(this.textBuffer.slice(0, safeLen));
+			this.textBuffer = this.textBuffer.slice(safeLen);
+		}
+	}
+	/**
+	 * Pushes thinking_end for the open thinking block, if any, and forgets its
+	 * index so the same block cannot be ended twice.
+	 */
+	private closeThinkingBlock(): void {
+		if (this.thinkingBlockIndex === null) return;
+		const block = this.output.content[
+			this.thinkingBlockIndex
+		] as ThinkingContent;
+		this.stream.push({
+			type: "thinking_end",
+			contentIndex: this.thinkingBlockIndex,
+			content: block.thinking,
+			partial: this.output,
+		});
+		this.thinkingBlockIndex = null;
+	}
+	/**
+	 * Appends `text` to the current text block, creating the block (and pushing
+	 * text_start) first when none is open. Empty input is ignored.
+	 */
+	private emitText(text: string): void {
+		if (!text) return;
+		if (this.textBlockIndex === null) {
+			this.textBlockIndex = this.output.content.length;
+			this.output.content.push({ type: "text", text: "" } as TextContent);
+			this.stream.push({
+				type: "text_start",
+				contentIndex: this.textBlockIndex,
+				partial: this.output,
+			});
+		}
+		const block = this.output.content[this.textBlockIndex] as TextContent;
+		block.text += text;
+		this.stream.push({
+			type: "text_delta",
+			contentIndex: this.textBlockIndex,
+			delta: text,
+			partial: this.output,
+		});
+	}
+	/**
+	 * Appends `thinking` to the current thinking block, creating the block (and
+	 * pushing thinking_start) first when none is open. Empty input is ignored.
+	 */
+	private emitThinking(thinking: string): void {
+		if (thinking.length === 0) return;
+		if (this.thinkingBlockIndex === null) {
+			if (this.textBlockIndex === null) {
+				this.thinkingBlockIndex = this.output.content.length;
+				this.output.content.push({
+					type: "thinking",
+					thinking: "",
+				} as ThinkingContent);
+			} else {
+				// Insert thinking block before the existing text block
+				this.thinkingBlockIndex = this.textBlockIndex;
+				this.output.content.splice(this.thinkingBlockIndex, 0, {
+					type: "thinking",
+					thinking: "",
+				} as ThinkingContent);
+				// The text block moved one slot to the right.
+				this.textBlockIndex = this.textBlockIndex + 1;
+			}
+			this.stream.push({
+				type: "thinking_start",
+				contentIndex: this.thinkingBlockIndex,
+				partial: this.output,
+			});
+		}
+		const block = this.output.content[
+			this.thinkingBlockIndex
+		] as ThinkingContent;
+		block.thinking += thinking;
+		this.stream.push({
+			type: "thinking_delta",
+			contentIndex: this.thinkingBlockIndex,
+			delta: thinking,
+			partial: this.output,
+		});
+	}
+}

package/providers/qoder/transform.ts ADDED Viewed

@@ -0,0 +1,189 @@
+/**
+ * Message format transformation between Pi's internal format and Qoder's
+ * proprietary API format.
+ *
+ * Pi uses a structured message format with typed content blocks (TextContent,
+ * ThinkingContent, ImageContent, ToolCall). Qoder's API expects an
+ * OpenAI-compatible format with some custom extensions.
+ */
+import type {
+	AssistantMessage,
+	ImageContent,
+	Message,
+	TextContent,
+	ThinkingContent,
+	Tool,
+	ToolCall,
+	ToolResultMessage,
+} from "@earendil-works/pi-ai";
+/**
+ * OpenAI-style tool definition sent to the Qoder API.
+ * transformTools builds one of these per Pi tool.
+ */
+interface QoderTool {
+	// Always the literal "function".
+	type: "function";
+	function: {
+		// Copied from the Pi tool's name.
+		name: string;
+		// Copied from the Pi tool's description.
+		description?: string;
+		// Copied from the Pi tool's parameters without modification.
+		parameters?: unknown;
+	};
+}
+/**
+ * OpenAI-style tool call within an assistant message.
+ * Built from Pi ToolCall blocks in transformAssistantMessage.
+ */
+interface QoderToolCall {
+	// Copied from the Pi tool call's id.
+	id?: string;
+	// Always the literal "function".
+	type: "function";
+	// `arguments` is a string: passed through when the Pi arguments are already
+	// a string, otherwise produced with JSON.stringify.
+	function: { name?: string; arguments: string };
+}
+// Plain-text part of a multi-part user message.
+type QoderTextPart = { type: "text"; text: string };
+// Image part; transformUserMessage fills `url` with a base64 `data:` URL
+// built from the Pi image's mimeType and data.
+type QoderImagePart = { type: "image_url"; image_url: { url: string } };
+// Message content: either a single string or a list of text/image parts.
+type QoderContent = string | Array<QoderTextPart | QoderImagePart>;
+/** OpenAI-style message sent to the Qoder API. */
+interface QoderMessage {
+	// "tool" is used for converted Pi toolResult messages.
+	role: "user" | "assistant" | "tool";
+	// null is produced for assistant messages that end up with no text content.
+	content: QoderContent | null;
+	// Set on assistant messages only when at least one tool call is present.
+	tool_calls?: QoderToolCall[];
+	// Set on "tool" messages from the Pi tool result's toolCallId.
+	tool_call_id?: string;
+}
+/**
+ * Flattens a message's content to a single string.
+ *
+ * String content is returned unchanged. For array content, the `text` of
+ * text blocks and the `thinking` of thinking blocks are concatenated in
+ * order with no separator; every other block type contributes nothing.
+ * Any other content shape yields "".
+ */
+export function getContentText(msg: Message): string {
+	if (typeof msg.content === "string") return msg.content;
+	if (!Array.isArray(msg.content)) return "";
+	const pieces = msg.content.map((part) => {
+		switch (part.type) {
+			case "text":
+				return (part as TextContent).text;
+			case "thinking":
+				return (part as ThinkingContent).thinking;
+			default:
+				return "";
+		}
+	});
+	return pieces.join("");
+}
+/**
+ * Wraps each Pi tool in Qoder's OpenAI-style `{ type: "function", function }`
+ * envelope, carrying over name, description and parameters as-is.
+ */
+export function transformTools(tools: Tool[]): QoderTool[] {
+	const toQoderTool = ({ name, description, parameters }: Tool): QoderTool => {
+		return { type: "function", function: { name, description, parameters } };
+	};
+	return tools.map((tool) => toQoderTool(tool));
+}
+/**
+ * Convert Pi's internal messages to Qoder's expected format, preserving order.
+ *
+ * - "user" messages go through transformUserMessage (text and/or images)
+ * - "assistant" messages go through transformAssistantMessage (text,
+ *   thinking, tool calls)
+ * - "toolResult" messages go through transformToolResultMessage
+ * - Assistant messages flagged by isSkippableMessage (error/aborted) and
+ *   messages with any other role produce no output
+ */
+export function transformMessagesForQoder(messages: Message[]): QoderMessage[] {
+	return messages.flatMap((message): QoderMessage[] => {
+		if (isSkippableMessage(message)) return [];
+		switch (message.role) {
+			case "user":
+				return [transformUserMessage(message)];
+			case "assistant":
+				return [transformAssistantMessage(message as AssistantMessage)];
+			case "toolResult":
+				return [transformToolResultMessage(message as ToolResultMessage)];
+			default:
+				return [];
+		}
+	});
+}
+/**
+ * True only for assistant messages whose stopReason is "error" or "aborted";
+ * every other message is kept.
+ */
+function isSkippableMessage(msg: Message): boolean {
+	if (msg.role === "assistant") {
+		const { stopReason } = msg as AssistantMessage;
+		return stopReason === "error" || stopReason === "aborted";
+	}
+	return false;
+}
+/**
+ * Converts a user message to Qoder's format.
+ *
+ * String content is passed through. Array content without images is
+ * flattened with getContentText. Array content containing at least one image
+ * becomes a parts list: text blocks map to text parts, image blocks map to
+ * image_url parts holding a base64 data URL, and other blocks are dropped.
+ * Any other content shape becomes "".
+ */
+function transformUserMessage(msg: Message): QoderMessage {
+	const source = msg.content;
+	if (typeof source === "string") return { role: "user", content: source };
+	if (!Array.isArray(source)) return { role: "user", content: "" };
+	if (!source.some((part) => part.type === "image")) {
+		return { role: "user", content: getContentText(msg) };
+	}
+	const parts = source
+		.map((part): QoderTextPart | QoderImagePart | null => {
+			switch (part.type) {
+				case "text":
+					return { type: "text", text: (part as TextContent).text };
+				case "image": {
+					const image = part as ImageContent;
+					const url = `data:${image.mimeType};base64,${image.data}`;
+					return { type: "image_url", image_url: { url } };
+				}
+				default:
+					return null;
+			}
+		})
+		.filter((part): part is QoderTextPart | QoderImagePart => part !== null);
+	return { role: "user", content: parts };
+}
+/**
+ * Converts an assistant message to Qoder's format.
+ *
+ * Text blocks are appended verbatim, thinking blocks are re-wrapped as
+ * `<thinking>…</thinking>` followed by a blank line, and tool-call blocks
+ * become OpenAI-style tool_calls whose arguments are a string (passed
+ * through if already a string, otherwise JSON-encoded). Empty text is sent
+ * as null, and tool_calls is only present when there is at least one call.
+ */
+function transformAssistantMessage(am: AssistantMessage): QoderMessage {
+	const calls: QoderToolCall[] = [];
+	let text = "";
+	if (!Array.isArray(am.content)) {
+		text = am.content || "";
+	} else {
+		for (const part of am.content) {
+			switch (part.type) {
+				case "text":
+					text += (part as TextContent).text;
+					break;
+				case "thinking":
+					text += `<thinking>${(part as ThinkingContent).thinking}</thinking>\n\n`;
+					break;
+				case "toolCall": {
+					const call = part as ToolCall;
+					const serializedArgs =
+						typeof call.arguments === "string"
+							? call.arguments
+							: JSON.stringify(call.arguments);
+					calls.push({
+						id: call.id,
+						type: "function",
+						function: { name: call.name, arguments: serializedArgs },
+					});
+					break;
+				}
+			}
+		}
+	}
+	return calls.length > 0
+		? { role: "assistant", content: text || null, tool_calls: calls }
+		: { role: "assistant", content: text || null };
+}
+/**
+ * Converts a Pi tool result into a Qoder "tool" message that references the
+ * originating call via tool_call_id and carries the result flattened to a
+ * string by getContentText.
+ */
+function transformToolResultMessage(tr: ToolResultMessage): QoderMessage {
+	const flattened = getContentText(tr);
+	const toolMessage: QoderMessage = {
+		role: "tool",
+		tool_call_id: tr.toolCallId,
+		content: flattened,
+	};
+	return toolMessage;
+}

package/providers/tokenrouter/tokenrouter.ts CHANGED Viewed

@@ -112,12 +112,9 @@ function isTokenRouterModel(model: { provider?: string }): boolean {
 // =============================================================================
 // Known Free Models
-// TokenRouter doesn't expose pricing via /v1/models, so known-free models
-// are hardcoded. Detected via name suffix also catches `:free`-tagged models.
+// TokenRouter doesn't expose pricing via /v1/models.
+// Known-free detection uses `:free` name suffix for promotional models.
 // =============================================================================
-const MINIMAX_M3_ID = "MiniMax-M3";
-const KNOWN_FREE_MODELS = new Set([MINIMAX_M3_ID]);
 const TOKENROUTER_OPENAI_API = "tokenrouter-openai-completions" as const;
 const TOKENROUTER_HIGH_LOAD_RETRY_DELAY_MS = 30_000;
 const MINIMAX_ADAPTIVE_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
@@ -482,7 +479,7 @@ export function mapTokenRouterModel(
 	const reasoning = isMinimax || isLikelyReasoningModel({ id: model.id, name });
 	const isResponseApi =
 		model.supported_endpoint_types.includes("openai-response");
-	const isKnownFree = KNOWN_FREE_MODELS.has(model.id);
+	const isKnownFree = model.id.toLowerCase().endsWith(":free");
 	return {
 		id: model.id,