npm - @hsupu/copilot-api - Versions diffs - 0.7.10 → 0.7.11 - Mend

@hsupu/copilot-api 0.7.10 → 0.7.11

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/main.js (changed)

@@ -17,6 +17,7 @@ import pc from "picocolors";
 import { Hono } from "hono";
 import { cors } from "hono/cors";
 import { streamSSE } from "hono/streaming";
+import { countTokens } from "@anthropic-ai/tokenizer";
 import { events } from "fetch-event-stream";
 //#region src/lib/paths.ts
@@ -46,7 +47,9 @@ const state = {
 	accountType: "individual",
 	manualApprove: false,
 	showToken: false,
-	autoCompact: true
+	verbose: false,
+	autoTruncate: true,
+	directAnthropicApi: true
 };
 //#endregion
@@ -480,9 +483,23 @@ async function checkTokenExists() {
 		return false;
 	}
 }
-async function getDebugInfo() {
+async function getAccountInfo() {
+	try {
+		await ensurePaths();
+		await setupGitHubToken();
+		if (!state.githubToken) return null;
+		const [user, copilot] = await Promise.all([getGitHubUser(), getCopilotUsage()]);
+		return {
+			user,
+			copilot
+		};
+	} catch {
+		return null;
+	}
+}
+async function getDebugInfo(includeAccount) {
 	const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
-	return {
+	const info = {
 		version: version$1,
 		runtime: getRuntimeInfo(),
 		paths: {
@@ -491,9 +508,14 @@ async function getDebugInfo() {
 		},
 		tokenExists
 	};
+	if (includeAccount && tokenExists) {
+		const account = await getAccountInfo();
+		if (account) info.account = account;
+	}
+	return info;
 }
 function printDebugInfoPlain(info) {
-	consola.info(`copilot-api debug
+	let output = `copilot-api debug
 Version: ${info.version}
 Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch})
@@ -502,19 +524,24 @@ Paths:
 - APP_DIR: ${info.paths.APP_DIR}
 - GITHUB_TOKEN_PATH: ${info.paths.GITHUB_TOKEN_PATH}
-Token exists: ${info.tokenExists ? "Yes" : "No"}`);
+Token exists: ${info.tokenExists ? "Yes" : "No"}`;
+	if (info.account) output += `
+Account Info:
+${JSON.stringify(info.account, null, 2)}`;
+	consola.info(output);
 }
 function printDebugInfoJson(info) {
 	console.log(JSON.stringify(info, null, 2));
 }
 async function runDebug(options) {
-	const debugInfo = await getDebugInfo();
-	if (options.json) printDebugInfoJson(debugInfo);
-	else printDebugInfoPlain(debugInfo);
+	const debugInfo$1 = await getDebugInfo(true);
+	if (options.json) printDebugInfoJson(debugInfo$1);
+	else printDebugInfoPlain(debugInfo$1);
 }
-const debug = defineCommand({
+const debugInfo = defineCommand({
 	meta: {
-		name: "debug",
+		name: "info",
 		description: "Print debug information about the application"
 	},
 	args: { json: {
@@ -526,6 +553,48 @@ const debug = defineCommand({
 		return runDebug({ json: args.json });
 	}
 });
+const debugModels = defineCommand({
+	meta: {
+		name: "models",
+		description: "Fetch and display raw model data from Copilot API"
+	},
+	args: {
+		"account-type": {
+			type: "string",
+			alias: "a",
+			default: "individual",
+			description: "The type of GitHub account (individual, business, enterprise)"
+		},
+		"github-token": {
+			type: "string",
+			alias: "g",
+			description: "GitHub token to use (skips interactive auth)"
+		}
+	},
+	async run({ args }) {
+		state.accountType = args["account-type"];
+		await ensurePaths();
+		if (args["github-token"]) {
+			state.githubToken = args["github-token"];
+			consola.info("Using provided GitHub token");
+		} else await setupGitHubToken();
+		const { token } = await getCopilotToken();
+		state.copilotToken = token;
+		consola.info("Fetching models from Copilot API...");
+		const models = await getModels();
+		console.log(JSON.stringify(models, null, 2));
+	}
+});
+const debug = defineCommand({
+	meta: {
+		name: "debug",
+		description: "Debug commands for troubleshooting"
+	},
+	subCommands: {
+		info: debugInfo,
+		models: debugModels
+	}
+});
 //#endregion
 //#region src/logout.ts
@@ -872,7 +941,7 @@ const patchClaude = defineCommand({
 //#endregion
 //#region package.json
 var name = "@hsupu/copilot-api";
-var version = "0.7.10";
+var version = "0.7.11";
 var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
 var keywords = [
 	"proxy",
@@ -905,6 +974,7 @@ var scripts = {
 var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
 var lint_staged = { "*": "bun run lint --fix" };
 var dependencies = {
+	"@anthropic-ai/tokenizer": "^0.0.4",
 	"citty": "^0.1.6",
 	"clipboardy": "^5.0.0",
 	"consola": "^3.4.2",
@@ -2206,9 +2276,61 @@ const numTokensForTools = (tools, encoder, constants) => {
 	return funcTokenCount;
 };
 /**
-* Calculate the token count of messages, supporting multiple GPT encoders
+* Check if a model is an Anthropic model
+*/
+function isAnthropicModel(model) {
+	return model.vendor === "Anthropic";
+}
+/**
+* Convert a message to plain text for Anthropic tokenizer
+*/
+function messageToText(message) {
+	const parts = [];
+	parts.push(`${message.role}:`);
+	if (typeof message.content === "string") parts.push(message.content);
+	else if (Array.isArray(message.content)) {
+		for (const part of message.content) if ("text" in part && part.text) parts.push(part.text);
+		else if (part.type === "image_url") parts.push("[image]");
+	}
+	if (message.tool_calls) for (const tc of message.tool_calls) parts.push(JSON.stringify(tc));
+	if ("tool_call_id" in message && message.tool_call_id) parts.push(`tool_call_id:${message.tool_call_id}`);
+	return parts.join("\n");
+}
+/**
+* Convert tools to text for Anthropic tokenizer
+*/
+function toolsToText(tools) {
+	return tools.map((tool) => JSON.stringify(tool)).join("\n");
+}
+/**
+* Calculate token count using Anthropic's official tokenizer
+*/
+function getAnthropicTokenCount(payload) {
+	const inputMessages = payload.messages.filter((msg) => msg.role !== "assistant");
+	const outputMessages = payload.messages.filter((msg) => msg.role === "assistant");
+	const inputText = inputMessages.map((msg) => messageToText(msg)).join("\n\n");
+	const outputText = outputMessages.map((msg) => messageToText(msg)).join("\n\n");
+	let inputTokens = countTokens(inputText);
+	let outputTokens = countTokens(outputText);
+	if (payload.tools && payload.tools.length > 0) {
+		const toolsText = toolsToText(payload.tools);
+		inputTokens += countTokens(toolsText);
+	}
+	inputTokens += inputMessages.length * 3;
+	outputTokens += outputMessages.length * 3;
+	inputTokens += 3;
+	return {
+		input: inputTokens,
+		output: outputTokens
+	};
+}
+/**
+* Calculate the token count of messages.
+* Uses Anthropic's official tokenizer for Anthropic models,
+* and GPT tokenizers for other models.
 */
 const getTokenCount = async (payload, model) => {
+	if (isAnthropicModel(model)) return getAnthropicTokenCount(payload);
 	const tokenizer = getTokenizerFromModel(model);
 	const encoder = await getEncodeChatFunction(tokenizer);
 	const simplifiedMessages = payload.messages;
@@ -2225,10 +2347,10 @@ const getTokenCount = async (payload, model) => {
 };
 //#endregion
-//#region src/lib/auto-compact.ts
+//#region src/lib/auto-truncate.ts
 const DEFAULT_CONFIG = {
 	safetyMarginPercent: 2,
-	maxRequestBodyBytes: 500 * 1024
+	maxRequestBodyBytes: 510 * 1024
 };
 /** Dynamic byte limit that adjusts based on 413 errors */
 let dynamicByteLimit = null;
@@ -2238,7 +2360,7 @@ let dynamicByteLimit = null;
 function onRequestTooLarge(failingBytes) {
 	const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
 	dynamicByteLimit = newLimit;
-	consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
+	consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
 }
 function calculateLimits(model, config) {
 	const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
@@ -2294,14 +2416,14 @@ function filterOrphanedToolResults(messages) {
 		}
 		return true;
 	});
-	if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
+	if (removedCount > 0) consola.debug(`[AutoTruncate] Filtered ${removedCount} orphaned tool_result`);
 	return filtered;
 }
 /** Ensure messages start with a user message */
 function ensureStartsWithUser(messages) {
 	let startIndex = 0;
 	while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
-	if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
+	if (startIndex > 0) consola.debug(`[AutoTruncate] Skipped ${startIndex} leading non-user messages`);
 	return messages.slice(startIndex);
 }
 /**
@@ -2367,10 +2489,10 @@ function createTruncationMarker(removedCount) {
 	};
 }
 /**
-* Perform auto-compaction on a payload that exceeds limits.
+* Perform auto-truncation on a payload that exceeds limits.
 * Uses binary search to find the optimal truncation point.
 */
-async function autoCompact(payload, model, config = {}) {
+async function autoTruncate(payload, model, config = {}) {
 	const cfg = {
 		...DEFAULT_CONFIG,
 		...config
@@ -2391,13 +2513,13 @@ async function autoCompact(payload, model, config = {}) {
 	if (exceedsTokens && exceedsBytes) reason = "tokens and size";
 	else if (exceedsBytes) reason = "size";
 	else reason = "tokens";
-	consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
+	consola.info(`[AutoTruncate] Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
 	const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
 	const messagesJson = JSON.stringify(payload.messages);
 	const payloadOverhead = originalBytes - messagesJson.length;
 	const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
 	const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
-	consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
+	consola.debug(`[AutoTruncate] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
 	const preserveIndex = findOptimalPreserveIndex({
 		messages: conversationMessages,
 		systemBytes,
@@ -2407,7 +2529,7 @@ async function autoCompact(payload, model, config = {}) {
 		byteLimit
 	});
 	if (preserveIndex === 0) {
-		consola.warn("Auto-compact: Cannot truncate, system messages too large");
+		consola.warn("[AutoTruncate] Cannot truncate, system messages too large");
 		return {
 			payload,
 			wasCompacted: false,
@@ -2417,7 +2539,7 @@ async function autoCompact(payload, model, config = {}) {
 		};
 	}
 	if (preserveIndex >= conversationMessages.length) {
-		consola.warn("Auto-compact: Would need to remove all messages");
+		consola.warn("[AutoTruncate] Would need to remove all messages");
 		return {
 			payload,
 			wasCompacted: false,
@@ -2431,7 +2553,7 @@ async function autoCompact(payload, model, config = {}) {
 	preserved = ensureStartsWithUser(preserved);
 	preserved = filterOrphanedToolResults(preserved);
 	if (preserved.length === 0) {
-		consola.warn("Auto-compact: All messages filtered out after cleanup");
+		consola.warn("[AutoTruncate] All messages filtered out after cleanup");
 		return {
 			payload,
 			wasCompacted: false,
@@ -2452,8 +2574,8 @@ async function autoCompact(payload, model, config = {}) {
 	};
 	const newBytes = JSON.stringify(newPayload).length;
 	const newTokenCount = await getTokenCount(newPayload, model);
-	consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
-	if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
+	consola.info(`[AutoTruncate] ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
+	if (newBytes > byteLimit) consola.warn(`[AutoTruncate] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
 	return {
 		payload: newPayload,
 		wasCompacted: true,
@@ -2463,13 +2585,13 @@ async function autoCompact(payload, model, config = {}) {
 	};
 }
 /**
-* Create a marker to prepend to responses indicating auto-compaction occurred.
+* Create a marker to prepend to responses indicating auto-truncation occurred.
 */
-function createCompactionMarker(result) {
+function createTruncationResponseMarker(result) {
 	if (!result.wasCompacted) return "";
 	const reduction = result.originalTokens - result.compactedTokens;
 	const percentage = Math.round(reduction / result.originalTokens * 100);
-	return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
+	return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
 }
 //#endregion
@@ -2557,37 +2679,37 @@ function recordStreamError(opts) {
 function isNonStreaming(response) {
 	return Object.hasOwn(response, "choices");
 }
-/** Build final payload with auto-compact if needed */
+/** Build final payload with auto-truncate if needed */
 async function buildFinalPayload(payload, model) {
-	if (!state.autoCompact || !model) {
-		if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
+	if (!state.autoTruncate || !model) {
+		if (state.autoTruncate && !model) consola.warn(`Auto-truncate: Model '${payload.model}' not found in cached models, skipping`);
 		return {
 			finalPayload: payload,
-			compactResult: null
+			truncateResult: null
 		};
 	}
 	try {
 		const check = await checkNeedsCompaction(payload, model);
-		consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
+		consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
 		if (!check.needed) return {
 			finalPayload: payload,
-			compactResult: null
+			truncateResult: null
 		};
 		let reasonText;
 		if (check.reason === "both") reasonText = "tokens and size";
 		else if (check.reason === "bytes") reasonText = "size";
 		else reasonText = "tokens";
-		consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
-		const compactResult = await autoCompact(payload, model);
+		consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);
+		const truncateResult = await autoTruncate(payload, model);
 		return {
-			finalPayload: compactResult.payload,
-			compactResult
+			finalPayload: truncateResult.payload,
+			truncateResult
 		};
 	} catch (error) {
-		consola.warn("Auto-compact failed, proceeding with original payload:", error instanceof Error ? error.message : error);
+		consola.warn("Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
 		return {
 			finalPayload: payload,
-			compactResult: null
+			truncateResult: null
 		};
 	}
 }
@@ -2631,7 +2753,7 @@ async function logPayloadSizeInfo(payload, model) {
 	if (largeMessages > 0) consola.info(`  Large messages (>50KB): ${largeMessages}`);
 	consola.info("");
 	consola.info("  Suggestions:");
-	if (!state.autoCompact) consola.info("    • Enable --auto-compact to automatically truncate history");
+	if (!state.autoTruncate) consola.info("    • Enable --auto-truncate to automatically truncate history");
 	if (imageCount > 0) consola.info("    • Remove or resize large images in the conversation");
 	consola.info("    • Start a new conversation with /clear or /reset");
 	consola.info("    • Reduce conversation history by deleting old messages");
@@ -2663,8 +2785,8 @@ async function handleCompletion$1(c) {
 	};
 	const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
 	await logTokenCount(originalPayload, selectedModel);
-	const { finalPayload, compactResult } = await buildFinalPayload(originalPayload, selectedModel);
-	if (compactResult) ctx.compactResult = compactResult;
+	const { finalPayload, truncateResult } = await buildFinalPayload(originalPayload, selectedModel);
+	if (truncateResult) ctx.truncateResult = truncateResult;
 	const payload = isNullish(finalPayload.max_tokens) ? {
 		...finalPayload,
 		max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
@@ -2717,8 +2839,8 @@ async function logTokenCount(payload, selectedModel) {
 function handleNonStreamingResponse$1(c, originalResponse, ctx) {
 	consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
 	let response = originalResponse;
-	if (ctx.compactResult?.wasCompacted && response.choices[0]?.message.content) {
-		const marker = createCompactionMarker(ctx.compactResult);
+	if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
+		const marker = createTruncationResponseMarker(ctx.truncateResult);
 		response = {
 			...response,
 			choices: response.choices.map((choice$1, i) => i === 0 ? {
@@ -2786,8 +2908,8 @@ async function handleStreamingResponse$1(opts) {
 	const { stream, response, payload, ctx } = opts;
 	const acc = createStreamAccumulator();
 	try {
-		if (ctx.compactResult?.wasCompacted) {
-			const marker = createCompactionMarker(ctx.compactResult);
+		if (state.verbose && ctx.truncateResult?.wasCompacted) {
+			const marker = createTruncationResponseMarker(ctx.truncateResult);
 			const markerChunk = {
 				id: `compact-marker-${Date.now()}`,
 				object: "chat.completion.chunk",
@@ -4160,7 +4282,7 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
 	const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
 	return [...systemMessages, ...otherMessages];
 }
-const RESERVED_KEYWORDS = ["x-anthropic-billing-header"];
+const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
 /**
 * Filter out reserved keywords from system prompt text.
 * Copilot API rejects requests containing these keywords.
@@ -4385,7 +4507,10 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
 //#endregion
 //#region src/routes/messages/count-tokens-handler.ts
 /**
-* Handles token counting for Anthropic messages
+* Handles token counting for Anthropic messages.
+*
+* For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
+* For other models, uses GPT tokenizers with appropriate buffers.
 */
 async function handleCountTokens(c) {
 	try {
@@ -4397,6 +4522,7 @@ async function handleCountTokens(c) {
 			consola.warn("Model not found, returning default token count");
 			return c.json({ input_tokens: 1 });
 		}
+		const isAnthropicModel$1 = selectedModel.vendor === "Anthropic";
 		const tokenCount = await getTokenCount(openAIPayload, selectedModel);
 		if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
 			let mcpToolExist = false;
@@ -4407,9 +4533,8 @@ async function handleCountTokens(c) {
 			}
 		}
 		let finalTokenCount = tokenCount.input + tokenCount.output;
-		if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
-		else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
-		consola.debug("Token count:", finalTokenCount);
+		if (!isAnthropicModel$1) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
+		consola.debug(`Token count: ${finalTokenCount} (${isAnthropicModel$1 ? "Anthropic tokenizer" : "GPT tokenizer"})`);
 		return c.json({ input_tokens: finalTokenCount });
 	} catch (error) {
 		consola.error("Error counting tokens:", error);
@@ -4417,6 +4542,101 @@ async function handleCountTokens(c) {
 	}
 }
+//#endregion
+//#region src/services/copilot/create-anthropic-messages.ts
+/**
+* Fields that are supported by Copilot's Anthropic API endpoint.
+* Any other fields in the incoming request will be stripped.
+*/
+const COPILOT_SUPPORTED_FIELDS = new Set([
+	"model",
+	"messages",
+	"max_tokens",
+	"system",
+	"metadata",
+	"stop_sequences",
+	"stream",
+	"temperature",
+	"top_p",
+	"top_k",
+	"tools",
+	"tool_choice",
+	"thinking",
+	"service_tier"
+]);
+/**
+* Filter payload to only include fields supported by Copilot's Anthropic API.
+* This prevents errors like "Extra inputs are not permitted" for unsupported
+* fields like `output_config`.
+*/
+function filterPayloadForCopilot(payload) {
+	const filtered = {};
+	const unsupportedFields = [];
+	for (const [key, value] of Object.entries(payload)) if (COPILOT_SUPPORTED_FIELDS.has(key)) filtered[key] = value;
+	else unsupportedFields.push(key);
+	if (unsupportedFields.length > 0) consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${unsupportedFields.join(", ")}`);
+	return filtered;
+}
+/**
+* Adjust max_tokens if thinking is enabled.
+* According to Anthropic docs, max_tokens must be greater than thinking.budget_tokens.
+* max_tokens = thinking_budget + response_tokens
+*/
+function adjustMaxTokensForThinking(payload) {
+	const thinking = payload.thinking;
+	if (!thinking) return payload;
+	const budgetTokens = thinking.budget_tokens;
+	if (!budgetTokens) return payload;
+	if (payload.max_tokens <= budgetTokens) {
+		const newMaxTokens = budgetTokens + Math.min(16384, budgetTokens);
+		consola.debug(`[DirectAnthropic] Adjusted max_tokens: ${payload.max_tokens} → ${newMaxTokens} (thinking.budget_tokens=${budgetTokens})`);
+		return {
+			...payload,
+			max_tokens: newMaxTokens
+		};
+	}
+	return payload;
+}
+/**
+* Create messages using Anthropic-style API directly.
+* This bypasses the OpenAI translation layer for Anthropic models.
+*/
+async function createAnthropicMessages(payload) {
+	if (!state.copilotToken) throw new Error("Copilot token not found");
+	let filteredPayload = filterPayloadForCopilot(payload);
+	filteredPayload = adjustMaxTokensForThinking(filteredPayload);
+	const enableVision = filteredPayload.messages.some((msg) => {
+		if (typeof msg.content === "string") return false;
+		return msg.content.some((block) => block.type === "image");
+	});
+	const isAgentCall = filteredPayload.messages.some((msg) => msg.role === "assistant");
+	const headers = {
+		...copilotHeaders(state, enableVision),
+		"X-Initiator": isAgentCall ? "agent" : "user",
+		"anthropic-version": "2023-06-01"
+	};
+	consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
+	const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
+		method: "POST",
+		headers,
+		body: JSON.stringify(filteredPayload)
+	});
+	if (!response.ok) {
+		consola.error("Failed to create Anthropic messages", response);
+		throw await HTTPError.fromResponse("Failed to create Anthropic messages", response);
+	}
+	if (payload.stream) return events(response);
+	return await response.json();
+}
+/**
+* Check if a model supports direct Anthropic API.
+* Returns true if direct Anthropic API is enabled and the model is from Anthropic vendor.
+*/
+function supportsDirectAnthropicApi(modelId) {
+	if (!state.directAnthropicApi) return false;
+	return (state.models?.data.find((m) => m.id === modelId))?.vendor === "Anthropic";
+}
 //#endregion
 //#region src/routes/messages/stream-translation.ts
 function isToolBlockOpen(state$1) {
@@ -4579,11 +4799,128 @@ async function handleCompletion(c) {
 		trackingId,
 		startTime
 	};
+	if (supportsDirectAnthropicApi(anthropicPayload.model)) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
+	return handleTranslatedCompletion(c, anthropicPayload, ctx);
+}
+/**
+* Handle completion using direct Anthropic API (no translation needed)
+*/
+async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
+	consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
+	if (state.manualApprove) await awaitApproval();
+	try {
+		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(anthropicPayload));
+		ctx.queueWaitMs = queueWaitMs;
+		if (Symbol.asyncIterator in response) {
+			consola.debug("Streaming response from Copilot (direct Anthropic)");
+			updateTrackerStatus(ctx.trackingId, "streaming");
+			return streamSSE(c, async (stream) => {
+				await handleDirectAnthropicStreamingResponse({
+					stream,
+					response,
+					anthropicPayload,
+					ctx
+				});
+			});
+		}
+		return handleDirectAnthropicNonStreamingResponse(c, response, ctx);
+	} catch (error) {
+		recordErrorResponse(ctx, anthropicPayload.model, error);
+		throw error;
+	}
+}
+/**
+* Handle non-streaming direct Anthropic response
+*/
+function handleDirectAnthropicNonStreamingResponse(c, response, ctx) {
+	consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
+	recordResponse(ctx.historyId, {
+		success: true,
+		model: response.model,
+		usage: response.usage,
+		stop_reason: response.stop_reason ?? void 0,
+		content: {
+			role: "assistant",
+			content: response.content.map((block) => {
+				switch (block.type) {
+					case "text": return {
+						type: "text",
+						text: block.text
+					};
+					case "tool_use": return {
+						type: "tool_use",
+						id: block.id,
+						name: block.name,
+						input: JSON.stringify(block.input)
+					};
+					case "thinking": return {
+						type: "thinking",
+						thinking: block.thinking
+					};
+					default: return { type: block.type };
+				}
+			})
+		},
+		toolCalls: extractToolCallsFromAnthropicContent(response.content)
+	}, Date.now() - ctx.startTime);
+	if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
+		inputTokens: response.usage.input_tokens,
+		outputTokens: response.usage.output_tokens,
+		queueWaitMs: ctx.queueWaitMs
+	});
+	return c.json(response);
+}
+/**
+* Handle streaming direct Anthropic response (passthrough SSE events)
+*/
+async function handleDirectAnthropicStreamingResponse(opts) {
+	const { stream, response, anthropicPayload, ctx } = opts;
+	const acc = createAnthropicStreamAccumulator();
+	try {
+		for await (const rawEvent of response) {
+			consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
+			if (rawEvent.data === "[DONE]") break;
+			if (!rawEvent.data) continue;
+			let event;
+			try {
+				event = JSON.parse(rawEvent.data);
+			} catch (parseError) {
+				consola.error("Failed to parse Anthropic stream event:", parseError, rawEvent.data);
+				continue;
+			}
+			processAnthropicEvent(event, acc);
+			await stream.writeSSE({
+				event: rawEvent.event || event.type,
+				data: rawEvent.data
+			});
+		}
+		recordStreamingResponse(acc, anthropicPayload.model, ctx);
+		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
+	} catch (error) {
+		consola.error("Direct Anthropic stream error:", error);
+		recordStreamError({
+			acc,
+			fallbackModel: anthropicPayload.model,
+			ctx,
+			error
+		});
+		failTracking(ctx.trackingId, error);
+		const errorEvent = translateErrorToAnthropicErrorEvent();
+		await stream.writeSSE({
+			event: errorEvent.type,
+			data: JSON.stringify(errorEvent)
+		});
+	}
+}
+/**
+* Handle completion using OpenAI translation path (legacy)
+*/
+async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
 	const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
 	consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
 	const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
-	const { finalPayload: openAIPayload, compactResult } = await buildFinalPayload(translatedPayload, selectedModel);
-	if (compactResult) ctx.compactResult = compactResult;
+	const { finalPayload: openAIPayload, truncateResult } = await buildFinalPayload(translatedPayload, selectedModel);
+	if (truncateResult) ctx.truncateResult = truncateResult;
 	if (state.manualApprove) await awaitApproval();
 	try {
 		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
@@ -4595,7 +4932,7 @@ async function handleCompletion(c) {
 			ctx
 		});
 		consola.debug("Streaming response from Copilot");
-		updateTrackerStatus(trackingId, "streaming");
+		updateTrackerStatus(ctx.trackingId, "streaming");
 		return streamSSE(c, async (stream) => {
 			await handleStreamingResponse({
 				stream,
@@ -4616,8 +4953,8 @@ function handleNonStreamingResponse(opts) {
 	consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
 	let anthropicResponse = translateToAnthropic(response, toolNameMapping);
 	consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
-	if (ctx.compactResult?.wasCompacted) {
-		const marker = createCompactionMarker(ctx.compactResult);
+	if (state.verbose && ctx.truncateResult?.wasCompacted) {
+		const marker = createTruncationResponseMarker(ctx.truncateResult);
 		anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
 	}
 	recordResponse(ctx.historyId, {
@@ -4689,9 +5026,9 @@ async function handleStreamingResponse(opts) {
 	};
 	const acc = createAnthropicStreamAccumulator();
 	try {
-		if (ctx.compactResult?.wasCompacted) {
-			const marker = createCompactionMarker(ctx.compactResult);
-			await sendCompactionMarkerEvent(stream, streamState, marker);
+		if (ctx.truncateResult?.wasCompacted) {
+			const marker = createTruncationResponseMarker(ctx.truncateResult);
+			await sendTruncationMarkerEvent(stream, streamState, marker);
 			acc.content += marker;
 		}
 		await processStreamChunks({
@@ -4719,7 +5056,7 @@ async function handleStreamingResponse(opts) {
 		});
 	}
 }
-async function sendCompactionMarkerEvent(stream, streamState, marker) {
+async function sendTruncationMarkerEvent(stream, streamState, marker) {
 	const blockStartEvent = {
 		type: "content_block_start",
 		index: streamState.contentBlockIndex,
@@ -4892,6 +5229,15 @@ function extractToolCallsFromContent(content) {
 	});
 	return tools.length > 0 ? tools : void 0;
 }
+function extractToolCallsFromAnthropicContent(content) {
+	const tools = [];
+	for (const block of content) if (block.type === "tool_use") tools.push({
+		id: block.id,
+		name: block.name,
+		input: JSON.stringify(block.input)
+	});
+	return tools.length > 0 ? tools : void 0;
+}
 //#endregion
 //#region src/routes/messages/route.ts
@@ -5018,12 +5364,14 @@ async function runServer(options) {
 	if (options.verbose) {
 		consola.level = 5;
 		consola.info("Verbose logging enabled");
+		state.verbose = true;
 	}
 	state.accountType = options.accountType;
 	if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
 	state.manualApprove = options.manual;
 	state.showToken = options.showToken;
-	state.autoCompact = options.autoCompact;
+	state.autoTruncate = options.autoTruncate;
+	state.directAnthropicApi = options.directAnthropicApi;
 	if (options.rateLimit) initAdaptiveRateLimiter({
 		baseRetryIntervalSeconds: options.retryInterval,
 		requestIntervalSeconds: options.requestInterval,
@@ -5031,7 +5379,8 @@ async function runServer(options) {
 		consecutiveSuccessesForRecovery: options.consecutiveSuccesses
 	});
 	else consola.info("Rate limiting disabled");
-	if (!options.autoCompact) consola.info("Auto-compact disabled");
+	if (!options.autoTruncate) consola.info("Auto-truncate disabled");
+	if (!options.directAnthropicApi) consola.info("Direct Anthropic API disabled (using OpenAI translation)");
 	initHistory(options.history, options.historyLimit);
 	if (options.history) {
 		const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5173,10 +5522,15 @@ const start = defineCommand({
 			default: "1000",
 			description: "Maximum number of history entries to keep in memory (0 = unlimited)"
 		},
-		"no-auto-compact": {
+		"no-auto-truncate": {
+			type: "boolean",
+			default: false,
+			description: "Disable automatic conversation history truncation when exceeding limits"
+		},
+		"no-direct-anthropic": {
 			type: "boolean",
 			default: false,
-			description: "Disable automatic conversation history compression when exceeding limits"
+			description: "Disable direct Anthropic API for Anthropic models (use OpenAI translation instead)"
 		}
 	},
 	run({ args }) {
@@ -5197,7 +5551,8 @@ const start = defineCommand({
 			proxyEnv: args["proxy-env"],
 			history: !args["no-history"],
 			historyLimit: Number.parseInt(args["history-limit"], 10),
-			autoCompact: !args["no-auto-compact"]
+			autoTruncate: !args["no-auto-truncate"],
+			directAnthropicApi: !args["no-direct-anthropic"]
 		});
 	}
 });