@dianshuv/copilot-api 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +26 -0
  2. package/dist/main.mjs +491 -2
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -15,6 +15,7 @@
15
15
  - **Graceful shutdown**: 4-phase shutdown sequence — stops accepting requests, waits for in-flight requests to complete, sends abort signal, then force-closes. Configurable via `--shutdown-graceful-wait` and `--shutdown-abort-wait`.
16
16
  - **Stream repetition detection**: Detects when models get stuck in repetitive output loops using KMP-based pattern matching and logs a warning.
17
17
  - **Stale request reaping**: Automatically force-fails requests that exceed a configurable maximum age (default 600s) to prevent resource leaks.
18
+ - **Gemini API compatibility**: `/v1beta/models` endpoints translate Gemini API requests to OpenAI format for Copilot. Enables Google Gemini CLI to use Copilot models via `GOOGLE_GEMINI_BASE_URL` environment variable.
18
19
  - **PostHog analytics**: Optional PostHog Cloud integration (`--posthog-key`) sends per-request token usage events for long-term trend analysis. Free tier (1M events/month) is more than sufficient for individual use.
19
20
 
20
21
  ## Quick Start
@@ -97,6 +98,14 @@ copilot-api start
97
98
  | `/v1/messages/count_tokens` | POST | Token counting |
98
99
  | `/v1/event_logging/batch` | POST | Event logging (no-op) |
99
100
 
101
+ ### Gemini Compatible
102
+
103
+ | Endpoint | Method | Description |
104
+ |----------|--------|-------------|
105
+ | `/v1beta/models/{model}:generateContent` | POST | Non-streaming generation |
106
+ | `/v1beta/models/{model}:streamGenerateContent` | POST | Streaming generation (SSE) |
107
+ | `/v1beta/models/{model}:countTokens` | POST | Token counting |
108
+
100
109
  ### Utility
101
110
 
102
111
  | Endpoint | Method | Description |
@@ -143,6 +152,23 @@ Or use the interactive setup:
143
152
  bun run start --claude-code
144
153
  ```
145
154
 
155
+ ## Using with Gemini CLI
156
+
157
+ ```bash
158
+ # Start the proxy
159
+ copilot-api start
160
+
161
+ # Configure Gemini CLI to use the proxy
162
+ export GEMINI_API_KEY="placeholder"
163
+ export GOOGLE_GEMINI_BASE_URL="http://localhost:4141"
164
+
165
+ # Basic conversation
166
+ gemini -p "Explain this code"
167
+
168
+ # Pipe review
169
+ git diff HEAD~1 | gemini -p "Review this diff for bugs"
170
+ ```
171
+
146
172
  ## Upstream Project
147
173
 
148
174
  For the original project documentation, features, and updates, see: [ericc-ch/copilot-api](https://github.com/ericc-ch/copilot-api)
package/dist/main.mjs CHANGED
@@ -17,7 +17,7 @@ import process$1 from "node:process";
17
17
  import pc from "picocolors";
18
18
  import { Hono } from "hono";
19
19
  import { cors } from "hono/cors";
20
- import { streamSSE } from "hono/streaming";
20
+ import { stream, streamSSE } from "hono/streaming";
21
21
  import { events } from "fetch-event-stream";
22
22
 
23
23
  //#region src/lib/paths.ts
@@ -1036,7 +1036,7 @@ const patchClaude = defineCommand({
1036
1036
 
1037
1037
  //#endregion
1038
1038
  //#region package.json
1039
- var version = "0.6.0";
1039
+ var version = "0.6.2";
1040
1040
 
1041
1041
  //#endregion
1042
1042
  //#region src/lib/adaptive-rate-limiter.ts
@@ -4036,6 +4036,494 @@ eventLoggingRoutes.post("/batch", (c) => {
4036
4036
  return c.text("OK", 200);
4037
4037
  });
4038
4038
 
4039
+ //#endregion
4040
+ //#region src/routes/gemini/error.ts
4041
// HTTP status code → Gemini API error status string.
const STATUS_MAP = {
  400: "INVALID_ARGUMENT",
  401: "PERMISSION_DENIED",
  403: "PERMISSION_DENIED",
  404: "NOT_FOUND",
  413: "INVALID_ARGUMENT",
  429: "RESOURCE_EXHAUSTED",
  500: "INTERNAL"
};
/**
 * Builds a Gemini-style JSON error response.
 * @param c Hono context
 * @param code numeric HTTP status code (also used as the response status)
 * @param status Gemini error status string, e.g. "INVALID_ARGUMENT"
 * @param message human-readable error description
 */
function geminiError(c, code, status, message) {
  return c.json({ error: { code, message, status } }, code);
}
/**
 * Converts an upstream failure into a Gemini error response.
 * HTTPError instances keep their upstream status code; any other error
 * becomes a generic 500 INTERNAL.
 */
function forwardGeminiError(c, error) {
  if (error instanceof HTTPError) {
    const status = STATUS_MAP[error.status] ?? "INTERNAL";
    const code = error.status;
    let message = error.responseText;
    try {
      // Prefer the structured message when the upstream body is JSON.
      const parsed = JSON.parse(error.responseText);
      if (parsed.error?.message) message = parsed.error.message;
    } catch {
      // Body was not JSON — keep the raw response text.
    }
    consola.error(`HTTP ${code}:`, message.slice(0, 200));
    return geminiError(c, code, status, message);
  }
  consola.error("Unexpected error:", error);
  return geminiError(c, 500, "INTERNAL", error instanceof Error ? error.message : "Unknown error");
}
4072
+
4073
+ //#endregion
4074
+ //#region src/routes/gemini/gemini-to-openai.ts
4075
/**
 * Translates a Gemini generateContent request into an OpenAI
 * chat-completions payload for the given model.
 *
 * systemInstruction becomes a leading "system" message; each Content
 * entry is expanded via translateContent; generationConfig, tools and
 * toolConfig are mapped onto their OpenAI equivalents.
 * @returns {{ payload: object }} the OpenAI-shaped request body
 */
function translateGeminiToOpenAI(request, model) {
  const messages = [];
  if (request.systemInstruction) {
    const systemText = extractTextFromParts(request.systemInstruction.parts);
    if (systemText) messages.push({ role: "system", content: systemText });
  }
  // Monotonic source of synthetic tool-call ids; callIdQueue pairs each
  // functionCall with a later functionResponse of the same name (FIFO).
  let globalCallIndex = 0;
  const callIdQueue = new Map();
  // Malformed request: bail out with an empty message list.
  if (!Array.isArray(request.contents)) {
    return { payload: { messages: [], model } };
  }
  for (const content of request.contents) {
    const translated = translateContent(content, callIdQueue, () => `call_gemini_${globalCallIndex++}`);
    messages.push(...translated);
  }
  const payload = { messages, model };
  const config = request.generationConfig;
  if (config) {
    if (config.temperature !== undefined) payload.temperature = config.temperature;
    if (config.topP !== undefined) payload.top_p = config.topP;
    if (config.maxOutputTokens !== undefined) payload.max_tokens = config.maxOutputTokens;
    if (config.stopSequences !== undefined) payload.stop = config.stopSequences;
    if (config.responseMimeType === "application/json") {
      payload.response_format = { type: "json_object" };
    }
  }
  if (request.tools) {
    const tools = translateTools(request.tools);
    if (tools.length > 0) payload.tools = tools;
  }
  const mode = request.toolConfig?.functionCallingConfig?.mode;
  if (mode) {
    payload.tool_choice = { AUTO: "auto", ANY: "required", NONE: "none" }[mode];
  }
  return { payload };
}
4117
/**
 * Converts Gemini functionCall parts into OpenAI tool_calls entries.
 * Each generated id is recorded in callIdQueue under the function name
 * so a later functionResponse can be matched back to its call.
 */
function mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId) {
  return functionCalls.map((part) => {
    const id = generateId();
    pushToQueue(callIdQueue, part.functionCall.name, id);
    return {
      id,
      type: "function",
      function: {
        name: part.functionCall.name,
        arguments: JSON.stringify(part.functionCall.args)
      }
    };
  });
}
4131
/**
 * Translates one Gemini Content entry into zero or more OpenAI chat
 * messages.
 *
 * Gemini "thought" text parts are dropped; fileData parts are rejected
 * with a 400. When images are present, a multi-part message preserving
 * the original part order is emitted; otherwise text and tool calls are
 * collapsed into a single message. functionResponse parts each become a
 * "tool" message whose tool_call_id is dequeued FIFO from callIdQueue
 * (or a synthetic orphan id when no matching call was recorded).
 */
function translateContent(content, callIdQueue, generateId) {
  const role = content.role === "model" ? "assistant" : "user";
  const messages = [];
  const textParts = [];
  const imageParts = [];
  const functionCalls = [];
  const functionResponses = [];
  for (const part of content.parts) {
    if (isTextPart(part)) {
      // Internal reasoning text is never forwarded.
      if (!part.thought) textParts.push(part);
    } else if (isInlineDataPart(part)) {
      imageParts.push(part);
    } else if (isFunctionCallPart(part)) {
      functionCalls.push(part);
    } else if (isFunctionResponsePart(part)) {
      functionResponses.push(part);
    } else if (isFileDataPart(part)) {
      throw new HTTPError("fileData parts are not supported", 400, "fileData parts are not supported");
    }
  }
  if (imageParts.length > 0) {
    // Mixed text/image content: re-walk the parts so interleaving order
    // is preserved in the OpenAI content array.
    const contentParts = [];
    for (const part of content.parts) {
      if (isTextPart(part) && !part.thought) {
        contentParts.push({ type: "text", text: part.text });
      } else if (isInlineDataPart(part)) {
        contentParts.push({
          type: "image_url",
          image_url: { url: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}` }
        });
      }
    }
    const msg = { role, content: contentParts };
    if (functionCalls.length > 0 && role === "assistant") {
      msg.tool_calls = mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId);
    }
    messages.push(msg);
  } else if (functionCalls.length > 0 && role === "assistant") {
    // Assistant turn with tool calls: content may legitimately be null.
    const textContent = textParts.length > 0 ? textParts.map((p) => p.text).join("") : null;
    messages.push({
      role: "assistant",
      content: textContent,
      tool_calls: mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId)
    });
  } else if (textParts.length > 0) {
    messages.push({ role, content: textParts.map((p) => p.text).join("") });
  }
  // Tool results always follow the message they respond to.
  let orphanIndex = 0;
  for (const fr of functionResponses) {
    const queue = callIdQueue.get(fr.functionResponse.name);
    const id = queue && queue.length > 0 ? queue.shift() : `call_gemini_orphan_${orphanIndex++}`;
    messages.push({
      role: "tool",
      content: JSON.stringify(fr.functionResponse.response),
      tool_call_id: id
    });
  }
  return messages;
}
4183
/**
 * Flattens Gemini tool declarations into OpenAI function tools.
 * Declarations with no parameter schema get an empty object schema,
 * which OpenAI requires for parameterless functions.
 */
function translateTools(geminiTools) {
  const tools = [];
  for (const tool of geminiTools) {
    if (!tool.functionDeclarations) continue;
    for (const decl of tool.functionDeclarations) {
      tools.push({
        type: "function",
        function: {
          name: decl.name,
          description: decl.description,
          parameters: decl.parameters ?? { type: "object", properties: {} }
        }
      });
    }
  }
  return tools;
}
4198
// Appends id to the per-name FIFO bucket, creating the bucket on first use.
function pushToQueue(queue, name, id) {
  const bucket = queue.get(name);
  if (bucket) {
    bucket.push(id);
  } else {
    queue.set(name, [id]);
  }
}
4203
// Joins the text of all non-thought text parts with newlines.
function extractTextFromParts(parts) {
  const visible = parts.filter((p) => "text" in p && (!("thought" in p) || !p.thought));
  return visible.map((p) => p.text).join("\n");
}
4206
// Structural type guards for Gemini request parts: each part kind is
// discriminated by the single key it carries. `in` (not hasOwn) is kept
// deliberately, matching how the translators probe parts.
const isTextPart = (part) => "text" in part;
const isInlineDataPart = (part) => "inlineData" in part;
const isFunctionCallPart = (part) => "functionCall" in part;
const isFunctionResponsePart = (part) => "functionResponse" in part;
const isFileDataPart = (part) => "fileData" in part;
4221
+
4222
+ //#endregion
4223
+ //#region src/routes/gemini/count-tokens-handler.ts
4224
/**
 * Handles the Gemini countTokens action.
 *
 * Translates the request to the OpenAI shape and counts tokens with
 * the local tokenizer. When the model is not in the cached Copilot
 * model list, returns a fixed estimate of 1 rather than failing —
 * countTokens is advisory for clients.
 */
async function handleGeminiCountTokens(c, model) {
  try {
    const { payload } = translateGeminiToOpenAI(await c.req.json(), model);
    const selectedModel = state.models?.data.find((m) => m.id === model);
    if (!selectedModel) {
      consola.warn("Model not found for count_tokens, returning estimate");
      return c.json({ totalTokens: 1 });
    }
    const counts = await getTokenCount(payload, selectedModel);
    const totalTokens = counts.input + counts.output;
    consola.debug(`Gemini countTokens: ${totalTokens} tokens`);
    return c.json({ totalTokens });
  } catch (error) {
    return forwardGeminiError(c, error);
  }
}
4240
+
4241
+ //#endregion
4242
+ //#region src/routes/gemini/openai-to-gemini.ts
4243
/**
 * Converts a non-streaming OpenAI chat completion into a Gemini
 * GenerateContentResponse. Text content and tool calls from the first
 * choice become parts of a single "model" candidate; with no choices,
 * an empty candidate list is returned (usage is still reported).
 */
function translateOpenAIResponseToGemini(response, model) {
  const usageMetadata = buildUsageMetadata(response.usage);
  const choice = response.choices.at(0);
  if (!choice) {
    return { candidates: [], usageMetadata, modelVersion: model };
  }
  const parts = [];
  if (choice.message.content) parts.push({ text: choice.message.content });
  for (const tc of choice.message.tool_calls ?? []) {
    parts.push({
      functionCall: {
        name: tc.function.name,
        args: parseArgs(tc.function.arguments)
      }
    });
  }
  // Gemini candidates must carry at least one part.
  if (parts.length === 0) parts.push({ text: "" });
  return {
    candidates: [{
      content: { role: "model", parts },
      finishReason: mapFinishReason(choice.finish_reason),
      index: 0
    }],
    usageMetadata,
    modelVersion: model
  };
}
4273
/**
 * Creates a fresh accumulator for translating an OpenAI SSE stream
 * into Gemini chunks: in-flight tool calls keyed by their index,
 * running usage totals, plus the model name and finish reason seen
 * so far.
 */
function createGeminiStreamState() {
  const usage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  return {
    toolCalls: new Map(),
    usage,
    model: "",
    finishReason: ""
  };
}
4285
/**
 * Translates one OpenAI stream chunk into zero or more Gemini stream
 * chunks, mutating the shared stream state.
 *
 * Tool-call argument fragments are buffered per tool-call index and
 * flushed either when a new index begins or when the stream reports a
 * finish reason.
 */
function translateOpenAIChunkToGemini(chunk, state) {
  const results = [];
  if (!state.model && chunk.model) state.model = chunk.model;
  if (chunk.usage) {
    state.usage.promptTokens = chunk.usage.prompt_tokens;
    state.usage.completionTokens = chunk.usage.completion_tokens;
    state.usage.totalTokens = chunk.usage.total_tokens;
  }
  const choice = chunk.choices.at(0);
  if (!choice) return results;
  const delta = choice.delta;
  for (const tc of delta.tool_calls ?? []) {
    const buffered = state.toolCalls.get(tc.index);
    if (buffered) {
      // Continuation of an in-flight call: append the argument fragment.
      if (tc.function?.arguments) buffered.args += tc.function.arguments;
    } else {
      // A new call index starts: emit completed calls below it first.
      const flushed = flushToolCalls(state, tc.index);
      if (flushed) results.push(flushed);
      state.toolCalls.set(tc.index, {
        name: tc.function?.name ?? "",
        args: tc.function?.arguments ?? ""
      });
    }
  }
  if (delta.content) {
    results.push(buildGeminiChunk([{ text: delta.content }], choice.finish_reason, state));
  }
  if (choice.finish_reason) {
    state.finishReason = choice.finish_reason;
    const flushed = flushToolCalls(state);
    if (flushed) results.push(flushed);
    // NOTE(review): when the final delta carries both content and pending
    // tool calls, the finishReason rides on the content chunk and the
    // flushed tool-call chunk follows it — confirm clients accept a
    // trailing chunk without finishReason.
    if (!delta.content) results.push(buildGeminiChunk([], choice.finish_reason, state));
  }
  return results;
}
4318
/**
 * Emits the buffered tool calls as a single Gemini chunk and removes
 * them from the state. When belowIndex is provided, only calls with a
 * strictly smaller index are flushed. Returns null when nothing
 * qualifies.
 */
function flushToolCalls(state, belowIndex) {
  if (state.toolCalls.size === 0) return null;
  const parts = [];
  for (const [idx, call] of state.toolCalls) {
    if (belowIndex !== undefined && idx >= belowIndex) continue;
    parts.push({
      functionCall: { name: call.name, args: parseArgs(call.args) }
    });
    state.toolCalls.delete(idx);
  }
  return parts.length > 0 ? buildGeminiChunk(parts, null, state) : null;
}
4333
/**
 * Wraps parts into a Gemini streaming chunk carrying the usage totals
 * accumulated so far. An empty parts list becomes a single empty text
 * part, since candidates must have content; finishReason is attached
 * only when present.
 */
function buildGeminiChunk(parts, finishReason, state) {
  const candidate = {
    content: {
      role: "model",
      parts: parts.length > 0 ? parts : [{ text: "" }]
    },
    index: 0
  };
  if (finishReason) candidate.finishReason = mapFinishReason(finishReason);
  return {
    candidates: [candidate],
    usageMetadata: {
      promptTokenCount: state.usage.promptTokens,
      candidatesTokenCount: state.usage.completionTokens,
      totalTokenCount: state.usage.totalTokens
    },
    modelVersion: state.model
  };
}
4352
// Parses a tool-call arguments string. Malformed JSON is preserved
// under a "raw" key rather than being dropped, so the model's output
// is never silently lost.
function parseArgs(raw) {
  try {
    return JSON.parse(raw);
  } catch {
    return { raw };
  }
}
4359
/**
 * Maps an OpenAI finish_reason onto the Gemini FinishReason enum.
 * "tool_calls" maps to STOP because Gemini has no dedicated value;
 * anything unrecognized becomes OTHER.
 */
function mapFinishReason(reason) {
  if (reason === "stop" || reason === "tool_calls") return "STOP";
  if (reason === "length") return "MAX_TOKENS";
  if (reason === "content_filter") return "SAFETY";
  return "OTHER";
}
4368
// Converts OpenAI usage counters (possibly absent) into Gemini
// usageMetadata. `??` keeps both missing and null fields at 0.
function buildUsageMetadata(usage) {
  const metadata = {
    promptTokenCount: usage?.prompt_tokens ?? 0,
    candidatesTokenCount: usage?.completion_tokens ?? 0,
    totalTokenCount: usage?.total_tokens ?? 0
  };
  return metadata;
}
4375
+
4376
+ //#endregion
4377
+ //#region src/routes/gemini/handler.ts
4378
/**
 * Handles Gemini generateContent / streamGenerateContent.
 *
 * Translates the Gemini request to an OpenAI payload, forwards it via
 * the adaptive rate limiter, and translates the result back — either
 * as a single JSON body or as an SSE stream of Gemini chunks. Request
 * history and usage tracking are recorded on success and failure.
 */
async function handleGeminiGenerate(c, model, isStream) {
  try {
    const geminiRequest = await c.req.json();
    consola.debug("Gemini request for model:", model, "stream:", isStream);
    const trackingId = c.get("trackingId");
    const startTime = Date.now();
    updateTrackerModel(trackingId, model);
    const { payload } = translateGeminiToOpenAI(geminiRequest, model);
    payload.stream = isStream;
    // Default max_tokens to the model's advertised output limit when
    // the client did not specify one.
    const selectedModel = state.models?.data.find((m) => m.id === model);
    if (isNullish(payload.max_tokens) && selectedModel) {
      payload.max_tokens = selectedModel.capabilities?.limits?.max_output_tokens;
    }
    const ctx = {
      historyId: recordRequest("gemini", {
        model,
        // History stores string content; multi-part content is serialized.
        messages: payload.messages.map((m) => ({
          role: m.role,
          content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
          tool_calls: m.tool_calls,
          tool_call_id: m.tool_call_id
        })),
        stream: isStream,
        max_tokens: payload.max_tokens ?? undefined,
        temperature: payload.temperature ?? undefined
      }),
      trackingId,
      startTime
    };
    const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
    ctx.queueWaitMs = queueWaitMs;
    if (isNonStreaming(response)) {
      return handleNonStreamResponse(c, response, model, ctx, payload);
    }
    consola.debug("Streaming Gemini response");
    updateTrackerStatus(trackingId, "streaming");
    return stream(c, async (s) => {
      // Set headers synchronously, before the first await, so they are
      // in place when Hono constructs the streaming response.
      c.header("Content-Type", "text/event-stream");
      c.header("Cache-Control", "no-cache");
      c.header("Connection", "keep-alive");
      const streamState = createGeminiStreamState();
      try {
        for await (const rawEvent of response) {
          if (rawEvent.data === "[DONE]") break;
          let chunk;
          try {
            chunk = JSON.parse(rawEvent.data);
          } catch (parseError) {
            // Skip malformed frames rather than aborting the stream.
            consola.debug("Failed to parse stream chunk:", parseError);
            continue;
          }
          const geminiChunks = translateOpenAIChunkToGemini(chunk, streamState);
          for (const gc of geminiChunks) {
            await s.write(`data: ${JSON.stringify(gc)}\n\n`);
          }
        }
        recordResponse(ctx.historyId, {
          success: true,
          model: streamState.model || model,
          usage: {
            input_tokens: streamState.usage.promptTokens,
            output_tokens: streamState.usage.completionTokens
          },
          content: null
        }, Date.now() - ctx.startTime);
        completeTracking(ctx.trackingId, streamState.usage.promptTokens, streamState.usage.completionTokens, ctx.queueWaitMs, undefined, {
          model: streamState.model || model,
          stream: true,
          durationMs: Date.now() - ctx.startTime,
          stopReason: streamState.finishReason || undefined,
          toolCount: payload.tools?.length ?? 0
        });
      } catch (error) {
        recordStreamError({
          acc: { model: streamState.model || model },
          fallbackModel: model,
          ctx,
          error
        });
        failTracking(ctx.trackingId, error);
      }
    });
  } catch (error) {
    const trackingId = c.get("trackingId");
    if (trackingId) failTracking(trackingId, error);
    return forwardGeminiError(c, error);
  }
}
4460
/**
 * Finalizes a non-streaming completion: records history and tracking
 * metrics, then returns the response translated to the Gemini format.
 */
function handleNonStreamResponse(c, response, model, ctx, payload) {
  const geminiResponse = translateOpenAIResponseToGemini(response, model);
  const usage = response.usage;
  const firstChoice = response.choices[0];
  recordResponse(ctx.historyId, {
    success: true,
    model: response.model || model,
    usage: {
      input_tokens: usage?.prompt_tokens ?? 0,
      output_tokens: usage?.completion_tokens ?? 0
    },
    stop_reason: firstChoice?.finish_reason,
    content: firstChoice
      ? { role: "assistant", content: firstChoice.message.content ?? "" }
      : null
  }, Date.now() - ctx.startTime);
  completeTracking(ctx.trackingId, usage?.prompt_tokens ?? 0, usage?.completion_tokens ?? 0, ctx.queueWaitMs, undefined, {
    model: response.model || model,
    stream: false,
    durationMs: Date.now() - ctx.startTime,
    stopReason: firstChoice?.finish_reason,
    toolCount: payload.tools?.length ?? 0
  });
  return c.json(geminiResponse);
}
4485
+
4486
+ //#endregion
4487
+ //#region src/routes/gemini/model-alias.ts
4488
/**
 * Maps Gemini model names that aren't available on GitHub Copilot
 * to equivalent models that are.
 *
 * The Gemini CLI's routing classifier requests gemini-2.5-flash-lite
 * and gemini-2.5-flash, which Copilot doesn't serve. We map them to
 * the closest available flash model.
 *
 * Aliases are only applied when the requested model is absent from
 * the Copilot model list, so if Copilot adds support for these models
 * natively, requests will go through unchanged.
 */
const GEMINI_MODEL_ALIASES = {
  "gemini-2.5-flash-lite": "gemini-3-flash-preview",
  "gemini-2.5-flash": "gemini-3-flash-preview"
};
// Resolves a requested model name, substituting the alias only when
// the model is aliased AND missing from the live Copilot model list.
function resolveGeminiModelAlias(model) {
  if (!(model in GEMINI_MODEL_ALIASES)) return model;
  const servedNatively = state.models?.data.some((m) => m.id === model);
  return servedNatively ? model : GEMINI_MODEL_ALIASES[model];
}
4509
+
4510
+ //#endregion
4511
+ //#region src/routes/gemini/route.ts
4512
// Gemini API surface: POST /v1beta/models/{model}:{action}. Hono sees
// the combined "model:action" segment as one path parameter, so the
// action is split off here.
const geminiRoutes = new Hono();
geminiRoutes.post("/:modelAction", async (c) => {
  const modelAction = c.req.param("modelAction");
  // Split on the LAST colon so model names containing ":" still parse.
  const colonIndex = modelAction.lastIndexOf(":");
  if (colonIndex === -1) {
    return geminiError(c, 400, "INVALID_ARGUMENT", "Missing action in URL");
  }
  const model = resolveGeminiModelAlias(modelAction.slice(0, Math.max(0, colonIndex)));
  const action = modelAction.slice(Math.max(0, colonIndex + 1));
  switch (action) {
    case "generateContent":
      return handleGeminiGenerate(c, model, false);
    case "streamGenerateContent":
      return handleGeminiGenerate(c, model, true);
    case "countTokens":
      return handleGeminiCountTokens(c, model);
    default:
      return geminiError(c, 400, "INVALID_ARGUMENT", `Unknown action: ${action}`);
  }
});
4526
+
4039
4527
  //#endregion
4040
4528
  //#region src/routes/history/api.ts
4041
4529
  function handleGetEntries(c) {
@@ -7946,6 +8434,7 @@ server.route("/v1/messages", messageRoutes);
7946
8434
  server.route("/api/event_logging", eventLoggingRoutes);
7947
8435
  server.route("/v1/responses", responsesRoutes);
7948
8436
  server.route("/responses", responsesRoutes);
8437
+ server.route("/v1beta/models", geminiRoutes);
7949
8438
  server.route("/history", historyRoutes);
7950
8439
 
7951
8440
  //#endregion
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dianshuv/copilot-api",
3
- "version": "0.6.0",
3
+ "version": "0.6.2",
4
4
  "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!",
5
5
  "author": "dianshuv",
6
6
  "type": "module",