npm - @dianshuv/copilot-api - Versions diffs - 0.5.0 → 0.6.1 - Mend

@dianshuv/copilot-api 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md — CHANGED

@@ -15,6 +15,8 @@
 - **Graceful shutdown**: 4-phase shutdown sequence — stops accepting requests, waits for in-flight requests to complete, sends abort signal, then force-closes. Configurable via `--shutdown-graceful-wait` and `--shutdown-abort-wait`.
 - **Stream repetition detection**: Detects when models get stuck in repetitive output loops using KMP-based pattern matching and logs a warning.
 - **Stale request reaping**: Automatically force-fails requests that exceed a configurable maximum age (default 600s) to prevent resource leaks.
+- **Gemini API compatibility**: `/v1beta/models` endpoints translate Gemini API requests to OpenAI format for Copilot. Enables Google Gemini CLI to use Copilot models via `GOOGLE_GEMINI_BASE_URL` environment variable.
+- **PostHog analytics**: Optional PostHog Cloud integration (`--posthog-key`) sends per-request token usage events for long-term trend analysis. Free tier (1M events/month) is more than sufficient for individual use.
 ## Quick Start
@@ -66,6 +68,7 @@ copilot-api start
 | `--redirect-anthropic` | Force Anthropic through OpenAI translation | false |
 | `--no-rewrite-anthropic-tools` | Don't rewrite server-side tools | false |
 | `--timezone-offset` | Timezone offset in hours from UTC for log timestamps (e.g., +8, -5, 0) | +8 |
+| `--posthog-key` | PostHog API key for token usage analytics (opt-in) | none |
 ### Patch-Claude Command Options
@@ -95,6 +98,14 @@ copilot-api start
 | `/v1/messages/count_tokens` | POST | Token counting |
 | `/v1/event_logging/batch` | POST | Event logging (no-op) |
+### Gemini Compatible
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/v1beta/models/{model}:generateContent` | POST | Non-streaming generation |
+| `/v1beta/models/{model}:streamGenerateContent` | POST | Streaming generation (SSE) |
+| `/v1beta/models/{model}:countTokens` | POST | Token counting |
 ### Utility
 | Endpoint | Method | Description |
@@ -141,6 +152,23 @@ Or use the interactive setup:
 bun run start --claude-code
 ```
+## Using with Gemini CLI
+```bash
+# Start the proxy
+copilot-api start
+# Configure Gemini CLI to use the proxy
+export GEMINI_API_KEY="placeholder"
+export GOOGLE_GEMINI_BASE_URL="http://localhost:4141"
+# Basic conversation
+gemini -p "Explain this code"
+# Pipe review
+git diff HEAD~1 | gemini -p "Review this diff for bugs"
+```
 ## Upstream Project
 For the original project documentation, features, and updates, see: [ericc-ch/copilot-api](https://github.com/ericc-ch/copilot-api)

package/dist/main.mjs — CHANGED

@@ -4,11 +4,12 @@ import consola from "consola";
 import fs from "node:fs/promises";
 import os from "node:os";
 import path, { dirname, join } from "node:path";
-import { randomUUID } from "node:crypto";
+import { createHash, randomUUID } from "node:crypto";
 import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
 import clipboard from "clipboardy";
 import { serve } from "srvx";
 import invariant from "tiny-invariant";
+import { PostHog } from "posthog-node";
 import { getProxyForUrl } from "proxy-from-env";
 import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
 import { execSync } from "node:child_process";
@@ -16,7 +17,7 @@ import process$1 from "node:process";
 import pc from "picocolors";
 import { Hono } from "hono";
 import { cors } from "hono/cors";
-import { streamSSE } from "hono/streaming";
+import { stream, streamSSE } from "hono/streaming";
 import { events } from "fetch-event-stream";
 //#region src/lib/paths.ts
@@ -1035,7 +1036,7 @@ const patchClaude = defineCommand({
 //#endregion
 //#region package.json
-var version = "0.5.0";
+var version = "0.6.1";
 //#endregion
 //#region src/lib/adaptive-rate-limiter.ts
@@ -1947,6 +1948,55 @@ function exportHistory(format = "json") {
 	return [headers.join(","), ...rows.map((r) => r.join(","))].join("\n");
 }
+//#endregion
+//#region src/lib/posthog.ts
+let client = null;
+let distinctId = "";
+function initPostHog(apiKey) {
+	if (!apiKey) return;
+	try {
+		client = new PostHog(apiKey, {
+			host: "https://us.i.posthog.com",
+			flushAt: 20,
+			flushInterval: 1e4
+		});
+		distinctId = createHash("sha256").update(os.hostname() + os.userInfo().username).digest("hex");
+	} catch (error) {
+		consola.warn("Failed to initialize PostHog:", error instanceof Error ? error.message : error);
+		client = null;
+	}
+}
+function isPostHogEnabled() {
+	return client !== null;
+}
+function captureRequest(params) {
+	if (!client) return;
+	const properties = {
+		model: params.model,
+		input_tokens: params.inputTokens,
+		output_tokens: params.outputTokens,
+		duration_ms: params.durationMs,
+		success: params.success,
+		stream: params.stream,
+		tool_count: params.toolCount
+	};
+	if (params.reasoningTokens !== void 0) properties.reasoning_tokens = params.reasoningTokens;
+	if (params.stopReason !== void 0) properties.stop_reason = params.stopReason;
+	client.capture({
+		distinctId,
+		event: "copilot_api_request",
+		properties
+	});
+}
+async function shutdownPostHog() {
+	if (!client) return;
+	try {
+		await client.shutdown();
+	} catch (error) {
+		consola.warn("Failed to flush PostHog events:", error instanceof Error ? error.message : error);
+	}
+}
 //#endregion
 //#region src/lib/proxy.ts
 /**
@@ -2157,7 +2207,7 @@ async function gracefulShutdown(signal, deps) {
 			try {
 				if (await drainActiveRequests(gracefulWaitMs, tracker, drainOpts) === "drained") {
 					consola.info("All requests completed naturally");
-					finalize(tracker);
+					await finalize(tracker);
 					return;
 				}
 			} catch (error) {
@@ -2169,7 +2219,7 @@ async function gracefulShutdown(signal, deps) {
 			try {
 				if (await drainActiveRequests(abortWaitMs, tracker, drainOpts) === "drained") {
 					consola.info("All requests completed after abort signal");
-					finalize(tracker);
+					await finalize(tracker);
 					return;
 				}
 			} catch (error) {
@@ -2183,13 +2233,15 @@ async function gracefulShutdown(signal, deps) {
 				consola.error("Error force-closing server:", error);
 			}
 		}
-		finalize(tracker);
+		await finalize(tracker);
 	} else {
+		await shutdownPostHog();
 		consola.info("Shutdown complete");
 		shutdownResolve?.();
 	}
 }
-function finalize(tracker) {
+async function finalize(tracker) {
+	await shutdownPostHog();
 	tracker.destroy();
 	consola.info("Shutdown complete");
 	shutdownResolve?.();
@@ -3503,8 +3555,8 @@ function recordErrorResponse(ctx, model, error) {
 		content: null
 	}, Date.now() - ctx.startTime);
 }
-/** Complete TUI tracking */
-function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs, reasoningTokens) {
+/** Complete TUI tracking and send PostHog analytics */
+function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs, reasoningTokens, analytics) {
 	if (!trackingId) return;
 	requestTracker.updateRequest(trackingId, {
 		inputTokens,
@@ -3517,6 +3569,17 @@ function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs, re
 		outputTokens,
 		reasoningTokens
 	});
+	if (analytics) captureRequest({
+		model: analytics.model,
+		inputTokens,
+		outputTokens,
+		durationMs: analytics.durationMs,
+		success: true,
+		stream: analytics.stream,
+		toolCount: analytics.toolCount ?? 0,
+		reasoningTokens,
+		stopReason: analytics.stopReason
+	});
 }
 /** Fail TUI tracking */
 function failTracking(trackingId, error) {
@@ -3685,7 +3748,7 @@ async function executeRequest(opts) {
 	try {
 		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
 		ctx.queueWaitMs = queueWaitMs;
-		if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
+		if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx, payload);
 		consola.debug("Streaming response");
 		updateTrackerStatus(trackingId, "streaming");
 		return streamSSE(c, async (stream) => {
@@ -3712,7 +3775,7 @@ async function logTokenCount(payload, selectedModel) {
 		consola.debug("Failed to calculate token count:", error);
 	}
 }
-function handleNonStreamingResponse$1(c, originalResponse, ctx) {
+function handleNonStreamingResponse$1(c, originalResponse, ctx, payload) {
 	consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
 	let response = originalResponse;
 	if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
@@ -3731,6 +3794,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
 	const choice = response.choices[0];
 	const usage = response.usage;
 	const reasoningTokens = getReasoningTokensFromOpenAIUsage(usage);
+	const durationMs = Date.now() - ctx.startTime;
 	recordResponse(ctx.historyId, {
 		success: true,
 		model: response.model,
@@ -3742,13 +3806,24 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
 		stop_reason: choice.finish_reason,
 		content: buildResponseContent(choice),
 		toolCalls: extractToolCalls(choice)
-	}, Date.now() - ctx.startTime);
+	}, durationMs);
 	if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
 		inputTokens: usage.prompt_tokens,
 		outputTokens: usage.completion_tokens,
 		queueWaitMs: ctx.queueWaitMs,
 		reasoningTokens
 	});
+	captureRequest({
+		model: response.model,
+		inputTokens: usage?.prompt_tokens ?? 0,
+		outputTokens: usage?.completion_tokens ?? 0,
+		durationMs,
+		success: true,
+		stream: false,
+		toolCount: payload.tools?.length ?? 0,
+		reasoningTokens,
+		stopReason: choice.finish_reason
+	});
 	return c.json(response);
 }
 function buildResponseContent(choice) {
@@ -3815,7 +3890,13 @@ async function handleStreamingResponse$1(opts) {
 			await stream.writeSSE(chunk);
 		}
 		recordStreamSuccess(acc, payload.model, ctx);
-		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, acc.reasoningTokens);
+		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, acc.reasoningTokens, {
+			model: acc.model || payload.model,
+			stream: true,
+			durationMs: Date.now() - ctx.startTime,
+			stopReason: acc.finishReason || void 0,
+			toolCount: payload.tools?.length ?? 0
+		});
 	} catch (error) {
 		recordStreamError({
 			acc,
@@ -3955,6 +4036,470 @@ eventLoggingRoutes.post("/batch", (c) => {
 	return c.text("OK", 200);
 });
+//#endregion
+//#region src/routes/gemini/error.ts
+const STATUS_MAP = {
+	400: "INVALID_ARGUMENT",
+	401: "PERMISSION_DENIED",
+	403: "PERMISSION_DENIED",
+	404: "NOT_FOUND",
+	413: "INVALID_ARGUMENT",
+	429: "RESOURCE_EXHAUSTED",
+	500: "INTERNAL"
+};
+function geminiError(c, code, status, message) {
+	return c.json({ error: {
+		code,
+		message,
+		status
+	} }, code);
+}
+function forwardGeminiError(c, error) {
+	if (error instanceof HTTPError) {
+		const status = STATUS_MAP[error.status] ?? "INTERNAL";
+		const code = error.status;
+		let message = error.responseText;
+		try {
+			const parsed = JSON.parse(error.responseText);
+			if (parsed.error?.message) message = parsed.error.message;
+		} catch {}
+		consola.error(`HTTP ${code}:`, message.slice(0, 200));
+		return geminiError(c, code, status, message);
+	}
+	consola.error("Unexpected error:", error);
+	return geminiError(c, 500, "INTERNAL", error instanceof Error ? error.message : "Unknown error");
+}
+//#endregion
+//#region src/routes/gemini/gemini-to-openai.ts
+function translateGeminiToOpenAI(request, model) {
+	const messages = [];
+	if (request.systemInstruction) {
+		const systemText = extractTextFromParts(request.systemInstruction.parts);
+		if (systemText) messages.push({
+			role: "system",
+			content: systemText
+		});
+	}
+	let globalCallIndex = 0;
+	const callIdQueue = /* @__PURE__ */ new Map();
+	if (!Array.isArray(request.contents)) return { payload: {
+		messages: [],
+		model
+	} };
+	for (const content of request.contents) {
+		const translated = translateContent(content, callIdQueue, () => `call_gemini_${globalCallIndex++}`);
+		messages.push(...translated);
+	}
+	const payload = {
+		messages,
+		model
+	};
+	const config = request.generationConfig;
+	if (config) {
+		if (config.temperature !== void 0) payload.temperature = config.temperature;
+		if (config.topP !== void 0) payload.top_p = config.topP;
+		if (config.maxOutputTokens !== void 0) payload.max_tokens = config.maxOutputTokens;
+		if (config.stopSequences !== void 0) payload.stop = config.stopSequences;
+		if (config.responseMimeType === "application/json") payload.response_format = { type: "json_object" };
+	}
+	if (request.tools) {
+		const tools = translateTools(request.tools);
+		if (tools.length > 0) payload.tools = tools;
+	}
+	if (request.toolConfig?.functionCallingConfig?.mode) payload.tool_choice = {
+		AUTO: "auto",
+		ANY: "required",
+		NONE: "none"
+	}[request.toolConfig.functionCallingConfig.mode];
+	return { payload };
+}
+function mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId) {
+	return functionCalls.map((fc) => {
+		const id = generateId();
+		pushToQueue(callIdQueue, fc.functionCall.name, id);
+		return {
+			id,
+			type: "function",
+			function: {
+				name: fc.functionCall.name,
+				arguments: JSON.stringify(fc.functionCall.args)
+			}
+		};
+	});
+}
+function translateContent(content, callIdQueue, generateId) {
+	const role = content.role === "model" ? "assistant" : "user";
+	const messages = [];
+	const textParts = [];
+	const imageParts = [];
+	const functionCalls = [];
+	const functionResponses = [];
+	for (const part of content.parts) if (isTextPart(part)) {
+		if (!part.thought) textParts.push(part);
+	} else if (isInlineDataPart(part)) imageParts.push(part);
+	else if (isFunctionCallPart(part)) functionCalls.push(part);
+	else if (isFunctionResponsePart(part)) functionResponses.push(part);
+	else if (isFileDataPart(part)) throw new HTTPError("fileData parts are not supported", 400, "fileData parts are not supported");
+	if (imageParts.length > 0) {
+		const contentParts = [];
+		for (const part of content.parts) if (isTextPart(part) && !part.thought) contentParts.push({
+			type: "text",
+			text: part.text
+		});
+		else if (isInlineDataPart(part)) contentParts.push({
+			type: "image_url",
+			image_url: { url: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}` }
+		});
+		const msg = {
+			role,
+			content: contentParts
+		};
+		if (functionCalls.length > 0 && role === "assistant") msg.tool_calls = mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId);
+		messages.push(msg);
+	} else if (functionCalls.length > 0 && role === "assistant") {
+		const textContent = textParts.length > 0 ? textParts.map((p) => p.text).join("") : null;
+		messages.push({
+			role: "assistant",
+			content: textContent,
+			tool_calls: mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId)
+		});
+	} else if (textParts.length > 0) messages.push({
+		role,
+		content: textParts.map((p) => p.text).join("")
+	});
+	let orphanIndex = 0;
+	for (const fr of functionResponses) {
+		const queue = callIdQueue.get(fr.functionResponse.name);
+		const id = queue && queue.length > 0 ? queue.shift() : `call_gemini_orphan_${orphanIndex++}`;
+		messages.push({
+			role: "tool",
+			content: JSON.stringify(fr.functionResponse.response),
+			tool_call_id: id
+		});
+	}
+	return messages;
+}
+function translateTools(geminiTools) {
+	const tools = [];
+	for (const tool of geminiTools) if (tool.functionDeclarations) for (const decl of tool.functionDeclarations) tools.push({
+		type: "function",
+		function: {
+			name: decl.name,
+			description: decl.description,
+			parameters: decl.parameters ?? {
+				type: "object",
+				properties: {}
+			}
+		}
+	});
+	return tools;
+}
+function pushToQueue(queue, name, id) {
+	const existing = queue.get(name);
+	if (existing) existing.push(id);
+	else queue.set(name, [id]);
+}
+function extractTextFromParts(parts) {
+	return parts.filter((p) => "text" in p && (!("thought" in p) || !p.thought)).map((p) => p.text).join("\n");
+}
+function isTextPart(part) {
+	return "text" in part;
+}
+function isInlineDataPart(part) {
+	return "inlineData" in part;
+}
+function isFunctionCallPart(part) {
+	return "functionCall" in part;
+}
+function isFunctionResponsePart(part) {
+	return "functionResponse" in part;
+}
+function isFileDataPart(part) {
+	return "fileData" in part;
+}
+//#endregion
+//#region src/routes/gemini/count-tokens-handler.ts
+async function handleGeminiCountTokens(c, model) {
+	try {
+		const { payload } = translateGeminiToOpenAI(await c.req.json(), model);
+		const selectedModel = state.models?.data.find((m) => m.id === model);
+		if (!selectedModel) {
+			consola.warn("Model not found for count_tokens, returning estimate");
+			return c.json({ totalTokens: 1 });
+		}
+		const tokenCount = await getTokenCount(payload, selectedModel);
+		const totalTokens = tokenCount.input + tokenCount.output;
+		consola.debug(`Gemini countTokens: ${totalTokens} tokens`);
+		return c.json({ totalTokens });
+	} catch (error) {
+		return forwardGeminiError(c, error);
+	}
+}
+//#endregion
+//#region src/routes/gemini/openai-to-gemini.ts
+function translateOpenAIResponseToGemini(response, model) {
+	const choice = response.choices.at(0);
+	if (!choice) return {
+		candidates: [],
+		usageMetadata: buildUsageMetadata(response.usage),
+		modelVersion: model
+	};
+	const parts = [];
+	if (choice.message.content) parts.push({ text: choice.message.content });
+	if (choice.message.tool_calls) for (const tc of choice.message.tool_calls) {
+		const args = parseArgs(tc.function.arguments);
+		parts.push({ functionCall: {
+			name: tc.function.name,
+			args
+		} });
+	}
+	if (parts.length === 0) parts.push({ text: "" });
+	return {
+		candidates: [{
+			content: {
+				role: "model",
+				parts
+			},
+			finishReason: mapFinishReason(choice.finish_reason),
+			index: 0
+		}],
+		usageMetadata: buildUsageMetadata(response.usage),
+		modelVersion: model
+	};
+}
+function createGeminiStreamState() {
+	return {
+		toolCalls: /* @__PURE__ */ new Map(),
+		usage: {
+			promptTokens: 0,
+			completionTokens: 0,
+			totalTokens: 0
+		},
+		model: "",
+		finishReason: ""
+	};
+}
+function translateOpenAIChunkToGemini(chunk, state) {
+	const results = [];
+	if (!state.model && chunk.model) state.model = chunk.model;
+	if (chunk.usage) {
+		state.usage.promptTokens = chunk.usage.prompt_tokens;
+		state.usage.completionTokens = chunk.usage.completion_tokens;
+		state.usage.totalTokens = chunk.usage.total_tokens;
+	}
+	const choice = chunk.choices.at(0);
+	if (!choice) return results;
+	const delta = choice.delta;
+	if (delta.tool_calls) for (const tc of delta.tool_calls) {
+		const existing = state.toolCalls.get(tc.index);
+		if (existing) {
+			if (tc.function?.arguments) existing.args += tc.function.arguments;
+		} else {
+			const flushed = flushToolCalls(state, tc.index);
+			if (flushed) results.push(flushed);
+			state.toolCalls.set(tc.index, {
+				name: tc.function?.name ?? "",
+				args: tc.function?.arguments ?? ""
+			});
+		}
+	}
+	if (delta.content) results.push(buildGeminiChunk([{ text: delta.content }], choice.finish_reason, state));
+	if (choice.finish_reason) {
+		state.finishReason = choice.finish_reason;
+		const flushed = flushToolCalls(state);
+		if (flushed) results.push(flushed);
+		if (!delta.content) results.push(buildGeminiChunk([], choice.finish_reason, state));
+	}
+	return results;
+}
+function flushToolCalls(state, belowIndex) {
+	if (state.toolCalls.size === 0) return null;
+	const parts = [];
+	for (const [idx, tc] of state.toolCalls) {
+		if (belowIndex !== void 0 && idx >= belowIndex) continue;
+		const args = parseArgs(tc.args);
+		parts.push({ functionCall: {
+			name: tc.name,
+			args
+		} });
+		state.toolCalls.delete(idx);
+	}
+	if (parts.length === 0) return null;
+	return buildGeminiChunk(parts, null, state);
+}
+function buildGeminiChunk(parts, finishReason, state) {
+	const candidate = {
+		content: {
+			role: "model",
+			parts: parts.length > 0 ? parts : [{ text: "" }]
+		},
+		index: 0
+	};
+	if (finishReason) candidate.finishReason = mapFinishReason(finishReason);
+	return {
+		candidates: [candidate],
+		usageMetadata: {
+			promptTokenCount: state.usage.promptTokens,
+			candidatesTokenCount: state.usage.completionTokens,
+			totalTokenCount: state.usage.totalTokens
+		},
+		modelVersion: state.model
+	};
+}
+function parseArgs(raw) {
+	try {
+		return JSON.parse(raw);
+	} catch {
+		return { raw };
+	}
+}
+function mapFinishReason(reason) {
+	switch (reason) {
+		case "stop":
+		case "tool_calls": return "STOP";
+		case "length": return "MAX_TOKENS";
+		case "content_filter": return "SAFETY";
+		default: return "OTHER";
+	}
+}
+function buildUsageMetadata(usage) {
+	return {
+		promptTokenCount: usage?.prompt_tokens ?? 0,
+		candidatesTokenCount: usage?.completion_tokens ?? 0,
+		totalTokenCount: usage?.total_tokens ?? 0
+	};
+}
+//#endregion
+//#region src/routes/gemini/handler.ts
+async function handleGeminiGenerate(c, model, isStream) {
+	try {
+		const geminiRequest = await c.req.json();
+		consola.debug("Gemini request for model:", model, "stream:", isStream);
+		const trackingId = c.get("trackingId");
+		const startTime = Date.now();
+		updateTrackerModel(trackingId, model);
+		const { payload } = translateGeminiToOpenAI(geminiRequest, model);
+		payload.stream = isStream;
+		const selectedModel = state.models?.data.find((m) => m.id === model);
+		if (isNullish(payload.max_tokens) && selectedModel) payload.max_tokens = selectedModel.capabilities?.limits?.max_output_tokens;
+		const ctx = {
+			historyId: recordRequest("gemini", {
+				model,
+				messages: payload.messages.map((m) => ({
+					role: m.role,
+					content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
+					tool_calls: m.tool_calls,
+					tool_call_id: m.tool_call_id
+				})),
+				stream: isStream,
+				max_tokens: payload.max_tokens ?? void 0,
+				temperature: payload.temperature ?? void 0
+			}),
+			trackingId,
+			startTime
+		};
+		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
+		ctx.queueWaitMs = queueWaitMs;
+		if (isNonStreaming(response)) return handleNonStreamResponse(c, response, model, ctx, payload);
+		consola.debug("Streaming Gemini response");
+		updateTrackerStatus(trackingId, "streaming");
+		return stream(c, async (s) => {
+			c.header("Content-Type", "text/event-stream");
+			c.header("Cache-Control", "no-cache");
+			c.header("Connection", "keep-alive");
+			const streamState = createGeminiStreamState();
+			try {
+				for await (const rawEvent of response) {
+					if (rawEvent.data === "[DONE]") break;
+					let chunk;
+					try {
+						chunk = JSON.parse(rawEvent.data);
+					} catch (parseError) {
+						consola.debug("Failed to parse stream chunk:", parseError);
+						continue;
+					}
+					const geminiChunks = translateOpenAIChunkToGemini(chunk, streamState);
+					for (const gc of geminiChunks) await s.write(`data: ${JSON.stringify(gc)}\n\n`);
+				}
+				recordResponse(ctx.historyId, {
+					success: true,
+					model: streamState.model || model,
+					usage: {
+						input_tokens: streamState.usage.promptTokens,
+						output_tokens: streamState.usage.completionTokens
+					},
+					content: null
+				}, Date.now() - ctx.startTime);
+				completeTracking(ctx.trackingId, streamState.usage.promptTokens, streamState.usage.completionTokens, ctx.queueWaitMs, void 0, {
+					model: streamState.model || model,
+					stream: true,
+					durationMs: Date.now() - ctx.startTime,
+					stopReason: streamState.finishReason || void 0,
+					toolCount: payload.tools?.length ?? 0
+				});
+			} catch (error) {
+				recordStreamError({
+					acc: { model: streamState.model || model },
+					fallbackModel: model,
+					ctx,
+					error
+				});
+				failTracking(ctx.trackingId, error);
+			}
+		});
+	} catch (error) {
+		const trackingId = c.get("trackingId");
+		if (trackingId) failTracking(trackingId, error);
+		return forwardGeminiError(c, error);
+	}
+}
+function handleNonStreamResponse(c, response, model, ctx, payload) {
+	const geminiResponse = translateOpenAIResponseToGemini(response, model);
+	const usage = response.usage;
+	recordResponse(ctx.historyId, {
+		success: true,
+		model: response.model || model,
+		usage: {
+			input_tokens: usage?.prompt_tokens ?? 0,
+			output_tokens: usage?.completion_tokens ?? 0
+		},
+		stop_reason: response.choices[0]?.finish_reason,
+		content: response.choices[0] ? {
+			role: "assistant",
+			content: response.choices[0].message.content ?? ""
+		} : null
+	}, Date.now() - ctx.startTime);
+	completeTracking(ctx.trackingId, usage?.prompt_tokens ?? 0, usage?.completion_tokens ?? 0, ctx.queueWaitMs, void 0, {
+		model: response.model || model,
+		stream: false,
+		durationMs: Date.now() - ctx.startTime,
+		stopReason: response.choices[0]?.finish_reason,
+		toolCount: payload.tools?.length ?? 0
+	});
+	return c.json(geminiResponse);
+}
+//#endregion
+//#region src/routes/gemini/route.ts
+const geminiRoutes = new Hono();
+geminiRoutes.post("/:modelAction", async (c) => {
+	const modelAction = c.req.param("modelAction");
+	const colonIndex = modelAction.lastIndexOf(":");
+	if (colonIndex === -1) return geminiError(c, 400, "INVALID_ARGUMENT", "Missing action in URL");
+	const model = modelAction.slice(0, Math.max(0, colonIndex));
+	const action = modelAction.slice(Math.max(0, colonIndex + 1));
+	switch (action) {
+		case "generateContent": return handleGeminiGenerate(c, model, false);
+		case "streamGenerateContent": return handleGeminiGenerate(c, model, true);
+		case "countTokens": return handleGeminiCountTokens(c, model);
+		default: return geminiError(c, 400, "INVALID_ARGUMENT", `Unknown action: ${action}`);
+	}
+});
 //#endregion
 //#region src/routes/history/api.ts
 function handleGetEntries(c) {
@@ -6849,7 +7394,7 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx, initiat
 				});
 			});
 		}
-		return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
+		return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult, effectivePayload);
 	} catch (error) {
 		if (error instanceof HTTPError && error.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
 		recordErrorResponse(ctx, anthropicPayload.model, error);
@@ -6874,7 +7419,7 @@ function logPayloadSizeInfoAnthropic(payload, model) {
 /**
 * Handle non-streaming direct Anthropic response
 */
-function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult) {
+function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult, payload) {
 	consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
 	recordResponse(ctx.historyId, {
 		success: true,
@@ -6910,6 +7455,16 @@ function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateRes
 		outputTokens: response.usage.output_tokens,
 		queueWaitMs: ctx.queueWaitMs
 	});
+	captureRequest({
+		model: response.model,
+		inputTokens: response.usage.input_tokens,
+		outputTokens: response.usage.output_tokens,
+		durationMs: Date.now() - ctx.startTime,
+		success: true,
+		stream: false,
+		toolCount: payload.tools?.length ?? 0,
+		stopReason: response.stop_reason ?? void 0
+	});
 	let finalResponse = response;
 	if (state.verbose && truncateResult?.wasCompacted) finalResponse = prependMarkerToAnthropicResponse$1(response, createTruncationMarker$1(truncateResult));
 	return c.json(finalResponse);
@@ -6963,7 +7518,13 @@ async function handleDirectAnthropicStreamingResponse(opts) {
 			});
 		}
 		recordAnthropicStreamingResponse(acc, anthropicPayload.model, ctx);
-		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
+		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, void 0, {
+			model: acc.model || anthropicPayload.model,
+			stream: true,
+			durationMs: Date.now() - ctx.startTime,
+			stopReason: acc.stopReason || void 0,
+			toolCount: anthropicPayload.tools?.length ?? 0
+		});
 	} catch (error) {
 		consola.error("Direct Anthropic stream error:", error);
 		recordStreamError({
@@ -7046,7 +7607,8 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx, initiatorOve
 			c,
 			response,
 			toolNameMapping,
-			ctx
+			ctx,
+			anthropicPayload
 		});
 		consola.debug("Streaming response from Copilot");
 		updateTrackerStatus(ctx.trackingId, "streaming");
@@ -7066,7 +7628,7 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx, initiatorOve
 	}
 }
 function handleNonStreamingResponse(opts) {
-	const { c, response, toolNameMapping, ctx } = opts;
+	const { c, response, toolNameMapping, ctx, anthropicPayload } = opts;
 	consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
 	let anthropicResponse = translateToAnthropic(response, toolNameMapping);
 	consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
@@ -7102,6 +7664,16 @@ function handleNonStreamingResponse(opts) {
 		outputTokens: anthropicResponse.usage.output_tokens,
 		queueWaitMs: ctx.queueWaitMs
 	});
+	captureRequest({
+		model: anthropicResponse.model,
+		inputTokens: anthropicResponse.usage.input_tokens,
+		outputTokens: anthropicResponse.usage.output_tokens,
+		durationMs: Date.now() - ctx.startTime,
+		success: true,
+		stream: false,
+		toolCount: anthropicPayload.tools?.length ?? 0,
+		stopReason: anthropicResponse.stop_reason ?? void 0
+	});
 	return c.json(anthropicResponse);
 }
 function prependMarkerToAnthropicResponse(response, marker) {
@@ -7147,7 +7719,13 @@ async function handleStreamingResponse(opts) {
 			checkRepetition
 		});
 		recordAnthropicStreamingResponse(acc, anthropicPayload.model, ctx);
-		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
+		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, void 0, {
+			model: acc.model || anthropicPayload.model,
+			stream: true,
+			durationMs: Date.now() - ctx.startTime,
+			stopReason: acc.stopReason || void 0,
+			toolCount: anthropicPayload.tools?.length ?? 0
+		});
 	} catch (error) {
 		consola.error("Stream error:", error);
 		recordStreamError({
@@ -7670,7 +8248,12 @@ const handleResponses = async (c) => {
 					if (finalResult) {
 						recordResponseResult(finalResult, model, historyId, startTime);
 						const usage = finalResult.usage;
-						completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, queueWaitMs, usage?.output_tokens_details?.reasoning_tokens);
+						completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, queueWaitMs, usage?.output_tokens_details?.reasoning_tokens, {
+							model: finalResult.model || model,
+							stream: true,
+							durationMs: Date.now() - startTime,
+							toolCount: tools.length
+						});
 					} else if (streamErrorMessage) {
 						recordResponse(historyId, {
 							success: false,
@@ -7699,7 +8282,12 @@ const handleResponses = async (c) => {
 		const result = response;
 		const usage = result.usage;
 		recordResponseResult(result, model, historyId, startTime);
-		completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, ctx.queueWaitMs, usage?.output_tokens_details?.reasoning_tokens);
+		completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, ctx.queueWaitMs, usage?.output_tokens_details?.reasoning_tokens, {
+			model: result.model || model,
+			stream: false,
+			durationMs: Date.now() - startTime,
+			toolCount: tools.length
+		});
 		consola.debug("Forwarding native Responses result:", JSON.stringify(result).slice(-400));
 		return c.json(result);
 	} catch (error) {
@@ -7822,6 +8410,7 @@ server.route("/v1/messages", messageRoutes);
 server.route("/api/event_logging", eventLoggingRoutes);
 server.route("/v1/responses", responsesRoutes);
 server.route("/responses", responsesRoutes);
+server.route("/v1beta/models", geminiRoutes);
 server.route("/history", historyRoutes);
 //#endregion
@@ -7890,6 +8479,10 @@ async function runServer(options) {
 		const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
 		consola.info(`History recording enabled (${limitText} entries)`);
 	}
+	if (options.posthogKey) {
+		initPostHog(options.posthogKey);
+		if (isPostHogEnabled()) consola.info("PostHog analytics enabled");
+	}
 	initTui({ enabled: true });
 	initRequestContextManager(state.staleRequestMaxAge).startReaper();
 	await ensurePaths();
@@ -8064,6 +8657,10 @@ const start = defineCommand({
 			type: "string",
 			default: "+8",
 			description: "Timezone offset in hours from UTC for log timestamps (e.g., +8, -5, 0)"
+		},
+		"posthog-key": {
+			type: "string",
+			description: "PostHog API key for token usage analytics (opt-in, no key = disabled)"
 		}
 	},
 	run({ args }) {
@@ -8088,7 +8685,8 @@ const start = defineCommand({
 			compressToolResults: args["compress-tool-results"],
 			redirectAnthropic: args["redirect-anthropic"],
 			rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"],
-			timezoneOffset: parseTimezoneOffset(args["timezone-offset"])
+			timezoneOffset: parseTimezoneOffset(args["timezone-offset"]),
+			posthogKey: args["posthog-key"]
 		});
 	}
 });

package/package.json — CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@dianshuv/copilot-api",
-  "version": "0.5.0",
+  "version": "0.6.1",
   "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!",
   "author": "dianshuv",
   "type": "module",
@@ -41,6 +41,7 @@
     "gpt-tokenizer": "^3.4.0",
     "hono": "^4.11.7",
     "picocolors": "^1.1.1",
+    "posthog-node": "^5.28.6",
     "proxy-from-env": "^1.1.0",
     "srvx": "^0.10.1",
     "tiny-invariant": "^1.3.3",