@dianshuv/copilot-api 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +28 -0
  2. package/dist/main.mjs +620 -22
  3. package/package.json +2 -1
package/README.md CHANGED
@@ -15,6 +15,8 @@
15
15
  - **Graceful shutdown**: 4-phase shutdown sequence — stops accepting requests, waits for in-flight requests to complete, sends abort signal, then force-closes. Configurable via `--shutdown-graceful-wait` and `--shutdown-abort-wait`.
16
16
  - **Stream repetition detection**: Detects when models get stuck in repetitive output loops using KMP-based pattern matching and logs a warning.
17
17
  - **Stale request reaping**: Automatically force-fails requests that exceed a configurable maximum age (default 600s) to prevent resource leaks.
18
+ - **Gemini API compatibility**: `/v1beta/models` endpoints translate Gemini API requests to OpenAI format for Copilot. This enables the Google Gemini CLI to use Copilot models via the `GOOGLE_GEMINI_BASE_URL` environment variable.
19
+ - **PostHog analytics**: Optional PostHog Cloud integration (`--posthog-key`) sends per-request token usage events for long-term trend analysis. Free tier (1M events/month) is more than sufficient for individual use.
18
20
 
19
21
  ## Quick Start
20
22
 
@@ -66,6 +68,7 @@ copilot-api start
66
68
  | `--redirect-anthropic` | Force Anthropic through OpenAI translation | false |
67
69
  | `--no-rewrite-anthropic-tools` | Don't rewrite server-side tools | false |
68
70
  | `--timezone-offset` | Timezone offset in hours from UTC for log timestamps (e.g., +8, -5, 0) | +8 |
71
+ | `--posthog-key` | PostHog API key for token usage analytics (opt-in) | none |
69
72
 
70
73
  ### Patch-Claude Command Options
71
74
 
@@ -95,6 +98,14 @@ copilot-api start
95
98
  | `/v1/messages/count_tokens` | POST | Token counting |
96
99
  | `/v1/event_logging/batch` | POST | Event logging (no-op) |
97
100
 
101
+ ### Gemini Compatible
102
+
103
+ | Endpoint | Method | Description |
104
+ |----------|--------|-------------|
105
+ | `/v1beta/models/{model}:generateContent` | POST | Non-streaming generation |
106
+ | `/v1beta/models/{model}:streamGenerateContent` | POST | Streaming generation (SSE) |
107
+ | `/v1beta/models/{model}:countTokens` | POST | Token counting |
108
+
98
109
  ### Utility
99
110
 
100
111
  | Endpoint | Method | Description |
@@ -141,6 +152,23 @@ Or use the interactive setup:
141
152
  bun run start --claude-code
142
153
  ```
143
154
 
155
+ ## Using with Gemini CLI
156
+
157
+ ```bash
158
+ # Start the proxy
159
+ copilot-api start
160
+
161
+ # Configure Gemini CLI to use the proxy
162
+ export GEMINI_API_KEY="placeholder"
163
+ export GOOGLE_GEMINI_BASE_URL="http://localhost:4141"
164
+
165
+ # Basic conversation
166
+ gemini -p "Explain this code"
167
+
168
+ # Pipe review
169
+ git diff HEAD~1 | gemini -p "Review this diff for bugs"
170
+ ```
171
+
144
172
  ## Upstream Project
145
173
 
146
174
  For the original project documentation, features, and updates, see: [ericc-ch/copilot-api](https://github.com/ericc-ch/copilot-api)
package/dist/main.mjs CHANGED
@@ -4,11 +4,12 @@ import consola from "consola";
4
4
  import fs from "node:fs/promises";
5
5
  import os from "node:os";
6
6
  import path, { dirname, join } from "node:path";
7
- import { randomUUID } from "node:crypto";
7
+ import { createHash, randomUUID } from "node:crypto";
8
8
  import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
9
9
  import clipboard from "clipboardy";
10
10
  import { serve } from "srvx";
11
11
  import invariant from "tiny-invariant";
12
+ import { PostHog } from "posthog-node";
12
13
  import { getProxyForUrl } from "proxy-from-env";
13
14
  import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
14
15
  import { execSync } from "node:child_process";
@@ -16,7 +17,7 @@ import process$1 from "node:process";
16
17
  import pc from "picocolors";
17
18
  import { Hono } from "hono";
18
19
  import { cors } from "hono/cors";
19
- import { streamSSE } from "hono/streaming";
20
+ import { stream, streamSSE } from "hono/streaming";
20
21
  import { events } from "fetch-event-stream";
21
22
 
22
23
  //#region src/lib/paths.ts
@@ -1035,7 +1036,7 @@ const patchClaude = defineCommand({
1035
1036
 
1036
1037
  //#endregion
1037
1038
  //#region package.json
1038
- var version = "0.5.0";
1039
+ var version = "0.6.1";
1039
1040
 
1040
1041
  //#endregion
1041
1042
  //#region src/lib/adaptive-rate-limiter.ts
@@ -1947,6 +1948,55 @@ function exportHistory(format = "json") {
1947
1948
  return [headers.join(","), ...rows.map((r) => r.join(","))].join("\n");
1948
1949
  }
1949
1950
 
1951
+ //#endregion
1952
+ //#region src/lib/posthog.ts
1953
+ let client = null;
1954
+ let distinctId = "";
1955
/**
 * Initialise the optional PostHog analytics client.
 *
 * Does nothing when `apiKey` is falsy. On any failure a warning is logged and
 * analytics stay disabled (`client` is reset to null).
 *
 * @param {string | undefined} apiKey PostHog project API key.
 */
function initPostHog(apiKey) {
	if (!apiKey) return;
	try {
		// Derive the id before constructing the client: os.userInfo() can throw,
		// and failing first avoids building a client that is dropped unshut.
		const hashedIdentity = createHash("sha256").update(os.hostname() + os.userInfo().username).digest("hex");
		client = new PostHog(apiKey, {
			host: "https://us.i.posthog.com",
			flushAt: 20,
			flushInterval: 1e4
		});
		distinctId = hashedIdentity;
	} catch (error) {
		consola.warn("Failed to initialize PostHog:", error instanceof Error ? error.message : error);
		client = null;
	}
}
1969
/** Report whether a PostHog client is currently held (i.e. `client` is not null). */
function isPostHogEnabled() {
	const disabled = client === null;
	return !disabled;
}
1972
/**
 * Send one `copilot_api_request` event to PostHog; a no-op when no client is set.
 *
 * `reasoning_tokens` and `stop_reason` are attached only when the matching
 * field of `params` is not undefined.
 *
 * @param {object} params model, inputTokens, outputTokens, durationMs, success,
 *   stream, toolCount and optional reasoningTokens / stopReason.
 */
function captureRequest(params) {
	if (!client) return;
	const { reasoningTokens, stopReason } = params;
	const properties = {
		model: params.model,
		input_tokens: params.inputTokens,
		output_tokens: params.outputTokens,
		duration_ms: params.durationMs,
		success: params.success,
		stream: params.stream,
		tool_count: params.toolCount,
		...(reasoningTokens !== undefined ? { reasoning_tokens: reasoningTokens } : {}),
		...(stopReason !== undefined ? { stop_reason: stopReason } : {})
	};
	const event = "copilot_api_request";
	client.capture({ distinctId, event, properties });
}
1991
/**
 * Await the PostHog client's `shutdown()` so queued events get a chance to be sent.
 * A failure is logged as a warning and never rethrown; no-op without a client.
 */
async function shutdownPostHog() {
	const activeClient = client;
	if (!activeClient) return;
	try {
		await activeClient.shutdown();
	} catch (error) {
		const reason = error instanceof Error ? error.message : error;
		consola.warn("Failed to flush PostHog events:", reason);
	}
}
1999
+
1950
2000
  //#endregion
1951
2001
  //#region src/lib/proxy.ts
1952
2002
  /**
@@ -2157,7 +2207,7 @@ async function gracefulShutdown(signal, deps) {
2157
2207
  try {
2158
2208
  if (await drainActiveRequests(gracefulWaitMs, tracker, drainOpts) === "drained") {
2159
2209
  consola.info("All requests completed naturally");
2160
- finalize(tracker);
2210
+ await finalize(tracker);
2161
2211
  return;
2162
2212
  }
2163
2213
  } catch (error) {
@@ -2169,7 +2219,7 @@ async function gracefulShutdown(signal, deps) {
2169
2219
  try {
2170
2220
  if (await drainActiveRequests(abortWaitMs, tracker, drainOpts) === "drained") {
2171
2221
  consola.info("All requests completed after abort signal");
2172
- finalize(tracker);
2222
+ await finalize(tracker);
2173
2223
  return;
2174
2224
  }
2175
2225
  } catch (error) {
@@ -2183,13 +2233,15 @@ async function gracefulShutdown(signal, deps) {
2183
2233
  consola.error("Error force-closing server:", error);
2184
2234
  }
2185
2235
  }
2186
- finalize(tracker);
2236
+ await finalize(tracker);
2187
2237
  } else {
2238
+ await shutdownPostHog();
2188
2239
  consola.info("Shutdown complete");
2189
2240
  shutdownResolve?.();
2190
2241
  }
2191
2242
  }
2192
- function finalize(tracker) {
2243
+ async function finalize(tracker) {
2244
+ await shutdownPostHog();
2193
2245
  tracker.destroy();
2194
2246
  consola.info("Shutdown complete");
2195
2247
  shutdownResolve?.();
@@ -3503,8 +3555,8 @@ function recordErrorResponse(ctx, model, error) {
3503
3555
  content: null
3504
3556
  }, Date.now() - ctx.startTime);
3505
3557
  }
3506
- /** Complete TUI tracking */
3507
- function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs, reasoningTokens) {
3558
+ /** Complete TUI tracking and send PostHog analytics */
3559
+ function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs, reasoningTokens, analytics) {
3508
3560
  if (!trackingId) return;
3509
3561
  requestTracker.updateRequest(trackingId, {
3510
3562
  inputTokens,
@@ -3517,6 +3569,17 @@ function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs, re
3517
3569
  outputTokens,
3518
3570
  reasoningTokens
3519
3571
  });
3572
+ if (analytics) captureRequest({
3573
+ model: analytics.model,
3574
+ inputTokens,
3575
+ outputTokens,
3576
+ durationMs: analytics.durationMs,
3577
+ success: true,
3578
+ stream: analytics.stream,
3579
+ toolCount: analytics.toolCount ?? 0,
3580
+ reasoningTokens,
3581
+ stopReason: analytics.stopReason
3582
+ });
3520
3583
  }
3521
3584
  /** Fail TUI tracking */
3522
3585
  function failTracking(trackingId, error) {
@@ -3685,7 +3748,7 @@ async function executeRequest(opts) {
3685
3748
  try {
3686
3749
  const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
3687
3750
  ctx.queueWaitMs = queueWaitMs;
3688
- if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
3751
+ if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx, payload);
3689
3752
  consola.debug("Streaming response");
3690
3753
  updateTrackerStatus(trackingId, "streaming");
3691
3754
  return streamSSE(c, async (stream) => {
@@ -3712,7 +3775,7 @@ async function logTokenCount(payload, selectedModel) {
3712
3775
  consola.debug("Failed to calculate token count:", error);
3713
3776
  }
3714
3777
  }
3715
- function handleNonStreamingResponse$1(c, originalResponse, ctx) {
3778
+ function handleNonStreamingResponse$1(c, originalResponse, ctx, payload) {
3716
3779
  consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
3717
3780
  let response = originalResponse;
3718
3781
  if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
@@ -3731,6 +3794,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
3731
3794
  const choice = response.choices[0];
3732
3795
  const usage = response.usage;
3733
3796
  const reasoningTokens = getReasoningTokensFromOpenAIUsage(usage);
3797
+ const durationMs = Date.now() - ctx.startTime;
3734
3798
  recordResponse(ctx.historyId, {
3735
3799
  success: true,
3736
3800
  model: response.model,
@@ -3742,13 +3806,24 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
3742
3806
  stop_reason: choice.finish_reason,
3743
3807
  content: buildResponseContent(choice),
3744
3808
  toolCalls: extractToolCalls(choice)
3745
- }, Date.now() - ctx.startTime);
3809
+ }, durationMs);
3746
3810
  if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
3747
3811
  inputTokens: usage.prompt_tokens,
3748
3812
  outputTokens: usage.completion_tokens,
3749
3813
  queueWaitMs: ctx.queueWaitMs,
3750
3814
  reasoningTokens
3751
3815
  });
3816
+ captureRequest({
3817
+ model: response.model,
3818
+ inputTokens: usage?.prompt_tokens ?? 0,
3819
+ outputTokens: usage?.completion_tokens ?? 0,
3820
+ durationMs,
3821
+ success: true,
3822
+ stream: false,
3823
+ toolCount: payload.tools?.length ?? 0,
3824
+ reasoningTokens,
3825
+ stopReason: choice.finish_reason
3826
+ });
3752
3827
  return c.json(response);
3753
3828
  }
3754
3829
  function buildResponseContent(choice) {
@@ -3815,7 +3890,13 @@ async function handleStreamingResponse$1(opts) {
3815
3890
  await stream.writeSSE(chunk);
3816
3891
  }
3817
3892
  recordStreamSuccess(acc, payload.model, ctx);
3818
- completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, acc.reasoningTokens);
3893
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, acc.reasoningTokens, {
3894
+ model: acc.model || payload.model,
3895
+ stream: true,
3896
+ durationMs: Date.now() - ctx.startTime,
3897
+ stopReason: acc.finishReason || void 0,
3898
+ toolCount: payload.tools?.length ?? 0
3899
+ });
3819
3900
  } catch (error) {
3820
3901
  recordStreamError({
3821
3902
  acc,
@@ -3955,6 +4036,470 @@ eventLoggingRoutes.post("/batch", (c) => {
3955
4036
  return c.text("OK", 200);
3956
4037
  });
3957
4038
 
4039
+ //#endregion
4040
+ //#region src/routes/gemini/error.ts
4041
/**
 * HTTP status code -> Google API canonical status string used in Gemini-style
 * error bodies. Codes not listed here are reported as "INTERNAL" by the caller.
 *
 * 401 is UNAUTHENTICATED (missing/invalid credentials); PERMISSION_DENIED is
 * reserved for 403, following the google.rpc.Code HTTP mapping.
 */
const STATUS_MAP = {
	400: "INVALID_ARGUMENT",
	401: "UNAUTHENTICATED",
	403: "PERMISSION_DENIED",
	404: "NOT_FOUND",
	413: "INVALID_ARGUMENT",
	429: "RESOURCE_EXHAUSTED",
	499: "CANCELLED",
	500: "INTERNAL",
	501: "UNIMPLEMENTED",
	503: "UNAVAILABLE",
	504: "DEADLINE_EXCEEDED"
};
4050
/**
 * Reply with a Gemini-shaped error body: `{ error: { code, message, status } }`.
 *
 * @param c Request context; only `c.json(body, httpStatus)` is used.
 * @param code HTTP status code, used both in the body and as the response status.
 * @param status Canonical status string (e.g. "INVALID_ARGUMENT").
 * @param message Human-readable error message.
 */
function geminiError(c, code, status, message) {
	const body = { error: { code, message, status } };
	return c.json(body, code);
}
4057
/**
 * Convert a thrown error into a Gemini-style error response.
 *
 * For an `HTTPError` the upstream status code is forwarded and the message is
 * taken from `error.message` of the JSON body when that is a non-empty string;
 * otherwise the raw response text is used (falling back to the error's own
 * message when the body is empty). Anything else becomes a 500 "INTERNAL".
 *
 * @param c Request context passed through to `geminiError`.
 * @param error The caught value.
 */
function forwardGeminiError(c, error) {
	if (!(error instanceof HTTPError)) {
		consola.error("Unexpected error:", error);
		return geminiError(c, 500, "INTERNAL", error instanceof Error ? error.message : "Unknown error");
	}
	const code = error.status;
	const status = STATUS_MAP[code] ?? "INTERNAL";
	const rawText = typeof error.responseText === "string" ? error.responseText : "";
	let message = rawText || error.message;
	try {
		const upstreamMessage = JSON.parse(rawText).error?.message;
		// Only adopt a string: a structured message would break the .slice() below.
		if (typeof upstreamMessage === "string" && upstreamMessage) message = upstreamMessage;
	} catch {
		// Body is not JSON (or not an object); keep the raw text.
	}
	consola.error(`HTTP ${code}:`, message.slice(0, 200));
	return geminiError(c, code, status, message);
}
4072
+
4073
+ //#endregion
4074
+ //#region src/routes/gemini/gemini-to-openai.ts
4075
/**
 * Translate a Gemini `generateContent`-style request into an OpenAI chat
 * completions payload.
 *
 * - `systemInstruction.parts` becomes a leading "system" message; a system
 *   instruction without a `parts` array is ignored instead of throwing.
 * - Each entry of `contents` is expanded through `translateContent`, sharing one
 *   call-id queue so function responses can be paired with earlier calls.
 * - `generationConfig`, `tools` and `toolConfig` are mapped to their OpenAI
 *   counterparts. `tool_choice` is set only for AUTO / ANY / NONE.
 *
 * @param request Parsed Gemini request body.
 * @param model Model id to put in the payload.
 * @returns {{ payload: object }} Wrapper holding the OpenAI payload.
 */
function translateGeminiToOpenAI(request, model) {
	const messages = [];
	const systemParts = request.systemInstruction?.parts;
	if (Array.isArray(systemParts)) {
		const systemText = extractTextFromParts(systemParts);
		if (systemText) messages.push({
			role: "system",
			content: systemText
		});
	}
	// Without a contents array an empty message list is returned (the system
	// message is not carried over in that case either).
	if (!Array.isArray(request.contents)) return { payload: {
		messages: [],
		model
	} };
	let globalCallIndex = 0;
	const callIdQueue = /* @__PURE__ */ new Map();
	const nextCallId = () => `call_gemini_${globalCallIndex++}`;
	for (const content of request.contents) messages.push(...translateContent(content, callIdQueue, nextCallId));
	const payload = {
		messages,
		model
	};
	const config = request.generationConfig;
	if (config) {
		if (config.temperature !== void 0) payload.temperature = config.temperature;
		if (config.topP !== void 0) payload.top_p = config.topP;
		if (config.maxOutputTokens !== void 0) payload.max_tokens = config.maxOutputTokens;
		if (config.stopSequences !== void 0) payload.stop = config.stopSequences;
		if (config.responseMimeType === "application/json") payload.response_format = { type: "json_object" };
	}
	if (request.tools) {
		const tools = translateTools(request.tools);
		if (tools.length > 0) payload.tool_choice === void 0 && (payload.tools = tools);
	}
	// A Map avoids picking up Object.prototype members for odd mode strings.
	const toolChoiceByMode = new Map([
		["AUTO", "auto"],
		["ANY", "required"],
		["NONE", "none"]
	]);
	const toolChoice = toolChoiceByMode.get(request.toolConfig?.functionCallingConfig?.mode);
	if (toolChoice !== void 0) payload.tool_choice = toolChoice;
	return { payload };
}
4117
/**
 * Convert Gemini `functionCall` parts into OpenAI `tool_calls` entries.
 *
 * Each call gets a fresh id from `generateId`, and the id is queued under the
 * function name so a later `functionResponse` can be paired with it.
 *
 * @param functionCalls Parts that carry a `functionCall` object.
 * @param callIdQueue Map of function name -> pending call ids.
 * @param generateId Produces a new tool-call id.
 * @returns Array of `{ id, type: "function", function: { name, arguments } }`.
 */
function mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId) {
	return functionCalls.map(({ functionCall }) => {
		const id = generateId();
		pushToQueue(callIdQueue, functionCall.name, id);
		return {
			id,
			type: "function",
			function: {
				name: functionCall.name,
				// JSON.stringify(undefined) is undefined, which would drop the
				// `arguments` field entirely; a call without args serialises as "{}".
				arguments: JSON.stringify(functionCall.args ?? {})
			}
		};
	});
}
4131
/**
 * Translate one Gemini `Content` entry into zero or more OpenAI messages.
 *
 * - role "model" maps to "assistant"; every other role maps to "user".
 * - Text parts flagged `thought` are dropped.
 * - If any inline-data part is present, a multi-part message (text + image_url
 *   data URLs) is produced; otherwise text parts are concatenated.
 * - Function calls are attached as `tool_calls` only on assistant messages.
 * - Every function response becomes a separate "tool" message, paired with the
 *   oldest queued call id for that function name (or an orphan id).
 *
 * A content entry without a `parts` array yields no messages instead of throwing.
 *
 * @param content Gemini content entry (`role`, `parts`).
 * @param callIdQueue Map of function name -> pending call ids.
 * @param generateId Produces a new tool-call id.
 * @throws {HTTPError} 400 when a `fileData` part is encountered.
 */
function translateContent(content, callIdQueue, generateId) {
	const role = content.role === "model" ? "assistant" : "user";
	const parts = Array.isArray(content.parts) ? content.parts : [];
	const messages = [];
	const textParts = [];
	const imageParts = [];
	const functionCalls = [];
	const functionResponses = [];
	for (const part of parts) {
		if (isTextPart(part)) {
			if (!part.thought) textParts.push(part);
		} else if (isInlineDataPart(part)) imageParts.push(part);
		else if (isFunctionCallPart(part)) functionCalls.push(part);
		else if (isFunctionResponsePart(part)) functionResponses.push(part);
		else if (isFileDataPart(part)) throw new HTTPError("fileData parts are not supported", 400, "fileData parts are not supported");
	}
	if (imageParts.length > 0) {
		// Re-walk the parts so text and images keep their original interleaving.
		const contentParts = [];
		for (const part of parts) {
			if (isTextPart(part) && !part.thought) contentParts.push({
				type: "text",
				text: part.text
			});
			else if (isInlineDataPart(part)) contentParts.push({
				type: "image_url",
				image_url: { url: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}` }
			});
		}
		const msg = {
			role,
			content: contentParts
		};
		if (functionCalls.length > 0 && role === "assistant") msg.tool_calls = mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId);
		messages.push(msg);
	} else if (functionCalls.length > 0 && role === "assistant") {
		const textContent = textParts.length > 0 ? textParts.map((p) => p.text).join("") : null;
		messages.push({
			role: "assistant",
			content: textContent,
			tool_calls: mapFunctionCallsToToolCalls(functionCalls, callIdQueue, generateId)
		});
	} else if (textParts.length > 0) messages.push({
		role,
		content: textParts.map((p) => p.text).join("")
	});
	let orphanIndex = 0;
	for (const fr of functionResponses) {
		const queue = callIdQueue.get(fr.functionResponse.name);
		const id = queue && queue.length > 0 ? queue.shift() : `call_gemini_orphan_${orphanIndex++}`;
		messages.push({
			role: "tool",
			// A missing response serialises as "{}" so `content` is always a string.
			content: JSON.stringify(fr.functionResponse.response ?? {}),
			tool_call_id: id
		});
	}
	return messages;
}
4183
/**
 * Flatten Gemini tool entries into OpenAI function tools.
 *
 * Entries without `functionDeclarations` are skipped. A declaration with no
 * `parameters` gets an empty object schema.
 *
 * @param geminiTools Iterable of Gemini tool objects.
 * @returns Array of `{ type: "function", function: { name, description, parameters } }`.
 */
function translateTools(geminiTools) {
	const openAiTools = [];
	for (const { functionDeclarations } of geminiTools) {
		if (!functionDeclarations) continue;
		for (const { name, description, parameters } of functionDeclarations) {
			const schema = parameters ?? {
				type: "object",
				properties: {}
			};
			openAiTools.push({
				type: "function",
				function: {
					name,
					description,
					parameters: schema
				}
			});
		}
	}
	return openAiTools;
}
4198
/**
 * Append `id` to the list stored under `name`, creating the list on first use.
 *
 * @param queue Map of name -> array of ids.
 * @param name Key to append under.
 * @param id Value to append.
 */
function pushToQueue(queue, name, id) {
	const ids = queue.get(name);
	if (!ids) {
		queue.set(name, [id]);
		return;
	}
	ids.push(id);
}
4203
/**
 * Join the text of all parts that have a `text` key and are not flagged as
 * `thought`, separated by newlines.
 *
 * @param parts Array of Gemini parts.
 * @returns {string} Newline-joined text ("" when nothing qualifies).
 */
function extractTextFromParts(parts) {
	const visibleTexts = parts.reduce((texts, part) => {
		const hasText = "text" in part;
		const isThought = "thought" in part && part.thought;
		if (hasText && !isThought) texts.push(part.text);
		return texts;
	}, []);
	return visibleTexts.join("\n");
}
4206
/** True when the part has a `text` key (own or inherited). */
function isTextPart(part) {
	return Reflect.has(part, "text");
}
4209
/** True when the part has an `inlineData` key (own or inherited). */
function isInlineDataPart(part) {
	return Reflect.has(part, "inlineData");
}
4212
/** True when the part has a `functionCall` key (own or inherited). */
function isFunctionCallPart(part) {
	return Reflect.has(part, "functionCall");
}
4215
/** True when the part has a `functionResponse` key (own or inherited). */
function isFunctionResponsePart(part) {
	return Reflect.has(part, "functionResponse");
}
4218
/** True when the part has a `fileData` key (own or inherited). */
function isFileDataPart(part) {
	return Reflect.has(part, "fileData");
}
4221
+
4222
+ //#endregion
4223
+ //#region src/routes/gemini/count-tokens-handler.ts
4224
/**
 * Handle the Gemini `countTokens` action.
 *
 * The request body is translated to an OpenAI payload and counted with
 * `getTokenCount`; the reply is `{ totalTokens }` (input + output). When the
 * model id is not in `state.models`, a placeholder `{ totalTokens: 1 }` is
 * returned. Any thrown error is turned into a Gemini-style error response.
 *
 * @param c Request context.
 * @param model Model id taken from the URL.
 */
async function handleGeminiCountTokens(c, model) {
	try {
		const geminiRequest = await c.req.json();
		const { payload } = translateGeminiToOpenAI(geminiRequest, model);
		const selectedModel = state.models?.data.find((candidate) => candidate.id === model);
		if (selectedModel) {
			const { input, output } = await getTokenCount(payload, selectedModel);
			const totalTokens = input + output;
			consola.debug(`Gemini countTokens: ${totalTokens} tokens`);
			return c.json({ totalTokens });
		}
		consola.warn("Model not found for count_tokens, returning estimate");
		return c.json({ totalTokens: 1 });
	} catch (error) {
		return forwardGeminiError(c, error);
	}
}
4240
+
4241
+ //#endregion
4242
+ //#region src/routes/gemini/openai-to-gemini.ts
4243
/**
 * Convert a non-streaming OpenAI chat completion into a Gemini response.
 *
 * Only the first choice is used. Its text content (if any) becomes a text
 * part and each tool call becomes a `functionCall` part; when neither exists a
 * single empty text part is emitted. With no choices, `candidates` is empty.
 *
 * @param response OpenAI chat completion response.
 * @param model Model id reported as `modelVersion`.
 */
function translateOpenAIResponseToGemini(response, model) {
	const firstChoice = response.choices.at(0);
	const usageMetadata = buildUsageMetadata(response.usage);
	if (!firstChoice) {
		return {
			candidates: [],
			usageMetadata,
			modelVersion: model
		};
	}
	const { content, tool_calls: toolCalls } = firstChoice.message;
	const parts = [];
	if (content) parts.push({ text: content });
	if (toolCalls) {
		for (const { function: fn } of toolCalls) {
			const args = parseArgs(fn.arguments);
			parts.push({ functionCall: {
				name: fn.name,
				args
			} });
		}
	}
	if (parts.length === 0) parts.push({ text: "" });
	const candidate = {
		content: {
			role: "model",
			parts
		},
		finishReason: mapFinishReason(firstChoice.finish_reason),
		index: 0
	};
	return {
		candidates: [candidate],
		usageMetadata,
		modelVersion: model
	};
}
4273
/**
 * Create the mutable accumulator used while translating one OpenAI stream:
 * pending tool calls keyed by index, zeroed usage counters, and empty
 * `model` / `finishReason` strings.
 */
function createGeminiStreamState() {
	const toolCalls = /* @__PURE__ */ new Map();
	const usage = {
		promptTokens: 0,
		completionTokens: 0,
		totalTokens: 0
	};
	return {
		toolCalls,
		usage,
		model: "",
		finishReason: ""
	};
}
4285
/**
 * Translate one OpenAI streaming chunk into zero or more Gemini chunks,
 * updating `state` along the way.
 *
 * - The first non-empty `chunk.model` and the latest `chunk.usage` are stored.
 * - Tool-call deltas are buffered per index; seeing a new index first flushes
 *   the buffered calls with a lower index.
 * - Text deltas are emitted immediately.
 * - On a finish reason the remaining tool calls are flushed, and a closing
 *   chunk is emitted unless the same delta already carried text.
 *
 * @param chunk Parsed OpenAI chat completion chunk.
 * @param state Accumulator from `createGeminiStreamState`.
 * @returns Array of Gemini response chunks, in emission order.
 */
function translateOpenAIChunkToGemini(chunk, state) {
	const emitted = [];
	if (chunk.model && !state.model) state.model = chunk.model;
	const { usage } = chunk;
	if (usage) {
		state.usage.promptTokens = usage.prompt_tokens;
		state.usage.completionTokens = usage.completion_tokens;
		state.usage.totalTokens = usage.total_tokens;
	}
	const choice = chunk.choices.at(0);
	if (!choice) return emitted;
	const { delta } = choice;
	if (delta.tool_calls) {
		for (const toolDelta of delta.tool_calls) {
			const pending = state.toolCalls.get(toolDelta.index);
			if (pending) {
				// Continuation of a known call: only the argument text grows.
				if (toolDelta.function?.arguments) pending.args += toolDelta.function.arguments;
				continue;
			}
			const earlierCalls = flushToolCalls(state, toolDelta.index);
			if (earlierCalls) emitted.push(earlierCalls);
			state.toolCalls.set(toolDelta.index, {
				name: toolDelta.function?.name ?? "",
				args: toolDelta.function?.arguments ?? ""
			});
		}
	}
	if (delta.content) emitted.push(buildGeminiChunk([{ text: delta.content }], choice.finish_reason, state));
	if (!choice.finish_reason) return emitted;
	state.finishReason = choice.finish_reason;
	const remainingCalls = flushToolCalls(state);
	if (remainingCalls) emitted.push(remainingCalls);
	if (!delta.content) emitted.push(buildGeminiChunk([], choice.finish_reason, state));
	return emitted;
}
4318
/**
 * Emit buffered tool calls as one Gemini chunk and remove them from the state.
 *
 * @param state Stream accumulator holding `toolCalls`.
 * @param belowIndex When given, only calls whose index is not >= this value are
 *   flushed; when omitted, every buffered call is flushed.
 * @returns A Gemini chunk with the `functionCall` parts, or null when nothing
 *   was flushed.
 */
function flushToolCalls(state, belowIndex) {
	const buffered = state.toolCalls;
	if (buffered.size === 0) return null;
	const flushEverything = belowIndex === void 0;
	const parts = [];
	for (const [index, call] of buffered) {
		const eligible = flushEverything || !(index >= belowIndex);
		if (!eligible) continue;
		const args = parseArgs(call.args);
		parts.push({ functionCall: {
			name: call.name,
			args
		} });
		buffered.delete(index);
	}
	return parts.length === 0 ? null : buildGeminiChunk(parts, null, state);
}
4333
/**
 * Build one Gemini streaming chunk with a single candidate.
 *
 * An empty `parts` list is replaced by one empty text part. `finishReason` is
 * only set on the candidate when a truthy OpenAI finish reason is supplied.
 * Usage counters and `modelVersion` are read from `state`.
 */
function buildGeminiChunk(parts, finishReason, state) {
	const { promptTokens, completionTokens, totalTokens } = state.usage;
	const candidateParts = parts.length > 0 ? parts : [{ text: "" }];
	const candidate = {
		content: {
			role: "model",
			parts: candidateParts
		},
		index: 0
	};
	if (finishReason) candidate.finishReason = mapFinishReason(finishReason);
	const usageMetadata = {
		promptTokenCount: promptTokens,
		candidatesTokenCount: completionTokens,
		totalTokenCount: totalTokens
	};
	return {
		candidates: [candidate],
		usageMetadata,
		modelVersion: state.model
	};
}
4352
/**
 * Parse the JSON argument string of an OpenAI tool call into the object that
 * Gemini expects as `functionCall.args`.
 *
 * - A missing, non-string or blank input yields `{}` (a call without arguments).
 * - Valid JSON that is a plain object is returned as parsed.
 * - Anything else (invalid JSON, or JSON that is null / an array / a scalar)
 *   is preserved as `{ raw }` so the original text is not lost.
 *
 * @param {string} raw Argument string from `tool_call.function.arguments`.
 * @returns {object} Always a non-null, non-array object.
 */
function parseArgs(raw) {
	if (typeof raw !== "string" || raw.trim() === "") return {};
	try {
		const parsed = JSON.parse(raw);
		const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
		return isPlainObject ? parsed : { raw };
	} catch {
		return { raw };
	}
}
4359
/**
 * Map an OpenAI finish reason to the Gemini finish reason string.
 * "stop" and "tool_calls" -> "STOP", "length" -> "MAX_TOKENS",
 * "content_filter" -> "SAFETY"; every other value -> "OTHER".
 */
function mapFinishReason(reason) {
	const geminiReasons = new Map([
		["stop", "STOP"],
		["tool_calls", "STOP"],
		["length", "MAX_TOKENS"],
		["content_filter", "SAFETY"]
	]);
	return geminiReasons.get(reason) ?? "OTHER";
}
4368
/**
 * Build Gemini `usageMetadata` from an OpenAI `usage` object.
 * A missing usage object or a null/undefined field counts as 0.
 */
function buildUsageMetadata(usage) {
	const countOf = (field) => usage?.[field] ?? 0;
	return {
		promptTokenCount: countOf("prompt_tokens"),
		candidatesTokenCount: countOf("completion_tokens"),
		totalTokenCount: countOf("total_tokens")
	};
}
4375
+
4376
+ //#endregion
4377
+ //#region src/routes/gemini/handler.ts
4378
/**
 * Handle Gemini `generateContent` / `streamGenerateContent`.
 *
 * The Gemini request is translated to an OpenAI chat-completions payload, sent
 * through the adaptive rate limiter, and the result is translated back:
 * a JSON body for non-streaming responses, or `data: …` SSE lines otherwise.
 * History, tracker state and analytics are updated on success; failures before
 * streaming starts are returned as Gemini-style errors.
 *
 * @param c Request context.
 * @param model Model id taken from the URL.
 * @param isStream Whether the streaming action was requested.
 */
async function handleGeminiGenerate(c, model, isStream) {
	try {
		const geminiRequest = await c.req.json();
		consola.debug("Gemini request for model:", model, "stream:", isStream);
		const trackingId = c.get("trackingId");
		const startTime = Date.now();
		updateTrackerModel(trackingId, model);
		const { payload } = translateGeminiToOpenAI(geminiRequest, model);
		payload.stream = isStream;
		const selectedModel = state.models?.data.find((m) => m.id === model);
		// Fall back to the model's advertised output limit when the client sent none.
		if (isNullish(payload.max_tokens) && selectedModel) payload.max_tokens = selectedModel.capabilities?.limits?.max_output_tokens;
		const ctx = {
			historyId: recordRequest("gemini", {
				model,
				messages: payload.messages.map((m) => ({
					role: m.role,
					content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
					tool_calls: m.tool_calls,
					tool_call_id: m.tool_call_id
				})),
				stream: isStream,
				max_tokens: payload.max_tokens ?? void 0,
				temperature: payload.temperature ?? void 0
			}),
			trackingId,
			startTime
		};
		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
		ctx.queueWaitMs = queueWaitMs;
		if (isNonStreaming(response)) return handleNonStreamResponse(c, response, model, ctx, payload);
		consola.debug("Streaming Gemini response");
		updateTrackerStatus(trackingId, "streaming");
		return stream(c, async (s) => {
			c.header("Content-Type", "text/event-stream");
			c.header("Cache-Control", "no-cache");
			c.header("Connection", "keep-alive");
			const streamState = createGeminiStreamState();
			try {
				for await (const rawEvent of response) {
					if (rawEvent.data === "[DONE]") break;
					let chunk;
					try {
						chunk = JSON.parse(rawEvent.data);
					} catch (parseError) {
						// Skip events whose data is not JSON rather than aborting the stream.
						consola.debug("Failed to parse stream chunk:", parseError);
						continue;
					}
					const geminiChunks = translateOpenAIChunkToGemini(chunk, streamState);
					for (const gc of geminiChunks) await s.write(`data: ${JSON.stringify(gc)}\n\n`);
				}
				recordResponse(ctx.historyId, {
					success: true,
					model: streamState.model || model,
					usage: {
						input_tokens: streamState.usage.promptTokens,
						output_tokens: streamState.usage.completionTokens
					},
					content: null
				}, Date.now() - ctx.startTime);
				completeTracking(ctx.trackingId, streamState.usage.promptTokens, streamState.usage.completionTokens, ctx.queueWaitMs, void 0, {
					model: streamState.model || model,
					stream: true,
					durationMs: Date.now() - ctx.startTime,
					stopReason: streamState.finishReason || void 0,
					toolCount: payload.tools?.length ?? 0
				});
			} catch (error) {
				// Log mid-stream failures; previously they were only recorded, never logged.
				consola.error("Gemini stream error:", error);
				recordStreamError({
					acc: { model: streamState.model || model },
					fallbackModel: model,
					ctx,
					error
				});
				failTracking(ctx.trackingId, error);
			}
		});
	} catch (error) {
		const trackingId = c.get("trackingId");
		if (trackingId) failTracking(trackingId, error);
		return forwardGeminiError(c, error);
	}
}
4460
/**
 * Finish a non-streaming Gemini request: translate the OpenAI response, record
 * it in history, complete tracking/analytics, and reply with the Gemini JSON.
 *
 * The elapsed time is measured once so history and analytics report the same
 * duration.
 *
 * @param c Request context.
 * @param response OpenAI chat completion response.
 * @param model Requested model id (fallback when the response has none).
 * @param ctx Request context record (`historyId`, `trackingId`, `startTime`, `queueWaitMs`).
 * @param payload OpenAI payload that was sent; only `tools` is read here.
 */
function handleNonStreamResponse(c, response, model, ctx, payload) {
	const geminiResponse = translateOpenAIResponseToGemini(response, model);
	const usage = response.usage;
	const firstChoice = response.choices[0];
	const resolvedModel = response.model || model;
	const inputTokens = usage?.prompt_tokens ?? 0;
	const outputTokens = usage?.completion_tokens ?? 0;
	const durationMs = Date.now() - ctx.startTime;
	recordResponse(ctx.historyId, {
		success: true,
		model: resolvedModel,
		usage: {
			input_tokens: inputTokens,
			output_tokens: outputTokens
		},
		stop_reason: firstChoice?.finish_reason,
		content: firstChoice ? {
			role: "assistant",
			content: firstChoice.message.content ?? ""
		} : null
	}, durationMs);
	completeTracking(ctx.trackingId, inputTokens, outputTokens, ctx.queueWaitMs, void 0, {
		model: resolvedModel,
		stream: false,
		durationMs,
		stopReason: firstChoice?.finish_reason,
		toolCount: payload.tools?.length ?? 0
	});
	return c.json(geminiResponse);
}
4485
+
4486
+ //#endregion
4487
+ //#region src/routes/gemini/route.ts
4488
/**
 * Gemini-compatible routes. The single path segment has the form
 * `{model}:{action}`; it is split at the last colon and dispatched by action.
 */
const geminiRoutes = new Hono();
geminiRoutes.post("/:modelAction", async (c) => {
	const modelAction = c.req.param("modelAction");
	const separatorAt = modelAction.lastIndexOf(":");
	if (separatorAt === -1) return geminiError(c, 400, "INVALID_ARGUMENT", "Missing action in URL");
	// separatorAt is >= 0 from here on, so plain slicing is safe.
	const model = modelAction.slice(0, separatorAt);
	const action = modelAction.slice(separatorAt + 1);
	if (action === "generateContent") return handleGeminiGenerate(c, model, false);
	if (action === "streamGenerateContent") return handleGeminiGenerate(c, model, true);
	if (action === "countTokens") return handleGeminiCountTokens(c, model);
	return geminiError(c, 400, "INVALID_ARGUMENT", `Unknown action: ${action}`);
});
4502
+
3958
4503
  //#endregion
3959
4504
  //#region src/routes/history/api.ts
3960
4505
  function handleGetEntries(c) {
@@ -6849,7 +7394,7 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx, initiat
6849
7394
  });
6850
7395
  });
6851
7396
  }
6852
- return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
7397
+ return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult, effectivePayload);
6853
7398
  } catch (error) {
6854
7399
  if (error instanceof HTTPError && error.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
6855
7400
  recordErrorResponse(ctx, anthropicPayload.model, error);
@@ -6874,7 +7419,7 @@ function logPayloadSizeInfoAnthropic(payload, model) {
6874
7419
  /**
6875
7420
  * Handle non-streaming direct Anthropic response
6876
7421
  */
6877
- function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult) {
7422
+ function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult, payload) {
6878
7423
  consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
6879
7424
  recordResponse(ctx.historyId, {
6880
7425
  success: true,
@@ -6910,6 +7455,16 @@ function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateRes
6910
7455
  outputTokens: response.usage.output_tokens,
6911
7456
  queueWaitMs: ctx.queueWaitMs
6912
7457
  });
7458
+ captureRequest({
7459
+ model: response.model,
7460
+ inputTokens: response.usage.input_tokens,
7461
+ outputTokens: response.usage.output_tokens,
7462
+ durationMs: Date.now() - ctx.startTime,
7463
+ success: true,
7464
+ stream: false,
7465
+ toolCount: payload.tools?.length ?? 0,
7466
+ stopReason: response.stop_reason ?? void 0
7467
+ });
6913
7468
  let finalResponse = response;
6914
7469
  if (state.verbose && truncateResult?.wasCompacted) finalResponse = prependMarkerToAnthropicResponse$1(response, createTruncationMarker$1(truncateResult));
6915
7470
  return c.json(finalResponse);
@@ -6963,7 +7518,13 @@ async function handleDirectAnthropicStreamingResponse(opts) {
6963
7518
  });
6964
7519
  }
6965
7520
  recordAnthropicStreamingResponse(acc, anthropicPayload.model, ctx);
6966
- completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
7521
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, void 0, {
7522
+ model: acc.model || anthropicPayload.model,
7523
+ stream: true,
7524
+ durationMs: Date.now() - ctx.startTime,
7525
+ stopReason: acc.stopReason || void 0,
7526
+ toolCount: anthropicPayload.tools?.length ?? 0
7527
+ });
6967
7528
  } catch (error) {
6968
7529
  consola.error("Direct Anthropic stream error:", error);
6969
7530
  recordStreamError({
@@ -7046,7 +7607,8 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx, initiatorOve
7046
7607
  c,
7047
7608
  response,
7048
7609
  toolNameMapping,
7049
- ctx
7610
+ ctx,
7611
+ anthropicPayload
7050
7612
  });
7051
7613
  consola.debug("Streaming response from Copilot");
7052
7614
  updateTrackerStatus(ctx.trackingId, "streaming");
@@ -7066,7 +7628,7 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx, initiatorOve
7066
7628
  }
7067
7629
  }
7068
7630
  function handleNonStreamingResponse(opts) {
7069
- const { c, response, toolNameMapping, ctx } = opts;
7631
+ const { c, response, toolNameMapping, ctx, anthropicPayload } = opts;
7070
7632
  consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
7071
7633
  let anthropicResponse = translateToAnthropic(response, toolNameMapping);
7072
7634
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
@@ -7102,6 +7664,16 @@ function handleNonStreamingResponse(opts) {
7102
7664
  outputTokens: anthropicResponse.usage.output_tokens,
7103
7665
  queueWaitMs: ctx.queueWaitMs
7104
7666
  });
7667
+ captureRequest({
7668
+ model: anthropicResponse.model,
7669
+ inputTokens: anthropicResponse.usage.input_tokens,
7670
+ outputTokens: anthropicResponse.usage.output_tokens,
7671
+ durationMs: Date.now() - ctx.startTime,
7672
+ success: true,
7673
+ stream: false,
7674
+ toolCount: anthropicPayload.tools?.length ?? 0,
7675
+ stopReason: anthropicResponse.stop_reason ?? void 0
7676
+ });
7105
7677
  return c.json(anthropicResponse);
7106
7678
  }
7107
7679
  function prependMarkerToAnthropicResponse(response, marker) {
@@ -7147,7 +7719,13 @@ async function handleStreamingResponse(opts) {
7147
7719
  checkRepetition
7148
7720
  });
7149
7721
  recordAnthropicStreamingResponse(acc, anthropicPayload.model, ctx);
7150
- completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
7722
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, void 0, {
7723
+ model: acc.model || anthropicPayload.model,
7724
+ stream: true,
7725
+ durationMs: Date.now() - ctx.startTime,
7726
+ stopReason: acc.stopReason || void 0,
7727
+ toolCount: anthropicPayload.tools?.length ?? 0
7728
+ });
7151
7729
  } catch (error) {
7152
7730
  consola.error("Stream error:", error);
7153
7731
  recordStreamError({
@@ -7670,7 +8248,12 @@ const handleResponses = async (c) => {
7670
8248
  if (finalResult) {
7671
8249
  recordResponseResult(finalResult, model, historyId, startTime);
7672
8250
  const usage = finalResult.usage;
7673
- completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, queueWaitMs, usage?.output_tokens_details?.reasoning_tokens);
8251
+ completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, queueWaitMs, usage?.output_tokens_details?.reasoning_tokens, {
8252
+ model: finalResult.model || model,
8253
+ stream: true,
8254
+ durationMs: Date.now() - startTime,
8255
+ toolCount: tools.length
8256
+ });
7674
8257
  } else if (streamErrorMessage) {
7675
8258
  recordResponse(historyId, {
7676
8259
  success: false,
@@ -7699,7 +8282,12 @@ const handleResponses = async (c) => {
7699
8282
  const result = response;
7700
8283
  const usage = result.usage;
7701
8284
  recordResponseResult(result, model, historyId, startTime);
7702
- completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, ctx.queueWaitMs, usage?.output_tokens_details?.reasoning_tokens);
8285
+ completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, ctx.queueWaitMs, usage?.output_tokens_details?.reasoning_tokens, {
8286
+ model: result.model || model,
8287
+ stream: false,
8288
+ durationMs: Date.now() - startTime,
8289
+ toolCount: tools.length
8290
+ });
7703
8291
  consola.debug("Forwarding native Responses result:", JSON.stringify(result).slice(-400));
7704
8292
  return c.json(result);
7705
8293
  } catch (error) {
@@ -7822,6 +8410,7 @@ server.route("/v1/messages", messageRoutes);
7822
8410
  server.route("/api/event_logging", eventLoggingRoutes);
7823
8411
  server.route("/v1/responses", responsesRoutes);
7824
8412
  server.route("/responses", responsesRoutes);
8413
+ server.route("/v1beta/models", geminiRoutes);
7825
8414
  server.route("/history", historyRoutes);
7826
8415
 
7827
8416
  //#endregion
@@ -7890,6 +8479,10 @@ async function runServer(options) {
7890
8479
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
7891
8480
  consola.info(`History recording enabled (${limitText} entries)`);
7892
8481
  }
8482
+ if (options.posthogKey) {
8483
+ initPostHog(options.posthogKey);
8484
+ if (isPostHogEnabled()) consola.info("PostHog analytics enabled");
8485
+ }
7893
8486
  initTui({ enabled: true });
7894
8487
  initRequestContextManager(state.staleRequestMaxAge).startReaper();
7895
8488
  await ensurePaths();
@@ -8064,6 +8657,10 @@ const start = defineCommand({
8064
8657
  type: "string",
8065
8658
  default: "+8",
8066
8659
  description: "Timezone offset in hours from UTC for log timestamps (e.g., +8, -5, 0)"
8660
+ },
8661
+ "posthog-key": {
8662
+ type: "string",
8663
+ description: "PostHog API key for token usage analytics (opt-in, no key = disabled)"
8067
8664
  }
8068
8665
  },
8069
8666
  run({ args }) {
@@ -8088,7 +8685,8 @@ const start = defineCommand({
8088
8685
  compressToolResults: args["compress-tool-results"],
8089
8686
  redirectAnthropic: args["redirect-anthropic"],
8090
8687
  rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"],
8091
- timezoneOffset: parseTimezoneOffset(args["timezone-offset"])
8688
+ timezoneOffset: parseTimezoneOffset(args["timezone-offset"]),
8689
+ posthogKey: args["posthog-key"]
8092
8690
  });
8093
8691
  }
8094
8692
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dianshuv/copilot-api",
3
- "version": "0.5.0",
3
+ "version": "0.6.1",
4
4
  "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!",
5
5
  "author": "dianshuv",
6
6
  "type": "module",
@@ -41,6 +41,7 @@
41
41
  "gpt-tokenizer": "^3.4.0",
42
42
  "hono": "^4.11.7",
43
43
  "picocolors": "^1.1.1",
44
+ "posthog-node": "^5.28.6",
44
45
  "proxy-from-env": "^1.1.0",
45
46
  "srvx": "^0.10.1",
46
47
  "tiny-invariant": "^1.3.3",