@dianshuv/copilot-api 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +10 -1
  2. package/dist/main.mjs +145 -83
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -61,7 +61,7 @@ copilot-api start
61
61
  | `--proxy-env` | Use proxy from environment | false |
62
62
  | `--no-history` | Disable request history UI at `/history` | false |
63
63
  | `--history-limit` | Max history entries in memory | 1000 |
64
- | `--no-auto-truncate` | Disable auto-truncate when exceeding limits | false |
64
+ | `--no-auto-truncate` | Disable auto-truncate when exceeding token limits | false |
65
65
  | `--compress-tool-results` | Compress old tool results before truncating | false |
66
66
  | `--redirect-anthropic` | Force Anthropic through OpenAI translation | false |
67
67
  | `--no-rewrite-anthropic-tools` | Don't rewrite server-side tools | false |
@@ -106,6 +106,15 @@ copilot-api start
106
106
  | `/history` | GET | Request history Web UI with token analytics (enabled by default) |
107
107
  | `/history/api/*` | GET/DELETE | History API endpoints |
108
108
 
109
+ ## Auto-Truncate
110
+
111
+ When enabled (default), auto-truncate automatically compacts conversation history when it exceeds the model's token limit. This prevents request failures due to context overflow.
112
+
113
+ - **Token-based truncation**: Uses the model's `max_context_window_tokens` from the Copilot API to determine when truncation is needed. A 2% safety margin is applied.
114
+ - **No preset byte limit**: There is no hardcoded request body size limit. If the Copilot API returns a 413 (Request Entity Too Large), the proxy dynamically learns the byte limit and applies it to subsequent requests.
115
+ - **Smart compression**: With `--compress-tool-results`, old tool results are compressed before removing messages, preserving more conversation context.
116
+ - **Orphan filtering**: After truncation, orphaned tool results (without matching tool calls) are automatically removed.
117
+
109
118
  ## Using with Claude Code
110
119
 
111
120
  Create `.claude/settings.json` in your project:
package/dist/main.mjs CHANGED
@@ -68,7 +68,14 @@ const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
68
68
  const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
69
69
  const API_VERSION = "2025-04-01";
70
70
  const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
71
- const copilotHeaders = (state, vision = false) => {
71
+ function hasHeaderKey(headers, key) {
72
+ const lowerKey = key.toLowerCase();
73
+ return Object.keys(headers).some((existingKey) => {
74
+ return existingKey.toLowerCase() === lowerKey;
75
+ });
76
+ }
77
+ function copilotHeaders(state, visionOrOptions) {
78
+ const options = typeof visionOrOptions === "boolean" ? { vision: visionOrOptions } : visionOrOptions ?? {};
72
79
  const headers = {
73
80
  Authorization: `Bearer ${state.copilotToken}`,
74
81
  "content-type": standardHeaders()["content-type"],
@@ -76,14 +83,15 @@ const copilotHeaders = (state, vision = false) => {
76
83
  "editor-version": `vscode/${state.vsCodeVersion}`,
77
84
  "editor-plugin-version": EDITOR_PLUGIN_VERSION,
78
85
  "user-agent": USER_AGENT,
79
- "openai-intent": "conversation-panel",
86
+ "openai-intent": options.intent ?? "conversation-panel",
80
87
  "x-github-api-version": API_VERSION,
81
88
  "x-request-id": randomUUID(),
82
89
  "x-vscode-user-agent-library-version": "electron-fetch"
83
90
  };
84
- if (vision) headers["copilot-vision-request"] = "true";
91
+ for (const [key, value] of Object.entries(options.modelRequestHeaders ?? {})) if (!hasHeaderKey(headers, key)) headers[key] = value;
92
+ if (options.vision) headers["copilot-vision-request"] = "true";
85
93
  return headers;
86
- };
94
+ }
87
95
  const GITHUB_API_BASE_URL = "https://api.github.com";
88
96
  const githubHeaders = (state) => ({
89
97
  ...standardHeaders(),
@@ -106,7 +114,7 @@ const GITHUB_APP_SCOPES = ["read:user"].join(" ");
106
114
  */
107
115
  const DEFAULT_AUTO_TRUNCATE_CONFIG = {
108
116
  safetyMarginPercent: 2,
109
- maxRequestBodyBytes: 510 * 1024,
117
+ maxRequestBodyBytes: Infinity,
110
118
  preserveRecentPercent: .7
111
119
  };
112
120
  /** Dynamic byte limit that adjusts based on 413 errors */
@@ -208,6 +216,10 @@ function formatRateLimitError(copilotMessage) {
208
216
  }
209
217
  };
210
218
  }
219
+ function truncateForLog(text, maxLen) {
220
+ if (text.length <= maxLen) return text;
221
+ return `${text.slice(0, maxLen)}...`;
222
+ }
211
223
  function forwardError(c, error) {
212
224
  if (error instanceof HTTPError) {
213
225
  if (error.status === 413) {
@@ -246,7 +258,9 @@ function forwardError(c, error) {
246
258
  consola.warn(`HTTP 429: Rate limit exceeded`);
247
259
  return c.json(formattedError, 429);
248
260
  }
249
- consola.error(`HTTP ${error.status}:`, errorJson);
261
+ let loggedError = errorJson;
262
+ if (typeof errorJson === "string") loggedError = errorJson.trimStart().startsWith("<") ? `[HTML ${errorJson.length} bytes]` : truncateForLog(errorJson, 200);
263
+ consola.error(`HTTP ${error.status}:`, loggedError);
250
264
  return c.json({ error: {
251
265
  message: error.responseText,
252
266
  type: "error"
@@ -1021,7 +1035,7 @@ const patchClaude = defineCommand({
1021
1035
 
1022
1036
  //#endregion
1023
1037
  //#region package.json
1024
- var version = "0.4.2";
1038
+ var version = "0.5.0";
1025
1039
 
1026
1040
  //#endregion
1027
1041
  //#region src/lib/adaptive-rate-limiter.ts
@@ -2465,8 +2479,10 @@ var RequestTracker = class {
2465
2479
  if (update.durationMs !== void 0) request.durationMs = update.durationMs;
2466
2480
  if (update.inputTokens !== void 0) request.inputTokens = update.inputTokens;
2467
2481
  if (update.outputTokens !== void 0) request.outputTokens = update.outputTokens;
2482
+ if (update.reasoningTokens !== void 0) request.reasoningTokens = update.reasoningTokens;
2468
2483
  if (update.error !== void 0) request.error = update.error;
2469
2484
  if (update.queuePosition !== void 0) request.queuePosition = update.queuePosition;
2485
+ if (update.queueWaitMs !== void 0) request.queueWaitMs = update.queueWaitMs;
2470
2486
  this.renderer?.onRequestUpdate(id, update);
2471
2487
  }
2472
2488
  /**
@@ -2481,6 +2497,7 @@ var RequestTracker = class {
2481
2497
  if (usage) {
2482
2498
  request.inputTokens = usage.inputTokens;
2483
2499
  request.outputTokens = usage.outputTokens;
2500
+ if (usage.reasoningTokens !== void 0) request.reasoningTokens = usage.reasoningTokens;
2484
2501
  }
2485
2502
  this.renderer?.onRequestComplete(request);
2486
2503
  this.requests.delete(id);
@@ -3281,6 +3298,26 @@ function createTruncationResponseMarkerOpenAI(result) {
3281
3298
  return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
3282
3299
  }
3283
3300
 
3301
+ //#endregion
3302
+ //#region src/lib/message-sanitizer.ts
3303
+ const startPattern = /^\s*<system-reminder>[\s\S]*?<\/system-reminder>\n*/;
3304
+ const endPatternWithNewline = /\n+<system-reminder>[\s\S]*?<\/system-reminder>\s*$/;
3305
+ const endPatternOnly = /^\s*<system-reminder>[\s\S]*?<\/system-reminder>\s*$/;
3306
+ function removeSystemReminderTags(text) {
3307
+ let result = text;
3308
+ let prev;
3309
+ do {
3310
+ prev = result;
3311
+ result = result.replace(startPattern, "");
3312
+ } while (result !== prev);
3313
+ do {
3314
+ prev = result;
3315
+ result = result.replace(endPatternWithNewline, "");
3316
+ } while (result !== prev);
3317
+ result = result.replace(endPatternOnly, "");
3318
+ return result;
3319
+ }
3320
+
3284
3321
  //#endregion
3285
3322
  //#region src/lib/repetition-detector.ts
3286
3323
  /**
@@ -3409,7 +3446,10 @@ const createChatCompletions = async (payload, options) => {
3409
3446
  const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"));
3410
3447
  const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
3411
3448
  const headers = {
3412
- ...copilotHeaders(state, enableVision),
3449
+ ...copilotHeaders(state, {
3450
+ vision: enableVision,
3451
+ intent: isAgentCall ? "conversation-agent" : "conversation-panel"
3452
+ }),
3413
3453
  "X-Initiator": options?.initiator ?? (isAgentCall ? "agent" : "user")
3414
3454
  };
3415
3455
  const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
@@ -3464,16 +3504,18 @@ function recordErrorResponse(ctx, model, error) {
3464
3504
  }, Date.now() - ctx.startTime);
3465
3505
  }
3466
3506
  /** Complete TUI tracking */
3467
- function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
3507
+ function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs, reasoningTokens) {
3468
3508
  if (!trackingId) return;
3469
3509
  requestTracker.updateRequest(trackingId, {
3470
3510
  inputTokens,
3471
3511
  outputTokens,
3472
- queueWaitMs
3512
+ queueWaitMs,
3513
+ reasoningTokens
3473
3514
  });
3474
3515
  requestTracker.completeRequest(trackingId, 200, {
3475
3516
  inputTokens,
3476
- outputTokens
3517
+ outputTokens,
3518
+ reasoningTokens
3477
3519
  });
3478
3520
  }
3479
3521
  /** Fail TUI tracking */
@@ -3521,7 +3563,7 @@ async function buildFinalPayload(payload, model) {
3521
3563
  }
3522
3564
  try {
3523
3565
  const check = await checkNeedsCompactionOpenAI(payload, model);
3524
- consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
3566
+ consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${check.byteLimit === Infinity ? "unlimited" : `${Math.round(check.byteLimit / 1024)}KB`}), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
3525
3567
  if (!check.needed) return {
3526
3568
  finalPayload: payload,
3527
3569
  truncateResult: null
@@ -3593,6 +3635,9 @@ async function logPayloadSizeInfo(payload, model) {
3593
3635
 
3594
3636
  //#endregion
3595
3637
  //#region src/routes/chat-completions/handler.ts
3638
+ function getReasoningTokensFromOpenAIUsage(usage) {
3639
+ return usage?.completion_tokens_details?.reasoning_tokens;
3640
+ }
3596
3641
  async function handleCompletion$1(c) {
3597
3642
  const originalPayload = await c.req.json();
3598
3643
  consola.debug("Request payload:", JSON.stringify(originalPayload).slice(-400));
@@ -3685,12 +3730,14 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
3685
3730
  }
3686
3731
  const choice = response.choices[0];
3687
3732
  const usage = response.usage;
3733
+ const reasoningTokens = getReasoningTokensFromOpenAIUsage(usage);
3688
3734
  recordResponse(ctx.historyId, {
3689
3735
  success: true,
3690
3736
  model: response.model,
3691
3737
  usage: {
3692
3738
  input_tokens: usage?.prompt_tokens ?? 0,
3693
- output_tokens: usage?.completion_tokens ?? 0
3739
+ output_tokens: usage?.completion_tokens ?? 0,
3740
+ ...reasoningTokens !== void 0 ? { output_tokens_details: { reasoning_tokens: reasoningTokens } } : {}
3694
3741
  },
3695
3742
  stop_reason: choice.finish_reason,
3696
3743
  content: buildResponseContent(choice),
@@ -3699,7 +3746,8 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
3699
3746
  if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
3700
3747
  inputTokens: usage.prompt_tokens,
3701
3748
  outputTokens: usage.completion_tokens,
3702
- queueWaitMs: ctx.queueWaitMs
3749
+ queueWaitMs: ctx.queueWaitMs,
3750
+ reasoningTokens
3703
3751
  });
3704
3752
  return c.json(response);
3705
3753
  }
@@ -3729,6 +3777,7 @@ function createStreamAccumulator() {
3729
3777
  model: "",
3730
3778
  inputTokens: 0,
3731
3779
  outputTokens: 0,
3780
+ reasoningTokens: 0,
3732
3781
  finishReason: "",
3733
3782
  content: "",
3734
3783
  toolCalls: [],
@@ -3766,7 +3815,7 @@ async function handleStreamingResponse$1(opts) {
3766
3815
  await stream.writeSSE(chunk);
3767
3816
  }
3768
3817
  recordStreamSuccess(acc, payload.model, ctx);
3769
- completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
3818
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs, acc.reasoningTokens);
3770
3819
  } catch (error) {
3771
3820
  recordStreamError({
3772
3821
  acc,
@@ -3786,6 +3835,7 @@ function parseStreamChunk(chunk, acc, checkRepetition) {
3786
3835
  if (parsed.usage) {
3787
3836
  acc.inputTokens = parsed.usage.prompt_tokens;
3788
3837
  acc.outputTokens = parsed.usage.completion_tokens;
3838
+ acc.reasoningTokens = getReasoningTokensFromOpenAIUsage(parsed.usage) ?? 0;
3789
3839
  }
3790
3840
  const choice = parsed.choices[0];
3791
3841
  if (choice) {
@@ -3826,7 +3876,8 @@ function recordStreamSuccess(acc, fallbackModel, ctx) {
3826
3876
  model: acc.model || fallbackModel,
3827
3877
  usage: {
3828
3878
  input_tokens: acc.inputTokens,
3829
- output_tokens: acc.outputTokens
3879
+ output_tokens: acc.outputTokens,
3880
+ ...acc.reasoningTokens > 0 ? { output_tokens_details: { reasoning_tokens: acc.reasoningTokens } } : {}
3830
3881
  },
3831
3882
  stop_reason: acc.finishReason || void 0,
3832
3883
  content: {
@@ -3845,7 +3896,7 @@ function convertOpenAIMessages(messages) {
3845
3896
  return messages.map((msg) => {
3846
3897
  const result = {
3847
3898
  role: msg.role,
3848
- content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content)
3899
+ content: typeof msg.content === "string" ? removeSystemReminderTags(msg.content) : JSON.stringify(msg.content)
3849
3900
  };
3850
3901
  if ("tool_calls" in msg && msg.tool_calls) result.tool_calls = msg.tool_calls.map((tc) => ({
3851
3902
  id: tc.id,
@@ -6006,7 +6057,10 @@ async function createAnthropicMessages(payload, options) {
6006
6057
  });
6007
6058
  const isAgentCall = filteredPayload.messages.some((msg) => msg.role === "assistant");
6008
6059
  const headers = {
6009
- ...copilotHeaders(state, enableVision),
6060
+ ...copilotHeaders(state, {
6061
+ vision: enableVision,
6062
+ intent: isAgentCall ? "conversation-agent" : "conversation-panel"
6063
+ }),
6010
6064
  "X-Initiator": options?.initiator ?? (isAgentCall ? "agent" : "user"),
6011
6065
  "anthropic-version": "2023-06-01"
6012
6066
  };
@@ -6141,12 +6195,12 @@ function convertAnthropicMessages(messages) {
6141
6195
  return messages.map((msg) => {
6142
6196
  if (typeof msg.content === "string") return {
6143
6197
  role: msg.role,
6144
- content: msg.content
6198
+ content: removeSystemReminderTags(msg.content)
6145
6199
  };
6146
6200
  const content = msg.content.map((block) => {
6147
6201
  if (block.type === "text") return {
6148
6202
  type: "text",
6149
- text: block.text
6203
+ text: removeSystemReminderTags(block.text)
6150
6204
  };
6151
6205
  if (block.type === "tool_use") return {
6152
6206
  type: "tool_use",
@@ -6213,9 +6267,13 @@ function createAnthropicStreamAccumulator() {
6213
6267
  stopReason: "",
6214
6268
  content: "",
6215
6269
  toolCalls: [],
6270
+ serverToolResults: [],
6216
6271
  currentToolCall: null
6217
6272
  };
6218
6273
  }
6274
+ function isServerToolResultType(type) {
6275
+ return type !== "tool_result" && type.endsWith("_tool_result");
6276
+ }
6219
6277
  function processAnthropicEvent(event, acc) {
6220
6278
  switch (event.type) {
6221
6279
  case "content_block_delta":
@@ -6238,11 +6296,14 @@ function handleContentBlockDelta(delta, acc) {
6238
6296
  else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
6239
6297
  }
6240
6298
  function handleContentBlockStart(block, acc) {
6241
- if (block.type === "tool_use") acc.currentToolCall = {
6242
- id: block.id,
6243
- name: block.name,
6244
- input: ""
6245
- };
6299
+ if (block.type === "tool_use") {
6300
+ const toolBlock = block;
6301
+ acc.currentToolCall = {
6302
+ id: toolBlock.id,
6303
+ name: toolBlock.name,
6304
+ input: ""
6305
+ };
6306
+ } else if (isServerToolResultType(block.type)) acc.serverToolResults.push(block);
6246
6307
  }
6247
6308
  function handleContentBlockStop(acc) {
6248
6309
  if (acc.currentToolCall) {
@@ -6257,6 +6318,32 @@ function handleMessageDelta(delta, usage, acc) {
6257
6318
  acc.outputTokens = usage.output_tokens;
6258
6319
  }
6259
6320
  }
6321
+ function recordAnthropicStreamingResponse(acc, fallbackModel, ctx) {
6322
+ const contentBlocks = [];
6323
+ if (acc.content) contentBlocks.push({
6324
+ type: "text",
6325
+ text: acc.content
6326
+ });
6327
+ for (const tc of acc.toolCalls) contentBlocks.push({
6328
+ type: "tool_use",
6329
+ ...tc
6330
+ });
6331
+ for (const result of acc.serverToolResults) contentBlocks.push(result);
6332
+ recordResponse(ctx.historyId, {
6333
+ success: true,
6334
+ model: acc.model || fallbackModel,
6335
+ usage: {
6336
+ input_tokens: acc.inputTokens,
6337
+ output_tokens: acc.outputTokens
6338
+ },
6339
+ stop_reason: acc.stopReason || void 0,
6340
+ content: contentBlocks.length > 0 ? {
6341
+ role: "assistant",
6342
+ content: contentBlocks
6343
+ } : null,
6344
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
6345
+ }, Date.now() - ctx.startTime);
6346
+ }
6260
6347
 
6261
6348
  //#endregion
6262
6349
  //#region src/routes/messages/non-stream-translation.ts
@@ -6738,7 +6825,7 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx, initiat
6738
6825
  let truncateResult;
6739
6826
  if (state.autoTruncate && selectedModel) {
6740
6827
  const check = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
6741
- consola.debug(`[Anthropic] Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
6828
+ consola.debug(`[Anthropic] Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${check.byteLimit === Infinity ? "unlimited" : `${Math.round(check.byteLimit / 1024)}KB`}), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
6742
6829
  if (check.needed) try {
6743
6830
  truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel);
6744
6831
  if (truncateResult.wasCompacted) effectivePayload = truncateResult.payload;
@@ -6875,7 +6962,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
6875
6962
  data: rawEvent.data
6876
6963
  });
6877
6964
  }
6878
- recordStreamingResponse$1(acc, anthropicPayload.model, ctx);
6965
+ recordAnthropicStreamingResponse(acc, anthropicPayload.model, ctx);
6879
6966
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
6880
6967
  } catch (error) {
6881
6968
  consola.error("Direct Anthropic stream error:", error);
@@ -6893,31 +6980,6 @@ async function handleDirectAnthropicStreamingResponse(opts) {
6893
6980
  });
6894
6981
  }
6895
6982
  }
6896
- function recordStreamingResponse$1(acc, fallbackModel, ctx) {
6897
- const contentBlocks = [];
6898
- if (acc.content) contentBlocks.push({
6899
- type: "text",
6900
- text: acc.content
6901
- });
6902
- for (const tc of acc.toolCalls) contentBlocks.push({
6903
- type: "tool_use",
6904
- ...tc
6905
- });
6906
- recordResponse(ctx.historyId, {
6907
- success: true,
6908
- model: acc.model || fallbackModel,
6909
- usage: {
6910
- input_tokens: acc.inputTokens,
6911
- output_tokens: acc.outputTokens
6912
- },
6913
- stop_reason: acc.stopReason || void 0,
6914
- content: contentBlocks.length > 0 ? {
6915
- role: "assistant",
6916
- content: contentBlocks
6917
- } : null,
6918
- toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
6919
- }, Date.now() - ctx.startTime);
6920
- }
6921
6983
 
6922
6984
  //#endregion
6923
6985
  //#region src/routes/messages/subagent-marker.ts
@@ -7084,7 +7146,7 @@ async function handleStreamingResponse(opts) {
7084
7146
  acc,
7085
7147
  checkRepetition
7086
7148
  });
7087
- recordStreamingResponse(acc, anthropicPayload.model, ctx);
7149
+ recordAnthropicStreamingResponse(acc, anthropicPayload.model, ctx);
7088
7150
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
7089
7151
  } catch (error) {
7090
7152
  consola.error("Stream error:", error);
@@ -7163,31 +7225,6 @@ async function processStreamChunks(opts) {
7163
7225
  }
7164
7226
  }
7165
7227
  }
7166
- function recordStreamingResponse(acc, fallbackModel, ctx) {
7167
- const contentBlocks = [];
7168
- if (acc.content) contentBlocks.push({
7169
- type: "text",
7170
- text: acc.content
7171
- });
7172
- for (const tc of acc.toolCalls) contentBlocks.push({
7173
- type: "tool_use",
7174
- ...tc
7175
- });
7176
- recordResponse(ctx.historyId, {
7177
- success: true,
7178
- model: acc.model || fallbackModel,
7179
- usage: {
7180
- input_tokens: acc.inputTokens,
7181
- output_tokens: acc.outputTokens
7182
- },
7183
- stop_reason: acc.stopReason || void 0,
7184
- content: contentBlocks.length > 0 ? {
7185
- role: "assistant",
7186
- content: contentBlocks
7187
- } : null,
7188
- toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
7189
- }, Date.now() - ctx.startTime);
7190
- }
7191
7228
 
7192
7229
  //#endregion
7193
7230
  //#region src/routes/messages/handler.ts
@@ -7361,7 +7398,7 @@ modelRoutes.get("/", async (c) => {
7361
7398
  const createResponses = async (payload, { vision, initiator }) => {
7362
7399
  if (!state.copilotToken) throw new Error("Copilot token not found");
7363
7400
  const headers = {
7364
- ...copilotHeaders(state, vision),
7401
+ ...copilotHeaders(state, { vision }),
7365
7402
  "X-Initiator": initiator
7366
7403
  };
7367
7404
  payload.service_tier = null;
@@ -7633,7 +7670,7 @@ const handleResponses = async (c) => {
7633
7670
  if (finalResult) {
7634
7671
  recordResponseResult(finalResult, model, historyId, startTime);
7635
7672
  const usage = finalResult.usage;
7636
- completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, queueWaitMs);
7673
+ completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, queueWaitMs, usage?.output_tokens_details?.reasoning_tokens);
7637
7674
  } else if (streamErrorMessage) {
7638
7675
  recordResponse(historyId, {
7639
7676
  success: false,
@@ -7662,7 +7699,7 @@ const handleResponses = async (c) => {
7662
7699
  const result = response;
7663
7700
  const usage = result.usage;
7664
7701
  recordResponseResult(result, model, historyId, startTime);
7665
- completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, ctx.queueWaitMs);
7702
+ completeTracking(trackingId, usage?.input_tokens ?? 0, usage?.output_tokens ?? 0, ctx.queueWaitMs, usage?.output_tokens_details?.reasoning_tokens);
7666
7703
  consola.debug("Forwarding native Responses result:", JSON.stringify(result).slice(-400));
7667
7704
  return c.json(result);
7668
7705
  } catch (error) {
@@ -7713,7 +7750,8 @@ function recordResponseResult(result, fallbackModel, historyId, startTime) {
7713
7750
  model: result.model || fallbackModel,
7714
7751
  usage: {
7715
7752
  input_tokens: usage?.input_tokens ?? 0,
7716
- output_tokens: usage?.output_tokens ?? 0
7753
+ output_tokens: usage?.output_tokens ?? 0,
7754
+ ...usage?.output_tokens_details ? { output_tokens_details: { reasoning_tokens: usage.output_tokens_details.reasoning_tokens } } : {}
7717
7755
  },
7718
7756
  stop_reason: extractResponseStopReason(result),
7719
7757
  content,
@@ -7788,6 +7826,18 @@ server.route("/history", historyRoutes);
7788
7826
 
7789
7827
  //#endregion
7790
7828
  //#region src/start.ts
7829
+ const VALID_ACCOUNT_TYPES = [
7830
+ "individual",
7831
+ "business",
7832
+ "enterprise"
7833
+ ];
7834
+ function isValidAccountType(accountType) {
7835
+ return VALID_ACCOUNT_TYPES.includes(accountType);
7836
+ }
7837
+ function validateAccountType(accountType) {
7838
+ if (isValidAccountType(accountType)) return;
7839
+ throw new Error(`Invalid account type: "${accountType}". Available: ${VALID_ACCOUNT_TYPES.join(", ")}`);
7840
+ }
7791
7841
  /** Format limit values as "Xk" or "?" if not available */
7792
7842
  function formatLimit(value) {
7793
7843
  return value ? `${Math.round(value / 1e3)}k` : "?";
@@ -7810,6 +7860,12 @@ async function runServer(options) {
7810
7860
  state.verbose = true;
7811
7861
  }
7812
7862
  state.accountType = options.accountType;
7863
+ try {
7864
+ validateAccountType(state.accountType);
7865
+ } catch (error) {
7866
+ consola.error(error instanceof Error ? error.message : String(error));
7867
+ process.exit(1);
7868
+ }
7813
7869
  if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
7814
7870
  state.manualApprove = options.manual;
7815
7871
  state.showToken = options.showToken;
@@ -7843,7 +7899,13 @@ async function runServer(options) {
7843
7899
  consola.info("Using provided GitHub token");
7844
7900
  } else await setupGitHubToken();
7845
7901
  await setupCopilotToken();
7846
- await cacheModels();
7902
+ try {
7903
+ await cacheModels();
7904
+ } catch (error) {
7905
+ consola.error(`Failed to fetch available models for account type "${state.accountType}". Check that the account type matches your Copilot plan.`);
7906
+ consola.error(error instanceof Error ? error.message : String(error));
7907
+ process.exit(1);
7908
+ }
7847
7909
  consola.info(`Available models:\n${state.models?.data.map((m) => formatModelInfo(m)).join("\n")}`);
7848
7910
  const serverUrl = `http://${options.host ?? "localhost"}:${options.port}`;
7849
7911
  if (options.claudeCode) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dianshuv/copilot-api",
3
- "version": "0.4.2",
3
+ "version": "0.5.0",
4
4
  "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!",
5
5
  "author": "dianshuv",
6
6
  "type": "module",