@ashsec/copilot-api 0.9.0 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -21,7 +21,7 @@ import util from "node:util";
21
21
 
22
22
  //#region package.json
23
23
  var name = "@ashsec/copilot-api";
24
- var version = "0.9.0";
24
+ var version = "0.11.2";
25
25
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
26
26
  var keywords = [
27
27
  "proxy",
@@ -296,8 +296,8 @@ async function fetchWithRetry(input, init) {
296
296
  let lastError;
297
297
  let lastResponse;
298
298
  for (let attempt = 0; attempt < maxAttempts; attempt++) try {
299
- const headers = new Headers(init?.headers);
300
- headers.set("Connection", "close");
299
+ const headers = toHeaderRecord(init?.headers);
300
+ headers.Connection = "close";
301
301
  const response = await fetch(input, {
302
302
  ...init,
303
303
  headers,
@@ -319,7 +319,24 @@ async function fetchWithRetry(input, init) {
319
319
  await sleep(delayMs);
320
320
  }
321
321
  if (lastResponse) return lastResponse;
322
- throw lastError;
322
+ throw lastError ?? /* @__PURE__ */ new Error("Request failed without a captured error");
323
+ }
324
/**
 * Convert any HeadersInit shape (a Headers instance, an array of
 * [name, value] pairs, or a plain record) into a mutable plain-object
 * record of string header values. Entries whose key or value is not a
 * string are silently skipped.
 */
function toHeaderRecord(headersInit) {
	const record = {};
	if (!headersInit) return record;
	if (headersInit instanceof Headers) {
		for (const [name, value] of headersInit.entries()) {
			record[name] = value;
		}
	} else if (Array.isArray(headersInit)) {
		for (const pair of headersInit) {
			const isStringPair = Array.isArray(pair)
				&& pair.length === 2
				&& typeof pair[0] === "string"
				&& typeof pair[1] === "string";
			if (isStringPair) record[pair[0]] = pair[1];
		}
	} else {
		for (const [name, value] of Object.entries(headersInit)) {
			if (typeof value === "string") record[name] = value;
		}
	}
	return record;
}
324
341
 
325
342
  //#endregion
@@ -734,34 +751,49 @@ async function applyReplacements(text) {
734
751
  appliedRules.push(rule.name || rule.id);
735
752
  }
736
753
  }
737
- if (appliedRules.length > 0) consola.info(`Replacements applied: ${appliedRules.join(", ")}`);
738
- return result;
754
+ return {
755
+ text: result,
756
+ appliedRules
757
+ };
739
758
  }
740
759
  /**
741
760
  * Apply replacements to a chat completions payload
742
761
  * This modifies message content in place
743
762
  */
744
763
  async function applyReplacementsToPayload(payload) {
764
+ const allAppliedRules = [];
745
765
  const processedMessages = await Promise.all(payload.messages.map(async (message) => {
746
- if (typeof message.content === "string") return {
747
- ...message,
748
- content: await applyReplacements(message.content)
749
- };
766
+ if (typeof message.content === "string") {
767
+ const { text, appliedRules } = await applyReplacements(message.content);
768
+ allAppliedRules.push(...appliedRules);
769
+ return {
770
+ ...message,
771
+ content: text
772
+ };
773
+ }
750
774
  if (Array.isArray(message.content)) return {
751
775
  ...message,
752
776
  content: await Promise.all(message.content.map(async (part) => {
753
- if (typeof part === "object" && part.type === "text" && part.text) return {
754
- ...part,
755
- text: await applyReplacements(part.text)
756
- };
777
+ if (typeof part === "object" && part.type === "text" && part.text) {
778
+ const { text, appliedRules } = await applyReplacements(part.text);
779
+ allAppliedRules.push(...appliedRules);
780
+ return {
781
+ ...part,
782
+ text
783
+ };
784
+ }
757
785
  return part;
758
786
  }))
759
787
  };
760
788
  return message;
761
789
  }));
790
+ const uniqueRules = [...new Set(allAppliedRules)];
762
791
  return {
763
- ...payload,
764
- messages: processedMessages
792
+ payload: {
793
+ ...payload,
794
+ messages: processedMessages
795
+ },
796
+ appliedRules: uniqueRules
765
797
  };
766
798
  }
767
799
 
@@ -775,6 +807,15 @@ function formatRule(rule, index) {
775
807
  const replacement = rule.replacement || "(empty)";
776
808
  return `${index + 1}. [${status}] (${type$1})${system}${name$1} "${rule.pattern}" → "${replacement}"`;
777
809
  }
810
/**
 * Check whether a replacement pattern is usable for the given match type.
 * Non-regex match types accept any pattern; for "regex" the pattern must
 * compile as a JavaScript RegExp.
 */
function isValidPatternForMatchType(pattern, matchType) {
	if (matchType === "regex") {
		try {
			void new RegExp(pattern);
		} catch {
			return false;
		}
	}
	return true;
}
778
819
  async function listReplacements() {
779
820
  const all = await getAllReplacements();
780
821
  if (all.length === 0) {
@@ -813,9 +854,7 @@ async function addNewReplacement() {
813
854
  consola.info("Cancelled.");
814
855
  return;
815
856
  }
816
- if (matchType === "regex") try {
817
- new RegExp(pattern);
818
- } catch {
857
+ if (!isValidPatternForMatchType(pattern, matchType)) {
819
858
  consola.error(`Invalid regex pattern: ${pattern}`);
820
859
  return;
821
860
  }
@@ -827,7 +866,10 @@ async function addNewReplacement() {
827
866
  consola.info("Cancelled.");
828
867
  return;
829
868
  }
830
- const rule = await addReplacement(pattern, replacement, matchType === "regex", name$1 || void 0);
869
+ const rule = await addReplacement(pattern, replacement, {
870
+ isRegex: matchType === "regex",
871
+ name: name$1 || void 0
872
+ });
831
873
  consola.success(`Added rule: ${rule.name || rule.id}`);
832
874
  }
833
875
  async function editExistingReplacement() {
@@ -886,9 +928,7 @@ async function editExistingReplacement() {
886
928
  consola.info("Cancelled.");
887
929
  return;
888
930
  }
889
- if (matchType === "regex") try {
890
- new RegExp(pattern);
891
- } catch {
931
+ if (!isValidPatternForMatchType(pattern, matchType)) {
892
932
  consola.error(`Invalid regex pattern: ${pattern}`);
893
933
  return;
894
934
  }
@@ -958,7 +998,7 @@ async function testReplacements() {
958
998
  consola.info("Cancelled.");
959
999
  return;
960
1000
  }
961
- const result = await applyReplacements(testText);
1001
+ const { text: result } = await applyReplacements(testText);
962
1002
  consola.info("\n📝 Original:");
963
1003
  console.log(testText);
964
1004
  consola.info("\n✨ After replacements:");
@@ -1235,14 +1275,168 @@ function getConfig() {
1235
1275
  function getExtraPromptForModel(model) {
1236
1276
  return getConfig().extraPrompts?.[model] ?? "";
1237
1277
  }
1238
- function getSmallModel() {
1239
- return getConfig().smallModel ?? "gpt-5-mini";
1240
- }
1241
- function getReasoningEffortForModel(model) {
1278
+ function getReasoningEffortForModel(model, override) {
1279
+ if (override) return override;
1242
1280
  return getConfig().modelReasoningEfforts?.[model] ?? "high";
1243
1281
  }
1244
- function shouldCompactUseSmallModel() {
1245
- return getConfig().compactUseSmallModel ?? true;
1282
+
1283
+ //#endregion
1284
+ //#region src/lib/model-suffix.ts
1285
/**
 * Hardcoded reasoning config per model, derived from Copilot CLI v0.0.414.
 * Models not in this map do not support per-request reasoning effort control.
 */
const MODEL_REASONING_CONFIG = (() => {
	// Builder keeps each entry on one line and gives every model its own array.
	const entry = (defaultEffort, extra = []) => ({
		supportedEfforts: ["low", "medium", "high", ...extra],
		defaultEffort
	});
	return {
		"claude-sonnet-4.6": entry("medium"),
		"claude-opus-4.6": entry("high"),
		"claude-opus-4.6-fast": entry("high"),
		"claude-opus-4.6-1m": entry("high"),
		"gpt-5.3-codex": entry("medium", ["xhigh"]),
		"gpt-5.2-codex": entry("high", ["xhigh"]),
		"gpt-5.2": entry("medium"),
		"gpt-5.1-codex": entry("medium"),
		"gpt-5.1-codex-max": entry("medium"),
		"gpt-5.1": entry("medium"),
		"gpt-5.1-codex-mini": entry("medium"),
		"gpt-5-mini": entry("medium")
	};
})();
// Every effort level the suffix parser recognizes.
const VALID_EFFORTS = new Set([
	"low",
	"medium",
	"high",
	"xhigh"
]);
/**
 * Parse a model string that may contain a reasoning effort suffix.
 * Format: "model-name:effort" (e.g. "claude-sonnet-4.6:high")
 *
 * If the suffix is not a valid effort level or the model doesn't support it,
 * the suffix is ignored and the full string is treated as the model name.
 */
function parseModelSuffix(model) {
	const sep = model.lastIndexOf(":");
	if (sep < 0) return { baseModel: model };
	const base = model.slice(0, sep);
	const suffix = model.slice(sep + 1);
	if (!VALID_EFFORTS.has(suffix)) return { baseModel: model };
	const config$1 = MODEL_REASONING_CONFIG[base];
	if (!config$1) return { baseModel: model };
	// Known model but unsupported effort: keep the base model and fall back
	// to that model's default effort.
	const reasoningEffort = config$1.supportedEfforts.includes(suffix)
		? suffix
		: config$1.defaultEffort;
	return {
		baseModel: base,
		reasoningEffort
	};
}
/**
 * Generate virtual model entries for models that support reasoning effort.
 * Each supported effort level gets its own virtual model entry.
 */
function generateVirtualModels(models) {
	const virtualModels = [];
	for (const model of models) {
		const config$1 = MODEL_REASONING_CONFIG[model.id];
		if (!config$1) continue;
		for (const effort of config$1.supportedEfforts) {
			virtualModels.push({
				id: `${model.id}:${effort}`,
				object: "model",
				type: "model",
				created: 0,
				created_at: new Date(0).toISOString(),
				owned_by: model.vendor,
				display_name: `${model.name} (${effort} thinking)`
			});
		}
	}
	return virtualModels;
}
1247
1441
 
1248
1442
  //#endregion
@@ -1390,6 +1584,26 @@ function createAuthMiddleware(options = {}) {
1390
1584
  };
1391
1585
  }
1392
1586
 
1587
+ //#endregion
1588
+ //#region src/lib/api-key-guard.ts
1589
/**
 * API key guard middleware that silently drops connections when the API key
 * doesn't match the expected value. Unauthorized requests get NO response.
 *
 * Only active when state.apiKeyAuth is set (via --api-key-auth CLI flag).
 */
async function apiKeyGuard(c, next) {
	const expectedKey = state.apiKeyAuth;
	// Guard disabled, or the caller presented the right key: pass through.
	// (Short-circuit keeps extractRequestApiKey uncalled when disabled.)
	if (!expectedKey || extractRequestApiKey(c) === expectedKey) {
		await next();
		return;
	}
	// Wrong key: park the request on a promise that never settles so the
	// client receives no response at all.
	await new Promise(() => {});
}
1606
+
1393
1607
  //#endregion
1394
1608
  //#region src/lib/request-logger.ts
1395
1609
  const REQUEST_CONTEXT_KEY = "requestContext";
@@ -1473,6 +1687,34 @@ function setRequestContext(c, ctx) {
1473
1687
  });
1474
1688
  }
1475
1689
/**
 * Format a request body size for display: below 1024 bytes as "<n>B",
 * otherwise as kibibytes with one decimal place ("<n.n>KB").
 */
function formatInputSize(bytes) {
	if (bytes < 1024) return `${bytes}B`;
	return `${(bytes / 1024).toFixed(1)}KB`;
}
/**
 * Build the model routing log line: requested→resolved model, the provider
 * that served it, and the raw input size when known.
 */
function buildModelLine(ctx) {
	const segments = [];
	const rerouted = ctx.requestedModel && ctx.requestedModel !== ctx.model;
	if (rerouted) {
		segments.push(`${colors.gray}${ctx.requestedModel}${colors.reset} ${colors.dim}→${colors.reset} ${colors.white}${ctx.model}${colors.reset}`);
	} else {
		segments.push(`${colors.white}${ctx.model}${colors.reset}`);
	}
	if (ctx.provider) {
		segments.push(`${colors.dim}via${colors.reset} ${colors.magenta}${ctx.provider}${colors.reset}`);
	}
	if (ctx.inputLength !== void 0) {
		segments.push(`${colors.dim}·${colors.reset} ${colors.yellow}${formatInputSize(ctx.inputLength)}${colors.reset}`);
	}
	return ` ${segments.join(" ")}`;
}
/**
 * Build the modifications log line (effort, replacements, tokens).
 * Returns undefined when there is nothing to report.
 */
function buildModificationsLine(ctx) {
	const pieces = [];
	if (ctx.reasoningEffort) {
		pieces.push(`${colors.blue}effort=${ctx.reasoningEffort}${colors.reset}`);
	}
	if (ctx.replacements && ctx.replacements.length > 0) {
		pieces.push(`${colors.green}replace: ${ctx.replacements.join(", ")}${colors.reset}`);
	}
	if (ctx.inputTokens !== void 0) {
		pieces.push(`${colors.yellow}${ctx.inputTokens.toLocaleString()} tokens${colors.reset}`);
	}
	if (pieces.length === 0) return void 0;
	return ` ${pieces.join(` ${colors.dim}·${colors.reset} `)}`;
}
1717
+ /**
1476
1718
  * Custom request logger middleware
1477
1719
  */
1478
1720
  async function requestLogger(c, next) {
@@ -1480,7 +1722,11 @@ async function requestLogger(c, next) {
1480
1722
  const startTime = Date.now();
1481
1723
  const method = c.req.method;
1482
1724
  const path$1 = c.req.path + (c.req.raw.url.includes("?") ? "?" + c.req.raw.url.split("?")[1] : "");
1483
- c.set(REQUEST_CONTEXT_KEY, { startTime });
1725
+ const contentLength = c.req.header("content-length");
1726
+ c.set(REQUEST_CONTEXT_KEY, {
1727
+ startTime,
1728
+ inputLength: contentLength ? Number(contentLength) : void 0
1729
+ });
1484
1730
  await next();
1485
1731
  const ctx = c.get(REQUEST_CONTEXT_KEY);
1486
1732
  const duration = ((Date.now() - startTime) / 1e3).toFixed(1);
@@ -1491,15 +1737,10 @@ async function requestLogger(c, next) {
1491
1737
  const statusBadge = `${statusColor}${status}${colors.reset}`;
1492
1738
  const durationStr = `${colors.cyan}${duration}s${colors.reset}`;
1493
1739
  lines.push(`${colors.bold}${method}${colors.reset} ${path$1} ${statusBadge} ${durationStr}`);
1494
- if (ctx?.provider && ctx.model) {
1495
- const providerColor = colors.magenta;
1496
- lines.push(` ${colors.gray}Provider:${colors.reset} ${providerColor}${ctx.provider}${colors.reset} ${colors.gray}->${colors.reset} ${colors.white}${ctx.model}${colors.reset}`);
1497
- }
1498
- if (ctx?.inputTokens !== void 0 || ctx?.outputTokens !== void 0) {
1499
- const tokenParts = [];
1500
- if (ctx.inputTokens !== void 0) tokenParts.push(`${colors.gray}Input:${colors.reset} ${colors.yellow}${ctx.inputTokens.toLocaleString()}${colors.reset}`);
1501
- if (ctx.outputTokens !== void 0) tokenParts.push(`${colors.gray}Output:${colors.reset} ${colors.green}${ctx.outputTokens.toLocaleString()}${colors.reset}`);
1502
- lines.push(` ${tokenParts.join(" ")}`);
1740
+ if (ctx?.model) lines.push(buildModelLine(ctx));
1741
+ if (ctx) {
1742
+ const modsLine = buildModificationsLine(ctx);
1743
+ if (modsLine) lines.push(modsLine);
1503
1744
  }
1504
1745
  lines.push(` ${colors.dim}${getTimeString()}${colors.reset}`);
1505
1746
  console.log(lines.join("\n"));
@@ -1514,13 +1755,17 @@ const awaitApproval = async () => {
1514
1755
  //#endregion
1515
1756
  //#region src/lib/model-resolver.ts
1516
1757
/**
 * Normalize a model name by converting dashes to dots between numbers
 * and converting Anthropic's [1m] suffix to Copilot's -1m suffix.
 * e.g., "claude-opus-4-5" -> "claude-opus-4.5"
 * "claude-opus-4-6[1m]" -> "claude-opus-4.6-1m"
 * "gpt-4-1" -> "gpt-4.1"
 * "gpt-5-1-codex" -> "gpt-5.1-codex"
 */
function normalizeModelName(model) {
	// "[1m]" appears at most once, so a single literal replace is sufficient.
	const withContextSuffix = model.replace("[1m]", "-1m");
	return withContextSuffix.replaceAll(/(\d)-(\d)/g, "$1.$2");
}
1525
1770
 
1526
1771
  //#endregion
@@ -1778,15 +2023,21 @@ const createChatCompletions = async (payload, options) => {
1778
2023
  async function handleCompletion$1(c) {
1779
2024
  await checkRateLimit(state);
1780
2025
  const rawPayload = await c.req.json();
1781
- let payload = await applyReplacementsToPayload(rawPayload);
1782
- payload = {
1783
- ...payload,
1784
- model: normalizeModelName(payload.model)
2026
+ const requestedModel = rawPayload.model;
2027
+ const { baseModel, reasoningEffort } = parseModelSuffix(rawPayload.model);
2028
+ rawPayload.model = baseModel;
2029
+ const { payload: replacedPayload, appliedRules } = await applyReplacementsToPayload(rawPayload);
2030
+ let payload = {
2031
+ ...replacedPayload,
2032
+ model: normalizeModelName(replacedPayload.model)
1785
2033
  };
1786
2034
  consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
1787
2035
  setRequestContext(c, {
1788
- provider: "Copilot",
1789
- model: payload.model
2036
+ requestedModel,
2037
+ provider: "ChatCompletions",
2038
+ model: payload.model,
2039
+ replacements: appliedRules,
2040
+ reasoningEffort
1790
2041
  });
1791
2042
  const selectedModel = state.models?.data.find((model) => model.id === payload.model);
1792
2043
  try {
@@ -2252,7 +2503,7 @@ const createResponses = async (payload, { vision, initiator }) => {
2252
2503
  const MESSAGE_TYPE = "message";
2253
2504
  const CODEX_PHASE_MODEL = "gpt-5.3-codex";
2254
2505
  const THINKING_TEXT = "Thinking...";
2255
- const translateAnthropicMessagesToResponsesPayload = (payload) => {
2506
+ const translateAnthropicMessagesToResponsesPayload = (payload, effortOverride) => {
2256
2507
  const input = [];
2257
2508
  for (const message of payload.messages) input.push(...translateMessage(message, payload.model));
2258
2509
  const translatedTools = convertAnthropicTools(payload.tools);
@@ -2274,7 +2525,7 @@ const translateAnthropicMessagesToResponsesPayload = (payload) => {
2274
2525
  store: false,
2275
2526
  parallel_tool_calls: true,
2276
2527
  reasoning: {
2277
- effort: getReasoningEffortForModel(payload.model),
2528
+ effort: getReasoningEffortForModel(payload.model, effortOverride),
2278
2529
  summary: "detailed"
2279
2530
  },
2280
2531
  include: ["reasoning.encrypted_content"]
@@ -3070,7 +3321,8 @@ const createMessages = async (payload, anthropicBetaHeader, options) => {
3070
3321
  "X-Initiator": initiator
3071
3322
  };
3072
3323
  if (anthropicBetaHeader) {
3073
- const filteredBeta = anthropicBetaHeader.split(",").map((item) => item.trim()).filter((item) => item !== "claude-code-20250219").join(",");
3324
+ const unsupportedBetas = new Set(["claude-code-20250219", "context-1m-2025-08-07"]);
3325
+ const filteredBeta = anthropicBetaHeader.split(",").map((item) => item.trim()).filter((item) => !unsupportedBetas.has(item)).join(",");
3074
3326
  if (filteredBeta) headers["anthropic-beta"] = filteredBeta;
3075
3327
  } else if (payload.thinking?.budget_tokens) headers["anthropic-beta"] = "interleaved-thinking-2025-05-14";
3076
3328
  const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
@@ -3295,37 +3547,60 @@ async function handleCompletion(c) {
3295
3547
  await checkRateLimit(state);
3296
3548
  const anthropicPayload = await c.req.json();
3297
3549
  logger$1.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
3550
+ const requestedModel = anthropicPayload.model;
3551
+ const { baseModel, reasoningEffort: suffixEffort } = parseModelSuffix(anthropicPayload.model);
3552
+ anthropicPayload.model = normalizeModelName(baseModel);
3298
3553
  const subagentMarker = parseSubagentMarkerFromFirstUser(anthropicPayload);
3299
3554
  const initiatorOverride = subagentMarker ? "agent" : void 0;
3300
3555
  if (subagentMarker) logger$1.debug("Detected Subagent marker:", JSON.stringify(subagentMarker));
3301
3556
  const isCompact = isCompactRequest(anthropicPayload);
3302
3557
  const anthropicBeta = c.req.header("anthropic-beta");
3303
3558
  logger$1.debug("Anthropic Beta header:", anthropicBeta);
3304
- const noTools = !anthropicPayload.tools || anthropicPayload.tools.length === 0;
3305
- if (anthropicBeta && noTools && !isCompact) anthropicPayload.model = getSmallModel();
3306
- if (isCompact) {
3307
- logger$1.debug("Is compact request:", isCompact);
3308
- if (shouldCompactUseSmallModel()) anthropicPayload.model = getSmallModel();
3309
- } else mergeToolResultForClaude(anthropicPayload);
3559
+ applyModelVariantRouting(anthropicPayload, anthropicBeta);
3560
+ if (isCompact) logger$1.debug("Is compact request:", isCompact);
3561
+ else mergeToolResultForClaude(anthropicPayload);
3310
3562
  if (state.manualApprove) await awaitApproval();
3311
3563
  const selectedModel = state.models?.data.find((m) => m.id === anthropicPayload.model);
3564
+ let apiType = "ChatCompletions";
3565
+ if (shouldUseMessagesApi(selectedModel)) apiType = "Messages";
3566
+ else if (shouldUseResponsesApi(selectedModel)) apiType = "Responses";
3567
+ const bodyEffort = getBodyReasoningEffort(anthropicPayload);
3568
+ const effectiveEffort = suffixEffort ?? bodyEffort;
3569
+ setRequestContext(c, {
3570
+ requestedModel,
3571
+ model: anthropicPayload.model,
3572
+ provider: apiType,
3573
+ reasoningEffort: effectiveEffort
3574
+ });
3312
3575
  if (shouldUseMessagesApi(selectedModel)) return await handleWithMessagesApi(c, anthropicPayload, {
3313
3576
  anthropicBetaHeader: anthropicBeta,
3314
3577
  initiatorOverride,
3315
- selectedModel
3578
+ selectedModel,
3579
+ effortOverride: suffixEffort
3580
+ });
3581
+ if (shouldUseResponsesApi(selectedModel)) return await handleWithResponsesApi(c, anthropicPayload, {
3582
+ initiatorOverride,
3583
+ effortOverride: suffixEffort
3316
3584
  });
3317
- if (shouldUseResponsesApi(selectedModel)) return await handleWithResponsesApi(c, anthropicPayload, initiatorOverride);
3318
3585
  return await handleWithChatCompletions(c, anthropicPayload, initiatorOverride);
3319
3586
  }
3320
3587
  const RESPONSES_ENDPOINT$1 = "/responses";
3321
3588
  const MESSAGES_ENDPOINT = "/v1/messages";
3322
3589
  const handleWithChatCompletions = async (c, anthropicPayload, initiatorOverride) => {
3323
3590
  const openAIPayload = translateToOpenAI(anthropicPayload);
3324
- let finalPayload = await applyReplacementsToPayload(openAIPayload);
3325
- finalPayload = {
3326
- ...finalPayload,
3327
- model: normalizeModelName(finalPayload.model)
3591
+ const { payload: replacedPayload, appliedRules } = await applyReplacementsToPayload(openAIPayload);
3592
+ const finalPayload = {
3593
+ ...replacedPayload,
3594
+ model: normalizeModelName(replacedPayload.model)
3328
3595
  };
3596
+ if (appliedRules.length > 0) setRequestContext(c, { replacements: appliedRules });
3597
+ try {
3598
+ const selectedModel = state.models?.data.find((m) => m.id === finalPayload.model);
3599
+ if (selectedModel) {
3600
+ const tokenCount = await getTokenCount(finalPayload, selectedModel);
3601
+ setRequestContext(c, { inputTokens: tokenCount.input });
3602
+ }
3603
+ } catch {}
3329
3604
  logger$1.debug("Translated OpenAI request payload:", JSON.stringify(finalPayload));
3330
3605
  const response = await createChatCompletions(finalPayload, { initiator: initiatorOverride });
3331
3606
  if (isNonStreaming(response)) {
@@ -3358,8 +3633,9 @@ const handleWithChatCompletions = async (c, anthropicPayload, initiatorOverride)
3358
3633
  }
3359
3634
  });
3360
3635
  };
3361
- const handleWithResponsesApi = async (c, anthropicPayload, initiatorOverride) => {
3362
- const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload);
3636
+ const handleWithResponsesApi = async (c, anthropicPayload, options) => {
3637
+ const { initiatorOverride, effortOverride } = options ?? {};
3638
+ const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, effortOverride);
3363
3639
  logger$1.debug("Translated Responses payload:", JSON.stringify(responsesPayload));
3364
3640
  const { vision, initiator } = getResponsesRequestOptions(responsesPayload);
3365
3641
  const response = await createResponses(responsesPayload, {
@@ -3411,14 +3687,15 @@ const handleWithResponsesApi = async (c, anthropicPayload, initiatorOverride) =>
3411
3687
  return c.json(anthropicResponse);
3412
3688
  };
3413
3689
  const handleWithMessagesApi = async (c, anthropicPayload, options) => {
3414
- const { anthropicBetaHeader, initiatorOverride, selectedModel } = options ?? {};
3690
+ const { anthropicBetaHeader, initiatorOverride, selectedModel, effortOverride } = options ?? {};
3415
3691
  for (const msg of anthropicPayload.messages) if (msg.role === "assistant" && Array.isArray(msg.content)) msg.content = msg.content.filter((block) => {
3416
3692
  if (block.type !== "thinking") return true;
3417
3693
  return block.thinking && block.thinking !== "Thinking..." && block.signature && !block.signature.includes("@");
3418
3694
  });
3419
3695
  if (selectedModel?.capabilities.supports.adaptive_thinking) {
3420
- anthropicPayload.thinking = { type: "adaptive" };
3421
- anthropicPayload.output_config = { effort: getAnthropicEffortForModel(anthropicPayload.model) };
3696
+ if (!anthropicPayload.thinking) anthropicPayload.thinking = { type: "adaptive" };
3697
+ const clientEffort = anthropicPayload.output_config?.effort;
3698
+ anthropicPayload.output_config = { effort: effortOverride ? getAnthropicEffortForModel(anthropicPayload.model, effortOverride) : clientEffort ?? getAnthropicEffortForModel(anthropicPayload.model) };
3422
3699
  }
3423
3700
  logger$1.debug("Translated Messages payload:", JSON.stringify(anthropicPayload));
3424
3701
  const response = await createMessages(anthropicPayload, anthropicBetaHeader, { initiator: initiatorOverride });
@@ -3439,6 +3716,21 @@ const handleWithMessagesApi = async (c, anthropicPayload, options) => {
3439
3716
  logger$1.debug("Non-streaming Messages result:", JSON.stringify(response).slice(-400));
3440
3717
  return c.json(response);
3441
3718
  };
3719
/**
 * Route to model variants based on client signals (1m context, fast mode).
 * Mutates the payload in place.
 */
function applyModelVariantRouting(payload, anthropicBeta) {
	const modelExists = (id) => state.models?.data.some((m) => m.id === id);
	// 1M-context beta header → switch to the "-1m" variant when available.
	if (anthropicBeta?.includes("context-1m")) {
		const oneMillionVariant = `${payload.model}-1m`;
		if (modelExists(oneMillionVariant)) payload.model = oneMillionVariant;
	}
	// speed:"fast" → switch to the "-fast" variant when available; the speed
	// field is a client-side signal and is always stripped before forwarding.
	if (payload.speed === "fast") {
		const fastVariant = `${payload.model}-fast`;
		if (modelExists(fastVariant)) payload.model = fastVariant;
		delete payload.speed;
	}
}
3442
3734
  const shouldUseResponsesApi = (selectedModel) => {
3443
3735
  return selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT$1) ?? false;
3444
3736
  };
@@ -3447,8 +3739,25 @@ const shouldUseMessagesApi = (selectedModel) => {
3447
3739
  };
3448
3740
  const isNonStreaming = (response) => Object.hasOwn(response, "choices");
3449
3741
  const isAsyncIterable$1 = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
3450
- const getAnthropicEffortForModel = (model) => {
3451
- const reasoningEffort = getReasoningEffortForModel(model);
3742
/**
 * Extract reasoning effort info from the Anthropic request body for logging.
 * Claude Code sends effort as `output_config.effort` (low/medium/high/max)
 * and thinking mode as `thinking.type` (enabled/adaptive).
 * When effort is "high" (the default), Claude Code omits output_config.effort entirely.
 */
function getBodyReasoningEffort(payload) {
	const explicitEffort = payload.output_config?.effort;
	const thinking = payload.thinking;
	if (!thinking && !explicitEffort) return void 0;
	const parts = [];
	// A thinking block without an explicit effort implies the "high" default.
	const effort = explicitEffort ?? (thinking ? "high" : void 0);
	if (effort) parts.push(effort);
	if (thinking) {
		parts.push(thinking.type);
		if (thinking.budget_tokens) parts.push(`${thinking.budget_tokens.toLocaleString()} budget`);
	}
	return parts.length > 0 ? parts.join(", ") : void 0;
}
3759
+ const getAnthropicEffortForModel = (model, override) => {
3760
+ const reasoningEffort = getReasoningEffortForModel(model, override);
3452
3761
  if (reasoningEffort === "xhigh") return "max";
3453
3762
  if (reasoningEffort === "none" || reasoningEffort === "minimal") return "low";
3454
3763
  return reasoningEffort;
@@ -3537,9 +3846,10 @@ modelRoutes.get("/", async (c) => {
3537
3846
  owned_by: model.vendor,
3538
3847
  display_name: model.name
3539
3848
  })) ?? [];
3849
+ const virtualModels = state.models ? generateVirtualModels(state.models.data) : [];
3540
3850
  return c.json({
3541
3851
  object: "list",
3542
- data: copilotModels,
3852
+ data: [...copilotModels, ...virtualModels],
3543
3853
  has_more: false
3544
3854
  });
3545
3855
  } catch (error) {
@@ -3559,7 +3869,10 @@ replacementsRoute.get("/", async (c) => {
3559
3869
  replacementsRoute.post("/", async (c) => {
3560
3870
  const body = await c.req.json();
3561
3871
  if (!body.pattern) return c.json({ error: "Pattern is required" }, 400);
3562
- const rule = await addReplacement(body.pattern, body.replacement ?? "", body.isRegex ?? false, body.name);
3872
+ const rule = await addReplacement(body.pattern, body.replacement ?? "", {
3873
+ isRegex: body.isRegex ?? false,
3874
+ name: body.name
3875
+ });
3563
3876
  return c.json(rule, 201);
3564
3877
  });
3565
3878
  replacementsRoute.delete("/:id", async (c) => {
@@ -3626,12 +3939,36 @@ const handleItemId = (parsed, tracker) => {
3626
3939
  //#region src/routes/responses/handler.ts
3627
3940
  const logger = createHandlerLogger("responses-handler");
3628
3941
  const RESPONSES_ENDPOINT = "/responses";
3942
/** Type guard: is the value one of the Responses API reasoning effort levels? */
function isResponsesReasoningEffort(value) {
	switch (value) {
		case "none":
		case "minimal":
		case "low":
		case "medium":
		case "high":
		case "xhigh":
			return true;
		default:
			return false;
	}
}
/**
 * Fold the various ways a client can express reasoning effort into the
 * canonical `payload.reasoning.effort` slot, mutating the payload in place.
 * Precedence: model-suffix effort > existing reasoning.effort > top-level
 * reasoningEffort/reasoning_effort. Returns the effective effort, if any.
 */
function normalizeResponsesReasoning(payload, suffixEffort) {
	const rawTopLevel = payload.reasoningEffort ?? payload.reasoning_effort;
	if (isResponsesReasoningEffort(rawTopLevel)) {
		payload.reasoning = {
			...payload.reasoning,
			effort: payload.reasoning?.effort ?? rawTopLevel
		};
	}
	// The top-level aliases are non-standard; strip them in every case.
	delete payload.reasoningEffort;
	delete payload.reasoning_effort;
	if (suffixEffort) {
		payload.reasoning = {
			...payload.reasoning,
			effort: suffixEffort
		};
	}
	return payload.reasoning?.effort ?? void 0;
}
3960
  const handleResponses = async (c) => {
3630
3961
  await checkRateLimit(state);
3631
3962
  const payload = await c.req.json();
3963
+ const requestedModel = payload.model;
3964
+ const { baseModel, reasoningEffort: suffixEffort } = parseModelSuffix(payload.model);
3965
+ payload.model = baseModel;
3966
+ const effectiveEffort = normalizeResponsesReasoning(payload, suffixEffort);
3632
3967
  setRequestContext(c, {
3633
- provider: "Copilot (Responses)",
3634
- model: payload.model
3968
+ requestedModel,
3969
+ provider: "Responses",
3970
+ model: payload.model,
3971
+ reasoningEffort: effectiveEffort
3635
3972
  });
3636
3973
  logger.debug("Responses request payload:", JSON.stringify(payload));
3637
3974
  useFunctionApplyPatch(payload);
@@ -3740,6 +4077,7 @@ usageRoute.get("/", async (c) => {
3740
4077
  //#endregion
3741
4078
  //#region src/server.ts
3742
4079
  const server = new Hono();
4080
+ server.use(apiKeyGuard);
3743
4081
  server.use(requestLogger);
3744
4082
  server.use(cors());
3745
4083
  server.use("*", createAuthMiddleware());
@@ -3759,6 +4097,11 @@ server.route("/v1/messages", messageRoutes);
3759
4097
 
3760
4098
  //#endregion
3761
4099
  //#region src/start.ts
4100
/**
 * Collect every advertised model id: the upstream models cached on `state`
 * plus the virtual models derived from them via `generateVirtualModels`.
 *
 * @returns {string[]} All model ids, base first then virtual; empty when no
 *   models have been cached yet.
 */
function getAllModelIds() {
  const models = state.models;
  if (!models) return [];
  const baseIds = models.data.map((m) => m.id);
  const virtualIds = generateVirtualModels(models.data).map((m) => m.id);
  return baseIds.concat(virtualIds);
}
3762
4105
  async function runServer(options) {
3763
4106
  consola.info(`copilot-api v${package_default.version}`);
3764
4107
  if (options.insecure) {
@@ -3778,6 +4121,9 @@ async function runServer(options) {
3778
4121
  state.showToken = options.showToken;
3779
4122
  state.debug = options.debug;
3780
4123
  state.verbose = options.verbose;
4124
+ state.apiKeyAuth = options.apiKeyAuth;
4125
+ if (options.apiKeyAuth) consola.info("API key authentication enabled - unauthorized requests will be silently dropped");
4126
+ if (options.host) consola.info(`Binding to host: ${options.host}`);
3781
4127
  if (options.debug) consola.info("Debug mode enabled - raw HTTP requests will be logged");
3782
4128
  await ensurePaths();
3783
4129
  mergeConfigWithDefaults();
@@ -3788,9 +4134,9 @@ async function runServer(options) {
3788
4134
  } else await setupGitHubToken();
3789
4135
  await setupCopilotToken();
3790
4136
  await cacheModels();
3791
- const allModelIds = state.models?.data.map((model) => model.id) ?? [];
4137
+ const allModelIds = getAllModelIds();
3792
4138
  consola.info(`Available models: \n${allModelIds.map((id) => `- ${id}`).join("\n")}`);
3793
- const serverUrl = `http://localhost:${options.port}`;
4139
+ const serverUrl = `http://${options.host ?? "localhost"}:${options.port}`;
3794
4140
  if (options.claudeCode) {
3795
4141
  invariant(state.models, "Models should be loaded by now");
3796
4142
  const selectedModel = await consola.prompt("Select a model to use with Claude Code", {
@@ -3823,9 +4169,21 @@ async function runServer(options) {
3823
4169
  serve({
3824
4170
  fetch: server.fetch,
3825
4171
  port: options.port,
4172
+ hostname: options.host,
3826
4173
  bun: { idleTimeout: 255 }
3827
4174
  });
3828
4175
  }
4176
/**
 * Resolve the --api-key-auth flag: an explicit CLI value wins, an empty or
 * bare flag falls back to the COPILOT_API_KEY_AUTH environment variable, and
 * a bare flag with no env fallback is a fatal configuration error.
 *
 * @param {string | undefined} cliValue - Raw flag value ("" or "true" means
 *   the flag was passed without an explicit key).
 * @returns {string | undefined} The API key to enforce, or `undefined` when
 *   the flag was not passed at all (auth disabled).
 */
function resolveApiKeyAuth(cliValue) {
  // Flag absent entirely: API-key auth stays off.
  if (cliValue === undefined) return undefined;
  // An explicit key was supplied on the command line.
  const flagHasExplicitValue = cliValue !== "" && cliValue !== "true";
  if (flagHasExplicitValue) return cliValue;
  // Bare flag: fall back to the environment.
  const envValue = process.env.COPILOT_API_KEY_AUTH;
  if (envValue) return envValue;
  consola.error("--api-key-auth requires a value or COPILOT_API_KEY_AUTH environment variable");
  process.exit(1);
}
3829
4187
  const start = defineCommand({
3830
4188
  meta: {
3831
4189
  name: "start",
@@ -3897,6 +4255,14 @@ const start = defineCommand({
3897
4255
  type: "boolean",
3898
4256
  default: false,
3899
4257
  description: "Log raw HTTP requests received by the server (headers, method, path)"
4258
+ },
4259
+ "api-key-auth": {
4260
+ type: "string",
4261
+ description: "API key for incoming request authentication. Requests with mismatched keys are silently dropped."
4262
+ },
4263
+ host: {
4264
+ type: "string",
4265
+ description: "Hostname/IP to bind the server to (e.g., 0.0.0.0 for all interfaces)"
3900
4266
  }
3901
4267
  },
3902
4268
  run({ args }) {
@@ -3914,7 +4280,9 @@ const start = defineCommand({
3914
4280
  showToken: args["show-token"],
3915
4281
  proxyEnv: args["proxy-env"],
3916
4282
  insecure: args.insecure,
3917
- debug: args.debug
4283
+ debug: args.debug,
4284
+ apiKeyAuth: resolveApiKeyAuth(args["api-key-auth"]),
4285
+ host: args.host
3918
4286
  });
3919
4287
  }
3920
4288
  });