@ashsec/copilot-api 0.9.0 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -21,7 +21,7 @@ import util from "node:util";
21
21
 
22
22
  //#region package.json
23
23
  var name = "@ashsec/copilot-api";
24
- var version = "0.9.0";
24
+ var version = "0.11.3";
25
25
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
26
26
  var keywords = [
27
27
  "proxy",
@@ -296,8 +296,8 @@ async function fetchWithRetry(input, init) {
296
296
  let lastError;
297
297
  let lastResponse;
298
298
  for (let attempt = 0; attempt < maxAttempts; attempt++) try {
299
- const headers = new Headers(init?.headers);
300
- headers.set("Connection", "close");
299
+ const headers = toHeaderRecord(init?.headers);
300
+ headers.Connection = "close";
301
301
  const response = await fetch(input, {
302
302
  ...init,
303
303
  headers,
@@ -319,7 +319,24 @@ async function fetchWithRetry(input, init) {
319
319
  await sleep(delayMs);
320
320
  }
321
321
  if (lastResponse) return lastResponse;
322
- throw lastError;
322
+ throw lastError ?? /* @__PURE__ */ new Error("Request failed without a captured error");
323
+ }
324
+ function toHeaderRecord(headersInit) {
325
+ const headers = {};
326
+ if (!headersInit) return headers;
327
+ if (headersInit instanceof Headers) {
328
+ for (const [key, value] of headersInit.entries()) headers[key] = value;
329
+ return headers;
330
+ }
331
+ if (Array.isArray(headersInit)) {
332
+ for (const entry of headersInit) if (Array.isArray(entry) && entry.length === 2 && typeof entry[0] === "string" && typeof entry[1] === "string") {
333
+ const [key, value] = entry;
334
+ headers[key] = value;
335
+ }
336
+ return headers;
337
+ }
338
+ for (const [key, value] of Object.entries(headersInit)) if (typeof value === "string") headers[key] = value;
339
+ return headers;
323
340
  }
324
341
 
325
342
  //#endregion
@@ -734,34 +751,49 @@ async function applyReplacements(text) {
734
751
  appliedRules.push(rule.name || rule.id);
735
752
  }
736
753
  }
737
- if (appliedRules.length > 0) consola.info(`Replacements applied: ${appliedRules.join(", ")}`);
738
- return result;
754
+ return {
755
+ text: result,
756
+ appliedRules
757
+ };
739
758
  }
740
759
  /**
741
760
  * Apply replacements to a chat completions payload
742
761
  * This modifies message content in place
743
762
  */
744
763
  async function applyReplacementsToPayload(payload) {
764
+ const allAppliedRules = [];
745
765
  const processedMessages = await Promise.all(payload.messages.map(async (message) => {
746
- if (typeof message.content === "string") return {
747
- ...message,
748
- content: await applyReplacements(message.content)
749
- };
766
+ if (typeof message.content === "string") {
767
+ const { text, appliedRules } = await applyReplacements(message.content);
768
+ allAppliedRules.push(...appliedRules);
769
+ return {
770
+ ...message,
771
+ content: text
772
+ };
773
+ }
750
774
  if (Array.isArray(message.content)) return {
751
775
  ...message,
752
776
  content: await Promise.all(message.content.map(async (part) => {
753
- if (typeof part === "object" && part.type === "text" && part.text) return {
754
- ...part,
755
- text: await applyReplacements(part.text)
756
- };
777
+ if (typeof part === "object" && part.type === "text" && part.text) {
778
+ const { text, appliedRules } = await applyReplacements(part.text);
779
+ allAppliedRules.push(...appliedRules);
780
+ return {
781
+ ...part,
782
+ text
783
+ };
784
+ }
757
785
  return part;
758
786
  }))
759
787
  };
760
788
  return message;
761
789
  }));
790
+ const uniqueRules = [...new Set(allAppliedRules)];
762
791
  return {
763
- ...payload,
764
- messages: processedMessages
792
+ payload: {
793
+ ...payload,
794
+ messages: processedMessages
795
+ },
796
+ appliedRules: uniqueRules
765
797
  };
766
798
  }
767
799
 
@@ -775,6 +807,15 @@ function formatRule(rule, index) {
775
807
  const replacement = rule.replacement || "(empty)";
776
808
  return `${index + 1}. [${status}] (${type$1})${system}${name$1} "${rule.pattern}" → "${replacement}"`;
777
809
  }
810
+ function isValidPatternForMatchType(pattern, matchType) {
811
+ if (matchType !== "regex") return true;
812
+ try {
813
+ new RegExp(pattern);
814
+ return true;
815
+ } catch {
816
+ return false;
817
+ }
818
+ }
778
819
  async function listReplacements() {
779
820
  const all = await getAllReplacements();
780
821
  if (all.length === 0) {
@@ -813,9 +854,7 @@ async function addNewReplacement() {
813
854
  consola.info("Cancelled.");
814
855
  return;
815
856
  }
816
- if (matchType === "regex") try {
817
- new RegExp(pattern);
818
- } catch {
857
+ if (!isValidPatternForMatchType(pattern, matchType)) {
819
858
  consola.error(`Invalid regex pattern: ${pattern}`);
820
859
  return;
821
860
  }
@@ -827,7 +866,10 @@ async function addNewReplacement() {
827
866
  consola.info("Cancelled.");
828
867
  return;
829
868
  }
830
- const rule = await addReplacement(pattern, replacement, matchType === "regex", name$1 || void 0);
869
+ const rule = await addReplacement(pattern, replacement, {
870
+ isRegex: matchType === "regex",
871
+ name: name$1 || void 0
872
+ });
831
873
  consola.success(`Added rule: ${rule.name || rule.id}`);
832
874
  }
833
875
  async function editExistingReplacement() {
@@ -886,9 +928,7 @@ async function editExistingReplacement() {
886
928
  consola.info("Cancelled.");
887
929
  return;
888
930
  }
889
- if (matchType === "regex") try {
890
- new RegExp(pattern);
891
- } catch {
931
+ if (!isValidPatternForMatchType(pattern, matchType)) {
892
932
  consola.error(`Invalid regex pattern: ${pattern}`);
893
933
  return;
894
934
  }
@@ -958,7 +998,7 @@ async function testReplacements() {
958
998
  consola.info("Cancelled.");
959
999
  return;
960
1000
  }
961
- const result = await applyReplacements(testText);
1001
+ const { text: result } = await applyReplacements(testText);
962
1002
  consola.info("\n📝 Original:");
963
1003
  console.log(testText);
964
1004
  consola.info("\n✨ After replacements:");
@@ -1235,14 +1275,168 @@ function getConfig() {
1235
1275
  function getExtraPromptForModel(model) {
1236
1276
  return getConfig().extraPrompts?.[model] ?? "";
1237
1277
  }
1238
- function getSmallModel() {
1239
- return getConfig().smallModel ?? "gpt-5-mini";
1240
- }
1241
- function getReasoningEffortForModel(model) {
1278
+ function getReasoningEffortForModel(model, override) {
1279
+ if (override) return override;
1242
1280
  return getConfig().modelReasoningEfforts?.[model] ?? "high";
1243
1281
  }
1244
- function shouldCompactUseSmallModel() {
1245
- return getConfig().compactUseSmallModel ?? true;
1282
+
1283
+ //#endregion
1284
+ //#region src/lib/model-suffix.ts
1285
+ /**
1286
+ * Hardcoded reasoning config per model, derived from Copilot CLI v0.0.414.
1287
+ * Models not in this map do not support per-request reasoning effort control.
1288
+ */
1289
+ const MODEL_REASONING_CONFIG = {
1290
+ "claude-sonnet-4.6": {
1291
+ supportedEfforts: [
1292
+ "low",
1293
+ "medium",
1294
+ "high"
1295
+ ],
1296
+ defaultEffort: "medium"
1297
+ },
1298
+ "claude-opus-4.6": {
1299
+ supportedEfforts: [
1300
+ "low",
1301
+ "medium",
1302
+ "high"
1303
+ ],
1304
+ defaultEffort: "high"
1305
+ },
1306
+ "claude-opus-4.6-fast": {
1307
+ supportedEfforts: [
1308
+ "low",
1309
+ "medium",
1310
+ "high"
1311
+ ],
1312
+ defaultEffort: "high"
1313
+ },
1314
+ "claude-opus-4.6-1m": {
1315
+ supportedEfforts: [
1316
+ "low",
1317
+ "medium",
1318
+ "high"
1319
+ ],
1320
+ defaultEffort: "high"
1321
+ },
1322
+ "gpt-5.3-codex": {
1323
+ supportedEfforts: [
1324
+ "low",
1325
+ "medium",
1326
+ "high",
1327
+ "xhigh"
1328
+ ],
1329
+ defaultEffort: "medium"
1330
+ },
1331
+ "gpt-5.2-codex": {
1332
+ supportedEfforts: [
1333
+ "low",
1334
+ "medium",
1335
+ "high",
1336
+ "xhigh"
1337
+ ],
1338
+ defaultEffort: "high"
1339
+ },
1340
+ "gpt-5.2": {
1341
+ supportedEfforts: [
1342
+ "low",
1343
+ "medium",
1344
+ "high"
1345
+ ],
1346
+ defaultEffort: "medium"
1347
+ },
1348
+ "gpt-5.1-codex": {
1349
+ supportedEfforts: [
1350
+ "low",
1351
+ "medium",
1352
+ "high"
1353
+ ],
1354
+ defaultEffort: "medium"
1355
+ },
1356
+ "gpt-5.1-codex-max": {
1357
+ supportedEfforts: [
1358
+ "low",
1359
+ "medium",
1360
+ "high"
1361
+ ],
1362
+ defaultEffort: "medium"
1363
+ },
1364
+ "gpt-5.1": {
1365
+ supportedEfforts: [
1366
+ "low",
1367
+ "medium",
1368
+ "high"
1369
+ ],
1370
+ defaultEffort: "medium"
1371
+ },
1372
+ "gpt-5.1-codex-mini": {
1373
+ supportedEfforts: [
1374
+ "low",
1375
+ "medium",
1376
+ "high"
1377
+ ],
1378
+ defaultEffort: "medium"
1379
+ },
1380
+ "gpt-5-mini": {
1381
+ supportedEfforts: [
1382
+ "low",
1383
+ "medium",
1384
+ "high"
1385
+ ],
1386
+ defaultEffort: "medium"
1387
+ }
1388
+ };
1389
+ const VALID_EFFORTS = new Set([
1390
+ "low",
1391
+ "medium",
1392
+ "high",
1393
+ "xhigh"
1394
+ ]);
1395
+ /**
1396
+ * Parse a model string that may contain a reasoning effort suffix.
1397
+ * Format: "model-name:effort" (e.g. "claude-sonnet-4.6:high")
1398
+ *
1399
+ * If the suffix is not a valid effort level or the model doesn't support it,
1400
+ * the suffix is ignored and the full string is treated as the model name.
1401
+ */
1402
+ function parseModelSuffix(model) {
1403
+ const colonIndex = model.lastIndexOf(":");
1404
+ if (colonIndex === -1) return { baseModel: model };
1405
+ const potentialBase = model.slice(0, colonIndex);
1406
+ const potentialEffort = model.slice(colonIndex + 1);
1407
+ if (!VALID_EFFORTS.has(potentialEffort)) return { baseModel: model };
1408
+ const effort = potentialEffort;
1409
+ const config$1 = MODEL_REASONING_CONFIG[potentialBase];
1410
+ if (!config$1) return { baseModel: model };
1411
+ if (!config$1.supportedEfforts.includes(effort)) return {
1412
+ baseModel: potentialBase,
1413
+ reasoningEffort: config$1.defaultEffort
1414
+ };
1415
+ return {
1416
+ baseModel: potentialBase,
1417
+ reasoningEffort: effort
1418
+ };
1419
+ }
1420
+ /**
1421
+ * Generate virtual model entries for models that support reasoning effort.
1422
+ * Each supported effort level gets its own virtual model entry.
1423
+ */
1424
+ function generateVirtualModels(models) {
1425
+ const virtualModels = [];
1426
+ for (const model of models) {
1427
+ const config$1 = MODEL_REASONING_CONFIG[model.id];
1428
+ if (!config$1) continue;
1429
+ for (const effort of config$1.supportedEfforts) virtualModels.push({
1430
+ id: `${model.id}:${effort}`,
1431
+ object: "model",
1432
+ type: "model",
1433
+ created: 0,
1434
+ created_at: (/* @__PURE__ */ new Date(0)).toISOString(),
1435
+ owned_by: model.vendor,
1436
+ display_name: `${model.name} (${effort} thinking)`
1437
+ });
1438
+ }
1439
+ return virtualModels;
1246
1440
  }
1247
1441
 
1248
1442
  //#endregion
@@ -1390,6 +1584,26 @@ function createAuthMiddleware(options = {}) {
1390
1584
  };
1391
1585
  }
1392
1586
 
1587
+ //#endregion
1588
+ //#region src/lib/api-key-guard.ts
1589
+ /**
1590
+ * API key guard middleware that silently drops connections when the API key
1591
+ * doesn't match the expected value. Unauthorized requests get NO response.
1592
+ *
1593
+ * Only active when state.apiKeyAuth is set (via --api-key-auth CLI flag).
1594
+ */
1595
+ async function apiKeyGuard(c, next) {
1596
+ if (!state.apiKeyAuth) {
1597
+ await next();
1598
+ return;
1599
+ }
1600
+ if (extractRequestApiKey(c) === state.apiKeyAuth) {
1601
+ await next();
1602
+ return;
1603
+ }
1604
+ await new Promise(() => {});
1605
+ }
1606
+
1393
1607
  //#endregion
1394
1608
  //#region src/lib/request-logger.ts
1395
1609
  const REQUEST_CONTEXT_KEY = "requestContext";
@@ -1473,6 +1687,34 @@ function setRequestContext(c, ctx) {
1473
1687
  });
1474
1688
  }
1475
1689
  /**
1690
+ * Format the input size for display
1691
+ */
1692
+ function formatInputSize(bytes) {
1693
+ return bytes >= 1024 ? `${(bytes / 1024).toFixed(1)}KB` : `${bytes}B`;
1694
+ }
1695
+ /**
1696
+ * Build the model routing log line
1697
+ */
1698
+ function buildModelLine(ctx) {
1699
+ const parts = [];
1700
+ if (ctx.requestedModel && ctx.requestedModel !== ctx.model) parts.push(`${colors.gray}${ctx.requestedModel}${colors.reset} ${colors.dim}→${colors.reset} ${colors.white}${ctx.model}${colors.reset}`);
1701
+ else parts.push(`${colors.white}${ctx.model}${colors.reset}`);
1702
+ if (ctx.provider) parts.push(`${colors.dim}via${colors.reset} ${colors.magenta}${ctx.provider}${colors.reset}`);
1703
+ if (ctx.inputLength !== void 0) parts.push(`${colors.dim}·${colors.reset} ${colors.yellow}${formatInputSize(ctx.inputLength)}${colors.reset}`);
1704
+ return ` ${parts.join(" ")}`;
1705
+ }
1706
+ /**
1707
+ * Build the modifications log line (effort, replacements, tokens)
1708
+ */
1709
+ function buildModificationsLine(ctx) {
1710
+ const modParts = [];
1711
+ if (ctx.reasoningEffort) modParts.push(`${colors.blue}effort=${ctx.reasoningEffort}${colors.reset}`);
1712
+ if (ctx.replacements && ctx.replacements.length > 0) modParts.push(`${colors.green}replace: ${ctx.replacements.join(", ")}${colors.reset}`);
1713
+ if (ctx.inputTokens !== void 0) modParts.push(`${colors.yellow}${ctx.inputTokens.toLocaleString()} tokens${colors.reset}`);
1714
+ if (modParts.length === 0) return void 0;
1715
+ return ` ${modParts.join(` ${colors.dim}·${colors.reset} `)}`;
1716
+ }
1717
+ /**
1476
1718
  * Custom request logger middleware
1477
1719
  */
1478
1720
  async function requestLogger(c, next) {
@@ -1480,7 +1722,11 @@ async function requestLogger(c, next) {
1480
1722
  const startTime = Date.now();
1481
1723
  const method = c.req.method;
1482
1724
  const path$1 = c.req.path + (c.req.raw.url.includes("?") ? "?" + c.req.raw.url.split("?")[1] : "");
1483
- c.set(REQUEST_CONTEXT_KEY, { startTime });
1725
+ const contentLength = c.req.header("content-length");
1726
+ c.set(REQUEST_CONTEXT_KEY, {
1727
+ startTime,
1728
+ inputLength: contentLength ? Number(contentLength) : void 0
1729
+ });
1484
1730
  await next();
1485
1731
  const ctx = c.get(REQUEST_CONTEXT_KEY);
1486
1732
  const duration = ((Date.now() - startTime) / 1e3).toFixed(1);
@@ -1491,15 +1737,10 @@ async function requestLogger(c, next) {
1491
1737
  const statusBadge = `${statusColor}${status}${colors.reset}`;
1492
1738
  const durationStr = `${colors.cyan}${duration}s${colors.reset}`;
1493
1739
  lines.push(`${colors.bold}${method}${colors.reset} ${path$1} ${statusBadge} ${durationStr}`);
1494
- if (ctx?.provider && ctx.model) {
1495
- const providerColor = colors.magenta;
1496
- lines.push(` ${colors.gray}Provider:${colors.reset} ${providerColor}${ctx.provider}${colors.reset} ${colors.gray}->${colors.reset} ${colors.white}${ctx.model}${colors.reset}`);
1497
- }
1498
- if (ctx?.inputTokens !== void 0 || ctx?.outputTokens !== void 0) {
1499
- const tokenParts = [];
1500
- if (ctx.inputTokens !== void 0) tokenParts.push(`${colors.gray}Input:${colors.reset} ${colors.yellow}${ctx.inputTokens.toLocaleString()}${colors.reset}`);
1501
- if (ctx.outputTokens !== void 0) tokenParts.push(`${colors.gray}Output:${colors.reset} ${colors.green}${ctx.outputTokens.toLocaleString()}${colors.reset}`);
1502
- lines.push(` ${tokenParts.join(" ")}`);
1740
+ if (ctx?.model) lines.push(buildModelLine(ctx));
1741
+ if (ctx) {
1742
+ const modsLine = buildModificationsLine(ctx);
1743
+ if (modsLine) lines.push(modsLine);
1503
1744
  }
1504
1745
  lines.push(` ${colors.dim}${getTimeString()}${colors.reset}`);
1505
1746
  console.log(lines.join("\n"));
@@ -1514,13 +1755,18 @@ const awaitApproval = async () => {
1514
1755
  //#endregion
1515
1756
  //#region src/lib/model-resolver.ts
1516
1757
  /**
1517
- * Normalize a model name by converting dashes to dots between numbers.
1758
+ * Normalize a model name by converting dashes to dots between numbers
1759
+ * and converting Anthropic's [1m] suffix to Copilot's -1m suffix.
1518
1760
  * e.g., "claude-opus-4-5" -> "claude-opus-4.5"
1761
+ * "claude-opus-4-6[1m]" -> "claude-opus-4.6-1m"
1519
1762
  * "gpt-4-1" -> "gpt-4.1"
1520
1763
  * "gpt-5-1-codex" -> "gpt-5.1-codex"
1521
1764
  */
1522
1765
  function normalizeModelName(model) {
1523
- return model.replaceAll(/(\d)-(\d)/g, (_, p1, p2) => `${p1}.${p2}`);
1766
+ let normalized = model.replace("[1m]", "-1m");
1767
+ normalized = normalized.replace(/-\d{8}$/, "");
1768
+ normalized = normalized.replaceAll(/(\d)-(\d)/g, (_, p1, p2) => `${p1}.${p2}`);
1769
+ return normalized;
1524
1770
  }
1525
1771
 
1526
1772
  //#endregion
@@ -1778,15 +2024,21 @@ const createChatCompletions = async (payload, options) => {
1778
2024
  async function handleCompletion$1(c) {
1779
2025
  await checkRateLimit(state);
1780
2026
  const rawPayload = await c.req.json();
1781
- let payload = await applyReplacementsToPayload(rawPayload);
1782
- payload = {
1783
- ...payload,
1784
- model: normalizeModelName(payload.model)
2027
+ const requestedModel = rawPayload.model;
2028
+ const { baseModel, reasoningEffort } = parseModelSuffix(rawPayload.model);
2029
+ rawPayload.model = baseModel;
2030
+ const { payload: replacedPayload, appliedRules } = await applyReplacementsToPayload(rawPayload);
2031
+ let payload = {
2032
+ ...replacedPayload,
2033
+ model: normalizeModelName(replacedPayload.model)
1785
2034
  };
1786
2035
  consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
1787
2036
  setRequestContext(c, {
1788
- provider: "Copilot",
1789
- model: payload.model
2037
+ requestedModel,
2038
+ provider: "ChatCompletions",
2039
+ model: payload.model,
2040
+ replacements: appliedRules,
2041
+ reasoningEffort
1790
2042
  });
1791
2043
  const selectedModel = state.models?.data.find((model) => model.id === payload.model);
1792
2044
  try {
@@ -2252,7 +2504,7 @@ const createResponses = async (payload, { vision, initiator }) => {
2252
2504
  const MESSAGE_TYPE = "message";
2253
2505
  const CODEX_PHASE_MODEL = "gpt-5.3-codex";
2254
2506
  const THINKING_TEXT = "Thinking...";
2255
- const translateAnthropicMessagesToResponsesPayload = (payload) => {
2507
+ const translateAnthropicMessagesToResponsesPayload = (payload, effortOverride) => {
2256
2508
  const input = [];
2257
2509
  for (const message of payload.messages) input.push(...translateMessage(message, payload.model));
2258
2510
  const translatedTools = convertAnthropicTools(payload.tools);
@@ -2274,7 +2526,7 @@ const translateAnthropicMessagesToResponsesPayload = (payload) => {
2274
2526
  store: false,
2275
2527
  parallel_tool_calls: true,
2276
2528
  reasoning: {
2277
- effort: getReasoningEffortForModel(payload.model),
2529
+ effort: getReasoningEffortForModel(payload.model, effortOverride),
2278
2530
  summary: "detailed"
2279
2531
  },
2280
2532
  include: ["reasoning.encrypted_content"]
@@ -3070,7 +3322,8 @@ const createMessages = async (payload, anthropicBetaHeader, options) => {
3070
3322
  "X-Initiator": initiator
3071
3323
  };
3072
3324
  if (anthropicBetaHeader) {
3073
- const filteredBeta = anthropicBetaHeader.split(",").map((item) => item.trim()).filter((item) => item !== "claude-code-20250219").join(",");
3325
+ const unsupportedBetas = new Set(["claude-code-20250219", "context-1m-2025-08-07"]);
3326
+ const filteredBeta = anthropicBetaHeader.split(",").map((item) => item.trim()).filter((item) => !unsupportedBetas.has(item)).join(",");
3074
3327
  if (filteredBeta) headers["anthropic-beta"] = filteredBeta;
3075
3328
  } else if (payload.thinking?.budget_tokens) headers["anthropic-beta"] = "interleaved-thinking-2025-05-14";
3076
3329
  const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
@@ -3295,37 +3548,60 @@ async function handleCompletion(c) {
3295
3548
  await checkRateLimit(state);
3296
3549
  const anthropicPayload = await c.req.json();
3297
3550
  logger$1.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
3551
+ const requestedModel = anthropicPayload.model;
3552
+ const { baseModel, reasoningEffort: suffixEffort } = parseModelSuffix(anthropicPayload.model);
3553
+ anthropicPayload.model = normalizeModelName(baseModel);
3298
3554
  const subagentMarker = parseSubagentMarkerFromFirstUser(anthropicPayload);
3299
3555
  const initiatorOverride = subagentMarker ? "agent" : void 0;
3300
3556
  if (subagentMarker) logger$1.debug("Detected Subagent marker:", JSON.stringify(subagentMarker));
3301
3557
  const isCompact = isCompactRequest(anthropicPayload);
3302
3558
  const anthropicBeta = c.req.header("anthropic-beta");
3303
3559
  logger$1.debug("Anthropic Beta header:", anthropicBeta);
3304
- const noTools = !anthropicPayload.tools || anthropicPayload.tools.length === 0;
3305
- if (anthropicBeta && noTools && !isCompact) anthropicPayload.model = getSmallModel();
3306
- if (isCompact) {
3307
- logger$1.debug("Is compact request:", isCompact);
3308
- if (shouldCompactUseSmallModel()) anthropicPayload.model = getSmallModel();
3309
- } else mergeToolResultForClaude(anthropicPayload);
3560
+ applyModelVariantRouting(anthropicPayload, anthropicBeta);
3561
+ if (isCompact) logger$1.debug("Is compact request:", isCompact);
3562
+ else mergeToolResultForClaude(anthropicPayload);
3310
3563
  if (state.manualApprove) await awaitApproval();
3311
3564
  const selectedModel = state.models?.data.find((m) => m.id === anthropicPayload.model);
3565
+ let apiType = "ChatCompletions";
3566
+ if (shouldUseMessagesApi(selectedModel)) apiType = "Messages";
3567
+ else if (shouldUseResponsesApi(selectedModel)) apiType = "Responses";
3568
+ const bodyEffort = getBodyReasoningEffort(anthropicPayload);
3569
+ const effectiveEffort = suffixEffort ?? bodyEffort;
3570
+ setRequestContext(c, {
3571
+ requestedModel,
3572
+ model: anthropicPayload.model,
3573
+ provider: apiType,
3574
+ reasoningEffort: effectiveEffort
3575
+ });
3312
3576
  if (shouldUseMessagesApi(selectedModel)) return await handleWithMessagesApi(c, anthropicPayload, {
3313
3577
  anthropicBetaHeader: anthropicBeta,
3314
3578
  initiatorOverride,
3315
- selectedModel
3579
+ selectedModel,
3580
+ effortOverride: suffixEffort
3581
+ });
3582
+ if (shouldUseResponsesApi(selectedModel)) return await handleWithResponsesApi(c, anthropicPayload, {
3583
+ initiatorOverride,
3584
+ effortOverride: suffixEffort
3316
3585
  });
3317
- if (shouldUseResponsesApi(selectedModel)) return await handleWithResponsesApi(c, anthropicPayload, initiatorOverride);
3318
3586
  return await handleWithChatCompletions(c, anthropicPayload, initiatorOverride);
3319
3587
  }
3320
3588
  const RESPONSES_ENDPOINT$1 = "/responses";
3321
3589
  const MESSAGES_ENDPOINT = "/v1/messages";
3322
3590
  const handleWithChatCompletions = async (c, anthropicPayload, initiatorOverride) => {
3323
3591
  const openAIPayload = translateToOpenAI(anthropicPayload);
3324
- let finalPayload = await applyReplacementsToPayload(openAIPayload);
3325
- finalPayload = {
3326
- ...finalPayload,
3327
- model: normalizeModelName(finalPayload.model)
3592
+ const { payload: replacedPayload, appliedRules } = await applyReplacementsToPayload(openAIPayload);
3593
+ const finalPayload = {
3594
+ ...replacedPayload,
3595
+ model: normalizeModelName(replacedPayload.model)
3328
3596
  };
3597
+ if (appliedRules.length > 0) setRequestContext(c, { replacements: appliedRules });
3598
+ try {
3599
+ const selectedModel = state.models?.data.find((m) => m.id === finalPayload.model);
3600
+ if (selectedModel) {
3601
+ const tokenCount = await getTokenCount(finalPayload, selectedModel);
3602
+ setRequestContext(c, { inputTokens: tokenCount.input });
3603
+ }
3604
+ } catch {}
3329
3605
  logger$1.debug("Translated OpenAI request payload:", JSON.stringify(finalPayload));
3330
3606
  const response = await createChatCompletions(finalPayload, { initiator: initiatorOverride });
3331
3607
  if (isNonStreaming(response)) {
@@ -3358,8 +3634,9 @@ const handleWithChatCompletions = async (c, anthropicPayload, initiatorOverride)
3358
3634
  }
3359
3635
  });
3360
3636
  };
3361
- const handleWithResponsesApi = async (c, anthropicPayload, initiatorOverride) => {
3362
- const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload);
3637
+ const handleWithResponsesApi = async (c, anthropicPayload, options) => {
3638
+ const { initiatorOverride, effortOverride } = options ?? {};
3639
+ const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, effortOverride);
3363
3640
  logger$1.debug("Translated Responses payload:", JSON.stringify(responsesPayload));
3364
3641
  const { vision, initiator } = getResponsesRequestOptions(responsesPayload);
3365
3642
  const response = await createResponses(responsesPayload, {
@@ -3411,14 +3688,15 @@ const handleWithResponsesApi = async (c, anthropicPayload, initiatorOverride) =>
3411
3688
  return c.json(anthropicResponse);
3412
3689
  };
3413
3690
  const handleWithMessagesApi = async (c, anthropicPayload, options) => {
3414
- const { anthropicBetaHeader, initiatorOverride, selectedModel } = options ?? {};
3691
+ const { anthropicBetaHeader, initiatorOverride, selectedModel, effortOverride } = options ?? {};
3415
3692
  for (const msg of anthropicPayload.messages) if (msg.role === "assistant" && Array.isArray(msg.content)) msg.content = msg.content.filter((block) => {
3416
3693
  if (block.type !== "thinking") return true;
3417
3694
  return block.thinking && block.thinking !== "Thinking..." && block.signature && !block.signature.includes("@");
3418
3695
  });
3419
3696
  if (selectedModel?.capabilities.supports.adaptive_thinking) {
3420
- anthropicPayload.thinking = { type: "adaptive" };
3421
- anthropicPayload.output_config = { effort: getAnthropicEffortForModel(anthropicPayload.model) };
3697
+ if (!anthropicPayload.thinking) anthropicPayload.thinking = { type: "adaptive" };
3698
+ const clientEffort = anthropicPayload.output_config?.effort;
3699
+ anthropicPayload.output_config = { effort: effortOverride ? getAnthropicEffortForModel(anthropicPayload.model, effortOverride) : clientEffort ?? getAnthropicEffortForModel(anthropicPayload.model) };
3422
3700
  }
3423
3701
  logger$1.debug("Translated Messages payload:", JSON.stringify(anthropicPayload));
3424
3702
  const response = await createMessages(anthropicPayload, anthropicBetaHeader, { initiator: initiatorOverride });
@@ -3439,6 +3717,21 @@ const handleWithMessagesApi = async (c, anthropicPayload, options) => {
3439
3717
  logger$1.debug("Non-streaming Messages result:", JSON.stringify(response).slice(-400));
3440
3718
  return c.json(response);
3441
3719
  };
3720
+ /**
3721
+ * Route to model variants based on client signals (1m context, fast mode).
3722
+ * Mutates the payload in place.
3723
+ */
3724
+ function applyModelVariantRouting(payload, anthropicBeta) {
3725
+ if (anthropicBeta?.includes("context-1m")) {
3726
+ const candidate = `${payload.model}-1m`;
3727
+ if (state.models?.data.some((m) => m.id === candidate)) payload.model = candidate;
3728
+ }
3729
+ if (payload.speed === "fast") {
3730
+ const candidate = `${payload.model}-fast`;
3731
+ if (state.models?.data.some((m) => m.id === candidate)) payload.model = candidate;
3732
+ delete payload.speed;
3733
+ }
3734
+ }
3442
3735
  const shouldUseResponsesApi = (selectedModel) => {
3443
3736
  return selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT$1) ?? false;
3444
3737
  };
@@ -3447,8 +3740,25 @@ const shouldUseMessagesApi = (selectedModel) => {
3447
3740
  };
3448
3741
  const isNonStreaming = (response) => Object.hasOwn(response, "choices");
3449
3742
  const isAsyncIterable$1 = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
3450
- const getAnthropicEffortForModel = (model) => {
3451
- const reasoningEffort = getReasoningEffortForModel(model);
3743
+ /**
3744
+ * Extract reasoning effort info from the Anthropic request body for logging.
3745
+ * Claude Code sends effort as `output_config.effort` (low/medium/high/max)
3746
+ * and thinking mode as `thinking.type` (enabled/adaptive).
3747
+ * When effort is "high" (the default), Claude Code omits output_config.effort entirely.
3748
+ */
3749
+ function getBodyReasoningEffort(payload) {
3750
+ if (!payload.thinking && !payload.output_config?.effort) return void 0;
3751
+ const parts = [];
3752
+ const effort = payload.output_config?.effort ?? (payload.thinking ? "high" : void 0);
3753
+ if (effort) parts.push(effort);
3754
+ if (payload.thinking) {
3755
+ parts.push(payload.thinking.type);
3756
+ if (payload.thinking.budget_tokens) parts.push(`${payload.thinking.budget_tokens.toLocaleString()} budget`);
3757
+ }
3758
+ return parts.length > 0 ? parts.join(", ") : void 0;
3759
+ }
3760
+ const getAnthropicEffortForModel = (model, override) => {
3761
+ const reasoningEffort = getReasoningEffortForModel(model, override);
3452
3762
  if (reasoningEffort === "xhigh") return "max";
3453
3763
  if (reasoningEffort === "none" || reasoningEffort === "minimal") return "low";
3454
3764
  return reasoningEffort;
@@ -3537,9 +3847,10 @@ modelRoutes.get("/", async (c) => {
3537
3847
  owned_by: model.vendor,
3538
3848
  display_name: model.name
3539
3849
  })) ?? [];
3850
+ const virtualModels = state.models ? generateVirtualModels(state.models.data) : [];
3540
3851
  return c.json({
3541
3852
  object: "list",
3542
- data: copilotModels,
3853
+ data: [...copilotModels, ...virtualModels],
3543
3854
  has_more: false
3544
3855
  });
3545
3856
  } catch (error) {
@@ -3559,7 +3870,10 @@ replacementsRoute.get("/", async (c) => {
3559
3870
  replacementsRoute.post("/", async (c) => {
3560
3871
  const body = await c.req.json();
3561
3872
  if (!body.pattern) return c.json({ error: "Pattern is required" }, 400);
3562
- const rule = await addReplacement(body.pattern, body.replacement ?? "", body.isRegex ?? false, body.name);
3873
+ const rule = await addReplacement(body.pattern, body.replacement ?? "", {
3874
+ isRegex: body.isRegex ?? false,
3875
+ name: body.name
3876
+ });
3563
3877
  return c.json(rule, 201);
3564
3878
  });
3565
3879
  replacementsRoute.delete("/:id", async (c) => {
@@ -3626,12 +3940,36 @@ const handleItemId = (parsed, tracker) => {
3626
3940
  //#region src/routes/responses/handler.ts
3627
3941
  const logger = createHandlerLogger("responses-handler");
3628
3942
  const RESPONSES_ENDPOINT = "/responses";
3943
+ function isResponsesReasoningEffort(value) {
3944
+ return value === "none" || value === "minimal" || value === "low" || value === "medium" || value === "high" || value === "xhigh";
3945
+ }
3946
+ function normalizeResponsesReasoning(payload, suffixEffort) {
3947
+ const topLevelEffortRaw = payload.reasoningEffort ?? payload.reasoning_effort;
3948
+ const topLevelEffort = isResponsesReasoningEffort(topLevelEffortRaw) ? topLevelEffortRaw : void 0;
3949
+ if (topLevelEffort) payload.reasoning = payload.reasoning ? {
3950
+ ...payload.reasoning,
3951
+ effort: payload.reasoning.effort ?? topLevelEffort
3952
+ } : { effort: topLevelEffort };
3953
+ delete payload.reasoningEffort;
3954
+ delete payload.reasoning_effort;
3955
+ if (suffixEffort) payload.reasoning = payload.reasoning ? {
3956
+ ...payload.reasoning,
3957
+ effort: suffixEffort
3958
+ } : { effort: suffixEffort };
3959
+ return payload.reasoning?.effort ?? void 0;
3960
+ }
3629
3961
  const handleResponses = async (c) => {
3630
3962
  await checkRateLimit(state);
3631
3963
  const payload = await c.req.json();
3964
+ const requestedModel = payload.model;
3965
+ const { baseModel, reasoningEffort: suffixEffort } = parseModelSuffix(payload.model);
3966
+ payload.model = baseModel;
3967
+ const effectiveEffort = normalizeResponsesReasoning(payload, suffixEffort);
3632
3968
  setRequestContext(c, {
3633
- provider: "Copilot (Responses)",
3634
- model: payload.model
3969
+ requestedModel,
3970
+ provider: "Responses",
3971
+ model: payload.model,
3972
+ reasoningEffort: effectiveEffort
3635
3973
  });
3636
3974
  logger.debug("Responses request payload:", JSON.stringify(payload));
3637
3975
  useFunctionApplyPatch(payload);
@@ -3740,6 +4078,7 @@ usageRoute.get("/", async (c) => {
3740
4078
  //#endregion
3741
4079
  //#region src/server.ts
3742
4080
  const server = new Hono();
4081
+ server.use(apiKeyGuard);
3743
4082
  server.use(requestLogger);
3744
4083
  server.use(cors());
3745
4084
  server.use("*", createAuthMiddleware());
@@ -3759,6 +4098,11 @@ server.route("/v1/messages", messageRoutes);
3759
4098
 
3760
4099
  //#endregion
3761
4100
  //#region src/start.ts
4101
/**
 * Collect every advertised model id: the ids of the cached base models
 * followed by the ids of the virtual models derived from them. Returns an
 * empty list when no models have been cached yet.
 */
function getAllModelIds() {
	if (!state.models) return [];
	const modelData = state.models.data;
	const baseIds = modelData.map((model) => model.id);
	const virtualIds = generateVirtualModels(modelData).map((model) => model.id);
	return [...baseIds, ...virtualIds];
}
3762
4106
  async function runServer(options) {
3763
4107
  consola.info(`copilot-api v${package_default.version}`);
3764
4108
  if (options.insecure) {
@@ -3778,6 +4122,9 @@ async function runServer(options) {
3778
4122
  state.showToken = options.showToken;
3779
4123
  state.debug = options.debug;
3780
4124
  state.verbose = options.verbose;
4125
+ state.apiKeyAuth = options.apiKeyAuth;
4126
+ if (options.apiKeyAuth) consola.info("API key authentication enabled - unauthorized requests will be silently dropped");
4127
+ if (options.host) consola.info(`Binding to host: ${options.host}`);
3781
4128
  if (options.debug) consola.info("Debug mode enabled - raw HTTP requests will be logged");
3782
4129
  await ensurePaths();
3783
4130
  mergeConfigWithDefaults();
@@ -3788,9 +4135,9 @@ async function runServer(options) {
3788
4135
  } else await setupGitHubToken();
3789
4136
  await setupCopilotToken();
3790
4137
  await cacheModels();
3791
- const allModelIds = state.models?.data.map((model) => model.id) ?? [];
4138
+ const allModelIds = getAllModelIds();
3792
4139
  consola.info(`Available models: \n${allModelIds.map((id) => `- ${id}`).join("\n")}`);
3793
- const serverUrl = `http://localhost:${options.port}`;
4140
+ const serverUrl = `http://${options.host ?? "localhost"}:${options.port}`;
3794
4141
  if (options.claudeCode) {
3795
4142
  invariant(state.models, "Models should be loaded by now");
3796
4143
  const selectedModel = await consola.prompt("Select a model to use with Claude Code", {
@@ -3823,9 +4170,21 @@ async function runServer(options) {
3823
4170
  serve({
3824
4171
  fetch: server.fetch,
3825
4172
  port: options.port,
4173
+ hostname: options.host,
3826
4174
  bun: { idleTimeout: 255 }
3827
4175
  });
3828
4176
  }
4177
/**
 * Resolve the --api-key-auth flag into an API key.
 *
 * An omitted flag resolves to undefined. An explicit value is returned as-is.
 * When the flag is passed bare ("" or the citty boolean coercion "true"),
 * fall back to the COPILOT_API_KEY_AUTH environment variable; if that is
 * unset too, report the error and terminate the process.
 */
function resolveApiKeyAuth(cliValue) {
	if (cliValue === void 0) return void 0;
	const flagHasNoValue = cliValue === "" || cliValue === "true";
	if (!flagHasNoValue) return cliValue;
	const envValue = process.env.COPILOT_API_KEY_AUTH;
	if (envValue) return envValue;
	consola.error("--api-key-auth requires a value or COPILOT_API_KEY_AUTH environment variable");
	process.exit(1);
}
3829
4188
  const start = defineCommand({
3830
4189
  meta: {
3831
4190
  name: "start",
@@ -3897,6 +4256,14 @@ const start = defineCommand({
3897
4256
  type: "boolean",
3898
4257
  default: false,
3899
4258
  description: "Log raw HTTP requests received by the server (headers, method, path)"
4259
+ },
4260
+ "api-key-auth": {
4261
+ type: "string",
4262
+ description: "API key for incoming request authentication. Requests with mismatched keys are silently dropped."
4263
+ },
4264
+ host: {
4265
+ type: "string",
4266
+ description: "Hostname/IP to bind the server to (e.g., 0.0.0.0 for all interfaces)"
3900
4267
  }
3901
4268
  },
3902
4269
  run({ args }) {
@@ -3914,7 +4281,9 @@ const start = defineCommand({
3914
4281
  showToken: args["show-token"],
3915
4282
  proxyEnv: args["proxy-env"],
3916
4283
  insecure: args.insecure,
3917
- debug: args.debug
4284
+ debug: args.debug,
4285
+ apiKeyAuth: resolveApiKeyAuth(args["api-key-auth"]),
4286
+ host: args.host
3918
4287
  });
3919
4288
  }
3920
4289
  });